diff --git a/src/extract.py b/src/extract.py
index 00eb896..d12c565 100644
--- a/src/extract.py
+++ b/src/extract.py
@@ -6,7 +6,6 @@ import json
 from dotenv import load_dotenv
 from datetime import date, datetime
 
-
-load_dotenv
+load_dotenv()
 
 google_api_key = os.getenv('GOOGLE_API_KEY')
diff --git a/src/load.py b/src/load.py
index 01517b3..5641b1a 100644
--- a/src/load.py
+++ b/src/load.py
@@ -4,6 +4,7 @@ import json
 import psycopg
 from dotenv import load_dotenv
 from datetime import date, datetime
+import sql_statements
 
 load_dotenv()
 
@@ -16,48 +17,32 @@ db_password = os.getenv('DB_PASSWORD')
 
 today = date.today()
 
-collections_table_creation = '''
-    CREATE TABLE IF NOT EXISTS Collection_Item(
-        "id" BIGSERIAL PRIMARY KEY,
-        "title" VARCHAR(255) NULL,
-        "author" VARCHAR(255) NULL,
-        "publisher" VARCHAR(255) NULL,
-        "publishing_date" VARCHAR(255) NULL,
-        "loc_number" VARCHAR(255) NULL,
-        "dewey_decimal_number" VARCHAR(255) NULL,
-        "isbn" BIGINT NULL,
-        "sort_title" VARCHAR(255) NULL,
-        "format" VARCHAR(255) NULL,
-        "language" VARCHAR(255) NULL,
-        "page_count" BIGINT NULL,
-        "categories" VARCHAR(255) NULL,
-        "description" BIGINT NULL,
-        "price_in_cents" BIGINT NULL,
-        "cover_image_uri" VARCHAR(255) NULL,
-        "is_checked_in" BOOLEAN NULL,
-        "is_archived" BOOLEAN NULL,
-        "is_lost" BOOLEAN NULL,
-        "lost_date" DATE NULL
-    )
-    '''
-
 def start():
     with psycopg.connect(f'dbname={db_name} user={db_user} password={db_password}') as conn, \
          open(f'output/transformed_{today}.json', 'r') as transformed_books:
-        with conn.cursor() as cur:
-            # cur.execute(f'DROP TABLE IF EXISTS Collection_Item') # TODO: REMOVE WHEN TESTING COMPLETED
-            cur.execute(collections_table_creation)
         books = json.loads(transformed_books.read())
+        with conn.cursor() as cur:
+            cur.execute('DROP TABLE IF EXISTS Collection_Item') # TODO: REMOVE WHEN TESTING COMPLETED
+            cur.execute(sql_statements.collections_table_creation)
+            load_transformed_books(cur, books)
+
 
-        for book in books['books']:
-            cur.execute(f'INSERT INTO Collection_Item ' \
-                '(title, author, publisher, publishing_date, isbn, sort_title, format, language, categories, page_count, is_checked_in, is_archived, is_lost) ' \
-                f'VALUES (\'{book['title']}\',\'{book['author']}\',\'{book['publisher']}\',\'{book['publishing_date']}\',{book['isbn']},\'{book['sort_title']}\','
-                f'\'{book['format']}\',\'{book['language']}\',\'{book['categories']}\',{book['page_count']},{book['is_checked_in']},{book['is_archived']},{book['is_lost']});')
-            logger.info(f'{datetime.now()}:Book {book['title']} loaded.')
+def load_transformed_books(cursor, books):
+    '''
+    Takes a psycopg connection cursor and a dictionary of books and inserts
+    the books into a PostgreSQL database.
 
-def load_transformed_books():
-    pass
+    Keyword arguments:
+    cursor - a psycopg cursor object
+    books - a dictionary of transformed books following the schema for the
+    `collection_item` SQL table
+    '''
+    for book in books['books']:
+        # The values are handed to execute() separately from the SQL text so
+        # the driver binds them; book fields are never formatted into the query.
+        query, params = sql_statements.collection_insert_statement(book)
+        cursor.execute(query, params)
+        logger.info('%s:Book %s loaded.', datetime.now(), book['title'])
 
 if __name__ == '__main__':
     print('Loading Started')
diff --git a/src/sql_statements.py b/src/sql_statements.py
new file mode 100644
index 0000000..050719c
--- /dev/null
+++ b/src/sql_statements.py
@@ -0,0 +1,57 @@
+collections_table_creation = '''
+    CREATE TABLE IF NOT EXISTS Collection_Item(
+        "id" BIGSERIAL PRIMARY KEY,
+        "title" VARCHAR(255) NULL,
+        "author" VARCHAR(255) NULL,
+        "publisher" VARCHAR(255) NULL,
+        "publishing_date" VARCHAR(255) NULL,
+        "loc_number" VARCHAR(255) NULL,
+        "dewey_decimal_number" VARCHAR(255) NULL,
+        "isbn" BIGINT NULL,
+        "sort_title" VARCHAR(255) NULL,
+        "format" VARCHAR(255) NULL,
+        "language" VARCHAR(255) NULL,
+        "page_count" BIGINT NULL,
+        "categories" VARCHAR(255) NULL,
+        "description" BIGINT NULL,
+        "price_in_cents" BIGINT NULL,
+        "cover_image_uri" VARCHAR(255) NULL,
+        "is_checked_in" BOOLEAN NULL,
+        "is_archived" BOOLEAN NULL,
+        "is_lost" BOOLEAN NULL,
+        "lost_date" DATE NULL
+    )
+    '''
+
+# Columns populated by collection_insert_statement, in insert order.
+_COLLECTION_INSERT_COLUMNS = (
+    'title', 'author', 'publisher', 'publishing_date', 'isbn', 'sort_title',
+    'format', 'language', 'categories', 'page_count', 'is_checked_in',
+    'is_archived', 'is_lost',
+)
+
+# Named placeholders (%(name)s) are filled in by psycopg at execute time.
+_COLLECTION_INSERT_QUERY = (
+    'INSERT INTO Collection_Item ('
+    + ', '.join(_COLLECTION_INSERT_COLUMNS)
+    + ') VALUES ('
+    + ', '.join(f'%({column})s' for column in _COLLECTION_INSERT_COLUMNS)
+    + ')'
+)
+
+
+def collection_insert_statement(book):
+    '''
+    Builds a parameterized INSERT for one row of the Collection_Item table.
+
+    Keyword arguments:
+    book - a dictionary holding a value for every inserted column
+
+    Returns a (query, params) tuple meant to be passed to
+    cursor.execute(query, params). The values travel as bound parameters
+    instead of being formatted into the SQL text, so quotes inside a title
+    or author cannot break or alter the statement and None is stored as
+    NULL. Raises KeyError if the book is missing an inserted column.
+    '''
+    params = {column: book[column] for column in _COLLECTION_INSERT_COLUMNS}
+    return _COLLECTION_INSERT_QUERY, params