From 619b38c0b0c8440e9ba0c937ef628bcd46e3c5ab Mon Sep 17 00:00:00 2001 From: Nicholas Kalar Date: Sat, 2 Aug 2025 18:00:05 -0400 Subject: [PATCH 1/3] Refactored SQL statements --- src/load.py | 33 ++++----------------------------- src/sql_statements.py | 40 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 29 deletions(-) create mode 100644 src/sql_statements.py diff --git a/src/load.py b/src/load.py index 01517b3..17c22aa 100644 --- a/src/load.py +++ b/src/load.py @@ -4,6 +4,7 @@ import json import psycopg from dotenv import load_dotenv from datetime import date, datetime +import sql_statements load_dotenv() @@ -16,44 +17,18 @@ db_password = os.getenv('DB_PASSWORD') today = date.today() -collections_table_creation = ''' - CREATE TABLE IF NOT EXISTS Collection_Item( - "id" BIGSERIAL PRIMARY KEY, - "title" VARCHAR(255) NULL, - "author" VARCHAR(255) NULL, - "publisher" VARCHAR(255) NULL, - "publishing_date" VARCHAR(255) NULL, - "loc_number" VARCHAR(255) NULL, - "dewey_decimal_number" VARCHAR(255) NULL, - "isbn" BIGINT NULL, - "sort_title" VARCHAR(255) NULL, - "format" VARCHAR(255) NULL, - "language" VARCHAR(255) NULL, - "page_count" BIGINT NULL, - "categories" VARCHAR(255) NULL, - "description" BIGINT NULL, - "price_in_cents" BIGINT NULL, - "cover_image_uri" VARCHAR(255) NULL, - "is_checked_in" BOOLEAN NULL, - "is_archived" BOOLEAN NULL, - "is_lost" BOOLEAN NULL, - "lost_date" DATE NULL - ) - ''' +collections_table_creation = sql_statements.collections_table_creation def start(): with psycopg.connect(f'dbname={db_name} user={db_user} password={db_password}') as conn, \ open(f'output/transformed_{today}.json', 'r') as transformed_books: with conn.cursor() as cur: - # cur.execute(f'DROP TABLE IF EXISTS Collection_Item') # TODO: REMOVE WHEN TESTING COMPLETED + cur.execute(f'DROP TABLE IF EXISTS Collection_Item') # TODO: REMOVE WHEN TESTING COMPLETED cur.execute(collections_table_creation) books = json.loads(transformed_books.read()) for book in 
books['books']: - cur.execute(f'INSERT INTO Collection_Item ' \ - '(title, author, publisher, publishing_date, isbn, sort_title, format, language, categories, page_count, is_checked_in, is_archived, is_lost) ' \ - f'VALUES (\'{book['title']}\',\'{book['author']}\',\'{book['publisher']}\',\'{book['publishing_date']}\',{book['isbn']},\'{book['sort_title']}\',' - f'\'{book['format']}\',\'{book['language']}\',\'{book['categories']}\',{book['page_count']},{book['is_checked_in']},{book['is_archived']},{book['is_lost']});') + cur.execute(sql_statements.collection_insert_statement(book)) logger.info(f'{datetime.now()}:Book {book['title']} loaded.') def load_transformed_books(): diff --git a/src/sql_statements.py b/src/sql_statements.py new file mode 100644 index 0000000..97b6147 --- /dev/null +++ b/src/sql_statements.py @@ -0,0 +1,40 @@ + + + +collections_table_creation = ''' + CREATE TABLE IF NOT EXISTS Collection_Item( + "id" BIGSERIAL PRIMARY KEY, + "title" VARCHAR(255) NULL, + "author" VARCHAR(255) NULL, + "publisher" VARCHAR(255) NULL, + "publishing_date" VARCHAR(255) NULL, + "loc_number" VARCHAR(255) NULL, + "dewey_decimal_number" VARCHAR(255) NULL, + "isbn" BIGINT NULL, + "sort_title" VARCHAR(255) NULL, + "format" VARCHAR(255) NULL, + "language" VARCHAR(255) NULL, + "page_count" BIGINT NULL, + "categories" VARCHAR(255) NULL, + "description" BIGINT NULL, + "price_in_cents" BIGINT NULL, + "cover_image_uri" VARCHAR(255) NULL, + "is_checked_in" BOOLEAN NULL, + "is_archived" BOOLEAN NULL, + "is_lost" BOOLEAN NULL, + "lost_date" DATE NULL + ) + ''' + + +def collection_insert_statement(book): + return 'INSERT INTO Collection_Item ' \ + '(title, author, publisher, publishing_date, isbn, sort_title, format, '\ + 'language, categories, page_count, is_checked_in, is_archived, is_lost) '\ + f'VALUES (\'{book['title']}\', \'{book['author']}\', '\ + f'\'{book['publisher']}\', \'{book['publishing_date']}\', '\ + f'\'{book['isbn']}\', \'{book['sort_title']}\',' \ + 
f'\'{book['format']}\', \'{book['language']}\', '\ + f'\'{book['categories']}\', \'{book['page_count']}\', '\ + f'\'{book['is_checked_in']}\', \'{book['is_archived']}\', '\ + f'\'{book['is_lost']}\');' \ No newline at end of file From 3d085666c865af6f22041420356313746d03a5fc Mon Sep 17 00:00:00 2001 From: Nicholas Kalar Date: Sat, 2 Aug 2025 18:26:25 -0400 Subject: [PATCH 2/3] refactored functions --- src/load.py | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/src/load.py b/src/load.py index 17c22aa..5641b1a 100644 --- a/src/load.py +++ b/src/load.py @@ -17,22 +17,29 @@ db_password = os.getenv('DB_PASSWORD') today = date.today() -collections_table_creation = sql_statements.collections_table_creation - def start(): with psycopg.connect(f'dbname={db_name} user={db_user} password={db_password}') as conn, \ open(f'output/transformed_{today}.json', 'r') as transformed_books: - with conn.cursor() as cur: - cur.execute(f'DROP TABLE IF EXISTS Collection_Item') # TODO: REMOVE WHEN TESTING COMPLETED - cur.execute(collections_table_creation) books = json.loads(transformed_books.read()) + with conn.cursor() as cur: + cur.execute(f'DROP TABLE IF EXISTS Collection_Item') # TODO: REMOVE WHEN TESTING COMPLETED + cur.execute(sql_statements.collections_table_creation) + load_transformed_books(cur, books) + - for book in books['books']: - cur.execute(sql_statements.collection_insert_statement(book)) - logger.info(f'{datetime.now()}:Book {book['title']} loaded.') +def load_transformed_books(cursor, books): + ''' + Takes a psycopg connection cursor and a dictionary of books and inserts + the books into a PostgreSQL database.
-def load_transformed_books(): - pass + Keyword arguments: + cursor - a psycopg.connect.cursor object + books - a dictionary of transformed books following the schema for the + `collection_item` SQL table + ''' + for book in books['books']: + cursor.execute(sql_statements.collection_insert_statement(book)) + logger.info(f'{datetime.now()}:Book {book['title']} loaded.') if __name__ == '__main__': print('Loading Started') From 61488e2449b903d2e63eaa8effd57b00946f726c Mon Sep 17 00:00:00 2001 From: Nicholas Kalar Date: Sat, 2 Aug 2025 22:06:26 -0400 Subject: [PATCH 3/3] Minor updates --- src/extract.py | 1 - src/sql_statements.py | 23 ++++++++++------------- 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/src/extract.py b/src/extract.py index 00eb896..d12c565 100644 --- a/src/extract.py +++ b/src/extract.py @@ -6,7 +6,6 @@ import json from dotenv import load_dotenv from datetime import date, datetime - load_dotenv google_api_key = os.getenv('GOOGLE_API_KEY') diff --git a/src/sql_statements.py b/src/sql_statements.py index 97b6147..050719c 100644 --- a/src/sql_statements.py +++ b/src/sql_statements.py @@ -1,6 +1,3 @@ - - - collections_table_creation = ''' CREATE TABLE IF NOT EXISTS Collection_Item( "id" BIGSERIAL PRIMARY KEY, @@ -28,13 +25,13 @@ collections_table_creation = ''' def collection_insert_statement(book): - return 'INSERT INTO Collection_Item ' \ - '(title, author, publisher, publishing_date, isbn, sort_title, format, '\ - 'language, categories, page_count, is_checked_in, is_archived, is_lost) '\ - f'VALUES (\'{book['title']}\', \'{book['author']}\', '\ - f'\'{book['publisher']}\', \'{book['publishing_date']}\', '\ - f'\'{book['isbn']}\', \'{book['sort_title']}\',' \ - f'\'{book['format']}\', \'{book['language']}\', '\ - f'\'{book['categories']}\', \'{book['page_count']}\', '\ - f'\'{book['is_checked_in']}\', \'{book['is_archived']}\', '\ - f'\'{book['is_lost']}\');' \ No newline at end of file + return ('INSERT INTO Collection_Item ' + 
'(title, author, publisher, publishing_date, isbn, sort_title, format, ' + 'language, categories, page_count, is_checked_in, is_archived, is_lost) ' + f'VALUES (\'{book['title']}\', \'{book['author']}\', ' + f'\'{book['publisher']}\', \'{book['publishing_date']}\', ' + f'\'{book['isbn']}\', \'{book['sort_title']}\',' + f'\'{book['format']}\', \'{book['language']}\', ' + f'\'{book['categories']}\', \'{book['page_count']}\', ' + f'\'{book['is_checked_in']}\', \'{book['is_archived']}\', ' + f'\'{book['is_lost']}\');') \ No newline at end of file