From 977ac079746f77b63138b1b0e189b14e9babc5d2 Mon Sep 17 00:00:00 2001
From: Nicholas Kalar
Date: Thu, 19 Jun 2025 16:46:46 -0400
Subject: [PATCH] Tweaks and INSERT logic

---
 src/load.py | 38 +++++++++++++++++++++++++-------------
 1 file changed, 25 insertions(+), 13 deletions(-)

diff --git a/src/load.py b/src/load.py
index 11e6327..e5c8dc7 100644
--- a/src/load.py
+++ b/src/load.py
@@ -1,13 +1,14 @@
 import os
-from dotenv import load_dotenv
-import psycopg
-from datetime import date
 import logging
+import json
+import psycopg
+from dotenv import load_dotenv
+from datetime import date, datetime
 
 load_dotenv()
 
 logger = logging.getLogger('load.py')
-logging.basicConfig(filename='lms-etl.log', level=logging.DEBUG)
+logging.basicConfig(filename=os.getenv('LOG_FILE'), level=os.getenv('LOGGING_LEVEL'))
 
 db_name = os.getenv('DB_NAME')
 db_user = os.getenv('DB_USER')
@@ -15,21 +16,21 @@ db_password = os.getenv('DB_PASSWORD')
 
 today = date.today()
 
-collections_table_creation = """
-    CREATE TABLE IF NOT EXISTS "Collection_Item"(
-        "id" BIGINT PRIMARY KEY,
+collections_table_creation = '''
+    CREATE TABLE IF NOT EXISTS Collection_Item(
+        "id" BIGSERIAL PRIMARY KEY,
         "title" VARCHAR(255) NULL,
         "author" VARCHAR(255) NULL,
         "publisher" VARCHAR(255) NULL,
-        "publishing_date" DATE NULL,
+        "publishing_date" VARCHAR(255) NULL,
         "loc_number" VARCHAR(255) NULL,
         "dewey_decimal_number" VARCHAR(255) NULL,
+        "isbn" BIGINT NULL,
         "sort_title" VARCHAR(255) NULL,
         "format" VARCHAR(255) NULL,
         "language" VARCHAR(255) NULL,
         "page_count" BIGINT NULL,
-        "genre" VARCHAR(255) NULL,
-        "subject" VARCHAR(255) NULL,
+        "categories" VARCHAR(255) NULL,
         "description" BIGINT NULL,
         "price_in_cents" BIGINT NULL,
         "cover_image_uri" VARCHAR(255) NULL,
@@ -38,18 +39,29 @@ collections_table_creation = """
         "is_lost" BOOLEAN NULL,
         "lost_date" DATE NULL
     )
-    """
+    '''
 
 
 def start():
     with psycopg.connect(f'dbname={db_name} user={db_user} password={db_password}') as conn, \
-            open(f"output/transformed_{today}.json", 'r'):
+            open(f'output/transformed_{today}.json', 'r') as transformed_books:
         with conn.cursor() as cur:
+            cur.execute(f'DROP TABLE IF EXISTS Collection_Item') # TODO: REMOVE WHEN TESTING COMPLETED
             cur.execute(collections_table_creation)
+            books = json.loads(transformed_books.read())
+            for book in books['books']:
+                cur.execute(f'INSERT INTO Collection_Item ' \
+                    '(title, author, publisher, publishing_date, isbn, sort_title, format, language, categories, page_count, is_checked_in, is_archived, is_lost) ' \
+                    f'VALUES (\'{book['title']}\',\'{book['author']}\',\'{book['publisher']}\',\'{book['publishing_date']}\',{book['isbn']},\'{book['sort_title']}\','
+                    f'\'{book['format']}\',\'{book['language']}\',\'{book['categories']}\',{book['page_count']},{book['is_checked_in']},{book['is_archived']},{book['is_lost']});')
+                logger.info(f'{datetime.now()}:Book {book['title']} loaded.')
 
 def load_transformed_books():
     pass
 
 if __name__ == '__main__':
+    print('Loading Started')
+    logger.info(f'{datetime.now()}:Loading Started')
     start()
-    print('Loading Done')
\ No newline at end of file
+    print('Loading Done')
+    logger.info(f'{datetime.now()}:Loading Done')
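
Reviewer note, not part of the patch: the new INSERT builds its SQL by interpolating book values straight into f-strings. Any title, author, or category containing a single quote will break the statement (and the pattern is open to SQL injection), and reusing single quotes inside the f-string replacement fields only parses on Python 3.12+. Below is a minimal sketch of a parameterized alternative, assuming the same psycopg cursor and books structure used in start(); the load_books helper name is illustrative only.

from datetime import datetime
import logging

logger = logging.getLogger('load.py')

# Column order matches the INSERT column list in the patch.
INSERT_BOOK = '''
    INSERT INTO Collection_Item
        (title, author, publisher, publishing_date, isbn, sort_title, format,
         language, categories, page_count, is_checked_in, is_archived, is_lost)
    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
    '''

COLUMNS = ('title', 'author', 'publisher', 'publishing_date', 'isbn',
           'sort_title', 'format', 'language', 'categories', 'page_count',
           'is_checked_in', 'is_archived', 'is_lost')

def load_books(cur, books):
    # psycopg binds the %s placeholders itself, so quotes inside titles or
    # authors cannot break the statement and no manual escaping is needed.
    for book in books['books']:
        cur.execute(INSERT_BOOK, tuple(book[col] for col in COLUMNS))
        logger.info(f"{datetime.now()}:Book {book['title']} loaded.")

If the per-book log line is not needed, cur.executemany(INSERT_BOOK, [tuple(b[c] for c in COLUMNS) for b in books['books']]) loads the whole batch in one call.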