From abea021044da56a772b58f3cdf6bea7d8da7266f Mon Sep 17 00:00:00 2001
From: Nicholas Kalar
Date: Sat, 9 Aug 2025 21:21:55 -0400
Subject: [PATCH 1/4] created book json file

---
 config/title.json | 130 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 130 insertions(+)
 create mode 100644 config/title.json

diff --git a/config/title.json b/config/title.json
new file mode 100644
index 0000000..88e7c67
--- /dev/null
+++ b/config/title.json
@@ -0,0 +1,130 @@
+[
+  {
+    "title": "An Absolutely Remarkable Thing",
+    "loc_number": "PS3607.R43285 A27",
+    "dewey_decimal_number": "813.6 GRE",
+    "description": "blank",
+    "price_in_cents": 0,
+    "cover_image_uri": "default"
+  },
+  {
+    "title": "Annihilation",
+    "loc_number": "PS3572.A4284",
+    "dewey_decimal_number": "813.54 VAN",
+    "description": "blank",
+    "price_in_cents": 0,
+    "cover_image_uri": "default"
+  },
+  {
+    "title": "Caliban's War",
+    "loc_number": "PS3601.B677 C35",
+    "dewey_decimal_number": "813.6 COR",
+    "description": "blank",
+    "price_in_cents": 0,
+    "cover_image_uri": "default"
+  },
+  {
+    "title": "Abaddon's Gate",
+    "loc_number": "PS3601.B677 A64",
+    "dewey_decimal_number": "813.6 COR",
+    "description": "blank",
+    "price_in_cents": 0,
+    "cover_image_uri": "default"
+  },
+  {
+    "title": "Cibola Burn",
+    "loc_number": "PS3601.B677 C53",
+    "dewey_decimal_number": "813.6 COR",
+    "description": "blank",
+    "price_in_cents": 0,
+    "cover_image_uri": "default"
+  },
+  {
+    "title": "Nemesis Games",
+    "loc_number": "PS3601.B677",
+    "dewey_decimal_number": "813.6 COR",
+    "description": "blank",
+    "price_in_cents": 0,
+    "cover_image_uri": "default"
+  },
+  {
+    "title": "Babylon's Ashes",
+    "loc_number": "PS3601.B677 B33",
+    "dewey_decimal_number": "813.6 COR",
+    "description": "blank",
+    "price_in_cents": 0,
+    "cover_image_uri": "default"
+  },
+  {
+    "title": "Persepolis Rising",
+    "loc_number": "PS3601.B677 P47",
+    "dewey_decimal_number": "813.6 COR",
+    "description": "blank",
+    "price_in_cents": 0,
+    "cover_image_uri": "default"
+  },
+  {
+    "title": "Tiamat's Wrath",
+    "loc_number": "PS3601.B677 T53",
+    "dewey_decimal_number": "813.6 COR",
+    "description": "blank",
+    "price_in_cents": 0,
+    "cover_image_uri": "default"
+  },
+  {
+    "title": "Leviathan Falls",
+    "loc_number": "PS3601.B677 L478",
+    "dewey_decimal_number": "813.6 COR",
+    "description": "blank",
+    "price_in_cents": 0,
+    "cover_image_uri": "default"
+  },
+  {
+    "title": "Everything is Tuberculosis",
+    "loc_number": "RC311.G85",
+    "dewey_decimal_number": "616.995 GRE",
+    "description": "blank",
+    "price_in_cents": 0,
+    "cover_image_uri": "default"
+  },
+  {
+    "title": "The Fellowship of the Ring",
+    "loc_number": "PR6039.O32",
+    "dewey_decimal_number": "823.912 TOL",
+    "description": "blank",
+    "price_in_cents": 0,
+    "cover_image_uri": "default"
+  },
+  {
+    "title": "Leviathan Wakes",
+    "loc_number": "PS3601.B677",
+    "dewey_decimal_number": "813.6 COR",
+    "description": "blank",
+    "price_in_cents": 0,
+    "cover_image_uri": "default"
+  },
+  {
+    "title": "Lovecraft Country",
+    "loc_number": "PS3568.U3615",
+    "dewey_decimal_number": "813.6 RUF",
+    "description": "blank",
+    "price_in_cents": 0,
+    "cover_image_uri": "default"
+  },
+  {
+    "title": "The New Jim Crow",
+    "loc_number": "HV9950.A437",
+    "dewey_decimal_number": "364.973 ALE",
+    "description": "blank",
+    "price_in_cents": 0,
+    "cover_image_uri": "default"
+  },
+  {
+    "title": "Pale Blue Dot",
+    "loc_number": "QB500.S24",
+    "dewey_decimal_number": "919.904 SAG",
+    "description": "blank",
+    "price_in_cents": 0,
+    "cover_image_uri": "default"
+  }
+]
\ No newline at end of file
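
The extract and transform steps below read every field of these entries, so one malformed entry fails a run partway through. The following is a minimal pre-flight check for config/title.json; the script and the check_titles name are hypothetical and not part of this patch series, but the required keys are exactly the ones used above.

# check_titles.py -- hypothetical pre-flight check, not part of this patch series.
# Verifies that every entry in config/title.json carries the fields that the
# extract and transform steps read.
import json

REQUIRED_KEYS = {
    'title', 'loc_number', 'dewey_decimal_number',
    'description', 'price_in_cents', 'cover_image_uri',
}


def check_titles(path='config/title.json'):
    with open(path, 'r') as title_json:
        titles = json.load(title_json)
    for index, entry in enumerate(titles):
        missing = REQUIRED_KEYS - entry.keys()
        if missing:
            raise ValueError(f'entry {index} is missing {sorted(missing)}')
        if not isinstance(entry['price_in_cents'], int):
            raise TypeError(f'entry {index}: price_in_cents must be an integer')
    return titles


if __name__ == '__main__':
    print(f'{len(check_titles())} titles look well-formed')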
"default" + } +] \ No newline at end of file From 24345c8b4e680bea1e4f3fbd9ea147aa0af037b7 Mon Sep 17 00:00:00 2001 From: Nicholas Kalar Date: Sat, 9 Aug 2025 21:22:10 -0400 Subject: [PATCH 2/4] utilized new json file --- src/extract.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/extract.py b/src/extract.py index d12c565..e193894 100644 --- a/src/extract.py +++ b/src/extract.py @@ -79,12 +79,12 @@ def get_open_library_book_data(query, offset=0): def start(): - titles = [] + titles = None google_books_array = [] open_lib_array = [] - with open('config/title.txt', 'r') as google_books_file: - for line in google_books_file: - titles.append(line.strip()) + + with open('config/title.json', 'r') as title_json: + titles = json.loads(title_json.read()) with open(f'output/raw_google_books_{today}.json', 'w') as google_books_file, \ open(f'output/raw_open_lib_books_{today}.json', 'w') as open_lib_file: @@ -92,7 +92,7 @@ def start(): open_lib_file.write('{"book_data":') for title in titles: - open_lib_query = f'title={title}' + open_lib_query = f'title={title['title']}' open_lib_books = get_open_library_book_data(open_lib_query) for books in open_lib_books['docs']: if 'author_name' in books \ @@ -109,6 +109,11 @@ def start(): 'author_name': books['author_name'], 'title': books['title'], 'isbn': isbn, + 'loc_number': title['loc_number'], + 'dewey_decimal_number': title['dewey_decimal_number'], + 'description': title['description'], + 'price_in_cents': title['price_in_cents'], + 'cover_image_uri': title['cover_image_uri'], } logger.info(f'{datetime.now()}:Book found: {str(potential_ol_book)}') open_lib_array.append(potential_ol_book) From 425e27e189043901da4751c2271eea564263b04e Mon Sep 17 00:00:00 2001 From: Nicholas Kalar Date: Sat, 9 Aug 2025 21:23:28 -0400 Subject: [PATCH 3/4] cleaned up book entry declaration --- src/transform.py | 54 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 36 insertions(+), 18 deletions(-) diff --git a/src/transform.py b/src/transform.py index 2cd723b..3afb9d5 100644 --- a/src/transform.py +++ b/src/transform.py @@ -56,47 +56,65 @@ def combine_raw_jsons(google_json, ol_json): if 'categories' in google_json['book_data'][index]['volumeInfo']: categories = ', '.join(google_json['book_data'][index]['volumeInfo']['categories']) else: - categories = None + categories = '' if 'publisher' in google_json['book_data'][index]['volumeInfo']: publisher = str(google_json['book_data'][index]['volumeInfo']['publisher']).translate(replace_quote) else: - publisher = None + publisher = '' if 'publishedDate' in google_json['book_data'][index]['volumeInfo']: published_date = google_json['book_data'][index]['volumeInfo']['publishedDate'] + if len(published_date) == 4: + published_date += '-12-31' + elif len(published_date) < 10: + published_date = published_date[0:3] + '-12-31' else: - published_date = None + published_date = '9999-12-31' + + lost_date = '9999-12-31' if 'printType' in google_json['book_data'][index]['volumeInfo']: print_type = google_json['book_data'][index]['volumeInfo']['printType'] else: - print_type = None + print_type = '' if 'language' in google_json['book_data'][index]['volumeInfo']: language = google_json['book_data'][index]['volumeInfo']['language'] else: - language = None + language = '' if 'pageCount' in google_json['book_data'][index]['volumeInfo']: pageCount = google_json['book_data'][index]['volumeInfo']['pageCount'] else: pageCount = 0 + loc_number = ol_json['book_data'][index]['loc_number'] + 
From 425e27e189043901da4751c2271eea564263b04e Mon Sep 17 00:00:00 2001
From: Nicholas Kalar
Date: Sat, 9 Aug 2025 21:23:28 -0400
Subject: [PATCH 3/4] cleaned up book entry declaration

---
 src/transform.py | 54 ++++++++++++++++++++++++++++++++----------------
 1 file changed, 36 insertions(+), 18 deletions(-)

diff --git a/src/transform.py b/src/transform.py
index 2cd723b..3afb9d5 100644
--- a/src/transform.py
+++ b/src/transform.py
@@ -56,47 +56,65 @@ def combine_raw_jsons(google_json, ol_json):
         if 'categories' in google_json['book_data'][index]['volumeInfo']:
             categories = ', '.join(google_json['book_data'][index]['volumeInfo']['categories'])
         else:
-            categories = None
+            categories = ''
 
         if 'publisher' in google_json['book_data'][index]['volumeInfo']:
             publisher = str(google_json['book_data'][index]['volumeInfo']['publisher']).translate(replace_quote)
         else:
-            publisher = None
+            publisher = ''
 
         if 'publishedDate' in google_json['book_data'][index]['volumeInfo']:
             published_date = google_json['book_data'][index]['volumeInfo']['publishedDate']
+            if len(published_date) == 4:
+                published_date += '-12-31'
+            elif len(published_date) < 10:
+                published_date = published_date[0:4] + '-12-31'
         else:
-            published_date = None
+            published_date = '9999-12-31'
+
+        lost_date = '9999-12-31'
 
         if 'printType' in google_json['book_data'][index]['volumeInfo']:
             print_type = google_json['book_data'][index]['volumeInfo']['printType']
         else:
-            print_type = None
+            print_type = ''
 
         if 'language' in google_json['book_data'][index]['volumeInfo']:
             language = google_json['book_data'][index]['volumeInfo']['language']
         else:
-            language = None
+            language = ''
 
         if 'pageCount' in google_json['book_data'][index]['volumeInfo']:
             pageCount = google_json['book_data'][index]['volumeInfo']['pageCount']
        else:
             pageCount = 0
 
+        loc_number = ol_json['book_data'][index]['loc_number']
+        dewey_decimal_number = ol_json['book_data'][index]['dewey_decimal_number']
+        description = ol_json['book_data'][index]['description']
+        price_in_cents = ol_json['book_data'][index]['price_in_cents']
+        cover_image_uri = ol_json['book_data'][index]['cover_image_uri']
+
         transformed_dictionary_entry = {
-                'title': title,
-                'author': author,
-                'publisher': publisher,
-                'publishing_date': published_date,
-                'isbn': isbn,
-                'sort_title': sort_title,
-                'format': print_type,
-                'language': language,
-                'categories': categories,
-                'page_count': pageCount,
-                'is_checked_in': True,
-                'is_archived': False,
-                'is_lost': False,
+            'title': title,
+            'author': author,
+            'publisher': publisher,
+            'publishing_date': published_date,
+            'isbn': isbn,
+            'sort_title': sort_title,
+            'format': print_type,
+            'language': language,
+            'categories': categories,
+            'page_count': pageCount,
+            'is_checked_in': True,
+            'is_archived': False,
+            'is_lost': False,
+            'lost_date': lost_date,
+            'loc_number': loc_number,
+            'dewey_decimal_number': dewey_decimal_number,
+            'description': description,
+            'price_in_cents': price_in_cents,
+            'cover_image_uri': cover_image_uri,
         }
 
         transformed_dictionary['books'].append(transformed_dictionary_entry)
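
The publishedDate handling added above pads the partial dates Google Books returns ('YYYY' or 'YYYY-MM') out to a full 'YYYY-MM-DD', and falls back to the '9999-12-31' sentinel when the field is missing. The same rule as a standalone helper is sketched below; normalize_published_date is a hypothetical name, not a function in this repository.

# Sketch of the publishedDate padding rule added above, as a standalone helper.
def normalize_published_date(published_date):
    if not published_date:
        return '9999-12-31'           # sentinel used when the field is missing
    if len(published_date) == 4:      # 'YYYY'
        return published_date + '-12-31'
    if len(published_date) < 10:      # 'YYYY-MM' and other partial dates
        return published_date[0:4] + '-12-31'
    return published_date             # already a full 'YYYY-MM-DD'


assert normalize_published_date('1954') == '1954-12-31'
assert normalize_published_date('2011-06') == '2011-12-31'
assert normalize_published_date('2019-03-26') == '2019-03-26'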
From ef2cfe79ddf4534e46207fb17e467f4be117e1f9 Mon Sep 17 00:00:00 2001
From: Nicholas Kalar
Date: Sat, 9 Aug 2025 21:23:44 -0400
Subject: [PATCH 4/4] Major SQL update

---
 src/load.py           |  12 +++-
 src/sql_statements.py | 145 ++++++++++++++++++++++++++++++++++++------
 2 files changed, 136 insertions(+), 21 deletions(-)

diff --git a/src/load.py b/src/load.py
index 5641b1a..db106c7 100644
--- a/src/load.py
+++ b/src/load.py
@@ -5,6 +5,7 @@ import psycopg
 from dotenv import load_dotenv
 from datetime import date, datetime
 import sql_statements
+import random
 
 load_dotenv()
 
@@ -23,7 +24,7 @@ def start():
             books = json.loads(transformed_books.read())
         with conn.cursor() as cur:
            cur.execute(f'DROP TABLE IF EXISTS Collection_Item') # TODO: REMOVE WHEN TESTING COMPLETED
-            cur.execute(sql_statements.collections_table_creation)
+            cur.execute(sql_statements.collection_item_table_creation)
 
             load_transformed_books(cur, books)
 
@@ -38,8 +39,13 @@ def load_transformed_books(cursor, books):
     `collection_item` SQL table
     '''
     for book in books['books']:
-        cursor.execute(sql_statements.collection_insert_statement(book))
-        logger.info(f'{datetime.now()}:Book {book['title']} loaded.')
+        # This simulates a library buying multiple copies of a book.
+        try:
+            for i in range(random.randrange(1, 10)):
+                cursor.execute(sql_statements.collection_insert_statement(book))
+                logger.info(f'{datetime.now()}:Book {book['title']} loaded {i+1} times.')
+        except Exception as err:
+            logger.error(f'{err} at {book['title']}')
 
 if __name__ == '__main__':
     print('Loading Started')
diff --git a/src/sql_statements.py b/src/sql_statements.py
index 050719c..b803049 100644
--- a/src/sql_statements.py
+++ b/src/sql_statements.py
@@ -1,37 +1,146 @@
-collections_table_creation = '''
-    CREATE TABLE IF NOT EXISTS Collection_Item(
+collection_item_table_creation = '''
+    CREATE TABLE IF NOT EXISTS "collection_item"(
         "id" BIGSERIAL PRIMARY KEY,
-        "title" VARCHAR(255) NULL,
-        "author" VARCHAR(255) NULL,
-        "publisher" VARCHAR(255) NULL,
-        "publishing_date" VARCHAR(255) NULL,
+        "title" VARCHAR(255) NOT NULL,
+        "author" VARCHAR(255) NOT NULL,
+        "publisher" VARCHAR(255) NOT NULL,
+        "publishing_date" VARCHAR(255) NOT NULL,
         "loc_number" VARCHAR(255) NULL,
         "dewey_decimal_number" VARCHAR(255) NULL,
-        "isbn" BIGINT NULL,
-        "sort_title" VARCHAR(255) NULL,
-        "format" VARCHAR(255) NULL,
+        "isbn" BIGINT NOT NULL,
+        "sort_title" VARCHAR(255) NOT NULL,
+        "format" VARCHAR(255) NOT NULL,
         "language" VARCHAR(255) NULL,
         "page_count" BIGINT NULL,
         "categories" VARCHAR(255) NULL,
-        "description" BIGINT NULL,
+        "description" VARCHAR(2550) NULL,
         "price_in_cents" BIGINT NULL,
         "cover_image_uri" VARCHAR(255) NULL,
-        "is_checked_in" BOOLEAN NULL,
-        "is_archived" BOOLEAN NULL,
-        "is_lost" BOOLEAN NULL,
-        "lost_date" DATE NULL
-    )
+        "is_checked_in" BOOLEAN NOT NULL,
+        "is_archived" BOOLEAN NOT NULL,
+        "is_lost" BOOLEAN NOT NULL,
+        "lost_date" DATE NOT NULL
+    );
     '''
 
+patron_table_creation = '''
+    CREATE TABLE IF NOT EXISTS "patron"(
+        "id" BIGSERIAL PRIMARY KEY,
+        "name" VARCHAR(255) NOT NULL,
+        "has_good_standing" BOOLEAN NOT NULL,
+        "fee_total" INTEGER NOT NULL,
+        "is_archived" BOOLEAN NOT NULL,
+        "last_login" DATE NOT NULL,
+        "password" VARCHAR(255) NOT NULL
+    );
+    '''
+
+staff_table_creation = '''
+    CREATE TABLE IF NOT EXISTS "staff"(
+        "id" BIGINT NOT NULL,
+        "name" VARCHAR(255) NOT NULL,
+        "password" VARCHAR(255) NOT NULL,
+        "is_archived" BOOLEAN NOT NULL
+    );
+    '''
+
+library_table_creation = '''
+    CREATE TABLE IF NOT EXISTS "library"(
+        "id" BIGSERIAL PRIMARY KEY,
+        "name" VARCHAR(255) NOT NULL,
+        "address" VARCHAR(255) NOT NULL
+    );
+    '''
+
+library_collection_table_creation = '''
+    CREATE TABLE IF NOT EXISTS "library_collection"(
+        "id" BIGINT NOT NULL,
+        "library_id" BIGINT NOT NULL,
+        "item_id" BIGINT NOT NULL
+    );
+    '''
+
+lost_item_table_creation = '''
+    CREATE TABLE IF NOT EXISTS "lost_item"(
+        "id" BIGSERIAL PRIMARY KEY,
+        "item_id" BIGINT NOT NULL,
+        "patron_id" BIGINT NOT NULL,
+        "due_date" DATE NOT NULL,
+        "cost_in_cents" INTEGER NOT NULL
+    );
+    ALTER TABLE
+        "lost_item" ADD CONSTRAINT "lost_item_item_id_unique" UNIQUE("item_id");
+    '''
+
+late_item_table_creation = '''
+    CREATE TABLE IF NOT EXISTS "late_item"(
+        "id" BIGSERIAL PRIMARY KEY,
+        "item_id" BIGINT NOT NULL,
+        "patron_id" BIGINT NOT NULL,
+        "due_date" DATE NOT NULL,
+        "fees_in_cents" INTEGER NOT NULL
+    );
+    ALTER TABLE
+        "late_item" ADD CONSTRAINT "late_item_item_id_unique" UNIQUE("item_id");
+    '''
+
+checked_out_item_table_creation = '''
+    CREATE TABLE IF NOT EXISTS "checked_out_item"(
+        "id" BIGINT NOT NULL,
+        "item_id" BIGINT NOT NULL,
+        "patron_id" BIGINT NOT NULL,
+        "due_date" DATE NOT NULL
+    );
+    ALTER TABLE
+        "checked_out_item" ADD CONSTRAINT "checked_out_item_item_id_unique" UNIQUE("item_id");
+    '''
+
+# Does not include the constraint on item_id (like the above) as the same book can be on hold for multiple patrons
+# This might need to be changed to item_title instead of item_id since libraries can have multiple copies
+on_hold_item_table_creation = '''
+    CREATE TABLE IF NOT EXISTS "on_hold_item"(
+        "id" BIGINT NOT NULL,
+        "item_id" BIGINT NOT NULL,
+        "patron_id" BIGINT NOT NULL,
+        "is_current_hold" BOOLEAN NOT NULL,
+        "hold_release_date" DATE NOT NULL
+    );
+    '''
+
+# To be run after all the above
+foreign_key_creation = '''
+    ALTER TABLE
+        "lost_item" ADD CONSTRAINT "lost_item_item_id_foreign" FOREIGN KEY("item_id") REFERENCES "collection_item"("id");
+    ALTER TABLE
+        "late_item" ADD CONSTRAINT "late_item_item_id_foreign" FOREIGN KEY("item_id") REFERENCES "collection_item"("id");
+    ALTER TABLE
+        "checked_out_item" ADD CONSTRAINT "checked_out_item_patron_id_foreign" FOREIGN KEY("patron_id") REFERENCES "patron"("id");
+    ALTER TABLE
+        "checked_out_item" ADD CONSTRAINT "checked_out_item_item_id_foreign" FOREIGN KEY("item_id") REFERENCES "collection_item"("id");
+    ALTER TABLE
+        "late_item" ADD CONSTRAINT "late_item_patron_id_foreign" FOREIGN KEY("patron_id") REFERENCES "patron"("id");
+    ALTER TABLE
+        "library_collection" ADD CONSTRAINT "library_collection_library_foreign" FOREIGN KEY("library_id") REFERENCES "library"("id");
+    ALTER TABLE
+        "lost_item" ADD CONSTRAINT "lost_item_patron_id_foreign" FOREIGN KEY("patron_id") REFERENCES "patron"("id");
+    ALTER TABLE
+        "library_collection" ADD CONSTRAINT "library_collection_item_foreign" FOREIGN KEY("item_id") REFERENCES "collection_item"("id");
+    ALTER TABLE
+        "on_hold_item" ADD CONSTRAINT "on_hold_item_patron_id_foreign" FOREIGN KEY("patron_id") REFERENCES "patron"("id");
+    ALTER TABLE
+        "on_hold_item" ADD CONSTRAINT "on_hold_item_item_id_foreign" FOREIGN KEY("item_id") REFERENCES "collection_item"("id");
+    '''
 
 def collection_insert_statement(book):
     return ('INSERT INTO Collection_Item '
-            '(title, author, publisher, publishing_date, isbn, sort_title, format, '
-            'language, categories, page_count, is_checked_in, is_archived, is_lost) '
+            '(title, author, publisher, publishing_date, loc_number, dewey_decimal_number, isbn, sort_title, format, '
+            'language, categories, page_count, description, price_in_cents, is_checked_in, is_archived, is_lost, lost_date) '
             f'VALUES (\'{book['title']}\', \'{book['author']}\', '
             f'\'{book['publisher']}\', \'{book['publishing_date']}\', '
+            f'\'{book['loc_number']}\', \'{book['dewey_decimal_number']}\', '
             f'\'{book['isbn']}\', \'{book['sort_title']}\','
             f'\'{book['format']}\', \'{book['language']}\', '
             f'\'{book['categories']}\', \'{book['page_count']}\', '
+            f'\'{book['description']}\', \'{book['price_in_cents']}\', '
             f'\'{book['is_checked_in']}\', \'{book['is_archived']}\', '
-            f'\'{book['is_lost']}\');')
\ No newline at end of file
+            f'\'{book['is_lost']}\', \'{book['lost_date']}\');')
\ No newline at end of file
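
collection_insert_statement() above quotes each value directly into the SQL text, so any value containing an apostrophe (the seeded titles include "Caliban's War", "Abaddon's Gate", "Babylon's Ashes", and "Tiamat's Wrath") produces a broken quoted literal, and psycopg never gets a chance to escape it. Below is a minimal sketch of the same 18-column insert using psycopg parameter binding; the constant and the insert_collection_item helper are illustrative names, not part of the patch.

# Sketch: the insert from collection_insert_statement expressed with psycopg
# parameter binding, so quoting and escaping are handled by the driver.
COLLECTION_ITEM_COLUMNS = (
    'title', 'author', 'publisher', 'publishing_date', 'loc_number',
    'dewey_decimal_number', 'isbn', 'sort_title', 'format', 'language',
    'categories', 'page_count', 'description', 'price_in_cents',
    'is_checked_in', 'is_archived', 'is_lost', 'lost_date',
)

COLLECTION_ITEM_INSERT = (
    f'INSERT INTO collection_item ({", ".join(COLLECTION_ITEM_COLUMNS)}) '
    f'VALUES ({", ".join(["%s"] * len(COLLECTION_ITEM_COLUMNS))})'
)


def insert_collection_item(cursor, book):
    # psycopg substitutes each %s safely, so "Caliban's War" needs no escaping.
    cursor.execute(COLLECTION_ITEM_INSERT,
                   [book[column] for column in COLLECTION_ITEM_COLUMNS])

Since load_transformed_books() already has the cursor and the book dictionary in hand, a helper like this could replace the cursor.execute(sql_statements.collection_insert_statement(book)) call without other changes to load.py.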