Minor updates

This commit is contained in:
2025-08-02 15:50:06 -04:00
parent 477dff02ce
commit 01977eab4e
4 changed files with 25 additions and 14 deletions

View File

@@ -9,7 +9,7 @@ Environmental Variables:
`LOG_FILE` - The file location for logs to be saved to.
## extract.py
The extract.py file contains functions to pull data related to books from different APIs.
The extract.py file contains functions to pull data related to books from different APIs. Currently, this project uses the Google Books and OpenLibrary APIs; only the former requires an API key.
## transform.py
Takes the raw JSON stored by extract.py and transforms the entries into a single entry whose keys

View File

@@ -26,13 +26,21 @@ def extract_book_json(url, header=[]):
url -- the url used to make the request.
header -- the optional headers passed to specify things needed for the queries, like API keys.
'''
try:
response = requests.get(url, headers=header)
response.raise_for_status()
except requests.exceptions.HTTPError as err:
logger.error(f'An error occurred: {err}')
return {}
return response.json()
attempts = 0
max_attempts = 5
response_json = {}
while attempts < max_attempts:
try:
response = requests.get(url, headers=header)
response.raise_for_status()
response_json = response.json()
break
except requests.exceptions.HTTPError as err:
logger.error(f'An error occurred: {err}')
time.sleep(.25)
attempts += 1
continue
return response_json
def get_google_book_data(query, offset=0):
'''
@@ -88,7 +96,6 @@ def start():
open_lib_query = f'title={title}'
open_lib_books = get_open_library_book_data(open_lib_query)
for books in open_lib_books['docs']:
logger.info(f'{datetime.now()}:Book found: {str(books)}')
if 'author_name' in books \
and 'title' in books \
and 'isbn' in books:
@@ -98,9 +105,13 @@ def start():
google_book_info = get_google_book_data(google_query)
if google_book_info != {}:
potential_ol_book = books
potential_ol_book['isbn'] = isbn
potential_ol_book = {
'_timestamp': str(datetime.now()),
'author_name': books['author_name'],
'title': books['title'],
'isbn': isbn,
}
logger.info(f'{datetime.now()}:Book found: {str(potential_ol_book)}')
open_lib_array.append(potential_ol_book)
google_books_array.append(google_book_info['items'][0])
time.sleep(.5)

View File

@@ -45,7 +45,7 @@ def start():
with psycopg.connect(f'dbname={db_name} user={db_user} password={db_password}') as conn, \
open(f'output/transformed_{today}.json', 'r') as transformed_books:
with conn.cursor() as cur:
cur.execute(f'DROP TABLE IF EXISTS Collection_Item') # TODO: REMOVE WHEN TESTING COMPLETED
# cur.execute(f'DROP TABLE IF EXISTS Collection_Item') # TODO: REMOVE WHEN TESTING COMPLETED
cur.execute(collections_table_creation)
books = json.loads(transformed_books.read())

View File

@@ -48,7 +48,7 @@ def combine_raw_jsons(google_json, ol_json):
transformed_dictionary_entry = {}
replace_quote = str.maketrans({"'": r"_"})
title = str(ol_json['book_data'][index]['title']).translate(replace_quote)
title = str(google_json['book_data'][index]['volumeInfo']['title']).translate(replace_quote)
author = ', '.join(ol_json['book_data'][index]['author_name']).translate(replace_quote)
isbn = ol_json['book_data'][index]['isbn']
sort_title = format_sort_title(title)