Minor updates
This commit is contained in:
@@ -9,7 +9,7 @@ Environmental Variables:
|
||||
`LOG_FILE` - The file location for logs to be saved to.
|
||||
|
||||
## extract.py
|
||||
The extract.py file contains functions to pull data related to books from different APIs.
|
||||
The extract.py file contains functions to pull data related to books from different APIs. Currently, this project uses the Google Books and OpenLibrary APIs; only the Google Books API requires an API key.
|
||||
|
||||
## transform.py
|
||||
Takes the raw JSON stored by extract.py and transforms the entries into a single entry whose keys
|
||||
|
||||
@@ -26,13 +26,21 @@ def extract_book_json(url, header=[]):
|
||||
url -- the url used to make the request.
|
||||
header -- the optional headers passed to specify things needed for the queries, like API keys.
|
||||
'''
|
||||
try:
|
||||
response = requests.get(url, headers=header)
|
||||
response.raise_for_status()
|
||||
except requests.exceptions.HTTPError as err:
|
||||
logger.error(f'An error occurred: {err}')
|
||||
return {}
|
||||
return response.json()
|
||||
attempts = 0
|
||||
max_attempts = 5
|
||||
response_json = {}
|
||||
while attempts < max_attempts:
|
||||
try:
|
||||
response = requests.get(url, headers=header)
|
||||
response.raise_for_status()
|
||||
response_json = response.json()
|
||||
break
|
||||
except requests.exceptions.HTTPError as err:
|
||||
logger.error(f'An error occurred: {err}')
|
||||
time.sleep(.25)
|
||||
attempts += 1
|
||||
continue
|
||||
return response_json
|
||||
|
||||
def get_google_book_data(query, offset=0):
|
||||
'''
|
||||
@@ -88,7 +96,6 @@ def start():
|
||||
open_lib_query = f'title={title}'
|
||||
open_lib_books = get_open_library_book_data(open_lib_query)
|
||||
for books in open_lib_books['docs']:
|
||||
logger.info(f'{datetime.now()}:Book found: {str(books)}')
|
||||
if 'author_name' in books \
|
||||
and 'title' in books \
|
||||
and 'isbn' in books:
|
||||
@@ -98,9 +105,13 @@ def start():
|
||||
google_book_info = get_google_book_data(google_query)
|
||||
|
||||
if google_book_info != {}:
|
||||
potential_ol_book = books
|
||||
potential_ol_book['isbn'] = isbn
|
||||
|
||||
potential_ol_book = {
|
||||
'_timestamp': str(datetime.now()),
|
||||
'author_name': books['author_name'],
|
||||
'title': books['title'],
|
||||
'isbn': isbn,
|
||||
}
|
||||
logger.info(f'{datetime.now()}:Book found: {str(potential_ol_book)}')
|
||||
open_lib_array.append(potential_ol_book)
|
||||
google_books_array.append(google_book_info['items'][0])
|
||||
time.sleep(.5)
|
||||
|
||||
@@ -45,7 +45,7 @@ def start():
|
||||
with psycopg.connect(f'dbname={db_name} user={db_user} password={db_password}') as conn, \
|
||||
open(f'output/transformed_{today}.json', 'r') as transformed_books:
|
||||
with conn.cursor() as cur:
|
||||
cur.execute(f'DROP TABLE IF EXISTS Collection_Item') # TODO: REMOVE WHEN TESTING COMPLETED
|
||||
# cur.execute(f'DROP TABLE IF EXISTS Collection_Item') # TODO: REMOVE WHEN TESTING COMPLETED
|
||||
cur.execute(collections_table_creation)
|
||||
books = json.loads(transformed_books.read())
|
||||
|
||||
|
||||
@@ -48,7 +48,7 @@ def combine_raw_jsons(google_json, ol_json):
|
||||
transformed_dictionary_entry = {}
|
||||
replace_quote = str.maketrans({"'": r"_"})
|
||||
|
||||
title = str(ol_json['book_data'][index]['title']).translate(replace_quote)
|
||||
title = str(google_json['book_data'][index]['volumeInfo']['title']).translate(replace_quote)
|
||||
author = ', '.join(ol_json['book_data'][index]['author_name']).translate(replace_quote)
|
||||
isbn = ol_json['book_data'][index]['isbn']
|
||||
sort_title = format_sort_title(title)
|
||||
|
||||
Reference in New Issue
Block a user