Minor updates

2025-08-02 15:50:06 -04:00
parent 477dff02ce
commit 01977eab4e
4 changed files with 25 additions and 14 deletions
--- a/README.md
+++ b/README.md
@@ -9,7 +9,7 @@ Environmental Variables:
 `LOG_FILE`       - The file location for logs to be saved to.  

 ## extract.py
-The extract.py file contains functions to pull data related to books from different APIs.
+The extract.py file contains functions to pull data related to books from different APIs. Currently, this project uses the Google Books and Open Library APIs; only the former requires an API key.

 ## transform.py
 Takes the raw JSON stored by extract.py and transforms the entries into a single entry whose keys  
--- a/src/extract.py
+++ b/src/extract.py
@@ -26,13 +26,21 @@ def extract_book_json(url, header=[]):
        url    -- the url used to make the request.
        header -- the optional headers passed to specify things needed for the queries, like API keys.
    '''
-    try:
-        response = requests.get(url, headers=header)
-        response.raise_for_status()
-    except requests.exceptions.HTTPError as err:
-        logger.error(f'An error occurred: {err}')
-        return {}
-    return response.json()
+    attempts = 0
+    max_attempts = 5
+    response_json = {}
+    while attempts < max_attempts:
+        try:
+            response = requests.get(url, headers=header)
+            response.raise_for_status()
+            response_json = response.json()
+            break
+        except requests.exceptions.HTTPError as err:
+            logger.error(f'An error occurred: {err}')
+            time.sleep(.25)
+            attempts += 1
+            continue
+    return response_json

 def get_google_book_data(query, offset=0):
    '''
@@ -88,7 +96,6 @@ def start():
            open_lib_query = f'title={title}'
            open_lib_books = get_open_library_book_data(open_lib_query)
            for books in open_lib_books['docs']:
-                logger.info(f'{datetime.now()}:Book found: {str(books)}')
                if 'author_name' in books \
                and 'title' in books \
                and 'isbn' in books:
@@ -98,9 +105,13 @@ def start():
                            google_book_info = get_google_book_data(google_query)

                            if google_book_info != {}:
-                                potential_ol_book = books
-                                potential_ol_book['isbn'] = isbn
-
+                                potential_ol_book = {
+                                    '_timestamp': str(datetime.now()),
+                                    'author_name': books['author_name'],
+                                    'title': books['title'],
+                                    'isbn': isbn,
+                                }
+                                logger.info(f'{datetime.now()}:Book found: {str(potential_ol_book)}')
                                open_lib_array.append(potential_ol_book)
                                google_books_array.append(google_book_info['items'][0])
                            time.sleep(.5)
--- a/src/load.py
+++ b/src/load.py
@@ -45,7 +45,7 @@ def start():
    with psycopg.connect(f'dbname={db_name} user={db_user} password={db_password}') as conn, \
        open(f'output/transformed_{today}.json', 'r') as transformed_books:
        with conn.cursor() as cur:
-            cur.execute(f'DROP TABLE IF EXISTS Collection_Item') # TODO: REMOVE WHEN TESTING COMPLETED
+            # cur.execute(f'DROP TABLE IF EXISTS Collection_Item') # TODO: REMOVE WHEN TESTING COMPLETED
            cur.execute(collections_table_creation)
            books = json.loads(transformed_books.read())

--- a/src/transform.py
+++ b/src/transform.py
@@ -48,7 +48,7 @@ def combine_raw_jsons(google_json, ol_json):
        transformed_dictionary_entry = {}
        replace_quote = str.maketrans({"'": r"_"})
        
-        title = str(ol_json['book_data'][index]['title']).translate(replace_quote)
+        title = str(google_json['book_data'][index]['volumeInfo']['title']).translate(replace_quote)
        author = ', '.join(ol_json['book_data'][index]['author_name']).translate(replace_quote)
        isbn = ol_json['book_data'][index]['isbn']
        sort_title = format_sort_title(title)