Configure logging via env vars, pass offset to Open Library search, and document query helpers

2025-06-19 16:46:12 -04:00
parent 0e2b929a30
commit 58644f5334
1 changed files with 27 additions and 15 deletions
--- a/src/extract.py
+++ b/src/extract.py
@@ -1,10 +1,11 @@
 import os
-from dotenv import load_dotenv
-import requests
-import json
-from datetime import date, datetime
 import time
 import logging
+import requests
+import json
+from dotenv import load_dotenv
+from datetime import date, datetime
+

 load_dotenv

@@ -12,10 +13,10 @@ google_api_key = os.getenv('GOOGLE_API_KEY')
 google_header = {'key': google_api_key}
 open_lib_header = {'User-Agent': 'Kalar-LMS nick@kalar.codes'}

-today = date.today()
-
 logger = logging.getLogger('extract.py')
-logging.basicConfig(filename='lms-etl.log', level=logging.DEBUG)
+logging.basicConfig(filename=os.getenv('LOG_FILE'), level=os.getenv('LOGGING_LEVEL'))
+
+today = date.today()

 def extract_book_json(url, header=[]):
    '''
@@ -38,25 +39,34 @@ def get_google_book_data(query, offset=0):
        Returns a dictionary of books from the Google Books API based on a query.

        Keyword arguments:
-        query  --
+        query  -- a string used to find a list of books by a specific attribute provided by 
+                  the Google Books API.
        offset -- the optional page offset for a query. Google Books API limits the number 
                  of responses per query and returns an ordered list. This allows you to skip 
                  the first x number of responses.
    '''
    query = query.replace(' ', '+')
-    fields = ("items(volumeInfo/title,volumeInfo/authors,volumeInfo/publishedDate,"
-            "volumeInfo/publisher,volumeInfo/categories,volumeInfo/pageCount,volumeInfo/printType)")
+    fields = ('items(volumeInfo/title,volumeInfo/authors,volumeInfo/publishedDate,'
+            'volumeInfo/publisher,volumeInfo/categories,volumeInfo/pageCount,'
+            'volumeInfo/language,volumeInfo/printType,volumeInfo/description)')
    url = (f'https://www.googleapis.com/books/v1/volumes?q={query}'
            f'&fields={fields}&startIndex={offset}')
    return extract_book_json(url, google_header)

 def get_open_library_book_data(query, offset=0):
    '''
-    
+        Returns a dictionary of books from the Open Library API based on a query.
+
+        Keyword arguments:
+        query  -- a string used to find a list of books by a specific attribute provided by 
+                  the Open Library API.
+        offset -- the optional page offset for a query. The Open Library API limits the number 
+                  of responses per query and returns an ordered list. This allows you to skip 
+                  the first x number of responses.
    '''
    query = query.replace(' ', '+')
    fields = 'author_name,title,isbn'
-    url = f'https://openlibrary.org/search.json?{query}&lang=en&fields={fields}'
+    url = f'https://openlibrary.org/search.json?{query}&lang=en&fields={fields}&offset={offset}'

    return extract_book_json(url, open_lib_header)

@@ -78,7 +88,7 @@ def start():
            open_lib_query = f'title={title}'
            open_lib_books = get_open_library_book_data(open_lib_query)
            for books in open_lib_books['docs']:
-                logger.debug(f'{datetime.now()}:Book found: {str(books)}')
+                logger.info(f'{datetime.now()}:Book found: {str(books)}')
                if 'author_name' in books \
                and 'title' in books \
                and 'isbn' in books:
@@ -101,6 +111,8 @@ def start():
    

 if __name__ == '__main__':
-    print('Starting Extraction.')
+    print('Extraction Started')
+    logger.info(f'{datetime.now()}:Extraction Started')
    start()
-    print('Extraction done.')
+    print('Extraction Done')
+    logger.info(f'{datetime.now()}:Extraction Done')