From 58644f5334887f37a2cb9f814e0351c01122fb7a Mon Sep 17 00:00:00 2001 From: Nicholas Kalar Date: Thu, 19 Jun 2025 16:46:12 -0400 Subject: [PATCH] Minor changes --- src/extract.py | 42 +++++++++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 15 deletions(-) diff --git a/src/extract.py b/src/extract.py index 01468ab..a0b00d7 100644 --- a/src/extract.py +++ b/src/extract.py @@ -1,10 +1,11 @@ import os -from dotenv import load_dotenv -import requests -import json -from datetime import date, datetime import time import logging +import requests +import json +from dotenv import load_dotenv +from datetime import date, datetime + load_dotenv @@ -12,10 +13,10 @@ google_api_key = os.getenv('GOOGLE_API_KEY') google_header = {'key': google_api_key} open_lib_header = {'User-Agent': 'Kalar-LMS nick@kalar.codes'} -today = date.today() - logger = logging.getLogger('extract.py') -logging.basicConfig(filename='lms-etl.log', level=logging.DEBUG) +logging.basicConfig(filename=os.getenv('LOG_FILE'), level=os.getenv('LOGGING_LEVEL')) + +today = date.today() def extract_book_json(url, header=[]): ''' @@ -38,25 +39,34 @@ def get_google_book_data(query, offset=0): Returns a dictionary of books from the Google Books API based on a query. Keyword arguments: - query -- + query -- a string used to find a list of books by a specific attribute provided by + the Google Books API. offset -- the optional page offset for a query. Google Books API limits the number of responses per query and returns an ordered list. This allows you to skip the first x number of responses. ''' query = query.replace(' ', '+') - fields = ("items(volumeInfo/title,volumeInfo/authors,volumeInfo/publishedDate," - "volumeInfo/publisher,volumeInfo/categories,volumeInfo/pageCount,volumeInfo/printType)") + fields = ('items(volumeInfo/title,volumeInfo/authors,volumeInfo/publishedDate,' + 'volumeInfo/publisher,volumeInfo/categories,volumeInfo/pageCount,' + 'volumeInfo/language,volumeInfo/printType,volumeInfo/description)') url = (f'https://www.googleapis.com/books/v1/volumes?q={query}' f'&fields={fields}&startIndex={offset}') return extract_book_json(url, google_header) def get_open_library_book_data(query, offset=0): ''' - + Returns a dictionary of books from the Open Library API based on a query. + + Keyword arguments: + query -- a string used to find a list of books by a specific attribute provided by + the Open Library API. + offset -- the optional page offset for a query. The Open Library API limits the number + of responses per query and returns an ordered list. This allows you to skip + the first x number of responses. ''' query = query.replace(' ', '+') fields = 'author_name,title,isbn' - url = f'https://openlibrary.org/search.json?{query}&lang=en&fields={fields}' + url = f'https://openlibrary.org/search.json?{query}&lang=en&fields={fields}&offset={offset}' return extract_book_json(url, open_lib_header) @@ -78,7 +88,7 @@ def start(): open_lib_query = f'title={title}' open_lib_books = get_open_library_book_data(open_lib_query) for books in open_lib_books['docs']: - logger.debug(f'{datetime.now()}:Book found: {str(books)}') + logger.info(f'{datetime.now()}:Book found: {str(books)}') if 'author_name' in books \ and 'title' in books \ and 'isbn' in books: @@ -101,6 +111,8 @@ def start(): if __name__ == '__main__': - print('Starting Extraction.') + print('Extraction Started') + logger.info(f'{datetime.now()}:Extraction Started') start() - print('Extraction done.') + print('Extraction Done') + logger.info(f'{datetime.now()}:Extraction Done')