Minor changes

This commit is contained in:
2025-06-19 16:46:12 -04:00
parent 0e2b929a30
commit 58644f5334

View File

@@ -1,10 +1,11 @@
import os
from dotenv import load_dotenv
import requests
import json
from datetime import date, datetime
import time
import logging
import requests
import json
from dotenv import load_dotenv
from datetime import date, datetime
# Load variables from a .env file into the process environment before any
# os.getenv() lookup below; the bare name `load_dotenv` without parentheses
# is a no-op expression and never reads the file.
load_dotenv()
@@ -12,10 +13,10 @@ google_api_key = os.getenv('GOOGLE_API_KEY')
# Request parameters/headers handed to extract_book_json for each API.
google_header = {'key': google_api_key}
open_lib_header = {'User-Agent': 'Kalar-LMS nick@kalar.codes'}

logger = logging.getLogger('extract.py')
# Configure the root logger exactly once: logging.basicConfig() does nothing
# when the root logger already has handlers, so a second call would be
# silently ignored. The fallbacks keep the previous hard-coded behaviour when
# LOG_FILE / LOGGING_LEVEL are not set in the environment.
logging.basicConfig(
    filename=os.getenv('LOG_FILE', 'lms-etl.log'),
    level=os.getenv('LOGGING_LEVEL', 'DEBUG'),
)

# Date captured once at import time.
today = date.today()
def extract_book_json(url, header=[]):
'''
@@ -38,25 +39,34 @@ def get_google_book_data(query, offset=0):
Returns a dictionary of books from the Google Books API based on a query.
Keyword arguments:
query --
query -- a string used to find a list of books by a specific attribute provided by
the Google Books API.
offset -- the optional page offset for a query. Google Books API limits the number
of responses per query and returns an ordered list. This allows you to skip
the first x number of responses.
'''
query = query.replace(' ', '+')
fields = ("items(volumeInfo/title,volumeInfo/authors,volumeInfo/publishedDate,"
"volumeInfo/publisher,volumeInfo/categories,volumeInfo/pageCount,volumeInfo/printType)")
fields = ('items(volumeInfo/title,volumeInfo/authors,volumeInfo/publishedDate,'
'volumeInfo/publisher,volumeInfo/categories,volumeInfo/pageCount,'
'volumeInfo/language,volumeInfo/printType,volumeInfo/description)')
url = (f'https://www.googleapis.com/books/v1/volumes?q={query}'
f'&fields={fields}&startIndex={offset}')
return extract_book_json(url, google_header)
def get_open_library_book_data(query, offset=0):
    '''
    Returns a dictionary of books from the Open Library API based on a query.

    Keyword arguments:
    query -- a string used to find a list of books by a specific attribute provided by
             the Open Library API. It is placed directly into the URL query string,
             so it is expected in 'key=value' form (e.g. 'title=...').
    offset -- the optional page offset for a query. The Open Library API limits the number
              of responses per query and returns an ordered list. This allows you to skip
              the first x number of responses.
    '''
    # Spaces are encoded as '+' so the query can be embedded in the URL.
    query = query.replace(' ', '+')
    fields = 'author_name,title,isbn'
    # Build the URL once, including the offset; an earlier assignment without
    # the offset was immediately overwritten and has been dropped.
    url = (f'https://openlibrary.org/search.json?{query}&lang=en'
           f'&fields={fields}&offset={offset}')
    return extract_book_json(url, open_lib_header)
@@ -78,7 +88,7 @@ def start():
open_lib_query = f'title={title}'
open_lib_books = get_open_library_book_data(open_lib_query)
for books in open_lib_books['docs']:
logger.debug(f'{datetime.now()}:Book found: {str(books)}')
logger.info(f'{datetime.now()}:Book found: {str(books)}')
if 'author_name' in books \
and 'title' in books \
and 'isbn' in books:
@@ -101,6 +111,8 @@ def start():
if __name__ == '__main__':
    # Announce each stage once on stdout and record it in the log file.
    print('Extraction Started')
    logger.info(f'{datetime.now()}:Extraction Started')
    start()
    print('Extraction Done')
    logger.info(f'{datetime.now()}:Extraction Done')