Minor changes

This commit is contained in:
2025-06-19 16:46:12 -04:00
parent 0e2b929a30
commit 58644f5334

View File

@@ -1,10 +1,11 @@
import os import os
from dotenv import load_dotenv
import requests
import json
from datetime import date, datetime
import time import time
import logging import logging
import requests
import json
from dotenv import load_dotenv
from datetime import date, datetime
# Load variables from a local .env file into the process environment so that
# the os.getenv() lookups in this module can see them. A bare `load_dotenv`
# is only a name expression and does nothing; the function has to be called.
load_dotenv()
@@ -12,10 +13,10 @@ google_api_key = os.getenv('GOOGLE_API_KEY')
# Request headers sent to each book API.
google_header = {'key': google_api_key}
open_lib_header = {'User-Agent': 'Kalar-LMS nick@kalar.codes'}

logger = logging.getLogger('extract.py')
# Log destination and verbosity are read from the environment. When a variable
# is unset or empty, fall back to the values that used to be hard-coded here.
# The level is upper-cased because logging only recognises names such as
# 'DEBUG' or 'INFO' and raises ValueError for anything else.
logging.basicConfig(
    filename=os.getenv('LOG_FILE') or 'lms-etl.log',
    level=(os.getenv('LOGGING_LEVEL') or 'DEBUG').upper(),
)

# Date captured once at import time.
today = date.today()
def extract_book_json(url, header=[]): def extract_book_json(url, header=[]):
''' '''
@@ -38,25 +39,34 @@ def get_google_book_data(query, offset=0):
Returns a dictionary of books from the Google Books API based on a query. Returns a dictionary of books from the Google Books API based on a query.
Keyword arguments: Keyword arguments:
query -- query -- a string used to find a list of books by a specific attribute provided by
the Google Books API.
offset -- the optional page offset for a query. Google Books API limits the number offset -- the optional page offset for a query. Google Books API limits the number
of responses per query and returns an ordered list. This allows you to skip of responses per query and returns an ordered list. This allows you to skip
the first x number of responses. the first x number of responses.
''' '''
query = query.replace(' ', '+') query = query.replace(' ', '+')
fields = ("items(volumeInfo/title,volumeInfo/authors,volumeInfo/publishedDate," fields = ('items(volumeInfo/title,volumeInfo/authors,volumeInfo/publishedDate,'
"volumeInfo/publisher,volumeInfo/categories,volumeInfo/pageCount,volumeInfo/printType)") 'volumeInfo/publisher,volumeInfo/categories,volumeInfo/pageCount,'
'volumeInfo/language,volumeInfo/printType,volumeInfo/description)')
url = (f'https://www.googleapis.com/books/v1/volumes?q={query}' url = (f'https://www.googleapis.com/books/v1/volumes?q={query}'
f'&fields={fields}&startIndex={offset}') f'&fields={fields}&startIndex={offset}')
return extract_book_json(url, google_header) return extract_book_json(url, google_header)
def get_open_library_book_data(query, offset=0):
    '''
    Queries the Open Library search endpoint and returns the result of
    extract_book_json for the constructed URL.

    Keyword arguments:
    query -- a string used to find a list of books by a specific attribute provided by
             the Open Library API. It is inserted into the URL query string as-is
             (e.g. 'title=Some Title'), with spaces replaced by '+'.
    offset -- the optional page offset for a query. The Open Library API limits the number
              of responses per query and returns an ordered list. This allows you to skip
              the first x number of responses.
    '''
    # Spaces are not valid in a URL query string; '+' is the form-encoded space.
    query = query.replace(' ', '+')
    # Only request the fields that are needed from each search result.
    fields = 'author_name,title,isbn'
    url = (f'https://openlibrary.org/search.json?{query}'
           f'&lang=en&fields={fields}&offset={offset}')
    return extract_book_json(url, open_lib_header)
@@ -78,7 +88,7 @@ def start():
open_lib_query = f'title={title}' open_lib_query = f'title={title}'
open_lib_books = get_open_library_book_data(open_lib_query) open_lib_books = get_open_library_book_data(open_lib_query)
for books in open_lib_books['docs']: for books in open_lib_books['docs']:
logger.debug(f'{datetime.now()}:Book found: {str(books)}') logger.info(f'{datetime.now()}:Book found: {str(books)}')
if 'author_name' in books \ if 'author_name' in books \
and 'title' in books \ and 'title' in books \
and 'isbn' in books: and 'isbn' in books:
@@ -101,6 +111,8 @@ def start():
# Script entry point: announce progress on stdout and in the log file around
# the extraction run.
if __name__ == '__main__':
    print('Extraction Started')
    logger.info(f'{datetime.now()}:Extraction Started')
    start()
    print('Extraction Done')
    logger.info(f'{datetime.now()}:Extraction Done')