diff --git a/config/title.txt b/config/title.txt index 1be290c..6216234 100644 --- a/config/title.txt +++ b/config/title.txt @@ -1,3 +1,5 @@ Everything is Tuberculosis The Fellowship of the Ring Pale Blue Dot +The New Jim Crow +Leviathan Wakes diff --git a/src/extract.py b/src/extract.py index 9c7b0bc..594e4b7 100644 --- a/src/extract.py +++ b/src/extract.py @@ -1,19 +1,22 @@ import os +from dotenv import load_dotenv import requests import json from datetime import date import time +load_dotenv() + google_api_key = os.getenv('GOOGLE_API_KEY') today = date.today() -def extract_book_data(url, header): - try: - response = requests.get(url, headers=header) - response.raise_for_status() - except requests.exceptions.HTTPError as err: - raise SystemError(err) - return response.json() +def extract_book_json(url, header): + try: + response = requests.get(url, headers=header) + response.raise_for_status() + except requests.exceptions.HTTPError as err: + raise SystemError(err) from err + return response.json() class GoogleBooks(): header = {'key': google_api_key} @@ -25,7 +28,7 @@ class GoogleBooks(): url = (f'https://www.googleapis.com/books/v1/volumes?q=inauthor:{author}' f'&fields={self.fields}&startIndex={offset}') - return extract_book_data(url, self.header) + return extract_book_json(url, self.header) def fetch_book_data_by_title(self, title, offset=0): @@ -33,7 +36,7 @@ class GoogleBooks(): url = (f'https://www.googleapis.com/books/v1/volumes?q=intitle:{title}' f'&fields={self.fields}&startIndex={offset}') - return extract_book_data(url, self.header) + return extract_book_json(url, self.header) def fetch_book_data_by_genre(self, genre, offset=0): @@ -41,13 +44,13 @@ class GoogleBooks(): url = (f'https://www.googleapis.com/books/v1/volumes?q=subject:{genre}' f'&fields={self.fields}&startIndex={offset}') - return extract_book_data(url, self.header) + return extract_book_json(url, self.header) def fetch_book_data_by_query(self, query, offset=0): url = 
(f'https://www.googleapis.com/books/v1/volumes?q={query}' f'&fields={self.fields}&startIndex={offset}') - return extract_book_data(url, self.header) + return extract_book_json(url, self.header) class OpenLibrary(): @@ -58,74 +61,62 @@ class OpenLibrary(): author = author.replace(' ', '+') url = f'https://openlibrary.org/search.json?author={author}&lang=en&fields={self.fields}' - return extract_book_data(url, self.header) - + return extract_book_json(url, self.header) def fetch_book_data_by_title(self, title): title = title.replace(' ', '+') url = f'https://openlibrary.org/search.json?title={title}&lang=en&fields={self.fields}' - return extract_book_data(url, self.header) - + return extract_book_json(url, self.header) def fetch_book_data_by_genre(self, genre): genre = genre.replace(' ', '+') url = f'https://openlibrary.org/search.json?subject={genre}&lang=en&fields={self.fields}' - return extract_book_data(url, self.header) + return extract_book_json(url, self.header) -def write_open_lib_json(open_lib): - open_lib_json = json.dumps(open_lib.fetch_book_data_by_title('Pale Blue Dot'), indent=4) - with open(f'output/open_lib_{today}.json', 'w') as f: - f.write(open_lib_json) - -def write_google_books_json(google_books, query): - google_books_json = json.dumps(google_books.fetch_book_data_by_query(query), indent=4) - with open(f'output/google_books_{today}.json', 'a') as f: - if google_books_json != None: - f.write(google_books_json) - -def get_google_books_info(google_books, query): - return google_books.fetch_book_data_by_query(query) - -if __name__ == '__main__': +def start(): titles = [] - with open('config/title.txt', 'r') as f: - for line in f: + with open('config/title.txt', 'r') as titles_file: + for line in titles_file: titles.append(line.strip()) google_books = GoogleBooks() open_lib = OpenLibrary() - google_books_json = {'items':[]} - open_lib_json = {'items':[]} + google_books_array = [] + open_lib_array = [] - for title in titles: - 
open_lib_books = open_lib.fetch_book_data_by_title(title) - for books in open_lib_books['docs']: - potential_book = { - 'author': books['author_name'], - 'title': books['title'], - } + with open(f'output/raw_google_books_{today}.json', 'w') as google_books_file, \ + open(f'output/raw_open_lib_books_{today}.json', 'w') as open_lib_file: + google_books_file.write('{"book_data":') + open_lib_file.write('{"book_data":') + + for title in titles: + open_lib_books = open_lib.fetch_book_data_by_title(title) + for books in open_lib_books['docs']: + print(str(books)) + if 'author_name' in books \ + and 'title' in books \ + and 'isbn' in books: + for isbn in books['isbn']: + if len(isbn) == 13: + query = 'isbn:' + isbn + google_book_info = google_books.fetch_book_data_by_query(query) - for isbn in books['isbn']: - if len(isbn) == 13: - query = 'isbn:' + isbn - book_info = get_google_books_info(google_books, query) - if book_info != {}: + if google_book_info.get('items'): + potential_ol_book = books + potential_ol_book['isbn'] = isbn - potential_book['isbn'] = isbn + open_lib_array.append(potential_ol_book) + google_books_array.append(google_book_info['items'][0]) + time.sleep(.5) + time.sleep(.5) - open_lib_json['items'].append(potential_book) - google_books_json['items'].append(book_info['items'][0]) + google_books_file.write(json.dumps(google_books_array)+'}') + open_lib_file.write(json.dumps(open_lib_array)+'}') + - with open(f'output/google_books_{today}.json', 'a') as f: - f.write(json.dumps(google_books_json)+',') - - with open(f'output/open_lib_books_{today}.json', 'a') as f: - f.write(json.dumps(open_lib_json)+',') - - print('Title Done') - time.sleep(5) - print('Starting Next') +if __name__ == '__main__': + start()