Class methods added
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -194,3 +194,4 @@ cython_debug/
|
|||||||
.cursorindexingignore
|
.cursorindexingignore
|
||||||
|
|
||||||
.DS_Store
|
.DS_Store
|
||||||
|
output/*
|
||||||
3
config/title.txt
Normal file
3
config/title.txt
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
Everything is Tuberculosis
|
||||||
|
The Fellowship of the Ring
|
||||||
|
Pale Blue Dot
|
||||||
140
src/extract.py
140
src/extract.py
@@ -1,45 +1,131 @@
|
|||||||
import os
|
import os
|
||||||
import requests
|
import requests
|
||||||
import json
|
import json
|
||||||
|
from datetime import date
|
||||||
|
import time
|
||||||
|
|
||||||
google_api_key = os.getenv("GOOGLE_API_KEY")
|
google_api_key = os.getenv('GOOGLE_API_KEY')
|
||||||
|
today = date.today()
|
||||||
|
|
||||||
|
def extract_book_data(url, header, timeout=30):
    """Fetch ``url`` with an HTTP GET and return the decoded JSON body.

    Args:
        url: Fully assembled request URL.
        header: Mapping of HTTP headers sent with the request.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` waits indefinitely on a silent host.

    Returns:
        The response body as parsed by ``response.json()``.

    Raises:
        SystemError: If the server answers with an HTTP error status. The
            original ``HTTPError`` is attached as ``__cause__``.
    """
    try:
        response = requests.get(url, headers=header, timeout=timeout)
        response.raise_for_status()
    except requests.exceptions.HTTPError as err:
        # Keep the exception type callers already see, but preserve the
        # underlying HTTP error for debugging.
        raise SystemError(err) from err
    return response.json()
|
||||||
|
|
||||||
class GoogleBooks:
    """Small client for the Google Books ``volumes`` search endpoint."""

    base_url = 'https://www.googleapis.com/books/v1/volumes'

    # NOTE(review): the API key is sent as an HTTP header named ``key``.
    # Google's documentation describes ``key`` as a URL query parameter --
    # confirm that the header form is actually honoured.
    header = {'key': google_api_key}

    # Partial-response filter: only these volume fields are requested.
    fields = ("items(volumeInfo/title,volumeInfo/authors,volumeInfo/publishedDate,"
              "volumeInfo/publisher,volumeInfo/categories,volumeInfo/pageCount,volumeInfo/printType)")

    @staticmethod
    def _encode(term):
        """Percent-encode ``term`` for use inside the ``q`` parameter.

        Spaces become ``+``. A literal ``+`` is left alone so callers that
        already pass ``+``-joined terms keep working.
        """
        from urllib.parse import quote_plus

        return quote_plus(term, safe='+')

    def _search(self, query, offset):
        """Run one search for the ready-made ``query`` string."""
        url = (f'{self.base_url}?q={query}'
               f'&fields={self.fields}&startIndex={offset}')
        return extract_book_data(url, self.header)

    def fetch_book_data_by_author(self, author, offset=0):
        """Return volumes whose author matches ``author``.

        Args:
            author: Author name; spaces and reserved characters are encoded.
            offset: Value sent as ``startIndex``.
        """
        return self._search(f'inauthor:{self._encode(author)}', offset)

    def fetch_book_data_by_title(self, title, offset=0):
        """Return volumes whose title matches ``title``.

        Args:
            title: Book title; spaces and reserved characters are encoded.
            offset: Value sent as ``startIndex``.
        """
        return self._search(f'intitle:{self._encode(title)}', offset)

    def fetch_book_data_by_genre(self, genre, offset=0):
        """Return volumes whose subject matches ``genre``.

        Args:
            genre: Subject name; spaces and reserved characters are encoded.
            offset: Value sent as ``startIndex``.
        """
        return self._search(f'subject:{self._encode(genre)}', offset)

    def fetch_book_data_by_query(self, query, offset=0):
        """Return volumes matching a caller-built query string.

        ``query`` is placed in the URL verbatim (for example ``isbn:...``),
        so the caller is responsible for any encoding it needs.
        """
        return self._search(query, offset)
|
||||||
|
|
||||||
|
|
||||||
class OpenLibrary:
    """Small client for the Open Library ``search.json`` endpoint."""

    # Identifying User-Agent sent with every request.
    header = {'User-Agent': 'Kalar-LMS nick@kalar.codes'}

    # Comma-separated list of document fields requested from the API.
    fields = 'author_name,title,isbn'

    def _search_url(self, param, term):
        """Build the search URL for ``param=term``.

        ``term`` is percent-encoded with spaces turned into ``+``. A literal
        ``+`` is left alone so already ``+``-joined terms keep working.
        """
        from urllib.parse import quote_plus

        encoded = quote_plus(term, safe='+')
        return (f'https://openlibrary.org/search.json?{param}={encoded}'
                f'&lang=en&fields={self.fields}')

    def fetch_book_data_by_author(self, author):
        """Return the search result for works by ``author``."""
        return extract_book_data(self._search_url('author', author), self.header)

    def fetch_book_data_by_title(self, title):
        """Return the search result for works titled ``title``."""
        return extract_book_data(self._search_url('title', title), self.header)

    def fetch_book_data_by_genre(self, genre):
        """Return the search result for works with subject ``genre``."""
        return extract_book_data(self._search_url('subject', genre), self.header)
|
||||||
|
|
||||||
|
|
||||||
|
def write_open_lib_json(open_lib, title='Pale Blue Dot'):
    """Write the Open Library title search result to today's JSON file.

    Args:
        open_lib: Object exposing ``fetch_book_data_by_title(title)``.
        title: Title to search for. Defaults to the value that used to be
            hard-coded here.

    The file ``output/open_lib_<today>.json`` is overwritten on each call.
    """
    payload = json.dumps(open_lib.fetch_book_data_by_title(title), indent=4)
    # ``output/`` is git-ignored, so it may not exist in a fresh checkout.
    os.makedirs('output', exist_ok=True)
    with open(f'output/open_lib_{today}.json', 'w', encoding='utf-8') as f:
        f.write(payload)
|
||||||
|
|
||||||
|
def write_google_books_json(google_books, query):
    """Append the Google Books result for ``query`` to today's JSON file.

    Args:
        google_books: Object exposing ``fetch_book_data_by_query(query)``.
        query: Query string passed through to the client.

    Nothing is written when the client returns ``None``. The check is made
    on the fetched data because ``json.dumps`` always returns a string, so
    testing its result could never skip the write.
    """
    data = google_books.fetch_book_data_by_query(query)
    if data is None:
        return

    # ``output/`` is git-ignored, so it may not exist in a fresh checkout.
    os.makedirs('output', exist_ok=True)
    with open(f'output/google_books_{today}.json', 'a', encoding='utf-8') as f:
        f.write(json.dumps(data, indent=4))
|
||||||
|
|
||||||
|
def get_google_books_info(google_books, query):
    """Return the Google Books search result for ``query``.

    Args:
        google_books: Object exposing ``fetch_book_data_by_query(query)``.
        query: Query string passed through unchanged.

    Returns:
        Whatever ``google_books.fetch_book_data_by_query(query)`` returns.
    """
    return google_books.fetch_book_data_by_query(query)
|
||||||
|
|
||||||
|
def _read_titles(path='config/title.txt'):
    """Return the stripped, non-blank lines of ``path`` as a list of titles."""
    with open(path, 'r', encoding='utf-8') as f:
        return [line.strip() for line in f if line.strip()]


def _collect_matches(open_lib, google_books, title):
    """Pair Open Library hits for ``title`` with Google Books records.

    Every 13-character ISBN of every Open Library document is looked up in
    Google Books. Each ISBN that yields at least one volume adds one entry
    to both returned lists.

    Returns:
        ``(open_lib_items, google_items)``, two lists of equal length.
    """
    open_lib_items = []
    google_items = []

    result = open_lib.fetch_book_data_by_title(title) or {}
    for doc in result.get('docs', []):
        # Not every document carries ISBNs or an author list.
        for isbn in doc.get('isbn', []):
            if len(isbn) != 13:
                continue

            book_info = get_google_books_info(google_books, 'isbn:' + isbn)
            volumes = book_info.get('items') if book_info else None
            if not volumes:
                continue

            # Build a new dict per ISBN so entries do not alias each other.
            open_lib_items.append({
                'author': doc.get('author_name'),
                'title': doc.get('title'),
                'isbn': isbn,
            })
            google_items.append(volumes[0])

    return open_lib_items, google_items


def _append_json(path, payload):
    """Append ``payload`` as JSON followed by a comma to ``path``."""
    with open(path, 'a', encoding='utf-8') as f:
        f.write(json.dumps(payload) + ',')


def main():
    """Look up every configured title and append the matches to ``output/``."""
    # ``output/`` is git-ignored, so it may not exist in a fresh checkout.
    os.makedirs('output', exist_ok=True)

    google_books = GoogleBooks()
    open_lib = OpenLibrary()

    for title in _read_titles():
        # Fresh lists per title: the files are appended to after every
        # title, so carrying items over would write earlier matches again.
        open_lib_items, google_items = _collect_matches(open_lib, google_books, title)

        _append_json(f'output/google_books_{today}.json', {'items': google_items})
        _append_json(f'output/open_lib_books_{today}.json', {'items': open_lib_items})

        print('Title Done')
        time.sleep(5)  # pause between titles before issuing more requests
        print('Starting Next')


if __name__ == '__main__':
    main()
|
||||||
|
|||||||
Reference in New Issue
Block a user