diff --git a/.gitignore b/.gitignore
index 7b004e5..9335c70 100644
--- a/.gitignore
+++ b/.gitignore
@@ -191,4 +191,6 @@ cython_debug/
 # exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
 # refer to https://docs.cursor.com/context/ignore-files
 .cursorignore
-.cursorindexingignore
\ No newline at end of file
+.cursorindexingignore
+
+.DS_Store
\ No newline at end of file
diff --git a/README.md b/README.md
index a018604..04d050c 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,8 @@
 # LMS-DB-ETL
 An Extract, Transform, Load app to gather book information from public API for a POC LMS project
+
+## extract.py
+
+## transform.py
+
+## load.py
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..89a1214
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,2 @@
+requests==2.32.4
+python-dotenv==1.1.0
\ No newline at end of file
diff --git a/src/extract.py b/src/extract.py
new file mode 100644
index 0000000..2cd97c2
--- /dev/null
+++ b/src/extract.py
@@ -0,0 +1,118 @@
+"""Fetch raw book data from the Google Books and Open Library APIs."""
+import os
+import requests
+import json
+
+google_api_key = os.getenv("GOOGLE_API_KEY")
+
+# requests waits forever unless a timeout is given, so every call passes one.
+REQUEST_TIMEOUT_SECONDS = 10
+
+GOOGLE_BOOKS_URL = "https://www.googleapis.com/books/v1/volumes"
+# Partial-response selector: limits each item to these volumeInfo fields.
+GOOGLE_BOOKS_FIELDS = (
+    "items(volumeInfo/title,volumeInfo/authors,volumeInfo/publishedDate,"
+    "volumeInfo/industryIdentifiers,volumeInfo/categories)"
+)
+
+OPEN_LIBRARY_SEARCH_URL = "https://openlibrary.org/search.json"
+
+
+class GoogleBooks():
+    """Book lookups against the Google Books volumes endpoint."""
+
+    @staticmethod
+    def _search(query, offset):
+        """Run one volumes search and return the decoded JSON body.
+
+        Args:
+            query: Full ``q`` value, e.g. ``inauthor:Tolkien``.
+            offset: Zero-based ``startIndex`` of the first result.
+        """
+        params = {
+            "q": query,
+            "fields": GOOGLE_BOOKS_FIELDS,
+            "startIndex": offset,
+            # The API key travels as the ``key`` query parameter, not a header.
+            # requests omits parameters whose value is None (key not configured).
+            "key": google_api_key,
+        }
+        # Passing params lets requests URL-encode the search term.
+        response = requests.get(
+            GOOGLE_BOOKS_URL, params=params, timeout=REQUEST_TIMEOUT_SECONDS
+        )
+        return response.json()
+
+    @staticmethod
+    def fetch_book_data_by_author(author, offset=0):
+        """Return Google Books results whose author matches *author*.
+
+        Args:
+            author: Author name to search for.
+            offset: Index of the first result to return (for paging).
+
+        Returns:
+            The decoded JSON response.
+        """
+        return GoogleBooks._search(f"inauthor:{author}", offset)
+
+    @staticmethod
+    def fetch_book_data_by_title(title, offset=0):
+        """Return Google Books results whose title matches *title*.
+
+        Args:
+            title: Title text to search for; spaces are allowed.
+            offset: Index of the first result to return (for paging).
+
+        Returns:
+            The decoded JSON response.
+        """
+        return GoogleBooks._search(f"intitle:{title}", offset)
+
+    @staticmethod
+    def fetch_book_data_by_genre(genre, offset=0):
+        """Placeholder: not implemented yet, returns None."""
+        pass
+
+
+class OpenLibrary():
+    """Book lookups against the Open Library search endpoint."""
+
+    @staticmethod
+    def fetch_book_data_by_author(author):
+        """Return Open Library search results for *author*.
+
+        Args:
+            author: Author name to search for.
+
+        Returns:
+            The decoded JSON response.
+        """
+        params = {
+            "author": author,
+            "lang": "en",
+            "fields": "author_name,title,key,isbn",
+        }
+        response = requests.get(
+            OPEN_LIBRARY_SEARCH_URL,
+            params=params,
+            headers={'User-Agent': 'Kalar-LMS nick@kalar.codes'},
+            timeout=REQUEST_TIMEOUT_SECONDS,
+        )
+        return response.json()
+
+    @staticmethod
+    def fetch_book_data_by_title(title):
+        """Placeholder: not implemented yet, returns None."""
+        pass
+
+    @staticmethod
+    def fetch_book_data_by_genre(genre):
+        """Placeholder: not implemented yet, returns None."""
+        pass
+
+
+if __name__ == "__main__":
+    # Manual smoke test: performs live requests against both APIs.
+    print(GoogleBooks.fetch_book_data_by_author('Tolkien'))
+    print(OpenLibrary.fetch_book_data_by_author('Tolkien'))
diff --git a/src/load.py b/src/load.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/transform.py b/src/transform.py
new file mode 100644
index 0000000..e69de29