Initial creation of ETL files

This commit is contained in:
2025-06-10 11:13:19 -04:00
parent 839141feb7
commit 8e7e9dd205
6 changed files with 56 additions and 1 deletions

4
.gitignore vendored
View File

@@ -191,4 +191,6 @@ cython_debug/
# exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
# refer to https://docs.cursor.com/context/ignore-files
.cursorignore
.cursorindexingignore
.cursorindexingignore
.DS_Store

View File

@@ -1,2 +1,8 @@
# LMS-DB-ETL
An Extract, Transform, Load (ETL) app that gathers book information from public APIs for a proof-of-concept (POC) LMS project
## extract.py
## transform.py
## load.py

2
requirements.txt Normal file
View File

@@ -0,0 +1,2 @@
requests==2.32.4
python-dotenv==1.1.0

45
src/extract.py Normal file
View File

@@ -0,0 +1,45 @@
import os
import requests
import json
# Google Books API key, read once from the environment when this module is
# imported. The value is None when GOOGLE_API_KEY is not set.
google_api_key = os.environ.get("GOOGLE_API_KEY")
class GoogleBooks:
    """Client for the Google Books ``volumes`` search endpoint.

    Every method is a static method, so it can be called directly on the
    class, e.g. ``GoogleBooks.fetch_book_data_by_author("Tolkien")``.
    """

    _VOLUMES_URL = "https://www.googleapis.com/books/v1/volumes"

    # Partial-response selector sent as the ``fields`` parameter; shared by
    # every search so the author and title lookups request the same fields.
    _FIELDS = (
        "items(volumeInfo/title,volumeInfo/authors,volumeInfo/publishedDate,"
        "volumeInfo/industryIdentifiers,volumeInfo/categories)"
    )

    # Seconds to wait on the network before giving up; without a timeout
    # ``requests`` can block indefinitely on an unresponsive server.
    _TIMEOUT_SECONDS = 10

    @staticmethod
    def _search(query, offset):
        """Run one volumes search and return the decoded JSON body."""
        params = {
            "q": query,
            "fields": GoogleBooks._FIELDS,
            "startIndex": offset,
            # Google Books reads the API key from the ``key`` query
            # parameter, not from a request header. ``requests`` omits
            # parameters whose value is None, so an unset key is simply
            # left out of the request.
            "key": google_api_key,
        }
        # Passing ``params`` lets ``requests`` percent-encode the values, so
        # spaces, ``&`` or ``#`` in the search text cannot corrupt the URL.
        response = requests.get(
            GoogleBooks._VOLUMES_URL,
            params=params,
            timeout=GoogleBooks._TIMEOUT_SECONDS,
        )
        return response.json()

    @staticmethod
    def fetch_book_data_by_author(author, offset=0):
        """Search volumes by author using the ``inauthor:`` keyword.

        Args:
            author: Author name to search for.
            offset: Value sent as ``startIndex`` (defaults to 0).

        Returns:
            The decoded JSON response body.

        Raises:
            requests.exceptions.RequestException: If the request fails or
                times out.
        """
        return GoogleBooks._search(f"inauthor:{author}", offset)

    @staticmethod
    def fetch_book_data_by_title(title, offset=0):
        """Search volumes by title using the ``intitle:`` keyword.

        Args:
            title: Title text to search for. Spaces are encoded by
                ``requests``, so no manual ``+`` substitution is needed.
            offset: Value sent as ``startIndex`` (defaults to 0).

        Returns:
            The decoded JSON response body.

        Raises:
            requests.exceptions.RequestException: If the request fails or
                times out.
        """
        return GoogleBooks._search(f"intitle:{title}", offset)

    @staticmethod
    def fetch_book_data_by_genre(genre, offset=0):
        """Placeholder for a genre search; not implemented, returns None."""
        return None
class OpenLibrary:
    """Client for the Open Library ``search.json`` endpoint.

    Every method is a static method, so it can be called directly on the
    class, e.g. ``OpenLibrary.fetch_book_data_by_author("Tolkien")``.
    """

    _SEARCH_URL = "https://openlibrary.org/search.json"

    # Header sent with every request so the service can identify the caller.
    _HEADERS = {'User-Agent': 'Kalar-LMS nick@kalar.codes'}

    # Seconds to wait on the network before giving up; without a timeout
    # ``requests`` can block indefinitely on an unresponsive server.
    _TIMEOUT_SECONDS = 10

    @staticmethod
    def fetch_book_data_by_author(author):
        """Search Open Library by author name.

        Args:
            author: Author name to search for.

        Returns:
            The decoded JSON response body.

        Raises:
            requests.exceptions.RequestException: If the request fails or
                times out.
        """
        # Passing ``params`` lets ``requests`` percent-encode the values, so
        # spaces or ``&`` in the author name cannot corrupt the URL.
        params = {
            "author": author,
            "lang": "en",
            "fields": "author_name,title,key,isbn",
        }
        response = requests.get(
            OpenLibrary._SEARCH_URL,
            params=params,
            headers=OpenLibrary._HEADERS,
            timeout=OpenLibrary._TIMEOUT_SECONDS,
        )
        return response.json()

    @staticmethod
    def fetch_book_data_by_title(title):
        """Placeholder for a title search; not implemented, returns None."""
        return None

    @staticmethod
    def fetch_book_data_by_genre(genre):
        """Placeholder for a genre search; not implemented, returns None."""
        return None
if __name__ == "__main__":
    # Manual smoke test: query each source for the same author and print the
    # raw result. Runs only when the file is executed as a script.
    for book_source in (GoogleBooks, OpenLibrary):
        print(book_source.fetch_book_data_by_author('Tolkien'))

0
src/load.py Normal file
View File

0
src/transform.py Normal file
View File