Update Exploring Near Earth Objects project and add Meme Generator project

This commit is contained in:
2026-01-03 21:55:24 -08:00
parent 9a4c3f7854
commit 155f0c9c6d
36 changed files with 754 additions and 65 deletions

View File

@@ -0,0 +1,24 @@
"""Module for ingesting CSV files containing quotes."""
from typing import List
from .IngestorInterface import IngestorInterface
from .QuoteModel import QuoteModel
import pandas as pd
class CSVIngestor(IngestorInterface):
    """Ingestor that reads quotes from comma-separated ``.csv`` files."""

    allowed_extensions = ["csv"]

    @classmethod
    def parse(cls, path: str) -> List[QuoteModel]:
        """Read the CSV file at ``path`` and return one quote per data row.

        The first line of the file is treated as a header row and its names
        are replaced by ``body`` and ``author``.

        Args:
            path: Location of the CSV file.

        Returns:
            A list of ``QuoteModel`` objects, in file order.

        Raises:
            Exception: If ``cls.can_ingest(path)`` is false.
        """
        if cls.can_ingest(path):
            frame = pd.read_csv(
                path, header=0, sep=",", names=["body", "author"]
            )
            return [
                QuoteModel(record["body"], record["author"])
                for _, record in frame.iterrows()
            ]
        raise Exception("Invalid ingest path")

View File

@@ -0,0 +1,25 @@
"""Module for ingesting Docx files containing quotes."""
import docx
from typing import List
from .IngestorInterface import IngestorInterface
from .QuoteModel import QuoteModel
class DocxIngestor(IngestorInterface):
    """Ingestor that reads quotes from Word ``.docx`` documents."""

    allowed_extensions = ["docx"]

    @classmethod
    def parse(cls, path: str) -> List[QuoteModel]:
        """Extract quotes from the paragraphs of a Docx file.

        Each non-blank paragraph is expected to look like ``body - author``.
        Paragraphs that do not contain the `` - `` separator are skipped
        instead of raising ``IndexError``.

        Args:
            path: Location of the Docx file.

        Returns:
            A list of ``QuoteModel`` objects, in document order.

        Raises:
            Exception: If ``cls.can_ingest(path)`` is false.
        """
        if not cls.can_ingest(path):
            raise Exception("Invalid ingest path")

        quotes = []
        doc = docx.Document(path)
        for para in doc.paragraphs:
            text = para.text.strip()
            if not text:
                continue
            # Split on the first separator only so an author such as
            # "Jane Doe - Poet" is kept whole; same rule as TextIngestor.
            parts = text.split(" - ", 1)
            if len(parts) == 2:
                quotes.append(QuoteModel(parts[0], parts[1]))
        return quotes

View File

@@ -0,0 +1,22 @@
"""Ingestor module to select appropriate ingestor based on file type."""
from typing import List
from .IngestorInterface import IngestorInterface
from .QuoteModel import QuoteModel
from .CSVIngestor import CSVIngestor
from .TextIngestor import TextIngestor
from .DocxIngestor import DocxIngestor
from .PDFIngestor import PDFIngestor
class Ingestor(IngestorInterface):
    """Facade that dispatches a file to the first ingestor able to read it."""

    # Concrete ingestors, tried in this order.
    ingestors = [CSVIngestor, TextIngestor, DocxIngestor, PDFIngestor]

    @classmethod
    def can_ingest(cls, path: str) -> bool:
        """Return True if any registered ingestor accepts ``path``.

        The inherited implementation consults ``allowed_extensions``, which
        is empty on this class, so it would always answer False.
        """
        return any(ingestor.can_ingest(path) for ingestor in cls.ingestors)

    @classmethod
    def parse(cls, path: str) -> List[QuoteModel]:
        """Parse ``path`` with the first registered ingestor that accepts it.

        Args:
            path: Location of the file to ingest.

        Returns:
            The list of ``QuoteModel`` objects produced by the selected
            ingestor.

        Raises:
            Exception: If no registered ingestor accepts ``path``. This is
                the same error the concrete ingestors raise, and replaces
                the previous implicit ``None`` return.
        """
        for ingestor in cls.ingestors:
            if ingestor.can_ingest(path):
                return ingestor.parse(path)
        raise Exception("Invalid ingest path")

View File

@@ -0,0 +1,23 @@
"""Ingestor Interface module for quote ingestion."""
from abc import ABC, abstractmethod
from typing import List
from .QuoteModel import QuoteModel
class IngestorInterface(ABC):
    """Abstract base class shared by all quote ingestors.

    Subclasses list the file extensions they support in
    ``allowed_extensions`` and implement ``parse``.
    """

    # File extensions (without the leading dot) that the ingestor supports.
    allowed_extensions = []

    @classmethod
    def can_ingest(cls, path: str) -> bool:
        """Return True if the extension of ``path`` is in ``allowed_extensions``.

        The extension is taken from the final path component only and is
        compared case-insensitively, so ``quotes.CSV`` is accepted by a
        ``csv`` ingestor while an extension-less file named ``csv`` or a
        path such as ``dir.csv/readme`` is not.

        Args:
            path: Location of the candidate file.

        Returns:
            True when the file extension is supported, False otherwise.
        """
        # Local import keeps this block self-contained.
        import os

        extension = os.path.splitext(path)[1][1:].lower()
        return any(
            extension == allowed.lower() for allowed in cls.allowed_extensions
        )

    @classmethod
    @abstractmethod
    def parse(cls, path: str) -> List["QuoteModel"]:
        """Parse the file at ``path`` and return a list of QuoteModel objects.

        Concrete ingestors must override this method.
        """
        pass

View File

@@ -0,0 +1,40 @@
"""Module for ingesting PDF files containing quotes."""
import os
import random
import subprocess
from typing import List
from .IngestorInterface import IngestorInterface
from .QuoteModel import QuoteModel
class PDFIngestor(IngestorInterface):
    """Ingestor that reads quotes from ``.pdf`` files via ``pdftotext``."""

    allowed_extensions = ["pdf"]

    @classmethod
    def parse(cls, path: str) -> List[QuoteModel]:
        """Extract quotes from a PDF file.

        The PDF is converted to text by the external ``pdftotext`` program.
        Each non-blank line of the result is expected to look like
        ``body - author``; lines without the `` - `` separator are skipped.

        Args:
            path: Location of the PDF file.

        Returns:
            A list of ``QuoteModel`` objects. The list is empty when the
            conversion fails (``pdftotext`` missing or exiting with an
            error); the failure is printed rather than raised.

        Raises:
            Exception: If ``cls.can_ingest(path)`` is false.
        """
        # Local import: tempfile is not among this module's existing imports.
        import tempfile

        if not cls.can_ingest(path):
            raise Exception("Invalid ingest path")

        quotes = []
        # A unique file in the system temp directory: does not depend on a
        # pre-existing ./tmp folder and cannot collide with concurrent calls.
        handle, tmp = tempfile.mkstemp(suffix=".txt")
        os.close(handle)
        try:
            # pdftotext <input-pdf> <output-text-file>
            subprocess.run(["pdftotext", path, tmp], check=True)
            with open(tmp, "r") as file:
                lines = file.readlines()
        except (FileNotFoundError, subprocess.CalledProcessError) as err:
            print(f"Error: {err}")
        else:
            for line in lines:
                line = line.strip()
                if not line:
                    continue
                # Split on the first separator only; same rule as TextIngestor.
                parts = line.split(" - ", 1)
                if len(parts) == 2:
                    quotes.append(QuoteModel(parts[0], parts[1]))
        finally:
            if os.path.exists(tmp):
                os.remove(tmp)
        return quotes

View File

@@ -0,0 +1,14 @@
"""QuoteModel module for representing a quote with its body and author."""
class QuoteModel:
    """A single quote: its text (``body``) and who said it (``author``)."""

    def __init__(self, body, author):
        """Store the quote text and its author on the instance."""
        self.body, self.author = body, author

    def __repr__(self):
        """Return the quote formatted as ``<body> - <author>``."""
        return "{} - {}".format(self.body, self.author)

View File

@@ -0,0 +1,26 @@
"""Module for ingesting text files containing quotes."""
from typing import List
from .IngestorInterface import IngestorInterface
from .QuoteModel import QuoteModel
class TextIngestor(IngestorInterface):
    """Ingestor that reads quotes from plain-text ``.txt`` files."""

    allowed_extensions = ["txt"]

    @classmethod
    def parse(cls, path: str) -> List[QuoteModel]:
        """Extract quotes from a text file, one ``body - author`` per line.

        Blank lines and lines without the `` - `` separator are ignored.
        Only the first separator on a line splits body from author.

        Args:
            path: Location of the text file.

        Returns:
            A list of ``QuoteModel`` objects, in file order.

        Raises:
            Exception: If ``cls.can_ingest(path)`` is false.
        """
        if not cls.can_ingest(path):
            raise Exception("Invalid ingest path")

        with open(path, "r") as handle:
            stripped = (raw.strip() for raw in handle)
            pieces = (text.partition(" - ") for text in stripped if text)
            # An empty separator means " - " was not found on that line.
            return [
                QuoteModel(body, author)
                for body, separator, author in pieces
                if separator
            ]

View File

@@ -0,0 +1,7 @@
from .IngestorInterface import IngestorInterface
from .CSVIngestor import CSVIngestor
from .DocxIngestor import DocxIngestor
from .PDFIngestor import PDFIngestor
from .TextIngestor import TextIngestor
from .Ingestor import Ingestor
from .QuoteModel import QuoteModel