Update Exploring Near Earth Objects project and add Meme Generator project
This commit is contained in:
24
Meme_Generator/QuoteEngine/CSVIngestor.py
Normal file
24
Meme_Generator/QuoteEngine/CSVIngestor.py
Normal file
@@ -0,0 +1,24 @@
|
||||
"""Module for ingesting CSV files containing quotes."""
|
||||
|
||||
from typing import List
|
||||
from .IngestorInterface import IngestorInterface
|
||||
from .QuoteModel import QuoteModel
|
||||
|
||||
import pandas as pd
|
||||
|
||||
|
||||
class CSVIngestor(IngestorInterface):
    """Subclass for ingesting CSV files.

    The file is read with pandas. ``header=0`` combined with ``names=``
    discards the file's own header row and labels the columns ``body`` and
    ``author``.
    """

    allowed_extensions = ["csv"]

    @classmethod
    def parse(cls, path: str) -> List[QuoteModel]:
        """Parse the CSV file to extract quotes.

        Args:
            path: Location of the CSV file.

        Returns:
            One ``QuoteModel`` per data row, in file order.

        Raises:
            Exception: If ``path`` does not have an allowed extension.
        """
        if not cls.can_ingest(path):
            raise Exception("Invalid ingest path")

        frame = pd.read_csv(
            path, header=0, sep=",", names=["body", "author"]
        )
        return [
            QuoteModel(row["body"], row["author"])
            for _, row in frame.iterrows()
        ]
|
||||
25
Meme_Generator/QuoteEngine/DocxIngestor.py
Normal file
25
Meme_Generator/QuoteEngine/DocxIngestor.py
Normal file
@@ -0,0 +1,25 @@
|
||||
"""Module for ingesting Docx files containing quotes."""
|
||||
|
||||
import docx
|
||||
from typing import List
|
||||
from .IngestorInterface import IngestorInterface
|
||||
from .QuoteModel import QuoteModel
|
||||
|
||||
|
||||
class DocxIngestor(IngestorInterface):
    """Subclass for ingesting Docx files.

    Each non-blank paragraph is expected to have the form
    ``<body> - <author>``.
    """

    allowed_extensions = ["docx"]

    @classmethod
    def parse(cls, path: str) -> List[QuoteModel]:
        """Parse the Docx file to extract quotes.

        Args:
            path: Location of the Docx file.

        Returns:
            One ``QuoteModel`` per well-formed paragraph, in document order.
            Blank paragraphs and paragraphs without a `` - `` separator are
            skipped.

        Raises:
            Exception: If ``path`` does not have an allowed extension.
        """
        if not cls.can_ingest(path):
            raise Exception("Invalid ingest path")

        quotes = []
        for para in docx.Document(path).paragraphs:
            text = para.text.strip()
            if not text:
                continue
            # Split on the first separator only, so an author containing
            # " - " is kept whole; a paragraph with no separator yields a
            # single part and is skipped instead of raising IndexError.
            parts = text.split(" - ", 1)
            if len(parts) == 2:
                quotes.append(QuoteModel(parts[0], parts[1]))
        return quotes
|
||||
22
Meme_Generator/QuoteEngine/Ingestor.py
Normal file
22
Meme_Generator/QuoteEngine/Ingestor.py
Normal file
@@ -0,0 +1,22 @@
|
||||
"""Ingestor module to select appropriate ingestor based on file type."""
|
||||
|
||||
from typing import List
|
||||
from .IngestorInterface import IngestorInterface
|
||||
from .QuoteModel import QuoteModel
|
||||
from .CSVIngestor import CSVIngestor
|
||||
from .TextIngestor import TextIngestor
|
||||
from .DocxIngestor import DocxIngestor
|
||||
from .PDFIngestor import PDFIngestor
|
||||
|
||||
|
||||
class Ingestor(IngestorInterface):
    """Subclass to select appropriate ingestor.

    Delegates to the first class in ``ingestors`` whose ``can_ingest``
    accepts the path.
    """

    ingestors = [CSVIngestor, TextIngestor, DocxIngestor, PDFIngestor]

    @classmethod
    def can_ingest(cls, path: str) -> bool:
        """Return True if any registered ingestor can handle ``path``.

        This class declares no extensions of its own, so the answer comes
        from the registered ingestors.
        """
        return any(ingestor.can_ingest(path) for ingestor in cls.ingestors)

    @classmethod
    def parse(cls, path: str) -> List[QuoteModel]:
        """Select the appropriate ingestor to parse the file.

        Args:
            path: Location of the file to ingest.

        Returns:
            The quotes produced by the first ingestor that accepts ``path``.

        Raises:
            ValueError: If no registered ingestor accepts ``path``.
        """
        for ingestor in cls.ingestors:
            if ingestor.can_ingest(path):
                return ingestor.parse(path)
        # Fail loudly rather than falling through and returning None, which
        # contradicts the declared return type.
        raise ValueError(f"No ingestor available for file: {path!r}")
|
||||
23
Meme_Generator/QuoteEngine/IngestorInterface.py
Normal file
23
Meme_Generator/QuoteEngine/IngestorInterface.py
Normal file
@@ -0,0 +1,23 @@
|
||||
"""Ingestor Interface module for quote ingestion."""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import List
|
||||
from .QuoteModel import QuoteModel
|
||||
|
||||
|
||||
class IngestorInterface(ABC):
    """Base Ingestor Interface.

    Subclasses list the file extensions they support (without the leading
    dot) in ``allowed_extensions`` and implement ``parse``.
    """

    allowed_extensions = []

    @classmethod
    def can_ingest(cls, path: str) -> bool:
        """Check if the ingestor can ingest the file based on its extension.

        The comparison is case-insensitive. A path with no extension is
        never ingestible.

        Args:
            path: Location of the candidate file.

        Returns:
            True if the extension of ``path`` is in ``allowed_extensions``.
        """
        import os

        # splitext looks only at the final path component, so a bare name
        # such as "csv" or a dotted directory name is not mistaken for an
        # extension.
        ext = os.path.splitext(path)[1][1:].lower()
        if not ext:
            return False
        return ext in {allowed.lower() for allowed in cls.allowed_extensions}

    @classmethod
    @abstractmethod
    def parse(cls, path: str) -> List[QuoteModel]:
        """Parse the file and return a list of QuoteModel objects.

        Args:
            path: Location of the file to ingest.
        """
|
||||
40
Meme_Generator/QuoteEngine/PDFIngestor.py
Normal file
40
Meme_Generator/QuoteEngine/PDFIngestor.py
Normal file
@@ -0,0 +1,40 @@
|
||||
"""Module for ingesting PDF files containing quotes."""
|
||||
|
||||
import os
|
||||
import random
|
||||
import subprocess
|
||||
from typing import List
|
||||
from .IngestorInterface import IngestorInterface
|
||||
from .QuoteModel import QuoteModel
|
||||
|
||||
|
||||
class PDFIngestor(IngestorInterface):
    """Subclass for ingesting PDF files.

    Text is extracted with the external ``pdftotext`` command-line tool.
    Each non-blank line of its output is expected to have the form
    ``<body> - <author>``.
    """

    allowed_extensions = ["pdf"]

    @classmethod
    def parse(cls, path: str) -> List[QuoteModel]:
        """Parse the PDF file to extract quotes.

        Args:
            path: Location of the PDF file.

        Returns:
            One ``QuoteModel`` per well-formed line of extracted text. An
            empty list is returned (after printing an error) when
            ``pdftotext`` is missing or exits with a failure status.

        Raises:
            Exception: If ``path`` does not have an allowed extension.
        """
        if not cls.can_ingest(path):
            raise Exception("Invalid ingest path")

        quotes = []
        try:
            # pdftotext <input-pdf> - : "-" sends the text to stdout, so no
            # temporary file (or pre-existing ./tmp directory) is needed.
            result = subprocess.run(
                ["pdftotext", "-enc", "UTF-8", path, "-"],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                encoding="utf-8",
                errors="replace",
            )
        except FileNotFoundError as err:
            # Raised when the pdftotext executable is not installed.
            print(f"Error: {err}")
            return quotes

        if result.returncode != 0:
            detail = result.stderr.strip()
            print(
                f"Error: pdftotext exited with status "
                f"{result.returncode}: {detail}"
            )
            return quotes

        for line in result.stdout.splitlines():
            line = line.strip()
            if not line:
                continue
            # Split on the first separator only; lines without one are
            # skipped instead of raising IndexError.
            parts = line.split(" - ", 1)
            if len(parts) == 2:
                quotes.append(QuoteModel(parts[0], parts[1]))
        return quotes
|
||||
14
Meme_Generator/QuoteEngine/QuoteModel.py
Normal file
14
Meme_Generator/QuoteEngine/QuoteModel.py
Normal file
@@ -0,0 +1,14 @@
|
||||
"""QuoteModel module for representing a quote with its body and author."""
|
||||
|
||||
|
||||
class QuoteModel:
    """Quote model class.

    Holds the text of a quote (``body``) and its attribution (``author``).
    """

    def __init__(self, body, author) -> None:
        """Initialize the QuoteModel object.

        Args:
            body: The text of the quote.
            author: Who the quote is attributed to.
        """
        self.body = body
        self.author = author

    def __repr__(self) -> str:
        """Return the quote formatted as ``<body> - <author>``."""
        return f"{self.body} - {self.author}"
|
||||
26
Meme_Generator/QuoteEngine/TextIngestor.py
Normal file
26
Meme_Generator/QuoteEngine/TextIngestor.py
Normal file
@@ -0,0 +1,26 @@
|
||||
"""Module for ingesting text files containing quotes."""
|
||||
|
||||
from typing import List
|
||||
from .IngestorInterface import IngestorInterface
|
||||
from .QuoteModel import QuoteModel
|
||||
|
||||
|
||||
class TextIngestor(IngestorInterface):
    """Subclass for ingesting text files.

    Each non-blank line is expected to have the form ``<body> - <author>``.
    """

    allowed_extensions = ["txt"]

    @classmethod
    def parse(cls, path: str) -> List[QuoteModel]:
        """Parse the text file to extract quotes.

        Args:
            path: Location of the text file.

        Returns:
            One ``QuoteModel`` per well-formed line, in file order. Blank
            lines and lines without a `` - `` separator are skipped.

        Raises:
            Exception: If ``path`` does not have an allowed extension.
        """
        if not cls.can_ingest(path):
            raise Exception("Invalid ingest path")

        quotes = []
        # utf-8-sig reads plain UTF-8 and also drops a leading byte-order
        # mark, which would otherwise end up inside the first quote's body.
        with open(path, "r", encoding="utf-8-sig") as file:
            for raw_line in file:
                line = raw_line.strip()
                if not line:
                    continue
                # Split on the first separator only so the author may
                # itself contain " - ".
                parts = line.split(" - ", 1)
                if len(parts) == 2:
                    quotes.append(QuoteModel(parts[0], parts[1]))
        return quotes
|
||||
7
Meme_Generator/QuoteEngine/__init__.py
Normal file
7
Meme_Generator/QuoteEngine/__init__.py
Normal file
@@ -0,0 +1,7 @@
|
||||
from .IngestorInterface import IngestorInterface
|
||||
from .CSVIngestor import CSVIngestor
|
||||
from .DocxIngestor import DocxIngestor
|
||||
from .PDFIngestor import PDFIngestor
|
||||
from .TextIngestor import TextIngestor
|
||||
from .Ingestor import Ingestor
|
||||
from .QuoteModel import QuoteModel
|
||||
Reference in New Issue
Block a user