Update Exploring Near Earth Objects project and add Meme Generator project
This commit is contained in:
40
Meme_Generator/QuoteEngine/PDFIngestor.py
Normal file
40
Meme_Generator/QuoteEngine/PDFIngestor.py
Normal file
@@ -0,0 +1,40 @@
|
||||
"""Module for ingesting PDF files containing quotes."""
|
||||
|
||||
import os
|
||||
import random
|
||||
import subprocess
|
||||
from typing import List
|
||||
from .IngestorInterface import IngestorInterface
|
||||
from .QuoteModel import QuoteModel
|
||||
|
||||
|
||||
class PDFIngestor(IngestorInterface):
    """Ingest quotes from PDF files using the ``pdftotext`` command-line tool.

    The PDF is converted to a plain-text file, and each non-empty line of
    that text is expected to have the form ``<body> - <author>``.
    """

    allowed_extensions = ["pdf"]

    @classmethod
    def parse(cls, path: str) -> List[QuoteModel]:
        """Parse the PDF file at ``path`` and return the quotes it contains.

        Args:
            path: Location of the PDF file to ingest.

        Returns:
            A list with one ``QuoteModel`` per well-formed line. The list is
            empty when ``pdftotext`` is unavailable, fails, or produces no
            usable lines; in the failure cases a message is printed.

        Raises:
            Exception: If ``cls.can_ingest(path)`` rejects the path.
        """
        # Imported locally so this block does not depend on a module-level
        # import being added elsewhere in the file.
        import tempfile

        if not cls.can_ingest(path):
            raise Exception("Invalid ingest path")

        quotes = []
        try:
            # A private temporary directory avoids relying on a pre-existing
            # ./tmp folder and rules out file-name collisions between
            # concurrent calls; it is removed when the ``with`` block exits.
            with tempfile.TemporaryDirectory() as tmp_dir:
                tmp = os.path.join(tmp_dir, "quotes.txt")
                # pdftotext <input-pdf> <output-text-file>
                status = subprocess.call(["pdftotext", path, tmp])
                if status != 0:
                    print(f"Error: pdftotext exited with status {status}")
                    return quotes
                # pdftotext writes UTF-8 by default, so do not depend on
                # the locale's preferred encoding when reading it back.
                with open(tmp, "r", encoding="utf-8") as file:
                    lines = file.readlines()
        except FileNotFoundError as err:
            # Raised when the pdftotext executable is not installed or the
            # expected output file was not produced.
            print(f"Error: {err}")
            return quotes

        for line in lines:
            line = line.strip()
            if not line:
                continue
            parts = line.split(" - ")
            if len(parts) < 2:
                # Not in "<body> - <author>" form (e.g. stray text from
                # the PDF layout); skip it instead of raising IndexError.
                continue
            # NOTE(review): presumably QuoteModel takes (body, author) --
            # confirm against QuoteModel's definition.
            quotes.append(QuoteModel(parts[0], parts[1]))

        return quotes
|
||||
Reference in New Issue
Block a user