Files
Udacity_Nanodegree_Intermed…/Meme_Generator/QuoteEngine/PDFIngestor.py

41 lines
1.2 KiB
Python

"""Module for ingesting PDF files containing quotes."""
import os
import random
import subprocess
import tempfile
from typing import List

from .IngestorInterface import IngestorInterface
from .QuoteModel import QuoteModel
class PDFIngestor(IngestorInterface):
    """Ingest quotes from PDF files using the ``pdftotext`` command-line tool."""

    allowed_extensions = ["pdf"]

    @classmethod
    def parse(cls, path: str) -> List[QuoteModel]:
        """Extract quotes from the PDF at *path*.

        The PDF is converted to plain text with the external ``pdftotext``
        program, and every non-empty line containing a ``" - "`` separator
        becomes one ``QuoteModel``.

        Args:
            path: Location of the PDF file to read.

        Returns:
            The quotes found in the file. The list is empty when the
            conversion fails or when no line matches the expected format.

        Raises:
            Exception: If ``cls.can_ingest(path)`` rejects the path.
        """
        if not cls.can_ingest(path):
            raise Exception("Invalid ingest path")

        try:
            # A private temporary directory gives a collision-free output path
            # that does not depend on a ./tmp folder existing in the current
            # working directory, and it is removed automatically on exit.
            with tempfile.TemporaryDirectory() as tmp_dir:
                tmp = os.path.join(tmp_dir, "quotes.txt")
                # pdftotext [options] <input-pdf> <output-text-file>
                # The output encoding is pinned so it matches the decoding
                # used below regardless of the tool's or the locale's default.
                exit_code = subprocess.call(
                    ["pdftotext", "-enc", "UTF-8", path, tmp]
                )
                if exit_code != 0:
                    print(f"Error: pdftotext exited with status {exit_code}")
                    return []
                with open(tmp, "r", encoding="utf-8") as file:
                    lines = file.readlines()
        except FileNotFoundError as err:
            # Raised when the pdftotext executable is not installed or when it
            # produced no output file; report it and return no quotes.
            print(f"Error: {err}")
            return []

        quotes = []
        for line in lines:
            line = line.strip()
            if not line:
                continue
            # Split on the LAST separator so that a " - " inside the first
            # field does not truncate it.
            first, separator, second = line.rpartition(" - ")
            if not separator:
                # Line has no separator (e.g. stray header text); skip it
                # instead of failing with IndexError.
                continue
            # NOTE(review): presumably QuoteModel takes (body, author) in this
            # order -- confirm against QuoteModel's constructor.
            quotes.append(QuoteModel(first, second))
        return quotes