"""Module for ingesting Docx files containing quotes.""" import docx from typing import List from .IngestorInterface import IngestorInterface from .QuoteModel import QuoteModel class DocxIngestor(IngestorInterface): """Subclass for ingesting Docx files.""" allowed_extensions = ["docx"] @classmethod def parse(cls, path: str) -> List[QuoteModel]: """Parse the Docx file to extract quotes.""" if not cls.can_ingest(path): raise Exception("Invalid ingest path") quotes = [] doc = docx.Document(path) for para in doc.paragraphs: if para.text != "": parts = para.text.split(" - ") quotes.append(QuoteModel(parts[0], parts[1])) return quotes