Commit b6c0d39b authored by Vít Starý Novotný
Browse files

Prevent crossing page boundaries to non-relevant documents

parent 8b6e7818
Loading
Loading
Loading
Loading
Loading
+12 −3
Original line number Diff line number Diff line
@@ -15,7 +15,7 @@ from markdown import markdown as to_html
from markdown_strings import esc_format as escape, link, header, bold, italics
from humanize import naturaldelta

from ..document import Document
from ..document import Document, Documents
from ..entity import Entity
from ..config import CONFIG as _CONFIG

@@ -124,7 +124,16 @@ class SearchResultList:
                print(ner_tags, file=ntf)
        LOGGER.debug(f'Saved {sentences_filename} and {ner_tags_filename}')

    def get_sentences_and_ner_tags(self, only_relevant: bool, *args, **kwargs) -> Iterable[Tuple[str, str]]:
    def get_sentences_and_ner_tags(self, only_relevant: bool, cross_page_boundaries: bool,
                                   documents: Optional[Documents]) -> Iterable[Tuple[str, str]]:
        if only_relevant and cross_page_boundaries:
            assert documents is not None
            documents = {
                basename: document
                for basename, document
                in documents.items()
                if document.is_relevant
            }
        for entity in self:
            for result in self[entity]:
                if TYPE_CHECKING:
@@ -134,7 +143,7 @@ class SearchResultList:
                if only_relevant and not document.is_relevant:
                    continue

                snippet = document.get_sentence(position, *args, **kwargs)
                snippet = document.get_sentence(position, cross_page_boundaries, documents)
                if snippet is None:
                    continue