From 7b0f1e1556b6714d6f0d255c478896d51ee9f27d Mon Sep 17 00:00:00 2001 From: Jussi Laasonen <404469+jlaasonen@users.noreply.github.com> Date: Wed, 15 Dec 2021 12:55:03 +0100 Subject: [PATCH] Add endpoint for chapter display (#170) * Update dependencies * Extract method to get chapter title * Remove spurious commas * Upgrade black and format It works with pypy again: https://github.com/psf/black/issues/727 * Add model for chapter display * Move ChapterIdSchema to id_schemas * Add schema for ChapterDisplay * Add methods for finding chapters for display * 'Refactored by Sourcery' (#176) Co-authored-by: Sourcery AI <> * Add route for chapter display * Remove Theia settings * Add title to ChapterDisplay * Add title to ChapterDisplay schema * Add intertext to line display * Get intertext from database * Add is_single_stage to ChapterDisplay Co-authored-by: sourcery-ai[bot] <58596630+sourcery-ai[bot]@users.noreply.github.com> --- .theia/settings.json | 9 -- Pipfile | 2 +- Pipfile.lock | 98 ++++++++++--------- ebl/corpus/application/corpus.py | 8 ++ ebl/corpus/application/display_schemas.py | 46 +++++++++ ebl/corpus/application/id_schemas.py | 14 ++- ebl/corpus/domain/chapter.py | 16 +++ ebl/corpus/domain/chapter_display.py | 68 +++++++++++++ ebl/corpus/domain/text.py | 21 ++-- .../infrastructure/mongo_text_repository.py | 28 +++++- ebl/corpus/infrastructure/queries.py | 79 +++++++++++++++ ebl/corpus/web/bootstrap.py | 40 ++++---- ebl/corpus/web/chapter_info_schema.py | 10 +- ebl/corpus/web/chapters.py | 23 ++++- .../application/fragment_repository.py | 8 +- .../infrastructure/fragment_repository.py | 2 +- ebl/tests/corpus/test_chapter_display.py | 42 ++++++++ .../corpus/test_chapter_display_route.py | 56 +++++++++++ ebl/tests/corpus/test_chapter_display_schema | 56 +++++++++++ ebl/tests/corpus/test_corpus.py | 17 +++- ebl/tests/corpus/test_corpus_text.py | 4 +- ebl/tests/corpus/test_make_title.py | 12 +++ .../corpus/test_mongo_text_repository.py | 32 ++++-- 
ebl/tests/corpus/test_text.py | 23 +++++ .../fragmentarium/test_fragment_repository.py | 2 +- .../test_retrieve_annotations.py | 19 ++-- ebl/tests/transliteration/test_markup.py | 51 +++++++++- ebl/tests/transliteration/test_text_schema.py | 17 ++-- .../transliteration/test_translation_line.py | 29 ------ ebl/transliteration/domain/markup.py | 12 +++ ebl/transliteration/domain/text.py | 2 +- .../domain/translation_line.py | 10 -- 32 files changed, 674 insertions(+), 182 deletions(-) delete mode 100644 .theia/settings.json create mode 100644 ebl/corpus/application/display_schemas.py create mode 100644 ebl/corpus/domain/chapter_display.py create mode 100644 ebl/tests/corpus/test_chapter_display.py create mode 100644 ebl/tests/corpus/test_chapter_display_route.py create mode 100644 ebl/tests/corpus/test_chapter_display_schema create mode 100644 ebl/tests/corpus/test_make_title.py create mode 100644 ebl/tests/corpus/test_text.py diff --git a/.theia/settings.json b/.theia/settings.json deleted file mode 100644 index 1405ea01c..000000000 --- a/.theia/settings.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "editor.rulers": [ - 88 - ], - "python.linting.flake8Enabled": true, - "python.linting.mypyEnabled": false, - "python.linting.enabled": true, - "python.formatting.provider": "black" -} \ No newline at end of file diff --git a/Pipfile b/Pipfile index 46d1063b3..2fc133cd9 100644 --- a/Pipfile +++ b/Pipfile @@ -39,7 +39,7 @@ pyhamcrest = "*" flake8-bugbear = "*" httpretty = "*" pyre-check = "==0.0.60" -black = "==19.3b0" +black = "*" pymongo-inmemory = "*" pillow = "*" pytest-xdist = "*" diff --git a/Pipfile.lock b/Pipfile.lock index f9aab5f7f..2a39ac073 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "f560677b9bc3119731a9d5bba184e556abbd1eae6efeae669202c8cd5c11a826" + "sha256": "ba5a11d2dfb49ef48860919b09539a4d275fb42934f616086ad67f210f84c3b0" }, "pipfile-spec": 6, "requires": { @@ -93,11 +93,11 @@ }, "charset-normalizer": { 
"hashes": [ - "sha256:735e240d9a8506778cd7a453d97e817e536bb1fc29f4f6961ce297b9c7a917b0", - "sha256:83fcdeb225499d6344c8f7f34684c2981270beacc32ede2e669e94f7fa544405" + "sha256:1eecaa09422db5be9e29d7fc65664e6c33bd06f9ced7838578ba40d58bdf3721", + "sha256:b0b883e8e874edfdece9c28f314e3dd5badf067342e42fb162203335ae61aa2c" ], "markers": "python_version >= '3'", - "version": "==2.0.8" + "version": "==2.0.9" }, "cryptography": { "hashes": [ @@ -248,39 +248,39 @@ }, "pycryptodomex": { "hashes": [ - "sha256:0398366656bb55ebdb1d1d493a7175fc48ade449283086db254ac44c7d318d6d", - "sha256:1580db5878b1d16a233550829f7c189c43005f7aa818f2f95c7dddbd6a7163cc", - "sha256:15d25c532de744648f0976c56bd10d07b2a44b7eb2a6261ffe2497980b1102d8", - "sha256:1d4d13c59d2cfbc0863c725f5812d66ff0d6836ba738ef26a52e1291056a1c7c", - "sha256:1dd4271d8d022216533c3547f071662b44d703fd5dbb632c4b5e77b3ee47567f", - "sha256:207e53bdbf3a26de6e9dcf3ebaf67ba70a61f733f84c464eca55d278211c1b71", - "sha256:252ac9c1e1ae1c256a75539e234be3096f2d100b9f4bae42ef88067787b9b249", - "sha256:2b586d13ef07fa6197b6348a48dbbe9525f4f496205de14edfa4e91d99e69672", - "sha256:4c7c6418a3c08b2ebfc2cf50ce52de267618063b533083a2c73b40ec54a1b6f5", - "sha256:5baf690d27f39f2ba22f06e8e32c5f1972573ca65db6bdbb8b2c7177a0112dab", - "sha256:64a83ab6f54496ab968a6f21a41a620afe0a742573d609fd03dcab7210645153", - "sha256:6a76d7821ae43df8a0e814cca32114875916b9fc2158603b364853de37eb9002", - "sha256:7abfd84a362e4411f7c5f5758c18cbf377a2a2be64b9232e78544d75640c677e", - "sha256:7cc5ee80b2d5ee8f59a761741cfb916a068c97cac5e700c8ce01e1927616aa2f", - "sha256:91662b27f5aa8a6d2ad63be9a7d1a403e07bf3c2c5b265a7cc5cbadf6f988e06", - "sha256:919cadcedad552e78349d1626115cfd246fc03ad469a4a62c91a12204f0f0d85", - "sha256:9eace1e5420abc4f9e76de01e49caca349b7c80bda9c1643193e23a06c2a332c", - "sha256:adc25aa8cfc537373dd46ae97863f16fd955edee14bf54d3eb52bde4e4ac8c7b", - "sha256:bf2ea67eaa1fff0aecef6da881144f0f91e314b4123491f9a4fa8df0598e48fe", - 
"sha256:c10b2f6bcbaa9aa51fe08207654100074786d423b03482c0cbe44406ca92d146", - "sha256:c391ec5c423a374a36b90f7c8805fdf51a0410a2b5be9cebd8990e0021cb6da4", - "sha256:c43ddcff251e8b427b3e414b026636617276e008a9d78a44a9195d4bdfcaa0fe", - "sha256:c825611a951baad63faeb9ef1517ef96a20202d6029ae2485b729152cc703fab", - "sha256:c91772cf6808cc2d80279e80b491c48cb688797b6d914ff624ca95d855c24ee5", - "sha256:cf30b5e03d974874185b989839c396d799f6e2d4b4d5b2d8bd3ba464eb3cc33f", - "sha256:ef25d682d0d9ab25c5022a298b5cba9084c7b148a3e71846df2c67ea664eacc7", - "sha256:f35ccfa44a1dd267e392cd76d8525cfcfabee61dd070e15ad2119c54c0c31ddf", - "sha256:fbe09e3ae95f47c7551a24781d2e348974cde4a0b33bc3b1566f6216479db2b1", - "sha256:fe2b8c464ba335e71aed74f830bf2b2881913f8905d166f9c0fe06ca44a1cb5e", - "sha256:ff0826f3886e85708a0e8ef7ec47020723b998cfed6ae47962d915fcb89ec780" + "sha256:08c809e9f4be8d4f9948cf4d5ebc7431bbd9e1c0cd5ff478d0d5229f1bc4ad50", + "sha256:097095a7c24b9e7eec865417f620f78adf218162c03b68e4fde194bf87801a67", + "sha256:0981e8071d2ee457d842821438f833e362eed9a25a445d54ad7610b24293118f", + "sha256:1bd9d158afa33dca04748b23e7b9d4055f8c8015ace2e972a866519af02d5eed", + "sha256:1f6c370abf11546b1c9b70062918d601ac8fb7ff113554601b43175eed7480ef", + "sha256:2595b7be43b8b2da953ea3506a8d71c07fc9b479d5c118b0c44a5eca2a1664f6", + "sha256:2d173a5db4e306cd32558b1a3ceb45bd2ebeb6596525fd5945963798b3851e3d", + "sha256:33c06d6819a0204fac675d100f92aa472c704cd774a47171a5949c75c1040ed6", + "sha256:3559da56e1045ad567e69fcc74245073fe1943b07b137bfd1073c7a540a89df7", + "sha256:3bfa2936f8391bfaa17ed6a5c726e33acad56d7b47b8bf824b1908b16b140025", + "sha256:4361881388817f89aa819a553e987200a6eb664df995632b063997dd373a7cee", + "sha256:43af464dcac1ae53e6e14a0ae6f08373b538f3c49fb9e426423618571edfecff", + "sha256:44097663c62b3aa03b5b403b816dedafa592984e8c6857a061ade41f32a2666e", + "sha256:4cbaea8ab8bfa283e6219af39624d921f72f8174765a35416aab4d4b4dec370e", + "sha256:5b0fd9fc81d43cd54dc8e4b2df8730ffd1e34f1f0679920deae16f6487aa1414", 
+ "sha256:676d9f4286f490612fa35ca8fe4b1fced8ff18e653abc1dda34fbf166129d6c2", + "sha256:79ad48096ceb5c714fbc4dc82e3e6b37f095f627b1fef90d94d85e19a19d6611", + "sha256:83379f1fd7b99c4993f5e905f2a6ddb9003996655151ea3c2ee77332ad009d08", + "sha256:88dc997e3e9199a0d67b547fba36c6d1c54fca7d83c4bfe0d3f34f55a4717a2c", + "sha256:8c5b97953130ff76500c6e8e159f2b881c737ebf00034006517b57f382d5317c", + "sha256:922e9dac0166e4617e5c7980d2cff6912a6eb5cb5c13e7ece222438650bd7f66", + "sha256:9c037aaf6affc8f7c4f6f9f6279aa57dd526734246fb5221a0fff3124f57e0b1", + "sha256:a896b41c518269c1cceb582e298a868e6c74bb3cbfd362865ea686c78aebe91d", + "sha256:b1a6f17c4ad896ed628663b021cd797b098c7e9537fd259958f6ffb3b8921081", + "sha256:b5ddaee74e1f06af9c0765a147904dddacf4ca9707f8f079e14e2b14b4f5a544", + "sha256:d55374ebc36de7a3217f2e2318886f0801dd5e486e21aba1fc4ca08e3b6637d7", + "sha256:ddac6a092b97aa11d2a21aec33e941b4453ef774da3d98f2b7c1e01da05e6d5e", + "sha256:de9832ac3c51484fe1757ca4303695423b16cfa54874dae9239bf41f50a2affa", + "sha256:e42a82c63952ed70be3c13782c6984a519b514e6b10108a9647c7576b6c86650", + "sha256:ea8e83bf4731a2369350d7771a1f2ef8d72ad3da70a37d86b1374be8c675abd0" ], "index": "pypi", - "version": "==3.11.0" + "version": "==3.12.0" }, "pydash": { "hashes": [ @@ -499,13 +499,6 @@ } }, "develop": { - "appdirs": { - "hashes": [ - "sha256:7d5d0167b2b1ba821647616af46a749d1c653740dd0d2415100fe26e27afdf41", - "sha256:a841dacd6b99318a741b166adb07e19ee71a274450e68237b4650ca1055ab128" - ], - "version": "==1.4.4" - }, "async-generator": { "hashes": [ "sha256:01c7bf666359b4967d2cda0000cc2e4af16a0ae098cbffcb8472fb9e8ad6585b", @@ -524,11 +517,11 @@ }, "black": { "hashes": [ - "sha256:09a9dcb7c46ed496a9850b76e4e825d6049ecd38b611f1224857a79bd985a8cf", - "sha256:68950ffd4d9169716bcb8719a56c07a2f4485354fec061cdd5910aa07369731c" + "sha256:77b80f693a569e2e527958459634f18df9b0ba2625ba4e0c2d5da5be42e6f2b3", + "sha256:a615e69ae185e08fdd73e4715e260e2479c861b5740057fde6e8b4e3b7dd589f" ], "index": "pypi", - "version": 
"==19.3b0" + "version": "==21.12b0" }, "click": { "hashes": [ @@ -738,6 +731,13 @@ "markers": "python_version >= '3.6'", "version": "==21.3" }, + "pathspec": { + "hashes": [ + "sha256:7d15c4ddb0b5c802d161efc417ec1a2558ea2653c2e8ad9c19098201dc1c993a", + "sha256:e564499435a2673d586f6b2130bb5b95f04a3ba06f81b8f895b651a3c76aabb1" + ], + "version": "==0.9.0" + }, "pillow": { "hashes": [ "sha256:066f3999cb3b070a95c3652712cffa1a748cd02d60ad7b4e485c3748a04d9d76", @@ -785,6 +785,14 @@ "index": "pypi", "version": "==8.4.0" }, + "platformdirs": { + "hashes": [ + "sha256:367a5e80b3d04d2428ffa76d33f124cf11e8fff2acdaa9b43d545f5c7d661ef2", + "sha256:8868bbe3c3c80d42f20156f22e7131d2fb321f5bc86a2a345375c6481a67021d" + ], + "markers": "python_version >= '3.6'", + "version": "==2.4.0" + }, "pluggy": { "hashes": [ "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159", diff --git a/ebl/corpus/application/corpus.py b/ebl/corpus/application/corpus.py index 2753d1b93..b553825c4 100644 --- a/ebl/corpus/application/corpus.py +++ b/ebl/corpus/application/corpus.py @@ -12,6 +12,7 @@ from ebl.corpus.application.text_validator import TextValidator from ebl.corpus.domain.alignment import Alignment from ebl.corpus.domain.chapter import Chapter, ChapterId +from ebl.corpus.domain.chapter_display import ChapterDisplay from ebl.corpus.domain.chapter_info import ChapterInfo from ebl.corpus.domain.lines_update import LinesUpdate from ebl.corpus.domain.manuscript import Manuscript @@ -43,6 +44,10 @@ def find(self, id_: TextId) -> Text: def find_chapter(self, id_: ChapterId) -> Chapter: ... + @abstractmethod + def find_chapter_for_display(self, id_: ChapterId) -> ChapterDisplay: + ... + @abstractmethod def list(self) -> List[Text]: ... 
@@ -86,6 +91,9 @@ def find_chapter(self, id_: ChapterId) -> Chapter: chapter = self._repository.find_chapter(id_) return self._hydrate_references(chapter) + def find_chapter_for_display(self, id_: ChapterId) -> ChapterDisplay: + return self._repository.find_chapter_for_display(id_) + def find_manuscripts(self, id_: ChapterId) -> Sequence[Manuscript]: return self._hydrate_manuscripts( self._repository.query_manuscripts_by_chapter(id_) diff --git a/ebl/corpus/application/display_schemas.py b/ebl/corpus/application/display_schemas.py new file mode 100644 index 000000000..076b7fef8 --- /dev/null +++ b/ebl/corpus/application/display_schemas.py @@ -0,0 +1,46 @@ +from marshmallow import Schema, fields, post_load + +from ebl.corpus.application.id_schemas import ChapterIdSchema +from ebl.corpus.domain.chapter_display import ChapterDisplay, LineDisplay +from ebl.transliteration.application.line_number_schemas import OneOfLineNumberSchema +from ebl.transliteration.application.note_line_part_schemas import ( + OneOfNoteLinePartSchema, +) +from ebl.transliteration.application.token_schemas import OneOfTokenSchema + + +class LineDisplaySchema(Schema): + number = fields.Nested(OneOfLineNumberSchema, required=True) + intertext = fields.List( + fields.Nested(OneOfNoteLinePartSchema), load_default=tuple() + ) + reconstruction = fields.List(fields.Nested(OneOfTokenSchema), load_default=tuple()) + translation = fields.List( + fields.Nested(OneOfNoteLinePartSchema), load_default=tuple() + ) + + @post_load + def make_line(self, data: dict, **kwargs) -> LineDisplay: + return LineDisplay( + data["number"], + tuple(data["intertext"]), + tuple(data["reconstruction"]), + tuple(data["translation"]), + ) + + +class ChapterDisplaySchema(Schema): + id_ = fields.Nested(ChapterIdSchema, required=True, data_key="id") + text_name = fields.String(required=True, data_key="textName") + is_single_stage = fields.Boolean(required=True, data_key="isSingleStage") + title = 
fields.List(fields.Nested(OneOfNoteLinePartSchema), dump_only=True) + lines = fields.Nested(LineDisplaySchema, many=True, required=True) + + @post_load + def make_chapter(self, data: dict, **kwargs) -> ChapterDisplay: + return ChapterDisplay( + data["id_"], + data["text_name"], + data["is_single_stage"], + tuple(data["lines"]), + ) diff --git a/ebl/corpus/application/id_schemas.py b/ebl/corpus/application/id_schemas.py index 6af20ffe3..97e137c2b 100644 --- a/ebl/corpus/application/id_schemas.py +++ b/ebl/corpus/application/id_schemas.py @@ -1,5 +1,7 @@ -from marshmallow import fields, post_load, Schema, validate +from marshmallow import Schema, fields, post_load, validate +from ebl.corpus.domain.chapter import ChapterId +from ebl.corpus.domain.stage import Stage from ebl.corpus.domain.text_id import TextId from ebl.schemas import ValueEnum from ebl.transliteration.domain.genre import Genre @@ -13,3 +15,13 @@ class TextIdSchema(Schema): @post_load def make_id(self, data, **kwargs) -> TextId: return TextId(data["genre"], data["category"], data["index"]) + + +class ChapterIdSchema(Schema): + text_id = fields.Nested(TextIdSchema, required=True, data_key="textId") + stage = ValueEnum(Stage, required=True) + name = fields.String(required=True, validate=validate.Length(min=1)) + + @post_load + def make_id(self, data, **kwargs) -> ChapterId: + return ChapterId(data["text_id"], data["stage"], data["name"]) diff --git a/ebl/corpus/domain/chapter.py b/ebl/corpus/domain/chapter.py index 624eacb2d..223344851 100644 --- a/ebl/corpus/domain/chapter.py +++ b/ebl/corpus/domain/chapter.py @@ -12,6 +12,11 @@ from ebl.errors import NotFoundError from ebl.fragmentarium.domain.museum_number import MuseumNumber from ebl.merger import Merger +from ebl.transliteration.domain.markup import MarkupPart, to_title +from ebl.transliteration.domain.translation_line import ( + DEFAULT_LANGUAGE, + TranslationLine, +) from ebl.transliteration.domain.text_line import TextLine from 
ebl.transliteration.domain.transliteration_query import TransliterationQuery import ebl.corpus.domain.chapter_validators as validators @@ -59,6 +64,17 @@ def __str__(self) -> str: return f"{self.text_id} {self.stage.abbreviation} {self.name}" +def make_title(translation: Sequence[TranslationLine]) -> Sequence[MarkupPart]: + return next( + ( + to_title(line.parts) + for line in translation + if line.language == DEFAULT_LANGUAGE + ), + tuple(), + ) + + @attr.s(auto_attribs=True, frozen=True) class Chapter: text_id: TextId diff --git a/ebl/corpus/domain/chapter_display.py b/ebl/corpus/domain/chapter_display.py new file mode 100644 index 000000000..b25376c5c --- /dev/null +++ b/ebl/corpus/domain/chapter_display.py @@ -0,0 +1,68 @@ +from typing import Iterable, Sequence +import attr + +from ebl.corpus.domain.chapter import ChapterId, Chapter +from ebl.corpus.domain.line import Line +from ebl.corpus.domain.text import Text +from ebl.transliteration.domain.translation_line import ( + DEFAULT_LANGUAGE, + TranslationLine, +) +from ebl.transliteration.domain.line_number import AbstractLineNumber +from ebl.transliteration.domain.tokens import Token +from ebl.transliteration.domain.markup import MarkupPart, to_title + + +def get_default_translation( + translations: Iterable[TranslationLine], +) -> Sequence[MarkupPart]: + return next( + ( + translation_line.parts + for translation_line in translations + if translation_line.language == DEFAULT_LANGUAGE + ), + tuple(), + ) + + +@attr.s(frozen=True, auto_attribs=True) +class LineDisplay: + number: AbstractLineNumber + intertext: Sequence[MarkupPart] + reconstruction: Sequence[Token] + translation: Sequence[MarkupPart] + + @property + def title(self) -> Sequence[MarkupPart]: + return to_title(self.translation) + + @staticmethod + def of_line(line: Line) -> "LineDisplay": + return LineDisplay( + line.number, + line.variants[0].intertext, + line.variants[0].reconstruction, + get_default_translation(line.translation), + ) + + 
+@attr.s(frozen=True, auto_attribs=True) +class ChapterDisplay: + id_: ChapterId + text_name: str + is_single_stage: bool + lines: Sequence[LineDisplay] + + @property + def title(self) -> Sequence[MarkupPart]: + return self.lines[0].title if self.lines else tuple() + + @staticmethod + def of_chapter(text: Text, chapter: Chapter) -> "ChapterDisplay": + return ChapterDisplay( + chapter.id_, + text.name, + not text.has_multiple_stages, + tuple(map(LineDisplay.of_line, chapter.lines)), + ) diff --git a/ebl/corpus/domain/text.py b/ebl/corpus/domain/text.py index ef88bddd6..f6c77f7b7 100644 --- a/ebl/corpus/domain/text.py +++ b/ebl/corpus/domain/text.py @@ -1,17 +1,14 @@ from typing import Sequence import attr -import pydash from ebl.bibliography.domain.reference import Reference +from ebl.corpus.domain.chapter import make_title from ebl.corpus.domain.stage import Stage from ebl.corpus.domain.text_id import TextId from ebl.transliteration.domain.genre import Genre from ebl.transliteration.domain.markup import MarkupPart -from ebl.transliteration.domain.translation_line import ( - DEFAULT_LANGUAGE, - TranslationLine, -) +from ebl.transliteration.domain.translation_line import TranslationLine from ebl.fragmentarium.domain.museum_number import MuseumNumber @@ -30,15 +27,7 @@ class ChapterListing: @property def title(self) -> Sequence[MarkupPart]: - return ( - pydash.chain(self.translation) - .filter(lambda line: line.language == DEFAULT_LANGUAGE) - .map(lambda line: line.rstrip().title_case()) - .map(lambda line: line.parts) - .head() - .value() - or tuple() - ) + return make_title(self.translation) @attr.s(auto_attribs=True, frozen=True) @@ -56,3 +45,7 @@ class Text: @property def id(self) -> TextId: return TextId(self.genre, self.category, self.index) + + @property + def has_multiple_stages(self) -> bool: + return len({chapter.stage for chapter in self.chapters}) > 1 diff --git a/ebl/corpus/infrastructure/mongo_text_repository.py 
b/ebl/corpus/infrastructure/mongo_text_repository.py index b197164ac..27f6e1c9c 100644 --- a/ebl/corpus/infrastructure/mongo_text_repository.py +++ b/ebl/corpus/infrastructure/mongo_text_repository.py @@ -4,16 +4,22 @@ from ebl.bibliography.infrastructure.bibliography import join_reference_documents from ebl.corpus.application.corpus import TextRepository +from ebl.corpus.application.display_schemas import ChapterDisplaySchema +from ebl.corpus.application.schemas import ChapterSchema, ManuscriptSchema, TextSchema from ebl.corpus.domain.chapter import Chapter, ChapterId +from ebl.corpus.domain.chapter_display import ChapterDisplay from ebl.corpus.domain.manuscript import Manuscript from ebl.corpus.domain.text import Text, TextId from ebl.corpus.infrastructure.collections import CHAPTERS_COLLECTION, TEXTS_COLLECTION -from ebl.corpus.infrastructure.queries import chapter_id_query, join_chapters +from ebl.corpus.infrastructure.queries import ( + aggregate_chapter_display, + chapter_id_query, + join_chapters, +) from ebl.errors import NotFoundError +from ebl.fragmentarium.infrastructure.queries import is_in_fragmentarium, join_joins from ebl.mongo_collection import MongoCollection from ebl.transliteration.domain.transliteration_query import TransliterationQuery -from ebl.corpus.application.schemas import ChapterSchema, ManuscriptSchema, TextSchema -from ebl.fragmentarium.infrastructure.queries import is_in_fragmentarium, join_joins def text_not_found(id_: TextId) -> Exception: @@ -101,6 +107,22 @@ def find_chapter(self, id_: ChapterId) -> Chapter: except NotFoundError: raise chapter_not_found(id_) + def find_chapter_for_display(self, id_: ChapterId) -> ChapterDisplay: + try: + text = self.find(id_.text_id) + chapters = self._chapters.aggregate(aggregate_chapter_display(id_)) + return ChapterDisplaySchema().load( + { + **next(chapters), + "textName": text.name, + "isSingleStage": not text.has_multiple_stages, + } + ) + except NotFoundError as error: + raise 
text_not_found(id_.text_id) from error + except StopIteration as error: + raise chapter_not_found(id_) from error + def list(self) -> List[Text]: return TextSchema().load( self._texts.aggregate( diff --git a/ebl/corpus/infrastructure/queries.py b/ebl/corpus/infrastructure/queries.py index 85c614ae5..7d8ed678c 100644 --- a/ebl/corpus/infrastructure/queries.py +++ b/ebl/corpus/infrastructure/queries.py @@ -3,6 +3,7 @@ from ebl.corpus.domain.chapter import ChapterId from ebl.corpus.infrastructure.collections import CHAPTERS_COLLECTION from ebl.fragmentarium.infrastructure.queries import is_in_fragmentarium +from ebl.transliteration.domain.translation_line import DEFAULT_LANGUAGE def chapter_id_query(id_: ChapterId) -> dict: @@ -107,3 +108,81 @@ def join_chapters(include_uncertain_fragmnets: bool) -> List[dict]: }, {"$project": {"_id": 0}}, ] + + +def aggregate_chapter_display(id_: ChapterId) -> List[dict]: + return [ + {"$match": chapter_id_query(id_)}, + { + "$addFields": { + "id": { + "textId": "$textId", + "stage": "$stage", + "name": "$name", + }, + "lines": { + "$map": { + "input": "$lines", + "as": "line", + "in": { + "number": "$$line.number", + "translation": { + "$arrayElemAt": [ + { + "$map": { + "input": { + "$filter": { + "input": "$$line.translation", + "as": "translation", + "cond": { + "eq": [ + "$$translation.language", + DEFAULT_LANGUAGE, + ] + }, + } + }, + "as": "en_translation", + "in": "$$en_translation.parts", + } + }, + 0, + ] + }, + "intertext": { + "$arrayElemAt": [ + { + "$map": { + "input": "$$line.variants", + "as": "variant", + "in": "$$variant.intertext", + } + }, + 0, + ] + }, + "reconstruction": { + "$arrayElemAt": [ + { + "$map": { + "input": "$$line.variants", + "as": "variant", + "in": "$$variant.reconstruction", + } + }, + 0, + ] + }, + }, + } + }, + } + }, + { + "$project": { + "_id": False, + "id": True, + "lines": True, + } + }, + ] diff --git a/ebl/corpus/web/bootstrap.py b/ebl/corpus/web/bootstrap.py index 35e8978ad..66941c537 
100644 --- a/ebl/corpus/web/bootstrap.py +++ b/ebl/corpus/web/bootstrap.py @@ -3,7 +3,7 @@ from ebl.context import Context from ebl.corpus.application.corpus import Corpus from ebl.corpus.web.alignments import AlignmentResource -from ebl.corpus.web.chapters import ChaptersResource +from ebl.corpus.web.chapters import ChaptersDisplayResource, ChaptersResource from ebl.corpus.web.colophons import ColophonsResource from ebl.corpus.web.extant_lines import ExtantLinesResource from ebl.corpus.web.lemmatizations import LemmatizationResource @@ -31,6 +31,7 @@ def create_corpus_routes(api: falcon.API, context: Context): corpus, TransliterationQueryFactory(context.sign_repository) ) chapters = ChaptersResource(corpus) + chapters_display = ChaptersDisplayResource(corpus) alignment = AlignmentResource(corpus) manuscript_lemmatization = LemmatizationResource(corpus) manuscript = ManuscriptsResource(corpus) @@ -40,36 +41,29 @@ def create_corpus_routes(api: falcon.API, context: Context): unplaced_lines = UnplacedLinesResource(corpus) extant_lines = ExtantLinesResource(corpus) + text_url = "/texts/{genre}/{category}/{index}" + chapter_url = text_url + "/chapters/{stage}/{name}" + api.add_route("/texts", texts) api.add_route("/textsearch", text_search) - api.add_route("/texts/{genre}/{category}/{index}", text) - api.add_route("/texts/{genre}/{category}/{index}/chapters/{stage}/{name}", chapters) - api.add_route( - "/texts/{genre}/{category}/{index}/chapters/{stage}/{name}/alignment", alignment - ) - api.add_route( - "/texts/{genre}/{category}/{index}/chapters/{stage}/{name}/lemmatization", - manuscript_lemmatization, - ) - api.add_route( - "/texts/{genre}/{category}/{index}/chapters/{stage}/{name}/manuscripts", - manuscript, - ) + api.add_route(text_url, text) + api.add_route(chapter_url, chapters) + api.add_route(f"{chapter_url}/display", chapters_display) + api.add_route(f"{chapter_url}/alignment", alignment) + api.add_route(f"{chapter_url}/lemmatization", 
manuscript_lemmatization) api.add_route( - "/texts/{genre}/{category}/{index}/chapters/{stage}/{name}/lines", lines - ) - api.add_route( - "/texts/{genre}/{category}/{index}/chapters/{stage}/{name}/import", lines_import - ) - api.add_route( - "/texts/{genre}/{category}/{index}/chapters/{stage}/{name}/colophons", colophons + f"{chapter_url}/manuscripts", + manuscript, ) + api.add_route(f"{chapter_url}/lines", lines) + api.add_route(f"{chapter_url}/import", lines_import) + api.add_route(f"{chapter_url}/colophons", colophons) api.add_route( - "/texts/{genre}/{category}/{index}/chapters/{stage}/{name}/unplaced_lines", + f"{chapter_url}/unplaced_lines", unplaced_lines, ) api.add_route( - "/texts/{genre}/{category}/{index}/chapters/{stage}/{name}/extant_lines", + f"{chapter_url}/extant_lines", extant_lines, ) diff --git a/ebl/corpus/web/chapter_info_schema.py b/ebl/corpus/web/chapter_info_schema.py index 0e960351d..79ba62755 100644 --- a/ebl/corpus/web/chapter_info_schema.py +++ b/ebl/corpus/web/chapter_info_schema.py @@ -1,15 +1,7 @@ from marshmallow import Schema, fields -from ebl.corpus.application.id_schemas import TextIdSchema -from ebl.corpus.domain.stage import Stage +from ebl.corpus.application.id_schemas import ChapterIdSchema from ebl.corpus.web.chapter_schemas import ApiLineSchema -from ebl.schemas import ValueEnum - - -class ChapterIdSchema(Schema): - text_id = fields.Nested(TextIdSchema, data_key="textId") - stage = ValueEnum(Stage) - name = fields.String() class LineSchema(Schema): diff --git a/ebl/corpus/web/chapters.py b/ebl/corpus/web/chapters.py index 7915ee5f9..9e5a3d17b 100644 --- a/ebl/corpus/web/chapters.py +++ b/ebl/corpus/web/chapters.py @@ -1,8 +1,9 @@ import falcon from ebl.corpus.application.corpus import Corpus -from ebl.corpus.web.text_utils import create_chapter_id +from ebl.corpus.application.display_schemas import ChapterDisplaySchema from ebl.corpus.web.chapter_schemas import ApiChapterSchema +from ebl.corpus.web.text_utils import 
create_chapter_id from ebl.users.web.require_scope import require_scope @@ -24,3 +25,23 @@ def on_get( chapter_id = create_chapter_id(genre, category, index, stage, name) chapter = self._corpus.find_chapter(chapter_id) resp.media = ApiChapterSchema().dump(chapter) + + +class ChaptersDisplayResource: + def __init__(self, corpus: Corpus): + self._corpus = corpus + + @falcon.before(require_scope, "read:texts") + def on_get( + self, + _, + resp: falcon.Response, + genre: str, + category: str, + index: str, + stage: str, + name: str, + ) -> None: + chapter_id = create_chapter_id(genre, category, index, stage, name) + chapter = self._corpus.find_chapter_for_display(chapter_id) + resp.media = ChapterDisplaySchema().dump(chapter) diff --git a/ebl/fragmentarium/application/fragment_repository.py b/ebl/fragmentarium/application/fragment_repository.py index 789c6300a..c47236082 100644 --- a/ebl/fragmentarium/application/fragment_repository.py +++ b/ebl/fragmentarium/application/fragment_repository.py @@ -44,7 +44,7 @@ def query_random_by_transliterated(self) -> List[Fragment]: ... @abstractmethod - def query_path_of_the_pioneers(self,) -> List[Fragment]: + def query_path_of_the_pioneers(self) -> List[Fragment]: ... @abstractmethod @@ -52,7 +52,7 @@ def query_by_transliterated_sorted_by_date(self) -> List[Fragment]: ... @abstractmethod - def query_by_transliterated_not_revised_by_other(self,) -> List[FragmentInfo]: + def query_by_transliterated_not_revised_by_other(self) -> List[FragmentInfo]: ... @abstractmethod @@ -64,7 +64,9 @@ def query_transliterated_numbers(self) -> List[MuseumNumber]: ... @abstractmethod - def query_transliterated_line_to_vec(self,) -> List[LineToVecEntry]: + def query_transliterated_line_to_vec( + self, + ) -> List[LineToVecEntry]: ... 
@abstractmethod diff --git a/ebl/fragmentarium/infrastructure/fragment_repository.py b/ebl/fragmentarium/infrastructure/fragment_repository.py index 2a997a6c9..f3588552a 100644 --- a/ebl/fragmentarium/infrastructure/fragment_repository.py +++ b/ebl/fragmentarium/infrastructure/fragment_repository.py @@ -159,7 +159,7 @@ def query_transliterated_numbers(self): fragment["museumNumber"] for fragment in cursor ) - def query_transliterated_line_to_vec(self,) -> List[LineToVecEntry]: + def query_transliterated_line_to_vec(self) -> List[LineToVecEntry]: cursor = self._fragments.find_many(HAS_TRANSLITERATION, {"text": False}) return [ LineToVecEntry( diff --git a/ebl/tests/corpus/test_chapter_display.py b/ebl/tests/corpus/test_chapter_display.py new file mode 100644 index 000000000..4db01ebd1 --- /dev/null +++ b/ebl/tests/corpus/test_chapter_display.py @@ -0,0 +1,42 @@ +from ebl.corpus.domain.chapter import make_title +from ebl.corpus.domain.chapter_display import ChapterDisplay, LineDisplay +from ebl.tests.factories.corpus import ChapterFactory, LineFactory, TextFactory +from ebl.transliteration.domain.markup import StringPart +from ebl.transliteration.domain.translation_line import ( + TranslationLine, + DEFAULT_LANGUAGE, +) + + +def test_line_display_of_line() -> None: + expected_translation = (StringPart("foo"),) + translation_lines = ( + TranslationLine((StringPart("bar"),), "de", None), + TranslationLine(expected_translation, DEFAULT_LANGUAGE, None), + ) + line = LineFactory.build(translation=translation_lines) + + line_display = LineDisplay.of_line(line) + + assert line_display == LineDisplay( + line.number, + line.variants[0].intertext, + line.variants[0].reconstruction, + expected_translation, + ) + assert line_display.title == make_title(translation_lines) + + +def test_chapter_display_of_chapter() -> None: + text = TextFactory.build() + chapter = ChapterFactory.build() + + chapter_display = ChapterDisplay.of_chapter(text, chapter) + + assert chapter_display == 
ChapterDisplay( + chapter.id_, + text.name, + not text.has_multiple_stages, + tuple(LineDisplay.of_line(line) for line in chapter.lines), + ) + assert chapter_display.title == make_title(chapter.lines[0].translation) diff --git a/ebl/tests/corpus/test_chapter_display_route.py b/ebl/tests/corpus/test_chapter_display_route.py new file mode 100644 index 000000000..a0d2820bd --- /dev/null +++ b/ebl/tests/corpus/test_chapter_display_route.py @@ -0,0 +1,56 @@ +import falcon +import pytest + +from ebl.corpus.application.display_schemas import ChapterDisplaySchema +from ebl.corpus.domain.chapter import Chapter +from ebl.corpus.domain.chapter_display import ChapterDisplay +from ebl.corpus.domain.text import Text +from ebl.tests.corpus.support import create_chapter_url +from ebl.tests.factories.corpus import ChapterFactory, TextFactory + + +@pytest.fixture +def chapter() -> Chapter: + return ChapterFactory.build() + + +@pytest.fixture +def text(chapter: Chapter) -> Text: + return TextFactory.build( + genre=chapter.text_id.genre, + category=chapter.text_id.category, + index=chapter.text_id.index, + ) + + +@pytest.fixture +def url(chapter: Chapter) -> str: + return create_chapter_url(chapter, "/display") + + +def test_get(client, text_repository, text, chapter, url): + text_repository.create(text) + text_repository.create_chapter(chapter) + chapter_display = ChapterDisplay.of_chapter(text, chapter) + + get_result = client.simulate_get(url) + + assert get_result.status == falcon.HTTP_OK + assert get_result.headers["Access-Control-Allow-Origin"] == "*" + assert get_result.json == ChapterDisplaySchema().dump(chapter_display) + + +def test_text_not_found(client, text_repository, text, chapter, url): + text_repository.create(text) + + result = client.simulate_get(url) + + assert result.status == falcon.HTTP_NOT_FOUND + + +def test_chapter_not_found(client, text_repository, chapter, url): + text_repository.create_chapter(chapter) + + result = client.simulate_get(url) + + assert 
result.status == falcon.HTTP_NOT_FOUND diff --git a/ebl/tests/corpus/test_chapter_display_schema b/ebl/tests/corpus/test_chapter_display_schema new file mode 100644 index 000000000..b8cdc03da --- /dev/null +++ b/ebl/tests/corpus/test_chapter_display_schema @@ -0,0 +1,56 @@ +from ebl.corpus.application.display_schemas import ChapterDisplaySchema +from ebl.corpus.domain.chapter_display import ChapterDisplay +from ebl.tests.factories.corpus import ( + ChapterFactory, + TextFactory, +) +from ebl.transliteration.application.line_number_schemas import OneOfLineNumberSchema +from ebl.transliteration.application.note_line_part_schemas import ( + OneOfNoteLinePartSchema, +) +from ebl.transliteration.application.token_schemas import OneOfTokenSchema + + +CHAPTER_DISPLAY = ChapterDisplay.of_chapter(TextFactory.build(), ChapterFactory.build()) + + +def to_dict(chapter: ChapterDisplay) -> dict: + return { + "id": { + "stage": chapter.id_.stage.value, + "name": chapter.id_.name, + "textId": { + "genre": chapter.id_.text_id.genre.value, + "category": chapter.id_.text_id.category, + "index": chapter.id_.text_id.index, + }, + }, + "textName": chapter.text_name, + "isSingleStage": chapter.is_single_stage, + "lines": [ + { + "number": OneOfLineNumberSchema().dump(line.number), + "intertext": OneOfNoteLinePartSchema().dump( + line.intertext, many=True + ), + "reconstruction": OneOfTokenSchema().dump( + line.reconstruction, many=True + ), + "translation": OneOfNoteLinePartSchema().dump( + line.translation, many=True + ), + } + for line in chapter.lines + ], + } + + +def test_dump(): + assert ChapterDisplaySchema().dump(CHAPTER_DISPLAY) == { + **to_dict(CHAPTER_DISPLAY), + "title": OneOfNoteLinePartSchema().dump(CHAPTER_DISPLAY.title, many=True), + } + + +def test_load(): + assert ChapterDisplaySchema().load(to_dict(CHAPTER_DISPLAY)) == CHAPTER_DISPLAY diff --git a/ebl/tests/corpus/test_corpus.py b/ebl/tests/corpus/test_corpus.py index 99f2f1136..11d02e749 100644 --- 
a/ebl/tests/corpus/test_corpus.py +++ b/ebl/tests/corpus/test_corpus.py @@ -7,7 +7,9 @@ ) from ebl.corpus.application.schemas import ChapterSchema from ebl.corpus.domain.alignment import Alignment, ManuscriptLineAlignment +from ebl.corpus.domain.chapter_display import ChapterDisplay from ebl.corpus.domain.line import Line, LineVariant, ManuscriptLine +from ebl.corpus.domain.lines_update import LinesUpdate from ebl.corpus.domain.parser import parse_chapter from ebl.dictionary.domain.word import WordId from ebl.errors import DataError, Defect, NotFoundError @@ -25,7 +27,7 @@ from ebl.transliteration.domain.text_line import TextLine from ebl.transliteration.domain.tokens import Joiner, LanguageShift, ValueToken from ebl.transliteration.domain.word_tokens import Word -from ebl.corpus.domain.lines_update import LinesUpdate + CHAPTERS_COLLECTION = "chapters" TEXT = TextFactory.build() @@ -133,6 +135,15 @@ def test_find_chapter(corpus, text_repository, bibliography, when) -> None: assert corpus.find_chapter(CHAPTER.id_) == CHAPTER +def test_find_chapter_for_display(corpus, text_repository, when) -> None: + chapter_display = ChapterDisplay.of_chapter(TEXT, CHAPTER) + when(text_repository).find_chapter_for_display(CHAPTER.id_).thenReturn( + chapter_display + ) + + assert corpus.find_chapter_for_display(CHAPTER.id_) == chapter_display + + def test_find_manuscripts(corpus, text_repository, bibliography, when) -> None: expect_bibliography(bibliography, when) when(text_repository).query_manuscripts_by_chapter(CHAPTER.id_).thenReturn( @@ -760,9 +771,7 @@ def test_merging_lines( def test_update_lines_raises_exception_if_invalid_signs( corpus, text_repository, bibliography, when ) -> None: - lines = LinesUpdate( - [], set(), {index: line for index, line in enumerate(CHAPTER.lines)} - ) + lines = LinesUpdate([], set(), dict(enumerate(CHAPTER.lines))) when(text_repository).find_chapter(CHAPTER.id_).thenReturn( CHAPTER_WITHOUT_DOCUMENTS ) diff --git 
a/ebl/tests/corpus/test_corpus_text.py b/ebl/tests/corpus/test_corpus_text.py index cb32ccf17..45c730f29 100644 --- a/ebl/tests/corpus/test_corpus_text.py +++ b/ebl/tests/corpus/test_corpus_text.py @@ -1,4 +1,4 @@ -from ebl.corpus.domain.chapter import Classification +from ebl.corpus.domain.chapter import Classification, make_title from ebl.corpus.domain.stage import Stage from ebl.corpus.domain.text import ChapterListing, Text, UncertainFragment from ebl.corpus.domain.text_id import TextId @@ -44,4 +44,4 @@ def test_text_constructor_sets_correct_fields() -> None: def test_title() -> None: - assert CHAPTER.title == (StringPart("The Title"),) + assert CHAPTER.title == make_title(TRANSLATION) diff --git a/ebl/tests/corpus/test_make_title.py b/ebl/tests/corpus/test_make_title.py new file mode 100644 index 000000000..22e85bc34 --- /dev/null +++ b/ebl/tests/corpus/test_make_title.py @@ -0,0 +1,12 @@ +from ebl.corpus.domain.chapter import make_title +from ebl.transliteration.domain.markup import StringPart +from ebl.transliteration.domain.translation_line import TranslationLine + +TRANSLATION = ( + TranslationLine([StringPart("not the title")], "de"), + TranslationLine([StringPart("the title,")], "en"), +) + + +def test_make_title() -> None: + assert make_title(TRANSLATION) == (StringPart("The Title"),) diff --git a/ebl/tests/corpus/test_mongo_text_repository.py b/ebl/tests/corpus/test_mongo_text_repository.py index 7f2f2a8db..ece558b66 100644 --- a/ebl/tests/corpus/test_mongo_text_repository.py +++ b/ebl/tests/corpus/test_mongo_text_repository.py @@ -2,22 +2,23 @@ import pytest from ebl.corpus.application.schemas import ChapterSchema, TextSchema +from ebl.corpus.domain.chapter_display import ChapterDisplay +from ebl.corpus.domain.text import UncertainFragment +from ebl.corpus.domain.text_id import TextId from ebl.errors import DuplicateError, NotFoundError +from ebl.fragmentarium.application.joins_schema import JoinSchema +from ebl.fragmentarium.domain.fragment import 
Fragment +from ebl.fragmentarium.domain.joins import Join, Joins +from ebl.fragmentarium.domain.museum_number import MuseumNumber from ebl.tests.factories.corpus import ( ChapterFactory, LineFactory, ManuscriptFactory, TextFactory, ) -from ebl.transliteration.domain.transliteration_query import TransliterationQuery -from ebl.transliteration.domain.genre import Genre -from ebl.corpus.domain.text_id import TextId -from ebl.fragmentarium.application.joins_schema import JoinSchema -from ebl.fragmentarium.domain.museum_number import MuseumNumber -from ebl.fragmentarium.domain.joins import Join, Joins from ebl.tests.factories.fragment import FragmentFactory -from ebl.fragmentarium.domain.fragment import Fragment -from ebl.corpus.domain.text import UncertainFragment +from ebl.transliteration.domain.genre import Genre +from ebl.transliteration.domain.transliteration_query import TransliterationQuery TEXTS_COLLECTION = "texts" @@ -137,6 +138,21 @@ def test_listing_texts(database, text_repository, bibliography_repository) -> No assert text_repository.list() == [TEXT, another_text] +def test_finding_chapter(database, text_repository) -> None: + when_chapter_in_collection(database) + + assert text_repository.find_chapter(CHAPTER.id_) == CHAPTER + + +def test_finding_chapter_for_display(database, text_repository) -> None: + when_text_in_collection(database) + when_chapter_in_collection(database) + + assert text_repository.find_chapter_for_display( + CHAPTER.id_ + ) == ChapterDisplay.of_chapter(TEXT, CHAPTER) + + def test_updating_chapter(database, text_repository) -> None: updated_chapter = attr.evolve( CHAPTER, lines=tuple(), manuscripts=tuple(), signs=tuple() diff --git a/ebl/tests/corpus/test_text.py b/ebl/tests/corpus/test_text.py new file mode 100644 index 000000000..ec338e672 --- /dev/null +++ b/ebl/tests/corpus/test_text.py @@ -0,0 +1,23 @@ +import pytest + +from ebl.corpus.domain.stage import Stage +from ebl.tests.factories.corpus import ChapterListingFactory, 
TextFactory + + +@pytest.mark.parametrize( # pyre-ignore[56] + "chapters,expected", + [ + (tuple(), False), + (ChapterListingFactory.build_batch(2, stage=Stage.NEO_ASSYRIAN), False), + ( + [ + ChapterListingFactory.build(stage=Stage.NEO_ASSYRIAN), + ChapterListingFactory.build(stage=Stage.OLD_ASSYRIAN), + ], + True, + ), + ], +) +def test_has_multiple_stages(chapters, expected) -> None: + text = TextFactory.build(chapters=chapters) + assert text.has_multiple_stages == expected diff --git a/ebl/tests/fragmentarium/test_fragment_repository.py b/ebl/tests/fragmentarium/test_fragment_repository.py index 8c85aaf93..e88c9cbd5 100644 --- a/ebl/tests/fragmentarium/test_fragment_repository.py +++ b/ebl/tests/fragmentarium/test_fragment_repository.py @@ -145,7 +145,7 @@ def test_fragment_not_found(fragment_repository): fragment_repository.query_by_museum_number(MuseumNumber("unknown", "id")) -def test_find_random(fragment_repository,): +def test_find_random(fragment_repository): fragment = FragmentFactory.build() transliterated_fragment = TransliteratedFragmentFactory.build() for a_fragment in fragment, transliterated_fragment: diff --git a/ebl/tests/fragmentarium/test_retrieve_annotations.py b/ebl/tests/fragmentarium/test_retrieve_annotations.py index 93e4505b2..e1645e3eb 100644 --- a/ebl/tests/fragmentarium/test_retrieve_annotations.py +++ b/ebl/tests/fragmentarium/test_retrieve_annotations.py @@ -50,14 +50,17 @@ def test_create_annotations(photo_repository, when, photo): def test_from_relative_to_absolute_coordinates(): geometry = GeometryFactory.build(x=0, y=0, width=100, height=100) shape = (640, 480) - assert BoundingBox.from_relative_coordinates( - geometry.x, - geometry.y, - geometry.width, - geometry.height, - image_width=shape[0], - image_height=shape[1], - ) == (BoundingBox(0, 0, 640, 480)) + assert ( + BoundingBox.from_relative_coordinates( + geometry.x, + geometry.y, + geometry.width, + geometry.height, + image_width=shape[0], + image_height=shape[1], + ) + == 
(BoundingBox(0, 0, 640, 480)) + ) def test_write_annotations(tmp_path): diff --git a/ebl/tests/transliteration/test_markup.py b/ebl/tests/transliteration/test_markup.py index 2ba47847a..4a0ba92f6 100644 --- a/ebl/tests/transliteration/test_markup.py +++ b/ebl/tests/transliteration/test_markup.py @@ -1,3 +1,5 @@ +from typing import Sequence + import pytest from ebl.bibliography.domain.reference import Reference, ReferenceType, BibliographyId @@ -8,6 +10,9 @@ LanguagePart, MarkupPart, StringPart, + rstrip, + title_case, + to_title, ) from ebl.transliteration.domain.sign_tokens import Divider, Reading @@ -36,7 +41,7 @@ (BIBLIOGRAPHY_PART, BIBLIOGRAPHY_PART), ], ) -def test_rstrip(part: MarkupPart, expected: MarkupPart) -> None: +def test_part_rstrip(part: MarkupPart, expected: MarkupPart) -> None: assert part.rstrip() == expected @@ -49,5 +54,47 @@ def test_rstrip(part: MarkupPart, expected: MarkupPart) -> None: (BIBLIOGRAPHY_PART, BIBLIOGRAPHY_PART), ], ) -def test_title(part: MarkupPart, expected: MarkupPart) -> None: +def test_part_title_case(part: MarkupPart, expected: MarkupPart) -> None: assert part.title_case() == expected + + +@pytest.mark.parametrize( # pyre-ignore[56] + "parts,expected", + [ + (tuple(), tuple()), + ([StringPart("foo--")], (StringPart("foo"),)), + ( + [StringPart("foo--"), StringPart("foo--")], + (StringPart("foo--"), StringPart("foo")), + ), + ], +) +def test_rstrip(parts: Sequence[MarkupPart], expected: Sequence[MarkupPart]) -> None: + assert rstrip(parts) == expected + + +@pytest.mark.parametrize( # pyre-ignore[56] + "parts,expected", + [ + (tuple(), tuple()), + ( + [StringPart("foo bar")], + (StringPart("Foo Bar"),), + ), + ], +) +def test_title_case( + parts: Sequence[MarkupPart], expected: Sequence[MarkupPart] +) -> None: + assert title_case(parts) == expected + + +@pytest.mark.parametrize( # pyre-ignore[56] + "parts", + [ + tuple(), + [StringPart("foo-- bar--")], + ], +) +def test_to_title(parts: Sequence[MarkupPart]) -> None: + assert 
to_title(parts) == title_case(rstrip(parts)) diff --git a/ebl/tests/transliteration/test_text_schema.py b/ebl/tests/transliteration/test_text_schema.py index 25130f663..04efd59e9 100644 --- a/ebl/tests/transliteration/test_text_schema.py +++ b/ebl/tests/transliteration/test_text_schema.py @@ -71,10 +71,13 @@ def test_dump_line(): def test_load_line(lines): parser_version = "2.3.1" serialized_lines = OneOfLineSchema().dump(lines, many=True) - assert TextSchema().load( - { - "lines": serialized_lines, - "parser_version": parser_version, - "numberOfLines": 1, - } - ) == Text.of_iterable(lines).set_parser_version(parser_version) + assert ( + TextSchema().load( + { + "lines": serialized_lines, + "parser_version": parser_version, + "numberOfLines": 1, + } + ) + == Text.of_iterable(lines).set_parser_version(parser_version) + ) diff --git a/ebl/tests/transliteration/test_translation_line.py b/ebl/tests/transliteration/test_translation_line.py index e22ee6984..077cb728f 100644 --- a/ebl/tests/transliteration/test_translation_line.py +++ b/ebl/tests/transliteration/test_translation_line.py @@ -34,32 +34,3 @@ def test_parallel_fragment(parts, language, extent, prefix, translation) -> None assert line.translation == translation assert line.atf == Atf(f"{prefix}: {translation}") assert line.lemmatization == (LemmatizationToken(translation),) - - -@pytest.mark.parametrize( - "line,expected", - [ - (TranslationLine(tuple()), TranslationLine(tuple())), - (TranslationLine([StringPart("foo--")]), TranslationLine([StringPart("foo")])), - ( - TranslationLine([StringPart("foo--"), StringPart("foo--")]), - TranslationLine([StringPart("foo--"), StringPart("foo")]), - ), - ], -) -def test_rstrip(line: TranslationLine, expected: TranslationLine) -> None: - assert line.rstrip() == expected - - -@pytest.mark.parametrize( - "line,expected", - [ - (TranslationLine(tuple()), TranslationLine(tuple())), - ( - TranslationLine([StringPart("foo bar")]), - TranslationLine([StringPart("Foo Bar")]), - 
), - ], -) -def test_title_case(line: TranslationLine, expected: TranslationLine) -> None: - assert line.title_case() == expected diff --git a/ebl/transliteration/domain/markup.py b/ebl/transliteration/domain/markup.py index c7f1f8f44..3fd9080b5 100644 --- a/ebl/transliteration/domain/markup.py +++ b/ebl/transliteration/domain/markup.py @@ -130,3 +130,15 @@ def of(id: BibliographyId, pages: str) -> "BibliographyPart": def convert_part_sequence(parts: Iterable[MarkupPart]) -> Tuple[MarkupPart, ...]: return tuple(parts) + + +def rstrip(parts: Sequence[MarkupPart]) -> Sequence[MarkupPart]: + return tuple([*parts[:-1], parts[-1].rstrip()]) if parts else parts + + +def title_case(parts: Sequence[MarkupPart]) -> Sequence[MarkupPart]: + return tuple(part.title_case() for part in parts) + + +def to_title(parts: Sequence[MarkupPart]) -> Sequence[MarkupPart]: + return title_case(rstrip(parts)) diff --git a/ebl/transliteration/domain/text.py b/ebl/transliteration/domain/text.py index 4cd7aa6bd..4aaadfe6c 100644 --- a/ebl/transliteration/domain/text.py +++ b/ebl/transliteration/domain/text.py @@ -102,7 +102,7 @@ def atf(self) -> Atf: return Atf("\n".join(line.atf for line in self.lines)) @property - def labels(self,) -> Sequence[LineLabel]: + def labels(self) -> Sequence[LineLabel]: current: LineLabel = LineLabel(None, None, None, None) labels: List[LineLabel] = [] diff --git a/ebl/transliteration/domain/translation_line.py b/ebl/transliteration/domain/translation_line.py index c2da36076..788563f6b 100644 --- a/ebl/transliteration/domain/translation_line.py +++ b/ebl/transliteration/domain/translation_line.py @@ -62,13 +62,3 @@ def atf(self) -> Atf: @property def lemmatization(self) -> Sequence[LemmatizationToken]: return (LemmatizationToken(self.translation),) - - def rstrip(self) -> "TranslationLine": - if not self.parts: - return self - - [*head, tail] = self.parts - return attr.evolve(self, parts=(*head, tail.rstrip())) - - def title_case(self) -> "TranslationLine": - 
return attr.evolve(self, parts=(part.title_case() for part in self.parts))