From 4e131430672b3127af73250e528f3823d3dacad9 Mon Sep 17 00:00:00 2001 From: tarepan Date: Mon, 3 Jun 2024 01:17:00 +0900 Subject: [PATCH] =?UTF-8?q?=E6=95=B4=E7=90=86:=20=E5=86=85=E9=83=A8?= =?UTF-8?q?=E5=9E=8B=20`WordProperty`=20=E3=82=92=E8=BF=BD=E5=8A=A0=20(#13?= =?UTF-8?q?33)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * refactor: 内部型 `SimpleUserDictWord` を追加 * refactor: `SimpleUserDictWord` の docstring を充実 * refactor: `SimpleUserDictWord` → `WordProperty` にリネーム * fix: lint * fix: import 元を間接から直接へ修正 * Update voicevox_engine/user_dict/user_dict_word.py --------- Co-authored-by: Hiroshiba --- test/user_dict/test_user_dict.py | 49 ++++++------ voicevox_engine/app/routers/user_dict.py | 27 ++++--- .../user_dict/user_dict_manager.py | 77 ++----------------- voicevox_engine/user_dict/user_dict_word.py | 53 ++++++------- 4 files changed, 69 insertions(+), 137 deletions(-) diff --git a/test/user_dict/test_user_dict.py b/test/user_dict/test_user_dict.py index 14caad4b0..35d13c9cf 100644 --- a/test/user_dict/test_user_dict.py +++ b/test/user_dict/test_user_dict.py @@ -10,6 +10,7 @@ from voicevox_engine.user_dict.user_dict_word import ( MAX_PRIORITY, UserDictInputError, + WordProperty, create_word, part_of_speech_data, ) @@ -75,11 +76,7 @@ def test_read_not_exist_json(tmp_path: Path) -> None: def test_create_word() -> None: # 将来的に品詞などが追加された時にテストを増やす assert create_word( - surface="test", - pronunciation="テスト", - accent_type=1, - word_type=None, - priority=None, + WordProperty(surface="test", pronunciation="テスト", accent_type=1) ) == UserDictWord( surface="test", priority=5, @@ -104,7 +101,9 @@ def test_apply_word_without_json(tmp_path: Path) -> None: user_dict_path=tmp_path / "test_apply_word_without_json.json", compiled_dict_path=tmp_path / "test_apply_word_without_json.dic", ) - user_dict.apply_word(surface="test", pronunciation="テスト", accent_type=1) + user_dict.apply_word( + WordProperty(surface="test", pronunciation="テスト", accent_type=1) + ) res = user_dict.read_dict() assert len(res) == 1 new_word = get_new_word(res) @@ -125,9 +124,7 @@ def test_apply_word_with_json(tmp_path: Path) -> None: compiled_dict_path=tmp_path / "test_apply_word_with_json.dic", ) user_dict.apply_word( - surface="test2", - pronunciation="テストツー", - accent_type=3, + WordProperty(surface="test2", pronunciation="テストツー", accent_type=3) ) res = user_dict.read_dict() assert len(res) == 2 @@ -150,10 +147,8 @@ def test_rewrite_word_invalid_id(tmp_path: Path) -> None: ) with pytest.raises(UserDictInputError): user_dict.rewrite_word( - word_uuid="c2be4dc5-d07d-4767-8be1-04a1bb3f05a9", - surface="test2", - pronunciation="テストツー", - accent_type=2, + "c2be4dc5-d07d-4767-8be1-04a1bb3f05a9", + WordProperty(surface="test2", pronunciation="テストツー", accent_type=2), ) @@ -167,10 +162,8 @@ def test_rewrite_word_valid_id(tmp_path: Path) -> None: compiled_dict_path=tmp_path / "test_rewrite_word_valid_id.dic", ) user_dict.rewrite_word( - word_uuid="aab7dda2-0d97-43c8-8cb7-3f440dab9b4e", - surface="test2", - pronunciation="テストツー", - accent_type=2, + "aab7dda2-0d97-43c8-8cb7-3f440dab9b4e", + WordProperty(surface="test2", pronunciation="テストツー", accent_type=2), ) new_word = user_dict.read_dict()["aab7dda2-0d97-43c8-8cb7-3f440dab9b4e"] assert (new_word.surface, new_word.pronunciation, new_word.accent_type) == ( @@ -211,11 +204,13 @@ def test_priority() -> None: for i in range(MAX_PRIORITY + 1): assert ( create_word( - surface="test", - pronunciation="テスト", - accent_type=1, - word_type=pos, - priority=i, + WordProperty( + surface="test", + pronunciation="テスト", + accent_type=1, + word_type=pos, + priority=i, + ) ).priority == i ) @@ -316,10 +311,12 @@ def test_update_dict(tmp_path: Path) -> None: assert g2p(text=test_text, kana=True) != success_pronunciation user_dict.apply_word( - surface=test_text, - pronunciation=success_pronunciation, - accent_type=1, - priority=10, + WordProperty( + surface=test_text, + pronunciation=success_pronunciation, + accent_type=1, + priority=10, + ) ) assert g2p(text=test_text, kana=True) == success_pronunciation diff --git a/voicevox_engine/app/routers/user_dict.py b/voicevox_engine/app/routers/user_dict.py index 3c31b0704..e160fc685 100644 --- a/voicevox_engine/app/routers/user_dict.py +++ b/voicevox_engine/app/routers/user_dict.py @@ -11,6 +11,7 @@ MAX_PRIORITY, MIN_PRIORITY, UserDictInputError, + WordProperty, ) from ..dependencies import check_disabled_mutable_api @@ -65,11 +66,13 @@ def add_user_dict_word( """ try: word_uuid = user_dict.apply_word( - surface=surface, - pronunciation=pronunciation, - accent_type=accent_type, - word_type=word_type, - priority=priority, + WordProperty( + surface=surface, + pronunciation=pronunciation, + accent_type=accent_type, + word_type=word_type, + priority=priority, + ) ) return word_uuid except ValidationError as e: @@ -115,12 +118,14 @@ def rewrite_user_dict_word( """ try: user_dict.rewrite_word( - surface=surface, - pronunciation=pronunciation, - accent_type=accent_type, - word_uuid=word_uuid, - word_type=word_type, - priority=priority, + word_uuid, + WordProperty( + surface=surface, + pronunciation=pronunciation, + accent_type=accent_type, + word_type=word_type, + priority=priority, + ), ) except ValidationError as e: raise HTTPException( diff --git a/voicevox_engine/user_dict/user_dict_manager.py b/voicevox_engine/user_dict/user_dict_manager.py index eb1da93c5..a13e81eeb 100644 --- a/voicevox_engine/user_dict/user_dict_manager.py +++ b/voicevox_engine/user_dict/user_dict_manager.py @@ -14,6 +14,7 @@ from .model import UserDictWord, WordTypes from .user_dict_word import ( UserDictInputError, + WordProperty, cost2priority, create_word, part_of_speech_data, @@ -296,44 +297,12 @@ def import_user_dict( compiled_dict_path=self._compiled_dict_path, ) - def apply_word( - self, - surface: str, - pronunciation: str, - accent_type: int, - word_type: WordTypes | None = None, - priority: int | None = None, - ) -> str: - """ - 新規単語を追加する。 - Parameters - ---------- - surface : str - 単語情報 - pronunciation : str - 単語情報 - accent_type : int - 単語情報 - word_type : WordTypes | None - 品詞 - priority : int | None - 優先度 - Returns - ------- - word_uuid : UserDictWord - 追加された単語に発行されたUUID - """ + def apply_word(self, word_property: WordProperty) -> str: + """新規単語を追加し、その単語に割り当てられた UUID を返す。""" # 新規単語の追加による辞書データの更新 - word = create_word( - surface=surface, - pronunciation=pronunciation, - accent_type=accent_type, - word_type=word_type, - priority=priority, - ) user_dict = _read_dict(user_dict_path=self._user_dict_path) word_uuid = str(uuid4()) - user_dict[word_uuid] = word + user_dict[word_uuid] = create_word(word_property) # 更新された辞書データの保存と適用 _write_to_json(user_dict, self._user_dict_path) @@ -345,45 +314,13 @@ def apply_word( return word_uuid - def rewrite_word( - self, - word_uuid: str, - surface: str, - pronunciation: str, - accent_type: int, - word_type: WordTypes | None = None, - priority: int | None = None, - ) -> None: - """ - 既存単語を上書き更新する。 - Parameters - ---------- - word_uuid : str - 単語UUID - surface : str - 単語情報 - pronunciation : str - 単語情報 - accent_type : int - 単語情報 - word_type : WordTypes | None - 品詞 - priority : int | None - 優先度 - """ - word = create_word( - surface=surface, - pronunciation=pronunciation, - accent_type=accent_type, - word_type=word_type, - priority=priority, - ) - + def rewrite_word(self, word_uuid: str, word_property: WordProperty) -> None: + """単語 UUID で指定された単語を上書き更新する。""" # 既存単語の上書きによる辞書データの更新 user_dict = _read_dict(user_dict_path=self._user_dict_path) if word_uuid not in user_dict: raise UserDictInputError("UUIDに該当するワードが見つかりませんでした") - user_dict[word_uuid] = word + user_dict[word_uuid] = create_word(word_property) # 更新された辞書データの保存と適用 _write_to_json(user_dict, self._user_dict_path) diff --git a/voicevox_engine/user_dict/user_dict_word.py b/voicevox_engine/user_dict/user_dict_word.py index 7889bbcdf..3ddf3e033 100644 --- a/voicevox_engine/user_dict/user_dict_word.py +++ b/voicevox_engine/user_dict/user_dict_word.py @@ -1,5 +1,7 @@ """ユーザー辞書を構成する言葉(単語)関連の処理""" +from dataclasses import dataclass + import numpy as np from pydantic import BaseModel, Field @@ -164,43 +166,34 @@ class PartOfSpeechDetail(BaseModel): } -def create_word( - surface: str, - pronunciation: str, - accent_type: int, - word_type: WordTypes | None, - priority: int | None, -) -> UserDictWord: - """ - 単語オブジェクトの生成 - Parameters - ---------- - surface : str - 単語情報 - pronunciation : str - 単語情報 - accent_type : int - 単語情報 - word_type : WordTypes | None - 品詞 - priority : int | None - 優先度 - Returns - ------- - : UserDictWord - 単語オブジェクト - """ +@dataclass +class WordProperty: + """単語属性のあつまり""" + + surface: str # 単語情報 + pronunciation: str # 単語情報 + accent_type: int # 単語情報 + word_type: WordTypes | None = None # 品詞 + priority: int | None = None # 優先度 + + +def create_word(word_property: WordProperty) -> UserDictWord: + """単語オブジェクトを生成する。""" + word_type: WordTypes | None = word_property.word_type if word_type is None: word_type = WordTypes.PROPER_NOUN if word_type not in part_of_speech_data.keys(): raise UserDictInputError("不明な品詞です") + + priority: int | None = word_property.priority if priority is None: priority = 5 if not MIN_PRIORITY <= priority <= MAX_PRIORITY: raise UserDictInputError("優先度の値が無効です") + pos_detail = part_of_speech_data[word_type] return UserDictWord( - surface=surface, + surface=word_property.surface, context_id=pos_detail.context_id, priority=priority, part_of_speech=pos_detail.part_of_speech, @@ -210,9 +203,9 @@ def create_word( inflectional_type="*", inflectional_form="*", stem="*", - yomi=pronunciation, - pronunciation=pronunciation, - accent_type=accent_type, + yomi=word_property.pronunciation, + pronunciation=word_property.pronunciation, + accent_type=word_property.accent_type, mora_count=None, accent_associative_rule="*", )