Skip to content

Commit

Permalink
整理: ユーザー辞書に関するリネーム・lint (VOICEVOX#1396)
Browse files (browse the repository at this point in the history)
* refactor: PRIORITY 変数をリネーム

* refactor: `cost_candidates` を切り出し

* refactor: `PartOfSpeechDetail` をプライベートにリネーム

* refactor: プライベート変数に `_` prefix を付けてリネーム

* fix: `PRIORITY` に `USER_DICT_` prefix を付与
  • Loading branch information
tarepan authored Jun 19, 2024
1 parent 7dfcc8c commit cd559a2
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 119 deletions.
4 changes: 2 additions & 2 deletions test/unit/user_dict/test_user_dict.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from voicevox_engine.user_dict.model import UserDictWord, WordTypes
from voicevox_engine.user_dict.user_dict_manager import UserDictionary
from voicevox_engine.user_dict.user_dict_word import (
MAX_PRIORITY,
USER_DICT_MAX_PRIORITY,
UserDictInputError,
WordProperty,
create_word,
Expand Down Expand Up @@ -201,7 +201,7 @@ def test_delete_word_valid_id(tmp_path: Path) -> None:

def test_priority() -> None:
for pos in part_of_speech_data:
for i in range(MAX_PRIORITY + 1):
for i in range(USER_DICT_MAX_PRIORITY + 1):
assert (
create_word(
WordProperty(
Expand Down
22 changes: 14 additions & 8 deletions voicevox_engine/app/routers/user_dict.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
from voicevox_engine.user_dict.model import UserDictWord, WordTypes
from voicevox_engine.user_dict.user_dict_manager import UserDictionary
from voicevox_engine.user_dict.user_dict_word import (
MAX_PRIORITY,
MIN_PRIORITY,
USER_DICT_MAX_PRIORITY,
USER_DICT_MIN_PRIORITY,
UserDictInputError,
WordProperty,
)
Expand Down Expand Up @@ -58,11 +58,14 @@ def add_user_dict_word(
priority: Annotated[
int | SkipJsonSchema[None],
Query(
ge=MIN_PRIORITY,
le=MAX_PRIORITY,
ge=USER_DICT_MIN_PRIORITY,
le=USER_DICT_MAX_PRIORITY,
description="単語の優先度(0から10までの整数)。数字が大きいほど優先度が高くなる。1から9までの値を指定することを推奨",
# "SkipJsonSchema[None]"の副作用でスキーマーが欠落する問題に対するワークアラウンド
json_schema_extra={"maximum": MAX_PRIORITY, "minimum": MIN_PRIORITY},
json_schema_extra={
"maximum": USER_DICT_MAX_PRIORITY,
"minimum": USER_DICT_MIN_PRIORITY,
},
),
] = None,
) -> str:
Expand Down Expand Up @@ -112,11 +115,14 @@ def rewrite_user_dict_word(
priority: Annotated[
int | SkipJsonSchema[None],
Query(
ge=MIN_PRIORITY,
le=MAX_PRIORITY,
ge=USER_DICT_MIN_PRIORITY,
le=USER_DICT_MAX_PRIORITY,
description="単語の優先度(0から10までの整数)。数字が大きいほど優先度が高くなる。1から9までの値を指定することを推奨。",
# "SkipJsonSchema[None]"の副作用でスキーマーが欠落する問題に対するワークアラウンド
json_schema_extra={"maximum": MAX_PRIORITY, "minimum": MIN_PRIORITY},
json_schema_extra={
"maximum": USER_DICT_MAX_PRIORITY,
"minimum": USER_DICT_MIN_PRIORITY,
},
),
] = None,
) -> None:
Expand Down
140 changes: 31 additions & 109 deletions voicevox_engine/user_dict/user_dict_word.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,8 @@
WordTypes,
)

MIN_PRIORITY = USER_DICT_MIN_PRIORITY
MAX_PRIORITY = USER_DICT_MAX_PRIORITY


class PartOfSpeechDetail(BaseModel):
class _PartOfSpeechDetail(BaseModel):
"""
品詞ごとの情報
"""
Expand All @@ -32,136 +29,58 @@ class PartOfSpeechDetail(BaseModel):
accent_associative_rules: list[str] = Field(title="アクセント結合規則の一覧")


part_of_speech_data: dict[WordTypes, PartOfSpeechDetail] = {
WordTypes.PROPER_NOUN: PartOfSpeechDetail(
_costs_proper_noun = [-988, 3488, 4768, 6048, 7328, 8609, 8734, 8859, 8984, 9110, 14176]
_costs_common_noun = [-4445, 49, 1473, 2897, 4321, 5746, 6554, 7362, 8170, 8979, 15001]
_costs_verb = [3100, 6160, 6360, 6561, 6761, 6962, 7414, 7866, 8318, 8771, 13433]
_costs_adjective = [1527, 3266, 3561, 3857, 4153, 4449, 5149, 5849, 6549, 7250, 10001]
_costs_suffix = [4399, 5373, 6041, 6710, 7378, 8047, 9440, 10834, 12228, 13622, 15847]


part_of_speech_data: dict[WordTypes, _PartOfSpeechDetail] = {
WordTypes.PROPER_NOUN: _PartOfSpeechDetail(
part_of_speech="名詞",
part_of_speech_detail_1="固有名詞",
part_of_speech_detail_2="一般",
part_of_speech_detail_3="*",
context_id=1348,
cost_candidates=[
-988,
3488,
4768,
6048,
7328,
8609,
8734,
8859,
8984,
9110,
14176,
],
accent_associative_rules=[
"*",
"C1",
"C2",
"C3",
"C4",
"C5",
],
cost_candidates=_costs_proper_noun,
accent_associative_rules=["*", "C1", "C2", "C3", "C4", "C5"],
),
WordTypes.COMMON_NOUN: PartOfSpeechDetail(
WordTypes.COMMON_NOUN: _PartOfSpeechDetail(
part_of_speech="名詞",
part_of_speech_detail_1="一般",
part_of_speech_detail_2="*",
part_of_speech_detail_3="*",
context_id=1345,
cost_candidates=[
-4445,
49,
1473,
2897,
4321,
5746,
6554,
7362,
8170,
8979,
15001,
],
accent_associative_rules=[
"*",
"C1",
"C2",
"C3",
"C4",
"C5",
],
cost_candidates=_costs_common_noun,
accent_associative_rules=["*", "C1", "C2", "C3", "C4", "C5"],
),
WordTypes.VERB: PartOfSpeechDetail(
WordTypes.VERB: _PartOfSpeechDetail(
part_of_speech="動詞",
part_of_speech_detail_1="自立",
part_of_speech_detail_2="*",
part_of_speech_detail_3="*",
context_id=642,
cost_candidates=[
3100,
6160,
6360,
6561,
6761,
6962,
7414,
7866,
8318,
8771,
13433,
],
accent_associative_rules=[
"*",
],
cost_candidates=_costs_verb,
accent_associative_rules=["*"],
),
WordTypes.ADJECTIVE: PartOfSpeechDetail(
WordTypes.ADJECTIVE: _PartOfSpeechDetail(
part_of_speech="形容詞",
part_of_speech_detail_1="自立",
part_of_speech_detail_2="*",
part_of_speech_detail_3="*",
context_id=20,
cost_candidates=[
1527,
3266,
3561,
3857,
4153,
4449,
5149,
5849,
6549,
7250,
10001,
],
accent_associative_rules=[
"*",
],
cost_candidates=_costs_adjective,
accent_associative_rules=["*"],
),
WordTypes.SUFFIX: PartOfSpeechDetail(
WordTypes.SUFFIX: _PartOfSpeechDetail(
part_of_speech="名詞",
part_of_speech_detail_1="接尾",
part_of_speech_detail_2="一般",
part_of_speech_detail_3="*",
context_id=1358,
cost_candidates=[
4399,
5373,
6041,
6710,
7378,
8047,
9440,
10834,
12228,
13622,
15847,
],
accent_associative_rules=[
"*",
"C1",
"C2",
"C3",
"C4",
"C5",
],
cost_candidates=_costs_suffix,
accent_associative_rules=["*", "C1", "C2", "C3", "C4", "C5"],
),
}

Expand All @@ -188,7 +107,7 @@ def create_word(word_property: WordProperty) -> UserDictWord:
priority: int | None = word_property.priority
if priority is None:
priority = 5
if not MIN_PRIORITY <= priority <= MAX_PRIORITY:
if not USER_DICT_MIN_PRIORITY <= priority <= USER_DICT_MAX_PRIORITY:
raise UserDictInputError("優先度の値が無効です")

pos_detail = part_of_speech_data[word_type]
Expand Down Expand Up @@ -230,10 +149,13 @@ def cost2priority(context_id: int, cost: int) -> int:
# cost_candidatesの中にある値で最も近い値を元にpriorityを返す
# 参考: https://qiita.com/Krypf/items/2eada91c37161d17621d
# この関数とpriority2cost関数によって、辞書ファイルのcostを操作しても最も近いpriorityのcostに上書きされる
return MAX_PRIORITY - np.argmin(np.abs(np.array(cost_candidates) - cost)).item()
return (
USER_DICT_MAX_PRIORITY
- np.argmin(np.abs(np.array(cost_candidates) - cost)).item()
)


def priority2cost(context_id: int, priority: int) -> int:
assert MIN_PRIORITY <= priority <= MAX_PRIORITY
assert USER_DICT_MIN_PRIORITY <= priority <= USER_DICT_MAX_PRIORITY
cost_candidates = _search_cost_candidates(context_id)
return cost_candidates[MAX_PRIORITY - priority]
return cost_candidates[USER_DICT_MAX_PRIORITY - priority]

0 comments on commit cd559a2

Please sign in to comment.