From 927cb7853fcf07b67ac258aef40064875a6a28c3 Mon Sep 17 00:00:00 2001
From: buckw6eat <mistlain@gmail.com>
Date: Wed, 8 Sep 2021 23:39:15 +0900
Subject: [PATCH 1/7] =?UTF-8?q?=E9=9F=B3=E7=B4=A0=E9=95=B7=E3=81=A8?=
 =?UTF-8?q?=E9=9F=B3=E9=AB=98=E8=A8=AD=E5=AE=9A=E3=81=A7=20API=20=E3=82=92?=
 =?UTF-8?q?=E5=88=86=E3=81=91=E3=82=8B=20(#71)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* refactor: divide mora API for length and pitch

* revert: remove useless case
---
 run.py                              | 30 ++++++++++++++-
 voicevox_engine/synthesis_engine.py | 58 ++++++++++++++++++++---------
 2 files changed, 68 insertions(+), 20 deletions(-)

diff --git a/run.py b/run.py
index a73e6f267..7faf765eb 100644
--- a/run.py
+++ b/run.py
@@ -98,8 +98,12 @@ def generate_app(engine: SynthesisEngine) -> FastAPI:
     def replace_mora_data(
         accent_phrases: List[AccentPhrase], speaker_id: int
     ) -> List[AccentPhrase]:
-        return engine.replace_phoneme_data(
-            accent_phrases=accent_phrases, speaker_id=speaker_id
+        return engine.replace_mora_pitch(
+            accent_phrases=engine.replace_phoneme_length(
+                accent_phrases=accent_phrases,
+                speaker_id=speaker_id,
+            ),
+            speaker_id=speaker_id,
         )
 
     def create_accent_phrases(text: str, speaker_id: int) -> List[AccentPhrase]:
@@ -192,6 +196,28 @@ def accent_phrases(text: str, speaker: int):
     def mora_data(accent_phrases: List[AccentPhrase], speaker: int):
         return replace_mora_data(accent_phrases, speaker_id=speaker)
 
+    @app.post(
+        "/mora_length",
+        response_model=List[AccentPhrase],
+        tags=["クエリ編集"],
+        summary="アクセント句から音素長を得る",
+    )
+    def mora_length(accent_phrases: List[AccentPhrase], speaker: int):
+        return engine.replace_phoneme_length(
+            accent_phrases=accent_phrases, speaker_id=speaker
+        )
+
+    @app.post(
+        "/mora_pitch",
+        response_model=List[AccentPhrase],
+        tags=["クエリ編集"],
+        summary="アクセント句から音高を得る",
+    )
+    def mora_pitch(accent_phrases: List[AccentPhrase], speaker: int):
+        return engine.replace_mora_pitch(
+            accent_phrases=accent_phrases, speaker_id=speaker
+        )
+
     @app.post(
         "/synthesis",
         response_class=FileResponse,
diff --git a/voicevox_engine/synthesis_engine.py b/voicevox_engine/synthesis_engine.py
index e41d8f842..ca2215162 100644
--- a/voicevox_engine/synthesis_engine.py
+++ b/voicevox_engine/synthesis_engine.py
@@ -97,7 +97,45 @@ def __init__(
         self.yukarin_s_phoneme_class = OjtPhoneme
         self.yukarin_soso_phoneme_class = OjtPhoneme
 
-    def replace_phoneme_data(self, accent_phrases: List[AccentPhrase], speaker_id: int):
+    def replace_phoneme_length(
+        self, accent_phrases: List[AccentPhrase], speaker_id: int
+    ) -> List[AccentPhrase]:
+        # phoneme
+        flatten_moras = to_flatten_moras(accent_phrases)
+
+        phoneme_each_mora = [
+            ([mora.consonant] if mora.consonant is not None else []) + [mora.vowel]
+            for mora in flatten_moras
+        ]
+        phoneme_str_list = list(chain.from_iterable(phoneme_each_mora))
+        phoneme_str_list = ["pau"] + phoneme_str_list + ["pau"]
+
+        phoneme_data_list = to_phoneme_data_list(phoneme_str_list)
+        _, _, vowel_indexes_data = split_mora(phoneme_data_list)
+
+        # yukarin_s
+        phoneme_list_s = numpy.array(
+            [p.phoneme_id for p in phoneme_data_list], dtype=numpy.int64
+        )
+        phoneme_length = self.yukarin_s_forwarder(
+            length=len(phoneme_list_s),
+            phoneme_list=phoneme_list_s,
+            speaker_id=numpy.array(speaker_id, dtype=numpy.int64).reshape(-1),
+        )
+
+        for i, mora in enumerate(flatten_moras):
+            mora.consonant_length = (
+                phoneme_length[vowel_indexes_data[i + 1] - 1]
+                if mora.consonant is not None
+                else None
+            )
+            mora.vowel_length = phoneme_length[vowel_indexes_data[i + 1]]
+
+        return accent_phrases
+
+    def replace_mora_pitch(
+        self, accent_phrases: List[AccentPhrase], speaker_id: int
+    ) -> List[AccentPhrase]:
         # phoneme
         flatten_moras = to_flatten_moras(accent_phrases)
 
@@ -193,16 +231,6 @@ def _repeat_with_mora(array, accent_phrase):
             vowel_indexes_data,
         ) = split_mora(phoneme_data_list)
 
-        # yukarin_s
-        phoneme_list_s = numpy.array(
-            [p.phoneme_id for p in phoneme_data_list], dtype=numpy.int64
-        )
-        phoneme_length = self.yukarin_s_forwarder(
-            length=len(phoneme_list_s),
-            phoneme_list=phoneme_list_s,
-            speaker_id=numpy.array(speaker_id, dtype=numpy.int64).reshape(-1),
-        )
-
         # yukarin_sa
         vowel_indexes = numpy.array(vowel_indexes_data, dtype=numpy.int64)
 
@@ -236,12 +264,6 @@ def _repeat_with_mora(array, accent_phrase):
 
         for i, mora in enumerate(flatten_moras):
             mora.pitch = f0_list[i + 1]
-            mora.consonant_length = (
-                phoneme_length[vowel_indexes_data[i + 1] - 1]
-                if mora.consonant is not None
-                else None
-            )
-            mora.vowel_length = phoneme_length[vowel_indexes_data[i + 1]]
 
         return accent_phrases
 
@@ -257,12 +279,12 @@ def synthesis(self, query: AudioQuery, speaker_id: int):
         phoneme_str_list = list(chain.from_iterable(phoneme_each_mora))
         phoneme_str_list = ["pau"] + phoneme_str_list + ["pau"]
 
-        # yukarin_s
         phoneme_data_list = to_phoneme_data_list(phoneme_str_list)
         phoneme_list_s = numpy.array(
             [p.phoneme_id for p in phoneme_data_list], dtype=numpy.int64
         )
 
+        # length
         phoneme_length_list = (
             [query.prePhonemeLength]
             + [

From 20a18a27470effa5481eaf0a72d802a6517249e4 Mon Sep 17 00:00:00 2001
From: Hiroshiba <hihokaruta@gmail.com>
Date: Thu, 9 Sep 2021 01:20:14 +0900
Subject: [PATCH 2/7] to 0.5.1 (#74)

* to 0.5.0

* update core

* update numpy version

* update __file__

* 0.5.1
---
 VERSION.txt | 2 +-
 run.py      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/VERSION.txt b/VERSION.txt
index 79a2734bb..5d4294b91 100644
--- a/VERSION.txt
+++ b/VERSION.txt
@@ -1 +1 @@
-0.5.0
\ No newline at end of file
+0.5.1
\ No newline at end of file
diff --git a/run.py b/run.py
index 7faf765eb..6627f4256 100644
--- a/run.py
+++ b/run.py
@@ -84,7 +84,7 @@ def generate_app(engine: SynthesisEngine) -> FastAPI:
     app = FastAPI(
         title="VOICEVOX ENGINE",
         description="VOICEVOXの音声合成エンジンです。",
-        version=(root_dir / "VERSION.txt").read_text(),
+        version=(root_dir / "VERSION.txt").read_text().strip(),
     )
 
     app.add_middleware(

From 46a0d08cfcc05340e15375c16a51499cc0ec3c3d Mon Sep 17 00:00:00 2001
From: ISHIDA Naoto <naoto@isnot.jp>
Date: Thu, 9 Sep 2021 14:36:48 +0900
Subject: [PATCH 3/7] =?UTF-8?q?[Mock]=20/synthesis=20=E3=81=A7=E3=80=81?=
 =?UTF-8?q?=E9=9B=91=E9=9F=B3=EF=BC=88=E7=84=A1=E9=9F=B3=EF=BC=9F=EF=BC=89?=
 =?UTF-8?q?=E3=81=AE=E4=BB=A3=E3=82=8F=E3=82=8A=E3=81=AB=E3=80=81=E5=9B=BA?=
 =?UTF-8?q?=E5=AE=9A=E3=81=AE=E3=83=80=E3=83=9F=E3=83=BC=E3=83=BB=E3=83=86?=
 =?UTF-8?q?=E3=82=AD=E3=82=B9=E3=83=88=E3=82=92=E8=AA=AD=E3=81=BF=E4=B8=8A?=
 =?UTF-8?q?=E3=81=92=E3=82=8B=20#27=20(#77)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* update: use dummy text instead of void audio for synthesis #27

* fix: add comment #27
---
 voicevox_engine/dev/core/mock.py | 42 ++++++++++++++++++++++++++++++--
 1 file changed, 40 insertions(+), 2 deletions(-)

diff --git a/voicevox_engine/dev/core/mock.py b/voicevox_engine/dev/core/mock.py
index 11f024f24..db5845111 100644
--- a/voicevox_engine/dev/core/mock.py
+++ b/voicevox_engine/dev/core/mock.py
@@ -2,9 +2,13 @@
 from typing import Any, Dict, List
 
 import numpy as np
+from pyopenjtalk import tts
+from resampy import resample
 
+DUMMY_TEXT = "これはダミーのテキストです"
 
-def initialize(*args: List[Any]) -> None:
+
+def initialize(path: str, use_gpu: bool, *args: List[Any]) -> None:
     pass
 
 
@@ -25,8 +29,42 @@ def yukarin_sa_forward(length: int, **kwargs: Dict[str, Any]) -> np.ndarray:
 
 
 def decode_forward(length: int, **kwargs: Dict[str, Any]) -> np.ndarray:
+    """
+    合成音声の波形データをNumPy配列で返します。ただし、常に固定の文言を読み上げます（DUMMY_TEXT）
+    参照→SynthesisEngine のdocstring [Mock]
+
+    Parameters
+    ----------
+    length : int
+        フレームの長さ
+
+    Returns
+    -------
+    wave : np.ndarray
+        音声合成した波形データ
+
+    Note
+    -------
+        ここで行う音声合成では、調声（ピッチ等）を反映しない
+        また、入力内容によらず常に固定の文言を読み上げる
+
+        # pyopenjtalk.tts()の出力仕様
+        dtype=np.float64, 16 bit, mono 48000 Hz
+
+        # resampleの説明
+        本来はfloat64の入力でも問題ないのかと思われたが、実際には出力が音割れひどかった。
+        対策として、あらかじめint16に型変換しておくと、期待通りの結果になった。
+        非モックdecode_forwardと合わせるために、出力を24kHzに変換した。
+    """
     logger = getLogger("uvicorn")  # FastAPI / Uvicorn 内からの利用のため
     logger.info(
         "Sorry, decode_forward() is a mock. Return values are incorrect.",
     )
-    return np.ones(length * 256)
+    wave, sr = tts(DUMMY_TEXT)
+    wave = resample(
+        wave.astype("int16"),
+        sr,
+        24000,
+        filter="kaiser_fast",
+    )
+    return wave

From ea79f9540f37b9ff73a7ab6e88d5dee0a5dcdc17 Mon Sep 17 00:00:00 2001
From: Yosshi999 <Yosshi999@users.noreply.github.com>
Date: Thu, 9 Sep 2021 23:35:48 +0900
Subject: [PATCH 4/7] =?UTF-8?q?Softalk=E3=83=A9=E3=82=A4=E3=82=AF=E3=81=AA?=
 =?UTF-8?q?=E8=AA=AD=E3=81=BF=E4=BB=AE=E5=90=8D=E3=81=AE=E5=85=A5=E5=87=BA?=
 =?UTF-8?q?=E5=8A=9B=E5=AF=BE=E5=BF=9C=20(#73)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* support softalk-ish query

* set 400 error

* move definition to model.py

* kana parser for /accent_phrases

* apply pysen lint

* change model

* bugfix: unvoice in created kana

* test for kana parser

* apply pysen lint

* remove unused package import

* remove is_kana field from audio_query, use the term 'AquesTalk'

* refactoring

* replace mora data

* rename badrequest
---
 run.py                         |  44 ++++-
 test/test_kana_parser.py       |  76 ++++++++
 voicevox_engine/kana_parser.py | 122 ++++++++++++
 voicevox_engine/model.py       |  40 +++-
 voicevox_engine/mora_list.py   | 339 +++++++++++++++++----------------
 5 files changed, 449 insertions(+), 172 deletions(-)
 create mode 100644 test/test_kana_parser.py
 create mode 100644 voicevox_engine/kana_parser.py

diff --git a/run.py b/run.py
index 6627f4256..bcbd7b512 100644
--- a/run.py
+++ b/run.py
@@ -8,12 +8,20 @@
 import resampy
 import soundfile
 import uvicorn
-from fastapi import FastAPI, Response
+from fastapi import FastAPI, HTTPException, Response
 from fastapi.middleware.cors import CORSMiddleware
 from starlette.responses import FileResponse
 
 from voicevox_engine.full_context_label import extract_full_context_label
-from voicevox_engine.model import AccentPhrase, AudioQuery, Mora, Speaker
+from voicevox_engine.kana_parser import create_kana, parse_kana
+from voicevox_engine.model import (
+    AccentPhrase,
+    AudioQuery,
+    Mora,
+    ParseKanaBadRequest,
+    ParseKanaError,
+    Speaker,
+)
 from voicevox_engine.mora_list import openjtalk_mora2text
 from voicevox_engine.synthesis_engine import SynthesisEngine
 
@@ -166,8 +174,9 @@ def audio_query(text: str, speaker: int):
         """
         クエリの初期値を得ます。ここで得られたクエリはそのまま音声合成に利用できます。各値の意味は`Schemas`を参照してください。
         """
+        accent_phrases = create_accent_phrases(text, speaker_id=speaker)
         return AudioQuery(
-            accent_phrases=create_accent_phrases(text, speaker_id=speaker),
+            accent_phrases=accent_phrases,
             speedScale=1,
             pitchScale=0,
             intonationScale=1,
@@ -176,6 +185,7 @@ def audio_query(text: str, speaker: int):
             postPhonemeLength=0.1,
             outputSamplingRate=default_sampling_rate,
             outputStereo=False,
+            kana=create_kana(accent_phrases),
         )
 
     @app.post(
@@ -183,9 +193,33 @@ def audio_query(text: str, speaker: int):
         response_model=List[AccentPhrase],
         tags=["クエリ編集"],
         summary="テキストからアクセント句を得る",
+        responses={
+            400: {
+                "description": "読み仮名のパースに失敗",
+                "model": ParseKanaBadRequest,
+            }
+        },
     )
-    def accent_phrases(text: str, speaker: int):
-        return create_accent_phrases(text, speaker_id=speaker)
+    def accent_phrases(text: str, speaker: int, is_kana: bool = False):
+        """
+        テキストからアクセント句を得ます。
+        is_kanaが`true`のとき、テキストは次のようなAquesTalkライクな記法に従う読み仮名として処理されます。デフォルトは`false`です。
+        * 全てのカナはカタカナで記述される
+        * アクセント句は`/`または`、`で区切る。`、`で区切った場合に限り無音区間が挿入される。
+        * カナの手前に`_`を入れるとそのカナは無声化される
+        * アクセント位置を`'`で指定する。全てのアクセント句にはアクセント位置を1つ指定する必要がある。
+        """
+        if is_kana:
+            try:
+                accent_phrases = parse_kana(text)
+            except ParseKanaError as err:
+                raise HTTPException(
+                    status_code=400,
+                    detail=ParseKanaBadRequest(err).dict(),
+                )
+            return replace_mora_data(accent_phrases=accent_phrases, speaker_id=speaker)
+        else:
+            return create_accent_phrases(text, speaker_id=speaker)
 
     @app.post(
         "/mora_data",
diff --git a/test/test_kana_parser.py b/test/test_kana_parser.py
new file mode 100644
index 000000000..98ff4e303
--- /dev/null
+++ b/test/test_kana_parser.py
@@ -0,0 +1,76 @@
+from unittest import TestCase
+
+from voicevox_engine.kana_parser import create_kana, parse_kana
+from voicevox_engine.model import ParseKanaError, ParseKanaErrorCode
+
+
+class TestParseKana(TestCase):
+    def test_phrase_length(self):
+        self.assertEqual(len(parse_kana("ア'/ア'")), 2)
+        self.assertEqual(len(parse_kana("ア'、ア'")), 2)
+        self.assertEqual(len(parse_kana("ア'/ア'/ア'/ア'/ア'")), 5)
+        self.assertEqual(len(parse_kana("ス'")), 1)
+        self.assertEqual(len(parse_kana("_ス'")), 1)
+        self.assertEqual(len(parse_kana("ギェ'")), 1)
+        self.assertEqual(len(parse_kana("ギェ'、ギェ'/ギェ'")), 3)
+
+    def test_accent(self):
+        self.assertEqual(parse_kana("シャ'シシュシェショ")[0].accent, 1)
+        self.assertEqual(parse_kana("シャ'_シシュシェショ")[0].accent, 1)
+        self.assertEqual(parse_kana("シャシ'シュシェショ")[0].accent, 2)
+        self.assertEqual(parse_kana("シャ_シ'シュシェショ")[0].accent, 2)
+        self.assertEqual(parse_kana("シャシシュ'シェショ")[0].accent, 3)
+        self.assertEqual(parse_kana("シャ_シシュ'シェショ")[0].accent, 3)
+        self.assertEqual(parse_kana("シャシシュシェショ'")[0].accent, 5)
+        self.assertEqual(parse_kana("シャ_シシュシェショ'")[0].accent, 5)
+
+    def test_mora_length(self):
+        self.assertEqual(len(parse_kana("シャ'シシュシェショ")[0].moras), 5)
+        self.assertEqual(len(parse_kana("シャ'_シシュシェショ")[0].moras), 5)
+        self.assertEqual(len(parse_kana("シャシ'シュシェショ")[0].moras), 5)
+        self.assertEqual(len(parse_kana("シャ_シ'シュシェショ")[0].moras), 5)
+        self.assertEqual(len(parse_kana("シャシシュシェショ'")[0].moras), 5)
+        self.assertEqual(len(parse_kana("シャ_シシュシェショ'")[0].moras), 5)
+
+    def test_pause(self):
+        self.assertIsNone(parse_kana("ア'/ア'")[0].pause_mora)
+        self.assertIsNone(parse_kana("ア'/ア'")[1].pause_mora)
+        self.assertIsNotNone(parse_kana("ア'、ア'")[0].pause_mora)
+        self.assertIsNone(parse_kana("ア'、ア'")[1].pause_mora)
+
+    def test_unvoice(self):
+        self.assertEqual(parse_kana("ス'")[0].moras[0].vowel, "u")
+        self.assertEqual(parse_kana("_ス'")[0].moras[0].vowel, "U")
+
+    def test_roundtrip(self):
+        for text in ["コンニチワ'", "ワタシワ'/シャチョオデ'_ス", "トテモ'、エラ'インデス"]:
+            self.assertEqual(create_kana(parse_kana(text)), text)
+
+        for text in ["ヲ'", "ェ'"]:
+            self.assertEqual(create_kana(parse_kana(text)), text)
+
+
+class TestParseKanaException(TestCase):
+    def _assert_error_code(self, kana: str, code: ParseKanaErrorCode):
+        with self.assertRaises(ParseKanaError) as err:
+            parse_kana(kana)
+        self.assertEqual(err.exception.errcode, code)
+
+    def test_exceptions(self):
+        self._assert_error_code("アクセント", ParseKanaErrorCode.ACCENT_NOTFOUND)
+        self._assert_error_code("'アクセント", ParseKanaErrorCode.ACCENT_TOP)
+        self._assert_error_code("ア'ク'セント", ParseKanaErrorCode.ACCENT_TWICE)
+        self._assert_error_code("ひ'らがな", ParseKanaErrorCode.UNKNOWN_TEXT)
+        self._assert_error_code("__ス'", ParseKanaErrorCode.UNKNOWN_TEXT)
+        self._assert_error_code("ア'/", ParseKanaErrorCode.EMPTY_PHRASE)
+        self._assert_error_code("/ア'", ParseKanaErrorCode.EMPTY_PHRASE)
+
+        with self.assertRaises(ParseKanaError) as err:
+            parse_kana("ヒト'ツメ/フタツメ")
+        self.assertEqual(err.exception.errcode, ParseKanaErrorCode.ACCENT_NOTFOUND)
+        self.assertEqual(err.exception.kwargs, {"text": "フタツメ"})
+
+        with self.assertRaises(ParseKanaError) as err:
+            parse_kana("ア'/")
+        self.assertEqual(err.exception.errcode, ParseKanaErrorCode.EMPTY_PHRASE)
+        self.assertEqual(err.exception.kwargs, {"position": "2"})
diff --git a/voicevox_engine/kana_parser.py b/voicevox_engine/kana_parser.py
new file mode 100644
index 000000000..4092f01cd
--- /dev/null
+++ b/voicevox_engine/kana_parser.py
@@ -0,0 +1,122 @@
+from typing import List, Optional
+
+from voicevox_engine.model import AccentPhrase, Mora, ParseKanaError, ParseKanaErrorCode
+from voicevox_engine.mora_list import openjtalk_text2mora
+
+LOOP_LIMIT = 300
+UNVOICE_SYMBOL = "_"
+ACCENT_SYMBOL = "'"
+NOPAUSE_DELIMITER = "/"
+PAUSE_DELIMITER = "、"
+
+text2mora_with_unvoice = {}
+for text, (consonant, vowel) in openjtalk_text2mora.items():
+    text2mora_with_unvoice[text] = Mora(
+        text=text,
+        consonant=consonant if len(consonant) > 0 else None,
+        consonant_length=0 if len(consonant) > 0 else None,
+        vowel=vowel,
+        vowel_length=0,
+        pitch=0,
+    )
+    if vowel in ["a", "i", "u", "e", "o"]:
+        text2mora_with_unvoice[UNVOICE_SYMBOL + text] = Mora(
+            text=text,
+            consonant=consonant if len(consonant) > 0 else None,
+            consonant_length=0 if len(consonant) > 0 else None,
+            vowel=vowel.upper(),
+            vowel_length=0,
+            pitch=0,
+        )
+
+
+def _text_to_accent_phrase(phrase: str) -> AccentPhrase:
+    """
+    Build one AccentPhrase from a kana reading by longest-match mora lookup.
+    Runs in O(N^2) for an input of length N.
+    """
+    accent_index: Optional[int] = None
+    moras: List[Mora] = []
+
+    base_index = 0  # parse start; text to the right of it is pushed onto stack
+    stack = ""  # pending characters
+    matched_text: Optional[str] = None  # longest kana matched so far in stack
+
+    outer_loop = 0
+    while base_index < len(phrase):
+        outer_loop += 1
+        if phrase[base_index] == ACCENT_SYMBOL:
+            if len(moras) == 0:
+                raise ParseKanaError(ParseKanaErrorCode.ACCENT_TOP, text=phrase)
+            if accent_index is not None:
+                raise ParseKanaError(ParseKanaErrorCode.ACCENT_TWICE, text=phrase)
+            accent_index = len(moras)
+            base_index += 1
+            continue
+        for watch_index in range(base_index, len(phrase)):
+            if phrase[watch_index] == ACCENT_SYMBOL:
+                break
+            # ordinary character
+            stack += phrase[watch_index]
+            if stack in text2mora_with_unvoice:
+                matched_text = stack
+        # push a private copy: table entries are shared and later mutated in place
+        if matched_text is None:
+            raise ParseKanaError(ParseKanaErrorCode.UNKNOWN_TEXT, text=stack)
+        else:
+            moras.append(text2mora_with_unvoice[matched_text].copy(deep=True))
+            base_index += len(matched_text)
+            stack = ""
+            matched_text = None
+        if outer_loop > LOOP_LIMIT:
+            raise ParseKanaError(ParseKanaErrorCode.INFINITE_LOOP)
+    if accent_index is None:
+        raise ParseKanaError(ParseKanaErrorCode.ACCENT_NOTFOUND, text=phrase)
+    else:
+        return AccentPhrase(moras=moras, accent=accent_index, pause_mora=None)
+
+
+def parse_kana(text: str) -> List[AccentPhrase]:
+    """
+    AquesTalkライクな読み仮名をパースして音長・音高未指定のaccent phraseに変換
+    """
+    parsed_results: List[AccentPhrase] = []
+    phrase_base = 0
+    for i in range(len(text) + 1):
+        if i == len(text) or text[i] in [PAUSE_DELIMITER, NOPAUSE_DELIMITER]:
+            phrase = text[phrase_base:i]
+            if len(phrase) == 0:
+                raise ParseKanaError(
+                    ParseKanaErrorCode.EMPTY_PHRASE,
+                    position=str(len(parsed_results) + 1),
+                )
+            phrase_base = i + 1
+            accent_phrase: AccentPhrase = _text_to_accent_phrase(phrase)
+            if i < len(text) and text[i] == PAUSE_DELIMITER:
+                accent_phrase.pause_mora = Mora(
+                    text="、",
+                    consonant=None,
+                    consonant_length=None,
+                    vowel="pau",
+                    vowel_length=0,
+                    pitch=0,
+                )
+            parsed_results.append(accent_phrase)
+    return parsed_results
+
+
+def create_kana(accent_phrases: List[AccentPhrase]) -> str:
+    text = ""
+    for i, phrase in enumerate(accent_phrases):
+        for j, mora in enumerate(phrase.moras):
+            if mora.vowel in ["A", "I", "U", "E", "O"]:
+                text += UNVOICE_SYMBOL
+            text += mora.text
+            if j + 1 == phrase.accent:
+                text += ACCENT_SYMBOL
+        if i < len(accent_phrases) - 1:
+            if phrase.pause_mora is None:
+                text += NOPAUSE_DELIMITER
+            else:
+                text += PAUSE_DELIMITER
+    return text
diff --git a/voicevox_engine/model.py b/voicevox_engine/model.py
index 0cf41721d..c94209056 100644
--- a/voicevox_engine/model.py
+++ b/voicevox_engine/model.py
@@ -1,4 +1,5 @@
-from typing import List, Optional
+from enum import Enum
+from typing import Dict, List, Optional
 
 from pydantic import BaseModel, Field
 
@@ -40,6 +41,43 @@ class AudioQuery(BaseModel):
     postPhonemeLength: float = Field(title="音声の後の無音時間")
     outputSamplingRate: int = Field(title="音声データの出力サンプリングレート")
     outputStereo: bool = Field(title="音声データをステレオ出力するか否か")
+    kana: Optional[str] = Field(title="[読み取り専用]AquesTalkライクな読み仮名。音声合成クエリとしては無視される")
+
+
+class ParseKanaErrorCode(Enum):
+    UNKNOWN_TEXT = "判別できない読み仮名があります: {text}"
+    ACCENT_TOP = "句頭にアクセントは置けません: {text}"
+    ACCENT_TWICE = "1つのアクセント句に二つ以上のアクセントは置けません: {text}"
+    ACCENT_NOTFOUND = "アクセントを指定していないアクセント句があります: {text}"
+    EMPTY_PHRASE = "{position}番目のアクセント句が空白です"
+    INFINITE_LOOP = "処理時に無限ループになってしまいました...バグ報告をお願いします。"
+
+
+class ParseKanaError(Exception):
+    def __init__(self, errcode: ParseKanaErrorCode, **kwargs):
+        self.errcode = errcode
+        self.errname = errcode.name
+        self.kwargs: Dict[str, str] = kwargs
+        self.text = errcode.value.format(**kwargs)
+        super().__init__(self.text)  # make str(err) / tracebacks show the message
+
+
+class ParseKanaBadRequest(BaseModel):
+    text: str = Field(title="エラーメッセージ")
+    error_name: str = Field(
+        title="エラー名",
+        description="|name|description|\n|---|---|\n"
+        + "\n".join(
+            [
+                "| {} | {} |".format(err.name, err.value)
+                for err in list(ParseKanaErrorCode)
+            ]
+        ),
+    )
+    error_args: Dict[str, str] = Field(title="エラーを起こした箇所")
+
+    def __init__(self, err: ParseKanaError):
+        super().__init__(text=err.text, error_name=err.errname, error_args=err.kwargs)
 
 
 class Speaker(BaseModel):
diff --git a/voicevox_engine/mora_list.py b/voicevox_engine/mora_list.py
index af7da55d9..5a49f4a3a 100644
--- a/voicevox_engine/mora_list.py
+++ b/voicevox_engine/mora_list.py
@@ -41,171 +41,178 @@
 OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 """
+_mora_list_minimum = [
+    ["ヴォ", "v", "o"],
+    ["ヴェ", "v", "e"],
+    ["ヴィ", "v", "i"],
+    ["ヴァ", "v", "a"],
+    ["ヴ", "v", "u"],
+    ["ン", "", "N"],
+    ["ワ", "w", "a"],
+    ["ロ", "r", "o"],
+    ["レ", "r", "e"],
+    ["ル", "r", "u"],
+    ["リョ", "ry", "o"],
+    ["リュ", "ry", "u"],
+    ["リャ", "ry", "a"],
+    ["リェ", "ry", "e"],
+    ["リ", "r", "i"],
+    ["ラ", "r", "a"],
+    ["ヨ", "y", "o"],
+    ["ユ", "y", "u"],
+    ["ヤ", "y", "a"],
+    ["モ", "m", "o"],
+    ["メ", "m", "e"],
+    ["ム", "m", "u"],
+    ["ミョ", "my", "o"],
+    ["ミュ", "my", "u"],
+    ["ミャ", "my", "a"],
+    ["ミェ", "my", "e"],
+    ["ミ", "m", "i"],
+    ["マ", "m", "a"],
+    ["ポ", "p", "o"],
+    ["ボ", "b", "o"],
+    ["ホ", "h", "o"],
+    ["ペ", "p", "e"],
+    ["ベ", "b", "e"],
+    ["ヘ", "h", "e"],
+    ["プ", "p", "u"],
+    ["ブ", "b", "u"],
+    ["フォ", "f", "o"],
+    ["フェ", "f", "e"],
+    ["フィ", "f", "i"],
+    ["ファ", "f", "a"],
+    ["フ", "f", "u"],
+    ["ピョ", "py", "o"],
+    ["ピュ", "py", "u"],
+    ["ピャ", "py", "a"],
+    ["ピェ", "py", "e"],
+    ["ピ", "p", "i"],
+    ["ビョ", "by", "o"],
+    ["ビュ", "by", "u"],
+    ["ビャ", "by", "a"],
+    ["ビェ", "by", "e"],
+    ["ビ", "b", "i"],
+    ["ヒョ", "hy", "o"],
+    ["ヒュ", "hy", "u"],
+    ["ヒャ", "hy", "a"],
+    ["ヒェ", "hy", "e"],
+    ["ヒ", "h", "i"],
+    ["パ", "p", "a"],
+    ["バ", "b", "a"],
+    ["ハ", "h", "a"],
+    ["ノ", "n", "o"],
+    ["ネ", "n", "e"],
+    ["ヌ", "n", "u"],
+    ["ニョ", "ny", "o"],
+    ["ニュ", "ny", "u"],
+    ["ニャ", "ny", "a"],
+    ["ニェ", "ny", "e"],
+    ["ニ", "n", "i"],
+    ["ナ", "n", "a"],
+    ["ドゥ", "d", "u"],
+    ["ド", "d", "o"],
+    ["トゥ", "t", "u"],
+    ["ト", "t", "o"],
+    ["デョ", "dy", "o"],
+    ["デュ", "dy", "u"],
+    ["デャ", "dy", "a"],
+    ["デェ", "dy", "e"],
+    ["ディ", "d", "i"],
+    ["デ", "d", "e"],
+    ["テョ", "ty", "o"],
+    ["テュ", "ty", "u"],
+    ["テャ", "ty", "a"],
+    ["ティ", "t", "i"],
+    ["テ", "t", "e"],
+    ["ツォ", "ts", "o"],
+    ["ツェ", "ts", "e"],
+    ["ツィ", "ts", "i"],
+    ["ツァ", "ts", "a"],
+    ["ツ", "ts", "u"],
+    ["ッ", "", "cl"],
+    ["チョ", "ch", "o"],
+    ["チュ", "ch", "u"],
+    ["チャ", "ch", "a"],
+    ["チェ", "ch", "e"],
+    ["チ", "ch", "i"],
+    ["ダ", "d", "a"],
+    ["タ", "t", "a"],
+    ["ゾ", "z", "o"],
+    ["ソ", "s", "o"],
+    ["ゼ", "z", "e"],
+    ["セ", "s", "e"],
+    ["ズィ", "z", "i"],
+    ["ズ", "z", "u"],
+    ["スィ", "s", "i"],
+    ["ス", "s", "u"],
+    ["ジョ", "j", "o"],
+    ["ジュ", "j", "u"],
+    ["ジャ", "j", "a"],
+    ["ジェ", "j", "e"],
+    ["ジ", "j", "i"],
+    ["ショ", "sh", "o"],
+    ["シュ", "sh", "u"],
+    ["シャ", "sh", "a"],
+    ["シェ", "sh", "e"],
+    ["シ", "sh", "i"],
+    ["ザ", "z", "a"],
+    ["サ", "s", "a"],
+    ["ゴ", "g", "o"],
+    ["コ", "k", "o"],
+    ["ゲ", "g", "e"],
+    ["ケ", "k", "e"],
+    ["グヮ", "gw", "a"],
+    ["グ", "g", "u"],
+    ["クヮ", "kw", "a"],
+    ["ク", "k", "u"],
+    ["ギョ", "gy", "o"],
+    ["ギュ", "gy", "u"],
+    ["ギャ", "gy", "a"],
+    ["ギェ", "gy", "e"],
+    ["ギ", "g", "i"],
+    ["キョ", "ky", "o"],
+    ["キュ", "ky", "u"],
+    ["キャ", "ky", "a"],
+    ["キェ", "ky", "e"],
+    ["キ", "k", "i"],
+    ["ガ", "g", "a"],
+    ["カ", "k", "a"],
+    ["オ", "", "o"],
+    ["エ", "", "e"],
+    ["ウォ", "w", "o"],
+    ["ウェ", "w", "e"],
+    ["ウィ", "w", "i"],
+    ["ウ", "", "u"],
+    ["イェ", "y", "e"],
+    ["イ", "", "i"],
+    ["ア", "", "a"],
+]
+_mora_list_additional = [
+    ["ヴョ", "by", "o"],
+    ["ヴュ", "by", "u"],
+    ["ヴャ", "by", "a"],
+    ["ヲ", "", "o"],
+    ["ヱ", "", "e"],
+    ["ヰ", "", "i"],
+    ["ヮ", "w", "a"],
+    ["ョ", "y", "o"],
+    ["ュ", "y", "u"],
+    ["ヅ", "z", "u"],
+    ["ヂ", "j", "i"],
+    ["ヶ", "k", "e"],
+    ["ャ", "y", "a"],
+    ["ォ", "", "o"],
+    ["ェ", "", "e"],
+    ["ゥ", "", "u"],
+    ["ィ", "", "i"],
+    ["ァ", "", "a"],
+]
+
 openjtalk_mora2text = {
-    consonant + vowel: text
-    for [text, consonant, vowel] in [
-        # ["ヴョ", "by", "o"],
-        # ["ヴュ", "by", "u"],
-        # ["ヴャ", "by", "a"],
-        ["ヴォ", "v", "o"],
-        ["ヴェ", "v", "e"],
-        ["ヴィ", "v", "i"],
-        ["ヴァ", "v", "a"],
-        ["ヴ", "v", "u"],
-        ["ン", "", "N"],
-        # ["ヲ", "", "o"],
-        # ["ヱ", "", "e"],
-        # ["ヰ", "", "i"],
-        ["ワ", "w", "a"],
-        # ["ヮ", "w", "a"],
-        ["ロ", "r", "o"],
-        ["レ", "r", "e"],
-        ["ル", "r", "u"],
-        ["リョ", "ry", "o"],
-        ["リュ", "ry", "u"],
-        ["リャ", "ry", "a"],
-        ["リェ", "ry", "e"],
-        ["リ", "r", "i"],
-        ["ラ", "r", "a"],
-        ["ヨ", "y", "o"],
-        # ["ョ", "y", "o"],
-        ["ユ", "y", "u"],
-        # ["ュ", "y", "u"],
-        ["ヤ", "y", "a"],
-        # ["ャ", "y", "a"],
-        ["モ", "m", "o"],
-        ["メ", "m", "e"],
-        ["ム", "m", "u"],
-        ["ミョ", "my", "o"],
-        ["ミュ", "my", "u"],
-        ["ミャ", "my", "a"],
-        ["ミェ", "my", "e"],
-        ["ミ", "m", "i"],
-        ["マ", "m", "a"],
-        ["ポ", "p", "o"],
-        ["ボ", "b", "o"],
-        ["ホ", "h", "o"],
-        ["ペ", "p", "e"],
-        ["ベ", "b", "e"],
-        ["ヘ", "h", "e"],
-        ["プ", "p", "u"],
-        ["ブ", "b", "u"],
-        ["フォ", "f", "o"],
-        ["フェ", "f", "e"],
-        ["フィ", "f", "i"],
-        ["ファ", "f", "a"],
-        ["フ", "f", "u"],
-        ["ピョ", "py", "o"],
-        ["ピュ", "py", "u"],
-        ["ピャ", "py", "a"],
-        ["ピェ", "py", "e"],
-        ["ピ", "p", "i"],
-        ["ビョ", "by", "o"],
-        ["ビュ", "by", "u"],
-        ["ビャ", "by", "a"],
-        ["ビェ", "by", "e"],
-        ["ビ", "b", "i"],
-        ["ヒョ", "hy", "o"],
-        ["ヒュ", "hy", "u"],
-        ["ヒャ", "hy", "a"],
-        ["ヒェ", "hy", "e"],
-        ["ヒ", "h", "i"],
-        ["パ", "p", "a"],
-        ["バ", "b", "a"],
-        ["ハ", "h", "a"],
-        ["ノ", "n", "o"],
-        ["ネ", "n", "e"],
-        ["ヌ", "n", "u"],
-        ["ニョ", "ny", "o"],
-        ["ニュ", "ny", "u"],
-        ["ニャ", "ny", "a"],
-        ["ニェ", "ny", "e"],
-        ["ニ", "n", "i"],
-        ["ナ", "n", "a"],
-        ["ドゥ", "d", "u"],
-        ["ド", "d", "o"],
-        ["トゥ", "t", "u"],
-        ["ト", "t", "o"],
-        ["デョ", "dy", "o"],
-        ["デュ", "dy", "u"],
-        ["デャ", "dy", "a"],
-        ["デェ", "dy", "e"],
-        ["ディ", "d", "i"],
-        ["デ", "d", "e"],
-        ["テョ", "ty", "o"],
-        ["テュ", "ty", "u"],
-        ["テャ", "ty", "a"],
-        ["ティ", "t", "i"],
-        ["テ", "t", "e"],
-        # ["ヅ", "z", "u"],
-        ["ツォ", "ts", "o"],
-        ["ツェ", "ts", "e"],
-        ["ツィ", "ts", "i"],
-        ["ツァ", "ts", "a"],
-        ["ツ", "ts", "u"],
-        ["ッ", "", "cl"],
-        # ["ヂ", "j", "i"],
-        ["チョ", "ch", "o"],
-        ["チュ", "ch", "u"],
-        ["チャ", "ch", "a"],
-        ["チェ", "ch", "e"],
-        ["チ", "ch", "i"],
-        ["ダ", "d", "a"],
-        ["タ", "t", "a"],
-        ["ゾ", "z", "o"],
-        ["ソ", "s", "o"],
-        ["ゼ", "z", "e"],
-        ["セ", "s", "e"],
-        ["ズィ", "z", "i"],
-        ["ズ", "z", "u"],
-        ["スィ", "s", "i"],
-        ["ス", "s", "u"],
-        ["ジョ", "j", "o"],
-        ["ジュ", "j", "u"],
-        ["ジャ", "j", "a"],
-        ["ジェ", "j", "e"],
-        ["ジ", "j", "i"],
-        ["ショ", "sh", "o"],
-        ["シュ", "sh", "u"],
-        ["シャ", "sh", "a"],
-        ["シェ", "sh", "e"],
-        ["シ", "sh", "i"],
-        ["ザ", "z", "a"],
-        ["サ", "s", "a"],
-        ["ゴ", "g", "o"],
-        ["コ", "k", "o"],
-        ["ゲ", "g", "e"],
-        ["ケ", "k", "e"],
-        # ["ヶ", "k", "e"],
-        ["グヮ", "gw", "a"],
-        ["グ", "g", "u"],
-        ["クヮ", "kw", "a"],
-        ["ク", "k", "u"],
-        ["ギョ", "gy", "o"],
-        ["ギュ", "gy", "u"],
-        ["ギャ", "gy", "a"],
-        ["ギェ", "gy", "e"],
-        ["ギ", "g", "i"],
-        ["キョ", "ky", "o"],
-        ["キュ", "ky", "u"],
-        ["キャ", "ky", "a"],
-        ["キェ", "ky", "e"],
-        ["キ", "k", "i"],
-        ["ガ", "g", "a"],
-        ["カ", "k", "a"],
-        ["オ", "", "o"],
-        # ["ォ", "", "o"],
-        ["エ", "", "e"],
-        # ["ェ", "", "e"],
-        ["ウォ", "w", "o"],
-        ["ウェ", "w", "e"],
-        ["ウィ", "w", "i"],
-        ["ウ", "", "u"],
-        # ["ゥ", "", "u"],
-        ["イェ", "y", "e"],
-        ["イ", "", "i"],
-        # ["ィ", "", "i"],
-        ["ア", "", "a"],
-        # ["ァ", "", "a"],
-    ]
+    consonant + vowel: text for [text, consonant, vowel] in _mora_list_minimum
+}
+openjtalk_text2mora = {
+    text: (consonant, vowel)
+    for [text, consonant, vowel] in _mora_list_minimum + _mora_list_additional
 }

From 5ed623e5edc6b714c80c9e91fcf66074d826d93a Mon Sep 17 00:00:00 2001
From: ISHIDA Naoto <naoto@isnot.jp>
Date: Fri, 10 Sep 2021 00:43:25 +0900
Subject: [PATCH 5/7] =?UTF-8?q?[Mock]=20SynthesisEngine=E3=81=AE=E7=8E=87?=
 =?UTF-8?q?=E7=9B=B4=E3=81=AAMock=20ref.=20#27=20(#78)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* feat: add Mock class of SynthesisEngine w/ pyopenjtalk

* update: use Mock class for SynthesisEngine if import core was failure.

* fix: add new line for pass ci/lint

* fix: type annotation. uses numpy.typing

* fix: boolean value was reversed
---
 run.py                                        |  19 ++-
 .../dev/synthesis_engine/__init__.py          |   3 +
 voicevox_engine/dev/synthesis_engine/mock.py  | 128 ++++++++++++++++++
 3 files changed, 146 insertions(+), 4 deletions(-)
 create mode 100644 voicevox_engine/dev/synthesis_engine/__init__.py
 create mode 100644 voicevox_engine/dev/synthesis_engine/mock.py

diff --git a/run.py b/run.py
index bcbd7b512..4b13f78d4 100644
--- a/run.py
+++ b/run.py
@@ -52,11 +52,14 @@ def make_synthesis_engine(
         if voicevox_dir.exists():
             sys.path.insert(0, str(voicevox_dir))
 
+    has_voicevox_core = True
     try:
         import core
     except ImportError:
         from voicevox_engine.dev import core
 
+        has_voicevox_core = False
+
         # 音声ライブラリの Python モジュールをロードできなかった
         print(
             "Notice: mock-library will be used. Try re-run with valid --voicevox_dir",  # noqa
@@ -68,12 +71,20 @@ def make_synthesis_engine(
 
     core.initialize(voicelib_dir.as_posix() + "/", use_gpu)
 
-    return SynthesisEngine(
-        yukarin_s_forwarder=core.yukarin_s_forward,
-        yukarin_sa_forwarder=core.yukarin_sa_forward,
-        decode_forwarder=core.decode_forward,
+    if has_voicevox_core:
+        return SynthesisEngine(
+            yukarin_s_forwarder=core.yukarin_s_forward,
+            yukarin_sa_forwarder=core.yukarin_sa_forward,
+            decode_forwarder=core.decode_forward,
+        )
+
+    from voicevox_engine.dev.synthesis_engine import (
+        SynthesisEngine as mock_synthesis_engine,
     )
 
+    # モックで置き換える
+    return mock_synthesis_engine()
+
 
 def mora_to_text(mora: str):
     if mora[-1:] in ["A", "I", "U", "E", "O"]:
diff --git a/voicevox_engine/dev/synthesis_engine/__init__.py b/voicevox_engine/dev/synthesis_engine/__init__.py
new file mode 100644
index 000000000..373ee86de
--- /dev/null
+++ b/voicevox_engine/dev/synthesis_engine/__init__.py
@@ -0,0 +1,3 @@
+from .mock import SynthesisEngine
+
+__all__ = ["SynthesisEngine"]
diff --git a/voicevox_engine/dev/synthesis_engine/mock.py b/voicevox_engine/dev/synthesis_engine/mock.py
new file mode 100644
index 000000000..d8b6a3449
--- /dev/null
+++ b/voicevox_engine/dev/synthesis_engine/mock.py
@@ -0,0 +1,128 @@
+from logging import getLogger
+from typing import Any, Dict, List
+
+import numpy as np
+import numpy.typing as npt
+from pyopenjtalk import tts
+from resampy import resample
+
+from voicevox_engine.model import AccentPhrase, AudioQuery
+from voicevox_engine.synthesis_engine import to_flatten_moras
+
+
+class SynthesisEngine:
+    """
+    SynthesisEngine [Mock]: synthesizes via pyopenjtalk, without voicevox core
+    """
+
+    def __init__(self, **kwargs):
+        """
+        __init__ accepts and ignores any keyword arguments [Mock]
+        """
+        super().__init__()
+
+    def replace_phoneme_length(
+        self, accent_phrases: List[AccentPhrase], speaker_id: int
+    ) -> List[AccentPhrase]:
+        """
+        replace_phoneme_length returns the input accent_phrases unchanged [Mock]
+
+        Parameters
+        ----------
+        accent_phrases : List[AccentPhrase]
+            List of accent phrases
+        speaker_id : int
+            Speaker (unused by the mock)
+
+        Returns
+        -------
+        List[AccentPhrase]
+            List of accent phrases (unchanged)
+        """
+        return accent_phrases
+
+    def replace_mora_pitch(
+        self, accent_phrases: List[AccentPhrase], speaker_id: int
+    ) -> List[AccentPhrase]:
+        """
+        replace_mora_pitch returns the input accent_phrases unchanged [Mock]
+
+        Parameters
+        ----------
+        accent_phrases : List[AccentPhrase]
+            List of accent phrases
+        speaker_id : int
+            Speaker (unused by the mock)
+
+        Returns
+        -------
+        List[AccentPhrase]
+            List of accent phrases (unchanged)
+        """
+        return accent_phrases
+
+    def synthesis(self, query: AudioQuery, speaker_id: int) -> npt.NDArray[np.int16]:
+        """
+        synthesis voicevox coreを使わずに、音声合成する [Mock]
+
+        Parameters
+        ----------
+        query : AudioQuery
+            JSON obtained from the /audio_query API
+        speaker_id : int
+            Speaker (unused by the mock)
+
+        Returns
+        -------
+        wave [npt.NDArray[np.int16]]
+            Audio waveform data, returned as a NumPy array
+        """
+        # recall text in katakana
+        flatten_moras = to_flatten_moras(query.accent_phrases)
+        kana_text = "".join([mora.text for mora in flatten_moras])
+
+        wave = self.forward(kana_text)
+
+        # volume (in-place int16 *= float raises, so scale as float and saturate)
+        if query.volumeScale != 1:
+            wave = np.clip(wave * query.volumeScale, -32768, 32767)
+
+        return wave.astype("int16")
+
+    def forward(self, text: str, **kwargs: Dict[str, Any]) -> npt.NDArray[np.int16]:
+        """
+        forward tts via pyopenjtalk.tts()
+        参照→SynthesisEngine のdocstring [Mock]
+
+        Parameters
+        ----------
+        text : str
+            Input string (e.g. the sentence to read aloud, written in katakana)
+
+        Returns
+        -------
+        wave [npt.NDArray[np.int16]]
+            Audio waveform data, returned as a NumPy array
+
+        Note
+        -------
+        The synthesis performed here does not reflect tuning (pitch, etc.)
+
+        # Output specification of pyopenjtalk.tts()
+        dtype=np.float64, 16 bit, mono 48000 Hz
+
+        # About resample
+        Float64 input seemed fine in theory, but the output was badly distorted.
+        As a workaround, casting to int16 beforehand gave the expected result.
+        Output is resampled to 24kHz to match the non-mock decode_forward.
+        """
+        logger = getLogger("uvicorn")  # used from within FastAPI / Uvicorn
+        logger.info("[Mock] input text: %s", text)
+        wave, sr = tts(text)
+        wave = resample(
+            wave.astype("int16"),
+            sr,
+            24000,
+            filter="kaiser_fast",
+        )
+        return wave.astype("int16")

From f3f9f91c3068bbbba0a194f5783ed9998dd0e374 Mon Sep 17 00:00:00 2001
From: buckw6eat <mistlain@gmail.com>
Date: Sat, 11 Sep 2021 00:50:59 +0900
Subject: [PATCH 6/7] fix: align numpy 1.20 syntax (#81)

---
 voicevox_engine/dev/synthesis_engine/mock.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/voicevox_engine/dev/synthesis_engine/mock.py b/voicevox_engine/dev/synthesis_engine/mock.py
index d8b6a3449..801d5b348 100644
--- a/voicevox_engine/dev/synthesis_engine/mock.py
+++ b/voicevox_engine/dev/synthesis_engine/mock.py
@@ -2,7 +2,6 @@
 from typing import Any, Dict, List
 
 import numpy as np
-import numpy.typing as npt
 from pyopenjtalk import tts
 from resampy import resample
 
@@ -61,7 +60,7 @@ def replace_mora_pitch(
         """
         return accent_phrases
 
-    def synthesis(self, query: AudioQuery, speaker_id: int) -> npt.NDArray[np.int16]:
+    def synthesis(self, query: AudioQuery, speaker_id: int) -> np.ndarray:
         """
         synthesis voicevox coreを使わずに、音声合成する [Mock]
 
@@ -89,7 +88,7 @@ def synthesis(self, query: AudioQuery, speaker_id: int) -> npt.NDArray[np.int16]
 
         return wave.astype("int16")
 
-    def forward(self, text: str, **kwargs: Dict[str, Any]) -> npt.NDArray[np.int16]:
+    def forward(self, text: str, **kwargs: Dict[str, Any]) -> np.ndarray:
         """
         forward tts via pyopenjtalk.tts()
         参照→SynthesisEngine のdocstring [Mock]

From 264aff64cfdf4cf91f51972685a080df0f1271c9 Mon Sep 17 00:00:00 2001
From: buckw6eat <mistlain@gmail.com>
Date: Sat, 11 Sep 2021 01:09:31 +0900
Subject: [PATCH 7/7] =?UTF-8?q?=E9=9F=B3=E7=B4=A0=E5=8C=96=E3=81=95?=
 =?UTF-8?q?=E3=82=8C=E3=81=AA=E3=81=84=E3=83=86=E3=82=AD=E3=82=B9=E3=83=88?=
 =?UTF-8?q?=E5=85=A5=E5=8A=9B=E3=81=AB=E5=AF=BE=E5=BF=9C=20(#82)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* fix: make sure `utterance` isn't empty

* Apply suggestions from code review

テスト

Co-authored-by: Hiroshiba <hihokaruta@gmail.com>
---
 run.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/run.py b/run.py
index 4b13f78d4..cf59c1bfd 100644
--- a/run.py
+++ b/run.py
@@ -130,6 +130,9 @@ def create_accent_phrases(text: str, speaker_id: int) -> List[AccentPhrase]:
             return []
 
         utterance = extract_full_context_label(text)
+        if len(utterance.breath_groups) == 0:
+            return []
+
         return replace_mora_data(
             accent_phrases=[
                 AccentPhrase(