diff --git a/.gitattributes b/.gitattributes index 537137ab5..c0428316d 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,3 +1,5 @@ +test/**/__snapshots__/**/*.json linguist-generated=true + * text=auto *.png -text -*.wav -text \ No newline at end of file +*.wav -text diff --git a/.github/workflows/build-docker.yml b/.github/workflows/build-docker.yml index f146f3bff..8ce46a1c0 100644 --- a/.github/workflows/build-docker.yml +++ b/.github/workflows/build-docker.yml @@ -32,7 +32,7 @@ jobs: id: vars run: | : # releaseタグ名か、workflow_dispatchでのバージョン名か、latestが入る - echo "version_or_latest=${{ github.event.release.tag_name || github.event.inputs.version || 'latest' }}" >> $GITHUB_OUTPUT + echo "version_or_latest=${{ github.event.release.tag_name || github.event.inputs.version || 'latest' }}" >> "$GITHUB_OUTPUT" build-docker: needs: [config] diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 2084ff6b6..470a14ac2 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -44,9 +44,9 @@ jobs: id: vars run: | : # release タグ名, または workflow_dispatch でのバージョン名. リリースでない (push event) 場合は空文字列 - echo "version=${{ github.event.release.tag_name || github.event.inputs.version }}" >> $GITHUB_OUTPUT + echo "version=${{ github.event.release.tag_name || github.event.inputs.version }}" >> "$GITHUB_OUTPUT" : # release タグ名, または workflow_dispatch でのバージョン名, または 'latest' - echo "version_or_latest=${{ github.event.release.tag_name || github.event.inputs.version || 'latest' }}" >> $GITHUB_OUTPUT + echo "version_or_latest=${{ github.event.release.tag_name || github.event.inputs.version || 'latest' }}" >> "$GITHUB_OUTPUT" build-and-upload: needs: [config] @@ -108,7 +108,7 @@ jobs: - name: declare variables id: vars run: | - echo "package_name=voicevox_engine-${{ matrix.target }}-${{ needs.config.outputs.version }}" >> $GITHUB_OUTPUT + echo "package_name=voicevox_engine-${{ matrix.target }}-${{ needs.config.outputs.version }}" >> "$GITHUB_OUTPUT" - uses: actions/checkout@v4 diff --git a/.github/workflows/release-test-docker.yml b/.github/workflows/release-test-docker.yml index da73a3a8f..d30337798 100644 --- a/.github/workflows/release-test-docker.yml +++ b/.github/workflows/release-test-docker.yml @@ -58,9 +58,9 @@ jobs: id: docker_vars run: | if [ "${{ matrix.tag }}" != "" ]; then - echo "image_tag=${{ env.IMAGE_NAME }}:${{ matrix.tag }}-${{ env.VERSION }}" >> $GITHUB_OUTPUT + echo "image_tag=${{ env.IMAGE_NAME }}:${{ matrix.tag }}-${{ env.VERSION }}" >> "$GITHUB_OUTPUT" else - echo "image_tag=${{ env.IMAGE_NAME }}:${{ env.VERSION }}" >> $GITHUB_OUTPUT + echo "image_tag=${{ env.IMAGE_NAME }}:${{ env.VERSION }}" >> "$GITHUB_OUTPUT" fi - name: Docker pull @@ -81,14 +81,14 @@ jobs: max_attempts=10 sleep_interval=5 - for i in $(seq 1 $max_attempts); do - status=$(curl -o /dev/null -s -w '%{http_code}\n' $url) - if [ $status -eq 200 ]; then - echo "Container is ready! Response status code: $status" + for i in $(seq 1 "$max_attempts"); do + status=$(curl -o /dev/null -s -w '%{http_code}\n' "$url") + if [ "$status" -eq 200 ]; then + echo "Container is ready! Response status code: ${status}" exit 0 else - echo "Attempt $i/$max_attempts: Response status code $status" - sleep $sleep_interval + echo "Attempt ${i}/${max_attempts}: Response status code $status" + sleep "${sleep_interval}" fi done exit 1 diff --git a/.github/workflows/release-test.yml b/.github/workflows/release-test.yml index d5995ae18..ec680ab10 100644 --- a/.github/workflows/release-test.yml +++ b/.github/workflows/release-test.yml @@ -56,8 +56,8 @@ jobs: - name: declare variables id: vars run: | - echo "release_url=${{ env.REPO_URL }}/releases/download/${{ env.VERSION }}" >> $GITHUB_OUTPUT - echo "package_name=voicevox_engine-${{ matrix.target }}-${{ env.VERSION }}" >> $GITHUB_OUTPUT + echo "release_url=${{ env.REPO_URL }}/releases/download/${{ env.VERSION }}" >> "$GITHUB_OUTPUT" + echo "package_name=voicevox_engine-${{ matrix.target }}-${{ env.VERSION }}" >> "$GITHUB_OUTPUT" - uses: actions/checkout@v4 @@ -72,7 +72,7 @@ jobs: curl -L -o "download/list.txt" "${{ steps.vars.outputs.release_url }}/${{ steps.vars.outputs.package_name }}.7z.txt" cat "download/list.txt" | xargs -I '%' curl -L -o "download/%" "${{ steps.vars.outputs.release_url }}/%" 7z x "download/$(head -n1 download/list.txt)" - mv ${{ matrix.target }} dist/ + mv "${{ matrix.target }}" dist/ - name: chmod +x if: startsWith(matrix.target, 'linux') || startsWith(matrix.target, 'macos') diff --git a/build_util/create_venv_and_generate_licenses.bash b/build_util/create_venv_and_generate_licenses.bash index 71a5f61c9..fc9dd0dc5 100644 --- a/build_util/create_venv_and_generate_licenses.bash +++ b/build_util/create_venv_and_generate_licenses.bash @@ -17,7 +17,7 @@ else fi pip install -r requirements-license.txt -python build_util/generate_licenses.py >$OUTPUT_LICENSE_JSON_PATH +python build_util/generate_licenses.py > "${OUTPUT_LICENSE_JSON_PATH}" deactivate diff --git a/build_util/process_voicevox_resource.bash b/build_util/process_voicevox_resource.bash index 7bd1d31f9..c085dfeee 100644 --- a/build_util/process_voicevox_resource.bash +++ b/build_util/process_voicevox_resource.bash @@ -6,22 +6,22 @@ if [ ! -v DOWNLOAD_RESOURCE_PATH ]; then fi rm -r speaker_info -cp -r $DOWNLOAD_RESOURCE_PATH/character_info speaker_info +cp -r "${DOWNLOAD_RESOURCE_PATH}/character_info" speaker_info # キャラクター情報の前処理をする -python $DOWNLOAD_RESOURCE_PATH/scripts/clean_character_info.py \ +python "${DOWNLOAD_RESOURCE_PATH}/scripts/clean_character_info.py" \ --character_info_dir speaker_info/ # マニフェスト -jq -s '.[0] * .[1]' engine_manifest.json $DOWNLOAD_RESOURCE_PATH/engine/engine_manifest.json \ +jq -s '.[0] * .[1]' engine_manifest.json "${DOWNLOAD_RESOURCE_PATH}/engine/engine_manifest.json" \ > engine_manifest.json.tmp mv engine_manifest.json.tmp engine_manifest.json python build_util/merge_update_infos.py \ engine_manifest_assets/update_infos.json \ - $DOWNLOAD_RESOURCE_PATH/engine/engine_manifest_assets/update_infos.json \ + "${DOWNLOAD_RESOURCE_PATH}/engine/engine_manifest_assets/update_infos.json" \ engine_manifest_assets/update_infos.json for f in $(ls $DOWNLOAD_RESOURCE_PATH/engine/engine_manifest_assets/* | grep -v update_infos.json); do - cp $f ./engine_manifest_assets/ + cp "${f}" ./engine_manifest_assets/ done diff --git a/run.py b/run.py index 15d18783a..9037d2d85 100644 --- a/run.py +++ b/run.py @@ -550,7 +550,7 @@ def morphable_targets( 指定されたベーススタイルに対してエンジン内の各話者がモーフィング機能を利用可能か返します。 モーフィングの許可/禁止は`/speakers`の`speaker.supported_features.synthesis_morphing`に記載されています。 プロパティが存在しない場合は、モーフィングが許可されているとみなします。 - 返り値の話者はstring型なので注意。 + 返り値のスタイルIDはstring型なので注意。 """ core = get_core(core_version) @@ -605,7 +605,7 @@ def _synthesis_morphing( if not is_permitted: raise HTTPException( status_code=400, - detail="指定された話者ペアでのモーフィングはできません", + detail="指定されたスタイルペアでのモーフィングはできません", ) except StyleIdNotFoundError as e: raise HTTPException( diff --git a/test/conftest.py b/test/conftest.py index dd7920d24..e354d3809 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -10,7 +10,7 @@ def snapshot_json(snapshot: SnapshotAssertion) -> SnapshotAssertion: Examples -------- - >>> def test_foo(snapshot_json: JSONSnapshotExtension): + >>> def test_foo(snapshot_json: SnapshotAssertion): >>> assert snapshot_json == {"key": "value"} """ return snapshot.use_extension(JSONSnapshotExtension) diff --git "a/test/e2e/__snapshots__/test_openapi/test_OpenAPI\343\201\256\345\275\242\343\201\214\345\244\211\343\202\217\343\201\243\343\201\246\343\201\204\343\201\252\343\201\204\343\201\223\343\201\250\343\202\222\347\242\272\350\252\215.json" "b/test/e2e/__snapshots__/test_openapi/test_OpenAPI\343\201\256\345\275\242\343\201\214\345\244\211\343\202\217\343\201\243\343\201\246\343\201\204\343\201\252\343\201\204\343\201\223\343\201\250\343\202\222\347\242\272\350\252\215.json" index 6a173a16e..f766359ce 100644 --- "a/test/e2e/__snapshots__/test_openapi/test_OpenAPI\343\201\256\345\275\242\343\201\214\345\244\211\343\202\217\343\201\243\343\201\246\343\201\204\343\201\252\343\201\204\343\201\223\343\201\250\343\202\222\347\242\272\350\252\215.json" +++ "b/test/e2e/__snapshots__/test_openapi/test_OpenAPI\343\201\256\345\275\242\343\201\214\345\244\211\343\202\217\343\201\243\343\201\246\343\201\204\343\201\252\343\201\204\343\201\223\343\201\250\343\202\222\347\242\272\350\252\215.json" @@ -2054,7 +2054,7 @@ }, "/morphable_targets": { "post": { - "description": "指定されたベーススタイルに対してエンジン内の各話者がモーフィング機能を利用可能か返します。\nモーフィングの許可/禁止は`/speakers`の`speaker.supported_features.synthesis_morphing`に記載されています。\nプロパティが存在しない場合は、モーフィングが許可されているとみなします。\n返り値の話者はstring型なので注意。", + "description": "指定されたベーススタイルに対してエンジン内の各話者がモーフィング機能を利用可能か返します。\nモーフィングの許可/禁止は`/speakers`の`speaker.supported_features.synthesis_morphing`に記載されています。\nプロパティが存在しない場合は、モーフィングが許可されているとみなします。\n返り値のスタイルIDはstring型なので注意。", "operationId": "morphable_targets_morphable_targets_post", "parameters": [ { diff --git "a/test/e2e/__snapshots__/test_preset/test_\343\203\227\343\203\252\343\202\273\343\203\203\343\203\210\344\270\200\350\246\247\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213.json" "b/test/e2e/__snapshots__/test_preset/test_\343\203\227\343\203\252\343\202\273\343\203\203\343\203\210\344\270\200\350\246\247\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213.json" new file mode 100644 index 000000000..07e2707b8 --- /dev/null +++ "b/test/e2e/__snapshots__/test_preset/test_\343\203\227\343\203\252\343\202\273\343\203\203\343\203\210\344\270\200\350\246\247\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213.json" @@ -0,0 +1,14 @@ +[ + { + "id": 1, + "intonationScale": 1.0, + "name": "サンプルプリセット", + "pitchScale": 0.0, + "postPhonemeLength": 0.1, + "prePhonemeLength": 0.1, + "speaker_uuid": "7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff", + "speedScale": 1.0, + "style_id": 0, + "volumeScale": 1.0 + } +] diff --git "a/test/e2e/__snapshots__/test_speakers/test_\346\255\214\346\211\213\343\201\256\346\203\205\345\240\261\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213[388f246b-8c41-4ac1-8e2d-5d79f3ff56d9].json" "b/test/e2e/__snapshots__/test_speakers/test_\346\255\214\346\211\213\343\201\256\346\203\205\345\240\261\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213[388f246b-8c41-4ac1-8e2d-5d79f3ff56d9].json" new file mode 100644 index 000000000..9c95c48d5 --- /dev/null +++ "b/test/e2e/__snapshots__/test_speakers/test_\346\255\214\346\211\213\343\201\256\346\203\205\345\240\261\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213[388f246b-8c41-4ac1-8e2d-5d79f3ff56d9].json" @@ -0,0 +1,11 @@ +{ + "policy": "dummy2 policy\n\nhttps://voicevox.hiroshiba.jp/\n", + "style_infos": [ + { + "id": 5 + }, + { + "id": 7 + } + ] +} diff --git "a/test/e2e/__snapshots__/test_speakers/test_\346\255\214\346\211\213\343\201\256\346\203\205\345\240\261\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213[7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff].json" "b/test/e2e/__snapshots__/test_speakers/test_\346\255\214\346\211\213\343\201\256\346\203\205\345\240\261\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213[7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff].json" new file mode 100644 index 000000000..de6120e14 --- /dev/null +++ "b/test/e2e/__snapshots__/test_speakers/test_\346\255\214\346\211\213\343\201\256\346\203\205\345\240\261\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213[7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff].json" @@ -0,0 +1,11 @@ +{ + "policy": "dummy1 policy\n\nhttps://voicevox.hiroshiba.jp/\n", + "style_infos": [ + { + "id": 4 + }, + { + "id": 6 + } + ] +} diff --git "a/test/e2e/__snapshots__/test_speakers/test_\346\255\214\346\211\213\343\201\256\346\203\205\345\240\261\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213[b1a81618-b27b-40d2-b0ea-27a9ad408c4b].json" "b/test/e2e/__snapshots__/test_speakers/test_\346\255\214\346\211\213\343\201\256\346\203\205\345\240\261\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213[b1a81618-b27b-40d2-b0ea-27a9ad408c4b].json" new file mode 100644 index 000000000..73bb6af62 --- /dev/null +++ "b/test/e2e/__snapshots__/test_speakers/test_\346\255\214\346\211\213\343\201\256\346\203\205\345\240\261\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213[b1a81618-b27b-40d2-b0ea-27a9ad408c4b].json" @@ -0,0 +1,8 @@ +{ + "policy": "dummy4 policy\n\nhttps://voicevox.hiroshiba.jp/\n", + "style_infos": [ + { + "id": 9 + } + ] +} diff --git "a/test/e2e/__snapshots__/test_speakers/test_\346\255\214\346\211\213\344\270\200\350\246\247\343\201\214\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213.json" "b/test/e2e/__snapshots__/test_speakers/test_\346\255\214\346\211\213\344\270\200\350\246\247\343\201\214\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213.json" new file mode 100644 index 000000000..e421371b7 --- /dev/null +++ "b/test/e2e/__snapshots__/test_speakers/test_\346\255\214\346\211\213\344\270\200\350\246\247\343\201\214\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213.json" @@ -0,0 +1,57 @@ +[ + { + "name": "dummy1", + "speaker_uuid": "7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff", + "styles": [ + { + "id": 4, + "name": "style2", + "type": "frame_decode" + }, + { + "id": 6, + "name": "style3", + "type": "frame_decode" + } + ], + "supported_features": { + "permitted_synthesis_morphing": "ALL" + }, + "version": "mock" + }, + { + "name": "dummy2", + "speaker_uuid": "388f246b-8c41-4ac1-8e2d-5d79f3ff56d9", + "styles": [ + { + "id": 5, + "name": "style2", + "type": "frame_decode" + }, + { + "id": 7, + "name": "style3", + "type": "sing" + } + ], + "supported_features": { + "permitted_synthesis_morphing": "SELF_ONLY" + }, + "version": "mock" + }, + { + "name": "dummy4", + "speaker_uuid": "b1a81618-b27b-40d2-b0ea-27a9ad408c4b", + "styles": [ + { + "id": 9, + "name": "style0", + "type": "sing" + } + ], + "supported_features": { + "permitted_synthesis_morphing": "ALL" + }, + "version": "mock" + } +] diff --git "a/test/e2e/__snapshots__/test_speakers/test_\350\251\261\350\200\205\343\201\256\346\203\205\345\240\261\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213[35b2c544-660e-401e-b503-0e14c635303a].json" "b/test/e2e/__snapshots__/test_speakers/test_\350\251\261\350\200\205\343\201\256\346\203\205\345\240\261\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213[35b2c544-660e-401e-b503-0e14c635303a].json" new file mode 100644 index 000000000..236cc8d5a --- /dev/null +++ "b/test/e2e/__snapshots__/test_speakers/test_\350\251\261\350\200\205\343\201\256\346\203\205\345\240\261\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213[35b2c544-660e-401e-b503-0e14c635303a].json" @@ -0,0 +1,8 @@ +{ + "policy": "dummy3 policy\n\nhttps://voicevox.hiroshiba.jp/\n", + "style_infos": [ + { + "id": 8 + } + ] +} diff --git "a/test/e2e/__snapshots__/test_speakers/test_\350\251\261\350\200\205\343\201\256\346\203\205\345\240\261\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213[388f246b-8c41-4ac1-8e2d-5d79f3ff56d9].json" "b/test/e2e/__snapshots__/test_speakers/test_\350\251\261\350\200\205\343\201\256\346\203\205\345\240\261\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213[388f246b-8c41-4ac1-8e2d-5d79f3ff56d9].json" new file mode 100644 index 000000000..e7f286678 --- /dev/null +++ "b/test/e2e/__snapshots__/test_speakers/test_\350\251\261\350\200\205\343\201\256\346\203\205\345\240\261\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213[388f246b-8c41-4ac1-8e2d-5d79f3ff56d9].json" @@ -0,0 +1,11 @@ +{ + "policy": "dummy2 policy\n\nhttps://voicevox.hiroshiba.jp/\n", + "style_infos": [ + { + "id": 1 + }, + { + "id": 3 + } + ] +} diff --git "a/test/e2e/__snapshots__/test_speakers/test_\350\251\261\350\200\205\343\201\256\346\203\205\345\240\261\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213[7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff].json" "b/test/e2e/__snapshots__/test_speakers/test_\350\251\261\350\200\205\343\201\256\346\203\205\345\240\261\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213[7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff].json" new file mode 100644 index 000000000..b5566c22b --- /dev/null +++ "b/test/e2e/__snapshots__/test_speakers/test_\350\251\261\350\200\205\343\201\256\346\203\205\345\240\261\343\202\222\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213[7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff].json" @@ -0,0 +1,11 @@ +{ + "policy": "dummy1 policy\n\nhttps://voicevox.hiroshiba.jp/\n", + "style_infos": [ + { + "id": 0 + }, + { + "id": 2 + } + ] +} diff --git a/test/e2e/__snapshots__/test_validate_speakers/test_fetch_speakers_success.json "b/test/e2e/__snapshots__/test_speakers/test_\350\251\261\350\200\205\344\270\200\350\246\247\343\201\214\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213.json" similarity index 62% rename from test/e2e/__snapshots__/test_validate_speakers/test_fetch_speakers_success.json rename to "test/e2e/__snapshots__/test_speakers/test_\350\251\261\350\200\205\344\270\200\350\246\247\343\201\214\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213.json" index fba38bc61..bf6d37e51 100644 --- a/test/e2e/__snapshots__/test_validate_speakers/test_fetch_speakers_success.json +++ "b/test/e2e/__snapshots__/test_speakers/test_\350\251\261\350\200\205\344\270\200\350\246\247\343\201\214\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213.json" @@ -12,16 +12,6 @@ "id": 2, "name": "style1", "type": "talk" - }, - { - "id": 4, - "name": "style2", - "type": "talk" - }, - { - "id": 6, - "name": "style3", - "type": "talk" } ], "supported_features": { @@ -42,16 +32,6 @@ "id": 3, "name": "style1", "type": "talk" - }, - { - "id": 5, - "name": "style2", - "type": "talk" - }, - { - "id": 7, - "name": "style3", - "type": "talk" } ], "supported_features": { @@ -73,20 +53,5 @@ "permitted_synthesis_morphing": "NOTHING" }, "version": "mock" - }, - { - "name": "dummy4", - "speaker_uuid": "b1a81618-b27b-40d2-b0ea-27a9ad408c4b", - "styles": [ - { - "id": 9, - "name": "style0", - "type": "talk" - } - ], - "supported_features": { - "permitted_synthesis_morphing": "ALL" - }, - "version": "mock" } ] diff --git a/test/e2e/test_audio_query.py b/test/e2e/test_audio_query.py index 6f8c6fa86..a77db614e 100644 --- a/test/e2e/test_audio_query.py +++ b/test/e2e/test_audio_query.py @@ -5,11 +5,11 @@ from test.utility import round_floats from fastapi.testclient import TestClient -from syrupy.extensions.json import JSONSnapshotExtension +from syrupy.assertion import SnapshotAssertion def test_speakerを指定して音声合成クエリが取得できる( - client: TestClient, snapshot_json: JSONSnapshotExtension + client: TestClient, snapshot_json: SnapshotAssertion ) -> None: response = client.post("/audio_query", params={"text": "テストです", "speaker": 0}) assert response.status_code == 200 diff --git a/test/e2e/test_openapi.py b/test/e2e/test_openapi.py index 005d0fd1e..d26a2b7c5 100644 --- a/test/e2e/test_openapi.py +++ b/test/e2e/test_openapi.py @@ -1,12 +1,10 @@ from typing import Any from fastapi import FastAPI -from syrupy.extensions.json import JSONSnapshotExtension +from syrupy.assertion import SnapshotAssertion -def test_OpenAPIの形が変わっていないことを確認( - app: FastAPI, snapshot_json: JSONSnapshotExtension -) -> None: +def test_OpenAPIの形が変わっていないことを確認(app: FastAPI, snapshot_json: SnapshotAssertion) -> None: # 変更があった場合はREADMEの「スナップショットの更新」の手順で更新可能 openapi: Any = app.openapi() # snapshot_jsonがmypyに対応していないのでワークアラウンド assert snapshot_json == openapi diff --git a/test/e2e/test_preset.py b/test/e2e/test_preset.py new file mode 100644 index 000000000..d1020d07b --- /dev/null +++ b/test/e2e/test_preset.py @@ -0,0 +1,12 @@ +""" +プリセットAPIのテスト +""" + +from fastapi.testclient import TestClient +from syrupy.assertion import SnapshotAssertion + + +def test_プリセット一覧を取得できる(client: TestClient, snapshot_json: SnapshotAssertion) -> None: + response = client.get("/presets") + assert response.status_code == 200 + assert snapshot_json == response.json() diff --git a/test/e2e/test_speakers.py b/test/e2e/test_speakers.py new file mode 100644 index 000000000..683d57203 --- /dev/null +++ b/test/e2e/test_speakers.py @@ -0,0 +1,57 @@ +""" +話者・歌手のテスト。 +TODO: 話者と歌手の両ドメイン共通のドメイン用語を定め、このテストファイル名を変更する。 +""" + +from fastapi.testclient import TestClient +from pydantic import parse_obj_as +from syrupy import filters +from syrupy.assertion import SnapshotAssertion + +from voicevox_engine.metas.Metas import Speaker + + +def test_話者一覧が取得できる(client: TestClient, snapshot_json: SnapshotAssertion) -> None: + response = client.get("/speakers") + assert response.status_code == 200 + assert snapshot_json == response.json() + + +def test_話者の情報を取得できる(client: TestClient, snapshot_json: SnapshotAssertion) -> None: + speakers = parse_obj_as(list[Speaker], client.get("/speakers").json()) + for speaker in speakers: + response = client.get( + "/speaker_info", params={"speaker_uuid": speaker.speaker_uuid} + ) + assert ( + snapshot_json( + name=speaker.speaker_uuid, + exclude=filters.props( + "portrait", "icon", "voice_samples" + ), # バイナリファイル系は除外 FIXME: 除外せずにハッシュ化する + ) + == response.json() + ) + + +def test_歌手一覧が取得できる(client: TestClient, snapshot_json: SnapshotAssertion) -> None: + response = client.get("/singers") + assert response.status_code == 200 + assert snapshot_json == response.json() + + +def test_歌手の情報を取得できる(client: TestClient, snapshot_json: SnapshotAssertion) -> None: + singers = parse_obj_as(list[Speaker], client.get("/singers").json()) + for singer in singers: + response = client.get( + "/singer_info", params={"speaker_uuid": singer.speaker_uuid} + ) + assert ( + snapshot_json( + name=singer.speaker_uuid, + exclude=filters.props( + "portrait", "icon", "voice_samples" + ), # バイナリファイル系は除外 FIXME: 除外せずにハッシュ化する + ) + == response.json() + ) diff --git a/test/e2e/test_validate_speakers.py b/test/e2e/test_validate_speakers.py deleted file mode 100644 index b93c5f6a6..000000000 --- a/test/e2e/test_validate_speakers.py +++ /dev/null @@ -1,10 +0,0 @@ -from fastapi.testclient import TestClient -from syrupy.extensions.json import JSONSnapshotExtension - - -def test_fetch_speakers_success( - client: TestClient, snapshot_json: JSONSnapshotExtension -) -> None: - response = client.get("/speakers") - assert response.status_code == 200 - assert snapshot_json == response.json() diff --git a/test/tts_pipeline/__snapshots__/test_tts_engine/test_mocked_synthesize_wave_from_score_output[query].json b/test/tts_pipeline/__snapshots__/test_tts_engine/test_mocked_synthesize_wave_from_score_output[query].json new file mode 100644 index 000000000..ed97c822c --- /dev/null +++ b/test/tts_pipeline/__snapshots__/test_tts_engine/test_mocked_synthesize_wave_from_score_output[query].json @@ -0,0 +1,268 @@ +[ + [ + { + "frame_length": 4, + "phoneme": "pau" + }, + { + "frame_length": 6, + "phoneme": "d" + }, + { + "frame_length": 4, + "phoneme": "o" + }, + { + "frame_length": 8, + "phoneme": "r" + }, + { + "frame_length": 13, + "phoneme": "e" + }, + { + "frame_length": 4, + "phoneme": "m" + }, + { + "frame_length": 21, + "phoneme": "i" + }, + { + "frame_length": 3, + "phoneme": "pau" + }, + { + "frame_length": 2, + "phoneme": "f" + }, + { + "frame_length": 6, + "phoneme": "a" + }, + { + "frame_length": 6, + "phoneme": "s" + }, + { + "frame_length": 17, + "phoneme": "o" + }, + { + "frame_length": 10, + "phoneme": "pau" + } + ], + [ + 0.0, + 0.0, + 0.0, + 0.0, + 262.93, + 262.93, + 262.93, + 262.93, + 262.93, + 262.93, + 264.0, + 264.0, + 264.0, + 264.0, + 296.53, + 296.53, + 296.53, + 296.53, + 296.53, + 296.53, + 296.53, + 296.53, + 295.27, + 295.27, + 295.27, + 295.27, + 295.27, + 295.27, + 295.27, + 295.27, + 295.27, + 295.27, + 295.27, + 295.27, + 295.27, + 332.32, + 332.32, + 332.32, + 332.32, + 331.95, + 331.95, + 331.95, + 331.95, + 331.95, + 331.95, + 331.95, + 331.95, + 331.95, + 331.95, + 331.95, + 331.95, + 331.95, + 331.95, + 331.95, + 331.95, + 331.95, + 331.95, + 331.95, + 331.95, + 331.95, + 0.0, + 0.0, + 0.0, + 351.21, + 351.21, + 350.58, + 350.58, + 350.58, + 350.58, + 350.58, + 350.58, + 396.0, + 396.0, + 396.0, + 396.0, + 396.0, + 396.0, + 395.56, + 395.56, + 395.56, + 395.56, + 395.56, + 395.56, + 395.56, + 395.56, + 395.56, + 395.56, + 395.56, + 395.56, + 395.56, + 395.56, + 395.56, + 395.56, + 395.56, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + [ + 0.0, + 0.0, + 0.0, + 0.0, + 0.61, + 0.61, + 0.61, + 0.61, + 0.61, + 0.61, + 1.53, + 1.53, + 1.53, + 1.53, + 1.96, + 1.96, + 1.96, + 1.96, + 1.96, + 1.96, + 1.96, + 1.96, + 0.83, + 0.83, + 0.83, + 0.83, + 0.83, + 0.83, + 0.83, + 0.83, + 0.83, + 0.83, + 0.83, + 0.83, + 0.83, + 1.79, + 1.79, + 1.79, + 1.79, + 1.44, + 1.44, + 1.44, + 1.44, + 1.44, + 1.44, + 1.44, + 1.44, + 1.44, + 1.44, + 1.44, + 1.44, + 1.44, + 1.44, + 1.44, + 1.44, + 1.44, + 1.44, + 1.44, + 1.44, + 1.44, + 0.0, + 0.0, + 0.0, + 1.11, + 1.11, + 0.51, + 0.51, + 0.51, + 0.51, + 0.51, + 0.51, + 3.0, + 3.0, + 3.0, + 3.0, + 3.0, + 3.0, + 2.57, + 2.57, + 2.57, + 2.57, + 2.57, + 2.57, + 2.57, + 2.57, + 2.57, + 2.57, + 2.57, + 2.57, + 2.57, + 2.57, + 2.57, + 2.57, + 2.57, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ] +] diff --git a/test/tts_pipeline/__snapshots__/test_tts_engine/test_mocked_synthesize_wave_from_score_output[wave].json b/test/tts_pipeline/__snapshots__/test_tts_engine/test_mocked_synthesize_wave_from_score_output[wave].json new file mode 100644 index 000000000..3b711372a --- /dev/null +++ b/test/tts_pipeline/__snapshots__/test_tts_engine/test_mocked_synthesize_wave_from_score_output[wave].json @@ -0,0 +1,3995 @@ +[ + [ + 0.7 + ], + [ + 1.41 + ], + [ + 1.24 + ], + [ + 1.34 + ], + [ + 1.27 + ], + [ + 1.32 + ], + [ + 1.28 + ], + [ + 1.31 + ], + [ + 1.29 + ], + [ + 1.31 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.29 + ], + [ + 1.31 + ], + [ + 1.29 + ], + [ + 1.35 + ], + [ + 1.45 + ], + [ + 1.44 + ], + [ + 1.44 + ], + [ + 1.44 + ], + [ + 1.44 + ], + [ + 1.44 + ], + [ + 1.44 + ], + [ + 1.44 + ], + [ + 1.44 + ], + [ + 1.44 + ], + [ + 1.44 + ], + [ + 1.44 + ], + [ + 1.44 + ], + [ + 1.44 + ], + [ + 1.44 + ], + [ + 1.44 + ], + [ + 1.44 + ], + [ + 1.44 + ], + [ + 1.44 + ], + [ + 1.44 + ], + [ + 1.44 + ], + [ + 1.44 + ], + [ + 1.44 + ], + [ + 1.44 + ], + [ + 1.44 + ], + [ + 1.44 + ], + [ + 1.44 + ], + [ + 1.44 + ], + [ + 1.44 + ], + [ + 1.44 + ], + [ + 1.44 + ], + [ + 1.44 + ], + [ + 1.44 + ], + [ + 1.44 + ], + [ + 1.44 + ], + [ + 1.44 + ], + [ + 1.44 + ], + [ + 1.44 + ], + [ + 1.44 + ], + [ + 1.44 + ], + [ + 1.44 + ], + [ + 1.44 + ], + [ + 1.44 + ], + [ + 1.44 + ], + [ + 1.44 + ], + [ + 1.44 + ], + [ + 1.44 + ], + [ + 1.44 + ], + [ + 1.44 + ], + [ + 1.44 + ], + [ + 1.44 + ], + [ + 1.44 + ], + [ + 1.44 + ], + [ + 1.44 + ], + [ + 1.44 + ], + [ + 1.45 + ], + [ + 1.44 + ], + [ + 1.45 + ], + [ + 1.44 + ], + [ + 1.45 + ], + [ + 1.44 + ], + [ + 1.44 + ], + [ + 1.44 + ], + [ + 1.44 + ], + [ + 1.44 + ], + [ + 1.44 + ], + [ + 1.44 + ], + [ + 1.44 + ], + [ + 1.45 + ], + [ + 1.44 + ], + [ + 1.45 + ], + [ + 1.43 + ], + [ + 1.46 + ], + [ + 1.42 + ], + [ + 1.48 + ], + [ + 1.38 + ], + [ + 1.84 + ], + [ + 2.27 + ], + [ + 2.16 + ], + [ + 2.22 + ], + [ + 2.18 + ], + [ + 2.21 + ], + [ + 2.19 + ], + [ + 2.2 + ], + [ + 2.19 + ], + [ + 2.2 + ], + [ + 2.2 + ], + [ + 2.2 + ], + [ + 2.2 + ], + [ + 2.2 + ], + [ + 2.2 + ], + [ + 2.2 + ], + [ + 2.2 + ], + [ + 2.19 + ], + [ + 2.2 + ], + [ + 2.19 + ], + [ + 2.2 + ], + [ + 2.19 + ], + [ + 2.2 + ], + [ + 2.19 + ], + [ + 2.2 + ], + [ + 2.19 + ], + [ + 2.2 + ], + [ + 2.19 + ], + [ + 2.2 + ], + [ + 2.19 + ], + [ + 2.2 + ], + [ + 2.2 + ], + [ + 2.2 + ], + [ + 2.2 + ], + [ + 2.2 + ], + [ + 2.2 + ], + [ + 2.2 + ], + [ + 2.2 + ], + [ + 2.2 + ], + [ + 2.2 + ], + [ + 2.19 + ], + [ + 2.2 + ], + [ + 2.19 + ], + [ + 2.2 + ], + [ + 2.19 + ], + [ + 2.21 + ], + [ + 2.19 + ], + [ + 2.21 + ], + [ + 2.18 + ], + [ + 2.22 + ], + [ + 2.15 + ], + [ + 2.37 + ], + [ + 2.75 + ], + [ + 2.7 + ], + [ + 2.73 + ], + [ + 2.71 + ], + [ + 2.72 + ], + [ + 2.71 + ], + [ + 2.72 + ], + [ + 2.72 + ], + [ + 2.71 + ], + [ + 2.72 + ], + [ + 2.71 + ], + [ + 2.72 + ], + [ + 2.71 + ], + [ + 2.72 + ], + [ + 2.71 + ], + [ + 2.72 + ], + [ + 2.71 + ], + [ + 2.72 + ], + [ + 2.71 + ], + [ + 2.72 + ], + [ + 2.71 + ], + [ + 2.72 + ], + [ + 2.71 + ], + [ + 2.72 + ], + [ + 2.71 + ], + [ + 2.72 + ], + [ + 2.72 + ], + [ + 2.72 + ], + [ + 2.72 + ], + [ + 2.72 + ], + [ + 2.72 + ], + [ + 2.72 + ], + [ + 2.72 + ], + [ + 2.72 + ], + [ + 2.72 + ], + [ + 2.72 + ], + [ + 2.72 + ], + [ + 2.72 + ], + [ + 2.72 + ], + [ + 2.72 + ], + [ + 2.72 + ], + [ + 2.72 + ], + [ + 2.72 + ], + [ + 2.72 + ], + [ + 2.72 + ], + [ + 2.72 + ], + [ + 2.72 + ], + [ + 2.72 + ], + [ + 2.72 + ], + [ + 2.72 + ], + [ + 2.72 + ], + [ + 2.72 + ], + [ + 2.72 + ], + [ + 2.72 + ], + [ + 2.72 + ], + [ + 2.72 + ], + [ + 2.72 + ], + [ + 2.72 + ], + [ + 2.72 + ], + [ + 2.72 + ], + [ + 2.71 + ], + [ + 2.72 + ], + [ + 2.71 + ], + [ + 2.72 + ], + [ + 2.71 + ], + [ + 2.72 + ], + [ + 2.71 + ], + [ + 2.72 + ], + [ + 2.71 + ], + [ + 2.72 + ], + [ + 2.71 + ], + [ + 2.72 + ], + [ + 2.71 + ], + [ + 2.72 + ], + [ + 2.71 + ], + [ + 2.72 + ], + [ + 2.72 + ], + [ + 2.72 + ], + [ + 2.72 + ], + [ + 2.71 + ], + [ + 2.72 + ], + [ + 2.71 + ], + [ + 2.72 + ], + [ + 2.71 + ], + [ + 2.72 + ], + [ + 2.71 + ], + [ + 2.72 + ], + [ + 2.71 + ], + [ + 2.72 + ], + [ + 2.71 + ], + [ + 2.73 + ], + [ + 2.71 + ], + [ + 2.73 + ], + [ + 2.7 + ], + [ + 2.73 + ], + [ + 2.7 + ], + [ + 2.73 + ], + [ + 2.7 + ], + [ + 2.73 + ], + [ + 2.7 + ], + [ + 2.73 + ], + [ + 2.68 + ], + [ + 1.7 + ], + [ + 1.5 + ], + [ + 1.58 + ], + [ + 1.53 + ], + [ + 1.57 + ], + [ + 1.53 + ], + [ + 1.57 + ], + [ + 1.54 + ], + [ + 1.57 + ], + [ + 1.54 + ], + [ + 1.56 + ], + [ + 1.54 + ], + [ + 1.56 + ], + [ + 1.55 + ], + [ + 1.56 + ], + [ + 1.55 + ], + [ + 1.56 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.56 + ], + [ + 1.55 + ], + [ + 1.56 + ], + [ + 1.55 + ], + [ + 1.56 + ], + [ + 1.55 + ], + [ + 1.56 + ], + [ + 1.55 + ], + [ + 1.56 + ], + [ + 1.55 + ], + [ + 1.56 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.55 + ], + [ + 1.56 + ], + [ + 1.55 + ], + [ + 1.56 + ], + [ + 1.54 + ], + [ + 1.57 + ], + [ + 1.53 + ], + [ + 1.59 + ], + [ + 1.48 + ], + [ + 2.02 + ], + [ + 2.52 + ], + [ + 2.4 + ], + [ + 2.47 + ], + [ + 2.42 + ], + [ + 2.45 + ], + [ + 2.43 + ], + [ + 2.45 + ], + [ + 2.43 + ], + [ + 2.44 + ], + [ + 2.44 + ], + [ + 2.44 + ], + [ + 2.44 + ], + [ + 2.44 + ], + [ + 2.44 + ], + [ + 2.44 + ], + [ + 2.44 + ], + [ + 2.44 + ], + [ + 2.44 + ], + [ + 2.44 + ], + [ + 2.44 + ], + [ + 2.44 + ], + [ + 2.44 + ], + [ + 2.44 + ], + [ + 2.44 + ], + [ + 2.44 + ], + [ + 2.44 + ], + [ + 2.44 + ], + [ + 2.44 + ], + [ + 2.44 + ], + [ + 2.44 + ], + [ + 2.44 + ], + [ + 2.44 + ], + [ + 2.44 + ], + [ + 2.44 + ], + [ + 2.44 + ], + [ + 2.44 + ], + [ + 2.44 + ], + [ + 2.44 + ], + [ + 2.44 + ], + [ + 2.44 + ], + [ + 2.44 + ], + [ + 2.44 + ], + [ + 2.44 + ], + [ + 2.44 + ], + [ + 2.43 + ], + [ + 2.45 + ], + [ + 2.43 + ], + [ + 2.45 + ], + [ + 2.42 + ], + [ + 2.47 + ], + [ + 2.31 + ], + [ + 2.02 + ], + [ + 2.05 + ], + [ + 2.03 + ], + [ + 2.05 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.04 + ], + [ + 2.05 + ], + [ + 2.03 + ], + [ + 2.05 + ], + [ + 2.03 + ], + [ + 2.06 + ], + [ + 2.01 + ], + [ + 2.11 + ], + [ + 1.65 + ], + [ + 1.23 + ], + [ + 1.33 + ], + [ + 1.28 + ], + [ + 1.32 + ], + [ + 1.29 + ], + [ + 1.31 + ], + [ + 1.29 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.29 + ], + [ + 1.31 + ], + [ + 1.29 + ], + [ + 1.31 + ], + [ + 1.29 + ], + [ + 1.31 + ], + [ + 1.28 + ], + [ + 1.37 + ], + [ + 1.73 + ], + [ + 1.73 + ], + [ + 1.73 + ], + [ + 1.73 + ], + [ + 1.73 + ], + [ + 1.74 + ], + [ + 1.72 + ], + [ + 1.74 + ], + [ + 1.72 + ], + [ + 1.74 + ], + [ + 1.73 + ], + [ + 1.73 + ], + [ + 1.73 + ], + [ + 1.73 + ], + [ + 1.73 + ], + [ + 1.73 + ], + [ + 1.73 + ], + [ + 1.73 + ], + [ + 1.73 + ], + [ + 1.73 + ], + [ + 1.73 + ], + [ + 1.72 + ], + [ + 1.74 + ], + [ + 1.72 + ], + [ + 1.76 + ], + [ + 1.55 + ], + [ + 1.36 + ], + [ + 1.41 + ], + [ + 1.38 + ], + [ + 1.4 + ], + [ + 1.39 + ], + [ + 1.4 + ], + [ + 1.39 + ], + [ + 1.39 + ], + [ + 1.39 + ], + [ + 1.39 + ], + [ + 1.39 + ], + [ + 1.39 + ], + [ + 1.39 + ], + [ + 1.39 + ], + [ + 1.39 + ], + [ + 1.39 + ], + [ + 1.39 + ], + [ + 1.39 + ], + [ + 1.39 + ], + [ + 1.39 + ], + [ + 1.39 + ], + [ + 1.39 + ], + [ + 1.39 + ], + [ + 1.39 + ], + [ + 1.39 + ], + [ + 1.39 + ], + [ + 1.39 + ], + [ + 1.39 + ], + [ + 1.39 + ], + [ + 1.39 + ], + [ + 1.4 + ], + [ + 1.39 + ], + [ + 1.4 + ], + [ + 1.39 + ], + [ + 1.4 + ], + [ + 1.39 + ], + [ + 1.4 + ], + [ + 1.39 + ], + [ + 1.4 + ], + [ + 1.39 + ], + [ + 1.4 + ], + [ + 1.39 + ], + [ + 1.4 + ], + [ + 1.39 + ], + [ + 1.39 + ], + [ + 1.39 + ], + [ + 1.39 + ], + [ + 1.39 + ], + [ + 1.39 + ], + [ + 1.4 + ], + [ + 1.39 + ], + [ + 1.4 + ], + [ + 1.39 + ], + [ + 1.4 + ], + [ + 1.38 + ], + [ + 1.4 + ], + [ + 1.38 + ], + [ + 1.41 + ], + [ + 1.38 + ], + [ + 1.41 + ], + [ + 1.38 + ], + [ + 1.41 + ], + [ + 1.38 + ], + [ + 1.41 + ], + [ + 1.38 + ], + [ + 1.41 + ], + [ + 1.38 + ], + [ + 1.41 + ], + [ + 1.38 + ], + [ + 1.4 + ], + [ + 1.39 + ], + [ + 1.38 + ], + [ + 1.41 + ], + [ + 1.35 + ], + [ + 1.47 + ], + [ + 1.24 + ], + [ + 3.51 + ], + [ + 4.6 + ], + [ + 4.24 + ], + [ + 4.46 + ], + [ + 4.3 + ], + [ + 4.43 + ], + [ + 4.32 + ], + [ + 4.41 + ], + [ + 4.34 + ], + [ + 4.4 + ], + [ + 4.35 + ], + [ + 4.39 + ], + [ + 4.36 + ], + [ + 4.38 + ], + [ + 4.36 + ], + [ + 4.37 + ], + [ + 4.37 + ], + [ + 4.37 + ], + [ + 4.37 + ], + [ + 4.36 + ], + [ + 4.37 + ], + [ + 4.36 + ], + [ + 4.38 + ], + [ + 4.36 + ], + [ + 4.38 + ], + [ + 4.36 + ], + [ + 4.38 + ], + [ + 4.36 + ], + [ + 4.38 + ], + [ + 4.36 + ], + [ + 4.38 + ], + [ + 4.36 + ], + [ + 4.37 + ], + [ + 4.36 + ], + [ + 4.37 + ], + [ + 4.37 + ], + [ + 4.37 + ], + [ + 4.37 + ], + [ + 4.37 + ], + [ + 4.37 + ], + [ + 4.37 + ], + [ + 4.37 + ], + [ + 4.37 + ], + [ + 4.37 + ], + [ + 4.37 + ], + [ + 4.37 + ], + [ + 4.37 + ], + [ + 4.37 + ], + [ + 4.37 + ], + [ + 4.37 + ], + [ + 4.37 + ], + [ + 4.37 + ], + [ + 4.37 + ], + [ + 4.37 + ], + [ + 4.37 + ], + [ + 4.37 + ], + [ + 4.37 + ], + [ + 4.37 + ], + [ + 4.37 + ], + [ + 4.37 + ], + [ + 4.36 + ], + [ + 4.37 + ], + [ + 4.36 + ], + [ + 4.37 + ], + [ + 4.36 + ], + [ + 4.38 + ], + [ + 4.36 + ], + [ + 4.38 + ], + [ + 4.36 + ], + [ + 4.38 + ], + [ + 4.36 + ], + [ + 4.38 + ], + [ + 4.36 + ], + [ + 4.38 + ], + [ + 4.36 + ], + [ + 4.38 + ], + [ + 4.34 + ], + [ + 3.65 + ], + [ + 3.52 + ], + [ + 3.57 + ], + [ + 3.53 + ], + [ + 3.56 + ], + [ + 3.54 + ], + [ + 3.56 + ], + [ + 3.54 + ], + [ + 3.56 + ], + [ + 3.54 + ], + [ + 3.56 + ], + [ + 3.54 + ], + [ + 3.56 + ], + [ + 3.54 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.56 + ], + [ + 3.54 + ], + [ + 3.56 + ], + [ + 3.54 + ], + [ + 3.56 + ], + [ + 3.54 + ], + [ + 3.56 + ], + [ + 3.54 + ], + [ + 3.56 + ], + [ + 3.54 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.55 + ], + [ + 3.54 + ], + [ + 3.56 + ], + [ + 3.54 + ], + [ + 3.57 + ], + [ + 3.53 + ], + [ + 3.58 + ], + [ + 3.51 + ], + [ + 3.59 + ], + [ + 3.49 + ], + [ + 3.62 + ], + [ + 3.44 + ], + [ + 3.74 + ], + [ + 2.8 + ], + [ + 1.15 + ], + [ + 1.37 + ], + [ + 1.26 + ], + [ + 1.32 + ], + [ + 1.29 + ], + [ + 1.31 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.31 + ], + [ + 1.29 + ], + [ + 1.31 + ], + [ + 1.29 + ], + [ + 1.31 + ], + [ + 1.29 + ], + [ + 1.31 + ], + [ + 1.29 + ], + [ + 1.31 + ], + [ + 1.29 + ], + [ + 1.31 + ], + [ + 1.29 + ], + [ + 1.31 + ], + [ + 1.29 + ], + [ + 1.31 + ], + [ + 1.29 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.31 + ], + [ + 1.29 + ], + [ + 1.31 + ], + [ + 1.29 + ], + [ + 1.32 + ], + [ + 1.28 + ], + [ + 1.33 + ], + [ + 1.27 + ], + [ + 1.34 + ], + [ + 1.24 + ], + [ + 1.41 + ] +] diff --git a/test/tts_pipeline/test_tts_engine.py b/test/tts_pipeline/test_tts_engine.py index 3bcd0e0f0..fe36c8640 100644 --- a/test/tts_pipeline/test_tts_engine.py +++ b/test/tts_pipeline/test_tts_engine.py @@ -4,14 +4,17 @@ import numpy as np from numpy.typing import NDArray -from syrupy.extensions.json import JSONSnapshotExtension +from syrupy.assertion import SnapshotAssertion from voicevox_engine.dev.core.mock import MockCoreWrapper from voicevox_engine.metas.Metas import StyleId -from voicevox_engine.model import AccentPhrase, AudioQuery, Mora -from voicevox_engine.tts_pipeline.acoustic_feature_extractor import ( - UNVOICED_MORA_TAIL_PHONEMES, - Phoneme, +from voicevox_engine.model import ( + AccentPhrase, + AudioQuery, + FrameAudioQuery, + Mora, + Note, + Score, ) from voicevox_engine.tts_pipeline.tts_engine import ( TTSEngine, @@ -182,6 +185,21 @@ def _gen_hello_hiho_query() -> AudioQuery: ) +def _gen_doremi_score() -> Score: + return Score( + notes=[ + Note(key=None, frame_length=10, lyric=""), + Note(key=60, frame_length=12, lyric="ど"), + Note(key=62, frame_length=17, lyric="れ"), + Note(key=64, frame_length=21, lyric="み"), + Note(key=None, frame_length=5, lyric=""), + Note(key=65, frame_length=12, lyric="ふぁ"), + Note(key=67, frame_length=17, lyric="そ"), + Note(key=None, frame_length=10, lyric=""), + ] + ) + + class TestTTSEngine(TestCase): def setUp(self): super().setUp() @@ -204,8 +222,8 @@ def test_to_flatten_moras(self): def test_update_length(self): # Inputs hello_hiho = _gen_hello_hiho_accent_phrases() - # Outputs & Indirect Outputs(yukarin_sに渡される値) - result = self.tts_engine.update_length(hello_hiho, StyleId(1)) + # Indirect Outputs(yukarin_sに渡される値) + self.tts_engine.update_length(hello_hiho, StyleId(1)) yukarin_s_args = self.yukarin_s_mock.call_args[1] list_length = yukarin_s_args["length"] phoneme_list = yukarin_s_args["phoneme_list"] @@ -216,24 +234,7 @@ def test_update_length(self): true_phoneme_list_1 = [0, 23, 30, 4, 28, 21, 10, 21, 42, 7] true_phoneme_list_2 = [0, 19, 21, 19, 30, 12, 14, 35, 6, 0] true_phoneme_list = true_phoneme_list_1 + true_phoneme_list_2 - true_result = _gen_hello_hiho_accent_phrases() - index = 1 - - def result_value(i: int) -> float: - return np.float32(round(float(phoneme_list[i] * 0.0625 + 1), 2)).item() - - for accent_phrase in true_result: - moras = accent_phrase.moras - for mora in moras: - if mora.consonant is not None: - mora.consonant_length = result_value(index) - index += 1 - mora.vowel_length = result_value(index) - index += 1 - if accent_phrase.pause_mora is not None: - accent_phrase.pause_mora.vowel_length = result_value(index) - index += 1 - # Tests + self.assertEqual(list_length, true_list_length) self.assertEqual(list_length, len(phoneme_list)) self.assertEqual(style_id, true_style_id) @@ -241,7 +242,6 @@ def result_value(i: int) -> float: phoneme_list, np.array(true_phoneme_list, dtype=np.int64), ) - self.assertEqual(result, true_result) def test_update_pitch(self): # 空のリストでエラーを吐かないか @@ -256,8 +256,8 @@ def test_update_pitch(self): # Inputs hello_hiho = _gen_hello_hiho_accent_phrases() - # Outputs & Indirect Outputs(yukarin_saに渡される値) - result = self.tts_engine.update_pitch(hello_hiho, StyleId(1)) + # Indirect Outputs(yukarin_saに渡される値) + self.tts_engine.update_pitch(hello_hiho, StyleId(1)) yukarin_sa_args = self.yukarin_sa_mock.call_args[1] list_length = yukarin_sa_args["length"] vowel_phoneme_list = yukarin_sa_args["vowel_phoneme_list"][0] @@ -274,42 +274,7 @@ def test_update_pitch(self): true_accent_ends = np.array([0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0]) true_phrase_starts = np.array([0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0]) true_phrase_ends = np.array([0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0]) - true_result = _gen_hello_hiho_accent_phrases() - index = 1 - def result_value(i: int) -> float: - # unvoiced_vowel_likesのPhoneme ID版 - unvoiced_mora_tail_ids = [ - Phoneme(p).id for p in UNVOICED_MORA_TAIL_PHONEMES - ] - if vowel_phoneme_list[i] in unvoiced_mora_tail_ids: - return 0 - return np.float32( - round( - ( - ( - vowel_phoneme_list[i] - + consonant_phoneme_list[i] - + start_accent_list[i] - + end_accent_list[i] - + start_accent_phrase_list[i] - + end_accent_phrase_list[i] - ) - * 0.0625 - + 1 - ), - 2, - ) - ).item() - - for accent_phrase in true_result: - moras = accent_phrase.moras - for mora in moras: - mora.pitch = result_value(index) - index += 1 - if accent_phrase.pause_mora is not None: - accent_phrase.pause_mora.pitch = result_value(index) - index += 1 # Tests self.assertEqual(list_length, 12) self.assertEqual(list_length, len(vowel_phoneme_list)) @@ -325,10 +290,9 @@ def result_value(i: int) -> float: np.testing.assert_array_equal(end_accent_list, true_accent_ends) np.testing.assert_array_equal(start_accent_phrase_list, true_phrase_starts) np.testing.assert_array_equal(end_accent_phrase_list, true_phrase_ends) - self.assertEqual(result, true_result) -def test_mocked_update_length_output(snapshot_json: JSONSnapshotExtension) -> None: +def test_mocked_update_length_output(snapshot_json: SnapshotAssertion) -> None: """モックされた `TTSEngine.update_length()` の出力スナップショットが一定である""" # Inputs tts_engine = TTSEngine(MockCoreWrapper()) @@ -339,7 +303,7 @@ def test_mocked_update_length_output(snapshot_json: JSONSnapshotExtension) -> No assert snapshot_json == round_floats(pydantic_to_native_type(result), round_value=2) -def test_mocked_update_pitch_output(snapshot_json: JSONSnapshotExtension) -> None: +def test_mocked_update_pitch_output(snapshot_json: SnapshotAssertion) -> None: """モックされた `TTSEngine.update_pitch()` の出力スナップショットが一定である""" # Inputs tts_engine = TTSEngine(MockCoreWrapper()) @@ -351,7 +315,7 @@ def test_mocked_update_pitch_output(snapshot_json: JSONSnapshotExtension) -> Non def test_mocked_update_length_and_pitch_output( - snapshot_json: JSONSnapshotExtension, + snapshot_json: SnapshotAssertion, ) -> None: """モックされた `TTSEngine.update_length_and_pitch()` の出力スナップショットが一定である""" # Inputs @@ -364,7 +328,7 @@ def test_mocked_update_length_and_pitch_output( def test_mocked_create_accent_phrases_output( - snapshot_json: JSONSnapshotExtension, + snapshot_json: SnapshotAssertion, ) -> None: """モックされた `TTSEngine.create_accent_phrases()` の出力スナップショットが一定である""" # Inputs @@ -377,7 +341,7 @@ def test_mocked_create_accent_phrases_output( def test_mocked_create_accent_phrases_from_kana_output( - snapshot_json: JSONSnapshotExtension, + snapshot_json: SnapshotAssertion, ) -> None: """モックされた `TTSEngine.create_accent_phrases_from_kana()` の出力スナップショットが一定である""" # Inputs @@ -389,7 +353,7 @@ def test_mocked_create_accent_phrases_from_kana_output( assert snapshot_json == round_floats(pydantic_to_native_type(result), round_value=2) -def test_mocked_synthesize_wave_output(snapshot_json: JSONSnapshotExtension) -> None: +def test_mocked_synthesize_wave_output(snapshot_json: SnapshotAssertion) -> None: """モックされた `TTSEngine.synthesize_wave()` の出力スナップショットが一定である""" # Inputs tts_engine = TTSEngine(MockCoreWrapper()) @@ -400,6 +364,41 @@ def test_mocked_synthesize_wave_output(snapshot_json: JSONSnapshotExtension) -> assert snapshot_json == round_floats(result.tolist(), round_value=2) +def test_mocked_synthesize_wave_from_score_output( + snapshot_json: SnapshotAssertion, +) -> None: + """ + モックされた `TTSEngine.create_sing_phoneme_and_f0_and_volume()` と + `TTSEngine.frame_synthsize_wave()` の出力スナップショットが一定である + """ + # Inputs + tts_engine = TTSEngine(MockCoreWrapper()) + doremi_srore = _gen_doremi_score() + # Outputs + result = tts_engine.create_sing_phoneme_and_f0_and_volume(doremi_srore, StyleId(1)) + # Tests + assert snapshot_json(name="query") == round_floats( + pydantic_to_native_type(result), round_value=2 + ) + + # Inputs + phonemes, f0, volume = result + doremi_query = FrameAudioQuery( + f0=f0, + volume=volume, + phonemes=phonemes, + volumeScale=1.3, + outputSamplingRate=1200, + outputStereo=False, + ) + # Outputs + result_wave = tts_engine.frame_synthsize_wave(doremi_query, StyleId(1)) + # Tests + assert snapshot_json(name="wave") == round_floats( + result_wave.tolist(), round_value=2 + ) + + def koreha_arimasuka_base_expected(): return [ AccentPhrase( diff --git a/voicevox_engine/core/core_wrapper.py b/voicevox_engine/core/core_wrapper.py index d8fb10f7f..5426c9750 100644 --- a/voicevox_engine/core/core_wrapper.py +++ b/voicevox_engine/core/core_wrapper.py @@ -763,17 +763,17 @@ def predict_sing_f0_forward( Parameters ---------- length : int - 音素列の長さ + フレームの長さ phoneme : NDArray[np.int64] - 音素列 + フレームごとの音素 note : NDArray[np.int64] - ノート列 + フレームごとのノート style_id : NDArray[np.int64] スタイル番号 Returns ------- output : NDArray[np.float32] - フレームごとのF0 + フレームごとの音高 """ output = np.zeros((length,), dtype=np.float32) self.assert_core_success( @@ -800,17 +800,19 @@ def predict_sing_volume_forward( Parameters ---------- length : int - 音素列の長さ + フレームの長さ phoneme : NDArray[np.int64] - 音素列 + フレームごとの音素 note : NDArray[np.int64] - ノート列 + フレームごとのノート + f0 : NDArray[np.float32] + フレームごとの音高 style_id : NDArray[np.int64] スタイル番号 Returns ------- output : NDArray[np.float32] - フレームごとのF0 + フレームごとの音量 """ output = np.zeros((length,), dtype=np.float32) self.assert_core_success( diff --git a/voicevox_engine/dev/core/mock.py b/voicevox_engine/dev/core/mock.py index d0862d80c..25190ea55 100644 --- a/voicevox_engine/dev/core/mock.py +++ b/voicevox_engine/dev/core/mock.py @@ -22,28 +22,31 @@ def __init__( def metas(self) -> str: return json.dumps( [ + # トーク2つ・ハミング2つ { "name": "dummy1", "styles": [ {"name": "style0", "id": 0}, {"name": "style1", "id": 2}, - {"name": "style2", "id": 4}, - {"name": "style3", "id": 6}, + {"name": "style2", "id": 4, "type": "frame_decode"}, + {"name": "style3", "id": 6, "type": "frame_decode"}, ], "speaker_uuid": "7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff", "version": "mock", }, + # トーク2つ・ハミング1つ・ソング1つ { "name": "dummy2", "styles": [ {"name": "style0", "id": 1}, {"name": "style1", "id": 3}, - {"name": "style2", "id": 5}, - {"name": "style3", "id": 7}, + {"name": "style2", "id": 5, "type": "frame_decode"}, + {"name": "style3", "id": 7, "type": "sing"}, ], "speaker_uuid": "388f246b-8c41-4ac1-8e2d-5d79f3ff56d9", "version": "mock", }, + # トーク1つ { "name": "dummy3", "styles": [ @@ -52,10 +55,11 @@ def metas(self) -> str: "speaker_uuid": "35b2c544-660e-401e-b503-0e14c635303a", "version": "mock", }, + # ソング1つ { "name": "dummy4", "styles": [ - {"name": "style0", "id": 9}, + {"name": "style0", "id": 9, "type": "sing"}, ], "speaker_uuid": "b1a81618-b27b-40d2-b0ea-27a9ad408c4b", "version": "mock", @@ -67,6 +71,8 @@ def yukarin_s_forward( self, length: int, phoneme_list: NDArray[np.int64], style_id: NDArray[np.int64] ) -> NDArray[np.float32]: """音素系列サイズ・音素ID系列・スタイルIDから音素長系列を生成する""" + # TODO: トークスタイル以外はエラーにする + result = [] # mockとしての適当な処理、特に意味はない for i in range(length): @@ -87,6 +93,8 @@ def yukarin_sa_forward( """モーラ系列サイズ・母音系列・子音系列・アクセント位置・アクセント句区切り・スタイルIDからモーラ音高系列を生成する""" assert length > 1, "前後無音を必ず付与しなければならない" + # TODO: トークスタイル以外はエラーにする + result = [] # mockとしての適当な処理、特に意味はない for i in range(length): @@ -118,6 +126,8 @@ def decode_forward( style_id: NDArray[np.int64], ) -> NDArray[np.float32]: """フレーム長・音素種類数・フレーム音高・フレーム音素onehot・スタイルIDからダミー音声波形を生成する""" + # TODO: トークスタイル以外はエラーにする + # 入力値を反映し、長さが 256 倍であるダミー配列を出力する result: list[NDArray[np.float32]] = [] for i in range(length): @@ -126,6 +136,98 @@ def decode_forward( ] * 256 return np.array(result, dtype=np.float32) + def predict_sing_consonant_length_forward( + self, + length: int, + consonant: NDArray[np.int64], + vowel: NDArray[np.int64], + note_duration: NDArray[np.int64], + style_id: NDArray[np.int64], + ) -> NDArray[np.int64]: + """母音系列・子音系列・ノート列・スタイルIDから子音長系列を生成する""" + result = [] + # mockとしての適当な処理、特に意味はない + for i in range(length): + # 子音が無い場合は長さ0 + if consonant[0, i] == -1: + result.append(0) + continue + + result.append( + ( + consonant[0, i] % 3 + + vowel[0, i] % 5 + + note_duration[0, i] % 7 + + style_id % 11 + ).item() + ) + return np.array(result, dtype=np.int64) + + def predict_sing_f0_forward( + self, + length: int, + phoneme: NDArray[np.int64], + note: NDArray[np.int64], + style_id: NDArray[np.int64], + ) -> NDArray[np.float32]: + """音素系列・ノート系列・スタイルIDから音高系列を生成する""" + result = [] + # mockとしての適当な処理。大体MIDIノートに従う周波数になるように調整 + for i in range(length): + if note[0, i] == -1: + result.append(0) + continue + result.append( + ( + 2 ** ((note[0, i] - 69) / 12) + * (440 + phoneme[0, i] / 10 + style_id) + ).item() + ) + return np.array(result, dtype=np.float32) + + def predict_sing_volume_forward( + self, + length: int, + phoneme: NDArray[np.int64], + note: NDArray[np.int64], + f0: NDArray[np.float32], + style_id: NDArray[np.int64], + ) -> NDArray[np.float32]: + """音素系列・ノート系列・音高系列・スタイルIDから音量系列を生成する""" + result = [] + # mockとしての適当な処理。大体0~10の範囲になるように調整 + for i in range(length): + if note[0, i] == -1: + result.append(0) + continue + result.append( + ( + (phoneme[0, i] / 40) + * (note[0, i] / 88) + * (f0[0, i] / 440) + * ((1 / 2) ** style_id) + * 10 + ).item() + ) + return np.array(result, dtype=np.float32) + + def sf_decode_forward( + self, + length: int, + phoneme: NDArray[np.int64], + f0: NDArray[np.float32], + volume: NDArray[np.float32], + style_id: NDArray[np.int64], + ) -> NDArray[np.float32]: + """入力からダミー音声波形を生成する""" + # 入力値を反映し、長さが 256 倍であるダミー配列を出力する + result: list[NDArray[np.float32]] = [] + for i in range(length): + result += [ + (f0[0, i] / 440) * volume[0, i] * (phoneme[0, i] / 40) + style_id + ] * 256 + return np.array(result, dtype=np.float32) + def supported_devices(self): return json.dumps( { diff --git a/voicevox_engine/tts_pipeline/tts_engine.py b/voicevox_engine/tts_pipeline/tts_engine.py index c0c553516..17d06c3e4 100644 --- a/voicevox_engine/tts_pipeline/tts_engine.py +++ b/voicevox_engine/tts_pipeline/tts_engine.py @@ -261,6 +261,7 @@ def calc_phoneme_lengths( note_duration = note_durations[i] # もし、次のノートの子音長が負になる場合、現在のノートの半分にする + # NOTE: 将来的にコアは非負になるのでこの処理は不要になる if next_consonant_length < 0: next_consonant_length = consonant_lengths[i + 1] = note_duration // 2 vowel_length = note_duration - next_consonant_length