整理: e2e single API テスト vol 8 (#1166)

* refactor: e2e single API スナップショットテストを追加
* fix: `POST synthesis_morphing` disable
* fix: lint
* fix: テスト fail 説明文を明確化
* refactor: wavスナップショット共通処理を util 関数に切り出し
* fix: Any エラーを手動型付けで回避
* fix: FIXME/NOTE の混同を解消
  Co-authored-by: Hiroshiba <[email protected]>
* fix: 冗長な動作説明コメントを削除
  Co-authored-by: Hiroshiba <[email protected]>
* fix: 音声波形ハッシュ化の名称を厳格化
* fix: 現象報告と既存問題を混同しうるコメントを明確化
* fix: wavファイル常時ハッシュ化
  Co-authored-by: Hiroshiba <[email protected]>
* fix: missing import
* fix: wavファイルスナップショットを更新
---------
Co-authored-by: Hiroshiba <[email protected]>
VOICEVOX · May 3, 2024 · 2d4666c · 2d4666c
1 parent ec64646
commit 2d4666c
Show file tree

Hide file tree

Showing 11 changed files with 61 additions and 19 deletions.
diff --git a/test/e2e/__snapshots__/test_tts.ambr b/test/e2e/__snapshots__/test_tts.ambr
@@ -1,4 +1,4 @@
 # serializer version: 1
 # name: test_テキストと話者IDから音声を合成できる
-  'MD5:9cb1070db2510240ff63a16fd42907c9'
+  'MD5:8f7ddc461c68542d4d8ef4cd5c54ca82'
 # ---
diff --git a/...query/test_speakerを指定して音声合成クエリが取得できる.json → ...udio_query/test_post_audio_query_200.json b/...query/test_speakerを指定して音声合成クエリが取得できる.json → ...udio_query/test_post_audio_query_200.json
diff --git a/test/e2e/single_api/__snapshots__/test_frame_synthesis.ambr b/test/e2e/single_api/__snapshots__/test_frame_synthesis.ambr
@@ -0,0 +1,4 @@
+# serializer version: 1
+# name: test_post_frame_synthesis_200
+  'MD5:1c385210acba238994604a8cee96aee3'
+# ---
diff --git a/test/e2e/single_api/__snapshots__/test_synthesis.ambr b/test/e2e/single_api/__snapshots__/test_synthesis.ambr
@@ -0,0 +1,4 @@
+# serializer version: 1
+# name: test_post_synthesis_200
+  'MD5:f7d42ce5787856549abc3d2d7561c06f'
+# ---
diff --git a/test/e2e/single_api/test_audio_query.py b/test/e2e/single_api/test_audio_query.py
@@ -1,5 +1,5 @@
 """
-AudioQuery APIのテスト
+/audio_query API のテスト
 """
 
 from test.utility import round_floats
@@ -8,7 +8,7 @@
 from syrupy.assertion import SnapshotAssertion
 
 
-def test_speakerを指定して音声合成クエリが取得できる(
+def test_post_audio_query_200(
     client: TestClient, snapshot_json: SnapshotAssertion
 ) -> None:
     response = client.post("/audio_query", params={"text": "テストです", "speaker": 0})

diff --git a/test/e2e/single_api/test_frame_synthesis.py b/test/e2e/single_api/test_frame_synthesis.py
@@ -2,10 +2,15 @@
 /frame_synthesis API のテスト
 """
 
+from test.utility import hash_wave_floats_from_wav_bytes
+
 from fastapi.testclient import TestClient
+from syrupy.assertion import SnapshotAssertion
 
 
-def test_post_frame_synthesis_200(client: TestClient) -> None:
+def test_post_frame_synthesis_200(
+    client: TestClient, snapshot: SnapshotAssertion
+) -> None:
     query = {
         "f0": [
             0.0,
@@ -81,3 +86,7 @@ def test_post_frame_synthesis_200(client: TestClient) -> None:
     }
     response = client.post("/frame_synthesis", params={"speaker": 0}, json=query)
     assert response.status_code == 200
+
+    # FileResponse 内の .wav から抽出された音声波形が一致する
+    assert response.headers["content-type"] == "audio/wav"
+    assert snapshot == hash_wave_floats_from_wav_bytes(response.read())
diff --git a/test/e2e/single_api/test_multi_synthesis.py b/test/e2e/single_api/test_multi_synthesis.py
@@ -59,3 +59,14 @@ def test_post_multi_synthesis_200(client: TestClient) -> None:
     ]
     response = client.post("/multi_synthesis", params={"speaker": 0}, json=queries)
     assert response.status_code == 200
+
+    # FileResponse 内の zip ファイルに圧縮された .wav から抽出された音声波形が一致する
+    # FIXME: スナップショットテストを足す
+    # NOTE: ZIP ファイル内の .wav に Linux-Windows 数値精度問題があるため解凍が必要
+    assert response.headers["content-type"] == "application/zip"
+    # from test.utility import summarize_wav_bytes
+    # from syrupy.assertion import SnapshotAssertion
+    # # zip 解凍
+    # wav_summarys = map(lambda wav_byte: summarize_wav_bytes(wav_byte), wav_bytes)
+    # wavs_summary = concatenate_func(wav_summarys)
+    # assert snapshot == wavs_summary
diff --git a/test/e2e/single_api/test_synthesis.py b/test/e2e/single_api/test_synthesis.py
@@ -3,11 +3,13 @@
 """
 
 from test.e2e.single_api.utils import gen_mora
+from test.utility import hash_wave_floats_from_wav_bytes
 
 from fastapi.testclient import TestClient
+from syrupy.assertion import SnapshotAssertion
 
 
-def test_post_synthesis_200(client: TestClient) -> None:
+def test_post_synthesis_200(client: TestClient, snapshot: SnapshotAssertion) -> None:
     query = {
         "accent_phrases": [
             {
@@ -33,3 +35,7 @@ def test_post_synthesis_200(client: TestClient) -> None:
     }
     response = client.post("/synthesis", params={"speaker": 0}, json=query)
     assert response.status_code == 200
+
+    # 音声波形が一致する
+    assert response.headers["content-type"] == "audio/wav"
+    assert snapshot == hash_wave_floats_from_wav_bytes(response.read())
diff --git a/test/e2e/single_api/test_synthesis_morphing.py b/test/e2e/single_api/test_synthesis_morphing.py
@@ -37,3 +37,10 @@ def test_post_synthesis_morphing_200(client: TestClient) -> None:
         json=queries,
     )
     assert response.status_code == 200
+
+    # FIXME: LinuxとMacOSで計算結果が一致しないためスナップショットテストがコケる（原因不明）
+    # from test.utility import summarize_wav_bytes
+    # from syrupy.assertion import SnapshotAssertion
+    # # FileResponse 内の .wav から抽出された音声波形が一致する
+    # assert response.headers["content-type"] == "audio/wav"
+    # assert snapshot == summarize_wav_bytes(response.read())
diff --git a/test/e2e/test_tts.py b/test/e2e/test_tts.py
@@ -2,10 +2,8 @@
 TTSのテスト
 """
 
-import io
-from test.utility import hash_long_string, round_floats
+from test.utility import hash_wave_floats_from_wav_bytes
 
-import soundfile as sf
 from fastapi.testclient import TestClient
 from syrupy.assertion import SnapshotAssertion
 
@@ -22,17 +20,9 @@ def test_テキストと話者IDから音声を合成できる(
     # AudioQuery から音声波形を生成する
     synthesis_res = client.post("/synthesis", params={"speaker": 0}, json=audio_query)
 
-    # wav ファイルを含む FileResponse から音声波形を抽出する
-    wav_bytes = io.BytesIO(synthesis_res.read())
-    wave = sf.read(wav_bytes)[0].tolist()
-
-    # NOTE: Linux-Windows 数値精度問題に対するワークアラウンド
-    wave = round_floats(wave, 2)
-
     # リクエストが成功している
     assert synthesis_res.status_code == 200
-    # レスポンスが音声ファイルである
+
+    # FileResponse 内の .wav から抽出された音声波形が一致する
     assert synthesis_res.headers["content-type"] == "audio/wav"
-    # 音声波形が commit 間で不変である
-    wave_str = " ".join(map(lambda point: str(point), wave))
-    assert snapshot == hash_long_string(wave_str)
+    assert snapshot == hash_wave_floats_from_wav_bytes(synthesis_res.read())
diff --git a/test/utility.py b/test/utility.py
@@ -1,7 +1,10 @@
 import hashlib
+import io
 import json
 from typing import Any
 
+import numpy as np
+import soundfile as sf
 from pydantic.json import pydantic_encoder
 
 
@@ -36,3 +39,11 @@ def to_hash(value: str) -> str:
         return {k: hash_long_string(v) for k, v in value.items()}
     else:
         return value
+
+
+def hash_wave_floats_from_wav_bytes(wav_bytes: bytes) -> str:
+    """
+    Extract the audio waveform from the bytes of a .wav file and hash it.
+
+    The bytes are decoded with `sf.read`, the resulting samples are passed
+    through `round_floats(wave, 2)`, and the MD5 hex digest of the resulting
+    array's raw bytes is returned with an "MD5:" prefix.
+    """
+    # Element 0 of the `sf.read` result is taken as the waveform and converted to a list
+    wave = sf.read(io.BytesIO(wav_bytes))[0].tolist()
+    # NOTE: Workaround for the Linux-Windows numerical precision issue
+    wave = round_floats(wave, 2)
+    return "MD5:" + hashlib.md5(np.array(wave).tobytes()).hexdigest()