From 8eb09d3eed111ac4a26d79aab36591460fd4a4f7 Mon Sep 17 00:00:00 2001
From: awinml <97467100+awinml@users.noreply.github.com>
Date: Sat, 21 Oct 2023 12:15:07 +0530
Subject: [PATCH 01/13] Migrate RemoteWhisperTranscriber to OpenAI SDK

---
 .../components/audio/whisper_remote.py        | 155 ++++-----
 .../components/audio/test_whisper_remote.py   | 318 ++++++++----------
 2 files changed, 220 insertions(+), 253 deletions(-)

diff --git a/haystack/preview/components/audio/whisper_remote.py b/haystack/preview/components/audio/whisper_remote.py
index 3317e251e6..cdc04d7b44 100644
--- a/haystack/preview/components/audio/whisper_remote.py
+++ b/haystack/preview/components/audio/whisper_remote.py
@@ -6,15 +6,15 @@
 from pathlib import Path

 from haystack.preview.utils import request_with_retry
-from haystack.preview import component, Document, default_to_dict
+from haystack.preview import component, Document, default_to_dict, default_from_dict

-logger = logging.getLogger(__name__)
+import openai

-OPENAI_TIMEOUT = float(os.environ.get("HAYSTACK_OPENAI_TIMEOUT_SEC", 600))
+logger = logging.getLogger(__name__)

-WhisperRemoteModel = Literal["whisper-1"]
+API_BASE_URL = "https://api.openai.com/v1"


 @component
@@ -30,52 +30,86 @@ class RemoteWhisperTranscriber:

     def __init__(
         self,
-        api_key: str,
-        model_name: WhisperRemoteModel = "whisper-1",
-        api_base: str = "https://api.openai.com/v1",
-        whisper_params: Optional[Dict[str, Any]] = None,
+        api_key: Optional[str] = None,
+        model_name: str = "whisper-1",
+        organization: Optional[str] = None,
+        api_base_url: str = API_BASE_URL,
+        **kwargs,
     ):
         """
         Transcribes a list of audio files into a list of Documents.

         :param api_key: OpenAI API key.
         :param model_name: Name of the model to use. It now accepts only `whisper-1`.
+        :param organization: The OpenAI-Organization ID, defaults to `None`. For more details, see OpenAI
+            [documentation](https://platform.openai.com/docs/api-reference/requesting-organization).
-        :param api_base: OpenAI base URL, defaults to `"https://api.openai.com/v1"`.
+        :param api_base_url: OpenAI base URL, defaults to `"https://api.openai.com/v1"`.
+        :param kwargs: Other parameters to use for the model. These parameters are all sent directly to the OpenAI
+            endpoint. See OpenAI [documentation](https://platform.openai.com/docs/api-reference/audio) for more details.
+            Some of the supported parameters:
+            - `language`: The language of the input audio.
+              Supplying the input language in ISO-639-1 format
+              will improve accuracy and latency.
+            - `prompt`: An optional text to guide the model's
+              style or continue a previous audio segment.
+              The prompt should match the audio language.
+            - `response_format`: The format of the transcript
+              output, in one of these options: json, text, srt,
+              verbose_json, or vtt. Defaults to "json". Currently only "json" is supported.
+            - `temperature`: The sampling temperature, between 0
+              and 1. Higher values like 0.8 will make the output more
+              random, while lower values like 0.2 will make it more
+              focused and deterministic. If set to 0, the model will
+              use log probability to automatically increase the
+              temperature until certain thresholds are hit.
         """
-        if model_name not in get_args(WhisperRemoteModel):
-            raise ValueError(
-                f"Model name not recognized. Choose one among: " f"{', '.join(get_args(WhisperRemoteModel))}."
-            )
-        if not api_key:
-            raise ValueError("API key is None.")

-        self.model_name = model_name
+        if api_key is None:
+            try:
+                api_key = os.environ["OPENAI_API_KEY"]
+            except KeyError as e:
+                raise ValueError(
+                    "RemoteWhisperTranscriber expects an OpenAI API key. "
" + "Set the OPENAI_API_KEY environment variable (recommended) or pass it explicitly." + ) from e + self.api_key = api_key - self.api_base = api_base - self.whisper_params = whisper_params or {} + self.organization = organization + self.model_name = model_name + self.api_base_url = api_base_url - @component.output_types(documents=List[Document]) - def run(self, audio_files: List[Path], whisper_params: Optional[Dict[str, Any]] = None): - """ - Transcribe the audio files into a list of Documents, one for each input file. + # Only response_format = "json" is supported + whisper_params = kwargs + whisper_params["response_format"] = "json" + self.whisper_params = whisper_params - For the supported audio formats, languages, and other parameters, see the - [Whisper API documentation](https://platform.openai.com/docs/guides/speech-to-text) and the official Whisper - [github repo](https://github.com/openai/whisper). + openai.api_key = api_key + if organization is not None: + openai.organization = organization - :param audio_files: a list of paths or binary streams to transcribe - :returns: a list of Documents, one for each file. The content of the document is the transcription text, - while the document's metadata contains all the other values returned by the Whisper model, such as the - alignment data. Another key called `audio_file` contains the path to the audio file used for the - transcription. + def to_dict(self) -> Dict[str, Any]: """ - if whisper_params is None: - whisper_params = self.whisper_params + Serialize this component to a dictionary. + This method overrides the default serializer in order to + avoid leaking the `api_key` value passed to the constructor. + """ + return default_to_dict( + self, + model_name=self.model_name, + organization=self.organization, + api_base_url=self.api_base_url, + **self.whisper_params, + ) - documents = self.transcribe(audio_files, **whisper_params) - return {"documents": documents} + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "RemoteWhisperTranscriber": + """ + Deserialize this component from a dictionary. + """ + return default_from_dict(cls, data) - def transcribe(self, audio_files: Sequence[Union[str, Path, BinaryIO]], **kwargs) -> List[Document]: + @component.output_types(documents=List[Document]) + def run(self, audio_files: Sequence[Union[str, Path, BinaryIO]]): """ Transcribe the audio files into a list of Documents, one for each input file. @@ -84,54 +118,21 @@ def transcribe(self, audio_files: Sequence[Union[str, Path, BinaryIO]], **kwargs [github repo](https://github.com/openai/whisper). :param audio_files: a list of paths or binary streams to transcribe - :returns: a list of transcriptions. + :returns: a list of Documents, one for each file. The content of the document is the transcription text, while the document's metadata contains a key called `audio_file`, which contains the path to the + audio file used for the transcription. """ - transcriptions = self._raw_transcribe(audio_files=audio_files, **kwargs) documents = [] - for audio, transcript in zip(audio_files, transcriptions): - content = transcript.pop("text") - if not isinstance(audio, (str, Path)): - audio = "<>" - doc = Document(text=content, metadata={"audio_file": audio, **transcript}) - documents.append(doc) - return documents - - def _raw_transcribe(self, audio_files: Sequence[Union[str, Path, BinaryIO]], **kwargs) -> List[Dict[str, Any]]: - """ - Transcribe the given audio files. Returns a list of strings. 
-
-        For the supported audio formats, languages, and other parameters, see the
-        [Whisper API documentation](https://platform.openai.com/docs/guides/speech-to-text) and the official Whisper
-        [github repo](https://github.com/openai/whisper).
-
-        :param audio_files: a list of paths or binary streams to transcribe.
-        :param kwargs: any other parameters that Whisper API can understand.
-        :returns: a list of transcriptions as they are produced by the Whisper API (JSON).
-        """
-        translate = kwargs.pop("translate", False)
-        url = f"{self.api_base}/audio/{'translations' if translate else 'transcriptions'}"
-        data = {"model": self.model_name, **kwargs}
-        headers = {"Authorization": f"Bearer {self.api_key}"}
-
-        transcriptions = []
+
         for audio_file in audio_files:
             if isinstance(audio_file, (str, Path)):
                 audio_file = open(audio_file, "rb")

-            request_files = ("file", (audio_file.name, audio_file, "application/octet-stream"))
-            response = request_with_retry(
-                method="post", url=url, data=data, headers=headers, files=[request_files], timeout=OPENAI_TIMEOUT
-            )
-            transcription = json.loads(response.content)
+            content = openai.Audio.transcribe(file=audio_file, model=self.model_name, **self.whisper_params)

-            transcriptions.append(transcription)
-        return transcriptions
+            # Set the audio file to <<binary stream>> for document metadata if the path is not present
+            if not isinstance(audio_file, (str, Path)):
+                audio_file = "<<binary stream>>"

-    def to_dict(self) -> Dict[str, Any]:
-        """
-        This method overrides the default serializer in order to avoid leaking the `api_key` value passed
-        to the constructor.
-        """
-        return default_to_dict(
-            self, model_name=self.model_name, api_base=self.api_base, whisper_params=self.whisper_params
-        )
+            doc = Document(text=content["text"], metadata={"audio_file": audio_file})
+            documents.append(doc)
+
+        return {"documents": documents}

diff --git a/test/preview/components/audio/test_whisper_remote.py b/test/preview/components/audio/test_whisper_remote.py
index 0c8cb235db..2b6cc1c62f 100644
--- a/test/preview/components/audio/test_whisper_remote.py
+++ b/test/preview/components/audio/test_whisper_remote.py
@@ -1,226 +1,192 @@
-import os
+from typing import Union, BinaryIO
+from pathlib import Path
 from unittest.mock import MagicMock, patch
+from copy import deepcopy
+
 import pytest

+import openai
+from openai.util import convert_to_openai_object
+
 from haystack.preview.dataclasses import Document
-from haystack.preview.components.audio.whisper_remote import RemoteWhisperTranscriber, OPENAI_TIMEOUT
+from haystack.preview.components.audio.whisper_remote import RemoteWhisperTranscriber
+
+
+def mock_openai_response(
+    file: Union[str, Path, BinaryIO], model: str = "whisper-1", response_format="json", **kwargs
+) -> openai.openai_object.OpenAIObject:
+    if isinstance(file, (str, Path)):
+        file_path = str(file)
+    else:
+        file_path = file.name
+    if response_format == "json":
+        dict_response = {"text": f"model: {model}, file: {file_path}, test transcription"}
+    else:
+        dict_response = {}
+
+    return convert_to_openai_object(dict_response)


 class TestRemoteWhisperTranscriber:
     @pytest.mark.unit
-    def test_init_unknown_model(self):
-        with pytest.raises(ValueError, match="not recognized"):
-            RemoteWhisperTranscriber(model_name="anything", api_key="something")
+    def test_init_no_key(self, monkeypatch):
+        monkeypatch.delenv("OPENAI_API_KEY", raising=False)
+        error_msg = "RemoteWhisperTranscriber expects an OpenAI API key."
+        with pytest.raises(ValueError, match=error_msg):
+            RemoteWhisperTranscriber(api_key=None)

     @pytest.mark.unit
     def test_init_default(self):
-        transcriber = RemoteWhisperTranscriber(api_key="just a test")
+        transcriber = RemoteWhisperTranscriber(api_key="test_api_key")
+
+        assert transcriber.api_key == "test_api_key"
         assert transcriber.model_name == "whisper-1"
-        assert transcriber.api_key == "just a test"
-        assert transcriber.api_base == "https://api.openai.com/v1"
+        assert transcriber.organization is None
+        assert transcriber.api_base_url == "https://api.openai.com/v1"
+        assert transcriber.whisper_params == {"response_format": "json"}

     @pytest.mark.unit
-    def test_init_no_key(self):
-        with pytest.raises(ValueError, match="API key is None"):
-            RemoteWhisperTranscriber(api_key=None)
+    def test_init_custom_parameters(self):
+        transcriber = RemoteWhisperTranscriber(
+            api_key="test_api_key",
+            model_name="whisper-1",
+            organization="test-org",
+            api_base_url="test_api_url",
+            language="en",
+            prompt="test-prompt",
+            response_format="json",
+            temperature="0.5",
+        )
+
+        assert transcriber.api_key == "test_api_key"
+        assert transcriber.model_name == "whisper-1"
+        assert transcriber.organization == "test-org"
+        assert transcriber.api_base_url == "test_api_url"
+        assert transcriber.whisper_params == {
+            "language": "en",
+            "prompt": "test-prompt",
+            "response_format": "json",
+            "temperature": "0.5",
+        }

     @pytest.mark.unit
-    def test_to_dict(self):
-        transcriber = RemoteWhisperTranscriber(api_key="test")
+    def test_to_dict_default_parameters(self):
+        transcriber = RemoteWhisperTranscriber(api_key="test_api_key")
         data = transcriber.to_dict()
         assert data == {
             "type": "RemoteWhisperTranscriber",
             "init_parameters": {
                 "model_name": "whisper-1",
-                "api_base": "https://api.openai.com/v1",
-                "whisper_params": {},
+                "api_base_url": "https://api.openai.com/v1",
+                "organization": None,
+                "response_format": "json",
             },
         }

     @pytest.mark.unit
     def test_to_dict_with_custom_init_parameters(self):
         transcriber = RemoteWhisperTranscriber(
-            api_key="test",
+            api_key="test_api_key",
             model_name="whisper-1",
-            api_base="https://my.api.base/something_else/v3",
-            whisper_params={"return_segments": True, "temperature": [0.1, 0.6, 0.8]},
+            organization="test-org",
+            api_base_url="test_api_url",
+            language="en",
+            prompt="test-prompt",
+            response_format="json",
+            temperature="0.5",
         )
         data = transcriber.to_dict()
         assert data == {
             "type": "RemoteWhisperTranscriber",
             "init_parameters": {
                 "model_name": "whisper-1",
-                "api_base": "https://my.api.base/something_else/v3",
-                "whisper_params": {"return_segments": True, "temperature": [0.1, 0.6, 0.8]},
+                "organization": "test-org",
+                "api_base_url": "test_api_url",
+                "language": "en",
+                "prompt": "test-prompt",
+                "response_format": "json",
+                "temperature": "0.5",
             },
         }

-    @pytest.mark.unit
-    def test_run_with_path(self, preview_samples_path):
-        mock_response = MagicMock()
-        mock_response.status_code = 200
-        mock_response.content = '{"text": "test transcription", "other_metadata": ["other", "meta", "data"]}'
-        comp = RemoteWhisperTranscriber(api_key="whatever")
-
-        with patch("haystack.preview.utils.requests_utils.requests") as mocked_requests:
-            mocked_requests.request.return_value = mock_response
-
-            result = comp.run(audio_files=[preview_samples_path / "audio" / "this is the content of the document.wav"])
-            expected = Document(
-                text="test transcription",
-                metadata={
-                    "audio_file": preview_samples_path / "audio" / "this is the content of the document.wav",
-                    "other_metadata": ["other", "meta", "data"],
"meta", "data"], - }, - ) - assert result["documents"] == [expected] + def test_from_dict_with_defualt_parameters(self, monkeypatch): + monkeypatch.setenv("OPENAI_API_KEY", "test_api_key") - @pytest.mark.unit - def test_run_with_str(self, preview_samples_path): - mock_response = MagicMock() - mock_response.status_code = 200 - mock_response.content = '{"text": "test transcription", "other_metadata": ["other", "meta", "data"]}' - comp = RemoteWhisperTranscriber(api_key="whatever") - - with patch("haystack.preview.utils.requests_utils.requests") as mocked_requests: - mocked_requests.request.return_value = mock_response - - result = comp.run( - audio_files=[ - str((preview_samples_path / "audio" / "this is the content of the document.wav").absolute()) - ] - ) - expected = Document( - text="test transcription", - metadata={ - "audio_file": str( - (preview_samples_path / "audio" / "this is the content of the document.wav").absolute() - ), - "other_metadata": ["other", "meta", "data"], - }, - ) - assert result["documents"] == [expected] + data = { + "type": "RemoteWhisperTranscriber", + "init_parameters": { + "model_name": "whisper-1", + "api_base_url": "https://api.openai.com/v1", + "organization": None, + "response_format": "json", + }, + } - @pytest.mark.unit - def test_transcribe_with_stream(self, preview_samples_path): - mock_response = MagicMock() - mock_response.status_code = 200 - mock_response.content = '{"text": "test transcription", "other_metadata": ["other", "meta", "data"]}' - comp = RemoteWhisperTranscriber(api_key="whatever") - - with patch("haystack.preview.utils.requests_utils.requests") as mocked_requests: - mocked_requests.request.return_value = mock_response - - with open(preview_samples_path / "audio" / "this is the content of the document.wav", "rb") as audio_stream: - result = comp.transcribe(audio_files=[audio_stream]) - expected = Document( - text="test transcription", - metadata={"audio_file": "<>", "other_metadata": ["other", "meta", "data"]}, - ) - assert result == [expected] + transcriber = RemoteWhisperTranscriber.from_dict(data) - @pytest.mark.unit - def test_api_transcription(self, preview_samples_path): - mock_response = MagicMock() - mock_response.status_code = 200 - mock_response.content = '{"text": "test transcription", "other_metadata": ["other", "meta", "data"]}' - comp = RemoteWhisperTranscriber(api_key="whatever") - - with patch("haystack.preview.utils.requests_utils.requests") as mocked_requests: - mocked_requests.request.return_value = mock_response - - comp.run(audio_files=[preview_samples_path / "audio" / "this is the content of the document.wav"]) - requests_params = mocked_requests.request.call_args.kwargs - requests_params.pop("files") - assert requests_params == { - "method": "post", - "url": "https://api.openai.com/v1/audio/transcriptions", - "data": {"model": "whisper-1"}, - "headers": {"Authorization": "Bearer whatever"}, - "timeout": OPENAI_TIMEOUT, - } + assert transcriber.api_key == "test_api_key" + assert transcriber.model_name == "whisper-1" + assert transcriber.organization is None + assert transcriber.api_base_url == "https://api.openai.com/v1" + assert transcriber.whisper_params == {"response_format": "json"} - @pytest.mark.unit - def test_api_translation(self, preview_samples_path): - mock_response = MagicMock() - mock_response.status_code = 200 - mock_response.content = '{"text": "test transcription", "other_metadata": ["other", "meta", "data"]}' - comp = RemoteWhisperTranscriber(api_key="whatever") - - with 
patch("haystack.preview.utils.requests_utils.requests") as mocked_requests: - mocked_requests.request.return_value = mock_response - - comp.run( - audio_files=[preview_samples_path / "audio" / "this is the content of the document.wav"], - whisper_params={"translate": True}, - ) - requests_params = mocked_requests.request.call_args.kwargs - requests_params.pop("files") - assert requests_params == { - "method": "post", - "url": "https://api.openai.com/v1/audio/translations", - "data": {"model": "whisper-1"}, - "headers": {"Authorization": "Bearer whatever"}, - "timeout": OPENAI_TIMEOUT, - } + def test_from_dict_with_custom_init_parameters(self, monkeypatch): + monkeypatch.setenv("OPENAI_API_KEY", "test_api_key") - @pytest.mark.unit - @patch("haystack.preview.components.audio.whisper_remote.request_with_retry") - def test_default_api_base(self, mock_request, preview_samples_path): - mock_response = MagicMock() - mock_response.status_code = 200 - mock_response.content = '{"text": "test transcription", "other_metadata": ["other", "meta", "data"]}' - mock_request.return_value = mock_response + data = { + "type": "RemoteWhisperTranscriber", + "init_parameters": { + "model_name": "whisper-1", + "organization": "test-org", + "api_base_url": "test_api_url", + "language": "en", + "prompt": "test-prompt", + "response_format": "json", + "temperature": "0.5", + }, + } + transcriber = RemoteWhisperTranscriber.from_dict(data) - transcriber = RemoteWhisperTranscriber(api_key="just a test") - assert transcriber.api_base == "https://api.openai.com/v1" + assert transcriber.api_key == "test_api_key" + assert transcriber.model_name == "whisper-1" + assert transcriber.organization == "test-org" + assert transcriber.api_base_url == "test_api_url" + assert transcriber.whisper_params == { + "language": "en", + "prompt": "test-prompt", + "response_format": "json", + "temperature": "0.5", + } - transcriber.transcribe(audio_files=[preview_samples_path / "audio" / "this is the content of the document.wav"]) - assert mock_request.call_args.kwargs["url"] == "https://api.openai.com/v1/audio/transcriptions" + def test_from_dict_with_defualt_parameters_no_env_var(self, monkeypatch): + monkeypatch.delenv("OPENAI_API_KEY", raising=False) + + data = { + "type": "RemoteWhisperTranscriber", + "init_parameters": { + "model_name": "whisper-1", + "api_base_url": "https://api.openai.com/v1", + "organization": None, + "response_format": "json", + }, + } + + with pytest.raises(ValueError, match="RemoteWhisperTranscriber expects an OpenAI API key."): + RemoteWhisperTranscriber.from_dict(data) @pytest.mark.unit - @patch("haystack.preview.components.audio.whisper_remote.request_with_retry") - def test_custom_api_base(self, mock_request, preview_samples_path): - mock_response = MagicMock() - mock_response.status_code = 200 - mock_response.content = '{"text": "test transcription", "other_metadata": ["other", "meta", "data"]}' - mock_request.return_value = mock_response - - transcriber = RemoteWhisperTranscriber(api_key="just a test", api_base="https://fake_api_base.com") - assert transcriber.api_base == "https://fake_api_base.com" - - transcriber.transcribe(audio_files=[preview_samples_path / "audio" / "this is the content of the document.wav"]) - assert mock_request.call_args.kwargs["url"] == "https://fake_api_base.com/audio/transcriptions" - - @pytest.mark.skipif( - not os.environ.get("OPENAI_API_KEY", None), - reason="Export an env var called OPENAI_API_KEY containing the OpenAI API key to run this test.", - ) - @pytest.mark.integration 
-    def test_whisper_remote_transcriber(self, preview_samples_path):
-        comp = RemoteWhisperTranscriber(api_key=os.environ.get("OPENAI_API_KEY"))
-
-        output = comp.run(
-            audio_files=[
-                preview_samples_path / "audio" / "this is the content of the document.wav",
-                str((preview_samples_path / "audio" / "the context for this answer is here.wav").absolute()),
-                open(preview_samples_path / "audio" / "answer.wav", "rb"),
-            ]
-        )
-        docs = output["documents"]
-        assert len(docs) == 3
+    def test_run_with_path(self, preview_samples_path):
+        model = "whisper-1"
+        file_path = preview_samples_path / "audio" / "this is the content of the document.wav"
+        with patch("haystack.preview.components.audio.whisper_remote.openai.Audio") as openai_audio_patch:
+            openai_audio_patch.transcribe.side_effect = mock_openai_response

-        assert docs[0].text.strip().lower() == "this is the content of the document."
-        assert (
-            preview_samples_path / "audio" / "this is the content of the document.wav" == docs[0].metadata["audio_file"]
-        )
+            transcriber = RemoteWhisperTranscriber(api_key="test_api_key", model_name=model, response_format="json")

-        assert docs[1].text.strip().lower() == "the context for this answer is here."
-        assert (
-            str((preview_samples_path / "audio" / "the context for this answer is here.wav").absolute())
-            == docs[1].metadata["audio_file"]
-        )
+            result = transcriber.run(audio_files=[file_path])
+
+            assert result["documents"][0].text == f"model: {model}, file: {file_path}, test transcription"

-        assert docs[2].text.strip().lower() == "answer."
-        assert docs[2].metadata["audio_file"] == "<<binary stream>>"
+            open_file = open(file_path, "rb")
+            openai_audio_patch.transcribe.assert_called_once_with(file=open_file, model=model, response_format="json")
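
[Reviewer note, not part of the patch series: a minimal usage sketch of the component as it stands after PATCH 01, assuming OPENAI_API_KEY is exported; "meeting.wav" is a placeholder path, not a file in the repo.

    from haystack.preview.components.audio.whisper_remote import RemoteWhisperTranscriber

    # Extra kwargs such as `language` are forwarded verbatim to the OpenAI
    # audio endpoint; the constructor always forces `response_format` to "json".
    transcriber = RemoteWhisperTranscriber(language="en")
    result = transcriber.run(audio_files=["meeting.wav"])
    print(result["documents"][0].text)
]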
""" documents = [] + for audio_file in audio_files: if isinstance(audio_file, (str, Path)): - audio_file = open(audio_file, "rb") - - content = openai.Audio.transcribe(file=audio_file, model=self.model_name, **self.whisper_params) - - # Set the audio file to <> for document metadata if the path is not present - if not isinstance(audio_file, (str, Path)): - audio_file = "<>" - - doc = Document(text=content["text"], metadata={"audio_file": audio_file}) + if isinstance(audio_file, str): + file_name = audio_file + else: + file_name = str(audio_file.absolute()) + with open(audio_file, "rb") as file: + content = openai.Audio.transcribe(file=file, model=self.model_name, **self.whisper_params) + else: + file_name = "<>" + content = openai.Audio.transcribe(file=audio_file, model=self.model_name, **self.whisper_params) + + doc = Document(text=content["text"], metadata={"audio_file": file_name}) documents.append(doc) return {"documents": documents} diff --git a/test/preview/components/audio/test_whisper_remote.py b/test/preview/components/audio/test_whisper_remote.py index 2b6cc1c62f..3a1a04b6ea 100644 --- a/test/preview/components/audio/test_whisper_remote.py +++ b/test/preview/components/audio/test_whisper_remote.py @@ -1,7 +1,5 @@ -from typing import Union, BinaryIO -from pathlib import Path -from unittest.mock import MagicMock, patch -from copy import deepcopy +from unittest.mock import patch +import os import pytest @@ -13,15 +11,10 @@ from haystack.preview.components.audio.whisper_remote import RemoteWhisperTranscriber -def mock_openai_response( - file: Union[str, Path, BinaryIO], model: str = "whisper-1", response_format="json", **kwargs -) -> openai.openai_object.OpenAIObject: - if isinstance(file, (str, Path)): - file_path = str(file) - else: - file_path = file.name +def mock_openai_response(response_format="json", **kwargs) -> openai.openai_object.OpenAIObject: if response_format == "json": - dict_response = {"text": f"model: {model}, file: str{file_path}, test transcription"} + dict_response = {"text": "test transcription"} + # Currently only "json" is supported. 
     else:
         dict_response = {}

     return convert_to_openai_object(dict_response)


 class TestRemoteWhisperTranscriber:
     @pytest.mark.unit
@@ -175,18 +168,98 @@ def test_from_dict_with_default_parameters_no_env_var(self, monkeypatch):
         with pytest.raises(ValueError, match="RemoteWhisperTranscriber expects an OpenAI API key."):
             RemoteWhisperTranscriber.from_dict(data)

+    @pytest.mark.unit
+    def test_run(self, preview_samples_path):
+        with patch("haystack.preview.components.audio.whisper_remote.openai.Audio") as openai_audio_patch:
+            model = "whisper-1"
+            openai_audio_patch.transcribe.side_effect = mock_openai_response
+
+            transcriber = RemoteWhisperTranscriber(api_key="test_api_key", model_name=model, response_format="json")
+            with open(preview_samples_path / "audio" / "this is the content of the document.wav", "rb") as audio_stream:
+                result = transcriber.run(audio_files=[audio_stream])
+
+            assert result["documents"][0].text == "test transcription"
+
+            openai_audio_patch.transcribe.assert_called_once_with(
+                file=audio_stream, model=model, response_format="json"
+            )
+
     @pytest.mark.unit
     def test_run_with_path(self, preview_samples_path):
-        model = "whisper-1"
-        file_path = preview_samples_path / "audio" / "this is the content of the document.wav"
         with patch("haystack.preview.components.audio.whisper_remote.openai.Audio") as openai_audio_patch:
+            model = "whisper-1"
             openai_audio_patch.transcribe.side_effect = mock_openai_response

             transcriber = RemoteWhisperTranscriber(api_key="test_api_key", model_name=model, response_format="json")

-            result = transcriber.run(audio_files=[file_path])
+            result = transcriber.run(
+                audio_files=[preview_samples_path / "audio" / "this is the content of the document.wav"]
+            )
+
+            expected = Document(
+                text="test transcription",
+                metadata={
+                    "audio_file": str(
+                        (preview_samples_path / "audio" / "this is the content of the document.wav").absolute()
+                    )
+                },
+            )
+            assert result["documents"][0].text == expected.text
+            assert result["documents"][0].metadata == expected.metadata
+
+    @pytest.mark.unit
+    def test_run_with_str(self, preview_samples_path):
+        with patch("haystack.preview.components.audio.whisper_remote.openai.Audio") as openai_audio_patch:
+            model = "whisper-1"
+            openai_audio_patch.transcribe.side_effect = mock_openai_response
+
+            transcriber = RemoteWhisperTranscriber(api_key="test_api_key", model_name=model, response_format="json")
+
+            result = transcriber.run(
+                audio_files=[
+                    str((preview_samples_path / "audio" / "this is the content of the document.wav").absolute())
+                ]
+            )
+
+            expected = Document(
+                text="test transcription",
+                metadata={
+                    "audio_file": str(
+                        (preview_samples_path / "audio" / "this is the content of the document.wav").absolute()
+                    )
+                },
+            )
+            assert result["documents"][0].text == expected.text
+            assert result["documents"][0].metadata == expected.metadata
+
+    @pytest.mark.skipif(
+        not os.environ.get("OPENAI_API_KEY", None),
+        reason="Export an env var called OPENAI_API_KEY containing the OpenAI API key to run this test.",
+    )
+    @pytest.mark.integration
+    def test_whisper_remote_transcriber(self, preview_samples_path):
+        comp = RemoteWhisperTranscriber(api_key=os.environ.get("OPENAI_API_KEY"))
+
+        output = comp.run(
+            audio_files=[
+                preview_samples_path / "audio" / "this is the content of the document.wav",
+                str((preview_samples_path / "audio" / "the context for this answer is here.wav").absolute()),
+                open(preview_samples_path / "audio" / "answer.wav", "rb"),
+            ]
+        )
+        docs = output["documents"]
+        assert len(docs) == 3
+
+        assert docs[0].text.strip().lower() == "this is the content of the document."
+        assert (
+            preview_samples_path / "audio" / "this is the content of the document.wav" == docs[0].metadata["audio_file"]
+        )
+
+        assert docs[1].text.strip().lower() == "the context for this answer is here."
+        assert (
+            str((preview_samples_path / "audio" / "the context for this answer is here.wav").absolute())
+            == docs[1].metadata["audio_file"]
+        )

-            open_file = open(file_path, "rb")
-            openai_audio_patch.transcribe.assert_called_once_with(file=open_file, model=model, response_format="json")
+        assert docs[2].text.strip().lower() == "answer."
+        assert docs[2].metadata["audio_file"] == "<<binary stream>>"
From 0deb5124740fb6d0027be742c56cfbfaaa98ee83 Mon Sep 17 00:00:00 2001
From: awinml <97467100+awinml@users.noreply.github.com>
Date: Mon, 23 Oct 2023 01:59:43 +0530
Subject: [PATCH 03/13] Remove unnecessary imports

---
 haystack/preview/components/audio/whisper_remote.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/haystack/preview/components/audio/whisper_remote.py b/haystack/preview/components/audio/whisper_remote.py
index 16632b53e3..e62cbf7b3e 100644
--- a/haystack/preview/components/audio/whisper_remote.py
+++ b/haystack/preview/components/audio/whisper_remote.py
@@ -1,7 +1,6 @@
-from typing import List, Optional, Dict, Any, Union, BinaryIO, Literal, get_args, Sequence
+from typing import List, Optional, Dict, Any, Union, BinaryIO

 import os
-import json
 import logging
 from pathlib import Path

From 10f6ba2d7f4dd84e90c8d34f7c8708d04010e421 Mon Sep 17 00:00:00 2001
From: awinml <97467100+awinml@users.noreply.github.com>
Date: Mon, 23 Oct 2023 02:12:49 +0530
Subject: [PATCH 04/13] Add release notes

---
 ...te-whisper-transcriber-to-openai-sdk-980ae6f54ddfd7df.yaml | 4 ++++
 1 file changed, 4 insertions(+)
 create mode 100644 releasenotes/notes/migrate-remote-whisper-transcriber-to-openai-sdk-980ae6f54ddfd7df.yaml

diff --git a/releasenotes/notes/migrate-remote-whisper-transcriber-to-openai-sdk-980ae6f54ddfd7df.yaml b/releasenotes/notes/migrate-remote-whisper-transcriber-to-openai-sdk-980ae6f54ddfd7df.yaml
new file mode 100644
index 0000000000..98e61e5f49
--- /dev/null
+++ b/releasenotes/notes/migrate-remote-whisper-transcriber-to-openai-sdk-980ae6f54ddfd7df.yaml
@@ -0,0 +1,4 @@
+---
+preview:
+  - |
+    Migrate RemoteWhisperTranscriber to OpenAI SDK.

From d04fcb137ea41a5a92d1d9b2864b3069e37842bd Mon Sep 17 00:00:00 2001
From: awinml <97467100+awinml@users.noreply.github.com>
Date: Mon, 23 Oct 2023 02:13:47 +0530
Subject: [PATCH 05/13] Fix api_key serialization

---
 haystack/preview/components/audio/whisper_remote.py  |  1 -
 test/preview/components/audio/test_whisper_remote.py | 11 ++++++-----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/haystack/preview/components/audio/whisper_remote.py b/haystack/preview/components/audio/whisper_remote.py
index e62cbf7b3e..cde0a1c5ae 100644
--- a/haystack/preview/components/audio/whisper_remote.py
+++ b/haystack/preview/components/audio/whisper_remote.py
@@ -72,7 +72,6 @@ def __init__(
                     "Set the OPENAI_API_KEY environment variable (recommended) or pass it explicitly."
                ) from e

-        self.api_key = api_key
         self.organization = organization
         self.model_name = model_name
         self.api_base_url = api_base_url

diff --git a/test/preview/components/audio/test_whisper_remote.py b/test/preview/components/audio/test_whisper_remote.py
index 3a1a04b6ea..514c022e38 100644
--- a/test/preview/components/audio/test_whisper_remote.py
+++ b/test/preview/components/audio/test_whisper_remote.py
@@ -33,7 +33,7 @@ def test_init_default(self):
     def test_init_default(self):
         transcriber = RemoteWhisperTranscriber(api_key="test_api_key")

-        assert transcriber.api_key == "test_api_key"
+        assert openai.api_key == "test_api_key"
         assert transcriber.model_name == "whisper-1"
         assert transcriber.organization is None
         assert transcriber.api_base_url == "https://api.openai.com/v1"
@@ -52,7 +52,7 @@ def test_init_custom_parameters(self):
             temperature="0.5",
         )

-        assert transcriber.api_key == "test_api_key"
+        assert openai.api_key == "test_api_key"
         assert transcriber.model_name == "whisper-1"
         assert transcriber.organization == "test-org"
         assert transcriber.api_base_url == "test_api_url"
@@ -118,7 +118,7 @@ def test_from_dict_with_default_parameters(self, monkeypatch):

         transcriber = RemoteWhisperTranscriber.from_dict(data)

-        assert transcriber.api_key == "test_api_key"
+        assert openai.api_key == "test_api_key"
         assert transcriber.model_name == "whisper-1"
         assert transcriber.organization is None
         assert transcriber.api_base_url == "https://api.openai.com/v1"
@@ -141,7 +141,7 @@ def test_from_dict_with_custom_init_parameters(self, monkeypatch):
         }
         transcriber = RemoteWhisperTranscriber.from_dict(data)

-        assert transcriber.api_key == "test_api_key"
+        assert openai.api_key == "test_api_key"
         assert transcriber.model_name == "whisper-1"
         assert transcriber.organization == "test-org"
         assert transcriber.api_base_url == "test_api_url"
@@ -252,7 +252,8 @@ def test_whisper_remote_transcriber(self, preview_samples_path):

         assert docs[0].text.strip().lower() == "this is the content of the document."
         assert (
-            preview_samples_path / "audio" / "this is the content of the document.wav" == docs[0].metadata["audio_file"]
+            str((preview_samples_path / "audio" / "this is the content of the document.wav").absolute())
+            == docs[0].metadata["audio_file"]
         )

         assert docs[1].text.strip().lower() == "the context for this answer is here."
         assert (
             str((preview_samples_path / "audio" / "the context for this answer is here.wav").absolute())
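
[Reviewer note, not part of the patch series: the serialization round trip this patch is protecting, sketched under the assumption that OPENAI_API_KEY is exported when the component is rebuilt.

    transcriber = RemoteWhisperTranscriber(language="en")

    data = transcriber.to_dict()
    # data["init_parameters"] now carries model_name, organization, api_base_url
    # and the whisper kwargs -- but never the API key, so nothing leaks into
    # serialized pipelines.
    restored = RemoteWhisperTranscriber.from_dict(data)
]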
From c57b7ff5d46d52b1262b121648f6cd65296aedee Mon Sep 17 00:00:00 2001 From: awinml <97467100+awinml@users.noreply.github.com> Date: Mon, 23 Oct 2023 14:45:11 +0530 Subject: [PATCH 06/13] Fix linting --- haystack/preview/components/audio/whisper_remote.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/haystack/preview/components/audio/whisper_remote.py b/haystack/preview/components/audio/whisper_remote.py index cde0a1c5ae..56f2e01e07 100644 --- a/haystack/preview/components/audio/whisper_remote.py +++ b/haystack/preview/components/audio/whisper_remote.py @@ -4,11 +4,10 @@ import logging from pathlib import Path -from haystack.preview.utils import request_with_retry -from haystack.preview import component, Document, default_to_dict, default_from_dict - import openai +from haystack.preview import component, Document, default_to_dict, default_from_dict + logger = logging.getLogger(__name__) From bc137032a12cec91bb57ca83309fadd2f503d23d Mon Sep 17 00:00:00 2001 From: Ashwin Mathur <97467100+awinml@users.noreply.github.com> Date: Tue, 24 Oct 2023 18:11:58 +0530 Subject: [PATCH 07/13] Apply suggestions from code review Co-authored-by: ZanSara --- haystack/preview/components/audio/whisper_remote.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/haystack/preview/components/audio/whisper_remote.py b/haystack/preview/components/audio/whisper_remote.py index 56f2e01e07..bfc555d3d1 100644 --- a/haystack/preview/components/audio/whisper_remote.py +++ b/haystack/preview/components/audio/whisper_remote.py @@ -62,6 +62,8 @@ def __init__( temperature until certain thresholds are hit. """ + # if the user does not provide the API key, check if it is set in the module client + api_key = api_key or openai.api_key if api_key is None: try: api_key = os.environ["OPENAI_API_KEY"] @@ -70,6 +72,7 @@ def __init__( "RemoteWhisperTranscriber expects an OpenAI API key. " "Set the OPENAI_API_KEY environment variable (recommended) or pass it explicitly." ) from e + openai.api_key = api_key self.organization = organization self.model_name = model_name @@ -77,10 +80,11 @@ def __init__( # Only response_format = "json" is supported whisper_params = kwargs + if whisper_params.get("response_format") != "json": + logger.warning("RemoteWhisperTranscriber only supports 'response_format: json'. This parameter will be overwritten.") whisper_params["response_format"] = "json" self.whisper_params = whisper_params - openai.api_key = api_key if organization is not None: openai.organization = organization From 8e4eebf74059f555346b1c99cfab7dd86abb49a0 Mon Sep 17 00:00:00 2001 From: awinml <97467100+awinml@users.noreply.github.com> Date: Tue, 24 Oct 2023 21:13:00 +0530 Subject: [PATCH 08/13] Add additional tests for api_key --- .../preview/components/audio/whisper_remote.py | 4 +++- .../components/audio/test_whisper_remote.py | 15 +++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/haystack/preview/components/audio/whisper_remote.py b/haystack/preview/components/audio/whisper_remote.py index bfc555d3d1..14713cb115 100644 --- a/haystack/preview/components/audio/whisper_remote.py +++ b/haystack/preview/components/audio/whisper_remote.py @@ -81,7 +81,9 @@ def __init__( # Only response_format = "json" is supported whisper_params = kwargs if whisper_params.get("response_format") != "json": - logger.warning("RemoteWhisperTranscriber only supports 'response_format: json'. 
         whisper_params["response_format"] = "json"
         self.whisper_params = whisper_params

-        openai.api_key = api_key
         if organization is not None:
             openai.organization = organization

From 8e4eebf74059f555346b1c99cfab7dd86abb49a0 Mon Sep 17 00:00:00 2001
From: awinml <97467100+awinml@users.noreply.github.com>
Date: Tue, 24 Oct 2023 21:13:00 +0530
Subject: [PATCH 08/13] Add additional tests for api_key

---
 .../preview/components/audio/whisper_remote.py  |  4 +++-
 .../components/audio/test_whisper_remote.py     | 15 +++++++++++++++
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/haystack/preview/components/audio/whisper_remote.py b/haystack/preview/components/audio/whisper_remote.py
index bfc555d3d1..14713cb115 100644
--- a/haystack/preview/components/audio/whisper_remote.py
+++ b/haystack/preview/components/audio/whisper_remote.py
@@ -81,7 +81,9 @@ def __init__(
         # Only response_format = "json" is supported
         whisper_params = kwargs
         if whisper_params.get("response_format") != "json":
-            logger.warning("RemoteWhisperTranscriber only supports 'response_format: json'. This parameter will be overwritten.")
+            logger.warning(
+                "RemoteWhisperTranscriber only supports 'response_format: json'. This parameter will be overwritten."
+            )
         whisper_params["response_format"] = "json"
         self.whisper_params = whisper_params

diff --git a/test/preview/components/audio/test_whisper_remote.py b/test/preview/components/audio/test_whisper_remote.py
index 514c022e38..e69fee63e2 100644
--- a/test/preview/components/audio/test_whisper_remote.py
+++ b/test/preview/components/audio/test_whisper_remote.py
@@ -24,11 +24,25 @@ class TestRemoteWhisperTranscriber:
     @pytest.mark.unit
     def test_init_no_key(self, monkeypatch):
+        openai.api_key = None
         monkeypatch.delenv("OPENAI_API_KEY", raising=False)
         error_msg = "RemoteWhisperTranscriber expects an OpenAI API key."
         with pytest.raises(ValueError, match=error_msg):
             RemoteWhisperTranscriber(api_key=None)

+    def test_init_key_env_var(self, monkeypatch):
+        openai.api_key = None
+        monkeypatch.setenv("OPENAI_API_KEY", "test_api_key")
+        RemoteWhisperTranscriber(api_key=None)
+        assert openai.api_key == "test_api_key"
+
+    def test_init_key_module_env_and_global_var(self, monkeypatch):
+        monkeypatch.setenv("OPENAI_API_KEY", "test_api_key_2")
+        openai.api_key = "test_api_key_1"
+        RemoteWhisperTranscriber(api_key=None)
+        # The module global variable takes preference
+        assert openai.api_key == "test_api_key_1"
+
     @pytest.mark.unit
     def test_init_default(self):
         transcriber = RemoteWhisperTranscriber(api_key="test_api_key")
@@ -167,6 +167,7 @@ def test_from_dict_with_default_parameters_no_env_var(self, monkeypatch):
     def test_from_dict_with_default_parameters_no_env_var(self, monkeypatch):
+        openai.api_key = None
         monkeypatch.delenv("OPENAI_API_KEY", raising=False)

         data = {
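
[Reviewer note, not part of the patch series: the key-resolution order these new tests pin down — an explicit `api_key` argument wins, then the `openai.api_key` module global, then the OPENAI_API_KEY environment variable; a ValueError is raised only when all three are unset. A minimal sketch:

    import openai
    openai.api_key = "module-level-key"

    # No explicit argument, so the module global takes precedence
    # over any OPENAI_API_KEY in the environment:
    RemoteWhisperTranscriber(api_key=None)
    assert openai.api_key == "module-level-key"
]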
From 25ecec59d302f5aa9a938415c8c0aad9ea8c50da Mon Sep 17 00:00:00 2001
From: awinml <97467100+awinml@users.noreply.github.com>
Date: Wed, 25 Oct 2023 01:49:43 +0530
Subject: [PATCH 09/13] Adapt .run() to take ByteStream inputs

---
 .../components/audio/whisper_remote.py        | 23 ++---
 .../components/audio/test_whisper_remote.py   | 97 ++++++-------------
 2 files changed, 35 insertions(+), 85 deletions(-)

diff --git a/haystack/preview/components/audio/whisper_remote.py b/haystack/preview/components/audio/whisper_remote.py
index 14713cb115..70ca58d905 100644
--- a/haystack/preview/components/audio/whisper_remote.py
+++ b/haystack/preview/components/audio/whisper_remote.py
@@ -1,13 +1,12 @@
-from typing import List, Optional, Dict, Any, Union, BinaryIO
+from typing import List, Optional, Dict, Any

 import os
 import logging
-from pathlib import Path

 import openai

 from haystack.preview import component, Document, default_to_dict, default_from_dict
-
+from haystack.preview.dataclasses import ByteStream

 logger = logging.getLogger(__name__)

@@ -112,7 +111,7 @@ def from_dict(cls, data: Dict[str, Any]) -> "RemoteWhisperTranscriber":
         return default_from_dict(cls, data)

     @component.output_types(documents=List[Document])
-    def run(self, audio_files: List[Union[str, Path, BinaryIO]]):
+    def run(self, streams: List[ByteStream]):
         """
         Transcribe the audio files into a list of Documents, one for each input file.

@@ -128,19 +127,9 @@ def run(self, streams: List[ByteStream]):
         """
         documents = []

-        for audio_file in audio_files:
-            if isinstance(audio_file, (str, Path)):
-                if isinstance(audio_file, str):
-                    file_name = audio_file
-                else:
-                    file_name = str(audio_file.absolute())
-                with open(audio_file, "rb") as file:
-                    content = openai.Audio.transcribe(file=file, model=self.model_name, **self.whisper_params)
-            else:
-                file_name = "<<binary stream>>"
-                content = openai.Audio.transcribe(file=audio_file, model=self.model_name, **self.whisper_params)
-
-            doc = Document(text=content["text"], metadata={"audio_file": file_name})
+        for stream in streams:
+            content = openai.Audio.transcribe(file=stream.data, model=self.model_name, **self.whisper_params)
+            doc = Document(text=content["text"], metadata=stream.metadata)
             documents.append(doc)

         return {"documents": documents}

diff --git a/test/preview/components/audio/test_whisper_remote.py b/test/preview/components/audio/test_whisper_remote.py
index e69fee63e2..9b799243a5 100644
--- a/test/preview/components/audio/test_whisper_remote.py
+++ b/test/preview/components/audio/test_whisper_remote.py
@@ -1,13 +1,10 @@
 from unittest.mock import patch
 import os
-
-
 import pytest
-
 import openai
 from openai.util import convert_to_openai_object
-
-from haystack.preview.dataclasses import Document
+from haystack.preview.dataclasses import ByteStream
 from haystack.preview.components.audio.whisper_remote import RemoteWhisperTranscriber

     def test_run(self, preview_samples_path):
         with patch("haystack.preview.components.audio.whisper_remote.openai.Audio") as openai_audio_patch:
             model = "whisper-1"
+            file_path = preview_samples_path / "audio" / "this is the content of the document.wav"
             openai_audio_patch.transcribe.side_effect = mock_openai_response

             transcriber = RemoteWhisperTranscriber(api_key="test_api_key", model_name=model, response_format="json")
-            with open(preview_samples_path / "audio" / "this is the content of the document.wav", "rb") as audio_stream:
-                result = transcriber.run(audio_files=[audio_stream])
+            with open(file_path, "rb") as audio_stream:
+                byte_stream = audio_stream.read()
+                audio_file = ByteStream(byte_stream, metadata={"file_path": str(file_path.absolute())})
+                result = transcriber.run(streams=[audio_file])

             assert result["documents"][0].text == "test transcription"
+            assert result["documents"][0].metadata["file_path"] == str(file_path.absolute())

             openai_audio_patch.transcribe.assert_called_once_with(
-                file=audio_stream, model=model, response_format="json"
+                file=audio_file.data, model=model, response_format="json"
             )

-    @pytest.mark.unit
-    def test_run_with_path(self, preview_samples_path):
-        with patch("haystack.preview.components.audio.whisper_remote.openai.Audio") as openai_audio_patch:
-            model = "whisper-1"
-            openai_audio_patch.transcribe.side_effect = mock_openai_response
-
-            transcriber = RemoteWhisperTranscriber(api_key="test_api_key", model_name=model, response_format="json")
-
-            result = transcriber.run(
-                audio_files=[preview_samples_path / "audio" / "this is the content of the document.wav"]
-            )
-
-            expected = Document(
-                text="test transcription",
-                metadata={
-                    "audio_file": str(
-                        (preview_samples_path / "audio" / "this is the content of the document.wav").absolute()
-                    )
-                },
-            )
-            assert result["documents"][0].text == expected.text
-            assert result["documents"][0].metadata == expected.metadata
-
-    @pytest.mark.unit
-    def test_run_with_str(self, preview_samples_path):
-        with patch("haystack.preview.components.audio.whisper_remote.openai.Audio") as openai_audio_patch:
-            model = "whisper-1"
-            openai_audio_patch.transcribe.side_effect = mock_openai_response
-
-            transcriber = RemoteWhisperTranscriber(api_key="test_api_key", model_name=model, response_format="json")
-
-            result = transcriber.run(
-                audio_files=[
-                    str((preview_samples_path / "audio" / "this is the content of the document.wav").absolute())
-                ]
-            )
-
-            expected = Document(
-                text="test transcription",
-                metadata={
-                    "audio_file": str(
-                        (preview_samples_path / "audio" / "this is the content of the document.wav").absolute()
-                    )
-                },
-            )
-            assert result["documents"][0].text == expected.text
-            assert result["documents"][0].metadata == expected.metadata

     @pytest.mark.skipif(
         not os.environ.get("OPENAI_API_KEY", None),
         reason="Export an env var called OPENAI_API_KEY containing the OpenAI API key to run this test.",
     )
     @pytest.mark.integration
     def test_whisper_remote_transcriber(self, preview_samples_path):
-        comp = RemoteWhisperTranscriber(api_key=os.environ.get("OPENAI_API_KEY"))
-
-        output = comp.run(
-            audio_files=[
-                preview_samples_path / "audio" / "this is the content of the document.wav",
-                str((preview_samples_path / "audio" / "the context for this answer is here.wav").absolute()),
-                open(preview_samples_path / "audio" / "answer.wav", "rb"),
-            ]
-        )
+        transcriber = RemoteWhisperTranscriber(api_key=os.environ.get("OPENAI_API_KEY"))
+
+        paths = [
+            preview_samples_path / "audio" / "this is the content of the document.wav",
+            preview_samples_path / "audio" / "the context for this answer is here.wav",
+            preview_samples_path / "audio" / "answer.wav",
+        ]
+
+        audio_files = []
+        for file_path in paths:
+            with open(file_path, "rb") as audio_stream:
+                byte_stream = audio_stream.read()
+                audio_file = ByteStream(byte_stream, metadata={"file_path": str(file_path.absolute())})
+                audio_files.append(audio_file)
+
+        output = transcriber.run(streams=audio_files)
+
         docs = output["documents"]
         assert len(docs) == 3
+
         assert docs[0].text.strip().lower() == "this is the content of the document."
         assert (
             str((preview_samples_path / "audio" / "this is the content of the document.wav").absolute())
-            == docs[0].metadata["audio_file"]
+            == docs[0].metadata["file_path"]
         )

         assert docs[1].text.strip().lower() == "the context for this answer is here."
         assert (
             str((preview_samples_path / "audio" / "the context for this answer is here.wav").absolute())
-            == docs[1].metadata["audio_file"]
+            == docs[1].metadata["file_path"]
         )

         assert docs[2].text.strip().lower() == "answer."
-        assert docs[2].metadata["audio_file"] == "<<binary stream>>"
+        assert str((preview_samples_path / "audio" / "answer.wav").absolute()) == docs[2].metadata["file_path"]
From d069035cd462628bb3399c53bc7f427d41228558 Mon Sep 17 00:00:00 2001
From: awinml <97467100+awinml@users.noreply.github.com>
Date: Wed, 25 Oct 2023 02:14:54 +0530
Subject: [PATCH 10/13] Update docstrings

---
 haystack/preview/components/audio/whisper_remote.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/haystack/preview/components/audio/whisper_remote.py b/haystack/preview/components/audio/whisper_remote.py
index 70ca58d905..9d255a1bed 100644
--- a/haystack/preview/components/audio/whisper_remote.py
+++ b/haystack/preview/components/audio/whisper_remote.py
@@ -119,11 +119,8 @@ def run(self, streams: List[ByteStream]):
         [Whisper API documentation](https://platform.openai.com/docs/guides/speech-to-text) and the official Whisper
         [github repo](https://github.com/openai/whisper).

-        :param audio_files: a list of paths or binary streams to transcribe
-        :returns: a list of Documents, one for each file. The content of the
-            document is the transcription text, while the document's metadata
-            contains a key called `audio_file`, which contains the path to the
-            audio file used for the transcription.
+        :param streams: a list of ByteStream objects to transcribe.
+        :returns: a list of Documents, one for each file. The content of the document is the transcription text.
         """

From 212c94fc494ded2a26b710cf8962b8af347fd81e Mon Sep 17 00:00:00 2001
From: awinml <97467100+awinml@users.noreply.github.com>
Date: Thu, 26 Oct 2023 13:31:43 +0530
Subject: [PATCH 11/13] Rework implementation to use io.BytesIO

---
 .../components/audio/whisper_remote.py        | 20 ++++++++++++++-----
 .../components/audio/test_whisper_remote.py   | 12 +++++------
 2 files changed, 20 insertions(+), 12 deletions(-)

diff --git a/haystack/preview/components/audio/whisper_remote.py b/haystack/preview/components/audio/whisper_remote.py
index 9d255a1bed..275a6c478b 100644
--- a/haystack/preview/components/audio/whisper_remote.py
+++ b/haystack/preview/components/audio/whisper_remote.py
@@ -1,11 +1,11 @@
-from typing import List, Optional, Dict, Any
-
+import io
+import logging
 import os
-import logging
+from typing import Any, Dict, List, Optional

 import openai

-from haystack.preview import component, Document, default_to_dict, default_from_dict
+from haystack.preview import Document, component, default_from_dict, default_to_dict
 from haystack.preview.dataclasses import ByteStream

 logger = logging.getLogger(__name__)
@@ -125,7 +125,17 @@ def run(self, streams: List[ByteStream]):
         documents = []

         for stream in streams:
-            content = openai.Audio.transcribe(file=stream.data, model=self.model_name, **self.whisper_params)
+            try:
+                file = io.BytesIO(stream.data)
+                file.name = stream.metadata["file_path"]
+            except Exception as e:
+                logger.warning(
+                    "Could not read audio file. Skipping it. Make sure the 'file_path' is present in the metadata. Error message: %s",
Error message: %s", + e, + ) + continue + + content = openai.Audio.transcribe(file=file, model=self.model_name, **self.whisper_params) doc = Document(text=content["text"], metadata=stream.metadata) documents.append(doc) diff --git a/test/preview/components/audio/test_whisper_remote.py b/test/preview/components/audio/test_whisper_remote.py index 9b799243a5..3fae7fc3e3 100644 --- a/test/preview/components/audio/test_whisper_remote.py +++ b/test/preview/components/audio/test_whisper_remote.py @@ -1,11 +1,12 @@ -from unittest.mock import patch import os -import pytest +from unittest.mock import patch + import openai +import pytest from openai.util import convert_to_openai_object -from haystack.preview.dataclasses import ByteStream from haystack.preview.components.audio.whisper_remote import RemoteWhisperTranscriber +from haystack.preview.dataclasses import ByteStream def mock_openai_response(response_format="json", **kwargs) -> openai.openai_object.OpenAIObject: @@ -191,15 +192,12 @@ def test_run(self, preview_samples_path): with open(file_path, "rb") as audio_stream: byte_stream = audio_stream.read() audio_file = ByteStream(byte_stream, metadata={"file_path": str(file_path.absolute())}) + result = transcriber.run(streams=[audio_file]) assert result["documents"][0].text == "test transcription" assert result["documents"][0].metadata["file_path"] == str(file_path.absolute()) - openai_audio_patch.transcribe.assert_called_once_with( - file=audio_file.data, model=model, response_format="json" - ) - @pytest.mark.skipif( not os.environ.get("OPENAI_API_KEY", None), reason="Export an env var called OPENAI_API_KEY containing the OpenAI API key to run this test.", From 3714122f91148eb31a7e7ff7631d07156ff2470f Mon Sep 17 00:00:00 2001 From: awinml <97467100+awinml@users.noreply.github.com> Date: Thu, 26 Oct 2023 14:07:41 +0530 Subject: [PATCH 12/13] Update error message --- haystack/preview/components/audio/whisper_remote.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/haystack/preview/components/audio/whisper_remote.py b/haystack/preview/components/audio/whisper_remote.py index 275a6c478b..65f64df290 100644 --- a/haystack/preview/components/audio/whisper_remote.py +++ b/haystack/preview/components/audio/whisper_remote.py @@ -128,7 +128,7 @@ def run(self, streams: List[ByteStream]): try: file = io.BytesIO(stream.data) file.name = stream.metadata["file_path"] - except Exception as e: + except KeyError as e: logger.warning( "Could not read audio file. Skipping it. Make sure the 'file_path' is present in the metadata. Error message: %s", e, From 5fbbbb5c17fc5dcbce196c49cfd14c562577c63c Mon Sep 17 00:00:00 2001 From: awinml <97467100+awinml@users.noreply.github.com> Date: Thu, 26 Oct 2023 18:33:01 +0530 Subject: [PATCH 13/13] Add default file name --- haystack/preview/components/audio/whisper_remote.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/haystack/preview/components/audio/whisper_remote.py b/haystack/preview/components/audio/whisper_remote.py index 65f64df290..4ef4821b30 100644 --- a/haystack/preview/components/audio/whisper_remote.py +++ b/haystack/preview/components/audio/whisper_remote.py @@ -125,15 +125,11 @@ def run(self, streams: List[ByteStream]): documents = [] for stream in streams: + file = io.BytesIO(stream.data) try: - file = io.BytesIO(stream.data) file.name = stream.metadata["file_path"] - except KeyError as e: - logger.warning( - "Could not read audio file. Skipping it. Make sure the 'file_path' is present in the metadata. 
Error message: %s", - e, - ) - continue + except KeyError: + file.name = "audio_input.wav" content = openai.Audio.transcribe(file=file, model=self.model_name, **self.whisper_params) doc = Document(text=content["text"], metadata=stream.metadata)