diff --git a/.github/workflows/unittests.yml b/.github/workflows/unittests.yml index aa81884a..bf6233be 100644 --- a/.github/workflows/unittests.yml +++ b/.github/workflows/unittests.yml @@ -15,7 +15,7 @@ jobs: strategy: fail-fast: true matrix: - python-version: ["3.7", "3.8", "3.9", "3.10"] + python-version: ["3.8", "3.9", "3.10"] steps: - uses: actions/checkout@v3 diff --git a/README.rst b/README.rst index f9bde14e..410e289d 100644 --- a/README.rst +++ b/README.rst @@ -39,6 +39,7 @@ Speech recognition engine/API support: * `Tensorflow `__ * `Vosk API `__ (works offline) * `OpenAI whisper `__ (works offline) +* `Whisper API `__ **Quickstart:** ``pip install SpeechRecognition``. See the "Installing" section for more details. @@ -88,13 +89,14 @@ Requirements To use all of the functionality of the library, you should have: -* **Python** 3.7+ (required) +* **Python** 3.8+ (required) * **PyAudio** 0.2.11+ (required only if you need to use microphone input, ``Microphone``) * **PocketSphinx** (required only if you need to use the Sphinx recognizer, ``recognizer_instance.recognize_sphinx``) * **Google API Client Library for Python** (required only if you need to use the Google Cloud Speech API, ``recognizer_instance.recognize_google_cloud``) * **FLAC encoder** (required only if the system is not x86-based Windows/Linux/OS X) * **Vosk** (required only if you need to use Vosk API speech recognition ``recognizer_instance.recognize_vosk``) * **Whisper** (required only if you need to use Whisper ``recognizer_instance.recognize_whisper``) +* **openai** (required only if you need to use Whisper API speech recognition ``recognizer_instance.recognize_whisper_api``) The following requirements are optional, but can improve or extend functionality in some situations: @@ -105,7 +107,7 @@ The following sections go over the details of each requirement. Python ~~~~~~ -The first software requirement is `Python 3.7+ `__. This is required to use the library. 
+The first software requirement is `Python 3.8+ `__. This is required to use the library. PyAudio (for microphone users) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -169,6 +171,15 @@ Whisper is **required if and only if you want to use whisper** (``recognizer_ins You can install it with ``python3 -m pip install git+https://github.com/openai/whisper.git soundfile``. +Whisper API (for Whisper API users) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The library `openai `__ is **required if and only if you want to use Whisper API** (``recognizer_instance.recognize_whisper_api``). + +If not installed, everything in the library will still work, except calling ``recognizer_instance.recognize_whisper_api`` will raise a ``SetupError``. + +You can install it with ``python3 -m pip install openai``. + Troubleshooting --------------- diff --git a/examples/microphone_recognition.py b/examples/microphone_recognition.py index 56168b29..c46b412a 100644 --- a/examples/microphone_recognition.py +++ b/examples/microphone_recognition.py @@ -92,3 +92,10 @@ print("Whisper could not understand audio") except sr.RequestError as e: print("Could not request results from Whisper") + +# recognize speech using Whisper API +OPENAI_API_KEY = "INSERT OPENAI API KEY HERE" +try: + print(f"Whisper API thinks you said {r.recognize_whisper_api(audio, api_key=OPENAI_API_KEY)}") +except sr.RequestError as e: + print("Could not request results from Whisper API") diff --git a/reference/library-reference.rst b/reference/library-reference.rst index 7323bd9b..0aa7a8ce 100644 --- a/reference/library-reference.rst +++ b/reference/library-reference.rst @@ -314,6 +314,17 @@ You can translate the result to english with Whisper by passing translate=True Other values are passed directly to whisper.
See https://github.com/openai/whisper/blob/main/whisper/transcribe.py for all options +``recognizer_instance.recognize_whisper_api(audio_data: AudioData, model: str = "whisper-1", api_key: str | None = None)`` +-------------------------------------------------------------------------------------------------------------------------- + +Performs speech recognition on ``audio_data`` (an ``AudioData`` instance), using the OpenAI Whisper API. + +This function requires an OpenAI account; visit https://platform.openai.com/signup, then generate API Key in `User settings `__. + +Detail: https://platform.openai.com/docs/guides/speech-to-text + +Raises a ``speech_recognition.exceptions.SetupError`` exception if there are any issues with the openai installation, or the environment variable is missing. + ``AudioSource`` --------------- diff --git a/setup.cfg b/setup.cfg index 3af79a7b..69c937c4 100644 --- a/setup.cfg +++ b/setup.cfg @@ -2,3 +2,7 @@ # the `universal` setting means that the project runs unmodified on both Python 2 and 3, # and doesn't use any C extensions to Python universal=1 + +[options.extras_require] +whisper-api = + openai diff --git a/setup.py b/setup.py index 231e390d..2b10a084 100644 --- a/setup.py +++ b/setup.py @@ -58,13 +58,12 @@ def run(self): "Operating System :: Other OS", "Programming Language :: Python", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Topic :: Software Development :: Libraries :: Python Modules", "Topic :: Multimedia :: Sound/Audio :: Speech", ], - python_requires=">=3.7", + python_requires=">=3.8", install_requires=['requests>=2.26.0'], ) diff --git a/speech_recognition/__init__.py b/speech_recognition/__init__.py index 66ebc04c..8365d8e3 100644 --- a/speech_recognition/__init__.py +++ b/speech_recognition/__init__.py @@ -15,13 +15,10 @@ import json import base64 import 
threading -import platform -import stat import hashlib import hmac import time import uuid -from pprint import pprint try: import requests @@ -36,20 +33,15 @@ from urllib.request import Request, urlopen from urllib.error import URLError, HTTPError - -class WaitTimeoutError(Exception): pass - - -class RequestError(Exception): pass - - -class UnknownValueError(Exception): pass - - -class TranscriptionNotReady(Exception): pass - - -class TranscriptionFailed(Exception): pass +from .audio import AudioData, get_flac_converter +from .exceptions import ( + RequestError, + TranscriptionFailed, + TranscriptionNotReady, + UnknownValueError, + WaitTimeoutError, +) +from .recognizers import whisper class AudioSource(object): @@ -331,180 +323,6 @@ def read(self, size=-1): return buffer -class AudioData(object): - """ - Creates a new ``AudioData`` instance, which represents mono audio data. - - The raw audio data is specified by ``frame_data``, which is a sequence of bytes representing audio samples. This is the frame data structure used by the PCM WAV format. - - The width of each sample, in bytes, is specified by ``sample_width``. Each group of ``sample_width`` bytes represents a single audio sample. - - The audio data is assumed to have a sample rate of ``sample_rate`` samples per second (Hertz). - - Usually, instances of this class are obtained from ``recognizer_instance.record`` or ``recognizer_instance.listen``, or in the callback for ``recognizer_instance.listen_in_background``, rather than instantiating them directly. 
- """ - def __init__(self, frame_data, sample_rate, sample_width): - assert sample_rate > 0, "Sample rate must be a positive integer" - assert sample_width % 1 == 0 and 1 <= sample_width <= 4, "Sample width must be between 1 and 4 inclusive" - self.frame_data = frame_data - self.sample_rate = sample_rate - self.sample_width = int(sample_width) - - def get_segment(self, start_ms=None, end_ms=None): - """ - Returns a new ``AudioData`` instance, trimmed to a given time interval. In other words, an ``AudioData`` instance with the same audio data except starting at ``start_ms`` milliseconds in and ending ``end_ms`` milliseconds in. - - If not specified, ``start_ms`` defaults to the beginning of the audio, and ``end_ms`` defaults to the end. - """ - assert start_ms is None or start_ms >= 0, "``start_ms`` must be a non-negative number" - assert end_ms is None or end_ms >= (0 if start_ms is None else start_ms), "``end_ms`` must be a non-negative number greater or equal to ``start_ms``" - if start_ms is None: - start_byte = 0 - else: - start_byte = int((start_ms * self.sample_rate * self.sample_width) // 1000) - if end_ms is None: - end_byte = len(self.frame_data) - else: - end_byte = int((end_ms * self.sample_rate * self.sample_width) // 1000) - return AudioData(self.frame_data[start_byte:end_byte], self.sample_rate, self.sample_width) - - def get_raw_data(self, convert_rate=None, convert_width=None): - """ - Returns a byte string representing the raw frame data for the audio represented by the ``AudioData`` instance. - - If ``convert_rate`` is specified and the audio sample rate is not ``convert_rate`` Hz, the resulting audio is resampled to match. - - If ``convert_width`` is specified and the audio samples are not ``convert_width`` bytes each, the resulting audio is converted to match. - - Writing these bytes directly to a file results in a valid `RAW/PCM audio file `__. 
- """ - assert convert_rate is None or convert_rate > 0, "Sample rate to convert to must be a positive integer" - assert convert_width is None or (convert_width % 1 == 0 and 1 <= convert_width <= 4), "Sample width to convert to must be between 1 and 4 inclusive" - - raw_data = self.frame_data - - # make sure unsigned 8-bit audio (which uses unsigned samples) is handled like higher sample width audio (which uses signed samples) - if self.sample_width == 1: - raw_data = audioop.bias(raw_data, 1, -128) # subtract 128 from every sample to make them act like signed samples - - # resample audio at the desired rate if specified - if convert_rate is not None and self.sample_rate != convert_rate: - raw_data, _ = audioop.ratecv(raw_data, self.sample_width, 1, self.sample_rate, convert_rate, None) - - # convert samples to desired sample width if specified - if convert_width is not None and self.sample_width != convert_width: - if convert_width == 3: # we're converting the audio into 24-bit (workaround for https://bugs.python.org/issue12866) - raw_data = audioop.lin2lin(raw_data, self.sample_width, 4) # convert audio into 32-bit first, which is always supported - try: audioop.bias(b"", 3, 0) # test whether 24-bit audio is supported (for example, ``audioop`` in Python 3.3 and below don't support sample width 3, while Python 3.4+ do) - except audioop.error: # this version of audioop doesn't support 24-bit audio (probably Python 3.3 or less) - raw_data = b"".join(raw_data[i + 1:i + 4] for i in range(0, len(raw_data), 4)) # since we're in little endian, we discard the first byte from each 32-bit sample to get a 24-bit sample - else: # 24-bit audio fully supported, we don't need to shim anything - raw_data = audioop.lin2lin(raw_data, self.sample_width, convert_width) - else: - raw_data = audioop.lin2lin(raw_data, self.sample_width, convert_width) - - # if the output is 8-bit audio with unsigned samples, convert the samples we've been treating as signed to unsigned again - if 
convert_width == 1: - raw_data = audioop.bias(raw_data, 1, 128) # add 128 to every sample to make them act like unsigned samples again - - return raw_data - - def get_wav_data(self, convert_rate=None, convert_width=None): - """ - Returns a byte string representing the contents of a WAV file containing the audio represented by the ``AudioData`` instance. - - If ``convert_width`` is specified and the audio samples are not ``convert_width`` bytes each, the resulting audio is converted to match. - - If ``convert_rate`` is specified and the audio sample rate is not ``convert_rate`` Hz, the resulting audio is resampled to match. - - Writing these bytes directly to a file results in a valid `WAV file `__. - """ - raw_data = self.get_raw_data(convert_rate, convert_width) - sample_rate = self.sample_rate if convert_rate is None else convert_rate - sample_width = self.sample_width if convert_width is None else convert_width - - # generate the WAV file contents - with io.BytesIO() as wav_file: - wav_writer = wave.open(wav_file, "wb") - try: # note that we can't use context manager, since that was only added in Python 3.4 - wav_writer.setframerate(sample_rate) - wav_writer.setsampwidth(sample_width) - wav_writer.setnchannels(1) - wav_writer.writeframes(raw_data) - wav_data = wav_file.getvalue() - finally: # make sure resources are cleaned up - wav_writer.close() - return wav_data - - def get_aiff_data(self, convert_rate=None, convert_width=None): - """ - Returns a byte string representing the contents of an AIFF-C file containing the audio represented by the ``AudioData`` instance. - - If ``convert_width`` is specified and the audio samples are not ``convert_width`` bytes each, the resulting audio is converted to match. - - If ``convert_rate`` is specified and the audio sample rate is not ``convert_rate`` Hz, the resulting audio is resampled to match. - - Writing these bytes directly to a file results in a valid `AIFF-C file `__. 
- """ - raw_data = self.get_raw_data(convert_rate, convert_width) - sample_rate = self.sample_rate if convert_rate is None else convert_rate - sample_width = self.sample_width if convert_width is None else convert_width - - # the AIFF format is big-endian, so we need to convert the little-endian raw data to big-endian - if hasattr(audioop, "byteswap"): # ``audioop.byteswap`` was only added in Python 3.4 - raw_data = audioop.byteswap(raw_data, sample_width) - else: # manually reverse the bytes of each sample, which is slower but works well enough as a fallback - raw_data = raw_data[sample_width - 1::-1] + b"".join(raw_data[i + sample_width:i:-1] for i in range(sample_width - 1, len(raw_data), sample_width)) - - # generate the AIFF-C file contents - with io.BytesIO() as aiff_file: - aiff_writer = aifc.open(aiff_file, "wb") - try: # note that we can't use context manager, since that was only added in Python 3.4 - aiff_writer.setframerate(sample_rate) - aiff_writer.setsampwidth(sample_width) - aiff_writer.setnchannels(1) - aiff_writer.writeframes(raw_data) - aiff_data = aiff_file.getvalue() - finally: # make sure resources are cleaned up - aiff_writer.close() - return aiff_data - - def get_flac_data(self, convert_rate=None, convert_width=None): - """ - Returns a byte string representing the contents of a FLAC file containing the audio represented by the ``AudioData`` instance. - - Note that 32-bit FLAC is not supported. If the audio data is 32-bit and ``convert_width`` is not specified, then the resulting FLAC will be a 24-bit FLAC. - - If ``convert_rate`` is specified and the audio sample rate is not ``convert_rate`` Hz, the resulting audio is resampled to match. - - If ``convert_width`` is specified and the audio samples are not ``convert_width`` bytes each, the resulting audio is converted to match. - - Writing these bytes directly to a file results in a valid `FLAC file `__. 
- """ - assert convert_width is None or (convert_width % 1 == 0 and 1 <= convert_width <= 3), "Sample width to convert to must be between 1 and 3 inclusive" - - if self.sample_width > 3 and convert_width is None: # resulting WAV data would be 32-bit, which is not convertable to FLAC using our encoder - convert_width = 3 # the largest supported sample width is 24-bit, so we'll limit the sample width to that - - # run the FLAC converter with the WAV data to get the FLAC data - wav_data = self.get_wav_data(convert_rate, convert_width) - flac_converter = get_flac_converter() - if os.name == "nt": # on Windows, specify that the process is to be started without showing a console window - startup_info = subprocess.STARTUPINFO() - startup_info.dwFlags |= subprocess.STARTF_USESHOWWINDOW # specify that the wShowWindow field of `startup_info` contains a value - startup_info.wShowWindow = subprocess.SW_HIDE # specify that the console window should be hidden - else: - startup_info = None # default startupinfo - process = subprocess.Popen([ - flac_converter, - "--stdout", "--totally-silent", # put the resulting FLAC file in stdout, and make sure it's not mixed with any program output - "--best", # highest level of compression available - "-", # the input FLAC file contents will be given in stdin - ], stdin=subprocess.PIPE, stdout=subprocess.PIPE, startupinfo=startup_info) - flac_data, stderr = process.communicate(wav_data) - return flac_data - - class Recognizer(AudioSource): def __init__(self): """ @@ -1683,6 +1501,7 @@ def recognize_whisper(self, audio_data, model="base", show_dict=False, load_opti else: return result["text"] + recognize_whisper_api = whisper.recognize_whisper_api def recognize_vosk(self, audio_data, language='en'): from vosk import Model, KaldiRecognizer @@ -1702,47 +1521,6 @@ def recognize_vosk(self, audio_data, language='en'): return finalRecognition -def get_flac_converter(): - """Returns the absolute path of a FLAC converter executable, or raises an 
OSError if none can be found.""" - flac_converter = shutil_which("flac") # check for installed version first - if flac_converter is None: # flac utility is not installed - base_path = os.path.dirname(os.path.abspath(__file__)) # directory of the current module file, where all the FLAC bundled binaries are stored - system, machine = platform.system(), platform.machine() - if system == "Windows" and machine in {"i686", "i786", "x86", "x86_64", "AMD64"}: - flac_converter = os.path.join(base_path, "flac-win32.exe") - elif system == "Darwin" and machine in {"i686", "i786", "x86", "x86_64", "AMD64"}: - flac_converter = os.path.join(base_path, "flac-mac") - elif system == "Linux" and machine in {"i686", "i786", "x86"}: - flac_converter = os.path.join(base_path, "flac-linux-x86") - elif system == "Linux" and machine in {"x86_64", "AMD64"}: - flac_converter = os.path.join(base_path, "flac-linux-x86_64") - else: # no FLAC converter available - raise OSError("FLAC conversion utility not available - consider installing the FLAC command line application by running `apt-get install flac` or your operating system's equivalent") - - # mark FLAC converter as executable if possible - try: - # handle known issue when running on docker: - # run executable right after chmod() may result in OSError "Text file busy" - # fix: flush FS with sync - if not os.access(flac_converter, os.X_OK): - stat_info = os.stat(flac_converter) - os.chmod(flac_converter, stat_info.st_mode | stat.S_IEXEC) - if 'Linux' in platform.system(): - os.sync() if sys.version_info >= (3, 3) else os.system('sync') - - except OSError: pass - - return flac_converter - - -def shutil_which(pgm): - """Python 2 compatibility: backport of ``shutil.which()`` from Python 3""" - path = os.getenv('PATH') - for p in path.split(os.path.pathsep): - p = os.path.join(p, pgm) - if os.path.exists(p) and os.access(p, os.X_OK): - return p - class PortableNamedTemporaryFile(object): """Limited replacement for 
``tempfile.NamedTemporaryFile``, except unlike ``tempfile.NamedTemporaryFile``, the file can be opened again while it's currently open, even on Windows.""" diff --git a/speech_recognition/audio.py b/speech_recognition/audio.py new file mode 100644 index 00000000..732f7e01 --- /dev/null +++ b/speech_recognition/audio.py @@ -0,0 +1,317 @@ +import aifc +import audioop +import io +import os +import platform +import stat +import subprocess +import sys +import wave + + +class AudioData(object): + """ + Creates a new ``AudioData`` instance, which represents mono audio data. + + The raw audio data is specified by ``frame_data``, which is a sequence of bytes representing audio samples. This is the frame data structure used by the PCM WAV format. + + The width of each sample, in bytes, is specified by ``sample_width``. Each group of ``sample_width`` bytes represents a single audio sample. + + The audio data is assumed to have a sample rate of ``sample_rate`` samples per second (Hertz). + + Usually, instances of this class are obtained from ``recognizer_instance.record`` or ``recognizer_instance.listen``, or in the callback for ``recognizer_instance.listen_in_background``, rather than instantiating them directly. + """ + + def __init__(self, frame_data, sample_rate, sample_width): + assert sample_rate > 0, "Sample rate must be a positive integer" + assert ( + sample_width % 1 == 0 and 1 <= sample_width <= 4 + ), "Sample width must be between 1 and 4 inclusive" + self.frame_data = frame_data + self.sample_rate = sample_rate + self.sample_width = int(sample_width) + + def get_segment(self, start_ms=None, end_ms=None): + """ + Returns a new ``AudioData`` instance, trimmed to a given time interval. In other words, an ``AudioData`` instance with the same audio data except starting at ``start_ms`` milliseconds in and ending ``end_ms`` milliseconds in. + + If not specified, ``start_ms`` defaults to the beginning of the audio, and ``end_ms`` defaults to the end. 
+ """ + assert ( + start_ms is None or start_ms >= 0 + ), "``start_ms`` must be a non-negative number" + assert end_ms is None or end_ms >= ( + 0 if start_ms is None else start_ms + ), "``end_ms`` must be a non-negative number greater or equal to ``start_ms``" + if start_ms is None: + start_byte = 0 + else: + start_byte = int( + (start_ms * self.sample_rate * self.sample_width) // 1000 + ) + if end_ms is None: + end_byte = len(self.frame_data) + else: + end_byte = int( + (end_ms * self.sample_rate * self.sample_width) // 1000 + ) + return AudioData( + self.frame_data[start_byte:end_byte], + self.sample_rate, + self.sample_width, + ) + + def get_raw_data(self, convert_rate=None, convert_width=None): + """ + Returns a byte string representing the raw frame data for the audio represented by the ``AudioData`` instance. + + If ``convert_rate`` is specified and the audio sample rate is not ``convert_rate`` Hz, the resulting audio is resampled to match. + + If ``convert_width`` is specified and the audio samples are not ``convert_width`` bytes each, the resulting audio is converted to match. + + Writing these bytes directly to a file results in a valid `RAW/PCM audio file `__. 
+ """ + assert ( + convert_rate is None or convert_rate > 0 + ), "Sample rate to convert to must be a positive integer" + assert convert_width is None or ( + convert_width % 1 == 0 and 1 <= convert_width <= 4 + ), "Sample width to convert to must be between 1 and 4 inclusive" + + raw_data = self.frame_data + + # make sure unsigned 8-bit audio (which uses unsigned samples) is handled like higher sample width audio (which uses signed samples) + if self.sample_width == 1: + raw_data = audioop.bias( + raw_data, 1, -128 + ) # subtract 128 from every sample to make them act like signed samples + + # resample audio at the desired rate if specified + if convert_rate is not None and self.sample_rate != convert_rate: + raw_data, _ = audioop.ratecv( + raw_data, + self.sample_width, + 1, + self.sample_rate, + convert_rate, + None, + ) + + # convert samples to desired sample width if specified + if convert_width is not None and self.sample_width != convert_width: + if ( + convert_width == 3 + ): # we're converting the audio into 24-bit (workaround for https://bugs.python.org/issue12866) + raw_data = audioop.lin2lin( + raw_data, self.sample_width, 4 + ) # convert audio into 32-bit first, which is always supported + try: + audioop.bias( + b"", 3, 0 + ) # test whether 24-bit audio is supported (for example, ``audioop`` in Python 3.3 and below don't support sample width 3, while Python 3.4+ do) + except ( + audioop.error + ): # this version of audioop doesn't support 24-bit audio (probably Python 3.3 or less) + raw_data = b"".join( + raw_data[i + 1 : i + 4] + for i in range(0, len(raw_data), 4) + ) # since we're in little endian, we discard the first byte from each 32-bit sample to get a 24-bit sample + else: # 24-bit audio fully supported, we don't need to shim anything + raw_data = audioop.lin2lin( + raw_data, self.sample_width, convert_width + ) + else: + raw_data = audioop.lin2lin( + raw_data, self.sample_width, convert_width + ) + + # if the output is 8-bit audio with unsigned 
samples, convert the samples we've been treating as signed to unsigned again + if convert_width == 1: + raw_data = audioop.bias( + raw_data, 1, 128 + ) # add 128 to every sample to make them act like unsigned samples again + + return raw_data + + def get_wav_data(self, convert_rate=None, convert_width=None): + """ + Returns a byte string representing the contents of a WAV file containing the audio represented by the ``AudioData`` instance. + + If ``convert_width`` is specified and the audio samples are not ``convert_width`` bytes each, the resulting audio is converted to match. + + If ``convert_rate`` is specified and the audio sample rate is not ``convert_rate`` Hz, the resulting audio is resampled to match. + + Writing these bytes directly to a file results in a valid `WAV file `__. + """ + raw_data = self.get_raw_data(convert_rate, convert_width) + sample_rate = ( + self.sample_rate if convert_rate is None else convert_rate + ) + sample_width = ( + self.sample_width if convert_width is None else convert_width + ) + + # generate the WAV file contents + with io.BytesIO() as wav_file: + wav_writer = wave.open(wav_file, "wb") + try: # note that we can't use context manager, since that was only added in Python 3.4 + wav_writer.setframerate(sample_rate) + wav_writer.setsampwidth(sample_width) + wav_writer.setnchannels(1) + wav_writer.writeframes(raw_data) + wav_data = wav_file.getvalue() + finally: # make sure resources are cleaned up + wav_writer.close() + return wav_data + + def get_aiff_data(self, convert_rate=None, convert_width=None): + """ + Returns a byte string representing the contents of an AIFF-C file containing the audio represented by the ``AudioData`` instance. + + If ``convert_width`` is specified and the audio samples are not ``convert_width`` bytes each, the resulting audio is converted to match. + + If ``convert_rate`` is specified and the audio sample rate is not ``convert_rate`` Hz, the resulting audio is resampled to match. 
+ + Writing these bytes directly to a file results in a valid `AIFF-C file `__. + """ + raw_data = self.get_raw_data(convert_rate, convert_width) + sample_rate = ( + self.sample_rate if convert_rate is None else convert_rate + ) + sample_width = ( + self.sample_width if convert_width is None else convert_width + ) + + # the AIFF format is big-endian, so we need to convert the little-endian raw data to big-endian + if hasattr( + audioop, "byteswap" + ): # ``audioop.byteswap`` was only added in Python 3.4 + raw_data = audioop.byteswap(raw_data, sample_width) + else: # manually reverse the bytes of each sample, which is slower but works well enough as a fallback + raw_data = raw_data[sample_width - 1 :: -1] + b"".join( + raw_data[i + sample_width : i : -1] + for i in range(sample_width - 1, len(raw_data), sample_width) + ) + + # generate the AIFF-C file contents + with io.BytesIO() as aiff_file: + aiff_writer = aifc.open(aiff_file, "wb") + try: # note that we can't use context manager, since that was only added in Python 3.4 + aiff_writer.setframerate(sample_rate) + aiff_writer.setsampwidth(sample_width) + aiff_writer.setnchannels(1) + aiff_writer.writeframes(raw_data) + aiff_data = aiff_file.getvalue() + finally: # make sure resources are cleaned up + aiff_writer.close() + return aiff_data + + def get_flac_data(self, convert_rate=None, convert_width=None): + """ + Returns a byte string representing the contents of a FLAC file containing the audio represented by the ``AudioData`` instance. + + Note that 32-bit FLAC is not supported. If the audio data is 32-bit and ``convert_width`` is not specified, then the resulting FLAC will be a 24-bit FLAC. + + If ``convert_rate`` is specified and the audio sample rate is not ``convert_rate`` Hz, the resulting audio is resampled to match. + + If ``convert_width`` is specified and the audio samples are not ``convert_width`` bytes each, the resulting audio is converted to match. 
+ + Writing these bytes directly to a file results in a valid `FLAC file `__. + """ + assert convert_width is None or ( + convert_width % 1 == 0 and 1 <= convert_width <= 3 + ), "Sample width to convert to must be between 1 and 3 inclusive" + + if ( + self.sample_width > 3 and convert_width is None + ): # resulting WAV data would be 32-bit, which is not convertable to FLAC using our encoder + convert_width = 3 # the largest supported sample width is 24-bit, so we'll limit the sample width to that + + # run the FLAC converter with the WAV data to get the FLAC data + wav_data = self.get_wav_data(convert_rate, convert_width) + flac_converter = get_flac_converter() + if ( + os.name == "nt" + ): # on Windows, specify that the process is to be started without showing a console window + startup_info = subprocess.STARTUPINFO() + startup_info.dwFlags |= ( + subprocess.STARTF_USESHOWWINDOW + ) # specify that the wShowWindow field of `startup_info` contains a value + startup_info.wShowWindow = ( + subprocess.SW_HIDE + ) # specify that the console window should be hidden + else: + startup_info = None # default startupinfo + process = subprocess.Popen( + [ + flac_converter, + "--stdout", + "--totally-silent", # put the resulting FLAC file in stdout, and make sure it's not mixed with any program output + "--best", # highest level of compression available + "-", # the input FLAC file contents will be given in stdin + ], + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + startupinfo=startup_info, + ) + flac_data, stderr = process.communicate(wav_data) + return flac_data + + +def get_flac_converter(): + """Returns the absolute path of a FLAC converter executable, or raises an OSError if none can be found.""" + flac_converter = shutil_which("flac") # check for installed version first + if flac_converter is None: # flac utility is not installed + base_path = os.path.dirname( + os.path.abspath(__file__) + ) # directory of the current module file, where all the FLAC bundled binaries 
are stored + system, machine = platform.system(), platform.machine() + if system == "Windows" and machine in { + "i686", + "i786", + "x86", + "x86_64", + "AMD64", + }: + flac_converter = os.path.join(base_path, "flac-win32.exe") + elif system == "Darwin" and machine in { + "i686", + "i786", + "x86", + "x86_64", + "AMD64", + }: + flac_converter = os.path.join(base_path, "flac-mac") + elif system == "Linux" and machine in {"i686", "i786", "x86"}: + flac_converter = os.path.join(base_path, "flac-linux-x86") + elif system == "Linux" and machine in {"x86_64", "AMD64"}: + flac_converter = os.path.join(base_path, "flac-linux-x86_64") + else: # no FLAC converter available + raise OSError( + "FLAC conversion utility not available - consider installing the FLAC command line application by running `apt-get install flac` or your operating system's equivalent" + ) + + # mark FLAC converter as executable if possible + try: + # handle known issue when running on docker: + # run executable right after chmod() may result in OSError "Text file busy" + # fix: flush FS with sync + if not os.access(flac_converter, os.X_OK): + stat_info = os.stat(flac_converter) + os.chmod(flac_converter, stat_info.st_mode | stat.S_IEXEC) + if "Linux" in platform.system(): + os.sync() if sys.version_info >= (3, 3) else os.system("sync") + + except OSError: + pass + + return flac_converter + + +def shutil_which(pgm): + """Python 2 compatibility: backport of ``shutil.which()`` from Python 3""" + path = os.getenv("PATH") + for p in path.split(os.path.pathsep): + p = os.path.join(p, pgm) + if os.path.exists(p) and os.access(p, os.X_OK): + return p diff --git a/speech_recognition/exceptions.py b/speech_recognition/exceptions.py new file mode 100644 index 00000000..3e208a12 --- /dev/null +++ b/speech_recognition/exceptions.py @@ -0,0 +1,22 @@ +class SetupError(Exception): + pass + + +class WaitTimeoutError(Exception): + pass + + +class RequestError(Exception): + pass + + +class UnknownValueError(Exception): + 
pass + + +class TranscriptionNotReady(Exception): + pass + + +class TranscriptionFailed(Exception): + pass diff --git a/speech_recognition/recognizers/__init__.py b/speech_recognition/recognizers/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/speech_recognition/recognizers/whisper.py b/speech_recognition/recognizers/whisper.py new file mode 100644 index 00000000..505c60ac --- /dev/null +++ b/speech_recognition/recognizers/whisper.py @@ -0,0 +1,42 @@ +from __future__ import annotations + +import os +from io import BytesIO + +from speech_recognition.audio import AudioData +from speech_recognition.exceptions import SetupError + + +def recognize_whisper_api( + recognizer, + audio_data: "AudioData", + *, + model: str = "whisper-1", + api_key: str | None = None, +): + """ + Performs speech recognition on ``audio_data`` (an ``AudioData`` instance), using the OpenAI Whisper API. + + This function requires an OpenAI account; visit https://platform.openai.com/signup, then generate API Key in `User settings `__. + + Detail: https://platform.openai.com/docs/guides/speech-to-text + + Raises a ``speech_recognition.exceptions.SetupError`` exception if there are any issues with the openai installation, or the environment variable is missing. + """ + if not isinstance(audio_data, AudioData): + raise ValueError("``audio_data`` must be an ``AudioData`` instance") + if api_key is None and os.environ.get("OPENAI_API_KEY") is None: + raise SetupError("Set environment variable ``OPENAI_API_KEY``") + + try: + import openai + except ImportError: + raise SetupError( + "missing openai module: ensure that openai is set up correctly." 
+ ) + + wav_data = BytesIO(audio_data.get_wav_data()) + wav_data.name = "SpeechRecognition_audio.wav" + + transcript = openai.Audio.transcribe(model, wav_data, api_key=api_key) + return transcript["text"] diff --git a/tests/test_recognition.py b/tests/test_recognition.py index 5759d657..a4e5f4a0 100644 --- a/tests/test_recognition.py +++ b/tests/test_recognition.py @@ -85,7 +85,7 @@ def test_ibm_chinese(self): def test_whisper_english(self): r = sr.Recognizer() with sr.AudioFile(self.AUDIO_FILE_EN) as source: audio = r.record(source) - self.assertEqual(r.recognize_whisper(audio, language="english", **self.WHISPER_CONFIG), " 1, 2, 3") + self.assertEqual(r.recognize_whisper(audio, language="english", **self.WHISPER_CONFIG), " 1, 2, 3.") def test_whisper_french(self): r = sr.Recognizer()