Skip to content

Commit

Permalink
feat: Add transcription and translation endpoints
Browse files Browse the repository at this point in the history
  • Loading branch information
stainless-app[bot] authored and gradenr committed Mar 8, 2024
1 parent 41691e1 commit 5a422c4
Show file tree
Hide file tree
Showing 15 changed files with 296 additions and 66 deletions.
2 changes: 1 addition & 1 deletion .stats.yml
Original file line number Diff line number Diff line change
@@ -1 +1 @@
configured_endpoints: 4
configured_endpoints: 6
4 changes: 1 addition & 3 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,7 @@ If you’d like to use the repository from source, you can either install from g
To install via git:

```bash
npm install --save git+ssh://[email protected]:groq/groq-typescript.git
# or
yarn add git+ssh://[email protected]:groq/groq-typescript.git
npm install git+ssh://[email protected]:groq/groq-typescript.git
```

Alternatively, to link a local copy of the repo:
Expand Down
25 changes: 15 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,7 @@ The REST API documentation can be found [on console.groq.com](https://console.gr
## Installation

```sh
# install from NPM
npm install --save groq-sdk
# or
yarn add groq-sdk
npm install groq-sdk
```

## Usage
Expand Down Expand Up @@ -80,7 +77,7 @@ async function main() {
],
model: 'mixtral-8x7b-32768',
})
.catch((err) => {
.catch(async (err) => {
if (err instanceof Groq.APIError) {
console.log(err.status); // 400
console.log(err.name); // BadRequestError
Expand Down Expand Up @@ -233,18 +230,26 @@ If you would like to disable or customize this behavior, for example to use the
<!-- prettier-ignore -->
```ts
import http from 'http';
import HttpsProxyAgent from 'https-proxy-agent';
import { HttpsProxyAgent } from 'https-proxy-agent';

// Configure the default for all requests:
const groq = new Groq({
httpAgent: new HttpsProxyAgent(process.env.PROXY_URL),
});

// Override per-request:
await groq.chat.completions.create({ messages: [{ role: 'system', content: 'You are a helpful assistant.' }, { role: 'user', content: 'Explain the importance of low latency LLMs' }], model: 'mixtral-8x7b-32768' }, {
baseURL: 'http://localhost:8080/test-api',
httpAgent: new http.Agent({ keepAlive: false }),
})
await groq.chat.completions.create(
{
messages: [
{ role: 'system', content: 'You are a helpful assistant.' },
{ role: 'user', content: 'Explain the importance of low latency LLMs' },
],
model: 'mixtral-8x7b-32768',
},
{
httpAgent: new http.Agent({ keepAlive: false }),
},
);
```

## Semantic Versioning
Expand Down
22 changes: 22 additions & 0 deletions api.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,28 @@ Methods:

- <code title="post /openai/v1/chat/completions">client.chat.completions.<a href="./src/resources/chat/completions.ts">create</a>({ ...params }) -> ChatCompletion</code>

# Audio

Types:

- <code><a href="./src/resources/audio/audio.ts">Translation</a></code>

## Transcriptions

Types:

- <code><a href="./src/resources/audio/transcriptions.ts">Transcription</a></code>

Methods:

- <code title="post /openai/v1/audio/transcriptions">client.audio.transcriptions.<a href="./src/resources/audio/transcriptions.ts">create</a>({ ...params }) -> Transcription</code>

## Translations

Methods:

- <code title="post /openai/v1/audio/translations">client.audio.translations.<a href="./src/resources/audio/translations.ts">create</a>({ ...params }) -> Translation</code>

# Models

Types:
Expand Down
16 changes: 0 additions & 16 deletions src/core.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import { VERSION } from './version';
import { Stream } from './lib/streaming';
import {
GroqError,
APIError,
Expand Down Expand Up @@ -39,19 +38,6 @@ type APIResponseProps = {

async function defaultParseResponse<T>(props: APIResponseProps): Promise<T> {
const { response } = props;
if (props.options.stream) {
debug('response', response.status, response.url, response.headers, response.body);

// Note: there is an invariant here that isn't represented in the type system
// that if you set `stream: true` the response type must also be `Stream<T>`

if (props.options.__streamClass) {
return props.options.__streamClass.fromSSEResponse(response, props.controller) as any;
}

return Stream.fromSSEResponse(response, props.controller) as any;
}

// fetch refuses to read the body when the status code is 204.
if (response.status === 204) {
return null as T;
Expand Down Expand Up @@ -750,7 +736,6 @@ export type RequestOptions<Req = unknown | Record<string, unknown> | Readable> =
idempotencyKey?: string;

__binaryResponse?: boolean | undefined;
__streamClass?: typeof Stream;
};

// This is required so that we can determine if a given object matches the RequestOptions
Expand All @@ -771,7 +756,6 @@ const requestOptionsKeys: KeysEnum<RequestOptions> = {
idempotencyKey: true,

__binaryResponse: true,
__streamClass: true,
};

export const isRequestOptions = (obj: unknown): obj is RequestOptions => {
Expand Down
4 changes: 4 additions & 0 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ export class Groq extends Core.APIClient {
}

chat: API.Chat = new API.Chat(this);
audio: API.Audio = new API.Audio(this);
models: API.Models = new API.Models(this);

protected override defaultQuery(): Core.DefaultQuery | undefined {
Expand Down Expand Up @@ -192,6 +193,9 @@ export namespace Groq {

export import Chat = API.Chat;

export import Audio = API.Audio;
export import Translation = API.Translation;

export import Models = API.Models;
export import Model = API.Model;
export import ModelList = API.ModelList;
Expand Down
4 changes: 2 additions & 2 deletions src/lib/chat_completions_ext.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// Manually curated models for streaming chat completions.
import { ChatCompletion } from '../resources/chat'
import { ChatCompletion } from '../resources/chat';

export interface ChatCompletionChunk {
id: string;
Expand Down Expand Up @@ -74,7 +74,7 @@ export namespace ChatCompletionChunk {
id?: string;
usage?: ChatCompletion.Usage;
error?: string;
}
};
}

export interface ChatCompletionTokenLogprob {
Expand Down
24 changes: 24 additions & 0 deletions src/resources/audio/audio.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// File generated from our OpenAPI spec by Stainless.

import { APIResource } from 'groq-sdk/resource';
import * as AudioAPI from 'groq-sdk/resources/audio/audio';
import * as TranscriptionsAPI from 'groq-sdk/resources/audio/transcriptions';
import * as TranslationsAPI from 'groq-sdk/resources/audio/translations';

export class Audio extends APIResource {
  // Sub-resource for POST /openai/v1/audio/transcriptions.
  transcriptions: TranscriptionsAPI.Transcriptions = new TranscriptionsAPI.Transcriptions(this._client);
  // Sub-resource for POST /openai/v1/audio/translations.
  translations: TranslationsAPI.Translations = new TranslationsAPI.Translations(this._client);
}

export interface Translation {
  /**
   * The translated text.
   */
  text: string;
}

/**
 * Re-exports of the audio sub-resource classes and their request/response types
 * so they are reachable as members of the `Audio` namespace
 * (e.g. `Audio.Transcription`, `Audio.TranslationCreateParams`).
 */
export namespace Audio {
  export import Translation = AudioAPI.Translation;
  export import Transcriptions = TranscriptionsAPI.Transcriptions;
  export import Transcription = TranscriptionsAPI.Transcription;
  export import TranscriptionCreateParams = TranscriptionsAPI.TranscriptionCreateParams;
  export import Translations = TranslationsAPI.Translations;
  export import TranslationCreateParams = TranslationsAPI.TranslationCreateParams;
}
5 changes: 5 additions & 0 deletions src/resources/audio/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// File generated from our OpenAPI spec by Stainless.

// Public surface of the audio resource, grouped by module.
export { Audio, Translation } from './audio';
export { Transcription, TranscriptionCreateParams, Transcriptions } from './transcriptions';
export { TranslationCreateParams, Translations } from './translations';
85 changes: 85 additions & 0 deletions src/resources/audio/transcriptions.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
// File generated from our OpenAPI spec by Stainless.

import * as Core from 'groq-sdk/core';
import { APIResource } from 'groq-sdk/resource';
import * as TranscriptionsAPI from 'groq-sdk/resources/audio/transcriptions';
import { type Uploadable, multipartFormRequestOptions } from 'groq-sdk/core';

export class Transcriptions extends APIResource {
  /**
   * Transcribes audio into the input language.
   *
   * The request is sent as multipart/form-data so the audio file in
   * `body.file` can be uploaded alongside the other parameters.
   */
  create(body: TranscriptionCreateParams, options?: Core.RequestOptions): Core.APIPromise<Transcription> {
    const formOptions = multipartFormRequestOptions({ body, ...options });
    return this._client.post('/openai/v1/audio/transcriptions', formOptions);
  }
}

/**
 * Represents a transcription response returned by the model, based on the
 * provided input.
 */
export interface Transcription {
  /**
   * The transcribed text.
   */
  text: string;
}

export interface TranscriptionCreateParams {
  /**
   * The audio file object (not file name) to transcribe, in one of these formats:
   * flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.
   */
  file: Uploadable;

  /**
   * ID of the model to use. Only `whisper-large-v3` is currently available.
   */
  model: (string & {}) | 'whisper-large-v3';

  /**
   * The language of the input audio. Supplying the input language in
   * [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) format will
   * improve accuracy and latency.
   */
  language?: string;

  /**
   * An optional text to guide the model's style or continue a previous audio
   * segment. The [prompt](/docs/guides/speech-to-text/prompting) should match the
   * audio language.
   */
  prompt?: string;

  /**
   * The format of the transcript output, in one of these options: `json`, `text`,
   * `srt`, `verbose_json`, or `vtt`.
   */
  response_format?: 'json' | 'text' | 'srt' | 'verbose_json' | 'vtt';

  /**
   * The sampling temperature, between 0 and 1. Higher values like 0.8 will make the
   * output more random, while lower values like 0.2 will make it more focused and
   * deterministic. If set to 0, the model will use
   * [log probability](https://en.wikipedia.org/wiki/Log_probability) to
   * automatically increase the temperature until certain thresholds are hit.
   */
  temperature?: number;

  /**
   * The timestamp granularities to populate for this transcription.
   * `response_format` must be set to `verbose_json` to use timestamp granularities.
   * Either or both of these options are supported: `word`, or `segment`. Note: There
   * is no additional latency for segment timestamps, but generating word timestamps
   * incurs additional latency.
   */
  timestamp_granularities?: Array<'word' | 'segment'>;
}

// Re-exported request/response types so they are reachable as members of the
// `Transcriptions` namespace (e.g. `Transcriptions.Transcription`).
export namespace Transcriptions {
  export import Transcription = TranscriptionsAPI.Transcription;
  export import TranscriptionCreateParams = TranscriptionsAPI.TranscriptionCreateParams;
}
61 changes: 61 additions & 0 deletions src/resources/audio/translations.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
// File generated from our OpenAPI spec by Stainless.

import * as Core from 'groq-sdk/core';
import { APIResource } from 'groq-sdk/resource';
import * as TranslationsAPI from 'groq-sdk/resources/audio/translations';
import * as AudioAPI from 'groq-sdk/resources/audio/audio';
import { type Uploadable, multipartFormRequestOptions } from 'groq-sdk/core';

export class Translations extends APIResource {
  /**
   * Translates audio into English.
   *
   * The request is sent as multipart/form-data so the audio file in
   * `body.file` can be uploaded alongside the other parameters.
   */
  create(body: TranslationCreateParams, options?: Core.RequestOptions): Core.APIPromise<AudioAPI.Translation> {
    const formOptions = multipartFormRequestOptions({ body, ...options });
    return this._client.post('/openai/v1/audio/translations', formOptions);
  }
}

export interface TranslationCreateParams {
  /**
   * The audio file object (not file name) to translate, in one of these formats:
   * flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.
   */
  file: Uploadable;

  /**
   * ID of the model to use. Only `whisper-large-v3` is currently available.
   */
  model: (string & {}) | 'whisper-large-v3';

  /**
   * An optional text to guide the model's style or continue a previous audio
   * segment. The [prompt](/docs/guides/speech-to-text/prompting) should be in
   * English.
   */
  prompt?: string;

  /**
   * The format of the transcript output, in one of these options: `json`, `text`,
   * `srt`, `verbose_json`, or `vtt`.
   *
   * NOTE(review): typed as a plain `string` here, unlike the literal union used by
   * `TranscriptionCreateParams.response_format` — presumably intentional per the
   * OpenAPI spec; confirm before tightening.
   */
  response_format?: string;

  /**
   * The sampling temperature, between 0 and 1. Higher values like 0.8 will make the
   * output more random, while lower values like 0.2 will make it more focused and
   * deterministic. If set to 0, the model will use
   * [log probability](https://en.wikipedia.org/wiki/Log_probability) to
   * automatically increase the temperature until certain thresholds are hit.
   */
  temperature?: number;
}

// Re-exported request type so it is reachable as a member of the
// `Translations` namespace (e.g. `Translations.TranslationCreateParams`).
export namespace Translations {
  export import TranslationCreateParams = TranslationsAPI.TranslationCreateParams;
}
Loading

0 comments on commit 5a422c4

Please sign in to comment.