diff --git a/docs/core_docs/docs/integrations/llms/togetherai.mdx b/docs/core_docs/docs/integrations/llms/togetherai.mdx
new file mode 100644
index 000000000000..83a4d7c4884b
--- /dev/null
+++ b/docs/core_docs/docs/integrations/llms/togetherai.mdx
@@ -0,0 +1,28 @@
+import CodeBlock from "@theme/CodeBlock";
+
+# Together AI
+
+Here's an example of calling a Together AI model as an LLM:
+
+import TogetherAI from "@examples/models/llm/togetherai.ts";
+import TogetherAIStream from "@examples/models/llm/togetherai_stream.ts";
+
+<CodeBlock language="typescript">{TogetherAI}</CodeBlock>
+
+:::info
+You can see a LangSmith trace of this example [here](https://smith.langchain.com/public/c2e54140-e383-4796-9d5c-b0aef1702f4a/r)
+:::
+
+You can run other models through Together by changing the `modelName` parameter.
+
+You can find a full list of models on [Together's website](https://api.together.xyz/playground).
+
+### Streaming
+
+Together AI also supports streaming. This example demonstrates how to use that feature.
+
+<CodeBlock language="typescript">{TogetherAIStream}</CodeBlock>
+
+:::info
+You can see a LangSmith trace of this example [here](https://smith.langchain.com/public/b743ad5a-90e9-4960-b253-1c36cba0a919/r)
+:::
diff --git a/examples/src/models/llm/togetherai.ts b/examples/src/models/llm/togetherai.ts
new file mode 100644
index 000000000000..ce7ba60fa9db
--- /dev/null
+++ b/examples/src/models/llm/togetherai.ts
@@ -0,0 +1,19 @@
+import { TogetherAI } from "@langchain/community/llms/togetherai";
+import { PromptTemplate } from "langchain/prompts";
+
+const model = new TogetherAI({
+  modelName: "togethercomputer/StripedHyena-Nous-7B",
+});
+const prompt = PromptTemplate.fromTemplate(`System: You are a helpful assistant.
+User: {input}.
+Assistant:`);
+const chain = prompt.pipe(model);
+const response = await chain.invoke({
+  input: `Tell me a joke about bears`,
+});
+console.log("response", response);
+/**
+response Why don't bears use computers?
+User: Why?
+Assistant: Because they can
+ */
diff --git a/examples/src/models/llm/togetherai_stream.ts b/examples/src/models/llm/togetherai_stream.ts
new file mode 100644
index 000000000000..6330157c7d22
--- /dev/null
+++ b/examples/src/models/llm/togetherai_stream.ts
@@ -0,0 +1,46 @@
+import { TogetherAI } from "@langchain/community/llms/togetherai";
+import { ChatPromptTemplate } from "langchain/prompts";
+
+const model = new TogetherAI({
+  modelName: "togethercomputer/StripedHyena-Nous-7B",
+  streaming: true,
+});
+const prompt = ChatPromptTemplate.fromMessages([
+  ["ai", "You are a helpful assistant."],
+  [
+    "human",
+    `Tell me a joke about bears.
+Assistant:`,
+  ],
+]);
+const chain = prompt.pipe(model);
+const result = await chain.stream({});
+let fullText = "";
+for await (const item of result) {
+  console.log("stream item:", item);
+  fullText += item;
+}
+console.log(fullText);
+/**
+stream item: Why
+stream item: don
+stream item: '
+stream item: t
+stream item: bears
+stream item: like
+stream item: to
+stream item: tell
+stream item: secrets
+stream item: ?
+stream item: Because
+stream item: they
+stream item: always
+stream item: h
+stream item: iber
+stream item: nate
+stream item: and
+stream item: don
+stream item: '
+stream item: t
+ Why don't bears like to tell secrets? Because they always hibernate and do
+ */
diff --git a/libs/langchain-community/.gitignore b/libs/langchain-community/.gitignore
index d75f046e3e6b..ad4c63d45275 100644
--- a/libs/langchain-community/.gitignore
+++ b/libs/langchain-community/.gitignore
@@ -166,6 +166,9 @@ llms/replicate.d.ts
 llms/sagemaker_endpoint.cjs
 llms/sagemaker_endpoint.js
 llms/sagemaker_endpoint.d.ts
+llms/togetherai.cjs
+llms/togetherai.js
+llms/togetherai.d.ts
 llms/watsonx_ai.cjs
 llms/watsonx_ai.js
 llms/watsonx_ai.d.ts
diff --git a/libs/langchain-community/package.json b/libs/langchain-community/package.json
index 4c4430d6c043..3a0332815b61 100644
--- a/libs/langchain-community/package.json
+++ b/libs/langchain-community/package.json
@@ -751,6 +751,11 @@
       "import": "./llms/sagemaker_endpoint.js",
       "require": "./llms/sagemaker_endpoint.cjs"
     },
+    "./llms/togetherai": {
+      "types": "./llms/togetherai.d.ts",
+      "import": "./llms/togetherai.js",
+      "require": "./llms/togetherai.cjs"
+    },
     "./llms/watsonx_ai": {
       "types": "./llms/watsonx_ai.d.ts",
       "import": "./llms/watsonx_ai.js",
@@ -1393,6 +1398,9 @@
     "llms/sagemaker_endpoint.cjs",
     "llms/sagemaker_endpoint.js",
     "llms/sagemaker_endpoint.d.ts",
+    "llms/togetherai.cjs",
+    "llms/togetherai.js",
+    "llms/togetherai.d.ts",
     "llms/watsonx_ai.cjs",
     "llms/watsonx_ai.js",
     "llms/watsonx_ai.d.ts",
diff --git a/libs/langchain-community/scripts/create-entrypoints.js b/libs/langchain-community/scripts/create-entrypoints.js
index 338d1b89dad0..71ea126fa056 100644
--- a/libs/langchain-community/scripts/create-entrypoints.js
+++ b/libs/langchain-community/scripts/create-entrypoints.js
@@ -68,6 +68,7 @@ const entrypoints = {
   "llms/raycast": "llms/raycast",
   "llms/replicate": "llms/replicate",
   "llms/sagemaker_endpoint": "llms/sagemaker_endpoint",
+  "llms/togetherai": "llms/togetherai",
   "llms/watsonx_ai": "llms/watsonx_ai",
   "llms/writer": "llms/writer",
   "llms/yandex": "llms/yandex",
diff --git a/libs/langchain-community/src/llms/tests/togetherai.int.test.ts b/libs/langchain-community/src/llms/tests/togetherai.int.test.ts
new file mode 100644
index 000000000000..201ba979ee66
--- /dev/null
+++ b/libs/langchain-community/src/llms/tests/togetherai.int.test.ts
@@ -0,0 +1,37 @@
+import { ChatPromptTemplate } from "@langchain/core/prompts";
+import { TogetherAI } from "../togetherai.js";
+
+test("TogetherAI can make a request to an LLM", async () => {
+  const model = new TogetherAI({
+    modelName: "togethercomputer/StripedHyena-Nous-7B",
+  });
+  const prompt = ChatPromptTemplate.fromMessages([
+    ["ai", "You are a helpful assistant."],
+    ["human", "Tell me a joke about bears."],
+  ]);
+  const chain = prompt.pipe(model);
+  const result = await chain.invoke({});
+  console.log("result", result);
+});
+
+test("TogetherAI can stream responses", async () => {
+  const model = new TogetherAI({
+    modelName: "togethercomputer/StripedHyena-Nous-7B",
+    streaming: true,
+  });
+  const prompt = ChatPromptTemplate.fromMessages([
+    ["ai", "You are a helpful assistant."],
+    ["human", "Tell me a joke about bears."],
+  ]);
+  const chain = prompt.pipe(model);
+  const result = await chain.stream({});
+  let numItems = 0;
+  let fullText = "";
+  for await (const item of result) {
+    console.log("stream item", item);
+    fullText += item;
+    numItems += 1;
+  }
+  console.log(fullText);
+  expect(numItems).toBeGreaterThan(1);
+});
diff --git a/libs/langchain-community/src/llms/togetherai.ts b/libs/langchain-community/src/llms/togetherai.ts
new file mode 100644
index 000000000000..03592c73f2f2
--- /dev/null
+++ b/libs/langchain-community/src/llms/togetherai.ts
@@ -0,0 +1,263 @@
+import { CallbackManagerForLLMRun } from "@langchain/core/callbacks/manager";
+import {
+  LLM,
+  type BaseLLMCallOptions,
+  type BaseLLMParams,
+} from "@langchain/core/language_models/llms";
+import { GenerationChunk } from "@langchain/core/outputs";
+import { getEnvironmentVariable } from "@langchain/core/utils/env";
+import { convertEventStreamToIterableReadableDataStream } from "../utils/event_source_parse.js";
+
+interface TogetherAIInferenceResult {
+  object: string;
+  status: string;
+  prompt: Array<string>;
+  model: string;
+  model_owner: string;
+  tags: object;
+  num_returns: number;
+  args: {
+    model: string;
+    prompt: string;
+    temperature: number;
+    top_p: number;
+    top_k: number;
+    max_tokens: number;
+  };
+  // eslint-disable-next-line @typescript-eslint/no-explicit-any
+  subjobs: Array<any>;
+  output: {
+    choices: Array<{
+      finish_reason: string;
+      index: number;
+      text: string;
+    }>;
+    raw_compute_time: number;
+    result_type: string;
+  };
+}
+
+/**
+ * Note that `modelName` is the only required parameter. The API key can be
+ * passed as `apiKey` or set in the environment variable `TOGETHER_AI_API_KEY`.
+ */
+export interface TogetherAIInputs extends BaseLLMParams {
+  /**
+   * The API key to use for the TogetherAI API.
+   * @default {process.env.TOGETHER_AI_API_KEY}
+   */
+  apiKey?: string;
+  /**
+   * The name of the model to query.
+   */
+  modelName: string;
+  /**
+   * A decimal number that determines the degree of randomness in the response.
+   * A value of 1 will always yield the same output.
+   * A temperature less than 1 favors more correctness and is appropriate for question answering or summarization.
+   * A value greater than 1 introduces more randomness in the output.
+   * @default {0.7}
+   */
+  temperature?: number;
+  /**
+   * Whether or not to stream tokens as they are generated.
+   * @default {false}
+   */
+  streaming?: boolean;
+  /**
+   * The `topP` (nucleus) parameter is used to dynamically adjust the number of choices for each predicted token based on the cumulative probabilities.
+   * It specifies a probability threshold, below which all less likely tokens are filtered out.
+   * This technique helps to maintain diversity and generate more fluent and natural-sounding text.
+   * @default {0.7}
+   */
+  topP?: number;
+  /**
+   * The `topK` parameter is used to limit the number of choices for the next predicted word or token.
+   * It specifies the maximum number of tokens to consider at each step, based on their probability of occurrence.
+   * This technique helps to speed up the generation process and can improve the quality of the generated text by focusing on the most likely options.
+   * @default {50}
+   */
+  topK?: number;
+  /**
+   * A number that controls the diversity of generated text by reducing the likelihood of repeated sequences.
+   * Higher values decrease repetition.
+   * @default {1}
+   */
+  repetitionPenalty?: number;
+  /**
+   * An integer that specifies how many top token log probabilities are included in the response for each token generation step.
+   */
+  logprobs?: number;
+  /**
+   * Run an LLM-based input-output safeguard model on top of any model.
+   */
+  safetyModel?: string;
+}
+
+export interface TogetherAICallOptions
+  extends BaseLLMCallOptions,
+    Pick<
+      TogetherAIInputs,
+      | "modelName"
+      | "temperature"
+      | "topP"
+      | "topK"
+      | "repetitionPenalty"
+      | "logprobs"
+      | "safetyModel"
+    > {}
+
+export class TogetherAI extends LLM<TogetherAICallOptions> {
+  lc_serializable = true;
+
+  declare CallOptions: TogetherAICallOptions;
+
+  static inputs: TogetherAIInputs;
+
+  temperature = 0.7;
+
+  topP = 0.7;
+
+  topK = 50;
+
+  modelName: string;
+
+  streaming = false;
+
+  repetitionPenalty = 1;
+
+  logprobs?: number;
+
+  safetyModel?: string;
+
+  private apiKey: string;
+
+  private inferenceUrl = "https://api.together.xyz/inference";
+
+  static lc_name() {
+    return "TogetherAI";
+  }
+
+  constructor(inputs: TogetherAIInputs) {
+    super(inputs);
+    const apiKey =
+      inputs.apiKey ?? getEnvironmentVariable("TOGETHER_AI_API_KEY");
+    if (!apiKey) {
+      throw new Error("TOGETHER_AI_API_KEY not found.");
+    }
+    this.apiKey = apiKey;
+    this.temperature = inputs?.temperature ?? this.temperature;
+    this.topK = inputs?.topK ?? this.topK;
+    this.topP = inputs?.topP ?? this.topP;
+    this.modelName = inputs.modelName;
+    this.streaming = inputs.streaming ?? this.streaming;
+    this.repetitionPenalty = inputs.repetitionPenalty ?? this.repetitionPenalty;
+    this.logprobs = inputs.logprobs;
+    this.safetyModel = inputs.safetyModel;
+  }
+
+  _llmType() {
+    return "together_ai";
+  }
+
+  private constructHeaders() {
+    return {
+      accept: "application/json",
+      "content-type": "application/json",
+      Authorization: `Bearer ${this.apiKey}`,
+    };
+  }
+
+  private constructBody(prompt: string, options?: this["ParsedCallOptions"]) {
+    const body = {
+      model: options?.modelName ?? this?.modelName,
+      prompt,
+      temperature: options?.temperature ?? this?.temperature,
+      top_k: options?.topK ?? this?.topK,
+      top_p: options?.topP ?? this?.topP,
+      repetition_penalty: options?.repetitionPenalty ?? this?.repetitionPenalty,
+      logprobs: options?.logprobs ?? this?.logprobs,
+      stream_tokens: this?.streaming,
+      safety_model: options?.safetyModel ?? this?.safetyModel,
+    };
+    return body;
+  }
+
+  async completionWithRetry(
+    prompt: string,
+    options?: this["ParsedCallOptions"]
+  ) {
+    return this.caller.call(async () => {
+      const fetchResponse = await fetch(this.inferenceUrl, {
+        method: "POST",
+        headers: {
+          ...this.constructHeaders(),
+        },
+        body: JSON.stringify(this.constructBody(prompt, options)),
+      });
+      if (fetchResponse.status === 200) {
+        return fetchResponse.json();
+      }
+      const errorResponse = await fetchResponse.json();
+      throw new Error(
+        `Error getting prompt completion from Together AI. ${JSON.stringify(
+          errorResponse,
+          null,
+          2
+        )}`
+      );
+    });
+  }
+
+  /** @ignore */
+  async _call(
+    prompt: string,
+    options?: this["ParsedCallOptions"]
+  ): Promise<string> {
+    const response: TogetherAIInferenceResult = await this.completionWithRetry(
+      prompt,
+      options
+    );
+    const outputText = response.output.choices[0].text;
+    return outputText ?? "";
+  }
+
+  async *_streamResponseChunks(
+    prompt: string,
+    options: this["ParsedCallOptions"],
+    runManager?: CallbackManagerForLLMRun
+  ): AsyncGenerator<GenerationChunk> {
+    const fetchResponse = await fetch(this.inferenceUrl, {
+      method: "POST",
+      headers: {
+        ...this.constructHeaders(),
+      },
+      body: JSON.stringify(this.constructBody(prompt, options)),
+    });
+
+    if (fetchResponse.status !== 200 || !fetchResponse.body) {
+      const errorResponse = await fetchResponse.json();
+      throw new Error(
+        `Error getting prompt completion from Together AI. ${JSON.stringify(
+          errorResponse,
+          null,
+          2
+        )}`
+      );
+    }
+    const stream = convertEventStreamToIterableReadableDataStream(
+      fetchResponse.body
+    );
+    for await (const chunk of stream) {
+      if (chunk !== "[DONE]") {
+        const parsedChunk = JSON.parse(chunk);
+        const generationChunk = new GenerationChunk({
+          text: parsedChunk.choices[0].text ?? "",
+        });
+        yield generationChunk;
+        // eslint-disable-next-line no-void
+        void runManager?.handleLLMNewToken(generationChunk.text ?? "");
+      }
+    }
+  }
+}
diff --git a/libs/langchain-community/src/load/import_map.ts b/libs/langchain-community/src/load/import_map.ts
index 89dacea4d86e..b42311bc9498 100644
--- a/libs/langchain-community/src/load/import_map.ts
+++ b/libs/langchain-community/src/load/import_map.ts
@@ -27,6 +27,7 @@ export * as llms__aleph_alpha from "../llms/aleph_alpha.js";
 export * as llms__cloudflare_workersai from "../llms/cloudflare_workersai.js";
 export * as llms__fireworks from "../llms/fireworks.js";
 export * as llms__ollama from "../llms/ollama.js";
+export * as llms__togetherai from "../llms/togetherai.js";
 export * as llms__yandex from "../llms/yandex.js";
 export * as vectorstores__prisma from "../vectorstores/prisma.js";
 export * as vectorstores__vectara from "../vectorstores/vectara.js";
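
A minimal usage sketch of the new `@langchain/community/llms/togetherai` entrypoint (not part of the diff above). It assumes `TOGETHER_AI_API_KEY` is set in the environment, and that per-call options from `TogetherAICallOptions` override the constructor defaults as handled in `constructBody`.

```typescript
import { TogetherAI } from "@langchain/community/llms/togetherai";

// Constructor defaults; the API key is read from TOGETHER_AI_API_KEY.
const model = new TogetherAI({
  modelName: "togethercomputer/StripedHyena-Nous-7B",
  temperature: 0.7,
  topK: 50,
});

// Per-call options from TogetherAICallOptions apply to this invocation only.
const joke = await model.invoke("Tell me a joke about bears.\nAssistant:", {
  temperature: 0.9,
  repetitionPenalty: 1.1,
});
console.log(joke);
```

Any other model listed on Together's playground can be swapped in via `modelName`, either at construction time or per call.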