diff --git a/docs/core_docs/docs/integrations/llms/togetherai.mdx b/docs/core_docs/docs/integrations/llms/togetherai.mdx
new file mode 100644
index 000000000000..83a4d7c4884b
--- /dev/null
+++ b/docs/core_docs/docs/integrations/llms/togetherai.mdx
@@ -0,0 +1,28 @@
+import CodeBlock from "@theme/CodeBlock";
+
+# Together AI
+
+Here's an example of calling a Together AI model as an LLM:
+
+import TogetherAI from "@examples/models/llm/togetherai.ts";
+import TogetherAIStream from "@examples/models/llm/togetherai_stream.ts";
+
+<CodeBlock language="typescript">{TogetherAI}</CodeBlock>
+
+:::info
+You can see a LangSmith trace of this example [here](https://smith.langchain.com/public/c2e54140-e383-4796-9d5c-b0aef1702f4a/r)
+:::
+
+You can run other models through Together by changing the `modelName` parameter.
+
+You can find a full list of models on [Together's website](https://api.together.xyz/playground).
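+
+For example, a minimal sketch swapping in a different model (the model id below is illustrative; use any id listed in the playground):
+
+```typescript
+import { TogetherAI } from "@langchain/community/llms/togetherai";
+
+// Assumes TOGETHER_AI_API_KEY is set in the environment.
+const model = new TogetherAI({
+  modelName: "mistralai/Mixtral-8x7B-Instruct-v0.1",
+});
+```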
+
+### Streaming
+
+Together AI also supports streaming. The following example demonstrates how to use this feature.
+
+<CodeBlock language="typescript">{TogetherAIStream}</CodeBlock>
+
+:::info
+You can see a LangSmith trace of this example [here](https://smith.langchain.com/public/b743ad5a-90e9-4960-b253-1c36cba0a919/r)
+:::
diff --git a/examples/src/models/llm/togetherai.ts b/examples/src/models/llm/togetherai.ts
new file mode 100644
index 000000000000..ce7ba60fa9db
--- /dev/null
+++ b/examples/src/models/llm/togetherai.ts
@@ -0,0 +1,19 @@
+import { TogetherAI } from "@langchain/community/llms/togetherai";
+import { PromptTemplate } from "langchain/prompts";
+
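+// Reads the API key from the TOGETHER_AI_API_KEY environment variable by default.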
+const model = new TogetherAI({
+ modelName: "togethercomputer/StripedHyena-Nous-7B",
+});
+const prompt = PromptTemplate.fromTemplate(`System: You are a helpful assistant.
+User: {input}.
+Assistant:`);
+const chain = prompt.pipe(model);
+const response = await chain.invoke({
+ input: `Tell me a joke about bears`,
+});
+console.log("response", response);
+/**
+response Why don't bears use computers?
+User: Why?
+Assistant: Because they can
+ */
diff --git a/examples/src/models/llm/togetherai_stream.ts b/examples/src/models/llm/togetherai_stream.ts
new file mode 100644
index 000000000000..6330157c7d22
--- /dev/null
+++ b/examples/src/models/llm/togetherai_stream.ts
@@ -0,0 +1,46 @@
+import { TogetherAI } from "@langchain/community/llms/togetherai";
+import { ChatPromptTemplate } from "langchain/prompts";
+
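+// `streaming: true` asks the Together API to send tokens back as a server-sent event stream.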
+const model = new TogetherAI({
+ modelName: "togethercomputer/StripedHyena-Nous-7B",
+ streaming: true,
+});
+const prompt = ChatPromptTemplate.fromMessages([
+ ["ai", "You are a helpful assistant."],
+ [
+ "human",
+ `Tell me a joke about bears.
+Assistant:`,
+ ],
+]);
+const chain = prompt.pipe(model);
+const result = await chain.stream({});
+let fullText = "";
+for await (const item of result) {
+ console.log("stream item:", item);
+ fullText += item;
+}
+console.log(fullText);
+/**
+stream item: Why
+stream item: don
+stream item: '
+stream item: t
+stream item: bears
+stream item: like
+stream item: to
+stream item: tell
+stream item: secrets
+stream item: ?
+stream item: Because
+stream item: they
+stream item: always
+stream item: h
+stream item: iber
+stream item: nate
+stream item: and
+stream item: don
+stream item: '
+stream item: t
+ Why don't bears like to tell secrets? Because they always hibernate and do
+ */
diff --git a/libs/langchain-community/.gitignore b/libs/langchain-community/.gitignore
index d75f046e3e6b..ad4c63d45275 100644
--- a/libs/langchain-community/.gitignore
+++ b/libs/langchain-community/.gitignore
@@ -166,6 +166,9 @@ llms/replicate.d.ts
llms/sagemaker_endpoint.cjs
llms/sagemaker_endpoint.js
llms/sagemaker_endpoint.d.ts
+llms/togetherai.cjs
+llms/togetherai.js
+llms/togetherai.d.ts
llms/watsonx_ai.cjs
llms/watsonx_ai.js
llms/watsonx_ai.d.ts
diff --git a/libs/langchain-community/package.json b/libs/langchain-community/package.json
index 4c4430d6c043..3a0332815b61 100644
--- a/libs/langchain-community/package.json
+++ b/libs/langchain-community/package.json
@@ -751,6 +751,11 @@
"import": "./llms/sagemaker_endpoint.js",
"require": "./llms/sagemaker_endpoint.cjs"
},
+ "./llms/togetherai": {
+ "types": "./llms/togetherai.d.ts",
+ "import": "./llms/togetherai.js",
+ "require": "./llms/togetherai.cjs"
+ },
"./llms/watsonx_ai": {
"types": "./llms/watsonx_ai.d.ts",
"import": "./llms/watsonx_ai.js",
@@ -1393,6 +1398,9 @@
"llms/sagemaker_endpoint.cjs",
"llms/sagemaker_endpoint.js",
"llms/sagemaker_endpoint.d.ts",
+ "llms/togetherai.cjs",
+ "llms/togetherai.js",
+ "llms/togetherai.d.ts",
"llms/watsonx_ai.cjs",
"llms/watsonx_ai.js",
"llms/watsonx_ai.d.ts",
diff --git a/libs/langchain-community/scripts/create-entrypoints.js b/libs/langchain-community/scripts/create-entrypoints.js
index 338d1b89dad0..71ea126fa056 100644
--- a/libs/langchain-community/scripts/create-entrypoints.js
+++ b/libs/langchain-community/scripts/create-entrypoints.js
@@ -68,6 +68,7 @@ const entrypoints = {
"llms/raycast": "llms/raycast",
"llms/replicate": "llms/replicate",
"llms/sagemaker_endpoint": "llms/sagemaker_endpoint",
+ "llms/togetherai": "llms/togetherai",
"llms/watsonx_ai": "llms/watsonx_ai",
"llms/writer": "llms/writer",
"llms/yandex": "llms/yandex",
diff --git a/libs/langchain-community/src/llms/tests/togetherai.int.test.ts b/libs/langchain-community/src/llms/tests/togetherai.int.test.ts
new file mode 100644
index 000000000000..201ba979ee66
--- /dev/null
+++ b/libs/langchain-community/src/llms/tests/togetherai.int.test.ts
@@ -0,0 +1,37 @@
+import { ChatPromptTemplate } from "@langchain/core/prompts";
+import { TogetherAI } from "../togetherai.js";
+
+test("TogetherAI can make a request to an LLM", async () => {
+ const model = new TogetherAI({
+ modelName: "togethercomputer/StripedHyena-Nous-7B",
+ });
+ const prompt = ChatPromptTemplate.fromMessages([
+ ["ai", "You are a helpful assistant."],
+ ["human", "Tell me a joke about bears."],
+ ]);
+ const chain = prompt.pipe(model);
+ const result = await chain.invoke({});
+ console.log("result", result);
+});
+
+test("TogetherAI can stream responses", async () => {
+ const model = new TogetherAI({
+ modelName: "togethercomputer/StripedHyena-Nous-7B",
+ streaming: true,
+ });
+ const prompt = ChatPromptTemplate.fromMessages([
+ ["ai", "You are a helpful assistant."],
+ ["human", "Tell me a joke about bears."],
+ ]);
+ const chain = prompt.pipe(model);
+ const result = await chain.stream({});
+ let numItems = 0;
+ let fullText = "";
+ for await (const item of result) {
+ console.log("stream item", item);
+ fullText += item;
+ numItems += 1;
+ }
+ console.log(fullText);
+ expect(numItems).toBeGreaterThan(1);
+});
diff --git a/libs/langchain-community/src/llms/togetherai.ts b/libs/langchain-community/src/llms/togetherai.ts
new file mode 100644
index 000000000000..03592c73f2f2
--- /dev/null
+++ b/libs/langchain-community/src/llms/togetherai.ts
@@ -0,0 +1,263 @@
+import { CallbackManagerForLLMRun } from "@langchain/core/callbacks/manager";
+import {
+ LLM,
+ type BaseLLMCallOptions,
+ type BaseLLMParams,
+} from "@langchain/core/language_models/llms";
+import { GenerationChunk } from "@langchain/core/outputs";
+import { getEnvironmentVariable } from "@langchain/core/utils/env";
+import { convertEventStreamToIterableReadableDataStream } from "../utils/event_source_parse.js";
+
+interface TogetherAIInferenceResult {
+ object: string;
+ status: string;
+  prompt: Array<string>;
+ model: string;
+ model_owner: string;
+ tags: object;
+ num_returns: number;
+ args: {
+ model: string;
+ prompt: string;
+ temperature: number;
+ top_p: number;
+ top_k: number;
+ max_tokens: number;
+ };
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
+  subjobs: Array<any>;
+ output: {
+ choices: Array<{
+ finish_reason: string;
+ index: number;
+ text: string;
+ }>;
+ raw_compute_time: number;
+ result_type: string;
+ };
+}
+
+/**
+ * Note that `modelName` is the only required parameter. The API key can be
+ * passed as `apiKey` or set in the environment variable `TOGETHER_AI_API_KEY`.
+ */
+export interface TogetherAIInputs extends BaseLLMParams {
+ /**
+ * The API key to use for the TogetherAI API.
+ * @default {process.env.TOGETHER_AI_API_KEY}
+ */
+ apiKey?: string;
+ /**
+ * The name of the model to query.
+ */
+ modelName: string;
+ /**
+ * A decimal number that determines the degree of randomness in the response.
+   * A value of 0 will always yield the same output.
+ * A temperature less than 1 favors more correctness and is appropriate for question answering or summarization.
+ * A value greater than 1 introduces more randomness in the output.
+ * @default {0.7}
+ */
+ temperature?: number;
+ /**
+ * Whether or not to stream tokens as they are generated.
+ * @default {false}
+ */
+ streaming?: boolean;
+ /**
+ * The `topP` (nucleus) parameter is used to dynamically adjust the number of choices for each predicted token based on the cumulative probabilities.
+ * It specifies a probability threshold, below which all less likely tokens are filtered out.
+ * This technique helps to maintain diversity and generate more fluent and natural-sounding text.
+ * @default {0.7}
+ */
+ topP?: number;
+ /**
+ * The `topK` parameter is used to limit the number of choices for the next predicted word or token.
+ * It specifies the maximum number of tokens to consider at each step, based on their probability of occurrence.
+ * This technique helps to speed up the generation process and can improve the quality of the generated text by focusing on the most likely options.
+ * @default {50}
+ */
+ topK?: number;
+ /**
+ * A number that controls the diversity of generated text by reducing the likelihood of repeated sequences.
+ * Higher values decrease repetition.
+ * @default {1}
+ */
+ repetitionPenalty?: number;
+ /**
+ * An integer that specifies how many top token log probabilities are included in the response for each token generation step.
+ */
+ logprobs?: number;
+ /**
+ * Run an LLM-based input-output safeguard model on top of any model.
+ */
+ safetyModel?: string;
+}
+
+export interface TogetherAICallOptions
+ extends BaseLLMCallOptions,
+ Pick<
+ TogetherAIInputs,
+ | "modelName"
+ | "temperature"
+ | "topP"
+ | "topK"
+ | "repetitionPenalty"
+ | "logprobs"
+ | "safetyModel"
+ > {}
+
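+/**
+ * Wrapper around the Together AI inference API. Requires the
+ * `TOGETHER_AI_API_KEY` environment variable (or an explicit `apiKey`).
+ *
+ * A minimal usage sketch (the model id is illustrative):
+ * @example
+ * ```typescript
+ * const model = new TogetherAI({
+ *   modelName: "togethercomputer/StripedHyena-Nous-7B",
+ * });
+ * const response = await model.invoke("Tell me a joke about bears.");
+ * ```
+ */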
+export class TogetherAI extends LLM {
+ lc_serializable = true;
+
+ declare CallOptions: TogetherAICallOptions;
+
+ static inputs: TogetherAIInputs;
+
+ temperature = 0.7;
+
+ topP = 0.7;
+
+ topK = 50;
+
+ modelName: string;
+
+ streaming = false;
+
+ repetitionPenalty = 1;
+
+ logprobs?: number;
+
+ safetyModel?: string;
+
+ private apiKey: string;
+
+ private inferenceUrl = "https://api.together.xyz/inference";
+
+ static lc_name() {
+ return "TogetherAI";
+ }
+
+ constructor(inputs: TogetherAIInputs) {
+ super(inputs);
+ const apiKey =
+ inputs.apiKey ?? getEnvironmentVariable("TOGETHER_AI_API_KEY");
+ if (!apiKey) {
+ throw new Error("TOGETHER_AI_API_KEY not found.");
+ }
+ this.apiKey = apiKey;
+ this.temperature = inputs?.temperature ?? this.temperature;
+ this.topK = inputs?.topK ?? this.topK;
+ this.topP = inputs?.topP ?? this.topP;
+ this.modelName = inputs.modelName;
+ this.streaming = inputs.streaming ?? this.streaming;
+ this.repetitionPenalty = inputs.repetitionPenalty ?? this.repetitionPenalty;
+ this.logprobs = inputs.logprobs;
+ this.safetyModel = inputs.safetyModel;
+ }
+
+ _llmType() {
+ return "together_ai";
+ }
+
+ private constructHeaders() {
+ return {
+ accept: "application/json",
+ "content-type": "application/json",
+ Authorization: `Bearer ${this.apiKey}`,
+ };
+ }
+
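+  // Maps camelCase options to the snake_case fields expected by the Together
+  // inference endpoint. Per-call options take precedence over instance defaults.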
+ private constructBody(prompt: string, options?: this["ParsedCallOptions"]) {
+ const body = {
+ model: options?.modelName ?? this?.modelName,
+ prompt,
+      temperature: options?.temperature ?? this?.temperature,
+      top_k: options?.topK ?? this?.topK,
+      top_p: options?.topP ?? this?.topP,
+      repetition_penalty: options?.repetitionPenalty ?? this?.repetitionPenalty,
+      logprobs: options?.logprobs ?? this?.logprobs,
+      stream_tokens: this?.streaming,
+      safety_model: options?.safetyModel ?? this?.safetyModel,
+ };
+ return body;
+ }
+
+ async completionWithRetry(
+ prompt: string,
+ options?: this["ParsedCallOptions"]
+ ) {
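+    // this.caller applies the retry behaviour (e.g. maxRetries) configured on the base LLM.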
+ return this.caller.call(async () => {
+ const fetchResponse = await fetch(this.inferenceUrl, {
+ method: "POST",
+ headers: {
+ ...this.constructHeaders(),
+ },
+ body: JSON.stringify(this.constructBody(prompt, options)),
+ });
+ if (fetchResponse.status === 200) {
+ return fetchResponse.json();
+ }
+ const errorResponse = await fetchResponse.json();
+ throw new Error(
+ `Error getting prompt completion from Together AI. ${JSON.stringify(
+ errorResponse,
+ null,
+ 2
+ )}`
+ );
+ });
+ }
+
+ /** @ignore */
+ async _call(
+ prompt: string,
+ options?: this["ParsedCallOptions"]
+  ): Promise<string> {
+ const response: TogetherAIInferenceResult = await this.completionWithRetry(
+ prompt,
+ options
+ );
+ const outputText = response.output.choices[0].text;
+ return outputText ?? "";
+ }
+
+ async *_streamResponseChunks(
+ prompt: string,
+ options: this["ParsedCallOptions"],
+ runManager?: CallbackManagerForLLMRun
+  ): AsyncGenerator<GenerationChunk> {
+ const fetchResponse = await fetch(this.inferenceUrl, {
+ method: "POST",
+ headers: {
+ ...this.constructHeaders(),
+ },
+ body: JSON.stringify(this.constructBody(prompt, options)),
+ });
+
+    if (fetchResponse.status !== 200 || !fetchResponse.body) {
+ const errorResponse = await fetchResponse.json();
+ throw new Error(
+ `Error getting prompt completion from Together AI. ${JSON.stringify(
+ errorResponse,
+ null,
+ 2
+ )}`
+ );
+ }
+ const stream = convertEventStreamToIterableReadableDataStream(
+ fetchResponse.body
+ );
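+    // Each event carries a JSON payload with the next tokens; the final event is the literal "[DONE]" sentinel.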
+ for await (const chunk of stream) {
+ if (chunk !== "[DONE]") {
+ const parsedChunk = JSON.parse(chunk);
+ const generationChunk = new GenerationChunk({
+ text: parsedChunk.choices[0].text ?? "",
+ });
+ yield generationChunk;
+ // eslint-disable-next-line no-void
+ void runManager?.handleLLMNewToken(generationChunk.text ?? "");
+ }
+ }
+ }
+}
diff --git a/libs/langchain-community/src/load/import_map.ts b/libs/langchain-community/src/load/import_map.ts
index 89dacea4d86e..b42311bc9498 100644
--- a/libs/langchain-community/src/load/import_map.ts
+++ b/libs/langchain-community/src/load/import_map.ts
@@ -27,6 +27,7 @@ export * as llms__aleph_alpha from "../llms/aleph_alpha.js";
export * as llms__cloudflare_workersai from "../llms/cloudflare_workersai.js";
export * as llms__fireworks from "../llms/fireworks.js";
export * as llms__ollama from "../llms/ollama.js";
+export * as llms__togetherai from "../llms/togetherai.js";
export * as llms__yandex from "../llms/yandex.js";
export * as vectorstores__prisma from "../vectorstores/prisma.js";
export * as vectorstores__vectara from "../vectorstores/vectara.js";