diff --git a/docs/api_refs/typedoc.json b/docs/api_refs/typedoc.json index fa0fda258ea8..04e3f0d15a64 100644 --- a/docs/api_refs/typedoc.json +++ b/docs/api_refs/typedoc.json @@ -270,6 +270,7 @@ "./langchain/src/util/time.ts", "./langchain/src/experimental/autogpt/index.ts", "./langchain/src/experimental/openai_assistant/index.ts", + "./langchain/src/experimental/openai_files/index.ts", "./langchain/src/experimental/babyagi/index.ts", "./langchain/src/experimental/generative_agents/index.ts", "./langchain/src/experimental/plan_and_execute/index.ts", diff --git a/docs/core_docs/docs/modules/agents/agent_types/openai_assistant.mdx b/docs/core_docs/docs/modules/agents/agent_types/openai_assistant.mdx index 6d62fd1105bd..3521f8432c1e 100644 --- a/docs/core_docs/docs/modules/agents/agent_types/openai_assistant.mdx +++ b/docs/core_docs/docs/modules/agents/agent_types/openai_assistant.mdx @@ -194,3 +194,165 @@ console.log(assistantResponse); ``` Here the assistant was able to utilize the `code_interpreter` tool to calculate the answer to our question. + +# OpenAI Files + +Files are used to upload documents that can be used with features like Assistants and Fine-tuning. + +We've implemented the File API in LangChain with create, delete, list, retrieve, and retrieve-content operations. You can see [the official API reference here](https://platform.openai.com/docs/api-reference/files/object). + +The `File` object represents a document that has been uploaded to OpenAI. + +``` +{ + "id": "file-abc123", + "object": "file", + "bytes": 120000, + "created_at": 1677610602, + "filename": "salesOverview.pdf", + "purpose": "assistants", +} +``` + +## Create a File + +Upload a file that can be used across various endpoints. The size of all the files uploaded by one organization can be up to **100 GB**. + +The size of individual files can be a maximum of **512 MB**. See the Assistants Tools guide above to learn more about the types of files supported. The Fine-tuning API only supports `.jsonl` files.
+ +```typescript +import { OpenAIFiles } from "langchain/experimental/openai_files"; + +const openAIFiles = new OpenAIFiles(); +const file = await openAIFiles.createFile({ + file: fs.createReadStream(path.resolve(__dirname, `./test.txt`)), + purpose: "assistants", +}); +/** +* Output + { + "id": "file-BK7bzQj3FfZFXr7DbL6xJwfo", + "object": "file", + "bytes": 120000, + "created_at": 1677610602, + "filename": "salesOverview.pdf", + "purpose": "assistants", + } +*/ +``` + +## Use File in AI Assistant + +```typescript +import { OpenAIAssistantRunnable } from "langchain/experimental/openai_assistant"; +import { OpenAIFiles } from "langchain/experimental/openai_files"; + +const openAIFiles = new OpenAIFiles(); +const file = await openAIFiles.createFile({ + file: fs.createReadStream(path.resolve(__dirname, `./test.txt`)), + purpose: "assistants", +}); + +const agent = await OpenAIAssistantRunnable.createAssistant({ + model: "gpt-3.5-turbo-1106", + instructions: + "You are a weather bot. Use the provided functions to answer questions.", + name: "Weather Assistant", + tools, + asAgent: true, + fileIds: [file.id], +}); +``` + +## Delete a File + +Delete a file. + +```typescript +import { OpenAIFiles } from "langchain/experimental/openai_files"; + +const openAIFiles = new OpenAIFiles(); +const result = await openAIFiles.deleteFile({ fileId: file.id }); +/** +* Output: + { + "id": "file-abc123", + "object": "file", + "deleted": true + } +*/ +``` + +## List all Files + +Returns a list of files that belong to the user's organization. + +`purpose`?: string +Only return files with the given purpose.
+ +```typescript +import { OpenAIFiles } from "langchain/experimental/openai_files"; + +const openAIFiles = new OpenAIFiles(); +const result = await openAIFiles.listFiles({ query: { purpose: "assistants" } }); +/** +* Output: + { + "data": [ + { + "id": "file-abc123", + "object": "file", + "bytes": 175, + "created_at": 1613677385, + "filename": "salesOverview.pdf", + "purpose": "assistants", + }, + { + "id": "file-abc123", + "object": "file", + "bytes": 140, + "created_at": 1613779121, + "filename": "puppy.jsonl", + "purpose": "fine-tune", + } + ], + "object": "list" + } +*/ +``` + +## Retrieve File + +Returns information about a specific file. + +```typescript +import { OpenAIFiles } from "langchain/experimental/openai_files"; + +const openAIFiles = new OpenAIFiles(); +const result = await openAIFiles.retrieveFile({ fileId: file.id }); +/** +* Output: + { + "id": "file-abc123", + "object": "file", + "bytes": 120000, + "created_at": 1677610602, + "filename": "mydata.jsonl", + "purpose": "fine-tune", + } +*/ +``` + +## Retrieve File Content + +Returns the contents of the specified file. + +You can't retrieve the contents of a file that was uploaded with the "purpose": "assistants" API. + +```typescript +import { OpenAIFiles } from "langchain/experimental/openai_files"; + +const openAIFiles = new OpenAIFiles(); +const result = await openAIFiles.retrieveFileContent({ fileId: file.id }); +// Return the file content.
+``` diff --git a/environment_tests/test-exports-bun/src/entrypoints.js b/environment_tests/test-exports-bun/src/entrypoints.js index 116a4ceac3cd..0d6c599746bf 100644 --- a/environment_tests/test-exports-bun/src/entrypoints.js +++ b/environment_tests/test-exports-bun/src/entrypoints.js @@ -92,6 +92,7 @@ export * from "langchain/util/math"; export * from "langchain/util/time"; export * from "langchain/experimental/autogpt"; export * from "langchain/experimental/openai_assistant"; +export * from "langchain/experimental/openai_files"; export * from "langchain/experimental/babyagi"; export * from "langchain/experimental/generative_agents"; export * from "langchain/experimental/plan_and_execute"; diff --git a/environment_tests/test-exports-cf/src/entrypoints.js b/environment_tests/test-exports-cf/src/entrypoints.js index 116a4ceac3cd..0d6c599746bf 100644 --- a/environment_tests/test-exports-cf/src/entrypoints.js +++ b/environment_tests/test-exports-cf/src/entrypoints.js @@ -92,6 +92,7 @@ export * from "langchain/util/math"; export * from "langchain/util/time"; export * from "langchain/experimental/autogpt"; export * from "langchain/experimental/openai_assistant"; +export * from "langchain/experimental/openai_files"; export * from "langchain/experimental/babyagi"; export * from "langchain/experimental/generative_agents"; export * from "langchain/experimental/plan_and_execute"; diff --git a/environment_tests/test-exports-cjs/src/entrypoints.js b/environment_tests/test-exports-cjs/src/entrypoints.js index 3fe9c8a191c8..9705ca765de9 100644 --- a/environment_tests/test-exports-cjs/src/entrypoints.js +++ b/environment_tests/test-exports-cjs/src/entrypoints.js @@ -92,6 +92,7 @@ const util_math = require("langchain/util/math"); const util_time = require("langchain/util/time"); const experimental_autogpt = require("langchain/experimental/autogpt"); const experimental_openai_assistant = require("langchain/experimental/openai_assistant"); +const experimental_openai_files = 
require("langchain/experimental/openai_files"); const experimental_babyagi = require("langchain/experimental/babyagi"); const experimental_generative_agents = require("langchain/experimental/generative_agents"); const experimental_plan_and_execute = require("langchain/experimental/plan_and_execute"); diff --git a/environment_tests/test-exports-esbuild/src/entrypoints.js b/environment_tests/test-exports-esbuild/src/entrypoints.js index a0f8701b4049..32fabd0d55fa 100644 --- a/environment_tests/test-exports-esbuild/src/entrypoints.js +++ b/environment_tests/test-exports-esbuild/src/entrypoints.js @@ -92,6 +92,7 @@ import * as util_math from "langchain/util/math"; import * as util_time from "langchain/util/time"; import * as experimental_autogpt from "langchain/experimental/autogpt"; import * as experimental_openai_assistant from "langchain/experimental/openai_assistant"; +import * as experimental_openai_files from "langchain/experimental/openai_files"; import * as experimental_babyagi from "langchain/experimental/babyagi"; import * as experimental_generative_agents from "langchain/experimental/generative_agents"; import * as experimental_plan_and_execute from "langchain/experimental/plan_and_execute"; diff --git a/environment_tests/test-exports-esm/src/entrypoints.js b/environment_tests/test-exports-esm/src/entrypoints.js index a0f8701b4049..32fabd0d55fa 100644 --- a/environment_tests/test-exports-esm/src/entrypoints.js +++ b/environment_tests/test-exports-esm/src/entrypoints.js @@ -92,6 +92,7 @@ import * as util_math from "langchain/util/math"; import * as util_time from "langchain/util/time"; import * as experimental_autogpt from "langchain/experimental/autogpt"; import * as experimental_openai_assistant from "langchain/experimental/openai_assistant"; +import * as experimental_openai_files from "langchain/experimental/openai_files"; import * as experimental_babyagi from "langchain/experimental/babyagi"; import * as experimental_generative_agents from 
"langchain/experimental/generative_agents"; import * as experimental_plan_and_execute from "langchain/experimental/plan_and_execute"; diff --git a/environment_tests/test-exports-vercel/src/entrypoints.js b/environment_tests/test-exports-vercel/src/entrypoints.js index 116a4ceac3cd..0d6c599746bf 100644 --- a/environment_tests/test-exports-vercel/src/entrypoints.js +++ b/environment_tests/test-exports-vercel/src/entrypoints.js @@ -92,6 +92,7 @@ export * from "langchain/util/math"; export * from "langchain/util/time"; export * from "langchain/experimental/autogpt"; export * from "langchain/experimental/openai_assistant"; +export * from "langchain/experimental/openai_files"; export * from "langchain/experimental/babyagi"; export * from "langchain/experimental/generative_agents"; export * from "langchain/experimental/plan_and_execute"; diff --git a/environment_tests/test-exports-vite/src/entrypoints.js b/environment_tests/test-exports-vite/src/entrypoints.js index 116a4ceac3cd..0d6c599746bf 100644 --- a/environment_tests/test-exports-vite/src/entrypoints.js +++ b/environment_tests/test-exports-vite/src/entrypoints.js @@ -92,6 +92,7 @@ export * from "langchain/util/math"; export * from "langchain/util/time"; export * from "langchain/experimental/autogpt"; export * from "langchain/experimental/openai_assistant"; +export * from "langchain/experimental/openai_files"; export * from "langchain/experimental/babyagi"; export * from "langchain/experimental/generative_agents"; export * from "langchain/experimental/plan_and_execute"; diff --git a/langchain/.gitignore b/langchain/.gitignore index fc2be83f13e7..d5ecb045b7e7 100644 --- a/langchain/.gitignore +++ b/langchain/.gitignore @@ -754,6 +754,9 @@ experimental/autogpt.d.ts experimental/openai_assistant.cjs experimental/openai_assistant.js experimental/openai_assistant.d.ts +experimental/openai_files.cjs +experimental/openai_files.js +experimental/openai_files.d.ts experimental/babyagi.cjs experimental/babyagi.js 
experimental/babyagi.d.ts diff --git a/langchain/package.json b/langchain/package.json index f6aaedad24b1..59d2bda6bd6d 100644 --- a/langchain/package.json +++ b/langchain/package.json @@ -766,6 +766,9 @@ "experimental/openai_assistant.cjs", "experimental/openai_assistant.js", "experimental/openai_assistant.d.ts", + "experimental/openai_files.cjs", + "experimental/openai_files.js", + "experimental/openai_files.d.ts", "experimental/babyagi.cjs", "experimental/babyagi.js", "experimental/babyagi.d.ts", @@ -2668,6 +2671,11 @@ "import": "./experimental/openai_assistant.js", "require": "./experimental/openai_assistant.cjs" }, + "./experimental/openai_files": { + "types": "./experimental/openai_files.d.ts", + "import": "./experimental/openai_files.js", + "require": "./experimental/openai_files.cjs" + }, "./experimental/babyagi": { "types": "./experimental/babyagi.d.ts", "import": "./experimental/babyagi.js", diff --git a/langchain/scripts/create-entrypoints.js b/langchain/scripts/create-entrypoints.js index 98200151910d..35fa5c311112 100644 --- a/langchain/scripts/create-entrypoints.js +++ b/langchain/scripts/create-entrypoints.js @@ -296,6 +296,7 @@ const entrypoints = { // experimental "experimental/autogpt": "experimental/autogpt/index", "experimental/openai_assistant": "experimental/openai_assistant/index", + "experimental/openai_files": "experimental/openai_files/index", "experimental/babyagi": "experimental/babyagi/index", "experimental/generative_agents": "experimental/generative_agents/index", "experimental/plan_and_execute": "experimental/plan_and_execute/index", diff --git a/langchain/src/experimental/openai_assistant/index.ts b/langchain/src/experimental/openai_assistant/index.ts index 6b1de22abbea..60f95757f778 100644 --- a/langchain/src/experimental/openai_assistant/index.ts +++ b/langchain/src/experimental/openai_assistant/index.ts @@ -60,11 +60,13 @@ export class OpenAIAssistantRunnable< clientOptions, asAgent, pollIntervalMs, + fileIds, }: Omit, 
"assistantId"> & { model: string; name?: string; instructions?: string; tools?: OpenAIToolType | Array; + fileIds?: string[]; }) { const formattedTools = tools?.map((tool) => { @@ -80,6 +82,7 @@ export class OpenAIAssistantRunnable< instructions, tools: formattedTools, model, + file_ids: fileIds, }); return new this({ diff --git a/langchain/src/experimental/openai_files/index.ts b/langchain/src/experimental/openai_files/index.ts new file mode 100644 index 000000000000..052778afecbc --- /dev/null +++ b/langchain/src/experimental/openai_files/index.ts @@ -0,0 +1,114 @@ +import { ClientOptions, OpenAI as OpenAIClient } from "openai"; +import { Serializable } from "../../load/serializable.js"; + +export type OpenAIFilesInput = { + client?: OpenAIClient; + clientOptions?: ClientOptions; +}; + +export class OpenAIFiles extends Serializable { + lc_namespace = ["langchain", "experimental"]; + + private oaiClient: OpenAIClient; + + constructor(fields?: OpenAIFilesInput) { + super(fields); + this.oaiClient = fields?.client ?? new OpenAIClient(fields?.clientOptions); + } + + /** + * Upload file + * Upload a file that can be used across various endpoints. The size of all the files uploaded by one organization can be up to 100 GB. + * + * @note The size of individual files can be a maximum of 512 MB. See the Assistants Tools guide to learn more about the types of files supported. The Fine-tuning API only supports .jsonl files. + * + * @link {https://platform.openai.com/docs/api-reference/files/create} + * @param {OpenAIClient.FileCreateParams['file']} file + * @param {OpenAIClient.FileCreateParams['purpose']} purpose + * @param {OpenAIClient.RequestOptions | undefined} options + * @returns {Promise} + */ + async createFile({ + file, + purpose, + options, + }: OpenAIClient.FileCreateParams & { + options?: OpenAIClient.RequestOptions; + }) { + return this.oaiClient.files.create({ file, purpose }, options); + } + + /** + * Delete a file. 
+ * + * @link {https://platform.openai.com/docs/api-reference/files/delete} + * @param {string} fileId + * @param {OpenAIClient.RequestOptions | undefined} options + * @returns {Promise} + */ + async deleteFile({ + fileId, + options, + }: { + fileId: string; + options?: OpenAIClient.RequestOptions; + }) { + return this.oaiClient.files.del(fileId, options); + } + + /** + * List files + * Returns a list of files that belong to the user's organization. + * + * @link {https://platform.openai.com/docs/api-reference/files/list} + * @param {OpenAIClient.Files.FileListParams | undefined} query + * @param {OpenAIClient.RequestOptions | undefined} options + * @returns {Promise} + */ + async listFiles(props?: { + query?: OpenAIClient.Files.FileListParams; + options?: OpenAIClient.RequestOptions; + }) { + return this.oaiClient.files.list(props?.query, props?.options); + } + + /** + * Retrieve file + * Returns information about a specific file. + * + * @link {https://platform.openai.com/docs/api-reference/files/retrieve} + * @param {string} fileId + * @param {OpenAIClient.RequestOptions | undefined} options + * @returns {Promise} + */ + async retrieveFile({ + fileId, + options, + }: { + fileId: string; + options?: OpenAIClient.RequestOptions; + }) { + return this.oaiClient.files.retrieve(fileId, options); + } + + /** + * Retrieve file content + * Returns the contents of the specified file. + * + * @note You can't retrieve the contents of a file that was uploaded with the "purpose": "assistants" API. 
+ * + * @link {https://platform.openai.com/docs/api-reference/files/retrieve-contents} + * @param {string} fileId + * @param {OpenAIClient.RequestOptions | undefined} options + * @returns {Promise} + */ + async retrieveFileContent({ + fileId, + options, + }: { + fileId: string; + options?: OpenAIClient.RequestOptions; + }) { + return this.oaiClient.files.retrieveContent(fileId, options); + } +} diff --git a/langchain/src/experimental/openai_files/tests/openai_file.int.test.ts b/langchain/src/experimental/openai_files/tests/openai_file.int.test.ts new file mode 100644 index 000000000000..68d6ff128566 --- /dev/null +++ b/langchain/src/experimental/openai_files/tests/openai_file.int.test.ts @@ -0,0 +1,89 @@ +import * as fs from "fs"; +import * as path from "path"; +import { fileURLToPath } from "url"; +import { dirname } from "path"; +import { OpenAIFiles } from "../index.js"; + +/** + * Otherwise we got the error __dirname doesn't exist + */ +const __filename = fileURLToPath(import.meta.url); +const __dirname = dirname(__filename); + +test("Use file with Open AI", async () => { + const openAIFiles = new OpenAIFiles(); + const file = await openAIFiles.createFile({ + file: fs.createReadStream(path.resolve(__dirname, `./test.jsonl`)), + purpose: "fine-tune", + }); + expect(file.id).toBeDefined(); + expect(file.object).toBe("file"); + /** + * Output + { + "id": "file-BK7bzQj3FfZFXr7DbL6xJwfo", + "object": "file", + "bytes": 120000, + "created_at": 1677610602, + "filename": "salesOverview.pdf", + "purpose": "assistants", + } + */ + const fileContent = await openAIFiles.retrieveFileContent({ + fileId: file.id, + }); + console.log(fileContent); + expect(fileContent).toBeDefined(); + /** + * Output + { + "id": "file-BK7bzQj3FfZFXr7DbL6xJwfo", + "object": "file", + "bytes": 120000, + "created_at": 1677610602, + "filename": "salesOverview.pdf", + "purpose": "assistants", + } + */ + const retrievedFile = await openAIFiles.retrieveFile({ + fileId: file.id, + }); + 
expect(retrievedFile.id).toBeDefined(); + expect(retrievedFile.object).toBe("file"); + /** + * Output + { + "id": "file-BK7bzQj3FfZFXr7DbL6xJwfo", + "object": "file", + "bytes": 120000, + "created_at": 1677610602, + "filename": "salesOverview.pdf", + "purpose": "assistants", + } + */ + const list = await openAIFiles.listFiles(); + expect(list).toBeDefined(); + expect(!!list.data.find((f) => f.id === file.id)).toBeTruthy(); + /** + * Output + { + "id": "file-BK7bzQj3FfZFXr7DbL6xJwfo", + "object": "file", + "bytes": 120000, + "created_at": 1677610602, + "filename": "salesOverview.pdf", + "purpose": "assistants", + } + */ + const result = await openAIFiles.deleteFile({ fileId: file.id }); + expect(result.id).toBe(file.id); + expect(result.deleted).toBeTruthy(); + /** + * Output: + { + "id": "file-abc123", + "object": "file", + "deleted": true + } + */ +}); diff --git a/langchain/src/experimental/openai_files/tests/test.jsonl b/langchain/src/experimental/openai_files/tests/test.jsonl new file mode 100644 index 000000000000..cb98632f423b --- /dev/null +++ b/langchain/src/experimental/openai_files/tests/test.jsonl @@ -0,0 +1 @@ +{"messages": [{"role": "system", "content": "Marv is a factual chatbot that is also sarcastic."}, {"role": "user", "content": "What's the capital of France?"}, {"role": "assistant", "content": "Paris, as if everyone doesn't know that already."}]} diff --git a/langchain/src/load/import_map.ts b/langchain/src/load/import_map.ts index d968a29215d9..f6c5c17b957e 100644 --- a/langchain/src/load/import_map.ts +++ b/langchain/src/load/import_map.ts @@ -93,6 +93,7 @@ export * as util__math from "../util/math.js"; export * as util__time from "../util/time.js"; export * as experimental__autogpt from "../experimental/autogpt/index.js"; export * as experimental__openai_assistant from "../experimental/openai_assistant/index.js"; +export * as experimental__openai_files from "../experimental/openai_files/index.js"; export * as experimental__babyagi from 
"../experimental/babyagi/index.js"; export * as experimental__generative_agents from "../experimental/generative_agents/index.js"; export * as experimental__plan_and_execute from "../experimental/plan_and_execute/index.js";