From c66125cbbc881c899cb0f2d46ef955483dff8f00 Mon Sep 17 00:00:00 2001 From: Zeneos <95008961+Zeneos@users.noreply.github.com> Date: Fri, 24 Nov 2023 22:39:03 -0500 Subject: [PATCH 1/9] copy pasting and basic error fixing --- .../document_loaders/file_loaders/chatgpt.mdx | 69 ++++ langchain/src/document_loaders/fs/chatgpt.ts | 97 +++++ .../document_loaders/tests/chatgpt.test.ts | 44 ++ .../example_data/example_conversations.json | 380 ++++++++++++++++++ 4 files changed, 590 insertions(+) create mode 100644 docs/core_docs/docs/integrations/document_loaders/file_loaders/chatgpt.mdx create mode 100644 langchain/src/document_loaders/fs/chatgpt.ts create mode 100644 langchain/src/document_loaders/tests/chatgpt.test.ts create mode 100644 langchain/src/document_loaders/tests/example_data/example_conversations.json diff --git a/docs/core_docs/docs/integrations/document_loaders/file_loaders/chatgpt.mdx b/docs/core_docs/docs/integrations/document_loaders/file_loaders/chatgpt.mdx new file mode 100644 index 000000000000..fb3e0437469b --- /dev/null +++ b/docs/core_docs/docs/integrations/document_loaders/file_loaders/chatgpt.mdx @@ -0,0 +1,69 @@ +# CSV files + +This example goes over how to load conversations.json from your ChatGPT data export folder. You can get your data export by email by going to: ChatGPT -> (Profile) - Settings -> Export data -> Confirm export -> Check email. + +``` +``` + +## Usage + +Example ChatGPT file: + +```json +id,text +1,This is a sentence. +2,This is another sentence. +``` + +Example code: + +```typescript +import { ChatGPTLoader } from "langchain/document_loaders/web/chatgpt"; + +const loader = new ChatGPTLoader(logFile="./example_data/example_conversations.json") + +const docs = await loader.load(); + +console.log(docs); +``` + +## Usage, extracting a single column + +Example CSV file: + +```csv +id,text +1,This is a sentence. +2,This is another sentence. +``` + +Example code: + +```typescript +import { CSVLoader } from "langchain/document_loaders/fs/csv"; + +const loader = new CSVLoader( + "src/document_loaders/example_data/example.csv", + "text" +); + +const docs = await loader.load(); +/* +[ + Document { + "metadata": { + "line": 1, + "source": "src/document_loaders/example_data/example.csv", + }, + "pageContent": "This is a sentence.", + }, + Document { + "metadata": { + "line": 2, + "source": "src/document_loaders/example_data/example.csv", + }, + "pageContent": "This is another sentence.", + }, +] +*/ +``` diff --git a/langchain/src/document_loaders/fs/chatgpt.ts b/langchain/src/document_loaders/fs/chatgpt.ts new file mode 100644 index 000000000000..21fb93bc029a --- /dev/null +++ b/langchain/src/document_loaders/fs/chatgpt.ts @@ -0,0 +1,97 @@ +import { TextLoader } from "./text.js"; +import { Document } from "../../document.js"; + +interface ChatGPTMessage { + author: { + role: string; + }; + content: { + parts: string[]; + }; + create_time: number; +} + +interface ChatGPTLog { + title: string; + mapping: Record; +} + +function concatenateRows(message: ChatGPTMessage, title: string): string { + /** + * Combine message information in a readable format ready to be used. + * @param {ChatGPTMessage} message - Message to be concatenated + * @param {string} title - Title of the conversation + * + * @returns {string} Concatenated message + */ + if (!message) { + return ""; + } + + const sender = message.author ? message.author.role : "unknown"; + const text = message.content.parts[0]; + const date = new Date(message.create_time * 1000).toISOString().slice(0, 19).replace('T', ' '); + return `${title} - ${sender} on ${date}: ${text}\n\n`; +} + +export class ChatGPTLoader extends TextLoader { + public numLogs: number; + + constructor(filePathOrBlob: string | Blob, numLogs: number = 0) { + super(filePathOrBlob); + this.numLogs = numLogs; + } + + protected async parse(raw: string): Promise { + let data; + try { + data = JSON.parse(raw); + } catch (e) { + console.error("Failed to parse JSON:", e); + return []; + } + + const truncatedData = this.numLogs > 0 ? data.slice(0, this.numLogs) : data; + + return truncatedData.map((d: ChatGPTLog) => { + return Object.values(d.mapping) + .filter((msg, idx) => !(idx === 0 && msg.message.author.role === "system")) + .map(msg => concatenateRows(msg.message, d.title)) + .join(''); + }); + } + + public async load(): Promise { + let text: string; + let metadata: Record; + if (typeof this.filePathOrBlob === "string") { + const { readFile } = await TextLoader.imports(); + try { + text = await readFile(this.filePathOrBlob, "utf8"); + } catch (e) { + console.error("Failed to read file:", e); + return []; + } + metadata = { source: this.filePathOrBlob }; + } else { + try { + text = await this.filePathOrBlob.text(); + } catch (e) { + console.error("Failed to read blob:", e); + return []; + } + metadata = { source: "blob", blobType: this.filePathOrBlob.type }; + } + + const parsed = await this.parse(text); + return parsed.map( + (pageContent, i) => new Document({ + pageContent, + metadata: { + ...metadata, + logIndex: i + 1 + } + }) + ); + } +} diff --git a/langchain/src/document_loaders/tests/chatgpt.test.ts b/langchain/src/document_loaders/tests/chatgpt.test.ts new file mode 100644 index 000000000000..2500d70913dc --- /dev/null +++ b/langchain/src/document_loaders/tests/chatgpt.test.ts @@ -0,0 +1,44 @@ +import * as url from "node:url"; +import * as path from "node:path"; +import { test, expect } from '@jest/globals'; +import { Document } from "../../document.js"; +import { ChatGPTLoader } from '../fs/chatgpt.js'; + +test('Test ChatGPT loader to load all documents', async () => { + const filePath = path.resolve( + path.dirname(url.fileURLToPath(import.meta.url)), + "./example_data/example_conversations.json" + ); + const loader = new ChatGPTLoader(filePath); + const docs = await loader.load(); + expect(docs.length).toBe(2); + expect(docs[0]).toEqual( + new Document({ + metadata: {'source': './example_data/example_conversations.json'}, + pageContent: "Example Usage - user on 2023-06-16 05:26:57: Hello, what is your name?\n\nExample Usage - assistant on 2023-06-16 05:27:03: Hello! I'm just a computer program created by OpenAI, so I don't have a personal name. You can call me ChatGPT or simply ask me any questions or chat about topics you're interested in. How can I assist you today?\n\n", + + }) + ); + expect(docs[1]).toEqual( + new Document({ + metadata: {'source': './example_data/example_conversations.json'}, + pageContent: "Example Usage 2 - user on 2023-06-11 00:48:59: What should I do today?\n\nExample Usage 2 - assistant on 2023-06-11 00:49:07: You should contribute to LangChain!\n\nExample Usage 2 - user on 2023-06-11 00:50:10: How can I start?\n\nExample Usage 2 - assistant on 2023-06-11 00:50:18: You can take a look at the current LangChain issues and see if you can contribute to any! Don't forget to read the contributing.md file.\n\nExample Usage 2 - user on 2023-06-11 01:56:04: Thank you!\n\nExample Usage 2 - assistant on 2023-06-11 01:56:14: You're welcome! If you have any more questions or need further assistance in the future, feel free to reach out.\n\n", + }) + ); +}); + +test('Test ChatGPT loader to only load 1 document', async () => { + const filePath = path.resolve( + path.dirname(url.fileURLToPath(import.meta.url)), + "./example_data/example_conversations.json" + ); + const loader = new ChatGPTLoader(filePath, 1); + const docs = await loader.load(); + expect(docs.length).toBe(1); + expect(docs[0]).toEqual( + new Document({ + metadata: {'source': './example_data/example_conversations.json'}, + pageContent: "Example Usage - user on 2023-06-16 05:26:57: Hello, what is your name?\n\nExample Usage - assistant on 2023-06-16 05:27:03: Hello! I'm just a computer program created by OpenAI, so I don't have a personal name. You can call me ChatGPT or simply ask me any questions or chat about topics you're interested in. How can I assist you today?\n\n", + }) + ); +}); diff --git a/langchain/src/document_loaders/tests/example_data/example_conversations.json b/langchain/src/document_loaders/tests/example_data/example_conversations.json new file mode 100644 index 000000000000..77f09a054bc1 --- /dev/null +++ b/langchain/src/document_loaders/tests/example_data/example_conversations.json @@ -0,0 +1,380 @@ +[ + { + "title": "Example Usage", + "create_time": 1697499617.697575, + "update_time": 1697499624.514186, + "mapping": { + "91c9fd24-0a12-41b8-b860-8f2cc237e838": { + "id": "91c9fd24-0a12-41b8-b860-8f2cc237e838", + "message": { + "id": "91c9fd24-0a12-41b8-b860-8f2cc237e838", + "author": { + "role": "system", + "name": null, + "metadata": {} + }, + "create_time": null, + "update_time": null, + "content": { + "content_type": "text", + "parts": [ + "" + ] + }, + "status": "finished_successfully", + "end_turn": true, + "weight": 0.0, + "metadata": {}, + "recipient": "all" + }, + "parent": "aaa1ab70-044a-4f2b-b4f7-74d4342f34da", + "children": [ + "aaa29ce8-0430-461e-9d42-74f750409cc7" + ] + }, + "aaa1ab70-044a-4f2b-b4f7-74d4342f34da": { + "id": "aaa1ab70-044a-4f2b-b4f7-74d4342f34da", + "message": null, + "parent": null, + "children": [ + "91c9fd24-0a12-41b8-b860-8f2cc237e838" + ] + }, + "aaa29ce8-0430-461e-9d42-74f750409cc7": { + "id": "aaa29ce8-0430-461e-9d42-74f750409cc7", + "message": { + "id": "aaa29ce8-0430-461e-9d42-74f750409cc7", + "author": { + "role": "user", + "name": null, + "metadata": {} + }, + "create_time": 1697499617.698544, + "update_time": null, + "content": { + "content_type": "text", + "parts": [ + "Hello, what is your name?" + ] + }, + "status": "finished_successfully", + "end_turn": null, + "weight": 1.0, + "metadata": { + "timestamp_": "absolute", + "message_type": null + }, + "recipient": "all" + }, + "parent": "91c9fd24-0a12-41b8-b860-8f2cc237e838", + "children": [ + "e47e5933-13f9-460b-b0f0-fc032e44ff64" + ] + }, + "e47e5933-13f9-460b-b0f0-fc032e44ff64": { + "id": "e47e5933-13f9-460b-b0f0-fc032e44ff64", + "message": { + "id": "e47e5933-13f9-460b-b0f0-fc032e44ff64", + "author": { + "role": "assistant", + "name": null, + "metadata": {} + }, + "create_time": 1697499623.489646, + "update_time": null, + "content": { + "content_type": "text", + "parts": [ + "Hello! I'm just a computer program created by OpenAI, so I don't have a personal name. You can call me ChatGPT or simply ask me any questions or chat about topics you're interested in. How can I assist you today?" + ] + }, + "status": "finished_successfully", + "end_turn": true, + "weight": 1.0, + "metadata": { + "finish_details": { + "type": "stop", + "stop_tokens": [ + 100260 + ] + }, + "is_complete": true, + "message_type": null, + "model_slug": "text-davinci-002-render-sha", + "parent_id": "aaa29ce8-0430-461e-9d42-74f750409cc7", + "timestamp_": "absolute" + }, + "recipient": "all" + }, + "parent": "aaa29ce8-0430-461e-9d42-74f750409cc7", + "children": [] + } + }, + "moderation_results": [], + "current_node": "e47e5933-13f9-460b-b0f0-fc032e44ff64", + "plugin_ids": null, + "conversation_id": "47867fd2-2a41-4102-80ef-101510becc08", + "conversation_template_id": null, + "id": "47867fd2-2a41-4102-80ef-101510becc08" + }, + { + "title": "Example Usage 2", + "create_time": 1697238139.540548, + "update_time": 1697238574.062126, + "mapping": { + "f3602996-5cb6-4050-87d9-0001149ba7ee": { + "id": "f3602996-5cb6-4050-87d9-0001149ba7ee", + "message": { + "id": "f3602996-5cb6-4050-87d9-0001149ba7ee", + "author": { + "role": "system", + "name": null, + "metadata": {} + }, + "create_time": null, + "update_time": null, + "content": { + "content_type": "text", + "parts": [ + "" + ] + }, + "status": "finished_successfully", + "end_turn": true, + "weight": 0.0, + "metadata": {}, + "recipient": "all" + }, + "parent": "aaa1590a-2f7f-410a-b12c-9119a3264ef9", + "children": [ + "aaa2fff4-49fb-40e5-9122-a2addfeedfc9" + ] + }, + "aaa1590a-2f7f-410a-b12c-9119a3264ef9": { + "id": "aaa1590a-2f7f-410a-b12c-9119a3264ef9", + "message": null, + "parent": null, + "children": [ + "f3602996-5cb6-4050-87d9-0001149ba7ee" + ] + }, + "aaa2fff4-49fb-40e5-9122-a2addfeedfc9": { + "id": "aaa2fff4-49fb-40e5-9122-a2addfeedfc9", + "message": { + "id": "aaa2fff4-49fb-40e5-9122-a2addfeedfc9", + "author": { + "role": "user", + "name": null, + "metadata": {} + }, + "create_time": 1697238139.541115, + "update_time": null, + "content": { + "content_type": "text", + "parts": [ + "What should I do today?" + ] + }, + "status": "finished_successfully", + "end_turn": null, + "weight": 1.0, + "metadata": { + "timestamp_": "absolute", + "message_type": null + }, + "recipient": "all" + }, + "parent": "f3602996-5cb6-4050-87d9-0001149ba7ee", + "children": [ + "fc75f9c1-d769-4d1b-a94f-0ebdf25af1f8" + ] + }, + "fc75f9c1-d769-4d1b-a94f-0ebdf25af1f8": { + "id": "fc75f9c1-d769-4d1b-a94f-0ebdf25af1f8", + "message": { + "id": "fc75f9c1-d769-4d1b-a94f-0ebdf25af1f8", + "author": { + "role": "assistant", + "name": null, + "metadata": {} + }, + "create_time": 1697238147.462413, + "update_time": null, + "content": { + "content_type": "text", + "parts": [ + "You should contribute to LangChain!" + ] + }, + "status": "finished_successfully", + "end_turn": true, + "weight": 1.0, + "metadata": { + "finish_details": { + "type": "stop", + "stop_tokens": [ + 100260 + ] + }, + "is_complete": true, + "message_type": null, + "model_slug": "text-davinci-002-render-sha", + "parent_id": "aaa2fff4-49fb-40e5-9122-a2addfeedfc9", + "timestamp_": "absolute" + }, + "recipient": "all" + }, + "parent": "aaa2fff4-49fb-40e5-9122-a2addfeedfc9", + "children": [ + "aaa24ae6-3972-4094-8c84-3b31750bc212" + ] + }, + "aaa24ae6-3972-4094-8c84-3b31750bc212": { + "id": "aaa24ae6-3972-4094-8c84-3b31750bc212", + "message": { + "id": "aaa24ae6-3972-4094-8c84-3b31750bc212", + "author": { + "role": "user", + "name": null, + "metadata": {} + }, + "create_time": 1697238210.739114, + "update_time": null, + "content": { + "content_type": "text", + "parts": [ + "How can I start?" + ] + }, + "status": "finished_successfully", + "end_turn": null, + "weight": 1.0, + "metadata": { + "timestamp_": "absolute", + "message_type": null + }, + "recipient": "all" + }, + "parent": "fc75f9c1-d769-4d1b-a94f-0ebdf25af1f8", + "children": [ + "713c8a38-0685-41f2-8a28-36c880d65480" + ] + }, + "713c8a38-0685-41f2-8a28-36c880d65480": { + "id": "713c8a38-0685-41f2-8a28-36c880d65480", + "message": { + "id": "713c8a38-0685-41f2-8a28-36c880d65480", + "author": { + "role": "assistant", + "name": null, + "metadata": {} + }, + "create_time": 1697238218.628864, + "update_time": null, + "content": { + "content_type": "text", + "parts": [ + "You can take a look at the current LangChain issues and see if you can contribute to any! Don't forget to read the contributing.md file." + ] + }, + "status": "finished_successfully", + "end_turn": true, + "weight": 1.0, + "metadata": { + "finish_details": { + "type": "stop", + "stop_tokens": [ + 100260 + ] + }, + "is_complete": true, + "message_type": null, + "model_slug": "text-davinci-002-render-sha", + "parent_id": "aaa24ae6-3972-4094-8c84-3b31750bc212", + "timestamp_": "absolute" + }, + "recipient": "all" + }, + "parent": "aaa24ae6-3972-4094-8c84-3b31750bc212", + "children": [ + "aaa266ba-b466-4207-bfbc-19b2c1bb9b78" + ] + }, + "aaa266ba-b466-4207-bfbc-19b2c1bb9b78": { + "id": "aaa266ba-b466-4207-bfbc-19b2c1bb9b78", + "message": { + "id": "aaa266ba-b466-4207-bfbc-19b2c1bb9b78", + "author": { + "role": "user", + "name": null, + "metadata": {} + }, + "create_time": 1697238564.084255, + "update_time": null, + "content": { + "content_type": "text", + "parts": [ + "Thank you!" + ] + }, + "status": "finished_successfully", + "end_turn": null, + "weight": 1.0, + "metadata": { + "timestamp_": "absolute", + "message_type": null + }, + "recipient": "all" + }, + "parent": "713c8a38-0685-41f2-8a28-36c880d65480", + "children": [ + "6ea89c28-95de-465d-9d25-a8fd67d4ba9c" + ] + }, + "6ea89c28-95de-465d-9d25-a8fd67d4ba9c": { + "id": "6ea89c28-95de-465d-9d25-a8fd67d4ba9c", + "message": { + "id": "6ea89c28-95de-465d-9d25-a8fd67d4ba9c", + "author": { + "role": "assistant", + "name": null, + "metadata": {} + }, + "create_time": 1697238574.056793, + "update_time": null, + "content": { + "content_type": "text", + "parts": [ + "You're welcome! If you have any more questions or need further assistance in the future, feel free to reach out." + ] + }, + "status": "finished_successfully", + "end_turn": true, + "weight": 1.0, + "metadata": { + "finish_details": { + "type": "stop", + "stop_tokens": [ + 100260 + ] + }, + "is_complete": true, + "message_type": null, + "model_slug": "text-davinci-002-render-sha", + "parent_id": "aaa266ba-b466-4207-bfbc-19b2c1bb9b78", + "timestamp_": "absolute" + }, + "recipient": "all" + }, + "parent": "aaa266ba-b466-4207-bfbc-19b2c1bb9b78", + "children": [] + } + }, + "moderation_results": [], + "current_node": "6ea89c28-95de-465d-9d25-a8fd67d4ba9c", + "plugin_ids": null, + "conversation_id": "588ae2e0-a336-475b-b4ef-1904078b8c24", + "conversation_template_id": null, + "id": "588ae2e0-a336-475b-b4ef-1904078b8c24" + } +] \ No newline at end of file From 1ffabdd3ae99efca5915842fde171635ac26c2a9 Mon Sep 17 00:00:00 2001 From: Zeneos <95008961+Zeneos@users.noreply.github.com> Date: Tue, 28 Nov 2023 10:12:01 -0500 Subject: [PATCH 2/9] fixed testing issue - fixed issue - fixed code - added blob tests - fixed test timestamps --- .../document_loaders/file_loaders/chatgpt.mdx | 53 +- langchain/src/document_loaders/fs/chatgpt.ts | 156 ++-- .../tests/chatgpt-blob.test.ts | 52 ++ .../document_loaders/tests/chatgpt.test.ts | 68 +- .../example_data/example_conversations.json | 702 ++++++++---------- 5 files changed, 507 insertions(+), 524 deletions(-) create mode 100644 langchain/src/document_loaders/tests/chatgpt-blob.test.ts diff --git a/docs/core_docs/docs/integrations/document_loaders/file_loaders/chatgpt.mdx b/docs/core_docs/docs/integrations/document_loaders/file_loaders/chatgpt.mdx index fb3e0437469b..6f06a94426cf 100644 --- a/docs/core_docs/docs/integrations/document_loaders/file_loaders/chatgpt.mdx +++ b/docs/core_docs/docs/integrations/document_loaders/file_loaders/chatgpt.mdx @@ -1,69 +1,34 @@ -# CSV files +# ChatGPT files This example goes over how to load conversations.json from your ChatGPT data export folder. You can get your data export by email by going to: ChatGPT -> (Profile) - Settings -> Export data -> Confirm export -> Check email. ``` ``` -## Usage - -Example ChatGPT file: - -```json -id,text -1,This is a sentence. -2,This is another sentence. -``` +## Usage, extracting all logs Example code: ```typescript import { ChatGPTLoader } from "langchain/document_loaders/web/chatgpt"; -const loader = new ChatGPTLoader(logFile="./example_data/example_conversations.json") +const loader = new ChatGPTLoader("./example_data/example_conversations.json") const docs = await loader.load(); console.log(docs); ``` -## Usage, extracting a single column - -Example CSV file: - -```csv -id,text -1,This is a sentence. -2,This is another sentence. -``` +## Usage, extracting a single log Example code: ```typescript -import { CSVLoader } from "langchain/document_loaders/fs/csv"; +import { ChatGPTLoader } from "langchain/document_loaders/web/chatgpt"; -const loader = new CSVLoader( - "src/document_loaders/example_data/example.csv", - "text" -); +const loader = new ChatGPTLoader("./example_data/example_conversations.json", 1) const docs = await loader.load(); -/* -[ - Document { - "metadata": { - "line": 1, - "source": "src/document_loaders/example_data/example.csv", - }, - "pageContent": "This is a sentence.", - }, - Document { - "metadata": { - "line": 2, - "source": "src/document_loaders/example_data/example.csv", - }, - "pageContent": "This is another sentence.", - }, -] -*/ -``` + +console.log(docs); +``` \ No newline at end of file diff --git a/langchain/src/document_loaders/fs/chatgpt.ts b/langchain/src/document_loaders/fs/chatgpt.ts index 21fb93bc029a..adc9c266abeb 100644 --- a/langchain/src/document_loaders/fs/chatgpt.ts +++ b/langchain/src/document_loaders/fs/chatgpt.ts @@ -2,96 +2,102 @@ import { TextLoader } from "./text.js"; import { Document } from "../../document.js"; interface ChatGPTMessage { - author: { - role: string; - }; - content: { - parts: string[]; - }; - create_time: number; + author: { + role: string; + }; + content: { + parts: string[]; + }; + create_time: number; } interface ChatGPTLog { - title: string; - mapping: Record; + title: string; + mapping: Record; } function concatenateRows(message: ChatGPTMessage, title: string): string { - /** - * Combine message information in a readable format ready to be used. - * @param {ChatGPTMessage} message - Message to be concatenated - * @param {string} title - Title of the conversation - * - * @returns {string} Concatenated message - */ - if (!message) { - return ""; - } + /** + * Combine message information in a readable format ready to be used. + * @param {ChatGPTMessage} message - Message to be concatenated + * @param {string} title - Title of the conversation + * + * @returns {string} Concatenated message + */ + if (!message) { + return ""; + } - const sender = message.author ? message.author.role : "unknown"; - const text = message.content.parts[0]; - const date = new Date(message.create_time * 1000).toISOString().slice(0, 19).replace('T', ' '); - return `${title} - ${sender} on ${date}: ${text}\n\n`; + const sender = message.author ? message.author.role : "unknown"; + const text = message.content.parts[0]; + const date = new Date(message.create_time * 1000) + .toISOString() + .slice(0, 19) + .replace("T", " "); + return `${title} - ${sender} on ${date}: ${text}\n\n`; } export class ChatGPTLoader extends TextLoader { - public numLogs: number; + public numLogs: number; + + constructor(filePathOrBlob: string | Blob, numLogs: number = 0) { + super(filePathOrBlob); + this.numLogs = numLogs; + } - constructor(filePathOrBlob: string | Blob, numLogs: number = 0) { - super(filePathOrBlob); - this.numLogs = numLogs; + protected async parse(raw: string): Promise { + let data; + try { + data = JSON.parse(raw); + } catch (e) { + console.error("Failed to parse JSON:", e); + return []; } - protected async parse(raw: string): Promise { - let data; - try { - data = JSON.parse(raw); - } catch (e) { - console.error("Failed to parse JSON:", e); - return []; - } + const truncatedData = this.numLogs > 0 ? data.slice(0, this.numLogs) : data; - const truncatedData = this.numLogs > 0 ? data.slice(0, this.numLogs) : data; + return truncatedData.map((d: ChatGPTLog) => { + return Object.values(d.mapping) + .filter( + (msg, idx) => !(idx === 0 && msg.message.author.role === "system") + ) + .map((msg) => concatenateRows(msg.message, d.title)) + .join(""); + }); + } - return truncatedData.map((d: ChatGPTLog) => { - return Object.values(d.mapping) - .filter((msg, idx) => !(idx === 0 && msg.message.author.role === "system")) - .map(msg => concatenateRows(msg.message, d.title)) - .join(''); - }); + public async load(): Promise { + let text: string; + let metadata: Record; + if (typeof this.filePathOrBlob === "string") { + const { readFile } = await TextLoader.imports(); + try { + text = await readFile(this.filePathOrBlob, "utf8"); + } catch (e) { + console.error("Failed to read file:", e); + return []; + } + metadata = { source: this.filePathOrBlob }; + } else { + try { + text = await this.filePathOrBlob.text(); + } catch (e) { + console.error("Failed to read blob:", e); + return []; + } + metadata = { source: "blob", blobType: this.filePathOrBlob.type }; } - public async load(): Promise { - let text: string; - let metadata: Record; - if (typeof this.filePathOrBlob === "string") { - const { readFile } = await TextLoader.imports(); - try { - text = await readFile(this.filePathOrBlob, "utf8"); - } catch (e) { - console.error("Failed to read file:", e); - return []; - } - metadata = { source: this.filePathOrBlob }; - } else { - try { - text = await this.filePathOrBlob.text(); - } catch (e) { - console.error("Failed to read blob:", e); - return []; - } - metadata = { source: "blob", blobType: this.filePathOrBlob.type }; - } - - const parsed = await this.parse(text); - return parsed.map( - (pageContent, i) => new Document({ - pageContent, - metadata: { - ...metadata, - logIndex: i + 1 - } - }) - ); - } + const parsed = await this.parse(text); + return parsed.map( + (pageContent, i) => + new Document({ + pageContent, + metadata: { + ...metadata, + logIndex: i + 1, + }, + }) + ); + } } diff --git a/langchain/src/document_loaders/tests/chatgpt-blob.test.ts b/langchain/src/document_loaders/tests/chatgpt-blob.test.ts new file mode 100644 index 000000000000..1b2081bbfc2a --- /dev/null +++ b/langchain/src/document_loaders/tests/chatgpt-blob.test.ts @@ -0,0 +1,52 @@ +import * as url from "node:url"; +import * as path from "node:path"; +import * as fs from "node:fs/promises"; +import { test, expect } from "@jest/globals"; +import { Document } from "../../document.js"; +import { ChatGPTLoader } from "../fs/chatgpt.js"; + +test("Test ChatGPT loader from blob to load all documents", async () => { + const filePath = path.resolve( + path.dirname(url.fileURLToPath(import.meta.url)), + "./example_data/example_conversations.json" + ); + const loader = new ChatGPTLoader( + new Blob([await fs.readFile(filePath)], { type: "application/json" }) + ); + const docs = await loader.load(); + expect(docs.length).toBe(2); + expect(docs[0]).toEqual( + new Document({ + metadata: { source: "blob", blobType: "application/json", logIndex: 1 }, + pageContent: + "Example Usage - user on 2023-10-16 23:40:17: Hello, what is your name?\n\nExample Usage - assistant on 2023-10-16 23:40:23: Hello! I'm just a computer program created by OpenAI, so I don't have a personal name. You can call me ChatGPT or simply ask me any questions or chat about topics you're interested in. How can I assist you today?\n\n", + }) + ); + expect(docs[1]).toEqual( + new Document({ + metadata: { source: "blob", blobType: "application/json", logIndex: 2 }, + pageContent: + "Example Usage 2 - user on 2023-10-13 23:02:19: What should I do today?\n\nExample Usage 2 - assistant on 2023-10-13 23:02:27: You should contribute to LangChain!\n\nExample Usage 2 - user on 2023-10-13 23:03:30: How can I start?\n\nExample Usage 2 - assistant on 2023-10-13 23:03:38: You can take a look at the current LangChain issues and see if you can contribute to any! Don't forget to read the contributing.md file.\n\nExample Usage 2 - user on 2023-10-13 23:09:24: Thank you!\n\nExample Usage 2 - assistant on 2023-10-13 23:09:34: You're welcome! If you have any more questions or need further assistance in the future, feel free to reach out.\n\n", + }) + ); +}); + +test("Test ChatGPT loader from blob to only load 1 document", async () => { + const filePath = path.resolve( + path.dirname(url.fileURLToPath(import.meta.url)), + "./example_data/example_conversations.json" + ); + const loader = new ChatGPTLoader( + new Blob([await fs.readFile(filePath)], { type: "application/json" }), + 1 + ); + const docs = await loader.load(); + expect(docs.length).toBe(1); + expect(docs[0]).toEqual( + new Document({ + metadata: { source: "blob", blobType: "application/json", logIndex: 1 }, + pageContent: + "Example Usage - user on 2023-10-16 23:40:17: Hello, what is your name?\n\nExample Usage - assistant on 2023-10-16 23:40:23: Hello! I'm just a computer program created by OpenAI, so I don't have a personal name. You can call me ChatGPT or simply ask me any questions or chat about topics you're interested in. How can I assist you today?\n\n", + }) + ); +}); diff --git a/langchain/src/document_loaders/tests/chatgpt.test.ts b/langchain/src/document_loaders/tests/chatgpt.test.ts index 2500d70913dc..77d410ece14d 100644 --- a/langchain/src/document_loaders/tests/chatgpt.test.ts +++ b/langchain/src/document_loaders/tests/chatgpt.test.ts @@ -1,44 +1,46 @@ import * as url from "node:url"; import * as path from "node:path"; -import { test, expect } from '@jest/globals'; +import { test, expect } from "@jest/globals"; import { Document } from "../../document.js"; -import { ChatGPTLoader } from '../fs/chatgpt.js'; +import { ChatGPTLoader } from "../fs/chatgpt.js"; -test('Test ChatGPT loader to load all documents', async () => { - const filePath = path.resolve( +test("Test ChatGPT loader to load all documents", async () => { + const filePath = path.resolve( path.dirname(url.fileURLToPath(import.meta.url)), "./example_data/example_conversations.json" - ); - const loader = new ChatGPTLoader(filePath); - const docs = await loader.load(); - expect(docs.length).toBe(2); - expect(docs[0]).toEqual( - new Document({ - metadata: {'source': './example_data/example_conversations.json'}, - pageContent: "Example Usage - user on 2023-06-16 05:26:57: Hello, what is your name?\n\nExample Usage - assistant on 2023-06-16 05:27:03: Hello! I'm just a computer program created by OpenAI, so I don't have a personal name. You can call me ChatGPT or simply ask me any questions or chat about topics you're interested in. How can I assist you today?\n\n", - - }) - ); - expect(docs[1]).toEqual( - new Document({ - metadata: {'source': './example_data/example_conversations.json'}, - pageContent: "Example Usage 2 - user on 2023-06-11 00:48:59: What should I do today?\n\nExample Usage 2 - assistant on 2023-06-11 00:49:07: You should contribute to LangChain!\n\nExample Usage 2 - user on 2023-06-11 00:50:10: How can I start?\n\nExample Usage 2 - assistant on 2023-06-11 00:50:18: You can take a look at the current LangChain issues and see if you can contribute to any! Don't forget to read the contributing.md file.\n\nExample Usage 2 - user on 2023-06-11 01:56:04: Thank you!\n\nExample Usage 2 - assistant on 2023-06-11 01:56:14: You're welcome! If you have any more questions or need further assistance in the future, feel free to reach out.\n\n", - }) - ); + ); + const loader = new ChatGPTLoader(filePath); + const docs = await loader.load(); + expect(docs.length).toBe(2); + expect(docs[0]).toEqual( + new Document({ + metadata: { source: filePath, logIndex: 1 }, + pageContent: + "Example Usage - user on 2023-10-16 23:40:17: Hello, what is your name?\n\nExample Usage - assistant on 2023-10-16 23:40:23: Hello! I'm just a computer program created by OpenAI, so I don't have a personal name. You can call me ChatGPT or simply ask me any questions or chat about topics you're interested in. How can I assist you today?\n\n", + }) + ); + expect(docs[1]).toEqual( + new Document({ + metadata: { source: filePath, logIndex: 2 }, + pageContent: + "Example Usage 2 - user on 2023-10-13 23:02:19: What should I do today?\n\nExample Usage 2 - assistant on 2023-10-13 23:02:27: You should contribute to LangChain!\n\nExample Usage 2 - user on 2023-10-13 23:03:30: How can I start?\n\nExample Usage 2 - assistant on 2023-10-13 23:03:38: You can take a look at the current LangChain issues and see if you can contribute to any! Don't forget to read the contributing.md file.\n\nExample Usage 2 - user on 2023-10-13 23:09:24: Thank you!\n\nExample Usage 2 - assistant on 2023-10-13 23:09:34: You're welcome! If you have any more questions or need further assistance in the future, feel free to reach out.\n\n", + }) + ); }); -test('Test ChatGPT loader to only load 1 document', async () => { - const filePath = path.resolve( +test("Test ChatGPT loader to only load 1 document", async () => { + const filePath = path.resolve( path.dirname(url.fileURLToPath(import.meta.url)), "./example_data/example_conversations.json" - ); - const loader = new ChatGPTLoader(filePath, 1); - const docs = await loader.load(); - expect(docs.length).toBe(1); - expect(docs[0]).toEqual( - new Document({ - metadata: {'source': './example_data/example_conversations.json'}, - pageContent: "Example Usage - user on 2023-06-16 05:26:57: Hello, what is your name?\n\nExample Usage - assistant on 2023-06-16 05:27:03: Hello! I'm just a computer program created by OpenAI, so I don't have a personal name. You can call me ChatGPT or simply ask me any questions or chat about topics you're interested in. How can I assist you today?\n\n", - }) - ); + ); + const loader = new ChatGPTLoader(filePath, 1); + const docs = await loader.load(); + expect(docs.length).toBe(1); + expect(docs[0]).toEqual( + new Document({ + metadata: { source: filePath, logIndex: 1 }, + pageContent: + "Example Usage - user on 2023-10-16 23:40:17: Hello, what is your name?\n\nExample Usage - assistant on 2023-10-16 23:40:23: Hello! I'm just a computer program created by OpenAI, so I don't have a personal name. You can call me ChatGPT or simply ask me any questions or chat about topics you're interested in. How can I assist you today?\n\n", + }) + ); }); diff --git a/langchain/src/document_loaders/tests/example_data/example_conversations.json b/langchain/src/document_loaders/tests/example_data/example_conversations.json index 77f09a054bc1..0caf083f6de7 100644 --- a/langchain/src/document_loaders/tests/example_data/example_conversations.json +++ b/langchain/src/document_loaders/tests/example_data/example_conversations.json @@ -1,380 +1,338 @@ [ - { - "title": "Example Usage", - "create_time": 1697499617.697575, - "update_time": 1697499624.514186, - "mapping": { - "91c9fd24-0a12-41b8-b860-8f2cc237e838": { - "id": "91c9fd24-0a12-41b8-b860-8f2cc237e838", - "message": { - "id": "91c9fd24-0a12-41b8-b860-8f2cc237e838", - "author": { - "role": "system", - "name": null, - "metadata": {} - }, - "create_time": null, - "update_time": null, - "content": { - "content_type": "text", - "parts": [ - "" - ] - }, - "status": "finished_successfully", - "end_turn": true, - "weight": 0.0, - "metadata": {}, - "recipient": "all" - }, - "parent": "aaa1ab70-044a-4f2b-b4f7-74d4342f34da", - "children": [ - "aaa29ce8-0430-461e-9d42-74f750409cc7" - ] - }, - "aaa1ab70-044a-4f2b-b4f7-74d4342f34da": { - "id": "aaa1ab70-044a-4f2b-b4f7-74d4342f34da", - "message": null, - "parent": null, - "children": [ - "91c9fd24-0a12-41b8-b860-8f2cc237e838" - ] - }, - "aaa29ce8-0430-461e-9d42-74f750409cc7": { - "id": "aaa29ce8-0430-461e-9d42-74f750409cc7", - "message": { - "id": "aaa29ce8-0430-461e-9d42-74f750409cc7", - "author": { - "role": "user", - "name": null, - "metadata": {} - }, - "create_time": 1697499617.698544, - "update_time": null, - "content": { - "content_type": "text", - "parts": [ - "Hello, what is your name?" - ] - }, - "status": "finished_successfully", - "end_turn": null, - "weight": 1.0, - "metadata": { - "timestamp_": "absolute", - "message_type": null - }, - "recipient": "all" - }, - "parent": "91c9fd24-0a12-41b8-b860-8f2cc237e838", - "children": [ - "e47e5933-13f9-460b-b0f0-fc032e44ff64" - ] + { + "title": "Example Usage", + "create_time": 1697499617.697575, + "update_time": 1697499624.514186, + "mapping": { + "91c9fd24-0a12-41b8-b860-8f2cc237e838": { + "id": "91c9fd24-0a12-41b8-b860-8f2cc237e838", + "message": { + "id": "91c9fd24-0a12-41b8-b860-8f2cc237e838", + "author": { + "role": "system", + "name": null, + "metadata": {} + }, + "create_time": null, + "update_time": null, + "content": { + "content_type": "text", + "parts": [""] + }, + "status": "finished_successfully", + "end_turn": true, + "weight": 0.0, + "metadata": {}, + "recipient": "all" + }, + "parent": "aaa1ab70-044a-4f2b-b4f7-74d4342f34da", + "children": ["aaa29ce8-0430-461e-9d42-74f750409cc7"] + }, + "aaa1ab70-044a-4f2b-b4f7-74d4342f34da": { + "id": "aaa1ab70-044a-4f2b-b4f7-74d4342f34da", + "message": null, + "parent": null, + "children": ["91c9fd24-0a12-41b8-b860-8f2cc237e838"] + }, + "aaa29ce8-0430-461e-9d42-74f750409cc7": { + "id": "aaa29ce8-0430-461e-9d42-74f750409cc7", + "message": { + "id": "aaa29ce8-0430-461e-9d42-74f750409cc7", + "author": { + "role": "user", + "name": null, + "metadata": {} + }, + "create_time": 1697499617.698544, + "update_time": null, + "content": { + "content_type": "text", + "parts": ["Hello, what is your name?"] + }, + "status": "finished_successfully", + "end_turn": null, + "weight": 1.0, + "metadata": { + "timestamp_": "absolute", + "message_type": null + }, + "recipient": "all" + }, + "parent": "91c9fd24-0a12-41b8-b860-8f2cc237e838", + "children": ["e47e5933-13f9-460b-b0f0-fc032e44ff64"] + }, + "e47e5933-13f9-460b-b0f0-fc032e44ff64": { + "id": "e47e5933-13f9-460b-b0f0-fc032e44ff64", + "message": { + "id": "e47e5933-13f9-460b-b0f0-fc032e44ff64", + "author": { + "role": "assistant", + "name": null, + "metadata": {} + }, + "create_time": 1697499623.489646, + "update_time": null, + "content": { + "content_type": "text", + "parts": [ + "Hello! I'm just a computer program created by OpenAI, so I don't have a personal name. You can call me ChatGPT or simply ask me any questions or chat about topics you're interested in. How can I assist you today?" + ] + }, + "status": "finished_successfully", + "end_turn": true, + "weight": 1.0, + "metadata": { + "finish_details": { + "type": "stop", + "stop_tokens": [100260] }, - "e47e5933-13f9-460b-b0f0-fc032e44ff64": { - "id": "e47e5933-13f9-460b-b0f0-fc032e44ff64", - "message": { - "id": "e47e5933-13f9-460b-b0f0-fc032e44ff64", - "author": { - "role": "assistant", - "name": null, - "metadata": {} - }, - "create_time": 1697499623.489646, - "update_time": null, - "content": { - "content_type": "text", - "parts": [ - "Hello! I'm just a computer program created by OpenAI, so I don't have a personal name. You can call me ChatGPT or simply ask me any questions or chat about topics you're interested in. How can I assist you today?" - ] - }, - "status": "finished_successfully", - "end_turn": true, - "weight": 1.0, - "metadata": { - "finish_details": { - "type": "stop", - "stop_tokens": [ - 100260 - ] - }, - "is_complete": true, - "message_type": null, - "model_slug": "text-davinci-002-render-sha", - "parent_id": "aaa29ce8-0430-461e-9d42-74f750409cc7", - "timestamp_": "absolute" - }, - "recipient": "all" - }, - "parent": "aaa29ce8-0430-461e-9d42-74f750409cc7", - "children": [] - } + "is_complete": true, + "message_type": null, + "model_slug": "text-davinci-002-render-sha", + "parent_id": "aaa29ce8-0430-461e-9d42-74f750409cc7", + "timestamp_": "absolute" + }, + "recipient": "all" }, - "moderation_results": [], - "current_node": "e47e5933-13f9-460b-b0f0-fc032e44ff64", - "plugin_ids": null, - "conversation_id": "47867fd2-2a41-4102-80ef-101510becc08", - "conversation_template_id": null, - "id": "47867fd2-2a41-4102-80ef-101510becc08" + "parent": "aaa29ce8-0430-461e-9d42-74f750409cc7", + "children": [] + } }, - { - "title": "Example Usage 2", - "create_time": 1697238139.540548, - "update_time": 1697238574.062126, - "mapping": { - "f3602996-5cb6-4050-87d9-0001149ba7ee": { - "id": "f3602996-5cb6-4050-87d9-0001149ba7ee", - "message": { - "id": "f3602996-5cb6-4050-87d9-0001149ba7ee", - "author": { - "role": "system", - "name": null, - "metadata": {} - }, - "create_time": null, - "update_time": null, - "content": { - "content_type": "text", - "parts": [ - "" - ] - }, - "status": "finished_successfully", - "end_turn": true, - "weight": 0.0, - "metadata": {}, - "recipient": "all" - }, - "parent": "aaa1590a-2f7f-410a-b12c-9119a3264ef9", - "children": [ - "aaa2fff4-49fb-40e5-9122-a2addfeedfc9" - ] - }, - "aaa1590a-2f7f-410a-b12c-9119a3264ef9": { - "id": "aaa1590a-2f7f-410a-b12c-9119a3264ef9", - "message": null, - "parent": null, - "children": [ - "f3602996-5cb6-4050-87d9-0001149ba7ee" - ] - }, - "aaa2fff4-49fb-40e5-9122-a2addfeedfc9": { - "id": "aaa2fff4-49fb-40e5-9122-a2addfeedfc9", - "message": { - "id": "aaa2fff4-49fb-40e5-9122-a2addfeedfc9", - "author": { - "role": "user", - "name": null, - "metadata": {} - }, - "create_time": 1697238139.541115, - "update_time": null, - "content": { - "content_type": "text", - "parts": [ - "What should I do today?" - ] - }, - "status": "finished_successfully", - "end_turn": null, - "weight": 1.0, - "metadata": { - "timestamp_": "absolute", - "message_type": null - }, - "recipient": "all" - }, - "parent": "f3602996-5cb6-4050-87d9-0001149ba7ee", - "children": [ - "fc75f9c1-d769-4d1b-a94f-0ebdf25af1f8" - ] - }, - "fc75f9c1-d769-4d1b-a94f-0ebdf25af1f8": { - "id": "fc75f9c1-d769-4d1b-a94f-0ebdf25af1f8", - "message": { - "id": "fc75f9c1-d769-4d1b-a94f-0ebdf25af1f8", - "author": { - "role": "assistant", - "name": null, - "metadata": {} - }, - "create_time": 1697238147.462413, - "update_time": null, - "content": { - "content_type": "text", - "parts": [ - "You should contribute to LangChain!" - ] - }, - "status": "finished_successfully", - "end_turn": true, - "weight": 1.0, - "metadata": { - "finish_details": { - "type": "stop", - "stop_tokens": [ - 100260 - ] - }, - "is_complete": true, - "message_type": null, - "model_slug": "text-davinci-002-render-sha", - "parent_id": "aaa2fff4-49fb-40e5-9122-a2addfeedfc9", - "timestamp_": "absolute" - }, - "recipient": "all" - }, - "parent": "aaa2fff4-49fb-40e5-9122-a2addfeedfc9", - "children": [ - "aaa24ae6-3972-4094-8c84-3b31750bc212" - ] - }, - "aaa24ae6-3972-4094-8c84-3b31750bc212": { - "id": "aaa24ae6-3972-4094-8c84-3b31750bc212", - "message": { - "id": "aaa24ae6-3972-4094-8c84-3b31750bc212", - "author": { - "role": "user", - "name": null, - "metadata": {} - }, - "create_time": 1697238210.739114, - "update_time": null, - "content": { - "content_type": "text", - "parts": [ - "How can I start?" - ] - }, - "status": "finished_successfully", - "end_turn": null, - "weight": 1.0, - "metadata": { - "timestamp_": "absolute", - "message_type": null - }, - "recipient": "all" - }, - "parent": "fc75f9c1-d769-4d1b-a94f-0ebdf25af1f8", - "children": [ - "713c8a38-0685-41f2-8a28-36c880d65480" - ] + "moderation_results": [], + "current_node": "e47e5933-13f9-460b-b0f0-fc032e44ff64", + "plugin_ids": null, + "conversation_id": "47867fd2-2a41-4102-80ef-101510becc08", + "conversation_template_id": null, + "id": "47867fd2-2a41-4102-80ef-101510becc08" + }, + { + "title": "Example Usage 2", + "create_time": 1697238139.540548, + "update_time": 1697238574.062126, + "mapping": { + "f3602996-5cb6-4050-87d9-0001149ba7ee": { + "id": "f3602996-5cb6-4050-87d9-0001149ba7ee", + "message": { + "id": "f3602996-5cb6-4050-87d9-0001149ba7ee", + "author": { + "role": "system", + "name": null, + "metadata": {} + }, + "create_time": null, + "update_time": null, + "content": { + "content_type": "text", + "parts": [""] + }, + "status": "finished_successfully", + "end_turn": true, + "weight": 0.0, + "metadata": {}, + "recipient": "all" + }, + "parent": "aaa1590a-2f7f-410a-b12c-9119a3264ef9", + "children": ["aaa2fff4-49fb-40e5-9122-a2addfeedfc9"] + }, + "aaa1590a-2f7f-410a-b12c-9119a3264ef9": { + "id": "aaa1590a-2f7f-410a-b12c-9119a3264ef9", + "message": null, + "parent": null, + "children": ["f3602996-5cb6-4050-87d9-0001149ba7ee"] + }, + "aaa2fff4-49fb-40e5-9122-a2addfeedfc9": { + "id": "aaa2fff4-49fb-40e5-9122-a2addfeedfc9", + "message": { + "id": "aaa2fff4-49fb-40e5-9122-a2addfeedfc9", + "author": { + "role": "user", + "name": null, + "metadata": {} + }, + "create_time": 1697238139.541115, + "update_time": null, + "content": { + "content_type": "text", + "parts": ["What should I do today?"] + }, + "status": "finished_successfully", + "end_turn": null, + "weight": 1.0, + "metadata": { + "timestamp_": "absolute", + "message_type": null + }, + "recipient": "all" + }, + "parent": "f3602996-5cb6-4050-87d9-0001149ba7ee", + "children": ["fc75f9c1-d769-4d1b-a94f-0ebdf25af1f8"] + }, + "fc75f9c1-d769-4d1b-a94f-0ebdf25af1f8": { + "id": "fc75f9c1-d769-4d1b-a94f-0ebdf25af1f8", + "message": { + "id": "fc75f9c1-d769-4d1b-a94f-0ebdf25af1f8", + "author": { + "role": "assistant", + "name": null, + "metadata": {} + }, + "create_time": 1697238147.462413, + "update_time": null, + "content": { + "content_type": "text", + "parts": ["You should contribute to LangChain!"] + }, + "status": "finished_successfully", + "end_turn": true, + "weight": 1.0, + "metadata": { + "finish_details": { + "type": "stop", + "stop_tokens": [100260] }, - "713c8a38-0685-41f2-8a28-36c880d65480": { - "id": "713c8a38-0685-41f2-8a28-36c880d65480", - "message": { - "id": "713c8a38-0685-41f2-8a28-36c880d65480", - "author": { - "role": "assistant", - "name": null, - "metadata": {} - }, - "create_time": 1697238218.628864, - "update_time": null, - "content": { - "content_type": "text", - "parts": [ - "You can take a look at the current LangChain issues and see if you can contribute to any! Don't forget to read the contributing.md file." - ] - }, - "status": "finished_successfully", - "end_turn": true, - "weight": 1.0, - "metadata": { - "finish_details": { - "type": "stop", - "stop_tokens": [ - 100260 - ] - }, - "is_complete": true, - "message_type": null, - "model_slug": "text-davinci-002-render-sha", - "parent_id": "aaa24ae6-3972-4094-8c84-3b31750bc212", - "timestamp_": "absolute" - }, - "recipient": "all" - }, - "parent": "aaa24ae6-3972-4094-8c84-3b31750bc212", - "children": [ - "aaa266ba-b466-4207-bfbc-19b2c1bb9b78" - ] + "is_complete": true, + "message_type": null, + "model_slug": "text-davinci-002-render-sha", + "parent_id": "aaa2fff4-49fb-40e5-9122-a2addfeedfc9", + "timestamp_": "absolute" + }, + "recipient": "all" + }, + "parent": "aaa2fff4-49fb-40e5-9122-a2addfeedfc9", + "children": ["aaa24ae6-3972-4094-8c84-3b31750bc212"] + }, + "aaa24ae6-3972-4094-8c84-3b31750bc212": { + "id": "aaa24ae6-3972-4094-8c84-3b31750bc212", + "message": { + "id": "aaa24ae6-3972-4094-8c84-3b31750bc212", + "author": { + "role": "user", + "name": null, + "metadata": {} + }, + "create_time": 1697238210.739114, + "update_time": null, + "content": { + "content_type": "text", + "parts": ["How can I start?"] + }, + "status": "finished_successfully", + "end_turn": null, + "weight": 1.0, + "metadata": { + "timestamp_": "absolute", + "message_type": null + }, + "recipient": "all" + }, + "parent": "fc75f9c1-d769-4d1b-a94f-0ebdf25af1f8", + "children": ["713c8a38-0685-41f2-8a28-36c880d65480"] + }, + "713c8a38-0685-41f2-8a28-36c880d65480": { + "id": "713c8a38-0685-41f2-8a28-36c880d65480", + "message": { + "id": "713c8a38-0685-41f2-8a28-36c880d65480", + "author": { + "role": "assistant", + "name": null, + "metadata": {} + }, + "create_time": 1697238218.628864, + "update_time": null, + "content": { + "content_type": "text", + "parts": [ + "You can take a look at the current LangChain issues and see if you can contribute to any! Don't forget to read the contributing.md file." + ] + }, + "status": "finished_successfully", + "end_turn": true, + "weight": 1.0, + "metadata": { + "finish_details": { + "type": "stop", + "stop_tokens": [100260] }, - "aaa266ba-b466-4207-bfbc-19b2c1bb9b78": { - "id": "aaa266ba-b466-4207-bfbc-19b2c1bb9b78", - "message": { - "id": "aaa266ba-b466-4207-bfbc-19b2c1bb9b78", - "author": { - "role": "user", - "name": null, - "metadata": {} - }, - "create_time": 1697238564.084255, - "update_time": null, - "content": { - "content_type": "text", - "parts": [ - "Thank you!" - ] - }, - "status": "finished_successfully", - "end_turn": null, - "weight": 1.0, - "metadata": { - "timestamp_": "absolute", - "message_type": null - }, - "recipient": "all" - }, - "parent": "713c8a38-0685-41f2-8a28-36c880d65480", - "children": [ - "6ea89c28-95de-465d-9d25-a8fd67d4ba9c" - ] + "is_complete": true, + "message_type": null, + "model_slug": "text-davinci-002-render-sha", + "parent_id": "aaa24ae6-3972-4094-8c84-3b31750bc212", + "timestamp_": "absolute" + }, + "recipient": "all" + }, + "parent": "aaa24ae6-3972-4094-8c84-3b31750bc212", + "children": ["aaa266ba-b466-4207-bfbc-19b2c1bb9b78"] + }, + "aaa266ba-b466-4207-bfbc-19b2c1bb9b78": { + "id": "aaa266ba-b466-4207-bfbc-19b2c1bb9b78", + "message": { + "id": "aaa266ba-b466-4207-bfbc-19b2c1bb9b78", + "author": { + "role": "user", + "name": null, + "metadata": {} + }, + "create_time": 1697238564.084255, + "update_time": null, + "content": { + "content_type": "text", + "parts": ["Thank you!"] + }, + "status": "finished_successfully", + "end_turn": null, + "weight": 1.0, + "metadata": { + "timestamp_": "absolute", + "message_type": null + }, + "recipient": "all" + }, + "parent": "713c8a38-0685-41f2-8a28-36c880d65480", + "children": ["6ea89c28-95de-465d-9d25-a8fd67d4ba9c"] + }, + "6ea89c28-95de-465d-9d25-a8fd67d4ba9c": { + "id": "6ea89c28-95de-465d-9d25-a8fd67d4ba9c", + "message": { + "id": "6ea89c28-95de-465d-9d25-a8fd67d4ba9c", + "author": { + "role": "assistant", + "name": null, + "metadata": {} + }, + "create_time": 1697238574.056793, + "update_time": null, + "content": { + "content_type": "text", + "parts": [ + "You're welcome! If you have any more questions or need further assistance in the future, feel free to reach out." + ] + }, + "status": "finished_successfully", + "end_turn": true, + "weight": 1.0, + "metadata": { + "finish_details": { + "type": "stop", + "stop_tokens": [100260] }, - "6ea89c28-95de-465d-9d25-a8fd67d4ba9c": { - "id": "6ea89c28-95de-465d-9d25-a8fd67d4ba9c", - "message": { - "id": "6ea89c28-95de-465d-9d25-a8fd67d4ba9c", - "author": { - "role": "assistant", - "name": null, - "metadata": {} - }, - "create_time": 1697238574.056793, - "update_time": null, - "content": { - "content_type": "text", - "parts": [ - "You're welcome! If you have any more questions or need further assistance in the future, feel free to reach out." - ] - }, - "status": "finished_successfully", - "end_turn": true, - "weight": 1.0, - "metadata": { - "finish_details": { - "type": "stop", - "stop_tokens": [ - 100260 - ] - }, - "is_complete": true, - "message_type": null, - "model_slug": "text-davinci-002-render-sha", - "parent_id": "aaa266ba-b466-4207-bfbc-19b2c1bb9b78", - "timestamp_": "absolute" - }, - "recipient": "all" - }, - "parent": "aaa266ba-b466-4207-bfbc-19b2c1bb9b78", - "children": [] - } + "is_complete": true, + "message_type": null, + "model_slug": "text-davinci-002-render-sha", + "parent_id": "aaa266ba-b466-4207-bfbc-19b2c1bb9b78", + "timestamp_": "absolute" + }, + "recipient": "all" }, - "moderation_results": [], - "current_node": "6ea89c28-95de-465d-9d25-a8fd67d4ba9c", - "plugin_ids": null, - "conversation_id": "588ae2e0-a336-475b-b4ef-1904078b8c24", - "conversation_template_id": null, - "id": "588ae2e0-a336-475b-b4ef-1904078b8c24" - } -] \ No newline at end of file + "parent": "aaa266ba-b466-4207-bfbc-19b2c1bb9b78", + "children": [] + } + }, + "moderation_results": [], + "current_node": "6ea89c28-95de-465d-9d25-a8fd67d4ba9c", + "plugin_ids": null, + "conversation_id": "588ae2e0-a336-475b-b4ef-1904078b8c24", + "conversation_template_id": null, + "id": "588ae2e0-a336-475b-b4ef-1904078b8c24" + } +] From 17e5e70fb53c7bfb0d19b350bc60ac6270217e34 Mon Sep 17 00:00:00 2001 From: Zeneos <95008961+Zeneos@users.noreply.github.com> Date: Tue, 28 Nov 2023 10:22:30 -0500 Subject: [PATCH 3/9] Update chatgpt.mdx --- .../integrations/document_loaders/file_loaders/chatgpt.mdx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/core_docs/docs/integrations/document_loaders/file_loaders/chatgpt.mdx b/docs/core_docs/docs/integrations/document_loaders/file_loaders/chatgpt.mdx index 6f06a94426cf..53a68c4dfc92 100644 --- a/docs/core_docs/docs/integrations/document_loaders/file_loaders/chatgpt.mdx +++ b/docs/core_docs/docs/integrations/document_loaders/file_loaders/chatgpt.mdx @@ -10,7 +10,7 @@ This example goes over how to load conversations.json from your ChatGPT data exp Example code: ```typescript -import { ChatGPTLoader } from "langchain/document_loaders/web/chatgpt"; +import { ChatGPTLoader } from "langchain/document_loaders/fs/chatgpt"; const loader = new ChatGPTLoader("./example_data/example_conversations.json") @@ -24,7 +24,7 @@ console.log(docs); Example code: ```typescript -import { ChatGPTLoader } from "langchain/document_loaders/web/chatgpt"; +import { ChatGPTLoader } from "langchain/document_loaders/fs/chatgpt"; const loader = new ChatGPTLoader("./example_data/example_conversations.json", 1) From deda86365da698a16b3f24882f7a37465582fe13 Mon Sep 17 00:00:00 2001 From: Zeneos Date: Thu, 30 Nov 2023 15:49:19 -0500 Subject: [PATCH 4/9] Update chatgpt.mdx --- .../integrations/document_loaders/file_loaders/chatgpt.mdx | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/docs/core_docs/docs/integrations/document_loaders/file_loaders/chatgpt.mdx b/docs/core_docs/docs/integrations/document_loaders/file_loaders/chatgpt.mdx index 53a68c4dfc92..6ee8c66a0c01 100644 --- a/docs/core_docs/docs/integrations/document_loaders/file_loaders/chatgpt.mdx +++ b/docs/core_docs/docs/integrations/document_loaders/file_loaders/chatgpt.mdx @@ -2,9 +2,6 @@ This example goes over how to load conversations.json from your ChatGPT data export folder. You can get your data export by email by going to: ChatGPT -> (Profile) - Settings -> Export data -> Confirm export -> Check email. -``` -``` - ## Usage, extracting all logs Example code: @@ -31,4 +28,4 @@ const loader = new ChatGPTLoader("./example_data/example_conversations.json", 1) const docs = await loader.load(); console.log(docs); -``` \ No newline at end of file +``` From 209ef2a8c836617f6b1a5b130162a63803bdda0a Mon Sep 17 00:00:00 2001 From: Zeneos Date: Thu, 30 Nov 2023 16:58:53 -0500 Subject: [PATCH 5/9] Update chatgpt.ts --- langchain/src/document_loaders/fs/chatgpt.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/langchain/src/document_loaders/fs/chatgpt.ts b/langchain/src/document_loaders/fs/chatgpt.ts index adc9c266abeb..1ff6252fdfaa 100644 --- a/langchain/src/document_loaders/fs/chatgpt.ts +++ b/langchain/src/document_loaders/fs/chatgpt.ts @@ -40,7 +40,7 @@ function concatenateRows(message: ChatGPTMessage, title: string): string { export class ChatGPTLoader extends TextLoader { public numLogs: number; - constructor(filePathOrBlob: string | Blob, numLogs: number = 0) { + constructor(filePathOrBlob: string | Blob, numLogs = 0) { super(filePathOrBlob); this.numLogs = numLogs; } @@ -57,7 +57,7 @@ export class ChatGPTLoader extends TextLoader { const truncatedData = this.numLogs > 0 ? data.slice(0, this.numLogs) : data; return truncatedData.map((d: ChatGPTLog) => { - return Object.values(d.mapping) + Object.values(d.mapping) .filter( (msg, idx) => !(idx === 0 && msg.message.author.role === "system") ) From 0f1d27cde437e892c0143f50b14e04adb12d3ee4 Mon Sep 17 00:00:00 2001 From: Zeneos <95008961+Zeneos@users.noreply.github.com> Date: Thu, 30 Nov 2023 17:24:43 -0500 Subject: [PATCH 6/9] Throws errors also package.json and .gitignore update - console error logging also throws error instead of not doing that - put chatgpt.ts related files into package.json and .gitignore - ran `yarn lint` and `yarn format` many times to be sure --- langchain/.gitignore | 3 +++ langchain/package.json | 3 +++ langchain/src/document_loaders/fs/chatgpt.ts | 18 +++++++++--------- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/langchain/.gitignore b/langchain/.gitignore index 093f76cd21eb..b5e15fe79612 100644 --- a/langchain/.gitignore +++ b/langchain/.gitignore @@ -463,6 +463,9 @@ document_loaders/fs/openai_whisper_audio.d.ts document_loaders/fs/pptx.cjs document_loaders/fs/pptx.js document_loaders/fs/pptx.d.ts +document_loaders/fs/chatgpt.cjs +document_loaders/fs/chatgpt.js +document_loaders/fs/chatgpt.d.ts document_transformers/html_to_text.cjs document_transformers/html_to_text.js document_transformers/html_to_text.d.ts diff --git a/langchain/package.json b/langchain/package.json index 662dbfd68cea..90ab62629cf6 100644 --- a/langchain/package.json +++ b/langchain/package.json @@ -475,6 +475,9 @@ "document_loaders/fs/pptx.cjs", "document_loaders/fs/pptx.js", "document_loaders/fs/pptx.d.ts", + "document_loaders/fs/chatgpt.cjs", + "document_loaders/fs/chatgpt.js", + "document_loaders/fs/chatgpt.d.ts", "document_transformers/html_to_text.cjs", "document_transformers/html_to_text.js", "document_transformers/html_to_text.d.ts", diff --git a/langchain/src/document_loaders/fs/chatgpt.ts b/langchain/src/document_loaders/fs/chatgpt.ts index 1ff6252fdfaa..8e8b6c749b86 100644 --- a/langchain/src/document_loaders/fs/chatgpt.ts +++ b/langchain/src/document_loaders/fs/chatgpt.ts @@ -50,20 +50,20 @@ export class ChatGPTLoader extends TextLoader { try { data = JSON.parse(raw); } catch (e) { - console.error("Failed to parse JSON:", e); - return []; + console.error(e); + throw new Error("Failed to parse JSON"); } const truncatedData = this.numLogs > 0 ? data.slice(0, this.numLogs) : data; - return truncatedData.map((d: ChatGPTLog) => { + return truncatedData.map((d: ChatGPTLog) => Object.values(d.mapping) .filter( (msg, idx) => !(idx === 0 && msg.message.author.role === "system") ) .map((msg) => concatenateRows(msg.message, d.title)) - .join(""); - }); + .join("") + ); } public async load(): Promise { @@ -74,16 +74,16 @@ export class ChatGPTLoader extends TextLoader { try { text = await readFile(this.filePathOrBlob, "utf8"); } catch (e) { - console.error("Failed to read file:", e); - return []; + console.error(e); + throw new Error("Failed to read file"); } metadata = { source: this.filePathOrBlob }; } else { try { text = await this.filePathOrBlob.text(); } catch (e) { - console.error("Failed to read blob:", e); - return []; + console.error(e); + throw new Error("Failed to read blob"); } metadata = { source: "blob", blobType: this.filePathOrBlob.type }; } From 9be677042c949a17eb264a90738335aeb64e9fcc Mon Sep 17 00:00:00 2001 From: jacoblee93 Date: Thu, 30 Nov 2023 14:37:03 -0800 Subject: [PATCH 7/9] Format --- .../integrations/document_loaders/file_loaders/chatgpt.mdx | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/docs/core_docs/docs/integrations/document_loaders/file_loaders/chatgpt.mdx b/docs/core_docs/docs/integrations/document_loaders/file_loaders/chatgpt.mdx index 6ee8c66a0c01..120abd25aada 100644 --- a/docs/core_docs/docs/integrations/document_loaders/file_loaders/chatgpt.mdx +++ b/docs/core_docs/docs/integrations/document_loaders/file_loaders/chatgpt.mdx @@ -9,7 +9,7 @@ Example code: ```typescript import { ChatGPTLoader } from "langchain/document_loaders/fs/chatgpt"; -const loader = new ChatGPTLoader("./example_data/example_conversations.json") +const loader = new ChatGPTLoader("./example_data/example_conversations.json"); const docs = await loader.load(); @@ -23,7 +23,10 @@ Example code: ```typescript import { ChatGPTLoader } from "langchain/document_loaders/fs/chatgpt"; -const loader = new ChatGPTLoader("./example_data/example_conversations.json", 1) +const loader = new ChatGPTLoader( + "./example_data/example_conversations.json", + 1 +); const docs = await loader.load(); From be633bca3d661722506aedadb31c4ef825689a60 Mon Sep 17 00:00:00 2001 From: Zeneos <95008961+Zeneos@users.noreply.github.com> Date: Thu, 30 Nov 2023 17:41:18 -0500 Subject: [PATCH 8/9] whoops one more --- .../integrations/document_loaders/file_loaders/chatgpt.mdx | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/docs/core_docs/docs/integrations/document_loaders/file_loaders/chatgpt.mdx b/docs/core_docs/docs/integrations/document_loaders/file_loaders/chatgpt.mdx index 6ee8c66a0c01..120abd25aada 100644 --- a/docs/core_docs/docs/integrations/document_loaders/file_loaders/chatgpt.mdx +++ b/docs/core_docs/docs/integrations/document_loaders/file_loaders/chatgpt.mdx @@ -9,7 +9,7 @@ Example code: ```typescript import { ChatGPTLoader } from "langchain/document_loaders/fs/chatgpt"; -const loader = new ChatGPTLoader("./example_data/example_conversations.json") +const loader = new ChatGPTLoader("./example_data/example_conversations.json"); const docs = await loader.load(); @@ -23,7 +23,10 @@ Example code: ```typescript import { ChatGPTLoader } from "langchain/document_loaders/fs/chatgpt"; -const loader = new ChatGPTLoader("./example_data/example_conversations.json", 1) +const loader = new ChatGPTLoader( + "./example_data/example_conversations.json", + 1 +); const docs = await loader.load(); From 48b5903dbff02738035c677d64029322fe84c647 Mon Sep 17 00:00:00 2001 From: jacoblee93 Date: Thu, 30 Nov 2023 14:46:39 -0800 Subject: [PATCH 9/9] Fix test --- langchain/src/document_loaders/tests/chatgpt-blob.test.ts | 4 ++-- langchain/src/document_loaders/tests/chatgpt.test.ts | 4 ++-- .../example_data/{ => chatgpt}/example_conversations.json | 0 3 files changed, 4 insertions(+), 4 deletions(-) rename langchain/src/document_loaders/tests/example_data/{ => chatgpt}/example_conversations.json (100%) diff --git a/langchain/src/document_loaders/tests/chatgpt-blob.test.ts b/langchain/src/document_loaders/tests/chatgpt-blob.test.ts index 1b2081bbfc2a..b158f7807d3b 100644 --- a/langchain/src/document_loaders/tests/chatgpt-blob.test.ts +++ b/langchain/src/document_loaders/tests/chatgpt-blob.test.ts @@ -8,7 +8,7 @@ import { ChatGPTLoader } from "../fs/chatgpt.js"; test("Test ChatGPT loader from blob to load all documents", async () => { const filePath = path.resolve( path.dirname(url.fileURLToPath(import.meta.url)), - "./example_data/example_conversations.json" + "./example_data/chatgpt/example_conversations.json" ); const loader = new ChatGPTLoader( new Blob([await fs.readFile(filePath)], { type: "application/json" }) @@ -34,7 +34,7 @@ test("Test ChatGPT loader from blob to load all documents", async () => { test("Test ChatGPT loader from blob to only load 1 document", async () => { const filePath = path.resolve( path.dirname(url.fileURLToPath(import.meta.url)), - "./example_data/example_conversations.json" + "./example_data/chatgpt/example_conversations.json" ); const loader = new ChatGPTLoader( new Blob([await fs.readFile(filePath)], { type: "application/json" }), diff --git a/langchain/src/document_loaders/tests/chatgpt.test.ts b/langchain/src/document_loaders/tests/chatgpt.test.ts index 77d410ece14d..7e1d68f189f6 100644 --- a/langchain/src/document_loaders/tests/chatgpt.test.ts +++ b/langchain/src/document_loaders/tests/chatgpt.test.ts @@ -7,7 +7,7 @@ import { ChatGPTLoader } from "../fs/chatgpt.js"; test("Test ChatGPT loader to load all documents", async () => { const filePath = path.resolve( path.dirname(url.fileURLToPath(import.meta.url)), - "./example_data/example_conversations.json" + "./example_data/chatgpt/example_conversations.json" ); const loader = new ChatGPTLoader(filePath); const docs = await loader.load(); @@ -31,7 +31,7 @@ test("Test ChatGPT loader to load all documents", async () => { test("Test ChatGPT loader to only load 1 document", async () => { const filePath = path.resolve( path.dirname(url.fileURLToPath(import.meta.url)), - "./example_data/example_conversations.json" + "./example_data/chatgpt/example_conversations.json" ); const loader = new ChatGPTLoader(filePath, 1); const docs = await loader.load(); diff --git a/langchain/src/document_loaders/tests/example_data/example_conversations.json b/langchain/src/document_loaders/tests/example_data/chatgpt/example_conversations.json similarity index 100% rename from langchain/src/document_loaders/tests/example_data/example_conversations.json rename to langchain/src/document_loaders/tests/example_data/chatgpt/example_conversations.json