From 0143054a496df3c6c6a24b7edfe90600dcc72718 Mon Sep 17 00:00:00 2001 From: Mark Sturdevant Date: Fri, 13 Dec 2024 15:34:58 -0800 Subject: [PATCH] feat!: use python code tool over http * Modify the existing PythonTool over gRPC to go over HTTP. * Add tests (written for gRPC but adapted for the new one). * Requires CODE_INTERPRETER_URL env var to point to exposed HTTP port (50081). BREAKING CHANGE: Requires exposed port and updated CODE_INTERPRETER_URL. Signed-off-by: Mark Sturdevant --- .../{python_http.test.ts => python.test.ts} | 12 +- src/tools/python/python.ts | 70 +++--- src/tools/python/python_http.ts | 225 ------------------ .../{python_http.test.ts => python.test.ts} | 4 +- 4 files changed, 49 insertions(+), 262 deletions(-) rename src/tools/python/{python_http.test.ts => python.test.ts} (88%) delete mode 100644 src/tools/python/python_http.ts rename tests/e2e/tools/python/{python_http.test.ts => python.test.ts} (95%) diff --git a/src/tools/python/python_http.test.ts b/src/tools/python/python.test.ts similarity index 88% rename from src/tools/python/python_http.test.ts rename to src/tools/python/python.test.ts index 3d64086d..05b9c4c9 100644 --- a/src/tools/python/python_http.test.ts +++ b/src/tools/python/python.test.ts @@ -15,14 +15,14 @@ */ import { describe, it, expect } from "vitest"; -import { PythonHttpTool } from "@/tools/python/python_http.js"; +import { PythonTool } from "@/tools/python/python.js"; import { verifyDeserialization } from "@tests/e2e/utils.js"; import { LocalPythonStorage } from "@/tools/python/storage.js"; const codeInterpreterUrl = process.env.CODE_INTERPRETER_URL || "http://localhost:50081"; const getPythonTool = () => - new PythonHttpTool({ + new PythonTool({ codeInterpreter: { url: codeInterpreterUrl }, storage: new LocalPythonStorage({ interpreterWorkingDir: "/tmp/code-interpreter-storage", @@ -30,11 +30,11 @@ const getPythonTool = () => }), }); -describe("PythonHttpTool", () => { +describe("PythonTool", () => { it("Is the expected tool", () => { const tool = getPythonTool(); - expect(tool).toBeInstanceOf(PythonHttpTool); - expect(PythonHttpTool.isTool(tool)).toBe(true); + expect(tool).toBeInstanceOf(PythonTool); + expect(PythonTool.isTool(tool)).toBe(true); expect(tool.name).toBe("Python"); expect(tool.description).toMatch("Run Python and/or shell code"); }); @@ -69,7 +69,7 @@ describe("PythonHttpTool", () => { it("serializes", async () => { const tool = getPythonTool(); const serialized = tool.serialize(); - const deserializedTool = PythonHttpTool.fromSerialized(serialized); + const deserializedTool = PythonTool.fromSerialized(serialized); verifyDeserialization(tool, deserializedTool); }); }); diff --git a/src/tools/python/python.ts b/src/tools/python/python.ts index 2969e160..d8cc16dd 100644 --- a/src/tools/python/python.ts +++ b/src/tools/python/python.ts @@ -14,10 +14,14 @@ * limitations under the License. */ -import { BaseToolOptions, BaseToolRunOptions, ToolEmitter, Tool, ToolInput } from "@/tools/base.js"; -import { createGrpcTransport } from "@connectrpc/connect-node"; -import { PromiseClient, createPromiseClient } from "@connectrpc/connect"; -import { CodeInterpreterService } from "bee-proto/code_interpreter/v1/code_interpreter_service_connect"; +import { + BaseToolOptions, + BaseToolRunOptions, + ToolEmitter, + Tool, + ToolError, + ToolInput, +} from "@/tools/base.js"; import { z } from "zod"; import { BaseLLMOutput } from "@/llms/base.js"; import { LLM } from "@/llms/llm.js"; @@ -95,7 +99,6 @@ export class PythonTool extends Tool { }); } - protected readonly client: PromiseClient; protected readonly preprocess; public constructor(options: PythonToolOptions) { @@ -109,7 +112,6 @@ export class PythonTool extends Tool { }, ]); } - this.client = this._createClient(); this.preprocess = options.preprocess; this.storage = options.storage; } @@ -118,17 +120,6 @@ export class PythonTool extends Tool { this.register(); } - protected _createClient(): PromiseClient { - return createPromiseClient( - CodeInterpreterService, - createGrpcTransport({ - baseUrl: this.options.codeInterpreter.url, - httpVersion: "2", - nodeOptions: this.options.codeInterpreter.connectionOptions, - }), - ); - } - protected async _run( input: ToolInput, _options: Partial, @@ -156,21 +147,42 @@ export class PythonTool extends Tool { const prefix = "/workspace/"; - const result = await this.client.execute( - { - sourceCode: await getSourceCode(), - executorId: this.options.executorId ?? "default", - files: Object.fromEntries( - inputFiles.map((file) => [`${prefix}${file.filename}`, file.pythonId]), - ), - }, - { signal: run.signal }, - ); + let response; + const httpUrl = this.options.codeInterpreter.url + "/v1/execute"; + try { + response = await fetch(httpUrl, { + method: "POST", + headers: { + "Accept": "application/json", + "Content-Type": "application/json", + }, + body: JSON.stringify({ + source_code: await getSourceCode(), + executorId: this.options.executorId ?? "default", + files: Object.fromEntries( + inputFiles.map((file) => [`${prefix}${file.filename}`, file.pythonId]), + ), + }), + }); + } catch (error) { + if (error.cause.name == "HTTPParserError") { + throw new ToolError("Python tool over HTTP failed -- not using HTTP endpoint!", [error]); + } else { + throw new ToolError("Python tool over HTTP failed!", [error]); + } + } + + if (!response?.ok) { + throw new ToolError("HTTP request failed!", [new Error(await response.text())]); + } + + const result = await response.json(); // replace absolute paths in "files" with relative paths by removing "/workspace/" // skip files that are not in "/workspace" // skip entries that are also entries in filesInput const filesOutput = await this.storage.download( + // @ts-ignore Object.entries(result.files) .map(([k, v]) => { const file = { path: k, pythonId: v }; @@ -194,12 +206,13 @@ export class PythonTool extends Tool { }) .filter(isTruthy), ); - return new PythonToolOutput(result.stdout, result.stderr, result.exitCode, filesOutput); + return new PythonToolOutput(result.stdout, result.stderr, result.exit_code, filesOutput); } createSnapshot() { return { ...super.createSnapshot(), + files: this.files, storage: this.storage, preprocess: this.preprocess, }; @@ -207,6 +220,5 @@ export class PythonTool extends Tool { loadSnapshot(snapshot: ReturnType): void { super.loadSnapshot(snapshot); - Object.assign(this, { client: this._createClient() }); } } diff --git a/src/tools/python/python_http.ts b/src/tools/python/python_http.ts deleted file mode 100644 index 7dee4329..00000000 --- a/src/tools/python/python_http.ts +++ /dev/null @@ -1,225 +0,0 @@ -/** - * Copyright 2024 IBM Corp. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import { - BaseToolOptions, - BaseToolRunOptions, - ToolEmitter, - Tool, - ToolError, - ToolInput, -} from "@/tools/base.js"; -import { z } from "zod"; -import { BaseLLMOutput } from "@/llms/base.js"; -import { LLM } from "@/llms/llm.js"; -import { PromptTemplate } from "@/template.js"; -import { filter, isIncludedIn, isTruthy, map, pipe, unique, uniqueBy } from "remeda"; -import { PythonFile, PythonStorage } from "@/tools/python/storage.js"; -import { PythonToolOutput } from "@/tools/python/output.js"; -import { ValidationError } from "ajv"; -import { ConnectionOptions } from "node:tls"; -import { RunContext } from "@/context.js"; -import { hasMinLength } from "@/internals/helpers/array.js"; -import { Emitter } from "@/emitter/emitter.js"; - -export interface CodeInterpreterOptions { - url: string; - connectionOptions?: ConnectionOptions; -} - -export interface PythonToolOptions extends BaseToolOptions { - codeInterpreter: CodeInterpreterOptions; - executorId?: string; - preprocess?: { - llm: LLM; - promptTemplate: PromptTemplate.infer<{ input: string }>; - }; - storage: PythonStorage; -} - -export class PythonHttpTool extends Tool { - name = "Python"; - description = [ - "Run Python and/or shell code and return the console output. Use for isolated calculations, computations, data or file manipulation but still prefer assistant's capabilities (IMPORTANT: Do not use for text analysis or summarization).", - "Files provided by the user, or created in a previous run, will be accessible if and only if they are specified in the input. It is necessary to always print() results.", - "The following shell commands are available:", - "Use ffmpeg to convert videos.", - "Use yt-dlp to download videos, and unless specified otherwise use `-S vcodec:h264,res,acodec:m4a` for video and `-x --audio-format mp3` for audio-only.", - "Use pandoc to convert documents between formats (like MD, DOC, DOCX, PDF) -- and don't forget that you can create PDFs by writing markdown and then converting.", - "In Python, the following modules are available:", - "Use numpy, pandas, scipy and sympy for working with data.", - "Use matplotlib to plot charts.", - "Use pillow (import PIL) to create and manipulate images.", - "Use moviepy for complex manipulations with videos.", - "Use PyPDF2, pikepdf, or fitz to manipulate PDFs.", - "Use pdf2image to convert PDF to images.", - "Other Python libraries are also available -- however, prefer using the ones above.", - "Prefer using qualified imports -- `import library; library.thing()` instead of `import thing from library`.", - "Do not attempt to install libraries manually -- it will not work.", - "Each invocation of Python runs in a completely fresh VM -- it will not remember anything from before.", - "Do not use this tool multiple times in a row, always write the full code you want to run in a single invocation.", - ].join(" "); - - public readonly storage: PythonStorage; - protected files: PythonFile[] = []; - - public readonly emitter: ToolEmitter, PythonToolOutput> = Emitter.root.child({ - namespace: ["tool", "python"], - creator: this, - }); - - async inputSchema() { - this.files = await this.storage.list(); - const fileNames = unique(map(this.files, ({ filename }) => filename)); - return z.object({ - language: z.enum(["python", "shell"]).describe("Use shell for ffmpeg, pandoc, yt-dlp"), - code: z.string().describe("full source code file that will be executed"), - ...(hasMinLength(fileNames, 1) - ? { - inputFiles: z - .array(z.enum(fileNames)) - .describe( - "To access an existing file, you must specify it; otherwise, the file will not be accessible. IMPORTANT: If the file is not provided in the input, it will not be accessible.", - ), - } - : {}), - }); - } - - protected readonly preprocess; - - public constructor(options: PythonToolOptions) { - super(options); - if (!options.codeInterpreter.url) { - throw new ValidationError([ - { - message: "Property must be a valid URL!", - data: options, - propertyName: "codeInterpreter.url", - }, - ]); - } - this.preprocess = options.preprocess; - this.storage = options.storage; - } - - static { - this.register(); - } - - protected async _run( - input: ToolInput, - _options: Partial, - run: RunContext, - ) { - const inputFiles = await pipe( - this.files ?? (await this.storage.list()), - uniqueBy((f) => f.filename), - filter((file) => isIncludedIn(file.filename, (input.inputFiles ?? []) as string[])), - (files) => this.storage.upload(files), - ); - - // replace relative paths in "files" with absolute paths by prepending "/workspace" - const getSourceCode = async () => { - if (this.preprocess) { - const { llm, promptTemplate } = this.preprocess; - const response = await llm.generate(promptTemplate.render({ input: input.code }), { - signal: run.signal, - stream: false, - }); - return response.getTextContent().trim(); - } - return input.code; - }; - - const prefix = "/workspace/"; - - let response; - const httpUrl = this.options.codeInterpreter.url + "/v1/execute"; - try { - response = await fetch(httpUrl, { - method: "POST", - headers: { - "Accept": "application/json", - "Content-Type": "application/json", - }, - body: JSON.stringify({ - source_code: await getSourceCode(), - executorId: this.options.executorId ?? "default", - files: Object.fromEntries( - inputFiles.map((file) => [`${prefix}${file.filename}`, file.pythonId]), - ), - }), - }); - } catch (error) { - if (error.cause.name == "HTTPParserError") { - throw new ToolError("Python tool over HTTP failed -- not using HTTP endpoint!", [error]); - } else { - throw new ToolError("Python tool over HTTP failed!", [error]); - } - } - - if (!response?.ok) { - throw new ToolError("HTTP request failed!", [new Error(await response.text())]); - } - - const result = await response.json(); - - // replace absolute paths in "files" with relative paths by removing "/workspace/" - // skip files that are not in "/workspace" - // skip entries that are also entries in filesInput - const filesOutput = await this.storage.download( - // @ts-ignore - Object.entries(result.files) - .map(([k, v]) => { - const file = { path: k, pythonId: v }; - if (!file.path.startsWith(prefix)) { - return; - } - - const filename = file.path.slice(prefix.length); - if ( - inputFiles.some( - (input) => input.filename === filename && input.pythonId === file.pythonId, - ) - ) { - return; - } - - return { - pythonId: file.pythonId, - filename, - }; - }) - .filter(isTruthy), - ); - - return new PythonToolOutput(result.stdout, result.stderr, result.exit_code, filesOutput); - } - - createSnapshot() { - return { - ...super.createSnapshot(), - files: this.files, - storage: this.storage, - preprocess: this.preprocess, - }; - } - - loadSnapshot(snapshot: ReturnType): void { - super.loadSnapshot(snapshot); - } -} diff --git a/tests/e2e/tools/python/python_http.test.ts b/tests/e2e/tools/python/python.test.ts similarity index 95% rename from tests/e2e/tools/python/python_http.test.ts rename to tests/e2e/tools/python/python.test.ts index f18eef4f..838dc0da 100644 --- a/tests/e2e/tools/python/python_http.test.ts +++ b/tests/e2e/tools/python/python.test.ts @@ -15,7 +15,7 @@ */ import { expect } from "vitest"; -import { PythonHttpTool } from "@/tools/python/python_http.js"; +import { PythonTool } from "@/tools/python/python.js"; import { LocalPythonStorage } from "@/tools/python/storage.js"; import { ToolError } from "@/tools/base.js"; @@ -23,7 +23,7 @@ import { ToolError } from "@/tools/base.js"; const codeInterpreterUrl = process.env.CODE_INTERPRETER_URL || "http://localhost:50081"; const getPythonTool = () => - new PythonHttpTool({ + new PythonTool({ codeInterpreter: { url: codeInterpreterUrl }, storage: new LocalPythonStorage({ interpreterWorkingDir: "/tmp/code-interpreter-storage",