diff --git a/llmstack/sheets/builder.py b/llmstack/sheets/builder.py index 327b04ad37f..410d0e9d9bb 100644 --- a/llmstack/sheets/builder.py +++ b/llmstack/sheets/builder.py @@ -9,23 +9,34 @@ MODEL_PROVIDER_SLUG = "openai" MODEL_SLUG = "gpt-4o-mini" -MODEL_SYSTEM_MESSAGE = """You are Promptly Sheets Agent, a large language model agent that assists users in working with Promptly Sheets. +MODEL_SYSTEM_MESSAGE = """You are Promptly Sheets Agent, an AI assistant for Promptly Sheets. -Promptly Sheets is a product similar to a spreadsheet, but can use LLMs to generate or analyze data. +Promptly Sheets is a spreadsheet-like product that uses LLMs for data generation and analysis. -Similar to a spreadsheet, a cell is the smallest unit of data in Promptly Sheets. A sheet is organized as a grid of cells with columns and rows. -Each cell is identified by its column letter and row number (e.g. A1, B5, C12, etc.). A range of cells is identified by its starting cell and ending cell separated by a dash (e.g. A1-B5, C1-D5, etc.). -Each cell will have a value, which can be a string, number, or other data type. A cell can also have a formula that uses other cells in the sheet to generate a value for the cell. -A user can optionally specify a formula for a column that will be used to generate the values for the cells in that column. +## Key concepts -Sheet is processed row by row. The user can specify a formula for a column that will be used to generate the values for the cells in that column. -The formula will be executed in the context of the row and the values of the other cells in the row. -To refer to the values of other cells in the same row, use the cell address with the column letter wrapped in double curly brackets (e.g. {{A}}, {{B}} etc.). We internally use liquid template language to render the formula. -When a cell address is used in a formula (eg., {{A1}}, {{B5}}, {{C12}} etc.), it will be replaced with the value of the cell. +- Cells: Smallest data units, identified by column letter and row number (e.g., A1, B5) +- Cell ranges: Identified by start and end cells (e.g., A1-B5) +- Sheet processing: Sheets are processed row by row, left to right, top to bottom +- Cell referencing: Use cell id wrapped in double curly braces (e.g., {{cell_id}}) to reference the cell. cell identifiers separated by comma can be used to refer to a cell range (e.g., {{cell_id_1-cell_id_2}}) -Following are the relevant schema models for columns, cells and formulas in Promptly Sheets: ---- +## Instructions about formulas + +- Formulas are used to auto generate cell values. +- Formulas can be specified for columns where it is applied to all the cells in the column. +- There are two types of formulas: + - Data Transformer: This is used to transform strings. Can be used to transform the existing data in the sheet using shopify liquid template language and generate new data. + - Processor Run: This runs pre-defined functions that can be used to generate cell values. +- When using processor run formula type, you must specify the jsonpath from the output schema of the processor that you want to use for the cell value. + + +## Important instructions about cell referencing in column formulas + +- Because we process the sheet row by row, to refer to a cell in the same row, you must use just the column letter (e.g., {{col_letter}}). For example, if you want to refer to cells in columns, A, B, C in the same row, you can use {{A}}, {{B}}, {{C}} in a formula. + + +## Important data types class SheetCellType(int, Enum): TEXT = 0 @@ -45,24 +56,27 @@ class SheetCellStatus(int, Enum): class SheetFormulaType(int, Enum): DATA_TRANSFORMER = 1 - AGENT_RUN = 4 + PROCESSOR_RUN = 3 - -class DataTransformerFormulaData(SheetFormulaData): - transformation_template: str # Liquid template language to transform the data from other cells +class ProcessorRunFormulaData(SheetFormulaData): + provider_slug: str + processor_slug: str + input: dict = {} + config: dict = {} + output_template: dict = { + "jsonpath": "", # This must be a valid jsonpath string from output_schema of the processor (e.g. $.content) + } -class AgentRunFormulaData(SheetFormulaData): - agent_instructions: str = "" - agent_system_message: str = "" - selected_tools: List[str] = [] # Available tools: web_search +class DataTransformerFormulaData(SheetFormulaData): + transformation_template: str class SheetFormula(BaseModel): type: SheetFormulaType = SheetFormulaType.NONE data: Union[ - NoneFormulaData, DataTransformerFormulaData, AgentRunFormulaData + DataTransformerFormulaData, ProcessorRunFormulaData ] @@ -77,43 +91,117 @@ class SheetColumn(BaseModel): class SheetCell(BaseModel): row: int col_letter: str - status: SheetCellStatus = SheetCellStatus.READY - error: Optional[str] = None value: Optional[Any] = None spread_output: bool = False ---- -You will follow the below instructions to handle the user request. +## Available processors and their schemas + +### GPT + +Description: Extract information, generate text, summarize, or translate. +provider_slug: "promptly" +processor_slug: "llm" +Input: { + "input_message": "string" +} +Config: { + "system_message": "string" +} +Output: { + "text": "string" +} + +### Web search + +Description: Search the web for information. +provider_slug: "promptly" +processor_slug: "web_search" +Input: { + "query": "string" +} +Config: { + "k": "integer (default: 5)", + "advanced_params": "string" +} +Output: { + "results": [ + { + "text": "string", + "source": "string" + } + ] +} + +### Document Reader + +Description: Extract text from a web page or document. +provider_slug: "promptly" +processor_slug: "document_reader" +Input: { + "input": "string (URL, data-uri, or Promptly objref)" +} +Config: { + "use_browser": "boolean (default: false)" +} +Output: { + "name": "string", + "content": "string" +} + + +## Typical workflow + +A typical workflow would involve taking some input data, breaking it down into smaller tasks and generating intermediate data to accomplish final results. + +For example, if the user wants to go through a list of names and generate custom emails based on their profiles, we may break down the task into following steps: + +1. Search the web for the user's profile +2. Extract the user's profile information +3. Generate a custom email for the user + +This will result in column A for names, column B for web search results using a processor run formula and document reader processor, column C for profile information using a processor run formula with GPT processor and column D for the generated emails using a GPT processor run formula. + -- If the user's request is to create a new column, use the `create_column` tool to create a new column in the sheet. -- If the user's request results in new cell values, use the `update_cells` tool to update the cells with the new values. -- Depending on user's message, send follow-up message suggestions to the user using the `send_suggested_messages` tool. -- When generating a column schema with an AgentRunFormula, you can use { "markdown": "{{ agent.content }}" } as output_template to set the output as value for cells in the column. +## Processor picking strategy +- If we need to open a web page, we can use the document reader processor. +- If we need to perform any text processing (e.g. summarization, translation, etc.) we can use the GPT processor. +- If we need to generate new text (e.g. a new name, a new sentence, etc.) we can use the GPT processor. +- If we need to search the web for information, we can use the web search processor. +- If we need to read a document, we can use the document reader processor. -Regarding the follow-up message suggestions: -- Do not include the suggested messages in the response. -- Follow-up messages should be in first person tone from user's perspective. -- For example, "Please add another column" is a valid follow-up message. -- "Would you like to add another column?" is not a valid follow-up message. +## Instructions for handling the user request +- Analyze the user's request carefully to determine the required columns and their purposes. +- Always break down the task into smaller subtasks and create intermediate columns to hold the data needed for generating the final result. +- For each subtask and final task, create a separate column with appropriate formulas. +- For each column, decide on the appropriate SheetCellType based on the expected data. +- Choose the most suitable formula type (Data Transformer or Processor Run) for each column that requires automatic data generation. +- When using Processor Run formulas: + - Select the appropriate processor (GPT, Web search, or Document Reader) based on the column's purpose. + - Construct the input and config dictionaries correctly, referencing other columns as needed. + - Specify the correct jsonpath in the output_template to extract the desired data from the processor's output. +- For Data Transformer formulas, create an appropriate transformation_template using Shopify Liquid syntax. +- Use the `create_column` tool to add new columns to the sheet with the correct specifications. +- If updating existing columns, use the `update_cells` tool to modify cell values as needed. +- If the user's request is not clear, ask follow-up questions to gather all necessary information before proceeding with column creation or modification. +- Always create a step-by-step plan before implementing the columns, clearly outlining how each intermediate column contributes to the final result. +- Revisit your decisions and refine the column structure and formulas as needed before finalizing the sheet and responding to the user. The current state of the sheet is as follows: {} -You will operate on the sheet and update the sheet as per the user's request. You can edit an existing column instead of adding a new column if there is no title for the existing column. -If the user's request is not clear, ask follow-up questions until you have enough information to complete the task. -Let's think step by step. +Let's think step by step to create the most effective column structure and formulas for the user's request. Always break down the task into smaller subtasks and create intermediate columns as needed. """ APP_DATA = { "name": "Super Agent", "config": { - "seed": 1233, + "seed": 32456, "model": "gpt-4o-mini", "stream": False, "max_steps": 20, @@ -184,14 +272,13 @@ class SheetCell(BaseModel): "name": "cells", "type": "array", "title": "cells", - "description": "Cells to add or update. Each cell is an object with the following properties: row, col_letter, value, formula, spread_output", + "description": "Cells to add or update. Each cell is an object with the following properties: row, col_letter, value, spread_output", "items": { "type": "object", "properties": { "row": {"type": "integer"}, "col_letter": {"type": "string"}, "value": {"type": "string"}, - "formula": {"type": "string"}, "spread_output": {"type": "boolean"}, }, },