From 51014eff7436a79084a68f586f7950702e309de3 Mon Sep 17 00:00:00 2001 From: shubhra Date: Thu, 30 May 2024 23:41:09 +0530 Subject: [PATCH 01/11] chan --- composio/local_tools/tool.py | 1 + 1 file changed, 1 insertion(+) diff --git a/composio/local_tools/tool.py b/composio/local_tools/tool.py index 408abe813b..48bdf075eb 100644 --- a/composio/local_tools/tool.py +++ b/composio/local_tools/tool.py @@ -20,6 +20,7 @@ "RunCommandOnWorkspace": True, "GetCurrentDirCmd": True, "GithubCloneCmd": True, + "GetWorkspaceHistory": True, } From 190461c60cfa7fb97342d9df3e9aa4d898d6b3be Mon Sep 17 00:00:00 2001 From: shubhra Date: Fri, 31 May 2024 10:27:28 +0530 Subject: [PATCH 02/11] adding benchmark code --- .../commons/history_processor.py | 30 +++++++++++ examples/benchmark/evaluation.py | 51 +++++++++++++++++++ 2 files changed, 81 insertions(+) create mode 100644 examples/benchmark/evaluation.py diff --git a/composio/local_tools/local_workspace/commons/history_processor.py b/composio/local_tools/local_workspace/commons/history_processor.py index 2bcf0c1d3a..e184defc31 100644 --- a/composio/local_tools/local_workspace/commons/history_processor.py +++ b/composio/local_tools/local_workspace/commons/history_processor.py @@ -1,15 +1,31 @@ +import os from collections import defaultdict from functools import wraps +import json +from datetime import datetime +from pathlib import Path from composio.local_tools.local_workspace.commons.get_logger import get_logger logger = get_logger() +script_path = Path(__file__) +script_dir = script_path.parent +submit_logs_dir = script_dir / Path("../../../examples/swe/submit_logs/") class HistoryProcessor: def __init__(self): self.history = defaultdict(list) + # make submit_path directory + try: + date_time_folder = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + base_dir = script_dir / Path(date_time_folder) + if not os.path.exists(base_dir): + os.makedirs(base_dir) + self.base_dir = base_dir + except Exception as e: + raise Exception("error in 
making submit-path directory") from e def log_command(self, workspace_id, command, output, state): entry = {"command": command, "output": output, "state": state} @@ -19,15 +35,24 @@ def get_history(self, workspace_id, n=5): all_history = self.history.get(workspace_id, []) return all_history[-n:] + def save_history_to_file(self, workspace_id: str, instance_id: str): + # Define the file path using instance-id and ensure it's unique per workspace + file_path = self.base_dir / Path(f"{workspace_id}_instance_{instance_id}.json") + history_logs = self.history.get(workspace_id, []) + with open(file_path, 'w') as file: + json.dump(history_logs, file) + def history_recorder(): def decorator(func): @wraps(func) def wrapper(self, *args, **kwargs): output, return_code = func(self, *args, **kwargs) + is_submit_command = False if hasattr(self, "history_processor") and hasattr(self, "workspace_id"): command = "" if hasattr(self, "command"): + is_submit_command = "submit" in self.command command = self.command + " " + args[0].json() else: logger.error( @@ -39,6 +64,11 @@ def wrapper(self, *args, **kwargs): self.history_processor.log_command( self.workspace_id, command, output, state ) + + # save history to file-path once submit command is submitted + if is_submit_command: + self.history_processor.save_history_to_file(self.workspace_id, self.instance_id) + return output, return_code return wrapper diff --git a/examples/benchmark/evaluation.py b/examples/benchmark/evaluation.py new file mode 100644 index 0000000000..4abeed0a30 --- /dev/null +++ b/examples/benchmark/evaluation.py @@ -0,0 +1,51 @@ +import os +import json +from datetime import datetime, timedelta + + +def evaluate_accuracy_and_check_files(base_path="submit_logs", days_back=1): + # Calculate the starting point for checking directories + start_date = datetime.now() - timedelta(days=days_back) + start_folder = start_date.strftime("%Y-%m-%d_%H-%M-%S") + + successful_submissions = 0 + total_submissions = 0 + 
patch_files_found = 0 + + # Walk through the base directory + for root, dirs, files in os.walk(base_path): + # Check if the directory is after the start date + dir_name = os.path.basename(root) + if dir_name >= start_folder: + for file in files: + if file.endswith('.json'): + file_path = os.path.join(root, file) + with open(file_path, 'r') as f: + data = json.load(f) + # Assuming that success is defined by some condition in the output + if "success" in data["output"]: + successful_submissions += 1 + total_submissions += 1 + # Check for patch files in the same directory + patch_files = [f for f in os.listdir(root) if f.endswith('.patch')] + patch_files_found += len(patch_files) + + # Calculate accuracy + accuracy = successful_submissions / total_submissions if total_submissions > 0 else 0 + + # Output results + return { + "accuracy": accuracy, + "total_submissions": total_submissions, + "successful_submissions": successful_submissions, + "patch_files_found": patch_files_found + } + + +if __name__ == "__main__": + results = evaluate_accuracy_and_check_files() + print("Evaluation Results:") + print(f"Accuracy: {results['accuracy']:.2f}") + print(f"Total Submissions: {results['total_submissions']}") + print(f"Successful Submissions: {results['successful_submissions']}") + print(f"Patch Files Found: {results['patch_files_found']}") From 96f59abf0e68efe90cf9102494bfee82f1356ec0 Mon Sep 17 00:00:00 2001 From: shubhra Date: Fri, 31 May 2024 11:07:32 +0530 Subject: [PATCH 03/11] add requirements.txt --- examples/benchmark/Readme.md | 1 + examples/benchmark/prepare_data.py | 23 ++++++++++ examples/requirements.txt | 2 + examples/swe/task_flake8.yaml | 72 ++++++++++++++++++++++++++++++ 4 files changed, 98 insertions(+) create mode 100644 examples/benchmark/Readme.md create mode 100644 examples/benchmark/prepare_data.py create mode 100644 examples/requirements.txt create mode 100644 examples/swe/task_flake8.yaml diff --git a/examples/benchmark/Readme.md 
b/examples/benchmark/Readme.md new file mode 100644 index 0000000000..3ae617ecec --- /dev/null +++ b/examples/benchmark/Readme.md @@ -0,0 +1 @@ +1. git clone \ No newline at end of file diff --git a/examples/benchmark/prepare_data.py b/examples/benchmark/prepare_data.py new file mode 100644 index 0000000000..faf8bd2fe2 --- /dev/null +++ b/examples/benchmark/prepare_data.py @@ -0,0 +1,23 @@ +from datasets import load_dataset + +''' +huggingface dataset download : +- dataset link: https://huggingface.co/datasets/princeton-nlp/SWE-bench_Lite +''' + + +def filter_short_problem_statements(instance): + return len(instance["problem_statement"].split()) > 40 + + +def main(): + # Load the SWE-bench dataset + dev_dataset = load_dataset("princeton-nlp/SWE-bench", split="dev") + test_dataset = load_dataset("princeton-nlp/SWE-bench", split="test") + + # Display the first few entries + print(test_dataset[:5]) + + +if __name__ == "__main__": + main() diff --git a/examples/requirements.txt b/examples/requirements.txt new file mode 100644 index 0000000000..050869f384 --- /dev/null +++ b/examples/requirements.txt @@ -0,0 +1,2 @@ +datasets==2.19.1 +crewai==0.30.11 \ No newline at end of file diff --git a/examples/swe/task_flake8.yaml b/examples/swe/task_flake8.yaml new file mode 100644 index 0000000000..35971eb260 --- /dev/null +++ b/examples/swe/task_flake8.yaml @@ -0,0 +1,72 @@ +role: You are an autonomous programmer, and you're working directly in the docker container with a command line interface. +goal: fix the given issue / bug in the code +backstory: |- + You are an autonomous programmer, your task is to solve the issue given in task with the tools in hand + + The special interface consists of a file editor that shows you {WINDOW} lines of a file at a time. Before running a command, + use tools to get history of the workspace. Include this history as well when you are making the next decision. 
+ In addition to typical shell commands, these are shell commands available in workspace, + so you need to use given tools to run these special commands. + + For any other commands that you want to run, cmdmanagertool_runcommandonworkspace to run the command. Few examples: + 1 - If you want to run python script, use this tool to run the python script. *NOTE* : while running a script, give complete path of the script. + 2 - Or if you want to `ls -a` use this tool to run the command. + + Please note that THE EDIT COMMAND REQUIRES PROPER INDENTATION. + If you'd like to add the line ' print(x)' you must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. + + You need to format your output using two fields; discussion and command. + Your output should always include _one_ discussion and _one_ command field EXACTLY as in the following example: + DISCUSSION + First I'll start by using ls to see what files are in the current directory. Then maybe we can look at some relevant files to see what they look like. + ``` + ls -a + ``` + + You should only include a *SINGLE* command in the command section and then wait for a response from the shell before continuing with more discussion and commands. Everything you include in the DISCUSSION section will be saved for future reference. + If you'd like to issue two commands at once, PLEASE DO NOT DO THAT! Please instead first submit just the first command, and then after receiving a response you'll be able to issue the second command. + You're free to use any other bash commands you want (e.g. find, grep, cat, ls, cd) in addition to the special commands listed above. + However, the environment does NOT support interactive session commands (e.g. python, vim), so please do not invoke them. + Never issue a find command against "/" directory. It will not work. 
Always try to find files within the base directory given in the task. + +description: |- + We're currently solving the following issue within our repository. Here's the issue text: + ISSUE: + in the code, lots of pylint errors are coming. You can regenerate pylint errors by issuing command + `tox -e pylint` + all these need to be fixed. + + + INSTRUCTIONS: + Now, you're going to solve this issue on your own. + Note however that you cannot use any interactive session commands (e.g. python, vim) in this environment, but you can write scripts and run them. E.g. you can write a python script and then run it with `python .py`. + + NOTE ABOUT THE EDIT COMMAND: Indentation really matters! When editing a file, make sure to insert appropriate indentation before each line! + + *Base_dir* : /composio/ + you need to focus on finding files in the base directory. Issue related files will only be in the base_dir directory. + + IMPORTANT TIPS: + 1. Always start by initializing the workspace. Workspace is a docker-container with docker image_name='sweagent/swe-agent:latest'. + 2. Use the tools to check status of workspace is running or not. + 3. Use the workspace_id returned from workspace initialization to use tools to run command on that docker container. + 4. Once you have setup the work-space, go to the directory /composio/ directory and try to work in the directory /composio/ of workspace. + 5. always try to replicate the bug that the issues discusses. + If the issue includes code for reproducing the bug, we recommend that you re-implement that in your environment, and run it to make sure you can reproduce the bug. + Then start trying to fix it. + When you think you've fixed the bug, re-run the bug reproduction script to make sure that the bug has indeed been fixed. 
+ If the bug reproduction script does not print anything when it successfully runs, we recommend adding a print("Script completed successfully, no errors.") command at the end of the file, + so that you can be sure that the script indeed ran fine all the way through. + 6. Before running a command, always use tools to fetch workspace-history. History is keeps last 5 commands that have run and output of all those commands. + Among other things, also use the history to decide which command to run next. + + 6. If you run a command and it doesn't work, try running a different command. A command that did not work once will not work the second time unless you modify it! + + 7. If you open a file and need to get to an area around a specific line that is not in the first 100 lines, say line 583, don't just use the scroll_down command multiple times. Instead, use the goto 583 command. It's much quicker. + + 8. If the bug reproduction script requires inputting/reading a specific file, such as buggy-input.png, and you'd like to understand how to input that file, conduct a search in the existing repo code, to see whether someone else has already done that. Do this by running the command: find_file "buggy-input.png" If that doesn't work, use the linux 'find' command. + + 9. Always make sure to look at the currently open file and the current working directory (which appears right after the currently open file). The currently open file might be in a different directory than the working directory! Note that some commands, such as 'create', open files, so they might change the current open file. + + 10. When editing files, it is easy to accidentally specify a wrong line number or to write code with incorrect indentation. Always check the code after you issue an edit to make sure that it reflects what you wanted to accomplish. If it didn't, issue another command to fix it. 
+ From c86b3aa1edd2e2ae485a93f738a94bd498059cf7 Mon Sep 17 00:00:00 2001 From: shubhra Date: Fri, 31 May 2024 11:08:24 +0530 Subject: [PATCH 04/11] add requirements.txt --- examples/benchmark/prepare_data.py | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/benchmark/prepare_data.py b/examples/benchmark/prepare_data.py index faf8bd2fe2..c47a9a2d26 100644 --- a/examples/benchmark/prepare_data.py +++ b/examples/benchmark/prepare_data.py @@ -2,6 +2,7 @@ ''' huggingface dataset download : +Its a swe-bench lite dataset, description can be found here - dataset link: https://huggingface.co/datasets/princeton-nlp/SWE-bench_Lite ''' From a670ab02ad45f42c63c6e4d951b234981f3ebf2a Mon Sep 17 00:00:00 2001 From: shubhra Date: Fri, 31 May 2024 13:15:26 +0530 Subject: [PATCH 05/11] adding readme for preparing data --- examples/benchmark/Readme.md | 81 +++++++++++++++++++++++++++++- examples/benchmark/prepare_data.py | 24 --------- 2 files changed, 80 insertions(+), 25 deletions(-) delete mode 100644 examples/benchmark/prepare_data.py diff --git a/examples/benchmark/Readme.md b/examples/benchmark/Readme.md index 3ae617ecec..9d82997909 100644 --- a/examples/benchmark/Readme.md +++ b/examples/benchmark/Readme.md @@ -1 +1,80 @@ -1. git clone \ No newline at end of file + +# SWE-bench Lite Dataset Usage Guide + +This guide provides instructions on how to download and use the SWE-bench Lite dataset from Hugging Face. + +## Dataset Description + +The SWE-bench Lite dataset is a curated collection of software engineering problems and their solutions. More details about the dataset can be found at the dataset's Hugging Face page: +- [SWE-bench Lite Dataset on Hugging Face](https://huggingface.co/datasets/princeton-nlp/SWE-bench_Lite) + +## Getting Started + +To use the dataset, you will need to install the `datasets` library from Hugging Face. 
Install it with `pip install datasets`, then load the dataset as shown below: +```python +from datasets import load_dataset + +def filter_short_problem_statements(instance): + """ + Filter function to exclude problem statements with fewer than 40 words. + """ + return len(instance["problem_statement"].split()) > 40 + +def main(): + """ + Main function to load and display entries from the SWE-bench dataset. + """ + # Load the SWE-bench dataset + dev_dataset = load_dataset("princeton-nlp/SWE-bench_Lite", split="dev") + test_dataset = load_dataset("princeton-nlp/SWE-bench_Lite", split="test") + + # Filter the dataset to include only longer problem statements + filtered_test_dataset = test_dataset.filter(filter_short_problem_statements) + + # Display the first few entries of the filtered dataset + print(filtered_test_dataset[:5]) + +if __name__ == "__main__": + main() +``` +## Dataset Fields + +The SWE-bench Lite dataset includes the following fields: + +- **instance_id**: A formatted instance identifier, usually as repo_owner__repo_name-PR-number. +- **patch**: The gold patch, the patch generated by the PR (minus test-related code), that resolved the issue. +- **repo**: The repository owner/name identifier from GitHub. +- **base_commit**: The commit hash of the repository representing the HEAD of the repository before the solution PR is applied. +- **hints_text**: Comments made on the issue prior to the creation of the solution PR’s first commit creation date. +- **created_at**: The creation date of the pull request. +- **test_patch**: A test-file patch that was contributed by the solution PR. +- **problem_statement**: The issue title and body. +- **version**: Installation version to use for running evaluation. +- **environment_setup_commit**: The commit hash to use for environment setup and installation. +- **FAIL_TO_PASS**: A json list of strings that represent the set of tests resolved by the PR and tied to the issue resolution. 
+- **PASS_TO_PASS**: A json list of strings that represent tests that should pass before and after the PR application. + +## Usage Example + +To use the dataset, follow the example code below: +To get started with the SWE-bench Lite dataset from Hugging Face, you can use the following Python code snippet. This dataset provides a comprehensive set of fields that are useful for software engineering research, particularly in the context of automated patch generation and issue resolution. + + +## Task Configuration + +To facilitate the use of the dataset in practical scenarios, a `task_config.yaml` file is used to configure the specifics of the task: +```yaml +backstory: |- +issue_description: |- +repo_name: "" +instance_id: "" +``` + + +## Running the Task + +To run the task and save the history of the run, use the following Python code: + +```python +python swe/try-swe.yaml --config_path +``` \ No newline at end of file diff --git a/examples/benchmark/prepare_data.py b/examples/benchmark/prepare_data.py deleted file mode 100644 index c47a9a2d26..0000000000 --- a/examples/benchmark/prepare_data.py +++ /dev/null @@ -1,24 +0,0 @@ -from datasets import load_dataset - -''' -huggingface dataset download : -Its a swe-bench lite dataset, description can be found here -- dataset link: https://huggingface.co/datasets/princeton-nlp/SWE-bench_Lite -''' - - -def filter_short_problem_statements(instance): - return len(instance["problem_statement"].split()) > 40 - - -def main(): - # Load the SWE-bench dataset - dev_dataset = load_dataset("princeton-nlp/SWE-bench", split="dev") - test_dataset = load_dataset("princeton-nlp/SWE-bench", split="test") - - # Display the first few entries - print(test_dataset[:5]) - - -if __name__ == "__main__": - main() From 0c728f838c82760e8d76b3467e579f860e67c692 Mon Sep 17 00:00:00 2001 From: shubhra Date: Fri, 31 May 2024 13:17:06 +0530 Subject: [PATCH 06/11] adding readme for preparing data --- examples/benchmark/Readme.md | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/examples/benchmark/Readme.md b/examples/benchmark/Readme.md index 9d82997909..a9529289d7 100644 --- a/examples/benchmark/Readme.md +++ b/examples/benchmark/Readme.md @@ -1,5 +1,5 @@ -# SWE-bench Lite Dataset Usage Guide +# Running benchmark on SWE-bench Lite Dataset This guide provides instructions on how to download and use the SWE-bench Lite dataset from Hugging Face. From 650ae966e1a1d5c9486d41e0103b4f2a46e3ae9e Mon Sep 17 00:00:00 2001 From: shubhra Date: Fri, 31 May 2024 13:18:19 +0530 Subject: [PATCH 07/11] adding readme for preparing data --- examples/benchmark/Readme.md | 5 ----- 1 file changed, 5 deletions(-) diff --git a/examples/benchmark/Readme.md b/examples/benchmark/Readme.md index a9529289d7..6d764a6fae 100644 --- a/examples/benchmark/Readme.md +++ b/examples/benchmark/Readme.md @@ -54,11 +54,6 @@ The SWE-bench Lite dataset includes the following fields: - **FAIL_TO_PASS**: A json list of strings that represent the set of tests resolved by the PR and tied to the issue resolution. - **PASS_TO_PASS**: A json list of strings that represent tests that should pass before and after the PR application. -## Usage Example - -To use the dataset, follow the example code below: -To get started with the SWE-bench Lite dataset from Hugging Face, you can use the following Python code snippet. This dataset provides a comprehensive set of fields that are useful for software engineering research, particularly in the context of automated patch generation and issue resolution. 
- ## Task Configuration From 7ec554560a9809dcea34c43a0650b67e519d11fe Mon Sep 17 00:00:00 2001 From: shubhra Date: Fri, 31 May 2024 13:18:59 +0530 Subject: [PATCH 08/11] adding readme for preparing data --- examples/benchmark/Readme.md | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/examples/benchmark/Readme.md b/examples/benchmark/Readme.md index 6d764a6fae..946c38efae 100644 --- a/examples/benchmark/Readme.md +++ b/examples/benchmark/Readme.md @@ -54,8 +54,16 @@ The SWE-bench Lite dataset includes the following fields: - **FAIL_TO_PASS**: A json list of strings that represent the set of tests resolved by the PR and tied to the issue resolution. - **PASS_TO_PASS**: A json list of strings that represent tests that should pass before and after the PR application. +## Running the Task + +To run the task and save the history of the run, use the following command: + +```bash +python swe/try-swe.py --config_path <path/to/task_config.yaml> +``` -## Task Configuration + +### Task Configuration To facilitate the use of the dataset in practical scenarios, a `task_config.yaml` file is used to configure the specifics of the task: ```yaml @@ -66,10 +74,3 @@ instance_id: "" ``` -## Running the Task - -To run the task and save the history of the run, use the following Python code: - -```python -python swe/try-swe.yaml --config_path -``` \ No newline at end of file From a605c81202f00d62c20e50ccdac2ec83e6140105 Mon Sep 17 00:00:00 2001 From: shubhra Date: Fri, 31 May 2024 13:33:01 +0530 Subject: [PATCH 09/11] repo name in config --- examples/swe/task_config.yaml | 17 ++++++++++------- examples/swe/try-swe.py | 4 +++- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/examples/swe/task_config.yaml b/examples/swe/task_config.yaml index 54ac96830f..4ca499c533 100644 --- a/examples/swe/task_config.yaml +++ b/examples/swe/task_config.yaml @@ -3,18 +3,19 @@ backstory: |- Your mentor gave you following tips. 1. 
Always start by initializing the workspace and check the status of workspace to make sure it's running. 2. Use the workspace_id returned to use tools to run commands. - 3. Always try to work from home directory in workspace. - 4. always try to replicate the bug that the issues discusses. + 3. clone the git rep {repo_name} in workspace + 4. Always try to work from home directory in workspace. + 5. always try to replicate the bug that the issues discusses. If the issue includes code for reproducing the bug, we recommend that you re-implement that in your environment, and run it to make sure you can reproduce the bug. Then start trying to fix it. When you think you've fixed the bug, re-run the bug reproduction script to make sure that the bug has indeed been fixed. If the bug reproduction script does not print anything when it successfully runs, we recommend adding a print("Script completed successfully, no errors.") command at the end of the file, so that you can be sure that the script indeed ran fine all the way through. - 5. If you run a command and it doesn't work, try running a different command. A command that did not work once will not work the second time unless you modify it! - 6. If you open a file and need to get to an area around a specific line that is not in the first 100 lines, say line 583, don't just use the scroll_down command multiple times. Instead, use the goto 583 command. It's much quicker. - 7. If the bug reproduction script requires inputting/reading a specific file, such as buggy-input.png, and you'd like to understand how to input that file, conduct a search in the existing repo code, to see whether someone else has already done that. Do this by running the command: find_file "buggy-input.png" If that doesn't work, use the linux 'find' command. - 8. Always make sure to look at the currently open file and the current working directory (which appears right after the currently open file). 
The currently open file might be in a different directory than the working directory! Note that some commands, such as 'create', open files, so they might change the current open file. - 9. When editing files, it is easy to accidentally specify a wrong line number or to write code with incorrect indentation. Always check the code after you issue an edit to make sure that it reflects what you wanted to accomplish. If it didn't, issue another command to fix it. + 6. If you run a command and it doesn't work, try running a different command. A command that did not work once will not work the second time unless you modify it! + 7. If you open a file and need to get to an area around a specific line that is not in the first 100 lines, say line 583, don't just use the scroll_down command multiple times. Instead, use the goto 583 command. It's much quicker. + 8. If the bug reproduction script requires inputting/reading a specific file, such as buggy-input.png, and you'd like to understand how to input that file, conduct a search in the existing repo code, to see whether someone else has already done that. Do this by running the command: find_file "buggy-input.png" If that doesn't work, use the linux 'find' command. + 9. Always make sure to look at the currently open file and the current working directory (which appears right after the currently open file). The currently open file might be in a different directory than the working directory! Note that some commands, such as 'create', open files, so they might change the current open file. + 10. When editing files, it is easy to accidentally specify a wrong line number or to write code with incorrect indentation. Always check the code after you issue an edit to make sure that it reflects what you wanted to accomplish. If it didn't, issue another command to fix it. issue_description: |- We're currently solving the following issue within our repository. 
Here's the issue text: @@ -195,4 +196,6 @@ issue_description: |- Now, you're going to solve this issue on your own. In case of missing modules, Install required packages using command `conda install pandas -y` +repo_name: pvlib/pvlib-python + diff --git a/examples/swe/try-swe.py b/examples/swe/try-swe.py index 3b04677c3c..7e75d6feb0 100644 --- a/examples/swe/try-swe.py +++ b/examples/swe/try-swe.py @@ -36,11 +36,13 @@ task_config_path = script_dir / Path(CONFIG_FILE_PATH) with open(task_config_path, "r") as stream: task_data = yaml.safe_load(stream) + repo_name = task_data["repo_name"] + b = task_data["backstory"].format(repo_name=repo_name) agent_1 = Agent( role=base_role, goal=goal, - backstory=task_data["backstory"], + backstory=b, verbose=True, tools=tools, llm=llm, From f5519c35a0d190630777778e05b0255a10152552 Mon Sep 17 00:00:00 2001 From: shubhra Date: Fri, 31 May 2024 13:58:17 +0530 Subject: [PATCH 10/11] repo name in config --- composio/client/enums.py | 1 + .../local_workspace/cmd_manager/actions/__init__.py | 1 + .../local_workspace/cmd_manager/actions/clone_github.py | 4 ++-- .../local_workspace/cmd_manager/cmd_manager_tool.py | 2 ++ .../local_tools/local_workspace/test/check_implementation.py | 5 +++++ examples/swe/task_config.yaml | 5 +++-- 6 files changed, 14 insertions(+), 4 deletions(-) diff --git a/composio/client/enums.py b/composio/client/enums.py index 4b4b262967..605feb3cce 100644 --- a/composio/client/enums.py +++ b/composio/client/enums.py @@ -2435,6 +2435,7 @@ def from_app_and_action(cls, app: str, name: str) -> "Action": RUNCOMMANDONWORKSPACE = ("cmdmanagertool", "cmdmanagertool_runcommandonworkspace", True, True) GETCURRENTDIRCMD = ("cmdmanagertool", "cmdmanagertool_getcurrentdircmd", True, True) GETWORKSPACEHISTORY = ("historykeeper", "historykeeper_getworkspacehistory", True, True) + GithubCloneCmd = ("cmdmanagertool", "cmdmanagertool_githubclonecmd", True, True) diff --git 
a/composio/local_tools/local_workspace/cmd_manager/actions/__init__.py b/composio/local_tools/local_workspace/cmd_manager/actions/__init__.py index 7018a54cb8..4340b9f135 100644 --- a/composio/local_tools/local_workspace/cmd_manager/actions/__init__.py +++ b/composio/local_tools/local_workspace/cmd_manager/actions/__init__.py @@ -6,6 +6,7 @@ OpenCmdRequest, OpenFile, ) +from .clone_github import GithubCloneCmd, GithubCloneRequest from .edit_cmd import EditFile, EditFileRequest from .run_cmd import RunCommandOnWorkspace, RunCommandOnWorkspaceRequest from .scroll_cmds import Scroll, ScrollRequest diff --git a/composio/local_tools/local_workspace/cmd_manager/actions/clone_github.py b/composio/local_tools/local_workspace/cmd_manager/actions/clone_github.py index 43a0381d78..48e33e5136 100644 --- a/composio/local_tools/local_workspace/cmd_manager/actions/clone_github.py +++ b/composio/local_tools/local_workspace/cmd_manager/actions/clone_github.py @@ -34,7 +34,7 @@ class GithubCloneCmd(BaseAction): """ Clones a github repository """ - + _history_maintains: bool = True _display_name = "Clone Github Repository Action" _request_schema = GithubCloneRequest _response_schema = GithubCloneResponse @@ -45,7 +45,7 @@ def execute( ) -> GithubCloneResponse: if not request_data.repo_name or not request_data.repo_name.strip(): raise ValueError( - "repo_name can not be null. Give a directory-name in which to search" + "repo_name can not be null. 
Give a repo_name to clone" ) if not request_data.github_token or not request_data.github_token.strip(): diff --git a/composio/local_tools/local_workspace/cmd_manager/cmd_manager_tool.py b/composio/local_tools/local_workspace/cmd_manager/cmd_manager_tool.py index 40fbbcda87..660ac5f673 100644 --- a/composio/local_tools/local_workspace/cmd_manager/cmd_manager_tool.py +++ b/composio/local_tools/local_workspace/cmd_manager/cmd_manager_tool.py @@ -11,6 +11,7 @@ Scroll, SearchDirCmd, SearchFileCmd, + GithubCloneCmd, ) from composio.local_tools.local_workspace.commons.history_processor import ( HistoryProcessor, @@ -41,6 +42,7 @@ def actions(self) -> list: EditFile, RunCommandOnWorkspace, GetCurrentDirCmd, + GithubCloneCmd, ] def triggers(self) -> list: diff --git a/composio/local_tools/local_workspace/test/check_implementation.py b/composio/local_tools/local_workspace/test/check_implementation.py index 04aca82b5a..e888453e0d 100644 --- a/composio/local_tools/local_workspace/test/check_implementation.py +++ b/composio/local_tools/local_workspace/test/check_implementation.py @@ -9,6 +9,8 @@ EditFileRequest, RunCommandOnWorkspace, RunCommandOnWorkspaceRequest, + GithubCloneCmd, + GithubCloneRequest ) from composio.local_tools.local_workspace.commons.history_processor import ( HistoryProcessor, @@ -29,6 +31,9 @@ def check_simple_implementation(): h = HistoryProcessor() workspace_id = w.get_workspace_manager(args) + # clone git repo + git_clone = GithubCloneRequest() + # create file create_file_cmd = CreateFileCmd() create_file_cmd.set_workspace_and_history(w, h) diff --git a/examples/swe/task_config.yaml b/examples/swe/task_config.yaml index 4ca499c533..c244496c29 100644 --- a/examples/swe/task_config.yaml +++ b/examples/swe/task_config.yaml @@ -3,7 +3,7 @@ backstory: |- Your mentor gave you following tips. 1. Always start by initializing the workspace and check the status of workspace to make sure it's running. 2. Use the workspace_id returned to use tools to run commands. 
- 3. clone the git rep {repo_name} in workspace + 3. clone the git repo {repo_name} in workspace, with github_access_token: '' 4. Always try to work from home directory in workspace. 5. always try to replicate the bug that the issues discusses. If the issue includes code for reproducing the bug, we recommend that you re-implement that in your environment, and run it to make sure you can reproduce the bug. @@ -194,8 +194,9 @@ issue_description: |- '0.9.1'>>> pandas.__version__'1.4.3' ``` Now, you're going to solve this issue on your own. - In case of missing modules, Install required packages using command `conda install pandas -y` + In case of missing modules, Install required packages using command `pip install pandas` repo_name: pvlib/pvlib-python + From 278b0cc272d907ddaa2bf2ba7c44042e00f3a5e2 Mon Sep 17 00:00:00 2001 From: shubhra Date: Fri, 31 May 2024 14:10:13 +0530 Subject: [PATCH 11/11] fix --- examples/swe/task_flake8.yaml | 72 ----------------------------------- 1 file changed, 72 deletions(-) delete mode 100644 examples/swe/task_flake8.yaml diff --git a/examples/swe/task_flake8.yaml b/examples/swe/task_flake8.yaml deleted file mode 100644 index 35971eb260..0000000000 --- a/examples/swe/task_flake8.yaml +++ /dev/null @@ -1,72 +0,0 @@ -role: You are an autonomous programmer, and you're working directly in the docker container with a command line interface. -goal: fix the given issue / bug in the code -backstory: |- - You are an autonomous programmer, your task is to solve the issue given in task with the tools in hand - - The special interface consists of a file editor that shows you {WINDOW} lines of a file at a time. Before running a command, - use tools to get history of the workspace. Include this history as well when you are making the next decision. - In addition to typical shell commands, these are shell commands available in workspace, - so you need to use given tools to run these special commands.
- - For any other commands that you want to run, cmdmanagertool_runcommandonworkspace to run the command. Few examples: - 1 - If you want to run python script, use this tool to run the python script. *NOTE* : while running a script, give complete path of the script. - 2 - Or if you want to `ls -a` use this tool to run the command. - - Please note that THE EDIT COMMAND REQUIRES PROPER INDENTATION. - If you'd like to add the line ' print(x)' you must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. - - You need to format your output using two fields; discussion and command. - Your output should always include _one_ discussion and _one_ command field EXACTLY as in the following example: - DISCUSSION - First I'll start by using ls to see what files are in the current directory. Then maybe we can look at some relevant files to see what they look like. - ``` - ls -a - ``` - - You should only include a *SINGLE* command in the command section and then wait for a response from the shell before continuing with more discussion and commands. Everything you include in the DISCUSSION section will be saved for future reference. - If you'd like to issue two commands at once, PLEASE DO NOT DO THAT! Please instead first submit just the first command, and then after receiving a response you'll be able to issue the second command. - You're free to use any other bash commands you want (e.g. find, grep, cat, ls, cd) in addition to the special commands listed above. - However, the environment does NOT support interactive session commands (e.g. python, vim), so please do not invoke them. - Never issue a find command against "/" directory. It will not work. Always try to find files within the base directory given in the task. - -description: |- - We're currently solving the following issue within our repository. 
Here's the issue text: - ISSUE: - in the code, lots of pylint errors are coming. You can regenerate pylint errors by issuing command - `tox -e pylint` - all these need to be fixed. - - - INSTRUCTIONS: - Now, you're going to solve this issue on your own. - Note however that you cannot use any interactive session commands (e.g. python, vim) in this environment, but you can write scripts and run them. E.g. you can write a python script and then run it with `python .py`. - - NOTE ABOUT THE EDIT COMMAND: Indentation really matters! When editing a file, make sure to insert appropriate indentation before each line! - - *Base_dir* : /composio/ - you need to focus on finding files in the base directory. Issue related files will only be in the base_dir directory. - - IMPORTANT TIPS: - 1. Always start by initializing the workspace. Workspace is a docker-container with docker image_name='sweagent/swe-agent:latest'. - 2. Use the tools to check status of workspace is running or not. - 3. Use the workspace_id returned from workspace initialization to use tools to run command on that docker container. - 4. Once you have setup the work-space, go to the directory /composio/ directory and try to work in the directory /composio/ of workspace. - 5. always try to replicate the bug that the issues discusses. - If the issue includes code for reproducing the bug, we recommend that you re-implement that in your environment, and run it to make sure you can reproduce the bug. - Then start trying to fix it. - When you think you've fixed the bug, re-run the bug reproduction script to make sure that the bug has indeed been fixed. - If the bug reproduction script does not print anything when it successfully runs, we recommend adding a print("Script completed successfully, no errors.") command at the end of the file, - so that you can be sure that the script indeed ran fine all the way through. - 6. Before running a command, always use tools to fetch workspace-history. 
History is keeps last 5 commands that have run and output of all those commands. - Among other things, also use the history to decide which command to run next. - - 6. If you run a command and it doesn't work, try running a different command. A command that did not work once will not work the second time unless you modify it! - - 7. If you open a file and need to get to an area around a specific line that is not in the first 100 lines, say line 583, don't just use the scroll_down command multiple times. Instead, use the goto 583 command. It's much quicker. - - 8. If the bug reproduction script requires inputting/reading a specific file, such as buggy-input.png, and you'd like to understand how to input that file, conduct a search in the existing repo code, to see whether someone else has already done that. Do this by running the command: find_file "buggy-input.png" If that doesn't work, use the linux 'find' command. - - 9. Always make sure to look at the currently open file and the current working directory (which appears right after the currently open file). The currently open file might be in a different directory than the working directory! Note that some commands, such as 'create', open files, so they might change the current open file. - - 10. When editing files, it is easy to accidentally specify a wrong line number or to write code with incorrect indentation. Always check the code after you issue an edit to make sure that it reflects what you wanted to accomplish. If it didn't, issue another command to fix it. -