Skip to content

Commit

Permalink
adding a script to fetch and convert devin's output for evaluation (#81)
Browse files Browse the repository at this point in the history
* adding code to fetch and convert devin's output for evaluation

* update README.md

* update code for fetching and processing devin's outputs

* update code for fetching and processing devin's outputs
  • Loading branch information
Jiaxin-Pei authored Mar 21, 2024
1 parent b84463f commit dc88dac
Show file tree
Hide file tree
Showing 2 changed files with 88 additions and 2 deletions.
11 changes: 9 additions & 2 deletions evaluation/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,12 @@ all the preprocessing/evaluation/analysis scripts.

## Tasks
### SWE-bench
- analysis
- devin_eval_analysis.ipynb: notebook analyzing devin's outputs
- notebooks
- `devin_eval_analysis.ipynb`: notebook analyzing devin's outputs
- scripts
- `prepare_devin_outputs_for_evaluation.py`: script fetching and converting [devin's output](https://github.com/CognitionAI/devin-swebench-results/tree/main) into the desired json file for evaluation.
- usage: `python prepare_devin_outputs_for_evaluation.py <setting>` where setting can be `passed`, `failed` or `all`
- resources
- Devin's outputs processed for evaluation are available on [Hugging Face](https://huggingface.co/datasets/OpenDevin/Devin-SWE-bench-output)
- get predictions that passed the test: `wget https://huggingface.co/datasets/OpenDevin/Devin-SWE-bench-output/raw/main/devin_swe_passed.json`
- get all predictions: `wget https://huggingface.co/datasets/OpenDevin/Devin-SWE-bench-output/raw/main/devin_swe_outputs.json`
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
'''
Script used to convert devin's output into the desired json format for evaluation on SWE-bench
Usage:
python prepare_devin_outputs_for_evaluation.py <setting>
<setting> can be "passed", "failed", "all"
Outputs:
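json files written to ../data/devin/ (relative to this script):
devin_swe_passed.json ("passed" or "all"), devin_swe_failed.json ("failed" or "all"),
and devin_swe_outputs.json with both merged ("all" only)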
'''

#fetch devin's outputs into a json file for evaluation
import os
import sys
import json
import requests
from tqdm import tqdm

def get_devin_eval_output(setting):
    """Fetch Devin's SWE-bench diffs from GitHub and dump them as JSON.

    The diffs are listed through the GitHub contents API for the
    ``output_diffs/pass`` and ``output_diffs/fail`` folders of the
    ``CognitionAI/devin-swebench-results`` repository, then downloaded one by
    one from ``raw.githubusercontent.com``.

    Args:
        setting: Which predictions to fetch: ``"passed"``, ``"failed"`` or
            ``"all"``.

    Outputs (written to ``../data/devin/`` relative to this script):
        ``devin_swe_passed.json`` for ``"passed"``/``"all"``,
        ``devin_swe_failed.json`` for ``"failed"``/``"all"``, and
        ``devin_swe_outputs.json`` (passed followed by failed) for ``"all"``.

    Raises:
        ValueError: If ``setting`` is not one of the accepted values.
        requests.HTTPError: If the directory listing or a diff download
            returns an error status (e.g. GitHub API rate limiting).
    """
    valid_settings = ("passed", "failed", "all")
    # Fail fast: an unknown setting used to run to completion without
    # fetching or writing anything, which looked like success.
    if setting not in valid_settings:
        raise ValueError(
            f"Unknown setting {setting!r}; expected one of {valid_settings}"
        )

    repo_url = "CognitionAI/devin-swebench-results"
    folder_path = "output_diffs"
    base_url = "https://api.github.com/repos/"
    diff_suffix = "-diff.txt"  # file names look like "<instance_id>-diff.txt"
    model_name = "Devin"  # Update with actual model name
    request_timeout = 30  # seconds; avoids hanging forever on a stalled connection

    def get_files(subfolder_name):
        """Return one prediction record per diff file in ``subfolder_name``."""
        api_url = f"{base_url}{repo_url}/contents/{folder_path}/{subfolder_name}"
        response = requests.get(api_url, timeout=request_timeout)
        # Surface API failures instead of silently producing an empty file.
        response.raise_for_status()

        files_info = []
        for item in tqdm(response.json()):
            file_name = item["name"]
            # Only "<instance_id>-diff.txt" files carry a usable instance id;
            # anything else would yield a mangled id, so it is skipped.
            if item["type"] != "file" or not file_name.endswith(diff_suffix):
                continue
            file_url = (
                f"https://raw.githubusercontent.com/{repo_url}/main/"
                f"{folder_path}/{subfolder_name}/{file_name}"
            )
            file_response = requests.get(file_url, timeout=request_timeout)
            # Never store an HTTP error page as if it were a patch.
            file_response.raise_for_status()
            files_info.append({
                "instance_id": file_name[:-len(diff_suffix)],
                "model_patch": file_response.text,
                "model_name_or_path": model_name,
                "pass_or_fail": subfolder_name,
            })
        return files_info

    want_passed = setting in ("passed", "all")
    want_failed = setting in ("failed", "all")

    pass_files_info = get_files("pass") if want_passed else []
    failed_files_info = get_files("fail") if want_failed else []

    script_dir = os.path.dirname(os.path.abspath(__file__))
    output_dir = os.path.join(script_dir, "../data/devin/")
    os.makedirs(output_dir, exist_ok=True)

    def write_json(file_name, records):
        """Serialize ``records`` into ``output_dir/file_name``."""
        with open(os.path.join(output_dir, file_name), "w", encoding="utf-8") as out_file:
            json.dump(records, out_file, indent=4)

    if want_passed:
        write_json("devin_swe_passed.json", pass_files_info)
    if want_failed:
        write_json("devin_swe_failed.json", failed_files_info)
    if setting == "all":
        write_json("devin_swe_outputs.json", pass_files_info + failed_files_info)


if __name__ == '__main__':
    # Command-line entry point: expects exactly one argument naming which
    # subset of Devin's outputs to fetch and convert.
    valid_settings = ("passed", "failed", "all")
    if len(sys.argv) != 2 or sys.argv[1] not in valid_settings:
        # Usage errors go to stderr so they are not mistaken for normal output.
        print(
            "Usage: python prepare_devin_outputs_for_evaluation.py <setting>\n"
            "<setting> can be \"passed\", \"failed\" or \"all\"",
            file=sys.stderr,
        )
        sys.exit(1)

    setting = sys.argv[1]
    get_devin_eval_output(setting)

0 comments on commit dc88dac

Please sign in to comment.