'''
Script used to convert Devin's output into the desired JSON format for
evaluation on SWE-bench.

Usage:
    python prepare_devin_outputs_for_evaluation.py

Outputs:
    two JSON files under evaluation/SWE-bench/data/
'''

import json
import os

import requests
from tqdm import tqdm


def get_devin_eval_output():
    """Fetch Devin's pass/fail output diffs from GitHub and write them to
    ``pass_output.json`` / ``fail_output.json`` in the SWE-bench prediction
    format (``instance_id`` / ``model_patch`` / ``model_name_or_path``).

    Network I/O only; returns nothing.
    """
    repo_url = "CognitionAI/devin-swebench-results"
    folder_path = "output_diffs"

    base_url = "https://api.github.com/repos/"
    pass_api_url = f"{base_url}{repo_url}/contents/{folder_path}/pass"
    failed_api_url = f"{base_url}{repo_url}/contents/{folder_path}/fail"

    pass_files_info = []
    failed_files_info = []

    def get_files(api_url, subfolder_name, files_info):
        # List the folder via the GitHub contents API, then download each
        # raw diff file and append one prediction entry per file.
        response = requests.get(api_url)
        if response.status_code != 200:
            # Fail loudly instead of silently producing empty output files.
            print(f"Failed to fetch {api_url}: HTTP {response.status_code}")
            return
        contents = response.json()
        for item in tqdm(contents):
            if item["type"] == "file":
                file_url = (
                    f"https://raw.githubusercontent.com/{repo_url}/main/"
                    f"{folder_path}/{subfolder_name}/{item['name']}"
                )
                file_content = requests.get(file_url).text
                # File names look like "<instance_id>-diff.txt"; strip the
                # fixed suffix (the original "[:-9]" slice) to get the id.
                instance_id = item['name'].removesuffix("-diff.txt")
                files_info.append({
                    "instance_id": instance_id,
                    "model_patch": file_content,
                    "model_name_or_path": "Devin",
                })

    get_files(pass_api_url, "pass", pass_files_info)
    get_files(failed_api_url, "fail", failed_files_info)

    # Write next to this script under ../data/devin/, creating it if needed.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    output_dir = os.path.join(script_dir, "../data/devin/")
    os.makedirs(output_dir, exist_ok=True)

    with open(os.path.join(output_dir, "pass_output.json"), "w") as pass_file:
        json.dump(pass_files_info, pass_file, indent=4)

    with open(os.path.join(output_dir, "fail_output.json"), "w") as fail_file:
        json.dump(failed_files_info, fail_file, indent=4)


if __name__ == '__main__':
    get_devin_eval_output()
- notebooks - `devin_eval_analysis.ipynb`: notebook analyzing devin's outputs - src - - `prepare_devin_outputs_for_evaluation.py`: script fetching and converting devin's output into the desired json file for evaluation + - `prepare_devin_outputs_for_evaluation.py`: script fetching and converting devin's output into the desired json file for evaluation. + - outputs: two json files under `evaluation/SWE-bench/data/` that can be directly used for evaluation From b55541a442ead9861bed46aaa8736074b9b02696 Mon Sep 17 00:00:00 2001 From: Jiaxin-Pei Date: Thu, 21 Mar 2024 13:11:23 -0400 Subject: [PATCH 3/4] update code for fetching and processing devin's outputs --- evaluation/README.md | 10 ++- .../prepare_devin_outputs_for_evaluation.py | 79 +++++++++++++++++++ 2 files changed, 86 insertions(+), 3 deletions(-) create mode 100644 evaluation/SWE-bench/scripts/prepare_devin_outputs_for_evaluation.py diff --git a/evaluation/README.md b/evaluation/README.md index 69e5584601f9..06b0fc1532f1 100644 --- a/evaluation/README.md +++ b/evaluation/README.md @@ -13,6 +13,10 @@ all the preprocessing/evaluation/analysis scripts. ### SWE-bench - notebooks - `devin_eval_analysis.ipynb`: notebook analyzing devin's outputs -- src - - `prepare_devin_outputs_for_evaluation.py`: script fetching and converting devin's output into the desired json file for evaluation. - - outputs: two json files under `evaluation/SWE-bench/data/` that can be directly used for evaluation +- scripts + - `prepare_devin_outputs_for_evaluation.py`: script fetching and converting [devin's output](https://github.com/CognitionAI/devin-swebench-results/tree/main) into the desired json file for evaluation. 
'''
Script used to convert Devin's output into the desired JSON format for
evaluation on SWE-bench.

Usage:
    python prepare_devin_outputs_for_evaluation.py <setting>
    <setting> can be "passed", "failed" or "all"

Outputs:
    JSON prediction files under evaluation/SWE-bench/data/
'''

import json
import os
import sys

import requests
from tqdm import tqdm

# Accepted CLI settings; anything else is rejected up front.
VALID_SETTINGS = ("passed", "failed", "all")


def get_devin_eval_output(setting):
    """Fetch Devin's SWE-bench output diffs from GitHub and write them as
    JSON prediction files.

    :param setting: which subset to fetch — "passed", "failed" or "all".
    :raises ValueError: if *setting* is not one of the accepted values
        (previously an invalid value silently wrote nothing).
    """
    if setting not in VALID_SETTINGS:
        raise ValueError(
            f"setting must be one of {VALID_SETTINGS}, got {setting!r}"
        )

    repo_url = "CognitionAI/devin-swebench-results"
    folder_path = "output_diffs"

    base_url = "https://api.github.com/repos/"
    pass_api_url = f"{base_url}{repo_url}/contents/{folder_path}/pass"
    failed_api_url = f"{base_url}{repo_url}/contents/{folder_path}/fail"

    pass_files_info = []
    failed_files_info = []

    def get_files(api_url, subfolder_name, files_info):
        # List the folder via the GitHub contents API, then download each
        # raw diff file and append one prediction entry per file.
        response = requests.get(api_url)
        if response.status_code != 200:
            # Fail loudly instead of silently producing empty output files.
            print(f"Failed to fetch {api_url}: HTTP {response.status_code}")
            return
        contents = response.json()
        for item in tqdm(contents):
            if item["type"] == "file":
                file_url = (
                    f"https://raw.githubusercontent.com/{repo_url}/main/"
                    f"{folder_path}/{subfolder_name}/{item['name']}"
                )
                file_content = requests.get(file_url).text
                # File names look like "<instance_id>-diff.txt"; strip the
                # fixed suffix (the original "[:-9]" slice) to get the id.
                instance_id = item['name'].removesuffix("-diff.txt")
                files_info.append({
                    "instance_id": instance_id,
                    "model_patch": file_content,
                    "model_name_or_path": "Devin",
                    "pass_or_fail": subfolder_name,
                })

    if setting in ("passed", "all"):
        get_files(pass_api_url, "pass", pass_files_info)
    if setting in ("failed", "all"):
        get_files(failed_api_url, "fail", failed_files_info)

    # Write next to this script under ../data/devin/, creating it if needed.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    output_dir = os.path.join(script_dir, "../data/devin/")
    os.makedirs(output_dir, exist_ok=True)

    if setting in ("passed", "all"):
        with open(os.path.join(output_dir, "devin_swe_passed.json"), "w") as pass_file:
            json.dump(pass_files_info, pass_file, indent=4)

    if setting in ("failed", "all"):
        with open(os.path.join(output_dir, "devin_swe_failed.json"), "w") as fail_file:
            json.dump(failed_files_info, fail_file, indent=4)

    if setting == "all":
        merged_output = pass_files_info + failed_files_info
        with open(os.path.join(output_dir, "devin_swe_outputs.json"), "w") as merge_file:
            json.dump(merged_output, merge_file, indent=4)


if __name__ == '__main__':
    if len(sys.argv) != 2:
        # Name the actual script and the expected placeholder (the original
        # message said "script_name.py" with the placeholder missing).
        print("Usage: python prepare_devin_outputs_for_evaluation.py <setting>")
        sys.exit(1)

    get_devin_eval_output(sys.argv[1])
a/evaluation/SWE-bench/src/prepare_devin_outputs_for_evaluation.py +++ /dev/null @@ -1,62 +0,0 @@ -''' -Script used to convert devin's output into the desired json format for evaluation on SWE-bench - -Usage: - python prepare_devin_outputs_for_evaluation.py - -Outputs: - two json files under evaluation/SWE-bench/data/ - -''' - -import requests -import os -from tqdm import tqdm -import json - -#fetch devin's outputs into a json file for evaluation -def get_devin_eval_output(): - repo_url = "CognitionAI/devin-swebench-results" - folder_path = "output_diffs" - - base_url = "https://api.github.com/repos/" - pass_api_url = f"{base_url}{repo_url}/contents/{folder_path}/pass" - failed_api_url = f"{base_url}{repo_url}/contents/{folder_path}/fail" - - pass_files_info = [] - failed_files_info = [] - - def get_files(api_url, subfolder_name, files_info): - response = requests.get(api_url) - if response.status_code == 200: - contents = response.json() - for item in tqdm(contents): - if item["type"] == "file": - file_url = f"https://raw.githubusercontent.com/{repo_url}/main/{folder_path}/{subfolder_name}/{item['name']}" - file_content = requests.get(file_url).text - instance_id = item['name'][:-9] - model_name = "Devin" # Update with actual model name - files_info.append({ - "instance_id": instance_id, - "model_patch": file_content, - "model_name_or_path": model_name - }) - - get_files(pass_api_url, "pass", pass_files_info) - get_files(failed_api_url, "fail", failed_files_info) - - script_dir = os.path.dirname(os.path.abspath(__file__)) - output_dir = os.path.join(script_dir, "../data/devin/") - - if not os.path.exists(output_dir): - os.makedirs(output_dir) - - with open(os.path.join(output_dir, "pass_output.json"), "w") as pass_file: - json.dump(pass_files_info, pass_file, indent=4) - - with open(os.path.join(output_dir, "fail_output.json"), "w") as fail_file: - json.dump(failed_files_info, fail_file, indent=4) - - -if __name__ == '__main__': - get_devin_eval_output() \ No 
newline at end of file