Skip to content

Commit

Permalink
Merge branch 'All-Hands-AI:main' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
enyst authored Nov 29, 2024
2 parents 9b2f1ac + ea994b6 commit e480910
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 6 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/integration-runner.yml
Original file line number Diff line number Diff line change
Expand Up @@ -141,8 +141,8 @@ jobs:
id: create_comment
uses: KeisukeYamashita/create-comment@v1
with:
# if triggered by PR, use PR number, otherwise use 5077 as fallback issue number for manual triggers
number: ${{ github.event_name == 'pull_request' && github.event.pull_request.number || 5077 }}
# if triggered by PR, use PR number, otherwise use 5318 as fallback issue number for manual triggers
number: ${{ github.event_name == 'pull_request' && github.event.pull_request.number || 5318 }}
unique: false
comment: |
Trigger by: ${{ github.event_name == 'pull_request' && format('Pull Request (integration-test label on PR #{0})', github.event.pull_request.number) || (github.event_name == 'workflow_dispatch' && format('Manual Trigger: {0}', github.event.inputs.reason)) || 'Nightly Scheduled Run' }}
Expand All @@ -155,4 +155,4 @@ jobs:
DeepSeek LLM Test Results:
${{ env.INTEGRATION_TEST_REPORT_DEEPSEEK }}
---
Download evaluation outputs (includes both Haiku and DeepSeek results): [Download](${{ steps.upload_results_artifact.outputs.artifact-url }})
Download testing outputs (includes both Haiku and DeepSeek results): [Download](${{ steps.upload_results_artifact.outputs.artifact-url }})
11 changes: 10 additions & 1 deletion evaluation/integration_tests/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,8 @@ def load_integration_tests() -> pd.DataFrame:
)

df = pd.read_json(output_file, lines=True, orient='records')

# record success and reason for failure for the final report
df['success'] = df['test_result'].apply(lambda x: x['success'])
df['reason'] = df['test_result'].apply(lambda x: x['reason'])
logger.info('-' * 100)
Expand All @@ -231,9 +233,16 @@ def load_integration_tests() -> pd.DataFrame:
)
logger.info('-' * 100)

# record cost for each instance, with 3 decimal places
df['cost'] = df['metrics'].apply(lambda x: round(x['accumulated_cost'], 3))
logger.info(f'Total cost: USD {df["cost"].sum():.2f}')

report_file = os.path.join(metadata.eval_output_dir, 'report.md')
with open(report_file, 'w') as f:
f.write(
f'Success rate: {df["success"].mean():.2%} ({df["success"].sum()}/{len(df)})\n'
)
f.write(df[['instance_id', 'success', 'reason']].to_markdown(index=False))
f.write(f'\nTotal cost: USD {df["cost"].sum():.2f}\n')
f.write(
df[['instance_id', 'success', 'reason', 'cost']].to_markdown(index=False)
)
50 changes: 48 additions & 2 deletions openhands/runtime/impl/runloop/runloop_runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ class RunloopRuntime(EventStreamRuntime):
"""The RunloopRuntime class is an EventStreamRuntime that utilizes Runloop Devbox as a runtime environment."""

_sandbox_port: int = 4444
_vscode_port: int = 4445

def __init__(
self,
Expand All @@ -109,6 +110,7 @@ def __init__(
env_vars: dict[str, str] | None = None,
status_callback: Callable | None = None,
attach_to_existing: bool = False,
headless_mode: bool = True,
):
assert config.runloop_api_key is not None, 'Runloop API key is required'
self.devbox: DevboxView | None = None
Expand All @@ -127,9 +129,11 @@ def __init__(
env_vars,
status_callback,
attach_to_existing,
headless_mode,
)
# Buffer for container logs
self.log_buffer: LogBuffer | None = None
self._vscode_url: str | None = None

@tenacity.retry(
stop=tenacity.stop_after_attempt(120),
Expand Down Expand Up @@ -192,7 +196,7 @@ def _create_new_devbox(self) -> DevboxView:
environment_variables={'DEBUG': 'true'} if self.config.debug else {},
prebuilt='openhands',
launch_parameters=LaunchParameters(
available_ports=[self._sandbox_port],
available_ports=[self._sandbox_port, self._vscode_port],
resource_size_request='LARGE',
),
metadata={'container-name': self.container_name},
Expand Down Expand Up @@ -221,7 +225,7 @@ async def connect(self):

# Hook up logs
self.log_buffer = RunloopLogBuffer(self.runloop_api_client, self.devbox.id)
self.api_url = f'https://{tunnel.url}'
self.api_url = tunnel.url
logger.info(f'Container started. Server url: {self.api_url}')

# End Runloop connect
Expand Down Expand Up @@ -273,3 +277,45 @@ def close(self, rm_all_containers: bool | None = True):

if self.devbox:
self.runloop_api_client.devboxes.shutdown(self.devbox.id)

@property
def vscode_url(self) -> str | None:
    """Return the VS Code URL for the running devbox, or ``None``.

    Returns ``None`` when VS Code is disabled, the devbox is absent or not
    in the ``'running'`` state, the runtime reports no connection token, or
    any step of the lookup/tunnel creation fails.

    The URL is computed at most once per runtime instance: after the first
    successful lookup it is cached in ``self._vscode_url``.
    """
    if self.vscode_enabled and self.devbox and self.devbox.status == 'running':
        # Cached from a previous successful call — reuse it.
        if self._vscode_url is not None:
            return self._vscode_url

        try:
            # Ask the action-execution server inside the devbox for the
            # VS Code connection token.
            with send_request(
                self.session,
                'GET',
                f'{self.api_url}/vscode/connection_token',
                timeout=10,
            ) as response:
                response_json = response.json()
                assert isinstance(response_json, dict)
                # A null token means VS Code is not available — not an error.
                if response_json['token'] is None:
                    return None
                token = response_json['token']

            # Open a Runloop tunnel to the VS Code port and append the
            # token and workspace folder as query parameters.
            self._vscode_url = (
                self.runloop_api_client.devboxes.create_tunnel(
                    id=self.devbox.id,
                    port=self._vscode_port,
                ).url
                + f'/?tkn={token}&folder={self.config.workspace_mount_path_in_sandbox}'
            )

            self.log(
                'debug',
                f'VSCode URL: {self._vscode_url}',
            )

            return self._vscode_url
        except Exception as e:
            # Best-effort: log and fall back to "no VS Code" rather than
            # propagating a failure from the token fetch or tunnel creation.
            self.log(
                'error',
                f'Failed to create vscode tunnel {e}',
            )
            return None
    else:
        return None

0 comments on commit e480910

Please sign in to comment.