Skip to content

Commit

Permalink
Merge branch 'All-Hands-AI:main' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
enyst authored Nov 29, 2024
2 parents 9b2f1ac + ea994b6 commit e480910
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 6 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/integration-runner.yml
Original file line number Diff line number Diff line change
Expand Up @@ -141,8 +141,8 @@ jobs:
id: create_comment
uses: KeisukeYamashita/create-comment@v1
with:
# if triggered by PR, use PR number, otherwise use 5077 as fallback issue number for manual triggers
number: ${{ github.event_name == 'pull_request' && github.event.pull_request.number || 5077 }}
# if triggered by PR, use PR number, otherwise use 5318 as fallback issue number for manual triggers
number: ${{ github.event_name == 'pull_request' && github.event.pull_request.number || 5318 }}
unique: false
comment: |
Trigger by: ${{ github.event_name == 'pull_request' && format('Pull Request (integration-test label on PR #{0})', github.event.pull_request.number) || (github.event_name == 'workflow_dispatch' && format('Manual Trigger: {0}', github.event.inputs.reason)) || 'Nightly Scheduled Run' }}
Expand All @@ -155,4 +155,4 @@ jobs:
DeepSeek LLM Test Results:
${{ env.INTEGRATION_TEST_REPORT_DEEPSEEK }}
---
Download evaluation outputs (includes both Haiku and DeepSeek results): [Download](${{ steps.upload_results_artifact.outputs.artifact-url }})
Download testing outputs (includes both Haiku and DeepSeek results): [Download](${{ steps.upload_results_artifact.outputs.artifact-url }})
11 changes: 10 additions & 1 deletion evaluation/integration_tests/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,8 @@ def load_integration_tests() -> pd.DataFrame:
)

df = pd.read_json(output_file, lines=True, orient='records')

# record success and reason for failure for the final report
df['success'] = df['test_result'].apply(lambda x: x['success'])
df['reason'] = df['test_result'].apply(lambda x: x['reason'])
logger.info('-' * 100)
Expand All @@ -231,9 +233,16 @@ def load_integration_tests() -> pd.DataFrame:
)
logger.info('-' * 100)

# record cost for each instance, with 3 decimal places
df['cost'] = df['metrics'].apply(lambda x: round(x['accumulated_cost'], 3))
logger.info(f'Total cost: USD {df["cost"].sum():.2f}')

report_file = os.path.join(metadata.eval_output_dir, 'report.md')
with open(report_file, 'w') as f:
f.write(
f'Success rate: {df["success"].mean():.2%} ({df["success"].sum()}/{len(df)})\n'
)
f.write(df[['instance_id', 'success', 'reason']].to_markdown(index=False))
f.write(f'\nTotal cost: USD {df["cost"].sum():.2f}\n')
f.write(
df[['instance_id', 'success', 'reason', 'cost']].to_markdown(index=False)
)
50 changes: 48 additions & 2 deletions openhands/runtime/impl/runloop/runloop_runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ class RunloopRuntime(EventStreamRuntime):
"""The RunloopRuntime class is an EventStreamRuntime that utilizes Runloop Devbox as a runtime environment."""

_sandbox_port: int = 4444
_vscode_port: int = 4445

def __init__(
self,
Expand All @@ -109,6 +110,7 @@ def __init__(
env_vars: dict[str, str] | None = None,
status_callback: Callable | None = None,
attach_to_existing: bool = False,
headless_mode: bool = True,
):
assert config.runloop_api_key is not None, 'Runloop API key is required'
self.devbox: DevboxView | None = None
Expand All @@ -127,9 +129,11 @@ def __init__(
env_vars,
status_callback,
attach_to_existing,
headless_mode,
)
# Buffer for container logs
self.log_buffer: LogBuffer | None = None
self._vscode_url: str | None = None

@tenacity.retry(
stop=tenacity.stop_after_attempt(120),
Expand Down Expand Up @@ -192,7 +196,7 @@ def _create_new_devbox(self) -> DevboxView:
environment_variables={'DEBUG': 'true'} if self.config.debug else {},
prebuilt='openhands',
launch_parameters=LaunchParameters(
available_ports=[self._sandbox_port],
available_ports=[self._sandbox_port, self._vscode_port],
resource_size_request='LARGE',
),
metadata={'container-name': self.container_name},
Expand Down Expand Up @@ -221,7 +225,7 @@ async def connect(self):

# Hook up logs
self.log_buffer = RunloopLogBuffer(self.runloop_api_client, self.devbox.id)
self.api_url = f'https://{tunnel.url}'
self.api_url = tunnel.url
logger.info(f'Container started. Server url: {self.api_url}')

# End Runloop connect
Expand Down Expand Up @@ -273,3 +277,45 @@ def close(self, rm_all_containers: bool | None = True):

if self.devbox:
self.runloop_api_client.devboxes.shutdown(self.devbox.id)

@property
def vscode_url(self) -> str | None:
    """Return the VS Code URL for the running devbox, or ``None``.

    Returns ``None`` when VS Code is disabled, the devbox is absent or not
    in the ``'running'`` state, the runtime reports no connection token, or
    any step of the lookup/tunnel creation fails.

    The URL is computed at most once per runtime instance: after the first
    successful lookup it is cached in ``self._vscode_url``.
    """
    if self.vscode_enabled and self.devbox and self.devbox.status == 'running':
        # Cached from a previous successful call — reuse it.
        if self._vscode_url is not None:
            return self._vscode_url

        try:
            # Ask the action-execution server inside the devbox for the
            # VS Code connection token.
            with send_request(
                self.session,
                'GET',
                f'{self.api_url}/vscode/connection_token',
                timeout=10,
            ) as response:
                response_json = response.json()
                assert isinstance(response_json, dict)
                # A null token means VS Code is not available — not an error.
                if response_json['token'] is None:
                    return None
                token = response_json['token']

            # Open a Runloop tunnel to the VS Code port and append the
            # token and workspace folder as query parameters.
            self._vscode_url = (
                self.runloop_api_client.devboxes.create_tunnel(
                    id=self.devbox.id,
                    port=self._vscode_port,
                ).url
                + f'/?tkn={token}&folder={self.config.workspace_mount_path_in_sandbox}'
            )

            self.log(
                'debug',
                f'VSCode URL: {self._vscode_url}',
            )

            return self._vscode_url
        except Exception as e:
            # Best-effort: log and fall back to "no VS Code" rather than
            # propagating a failure from the token fetch or tunnel creation.
            self.log(
                'error',
                f'Failed to create vscode tunnel {e}',
            )
            return None
    else:
        return None

0 comments on commit e480910

Please sign in to comment.