Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: tool parity with current developer system #527

Merged
merged 13 commits into from
Dec 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions crates/developer/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ lazy_static = "1.5"
kill_tree = "0.2.4"
shellexpand = "3.1.0"
indoc = "2.0.5"
xcap = "0.0.14"

[dev-dependencies]
sysinfo = "0.32.1"
Expand Down
138 changes: 137 additions & 1 deletion crates/developer/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ use std::{
collections::HashMap,
fs,
future::Future,
io::Cursor,
path::{Path, PathBuf},
pin::Pin,
};
Expand All @@ -35,6 +36,7 @@ use tokio::io::{stdin, stdout};
use tracing::info;
use tracing_appender::rolling::{RollingFileAppender, Rotation};
use tracing_subscriber::{self, EnvFilter};
use xcap::{Monitor, Window};

pub struct DeveloperRouter {
tools: Vec<Tool>,
Expand Down Expand Up @@ -79,6 +81,37 @@ impl DeveloperRouter {
}),
);

let list_windows_tool = Tool::new(
"list_windows".to_string(),
"List all open windows".to_string(),
json!({
"type": "object",
"required": [],
"properties": {}
}),
);

let screen_capture_tool = Tool::new(
"screen_capture".to_string(),
"Capture a screenshot of a specified display or window.\nYou can capture either:\n1. A full display (monitor) using the display parameter\n2. A specific window by its title using the window_title parameter\n\nOnly one of display or window_title should be specified.".to_string(),
json!({
"type": "object",
"required": [],
"properties": {
"display": {
"type": "integer",
"default": 0,
"description": "The display number to capture (0 is main display)"
},
"window_title": {
"type": "string",
"default": null,
"description": "Optional: the exact title of the window to capture. use the list_windows tool to find the available windows."
}
}
}),
);

let instructions = "Developer instructions...".to_string(); // Reuse from original code

let cwd = std::env::current_dir().unwrap();
Expand All @@ -93,7 +126,7 @@ impl DeveloperRouter {
resources.insert(uri, resource);

Self {
tools: vec![bash_tool, text_editor_tool],
tools: vec![bash_tool, text_editor_tool, list_windows_tool, screen_capture_tool],
cwd: Arc::new(Mutex::new(cwd)),
active_resources: Arc::new(Mutex::new(resources)),
file_history: Arc::new(Mutex::new(HashMap::new())),
Expand Down Expand Up @@ -507,6 +540,96 @@ impl DeveloperRouter {
Ok(())
}

// Implement window listing functionality
async fn list_windows(&self, _params: Value) -> Result<Vec<Content>, ToolError> {
let windows = Window::all()
.map_err(|_| ToolError::ExecutionError("Failed to list windows".into()))?;

let window_titles: Vec<String> = windows
.into_iter()
.map(|w| w.title().to_string())
.collect();

Ok(vec![
Content::text("The following windows are available.").with_audience(vec![Role::Assistant]),
Content::text(format!("Available windows:\n{}", window_titles.join("\n")))
.with_audience(vec![Role::User])
.with_priority(0.0),
])
}

async fn screen_capture(&self, params: Value) -> Result<Vec<Content>, ToolError> {
let mut image = if let Some(window_title) = params.get("window_title").and_then(|v| v.as_str()) {
// Try to find and capture the specified window
let windows = Window::all()
.map_err(|_| ToolError::ExecutionError("Failed to list windows".into()))?;

let window = windows
.into_iter()
.find(|w| w.title() == window_title)
.ok_or_else(|| {
ToolError::ExecutionError(format!(
"No window found with title '{}'",
window_title
))
})?;

window.capture_image().map_err(|e| {
ToolError::ExecutionError(format!(
"Failed to capture window '{}': {}",
window_title, e
))
})?
} else {
// Default to display capture if no window title is specified
let display = params.get("display").and_then(|v| v.as_u64()).unwrap_or(0) as usize;

let monitors = Monitor::all()
.map_err(|_| ToolError::ExecutionError("Failed to access monitors".into()))?;
let monitor = monitors.get(display).ok_or_else(|| {
ToolError::ExecutionError(format!(
"{} was not an available monitor, {} found.",
display,
monitors.len()
))
})?;

monitor.capture_image().map_err(|e| {
ToolError::ExecutionError(format!("Failed to capture display {}: {}", display, e))
})?
};

// Resize the image to a reasonable width while maintaining aspect ratio
let max_width = 768;
if image.width() > max_width {
let scale = max_width as f32 / image.width() as f32;
let new_height = (image.height() as f32 * scale) as u32;
image = xcap::image::imageops::resize(
&image,
max_width,
new_height,
xcap::image::imageops::FilterType::Lanczos3,
)
};

let mut bytes: Vec<u8> = Vec::new();
image
.write_to(&mut Cursor::new(&mut bytes), xcap::image::ImageFormat::Png)
.map_err(|e| {
ToolError::ExecutionError(format!("Failed to write image buffer {}", e))
})?;

// Convert to base64
let data = base64::prelude::BASE64_STANDARD.encode(bytes);

Ok(vec![
Content::text("Screenshot captured").with_audience(vec![Role::Assistant]),
Content::image(data, "image/png")
.with_audience(vec![Role::User])
.with_priority(0.0)
])
}

async fn read_resource_internal(&self, uri: &str) -> Result<String, ResourceError> {
// Ensure the resource exists in the active resources map
let active_resources = self.active_resources.lock().unwrap();
Expand Down Expand Up @@ -574,6 +697,17 @@ impl DeveloperRouter {
))),
}
}

// Add this helper function similar to the other tool calls
async fn call_list_windows(&self, args: Value) -> Result<Value, ToolError> {
let result = self.list_windows(args).await;
self.map_result_to_value(result)
}

async fn call_screen_capture(&self, args: Value) -> Result<Value, ToolError> {
let result = self.screen_capture(args).await;
self.map_result_to_value(result)
}
}

impl Router for DeveloperRouter {
Expand All @@ -600,6 +734,8 @@ impl Router for DeveloperRouter {
match tool_name.as_str() {
"bash" => this.call_bash(arguments).await,
"text_editor" => this.call_text_editor(arguments).await,
"list_windows" => this.call_list_windows(arguments).await,
"screen_capture" => this.call_screen_capture(arguments).await,
_ => Err(ToolError::NotFound(format!("Tool {} not found", tool_name))),
}
})
Expand Down