Skip to content

Commit

Permalink
Add headless browser to the WebSurferAgent, closes microsoft#1481
Browse files Browse the repository at this point in the history
  • Loading branch information
vijaykramesh committed Feb 6, 2024
1 parent 26daa18 commit 266a9ce
Show file tree
Hide file tree
Showing 7 changed files with 252 additions and 51 deletions.
28 changes: 17 additions & 11 deletions autogen/agentchat/contrib/web_surfer.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,23 @@
import json
import copy
import copy
import logging
import re
from dataclasses import dataclass
from typing import Dict, List, Optional, Union, Callable, Literal, Tuple
from autogen import Agent, ConversableAgent, AssistantAgent, UserProxyAgent, GroupChatManager, GroupChat, OpenAIWrapper
from autogen.browser_utils import SimpleTextBrowser
from autogen.code_utils import content_str
from datetime import datetime
from autogen.token_count_utils import count_token, get_max_token_limit
from typing import Dict, List, Optional, Union, Callable, Literal, Tuple

from autogen import Agent, ConversableAgent, AssistantAgent, UserProxyAgent, OpenAIWrapper
from autogen.browser_utils import SimpleTextBrowser, HeadlessChromeBrowser
from autogen.oai.openai_utils import filter_config
from autogen.token_count_utils import count_token, get_max_token_limit

logger = logging.getLogger(__name__)


class WebSurferAgent(ConversableAgent):
"""(In preview) An agent that acts as a basic web surfer that can search the web and visit web pages."""
"""(In preview) An agent that acts as a basic web surfer that can search the web and visit web pages.
Defaults to a simple text-based browser.
Can be configured to use a headless Chrome browser by providing a browser_config dictionary with the key "headless" set to True.
"""

DEFAULT_PROMPT = (
"You are a helpful AI assistant with access to a web browser (via the provided functions). In fact, YOU ARE THE ONLY MEMBER OF YOUR PARTY WITH ACCESS TO A WEB BROWSER, so please help out where you can by performing web searches, navigating pages, and reporting what you find. Today's date is "
Expand Down Expand Up @@ -84,7 +86,11 @@ def __init__(
if browser_config is None:
self.browser = SimpleTextBrowser()
else:
self.browser = SimpleTextBrowser(**browser_config)
headless = browser_config.pop("headless")
if headless:
self.browser = HeadlessChromeBrowser(**browser_config)
else:
self.browser = SimpleTextBrowser(**browser_config)

# Create a copy of the llm_config for the inner monologue agents to use, and set them up with function calling
if llm_config is None: # Nothing to copy
Expand Down Expand Up @@ -214,7 +220,7 @@ def _browser_state():
current_page = self.browser.viewport_current_page
total_pages = len(self.browser.viewport_pages)

header += f"Viewport position: Showing page {current_page+1} of {total_pages}.\n"
header += f"Viewport position: Showing page {current_page + 1} of {total_pages}.\n"
return (header, self.browser.viewport)

def _informational_search(query):
Expand All @@ -225,7 +231,7 @@ def _informational_search(query):
def _navigational_search(query):
self.browser.visit_page(f"bing: {query}")

# Extract the first linl
# Extract the first link
m = re.search(r"\[.*?\]\((http.*?)\)", self.browser.page_content)
if m:
self.browser.visit_page(m.group(1))
Expand Down
7 changes: 7 additions & 0 deletions autogen/browser_utils/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from .simple_text_browser import SimpleTextBrowser
from .headless_chrome_browser import HeadlessChromeBrowser

# Public API of this package: the two browser classes imported above.
__all__ = (
"SimpleTextBrowser",
"HeadlessChromeBrowser",
)
48 changes: 48 additions & 0 deletions autogen/browser_utils/abstract_browser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
from abc import ABC, abstractmethod
from typing import Optional, Union, Dict


class AbstractBrowser(ABC):
    """Interface that every web browser backend has to implement.

    All members are abstract, so the class itself cannot be instantiated;
    a concrete browser must override each of them.
    """

    @abstractmethod
    def __init__(
        self,
        start_page: Optional[str] = "about:blank",
        viewport_size: Optional[int] = 1024 * 8,
        downloads_folder: Optional[str] = None,
        bing_api_key: Optional[str] = None,
        request_kwargs: Optional[Dict] = None,
    ):
        """Declare the constructor arguments shared by browser implementations."""

    @property
    @abstractmethod
    def address(self) -> str:
        """Address of the page the browser is currently on."""

    @abstractmethod
    def set_address(self, uri_or_path):
        """Point the browser at ``uri_or_path``."""

    @property
    @abstractmethod
    def viewport(self) -> str:
        """Content of the current viewport."""

    @property
    @abstractmethod
    def page_content(self) -> str:
        """Content of the current page."""

    @abstractmethod
    def page_down(self):
        """Move the viewport down."""

    @abstractmethod
    def page_up(self):
        """Move the viewport up."""

    @abstractmethod
    def visit_page(self, path_or_uri):
        """Open the page identified by ``path_or_uri``."""
110 changes: 110 additions & 0 deletions autogen/browser_utils/headless_chrome_browser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
import re

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By

from autogen.browser_utils.abstract_browser import AbstractBrowser

# Optional PDF support
IS_PDF_CAPABLE = False
try:
import pdfminer
import pdfminer.high_level

IS_PDF_CAPABLE = True
except ModuleNotFoundError:
pass

# Other optional dependencies
try:
import pathvalidate
except ModuleNotFoundError:
pass

from typing import Optional, Union, Dict


class HeadlessChromeBrowser(AbstractBrowser):
    """(In preview) A Selenium powered headless Chrome browser. Suitable for Agentic use.

    The browser drives a real Chrome instance through Selenium and exposes the
    rendered page as plain text (scripts and styles stripped).

    Notes:
        * ``viewport_size``, ``downloads_folder``, ``bing_api_key`` and
          ``request_kwargs`` are stored on the instance for parity with the
          other browser implementation but are not used by this class.
        * The whole page text is treated as a single viewport page, so
          ``viewport`` and ``page_content`` return the same text.
        * Call :meth:`close` when done to terminate the Chrome process.
    """

    def __init__(
        self,
        start_page: Optional[str] = "about:blank",
        viewport_size: Optional[int] = 1024 * 8,
        downloads_folder: Optional[Union[str, None]] = None,
        bing_api_key: Optional[Union[str, None]] = None,
        request_kwargs: Optional[Union[Dict, None]] = None,
    ):
        """Store the configuration and launch headless Chrome on ``start_page``.

        Args:
            start_page: Address loaded right after the browser starts. A falsy
                value falls back to ``"about:blank"``.
            viewport_size: Stored only; not used by this implementation.
            downloads_folder: Stored only; not used by this implementation.
            bing_api_key: Stored only; searches go through the Bing web page.
            request_kwargs: Stored only; not used by this implementation.
        """
        # A None/empty start page would make driver.get() fail, so fall back
        # to the documented default instead.
        self.start_page = start_page if start_page else "about:blank"
        self.driver = None
        self.viewport_size = viewport_size  # Applies only to the standard uri types
        self.downloads_folder = downloads_folder
        self.history = list()
        self.page_title = None
        self.viewport_current_page = 0
        self.viewport_pages = list()
        self.bing_api_key = bing_api_key
        self.request_kwargs = request_kwargs

        self._start_browser()

    def _start_browser(self):
        """Launch headless Chrome and load the start page."""
        chrome_options = Options()
        chrome_options.add_argument("--headless")
        self.driver = webdriver.Chrome(options=chrome_options)
        try:
            self.driver.get(self.start_page)
            self.history.append(self.start_page)
            self._sync_page_state()
        except Exception:
            # Do not leave an orphaned Chrome process behind if start-up fails.
            self.close()
            raise

    def _sync_page_state(self):
        """Refresh the title and viewport bookkeeping after a navigation.

        Callers that report "page X of Y" read ``viewport_current_page`` and
        ``len(viewport_pages)``; keeping a single (start, end) entry makes that
        report "page 1 of 1" instead of "page 1 of 0".
        """
        # An empty title is normalised to None so "no title" has one representation.
        self.page_title = self.driver.title or None
        self.viewport_current_page = 0
        self.viewport_pages = [(0, len(self.page_content))]

    def close(self):
        """Quit the Chrome driver, if it is running. Safe to call more than once."""
        if self.driver is None:
            return
        try:
            self.driver.quit()
        finally:
            self.driver = None

    @property
    def address(self) -> str:
        """Return the URL currently loaded in the browser."""
        return self.driver.current_url

    def set_address(self, uri_or_path):
        """Navigate to ``uri_or_path``.

        An address starting with ``"bing:"`` is treated as a search query for
        Bing; anything else is handed to the driver unchanged.
        """
        if uri_or_path.startswith("bing:"):
            self._bing_search(uri_or_path[len("bing:"):].strip())
        else:
            self.driver.get(uri_or_path)
        self.history.append(uri_or_path)
        self._sync_page_state()

    @property
    def viewport(self) -> str:
        """Return the content of the current viewport (the whole page text)."""
        return self.page_content

    @property
    def page_content(self) -> str:
        """Return the text of the currently rendered page."""
        # Guard against documents without a <body>, where body.innerHTML would throw.
        html = self.driver.execute_script("return document.body ? document.body.innerHTML : '';")
        return self._process_html(html or "")

    def _process_html(self, html: str) -> str:
        """Process the raw HTML content and return the processed text."""
        soup = BeautifulSoup(html, "html.parser")

        # Remove javascript and style blocks
        for element in soup(["script", "style"]):
            element.extract()

        # Convert to text
        text = soup.get_text()

        # Collapse runs of three or more newlines down to one blank line
        text = re.sub(r"\n{3,}", "\n\n", text).strip()

        return text

    def _bing_search(self, query):
        """Run ``query`` through the search box on the Bing home page."""
        self.driver.get("https://www.bing.com")

        search_bar = self.driver.find_element(By.NAME, "q")
        search_bar.clear()
        search_bar.send_keys(query)
        search_bar.submit()

    def page_down(self):
        """Scroll the browser window to the bottom of the page."""
        self.driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

    def page_up(self):
        """Scroll the browser window back to the top of the page."""
        self.driver.execute_script("window.scrollTo(0, 0);")

    def visit_page(self, path_or_uri):
        """Navigate to ``path_or_uri``; equivalent to :meth:`set_address`."""
        self.set_address(path_or_uri)
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
import json
import io
import mimetypes
import os
import requests
import re
import markdownify
import io
import uuid
import mimetypes
from urllib.parse import urljoin, urlparse

import markdownify
import requests
from bs4 import BeautifulSoup
from dataclasses import dataclass
from typing import Dict, List, Optional, Union, Callable, Literal, Tuple

from autogen.browser_utils.abstract_browser import AbstractBrowser

# Optional PDF support
IS_PDF_CAPABLE = False
Expand All @@ -27,17 +27,19 @@
except ModuleNotFoundError:
pass

from typing import Optional, Union, Dict


class SimpleTextBrowser:
class SimpleTextBrowser(AbstractBrowser):
"""(In preview) An extremely simple text-based web browser comparable to Lynx. Suitable for Agentic use."""

def __init__(
self,
start_page: Optional[str] = "about:blank",
viewport_size: Optional[int] = 1024 * 8,
downloads_folder: Optional[Union[str, None]] = None,
bing_api_key: Optional[Union[str, None]] = None,
request_kwargs: Optional[Union[Dict, None]] = None,
self,
start_page: Optional[str] = "about:blank",
viewport_size: Optional[int] = 1024 * 8,
downloads_folder: Optional[Union[str, None]] = None,
bing_api_key: Optional[Union[str, None]] = None,
request_kwargs: Optional[Union[Dict, None]] = None,
):
self.start_page = start_page
self.viewport_size = viewport_size # Applies only to the standard uri types
Expand All @@ -64,7 +66,7 @@ def set_address(self, uri_or_path):
if uri_or_path == "about:blank":
self._set_page_content("")
elif uri_or_path.startswith("bing:"):
self._bing_search(uri_or_path[len("bing:") :].strip())
self._bing_search(uri_or_path[len("bing:"):].strip())
else:
if not uri_or_path.startswith("http:") and not uri_or_path.startswith("https:"):
uri_or_path = urljoin(self.address, uri_or_path)
Expand All @@ -77,7 +79,7 @@ def set_address(self, uri_or_path):
def viewport(self) -> str:
"""Return the content of the current viewport."""
bounds = self.viewport_pages[self.viewport_current_page]
return self.page_content[bounds[0] : bounds[1]]
return self.page_content[bounds[0]: bounds[1]]

@property
def page_content(self) -> str:
Expand Down Expand Up @@ -175,8 +177,8 @@ def _bing_search(self, query):
self.page_title = f"{query} - Search"

content = (
f"A Bing search for '{query}' found {len(web_snippets) + len(news_snippets)} results:\n\n## Web Results\n"
+ "\n\n".join(web_snippets)
f"A Bing search for '{query}' found {len(web_snippets) + len(news_snippets)} results:\n\n## Web Results\n"
+ "\n\n".join(web_snippets)
)
if len(news_snippets) > 0:
content += "\n\n## News Results:\n" + "\n\n".join(news_snippets)
Expand Down Expand Up @@ -223,7 +225,7 @@ def _fetch_page(self, url):
if title_elm and len(title_elm) > 0:
main_title = title_elm.string
webpage_text = (
"# " + main_title + "\n\n" + markdownify.MarkdownConverter().convert_soup(body_elm)
"# " + main_title + "\n\n" + markdownify.MarkdownConverter().convert_soup(body_elm)
)
else:
webpage_text = markdownify.MarkdownConverter().convert_soup(soup)
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@
"teachable": ["chromadb"],
"lmm": ["replicate", "pillow"],
"graphs": ["networkx~=3.2.1", "matplotlib~=3.8.1"],
"websurfer": ["beautifulsoup4", "markdownify", "pdfminer.six", "pathvalidate"],
"websurfer": ["beautifulsoup4", "markdownify", "pdfminer.six", "pathvalidate", "selenium"],
"redis": ["redis"],
},
classifiers=[
Expand Down
Loading

0 comments on commit 266a9ce

Please sign in to comment.