diff --git a/llama-index-integrations/tools/llama-index-tools-docker-code/.gitignore b/llama-index-integrations/tools/llama-index-tools-docker-code/.gitignore new file mode 100644 index 0000000000000..990c18de22908 --- /dev/null +++ b/llama-index-integrations/tools/llama-index-tools-docker-code/.gitignore @@ -0,0 +1,153 @@ +llama_index/_static +.DS_Store +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +bin/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +etc/ +include/ +lib/ +lib64/ +parts/ +sdist/ +share/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +.ruff_cache + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints +notebooks/ + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ +pyvenv.cfg + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# Jetbrains +.idea +modules/ +*.swp + +# VsCode +.vscode + +# pipenv +Pipfile +Pipfile.lock + +# pyright +pyrightconfig.json diff --git a/llama-index-integrations/tools/llama-index-tools-docker-code/BUILD b/llama-index-integrations/tools/llama-index-tools-docker-code/BUILD new file mode 100644 index 0000000000000..0896ca890d8bf --- /dev/null +++ b/llama-index-integrations/tools/llama-index-tools-docker-code/BUILD @@ -0,0 +1,3 @@ +poetry_requirements( + name="poetry", +) diff --git a/llama-index-integrations/tools/llama-index-tools-docker-code/CHANGELOG.md b/llama-index-integrations/tools/llama-index-tools-docker-code/CHANGELOG.md new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/llama-index-integrations/tools/llama-index-tools-docker-code/Makefile b/llama-index-integrations/tools/llama-index-tools-docker-code/Makefile new file mode 100644 index 0000000000000..b9eab05aa3706 --- /dev/null +++ b/llama-index-integrations/tools/llama-index-tools-docker-code/Makefile @@ -0,0 +1,17 @@ +GIT_ROOT ?= $(shell git rev-parse --show-toplevel) + +help: ## Show all Makefile targets. + @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[33m%-30s\033[0m %s\n", $$1, $$2}' + +format: ## Run code autoformatters (black). + pre-commit install + git ls-files | xargs pre-commit run black --files + +lint: ## Run linters: pre-commit (black, ruff, codespell) and mypy + pre-commit install && git ls-files | xargs pre-commit run --show-diff-on-failure --files + +test: ## Run tests via pytest. 
+ pytest tests + +watch-docs: ## Build and watch documentation. + sphinx-autobuild docs/ docs/_build/html --open-browser --watch $(GIT_ROOT)/llama_index/ diff --git a/llama-index-integrations/tools/llama-index-tools-docker-code/README.md b/llama-index-integrations/tools/llama-index-tools-docker-code/README.md new file mode 100644 index 0000000000000..dbe981df4a89e --- /dev/null +++ b/llama-index-integrations/tools/llama-index-tools-docker-code/README.md @@ -0,0 +1,3 @@ +# Docker Code Interpreter Tool Spec + +[TODO] diff --git a/llama-index-integrations/tools/llama-index-tools-docker-code/llama_index/tools/docker_code/BUILD b/llama-index-integrations/tools/llama-index-tools-docker-code/llama_index/tools/docker_code/BUILD new file mode 100644 index 0000000000000..db46e8d6c978c --- /dev/null +++ b/llama-index-integrations/tools/llama-index-tools-docker-code/llama_index/tools/docker_code/BUILD @@ -0,0 +1 @@ +python_sources() diff --git a/llama-index-integrations/tools/llama-index-tools-docker-code/llama_index/tools/docker_code/__init__.py b/llama-index-integrations/tools/llama-index-tools-docker-code/llama_index/tools/docker_code/__init__.py new file mode 100644 index 0000000000000..94494a4c4a28a --- /dev/null +++ b/llama-index-integrations/tools/llama-index-tools-docker-code/llama_index/tools/docker_code/__init__.py @@ -0,0 +1,4 @@ +## init +from llama_index.tools.docker_code.base import DockerCodeToolSpec + +__all__ = ["DockerCodeToolSpec"] diff --git a/llama-index-integrations/tools/llama-index-tools-docker-code/llama_index/tools/docker_code/base.py b/llama-index-integrations/tools/llama-index-tools-docker-code/llama_index/tools/docker_code/base.py new file mode 100644 index 0000000000000..b61fa759c982e --- /dev/null +++ b/llama-index-integrations/tools/llama-index-tools-docker-code/llama_index/tools/docker_code/base.py @@ -0,0 +1,330 @@ +"""Docker Code Interpreter tool spec for code execution.""" + +import docker +import tempfile +import os +import re +import 
import shlex
import shutil
import uuid
from pathlib import Path, PurePosixPath
from typing import Dict, Any, Optional, List, Union

from llama_index.core.tools.tool_spec.base import BaseToolSpec


def _sanitize_input(query: str) -> str:
    """Strip markdown fencing and surrounding whitespace from a code snippet.

    Leading whitespace/backticks and an optional ``python`` language tag are
    removed, as are trailing whitespace/backticks. The result always ends with
    exactly one newline.

    Args:
        query: The raw code string, possibly wrapped in a markdown code fence.

    Returns:
        str: The cleaned code, terminated by a single newline.
    """
    # Removes `, whitespace & python from start
    query = re.sub(r"^(\s|`)*(?i:python)?\s*", "", query)
    # Removes whitespace & ` from end
    query = re.sub(r"(\s|`)*$", "", query)
    # The previous substitution removed every trailing newline, so one is
    # always appended here.
    return query + "\n"


class DockerCodeToolSpec(BaseToolSpec):
    """Docker Code Interpreter tool spec.

    Leverages Docker to execute Python code and persist the workspace.

    A single long-running container is started per instance. The session
    workspace is a host directory bind-mounted at ``/workspace`` inside the
    container, so files written through the tool survive the container.
    """

    spec_functions = [
        "execute_code",
        "execute_file",
        "list_files",
        "write_file",
        "create_directory",
    ]

    def __init__(
        self,
        base_image: str = "python:3.9-slim",
        requirements: Optional[Union[List[str], str]] = None,
        memory_limit: str = "100m",
        max_processes: int = 10,
        workspace_dir: Optional[str] = None,
        dockerfile: Optional[str] = None,
        build_args: Optional[Dict[str, str]] = None,
        session_id: Optional[str] = None,
    ) -> None:
        """
        Initialize a persistent Docker session executor with custom dependencies.

        Args:
            base_image: Base Docker image to use
            requirements: List of pip packages or path to requirements.txt
            memory_limit: Maximum memory usage
            max_processes: Maximum number of processes allowed
            workspace_dir: Optional persistent directory for workspace
            dockerfile: Optional custom Dockerfile path
            build_args: Optional build arguments for Dockerfile
            session_id: Optional unique identifier for the session

        Raises:
            ValueError: If ``requirements`` is neither a list of packages nor
                the path of an existing requirements file.
            RuntimeError: If building the custom image fails.
        """
        # Assign these before anything that can raise so that __del__ never
        # sees a half-initialised object.
        self.client = None
        self.container = None
        self.workspace = None

        self.client = docker.DockerClient()
        self.memory_limit = memory_limit
        self.max_processes = max_processes

        # Set up workspace directory
        if workspace_dir:
            self.workspace_root = Path(workspace_dir)
        else:
            self.workspace_root = Path(tempfile.mkdtemp(prefix="docker_sessions_"))
        self.workspace_root.mkdir(parents=True, exist_ok=True)

        # Build custom image with dependencies if needed
        if requirements is None and dockerfile is None and build_args is None:
            self.image_name = base_image
        else:
            self.image_name = self._build_custom_image(
                base_image=base_image,
                requirements=requirements,
                dockerfile=dockerfile,
                build_args=build_args,
            )

        self.session_id = self._create_session(session_id=session_id)

    @staticmethod
    def _pip_install_args(requirements: Union[List[str], str]) -> Optional[str]:
        """Return shell-safe ``pip install`` arguments for a list of requirements.

        Returns ``None`` when ``requirements`` is a path to an existing
        requirements file (the caller copies the file instead).

        Raises:
            ValueError: If ``requirements`` is a string that is not an existing
                file, or is not a string/list/tuple at all.
        """
        if isinstance(requirements, str):
            if os.path.isfile(requirements):
                return None
            raise ValueError(
                f"requirements must be a list of packages or the path of an "
                f"existing requirements file, got: {requirements!r}"
            )
        if isinstance(requirements, (list, tuple)):
            # Quote every token: specifiers such as "numpy>=1.0" would otherwise
            # be parsed by the shell as an output redirection.
            tokens = [tok for req in requirements for tok in shlex.split(req)]
            return " ".join(shlex.quote(tok) for tok in tokens)
        raise ValueError(
            f"Unsupported type for requirements: {type(requirements).__name__}"
        )

    def _build_custom_image(
        self,
        base_image: str,
        requirements: Optional[Union[List[str], str]],
        dockerfile: Optional[str],
        build_args: Optional[Dict[str, str]],
    ) -> str:
        """Build custom Docker image with dependencies.

        When ``dockerfile`` is given it is used as-is (``base_image`` and
        ``requirements`` are ignored) and the build context contains only that
        Dockerfile. Otherwise a Dockerfile is generated from ``base_image`` and
        ``requirements``.

        Returns:
            The tag of the newly built image.

        Raises:
            ValueError: If ``requirements`` has an unsupported form.
            RuntimeError: If the Docker build fails.
        """
        image_tag = f"custom_python_env_{uuid.uuid4().hex[:8]}"

        with tempfile.TemporaryDirectory() as build_dir:
            build_path = Path(build_dir)

            if dockerfile:
                # Use custom Dockerfile
                shutil.copy(dockerfile, build_path / "Dockerfile")
            else:
                # Generate Dockerfile with requirements
                dockerfile_content = [
                    f"FROM {base_image}",
                    "WORKDIR /workspace",
                    # Install system dependencies. Kept as ONE list element so
                    # that joining with "\n" does not insert blank lines after
                    # the backslash continuations.
                    "RUN apt-get update && apt-get install -y --no-install-recommends \\\n"
                    "    build-essential \\\n"
                    "    git \\\n"
                    "    && rm -rf /var/lib/apt/lists/*",
                    # Upgrade pip
                    "RUN pip install --no-cache-dir --upgrade pip",
                ]

                # Handle requirements
                if requirements:
                    pip_args = self._pip_install_args(requirements)
                    if pip_args is None:
                        # Copy requirements.txt into the build context
                        shutil.copy(requirements, build_path / "requirements.txt")
                        dockerfile_content.append(
                            "COPY requirements.txt /workspace/requirements.txt\n"
                            "RUN pip install --no-cache-dir -r requirements.txt"
                        )
                    elif pip_args:
                        # Install from list
                        dockerfile_content.append(
                            f"RUN pip install --no-cache-dir {pip_args}"
                        )

                # Write Dockerfile
                dockerfile_path = build_path / "Dockerfile"
                dockerfile_path.write_text("\n".join(dockerfile_content) + "\n")

            # Build the image
            try:
                self.client.images.build(
                    path=str(build_path), tag=image_tag, buildargs=build_args, rm=True
                )
            except Exception as e:
                raise RuntimeError(f"Failed to build custom image: {e!s}") from e

        return image_tag

    def _create_session(
        self,
        session_id: Optional[str] = None,
    ) -> str:
        """
        Create a new persistent Docker session.

        Creates the session workspace on the host and starts a detached,
        network-less container with all capabilities dropped that bind-mounts
        the workspace at ``/workspace``.

        Args:
            session_id: Optional unique identifier for the session

        Returns:
            session_id: Unique identifier for the session
        """
        if session_id is None:
            session_id = f"session_{uuid.uuid4().hex[:8]}"

        # Create session workspace
        workspace = self.workspace_root / session_id
        workspace.mkdir(parents=True, exist_ok=True)

        # Start container
        self.container = self.client.containers.run(
            self.image_name,
            command="tail -f /dev/null",  # Keep container running
            name=f"session_{session_id}",
            detach=True,
            working_dir="/workspace",
            volumes={str(workspace.absolute()): {"bind": "/workspace", "mode": "rw"}},
            mem_limit=self.memory_limit,
            pids_limit=self.max_processes,
            network_mode="none",
            cap_drop=["ALL"],
            security_opt=["no-new-privileges:true"],
        )
        self.workspace = workspace

        return session_id

    def _run_in_container(self, cmd: List[str]) -> Dict[str, Any]:
        """Run ``cmd`` (argv list, no shell) in the container and collect its output.

        Returns:
            Dict with ``success``, ``stdout``, ``stderr`` and ``exit_code``.
            Failures to talk to the container are reported with
            ``exit_code == -1`` instead of being raised, so the calling agent
            always receives a result it can read.
        """
        try:
            exit_code, output = self.container.exec_run(
                cmd=cmd,
                workdir="/workspace",
                demux=True,  # Split stdout/stderr
            )
            stdout_bytes, stderr_bytes = output
            # errors="replace": program output is arbitrary bytes and must not
            # turn a successful run into a decoding failure.
            stdout = stdout_bytes.decode("utf-8", errors="replace") if stdout_bytes else ""
            stderr = stderr_bytes.decode("utf-8", errors="replace") if stderr_bytes else ""
        except Exception as e:  # tool boundary: report instead of raising
            return {
                "success": False,
                "stdout": "",
                "stderr": f"Error during execution: {e!s}",
                "exit_code": -1,
            }

        return {
            "success": exit_code == 0,
            "stdout": stdout,
            "stderr": stderr,
            "exit_code": exit_code,
        }

    def execute_code(self, code: str) -> Dict[str, Any]:
        """
        Execute Python code in the session container.

        Args:
            code (str): Python code to execute

        Returns:
            Dict containing execution results
        """
        code = _sanitize_input(code)
        # Pass the code as a separate argv element instead of interpolating it
        # into a shell string: quotes inside the code can no longer break (or
        # hijack) the command line.
        return self._run_in_container(["python", "-c", code])

    def execute_file(self, filename: str) -> Dict[str, Any]:
        """
        Execute a Python file in the session workspace.

        Args:
            filename (str): Name of the file to execute

        Returns:
            Dict containing execution results
        """
        # The path is interpreted inside the (Linux) container, so it must be a
        # POSIX path regardless of the host operating system.
        filepath = PurePosixPath("/workspace") / filename
        return self._run_in_container(["python", str(filepath)])

    def list_files(self, path: str = ".") -> List[str]:
        """
        List files in the session workspace.

        Args:
            path (str): Relative path within workspace

        Returns:
            List of files and directories

        Raises:
            RuntimeError: If ``ls`` exits with a non-zero status.
        """
        # "--" stops option parsing so a path starting with "-" is not treated
        # as a flag.
        exit_code, output = self.container.exec_run(
            cmd=["ls", "-la", "--", path], workdir="/workspace"
        )

        if exit_code == 0:
            return output.decode("utf-8").splitlines()
        else:
            raise RuntimeError(f"Failed to list files: {output.decode('utf-8')}")

    def write_file(self, filename: str, content: str) -> bool:
        """
        Write content to a file in the session workspace.

        The write happens on the host side of the bind mount, so the target is
        resolved first and rejected if it would land outside the workspace
        (``..`` components, absolute paths, or symlinks created by sandboxed
        code). Missing parent directories are created.

        Args:
            filename (str): Name of the file to write
            content (str): Content to write to the file

        Returns:
            bool indicating success
        """
        try:
            root = self.workspace.resolve()
            filepath = (root / filename).resolve()
            # Raises ValueError when filepath is not located under root.
            filepath.relative_to(root)
            filepath.parent.mkdir(parents=True, exist_ok=True)
            # The container's Python reads source files as UTF-8.
            filepath.write_text(content, encoding="utf-8")
            return True
        except (OSError, ValueError, RuntimeError):
            return False

    def create_directory(self, dirname: str) -> bool:
        """
        Create a directory in the session workspace.

        Args:
            dirname (str): Name of the directory to create

        Returns:
            bool indicating success
        """
        exit_code, _ = self.container.exec_run(
            cmd=["mkdir", "-p", "--", dirname], workdir="/workspace"
        )

        return exit_code == 0

    def __del__(self) -> None:
        """Clean up resources when the object is deleted.

        Stops and removes the session container and closes the Docker client.
        Every step is best-effort: __init__ may have failed before the
        attributes existed, and the daemon may already be gone at interpreter
        shutdown.
        """
        container = getattr(self, "container", None)
        if container is not None:
            try:
                container.stop()
                # Remove the stopped container so it does not leak and so the
                # same session_id (container name) can be used again.
                container.remove()
            except Exception:
                pass

        client = getattr(self, "client", None)
        if client is not None:
            try:
                client.close()
            except Exception:
                pass
"67.1.0.0" + +[tool.poetry.group.dev.dependencies.black] +extras = ["jupyter"] +version = "<=23.9.1,>=23.7.0" + +[tool.poetry.group.dev.dependencies.codespell] +extras = ["toml"] +version = ">=v2.2.6" + +[[tool.poetry.packages]] +include = "llama_index/"