From 5e9c3b0ac85e57f89bdb5635e7653f5562c29f26 Mon Sep 17 00:00:00 2001 From: Micky Brunetti Date: Thu, 20 Apr 2023 21:18:33 +0200 Subject: [PATCH 1/3] Fix HTTP code for file request (static, assets, file) (#3895) * Fix HTTP code for file request (static, assets, file) * Fix bugs related to utils.abspath for symlink and unresolvale path * Requesting a directory from the file route now returns 403 --- CHANGELOG.md | 2 ++ gradio/routes.py | 51 ++++++++++++++++++++++++--------------------- gradio/utils.py | 14 +++++++++++-- test/test_routes.py | 24 +++++++++++++++++---- test/test_utils.py | 27 ++++++++++++++++++------ 5 files changed, 82 insertions(+), 36 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fc2600109b62d..d97a87f3ec145 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,8 @@ No changes to highlight. - Fixed bug where all bokeh plots appeared in the same div by [@freddyaboulton](https://github.com/freddyaboulton) in [PR 3896](https://github.com/gradio-app/gradio/pull/3896) - Fixed image outputs to automatically take full output image height, unless explicitly set, by [@aliabid94](https://github.com/aliabid94) in [PR 3905](https://github.com/gradio-app/gradio/pull/3905) - Fix issue in `gr.Gallery()` where setting height causes aspect ratio of images to collapse by [@dawoodkhan82](https://github.com/dawoodkhan82) in [PR 3830](https://github.com/gradio-app/gradio/pull/3830) +- Fix issue where requesting for a non-existing file would trigger a 500 error by [@micky2be](https://github.com/micky2be) in `[PR 3895](https://github.com/gradio-app/gradio/pull/3895)`. +- Fix bugs with abspath about symlinks, and unresolvable path on Windows by [@micky2be](https://github.com/micky2be) in `[PR 3895](https://github.com/gradio-app/gradio/pull/3895)`. 
## Documentation Changes: diff --git a/gradio/routes.py b/gradio/routes.py index 4c63414fe71cc..f8d18d40693cf 100644 --- a/gradio/routes.py +++ b/gradio/routes.py @@ -8,7 +8,6 @@ import json import mimetypes import os -import posixpath import secrets import tempfile import traceback @@ -265,16 +264,12 @@ def get_config(): @app.get("/static/{path:path}") def static_resource(path: str): static_file = safe_join(STATIC_PATH_LIB, path) - if static_file is not None: - return FileResponse(static_file) - raise HTTPException(status_code=404, detail="Static file not found") + return FileResponse(static_file) @app.get("/assets/{path:path}") def build_resource(path: str): build_file = safe_join(BUILD_PATH_LIB, path) - if build_file is not None: - return FileResponse(build_file) - raise HTTPException(status_code=404, detail="Build file not found") + return FileResponse(build_file) @app.get("/favicon.ico") async def favicon(): @@ -309,21 +304,23 @@ async def file(path_or_url: str, request: fastapi.Request): return RedirectResponse( url=path_or_url, status_code=status.HTTP_302_FOUND ) - abs_path = str(utils.abspath(path_or_url)) - in_app_dir = utils.abspath(app.cwd) in utils.abspath(path_or_url).parents - created_by_app = abs_path in set().union(*blocks.temp_file_sets) + abs_path = utils.abspath(path_or_url) + in_app_dir = utils.abspath(app.cwd) in abs_path.parents + created_by_app = str(abs_path) in set().union(*blocks.temp_file_sets) in_file_dir = any( ( - utils.abspath(dir) in utils.abspath(path_or_url).parents + utils.abspath(dir) in abs_path.parents for dir in blocks.file_directories ) ) - was_uploaded = ( - utils.abspath(app.uploaded_file_dir) - in utils.abspath(path_or_url).parents - ) + was_uploaded = utils.abspath(app.uploaded_file_dir) in abs_path.parents if in_app_dir or created_by_app or in_file_dir or was_uploaded: + if not abs_path.exists(): + raise HTTPException(404, "File not found") + if abs_path.is_dir(): + raise HTTPException(403) + range_val = 
request.headers.get("Range", "").strip() if range_val.startswith("bytes=") and "-" in range_val: range_val = range_val[6:] @@ -341,8 +338,9 @@ async def file(path_or_url: str, request: fastapi.Request): return FileResponse(abs_path, headers={"Accept-Ranges": "bytes"}) else: - raise ValueError( - f"File cannot be fetched: {path_or_url}. All files must contained within the Gradio python app working directory, or be a temp file created by the Gradio python app." + raise HTTPException( + 403, + f"File cannot be fetched: {path_or_url}. All files must contained within the Gradio python app working directory, or be a temp file created by the Gradio python app.", ) @app.get("/file/{path:path}", dependencies=[Depends(login_check)]) @@ -592,26 +590,31 @@ def robots_txt(): ######## -def safe_join(directory: str, path: str) -> str | None: +def safe_join(directory: str, path: str) -> str: """Safely path to a base directory to avoid escaping the base directory. Borrowed from: werkzeug.security.safe_join""" _os_alt_seps: List[str] = list( sep for sep in [os.path.sep, os.path.altsep] if sep is not None and sep != "/" ) - if path != "": - filename = posixpath.normpath(path) - else: - return directory + if path == "": + raise HTTPException(400) + filename = os.path.normpath(path) + fullpath = os.path.join(directory, filename) if ( any(sep in filename for sep in _os_alt_seps) or os.path.isabs(filename) or filename == ".." 
or filename.startswith("../") + or os.path.isdir(fullpath) ): - return None - return posixpath.join(directory, filename) + raise HTTPException(403) + + if not os.path.exists(fullpath): + raise HTTPException(404, "File not found") + + return fullpath def get_types(cls_set: List[Type]): diff --git a/gradio/utils.py b/gradio/utils.py index c795f11d82450..745554d6cc572 100644 --- a/gradio/utils.py +++ b/gradio/utils.py @@ -935,10 +935,20 @@ def tex2svg(formula, *args): def abspath(path: str | Path) -> Path: """Returns absolute path of a str or Path path, but does not resolve symlinks.""" - if Path(path).is_symlink(): + path = Path(path) + + if path.is_absolute(): + return path + + # recursively check if there is a symlink within the path + is_symlink = path.is_symlink() or any( + parent.is_symlink() for parent in path.parents + ) + + if is_symlink or path == path.resolve(): # in case path couldn't be resolved return Path.cwd() / path else: - return Path(path).resolve() + return path.resolve() def get_serializer_name(block: Block) -> str | None: diff --git a/test/test_routes.py b/test/test_routes.py index 24a68b3629dbd..51e7efe02ad4d 100644 --- a/test/test_routes.py +++ b/test/test_routes.py @@ -46,9 +46,9 @@ def test_get_main_route(self, test_client): def test_static_files_served_safely(self, test_client): # Make sure things outside the static folder are not accessible response = test_client.get(r"/static/..%2findex.html") - assert response.status_code == 404 + assert response.status_code == 403 response = test_client.get(r"/static/..%2f..%2fapi_docs.html") - assert response.status_code == 404 + assert response.status_code == 403 def test_get_config_route(self, test_client): response = test_client.get("/config/") @@ -202,8 +202,8 @@ def test_get_file_allowed_by_file_directories(self): ) client = TestClient(app) - with pytest.raises(ValueError): - file_response = client.get(f"/file={allowed_file.name}") + file_response = client.get(f"/file={allowed_file.name}") + assert 
file_response.status_code == 403 app, _, _ = gr.Interface(lambda s: s.name, gr.File(), gr.File()).launch( prevent_thread_lock=True, @@ -271,6 +271,22 @@ def test_mount_gradio_app(self): assert client.get("/ps").is_success assert client.get("/py").is_success + def test_static_file_missing(self, test_client): + response = test_client.get(r"/static/not-here.js") + assert response.status_code == 404 + + def test_asset_file_missing(self, test_client): + response = test_client.get(r"/assets/not-here.js") + assert response.status_code == 404 + + def test_dynamic_file_missing(self, test_client): + response = test_client.get(r"/file=not-here.js") + assert response.status_code == 404 + + def test_dynamic_file_directory(self, test_client): + response = test_client.get(r"/file=gradio") + assert response.status_code == 403 + def test_mount_gradio_app_raises_error_if_event_queued_but_queue_disabled(self): with gr.Blocks() as demo: with gr.Row(): diff --git a/test/test_utils.py b/test/test_utils.py index 90c89cd884a98..afe3114b63849 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -584,12 +584,27 @@ def test_abspath_no_symlink(self): resolved_path = str(abspath("../gradio/gradio/test_data/lion.jpg")) assert ".." not in resolved_path - @mock.patch( - "pathlib.Path.is_symlink", return_value=True - ) # Have to patch since Windows doesn't allow creation of sym links without administrative privileges - def test_abspath_symlink(self, mock_islink): - resolved_path = str(abspath("../gradio/gradio/test_data/lion.jpg")) - assert ".." 
in resolved_path + @pytest.mark.skipif( + sys.platform.startswith("win"), + reason="Windows doesn't allow creation of sym links without administrative privileges", + ) + def test_abspath_symlink_path(self): + os.symlink("gradio/test_data", "gradio/test_link", True) + resolved_path = str(abspath("../gradio/gradio/test_link/lion.jpg")) + os.unlink("gradio/test_link") + assert "test_link" in resolved_path + + @pytest.mark.skipif( + sys.platform.startswith("win"), + reason="Windows doesn't allow creation of sym links without administrative privileges", + ) + def test_abspath_symlink_dir(self): + os.symlink("gradio/test_data", "gradio/test_link", True) + full_path = os.path.join(os.getcwd(), "gradio/test_link/lion.jpg") + resolved_path = str(abspath(full_path)) + os.unlink("gradio/test_link") + assert "test_link" in resolved_path + assert full_path == resolved_path class TestGetTypeHints: From 3b114cbc2bad2de2e0fd8a15f74f08ed4d66af24 Mon Sep 17 00:00:00 2001 From: Tenzin Date: Fri, 21 Apr 2023 20:30:57 +0530 Subject: [PATCH 2/3] Fixes typos (#3931) * Fixes typos * Update CHANGELOG.md --------- Co-authored-by: Freddy Boulton --- CHANGELOG.md | 1 + client/python/gradio_client/utils.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d97a87f3ec145..945b741346cf6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ No changes to highlight. - Fix issue in `gr.Gallery()` where setting height causes aspect ratio of images to collapse by [@dawoodkhan82](https://github.com/dawoodkhan82) in [PR 3830](https://github.com/gradio-app/gradio/pull/3830) - Fix issue where requesting for a non-existing file would trigger a 500 error by [@micky2be](https://github.com/micky2be) in `[PR 3895](https://github.com/gradio-app/gradio/pull/3895)`. - Fix bugs with abspath about symlinks, and unresolvable path on Windows by [@micky2be](https://github.com/micky2be) in `[PR 3895](https://github.com/gradio-app/gradio/pull/3895)`. 
+- Fixes typo in client `Status` enum by [@10zinten](https://github.com/10zinten) in [PR 3931](https://github.com/gradio-app/gradio/pull/3931) ## Documentation Changes: diff --git a/client/python/gradio_client/utils.py b/client/python/gradio_client/utils.py index d663e1e01f99f..d2b02c0316245 100644 --- a/client/python/gradio_client/utils.py +++ b/client/python/gradio_client/utils.py @@ -77,7 +77,7 @@ class Status(Enum): QUEUE_FULL = "QUEUE_FULL" IN_QUEUE = "IN_QUEUE" SENDING_DATA = "SENDING_DATA" - PROCESSING = "PROCESSSING" + PROCESSING = "PROCESSING" ITERATING = "ITERATING" FINISHED = "FINISHED" CANCELLED = "CANCELLED" From ad2ed23ce0e9f34a78d40773859d5f8094f29e83 Mon Sep 17 00:00:00 2001 From: Freddy Boulton Date: Fri, 21 Apr 2023 17:05:58 -0400 Subject: [PATCH 3/3] Gradio and LLM agents guide (#3934) * Add WIP guide * Fix syntax * Fix guide * Fix guide --- .../gradio-and-llm-agents.md | 136 ++++++++++++++++++ 1 file changed, 136 insertions(+) create mode 100644 guides/07_other-tutorials/gradio-and-llm-agents.md diff --git a/guides/07_other-tutorials/gradio-and-llm-agents.md b/guides/07_other-tutorials/gradio-and-llm-agents.md new file mode 100644 index 0000000000000..234d612fe5c76 --- /dev/null +++ b/guides/07_other-tutorials/gradio-and-llm-agents.md @@ -0,0 +1,136 @@ +# Gradio & LLM Agents 🤝 + +Large Language Models (LLMs) are very impressive but they can be made even more powerful if we give them skills to accomplish specialized tasks. + +The [gradio_tools](https://github.com/freddyaboulton/gradio-tools) library can turn any [Gradio](https://github.com/gradio-app/gradio) application into a [tool](https://python.langchain.com/en/latest/modules/agents/tools.html) that an [agent](https://docs.langchain.com/docs/components/agents/agent) can use to complete its task. For example, an LLM could use a Gradio tool to transcribe a voice recording it finds online and then summarize it for you.
Or it could use a different Gradio tool to apply OCR to a document on your Google Drive and then answer questions about it. + +This guide will show how you can use `gradio_tools` to grant your LLM Agent access to the cutting-edge Gradio applications hosted around the world. Although `gradio_tools` is compatible with more than one agent framework, we will focus on [LangChain Agents](https://docs.langchain.com/docs/components/agents/) in this guide. + +## Some background + +### What are agents? + +A [LangChain agent](https://docs.langchain.com/docs/components/agents/agent) is a Large Language Model (LLM) that takes user input and reports an output based on using one of many tools at its disposal. + +### What is Gradio? +[Gradio](https://github.com/gradio-app/gradio) is the de facto standard framework for building Machine Learning Web Applications and sharing them with the world - all with just Python! 🐍 + +## gradio_tools - An end-to-end example + +To get started with `gradio_tools`, all you need to do is import and initialize your tools and pass them to the LangChain agent! + +In the following example, we import the `StableDiffusionPromptGeneratorTool` to create a good prompt for stable diffusion, the +`StableDiffusionTool` to create an image with our improved prompt, the `ImageCaptioningTool` to caption the generated image, and +the `TextToVideoTool` to create a video from a prompt. + +We then tell our agent to create an image of a dog riding a skateboard, but to please improve our prompt ahead of time. We also ask +it to caption the generated image and create a video for it. The agent can decide which tool to use without us explicitly telling it.
+ +```python +import os + +if not os.getenv("OPENAI_API_KEY"): + raise ValueError("OPENAI_API_KEY must be set") + +from langchain.agents import initialize_agent +from langchain.llms import OpenAI +from gradio_tools import (StableDiffusionTool, ImageCaptioningTool, StableDiffusionPromptGeneratorTool, + TextToVideoTool) + +from langchain.memory import ConversationBufferMemory + +llm = OpenAI(temperature=0) +memory = ConversationBufferMemory(memory_key="chat_history") +tools = [StableDiffusionTool().langchain, ImageCaptioningTool().langchain, + StableDiffusionPromptGeneratorTool().langchain, TextToVideoTool().langchain] + + +agent = initialize_agent(tools, llm, memory=memory, agent="conversational-react-description", verbose=True) +output = agent.run(input=("Please create a photo of a dog riding a skateboard " + "but improve my prompt prior to using an image generator." + "Please caption the generated image and create a video for it using the improved prompt.")) +``` + +You'll note that we are using some pre-built tools that come with `gradio_tools`. Please see this [doc](https://github.com/freddyaboulton/gradio-tools#gradio-tools-gradio--llm-agents) for a complete list of the tools that come with `gradio_tools`. +If you would like to use a tool that's not currently in `gradio_tools`, it is very easy to add your own. That's what the next section will cover. + +## gradio_tools - creating your own tool + +The core abstraction is the `GradioTool`, which lets you define a new tool for your LLM as long as you implement a standard interface: + +```python +class GradioTool(BaseTool): + + def __init__(self, name: str, description: str, src: str) -> None: + + @abstractmethod + def create_job(self, query: str) -> Job: + pass + + @abstractmethod + def postprocess(self, output: Tuple[Any] | Any) -> str: + pass +``` +The requirements are: +1. The name for your tool +2. The description for your tool. This is crucial! Agents decide which tool to use based on their description. 
Be precise and be sure to include an example of what the input and the output of the tool should look like. +3. The url or space id, e.g. `freddyaboulton/calculator`, of the Gradio application. Based on this value, `gradio_tools` will create a [gradio client](https://github.com/gradio-app/gradio/blob/main/client/python/README.md) instance to query the upstream application via API. Be sure to click the link and learn more about the gradio client library if you are not familiar with it. +4. create_job - Given a string, this method should parse that string and return a job from the client. Most times, this is as simple as passing the string to the `submit` function of the client. More info on creating jobs [here](https://github.com/gradio-app/gradio/blob/main/client/python/README.md#making-a-prediction) +5. postprocess - Given the result of the job, convert it to a string the LLM can display to the user. +6. *Optional* - Some libraries, e.g. [MiniChain](https://github.com/srush/MiniChain/tree/main), may need some info about the underlying gradio input and output types used by the tool. By default, this will return gr.Textbox() but +if you'd like to provide more accurate info, implement the `_block_input(self, gr)` and `_block_output(self, gr)` methods of the tool. The `gr` variable is the gradio module (the result of `import gradio as gr`). It will be +automatically imported by the `GradioTool` parent class and passed to the `_block_input` and `_block_output` methods. + +And that's it! + +Once you have created your tool, open a pull request to the `gradio_tools` repo! We welcome all contributions. + +## Example tool - Stable Diffusion + +Here is the code for the StableDiffusion tool as an example: + +```python +from gradio_tool import GradioTool +import os + +class StableDiffusionTool(GradioTool): + """Tool for calling stable diffusion from llm""" + + def __init__( + self, + name="StableDiffusion", + description=( + "An image generator.
Use this to generate images based on " + "text input. Input should be a description of what the image should " + "look like. The output will be a path to an image file." + ), + src="gradio-client-demos/stable-diffusion", + hf_token=None, + ) -> None: + super().__init__(name, description, src, hf_token) + + def create_job(self, query: str) -> Job: + return self.client.submit(query, "", 9, fn_index=1) + + def postprocess(self, output: str) -> str: + return [os.path.join(output, i) for i in os.listdir(output) if not i.endswith("json")][0] + + def _block_input(self, gr) -> "gr.components.Component": + return gr.Textbox() + + def _block_output(self, gr) -> "gr.components.Component": + return gr.Image() +``` + +Some notes on this implementation: +1. All instances of `GradioTool` have an attribute called `client` that is a pointer to the underlying [gradio client](https://github.com/gradio-app/gradio/tree/main/client/python#gradio_client-use-a-gradio-app-as-an-api----in-3-lines-of-python). That is what you should use +in the `create_job` method. +2. `create_job` just passes the query string to the `submit` function of the client with some other parameters hardcoded, i.e. the negative prompt string and the guidance scale. We could modify our tool to also accept these values from the input string in a subsequent version. +3. The `postprocess` method simply returns the first image from the gallery of images created by the stable diffusion space. We use the `os` module to get the full path of the image. + +## Conclusion + +You now know how to extend the abilities of your LLM with the 1000s of gradio spaces running in the wild! +Again, we welcome any contributions to the [gradio_tools](https://github.com/freddyaboulton/gradio-tools) library. +We're excited to see the tools you all build! +