Skip to content

Commit

Permalink
Add ability to blocklist filepaths, ability to specify where gradio t…
Browse files Browse the repository at this point in the history
…emp files are created (#4047)

* temporary file

* tests

* formatting

* rename

* added another test

* guide

* formatting

* changelog

* added custom gradio temp directory (#4053)

* added custom gradio temp directory

* Update 03_sharing-your-app.md

* rename test

* address review

* remove print
  • Loading branch information
abidlabs authored May 3, 2023
1 parent 35821fe commit ff21ecb
Show file tree
Hide file tree
Showing 7 changed files with 153 additions and 20 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
## New Features:

- Add support for `visual-question-answering`, `document-question-answering`, and `image-to-text` using `gr.Interface.load("models/...")` and `gr.Interface.from_pipeline` by [@osanseviero](https://github.com/osanseviero) in [PR 3887](https://github.com/gradio-app/gradio/pull/3887)
- Adds the ability to blocklist filepaths (and also improves the allowlist mechanism) by [@abidlabs](https://github.com/abidlabs) in [PR 4047](https://github.com/gradio-app/gradio/pull/4047).
- Adds the ability to specify the directory where uploaded and temporary files are stored via the `GRADIO_TEMP_DIR` environment variable by [@abidlabs](https://github.com/abidlabs) in [PR 4047](https://github.com/gradio-app/gradio/pull/4047).

## Bug Fixes:

Expand Down
26 changes: 21 additions & 5 deletions gradio/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -692,7 +692,8 @@ def __init__(
self.progress_tracking = None
self.ssl_verify = True

self.file_directories = []
self.allowed_paths = []
self.blocked_paths = []

if self.analytics_enabled:
is_custom_theme = not any(
Expand Down Expand Up @@ -1588,6 +1589,8 @@ def launch(
quiet: bool = False,
show_api: bool = True,
file_directories: List[str] | None = None,
allowed_paths: List[str] | None = None,
blocked_paths: List[str] | None = None,
_frontend: bool = True,
) -> Tuple[FastAPI, str, str]:
"""
Expand Down Expand Up @@ -1618,7 +1621,9 @@ def launch(
ssl_verify: If False, skips certificate validation which allows self-signed certificates to be used.
quiet: If True, suppresses most print statements.
show_api: If True, shows the api docs in the footer of the app. Default True. If the queue is enabled, then api_open parameter of .queue() will determine if the api docs are shown, independent of the value of show_api.
file_directories: List of directories that gradio is allowed to serve files from (in addition to the directory containing the gradio python file). Must be absolute paths. Warning: any files in these directories or its children are potentially accessible to all users of your app.
file_directories: This parameter has been renamed to `allowed_paths`. It will be removed in a future version.
allowed_paths: List of complete filepaths or parent directories that gradio is allowed to serve (in addition to the directory containing the gradio python file). Must be absolute paths. Warning: if you provide directories, any files in these directories or their subdirectories are accessible to all users of your app.
blocked_paths: List of complete filepaths or parent directories that gradio is not allowed to serve (i.e. users of your app are not allowed to access). Must be absolute paths. Warning: takes precedence over `allowed_paths` and all other directories exposed by Gradio by default.
Returns:
app: FastAPI app object that is running the demo
local_url: Locally accessible link to the demo
Expand Down Expand Up @@ -1679,9 +1684,20 @@ def reverse(text):
self.queue()
self.show_api = self.api_open if self.enable_queue else show_api

self.file_directories = file_directories if file_directories is not None else []
if not isinstance(self.file_directories, list):
raise ValueError("file_directories must be a list of directories.")
if file_directories is not None:
warnings.warn(
"The `file_directories` parameter has been renamed to `allowed_paths`. Please use that instead.",
DeprecationWarning,
)
if allowed_paths is None:
allowed_paths = file_directories
self.allowed_paths = allowed_paths or []
self.blocked_paths = blocked_paths or []

if not isinstance(self.allowed_paths, list):
raise ValueError("`allowed_paths` must be a list of directories.")
if not isinstance(self.blocked_paths, list):
raise ValueError("`blocked_paths` must be a list of directories.")

self.validate_queue_settings()

Expand Down
5 changes: 4 additions & 1 deletion gradio/components.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import json
import math
import operator
import os
import random
import secrets
import shutil
Expand Down Expand Up @@ -191,7 +192,9 @@ def __init__(
**kwargs,
):
self.temp_files: Set[str] = set()
self.DEFAULT_TEMP_DIR = tempfile.gettempdir()
self.DEFAULT_TEMP_DIR = os.environ.get("GRADIO_TEMP_DIR") or str(
Path(tempfile.gettempdir()) / "gradio"
)

Component.__init__(
self, elem_id=elem_id, elem_classes=elem_classes, visible=visible, **kwargs
Expand Down
18 changes: 15 additions & 3 deletions gradio/routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from asyncio import TimeoutError as AsyncTimeOutError
from collections import defaultdict
from copy import deepcopy
from pathlib import Path
from typing import Any, Dict, List, Optional, Type
from urllib.parse import urlparse

Expand Down Expand Up @@ -111,7 +112,9 @@ def __init__(self, **kwargs):
self.lock = asyncio.Lock()
self.queue_token = secrets.token_urlsafe(32)
self.startup_events_triggered = False
self.uploaded_file_dir = str(utils.abspath(tempfile.mkdtemp()))
self.uploaded_file_dir = os.environ.get("GRADIO_TEMP_DIR") or str(
Path(tempfile.gettempdir()) / "gradio"
)
super().__init__(**kwargs, docs_url=None, redoc_url=None)

def configure_app(self, blocks: gradio.Blocks) -> None:
Expand Down Expand Up @@ -305,12 +308,21 @@ async def file(path_or_url: str, request: fastapi.Request):
url=path_or_url, status_code=status.HTTP_302_FOUND
)
abs_path = utils.abspath(path_or_url)
in_blocklist = any(
(
utils.is_in_or_equal(abs_path, blocked_path)
for blocked_path in blocks.blocked_paths
)
)
if in_blocklist:
raise HTTPException(403, f"File not allowed: {path_or_url}.")

in_app_dir = utils.abspath(app.cwd) in abs_path.parents
created_by_app = str(abs_path) in set().union(*blocks.temp_file_sets)
in_file_dir = any(
(
utils.abspath(dir) in abs_path.parents
for dir in blocks.file_directories
utils.is_in_or_equal(abs_path, allowed_path)
for allowed_path in blocks.allowed_paths
)
)
was_uploaded = utils.abspath(app.uploaded_file_dir) in abs_path.parents
Expand Down
13 changes: 13 additions & 0 deletions gradio/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -962,6 +962,19 @@ def abspath(path: str | Path) -> Path:
return path.resolve()


def is_in_or_equal(path_1: str | Path, path_2: str | Path) -> bool:
    """
    True if path_1 is a descendant of (i.e. located within) path_2 or if the paths are
    the same, returns False otherwise.
    Parameters:
        path_1: str or Path (can be a file or directory)
        path_2: str or Path (can be a file or directory)
    Returns:
        True when path_1 equals path_2 or lies somewhere beneath it, False otherwise.
    """
    # Resolve each path exactly once so the equality check and the ancestry
    # check are both evaluated against the same resolved values.
    candidate = abspath(path_1)
    container = abspath(path_2)
    return candidate == container or container in candidate.parents


def get_serializer_name(block: Block) -> str | None:
if not hasattr(block, "serialize"):
return None
Expand Down
6 changes: 3 additions & 3 deletions guides/01_getting-started/03_sharing-your-app.md
Original file line number Diff line number Diff line change
Expand Up @@ -198,8 +198,8 @@ In particular, Gradio apps grant users access to three kinds of files:

* Files in the same folder (or a subdirectory) of where the Gradio script is launched from. For example, if the path to your gradio scripts is `/home/usr/scripts/project/app.py` and you launch it from `/home/usr/scripts/project/`, then users of your shared Gradio app will be able to access any files inside `/home/usr/scripts/project/`. This is needed so that you can easily reference these files in your Gradio app.

* Temporary files created by Gradio. These are files that are created by Gradio as part of running your prediction function. For example, if your prediction function returns a video file, then Gradio will save that video to a temporary file and then send the path to the temporary file to the front end.
* Temporary files created by Gradio. These are files that are created by Gradio as part of running your prediction function. For example, if your prediction function returns a video file, then Gradio will save that video to a temporary file and then send the path to the temporary file to the front end. You can customize the location of temporary files created by Gradio by setting the environment variable `GRADIO_TEMP_DIR` to an absolute path, such as `/home/usr/scripts/project/temp/`.

* Files that you explicitly allow via the `file_directories` parameter in `launch()`. In some cases, you may want to reference other files in your file system. The `file_directories` parameter allows you to pass in a list of additional directories you'd like to provide access to. (By default, there are no additional file directories).
* Files that you explicitly allow via the `allowed_paths` parameter in `launch()`. This parameter allows you to pass in a list of additional directories or exact filepaths you'd like to allow users to have access to. (By default, this parameter is an empty list).

Users should NOT be able to access other arbitrary paths on the host.
Users should NOT be able to access other arbitrary paths on the host. Furthermore, as a security measure, you can also **block** users from accessing specific files or directories. To do this, pass a list of directories or exact filepaths to the `blocked_paths` parameter in `launch()`. This parameter takes precedence over the files that Gradio exposes by default and over anything listed in `allowed_paths`.
103 changes: 95 additions & 8 deletions test/test_routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import os
import sys
import tempfile
from pathlib import Path
from unittest.mock import patch

import numpy as np
Expand Down Expand Up @@ -54,7 +55,7 @@ def test_get_config_route(self, test_client):
response = test_client.get("/config/")
assert response.status_code == 200

def test_upload_route(self, test_client):
def test_upload_path(self, test_client):
response = test_client.post(
"/upload", files={"files": open("test/test_files/alphabet.txt", "r")}
)
Expand All @@ -65,6 +66,25 @@ def test_upload_route(self, test_client):
with open(file) as saved_file:
assert saved_file.read() == "abcdefghijklmnopqrstuvwxyz"

def test_custom_upload_path(self):
    """Uploaded files are written beneath the directory named by GRADIO_TEMP_DIR."""
    custom_dir = str(Path(tempfile.gettempdir()) / "gradio-test")
    # patch.dict puts the environment back exactly as it was on exit (including
    # the case where the variable was previously unset), even if an assertion
    # fails, so this test cannot leak its setting into other tests.
    with patch.dict(os.environ, {"GRADIO_TEMP_DIR": custom_dir}):
        io = Interface(lambda x: x + x, "text", "text")
        app, _, _ = io.launch(prevent_thread_lock=True)
        try:
            test_client = TestClient(app)
            # Open the fixture in a context manager so the handle is released
            # as soon as the request has been sent.
            with open("test/test_files/alphabet.txt", "r") as upload:
                response = test_client.post("/upload", files={"files": upload})
            assert response.status_code == 200
            file = response.json()[0]
            assert "alphabet" in file
            assert file.startswith(custom_dir)
            assert file.endswith(".txt")
            with open(file) as saved_file:
                assert saved_file.read() == "abcdefghijklmnopqrstuvwxyz"
        finally:
            # Shut the launched app down regardless of the test outcome.
            io.close()

def test_predict_route(self, test_client):
response = test_client.post(
"/api/predict/", json={"data": ["test"], "fn_index": 0}
Expand Down Expand Up @@ -192,28 +212,95 @@ def predict(input, history):
output = dict(response.json())
assert output["data"] == ["testtest", None]

def test_get_file_allowed_by_file_directories(self):
def test_get_allowed_paths(self):
allowed_file = tempfile.NamedTemporaryFile(mode="w", delete=False)
allowed_file.write(media_data.BASE64_IMAGE)
allowed_file.flush()

app, _, _ = gr.Interface(lambda s: s.name, gr.File(), gr.File()).launch(
prevent_thread_lock=True,
)
io = gr.Interface(lambda s: s.name, gr.File(), gr.File())
app, _, _ = io.launch(prevent_thread_lock=True)
client = TestClient(app)

file_response = client.get(f"/file={allowed_file.name}")
assert file_response.status_code == 403
io.close()

app, _, _ = gr.Interface(lambda s: s.name, gr.File(), gr.File()).launch(
io = gr.Interface(lambda s: s.name, gr.File(), gr.File())
app, _, _ = io.launch(
prevent_thread_lock=True,
file_directories=[os.path.dirname(allowed_file.name)],
allowed_paths=[os.path.dirname(allowed_file.name)],
)
client = TestClient(app)
file_response = client.get(f"/file={allowed_file.name}")
assert file_response.status_code == 200
assert len(file_response.text) == len(media_data.BASE64_IMAGE)
io.close()

io = gr.Interface(lambda s: s.name, gr.File(), gr.File())
app, _, _ = io.launch(
prevent_thread_lock=True,
allowed_paths=[os.path.abspath(allowed_file.name)],
)
client = TestClient(app)
file_response = client.get(f"/file={allowed_file.name}")
assert file_response.status_code == 200
assert len(file_response.text) == len(media_data.BASE64_IMAGE)
io.close()

def test_get_blocked_paths(self):
    """Paths listed in `blocked_paths` are refused with a 403, even when they
    would otherwise be served by default or via `allowed_paths`."""

    def status_for(path, **launch_kwargs):
        # Launch a fresh app with the given settings and return the status code
        # of a GET for `path`; the app is closed even if the request raises.
        io = gr.Interface(lambda s: s.name, gr.File(), gr.File())
        app, _, _ = io.launch(prevent_thread_lock=True, **launch_kwargs)
        try:
            return TestClient(app).get(f"/file={path}").status_code
        finally:
            io.close()

    # A file in the current working directory, which Gradio serves by default.
    with tempfile.NamedTemporaryFile(
        dir=".", suffix=".jpg", delete=False
    ) as tmp_file:
        local_path = tmp_file.name
    try:
        # Baseline: with no blocklist the file is reachable.
        assert status_for(local_path) == 200

        # Test that blocking a default Gradio file path works
        assert (
            status_for(local_path, blocked_paths=[os.path.abspath(local_path)])
            == 403
        )

        # Test that blocking a default Gradio directory works: block the
        # directory that contains the file rather than the file itself.
        containing_dir = os.path.dirname(os.path.abspath(local_path))
        assert status_for(local_path, blocked_paths=[containing_dir]) == 403
    finally:
        # Always remove the file so a failing assertion leaves no stray file.
        os.remove(local_path)

    # Test that blocking a directory works even if it's also allowed
    with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp_file:
        outside_path = tmp_file.name
    try:
        parent_dir = os.path.dirname(outside_path)
        assert (
            status_for(
                outside_path,
                allowed_paths=[parent_dir],
                blocked_paths=[parent_dir],
            )
            == 403
        )
    finally:
        os.remove(outside_path)

def test_get_file_created_by_app(self):
app, _, _ = gr.Interface(lambda s: s.name, gr.File(), gr.File()).launch(
Expand Down

0 comments on commit ff21ecb

Please sign in to comment.