Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

application: extract path prefix logic to middleware #2733

Merged
merged 5 commits into from
Oct 7, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions tensorboard/backend/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ py_library(
visibility = ["//visibility:public"],
deps = [
":http_util",
":path_prefix",
"//tensorboard:errors",
"//tensorboard:expect_sqlite3_installed",
"//tensorboard/backend/event_processing:data_provider",
Expand Down Expand Up @@ -97,6 +98,29 @@ py_test(
],
)

py_library(
name = "path_prefix",
srcs = ["path_prefix.py"],
srcs_version = "PY2AND3",
deps = [
"//tensorboard:errors",
],
)

py_test(
name = "path_prefix_test",
size = "small",
srcs = ["path_prefix_test.py"],
srcs_version = "PY2AND3",
tags = ["support_notf"],
deps = [
":path_prefix",
"//tensorboard:errors",
"//tensorboard:test",
"@org_pocoo_werkzeug",
],
)

py_library(
name = "process_graph",
srcs = ["process_graph.py"],
Expand Down
30 changes: 13 additions & 17 deletions tensorboard/backend/application.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@

from tensorboard import errors
from tensorboard.backend import http_util
from tensorboard.backend import path_prefix
from tensorboard.backend.event_processing import db_import_multiplexer
from tensorboard.backend.event_processing import data_provider as event_data_provider # pylint: disable=line-too-long
from tensorboard.backend.event_processing import plugin_event_accumulator as event_accumulator # pylint: disable=line-too-long
Expand Down Expand Up @@ -258,8 +259,7 @@ def __init__(self, plugins, path_prefix=''):
# TODO(@chihuahua): Delete this RPC once we have skylark rules that
# obviate the need for the frontend to determine which plugins are
# active.
self._path_prefix + DATA_PREFIX + PLUGINS_LISTING_ROUTE:
self._serve_plugins_listing,
DATA_PREFIX + PLUGINS_LISTING_ROUTE: self._serve_plugins_listing,
}
unordered_prefix_routes = {}

Expand Down Expand Up @@ -291,10 +291,11 @@ def __init__(self, plugins, path_prefix=''):
'route does not start with a slash' %
(plugin.plugin_name, route))
if type(plugin) is core_plugin.CorePlugin: # pylint: disable=unidiomatic-typecheck
path = self._path_prefix + route
path = route
else:
path = (self._path_prefix + DATA_PREFIX + PLUGIN_PREFIX + '/' +
plugin.plugin_name + route)
path = (
DATA_PREFIX + PLUGIN_PREFIX + '/' + plugin.plugin_name + route
)

if path.endswith('/*'):
# Note we remove the '*' but leave the slash in place.
Expand Down Expand Up @@ -328,6 +329,7 @@ def __init__(self, plugins, path_prefix=''):
def _create_wsgi_app(self):
"""Apply middleware to create the final WSGI app."""
app = self._route_request
app = path_prefix.PathPrefixMiddleware(app, self._path_prefix)
app = _handling_errors(app)
return app

Expand Down Expand Up @@ -384,7 +386,7 @@ def _serve_plugins_listing(self, request):
loading_mechanism = {
'type': 'IFRAME',
'module_path': ''.join([
self._path_prefix, DATA_PREFIX, PLUGIN_PREFIX, '/',
request.script_root, DATA_PREFIX, PLUGIN_PREFIX, '/',
plugin.plugin_name, es_module_handler,
]),
}
Expand Down Expand Up @@ -427,7 +429,7 @@ def _route_request(self, environ, start_response):
"""
request = wrappers.Request(environ)
parsed_url = urlparse.urlparse(request.path)
clean_path = _clean_path(parsed_url.path, self._path_prefix)
clean_path = _clean_path(parsed_url.path)

# pylint: disable=too-many-function-args
if clean_path in self.exact_routes:
Expand Down Expand Up @@ -579,22 +581,16 @@ def _get_connect_params(query):
return {k: json.loads(v[0]) for k, v in params.items()}


def _clean_path(path, path_prefix=""):
"""Cleans the path of the request.

Removes the ending '/' if the request begins with the path prefix and pings a
non-empty route.
def _clean_path(path):
"""Removes a trailing slash from a non-root path.

Arguments:
path: The path of a request.
path_prefix: The prefix string that every route of this TensorBoard instance
starts with.

Returns:
The route to use to serve the request (with the path prefix stripped if
applicable).
The route to use to serve the request.
"""
if path != path_prefix + '/' and path.endswith('/'):
if path != '/' and path.endswith('/'):
return path[:-1]
return path

Expand Down
65 changes: 65 additions & 0 deletions tensorboard/backend/path_prefix.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Internal path prefix support for TensorBoard.

Using a path prefix of `/foo/bar` enables TensorBoard to serve from
`http://localhost:6006/foo/bar/` rather than `http://localhost:6006/`.
See the `--path_prefix` flag docs for more details.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from tensorboard import errors


class PathPrefixMiddleware(object):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So I had been thinking we could generalize this as taking a dict mapping prefixes to WSGI apps. This unblocks using this for the plugin routing as well as described in #2573.

Feel free to leave this PR as-is, but just FYI, I think it will make sense to fold this into a more generic middleware sooner or later.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes sense to me.

"""WSGI middleware for path prefixes.

All requests to this middleware must begin with the specified path
prefix (otherwise, a 404 will be returned immediately). Requests will
be forwarded to the underlying application with the path prefix
stripped and appended to `SCRIPT_NAME` (see the WSGI spec, PEP 3333,
for details).
"""

def __init__(self, application, path_prefix):
"""Initializes this middleware.

Args:
application: The WSGI application to wrap (see PEP 3333).
path_prefix: A string path prefix to be stripped from incoming
requests. If empty, this middleware is a no-op. If non-empty,
the path prefix must start with a slash and not end with one
(e.g., "/tensorboard").
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IMO the semantics are more obvious if we require that the path prefix end in a slash, since then we don't necessarily even need to support the redirect, and there's no confusion about what the canonical URL will be.

WDYT? I think we could fairly consider that not to be a breaking change given that using the prefix without the trailing slash is broken today due to the lack of redirect.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I’m fine with making that change at the user level (i.e., a requirement
on the flag value). As an implementation detail, I find it useful to
represent path components like this with a leading slash and no trailing
slash, because then they concatenate properly. This is also what
Werkzeug does for the .script_root property (they don’t expose
SCRIPT_NAME correctly), and is the convention that application.py
uses for constants like DATA_PREFIX = "/data".

Given that this PR doesn’t actually make any user-facing changes modulo
existing undefined behavior, I’m comfortable deferring the tightening of
the flag domain. Let me know if you object.

"""
if path_prefix.endswith("/"):
raise ValueError("Path prefix must not end with slash: %r" % path_prefix)
if path_prefix and not path_prefix.startswith("/"):
raise ValueError(
"Non-empty path prefix must start with slash: %r" % path_prefix
)
self._application = application
self._path_prefix = path_prefix
self._strict_prefix = self._path_prefix + "/"

def __call__(self, environ, start_response):
path = environ.get("PATH_INFO", "")
if path != self._path_prefix and not path.startswith(self._strict_prefix):
raise errors.NotFoundError()
environ["PATH_INFO"] = path[len(self._path_prefix):]
environ["SCRIPT_NAME"] = environ.get("SCRIPT_NAME", "") + self._path_prefix
return self._application(environ, start_response)
112 changes: 112 additions & 0 deletions tensorboard/backend/path_prefix_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for `tensorboard.backend.path_prefix`."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import json

import werkzeug

from tensorboard import errors
from tensorboard import test as tb_test
from tensorboard.backend import path_prefix


class PathPrefixMiddlewareTest(tb_test.TestCase):
"""Tests for `PathPrefixMiddleware`."""

def _echo_app(self, environ, start_response):
# https://www.python.org/dev/peps/pep-0333/#environ-variables
data = {
"path": environ.get("PATH_INFO", ""),
"script": environ.get("SCRIPT_NAME", ""),
}
body = json.dumps(data, sort_keys=True)
start_response("200 OK", [("Content-Type", "application/json")])
return [body]

def _assert_ok(self, response, path, script):
self.assertEqual(response.status_code, 200)
actual = json.loads(response.get_data())
expected = dict(path=path, script=script)
self.assertEqual(actual, expected)

def test_bad_path_prefix_without_leading_slash(self):
with self.assertRaises(ValueError) as cm:
path_prefix.PathPrefixMiddleware(self._echo_app, "hmm")
msg = str(cm.exception)
self.assertIn("must start with slash", msg)
self.assertIn(repr("hmm"), msg)

def test_bad_path_prefix_with_trailing_slash(self):
with self.assertRaises(ValueError) as cm:
path_prefix.PathPrefixMiddleware(self._echo_app, "/hmm/")
msg = str(cm.exception)
self.assertIn("must not end with slash", msg)
self.assertIn(repr("/hmm/"), msg)

def test_empty_path_prefix(self):
app = path_prefix.PathPrefixMiddleware(self._echo_app, "")
server = werkzeug.test.Client(app, werkzeug.BaseResponse)

with self.subTest("at empty"):
self._assert_ok(server.get(""), path="", script="")

with self.subTest("at root"):
self._assert_ok(server.get("/"), path="/", script="")

with self.subTest("at subpath"):
response = server.get("/foo/bar")
self._assert_ok(server.get("/foo/bar"), path="/foo/bar", script="")

def test_nonempty_path_prefix(self):
app = path_prefix.PathPrefixMiddleware(self._echo_app, "/pfx")
server = werkzeug.test.Client(app, werkzeug.BaseResponse)

with self.subTest("at root"):
response = server.get("/pfx")
self._assert_ok(response, path="", script="/pfx")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no action required; I simply dunno: how does our server respond when the path is "" vs. "/"?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good question. The answer is that Werkzeug normalizes that out, so our
app (which uses Werkzeug for routing) only sees request.path == "/":

# main.py
import werkzeug

def bare_app(environ, start_response):
  start_response("200 OK", [("Content-Type", "text/plain")])
  return [repr(environ.get("PATH_INFO")).encode("utf-8")]

@werkzeug.Request.application
def werkzeug_app(request):
  return werkzeug.Response(repr(request.path), content_type="text/plain")

start_response = lambda status, headers: None
request_empty = {"REQUEST_METHOD": "GET", "PATH_INFO": ""}
request_root = {"REQUEST_METHOD": "GET", "PATH_INFO": "/"}

print("With base WSGI:")
print(list(bare_app(request_empty, start_response)))
print(list(bare_app(request_root, start_response)))

print("With Werkzeug:")
print(list(werkzeug_app(request_empty, start_response)))
print(list(werkzeug_app(request_root, start_response)))
$ python main.py
With base WSGI:
[b"''"]
[b"'/'"]
With Werkzeug:
[b"'/'"]
[b"'/'"]


with self.subTest("at root with slash"):
response = server.get("/pfx/")
self._assert_ok(response, path="/", script="/pfx")

with self.subTest("at subpath"):
response = server.get("/pfx/foo/bar")
self._assert_ok(response, path="/foo/bar", script="/pfx")

with self.subTest("at non-path-component extension"):
with self.assertRaises(errors.NotFoundError):
server.get("/pfxz")

with self.subTest("above path prefix"):
with self.assertRaises(errors.NotFoundError):
server.get("/hmm")

def test_composition(self):
app = self._echo_app
app = path_prefix.PathPrefixMiddleware(app, "/bar")
app = path_prefix.PathPrefixMiddleware(app, "/foo")
server = werkzeug.test.Client(app, werkzeug.BaseResponse)

response = server.get("/foo/bar/baz/quux")
self._assert_ok(response, path="/baz/quux", script="/foo/bar")


if __name__ == "__main__":
tb_test.main()