Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Support special characters like # and @ in VCS revision strings #6496

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions news/5742.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Support special characters like ``#`` and ``@`` in VCS revision strings
using ``%xx`` escaping.
70 changes: 51 additions & 19 deletions src/pip/_internal/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -1055,29 +1055,61 @@ def _get_encoding_from_headers(headers):
return None


def _clean_url_path_part(part):
"""
Clean a "part" of a URL path (i.e. after splitting on "@" characters).
"""
# We unquote prior to quoting to make sure nothing is double quoted.
return urllib_parse.quote(urllib_parse.unquote(part))


def _clean_file_url_path(part):
"""
Clean the first part of a URL path that corresponds to a local
filesystem path (i.e. the first part after splitting on "@" characters).
"""
# We unquote prior to quoting to make sure nothing is double quoted.
# Also, on Windows the path part might contain a drive letter which
# should not be quoted. On Linux where drive letters do not
# exist, the colon should be quoted. We rely on urllib.request
# to do the right thing here.
return urllib_request.pathname2url(urllib_request.url2pathname(part))


def _clean_url_path(path, is_local_path):
"""
Clean the path portion of a URL.
"""
if is_local_path:
clean_start = _clean_file_url_path
else:
clean_start = _clean_url_path_part

# Split on the reserved character "@" prior to cleaning so that
# revision strings in VCS URLs are properly preserved.
parts = path.split('@')
# Treat the first path part differently because it's the part that
# can contain a Windows drive letter.
cleaned_parts = [clean_start(parts[0])]
cleaned_parts.extend(_clean_url_path_part(part) for part in parts[1:])

return '@'.join(cleaned_parts)


def _clean_link(url):
# type: (str) -> str
"""Makes sure a link is fully encoded. That is, if a ' ' shows up in
the link, it will be rewritten to %20 (while not over-quoting
% or other characters)."""
"""
Make sure a link is fully quoted.

For example, if ' ' occurs in the URL, it will be replaced with "%20",
and without double-quoting other characters.
"""
# Split the URL into parts according to the general structure
# `scheme://netloc/path;parameters?query#fragment`. Note that the
# `netloc` can be empty and the URI will then refer to a local
# filesystem path.
# `scheme://netloc/path;parameters?query#fragment`.
result = urllib_parse.urlparse(url)
# In both cases below we unquote prior to quoting to make sure
# nothing is double quoted.
if result.netloc == "":
# On Windows the path part might contain a drive letter which
# should not be quoted. On Linux where drive letters do not
# exist, the colon should be quoted. We rely on urllib.request
# to do the right thing here.
path = urllib_request.pathname2url(
urllib_request.url2pathname(result.path))
else:
# In addition to the `/` character we protect `@` so that
# revision strings in VCS URLs are properly parsed.
path = urllib_parse.quote(urllib_parse.unquote(result.path), safe="/@")
# If the netloc is empty, then the URL refers to a local filesystem path.
is_local_path = not result.netloc
path = _clean_url_path(result.path, is_local_path=is_local_path)
return urllib_parse.urlunparse(result._replace(path=path))


Expand Down
16 changes: 11 additions & 5 deletions src/pip/_internal/vcs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,15 @@ def make_vcs_requirement_url(repo_url, rev, project_name, subdir=None):
"""
Return the URL for a VCS requirement.

Args:
repo_url: the remote VCS url, with any needed VCS prefix (e.g. "git+").
project_name: the (unescaped) project name.
:repo_url: the remote VCS url, with any needed VCS prefix (e.g. "git+").
:rev: the (unquoted) revision name.
:project_name: the (unescaped) project name.
"""
egg_project_name = pkg_resources.to_filename(project_name)
req = '{}@{}#egg={}'.format(repo_url, rev, egg_project_name)
# Convert the revision to a string before quoting because this can
# be e.g. an int in the case of Subversion.
quoted_rev = urllib_parse.quote(str(rev))
req = '{}@{}#egg={}'.format(repo_url, quoted_rev, egg_project_name)
if subdir:
req += '&subdirectory={}'.format(subdir)

Expand Down Expand Up @@ -340,9 +343,12 @@ def get_url_rev_and_auth(cls, url):
# Remove the vcs prefix.
scheme = scheme.split('+', 1)[1]
netloc, user_pass = cls.get_netloc_and_auth(netloc, scheme)

rev = None
if '@' in path:
path, rev = path.rsplit('@', 1)
path, quoted_rev = path.rsplit('@', 1)
rev = urllib_parse.unquote(quoted_rev)

url = urllib_parse.urlunsplit((scheme, netloc, path, query, ''))
return url, rev, user_pass

Expand Down
77 changes: 71 additions & 6 deletions tests/unit/test_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@

from pip._internal.download import PipSession
from pip._internal.index import (
Link, PackageFinder, _clean_link, _determine_base_url, _egg_info_matches,
_find_name_version_sep, _get_html_page,
Link, PackageFinder, _clean_link, _clean_url_path, _determine_base_url,
_egg_info_matches, _find_name_version_sep, _get_html_page,
)


Expand Down Expand Up @@ -282,6 +282,59 @@ def test_request_retries(caplog):
)


@pytest.mark.parametrize(
('path', 'expected'),
[
# Test a character that needs quoting.
('a b', 'a%20b'),
# Test an unquoted "@".
('a @ b', 'a%20@%20b'),
# Test multiple unquoted "@".
('a @ @ b', 'a%20@%20@%20b'),
# Test a quoted "@".
('a %40 b', 'a%20%40%20b'),
# Test a quoted "@" before an unquoted "@".
('a %40b@ c', 'a%20%40b@%20c'),
# Test a quoted "@" after an unquoted "@".
('a @b%40 c', 'a%20@b%40%20c'),
# Test alternating quoted and unquoted "@".
('a %40@b %40@c %40', 'a%20%40@b%20%40@c%20%40'),
]
)
def test_clean_url_path(path, expected):
for is_local_path in (True, False):
actual = _clean_url_path(path, is_local_path=is_local_path)
assert actual == expected, 'is_local_path: {}'.format(is_local_path)


@pytest.mark.parametrize(
('path', 'expected'),
[
# Test a VCS path with a Windows drive letter and revision.
('/T:/with space/[email protected]',
'///T:/with%20space/[email protected]'),
]
)
@pytest.mark.skipif("sys.platform != 'win32'")
def test_clean_url_path_windows(path, expected):
actual = _clean_url_path(path, is_local_path=True)
assert actual == expected


@pytest.mark.parametrize(
('path', 'expected'),
[
# Test a VCS path with a Windows drive letter and revision.
('/T:/with space/[email protected]',
'/T%3A/with%20space/[email protected]'),
]
)
@pytest.mark.skipif("sys.platform == 'win32'")
def test_clean_url_path_non_windows(path, expected):
actual = _clean_url_path(path, is_local_path=True)
assert actual == expected


@pytest.mark.parametrize(
("url", "clean_url"),
[
Expand Down Expand Up @@ -311,11 +364,17 @@ def test_request_retries(caplog):
"https://localhost.localdomain/T%3A/path/"),
# VCS URL containing revision string.
("git+ssh://example.com/path to/[email protected]#egg=my-package-1.0",
"git+ssh://example.com/path%20to/[email protected]#egg=my-package-1.0")
"git+ssh://example.com/path%20to/[email protected]#egg=my-package-1.0"),
# VCS URL with a quoted "#" in the revision string.
("git+https://example.com/repo.git@hash%23symbol#egg=my-package-1.0",
"git+https://example.com/repo.git@hash%23symbol#egg=my-package-1.0"),
# VCS URL with a quoted "@" in the revision string.
("git+https://example.com/repo.git@at%40 space#egg=my-package-1.0",
"git+https://example.com/repo.git@at%40%20space#egg=my-package-1.0"),
]
)
def test_clean_link(url, clean_url):
assert(_clean_link(url) == clean_url)
assert _clean_link(url) == clean_url


@pytest.mark.parametrize(
Expand All @@ -325,7 +384,10 @@ def test_clean_link(url, clean_url):
# letter should not be quoted. The trailing `/` should be
# removed.
("file:///T:/path/with spaces/",
"file:///T:/path/with%20spaces")
"file:///T:/path/with%20spaces"),
# Test a VCS URL with a Windows drive letter and revision.
("git+file:///T:/with space/[email protected]#egg=my-package-1.0",
"git+file:///T:/with%20space/[email protected]#egg=my-package-1.0"),
]
)
@pytest.mark.skipif("sys.platform != 'win32'")
Expand All @@ -339,7 +401,10 @@ def test_clean_link_windows(url, clean_url):
# URL with Windows drive letter, running on non-windows
# platform. The `:` after the drive should be quoted.
("file:///T:/path/with spaces/",
"file:///T%3A/path/with%20spaces/")
"file:///T%3A/path/with%20spaces/"),
# Test a VCS URL with a Windows drive letter and revision.
("git+file:///T:/with space/[email protected]#egg=my-package-1.0",
"git+file:/T%3A/with%20space/[email protected]#egg=my-package-1.0"),
]
)
@pytest.mark.skipif("sys.platform == 'win32'")
Expand Down
13 changes: 13 additions & 0 deletions tests/unit/test_vcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,12 @@ def test_ensure_svn_available():
# Test an unescaped project name.
(('git+https://example.com/pkg', 'dev', 'zope-interface'),
'git+https://example.com/pkg@dev#egg=zope_interface'),
# Test a "#" symbol in the revision name.
(('git+https://example.com/pkg', 'hash#symbol', 'myproj'),
'git+https://example.com/pkg@hash%23symbol#egg=myproj'),
# Test an "@" symbol in the revision name.
(('git+https://example.com/pkg', 'at@symbol', 'myproj'),
'git+https://example.com/pkg@at%40symbol#egg=myproj'),
])
def test_make_vcs_requirement_url(args, expected):
actual = make_vcs_requirement_url(*args)
Expand Down Expand Up @@ -252,12 +258,19 @@ def test_git__get_url_rev__idempotent():
assert result2 == expected


# Each expected value is `(url, rev, (username, password))`.
@pytest.mark.parametrize('url, expected', [
('svn+https://svn.example.com/MyProject',
('https://svn.example.com/MyProject', None, (None, None))),
# Test a "+" in the path portion.
('svn+https://svn.example.com/My+Project',
('https://svn.example.com/My+Project', None, (None, None))),
# Test a quoted "#" symbol in the revision name.
('git+https://example.com/MyProject@hash%23symbol',
('https://example.com/MyProject', 'hash#symbol', (None, None))),
# Test a quoted "@" symbol in the revision name.
('git+https://example.com/MyProject@at%40symbol',
('https://example.com/MyProject', 'at@symbol', (None, None))),
])
def test_version_control__get_url_rev_and_auth(url, expected):
"""
Expand Down