Skip to content

Commit

Permalink
Merge pull request #5606 from cjerdonek/vcs-parse-url-once
Browse files Browse the repository at this point in the history
Change VersionControl to parse the URL only once inside get_url_rev().
  • Loading branch information
pradyunsg authored Jul 22, 2018
2 parents c81597b + c6e65a0 commit a296897
Show file tree
Hide file tree
Showing 6 changed files with 158 additions and 101 deletions.
Empty file.
37 changes: 27 additions & 10 deletions src/pip/_internal/vcs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,10 +213,26 @@ def export(self, location):
"""
raise NotImplementedError

def get_url_rev(self, url):
def get_netloc_and_auth(self, netloc):
"""
Returns the correct repository URL and revision by parsing the given
repository URL
Parse the repository URL's netloc, and return the new netloc to use
along with auth information.
This is mainly for the Subversion class to override, so that auth
information can be provided via the --username and --password options
instead of through the URL. For other subclasses like Git without
such an option, auth information must stay in the URL.
Returns: (netloc, (username, password)).
"""
return netloc, (None, None)

def get_url_rev_and_auth(self, url):
"""
Parse the repository URL to use, and return the URL, revision,
and auth info to use.
Returns: (url, rev, (username, password)).
"""
error_message = (
"Sorry, '%s' is a malformed VCS url. "
Expand All @@ -226,26 +242,27 @@ def get_url_rev(self, url):
assert '+' in url, error_message % url
url = url.split('+', 1)[1]
scheme, netloc, path, query, frag = urllib_parse.urlsplit(url)
netloc, user_pass = self.get_netloc_and_auth(netloc)
rev = None
if '@' in path:
path, rev = path.rsplit('@', 1)
url = urllib_parse.urlunsplit((scheme, netloc, path, query, ''))
return url, rev
return url, rev, user_pass

def get_url_rev_args(self, url):
def make_rev_args(self, username, password):
"""
Return the URL and RevOptions "extra arguments" to use in obtain(),
as a tuple (url, extra_args).
Return the RevOptions "extra arguments" to use in obtain().
"""
return url, []
return []

def get_url_rev_options(self, url):
"""
Return the URL and RevOptions object to use in obtain() and in
some cases export(), as a tuple (url, rev_options).
"""
url, rev = self.get_url_rev(url)
url, extra_args = self.get_url_rev_args(url)
url, rev, user_pass = self.get_url_rev_and_auth(url)
username, password = user_pass
extra_args = self.make_rev_args(username, password)
rev_options = self.make_rev_options(rev, extra_args=extra_args)

return url, rev_options
Expand Down
6 changes: 3 additions & 3 deletions src/pip/_internal/vcs/bazaar.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,12 +66,12 @@ def update(self, dest, rev_options):
cmd_args = ['pull', '-q'] + rev_options.to_args()
self.run_command(cmd_args, cwd=dest)

def get_url_rev(self, url):
def get_url_rev_and_auth(self, url):
# hotfix the URL scheme after removing bzr+ from bzr+ssh:// readd it
url, rev = super(Bazaar, self).get_url_rev(url)
url, rev, user_pass = super(Bazaar, self).get_url_rev_and_auth(url)
if url.startswith('ssh://'):
url = 'bzr+' + url
return url, rev
return url, rev, user_pass

def get_url(self, location):
urls = self.run_command(['info'], show_stdout=False, cwd=location)
Expand Down
8 changes: 4 additions & 4 deletions src/pip/_internal/vcs/git.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ def get_src_requirement(self, dist, location):
req += '&subdirectory=' + subdirectory
return req

def get_url_rev(self, url):
def get_url_rev_and_auth(self, url):
"""
Prefixes stub URLs like 'user@hostname:user/repo.git' with 'ssh://'.
That's required because although they use SSH they sometimes don't
Expand All @@ -275,12 +275,12 @@ def get_url_rev(self, url):
if '://' not in url:
assert 'file:' not in url
url = url.replace('git+', 'git+ssh://')
url, rev = super(Git, self).get_url_rev(url)
url, rev, user_pass = super(Git, self).get_url_rev_and_auth(url)
url = url.replace('ssh://', '')
else:
url, rev = super(Git, self).get_url_rev(url)
url, rev, user_pass = super(Git, self).get_url_rev_and_auth(url)

return url, rev
return url, rev, user_pass

def update_submodules(self, location):
if not os.path.exists(os.path.join(location, '.gitmodules')):
Expand Down
73 changes: 35 additions & 38 deletions src/pip/_internal/vcs/subversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,9 @@
import os
import re

from pip._vendor.six.moves.urllib import parse as urllib_parse

from pip._internal.index import Link
from pip._internal.utils.logging import indent_log
from pip._internal.utils.misc import display_path, remove_auth_from_url, rmtree
from pip._internal.utils.misc import display_path, rmtree
from pip._internal.vcs import VersionControl, vcs

_svn_xml_url_re = re.compile('url="([^"]+)"')
Expand Down Expand Up @@ -102,18 +100,45 @@ def get_revision(self, location):
revision = max(revision, localrev)
return revision

def get_url_rev(self, url):
def get_netloc_and_auth(self, netloc):
"""
Parse out and remove from the netloc the auth information.
This allows the auth information to be provided via the --username
and --password options instead of via the URL.
"""
if '@' not in netloc:
return netloc, (None, None)

# Split from the right because that's how urllib.parse.urlsplit()
# behaves if more than one @ is present (by checking the password
# attribute of urlsplit()'s return value).
auth, netloc = netloc.rsplit('@', 1)
if ':' in auth:
# Split from the left because that's how urllib.parse.urlsplit()
# behaves if more than one : is present (again by checking the
# password attribute of the return value)
user_pass = tuple(auth.split(':', 1))
else:
user_pass = auth, None

return netloc, user_pass

def get_url_rev_and_auth(self, url):
# hotfix the URL scheme after removing svn+ from svn+ssh:// readd it
url, rev = super(Subversion, self).get_url_rev(url)
url, rev, user_pass = super(Subversion, self).get_url_rev_and_auth(url)
if url.startswith('ssh://'):
url = 'svn+' + url
return url, rev
return url, rev, user_pass

def get_url_rev_args(self, url):
extra_args = get_rev_options_args(url)
url = remove_auth_from_url(url)
def make_rev_args(self, username, password):
extra_args = []
if username:
extra_args += ['--username', username]
if password:
extra_args += ['--password', password]

return url, extra_args
return extra_args

def get_url(self, location):
# In cases where the source is in a subdirectory, not alongside
Expand Down Expand Up @@ -193,32 +218,4 @@ def is_commit_id_equal(self, dest, name):
return False


def get_rev_options_args(url):
"""
Return the extra arguments to pass to RevOptions.
"""
r = urllib_parse.urlsplit(url)
if hasattr(r, 'username'):
# >= Python-2.5
username, password = r.username, r.password
else:
netloc = r[1]
if '@' in netloc:
auth = netloc.split('@')[0]
if ':' in auth:
username, password = auth.split(':', 1)
else:
username, password = auth, None
else:
username, password = None, None

extra_args = []
if username:
extra_args += ['--username', username]
if password:
extra_args += ['--password', password]

return extra_args


vcs.register(Subversion)
135 changes: 89 additions & 46 deletions tests/unit/test_vcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,23 +129,60 @@ def test_translate_egg_surname():
assert vc.translate_egg_surname("foo/1.2.3") == "foo_1.2.3"


# The non-SVN backends all use the same get_netloc_and_auth(), so only test
# Git as a representative.
@pytest.mark.parametrize('netloc, expected', [
# Test a basic case.
('example.com', ('example.com', (None, None))),
# Test with username and password.
('user:[email protected]', ('user:[email protected]', (None, None))),
])
def test_git__get_netloc_and_auth(netloc, expected):
"""
Test VersionControl.get_netloc_and_auth().
"""
actual = Git().get_netloc_and_auth(netloc)
assert actual == expected


@pytest.mark.parametrize('netloc, expected', [
# Test a basic case.
('example.com', ('example.com', (None, None))),
# Test with username and no password.
('[email protected]', ('example.com', ('user', None))),
# Test with username and password.
('user:[email protected]', ('example.com', ('user', 'pass'))),
# Test the password containing an @ symbol.
('user:pass@[email protected]', ('example.com', ('user', 'pass@word'))),
# Test the password containing a : symbol.
('user:pass:[email protected]', ('example.com', ('user', 'pass:word'))),
])
def test_subversion__get_netloc_and_auth(netloc, expected):
"""
Test Subversion.get_netloc_and_auth().
"""
actual = Subversion().get_netloc_and_auth(netloc)
assert actual == expected


def test_git__get_url_rev__idempotent():
"""
Check that Git.get_url_rev() is idempotent for what the code calls
Check that Git.get_url_rev_and_auth() is idempotent for what the code calls
"stub URLs" (i.e. URLs that don't contain "://").
Also check that it doesn't change self.url.
"""
url = '[email protected]:MyProject#egg=MyProject'
vcs = Git(url)
result1 = vcs.get_url_rev(url)
result1 = vcs.get_url_rev_and_auth(url)
assert vcs.url == url
result2 = vcs.get_url_rev(url)
assert result1 == ('[email protected]:MyProject', None)
assert result2 == ('[email protected]:MyProject', None)
result2 = vcs.get_url_rev_and_auth(url)
expected = ('[email protected]:MyProject', None, (None, None))
assert result1 == expected
assert result2 == expected


def test_bazaar__get_url_rev():
def test_bazaar__get_url_rev_and_auth():
"""
Test bzr url support.
Expand All @@ -170,61 +207,67 @@ def test_bazaar__get_url_rev():
url='bzr+lp:MyLaunchpadProject#egg=MyLaunchpadProject'
)

assert http_bzr_repo.get_url_rev(http_bzr_repo.url) == (
'http://bzr.myproject.org/MyProject/trunk/', None,
assert http_bzr_repo.get_url_rev_and_auth(http_bzr_repo.url) == (
'http://bzr.myproject.org/MyProject/trunk/', None, (None, None),
)
assert https_bzr_repo.get_url_rev(https_bzr_repo.url) == (
'https://bzr.myproject.org/MyProject/trunk/', None,
assert https_bzr_repo.get_url_rev_and_auth(https_bzr_repo.url) == (
'https://bzr.myproject.org/MyProject/trunk/', None, (None, None),
)
assert ssh_bzr_repo.get_url_rev(ssh_bzr_repo.url) == (
'bzr+ssh://bzr.myproject.org/MyProject/trunk/', None,
assert ssh_bzr_repo.get_url_rev_and_auth(ssh_bzr_repo.url) == (
'bzr+ssh://bzr.myproject.org/MyProject/trunk/', None, (None, None),
)
assert ftp_bzr_repo.get_url_rev(ftp_bzr_repo.url) == (
'ftp://bzr.myproject.org/MyProject/trunk/', None,
assert ftp_bzr_repo.get_url_rev_and_auth(ftp_bzr_repo.url) == (
'ftp://bzr.myproject.org/MyProject/trunk/', None, (None, None),
)
assert sftp_bzr_repo.get_url_rev(sftp_bzr_repo.url) == (
'sftp://bzr.myproject.org/MyProject/trunk/', None,
assert sftp_bzr_repo.get_url_rev_and_auth(sftp_bzr_repo.url) == (
'sftp://bzr.myproject.org/MyProject/trunk/', None, (None, None),
)
assert launchpad_bzr_repo.get_url_rev(launchpad_bzr_repo.url) == (
'lp:MyLaunchpadProject', None,
assert launchpad_bzr_repo.get_url_rev_and_auth(launchpad_bzr_repo.url) == (
'lp:MyLaunchpadProject', None, (None, None),
)


def test_get_git_version():
git_version = Git().get_git_version()
assert git_version >= parse_version('1.0.0')


# The non-SVN backends all have the same get_url_rev_args() implementation,
# so test with Git as a representative.
@pytest.mark.parametrize('url, expected', [
# Test a basic case.
('git+https://git.example.com/MyProject#egg=MyProject',
('git+https://git.example.com/MyProject#egg=MyProject', [])),
# Test with username and password.
('git+https://user:[email protected]/MyProject#egg=MyProject',
('git+https://user:[email protected]/MyProject#egg=MyProject', [])),
# The non-SVN backends all use the same make_rev_args(), so only test
# Git as a representative.
@pytest.mark.parametrize('username, password, expected', [
(None, None, []),
('user', None, []),
('user', 'pass', []),
])
def test_git__get_url_rev_args(url, expected):
def test_git__make_rev_args(username, password, expected):
"""
Test Git.get_url_rev_args().
Test VersionControl.make_rev_args().
"""
actual = Git().get_url_rev_args(url)
actual = Git().make_rev_args(username, password)
assert actual == expected


@pytest.mark.parametrize('url, expected', [
# Test a basic case.
('svn+https://svn.example.com/MyProject#egg=MyProject',
('svn+https://svn.example.com/MyProject#egg=MyProject', [])),
# Test with username and password.
('svn+https://user:[email protected]/MyProject#egg=MyProject',
('svn+https://svn.example.com/MyProject#egg=MyProject',
['--username', 'user', '--password', 'pass'])),
@pytest.mark.parametrize('username, password, expected', [
(None, None, []),
('user', None, ['--username', 'user']),
('user', 'pass', ['--username', 'user', '--password', 'pass']),
])
def test_subversion__get_url_rev_args(url, expected):
def test_subversion__make_rev_args(username, password, expected):
"""
Test Subversion.get_url_rev_args().
Test Subversion.make_rev_args().
"""
actual = Subversion().get_url_rev_args(url)
actual = Subversion().make_rev_args(username, password)
assert actual == expected


def test_subversion__get_url_rev_options():
"""
Test Subversion.get_url_rev_options().
"""
url = 'svn+https://user:[email protected]/[email protected]#egg=MyProject'
url, rev_options = Subversion().get_url_rev_options(url)
assert url == 'https://svn.example.com/MyProject'
assert rev_options.rev == 'v1.0'
assert rev_options.extra_args == (
['--username', 'user', '--password', 'pass']
)


def test_get_git_version():
git_version = Git().get_git_version()
assert git_version >= parse_version('1.0.0')

0 comments on commit a296897

Please sign in to comment.