Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix urljoin for arbitrary URL protocols #8020

Merged
merged 9 commits into from
Sep 17, 2023
135 changes: 89 additions & 46 deletions src/poetry/vcs/git/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
from subprocess import CalledProcessError
from typing import TYPE_CHECKING
from urllib.parse import urljoin
from urllib.parse import urlparse
from urllib.parse import urlunparse

from dulwich import porcelain
from dulwich.client import HTTPUnauthorized
Expand All @@ -31,6 +33,9 @@

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Matches submodule URLs that begin with "./" or "../". Such URLs are treated
# as relative and are resolved against the parent repository's remote URL.
RELATIVE_SUBMODULE_REGEX = re.compile(r"^\.{1,2}/")


def is_revision_sha(revision: str | None) -> bool:
    """
    Tell whether ``revision`` looks like a (possibly abbreviated) commit SHA.

    A value qualifies when it consists solely of 5 to 40 lowercase
    hexadecimal digits. ``None`` and the empty string never qualify.
    """
    # fullmatch is used instead of match with a trailing "$": "$" also matches
    # right before a final newline, which would let "abc12\n" pass as a SHA.
    return re.fullmatch(r"[0-9a-f]{5,40}", revision or "") is not None
Expand Down Expand Up @@ -327,54 +332,64 @@ def _clone_submodules(cls, repo: Repo) -> None:
Helper method to identify configured submodules and clone them recursively.
"""
repo_root = Path(repo.path)
modules_config = repo_root.joinpath(".gitmodules")

# A relative URL by definition starts with ../ or ./
relative_submodule_regex = re.compile(r"^\.{1,2}/")

if modules_config.exists():
config = ConfigFile.from_path(str(modules_config))

url: bytes
path: bytes
submodules = parse_submodules(config)

for path, url, name in submodules:
path_relative = Path(path.decode("utf-8"))
path_absolute = repo_root.joinpath(path_relative)

url_string = url.decode("utf-8")
if relative_submodule_regex.search(url_string):
url_string = urljoin(f"{Git.get_remote_url(repo)}/", url_string)

source_root = path_absolute.parent
source_root.mkdir(parents=True, exist_ok=True)

with repo:
index = repo.open_index()

try:
entry = index[path]
except KeyError:
logger.debug(
"Skip submodule %s in %s, path %s not found",
name,
repo.path,
path,
)
continue

assert isinstance(entry, IndexEntry)
revision = entry.sha.decode("utf-8")

cls.clone(
url=url_string,
source_root=source_root,
name=path_relative.name,
for submodule in cls._get_submodules(repo):
path_absolute = repo_root / submodule.path
source_root = path_absolute.parent
source_root.mkdir(parents=True, exist_ok=True)
cls.clone(
url=submodule.url,
source_root=source_root,
name=path_absolute.name,
revision=submodule.revision,
clean=path_absolute.exists()
and not path_absolute.joinpath(".git").is_dir(),
)

@classmethod
def _get_submodules(cls, repo: Repo) -> list[SubmoduleInfo]:
    """
    Collect the submodules declared in the repository's .gitmodules file.

    Submodule URLs starting with ./ or ../ are resolved against the remote
    URL of the given repository. A submodule whose path has no entry in the
    repository index is skipped (and reported at debug level), because no
    revision can be determined for it.

    Returns an empty list when the repository has no .gitmodules file.
    """
    modules_config = Path(repo.path, ".gitmodules")

    if not modules_config.exists():
        return []

    config = ConfigFile.from_path(str(modules_config))

    submodules: list[SubmoduleInfo] = []
    for path, url, name in parse_submodules(config):
        url_str = url.decode("utf-8")
        path_str = path.decode("utf-8")
        name_str = name.decode("utf-8")

        if RELATIVE_SUBMODULE_REGEX.search(url_str):
            # The trailing slash makes the remote URL behave like a directory,
            # so a leading "../" steps out of the repository itself rather
            # than out of its parent.
            url_str = urlpathjoin(f"{cls.get_remote_url(repo)}/", url_str)

        with repo:
            index = repo.open_index()

            try:
                # The index is keyed by the raw (bytes) path.
                entry = index[path]
            except KeyError:
                logger.debug(
                    "Skip submodule %s in %s, path %s not found",
                    name_str,
                    repo.path,
                    path_str,
                )
                continue

            assert isinstance(entry, IndexEntry)
            revision = entry.sha.decode("utf-8")

        submodules.append(
            SubmoduleInfo(
                path=path_str,
                url=url_str,
                name=name_str,
                revision=revision,
            )
        )

    return submodules

@staticmethod
def is_using_legacy_client() -> bool:
Expand Down Expand Up @@ -456,3 +471,31 @@ def clone(

# fallback to legacy git client
return cls._clone_legacy(url=url, refspec=refspec, target=target)


def urlpathjoin(base: str, path: str) -> str:
    """
    Join a path onto a URL, whatever the URL's protocol is.

    urllib.parse.urljoin only resolves relative references for the schemes
    listed in urllib.parse.uses_relative. Schemes that are common with git,
    such as ssh or git+ssh, are not in that list, so urljoin cannot be applied
    to the full URL.

    Instead, the base URL is split into its components, only the path
    component is joined with the given path, and the URL is reassembled.

    See: https://github.com/python-poetry/poetry/issues/6499#issuecomment-1564712609
    """
    components = urlparse(base)
    # A bare path carries no scheme, so urljoin resolves it as expected.
    joined_path = urljoin(components.path, path)
    return urlunparse(components._replace(path=joined_path))


@dataclasses.dataclass
class SubmoduleInfo:
    """Description of a single git submodule, with all values decoded to text."""

    # Submodule location, joined onto the parent repository root by callers.
    path: str
    # URL the submodule is cloned from.
    url: str
    # Submodule name as read from the .gitmodules configuration.
    name: str
    # Commit SHA taken from the parent repository's index entry.
    revision: str
31 changes: 31 additions & 0 deletions tests/integration/test_utils_vcs_git.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
from pathlib import Path
from typing import TYPE_CHECKING
from typing import Iterator
from urllib.parse import urlparse
from urllib.parse import urlunparse

import pytest

Expand Down Expand Up @@ -389,3 +391,32 @@ def test_system_git_called_when_configured(
target=path,
refspec=GitRefSpec(branch="0.1", revision=None, tag=None, ref=b"HEAD"),
)


def test_relative_submodules_with_ssh(
    source_url: str, tmpdir: Path, mocker: MockerFixture
) -> None:
    """Relative submodule URLs are resolved against an ssh:// remote URL."""
    clone_target = tmpdir / "temp"
    ssh_remote = urlunparse(urlparse(source_url)._replace(scheme="ssh"))

    repo = Git._clone(
        url=source_url,
        refspec=GitRefSpec(branch="relative_submodule"),
        target=clone_target,
    )

    # Build a git config whose remote.origin points at the ssh variant of the
    # source URL, and make the cloned repository report it as its own config.
    origin_section = {b"url": ssh_remote.encode("utf-8")}
    fake_config = ConfigFile({(b"remote", b"origin"): origin_section})
    patched_get_config = mocker.patch.object(repo, "get_config")
    patched_get_config.return_value = fake_config

    resolved_urls = [info.url for info in Git._get_submodules(repo)]

    # The absolute https URL is left alone; the relative ones pick up the
    # ssh scheme of the (faked) remote.
    expected_urls = [
        "https://github.com/pypa/sample-namespace-packages.git",
        "ssh://github.com/python-poetry/test-fixture-vcs-repository.git",
        "ssh://github.com/python-poetry/test-fixture-vcs-repository.git",
    ]
    assert resolved_urls == expected_urls