diff --git a/src/poetry/vcs/git/backend.py b/src/poetry/vcs/git/backend.py index a3aa0509280..e84588c0995 100644 --- a/src/poetry/vcs/git/backend.py +++ b/src/poetry/vcs/git/backend.py @@ -32,6 +32,9 @@ logger = logging.getLogger(__name__) +# A relative URL by definition starts with ../ or ./ +RELATIVE_SUBMODULE_REGEX = re.compile(r"^\.{1,2}/") + def is_revision_sha(revision: str | None) -> bool: return re.match(r"^\b[0-9a-f]{5,40}\b$", revision or "") is not None @@ -332,49 +335,59 @@ def _clone_submodules(cls, repo: Repo) -> None: Helper method to identify configured submodules and clone them recursively. """ repo_root = Path(repo.path) - modules_config = repo_root.joinpath(".gitmodules") - - # A relative URL by definition starts with ../ or ./ - relative_submodule_regex = re.compile(r"^\.{1,2}/") - - if modules_config.exists(): - config = ConfigFile.from_path(str(modules_config)) - - url: bytes - path: bytes - submodules = parse_submodules(config) - - for path, url, name in submodules: - path_relative = Path(path.decode("utf-8")) - path_absolute = repo_root.joinpath(path_relative) - - url_string = url.decode("utf-8") - if relative_submodule_regex.search(url_string): - url_string = _urljoin(f"{Git.get_remote_url(repo)}/", url_string) - - source_root = path_absolute.parent - source_root.mkdir(parents=True, exist_ok=True) - - with repo: - try: - revision = repo.open_index()[path].sha.decode("utf-8") - except KeyError: - logger.debug( - "Skip submodule %s in %s, path %s not found", - name, - repo.path, - path, - ) - continue - - cls.clone( - url=url_string, - source_root=source_root, - name=path_relative.name, + for submodule in cls._get_submodules(repo): + path_absolute = repo_root / submodule.path + source_root = path_absolute.parent + source_root.mkdir(parents=True, exist_ok=True) + cls.clone( + url=submodule.url, + source_root=source_root, + name=path_absolute.name, + revision=submodule.revision, + clean=path_absolute.exists() + and not path_absolute.joinpath(".git").is_dir(), + ) + + @classmethod + def _get_submodules(cls, repo: Repo) -> list[SubmoduleInfo]: + modules_config = Path(repo.path, ".gitmodules") + + if not modules_config.exists(): + return [] + + config = ConfigFile.from_path(str(modules_config)) + + submodules: list[SubmoduleInfo] = [] + for path, url, name in parse_submodules(config): + url_str = url.decode("utf-8") + path_str = path.decode("utf-8") + name_str = name.decode("utf-8") + + if RELATIVE_SUBMODULE_REGEX.search(url_str): + url_str = urlpathjoin(f"{cls.get_remote_url(repo)}/", url_str) + + with repo: + try: + revision = repo.open_index()[path].sha.decode("utf-8") + except KeyError: + logger.debug( + "Skip submodule %s in %s, path %s not found", + name_str, + repo.path, + path_str, + ) + continue + + submodules.append( + SubmoduleInfo( + path=path_str, + url=url_str, + name=name_str, revision=revision, - clean=path_absolute.exists() - and not path_absolute.joinpath(".git").is_dir(), ) + ) + + return submodules @staticmethod def is_using_legacy_client() -> bool: @@ -460,7 +473,7 @@ def clone( return cls._clone_legacy(url=url, refspec=refspec, target=target) -def _urljoin(base: str, path: str) -> str: +def urlpathjoin(base: str, path: str) -> str: """ Allow any URL to be joined with a path @@ -478,3 +491,11 @@ def _urljoin(base: str, path: str) -> str: parsed_base = urlparse(base) new = parsed_base._replace(path=urljoin(parsed_base.path, path)) return urlunparse(new) + + +@dataclasses.dataclass +class SubmoduleInfo: + path: str + url: str + name: str + revision: str diff --git a/tests/integration/test_utils_vcs_git.py b/tests/integration/test_utils_vcs_git.py index dfc59bffee7..09df801c93f 100644 --- a/tests/integration/test_utils_vcs_git.py +++ b/tests/integration/test_utils_vcs_git.py @@ -8,6 +8,8 @@ from pathlib import Path from typing import TYPE_CHECKING from typing import Iterator +from urllib.parse import urlparse +from urllib.parse import urlunparse import pytest @@ -389,3 +391,32 @@ def test_system_git_called_when_configured( target=path, refspec=GitRefSpec(branch="0.1", revision=None, tag=None, ref=b"HEAD"), ) + + +def test_relative_submodules_with_ssh( + source_url: str, tmpdir: Path, mocker: MockerFixture +) -> None: + target = tmpdir / "temp" + ssh_source_url = urlunparse(urlparse(source_url)._replace(scheme="ssh")) + + repo_with_unresolved_submodules = Git._clone( + url=source_url, + refspec=GitRefSpec(branch="relative_submodule"), + target=target, + ) + + # construct fake git config + fake_config = ConfigFile( + {(b"remote", b"origin"): {b"url": ssh_source_url.encode("utf-8")}} + ) + # trick Git into thinking remote.origin is an ssh url + mock_get_config = mocker.patch.object(repo_with_unresolved_submodules, "get_config") + mock_get_config.return_value = fake_config + + submodules = Git._get_submodules(repo_with_unresolved_submodules) + + assert [s.url for s in submodules] == [ + "https://github.com/pypa/sample-namespace-packages.git", + "ssh://github.com/python-poetry/test-fixture-vcs-repository.git", + "ssh://github.com/python-poetry/test-fixture-vcs-repository.git", + ]