Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add support for git credential helpers #976

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/pythontest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ name: Python tests
on:
push:
pull_request:
schedule:
- cron: "0 6 * * *" # Daily 6AM UTC build
# schedule:
# - cron: "0 6 * * *" # Daily 6AM UTC build

jobs:
test:
Expand Down
55 changes: 23 additions & 32 deletions dulwich/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,8 @@
import urllib3

import dulwich
from dulwich.config import get_xdg_config_home_path, Config, apply_instead_of
from dulwich.config import Config, StackedConfig, apply_instead_of
from dulwich.credentials import CredentialNotFoundError, get_credentials_from_helper
from dulwich.errors import (
GitProtocolError,
NotGitRepository,
Expand Down Expand Up @@ -2235,17 +2236,32 @@ def __init__(
else:
self.pool_manager = pool_manager

if username is not None:
self.config = config
if not self._username:
try:
self._username, self._password = get_credentials_from_helper(
base_url, config or StackedConfig.default()
)
except CredentialNotFoundError:
pass

if self._username:
# No escaping needed: ":" is not allowed in username:
# https://tools.ietf.org/html/rfc2617#section-2
credentials = f"{username}:{password or ''}"
import urllib3.util
if isinstance(self._username, str):
credentials = f"{self._username}:{self._password or ''}".encode("ascii")
elif isinstance(self._username, bytes):
credentials = self._username + b":" + self._password or b""
else:
raise TypeError
import base64

basic_auth = urllib3.util.make_headers(basic_auth=credentials)
encoded = base64.b64encode(credentials).decode('ascii')
basic_auth = {
"authorization": f"Basic {encoded}"
}
self.pool_manager.headers.update(basic_auth)

self.config = config

super().__init__(
base_url=base_url, dumb=dumb, **kwargs)

Expand Down Expand Up @@ -2431,28 +2447,3 @@ def get_transport_and_path(
return default_local_git_client_cls(**kwargs), location
else:
return SSHGitClient(hostname, username=username, **kwargs), path


DEFAULT_GIT_CREDENTIALS_PATHS = [
os.path.expanduser("~/.git-credentials"),
get_xdg_config_home_path("git", "credentials"),
]


def get_credentials_from_store(
    scheme, hostname, username=None, fnames=DEFAULT_GIT_CREDENTIALS_PATHS
):
    """Look up a username/password pair in git credential store files.

    Each file in ``fnames`` is read in binary mode, one URL per line, and
    the first entry whose scheme and hostname match is returned. Because
    the lines are parsed as bytes, ``scheme``, ``hostname`` and
    ``username`` are compared against bytes values.

    Args:
      scheme: URL scheme the stored entry must have
      hostname: hostname the stored entry must have
      username: if not None, the stored entry must also have this username
      fnames: paths of the credential files to search, in order
    Returns:
      A ``(username, password)`` tuple taken from the first matching entry,
      or None when no file contains a match.
    """
    for store_path in fnames:
        try:
            with open(store_path, "rb") as store:
                for raw_entry in store:
                    entry = urlparse(raw_entry.strip())
                    if entry.scheme != scheme or entry.hostname != hostname:
                        continue
                    if username is not None and entry.username != username:
                        continue
                    return entry.username, entry.password
        except FileNotFoundError:
            # A missing credentials file is not an error; try the next one.
            continue
188 changes: 186 additions & 2 deletions dulwich/credentials.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,24 @@

https://git-scm.com/book/en/v2/Git-Tools-Credential-Storage

Currently Dulwich supports only the `get` operation

"""
import os
import shlex
import shutil
import subprocess
import sys
from typing import Iterator, Optional
from typing import Any, Dict, Iterator, List, Optional, Tuple, Union
from urllib.parse import ParseResult, urlparse

from dulwich.config import ConfigDict, SectionLike
from dulwich.config import (ConfigDict, SectionLike, StackedConfig,
get_xdg_config_home_path)

DEFAULT_GIT_CREDENTIALS_PATHS = [
os.path.expanduser("~/.git-credentials"),
get_xdg_config_home_path("git", "credentials"),
]


def match_urls(url: ParseResult, url_prefix: ParseResult) -> bool:
Expand Down Expand Up @@ -87,3 +99,175 @@ def urlmatch_credential_sections(

if is_match:
yield config_section


class CredentialNotFoundError(Exception):
jelmer marked this conversation as resolved.
Show resolved Hide resolved
"""An error occurred while retrieving credentials or no credentials available."""


class CredentialHelper:
    """Helper for retrieving credentials for http/https git remotes.

    Only the ``get`` operation is implemented; ``store`` and ``erase``
    raise ``NotImplementedError``.

    The helper command is interpreted as follows:

    * ``!snippet`` -- everything after the ``!`` is run through the shell;
    * ``/absolute/path [args]`` -- the executable is run as given;
    * ``name [args]`` -- ``git-credential-name [args]`` is run, looked up
      in ``PATH`` and then in the directory printed by ``git --exec-path``.

    Usage::

        helper = CredentialHelper("store")  # Use `git credential-store`
        username, password = helper.get(
            protocol="https", hostname="github.com"
        )
    """

    def __init__(self, command: str):
        """Remember ``command`` and decide whether it needs a shell."""
        self._command = command
        self._run_kwargs: Dict[str, Any] = {}
        # startswith() rather than [0] so an empty command does not raise
        # IndexError here; it is rejected with a clear error when used.
        if self._command.startswith("!"):
            # On Windows this will only work in git-bash and/or WSL2
            self._run_kwargs["shell"] = True

    def _prepare_command(self) -> Union[str, List[str]]:
        """Build the command to run, without the operation name.

        Returns:
          A shell string for ``!`` commands, otherwise an argv list.
        Raises:
          ValueError: if the configured command is empty or blank.
        """
        if self._command.startswith("!"):
            return self._command[1:]

        if sys.platform != "win32":
            argv = shlex.split(self._command)
        else:
            # On windows, subprocess.run uses subprocess.list2cmdline() to
            # join arguments when providing a list, so we can just split
            # using whitespace.
            argv = self._command.split()

        if not argv:
            raise ValueError("Empty credential helper command")

        if os.path.isabs(argv[0]):
            return argv

        executable = f"git-credential-{argv[0]}"
        if not shutil.which(executable) and shutil.which("git"):
            # If the helper cannot be found in PATH, it might be
            # a C git helper in GIT_EXEC_PATH
            git_exec_path = subprocess.check_output(
                ("git", "--exec-path"),
                universal_newlines=True,  # TODO: replace universal_newlines with `text` when dropping 3.6
            ).strip()
            if shutil.which(executable, path=git_exec_path):
                executable = os.path.join(git_exec_path, executable)

        return [executable, *argv[1:]]

    def get(
        self,
        *,
        protocol: Optional[str] = None,
        hostname: Optional[str] = None,
        port: Optional[int] = None,
        username: Optional[str] = None,
    ) -> Tuple[bytes, bytes]:
        """Ask the helper for credentials matching the given description.

        Args:
          protocol: protocol of the remote, sent as ``protocol=``
          hostname: host of the remote, sent as ``host=``
          port: optional port, appended to the host as ``host:port``
          username: optional username, sent as ``username=``
        Returns:
          ``(username, password)`` as bytes, as printed by the helper.
        Raises:
          ValueError: if none of protocol, hostname or username is given,
            or if the configured command is empty.
          CredentialNotFoundError: if the helper cannot be found, exits
            with a non-zero status, or does not print both a username
            and a password.
        """
        cmd = self._prepare_command()
        if isinstance(cmd, str):
            cmd += " get"
        else:
            cmd.append("get")

        helper_input = []
        if protocol:
            helper_input.append(f"protocol={protocol}")
        if hostname:
            host = hostname if port is None else f"{hostname}:{port}"
            helper_input.append(f"host={host}")
        if username:
            helper_input.append(f"username={username}")

        if not helper_input:
            raise ValueError("One of protocol, hostname must be provided")

        # The credential protocol is newline-delimited on every platform,
        # so do not use os.linesep (CRLF on Windows). UTF-8 is a superset
        # of ASCII and additionally allows non-ASCII usernames/hosts.
        payload = "".join(f"{attribute}\n" for attribute in helper_input)

        try:
            res = subprocess.run(  # type: ignore # breaks on 3.6
                cmd,
                check=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                input=payload.encode("utf-8"),
                **self._run_kwargs,
            )
        except subprocess.CalledProcessError as exc:
            raise CredentialNotFoundError(exc.stderr) from exc
        except FileNotFoundError as exc:
            raise CredentialNotFoundError("Helper not found") from exc

        credentials = {}
        for line in res.stdout.strip().splitlines():
            # Split on the first "=" only: values (passwords, tokens) may
            # themselves contain "=". Lines without "=" are ignored.
            key, separator, value = line.partition(b"=")
            if separator:
                credentials[key] = value

        if b"username" not in credentials or b"password" not in credentials:
            raise CredentialNotFoundError("Could not get credentials from helper")

        return credentials[b"username"], credentials[b"password"]

    def store(self, *args, **kwargs):
        """Store the credential, if applicable to the helper"""
        raise NotImplementedError

    def erase(self, *args, **kwargs):
        """Remove a matching credential, if any, from the helper's storage"""
        raise NotImplementedError


def get_credentials_from_store(
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ideally we'd just have one pythonic interface that's implemented by the static credentials files, the helper class and any other python code.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I moved get_credentials_from_store from client to here since it seemed the right place for it, but it is dead code. I would remove this if that's ok with you.

scheme: bytes,
hostname: bytes,
username: Optional[bytes] = None,
fnames: List[str] = DEFAULT_GIT_CREDENTIALS_PATHS,
):

encoding = sys.getdefaultencoding()
for fname in fnames:
command = f"store --file {fname}"
helper = CredentialHelper(command)
try:
username, password = helper.get(
protocol=scheme.decode(encoding),
hostname=hostname.decode(encoding),
username=username.decode(encoding) if username is not None else None,
)
return username, password
except CredentialNotFoundError:
continue


def get_credentials_from_helper(base_url: str, config) -> Tuple[bytes, bytes]:
"""Retrieves credentials for the given url from git credential helpers"""
if isinstance(config, StackedConfig):
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why does this need to peek under the covers, can't it just use the config API?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Given a URL, git loops through the available configs and matches the URLs in credential sections against the provided URL, which is something we cannot do with the config API.

For example with a config like this:

[credential "https://github.com/jelmer/dulwich"]
    helper = githubhelper

[credential]
    helper = generichelper

Running

config.get((b"credential", b"https://github.com"))

would return generichelper instead of githubhelper, which is not what we want.

I refactored this a bit so that we use urlmatch_credential_sections to cycle through config sections that match the URL.
This could be extracted to become a config API method, although I'm not sure if there are any other use cases where one needs to match URLs in sections, apart from the credential helpers scenario.

backends = config.backends
else:
backends = [config]

for conf in backends:
# We will try to match credential sections' url with the given url,
# falling back to the generic section if there's no match
for section in urlmatch_credential_sections(conf, base_url):
try:
command = conf.get(section, "helper")
except KeyError:
# no helper configured
continue

helper = CredentialHelper(
command.decode(conf.encoding or sys.getdefaultencoding())
)
parsed = urlparse(base_url)
try:
return helper.get(
protocol=parsed.scheme,
hostname=parsed.hostname,
port=parsed.port,
username=parsed.username,
)
except CredentialNotFoundError:
continue

raise CredentialNotFoundError
45 changes: 24 additions & 21 deletions dulwich/porcelain.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,6 @@
from io import BytesIO, RawIOBase
import datetime
import os
from pathlib import Path
import posixpath
import stat
import sys
Expand Down Expand Up @@ -306,31 +305,33 @@ def path_to_tree_path(repopath, path, tree_encoding=DEFAULT_ENCODING):
if sys.platform == "win32":
path = os.path.abspath(path)

path = Path(path)
resolved_path = path.resolve()
resolved_path = os.path.realpath(path)

# Resolve and abspath seems to behave differently regarding symlinks,
# as we are doing abspath on the file path, we need to do the same on
# the repo path or they might not match
if sys.platform == "win32":
repopath = os.path.abspath(repopath)

repopath = Path(repopath).resolve()
repopath = os.path.realpath(repopath)

try:
relpath = resolved_path.relative_to(repopath)
except ValueError:
if not resolved_path.startswith(repopath):
# If path is a symlink that points to a file outside the repo, we
# want the relpath for the link itself, not the resolved target
if path.is_symlink():
parent = path.parent.resolve()
relpath = (parent / path.name).relative_to(repopath)
else:
raise
if sys.platform == "win32":
return str(relpath).replace(os.path.sep, "/").encode(tree_encoding)
if not os.path.islink(path):
raise ValueError(resolved_path)

parent = os.path.realpath(os.path.basename(os.path.normpath(path)))
relpath = os.path.relpath(
os.path.join(parent, os.path.basename(path)),
repopath
)
else:
return bytes(relpath)
relpath = os.path.relpath(resolved_path, repopath)

if sys.platform == "win32":
return relpath.replace(os.path.sep, "/").encode(tree_encoding)
return os.fsencode(relpath)


class DivergedBranches(Error):
Expand Down Expand Up @@ -587,12 +588,12 @@ def add(repo=".", paths=None):
"""
ignored = set()
with open_repo_closing(repo) as r:
repo_path = Path(r.path).resolve()
repo_path = os.path.realpath(r.path)
ignore_manager = IgnoreFilterManager.from_repo(r)
if not paths:
paths = list(
get_untracked_paths(
str(Path(os.getcwd()).resolve()),
os.getcwd(),
str(repo_path),
r.open_index(),
)
Expand All @@ -601,10 +602,12 @@ def add(repo=".", paths=None):
if not isinstance(paths, list):
paths = [paths]
for p in paths:
path = Path(p)
relpath = str(path.resolve().relative_to(repo_path))
resolved_path = os.path.realpath(p)
if not resolved_path.startswith(repo_path):
raise ValueError("Not in repository: " + str(p))
relpath = os.path.relpath(os.path.realpath(p), repo_path)
# FIXME: Support patterns
if path.is_dir():
if os.path.isdir(p):
relpath = os.path.join(relpath, "")
if ignore_manager.is_ignored(relpath):
ignored.add(relpath)
Expand Down Expand Up @@ -1327,7 +1330,7 @@ def status(repo=".", ignored=False, untracked_files="all"):
untracked_files: How to handle untracked files, defaults to "all":
"no": do not return untracked files
"all": include all files in untracked directories
Using untracked_files="no" can be faster than "all" when the worktreee
Using `untracked_files="no"` can be faster than "all" when the worktree
contains many untracked files/directories.

Note: untracked_files="normal" (git's default) is not implemented.
Expand Down
Loading