Skip to content

Commit

Permalink
Enable partial checkout of selected files from a repo (#455)
Browse files Browse the repository at this point in the history
* enable checking out only some files from a repo

* Optional[list[str]] is valid only on Python 3.9+, which made the tests fail; use typing.List instead

* use HEAD if commit is None

---------

Co-authored-by: Uma Moothiringote <[email protected]>
  • Loading branch information
moothiringote and umoothiringote authored Oct 19, 2023
1 parent 4bc8074 commit dbb2e35
Showing 1 changed file with 23 additions and 4 deletions.
27 changes: 23 additions & 4 deletions tools/git.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@

import logging
import subprocess as sp
from typing import Optional
from typing import Optional, List
import os

from sisyphus import *

Expand All @@ -14,7 +15,7 @@ class CloneGitRepositoryJob(Job):
Clone a git repository given optional branch name and commit hash
"""

__sis_hash_exclude__ = {"clone_submodules": False}
__sis_hash_exclude__ = {"clone_submodules": False, "files_to_checkout": None}

def __init__(
self,
Expand All @@ -23,6 +24,7 @@ def __init__(
commit: Optional[str] = None,
checkout_folder_name: str = "repository",
clone_submodules: bool = False,
files_to_checkout: Optional[List[str]] = None,
):
"""
Expand All @@ -31,27 +33,44 @@ def __init__(
:param commit: Git commit hash
:param checkout_folder_name: Name of the output path repository folder
:param clone_submodules: Flag to clone submodules if set to True
:param files_to_checkout: List of files to be checked out sparsely. If not set, the entire repo is checked out (default behaviour).
"""
self.url = url
self.branch = branch
self.commit = commit
self.clone_submodules = clone_submodules
self.files_to_checkout = files_to_checkout

self.out_repository = self.output_path(checkout_folder_name, True)

def tasks(self):
    """Yield the single task of this job: ``run``, declared with ``mini_task=True``."""
    run_task = Task("run", mini_task=True)
    yield run_task

def run(self):
args = ["git", "clone", self.url]
if self.files_to_checkout is not None:
args = ["git", "clone", "--no-checkout", self.url]
else:
args = ["git", "clone", self.url]
if self.branch is not None:
args.extend(["-b", self.branch])
repository_dir = self.out_repository.get_path()
args += [repository_dir]
logging.info("running command: %s" % " ".join(args))
sp.run(args, check=True)

if self.commit is not None:
if self.files_to_checkout is not None:
args = ["git", "checkout"]
commit = self.commit if self.commit is not None else "HEAD"
args.extend([commit, "--"] + self.files_to_checkout)
logging.info("running command: %s" % " ".join(args))
sp.run(args, cwd=repository_dir, check=True)
for file in self.files_to_checkout:
# some files may be links, so download the original file to avoid a missing symlink target
if os.path.islink(f"{repository_dir}/{file}"):
args = ["git", "checkout", commit, "--", os.path.realpath(f"{repository_dir}/{file}")]
logging.info("running command: %s" % " ".join(args))
sp.run(args, cwd=repository_dir, check=True)
elif self.commit is not None:
args = ["git", "checkout", self.commit]
logging.info("running command: %s" % " ".join(args))
sp.run(args, cwd=repository_dir, check=True)
Expand Down

0 comments on commit dbb2e35

Please sign in to comment.