From 4c34d5c3f2a4ed7194276a026e0ec6437d339c67 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 18 Nov 2010 22:44:53 +0100 Subject: [PATCH] Split big submodule file into smaller files. Tried to manually get imports right, but its not yet tested --- lib/git/objects/submodule/__init__.py | 3 + .../{submodule.py => submodule/base.py} | 351 +----------------- lib/git/objects/submodule/root.py | 259 +++++++++++++ lib/git/objects/submodule/util.py | 101 +++++ 4 files changed, 369 insertions(+), 345 deletions(-) create mode 100644 lib/git/objects/submodule/__init__.py rename lib/git/objects/{submodule.py => submodule/base.py} (70%) create mode 100644 lib/git/objects/submodule/root.py create mode 100644 lib/git/objects/submodule/util.py diff --git a/lib/git/objects/submodule/__init__.py b/lib/git/objects/submodule/__init__.py new file mode 100644 index 000000000..24663658a --- /dev/null +++ b/lib/git/objects/submodule/__init__.py @@ -0,0 +1,3 @@ + +from base import * +from root import * diff --git a/lib/git/objects/submodule.py b/lib/git/objects/submodule/base.py similarity index 70% rename from lib/git/objects/submodule.py rename to lib/git/objects/submodule/base.py index c769b1600..6cdc57a08 100644 --- a/lib/git/objects/submodule.py +++ b/lib/git/objects/submodule/base.py @@ -1,111 +1,23 @@ -import base -from util import Traversable +import git.objects.base +from util import * +from git.objects.util import Traversable from StringIO import StringIO # need a dict to set bloody .name field from git.util import Iterable, join_path_native, to_native_path_linux -from git.config import GitConfigParser, SectionConstraint +from git.config import SectionConstraint from git.exc import InvalidGitRepositoryError, NoSuchPathError import stat import git import os import sys -import weakref + import shutil __all__ = ("Submodule", "RootModule") -#{ Utilities - -def sm_section(name): - """:return: section title used in .gitmodules configuration file""" - return 'submodule "%s"' % name - -def sm_name(section): - """:return: name of the submodule as parsed from the section name""" - section = section.strip() - return section[11:-1] - -def mkhead(repo, path): - """:return: New branch/head instance""" - return git.Head(repo, git.Head.to_full_path(path)) - -def unbare_repo(func): - """Methods with this decorator raise InvalidGitRepositoryError if they - encounter a bare repository""" - def wrapper(self, *args, **kwargs): - if self.repo.bare: - raise InvalidGitRepositoryError("Method '%s' cannot operate on bare repositories" % func.__name__) - #END bare method - return func(self, *args, **kwargs) - # END wrapper - wrapper.__name__ = func.__name__ - return wrapper - -def find_first_remote_branch(remotes, branch): - """Find the remote branch matching the name of the given branch or raise InvalidGitRepositoryError""" - for remote in remotes: - try: - return remote.refs[branch.name] - except IndexError: - continue - # END exception handling - #END for remote - raise InvalidGitRepositoryError("Didn't find remote branch %r in any of the given remotes", branch) - -#} END utilities - - -#{ Classes - -class SubmoduleConfigParser(GitConfigParser): - """ - Catches calls to _write, and updates the .gitmodules blob in the index - with the new data, if we have written into a stream. Otherwise it will - add the local file to the index to make it correspond with the working tree. - Additionally, the cache must be cleared - - Please note that no mutating method will work in bare mode - """ - - def __init__(self, *args, **kwargs): - self._smref = None - self._index = None - self._auto_write = True - super(SubmoduleConfigParser, self).__init__(*args, **kwargs) - - #{ Interface - def set_submodule(self, submodule): - """Set this instance's submodule. It must be called before - the first write operation begins""" - self._smref = weakref.ref(submodule) - - def flush_to_index(self): - """Flush changes in our configuration file to the index""" - assert self._smref is not None - # should always have a file here - assert not isinstance(self._file_or_files, StringIO) - - sm = self._smref() - if sm is not None: - index = self._index - if index is None: - index = sm.repo.index - # END handle index - index.add([sm.k_modules_file], write=self._auto_write) - sm._clear_cache() - # END handle weakref - - #} END interface - - #{ Overridden Methods - def write(self): - rval = super(SubmoduleConfigParser, self).write() - self.flush_to_index() - return rval - # END overridden methods -class Submodule(base.IndexObject, Iterable, Traversable): +class Submodule(git.objects.base.IndexObject, Iterable, Traversable): """Implements access to a git submodule. They are special in that their sha represents a commit in the submodule's repository which is to be checked out at the path of this instance. @@ -879,255 +791,4 @@ def iter_items(cls, repo, parent_commit='HEAD'): # END for each section #} END iterable interface - - -class RootModule(Submodule): - """A (virtual) Root of all submodules in the given repository. It can be used - to more easily traverse all submodules of the master repository""" - - __slots__ = tuple() - - k_root_name = '__ROOT__' - - def __init__(self, repo): - # repo, binsha, mode=None, path=None, name = None, parent_commit=None, url=None, ref=None) - super(RootModule, self).__init__( - repo, - binsha = self.NULL_BIN_SHA, - mode = self.k_default_mode, - path = '', - name = self.k_root_name, - parent_commit = repo.head.commit, - url = '', - branch = mkhead(repo, self.k_head_default) - ) - - - def _clear_cache(self): - """May not do anything""" - pass - - #{ Interface - - def update(self, previous_commit=None, recursive=True, force_remove=False, init=True, to_latest_revision=False): - """Update the submodules of this repository to the current HEAD commit. - This method behaves smartly by determining changes of the path of a submodules - repository, next to changes to the to-be-checked-out commit or the branch to be - checked out. This works if the submodules ID does not change. - Additionally it will detect addition and removal of submodules, which will be handled - gracefully. - - :param previous_commit: If set to a commit'ish, the commit we should use - as the previous commit the HEAD pointed to before it was set to the commit it points to now. - If None, it defaults to ORIG_HEAD otherwise, or the parent of the current - commit if it is not given - :param recursive: if True, the children of submodules will be updated as well - using the same technique - :param force_remove: If submodules have been deleted, they will be forcibly removed. - Otherwise the update may fail if a submodule's repository cannot be deleted as - changes have been made to it (see Submodule.update() for more information) - :param init: If we encounter a new module which would need to be initialized, then do it. - :param to_latest_revision: If True, instead of checking out the revision pointed to - by this submodule's sha, the checked out tracking branch will be merged with the - newest remote branch fetched from the repository's origin""" - if self.repo.bare: - raise InvalidGitRepositoryError("Cannot update submodules in bare repositories") - # END handle bare - - repo = self.repo - - # HANDLE COMMITS - ################## - cur_commit = repo.head.commit - if previous_commit is None: - symref = repo.head.orig_head() - try: - previous_commit = symref.commit - except Exception: - pcommits = cur_commit.parents - if pcommits: - previous_commit = pcommits[0] - else: - # in this special case, we just diff against ourselve, which - # means exactly no change - previous_commit = cur_commit - # END handle initial commit - # END no ORIG_HEAD - else: - previous_commit = repo.commit(previous_commit) # obtain commit object - # END handle previous commit - - - psms = self.list_items(repo, parent_commit=previous_commit) - sms = self.list_items(self.module()) - spsms = set(psms) - ssms = set(sms) - - # HANDLE REMOVALS - ################### - for rsm in (spsms - ssms): - # fake it into thinking its at the current commit to allow deletion - # of previous module. Trigger the cache to be updated before that - #rsm.url - rsm._parent_commit = repo.head.commit - rsm.remove(configuration=False, module=True, force=force_remove) - # END for each removed submodule - - # HANDLE PATH RENAMES - ##################### - # url changes + branch changes - for csm in (spsms & ssms): - psm = psms[csm.name] - sm = sms[csm.name] - - if sm.path != psm.path and psm.module_exists(): - # move the module to the new path - psm.move(sm.path, module=True, configuration=False) - # END handle path changes - - if sm.module_exists(): - # handle url change - if sm.url != psm.url: - # Add the new remote, remove the old one - # This way, if the url just changes, the commits will not - # have to be re-retrieved - nn = '__new_origin__' - smm = sm.module() - rmts = smm.remotes - - # don't do anything if we already have the url we search in place - if len([r for r in rmts if r.url == sm.url]) == 0: - - - assert nn not in [r.name for r in rmts] - smr = smm.create_remote(nn, sm.url) - smr.fetch() - - # If we have a tracking branch, it should be available - # in the new remote as well. - if len([r for r in smr.refs if r.remote_head == sm.branch.name]) == 0: - raise ValueError("Submodule branch named %r was not available in new submodule remote at %r" % (sm.branch.name, sm.url)) - # END head is not detached - - # now delete the changed one - rmt_for_deletion = None - for remote in rmts: - if remote.url == psm.url: - rmt_for_deletion = remote - break - # END if urls match - # END for each remote - - # if we didn't find a matching remote, but have exactly one, - # we can safely use this one - if rmt_for_deletion is None: - if len(rmts) == 1: - rmt_for_deletion = rmts[0] - else: - # if we have not found any remote with the original url - # we may not have a name. This is a special case, - # and its okay to fail here - # Alternatively we could just generate a unique name and leave all - # existing ones in place - raise InvalidGitRepositoryError("Couldn't find original remote-repo at url %r" % psm.url) - #END handle one single remote - # END handle check we found a remote - - orig_name = rmt_for_deletion.name - smm.delete_remote(rmt_for_deletion) - # NOTE: Currently we leave tags from the deleted remotes - # as well as separate tracking branches in the possibly totally - # changed repository ( someone could have changed the url to - # another project ). At some point, one might want to clean - # it up, but the danger is high to remove stuff the user - # has added explicitly - - # rename the new remote back to what it was - smr.rename(orig_name) - - # early on, we verified that the our current tracking branch - # exists in the remote. Now we have to assure that the - # sha we point to is still contained in the new remote - # tracking branch. - smsha = sm.binsha - found = False - rref = smr.refs[self.branch.name] - for c in rref.commit.traverse(): - if c.binsha == smsha: - found = True - break - # END traverse all commits in search for sha - # END for each commit - - if not found: - # adjust our internal binsha to use the one of the remote - # this way, it will be checked out in the next step - # This will change the submodule relative to us, so - # the user will be able to commit the change easily - print >> sys.stderr, "WARNING: Current sha %s was not contained in the tracking branch at the new remote, setting it the the remote's tracking branch" % sm.hexsha - sm.binsha = rref.commit.binsha - #END reset binsha - - #NOTE: All checkout is performed by the base implementation of update - - # END skip remote handling if new url already exists in module - # END handle url - - if sm.branch != psm.branch: - # finally, create a new tracking branch which tracks the - # new remote branch - smm = sm.module() - smmr = smm.remotes - try: - tbr = git.Head.create(smm, sm.branch.name) - except git.GitCommandError, e: - if e.status != 128: - raise - #END handle something unexpected - - # ... or reuse the existing one - tbr = git.Head(smm, git.Head.to_full_path(sm.branch.name)) - #END assure tracking branch exists - - tbr.set_tracking_branch(find_first_remote_branch(smmr, sm.branch)) - # figure out whether the previous tracking branch contains - # new commits compared to the other one, if not we can - # delete it. - try: - tbr = find_first_remote_branch(smmr, psm.branch) - if len(smm.git.cherry(tbr, psm.branch)) == 0: - psm.branch.delete(smm, psm.branch) - #END delete original tracking branch if there are no changes - except InvalidGitRepositoryError: - # ignore it if the previous branch couldn't be found in the - # current remotes, this just means we can't handle it - pass - # END exception handling - - #NOTE: All checkout is done in the base implementation of update - - #END handle branch - #END handle - # END for each common submodule - - # FINALLY UPDATE ALL ACTUAL SUBMODULES - ###################################### - for sm in sms: - # update the submodule using the default method - sm.update(recursive=True, init=init, to_latest_revision=to_latest_revision) - - # update recursively depth first - question is which inconsitent - # state will be better in case it fails somewhere. Defective branch - # or defective depth. The RootSubmodule type will never process itself, - # which was done in the previous expression - if recursive: - type(self)(sm.module()).update(recursive=True, force_remove=force_remove, - init=init, to_latest_revision=to_latest_revision) - #END handle recursive - # END for each submodule to update - def module(self): - """:return: the actual repository containing the submodules""" - return self.repo - #} END interface -#} END classes diff --git a/lib/git/objects/submodule/root.py b/lib/git/objects/submodule/root.py new file mode 100644 index 000000000..2e02e7de3 --- /dev/null +++ b/lib/git/objects/submodule/root.py @@ -0,0 +1,259 @@ +from base import Submodule +from git.exc import InvalidGitRepositoryError +import git + +import sys + +__all__ = ["RootModule"] + + +class RootModule(Submodule): + """A (virtual) Root of all submodules in the given repository. It can be used + to more easily traverse all submodules of the master repository""" + + __slots__ = tuple() + + k_root_name = '__ROOT__' + + def __init__(self, repo): + # repo, binsha, mode=None, path=None, name = None, parent_commit=None, url=None, ref=None) + super(RootModule, self).__init__( + repo, + binsha = self.NULL_BIN_SHA, + mode = self.k_default_mode, + path = '', + name = self.k_root_name, + parent_commit = repo.head.commit, + url = '', + branch = mkhead(repo, self.k_head_default) + ) + + + def _clear_cache(self): + """May not do anything""" + pass + + #{ Interface + + def update(self, previous_commit=None, recursive=True, force_remove=False, init=True, to_latest_revision=False): + """Update the submodules of this repository to the current HEAD commit. + This method behaves smartly by determining changes of the path of a submodules + repository, next to changes to the to-be-checked-out commit or the branch to be + checked out. This works if the submodules ID does not change. + Additionally it will detect addition and removal of submodules, which will be handled + gracefully. + + :param previous_commit: If set to a commit'ish, the commit we should use + as the previous commit the HEAD pointed to before it was set to the commit it points to now. + If None, it defaults to ORIG_HEAD otherwise, or the parent of the current + commit if it is not given + :param recursive: if True, the children of submodules will be updated as well + using the same technique + :param force_remove: If submodules have been deleted, they will be forcibly removed. + Otherwise the update may fail if a submodule's repository cannot be deleted as + changes have been made to it (see Submodule.update() for more information) + :param init: If we encounter a new module which would need to be initialized, then do it. + :param to_latest_revision: If True, instead of checking out the revision pointed to + by this submodule's sha, the checked out tracking branch will be merged with the + newest remote branch fetched from the repository's origin""" + if self.repo.bare: + raise InvalidGitRepositoryError("Cannot update submodules in bare repositories") + # END handle bare + + repo = self.repo + + # HANDLE COMMITS + ################## + cur_commit = repo.head.commit + if previous_commit is None: + symref = repo.head.orig_head() + try: + previous_commit = symref.commit + except Exception: + pcommits = cur_commit.parents + if pcommits: + previous_commit = pcommits[0] + else: + # in this special case, we just diff against ourselve, which + # means exactly no change + previous_commit = cur_commit + # END handle initial commit + # END no ORIG_HEAD + else: + previous_commit = repo.commit(previous_commit) # obtain commit object + # END handle previous commit + + + psms = self.list_items(repo, parent_commit=previous_commit) + sms = self.list_items(self.module()) + spsms = set(psms) + ssms = set(sms) + + # HANDLE REMOVALS + ################### + for rsm in (spsms - ssms): + # fake it into thinking its at the current commit to allow deletion + # of previous module. Trigger the cache to be updated before that + #rsm.url + rsm._parent_commit = repo.head.commit + rsm.remove(configuration=False, module=True, force=force_remove) + # END for each removed submodule + + # HANDLE PATH RENAMES + ##################### + # url changes + branch changes + for csm in (spsms & ssms): + psm = psms[csm.name] + sm = sms[csm.name] + + if sm.path != psm.path and psm.module_exists(): + # move the module to the new path + psm.move(sm.path, module=True, configuration=False) + # END handle path changes + + if sm.module_exists(): + # handle url change + if sm.url != psm.url: + # Add the new remote, remove the old one + # This way, if the url just changes, the commits will not + # have to be re-retrieved + nn = '__new_origin__' + smm = sm.module() + rmts = smm.remotes + + # don't do anything if we already have the url we search in place + if len([r for r in rmts if r.url == sm.url]) == 0: + + + assert nn not in [r.name for r in rmts] + smr = smm.create_remote(nn, sm.url) + smr.fetch() + + # If we have a tracking branch, it should be available + # in the new remote as well. + if len([r for r in smr.refs if r.remote_head == sm.branch.name]) == 0: + raise ValueError("Submodule branch named %r was not available in new submodule remote at %r" % (sm.branch.name, sm.url)) + # END head is not detached + + # now delete the changed one + rmt_for_deletion = None + for remote in rmts: + if remote.url == psm.url: + rmt_for_deletion = remote + break + # END if urls match + # END for each remote + + # if we didn't find a matching remote, but have exactly one, + # we can safely use this one + if rmt_for_deletion is None: + if len(rmts) == 1: + rmt_for_deletion = rmts[0] + else: + # if we have not found any remote with the original url + # we may not have a name. This is a special case, + # and its okay to fail here + # Alternatively we could just generate a unique name and leave all + # existing ones in place + raise InvalidGitRepositoryError("Couldn't find original remote-repo at url %r" % psm.url) + #END handle one single remote + # END handle check we found a remote + + orig_name = rmt_for_deletion.name + smm.delete_remote(rmt_for_deletion) + # NOTE: Currently we leave tags from the deleted remotes + # as well as separate tracking branches in the possibly totally + # changed repository ( someone could have changed the url to + # another project ). At some point, one might want to clean + # it up, but the danger is high to remove stuff the user + # has added explicitly + + # rename the new remote back to what it was + smr.rename(orig_name) + + # early on, we verified that the our current tracking branch + # exists in the remote. Now we have to assure that the + # sha we point to is still contained in the new remote + # tracking branch. + smsha = sm.binsha + found = False + rref = smr.refs[self.branch.name] + for c in rref.commit.traverse(): + if c.binsha == smsha: + found = True + break + # END traverse all commits in search for sha + # END for each commit + + if not found: + # adjust our internal binsha to use the one of the remote + # this way, it will be checked out in the next step + # This will change the submodule relative to us, so + # the user will be able to commit the change easily + print >> sys.stderr, "WARNING: Current sha %s was not contained in the tracking branch at the new remote, setting it the the remote's tracking branch" % sm.hexsha + sm.binsha = rref.commit.binsha + #END reset binsha + + #NOTE: All checkout is performed by the base implementation of update + + # END skip remote handling if new url already exists in module + # END handle url + + if sm.branch != psm.branch: + # finally, create a new tracking branch which tracks the + # new remote branch + smm = sm.module() + smmr = smm.remotes + try: + tbr = git.Head.create(smm, sm.branch.name) + except git.GitCommandError, e: + if e.status != 128: + raise + #END handle something unexpected + + # ... or reuse the existing one + tbr = git.Head(smm, git.Head.to_full_path(sm.branch.name)) + #END assure tracking branch exists + + tbr.set_tracking_branch(find_first_remote_branch(smmr, sm.branch)) + # figure out whether the previous tracking branch contains + # new commits compared to the other one, if not we can + # delete it. + try: + tbr = find_first_remote_branch(smmr, psm.branch) + if len(smm.git.cherry(tbr, psm.branch)) == 0: + psm.branch.delete(smm, psm.branch) + #END delete original tracking branch if there are no changes + except InvalidGitRepositoryError: + # ignore it if the previous branch couldn't be found in the + # current remotes, this just means we can't handle it + pass + # END exception handling + + #NOTE: All checkout is done in the base implementation of update + + #END handle branch + #END handle + # END for each common submodule + + # FINALLY UPDATE ALL ACTUAL SUBMODULES + ###################################### + for sm in sms: + # update the submodule using the default method + sm.update(recursive=True, init=init, to_latest_revision=to_latest_revision) + + # update recursively depth first - question is which inconsitent + # state will be better in case it fails somewhere. Defective branch + # or defective depth. The RootSubmodule type will never process itself, + # which was done in the previous expression + if recursive: + type(self)(sm.module()).update(recursive=True, force_remove=force_remove, + init=init, to_latest_revision=to_latest_revision) + #END handle recursive + # END for each submodule to update + + def module(self): + """:return: the actual repository containing the submodules""" + return self.repo + #} END interface +#} END classes diff --git a/lib/git/objects/submodule/util.py b/lib/git/objects/submodule/util.py new file mode 100644 index 000000000..ab5e345a6 --- /dev/null +++ b/lib/git/objects/submodule/util.py @@ -0,0 +1,101 @@ +import git +from git.exc import InvalidGitRepositoryError +from git.config import GitConfigParser +from StringIO import StringIO +import weakref + +__all__ = ( 'sm_section', 'sm_name', 'mkhead', 'unbare_repo', 'find_first_remote_branch', + 'SubmoduleConfigParser') + +#{ Utilities + +def sm_section(name): + """:return: section title used in .gitmodules configuration file""" + return 'submodule "%s"' % name + +def sm_name(section): + """:return: name of the submodule as parsed from the section name""" + section = section.strip() + return section[11:-1] + +def mkhead(repo, path): + """:return: New branch/head instance""" + return git.Head(repo, git.Head.to_full_path(path)) + +def unbare_repo(func): + """Methods with this decorator raise InvalidGitRepositoryError if they + encounter a bare repository""" + def wrapper(self, *args, **kwargs): + if self.repo.bare: + raise InvalidGitRepositoryError("Method '%s' cannot operate on bare repositories" % func.__name__) + #END bare method + return func(self, *args, **kwargs) + # END wrapper + wrapper.__name__ = func.__name__ + return wrapper + +def find_first_remote_branch(remotes, branch): + """Find the remote branch matching the name of the given branch or raise InvalidGitRepositoryError""" + for remote in remotes: + try: + return remote.refs[branch.name] + except IndexError: + continue + # END exception handling + #END for remote + raise InvalidGitRepositoryError("Didn't find remote branch %r in any of the given remotes", branch) + +#} END utilities + + +#{ Classes + +class SubmoduleConfigParser(GitConfigParser): + """ + Catches calls to _write, and updates the .gitmodules blob in the index + with the new data, if we have written into a stream. Otherwise it will + add the local file to the index to make it correspond with the working tree. + Additionally, the cache must be cleared + + Please note that no mutating method will work in bare mode + """ + + def __init__(self, *args, **kwargs): + self._smref = None + self._index = None + self._auto_write = True + super(SubmoduleConfigParser, self).__init__(*args, **kwargs) + + #{ Interface + def set_submodule(self, submodule): + """Set this instance's submodule. It must be called before + the first write operation begins""" + self._smref = weakref.ref(submodule) + + def flush_to_index(self): + """Flush changes in our configuration file to the index""" + assert self._smref is not None + # should always have a file here + assert not isinstance(self._file_or_files, StringIO) + + sm = self._smref() + if sm is not None: + index = self._index + if index is None: + index = sm.repo.index + # END handle index + index.add([sm.k_modules_file], write=self._auto_write) + sm._clear_cache() + # END handle weakref + + #} END interface + + #{ Overridden Methods + def write(self): + rval = super(SubmoduleConfigParser, self).write() + self.flush_to_index() + return rval + # END overridden methods + + +#} END classes