-
Notifications
You must be signed in to change notification settings - Fork 31
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
merge: Parameterize merge functions + git merge-file
on marker_size
(with a stubbed lookup)
#101
base: main
Are you sure you want to change the base?
Changes from all commits
18cb980
657bcb4
2425715
406d82d
4359ed9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -25,6 +25,16 @@ | |
T = TypeVar("T") # pylint: disable=invalid-name | ||
|
||
|
||
# See git/xdiff/xdiff.h | ||
DEFAULT_CONFLICT_MARKER_SIZE = 7 | ||
|
||
|
||
def get_conflict_marker_size(__repo: Repository, __file: Path) -> int: | ||
# TODO: Determine on a per-file basis by its `conflict-marker-size` attribute. | ||
# See ll_merge_marker_size in git/ll-merge.c | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Regarding your comment about performance concerns: here's a possible way to avoid them:
In the common case (no conflict) we're as fast as ever. I often use git-revise for rewrites that cause the same conflicts - that's There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Doing an optimistic merge is a good idea to consider. I'd worry a little about being tempted (or tempting others) to look for or save cached resolutions for both marker sizes, which could lead to inconsistency. Probably avoidable, though! Or not a big deal. I do wonder what the actual performance impact of batch-querying There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Yeah, merge_files() would do two runs of "git merge-file" and if the first failed, callers would only ever see the results of the second. Using a persistent process for |
||
return DEFAULT_CONFLICT_MARKER_SIZE | ||
|
||
|
||
class MergeConflict(Exception): | ||
pass | ||
|
||
|
@@ -197,22 +207,26 @@ def merge_blobs( | |
base: Optional[Blob], | ||
other: Blob, | ||
) -> Blob: | ||
# pylint: disable=too-many-locals | ||
repo = current.repo | ||
|
||
tmpdir = repo.get_tempdir() | ||
|
||
marker_size = get_conflict_marker_size(repo, path) | ||
|
||
annotated_labels = ( | ||
f"{path} (new parent): {labels[0]}", | ||
f"{path} (old parent): {labels[1]}", | ||
f"{path} (current): {labels[2]}", | ||
) | ||
(is_clean_merge, merged) = merge_files( | ||
repo, | ||
tmpdir, | ||
marker_size, | ||
annotated_labels, | ||
current.body, | ||
base.body if base else b"", | ||
other.body, | ||
tmpdir, | ||
) | ||
|
||
if is_clean_merge: | ||
|
@@ -226,7 +240,10 @@ def merge_blobs( | |
|
||
preimage = merged | ||
(normalized_preimage, conflict_id, merged_blob) = replay_recorded_resolution( | ||
repo, tmpdir, preimage | ||
repo=repo, | ||
tmpdir=tmpdir, | ||
marker_size=marker_size, | ||
preimage=preimage, | ||
) | ||
if merged_blob is not None: | ||
return merged_blob | ||
|
@@ -245,7 +262,7 @@ def merge_blobs( | |
if merged == preimage: | ||
print("(note) conflicted file is unchanged") | ||
|
||
if b"<<<<<<<" in merged or b"=======" in merged or b">>>>>>>" in merged: | ||
if any((marker * marker_size) in merged for marker in (b"<", b"=", b">")): | ||
print("(note) conflict markers found in the merged file") | ||
|
||
# Was the merge successful? | ||
|
@@ -259,11 +276,12 @@ def merge_blobs( | |
|
||
def merge_files( | ||
repo: Repository, | ||
tmpdir: Path, | ||
marker_size: int, | ||
labels: Tuple[str, str, str], | ||
current: bytes, | ||
base: bytes, | ||
other: bytes, | ||
tmpdir: Path, | ||
) -> Tuple[bool, bytes]: | ||
(tmpdir / "current").write_bytes(current) | ||
(tmpdir / "base").write_bytes(base) | ||
|
@@ -273,8 +291,13 @@ def merge_files( | |
try: | ||
merged = repo.git( | ||
"merge-file", | ||
"-q", | ||
"-p", | ||
# Do not print warnings on conflicts. | ||
"--quiet", | ||
# Send results to stdout instead of overwriting "current file". | ||
"--stdout", | ||
# Ensure markers are the expected length to ensure consistent rerere results. | ||
"--marker-size", | ||
str(marker_size), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nice, I thought there was no way to override attributes. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's part of the "parameterize marker size" commit. I think that seems like the right place to me? Note that this does not override attributes: they weren't and aren't used at all here. This is merging arbitrary temp files and The note about using There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ah sorry I thought this commit already had a behavior change |
||
f"-L{labels[0]}", | ||
f"-L{labels[1]}", | ||
f"-L{labels[2]}", | ||
|
@@ -295,7 +318,10 @@ def merge_files( | |
|
||
|
||
def replay_recorded_resolution( | ||
repo: Repository, tmpdir: Path, preimage: bytes | ||
repo: Repository, | ||
tmpdir: Path, | ||
marker_size: int, | ||
preimage: bytes, | ||
) -> Tuple[bytes, Optional[str], Optional[Blob]]: | ||
rr_cache = repo.git_path("rr-cache") | ||
if not repo.bool_config( | ||
|
@@ -304,7 +330,11 @@ def replay_recorded_resolution( | |
): | ||
return (b"", None, None) | ||
|
||
(normalized_preimage, conflict_id) = normalize_conflicted_file(preimage) | ||
(normalized_preimage, conflict_id) = normalize_conflicted_file( | ||
marker_size=marker_size, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. not sure if we are supposed to use kwarg-style calls for non-kwargs? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not sure I follow what you mean by non-kwargs? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. marker_size is a positional argument There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sorry, I'm still puzzling over what you mean here. Saying "it's a positional argument" on a line where it's passed by name doesn't make sense. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh sorry I mixed that up. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I tend to prefer named arguments in general, unless the order is unambiguous (like something like |
||
body=preimage, | ||
) | ||
|
||
conflict_dir = rr_cache / conflict_id | ||
if not conflict_dir.is_dir(): | ||
return (normalized_preimage, conflict_id, None) | ||
|
@@ -323,11 +353,12 @@ def replay_recorded_resolution( | |
|
||
(is_clean_merge, merged) = merge_files( | ||
repo, | ||
tmpdir=tmpdir, | ||
marker_size=marker_size, | ||
labels=("recorded postimage", "recorded preimage", "new preimage"), | ||
current=recorded_postimage, | ||
base=recorded_preimage, | ||
other=normalized_preimage, | ||
tmpdir=tmpdir, | ||
) | ||
if not is_clean_merge: | ||
# We could ask the user to merge this. However, that could be confusing. | ||
|
@@ -365,6 +396,7 @@ class ConflictParseFailed(Exception): | |
|
||
|
||
def normalize_conflict( | ||
marker_size: int, | ||
lines: Iterator[bytes], | ||
hasher: Optional[hashlib._Hash], | ||
) -> bytes: | ||
|
@@ -374,25 +406,25 @@ def normalize_conflict( | |
line = next(lines, None) | ||
if line is None: | ||
raise ConflictParseFailed("unexpected eof") | ||
if line.startswith(b"<<<<<<< "): | ||
if line.startswith((b"<" * marker_size) + b" "): | ||
# parse recursive conflicts, including their processed output in the current hunk | ||
conflict = normalize_conflict(lines, None) | ||
conflict = normalize_conflict(marker_size, lines, None) | ||
if cur_hunk is not None: | ||
cur_hunk += conflict | ||
elif line.startswith(b"|||||||"): | ||
elif line.startswith(b"|" * marker_size): | ||
# ignore the diff3 original section. Must be still parsing the first hunk. | ||
if other_hunk is not None: | ||
raise ConflictParseFailed("unexpected ||||||| conflict marker") | ||
(other_hunk, cur_hunk) = (cur_hunk, None) | ||
elif line.startswith(b"======="): | ||
elif line.startswith(b"=" * marker_size): | ||
# switch into the second hunk | ||
# could be in either the diff3 original section or the first hunk | ||
if cur_hunk is not None: | ||
if other_hunk is not None: | ||
raise ConflictParseFailed("unexpected ======= conflict marker") | ||
other_hunk = cur_hunk | ||
cur_hunk = b"" | ||
elif line.startswith(b">>>>>>> "): | ||
elif line.startswith((b">" * marker_size) + b" "): | ||
# end of conflict. update hasher, and return a normalized conflict | ||
if cur_hunk is None or other_hunk is None: | ||
raise ConflictParseFailed("unexpected >>>>>>> conflict marker") | ||
|
@@ -403,11 +435,14 @@ def normalize_conflict( | |
hasher.update(hunk2 + b"\0") | ||
return b"".join( | ||
( | ||
b"<<<<<<<\n", | ||
b"<" * marker_size, | ||
b"\n", | ||
hunk1, | ||
b"=======\n", | ||
b"=" * marker_size, | ||
b"\n", | ||
hunk2, | ||
b">>>>>>>\n", | ||
b">" * marker_size, | ||
b"\n", | ||
) | ||
) | ||
elif cur_hunk is not None: | ||
|
@@ -416,7 +451,10 @@ def normalize_conflict( | |
cur_hunk += line | ||
|
||
|
||
def normalize_conflicted_file(body: bytes) -> Tuple[bytes, str]: | ||
def normalize_conflicted_file( | ||
marker_size: int, | ||
body: bytes, | ||
) -> Tuple[bytes, str]: | ||
hasher = hashlib.sha1() | ||
normalized = b"" | ||
|
||
|
@@ -425,7 +463,7 @@ def normalize_conflicted_file(body: bytes) -> Tuple[bytes, str]: | |
line = next(lines, None) | ||
if line is None: | ||
return (normalized, hasher.hexdigest()) | ||
if line.startswith(b"<<<<<<< "): | ||
normalized += normalize_conflict(lines, hasher) | ||
if line.startswith((b"<" * marker_size) + b" "): | ||
normalized += normalize_conflict(marker_size, lines, hasher) | ||
else: | ||
normalized += line |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this should maybe be a method of Repository (or at least the generic check-attr wrapper should be)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As far as handling the
"conflict-marker-size"
attribute in particular, it might benefit from staying standalone. There's some extra validation vs other attributes (turning it into an int, and defaulting to 7 if non-positive), and it's very specific to this area of code dealing with merge-conflict text.