From 14a687f2e9bb64775c67936cba4ebb54685a78b8 Mon Sep 17 00:00:00 2001 From: Lee ByeongJun Date: Wed, 3 Jul 2024 16:38:08 +0900 Subject: [PATCH 01/11] myers diff algorithm --- .../gno.land/p/demo/uassert/diff_test.gno | 146 ++++++++++++++++++ examples/gno.land/p/demo/uassert/diif.gno | 141 +++++++++++++++++ 2 files changed, 287 insertions(+) create mode 100644 examples/gno.land/p/demo/uassert/diff_test.gno create mode 100644 examples/gno.land/p/demo/uassert/diif.gno diff --git a/examples/gno.land/p/demo/uassert/diff_test.gno b/examples/gno.land/p/demo/uassert/diff_test.gno new file mode 100644 index 00000000000..2c840669bcd --- /dev/null +++ b/examples/gno.land/p/demo/uassert/diff_test.gno @@ -0,0 +1,146 @@ +package uassert + +import ( + "testing" +) + +func TestMyersDiff(t *testing.T) { + testCases := []struct { + name string + old string + new string + expected string + }{ + { + name: "No difference", + old: "abc", + new: "abc", + expected: "abc", + }, + { + name: "Simple insertion", + old: "ac", + new: "abc", + expected: "a[+b]c", + }, + { + name: "Simple deletion", + old: "abc", + new: "ac", + expected: "a[-b]c", + }, + { + name: "Simple substitution", + old: "abc", + new: "abd", + expected: "ab[-c][+d]", + }, + { + name: "Multiple changes", + old: "The quick brown fox jumps over the lazy dog", + new: "The quick brown cat jumps over the lazy dog", + expected: "The quick brown [-fox][+cat] jumps over the lazy dog", + }, + { + name: "Prefix and suffix", + old: "Hello, world!", + new: "Hello, beautiful world!", + expected: "Hello, [+beautiful ]world!", + }, + { + name: "Complete change", + old: "abcdef", + new: "ghijkl", + expected: "[-abcdef][+ghijkl]", + }, + { + name: "Empty strings", + old: "", + new: "", + expected: "", + }, + { + name: "Old empty", + old: "", + new: "abc", + expected: "[+abc]", + }, + { + name: "New empty", + old: "abc", + new: "", + expected: "[-abc]", + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + diff 
:= myersDiff(tc.old, tc.new) + result := formatDiff(diff) + if result != tc.expected { + t.Errorf("Expected: %s, got: %s", tc.expected, result) + } + }) + } +} + +func TestEqual(t *testing.T) { + testCases := []struct { + name string + expected interface{} + actual interface{} + shouldPass bool + expectedMsg string + }{ + { + name: "Equal strings", + expected: "abc", + actual: "abc", + shouldPass: true, + expectedMsg: "", + }, + { + name: "Different strings", + expected: "abc", + actual: "abd", + shouldPass: false, + expectedMsg: "error: uassert.Equal: strings are different\n\tDiff: ab[-c][+d]", + }, + { + name: "Equal integers", + expected: 123, + actual: 123, + shouldPass: true, + expectedMsg: "", + }, + { + name: "Different integers", + expected: 123, + actual: 456, + shouldPass: false, + expectedMsg: "error: uassert.Equal: same type but different value\n\texpected: 123\n\tactual: 456", + }, + { + name: "Different types", + expected: "123", + actual: 123, + shouldPass: false, + expectedMsg: "error: uassert.Equal: different types", + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + mockT := &mockTestingT{} + result := Equal(mockT, tc.expected, tc.actual) + if result != tc.shouldPass { + t.Errorf("Expected Equal to return %v, but got %v", tc.shouldPass, result) + } + if tc.shouldPass { + mockT.empty(t) + } else { + mockT.equals(t, tc.expectedMsg) + } + }) + } +} diff --git a/examples/gno.land/p/demo/uassert/diif.gno b/examples/gno.land/p/demo/uassert/diif.gno new file mode 100644 index 00000000000..15254eae237 --- /dev/null +++ b/examples/gno.land/p/demo/uassert/diif.gno @@ -0,0 +1,141 @@ +package uassert + +import ( + "strings" +) + +type EditType int + +const ( + EditKeep EditType = iota + EditInsert + EditDelete +) + +type Edit struct { + Type EditType + Char rune +} + +func myersDiff(old, new string) []Edit { + oldRunes, newRunes := []rune(old), []rune(new) + n, m := len(oldRunes), len(newRunes) + + if n == 0 && m == 0 { + 
return []Edit{} + } + + // old is empty + if n == 0 { + edits := make([]Edit, m) + for i, r := range newRunes { + edits[i] = Edit{Type: EditInsert, Char: r} + } + return edits + } + + if m == 0 { + edits := make([]Edit, n) + for i, r := range oldRunes { + edits[i] = Edit{Type: EditDelete, Char: r} + } + return edits + } + + max := n + m + v := make([]int, 2*max+1) + var trace [][]int + + for d := 0; d <= max; d++ { + for k := -d; k <= d; k += 2 { + var x int + if k == -d || (k != d && v[max+k-1] < v[max+k+1]) { + x = v[max+k+1] + } else { + x = v[max+k-1] + 1 + } + y := x - k + + for x < n && y < m && oldRunes[x] == newRunes[y] { + x++ + y++ + } + + v[max+k] = x + + if x == n && y == m { + trace = append(trace, append([]int(nil), v...)) + goto endSearch + } + } + trace = append(trace, append([]int(nil), v...)) + } +endSearch: + + edits := make([]Edit, 0, n+m) + x, y := n, m + for d := len(trace) - 1; d >= 0; d-- { + vPrev := trace[d] + k := x - y + var prevK int + if k == -d || (k != d && vPrev[max+k-1] < vPrev[max+k+1]) { + prevK = k + 1 + } else { + prevK = k - 1 + } + prevX := vPrev[max+prevK] + prevY := prevX - prevK + + for x > prevX && y > prevY { + if x > 0 && y > 0 { + edits = append([]Edit{{Type: EditKeep, Char: oldRunes[x-1]}}, edits...) + } + x-- + y-- + } + if y > prevY { + if y > 0 { + edits = append([]Edit{{Type: EditInsert, Char: newRunes[y-1]}}, edits...) + } + y-- + } else if x > prevX { + if x > 0 { + edits = append([]Edit{{Type: EditDelete, Char: oldRunes[x-1]}}, edits...) 
+ } + x-- + } + } + + return edits +} + +func formatDiff(edits []Edit) string { + var result strings.Builder + var currentType EditType + var currentChars strings.Builder + + flushCurrent := func() { + if currentChars.Len() > 0 { + switch currentType { + case EditKeep: + result.WriteString(currentChars.String()) + case EditInsert: + result.WriteString("[+" + currentChars.String() + "]") + case EditDelete: + result.WriteString("[-" + currentChars.String() + "]") + } + currentChars.Reset() + } + } + + for _, edit := range edits { + if edit.Type != currentType { + flushCurrent() + currentType = edit.Type + } + currentChars.WriteRune(edit.Char) + } + flushCurrent() + + return result.String() +} From 11ec5c1e2f5a145c4609fa530b127625ecd30093 Mon Sep 17 00:00:00 2001 From: Lee ByeongJun Date: Wed, 3 Jul 2024 16:43:52 +0900 Subject: [PATCH 02/11] typo --- examples/gno.land/p/demo/uassert/{diif.gno => diff.gno} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename examples/gno.land/p/demo/uassert/{diif.gno => diff.gno} (100%) diff --git a/examples/gno.land/p/demo/uassert/diif.gno b/examples/gno.land/p/demo/uassert/diff.gno similarity index 100% rename from examples/gno.land/p/demo/uassert/diif.gno rename to examples/gno.land/p/demo/uassert/diff.gno From e1ec172ff2fa236fbb5f6a2bc207bf6d53967699 Mon Sep 17 00:00:00 2001 From: Lee ByeongJun Date: Wed, 3 Jul 2024 17:14:29 +0900 Subject: [PATCH 03/11] update `Equal` --- examples/gno.land/p/demo/uassert/uassert.gno | 6 +- .../gno.land/p/demo/uassert/uassert_test.gno | 77 +++++++++++++++++++ 2 files changed, 82 insertions(+), 1 deletion(-) diff --git a/examples/gno.land/p/demo/uassert/uassert.gno b/examples/gno.land/p/demo/uassert/uassert.gno index 61885574360..5cddd00ec01 100644 --- a/examples/gno.land/p/demo/uassert/uassert.gno +++ b/examples/gno.land/p/demo/uassert/uassert.gno @@ -126,7 +126,11 @@ func Equal(t TestingT, expected, actual interface{}, msgs ...string) bool { if av, ok := actual.(string); ok { equal = ev == av 
ok_ = true - es, as = ev, as + es, as = ev, av + if !equal { + diff := myersDiff(ev, av) + return fail(t, msgs, "uassert.Equal: strings are different\n\tDiff: %s", formatDiff(diff)) + } } case std.Address: if av, ok := actual.(std.Address); ok { diff --git a/examples/gno.land/p/demo/uassert/uassert_test.gno b/examples/gno.land/p/demo/uassert/uassert_test.gno index a881070a04b..0a924e2e300 100644 --- a/examples/gno.land/p/demo/uassert/uassert_test.gno +++ b/examples/gno.land/p/demo/uassert/uassert_test.gno @@ -201,3 +201,80 @@ func TestEmpty(t *testing.T) { }) } } + +func TestEqualWithStringDiff(t *testing.T) { + cases := []struct { + name string + expected string + actual string + shouldPass bool + expectedMsg string + }{ + { + name: "Identical strings", + expected: "Hello, world!", + actual: "Hello, world!", + shouldPass: true, + expectedMsg: "", + }, + { + name: "Different strings - simple", + expected: "Hello, world!", + actual: "Hello, World!", + shouldPass: false, + expectedMsg: "error: uassert.Equal: strings are different\n\tDiff: Hello, [-w][+W]orld!", + }, + { + name: "Different strings - complex", + expected: "The quick brown fox jumps over the lazy dog", + actual: "The quick brown cat jumps over the lazy dog", + shouldPass: false, + expectedMsg: "error: uassert.Equal: strings are different\n\tDiff: The quick brown [-fox][+cat] jumps over the lazy dog", + }, + { + name: "Different strings - prefix", + expected: "prefix_string", + actual: "string", + shouldPass: false, + expectedMsg: "error: uassert.Equal: strings are different\n\tDiff: [-prefix_]string", + }, + { + name: "Different strings - suffix", + expected: "string", + actual: "string_suffix", + shouldPass: false, + expectedMsg: "error: uassert.Equal: strings are different\n\tDiff: string[+_suffix]", + }, + { + name: "Empty string vs non-empty string", + expected: "", + actual: "non-empty", + shouldPass: false, + expectedMsg: "error: uassert.Equal: strings are different\n\tDiff: [+non-empty]", + }, + 
{ + name: "Non-empty string vs empty string", + expected: "non-empty", + actual: "", + shouldPass: false, + expectedMsg: "error: uassert.Equal: strings are different\n\tDiff: [-non-empty]", + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + mockT := &mockTestingT{} + result := Equal(mockT, tc.expected, tc.actual) + + if result != tc.shouldPass { + t.Errorf("Expected Equal to return %v, but got %v", tc.shouldPass, result) + } + + if tc.shouldPass { + mockT.empty(t) + } else { + mockT.equals(t, tc.expectedMsg) + } + }) + } +} From 31610d2d941047eabb82694d8f9e48c069ae96b9 Mon Sep 17 00:00:00 2001 From: Lee ByeongJun Date: Wed, 3 Jul 2024 21:16:12 +0900 Subject: [PATCH 04/11] optimize formatter --- examples/gno.land/p/demo/uassert/diff.gno | 20 ++++++++++++++----- .../gno.land/p/demo/uassert/diff_test.gno | 10 ++++++++-- 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/examples/gno.land/p/demo/uassert/diff.gno b/examples/gno.land/p/demo/uassert/diff.gno index 15254eae237..77db33beaa7 100644 --- a/examples/gno.land/p/demo/uassert/diff.gno +++ b/examples/gno.land/p/demo/uassert/diff.gno @@ -110,9 +110,15 @@ endSearch: } func formatDiff(edits []Edit) string { - var result strings.Builder - var currentType EditType - var currentChars strings.Builder + if len(edits) == 0 { + return "" + } + + var ( + result strings.Builder + currentType EditType + currentChars strings.Builder + ) flushCurrent := func() { if currentChars.Len() > 0 { @@ -120,9 +126,13 @@ func formatDiff(edits []Edit) string { case EditKeep: result.WriteString(currentChars.String()) case EditInsert: - result.WriteString("[+" + currentChars.String() + "]") + result.WriteString("[+") + result.WriteString(currentChars.String()) + result.WriteByte(']') case EditDelete: - result.WriteString("[-" + currentChars.String() + "]") + result.WriteString("[-") + result.WriteString(currentChars.String()) + result.WriteByte(']') } currentChars.Reset() } diff --git 
a/examples/gno.land/p/demo/uassert/diff_test.gno b/examples/gno.land/p/demo/uassert/diff_test.gno index 2c840669bcd..24aebfb2d02 100644 --- a/examples/gno.land/p/demo/uassert/diff_test.gno +++ b/examples/gno.land/p/demo/uassert/diff_test.gno @@ -71,6 +71,12 @@ func TestMyersDiff(t *testing.T) { new: "", expected: "[-abc]", }, + { + name: "non-ascii", + old: "ASCII 문자가 아닌 것도 되나?", + new: "ASCII 문자가 아닌 것도 됨.", + expected: "ASCII 문자가 아닌 것도 [-되나?][+됨.]", + }, } for _, tc := range testCases { @@ -85,7 +91,7 @@ func TestMyersDiff(t *testing.T) { } func TestEqual(t *testing.T) { - testCases := []struct { + tests := []struct { name string expected interface{} actual interface{} @@ -129,7 +135,7 @@ func TestEqual(t *testing.T) { }, } - for _, tc := range testCases { + for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { mockT := &mockTestingT{} result := Equal(mockT, tc.expected, tc.actual) From b49bc549ea50f84b068609c4d2ee79da226549e0 Mon Sep 17 00:00:00 2001 From: Lee ByeongJun Date: Fri, 5 Jul 2024 17:05:01 +0900 Subject: [PATCH 05/11] make diff as p/demo package --- .../p/demo/{uassert => diff}/diff.gno | 6 +- .../p/demo/{uassert => diff}/diff_test.gno | 71 ++----------------- examples/gno.land/p/demo/diff/gno.mod | 1 + examples/gno.land/p/demo/uassert/uassert.gno | 6 +- 4 files changed, 13 insertions(+), 71 deletions(-) rename examples/gno.land/p/demo/{uassert => diff}/diff.gno (96%) rename examples/gno.land/p/demo/{uassert => diff}/diff_test.gno (54%) create mode 100644 examples/gno.land/p/demo/diff/gno.mod diff --git a/examples/gno.land/p/demo/uassert/diff.gno b/examples/gno.land/p/demo/diff/diff.gno similarity index 96% rename from examples/gno.land/p/demo/uassert/diff.gno rename to examples/gno.land/p/demo/diff/diff.gno index 77db33beaa7..3f6b3b62934 100644 --- a/examples/gno.land/p/demo/uassert/diff.gno +++ b/examples/gno.land/p/demo/diff/diff.gno @@ -1,4 +1,4 @@ -package uassert +package diff import ( "strings" @@ -17,7 +17,7 @@ type Edit struct { Char 
rune } -func myersDiff(old, new string) []Edit { +func MyersDiff(old, new string) []Edit { oldRunes, newRunes := []rune(old), []rune(new) n, m := len(oldRunes), len(newRunes) @@ -109,7 +109,7 @@ endSearch: return edits } -func formatDiff(edits []Edit) string { +func Format(edits []Edit) string { if len(edits) == 0 { return "" } diff --git a/examples/gno.land/p/demo/uassert/diff_test.gno b/examples/gno.land/p/demo/diff/diff_test.gno similarity index 54% rename from examples/gno.land/p/demo/uassert/diff_test.gno rename to examples/gno.land/p/demo/diff/diff_test.gno index 24aebfb2d02..f6761126499 100644 --- a/examples/gno.land/p/demo/uassert/diff_test.gno +++ b/examples/gno.land/p/demo/diff/diff_test.gno @@ -1,11 +1,11 @@ -package uassert +package diff import ( "testing" ) func TestMyersDiff(t *testing.T) { - testCases := []struct { + tests := []struct { name string old string new string @@ -79,74 +79,13 @@ func TestMyersDiff(t *testing.T) { }, } - for _, tc := range testCases { + for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { - diff := myersDiff(tc.old, tc.new) - result := formatDiff(diff) + diff := MyersDiff(tc.old, tc.new) + result := Format(diff) if result != tc.expected { t.Errorf("Expected: %s, got: %s", tc.expected, result) } }) } } - -func TestEqual(t *testing.T) { - tests := []struct { - name string - expected interface{} - actual interface{} - shouldPass bool - expectedMsg string - }{ - { - name: "Equal strings", - expected: "abc", - actual: "abc", - shouldPass: true, - expectedMsg: "", - }, - { - name: "Different strings", - expected: "abc", - actual: "abd", - shouldPass: false, - expectedMsg: "error: uassert.Equal: strings are different\n\tDiff: ab[-c][+d]", - }, - { - name: "Equal integers", - expected: 123, - actual: 123, - shouldPass: true, - expectedMsg: "", - }, - { - name: "Different integers", - expected: 123, - actual: 456, - shouldPass: false, - expectedMsg: "error: uassert.Equal: same type but different value\n\texpected: 
123\n\tactual: 456", - }, - { - name: "Different types", - expected: "123", - actual: 123, - shouldPass: false, - expectedMsg: "error: uassert.Equal: different types", - }, - } - - for _, tc := range tests { - t.Run(tc.name, func(t *testing.T) { - mockT := &mockTestingT{} - result := Equal(mockT, tc.expected, tc.actual) - if result != tc.shouldPass { - t.Errorf("Expected Equal to return %v, but got %v", tc.shouldPass, result) - } - if tc.shouldPass { - mockT.empty(t) - } else { - mockT.equals(t, tc.expectedMsg) - } - }) - } -} diff --git a/examples/gno.land/p/demo/diff/gno.mod b/examples/gno.land/p/demo/diff/gno.mod new file mode 100644 index 00000000000..3041b5f62f1 --- /dev/null +++ b/examples/gno.land/p/demo/diff/gno.mod @@ -0,0 +1 @@ +module gno.land/p/demo/diff diff --git a/examples/gno.land/p/demo/uassert/uassert.gno b/examples/gno.land/p/demo/uassert/uassert.gno index 5cddd00ec01..7c10b737d2b 100644 --- a/examples/gno.land/p/demo/uassert/uassert.gno +++ b/examples/gno.land/p/demo/uassert/uassert.gno @@ -5,6 +5,8 @@ import ( "std" "strconv" "strings" + + "gno.land/p/demo/diff" ) // NoError asserts that a function returned no error (i.e. `nil`). 
@@ -128,8 +130,8 @@ func Equal(t TestingT, expected, actual interface{}, msgs ...string) bool { ok_ = true es, as = ev, av if !equal { - diff := myersDiff(ev, av) - return fail(t, msgs, "uassert.Equal: strings are different\n\tDiff: %s", formatDiff(diff)) + dif := diff.MyersDiff(ev, av) + return fail(t, msgs, "uassert.Equal: strings are different\n\tDiff: %s", diff.Format(dif)) } } case std.Address: From eef6138ecf74700c8981a6e5935b22825f250d0d Mon Sep 17 00:00:00 2001 From: Lee ByeongJun Date: Fri, 5 Jul 2024 17:23:56 +0900 Subject: [PATCH 06/11] tidy --- examples/gno.land/p/demo/uassert/gno.mod | 2 ++ 1 file changed, 2 insertions(+) diff --git a/examples/gno.land/p/demo/uassert/gno.mod b/examples/gno.land/p/demo/uassert/gno.mod index a70e7db825d..f22276564bf 100644 --- a/examples/gno.land/p/demo/uassert/gno.mod +++ b/examples/gno.land/p/demo/uassert/gno.mod @@ -1 +1,3 @@ module gno.land/p/demo/uassert + +require gno.land/p/demo/diff v0.0.0-latest From b0f5c9cc1f50c93074a3504ba9b1fd7253b56803 Mon Sep 17 00:00:00 2001 From: Lee ByeongJun Date: Tue, 16 Jul 2024 20:19:45 +0900 Subject: [PATCH 07/11] Update examples/gno.land/p/demo/diff/diff.gno Co-authored-by: Marc Vertes --- examples/gno.land/p/demo/diff/diff.gno | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/gno.land/p/demo/diff/diff.gno b/examples/gno.land/p/demo/diff/diff.gno index 3f6b3b62934..d3c8500bd3c 100644 --- a/examples/gno.land/p/demo/diff/diff.gno +++ b/examples/gno.land/p/demo/diff/diff.gno @@ -4,7 +4,7 @@ import ( "strings" ) -type EditType int +type EditType uint8 const ( EditKeep EditType = iota From 959cf0381e5f746ec8eed6b008318c687332799c Mon Sep 17 00:00:00 2001 From: Lee ByeongJun Date: Tue, 16 Jul 2024 20:22:39 +0900 Subject: [PATCH 08/11] Apply suggestions from code review Co-authored-by: Marc Vertes --- examples/gno.land/p/demo/diff/diff.gno | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/examples/gno.land/p/demo/diff/diff.gno 
b/examples/gno.land/p/demo/diff/diff.gno index d3c8500bd3c..e79f5e5fc6d 100644 --- a/examples/gno.land/p/demo/diff/diff.gno +++ b/examples/gno.land/p/demo/diff/diff.gno @@ -45,7 +45,7 @@ func MyersDiff(old, new string) []Edit { max := n + m v := make([]int, 2*max+1) var trace [][]int - +search: for d := 0; d <= max; d++ { for k := -d; k <= d; k += 2 { var x int @@ -65,12 +65,11 @@ func MyersDiff(old, new string) []Edit { if x == n && y == m { trace = append(trace, append([]int(nil), v...)) - goto endSearch + break search } } trace = append(trace, append([]int(nil), v...)) } -endSearch: edits := make([]Edit, 0, n+m) x, y := n, m From b3bf3ae755e52817363ebeab7aa2be242de2edc0 Mon Sep 17 00:00:00 2001 From: Lee ByeongJun Date: Tue, 16 Jul 2024 20:53:44 +0900 Subject: [PATCH 09/11] update doc --- examples/gno.land/p/demo/diff/diff.gno | 71 ++++++++++++++++++- .../gno.land/p/demo/uassert/uassert_test.gno | 2 +- 2 files changed, 70 insertions(+), 3 deletions(-) diff --git a/examples/gno.land/p/demo/diff/diff.gno b/examples/gno.land/p/demo/diff/diff.gno index e79f5e5fc6d..c1220d9a1ad 100644 --- a/examples/gno.land/p/demo/diff/diff.gno +++ b/examples/gno.land/p/demo/diff/diff.gno @@ -1,22 +1,60 @@ +// Package diff implements the Myers diff algorithm to compute the edit distance +// and generate a minimal edit script between two strings. +// +// The edit distance computed here counts only single-character insertions and deletions +// (the LCS-based distance), not the Levenshtein distance: a substitution is reported as +// one deletion plus one insertion, so changing "abc" into "abd" yields two edits. package diff import ( "strings" ) +// EditType represents the type of edit operation in a diff. type EditType uint8 const ( + // EditKeep indicates that a character is unchanged in both strings. EditKeep EditType = iota + + // EditInsert indicates that a character was inserted in the new string. 
EditInsert + + // EditDelete indicates that a character was deleted from the old string. EditDelete ) +// Edit represents a single edit operation in a diff. type Edit struct { + // Type is the kind of edit operation. Type EditType + + // Char is the character involved in the edit operation. Char rune } +// MyersDiff computes the difference between two strings using Myers' diff algorithm. +// It returns a slice of Edit operations that transform the old string into the new string. +// This implementation finds the shortest edit script (SES) that represents the minimal +// set of operations to transform one string into the other. +// +// The function handles both ASCII and non-ASCII characters correctly. +// +// Time complexity: O((N+M)D), where N and M are the lengths of the input strings, +// and D is the size of the minimum edit script. +// +// Space complexity: O((N+M)D) +// +// In the worst case, where the strings are completely different, D can be as large as N+M, +// leading to a time and space complexity of O((N+M)^2). For similar strings D is small and the +// search is cheap, but backtracking prepends every edit, which is quadratic in the script length. +// +// Parameters: +// - old: the original string. +// - new: the modified string. +// +// Returns: +// - A slice of Edit operations representing the minimum difference between the two strings. 
func MyersDiff(old, new string) []Edit { oldRunes, newRunes := []rune(old), []rune(new) n, m := len(oldRunes), len(newRunes) @@ -47,15 +85,17 @@ func MyersDiff(old, new string) []Edit { var trace [][]int search: for d := 0; d <= max; d++ { + // iterate through diagonals for k := -d; k <= d; k += 2 { var x int if k == -d || (k != d && v[max+k-1] < v[max+k+1]) { - x = v[max+k+1] + x = v[max+k+1] // move down } else { - x = v[max+k-1] + 1 + x = v[max+k-1] + 1 // move right } y := x - k + // extend the path as far as possible with matching characters for x < n && y < m && oldRunes[x] == newRunes[y] { x++ y++ @@ -63,6 +103,7 @@ search: v[max+k] = x + // check if we've reached the end of both strings if x == n && y == m { trace = append(trace, append([]int(nil), v...)) break search @@ -71,6 +112,7 @@ search: trace = append(trace, append([]int(nil), v...)) } + // backtrack to construct the edit script edits := make([]Edit, 0, n+m) x, y := n, m for d := len(trace) - 1; d >= 0; d-- { @@ -85,6 +127,7 @@ search: prevX := vPrev[max+prevK] prevY := prevX - prevK + // add keep edits for matching characters for x > prevX && y > prevY { if x > 0 && y > 0 { edits = append([]Edit{{Type: EditKeep, Char: oldRunes[x-1]}}, edits...) @@ -108,6 +151,30 @@ search: return edits } +// Format converts a slice of Edit operations into a human-readable string representation. +// It groups consecutive edits of the same type and formats them as follows: +// - Unchanged characters are left as-is +// - Inserted characters are wrapped in [+...] +// - Deleted characters are wrapped in [-...] +// +// This function is useful for visualizing the differences between two strings +// in a compact and intuitive format. 
+// +// Parameters: +// - edits: A slice of Edit operations, typically produced by MyersDiff +// +// Returns: +// - A formatted string representing the diff +// +// Example output: +// +// For the diff between "abcd" and "acbd", the output might be: +// "a[-b]c[+b]d" +// +// Note: +// +// The function assumes that the input slice of edits is in the correct order. +// An empty input slice will result in an empty string. func Format(edits []Edit) string { if len(edits) == 0 { return "" diff --git a/examples/gno.land/p/demo/uassert/uassert_test.gno b/examples/gno.land/p/demo/uassert/uassert_test.gno index 78c8c630957..5ead848fd15 100644 --- a/examples/gno.land/p/demo/uassert/uassert_test.gno +++ b/examples/gno.land/p/demo/uassert/uassert_test.gno @@ -319,7 +319,7 @@ func TestEqualWithStringDiff(t *testing.T) { mockT.empty(t) } else { mockT.equals(t, tc.expectedMsg) - } + } }) } } From 69141c0841022dd40c129f51a0c09befb61b264f Mon Sep 17 00:00:00 2001 From: Lee ByeongJun Date: Tue, 16 Jul 2024 20:55:22 +0900 Subject: [PATCH 10/11] typo --- examples/gno.land/p/demo/diff/diff.gno | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/gno.land/p/demo/diff/diff.gno b/examples/gno.land/p/demo/diff/diff.gno index c1220d9a1ad..0f3da9b3f8e 100644 --- a/examples/gno.land/p/demo/diff/diff.gno +++ b/examples/gno.land/p/demo/diff/diff.gno @@ -54,7 +54,7 @@ type Edit struct { // - new: the modified string. // // Returns: -// - A slice of Edit operations representing the minimum difference between the two strings. +// - A slice of Edit operations representing the minimum difference between the two strings. 
func MyersDiff(old, new string) []Edit { oldRunes, newRunes := []rune(old), []rune(new) n, m := len(oldRunes), len(newRunes) From 71d1affa24e2356cababe6a596c28ac71dfbe9a4 Mon Sep 17 00:00:00 2001 From: Lee ByeongJun Date: Tue, 16 Jul 2024 22:39:37 +0900 Subject: [PATCH 11/11] add more multi-byte char tests --- examples/gno.land/p/demo/diff/diff_test.gno | 93 ++++++++++++++++++++- 1 file changed, 92 insertions(+), 1 deletion(-) diff --git a/examples/gno.land/p/demo/diff/diff_test.gno b/examples/gno.land/p/demo/diff/diff_test.gno index f6761126499..bbf4fcdf3e0 100644 --- a/examples/gno.land/p/demo/diff/diff_test.gno +++ b/examples/gno.land/p/demo/diff/diff_test.gno @@ -1,6 +1,7 @@ package diff import ( + "strings" "testing" ) @@ -72,11 +73,101 @@ func TestMyersDiff(t *testing.T) { expected: "[-abc]", }, { - name: "non-ascii", + name: "non-ascii (Korean characters)", old: "ASCII 문자가 아닌 것도 되나?", new: "ASCII 문자가 아닌 것도 됨.", expected: "ASCII 문자가 아닌 것도 [-되나?][+됨.]", }, + { + name: "Emoji diff", + old: "Hello 👋 World 🌍", + new: "Hello 👋 Beautiful 🌸 World 🌍", + expected: "Hello 👋 [+Beautiful 🌸 ]World 🌍", + }, + { + name: "Mixed multibyte and ASCII", + old: "こんにちは World", + new: "こんばんは World", + expected: "こん[-にち][+ばん]は World", + }, + { + name: "Chinese characters", + old: "我喜欢编程", + new: "我喜欢看书和编程", + expected: "我喜欢[+看书和]编程", + }, + { + name: "Combining characters", + old: "e\u0301", // é (e + ´) + new: "e\u0300", // è (e + `) + expected: "e[-\u0301][+\u0300]", + }, + { + name: "Right-to-Left languages", + old: "שלום", + new: "שלום עולם", + expected: "שלום[+ עולם]", + }, + { + name: "Normalization NFC and NFD", + old: "e\u0301", // NFD (decomposed) + new: "\u00e9", // NFC (precomposed) + expected: "[-e\u0301][+\u00e9]", + }, + { + name: "Case sensitivity", + old: "abc", + new: "Abc", + expected: "[-a][+A]bc", + }, + { + name: "Surrogate pairs", + old: "Hello 🌍", + new: "Hello 🌎", + expected: "Hello [-🌍][+🌎]", + }, + { + name: "Control characters", + old: "Line1\nLine2", + 
new: "Line1\r\nLine2", + expected: "Line1[+\r]\nLine2", + }, + { + name: "Mixed scripts", + old: "Hello नमस्ते こんにちは", + new: "Hello สวัสดี こんにちは", + expected: "Hello [-नमस्ते][+สวัสดี] こんにちは", + }, + { + name: "Unicode normalization", + old: "é", // U+00E9 (precomposed) + new: "e\u0301", // U+0065 U+0301 (decomposed) + expected: "[-é][+e\u0301]", + }, + { + name: "Directional marks", + old: "Hello\u200Eworld", // LTR mark + new: "Hello\u200Fworld", // RTL mark + expected: "Hello[-\u200E][+\u200F]world", + }, + { + name: "Zero-width characters", + old: "ab\u200Bc", // Zero-width space + new: "abc", + expected: "ab[-\u200B]c", + }, + { + name: "Worst-case scenario (completely different strings)", + old: strings.Repeat("a", 1000), + new: strings.Repeat("b", 1000), + expected: "[-" + strings.Repeat("a", 1000) + "][+" + strings.Repeat("b", 1000) + "]", + }, + { + name: "Very long strings", + old: strings.Repeat("a", 10000) + "b" + strings.Repeat("a", 10000), + new: strings.Repeat("a", 10000) + "c" + strings.Repeat("a", 10000), + expected: strings.Repeat("a", 10000) + "[-b][+c]" + strings.Repeat("a", 10000), + }, } for _, tc := range tests {