Skip to content

Commit

Permalink
Align find_all with C#/python approach.
Browse files Browse the repository at this point in the history
  • Loading branch information
jdunkerley committed Jun 1, 2023
1 parent bcf9a3d commit 6d68149
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -353,7 +353,7 @@ type Match_Iterator
Also returns the next iterator, if there was a match.
next : Match_Iterator_Value
next self =
regex_result = if self.cursor >= self.input.char_vector.length then Nothing else self.pattern.internal_regex_object.exec self.input self.cursor
regex_result = if self.cursor > self.input.char_vector.length then Nothing else self.pattern.internal_regex_object.exec self.input self.cursor
case regex_result.is_nothing.not && regex_result.isMatch of
False ->
filler_range = Range.new self.cursor (Text_Utils.char_length self.input)
Expand All @@ -364,7 +364,8 @@ type Match_Iterator
filler_range = Range.new self.cursor match_start
filler_span = (Utf_16_Span.Value filler_range self.input)
match = Match.Value self.pattern regex_result self.input
next_cursor = match.utf_16_end 0
## Handle the edge case where the match has zero length: always move the cursor forward by at least one so the iterator keeps making progress.
next_cursor = (self.cursor + 1).max (match.utf_16_end 0)
next_iterator = Match_Iterator.Value self.pattern self.input next_cursor
Match_Iterator_Value.Next filler_span match next_iterator

Expand Down
9 changes: 9 additions & 0 deletions test/Tests/src/Data/Text/Regex_Spec.enso
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,15 @@ spec =
match = pattern.find_all input
match . should_equal []

# Pins how `find_all` reports zero-length matches for compiled patterns.
Test.specify "should handle matching empty matches" <|
# `.*` can match the empty string: the expected result is the whole text followed by one empty match.
pattern = Regex.compile ".*"
pattern.find_all "Hello World" . should_equal ["Hello World", ""]
# On empty input a single empty match is expected.
pattern.find_all "" . should_equal [""]

# `.*(?=.)` uses a lookahead, so a match must be followed by at least one more character.
pattern_2 = Regex.compile ".*(?=.)"
pattern_2.find_all "Hello World" . should_equal ["Hello Worl", ""]
# Empty input has no following character, so no match is expected at all.
pattern_2.find_all "" . should_equal []

Test.specify "should correctly handle edge cases where one-letter matches happen at the end of the word" <|
Regex.compile "(a+|1+)" . find_all "a1a1" . should_equal ["a", "1", "a", "1"]
Regex.compile "([a]+|[1]+)" . find_all "a1a1" . should_equal ["a", "1", "a", "1"]
Expand Down
6 changes: 6 additions & 0 deletions test/Tests/src/Data/Text_Spec.enso
Original file line number Diff line number Diff line change
Expand Up @@ -1279,6 +1279,12 @@ spec =
"Hello World!".find_all ".o" . map (match-> match.span 0) . should_equal [Span.Value (3.up_to 5) "Hello World!", Span.Value (6.up_to 8) "Hello World!"]
"foobar".find "BAR" Case_Sensitivity.Insensitive . span 0 . should_equal (Span.Value (3.up_to 6) "foobar")

# Same zero-length-match cases as the regex spec, exercised through `find_all` called on the text itself.
Test.specify "find_all should handle 0 length matches" <|
# Each match is mapped to its text before comparison.
# `.*` is expected to yield the whole text, then one empty match.
"Hello World".find_all ".*" . map (_.text) . should_equal ["Hello World", ""]
"".find_all ".*" . map (_.text) . should_equal [""]
# `.*(?=.)` needs a following character, so the last character is never included and empty input yields nothing.
"Hello World".find_all ".*(?=.)" . map (_.text) . should_equal ["Hello Worl", ""]
"".find_all ".*(?=.)" . map (_.text) . should_equal []

Test.specify "should handle accents and other multi-point graphemes" <|
accents = 'a\u{301}e\u{301}o\u{301}he\u{301}h'

Expand Down

0 comments on commit 6d68149

Please sign in to comment.