Skip to content

Commit

Permalink
regexp: add (*Regexp).SubexpIndex
Browse files Browse the repository at this point in the history
SubexpIndex returns the index of the first subexpression with the given name,
or -1 if there is no subexpression with that name.

Fixes #32420

Change-Id: Ie1f9d22d50fb84e18added80a9d9a9f6dca8ffc4
Reviewed-on: https://go-review.googlesource.com/c/go/+/187919
Run-TryBot: Ian Lance Taylor <[email protected]>
TryBot-Result: Gobot Gobot <[email protected]>
Reviewed-by: Daniel Martí <[email protected]>
  • Loading branch information
sylvinus authored and mvdan committed Apr 10, 2020
1 parent 245409e commit 782fcb4
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 14 deletions.
42 changes: 28 additions & 14 deletions src/regexp/all_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -418,24 +418,32 @@ func TestLiteralPrefix(t *testing.T) {
}
}

// subexpIndex pairs a subexpression name with the index that
// (*Regexp).SubexpIndex is expected to report for that name.
type subexpIndex struct {
name string // name passed to SubexpIndex
index int // expected result; -1 means no subexpression has that name
}

// subexpCase is one table entry: a pattern (input), a count (num), a list of
// names, and a list of name/index expectations (indices) that are checked
// against (*Regexp).SubexpIndex.
// NOTE(review): input, num and names each appear twice below; presumably the
// first set is the removed side of this diff and the second, re-aligned set
// (together with indices) is the added side — confirm against the real file.
type subexpCase struct {
input string
num int
names []string
input string
num int
names []string
indices []subexpIndex
}

// emptySubexpIndices is the shared SubexpIndex expectation for the cases whose
// pattern has no named group: both the empty name and the name "missing" are
// expected to yield -1.
var emptySubexpIndices = []subexpIndex{{"", -1}, {"missing", -1}}

// subexpCases is the table of patterns and their expected subexpression data,
// one subexpCase per entry.
// NOTE(review): the three-value entries are presumably the removed side of
// this diff and the four-value entries (ending in an indices list) the added
// side — confirm against the real file before editing.
var subexpCases = []subexpCase{
{``, 0, nil},
{`.*`, 0, nil},
{`abba`, 0, nil},
{`ab(b)a`, 1, []string{"", ""}},
{`ab(.*)a`, 1, []string{"", ""}},
{`(.*)ab(.*)a`, 2, []string{"", "", ""}},
{`(.*)(ab)(.*)a`, 3, []string{"", "", "", ""}},
{`(.*)((a)b)(.*)a`, 4, []string{"", "", "", "", ""}},
{`(.*)(\(ab)(.*)a`, 3, []string{"", "", "", ""}},
{`(.*)(\(a\)b)(.*)a`, 3, []string{"", "", "", ""}},
{`(?P<foo>.*)(?P<bar>(a)b)(?P<foo>.*)a`, 4, []string{"", "foo", "bar", "", "foo"}},
{``, 0, nil, emptySubexpIndices},
{`.*`, 0, nil, emptySubexpIndices},
{`abba`, 0, nil, emptySubexpIndices},
{`ab(b)a`, 1, []string{"", ""}, emptySubexpIndices},
{`ab(.*)a`, 1, []string{"", ""}, emptySubexpIndices},
{`(.*)ab(.*)a`, 2, []string{"", "", ""}, emptySubexpIndices},
{`(.*)(ab)(.*)a`, 3, []string{"", "", "", ""}, emptySubexpIndices},
{`(.*)((a)b)(.*)a`, 4, []string{"", "", "", "", ""}, emptySubexpIndices},
{`(.*)(\(ab)(.*)a`, 3, []string{"", "", "", ""}, emptySubexpIndices},
{`(.*)(\(a\)b)(.*)a`, 3, []string{"", "", "", ""}, emptySubexpIndices},
{`(?P<foo>.*)(?P<bar>(a)b)(?P<foo>.*)a`, 4, []string{"", "foo", "bar", "", "foo"}, []subexpIndex{{"", -1}, {"missing", -1}, {"foo", 1}, {"bar", 2}}},
}

func TestSubexp(t *testing.T) {
Expand All @@ -458,6 +466,12 @@ func TestSubexp(t *testing.T) {
}
}
}
for _, subexp := range c.indices {
index := re.SubexpIndex(subexp.name)
if index != subexp.index {
t.Errorf("%q: SubexpIndex(%q) = %d, want %d", c.input, subexp.name, index, subexp.index)
}
}
}
}

Expand Down
13 changes: 13 additions & 0 deletions src/regexp/example_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,19 @@ func ExampleRegexp_SubexpNames() {
// Turing Alan
}

// ExampleRegexp_SubexpIndex looks up the position of a named group and uses
// it to pick the matching text out of a submatch slice.
func ExampleRegexp_SubexpIndex() {
	const fullName = "Alan Turing"

	nameRE := regexp.MustCompile(`(?P<first>[a-zA-Z]+) (?P<last>[a-zA-Z]+)`)
	fmt.Println(nameRE.MatchString(fullName))

	// The index reported for a group name is also the position of that
	// group's text in the slice returned by FindStringSubmatch.
	groups := nameRE.FindStringSubmatch(fullName)
	idx := nameRE.SubexpIndex("last")
	fmt.Printf("last => %d\n", idx)
	fmt.Println(groups[idx])
	// Output:
	// true
	// last => 2
	// Turing
}

func ExampleRegexp_Split() {
a := regexp.MustCompile(`a`)
fmt.Println(a.Split("banana", -1))
Expand Down
18 changes: 18 additions & 0 deletions src/regexp/regexp.go
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,24 @@ func (re *Regexp) SubexpNames() []string {
return re.subexpNames
}

// SubexpIndex reports the position of the first subexpression called name.
// It returns -1 when no subexpression carries that name.
//
// A single name may label more than one subexpression: for example,
// (?P<bob>a+)(?P<bob>b+) declares two subexpressions named "bob".
// SubexpIndex then yields the index of the leftmost one in the
// regular expression.
func (re *Regexp) SubexpIndex(name string) int {
	// The empty string is never treated as a name: a lookup for it
	// always reports -1.
	if name == "" {
		return -1
	}
	for idx := 0; idx < len(re.subexpNames); idx++ {
		if re.subexpNames[idx] == name {
			return idx
		}
	}
	return -1
}

// endOfText is the rune value -1.
// NOTE(review): presumably a sentinel marking the end of the input text; its
// uses lie outside this excerpt — confirm.
const endOfText rune = -1

// input abstracts different representations of the input text. It provides
Expand Down

0 comments on commit 782fcb4

Please sign in to comment.