Skip to content

Commit

Permalink
internal/lazyregexp: add a lazy Regexp package
Browse files Browse the repository at this point in the history
This was implemented as part of go/doc, but it's going to be useful in
other packages. In particular, many packages under cmd/go like web and
vcs make somewhat heavy use of global regexes, which add a non-trivial
amount of init work to the cmd/go program.

A lazy wrapper around regexp.Regexp will make it trivial to get rid of
the extra cost with a trivial refactor, so make it possible for other
packages in the repository to make use of it. While naming the package,
give the members better names, such as lazyregexp.New and
lazyregexp.Regexp.

We're also considering adding some form of a lazy API to the public
regexp package, so this internal package will allow us to get some
initial experience across std and cmd.

For #29382.

Change-Id: I30b0e72871d5267c309786f95f4cb15c68b2393d
Reviewed-on: https://go-review.googlesource.com/c/164040
Run-TryBot: Daniel Martí <[email protected]>
Reviewed-by: Brad Fitzpatrick <[email protected]>
Reviewed-by: Bryan C. Mills <[email protected]>
TryBot-Result: Gobot Gobot <[email protected]>
  • Loading branch information
mvdan committed Feb 27, 2019
1 parent 0d8c363 commit f40cb19
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 19 deletions.
11 changes: 6 additions & 5 deletions src/go/build/deps_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -192,10 +192,11 @@ var pkgDeps = map[string][]string{
"runtime/trace": {"L0", "context", "fmt"},
"text/tabwriter": {"L2"},

"testing": {"L2", "flag", "fmt", "internal/race", "os", "runtime/debug", "runtime/pprof", "runtime/trace", "time"},
"testing/iotest": {"L2", "log"},
"testing/quick": {"L2", "flag", "fmt", "reflect", "time"},
"internal/testenv": {"L2", "OS", "flag", "testing", "syscall"},
"testing": {"L2", "flag", "fmt", "internal/race", "os", "runtime/debug", "runtime/pprof", "runtime/trace", "time"},
"testing/iotest": {"L2", "log"},
"testing/quick": {"L2", "flag", "fmt", "reflect", "time"},
"internal/testenv": {"L2", "OS", "flag", "testing", "syscall"},
"internal/lazyregexp": {"L2", "OS", "regexp"},

// L4 is defined as L3+fmt+log+time, because in general once
// you're using L3 packages, use of fmt, log, or time is not a big deal.
Expand All @@ -208,7 +209,7 @@ var pkgDeps = map[string][]string{

// Go parser.
"go/ast": {"L4", "OS", "go/scanner", "go/token"},
"go/doc": {"L4", "OS", "go/ast", "go/token", "regexp", "text/template"},
"go/doc": {"L4", "OS", "go/ast", "go/token", "regexp", "internal/lazyregexp", "text/template"},
"go/parser": {"L4", "OS", "go/ast", "go/scanner", "go/token"},
"go/printer": {"L4", "OS", "go/ast", "go/scanner", "go/token", "text/tabwriter"},
"go/scanner": {"L4", "OS", "go/token"},
Expand Down
5 changes: 3 additions & 2 deletions src/go/doc/comment.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ package doc

import (
"bytes"
"internal/lazyregexp"
"io"
"strings"
"text/template" // for HTMLEscape
Expand Down Expand Up @@ -69,7 +70,7 @@ const (
urlRx = protoPart + `://` + hostPart + pathPart
)

var matchRx = newLazyRE(`(` + urlRx + `)|(` + identRx + `)`)
var matchRx = lazyregexp.New(`(` + urlRx + `)|(` + identRx + `)`)

var (
html_a = []byte(`<a href="`)
Expand Down Expand Up @@ -273,7 +274,7 @@ type block struct {
lines []string
}

var nonAlphaNumRx = newLazyRE(`[^a-zA-Z0-9]`)
var nonAlphaNumRx = lazyregexp.New(`[^a-zA-Z0-9]`)

func anchorID(line string) string {
// Add a "hdr-" prefix to avoid conflicting with IDs used for package symbols.
Expand Down
7 changes: 4 additions & 3 deletions src/go/doc/reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ package doc
import (
"go/ast"
"go/token"
"internal/lazyregexp"
"sort"
"strconv"
)
Expand Down Expand Up @@ -439,9 +440,9 @@ func (r *reader) readFunc(fun *ast.FuncDecl) {
}

var (
noteMarker = `([A-Z][A-Z]+)\(([^)]+)\):?` // MARKER(uid), MARKER at least 2 chars, uid at least 1 char
noteMarkerRx = newLazyRE(`^[ \t]*` + noteMarker) // MARKER(uid) at text start
noteCommentRx = newLazyRE(`^/[/*][ \t]*` + noteMarker) // MARKER(uid) at comment start
noteMarker = `([A-Z][A-Z]+)\(([^)]+)\):?` // MARKER(uid), MARKER at least 2 chars, uid at least 1 char
noteMarkerRx = lazyregexp.New(`^[ \t]*` + noteMarker) // MARKER(uid) at text start
noteCommentRx = lazyregexp.New(`^/[/*][ \t]*` + noteMarker) // MARKER(uid) at comment start
)

// readNote collects a single note from a sequence of comments.
Expand Down
18 changes: 9 additions & 9 deletions src/go/doc/lazyre.go → src/internal/lazyregexp/lazyre.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package doc
package lazyregexp

import (
"os"
Expand All @@ -11,38 +11,38 @@ import (
"sync"
)

type lazyRE struct {
type Regexp struct {
str string
once sync.Once
rx *regexp.Regexp
}

func (r *lazyRE) re() *regexp.Regexp {
func (r *Regexp) re() *regexp.Regexp {
r.once.Do(r.build)
return r.rx
}

func (r *lazyRE) build() {
func (r *Regexp) build() {
r.rx = regexp.MustCompile(r.str)
r.str = ""
}

func (r *lazyRE) FindStringSubmatchIndex(s string) []int {
func (r *Regexp) FindStringSubmatchIndex(s string) []int {
return r.re().FindStringSubmatchIndex(s)
}

func (r *lazyRE) ReplaceAllString(src, repl string) string {
func (r *Regexp) ReplaceAllString(src, repl string) string {
return r.re().ReplaceAllString(src, repl)
}

func (r *lazyRE) MatchString(s string) bool {
func (r *Regexp) MatchString(s string) bool {
return r.re().MatchString(s)
}

var inTest = len(os.Args) > 0 && strings.HasSuffix(strings.TrimSuffix(os.Args[0], ".exe"), ".test")

func newLazyRE(str string) *lazyRE {
lr := &lazyRE{str: str}
func New(str string) *Regexp {
lr := &Regexp{str: str}
if inTest {
// In tests, always compile the regexps early.
lr.re()
Expand Down

0 comments on commit f40cb19

Please sign in to comment.