diff --git a/publicsuffix/gen.go b/publicsuffix/gen.go index f85a3c32b..372ffbb24 100644 --- a/publicsuffix/gen.go +++ b/publicsuffix/gen.go @@ -100,6 +100,7 @@ var ( labelsList = []string{} labelsMap = map[string]bool{} rules = []string{} + numICANNRules = 0 // validSuffixRE is used to check that the entries in the public suffix // list are in canonical form (after Punycode encoding). Specifically, @@ -167,11 +168,14 @@ func main1() error { } s = strings.TrimSpace(s) if strings.Contains(s, "BEGIN ICANN DOMAINS") { + if len(rules) != 0 { + return fmt.Errorf(`expected no rules before "BEGIN ICANN DOMAINS"`) + } icann = true continue } if strings.Contains(s, "END ICANN DOMAINS") { - icann = false + icann, numICANNRules = false, len(rules) continue } if s == "" || strings.HasPrefix(s, "//") { @@ -287,7 +291,7 @@ func gitCommit() (sha, date string, retErr error) { func printTest(w io.Writer, n *node) error { fmt.Fprintf(w, "// generated by go run gen.go; DO NOT EDIT\n\n") - fmt.Fprintf(w, "package publicsuffix\n\nvar rules = [...]string{\n") + fmt.Fprintf(w, "package publicsuffix\n\nconst numICANNRules = %d\n\nvar rules = [...]string{\n", numICANNRules) for _, rule := range rules { fmt.Fprintf(w, "%q,\n", rule) } diff --git a/publicsuffix/list.go b/publicsuffix/list.go index be9a9b798..8405ac1b7 100644 --- a/publicsuffix/list.go +++ b/publicsuffix/list.go @@ -84,11 +84,12 @@ func (list) String() string { // https://wiki.mozilla.org/Public_Suffix_List/Use_Cases func PublicSuffix(domain string) (publicSuffix string, icann bool) { lo, hi := uint32(0), uint32(numTLD) - s, suffix, wildcard := domain, len(domain), false + s, suffix, icannNode, wildcard := domain, len(domain), false, false loop: for { dot := strings.LastIndex(s, ".") if wildcard { + icann = icannNode suffix = 1 + dot } if lo == hi { @@ -100,7 +101,7 @@ loop: } u := nodes[f] >> (nodesBitsTextOffset + nodesBitsTextLength) - icann = u&(1<>= nodesBitsICANN u = children[u&(1<>= childrenBitsNodeType wildcard = 
u&(1<= len(rules) { + t.Fatal("no Private rules") + } + // Check the last ICANN and first Private rules. If the underlying public + // suffix list changes, we may need to update these hard-coded checks. + if got, want := rules[numICANNRules-1], "zuerich"; got != want { + t.Errorf("last ICANN rule: got %q, want %q", got, want) + } + if got, want := rules[numICANNRules], "cc.ua"; got != want { + t.Errorf("first Private rule: got %q, want %q", got, want) + } +} + +type slowPublicSuffixRule struct { + ruleParts []string + icann bool +} + // slowPublicSuffix implements the canonical (but O(number of rules)) public // suffix algorithm described at http://publicsuffix.org/list/. // @@ -269,7 +347,7 @@ func TestSlowPublicSuffix(t *testing.T) { // // This function returns the public suffix, not the registrable domain, and so // it stops after step 6. -func slowPublicSuffix(domain string) string { +func slowPublicSuffix(domain string) (string, bool) { match := func(rulePart, domainPart string) bool { switch rulePart[0] { case '*': @@ -281,10 +359,10 @@ func slowPublicSuffix(domain string) string { } domainParts := strings.Split(domain, ".") - var matchingRules [][]string + var matchingRules []slowPublicSuffixRule loop: - for _, rule := range rules { + for i, rule := range rules { ruleParts := strings.Split(rule, ".") if len(domainParts) < len(ruleParts) { continue @@ -296,36 +374,43 @@ loop: continue loop } } - matchingRules = append(matchingRules, ruleParts) + matchingRules = append(matchingRules, slowPublicSuffixRule{ + ruleParts: ruleParts, + icann: i < numICANNRules, + }) } if len(matchingRules) == 0 { - matchingRules = append(matchingRules, []string{"*"}) + matchingRules = append(matchingRules, slowPublicSuffixRule{ + ruleParts: []string{"*"}, + icann: false, + }) } else { sort.Sort(byPriority(matchingRules)) } + prevailing := matchingRules[0] - if prevailing[0][0] == '!' { - prevailing = prevailing[1:] + if prevailing.ruleParts[0][0] == '!' 
{ + prevailing.ruleParts = prevailing.ruleParts[1:] } - if prevailing[0][0] == '*' { - replaced := domainParts[len(domainParts)-len(prevailing)] - prevailing = append([]string{replaced}, prevailing[1:]...) + if prevailing.ruleParts[0][0] == '*' { + replaced := domainParts[len(domainParts)-len(prevailing.ruleParts)] + prevailing.ruleParts = append([]string{replaced}, prevailing.ruleParts[1:]...) } - return strings.Join(prevailing, ".") + return strings.Join(prevailing.ruleParts, "."), prevailing.icann } -type byPriority [][]string +type byPriority []slowPublicSuffixRule func (b byPriority) Len() int { return len(b) } func (b byPriority) Swap(i, j int) { b[i], b[j] = b[j], b[i] } func (b byPriority) Less(i, j int) bool { - if b[i][0][0] == '!' { + if b[i].ruleParts[0][0] == '!' { return true } - if b[j][0][0] == '!' { + if b[j].ruleParts[0][0] == '!' { return false } - return len(b[i]) > len(b[j]) + return len(b[i].ruleParts) > len(b[j].ruleParts) } // eTLDPlusOneTestCases come from diff --git a/publicsuffix/table_test.go b/publicsuffix/table_test.go index d2c4ea96c..97ca2c917 100644 --- a/publicsuffix/table_test.go +++ b/publicsuffix/table_test.go @@ -2,6 +2,8 @@ package publicsuffix +const numICANNRules = 7334 + var rules = [...]string{ "ac", "com.ac",