diff --git a/.gitignore b/.gitignore index 6c8d4e9..3f904fc 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,6 @@ gron *.tgz *.swp *.exe +cpu.out +gron.test + diff --git a/CHANGELOG.mkd b/CHANGELOG.mkd index 7eb91fb..c842422 100644 --- a/CHANGELOG.mkd +++ b/CHANGELOG.mkd @@ -1,5 +1,10 @@ # Changelog +## 0.1.6 +- Adds proper handling of key quoting using Unicode ranges +- Adds basic benchmarks +- Adds profiling script + ## 0.1.5 - Adds scripted builds for darwin on amd64 diff --git a/script/lint b/script/lint index 57b8f45..58caf60 100755 --- a/script/lint +++ b/script/lint @@ -10,4 +10,4 @@ if [ $? -ne 0 ]; then gometalinter --install fi -gometalinter --disable=gocyclo +gometalinter --disable=gocyclo --disable=dupl diff --git a/script/profile b/script/profile new file mode 100755 index 0000000..4179d80 --- /dev/null +++ b/script/profile @@ -0,0 +1,7 @@ +#!/bin/sh +set -e +PROJDIR=$(cd `dirname $0`/.. && pwd) +cd ${PROJDIR} + +go test -bench . -benchmem -cpuprofile cpu.out +go tool pprof gron.test cpu.out diff --git a/statements.go b/statements.go index 397eb8b..bd72a98 100644 --- a/statements.go +++ b/statements.go @@ -3,7 +3,7 @@ package main import ( "encoding/json" "fmt" - "regexp" + "unicode" ) // The javascript reserved words cannot be used as unquoted keys @@ -159,20 +159,48 @@ func formatValue(s interface{}) string { // a key with spaces -> true // 1startsWithANumber -> true func keyMustBeQuoted(s string) bool { - r := regexp.MustCompile(`^[a-zA-Z_][a-zA-Z0-9_]*$`) - if !r.MatchString(s) { - return true - } - - // Check the list of reserved words + // Check the list of reserved words first + // to avoid more expensive checks where possible for _, i := range reservedWords { if s == i { return true } } + + for i, r := range s { + if i == 0 && !validFirstRune(r) { + return true + } + if !validSecondaryRune(r) { + return true + } + } + return false } +// validFirstRune returns true for runes that are valid +// as the first rune in a key. +// E.g: +// 'r' -> true +// '7' -> false +func validFirstRune(r rune) bool { + return unicode.In(r, + unicode.Lu, + unicode.Ll, + unicode.Lm, + unicode.Lo, + unicode.Nl, + ) || r == '$' || r == '_' +} + +// validSecondaryRune returns true for runes that are valid +// as anything other than the first rune in a key. +func validSecondaryRune(r rune) bool { + return validFirstRune(r) || + unicode.In(r, unicode.Mn, unicode.Mc, unicode.Nd, unicode.Pc) +} + // makePrefix takes the previous prefix and the next key and // returns a new prefix or an error on failure func makePrefix(prev string, next interface{}) (string, error) { @@ -181,9 +209,11 @@ func makePrefix(prev string, next interface{}) (string, error) { return fmt.Sprintf("%s[%d]", prev, v), nil case string: if keyMustBeQuoted(v) { - return fmt.Sprintf("%s[%s]", prev, formatValue(v)), nil + // This is a fairly hot code path, and concatination has + // proven to be faster than fmt.Sprintf, despite the allocations + return prev + "[" + formatValue(v) + "]", nil } - return fmt.Sprintf("%s.%s", prev, v), nil + return prev + "." + v, nil default: return "", fmt.Errorf("could not form prefix for %#v", next) } diff --git a/statements_test.go b/statements_test.go index 33898f8..8828da6 100644 --- a/statements_test.go +++ b/statements_test.go @@ -87,6 +87,7 @@ func TestKeyMustBeQuoted(t *testing.T) { {"dotted", false}, {"dotted123", false}, {"_under_scores", false}, + {"ಠ_ಠ", false}, // Invalid chars {"is-quoted", true}, @@ -105,3 +106,102 @@ func TestKeyMustBeQuoted(t *testing.T) { } } } + +func TestValidFirstRune(t *testing.T) { + tests := []struct { + in rune + want bool + }{ + {'r', true}, + {'ಠ', true}, + {'4', false}, + {'-', false}, + } + + for _, test := range tests { + have := validFirstRune(test.in) + if have != test.want { + t.Errorf("Want %t for validFirstRune(%#U); have %t", test.want, test.in, have) + } + } +} + +func TestValidSecondaryRune(t *testing.T) { + tests := []struct { + in rune + want bool + }{ + {'r', true}, + {'ಠ', true}, + {'4', true}, + {'-', false}, + } + + for _, test := range tests { + have := validSecondaryRune(test.in) + if have != test.want { + t.Errorf("Want %t for validSecondaryRune(%#U); have %t", test.want, test.in, have) + } + } +} + +func BenchmarkKeyMustBeQuoted(b *testing.B) { + for i := 0; i < b.N; i++ { + keyMustBeQuoted("must-be-quoted") + } +} + +func BenchmarkKeyMustBeQuotedUnquoted(b *testing.B) { + for i := 0; i < b.N; i++ { + keyMustBeQuoted("canbeunquoted") + } +} + +func BenchmarkKeyMustBeQuotedReserved(b *testing.B) { + for i := 0; i < b.N; i++ { + keyMustBeQuoted("function") + } +} + +func BenchmarkMakeStatements(b *testing.B) { + j := []byte(`{ + "dotted": "A dotted value", + "a quoted": "value", + "bool1": true, + "bool2": false, + "anull": null, + "anarr": [1, 1.5], + "anob": { + "foo": "bar" + }, + "else": 1 + }`) + + var top interface{} + err := json.Unmarshal(j, &top) + if err != nil { + b.Fatalf("Failed to unmarshal test file: %s", err) + } + + for i := 0; i < b.N; i++ { + _, _ = makeStatements("json", top) + } +} + +func BenchmarkMakePrefixUnquoted(b *testing.B) { + for i := 0; i < b.N; i++ { + _, _ = makePrefix("json", "isunquoted") + } +} + +func BenchmarkMakePrefixQuoted(b *testing.B) { + for i := 0; i < b.N; i++ { + _, _ = makePrefix("json", "this-is-quoted") + } +} + +func BenchmarkMakePrefixInt(b *testing.B) { + for i := 0; i < b.N; i++ { + _, _ = makePrefix("json", 212) + } +} diff --git a/testdata/three.json b/testdata/three.json new file mode 100644 index 0000000..fe2838d --- /dev/null +++ b/testdata/three.json @@ -0,0 +1,16 @@ +{ + "one": 1, + "two": 2.2, + "three-b": "3", + "four": [1,2,3,4], + "five": { + "alpha": ["fo", "fum"], + "beta": { + "hey": "How's tricks?" + } + }, + "abool": true, + "abool2": false, + "isnull": null, + "ಠ_ಠ": "yarly" +}