Skip to content
This repository has been archived by the owner on Aug 30, 2019. It is now read-only.

Commit

Permalink
obfuscate: made tokenConsumer resettable and private
Browse files Browse the repository at this point in the history
  • Loading branch information
gbbr committed Sep 24, 2018
1 parent 507a78a commit 7ae59e3
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 62 deletions.
6 changes: 5 additions & 1 deletion obfuscate/obfuscate.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
// concurrent use.
type Obfuscator struct {
opts *config.ObfuscationConfig
sql *tokenConsumer
es *jsonObfuscator // nil if disabled
mongo *jsonObfuscator // nil if disabled
}
Expand All @@ -22,7 +23,10 @@ func NewObfuscator(cfg *config.ObfuscationConfig) *Obfuscator {
if cfg == nil {
cfg = new(config.ObfuscationConfig)
}
o := Obfuscator{opts: cfg}
o := Obfuscator{
opts: cfg,
sql: newTokenConsumer(),
}
if cfg.ES.Enabled {
o.es = newJSONObfuscator(&cfg.ES)
}
Expand Down
79 changes: 37 additions & 42 deletions obfuscate/sql.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,22 +13,22 @@ const (
sqlQuantizeError = "agent.parse.error"
)

// TokenFilter is a generic interface that a TokenConsumer expects. It defines
// tokenFilter is a generic interface that a tokenConsumer expects. It defines
// the Filter() function used to filter or replace given tokens.
// A filter can be stateful and keep an internal state to apply the filter later;
// this can be useful to prevent backtracking in some cases.
type TokenFilter interface {
type tokenFilter interface {
Filter(token, lastToken int, buffer []byte) (int, []byte)
Reset()
}

// DiscardFilter implements the TokenFilter interface so that the given
// discardFilter implements the tokenFilter interface so that the given
// token is discarded or accepted.
type DiscardFilter struct{}
type discardFilter struct{}

// Filter the given token so that a `nil` slice is returned if the token
// is in the token filtered list.
func (f *DiscardFilter) Filter(token, lastToken int, buffer []byte) (int, []byte) {
func (f *discardFilter) Filter(token, lastToken int, buffer []byte) (int, []byte) {
// filters based on previous token
switch lastToken {
case FilteredBracketedIdentifier:
Expand All @@ -49,7 +49,7 @@ func (f *DiscardFilter) Filter(token, lastToken int, buffer []byte) (int, []byte
// closing bracket counter-part. See GitHub issue #475.
return FilteredBracketedIdentifier, nil
}
// prevent the next comma from being part of a GroupingFilter
// prevent the next comma from being part of a groupingFilter
return FilteredComma, nil
}

Expand All @@ -65,15 +65,15 @@ func (f *DiscardFilter) Filter(token, lastToken int, buffer []byte) (int, []byte
}
}

// Reset in a DiscardFilter is a noop action
func (f *DiscardFilter) Reset() {}
// Reset in a discardFilter is a noop action
func (f *discardFilter) Reset() {}

// ReplaceFilter implements the TokenFilter interface so that the given
// replaceFilter implements the tokenFilter interface so that the given
// token is replaced with '?' or left unchanged.
type ReplaceFilter struct{}
type replaceFilter struct{}

// Filter the given token so that it will be replaced if in the token replacement list
func (f *ReplaceFilter) Filter(token, lastToken int, buffer []byte) (int, []byte) {
func (f *replaceFilter) Filter(token, lastToken int, buffer []byte) (int, []byte) {
switch lastToken {
case Savepoint:
return Filtered, []byte("?")
Expand All @@ -86,12 +86,12 @@ func (f *ReplaceFilter) Filter(token, lastToken int, buffer []byte) (int, []byte
}
}

// Reset in a ReplaceFilter is a noop action
func (f *ReplaceFilter) Reset() {}
// Reset in a replaceFilter is a noop action
func (f *replaceFilter) Reset() {}

// GroupingFilter implements the TokenFilter interface so that when
// groupingFilter implements the tokenFilter interface so that when
// a common pattern is identified, it's discarded to prevent duplicates
type GroupingFilter struct {
type groupingFilter struct {
groupFilter int
groupMulti int
}
Expand All @@ -100,7 +100,7 @@ type GroupingFilter struct {
// has been recognized. A grouping is composed by items like:
// * '( ?, ?, ? )'
// * '( ?, ? ), ( ?, ? )'
func (f *GroupingFilter) Filter(token, lastToken int, buffer []byte) (int, []byte) {
func (f *groupingFilter) Filter(token, lastToken int, buffer []byte) (int, []byte) {
// increasing the number of groups means that we're filtering an entire group
// because it can be represented with a single '( ? )'
if (lastToken == '(' && token == Filtered) || (token == '(' && f.groupMulti > 0) {
Expand Down Expand Up @@ -132,26 +132,26 @@ func (f *GroupingFilter) Filter(token, lastToken int, buffer []byte) (int, []byt
return token, buffer
}

// Reset in a GroupingFilter restores variables used to count
// Reset in a groupingFilter restores variables used to count
// escaped tokens that should be filtered
func (f *GroupingFilter) Reset() {
func (f *groupingFilter) Reset() {
f.groupFilter = 0
f.groupMulti = 0
}

// TokenConsumer is a Tokenizer consumer. It calls the Tokenizer Scan() function until tokens
// tokenConsumer is a Tokenizer consumer. It calls the Tokenizer Scan() function until tokens
// are available or if a LEX_ERROR is raised. After retrieving a token, it is sent in the
// TokenFilter chains so that the token is discarded or replaced.
type TokenConsumer struct {
// tokenFilter chains so that the token is discarded or replaced.
type tokenConsumer struct {
tokenizer *Tokenizer
filters []TokenFilter
filters []tokenFilter
lastToken int
}

// Process the given SQL or No-SQL string so that the resulting one is properly altered. This
// function is generic and the behavior changes according to chosen TokenFilter implementations.
// The process calls all filters inside the []TokenFilter.
func (t *TokenConsumer) Process(in string) (string, error) {
// function is generic and the behavior changes according to chosen tokenFilter implementations.
// The process calls all filters inside the []tokenFilter.
func (t *tokenConsumer) Process(in string) (string, error) {
out := &bytes.Buffer{}
t.tokenizer.InStream.Reset(in)

Expand All @@ -168,6 +168,7 @@ func (t *TokenConsumer) Process(in string) (string, error) {
// apply all registered filters
for _, f := range t.filters {
if token, buff = f.Filter(token, t.lastToken, buff); token == LexError {
t.Reset()
return "", errors.New("the tokenizer was unable to process the string")
}
}
Expand Down Expand Up @@ -201,38 +202,32 @@ func (t *TokenConsumer) Process(in string) (string, error) {
}

// Reset restores the initial states for all components so that memory can be re-used
func (t *TokenConsumer) Reset() {
func (t *tokenConsumer) Reset() {
t.tokenizer.Reset()
for _, f := range t.filters {
f.Reset()
}
}

// NewTokenConsumer returns a new TokenConsumer capable to process SQL and No-SQL strings.
func NewTokenConsumer(filters []TokenFilter) *TokenConsumer {
return &TokenConsumer{
// newTokenConsumer returns a new tokenConsumer capable of processing SQL and No-SQL strings.
func newTokenConsumer() *tokenConsumer {
return &tokenConsumer{
tokenizer: NewStringTokenizer(""),
filters: filters,
filters: []tokenFilter{
&discardFilter{},
&replaceFilter{},
&groupingFilter{},
},
}
}

// token consumer that will quantize the query with
// the given filters; this quantizer is used only
// for SQL and CQL strings
var tokenQuantizer = NewTokenConsumer(
[]TokenFilter{
&DiscardFilter{},
&ReplaceFilter{},
&GroupingFilter{},
})

// obfuscateSQL generates resource and sql.query meta for SQL spans
func (*Obfuscator) obfuscateSQL(span *model.Span) {
func (o *Obfuscator) obfuscateSQL(span *model.Span) {
if span.Resource == "" {
return
}

quantizedString, err := tokenQuantizer.Process(span.Resource)
quantizedString, err := o.sql.Process(span.Resource)
if err != nil || quantizedString == "" {
// if we have an error, the partially parsed SQL is discarded so that we don't pollute
// users resources. Here we provide more details to debug the problem.
Expand Down
22 changes: 3 additions & 19 deletions obfuscate/sql_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -373,14 +373,8 @@ func TestMultipleProcess(t *testing.T) {
},
}

filters := []TokenFilter{
&DiscardFilter{},
&ReplaceFilter{},
&GroupingFilter{},
}

// The consumer is the same between executions
consumer := NewTokenConsumer(filters)
consumer := newTokenConsumer()

for _, tc := range testCases {
output, err := consumer.Process(tc.query)
Expand All @@ -395,12 +389,7 @@ func TestConsumerError(t *testing.T) {
// Malformed SQL is not accepted and the outer component knows
// what to do with malformed SQL
input := "SELECT * FROM users WHERE users.id = '1 AND users.name = 'dog'"
filters := []TokenFilter{
&DiscardFilter{},
&ReplaceFilter{},
&GroupingFilter{},
}
consumer := NewTokenConsumer(filters)
consumer := newTokenConsumer()

output, err := consumer.Process(input)
assert.NotNil(err)
Expand All @@ -416,12 +405,7 @@ func BenchmarkTokenizer(b *testing.B) {
{"Escaping", `INSERT INTO delayed_jobs (attempts, created_at, failed_at, handler, last_error, locked_at, locked_by, priority, queue, run_at, updated_at) VALUES (0, '2016-12-04 17:09:59', NULL, '--- !ruby/object:Delayed::PerformableMethod\nobject: !ruby/object:Item\n store:\n - a simple string\n - an \'escaped \' string\n - another \'escaped\' string\n - 42\n string: a string with many \\\\\'escapes\\\\\'\nmethod_name: :show_store\nargs: []\n', NULL, NULL, NULL, 0, NULL, '2016-12-04 17:09:59', '2016-12-04 17:09:59')`},
{"Grouping", `INSERT INTO delayed_jobs (created_at, failed_at, handler) VALUES (0, '2016-12-04 17:09:59', NULL), (0, '2016-12-04 17:09:59', NULL), (0, '2016-12-04 17:09:59', NULL), (0, '2016-12-04 17:09:59', NULL)`},
}
filters := []TokenFilter{
&DiscardFilter{},
&ReplaceFilter{},
&GroupingFilter{},
}
consumer := NewTokenConsumer(filters)
consumer := newTokenConsumer()

for _, bm := range benchmarks {
b.Run(bm.name+"/"+strconv.Itoa(len(bm.query)), func(b *testing.B) {
Expand Down

0 comments on commit 7ae59e3

Please sign in to comment.