diff --git a/cmd/anonymizer/app/anonymizer/.nocover b/cmd/anonymizer/app/anonymizer/.nocover
new file mode 100644
index 00000000000..5b583b79e93
--- /dev/null
+++ b/cmd/anonymizer/app/anonymizer/.nocover
@@ -0,0 +1 @@
+non-critical test utility
diff --git a/cmd/anonymizer/app/anonymizer/anonymizer.go b/cmd/anonymizer/app/anonymizer/anonymizer.go
new file mode 100644
index 00000000000..f4dee410e54
--- /dev/null
+++ b/cmd/anonymizer/app/anonymizer/anonymizer.go
@@ -0,0 +1,168 @@
+// Copyright (c) 2020 The Jaeger Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package anonymizer
+
+import (
+	"encoding/json"
+	"fmt"
+	"hash/fnv"
+	"io/ioutil"
+	"os"
+	"path/filepath"
+	"sync"
+	"time"
+
+	"go.uber.org/zap"
+
+	"github.com/jaegertracing/jaeger/model"
+	uiconv "github.com/jaegertracing/jaeger/model/converter/json"
+	uimodel "github.com/jaegertracing/jaeger/model/json"
+)
+
+// allowedTags is the set of span tag keys that filterTags keeps;
+// a tag whose key is not listed here is dropped from the anonymized span.
+var allowedTags = map[string]bool{
+	"error":            true,
+	"span.kind":        true,
+	"http.method":      true,
+	"http.status_code": true,
+	"sampler.type":     true,
+	"sampler.param":    true,
+}
+
+// mapping stores the mapping of service/operation names to their one-way hashes,
+// so that we can do a reverse lookup should the researchers have questions.
+type mapping struct {
+	Services   map[string]string // key=service
+	Operations map[string]string // key=[service]:operation
+}
+
+// Anonymizer transforms Jaeger span in the domain model by obfuscating site-specific strings,
+// like service and operation names, and removes custom tags. It returns obfuscated span in the
+// Jaeger UI format, to make it easy to visualize traces.
+//
+// The mapping from original to obfuscated strings is stored in a file and can be reused between runs.
+type Anonymizer struct {
+	mappingFile string
+	logger      *zap.Logger
+
+	lock    sync.Mutex // guards mapping
+	mapping mapping
+}
+
+// New creates new Anonymizer. The mappingFile stores the mapping from original to
+// obfuscated strings, in case later investigations require looking at the original traces.
+//
+// If mappingFile already exists its content seeds the mapping; a file that exists but
+// cannot be read or parsed is fatal. New also starts a goroutine that calls SaveMapping
+// every 10 seconds; nothing in this file stops that goroutine.
+func New(mappingFile string, logger *zap.Logger) *Anonymizer {
+	a := &Anonymizer{
+		mappingFile: mappingFile,
+		logger:      logger,
+		mapping: mapping{
+			Services:   make(map[string]string),
+			Operations: make(map[string]string),
+		},
+	}
+	// Read the file directly rather than Stat-then-read: only "does not exist" means
+	// there is no previous mapping. Any other failure must stop the run, otherwise the
+	// periodic SaveMapping would overwrite a mapping that merely failed to load.
+	dat, err := ioutil.ReadFile(filepath.Clean(mappingFile))
+	switch {
+	case err == nil:
+		if err := json.Unmarshal(dat, &a.mapping); err != nil {
+			logger.Fatal("Cannot unmarshal previous mapping", zap.Error(err))
+		}
+		// A JSON null resets the corresponding map to nil, and inserting into
+		// a nil map panics, so restore empty maps where needed.
+		if a.mapping.Services == nil {
+			a.mapping.Services = make(map[string]string)
+		}
+		if a.mapping.Operations == nil {
+			a.mapping.Operations = make(map[string]string)
+		}
+	case !os.IsNotExist(err):
+		logger.Fatal("Cannot load previous mapping", zap.Error(err))
+	}
+	go func() {
+		for range time.NewTicker(10 * time.Second).C {
+			a.SaveMapping()
+		}
+	}()
+	return a
+}
+
+// SaveMapping writes the mapping from original to obfuscated strings to a file.
+// It is called by the anonymizer itself periodically, and should be called at
+// the end of the extraction run.
+func (a *Anonymizer) SaveMapping() {
+	a.lock.Lock()
+	defer a.lock.Unlock()
+	dat, err := json.Marshal(a.mapping)
+	if err != nil {
+		a.logger.Error("Failed to marshal mapping file", zap.Error(err))
+		return
+	}
+	// The mapping reverses the anonymization, so a newly created file is made
+	// readable and writable by its owner only (os.ModePerm would request 0777).
+	if err := ioutil.WriteFile(filepath.Clean(a.mappingFile), dat, 0600); err != nil {
+		a.logger.Error("Failed to write mapping file", zap.Error(err))
+		return
+	}
+	a.logger.Sugar().Infof("Saved mapping file %s: %s", a.mappingFile, string(dat))
+}
+
+// mapServiceName returns the obfuscated form of a service name and records
+// the pair in the Services mapping.
+func (a *Anonymizer) mapServiceName(service string) string {
+	return a.mapString(service, a.mapping.Services)
+}
+
+// mapOperationName returns the obfuscated form of an operation name. The hash is
+// computed over "[service]:operation", so equal operation names in different
+// services get different hashes; the pair is recorded in the Operations mapping.
+func (a *Anonymizer) mapOperationName(service, operation string) string {
+	v := fmt.Sprintf("[%s]:%s", service, operation)
+	return a.mapString(v, a.mapping.Operations)
+}
+
+// mapString returns the value stored for v in m, or hashes v, stores the result
+// in m and returns it. It holds a.lock while reading and writing m.
+func (a *Anonymizer) mapString(v string, m map[string]string) string {
+	a.lock.Lock()
+	defer a.lock.Unlock()
+	if s, ok := m[v]; ok {
+		return s
+	}
+	s := hash(v)
+	m[v] = s
+	return s
+}
+
+// hash returns the 64-bit FNV-1 hash of value as 16 zero-padded lowercase hex digits.
+func hash(value string) string {
+	h := fnv.New64()
+	// hash.Hash documents that Write never returns an error.
+	_, _ = h.Write([]byte(value))
+	return fmt.Sprintf("%016x", h.Sum64())
+}
+
+// AnonymizeSpan obfuscates and converts the span.
+func (a *Anonymizer) AnonymizeSpan(span *model.Span) *uimodel.Span {
+	// A span without a Process is treated as having an empty service name.
+	var service string
+	if span.Process != nil {
+		service = span.Process.ServiceName
+	}
+	span.OperationName = a.mapOperationName(service, span.OperationName)
+	span.Tags = filterTags(span.Tags)
+	span.Logs = nil
+	// Install a fresh Process (hashed service name, no tags) instead of renaming the
+	// existing one in place: if a caller attaches one Process to several spans, an
+	// in-place rename would make the next span hash the already-hashed name.
+	span.Process = &model.Process{ServiceName: a.mapServiceName(service)}
+	span.Warnings = nil
+	return uiconv.FromDomainEmbedProcess(span)
+}
+
+// filterTags returns the tags whose key is listed in allowedTags, in their original order.
+// The "error" tag is additionally normalized: boolean values and the strings "true" and
+// "false" are kept as they are, any other value is replaced by the boolean true.
+func filterTags(tags []model.KeyValue) []model.KeyValue {
+	out := make([]model.KeyValue, 0, len(tags))
+	for _, tag := range tags {
+		if !allowedTags[tag.Key] {
+			continue
+		}
+		if tag.Key == "error" {
+			switch tag.VType {
+			case model.BoolType:
+				// allowed
+			case model.StringType:
+				if tag.VStr != "true" && tag.VStr != "false" {
+					tag = model.Bool("error", true)
+				}
+			default:
+				tag = model.Bool("error", true)
+			}
+		}
+		out = append(out, tag)
+	}
+	return out
+}
diff --git a/cmd/anonymizer/app/writer/.nocover b/cmd/anonymizer/app/writer/.nocover
new file mode 100644
index 00000000000..5d2db86d7c0
--- /dev/null
+++ b/cmd/anonymizer/app/writer/.nocover
@@ -0,0 +1 @@
+non-critical test utility
diff --git a/cmd/anonymizer/app/writer/writer.go b/cmd/anonymizer/app/writer/writer.go
new file mode 100644
index 00000000000..5e36f8a4db2
--- /dev/null
+++ b/cmd/anonymizer/app/writer/writer.go
@@ -0,0 +1,132 @@
+// Copyright (c) 2020 The Jaeger Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package writer
+
+import (
+	"bytes"
+	"encoding/json"
+	"fmt"
+	"os"
+	"sync"
+
+	"github.com/gogo/protobuf/jsonpb"
+	"go.uber.org/zap"
+
+	"github.com/jaegertracing/jaeger/cmd/anonymizer/app/anonymizer"
+	"github.com/jaegertracing/jaeger/model"
+)
+
+// Config contains parameters to New.
+type Config struct {
+	MaxSpansCount  int    `yaml:"max_spans_count" name:"max_spans_count"`
+	CapturedFile   string `yaml:"captured_file" name:"captured_file"`
+	AnonymizedFile string `yaml:"anonymized_file" name:"anonymized_file"`
+	MappingFile    string `yaml:"mapping_file" name:"mapping_file"`
+}
+
+// Writer is a span Writer that obfuscates the span and writes it to a JSON file.
+type Writer struct {
+	config         Config
+	lock           sync.Mutex
+	logger         *zap.Logger
+	capturedFile   *os.File
+	anonymizedFile *os.File
+	anonymizer     *anonymizer.Anonymizer
+	spanCount      int
+}
+
+// New creates a Writer that records spans in config.CapturedFile (as received) and
+// config.AnonymizedFile (obfuscated). Both files are created, or truncated if they
+// already exist, and start with the opening bracket of a JSON array. On error no
+// file is left open.
+func New(config Config, logger *zap.Logger) (*Writer, error) {
+	wd, err := os.Getwd()
+	if err != nil {
+		return nil, err
+	}
+	logger.Sugar().Infof("Current working dir is %s", wd)
+
+	// O_TRUNC: without it, re-running over a longer existing file would leave stale
+	// bytes after the newly written JSON. The captured file holds un-anonymized
+	// spans, so a newly created file is restricted to its owner.
+	cf, err := os.OpenFile(config.CapturedFile, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0600)
+	if err != nil {
+		return nil, fmt.Errorf("cannot create output file: %w", err)
+	}
+	logger.Sugar().Infof("Writing captured spans to file %s", config.CapturedFile)
+
+	af, err := os.OpenFile(config.AnonymizedFile, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644)
+	if err != nil {
+		_ = cf.Close() // the open error is the one worth reporting
+		return nil, fmt.Errorf("cannot create output file: %w", err)
+	}
+	logger.Sugar().Infof("Writing anonymized spans to file %s", config.AnonymizedFile)
+
+	for _, f := range []*os.File{cf, af} {
+		if _, err := f.WriteString("["); err != nil {
+			_ = cf.Close()
+			_ = af.Close()
+			return nil, fmt.Errorf("cannot write to output file: %w", err)
+		}
+	}
+	return &Writer{
+		config:         config,
+		logger:         logger,
+		capturedFile:   cf,
+		anonymizedFile: af,
+		anonymizer:     anonymizer.New(config.MappingFile, logger),
+	}, nil
+}
+
+//
+// WriteSpan appends the span as received to the captured file and its anonymized
+// form to the anonymized file, each as one element of a JSON array. Nothing is
+// written unless both forms can be serialized. Once config.MaxSpansCount spans
+// have been written it closes both arrays, saves the mapping and ends the
+// process with os.Exit(0).
+func (w *Writer) WriteSpan(msg *model.Span) error {
+	w.lock.Lock()
+	defer w.lock.Unlock()
+
+	// Serialize the original first: AnonymizeSpan modifies msg in place.
+	captured := new(bytes.Buffer)
+	if err := new(jsonpb.Marshaler).Marshal(captured, msg); err != nil {
+		return err
+	}
+	anonymized, err := json.Marshal(w.anonymizer.AnonymizeSpan(msg))
+	if err != nil {
+		return err
+	}
+
+	if err := w.appendElement(w.capturedFile, captured.Bytes()); err != nil {
+		return fmt.Errorf("cannot write to captured file: %w", err)
+	}
+	if err := w.appendElement(w.anonymizedFile, anonymized); err != nil {
+		return fmt.Errorf("cannot write to anonymized file: %w", err)
+	}
+
+	w.spanCount++
+	if w.spanCount%100 == 0 {
+		w.logger.Info("progress", zap.Int("numSpans", w.spanCount))
+	}
+
+	if w.spanCount >= w.config.MaxSpansCount {
+		w.logger.Info("Saved enough spans, exiting...")
+		w.closeArray(w.capturedFile)
+		w.closeArray(w.anonymizedFile)
+		w.anonymizer.SaveMapping()
+		os.Exit(0)
+	}
+
+	return nil
+}
+
+// appendElement writes data to f as the next element of the JSON array, preceded
+// by a separator unless it is the first element.
+func (w *Writer) appendElement(f *os.File, data []byte) error {
+	if w.spanCount > 0 {
+		if _, err := f.WriteString(",\n"); err != nil {
+			return err
+		}
+	}
+	if _, err := f.Write(data); err != nil {
+		return err
+	}
+	// Flushing to disk stays best-effort; write failures are reported above.
+	_ = f.Sync()
+	return nil
+}
+
+// closeArray terminates the JSON array in f and closes the file. Failures are
+// logged rather than returned because the caller exits right afterwards.
+func (w *Writer) closeArray(f *os.File) {
+	if _, err := f.WriteString("\n]\n"); err != nil {
+		w.logger.Error("Failed to finish output file", zap.String("file", f.Name()), zap.Error(err))
+	}
+	if err := f.Close(); err != nil {
+		w.logger.Error("Failed to close output file", zap.String("file", f.Name()), zap.Error(err))
+	}
+}
diff --git a/cmd/anonymizer/main.go b/cmd/anonymizer/main.go
new file mode 100644
index 00000000000..221bdc04eb8
--- /dev/null
+++ b/cmd/anonymizer/main.go
@@ -0,0 +1,27 @@
+// Copyright (c) 2020 The Jaeger Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package main
+
+import (
+	"go.uber.org/zap"
+
+	"github.com/jaegertracing/jaeger/cmd/anonymizer/app/writer"
+)
+
+// main is a placeholder entry point: it calls writer.New with an empty Config
+// and a no-op logger, discards both results, and prints "not implemented".
+func main() {
+	// TODO
+	cfg := writer.Config{}
+	logger := zap.NewNop()
+	_, _ = writer.New(cfg, logger)
+	println("not implemented")
+}