Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add trace anonymizer prototype #2328

Merged
merged 3 commits into from
Jul 4, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cmd/anonymizer/app/anonymizer/.nocover
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
non-critical test utility
168 changes: 168 additions & 0 deletions cmd/anonymizer/app/anonymizer/anonymizer.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
// Copyright (c) 2020 The Jaeger Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package anonymizer

import (
"encoding/json"
"fmt"
"hash/fnv"
"io/ioutil"
"os"
"path/filepath"
"sync"
"time"

"go.uber.org/zap"

"github.com/jaegertracing/jaeger/model"
uiconv "github.com/jaegertracing/jaeger/model/converter/json"
uimodel "github.com/jaegertracing/jaeger/model/json"
)

// allowedTags is the set of span tag keys that survive anonymization.
// filterTags drops every tag whose key is not listed here.
var allowedTags = map[string]bool{
"error": true,
"span.kind": true,
"http.method": true,
"http.status_code": true,
"sampler.type": true,
"sampler.param": true,
}

// mapping stores the mapping of service/operation names to their one-way hashes,
// so that we can do a reverse lookup should the researchers have questions.
//
// The struct is marshaled to and unmarshaled from the mapping file with
// encoding/json, which only handles exported fields.
type mapping struct {
// Services maps an original service name to its hash.
Services map[string]string
// Operations maps an original "[service]:operation" string to its hash.
Operations map[string]string // key=[service]:operation
}

// Anonymizer transforms Jaeger span in the domain model by obfuscating site-specific strings,
// like service and operation names, and removes custom tags. It returns obfuscated span in the
// Jaeger UI format, to make it easy to visualize traces.
//
// The mapping from original to obfuscated strings is stored in a file and can be reused between runs.
type Anonymizer struct {
// mappingFile is the path the mapping is loaded from in New and written to by SaveMapping.
mappingFile string
// logger receives load/save errors and the "Saved mapping file" message.
logger *zap.Logger

// lock is held by mapString and SaveMapping while they access mapping.
lock sync.Mutex
// mapping holds the original-to-hash lookups for services and operations.
mapping mapping
}

// New creates a new Anonymizer. The mappingFile stores the mapping from original to
// obfuscated strings, in case later investigations require looking at the original traces.
//
// If mappingFile can be stat'ed, its contents are loaded so that hashes assigned
// in a previous run are reused; a file that cannot be read or parsed is reported
// through logger.Fatal. New also starts a background goroutine that calls
// SaveMapping every 10 seconds. Neither the goroutine nor its ticker is ever
// stopped, so they live for the rest of the process.
func New(mappingFile string, logger *zap.Logger) *Anonymizer {
	a := &Anonymizer{
		mappingFile: mappingFile,
		logger:      logger,
		mapping: mapping{
			Services:   make(map[string]string),
			Operations: make(map[string]string),
		},
	}
	if _, err := os.Stat(filepath.Clean(mappingFile)); err == nil {
		dat, err := ioutil.ReadFile(filepath.Clean(mappingFile))
		if err != nil {
			logger.Fatal("Cannot load previous mapping", zap.Error(err))
		}
		if err := json.Unmarshal(dat, &a.mapping); err != nil {
			logger.Fatal("Cannot unmarshal previous mapping", zap.Error(err))
		}
		// A JSON null for either field makes Unmarshal set that map to nil,
		// and writing to a nil map panics. Restore empty maps so that
		// mapString can always insert.
		if a.mapping.Services == nil {
			a.mapping.Services = make(map[string]string)
		}
		if a.mapping.Operations == nil {
			a.mapping.Operations = make(map[string]string)
		}
	}
	go func() {
		for range time.NewTicker(10 * time.Second).C {
			a.SaveMapping()
		}
	}()
	return a
}

// SaveMapping writes the mapping from original to obfuscated strings to a file.
// It is called by the anonymizer itself periodically, and should be called at
// the end of the extraction run.
//
// The anonymizer lock is held for the whole call, so the written snapshot is
// consistent and concurrent saves cannot interleave. Failures are logged, not
// returned.
func (a *Anonymizer) SaveMapping() {
	// The mapping file allows de-anonymizing the traces, and it is plain data:
	// create it readable/writable by the owner only instead of with
	// os.ModePerm (0777). The mode only applies when the file is created.
	const mappingFilePerm os.FileMode = 0600

	a.lock.Lock()
	defer a.lock.Unlock()
	dat, err := json.Marshal(a.mapping)
	if err != nil {
		a.logger.Error("Failed to marshal mapping file", zap.Error(err))
		return
	}
	if err := ioutil.WriteFile(filepath.Clean(a.mappingFile), dat, mappingFilePerm); err != nil {
		a.logger.Error("Failed to write mapping file", zap.Error(err))
		return
	}
	a.logger.Sugar().Infof("Saved mapping file %s: %s", a.mappingFile, string(dat))
}

// mapServiceName returns the obfuscated form of a service name, recording it
// in the Services table on first use.
func (a *Anonymizer) mapServiceName(service string) string {
	services := a.mapping.Services
	return a.mapString(service, services)
}

// mapOperationName returns the obfuscated form of an operation name. The
// lookup key is "[service]:operation", so the same operation name under two
// different services is hashed separately.
func (a *Anonymizer) mapOperationName(service, operation string) string {
	key := "[" + service + "]:" + operation
	operations := a.mapping.Operations
	return a.mapString(key, operations)
}

// mapString returns the hash recorded in m for v, computing and storing it
// first if v has not been seen before. The anonymizer lock is held for the
// whole lookup-and-insert.
func (a *Anonymizer) mapString(v string, m map[string]string) string {
	a.lock.Lock()
	defer a.lock.Unlock()

	obfuscated, known := m[v]
	if !known {
		obfuscated = hash(v)
		m[v] = obfuscated
	}
	return obfuscated
}

// hash returns the 64-bit FNV-1 digest of value, formatted as 16 zero-padded
// lowercase hexadecimal characters.
func hash(value string) string {
	digest := fnv.New64()
	// The hash.Hash contract states that Write never returns an error.
	_, _ = digest.Write([]byte(value))
	sum := digest.Sum64()
	return fmt.Sprintf("%016x", sum)
}

// AnonymizeSpan obfuscates the span in place and returns it converted to the
// Jaeger UI model. Service and operation names are replaced by their hashes,
// span tags are reduced to the allowed set, and logs, warnings and process
// tags are removed.
func (a *Anonymizer) AnonymizeSpan(span *model.Span) *uimodel.Span {
	// Capture the original service name first: the operation key is built
	// from it, and it is overwritten below.
	originalService := span.Process.ServiceName

	span.OperationName = a.mapOperationName(originalService, span.OperationName)
	span.Process.ServiceName = a.mapServiceName(originalService)

	span.Tags = filterTags(span.Tags)
	span.Process.Tags = nil
	span.Logs, span.Warnings = nil, nil

	return uiconv.FromDomainEmbedProcess(span)
}

// filterTags returns a new slice holding only the tags whose key is in
// allowedTags. An "error" tag is kept as-is when it is a bool, or a string
// equal to "true" or "false"; any other "error" value is replaced with the
// bool tag error=true.
func filterTags(tags []model.KeyValue) []model.KeyValue {
	kept := make([]model.KeyValue, 0, len(tags))
	for _, kv := range tags {
		if !allowedTags[kv.Key] {
			continue
		}
		if kv.Key == "error" {
			isBool := kv.VType == model.BoolType
			isBoolString := kv.VType == model.StringType &&
				(kv.VStr == "true" || kv.VStr == "false")
			if !isBool && !isBoolString {
				kv = model.Bool("error", true)
			}
		}
		kept = append(kept, kv)
	}
	return kept
}
1 change: 1 addition & 0 deletions cmd/anonymizer/app/writer/.nocover
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
non-critical test utility
132 changes: 132 additions & 0 deletions cmd/anonymizer/app/writer/writer.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
// Copyright (c) 2020 The Jaeger Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package writer

import (
"bytes"
"encoding/json"
"fmt"
"os"
"sync"

"github.com/gogo/protobuf/jsonpb"
"go.uber.org/zap"

"github.com/jaegertracing/jaeger/cmd/anonymizer/app/anonymizer"
"github.com/jaegertracing/jaeger/model"
)

// Config contains parameters to NewWriter.
type Config struct {
// MaxSpansCount is the number of written spans at which WriteSpan finalizes
// the output files and exits the process. With the zero value the process
// exits after the first span.
MaxSpansCount int `yaml:"max_spans_count" name:"max_spans_count"`
// CapturedFile is the path of the file that receives the original spans.
CapturedFile string `yaml:"captured_file" name:"captured_file"`
// AnonymizedFile is the path of the file that receives the anonymized spans.
AnonymizedFile string `yaml:"anonymized_file" name:"anonymized_file"`
// MappingFile is the path passed to anonymizer.New for the name-to-hash mapping.
MappingFile string `yaml:"mapping_file" name:"mapping_file"`
}

// Writer is a span Writer that obfuscates the span and writes it to a JSON file.
// The original span is written to a second file alongside the obfuscated one.
type Writer struct {
// config is the configuration passed to New.
config Config
// lock is held for the duration of each WriteSpan call.
lock sync.Mutex
// logger receives progress and shutdown messages.
logger *zap.Logger
// capturedFile is the open output file for original spans.
capturedFile *os.File
// anonymizedFile is the open output file for anonymized spans.
anonymizedFile *os.File
// anonymizer obfuscates spans and owns the name-to-hash mapping.
anonymizer *anonymizer.Anonymizer
// spanCount is the number of spans written so far.
spanCount int
}

// New creates a Writer.
//
// It creates (or truncates) config.CapturedFile and config.AnonymizedFile,
// writes the opening "[" of a JSON array to each, and constructs the
// anonymizer from config.MappingFile. On error no file handle is left open.
func New(config Config, logger *zap.Logger) (*Writer, error) {
	wd, err := os.Getwd()
	if err != nil {
		return nil, err
	}
	logger.Sugar().Infof("Current working dir is %s", wd)

	const (
		// O_TRUNC discards output left over from a previous run; without it
		// a shorter new run would leave stale bytes after its closing "]".
		openFlags = os.O_CREATE | os.O_WRONLY | os.O_TRUNC
		// The outputs are plain data files, and the captured one holds
		// un-anonymized spans: owner read/write only, no execute bit.
		filePerm = os.FileMode(0600)
	)

	cf, err := os.OpenFile(config.CapturedFile, openFlags, filePerm)
	if err != nil {
		return nil, fmt.Errorf("cannot create output file: %w", err)
	}
	logger.Sugar().Infof("Writing captured spans to file %s", config.CapturedFile)

	af, err := os.OpenFile(config.AnonymizedFile, openFlags, filePerm)
	if err != nil {
		// Best-effort cleanup; the open error is the one worth reporting.
		_ = cf.Close()
		return nil, fmt.Errorf("cannot create output file: %w", err)
	}
	logger.Sugar().Infof("Writing anonymized spans to file %s", config.AnonymizedFile)

	for _, f := range []*os.File{cf, af} {
		if _, err := f.WriteString("["); err != nil {
			// Best-effort cleanup; the write error is the one worth reporting.
			_ = cf.Close()
			_ = af.Close()
			return nil, fmt.Errorf("cannot write to output file: %w", err)
		}
	}
	return &Writer{
		config:         config,
		logger:         logger,
		capturedFile:   cf,
		anonymizedFile: af,
		anonymizer:     anonymizer.New(config.MappingFile, logger),
	}, nil
}

// WriteSpan appends the original span to the captured file and its anonymized
// form to the anonymized file, both as elements of a JSON array.
//
// Once config.MaxSpansCount spans have been written, WriteSpan closes both
// arrays and files, saves the anonymizer mapping and exits the process. The
// exit status is non-zero if an output file could not be finalized.
//
// It returns an error if the span cannot be marshaled or written. A write
// failure can leave the captured file one span ahead of the anonymized file.
func (w *Writer) WriteSpan(msg *model.Span) error {
	w.lock.Lock()
	defer w.lock.Unlock()

	// Serialize both forms before touching either file, so that a marshaling
	// failure cannot leave the files out of step. The original must be
	// marshaled first because AnonymizeSpan modifies msg in place.
	captured := new(bytes.Buffer)
	if err := new(jsonpb.Marshaler).Marshal(captured, msg); err != nil {
		return err
	}
	anonymized, err := json.Marshal(w.anonymizer.AnonymizeSpan(msg))
	if err != nil {
		return err
	}

	first := w.spanCount == 0
	if err := appendJSON(w.capturedFile, captured.Bytes(), first); err != nil {
		return fmt.Errorf("cannot write captured span: %w", err)
	}
	if err := appendJSON(w.anonymizedFile, anonymized, first); err != nil {
		return fmt.Errorf("cannot write anonymized span: %w", err)
	}

	w.spanCount++
	if w.spanCount%100 == 0 {
		w.logger.Info("progress", zap.Int("numSpans", w.spanCount))
	}

	if w.spanCount >= w.config.MaxSpansCount {
		w.logger.Info("Saved enough spans, exiting...")
		exitCode := 0
		for _, f := range []*os.File{w.capturedFile, w.anonymizedFile} {
			if err := finishJSONArray(f); err != nil {
				w.logger.Error("Failed to finalize output file",
					zap.String("file", f.Name()), zap.Error(err))
				exitCode = 1
			}
		}
		w.anonymizer.SaveMapping()
		os.Exit(exitCode)
	}

	return nil
}

// appendJSON writes data to f as the next array element, preceded by a
// separator unless it is the first element, and syncs the file.
func appendJSON(f *os.File, data []byte, first bool) error {
	if !first {
		if _, err := f.WriteString(",\n"); err != nil {
			return err
		}
	}
	if _, err := f.Write(data); err != nil {
		return err
	}
	return f.Sync()
}

// finishJSONArray writes the closing bracket of the JSON array and closes f.
func finishJSONArray(f *os.File) error {
	if _, err := f.WriteString("\n]\n"); err != nil {
		// Still release the handle; the write error is the one to report.
		_ = f.Close()
		return err
	}
	return f.Close()
}
27 changes: 27 additions & 0 deletions cmd/anonymizer/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// Copyright (c) 2020 The Jaeger Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package main

import (
"go.uber.org/zap"

"github.com/jaegertracing/jaeger/cmd/anonymizer/app/writer"
)

// main is a placeholder: it constructs a writer from an empty configuration,
// discards the result, and reports that the command is not implemented.
func main() {
	// TODO
	cfg := writer.Config{}
	nop := zap.NewNop()
	_, _ = writer.New(cfg, nop)
	println("not implemented")
}