Skip to content

Commit

Permalink
Add trace anonymizer prototype (#2328)
Browse files Browse the repository at this point in the history
* Add trace anonymizer prototype

Signed-off-by: Yuri Shkuro <[email protected]>

* Delint

Signed-off-by: Yuri Shkuro <[email protected]>

* Fix gosec

Signed-off-by: Yuri Shkuro <[email protected]>
  • Loading branch information
yurishkuro authored Jul 4, 2020
1 parent 00b6e96 commit 5001225
Show file tree
Hide file tree
Showing 5 changed files with 329 additions and 0 deletions.
1 change: 1 addition & 0 deletions cmd/anonymizer/app/anonymizer/.nocover
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
non-critical test utility
168 changes: 168 additions & 0 deletions cmd/anonymizer/app/anonymizer/anonymizer.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
// Copyright (c) 2020 The Jaeger Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package anonymizer

import (
"encoding/json"
"fmt"
"hash/fnv"
"io/ioutil"
"os"
"path/filepath"
"sync"
"time"

"go.uber.org/zap"

"github.com/jaegertracing/jaeger/model"
uiconv "github.com/jaegertracing/jaeger/model/converter/json"
uimodel "github.com/jaegertracing/jaeger/model/json"
)

// allowedTags is the allow-list of span tag keys that survive anonymization.
// filterTags drops every tag whose key is not listed here.
var allowedTags = map[string]bool{
	// Generic span semantics.
	"error":     true,
	"span.kind": true,

	// HTTP details that carry no site-specific strings.
	"http.method":      true,
	"http.status_code": true,

	// Sampling configuration.
	"sampler.param": true,
	"sampler.type":  true,
}

// mapping stores the mapping of service/operation names to their one-way hashes,
// so that we can do a reverse lookup should the researchers have questions.
//
// The struct is marshaled to and unmarshaled from JSON as-is (no field tags),
// so the exported field names are also the keys used in the mapping file.
type mapping struct {
// Services maps an original service name to its hash.
Services map[string]string
// Operations maps an original operation to its hash; the key combines the
// original service and operation names in the form "[service]:operation".
Operations map[string]string // key=[service]:operation
}

// Anonymizer transforms Jaeger span in the domain model by obfuscating site-specific strings,
// like service and operation names, and removes custom tags. It returns obfuscated span in the
// Jaeger UI format, to make it easy to visualize traces.
//
// The mapping from original to obfuscated strings is stored in a file and can be reused between runs.
type Anonymizer struct {
// mappingFile is the path the mapping is read from in New and written to by SaveMapping.
mappingFile string
logger *zap.Logger

// lock guards mapping: both mapString and SaveMapping hold it while they
// read or modify the maps.
lock sync.Mutex
mapping mapping
}

// New creates new Anonymizer. The mappingFile stores the mapping from original to
// obfuscated strings, in case later investigations require looking at the original traces.
//
// If mappingFile already exists its content seeds the mapping, so hashes stay
// stable between runs. A missing file is not an error and yields an empty
// mapping. Any other failure to read or parse the file is reported through
// logger.Fatal, because continuing would eventually overwrite the previous
// mapping with an incomplete one.
//
// New also starts a background goroutine that calls SaveMapping every
// 10 seconds; it is never stopped and runs for the lifetime of the process.
func New(mappingFile string, logger *zap.Logger) *Anonymizer {
	a := &Anonymizer{
		mappingFile: mappingFile,
		logger:      logger,
		mapping: mapping{
			Services:   make(map[string]string),
			Operations: make(map[string]string),
		},
	}

	// Read the file directly instead of Stat-then-Read: that avoids the race
	// between the two calls and does not silently ignore errors such as
	// "permission denied" on an existing mapping.
	dat, err := ioutil.ReadFile(filepath.Clean(mappingFile))
	switch {
	case err == nil:
		if err := json.Unmarshal(dat, &a.mapping); err != nil {
			logger.Fatal("Cannot unmarshal previous mapping", zap.Error(err))
		}
		// A JSON null resets the corresponding map to nil, and inserting into
		// a nil map panics, so restore empty maps where needed.
		if a.mapping.Services == nil {
			a.mapping.Services = make(map[string]string)
		}
		if a.mapping.Operations == nil {
			a.mapping.Operations = make(map[string]string)
		}
	case os.IsNotExist(err):
		// No previous mapping: start from scratch.
	default:
		logger.Fatal("Cannot load previous mapping", zap.Error(err))
	}

	go func() {
		for range time.NewTicker(10 * time.Second).C {
			a.SaveMapping()
		}
	}()
	return a
}

// SaveMapping writes the mapping from original to obfuscated strings to a file.
// It is called by the anonymizer itself periodically, and should be called at
// the end of the extraction run.
//
// The lock is held for the whole call, so the snapshot is consistent and
// concurrent saves cannot interleave their writes. Failures are logged and
// otherwise ignored; the next save will try again.
func (a *Anonymizer) SaveMapping() {
	a.lock.Lock()
	defer a.lock.Unlock()
	dat, err := json.Marshal(a.mapping)
	if err != nil {
		a.logger.Error("Failed to marshal mapping file", zap.Error(err))
		return
	}
	// The mapping file is the key for reversing the anonymization, so a newly
	// created file is made accessible to its owner only (and not executable).
	if err := ioutil.WriteFile(filepath.Clean(a.mappingFile), dat, 0600); err != nil {
		a.logger.Error("Failed to write mapping file", zap.Error(err))
		return
	}
	a.logger.Sugar().Infof("Saved mapping file %s: %s", a.mappingFile, string(dat))
}

// mapServiceName returns the obfuscated name for service, recording it in the
// Services mapping on first use.
func (a *Anonymizer) mapServiceName(service string) string {
	services := a.mapping.Services
	return a.mapString(service, services)
}

// mapOperationName returns the obfuscated name for operation of the given
// service. The lookup key is "[service]:operation", so equal operation names
// in different services obtain different hashes.
func (a *Anonymizer) mapOperationName(service, operation string) string {
	key := "[" + service + "]:" + operation
	return a.mapString(key, a.mapping.Operations)
}

// mapString returns the obfuscated form of v as recorded in m. When v has not
// been seen before, its hash is computed and stored in m first. The lock is
// held for the whole lookup-and-insert.
func (a *Anonymizer) mapString(v string, m map[string]string) string {
	a.lock.Lock()
	defer a.lock.Unlock()

	obfuscated, known := m[v]
	if !known {
		obfuscated = hash(v)
		m[v] = obfuscated
	}
	return obfuscated
}

// hash returns the 64-bit FNV-1 hash of value as 16 lowercase hexadecimal digits.
func hash(value string) string {
	hasher := fnv.New64()
	_, _ = hasher.Write([]byte(value))
	// Sum appends the digest in big-endian byte order, so hex-encoding the
	// eight bytes gives the same text as formatting Sum64 with %016x.
	return fmt.Sprintf("%x", hasher.Sum(nil))
}

// AnonymizeSpan obfuscates and converts the span.
//
// The span is modified in place: logs, warnings and process tags are removed,
// span tags are reduced by filterTags, and the service and operation names are
// replaced by their hashes. The result is the span converted to the UI model
// by uiconv.FromDomainEmbedProcess.
func (a *Anonymizer) AnonymizeSpan(span *model.Span) *uimodel.Span {
	// Remember the real service name first: it is part of the operation key
	// and is overwritten below.
	originalService := span.Process.ServiceName

	// Remove the free-form parts of the span.
	span.Logs = nil
	span.Warnings = nil
	span.Process.Tags = nil
	span.Tags = filterTags(span.Tags)

	// Replace the names by their hashes.
	span.Process.ServiceName = a.mapServiceName(originalService)
	span.OperationName = a.mapOperationName(originalService, span.OperationName)

	return uiconv.FromDomainEmbedProcess(span)
}

// filterTags returns a new slice holding only the tags whose key is listed in
// allowedTags. An "error" tag is kept unchanged when it is a boolean or one of
// the strings "true"/"false"; any other "error" value is replaced by the
// boolean tag error=true.
func filterTags(tags []model.KeyValue) []model.KeyValue {
	kept := make([]model.KeyValue, 0, len(tags))
	for _, kv := range tags {
		if allowed := allowedTags[kv.Key]; !allowed {
			continue
		}
		if kv.Key == "error" {
			isBool := kv.VType == model.BoolType
			isBoolString := kv.VType == model.StringType && (kv.VStr == "true" || kv.VStr == "false")
			if !isBool && !isBoolString {
				kv = model.Bool("error", true)
			}
		}
		kept = append(kept, kv)
	}
	return kept
}
1 change: 1 addition & 0 deletions cmd/anonymizer/app/writer/.nocover
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
non-critical test utility
132 changes: 132 additions & 0 deletions cmd/anonymizer/app/writer/writer.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
// Copyright (c) 2020 The Jaeger Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package writer

import (
"bytes"
"encoding/json"
"fmt"
"os"
"sync"

"github.com/gogo/protobuf/jsonpb"
"go.uber.org/zap"

"github.com/jaegertracing/jaeger/cmd/anonymizer/app/anonymizer"
"github.com/jaegertracing/jaeger/model"
)

// Config contains parameters to New.
type Config struct {
// MaxSpansCount is the number of written spans after which WriteSpan
// finalizes the output files and exits the process.
MaxSpansCount int `yaml:"max_spans_count" name:"max_spans_count"`
// CapturedFile is the path of the file that receives the original spans as JSON.
CapturedFile string `yaml:"captured_file" name:"captured_file"`
// AnonymizedFile is the path of the file that receives the anonymized spans as JSON.
AnonymizedFile string `yaml:"anonymized_file" name:"anonymized_file"`
// MappingFile is the path passed to anonymizer.New for the mapping from
// original to obfuscated strings.
MappingFile string `yaml:"mapping_file" name:"mapping_file"`
}

// Writer is a span Writer that obfuscates the span and writes it to a JSON file.
// Every span is written twice: as received to the captured file, and in
// anonymized form to the anonymized file.
type Writer struct {
config Config
// lock serializes WriteSpan calls.
lock sync.Mutex
logger *zap.Logger
// capturedFile receives the spans as they were passed to WriteSpan.
capturedFile *os.File
// anonymizedFile receives the output of the anonymizer.
anonymizedFile *os.File
anonymizer *anonymizer.Anonymizer
// spanCount is the number of spans written so far.
spanCount int
}

// New creates a Writer.
//
// It creates (or truncates) config.CapturedFile and config.AnonymizedFile,
// starts a JSON array in each of them, and builds the anonymizer backed by
// config.MappingFile. An error is returned when the working directory cannot
// be determined or when one of the output files cannot be prepared; no file
// handle is left open in that case.
func New(config Config, logger *zap.Logger) (*Writer, error) {
	wd, err := os.Getwd()
	if err != nil {
		return nil, err
	}
	logger.Sugar().Infof("Current working dir is %s", wd)

	cf, err := openOutputFile(config.CapturedFile)
	if err != nil {
		return nil, err
	}
	logger.Sugar().Infof("Writing captured spans to file %s", config.CapturedFile)

	af, err := openOutputFile(config.AnonymizedFile)
	if err != nil {
		// The open error is the one worth reporting; closing the first file
		// is only cleanup.
		_ = cf.Close()
		return nil, err
	}
	logger.Sugar().Infof("Writing anonymized spans to file %s", config.AnonymizedFile)

	return &Writer{
		config:         config,
		logger:         logger,
		capturedFile:   cf,
		anonymizedFile: af,
		anonymizer:     anonymizer.New(config.MappingFile, logger),
	}, nil
}

// openOutputFile opens path for writing and writes the opening bracket of the
// JSON array. An existing file is truncated so that leftovers of a previous,
// longer run cannot trail the new content. A newly created file is accessible
// to its owner only, since the output may contain non-anonymized data.
func openOutputFile(path string) (*os.File, error) {
	f, err := os.OpenFile(path, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0600)
	if err != nil {
		return nil, fmt.Errorf("cannot create output file: %w", err)
	}
	if _, err := f.WriteString("["); err != nil {
		_ = f.Close() // the write error is the one reported
		return nil, fmt.Errorf("cannot write to output file: %w", err)
	}
	return f, nil
}

// WriteSpan appends the span as JSON to the captured file, then anonymizes it
// and appends the anonymized form as JSON to the anonymized file.
//
// Both JSON documents are produced before anything is written, so a
// marshaling error leaves the two files untouched and in step with each other.
// Note that msg is modified by the anonymizer.
//
// Once config.MaxSpansCount spans have been written, the JSON arrays are
// closed, the mapping is saved, and the process exits with status 0.
func (w *Writer) WriteSpan(msg *model.Span) error {
	w.lock.Lock()
	defer w.lock.Unlock()

	captured := new(bytes.Buffer)
	if err := new(jsonpb.Marshaler).Marshal(captured, msg); err != nil {
		return err
	}
	// AnonymizeSpan changes msg in place, so it must run only after the
	// original span has been serialized above.
	anonymized, err := json.Marshal(w.anonymizer.AnonymizeSpan(msg))
	if err != nil {
		return err
	}

	first := w.spanCount == 0
	if err := appendSpanJSON(w.capturedFile, first, captured.Bytes()); err != nil {
		return fmt.Errorf("cannot write captured span: %w", err)
	}
	if err := appendSpanJSON(w.anonymizedFile, first, anonymized); err != nil {
		return fmt.Errorf("cannot write anonymized span: %w", err)
	}

	w.spanCount++
	if w.spanCount%100 == 0 {
		w.logger.Info("progress", zap.Int("numSpans", w.spanCount))
	}

	if w.spanCount >= w.config.MaxSpansCount {
		w.logger.Info("Saved enough spans, exiting...")
		if err := finishOutputFile(w.capturedFile); err != nil {
			w.logger.Error("Failed to finalize captured spans file", zap.Error(err))
		}
		if err := finishOutputFile(w.anonymizedFile); err != nil {
			w.logger.Error("Failed to finalize anonymized spans file", zap.Error(err))
		}
		w.anonymizer.SaveMapping()
		// NOTE(review): terminating the process from inside the writer is what
		// ends the extraction run; deferred calls (including the unlock above)
		// do not run.
		os.Exit(0)
	}

	return nil
}

// appendSpanJSON appends one JSON document to f, preceded by a separator
// unless it is the first element of the array.
func appendSpanJSON(f *os.File, first bool, data []byte) error {
	if !first {
		if _, err := f.WriteString(",\n"); err != nil {
			return err
		}
	}
	if _, err := f.Write(data); err != nil {
		return err
	}
	// Sync is best effort: some targets (for example pipes or device files)
	// may not support it, and the data has already been handed to the OS.
	_ = f.Sync()
	return nil
}

// finishOutputFile writes the closing bracket of the JSON array and closes f.
func finishOutputFile(f *os.File) error {
	if _, err := f.WriteString("\n]\n"); err != nil {
		_ = f.Close() // the write error is the one reported
		return err
	}
	return f.Close()
}
27 changes: 27 additions & 0 deletions cmd/anonymizer/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// Copyright (c) 2020 The Jaeger Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package main

import (
"go.uber.org/zap"

"github.com/jaegertracing/jaeger/cmd/anonymizer/app/writer"
)

func main() {
// TODO
_, _ = writer.New(writer.Config{}, zap.NewNop())
println("not implemented")
}

0 comments on commit 5001225

Please sign in to comment.