Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Scrubbing env vars from logs #1315

Merged
merged 2 commits into from
Mar 8, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
634 changes: 330 additions & 304 deletions cmd/containerd-shim-runhcs-v1/options/runhcs.pb.go

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions cmd/containerd-shim-runhcs-v1/options/runhcs.proto
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,9 @@ message Options {
// no_inherit_host_timezone specifies to skip inheriting the hosts time zone for WCOW UVMs and instead default to
// UTC.
bool no_inherit_host_timezone = 19;

// scrub_logs enables removing environment variables and other protentially sensitive information form logs
helsaawy marked this conversation as resolved.
Show resolved Hide resolved
bool scrub_logs = 20;
}

// ProcessDetails contains additional information about a process. This is the additional
Expand Down
8 changes: 7 additions & 1 deletion cmd/containerd-shim-runhcs-v1/serve.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"github.com/Microsoft/go-winio"
runhcsopts "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/options"
"github.com/Microsoft/hcsshim/internal/extendedtask"
"github.com/Microsoft/hcsshim/internal/scrub"
"github.com/Microsoft/hcsshim/internal/shimdiag"
"github.com/Microsoft/hcsshim/pkg/octtrpc"
"github.com/containerd/containerd/log"
Expand Down Expand Up @@ -84,7 +85,7 @@ var serveCommand = cli.Command{
}

if shimOpts.Debug && shimOpts.LogLevel != "" {
logrus.Warning("Both Debug and LogLevel specified, Debug will be overriden")
logrus.Warning("Both Debug and LogLevel specified, Debug will be overridden")
}

// For now keep supporting the debug option, this used to be the only way to specify a different logging
Expand Down Expand Up @@ -158,6 +159,11 @@ var serveCommand = cli.Command{

os.Stdin.Close()

// enable scrubbing
if shimOpts.ScrubLogs {
scrub.SetScrubbing(true)
}

// Force the cli.ErrWriter to be os.Stdout for this. We use stderr for
// the panic.log attached via start.
cli.ErrWriter = os.Stdout
Expand Down
25 changes: 21 additions & 4 deletions internal/gcs/bridge.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (
"syscall"
"time"

"github.com/Microsoft/hcsshim/internal/scrub"
"github.com/sirupsen/logrus"
"golang.org/x/sys/windows"
)
Expand Down Expand Up @@ -365,6 +366,7 @@ func (brdg *bridge) recvLoop() error {
func (brdg *bridge) sendLoop() {
var buf bytes.Buffer
enc := json.NewEncoder(&buf)
enc.SetEscapeHTML(false)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why did we add this?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

default behavior is to escape characters for HTML, and since the replacement is <scrubbed>, that gets encoded as \u003cscrubbed\u0033, which i felt would cause issues for pattern matching down the line

for {
select {
case <-brdg.waitCh:
Expand Down Expand Up @@ -392,11 +394,26 @@ func (brdg *bridge) writeMessage(buf *bytes.Buffer, enc *json.Encoder, typ msgTy
}
// Update the message header with the size.
binary.LittleEndian.PutUint32(buf.Bytes()[hdrOffSize:], uint32(buf.Len()))

if brdg.log.Logger.GetLevel() >= logrus.DebugLevel {
b := buf.Bytes()[hdrSize:]
switch typ {
// container environment vars are in rpCreate for linux; rpcExecuteProcess for windows
case msgType(rpcCreate) | msgTypeRequest:
b, err = scrub.BridgeCreate(b)
case msgType(rpcExecuteProcess) | msgTypeRequest:
b, err = scrub.BridgeExecProcess(b)
}
if err != nil {
brdg.log.WithError(err).Warning("could not scrub bridge payload")
}
brdg.log.WithFields(logrus.Fields{
"payload": string(b),
"type": typ,
"message-id": id}).Debug("bridge send")
}

// Write the message.
brdg.log.WithFields(logrus.Fields{
"payload": string(buf.Bytes()[hdrSize:]),
"type": typ,
"message-id": id}).Debug("bridge send")
_, err = buf.WriteTo(brdg.conn)
if err != nil {
return fmt.Errorf("bridge write: %s", err)
Expand Down
174 changes: 174 additions & 0 deletions internal/scrub/scrub.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
// This package scrubs objects of customer information to pass to logging
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd almost say maybe this makes sense in /internal/log instead to keep them close together (in a scrub.go file)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Although I guess the scenario I'm picturing in my mind is you'd set an option on the logger itself and it would handle scrubbing internally.

Copy link
Contributor Author

@helsaawy helsaawy Mar 8, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was debating adding a flag in the context, but that, along with setting a flag on the logger, wouldnt work because a lot of places create a new logger from scratch rather than inherit it from parent functions.
I do like the idea of moving it to internal/log though

package scrub

import (
"bytes"
"encoding/json"
"errors"
"sync/atomic"

hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2"
)

// scrubbing utilities to remove user information from arbitrary objects

type genMap = map[string]interface{}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is it the same as type genMap map[string]interface{}?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nope, this is a type alias, so genMap is equivalent to a map[string]interface{} and I dont have to convert between the two.
This is just to save keystrokes and make functions look cleaner.

type scrubberFunc func(genMap) error

const ScrubbedReplacement = "<scrubbed>"

var (
ErrUnknownType = errors.New("encoded object is of unknown type")

// case sensitive keywords, so "env" is not a substring on "Environment"
_scrubKeywords = [][]byte{[]byte("env"), []byte("Environment")}

_scrub int32
)

// SetScrubbing enables scrubbing
func SetScrubbing(enable bool) {
v := int32(0) // cant convert from bool to int32 directly
if enable {
v = 1
}
atomic.StoreInt32(&_scrub, v)
}

// ScrubbingEnabled checks if scrubbing is enabled
func ScrubbingEnabled() bool {
v := atomic.LoadInt32(&_scrub)
return v != 0
}

// for HCS Create Process requests with config parameters of
anmaxvl marked this conversation as resolved.
Show resolved Hide resolved
// type internal/hcs/schema2.ProcessParameters (aka hcsshema.ProcessParameters)
func ProcessParameters(s string) (string, error) {
helsaawy marked this conversation as resolved.
Show resolved Hide resolved
// todo: deal with v1 ProcessConfig
b := []byte(s)
if !ScrubbingEnabled() || !hasKeywords(b) || !json.Valid(b) {
return s, nil
}

pp := hcsschema.ProcessParameters{}
if err := json.Unmarshal(b, &pp); err != nil {
return "", err
}
pp.Environment = map[string]string{ScrubbedReplacement: ScrubbedReplacement}

buf := bytes.NewBuffer(b[:0])
if err := encode(buf, pp); err != nil {
return "", err
}
return buf.String(), nil
}

// for requests sent over the bridge of type
// internal/gcs/protocol.containerCreate wrapping a internal/hcsoci.linuxHostedSystem
func BridgeCreate(b []byte) ([]byte, error) {
helsaawy marked this conversation as resolved.
Show resolved Hide resolved
return scrubBytes(b, scrubLinuxHostedSystem)
}

func scrubLinuxHostedSystem(m genMap) error {
if !isRequestBase(m) {
return ErrUnknownType
}
if m, ok := index(m, "ContainerConfig"); ok {
if m, ok := index(m, "OciSpecification"); ok {
if m, ok := index(m, "process"); ok {
if _, ok := m["env"]; ok {
m["env"] = []string{ScrubbedReplacement}
return nil
}
}
}
}
return ErrUnknownType
}

// for requests sent over the bridge of type internal/gcs/protocol.containerExecuteProcess
func BridgeExecProcess(b []byte) ([]byte, error) {
return scrubBytes(b, scrubExecuteProcess)
}
helsaawy marked this conversation as resolved.
Show resolved Hide resolved

func scrubExecuteProcess(m genMap) error {
if !isRequestBase(m) {
return ErrUnknownType
}
if m, ok := index(m, "Settings"); ok {
if ss, ok := m["ProcessParameters"]; ok {
// ProcessParameters is a json encoded struct passed as a regular sting field
s, ok := ss.(string)
if !ok {
return ErrUnknownType
}

s, err := ProcessParameters(s)
if err != nil {
return err
}

m["ProcessParameters"] = s
return nil
}
}
return ErrUnknownType
}

func scrubBytes(b []byte, scrub scrubberFunc) ([]byte, error) {
if !ScrubbingEnabled() || !hasKeywords(b) || !json.Valid(b) {
return b, nil
}

m := make(genMap)
if err := json.Unmarshal(b, &m); err != nil {
return nil, err
}

// could use regexp, but if the env strings contain braces, the regexp fails
// parsing into individual structs would require access to private structs
if err := scrub(m); err != nil {
return nil, err
}

buf := &bytes.Buffer{}
if err := encode(buf, m); err != nil {
return nil, err
}
return buf.Bytes(), nil
}

func encode(buf *bytes.Buffer, v interface{}) error {
enc := json.NewEncoder(buf)
enc.SetEscapeHTML(false)
if err := enc.Encode(v); err != nil {
return err
}
return nil
}

func isRequestBase(m genMap) bool {
// neither of these are (currently) `omitempty`
_, a := m["ActivityId"]
_, c := m["ContainerId"]
return a && c
}

// combination `m, ok := m[s]` and `m, ok := m.(genMap)`
func index(m genMap, s string) (genMap, bool) {
if m, ok := m[s]; ok {
mm, ok := m.(genMap)
return mm, ok
}

return m, false
}

func hasKeywords(b []byte) bool {
for _, bb := range _scrubKeywords {
if bytes.Contains(b, bb) {
return true
}
}
return false
}
8 changes: 7 additions & 1 deletion internal/vmcompute/vmcompute.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"github.com/Microsoft/hcsshim/internal/log"
"github.com/Microsoft/hcsshim/internal/logfields"
"github.com/Microsoft/hcsshim/internal/oc"
"github.com/Microsoft/hcsshim/internal/scrub"
"github.com/Microsoft/hcsshim/internal/timeout"
"go.opencensus.io/trace"
)
Expand Down Expand Up @@ -389,7 +390,12 @@ func HcsCreateProcess(ctx gcontext.Context, computeSystem HcsSystem, processPara
}
oc.SetSpanStatus(span, hr)
}()
span.AddAttributes(trace.StringAttribute("processParameters", processParameters))
if span.IsRecordingEvents() {
// wont handle v1 process parameters
if s, err := scrub.ProcessParameters(processParameters); err == nil {
span.AddAttributes(trace.StringAttribute("processParameters", s))
}
}

return processInformation, process, result, execute(ctx, timeout.SyscallWatcher, func() error {
var resultp *uint16
Expand Down
Loading