Skip to content

Commit

Permalink
Merge #85426
Browse files Browse the repository at this point in the history
85426: cli: teach decode-proto to handle Chrome protos r=andreimatei a=andreimatei

This patch teaches `cockroach debug decode-proto` to help with
replaying HTTP network requests captured by Chrome through its "copy as
cURL" feature. Chrome inexplicably seems to mess up when copying our
requests that POST protos: it encodes the raw bytes as UTF-8, which
makes the POST data fail to decode as a proto. This patch makes
decode-proto recognize such an encoding.

The new flags --single, --binary, and --out=<file> are added to support the
use case of decoding a single proto and writing it to a binary file that can
then be read by curl.

Release note: None

Co-authored-by: Andrei Matei <[email protected]>
  • Loading branch information
craig[bot] and andreimatei committed Aug 6, 2022
2 parents db95c24 + f94b3fa commit 7d06d8d
Show file tree
Hide file tree
Showing 3 changed files with 195 additions and 32 deletions.
31 changes: 27 additions & 4 deletions pkg/cli/debug.go
Original file line number Diff line number Diff line change
Expand Up @@ -672,21 +672,38 @@ Decode and print a hexadecimal-encoded key-value pair.

// Flag values for the decode-proto debug command.
var (
	// debugDecodeProtoName is the fully qualified name of the proto type that
	// input fields are decoded as.
	debugDecodeProtoName string
	// debugDecodeProtoEmitDefaults (--emit-defaults) makes the JSON output
	// include default values for every field.
	debugDecodeProtoEmitDefaults bool
	// debugDecodeProtoSingleProto (--single) treats the whole input as a single
	// encoded proto rather than as a stream of separate fields.
	debugDecodeProtoSingleProto bool
	// debugDecodeProtoBinaryOutput (--binary) writes the decoded proto in its
	// binary encoding instead of JSON.
	debugDecodeProtoBinaryOutput bool
	// debugDecodeProtoOutputFile (--out) is the path of the output file; when
	// empty, output goes to stdout.
	debugDecodeProtoOutputFile string
)
var debugDecodeProtoCmd = &cobra.Command{
Use: "decode-proto",
Short: "decode-proto <proto> --name=<fully qualified proto name>",
Long: `
Read from stdin and attempt to decode any hex or base64 encoded proto fields and
output them as JSON. All other fields will be outputted unchanged. Output fields
will be separated by tabs.
Read from stdin and attempt to decode any hex, base64, or C-escaped encoded
protos and output them as JSON. If --single is specified, the input is expected
to consist of a single encoded proto. Otherwise, the input can consist of
multiple fields, separated by new lines and tabs. Each field is attempted to be
decoded and, if that's unsuccessful, is echoed as is.
The default value for --schema is 'cockroach.sql.sqlbase.Descriptor'.
For example:
$ decode-proto < cat debug/system.decsriptor.txt
$ cat debug/system.descriptor.txt | cockroach debug decode-proto
id descriptor hex_descriptor
1 \022!\012\006system\020\001\032\025\012\011\012\005admin\0200\012\010\012\004root\0200 {"database": {"id": 1, "modificationTime": {}, "name": "system", "privileges": {"users": [{"privileges": 48, "user": "admin"}, {"privileges": 48, "user": "root"}]}}}
...
decode-proto can be used to decode protos as captured by Chrome Dev
Tools from HTTP network requests ("Copy as cURL"). Chrome captures these as
UTF8-encoded raw bytes, which are then rendered as C-escaped strings. The UTF8
encoding breaks the proto encoding, so the curl command doesn't work as Chrome
presents it. To rectify that, take the string argument passed to "curl --data" and pass it to
"cockroach decode-proto --single --binary --out=<file>". Then, to replay the HTTP
request, do something like:
$ curl -X POST 'http://localhost:8080/ts/query' \
-H 'Accept: application/json' \
-H 'Content-Type: application/x-protobuf' \
--data-binary @<file>
`,
Args: cobra.ArbitraryArgs,
RunE: runDebugDecodeProto,
Expand Down Expand Up @@ -1798,6 +1815,12 @@ func init() {
"fully qualified name of the proto to decode")
f.BoolVar(&debugDecodeProtoEmitDefaults, "emit-defaults", false,
"encode default values for every field")
f.BoolVar(&debugDecodeProtoSingleProto, "single", false,
"treat the input as a single field")
f.BoolVar(&debugDecodeProtoBinaryOutput, "binary", false,
"output the protos as binary instead of JSON. If specified, --out also needs to be specified.")
f.StringVar(&debugDecodeProtoOutputFile, "out", "",
"path to output file. If not specified, output goes to stdout.")

f = debugCheckLogConfigCmd.Flags()
f.Var(&debugLogChanSel, "only-channels", "selection of channels to include in the output diagram.")
Expand Down
157 changes: 137 additions & 20 deletions pkg/cli/decode.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,29 +12,98 @@ package cli

import (
"bufio"
"bytes"
"encoding/base64"
gohex "encoding/hex"
"fmt"
"io"
"os"
"strconv"
"strings"
"unicode/utf8"

"github.com/cockroachdb/cockroach/pkg/sql/protoreflect"
"github.com/cockroachdb/cockroach/pkg/util/protoutil"
"github.com/cockroachdb/errors"
"github.com/mattn/go-isatty"
"github.com/spf13/cobra"
)

func runDebugDecodeProto(_ *cobra.Command, _ []string) error {
if debugDecodeProtoBinaryOutput && debugDecodeProtoOutputFile == "" {
return errors.Errorf("--out is required when --binary is specified. Redirecting stdout is not " +
"supported because that can introduce a trailing newline character.")
}
if debugDecodeProtoBinaryOutput && !debugDecodeProtoSingleProto {
return errors.Errorf("--single is required when --binary is specified. " +
"Outputting binary data interspersed with text fields is not supported.")
}

if isatty.IsTerminal(os.Stdin.Fd()) {
fmt.Fprintln(stderr,
`# Reading proto-encoded pieces of data from stdin.
# Press Ctrl+C or Ctrl+D to terminate.`,
)
}
return streamMap(os.Stdout, os.Stdin,
out := os.Stdout
if debugDecodeProtoOutputFile != "" {
var err error
out, err = os.OpenFile(debugDecodeProtoOutputFile, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, os.ModePerm)
if err != nil {
panic(err)
}
}

if debugDecodeProtoSingleProto {
buf := bytes.NewBuffer(nil)
_, err := buf.ReadFrom(os.Stdin)
if err != nil {
return err
}
msg := tryDecodeValue(buf.String(), debugDecodeProtoName)
if msg == nil {
return errors.Errorf("decoding failed")
}

// Output the decoded proto, either as JSON, or as binary (proto-encoded).
if debugDecodeProtoBinaryOutput {
bytes, err := protoutil.Marshal(msg)
if err != nil {
return err
}
_, err = out.Write(bytes)
if err != nil {
return err
}
} else {
j, err := protoreflect.MessageToJSON(msg, protoreflect.FmtFlags{EmitDefaults: debugDecodeProtoEmitDefaults})
if err != nil {
// Unexpected error: the data was valid protobuf, but does not
// reflect back to JSON. We report the protobuf struct in the
// error message nonetheless.
return errors.Wrapf(err, "while JSON-encoding %#v", msg)
}
fmt.Fprint(out, j)
}
return nil
}

// If --single was not specified, we attempt to decode individual fields.
return streamMap(out, os.Stdin,
func(s string) (bool, string, error) {
return tryDecodeValue(s, debugDecodeProtoName, debugDecodeProtoEmitDefaults)
msg := tryDecodeValue(s, debugDecodeProtoName)
if msg == nil {
return false, "", nil
}

j, err := protoreflect.MessageToJSON(msg, protoreflect.FmtFlags{EmitDefaults: debugDecodeProtoEmitDefaults})
if err != nil {
// Unexpected error: the data was valid protobuf, but does not
// reflect back to JSON. We report the protobuf struct in the
// error message nonetheless.
return false, "", errors.Wrapf(err, "while JSON-encoding %#v", msg)
}
return true, j.String(), nil
})
}

Expand Down Expand Up @@ -63,27 +132,75 @@ func streamMap(out io.Writer, in io.Reader, fn func(string) (bool, string, error
return sc.Err()
}

// tryDecodeValue tries to decode the given string with the given proto name
// reports ok=false if the data was not valid proto-encoded.
func tryDecodeValue(s, protoName string, emitDefaults bool) (ok bool, val string, err error) {
// interpretString decodes s from one of a couple of supported encodings,
// tried in this order:
//   - hex
//   - base-64 (standard alphabet, padded)
//   - Go (or C) quoted string, with or without wrapping single or double quotes
//
// Leading and trailing whitespace is ignored for all encodings. It has to be
// stripped before the hex attempt: base-64 decoding ignores newlines, so hex
// input followed by a newline would otherwise be rejected as hex and then be
// silently mis-decoded as base-64.
//
// It returns the decoded bytes and true, or nil and false if s matches none of
// the encodings. The quoted-string attempt relies on strconv.Unquote, so a
// string containing an unescaped double quote is rejected.
func interpretString(s string) ([]byte, bool) {
	s = strings.TrimSpace(s)

	// Try hex.
	if decoded, err := gohex.DecodeString(s); err == nil {
		return decoded, true
	}
	// Try base64.
	if decoded, err := base64.StdEncoding.DecodeString(s); err == nil {
		return decoded, true
	}

	// Try quoted string. Remove a matching pair of wrapping quotes, if any. The
	// length check keeps a lone quote character from being treated as both the
	// opening and the closing quote, which would slice out of bounds.
	if len(s) >= 2 && (s[0] == '\'' || s[0] == '"') && s[len(s)-1] == s[0] {
		s = s[1 : len(s)-1]
	}
	// Add wrapping double quotes; strconv.Unquote requires them.
	unquoted, err := strconv.Unquote(`"` + s + `"`)
	if err != nil {
		return nil, false
	}
	return []byte(unquoted), true
}

// tryDecodeValue tries to decode the given string with the given proto name.
// Returns false if decoding fails.
func tryDecodeValue(s, protoName string) protoutil.Message {
bytes, ok := interpretString(s)
if !ok {
return nil
}

// Try to decode the proto directly.
msg, err := protoreflect.DecodeMessage(protoName, bytes)
if err != nil {
return false, "", nil //nolint:returnerrcheck
if err == nil {
return msg
}
j, err := protoreflect.MessageToJSON(msg, protoreflect.FmtFlags{EmitDefaults: emitDefaults})
if err != nil {
// Unexpected error: the data was valid protobuf, but does not
// reflect back to JSON. We report the protobuf struct in the
// error message nonetheless.
return false, "", errors.Wrapf(err, "while JSON-encoding %#v", msg)
_ = err // appease the linter

// Try to undo UTF-8 encoding of the bytes. This compensates for how Chrome
// seems to encode the POST data through the "Export as cURL" functionality.
bytes, ok = convertFromUTF8(bytes)
if !ok {
return nil
}
msg, _ /* err */ = protoreflect.DecodeMessage(protoName, bytes)
return msg
}

// convertFromUTF8 undoes a UTF-8 encoding of raw bytes: every rune in the
// input is mapped back to the single byte whose value is the rune's code
// point.
//
// It returns the converted bytes and true, or nil and false if the input
// contains a rune that does not fit in one byte (which includes invalid UTF-8,
// since that decodes to utf8.RuneError).
func convertFromUTF8(bytes []byte) (out []byte, ok bool) {
	// Every rune consumes at least one input byte and produces exactly one
	// output byte, so the output never exceeds the input length.
	out = make([]byte, 0, len(bytes))
	for len(bytes) > 0 {
		// We expect only one-byte runes, which encode to one or two UTF-8 bytes.
		// That's sufficient for how (I think) Chrome treats the raw bytes that it
		// encodes to UTF-8: the theory is that it goes through the raw bytes one
		// by one and converts the ones above 127 into a 2-byte rune.
		got, n := utf8.DecodeRune(bytes)
		if got > 0xff || n > 2 {
			// Unexpected multi-byte rune.
			return nil, false
		}
		out = append(out, byte(got))
		bytes = bytes[n:]
	}
	return out, true
}
39 changes: 31 additions & 8 deletions pkg/cli/decode_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ func TestStreamMap(t *testing.T) {
func TestTryDecodeValue(t *testing.T) {
defer leaktest.AfterTest(t)()

protoName := "cockroach.sql.sqlbase.TableDescriptor"
const defaultProtoType = "cockroach.sql.sqlbase.TableDescriptor"
marshal := func(pb protoutil.Message) []byte {
s, err := protoutil.Marshal(pb)
require.NoError(t, err)
Expand All @@ -87,10 +87,11 @@ func TestTryDecodeValue(t *testing.T) {
tableDesc := &descpb.TableDescriptor{ID: 42, ParentID: 7, Name: "foo"}

tests := []struct {
name string
s string
wantOK bool
wantVal string
name string
protoType string
s string
wantOK bool
wantVal string
}{
{
name: "from hex",
Expand All @@ -116,13 +117,35 @@ func TestTryDecodeValue(t *testing.T) {
name: "base64 not proto",
s: base64.StdEncoding.EncodeToString([]byte("@#$@#%$%@")),
},
{
// This is the POST data of an HTTP tsdb query taken from Chrome using
// "Copy as cURL". It is a quoted string, containing UTF-8 encoded bytes.
name: "Chrome-encoded",
s: `\u0008\u0080ì¿ùÛ\u008bù\u0083\u0017\u0010\u0080¬¢ÿ¾ôù\u0083\u0017\u001a \n\u0018cr.node.sql.select.count\u0010\u0001\u0018\u0002 \u0002\u001a \n\u0018cr.node.sql.update.count\u0010\u0001\u0018\u0002 \u0002\u001a \n\u0018cr.node.sql.insert.count\u0010\u0001\u0018\u0002 \u0002\u001a \n\u0018cr.node.sql.delete.count\u0010\u0001\u0018\u0002 \u0002\u001a*\n\u001fcr.node.sql.service.latency-p99\u0010\u0003\u0018\u0002 \u0000*\u00011\u001a3\n+cr.node.sql.distsql.contended_queries.count\u0010\u0001\u0018\u0002 \u0002\u001a\u001c\n\u0011cr.store.replicas\u0010\u0001\u0018\u0002 \u0000*\u00011\u001a\u0019\n\u0011cr.store.capacity\u0010\u0001\u0018\u0002 \u0000\u001a#\n\u001bcr.store.capacity.available\u0010\u0001\u0018\u0002 \u0000\u001a\u001e\n\u0016cr.store.capacity.used\u0010\u0001\u0018\u0002 \u0000 \u0080Ø\u008eáo`,
wantOK: true,
protoType: "cockroach.ts.tspb.TimeSeriesQueryRequest",
wantVal: `{"endNanos": "1659549679000000000", "queries": [{"derivative": "NON_NEGATIVE_DERIVATIVE", "downsampler": "AVG", "name": "cr.node.sql.select.count", "sourceAggregator": "SUM", "sources": []}, {"derivative": "NON_NEGATIVE_DERIVATIVE", "downsampler": "AVG", "name": "cr.node.sql.update.count", "sourceAggregator": "SUM", "sources": []}, {"derivative": "NON_NEGATIVE_DERIVATIVE", "downsampler": "AVG", "name": "cr.node.sql.insert.count", "sourceAggregator": "SUM", "sources": []}, {"derivative": "NON_NEGATIVE_DERIVATIVE", "downsampler": "AVG", "name": "cr.node.sql.delete.count", "sourceAggregator": "SUM", "sources": []}, {"derivative": "NONE", "downsampler": "MAX", "name": "cr.node.sql.service.latency-p99", "sourceAggregator": "SUM", "sources": ["1"]}, {"derivative": "NON_NEGATIVE_DERIVATIVE", "downsampler": "AVG", "name": "cr.node.sql.distsql.contended_queries.count", "sourceAggregator": "SUM", "sources": []}, {"derivative": "NONE", "downsampler": "AVG", "name": "cr.store.replicas", "sourceAggregator": "SUM", "sources": ["1"]}, {"derivative": "NONE", "downsampler": "AVG", "name": "cr.store.capacity", "sourceAggregator": "SUM", "sources": []}, {"derivative": "NONE", "downsampler": "AVG", "name": "cr.store.capacity.available", "sourceAggregator": "SUM", "sources": []}, {"derivative": "NONE", "downsampler": "AVG", "name": "cr.store.capacity.used", "sourceAggregator": "SUM", "sources": []}], "sampleNanos": "30000000000", "startNanos": "1659546079000000000"}`,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
gotOk, gotVal, err := tryDecodeValue(tt.s, protoName, true /* emitDefaults */)
require.Equal(t, tt.wantOK, gotOk)
protoType := tt.protoType
if protoType == "" {
protoType = defaultProtoType
}
msg := tryDecodeValue(tt.s, protoType)
if !tt.wantOK {
if msg != nil {
t.Fatal("decoding succeeded unexpectedly")
}
return
}
if msg == nil {
t.Fatal("decoding failed")
}
json, err := protoreflect.MessageToJSON(msg, protoreflect.FmtFlags{EmitDefaults: true})
require.NoError(t, err)
require.Equal(t, gotVal, tt.wantVal)
require.Equal(t, tt.wantVal, json.String())
})
}
}

0 comments on commit 7d06d8d

Please sign in to comment.