From 09658154a105fd08cebe056c557c6d8ec9e6c6ca Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sun, 25 Aug 2024 14:54:24 -0400 Subject: [PATCH] Fix binary data in JSON output --- pkg/mlrval/mlrval_json.go | 24 ++++++++++++++++++------ test/cases/io-json-io/0036/cmd | 1 + test/cases/io-json-io/0036/experr | 0 test/cases/io-json-io/0036/expout | 2 ++ test/cases/io-json-io/0037/cmd | 1 + test/cases/io-json-io/0037/experr | 0 test/cases/io-json-io/0037/expout | 5 +++++ 7 files changed, 27 insertions(+), 6 deletions(-) create mode 100644 test/cases/io-json-io/0036/cmd create mode 100644 test/cases/io-json-io/0036/experr create mode 100644 test/cases/io-json-io/0036/expout create mode 100644 test/cases/io-json-io/0037/cmd create mode 100644 test/cases/io-json-io/0037/experr create mode 100644 test/cases/io-json-io/0037/expout diff --git a/pkg/mlrval/mlrval_json.go b/pkg/mlrval/mlrval_json.go index c657815ec2..5dfd996244 100644 --- a/pkg/mlrval/mlrval_json.go +++ b/pkg/mlrval/mlrval_json.go @@ -352,9 +352,17 @@ func (mv *Mlrval) marshalJSONString(outputIsStdout bool) (string, error) { } // Wraps with double-quotes and escape-encoded JSON-special characters. +// +// Per https://www.json.org/json-en.html: +// +// * Escapes: \b \f \n \r \t \u +// * Acceptable ranges: 0x20..0x10FFFF +// +// Since these are bytes here, we only need to check < 0x20, and special-case the five valid +// escapes, and then \u the rest. + func millerJSONEncodeString(input string) string { var buffer bytes.Buffer - buffer.WriteByte('"') for _, b := range []byte(input) { @@ -362,15 +370,15 @@ func millerJSONEncodeString(input string) string { case '\\': buffer.WriteByte('\\') buffer.WriteByte('\\') - case '\n': - buffer.WriteByte('\\') - buffer.WriteByte('n') case '\b': buffer.WriteByte('\\') buffer.WriteByte('b') case '\f': buffer.WriteByte('\\') buffer.WriteByte('f') + case '\n': + buffer.WriteByte('\\') + buffer.WriteByte('n') case '\r': buffer.WriteByte('\\') buffer.WriteByte('r') @@ -381,12 +389,16 @@ func millerJSONEncodeString(input string) string { buffer.WriteByte('\\') buffer.WriteByte('"') default: - buffer.WriteByte(b) + if b < 0x20 { + s := fmt.Sprintf("\\u%04x", b) + buffer.WriteString(s) + } else { + buffer.WriteByte(b) + } } } buffer.WriteByte('"') - return buffer.String() } diff --git a/test/cases/io-json-io/0036/cmd b/test/cases/io-json-io/0036/cmd new file mode 100644 index 0000000000..a298f0f2ea --- /dev/null +++ b/test/cases/io-json-io/0036/cmd @@ -0,0 +1 @@ +mlr --ijson --opprint cat test/input/binary.json diff --git a/test/cases/io-json-io/0036/experr b/test/cases/io-json-io/0036/experr new file mode 100644 index 0000000000..e69de29bb2 diff --git a/test/cases/io-json-io/0036/expout b/test/cases/io-json-io/0036/expout new file mode 100644 index 0000000000..dd883f4e5d --- /dev/null +++ b/test/cases/io-json-io/0036/expout @@ -0,0 +1,2 @@ +msg +X����Y diff --git a/test/cases/io-json-io/0037/cmd b/test/cases/io-json-io/0037/cmd new file mode 100644 index 0000000000..abcffb242f --- /dev/null +++ b/test/cases/io-json-io/0037/cmd @@ -0,0 +1 @@ +mlr -j cat test/input/binary.json diff --git a/test/cases/io-json-io/0037/experr b/test/cases/io-json-io/0037/experr new file mode 100644 index 0000000000..e69de29bb2 diff --git a/test/cases/io-json-io/0037/expout b/test/cases/io-json-io/0037/expout new file mode 100644 index 0000000000..9bf2f47be2 --- /dev/null +++ b/test/cases/io-json-io/0037/expout @@ -0,0 +1,5 @@ +[ +{ + "msg": "X\u0001\b����\u0012Y" +} +]