postmanlabs · thatplguy · Oct 21, 2022 · Oct 20, 2022 · Oct 20, 2022 · Oct 20, 2022
diff --git a/learn/json_preprocessor.go b/learn/json_preprocessor.go
@@ -0,0 +1,51 @@
+package learn
+
+import (
+	"io"
+	"unicode"
+)
+
+type stripControlCharactersReader struct {
+	wrapped io.Reader
+}
+
+func newStripControlCharactersReader(wrapped io.Reader) stripControlCharactersReader {
+	return stripControlCharactersReader{wrapped: wrapped}
+}
+
+// Read up to len(p) bytes, removing any unescaped control characters found.
+// Removed characters do not count toward the total bytes read.  Returns the
+// number of bytes read.
+func (r stripControlCharactersReader) Read(p []byte) (n int, err error) {
+	pIdx := 0
+	buf := make([]byte, len(p))
+
+	// Read up to len(p) bytes, then discard any control characters.  Continue
+	// reading (and discarding control characters) until p is full or there are
+	// no more bytes to read.
+	for pIdx < len(p) {
+		remaining := len(p) - pIdx
+		bufSlice := buf[:remaining]
+
+		var bufN int
+		bufN, err = r.wrapped.Read(bufSlice)
+
+		// Copy from buf to p, skipping unescaped control characters.
+		for _, r := range string(bufSlice) {
+			if unicode.IsControl(r) {
+				continue
+			}
+			pIdx += copy(p[pIdx:], string([]rune{r}))
+		}
+
+		// If we hit an error or read fewer bytes than the size of the buffer,
+		// don't bother trying to read more from the underlying reader.
+		if err != nil || bufN < remaining {
+			break
+		}
+
+		// Otherwise, we loop to replace CRs we dropped.
+	}
+
+	return pIdx, err
+}
diff --git a/learn/json_preprocessor_test.go b/learn/json_preprocessor_test.go
@@ -0,0 +1,119 @@
+package learn
+
+import (
+	"io"
+	"strings"
+	"testing"
+	"unicode"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func TestSkipCarriageReturnReader(t *testing.T) {
+	testCases := []struct {
+		Name                 string
+		Input                string
+		BytesToRead          int
+		DifferentEOFBehavior bool
+	}{
+		{
+			Name:        "empty",
+			Input:       "",
+			BytesToRead: 128,
+		},
+		{
+			Name:        "read 0 bytes",
+			Input:       "foo",
+			BytesToRead: 0,
+		},
+		{
+			Name:        "no CR string",
+			Input:       "foo",
+			BytesToRead: 128,
+		},
+		{
+			Name:        "CR",
+			Input:       "foo\rbar",
+			BytesToRead: 128,
+		},
+		{
+			Name:        "CRs",
+			Input:       "foo\rbar\r",
+			BytesToRead: 128,
+		},
+		{
+			Name:        "fill in after removing CRs",
+			Input:       "foo\rbar",
+			BytesToRead: 4,
+		},
+		{
+			Name:        "CRLFs",
+			Input:       "f\r\noo\r\nbar\r\n",
+			BytesToRead: 128,
+		},
+		{
+			Name:        "fill in after removing CRLFs",
+			Input:       "f\r\noo\r\nbar\r\n",
+			BytesToRead: 4,
+		},
+		{
+			Name:        "fill in after removing trailing CRLFs",
+			Input:       "f\r\noo\r\nbar\r\n",
+			BytesToRead: 7,
+
+			// The strip reader returns an EOF where the strings.Reader
+			// doesn't, because it tries to read more characters after the
+			// trailing CRLF and fails.
+			DifferentEOFBehavior: true,
+		},
+	}
+
+	// For each test case, read bytes from the CR stripper and compare the
+	// results to reading bytes straight from a strings.Reader and then
+	// manually removing CRs.
+	for _, tc := range testCases {
+		numCRs := countControlCharacters(tc.Input)
+
+		// Read bytes from the CR stripper.
+		buf := make([]byte, tc.BytesToRead)
+		n, err := newStripControlCharactersReader(strings.NewReader(tc.Input)).Read(buf)
+
+		// Read bytes straight from strings.Reader. Read extra bytes to make up
+		// for manually removing CRs afterwards.
+		expectedBuf := make([]byte, tc.BytesToRead+numCRs)
+		expectedN, expectedErr := strings.NewReader(tc.Input).Read(expectedBuf)
+
+		// Manually remove CRs from the string read from the strings.Reader.
+		expectedStr := removeControlCharacters(string(expectedBuf))
+
+		// Copy the expected string into a BytesToRead-sized buffer, to
+		// ensure the sizes of the underlying buffers match.
+		expectedBuf = make([]byte, tc.BytesToRead)
+		copy(expectedBuf, expectedStr)
+
+		if !(tc.DifferentEOFBehavior && err == io.EOF) {
+			assert.Equal(t, expectedErr, err, tc.Name+": error")
+		}
+		assert.Equal(t, expectedBuf, buf, tc.Name+": string")
+		assert.Equal(t, expectedN-numCRs, n, tc.Name+": character count")
+	}
+}
+
+func countControlCharacters(s string) (count int) {
+	for _, r := range s {
+		if unicode.IsControl(r) {
+			count += 1
+		}
+	}
+
+	return count
+}
+
+func removeControlCharacters(s string) string {
+	return strings.Map(func(r rune) rune {
+		if unicode.IsControl(r) {
+			return -1
+		}
+		return r
+	}, s)
+}
diff --git a/learn/parse_http.go b/learn/parse_http.go
@@ -6,7 +6,6 @@ import (
 	"compress/gzip"
 	"encoding/json"
 	"fmt"
-	"github.com/golang/protobuf/proto"
 	"io"
 	"io/ioutil"
 	"mime"
@@ -16,20 +15,23 @@ import (
 	"sort"
 	"strings"
 
+	"github.com/golang/protobuf/proto"
+
 	"github.com/andybalholm/brotli"
 	"github.com/google/uuid"
 	"github.com/pkg/errors"
 	"golang.org/x/text/encoding/ianaindex"
 	"golang.org/x/text/transform"
 	"gopkg.in/yaml.v2"
 
-	"github.com/akitasoftware/akita-cli/printer"
-	"github.com/akitasoftware/akita-cli/telemetry"
 	pb "github.com/akitasoftware/akita-ir/go/api_spec"
 	"github.com/akitasoftware/akita-libs/akinet"
 	"github.com/akitasoftware/akita-libs/memview"
 	"github.com/akitasoftware/akita-libs/spec_util"
 	"github.com/akitasoftware/akita-libs/spec_util/ir_hash"
+
+	"github.com/akitasoftware/akita-cli/printer"
+	"github.com/akitasoftware/akita-cli/telemetry"
 )
 
 var (
@@ -632,7 +634,7 @@ func parseMethodMeta(req *akinet.HTTPRequest) *pb.MethodMeta {
 
 func parseHTTPBodyJSON(stream io.Reader) (*pb.Data, error) {
 	var top interface{}
-	decoder := json.NewDecoder(stream)
+	decoder := json.NewDecoder(newStripControlCharactersReader(stream))
 	decoder.UseNumber()
 
 	err := decoder.Decode(&top)

diff --git a/learn/parse_http_test.go b/learn/parse_http_test.go
@@ -10,7 +10,6 @@ import (
 	as "github.com/akitasoftware/akita-ir/go/api_spec"
 	"github.com/akitasoftware/akita-libs/akinet"
 	"github.com/akitasoftware/akita-libs/spec_util"
-
 	"github.com/spf13/viper"
 )
 
@@ -33,6 +32,7 @@ func init() {
 var testBodyDict = `
 {
   "name": "prince",
+  "name_with_escaped_CRLF": "prince\r\n",
   "number_teeth": 9000,
   "dog": true,
   "canadian_social_insurance_number": "378734493671000",
@@ -65,6 +65,7 @@ func newTestBodySpec(statusCode int) *as.Data {
 func newTestBodySpecContentType(contentType string, statusCode int) *as.Data {
 	return newTestBodySpecFromStruct(statusCode, as.HTTPBody_JSON, contentType, map[string]*as.Data{
 		"name":                             dataFromPrimitive(spec_util.NewPrimitiveString("prince")),
+		"name_with_escaped_CRLF":           dataFromPrimitive(spec_util.NewPrimitiveString("prince\r\n")),
 		"number_teeth":                     dataFromPrimitive(spec_util.NewPrimitiveInt64(9000)),
 		"dog":                              dataFromPrimitive(spec_util.NewPrimitiveBool(true)),
 		"canadian_social_insurance_number": dataFromPrimitive(annotateIfSensitiveForTest(true, spec_util.NewPrimitiveString("378734493671000"))),
@@ -590,6 +591,26 @@ func TestFailingParse(t *testing.T) {
 				[]*http.Cookie{},
 			),
 		},
+		{
+			Name: "json error: invalid '\t\r\n' in string literal",
+			TestContent: newTestHTTPResponse(
+				200,
+				[]byte("{\"field\t\r\n\": 3}"),
+				"application/json",
+				map[string][]string{},
+				[]*http.Cookie{},
+			),
+		},
+		{
+			Name: "json error: invalid '\t\r\r' not in string literal",
+			TestContent: newTestHTTPResponse(
+				200,
+				[]byte("{\"field\": 3\t\r\n}"),
+				"application/json",
+				map[string][]string{},
+				[]*http.Cookie{},
+			),
+		},
 	}
 	for _, tc := range testCases {
 		_, err := ParseHTTP(tc.TestContent)
@@ -600,5 +621,4 @@ func TestFailingParse(t *testing.T) {
 	}
 
 	viper.Set("debug", false)
-
 }