Skip to content

Commit

Permalink
codec: generalize DecodeOptions:RawToString flag (for all encodings)
Browse files Browse the repository at this point in the history
When decoding into a nil interface{} (naked decoding),
we peek at the stream and decode into a type that matches the stream.

Previously, we treated the string type as UTF-8. However, a Go string is just
a sequence of bytes (an immutable view of []byte), which makes no determination
of the encoding.

To that end, some users want to decode an un-encoded sequence of bytes
in the stream as an immutable view (a string).

We now enable this via the RawToString flag.

Now, if we peek at the stream and it is a sequence of bytes, we will decode it
into a string if RawToString=true. By default, it continues to be decoded into a []byte.

Updates #286
  • Loading branch information
ugorji committed Mar 15, 2019
1 parent 8643c33 commit a70535d
Show file tree
Hide file tree
Showing 6 changed files with 30 additions and 30 deletions.
3 changes: 1 addition & 2 deletions codec/binc.go
Original file line number Diff line number Diff line change
Expand Up @@ -932,8 +932,7 @@ func (d *bincDecDriver) DecodeNaked() {
n.v = valueTypeString
n.s = d.DecodeString()
case bincVdByteArray:
n.v = valueTypeBytes
n.l = d.DecodeBytes(nil, false)
decNakedReadRawBytes(d, d.d, n, d.h.RawToString)
case bincVdTimestamp:
n.v = valueTypeTime
tt, err := bincDecodeTime(d.r.readx(uint(d.vs)))
Expand Down
6 changes: 2 additions & 4 deletions codec/cbor.go
Original file line number Diff line number Diff line change
Expand Up @@ -649,8 +649,7 @@ func (d *cborDecDriver) DecodeNaked() {
n.v = valueTypeFloat
n.f = d.DecodeFloat64()
case cborBdIndefiniteBytes:
n.v = valueTypeBytes
n.l = d.DecodeBytes(nil, false)
decNakedReadRawBytes(d, d.d, n, d.h.RawToString)
case cborBdIndefiniteString:
n.v = valueTypeString
n.s = d.DecodeString()
Expand All @@ -674,8 +673,7 @@ func (d *cborDecDriver) DecodeNaked() {
n.v = valueTypeInt
n.i = d.DecodeInt64()
case d.bd >= cborBaseBytes && d.bd < cborBaseString:
n.v = valueTypeBytes
n.l = d.DecodeBytes(nil, false)
decNakedReadRawBytes(d, d.d, n, d.h.RawToString)
case d.bd >= cborBaseString && d.bd < cborBaseArray:
n.v = valueTypeString
n.s = d.DecodeString()
Expand Down
18 changes: 7 additions & 11 deletions codec/codec_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -384,7 +384,7 @@ func testInit() {
bh.MaxInitLen = testMaxInitLen
}

testMsgpackH.RawToString = true
testMsgpackH.WriteExt = true

var tTimeExt timeExt
var tBytesExt wrapBytesExt
Expand Down Expand Up @@ -826,14 +826,12 @@ func testCodecTableOne(t *testing.T, h Handle) {
tableTestNilVerify := testTableVerify(testVerifyDoNil|testVerifyMapTypeStrIntf, h)
switch v := h.(type) {
case *MsgpackHandle:
var oldWriteExt, oldRawToString bool
_, _ = oldWriteExt, oldRawToString
oldWriteExt, v.WriteExt = v.WriteExt, true
oldRawToString, v.RawToString = v.RawToString, true
// defer func() { v.WriteExt, v.RawToString = oldWriteExt, oldRawToString }()
var oldWriteExt bool
_ = oldWriteExt
oldWriteExt = v.WriteExt
v.WriteExt = true
doTestCodecTableOne(t, false, h, table, tableVerify)
v.WriteExt = oldWriteExt
v.RawToString = oldRawToString
case *JsonHandle:
//skip []interface{} containing time.Time, as it encodes as a number, but cannot decode back to time.Time.
//As there is no real support for extension tags in json, this must be skipped.
Expand Down Expand Up @@ -1176,7 +1174,6 @@ func testCodecRpcOne(t *testing.T, rr Rpc, h Handle, doRequest bool, exitSleepMs
// var opts *DecoderOptions
// opts := testDecOpts
// opts.MapType = mapStrIntfTyp
// opts.RawToString = false
serverExitChan := make(chan bool, 1)
var serverExitFlag uint64
serverFn := func() {
Expand Down Expand Up @@ -3091,9 +3088,8 @@ func TestCborMammothMapsAndSlices(t *testing.T) {
}

func TestMsgpackMammothMapsAndSlices(t *testing.T) {
old1, old2 := testMsgpackH.RawToString, testMsgpackH.WriteExt
defer func() { testMsgpackH.RawToString, testMsgpackH.WriteExt = old1, old2 }()
testMsgpackH.RawToString = true
old1 := testMsgpackH.WriteExt
defer func() { testMsgpackH.WriteExt = old1 }()
testMsgpackH.WriteExt = true

doTestMammothMapsAndSlices(t, testMsgpackH)
Expand Down
14 changes: 14 additions & 0 deletions codec/decode.go
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,10 @@ type DecodeOptions struct {
// If true, we will delete the mapping of the key.
// Else, just set the mapping to the zero value of the type.
DeleteOnNilMapValue bool

// RawToString controls how raw bytes in a stream are decoded into a nil interface{}.
// By default, they are decoded as []byte, but can be decoded as string (if configured).
RawToString bool
}

// ------------------------------------------------
Expand Down Expand Up @@ -3093,3 +3097,13 @@ func decReadFull(r io.Reader, bs []byte) (n uint, err error) {
// if n != len(bs) && err == io.EOF { err = io.ErrUnexpectedEOF }
return
}

// decNakedReadRawBytes reads the next run of raw bytes through dr and records
// the result in n.
//
// When rawToString is false, n is tagged valueTypeBytes and n.l receives the
// result of dr.DecodeBytes(nil, false). When rawToString is true, n is tagged
// valueTypeString and n.s receives a string built from the bytes returned by
// dr.DecodeBytes(d.b[:], true).
//
// NOTE(review): d.b looks like a scratch buffer owned by the Decoder and the
// `true` argument looks like a zero-copy hint; confirm against the
// decDriver.DecodeBytes contract.
func decNakedReadRawBytes(dr decDriver, d *Decoder, n *decNaked, rawToString bool) {
	if !rawToString {
		// Default behavior: surface the raw bytes as a []byte.
		n.v = valueTypeBytes
		n.l = dr.DecodeBytes(nil, false)
		return
	}

	// Opt-in behavior: surface the raw bytes as a string. The string
	// conversion copies, so n.s does not alias the slice returned by dr.
	n.v = valueTypeString
	n.s = string(dr.DecodeBytes(d.b[:], true))
}
16 changes: 5 additions & 11 deletions codec/msgpack.go
Original file line number Diff line number Diff line change
Expand Up @@ -521,16 +521,15 @@ func (d *msgpackDecDriver) DecodeNaked() {
n.v = valueTypeInt
n.i = int64(int8(bd))
case bd == mpStr8, bd == mpStr16, bd == mpStr32, bd >= mpFixStrMin && bd <= mpFixStrMax:
if d.h.RawToString {
if d.h.WriteExt {
n.v = valueTypeString
n.s = string(d.DecodeBytes(d.d.b[:], true))
n.s = d.DecodeString()
} else {
n.v = valueTypeBytes
n.l = d.DecodeBytes(nil, false)
}
case bd == mpBin8, bd == mpBin16, bd == mpBin32:
n.v = valueTypeBytes
n.l = d.DecodeBytes(nil, false)
decNakedReadRawBytes(d, d.d, n, d.h.RawToString)
case bd == mpArray16, bd == mpArray32, bd >= mpFixArrayMin && bd <= mpFixArrayMax:
n.v = valueTypeArray
decodeFurther = true
Expand Down Expand Up @@ -768,10 +767,10 @@ func (d *msgpackDecDriver) ContainerType() (vt valueType) {
} else if bd == mpBin8 || bd == mpBin16 || bd == mpBin32 {
return valueTypeBytes
} else if bd == mpStr8 || bd == mpStr16 || bd == mpStr32 || (bd >= mpFixStrMin && bd <= mpFixStrMax) {
if d.h.RawToString {
if d.h.WriteExt { // UTF-8 string (new spec)
return valueTypeString
}
return valueTypeBytes
return valueTypeBytes // raw (old spec)
} else if bd == mpArray16 || bd == mpArray32 || (bd >= mpFixArrayMin && bd <= mpFixArrayMax) {
return valueTypeArray
} else if bd == mpMap16 || bd == mpMap32 || (bd >= mpFixMapMin && bd <= mpFixMapMax) {
Expand Down Expand Up @@ -957,11 +956,6 @@ func (d *msgpackDecDriver) decodeExtV(verifyTag bool, tag byte) (xtag byte, xbs
type MsgpackHandle struct {
BasicHandle

// RawToString controls how raw bytes in a stream are decoded into a nil interface{}.
// By default, they are decoded as []byte,
// but can be decoded as string (if configured).
RawToString bool

// NoFixedNum says to output all signed integers as 2-bytes, never as 1-byte fixednum.
NoFixedNum bool

Expand Down
3 changes: 1 addition & 2 deletions codec/simple.go
Original file line number Diff line number Diff line change
Expand Up @@ -576,8 +576,7 @@ func (d *simpleDecDriver) DecodeNaked() {
n.s = d.DecodeString()
case simpleVdByteArray, simpleVdByteArray + 1,
simpleVdByteArray + 2, simpleVdByteArray + 3, simpleVdByteArray + 4:
n.v = valueTypeBytes
n.l = d.DecodeBytes(nil, false)
decNakedReadRawBytes(d, d.d, n, d.h.RawToString)
case simpleVdExt, simpleVdExt + 1, simpleVdExt + 2, simpleVdExt + 3, simpleVdExt + 4:
n.v = valueTypeExt
l := d.decLen()
Expand Down

0 comments on commit a70535d

Please sign in to comment.