From 8846ae69bc36f09908f38bc42c2580f5206f282e Mon Sep 17 00:00:00 2001 From: wenruimeng-work Date: Mon, 12 Feb 2024 17:30:41 -0800 Subject: [PATCH] fix the uint and int conversion in the encoding --- common/common.go | 28 ++++++++++++++++++++++++---- common/common_test.go | 16 ++++++++-------- encoding/binarywrite.go | 23 +++++++++++++++++++++-- example/proto_write.go | 3 +++ writer/arrow_test.go | 14 +++++++------- 5 files changed, 63 insertions(+), 21 deletions(-) diff --git a/common/common.go b/common/common.go index e2325309..978d372f 100644 --- a/common/common.go +++ b/common/common.go @@ -810,7 +810,12 @@ func (table boolFuncTable) MinMaxSize(minVal interface{}, maxVal interface{}, va type int32FuncTable struct{} func (_ int32FuncTable) LessThan(a interface{}, b interface{}) bool { - return a.(int32) < b.(int32) + switch a.(type) { + case int32, int16, int8: + return a.(int32) < b.(int32) + default: + return int32(a.(uint32)) < int32(b.(uint32)) + } } func (table int32FuncTable) MinMaxSize(minVal interface{}, maxVal interface{}, val interface{}) (interface{}, interface{}, int32) { @@ -820,7 +825,12 @@ func (table int32FuncTable) MinMaxSize(minVal interface{}, maxVal interface{}, v type uint32FuncTable struct{} func (_ uint32FuncTable) LessThan(a interface{}, b interface{}) bool { - return uint32(a.(int32)) < uint32(b.(int32)) + switch a.(type) { + case int32, int16, int8: + return uint32(a.(int32)) < uint32(b.(int32)) + default: + return a.(uint32) < b.(uint32) + } } func (table uint32FuncTable) MinMaxSize(minVal interface{}, maxVal interface{}, val interface{}) (interface{}, interface{}, int32) { @@ -830,7 +840,12 @@ func (table uint32FuncTable) MinMaxSize(minVal interface{}, maxVal interface{}, type int64FuncTable struct{} func (_ int64FuncTable) LessThan(a interface{}, b interface{}) bool { - return a.(int64) < b.(int64) + switch a.(type) { + case int64: + return a.(int64) < b.(int64) + default: + return int64(a.(uint64)) < int64(b.(uint64)) + } } func (table int64FuncTable) MinMaxSize(minVal interface{}, maxVal interface{}, val interface{}) (interface{}, interface{}, int32) { @@ -840,7 +855,12 @@ func (table int64FuncTable) MinMaxSize(minVal interface{}, maxVal interface{}, v type uint64FuncTable struct{} func (_ uint64FuncTable) LessThan(a interface{}, b interface{}) bool { - return uint64(a.(int64)) < uint64(b.(int64)) + switch a.(type) { + case uint64: + return a.(uint64) < b.(uint64) + default: + return uint64(a.(int64)) < uint64(b.(int64)) + } } func (table uint64FuncTable) MinMaxSize(minVal interface{}, maxVal interface{}, val interface{}) (interface{}, interface{}, int32) { diff --git a/common/common_test.go b/common/common_test.go index 33fd36ea..47a690a7 100644 --- a/common/common_test.go +++ b/common/common_test.go @@ -158,14 +158,14 @@ func TestCmp(t *testing.T) { {"int_8 2", int32(1), int32(2), parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_INT_16), true}, {"int_8 3", int32(1), int32(2), parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_INT_32), true}, {"int_8 4", int64(1), int64(2), parquet.TypePtr(parquet.Type_INT64), parquet.ConvertedTypePtr(parquet.ConvertedType_INT_64), true}, - - {"uint_8 1", int32(1), int32(2), parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_UINT_8), true}, - {"uint_8 2", int32(1), int32(-2), parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_UINT_8), true}, - {"uint_8 3", int32(-1), int32(-2), parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_UINT_8), false}, - {"uint_8 4", int32(-2), int32(-1), parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_UINT_8), true}, - {"uint_16 1", int32(1), int32(2), parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_UINT_16), true}, - {"uint_16 2", int32(1), int32(2), parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_UINT_32), true}, - {"uint_16 3", int64(1), int64(2), parquet.TypePtr(parquet.Type_INT64), parquet.ConvertedTypePtr(parquet.ConvertedType_UINT_64), true}, + {"int_8 5", int32(1), int32(-2), parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_INT_8), false}, + {"int_8 6", int32(-1), int32(-2), parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_INT_8), false}, + {"int_8 7", int32(-2), int32(-1), parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_INT_8), true}, + + {"uint_8 1", uint32(1), uint32(2), parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_UINT_8), true}, + {"uint_16 1", uint32(1), uint32(2), parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_UINT_16), true}, + {"uint_16 2", uint32(1), uint32(2), parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_UINT_32), true}, + {"uint_16 3", uint64(1), uint64(2), parquet.TypePtr(parquet.Type_INT64), parquet.ConvertedTypePtr(parquet.ConvertedType_UINT_64), true}, {"date 1", int32(1), int32(2), parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_DATE), true}, {"time_millis 1", int32(1), int32(2), parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_TIME_MILLIS), true}, diff --git a/encoding/binarywrite.go b/encoding/binarywrite.go index 4434dbc4..c71004b1 100644 --- a/encoding/binarywrite.go +++ b/encoding/binarywrite.go @@ -7,10 +7,19 @@ import ( //LittleEndian +func GetUint32(n interface{}) uint32 { + switch n.(type) { + case int, int8, int16, int32: + return uint32(n.(int32)) + default: + return n.(uint32) + } +} + func BinaryWriteINT32(w io.Writer, nums []interface{}) { buf := make([]byte, len(nums)*4) for i, n := range nums { - v := uint32(n.(int32)) + v := GetUint32(n) buf[i*4+0] = byte(v) buf[i*4+1] = byte(v >> 8) buf[i*4+2] = byte(v >> 16) @@ -19,10 +28,20 @@ func BinaryWriteINT32(w io.Writer, nums []interface{}) { w.Write(buf) } +func GetUint64(n interface{}) uint64 { + switch n.(type) { + case int, int8, int16, int32, int64: + return uint64(n.(int64)) + default: + return n.(uint64) + } + +} + func BinaryWriteINT64(w io.Writer, nums []interface{}) { buf := make([]byte, len(nums)*8) for i, n := range nums { - v := uint64(n.(int64)) + v := GetUint64(n) buf[i*8+0] = byte(v) buf[i*8+1] = byte(v >> 8) buf[i*8+2] = byte(v >> 16) diff --git a/example/proto_write.go b/example/proto_write.go index ff595d22..73c42879 100644 --- a/example/proto_write.go +++ b/example/proto_write.go @@ -73,6 +73,9 @@ type TestInterfaceStruct struct { NestedVal TestInterface Arr [][]TestInterface Message ProtoMessage + UintVal uint + UintVal32 uint32 + UintVal64 uint64 } func main() { diff --git a/writer/arrow_test.go b/writer/arrow_test.go index e74509b0..446fa634 100644 --- a/writer/arrow_test.go +++ b/writer/arrow_test.go @@ -80,31 +80,31 @@ func testRecord(mem memory.Allocator) arrow.Record { col5 := func() arrow.Array { ib := array.NewUint8Builder(mem) defer ib.Release() - ib.AppendValues([]uint8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, nil) + ib.AppendValues([]uint8{uint8(1), uint8(2), uint8(3), uint8(4), uint8(5), uint8(6), uint8(7), uint8(8), uint8(9), uint8(10)}, nil) return ib.NewUint8Array() }() defer col5.Release() col6 := func() arrow.Array { ib := array.NewUint16Builder(mem) defer ib.Release() - ib.AppendValues([]uint16{11, 12, 13, 14, 15, 16, 17, 18, 19, - 20}, nil) + ib.AppendValues([]uint16{uint16(11), uint16(12), uint16(13), uint16(14), uint16(15), uint16(16), uint16(17), uint16(18), uint16(19), + uint16(20)}, nil) return ib.NewUint16Array() }() defer col6.Release() col7 := func() arrow.Array { ib := array.NewUint32Builder(mem) defer ib.Release() - ib.AppendValues([]uint32{21, 22, 23, 24, 25, 26, 27, 28, 29, - 30}, nil) + ib.AppendValues([]uint32{uint32(21), uint32(22), uint32(23), uint32(24), uint32(25), uint32(26), uint32(27), uint32(28), uint32(29), + uint32(30)}, nil) return ib.NewUint32Array() }() defer col7.Release() col8 := func() arrow.Array { ib := array.NewUint64Builder(mem) defer ib.Release() - ib.AppendValues([]uint64{31, 32, 33, 34, 35, 36, 37, 38, 39, - 40}, nil) + ib.AppendValues([]uint64{uint64(31), uint64(32), uint64(33), uint64(34), uint64(35), uint64(36), uint64(37), uint64(38), uint64(39), + uint64(40)}, nil) return ib.NewUint64Array() }() defer col8.Release()