From ae4de0ff18cbbb29e5ed441196b901904459cc04 Mon Sep 17 00:00:00 2001 From: Kartik Verma <74044639+verma-kartik@users.noreply.github.com> Date: Fri, 19 Apr 2024 21:29:06 +0530 Subject: [PATCH] GH-40563: [Go] Unable to JSON marshal float64 arrays which contain a NaN value (#41109) ### Rationale for this change From this [issue](https://github.com/apache/arrow/issues/40563#issuecomment-2043464834). ### What changes are included in this PR? MarshalJSON() is changed for Float32, Float64 to handle NaN, +Inf and -Inf values. I was only able to handle NaN for Float16. UnmarshalJSON() was already handling NaN, +Inf and -Inf in the right way. ### Are these changes tested? These changes are tested. Tests are included in those files where I find it was logically appropriate to do so. ### Are there any user-facing changes? They will be able to handle corner values for Float32, Float64 now. * GitHub Issue: #40563 Lead-authored-by: verma-kartik Co-authored-by: work-vermakartik Co-authored-by: Matt Topol Signed-off-by: Matt Topol --- arrow/array/float16.go | 16 +++- arrow/array/numeric.gen.go | 34 ++++++-- arrow/array/numeric.gen.go.tmpl | 34 ++++++++ arrow/array/numeric_test.go | 91 ++++++++++++++++++++- arrow/array/numericbuilder.gen_test.go | 45 ++++++++++ arrow/array/numericbuilder.gen_test.go.tmpl | 23 ++++++ arrow/float16/float16.go | 2 + 7 files changed, 235 insertions(+), 10 deletions(-) diff --git a/arrow/array/float16.go b/arrow/array/float16.go index 6fe35f24..e0fe6507 100644 --- a/arrow/array/float16.go +++ b/arrow/array/float16.go @@ -87,10 +87,20 @@ func (a *Float16) GetOneForMarshal(i int) interface{} { func (a *Float16) MarshalJSON() ([]byte, error) { vals := make([]interface{}, a.Len()) for i, v := range a.values { - if a.IsValid(i) { - vals[i] = v.Float32() - } else { + if !a.IsValid(i) { vals[i] = nil + continue + } + + switch { + case v.IsNaN(): + vals[i] = "NaN" + case v.IsInf() && !v.Signbit(): + vals[i] = "+Inf" + case v.IsInf() && v.Signbit(): + vals[i] = "-Inf" + default: + vals[i] = v.Float32() } } return json.Marshal(vals) diff --git a/arrow/array/numeric.gen.go b/arrow/array/numeric.gen.go index 0c7346dd..fbf129ec 100644 --- a/arrow/array/numeric.gen.go +++ b/arrow/array/numeric.gen.go @@ -20,6 +20,7 @@ package array import ( "fmt" + "math" "strconv" "strings" @@ -290,11 +291,23 @@ func (a *Float64) GetOneForMarshal(i int) interface{} { func (a *Float64) MarshalJSON() ([]byte, error) { vals := make([]interface{}, a.Len()) for i := 0; i < a.Len(); i++ { - if a.IsValid(i) { - vals[i] = a.values[i] - } else { + if !a.IsValid(i) { vals[i] = nil + continue + } + + f := a.Value(i) + switch { + case math.IsNaN(f): + vals[i] = "NaN" + case math.IsInf(f, 1): + vals[i] = "+Inf" + case math.IsInf(f, -1): + vals[i] = "-Inf" + default: + vals[i] = f } + } return json.Marshal(vals) @@ -575,10 +588,19 @@ func (a *Float32) GetOneForMarshal(i int) interface{} { func (a *Float32) MarshalJSON() ([]byte, error) { vals := make([]interface{}, a.Len()) for i := 0; i < a.Len(); i++ { - if a.IsValid(i) { - vals[i] = a.values[i] - } else { + if !a.IsValid(i) { vals[i] = nil + continue + } + + f := a.Value(i) + v := strconv.FormatFloat(float64(f), 'g', -1, 32) + + switch v { + case "NaN", "+Inf", "-Inf": + vals[i] = v + default: + vals[i] = f } } diff --git a/arrow/array/numeric.gen.go.tmpl b/arrow/array/numeric.gen.go.tmpl index 2ccb8d8b..8fb93c3b 100644 --- a/arrow/array/numeric.gen.go.tmpl +++ b/arrow/array/numeric.gen.go.tmpl @@ -133,11 +133,45 @@ func (a *{{.Name}}) MarshalJSON() ([]byte, error) { {{else -}} vals := make([]interface{}, a.Len()) for i := 0; i < a.Len(); i++ { + {{if (eq .Name "Float32") -}} + if !a.IsValid(i) { + vals[i] = nil + continue + } + + f := a.Value(i) + v := strconv.FormatFloat(float64(f), 'g', -1, 32) + + switch v { + case "NaN", "+Inf", "-Inf": + vals[i] = v + default: + vals[i] = f + } + {{else if (eq .Name "Float64") -}} + if !a.IsValid(i) { + vals[i] = nil + continue + } + + f := a.Value(i) + switch { + case math.IsNaN(f): + vals[i] = "NaN" + case math.IsInf(f, 1): + vals[i] = "+Inf" + case math.IsInf(f, -1): + vals[i] = "-Inf" + default: + vals[i] = f + } + {{else}} if a.IsValid(i) { {{ if (eq .Size "1") }}vals[i] = float64(a.values[i]) // prevent uint8 from being seen as binary data{{ else }}vals[i] = a.values[i]{{ end }} } else { vals[i] = nil } + {{end}} } {{end}} return json.Marshal(vals) diff --git a/arrow/array/numeric_test.go b/arrow/array/numeric_test.go index 34cab789..6f10a4ce 100644 --- a/arrow/array/numeric_test.go +++ b/arrow/array/numeric_test.go @@ -16,13 +16,14 @@ package array_test -import ( +import ( "math" "reflect" "testing" "github.com/apache/arrow/go/v16/arrow" "github.com/apache/arrow/go/v16/arrow/array" + "github.com/apache/arrow/go/v16/arrow/float16" "github.com/apache/arrow/go/v16/arrow/memory" "github.com/apache/arrow/go/v16/internal/json" "github.com/stretchr/testify/assert" @@ -137,6 +138,94 @@ func TestFloat64SliceDataWithNull(t *testing.T) { } } +func TestFloat16MarshalJSON(t *testing.T) { + pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer pool.AssertSize(t, 0) + + bldr := array.NewFloat16Builder(pool) + defer bldr.Release() + + jsonstr := `[0, 1, 2, 3, "NaN", "NaN", 4, 5, "+Inf", "-Inf"]` + + bldr.Append(float16.New(0)) + bldr.Append(float16.New(1)) + bldr.Append(float16.New(2)) + bldr.Append(float16.New(3)) + bldr.Append(float16.NaN()) + bldr.Append(float16.NaN()) + bldr.Append(float16.New(4)) + bldr.Append(float16.New(5)) + bldr.Append(float16.Inf()) + bldr.Append(float16.Inf().Negate()) + + + expected := bldr.NewFloat16Array() + defer expected.Release() + expected_json, err := expected.MarshalJSON() + assert.NoError(t, err) + assert.JSONEq(t, jsonstr, string(expected_json)) +} + +func TestFloat32MarshalJSON(t *testing.T) { + pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer pool.AssertSize(t, 0) + + bldr := array.NewFloat32Builder(pool) + defer bldr.Release() + + jsonstr := `[0, 1, "+Inf", 2, 3, "NaN", "NaN", 4, 5, "-Inf"]` + + bldr.Append(0) + bldr.Append(1) + bldr.Append(float32(math.Inf(1))) + bldr.Append(2) + bldr.Append(3) + bldr.Append(float32(math.NaN())) + bldr.Append(float32(math.NaN())) + bldr.Append(4) + bldr.Append(5) + bldr.Append(float32(math.Inf(-1))) + + + expected := bldr.NewFloat32Array() + defer expected.Release() + + expected_json, err := expected.MarshalJSON() + assert.NoError(t, err) + + assert.JSONEq(t, jsonstr, string(expected_json)) +} + +func TestFloat64MarshalJSON(t *testing.T) { + pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer pool.AssertSize(t, 0) + + bldr := array.NewFloat64Builder(pool) + defer bldr.Release() + + jsonstr := `[0, 1, "+Inf", 2, 3, "NaN", "NaN", 4, 5, "-Inf"]` + + bldr.Append(0) + bldr.Append(1) + bldr.Append(math.Inf(1)) + bldr.Append(2) + bldr.Append(3) + bldr.Append(math.NaN()) + bldr.Append(math.NaN()) + bldr.Append(4) + bldr.Append(5) + bldr.Append(math.Inf(-1)) + + expected := bldr.NewFloat64Array() + defer expected.Release() + + expected_json, err := expected.MarshalJSON() + assert.NoError(t, err) + + assert.JSONEq(t, jsonstr, string(expected_json)) + +} + func TestUnmarshalSpecialFloat(t *testing.T) { pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer pool.AssertSize(t, 0) diff --git a/arrow/array/numericbuilder.gen_test.go b/arrow/array/numericbuilder.gen_test.go index 10751e42..cb5a7f1a 100644 --- a/arrow/array/numericbuilder.gen_test.go +++ b/arrow/array/numericbuilder.gen_test.go @@ -19,6 +19,7 @@ package array_test import ( + "math" "testing" "github.com/apache/arrow/go/v16/arrow" @@ -633,6 +634,28 @@ func TestFloat64Builder_Resize(t *testing.T) { assert.Equal(t, 5, ab.Len()) } +func TestFloat64BuilderUnmarshalJSON(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer mem.AssertSize(t, 0) + + bldr := array.NewFloat64Builder(mem) + defer bldr.Release() + + jsonstr := `[0, 1, "+Inf", 2, 3, "NaN", "NaN", 4, 5, "-Inf"]` + + err := bldr.UnmarshalJSON([]byte(jsonstr)) + assert.NoError(t, err) + + arr := bldr.NewFloat64Array() + defer arr.Release() + + assert.NotNil(t, arr) + + assert.False(t, math.IsInf(float64(arr.Value(0)), 0), arr.Value(0)) + assert.True(t, math.IsInf(float64(arr.Value(2)), 1), arr.Value(2)) + assert.True(t, math.IsNaN(float64(arr.Value(5))), arr.Value(5)) +} + func TestInt32StringRoundTrip(t *testing.T) { // 1. create array mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) @@ -1239,6 +1262,28 @@ func TestFloat32Builder_Resize(t *testing.T) { assert.Equal(t, 5, ab.Len()) } +func TestFloat32BuilderUnmarshalJSON(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer mem.AssertSize(t, 0) + + bldr := array.NewFloat32Builder(mem) + defer bldr.Release() + + jsonstr := `[0, 1, "+Inf", 2, 3, "NaN", "NaN", 4, 5, "-Inf"]` + + err := bldr.UnmarshalJSON([]byte(jsonstr)) + assert.NoError(t, err) + + arr := bldr.NewFloat32Array() + defer arr.Release() + + assert.NotNil(t, arr) + + assert.False(t, math.IsInf(float64(arr.Value(0)), 0), arr.Value(0)) + assert.True(t, math.IsInf(float64(arr.Value(2)), 1), arr.Value(2)) + assert.True(t, math.IsNaN(float64(arr.Value(5))), arr.Value(5)) +} + func TestInt16StringRoundTrip(t *testing.T) { // 1. create array mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) diff --git a/arrow/array/numericbuilder.gen_test.go.tmpl b/arrow/array/numericbuilder.gen_test.go.tmpl index 41f68715..e3ad0d61 100644 --- a/arrow/array/numericbuilder.gen_test.go.tmpl +++ b/arrow/array/numericbuilder.gen_test.go.tmpl @@ -271,6 +271,29 @@ func Test{{.Name}}Builder_Resize(t *testing.T) { ab.Resize(32) assert.Equal(t, 5, ab.Len()) } + +func Test{{.Name}}BuilderUnmarshalJSON(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer mem.AssertSize(t, 0) + + bldr := array.New{{.Name}}Builder(mem) + defer bldr.Release() + + jsonstr := `[0, 1, "+Inf", 2, 3, "NaN", "NaN", 4, 5, "-Inf"]` + + err := bldr.UnmarshalJSON([]byte(jsonstr)) + assert.NoError(t, err) + + arr := bldr.New{{.Name}}Array() + defer arr.Release() + + assert.NotNil(t, arr) + + assert.False(t, math.IsInf(float64(arr.Value(0)), 0), arr.Value(0)) + assert.True(t, math.IsInf(float64(arr.Value(2)), 1), arr.Value(2)) + assert.True(t, math.IsNaN(float64(arr.Value(5))), arr.Value(5)) +} + {{end}} diff --git a/arrow/float16/float16.go b/arrow/float16/float16.go index e0192495..ecf5c9dd 100644 --- a/arrow/float16/float16.go +++ b/arrow/float16/float16.go @@ -175,6 +175,8 @@ func (n Num) Signbit() bool { return (n.bits & 0x8000) != 0 } func (n Num) IsNaN() bool { return (n.bits & 0x7fff) > 0x7c00 } +func (n Num) IsInf() bool {return (n.bits & 0x7c00) == 0x7c00 } + func (n Num) IsZero() bool { return (n.bits & 0x7fff) == 0 } func (f Num) Uint16() uint16 { return f.bits }