From 0e067ef93605493060392f0999be27694146fca4 Mon Sep 17 00:00:00 2001 From: Nicolas Williams Date: Fri, 4 Aug 2023 00:04:57 -0500 Subject: [PATCH] Improve handling of non-integer numeric indices (fix #2815) --- src/jv_aux.c | 136 ++++++++++++++++++++++++++++++++------------------ tests/jq.test | 58 +++++++++++++++++++++ 2 files changed, 145 insertions(+), 49 deletions(-) diff --git a/src/jv_aux.c b/src/jv_aux.c index 133fb54ccb..0b7d169df5 100644 --- a/src/jv_aux.c +++ b/src/jv_aux.c @@ -1,6 +1,8 @@ +#include +#include +#include #include #include -#include #include "jv_alloc.h" #include "jv_private.h" @@ -13,7 +15,7 @@ static double jv_number_get_value_and_consume(jv number) { return value; } -static int parse_slice(jv j, jv slice, int* pstart, int* pend) { +static jv parse_slice(jv j, jv slice, int* pstart, int* pend) { // Array slices jv start_jv = jv_object_get(jv_copy(slice), jv_string("start")); jv end_jv = jv_object_get(slice, jv_string("end")); @@ -27,8 +29,14 @@ static int parse_slice(jv j, jv slice, int* pstart, int* pend) { } else if (jv_get_kind(j) == JV_KIND_STRING) { len = jv_string_length_codepoints(j); } else { + /* + * XXX This should be dead code because callers shouldn't call this + * function if `j' is neither an array nor a string. + */ jv_free(j); - return 0; + jv_free(start_jv); + jv_free(end_jv); + return jv_invalid_with_msg(jv_string("Only arrays and strings can be sliced")); } if (jv_get_kind(end_jv) == JV_KIND_NULL) { jv_free(end_jv); @@ -38,29 +46,34 @@ static int parse_slice(jv j, jv slice, int* pstart, int* pend) { jv_get_kind(end_jv) != JV_KIND_NUMBER) { jv_free(start_jv); jv_free(end_jv); - return 0; - } else { - double dstart = jv_number_value(start_jv); - double dend = jv_number_value(end_jv); - jv_free(start_jv); - jv_free(end_jv); - if (dstart < 0) dstart += len; - if (dend < 0) dend += len; - if (dstart < 0) dstart = 0; - if (dstart > len) dstart = len; - - int start = (int)dstart; - int end = (dend > len) ? len : (int)dend; - // Ends are exclusive but e.g. 1 < 1.5 so :1.5 should be :2 not :1 - if(end < dend) end += 1; - - if (end > len) end = len; - if (end < start) end = start; - assert(0 <= start && start <= end && end <= len); - *pstart = start; - *pend = end; - return 1; - } + return jv_invalid_with_msg(jv_string("Array/string slice indices must be integers")); + } + + double dstart = jv_number_value(start_jv); + double dend = jv_number_value(end_jv); + int start, end; + + jv_free(start_jv); + jv_free(end_jv); + if (isnan(dstart)) dstart = 0; + if (dstart < 0) dstart += len; + if (dstart < 0) dstart = 0; + if (dstart > len) dstart = len; + start = dstart > INT_MAX ? INT_MAX : (int)dstart; // Rounds down + + if (isnan(dend)) dend = len; + if (dend < 0) dend += len; + if (dend < 0) dend = start; + end = dend > INT_MAX ? INT_MAX : (int)dend; + if (end > len) end = len; + if (end < len) end += end < dend ? 1 : 0; // We round start down + // but round end up + + if (end < start) end = start; + assert(0 <= start && start <= end && end <= len); + *pstart = start; + *pend = end; + return jv_true(); } jv jv_get(jv t, jv k) { @@ -72,36 +85,44 @@ jv jv_get(jv t, jv k) { v = jv_null(); } } else if (jv_get_kind(t) == JV_KIND_ARRAY && jv_get_kind(k) == JV_KIND_NUMBER) { - if(jv_is_integer(k)){ - int idx = (int)jv_number_value(k); - if (idx < 0) - idx += jv_array_length(jv_copy(t)); - v = jv_array_get(t, idx); - if (!jv_is_valid(v)) { - jv_free(v); - v = jv_null(); - } - jv_free(k); - } else { + if (jvp_number_is_nan(k)) { jv_free(t); - jv_free(k); v = jv_null(); + } else { + double didx = jv_number_value(k); + if (jvp_number_is_nan(k)) { + v = jv_null(); + } else { + if (didx < INT_MIN) didx = INT_MIN; + if (didx > INT_MAX) didx = INT_MAX; + int idx = (int)jv_number_value(k); + if (idx < 0) + idx += jv_array_length(jv_copy(t)); + v = jv_array_get(t, idx); + if (!jv_is_valid(v)) { + jv_free(v); + v = jv_null(); + } + } } + jv_free(k); } else if (jv_get_kind(t) == JV_KIND_ARRAY && jv_get_kind(k) == JV_KIND_OBJECT) { int start, end; - if (parse_slice(jv_copy(t), k, &start, &end)) { + jv e = parse_slice(jv_copy(t), k, &start, &end); + if (jv_get_kind(e) == JV_KIND_TRUE) { v = jv_array_slice(t, start, end); } else { jv_free(t); - v = jv_invalid_with_msg(jv_string_fmt("Start and end indices of an array slice must be numbers")); + v = e; } } else if (jv_get_kind(t) == JV_KIND_STRING && jv_get_kind(k) == JV_KIND_OBJECT) { int start, end; - if (parse_slice(jv_copy(t), k, &start, &end)) { + jv e = parse_slice(jv_copy(t), k, &start, &end); + if (jv_get_kind(e) == JV_KIND_TRUE) { v = jv_string_slice(t, start, end); } else { - v = jv_invalid_with_msg(jv_string_fmt("Start and end indices of an string slice must be numbers")); jv_free(t); + v = e; } } else if (jv_get_kind(t) == JV_KIND_ARRAY && jv_get_kind(k) == JV_KIND_ARRAY) { v = jv_array_indexes(t, k); @@ -146,14 +167,24 @@ jv jv_set(jv t, jv k, jv v) { t = jv_object_set(t, k, v); } else if (jv_get_kind(k) == JV_KIND_NUMBER && (jv_get_kind(t) == JV_KIND_ARRAY || isnull)) { - if (isnull) t = jv_array(); - t = jv_array_set(t, (int)jv_number_value(k), v); - jv_free(k); + if (jvp_number_is_nan(k)) { + jv_free(t); + jv_free(k); + t = jv_invalid_with_msg(jv_string("Cannot set array element at NaN index")); + } else { + double didx = jv_number_value(k); + if (didx < INT_MIN) didx = INT_MIN; + if (didx > INT_MAX) didx = INT_MAX; + if (isnull) t = jv_array(); + t = jv_array_set(t, (int)didx, v); + jv_free(k); + } } else if (jv_get_kind(k) == JV_KIND_OBJECT && (jv_get_kind(t) == JV_KIND_ARRAY || isnull)) { if (isnull) t = jv_array(); int start, end; - if (parse_slice(jv_copy(t), k, &start, &end)) { + jv e = parse_slice(jv_copy(t), k, &start, &end); + if (jv_get_kind(e) == JV_KIND_TRUE) { if (jv_get_kind(v) == JV_KIND_ARRAY) { int array_len = jv_array_length(jv_copy(t)); assert(0 <= start && start <= end && end <= array_len); @@ -185,8 +216,14 @@ jv jv_set(jv t, jv k, jv v) { } else { jv_free(t); jv_free(v); - t = jv_invalid_with_msg(jv_string_fmt("Start and end indices of an array slice must be numbers")); + t = e; } + } else if (jv_get_kind(k) == JV_KIND_OBJECT && jv_get_kind(t) == JV_KIND_STRING) { + jv_free(t); + jv_free(k); + jv_free(v); + /* Well, why not? We should implement this... */ + t = jv_invalid_with_msg(jv_string_fmt("Cannot update string slices")); } else { jv err = jv_invalid_with_msg(jv_string_fmt("Cannot update field at %s index of %s", jv_kind_name(jv_get_kind(k)), @@ -255,13 +292,14 @@ static jv jv_dels(jv t, jv keys) { } } else if (jv_get_kind(key) == JV_KIND_OBJECT) { int start, end; - if (parse_slice(jv_copy(t), key, &start, &end)) { + jv e = parse_slice(jv_copy(t), key, &start, &end); + if (jv_get_kind(e) == JV_KIND_TRUE) { starts = jv_array_append(starts, jv_number(start)); ends = jv_array_append(ends, jv_number(end)); } else { jv_free(new_array); jv_free(key); - new_array = jv_invalid_with_msg(jv_string_fmt("Start and end indices of an array slice must be numbers")); + new_array = e; goto arr_out; } } else { diff --git a/tests/jq.test b/tests/jq.test index e35722fd49..9d0b59285b 100644 --- a/tests/jq.test +++ b/tests/jq.test @@ -2024,3 +2024,61 @@ walk(1) walk(select(IN({}, []) | not)) {"a":1,"b":[]} {"a":1} + +# #2815 +[range(10)] | .[1.2:3.5] +null +[1,2,3] + +[range(10)] | .[1.5:3.5] +null +[1,2,3] + +[range(10)] | .[1.7:3.5] +null +[1,2,3] + +[range(10)] | .[1.7:4294967295] +null +[1,2,3,4,5,6,7,8,9] + +[range(10)] | .[1.7:-4294967296] +null +[] + +[[range(10)] | .[1.1,1.5,1.7]] +null +[1,1,1] + +[range(5)] | .[1.1] = 5 +null +[0,5,2,3,4] + +[range(3)] | .[nan:1] +null +[0] + +[range(3)] | .[1:nan] +null +[1,2] + +[range(3)] | .[nan] +null +null + +try ([range(3)] | .[nan] = 9) catch . +null +"Cannot set array element at NaN index" + +try ("foobar" | .[1.5:3.5] = "xyz") catch . +null +"Cannot update string slices" + +try ([range(10)] | .[1.5:3.5] = ["xyz"]) catch . +null +[0,"xyz",4,5,6,7,8,9] + +try ("foobar" | .[1.5]) catch . +null +"Cannot index string with number" +