Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve handling of non-integer numeric indices (fix #2815) #2821

Merged
merged 1 commit into from
Aug 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
136 changes: 87 additions & 49 deletions src/jv_aux.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#include <assert.h>
#include <limits.h>
#include <math.h>
#include <string.h>
#include <stdlib.h>
#include <assert.h>
#include "jv_alloc.h"
#include "jv_private.h"

Expand All @@ -13,7 +15,7 @@ static double jv_number_get_value_and_consume(jv number) {
return value;
}

static int parse_slice(jv j, jv slice, int* pstart, int* pend) {
static jv parse_slice(jv j, jv slice, int* pstart, int* pend) {
// Array slices
jv start_jv = jv_object_get(jv_copy(slice), jv_string("start"));
jv end_jv = jv_object_get(slice, jv_string("end"));
Expand All @@ -27,8 +29,14 @@ static int parse_slice(jv j, jv slice, int* pstart, int* pend) {
} else if (jv_get_kind(j) == JV_KIND_STRING) {
len = jv_string_length_codepoints(j);
} else {
/*
* XXX This should be dead code because callers shouldn't call this
* function if `j' is neither an array nor a string.
*/
jv_free(j);
return 0;
jv_free(start_jv);
jv_free(end_jv);
return jv_invalid_with_msg(jv_string("Only arrays and strings can be sliced"));
}
if (jv_get_kind(end_jv) == JV_KIND_NULL) {
jv_free(end_jv);
Expand All @@ -38,29 +46,34 @@ static int parse_slice(jv j, jv slice, int* pstart, int* pend) {
jv_get_kind(end_jv) != JV_KIND_NUMBER) {
jv_free(start_jv);
jv_free(end_jv);
return 0;
} else {
double dstart = jv_number_value(start_jv);
double dend = jv_number_value(end_jv);
jv_free(start_jv);
jv_free(end_jv);
if (dstart < 0) dstart += len;
if (dend < 0) dend += len;
if (dstart < 0) dstart = 0;
if (dstart > len) dstart = len;

int start = (int)dstart;
int end = (dend > len) ? len : (int)dend;
// Ends are exclusive but e.g. 1 < 1.5 so :1.5 should be :2 not :1
if(end < dend) end += 1;

if (end > len) end = len;
if (end < start) end = start;
assert(0 <= start && start <= end && end <= len);
*pstart = start;
*pend = end;
return 1;
}
return jv_invalid_with_msg(jv_string("Array/string slice indices must be integers"));
}

double dstart = jv_number_value(start_jv);
double dend = jv_number_value(end_jv);
int start, end;

jv_free(start_jv);
jv_free(end_jv);
if (isnan(dstart)) dstart = 0;
if (dstart < 0) dstart += len;
if (dstart < 0) dstart = 0;
if (dstart > len) dstart = len;
start = dstart > INT_MAX ? INT_MAX : (int)dstart; // Rounds down

if (isnan(dend)) dend = len;
if (dend < 0) dend += len;
if (dend < 0) dend = start;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The operator looks misaligned.

end = dend > INT_MAX ? INT_MAX : (int)dend;
itchyny marked this conversation as resolved.
Show resolved Hide resolved
if (end > len) end = len;
if (end < len) end += end < dend ? 1 : 0; // We round start down
// but round end up

if (end < start) end = start;
assert(0 <= start && start <= end && end <= len);
*pstart = start;
*pend = end;
return jv_true();
}

jv jv_get(jv t, jv k) {
Expand All @@ -72,36 +85,44 @@ jv jv_get(jv t, jv k) {
v = jv_null();
}
} else if (jv_get_kind(t) == JV_KIND_ARRAY && jv_get_kind(k) == JV_KIND_NUMBER) {
if(jv_is_integer(k)){
int idx = (int)jv_number_value(k);
if (idx < 0)
idx += jv_array_length(jv_copy(t));
v = jv_array_get(t, idx);
if (!jv_is_valid(v)) {
jv_free(v);
v = jv_null();
}
jv_free(k);
} else {
if (jvp_number_is_nan(k)) {
jv_free(t);
jv_free(k);
v = jv_null();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Previously .[nan] emits null due to the jv_is_integer checking but now we rely on undefined behavior (on my M1 macOS, [1] | .[nan] emits 1 in this branch, previously null). We should check isnan first.

} else {
double didx = jv_number_value(k);
if (jvp_number_is_nan(k)) {
v = jv_null();
} else {
if (didx < INT_MIN) didx = INT_MIN;
if (didx > INT_MAX) didx = INT_MAX;
int idx = (int)jv_number_value(k);
if (idx < 0)
idx += jv_array_length(jv_copy(t));
v = jv_array_get(t, idx);
if (!jv_is_valid(v)) {
jv_free(v);
v = jv_null();
}
}
}
jv_free(k);
} else if (jv_get_kind(t) == JV_KIND_ARRAY && jv_get_kind(k) == JV_KIND_OBJECT) {
int start, end;
if (parse_slice(jv_copy(t), k, &start, &end)) {
jv e = parse_slice(jv_copy(t), k, &start, &end);
if (jv_get_kind(e) == JV_KIND_TRUE) {
v = jv_array_slice(t, start, end);
} else {
jv_free(t);
v = jv_invalid_with_msg(jv_string_fmt("Start and end indices of an array slice must be numbers"));
v = e;
}
} else if (jv_get_kind(t) == JV_KIND_STRING && jv_get_kind(k) == JV_KIND_OBJECT) {
int start, end;
if (parse_slice(jv_copy(t), k, &start, &end)) {
jv e = parse_slice(jv_copy(t), k, &start, &end);
if (jv_get_kind(e) == JV_KIND_TRUE) {
v = jv_string_slice(t, start, end);
} else {
v = jv_invalid_with_msg(jv_string_fmt("Start and end indices of an string slice must be numbers"));
jv_free(t);
v = e;
}
} else if (jv_get_kind(t) == JV_KIND_ARRAY && jv_get_kind(k) == JV_KIND_ARRAY) {
v = jv_array_indexes(t, k);
Expand Down Expand Up @@ -146,14 +167,24 @@ jv jv_set(jv t, jv k, jv v) {
t = jv_object_set(t, k, v);
} else if (jv_get_kind(k) == JV_KIND_NUMBER &&
(jv_get_kind(t) == JV_KIND_ARRAY || isnull)) {
if (isnull) t = jv_array();
t = jv_array_set(t, (int)jv_number_value(k), v);
jv_free(k);
if (jvp_number_is_nan(k)) {
jv_free(t);
jv_free(k);
t = jv_invalid_with_msg(jv_string("Cannot set array element at NaN index"));
} else {
double didx = jv_number_value(k);
if (didx < INT_MIN) didx = INT_MIN;
if (didx > INT_MAX) didx = INT_MAX;
if (isnull) t = jv_array();
t = jv_array_set(t, (int)didx, v);
jv_free(k);
}
} else if (jv_get_kind(k) == JV_KIND_OBJECT &&
(jv_get_kind(t) == JV_KIND_ARRAY || isnull)) {
if (isnull) t = jv_array();
int start, end;
if (parse_slice(jv_copy(t), k, &start, &end)) {
jv e = parse_slice(jv_copy(t), k, &start, &end);
if (jv_get_kind(e) == JV_KIND_TRUE) {
if (jv_get_kind(v) == JV_KIND_ARRAY) {
int array_len = jv_array_length(jv_copy(t));
assert(0 <= start && start <= end && end <= array_len);
Expand Down Expand Up @@ -185,8 +216,14 @@ jv jv_set(jv t, jv k, jv v) {
} else {
jv_free(t);
jv_free(v);
t = jv_invalid_with_msg(jv_string_fmt("Start and end indices of an array slice must be numbers"));
t = e;
}
} else if (jv_get_kind(k) == JV_KIND_OBJECT && jv_get_kind(t) == JV_KIND_STRING) {
jv_free(t);
jv_free(k);
jv_free(v);
/* Well, why not? We should implement this... */
t = jv_invalid_with_msg(jv_string_fmt("Cannot update string slices"));
} else {
jv err = jv_invalid_with_msg(jv_string_fmt("Cannot update field at %s index of %s",
jv_kind_name(jv_get_kind(k)),
Expand Down Expand Up @@ -255,13 +292,14 @@ static jv jv_dels(jv t, jv keys) {
}
} else if (jv_get_kind(key) == JV_KIND_OBJECT) {
int start, end;
if (parse_slice(jv_copy(t), key, &start, &end)) {
jv e = parse_slice(jv_copy(t), key, &start, &end);
if (jv_get_kind(e) == JV_KIND_TRUE) {
starts = jv_array_append(starts, jv_number(start));
ends = jv_array_append(ends, jv_number(end));
} else {
jv_free(new_array);
jv_free(key);
new_array = jv_invalid_with_msg(jv_string_fmt("Start and end indices of an array slice must be numbers"));
new_array = e;
goto arr_out;
}
} else {
Expand Down
58 changes: 58 additions & 0 deletions tests/jq.test
Original file line number Diff line number Diff line change
Expand Up @@ -2024,3 +2024,61 @@ walk(1)
walk(select(IN({}, []) | not))
{"a":1,"b":[]}
{"a":1}

# #2815
[range(10)] | .[1.2:3.5]
null
[1,2,3]

[range(10)] | .[1.5:3.5]
null
[1,2,3]

[range(10)] | .[1.7:3.5]
null
[1,2,3]

[range(10)] | .[1.7:4294967295]
null
[1,2,3,4,5,6,7,8,9]

[range(10)] | .[1.7:-4294967296]
null
[]

[[range(10)] | .[1.1,1.5,1.7]]
null
[1,1,1]

[range(5)] | .[1.1] = 5
null
[0,5,2,3,4]

[range(3)] | .[nan:1]
null
[0]

[range(3)] | .[1:nan]
null
[1,2]

[range(3)] | .[nan]
null
null

try ([range(3)] | .[nan] = 9) catch .
null
"Cannot set array element at NaN index"

try ("foobar" | .[1.5:3.5] = "xyz") catch .
null
"Cannot update string slices"

try ([range(10)] | .[1.5:3.5] = ["xyz"]) catch .
null
[0,"xyz",4,5,6,7,8,9]

try ("foobar" | .[1.5]) catch .
null
"Cannot index string with number"