From 31fa0c9782d96b84768fac4141ed3d2ffa31a726 Mon Sep 17 00:00:00 2001 From: Jim Garlick Date: Fri, 27 Oct 2017 12:14:27 -0700 Subject: [PATCH 1/4] libkvs/treeobj: decode_val add xtra 0 byte to data Problem: if string values are not stored in the KVS with their NULL terminators, we need to recopy them to a len + 1 size buffer before returning to the user. base64_decode_length() already includes provision for an extra NULL byte. Add some assertions to the code to ensure (and document) that this NULL byte is present, then document the behavior in the header. Add a unit test. --- src/common/libkvs/test/treeobj.c | 2 ++ src/common/libkvs/treeobj.c | 28 ++++++++++++++-------------- src/common/libkvs/treeobj.h | 2 ++ 3 files changed, 18 insertions(+), 14 deletions(-) diff --git a/src/common/libkvs/test/treeobj.c b/src/common/libkvs/test/treeobj.c index f6418da7a3d4..53480e9fd7c7 100644 --- a/src/common/libkvs/test/treeobj.c +++ b/src/common/libkvs/test/treeobj.c @@ -173,6 +173,8 @@ void test_val (void) "and returned size same as input"); ok (memcmp (buf, outbuf, outlen) == 0, "and returned data same as input"); + ok (outbuf[outlen] == '\0', + "and includes an extra null terminator"); free (outbuf); ok (treeobj_decode_val (val, (void **)&outbuf, NULL) == 0, "treeobj_decode_val works w/o len input"); diff --git a/src/common/libkvs/treeobj.c b/src/common/libkvs/treeobj.c index 10e8629da109..4f5d91ceb9ac 100644 --- a/src/common/libkvs/treeobj.c +++ b/src/common/libkvs/treeobj.c @@ -152,7 +152,7 @@ int treeobj_decode_val (json_t *obj, void **dp, int *lp) { const char *type, *xdatastr; json_t *xdata; - int len, xlen; + int buflen, len, xlen; char *data; if (treeobj_unpack (obj, &type, &xdata) < 0 @@ -162,20 +162,20 @@ int treeobj_decode_val (json_t *obj, void **dp, int *lp) } xdatastr = json_string_value (xdata); xlen = strlen (xdatastr); - len = base64_decode_length (xlen); - if (len > 1) { - if (!(data = malloc (len))) { - errno = ENOMEM; - return -1; - } - if (base64_decode_block (data, &len, xdatastr, xlen) < 0) { - free (data); - errno = EINVAL; - return -1; - } + buflen = base64_decode_length (xlen); // an *estimate*, includes space + assert (buflen > 0); // for '\0' term + if (!(data = malloc (buflen))) + return -1; + len = buflen; // len is an in/out parameter + if (base64_decode_block (data, &len, xdatastr, xlen) < 0) { + free (data); + errno = EINVAL; + return -1; } - else { - len = 0; + assert (len < buflen); // len does not account + assert (data[len] == '\0'); // for '\0' term + if (len == 0) { + free (data); data = NULL; } if (lp) diff --git a/src/common/libkvs/treeobj.h b/src/common/libkvs/treeobj.h index 5aaa425ec29f..58fbcf91178d 100644 --- a/src/common/libkvs/treeobj.h +++ b/src/common/libkvs/treeobj.h @@ -46,6 +46,8 @@ bool treeobj_is_dirref (json_t *obj); json_t *treeobj_get_data (json_t *obj); /* get decoded val data. + * If len == 0, data will be NULL. + * If len > 0, data will be followed by an extra NULL byte in memory. * Caller must free returned data. */ int treeobj_decode_val (json_t *obj, void **data, int *len); From 7f339ee77a6ef2f75680721ce95be8fa41409d35 Mon Sep 17 00:00:00 2001 From: Jim Garlick Date: Fri, 27 Oct 2017 12:21:40 -0700 Subject: [PATCH 2/4] libkvs: do not store strings with \0 term Problem: the reasons no longer exist for storing string/json values in the KVS with their terminating NULL byte, and this complicates the implementation of atomic append. Skip storing the NULL byte, and change lookup_get() et al to rely on the fact that treeobj_decode_val() pads the space after the returned data with a NULL byte. Use json_loadb() instead of _loads() when decoding a JSON value. Update unit and sharness tests. Fix #1261 --- src/common/libkvs/kvs_lookup.c | 19 ++-------------- src/common/libkvs/kvs_txn.c | 4 ++-- src/common/libkvs/kvs_watch.c | 9 ++------ src/common/libkvs/test/kvs_txn.c | 14 ++---------- t/t1000-kvs.t | 39 +++----------------------------- t/t1002-kvs-extra.t | 3 +-- 6 files changed, 12 insertions(+), 76 deletions(-) diff --git a/src/common/libkvs/kvs_lookup.c b/src/common/libkvs/kvs_lookup.c index b8652b8f68b1..1bb2874cf2d8 100644 --- a/src/common/libkvs/kvs_lookup.c +++ b/src/common/libkvs/kvs_lookup.c @@ -165,14 +165,6 @@ flux_future_t *flux_kvs_lookupat (flux_t *h, int flags, const char *key, return f; } -static bool value_is_string (const char *s, int len) -{ - if (len < 1 || s[len - 1] != '\0') - return false; - else - return true; -} - static int decode_treeobj (flux_future_t *f, json_t **treeobj) { json_t *obj; @@ -204,10 +196,7 @@ int flux_kvs_lookup_get (flux_future_t *f, const char **value) &ctx->val_len) < 0) return -1; ctx->val_valid = true; - } - if (ctx->val_data && !value_is_string (ctx->val_data, ctx->val_len)) { - errno = EINVAL; - return -1; + // N.B. val_data includes xtra 0 byte term not reflected in val_len } if (value) *value = ctx->val_data; @@ -256,11 +245,7 @@ int flux_kvs_lookup_get_unpack (flux_future_t *f, const char *fmt, ...) ctx->val_valid = true; } if (!ctx->val_obj) { - if (!value_is_string (ctx->val_data, ctx->val_len)) { - errno = EINVAL; - return -1; - } - if (!(ctx->val_obj = json_loads (ctx->val_data, + if (!(ctx->val_obj = json_loadb (ctx->val_data, ctx->val_len, JSON_DECODE_ANY, NULL))) { errno = EINVAL; return -1; diff --git a/src/common/libkvs/kvs_txn.c b/src/common/libkvs/kvs_txn.c index 219f4c225937..6b96ee3dbfa4 100644 --- a/src/common/libkvs/kvs_txn.c +++ b/src/common/libkvs/kvs_txn.c @@ -176,7 +176,7 @@ int flux_kvs_txn_put (flux_kvs_txn_t *txn, int flags, } if (validate_flags (flags, 0) < 0) goto error; - if (!(dirent = treeobj_create_val (value, value ? strlen (value) + 1 : 0))) + if (!(dirent = treeobj_create_val (value, value ? strlen (value) : 0))) goto error; if (append_op_to_txn (txn, flags, key, dirent) < 0) goto error; @@ -214,7 +214,7 @@ int flux_kvs_txn_vpack (flux_kvs_txn_t *txn, int flags, goto error; } json_decref (val); - if (!(dirent = treeobj_create_val (s, strlen (s) + 1))) { + if (!(dirent = treeobj_create_val (s, strlen (s)))) { free (s); goto error; } diff --git a/src/common/libkvs/kvs_watch.c b/src/common/libkvs/kvs_watch.c index e3fb6e6db8aa..5ad32f6de98a 100644 --- a/src/common/libkvs/kvs_watch.c +++ b/src/common/libkvs/kvs_watch.c @@ -259,7 +259,7 @@ static flux_future_t *kvs_watch_rpc (flux_t *h, const char *key, /* val will be one of three things: * 1) JSON_NULL, set json_str to string-encoded object (NULL) * 2) RFC 11 dir object, set json_str to string-encoded object - * 3) RFC 11 val object, unbase64, verify NULL termination, set json_str + * 3) RFC 11 val object, unbase64, set json_str * The caller must free returned json_str (if any) */ static int decode_val_object (json_t *val, char **json_str) @@ -285,11 +285,6 @@ static int decode_val_object (json_t *val, char **json_str) goto error; if (treeobj_decode_val (val, (void **)&s, &len) < 0) goto error; - if (s[len - 1] != '\0') { - free (s); - errno = EPROTO; - goto error; - } } else { errno = EPROTO; @@ -348,7 +343,7 @@ int flux_kvs_watch_once (flux_t *h, const char *key, char **valp) } val_in = *valp; if (val_in) { - if (!(xval_obj = treeobj_create_val (val_in, strlen (val_in) + 1))) + if (!(xval_obj = treeobj_create_val (val_in, strlen (val_in)))) goto done; if (!(xval_str = treeobj_encode (xval_obj))) goto done; diff --git a/src/common/libkvs/test/kvs_txn.c b/src/common/libkvs/test/kvs_txn.c index a6c535bf11c6..7d753a37451f 100644 --- a/src/common/libkvs/test/kvs_txn.c +++ b/src/common/libkvs/test/kvs_txn.c @@ -51,12 +51,7 @@ int check_int_value (json_t *dirent, int expected) diag ("%s: initial base64 decode failed", __FUNCTION__); return -1; } - if (len == 0 || data[len - 1] != '\0') { - diag ("%s: data not null terminated", __FUNCTION__); - free (data); - return -1; - } - if (!(val = json_loads (data, JSON_DECODE_ANY, NULL))) { + if (!(val = json_loadb (data, len, JSON_DECODE_ANY, NULL))) { diag ("%s: couldn't decode JSON", __FUNCTION__); free (data); return -1; @@ -90,12 +85,7 @@ int check_string_value (json_t *dirent, const char *expected) diag ("%s: initial base64 decode failed", __FUNCTION__); return -1; } - if (len == 0 || data[len - 1] != '\0') { - diag ("%s: data not null terminated", __FUNCTION__); - free (data); - return -1; - } - if (!(val = json_loads (data, JSON_DECODE_ANY, NULL))) { + if (!(val = json_loadb (data, len, JSON_DECODE_ANY, NULL))) { diag ("%s: couldn't decode JSON", __FUNCTION__); free (data); return -1; diff --git a/t/t1000-kvs.t b/t/t1000-kvs.t index cd7bafed28cc..f571dfbcd9a3 100755 --- a/t/t1000-kvs.t +++ b/t/t1000-kvs.t @@ -420,11 +420,11 @@ test_expect_success 'kvs: zero-length value NOT handled by get --json' ' flux kvs put --raw $DIR.a= && test_must_fail flux kvs get --json $DIR.a ' -test_expect_success 'kvs: zero-length value NOT made by put with no options' ' +test_expect_success 'kvs: zero-length value is made by put with no options' ' flux kvs unlink -Rf $DIR && flux kvs put $DIR.a= && - flux kvs get --raw $DIR.a >onenull.output && - test_must_fail diff -q /dev/null onenull.output + flux kvs get --raw $DIR.a >empty3.output && + test_cmp /dev/null empty3.output ' test_expect_success 'kvs: zero-length value does not cause dir to fail' ' flux kvs unlink -Rf $DIR && @@ -437,39 +437,6 @@ test_expect_success 'kvs: zero-length value does not cause ls -FR to fail' ' flux kvs ls -FR $DIR ' -# -# empty string values -# -test_expect_success 'kvs: empty string value made by put with no options' ' - flux kvs unlink -Rf $DIR && - flux kvs put $DIR.a= && - dd if=/dev/zero count=1 bs=1 of=empty.expected && - flux kvs get --raw $DIR.a >empty.actual && - test_cmp empty.expected empty.actual -' -test_expect_success 'kvs: empty string value can be retrieved by get' ' - flux kvs unlink -Rf $DIR && - flux kvs put $DIR.a= && - echo >empty2.expected && - flux kvs get $DIR.a >empty2.actual && - test_cmp empty2.expected empty2.actual -' -test_expect_success 'kvs: empty string value NOT handled by get --json' ' - flux kvs unlink -Rf $DIR && - flux kvs put $DIR.a= && - test_must_fail flux kvs get --json $DIR.a -' -test_expect_success 'kvs: empty string value does not cause dir to fail' ' - flux kvs unlink -Rf $DIR && - flux kvs put $DIR.a= && - flux kvs dir $DIR | grep -q "a =" -' -test_expect_success 'kvs: empty string value does not cause ls -FR to fail' ' - flux kvs unlink -Rf $DIR && - flux kvs put $DIR.a= && - flux kvs ls -FR $DIR | grep -q "a" -' - # # raw value tests # diff --git a/t/t1002-kvs-extra.t b/t/t1002-kvs-extra.t index 57dd8e2b0a02..8ce1a84dc636 100755 --- a/t/t1002-kvs-extra.t +++ b/t/t1002-kvs-extra.t @@ -272,9 +272,8 @@ test_expect_success 'kvs: kvsdir_get_size works' ' # kvs reads/writes of raw data to/from content store work -# largevalhash includes string quotes and NUL char at end largeval="abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz" -largevalhash="sha1-79da8e5c9dbe65c6460377d3f09b8f535ceb7d9d" +largevalhash="sha1-0b22e9fecf9c832032fe976e67058df0322dcc5c" test_expect_success 'kvs: large put stores raw data into content store' ' flux kvs unlink -Rf $TEST && From 7b8b6f6a9d3731ba003f77fc2bd96a4d33e1dd79 Mon Sep 17 00:00:00 2001 From: Jim Garlick Date: Fri, 27 Oct 2017 13:46:25 -0700 Subject: [PATCH 3/4] doc/flux_kvs_lookup(3): strings not \0 terminated Update lookup_get_*() descriptions to reflect the fact that string terminators are no longer stored in the KVS. --- doc/man3/flux_kvs_lookup.adoc | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/doc/man3/flux_kvs_lookup.adoc b/doc/man3/flux_kvs_lookup.adoc index dd8bee9817b5..dee2add8374c 100644 --- a/doc/man3/flux_kvs_lookup.adoc +++ b/doc/man3/flux_kvs_lookup.adoc @@ -55,18 +55,17 @@ than once on the same future, and they may be intermixed to probe a result or interpret it in different ways. Results remain valid until `flux_future_destroy()` is called. -`flux_kvs_lookup_get()` interprets the result as a NULL-terminated string -value. The value is assigned to _value_. If the value is empty, NULL -is assigned. - -`flux_kvs_lookup_get_unpack()` interprets the result as a NULL-terminated -string value, then as encoded JSON (not necessarily enclosed in an object). -The value is parsed according to variable arguments in Jansson `json_unpack()` -format. - -`flux_kvs_lookup_get_raw()` interprets the result as a raw, opaque value, -which it assigns to _buf_ and its length to _len_. The value may be any -byte sequence. If the value has zero length, NULL is assigned to _buf_. +`flux_kvs_lookup_get()` interprets the result as a value. If the value +has length greater than zero, a NULL is appended and it is assigned +to _value_, otherwise NULL is assigned to _value_. + +`flux_kvs_lookup_get_unpack()` interprets the result as a value, which +it decodes as JSON according to variable arguments in Jansson +`json_unpack()` format. + +`flux_kvs_lookup_get_raw()` interprets the result as a value. If the value +has length greater than zero, the value and its length are assigned to +_buf_ and _len_, respectively. Otherwise NULL and zero are assigned. `flux_kvs_lookup_get_dir()` interprets the result as a directory, e.g. in response to a lookup with the FLUX_KVS_READDIR flag set. From 91e373803a64f78aa49dc82f32adec2b7477294d Mon Sep 17 00:00:00 2001 From: Jim Garlick Date: Fri, 27 Oct 2017 14:24:17 -0700 Subject: [PATCH 4/4] doc/flux-kvs(1): strings not \0 terminated --- doc/man1/flux-kvs.adoc | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/doc/man1/flux-kvs.adoc b/doc/man1/flux-kvs.adoc index 1b30431f71a0..ffb4e9877124 100644 --- a/doc/man1/flux-kvs.adoc +++ b/doc/man1/flux-kvs.adoc @@ -44,18 +44,17 @@ COMMANDS -------- *get* [-j|-r|-t] [-a treeobj] 'key' ['key...']:: Retrieve the value stored under 'key'. If nothing has been stored under -'key', display an error message. If no options, value is interpreted -as a NULL-terminated string. If '-j', it is interpreted as encoded JSON. -If '-r', it is interpreted as raw data and is output without formatting. -If '-t', the RFC 11 object is displayed. '-a treeobj' causes the lookup -to be relative to an RFC 11 snapshot reference. +'key', display an error message. If no options, value is displayed with +a newline appended (if value length is nonzero). If '-j', value is +interpreted as encoded JSON and formatted accordingly. If '-r', value +is displayed without a newline. If '-t', the RFC 11 object is displayed. +'-a treeobj' causes the lookup to be relative to an RFC 11 snapshot reference. *put* [-j|-r|-t] [-n] 'key=value' ['key=value...']:: Store 'value' under 'key' and commit it. If it already has a value, -overwrite it. If no options, value is stored as a NULL-terminated string. -If '-j', it is first encoded as JSON, then stored as a NULL-terminated string. -If '-r', it is stored as raw data with no termination. For raw mode only, -a value of "-" indicates that the value should be read from standard input. +overwrite it. If no options, value is stored directly. If '-j', it is +first encoded as JSON, then stored. If '-r', the value may be read from +standard input if specified as "-", and may include embedded NULL bytes. If '-t', value is stored as a RFC 11 object. '-n' prevents the commit from being merged with with other contemporaneous commits.