From 32665df175332ab236824d74b113ca267db51963 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=98yvind=20R=C3=B8nningstad?= Date: Thu, 25 Apr 2024 11:58:22 +0000 Subject: [PATCH] zcbor_encode: Add new fragmented string encoding API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit to match the redesigned decoding API. Signed-off-by: Øyvind Rønningstad --- MIGRATION_GUIDE.md | 10 ++ README.md | 25 ++++ include/zcbor_encode.h | 40 ++++++ src/zcbor_encode.c | 117 +++++++++++++++- tests/unit/test1_unit_tests/src/main.c | 185 +++++++++++++++++++++++++ 5 files changed, 375 insertions(+), 2 deletions(-) diff --git a/MIGRATION_GUIDE.md b/MIGRATION_GUIDE.md index a2ec19a7..0d06fffb 100644 --- a/MIGRATION_GUIDE.md +++ b/MIGRATION_GUIDE.md @@ -13,6 +13,16 @@ * A fix was made to the naming of bstr elements with a .size specifier, which might mean that these elements change name in your code when you regenerate. +* The fragmented payload API has been completely redesigned to accommodate adding the encoding counterpart. + The docs have been updated and there's a new section in the README to explain the functionality. + + * You must now define ZCBOR_FRAGMENTS to access the API + * `zcbor_*str_decode_fragment()` has been renamed to `zcbor_*str_fragments_start_decode()` + * After calling `zcbor_*str_fragments_start_decode()`, you must now retrieve the first fragment manually with `zcbor_str_fragment_decode()`, instead of via an argument. + * `zcbor_next_fragment()` and `zcbor_bstr_next_fragment()` have been merged into a single function, now called `zcbor_str_fragment_decode()`. + It does not take a `prev_fragment` argument; instead, this state is kept internally in the state struct. + * `zcbor_bstr_start_decode_fragment()` has been renamed to `zcbor_cbor_bstr_fragments_start_decode()` and does not return a fragment. + To retrieve fragments when decoding a CBOR-encoded bstr, use `zcbor_str_fragment_decode()` # zcbor v. 
0.8.0 diff --git a/README.md b/README.md index 212c2d19..f548ce21 100644 --- a/README.md +++ b/README.md @@ -88,6 +88,31 @@ ZCBOR_STATE_D(decode_state, n, payload, payload_len, elem_count, n_flags); ZCBOR_STATE_E(encode_state, n, payload, payload_len, 0); ``` +Fragmented payloads +------------------- + +zcbor can encode and decode payloads in sections, i.e. the payload can be split into separate buffers/arrays. +This can be useful e.g. if you send or receive your payload in multiple packets. +When the current payload section is done, call `zcbor_update_state()` to introduce the next section. +Note that zcbor does not allow section boundaries to split a zcbor header/value pair. +This means that the following elements cannot be split between sections: + +- Numbers and simple values (integers, floats, bools, undefined, nil) +- Tags +- Headers of lists, maps, tstrs, and bstrs + +If your payload is split in an unsupported way, you can get around it by making a small section out of the remaining bytes of one section spliced with the start of the next. +Another option is to leave a little room at the start of each section buffer, and copy the remaining end of one section into the start of the next buffer. +8 bytes should be enough for this. + +Lists and maps can span multiple sections, as long as the individual elements are not split in a way that breaks the above rule. + +String payloads can be split across multiple payload sections, if `ZCBOR_FRAGMENTS` is enabled, and the `*str_fragments_*()` APIs are used. Note that in the zcbor docs, the term "string fragment" is used for fragmented strings, while the term "payload section" is used for fragmented CBOR payloads, as passed to `zcbor_update_state()`. These do not always line up perfectly, particularly at the start and end of fragmented strings. + +CBOR-encoded bstrs can be nested, and there can also be a non-CBOR-encoded innermost string. +The current innermost string (CBOR-encoded or otherwise) is called the "current string". 
+`zcbor_update_state()` modifies all backups so that outer nested CBOR-encoded strings have updated information about the new section. + Configuration ------------- diff --git a/include/zcbor_encode.h b/include/zcbor_encode.h index 9bd9383c..326c47ba 100644 --- a/include/zcbor_encode.h +++ b/include/zcbor_encode.h @@ -231,6 +231,46 @@ bool zcbor_bstr_start_encode(zcbor_state_t *state); */ bool zcbor_bstr_end_encode(zcbor_state_t *state, struct zcbor_string *result); + +#ifdef ZCBOR_FRAGMENTS + +/** Start encoding a fragmented string. I.e. a string spread over non-consecutive payload sections. + * + * After calling this, you can write a fragment with @ref zcbor_str_fragment_encode, + * then update the payload with @ref zcbor_update_state. + * Repeat until the string is fully encoded, then call @ref zcbor_str_fragments_end_encode. + */ +bool zcbor_bstr_fragments_start_encode(zcbor_state_t *state, size_t total_len); +bool zcbor_tstr_fragments_start_encode(zcbor_state_t *state, size_t total_len); + +/** Start encoding a fragmented CBOR-encoded bytestring. + * + * I.e. a string spread over non-consecutive payload sections. + * + * This is an alternative to zcbor_*str_fragments_start_encode() to be used if the payload + * contains CBOR data that will be encoded directly with other zcbor_*() functions. + * + * A state backup is created to keep track of the element count and original payload_end. + * After calling this, you can encode elements using other zcbor functions, + * then update the payload with @ref zcbor_update_state. + * Repeat until the string is fully encoded, then call @ref zcbor_str_fragments_end_encode. + * When the current payload section contains the end of the string, + * payload_end is set to the end of the string, so there is no risk of encoding past the end. + */ +bool zcbor_cbor_bstr_fragments_start_encode(zcbor_state_t *state, size_t total_len); + +/** Encode a string fragment. 
+ * + * Copy bytes from the fragment into the payload until the end of the payload, the end of the fragment, or the end of the string is reached. + * Do not use this function with @ref zcbor_cbor_bstr_fragments_start_encode. + */ +bool zcbor_str_fragment_encode(zcbor_state_t *state, struct zcbor_string *fragment, size_t *enc_len); + +/** Finish encoding a fragmented string. */ +bool zcbor_str_fragments_end_encode(zcbor_state_t *state); + +#endif /* ZCBOR_FRAGMENTS */ + #ifdef __cplusplus } #endif diff --git a/src/zcbor_encode.c b/src/zcbor_encode.c index 18207f56..f8187367 100644 --- a/src/zcbor_encode.c +++ b/src/zcbor_encode.c @@ -48,6 +48,10 @@ static bool encode_header_byte(zcbor_state_t *state, zcbor_assert_state(additional < 32, "Unsupported additional value: %d\r\n", additional); +#ifdef ZCBOR_FRAGMENTS + ZCBOR_ERR_IF(state->inside_frag_str, ZCBOR_ERR_INSIDE_STRING); +#endif + *(state->payload_mut) = (uint8_t)((major_type << 5) | (additional & 0x1F)); zcbor_trace(state, "value_encode"); state->payload_mut++; @@ -234,6 +238,7 @@ bool zcbor_bstr_start_encode(zcbor_state_t *state) /* Encode a dummy header */ if (!value_encode(state, ZCBOR_MAJOR_TYPE_BSTR, &max_len, sizeof(max_len))) { + zcbor_process_backup(state, ZCBOR_FLAG_CONSUME, 0xFFFFFFFF); ZCBOR_FAIL(); } return true; @@ -259,7 +264,7 @@ bool zcbor_bstr_end_encode(zcbor_state_t *state, struct zcbor_string *result) result->value = state->payload + zcbor_header_len(zcbor_remaining_str_len(state)); result->len = (size_t)payload - (size_t)result->value; - /* Reencode header of list now that we know the number of elements. */ + /* Reencode header of bstr now that we know the length. */ if (!zcbor_bstr_encode(state, result)) { ZCBOR_FAIL(); } @@ -279,7 +284,7 @@ static bool str_encode(zcbor_state_t *state, } if (state->payload_mut != input->value) { /* Use memmove since string might be encoded into the same space - * because of bstrx_cbor_start_encode/bstrx_cbor_end_encode. */ + * because of zcbor_bstr_start_encode/zcbor_bstr_end_encode. 
*/ memmove(state->payload_mut, input->value, input->len); } state->payload += input->len; @@ -327,6 +332,106 @@ bool zcbor_tstr_put_term(zcbor_state_t *state, char const *str, size_t maxlen) } +#ifdef ZCBOR_FRAGMENTS + +static bool start_encode_fragments(zcbor_state_t *state, + zcbor_major_type_t major_type, size_t len, bool cbor_bstr) +{ + ZCBOR_CHECK_PAYLOAD(); + + if (state->inside_cbor_bstr) { + if ((state->str_total_len_cbor - zcbor_current_string_offset(state) - zcbor_header_len(len)) < len) { + ZCBOR_ERR(ZCBOR_ERR_INNER_STRING_TOO_LARGE); + } + } + + if (cbor_bstr) { + if (!zcbor_new_backup(state, 0)) { + ZCBOR_FAIL(); + } + } + + if (!value_encode(state, major_type, &len, sizeof(len))) { + if (cbor_bstr) { + zcbor_process_backup(state, ZCBOR_FLAG_CONSUME | ZCBOR_FLAG_RESTORE, 0xFFFFFFFF); + } + ZCBOR_FAIL(); + } + + ptrdiff_t new_offset = state->constant_state->curr_payload_section - state->payload; + + if (cbor_bstr) { + state->frag_offset_cbor = new_offset; + state->str_total_len_cbor = len; + state->inside_cbor_bstr = true; + } else { + state->frag_offset = new_offset; + state->str_total_len = len; + state->inside_frag_str = true; + } + + return true; +} + + +bool zcbor_bstr_fragments_start_encode(zcbor_state_t *state, size_t len) +{ + return start_encode_fragments(state, ZCBOR_MAJOR_TYPE_BSTR, len, false); +} + + +bool zcbor_tstr_fragments_start_encode(zcbor_state_t *state, size_t len) +{ + return start_encode_fragments(state, ZCBOR_MAJOR_TYPE_TSTR, len, false); +} + + +bool zcbor_cbor_bstr_fragments_start_encode(zcbor_state_t *state, size_t len) +{ + return start_encode_fragments(state, ZCBOR_MAJOR_TYPE_BSTR, len, true); +} + + +bool zcbor_str_fragment_encode(zcbor_state_t *state, struct zcbor_string *fragment, size_t *enc_len) +{ + ZCBOR_CHECK_PAYLOAD(); + + ZCBOR_ERR_IF(!state->inside_frag_str, ZCBOR_ERR_NOT_IN_FRAGMENT); + + size_t len = MIN(MIN((size_t)state->payload_end - (size_t)state->payload, fragment->len), + state->str_total_len - 
zcbor_current_string_offset(state)); + + memcpy(state->payload_mut, fragment->value, len); + state->payload += len; + + if (enc_len != NULL) { + *enc_len = len; + } + + return true; +} + + +bool zcbor_str_fragments_end_encode(zcbor_state_t *state) +{ + ZCBOR_ERR_IF(!state->inside_frag_str && !state->inside_cbor_bstr, ZCBOR_ERR_NOT_IN_FRAGMENT); + ZCBOR_ERR_IF(zcbor_current_string_remainder(state) != 0, ZCBOR_ERR_NOT_AT_END); + + if (state->inside_frag_str) { + state->inside_frag_str = false; + } else { + if (!zcbor_process_backup(state, ZCBOR_FLAG_RESTORE | ZCBOR_FLAG_CONSUME | ZCBOR_FLAG_KEEP_PAYLOAD, 0xFFFFFFFF)) { + ZCBOR_FAIL(); + } + state->elem_count++; + } + + return true; +} + +#endif /* ZCBOR_FRAGMENTS */ + + static bool list_map_start_encode(zcbor_state_t *state, size_t max_num, zcbor_major_type_t major_type) { @@ -376,6 +481,14 @@ static bool list_map_end_encode(zcbor_state_t *state, size_t max_num, size_t max_header_len = zcbor_header_len_ptr(&max_num, 4) - 1; size_t header_len = zcbor_header_len_ptr(&list_count, 4) - 1; + if (max_num == list_count) { + if (!zcbor_process_backup(state, ZCBOR_FLAG_RESTORE | ZCBOR_FLAG_CONSUME | ZCBOR_FLAG_KEEP_PAYLOAD, 0xFFFFFFFF)) { + ZCBOR_FAIL(); + } + state->elem_count++; + return true; + } + if (!zcbor_process_backup(state, ZCBOR_FLAG_RESTORE | ZCBOR_FLAG_CONSUME, 0xFFFFFFFF)) { ZCBOR_FAIL(); } diff --git a/tests/unit/test1_unit_tests/src/main.c b/tests/unit/test1_unit_tests/src/main.c index 61e8a3ed..3027e1c9 100644 --- a/tests/unit/test1_unit_tests/src/main.c +++ b/tests/unit/test1_unit_tests/src/main.c @@ -641,6 +641,191 @@ ZTEST(zcbor_unit_tests, test_bstr_cbor_fragments) zassert_mem_equal(output.value, &payload[4], 11, NULL); } +#define zassert_error(err, state) zassert_equal(err, zcbor_peek_error(state), #err " != %s\n", zcbor_error_str(zcbor_peek_error(state))) + + +ZTEST(zcbor_unit_tests, test_nested_fragments) +{ + uint8_t lorem[] = "Lorem ipsum dolor sit amet"; + struct zcbor_string lorem_str = {.value = 
lorem, .len = sizeof(lorem) - 1}; + struct zcbor_string lorem_str_exp = {.value = lorem, .len = sizeof(lorem) - 1}; + struct zcbor_string_fragment output_frags[3]; + struct zcbor_string_fragment output_frags_bstr; + uint8_t output_string[30]; + size_t output_str_len = sizeof(output_string); + struct zcbor_string res_str; + size_t enc_len; + uint8_t payload_frag1[4]; + int dummy_sep1; // To separate payload fragments + uint8_t payload_frag2[18]; + int dummy_sep2; // To separate payload fragments + uint8_t payload_frag3[10]; + int dummy_sep3; // To separate payload fragments + uint8_t payload_frag4[25]; + + uint8_t payload1[100]; + + (void)dummy_sep1; + (void)dummy_sep2; + (void)dummy_sep3; + + ZCBOR_STATE_E(state_e, 4, payload_frag1, sizeof(payload_frag1), 0); + + ZCBOR_STATE_D(state_d, 4, payload_frag1, sizeof(payload_frag1) - 1, 1, 0); + ZCBOR_STATE_D(state_d2, 4, payload1, sizeof(payload1), 1, 0); + + /* Start encode tests, negative tests are indented. */ + + /* payload_frag1 */ + zassert_true(zcbor_list_start_encode(state_e, 2)); + zassert_false(zcbor_str_fragments_end_encode(state_e)); + zassert_error(ZCBOR_ERR_NOT_IN_FRAGMENT, state_e); + zassert_false(zcbor_str_fragment_decode(state_d, &output_frags_bstr)); + zassert_equal(ZCBOR_ERR_NOT_IN_FRAGMENT, zcbor_peek_error(state_d)); + zassert_true(zcbor_uint32_put(state_e, 42)); + zassert_false(zcbor_cbor_bstr_fragments_start_encode(state_e, 38)); + zassert_error(ZCBOR_ERR_NO_PAYLOAD, state_e); + zcbor_update_state(state_e, payload_frag2, sizeof(payload_frag2)); /* Abandon 1 byte of the fragment. 
*/ + +#ifdef ZCBOR_CANONICAL + #define LEN_OFFS 0 +#else + #define LEN_OFFS 1 +#endif + + /* payload_frag2 */ + zassert_true(zcbor_cbor_bstr_fragments_start_encode(state_e, 37 + LEN_OFFS)); + zassert_true(zcbor_uint32_put(state_e, 43)); + zassert_true(zcbor_list_start_encode(state_e, 2)); + zassert_true(zcbor_uint32_put(state_e, 44)); + zassert_false(zcbor_cbor_bstr_fragments_start_encode(state_e, lorem_str.len + 5 + LEN_OFFS)); + zassert_error(ZCBOR_ERR_INNER_STRING_TOO_LARGE, state_e); + zassert_true(zcbor_cbor_bstr_fragments_start_encode(state_e, lorem_str.len + 4)); + zassert_false(zcbor_str_fragment_encode(state_e, &lorem_str, &enc_len)); + zassert_error(ZCBOR_ERR_NOT_IN_FRAGMENT, state_e); + zassert_true(zcbor_uint32_put(state_e, 45)); + zassert_false(zcbor_tstr_fragments_start_encode(state_e, lorem_str.len + 1)); + zassert_error(ZCBOR_ERR_INNER_STRING_TOO_LARGE, state_e); + bool ret = zcbor_tstr_fragments_start_encode(state_e, lorem_str.len); + zassert_true(ret, "err %s\n", zcbor_error_str(zcbor_peek_error(state_e))); + zassert_false(zcbor_uint32_put(state_e, 46)); + zassert_error(ZCBOR_ERR_INSIDE_STRING, state_e); + zassert_false(zcbor_tstr_fragments_start_encode(state_e, 1)); + zassert_error(ZCBOR_ERR_INSIDE_STRING, state_e); + zassert_true(zcbor_str_fragment_encode(state_e, &lorem_str, &enc_len)); + zassert_equal(sizeof(payload_frag2) - 13, enc_len); + zassert_false(zcbor_str_fragment_encode(state_e, &lorem_str, NULL)); + zassert_error(ZCBOR_ERR_NO_PAYLOAD, state_e); + zcbor_update_state(state_e, payload_frag3, sizeof(payload_frag3)); + + /* payload_frag3 */ + lorem_str.value += enc_len; + lorem_str.len -= enc_len; + zassert_true(zcbor_str_fragment_encode(state_e, &lorem_str, &enc_len)); + zassert_equal(sizeof(payload_frag3), enc_len); + zcbor_update_state(state_e, payload_frag4, sizeof(payload_frag4)); + + /* payload_frag4 */ + lorem_str.value += enc_len; + lorem_str.len -= enc_len; + zassert_true(zcbor_str_fragment_encode(state_e, &lorem_str, 
&enc_len)); + zassert_equal(lorem_str.len, enc_len, "%d != %d\n", lorem_str.len, enc_len); + zassert_true(zcbor_str_fragments_end_encode(state_e)); + zassert_true(zcbor_str_fragments_end_encode(state_e)); + ret = zcbor_list_end_encode(state_e, 2); + zassert_true(ret, "err %s\n", zcbor_error_str(zcbor_peek_error(state_e))); + + ret = zcbor_str_fragments_end_encode(state_e); + zassert_true(ret, "err %s\n", zcbor_error_str(zcbor_peek_error(state_e))); + + zassert_false(zcbor_str_fragments_end_encode(state_e)); + zassert_error(ZCBOR_ERR_NOT_IN_FRAGMENT, state_e); + zassert_true(zcbor_list_end_encode(state_e, 2)); + size_t offs = 0; + memcpy(payload1, payload_frag1, sizeof(payload_frag1) - 1); + offs += sizeof(payload_frag1) - 1; /* 1 abandoned byte */ + memcpy(&payload1[offs], payload_frag2, sizeof(payload_frag2)); + offs += sizeof(payload_frag2); + memcpy(&payload1[offs], payload_frag3, sizeof(payload_frag3)); + offs += sizeof(payload_frag3); + memcpy(&payload1[offs], payload_frag4, sizeof(payload_frag4)); + + /* Check */ + zassert_true(zcbor_list_start_decode(state_d2)); + zassert_true(zcbor_uint32_expect(state_d2, 42)); + zassert_true(zcbor_bstr_start_decode(state_d2, &res_str)); + zassert_true(zcbor_uint32_expect(state_d2, 43)); + zassert_true(zcbor_list_start_decode(state_d2)); + zassert_true(zcbor_uint32_expect(state_d2, 44)); + zassert_true(zcbor_bstr_start_decode(state_d2, &res_str)); + zassert_true(zcbor_uint32_expect(state_d2, 45)); + zassert_true(zcbor_tstr_expect(state_d2, &lorem_str_exp)); + zassert_true(zcbor_bstr_end_decode(state_d2)); + zassert_true(zcbor_list_end_decode(state_d2)); + zassert_true(zcbor_bstr_end_decode(state_d2)); + zassert_true(zcbor_list_end_decode(state_d2)); + + /* Start decode tests, negative tests are indented. 
*/ + + /* payload_frag1 */ + zassert_true(zcbor_list_start_decode(state_d)); + zassert_false(zcbor_str_fragments_end_decode(state_d)); + zassert_error(ZCBOR_ERR_NOT_IN_FRAGMENT, state_d); + zassert_true(zcbor_uint32_expect(state_d, 42)); + zassert_false(zcbor_cbor_bstr_fragments_start_decode(state_d)); + zassert_error(ZCBOR_ERR_NO_PAYLOAD, state_d); + zcbor_update_state(state_d, payload_frag2, sizeof(payload_frag2)); + + /* payload_frag2 */ + zassert_true(zcbor_cbor_bstr_fragments_start_decode(state_d)); + zassert_true(zcbor_uint32_expect(state_d, 43)); + zassert_true(zcbor_list_start_decode(state_d)); + zassert_false(zcbor_cbor_bstr_fragments_start_decode(state_d)); + zassert_error(ZCBOR_ERR_WRONG_TYPE, state_d); + zassert_true(zcbor_uint32_expect(state_d, 44)); + state_d->payload_mut[1] += 2; /* induce an error */ + zassert_false(zcbor_cbor_bstr_fragments_start_decode(state_d)); + zassert_error(ZCBOR_ERR_INNER_STRING_TOO_LARGE, state_d); + state_d->payload_mut[1] -= 2; + zassert_true(zcbor_str_fragment_decode(state_d, &output_frags_bstr)); + zassert_equal_ptr(output_frags_bstr.fragment.value, &payload_frag2[2]); + zassert_equal_ptr(output_frags_bstr.fragment.len, state_d->payload - &payload_frag2[2]); + zassert_equal(37 + LEN_OFFS, output_frags_bstr.total_len); + zassert_equal(0, output_frags_bstr.offset); + zassert_true(zcbor_cbor_bstr_fragments_start_decode(state_d)); + zassert_true(zcbor_uint32_expect(state_d, 45)); + zassert_true(zcbor_str_fragment_decode(state_d, &output_frags_bstr)); + zassert_equal_ptr(output_frags_bstr.fragment.value, state_d->payload - 2); + zassert_equal_ptr(output_frags_bstr.fragment.len, 2); + zassert_equal(sizeof(lorem) + 3, output_frags_bstr.total_len, "%d != %d\r\n", sizeof(lorem) + 3, output_frags_bstr.total_len); + zassert_equal(0, output_frags_bstr.offset); + zassert_true(zcbor_tstr_fragments_start_decode(state_d)); + zassert_false(zcbor_uint32_expect(state_d, 46)); + zassert_error(ZCBOR_ERR_INSIDE_STRING, state_d); + 
zassert_false(zcbor_tstr_fragments_start_decode(state_d)); + zassert_error(ZCBOR_ERR_INSIDE_STRING, state_d); + zassert_true(zcbor_str_fragment_decode(state_d, &output_frags[0])); + zassert_false(zcbor_str_fragment_decode(state_d, &output_frags[1])); + zassert_error(ZCBOR_ERR_NO_PAYLOAD, state_d); + zcbor_update_state(state_d, payload_frag3, sizeof(payload_frag3)); + + /* payload_frag3 */ + zassert_true(zcbor_str_fragment_decode(state_d, &output_frags[1])); + zcbor_update_state(state_d, payload_frag4, sizeof(payload_frag4)); + + /* payload_frag4 */ + zassert_true(zcbor_str_fragment_decode(state_d, &output_frags[2])); + zassert_true(zcbor_validate_string_fragments(output_frags, 3)); + zassert_true(zcbor_splice_string_fragments(output_frags, 3, output_string, &output_str_len)); + zassert_mem_equal(output_string, lorem, sizeof(lorem) - 1); + zassert_true(zcbor_str_fragments_end_decode(state_d)); + zassert_true(zcbor_str_fragments_end_decode(state_d)); + zassert_false(zcbor_str_fragment_decode(state_d, &output_frags_bstr)); + zassert_true(zcbor_list_end_decode(state_d), NULL); + zassert_true(zcbor_str_fragments_end_decode(state_d)); +} + + ZTEST(zcbor_unit_tests, test_canonical_list) { #ifndef ZCBOR_CANONICAL