zcbor_encode: Add new fragmented string encoding API

to match the redesigned decoding API. Signed-off-by: Øyvind Rønningstad <[email protected]>
NordicSemiconductor · Apr 29, 2024 · e6ca233 · e6ca233
1 parent b257663
commit e6ca233
Show file tree

Hide file tree

Showing 4 changed files with 351 additions and 2 deletions.
diff --git a/README.md b/README.md
@@ -88,6 +88,31 @@ ZCBOR_STATE_D(decode_state, n, payload, payload_len, elem_count, n_flags);
 ZCBOR_STATE_E(encode_state, n, payload, payload_len, 0);
 ```
 
+Fragmented payloads
+-------------------
+
+zcbor can encode and decode payloads in sections.
+This can be useful e.g. if you send or receive your payload in multiple packets.
+When the current payload section is done, call `zcbor_update_state()` to introduce the next section.
+Note that zcbor does not allow section boundaries to fall inside a zcbor header/value pair.
+This means that the following elements cannot be split between sections:
+
+- Numbers and simple values (integers, floats, bools, undefined, nil)
+- Tags
+- Headers of lists, maps, tstrs, and bstrs
+
+If your payload is split in an unsupported way, you can get around it by making a small section out of the remaining bytes of one section spliced with the start of the next.
+Another option is to leave a little room at the start of each section buffer, and copy the remaining end of one section into the start of the next buffer.
+8 bytes should be enough for this.
+
+Lists and maps can span multiple sections, as long as the individual elements are not split in a way that breaks the above rule.
+
+String payloads can be split across multiple payload sections, if `ZCBOR_FRAGMENTS` is enabled, and the `*str_fragments_*()` APIs are used. Note that in the zcbor docs, the term "string fragment" is used for fragmented strings, while the term "payload section" is used for fragmented CBOR payloads, as passed to `zcbor_update_state()`. These do not always line up perfectly, particularly at the start and end of fragmented strings.
+
+CBOR-encoded bstrs can be nested, and there can also be a non-CBOR-encoded innermost string.
+The current innermost string is called the "current string".
+`zcbor_update_state()` modifies all backups so that outer nested strings have updated information about the new section.
+
 Configuration
 -------------
 

diff --git a/include/zcbor_encode.h b/include/zcbor_encode.h
@@ -231,6 +231,46 @@ bool zcbor_bstr_start_encode(zcbor_state_t *state);
  */
 bool zcbor_bstr_end_encode(zcbor_state_t *state, struct zcbor_string *result);
 
+
+#ifdef ZCBOR_FRAGMENTS
+
+/** Start encoding a fragmented string. I.e. a string spread over non-consecutive payload sections.
+ *
+ * After calling this, you can write a fragment with @ref zcbor_str_fragment_encode,
+ * then update the payload with @ref zcbor_update_state.
+ * Repeat until the string is fully encoded, then call @ref zcbor_str_fragments_end_encode.
+ */
+bool zcbor_bstr_fragments_start_encode(zcbor_state_t *state, size_t total_len);
+bool zcbor_tstr_fragments_start_encode(zcbor_state_t *state, size_t total_len);
+
+/** Start encoding a fragmented CBOR-encoded bytestring.
+ *
+ * I.e. a string spread over non-consecutive payload sections.
+ *
+ * This is an alternative to zcbor_*str_fragments_start_encode() to be used if the payload
+ * contains CBOR data that will be encoded directly with other zcbor_*() functions.
+ *
+ * A state backup is created to keep track of the element count and original payload_end.
+ * After calling this, you can encode elements using other zcbor functions,
+ * then update the payload with @ref zcbor_update_state.
+ * Repeat until the string is fully encoded, then call @ref zcbor_str_fragments_end_encode.
+ * When the current payload section contains the end of the string,
+ * payload_end is set to the end of the string, so there is no risk of encoding past the end.
+ */
+bool zcbor_cbor_bstr_fragments_start_encode(zcbor_state_t *state, size_t total_len);
+
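+/** Encode a string fragment.
+ *
+ * Copy bytes from the fragment into the payload until either the end of the payload,
+ * the end of the fragment, or the end of the string is reached.
+ * If enc_len is not NULL, it receives the number of bytes that were written.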
+ * Do not use this function with @ref zcbor_cbor_bstr_fragments_start_encode.
+ */
+bool zcbor_str_fragment_encode(zcbor_state_t *state, struct zcbor_string *fragment, size_t *enc_len);
+
+/** Finish encoding a fragmented string. */
+bool zcbor_str_fragments_end_encode(zcbor_state_t *state);
+
+#endif /* ZCBOR_FRAGMENTS */
+
 #ifdef __cplusplus
 }
 #endif

diff --git a/src/zcbor_encode.c b/src/zcbor_encode.c
@@ -48,6 +48,10 @@ static bool encode_header_byte(zcbor_state_t *state,
 
 	zcbor_assert_state(additional < 32, "Unsupported additional value: %d\r\n", additional);
 
+#ifdef ZCBOR_FRAGMENTS
+	ZCBOR_ERR_IF(state->inside_frag_str, ZCBOR_ERR_INSIDE_STRING);
+#endif
+
 	*(state->payload_mut) = (uint8_t)((major_type << 5) | (additional & 0x1F));
 	zcbor_trace(state, "value_encode");
 	state->payload_mut++;
@@ -243,6 +247,7 @@ bool zcbor_bstr_start_encode(zcbor_state_t *state)
 
 	/* Encode a dummy header */
 	if (!value_encode(state, ZCBOR_MAJOR_TYPE_BSTR, &max_len, sizeof(max_len))) {
+		zcbor_process_backup(state, ZCBOR_FLAG_CONSUME, 0xFFFFFFFF);
 		ZCBOR_FAIL();
 	}
 	return true;
@@ -268,7 +273,7 @@ bool zcbor_bstr_end_encode(zcbor_state_t *state, struct zcbor_string *result)
 	result->value = state->payload_end - remaining_str_len(state);
 	result->len = (size_t)payload - (size_t)result->value;
 
-	/* Reencode header of list now that we know the number of elements. */
+	/* Reencode header of bstr now that we know the length. */
 	if (!zcbor_bstr_encode(state, result)) {
 		ZCBOR_FAIL();
 	}
@@ -288,7 +293,7 @@ static bool str_encode(zcbor_state_t *state,
 	}
 	if (state->payload_mut != input->value) {
 		/* Use memmove since string might be encoded into the same space
-		 * because of bstrx_cbor_start_encode/bstrx_cbor_end_encode. */
+		 * because of zcbor_bstr_start_encode/zcbor_bstr_end_encode. */
 		memmove(state->payload_mut, input->value, input->len);
 	}
 	state->payload += input->len;
@@ -336,6 +341,106 @@ bool zcbor_tstr_put_term(zcbor_state_t *state, char const *str, size_t maxlen)
 }
 
 
+#ifdef ZCBOR_FRAGMENTS
+
+/** Encode the header of a fragmented string and mark the state as being inside it.
+ *
+ * @param[inout] state       The current state.
+ * @param[in]    major_type  Major type to put in the string header.
+ * @param[in]    len         Total length of the string, as encoded in the header.
+ * @param[in]    cbor_bstr   If true, a state backup is created before the header is
+ *                           encoded, and the "cbor bstr" bookkeeping fields are updated
+ *                           instead of the plain fragmented-string ones.
+ *
+ * @retval true   The header was encoded and the state was updated.
+ * @retval false  An error occurred; no fragment bookkeeping has been modified.
+ */
+static bool start_encode_fragments(zcbor_state_t *state,
+	zcbor_major_type_t major_type, size_t len, bool cbor_bstr)
+{
+	ZCBOR_CHECK_PAYLOAD();
+
+	if (state->inside_cbor_bstr) {
+		/* The new string (header + contents) must fit in what is left of the
+		 * enclosing CBOR-encoded bstr. The comparison is done in two steps so
+		 * that the unsigned subtraction cannot wrap around when fewer bytes
+		 * remain than the header itself needs. */
+		size_t remaining = state->str_total_len_cbor - zcbor_current_string_offset(state);
+		size_t header_len = zcbor_header_len(len);
+
+		if ((remaining < header_len) || ((remaining - header_len) < len)) {
+			ZCBOR_ERR(ZCBOR_ERR_INNER_STRING_TOO_LARGE);
+		}
+	}
+
+	if (cbor_bstr) {
+		if (!zcbor_new_backup(state, 0)) {
+			ZCBOR_FAIL();
+		}
+	}
+
+	if (!value_encode(state, major_type, &len, sizeof(len))) {
+		if (cbor_bstr) {
+			/* Undo the backup taken above so a failed start leaves none behind. */
+			zcbor_process_backup(state, ZCBOR_FLAG_CONSUME | ZCBOR_FLAG_RESTORE, 0xFFFFFFFF);
+		}
+		ZCBOR_FAIL();
+	}
+
+	/* Start of the current payload section minus the payload pointer just after
+	 * the header, i.e. zero or negative when the payload pointer is at or past
+	 * the section start. */
+	ptrdiff_t new_offset = state->constant_state->curr_payload_section - state->payload;
+
+	if (cbor_bstr) {
+		state->frag_offset_cbor = new_offset;
+		state->str_total_len_cbor = len;
+		state->inside_cbor_bstr = true;
+	} else {
+		state->frag_offset = new_offset;
+		state->str_total_len = len;
+		state->inside_frag_str = true;
+	}
+
+	return true;
+}
+
+
+/* Start a fragmented bstr of total length len (not CBOR-encoded contents). */
+bool zcbor_bstr_fragments_start_encode(zcbor_state_t *state, size_t len)
+{
+	const bool is_cbor_bstr = false;
+
+	return start_encode_fragments(state, ZCBOR_MAJOR_TYPE_BSTR, len, is_cbor_bstr);
+}
+
+
+/* Start a fragmented tstr of total length len. */
+bool zcbor_tstr_fragments_start_encode(zcbor_state_t *state, size_t len)
+{
+	const bool is_cbor_bstr = false;
+
+	return start_encode_fragments(state, ZCBOR_MAJOR_TYPE_TSTR, len, is_cbor_bstr);
+}
+
+
+/* Start a fragmented bstr of total length len whose contents are encoded
+ * with other zcbor functions; a state backup is created by the helper. */
+bool zcbor_cbor_bstr_fragments_start_encode(zcbor_state_t *state, size_t len)
+{
+	const bool is_cbor_bstr = true;
+
+	return start_encode_fragments(state, ZCBOR_MAJOR_TYPE_BSTR, len, is_cbor_bstr);
+}
+
+
+/* Copy as much of fragment as fits into the payload.
+ *
+ * Fails with ZCBOR_ERR_NOT_IN_FRAGMENT unless a fragmented string has been
+ * started. The number of bytes copied is the smallest of: the room left in
+ * the payload, the fragment length, and the part of the string that has not
+ * yet been written. That number is stored in *enc_len when enc_len is not NULL.
+ */
+bool zcbor_str_fragment_encode(zcbor_state_t *state, struct zcbor_string *fragment, size_t *enc_len)
+{
+	ZCBOR_CHECK_PAYLOAD();
+
+	ZCBOR_ERR_IF(!state->inside_frag_str, ZCBOR_ERR_NOT_IN_FRAGMENT);
+
+	size_t payload_room = (size_t)state->payload_end - (size_t)state->payload;
+	size_t str_remaining = state->str_total_len - zcbor_current_string_offset(state);
+	size_t num_bytes = MIN(payload_room, fragment->len);
+
+	num_bytes = MIN(num_bytes, str_remaining);
+
+	memcpy(state->payload_mut, fragment->value, num_bytes);
+	state->payload += num_bytes;
+
+	if (enc_len != NULL) {
+		*enc_len = num_bytes;
+	}
+
+	return true;
+}
+
+
+/* Finish the current fragmented string.
+ *
+ * Fails with ZCBOR_ERR_NOT_IN_FRAGMENT if no fragmented string is open, and
+ * with ZCBOR_ERR_NOT_AT_END if the current string still has bytes remaining.
+ * A plain fragmented string only has its flag cleared. Otherwise the backup is
+ * restored and consumed while keeping the payload, and the element count is
+ * incremented.
+ */
+bool zcbor_str_fragments_end_encode(zcbor_state_t *state)
+{
+	ZCBOR_ERR_IF(!state->inside_frag_str && !state->inside_cbor_bstr, ZCBOR_ERR_NOT_IN_FRAGMENT);
+	ZCBOR_ERR_IF(zcbor_current_string_remainder(state) != 0, ZCBOR_ERR_NOT_AT_END);
+
+	if (state->inside_frag_str) {
+		/* Plain fragmented string: nothing to restore. */
+		state->inside_frag_str = false;
+		return true;
+	}
+
+	/* CBOR-encoded bstr: pop the backup, but keep the payload position. */
+	if (!zcbor_process_backup(state, ZCBOR_FLAG_RESTORE | ZCBOR_FLAG_CONSUME | ZCBOR_FLAG_KEEP_PAYLOAD, 0xFFFFFFFF)) {
+		ZCBOR_FAIL();
+	}
+	state->elem_count++;
+
+	return true;
+}
+
+#endif /* ZCBOR_FRAGMENTS */
+
+
 static bool list_map_start_encode(zcbor_state_t *state, size_t max_num,
 		zcbor_major_type_t major_type)
 {
@@ -385,6 +490,14 @@ static bool list_map_end_encode(zcbor_state_t *state, size_t max_num,
 	size_t max_header_len = zcbor_header_len_ptr(&max_num, 4) - 1;
 	size_t header_len = zcbor_header_len_ptr(&list_count, 4) - 1;
 
+	if (max_num == list_count) {
+		if (!zcbor_process_backup(state, ZCBOR_FLAG_RESTORE | ZCBOR_FLAG_CONSUME | ZCBOR_FLAG_KEEP_PAYLOAD, 0xFFFFFFFF)) {
+			ZCBOR_FAIL();
+		}
+		state->elem_count++;
+		return true;
+	}
+
 	if (!zcbor_process_backup(state, ZCBOR_FLAG_RESTORE | ZCBOR_FLAG_CONSUME, 0xFFFFFFFF)) {
 		ZCBOR_FAIL();
 	}