diff --git a/src/libbson/doc/bson_iter_t.rst b/src/libbson/doc/bson_iter_t.rst index 620388f775..89832351ab 100644 --- a/src/libbson/doc/bson_iter_t.rst +++ b/src/libbson/doc/bson_iter_t.rst @@ -132,6 +132,7 @@ The :symbol:`bson_t` *MUST* be valid for the lifetime of the iter and it is an e bson_iter_utf8 bson_iter_value bson_iter_visit_all + bson_iter_visit_all_v2 Examples -------- diff --git a/src/libbson/doc/bson_iter_visit_all.rst b/src/libbson/doc/bson_iter_visit_all.rst index e7e35231fe..f2a763e5d0 100644 --- a/src/libbson/doc/bson_iter_visit_all.rst +++ b/src/libbson/doc/bson_iter_visit_all.rst @@ -23,7 +23,11 @@ Parameters Description ----------- -A convenience function to iterate all remaining fields of ``iter`` using the callback vtable provided by ``visitor``. +A convenience function to iterate all remaining fields of ``iter`` using the +callback vtable provided by ``visitor``. + +This function is equivalent to :symbol:`bson_iter_visit_all_v2` with +``BSON_ITER_VISIT_DEFAULT`` given for its ``flags`` argument. Returns ------- diff --git a/src/libbson/doc/bson_iter_visit_all_v2.rst b/src/libbson/doc/bson_iter_visit_all_v2.rst new file mode 100644 index 0000000000..4fadd03c8a --- /dev/null +++ b/src/libbson/doc/bson_iter_visit_all_v2.rst @@ -0,0 +1,118 @@ +:man_page: bson_iter_visit_all_v2 + +bson_iter_visit_all_v2() +======================== + +Synopsis +-------- + +.. code-block:: c + + enum bson_iter_visit_flags { + BSON_ITER_VISIT_NOFLAGS = 0, + BSON_ITER_VISIT_VALIDATE_KEYS = 1 << 0, + BSON_ITER_VISIT_VALIDATE_UTF8 = 1 << 1, + BSON_ITER_VISIT_VALIDATE_REGEX = 1 << 2, + BSON_ITER_VISIT_VALIDATE_CODE = 1 << 3, + BSON_ITER_VISIT_VALIDATE_DBPOINTER = 1 << 4, + BSON_ITER_VISIT_VALIDATE_SYMBOL = 1 << 5, + BSON_ITER_VISIT_ALLOW_NUL_IN_UTF8 = 1 << 6, + BSON_ITER_VISIT_VALIDATE_STRINGS = + BSON_ITER_VISIT_VALIDATE_KEYS | BSON_ITER_VISIT_VALIDATE_UTF8 | + BSON_ITER_VISIT_VALIDATE_REGEX | BSON_ITER_VISIT_VALIDATE_CODE | + BSON_ITER_VISIT_VALIDATE_DBPOINTER | BSON_ITER_VISIT_VALIDATE_SYMBOL, + BSON_ITER_VISIT_VALIDATE_VALUES = + BSON_ITER_VISIT_VALIDATE_UTF8 | BSON_ITER_VISIT_VALIDATE_REGEX | + BSON_ITER_VISIT_VALIDATE_CODE | BSON_ITER_VISIT_VALIDATE_DBPOINTER | + BSON_ITER_VISIT_VALIDATE_SYMBOL, + BSON_ITER_VISIT_DEFAULT = BSON_ITER_VISIT_VALIDATE_KEYS | + BSON_ITER_VISIT_VALIDATE_VALUES | + BSON_ITER_VISIT_ALLOW_NUL_IN_UTF8, + }; + + bool + bson_iter_visit_all_v2 (bson_iter_t *iter, + const bson_visitor_t *visitor, + const bson_iter_visit_flags flags. + void *data); + +Parameters +---------- + +* ``iter``: A :symbol:`bson_iter_t`. +* ``visitor``: A :symbol:`bson_visitor_t`. +* ``flags``: A set of bit flags from ``bson_iter_visit_flags`` to control + visitation (See below). +* ``data``: Optional data for ``visitor``. + +Description +----------- + +A convenience function to iterate all remaining fields of ``iter`` using the +callback vtable provided by ``visitor``. + +This function is an extension of :symbol:`bson_iter_visit_all`, with the +additional ``flags`` parameter that can be used to control the behavior of the +visit iteration. The following flags are defined: + +``BSON_ITER_VISIT_NOFLAGS`` + + None of the behaviors requested by any other options are used. + +``BSON_ITER_VISIT_VALIDATE_KEYS`` + + Validate that element keys are valid UTF-8-encoded strings. + +``BSON_ITER_VISIT_VALIDATE_UTF8`` + + Validate that UTF-8 text elements are valid UTF-8-encoded strings. + +``BSON_ITER_VISIT_VALIDATE_REGEX`` + + Validate regular expression elements to contain valid UTF-8 encoded strings. + +``BSON_ITER_VISIT_VALIDATE_CODE`` + + Validate code elements that the code component is a valid UTF-8-encoded + string. + +``BSON_ITER_VISIT_VALIDATE_DBPOINTER`` + + Validate that DBPointer element names are valid UTF-8-encoded strings. + +``BSON_ITER_VISIT_VALIDATE_SYMBOL`` + + Validate that symbol elements are valid UTF-8-encoded strings. + +``BSON_ITER_VISIT_ALLOW_NUL_IN_UTF8`` + + When validating any UTF-8 strings, permit a zero ``0x00`` code unit as a valid + UTF-8 code unit. + +``BSON_ITER_VISIT_VALIDATE_STRINGS`` + + Validate all strings in all components in top-level elements of the document. + +``BSON_ITER_VISIT_VALIDATE_VALUES`` + + Validate all values in all top-level elements of the document. + +``BSON_ITER_VISIT_DEFAULT`` + + Validate all keys and values. Permits a zero ``0x00`` UTF-8 code unit in + strings. + + This flag has the same behavior as :symbol:`bson_iter_visit_all`. + +.. note:: + + If a requested element fails validation, the ``bson_iter_visit_all_v2`` call + will return and indicate the position of the erring element via ``iter``. + +Returns +------- + +Returns true if visitation was prematurely stopped by a callback function. Returns false either because all elements were visited *or* due to corrupt BSON. + +See :symbol:`bson_visitor_t` for examples of how to set your own callbacks to provide information about the location of corrupt or unsupported BSON document entries. + diff --git a/src/libbson/src/bson/bson-iter.c b/src/libbson/src/bson/bson-iter.c index 7b4630f94c..3b4bf2dd00 100644 --- a/src/libbson/src/bson/bson-iter.c +++ b/src/libbson/src/bson/bson-iter.c @@ -1888,23 +1888,34 @@ bson_iter_array (const bson_iter_t *iter, /* IN */ #define VISIT_MAXKEY VISIT_FIELD (maxkey) #define VISIT_MINKEY VISIT_FIELD (minkey) +bool +bson_iter_visit_all (bson_iter_t *iter, + const bson_visitor_t *visitor, + void *data) +{ + return bson_iter_visit_all_v2 (iter, visitor, BSON_ITER_VISIT_DEFAULT, data); +} bool -bson_iter_visit_all (bson_iter_t *iter, /* INOUT */ - const bson_visitor_t *visitor, /* IN */ - void *data) /* IN */ +bson_iter_visit_all_v2 (bson_iter_t *iter, + const bson_visitor_t *visitor, + enum bson_iter_visit_flags flags, + void *data) { uint32_t bson_type = 0; const char *key = NULL; bool unsupported; + const bool allow_nul = flags & BSON_ITER_VISIT_ALLOW_NUL_IN_UTF8; BSON_ASSERT (iter); BSON_ASSERT (visitor); while (_bson_iter_next_internal (iter, 0, &key, &bson_type, &unsupported)) { - if (*key && !bson_utf8_validate (key, strlen (key), false)) { - iter->err_off = iter->off; - break; + if (flags & BSON_ITER_VISIT_VALIDATE_KEYS) { + if (*key && !bson_utf8_validate (key, strlen (key), false)) { + iter->err_off = iter->off; + break; + } } if (VISIT_BEFORE (iter, key, data)) { @@ -1925,9 +1936,11 @@ bson_iter_visit_all (bson_iter_t *iter, /* INOUT */ utf8 = bson_iter_utf8 (iter, &utf8_len); - if (!bson_utf8_validate (utf8, utf8_len, true)) { - iter->err_off = iter->off; - return true; + if (flags & BSON_ITER_VISIT_VALIDATE_UTF8) { + if (!bson_utf8_validate (utf8, utf8_len, allow_nul)) { + iter->err_off = iter->off; + return true; + } } if (VISIT_UTF8 (iter, key, utf8_len, utf8, data)) { @@ -2009,9 +2022,11 @@ bson_iter_visit_all (bson_iter_t *iter, /* INOUT */ const char *options = NULL; regex = bson_iter_regex (iter, &options); - if (!bson_utf8_validate (regex, strlen (regex), true)) { - iter->err_off = iter->off; - return true; + if (flags & BSON_ITER_VISIT_VALIDATE_REGEX) { + if (!bson_utf8_validate (regex, strlen (regex), allow_nul)) { + iter->err_off = iter->off; + return true; + } } if (VISIT_REGEX (iter, key, regex, options, data)) { @@ -2025,9 +2040,11 @@ bson_iter_visit_all (bson_iter_t *iter, /* INOUT */ bson_iter_dbpointer (iter, &collection_len, &collection, &oid); - if (!bson_utf8_validate (collection, collection_len, true)) { - iter->err_off = iter->off; - return true; + if (flags & BSON_ITER_VISIT_VALIDATE_DBPOINTER) { + if (!bson_utf8_validate (collection, collection_len, allow_nul)) { + iter->err_off = iter->off; + return true; + } } if (VISIT_DBPOINTER ( @@ -2041,9 +2058,11 @@ bson_iter_visit_all (bson_iter_t *iter, /* INOUT */ code = bson_iter_code (iter, &code_len); - if (!bson_utf8_validate (code, code_len, true)) { - iter->err_off = iter->off; - return true; + if (flags & BSON_ITER_VISIT_VALIDATE_CODE) { + if (!bson_utf8_validate (code, code_len, allow_nul)) { + iter->err_off = iter->off; + return true; + } } if (VISIT_CODE (iter, key, code_len, code, data)) { @@ -2056,9 +2075,11 @@ bson_iter_visit_all (bson_iter_t *iter, /* INOUT */ symbol = bson_iter_symbol (iter, &symbol_len); - if (!bson_utf8_validate (symbol, symbol_len, true)) { - iter->err_off = iter->off; - return true; + if (flags & BSON_ITER_VISIT_VALIDATE_SYMBOL) { + if (!bson_utf8_validate (symbol, symbol_len, allow_nul)) { + iter->err_off = iter->off; + return true; + } } if (VISIT_SYMBOL (iter, key, symbol_len, symbol, data)) { @@ -2074,9 +2095,11 @@ bson_iter_visit_all (bson_iter_t *iter, /* INOUT */ code = bson_iter_codewscope (iter, &length, &doclen, &docbuf); - if (!bson_utf8_validate (code, length, true)) { - iter->err_off = iter->off; - return true; + if (flags & BSON_ITER_VISIT_VALIDATE_CODE) { + if (!bson_utf8_validate (code, length, allow_nul)) { + iter->err_off = iter->off; + return true; + } } if (bson_init_static (&b, docbuf, doclen) && diff --git a/src/libbson/src/bson/bson-iter.h b/src/libbson/src/bson/bson-iter.h index 92e1968f74..ed8ee004c1 100644 --- a/src/libbson/src/bson/bson-iter.h +++ b/src/libbson/src/bson/bson-iter.h @@ -511,7 +511,8 @@ bson_iter_overwrite_double (bson_iter_t *iter, double value); BSON_EXPORT (void) -bson_iter_overwrite_decimal128 (bson_iter_t *iter, const bson_decimal128_t *value); +bson_iter_overwrite_decimal128 (bson_iter_t *iter, + const bson_decimal128_t *value); BSON_EXPORT (void) @@ -537,6 +538,48 @@ bson_iter_visit_all (bson_iter_t *iter, const bson_visitor_t *visitor, void *data); +/** + * @brief Flag options to control bson_iter_visit APIs + */ +enum bson_iter_visit_flags { + BSON_ITER_VISIT_NOFLAGS = 0, + /// Validate element keys to be valid UTF-8 + BSON_ITER_VISIT_VALIDATE_KEYS = 1 << 0, + /// Validate UTF-8 elements to be valid UTF-8 + BSON_ITER_VISIT_VALIDATE_UTF8 = 1 << 1, + /// Validate regular expression elements to contain valid UTF-8 + BSON_ITER_VISIT_VALIDATE_REGEX = 1 << 2, + /// Validate code and code_w_scope elements to contain valid UTF-8 + BSON_ITER_VISIT_VALIDATE_CODE = 1 << 3, + /// Validate dbPointer elements to contain valid UTF-8 + BSON_ITER_VISIT_VALIDATE_DBPOINTER = 1 << 4, + /// Validate symbol elements to contain valid UTF-8 + BSON_ITER_VISIT_VALIDATE_SYMBOL = 1 << 5, + /// When checking UTF-8 strings, allow a NUL as a valid codepoint + BSON_ITER_VISIT_ALLOW_NUL_IN_UTF8 = 1 << 6, + /// Validate all strings in the visited BSON to be valid UTF-8 strings + BSON_ITER_VISIT_VALIDATE_STRINGS = + BSON_ITER_VISIT_VALIDATE_KEYS | BSON_ITER_VISIT_VALIDATE_UTF8 | + BSON_ITER_VISIT_VALIDATE_REGEX | BSON_ITER_VISIT_VALIDATE_CODE | + BSON_ITER_VISIT_VALIDATE_DBPOINTER | BSON_ITER_VISIT_VALIDATE_SYMBOL, + /// Validate all element values to be valid (does not validate keys) + BSON_ITER_VISIT_VALIDATE_VALUES = + BSON_ITER_VISIT_VALIDATE_UTF8 | BSON_ITER_VISIT_VALIDATE_REGEX | + BSON_ITER_VISIT_VALIDATE_CODE | BSON_ITER_VISIT_VALIDATE_DBPOINTER | + BSON_ITER_VISIT_VALIDATE_SYMBOL, + /// The default option for bson_iter_visit_all_v2 (validates all keys and + /// values, allows NUL in UTF-8 strings) + BSON_ITER_VISIT_DEFAULT = BSON_ITER_VISIT_VALIDATE_KEYS | + BSON_ITER_VISIT_VALIDATE_VALUES | + BSON_ITER_VISIT_ALLOW_NUL_IN_UTF8, +}; + +BSON_EXPORT (bool) +bson_iter_visit_all_v2 (bson_iter_t *iter, + const bson_visitor_t *visitor, + enum bson_iter_visit_flags flags, + void *data); + BSON_EXPORT (uint32_t) bson_iter_offset (bson_iter_t *iter); diff --git a/src/libbson/src/bson/bson.c b/src/libbson/src/bson/bson.c index 87e76c3ae9..30d7600c35 100644 --- a/src/libbson/src/bson/bson.c +++ b/src/libbson/src/bson/bson.c @@ -3584,7 +3584,15 @@ _bson_iter_validate_document (const bson_iter_t *iter, state->phase = BSON_VALIDATE_PHASE_LF_REF_KEY; } - (void) bson_iter_visit_all (&child, &bson_validate_funcs, state); + bson_iter_visit_all_v2 ( + &child, &bson_validate_funcs, BSON_ITER_VISIT_VALIDATE_KEYS, state); + + // Assert: bson_iter_visit_all_v2 will never generate an error and not give + // that error to our visitor as a "corrupt" BSON element. This implies that + // if child.err_off is greater than zero, our own error offset MUST ALSO be + // greater than zero, because the state will have been told about that + // invalid element. + BSON_ASSERT (child.err_off <= 0 || state->err_offset > 0); if (state->phase == BSON_VALIDATE_PHASE_LF_ID_KEY || state->phase == BSON_VALIDATE_PHASE_LF_REF_UTF8 || diff --git a/src/libbson/tests/binary/invalid-key.bson b/src/libbson/tests/binary/invalid-key.bson new file mode 100644 index 0000000000..54f451dd80 Binary files /dev/null and b/src/libbson/tests/binary/invalid-key.bson differ diff --git a/src/libbson/tests/binary/invalid-utf8.bson b/src/libbson/tests/binary/invalid-utf8.bson new file mode 100644 index 0000000000..18a2674aa6 Binary files /dev/null and b/src/libbson/tests/binary/invalid-utf8.bson differ diff --git a/src/libbson/tests/test-bson.c b/src/libbson/tests/test-bson.c index 33443642da..1ac314280b 100644 --- a/src/libbson/tests/test-bson.c +++ b/src/libbson/tests/test-bson.c @@ -1198,6 +1198,16 @@ test_bson_validate (void) 9, BSON_VALIDATE_NONE, "corrupt BSON"); + VALIDATE_TEST ("invalid-utf8.bson", + BSON_VALIDATE_UTF8, + 4, + BSON_VALIDATE_UTF8, + "Invalid utf8 string for key \"a\""); + VALIDATE_TEST ("invalid-key.bson", + BSON_VALIDATE_UTF8, + 4, + BSON_VALIDATE_NONE, + "corrupt BSON"); VALIDATE_TEST ("overflow4.bson", BSON_VALIDATE_NONE, 9,