From b730fcab9c3e8e773a114a65f035d6f70ddbf597 Mon Sep 17 00:00:00 2001 From: Joao Paulo Magalhaes Date: Sat, 18 May 2024 22:10:34 +0100 Subject: [PATCH] add fuzz tests and ci --- .github/setenv.sh | 4 +- .github/workflows/clang.yml | 53 ++++++- .github/workflows/coverage.yml | 2 +- .github/workflows/gcc.yml | 20 ++- README.md | 98 ++++++------- ROADMAP.md | 2 +- doc/index.rst | 4 +- doc/sphinx_quicklinks.rst | 2 +- doc/sphinx_yaml_standard.rst | 20 ++- src/c4/yml/detail/stack.hpp | 12 +- src/c4/yml/event_handler_stack.hpp | 4 +- test/CMakeLists.txt | 137 ++++++++++++++++++- test/test_fuzz/test_fuzz_common.hpp | 134 ++++++++++++++++++ test/test_fuzz/test_fuzz_events.cpp | 9 ++ test/test_fuzz/test_fuzz_main.cpp | 16 +++ test/test_fuzz/test_fuzz_parse_emit.cpp | 9 ++ test/test_parse_engine_6_qmrk.cpp | 26 ++++ test/test_suite/test_suite_event_handler.cpp | 4 +- test/test_suite/test_suite_event_handler.hpp | 26 ++-- test/test_tag_property.cpp | 13 ++ 20 files changed, 494 insertions(+), 101 deletions(-) create mode 100644 test/test_fuzz/test_fuzz_common.hpp create mode 100644 test/test_fuzz/test_fuzz_events.cpp create mode 100644 test/test_fuzz/test_fuzz_main.cpp create mode 100644 test/test_fuzz/test_fuzz_parse_emit.cpp diff --git a/.github/setenv.sh b/.github/setenv.sh index 3eb65a108..3ba6c611d 100644 --- a/.github/setenv.sh +++ b/.github/setenv.sh @@ -13,6 +13,8 @@ function c4_show_info() echo "PROJ_PFX_TARGET=$PROJ_PFX_TARGET" echo "PROJ_PFX_CMAKE=$PROJ_PFX_CMAKE" echo "CMAKE_FLAGS=$CMAKE_FLAGS" + echo "CMAKE_C_FLAGS=$CMAKE_C_FLAGS" + echo "CMAKE_CXX_FLAGS=$CMAKE_CXX_FLAGS" echo "NUM_JOBS_BUILD=$NUM_JOBS_BUILD" echo "GITHUB_WORKSPACE=$GITHUB_WORKSPACE" pwd @@ -304,7 +306,7 @@ function c4_cfg_test() -G 'Visual Studio 17 2022' -A $(_c4vsarchtype $id) \ $(_c4_add_ehsc_to_vs_arm32 $id) \ -DCMAKE_BUILD_TYPE=$BT $CMFLAGS \ - -DCMAKE_C_FLAGS=" $CFLAGS" -DCMAKE_CXX_FLAGS=" $CXXFLAGS" + -DCMAKE_C_FLAGS=" $CFLAGS $CMAKE_C_FLAGS" -DCMAKE_CXX_FLAGS=" $CXXFLAGS 
$CMAKE_CXX_FLAGS" ;; vs2019) cmake -S $PROJ_DIR -B $build_dir -DCMAKE_INSTALL_PREFIX="$install_dir" \ diff --git a/.github/workflows/clang.yml b/.github/workflows/clang.yml index 911163d65..bc3f19f5e 100644 --- a/.github/workflows/clang.yml +++ b/.github/workflows/clang.yml @@ -192,7 +192,58 @@ jobs: - {std: 11, cxx: "4.0", bt: Release, vg: on, img: ubuntu18.04} - {std: 11, cxx: "3.9", bt: Debug , vg: on, img: ubuntu18.04} - {std: 11, cxx: "3.9", bt: Release, vg: on, img: ubuntu18.04} - env: {STD: "${{matrix.std}}", CXX_: "clang++-${{matrix.cxx}}", BT: "${{matrix.bt}}", BITLINKS: "${{matrix.bitlinks}}", VG: "${{matrix.vg}}", SAN: "${{matrix.san}}", LINT: "${{matrix.lint}}", OS: "${{matrix.os}}"} + env: { + STD: "${{matrix.std}}", CXX_: "clang++-${{matrix.cxx}}", BT: "${{matrix.bt}}", + BITLINKS: "${{matrix.bitlinks}}", VG: "${{matrix.vg}}", + SAN: "${{matrix.san}}", LINT: "${{matrix.lint}}", OS: "${{matrix.os}}" + } + steps: + - {name: checkout, uses: actions/checkout@v3, with: {submodules: recursive}} + - run: git config --system --add safe.directory '*' # needed for running in the docker image. 
see https://github.com/actions/checkout/issues/1169 + - run: c4core-install $CXX_ + - {name: show info, run: source .github/setenv.sh && c4_show_info} + - name: shared64-configure--------------------------------------------------- + run: source .github/setenv.sh && c4_cfg_test shared64 + - {name: shared64-build, run: source .github/setenv.sh && c4_build_test shared64} + - {name: shared64-run, run: source .github/setenv.sh && c4_run_test shared64} + - {name: shared64-pack, run: source .github/setenv.sh && c4_package shared64} + - name: static64-configure--------------------------------------------------- + run: source .github/setenv.sh && c4_cfg_test static64 + - {name: static64-build, run: source .github/setenv.sh && c4_build_test static64} + - {name: static64-run, run: source .github/setenv.sh && c4_run_test static64} + - {name: static64-pack, run: source .github/setenv.sh && c4_package static64} + - name: static32-configure--------------------------------------------------- + run: source .github/setenv.sh && c4_cfg_test static32 + - {name: static32-build, run: source .github/setenv.sh && c4_build_test static32} + - {name: static32-run, run: source .github/setenv.sh && c4_run_test static32} + - {name: static32-pack, run: source .github/setenv.sh && c4_package static32} + - name: shared32-configure--------------------------------------------------- + run: source .github/setenv.sh && c4_cfg_test shared32 + - {name: shared32-build, run: source .github/setenv.sh && c4_build_test shared32} + - {name: shared32-run, run: source .github/setenv.sh && c4_run_test shared32} + - {name: shared32-pack, run: source .github/setenv.sh && c4_package shared32} + + #---------------------------------------------------------------------------- + clang_fuzz: + name: clang_fuzz/${{matrix.cxx}}/c++${{matrix.std}}/${{matrix.bt}}/vg${{matrix.vg}} + continue-on-error: true + if: always() # 
https://stackoverflow.com/questions/62045967/github-actions-is-there-a-way-to-continue-on-error-while-still-getting-correct + runs-on: ubuntu-latest + container: ghcr.io/biojppm/c4core/${{matrix.img}}:latest # use the docker image + strategy: + fail-fast: false + matrix: + include: + - {std: 11, cxx: "16" , bt: fuzz , vg: on, img: ubuntu18.04} + - {std: 11, cxx: "16" , bt: Debug , vg: on, img: ubuntu18.04} + - {std: 11, cxx: "16" , bt: Release, vg: on, img: ubuntu18.04} + env: { + CMAKE_CXX_FLAGS: "-g -O0 -fsanitize=address,undefined", + CMAKE_FLAGS: "-DRYML_TEST_FUZZ=ON -DRYML_FUZZ_LIBFUZZER_MERGE=ON", + STD: "${{matrix.std}}", CXX_: "clang++-${{matrix.cxx}}", BT: "${{matrix.bt}}", + BITLINKS: "${{matrix.bitlinks}}", VG: "${{matrix.vg}}", SAN: "${{matrix.san}}", + LINT: "${{matrix.lint}}", OS: "${{matrix.os}}" + } steps: - {name: checkout, uses: actions/checkout@v3, with: {submodules: recursive}} - run: git config --system --add safe.directory '*' # needed for running in the docker image. 
see https://github.com/actions/checkout/issues/1169 diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index d2e7872c3..f233d2824 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -46,7 +46,7 @@ jobs: env: {STD: "${{matrix.std}}", CXX_: "${{matrix.cxx}}", BT: "${{matrix.bt}}", BITLINKS: "${{matrix.bitlinks}}", VG: "${{matrix.vg}}", SAN: "${{matrix.san}}", LINT: "${{matrix.lint}}", OS: "${{matrix.os}}", - CMAKE_FLAGS: "${{matrix.cmk}}", + CMAKE_FLAGS: "${{matrix.cmk}} -DRYML_TEST_FUZZ=ON", CODECOV_TOKEN: "${{secrets.CODECOV_TOKEN}}", COVERALLS_REPO_TOKEN: "${{secrets.COVERALLS_REPO_TOKEN}}", # coveralls disabled: https://github.com/lemurheavy/coveralls-public/issues/1665 diff --git a/.github/workflows/gcc.yml b/.github/workflows/gcc.yml index 2a16edb27..d92b80a18 100644 --- a/.github/workflows/gcc.yml +++ b/.github/workflows/gcc.yml @@ -44,7 +44,10 @@ jobs: - {std: 11, cxx: g++-5 , bt: Release, bitlinks: shared64 static32} - {std: 11, cxx: g++-4.8, bt: Debug , bitlinks: shared64 static32} - {std: 11, cxx: g++-4.8, bt: Release, bitlinks: shared64 static32} - env: {STD: "${{matrix.std}}", CXX_: "${{matrix.cxx}}", BT: "${{matrix.bt}}", BITLINKS: "${{matrix.bitlinks}}", VG: "${{matrix.vg}}", SAN: "${{matrix.san}}", LINT: "${{matrix.lint}}", OS: "${{matrix.os}}"} + env: { + STD: "${{matrix.std}}", CXX_: "${{matrix.cxx}}", BT: "${{matrix.bt}}", + BITLINKS: "${{matrix.bitlinks}}", VG: "${{matrix.vg}}", SAN: "${{matrix.san}}", LINT: "${{matrix.lint}}", OS: "${{matrix.os}}" + } steps: - {name: checkout, uses: actions/checkout@v3, with: {submodules: recursive}} - run: git config --system --add safe.directory '*' # needed for running in the docker image. 
see https://github.com/actions/checkout/issues/1169 @@ -134,7 +137,13 @@ jobs: - {std: 11, cxx: g++-7 , bt: Release, bitlinks: shared64 static32} - {std: 20, cxx: g++-10 , bt: Debug , bitlinks: shared64 static32} - {std: 20, cxx: g++-10 , bt: Release, bitlinks: shared64 static32} - env: {CXXFLAGS: "-fno-exceptions -fno-rtti", STD: "${{matrix.std}}", CXX_: "${{matrix.cxx}}", BT: "${{matrix.bt}}", BITLINKS: "${{matrix.bitlinks}}", VG: "${{matrix.vg}}", SAN: "${{matrix.san}}", LINT: "${{matrix.lint}}", OS: "${{matrix.os}}"} + env: { + CXXFLAGS: "-fno-exceptions -fno-rtti", + CMAKE_FLAGS: "-DRYML_TEST_FUZZ=ON", + STD: "${{matrix.std}}", CXX_: "${{matrix.cxx}}", BT: "${{matrix.bt}}", + BITLINKS: "${{matrix.bitlinks}}", VG: "${{matrix.vg}}", + SAN: "${{matrix.san}}", LINT: "${{matrix.lint}}", OS: "${{matrix.os}}" + } steps: - {name: checkout, uses: actions/checkout@v3, with: {submodules: recursive}} - run: git config --system --add safe.directory '*' # needed for running in the docker image. see https://github.com/actions/checkout/issues/1169 @@ -200,7 +209,12 @@ jobs: - {std: 11, cxx: g++-4.9, bt: Release, img: ubuntu18.04} - {std: 11, cxx: g++-4.8, bt: Debug , img: ubuntu18.04} - {std: 11, cxx: g++-4.8, bt: Release, img: ubuntu18.04} - env: {STD: "${{matrix.std}}", CXX_: "${{matrix.cxx}}", BT: "${{matrix.bt}}", BITLINKS: "${{matrix.bitlinks}}", VG: "${{matrix.vg}}", SAN: "${{matrix.san}}", LINT: "${{matrix.lint}}", OS: "${{matrix.os}}"} + env: { + CMAKE_FLAGS: "-DRYML_TEST_FUZZ=ON", + STD: "${{matrix.std}}", CXX_: "${{matrix.cxx}}", BT: "${{matrix.bt}}", + BITLINKS: "${{matrix.bitlinks}}", VG: "${{matrix.vg}}", + SAN: "${{matrix.san}}", LINT: "${{matrix.lint}}", OS: "${{matrix.os}}" + } steps: - {name: checkout, uses: actions/checkout@v3, with: {submodules: recursive}} - run: git config --system --add safe.directory '*' # needed for running in the docker image. 
see https://github.com/actions/checkout/issues/1169 diff --git a/README.md b/README.md index dff3d55d5..c25997429 100644 --- a/README.md +++ b/README.md @@ -53,7 +53,7 @@ ryml is written in C++11, and compiles cleanly with: ryml's API documentation is [available at ReadTheDocs](https://rapidyaml.readthedocs.io/latest/). -ryml is [extensively unit-tested in Linux, Windows and +ryml is [extensively unit-tested and fuzz-tested in Linux, Windows and MacOS](https://github.com/biojppm/rapidyaml/actions). The tests cover x64, x86, wasm (emscripten), arm, aarch64, ppc64le and s390x architectures, and include analysing ryml with: @@ -63,6 +63,8 @@ architectures, and include analysing ryml with: * memory * address * undefined behavior + * fuzzers: + * libfuzzer ryml also [runs in bare-metal](https://github.com/biojppm/rapidyaml/issues/193), and @@ -432,37 +434,6 @@ CHECK(loc.col == 4u); ## Using ryml in your project -### Package managers - -ryml is available in most package managers (thanks to all the -contributors!) and linux distributions. But please be aware: those -packages are maintained downstream of this repository, so if you have -issues with the package, file a report with the respective maintainer. 
- -Here's a quick roundup (not maintained): -* Package managers: - * [conan](https://conan.io/center/recipes/rapidyaml) - * [vcpkg](https://vcpkg.io/en/packages.html): `vcpkg install ryml` - * [PyPI](https://pypi.org/project/rapidyaml/) -* Linux distributions: - * Arch Linux/Manjaro: - * [rapidyaml (aarch64)](https://archlinuxarm.org/packages/aarch64/rapidyaml) - * [rapidyaml-git (AUR)](https://aur.archlinux.org/packages/rapidyaml-git/) - * [python-rapidyaml-git (AUR)](https://aur.archlinux.org/packages/python-rapidyaml-git/) - * [Fedora Linux](https://getfedora.org/)/[EPEL](https://docs.fedoraproject.org/en-US/epel/): - * `dnf install rapidyaml-devel` - * `dnf install python3-rapidyaml` - * [Gentoo](https://packages.gentoo.org/packages/dev-cpp/rapidyaml) - * [OpenSuse](https://build.openbuildservice.org/package/show/Emulators/rapidyaml) - * [Slackbuilds](https://slackbuilds.org/repository/15.0/libraries/rapidyaml/) - * [AltLinux](https://packages.altlinux.org/en/sisyphus/srpms/rapidyaml/3006055151670528141) - -Although package managers are very useful for quickly getting up to -speed, the advised way is still to bring ryml as a submodule of your -project, building both together. This makes it easy to track any -upstream changes in ryml. Also, ryml is small and quick to build, so -there's not much of a cost for building it with your project. - ### Single header file ryml is provided chiefly as a cmake library project, but it can also be used as a single header file, and there is a [tool to @@ -531,6 +502,38 @@ If you omit `--recursive`, after cloning you will have to do `git submodule update --init --recursive` to ensure ryml's submodules are checked out. +### Package managers + +ryml is available in most package managers (thanks to all the +contributors!) and linux distributions. But please be aware: those +packages are maintained downstream of this repository, so if you have +issues with the package, file a report with the respective maintainer. 
+ +Here's a quick roundup (not maintained): +* Package managers: + * [conan](https://conan.io/center/recipes/rapidyaml) + * [vcpkg](https://vcpkg.io/en/packages.html): `vcpkg install ryml` + * [PyPI](https://pypi.org/project/rapidyaml/) +* Linux distributions: + * Arch Linux/Manjaro: + * [rapidyaml (aarch64)](https://archlinuxarm.org/packages/aarch64/rapidyaml) + * [rapidyaml-git (AUR)](https://aur.archlinux.org/packages/rapidyaml-git/) + * [python-rapidyaml-git (AUR)](https://aur.archlinux.org/packages/python-rapidyaml-git/) + * [Fedora Linux](https://getfedora.org/)/[EPEL](https://docs.fedoraproject.org/en-US/epel/): + * `dnf install rapidyaml-devel` + * `dnf install python3-rapidyaml` + * [Gentoo](https://packages.gentoo.org/packages/dev-cpp/rapidyaml) + * [OpenSuse](https://build.openbuildservice.org/package/show/Emulators/rapidyaml) + * [Slackbuilds](https://slackbuilds.org/repository/15.0/libraries/rapidyaml/) + * [AltLinux](https://packages.altlinux.org/en/sisyphus/srpms/rapidyaml/3006055151670528141) + +Although package managers are very useful for quickly getting up to +speed, the advised way is still to bring ryml as a submodule of your +project, building both together. This makes it easy to track any +upstream changes in ryml. Also, ryml is small and quick to build, so +there's not much of a cost for building it with your project. + + ### Quickstart samples These samples show different ways of getting ryml into your application. All the @@ -555,6 +558,7 @@ more about each sample: | [`fetch_content`](./samples/fetch_content) | **yes** | [`CMakeLists.txt`](./samples/fetch_content/CMakeLists.txt) | [`run.sh`](./samples/fetch_content/run.sh) | | [`find_package`](./samples/find_package) | **no**
needs prior install or package | [`CMakeLists.txt`](./samples/find_package/CMakeLists.txt) | [`run.sh`](./samples/find_package/run.sh) | + ### CMake build settings for ryml The following cmake variables can be used to control the build behavior of ryml: @@ -726,20 +730,16 @@ See also [the roadmap](./ROADMAP.md) for a list of future work. ### Known limitations -ryml deliberately makes no effort to follow the standard in the +ryml deliberately makes no effort to follow the YAML standard in the following situations: -* ryml's tree does NOT accept containers are as mapping keys: keys - must be scalars. HOWEVER, this is a limitation only of the tree. The - event-based parser engine DOES parse container keys. The parser - engine is the result of a recent refactor and its usage is meant to - be used by other programming languages to create their native - data-structures. This engine is fully tested and fully conformant - (other than the general error permissiveness noted below). But - because it is recent, it is still undocumented, and it requires some - API cleanup before being ready for isolated use. Please get in touch - if you are interested in integrating the event-based parser engine - without the standalone `ryml::parse_*()` +* ryml's tree does NOT accept containers as map keys: keys stored in + the tree must always be scalars. HOWEVER, this is a limitation only + of the final tree. The event-based parse engine DOES parse container + keys, as it is meant to be used by other programming languages to + create their native data-structures, and it is fully tested and + fully conformant (other than the general error permissiveness noted + below). * Tab characters after `:` and `-` are not accepted tokens, unless ryml is compiled with the macro `RYML_WITH_TAB_TOKENS`. 
This requirement exists because checking for tabs introduces branching @@ -774,11 +774,11 @@ following situations: If you do run into trouble and would like to investigate conformance of your YAML code, **beware** of existing online YAML linters, many of which are not fully conformant. Instead, try using -[https://play.yaml.io](https://play.yaml.io), an amazing tool which -lets you dynamically input your YAML and continuously see the results -from all the existing parsers (kudos to @ingydotnet and the people -from the YAML test suite). And of course, if you detect anything wrong -with ryml, please [open an +[https://play.yaml.io](https://play.yaml.io), an amazingly useful tool +which lets you dynamically input your YAML and continuously see the +results from all the existing parsers (kudos to @ingydotnet and the +people from the YAML test suite). And of course, if you detect +anything wrong with ryml, please [open an issue](https://github.com/biojppm/rapidyaml/issues) so that we can improve. diff --git a/ROADMAP.md b/ROADMAP.md index 8df21edc4..c78da795c 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -1,2 +1,2 @@ -Moved! See ryml's [Kanban board on github](https://github.com/biojppm/rapidyaml/projects/1). +Moved! See rapidyaml's [Kanban board on github](https://github.com/users/biojppm/projects/1/views/1). diff --git a/doc/index.rst b/doc/index.rst index b26b178cd..67bd88711 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -35,10 +35,10 @@ ryml is written in C++11, and compiles cleanly with: * Visual Studio 2015 and later -* clang++ 3.9 and later - * g++ 4.8 and later +* clang++ 3.9 and later + * Intel Compiler .. 
note:: diff --git a/doc/sphinx_quicklinks.rst b/doc/sphinx_quicklinks.rst index b4c8c7c1e..133d4f80d 100644 --- a/doc/sphinx_quicklinks.rst +++ b/doc/sphinx_quicklinks.rst @@ -13,7 +13,7 @@ Quick links * `Pull Requests `_ - * `Kanban board `_ + * `Kanban board `_ * Latest release: `0.6.0 `_ diff --git a/doc/sphinx_yaml_standard.rst b/doc/sphinx_yaml_standard.rst index fdf5c13e8..4d96bb397 100644 --- a/doc/sphinx_yaml_standard.rst +++ b/doc/sphinx_yaml_standard.rst @@ -17,7 +17,7 @@ welcome. linters**, many of which are not fully conformant; instead, try using `https://play.yaml.io `__, - an amazing tool which lets you dynamically input your YAML and + an amazingly useful tool which lets you dynamically input your YAML and continuously see the results from all the existing parsers (kudos to @ingydotnet and the people from the YAML test suite). And of course, if you detect anything wrong with ryml, please `open an @@ -31,17 +31,13 @@ Deliberate deviations ryml deliberately makes no effort to follow the standard in the following situations: -- ryml's tree does NOT accept containers are as mapping keys: keys - must be scalars. HOWEVER, this is a limitation only of the tree. The - event-based parser engine DOES parse container keys. The parser - engine is the result of a recent refactor and its usage is meant to - be used by other programming languages to create their native - data-structures. This engine is fully tested and fully conformant - (other than the general error permissiveness noted below). But - because it is recent, it is still undocumented, and it requires some - API cleanup before being ready for isolated use. Please get in touch - if you are interested in integrating the event-based parser engine - without the standalone `ryml::parse_*()` +- ryml's tree does NOT accept containers as map keys: keys stored in + the tree must always be scalars. HOWEVER, this is a limitation only + of the final tree. 
The event-based parse engine DOES parse container + keys, as it is meant to be used by other programming languages to + create their native data-structures, and it is fully tested and + fully conformant (other than the general error permissiveness noted + below). - Tab characters after ``:`` and ``-`` are not accepted tokens, unless ryml is compiled with the macro ``RYML_WITH_TAB_TOKENS``. This requirement exists because checking for tabs introduces branching diff --git a/src/c4/yml/detail/stack.hpp b/src/c4/yml/detail/stack.hpp index df3e27d37..10089558d 100644 --- a/src/c4/yml/detail/stack.hpp +++ b/src/c4/yml/detail/stack.hpp @@ -25,7 +25,7 @@ namespace detail { template class stack { - static_assert(std::is_trivially_copyable::value, "T must be trivially copyable"); + //static_assert(std::is_trivially_copyable::value, "T must be trivially copyable"); static_assert(std::is_trivially_destructible::value, "T must be trivially destructible"); public: @@ -34,11 +34,11 @@ class stack public: - T m_buf[size_t(N)]; - T * m_stack; - id_type m_size; - id_type m_capacity; - Callbacks m_callbacks; + T m_buf[size_t(N)]; + T *C4_RESTRICT m_stack; + id_type m_size; + id_type m_capacity; + Callbacks m_callbacks; public: diff --git a/src/c4/yml/event_handler_stack.hpp b/src/c4/yml/event_handler_stack.hpp index 4e6d94f5e..af1e11b27 100644 --- a/src/c4/yml/event_handler_stack.hpp +++ b/src/c4/yml/event_handler_stack.hpp @@ -46,7 +46,7 @@ struct EventHandlerStack state *C4_RESTRICT m_curr; ///< current stack level: top of the stack. cached here for easier access. state *C4_RESTRICT m_parent; ///< parent of the current stack level. 
pfn_relocate_arena m_relocate_arena; ///< callback when the arena gets relocated - void *C4_RESTRICT m_relocate_arena_data; + void * m_relocate_arena_data; protected: @@ -67,6 +67,8 @@ struct EventHandlerStack void _stack_finish_parse() { + m_relocate_arena = nullptr; + m_relocate_arena_data = nullptr; } protected: diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 905292cef..1b5d870b5 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -189,13 +189,6 @@ endif() option(RYML_TEST_SUITE "Enable cases from yaml-test-suite, https://github.com/yaml/yaml-test-suite." ON) if(RYML_TEST_SUITE) - set(ed ${CMAKE_CURRENT_BINARY_DIR}/subprojects) # casual ryml extern dir (these projects are not part of ryml and are downloaded and compiled on the fly) - - c4_require_subproject(c4log REMOTE - GIT_REPOSITORY https://github.com/biojppm/c4log - GIT_TAG master) - - set(tsdir ${ed}/yaml-test-suite) c4_download_remote_proj(yaml-test-suite suite_dir GIT_REPOSITORY https://github.com/yaml/yaml-test-suite GIT_TAG data-2022-01-17) @@ -203,6 +196,10 @@ if(RYML_TEST_SUITE) c4_err("cannot find yaml-test-suite at ${suite_dir} -- was there an error downloading the project?") endif() + c4_require_subproject(c4log REMOTE + GIT_REPOSITORY https://github.com/biojppm/c4log + GIT_TAG master) + c4_add_executable(ryml-test-suite SOURCES test_suite.cpp @@ -265,3 +262,129 @@ if(RYML_TEST_SUITE) ryml_add_test_from_suite(${case}) endforeach() endif(RYML_TEST_SUITE) + + +#------------------------------------------------------------------------------ +#------------------------------------------------------------------------------ +#------------------------------------------------------------------------------ + +string(TOUPPER "${CMAKE_BUILD_TYPE}" upper_build_type) +if(upper_build_type STREQUAL FUZZ) + option(RYML_TEST_FUZZ "Enable fuzz tests" ON) +else() + option(RYML_TEST_FUZZ "Enable fuzz tests" OFF) +endif() + +if(RYML_TEST_FUZZ) + c4_download_remote_proj(rapidyaml-data 
rapidyaml_data_dir + GIT_REPOSITORY https://github.com/biojppm/rapidyaml-data + GIT_TAG master) + if(NOT EXISTS ${rapidyaml_data_dir}/fuzz/yaml.dict) + c4_err("cannot find rapidyaml-data at ${rapidyaml_data_dir} -- was there an error downloading the project?") + endif() + # + set(corpus_suite_dir ${rapidyaml_data_dir}/fuzz/yaml_test_suite) + set(corpus_generated_dir ${rapidyaml_data_dir}/fuzz/yaml_generated) + set(corpus_artifacts_dir ${rapidyaml_data_dir}/fuzz/yaml_artifacts) + set(corpus_merged_dir ${rapidyaml_data_dir}/fuzz/yaml_merged) + set(yaml_dict ${rapidyaml_data_dir}/fuzz/yaml.dict) + file(GLOB_RECURSE fuzz_files RELATIVE "${corpus_artifacts_dir}" "${corpus_artifacts_dir}/*") + file(GLOB_RECURSE suite_files RELATIVE "${corpus_suite_dir}" "${corpus_suite_dir}/*") + # + function(ryml_add_fuzz_test name) + c4_add_executable(ryml-test-fuzz-${name} + SOURCES + test_fuzz/test_fuzz_common.hpp + test_fuzz/test_fuzz_${name}.cpp + test_fuzz/test_fuzz_main.cpp + ${ARGN} + INC_DIRS ${CMAKE_CURRENT_LIST_DIR} + LIBS ryml c4fs + FOLDER test/fuzz) + function(ryml_add_fuzz_test_file name_ dir file) + string(REPLACE "/" "_" fuzz_name "${file}") + add_test(NAME ryml-test-fuzz-${name_}-${fuzz_name} + COMMAND $ ${dir}/${file}) + endfunction() + foreach(fuzz_file ${fuzz_files}) + ryml_add_fuzz_test_file(${name} ${corpus_artifacts_dir} ${fuzz_file}) + endforeach() + if(RYML_DBG) + target_compile_definitions(ryml-test-fuzz-${name} PUBLIC RYML_DBG) + endif() + add_dependencies(ryml-test-build ryml-test-fuzz-${name}) + endfunction() + ryml_add_fuzz_test(parse_emit) + ryml_add_fuzz_test(events + ../test/test_suite/test_suite_event_handler.hpp + ../test/test_suite/test_suite_event_handler.cpp) + # + # + # fuzzing libraries: + # https://llvm.org/docs/LibFuzzer.html + # http://lcamtuf.coredump.cx/afl/ + # https://github.com/AFLplusplus/AFLplusplus + # https://gitlab.com/akihe/radamsa + # + # actions: + # https://google.github.io/clusterfuzzlite/ + # https://github.com/google/oss-fuzz 
+ # + # + # libfuzzer: https://llvm.org/docs/LibFuzzer.html + if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") + option(RYML_FUZZ_LIBFUZZER_MERGE "merge fuzz corpus" OFF) + option(RYML_FUZZ_LIBFUZZER_MERGE_RESUME "resume merge" ON) + option(RYML_FUZZ_LIBFUZZER_DICT "use a yaml dict" ON) + option(RYML_FUZZ_LIBFUZZER_FIXED_SEED "use a fixed seed" ON) + set(RYML_FUZZ_LIBFUZZER_OPTIONS "-timeout=5" CACHE STRING "options for libfuzzer https://llvm.org/docs/LibFuzzer.html#id16") + function(ryml_add_libfuzzer_test name) + c4_add_executable(ryml-test-libfuzzer-${name} + SOURCES + test_fuzz/test_fuzz_common.hpp + test_fuzz/test_fuzz_${name}.cpp + ${ARGN} + CFLAGS -fsanitize=fuzzer + INC_DIRS ${CMAKE_CURRENT_LIST_DIR} + LIBS ryml -fsanitize=fuzzer + FOLDER test/fuzz) + if(RYML_DBG) + target_compile_definitions(ryml-test-libfuzzer-${name} PUBLIC RYML_DBG) + endif() + add_dependencies(ryml-test-build ryml-test-libfuzzer-${name}) + set(corpus_dirs + ${corpus_generated_dir} # generated inputs go here + ${corpus_artifacts_dir} # corpus with crash/timeout artifacts + ${corpus_suite_dir} # corpus with yaml test suite + ) + set(opts) + if(RYML_FUZZ_LIBFUZZER_DICT) + list(APPEND opts "-dict=${yaml_dict}") + endif() + file(MAKE_DIRECTORY ${corpus_merged_dir}) + if(RYML_FUZZ_LIBFUZZER_MERGE) + list(APPEND opts "-merge=1") + if(RYML_FUZZ_LIBFUZZER_MERGE_RESUME) + list(APPEND opts -merge_control_file=${CMAKE_CURRENT_BINARY_DIR}/fuzz_merge_control_file) + endif() + list(PREPEND corpus_dirs ${corpus_merged_dir}) + else() + list(APPEND opts "-merge=0") + endif() + set(cmd $ ${opts} ${RYML_FUZZ_LIBFUZZER_OPTIONS} ${corpus_dirs}) + add_custom_target(ryml-test-libfuzzer-${name}-run + COMMAND ${cmd} + COMMENT "cd\ ${corpus_artifacts_dir}\ ;\ ${cmd}" + WORKING_DIRECTORY ${corpus_artifacts_dir}) # setting the workdir to this will collect the artifacts in there + if(RYML_FUZZ_LIBFUZZER_MERGE) + add_test(NAME ryml-test-fuzz-libfuzzer-${name} + COMMAND ${cmd} + WORKING_DIRECTORY 
${corpus_artifacts_dir}) # setting the workdir to this will collect the artifacts in there + endif() + endfunction() + ryml_add_libfuzzer_test(parse_emit) + ryml_add_libfuzzer_test(events + ../test/test_suite/test_suite_event_handler.hpp + ../test/test_suite/test_suite_event_handler.cpp) + endif() +endif() diff --git a/test/test_fuzz/test_fuzz_common.hpp b/test/test_fuzz/test_fuzz_common.hpp new file mode 100644 index 000000000..53857acd0 --- /dev/null +++ b/test/test_fuzz/test_fuzz_common.hpp @@ -0,0 +1,134 @@ +#pragma once +#ifndef TEST_FUZZ_COMMON_H +#define TEST_FUZZ_COMMON_H + +#ifdef RYML_SINGLE_HEADER +#include +#else +#include +#include +#include +#include +#include +#endif +#include +#include +#include + +#ifdef C4_EXCEPTIONS +#include +#else +#include +std::jmp_buf jmp_env = {}; +c4::csubstr jmp_msg = {}; +#endif + + +#ifdef RYML_DBG +#define _if_dbg(...) __VA_ARGS__ +bool report_errors = true; +#else +#define _if_dbg(...) +bool report_errors = false; +#endif + +inline void report_error(const char* msg, size_t length, c4::yml::Location loc, FILE *f) +{ + if(!report_errors) + return; + if(!loc.name.empty()) + { + fwrite(loc.name.str, 1, loc.name.len, f); + fputc(':', f); + } + fprintf(f, "%zu:", loc.line); + if(loc.col) + fprintf(f, "%zu:", loc.col); + if(loc.offset) + fprintf(f, " (%zuB):", loc.offset); + fputc(' ', f); + fprintf(f, "%.*s\n", static_cast(length), msg); + fflush(f); +} + +inline C4_NORETURN void errcallback(const char *msg, size_t msg_len, c4::yml::Location location, void *) +{ + report_error(msg, msg_len, location, stderr); + C4_IF_EXCEPTIONS( + throw std::runtime_error({msg, msg_len}); + , + jmp_msg.assign(msg, msg_len); + std::longjmp(jmp_env, 1); + ); +} + +inline c4::yml::Callbacks create_custom_callbacks() +{ + c4::set_error_flags(c4::ON_ERROR_CALLBACK); + c4::set_error_callback([](const char *msg, size_t msg_len){ + errcallback(msg, msg_len, {}, nullptr); + }); + c4::yml::Callbacks callbacks = {}; + callbacks.m_error = errcallback; 
+ return callbacks; +} + +namespace c4 { +namespace yml { + +inline int fuzztest_parse_emit(uint32_t case_number, csubstr src) +{ + C4_UNUSED(case_number); + set_callbacks(create_custom_callbacks()); + Tree tree(create_custom_callbacks()); + bool parse_success = false; + C4_IF_EXCEPTIONS_(try, if(setjmp(jmp_env) == 0)) + { + RYML_ASSERT(tree.empty()); + _if_dbg(_dbg_printf("in[{}]: [{}]~~~\n{}\n~~~\n", case_number, src.len, src); fflush(NULL)); + parse_in_arena(src, &tree); + parse_success = true; + _if_dbg(print_tree("parsed tree", tree)); + _if_dbg(_dbg_printf("in[{}]: [{}]~~~\n{}\n~~~\n", case_number, src.len, src); fflush(NULL)); + std::string dst = emitrs_yaml(tree); + _if_dbg(_dbg_printf("emitted[{}]: [{}]~~~\n{}\n~~~\n", case_number, dst.size(), to_csubstr(dst)); fflush(NULL)); + C4_DONT_OPTIMIZE(dst); + C4_DONT_OPTIMIZE(parse_success); + } + C4_IF_EXCEPTIONS_(catch(std::exception const&), else) + { + // if an exception leaks from here, it is likely because of a greedy noexcept + _if_dbg(if(parse_success) print_tree("parsed tree", tree)); + return 1; + } + return 0; +} + +inline int fuzztest_yaml_events(uint32_t case_number, csubstr src) +{ + C4_UNUSED(case_number); + set_callbacks(create_custom_callbacks()); + EventHandlerYamlStd::EventSink sink = {}; + EventHandlerYamlStd handler(&sink, create_custom_callbacks()); + ParseEngine parser(&handler); + std::string str(src.begin(), src.end()); + C4_IF_EXCEPTIONS_(try, if(setjmp(jmp_env) == 0)) + { + _if_dbg(_dbg_printf("in[{}]: [{}]~~~\n{}\n~~~\n", case_number, src.len, src); fflush(NULL)); + parser.parse_in_place_ev("input", c4::to_substr(str)); + _if_dbg(_dbg_printf("evts[{}]: ~~~\n{}\n~~~\n", case_number, sink.get()); fflush(NULL)); + C4_DONT_OPTIMIZE(sink); + } + C4_IF_EXCEPTIONS_(catch(std::exception const&), else) + { + // if an exception leaks from here, it is likely because of a greedy noexcept + _if_dbg(fprintf(stdout, "err\n"); fflush(NULL)); + return 1; + } + return 0; +} + +} // namespace yml +} // 
namespace c4 + +#endif /* TEST_FUZZ_COMMON_H */ diff --git a/test/test_fuzz/test_fuzz_events.cpp b/test/test_fuzz/test_fuzz_events.cpp new file mode 100644 index 000000000..a857a2fe6 --- /dev/null +++ b/test/test_fuzz/test_fuzz_events.cpp @@ -0,0 +1,9 @@ +#include "./test_fuzz_common.hpp" +#include + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *str, size_t len) +{ + static std::atomic case_number{0}; + c4::csubstr src = {reinterpret_cast(str), len}; + return c4::yml::fuzztest_yaml_events(case_number++, src); +} diff --git a/test/test_fuzz/test_fuzz_main.cpp b/test/test_fuzz/test_fuzz_main.cpp new file mode 100644 index 000000000..45f21b28e --- /dev/null +++ b/test/test_fuzz/test_fuzz_main.cpp @@ -0,0 +1,16 @@ +#include +#include + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *str, size_t len); + +int main(int argc, const char *argv[]) +{ + if(argc < 2) + return 1; + const char *filename = argv[1]; + if(!c4::fs::file_exists(filename)) + return 1; + std::string file = c4::fs::file_get_contents(filename); + (void)LLVMFuzzerTestOneInput(reinterpret_cast(&file[0]), file.size()); + return 0; +} diff --git a/test/test_fuzz/test_fuzz_parse_emit.cpp b/test/test_fuzz/test_fuzz_parse_emit.cpp new file mode 100644 index 000000000..dc4c08057 --- /dev/null +++ b/test/test_fuzz/test_fuzz_parse_emit.cpp @@ -0,0 +1,9 @@ +#include "./test_fuzz_common.hpp" +#include + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *str, size_t len) +{ + static std::atomic case_number{0}; + c4::csubstr src = {reinterpret_cast(str), len}; + return c4::yml::fuzztest_parse_emit(case_number++, src); +} diff --git a/test/test_parse_engine_6_qmrk.cpp b/test/test_parse_engine_6_qmrk.cpp index fe182ef29..3f7eec122 100644 --- a/test/test_parse_engine_6_qmrk.cpp +++ b/test/test_parse_engine_6_qmrk.cpp @@ -292,6 +292,32 @@ ENGINE_TEST(Qmrk3, ___(ps.end_stream()); } +ENGINE_TEST(Qmrk4_0, + ("[?baz:,]", + "[{?baz: }]"), + "+STR\n" + "+DOC\n" + "+SEQ []\n" + "+MAP {}\n" + "=VAL :?baz\n" + 
"=VAL :\n" + "-MAP\n" + "-SEQ\n" + "-DOC\n" + "-STR\n") +{ + ___(ps.begin_stream()); + ___(ps.begin_doc()); + ___(ps.begin_seq_val_flow()); + ___(ps.begin_map_val_flow()); + ___(ps.set_key_scalar_plain("?baz")); + ___(ps.set_val_scalar_plain({})); + ___(ps.end_map()); + ___(ps.end_seq()); + ___(ps.end_doc()); + ___(ps.end_stream()); +} + ENGINE_TEST(Qmrk4, ("[ ? an explicit key, ? foo,? bar,?baz:,?bat]", "[{an explicit key: },{foo: },{bar: },{?baz: },?bat]"), diff --git a/test/test_suite/test_suite_event_handler.cpp b/test/test_suite/test_suite_event_handler.cpp index 55d939f12..b03435ad0 100644 --- a/test/test_suite/test_suite_event_handler.cpp +++ b/test/test_suite/test_suite_event_handler.cpp @@ -21,7 +21,7 @@ void append_escaped(extra::string *es, csubstr val) prev = i + skip; \ } \ while(0) - uint8_t const* C4_RESTRICT s = reinterpret_cast(val.str); + uint8_t const* C4_RESTRICT s = reinterpret_cast(es->get().str); size_t prev = 0; for(size_t i = 0; i < val.len; ++i) { @@ -72,7 +72,7 @@ void append_escaped(extra::string *es, csubstr val) } } // flush the rest - this->append(val.sub(prev)); + es->append(val.sub(prev)); #undef _c4flush_use_instead } diff --git a/test/test_suite/test_suite_event_handler.hpp b/test/test_suite/test_suite_event_handler.hpp index 5730374dd..5f4993952 100644 --- a/test/test_suite/test_suite_event_handler.hpp +++ b/test/test_suite/test_suite_event_handler.hpp @@ -7,9 +7,6 @@ #ifndef _C4_YML_EVENT_HANDLER_STACK_HPP_ #include "c4/yml/event_handler_stack.hpp" #endif -#ifndef _C4_YML_STD_STRING_HPP_ -#include "c4/yml/std/string.hpp" -#endif #ifndef _C4_YML_DETAIL_PRINT_HPP_ #include "c4/yml/detail/print.hpp" #endif @@ -95,8 +92,9 @@ struct EventHandlerYamlStd : public EventHandlerStackflags |= RUNK|RTOP; - for(auto &td : m_tag_directives) + for(TagDirective &td : m_tag_directives) td = {}; + m_val_buffers.clear(); m_val_buffers.resize((size_t)m_stack.size()); m_arena.clear(); m_arena.reserve(1024); @@ -629,20 +627,20 @@ struct 
EventHandlerYamlStd : public EventHandlerStacklevel < m_val_buffers.size()); - return m_val_buffers[(size_t)m_curr->level]; + _RYML_CB_ASSERT(m_stack.m_callbacks, m_curr->level < m_val_buffers.size()); + return m_val_buffers[m_curr->level]; } EventSink& _buf_(id_type level) noexcept { - _RYML_CB_ASSERT(m_stack.m_callbacks, (size_t)level < m_val_buffers.size()); - return m_val_buffers[(size_t)level]; + _RYML_CB_ASSERT(m_stack.m_callbacks, level < m_val_buffers.size()); + return m_val_buffers[level]; } EventSink const& _buf_(id_type level) const noexcept { - _RYML_CB_ASSERT(m_stack.m_callbacks, (size_t)level < m_val_buffers.size()); - return m_val_buffers[(size_t)level]; + _RYML_CB_ASSERT(m_stack.m_callbacks, level < m_val_buffers.size()); + return m_val_buffers[level]; } static void _buf_flush_to_(EventSink &C4_RESTRICT src, EventSink &C4_RESTRICT dst) noexcept @@ -665,8 +663,8 @@ struct EventHandlerYamlStd : public EventHandlerStack m_val_buffers.size()) - m_val_buffers.resize((size_t)size_needed); + if(size_needed > m_val_buffers.size()) + m_val_buffers.resize(size_needed); } C4_ALWAYS_INLINE void _send_(csubstr s) noexcept { _buf_().append(s); } @@ -678,7 +676,7 @@ struct EventHandlerYamlStd : public EventHandlerStack