From 0bfdfb6e97a1b8f164ea64e380558a5b89838264 Mon Sep 17 00:00:00 2001 From: Innokentii Sennovskii Date: Mon, 5 Dec 2022 16:49:26 +0000 Subject: [PATCH] Fuzzing mode + bigfield, safe_uint and field fuzzers take 2 (#1787) * Enabled fuzzer mode * Added safe_uint fuzzer * Optimized check_circuit method in turbo_composer for more efficient fuzzing * Added fixes to circumvent assert checks when fuzzing with asserts * Added bigfield fuzzer * Added assert circumvention to bigfield fuzzer * Add fuzzer enhancements * Fixing stuff broken by low-memory prover changes * Documentation changes * Add byte_array fuzzer * Add bit_array fuzzer * bit_array fuzzer: Support the SLICE operation * bit_array fuzzer: Support the SET operation * byte_array fuzzer: Support the SET operation * field_t fuzzer: Support the SET operation * byte_array fuzzer: Invoke safe_uint_t-based constructor in SET operation * byte_array fuzzer: Fix invocation of safe_uint_t-based constructor * bigfield_t fuzzer: Support the SET operation * field_t fuzzer: Call additional field_t functions - invert() - accumulate() - assert_is_in_set() - decompose_into_bits() - operator bool_t() * bigfield_t fuzzer: Additional conversions in the SET operation * byte_array fuzzer: Invoke field_t constructor with variable byte size * Add uint fuzzer * uint fuzzer: Test all types (u8, u16, u32, u64) simultaneously * uint fuzzer: Implement the NOT opcode * uint fuzzer: Ensure uint256_t returned by get_value() has appropriate width * uint fuzzer: Add explicit sanity checks around byte_array_t/field_t conversions * uint fuzzer: Make methods static or const were appropriate * Build fuzzers with different composers * Add missing file * bigfield fuzzer: Add invariant check * Fix writeInstruction in bit_array fuzzer * Add bool fuzzer * bigfield fuzzer: Fix call to dual field_t constructor * bigfield fuzzer: Avoid divisions by zero * bigfield fuzzer: Fix call to dual field_t constructor * bigfield fuzzer: Multi-numerator division * 
bigfield fuzzer: Assert bigfield_t context pointer is not NULL * bigfield fuzzer: Fix multi-numerator division * Fixed cpp/hpp issue * Applied Guido's postprocessing fix * Added check_circuit fix for turbocomposer * Slightly updated docs * Disabled check_circuit error printing in Turbo in fuzzing mode * Updated toolchain * Disabling executables in a different way * More cmake optimizations * A few more makelist updates * A bit more * Removed free space Co-authored-by: Guido Vranken --- CMakeLists.txt | 24 + README.md | 16 + cmake/module.cmake | 26 +- cmake/toolchain.cmake | 14 +- docs/Fuzzing.md | 100 + src/aztec/common/fuzzer.hpp | 598 +++++ src/aztec/common/fuzzer_constants.hpp | 7 + src/aztec/plonk/composer/turbo_composer.cpp | 65 +- src/aztec/plonk/composer/turbo_composer.hpp | 2 +- .../turbo_arithmetic_widget.hpp | 14 + .../turbo_fixed_base_widget.hpp | 14 + .../transition_widgets/turbo_logic_widget.hpp | 13 + .../transition_widgets/turbo_range_widget.hpp | 13 + src/aztec/rollup/CMakeLists.txt | 4 +- src/aztec/rollup/proofs/CMakeLists.txt | 2 +- .../primitives/bigfield/bigfield.fuzzer.hpp | 1970 ++++++++++++++++ .../bigfield/bigfield_all.fuzzer.cpp | 3 + .../bigfield/bigfield_standard.fuzzer.cpp | 3 + .../bigfield/bigfield_turbo.fuzzer.cpp | 3 + .../primitives/bit_array/bit_array.fuzzer.hpp | 922 ++++++++ .../bit_array/bit_array_all.fuzzer.cpp | 3 + .../bit_array/bit_array_standard.fuzzer.cpp | 3 + .../bit_array/bit_array_turbo.fuzzer.cpp | 3 + .../stdlib/primitives/bool/bool.fuzzer.hpp | 884 ++++++++ .../primitives/bool/bool_all.fuzzer.cpp | 3 + .../primitives/bool/bool_standard.fuzzer.cpp | 3 + .../primitives/bool/bool_turbo.fuzzer.cpp | 3 + .../byte_array/byte_array.fuzzer.hpp | 970 ++++++++ .../byte_array/byte_array_all.fuzzer.cpp | 3 + .../byte_array/byte_array_standard.fuzzer.cpp | 3 + .../byte_array/byte_array_turbo.fuzzer.cpp | 3 + .../stdlib/primitives/field/field.fuzzer.hpp | 2019 +++++++++++++++++ .../primitives/field/field_all.fuzzer.cpp | 3 + 
.../field/field_standard.fuzzer.cpp | 3 + .../primitives/field/field_turbo.fuzzer.cpp | 3 + .../primitives/safe_uint/safe_uint.fuzzer.hpp | 1457 ++++++++++++ .../safe_uint/safe_uint_all.fuzzer.cpp | 3 + .../safe_uint/safe_uint_standard.fuzzer.cpp | 3 + .../safe_uint/safe_uint_turbo.fuzzer.cpp | 3 + .../stdlib/primitives/uint/uint.fuzzer.hpp | 1585 +++++++++++++ .../primitives/uint/uint_all.fuzzer.cpp | 3 + .../primitives/uint/uint_standard.fuzzer.cpp | 3 + .../primitives/uint/uint_turbo.fuzzer.cpp | 3 + 43 files changed, 10749 insertions(+), 33 deletions(-) create mode 100644 docs/Fuzzing.md create mode 100644 src/aztec/common/fuzzer.hpp create mode 100644 src/aztec/common/fuzzer_constants.hpp create mode 100644 src/aztec/stdlib/primitives/bigfield/bigfield.fuzzer.hpp create mode 100644 src/aztec/stdlib/primitives/bigfield/bigfield_all.fuzzer.cpp create mode 100644 src/aztec/stdlib/primitives/bigfield/bigfield_standard.fuzzer.cpp create mode 100644 src/aztec/stdlib/primitives/bigfield/bigfield_turbo.fuzzer.cpp create mode 100644 src/aztec/stdlib/primitives/bit_array/bit_array.fuzzer.hpp create mode 100644 src/aztec/stdlib/primitives/bit_array/bit_array_all.fuzzer.cpp create mode 100644 src/aztec/stdlib/primitives/bit_array/bit_array_standard.fuzzer.cpp create mode 100644 src/aztec/stdlib/primitives/bit_array/bit_array_turbo.fuzzer.cpp create mode 100644 src/aztec/stdlib/primitives/bool/bool.fuzzer.hpp create mode 100644 src/aztec/stdlib/primitives/bool/bool_all.fuzzer.cpp create mode 100644 src/aztec/stdlib/primitives/bool/bool_standard.fuzzer.cpp create mode 100644 src/aztec/stdlib/primitives/bool/bool_turbo.fuzzer.cpp create mode 100644 src/aztec/stdlib/primitives/byte_array/byte_array.fuzzer.hpp create mode 100644 src/aztec/stdlib/primitives/byte_array/byte_array_all.fuzzer.cpp create mode 100644 src/aztec/stdlib/primitives/byte_array/byte_array_standard.fuzzer.cpp create mode 100644 src/aztec/stdlib/primitives/byte_array/byte_array_turbo.fuzzer.cpp create mode 
100644 src/aztec/stdlib/primitives/field/field.fuzzer.hpp create mode 100644 src/aztec/stdlib/primitives/field/field_all.fuzzer.cpp create mode 100644 src/aztec/stdlib/primitives/field/field_standard.fuzzer.cpp create mode 100644 src/aztec/stdlib/primitives/field/field_turbo.fuzzer.cpp create mode 100644 src/aztec/stdlib/primitives/safe_uint/safe_uint.fuzzer.hpp create mode 100644 src/aztec/stdlib/primitives/safe_uint/safe_uint_all.fuzzer.cpp create mode 100644 src/aztec/stdlib/primitives/safe_uint/safe_uint_standard.fuzzer.cpp create mode 100644 src/aztec/stdlib/primitives/safe_uint/safe_uint_turbo.fuzzer.cpp create mode 100644 src/aztec/stdlib/primitives/uint/uint.fuzzer.hpp create mode 100644 src/aztec/stdlib/primitives/uint/uint_all.fuzzer.cpp create mode 100644 src/aztec/stdlib/primitives/uint/uint_standard.fuzzer.cpp create mode 100644 src/aztec/stdlib/primitives/uint/uint_turbo.fuzzer.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 8c9a883faf5..5805f2db924 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -24,6 +24,30 @@ if(ARM) set(RUN_HAVE_POSIX_REGEX 0) endif() +if(FUZZING) + add_definitions(-DFUZZING=1) + + if(DISABLE_CUSTOM_MUTATORS) + add_definitions(-DDISABLE_CUSTOM_MUTATORS=1) + endif() + + set(SANITIZER_OPTIONS "") + + if(ADDRESS_SANITIZER) + set(SANITIZER_OPTIONS ${SANITIZER_OPTIONS} -fsanitize=address) + endif() + + if(UNDEFINED_BEHAVIOUR_SANITIZER) + set(SANITIZER_OPTIONS ${SANITIZER_OPTIONS} -fsanitize=undefined -fno-sanitize=alignment) + endif() + + add_compile_options(-fsanitize=fuzzer-no-link ${SANITIZER_OPTIONS}) + + set(WASM OFF) + set(BENCHMARKS OFF) + set(TESTING OFF) +endif() + if(WASM) message(STATUS "Compiling for WebAssembly.") set(DISABLE_ASM ON) diff --git a/README.md b/README.md index bb3ac28d2a7..9e478a22eae 100644 --- a/README.md +++ b/README.md @@ -92,6 +92,7 @@ CMake can be passed various build options on it's command line: - `-DTESTING=ON | OFF`: Enable/disable building of tests. 
- `-DBENCHMARK=ON | OFF`: Enable/disable building of benchmarks. - `-DTOOLCHAIN=`: Use one of the preconfigured toolchains. +- `-DFUZZING=ON | OFF`: Enable building various fuzzers. ### WASM build @@ -117,3 +118,18 @@ Tests can be built and run like: make ecc_tests wasmtime --dir=.. ./bin/ecc_tests ``` + +### Fuzzing build + +To build: +``` +mkdir build-fuzzing && cd build-fuzzing +cmake -DTOOLCHAIN=x86_64-linux-clang -DFUZZING=ON .. +make +``` +The fuzzing build turns off building tests and benchmarks, since they are incompatible with the libFuzzer interface. + +To turn on address sanitizer add `-DADDRESS_SANITIZER=ON`. Note that address sanitizer can be used to explore crashes. +Sometimes you might have to specify the path to llvm-symbolizer. You have to do it with `export ASAN_SYMBOLIZER_PATH=`. +For undefined behaviour sanitizer add `-DUNDEFINED_BEHAVIOUR_SANITIZER=ON`. +Note that the fuzzer can be noticeably slower with ASan (2-3x slower) or UBSan on, so it is best to run a non-sanitized build first, minimize the testcase and then run it for a bit of time with sanitizers. 
diff --git a/cmake/module.cmake b/cmake/module.cmake index a4edd3cf9f9..063e3f690ad 100644 --- a/cmake/module.cmake +++ b/cmake/module.cmake @@ -15,7 +15,7 @@ function(barretenberg_module MODULE_NAME) file(GLOB_RECURSE SOURCE_FILES *.cpp) file(GLOB_RECURSE HEADER_FILES *.hpp) - list(FILTER SOURCE_FILES EXCLUDE REGEX ".*\.(test|bench).cpp$") + list(FILTER SOURCE_FILES EXCLUDE REGEX ".*\.(fuzzer|test|bench).cpp$") if(SOURCE_FILES) add_library( @@ -103,6 +103,30 @@ function(barretenberg_module MODULE_NAME) ) endif() + file(GLOB_RECURSE FUZZERS_SOURCE_FILES *.fuzzer.cpp) + if(FUZZING AND FUZZERS_SOURCE_FILES) + foreach(FUZZER_SOURCE_FILE ${FUZZERS_SOURCE_FILES}) + get_filename_component(FUZZER_NAME_STEM ${FUZZER_SOURCE_FILE} NAME_WE) + add_executable( + ${MODULE_NAME}_${FUZZER_NAME_STEM}_fuzzer + ${FUZZER_SOURCE_FILE} + ) + + target_link_options( + ${MODULE_NAME}_${FUZZER_NAME_STEM}_fuzzer + PRIVATE + "-fsanitize=fuzzer" + ${SANITIZER_OPTIONS} + ) + + target_link_libraries( + ${MODULE_NAME}_${FUZZER_NAME_STEM}_fuzzer + PRIVATE + ${MODULE_LINK_NAME} + ) + endforeach() + endif() + file(GLOB_RECURSE BENCH_SOURCE_FILES *.bench.cpp) if(BENCHMARKS AND BENCH_SOURCE_FILES) add_library( diff --git a/cmake/toolchain.cmake b/cmake/toolchain.cmake index 9419a7d199c..4de860fac49 100644 --- a/cmake/toolchain.cmake +++ b/cmake/toolchain.cmake @@ -1,6 +1,10 @@ -if(NOT TOOLCHAIN) - set(TOOLCHAIN "x86_64-linux-clang" CACHE STRING "Build toolchain." FORCE) -endif() -message(STATUS "Toolchain: ${TOOLCHAIN}") +if (CMAKE_C_COMPILER AND CMAKE_CXX_COMPILER) + message(STATUS "Toolchain: manually chosen ${CMAKE_C_COMPILER} and ${CMAKE_CXX_COMPILER}") +else() + if(NOT TOOLCHAIN) + set(TOOLCHAIN "x86_64-linux-clang" CACHE STRING "Build toolchain." 
FORCE) + endif() + message(STATUS "Toolchain: ${TOOLCHAIN}") -include("./cmake/toolchains/${TOOLCHAIN}.cmake") \ No newline at end of file + include("./cmake/toolchains/${TOOLCHAIN}.cmake") +endif() \ No newline at end of file diff --git a/docs/Fuzzing.md b/docs/Fuzzing.md new file mode 100644 index 00000000000..629e7526ea2 --- /dev/null +++ b/docs/Fuzzing.md @@ -0,0 +1,100 @@ +# Fuzzing barretenberg +## Intro +We are gradually introducing fuzzing of various primitives into barretenberg, focusing first and foremost on in-circuit types. If you are developing / patching a primitive and there is a fuzzer available for it, please take the time to update the fuzzer (if you've added new functionality) and run it for at least a few hours to increase security. + +## Build + +To build with standard clang: +``` +mkdir build-fuzzing && cd build-fuzzing +cmake -DFUZZING=ON .. +make +``` +The fuzzing build turns off building tests and benchmarks, since they are incompatible with the libFuzzer interface. + +To turn on address sanitizer add `-DADDRESS_SANITIZER=ON`. Note that address sanitizer can be used to explore crashes. +Sometimes you might have to specify the path to llvm-symbolizer. You have to do it with `export ASAN_SYMBOLIZER_PATH=`. +For undefined behaviour sanitizer add `-DUNDEFINED_BEHAVIOUR_SANITIZER=ON`. +Note that the fuzzer can be noticeably slower with ASan (2-3x slower) or UBSan on, so it is best to run a non-sanitized build first, minimize the testcase and then run it for a bit of time with sanitizers. + +Building with clang 13 or later is recommended, since libFuzzer contains and by default utilizes the entropic power schedule, which is considered more efficient +than the standard one present in previous versions. 
+You can download the latest clang+llvm release here: https://github.com/llvm/llvm-project/releases + +To set up cmake with another version of clang and fuzzing on: + +```bash +cmake -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_C_COMPILER= -DCMAKE_CXX_COMPILER= -DFUZZING=ON .. +``` + +## Currently supported +Currently we have fuzzers for bigfield, bit_array, bool, byte_array, field, safe_uint and uint. Each of them is available in 3 versions: StandardPlonk, TurboPlonk, ALL (differential fuzzing of 2 versions). +To compile all fuzzers just type `make`. +## Running the fuzzer +TODO: add information about saved testcases + +You can simply run a fuzzer by executing the built executable, for example, +```bash +./bin/stdlib_primitives_bigfield_turbo_fuzzer +``` +This is useful if you added a feature/instruction or changed some logic and want to quickly test if there are any really bad bugs. +To run the fuzzer seriously, I'd recommend: +```bash +mkdir ../../../_testcases; +mkdir crashes; +./bin/ -timeout=1 -len_control=500 -workers=8 -jobs=8 -entropic=1 -shrink=1 -artifact_prefix=crashes/ -use_value_profile=1 ../../../_testcases +``` +You can watch the progress of the fuzzer in one of the generated logs fuzz-.log +The purpose of each parameter: ++ -timeout=1 - If a testcase takes more than 1 second to execute, it will be treated as a crash ++ -len_control=500 - Slows down the increase of testcase size. Especially important for heavy classes like bigfield, keeps the number of executions per second at a decent rate ++ -workers=8 - The number of worker processes that can simultaneously execute testcases. Should be less than or equal to the number of jobs ++ -jobs=8 - After how many crashes the fuzzer will stop fuzzing. If a crash occurs and the number of jobs is more than workers then the fuzzer will proceed to give the worker a new job. The 8/8 worker/job configuration ensures that the fuzzer will quit after 8 crashes and until the first crash all the workers are busy. 
++ -entropic=1 - Entropic should be enabled by default, but in case it isn't, enable it. A better power schedule than the old one. ++ -shrink=1 - If a new testcase is encountered that has the same coverage as some previous one in the corpus and the testcase is smaller, replace the one in the corpus with the new one. Helps keep exec/s higher. ++ -artifact_prefix=crashes/ - Where to save crashes/timeouts/ooms. ++ -use_value_profile=1 - Leverage libFuzzer's internal CMP analysis. Very useful, but blows the corpus up. ++ (../../../_testcases) - The path to the folder where corpus testcases are going to be saved and loaded from (also loads testcases from there at the start of fuzzing). + +Log structure is described here https://llvm.org/docs/LibFuzzer.html + +If you've found an issue and stopped the fuzzer, you can minimize the corpus to get rid of repetitions and then start from a minimized corpus + +```bash +mkdir ../../../_testcases_minimized; +./bin/ -merge=1 -use_value_profile=1 ../../../_testcases_minimized ../../../_testcases; +rm ../../../_testcases/*; +cp ../../../_testcases_minimized/* ../../../_testcases; +``` + +If you've found a crash, you can minimize the crash to make the root cause more obvious: +```bash +mkdir minimized_crashes +./bin/ -minimize_crash=1 -artifact_prefix=minimized_crashes/ +``` +Also, both the bigfield and safe_uint fuzzers contain the SHOW_INFORMATION preprocessor cases, which enable the printing of instructions and values to make debugging the crash easier. + +# Coverage reports + +Build with coverage instrumentation: + +```bash +mkdir build-coverage/ +cd build-coverage/ +cmake -DFUZZING=ON -DCMAKE_CXX_FLAGS="-fprofile-instr-generate -fcoverage-mapping" .. 
+make -j$(nproc) +``` + +Then run the fuzzer on the corpus and generate the HTML coverage reports: + +``` +LLVM_PROFILE_FILE="coverage.profraw" ./bin/ corpus/ -runs=1 +llvm-profdata merge -sparse coverage.profraw -o coverage.profdata +llvm-cov show -output-dir=out/report -format=html ./bin/ -instr-profile=coverage.profdata +``` + +View the coverage reports with your web browser: + +``` +python3 -m http.server --directory out/ +``` diff --git a/src/aztec/common/fuzzer.hpp b/src/aztec/common/fuzzer.hpp new file mode 100644 index 00000000000..9be101f4008 --- /dev/null +++ b/src/aztec/common/fuzzer.hpp @@ -0,0 +1,598 @@ +#pragma once +#include +#include +#include + +#define PARENS () + +// Rescan macro tokens 256 times +#define EXPAND(arg) EXPAND1(EXPAND1(EXPAND1(EXPAND1(arg)))) +#define EXPAND1(arg) EXPAND2(EXPAND2(EXPAND2(EXPAND2(arg)))) +#define EXPAND2(arg) EXPAND3(EXPAND3(EXPAND3(EXPAND3(arg)))) +#define EXPAND3(arg) EXPAND4(EXPAND4(EXPAND4(EXPAND4(arg)))) +#define EXPAND4(arg) arg + +#define FOR_EACH(macro, ...) __VA_OPT__(EXPAND(FOR_EACH_HELPER(macro, __VA_ARGS__))) +#define FOR_EACH_HELPER(macro, a1, ...) 
macro(a1) __VA_OPT__(FOR_EACH_AGAIN PARENS(macro, __VA_ARGS__)) +#define FOR_EACH_AGAIN() FOR_EACH_HELPER + +#define ALL_POSSIBLE_OPCODES \ + CONSTANT, WITNESS, CONSTANT_WITNESS, ADD, SUBTRACT, MULTIPLY, DIVIDE, ADD_TWO, MADD, MULT_MADD, MSUB_DIV, SQR, \ + ASSERT_EQUAL, ASSERT_NOT_EQUAL, SQR_ADD, ASSERT_EQUAL, ASSERT_NOT_EQUAL, SQR_ADD, SUBTRACT_WITH_CONSTRAINT, \ + DIVIDE_WITH_CONSTRAINTS, SLICE, ASSERT_ZERO, ASSERT_NOT_ZERO, COND_NEGATE, ADD_MULTI, ASSERT_VALID, \ + COND_SELECT, DOUBLE, RANDOMSEED, SELECT_IF_ZERO, SELECT_IF_EQ, REVERSE, GET_BIT, SET_BIT, SET, INVERT, AND, \ + OR, XOR, MODULO, SHL, SHR, ROL, ROR, NOT + +struct HavocSettings { + size_t GEN_LLVM_POST_MUTATION_PROB; // Controls frequency of additional mutation after structural ones + size_t GEN_MUTATION_COUNT_LOG; // This is the logarithm of the number of micromutations applied during mutation of a + // testcase + size_t GEN_STRUCTURAL_MUTATION_PROBABILITY; // The probability of applying a structural mutation + // (DELETION/DUPLICATION/INSERTION/SWAP) + size_t GEN_VALUE_MUTATION_PROBABILITY; // The probability of applying a value mutation + size_t ST_MUT_DELETION_PROBABILITY; // The probability of applying DELETION mutation + size_t ST_MUT_DUPLICATION_PROBABILITY; // The probability of applying DUPLICATION mutation + size_t ST_MUT_INSERTION_PROBABILITY; // The probability of applying INSERTION mutation + size_t ST_MUT_MAXIMUM_DELETION_LOG; // The logarithm of the maximum of deletions + size_t ST_MUT_MAXIMUM_DUPLICATION_LOG; // The logarithm of the maximum of duplication + size_t ST_MUT_SWAP_PROBABILITY; // The probability of a SWAP mutation + size_t VAL_MUT_LLVM_MUTATE_PROBABILITY; // The probablity of using the LLVM mutator on field element value + size_t VAL_MUT_MONTGOMERY_PROBABILITY; // The probability of converting to montgomery form before applying value + // mutations + size_t VAL_MUT_NON_MONTGOMERY_PROBABILITY; // The probability of not converting to montgomery form before applying + // value 
mutations + size_t VAL_MUT_SMALL_ADDITION_PROBABILITY; // The probability of performing small additions + size_t VAL_MUT_SMALL_MULTIPLICATION_PROBABILITY; // The probability of performing small multiplications + size_t VAL_MUT_SPECIAL_VALUE_PROBABILITY; // The probability of assigning special values (0,1, p-1, p-2, p-1/2) + std::vector structural_mutation_distribution; // Holds the values to quickly select a structural mutation + // based on chosen probabilities + std::vector value_mutation_distribution; // Holds the values to quickly select a value mutation based on + // chosen probabilities +}; +#ifdef HAVOC_TESTING + +HavocSettings fuzzer_havoc_settings; +#endif +// This is an external function in Libfuzzer used internally by custom mutators +extern "C" size_t LLVMFuzzerMutate(uint8_t* Data, size_t Size, size_t MaxSize); + +/** + * @brief Class for quickly deterministically creating new random values. We don't care about distribution much here. + * + */ +class FastRandom { + uint32_t state; + + public: + FastRandom(uint32_t seed) { reseed(seed); } + uint32_t next() + { + state = static_cast((uint64_t(state) * uint64_t(363364578) + uint64_t(537)) % uint64_t(3758096939)); + return state; + } + void reseed(uint32_t seed) + { + if (seed == 0) { + seed = 1; + } + state = seed; + } +}; + +/** + * @brief Concept for a simple PRNG which returns a uint32_t when next is called + * + * @tparam T + */ +template concept SimpleRng = requires(T a) +{ + { + a.next() + } + ->std::convertible_to; +}; +/** + * @brief Concept for forcing ArgumentSizes to be size_t + * + * @tparam T + */ +template concept InstructionArgumentSizes = requires +{ + { + std::make_tuple(T::CONSTANT, + T::WITNESS, + T::CONSTANT_WITNESS, + T::ADD, + T::SUBTRACT, + T::MULTIPLY, + T::DIVIDE, + T::ADD_TWO, + T::MADD, + T::MULT_MADD, + T::MSUB_DIV, + T::SQR, + T::SQR_ADD, + T::SUBTRACT_WITH_CONSTRAINT, + T::DIVIDE_WITH_CONSTRAINTS, + T::SLICE, + T::ASSERT_ZERO, + T::ASSERT_NOT_ZERO) + } + ->std::same_as>; +}; 
+ +/** + * @brief Concept for Havoc Configurations + * + * @tparam T + */ +template concept HavocConfigConstraint = requires +{ + { + std::make_tuple(T::GEN_MUTATION_COUNT_LOG, T::GEN_STRUCTURAL_MUTATION_PROBABILITY) + } + ->std::same_as>; + T::GEN_MUTATION_COUNT_LOG <= 7; +}; +/** + * @brief Concept specifying the class used by the fuzzer + * + * @tparam T + */ +template concept ArithmeticFuzzHelperConstraint = requires +{ + typename T::ArgSizes; + typename T::Instruction; + typename T::ExecutionState; + typename T::ExecutionHandler; + InstructionArgumentSizes; + // HavocConfigConstraint; +}; + +/** + * @brief Fuzzer uses only composers with check_circuit function + * + * @tparam T + */ +template concept CheckableComposer = requires(T a) +{ + { + a.check_circuit() + } + ->std::same_as; +}; + +/** + * @brief The fuzzer can use a postprocessing function that is specific to the type being fuzzed + * + * @tparam T Type being tested + * @tparam Composer + * @tparam Context The class containing the full context + */ +template +concept PostProcessingEnabled = requires(Composer composer, Context context) +{ + { + T::postProcess(&composer, context) + } + ->std::same_as; +}; + +/** + * @brief This concept is used when we want to limit the number of executions of certain instructions (for example, + * divisions and multiplications in bigfield start to bog down the fuzzer) + * + * @tparam T + */ +template concept InstructionWeightsEnabled = requires +{ + typename T::InstructionWeights; + T::InstructionWeights::_LIMIT; +}; +/** + * @brief A templated class containing most of the fuzzing logic for a generic Arithmetic class + * + * @tparam T + */ +template requires ArithmeticFuzzHelperConstraint class ArithmeticFuzzHelper { + private: + /** + * @brief Mutator swapping two instructions together + * + * @param instructions + * @param rng + */ + inline static void swapTwoInstructions(std::vector& instructions, FastRandom& rng) + { + const size_t instructions_count = 
instructions.size(); + if (instructions_count <= 2) { + return; + } + const size_t first_element_index = rng.next() % instructions_count; + size_t second_element_index = rng.next() % instructions_count; + if (first_element_index == second_element_index) { + second_element_index = (second_element_index + 1) % instructions_count; + } + std::iter_swap(instructions.begin() + static_cast(first_element_index), + instructions.begin() + static_cast(second_element_index)); + } + + /** + * @brief Mutator, deleting a sequence of instructions + * + * @param instructions + * @param rng + * @param havoc_settings + */ + inline static void deleteInstructions(std::vector& instructions, + FastRandom& rng, + HavocSettings& havoc_settings) + { + + const size_t instructions_count = instructions.size(); + if (instructions_count == 0) { + return; + } + if (rng.next() & 1) { + instructions.erase(instructions.begin() + (rng.next() % instructions_count)); + } else { + // We get the logarithm of number of instructions and subtract 1 to delete at most half + const size_t max_deletion_log = + std::min(static_cast(64 - __builtin_clzll(static_cast(instructions_count)) - 1), + havoc_settings.ST_MUT_MAXIMUM_DELETION_LOG); + + if (max_deletion_log == 0) { + return; + } + const size_t deletion_size = 1 << (rng.next() % max_deletion_log); + const size_t start = rng.next() % (instructions_count + 1 - deletion_size); + instructions.erase(instructions.begin() + static_cast(start), + instructions.begin() + static_cast(start + deletion_size)); + } + } + /** + * @brief Mutator duplicating an instruction + * + * @param instructions + * @param rng + * @param havoc_settings + */ + inline static void duplicateInstruction(std::vector& instructions, + FastRandom& rng, + HavocSettings& havoc_settings) + { + const size_t instructions_count = instructions.size(); + if (instructions_count == 0) { + return; + } + const size_t duplication_size = 1 << (rng.next() % havoc_settings.ST_MUT_MAXIMUM_DUPLICATION_LOG); + 
typename T::Instruction chosen_instruction = instructions[rng.next() % instructions_count]; + instructions.insert( + instructions.begin() + (rng.next() % (instructions_count + 1)), duplication_size, chosen_instruction); + } + inline static void insertRandomInstruction(std::vector& instructions, + FastRandom& rng, + HavocSettings& havoc_settings) + { + (void)havoc_settings; + instructions.insert(instructions.begin() + static_cast(rng.next() % (instructions.size() + 1)), + T::Instruction::template generateRandom(rng)); + } + /** + * @brief Mutator for instruction structure + * + * @param instructions + * @param rng + * @param havoc_settings + */ + inline static void mutateInstructionStructure(std::vector& instructions, + FastRandom& rng, + HavocSettings& havoc_settings) + { + const size_t structural_mutators_count = havoc_settings.structural_mutation_distribution.size(); + const size_t prob_pool = havoc_settings.structural_mutation_distribution[structural_mutators_count - 1]; + const size_t choice = rng.next() % prob_pool; + if (choice < havoc_settings.structural_mutation_distribution[0]) { + deleteInstructions(instructions, rng, havoc_settings); + } else if (choice < havoc_settings.structural_mutation_distribution[1]) { + + duplicateInstruction(instructions, rng, havoc_settings); + } else if (choice < havoc_settings.structural_mutation_distribution[2]) { + insertRandomInstruction(instructions, rng, havoc_settings); + } else { + + swapTwoInstructions(instructions, rng); + } + } + /** + * @brief Choose a random instruction from the vector and mutate it + * + * @param instructions Vector of instructions + * @param rng Pseudorandom number generator + * @param havoc_settings Mutation settings + */ + inline static void mutateInstructionValue(std::vector& instructions, + FastRandom& rng, + HavocSettings& havoc_settings) + { + + const size_t instructions_count = instructions.size(); + if (instructions_count == 0) { + return; + } + const size_t chosen = rng.next() % 
instructions_count; + instructions[chosen] = + T::Instruction::template mutateInstruction(instructions[chosen], rng, havoc_settings); + } + + static void mutateInstructionVector(std::vector& instructions, FastRandom& rng) + { +#ifdef HAVOC_TESTING + // If we are testing which havoc settings are best, then we use global parameters + const size_t mutation_count = 1 << fuzzer_havoc_settings.GEN_MUTATION_COUNT_LOG; +#else + const size_t mutation_count = 1 << T::HavocConfig::MUTATION_COUNT_LOG; + HavocSettings fuzzer_havoc_settings; + // FILL the values +#endif + for (size_t i = 0; i < mutation_count; i++) { + uint32_t val = rng.next(); + if ((val % (fuzzer_havoc_settings.GEN_STRUCTURAL_MUTATION_PROBABILITY + + fuzzer_havoc_settings.GEN_VALUE_MUTATION_PROBABILITY)) < + fuzzer_havoc_settings.GEN_STRUCTURAL_MUTATION_PROBABILITY) { + // mutate structure + mutateInstructionStructure(instructions, rng, fuzzer_havoc_settings); + } else { + // mutate a single instruction vector + + mutateInstructionValue(instructions, rng, fuzzer_havoc_settings); + } + } + } + + public: + /** + * @brief Splice two instruction vectors into one randomly + * + * @param vecA First instruction vector + * @param vecB Second instruction vector + * @param rng PRNG + * @return Resulting vector of instructions + */ + static std::vector crossoverInstructionVector( + const std::vector& vecA, + const std::vector& vecB, + FastRandom& rng) + { + // Get vector sizes + const size_t vecA_size = vecA.size(); + const size_t vecB_size = vecB.size(); + // If one of them is empty, just return the other one + if (vecA_size == 0) { + return vecB; + } + if (vecB_size == 0) { + return vecA; + } + std::vector result; + // Choose the size of th resulting vector + const size_t final_result_size = rng.next() % (vecA_size + vecB_size) + 1; + size_t indexA = 0, indexB = 0; + size_t* inIndex = &indexA; + size_t inSize = vecA_size; + auto inIterator = vecA.begin(); + size_t current_result_size = 0; + bool currentlyUsingA = 
true; + // What we do is basically pick a sequence from one, follow with a sequence from the other + while (current_result_size < final_result_size && (indexA < vecA_size || indexB < vecB_size)) { + // Get the size left + size_t result_size_left = final_result_size - current_result_size; + // If we can still read from this vector + if (*inIndex < inSize) { + // Get the size left in this vector and in the output vector and pick the lowest + size_t inSizeLeft = inSize - *inIndex; + size_t maxExtraSize = std::min(result_size_left, inSizeLeft); + if (maxExtraSize != 0) { + // If not zero, get a random number of elements from input + size_t copySize = (rng.next() % maxExtraSize) + 1; + result.insert(result.begin() + static_cast(current_result_size), + inIterator + static_cast((*inIndex)), + + inIterator + static_cast((*inIndex) + copySize)); + // Update indexes and sizes + *inIndex += copySize; + current_result_size += copySize; + } + } + // Switch input vector + inIndex = currentlyUsingA ? &indexB : &indexA; + inSize = currentlyUsingA ? vecB_size : vecA_size; + inIterator = currentlyUsingA ? vecB.begin() : vecA.begin(); + currentlyUsingA = !currentlyUsingA; + } + // Return spliced vector + return result; + } + /** + * @brief Parses a given data buffer into a vector of instructions for testing the arithmetic + * + * @param Data Pointer to the data buffer + * @param Size Data buffer size + * @return A vector of instructions + */ + static std::vector parseDataIntoInstructions(const uint8_t* Data, size_t Size) + { + std::vector fuzzingInstructions; + uint8_t* pData = (uint8_t*)Data; + size_t size_left = Size; + while (size_left != 0) { + uint8_t chosen_operation = *pData; + size_left -= 1; + pData++; + // If the opcode is enabled (exists and arguments' size is not -1), check if it's the right opcode. 
If it + // is, parse it with a designated function +#define PARSE_OPCODE(name) \ + if constexpr (requires { T::ArgSizes::name; }) \ + if constexpr (T::ArgSizes::name != size_t(-1)) { \ + if (chosen_operation == T::Instruction::OPCODE::name) { \ + if (size_left < T::ArgSizes::name) { \ + return fuzzingInstructions; \ + } \ + fuzzingInstructions.push_back( \ + T::Parser::template parseInstructionArgs(pData)); \ + size_left -= T::ArgSizes::name; \ + pData += T::ArgSizes::name; \ + continue; \ + } \ + } + // Create handlers for all opcodes that are in ArgsSizes +#define PARSE_ALL_OPCODES(...) FOR_EACH(PARSE_OPCODE, __VA_ARGS__) + + PARSE_ALL_OPCODES(ALL_POSSIBLE_OPCODES) + } + return fuzzingInstructions; + } + /** + * @brief Write instructions into the buffer until there are no instructions left or there is no more space + * + * @param instructions Vector of fuzzing instructions + * @param Data Pointer to data buffer + * @param MaxSize Size of buffer + * @return How much of the buffer was filled with instructions + */ + static size_t writeInstructionsToBuffer(std::vector& instructions, + uint8_t* Data, + size_t MaxSize) + { + uint8_t* pData = Data; + size_t size_left = MaxSize; + for (auto& instruction : instructions) { + // If the opcode is enabled and it's this opcode, use a designated function to serialize it +#define WRITE_OPCODE_IF(name) \ + if constexpr (requires { T::ArgSizes::name; }) \ + if constexpr (T::ArgSizes::name != (size_t)-1) { \ + if (instruction.id == T::Instruction::OPCODE::name) { \ + if (size_left >= (T::ArgSizes::name + 1)) { \ + T::Parser::template writeInstruction(instruction, pData); \ + size_left -= (T::ArgSizes::name + 1); \ + pData += (T::ArgSizes::name + 1); \ + } else { \ + return MaxSize - size_left; \ + } \ + continue; \ + } \ + } + // Create handlers for all opcodes that are in ArgsSizes +#define WRITE_ALL_OPCODES(...) 
FOR_EACH(WRITE_OPCODE_IF, __VA_ARGS__) + + WRITE_ALL_OPCODES(ALL_POSSIBLE_OPCODES) + } + return MaxSize - size_left; + } + + /** + * @brief Execute instructions in a loop + * + * @tparam Composer composer used + * @param instructions + */ + template + inline static void executeInstructions( + std::vector& instructions) requires CheckableComposer + { + typename T::ExecutionState state; + Composer composer = Composer(); + circuit_should_fail = false; + size_t total_instruction_weight = 0; + (void)total_instruction_weight; + for (auto& instruction : instructions) { + // If instruction enabled and this is it, delegate to the handler +#define EXECUTE_OPCODE_IF(name) \ + if constexpr (requires { T::ArgSizes::name; }) \ + if constexpr (T::ArgSizes::name != size_t(-1)) { \ + if (instruction.id == T::Instruction::OPCODE::name) { \ + if constexpr (InstructionWeightsEnabled) { \ + if (!((total_instruction_weight + T::InstructionWeights::name) > T::InstructionWeights::_LIMIT)) { \ + total_instruction_weight += T::InstructionWeights::name; \ + if (T::ExecutionHandler::execute_##name(&composer, state, instruction)) { \ + return; \ + } \ + } else { \ + return; \ + } \ + } else { \ + \ + if (T::ExecutionHandler::execute_##name(&composer, state, instruction)) { \ + return; \ + } \ + } \ + } \ + } +#define EXECUTE_ALL_OPCODES(...) 
FOR_EACH(EXECUTE_OPCODE_IF, __VA_ARGS__) + + EXECUTE_ALL_OPCODES(ALL_POSSIBLE_OPCODES) + } + bool final_value_check = true; + // If there is a posprocessing function, use it + if constexpr (PostProcessingEnabled>) { + final_value_check = T::postProcess(&composer, state); +#ifdef SHOW_INFORMATION + if (!final_value_check) { + std::cerr << "Final value check failed" << std::endl; + } +#endif + } + bool check_result = composer.check_circuit() && final_value_check; + // If the circuit is correct, but it should fail, abort + if (check_result && circuit_should_fail) { + abort(); + } + // If the circuit is incorrect, but there's no reason, abort + if ((!check_result) && (!circuit_should_fail)) { + if (!final_value_check) { + std::cerr << "Final value check failed" << std::endl; + } else { + std::cerr << "Circuit failed" << std::endl; + } + + abort(); + } + } + + /** + * @brief Interpret the data buffer as a series of arithmetic instructions and mutate it accordingly + * + * @param Data Pointer to the buffer + * @param Size The initial filled size + * @param MaxSize The size of the buffer + * @return size_t The new length of data in the buffer + */ + static size_t MutateInstructionBuffer(uint8_t* Data, size_t Size, size_t MaxSize, FastRandom& rng) + { + // Parse the vector + std::vector instructions = parseDataIntoInstructions(Data, Size); + // Mutate the vector of instructions + mutateInstructionVector(instructions, rng); + // Serialize the vector of instructions back to buffer + return writeInstructionsToBuffer(instructions, Data, MaxSize); + } +}; + +template