From 5b1ebeb310134eba57dac1cd6be7cd2419be59da Mon Sep 17 00:00:00 2001 From: Michael Tautschnig Date: Mon, 1 Mar 2021 22:37:52 +0000 Subject: [PATCH] New expression: count_leading_zeros_exprt Rather than ad-hoc handling __builtin_clz and __lzcnt (and their variants) in the C front-end, make counting-leading-zeros available across the code base. --- regression/ansi-c/__builtin_clz-02/main.c | 10 ++ .../__builtin_clz-02/test.desc | 0 .../cbmc-library/__builtin_clz-02/main.c | 12 --- .../__builtin_clz-01/main.c | 4 + regression/cbmc/__builtin_clz-01/test.desc | 10 ++ src/analyses/goto_check.cpp | 37 +++++++- src/ansi-c/c_typecheck_expr.cpp | 25 ++--- src/ansi-c/expr2c.cpp | 3 + src/ansi-c/library/gcc.c | 58 ------------ src/ansi-c/library/windows.c | 43 --------- src/solvers/flattening/boolbv.cpp | 2 + src/solvers/smt2/smt2_conv.cpp | 4 + src/util/bitvector_expr.cpp | 38 ++++++++ src/util/bitvector_expr.h | 93 +++++++++++++++++++ src/util/format_expr.cpp | 2 + src/util/simplify_expr.cpp | 28 ++++++ src/util/simplify_expr_class.h | 4 + 17 files changed, 236 insertions(+), 137 deletions(-) create mode 100644 regression/ansi-c/__builtin_clz-02/main.c rename regression/{cbmc-library => ansi-c}/__builtin_clz-02/test.desc (100%) delete mode 100644 regression/cbmc-library/__builtin_clz-02/main.c rename regression/{cbmc-library => cbmc}/__builtin_clz-01/main.c (90%) create mode 100644 regression/cbmc/__builtin_clz-01/test.desc diff --git a/regression/ansi-c/__builtin_clz-02/main.c b/regression/ansi-c/__builtin_clz-02/main.c new file mode 100644 index 000000000000..af8b5e06167a --- /dev/null +++ b/regression/ansi-c/__builtin_clz-02/main.c @@ -0,0 +1,10 @@ +int main() +{ +#ifdef __GNUC__ + _Static_assert( + __builtin_clz(0xffU) == 8 * sizeof(unsigned) - 8, + "GCC/Clang compile-time constant"); +#endif + + return 0; +} diff --git a/regression/cbmc-library/__builtin_clz-02/test.desc b/regression/ansi-c/__builtin_clz-02/test.desc similarity index 100% rename from regression/cbmc-library/__builtin_clz-02/test.desc rename to regression/ansi-c/__builtin_clz-02/test.desc diff --git a/regression/cbmc-library/__builtin_clz-02/main.c b/regression/cbmc-library/__builtin_clz-02/main.c deleted file mode 100644 index 16416024d609..000000000000 --- a/regression/cbmc-library/__builtin_clz-02/main.c +++ /dev/null @@ -1,12 +0,0 @@ -#ifdef _MSC_VER -# define __builtin_clz(x) __lzcnt(x) -# define _Static_assert static_assert -#endif - -int main() -{ - _Static_assert( - __builtin_clz(0xffU) == 8 * sizeof(unsigned) - 8, "compile-time constant"); - - return 0; -} diff --git a/regression/cbmc-library/__builtin_clz-01/main.c b/regression/cbmc/__builtin_clz-01/main.c similarity index 90% rename from regression/cbmc-library/__builtin_clz-01/main.c rename to regression/cbmc/__builtin_clz-01/main.c index fbae81294d83..6640bc3f9974 100644 --- a/regression/cbmc-library/__builtin_clz-01/main.c +++ b/regression/cbmc/__builtin_clz-01/main.c @@ -56,5 +56,9 @@ int main() __CPROVER_assume(u != 0); assert(nlz2a(u) == __builtin_clz(u)); +#undef __builtin_clz + // a failing assertion should be generated as __builtin_clz is undefined for 0 + __builtin_clz(0U); + return 0; } diff --git a/regression/cbmc/__builtin_clz-01/test.desc b/regression/cbmc/__builtin_clz-01/test.desc new file mode 100644 index 000000000000..7e7552cdc694 --- /dev/null +++ b/regression/cbmc/__builtin_clz-01/test.desc @@ -0,0 +1,10 @@ +CORE +main.c +--bounds-check +^\[main.bit_count.\d+\] line 61 count leading zeros argument in __builtin_clz\(0u\): FAILURE$ +^\*\* 1 of \d+ failed +^VERIFICATION FAILED$ +^EXIT=10$ +^SIGNAL=0$ +-- +^warning: ignoring diff --git a/src/analyses/goto_check.cpp b/src/analyses/goto_check.cpp index 206c130b8963..bc1fd5d66738 100644 --- a/src/analyses/goto_check.cpp +++ b/src/analyses/goto_check.cpp @@ -172,7 +172,9 @@ class goto_checkt using conditionst = std::list; - void bounds_check(const index_exprt &, const guardt &); + void bounds_check(const exprt &, const guardt &); + void bounds_check_index(const index_exprt &, const guardt &); + void bounds_check_clz(const count_leading_zeros_exprt &, const guardt &); void div_by_zero_check(const div_exprt &, const guardt &); void mod_by_zero_check(const mod_exprt &, const guardt &); void mod_overflow_check(const mod_exprt &, const guardt &); @@ -1321,9 +1323,7 @@ std::string goto_checkt::array_name(const exprt &expr) return ::array_name(ns, expr); } -void goto_checkt::bounds_check( - const index_exprt &expr, - const guardt &guard) +void goto_checkt::bounds_check(const exprt &expr, const guardt &guard) { if(!enable_bounds_check) return; @@ -1335,6 +1335,16 @@ void goto_checkt::bounds_check( return; } + if(expr.id() == ID_index) + bounds_check_index(to_index_expr(expr), guard); + else if(expr.id() == ID_count_leading_zeros) + bounds_check_clz(to_count_leading_zeros_expr(expr), guard); +} + +void goto_checkt::bounds_check_index( + const index_exprt &expr, + const guardt &guard) +{ typet array_type = expr.array().type(); if(array_type.id()==ID_pointer) @@ -1510,6 +1520,19 @@ void goto_checkt::bounds_check( } } +void goto_checkt::bounds_check_clz( + const count_leading_zeros_exprt &expr, + const guardt &guard) +{ + add_guarded_property( + notequal_exprt{expr.op(), from_integer(0, expr.op().type())}, + "count leading zeros argument", + "bit count", + expr.find_source_location(), + expr, + guard); +} + void goto_checkt::add_guarded_property( const exprt &asserted_expr, const std::string &comment, @@ -1729,7 +1752,7 @@ void goto_checkt::check_rec(const exprt &expr, guardt &guard) if(expr.id()==ID_index) { - bounds_check(to_index_expr(expr), guard); + bounds_check(expr, guard); } else if(expr.id()==ID_div) { @@ -1766,6 +1789,10 @@ void goto_checkt::check_rec(const exprt &expr, guardt &guard) { pointer_primitive_check(expr, guard); } + else if(expr.id() == ID_count_leading_zeros) + { + bounds_check(expr, guard); + } } void goto_checkt::check(const exprt &expr) diff --git a/src/ansi-c/c_typecheck_expr.cpp b/src/ansi-c/c_typecheck_expr.cpp index 7eae7abcbf68..406671379a94 100644 --- a/src/ansi-c/c_typecheck_expr.cpp +++ b/src/ansi-c/c_typecheck_expr.cpp @@ -2689,27 +2689,14 @@ exprt c_typecheck_baset::do_special_functions( throw 0; } - side_effect_expr_function_callt try_constant{expr}; - typecheck_function_call_arguments(try_constant); - exprt argument = try_constant.arguments().front(); - simplify(argument, *this); - const auto int_constant = numeric_cast(argument); - - if( - !int_constant.has_value() || *int_constant == 0 || - argument.type().id() != ID_unsignedbv) - { - return nil_exprt{}; - } + typecheck_function_call_arguments(expr); - const std::string binary_value = integer2binary( - *int_constant, to_unsignedbv_type(argument.type()).get_width()); - std::size_t n_leading_zeros = binary_value.find('1'); - CHECK_RETURN(n_leading_zeros != std::string::npos); + count_leading_zeros_exprt clz{expr.arguments().front(), + has_prefix(id2string(identifier), "__lzcnt"), + expr.type()}; + clz.add_source_location() = source_location; - return from_integer( - n_leading_zeros, - to_code_type(try_constant.function().type()).return_type()); + return std::move(clz); } else if(identifier==CPROVER_PREFIX "equal") { diff --git a/src/ansi-c/expr2c.cpp b/src/ansi-c/expr2c.cpp index 028d759aa053..bf2835f184fb 100644 --- a/src/ansi-c/expr2c.cpp +++ b/src/ansi-c/expr2c.cpp @@ -3896,6 +3896,9 @@ std::string expr2ct::convert_with_precedence( else if(src.id()==ID_type) return convert(src.type()); + else if(src.id() == ID_count_leading_zeros) + return convert_function(src, "__builtin_clz"); + // no C language expression for internal representation return convert_norep(src, precedence); } diff --git a/src/ansi-c/library/gcc.c b/src/ansi-c/library/gcc.c index 9b652c923938..c9f736e45c63 100644 --- a/src/ansi-c/library/gcc.c +++ b/src/ansi-c/library/gcc.c @@ -33,64 +33,6 @@ inline void __sync_synchronize(void) #endif } -/* FUNCTION: __builtin_clz */ - -int __builtin_popcount(unsigned int); - -inline int __builtin_clz(unsigned int x) -{ - __CPROVER_precondition(x != 0, "__builtin_clz(0) is undefined"); - - x = x | (x >> 1); - x = x | (x >> 2); - x = x | (x >> 4); - x = x | (x >> 8); - if(sizeof(x) >= 4) - x = x | (x >> 16); - - return __builtin_popcount(~x); -} - -/* FUNCTION: __builtin_clzl */ - -int __builtin_popcountl(unsigned long int); - -inline int __builtin_clzl(unsigned long int x) -{ - __CPROVER_precondition(x != 0, "__builtin_clzl(0) is undefined"); - - x = x | (x >> 1); - x = x | (x >> 2); - x = x | (x >> 4); - x = x | (x >> 8); - if(sizeof(x) >= 4) - x = x | (x >> 16); - if(sizeof(x) >= 8) - x = x | (x >> 32); - - return __builtin_popcountl(~x); -} - -/* FUNCTION: __builtin_clzll */ - -int __builtin_popcountll(unsigned long long int); - -inline int __builtin_clzll(unsigned long long int x) -{ - __CPROVER_precondition(x != 0, "__builtin_clzll(0) is undefined"); - - x = x | (x >> 1); - x = x | (x >> 2); - x = x | (x >> 4); - x = x | (x >> 8); - if(sizeof(x) >= 4) - x = x | (x >> 16); - if(sizeof(x) >= 8) - x = x | (x >> 32); - - return __builtin_popcountll(~x); -} - /* FUNCTION: __builtin_ffs */ int __builtin_clz(unsigned int x); diff --git a/src/ansi-c/library/windows.c b/src/ansi-c/library/windows.c index aa9ed115b628..1c6b5e758bf5 100644 --- a/src/ansi-c/library/windows.c +++ b/src/ansi-c/library/windows.c @@ -51,46 +51,3 @@ inline HANDLE CreateThread( return handle; } #endif - -/* FUNCTION: __lzcnt16 */ - -#ifdef _MSC_VER -int __builtin_clz(unsigned int x); - -unsigned short __lzcnt16(unsigned short value) -{ - if(value == 0) - return 16; - - return __builtin_clz(value) - - (sizeof(unsigned int) - sizeof(unsigned short)) * 8; -} -#endif - -/* FUNCTION: __lzcnt */ - -#ifdef _MSC_VER -int __builtin_clz(unsigned int x); - -unsigned int __lzcnt(unsigned int value) -{ - if(value == 0) - return 32; - - return __builtin_clz(value); -} -#endif - -/* FUNCTION: __lzcnt64 */ - -#ifdef _MSC_VER -int __builtin_clzll(unsigned long long x); - -unsigned __int64 __lzcnt64(unsigned __int64 value) -{ - if(value == 0) - return 64; - - return __builtin_clzll(value); -} -#endif diff --git a/src/solvers/flattening/boolbv.cpp b/src/solvers/flattening/boolbv.cpp index 5529ec544684..37bf8d80de4a 100644 --- a/src/solvers/flattening/boolbv.cpp +++ b/src/solvers/flattening/boolbv.cpp @@ -228,6 +228,8 @@ bvt boolbvt::convert_bitvector(const exprt &expr) return convert_power(to_binary_expr(expr)); else if(expr.id() == ID_popcount) return convert_bv(to_popcount_expr(expr).lower()); + else if(expr.id() == ID_count_leading_zeros) + return convert_bv(to_count_leading_zeros_expr(expr).lower()); return conversion_failed(expr); } diff --git a/src/solvers/smt2/smt2_conv.cpp b/src/solvers/smt2/smt2_conv.cpp index 0ce62235126f..9adb969612f4 100644 --- a/src/solvers/smt2/smt2_conv.cpp +++ b/src/solvers/smt2/smt2_conv.cpp @@ -1986,6 +1986,10 @@ void smt2_convt::convert_expr(const exprt &expr) { convert_expr(to_popcount_expr(expr).lower()); } + else if(expr.id() == ID_count_leading_zeros) + { + convert_expr(to_count_leading_zeros_expr(expr).lower()); + } else INVARIANT_WITH_DIAGNOSTICS( false, diff --git a/src/util/bitvector_expr.cpp b/src/util/bitvector_expr.cpp index 1d9ab256d4d2..6fa6742db900 100644 --- a/src/util/bitvector_expr.cpp +++ b/src/util/bitvector_expr.cpp @@ -87,3 +87,41 @@ exprt popcount_exprt::lower() const // the result is restricted to the result type return typecast_exprt::conditional_cast(x, type()); } + +exprt count_leading_zeros_exprt::lower() const +{ + // x = x | (x >> 1); + // x = x | (x >> 2); + // x = x | (x >> 4); + // x = x | (x >> 8); + // etc. + // return popcount(~x); + + // make sure the operand width is a power of two + exprt x = op(); + const auto x_width = to_bitvector_type(x.type()).get_width(); + CHECK_RETURN(x_width >= 1); + const std::size_t bits = address_bits(x_width); + const std::size_t new_width = numeric_cast_v(power(2, bits)); + + const bool need_typecast = + new_width > x_width || x.type().id() != ID_unsignedbv; + + if(need_typecast) + x = typecast_exprt(x, unsignedbv_typet(new_width)); + + // repeatedly compute x = x | (x >> shift) + for(std::size_t shift = 1; shift < new_width; shift <<= 1) + { + // x >> shift + lshr_exprt shifted_x( + x, from_integer(shift, unsignedbv_typet(address_bits(shift) + 1))); + // build the expression + x = bitor_exprt{x, shifted_x}; + } + + // the result is restricted to the result type + return popcount_exprt{ + bitnot_exprt{typecast_exprt::conditional_cast(x, op().type())}, type()} + .lower(); +} diff --git a/src/util/bitvector_expr.h b/src/util/bitvector_expr.h index 61510f3bf3e9..727ad213c100 100644 --- a/src/util/bitvector_expr.h +++ b/src/util/bitvector_expr.h @@ -822,4 +822,97 @@ inline unary_overflow_exprt &to_unary_overflow_expr(exprt &expr) return ret; } +/// \brief The count leading zeros (counting the number of zero bits starting +/// from the most-significant bit) expression. When \c zero_permitted is set to +/// false, goto_checkt must generate an assertion that the operand does not +/// evaluates to zero. The result is always defined, even for zero (where the +/// result is the bit width). +class count_leading_zeros_exprt : public unary_exprt +{ +public: + count_leading_zeros_exprt(exprt _op, bool _zero_permitted, typet _type) + : unary_exprt(ID_count_leading_zeros, std::move(_op), std::move(_type)) + { + zero_permitted(_zero_permitted); + } + + explicit count_leading_zeros_exprt(const exprt &_op) + : count_leading_zeros_exprt(_op, true, _op.type()) + { + } + + bool zero_permitted() const + { + return !get_bool(ID_C_bounds_check); + } + + void zero_permitted(bool value) + { + set(ID_C_bounds_check, !value); + } + + static void check( + const exprt &expr, + const validation_modet vm = validation_modet::INVARIANT) + { + DATA_CHECK( + vm, + expr.operands().size() == 1, + "unary expression must have a single operand"); + DATA_CHECK( + vm, + can_cast_type(to_unary_expr(expr).op().type()), + "operand must be of bitvector type"); + } + + static void validate( + const exprt &expr, + const namespacet &, + const validation_modet vm = validation_modet::INVARIANT) + { + check(expr, vm); + } + + /// Lower a count_leading_zeros_exprt to arithmetic and logic expressions. + /// \return Semantically equivalent expression + exprt lower() const; +}; + +template <> +inline bool can_cast_expr(const exprt &base) +{ + return base.id() == ID_count_leading_zeros; +} + +inline void validate_expr(const count_leading_zeros_exprt &value) +{ + validate_operands(value, 1, "count_leading_zeros must have one operand"); +} + +/// \brief Cast an exprt to a \ref count_leading_zeros_exprt +/// +/// \a expr must be known to be \ref count_leading_zeros_exprt. +/// +/// \param expr: Source expression +/// \return Object of type \ref count_leading_zeros_exprt +inline const count_leading_zeros_exprt & +to_count_leading_zeros_expr(const exprt &expr) +{ + PRECONDITION(expr.id() == ID_count_leading_zeros); + const count_leading_zeros_exprt &ret = + static_cast(expr); + validate_expr(ret); + return ret; +} + +/// \copydoc to_count_leading_zeros_expr(const exprt &) +inline count_leading_zeros_exprt &to_count_leading_zeros_expr(exprt &expr) +{ + PRECONDITION(expr.id() == ID_count_leading_zeros); + count_leading_zeros_exprt &ret = + static_cast(expr); + validate_expr(ret); + return ret; +} + #endif // CPROVER_UTIL_BITVECTOR_EXPR_H diff --git a/src/util/format_expr.cpp b/src/util/format_expr.cpp index 9311f6227e6c..d43cd8b4f785 100644 --- a/src/util/format_expr.cpp +++ b/src/util/format_expr.cpp @@ -151,6 +151,8 @@ static std::ostream &format_rec(std::ostream &os, const unary_exprt &src) os << u8"\u00ac"; // neg, U+00AC else if(src.id() == ID_unary_minus) os << '-'; + else if(src.id() == ID_count_leading_zeros) + os << "clz"; else return os << src.pretty(); diff --git a/src/util/simplify_expr.cpp b/src/util/simplify_expr.cpp index d3dfa08743df..a23dde76f80f 100644 --- a/src/util/simplify_expr.cpp +++ b/src/util/simplify_expr.cpp @@ -148,6 +148,30 @@ simplify_exprt::simplify_popcount(const popcount_exprt &expr) return unchanged(expr); } +simplify_exprt::resultt<> +simplify_exprt::simplify_clz(const count_leading_zeros_exprt &expr) +{ + const auto const_bits_opt = expr2bits( + expr.op(), byte_extract_id() == ID_byte_extract_little_endian, ns); + + if(!const_bits_opt.has_value()) + return unchanged(expr); + + // expr2bits generates a bit string starting with the least-significant bit + std::size_t n_leading_zeros = const_bits_opt->rfind('1'); + if(n_leading_zeros == std::string::npos) + { + if(!expr.zero_permitted()) + return unchanged(expr); + + n_leading_zeros = const_bits_opt->size(); + } + else + n_leading_zeros = const_bits_opt->size() - n_leading_zeros - 1; + + return from_integer(n_leading_zeros, expr.type()); +} + /// Simplify String.endsWith function when arguments are constant /// \param expr: the expression to simplify /// \param ns: namespace @@ -2393,6 +2417,10 @@ simplify_exprt::resultt<> simplify_exprt::simplify_node(exprt node) { r = simplify_popcount(to_popcount_expr(expr)); } + else if(expr.id() == ID_count_leading_zeros) + { + r = simplify_clz(to_count_leading_zeros_expr(expr)); + } else if(expr.id() == ID_function_application) { r = simplify_function_application(to_function_application_expr(expr)); diff --git a/src/util/simplify_expr_class.h b/src/util/simplify_expr_class.h index 7b281b2408ac..030318ee443d 100644 --- a/src/util/simplify_expr_class.h +++ b/src/util/simplify_expr_class.h @@ -37,6 +37,7 @@ class bswap_exprt; class byte_extract_exprt; class byte_update_exprt; class concatenation_exprt; +class count_leading_zeros_exprt; class dereference_exprt; class div_exprt; class exprt; @@ -202,6 +203,9 @@ class simplify_exprt NODISCARD resultt<> simplify_function_application(const function_application_exprt &); + /// Try to simplify count-leading-zeros to a constant expression. + NODISCARD resultt<> simplify_clz(const count_leading_zeros_exprt &); + // auxiliary bool simplify_if_implies( exprt &expr, const exprt &cond, bool truth, bool &new_truth);