diff --git a/ir/attrs.cpp b/ir/attrs.cpp index 7b99a0a70..310d042a9 100644 --- a/ir/attrs.cpp +++ b/ir/attrs.cpp @@ -48,6 +48,10 @@ ostream& operator<<(ostream &os, const ParamAttrs &attr) { os << "allocptr "; if (attr.has(ParamAttrs::AllocAlign)) os << "allocalign "; + if (attr.has(ParamAttrs::DeadOnUnwind)) + os << "dead_on_unwind "; + if (attr.has(ParamAttrs::Writable)) + os << "writable "; return os; } @@ -293,8 +297,9 @@ bool ParamAttrs::refinedBy(const ParamAttrs &other) const { unsigned attrs = ByVal | Dereferenceable | + DereferenceableOrNull | NoUndef | - DereferenceableOrNull + Writable ; auto other_params = (other.bits & attrs); @@ -317,8 +322,11 @@ bool ParamAttrs::refinedBy(const ParamAttrs &other) const { } bool ParamAttrs::poisonImpliesUB() const { - return has(Dereferenceable) || has(NoUndef) || has(ByVal) || - has(DereferenceableOrNull); + return has(ByVal) || + has(Dereferenceable) || + has(DereferenceableOrNull) || + has(NoUndef) || + has(Writable); } uint64_t ParamAttrs::getDerefBytes() const { @@ -342,7 +350,8 @@ void ParamAttrs::merge(const ParamAttrs &other) { static expr encodePtrAttrs(State &s, const expr &ptrvalue, uint64_t derefBytes, uint64_t derefOrNullBytes, uint64_t align, bool nonnull, - bool nocapture, const expr &allocsize, Value *allocalign) { + bool nocapture, bool writable, const expr &allocsize, + Value *allocalign) { auto &m = s.getMemory(); Pointer p(m, ptrvalue); expr non_poison(true); @@ -356,15 +365,16 @@ encodePtrAttrs(State &s, const expr &ptrvalue, uint64_t derefBytes, // dereferenceable, byval (ParamAttrs), dereferenceable_or_null if (derefBytes) s.addUB(merge(Pointer(m, ptrvalue) - .isDereferenceable(derefBytes, align, false, true))); + .isDereferenceable(derefBytes, align, writable, true))); if (derefOrNullBytes) s.addUB(p.isNull() || merge(Pointer(m, ptrvalue) - .isDereferenceable(derefOrNullBytes, align, false,true))); + .isDereferenceable(derefOrNullBytes, align, writable, + true))); if (allocsize.isValid()) s.addUB(p.isNull() || merge(Pointer(m, ptrvalue) - .isDereferenceable(allocsize, align, false, true))); + .isDereferenceable(allocsize, align, writable, true))); } else if (align != 1) non_poison &= Pointer(m, ptrvalue).isAligned(align); @@ -391,7 +401,7 @@ StateValue ParamAttrs::encode(State &s, StateValue &&val, const Type &ty) const{ if (ty.isPtrType()) val.non_poison &= encodePtrAttrs(s, val.value, getDerefBytes(), derefOrNullBytes, align, - has(NonNull), has(NoCapture), {}, nullptr); + has(NonNull), has(NoCapture), has(Writable), {}, nullptr); if (poisonImpliesUB()) { s.addUB(std::move(val.non_poison)); @@ -494,7 +504,7 @@ StateValue FnAttrs::encode(State &s, StateValue &&val, const Type &ty, if (ty.isPtrType()) val.non_poison &= encodePtrAttrs(s, val.value, derefBytes, derefOrNullBytes, align, - has(NonNull), false, allocsize, allocalign); + has(NonNull), false, false, allocsize, allocalign); if (poisonImpliesUB()) { s.addUB(std::move(val.non_poison)); diff --git a/ir/attrs.h b/ir/attrs.h index 655b98bb3..8e9cfacfe 100644 --- a/ir/attrs.h +++ b/ir/attrs.h @@ -67,7 +67,8 @@ class ParamAttrs final { NoAlias = 1<<9, DereferenceableOrNull = 1<<10, AllocPtr = 1<<11, AllocAlign = 1<<12, ZeroExt = 1<<13, SignExt = 1<<14, InReg = 1<<15, - NoFPClass = 1<<16, + NoFPClass = 1<<16, DeadOnUnwind = 1<<17, + Writable = 1<<18, IsArg = 1<<31 // used internally to make values as arguments }; diff --git a/ir/globals.cpp b/ir/globals.cpp index 96fa26f66..984f03e0a 100644 --- a/ir/globals.cpp +++ b/ir/globals.cpp @@ -26,6 +26,7 @@ unsigned bits_program_pointer = 64; unsigned bits_size_t = 64; unsigned bits_ptr_address = 64; unsigned bits_byte = 8; +unsigned num_sub_byte_bits = 6; unsigned strlen_unroll_cnt = 8; unsigned memcmp_unroll_cnt = 8; bool little_endian = true; diff --git a/ir/globals.h b/ir/globals.h index 9eed595fb..95e98ea55 100644 --- a/ir/globals.h +++ b/ir/globals.h @@ -51,6 +51,10 @@ extern unsigned bits_ptr_address; /// Number of bits for a byte. extern unsigned bits_byte; +/// Required bits to store the size of sub-byte accesses +/// (e.g., store i5 -> we record 4, so 3 bits) +extern unsigned num_sub_byte_bits; + extern unsigned strlen_unroll_cnt; extern unsigned memcmp_unroll_cnt; diff --git a/ir/instr.cpp b/ir/instr.cpp index 215ca3039..016e4115a 100644 --- a/ir/instr.cpp +++ b/ir/instr.cpp @@ -949,7 +949,16 @@ vector UnaryOp::operands() const { } bool UnaryOp::propagatesPoison() const { - return true; + switch (op) { + case Copy: + case BitReverse: + case BSwap: + case Ctpop: + case FFS: + return true; + case IsConstant: return false; + } + UNREACHABLE(); } bool UnaryOp::hasSideEffects() const { @@ -3503,11 +3512,13 @@ MemInstr::ByteAccessInfo::get(const Type &t, bool store, unsigned align) { info.doesPtrStore = ptr_access && store; info.doesPtrLoad = ptr_access && !store; info.byteSize = gcd(align, getCommonAccessSize(t)); + if (auto intTy = t.getAsIntType()) + info.subByteAccess = intTy->maxSubBitAccess(); return info; } MemInstr::ByteAccessInfo MemInstr::ByteAccessInfo::full(unsigned byteSize) { - return { true, true, true, true, byteSize }; + return { true, true, true, true, byteSize, 0 }; } diff --git a/ir/instr.h b/ir/instr.h index 921546b85..57fd75e50 100644 --- a/ir/instr.h +++ b/ir/instr.h @@ -682,6 +682,8 @@ class MemInstr : public Instr { // Otherwise, bytes of a memory can be widened to this size. unsigned byteSize = 0; + unsigned subByteAccess = 0; + bool doesMemAccess() const { return byteSize; } static ByteAccessInfo intOnly(unsigned byteSize); diff --git a/ir/memory.cpp b/ir/memory.cpp index e853de095..4502f7b5c 100644 --- a/ir/memory.cpp +++ b/ir/memory.cpp @@ -109,6 +109,18 @@ static unsigned next_const_bid; static unsigned next_global_bid; static unsigned next_ptr_input; + +static unsigned size_byte_number() { + if (!num_sub_byte_bits) + return 0; + return + max(ilog2_ceil(divide_up(1 << num_sub_byte_bits, bits_byte), false), 1u); +} + +static unsigned sub_byte_bits() { + return num_sub_byte_bits + size_byte_number(); +} + static bool byte_has_ptr_bit() { return true; } @@ -125,7 +137,8 @@ static unsigned padding_ptr_byte() { static unsigned padding_nonptr_byte() { return - Byte::bitsByte() - byte_has_ptr_bit() - bits_byte - bits_poison_per_byte; + Byte::bitsByte() - byte_has_ptr_bit() - bits_byte - bits_poison_per_byte + - num_sub_byte_bits - size_byte_number(); } static expr concat_if(const expr &ifvalid, expr &&e) { @@ -217,7 +230,9 @@ Byte::Byte(const Memory &m, const StateValue &ptr, unsigned i) : m(m) { assert(!ptr.isValid() || p.bits() == bitsByte()); } -Byte::Byte(const Memory &m, const StateValue &v) : m(m) { +Byte::Byte(const Memory &m, const StateValue &v, unsigned bits_read, + unsigned byte_number) + : m(m) { assert(!v.isValid() || v.value.bits() == bits_byte); assert(!v.isValid() || v.non_poison.isBool() || v.non_poison.bits() == bits_poison_per_byte); @@ -234,12 +249,19 @@ Byte::Byte(const Memory &m, const StateValue &v) : m(m) { expr::mkInt(-1, bits_poison_per_byte), expr::mkUInt(0, bits_poison_per_byte)) : v.non_poison; - p = concat_if(p, np.concat(v.value).concat_zeros(padding_nonptr_byte())); + p = concat_if(p, np.concat(v.value)); + if (num_sub_byte_bits) { + if ((bits_read % 8) == 0) + bits_read = 0; + p = p.concat(expr::mkUInt(bits_read, num_sub_byte_bits) + .concat(expr::mkUInt(byte_number, size_byte_number()))); + } + p = p.concat_zeros(padding_nonptr_byte()); assert(!p.isValid() || p.bits() == bitsByte()); } Byte Byte::mkPoisonByte(const Memory &m) { - return { m, StateValue(expr::mkUInt(0, bits_byte), false) }; + return { m, StateValue(expr::mkUInt(0, bits_byte), false), 0, true }; } expr Byte::isPtr() const { @@ -248,7 +270,7 @@ expr Byte::isPtr() const { expr Byte::ptrNonpoison() const { auto bit = p.bits() - 1 - byte_has_ptr_bit(); - return p.extract(bit, bit) == 1; + return isAsmMode() ? expr(true) : p.extract(bit, bit) == 1; } Pointer Byte::ptr() const { @@ -276,7 +298,10 @@ expr Byte::ptrByteoffset() const { expr Byte::nonptrNonpoison() const { if (!does_int_mem_access) return expr::mkUInt(0, 1); - unsigned start = padding_nonptr_byte() + bits_byte; + if (isAsmMode()) + return expr::mkInt(-1, bits_poison_per_byte); + + unsigned start = padding_nonptr_byte() + bits_byte + sub_byte_bits(); return p.extract(start + bits_poison_per_byte - 1, start); } @@ -288,13 +313,25 @@ expr Byte::boolNonptrNonpoison() const { expr Byte::nonptrValue() const { if (!does_int_mem_access) return expr::mkUInt(0, bits_byte); - unsigned start = padding_nonptr_byte(); + unsigned start = padding_nonptr_byte() + sub_byte_bits(); return p.extract(start + bits_byte - 1, start); } +expr Byte::numStoredBits() const { + unsigned start = padding_nonptr_byte() + size_byte_number(); + return p.extract(start + num_sub_byte_bits - 1, start); +} + +expr Byte::byteNumber() const { + unsigned start = padding_nonptr_byte(); + return p.extract(start + size_byte_number() - 1, start); +} + expr Byte::isPoison() const { if (!does_int_mem_access) return does_ptr_mem_access ? !ptrNonpoison() : true; + if (isAsmMode()) + return false; expr np = nonptrNonpoison(); if (byte_has_ptr_bit() && bits_poison_per_byte == 1) { @@ -305,6 +342,8 @@ expr Byte::isPoison() const { } expr Byte::nonPoison() const { + if (isAsmMode()) + return expr::mkInt(-1, bits_poison_per_byte); if (!does_int_mem_access) return ptrNonpoison(); @@ -323,6 +362,10 @@ expr Byte::isZero() const { return expr::mkIf(isPtr(), ptr().isNull(), nonptrValue() == 0); } +bool Byte::isAsmMode() const { + return m.isAsmMode(); +} + expr Byte::castPtrToInt() const { auto offset = ptrByteoffset().zextOrTrunc(bits_ptr_address); offset = offset * expr::mkUInt(bits_byte, offset); @@ -403,7 +446,8 @@ expr Byte::refined(const Byte &other) const { unsigned Byte::bitsByte() { unsigned ptr_bits = does_ptr_mem_access * (1 + Pointer::totalBits() + bits_ptr_byte_offset()); - unsigned int_bits = does_int_mem_access * (bits_byte + bits_poison_per_byte); + unsigned int_bits = does_int_mem_access * (bits_byte + bits_poison_per_byte) + + sub_byte_bits(); // allow at least 1 bit if there's no memory access return max(1u, byte_has_ptr_bit() + max(ptr_bits, int_bits)); } @@ -419,10 +463,11 @@ ostream& operator<<(ostream &os, const Byte &byte) { } else { auto np = byte.nonptrNonpoison(); auto val = byte.nonptrValue(); + if (np.isZero()) + return os << "poison"; + if (np.isAllOnes()) { val.printHexadecimal(os); - } else if (np.isZero()) { - os << "poison"; } else { os << "#b"; for (unsigned i = 0; i < bits_poison_per_byte; ++i) { @@ -432,6 +477,14 @@ ostream& operator<<(ostream &os, const Byte &byte) { os << (is_poison ? 'p' : (v ? '1' : '0')); } } + + uint64_t num_bits; + if (num_sub_byte_bits && + byte.numStoredBits().isUInt(num_bits) && num_bits != 0) { + os << " / written with " << num_bits << " bits"; + if (byte.byteNumber().isUInt(num_bits)) + os << " / byte #" << num_bits; + } } return os; } @@ -484,14 +537,14 @@ static void pad(StateValue &v, unsigned amount, State &s) { } static vector valueToBytes(const StateValue &val, const Type &fromType, - const Memory &mem, State *s) { + const Memory &mem, State &s) { vector bytes; if (fromType.isPtrType()) { Pointer p(mem, val.value); unsigned bytesize = bits_program_pointer / bits_byte; // constant global can't store pointers that alias with local blocks - if (s->isInitializationPhase() && !p.isLocal().isFalse()) { + if (s.isInitializationPhase() && !p.isLocal().isFalse()) { expr bid = expr::mkUInt(0, 1).concat(p.getShortBid()); p = Pointer(mem, bid, p.getOffset(), p.getAttrs()); } @@ -500,11 +553,16 @@ static vector valueToBytes(const StateValue &val, const Type &fromType, bytes.emplace_back(mem, StateValue(expr(p()), expr(val.non_poison)), i); } else { assert(!fromType.isAggregateType() || isNonPtrVector(fromType)); - StateValue bvval = fromType.toInt(*s, val); + StateValue bvval = fromType.toInt(s, val); unsigned bitsize = bvval.bits(); unsigned bytesize = divide_up(bitsize, bits_byte); - pad(bvval, bytesize * bits_byte - bitsize, *s); + // There are no sub-byte accesses in assembly + if (mem.isAsmMode() && (bitsize % 8) != 0) { + s.addUB(expr(false)); + } + + pad(bvval, bytesize * bits_byte - bitsize, s); unsigned np_mul = bits_poison_per_byte; for (unsigned i = 0; i < bytesize; ++i) { @@ -512,7 +570,7 @@ static vector valueToBytes(const StateValue &val, const Type &fromType, bvval.value.extract((i + 1) * bits_byte - 1, i * bits_byte), bvval.non_poison.extract((i + 1) * np_mul - 1, i * np_mul) }; - bytes.emplace_back(mem, data); + bytes.emplace_back(mem, data, bitsize, i); } } return bytes; @@ -530,6 +588,8 @@ static StateValue bytesToValue(const Memory &m, const vector &bytes, return std::move(e); }; + bool is_asm = m.isAsmMode(); + if (toType.isPtrType()) { assert(bytes.size() == bits_program_pointer / bits_byte); expr loaded_ptr, is_ptr; @@ -559,6 +619,8 @@ static StateValue bytesToValue(const Memory &m, const vector &bytes, b.nonptrValue() == 0)); non_poison &= !b.isPoison(); } + if (is_asm) + non_poison = true; // if bits of loaded ptr are a subset of the non-ptr value, // we know they must be zero otherwise the value is poison. @@ -578,17 +640,25 @@ static StateValue bytesToValue(const Memory &m, const vector &bytes, assert(!toType.isAggregateType() || isNonPtrVector(toType)); auto bitsize = toType.bits(); assert(divide_up(bitsize, bits_byte) == bytes.size()); - bool is_asm = m.isAsmMode(); StateValue val; bool first = true; IntType ibyteTy("", bits_byte); + unsigned byte_number = 0; for (auto &b: bytes) { - expr isptr = ub_pre(!b.isPtr()); + expr expr_np = ub_pre(!b.isPtr()); + + if (num_sub_byte_bits) { + unsigned bits = (bitsize % 8) == 0 ? 0 : bitsize; + expr_np &= b.numStoredBits() == bits; + expr_np &= b.byteNumber() == byte_number++; + } + if (is_asm) + expr_np = true; + StateValue v(is_asm ? b.forceCastToInt() : b.nonptrValue(), - is_asm ? b.nonPoison() - : ibyteTy.combine_poison(isptr, b.nonptrNonpoison())); + ibyteTy.combine_poison(expr_np, b.nonptrNonpoison())); val = first ? std::move(v) : v.concat(val); first = false; } @@ -1160,14 +1230,39 @@ void Memory::mkNonPoisonAxioms(bool local) { } } +expr Memory::mkSubByteZExtStoreCond(const Byte &val, const Byte &val2) const { + bool mk_axiom = &val == &val2; + + // optimization: the initial block is assumed to follow the ABI already. + if (!mk_axiom && isInitialMemBlock(val.p, true)) + return true; + + bool always_1_byte = bits_byte >= (1u << num_sub_byte_bits); + auto stored_bits = val.numStoredBits(); + auto num_bytes + = always_1_byte ? expr::mkUInt(0, size_byte_number()) + : stored_bits.udiv(expr::mkUInt(bits_byte, stored_bits)) + .zextOrTrunc(size_byte_number()); + auto leftover_bits + = (always_1_byte ? stored_bits + : stored_bits.urem(expr::mkUInt(bits_byte, stored_bits)) + ).zextOrTrunc(bits_byte); + + return val.isPtr() || + (mk_axiom ? expr(false) : !val.boolNonptrNonpoison()) || + stored_bits == 0 || + val.byteNumber() != num_bytes || + val2.nonptrValue().lshr(leftover_bits) == 0; +} + void Memory::mkNonlocalValAxioms(bool skip_consts) { // Users may request the initial memory to be non-poisonous - if (((config::disable_poison_input && state->isSource()) || isAsmMode()) && + if ((config::disable_poison_input && state->isSource()) && (does_int_mem_access || does_ptr_mem_access)) { mkNonPoisonAxioms(false); } - if (!does_ptr_mem_access) + if (!does_ptr_mem_access && !(num_sub_byte_bits && isAsmMode())) return; expr offset @@ -1178,6 +1273,17 @@ void Memory::mkNonlocalValAxioms(bool skip_consts) { continue; Byte byte(*this, non_local_block_val[i].val.load(offset)); + + // in ASM mode, non-byte-aligned values are expected to be zero-extended + // per the ABI. + if (num_sub_byte_bits && isAsmMode()) { + state->addAxiom( + expr::mkForAll({ offset }, mkSubByteZExtStoreCond(byte, byte))); + } + + if (!does_ptr_mem_access) + continue; + Pointer loadedptr = byte.ptr(); expr bid = loadedptr.getShortBid(); @@ -1235,19 +1341,11 @@ Memory::Memory(State &state) : state(&state), escaped_local_blks(*this) { // initialize all local blocks as non-pointer, poison value // This is okay because loading a pointer as non-pointer is also poison. - // Unless we are in asm mode, in which memory is nondet data. if (numLocals() > 0) { - if (isAsmMode()) { - for (unsigned bid = 0, e = numLocals(); bid != e; ++bid) { - local_block_val.emplace_back(mk_block_val_array(bid)); - } - mkNonPoisonAxioms(true); - } else { - auto poison_array - = expr::mkConstArray(expr::mkUInt(0, Pointer::bitsShortOffset()), - Byte::mkPoisonByte(*this)()); - local_block_val.resize(numLocals(), {std::move(poison_array), DATA_NONE}); - } + auto poison_array + = expr::mkConstArray(expr::mkUInt(0, Pointer::bitsShortOffset()), + Byte::mkPoisonByte(*this)()); + local_block_val.resize(numLocals(), {std::move(poison_array), DATA_NONE}); // all local blocks are dead in the beginning local_block_liveness = expr::mkUInt(0, numLocals()); @@ -1275,10 +1373,8 @@ void Memory::mkAxioms(const Memory &tgt) const { auto skip_bid = [&](unsigned bid) { if (bid == 0 && has_null_block) return true; - if (is_globalvar(bid, true)) + if (is_globalvar(bid, true) || is_fncall_mem(bid)) return false; - if (is_fncall_mem(bid)) - return true; return bid >= tgt.next_nonlocal_bid; }; @@ -1286,8 +1382,8 @@ void Memory::mkAxioms(const Memory &tgt) const { expr align = expr::mkUInt(ilog2(heap_block_alignment), 6); if (null_is_dereferenceable && has_null_block) { - state->addAxiom(Pointer::mkNullPointer(*this).blockAlignment() == -1u); - state->addAxiom(Pointer::mkNullPointer(tgt).blockAlignment() == -1u); + state->addAxiom(Pointer::mkNullPointer(*this).blockAlignment()==UINT64_MAX); + state->addAxiom(Pointer::mkNullPointer(tgt).blockAlignment() == UINT64_MAX); } for (unsigned bid = has_null_block; bid < num_nonlocals; ++bid) { @@ -1589,7 +1685,7 @@ Memory::mkCallState(const string &fnname, bool nofree, auto blk_type = mk_block_val_array(1); expr only_write_inaccess = access.canOnlyWrite(MemoryAccess::Inaccessible); - // innaccessible memory block + // inaccessible memory block st.non_local_block_val.emplace_back(expr::mkFreshVar("blk_val", blk_type)); if (!only_write_inaccess.isTrue()) { @@ -1706,7 +1802,7 @@ void Memory::setState(const Memory::CallState &st, // TODO: havoc local blocks // for now, zero out if in non UB-exploitation mode to avoid false positives if (config::disallow_ub_exploitation) { - expr raw_byte = Byte(*this, {expr::mkUInt(0, bits_byte), true})(); + expr raw_byte = Byte(*this, {expr::mkUInt(0, bits_byte), true}, 0, true)(); expr array = expr::mkConstArray(expr::mkUInt(0, bits_for_offset), raw_byte); @@ -1927,7 +2023,7 @@ void Memory::store(const StateValue &v, const Type &type, unsigned offset0, assert(byteofs == getStoreByteSize(type)); } else { - vector bytes = valueToBytes(v, type, *this, state); + vector bytes = valueToBytes(v, type, *this, *state); assert(!v.isValid() || bytes.size() * bytesz == getStoreByteSize(type)); for (unsigned i = 0, e = bytes.size(); i < e; ++i) { @@ -2045,7 +2141,7 @@ void Memory::memset(const expr &p, const StateValue &val, const expr &bytesize, } assert(!val.isValid() || wval.bits() == bits_byte); - auto bytes = valueToBytes(wval, IntType("", bits_byte), *this, state); + auto bytes = valueToBytes(wval, IntType("", bits_byte), *this, *state); assert(bytes.size() == 1); expr raw_byte = std::move(bytes[0])(); @@ -2292,7 +2388,13 @@ expr Memory::blockValRefined(const Memory &other, unsigned bid, bool local, undef.insert(mem1.undef.begin(), mem1.undef.end()); - return val.refined(val2); + expr r = val.refined(val2); + + // The ABI requires that sub-byte stores zero extend the stored value + if (num_sub_byte_bits && other.isAsmMode()) + r &= mkSubByteZExtStoreCond(val, val2); + + return r; } expr Memory::blockRefined(const Pointer &src, const Pointer &tgt, unsigned bid, diff --git a/ir/memory.h b/ir/memory.h index 0d531cdb5..ce1a1f0c7 100644 --- a/ir/memory.h +++ b/ir/memory.h @@ -30,15 +30,17 @@ class State; // A data structure that represents a byte. // A byte is either a pointer byte or a non-pointer byte. // Pointer byte's representation: -// +-+-----------+-----------------------------+---------------+---------+ -// |1|non-poison?| Pointer (see class below) | byte offset | padding | -// | |(1 bit) | | (0 or 3 bits) | | -// +-+-----------+-----------------------------+---------------+---------+ +// +-+-------------+-----------+---------------+------------------------+ +// |1| non-poison? | Pointer | byte offset | padding | +// | | (1 bit) | | (0 or 3 bits) | | +// +-+-------------+-----------+---------------+------------------------+ // Non-pointer byte's representation: -// +-+--------------------+--------------------+-------------------------+ -// |0| non-poison bit(s) | data | padding | -// | | (bits_byte) | (bits_byte) | | -// +-+--------------------+--------------------+-------------------------+ +// +-+-------------------+-------------+-------------+--------+---------+ +// |0| non-poison bit(s) | data | stored bits | byte | padding | +// | | (bits_byte) | (bits_byte) | (sub-byte) | number | | +// +-+-------------------+-------------+-------------+--------+---------+ +// The last 2 fields are only present if the program contains sub-byte accesses + class Byte { const Memory &m; @@ -52,7 +54,8 @@ class Byte { // non_poison should be an one-bit vector or boolean. Byte(const Memory &m, const StateValue &ptr, unsigned i); - Byte(const Memory &m, const StateValue &v); + Byte(const Memory &m, const StateValue &v, unsigned bits_read, + unsigned byte_number); static Byte mkPoisonByte(const Memory &m); @@ -64,10 +67,16 @@ class Byte { smt::expr nonptrNonpoison() const; smt::expr boolNonptrNonpoison() const; smt::expr nonptrValue() const; + + smt::expr numStoredBits() const; + smt::expr byteNumber() const; + smt::expr isPoison() const; smt::expr nonPoison() const; smt::expr isZero() const; // zero or null + bool isAsmMode() const; + smt::expr castPtrToInt() const; smt::expr forceCastToInt() const; @@ -86,6 +95,7 @@ class Byte { static unsigned bitsByte(); friend std::ostream& operator<<(std::ostream &os, const Byte &byte); + friend class Memory; }; @@ -172,6 +182,7 @@ class Memory { smt::expr isBlockAlive(const smt::expr &bid, bool local) const; void mkNonPoisonAxioms(bool local); + smt::expr mkSubByteZExtStoreCond(const Byte &val, const Byte &val2) const; void mkNonlocalValAxioms(bool skip_consts); bool mayalias(bool local, unsigned bid, const smt::expr &offset, diff --git a/ir/pointer.cpp b/ir/pointer.cpp index 7099119dc..df5bf370e 100644 --- a/ir/pointer.cpp +++ b/ir/pointer.cpp @@ -416,12 +416,10 @@ static pair is_dereferenceable(Pointer &p, cond &= p.isBlockAlive(); - if (!ignore_accessability) { - if (iswrite) - cond &= p.isWritable() && !p.isNoWrite(); - else - cond &= !p.isNoRead(); - } + if (iswrite) + cond &= p.isWritable() && !p.isNoWrite(); + else if (!ignore_accessability) + cond &= !p.isNoRead(); // try some constant folding; these are implied by the conditions above if (bytes.ugt(p.blockSize()).isTrue() || @@ -601,6 +599,8 @@ expr Pointer::isWritable() const { if (has_null_block && null_is_dereferenceable) non_local |= bid == 0; + non_local &= expr::mkUF("#is_writable", {bid}, expr(false)); + // check for non-writable byval blocks (which are non-local) for (auto [byval, is_const] : m.byval_blks) { if (is_const) diff --git a/ir/state.cpp b/ir/state.cpp index 53dd0b7c2..07e0b0f80 100644 --- a/ir/state.cpp +++ b/ir/state.cpp @@ -1056,13 +1056,8 @@ State::addFnCall(const string &name, vector &&inputs, } } - expr uf = expr::mkUF("#access_" + name, { fn_ptr }, memaccess.val); - if (auto acc = std::move(decl_access)()) { - memaccess &= SMTMemoryAccess::mkIf(decl_access.domain(), *acc, - std::move(uf)); - } else { - memaccess &= std::move(uf); - } + memaccess &= *std::move(decl_access).mk(SMTMemoryAccess{ + expr::mkUF("#access_" + name, { fn_ptr }, memaccess.val)}); } if (!memaccess.canWrite(MemoryAccess::Args).isFalse() || @@ -1100,8 +1095,8 @@ State::addFnCall(const string &name, vector &&inputs, auto call_data_pair = calls_fn.try_emplace( { std::move(inputs), std::move(ptr_inputs), std::move(call_ranges), - mkIf_fold(memaccess.canReadSomething(), memory.dup(), - memory.dupNoRead()), + memaccess.canReadSomething().isFalse() + ? memory.dupNoRead() : memory.dup(), memaccess, noret, willret }); auto &I = call_data_pair.first; bool inserted = call_data_pair.second; diff --git a/ir/type.cpp b/ir/type.cpp index d2edb6d2e..9b3a896d5 100644 --- a/ir/type.cpp +++ b/ir/type.cpp @@ -204,6 +204,10 @@ expr Type::enforcePtrOrVectorType() const { [&](auto &ty) { return ty.enforcePtrType(); }); } +const IntType* Type::getAsIntType() const { + return nullptr; +} + const FloatType* Type::getAsFloatType() const { return nullptr; } @@ -321,6 +325,14 @@ void VoidType::print(ostream &os) const { } +unsigned IntType::maxSubBitAccess() const { + if (!defined) + return 63; + if (bitwidth % 8) + return bitwidth; + return 0; +} + unsigned IntType::bits() const { return bitwidth; } @@ -359,6 +371,10 @@ expr IntType::enforceIntType(unsigned bits) const { return bits ? sizeVar() == bits : true; } +const IntType* IntType::getAsIntType() const { + return this; +} + pair IntType::refines(State &src_s, State &tgt_s, const StateValue &src, const StateValue &tgt) const { @@ -1357,6 +1373,10 @@ expr SymbolicType::enforceVectorType( return v ? (isVector() && v->enforceVectorType(enforceElem)) : false; } +const IntType* SymbolicType::getAsIntType() const { + return &*i; +} + const FloatType* SymbolicType::getAsFloatType() const { return &*f; } diff --git a/ir/type.h b/ir/type.h index 72ec188d2..b2b02c9e7 100644 --- a/ir/type.h +++ b/ir/type.h @@ -20,6 +20,7 @@ namespace IR { class AggregateType; class FloatType; +class IntType; class StructType; class SymbolicType; class VectorType; @@ -88,6 +89,7 @@ class Type { smt::expr enforceFloatOrVectorType() const; smt::expr enforcePtrOrVectorType() const; + virtual const IntType* getAsIntType() const; virtual const FloatType* getAsFloatType() const; virtual const AggregateType* getAsAggregateType() const; virtual const StructType* getAsStructType() const; @@ -157,6 +159,7 @@ class IntType final : public Type { IntType(std::string &&name, unsigned bitwidth) : Type(std::move(name)), bitwidth(bitwidth), defined(true) {} + unsigned maxSubBitAccess() const; unsigned bits() const override; IR::StateValue getDummyValue(bool non_poison) const override; smt::expr getTypeConstraints() const override; @@ -165,6 +168,7 @@ class IntType final : public Type { void fixup(const smt::Model &m) override; bool isIntType() const override; smt::expr enforceIntType(unsigned bits = 0) const override; + const IntType* getAsIntType() const override; std::pair refines(State &src_s, State &tgt_s, const StateValue &src, const StateValue &tgt) const override; @@ -399,6 +403,7 @@ class SymbolicType final : public Type { smt::expr enforceFloatType() const override; smt::expr enforceVectorType( const std::function &enforceElem) const override; + const IntType* getAsIntType() const override; const FloatType* getAsFloatType() const override; const AggregateType* getAsAggregateType() const override; const StructType* getAsStructType() const override; diff --git a/llvm_util/llvm2alive.cpp b/llvm_util/llvm2alive.cpp index 8e0cc3803..71420694d 100644 --- a/llvm_util/llvm2alive.cpp +++ b/llvm_util/llvm2alive.cpp @@ -1439,6 +1439,10 @@ class llvm2alive_ : public llvm::InstVisitor> { llvmattr.getDereferenceableOrNullBytes()); break; + case llvm::Attribute::Writable: + attrs.set(ParamAttrs::Writable); + break; + case llvm::Attribute::Alignment: attrs.set(ParamAttrs::Align); attrs.align = max(attrs.align, llvmattr.getAlignment()->value()); @@ -1470,6 +1474,10 @@ class llvm2alive_ : public llvm::InstVisitor> { attrs.set(ParamAttrs::AllocAlign); break; + case llvm::Attribute::DeadOnUnwind: + attrs.set(ParamAttrs::DeadOnUnwind); + break; + default: // If it is call site, it should be added at approximation list if (!is_callsite) diff --git a/smt/expr.cpp b/smt/expr.cpp index 10ad21279..b14a2d931 100644 --- a/smt/expr.cpp +++ b/smt/expr.cpp @@ -2054,7 +2054,7 @@ expr expr::mkIf(const expr &cond, const expr &then, const expr &els) { if (cond.isFalse()) return els; - if (then.isTrue()) + if (then.isTrue() || cond.eq(then)) return cond || els; if (then.isFalse()) return !cond && els; diff --git a/smt/exprs.h b/smt/exprs.h index fc2fd7c57..5ac924d00 100644 --- a/smt/exprs.h +++ b/smt/exprs.h @@ -89,14 +89,6 @@ class DisjointExpr { } } - expr domain() const { - expr ret(false); - for (auto &p : vals) { - ret |= p.second; - } - return ret; - } - std::optional operator()() const& { std::optional ret; for (auto &[val, domain] : vals) { @@ -110,8 +102,9 @@ class DisjointExpr { return default_val; } - std::optional operator()() && { - std::optional ret; + // argument is the value used if the domain is false + // not the same as default, which is used only if no element is present + std::optional mk(std::optional ret) && { for (auto &[val0, domain] : vals) { auto &val = const_cast(val0); if (domain.isTrue()) @@ -125,6 +118,8 @@ class DisjointExpr { return std::move(default_val); } + std::optional operator()() && { return std::move(*this).mk({}); } + std::optional lookup(const expr &domain) const { for (auto &[v, d] : vals) { if (d.eq(domain)) diff --git a/tests/alive-tv/asm/delete-store.srctgt.ll b/tests/alive-tv/asm/delete-store.srctgt.ll new file mode 100644 index 000000000..b4318d730 --- /dev/null +++ b/tests/alive-tv/asm/delete-store.srctgt.ll @@ -0,0 +1,12 @@ +; TEST-ARGS: -tgt-is-asm +; SKIP-IDENTITY + +define void @src(ptr %0) { + %2 = load i1, ptr %0, align 1 + store i1 %2, ptr %0, align 1 + ret void +} + +define void @tgt(ptr %0) { + ret void +} diff --git a/tests/alive-tv/asm/sub-byte-load.srctgt.ll b/tests/alive-tv/asm/sub-byte-load.srctgt.ll new file mode 100644 index 000000000..6ab2e2ad5 --- /dev/null +++ b/tests/alive-tv/asm/sub-byte-load.srctgt.ll @@ -0,0 +1,12 @@ +; TEST-ARGS: -tgt-is-asm + +define i8 @src(ptr %p) { + %x = load i5, ptr %p, align 2 + %y = zext i5 %x to i8 + ret i8 %y +} + +define i8 @tgt(ptr %p) { + %x = load i8, ptr %p, align 2 + ret i8 %x +} diff --git a/tests/alive-tv/asm/sub-byte-load2.srctgt.ll b/tests/alive-tv/asm/sub-byte-load2.srctgt.ll new file mode 100644 index 000000000..1882afe50 --- /dev/null +++ b/tests/alive-tv/asm/sub-byte-load2.srctgt.ll @@ -0,0 +1,12 @@ +; TEST-ARGS: -tgt-is-asm + +define i8 @src(ptr %0) { + %2 = load i1, ptr %0, align 1 + %3 = load i8, ptr %0, align 1 + ret i8 %3 +} + +define i8 @tgt(ptr %0) { + %r = load i8, ptr %0, align 1 + ret i8 %r +} diff --git a/tests/alive-tv/asm/sub-byte-load3.srctgt.ll b/tests/alive-tv/asm/sub-byte-load3.srctgt.ll new file mode 100644 index 000000000..da5c9df3b --- /dev/null +++ b/tests/alive-tv/asm/sub-byte-load3.srctgt.ll @@ -0,0 +1,14 @@ +; TEST-ARGS: -tgt-is-asm + +define i1 @src(ptr %0) { + %2 = load i9, ptr %0, align 4 + %3 = icmp slt i9 %2, 0 + ret i1 %3 +} + +define i1 @tgt(ptr %0) { + %p = getelementptr i8, ptr %0, i64 1 + %a3_1 = load i8, ptr %p, align 1 + %a6_23 = trunc i8 %a3_1 to i1 + ret i1 %a6_23 +} diff --git a/tests/alive-tv/asm/sub-byte-store.srctgt.ll b/tests/alive-tv/asm/sub-byte-store.srctgt.ll new file mode 100644 index 000000000..dffbb67fd --- /dev/null +++ b/tests/alive-tv/asm/sub-byte-store.srctgt.ll @@ -0,0 +1,12 @@ +; TEST-ARGS: -tgt-is-asm +; SKIP-IDENTITY + +define void @src(ptr %0) { + store i9 5, ptr %0 + ret void +} + +define void @tgt(ptr %0) { + store i16 5, ptr %0 + ret void +} diff --git a/tests/alive-tv/asm/sub-byte-store2.srctgt.ll b/tests/alive-tv/asm/sub-byte-store2.srctgt.ll new file mode 100644 index 000000000..8954bc7ad --- /dev/null +++ b/tests/alive-tv/asm/sub-byte-store2.srctgt.ll @@ -0,0 +1,13 @@ +; TEST-ARGS: -tgt-is-asm +; SKIP-IDENTITY +; ERROR: Mismatch in memory + +define void @src(ptr %0) { + store i9 5, ptr %0 + ret void +} + +define void @tgt(ptr %0) { + store i16 1029, ptr %0 + ret void +} diff --git a/tests/alive-tv/attrs/dereferenceable-fail2.srctgt.ll b/tests/alive-tv/attrs/dereferenceable-fail2.srctgt.ll new file mode 100644 index 000000000..73b03b20e --- /dev/null +++ b/tests/alive-tv/attrs/dereferenceable-fail2.srctgt.ll @@ -0,0 +1,19 @@ +; ERROR: Source is more defined than target + +define void @src(ptr dereferenceable(1) %p) { + %v = load i8, ptr %p + %c = icmp ne i8 %v, 42 + br i1 %c, label %then, label %else + +then: + store i8 42, ptr %p + ret void + +else: + ret void +} + +define void @tgt(ptr dereferenceable(1) %p) { + store i8 42, ptr %p + ret void +} diff --git a/tests/alive-tv/attrs/indirect-call2.srctgt.ll b/tests/alive-tv/attrs/indirect-call2.srctgt.ll new file mode 100644 index 000000000..9138341fd --- /dev/null +++ b/tests/alive-tv/attrs/indirect-call2.srctgt.ll @@ -0,0 +1,22 @@ +declare i8 @fn() +@fptr = global ptr null, align 8 + +define i8 @src() { + %f = load ptr, ptr @fptr, align 8 + %c = call i8 %f() + ret i8 %c +} + +define i8 @tgt() { + %f = load ptr, ptr @fptr, align 8 + %cmp = icmp eq ptr %f, @fn + br i1 %cmp, label %fn, label %else + +fn: + %c = call i8 @fn() + ret i8 %c + +else: + %call = call i8 %f() + ret i8 %call +} diff --git a/tests/alive-tv/attrs/writable.srctgt.ll b/tests/alive-tv/attrs/writable.srctgt.ll new file mode 100644 index 000000000..997d2a34e --- /dev/null +++ b/tests/alive-tv/attrs/writable.srctgt.ll @@ -0,0 +1,17 @@ +define void @src(ptr writable dereferenceable(4) %p) { + %v = load i32, ptr %p + %c = icmp ne i32 %v, 42 + br i1 %c, label %then, label %else + +then: + store i32 42, ptr %p + ret void + +else: + ret void +} + +define void @tgt(ptr writable dereferenceable(4) %p) { + store i32 42, ptr %p + ret void +} diff --git a/tests/alive-tv/bugs/pr41949.srctgt.ll b/tests/alive-tv/bugs/pr41949.srctgt.ll index 3a9dac8d2..620eb0330 100644 --- a/tests/alive-tv/bugs/pr41949.srctgt.ll +++ b/tests/alive-tv/bugs/pr41949.srctgt.ll @@ -3,19 +3,15 @@ target datalayout = "E" define i32 @src(ptr %p) { %u = alloca i32 - %a = bitcast ptr %u to ptr - %b = bitcast ptr %u to ptr - store i32 -1, ptr %a - store i12 20, ptr %b + store i32 -1, ptr %u + store i12 20, ptr %u %v = load i32, ptr %u ret i32 %v } define i32 @tgt(ptr %p) { %u = alloca i32 - %a = bitcast ptr %u to ptr - store i32 22020095, ptr %a + store i32 22020095, ptr %u %v = load i32, ptr %u ret i32 %v -} -; ERROR: Value mismatch +} diff --git a/tests/alive-tv/calls/indirect-call-ptrarg.srctgt.ll b/tests/alive-tv/calls/indirect-call-ptrarg.srctgt.ll new file mode 100644 index 000000000..646e710ee --- /dev/null +++ b/tests/alive-tv/calls/indirect-call-ptrarg.srctgt.ll @@ -0,0 +1,13 @@ +define i32 @src(ptr %f, ptr %arg) { + call void @fn() + call void %f(ptr %arg) + ret i32 0 +} + +define i32 @tgt(ptr %f, ptr %arg) { + call void @fn() + call void %f(ptr %arg) + ret i32 0 +} + +declare void @fn() diff --git a/tests/alive-tv/memory/padding-nonbyte-fail.srctgt.ll b/tests/alive-tv/memory/padding-nonbyte-fail.srctgt.ll index 20d630af3..1d29c6321 100644 --- a/tests/alive-tv/memory/padding-nonbyte-fail.srctgt.ll +++ b/tests/alive-tv/memory/padding-nonbyte-fail.srctgt.ll @@ -1,5 +1,3 @@ -; ERROR: Value mismatch - define i17 @src(ptr %p) { store <2 x i17> , ptr %p %load = load i17, ptr %p diff --git a/tools/alive-exec.cpp b/tools/alive-exec.cpp index 8a3a0d3ca..ffefb3d48 100644 --- a/tools/alive-exec.cpp +++ b/tools/alive-exec.cpp @@ -16,7 +16,7 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IRReader/IRReader.h" -#include "llvm/Support/PrettyStackTrace.h" +#include "llvm/Support/InitLLVM.h" #include "llvm/Support/Signals.h" #include "llvm/TargetParser/Triple.h" @@ -193,9 +193,8 @@ unique_ptr cache; int main(int argc, char **argv) { llvm::sys::PrintStackTraceOnErrorSignal(argv[0]); - llvm::PrettyStackTraceProgram X(argc, argv); + llvm::InitLLVM X(argc, argv); llvm::EnableDebugBuffering = true; - llvm::llvm_shutdown_obj llvm_shutdown; // Call llvm_shutdown() on exit. llvm::LLVMContext Context; std::string Usage = diff --git a/tools/alive-tv.cpp b/tools/alive-tv.cpp index d97d673c8..23e0e6cb4 100644 --- a/tools/alive-tv.cpp +++ b/tools/alive-tv.cpp @@ -19,7 +19,7 @@ #include "llvm/IR/LegacyPassManager.h" #include "llvm/IRReader/IRReader.h" #include "llvm/Passes/PassBuilder.h" -#include "llvm/Support/PrettyStackTrace.h" +#include "llvm/Support/InitLLVM.h" #include "llvm/Support/Signals.h" #include "llvm/TargetParser/Triple.h" #include "llvm/Transforms/Utils/Cloning.h" @@ -74,9 +74,8 @@ unique_ptr cache; int main(int argc, char **argv) { llvm::sys::PrintStackTraceOnErrorSignal(argv[0]); - llvm::PrettyStackTraceProgram X(argc, argv); + llvm::InitLLVM X(argc, argv); llvm::EnableDebugBuffering = true; - llvm::llvm_shutdown_obj llvm_shutdown; // Call llvm_shutdown() on exit. llvm::LLVMContext Context; unsigned M1_anon_count = 0; diff --git a/tools/quick-fuzz.cpp b/tools/quick-fuzz.cpp index 4d5183a50..34e5ba8ea 100644 --- a/tools/quick-fuzz.cpp +++ b/tools/quick-fuzz.cpp @@ -21,7 +21,7 @@ #include "llvm/IRReader/IRReader.h" #include "llvm/InitializePasses.h" #include "llvm/Passes/PassBuilder.h" -#include "llvm/Support/PrettyStackTrace.h" +#include "llvm/Support/InitLLVM.h" #include "llvm/Support/Signals.h" #include "llvm/Support/SourceMgr.h" #include "llvm/TargetParser/Triple.h" @@ -881,9 +881,8 @@ void BBFuzzer::go() { int main(int argc, char **argv) { sys::PrintStackTraceOnErrorSignal(argv[0]); - PrettyStackTraceProgram X(argc, argv); + llvm::InitLLVM X(argc, argv); EnableDebugBuffering = true; - llvm_shutdown_obj llvm_shutdown; // Call llvm_shutdown() on exit. LLVMContext Context; string Usage = diff --git a/tools/transform.cpp b/tools/transform.cpp index 9999a0514..aaa1bb9e1 100644 --- a/tools/transform.cpp +++ b/tools/transform.cpp @@ -979,6 +979,7 @@ static void calculateAndInitConstants(Transform &t) { bool does_any_byte_access = false; has_indirect_fncalls = false; has_ptr_arg = false; + num_sub_byte_bits = 0; set inaccessiblememonly_fns; num_inaccessiblememonly_fns = 0; @@ -1087,6 +1088,8 @@ static void calculateAndInitConstants(Transform &t) { does_mem_access |= info.doesMemAccess(); observes_addresses |= info.observesAddresses; min_access_size = gcd(min_access_size, info.byteSize); + num_sub_byte_bits = max(num_sub_byte_bits, + (unsigned)bit_width(info.subByteAccess)); if (info.doesMemAccess() && !info.hasIntByteAccess && !info.doesPtrLoad && !info.doesPtrStore) does_any_byte_access = true; @@ -1266,6 +1269,7 @@ static void calculateAndInitConstants(Transform &t) { << "\ndoes_mem_access: " << does_mem_access << "\ndoes_ptr_mem_access: " << does_ptr_mem_access << "\ndoes_int_mem_access: " << does_int_mem_access + << "\nnum_sub_byte_bits: " << num_sub_byte_bits << "\nhas_ptr_arg: " << has_ptr_arg << '\n'; }