diff --git a/BugList.md b/BugList.md index 928e221c2..459dd9183 100644 --- a/BugList.md +++ b/BugList.md @@ -100,6 +100,7 @@ Please contact us or submit a PR if something is missing or inaccurate. 93. InstCombine: align attribute doesn't dereferenceability (https://llvm.org/PR90446) 94. Reassociate: invalid propagation of overflow attributes at low bit-width (https://llvm.org/PR91417) 95. InstCombine: removes a select, making the code more poisonous (https://llvm.org/PR91691) +96. DSE removes store before free() incorrectly (https://llvm.org/PR97956) ### Bugs found in Z3 1. Incorrect bitblast for fprem (https://github.com/Z3Prover/z3/issues/2369) diff --git a/ir/attrs.cpp b/ir/attrs.cpp index 310d042a9..31b7b539a 100644 --- a/ir/attrs.cpp +++ b/ir/attrs.cpp @@ -7,10 +7,12 @@ #include "ir/state.h" #include "ir/state_value.h" #include "ir/type.h" +#include "util/compiler.h" #include using namespace std; using namespace smt; +using namespace util; namespace IR { ostream& operator<<(ostream &os, const ParamAttrs &attr) { @@ -339,6 +341,13 @@ uint64_t ParamAttrs::getDerefBytes() const { return bytes; } +uint64_t ParamAttrs::maxAccessSize() const { + uint64_t bytes = getDerefBytes(); + if (has(ParamAttrs::DereferenceableOrNull)) + bytes = max(bytes, derefOrNullBytes); + return round_up(bytes, align); +} + void ParamAttrs::merge(const ParamAttrs &other) { bits |= other.bits; derefBytes = max(derefBytes, other.derefBytes); @@ -351,7 +360,7 @@ static expr encodePtrAttrs(State &s, const expr &ptrvalue, uint64_t derefBytes, uint64_t derefOrNullBytes, uint64_t align, bool nonnull, bool nocapture, bool writable, const expr &allocsize, - Value *allocalign) { + Value *allocalign, bool isdecl) { auto &m = s.getMemory(); Pointer p(m, ptrvalue); expr non_poison(true); @@ -364,19 +373,20 @@ encodePtrAttrs(State &s, const expr &ptrvalue, uint64_t derefBytes, if (derefBytes || derefOrNullBytes || allocsize.isValid()) { // dereferenceable, byval (ParamAttrs), dereferenceable_or_null if (derefBytes) - s.addUB(merge(Pointer(m, ptrvalue) - .isDereferenceable(derefBytes, align, writable, true))); + s.addUB(merge(p.isDereferenceable(derefBytes, align, writable, true))); if (derefOrNullBytes) s.addUB(p.isNull() || - merge(Pointer(m, ptrvalue) - .isDereferenceable(derefOrNullBytes, align, writable, - true))); + merge(p.isDereferenceable(derefOrNullBytes, align, writable, + true))); if (allocsize.isValid()) s.addUB(p.isNull() || - merge(Pointer(m, ptrvalue) - .isDereferenceable(allocsize, align, writable, true))); - } else if (align != 1) - non_poison &= Pointer(m, ptrvalue).isAligned(align); + merge(p.isDereferenceable(allocsize, align, writable, true))); + } else if (align != 1) { + non_poison &= p.isAligned(align); + if (isdecl) + s.addUB(merge(p.isDereferenceable(1, 1, false, true)) + .implies(merge(p.isDereferenceable(1, align, false, true)))); + } if (allocalign) { Pointer p(m, ptrvalue); @@ -392,7 +402,8 @@ encodePtrAttrs(State &s, const expr &ptrvalue, uint64_t derefBytes, return non_poison; } -StateValue ParamAttrs::encode(State &s, StateValue &&val, const Type &ty) const{ +StateValue ParamAttrs::encode(State &s, StateValue &&val, const Type &ty, + bool isdecl) const{ if (has(NoFPClass)) { assert(ty.isFloatType()); val.non_poison &= !isfpclass(val.value, ty, nofpclass); @@ -401,7 +412,8 @@ StateValue ParamAttrs::encode(State &s, StateValue &&val, const Type &ty) const{ if (ty.isPtrType()) val.non_poison &= encodePtrAttrs(s, val.value, getDerefBytes(), derefOrNullBytes, align, - has(NonNull), has(NoCapture), has(Writable), {}, nullptr); + has(NonNull), has(NoCapture), has(Writable), {}, nullptr, + isdecl); if (poisonImpliesUB()) { s.addUB(std::move(val.non_poison)); @@ -504,7 +516,7 @@ StateValue FnAttrs::encode(State &s, StateValue &&val, const Type &ty, if (ty.isPtrType()) val.non_poison &= encodePtrAttrs(s, val.value, derefBytes, derefOrNullBytes, align, - has(NonNull), false, false, allocsize, allocalign); + has(NonNull), false, false, allocsize, allocalign, false); if (poisonImpliesUB()) { s.addUB(std::move(val.non_poison)); @@ -516,6 +528,9 @@ StateValue FnAttrs::encode(State &s, StateValue &&val, const Type &ty, expr isfpclass(const expr &v, const Type &ty, uint16_t mask) { + if (mask == 1023) + return true; + auto *fpty = ty.getAsFloatType(); auto a = fpty->getFloat(v); OrExpr result; @@ -523,22 +538,29 @@ expr isfpclass(const expr &v, const Type &ty, uint16_t mask) { result.add(fpty->isNaN(v, true)); if (mask & (1 << 1)) result.add(fpty->isNaN(v, false)); - if (mask & (1 << 2)) - result.add(a.isFPNegative() && a.isInf()); - if (mask & (1 << 3)) - result.add(a.isFPNegative() && a.isFPNormal()); - if (mask & (1 << 4)) - result.add(a.isFPNegative() && a.isFPSubNormal()); - if (mask & (1 << 5)) - result.add(a.isFPNegZero()); - if (mask & (1 << 6)) - result.add(a.isFPZero() && !a.isFPNegative()); - if (mask & (1 << 7)) - result.add(!a.isFPNegative() && a.isFPSubNormal()); - if (mask & (1 << 8)) - result.add(!a.isFPNegative() && a.isFPNormal()); - if (mask & (1 << 9)) - result.add(!a.isFPNegative() && a.isInf()); + + auto check = [&](unsigned idx_neg, unsigned idx_pos, auto test) { + unsigned mask_neg = 1u << idx_neg; + unsigned mask_pos = 1u << idx_pos; + unsigned mask_both = mask_neg | mask_pos; + + if ((mask & mask_both) == mask_both) { + result.add(test()); + } else if (mask & mask_neg) { + result.add(a.isFPNegative() && test()); + } else if (mask & mask_pos) { + result.add(!a.isFPNegative() && test()); + } + }; +#define CHECK(neg, pos, fn) check(neg, pos, [&a]() { return a.fn(); }) + + CHECK(5, 6, isFPZero); + CHECK(4, 7, isFPSubNormal); + CHECK(3, 8, isFPNormal); + CHECK(2, 9, isInf); + +#undef CHECK + return std::move(result)(); } diff --git a/ir/attrs.h b/ir/attrs.h index 8e9cfacfe..9512ead05 100644 --- a/ir/attrs.h +++ b/ir/attrs.h @@ -88,13 +88,15 @@ class ParamAttrs final { bool poisonImpliesUB() const; uint64_t getDerefBytes() const; + uint64_t maxAccessSize() const; void merge(const ParamAttrs &other); friend std::ostream& operator<<(std::ostream &os, const ParamAttrs &attr); // Encodes the semantics of attributes using UB and poison. - StateValue encode(State &s, StateValue &&val, const Type &ty) const; + StateValue encode(State &s, StateValue &&val, const Type &ty, + bool isdecl = false) const; }; struct FPDenormalAttrs { diff --git a/ir/function.cpp b/ir/function.cpp index bb974a75d..d8a8408d2 100644 --- a/ir/function.cpp +++ b/ir/function.cpp @@ -142,6 +142,7 @@ unsigned Function::FnDecl::hash() const { hash_ty(*ty); } hash_ty(*output); + hash.add(is_varargs * 32); return hash(); } @@ -849,6 +850,9 @@ void Function::print(ostream &os, bool print_header) const { os << input.second << *input.first; first = false; } + if (decl.is_varargs) { + os << (first ? "..." : ", ..."); + } os << ')' << decl.attrs << '\n'; } os << '\n'; diff --git a/ir/function.h b/ir/function.h index bbf3471b7..42a94dbc7 100644 --- a/ir/function.h +++ b/ir/function.h @@ -96,6 +96,7 @@ class Function final { struct FnDecl { std::string name; std::vector> inputs; + bool is_varargs = false; Type *output; FnAttrs attrs; unsigned hash() const; diff --git a/ir/instr.cpp b/ir/instr.cpp index 016e4115a..6fff2ff54 100644 --- a/ir/instr.cpp +++ b/ir/instr.cpp @@ -2141,9 +2141,9 @@ unique_ptr InsertValue::dup(Function &f, const string &suffix) const { DEFINE_AS_RETZERO(FnCall, getMaxGEPOffset); FnCall::FnCall(Type &type, string &&name, string &&fnName, FnAttrs &&attrs, - Value *fnptr) + Value *fnptr, unsigned var_arg_idx) : MemInstr(type, std::move(name)), fnName(std::move(fnName)), fnptr(fnptr), - attrs(std::move(attrs)) { + attrs(std::move(attrs)), var_arg_idx(var_arg_idx) { if (config::disallow_ub_exploitation) this->attrs.set(FnAttrs::NoUndef); assert(!fnptr || this->fnName.empty()); @@ -2287,7 +2287,11 @@ void FnCall::print(ostream &os) const { << (fnptr ? fnptr->getName() : fnName) << '('; bool first = true; + unsigned idx = 0; for (auto &[arg, attrs] : args) { + if (idx++ == var_arg_idx) + os << "..."; + if (!first) os << ", "; @@ -2411,23 +2415,30 @@ StateValue FnCall::toSMT(State &s) const { auto ptr = fnptr; // This is a direct call, but check if there are indirect calls elsewhere // to this function. If so, call it indirectly to match the other calls. - if (!ptr) - ptr = s.getFn().getGlobalVar(string_view(fnName).substr(1)); + if (!ptr && has_indirect_fncalls) + ptr = s.getFn().getGlobalVar(fnName); ostringstream fnName_mangled; if (ptr) { fnName_mangled << "#indirect_call"; Pointer p(s.getMemory(), s.getAndAddPoisonUB(*ptr, true).value); - inputs.emplace_back(p.reprWithoutAttrs(), true); + auto bid = p.getShortBid(); + inputs.emplace_back(expr(bid), true); s.addUB(p.isDereferenceable(1, 1, false)); Function::FnDecl decl; decl.output = &getType(); + unsigned idx = 0; for (auto &[arg, params] : args) { + if (idx++ == var_arg_idx) + break; decl.inputs.emplace_back(&arg->getType(), params); } - s.addUB(expr::mkUF("#fndeclty", { inputs[0].value }, expr::mkUInt(0, 32)) == + decl.is_varargs = var_arg_idx != -1u; + s.addUB(!p.isLocal()); + s.addUB(p.getOffset() == 0); + s.addUB(expr::mkUF("#fndeclty", { std::move(bid) }, expr::mkUInt(0, 32)) == (indirect_hash = decl.hash())); } else { fnName_mangled << fnName; @@ -2568,7 +2579,7 @@ expr FnCall::getTypeConstraints(const Function &f) const { unique_ptr FnCall::dup(Function &f, const string &suffix) const { auto r = make_unique(getType(), getName() + suffix, string(fnName), - FnAttrs(attrs), fnptr); + FnAttrs(attrs), fnptr, var_arg_idx); r->args = args; r->approx = approx; return r; @@ -3255,6 +3266,17 @@ bool Assume::propagatesPoison() const { } bool Assume::hasSideEffects() const { + switch (kind) { + // assume(true) is NOP + case AndNonPoison: + case WellDefined: + if (auto *c = dynamic_cast(args[0])) + if (auto n = c->getInt()) + return *n != 1; + break; + default: + break; + } return true; } @@ -3699,7 +3721,7 @@ uint64_t GEP::getMaxGEPOffset() const { return UINT64_MAX; if (auto n = getInt(*v)) { - off = add_saturate(off, abs((int64_t)mul * *n)); + off = add_saturate(off, (int64_t)mul * *n); continue; } @@ -3811,7 +3833,7 @@ StateValue GEP::toSMT(State &s) const { // try to simplify the pointer if (all_zeros.isFalse()) - ptr.inbounds(true); + ptr.inbounds(); } return { std::move(ptr).release(), non_poison() }; diff --git a/ir/instr.h b/ir/instr.h index 57fd75e50..ac04a7f08 100644 --- a/ir/instr.h +++ b/ir/instr.h @@ -1053,15 +1053,18 @@ class FnCall : public MemInstr { Value *fnptr; std::vector> args; FnAttrs attrs; + unsigned var_arg_idx; bool approx = false; Value* getAlignArg() const; public: FnCall(Type &type, std::string &&name, std::string &&fnName, - FnAttrs &&attrs = FnAttrs::None, Value *fnptr = nullptr); + FnAttrs &&attrs = FnAttrs::None, Value *fnptr = nullptr, + unsigned var_arg_idx = -1u); void addArg(Value &arg, ParamAttrs &&attrs); const auto& getFnName() const { return fnName; } + Value* getFnPtr() const { return fnptr; } const auto& getArgs() const { return args; } const auto& getAttributes() const { return attrs; } bool hasAttribute(const FnAttrs::Attribute &i) const { return attrs.has(i); } diff --git a/ir/memory.cpp b/ir/memory.cpp index 4502f7b5c..f0908e087 100644 --- a/ir/memory.cpp +++ b/ir/memory.cpp @@ -103,6 +103,24 @@ static bool always_nowrite(unsigned bid, bool src_only = false, return always_noread(bid, is_fncall_accessible) || is_constglb(bid, src_only); } +static expr mk_block_if(const expr &cond, expr then, expr els) { + if (cond.isTrue()) + return then; + if (cond.isFalse()) + return els; + + bool is_bv1 = then.isBV(); + bool is_bv2 = els.isBV(); + if (is_bv1 != is_bv2) { + expr offset = expr::mkUInt(0, Pointer::bitsShortOffset()); + if (is_bv1) + then = expr::mkConstArray(offset, then); + else + els = expr::mkConstArray(offset, els); + } + return expr::mkIf(cond, then, els); +} + static unsigned next_local_bid; static unsigned next_const_bid; @@ -889,27 +907,23 @@ bool Memory::mayalias(bool local, unsigned bid0, const expr &offset0, if ( write && always_nowrite(bid0)) return false; } - if (offset0.isNegative().isTrue()) + if (!isAsmMode() && offset0.isNegative().isTrue()) return false; assert(!isUndef(offset0)); expr bid = expr::mkUInt(bid0, Pointer::bitsShortBid()); - if (auto algn = (local ? local_blk_align : non_local_blk_align).lookup(bid)) { - int64_t offset = 0; - uint64_t blk_align; - if (algn->isUInt(blk_align) && - align > (1ull << blk_align) && - (!observesAddresses() || offset0.isInt(offset))) - return false; - } if (auto sz = (local ? local_blk_size : non_local_blk_size).lookup(bid)) { expr offset = offset0.sextOrTrunc(bits_size_t); - if (offset.uge(*sz).isTrue() || - sz->ult(bytes).isTrue() || - (*sz - offset).ult(bytes).isTrue()) + if (sz->ult(bytes).isTrue()) return false; + + if (!isAsmMode()) { + if (offset.uge(*sz).isTrue() || + (*sz - offset).ult(bytes).isTrue()) + return false; + } } else if (local) // allocated in another branch return false; @@ -990,13 +1004,15 @@ Memory::AliasSet Memory::computeAliasing(const Pointer &ptr, unsigned bytes, void Memory::access(const Pointer &ptr, unsigned bytes, uint64_t align, bool write, const - function &fn) { + function &fn) { auto aliasing = computeAliasing(ptr, bytes, align, write); unsigned has_local = aliasing.numMayAlias(true); unsigned has_nonlocal = aliasing.numMayAlias(false); bool has_both = has_local && has_nonlocal; bool is_singleton = has_local + has_nonlocal == 1; + expr addr = ptr.getAddress(); + expr offset = ptr.getOffset(); expr is_local = ptr.isLocal(); expr bid = has_both ? ptr.getBid() : ptr.getShortBid(); expr one = expr::mkUInt(1, 1); @@ -1005,29 +1021,68 @@ void Memory::access(const Pointer &ptr, unsigned bytes, uint64_t align, auto sz_nonlocal = aliasing.size(false); for (unsigned i = 0; i < sz_local; ++i) { - if (aliasing.mayAlias(true, i)) { + if (!aliasing.mayAlias(true, i)) + continue; + + Pointer this_ptr(*this, i, true); + expr cond_eq; + + if (isAsmMode() && !ptr.isInbounds(true).isTrue()) { + // in asm mode, all pointers have full provenance + cond_eq = ptr.isInboundsOf(this_ptr, bytes); + this_ptr += addr - this_ptr.getAddress(); + } else { auto n = expr::mkUInt(i, Pointer::bitsShortBid()); - fn(local_block_val[i], i, true, - is_singleton ? true - : (has_local == 1 - ? is_local - : bid == (has_both ? one.concat(n) : n))); + cond_eq + = has_local == 1 ? is_local : (bid == (has_both ? one.concat(n) : n)); + this_ptr += offset; } + + fn(local_block_val[i], ptr, is_singleton ? expr(true) : std::move(cond_eq)); } for (unsigned i = 0; i < sz_nonlocal; ++i) { - if (aliasing.mayAlias(false, i)) { - // A nonlocal block for encoding fn calls' side effects cannot be - // accessed. - // If aliasing info says it can, either imprecise analysis or incorrect - // block id encoding is happening. - assert(!is_fncall_mem(i)); - fn(non_local_block_val[i], i, false, - is_singleton ? true : (has_nonlocal == 1 ? !is_local : bid == i)); + if (!aliasing.mayAlias(false, i)) + continue; + + // A nonlocal block for encoding fn calls' side effects cannot be + // accessed. + // If aliasing info says it can, either imprecise analysis or incorrect + // block id encoding is happening. + assert(!is_fncall_mem(i)); + Pointer this_ptr(*this, i, false); + expr cond_eq; + + if (isAsmMode() && !ptr.isInbounds(true).isTrue()) { + // in asm mode, all pointers have full provenance + cond_eq = ptr.isInboundsOf(this_ptr, bytes); + this_ptr += addr - this_ptr.getAddress(); + } else { + cond_eq = has_nonlocal == 1 ? !is_local : bid == i; + this_ptr += offset; } + + fn(non_local_block_val[i], this_ptr, + is_singleton ? expr(true) : std::move(cond_eq)); } } +static expr raw_load(const expr &block, const expr &offset, + uint64_t max_idx = UINT64_MAX) { + return block.isBV() ? block : block.load(offset, max_idx); +} + +static expr mk_store(expr mem, const expr &offset, const expr &val) { + if (mem.isBV()) + mem = expr::mkConstArray(offset, mem); + return mem.store(offset, val); +} + +Byte Memory::raw_load(bool local, unsigned bid, const expr &offset) const { + auto &block = (local ? local_block_val : non_local_block_val)[bid].val; + return {*this, ::raw_load(block, offset) }; +} + vector Memory::load(const Pointer &ptr, unsigned bytes, set &undef, uint64_t align, bool left2right, DataType type) { if (bytes == 0) @@ -1039,9 +1094,7 @@ vector Memory::load(const Pointer &ptr, unsigned bytes, set &undef, expr poison = Byte::mkPoisonByte(*this)(); loaded.resize(loaded_bytes, poison); - expr offset = ptr.getShortOffset(); - - auto fn = [&](MemBlock &blk, unsigned bid, bool local, expr &&cond) { + auto fn = [&](MemBlock &blk, const Pointer &ptr, expr &&cond) { bool is_poison = (type & blk.type) == DATA_NONE; if (is_poison) { for (unsigned i = 0; i < loaded_bytes; ++i) { @@ -1049,15 +1102,16 @@ vector Memory::load(const Pointer &ptr, unsigned bytes, set &undef, } } else { uint64_t blk_size = UINT64_MAX; - Pointer(*this, bid, local).blockSize().isUInt(blk_size); - expr blk_offset = blk_size == bytes ? expr::mkUInt(0, offset) : offset; + bool single_load = ptr.blockSize().isUInt(blk_size) && blk_size == bytes; + auto offset = ptr.getShortOffset(); + expr blk_offset = single_load ? expr::mkUInt(0, offset) : offset; for (unsigned i = 0; i < loaded_bytes; ++i) { unsigned idx = left2right ? i : (loaded_bytes - i - 1); assert(idx < blk_size); uint64_t max_idx = blk_size - bytes + idx; expr off = blk_offset + expr::mkUInt(idx, offset); - loaded[i].add(blk.val.load(off, max_idx), cond); + loaded[i].add(::raw_load(blk.val, off, max_idx), cond); } undef.insert(blk.undef.begin(), blk.undef.end()); } @@ -1109,19 +1163,15 @@ void Memory::store(const Pointer &ptr, } unsigned bytes = data.size() * (bits_byte/8); - expr offset = ptr.getShortOffset(); - unsigned off_bits = Pointer::bitsShortOffset(); auto stored_ty = data_type(data, false); auto stored_ty_full = data_type(data, true); - auto fn = [&](MemBlock &blk, unsigned bid, bool local, expr &&cond) { + auto fn = [&](MemBlock &blk, const Pointer &ptr, expr &&cond) { auto mem = blk.val; uint64_t blk_size; - bool full_write - = Pointer(*this, bid, local).blockSize().isUInt(blk_size) && - blk_size == bytes; + bool full_write = ptr.blockSize().isUInt(blk_size) && blk_size == bytes; // optimization: if fully rewriting the block, don't bother with the old // contents. Pick a value as the default one. @@ -1129,8 +1179,13 @@ void Memory::store(const Pointer &ptr, // init because the size is rounded up to the alignment. // The remaining bytes are poison. if (full_write || state->isInitializationPhase()) { - mem = expr::mkConstArray(offset, - full_write ? data[0].second : Byte::mkPoisonByte(*this)()); + // optimization: if the access size == byte size, then we don't store + // data as arrays, but simply as a BV + if (data.size() == 1) { + mem = data[0].second; + } else { + mem = full_write ? data[0].second : Byte::mkPoisonByte(*this)(); + } if (cond.isTrue()) { blk.undef.clear(); blk.type = stored_ty_full; @@ -1141,14 +1196,16 @@ void Memory::store(const Pointer &ptr, blk.type |= stored_ty; } + expr offset = ptr.getShortOffset(); + for (auto &[idx, val] : data) { if (full_write && val.eq(data[0].second)) continue; expr off - = offset + expr::mkUInt(idx >> Pointer::zeroBitsShortOffset(), off_bits); - mem = mem.store(off, val); + = offset + expr::mkUInt(idx >> Pointer::zeroBitsShortOffset(), offset); + mem = mk_store(std::move(mem), off, val); } - blk.val = expr::mkIf(cond, mem, blk.val); + blk.val = mk_block_if(cond, std::move(mem), std::move(blk.val)); blk.undef.insert(undef.begin(), undef.end()); }; @@ -1160,9 +1217,6 @@ void Memory::storeLambda(const Pointer &ptr, const expr &offset, const vector> &data, const set &undef, uint64_t align) { assert(!state->isInitializationPhase()); - // offset in [ptr, ptr+sz) - auto offset_cond = offset.uge(ptr.getShortOffset()) && - offset.ult((ptr + bytes).getShortOffset()); bool val_no_offset = data.size() == 1 && !data[0].second.vars().count(offset); auto stored_ty = data_type(data, false); @@ -1173,21 +1227,32 @@ void Memory::storeLambda(const Pointer &ptr, const expr &offset, val = expr::mkIf(offset.urem(mod) == I->first, I->second, val); } - auto fn = [&](MemBlock &blk, unsigned bid, bool local, expr &&cond) { + auto fn = [&](MemBlock &blk, const Pointer &ptr, expr &&cond) { + auto orig_val = ::raw_load(blk.val, offset); + // optimization: full rewrite - if (bytes.eq(Pointer(*this, bid, local).blockSize())) { - blk.val = val_no_offset && data.size() == 1 - ? expr::mkIf(cond, expr::mkConstArray(offset, val), blk.val) - : expr::mkLambda(offset, expr::mkIf(cond, val, blk.val.load(offset))); + if (bytes.eq(ptr.blockSize())) { + blk.val = val_no_offset + ? mk_block_if(cond, val, std::move(blk.val)) + : expr::mkLambda(offset, mk_block_if(cond, val, std::move(orig_val))); if (cond.isTrue()) { blk.undef.clear(); blk.type = stored_ty; } } else { - blk.val = expr::mkLambda(offset, expr::mkIf(cond && offset_cond, val, - blk.val.load(offset))); + // offset in [ptr, ptr+sz) + auto offset_cond = offset.uge(ptr.getShortOffset()) && + offset.ult((ptr + bytes).getShortOffset()); + + blk.val + = expr::mkLambda(offset, + mk_block_if(cond && offset_cond, val, + std::move(orig_val))); } + // const array -> BV + blk.val.isConstArray(blk.val); + blk.type |= stored_ty; blk.undef.insert(undef.begin(), undef.end()); }; @@ -1202,11 +1267,13 @@ static bool memory_unused() { return num_locals_src == 0 && num_locals_tgt == 0 && num_nonlocals == 0; } -static expr mk_block_val_array(unsigned bid) { +expr Memory::mk_block_val_array(unsigned bid) const { auto str = "init_mem_" + to_string(bid); - return expr::mkArray(str.c_str(), - expr::mkUInt(0, Pointer::bitsShortOffset()), - expr::mkUInt(0, Byte::bitsByte())); + return Pointer(*this, bid, false).isBlkSingleByte() + ? expr::mkVar(str.c_str(), Byte::bitsByte()) + : expr::mkArray(str.c_str(), + expr::mkUInt(0, Pointer::bitsShortOffset()), + expr::mkUInt(0, Byte::bitsByte())); } static expr mk_liveness_array() { @@ -1218,15 +1285,17 @@ static expr mk_liveness_array() { return expr::mkInt(-1, num_nonlocals); } -void Memory::mkNonPoisonAxioms(bool local) { +void Memory::mkNonPoisonAxioms(bool local) const { expr offset = expr::mkFreshVar("#off", expr::mkUInt(0, Pointer::bitsShortOffset())); + unsigned bid = 0; for (auto &block : local ? local_block_val : non_local_block_val) { if (isInitialMemBlock(block.val, config::disallow_ub_exploitation)) state->addAxiom( expr::mkForAll({ offset }, - !Byte(*this, block.val.load(offset)).isPoison())); + !raw_load(local, bid, offset).isPoison())); + ++bid; } } @@ -1255,7 +1324,7 @@ expr Memory::mkSubByteZExtStoreCond(const Byte &val, const Byte &val2) const { val2.nonptrValue().lshr(leftover_bits) == 0; } -void Memory::mkNonlocalValAxioms(bool skip_consts) { +void Memory::mkNonlocalValAxioms(bool skip_consts) const { // Users may request the initial memory to be non-poisonous if ((config::disable_poison_input && state->isSource()) && (does_int_mem_access || does_ptr_mem_access)) { @@ -1272,7 +1341,7 @@ void Memory::mkNonlocalValAxioms(bool skip_consts) { if (always_noread(i, true)) continue; - Byte byte(*this, non_local_block_val[i].val.load(offset)); + Byte byte = raw_load(false, i, offset); // in ASM mode, non-byte-aligned values are expected to be zero-extended // per the ABI. @@ -1335,10 +1404,6 @@ Memory::Memory(State &state) : state(&state), escaped_local_blks(*this) { non_local_block_liveness = mk_liveness_array(); - // Non-local blocks cannot initially contain pointers to local blocks - // and no-capture pointers. - mkNonlocalValAxioms(false); - // initialize all local blocks as non-pointer, poison value // This is okay because loading a pointer as non-pointer is also poison. if (numLocals() > 0) { @@ -1370,6 +1435,18 @@ void Memory::mkAxioms(const Memory &tgt) const { if (memory_unused()) return; + // Non-local blocks cannot initially contain pointers to local blocks + // and no-capture pointers. + for (auto m_ptr : { this, &tgt }) { + auto &m = const_cast(*m_ptr); + auto old_vals = m.non_local_block_val; + for (unsigned i = 0, e = old_vals.size(); i != e; ++i) { + m.non_local_block_val.at(i).val = m.mk_block_val_array(i); + } + m.mkNonlocalValAxioms(false); + m.non_local_block_val = std::move(old_vals); + } + auto skip_bid = [&](unsigned bid) { if (bid == 0 && has_null_block) return true; @@ -1514,7 +1591,7 @@ expr Memory::mkInput(const char *name, const ParamAttrs &attrs0) { auto attrs = attrs0; attrs.set(ParamAttrs::IsArg); - Pointer p(*this, name, false, false, false, attrs); + Pointer p(*this, name, attrs); auto bid = p.getShortBid(); state->addAxiom(bid.ule(max_bid)); @@ -1651,10 +1728,11 @@ Memory::CallState Memory::CallState::mkIf(const expr &cond, ret.non_local_block_val.emplace_back(then.non_local_block_val[i]); } else { ret.non_local_block_val.emplace_back( - expr::mkIf(cond, then.non_local_block_val[i], - els.non_local_block_val[i])); + mk_block_if(cond, then.non_local_block_val[i], + els.non_local_block_val[i])); } } + ret.writes_args = expr::mkIf(cond, then.writes_args, els.writes_args); ret.non_local_liveness = expr::mkIf(cond, then.non_local_liveness, els.non_local_liveness); return ret; @@ -1670,11 +1748,13 @@ expr Memory::CallState::operator==(const CallState &rhs) const { for (unsigned i = 0, e = non_local_block_val.size(); i != e; ++i) { ret &= non_local_block_val[i] == rhs.non_local_block_val[i]; } + if (writes_args.isValid()) + ret &= writes_args == rhs.writes_args; return ret; } Memory::CallState -Memory::mkCallState(const string &fnname, bool nofree, +Memory::mkCallState(const string &fnname, bool nofree, unsigned num_ptr_args, const SMTMemoryAccess &access) { assert(has_fncall); CallState st; @@ -1682,11 +1762,12 @@ Memory::mkCallState(const string &fnname, bool nofree, // TODO: handle havoc of local blocks - auto blk_type = mk_block_val_array(1); - expr only_write_inaccess = access.canOnlyWrite(MemoryAccess::Inaccessible); - // inaccessible memory block - st.non_local_block_val.emplace_back(expr::mkFreshVar("blk_val", blk_type)); + unsigned innaccess_bid = num_nonlocals_src - 1; + st.non_local_block_val.emplace_back( + expr::mkFreshVar("blk_val", non_local_block_val[innaccess_bid].val)); + + expr only_write_inaccess = access.canOnlyWrite(MemoryAccess::Inaccessible); if (!only_write_inaccess.isTrue()) { unsigned limit = num_nonlocals_src - num_inaccessiblememonly_fns; @@ -1694,16 +1775,19 @@ Memory::mkCallState(const string &fnname, bool nofree, if (always_nowrite(i, true, true)) continue; st.non_local_block_val.emplace_back( - expr::mkFreshVar("blk_val", blk_type)); + expr::mkFreshVar("blk_val", mk_block_val_array(i))); } } + st.writes_args + = expr::mkFreshVar("writes_args", expr::mkUInt(0, num_ptr_args)); + st.non_local_liveness = mk_liveness_array(); if (num_nonlocals_src && !nofree) st.non_local_liveness - = expr::mkIf(only_write_inaccess, - st.non_local_liveness, - expr::mkFreshVar("blk_liveness", st.non_local_liveness)); + = mkIf_fold(only_write_inaccess, + st.non_local_liveness, + expr::mkFreshVar("blk_liveness", st.non_local_liveness)); return st; } @@ -1714,7 +1798,8 @@ void Memory::setState(const Memory::CallState &st, unsigned inaccessible_bid) { assert(has_fncall); - unsigned limit = num_nonlocals_src - num_inaccessiblememonly_fns; + if (access.canWriteSomething().isFalse()) + return; // 1) Havoc memory @@ -1728,52 +1813,45 @@ void Memory::setState(const Memory::CallState &st, assert(non_local_block_val[bid].undef.empty()); auto &cur_val = non_local_block_val[bid].val; cur_val - = expr::mkIf(only_write_inaccess, st.non_local_block_val[0], cur_val); + = mk_block_if(only_write_inaccess, st.non_local_block_val[0], cur_val); } - expr cond = access.canWrite(MemoryAccess::Args) && - !access.canOnlyWrite(MemoryAccess::Other); - if (!cond.isFalse()) { + // TODO: MemoryAccess::Errno + + if (!only_write_inaccess.isTrue()) { unsigned idx = 1; - for (unsigned bid = 0; bid < limit - has_write_fncall; ++bid) { + unsigned limit = num_nonlocals_src - num_inaccessiblememonly_fns; + for (unsigned bid = 0; bid < limit; ++bid) { if (always_nowrite(bid, true, true)) continue; - expr modifies(false); - for (auto &ptr_in : ptr_inputs) { - if (bid < next_nonlocal_bid) { - modifies |= cond && - ptr_in.val.non_poison && - !ptr_in.nowrite && - ptr_in.byval == 0 && - Pointer(*this, ptr_in.val.value).getBid() == bid; + expr modifies = access.canWrite(MemoryAccess::Other); + + if (!is_fncall_mem(bid)) { + unsigned arg_idx = 0; + for (auto &ptr_in : ptr_inputs) { + if (bid < next_nonlocal_bid) { + expr writes = st.writes_args.extract(arg_idx, arg_idx) == 1; + expr cond = !ptr_in.nowrite && + ptr_in.byval == 0 && + access.canWrite(MemoryAccess::Args) && + writes; + + modifies |= cond && + Pointer(*this, ptr_in.val.value).getBid() == bid; + state->addUB(cond.implies(ptr_in.val.non_poison)); + state->addUB(ptr_in.nowrite.implies(!writes)); + ++arg_idx; + } } } - auto &new_val = st.non_local_block_val[idx++]; auto &cur_val = non_local_block_val[bid].val; - cur_val = expr::mkIf(modifies, new_val, cur_val); + auto &new_val = st.non_local_block_val[idx++]; + cur_val = mk_block_if(modifies, new_val, std::move(cur_val)); if (modifies.isTrue()) non_local_block_val[bid].undef.clear(); } - assert(idx == st.non_local_block_val.size() - has_write_fncall); - } - - if (!access.canWrite(MemoryAccess::Errno).isFalse()) { - // TODO - } - - cond = access.canWrite(MemoryAccess::Other); - if (!cond.isFalse()) { - unsigned idx = 1; - for (unsigned bid = 0; bid < limit; ++bid) { - if (always_nowrite(bid, true, true)) - continue; - auto &cur_val = non_local_block_val[bid].val; - cur_val = expr::mkIf(cond, st.non_local_block_val[idx++], cur_val); - if (cond.isTrue()) - non_local_block_val[bid].undef.clear(); - } assert(idx == st.non_local_block_val.size()); } @@ -1802,13 +1880,11 @@ void Memory::setState(const Memory::CallState &st, // TODO: havoc local blocks // for now, zero out if in non UB-exploitation mode to avoid false positives if (config::disallow_ub_exploitation) { - expr raw_byte = Byte(*this, {expr::mkUInt(0, bits_byte), true}, 0, true)(); - expr array = expr::mkConstArray(expr::mkUInt(0, bits_for_offset), - raw_byte); + expr zero_byte = Byte(*this, {expr::mkUInt(0, bits_byte), true}, 0, true)(); for (unsigned i = 0; i < next_local_bid; ++i) { if (escaped_local_blks.mayAlias(true, i)) { - local_block_val[i] = expr(array); + local_block_val[i] = expr(zero_byte); } } } @@ -1950,6 +2026,13 @@ Memory::alloc(const expr *size, uint64_t align, BlockKind blockKind, (is_local ? local_blk_kind : non_local_blk_kind) .add(short_bid, expr::mkUInt(alloc_ty, 2)); + if (Pointer(*this, bid, is_local).isBlkSingleByte()) { + if (is_local) + local_block_val[bid].val = Byte::mkPoisonByte(*this)(); + else + non_local_block_val[bid].val = mk_block_val_array(bid); + } + if (!nonnull.isTrue()) { expr nondet_nonnull = expr::mkFreshVar("#alloc_nondet_nonnull", true); state->addQuantVar(nondet_nonnull); @@ -2221,8 +2304,7 @@ void Memory::memcpy(const expr &d, const expr &s, const expr &bytesize, = expr::mkFreshVar("#off", expr::mkUInt(0, Pointer::bitsShortOffset())); Pointer ptr_src = src + (offset - dst.getShortOffset()); set undef; - auto val = raw_load(ptr_src, undef); - storeLambda(dst, offset, bytesize, {{0, std::move(val)()}}, undef, + storeLambda(dst, offset, bytesize, {{0, raw_load(ptr_src, undef)()}}, undef, align_dst); } } @@ -2240,23 +2322,35 @@ void Memory::copy(const Pointer &src, const Pointer &dst) { assert(local.isConst()); bool dst_local = local.isTrue(); uint64_t dst_bid; - ENSURE(dst.getShortBid().isUInt(dst_bid)); + expr dst_bid_expr = dst.getShortBid(); + ENSURE(dst_bid_expr.isUInt(dst_bid)); auto &dst_blk = (dst_local ? local_block_val : non_local_block_val)[dst_bid]; dst_blk.undef.clear(); dst_blk.type = DATA_NONE; + unsigned has_bv_val = 0; auto offset = expr::mkUInt(0, Pointer::bitsShortOffset()); - DisjointExpr val(expr::mkConstArray(offset, Byte::mkPoisonByte(*this)())); + DisjointExpr val(Byte::mkPoisonByte(*this)()); - auto fn = [&](MemBlock &blk, unsigned bid, bool local, expr &&cond) { + auto fn = [&](MemBlock &blk, const Pointer &ptr, expr &&cond) { // we assume src != dst - if (local == dst_local && bid == dst_bid) + if (ptr.isLocal().eq(local) && ptr.getShortBid().eq(dst_bid_expr)) return; val.add(blk.val, std::move(cond)); dst_blk.undef.insert(blk.undef.begin(), blk.undef.end()); dst_blk.type |= blk.type; + has_bv_val |= 1u << blk.val.isBV(); }; access(src, bits_byte/8, bits_byte/8, false, fn); + + // if we have mixed array/non-array blocks, switch them all to array + if (has_bv_val == 3) { + DisjointExpr newval; + for (auto &[v, cond] : val) { + newval.add(v.isBV() ? expr::mkConstArray(offset, v) : v, cond); + } + val = std::move(newval); + } dst_blk.val = *std::move(val)(); } @@ -2380,8 +2474,8 @@ expr Memory::blockValRefined(const Memory &other, unsigned bid, bool local, return false; } - Byte val(*this, mem1.val.load(offset)); - Byte val2(other, mem2.load(offset)); + Byte val = raw_load(false, bid, offset); + Byte val2 = other.raw_load(false, bid, offset); if (val.eq(val2)) return true; @@ -2528,7 +2622,7 @@ expr Memory::checkNocapture() const { if (always_nowrite(bid)) continue; Pointer p(*this, bid, false); - Byte b(*this, non_local_block_val[bid].val.load(offset)); + Byte b = raw_load(false, bid, offset); Pointer loadp(*this, b.ptrValue()); res &= (p.isBlockAlive() && b.isPtr() && b.ptrNonpoison()) .implies(!loadp.isNocapture()); @@ -2569,18 +2663,18 @@ Memory Memory::mkIf(const expr &cond, Memory &&then, Memory &&els) { for (unsigned bid = 0, end = ret.numNonlocals(); bid < end; ++bid) { if (always_nowrite(bid, false, true)) continue; + auto &blk = ret.non_local_block_val[bid]; auto &other = els.non_local_block_val[bid]; - ret.non_local_block_val[bid].val - = expr::mkIf(cond, then.non_local_block_val[bid].val, other.val); - ret.non_local_block_val[bid].undef.insert(other.undef.begin(), - other.undef.end()); + blk.val = mk_block_if(cond, blk.val, other.val); + blk.type |= other.type; + blk.undef.insert(other.undef.begin(), other.undef.end()); } for (unsigned bid = 0, end = ret.numLocals(); bid < end; ++bid) { + auto &blk = ret.local_block_val[bid]; auto &other = els.local_block_val[bid]; - ret.local_block_val[bid].val - = expr::mkIf(cond, then.local_block_val[bid].val, other.val); - ret.local_block_val[bid].undef.insert(other.undef.begin(), - other.undef.end()); + blk.val = mk_block_if(cond, blk.val, other.val); + blk.type |= other.type; + blk.undef.insert(other.undef.begin(), other.undef.end()); } ret.non_local_block_liveness = expr::mkIf(cond, then.non_local_block_liveness, els.non_local_block_liveness); diff --git a/ir/memory.h b/ir/memory.h index ce1a1f0c7..b11c0d596 100644 --- a/ir/memory.h +++ b/ir/memory.h @@ -181,9 +181,9 @@ class Memory { smt::expr isBlockAlive(const smt::expr &bid, bool local) const; - void mkNonPoisonAxioms(bool local); + void mkNonPoisonAxioms(bool local) const; smt::expr mkSubByteZExtStoreCond(const Byte &val, const Byte &val2) const; - void mkNonlocalValAxioms(bool skip_consts); + void mkNonlocalValAxioms(bool skip_consts) const; bool mayalias(bool local, unsigned bid, const smt::expr &offset, unsigned bytes, uint64_t align, bool write) const; @@ -192,7 +192,7 @@ class Memory { bool write) const; void access(const Pointer &ptr, unsigned btyes, uint64_t align, bool write, - const std::function &fn); std::vector load(const Pointer &ptr, unsigned bytes, @@ -238,6 +238,7 @@ class Memory { class CallState { std::vector non_local_block_val; smt::expr non_local_liveness; + smt::expr writes_args; bool empty = true; public: @@ -297,7 +298,7 @@ class Memory { mkFnRet(const char *name, const std::vector &ptr_inputs, bool is_local, const FnRetData *data = nullptr); CallState mkCallState(const std::string &fnname, bool nofree, - const SMTMemoryAccess &access); + unsigned num_ptr_args, const SMTMemoryAccess &access); void setState(const CallState &st, const SMTMemoryAccess &access, const std::vector &ptr_inputs, unsigned inaccessible_bid); @@ -377,6 +378,8 @@ class Memory { friend class Pointer; private: + smt::expr mk_block_val_array(unsigned bid) const; + Byte raw_load(bool local, unsigned bid, const smt::expr &offset) const; void print_array(std::ostream &os, const smt::expr &a, unsigned indent = 0) const; }; diff --git a/ir/pointer.cpp b/ir/pointer.cpp index df5bf370e..ad0d60383 100644 --- a/ir/pointer.cpp +++ b/ir/pointer.cpp @@ -6,7 +6,6 @@ #include "ir/memory.h" #include "ir/globals.h" #include "ir/state.h" -#include "util/compiler.h" using namespace IR; using namespace smt; @@ -27,10 +26,6 @@ static expr disjoint(const expr &begin1, const expr &len1, const expr &begin2, return begin1.uge(begin2 + len2) || begin2.uge(begin1 + len1); } -static unsigned total_bits_short() { - return Pointer::bitsShortBid() + Pointer::bitsShortOffset(); -} - static expr attr_to_bitvec(const ParamAttrs &attrs) { if (!bits_for_ptrattrs) return {}; @@ -56,16 +51,14 @@ Pointer::Pointer(const Memory &m, const expr &bid, const expr &offset, assert(!bid.isValid() || !offset.isValid() || p.bits() == totalBits()); } -Pointer::Pointer(const Memory &m, const char *var_name, const expr &local, - bool unique_name, bool align, const ParamAttrs &attr) : m(m) { - unsigned bits = total_bits_short() + !align * zeroBitsShortOffset(); - p = prepend_if(local.toBVBool(), - expr::mkVar(var_name, bits, unique_name), hasLocalBit()); - if (align) - p = p.concat_zeros(zeroBitsShortOffset()); +Pointer::Pointer(const Memory &m, const char *var_name, + const ParamAttrs &attr) : m(m) { + unsigned bits = bitsShortBid() + bits_for_offset; + p = prepend_if(expr::mkUInt(0, 1), + expr::mkVar(var_name, bits, false), hasLocalBit()); if (bits_for_ptrattrs) p = p.concat(attr_to_bitvec(attr)); - assert(!local.isValid() || p.bits() == totalBits()); + assert(p.bits() == totalBits()); } Pointer::Pointer(const Memory &m, expr repr) : m(m), p(std::move(repr)) { @@ -251,7 +244,7 @@ expr Pointer::getBlockBaseAddress(bool simplify) const { expr Pointer::getAddress(bool simplify) const { return - getBlockBaseAddress(simplify) + getOffset().zextOrTrunc(bits_ptr_address); + getBlockBaseAddress(simplify) + getOffset().sextOrTrunc(bits_ptr_address); } expr Pointer::blockSize() const { @@ -312,32 +305,48 @@ expr Pointer::operator!=(const Pointer &rhs) const { return !operator==(rhs); } -static expr inbounds(const Pointer &p, bool strict) { - if (isUndef(p.getOffset())) +expr Pointer::isInboundsOf(const Pointer &block, const expr &bytes0) const { + assert(block.getOffset().isZero()); + expr bytes = bytes0.zextOrTrunc(bits_ptr_address); + expr addr = getAddress(); + expr block_addr = block.getAddress(); + expr block_size = block.blockSize().zextOrTrunc(bits_ptr_address); + + if (bytes.eq(block_size)) + return addr == block_addr; + if (bytes.ugt(block_size).isTrue()) return false; - auto offset = p.getOffsetSizet(); - auto size = p.blockSizeOffsetT(); - return (strict ? offset.ult(size) : offset.ule(size)) && !offset.isNegative(); + return addr.uge(block_addr) && + addr.add_no_uoverflow(bytes) && + (addr + bytes).ule(block_addr + block_size); +} + +expr Pointer::isInboundsOf(const Pointer &block, unsigned bytes) const { + return isInboundsOf(block, expr::mkUInt(bytes, bits_ptr_address)); } -expr Pointer::inbounds(bool simplify_ptr, bool strict) { - if (!simplify_ptr) - return ::inbounds(*this, strict); +expr Pointer::isInbounds(bool strict) const { + if (isUndef(getOffset())) + return false; + + auto offset = getOffsetSizet(); + auto size = blockSizeOffsetT(); + return (strict ? offset.ult(size) : offset.ule(size)) && !offset.isNegative(); +} +expr Pointer::inbounds() { DisjointExpr ret(expr(false)), all_ptrs; for (auto &[ptr_expr, domain] : DisjointExpr(p, 3)) { - expr inb = ::inbounds(Pointer(m, ptr_expr), strict); + expr inb = Pointer(m, ptr_expr).isInbounds(false); if (!inb.isFalse()) all_ptrs.add(ptr_expr, domain); ret.add(std::move(inb), domain); } // trim set of valid ptrs - if (auto ptrs = std::move(all_ptrs)()) - p = *ptrs; - else - p = expr::mkUInt(0, totalBits()); + auto ptrs = std::move(all_ptrs)(); + p = ptrs ? *std::move(ptrs) : expr::mkUInt(0, totalBits()); return *std::move(ret)(); } @@ -383,10 +392,10 @@ expr Pointer::isAligned(uint64_t align) { newp = newp.concat(getAttrs()); p = std::move(newp); assert(!p.isValid() || p.bits() == totalBits()); - return { blk_align && offset.extract(bits - 1, 0) == zero }; + return { blk_align && offset.trunc(bits) == zero }; } - return getAddress().extract(bits - 1, 0) == 0; + return getAddress().trunc(bits) == 0; } expr Pointer::isAligned(const expr &align) { @@ -400,36 +409,6 @@ expr Pointer::isAligned(const expr &align) { isNull()); } -static pair is_dereferenceable(Pointer &p, - const expr &bytes_off, - const expr &bytes, - uint64_t align, bool iswrite, - bool ignore_accessability) { - expr block_sz = p.blockSizeOffsetT(); - expr offset = p.getOffset(); - - // check that offset is within bounds and that arith doesn't overflow - expr cond = (offset + bytes_off).sextOrTrunc(block_sz.bits()).ule(block_sz); - cond &= !offset.isNegative(); - if (!block_sz.isNegative().isFalse()) // implied if block_sz >= 0 - cond &= offset.add_no_soverflow(bytes_off); - - cond &= p.isBlockAlive(); - - if (iswrite) - cond &= p.isWritable() && !p.isNoWrite(); - else if (!ignore_accessability) - cond &= !p.isNoRead(); - - // try some constant folding; these are implied by the conditions above - if (bytes.ugt(p.blockSize()).isTrue() || - p.getOffsetSizet().uge(block_sz).isTrue() || - isUndef(offset)) - cond = false; - - return { std::move(cond), p.isAligned(align) }; -} - // When bytes is 0, pointer is always dereferenceable pair Pointer::isDereferenceable(const expr &bytes0, uint64_t align, @@ -437,15 +416,58 @@ Pointer::isDereferenceable(const expr &bytes0, uint64_t align, bool round_size_to_align) { expr bytes = bytes0.zextOrTrunc(bits_size_t); if (round_size_to_align) - bytes = bytes.round_up(expr::mkUInt(align, bits_size_t)); + bytes = bytes.round_up(expr::mkUInt(align, bytes)); expr bytes_off = bytes.zextOrTrunc(bits_for_offset); + auto block_constraints = [=](const Pointer &p) { + expr ret = p.isBlockAlive(); + if (iswrite) + ret &= p.isWritable() && !p.isNoWrite(); + else if (!ignore_accessability) + ret &= !p.isNoRead(); + return ret; + }; + + auto is_dereferenceable = [&](Pointer &p) { + expr block_sz = p.blockSizeOffsetT(); + expr offset = p.getOffset(); + expr cond; + + if (m.state->isAsmMode()) { + // Pointers have full provenance in ASM mode + auto check = [&](unsigned limit, bool local) { + for (unsigned i = 0; i != limit; ++i) { + Pointer this_ptr(m, i, local); + cond |= p.isInboundsOf(this_ptr, bytes) && + block_constraints(this_ptr); + } + }; + cond = false; + check(m.numLocals(), true); + check(m.numNonlocals(), false); + + // try some constant folding + } else if (bytes.ugt(p.blockSize()).isTrue() || + p.getOffsetSizet().uge(block_sz).isTrue() || + isUndef(offset)) { + cond = false; + } else { + // check that the offset is within bounds and that arith doesn't overflow + cond = (offset + bytes_off).sextOrTrunc(block_sz.bits()).ule(block_sz); + cond &= !offset.isNegative(); + if (!block_sz.isNegative().isFalse()) // implied if block_sz >= 0 + cond &= offset.add_no_soverflow(bytes_off); + + cond &= block_constraints(p); + } + return make_pair(std::move(cond), p.isAligned(align)); + }; + DisjointExpr UB(expr(false)), is_aligned(expr(false)), all_ptrs; for (auto &[ptr_expr, domain] : DisjointExpr(p, 3)) { Pointer ptr(m, ptr_expr); - auto [ub, aligned] = ::is_dereferenceable(ptr, bytes_off, bytes, align, - iswrite, ignore_accessability); + auto [ub, aligned] = is_dereferenceable(ptr); // record pointer if not definitely unfeasible if (!ub.isFalse() && !aligned.isFalse() && !ptr.blockSize().isZero()) @@ -579,14 +601,11 @@ expr Pointer::fninputRefined(const Pointer &other, set &undef, expr nonlocal = is_asm ? getAddress() == other.getAddress() : *this == other; - Pointer other_deref - = is_asm ? other.m.searchPointer(other.getAddress()) : other; - return expr::mkIf(isNull(), other.isNull(), expr::mkIf(isLocal(), local, nonlocal) && // FIXME: this should be disabled just for phy pointers (is_asm ? expr(true) - : isBlockAlive().implies(other_deref.isBlockAlive()))); + : isBlockAlive().implies(other.isBlockAlive()))); } expr Pointer::isWritable() const { @@ -674,6 +693,11 @@ expr Pointer::isNull() const { return *this == mkNullPointer(m); } +bool Pointer::isBlkSingleByte() const { + uint64_t blk_size; + return blockSize().isUInt(blk_size) && blk_size == bits_byte/8; +} + Pointer Pointer::mkIf(const expr &cond, const Pointer &then, const Pointer &els) { assert(&then.m == &els.m); diff --git a/ir/pointer.h b/ir/pointer.h index 3e93f1ff4..caab88afb 100644 --- a/ir/pointer.h +++ b/ir/pointer.h @@ -38,9 +38,7 @@ class Pointer { public: Pointer(const Memory &m, const smt::expr &bid, const smt::expr &offset, const smt::expr &attr); - Pointer(const Memory &m, const char *var_name, - const smt::expr &local = false, bool unique_name = true, - bool align = true, const ParamAttrs &attr = {}); + Pointer(const Memory &m, const char *var_name, const ParamAttrs &attr); Pointer(const Memory &m, smt::expr p); Pointer(const Memory &m, unsigned bid, bool local); Pointer(const Memory &m, const smt::expr &bid, const smt::expr &offset, @@ -94,7 +92,11 @@ class Pointer { smt::expr operator==(const Pointer &rhs) const; smt::expr operator!=(const Pointer &rhs) const; - smt::expr inbounds(bool simplify_ptr = false, bool strict = false); + smt::expr isInboundsOf(const Pointer &block, const smt::expr &bytes) const; + smt::expr isInboundsOf(const Pointer &block, unsigned bytes) const; + smt::expr isInbounds(bool strict) const; + smt::expr inbounds(); + smt::expr blockAlignment() const; // log(bits) smt::expr isBlockAligned(uint64_t align, bool exact = false) const; @@ -142,6 +144,8 @@ class Pointer { static Pointer mkNullPointer(const Memory &m); smt::expr isNull() const; + bool isBlkSingleByte() const; + static Pointer mkIf(const smt::expr &cond, const Pointer &then, const Pointer &els); diff --git a/ir/state.cpp b/ir/state.cpp index 07e0b0f80..780fb80ea 100644 --- a/ir/state.cpp +++ b/ir/state.cpp @@ -255,8 +255,7 @@ State::State(const Function &f, bool source) : f(f), source(source), memory(*this), fp_rounding_mode(expr::mkVar("fp_rounding_mode", 3)), fp_denormal_mode(expr::mkVar("fp_denormal_mode", 2)), - return_val(DisjointExpr(f.getType().getDummyValue(false))), - return_memory(DisjointExpr(memory.dup())) {} + return_val(DisjointExpr(f.getType().getDummyValue(false))) {} void State::resetGlobals() { Memory::resetGlobals(); @@ -1000,10 +999,10 @@ State::addFnCall(const string &name, vector &&inputs, bool noalias = attrs.has(FnAttrs::NoAlias); bool is_indirect = name.starts_with("#indirect_call"); - expr fn_ptr; + expr fn_ptr_bid; if (is_indirect) { assert(inputs.size() >= 1); - fn_ptr = inputs[0].value; + fn_ptr_bid = inputs[0].value; } assert(!noret || !willret); @@ -1019,9 +1018,8 @@ State::addFnCall(const string &name, vector &&inputs, } auto isgvar = [&](const auto &decl) { - if (auto gv = getFn().getGlobalVar(string_view(decl.name).substr(1))) - return Pointer::mkPointerFromNoAttrs(memory, fn_ptr).getAddress() == - Pointer(memory, (*this)[*gv].value).getAddress(); + if (auto gv = getFn().getGlobalVar(decl.name)) + return fn_ptr_bid == Pointer(memory, (*this)[*gv].value).getShortBid(); return expr(); }; @@ -1042,7 +1040,8 @@ State::addFnCall(const string &name, vector &&inputs, decl_access.add(decl.attrs.mem, cmp); for (auto &ptr : ptr_inputs) { - if (decl.inputs.size() != ret_args.size()) + if (decl.inputs.size() != ret_args.size() || + (decl.is_varargs && ptr.idx < decl.inputs.size())) continue; auto &attrs = decl.inputs[ptr.idx].second; ptr.byval = expr::mkIf(cmp, expr::mkUInt(attrs.blockSize, 64), @@ -1057,7 +1056,7 @@ State::addFnCall(const string &name, vector &&inputs, } memaccess &= *std::move(decl_access).mk(SMTMemoryAccess{ - expr::mkUF("#access_" + name, { fn_ptr }, memaccess.val)}); + expr::mkUF("#access_" + name, { fn_ptr_bid }, memaccess.val)}); } if (!memaccess.canWrite(MemoryAccess::Args).isFalse() || @@ -1081,9 +1080,9 @@ State::addFnCall(const string &name, vector &&inputs, if (!memaccess.canRead(MemoryAccess::Inaccessible).isFalse() || !memaccess.canRead(MemoryAccess::Errno).isFalse() || !memaccess.canRead(MemoryAccess::Other).isFalse()) - call_ranges = memaccess.canOnlyRead(MemoryAccess::Inaccessible).isFalse() - ? analysis.ranges_fn_calls - : analysis.ranges_fn_calls.project(name); + call_ranges = memaccess.canOnlyRead(MemoryAccess::Inaccessible).isTrue() + ? analysis.ranges_fn_calls.project(name) + : analysis.ranges_fn_calls; if (ret_arg_ty && (*ret_arg_ty == out_type).isFalse()) { ret_arg = out_type.fromInt(ret_arg_ty->toInt(*this, std::move(ret_arg))); @@ -1136,13 +1135,15 @@ State::addFnCall(const string &name, vector &&inputs, }; output = ret_arg_ty ? std::move(ret_arg) : mk_output(out_type); + if (ret_arg_ty && ret_arg_ty->isPtrType()) + ret_data.emplace_back(Memory::FnRetData()); // Indirect calls may be changed into direct in tgt // Account for this if we have declarations with a returned argument // to limit the behavior of the SMT var. if (is_indirect) { for (auto &decl : getFn().getFnDecls()) { - if (decl.inputs.size() != ret_args.size()) + if (decl.inputs.size() != ret_args.size() || decl.is_varargs) continue; unsigned idx = 0; for (auto &[ty, attrs] : decl.inputs) { @@ -1167,7 +1168,8 @@ State::addFnCall(const string &name, vector &&inputs, : expr::mkFreshVar((name + "#noreturn").c_str(), false), memaccess.canWriteSomething().isFalse() ? Memory::CallState() - : memory.mkCallState(name, attrs.has(FnAttrs::NoFree), memaccess), + : memory.mkCallState(name, attrs.has(FnAttrs::NoFree), + I->first.args_ptr.size(), memaccess), std::move(ret_data) }; // add equality constraints between source's function calls @@ -1266,7 +1268,7 @@ void State::addNonDetVar(const expr &var) { expr State::getFreshNondetVar(const char *prefix, const expr &type) { expr var = expr::mkFreshVar(prefix, type); - nondet_vars.emplace(var); + addNonDetVar(var); return var; } @@ -1305,9 +1307,15 @@ expr State::rewriteUndef(expr &&val, const set &undef_vars) { void State::finishInitializer() { is_initialization_phase = false; -} -void State::saveReturnedInput() { + const Memory *mem = &memory; + // if we have an init block, the unconditional jump std::moved the memory + if (!predecessor_data.empty()) { + assert(predecessor_data.size() == 1); + mem = &predecessor_data.begin()->second.begin()->second.mem.begin()->first; + } + return_memory = DisjointExpr(mem->dup()); + if (auto *ret = getFn().getReturnedInput()) { returned_input = (*this)[*ret]; resetUndefVars(true); @@ -1333,8 +1341,10 @@ const StateValue& State::returnValCached() { } Memory& State::returnMemory() { - if (auto *m = get_if>(&return_memory)) - return_memory = *std::move(*m)(); + if (auto *m = get_if>(&return_memory)) { + auto val = std::move(*m)(); + return_memory = val ? *std::move(val) : memory.dup(); + } return get(return_memory); } @@ -1387,11 +1397,14 @@ void State::mkAxioms(State &tgt) { if (has_indirect_fncalls) { for (auto &decl : f.getFnDecls()) { - if (auto gv = f.getGlobalVar(string_view(decl.name).substr(1))) + if (auto gv = f.getGlobalVar(decl.name)) { + Pointer ptr(memory, (*this)[*gv].value); + addAxiom(!ptr.isLocal()); + addAxiom(ptr.getOffset() == 0); addAxiom( - expr::mkUF("#fndeclty", - { Pointer(memory, (*this)[*gv].value).reprWithoutAttrs() }, - expr::mkUInt(0, 32)) == decl.hash()); + expr::mkUF("#fndeclty", { ptr.getShortBid() }, expr::mkUInt(0, 32)) + == decl.hash()); + } } } } diff --git a/ir/state.h b/ir/state.h index 37c46240f..944d59af4 100644 --- a/ir/state.h +++ b/ir/state.h @@ -334,7 +334,6 @@ class State { const auto& getNondetVars() const { return nondet_vars; } const auto& getFnQuantVars() const { return fn_call_qvars; } - void saveReturnedInput(); const std::optional& getReturnedInput() const { return returned_input; } diff --git a/ir/type.cpp b/ir/type.cpp index 9b3a896d5..e310d6d74 100644 --- a/ir/type.cpp +++ b/ir/type.cpp @@ -469,16 +469,19 @@ expr FloatType::fromFloat(State &s, const expr &fp, const Type &from_type0, auto add_maybe_quiet = [&](const expr &e) { // account for fpext/fptruc - expr fraction = e.trunc(min(from_type->fractionBits(), fraction_bits)); - if (fraction.bits() < fraction_bits) - fraction = fraction.concat_zeros(fraction_bits - fraction.bits()); - assert(!fraction.isValid() || fraction.bits() == fraction_bits); - + auto orig_bw = from_type->fractionBits(); + expr fraction = e.trunc(orig_bw); expr truncated_is_nan = true; - if (from_type->fractionBits() > fraction_bits) + + if (orig_bw > fraction_bits) { + fraction = fraction.extract(orig_bw-1, orig_bw - fraction_bits); truncated_is_nan = fraction != 0; + } else if (orig_bw < fraction_bits) { + fraction = fraction.concat_zeros(fraction_bits - fraction.bits()); + } + assert(!fraction.isValid() || fraction.bits() == fraction_bits); - expr qnan = fraction.extract(fraction_bits - 1, fraction_bits - 1); + expr qnan = fraction.sign(); expr quieted = var.extract(0, 0); qnan = expr::mkIf(truncated_is_nan, qnan | quieted, expr::mkUInt(1, 1)); exprs.add(qnan.concat(fraction.trunc(fraction_bits - 1)), diff --git a/ir/value.cpp b/ir/value.cpp index a8f1fa94a..4135532f8 100644 --- a/ir/value.cpp +++ b/ir/value.cpp @@ -249,7 +249,7 @@ StateValue Input::mkInput(State &s, const Type &ty, unsigned child) const { s.addUndefVar(std::move(var)); } - auto state_val = attrs.encode(s, {std::move(val), expr(true)}, ty); + auto state_val = attrs.encode(s, {std::move(val), expr(true)}, ty, true); bool never_poison = config::disable_poison_input || attrs.poisonImpliesUB(); expr np = expr::mkBoolVar(("np_" + getSMTName(child)).c_str()); diff --git a/llvm_util/compare.cpp b/llvm_util/compare.cpp index cb7bf2da6..8c8ea8b68 100644 --- a/llvm_util/compare.cpp +++ b/llvm_util/compare.cpp @@ -47,7 +47,7 @@ Results verify(llvm::Function &F1, llvm::Function &F2, return Results::Error("Could not translate '" + F1.getName().str() + "' to Alive IR\n"); - auto fn2 = llvm2alive(F2, TLI.getTLI(F2), false, fn1->getGlobalVarNames()); + auto fn2 = llvm2alive(F2, TLI.getTLI(F2), false, fn1->getGlobalVars()); if (!fn2) return Results::Error("Could not translate '" + F2.getName().str() + "' to Alive IR\n"); diff --git a/llvm_util/known_fns.cpp b/llvm_util/known_fns.cpp index 39c53937e..2a9fa9528 100644 --- a/llvm_util/known_fns.cpp +++ b/llvm_util/known_fns.cpp @@ -517,7 +517,7 @@ known_call(llvm::CallInst &i, const llvm::TargetLibraryInfo &TLI, auto decl = i.getCalledFunction(); llvm::LibFunc libfn; - if (!decl || !TLI.getLibFunc(*decl, libfn) || !TLI.has(libfn)) + if (!decl || !TLI.getLibFunc(*decl, libfn)) RETURN_EXACT(); switch (libfn) { diff --git a/llvm_util/llvm2alive.cpp b/llvm_util/llvm2alive.cpp index 71420694d..391dd8199 100644 --- a/llvm_util/llvm2alive.cpp +++ b/llvm_util/llvm2alive.cpp @@ -88,6 +88,7 @@ bool hit_limits; unsigned constexpr_idx; unsigned copy_idx; unsigned alignopbundle_idx; +unsigned metadata_idx; #define PARSE_UNOP() \ auto ty = llvm_type2alive(i.getType()); \ @@ -132,7 +133,7 @@ class llvm2alive_ : public llvm::InstVisitor> { /// function. bool IsSrc; vector i_constexprs; - const vector &gvnamesInSrc; + const vector &gvsInSrc; vector> todo_phis; const Instr *insert_constexpr_before = nullptr; ostream *out; @@ -195,7 +196,8 @@ class llvm2alive_ : public llvm::InstVisitor> { auto get_operand(llvm::Value *v) { return llvm_util::get_operand(v, [this](auto I) { return convert_constexpr(I); }, - [&](auto ag) { return copy_inserter(ag); }); + [this](auto ag) { return copy_inserter(ag); }, + [this](auto fn) { return register_fn_decl(fn, nullptr); }); } RetTy NOP(llvm::Instruction &i) { @@ -210,8 +212,8 @@ class llvm2alive_ : public llvm::InstVisitor> { public: llvm2alive_(llvm::Function &f, const llvm::TargetLibraryInfo &TLI, bool IsSrc, - const vector &gvnamesInSrc) - : f(f), TLI(TLI), IsSrc(IsSrc), gvnamesInSrc(gvnamesInSrc), + const vector &gvsInSrc) + : f(f), TLI(TLI), IsSrc(IsSrc), gvsInSrc(gvsInSrc), out(&get_outs()) {} ~llvm2alive_() { @@ -343,20 +345,13 @@ class llvm2alive_ : public llvm::InstVisitor> { } auto fn = i.getCalledFunction(); - FnAttrs attrs; - vector param_attrs; - - parse_fn_attrs(i, attrs, true); - - if (auto op = dyn_cast(&i)) { - if (op->hasNoNaNs()) - attrs.set(FnAttrs::NNaN); - } auto ty = llvm_type2alive(i.getType()); if (!ty) return error(i); + unsigned vararg_idx = -1u; + // record fn decl in case there are indirect calls to this function // elsewhere auto fn_decl = fn; @@ -379,11 +374,8 @@ class llvm2alive_ : public llvm::InstVisitor> { return make_unique(*args.at(0), Assume::AndNonPoison); } - Function::FnDecl decl; - decl.name = '@' + fn_decl->getName().str(); - decl.attrs = attrs; - if (!(decl.output = llvm_type2alive(fn_decl->getReturnType()))) - return error(i); + if (fn_decl->isVarArg()) + vararg_idx = fn_decl->arg_size(); // it's UB if there's a mismatch in the number of function arguments if (( fn_decl->isVarArg() && i.arg_size() < fn_decl->arg_size()) || @@ -396,24 +388,20 @@ class llvm2alive_ : public llvm::InstVisitor> { UnaryOp::Copy); } - auto attrs_fndef = fn_decl->getAttributes(); - for (uint64_t idx = 0, nargs = fn_decl->arg_size(); idx < nargs; ++idx) { - auto ty = llvm_type2alive(fn_decl->getArg(idx)->getType()); - if (!ty) - return error(i); - - unsigned attr_argidx = llvm::AttributeList::FirstArgIndex + idx; - auto &arg = args.at(idx); - ParamAttrs pattr; - handleParamAttrs(attrs_fndef.getAttributes(attr_argidx), pattr, - *arg, arg, true); - decl.inputs.emplace_back(ty, std::move(pattr)); - } - alive_fn->addFnDecl(std::move(decl)); + if (!register_fn_decl(fn_decl, &args)) + return error(i); } + FnAttrs attrs; + vector param_attrs; + parse_fn_attrs(i, attrs); + if (auto op = dyn_cast(&i)) { + if (op->hasNoNaNs()) + attrs.set(FnAttrs::NNaN); + } + if (!approx) { auto [known, approx0] = known_call(i, TLI, *BB, args, std::move(attrs), param_attrs); @@ -441,7 +429,7 @@ class llvm2alive_ : public llvm::InstVisitor> { call = make_unique(*ty, value_name(i), fn ? '@' + fn->getName().str() : string(), - std::move(attrs), fnptr); + std::move(attrs), fnptr, vararg_idx); } auto attrs_callsite = i.getAttributes(); @@ -456,10 +444,10 @@ class llvm2alive_ : public llvm::InstVisitor> { unsigned attr_argidx = llvm::AttributeList::FirstArgIndex + argidx; approx |= !handleParamAttrs(attrs_callsite.getAttributes(attr_argidx), - pattr, *arg, arg, true); + pattr, &arg, true); if (fn && argidx < fn->arg_size()) approx |= !handleParamAttrs(attrs_fndef.getAttributes(attr_argidx), - pattr, *arg, arg, true); + pattr, &arg, true); if (i.paramHasAttr(argidx, llvm::Attribute::NoUndef)) { if (i.getArgOperand(argidx)->getType()->isAggregateType()) @@ -844,10 +832,20 @@ class llvm2alive_ : public llvm::InstVisitor> { BB->addInstr(make_unique(*av, Assume::WellDefined)); } - } else if (name == "align") { - llvm::Value *ptr = bundle.Inputs[0].get(); + } + else if (name == "dereferenceable") { + auto *ptr = get_operand(bundle.Inputs[0].get()); + auto *bytes = get_operand(bundle.Inputs[1].get()); + if (!ptr || !bytes) + return error(i); + + BB->addInstr(make_unique(vector{ptr, bytes}, + Assume::Dereferenceable)); + } + else if (name == "align") { + auto *aptr = get_operand(bundle.Inputs[0].get()); llvm::Value *align = bundle.Inputs[1].get(); - auto *aptr = get_operand(ptr), *aalign = get_operand(align); + auto *aalign = get_operand(align); if (!aptr || !aalign || align->getType()->getIntegerBitWidth() > 64) return error(i); @@ -855,7 +853,7 @@ class llvm2alive_ : public llvm::InstVisitor> { assert(bundle.Inputs.size() == 3); auto gep = make_unique( aptr->getType(), - "#align_adjustedptr" + to_string(alignopbundle_idx++), + "%#align_adjustedptr" + to_string(alignopbundle_idx++), *aptr, false, false, false); gep->addIdx(-1ull, *get_operand(bundle.Inputs[2].get())); @@ -863,15 +861,18 @@ class llvm2alive_ : public llvm::InstVisitor> { BB->addInstr(std::move(gep)); } - vector args = {aptr, aalign}; - BB->addInstr(make_unique(std::move(args), Assume::Align)); - } else if (name == "nonnull") { - llvm::Value *ptr = bundle.Inputs[0].get(); - auto *aptr = get_operand(ptr); - if (!aptr) + BB->addInstr(make_unique(vector{aptr, aalign}, + Assume::Align)); + } + else if (name == "nonnull") { + auto *ptr = get_operand(bundle.Inputs[0].get()); + if (!ptr) return error(i); - BB->addInstr(make_unique(*aptr, Assume::NonNull)); + BB->addInstr(make_unique(*ptr, Assume::NonNull)); + } + else if (name == "cold") { + // Not relevant for correctness } else { return error(i); } @@ -1257,7 +1258,8 @@ class llvm2alive_ : public llvm::InstVisitor> { RetTy visitInstruction(llvm::Instruction &i) { return error(i); } - RetTy error(llvm::Instruction &i) { + template + RetType error(llvm::Instruction &i) { if (hit_limits) return {}; stringstream ss; @@ -1267,6 +1269,7 @@ class llvm2alive_ : public llvm::InstVisitor> { << string_view(std::move(ss).str()).substr(0, 80) << '\n'; return {}; } + RetTy errorAttr(const llvm::Attribute &attr) { *out << "ERROR: Unsupported attribute: " << attr.getAsString() << '\n'; return {}; @@ -1320,6 +1323,46 @@ class llvm2alive_ : public llvm::InstVisitor> { BB->addInstr(make_unique(*i, Assume::WellDefined)); break; + case LLVMContext::MD_callees: { + auto *fn_call = dynamic_cast(i); + assert(fn_call); + auto &i1_type = get_int_type(1); + Value *last_value = nullptr; + + for (auto &Op : Node->operands()) { + auto *callee = + get_operand(llvm::mdconst::dyn_extract_or_null(Op)); + + if (!callee) { + *out << "ERROR: Unsupported !callee metadata\n"; + return false; + } + + auto icmp = make_unique(i1_type, + "%#cmp#" + to_string(metadata_idx), + ICmp::EQ, *fn_call->getFnPtr(), + *callee); + auto *icmp_ptr = icmp.get(); + BB->addInstrAt(std::move(icmp), fn_call, true); + + if (last_value) { + auto or_i + = make_unique(i1_type, + "%#or#" + to_string(metadata_idx), + *last_value, *icmp_ptr, BinOp::Or); + last_value = or_i.get(); + BB->addInstrAt(std::move(or_i), fn_call, true); + } else { + last_value = icmp_ptr; + } + ++metadata_idx; + } + auto val = last_value ? last_value : make_intconst(0, 1); + BB->addInstrAt(make_unique(*val, Assume::AndNonPoison), + fn_call, true); + break; + } + case LLVMContext::MD_dereferenceable: case LLVMContext::MD_dereferenceable_or_null: { auto kind = ID == LLVMContext::MD_dereferenceable @@ -1353,6 +1396,32 @@ class llvm2alive_ : public llvm::InstVisitor> { return true; } + bool handleOpBundles(llvm::Instruction &llvm_i, Instr *i) { + auto *call = dyn_cast(&llvm_i); + if (!call) + return true; + + for (auto &bundle0 : call->bundle_op_infos()) { + auto bundle = call->operandBundleFromBundleOpInfo(bundle0); + if (bundle.getTagName() == "clang.arc.attachedcall") { + assert(bundle.Inputs.size() == 1); + auto *fn = dyn_cast(bundle.Inputs[0].get()); + auto ty = llvm_type2alive(fn->getReturnType()); + if (!ty) + return error(llvm_i); + + FnAttrs attrs; + parse_fn_decl_attrs(fn, attrs); + + BB->addInstr( + make_unique(*ty, i->getName() + "#arc", + '@' + fn->getName().str(), std::move(attrs))); + } + } + + return true; + } + unique_ptr handleRangeAttrNoInsert(const llvm::Attribute &attr, Value &val, bool is_welldefined = false) { @@ -1378,7 +1447,7 @@ class llvm2alive_ : public llvm::InstVisitor> { // TODO: Once attributes at a call site are fully supported, we should // remove is_callsite flag bool handleParamAttrs(const llvm::AttributeSet &aset, ParamAttrs &attrs, - Value &val, Value* &newval, bool is_callsite) { + Value **val, bool is_callsite) { bool precise = true; for (const llvm::Attribute &llvmattr : aset) { switch (llvmattr.getKindAsEnum()) { @@ -1453,7 +1522,8 @@ class llvm2alive_ : public llvm::InstVisitor> { break; case llvm::Attribute::Range: - newval = handleRangeAttr(llvmattr, val, + if (val) + *val = handleRangeAttr(llvmattr, **val, aset.hasAttribute(llvm::Attribute::NoUndef)); break; @@ -1488,7 +1558,7 @@ class llvm2alive_ : public llvm::InstVisitor> { return precise; } - void handleRetAttrs(const llvm::AttributeSet &aset, FnAttrs &attrs) { + static void handleRetAttrs(const llvm::AttributeSet &aset, FnAttrs &attrs) { for (const llvm::Attribute &llvmattr : aset) { if (!llvmattr.isEnumAttribute() && !llvmattr.isIntAttribute() && !llvmattr.isTypeAttribute()) @@ -1547,7 +1617,7 @@ class llvm2alive_ : public llvm::InstVisitor> { return attr; } - void handleFnAttrs(const llvm::AttributeSet &aset, FnAttrs &attrs) { + static void handleFnAttrs(const llvm::AttributeSet &aset, FnAttrs &attrs) { for (const llvm::Attribute &llvmattr : aset) { if (llvmattr.isStringAttribute()) { auto str = llvmattr.getKindAsString(); @@ -1613,7 +1683,7 @@ class llvm2alive_ : public llvm::InstVisitor> { } } - MemoryAccess handleMemAttrs(const llvm::MemoryEffects &e) { + static MemoryAccess handleMemAttrs(const llvm::MemoryEffects &e) { MemoryAccess attrs; attrs.setNoAccess(); @@ -1637,35 +1707,65 @@ class llvm2alive_ : public llvm::InstVisitor> { return attrs; } - void parse_fn_attrs(const llvm::CallInst &i, FnAttrs &attrs, - bool decl_only = false) { - auto fn = i.getCalledFunction(); - llvm::AttributeList attrs_callsite = i.getAttributes(); - llvm::AttributeList attrs_fndef = fn ? fn->getAttributes() - : llvm::AttributeList(); + static void parse_fn_decl_attrs(const llvm::Function *fn, FnAttrs &attrs) { + llvm::AttributeList attrs_fndef = fn->getAttributes(); auto ret = llvm::AttributeList::ReturnIndex; auto fnidx = llvm::AttributeList::FunctionIndex; - - if (!decl_only) { - handleRetAttrs(attrs_callsite.getAttributes(ret), attrs); - handleFnAttrs(attrs_callsite.getAttributes(fnidx), attrs); - } handleRetAttrs(attrs_fndef.getAttributes(ret), attrs); handleFnAttrs(attrs_fndef.getAttributes(fnidx), attrs); attrs.mem.setFullAccess(); - if (!decl_only) - attrs.mem &= handleMemAttrs(i.getMemoryEffects()); - if (fn) - attrs.mem &= handleMemAttrs(fn->getMemoryEffects()); + attrs.mem &= handleMemAttrs(fn->getMemoryEffects()); + attrs.inferImpliedAttributes(); + } + + static void parse_fn_attrs(const llvm::CallInst &i, FnAttrs &attrs) { + attrs.mem.setFullAccess(); + + if (auto fn = i.getCalledFunction()) + parse_fn_decl_attrs(fn, attrs); + + llvm::AttributeList attrs_callsite = i.getAttributes(); + auto ret = llvm::AttributeList::ReturnIndex; + auto fnidx = llvm::AttributeList::FunctionIndex; + handleRetAttrs(attrs_callsite.getAttributes(ret), attrs); + handleFnAttrs(attrs_callsite.getAttributes(fnidx), attrs); + attrs.mem &= handleMemAttrs(i.getMemoryEffects()); attrs.inferImpliedAttributes(); } + bool register_fn_decl(llvm::Function *fn, vector *args) { + Function::FnDecl decl; + decl.name = '@' + fn->getName().str(); + decl.is_varargs = fn->isVarArg(); + if (!(decl.output = llvm_type2alive(fn->getReturnType()))) + return false; + + parse_fn_decl_attrs(fn, decl.attrs); + + const auto &attrs_fndef = fn->getAttributes(); + for (uint64_t idx = 0, nargs = fn->arg_size(); idx < nargs; ++idx) { + auto ty = llvm_type2alive(fn->getArg(idx)->getType()); + if (!ty) + return false; + + unsigned attr_argidx = llvm::AttributeList::FirstArgIndex + idx; + auto **arg = args ? &args->at(idx) : nullptr; + ParamAttrs pattr; + handleParamAttrs(attrs_fndef.getAttributes(attr_argidx), pattr, arg, + true); + decl.inputs.emplace_back(ty, std::move(pattr)); + } + alive_fn->addFnDecl(std::move(decl)); + return true; + } + optional run() { hit_limits = false; constexpr_idx = 0; copy_idx = 0; alignopbundle_idx = 0; + metadata_idx = 0; // don't even bother if number of BBs or instructions is huge.. if (distance(f.begin(), f.end()) > 5000 || @@ -1700,7 +1800,7 @@ class llvm2alive_ : public llvm::InstVisitor> { auto val = make_unique(*ty, value_name(arg)); Value *newval = val.get(); - if (!ty || !handleParamAttrs(argattr, attrs, *val, newval, false)) + if (!ty || !handleParamAttrs(argattr, attrs, &newval, false)) return {}; val->setAttributes(std::move(attrs)); add_identifier(arg, *newval); @@ -1773,13 +1873,16 @@ class llvm2alive_ : public llvm::InstVisitor> { if (!alive_i->isVoid()) { add_identifier(i, *alive_i); - } else { - alive_i = static_cast(get_operand(&i)); + } else if (auto *ptr = static_cast(get_identifier(i))) { + alive_i = ptr; } if (i.hasMetadataOtherThanDebugLoc() && !handleMetadata(Fn, i, alive_i)) return {}; + + if (!handleOpBundles(i, alive_i)) + return {}; } else return {}; } @@ -1806,8 +1909,7 @@ class llvm2alive_ : public llvm::InstVisitor> { const char *chrs = name.data(); char *end_ptr; auto numeric_id = strtoul(chrs, &end_ptr, 10); - - if (size_t(end_ptr - chrs) != name.size()) + if (end_ptr != name.end()) return M->getGlobalVariable(name, true); else { auto itr = M->global_begin(), end = M->global_end(); @@ -1828,9 +1930,14 @@ class llvm2alive_ : public llvm::InstVisitor> { insert_constexpr_before = nullptr; // Ensure all src globals exist in target as well - for (auto &gvname : gvnamesInSrc) { - if (auto gv = getGlobalVariable(gvname)) - get_operand(gv); + for (auto *gv : gvsInSrc) { + if (Fn.getGlobalVar(gv->getName())) { + // do nothing + } else { + // import from src + // FIXME: this is wrong for IPO + Fn.addConstant(make_unique(*gv)); + } } map> stores; @@ -1881,10 +1988,9 @@ initializer::initializer(ostream &os, const llvm::DataLayout &DL) { init_llvm_utils(os, DL); } -optional llvm2alive(llvm::Function &F, - const llvm::TargetLibraryInfo &TLI, - bool IsSrc, - const vector &gvnamesInSrc) { - return llvm2alive_(F, TLI, IsSrc, gvnamesInSrc).run(); +optional +llvm2alive(llvm::Function &F, const llvm::TargetLibraryInfo &TLI, bool IsSrc, + const vector &gvsInSrc) { + return llvm2alive_(F, TLI, IsSrc, gvsInSrc).run(); } } diff --git a/llvm_util/llvm2alive.h b/llvm_util/llvm2alive.h index 883807cf5..6fa725c62 100644 --- a/llvm_util/llvm2alive.h +++ b/llvm_util/llvm2alive.h @@ -23,5 +23,5 @@ struct initializer { std::optional llvm2alive(llvm::Function &F, const llvm::TargetLibraryInfo &TLI, bool IsSrc, - const std::vector &gvnamesInSrc = {}); + const std::vector &gvsInSrc = {}); } diff --git a/llvm_util/utils.cpp b/llvm_util/utils.cpp index e210c3c29..78f78b0b5 100644 --- a/llvm_util/utils.cpp +++ b/llvm_util/utils.cpp @@ -277,10 +277,10 @@ IR::Value* get_poison(Type &ty) { Value* get_operand(llvm::Value *v, function constexpr_conv, - function copy_inserter) { - if (auto I = value_cache.find(v); - I != value_cache.end()) - return I->second; + function copy_inserter, + function register_fn_decl) { + if (auto ptr = get_identifier(*v)) + return ptr; auto ty = llvm_type2alive(v->getType()); if (!ty) @@ -357,10 +357,14 @@ Value* get_operand(llvm::Value *v, if (auto fn = dyn_cast(v)) { auto val = make_unique( - *ty, fn->getName().str(), 0, + *ty, '@' + fn->getName().str(), 0, fn->getAlign().value_or(llvm::Align(8)).value(), true, true); auto gvar = val.get(); current_fn->addConstant(std::move(val)); + + if (!register_fn_decl(fn)) + return nullptr; + RETURN_CACHE(gvar); } @@ -371,7 +375,8 @@ Value* get_operand(llvm::Value *v, for (unsigned i = 0; i < aty->numElementsConst(); ++i) { if (!aty->isPadding(i)) { - if (auto op = get_operand(get_elem(opi), constexpr_conv, copy_inserter)) + if (auto op = get_operand(get_elem(opi), constexpr_conv, copy_inserter, + register_fn_decl)) vals.emplace_back(op); else return false; @@ -448,6 +453,11 @@ void replace_identifier(const llvm::Value &llvm, Value &v) { value_cache[&llvm] = &v; } +Value* get_identifier(const llvm::Value &llvm) { + auto I = value_cache.find(&llvm); + return I != value_cache.end() ? I->second : nullptr; +} + #define PRINT(T) \ ostream& operator<<(ostream &os, const T &x) { \ diff --git a/llvm_util/utils.h b/llvm_util/utils.h index 50d55d309..6fd97b180 100644 --- a/llvm_util/utils.h +++ b/llvm_util/utils.h @@ -45,10 +45,12 @@ IR::Value* make_intconst(const llvm::APInt &val); IR::Value* get_poison(IR::Type &ty); IR::Value* get_operand(llvm::Value *v, std::function constexpr_conv, - std::function copy_inserter); + std::function copy_inserter, + std::function register_fn_decl); void add_identifier(const llvm::Value &llvm, IR::Value &v); void replace_identifier(const llvm::Value &llvm, IR::Value &v); +IR::Value* get_identifier(const llvm::Value &llvm); #define PRINT(T) std::ostream& operator<<(std::ostream &os, const T &x); PRINT(llvm::Type) diff --git a/scripts/opt-alive.sh.in b/scripts/opt-alive.sh.in index 7787b5658..a1394a315 100755 --- a/scripts/opt-alive.sh.in +++ b/scripts/opt-alive.sh.in @@ -10,7 +10,7 @@ set -e # attributor, function-attrs: inter procedural pass that deduces and/or propagates attributes # metarenamer: anonymizes function names # sample-profile: inlines functions -SKIP_TV_PASSES="argpromotion deadargelim globalopt hotcoldsplit inline ipconstprop ipsccp mergefunc partial-inliner function-specialization =tbaa loop-extract extract-blocks safe-stack place-safepoints attributor function-attrs metarenamer sample-profile lowertypetests extract-blocks openmp-opt-cgscc prune-eh tailcallelim iroutliner ctx-instr-lower expand-variadics globals-aa debug-pass-manager" +SKIP_TV_PASSES="argpromotion deadargelim globalopt hotcoldsplit inline function-import called-value-propagation ipconstprop ipsccp mergefunc partial-inliner function-specialization =tbaa loop-extract extract-blocks safe-stack place-safepoints attributor function-attrs metarenamer sample-profile lowertypetests extract-blocks openmp-opt-cgscc prune-eh tailcallelim iroutliner ctx-instr-lower expand-variadics globals-aa debug-pass-manager" # see opt.cpp: shouldForceLegacyPM() FORCE_OLD_NPM="codegenprepare interleaved-load-combine unreachableblockelim atomic-expand expandvp interleaved-access global-merge expand-reductions indirectbr-expand pre-isel-intrinsic-lowering loop-reduce expand-large-div-rem expand-large-fp-convert structurizecfg fix-irreducible expand-memcmp amdgpu-aa-wrapper aarch64-lit" diff --git a/smt/ctx.cpp b/smt/ctx.cpp index 5f18fb29c..e399bd08c 100644 --- a/smt/ctx.cpp +++ b/smt/ctx.cpp @@ -19,7 +19,7 @@ static void z3_error_handler(Z3_context ctx, Z3_error_code err) { if (str == "canceled") return; - dbg() << "Severe Z3 error: " << str << " [code=" << err << "]\n"; + dbg() << "Severe Z3 error: " << str << " [code=" << err << ']' << endl; _Exit(-1); } diff --git a/smt/expr.cpp b/smt/expr.cpp index b14a2d931..70353e8a1 100644 --- a/smt/expr.cpp +++ b/smt/expr.cpp @@ -715,9 +715,9 @@ expr expr::operator-(const expr &rhs) const { expr expr::operator*(const expr &rhs) const { unsigned power; if (rhs.isConst() && is_power2(rhs, power)) - return *this << expr::mkUInt(power, sort()); + return *this << mkUInt(power, sort()); if (isConst() && is_power2(*this, power)) - return rhs << expr::mkUInt(power, sort()); + return rhs << mkUInt(power, sort()); return binopc(Z3_mk_bvmul, operator*, Z3_OP_BMUL, isOne, isZero); } @@ -834,7 +834,7 @@ expr expr::add_no_soverflow(const expr &rhs) const { if (rhs.isConst()) { auto v = IntSMin(bits()) - rhs; return rhs.isNegative().isTrue() ? sge(v) - : sle(v - expr::mkUInt(1, rhs.sort())); + : sle(v - mkUInt(1, rhs.sort())); } if (isConst()) return rhs.add_no_soverflow(*this); @@ -1032,7 +1032,7 @@ static expr log2_rec(const expr &e, unsigned idx, unsigned bw) { } expr expr::isPowerOf2() const { - return *this != 0 && (*this & (*this - expr::mkUInt(1, *this))) == 0; + return *this != 0 && (*this & (*this - mkUInt(1, *this))) == 0; } expr expr::log2(unsigned bw_output) const { @@ -1225,7 +1225,7 @@ expr expr::frem(const expr &rhs) const { expr expr::fabs() const { if (isBV()) - return expr::mkUInt(0, 1).concat(extract(bits() - 2, 0)); + return mkUInt(0, 1).concat(extract(bits() - 2, 0)); fold_fp_neg(fabs); return unop_fold(Z3_mk_fpa_abs); @@ -1234,7 +1234,7 @@ expr expr::fabs() const { expr expr::fneg() const { if (isBV()) { auto signbit = bits() - 1; - return (extract(signbit, signbit) ^ expr::mkUInt(1, 1)) + return (extract(signbit, signbit) ^ mkUInt(1, 1)) .concat(extract(signbit - 1, 0)); } return unop_fold(Z3_mk_fpa_neg); @@ -1823,7 +1823,7 @@ expr expr::concat(const expr &rhs) const { } expr expr::concat_zeros(unsigned bits) const { - return bits ? concat(expr::mkUInt(0, bits)) : *this; + return bits ? concat(mkUInt(0, bits)) : *this; } expr expr::extract(unsigned high, unsigned low, unsigned depth) const { diff --git a/tests/alive-tv/asm/cmp-null.srctgt.ll b/tests/alive-tv/asm/cmp-null.srctgt.ll new file mode 100644 index 000000000..5211ad038 --- /dev/null +++ b/tests/alive-tv/asm/cmp-null.srctgt.ll @@ -0,0 +1,12 @@ +; TEST-ARGS: -tgt-is-asm + +define i1 @src(ptr %0) { + %2 = getelementptr i8, ptr %0, i64 -1 + %3 = icmp ult ptr %2, %0 + ret i1 %3 +} + +define i1 @tgt(ptr %0) { + %a4_5.not.not = icmp ne ptr %0, null + ret i1 %a4_5.not.not +} diff --git a/tests/alive-tv/asm/memcmp-align.srctgt.ll b/tests/alive-tv/asm/memcmp-align.srctgt.ll new file mode 100644 index 000000000..f0f476323 --- /dev/null +++ b/tests/alive-tv/asm/memcmp-align.srctgt.ll @@ -0,0 +1,16 @@ +; TEST-ARGS: -tgt-is-asm + +declare i32 @memcmp(ptr, ptr, i64) + +define i1 @src(ptr align 4 %0, ptr %intbuf) { + %2 = call i32 @memcmp(ptr %0, ptr %intbuf, i64 1) + %3 = icmp eq i32 %2, 0 + ret i1 %3 +} + +define i1 @tgt(ptr align 4 %0, ptr %1) { + %a3_1 = load i8, ptr %0, align 1 + %a4_1 = load i8, ptr %1, align 1 + %a5_8.not = icmp eq i8 %a3_1, %a4_1 + ret i1 %a5_8.not +} diff --git a/tests/alive-tv/asm/ptr-cmp.srctgt.ll b/tests/alive-tv/asm/ptr-cmp.srctgt.ll new file mode 100644 index 000000000..6e4c25690 --- /dev/null +++ b/tests/alive-tv/asm/ptr-cmp.srctgt.ll @@ -0,0 +1,13 @@ +; TEST-ARGS: -tgt-is-asm + +define i1 @src(ptr %0, ptr %1) { + %3 = getelementptr i8, ptr %0, i32 1 + %4 = getelementptr i8, ptr %1, i32 1 + %5 = icmp eq ptr %3, %4 + ret i1 %5 +} + +define i1 @tgt(ptr %0, ptr %1) { + %r = icmp eq ptr %0, %1 + ret i1 %r +} diff --git a/tests/alive-tv/asm/ptr-replacement.srctgt.ll b/tests/alive-tv/asm/ptr-replacement.srctgt.ll new file mode 100644 index 000000000..60c2d8acd --- /dev/null +++ b/tests/alive-tv/asm/ptr-replacement.srctgt.ll @@ -0,0 +1,25 @@ +; TEST-ARGS: -tgt-is-asm + +define i8 @src(ptr %0, ptr %1) { + store i8 0, ptr %0, align 4 + %c = icmp eq ptr %0, %1 + br i1 %c, label %then, label %else + +then: + ret i8 1 +else: + ret i8 0 +} + +define i8 @tgt(ptr %0, ptr %1) { + %c = icmp eq ptr %0, %1 + br i1 %c, label %then, label %else + +then: + store i8 0, ptr %1, align 4 + ret i8 1 + +else: + store i8 0, ptr %0, align 4 + ret i8 0 +} \ No newline at end of file diff --git a/tests/alive-tv/attrs/deref_align.srctgt.ll b/tests/alive-tv/attrs/deref_align.srctgt.ll new file mode 100644 index 000000000..fa75cde4a --- /dev/null +++ b/tests/alive-tv/attrs/deref_align.srctgt.ll @@ -0,0 +1,12 @@ + +declare void @fn(ptr) + +define void @src(ptr align 16 dereferenceable(4) %0) { + call void @fn(ptr %0) + ret void +} + +define void @tgt(ptr align 16 dereferenceable(4) %0) { + call void @fn(ptr nonnull %0) + ret void +} diff --git a/tests/alive-tv/calls/addr-taken-ub.srctgt.ll b/tests/alive-tv/calls/addr-taken-ub.srctgt.ll new file mode 100644 index 000000000..c452eca7d --- /dev/null +++ b/tests/alive-tv/calls/addr-taken-ub.srctgt.ll @@ -0,0 +1,11 @@ +declare void @func3() + +define void @src(i1 %c) { + %fn = select i1 %c, ptr null, ptr @func3 + call void %fn(i8 0) + ret void +} + +define void @tgt(i1 %c) { + unreachable +} diff --git a/tests/alive-tv/calls/callees-metadata-fail.srctgt.ll b/tests/alive-tv/calls/callees-metadata-fail.srctgt.ll new file mode 100644 index 000000000..46315d231 --- /dev/null +++ b/tests/alive-tv/calls/callees-metadata-fail.srctgt.ll @@ -0,0 +1,18 @@ +; ERROR: Source is more defined than target + +declare i8 @test() +declare i8 @test2() +@fptr = external global ptr + +define i8 @src() { + %p = load ptr, ptr @fptr, align 8 + %ret = call i8 %p(), !callees !0 + ret i8 %ret +} + +define i8 @tgt() { + %ret = call i8 @test() + ret i8 %ret +} + +!0 = !{ptr @test, ptr @test2} diff --git a/tests/alive-tv/calls/callees-metadata.srctgt.ll b/tests/alive-tv/calls/callees-metadata.srctgt.ll new file mode 100644 index 000000000..3f34ee92c --- /dev/null +++ b/tests/alive-tv/calls/callees-metadata.srctgt.ll @@ -0,0 +1,15 @@ +declare i8 @test() +@fptr = external global ptr + +define i8 @src() { + %p = load ptr, ptr @fptr, align 8 + %ret = call i8 %p(), !callees !0 + ret i8 %ret +} + +define i8 @tgt() { + %ret = call i8 @test() + ret i8 %ret +} + +!0 = !{ptr @test, ptr @test} diff --git a/tests/alive-tv/calls/fn-ptr-taken.srctgt.ll b/tests/alive-tv/calls/fn-ptr-taken.srctgt.ll new file mode 100644 index 000000000..d3a9b84cb --- /dev/null +++ b/tests/alive-tv/calls/fn-ptr-taken.srctgt.ll @@ -0,0 +1,8 @@ +declare void @varargs(ptr, ...) +declare void @g(ptr, ptr) + +define void @fn() { + call void (ptr, ...) @varargs(ptr null, ptr null) + call void @g(ptr null, ptr @varargs) + ret void +} diff --git a/tests/alive-tv/calls/indirect-call-arg.srctgt.ll b/tests/alive-tv/calls/indirect-call-arg.srctgt.ll new file mode 100644 index 000000000..b82b42891 --- /dev/null +++ b/tests/alive-tv/calls/indirect-call-arg.srctgt.ll @@ -0,0 +1,22 @@ +declare void @f() +declare void @g(ptr) + +define void @src(ptr noundef %p) { + call void @f() + call void %p(ptr @f) + ret void +} + +define void @tgt(ptr noundef %p) { + call void @f() + %cmp = icmp eq ptr %p, @g + br i1 %cmp, label %then, label %else + +then: + call void @g(ptr @f) + ret void + +else: + call void %p(ptr @f) + ret void +} diff --git a/tests/alive-tv/calls/indirect-call-nocapture.srctgt.ll b/tests/alive-tv/calls/indirect-call-nocapture.srctgt.ll new file mode 100644 index 000000000..96a83e780 --- /dev/null +++ b/tests/alive-tv/calls/indirect-call-nocapture.srctgt.ll @@ -0,0 +1,9 @@ +define void @src(ptr nocapture %0, ptr %1) { + call void %1(ptr %0) + ret void +} + +define void @tgt(ptr %0, ptr %1) { + call void %1(ptr %0) + ret void +} diff --git a/tests/alive-tv/calls/indirect-call-nowrite-removal.srctgt.ll b/tests/alive-tv/calls/indirect-call-nowrite-removal.srctgt.ll new file mode 100644 index 000000000..9fe09c80e --- /dev/null +++ b/tests/alive-tv/calls/indirect-call-nowrite-removal.srctgt.ll @@ -0,0 +1,10 @@ +define void @src(ptr %0, ptr %1) { + call void %1(ptr readnone %0) + ret void +} + + +define void @tgt(ptr %0, ptr %1) { + call void %1(ptr %0) + ret void +} diff --git a/tests/alive-tv/calls/indirect-varargs.srctgt.ll b/tests/alive-tv/calls/indirect-varargs.srctgt.ll new file mode 100644 index 000000000..4c6c87dfd --- /dev/null +++ b/tests/alive-tv/calls/indirect-varargs.srctgt.ll @@ -0,0 +1,22 @@ +declare void @varargs(ptr, ...) +declare void @g(ptr, ptr) + +define void @src(ptr noundef %p) { + call void (ptr, ...) @varargs(ptr null, ptr null) + call void %p(ptr null, ptr @varargs) + ret void +} + +define void @tgt(ptr noundef %p) { + call void (ptr, ...) @varargs(ptr null, ptr null) + %cmp = icmp eq ptr %p, @g + br i1 %cmp, label %then, label %else + +then: + call void @g(ptr null, ptr @varargs) + ret void + +else: + call void %p(ptr null, ptr @varargs) + ret void +} diff --git a/tests/alive-tv/fp/fpext-trunc.srctgt.ll b/tests/alive-tv/fp/fpext-trunc.srctgt.ll new file mode 100644 index 000000000..994a46d72 --- /dev/null +++ b/tests/alive-tv/fp/fpext-trunc.srctgt.ll @@ -0,0 +1,9 @@ +define half @src(half %0) { + %2 = fpext half %0 to float + %3 = fptrunc float %2 to half + ret half %3 +} + +define half @tgt(half %0) { + ret half %0 +} diff --git a/tests/alive-tv/fp/nondet-model.srctgt.ll b/tests/alive-tv/fp/nondet-model.srctgt.ll new file mode 100644 index 000000000..ead58a246 --- /dev/null +++ b/tests/alive-tv/fp/nondet-model.srctgt.ll @@ -0,0 +1,13 @@ +; ERROR: Value mismatch +; CHECK: %m = #x7e00 + +define half @src(i1 %x, half %y) { + %z = uitofp i1 %x to half + %m = fmul nsz half %y, %z + ret half %m +} + +define half @tgt(i1 %x, half %y) { + %m = select i1 %x, half %y, half 0.000000e+00 + ret half %m +} diff --git a/tests/alive-tv/memory/disjoint_path_store.srctgt.ll b/tests/alive-tv/memory/disjoint_path_store.srctgt.ll new file mode 100644 index 000000000..8f3a3b729 --- /dev/null +++ b/tests/alive-tv/memory/disjoint_path_store.srctgt.ll @@ -0,0 +1,26 @@ +define i32 @src(ptr %0, i1 %1) { + br i1 %1, label %then, label %else + +then: + ret i32 1 + +else: + unreachable +} + +define i32 @tgt(ptr %0, i1 %1) { + %stack = alloca i32 + br i1 %1, label %then, label %else + +then: + store i32 1, ptr %stack + br label %exit + +else: + store i32 0, ptr %0 + br label %exit + +exit: + %v = load i32, ptr %stack + ret i32 %v +} diff --git a/tests/alive-tv/memory/unreach.srctgt.ll b/tests/alive-tv/memory/unreach.srctgt.ll new file mode 100644 index 000000000..bb3934188 --- /dev/null +++ b/tests/alive-tv/memory/unreach.srctgt.ll @@ -0,0 +1,18 @@ + +@a = external global i8 + +define void @src() { + %tobool = icmp eq i32 undef, 0 + br i1 %tobool, label %if.end11.loopexit, label %for.cond.preheader + +for.cond.preheader: + unreachable + +if.end11.loopexit: + load i8, ptr @a, align 1 + ret void +} + +define void @tgt() { + unreachable +} diff --git a/tests/alive-tv/objc_arc.srctgt.ll b/tests/alive-tv/objc_arc.srctgt.ll new file mode 100644 index 000000000..82420fc43 --- /dev/null +++ b/tests/alive-tv/objc_arc.srctgt.ll @@ -0,0 +1,14 @@ + +declare ptr @objc_retainAutoreleasedReturnValue() +declare ptr @fn1() + +define void @src() { + call ptr @fn1() [ "clang.arc.attachedcall"(ptr @objc_retainAutoreleasedReturnValue) ] + ret void +} + +define void @tgt() { + call ptr @fn1() + call ptr @objc_retainAutoreleasedReturnValue() + ret void +} diff --git a/tools/transform.cpp b/tools/transform.cpp index aaa1bb9e1..2f3fae77a 100644 --- a/tools/transform.cpp +++ b/tools/transform.cpp @@ -64,6 +64,15 @@ static void print_single_varval(ostream &os, const State &st, const Model &m, type.printVal(os, st, m.eval(val.value, true)); + if (dynamic_cast(&type)) { + auto addr = Pointer(st.getMemory(), m.eval(val.value, true)).getAddress(); + addr = m.eval(addr); + if (addr.isConst()) { + os << " / Address="; + addr.printHexadecimal(os); + } + } + // undef variables may not have a model since each read uses a copy // TODO: add intervals of possible values for ints at least? if (!partial.isConst()) { @@ -588,6 +597,16 @@ check_refinement(Errors &errs, const Transform &t, State &src_state, e = expr(); auto res = s.check(); + // Some non-deterministic vars have preconditions. These preconditions are + // under the forall quantifier, hence they have no effect when we fetch + // the vars from the model (as in fact these are different vars -- they + // are implicitly existentially quantified). + if (res.isSat()) { + s.add(pre_src_forall); + res = s.check(); + assert(!res.isUnsat()); + } + if (!res.isUnsat() && !error(errs, src_state, tgt_state, res, s, var, msg, check_each_var, printer)) @@ -688,11 +707,22 @@ check_refinement(Errors &errs, const Transform &t, State &src_state, qvars.insert(mem_undef.begin(), mem_undef.end()); auto print_ptr_load = [&](ostream &s, const Model &m) { + Pointer p1(src_mem, m[ptr_refinement()]); + Pointer p2(tgt_mem, m[ptr_refinement()]); + + expr alive1 = m[p1.isBlockAlive()]; + expr alive2 = m[p2.isBlockAlive()]; + if (alive1.isConst() && alive2.isConst() && !alive1.eq(alive2)) { + auto p = [](const expr &a) { return a.isTrue() ? "alive" : "dead"; }; + s << "\nMismatch is liveness of blocks. Source: " << p(alive1) + << " / Target: " << p(alive2); + return; + } + set undef; - Pointer p(src_mem, m[ptr_refinement()]); - s << "\nMismatch in " << p - << "\nSource value: " << Byte(src_mem, m[src_mem.raw_load(p, undef)()]) - << "\nTarget value: " << Byte(tgt_mem, m[tgt_mem.raw_load(p, undef)()]); + s << "\nMismatch in " << p1 + << "\nSource value: " << Byte(src_mem, m[src_mem.raw_load(p1, undef)()]) + << "\nTarget value: " << Byte(tgt_mem, m[tgt_mem.raw_load(p2, undef)()]); }; CHECK(dom && !(memory_cnstr0.isTrue() ? memory_cnstr0 @@ -1015,11 +1045,11 @@ static void calculateAndInitConstants(Transform &t) { has_ptr_arg |= hasPtr(i->getType()); update_min_vect_sz(i->getType()); + max_access_size + = max(max_access_size, i->getAttributes().maxAccessSize()); if (i->hasAttribute(ParamAttrs::Dereferenceable)) { does_mem_access = true; - uint64_t deref_bytes = i->getAttributes().derefBytes; - max_access_size = max(max_access_size, deref_bytes); } if (i->hasAttribute(ParamAttrs::DereferenceableOrNull)) { // Optimization: unless explicitly compared with a null pointer, don't @@ -1028,13 +1058,10 @@ static void calculateAndInitConstants(Transform &t) { // Note that dereferenceable_or_null implies num_ptrinputs > 0, // which may turn has_null_block on. does_mem_access = true; - uint64_t deref_bytes = i->getAttributes().derefOrNullBytes; - max_access_size = max(max_access_size, deref_bytes); } if (i->hasAttribute(ParamAttrs::ByVal)) { does_mem_access = true; uint64_t sz = i->getAttributes().blockSize; - max_access_size = max(max_access_size, sz); min_global_size = min_global_size != UINT64_MAX ? gcd(sz, min_global_size) : sz; @@ -1052,22 +1079,27 @@ static void calculateAndInitConstants(Transform &t) { update_min_vect_sz(i.getType()); - if (auto fn = dynamic_cast(&i)) { + if (auto call = dynamic_cast(&i)) { has_fncall |= true; - if (!fn->getAttributes().isAlloc()) { - if (fn->getAttributes().mem.canOnlyWrite(MemoryAccess::Inaccessible)) { - if (inaccessiblememonly_fns.emplace(fn->getName()).second) + auto &attrs = call->getAttributes(); + if (!attrs.isAlloc()) { + if (attrs.mem.canOnlyWrite(MemoryAccess::Inaccessible)) { + if (inaccessiblememonly_fns.emplace(call->getName()).second) ++num_inaccessiblememonly_fns; } else { - if (fn->getAttributes().mem - .canOnlyRead(MemoryAccess::Inaccessible)) { - if (inaccessiblememonly_fns.emplace(fn->getName()).second) + if (attrs.mem.canOnlyRead(MemoryAccess::Inaccessible)) { + if (inaccessiblememonly_fns.emplace(call->getName()).second) ++num_inaccessiblememonly_fns; } - has_write_fncall |= fn->getAttributes().mem.canWriteSomething(); + has_write_fncall |= attrs.mem.canWriteSomething(); } + // we do an indirect call if the fn is address taken + if (!call->isIndirect() && + fn->getGlobalVar(string_view(call->getFnName()).substr(1)) && + inaccessiblememonly_fns.emplace(call->getName()).second) + ++num_inaccessiblememonly_fns; } - if (fn->isIndirect()) { + if (call->isIndirect()) { has_indirect_fncalls = true; num_inaccessiblememonly_fns += is_src; } @@ -1228,6 +1260,9 @@ static void calculateAndInitConstants(Transform &t) { if (config::tgt_is_asm) bits_ptr_address = bits_program_pointer; + // TODO: this is only needed if some program pointer is observable + bits_for_offset = max(bits_ptr_address, bits_for_offset); + bits_byte = 8 * (does_mem_access ? (unsigned)min_access_size : 1); bits_poison_per_byte = 1; diff --git a/tv/tv.cpp b/tv/tv.cpp index b9ecd1659..2f335bef7 100644 --- a/tv/tv.cpp +++ b/tv/tv.cpp @@ -208,8 +208,8 @@ struct TVLegacyPass final : public llvm::ModulePass { return false; auto fn = llvm2alive(F, *TLI, first, - first ? vector() - : I->second.fn.getGlobalVarNames()); + first ? vector() + : I->second.fn.getGlobalVars()); if (!fn) { fns.erase(I); return false; diff --git a/util/symexec.cpp b/util/symexec.cpp index e1c06790a..54e2f0b2e 100644 --- a/util/symexec.cpp +++ b/util/symexec.cpp @@ -57,7 +57,6 @@ void sym_exec_init(State &s) { } s.finishInitializer(); - s.saveReturnedInput(); s.exec(Value::voidVal); }