diff --git a/components/Decoder/Instruction.cpp b/components/Decoder/Instruction.cpp index cbd99582..ea9a4d36 100644 --- a/components/Decoder/Instruction.cpp +++ b/components/Decoder/Instruction.cpp @@ -85,6 +85,7 @@ ArchInstruction::doDispatchEffects() auto bp_state = bpState(); DBG_Assert(bp_state, (<< "No branch predictor state exists, but it must")); + if (isMicroOp()) return; if (bp_state->thePredictedType == kNonBranch) return; if (isBranch()) return; @@ -161,7 +162,7 @@ decode(Flexus::SharedTypes::FetchedOpcode const& aFetchedOpcode, uint32_t aCPU, DBG_(VVerb, (<< "\033[1;31m DECODER: Decoding " << std::hex << aFetchedOpcode.theOpcode << std::dec << "\033[0m")); bool last_uop = true; - boost::intrusive_ptr ret_val = disas_a64_insn(aFetchedOpcode, aCPU, aSequenceNo, aUop); + boost::intrusive_ptr ret_val = disas_a64_insn(aFetchedOpcode, aCPU, aSequenceNo, aUop, last_uop); return std::make_pair(ret_val, last_uop); } diff --git a/components/Decoder/encodings/Encodings.cpp b/components/Decoder/encodings/Encodings.cpp index daa20111..ee45bf57 100644 --- a/components/Decoder/encodings/Encodings.cpp +++ b/components/Decoder/encodings/Encodings.cpp @@ -7,7 +7,7 @@ namespace nDecoder { /* C3.1 A64 instruction index by encoding */ archinst -disas_a64_insn(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo, int32_t aUop) +disas_a64_insn(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo, int32_t aUop, bool &aLastUop) { if (aFetchedOpcode.theOpcode == 1) { // instruction fetch page fault return blackBox(aFetchedOpcode, aCPU, aSequenceNo); @@ -27,7 +27,7 @@ disas_a64_insn(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceN case 0x4: case 0x6: case 0xc: - case 0xe: /* Loads and stores */ return disas_ldst(aFetchedOpcode, aCPU, aSequenceNo); + case 0xe: /* Loads and stores */ return disas_ldst(aFetchedOpcode, aCPU, aSequenceNo, aUop, aLastUop); case 0x5: case 0xd: /* Data processing - register */ return disas_data_proc_reg(aFetchedOpcode, aCPU, aSequenceNo); case 0x7: diff --git a/components/Decoder/encodings/Encodings.hpp b/components/Decoder/encodings/Encodings.hpp index 74bc1cd1..1c5ad77a 100644 --- a/components/Decoder/encodings/Encodings.hpp +++ b/components/Decoder/encodings/Encodings.hpp @@ -62,13 +62,13 @@ disas_ldst_reg_unsigned_imm(archcode const& aFetchedOpcode, uint32_t aCPU, int64 archinst disas_ldst_reg(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo); archinst -disas_ldst_pair(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo); +disas_ldst_pair(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo, int32_t aUop, bool& aLastUop); archinst disas_ld_lit(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo); archinst disas_ldst_excl(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo); archinst -disas_ldst(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo); +disas_ldst(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo, int32_t aUop, bool& aLastUop); //<<--Data Processing -- Register archinst @@ -120,7 +120,7 @@ disas_data_proc_simd_fp(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t a /* C3.1 A64 instruction index by encoding */ archinst -disas_a64_insn(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo, int32_t aUop); +disas_a64_insn(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo, int32_t aUop, bool& aLastUop); } // namespace nDecoder diff --git a/components/Decoder/encodings/Encodings_LoadStore.cpp b/components/Decoder/encodings/Encodings_LoadStore.cpp index 1c23000b..ecf20254 100644 --- a/components/Decoder/encodings/Encodings_LoadStore.cpp +++ b/components/Decoder/encodings/Encodings_LoadStore.cpp @@ -300,10 +300,12 @@ disas_ldst_reg(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceN * imm7 = signed offset (multiple of 4 or 8 depending on size) */ archinst -disas_ldst_pair(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo) +disas_ldst_pair(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo, int32_t aUop, bool& aLastUop) { DECODER_TRACE; + aLastUop = !(aUop == 0); + bool is_vector = extract32(aFetchedOpcode.theOpcode, 26, 1); bool is_load = extract32(aFetchedOpcode.theOpcode, 22, 1); @@ -320,9 +322,9 @@ disas_ldst_pair(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequence // } } else { if (is_load) { - return LDP(aFetchedOpcode, aCPU, aSequenceNo); + return LDP(aFetchedOpcode, aCPU, aSequenceNo, aUop); } else { - return STP(aFetchedOpcode, aCPU, aSequenceNo); + return STP(aFetchedOpcode, aCPU, aSequenceNo, aUop); } } } @@ -403,7 +405,7 @@ disas_ldst_excl(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequence /* Loads and stores */ archinst -disas_ldst(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo) +disas_ldst(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo, int32_t aUop, bool& aLastUop) { switch (extract32(aFetchedOpcode.theOpcode, 24, 6)) { case 0x08: /* Load/store exclusive */ return disas_ldst_excl(aFetchedOpcode, aCPU, aSequenceNo); @@ -412,7 +414,7 @@ disas_ldst(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo) case 0x28: case 0x29: case 0x2c: - case 0x2d: /* Load/store pair (all forms) */ return disas_ldst_pair(aFetchedOpcode, aCPU, aSequenceNo); + case 0x2d: /* Load/store pair (all forms) */ return disas_ldst_pair(aFetchedOpcode, aCPU, aSequenceNo, aUop, aLastUop); case 0x38: case 0x39: case 0x3c: @@ -425,4 +427,4 @@ disas_ldst(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo) } } -} // namespace nDecoder \ No newline at end of file +} // namespace nDecoder diff --git a/components/Decoder/encodings/LoadStore.cpp b/components/Decoder/encodings/LoadStore.cpp index b9fc18a2..02d9450c 100644 --- a/components/Decoder/encodings/LoadStore.cpp +++ b/components/Decoder/encodings/LoadStore.cpp @@ -435,7 +435,7 @@ LDR_lit(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo) } // Load/store pair (all forms) archinst -LDP(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo) +LDP(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo, int32_t aUop) { DECODER_TRACE; uint32_t opc = extract32(aFetchedOpcode.theOpcode, 30, 2); @@ -453,7 +453,9 @@ LDP(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo) */ bool is_signed = (opc & 0x1) == 0x1; uint32_t size = 8 << (scale + 1); - eSize sz = dbSize(size); + auto sizeOfEachPair = size / 2; + auto sizeOfEachPairInBytes = sizeOfEachPair / 8; + eSize sz = dbSize(size / 2); if ((((opc & 1) == 1) && L == 0) || opc == 3 || (is_signed && index == kNoOffset)) { /* Msutherl: if signed, only valid encodings are preindex, postindex, imm-index */ @@ -471,6 +473,9 @@ LDP(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo) aFetchedOpcode.theBPState, aCPU, aSequenceNo)); + if (aUop == 0) { + inst->setIsMicroOp(true); + } inst->setClass(clsLoad, codeLDP); @@ -478,14 +483,24 @@ LDP(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo) std::vector> addr_deps(1); // calculate the address from rn - simple_action act = addAddressCompute(inst, addr_deps); + simple_action act; + if (aUop == 0) { + // first uop computes the address using rn and offset + act = addAddressCompute(inst, addr_deps); + } else { + // second uop also adds the size of each pair to the address + addr_deps.resize(2); + act = addAddressCompute(inst, addr_deps); + addReadConstant(inst, 2, sizeOfEachPairInBytes, addr_deps[1]); + } addReadXRegister(inst, 1, rn, addr_deps[0], true); - if (index == kPreIndex || index == kPostIndex) { + if ((index == kPreIndex || index == kPostIndex) && aUop == 1) { // C6.2.129 LDP PostIndex and PreIndex cause writeback + // Writeback only happens in the second uop std::vector> wb_deps(1); predicated_action wback = addExecute(inst, operation(kADD_), { kAddress, kOperand4 }, wb_deps, kResult2); - addReadConstant(inst, 4, ((index == kPostIndex) ? imm7 : 0), wb_deps[0]); + addReadConstant(inst, 4, ((index == kPostIndex) ? imm7 - sizeOfEachPairInBytes : -sizeOfEachPairInBytes), wb_deps[0]); // wback.action->addDependance(wback.action->dependance(1)); connectDependance(wback.action->dependance(1), act); // wb_deps[0].push_back(act.action->dependance(0)); @@ -508,7 +523,21 @@ LDP(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo) } predicated_dependant_action load; - load = ldpAction(inst, sz, is_signed ? kSignExtend : kNoExtension, kPD, kPD1); + if (aUop == 0) { + if (rt != 31) { + load = loadAction(inst, sz, is_signed ? kSignExtend : kNoExtension, kPD); + addDestination(inst, rt, load, size / 2 == 64); + } else { + load = loadAction(inst, sz, is_signed ? kSignExtend : kNoExtension, boost::none); + } + } else { + if (rt2 != 31) { + load = loadAction(inst, sz, is_signed ? kSignExtend : kNoExtension, kPD); + addDestination(inst, rt2, load, size / 2 == 64); + } else { + load = loadAction(inst, sz, is_signed ? kSignExtend : kNoExtension, boost::none); + } + } inst->addDispatchEffect(allocateLoad(inst, sz, load.dependance, acctype)); inst->addCheckTrapEffect(mmuPageFaultCheck(inst)); @@ -517,12 +546,10 @@ LDP(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo) inst->addSquashEffect(eraseLSQ(inst)); inst->addRetirementConstraint(loadMemoryConstraint(inst)); - addPairDestination(inst, rt, rt2, load, size / 2 == 64); - return inst; } archinst -STP(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo) +STP(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo, int32_t aUop) { DECODER_TRACE; uint32_t opc = extract32(aFetchedOpcode.theOpcode, 30, 2); @@ -535,7 +562,9 @@ STP(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo) uint32_t scale = 2 + extract32(opc, 1, 1); // bool is_signed = ( opc & 1 ) != 0; int size = 8 << (scale + 1); - eSize sz = dbSize(size); + auto sizeOfEachPair = size / 2; + auto sizeOfEachPairInBytes = sizeOfEachPair / 8; + eSize sz = dbSize(size / 2); if ((((opc & 1) == 1) && L == 0) || opc == 3) { return unallocated_encoding(aFetchedOpcode, aCPU, aSequenceNo); } @@ -548,20 +577,33 @@ STP(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo) aSequenceNo)); inst->setClass(clsStore, codeStore); + if (aUop == 0) { + inst->setIsMicroOp(true); + } // calculate the address from rn eAccType acctype = kAccType_STREAM; - std::vector> addr_deps(1), data_deps(2); - simple_action addr = addAddressCompute(inst, addr_deps); + std::vector> addr_deps(1), data_deps(1); + simple_action addr; + if (aUop == 0) { + // first uop computes the address using rn and offset + addr = addAddressCompute(inst, addr_deps); + } else { + // second uop also adds the size of each pair to the address + addr_deps.resize(2); + addr = addAddressCompute(inst, addr_deps); + addReadConstant(inst, 2, sizeOfEachPairInBytes, addr_deps[1]); + } addReadXRegister(inst, 1, rn, addr_deps[0], true); - if (index == kPreIndex || index == kPostIndex) { + if ((index == kPreIndex || index == kPostIndex) && aUop == 1) { // C6.2.273 STP PostIndex and PreIndex cause writeback + // Writeback only happens in the second uop std::vector> wb_deps(2); predicated_action wback = addExecute(inst, operation(kADD_), { kAddress, kOperand4 }, wb_deps, kResult1); connect(wb_deps[1], addr); - addReadConstant(inst, 4, ((index == kPostIndex) ? imm7 : 0), wb_deps[0]); + addReadConstant(inst, 4, ((index == kPostIndex) ? imm7 - sizeOfEachPairInBytes : - sizeOfEachPairInBytes), wb_deps[0]); addDestination1(inst, rn, wback, size / 2 == 64); } @@ -575,28 +617,26 @@ STP(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo) DBG_(VVerb, (<< "setting signed offset #" << imm7)); } } - + predicated_dependant_action update_value = updateStoreValueAction(inst, kOperand5); + data_deps[0].push_back(update_value.dependance); + connectDependance(inst->retirementDependance(), update_value); // read data registers - simple_action act = - addExecute(inst, operation(size / 2 == 64 ? kCONCAT64_ : kCONCAT32_), { kOperand2, kOperand3 }, data_deps); - readRegister(inst, 2, rt2, data_deps[0], size / 2 == 64); - readRegister(inst, 3, rt, data_deps[1], size / 2 == 64); + if (aUop == 0) { + readRegister(inst, 5, rt, data_deps[0], size / 2 == 64); + } else { + readRegister(inst, 5, rt2, data_deps[0], size / 2 == 64); + } inst->addDispatchEffect(allocateStore(inst, sz, false, acctype)); inst->addCheckTrapEffect(mmuPageFaultCheck(inst)); inst->addRetirementConstraint(storeQueueAvailableConstraint(inst)); inst->addRetirementConstraint(sideEffectStoreConstraint(inst)); - multiply_dependant_action update_value = updateSTPValueAction(inst, kResult); - inst->addDispatchEffect(satisfy(inst, update_value.dependances[1])); - connectDependance(update_value.dependances[0], act); - connectDependance(inst->retirementDependance(), update_value); - inst->addRetirementEffect(retireMem(inst)); inst->addCommitEffect(commitStore(inst)); inst->addSquashEffect(eraseLSQ(inst)); - inst->addPostvalidation(validateMemory(kAddress, kResult, sz, inst)); + inst->addPostvalidation(validateMemory(kAddress, kOperand5, sz, inst)); return inst; } diff --git a/components/Decoder/encodings/LoadStore.hpp b/components/Decoder/encodings/LoadStore.hpp index b10e3b50..ce8720c0 100644 --- a/components/Decoder/encodings/LoadStore.hpp +++ b/components/Decoder/encodings/LoadStore.hpp @@ -26,9 +26,9 @@ LDR_lit(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo); // Load/store pair (all forms) archinst -LDP(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo); +LDP(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo, int32_t aUop); archinst -STP(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo); +STP(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo, int32_t aUop); /* Load/store register (all forms) */ archinst diff --git a/components/uArch/CoreModel/cycle.cpp b/components/uArch/CoreModel/cycle.cpp index 596f8ddf..2e0a64fb 100644 --- a/components/uArch/CoreModel/cycle.cpp +++ b/components/uArch/CoreModel/cycle.cpp @@ -1446,9 +1446,11 @@ CoreImpl::commit(boost::intrusive_ptr anInstruction) throw ResynchronizeWithQemuException(true); } - validation_passed &= checkValidatation(); + if (anInstruction->advancesSimics()) { + validation_passed &= checkValidatation(); + validation_passed &= anInstruction->postValidate(); + } - validation_passed &= anInstruction->postValidate(); DBG_(Iface, (<< "Post Validating... " << validation_passed)); if (!validation_passed) {