diff --git a/src/arch/riscv/isa/decoder.isa b/src/arch/riscv/isa/decoder.isa index f819a5d925..510944f750 100644 --- a/src/arch/riscv/isa/decoder.isa +++ b/src/arch/riscv/isa/decoder.isa @@ -2461,7 +2461,6 @@ decode QUADRANT default Unknown::unknown() { }}, OPFVV, VectorFloatArithOp); } } - format VectorFloatMaskFormat { 0x18: vmfeq_vv({{ Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, diff --git a/src/arch/riscv/isa/formats/vector_arith.isa b/src/arch/riscv/isa/formats/vector_arith.isa index 0f50897fba..f4b82992f4 100644 --- a/src/arch/riscv/isa/formats/vector_arith.isa +++ b/src/arch/riscv/isa/formats/vector_arith.isa @@ -70,53 +70,396 @@ let {{ uint32_t ei = i + vtype_VLMAX(vtype, true) * this->microIdx; ''' + code - def wideningOpRegisterConstraintChecks(code): + def fflags_wrapper(code): return ''' - const uint32_t num_microops = 1 << std::max(0, vtype_vlmul(machInst.vtype8) + 1); - if ((machInst.vd % alignToPowerOfTwo(num_microops)) != 0) { + RegVal FFLAGS = xc->readMiscReg(MISCREG_FFLAGS); + std::feclearexcept(FE_ALL_EXCEPT); + ''' + code + ''' + FFLAGS |= softfloat_exceptionFlags; + softfloat_exceptionFlags = 0; + xc->setMiscReg(MISCREG_FFLAGS, FFLAGS); + ''' + + def VI_CHECK_SSS(flag = True): + if flag == True: + return ''' + const float vflmul = Vflmul(vtype_vlmul(machInst.vtype8)); + if (vflmul > 1) { + if(!(is_aligned(machInst.vd, vflmul)&& + is_aligned(machInst.vs2, vflmul)&& + is_aligned(machInst.vs1, vflmul))) { + std::string error = + csprintf("Unaligned Vd, Vs2 or Vs1 group"); + return std::make_shared(error, machInst); + } + } + ''' + else: + return ''' + const float vflmul = Vflmul(vtype_vlmul(machInst.vtype8)); + if (vflmul > 1) { + if(!(is_aligned(machInst.vd, vflmul)&& + is_aligned(machInst.vs2, vflmul))) { + std::string error = + csprintf("Unaligned Vd or Vs2 group"); + return std::make_shared(error, machInst); + } + } + ''' + + def VI_CHECK_DSS(flag = True): + if flag == True: + return ''' + const float vflmul = Vflmul(vlmul); + if(vflmul 
> 4){ std::string error = - csprintf("Unaligned Vd group in Widening op"); + csprintf("Illegal LMUL"); return std::make_shared(error, machInst); } - if ((machInst.vs2 <= machInst.vd) && (machInst.vd < (machInst.vs2 + num_microops - 1))) { - // A destination vector register group can overlap a source vector - // register group if The destination EEW is greater than the source - // EEW, the source EMUL is at least 1, and the overlap is in the - // highest- numbered part of the destination register group. + if(!(is_aligned(machInst.vd, 2*vflmul)&& + is_aligned(machInst.vs2, 2*vflmul)&& + is_aligned(machInst.vs1, 2*vflmul))){ std::string error = - csprintf("Unsupported overlap in Vs2 and Vd for Widening op"); + csprintf("Unaligned Vd, Vs2 or Vs1 group"); return std::make_shared(error, machInst); } - ''' + code + if(vflmul < 1){ + if(is_overlapped(machInst.vd, 2*vflmul, + machInst.vs2, 2*vflmul)|| + is_overlapped(machInst.vd, 2*vflmul, + machInst.vs1, 2*vflmul)) + { + std::string error = + csprintf( + "Unsupported overlap in Vd and Vs2/Vs1 group" + ); + return std::make_shared(error, + machInst); + } + }else{ + if(is_overlapped_widen(machInst.vd, 2*vflmul, + machInst.vs2, 2*vflmul)|| + is_overlapped_widen(machInst.vd, 2*vflmul, + machInst.vs1, 2*vflmul)) + { + std::string error = + csprintf( + "Unsupported overlap in Vd and Vs2/Vs1 group" + ); + return std::make_shared(error, + machInst); + } + } + ''' + elif flag == False: + return ''' + const float vflmul = Vflmul(vlmul); + if(vflmul > 4){ + std::string error = + csprintf("Illegal LMUL"); + return std::make_shared(error, machInst); + } + if(!(is_aligned(machInst.vd, vflmul)&& + is_aligned(machInst.vs2, vflmul))){ + std::string error = + csprintf("Unaligned Vd or Vs2 group"); + return std::make_shared(error, machInst); + } + if(vflmul < 1){ + if(is_overlapped(machInst.vd, vflmul*2, + machInst.vs2, vflmul*2)) + { + std::string error = + csprintf( + "Unsupported overlap in Vd and Vs2 group" + ); + return 
std::make_shared(error, + machInst); + } + }else{ + if(is_overlapped_widen(machInst.vd, vflmul*2, + machInst.vs2, vflmul*2)){ + std::string error = + csprintf( + "Unsupported overlap in Vd and Vs2 group" + ); + return std::make_shared(error, + machInst); + } + } + ''' + else: + return ''' + ''' - def narrowingOpRegisterConstraintChecks(code): + def VI_CHECK_DDS(flag = True): + if flag == True: + return ''' + const float vflmul = Vflmul(vlmul); + if(vflmul > 4){ + std::string error = + csprintf("Illegal LMUL"); + return std::make_shared(error, machInst); + } + if(!(is_aligned(machInst.vd, vflmul*2)&& + is_aligned(machInst.vs2, vflmul*2)&& + is_aligned(machInst.vs1, vflmul))){ + std::string error = + csprintf("Unaligned Vd, Vs2 or Vs1 group"); + return std::make_shared(error, machInst); + } + if(vflmul < 1){ + if(is_overlapped(machInst.vd, vflmul*2, + machInst.vs1, vflmul)){ + std::string error = + csprintf( + "Unsupported overlap in Vd and Vs1 group" + ); + return std::make_shared(error, + machInst); + } + }else{ + if(is_overlapped_widen(machInst.vd, vflmul*2, + machInst.vs1, vflmul)){ + std::string error = + csprintf("Unsupported overlap in Vd and Vs1 group"); + return std::make_shared(error, machInst); + } + } + ''' + elif flag == False: + return ''' + const float vflmul = Vflmul(vlmul); + if(vflmul > 4){ + std::string error = + csprintf("Illegal LMUL"); + return std::make_shared(error, machInst); + } + if(!(is_aligned(machInst.vd, vflmul*2)&& + is_aligned(machInst.vs2, vflmul*2))){ + std::string error = + csprintf("Unaligned Vd or Vs2 group"); + return std::make_shared(error, machInst); + } + ''' + else: + return ''' + ''' + + def VI_CHECK_SDS(flag = True): + if flag == True: + return ''' + const float vflmul = Vflmul(vlmul); + if(vflmul > 4){ + std::string error = + csprintf("Illegal LMUL"); + return std::make_shared(error, machInst); + } + if(!(is_aligned(machInst.vd, vflmul)&& + is_aligned(machInst.vs2, vflmul*2)&& + is_aligned(machInst.vs1, vflmul))){ + 
std::string error = + csprintf("Unaligned Vd, Vs2 or Vs1 group"); + return std::make_shared(error, machInst); + } + if(machInst.vd != machInst.vs2){ + if(is_overlapped(machInst.vd, vflmul, machInst.vs2, vflmul*2)){ + std::string error = + csprintf("Unsupported overlap in Vd and Vs2 group"); + return std::make_shared(error, machInst); + } + } + ''' + else: + return ''' + const float vflmul = Vflmul(vlmul); + if(vflmul > 4){ + std::string error = + csprintf("Illegal LMUL"); + return std::make_shared(error, machInst); + } + if(!(is_aligned(machInst.vd, vflmul)&& + is_aligned(machInst.vs2, vflmul*2))){ + std::string error = + csprintf("Unaligned Vd or Vs2 group"); + return std::make_shared(error, machInst); + } + if(machInst.vd != machInst.vs2){ + if(is_overlapped(machInst.vd, vflmul, machInst.vs2, vflmul*2)){ + std::string error = + csprintf("Unsupported overlap in Vd and Vs2 group"); + return std::make_shared(error, machInst); + } + } + ''' + + def VI_CHECK_REDUCTION(): return ''' - const uint32_t num_microops = 1 << std::max(0, vtype_vlmul(machInst.vtype8) + 1); - if ((machInst.vs2 % alignToPowerOfTwo(num_microops)) != 0) { + const float vflmul = Vflmul(vtype_vlmul(machInst.vtype8)); + if(!(is_aligned(machInst.vs2, vflmul))){ + std::string error = + csprintf("Unaligned Vs2 group"); + return std::make_shared(error, machInst); + } + ''' + + def VI_CHECK_MSS(flag): + if flag == True: + return''' + const float vflmul = Vflmul(vtype_vlmul(machInst.vtype8)); + if(machInst.rd != machInst.vs2){ + if(is_overlapped(machInst.vd, 1, machInst.vs2, vflmul)){ + std::string error = + csprintf("Unsupported overlap in Vd and Vs2 group"); + return std::make_shared(error, machInst); + } + } + if(!(is_aligned(machInst.vs2, vflmul)&& + is_aligned(machInst.vs1, vflmul))){ std::string error = - csprintf("Unaligned VS2 group in Narrowing op"); + csprintf("Unaligned Vs2 or Vs1 group"); return std::make_shared(error, machInst); } - if ((machInst.vs2 < machInst.vd) && (machInst.vd <= (VS2 + 
num_microops - 1))) { - // A destination vector register group can overlap a source vector - // register group The destination EEW is smaller than the source EEW - // and the overlap is in the lowest-numbered part of the source - // register group + if(machInst.rd != machInst.vs1){ + if(is_overlapped(machInst.vd, 1, machInst.vs1, vflmul)){ + std::string error = + csprintf("Unsupported overlap in Vd and Vs1 group"); + return std::make_shared(error, machInst); + } + } + ''' + else: + return ''' + const float vflmul = Vflmul(vtype_vlmul(machInst.vtype8)); + if(machInst.rd != machInst.vs2){ + if(is_overlapped(machInst.vd, 1, machInst.vs2, vflmul)){ + std::string error = + csprintf("Unsupported overlap in Vd and Vs2 group"); + return std::make_shared(error, machInst); + } + } + if(!(is_aligned(machInst.vs2, vflmul))){ std::string error = - csprintf("Unsupported overlap in Vs2 and Vd for Narrowing op"); + csprintf("Unaligned Vs2 group"); return std::make_shared(error, machInst); } - ''' + code + ''' - def fflags_wrapper(code): + def VI_CHECK_VRGATHER(flag): + if flag == True: + return ''' + const float vflmul = Vflmul(vtype_vlmul(machInst.vtype8)); + if(!(is_aligned(machInst.vd, vflmul)&& + is_aligned(machInst.vs2, vflmul)&& + is_aligned(machInst.vs1, vflmul))) { + std::string error = + csprintf("Unaligned Vd, Vs2 or Vs1 group"); + return std::make_shared(error, machInst); + } + if(machInst.vd==machInst.vs2 || machInst.vd==machInst.vs1){ + std::string error = + csprintf("Vd is the same as Vs2/Vs1"); + return std::make_shared(error, machInst); + } + ''' + elif flag == False: + return ''' + const float vflmul = Vflmul(vtype_vlmul(machInst.vtype8)); + if(!(is_aligned(machInst.vd, vflmul)&& + is_aligned(machInst.vs2, vflmul))) { + std::string error = + csprintf("Unaligned Vd, Vs2 or Vs1 group"); + return std::make_shared(error, machInst); + } + if(machInst.vd==machInst.vs2){ + std::string error = + csprintf("Vd is the same as Vs2"); + return std::make_shared(error, 
machInst); + } + ''' + else: + return ''' + const float vflmul = Vflmul(vtype_vlmul(machInst.vtype8)); + const float vemul = (16.0 / sew * vflmul); + if(!(vemul<=8 && vemul>=0.125)){ + std::string error = + csprintf("Illegal vemul"); + return std::make_shared(error, machInst); + } + if(!(is_aligned(machInst.vd, vflmul)&& + is_aligned(machInst.vs2, vflmul)&& + is_aligned(machInst.vs1, vemul))) { + std::string error = + csprintf("Unaligned Vd, Vs2 or Vs1 group"); + return std::make_shared(error, machInst); + } + if(machInst.vd==machInst.vs2){ + std::string error = + csprintf("Vd is the same as Vs2"); + return std::make_shared(error, machInst); + } + if(is_overlapped(machInst.vd, vflmul, machInst.vs1, vemul)){ + std::string error = + csprintf("Unsupported overlap in Vd and Vs2 group"); + return std::make_shared(error, machInst); + } + ''' + + def VMV_CHECK(): return ''' - RegVal FFLAGS = xc->readMiscReg(MISCREG_FFLAGS); - std::feclearexcept(FE_ALL_EXCEPT); - ''' + code + ''' - FFLAGS |= softfloat_exceptionFlags; - softfloat_exceptionFlags = 0; - xc->setMiscReg(MISCREG_FFLAGS, FFLAGS); + const uint64_t vd = machInst.vd; + const uint64_t vs2 = machInst.vs2; + const uint64_t len = machInst.vs1 + 1; + if(!(is_aligned(vd, len)&&is_aligned(vs2, len))) { + std::string error = + csprintf("Unaligned Vd, Vs2 or Vs1 group"); + return std::make_shared(error, machInst); + } ''' + + def VI_VV_EXT_CHECK(div): + return ''' + if(machInst.vd==machInst.vs2){ + std::string error = + csprintf("Vd is the same as Vs2"); + return std::make_shared(error, machInst); + } + size_t sew = sizeof(vu) * 8; + const float vflmul = Vflmul(vtype_vlmul(machInst.vtype8)); + uint64_t div = %d; + uint64_t from = sew / div; + if(!(from >= 8 && from <= 64)){ + std::string error = + csprintf("Invalid vsew"); + return std::make_shared(error, machInst); + } + if(!((vflmul / div) >= 0.125 && (vflmul / div) <= 8)){ + std::string error = + csprintf("Invalid vflmul"); + return std::make_shared(error, machInst); + } 
+ if(!(is_aligned(machInst.vd, vflmul)&& + is_aligned(machInst.vs2, vflmul/div))){ + std::string error = + csprintf("Unaligned Vd and Vs2 group"); + return std::make_shared(error, machInst); + } + if(vflmul/div < 1){ + if(is_overlapped(machInst.vd, vflmul, + machInst.vs2, vflmul/div)){ + std::string error = + csprintf("Unsupported overlap in Vd and Vs2 group"); + return std::make_shared(error, machInst); + } + }else{ + if(is_overlapped_widen(machInst.vd, vflmul, + machInst.vs2, vflmul/div)){ + std::string error = + csprintf("Unsupported overlap in Vd and Vs2 group"); + return std::make_shared(error, machInst); + } + } + ''' %(div) }}; @@ -134,8 +477,8 @@ def format VectorIntFormat(code, category, *flags) {{ v0_required = inst_name not in ["vmv"] mask_cond = v0_required and (inst_suffix not in ['vvm', 'vxm', 'vim']) need_elem_idx = mask_cond or code.find("ei") != -1 - dest_reg_id = "vecRegClass[_machInst.vd + _microIdx]" + vi_check_sss_flag = True num_src_regs = 0 @@ -148,8 +491,10 @@ def format VectorIntFormat(code, category, *flags) {{ num_src_regs += 1 elif category in ["OPIVX", "OPMVX"]: src1_reg_id = "intRegClass[_machInst.rs1]" + vi_check_sss_flag = False num_src_regs += 1 elif category == "OPIVI": + vi_check_sss_flag = False pass else: error("not supported category for VectorIntFormat: %s" % category) @@ -177,7 +522,7 @@ def format VectorIntFormat(code, category, *flags) {{ vm_decl_rd = "" if v0_required: vm_decl_rd = vmDeclAndReadData() - + vi_check_sss = VI_CHECK_SSS(vi_check_sss_flag) microiop = InstObjParams(name + "_micro", Name + "Micro", microop_class_name, @@ -185,7 +530,8 @@ def format VectorIntFormat(code, category, *flags) {{ 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, 'vm_decl_rd': vm_decl_rd, - 'copy_old_vd': copyOldVd(old_vd_idx)}, + 'copy_old_vd': copyOldVd(old_vd_idx), + 'vi_check_sss': vi_check_sss}, flags) # Because of the use of templates, we had to put all parts in header to @@ -233,7 +579,9 @@ def format 
VectorIntExtFormat(code, category, *flags) {{ 'set_src_reg_idx': set_src_reg_idx, 'vm_decl_rd': vm_decl_rd, 'copy_old_vd': copyOldVd(old_vd_idx), - 'ext_div': ext_div}, + 'ext_div': ext_div, + 'vi_vv_ext_check': VI_VV_EXT_CHECK(ext_div) + }, flags) # Because of the use of templates, we had to put all parts in header to @@ -258,17 +606,28 @@ def format VectorIntWideningFormat(code, category, *flags) {{ old_vd_idx = 2 dest_reg_id = "vecRegClass[_machInst.vd + _microIdx]" src1_reg_id = "" + + vi_check_dss_flag = None if category in ["OPIVV", "OPMVV"]: src1_reg_id = "vecRegClass[_machInst.vs1 + _microIdx / 2]" + vi_check_dss_flag = True elif category in ["OPIVX", "OPMVX"]: src1_reg_id = "intRegClass[_machInst.rs1]" + vi_check_dss_flag = False else: error("not supported category for VectorIntFormat: %s" % category) + + vi_check_dds_flag = None src2_reg_id = "" if inst_suffix in ["vv", "vx"]: src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx / 2]" elif inst_suffix in ["wv", "wx"]: src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx]" + vi_check_dss_flag = None + if inst_suffix == "wv": + vi_check_dds_flag = True + else: + vi_check_dds_flag = False src3_reg_id = "vecRegClass[_machInst.vs3 + _microIdx]" set_dest_reg_idx = setDestWrapper(dest_reg_id) @@ -287,8 +646,6 @@ def format VectorIntWideningFormat(code, category, *flags) {{ code = eiDeclarePrefix(code, widening=True) code = loopWrapper(code) - code = wideningOpRegisterConstraintChecks(code) - vm_decl_rd = "" if v0_required: vm_decl_rd = vmDeclAndReadData() @@ -300,7 +657,10 @@ def format VectorIntWideningFormat(code, category, *flags) {{ 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, 'vm_decl_rd': vm_decl_rd, - 'copy_old_vd': copyOldVd(old_vd_idx)}, + 'copy_old_vd': copyOldVd(old_vd_idx), + 'vi_check_dss': VI_CHECK_DSS(vi_check_dss_flag), + 'vi_check_dds': VI_CHECK_DDS(vi_check_dds_flag) + }, flags) # Because of the use of templates, we had to put all parts in header to @@ -322,9 
+682,11 @@ def format VectorIntNarrowingFormat(code, category, *flags) {{ need_elem_idx = True old_vd_idx = 2 + vi_check_sds_flag = False dest_reg_id = "vecRegClass[_machInst.vd + _microIdx / 2]" if category in ["OPIVV"]: src1_reg_id = "vecRegClass[_machInst.vs1 + _microIdx / 2]" + vi_check_sds_flag = True elif category in ["OPIVX"]: src1_reg_id = "intRegClass[_machInst.rs1]" elif category == "OPIVI": @@ -345,7 +707,6 @@ def format VectorIntNarrowingFormat(code, category, *flags) {{ code = maskCondWrapper(code) code = eiDeclarePrefix(code, widening=True) code = loopWrapper(code) - code = narrowingOpRegisterConstraintChecks(code) vm_decl_rd = vmDeclAndReadData() microiop = InstObjParams(name + "_micro", @@ -356,6 +717,7 @@ def format VectorIntNarrowingFormat(code, category, *flags) {{ 'set_src_reg_idx': set_src_reg_idx, 'vm_decl_rd': vm_decl_rd, 'copy_old_vd': copyOldVd(old_vd_idx), + 'vi_check_sds': VI_CHECK_SDS(vi_check_sds_flag) }, flags) @@ -384,9 +746,11 @@ def format VectorIntMaskFormat(code, category, *flags) {{ need_elem_idx = mask_cond or code.find("ei") != -1 old_vd_idx = 2 + vi_check_mss_flag = False dest_reg_id = "vecRegClass[VecMemInternalReg0 + _microIdx]" src1_reg_id = "" if category == "OPIVV": + vi_check_mss_flag = True src1_reg_id = "vecRegClass[_machInst.vs1 + _microIdx]" elif category == "OPIVX": src1_reg_id = "intRegClass[_machInst.rs1]" @@ -423,7 +787,9 @@ def format VectorIntMaskFormat(code, category, *flags) {{ 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, 'vm_decl_rd': vm_decl_rd, - 'copy_old_vd': copyOldVd(old_vd_idx)}, + 'copy_old_vd': copyOldVd(old_vd_idx), + 'vi_check_mss': VI_CHECK_MSS(vi_check_mss_flag) + }, flags) # Because of the use of templates, we had to put all parts in header to @@ -450,14 +816,18 @@ def format VectorGatherFormat(code, category, *flags) {{ old_vd_idx = 2 dest_reg_id = "vecRegClass[_machInst.vd + vd_idx]" src1_reg_id = "" + vi_check_vrather_flag = False if category in ["OPIVV"]: 
src1_reg_id = "vecRegClass[_machInst.vs1 + vs1_idx]" + vi_check_vrather_flag = True elif category in ["OPIVX"]: src1_reg_id = "intRegClass[_machInst.rs1]" elif category == "OPIVI": old_vd_idx = 1 else: error("not supported category for VectorIntFormat: %s" % category) + if inst_name == "vrgatherei16": + vi_check_vrather_flag = None src2_reg_id = "vecRegClass[_machInst.vs2 + vs2_idx]" src3_reg_id = "vecRegClass[_machInst.vs3 + vd_idx]" @@ -471,7 +841,6 @@ def format VectorGatherFormat(code, category, *flags) {{ set_src_reg_idx += setSrcVm() # code - vm_decl_rd = vmDeclAndReadData() microiop = InstObjParams(name + "_micro", @@ -482,7 +851,9 @@ def format VectorGatherFormat(code, category, *flags) {{ 'set_src_reg_idx': set_src_reg_idx, 'vm_decl_rd': vm_decl_rd, 'copy_old_vd': copyOldVd(old_vd_idx), - 'idx_type': idx_type}, + 'idx_type': idx_type, + 'vi_check_vrather': VI_CHECK_VRGATHER(vi_check_vrather_flag) + }, flags) # Because of the use of templates, we had to put all parts in header to @@ -505,11 +876,12 @@ def format VectorFloatFormat(code, category, *flags) {{ v0_required = inst_name not in ["vfmv"] mask_cond = v0_required and (inst_suffix not in ['vvm', 'vfm']) need_elem_idx = mask_cond or code.find("ei") != -1 - + vi_check_sss_flag = False dest_reg_id = "vecRegClass[_machInst.vd + _microIdx]" src1_reg_id = "" if category == "OPFVV": src1_reg_id = "vecRegClass[_machInst.vs1 + _microIdx]" + vi_check_sss_flag = True elif category == "OPFVF": src1_reg_id = "floatRegClass[_machInst.rs1]" else: @@ -544,7 +916,9 @@ def format VectorFloatFormat(code, category, *flags) {{ 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, 'vm_decl_rd': vm_decl_rd, - 'copy_old_vd': copyOldVd(2)}, + 'copy_old_vd': copyOldVd(2), + 'vi_check_sss': VI_CHECK_SSS(vi_check_sss_flag) + }, flags) # Because of the use of templates, we had to put all parts in header to @@ -588,7 +962,9 @@ def format VectorFloatCvtFormat(code, category, *flags) {{ 'set_dest_reg_idx': 
set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, 'vm_decl_rd': vm_decl_rd, - 'copy_old_vd': copyOldVd(old_vd_idx)}, + 'copy_old_vd': copyOldVd(old_vd_idx), + 'vi_check_sss': VI_CHECK_SSS(False) + }, flags) # Because of the use of templates, we had to put all parts in header to @@ -611,19 +987,28 @@ def format VectorFloatWideningFormat(code, category, *flags) {{ mask_cond = v0_required need_elem_idx = mask_cond or code.find("ei") != -1 + vi_check_dss_flag = None dest_reg_id = "vecRegClass[_machInst.vd + _microIdx]" src1_reg_id = "" if category in ["OPFVV"]: src1_reg_id = "vecRegClass[_machInst.vs1 + _microIdx / 2]" + vi_check_dss_flag = True elif category in ["OPFVF"]: src1_reg_id = "floatRegClass[_machInst.rs1]" + vi_check_dss_flag = False else: error("not supported category for VectorFloatFormat: %s" % category) src2_reg_id = "" + vi_check_dds_flag = None if inst_suffix in ["vv", "vf"]: src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx / 2]" elif inst_suffix in ["wv", "wf"]: src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx]" + vi_check_dss_flag = None + if inst_suffix == "wv": + vi_check_dds_flag = True + else: + vi_check_dds_flag = False src3_reg_id = "vecRegClass[_machInst.vs3 + _microIdx]" set_dest_reg_idx = setDestWrapper(dest_reg_id) @@ -643,8 +1028,6 @@ def format VectorFloatWideningFormat(code, category, *flags) {{ code = loopWrapper(code) code = fflags_wrapper(code) - code = wideningOpRegisterConstraintChecks(code) - vm_decl_rd = "" if v0_required: vm_decl_rd = vmDeclAndReadData() @@ -656,7 +1039,10 @@ def format VectorFloatWideningFormat(code, category, *flags) {{ 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, 'vm_decl_rd': vm_decl_rd, - 'copy_old_vd': copyOldVd(2)}, + 'copy_old_vd': copyOldVd(2), + 'vi_check_dss': VI_CHECK_DSS(vi_check_dss_flag), + 'vi_check_dds': VI_CHECK_DDS(vi_check_dds_flag) + }, flags) # Because of the use of templates, we had to put all parts in header to @@ -700,7 +1086,10 @@ def format 
VectorFloatWideningCvtFormat(code, category, *flags) {{ 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, 'vm_decl_rd': vm_decl_rd, - 'copy_old_vd': copyOldVd(old_vd_idx)}, + 'copy_old_vd': copyOldVd(old_vd_idx), + 'vi_check_dss': VI_CHECK_DSS(False), + 'vi_check_dds': VI_CHECK_DDS(None) + }, flags) # Because of the use of templates, we had to put all parts in header to @@ -734,7 +1123,6 @@ def format VectorFloatNarrowingCvtFormat(code, category, *flags) {{ code = eiDeclarePrefix(code) code = loopWrapper(code) code = fflags_wrapper(code) - code = narrowingOpRegisterConstraintChecks(code) vm_decl_rd = vmDeclAndReadData() @@ -745,7 +1133,9 @@ def format VectorFloatNarrowingCvtFormat(code, category, *flags) {{ 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, 'vm_decl_rd': vm_decl_rd, - 'copy_old_vd': copyOldVd(old_vd_idx)}, + 'copy_old_vd': copyOldVd(old_vd_idx), + 'vi_check_sds': VI_CHECK_SDS(False) + }, flags) # Because of the use of templates, we had to put all parts in header to @@ -768,8 +1158,10 @@ def format VectorFloatMaskFormat(code, category, *flags) {{ flags) dest_reg_id = "vecRegClass[VecMemInternalReg0 + _microIdx]" src1_reg_id = "" + vi_check_mss_flag = False if category == "OPFVV": src1_reg_id = "vecRegClass[_machInst.vs1 + _microIdx]" + vi_check_mss_flag = True elif category == "OPFVF": src1_reg_id = "floatRegClass[_machInst.rs1]" else: @@ -796,7 +1188,9 @@ def format VectorFloatMaskFormat(code, category, *flags) {{ 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, 'vm_decl_rd': vm_decl_rd, - 'copy_old_vd': copyOldVd(2)}, + 'copy_old_vd': copyOldVd(2), + 'vi_check_mss': VI_CHECK_MSS(vi_check_mss_flag) + }, flags) # Because of the use of templates, we had to put all parts in header to @@ -816,7 +1210,9 @@ def format VMvWholeFormat(code, category, *flags) {{ microiop = InstObjParams(name + "_micro", Name + "Micro", 'VMvWholeMicroInst', - {'code': code}, + {'code': code, + 
'vmv_check': VMV_CHECK() + }, flags) header_output = \ @@ -1032,7 +1428,9 @@ def format VectorReduceIntFormat(code, category, *flags) {{ 'set_src_reg_idx': set_src_reg_idx, 'vm_decl_rd': vm_decl_rd, 'type_def': type_def, - 'copy_old_vd': copyOldVd(2)}, + 'copy_old_vd': copyOldVd(2), + 'vi_check_reduction': VI_CHECK_REDUCTION() + }, flags) # Because of the use of templates, we had to put all parts in header to @@ -1077,7 +1475,9 @@ def format VectorReduceFloatFormat(code, category, *flags) {{ 'set_src_reg_idx': set_src_reg_idx, 'vm_decl_rd': vm_decl_rd, 'type_def': type_def, - 'copy_old_vd': copyOldVd(2)}, + 'copy_old_vd': copyOldVd(2), + 'vi_check_reduction': VI_CHECK_REDUCTION() + }, flags) # Because of the use of templates, we had to put all parts in header to @@ -1121,7 +1521,9 @@ def format VectorReduceFloatWideningFormat(code, category, *flags) {{ 'set_src_reg_idx': set_src_reg_idx, 'vm_decl_rd': vm_decl_rd, 'type_def': type_def, - 'copy_old_vd': copyOldVd(2)}, + 'copy_old_vd': copyOldVd(2), + 'vi_check_reduction': VI_CHECK_REDUCTION() + }, flags) # Because of the use of templates, we had to put all parts in header to @@ -1139,6 +1541,7 @@ def format VectorIntVxsatFormat(code, category, *flags) {{ iop = InstObjParams(name, Name, 'VectorArithMacroInst', {'code': code}, flags) inst_name, inst_suffix = name.split("_", maxsplit=1) + vi_check_sss_flag = True old_vd_idx = 2 dest_reg_id = "vecRegClass[_machInst.vd + _microIdx]" src1_reg_id = "" @@ -1146,8 +1549,10 @@ def format VectorIntVxsatFormat(code, category, *flags) {{ src1_reg_id = "vecRegClass[_machInst.vs1 + _microIdx]" elif category in ["OPIVX"]: src1_reg_id = "intRegClass[_machInst.rs1]" + vi_check_sss_flag = False elif category == "OPIVI": old_vd_idx = 1 + vi_check_sss_flag = False else: error("not supported category for VectorIntVxsatFormat: %s" % category) src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx]" @@ -1161,6 +1566,7 @@ def format VectorIntVxsatFormat(code, category, *flags) {{ 
set_src_reg_idx += setSrcWrapper(src3_reg_id) set_src_reg_idx += setSrcVm() vm_decl_rd = vmDeclAndReadData() + vi_check_sss = VI_CHECK_SSS(vi_check_sss_flag) code = maskCondWrapper(code) code = eiDeclarePrefix(code) @@ -1173,7 +1579,9 @@ def format VectorIntVxsatFormat(code, category, *flags) {{ 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, 'vm_decl_rd': vm_decl_rd, - 'copy_old_vd': copyOldVd(old_vd_idx)}, + 'copy_old_vd': copyOldVd(old_vd_idx), + 'vi_check_sss': vi_check_sss + }, flags) # Because of the use of templates, we had to put all parts in header to @@ -1211,7 +1619,9 @@ def format VectorReduceIntWideningFormat(code, category, *flags) {{ 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, 'vm_decl_rd': vm_decl_rd, - 'copy_old_vd': copyOldVd(2)}, + 'copy_old_vd': copyOldVd(2), + 'vi_check_reduction': VI_CHECK_REDUCTION() + }, flags) # Because of the use of templates, we had to put all parts in header to @@ -1223,4 +1633,4 @@ def format VectorReduceIntWideningFormat(code, category, *flags) {{ VectorReduceMacroDeclare.subst(iop) + \ VectorReduceMacroConstructor.subst(iop) decode_block = VectorIntWideningDecodeBlock.subst(iop) -}}; \ No newline at end of file +}}; diff --git a/src/arch/riscv/isa/templates/vector_arith.isa b/src/arch/riscv/isa/templates/vector_arith.isa index 6c412cebc9..0a70d0d1fe 100644 --- a/src/arch/riscv/isa/templates/vector_arith.isa +++ b/src/arch/riscv/isa/templates/vector_arith.isa @@ -129,6 +129,7 @@ Fault if (machInst.vill) return std::make_shared("VILL is set", machInst); + %(vi_check_sss)s; %(op_decl)s; %(op_rd)s; %(vm_decl_rd)s; @@ -198,9 +199,10 @@ Fault using vu [[maybe_unused]] = std::make_unsigned_t; using vi [[maybe_unused]] = std::make_signed_t; - if (machInst.vill) + if (machInst.vill){ return std::make_shared("VILL is set", machInst); - + } + %(vi_vv_ext_check)s; auto SEW = vtype_SEW(vtype); auto offset = (VLEN / SEW) * (microIdx % %(ext_div)d); switch (SEW / %(ext_div)d) { 
@@ -361,6 +363,8 @@ Fault [[maybe_unused]] const size_t offset = (this->microIdx % 2 == 0) ? 0 : micro_vlmax; + %(vi_check_dss)s; + %(vi_check_dds)s; %(op_decl)s; %(op_rd)s; %(vm_decl_rd)s; @@ -393,6 +397,7 @@ Fault [[maybe_unused]] const size_t offset = (this->microIdx % 2 == 0) ? 0 : micro_vlmax; + %(vi_check_sds)s; %(op_decl)s; %(op_rd)s; %(vm_decl_rd)s; @@ -503,6 +508,7 @@ Fault VRM_REQUIRED; + %(vi_check_sss)s; %(op_decl)s; %(op_rd)s; %(vm_decl_rd)s; @@ -596,6 +602,8 @@ Fault [[maybe_unused]] const size_t offset = (this->microIdx % 2 == 0) ? 0 : micro_vlmax; + %(vi_check_dss)s; + %(vi_check_dds)s; %(op_decl)s; %(op_rd)s; %(vm_decl_rd)s; @@ -630,6 +638,7 @@ Fault [[maybe_unused]] const size_t offset = (this->microIdx % 2 == 0) ? 0 : micro_vlmax; + %(vi_check_sds)s %(op_decl)s; %(op_rd)s; %(vm_decl_rd)s; @@ -745,6 +754,17 @@ Fault using vi [[maybe_unused]] = std::make_signed_t; if (machInst.vill) return std::make_shared("VILL is set", machInst); + const float vflmul = Vflmul(vtype_vlmul(machInst.vtype8)); + if(!(is_aligned(machInst.vd, vflmul))){ + std::string error = + csprintf("Unaligned Vs2 or Vs1 group"); + return std::make_shared(error, machInst); + } + if(is_overlapped(machInst.vd, vflmul, machInst.vs2, 1)){ + std::string error = + csprintf("Unsupported overlap in Vd and Vs2 group"); + return std::make_shared(error, machInst); + } %(op_decl)s; %(op_rd)s; %(vm_decl_rd)s; @@ -782,6 +802,11 @@ Fault using vu = uint8_t; if (machInst.vill) return std::make_shared("VILL is set", machInst); + if(machInst.vd == machInst.vs2){ + std::string error = + csprintf("Vd is the same as Vs2"); + return std::make_shared(error, machInst); + } %(op_decl)s; %(op_rd)s; %(vm_decl_rd)s; @@ -937,6 +962,7 @@ Fault if (machInst.vill) return std::make_shared("VILL is set", machInst); + %(vi_check_mss)s; %(op_decl)s; %(op_rd)s; %(vm_decl_rd)s; @@ -1044,6 +1070,7 @@ Fault if (machInst.vill) return std::make_shared("VILL is set", machInst); + %(vi_check_mss)s; %(op_decl)s; %(op_rd)s; 
%(vm_decl_rd)s; @@ -1134,6 +1161,7 @@ Fault { // TODO: Check register alignment. // TODO: If vd is equal to vs2 the instruction is an architectural NOP. + %(vmv_check)s; %(op_decl)s; %(op_rd)s; for (size_t i = 0; i < (VLEN / 64); i++) { @@ -1361,6 +1389,7 @@ Fault if (machInst.vill) return std::make_shared("VILL is set", machInst); + %(vi_check_reduction)s; %(op_decl)s; %(op_rd)s; %(vm_decl_rd)s; @@ -1396,6 +1425,7 @@ Fault if (machInst.vill) return std::make_shared("VILL is set", machInst); + %(vi_check_reduction)s; %(op_decl)s; %(op_rd)s; %(vm_decl_rd)s; @@ -1433,6 +1463,7 @@ Fault if (machInst.vill) return std::make_shared("VILL is set", machInst); + %(vi_check_reduction)s; %(op_decl)s; %(op_rd)s; %(vm_decl_rd)s; @@ -1572,7 +1603,7 @@ Fault if (machInst.vill) return std::make_shared("VILL is set", machInst); - + %(vi_check_vrather)s; %(op_decl)s; %(op_rd)s; %(vm_decl_rd)s; @@ -1728,6 +1759,7 @@ Fault if (machInst.vill) return std::make_shared("VILL is set", machInst); + %(vi_check_reduction)s; %(op_decl)s; %(op_rd)s; %(vm_decl_rd)s; @@ -1752,4 +1784,4 @@ Fault return NoFault; } -}}; \ No newline at end of file +}}; diff --git a/src/arch/riscv/utility.hh index f085863c2f..c3ce7f0b02 100644 --- a/src/arch/riscv/utility.hh +++ b/src/arch/riscv/utility.hh @@ -186,6 +186,20 @@ vtype_vlmul(const uint64_t vtype) return (int64_t)sext<3>(bits(vtype, 2, 0)); } +inline float +Vflmul(const int64_t val){ + switch(val) { + case 0: return 1; + case 1: return 2; + case 2: return 4; + case 3: return 8; + case -1: return 1.0/2; + case -2: return 1.0/4; + case -3: return 1.0/8; + default: GEM5_UNREACHABLE; + } +} + inline uint64_t vtype_regs_per_group(const uint64_t vtype) { @@ -226,6 +240,44 @@ elem_mask(const T* vs, const int index) return (vs[idx] >> pos) & 1; } +inline bool +is_aligned(const unsigned val, const unsigned pos) +{ + return pos ? 
(val & (pos - 1)) == 0 : true; +} + +inline bool +is_overlapped(const int astart, int asize, + const int bstart, int bsize) +{ + asize = asize == 0 ? 1 : asize; + bsize = bsize == 0 ? 1 : bsize; + + const int aend = astart + asize; + const int bend = bstart + bsize; + + return std::max(aend, bend) - std::min(astart, bstart) < asize + bsize; +} + +inline bool +is_overlapped_widen(const int astart, int asize, + const int bstart, int bsize) +{ + asize = asize == 0 ? 1 : asize; + bsize = bsize == 0 ? 1 : bsize; + + const int aend = astart + asize; + const int bend = bstart + bsize; + + if (astart < bstart && + is_overlapped(astart, asize, bstart, bsize) && + !is_overlapped(astart, asize, bstart + bsize, bsize)) { + return false; + } else { + return std::max(aend, bend) - std::min(astart, bstart) < asize + bsize; + } +} + inline uint64_t mulhu(uint64_t a, uint64_t b) {