Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merge xfloat options #14247

Merged
merged 2 commits into from
Aug 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 32 additions & 32 deletions rpcs3/Emu/Cell/SPURecompiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5316,7 +5316,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
if (src > 0x40000)
{
// Use the xfloat hint to create 256-bit (4x double) PHI
llvm::Type* type = g_cfg.core.spu_accurate_xfloat && bb.reg_maybe_xf[i] ? get_type<f64[4]>() : get_reg_type(i);
llvm::Type* type = g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate && bb.reg_maybe_xf[i] ? get_type<f64[4]>() : get_reg_type(i);

const auto _phi = m_ir->CreatePHI(type, ::size32(bb.preds), fmt::format("phi0x%05x_r%u", baddr, i));
m_block->phi[i] = _phi;
Expand Down Expand Up @@ -8876,7 +8876,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
void FREST(spu_opcode_t op)
{
// TODO
if (g_cfg.core.spu_accurate_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
{
const auto a = get_vr<f32[4]>(op.ra);
const auto mask_ov = sext<s32[4]>(bitcast<s32[4]>(fabs(a)) > splat<s32[4]>(0x7e7fffff));
Expand All @@ -8885,7 +8885,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
return;
}

if (g_cfg.core.spu_approx_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate)
{
register_intrinsic("spu_frest", [&](llvm::CallInst* ci)
{
Expand Down Expand Up @@ -8918,13 +8918,13 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
void FRSQEST(spu_opcode_t op)
{
// TODO
if (g_cfg.core.spu_accurate_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
{
set_vr(op.rt, fsplat<f64[4]>(1.0) / fsqrt(fabs(get_vr<f64[4]>(op.ra))));
return;
}

if (g_cfg.core.spu_approx_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate)
{
register_intrinsic("spu_frsqest", [&](llvm::CallInst* ci)
{
Expand Down Expand Up @@ -8956,7 +8956,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator

void FCGT(spu_opcode_t op)
{
if (g_cfg.core.spu_accurate_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
{
set_vr(op.rt, sext<s32[4]>(fcmp_ord(get_vr<f64[4]>(op.ra) > get_vr<f64[4]>(op.rb))));
return;
Expand Down Expand Up @@ -9003,7 +9003,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
return eval(sext<s32[4]>(bitcast<s32[4]>(a) > bitcast<s32[4]>(b)));
}

if (g_cfg.core.spu_approx_xfloat || g_cfg.core.spu_relaxed_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate || g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::relaxed)
{
const auto ai = eval(bitcast<s32[4]>(a));
const auto bi = eval(bitcast<s32[4]>(b));
Expand Down Expand Up @@ -9034,7 +9034,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator

void FCMGT(spu_opcode_t op)
{
if (g_cfg.core.spu_accurate_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
{
set_vr(op.rt, sext<s32[4]>(fcmp_ord(fabs(get_vr<f64[4]>(op.ra)) > fabs(get_vr<f64[4]>(op.rb)))));
return;
Expand Down Expand Up @@ -9080,7 +9080,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
return eval(sext<s32[4]>(mai > mbi));
}

if (g_cfg.core.spu_approx_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate)
{
return eval(sext<s32[4]>(fcmp_uno(ma > mb) & (mai > mbi)));
}
Expand All @@ -9101,7 +9101,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator

void FA(spu_opcode_t op)
{
if (g_cfg.core.spu_accurate_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
{
set_vr(op.rt, get_vr<f64[4]>(op.ra) + get_vr<f64[4]>(op.rb));
return;
Expand All @@ -9126,7 +9126,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator

void FS(spu_opcode_t op)
{
if (g_cfg.core.spu_accurate_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
{
set_vr(op.rt, get_vr<f64[4]>(op.ra) - get_vr<f64[4]>(op.rb));
return;
Expand All @@ -9137,7 +9137,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
const auto a = value<f32[4]>(ci->getOperand(0));
const auto b = value<f32[4]>(ci->getOperand(1));

if (g_cfg.core.spu_approx_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate)
{
const auto bc = clamp_smax(b); // for #4478
return eval(a - bc);
Expand All @@ -9159,7 +9159,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator

void FM(spu_opcode_t op)
{
if (g_cfg.core.spu_accurate_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
{
set_vr(op.rt, get_vr<f64[4]>(op.ra) * get_vr<f64[4]>(op.rb));
return;
Expand All @@ -9170,7 +9170,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
const auto a = value<f32[4]>(ci->getOperand(0));
const auto b = value<f32[4]>(ci->getOperand(1));

if (g_cfg.core.spu_approx_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate)
{
if (a.value == b.value)
{
Expand Down Expand Up @@ -9206,7 +9206,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator

void FESD(spu_opcode_t op)
{
if (g_cfg.core.spu_accurate_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
{
const auto r = zshuffle(get_vr<f64[4]>(op.ra), 1, 3);
const auto d = bitcast<s64[2]>(r);
Expand Down Expand Up @@ -9236,7 +9236,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator

void FRDS(spu_opcode_t op)
{
if (g_cfg.core.spu_accurate_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
{
const auto r = get_vr<f64[2]>(op.ra);
const auto d = bitcast<s64[2]>(r);
Expand Down Expand Up @@ -9267,7 +9267,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator

void FCEQ(spu_opcode_t op)
{
if (g_cfg.core.spu_accurate_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
{
set_vr(op.rt, sext<s32[4]>(fcmp_ord(get_vr<f64[4]>(op.ra) == get_vr<f64[4]>(op.rb))));
return;
Expand Down Expand Up @@ -9320,7 +9320,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
return eval(sext<s32[4]>(bitcast<s32[4]>(a) == bitcast<s32[4]>(b)));
}

if (g_cfg.core.spu_approx_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate)
{
return eval(sext<s32[4]>(fcmp_ord(a == b)) | sext<s32[4]>(bitcast<s32[4]>(a) == bitcast<s32[4]>(b)));
}
Expand All @@ -9341,7 +9341,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator

void FCMEQ(spu_opcode_t op)
{
if (g_cfg.core.spu_accurate_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
{
set_vr(op.rt, sext<s32[4]>(fcmp_ord(fabs(get_vr<f64[4]>(op.ra)) == fabs(get_vr<f64[4]>(op.rb)))));
return;
Expand Down Expand Up @@ -9397,7 +9397,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
return eval(sext<s32[4]>(bitcast<s32[4]>(fa) == bitcast<s32[4]>(fb)));
}

if (g_cfg.core.spu_approx_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate)
{
return eval(sext<s32[4]>(fcmp_ord(fa == fb)) | sext<s32[4]>(bitcast<s32[4]>(fa) == bitcast<s32[4]>(fb)));
}
Expand Down Expand Up @@ -9490,7 +9490,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
void FNMS(spu_opcode_t op)
{
// See FMA.
if (g_cfg.core.spu_accurate_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
{
const auto [a, b, c] = get_vrs<f64[4]>(op.ra, op.rb, op.rc);
set_vr(op.rt4, fmuladd(-a, b, c));
Expand All @@ -9503,7 +9503,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
const auto b = value<f32[4]>(ci->getOperand(1));
const auto c = value<f32[4]>(ci->getOperand(2));

if (g_cfg.core.spu_approx_xfloat || g_cfg.core.spu_relaxed_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate || g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::relaxed)
{
return fma32x4(eval(-clamp_smax(a)), clamp_smax(b), c);
}
Expand All @@ -9525,7 +9525,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
void FMA(spu_opcode_t op)
{
// Hardware FMA produces the same result as multiple + add on the limited double range (xfloat).
if (g_cfg.core.spu_accurate_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
{
const auto [a, b, c] = get_vrs<f64[4]>(op.ra, op.rb, op.rc);
set_vr(op.rt4, fmuladd(a, b, c));
Expand All @@ -9538,7 +9538,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
const auto b = value<f32[4]>(ci->getOperand(1));
const auto c = value<f32[4]>(ci->getOperand(2));

if (g_cfg.core.spu_approx_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate)
{
const auto ma = sext<s32[4]>(fcmp_uno(a != fsplat<f32[4]>(0.)));
const auto mb = sext<s32[4]>(fcmp_uno(b != fsplat<f32[4]>(0.)));
Expand Down Expand Up @@ -9599,7 +9599,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
void FMS(spu_opcode_t op)
{
// See FMA.
if (g_cfg.core.spu_accurate_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
{
const auto [a, b, c] = get_vrs<f64[4]>(op.ra, op.rb, op.rc);
set_vr(op.rt4, fmuladd(a, b, -c));
Expand All @@ -9612,7 +9612,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
const auto b = value<f32[4]>(ci->getOperand(1));
const auto c = value<f32[4]>(ci->getOperand(2));

if (g_cfg.core.spu_approx_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate)
{
return fma32x4(clamp_smax(a), clamp_smax(b), eval(-c));
}
Expand Down Expand Up @@ -9646,7 +9646,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
void FI(spu_opcode_t op)
{
// TODO
if (g_cfg.core.spu_accurate_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
{
set_vr(op.rt, get_vr<f64[4]>(op.rb));
// const auto [a, b] = get_vrs<f64[4]>(op.ra, op.rb);
Expand Down Expand Up @@ -9674,7 +9674,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
return bitcast<f32[4]>((b & 0xff800000u) | (bitcast<u32[4]>(fpcast<f32[4]>(bnew)) & ~0xff800000u)); // Inject old sign and exponent
});

if (g_cfg.core.spu_approx_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate)
{
register_intrinsic("spu_re", [&](llvm::CallInst* ci)
{
Expand Down Expand Up @@ -9733,7 +9733,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator

void CFLTS(spu_opcode_t op)
{
if (g_cfg.core.spu_accurate_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
{
value_t<f64[4]> a = get_vr<f64[4]>(op.ra);
value_t<f64[4]> s;
Expand Down Expand Up @@ -9807,7 +9807,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator

void CFLTU(spu_opcode_t op)
{
if (g_cfg.core.spu_accurate_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
{
value_t<f64[4]> a = get_vr<f64[4]>(op.ra);
value_t<f64[4]> s;
Expand Down Expand Up @@ -9890,7 +9890,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator

void CSFLT(spu_opcode_t op)
{
if (g_cfg.core.spu_accurate_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
{
value_t<s32[4]> a = get_vr<s32[4]>(op.ra);
value_t<f64[4]> r;
Expand Down Expand Up @@ -9930,7 +9930,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator

void CUFLT(spu_opcode_t op)
{
if (g_cfg.core.spu_accurate_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
{
value_t<s32[4]> a = get_vr<s32[4]>(op.ra);
value_t<f64[4]> r;
Expand Down
4 changes: 1 addition & 3 deletions rpcs3/Emu/system_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,7 @@ struct cfg_root : cfg::node
cfg::uint<0, 10000> mfc_transfers_timeout{ this, "MFC Commands Timeout", 0, true };
cfg::_bool mfc_shuffling_in_steps{ this, "MFC Commands Shuffling In Steps", false, true };
cfg::_enum<tsx_usage> enable_TSX{ this, "Enable TSX", enable_tsx_by_default() ? tsx_usage::enabled : tsx_usage::disabled }; // Enable TSX. Forcing this on Haswell/Broadwell CPUs should be used carefully
cfg::_bool spu_accurate_xfloat{ this, "Accurate xfloat", false };
cfg::_bool spu_approx_xfloat{ this, "Approximate xfloat", true };
cfg::_bool spu_relaxed_xfloat{ this, "Relaxed xfloat", true }; // Approximate accuracy for only the "FCGT", "FNMS", "FREST" AND "FRSQEST" instructions
cfg::_enum<xfloat_accuracy> spu_xfloat_accuracy{ this, "XFloat Accuracy", xfloat_accuracy::approximate, false };
cfg::_int<-1, 14> ppu_128_reservations_loop_max_length{ this, "Accurate PPU 128-byte Reservation Op Max Length", 0, true }; // -1: Always accurate, 0: Never accurate, 1-14: max accurate loop length
cfg::_int<-64, 64> stub_ppu_traps{ this, "Stub PPU Traps", 0, true }; // Hack, skip PPU traps for rare cases where the trap is continueable (specify relative instructions to skip)
cfg::_bool full_width_avx512{ this, "Full Width AVX-512", true };
Expand Down
17 changes: 17 additions & 0 deletions rpcs3/Emu/system_config_types.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -664,3 +664,20 @@ void fmt_class_string<output_scaling_mode>::format(std::string& out, u64 arg)
return unknown;
});
}

template <>
void fmt_class_string<xfloat_accuracy>::format(std::string& out, u64 arg)
{
	// Translates a single xfloat_accuracy enumerator into its display name.
	const auto to_name = [](xfloat_accuracy accuracy)
	{
		switch (accuracy)
		{
		case xfloat_accuracy::accurate:
			return "Accurate";
		case xfloat_accuracy::approximate:
			return "Approximate";
		case xfloat_accuracy::relaxed:
			return "Relaxed";
		case xfloat_accuracy::inaccurate:
			return "Inaccurate";
		}

		// Reached only when the value matches none of the enumerators above.
		return unknown;
	};

	// format_enum receives the raw argument together with the name lookup.
	format_enum(out, arg, to_name);
}
8 changes: 8 additions & 0 deletions rpcs3/Emu/system_config_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -320,3 +320,11 @@ enum class stereo_render_mode_options
side_by_side,
over_under
};

// Accuracy level used for SPU "xfloat" floating-point emulation; stored in the
// "XFloat Accuracy" core setting.
// The enumerator values are cast to and from integer indices by the settings UI,
// so the declaration order must be kept stable.
enum class xfloat_accuracy
{
// The SPU LLVM recompiler takes its f64[4] (4x double) code paths for float instructions.
accurate,
// Default value of the setting; enables the per-instruction approximation paths in the recompiler.
approximate,
// Approximate accuracy for only the "FCGT", "FNMS", "FREST" and "FRSQEST" instructions.
// NOTE(review): the recompiler hunks visible in this change test for `relaxed` only in
// FCGT and FNMS; FREST and FRSQEST test `approximate` alone - confirm which is intended.
relaxed,
// NOTE(review): this entry appears to be removed from the settings dialog combo box
// (remove_item call in settings_dialog.cpp) - confirm it is meant to be config-file only.
inaccurate
};
8 changes: 8 additions & 0 deletions rpcs3/rpcs3qt/emu_settings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1283,6 +1283,14 @@ QString emu_settings::GetLocalizedSetting(const QString& original, emu_settings_
case midi_device_type::keyboard: return tr("Keyboard", "Midi Device Type");
}
break;
case emu_settings_type::XFloatAccuracy:
switch (static_cast<xfloat_accuracy>(index))
{
case xfloat_accuracy::accurate: return tr("Accurate XFloat");
case xfloat_accuracy::approximate: return tr("Approximate XFloat");
case xfloat_accuracy::relaxed: return tr("Relaxed XFloat");
case xfloat_accuracy::inaccurate: return tr("Inaccurate XFloat");
}
default:
break;
}
Expand Down
6 changes: 2 additions & 4 deletions rpcs3/rpcs3qt/emu_settings_type.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,7 @@ enum class emu_settings_type
AccurateClineStores,
AccurateRSXAccess,
FIFOAccuracy,
AccurateXFloat,
ApproximateXFloat,
XFloatAccuracy,
AccuratePPU128Loop,
MFCCommandsShuffling,
NumPPUThreads,
Expand Down Expand Up @@ -212,8 +211,7 @@ inline static const QMap<emu_settings_type, cfg_location> settings_location =
{ emu_settings_type::AccurateClineStores, { "Core", "Accurate Cache Line Stores"}},
{ emu_settings_type::AccurateRSXAccess, { "Core", "Accurate RSX reservation access"}},
{ emu_settings_type::FIFOAccuracy, { "Core", "RSX FIFO Accuracy"}},
{ emu_settings_type::AccurateXFloat, { "Core", "Accurate xfloat"}},
{ emu_settings_type::ApproximateXFloat, { "Core", "Approximate xfloat"}},
{ emu_settings_type::XFloatAccuracy, { "Core", "XFloat Accuracy"}},
{ emu_settings_type::MFCCommandsShuffling, { "Core", "MFC Commands Shuffling Limit"}},
{ emu_settings_type::SetDAZandFTZ, { "Core", "Set DAZ and FTZ"}},
{ emu_settings_type::SPUBlockSize, { "Core", "SPU Block Size"}},
Expand Down
25 changes: 2 additions & 23 deletions rpcs3/rpcs3qt/settings_dialog.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -265,30 +265,9 @@ settings_dialog::settings_dialog(std::shared_ptr<gui_settings> gui_settings, std
SubscribeTooltip(ui->spuLoopDetection, tooltips.settings.spu_loop_detection);

// Comboboxes
m_emu_settings->EnhanceComboBox(ui->xfloatAccuracy, emu_settings_type::XFloatAccuracy);
SubscribeTooltip(ui->gb_xfloat_accuracy, tooltips.settings.xfloat);
ui->xfloatAccuracy->addItem(tr("Accurate XFloat"));
ui->xfloatAccuracy->addItem(tr("Approximate XFloat"));
ui->xfloatAccuracy->addItem(tr("Relaxed XFloat"));

connect(ui->xfloatAccuracy, QOverload<int>::of(&QComboBox::currentIndexChanged), this, [this](int index)
{
if (index < 0) return;

m_emu_settings->SetSetting(emu_settings_type::AccurateXFloat, index == 0 ? "true" : "false");
m_emu_settings->SetSetting(emu_settings_type::ApproximateXFloat, index == 1 ? "true" : "false");
});

connect(m_emu_settings.get(), &emu_settings::RestoreDefaultsSignal, this, [this]()
{
ui->xfloatAccuracy->setCurrentIndex(1);
});

if (m_emu_settings->GetSetting(emu_settings_type::AccurateXFloat) == "true")
ui->xfloatAccuracy->setCurrentIndex(0);
else if (m_emu_settings->GetSetting(emu_settings_type::ApproximateXFloat) == "true")
ui->xfloatAccuracy->setCurrentIndex(1);
else
ui->xfloatAccuracy->setCurrentIndex(2);
remove_item(ui->xfloatAccuracy, static_cast<int>(xfloat_accuracy::inaccurate), static_cast<int>(g_cfg.core.spu_xfloat_accuracy.def));

m_emu_settings->EnhanceComboBox(ui->spuBlockSize, emu_settings_type::SPUBlockSize);
SubscribeTooltip(ui->gb_spuBlockSize, tooltips.settings.spu_block_size);
Expand Down