Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SPU: Use bitset to reduce memory for compiler instance #15434

Merged
merged 2 commits into from
Apr 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 12 additions & 14 deletions rpcs3/Emu/Cell/SPUCommonRecompiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2482,9 +2482,9 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
workload.push_back(entry_point);

std::memset(m_regmod.data(), 0xff, sizeof(m_regmod));
std::memset(m_use_ra.data(), 0xff, sizeof(m_use_ra));
std::memset(m_use_rb.data(), 0xff, sizeof(m_use_rb));
std::memset(m_use_rc.data(), 0xff, sizeof(m_use_rc));
m_use_ra.reset();
m_use_rb.reset();
m_use_rc.reset();
m_targets.clear();
m_preds.clear();
m_preds[entry_point];
Expand Down Expand Up @@ -2579,11 +2579,11 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
if (auto iflags = g_spu_iflag.decode(data))
{
if (+iflags & +spu_iflag::use_ra)
m_use_ra[pos / 4] = op.ra;
m_use_ra.set(pos / 4);
if (+iflags & +spu_iflag::use_rb)
m_use_rb[pos / 4] = op.rb;
m_use_rb.set(pos / 4);
if (+iflags & +spu_iflag::use_rc)
m_use_rc[pos / 4] = op.rc;
m_use_rc.set(pos / 4);
}

// Analyse instruction
Expand Down Expand Up @@ -3010,11 +3010,6 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
m_regmod[pos / 4] = s_reg_mfc_size;
break;
}
case MFC_Cmd:
{
m_use_rb[pos / 4] = s_reg_mfc_eal;
break;
}
default: break;
}

Expand Down Expand Up @@ -3461,10 +3456,13 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
reg_save = op.rt;
}

for (auto* _use : {&m_use_ra, &m_use_rb, &m_use_rc})
for (auto _use : std::initializer_list<std::pair<u32, bool>>{{op.ra, m_use_ra.test(ia / 4)}
, {op.rb, m_use_rb.test(ia / 4)}, {op.rc, m_use_rc.test(ia / 4)}})
{
if (u8 reg = (*_use)[ia / 4]; reg < s_reg_max)
if (_use.second)
{
const u32 reg = _use.first;

// Register reg use only if it happens before reg mod
if (!block.reg_mod[reg])
{
Expand All @@ -3479,7 +3477,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
}
}

if (m_use_rb[ia / 4] == s_reg_mfc_eal)
if (type == spu_itype::WRCH && op.ra == MFC_Cmd)
{
// Expand MFC_Cmd reg use
for (u8 reg : {s_reg_mfc_lsa, s_reg_mfc_tag, s_reg_mfc_size})
Expand Down
6 changes: 3 additions & 3 deletions rpcs3/Emu/Cell/SPURecompiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -202,9 +202,9 @@ class spu_recompiler_base
// GPR modified by the instruction (-1 = not set)
std::array<u8, 0x10000> m_regmod;

std::array<u8, 0x10000> m_use_ra;
std::array<u8, 0x10000> m_use_rb;
std::array<u8, 0x10000> m_use_rc;
std::bitset<0x10000> m_use_ra;
std::bitset<0x10000> m_use_rb;
std::bitset<0x10000> m_use_rc;

// List of possible targets for the instruction (entry shouldn't exist for simple instructions)
std::unordered_map<u32, std::basic_string<u32>, value_hash<u32, 2>> m_targets;
Expand Down
5 changes: 4 additions & 1 deletion rpcs3/Emu/Cell/SPUThread.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1839,7 +1839,10 @@ void spu_thread::cpu_work()

if (has_active_local_bps)
{
if (local_breakpoints[pc / 4])
// local_breakpoints is a packed bitmap: one bit per 4-byte instruction slot,
// eight slots per byte. The byte index is therefore pos_at / 8 and the bit
// inside that byte is pos_at % 8 (must match the writer in breakpoint_list).
const u32 pos_at = pc / 4;
const u32 pos_bit = 1u << (pos_at % 8);

if (local_breakpoints[pos_at / 8] & pos_bit)
{
// Ignore repetitions until a different instruction is issued
if (pc != current_bp_pc)
Expand Down
2 changes: 1 addition & 1 deletion rpcs3/Emu/Cell/SPUThread.h
Original file line number Diff line number Diff line change
Expand Up @@ -816,7 +816,7 @@ class spu_thread : public cpu_thread
atomic_t<spu_debugger_mode> debugger_mode{};

// PC-based breakpoint list
std::array<atomic_t<bool>, SPU_LS_SIZE / 4> local_breakpoints{};
std::array<atomic_t<u8>, SPU_LS_SIZE / 4 / 8> local_breakpoints{};
atomic_t<bool> has_active_local_bps = false;
u32 current_bp_pc = umax;
bool stop_flag_removal_protection = false;
Expand Down
4 changes: 3 additions & 1 deletion rpcs3/rpcs3qt/breakpoint_list.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,8 +125,10 @@ void breakpoint_list::HandleBreakpointRequest(u32 loc, bool only_add)

const auto spu = static_cast<spu_thread*>(m_cpu);
auto& list = spu->local_breakpoints;
const u32 pos_at = loc / 4;
const u32 pos_bit = 1u << (pos_at % 8);

if (list[loc / 4].test_and_invert())
if (list[pos_at / 8].fetch_xor(pos_bit) & pos_bit)
{
if (std::none_of(list.begin(), list.end(), [](auto& val){ return val.load(); }))
{
Expand Down
13 changes: 11 additions & 2 deletions rpcs3/rpcs3qt/debugger_list.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -142,8 +142,17 @@ void debugger_list::ShowAddress(u32 addr, bool select_addr, bool direct)
{
switch (m_cpu ? m_cpu->id_type() : 0)
{
case 1: return m_ppu_breakpoint_handler->HasBreakpoint(pc);
case 2: return (*spu_bps_list)[pc / 4].load();
case 1:
{
return m_ppu_breakpoint_handler->HasBreakpoint(pc);
}
case 2:
{
// NOTE(review): spu_bps_list presumably points at spu_thread::local_breakpoints,
// which is a packed bitmap (one bit per instruction slot, eight slots per byte) - confirm.
// Select the byte with pos_at / 8 and the bit inside it with pos_at % 8.
const u32 pos_at = pc / 4;
const u32 pos_bit = 1u << (pos_at % 8);

return !!((*spu_bps_list)[pos_at / 8] & pos_bit);
}
default: return false;
}
};
Expand Down