Skip to content

Commit

Permalink
WIP
Browse files Browse the repository at this point in the history
  • Loading branch information
bylaws committed Mar 1, 2024
1 parent e87ca02 commit 5067372
Showing 1 changed file with 28 additions and 14 deletions.
42 changes: 28 additions & 14 deletions FEXCore/Source/Interface/Core/JIT/Arm64/MemoryOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1718,7 +1718,7 @@ DEF_OP(MemSet) {
ARMEmitter::SingleUseForwardLabel BackwardImpl{};
ARMEmitter::SingleUseForwardLabel Done{};

mov(TMP1, Length.X());
mov(Size == 8 ? ARMEmitter::Size::i64Bit : ARMEmitter::Size::i32Bit, TMP1, Length);
if (Op->Prefix.IsInvalid()) {
mov(TMP2, MemReg.X());
}
Expand Down Expand Up @@ -1810,27 +1810,34 @@ DEF_OP(MemSet) {
and_(ARMEmitter::Size::i64Bit, TMP4, TMP2, 0x3);
cbnz(ARMEmitter::Size::i64Bit, TMP4, &AgainInternal);

// Keep the counter one copy ahead, so that underflow can be used to detect when to fall back
// to the copy-unit-size copy loop for the last chunk.
// Do this in two parts: fall back to the byte-by-byte loop if size < 32, and to the
// single copy loop if size < 64.
sub(ARMEmitter::Size::i64Bit, TMP1, TMP1, 32 / Size);
tbnz(TMP1, 63, &AgainInternal128Exit);
sub(ARMEmitter::Size::i64Bit, TMP1, TMP1, 32 / Size);
tbnz(TMP1, 63, &AgainInternal256Exit);

// Fill VTMP2 with the set pattern
dup(SubRegSize, VTMP2.Q(), Value);

Bind(&AgainInternal256);
// Keep the counter one copy ahead, so that underflow can be used to detect when to fall back
// to the copy-unit-size copy loop for the last chunk.
sub(ARMEmitter::Size::i64Bit, TMP1, TMP1, 64 / Size);
tbnz(TMP1, 63, &AgainInternal256Exit);
stp<ARMEmitter::IndexType::POST>(VTMP2.Q(), VTMP2.Q(), TMP2, 32 * Direction);
stp<ARMEmitter::IndexType::POST>(VTMP2.Q(), VTMP2.Q(), TMP2, 32 * Direction);
b(&AgainInternal256);
sub(ARMEmitter::Size::i64Bit, TMP1, TMP1, 64 / Size);
tbz(TMP1, 63, &AgainInternal256);

Bind(&AgainInternal256Exit);
add(ARMEmitter::Size::i64Bit, TMP1, TMP1, 64 / Size);
cbz(ARMEmitter::Size::i64Bit, TMP1, &DoneInternal);

Bind(&AgainInternal128);
sub(ARMEmitter::Size::i64Bit, TMP1, TMP1, 32 / Size);
tbnz(TMP1, 63, &AgainInternal128Exit);
Bind(&AgainInternal128);
stp<ARMEmitter::IndexType::POST>(VTMP2.Q(), VTMP2.Q(), TMP2, 32 * Direction);
b(&AgainInternal128);
sub(ARMEmitter::Size::i64Bit, TMP1, TMP1, 32 / Size);
tbz(TMP1, 63, &AgainInternal128);

Bind(&AgainInternal128Exit);
add(ARMEmitter::Size::i64Bit, TMP1, TMP1, 32 / Size);
Expand Down Expand Up @@ -1941,7 +1948,7 @@ DEF_OP(MemCpy) {
ARMEmitter::SingleUseForwardLabel BackwardImpl{};
ARMEmitter::SingleUseForwardLabel Done{};

mov(TMP1, Length.X());
mov(Size == 8 ? ARMEmitter::Size::i64Bit : ARMEmitter::Size::i32Bit, TMP1, Length);
if (Op->PrefixDest.IsInvalid()) {
mov(TMP2, MemRegDest.X());
}
Expand Down Expand Up @@ -2113,24 +2120,31 @@ DEF_OP(MemCpy) {
and_(ARMEmitter::Size::i64Bit, TMP4, TMP4, 0x3);
cbnz(ARMEmitter::Size::i64Bit, TMP4, &AgainInternal);

Bind(&AgainInternal256);
// Keep the counter one copy ahead, so that underflow can be used to detect when to fall back
// to the copy-unit-size copy loop for the last chunk.
sub(ARMEmitter::Size::i64Bit, TMP1, TMP1, 64 / Size);
// Do this in two parts: fall back to the byte-by-byte loop if size < 32, and to the
// single copy loop if size < 64.
sub(ARMEmitter::Size::i64Bit, TMP1, TMP1, 32 / Size);
tbnz(TMP1, 63, &AgainInternal128Exit);
sub(ARMEmitter::Size::i64Bit, TMP1, TMP1, 32 / Size);
tbnz(TMP1, 63, &AgainInternal256Exit);

Bind(&AgainInternal256);
MemCpy(32, 32 * Direction);
MemCpy(32, 32 * Direction);
b(&AgainInternal256);
sub(ARMEmitter::Size::i64Bit, TMP1, TMP1, 64 / Size);
tbz(TMP1, 63, &AgainInternal256);

Bind(&AgainInternal256Exit);
add(ARMEmitter::Size::i64Bit, TMP1, TMP1, 64 / Size);
cbz(ARMEmitter::Size::i64Bit, TMP1, &DoneInternal);

Bind(&AgainInternal128);
sub(ARMEmitter::Size::i64Bit, TMP1, TMP1, 32 / Size);
tbnz(TMP1, 63, &AgainInternal128Exit);
Bind(&AgainInternal128);
MemCpy(32, 32 * Direction);
b(&AgainInternal128);
sub(ARMEmitter::Size::i64Bit, TMP1, TMP1, 32 / Size);
tbz(TMP1, 63, &AgainInternal128);

Bind(&AgainInternal128Exit);
add(ARMEmitter::Size::i64Bit, TMP1, TMP1, 32 / Size);
Expand Down

0 comments on commit 5067372

Please sign in to comment.