From 2aecc7b1101f766dc50cea74481f8aad646b50f3 Mon Sep 17 00:00:00 2001
From: zifeihan
Date: Thu, 23 Nov 2023 21:42:10 +0800
Subject: [PATCH] 8319900: Recursive lightweight locking: riscv64 implementation

---
 .../cpu/riscv/c1_MacroAssembler_riscv.cpp     |   8 +-
 .../cpu/riscv/c2_MacroAssembler_riscv.cpp     | 308 +++++++++++++++---
 .../cpu/riscv/c2_MacroAssembler_riscv.hpp     |   4 +-
 src/hotspot/cpu/riscv/interp_masm_riscv.cpp   |   1 -
 .../cpu/riscv/macroAssembler_riscv.cpp        | 160 +++++----
 .../cpu/riscv/macroAssembler_riscv.hpp        |   4 +-
 src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp |   1 -
 src/hotspot/cpu/riscv/vm_version_riscv.hpp    |   2 +
 8 files changed, 373 insertions(+), 115 deletions(-)

diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp
index 6c1dce0de1598..ec14c7f47811c 100644
--- a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp
+++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp
@@ -67,15 +67,17 @@ int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr
     lwu(hdr, Address(hdr, Klass::access_flags_offset()));
     test_bit(temp, hdr, exact_log2(JVM_ACC_IS_VALUE_BASED_CLASS));
     bnez(temp, slow_case, true /* is_far */);
+  } else if (LockingMode == LM_LIGHTWEIGHT) {
+    // Implicit null check of obj; when DiagnoseSyncOnValueBasedClasses != 0 the load_klass above already null checked.
+    ld(hdr, Address(obj));
   }

-  // Load object header
-  ld(hdr, Address(obj, hdr_offset));
-
   if (LockingMode == LM_LIGHTWEIGHT) {
     lightweight_lock(obj, hdr, temp, t1, slow_case);
   } else if (LockingMode == LM_LEGACY) {
     Label done;
+    // Load object header
+    ld(hdr, Address(obj, hdr_offset));
     // and mark it as unlocked
     ori(hdr, hdr, markWord::unlocked_value);
     // save unlocked object header into the displaced header location on the stack
diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp
index 5daeff511922c..bbdd06a7da8c1 100644
--- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp
+++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp
@@ -32,6 +32,7 @@
 #include "opto/output.hpp"
 #include "opto/subnode.hpp"
 #include "runtime/stubRoutines.hpp"
+#include "utilities/macros.hpp"

 #ifdef PRODUCT
 #define BLOCK_COMMENT(str) /* nothing */
@@ -74,7 +75,8 @@ void C2_MacroAssembler::fast_lock(Register objectReg, Register boxReg,
   if (LockingMode == LM_MONITOR) {
     mv(flag, 1); // Set non-zero flag to indicate 'failure' -> take slow-path
     j(cont);
-  } else if (LockingMode == LM_LEGACY) {
+  } else {
+    assert(LockingMode == LM_LEGACY, "must be");
     // Set tmp to be (markWord of object | UNLOCK_VALUE).
     ori(tmp, disp_hdr, markWord::unlocked_value);

@@ -106,17 +108,6 @@ void C2_MacroAssembler::fast_lock(Register objectReg, Register boxReg,
     sd(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes()));
     mv(flag, tmp); // we can use the value of tmp as the result here
     j(cont);
-  } else {
-    assert(LockingMode == LM_LIGHTWEIGHT, "");
-    Label slow;
-    lightweight_lock(oop, disp_hdr, tmp, tmp3Reg, slow);
-
-    // Indicate success on completion.
-    mv(flag, zr);
-    j(count);
-    bind(slow);
-    mv(flag, 1); // Set non-zero flag to indicate 'failure' -> take slow-path
-    j(no_count);
   }

   // Handle existing monitor.
@@ -129,14 +120,12 @@ void C2_MacroAssembler::fast_lock(Register objectReg, Register boxReg, cmpxchg(/*memory address*/tmp, /*expected value*/zr, /*new value*/xthread, Assembler::int64, Assembler::aq, Assembler::rl, /*result*/flag); // cas succeeds if flag == zr(expected) - if (LockingMode != LM_LIGHTWEIGHT) { - // Store a non-null value into the box to avoid looking like a re-entrant - // lock. The fast-path monitor unlock code checks for - // markWord::monitor_value so use markWord::unused_mark which has the - // relevant bit set, and also matches ObjectSynchronizer::slow_enter. - mv(tmp, (address)markWord::unused_mark().value()); - sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); - } + // Store a non-null value into the box to avoid looking like a re-entrant + // lock. The fast-path monitor unlock code checks for + // markWord::monitor_value so use markWord::unused_mark which has the + // relevant bit set, and also matches ObjectSynchronizer::slow_enter. + mv(tmp, (address)markWord::unused_mark().value()); + sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); beqz(flag, cont); // CAS success means locking succeeded @@ -188,7 +177,8 @@ void C2_MacroAssembler::fast_unlock(Register objectReg, Register boxReg, if (LockingMode == LM_MONITOR) { mv(flag, 1); // Set non-zero flag to indicate 'failure' -> take slow path j(cont); - } else if (LockingMode == LM_LEGACY) { + } else { + assert(LockingMode == LM_LEGACY, "must be"); // Check if it is still a light weight lock, this is true if we // see the stack address of the basicLock in the markWord of the // object. @@ -197,17 +187,6 @@ void C2_MacroAssembler::fast_unlock(Register objectReg, Register boxReg, Assembler::rl, /*result*/tmp); xorr(flag, box, tmp); // box == tmp if cas succeeds j(cont); - } else { - assert(LockingMode == LM_LIGHTWEIGHT, ""); - Label slow; - lightweight_unlock(oop, tmp, box, disp_hdr, slow); - - // Indicate success on completion. - mv(flag, zr); - j(count); - bind(slow); - mv(flag, 1); // Set non-zero flag to indicate 'failure' -> take slow path - j(no_count); } assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); @@ -217,17 +196,6 @@ void C2_MacroAssembler::fast_unlock(Register objectReg, Register boxReg, STATIC_ASSERT(markWord::monitor_value <= INT_MAX); add(tmp, tmp, -(int)markWord::monitor_value); // monitor - if (LockingMode == LM_LIGHTWEIGHT) { - // If the owner is anonymous, we need to fix it -- in an outline stub. - Register tmp2 = disp_hdr; - ld(tmp2, Address(tmp, ObjectMonitor::owner_offset())); - test_bit(t0, tmp2, exact_log2(ObjectMonitor::ANONYMOUS_OWNER)); - C2HandleAnonOMOwnerStub* stub = new (Compile::current()->comp_arena()) C2HandleAnonOMOwnerStub(tmp, tmp2); - Compile::current()->output()->add_stub(stub); - bnez(t0, stub->entry(), /* is_far */ true); - bind(stub->continuation()); - } - ld(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset())); Label notRecursive; @@ -260,6 +228,260 @@ void C2_MacroAssembler::fast_unlock(Register objectReg, Register boxReg, bind(no_count); } +void C2_MacroAssembler::fast_lock_lightweight(Register obj, Register box, Register tmp1, Register tmp2) { + assert(LockingMode == LM_LIGHTWEIGHT, "must be"); + // TODO: Current implementation uses box only as a TEMP, consider renaming. + assert_different_registers(obj, box, tmp1, tmp2); + + // Flag register, zero for success; non-zero for failure. + Register flag = t1; + // Handle inflated monitor. + Label inflated; + // Finish fast lock successfully. 
locked MUST be reached with flag == 0
+  Label locked;
+  // Finish fast lock unsuccessfully. slow_path MUST be reached with flag != 0
+  Label slow_path, slow_path_set_flag;
+
+  if (DiagnoseSyncOnValueBasedClasses != 0) {
+    load_klass(flag, obj);
+    lwu(flag, Address(flag, Klass::access_flags_offset()));
+    test_bit(flag, flag, exact_log2(JVM_ACC_IS_VALUE_BASED_CLASS));
+    bnez(flag, slow_path, true /* is_far */);
+  }
+
+  const Register mark = tmp1;
+  const Register t = box;
+
+  { // Lightweight locking
+
+    // Push lock to the lock stack and finish successfully; flag is set to 0 below.
+    Label push;
+
+    const Register top = tmp2;
+
+    // Check if lock-stack is full.
+    lwu(top, Address(xthread, JavaThread::lock_stack_top_offset()));
+    // Use flag register to give it a !0 value.
+    mv(flag, (unsigned)LockStack::end_offset());
+    bge(top, flag, slow_path, /* is_far */ true);
+
+    // Check if recursive.
+    add(t, xthread, top);
+    ld(t, Address(t, -oopSize));
+    beq(obj, t, push);
+
+    // Check for monitor (0b10).
+    ld(mark, Address(obj, oopDesc::mark_offset_in_bytes()));
+    test_bit(t, mark, exact_log2(markWord::monitor_value));
+    bnez(t, inflated, /* is_far */ true);
+
+    // Not inflated.
+
+    // Try to lock. Transition lock bits 0b01 => 0b00
+    assert(oopDesc::mark_offset_in_bytes() == 0, "required to avoid a lea");
+    ori(mark, mark, markWord::unlocked_value);
+    xori(t, mark, markWord::unlocked_value);
+    cmpxchgptr(mark, t, obj, flag, push, &slow_path_set_flag);
+
+    bind(push);
+    // After successful lock, push object on lock-stack.
+    add(t, xthread, top);
+    sd(obj, Address(t));
+    addw(top, top, oopSize);
+    sw(top, Address(xthread, JavaThread::lock_stack_top_offset()));
+    mv(flag, zr);
+    j(locked);
+  }
+
+  { // Handle inflated monitor.
+    bind(inflated);
+
+    // mark contains the tagged ObjectMonitor*.
+    const Register tagged_monitor = mark;
+    const uintptr_t monitor_tag = markWord::monitor_value;
+    const Register owner_addr = tmp2;
+
+    // Compute owner address.
+    la(owner_addr, Address(tagged_monitor, (in_bytes(ObjectMonitor::owner_offset()) - monitor_tag)));
+
+    // CAS owner (null => current thread).
+    cmpxchg(owner_addr, zr, xthread, Assembler::int64, Assembler::aq, Assembler::rl, flag);
+    beqz(flag, locked);
+
+    // Check if recursive; flag holds the previous owner.
+    bne(flag, xthread, slow_path);
+
+    // Recursive.
+    mv(flag, zr);
+    increment(Address(tagged_monitor, in_bytes(ObjectMonitor::recursions_offset()) - monitor_tag), 1, t0, t);
+  }
+
+  bind(locked);
+  increment(Address(xthread, JavaThread::held_monitor_count_offset()), 1, t0, t);
+
+#ifdef ASSERT
+  // Check that locked label is reached with flag == 0.
+  Label flag_correct;
+  beqz(flag, flag_correct);
+  stop("Fast Lock Flag != 0");
+#endif
+  Label end;
+  j(end);
+  bind(slow_path_set_flag);
+  mv(flag, 1);
+  bind(slow_path);
+#ifdef ASSERT
+  // Check that slow_path label is reached with flag != 0.
+  bnez(flag, flag_correct);
+  stop("Fast Lock Flag == 0");
+  bind(flag_correct);
+#endif
+  bind(end);
+  // C2 uses the value of flag (0 vs !0) to determine the continuation.
+}
+
+void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register box, Register tmp1,
+                                                Register tmp2) {
+  assert(LockingMode == LM_LIGHTWEIGHT, "must be");
+  // TODO: Current implementation uses box only as a TEMP, consider renaming.
+  assert_different_registers(obj, box, tmp1, tmp2);
+
+  // Flag register, zero for success; non-zero for failure.
+  Register flag = t1;
+  // Handle inflated monitor.
+  Label inflated, inflated_load_monitor;
+  // Finish fast unlock successfully.
unlocked MUST be reached with flag == 0
+  Label unlocked, unlocked_set_flag;
+  // Finish fast unlock unsuccessfully. slow_path MUST be reached with flag != 0
+  Label slow_path;
+
+  const Register mark = box;
+  const Register t = tmp2;
+
+  { // Lightweight unlock
+    const Register top = tmp1;
+
+    // Check if obj is top of lock-stack.
+    lwu(top, Address(xthread, JavaThread::lock_stack_top_offset()));
+    subw(top, top, oopSize);
+    add(t, xthread, top);
+    ld(t, Address(t));
+    // Top of lock-stack was not obj; it must be inflated.
+    bne(obj, t, inflated_load_monitor, /* is_far */ true);
+
+    // Pop lock-stack.
+    DEBUG_ONLY(add(t, xthread, top);)
+    DEBUG_ONLY(sd(zr, Address(t));)
+    sw(top, Address(xthread, JavaThread::lock_stack_top_offset()));
+
+    // Check if recursive.
+    add(t, xthread, top);
+    ld(t, Address(t, -oopSize));
+    beq(obj, t, unlocked_set_flag, /* is_far */ true);
+
+    // Not recursive.
+
+    // Check for monitor (0b10).
+    ld(mark, Address(obj, oopDesc::mark_offset_in_bytes()));
+    test_bit(t, mark, exact_log2(markWord::monitor_value));
+    bnez(t, inflated, /* is_far */ true);
+
+    // Try to unlock. Transition lock bits 0b00 => 0b01
+    assert(oopDesc::mark_offset_in_bytes() == 0, "required to avoid a lea");
+    // Use flag register to give it a !0 value.
+    ori(flag, mark, markWord::unlocked_value);
+    cmpxchgptr(mark, flag, obj, t, unlocked_set_flag, nullptr);
+
+    // Restore lock-stack and handle the unlock in runtime.
+    DEBUG_ONLY(add(t, xthread, top);)
+    DEBUG_ONLY(sd(obj, Address(t));)
+    // Use flag register to give it a !0 value.
+    addw(flag, top, oopSize);
+    sw(flag, Address(xthread, JavaThread::lock_stack_top_offset()));
+    j(slow_path);
+  }
+
+  { // Handle inflated monitor.
+    bind(inflated_load_monitor);
+    ld(mark, Address(obj, oopDesc::mark_offset_in_bytes()));
+#ifdef ASSERT
+    // TODO: Check lock-stack does not contain obj.
+    test_bit(t, mark, exact_log2(markWord::monitor_value));
+    bnez(t, inflated, /* is_far */ true);
+    stop("Fast Unlock not monitor");
+#endif
+
+    bind(inflated);
+
+    // mark contains the tagged ObjectMonitor*.
+    const Register monitor = mark;
+    const uintptr_t monitor_tag = markWord::monitor_value;
+
+    // Untag the monitor.
+    sub(monitor, mark, monitor_tag);
+
+    const Register recursions = tmp1;
+    Label not_recursive;
+
+    // Check if recursive.
+    ld(recursions, Address(monitor, ObjectMonitor::recursions_offset()));
+    beqz(recursions, not_recursive);
+
+    // Recursive unlock.
+    sub(recursions, recursions, 1u);
+    sd(recursions, Address(monitor, ObjectMonitor::recursions_offset()));
+    j(unlocked_set_flag);
+
+    bind(not_recursive);
+
+    Label release;
+    const Register t = tmp1;
+    const Register owner_addr = tmp2;
+
+    // Compute owner address.
+    la(owner_addr, Address(monitor, ObjectMonitor::owner_offset()));
+
+    // Check if the entry lists are empty.
+    ld(t, Address(monitor, ObjectMonitor::EntryList_offset()));
+    // Use flag register; a non-empty list leaves a !0 value for slow_path.
+    ld(flag, Address(monitor, ObjectMonitor::cxq_offset()));
+    orr(flag, flag, t);
+    beqz(flag, release);
+
+    // The owner may be anonymous and we removed the last obj entry in
+    // the lock-stack. This loses the information about the owner.
+    // Write the thread to the owner field so the runtime knows the owner.
+    sd(xthread, Address(owner_addr));
+    j(slow_path);
+
+    bind(release);
+    // Set owner to null.
+    membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore);
+    sd(zr, Address(owner_addr));
+  }
+
+  bind(unlocked_set_flag);
+  mv(flag, zr);
+  bind(unlocked);
+  decrement(Address(xthread, JavaThread::held_monitor_count_offset()));
+
+#ifdef ASSERT
+  // Check that unlocked label is reached with flag == 0.
+  Label flag_correct;
+  beqz(flag, flag_correct);
+  stop("Fast Unlock Flag != 0");
+#endif
+  bind(slow_path);
+#ifdef ASSERT
+  // Check that slow_path label is reached with flag != 0.
+  bnez(flag, flag_correct);
+  stop("Fast Unlock Flag == 0");
+  bind(flag_correct);
+#endif
+  // C2 uses the value of flag (0 vs !0) to determine the continuation.
+}
+
 // short string
 // StringUTF16.indexOfChar
 // StringLatin1.indexOfChar
diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp
index f36615809c0ee..0d7883800864b 100644
--- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp
+++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp
@@ -44,9 +44,11 @@
  public:
   // Code used by cmpFastLock and cmpFastUnlock mach instructions in .ad file.
-  // See full description in macroAssembler_riscv.cpp.
   void fast_lock(Register object, Register box, Register tmp1, Register tmp2, Register tmp3);
   void fast_unlock(Register object, Register box, Register tmp1, Register tmp2);
+  // Code used by cmpFastLockLightweight and cmpFastUnlockLightweight mach instructions in .ad file.
+  void fast_lock_lightweight(Register object, Register box, Register tmp1, Register tmp2);
+  void fast_unlock_lightweight(Register object, Register box, Register tmp1, Register tmp2);

   void string_compare(Register str1, Register str2,
                       Register cnt1, Register cnt2, Register result,
diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp
index 1240566e26cc4..b5e9c468d695d 100644
--- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp
+++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp
@@ -763,7 +763,6 @@ void InterpreterMacroAssembler::lock_object(Register lock_reg)
   }

   if (LockingMode == LM_LIGHTWEIGHT) {
-    ld(tmp, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
     lightweight_lock(obj_reg, tmp, tmp2, tmp3, slow_case);
     j(count);
   } else if (LockingMode == LM_LEGACY) {
diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp
index 5201486c8c634..de93d94f17081 100644
--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp
+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp
@@ -48,6 +48,7 @@
 #include "runtime/jniHandles.inline.hpp"
 #include "runtime/sharedRuntime.hpp"
 #include "runtime/stubRoutines.hpp"
+#include "utilities/globalDefinitions.hpp"
 #include "utilities/powerOfTwo.hpp"
 #ifdef COMPILER2
 #include "opto/compile.hpp"
@@ -4714,45 +4715,57 @@ void MacroAssembler::test_bit(Register Rd, Register Rs, uint32_t bit_pos) {
 // Falls through upon success.
 //
 // - obj: the object to be locked
-// - hdr: the header, already loaded from obj, will be destroyed
-// - tmp1, tmp2: temporary registers, will be destroyed
-void MacroAssembler::lightweight_lock(Register obj, Register hdr, Register tmp1, Register tmp2, Label& slow) {
+// - tmp1, tmp2, tmp3: temporary registers, will be destroyed
+void MacroAssembler::lightweight_lock(Register obj, Register tmp1, Register tmp2, Register tmp3, Label& slow) {
   assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking");
-  assert_different_registers(obj, hdr, tmp1, tmp2, t0);
-
-  // Check if we would have space on lock-stack for the object.
-  lwu(tmp1, Address(xthread, JavaThread::lock_stack_top_offset()));
-  mv(tmp2, (unsigned)LockStack::end_offset());
-  bge(tmp1, tmp2, slow, /* is_far */ true);
-
-  // Load (object->mark() | 1) into hdr
-  ori(hdr, hdr, markWord::unlocked_value);
-  // Clear lock-bits, into tmp2
-  xori(tmp2, hdr, markWord::unlocked_value);
-
-  // Try to swing header from unlocked to locked
-  Label success;
-  cmpxchgptr(hdr, tmp2, obj, tmp1, success, &slow);
-  bind(success);
-
-  // After successful lock, push object on lock-stack
-  lwu(tmp1, Address(xthread, JavaThread::lock_stack_top_offset()));
-  add(tmp2, xthread, tmp1);
-  sd(obj, Address(tmp2, 0));
-  addw(tmp1, tmp1, oopSize);
-  sw(tmp1, Address(xthread, JavaThread::lock_stack_top_offset()));
+  assert_different_registers(obj, tmp1, tmp2, tmp3, t0);
+
+  Label push, reload_top_and_push;
+  const Register top = tmp1;
+  const Register mark = tmp2;
+  const Register t = tmp3;
+
+  // Check if the lock-stack is full.
+  lwu(top, Address(xthread, JavaThread::lock_stack_top_offset()));
+  mv(t, (unsigned)LockStack::end_offset());
+  bge(top, t, slow, /* is_far */ true);
+
+  // Check for recursion.
+  add(t, xthread, top);
+  ld(t, Address(t, -oopSize));
+  beq(obj, t, push);
+
+  // Check header for monitor (0b10).
+  ld(mark, Address(obj, oopDesc::mark_offset_in_bytes()));
+  test_bit(t, mark, exact_log2(markWord::monitor_value));
+  bnez(t, slow, /* is_far */ true);
+
+  // Try to lock. Transition lock bits 0b01 => 0b00
+  assert(oopDesc::mark_offset_in_bytes() == 0, "required to avoid a lea");
+  ori(mark, mark, markWord::unlocked_value);
+  xori(t, mark, markWord::unlocked_value);
+  cmpxchgptr(mark, t, obj, top, reload_top_and_push, &slow);
+
+  bind(reload_top_and_push);
+  // Reload top; it was clobbered as the tmp register by cmpxchgptr.
+  lwu(top, Address(xthread, JavaThread::lock_stack_top_offset()));
+
+  bind(push);
+  // After successful lock, push object on lock-stack.
+  add(t, xthread, top);
+  sd(obj, Address(t));
+  addw(top, top, oopSize);
+  sw(top, Address(xthread, JavaThread::lock_stack_top_offset()));
 }

-// Implements ligthweight-unlocking.
+// Implements lightweight-unlocking.
 // Branches to slow upon failure.
 // Falls through upon success.
 //
 // - obj: the object to be unlocked
-// - hdr: the (pre-loaded) header of the object
-// - tmp1, tmp2: temporary registers
-void MacroAssembler::lightweight_unlock(Register obj, Register hdr, Register tmp1, Register tmp2, Label& slow) {
+// - tmp1, tmp2, tmp3: temporary registers
+void MacroAssembler::lightweight_unlock(Register obj, Register tmp1, Register tmp2, Register tmp3, Label& slow) {
   assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking");
-  assert_different_registers(obj, hdr, tmp1, tmp2, t0);
+  assert_different_registers(obj, tmp1, tmp2, tmp3, t0);

 #ifdef ASSERT
   {
@@ -4764,44 +4777,63 @@ void MacroAssembler::lightweight_unlock(Register obj, Register hdr, Register tmp
     Label stack_ok;
     lwu(tmp1, Address(xthread, JavaThread::lock_stack_top_offset()));
     mv(tmp2, (unsigned)LockStack::start_offset());
-    bgt(tmp1, tmp2, stack_ok);
+    bge(tmp1, tmp2, stack_ok);
     STOP("Lock-stack underflow");
     bind(stack_ok);
   }
-  {
-    // Check if the top of the lock-stack matches the unlocked object.
-    Label tos_ok;
-    subw(tmp1, tmp1, oopSize);
-    add(tmp1, xthread, tmp1);
-    ld(tmp1, Address(tmp1, 0));
-    beq(tmp1, obj, tos_ok);
-    STOP("Top of lock-stack does not match the unlocked object");
-    bind(tos_ok);
-  }
-  {
-    // Check that hdr is fast-locked.
-    Label hdr_ok;
-    andi(tmp1, hdr, markWord::lock_mask_in_place);
-    beqz(tmp1, hdr_ok);
-    STOP("Header is not fast-locked");
-    bind(hdr_ok);
-  }
 #endif

+  Label unlocked, push_and_slow;
+  const Register top = tmp1;
+  const Register mark = tmp2;
+  const Register t = tmp3;
+
+  // Check if obj is top of lock-stack.
+  lwu(top, Address(xthread, JavaThread::lock_stack_top_offset()));
+  subw(top, top, oopSize);
+  add(t, xthread, top);
+  ld(t, Address(t));
+  bne(obj, t, slow, /* is_far */ true);
+
+  // Pop lock-stack.
+  DEBUG_ONLY(add(t, xthread, top);)
+  DEBUG_ONLY(sd(zr, Address(t));)
+  sw(top, Address(xthread, JavaThread::lock_stack_top_offset()));
+
+  // Check if recursive.
+  add(t, xthread, top);
+  ld(t, Address(t, -oopSize));
+  beq(obj, t, unlocked, /* is_far */ true);
+
+  // Not recursive. Check header for monitor (0b10).
+  ld(mark, Address(obj, oopDesc::mark_offset_in_bytes()));
+  test_bit(t, mark, exact_log2(markWord::monitor_value));
+  bnez(t, push_and_slow, /* is_far */ true);

-  // Load the new header (unlocked) into tmp1
-  ori(tmp1, hdr, markWord::unlocked_value);
-
-  // Try to swing header from locked to unlocked
-  Label success;
-  cmpxchgptr(hdr, tmp1, obj, tmp2, success, &slow);
-  bind(success);
-
-  // After successful unlock, pop object from lock-stack
-  lwu(tmp1, Address(xthread, JavaThread::lock_stack_top_offset()));
-  subw(tmp1, tmp1, oopSize);
 #ifdef ASSERT
-  add(tmp2, xthread, tmp1);
-  sd(zr, Address(tmp2, 0));
+  // Check header not unlocked (0b01).
+  Label not_unlocked;
+  test_bit(t, mark, exact_log2(markWord::unlocked_value));
+  beqz(t, not_unlocked, /* is_far */ true);
+  stop("lightweight_unlock already unlocked");
+  bind(not_unlocked);
 #endif
-  sw(tmp1, Address(xthread, JavaThread::lock_stack_top_offset()));
+
+  // Try to unlock. Transition lock bits 0b00 => 0b01
+  assert(oopDesc::mark_offset_in_bytes() == 0, "required to avoid lea");
+  ori(t, mark, markWord::unlocked_value);
+  cmpxchgptr(mark, t, obj, top, unlocked, nullptr);
+
+  // Reload top; it was clobbered as the tmp register by cmpxchgptr.
+  lwu(top, Address(xthread, JavaThread::lock_stack_top_offset()));
+
+  bind(push_and_slow);
+  // Restore lock-stack and handle the unlock in runtime.
+ DEBUG_ONLY(add(t, xthread, top);) + DEBUG_ONLY(sd(obj, Address(t));) + addw(top, top, oopSize); + sw(top, Address(xthread, JavaThread::lock_stack_top_offset())); + j(slow); + + bind(unlocked); } diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp index 51bcba2f1a3fc..61f5d6baec483 100644 --- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp @@ -1474,8 +1474,8 @@ class MacroAssembler: public Assembler { void store_conditional(Register dst, Register new_val, Register addr, enum operand_size size, Assembler::Aqrl release); public: - void lightweight_lock(Register obj, Register hdr, Register tmp1, Register tmp2, Label& slow); - void lightweight_unlock(Register obj, Register hdr, Register tmp1, Register tmp2, Label& slow); + void lightweight_lock(Register obj, Register tmp1, Register tmp2, Register tmp3, Label& slow); + void lightweight_unlock(Register obj, Register tmp1, Register tmp2, Register tmp3, Label& slow); }; #ifdef ASSERT diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp index fa718e5b08e86..851669fe60f85 100644 --- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp @@ -1699,7 +1699,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, __ bnez(swap_reg, slow_path_lock); } else { assert(LockingMode == LM_LIGHTWEIGHT, ""); - __ ld(swap_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); __ lightweight_lock(obj_reg, swap_reg, tmp, lock_tmp, slow_path_lock); } diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.hpp b/src/hotspot/cpu/riscv/vm_version_riscv.hpp index 3c769ebfe2afe..30fff99047233 100644 --- a/src/hotspot/cpu/riscv/vm_version_riscv.hpp +++ b/src/hotspot/cpu/riscv/vm_version_riscv.hpp @@ -207,6 +207,8 @@ class VM_Version : public Abstract_VM_Version { constexpr static bool supports_stack_watermark_barrier() { return true; } + constexpr static bool supports_recursive_lightweight_locking() { return true; } + static bool supports_on_spin_wait() { return UseZihintpause; } };
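
Reviewer note (not part of the patch): the fast paths emitted above follow the lock-stack scheme of recursive lightweight locking (JDK-8319796). The standalone C++ model below is a rough sketch of that logic for readers who do not want to trace the assembly. Every name in it (ObjModel, LockStackModel, fast_lock, fast_unlock, capacity) is invented for this sketch and does not exist in HotSpot; the model also omits inflation, anonymous owners, the entry-list check, and C2's flag-based continuation.

// recursive_lw_lock_model.cpp -- illustrative model only, not HotSpot code.
#include <atomic>
#include <cstdint>
#include <cstdio>

constexpr uintptr_t unlocked_value = 0b01; // mark-word lock bits: unlocked
constexpr uintptr_t monitor_value  = 0b10; // mark-word lock bits: inflated

struct ObjModel {
  std::atomic<uintptr_t> mark{unlocked_value}; // stand-in for the object header
};

struct LockStackModel {            // stand-in for the per-JavaThread lock-stack
  static const int capacity = 8;
  ObjModel* elems[capacity];
  int top = 0;

  // Returns true on fast-path success, false -> take the slow path.
  bool fast_lock(ObjModel* obj) {
    if (top == capacity) return false;              // lock-stack full
    if (top > 0 && elems[top - 1] == obj) {         // recursive: adjacent entry
      elems[top++] = obj;                           // just push again
      return true;
    }
    uintptr_t mark = obj->mark.load();
    if (mark & monitor_value) return false;         // inflated -> slow path
    uintptr_t unlocked = mark | unlocked_value;     // expected bits 0b01
    uintptr_t locked   = unlocked ^ unlocked_value; // desired  bits 0b00
    if (!obj->mark.compare_exchange_strong(unlocked, locked)) {
      return false;                                 // contended -> slow path
    }
    elems[top++] = obj;                             // push after successful CAS
    return true;
  }

  bool fast_unlock(ObjModel* obj) {
    if (top == 0 || elems[top - 1] != obj) return false; // not top -> slow path
    top--;                                               // pop
    if (top > 0 && elems[top - 1] == obj) return true;   // recursive unlock
    uintptr_t locked = obj->mark.load();
    if (locked & monitor_value) { top++; return false; } // inflated: restore, slow
    uintptr_t unlocked = locked | unlocked_value;        // 0b00 => 0b01
    if (!obj->mark.compare_exchange_strong(locked, unlocked)) {
      top++;                                             // restore lock-stack
      return false;                                      // handle in runtime
    }
    return true;
  }
};

int main() {
  ObjModel o;
  LockStackModel ls;
  printf("lock:    %d\n", ls.fast_lock(&o));   // 1: CAS 0b01 => 0b00, push
  printf("relock:  %d\n", ls.fast_lock(&o));   // 1: recursive, push only
  printf("unlock:  %d\n", ls.fast_unlock(&o)); // 1: recursive, pop only
  printf("unlock:  %d\n", ls.fast_unlock(&o)); // 1: CAS 0b00 => 0b01
  return 0;
}

Note how the recursion check only compares the adjacent (previous) lock-stack entry: the scheme only admits recursive fast-locking while the repetitions are consecutive, which is exactly why the "ld(t, Address(t, -oopSize)); beq(obj, t, push)" sequence suffices in the assembly above.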