From 5428e070b3b01ff08afc0c4e5a5da00dc1048df1 Mon Sep 17 00:00:00 2001 From: Tiago Quelhas Date: Tue, 5 Nov 2024 21:13:49 +0100 Subject: [PATCH] [8.0.0] Make the Windows implementation of AcquireLock compatible with JVM locks. (#24210) Currently, we rely on CreateFile to effectively obtain an exclusive (write) lock on the entire file, which makes the later call to LockFileEx redundant. This CL makes it so that we open the file in shared mode, and actually use LockFileEx to lock it. This makes a client-side lock compatible with a server-side one obtained through the JVM (which defaults to opening files in shared mode and uses LockFileEx for locking). Even though this doesn't matter for the output base lock, which is only ever obtained from the client side (the server side doesn't use filesystem-based locks), it will be necessary to implement install base locking (as part of fixing #2109). Note that this means an older Bazel might immediately exit instead of blocking for the lock, if the latter was previously acquired by a newer Bazel (since the older Bazel will always CreateFile successfully, but treat the subsequent LockFileEx failure as an unrecoverable error). However, this only matters during the very small window during which the client-side lock is held (it's taken over by the server-side lock in very short order), so I believe this is a very small price to pay to avoid adding more complexity. RELNOTES[INC]: On Windows, a change to the output base locking protocol might cause an older Bazel invoked immediately after a newer Bazel (on the same output base) to error out instead of blocking for the lock, even if --block_for_lock is enabled. PiperOrigin-RevId: 692973056 Change-Id: Iaf1ccecfb4c138333ec9d7a694b10caf96b2917b --- src/main/cpp/blaze_util_windows.cc | 110 ++++++++++++++++------------- 1 file changed, 59 insertions(+), 51 deletions(-) diff --git a/src/main/cpp/blaze_util_windows.cc b/src/main/cpp/blaze_util_windows.cc index 922addd4ea15fe..318706c311de9d 100644 --- a/src/main/cpp/blaze_util_windows.cc +++ b/src/main/cpp/blaze_util_windows.cc @@ -1088,66 +1088,74 @@ uint64_t WindowsClock::GetMilliseconds() const { uint64_t AcquireLock(const blaze_util::Path& output_base, bool batch_mode, bool block, BlazeLock* blaze_lock) { blaze_util::Path lockfile = output_base.GetRelative("lock"); - blaze_lock->handle = INVALID_HANDLE_VALUE; + + // CreateFile defaults to opening the file exclusively. We intentionally open + // it in shared mode and instead rely on LockFileEx to obtain an exclusive + // lock, mimicking the behavior of FileChannel in the JVM, to make locks + // obtained on the client side compatible with the server side. + HANDLE handle = ::CreateFileW( + /* lpFileName */ lockfile.AsNativePath().c_str(), + /* dwDesiredAccess */ GENERIC_READ | GENERIC_WRITE, + /* dwShareMode */ FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, + /* lpSecurityAttributes */ nullptr, + /* dwCreationDisposition */ CREATE_ALWAYS, + /* dwFlagsAndAttributes */ FILE_ATTRIBUTE_NORMAL, + /* hTemplateFile */ nullptr); + if (handle == INVALID_HANDLE_VALUE) { + BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR) + << "Failed to CreateFile(" << lockfile.AsPrintablePath() + << "): " << GetLastErrorString(); + } + bool first_lock_attempt = true; - uint64_t st = GetMillisecondsMonotonic(); + uint64_t start_time = GetMillisecondsMonotonic(); + while (true) { - blaze_lock->handle = ::CreateFileW( - /* lpFileName */ lockfile.AsNativePath().c_str(), - /* dwDesiredAccess */ GENERIC_READ | GENERIC_WRITE, - /* dwShareMode */ FILE_SHARE_READ, - /* lpSecurityAttributes */ nullptr, - /* dwCreationDisposition */ CREATE_ALWAYS, - /* dwFlagsAndAttributes */ FILE_ATTRIBUTE_NORMAL, - /* hTemplateFile */ nullptr); - if (blaze_lock->handle != INVALID_HANDLE_VALUE) { - // We could open the file, so noone else holds a lock on it. + OVERLAPPED overlapped = {}; + BOOL success = LockFileEx( + /* hFile */ handle, + /* dwFlags */ LOCKFILE_EXCLUSIVE_LOCK | LOCKFILE_FAIL_IMMEDIATELY, + /* dwReserved */ 0, + /* nNumberOfBytesToLockLow */ 1, + /* nNumberOfBytesToLockHigh */ 0, + /* lpOverlapped */ &overlapped); + if (success) { + // Successfully acquired the lock. break; } - if (GetLastError() == ERROR_SHARING_VIOLATION) { - // Someone else has the lock. - if (first_lock_attempt) { - first_lock_attempt = false; - BAZEL_LOG(USER) << "Another command holds the client lock."; - if (block) { - BAZEL_LOG(USER) << "Waiting for it to complete..."; - } - fflush(stderr); - } - if (!block) { - BAZEL_DIE(blaze_exit_code::LOCK_HELD_NOBLOCK_FOR_LOCK) - << "Exiting because the lock is held and --noblock_for_lock was " - "given."; - } - Sleep(/* dwMilliseconds */ 200); - } else { - string err = GetLastErrorString(); + // The LockFileEx API documentation claims ERROR_IO_PENDING is raised + // when the lock is already held, but when LOCKFILE_FAIL_IMMEDIATELY is + // passed, the error is actually ERROR_LOCK_VIOLATION. + // See https://devblogs.microsoft.com/oldnewthing/20140905-00/?p=63. + if (GetLastError() != ERROR_LOCK_VIOLATION) { BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR) - << "AcquireLock(" << lockfile.AsPrintablePath() - << "): CreateFile failed: " << err; + << "Unexpected result from LockFileEx(" << lockfile.AsPrintablePath() + << "):" << GetLastErrorString(); + } + // Someone else has the lock. + if (first_lock_attempt) { + first_lock_attempt = false; + BAZEL_LOG(USER) << "Another command holds the client lock."; + if (block) { + BAZEL_LOG(USER) << "Waiting for it to complete..."; + } + fflush(stderr); } + if (!block) { + BAZEL_DIE(blaze_exit_code::LOCK_HELD_NOBLOCK_FOR_LOCK) + << "Exiting because the lock is held and --noblock_for_lock was " + "given."; + } + Sleep(/* dwMilliseconds */ 500); } - uint64_t wait_time = GetMillisecondsMonotonic() - st; - // We have the lock. - OVERLAPPED overlapped = {0}; - if (!LockFileEx( - /* hFile */ blaze_lock->handle, - /* dwFlags */ LOCKFILE_EXCLUSIVE_LOCK | LOCKFILE_FAIL_IMMEDIATELY, - /* dwReserved */ 0, - /* nNumberOfBytesToLockLow */ 1, - /* nNumberOfBytesToLockHigh */ 0, - /* lpOverlapped */ &overlapped)) { - string err = GetLastErrorString(); - BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR) - << "AcquireLock(" << lockfile.AsPrintablePath() - << "): LockFileEx failed: " << err; - } - // On other platforms we write some info about this process into the lock file - // such as the server PID. On Windows we don't do that because the file is - // locked exclusively, meaning other processes may not open the file even for - // reading. + // On Unix, we take advantage of the advisory nature of locks and write some + // information about the process holding the lock into the lock file, so that + // a concurrent process can read and display it. On Windows we can't do so + // because locks are mandatory, thus we cannot read the file concurrently. + uint64_t wait_time = GetMillisecondsMonotonic() - start_time; + blaze_lock->handle = handle; return wait_time; }