Fix block cache ID uniqueness on Windows
Since we do not evict a file's blocks from the block cache before that
file is deleted, we require that a file's cache ID prefix be both unique
and non-reusable. However, the Windows functionality we were relying on
only guaranteed uniqueness. That meant a newly created file could be
assigned the same cache ID prefix as a deleted file. If the newly
created file had block offsets matching the deleted file's, the full
cache keys could be exactly the same, resulting in obsolete data blocks
being returned from the cache when reading from the new file.
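
To illustrate the failure mode, here is a minimal sketch (assumed names,
not RocksDB's actual implementation) of how a block cache key is built
from the file's cache ID prefix plus the block offset. If a new file is
handed a deleted file's prefix and has a block at the same offset, the
keys collide and a lookup returns the deleted file's stale block:

// Minimal sketch (assumed names, not RocksDB's actual code) of how a block
// cache key is formed and why a reused file ID prefix causes collisions.
#include <cassert>
#include <cstdint>
#include <iostream>
#include <string>

// Hypothetical helper: append the raw bytes of the offset to the key.
// (A fixed-width append is enough to demonstrate the collision.)
static void AppendOffset(std::string* key, uint64_t offset) {
  key->append(reinterpret_cast<const char*>(&offset), sizeof(offset));
}

// Cache key = file's unique ID prefix + block offset within the file.
static std::string MakeBlockCacheKey(const std::string& file_id_prefix,
                                     uint64_t block_offset) {
  std::string key = file_id_prefix;
  AppendOffset(&key, block_offset);
  return key;
}

int main() {
  // Suppose the OS hands out ID "42" for a file, the file is deleted without
  // evicting its cached blocks, and a new file is then assigned ID "42" too.
  std::string old_file_prefix = "42";
  std::string new_file_prefix = "42";  // reused ID -- the bug's precondition

  // Both files happen to have a block starting at the same offset.
  std::string old_key = MakeBlockCacheKey(old_file_prefix, 0);
  std::string new_key = MakeBlockCacheKey(new_file_prefix, 0);

  // Identical keys: a lookup for the new file's block returns the deleted
  // file's obsolete block from cache.
  assert(old_key == new_key);
  std::cout << "cache keys collide: " << (old_key == new_key) << std::endl;
  return 0;
}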

We noticed this when running on FAT32, where compaction was writing
out-of-order keys due to reading obsolete blocks from its input files.
The functionality is documented as behaving the same way on NTFS,
although I wasn't able to reproduce the issue there.

Test Plan: we had a reliable repro of out-of-order keys on FAT32 that
was fixed by this change
ajkr committed Sep 24, 2019
1 parent 8978e86 commit 217d7a1
Showing 1 changed file with 11 additions and 54 deletions.
65 changes: 11 additions & 54 deletions port/win/io_win.cc
@@ -175,60 +175,17 @@ Status ftruncate(const std::string& filename, HANDLE hFile,
return status;
}

size_t GetUniqueIdFromFile(HANDLE hFile, char* id, size_t max_size) {

if (max_size < kMaxVarint64Length * 3) {
return 0;
}
#if (_WIN32_WINNT == _WIN32_WINNT_VISTA)
// MINGW as defined by CMake file.
// yuslepukhin: I hate the guts of the above macros.
// This impl does not guarantee uniqueness everywhere
// but is reasonably good
BY_HANDLE_FILE_INFORMATION FileInfo;

BOOL result = GetFileInformationByHandle(hFile, &FileInfo);

TEST_SYNC_POINT_CALLBACK("GetUniqueIdFromFile:FS_IOC_GETVERSION", &result);

if (!result) {
return 0;
}

char* rid = id;
rid = EncodeVarint64(rid, uint64_t(FileInfo.dwVolumeSerialNumber));
rid = EncodeVarint64(rid, uint64_t(FileInfo.nFileIndexHigh));
rid = EncodeVarint64(rid, uint64_t(FileInfo.nFileIndexLow));

assert(rid >= id);
return static_cast<size_t>(rid - id);
#else
FILE_ID_INFO FileInfo;
BOOL result = GetFileInformationByHandleEx(hFile, FileIdInfo, &FileInfo,
sizeof(FileInfo));

TEST_SYNC_POINT_CALLBACK("GetUniqueIdFromFile:FS_IOC_GETVERSION", &result);

if (!result) {
return 0;
}

static_assert(sizeof(uint64_t) == sizeof(FileInfo.VolumeSerialNumber),
"Wrong sizeof expectations");
// FileId.Identifier is an array of 16 BYTEs, we encode them as two uint64_t
static_assert(sizeof(uint64_t) * 2 == sizeof(FileInfo.FileId.Identifier),
"Wrong sizeof expectations");

char* rid = id;
rid = EncodeVarint64(rid, uint64_t(FileInfo.VolumeSerialNumber));
uint64_t* file_id = reinterpret_cast<uint64_t*>(&FileInfo.FileId.Identifier[0]);
rid = EncodeVarint64(rid, *file_id);
++file_id;
rid = EncodeVarint64(rid, *file_id);

assert(rid >= id);
return static_cast<size_t>(rid - id);
#endif
size_t GetUniqueIdFromFile(HANDLE /*hFile*/, char* /*id*/,
                           size_t /*max_size*/) {
// `FileIdInfo` is not supported. The possible alternative,
// `BY_HANDLE_FILE_INFORMATION::nFileIndex{High,Low}`, allows deleted files'
// IDs to be reused for newly created files. Since we don't evict from block
// cache before deleting a file, this introduces a risk that readers access
// obsolete data blocks.
//
// Returning 0 is safe as it causes the table reader to generate a unique ID.
// This is suboptimal for performance as it prevents multiple table readers
// for the same file from sharing cached blocks, but at least it's safe.
return 0;
}

////////////////////////////////////////////////////////////////////////////////////////////////////
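
For context on why returning 0 is safe: when no file-level unique ID is
available, the table reader generates its own prefix, e.g. from a
monotonically increasing counter, so keys from different files can never
collide. A rough sketch of that fallback (assumed names, not the exact
RocksDB code):

// Rough sketch (assumed names, not the exact RocksDB code) of the fallback
// when GetUniqueIdFromFile() returns 0: the table reader generates its own
// per-reader prefix from a process-wide counter. This is always collision
// free, but two readers of the same file get different prefixes and cannot
// share cached blocks, which is the performance cost mentioned above.
#include <atomic>
#include <cstddef>
#include <cstdint>
#include <string>

static std::string MakeCachePrefix(std::size_t unique_id_size,
                                   const char* unique_id) {
  if (unique_id_size > 0) {
    // Preferred: a stable, non-reusable ID derived from the file itself.
    return std::string(unique_id, unique_id_size);
  }
  // Fallback: a fresh ID per table reader.
  static std::atomic<uint64_t> next_id{1};
  uint64_t id = next_id.fetch_add(1);
  return std::string(reinterpret_cast<const char*>(&id), sizeof(id));
}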
