Fix block cache ID uniqueness on Windows
Since we do not evict a file's blocks from the block cache before that
file is deleted, we require that a file's cache ID prefix be both unique
and non-reusable. However, the Windows functionality we were relying on
only guaranteed uniqueness. That meant a newly created file could be
assigned the same cache ID prefix as a deleted file. If the newly
created file had block offsets matching the deleted file's, the full
cache keys could be exactly the same, resulting in obsolete data blocks
being returned from the cache when reading from the new file.
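
To illustrate the failure mode, here is a minimal sketch (assumed names,
not RocksDB's actual implementation) of how a block cache key is built
from the file's cache ID prefix plus the block offset. If a new file is
handed a deleted file's prefix and has a block at the same offset, the
keys collide and a lookup returns the deleted file's stale block:

// Minimal sketch (assumed names, not RocksDB's actual code) of how a block
// cache key is formed and why a reused file ID prefix causes collisions.
#include <cassert>
#include <cstdint>
#include <iostream>
#include <string>

// Hypothetical helper: append the raw bytes of the offset to the key.
// (A fixed-width append is enough to demonstrate the collision.)
static void AppendOffset(std::string* key, uint64_t offset) {
  key->append(reinterpret_cast<const char*>(&offset), sizeof(offset));
}

// Cache key = file's unique ID prefix + block offset within the file.
static std::string MakeBlockCacheKey(const std::string& file_id_prefix,
                                     uint64_t block_offset) {
  std::string key = file_id_prefix;
  AppendOffset(&key, block_offset);
  return key;
}

int main() {
  // Suppose the OS hands out ID "42" for a file, the file is deleted without
  // evicting its cached blocks, and a new file is then assigned ID "42" too.
  std::string old_file_prefix = "42";
  std::string new_file_prefix = "42";  // reused ID -- the bug's precondition

  // Both files happen to have a block starting at the same offset.
  std::string old_key = MakeBlockCacheKey(old_file_prefix, 0);
  std::string new_key = MakeBlockCacheKey(new_file_prefix, 0);

  // Identical keys: a lookup for the new file's block returns the deleted
  // file's obsolete block from cache.
  assert(old_key == new_key);
  std::cout << "cache keys collide: " << (old_key == new_key) << std::endl;
  return 0;
}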

We noticed this when running on FAT32, where compaction was writing
out-of-order keys due to reading obsolete blocks from its input files.
The functionality is documented as behaving the same way on NTFS,
although I wasn't able to reproduce the issue there.

Test Plan: we had a reliable repro of out-of-order keys on FAT32 that
was fixed by this change
ajkr committed Sep 24, 2019
1 parent 8978e86 commit 217d7a1
Showing 1 changed file with 11 additions and 54 deletions.
65 changes: 11 additions & 54 deletions port/win/io_win.cc
@@ -175,60 +175,17 @@ Status ftruncate(const std::string& filename, HANDLE hFile,
return status;
}

size_t GetUniqueIdFromFile(HANDLE hFile, char* id, size_t max_size) {

if (max_size < kMaxVarint64Length * 3) {
return 0;
}
#if (_WIN32_WINNT == _WIN32_WINNT_VISTA)
// MINGW as defined by CMake file.
// yuslepukhin: I hate the guts of the above macros.
// This impl does not guarantee uniqueness everywhere
// but is reasonably good
BY_HANDLE_FILE_INFORMATION FileInfo;

BOOL result = GetFileInformationByHandle(hFile, &FileInfo);

TEST_SYNC_POINT_CALLBACK("GetUniqueIdFromFile:FS_IOC_GETVERSION", &result);

if (!result) {
return 0;
}

char* rid = id;
rid = EncodeVarint64(rid, uint64_t(FileInfo.dwVolumeSerialNumber));
rid = EncodeVarint64(rid, uint64_t(FileInfo.nFileIndexHigh));
rid = EncodeVarint64(rid, uint64_t(FileInfo.nFileIndexLow));

assert(rid >= id);
return static_cast<size_t>(rid - id);
#else
FILE_ID_INFO FileInfo;
BOOL result = GetFileInformationByHandleEx(hFile, FileIdInfo, &FileInfo,
sizeof(FileInfo));

TEST_SYNC_POINT_CALLBACK("GetUniqueIdFromFile:FS_IOC_GETVERSION", &result);

if (!result) {
return 0;
}

static_assert(sizeof(uint64_t) == sizeof(FileInfo.VolumeSerialNumber),
"Wrong sizeof expectations");
// FileId.Identifier is an array of 16 BYTEs, we encode them as two uint64_t
static_assert(sizeof(uint64_t) * 2 == sizeof(FileInfo.FileId.Identifier),
"Wrong sizeof expectations");

char* rid = id;
rid = EncodeVarint64(rid, uint64_t(FileInfo.VolumeSerialNumber));
uint64_t* file_id = reinterpret_cast<uint64_t*>(&FileInfo.FileId.Identifier[0]);
rid = EncodeVarint64(rid, *file_id);
++file_id;
rid = EncodeVarint64(rid, *file_id);

assert(rid >= id);
return static_cast<size_t>(rid - id);
#endif
size_t GetUniqueIdFromFile(HANDLE /*hFile*/, char* /*id*/,
                           size_t /*max_size*/) {
// `FileIdInfo` is not supported. The possible alternative,
// `BY_HANDLE_FILE_INFORMATION::nFileIndex{High,Low}`, allows deleted files'
// IDs to be reused for newly created files. Since we don't evict from block
// cache before deleting a file, this introduces a risk that readers access
// obsolete data blocks.
//
// Returning 0 is safe as it causes the table reader to generate a unique ID.
// This is suboptimal for performance as it prevents multiple table readers
// for the same file from sharing cached blocks, but at least it's safe.
return 0;
}

////////////////////////////////////////////////////////////////////////////////////////////////////
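
For context on why returning 0 is safe: when no file-level unique ID is
available, the table reader generates its own prefix, e.g. from a
monotonically increasing counter, so keys from different files can never
collide. A rough sketch of that fallback (assumed names, not the exact
RocksDB code):

// Rough sketch (assumed names, not the exact RocksDB code) of the fallback
// when GetUniqueIdFromFile() returns 0: the table reader generates its own
// per-reader prefix from a process-wide counter. This is always collision
// free, but two readers of the same file get different prefixes and cannot
// share cached blocks, which is the performance cost mentioned above.
#include <atomic>
#include <cstddef>
#include <cstdint>
#include <string>

static std::string MakeCachePrefix(std::size_t unique_id_size,
                                   const char* unique_id) {
  if (unique_id_size > 0) {
    // Preferred: a stable, non-reusable ID derived from the file itself.
    return std::string(unique_id, unique_id_size);
  }
  // Fallback: a fresh ID per table reader.
  static std::atomic<uint64_t> next_id{1};
  uint64_t id = next_id.fetch_add(1);
  return std::string(reinterpret_cast<const char*>(&id), sizeof(id));
}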
