diff --git a/server/src/main/java/org/elasticsearch/bootstrap/Bootstrap.java b/server/src/main/java/org/elasticsearch/bootstrap/Bootstrap.java index b355959980e3e..fc14a9745d6ad 100644 --- a/server/src/main/java/org/elasticsearch/bootstrap/Bootstrap.java +++ b/server/src/main/java/org/elasticsearch/bootstrap/Bootstrap.java @@ -22,6 +22,7 @@ import org.elasticsearch.cli.Terminal; import org.elasticsearch.cli.UserException; import org.elasticsearch.common.PidFile; +import org.elasticsearch.common.filesystem.FileSystemNatives; import org.elasticsearch.common.inject.CreationException; import org.elasticsearch.common.logging.DeprecationCategory; import org.elasticsearch.common.logging.DeprecationLogger; @@ -150,6 +151,9 @@ public boolean handle(int code) { // init lucene random seed. it will use /dev/urandom where available: StringHelper.randomId(); + + // init filesystem natives + FileSystemNatives.init(); } static void initializeProbes() { diff --git a/server/src/main/java/org/elasticsearch/common/filesystem/FileSystemNatives.java b/server/src/main/java/org/elasticsearch/common/filesystem/FileSystemNatives.java new file mode 100644 index 0000000000000..e1d42ece2b05b --- /dev/null +++ b/server/src/main/java/org/elasticsearch/common/filesystem/FileSystemNatives.java @@ -0,0 +1,66 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.common.filesystem; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.lucene.util.Constants; + +import java.nio.file.Path; +import java.util.OptionalLong; + +/** + * This class provides utility methods for calling some native methods related to filesystems. + */ +public final class FileSystemNatives { + + private static final Logger logger = LogManager.getLogger(FileSystemNatives.class); + + @FunctionalInterface + interface Provider { + OptionalLong allocatedSizeInBytes(Path path); + } + + private static final Provider NOOP_FILE_SYSTEM_NATIVES_PROVIDER = path -> OptionalLong.empty(); + private static final Provider JNA_PROVIDER = loadJnaProvider(); + + private static Provider loadJnaProvider() { + try { + // load one of the main JNA classes to see if the classes are available. this does not ensure that all native + // libraries are available, only the ones necessary by JNA to function + Class.forName("com.sun.jna.Native"); + if (Constants.WINDOWS) { + return WindowsFileSystemNatives.getInstance(); + } + } catch (ClassNotFoundException e) { + logger.warn("JNA not found. FileSystemNatives methods will be disabled.", e); + } catch (LinkageError e) { + logger.warn("unable to load JNA native support library, FileSystemNatives methods will be disabled.", e); + } + return NOOP_FILE_SYSTEM_NATIVES_PROVIDER; + } + + private FileSystemNatives() {} + + public static void init() { + assert JNA_PROVIDER != null; + } + + /** + * Returns the number of allocated bytes on disk for a given file. + * + * @param path the path to the file + * @return an {@link OptionalLong} that contains the number of allocated bytes on disk for the file. The optional is empty is the + * allocated size of the file failed be retrieved using native methods + */ + public static OptionalLong allocatedSizeInBytes(Path path) { + return JNA_PROVIDER.allocatedSizeInBytes(path); + } + +} diff --git a/server/src/main/java/org/elasticsearch/common/filesystem/WindowsFileSystemNatives.java b/server/src/main/java/org/elasticsearch/common/filesystem/WindowsFileSystemNatives.java new file mode 100644 index 0000000000000..4fe219bfc774d --- /dev/null +++ b/server/src/main/java/org/elasticsearch/common/filesystem/WindowsFileSystemNatives.java @@ -0,0 +1,98 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.common.filesystem; + +import com.sun.jna.Native; +import com.sun.jna.WString; +import com.sun.jna.ptr.IntByReference; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.lucene.util.Constants; + +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.OptionalLong; + +/** + * {@link FileSystemNatives.Provider} implementation for Windows/Kernel32 + */ +final class WindowsFileSystemNatives implements FileSystemNatives.Provider { + + private static final Logger logger = LogManager.getLogger(WindowsFileSystemNatives.class); + + private static final WindowsFileSystemNatives INSTANCE = new WindowsFileSystemNatives(); + + private static final int INVALID_FILE_SIZE = -1; + private static final int NO_ERROR = 0; + + private WindowsFileSystemNatives() { + assert Constants.WINDOWS : Constants.OS_NAME; + try { + Native.register("kernel32"); + logger.debug("windows/Kernel32 library loaded"); + } catch (LinkageError e) { + logger.warn("unable to link Windows/Kernel32 library. native methods and handlers will be disabled.", e); + throw e; + } + } + + static WindowsFileSystemNatives getInstance() { + return INSTANCE; + } + + /** + * Retrieves the actual number of bytes of disk storage used to store a specified file. + * + * https://docs.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-getcompressedfilesizew + * + * @param lpFileName the path string + * @param lpFileSizeHigh pointer to high-order DWORD for compressed file size (or null if not needed) + * @return the low-order DWORD for compressed file siz + */ + private native int GetCompressedFileSizeW(WString lpFileName, IntByReference lpFileSizeHigh); + + /** + * Retrieves the actual number of bytes of disk storage used to store a specified file. If the file is located on a volume that supports + * compression and the file is compressed, the value obtained is the compressed size of the specified file. If the file is located on a + * volume that supports sparse files and the file is a sparse file, the value obtained is the sparse size of the specified file. + * + * This method uses Win32 DLL native method {@link #GetCompressedFileSizeW(WString, IntByReference)}. + * + * @param path the path to the file + * @return an {@link OptionalLong} that contains the number of allocated bytes on disk for the file, or empty if the size is invalid + */ + public OptionalLong allocatedSizeInBytes(Path path) { + assert Files.isRegularFile(path) : path; + final WString fileName = new WString("\\\\?\\" + path); + final IntByReference lpFileSizeHigh = new IntByReference(); + + final int lpFileSizeLow = GetCompressedFileSizeW(fileName, lpFileSizeHigh); + if (lpFileSizeLow == INVALID_FILE_SIZE) { + final int err = Native.getLastError(); + if (err != NO_ERROR) { + logger.warn("error [{}] when executing native method GetCompressedFileSizeW for file [{}]", err, path); + return OptionalLong.empty(); + } + } + + // convert lpFileSizeLow to unsigned long and combine with signed/shifted lpFileSizeHigh + final long allocatedSize = (((long) lpFileSizeHigh.getValue()) << Integer.SIZE) | Integer.toUnsignedLong(lpFileSizeLow); + if (logger.isTraceEnabled()) { + logger.trace( + "executing native method GetCompressedFileSizeW returned [high={}, low={}, allocated={}] for file [{}]", + lpFileSizeHigh, + lpFileSizeLow, + allocatedSize, + path + ); + } + return OptionalLong.of(allocatedSize); + } +} diff --git a/x-pack/plugin/searchable-snapshots/src/test/java/org/elasticsearch/xpack/searchablesnapshots/cache/common/CacheFileTests.java b/x-pack/plugin/searchable-snapshots/src/test/java/org/elasticsearch/xpack/searchablesnapshots/cache/common/CacheFileTests.java index 0ac988590d383..17f7e643c7441 100644 --- a/x-pack/plugin/searchable-snapshots/src/test/java/org/elasticsearch/xpack/searchablesnapshots/cache/common/CacheFileTests.java +++ b/x-pack/plugin/searchable-snapshots/src/test/java/org/elasticsearch/xpack/searchablesnapshots/cache/common/CacheFileTests.java @@ -7,8 +7,11 @@ package org.elasticsearch.xpack.searchablesnapshots.cache.common; import org.apache.lucene.store.AlreadyClosedException; +import org.apache.lucene.util.Constants; +import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.SetOnce; import org.elasticsearch.common.UUIDs; +import org.elasticsearch.common.filesystem.FileSystemNatives; import org.elasticsearch.common.util.concurrent.DeterministicTaskQueue; import org.elasticsearch.core.PathUtils; import org.elasticsearch.core.PathUtilsForTesting; @@ -20,16 +23,19 @@ import org.hamcrest.Matcher; import java.io.IOException; +import java.nio.ByteBuffer; import java.nio.channels.FileChannel; import java.nio.file.FileSystem; import java.nio.file.Files; import java.nio.file.Path; import java.util.ArrayList; +import java.util.Arrays; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Locale; import java.util.Objects; +import java.util.OptionalLong; import java.util.Set; import java.util.SortedSet; import java.util.concurrent.ExecutionException; @@ -38,13 +44,16 @@ import static org.elasticsearch.xpack.searchablesnapshots.cache.common.TestUtils.randomPopulateAndReads; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThanOrEqualTo; import static org.hamcrest.Matchers.hasSize; import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.lessThan; import static org.hamcrest.Matchers.notNullValue; import static org.hamcrest.Matchers.nullValue; import static org.hamcrest.Matchers.sameInstance; +@LuceneTestCase.SuppressFileSystems("DisableFsyncFS") // required by {@link testCacheFileCreatedAsSparseFile()} public class CacheFileTests extends ESTestCase { private static final CacheFile.ModificationListener NOOP = new CacheFile.ModificationListener() { @@ -57,7 +66,7 @@ public void onCacheFileDelete(CacheFile cacheFile) {} private static final CacheKey CACHE_KEY = new CacheKey("_snap_uuid", "_snap_index", new ShardId("_name", "_uuid", 0), "_filename"); - public void testGetCacheKey() throws Exception { + public void testGetCacheKey() { final Path file = createTempDir().resolve("file.new"); final CacheKey cacheKey = new CacheKey( UUIDs.randomBase64UUID(random()), @@ -380,6 +389,54 @@ public void testFSyncFailure() throws Exception { } } + public void testCacheFileCreatedAsSparseFile() throws Exception { + assumeTrue("This test uses a native method implemented only for Windows", Constants.WINDOWS); + final long oneMb = 1 << 20; + + final Path file = createTempDir().resolve(UUIDs.randomBase64UUID(random())); + final CacheFile cacheFile = new CacheFile( + new CacheKey("_snap_uuid", "_snap_name", new ShardId("_name", "_uid", 0), "_filename"), + oneMb, + file, + NOOP + ); + assertFalse(Files.exists(file)); + + final TestEvictionListener listener = new TestEvictionListener(); + cacheFile.acquire(listener); + try { + final FileChannel fileChannel = cacheFile.getChannel(); + assertTrue(Files.exists(file)); + + OptionalLong sizeOnDisk = FileSystemNatives.allocatedSizeInBytes(file); + assertTrue(sizeOnDisk.isPresent()); + assertThat(sizeOnDisk.getAsLong(), equalTo(0L)); + + // write 1 byte at the last position in the cache file. + // For non sparse files, Windows would allocate the full file on disk in order to write a single byte at the end, + // making the next assertion fails. + fill(fileChannel, Math.toIntExact(cacheFile.getLength() - 1L), Math.toIntExact(cacheFile.getLength())); + fileChannel.force(false); + + sizeOnDisk = FileSystemNatives.allocatedSizeInBytes(file); + assertTrue(sizeOnDisk.isPresent()); + assertThat("Cache file should be sparse and not fully allocated on disk", sizeOnDisk.getAsLong(), lessThan(oneMb)); + + fill(fileChannel, 0, Math.toIntExact(cacheFile.getLength())); + fileChannel.force(false); + + sizeOnDisk = FileSystemNatives.allocatedSizeInBytes(file); + assertTrue(sizeOnDisk.isPresent()); + assertThat( + "Cache file should be fully allocated on disk (maybe more given cluster/block size)", + sizeOnDisk.getAsLong(), + greaterThanOrEqualTo(oneMb) + ); + } finally { + cacheFile.release(listener); + } + } + static class TestEvictionListener implements EvictionListener { private final SetOnce evicted = new SetOnce<>(); @@ -440,4 +497,24 @@ private static FSyncTrackingFileSystemProvider setupFSyncCountingFileSystem() { PathUtilsForTesting.installMock(provider.getFileSystem(null)); return provider; } + + private static void fill(FileChannel fileChannel, int from, int to) { + final byte[] buffer = new byte[Math.min(Math.max(0, to - from), 1024)]; + Arrays.fill(buffer, (byte) 0xff); + assert fileChannel.isOpen(); + + try { + int written = 0; + int remaining = to - from; + while (remaining > 0) { + final int len = Math.min(remaining, buffer.length); + fileChannel.write(ByteBuffer.wrap(buffer, 0, len), from + written); + remaining -= len; + written += len; + } + assert written == to - from; + } catch (IOException e) { + throw new AssertionError(e); + } + } }