From 976048d5d630b7adc53f54f2b27558d3a38b1f65 Mon Sep 17 00:00:00 2001 From: Sarthak Aggarwal Date: Thu, 29 Jun 2023 06:23:14 +0530 Subject: [PATCH] Moving zstd out of sandbox (#7908) * Adding zstd module to source Signed-off-by: Sarthak Aggarwal * Removing zstd module from sandbox Signed-off-by: Sarthak Aggarwal * Added tests and refactoring Signed-off-by: Sarthak Aggarwal * Fixing gradle issues Signed-off-by: Sarthak Aggarwal * flaky test Signed-off-by: Sarthak Aggarwal * fixing precommit failure Signed-off-by: Sarthak Aggarwal * Incorporate review comments and fixed precommit failures Signed-off-by: Sarthak Aggarwal * Incorporating review comments Signed-off-by: Sarthak Aggarwal * Incorporating review comments Signed-off-by: Sarthak Aggarwal * Adding Integ tests Signed-off-by: Sarthak Aggarwal * Incorporating review comments Signed-off-by: Sarthak Aggarwal --------- Signed-off-by: Sarthak Aggarwal Signed-off-by: Daniel (dB.) Doubrovkine Co-authored-by: Daniel (dB.) Doubrovkine --- CHANGELOG.md | 1 + .../index/codec/MultiCodecReindexIT.java | 189 ++++++++++++++++++ sandbox/plugins/custom-codecs/build.gradle | 28 --- .../codec/customcodecs/CustomCodecPlugin.java | 26 --- .../plugin-metadata/plugin-security.policy | 11 - .../index/codec/MultiCodecMergeIT.java | 175 ++++++++++++++++ .../opensearch/index/codec/CodecService.java | 8 + .../customcodecs/Lucene95CustomCodec.java | 27 ++- .../Lucene95CustomStoredFieldsFormat.java | 13 +- .../index/codec/customcodecs/ZstdCodec.java | 7 + .../customcodecs/ZstdCompressionMode.java | 5 +- .../codec/customcodecs/ZstdNoDictCodec.java | 9 +- .../ZstdNoDictCompressionMode.java | 5 +- .../codec/customcodecs/package-info.java | 0 .../opensearch/index/engine/EngineConfig.java | 4 +- .../services/org.apache.lucene.codecs.Codec | 0 .../opensearch/index/codec/CodecTests.java | 81 ++++++-- .../customcodecs/AbstractCompressorTests.java | 10 +- ...Lucene95CustomStoredFieldsFormatTests.java | 27 +++ .../customcodecs/ZstdCompressorTests.java | 0 .../ZstdNoDictCompressorTests.java | 0 21 files changed, 524 insertions(+), 102 deletions(-) create mode 100644 modules/reindex/src/internalClusterTest/java/org/opensearch/index/codec/MultiCodecReindexIT.java delete mode 100644 sandbox/plugins/custom-codecs/build.gradle delete mode 100644 sandbox/plugins/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/CustomCodecPlugin.java delete mode 100644 sandbox/plugins/custom-codecs/src/main/plugin-metadata/plugin-security.policy create mode 100644 server/src/internalClusterTest/java/org/opensearch/index/codec/MultiCodecMergeIT.java rename {sandbox/plugins/custom-codecs => server}/src/main/java/org/opensearch/index/codec/customcodecs/Lucene95CustomCodec.java (65%) rename {sandbox/plugins/custom-codecs => server}/src/main/java/org/opensearch/index/codec/customcodecs/Lucene95CustomStoredFieldsFormat.java (96%) rename {sandbox/plugins/custom-codecs => server}/src/main/java/org/opensearch/index/codec/customcodecs/ZstdCodec.java (79%) rename {sandbox/plugins/custom-codecs => server}/src/main/java/org/opensearch/index/codec/customcodecs/ZstdCompressionMode.java (99%) rename {sandbox/plugins/custom-codecs => server}/src/main/java/org/opensearch/index/codec/customcodecs/ZstdNoDictCodec.java (74%) rename {sandbox/plugins/custom-codecs => server}/src/main/java/org/opensearch/index/codec/customcodecs/ZstdNoDictCompressionMode.java (99%) rename {sandbox/plugins/custom-codecs => server}/src/main/java/org/opensearch/index/codec/customcodecs/package-info.java (100%) rename {sandbox/plugins/custom-codecs => server}/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec (100%) rename {sandbox/plugins/custom-codecs => server}/src/test/java/org/opensearch/index/codec/customcodecs/AbstractCompressorTests.java (100%) create mode 100644 server/src/test/java/org/opensearch/index/codec/customcodecs/Lucene95CustomStoredFieldsFormatTests.java rename {sandbox/plugins/custom-codecs => server}/src/test/java/org/opensearch/index/codec/customcodecs/ZstdCompressorTests.java (100%) rename {sandbox/plugins/custom-codecs => server}/src/test/java/org/opensearch/index/codec/customcodecs/ZstdNoDictCompressorTests.java (100%) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5ec159efafc9a..139807f0bffd1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -138,6 +138,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - [Snapshot Interop] Add Changes in Create Snapshot Flow for remote store interoperability. ([#7118](https://github.com/opensearch-project/OpenSearch/pull/7118)) - Allow insecure string settings to warn-log usage and advise to migration of a newer secure variant ([#5496](https://github.com/opensearch-project/OpenSearch/pull/5496)) - Add self-organizing hash table to improve the performance of bucket aggregations ([#7652](https://github.com/opensearch-project/OpenSearch/pull/7652)) +- Move ZSTD compression codecs out of the sandbox ([#7908](https://github.com/opensearch-project/OpenSearch/pull/7908)) ### Deprecated diff --git a/modules/reindex/src/internalClusterTest/java/org/opensearch/index/codec/MultiCodecReindexIT.java b/modules/reindex/src/internalClusterTest/java/org/opensearch/index/codec/MultiCodecReindexIT.java new file mode 100644 index 0000000000000..87f3c68d8af76 --- /dev/null +++ b/modules/reindex/src/internalClusterTest/java/org/opensearch/index/codec/MultiCodecReindexIT.java @@ -0,0 +1,189 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.codec; + +import org.opensearch.action.admin.indices.flush.FlushResponse; +import org.opensearch.action.admin.indices.refresh.RefreshResponse; +import org.opensearch.action.admin.indices.segments.IndicesSegmentsRequest; +import org.opensearch.action.admin.indices.settings.put.UpdateSettingsRequest; +import org.opensearch.action.support.ActiveShardCount; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.common.settings.Settings; +import org.opensearch.index.engine.Segment; +import org.opensearch.index.reindex.BulkByScrollResponse; +import org.opensearch.index.reindex.ReindexAction; +import org.opensearch.index.reindex.ReindexRequestBuilder; +import org.opensearch.index.reindex.ReindexTestCase; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Map; +import java.util.UUID; +import java.util.concurrent.ExecutionException; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +import static java.util.stream.Collectors.toList; +import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_BLOCKS_METADATA; +import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_BLOCKS_READ; +import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_BLOCKS_WRITE; +import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_READ_ONLY; +import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_READ_ONLY_ALLOW_DELETE; +import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; +import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertNoFailures; + +public class MultiCodecReindexIT extends ReindexTestCase { + + public void testReindexingMultipleCodecs() throws InterruptedException, ExecutionException { + internalCluster().ensureAtLeastNumDataNodes(1); + Map codecMap = Map.of( + "best_compression", + "BEST_COMPRESSION", + "zstd_no_dict", + "ZSTD_NO_DICT", + "zstd", + "ZSTD", + "default", + "BEST_SPEED" + ); + + for (Map.Entry codec : codecMap.entrySet()) { + assertReindexingWithMultipleCodecs(codec.getKey(), codec.getValue(), codecMap); + } + + } + + private void assertReindexingWithMultipleCodecs(String destCodec, String destCodecMode, Map codecMap) + throws ExecutionException, InterruptedException { + + final String index = "test-index" + destCodec; + final String destIndex = "dest-index" + destCodec; + + // creating source index + createIndex( + index, + Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) + .put("index.codec", "default") + .put("index.merge.policy.max_merged_segment", "1b") + .build() + ); + ensureGreen(index); + + final int nbDocs = randomIntBetween(2, 5); + + // indexing with all 4 codecs + for (Map.Entry codec : codecMap.entrySet()) { + useCodec(index, codec.getKey()); + ingestDocs(index, nbDocs); + } + + assertTrue( + getSegments(index).stream() + .flatMap(s -> s.getAttributes().values().stream()) + .collect(Collectors.toSet()) + .containsAll(codecMap.values()) + ); + + // creating destination index with destination codec + createIndex( + destIndex, + Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) + .put("index.codec", destCodec) + .build() + ); + + BulkByScrollResponse bulkResponse = new ReindexRequestBuilder(client(), ReindexAction.INSTANCE).source(index) + .destination(destIndex) + .refresh(true) + .waitForActiveShards(ActiveShardCount.ONE) + .get(); + + assertEquals(codecMap.size() * nbDocs, bulkResponse.getCreated()); + assertEquals(codecMap.size() * nbDocs, bulkResponse.getTotal()); + assertEquals(0, bulkResponse.getDeleted()); + assertEquals(0, bulkResponse.getNoops()); + assertEquals(0, bulkResponse.getVersionConflicts()); + assertEquals(1, bulkResponse.getBatches()); + assertTrue(bulkResponse.getTook().getMillis() > 0); + assertEquals(0, bulkResponse.getBulkFailures().size()); + assertEquals(0, bulkResponse.getSearchFailures().size()); + assertTrue(getSegments(destIndex).stream().allMatch(segment -> segment.attributes.containsValue(destCodecMode))); + } + + private void useCodec(String index, String codec) throws ExecutionException, InterruptedException { + assertAcked(client().admin().indices().prepareClose(index)); + + assertAcked( + client().admin() + .indices() + .updateSettings(new UpdateSettingsRequest(index).settings(Settings.builder().put("index.codec", codec))) + .get() + ); + + assertAcked(client().admin().indices().prepareOpen(index)); + } + + private void flushAndRefreshIndex(String index) { + + // Request is not blocked + for (String blockSetting : Arrays.asList( + SETTING_BLOCKS_READ, + SETTING_BLOCKS_WRITE, + SETTING_READ_ONLY, + SETTING_BLOCKS_METADATA, + SETTING_READ_ONLY_ALLOW_DELETE + )) { + try { + enableIndexBlock(index, blockSetting); + // flush + FlushResponse flushResponse = client().admin().indices().prepareFlush(index).setForce(true).execute().actionGet(); + assertNoFailures(flushResponse); + + // refresh + RefreshResponse refreshResponse = client().admin().indices().prepareRefresh(index).execute().actionGet(); + assertNoFailures(refreshResponse); + } finally { + disableIndexBlock(index, blockSetting); + } + } + } + + private void ingestDocs(String index, int nbDocs) throws InterruptedException { + + indexRandom( + randomBoolean(), + false, + randomBoolean(), + IntStream.range(0, nbDocs) + .mapToObj(i -> client().prepareIndex(index).setId(UUID.randomUUID().toString()).setSource("num", i)) + .collect(toList()) + ); + flushAndRefreshIndex(index); + } + + private ArrayList getSegments(String index) { + + return new ArrayList<>( + client().admin() + .indices() + .segments(new IndicesSegmentsRequest(index)) + .actionGet() + .getIndices() + .get(index) + .getShards() + .get(0) + .getShards()[0].getSegments() + ); + } + +} diff --git a/sandbox/plugins/custom-codecs/build.gradle b/sandbox/plugins/custom-codecs/build.gradle deleted file mode 100644 index 2183df25044a4..0000000000000 --- a/sandbox/plugins/custom-codecs/build.gradle +++ /dev/null @@ -1,28 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - * - * Modifications Copyright OpenSearch Contributors. See - * GitHub history for details. - */ - -apply plugin: 'opensearch.opensearchplugin' -apply plugin: 'opensearch.yaml-rest-test' - -opensearchplugin { - name 'custom-codecs' - description 'A plugin that implements custom compression codecs.' - classname 'org.opensearch.index.codec.customcodecs.CustomCodecPlugin' - licenseFile rootProject.file('licenses/APACHE-LICENSE-2.0.txt') - noticeFile rootProject.file('NOTICE.txt') -} - -dependencies { - api "com.github.luben:zstd-jni:${versions.zstd}" -} - -yamlRestTest.enabled = false; -testingConventions.enabled = false; diff --git a/sandbox/plugins/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/CustomCodecPlugin.java b/sandbox/plugins/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/CustomCodecPlugin.java deleted file mode 100644 index 9d36184bf81af..0000000000000 --- a/sandbox/plugins/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/CustomCodecPlugin.java +++ /dev/null @@ -1,26 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ - -package org.opensearch.index.codec.customcodecs; - -import org.opensearch.plugins.Plugin; -import org.opensearch.plugins.EnginePlugin; - -/** - * A plugin that implements custom codecs. Supports these codecs: - *
    - *
  • zstd - *
  • zstdnodict - *
- * - * @opensearch.internal - */ -public final class CustomCodecPlugin extends Plugin implements EnginePlugin { - /** Creates a new instance. */ - public CustomCodecPlugin() {} -} diff --git a/sandbox/plugins/custom-codecs/src/main/plugin-metadata/plugin-security.policy b/sandbox/plugins/custom-codecs/src/main/plugin-metadata/plugin-security.policy deleted file mode 100644 index 8161010cfa897..0000000000000 --- a/sandbox/plugins/custom-codecs/src/main/plugin-metadata/plugin-security.policy +++ /dev/null @@ -1,11 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ - -grant codeBase "${codebase.zstd-jni}" { - permission java.lang.RuntimePermission "loadLibrary.*"; -}; diff --git a/server/src/internalClusterTest/java/org/opensearch/index/codec/MultiCodecMergeIT.java b/server/src/internalClusterTest/java/org/opensearch/index/codec/MultiCodecMergeIT.java new file mode 100644 index 0000000000000..2866292e5e2e0 --- /dev/null +++ b/server/src/internalClusterTest/java/org/opensearch/index/codec/MultiCodecMergeIT.java @@ -0,0 +1,175 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.codec; + +import org.opensearch.action.admin.indices.flush.FlushResponse; +import org.opensearch.action.admin.indices.forcemerge.ForceMergeResponse; +import org.opensearch.action.admin.indices.refresh.RefreshResponse; +import org.opensearch.action.admin.indices.segments.IndicesSegmentsRequest; +import org.opensearch.action.admin.indices.settings.put.UpdateSettingsRequest; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.common.settings.Settings; +import org.opensearch.index.engine.Segment; +import org.opensearch.test.OpenSearchIntegTestCase; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.UUID; +import java.util.concurrent.ExecutionException; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +import static java.util.stream.Collectors.toList; +import static org.hamcrest.Matchers.is; +import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_BLOCKS_METADATA; +import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_BLOCKS_READ; +import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_BLOCKS_WRITE; +import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_READ_ONLY; +import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_READ_ONLY_ALLOW_DELETE; +import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; +import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertNoFailures; + +@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST) +public class MultiCodecMergeIT extends OpenSearchIntegTestCase { + + public void testForceMergeMultipleCodecs() throws ExecutionException, InterruptedException { + + Map codecMap = Map.of( + "best_compression", + "BEST_COMPRESSION", + "zstd_no_dict", + "ZSTD_NO_DICT", + "zstd", + "ZSTD", + "default", + "BEST_SPEED" + ); + + for (Map.Entry codec : codecMap.entrySet()) { + forceMergeMultipleCodecs(codec.getKey(), codec.getValue(), codecMap); + } + + } + + private void forceMergeMultipleCodecs(String finalCodec, String finalCodecMode, Map codecMap) throws ExecutionException, + InterruptedException { + + internalCluster().ensureAtLeastNumDataNodes(1); + final String index = "test-index" + finalCodec; + + // creating index + createIndex( + index, + Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) + .put("index.codec", "default") + .put("index.merge.policy.max_merged_segment", "1b") + .build() + ); + ensureGreen(index); + // ingesting and asserting segment codec mode for all four codecs + for (Map.Entry codec : codecMap.entrySet()) { + useCodec(index, codec.getKey()); + ingestDocs(index); + } + + assertTrue( + getSegments(index).stream() + .flatMap(s -> s.getAttributes().values().stream()) + .collect(Collectors.toSet()) + .containsAll(codecMap.values()) + ); + + // force merge into final codec + useCodec(index, finalCodec); + flushAndRefreshIndex(index); + final ForceMergeResponse forceMergeResponse = client().admin().indices().prepareForceMerge(index).setMaxNumSegments(1).get(); + + assertThat(forceMergeResponse.getFailedShards(), is(0)); + assertThat(forceMergeResponse.getSuccessfulShards(), is(1)); + + flushAndRefreshIndex(index); + + List segments = getSegments(index).stream().filter(Segment::isSearch).collect(Collectors.toList()); + assertEquals(1, segments.size()); + assertTrue(segments.stream().findFirst().get().attributes.containsValue(finalCodecMode)); + } + + private void useCodec(String index, String codec) throws ExecutionException, InterruptedException { + assertAcked(client().admin().indices().prepareClose(index)); + + assertAcked( + client().admin() + .indices() + .updateSettings(new UpdateSettingsRequest(index).settings(Settings.builder().put("index.codec", codec))) + .get() + ); + + assertAcked(client().admin().indices().prepareOpen(index)); + } + + private void ingestDocs(String index) throws InterruptedException { + ingest(index); + flushAndRefreshIndex(index); + } + + private ArrayList getSegments(String index) { + + return new ArrayList<>( + client().admin() + .indices() + .segments(new IndicesSegmentsRequest(index)) + .actionGet() + .getIndices() + .get(index) + .getShards() + .get(0) + .getShards()[0].getSegments() + ); + } + + private void ingest(String index) throws InterruptedException { + + final int nbDocs = randomIntBetween(1, 5); + indexRandom( + randomBoolean(), + false, + randomBoolean(), + IntStream.range(0, nbDocs) + .mapToObj(i -> client().prepareIndex(index).setId(UUID.randomUUID().toString()).setSource("num", i)) + .collect(toList()) + ); + } + + private void flushAndRefreshIndex(String index) { + + // Request is not blocked + for (String blockSetting : Arrays.asList( + SETTING_BLOCKS_READ, + SETTING_BLOCKS_WRITE, + SETTING_READ_ONLY, + SETTING_BLOCKS_METADATA, + SETTING_READ_ONLY_ALLOW_DELETE + )) { + try { + enableIndexBlock(index, blockSetting); + FlushResponse flushResponse = client().admin().indices().prepareFlush(index).setForce(true).execute().actionGet(); + assertNoFailures(flushResponse); + RefreshResponse response = client().admin().indices().prepareRefresh(index).execute().actionGet(); + assertNoFailures(response); + } finally { + disableIndexBlock(index, blockSetting); + } + } + } + +} diff --git a/server/src/main/java/org/opensearch/index/codec/CodecService.java b/server/src/main/java/org/opensearch/index/codec/CodecService.java index e4899c02d37e8..b6dac7bd1596c 100644 --- a/server/src/main/java/org/opensearch/index/codec/CodecService.java +++ b/server/src/main/java/org/opensearch/index/codec/CodecService.java @@ -38,6 +38,8 @@ import org.apache.lucene.codecs.lucene95.Lucene95Codec.Mode; import org.opensearch.common.Nullable; import org.opensearch.common.collect.MapBuilder; +import org.opensearch.index.codec.customcodecs.ZstdCodec; +import org.opensearch.index.codec.customcodecs.ZstdNoDictCodec; import org.opensearch.index.mapper.MapperService; import java.util.Map; @@ -58,15 +60,21 @@ public class CodecService { public static final String BEST_COMPRESSION_CODEC = "best_compression"; /** the raw unfiltered lucene default. useful for testing */ public static final String LUCENE_DEFAULT_CODEC = "lucene_default"; + public static final String ZSTD_CODEC = "zstd"; + public static final String ZSTD_NO_DICT_CODEC = "zstd_no_dict"; public CodecService(@Nullable MapperService mapperService, Logger logger) { final MapBuilder codecs = MapBuilder.newMapBuilder(); if (mapperService == null) { codecs.put(DEFAULT_CODEC, new Lucene95Codec()); codecs.put(BEST_COMPRESSION_CODEC, new Lucene95Codec(Mode.BEST_COMPRESSION)); + codecs.put(ZSTD_CODEC, new ZstdCodec()); + codecs.put(ZSTD_NO_DICT_CODEC, new ZstdNoDictCodec()); } else { codecs.put(DEFAULT_CODEC, new PerFieldMappingPostingFormatCodec(Mode.BEST_SPEED, mapperService, logger)); codecs.put(BEST_COMPRESSION_CODEC, new PerFieldMappingPostingFormatCodec(Mode.BEST_COMPRESSION, mapperService, logger)); + codecs.put(ZSTD_CODEC, new ZstdCodec(mapperService, logger)); + codecs.put(ZSTD_NO_DICT_CODEC, new ZstdNoDictCodec(mapperService, logger)); } codecs.put(LUCENE_DEFAULT_CODEC, Codec.getDefault()); for (String codec : Codec.availableCodecs()) { diff --git a/sandbox/plugins/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/Lucene95CustomCodec.java b/server/src/main/java/org/opensearch/index/codec/customcodecs/Lucene95CustomCodec.java similarity index 65% rename from sandbox/plugins/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/Lucene95CustomCodec.java rename to server/src/main/java/org/opensearch/index/codec/customcodecs/Lucene95CustomCodec.java index ad9e5cd3374fa..3c570f9d0566c 100644 --- a/sandbox/plugins/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/Lucene95CustomCodec.java +++ b/server/src/main/java/org/opensearch/index/codec/customcodecs/Lucene95CustomCodec.java @@ -8,19 +8,27 @@ package org.opensearch.index.codec.customcodecs; -import org.apache.lucene.codecs.StoredFieldsFormat; +import org.apache.logging.log4j.Logger; import org.apache.lucene.codecs.FilterCodec; +import org.apache.lucene.codecs.StoredFieldsFormat; import org.apache.lucene.codecs.lucene95.Lucene95Codec; +import org.opensearch.index.codec.PerFieldMappingPostingFormatCodec; +import org.opensearch.index.mapper.MapperService; -import java.util.Locale; - -abstract class Lucene95CustomCodec extends FilterCodec { +/** + * + * Extends {@link FilterCodec} to reuse the functionality of Lucene Codec. + * Supports two modes zstd and zstd_no_dict. + * + * @opensearch.internal + */ +public abstract class Lucene95CustomCodec extends FilterCodec { public static final int DEFAULT_COMPRESSION_LEVEL = 6; /** Each mode represents a compression algorithm. */ public enum Mode { ZSTD, - ZSTDNODICT + ZSTD_NO_DICT } private final StoredFieldsFormat storedFieldsFormat; @@ -40,10 +48,15 @@ public Lucene95CustomCodec(Mode mode) { * the other compression codecs: default, lucene_default, and best_compression. * * @param mode The compression codec (ZSTD or ZSTDNODICT). - * @parama compressionLevel The compression level. + * @param compressionLevel The compression level. */ public Lucene95CustomCodec(Mode mode, int compressionLevel) { - super(mode.name().toLowerCase(Locale.ROOT), new Lucene95Codec()); + super("Lucene95CustomCodec", new Lucene95Codec()); + this.storedFieldsFormat = new Lucene95CustomStoredFieldsFormat(mode, compressionLevel); + } + + public Lucene95CustomCodec(Mode mode, int compressionLevel, MapperService mapperService, Logger logger) { + super("Lucene95CustomCodec", new PerFieldMappingPostingFormatCodec(Lucene95Codec.Mode.BEST_SPEED, mapperService, logger)); this.storedFieldsFormat = new Lucene95CustomStoredFieldsFormat(mode, compressionLevel); } diff --git a/sandbox/plugins/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/Lucene95CustomStoredFieldsFormat.java b/server/src/main/java/org/opensearch/index/codec/customcodecs/Lucene95CustomStoredFieldsFormat.java similarity index 96% rename from sandbox/plugins/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/Lucene95CustomStoredFieldsFormat.java rename to server/src/main/java/org/opensearch/index/codec/customcodecs/Lucene95CustomStoredFieldsFormat.java index 2bfec2ef171d4..f70306afc8562 100644 --- a/sandbox/plugins/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/Lucene95CustomStoredFieldsFormat.java +++ b/server/src/main/java/org/opensearch/index/codec/customcodecs/Lucene95CustomStoredFieldsFormat.java @@ -8,8 +8,6 @@ package org.opensearch.index.codec.customcodecs; -import java.io.IOException; -import java.util.Objects; import org.apache.lucene.codecs.StoredFieldsFormat; import org.apache.lucene.codecs.StoredFieldsReader; import org.apache.lucene.codecs.StoredFieldsWriter; @@ -20,6 +18,9 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; +import java.io.IOException; +import java.util.Objects; + /** Stored field format used by pluggable codec */ public class Lucene95CustomStoredFieldsFormat extends StoredFieldsFormat { @@ -95,7 +96,7 @@ public StoredFieldsWriter fieldsWriter(Directory directory, SegmentInfo si, IOCo return impl(mode).fieldsWriter(directory, si, context); } - private StoredFieldsFormat impl(Lucene95CustomCodec.Mode mode) { + StoredFieldsFormat impl(Lucene95CustomCodec.Mode mode) { switch (mode) { case ZSTD: return new Lucene90CompressingStoredFieldsFormat( @@ -105,7 +106,7 @@ private StoredFieldsFormat impl(Lucene95CustomCodec.Mode mode) { ZSTD_MAX_DOCS_PER_BLOCK, ZSTD_BLOCK_SHIFT ); - case ZSTDNODICT: + case ZSTD_NO_DICT: return new Lucene90CompressingStoredFieldsFormat( "CustomStoredFieldsZstdNoDict", zstdNoDictCompressionMode, @@ -117,4 +118,8 @@ private StoredFieldsFormat impl(Lucene95CustomCodec.Mode mode) { throw new AssertionError(); } } + + Lucene95CustomCodec.Mode getMode() { + return mode; + } } diff --git a/sandbox/plugins/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/ZstdCodec.java b/server/src/main/java/org/opensearch/index/codec/customcodecs/ZstdCodec.java similarity index 79% rename from sandbox/plugins/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/ZstdCodec.java rename to server/src/main/java/org/opensearch/index/codec/customcodecs/ZstdCodec.java index 2b09540d8037d..68da782421e6e 100644 --- a/sandbox/plugins/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/ZstdCodec.java +++ b/server/src/main/java/org/opensearch/index/codec/customcodecs/ZstdCodec.java @@ -8,6 +8,9 @@ package org.opensearch.index.codec.customcodecs; +import org.apache.logging.log4j.Logger; +import org.opensearch.index.mapper.MapperService; + /** * ZstdCodec provides ZSTD compressor using the zstd-jni library. */ @@ -29,6 +32,10 @@ public ZstdCodec(int compressionLevel) { super(Mode.ZSTD, compressionLevel); } + public ZstdCodec(MapperService mapperService, Logger logger) { + super(Mode.ZSTD, DEFAULT_COMPRESSION_LEVEL, mapperService, logger); + } + /** The name for this codec. */ @Override public String toString() { diff --git a/sandbox/plugins/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/ZstdCompressionMode.java b/server/src/main/java/org/opensearch/index/codec/customcodecs/ZstdCompressionMode.java similarity index 99% rename from sandbox/plugins/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/ZstdCompressionMode.java rename to server/src/main/java/org/opensearch/index/codec/customcodecs/ZstdCompressionMode.java index 5b8f1ffcc9569..7057dac3d6bd2 100644 --- a/sandbox/plugins/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/ZstdCompressionMode.java +++ b/server/src/main/java/org/opensearch/index/codec/customcodecs/ZstdCompressionMode.java @@ -13,16 +13,17 @@ import com.github.luben.zstd.ZstdDecompressCtx; import com.github.luben.zstd.ZstdDictCompress; import com.github.luben.zstd.ZstdDictDecompress; -import java.io.IOException; import org.apache.lucene.codecs.compressing.CompressionMode; import org.apache.lucene.codecs.compressing.Compressor; import org.apache.lucene.codecs.compressing.Decompressor; +import org.apache.lucene.store.ByteBuffersDataInput; import org.apache.lucene.store.DataInput; import org.apache.lucene.store.DataOutput; -import org.apache.lucene.store.ByteBuffersDataInput; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.BytesRef; +import java.io.IOException; + /** Zstandard Compression Mode */ public class ZstdCompressionMode extends CompressionMode { diff --git a/sandbox/plugins/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/ZstdNoDictCodec.java b/server/src/main/java/org/opensearch/index/codec/customcodecs/ZstdNoDictCodec.java similarity index 74% rename from sandbox/plugins/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/ZstdNoDictCodec.java rename to server/src/main/java/org/opensearch/index/codec/customcodecs/ZstdNoDictCodec.java index 4ed6ba57545d0..26620473ec116 100644 --- a/sandbox/plugins/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/ZstdNoDictCodec.java +++ b/server/src/main/java/org/opensearch/index/codec/customcodecs/ZstdNoDictCodec.java @@ -8,6 +8,9 @@ package org.opensearch.index.codec.customcodecs; +import org.apache.logging.log4j.Logger; +import org.opensearch.index.mapper.MapperService; + /** * ZstdNoDictCodec provides ZSTD compressor without a dictionary support. */ @@ -26,7 +29,11 @@ public ZstdNoDictCodec() { * @param compressionLevel The compression level. */ public ZstdNoDictCodec(int compressionLevel) { - super(Mode.ZSTDNODICT, compressionLevel); + super(Mode.ZSTD_NO_DICT, compressionLevel); + } + + public ZstdNoDictCodec(MapperService mapperService, Logger logger) { + super(Mode.ZSTD_NO_DICT, DEFAULT_COMPRESSION_LEVEL, mapperService, logger); } /** The name for this codec. */ diff --git a/sandbox/plugins/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/ZstdNoDictCompressionMode.java b/server/src/main/java/org/opensearch/index/codec/customcodecs/ZstdNoDictCompressionMode.java similarity index 99% rename from sandbox/plugins/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/ZstdNoDictCompressionMode.java rename to server/src/main/java/org/opensearch/index/codec/customcodecs/ZstdNoDictCompressionMode.java index 6cfd85b053190..7a1d661550768 100644 --- a/sandbox/plugins/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/ZstdNoDictCompressionMode.java +++ b/server/src/main/java/org/opensearch/index/codec/customcodecs/ZstdNoDictCompressionMode.java @@ -9,16 +9,17 @@ package org.opensearch.index.codec.customcodecs; import com.github.luben.zstd.Zstd; -import java.io.IOException; import org.apache.lucene.codecs.compressing.CompressionMode; import org.apache.lucene.codecs.compressing.Compressor; import org.apache.lucene.codecs.compressing.Decompressor; +import org.apache.lucene.store.ByteBuffersDataInput; import org.apache.lucene.store.DataInput; import org.apache.lucene.store.DataOutput; -import org.apache.lucene.store.ByteBuffersDataInput; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.BytesRef; +import java.io.IOException; + /** ZSTD Compression Mode (without a dictionary support). */ public class ZstdNoDictCompressionMode extends CompressionMode { diff --git a/sandbox/plugins/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/package-info.java b/server/src/main/java/org/opensearch/index/codec/customcodecs/package-info.java similarity index 100% rename from sandbox/plugins/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/package-info.java rename to server/src/main/java/org/opensearch/index/codec/customcodecs/package-info.java diff --git a/server/src/main/java/org/opensearch/index/engine/EngineConfig.java b/server/src/main/java/org/opensearch/index/engine/EngineConfig.java index 338a541af387a..7419cf1dadea6 100644 --- a/server/src/main/java/org/opensearch/index/engine/EngineConfig.java +++ b/server/src/main/java/org/opensearch/index/engine/EngineConfig.java @@ -129,12 +129,14 @@ public Supplier retentionLeasesSupplier() { switch (s) { case "default": case "best_compression": + case "zstd": + case "zstd_no_dict": case "lucene_default": return s; default: if (Codec.availableCodecs().contains(s) == false) { // we don't error message the not officially supported ones throw new IllegalArgumentException( - "unknown value for [index.codec] must be one of [default, best_compression] but was: " + s + "unknown value for [index.codec] must be one of [default, best_compression, zstd, zstd_no_dict] but was: " + s ); } return s; diff --git a/sandbox/plugins/custom-codecs/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec b/server/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec similarity index 100% rename from sandbox/plugins/custom-codecs/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec rename to server/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec diff --git a/server/src/test/java/org/opensearch/index/codec/CodecTests.java b/server/src/test/java/org/opensearch/index/codec/CodecTests.java index bc50525412954..016e785f8da13 100644 --- a/server/src/test/java/org/opensearch/index/codec/CodecTests.java +++ b/server/src/test/java/org/opensearch/index/codec/CodecTests.java @@ -47,6 +47,8 @@ import org.opensearch.env.Environment; import org.opensearch.index.IndexSettings; import org.opensearch.index.analysis.IndexAnalyzers; +import org.opensearch.index.codec.customcodecs.Lucene95CustomCodec; +import org.opensearch.index.codec.customcodecs.Lucene95CustomStoredFieldsFormat; import org.opensearch.index.mapper.MapperService; import org.opensearch.index.similarity.SimilarityService; import org.opensearch.indices.mapper.MapperRegistry; @@ -63,40 +65,75 @@ public class CodecTests extends OpenSearchTestCase { public void testResolveDefaultCodecs() throws Exception { - CodecService codecService = createCodecService(); + CodecService codecService = createCodecService(false); assertThat(codecService.codec("default"), instanceOf(PerFieldMappingPostingFormatCodec.class)); assertThat(codecService.codec("default"), instanceOf(Lucene95Codec.class)); } public void testDefault() throws Exception { - Codec codec = createCodecService().codec("default"); + Codec codec = createCodecService(false).codec("default"); assertStoredFieldsCompressionEquals(Lucene95Codec.Mode.BEST_SPEED, codec); } public void testBestCompression() throws Exception { - Codec codec = createCodecService().codec("best_compression"); + Codec codec = createCodecService(false).codec("best_compression"); assertStoredFieldsCompressionEquals(Lucene95Codec.Mode.BEST_COMPRESSION, codec); } + public void testZstd() throws Exception { + Codec codec = createCodecService(false).codec("zstd"); + assertStoredFieldsCompressionEquals(Lucene95CustomCodec.Mode.ZSTD, codec); + } + + public void testZstdNoDict() throws Exception { + Codec codec = createCodecService(false).codec("zstd_no_dict"); + assertStoredFieldsCompressionEquals(Lucene95CustomCodec.Mode.ZSTD_NO_DICT, codec); + } + + public void testDefaultMapperServiceNull() throws Exception { + Codec codec = createCodecService(true).codec("default"); + assertStoredFieldsCompressionEquals(Lucene95Codec.Mode.BEST_SPEED, codec); + } + + public void testBestCompressionMapperServiceNull() throws Exception { + Codec codec = createCodecService(true).codec("best_compression"); + assertStoredFieldsCompressionEquals(Lucene95Codec.Mode.BEST_COMPRESSION, codec); + } + + public void testZstdMapperServiceNull() throws Exception { + Codec codec = createCodecService(true).codec("zstd"); + assertStoredFieldsCompressionEquals(Lucene95CustomCodec.Mode.ZSTD, codec); + } + + public void testZstdNoDictMapperServiceNull() throws Exception { + Codec codec = createCodecService(true).codec("zstd_no_dict"); + assertStoredFieldsCompressionEquals(Lucene95CustomCodec.Mode.ZSTD_NO_DICT, codec); + } + + public void testExceptionCodecNull() { + assertThrows(IllegalArgumentException.class, () -> createCodecService(true).codec(null)); + } + // write some docs with it, inspect .si to see this was the used compression private void assertStoredFieldsCompressionEquals(Lucene95Codec.Mode expected, Codec actual) throws Exception { - Directory dir = newDirectory(); - IndexWriterConfig iwc = newIndexWriterConfig(null); - iwc.setCodec(actual); - IndexWriter iw = new IndexWriter(dir, iwc); - iw.addDocument(new Document()); - iw.commit(); - iw.close(); - DirectoryReader ir = DirectoryReader.open(dir); - SegmentReader sr = (SegmentReader) ir.leaves().get(0).reader(); + SegmentReader sr = getSegmentReader(actual); String v = sr.getSegmentInfo().info.getAttribute(Lucene90StoredFieldsFormat.MODE_KEY); assertNotNull(v); assertEquals(expected, Lucene95Codec.Mode.valueOf(v)); - ir.close(); - dir.close(); } - private CodecService createCodecService() throws IOException { + private void assertStoredFieldsCompressionEquals(Lucene95CustomCodec.Mode expected, Codec actual) throws Exception { + SegmentReader sr = getSegmentReader(actual); + String v = sr.getSegmentInfo().info.getAttribute(Lucene95CustomStoredFieldsFormat.MODE_KEY); + assertNotNull(v); + assertEquals(expected, Lucene95CustomCodec.Mode.valueOf(v)); + } + + private CodecService createCodecService(boolean isMapperServiceNull) throws IOException { + + if (isMapperServiceNull) { + return new CodecService(null, LogManager.getLogger("test")); + } Settings nodeSettings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()).build(); IndexSettings settings = IndexSettingsModule.newIndexSettings("_na", nodeSettings); SimilarityService similarityService = new SimilarityService(settings, null, Collections.emptyMap()); @@ -115,4 +152,18 @@ private CodecService createCodecService() throws IOException { return new CodecService(service, LogManager.getLogger("test")); } + private SegmentReader getSegmentReader(Codec codec) throws IOException { + Directory dir = newDirectory(); + IndexWriterConfig iwc = newIndexWriterConfig(null); + iwc.setCodec(codec); + IndexWriter iw = new IndexWriter(dir, iwc); + iw.addDocument(new Document()); + iw.commit(); + iw.close(); + DirectoryReader ir = DirectoryReader.open(dir); + SegmentReader sr = (SegmentReader) ir.leaves().get(0).reader(); + ir.close(); + dir.close(); + return sr; + } } diff --git a/sandbox/plugins/custom-codecs/src/test/java/org/opensearch/index/codec/customcodecs/AbstractCompressorTests.java b/server/src/test/java/org/opensearch/index/codec/customcodecs/AbstractCompressorTests.java similarity index 100% rename from sandbox/plugins/custom-codecs/src/test/java/org/opensearch/index/codec/customcodecs/AbstractCompressorTests.java rename to server/src/test/java/org/opensearch/index/codec/customcodecs/AbstractCompressorTests.java index fcfb06ca6b050..cc794eb2c48f1 100644 --- a/sandbox/plugins/custom-codecs/src/test/java/org/opensearch/index/codec/customcodecs/AbstractCompressorTests.java +++ b/server/src/test/java/org/opensearch/index/codec/customcodecs/AbstractCompressorTests.java @@ -8,21 +8,21 @@ package org.opensearch.index.codec.customcodecs; -import org.apache.lucene.tests.util.LineFileDocs; -import org.apache.lucene.tests.util.TestUtil; -import org.opensearch.test.OpenSearchTestCase; import org.apache.lucene.codecs.compressing.Compressor; import org.apache.lucene.codecs.compressing.Decompressor; import org.apache.lucene.store.ByteArrayDataInput; import org.apache.lucene.store.ByteBuffersDataInput; import org.apache.lucene.store.ByteBuffersDataOutput; +import org.apache.lucene.tests.util.LineFileDocs; +import org.apache.lucene.tests.util.TestUtil; import org.apache.lucene.util.BytesRef; +import org.opensearch.test.OpenSearchTestCase; -import java.util.List; -import java.nio.ByteBuffer; import java.io.ByteArrayOutputStream; import java.io.IOException; +import java.nio.ByteBuffer; import java.nio.charset.StandardCharsets; +import java.util.List; import java.util.Random; /** diff --git a/server/src/test/java/org/opensearch/index/codec/customcodecs/Lucene95CustomStoredFieldsFormatTests.java b/server/src/test/java/org/opensearch/index/codec/customcodecs/Lucene95CustomStoredFieldsFormatTests.java new file mode 100644 index 0000000000000..4f23450ce0b39 --- /dev/null +++ b/server/src/test/java/org/opensearch/index/codec/customcodecs/Lucene95CustomStoredFieldsFormatTests.java @@ -0,0 +1,27 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.codec.customcodecs; + +import org.opensearch.test.OpenSearchTestCase; + +public class Lucene95CustomStoredFieldsFormatTests extends OpenSearchTestCase { + + public void testDefaultLucene95CustomCodecMode() { + Lucene95CustomStoredFieldsFormat lucene95CustomStoredFieldsFormat = new Lucene95CustomStoredFieldsFormat(); + assertEquals(Lucene95CustomCodec.Mode.ZSTD, lucene95CustomStoredFieldsFormat.getMode()); + } + + public void testZstdNoDictLucene95CustomCodecMode() { + Lucene95CustomStoredFieldsFormat lucene95CustomStoredFieldsFormat = new Lucene95CustomStoredFieldsFormat( + Lucene95CustomCodec.Mode.ZSTD_NO_DICT + ); + assertEquals(Lucene95CustomCodec.Mode.ZSTD_NO_DICT, lucene95CustomStoredFieldsFormat.getMode()); + } + +} diff --git a/sandbox/plugins/custom-codecs/src/test/java/org/opensearch/index/codec/customcodecs/ZstdCompressorTests.java b/server/src/test/java/org/opensearch/index/codec/customcodecs/ZstdCompressorTests.java similarity index 100% rename from sandbox/plugins/custom-codecs/src/test/java/org/opensearch/index/codec/customcodecs/ZstdCompressorTests.java rename to server/src/test/java/org/opensearch/index/codec/customcodecs/ZstdCompressorTests.java diff --git a/sandbox/plugins/custom-codecs/src/test/java/org/opensearch/index/codec/customcodecs/ZstdNoDictCompressorTests.java b/server/src/test/java/org/opensearch/index/codec/customcodecs/ZstdNoDictCompressorTests.java similarity index 100% rename from sandbox/plugins/custom-codecs/src/test/java/org/opensearch/index/codec/customcodecs/ZstdNoDictCompressorTests.java rename to server/src/test/java/org/opensearch/index/codec/customcodecs/ZstdNoDictCompressorTests.java