Skip to content

Commit

Permalink
Disallowing compression level for lz4 and best_compression codec
Browse files Browse the repository at this point in the history
Signed-off-by: Sarthak Aggarwal <[email protected]>
  • Loading branch information
sarthakaggarwal97 committed Jul 18, 2023
1 parent ca74aac commit 0820f5f
Show file tree
Hide file tree
Showing 5 changed files with 273 additions and 10 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
### Added
- Add server version as REST response header [#6583](https://github.com/opensearch-project/OpenSearch/issues/6583)
- Start replication checkpointTimers on primary before segments upload to remote store. ([#8221](https://github.com/opensearch-project/OpenSearch/pull/8221))

- Disallowing compression level to be set for default and best_compression index codecs ([#8737](https://github.com/opensearch-project/OpenSearch/pull/8737))
### Dependencies
- Bump `org.apache.logging.log4j:log4j-core` from 2.17.1 to 2.20.0 ([#8307](https://github.com/opensearch-project/OpenSearch/pull/8307))

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.index.codec;

import org.apache.logging.log4j.core.util.Throwables;
import org.opensearch.action.admin.indices.settings.put.UpdateSettingsRequest;
import org.opensearch.cluster.metadata.IndexMetadata;
import org.opensearch.common.Randomness;
import org.opensearch.common.settings.Settings;
import org.opensearch.test.OpenSearchIntegTestCase;

import java.util.List;
import java.util.concurrent.ExecutionException;

import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked;

@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST)
public class CodecCompressionLevelIT extends OpenSearchIntegTestCase {

List<String> luceneCodecs = List.of("default", "best_compression");
List<String> ZStandardCodecs = List.of("zstd", "zstd_no_dict");

public void testLuceneCodecsCreateIndexWithCompressionLevel() {

internalCluster().ensureAtLeastNumDataNodes(1);
final String index = "test-index";

// creating index
assertThrows(
IllegalArgumentException.class,
() -> createIndex(
index,
Settings.builder()
.put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
.put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0)
.put("index.codec", getRandomCodec(luceneCodecs))
.put("index.codec.compression_level", randomIntBetween(1, 6))
.build()
)
);

createIndex(
index,
Settings.builder()
.put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
.put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0)
.put("index.codec", getRandomCodec(luceneCodecs))
.build()
);
ensureGreen(index);
}

public void testZStandardCodecsCreateIndexWithCompressionLevel() {

internalCluster().ensureAtLeastNumDataNodes(1);
final String index = "test-index";

// creating index
createIndex(
index,
Settings.builder()
.put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
.put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0)
.put("index.codec", getRandomCodec(ZStandardCodecs))
.put("index.codec.compression_level", randomIntBetween(1, 6))
.build()
);

ensureGreen(index);
}

public void testZStandardToLuceneCodecsWithCompressionLevel() throws ExecutionException, InterruptedException {

internalCluster().ensureAtLeastNumDataNodes(1);
final String index = "test-index";

// creating index
createIndex(
index,
Settings.builder()
.put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
.put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0)
.put("index.codec", getRandomCodec(ZStandardCodecs))
.put("index.codec.compression_level", randomIntBetween(1, 6))
.build()
);
ensureGreen(index);

assertAcked(client().admin().indices().prepareClose(index));

Throwable executionException = expectThrows(
ExecutionException.class,
() -> client().admin()
.indices()
.updateSettings(
new UpdateSettingsRequest(index).settings(Settings.builder().put("index.codec", getRandomCodec(luceneCodecs)))
)
.get()
);

Throwable rootCause = Throwables.getRootCause(executionException);
assertEquals(IllegalArgumentException.class, rootCause.getClass());
assertTrue(rootCause.getMessage().startsWith("Compression level cannot be set"));

assertAcked(
client().admin()
.indices()
.updateSettings(
new UpdateSettingsRequest(index).settings(
Settings.builder()
.put("index.codec", getRandomCodec(luceneCodecs))
.put("index.codec.compression_level", (String) null)
)
)
.get()
);

assertAcked(client().admin().indices().prepareOpen(index));
ensureGreen(index);
}

public void testLuceneToZStandardCodecsWithCompressionLevel() throws ExecutionException, InterruptedException {

internalCluster().ensureAtLeastNumDataNodes(1);
final String index = "test-index";

// creating index
createIndex(
index,
Settings.builder()
.put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
.put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0)
.put("index.codec", getRandomCodec(luceneCodecs))
.build()
);
ensureGreen(index);

assertAcked(client().admin().indices().prepareClose(index));

Throwable executionException = expectThrows(
ExecutionException.class,
() -> client().admin()
.indices()
.updateSettings(
new UpdateSettingsRequest(index).settings(
Settings.builder()
.put("index.codec", getRandomCodec(luceneCodecs))
.put("index.codec.compression_level", randomIntBetween(1, 6))
)
)
.get()
);

Throwable rootCause = Throwables.getRootCause(executionException);
assertEquals(IllegalArgumentException.class, rootCause.getClass());
assertTrue(rootCause.getMessage().startsWith("Compression level cannot be set"));

assertAcked(
client().admin()
.indices()
.updateSettings(
new UpdateSettingsRequest(index).settings(
Settings.builder()
.put("index.codec", getRandomCodec(ZStandardCodecs))
.put("index.codec.compression_level", randomIntBetween(1, 6))
)
)
.get()
);

assertAcked(client().admin().indices().prepareOpen(index));
ensureGreen(index);
}

private String getRandomCodec(List<String> codecList) {
return codecList.get(Randomness.get().nextInt(codecList.size()));
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,15 @@
import org.opensearch.common.Nullable;
import org.opensearch.common.collect.MapBuilder;
import org.opensearch.index.IndexSettings;
import org.opensearch.index.codec.customcodecs.Lucene95CustomCodec;
import org.opensearch.index.codec.customcodecs.ZstdCodec;
import org.opensearch.index.codec.customcodecs.ZstdNoDictCodec;
import org.opensearch.index.mapper.MapperService;

import java.util.Map;

import static org.opensearch.index.engine.EngineConfig.INDEX_CODEC_COMPRESSION_LEVEL_SETTING;
import static org.opensearch.index.engine.EngineConfig.INDEX_CODEC_SETTING;

/**
* Since Lucene 4.0 low level index segments are read and written through a
Expand All @@ -71,7 +73,11 @@ public class CodecService {
public CodecService(@Nullable MapperService mapperService, IndexSettings indexSettings, Logger logger) {
final MapBuilder<String, Codec> codecs = MapBuilder.<String, Codec>newMapBuilder();
assert null != indexSettings;
int compressionLevel = indexSettings.getValue(INDEX_CODEC_COMPRESSION_LEVEL_SETTING);
String codecName = indexSettings.getValue(INDEX_CODEC_SETTING);
int compressionLevel = Lucene95CustomCodec.DEFAULT_COMPRESSION_LEVEL;
if (isZStandardCodec(codecName)) {
compressionLevel = indexSettings.getValue(INDEX_CODEC_COMPRESSION_LEVEL_SETTING);
}
if (mapperService == null) {
codecs.put(DEFAULT_CODEC, new Lucene95Codec());
codecs.put(BEST_COMPRESSION_CODEC, new Lucene95Codec(Mode.BEST_COMPRESSION));
Expand Down Expand Up @@ -104,4 +110,9 @@ public Codec codec(String name) {
/**
 * Returns the keys of the {@code codecs} map as a newly allocated array.
 */
public String[] availableCodecs() {
    final String[] arrayTemplate = new String[0];
    return codecs.keySet().toArray(arrayTemplate);
}

/**
 * Tells whether the given codec name is one of the ZStandard codec names
 * ({@code ZSTD_CODEC} or {@code ZSTD_NO_DICT_CODEC}).
 *
 * @param codec codec name to check; {@code null} is treated as "not a ZStandard codec"
 * @return {@code true} if {@code codec} equals one of the two ZStandard codec names
 */
public static boolean isZStandardCodec(String codec) {
    // Compare constant-first so a null codec name yields false instead of a NullPointerException.
    return ZSTD_CODEC.equals(codec) || ZSTD_NO_DICT_CODEC.equals(codec);
}

}
42 changes: 37 additions & 5 deletions server/src/main/java/org/opensearch/index/engine/EngineConfig.java
Original file line number Diff line number Diff line change
Expand Up @@ -63,10 +63,13 @@
import java.util.Comparator;
import java.util.List;
import java.util.Objects;
import java.util.Set;
import java.util.function.BooleanSupplier;
import java.util.function.LongSupplier;
import java.util.function.Supplier;

import static org.opensearch.index.codec.CodecService.isZStandardCodec;

/**
* Holds all the configuration that is used to create an {@link Engine}.
* Once {@link Engine} has been created with this object, changes to this
Expand Down Expand Up @@ -148,13 +151,42 @@ public Supplier<RetentionLeases> retentionLeasesSupplier() {
* Compression Level gives a trade-off between compression ratio and speed. The higher compression level results in higher compression ratio but slower compression and decompression speeds.
* This setting is <b>not</b> realtime updateable.
*/
public static final Setting<Integer> INDEX_CODEC_COMPRESSION_LEVEL_SETTING = Setting.intSetting(

public static final Setting<Integer> INDEX_CODEC_COMPRESSION_LEVEL_SETTING = new Setting<>(
"index.codec.compression_level",
3,
1,
6,
Integer.toString(3),
new Setting.IntegerParser(1, 6, "index.codec.compression_level", false),
Property.IndexScope
);
) {
@Override
public Set<SettingDependency> getSettingsDependencies(String key) {
return Set.of(new SettingDependency() {
@Override
public Setting<String> getSetting() {
return INDEX_CODEC_SETTING;
}

@Override
public void validate(String key, Object value, Object dependency) {
if (!(dependency instanceof String)) {
throw new IllegalArgumentException("Codec should be of string type.");
}
doValidateCodecSettings((String) dependency);
}
});
}
};

/**
 * Rejects the given codec name unless {@code isZStandardCodec} accepts it.
 *
 * @param codec codec name the compression level is being combined with
 * @throws IllegalArgumentException if {@code codec} is not a ZStandard codec
 */
private static void doValidateCodecSettings(final String codec) {
    if (isZStandardCodec(codec)) {
        return;
    }

    final String message = "Compression level cannot be set for the "
        + codec
        + " codec. Compression level settings is only applicable for zstd and zstd_no_dict codecs.";
    throw new IllegalArgumentException(message);
}

/**
* Configures an index to optimize documents with auto generated ids for append only. If this setting is updated from <code>false</code>
Expand Down
41 changes: 38 additions & 3 deletions server/src/test/java/org/opensearch/index/codec/CodecTests.java
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,15 @@
import org.apache.lucene.index.SegmentReader;
import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.util.LuceneTestCase.SuppressCodecs;
import org.opensearch.common.Randomness;
import org.opensearch.common.settings.IndexScopedSettings;
import org.opensearch.common.settings.Settings;
import org.opensearch.env.Environment;
import org.opensearch.index.IndexSettings;
import org.opensearch.index.analysis.IndexAnalyzers;
import org.opensearch.index.codec.customcodecs.Lucene95CustomCodec;
import org.opensearch.index.codec.customcodecs.Lucene95CustomStoredFieldsFormat;
import org.opensearch.index.engine.EngineConfig;
import org.opensearch.index.mapper.MapperService;
import org.opensearch.index.similarity.SimilarityService;
import org.opensearch.indices.mapper.MapperRegistry;
Expand All @@ -58,12 +61,16 @@

import java.io.IOException;
import java.util.Collections;
import java.util.List;

import static org.hamcrest.Matchers.instanceOf;

@SuppressCodecs("*") // we test against default codec so never get a random one here!
public class CodecTests extends OpenSearchTestCase {

// Codec names for which the tests below expect a compression level to be rejected.
List<String> luceneCodecs = List.of("default", "best_compression");
// Codec names for which the tests below expect a compression level to be accepted.
List<String> ZStandardCodecs = List.of("zstd", "zstd_no_dict");

public void testResolveDefaultCodecs() throws Exception {
CodecService codecService = createCodecService(false);
assertThat(codecService.codec("default"), instanceOf(PerFieldMappingPostingFormatCodec.class));
Expand Down Expand Up @@ -96,20 +103,43 @@ public void testZstdNoDict() throws Exception {

public void testZstdWithCompressionLevel() throws Exception {
int randomCompressionLevel = randomIntBetween(1, 6);
Codec codec = createCodecService(randomCompressionLevel).codec("zstd");
Codec codec = createCodecService(randomCompressionLevel, "zstd").codec("zstd");
assertStoredFieldsCompressionEquals(Lucene95CustomCodec.Mode.ZSTD, codec);
Lucene95CustomStoredFieldsFormat storedFieldsFormat = (Lucene95CustomStoredFieldsFormat) codec.storedFieldsFormat();
assertEquals(randomCompressionLevel, storedFieldsFormat.getCompressionLevel());
}

public void testZstdNoDictWithCompressionLevel() throws Exception {
int randomCompressionLevel = randomIntBetween(1, 6);
Codec codec = createCodecService(randomCompressionLevel).codec("zstd_no_dict");
Codec codec = createCodecService(randomCompressionLevel, "zstd_no_dict").codec("zstd_no_dict");
assertStoredFieldsCompressionEquals(Lucene95CustomCodec.Mode.ZSTD_NO_DICT, codec);
Lucene95CustomStoredFieldsFormat storedFieldsFormat = (Lucene95CustomStoredFieldsFormat) codec.storedFieldsFormat();
assertEquals(randomCompressionLevel, storedFieldsFormat.getCompressionLevel());
}

/**
 * Validates index-scoped settings that combine a compression level with a codec: the
 * combination must pass for a ZStandard codec and fail for a Lucene codec.
 */
public void testBestCompressionWithCompressionLevel() {
    // A compression level together with a ZStandard codec validates without error.
    final Settings.Builder acceptedBuilder = Settings.builder();
    acceptedBuilder.put(EngineConfig.INDEX_CODEC_COMPRESSION_LEVEL_SETTING.getKey(), randomIntBetween(1, 6));
    acceptedBuilder.put(EngineConfig.INDEX_CODEC_SETTING.getKey(), getRandomCodec(ZStandardCodecs));
    final Settings accepted = acceptedBuilder.build();
    new IndexScopedSettings(accepted, IndexScopedSettings.BUILT_IN_INDEX_SETTINGS).validate(accepted, true);

    // The same compression level together with a Lucene codec is rejected.
    final Settings.Builder rejectedBuilder = Settings.builder();
    rejectedBuilder.put(EngineConfig.INDEX_CODEC_COMPRESSION_LEVEL_SETTING.getKey(), randomIntBetween(1, 6));
    rejectedBuilder.put(EngineConfig.INDEX_CODEC_SETTING.getKey(), getRandomCodec(luceneCodecs));
    final Settings rejected = rejectedBuilder.build();
    final IndexScopedSettings rejectedScope = new IndexScopedSettings(rejected, IndexScopedSettings.BUILT_IN_INDEX_SETTINGS);

    final IllegalArgumentException failure = expectThrows(
        IllegalArgumentException.class,
        () -> rejectedScope.validate(rejected, true)
    );
    final String failureMessage = failure.getMessage();
    assertTrue(failureMessage.startsWith("Compression level cannot be set"));
}

public void testDefaultMapperServiceNull() throws Exception {
Codec codec = createCodecService(true).codec("default");
assertStoredFieldsCompressionEquals(Lucene95Codec.Mode.BEST_SPEED, codec);
Expand Down Expand Up @@ -165,9 +195,10 @@ private CodecService createCodecService(boolean isMapperServiceNull) throws IOEx
return buildCodecService(nodeSettings);
}

private CodecService createCodecService(int randomCompressionLevel) throws IOException {
private CodecService createCodecService(int randomCompressionLevel, String codec) throws IOException {
Settings nodeSettings = Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
.put("index.codec", codec)
.put("index.codec.compression_level", randomCompressionLevel)
.build();
return buildCodecService(nodeSettings);
Expand Down Expand Up @@ -207,4 +238,8 @@ private SegmentReader getSegmentReader(Codec codec) throws IOException {
return sr;
}

/**
 * Picks one entry of the given list using the random source returned by {@code Randomness.get()}.
 *
 * @param codecList non-empty list of codec names
 * @return one of the names in {@code codecList}
 */
private String getRandomCodec(List<String> codecList) {
    final int pickedIndex = Randomness.get().nextInt(codecList.size());
    return codecList.get(pickedIndex);
}

}

0 comments on commit 0820f5f

Please sign in to comment.