Skip to content

Commit

Permalink
Disallow invalid characters for physical file name to be included wit…
Browse files Browse the repository at this point in the history
…hin vector field name. (#1936)

* Block a vector field to have invalid characters for a physical file name.

Signed-off-by: Dooyong Kim <[email protected]>

* Block a vector field to have invalid characters for a physical file name.

Signed-off-by: Dooyong Kim <[email protected]>

---------

Signed-off-by: Dooyong Kim <[email protected]>
Signed-off-by: Doo Yong Kim <[email protected]>
Co-authored-by: Dooyong Kim <[email protected]>
  • Loading branch information
0ctopus13prime and Dooyong Kim authored Aug 12, 2024
1 parent 5a5351f commit f5ba771
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 0 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
* Add script_fields context to KNNAllowlist [#1917] (https://github.com/opensearch-project/k-NN/pull/1917)
* Fix graph merge stats size calculation [#1844](https://github.com/opensearch-project/k-NN/pull/1844)
* Integrate Lucene Vector field with native engines to use KNNVectorFormat during segment creation [#1945](https://github.com/opensearch-project/k-NN/pull/1945)
* Disallow a vector field to have an invalid character for a physical file name. [#1936] (https://github.com/opensearch-project/k-NN/pull/1936)
### Infrastructure
### Documentation
### Maintenance
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
import java.util.Map;
import java.util.Optional;
import java.util.function.Supplier;
import java.util.stream.Collectors;

import lombok.extern.log4j.Log4j2;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
Expand All @@ -23,6 +25,7 @@
import org.opensearch.common.Explicit;
import org.opensearch.common.ValidationException;
import org.opensearch.common.xcontent.support.XContentMapValues;
import org.opensearch.core.common.Strings;
import org.opensearch.core.xcontent.ToXContent;
import org.opensearch.core.xcontent.XContentBuilder;
import org.opensearch.core.xcontent.XContentParser;
Expand Down Expand Up @@ -219,6 +222,8 @@ private void validateFlatMapper() {

@Override
public KNNVectorFieldMapper build(BuilderContext context) {
validateFullFieldName(context);

final MultiFields multiFieldsBuilder = this.multiFieldsBuilder.build(this, context);
final CopyTo copyToBuilder = copyTo.build();
final Explicit<Boolean> ignoreMalformed = ignoreMalformed(context);
Expand Down Expand Up @@ -413,6 +418,32 @@ private KNNEngine validateDimensions(final KNNMethodContext knnMethodContext, fi
}
return knnEngine;
}

/**
* Validate whether provided full field name contain any invalid characters for physical file name.
* At the moment, we use a field name as a part of file name while we throw an exception
* if a physical file name contains any invalid characters when creating snapshot.
* To prevent from this happening, we restrict vector field name and make sure generated file to have a valid name.
*
* @param context : Builder context to have field name info.
*/
private void validateFullFieldName(final BuilderContext context) {
final String fullFieldName = buildFullName(context);
for (char ch : fullFieldName.toCharArray()) {
if (Strings.INVALID_FILENAME_CHARS.contains(ch)) {
throw new IllegalArgumentException(
String.format(
Locale.ROOT,
"Vector field name must not include invalid characters of %s. "
+ "Provided field name=[%s] had a disallowed character [%c]",
Strings.INVALID_FILENAME_CHARS.stream().map(c -> "'" + c + "'").collect(Collectors.toList()),
fullFieldName,
ch
)
);
}
}
}
}

public static class TypeParser implements Mapper.TypeParser {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import org.opensearch.common.settings.IndexScopedSettings;
import org.opensearch.common.settings.Settings;
import org.opensearch.common.xcontent.XContentFactory;
import org.opensearch.core.common.Strings;
import org.opensearch.core.xcontent.XContentBuilder;
import org.opensearch.index.IndexSettings;
import org.opensearch.index.mapper.ContentPath;
Expand Down Expand Up @@ -1171,6 +1172,23 @@ public void testBuilder_whenBinaryWithLegacyKNNEnabled_thenException() {
assertTrue(ex.getMessage(), ex.getMessage().contains("is not supported for"));
}

public void testBuild_whenInvalidCharsInFieldName_thenThrowException() {
for (char disallowChar : Strings.INVALID_FILENAME_CHARS) {
// When an invalid vector field name was given.
final String invalidVectorFieldName = "fieldname" + disallowChar;

// Prepare context.
Settings settings = Settings.builder().put(settings(CURRENT).build()).put(KNN_INDEX, true).build();
Mapper.BuilderContext builderContext = new Mapper.BuilderContext(settings, new ContentPath());

// IllegalArgumentException should be thrown.
Exception e = assertThrows(IllegalArgumentException.class, () -> {
new KNNVectorFieldMapper.Builder(invalidVectorFieldName, null, CURRENT, null).build(builderContext);
});
assertTrue(e.getMessage(), e.getMessage().contains("Vector field name must not include"));
}
}

private LuceneFieldMapper.CreateLuceneFieldMapperInput.CreateLuceneFieldMapperInputBuilder createLuceneFieldMapperInputBuilder(
VectorDataType vectorDataType
) {
Expand Down

0 comments on commit f5ba771

Please sign in to comment.