From da7e70f7ebc95861bbc471961ee05088d237cb1d Mon Sep 17 00:00:00 2001 From: Vamsi Manohar Date: Tue, 12 Mar 2024 12:28:16 -0700 Subject: [PATCH] Handle ALTER Index Queries in SQL Plugin Signed-off-by: Vamsi Manohar --- build.gradle | 1 + .../src/main/antlr/FlintSparkSqlExtensions.g4 | 22 +- spark/src/main/antlr/SparkSqlBase.g4 | 2 + spark/src/main/antlr/SqlBaseLexer.g4 | 14 +- spark/src/main/antlr/SqlBaseParser.g4 | 3 +- .../sql/spark/dispatcher/IndexDMLHandler.java | 108 +- .../spark/dispatcher/RefreshQueryHandler.java | 19 +- .../dispatcher/SparkQueryDispatcher.java | 38 +- .../dispatcher/StreamingQueryHandler.java | 2 +- .../dispatcher/model/FlintIndexOptions.java | 39 + .../model/IndexQueryActionType.java | 3 +- .../dispatcher/model/IndexQueryDetails.java | 15 +- .../sql/spark/flint/FlintIndexMetadata.java | 38 +- .../spark/flint/FlintIndexMetadataReader.java | 23 - .../flint/FlintIndexMetadataReaderImpl.java | 33 - .../flint/FlintIndexMetadataService.java | 25 + .../flint/FlintIndexMetadataServiceImpl.java | 158 +++ .../sql/spark/flint/FlintIndexState.java | 10 +- .../spark/flint/operation/FlintIndexOp.java | 173 ++- .../flint/operation/FlintIndexOpAlter.java | 65 + .../flint/operation/FlintIndexOpCancel.java | 38 +- .../flint/operation/FlintIndexOpDelete.java | 39 - .../flint/operation/FlintIndexOpDrop.java | 54 + .../config/AsyncExecutorServiceModule.java | 9 +- .../sql/spark/utils/SQLQueryUtils.java | 53 +- .../AsyncQueryExecutorServiceSpec.java | 65 +- .../AsyncQueryGetResultSpecTest.java | 3 +- .../spark/asyncquery/IndexQuerySpecTest.java | 1070 ++++++++++++++--- .../asyncquery/model/MockFlintIndex.java | 72 ++ .../asyncquery/model/MockFlintSparkJob.java | 83 ++ .../spark/dispatcher/IndexDMLHandlerTest.java | 118 +- .../dispatcher/SparkQueryDispatcherTest.java | 28 +- .../FlintIndexMetadataReaderImplTest.java | 117 -- .../FlintIndexMetadataServiceImplTest.java | 190 +++ .../FlintIndexMetadataServiceSpecTest.java | 115 ++ 
.../spark/flint/FlintIndexMetadataTest.java | 85 -- .../spark/flint/IndexQueryDetailsTest.java | 3 +- .../flint/operation/FlintIndexOpTest.java | 137 ++- .../sql/spark/utils/SQLQueryUtilsTest.java | 40 +- .../opensearch/sql/spark/utils/TestUtils.java | 20 + ...logs_covering_corrupted_index_mapping.json | 33 + ...mydb_http_logs_covering_index_mapping.json | 39 + ...mydb_http_logs_skipping_index_mapping.json | 39 + .../flint_my_glue_mydb_mv_mapping.json | 33 + ...mys3_default_http_logs_skipping_index.json | 23 +- 45 files changed, 2538 insertions(+), 759 deletions(-) create mode 100644 spark/src/main/java/org/opensearch/sql/spark/dispatcher/model/FlintIndexOptions.java delete mode 100644 spark/src/main/java/org/opensearch/sql/spark/flint/FlintIndexMetadataReader.java delete mode 100644 spark/src/main/java/org/opensearch/sql/spark/flint/FlintIndexMetadataReaderImpl.java create mode 100644 spark/src/main/java/org/opensearch/sql/spark/flint/FlintIndexMetadataService.java create mode 100644 spark/src/main/java/org/opensearch/sql/spark/flint/FlintIndexMetadataServiceImpl.java create mode 100644 spark/src/main/java/org/opensearch/sql/spark/flint/operation/FlintIndexOpAlter.java delete mode 100644 spark/src/main/java/org/opensearch/sql/spark/flint/operation/FlintIndexOpDelete.java create mode 100644 spark/src/main/java/org/opensearch/sql/spark/flint/operation/FlintIndexOpDrop.java create mode 100644 spark/src/test/java/org/opensearch/sql/spark/asyncquery/model/MockFlintIndex.java create mode 100644 spark/src/test/java/org/opensearch/sql/spark/asyncquery/model/MockFlintSparkJob.java delete mode 100644 spark/src/test/java/org/opensearch/sql/spark/flint/FlintIndexMetadataReaderImplTest.java create mode 100644 spark/src/test/java/org/opensearch/sql/spark/flint/FlintIndexMetadataServiceImplTest.java create mode 100644 spark/src/test/java/org/opensearch/sql/spark/flint/FlintIndexMetadataServiceSpecTest.java delete mode 100644 
spark/src/test/java/org/opensearch/sql/spark/flint/FlintIndexMetadataTest.java create mode 100644 spark/src/test/resources/flint-index-mappings/flint_my_glue_mydb_http_logs_covering_corrupted_index_mapping.json create mode 100644 spark/src/test/resources/flint-index-mappings/flint_my_glue_mydb_http_logs_covering_index_mapping.json create mode 100644 spark/src/test/resources/flint-index-mappings/flint_my_glue_mydb_http_logs_skipping_index_mapping.json create mode 100644 spark/src/test/resources/flint-index-mappings/flint_my_glue_mydb_mv_mapping.json diff --git a/build.gradle b/build.gradle index ac85b2761f..3e180d4042 100644 --- a/build.gradle +++ b/build.gradle @@ -117,6 +117,7 @@ allprojects { configurations.all { resolutionStrategy.force "org.jetbrains.kotlin:kotlin-stdlib:1.9.10" resolutionStrategy.force "org.jetbrains.kotlin:kotlin-stdlib-common:1.9.10" + resolutionStrategy.force "net.bytebuddy:byte-buddy:1.14.9" } } diff --git a/spark/src/main/antlr/FlintSparkSqlExtensions.g4 b/spark/src/main/antlr/FlintSparkSqlExtensions.g4 index 219bbe782b..3c22becf54 100644 --- a/spark/src/main/antlr/FlintSparkSqlExtensions.g4 +++ b/spark/src/main/antlr/FlintSparkSqlExtensions.g4 @@ -26,6 +26,7 @@ skippingIndexStatement : createSkippingIndexStatement | refreshSkippingIndexStatement | describeSkippingIndexStatement + | alterSkippingIndexStatement | dropSkippingIndexStatement | vacuumSkippingIndexStatement ; @@ -46,6 +47,12 @@ describeSkippingIndexStatement : (DESC | DESCRIBE) SKIPPING INDEX ON tableName ; +alterSkippingIndexStatement + : ALTER SKIPPING INDEX + ON tableName + WITH LEFT_PAREN propertyList RIGHT_PAREN + ; + dropSkippingIndexStatement : DROP SKIPPING INDEX ON tableName ; @@ -59,6 +66,7 @@ coveringIndexStatement | refreshCoveringIndexStatement | showCoveringIndexStatement | describeCoveringIndexStatement + | alterCoveringIndexStatement | dropCoveringIndexStatement | vacuumCoveringIndexStatement ; @@ -83,6 +91,12 @@ describeCoveringIndexStatement : (DESC | 
DESCRIBE) INDEX indexName ON tableName ; +alterCoveringIndexStatement + : ALTER INDEX indexName + ON tableName + WITH LEFT_PAREN propertyList RIGHT_PAREN + ; + dropCoveringIndexStatement : DROP INDEX indexName ON tableName ; @@ -96,6 +110,7 @@ materializedViewStatement | refreshMaterializedViewStatement | showMaterializedViewStatement | describeMaterializedViewStatement + | alterMaterializedViewStatement | dropMaterializedViewStatement | vacuumMaterializedViewStatement ; @@ -118,6 +133,11 @@ describeMaterializedViewStatement : (DESC | DESCRIBE) MATERIALIZED VIEW mvName=multipartIdentifier ; +alterMaterializedViewStatement + : ALTER MATERIALIZED VIEW mvName=multipartIdentifier + WITH LEFT_PAREN propertyList RIGHT_PAREN + ; + dropMaterializedViewStatement : DROP MATERIALIZED VIEW mvName=multipartIdentifier ; @@ -163,7 +183,7 @@ indexColTypeList ; indexColType - : identifier skipType=(PARTITION | VALUE_SET | MIN_MAX) + : identifier skipType=(PARTITION | VALUE_SET | MIN_MAX | BLOOM_FILTER) (LEFT_PAREN skipParams RIGHT_PAREN)? 
; diff --git a/spark/src/main/antlr/SparkSqlBase.g4 b/spark/src/main/antlr/SparkSqlBase.g4 index 01f45016d6..8f9ed570f8 100644 --- a/spark/src/main/antlr/SparkSqlBase.g4 +++ b/spark/src/main/antlr/SparkSqlBase.g4 @@ -139,6 +139,7 @@ nonReserved // Flint lexical tokens +BLOOM_FILTER: 'BLOOM_FILTER'; MIN_MAX: 'MIN_MAX'; SKIPPING: 'SKIPPING'; VALUE_SET: 'VALUE_SET'; @@ -155,6 +156,7 @@ DOT: '.'; AS: 'AS'; +ALTER: 'ALTER'; CREATE: 'CREATE'; DESC: 'DESC'; DESCRIBE: 'DESCRIBE'; diff --git a/spark/src/main/antlr/SqlBaseLexer.g4 b/spark/src/main/antlr/SqlBaseLexer.g4 index 174887def6..7c376e2268 100644 --- a/spark/src/main/antlr/SqlBaseLexer.g4 +++ b/spark/src/main/antlr/SqlBaseLexer.g4 @@ -79,6 +79,7 @@ COMMA: ','; DOT: '.'; LEFT_BRACKET: '['; RIGHT_BRACKET: ']'; +BANG: '!'; // NOTE: If you add a new token in the list below, you should update the list of keywords // and reserved tag in `docs/sql-ref-ansi-compliance.md#sql-keywords`, and @@ -273,7 +274,7 @@ NANOSECOND: 'NANOSECOND'; NANOSECONDS: 'NANOSECONDS'; NATURAL: 'NATURAL'; NO: 'NO'; -NOT: 'NOT' | '!'; +NOT: 'NOT'; NULL: 'NULL'; NULLS: 'NULLS'; NUMERIC: 'NUMERIC'; @@ -510,8 +511,13 @@ BIGDECIMAL_LITERAL | DECIMAL_DIGITS EXPONENT? 'BD' {isValidDecimal()}? ; +// Generalize the identifier to give a sensible INVALID_IDENTIFIER error message: +// * Unicode letters rather than a-z and A-Z only +// * URI paths for table references using paths +// We then narrow down to ANSI rules in exitUnquotedIdentifier() in the parser. IDENTIFIER - : (LETTER | DIGIT | '_')+ + : (UNICODE_LETTER | DIGIT | '_')+ + | UNICODE_LETTER+ '://' (UNICODE_LETTER | DIGIT | '_' | '/' | '-' | '.' | '?' | '=' | '&' | '#' | '%')+ ; BACKQUOTED_IDENTIFIER @@ -535,6 +541,10 @@ fragment LETTER : [A-Z] ; +fragment UNICODE_LETTER + : [\p{L}] + ; + SIMPLE_COMMENT : '--' ('\\\n' | ~[\r\n])* '\r'? '\n'? 
-> channel(HIDDEN) ; diff --git a/spark/src/main/antlr/SqlBaseParser.g4 b/spark/src/main/antlr/SqlBaseParser.g4 index 801cc62491..41a5ec241c 100644 --- a/spark/src/main/antlr/SqlBaseParser.g4 +++ b/spark/src/main/antlr/SqlBaseParser.g4 @@ -388,6 +388,7 @@ describeFuncName | comparisonOperator | arithmeticOperator | predicateOperator + | BANG ; describeColName @@ -946,7 +947,7 @@ expressionSeq ; booleanExpression - : NOT booleanExpression #logicalNot + : (NOT | BANG) booleanExpression #logicalNot | EXISTS LEFT_PAREN query RIGHT_PAREN #exists | valueExpression predicate? #predicated | left=booleanExpression operator=AND right=booleanExpression #logicalBinary diff --git a/spark/src/main/java/org/opensearch/sql/spark/dispatcher/IndexDMLHandler.java b/spark/src/main/java/org/opensearch/sql/spark/dispatcher/IndexDMLHandler.java index f153e94713..c2351bcd0b 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/dispatcher/IndexDMLHandler.java +++ b/spark/src/main/java/org/opensearch/sql/spark/dispatcher/IndexDMLHandler.java @@ -10,11 +10,12 @@ import static org.opensearch.sql.spark.execution.statestore.StateStore.createIndexDMLResult; import com.amazonaws.services.emrserverless.model.JobRunState; +import java.util.Map; import lombok.RequiredArgsConstructor; +import org.apache.commons.lang3.StringUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.json.JSONObject; -import org.opensearch.client.Client; import org.opensearch.sql.datasource.model.DataSourceMetadata; import org.opensearch.sql.spark.asyncquery.model.AsyncQueryId; import org.opensearch.sql.spark.asyncquery.model.AsyncQueryJobMetadata; @@ -27,10 +28,10 @@ import org.opensearch.sql.spark.execution.statement.StatementState; import org.opensearch.sql.spark.execution.statestore.StateStore; import org.opensearch.sql.spark.flint.FlintIndexMetadata; -import org.opensearch.sql.spark.flint.FlintIndexMetadataReader; +import 
org.opensearch.sql.spark.flint.FlintIndexMetadataService; import org.opensearch.sql.spark.flint.operation.FlintIndexOp; -import org.opensearch.sql.spark.flint.operation.FlintIndexOpCancel; -import org.opensearch.sql.spark.flint.operation.FlintIndexOpDelete; +import org.opensearch.sql.spark.flint.operation.FlintIndexOpAlter; +import org.opensearch.sql.spark.flint.operation.FlintIndexOpDrop; import org.opensearch.sql.spark.response.JobExecutionResponseReader; /** Handle Index DML query. includes * DROP * ALT? */ @@ -38,47 +39,60 @@ public class IndexDMLHandler extends AsyncQueryHandler { private static final Logger LOG = LogManager.getLogger(); + // To be deprecated in 3.0. Still using for backward compatibility. public static final String DROP_INDEX_JOB_ID = "dropIndexJobId"; + public static final String DML_QUERY_JOB_ID = "DMLQueryJobId"; private final EMRServerlessClient emrServerlessClient; private final JobExecutionResponseReader jobExecutionResponseReader; - private final FlintIndexMetadataReader flintIndexMetadataReader; - - private final Client client; + private final FlintIndexMetadataService flintIndexMetadataService; private final StateStore stateStore; public static boolean isIndexDMLQuery(String jobId) { - return DROP_INDEX_JOB_ID.equalsIgnoreCase(jobId); + return DROP_INDEX_JOB_ID.equalsIgnoreCase(jobId) || DML_QUERY_JOB_ID.equalsIgnoreCase(jobId); } @Override public DispatchQueryResponse submit( DispatchQueryRequest dispatchQueryRequest, DispatchQueryContext context) { DataSourceMetadata dataSourceMetadata = context.getDataSourceMetadata(); - IndexQueryDetails indexDetails = context.getIndexQueryDetails(); - FlintIndexMetadata indexMetadata = flintIndexMetadataReader.getFlintIndexMetadata(indexDetails); - // if index is created without auto refresh. there is no job to cancel. 
- String status = JobRunState.FAILED.toString(); - String error = ""; - long startTime = 0L; + long startTime = System.currentTimeMillis(); try { - FlintIndexOp jobCancelOp = - new FlintIndexOpCancel( - stateStore, dispatchQueryRequest.getDatasource(), emrServerlessClient); - jobCancelOp.apply(indexMetadata); - - FlintIndexOp indexDeleteOp = - new FlintIndexOpDelete(stateStore, dispatchQueryRequest.getDatasource()); - indexDeleteOp.apply(indexMetadata); - status = JobRunState.SUCCESS.toString(); + IndexQueryDetails indexDetails = context.getIndexQueryDetails(); + FlintIndexMetadata indexMetadata = getFlintIndexMetadata(indexDetails); + executeIndexOp(dispatchQueryRequest, indexDetails, indexMetadata); + AsyncQueryId asyncQueryId = + storeIndexDMLResult( + dispatchQueryRequest, + dataSourceMetadata, + JobRunState.SUCCESS.toString(), + StringUtils.EMPTY, + startTime); + return new DispatchQueryResponse( + asyncQueryId, DML_QUERY_JOB_ID, dataSourceMetadata.getResultIndex(), null); } catch (Exception e) { - error = e.getMessage(); - LOG.error(e); + LOG.error(e.getMessage()); + AsyncQueryId asyncQueryId = + storeIndexDMLResult( + dispatchQueryRequest, + dataSourceMetadata, + JobRunState.FAILED.toString(), + e.getMessage(), + startTime); + return new DispatchQueryResponse( + asyncQueryId, DML_QUERY_JOB_ID, dataSourceMetadata.getResultIndex(), null); } + } + private AsyncQueryId storeIndexDMLResult( + DispatchQueryRequest dispatchQueryRequest, + DataSourceMetadata dataSourceMetadata, + String status, + String error, + long startTime) { AsyncQueryId asyncQueryId = AsyncQueryId.newAsyncQueryId(dataSourceMetadata.getName()); IndexDMLResult indexDMLResult = new IndexDMLResult( @@ -88,10 +102,48 @@ public DispatchQueryResponse submit( dispatchQueryRequest.getDatasource(), System.currentTimeMillis() - startTime, System.currentTimeMillis()); - String resultIndex = dataSourceMetadata.getResultIndex(); - createIndexDMLResult(stateStore, resultIndex).apply(indexDMLResult); + 
createIndexDMLResult(stateStore, dataSourceMetadata.getResultIndex()).apply(indexDMLResult); + return asyncQueryId; + } - return new DispatchQueryResponse(asyncQueryId, DROP_INDEX_JOB_ID, resultIndex, null); + private void executeIndexOp( + DispatchQueryRequest dispatchQueryRequest, + IndexQueryDetails indexQueryDetails, + FlintIndexMetadata indexMetadata) { + switch (indexQueryDetails.getIndexQueryActionType()) { + case DROP: + FlintIndexOp dropOp = + new FlintIndexOpDrop( + stateStore, dispatchQueryRequest.getDatasource(), emrServerlessClient); + dropOp.apply(indexMetadata); + break; + case ALTER: + FlintIndexOpAlter flintIndexOpAlter = + new FlintIndexOpAlter( + indexQueryDetails.getFlintIndexOptions(), + stateStore, + dispatchQueryRequest.getDatasource(), + emrServerlessClient, + flintIndexMetadataService); + flintIndexOpAlter.apply(indexMetadata); + break; + default: + throw new IllegalStateException( + String.format( + "IndexQueryActionType: %s is not supported in IndexDMLHandler.", + indexQueryDetails.getIndexQueryActionType())); + } + } + + private FlintIndexMetadata getFlintIndexMetadata(IndexQueryDetails indexDetails) { + Map indexMetadataMap = + flintIndexMetadataService.getFlintIndexMetadata(indexDetails.openSearchIndexName()); + if (!indexMetadataMap.containsKey(indexDetails.openSearchIndexName())) { + throw new IllegalStateException( + String.format( + "Couldn't fetch flint index: %s details", indexDetails.openSearchIndexName())); + } + return indexMetadataMap.get(indexDetails.openSearchIndexName()); } @Override diff --git a/spark/src/main/java/org/opensearch/sql/spark/dispatcher/RefreshQueryHandler.java b/spark/src/main/java/org/opensearch/sql/spark/dispatcher/RefreshQueryHandler.java index 0528a189f0..d55408f62e 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/dispatcher/RefreshQueryHandler.java +++ b/spark/src/main/java/org/opensearch/sql/spark/dispatcher/RefreshQueryHandler.java @@ -5,6 +5,7 @@ package org.opensearch.sql.spark.dispatcher; 
+import java.util.Map; import org.opensearch.sql.datasource.model.DataSourceMetadata; import org.opensearch.sql.spark.asyncquery.model.AsyncQueryJobMetadata; import org.opensearch.sql.spark.client.EMRServerlessClient; @@ -14,7 +15,7 @@ import org.opensearch.sql.spark.dispatcher.model.JobType; import org.opensearch.sql.spark.execution.statestore.StateStore; import org.opensearch.sql.spark.flint.FlintIndexMetadata; -import org.opensearch.sql.spark.flint.FlintIndexMetadataReader; +import org.opensearch.sql.spark.flint.FlintIndexMetadataService; import org.opensearch.sql.spark.flint.operation.FlintIndexOp; import org.opensearch.sql.spark.flint.operation.FlintIndexOpCancel; import org.opensearch.sql.spark.leasemanager.LeaseManager; @@ -23,18 +24,18 @@ /** Handle Refresh Query. */ public class RefreshQueryHandler extends BatchQueryHandler { - private final FlintIndexMetadataReader flintIndexMetadataReader; + private final FlintIndexMetadataService flintIndexMetadataService; private final StateStore stateStore; private final EMRServerlessClient emrServerlessClient; public RefreshQueryHandler( EMRServerlessClient emrServerlessClient, JobExecutionResponseReader jobExecutionResponseReader, - FlintIndexMetadataReader flintIndexMetadataReader, + FlintIndexMetadataService flintIndexMetadataService, StateStore stateStore, LeaseManager leaseManager) { super(emrServerlessClient, jobExecutionResponseReader, leaseManager); - this.flintIndexMetadataReader = flintIndexMetadataReader; + this.flintIndexMetadataService = flintIndexMetadataService; this.stateStore = stateStore; this.emrServerlessClient = emrServerlessClient; } @@ -42,8 +43,14 @@ public RefreshQueryHandler( @Override public String cancelJob(AsyncQueryJobMetadata asyncQueryJobMetadata) { String datasourceName = asyncQueryJobMetadata.getDatasourceName(); - FlintIndexMetadata indexMetadata = - flintIndexMetadataReader.getFlintIndexMetadata(asyncQueryJobMetadata.getIndexName()); + Map indexMetadataMap = + 
flintIndexMetadataService.getFlintIndexMetadata(asyncQueryJobMetadata.getIndexName()); + if (!indexMetadataMap.containsKey(asyncQueryJobMetadata.getIndexName())) { + throw new IllegalStateException( + String.format( + "Couldn't fetch flint index: %s details", asyncQueryJobMetadata.getIndexName())); + } + FlintIndexMetadata indexMetadata = indexMetadataMap.get(asyncQueryJobMetadata.getIndexName()); FlintIndexOp jobCancelOp = new FlintIndexOpCancel(stateStore, datasourceName, emrServerlessClient); jobCancelOp.apply(indexMetadata); diff --git a/spark/src/main/java/org/opensearch/sql/spark/dispatcher/SparkQueryDispatcher.java b/spark/src/main/java/org/opensearch/sql/spark/dispatcher/SparkQueryDispatcher.java index 2d6a456a61..f32c3433e8 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/dispatcher/SparkQueryDispatcher.java +++ b/spark/src/main/java/org/opensearch/sql/spark/dispatcher/SparkQueryDispatcher.java @@ -25,7 +25,7 @@ import org.opensearch.sql.spark.dispatcher.model.JobType; import org.opensearch.sql.spark.execution.session.SessionManager; import org.opensearch.sql.spark.execution.statestore.StateStore; -import org.opensearch.sql.spark.flint.FlintIndexMetadataReader; +import org.opensearch.sql.spark.flint.FlintIndexMetadataService; import org.opensearch.sql.spark.leasemanager.LeaseManager; import org.opensearch.sql.spark.response.JobExecutionResponseReader; import org.opensearch.sql.spark.rest.model.LangType; @@ -48,7 +48,7 @@ public class SparkQueryDispatcher { private JobExecutionResponseReader jobExecutionResponseReader; - private FlintIndexMetadataReader flintIndexMetadataReader; + private FlintIndexMetadataService flintIndexMetadataService; private Client client; @@ -81,10 +81,9 @@ public DispatchQueryResponse dispatch(DispatchQueryRequest dispatchQueryRequest) fillMissingDetails(dispatchQueryRequest, indexQueryDetails); contextBuilder.indexQueryDetails(indexQueryDetails); - if 
(IndexQueryActionType.DROP.equals(indexQueryDetails.getIndexQueryActionType())) { + if (isEligibleForIndexDMLHandling(indexQueryDetails)) { asyncQueryHandler = createIndexDMLHandler(emrServerlessClient); - } else if (IndexQueryActionType.CREATE.equals(indexQueryDetails.getIndexQueryActionType()) - && indexQueryDetails.isAutoRefresh()) { + } else if (isEligibleForStreamingQuery(indexQueryDetails)) { asyncQueryHandler = new StreamingQueryHandler( emrServerlessClient, jobExecutionResponseReader, leaseManager); @@ -94,7 +93,7 @@ public DispatchQueryResponse dispatch(DispatchQueryRequest dispatchQueryRequest) new RefreshQueryHandler( emrServerlessClient, jobExecutionResponseReader, - flintIndexMetadataReader, + flintIndexMetadataService, stateStore, leaseManager); } @@ -102,6 +101,25 @@ public DispatchQueryResponse dispatch(DispatchQueryRequest dispatchQueryRequest) return asyncQueryHandler.submit(dispatchQueryRequest, contextBuilder.build()); } + private boolean isEligibleForStreamingQuery(IndexQueryDetails indexQueryDetails) { + Boolean isCreateAutoRefreshIndex = + IndexQueryActionType.CREATE.equals(indexQueryDetails.getIndexQueryActionType()) + && indexQueryDetails.getFlintIndexOptions().autoRefresh(); + Boolean isAlterQuery = + IndexQueryActionType.ALTER.equals(indexQueryDetails.getIndexQueryActionType()); + return isCreateAutoRefreshIndex || isAlterQuery; + } + + private boolean isEligibleForIndexDMLHandling(IndexQueryDetails indexQueryDetails) { + return IndexQueryActionType.DROP.equals(indexQueryDetails.getIndexQueryActionType()) + || (IndexQueryActionType.ALTER.equals(indexQueryDetails.getIndexQueryActionType()) + && (indexQueryDetails + .getFlintIndexOptions() + .getProvidedOptions() + .containsKey("auto_refresh") + && !indexQueryDetails.getFlintIndexOptions().autoRefresh())); + } + public JSONObject getQueryResponse(AsyncQueryJobMetadata asyncQueryJobMetadata) { EMRServerlessClient emrServerlessClient = emrServerlessClientFactory.getClient(); if 
(asyncQueryJobMetadata.getSessionId() != null) { @@ -128,7 +146,7 @@ public String cancelJob(AsyncQueryJobMetadata asyncQueryJobMetadata) { new RefreshQueryHandler( emrServerlessClient, jobExecutionResponseReader, - flintIndexMetadataReader, + flintIndexMetadataService, stateStore, leaseManager); } else if (asyncQueryJobMetadata.getJobType() == JobType.STREAMING) { @@ -143,11 +161,7 @@ public String cancelJob(AsyncQueryJobMetadata asyncQueryJobMetadata) { private IndexDMLHandler createIndexDMLHandler(EMRServerlessClient emrServerlessClient) { return new IndexDMLHandler( - emrServerlessClient, - jobExecutionResponseReader, - flintIndexMetadataReader, - client, - stateStore); + emrServerlessClient, jobExecutionResponseReader, flintIndexMetadataService, stateStore); } // TODO: Revisit this logic. diff --git a/spark/src/main/java/org/opensearch/sql/spark/dispatcher/StreamingQueryHandler.java b/spark/src/main/java/org/opensearch/sql/spark/dispatcher/StreamingQueryHandler.java index 8cffa8e24a..8170b41c66 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/dispatcher/StreamingQueryHandler.java +++ b/spark/src/main/java/org/opensearch/sql/spark/dispatcher/StreamingQueryHandler.java @@ -77,7 +77,7 @@ public DispatchQueryResponse submit( .build() .toString(), tags, - indexQueryDetails.isAutoRefresh(), + indexQueryDetails.getFlintIndexOptions().autoRefresh(), dataSourceMetadata.getResultIndex()); String jobId = emrServerlessClient.startJobRun(startJobRequest); MetricUtils.incrementNumericalMetric(MetricName.EMR_STREAMING_QUERY_JOBS_CREATION_COUNT); diff --git a/spark/src/main/java/org/opensearch/sql/spark/dispatcher/model/FlintIndexOptions.java b/spark/src/main/java/org/opensearch/sql/spark/dispatcher/model/FlintIndexOptions.java new file mode 100644 index 0000000000..79af1c91ab --- /dev/null +++ b/spark/src/main/java/org/opensearch/sql/spark/dispatcher/model/FlintIndexOptions.java @@ -0,0 +1,39 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: 
Apache-2.0 + */ + +package org.opensearch.sql.spark.dispatcher.model; + +import java.util.HashMap; +import java.util.Map; +import java.util.Optional; + +/** + * Model to store flint index options. Currently only the required fields are added; this can be + * extended in the future. + */ +public class FlintIndexOptions { + + public static final String AUTO_REFRESH = "auto_refresh"; + public static final String INCREMENTAL_REFRESH = "incremental_refresh"; + public static final String CHECKPOINT_LOCATION = "checkpoint_location"; + public static final String WATERMARK_DELAY = "watermark_delay"; + private final Map options = new HashMap<>(); + + public void setOption(String key, String value) { + options.put(key, value); + } + + public Optional getOption(String key) { + return Optional.ofNullable(options.get(key)); + } + + public boolean autoRefresh() { + return Boolean.parseBoolean(getOption(AUTO_REFRESH).orElse("false")); + } + + public Map getProvidedOptions() { + return new HashMap<>(options); + } +} diff --git a/spark/src/main/java/org/opensearch/sql/spark/dispatcher/model/IndexQueryActionType.java b/spark/src/main/java/org/opensearch/sql/spark/dispatcher/model/IndexQueryActionType.java index 2c96511d2a..93e44f00ea 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/dispatcher/model/IndexQueryActionType.java +++ b/spark/src/main/java/org/opensearch/sql/spark/dispatcher/model/IndexQueryActionType.java @@ -11,5 +11,6 @@ public enum IndexQueryActionType { REFRESH, DESCRIBE, SHOW, - DROP + DROP, + ALTER } diff --git a/spark/src/main/java/org/opensearch/sql/spark/dispatcher/model/IndexQueryDetails.java b/spark/src/main/java/org/opensearch/sql/spark/dispatcher/model/IndexQueryDetails.java index 576b0772d2..7ecd784792 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/dispatcher/model/IndexQueryDetails.java +++ b/spark/src/main/java/org/opensearch/sql/spark/dispatcher/model/IndexQueryDetails.java @@ -22,8 +22,8 @@ public class IndexQueryDetails { private 
String indexName; private FullyQualifiedTableName fullyQualifiedTableName; // by default, auto_refresh = false; - private boolean autoRefresh; private IndexQueryActionType indexQueryActionType; + private FlintIndexOptions flintIndexOptions; // materialized view special case where // table name and mv name are combined. private String mvName; @@ -53,17 +53,17 @@ public IndexQueryDetailsBuilder fullyQualifiedTableName(FullyQualifiedTableName return this; } - public IndexQueryDetailsBuilder autoRefresh(Boolean autoRefresh) { - indexQueryDetails.autoRefresh = autoRefresh; - return this; - } - public IndexQueryDetailsBuilder indexQueryActionType( IndexQueryActionType indexQueryActionType) { indexQueryDetails.indexQueryActionType = indexQueryActionType; return this; } + public IndexQueryDetailsBuilder indexOptions(FlintIndexOptions flintIndexOptions) { + indexQueryDetails.flintIndexOptions = flintIndexOptions; + return this; + } + public IndexQueryDetailsBuilder mvName(String mvName) { indexQueryDetails.mvName = mvName; return this; @@ -75,6 +75,9 @@ public IndexQueryDetailsBuilder indexType(FlintIndexType indexType) { } public IndexQueryDetails build() { + if (indexQueryDetails.flintIndexOptions == null) { + indexQueryDetails.flintIndexOptions = new FlintIndexOptions(); + } return indexQueryDetails; } } diff --git a/spark/src/main/java/org/opensearch/sql/spark/flint/FlintIndexMetadata.java b/spark/src/main/java/org/opensearch/sql/spark/flint/FlintIndexMetadata.java index 1721263bf8..50ed17beb7 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/flint/FlintIndexMetadata.java +++ b/spark/src/main/java/org/opensearch/sql/spark/flint/FlintIndexMetadata.java @@ -5,43 +5,33 @@ package org.opensearch.sql.spark.flint; -import java.util.Locale; -import java.util.Map; import java.util.Optional; +import lombok.Builder; import lombok.Data; +import org.opensearch.sql.spark.dispatcher.model.FlintIndexOptions; @Data +@Builder public class FlintIndexMetadata { + + public static 
final String META_KEY = "_meta"; + public static final String LATEST_ID_KEY = "latestId"; + public static final String KIND_KEY = "kind"; + public static final String INDEXED_COLUMNS_KEY = "indexedColumns"; + public static final String NAME_KEY = "name"; + public static final String OPTIONS_KEY = "options"; + public static final String SOURCE_KEY = "source"; + public static final String VERSION_KEY = "version"; public static final String PROPERTIES_KEY = "properties"; public static final String ENV_KEY = "env"; - public static final String OPTIONS_KEY = "options"; - public static final String SERVERLESS_EMR_JOB_ID = "SERVERLESS_EMR_JOB_ID"; - public static final String AUTO_REFRESH = "auto_refresh"; - public static final String AUTO_REFRESH_DEFAULT = "false"; - public static final String APP_ID = "SERVERLESS_EMR_VIRTUAL_CLUSTER_ID"; - public static final String FLINT_INDEX_STATE_DOC_ID = "latestId"; + private final String opensearchIndexName; private final String jobId; - private final boolean autoRefresh; private final String appId; private final String latestId; - - public static FlintIndexMetadata fromMetatdata(Map metaMap) { - Map propertiesMap = (Map) metaMap.get(PROPERTIES_KEY); - Map envMap = (Map) propertiesMap.get(ENV_KEY); - Map options = (Map) metaMap.get(OPTIONS_KEY); - String jobId = (String) envMap.get(SERVERLESS_EMR_JOB_ID); - - boolean autoRefresh = - !((String) options.getOrDefault(AUTO_REFRESH, AUTO_REFRESH_DEFAULT)) - .toLowerCase(Locale.ROOT) - .equalsIgnoreCase(AUTO_REFRESH_DEFAULT); - String appId = (String) envMap.getOrDefault(APP_ID, null); - String latestId = (String) metaMap.getOrDefault(FLINT_INDEX_STATE_DOC_ID, null); - return new FlintIndexMetadata(jobId, autoRefresh, appId, latestId); - } + private final FlintIndexOptions flintIndexOptions; public Optional getLatestId() { return Optional.ofNullable(latestId); diff --git a/spark/src/main/java/org/opensearch/sql/spark/flint/FlintIndexMetadataReader.java 
b/spark/src/main/java/org/opensearch/sql/spark/flint/FlintIndexMetadataReader.java deleted file mode 100644 index 8833665570..0000000000 --- a/spark/src/main/java/org/opensearch/sql/spark/flint/FlintIndexMetadataReader.java +++ /dev/null @@ -1,23 +0,0 @@ -package org.opensearch.sql.spark.flint; - -import org.opensearch.sql.spark.dispatcher.model.IndexQueryDetails; - -/** Interface for FlintIndexMetadataReader */ -public interface FlintIndexMetadataReader { - - /** - * Given Index details, get the streaming job Id. - * - * @param indexQueryDetails indexDetails. - * @return FlintIndexMetadata. - */ - FlintIndexMetadata getFlintIndexMetadata(IndexQueryDetails indexQueryDetails); - - /** - * Given Index name, get the streaming job Id. - * - * @param indexName indexName. - * @return FlintIndexMetadata. - */ - FlintIndexMetadata getFlintIndexMetadata(String indexName); -} diff --git a/spark/src/main/java/org/opensearch/sql/spark/flint/FlintIndexMetadataReaderImpl.java b/spark/src/main/java/org/opensearch/sql/spark/flint/FlintIndexMetadataReaderImpl.java deleted file mode 100644 index d6e07fba8a..0000000000 --- a/spark/src/main/java/org/opensearch/sql/spark/flint/FlintIndexMetadataReaderImpl.java +++ /dev/null @@ -1,33 +0,0 @@ -package org.opensearch.sql.spark.flint; - -import java.util.Map; -import lombok.AllArgsConstructor; -import org.opensearch.action.admin.indices.mapping.get.GetMappingsResponse; -import org.opensearch.client.Client; -import org.opensearch.cluster.metadata.MappingMetadata; -import org.opensearch.sql.spark.dispatcher.model.IndexQueryDetails; - -/** Implementation of {@link FlintIndexMetadataReader} */ -@AllArgsConstructor -public class FlintIndexMetadataReaderImpl implements FlintIndexMetadataReader { - - private final Client client; - - @Override - public FlintIndexMetadata getFlintIndexMetadata(IndexQueryDetails indexQueryDetails) { - return getFlintIndexMetadata(indexQueryDetails.openSearchIndexName()); - } - - @Override - public FlintIndexMetadata 
getFlintIndexMetadata(String indexName) { - GetMappingsResponse mappingsResponse = - client.admin().indices().prepareGetMappings(indexName).get(); - try { - MappingMetadata mappingMetadata = mappingsResponse.mappings().get(indexName); - Map mappingSourceMap = mappingMetadata.getSourceAsMap(); - return FlintIndexMetadata.fromMetatdata((Map) mappingSourceMap.get("_meta")); - } catch (NullPointerException npe) { - throw new IllegalArgumentException("Provided Index doesn't exist"); - } - } -} diff --git a/spark/src/main/java/org/opensearch/sql/spark/flint/FlintIndexMetadataService.java b/spark/src/main/java/org/opensearch/sql/spark/flint/FlintIndexMetadataService.java new file mode 100644 index 0000000000..a069765a95 --- /dev/null +++ b/spark/src/main/java/org/opensearch/sql/spark/flint/FlintIndexMetadataService.java @@ -0,0 +1,25 @@ +package org.opensearch.sql.spark.flint; + +import java.util.Map; +import org.opensearch.sql.spark.dispatcher.model.FlintIndexOptions; + +/** Service interface for reading Flint index metadata and switching an index to manual refresh. */ +public interface FlintIndexMetadataService { + + /** + * Retrieves a map of {@link FlintIndexMetadata} instances matching the specified index pattern. + * + * @param indexPattern indexPattern. + * @return A map of {@link FlintIndexMetadata} instances keyed by index name, each providing + * metadata access for a matched index. Returns an empty map if no indices match the pattern. + */ + Map getFlintIndexMetadata(String indexPattern); + + /** + * Performs validation and updates flint index to manual refresh. + * + * @param indexName indexName. + * @param flintIndexOptions flintIndexOptions. 
+ */ + void updateIndexToManualRefresh(String indexName, FlintIndexOptions flintIndexOptions); +} diff --git a/spark/src/main/java/org/opensearch/sql/spark/flint/FlintIndexMetadataServiceImpl.java b/spark/src/main/java/org/opensearch/sql/spark/flint/FlintIndexMetadataServiceImpl.java new file mode 100644 index 0000000000..e4e216e77a --- /dev/null +++ b/spark/src/main/java/org/opensearch/sql/spark/flint/FlintIndexMetadataServiceImpl.java @@ -0,0 +1,158 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.flint; + +import static org.opensearch.sql.spark.dispatcher.model.FlintIndexOptions.AUTO_REFRESH; +import static org.opensearch.sql.spark.dispatcher.model.FlintIndexOptions.CHECKPOINT_LOCATION; +import static org.opensearch.sql.spark.dispatcher.model.FlintIndexOptions.INCREMENTAL_REFRESH; +import static org.opensearch.sql.spark.dispatcher.model.FlintIndexOptions.WATERMARK_DELAY; +import static org.opensearch.sql.spark.flint.FlintIndexMetadata.APP_ID; +import static org.opensearch.sql.spark.flint.FlintIndexMetadata.ENV_KEY; +import static org.opensearch.sql.spark.flint.FlintIndexMetadata.LATEST_ID_KEY; +import static org.opensearch.sql.spark.flint.FlintIndexMetadata.META_KEY; +import static org.opensearch.sql.spark.flint.FlintIndexMetadata.OPTIONS_KEY; +import static org.opensearch.sql.spark.flint.FlintIndexMetadata.PROPERTIES_KEY; +import static org.opensearch.sql.spark.flint.FlintIndexMetadata.SERVERLESS_EMR_JOB_ID; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import lombok.AllArgsConstructor; +import org.apache.commons.lang3.StringUtils; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.action.admin.indices.mapping.get.GetMappingsResponse; +import org.opensearch.client.Client; +import 
org.opensearch.sql.spark.dispatcher.model.FlintIndexOptions; + +/** Implementation of {@link FlintIndexMetadataService} */ +@AllArgsConstructor +public class FlintIndexMetadataServiceImpl implements FlintIndexMetadataService { + + private static final Logger LOGGER = LogManager.getLogger(FlintIndexMetadataServiceImpl.class); + + private final Client client; + public static final Set ALTER_TO_FULL_REFRESH_ALLOWED_OPTIONS = + new LinkedHashSet<>(Arrays.asList(AUTO_REFRESH, INCREMENTAL_REFRESH)); + public static final Set ALTER_TO_INCREMENTAL_REFRESH_ALLOWED_OPTIONS = + new LinkedHashSet<>( + Arrays.asList(AUTO_REFRESH, INCREMENTAL_REFRESH, WATERMARK_DELAY, CHECKPOINT_LOCATION)); + + @Override + public Map getFlintIndexMetadata(String indexPattern) { + GetMappingsResponse mappingsResponse = + client.admin().indices().prepareGetMappings().setIndices(indexPattern).get(); + Map indexMetadataMap = new HashMap<>(); + mappingsResponse + .getMappings() + .forEach( + (indexName, mappingMetadata) -> { + try { + Map mappingSourceMap = mappingMetadata.getSourceAsMap(); + FlintIndexMetadata metadata = + fromMetadata(indexName, (Map) mappingSourceMap.get(META_KEY)); + indexMetadataMap.put(indexName, metadata); + } catch (Exception exception) { + LOGGER.error( + "Exception while building index details for index: {} due to: {}", + indexName, + exception.getMessage()); + } + }); + return indexMetadataMap; + } + + @Override + public void updateIndexToManualRefresh(String indexName, FlintIndexOptions flintIndexOptions) { + GetMappingsResponse mappingsResponse = + client.admin().indices().prepareGetMappings().setIndices(indexName).get(); + Map flintMetadataMap = + mappingsResponse.getMappings().get(indexName).getSourceAsMap(); + Map meta = (Map) flintMetadataMap.get("_meta"); + String kind = (String) meta.get("kind"); + Map options = (Map) meta.get("options"); + Map newOptions = flintIndexOptions.getProvidedOptions(); + validateFlintIndexOptions(kind, options, newOptions); + 
options.putAll(newOptions); + client.admin().indices().preparePutMapping(indexName).setSource(flintMetadataMap).get(); + } + + private void validateFlintIndexOptions( + String kind, Map existingOptions, Map newOptions) { + if (Boolean.parseBoolean((String) existingOptions.get(INCREMENTAL_REFRESH)) + || (newOptions.containsKey(INCREMENTAL_REFRESH) + && Boolean.parseBoolean(newOptions.get(INCREMENTAL_REFRESH)))) { + validateConversionToIncrementalRefresh(kind, existingOptions, newOptions); + } else { + validateConversionToFullRefresh(newOptions); + } + } + + private void validateConversionToFullRefresh(Map newOptions) { + if (!ALTER_TO_FULL_REFRESH_ALLOWED_OPTIONS.containsAll(newOptions.keySet())) { + throw new IllegalArgumentException( + String.format( + "Altering to full refresh only allows: %s options", + ALTER_TO_FULL_REFRESH_ALLOWED_OPTIONS)); + } + } + + private void validateConversionToIncrementalRefresh( + String kind, Map existingOptions, Map newOptions) { + if (!ALTER_TO_INCREMENTAL_REFRESH_ALLOWED_OPTIONS.containsAll(newOptions.keySet())) { + throw new IllegalArgumentException( + String.format( + "Altering to incremental refresh only allows: %s options", + ALTER_TO_INCREMENTAL_REFRESH_ALLOWED_OPTIONS)); + } + HashMap mergedOptions = new HashMap<>(); + mergedOptions.putAll(existingOptions); + mergedOptions.putAll(newOptions); + List missingAttributes = new ArrayList<>(); + if (!mergedOptions.containsKey(CHECKPOINT_LOCATION) + || StringUtils.isEmpty((String) mergedOptions.get(CHECKPOINT_LOCATION))) { + missingAttributes.add(CHECKPOINT_LOCATION); + } + if (kind.equals("mv") + && (!mergedOptions.containsKey(WATERMARK_DELAY) + || StringUtils.isEmpty((String) mergedOptions.get(WATERMARK_DELAY)))) { + missingAttributes.add(WATERMARK_DELAY); + } + if (missingAttributes.size() > 0) { + String errorMessage = + "Conversion to incremental refresh index cannot proceed due to missing attributes: " + + String.join(", ", missingAttributes) + + "."; + 
LOGGER.error(errorMessage); + throw new IllegalArgumentException(errorMessage); + } + } + + private FlintIndexMetadata fromMetadata(String indexName, Map metaMap) { + FlintIndexMetadata.FlintIndexMetadataBuilder flintIndexMetadataBuilder = + FlintIndexMetadata.builder(); + Map propertiesMap = (Map) metaMap.get(PROPERTIES_KEY); + Map envMap = (Map) propertiesMap.get(ENV_KEY); + Map options = (Map) metaMap.get(OPTIONS_KEY); + FlintIndexOptions flintIndexOptions = new FlintIndexOptions(); + for (String key : options.keySet()) { + flintIndexOptions.setOption(key, (String) options.get(key)); + } + String jobId = (String) envMap.get(SERVERLESS_EMR_JOB_ID); + String appId = (String) envMap.getOrDefault(APP_ID, null); + String latestId = (String) metaMap.getOrDefault(LATEST_ID_KEY, null); + flintIndexMetadataBuilder.jobId(jobId); + flintIndexMetadataBuilder.appId(appId); + flintIndexMetadataBuilder.latestId(latestId); + flintIndexMetadataBuilder.opensearchIndexName(indexName); + flintIndexMetadataBuilder.flintIndexOptions(flintIndexOptions); + return flintIndexMetadataBuilder.build(); + } +} diff --git a/spark/src/main/java/org/opensearch/sql/spark/flint/FlintIndexState.java b/spark/src/main/java/org/opensearch/sql/spark/flint/FlintIndexState.java index 0ab4d92c17..36ac8fe715 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/flint/FlintIndexState.java +++ b/spark/src/main/java/org/opensearch/sql/spark/flint/FlintIndexState.java @@ -18,16 +18,22 @@ public enum FlintIndexState { EMPTY("empty"), // transitioning state CREATING("creating"), + // stable state + ACTIVE("active"), // transitioning state REFRESHING("refreshing"), // transitioning state CANCELLING("cancelling"), - // stable state - ACTIVE("active"), // transitioning state DELETING("deleting"), // stable state DELETED("deleted"), + // transitioning state + RECOVERING("recovering"), + // transitioning state + VACUUMING("vacuuming"), + // transitioning state + UPDATING("updating"), // stable state 
FAILED("failed"), // unknown state, if some state update in Spark side, not reflect in here. diff --git a/spark/src/main/java/org/opensearch/sql/spark/flint/operation/FlintIndexOp.java b/spark/src/main/java/org/opensearch/sql/spark/flint/operation/FlintIndexOp.java index fb44b27568..37d36a49db 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/flint/operation/FlintIndexOp.java +++ b/spark/src/main/java/org/opensearch/sql/spark/flint/operation/FlintIndexOp.java @@ -10,10 +10,14 @@ import java.util.Locale; import java.util.Optional; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; import lombok.RequiredArgsConstructor; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.jetbrains.annotations.NotNull; import org.opensearch.index.seqno.SequenceNumbers; +import org.opensearch.sql.spark.client.EMRServerlessClient; import org.opensearch.sql.spark.execution.statestore.StateStore; import org.opensearch.sql.spark.flint.FlintIndexMetadata; import org.opensearch.sql.spark.flint.FlintIndexState; @@ -32,65 +36,132 @@ public void apply(FlintIndexMetadata metadata) { // todo, remove this logic after IndexState feature is enabled in Flint. Optional latestId = metadata.getLatestId(); if (latestId.isEmpty()) { - // take action without occ. - FlintIndexStateModel fakeModel = - new FlintIndexStateModel( - FlintIndexState.REFRESHING, - metadata.getAppId(), - metadata.getJobId(), - "", - datasourceName, - System.currentTimeMillis(), - "", - SequenceNumbers.UNASSIGNED_SEQ_NO, - SequenceNumbers.UNASSIGNED_PRIMARY_TERM); - runOp(fakeModel); + takeActionWithoutOCC(metadata); } else { - Optional flintIndexOptional = - getFlintIndexState(stateStore, datasourceName).apply(latestId.get()); - if (flintIndexOptional.isEmpty()) { - String errorMsg = String.format(Locale.ROOT, "no state found. 
docId: %s", latestId.get()); - LOG.error(errorMsg); - throw new IllegalStateException(errorMsg); - } - FlintIndexStateModel flintIndex = flintIndexOptional.get(); - + FlintIndexStateModel initialFlintIndexStateModel = getFlintIndexStateModel(latestId.get()); // 1.validate state. - FlintIndexState currentState = flintIndex.getIndexState(); - if (!validate(currentState)) { - String errorMsg = - String.format(Locale.ROOT, "validate failed. unexpected state: [%s]", currentState); - LOG.debug(errorMsg); - return; - } + validFlintIndexInitialState(initialFlintIndexStateModel); // 2.begin, move to transitioning state - FlintIndexState transitioningState = transitioningState(); + FlintIndexStateModel transitionedFlintIndexStateModel = + moveToTransitioningState(initialFlintIndexStateModel); + // 3.runOp try { - flintIndex = - updateFlintIndexState(stateStore, datasourceName) - .apply(flintIndex, transitioningState()); - } catch (Exception e) { - String errorMsg = - String.format( - Locale.ROOT, "begin failed. target transitioning state: [%s]", transitioningState); - LOG.error(errorMsg, e); - throw new IllegalStateException(errorMsg, e); + runOp(metadata, transitionedFlintIndexStateModel); + commit(transitionedFlintIndexStateModel); + } catch (Throwable e) { + LOG.error("Rolling back transient log due to transaction operation failure", e); + try { + updateFlintIndexState(stateStore, datasourceName) + .apply(transitionedFlintIndexStateModel, initialFlintIndexStateModel.getIndexState()); + } catch (Exception ex) { + LOG.error("Failed to rollback transient log", ex); + } + throw e; } + } + } - // 3.runOp - runOp(flintIndex); + @NotNull + private FlintIndexStateModel getFlintIndexStateModel(String latestId) { + Optional flintIndexOptional = + getFlintIndexState(stateStore, datasourceName).apply(latestId); + if (flintIndexOptional.isEmpty()) { + String errorMsg = String.format(Locale.ROOT, "no state found. 
docId: %s", latestId); + LOG.error(errorMsg); + throw new IllegalStateException(errorMsg); + } + return flintIndexOptional.get(); + } - // 4.commit, move to stable state - FlintIndexState stableState = stableState(); - try { - updateFlintIndexState(stateStore, datasourceName).apply(flintIndex, stableState); - } catch (Exception e) { - String errorMsg = - String.format(Locale.ROOT, "commit failed. target stable state: [%s]", stableState); - LOG.error(errorMsg, e); - throw new IllegalStateException(errorMsg, e); + private void takeActionWithoutOCC(FlintIndexMetadata metadata) { + // take action without occ. + FlintIndexStateModel fakeModel = + new FlintIndexStateModel( + FlintIndexState.REFRESHING, + metadata.getAppId(), + metadata.getJobId(), + "", + datasourceName, + System.currentTimeMillis(), + "", + SequenceNumbers.UNASSIGNED_SEQ_NO, + SequenceNumbers.UNASSIGNED_PRIMARY_TERM); + runOp(metadata, fakeModel); + } + + private void validFlintIndexInitialState(FlintIndexStateModel flintIndex) { + LOG.debug("Validating the state before the transaction."); + FlintIndexState currentState = flintIndex.getIndexState(); + if (!validate(currentState)) { + String errorMsg = + String.format(Locale.ROOT, "validate failed. 
unexpected state: [%s]", currentState); + LOG.error(errorMsg); + throw new IllegalStateException("Transaction failed as flint index is not in a valid state."); + } + } + + private FlintIndexStateModel moveToTransitioningState(FlintIndexStateModel flintIndex) { + LOG.debug("Moving to transitioning state before committing."); + FlintIndexState transitioningState = transitioningState(); + try { + flintIndex = + updateFlintIndexState(stateStore, datasourceName).apply(flintIndex, transitioningState()); + } catch (Exception e) { + String errorMsg = + String.format(Locale.ROOT, "Moving to transition state:%s failed.", transitioningState); + LOG.error(errorMsg, e); + throw new IllegalStateException(errorMsg, e); + } + return flintIndex; + } + + private void commit(FlintIndexStateModel flintIndex) { + LOG.debug("Committing the transaction and moving to stable state."); + FlintIndexState stableState = stableState(); + try { + updateFlintIndexState(stateStore, datasourceName).apply(flintIndex, stableState); + } catch (Exception e) { + String errorMsg = + String.format(Locale.ROOT, "commit failed. target stable state: [%s]", stableState); + LOG.error(errorMsg, e); + throw new IllegalStateException(errorMsg, e); + } + } + + /*** + * Common operation between AlterOff and Drop. So moved to FlintIndexOp. + */ + public void cancelStreamingJob( + EMRServerlessClient emrServerlessClient, FlintIndexStateModel flintIndexStateModel) + throws InterruptedException, TimeoutException { + String applicationId = flintIndexStateModel.getApplicationId(); + String jobId = flintIndexStateModel.getJobId(); + try { + emrServerlessClient.cancelJobRun( + flintIndexStateModel.getApplicationId(), flintIndexStateModel.getJobId()); + } catch (IllegalArgumentException e) { + // handle job does not exist case. + LOG.error(e); + return; + } + + // pull job state until timeout or cancelled. 
+ String jobRunState = ""; + int count = 3; + while (count-- != 0) { + jobRunState = + emrServerlessClient.getJobRunResult(applicationId, jobId).getJobRun().getState(); + if (jobRunState.equalsIgnoreCase("Cancelled")) { + break; } + TimeUnit.SECONDS.sleep(1); + } + if (!jobRunState.equalsIgnoreCase("Cancelled")) { + String errMsg = + "Cancel job timeout for Application ID: " + applicationId + ", Job ID: " + jobId; + LOG.error(errMsg); + throw new TimeoutException("Cancel job operation timed out."); } } @@ -104,7 +175,7 @@ public void apply(FlintIndexMetadata metadata) { /** get transitioningState */ abstract FlintIndexState transitioningState(); - abstract void runOp(FlintIndexStateModel flintIndex); + abstract void runOp(FlintIndexMetadata flintIndexMetadata, FlintIndexStateModel flintIndex); /** get stableState */ abstract FlintIndexState stableState(); diff --git a/spark/src/main/java/org/opensearch/sql/spark/flint/operation/FlintIndexOpAlter.java b/spark/src/main/java/org/opensearch/sql/spark/flint/operation/FlintIndexOpAlter.java new file mode 100644 index 0000000000..7db4f6a4c6 --- /dev/null +++ b/spark/src/main/java/org/opensearch/sql/spark/flint/operation/FlintIndexOpAlter.java @@ -0,0 +1,65 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.flint.operation; + +import lombok.SneakyThrows; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.sql.spark.client.EMRServerlessClient; +import org.opensearch.sql.spark.dispatcher.model.FlintIndexOptions; +import org.opensearch.sql.spark.execution.statestore.StateStore; +import org.opensearch.sql.spark.flint.FlintIndexMetadata; +import org.opensearch.sql.spark.flint.FlintIndexMetadataService; +import org.opensearch.sql.spark.flint.FlintIndexState; +import org.opensearch.sql.spark.flint.FlintIndexStateModel; + +/** + * Index Operation for Altering the flint index. 
Only handles alter operation when + * auto_refresh=false. + */ +public class FlintIndexOpAlter extends FlintIndexOp { + private static final Logger LOG = LogManager.getLogger(FlintIndexOpAlter.class); + private final EMRServerlessClient emrServerlessClient; + private final FlintIndexMetadataService flintIndexMetadataService; + private final FlintIndexOptions flintIndexOptions; + + public FlintIndexOpAlter( + FlintIndexOptions flintIndexOptions, + StateStore stateStore, + String datasourceName, + EMRServerlessClient emrServerlessClient, + FlintIndexMetadataService flintIndexMetadataService) { + super(stateStore, datasourceName); + this.emrServerlessClient = emrServerlessClient; + this.flintIndexMetadataService = flintIndexMetadataService; + this.flintIndexOptions = flintIndexOptions; + } + + @Override + protected boolean validate(FlintIndexState state) { + return state == FlintIndexState.ACTIVE || state == FlintIndexState.REFRESHING; + } + + @Override + FlintIndexState transitioningState() { + return FlintIndexState.UPDATING; + } + + @SneakyThrows + @Override + void runOp(FlintIndexMetadata flintIndexMetadata, FlintIndexStateModel flintIndexStateModel) { + LOG.debug( + "Running alter index operation for index: {}", flintIndexMetadata.getOpensearchIndexName()); + this.flintIndexMetadataService.updateIndexToManualRefresh( + flintIndexMetadata.getOpensearchIndexName(), flintIndexOptions); + cancelStreamingJob(emrServerlessClient, flintIndexStateModel); + } + + @Override + FlintIndexState stableState() { + return FlintIndexState.ACTIVE; + } +} diff --git a/spark/src/main/java/org/opensearch/sql/spark/flint/operation/FlintIndexOpCancel.java b/spark/src/main/java/org/opensearch/sql/spark/flint/operation/FlintIndexOpCancel.java index ba067e5c03..ed96f4b8d3 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/flint/operation/FlintIndexOpCancel.java +++ b/spark/src/main/java/org/opensearch/sql/spark/flint/operation/FlintIndexOpCancel.java @@ -5,13 +5,12 @@ package 
org.opensearch.sql.spark.flint.operation; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.TimeoutException; import lombok.SneakyThrows; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.opensearch.sql.spark.client.EMRServerlessClient; import org.opensearch.sql.spark.execution.statestore.StateStore; +import org.opensearch.sql.spark.flint.FlintIndexMetadata; import org.opensearch.sql.spark.flint.FlintIndexState; import org.opensearch.sql.spark.flint.FlintIndexStateModel; @@ -28,7 +27,7 @@ public FlintIndexOpCancel( } public boolean validate(FlintIndexState state) { - return state == FlintIndexState.REFRESHING || state == FlintIndexState.CANCELLING; + return state == FlintIndexState.REFRESHING || state == FlintIndexState.ACTIVE; } @Override @@ -39,34 +38,11 @@ FlintIndexState transitioningState() { /** cancel EMR-S job, wait cancelled state upto 15s. */ @SneakyThrows @Override - void runOp(FlintIndexStateModel flintIndexStateModel) { - String applicationId = flintIndexStateModel.getApplicationId(); - String jobId = flintIndexStateModel.getJobId(); - try { - emrServerlessClient.cancelJobRun( - flintIndexStateModel.getApplicationId(), flintIndexStateModel.getJobId()); - } catch (IllegalArgumentException e) { - // handle job does not exist case. - LOG.error(e); - return; - } - - // pull job state until timeout or cancelled. 
- String jobRunState = ""; - int count = 3; - while (count-- != 0) { - jobRunState = - emrServerlessClient.getJobRunResult(applicationId, jobId).getJobRun().getState(); - if (jobRunState.equalsIgnoreCase("Cancelled")) { - break; - } - TimeUnit.SECONDS.sleep(1); - } - if (!jobRunState.equalsIgnoreCase("Cancelled")) { - String errMsg = "cancel job timeout"; - LOG.error(errMsg); - throw new TimeoutException(errMsg); - } + void runOp(FlintIndexMetadata flintIndexMetadata, FlintIndexStateModel flintIndexStateModel) { + LOG.debug( + "Performing drop index operation for index: {}", + flintIndexMetadata.getOpensearchIndexName()); + cancelStreamingJob(emrServerlessClient, flintIndexStateModel); } @Override diff --git a/spark/src/main/java/org/opensearch/sql/spark/flint/operation/FlintIndexOpDelete.java b/spark/src/main/java/org/opensearch/sql/spark/flint/operation/FlintIndexOpDelete.java deleted file mode 100644 index d8b275c621..0000000000 --- a/spark/src/main/java/org/opensearch/sql/spark/flint/operation/FlintIndexOpDelete.java +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.sql.spark.flint.operation; - -import org.opensearch.sql.spark.execution.statestore.StateStore; -import org.opensearch.sql.spark.flint.FlintIndexState; -import org.opensearch.sql.spark.flint.FlintIndexStateModel; - -/** Flint Index Logical delete operation. Change state to DELETED. 
*/ -public class FlintIndexOpDelete extends FlintIndexOp { - - public FlintIndexOpDelete(StateStore stateStore, String datasourceName) { - super(stateStore, datasourceName); - } - - public boolean validate(FlintIndexState state) { - return state == FlintIndexState.ACTIVE - || state == FlintIndexState.EMPTY - || state == FlintIndexState.DELETING; - } - - @Override - FlintIndexState transitioningState() { - return FlintIndexState.DELETING; - } - - @Override - void runOp(FlintIndexStateModel flintIndex) { - // logically delete, do nothing. - } - - @Override - FlintIndexState stableState() { - return FlintIndexState.DELETED; - } -} diff --git a/spark/src/main/java/org/opensearch/sql/spark/flint/operation/FlintIndexOpDrop.java b/spark/src/main/java/org/opensearch/sql/spark/flint/operation/FlintIndexOpDrop.java new file mode 100644 index 0000000000..586c346863 --- /dev/null +++ b/spark/src/main/java/org/opensearch/sql/spark/flint/operation/FlintIndexOpDrop.java @@ -0,0 +1,54 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.flint.operation; + +import lombok.SneakyThrows; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.sql.spark.client.EMRServerlessClient; +import org.opensearch.sql.spark.execution.statestore.StateStore; +import org.opensearch.sql.spark.flint.FlintIndexMetadata; +import org.opensearch.sql.spark.flint.FlintIndexState; +import org.opensearch.sql.spark.flint.FlintIndexStateModel; + +public class FlintIndexOpDrop extends FlintIndexOp { + private static final Logger LOG = LogManager.getLogger(); + + private final EMRServerlessClient emrServerlessClient; + + public FlintIndexOpDrop( + StateStore stateStore, String datasourceName, EMRServerlessClient emrServerlessClient) { + super(stateStore, datasourceName); + this.emrServerlessClient = emrServerlessClient; + } + + public boolean validate(FlintIndexState state) { + return 
state == FlintIndexState.REFRESHING + || state == FlintIndexState.EMPTY + || state == FlintIndexState.ACTIVE + || state == FlintIndexState.CREATING; + } + + @Override + FlintIndexState transitioningState() { + return FlintIndexState.DELETING; + } + + /** Cancel the EMR-S job, then poll for the cancelled state: up to 3 checks, 1s apart. */ + @SneakyThrows + @Override + void runOp(FlintIndexMetadata flintIndexMetadata, FlintIndexStateModel flintIndexStateModel) { + LOG.debug( + "Performing drop index operation for index: {}", + flintIndexMetadata.getOpensearchIndexName()); + cancelStreamingJob(emrServerlessClient, flintIndexStateModel); + } + + @Override + FlintIndexState stableState() { + return FlintIndexState.DELETED; + } +} diff --git a/spark/src/main/java/org/opensearch/sql/spark/transport/config/AsyncExecutorServiceModule.java b/spark/src/main/java/org/opensearch/sql/spark/transport/config/AsyncExecutorServiceModule.java index d88c1dd9df..2c86a66fb2 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/transport/config/AsyncExecutorServiceModule.java +++ b/spark/src/main/java/org/opensearch/sql/spark/transport/config/AsyncExecutorServiceModule.java @@ -29,7 +29,7 @@ import org.opensearch.sql.spark.dispatcher.SparkQueryDispatcher; import org.opensearch.sql.spark.execution.session.SessionManager; import org.opensearch.sql.spark.execution.statestore.StateStore; -import org.opensearch.sql.spark.flint.FlintIndexMetadataReaderImpl; +import org.opensearch.sql.spark.flint.FlintIndexMetadataServiceImpl; import org.opensearch.sql.spark.leasemanager.DefaultLeaseManager; import org.opensearch.sql.spark.response.JobExecutionResponseReader; @@ -70,7 +70,7 @@ public SparkQueryDispatcher sparkQueryDispatcher( DataSourceService dataSourceService, DataSourceUserAuthorizationHelperImpl dataSourceUserAuthorizationHelper, JobExecutionResponseReader jobExecutionResponseReader, - FlintIndexMetadataReaderImpl flintIndexMetadataReader, + FlintIndexMetadataServiceImpl flintIndexMetadataReader, NodeClient client, 
SessionManager sessionManager, DefaultLeaseManager defaultLeaseManager, @@ -113,8 +113,9 @@ public SparkExecutionEngineConfigSupplier sparkExecutionEngineConfigSupplier(Set @Provides @Singleton - public FlintIndexMetadataReaderImpl flintIndexMetadataReader(NodeClient client) { - return new FlintIndexMetadataReaderImpl(client); + public FlintIndexMetadataServiceImpl flintIndexMetadataReader( + NodeClient client, StateStore stateStore) { + return new FlintIndexMetadataServiceImpl(client); } @Provides diff --git a/spark/src/main/java/org/opensearch/sql/spark/utils/SQLQueryUtils.java b/spark/src/main/java/org/opensearch/sql/spark/utils/SQLQueryUtils.java index c1f3f02576..1ac177771c 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/utils/SQLQueryUtils.java +++ b/spark/src/main/java/org/opensearch/sql/spark/utils/SQLQueryUtils.java @@ -19,6 +19,7 @@ import org.opensearch.sql.spark.antlr.parser.SqlBaseLexer; import org.opensearch.sql.spark.antlr.parser.SqlBaseParser; import org.opensearch.sql.spark.antlr.parser.SqlBaseParserBaseVisitor; +import org.opensearch.sql.spark.dispatcher.model.FlintIndexOptions; import org.opensearch.sql.spark.dispatcher.model.FullyQualifiedTableName; import org.opensearch.sql.spark.dispatcher.model.IndexQueryActionType; import org.opensearch.sql.spark.dispatcher.model.IndexQueryDetails; @@ -257,23 +258,48 @@ public Void visitRefreshMaterializedViewStatement( @Override public Void visitPropertyList(FlintSparkSqlExtensionsParser.PropertyListContext ctx) { + FlintIndexOptions flintIndexOptions = new FlintIndexOptions(); if (ctx != null) { ctx.property() .forEach( - property -> { - // todo. Currently, we use contains() api to avoid unescape string. 
In future, we - // should leverage - // https://github.com/apache/spark/blob/v3.5.0/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkParserUtils.scala#L35 to unescape string literal - if (propertyKey(property.key).toLowerCase(Locale.ROOT).contains("auto_refresh")) { - if (propertyValue(property.value).toLowerCase(Locale.ROOT).contains("true")) { - indexQueryDetailsBuilder.autoRefresh(true); - } - } - }); + property -> + flintIndexOptions.setOption( + removeUnwantedQuotes(propertyKey(property.key).toLowerCase(Locale.ROOT)), + removeUnwantedQuotes( + propertyValue(property.value)))); } + indexQueryDetailsBuilder.indexOptions(flintIndexOptions); return null; } + @Override + public Void visitAlterCoveringIndexStatement( + FlintSparkSqlExtensionsParser.AlterCoveringIndexStatementContext ctx) { + indexQueryDetailsBuilder.indexQueryActionType(IndexQueryActionType.ALTER); + indexQueryDetailsBuilder.indexType(FlintIndexType.COVERING); + visitPropertyList(ctx.propertyList()); + return super.visitAlterCoveringIndexStatement(ctx); + } + + @Override + public Void visitAlterSkippingIndexStatement( + FlintSparkSqlExtensionsParser.AlterSkippingIndexStatementContext ctx) { + indexQueryDetailsBuilder.indexQueryActionType(IndexQueryActionType.ALTER); + indexQueryDetailsBuilder.indexType(FlintIndexType.SKIPPING); + visitPropertyList(ctx.propertyList()); + return super.visitAlterSkippingIndexStatement(ctx); + } + + @Override + public Void visitAlterMaterializedViewStatement( + FlintSparkSqlExtensionsParser.AlterMaterializedViewStatementContext ctx) { + indexQueryDetailsBuilder.indexQueryActionType(IndexQueryActionType.ALTER); + indexQueryDetailsBuilder.indexType(FlintIndexType.MATERIALIZED_VIEW); + indexQueryDetailsBuilder.mvName(ctx.mvName.getText()); + visitPropertyList(ctx.propertyList()); + return super.visitAlterMaterializedViewStatement(ctx); + } + private String propertyKey(FlintSparkSqlExtensionsParser.PropertyKeyContext key) { if 
(key.STRING() != null) { return key.STRING().getText(); @@ -291,5 +317,12 @@ private String propertyValue(FlintSparkSqlExtensionsParser.PropertyValueContext return value.getText(); } } + + // TODO: Currently escaping is handled partially. + // Full implementation should mirror this: + // https://github.com/apache/spark/blob/v3.5.0/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkParserUtils.scala#L35 + public String removeUnwantedQuotes(String input) { + return input.replaceAll("^\"|\"$", ""); + } } } diff --git a/spark/src/test/java/org/opensearch/sql/spark/asyncquery/AsyncQueryExecutorServiceSpec.java b/spark/src/test/java/org/opensearch/sql/spark/asyncquery/AsyncQueryExecutorServiceSpec.java index 725080bbcd..c1532d5c10 100644 --- a/spark/src/test/java/org/opensearch/sql/spark/asyncquery/AsyncQueryExecutorServiceSpec.java +++ b/spark/src/test/java/org/opensearch/sql/spark/asyncquery/AsyncQueryExecutorServiceSpec.java @@ -42,7 +42,6 @@ import org.opensearch.common.settings.Settings; import org.opensearch.common.xcontent.XContentType; import org.opensearch.index.query.QueryBuilder; -import org.opensearch.index.seqno.SequenceNumbers; import org.opensearch.plugins.Plugin; import org.opensearch.search.builder.SearchSourceBuilder; import org.opensearch.sql.datasource.model.DataSourceMetadata; @@ -65,9 +64,7 @@ import org.opensearch.sql.spark.execution.session.SessionModel; import org.opensearch.sql.spark.execution.session.SessionState; import org.opensearch.sql.spark.execution.statestore.StateStore; -import org.opensearch.sql.spark.flint.FlintIndexMetadataReaderImpl; -import org.opensearch.sql.spark.flint.FlintIndexState; -import org.opensearch.sql.spark.flint.FlintIndexStateModel; +import org.opensearch.sql.spark.flint.FlintIndexMetadataServiceImpl; import org.opensearch.sql.spark.flint.FlintIndexType; import org.opensearch.sql.spark.leasemanager.DefaultLeaseManager; import org.opensearch.sql.spark.response.JobExecutionResponseReader; @@ -210,7 +207,7 
@@ protected AsyncQueryExecutorService createAsyncQueryExecutorService( this.dataSourceService, new DataSourceUserAuthorizationHelperImpl(client), jobExecutionResponseReader, - new FlintIndexMetadataReaderImpl(client), + new FlintIndexMetadataServiceImpl(client), client, new SessionManager(stateStore, emrServerlessClientFactory, pluginSettings), new DefaultLeaseManager(pluginSettings, stateStore), @@ -330,64 +327,6 @@ public String loadResultIndexMappings() { return Resources.toString(url, Charsets.UTF_8); } - public class MockFlintSparkJob { - - private FlintIndexStateModel stateModel; - - public MockFlintSparkJob(String latestId) { - assertNotNull(latestId); - stateModel = - new FlintIndexStateModel( - FlintIndexState.EMPTY, - "mockAppId", - "mockJobId", - latestId, - DATASOURCE, - System.currentTimeMillis(), - "", - SequenceNumbers.UNASSIGNED_SEQ_NO, - SequenceNumbers.UNASSIGNED_PRIMARY_TERM); - stateModel = StateStore.createFlintIndexState(stateStore, DATASOURCE).apply(stateModel); - } - - public void refreshing() { - stateModel = - StateStore.updateFlintIndexState(stateStore, DATASOURCE) - .apply(stateModel, FlintIndexState.REFRESHING); - } - - public void cancelling() { - stateModel = - StateStore.updateFlintIndexState(stateStore, DATASOURCE) - .apply(stateModel, FlintIndexState.CANCELLING); - } - - public void active() { - stateModel = - StateStore.updateFlintIndexState(stateStore, DATASOURCE) - .apply(stateModel, FlintIndexState.ACTIVE); - } - - public void deleting() { - stateModel = - StateStore.updateFlintIndexState(stateStore, DATASOURCE) - .apply(stateModel, FlintIndexState.DELETING); - } - - public void deleted() { - stateModel = - StateStore.updateFlintIndexState(stateStore, DATASOURCE) - .apply(stateModel, FlintIndexState.DELETED); - } - - void assertState(FlintIndexState expected) { - Optional stateModelOpt = - StateStore.getFlintIndexState(stateStore, DATASOURCE).apply(stateModel.getId()); - assertTrue((stateModelOpt.isPresent())); - 
assertEquals(expected, stateModelOpt.get().getIndexState()); - } - } - @RequiredArgsConstructor public class FlintDatasetMock { final String query; diff --git a/spark/src/test/java/org/opensearch/sql/spark/asyncquery/AsyncQueryGetResultSpecTest.java b/spark/src/test/java/org/opensearch/sql/spark/asyncquery/AsyncQueryGetResultSpecTest.java index 4ec5d4d80b..3acbfc439c 100644 --- a/spark/src/test/java/org/opensearch/sql/spark/asyncquery/AsyncQueryGetResultSpecTest.java +++ b/spark/src/test/java/org/opensearch/sql/spark/asyncquery/AsyncQueryGetResultSpecTest.java @@ -26,6 +26,7 @@ import org.opensearch.sql.protocol.response.format.ResponseFormatter; import org.opensearch.sql.spark.asyncquery.model.AsyncQueryExecutionResponse; import org.opensearch.sql.spark.asyncquery.model.AsyncQueryResult; +import org.opensearch.sql.spark.asyncquery.model.MockFlintSparkJob; import org.opensearch.sql.spark.client.EMRServerlessClientFactory; import org.opensearch.sql.spark.execution.statement.StatementModel; import org.opensearch.sql.spark.execution.statement.StatementState; @@ -52,7 +53,7 @@ public class AsyncQueryGetResultSpecTest extends AsyncQueryExecutorServiceSpec { @Before public void doSetUp() { - mockIndexState = new MockFlintSparkJob(mockIndex.latestId); + mockIndexState = new MockFlintSparkJob(stateStore, mockIndex.latestId, DATASOURCE); } @Test diff --git a/spark/src/test/java/org/opensearch/sql/spark/asyncquery/IndexQuerySpecTest.java b/spark/src/test/java/org/opensearch/sql/spark/asyncquery/IndexQuerySpecTest.java index 9ba15c250e..4aeae3c133 100644 --- a/spark/src/test/java/org/opensearch/sql/spark/asyncquery/IndexQuerySpecTest.java +++ b/spark/src/test/java/org/opensearch/sql/spark/asyncquery/IndexQuerySpecTest.java @@ -9,11 +9,16 @@ import com.amazonaws.services.emrserverless.model.GetJobRunResult; import com.amazonaws.services.emrserverless.model.JobRun; import com.google.common.collect.ImmutableList; +import java.util.HashMap; +import java.util.Map; import 
org.junit.Assert; import org.junit.Test; +import org.junit.jupiter.api.Assertions; import org.opensearch.sql.spark.asyncquery.model.AsyncQueryExecutionResponse; -import org.opensearch.sql.spark.client.EMRServerlessClient; +import org.opensearch.sql.spark.asyncquery.model.MockFlintIndex; +import org.opensearch.sql.spark.asyncquery.model.MockFlintSparkJob; import org.opensearch.sql.spark.client.EMRServerlessClientFactory; +import org.opensearch.sql.spark.client.StartJobRequest; import org.opensearch.sql.spark.flint.FlintIndexState; import org.opensearch.sql.spark.flint.FlintIndexType; import org.opensearch.sql.spark.leasemanager.ConcurrencyLimitExceededException; @@ -92,13 +97,7 @@ public GetJobRunResult getJobRunResult(String applicationId, String jobId) { return new GetJobRunResult().withJobRun(new JobRun().withState("Cancelled")); } }; - EMRServerlessClientFactory emrServerlessClientFactory = - new EMRServerlessClientFactory() { - @Override - public EMRServerlessClient getClient() { - return emrsClient; - } - }; + EMRServerlessClientFactory emrServerlessClientFactory = () -> emrsClient; AsyncQueryExecutorService asyncQueryExecutorService = createAsyncQueryExecutorService(emrServerlessClientFactory); @@ -146,13 +145,7 @@ public CancelJobRunResult cancelJobRun(String applicationId, String jobId) { throw new IllegalArgumentException("Job run is not in a cancellable state"); } }; - EMRServerlessClientFactory emrServerlessClientFactory = - new EMRServerlessClientFactory() { - @Override - public EMRServerlessClient getClient() { - return emrsClient; - } - }; + EMRServerlessClientFactory emrServerlessClientFactory = () -> emrsClient; AsyncQueryExecutorService asyncQueryExecutorService = createAsyncQueryExecutorService(emrServerlessClientFactory); @@ -190,13 +183,7 @@ public GetJobRunResult getJobRunResult(String applicationId, String jobId) { return new GetJobRunResult().withJobRun(new JobRun().withState("Running")); } }; - EMRServerlessClientFactory 
emrServerlessClientFactory = - new EMRServerlessClientFactory() { - @Override - public EMRServerlessClient getClient() { - return emrsClient; - } - }; + EMRServerlessClientFactory emrServerlessClientFactory = () -> emrsClient; AsyncQueryExecutorService asyncQueryExecutorService = createAsyncQueryExecutorService(emrServerlessClientFactory); @@ -212,7 +199,7 @@ public EMRServerlessClient getClient() { AsyncQueryExecutionResponse asyncQueryResults = asyncQueryExecutorService.getAsyncQueryResults(response.getQueryId()); assertEquals("FAILED", asyncQueryResults.getStatus()); - assertEquals("cancel job timeout", asyncQueryResults.getError()); + assertEquals("Cancel job operation timed out.", asyncQueryResults.getError()); }); } @@ -233,20 +220,15 @@ public GetJobRunResult getJobRunResult(String applicationId, String jobId) { return new GetJobRunResult().withJobRun(new JobRun().withState("Cancelled")); } }; - EMRServerlessClientFactory emrServerlessClientFactory = - new EMRServerlessClientFactory() { - @Override - public EMRServerlessClient getClient() { - return emrsClient; - } - }; + EMRServerlessClientFactory emrServerlessClientFactory = () -> emrsClient; AsyncQueryExecutorService asyncQueryExecutorService = createAsyncQueryExecutorService(emrServerlessClientFactory); // Mock flint index mockDS.createIndex(); // Mock index state - MockFlintSparkJob flintIndexJob = new MockFlintSparkJob(mockDS.latestId); + MockFlintSparkJob flintIndexJob = + new MockFlintSparkJob(stateStore, mockDS.latestId, DATASOURCE); flintIndexJob.refreshing(); // 1.drop index @@ -294,20 +276,15 @@ public CancelJobRunResult cancelJobRun(String applicationId, String jobId) { throw new IllegalArgumentException("Job run is not in a cancellable state"); } }; - EMRServerlessClientFactory emrServerlessClientFactory = - new EMRServerlessClientFactory() { - @Override - public EMRServerlessClient getClient() { - return emrsClient; - } - }; + EMRServerlessClientFactory emrServerlessClientFactory = () -> 
emrsClient; AsyncQueryExecutorService asyncQueryExecutorService = createAsyncQueryExecutorService(emrServerlessClientFactory); // Mock flint index mockDS.createIndex(); // Mock index state in refresh state. - MockFlintSparkJob flintIndexJob = new MockFlintSparkJob(mockDS.latestId); + MockFlintSparkJob flintIndexJob = + new MockFlintSparkJob(stateStore, mockDS.latestId, DATASOURCE); flintIndexJob.refreshing(); // 1.drop index @@ -343,20 +320,15 @@ public GetJobRunResult getJobRunResult(String applicationId, String jobId) { return new GetJobRunResult().withJobRun(new JobRun().withState("Running")); } }; - EMRServerlessClientFactory emrServerlessClientFactory = - new EMRServerlessClientFactory() { - @Override - public EMRServerlessClient getClient() { - return emrsClient; - } - }; + EMRServerlessClientFactory emrServerlessClientFactory = () -> emrsClient; AsyncQueryExecutorService asyncQueryExecutorService = createAsyncQueryExecutorService(emrServerlessClientFactory); // Mock flint index mockDS.createIndex(); // Mock index state - MockFlintSparkJob flintIndexJob = new MockFlintSparkJob(mockDS.latestId); + MockFlintSparkJob flintIndexJob = + new MockFlintSparkJob(stateStore, mockDS.latestId, DATASOURCE); flintIndexJob.refreshing(); // 1. drop index @@ -368,9 +340,8 @@ public EMRServerlessClient getClient() { AsyncQueryExecutionResponse asyncQueryResults = asyncQueryExecutorService.getAsyncQueryResults(response.getQueryId()); assertEquals("FAILED", asyncQueryResults.getStatus()); - assertEquals("cancel job timeout", asyncQueryResults.getError()); - - flintIndexJob.assertState(FlintIndexState.CANCELLING); + assertEquals("Cancel job operation timed out.", asyncQueryResults.getError()); + flintIndexJob.assertState(FlintIndexState.REFRESHING); }); } @@ -380,7 +351,7 @@ public EMRServerlessClient getClient() { *

(1) call EMR-S (2) change index state to: DELETED */ @Test - public void dropIndexWithIndexInCancellingState() { + public void dropIndexWithIndexInRefreshingState() { ImmutableList.of(SKIPPING, COVERING, MV) .forEach( mockDS -> { @@ -388,24 +359,20 @@ public void dropIndexWithIndexInCancellingState() { new LocalEMRSClient() { @Override public GetJobRunResult getJobRunResult(String applicationId, String jobId) { + super.getJobRunResult(applicationId, jobId); return new GetJobRunResult().withJobRun(new JobRun().withState("Cancelled")); } }; - EMRServerlessClientFactory emrServerlessClientFactory = - new EMRServerlessClientFactory() { - @Override - public EMRServerlessClient getClient() { - return emrsClient; - } - }; + EMRServerlessClientFactory emrServerlessClientFactory = () -> emrsClient; AsyncQueryExecutorService asyncQueryExecutorService = createAsyncQueryExecutorService(emrServerlessClientFactory); // Mock flint index mockDS.createIndex(); // Mock index state - MockFlintSparkJob flintIndexJob = new MockFlintSparkJob(mockDS.latestId); - flintIndexJob.cancelling(); + MockFlintSparkJob flintIndexJob = + new MockFlintSparkJob(stateStore, mockDS.latestId, DATASOURCE); + flintIndexJob.refreshing(); // 1. drop index CreateAsyncQueryResponse response = @@ -420,13 +387,16 @@ public EMRServerlessClient getClient() { .getStatus()); flintIndexJob.assertState(FlintIndexState.DELETED); + emrsClient.startJobRunCalled(0); + emrsClient.cancelJobRunCalled(1); + emrsClient.getJobRunResultCalled(1); }); } /** - * No Job running, expectation is + * Index state is stable, Drop Index operation is retryable, expectation is * - *

(1) not call EMR-S (2) change index state to: DELETED + *

(1) call EMR-S (2) change index state to: DELETED */ @Test public void dropIndexWithIndexInActiveState() { @@ -435,32 +405,21 @@ public void dropIndexWithIndexInActiveState() { mockDS -> { LocalEMRSClient emrsClient = new LocalEMRSClient() { - @Override - public CancelJobRunResult cancelJobRun(String applicationId, String jobId) { - Assert.fail("should not call cancelJobRun"); - return null; - } - @Override public GetJobRunResult getJobRunResult(String applicationId, String jobId) { - Assert.fail("should not call getJobRunResult"); - return null; - } - }; - EMRServerlessClientFactory emrServerlessClientFactory = - new EMRServerlessClientFactory() { - @Override - public EMRServerlessClient getClient() { - return emrsClient; + super.getJobRunResult(applicationId, jobId); + return new GetJobRunResult().withJobRun(new JobRun().withState("Cancelled")); } }; + EMRServerlessClientFactory emrServerlessClientFactory = () -> emrsClient; AsyncQueryExecutorService asyncQueryExecutorService = createAsyncQueryExecutorService(emrServerlessClientFactory); // Mock flint index mockDS.createIndex(); // Mock index state - MockFlintSparkJob flintIndexJob = new MockFlintSparkJob(mockDS.latestId); + MockFlintSparkJob flintIndexJob = + new MockFlintSparkJob(stateStore, mockDS.latestId, DATASOURCE); flintIndexJob.active(); // 1. drop index @@ -469,50 +428,44 @@ public EMRServerlessClient getClient() { new CreateAsyncQueryRequest(mockDS.query, DATASOURCE, LangType.SQL, null)); // 2. 
fetch result - assertEquals( - "SUCCESS", - asyncQueryExecutorService - .getAsyncQueryResults(response.getQueryId()) - .getStatus()); - + AsyncQueryExecutionResponse asyncQueryExecutionResponse = + asyncQueryExecutorService.getAsyncQueryResults(response.getQueryId()); + assertEquals("SUCCESS", asyncQueryExecutionResponse.getStatus()); flintIndexJob.assertState(FlintIndexState.DELETED); + emrsClient.startJobRunCalled(0); + emrsClient.cancelJobRunCalled(1); + emrsClient.getJobRunResultCalled(1); }); } + /** + * Index state is stable, expectation is + * + *

(1) call EMR-S (2) change index state to: DELETED + */ @Test - public void dropIndexWithIndexInDeletingState() { + public void dropIndexWithIndexInCreatingState() { ImmutableList.of(SKIPPING, COVERING, MV) .forEach( mockDS -> { LocalEMRSClient emrsClient = new LocalEMRSClient() { - @Override - public CancelJobRunResult cancelJobRun(String applicationId, String jobId) { - Assert.fail("should not call cancelJobRun"); - return null; - } - @Override public GetJobRunResult getJobRunResult(String applicationId, String jobId) { - Assert.fail("should not call getJobRunResult"); - return null; - } - }; - EMRServerlessClientFactory emrServerlessClientFactory = - new EMRServerlessClientFactory() { - @Override - public EMRServerlessClient getClient() { - return emrsClient; + super.getJobRunResult(applicationId, jobId); + return new GetJobRunResult().withJobRun(new JobRun().withState("Cancelled")); } }; + EMRServerlessClientFactory emrServerlessClientFactory = () -> emrsClient; AsyncQueryExecutorService asyncQueryExecutorService = createAsyncQueryExecutorService(emrServerlessClientFactory); // Mock flint index mockDS.createIndex(); // Mock index state - MockFlintSparkJob flintIndexJob = new MockFlintSparkJob(mockDS.latestId); - flintIndexJob.deleted(); + MockFlintSparkJob flintIndexJob = + new MockFlintSparkJob(stateStore, mockDS.latestId, DATASOURCE); + flintIndexJob.creating(); // 1. drop index CreateAsyncQueryResponse response = @@ -530,40 +483,33 @@ public EMRServerlessClient getClient() { }); } + /** + * Index state is stable, Drop Index operation is retryable, expectation is + * + *

(1) call EMR-S (2) change index state to: DELETED + */ @Test - public void dropIndexWithIndexInDeletedState() { + public void dropIndexWithIndexInEmptyState() { ImmutableList.of(SKIPPING, COVERING, MV) .forEach( mockDS -> { LocalEMRSClient emrsClient = new LocalEMRSClient() { - @Override - public CancelJobRunResult cancelJobRun(String applicationId, String jobId) { - Assert.fail("should not call cancelJobRun"); - return null; - } - @Override public GetJobRunResult getJobRunResult(String applicationId, String jobId) { - Assert.fail("should not call getJobRunResult"); - return null; - } - }; - EMRServerlessClientFactory emrServerlessClientFactory = - new EMRServerlessClientFactory() { - @Override - public EMRServerlessClient getClient() { - return emrsClient; + super.getJobRunResult(applicationId, jobId); + return new GetJobRunResult().withJobRun(new JobRun().withState("Cancelled")); } }; + EMRServerlessClientFactory emrServerlessClientFactory = () -> emrsClient; AsyncQueryExecutorService asyncQueryExecutorService = createAsyncQueryExecutorService(emrServerlessClientFactory); // Mock flint index mockDS.createIndex(); // Mock index state - MockFlintSparkJob flintIndexJob = new MockFlintSparkJob(mockDS.latestId); - flintIndexJob.deleting(); + MockFlintSparkJob flintIndexJob = + new MockFlintSparkJob(stateStore, mockDS.latestId, DATASOURCE); // 1. drop index CreateAsyncQueryResponse response = @@ -582,12 +528,12 @@ public EMRServerlessClient getClient() { } /** - * No Job running, expectation is + * Couldn't acquire lock as the index is in transitioning state. Will result in error. * *

(1) not call EMR-S (2) change index state to: DELETED */ @Test - public void dropIndexWithIndexInEmptyState() { + public void dropIndexWithIndexInDeletedState() { ImmutableList.of(SKIPPING, COVERING, MV) .forEach( mockDS -> { @@ -605,34 +551,30 @@ public GetJobRunResult getJobRunResult(String applicationId, String jobId) { return null; } }; - EMRServerlessClientFactory emrServerlessClientFactory = - new EMRServerlessClientFactory() { - @Override - public EMRServerlessClient getClient() { - return emrsClient; - } - }; + EMRServerlessClientFactory emrServerlessClientFactory = () -> emrsClient; AsyncQueryExecutorService asyncQueryExecutorService = createAsyncQueryExecutorService(emrServerlessClientFactory); // Mock flint index mockDS.createIndex(); // Mock index state - MockFlintSparkJob flintIndexJob = new MockFlintSparkJob(mockDS.latestId); + MockFlintSparkJob flintIndexJob = + new MockFlintSparkJob(stateStore, mockDS.latestId, DATASOURCE); + flintIndexJob.deleting(); // 1. drop index CreateAsyncQueryResponse response = asyncQueryExecutorService.createAsyncQuery( new CreateAsyncQueryRequest(mockDS.query, DATASOURCE, LangType.SQL, null)); + AsyncQueryExecutionResponse asyncQueryExecutionResponse = + asyncQueryExecutorService.getAsyncQueryResults(response.getQueryId()); // 2. 
fetch result + assertEquals("FAILED", asyncQueryExecutionResponse.getStatus()); assertEquals( - "SUCCESS", - asyncQueryExecutorService - .getAsyncQueryResults(response.getQueryId()) - .getStatus()); - - flintIndexJob.assertState(FlintIndexState.DELETED); + "Transaction failed as flint index is not in a valid state.", + asyncQueryExecutionResponse.getError()); + flintIndexJob.assertState(FlintIndexState.DELETING); }); } @@ -660,13 +602,7 @@ public GetJobRunResult getJobRunResult(String applicationId, String jobId) { return null; } }; - EMRServerlessClientFactory emrServerlessClientFactory = - new EMRServerlessClientFactory() { - @Override - public EMRServerlessClient getClient() { - return emrsClient; - } - }; + EMRServerlessClientFactory emrServerlessClientFactory = () -> emrsClient; AsyncQueryExecutorService asyncQueryExecutorService = createAsyncQueryExecutorService(emrServerlessClientFactory); @@ -695,7 +631,8 @@ public void concurrentRefreshJobLimitNotApplied() { // Mock flint index COVERING.createIndex(); // Mock index state - MockFlintSparkJob flintIndexJob = new MockFlintSparkJob(COVERING.latestId); + MockFlintSparkJob flintIndexJob = + new MockFlintSparkJob(stateStore, COVERING.latestId, DATASOURCE); flintIndexJob.refreshing(); // query with auto refresh @@ -719,7 +656,8 @@ public void concurrentRefreshJobLimitAppliedToDDLWithAuthRefresh() { // Mock flint index COVERING.createIndex(); // Mock index state - MockFlintSparkJob flintIndexJob = new MockFlintSparkJob(COVERING.latestId); + MockFlintSparkJob flintIndexJob = + new MockFlintSparkJob(stateStore, COVERING.latestId, DATASOURCE); flintIndexJob.refreshing(); // query with auto_refresh = true. 
@@ -746,7 +684,8 @@ public void concurrentRefreshJobLimitAppliedToRefresh() { // Mock flint index COVERING.createIndex(); // Mock index state - MockFlintSparkJob flintIndexJob = new MockFlintSparkJob(COVERING.latestId); + MockFlintSparkJob flintIndexJob = + new MockFlintSparkJob(stateStore, COVERING.latestId, DATASOURCE); flintIndexJob.refreshing(); // query with auto_refresh = true. @@ -772,7 +711,8 @@ public void concurrentRefreshJobLimitNotAppliedToDDL() { // Mock flint index COVERING.createIndex(); // Mock index state - MockFlintSparkJob flintIndexJob = new MockFlintSparkJob(COVERING.latestId); + MockFlintSparkJob flintIndexJob = + new MockFlintSparkJob(stateStore, COVERING.latestId, DATASOURCE); flintIndexJob.refreshing(); CreateAsyncQueryResponse asyncQueryResponse = @@ -810,8 +750,6 @@ public GetJobRunResult getJobRunResult(String applicationId, String jobId) { asyncQueryExecutorService.createAsyncQuery( new CreateAsyncQueryRequest(query, DATASOURCE, LangType.SQL, null)); - System.out.println(query); - // 2. cancel query IllegalArgumentException exception = assertThrows( @@ -845,7 +783,8 @@ public GetJobRunResult getJobRunResult( // Mock flint index mockDS.createIndex(); // Mock index state - MockFlintSparkJob flintIndexJob = new MockFlintSparkJob(mockDS.latestId); + MockFlintSparkJob flintIndexJob = + new MockFlintSparkJob(stateStore, mockDS.latestId, DATASOURCE); // 1. 
Submit REFRESH statement CreateAsyncQueryResponse response = @@ -865,4 +804,835 @@ public GetJobRunResult getJobRunResult( flintIndexJob.assertState(FlintIndexState.ACTIVE); }); } + + @Test + public void cancelRefreshStatementWithFailureInFetchingIndexMetadata() { + String indexName = "flint_my_glue_mydb_http_logs_covering_corrupted_index"; + MockFlintIndex mockFlintIndex = + new MockFlintIndex(client(), indexName, FlintIndexType.COVERING, null); + AsyncQueryExecutorService asyncQueryExecutorService = + createAsyncQueryExecutorService( + () -> + new LocalEMRSClient() { + @Override + public GetJobRunResult getJobRunResult(String applicationId, String jobId) { + return new GetJobRunResult().withJobRun(new JobRun().withState("Cancelled")); + } + }); + + mockFlintIndex.createIndex(); + // Mock index state + MockFlintSparkJob flintIndexJob = + new MockFlintSparkJob(stateStore, indexName + "_latest_id", DATASOURCE); + + // 1. Submit REFRESH statement + CreateAsyncQueryResponse response = + asyncQueryExecutorService.createAsyncQuery( + new CreateAsyncQueryRequest( + "REFRESH INDEX covering_corrupted ON my_glue.mydb.http_logs", + DATASOURCE, + LangType.SQL, + null)); + // mock index state. + flintIndexJob.refreshing(); + + // 2. 
Cancel query + Assertions.assertThrows( + IllegalStateException.class, + () -> asyncQueryExecutorService.cancelQuery(response.getQueryId())); + } + + @Test + public void testAlterIndexQueryConvertingToManualRefresh() { + MockFlintIndex ALTER_SKIPPING = + new MockFlintIndex( + client, + "flint_my_glue_mydb_http_logs_skipping_index", + FlintIndexType.SKIPPING, + "ALTER SKIPPING INDEX ON my_glue.mydb.http_logs WITH (auto_refresh=false," + + " incremental_refresh=false)"); + MockFlintIndex ALTER_COVERING = + new MockFlintIndex( + client, + "flint_my_glue_mydb_http_logs_covering_index", + FlintIndexType.COVERING, + "ALTER INDEX covering ON my_glue.mydb.http_logs WITH (auto_refresh=false," + + " incremental_refresh=false)"); + MockFlintIndex ALTER_MV = + new MockFlintIndex( + client, + "flint_my_glue_mydb_mv", + FlintIndexType.MATERIALIZED_VIEW, + "ALTER MATERIALIZED VIEW my_glue.mydb.mv WITH (auto_refresh=false," + + " incremental_refresh=false) "); + ImmutableList.of(ALTER_SKIPPING, ALTER_COVERING, ALTER_MV) + .forEach( + mockDS -> { + LocalEMRSClient emrsClient = + new LocalEMRSClient() { + @Override + public GetJobRunResult getJobRunResult(String applicationId, String jobId) { + super.getJobRunResult(applicationId, jobId); + JobRun jobRun = new JobRun(); + jobRun.setState("cancelled"); + return new GetJobRunResult().withJobRun(jobRun); + } + }; + EMRServerlessClientFactory emrServerlessClientFactory = () -> emrsClient; + AsyncQueryExecutorService asyncQueryExecutorService = + createAsyncQueryExecutorService(emrServerlessClientFactory); + // Mock flint index + mockDS.createIndex(); + HashMap existingOptions = new HashMap<>(); + existingOptions.put("auto_refresh", "true"); + mockDS.updateIndexOptions(existingOptions, false); + // Mock index state + MockFlintSparkJob flintIndexJob = + new MockFlintSparkJob(stateStore, mockDS.getLatestId(), DATASOURCE); + flintIndexJob.active(); + + // 1. 
alter index + CreateAsyncQueryResponse response = + asyncQueryExecutorService.createAsyncQuery( + new CreateAsyncQueryRequest( + mockDS.getQuery(), DATASOURCE, LangType.SQL, null)); + + // 2. fetch result + AsyncQueryExecutionResponse asyncQueryExecutionResponse = + asyncQueryExecutorService.getAsyncQueryResults(response.getQueryId()); + assertEquals("SUCCESS", asyncQueryExecutionResponse.getStatus()); + emrsClient.startJobRunCalled(0); + emrsClient.cancelJobRunCalled(1); + flintIndexJob.assertState(FlintIndexState.ACTIVE); + Map mappings = mockDS.getIndexMappings(); + Map meta = (HashMap) mappings.get("_meta"); + Map options = (Map) meta.get("options"); + Assertions.assertEquals("false", options.get("auto_refresh")); + }); + } + + @Test + public void testAlterIndexQueryWithRedundantOperation() { + MockFlintIndex ALTER_SKIPPING = + new MockFlintIndex( + client, + "flint_my_glue_mydb_http_logs_skipping_index", + FlintIndexType.SKIPPING, + "ALTER SKIPPING INDEX ON my_glue.mydb.http_logs WITH (auto_refresh=false," + + " incremental_refresh=false)"); + MockFlintIndex ALTER_COVERING = + new MockFlintIndex( + client, + "flint_my_glue_mydb_http_logs_covering_index", + FlintIndexType.COVERING, + "ALTER INDEX covering ON my_glue.mydb.http_logs WITH (auto_refresh=false," + + " incremental_refresh=false)"); + MockFlintIndex ALTER_MV = + new MockFlintIndex( + client, + "flint_my_glue_mydb_mv", + FlintIndexType.MATERIALIZED_VIEW, + "ALTER MATERIALIZED VIEW my_glue.mydb.mv WITH (auto_refresh=false," + + " incremental_refresh=false) "); + ImmutableList.of(ALTER_SKIPPING, ALTER_COVERING, ALTER_MV) + .forEach( + mockDS -> { + LocalEMRSClient emrsClient = + new LocalEMRSClient() { + @Override + public String startJobRun(StartJobRequest startJobRequest) { + return "jobId"; + } + + @Override + public GetJobRunResult getJobRunResult(String applicationId, String jobId) { + JobRun jobRun = new JobRun(); + jobRun.setState("cancelled"); + return new GetJobRunResult().withJobRun(jobRun); + 
} + + @Override + public CancelJobRunResult cancelJobRun(String applicationId, String jobId) { + return new CancelJobRunResult() + .withJobRunId(jobId) + .withApplicationId(applicationId); + } + }; + EMRServerlessClientFactory emrServerlessClientFactory = () -> emrsClient; + AsyncQueryExecutorService asyncQueryExecutorService = + createAsyncQueryExecutorService(emrServerlessClientFactory); + // Mock flint index + mockDS.createIndex(); + HashMap existingOptions = new HashMap<>(); + existingOptions.put("auto_refresh", "false"); + mockDS.updateIndexOptions(existingOptions, false); + // Mock index state + MockFlintSparkJob flintIndexJob = + new MockFlintSparkJob(stateStore, mockDS.getLatestId(), DATASOURCE); + flintIndexJob.active(); + + // 1. alter index + CreateAsyncQueryResponse response = + asyncQueryExecutorService.createAsyncQuery( + new CreateAsyncQueryRequest( + mockDS.getQuery(), DATASOURCE, LangType.SQL, null)); + + // 2. fetch result + AsyncQueryExecutionResponse asyncQueryExecutionResponse = + asyncQueryExecutorService.getAsyncQueryResults(response.getQueryId()); + assertEquals("FAILED", asyncQueryExecutionResponse.getStatus()); + assertEquals( + "Converting a Flint index to manual refresh is not permitted if it is already set" + + " to manual.", + asyncQueryExecutionResponse.getError()); + emrsClient.startJobRunCalled(0); + emrsClient.cancelJobRunCalled(0); + flintIndexJob.assertState(FlintIndexState.ACTIVE); + Map mappings = mockDS.getIndexMappings(); + Map meta = (HashMap) mappings.get("_meta"); + Map options = (Map) meta.get("options"); + Assertions.assertEquals("false", options.get("auto_refresh")); + }); + } + + @Test + public void testAlterIndexQueryConvertingToAutoRefresh() { + MockFlintIndex ALTER_SKIPPING = + new MockFlintIndex( + client, + "flint_my_glue_mydb_http_logs_skipping_index", + FlintIndexType.SKIPPING, + "ALTER SKIPPING INDEX ON my_glue.mydb.http_logs WITH (auto_refresh=true," + + " incremental_refresh=false)"); + MockFlintIndex 
ALTER_COVERING = + new MockFlintIndex( + client, + "flint_my_glue_mydb_http_logs_covering_index", + FlintIndexType.COVERING, + "ALTER INDEX covering ON my_glue.mydb.http_logs WITH (auto_refresh=true," + + " incremental_refresh=false)"); + MockFlintIndex ALTER_MV = + new MockFlintIndex( + client, + "flint_my_glue_mydb_mv", + FlintIndexType.MATERIALIZED_VIEW, + "ALTER MATERIALIZED VIEW my_glue.mydb.mv WITH (auto_refresh=true," + + " incremental_refresh=false) "); + ImmutableList.of(ALTER_SKIPPING, ALTER_COVERING, ALTER_MV) + .forEach( + mockDS -> { + LocalEMRSClient localEMRSClient = new LocalEMRSClient(); + EMRServerlessClientFactory clientFactory = () -> localEMRSClient; + AsyncQueryExecutorService asyncQueryExecutorService = + createAsyncQueryExecutorService(clientFactory); + + // Mock flint index + mockDS.createIndex(); + HashMap existingOptions = new HashMap<>(); + existingOptions.put("auto_refresh", "false"); + mockDS.updateIndexOptions(existingOptions, false); + // Mock index state + MockFlintSparkJob flintIndexJob = + new MockFlintSparkJob(stateStore, mockDS.getLatestId(), DATASOURCE); + flintIndexJob.active(); + + // 1. alter index + CreateAsyncQueryResponse response = + asyncQueryExecutorService.createAsyncQuery( + new CreateAsyncQueryRequest( + mockDS.getQuery(), DATASOURCE, LangType.SQL, null)); + + // 2. 
fetch result + assertEquals( + "RUNNING", + asyncQueryExecutorService + .getAsyncQueryResults(response.getQueryId()) + .getStatus()); + + flintIndexJob.assertState(FlintIndexState.ACTIVE); + localEMRSClient.startJobRunCalled(1); + localEMRSClient.getJobRunResultCalled(1); + localEMRSClient.cancelJobRunCalled(0); + Map mappings = mockDS.getIndexMappings(); + Map meta = (HashMap) mappings.get("_meta"); + Map options = (Map) meta.get("options"); + Assertions.assertEquals("false", options.get("auto_refresh")); + }); + } + + @Test + public void testAlterIndexQueryWithOutAnyAutoRefresh() { + MockFlintIndex ALTER_SKIPPING = + new MockFlintIndex( + client, + "flint_my_glue_mydb_http_logs_skipping_index", + FlintIndexType.SKIPPING, + "ALTER SKIPPING INDEX ON my_glue.mydb.http_logs WITH (" + + " incremental_refresh=false)"); + MockFlintIndex ALTER_COVERING = + new MockFlintIndex( + client, + "flint_my_glue_mydb_http_logs_covering_index", + FlintIndexType.COVERING, + "ALTER INDEX covering ON my_glue.mydb.http_logs WITH (" + + " incremental_refresh=false)"); + MockFlintIndex ALTER_MV = + new MockFlintIndex( + client, + "flint_my_glue_mydb_mv", + FlintIndexType.MATERIALIZED_VIEW, + "ALTER MATERIALIZED VIEW my_glue.mydb.mv WITH (" + " incremental_refresh=false) "); + ImmutableList.of(ALTER_SKIPPING, ALTER_COVERING, ALTER_MV) + .forEach( + mockDS -> { + LocalEMRSClient localEMRSClient = new LocalEMRSClient(); + EMRServerlessClientFactory clientFactory = () -> localEMRSClient; + AsyncQueryExecutorService asyncQueryExecutorService = + createAsyncQueryExecutorService(clientFactory); + + // Mock flint index + mockDS.createIndex(); + HashMap existingOptions = new HashMap<>(); + existingOptions.put("auto_refresh", "false"); + mockDS.updateIndexOptions(existingOptions, false); + // Mock index state + MockFlintSparkJob flintIndexJob = + new MockFlintSparkJob(stateStore, mockDS.getLatestId(), DATASOURCE); + flintIndexJob.active(); + + // 1. 
alter index + CreateAsyncQueryResponse response = + asyncQueryExecutorService.createAsyncQuery( + new CreateAsyncQueryRequest( + mockDS.getQuery(), DATASOURCE, LangType.SQL, null)); + + // 2. fetch result + assertEquals( + "RUNNING", + asyncQueryExecutorService + .getAsyncQueryResults(response.getQueryId()) + .getStatus()); + + flintIndexJob.assertState(FlintIndexState.ACTIVE); + localEMRSClient.startJobRunCalled(1); + localEMRSClient.getJobRunResultCalled(1); + localEMRSClient.cancelJobRunCalled(0); + Map mappings = mockDS.getIndexMappings(); + Map meta = (HashMap) mappings.get("_meta"); + Map options = (Map) meta.get("options"); + Assertions.assertEquals("false", options.get("auto_refresh")); + }); + } + + @Test + public void testAlterIndexQueryOfFullRefreshWithInvalidOptions() { + MockFlintIndex ALTER_SKIPPING = + new MockFlintIndex( + client, + "flint_my_glue_mydb_http_logs_skipping_index", + FlintIndexType.SKIPPING, + "ALTER SKIPPING INDEX ON my_glue.mydb.http_logs WITH (auto_refresh=false," + + " incremental_refresh=false, checkpoint_location=\"s3://ckp/skp\")"); + MockFlintIndex ALTER_COVERING = + new MockFlintIndex( + client, + "flint_my_glue_mydb_http_logs_covering_index", + FlintIndexType.COVERING, + "ALTER INDEX covering ON my_glue.mydb.http_logs WITH (auto_refresh=false," + + " incremental_refresh=false, checkpoint_location=\"s3://ckp/skp\")"); + MockFlintIndex ALTER_MV = + new MockFlintIndex( + client, + "flint_my_glue_mydb_mv", + FlintIndexType.MATERIALIZED_VIEW, + "ALTER MATERIALIZED VIEW my_glue.mydb.mv WITH (auto_refresh=false," + + " incremental_refresh=false, checkpoint_location=\"s3://ckp/skp\") "); + ImmutableList.of(ALTER_SKIPPING, ALTER_COVERING, ALTER_MV) + .forEach( + mockDS -> { + LocalEMRSClient emrsClient = + new LocalEMRSClient() { + @Override + public GetJobRunResult getJobRunResult(String applicationId, String jobId) { + super.getJobRunResult(applicationId, jobId); + JobRun jobRun = new JobRun(); + jobRun.setState("cancelled"); + 
return new GetJobRunResult().withJobRun(jobRun); + } + }; + EMRServerlessClientFactory emrServerlessClientFactory = () -> emrsClient; + AsyncQueryExecutorService asyncQueryExecutorService = + createAsyncQueryExecutorService(emrServerlessClientFactory); + // Mock flint index + mockDS.createIndex(); + HashMap existingOptions = new HashMap<>(); + existingOptions.put("auto_refresh", "true"); + mockDS.updateIndexOptions(existingOptions, false); + // Mock index state + MockFlintSparkJob flintIndexJob = + new MockFlintSparkJob(stateStore, mockDS.getLatestId(), DATASOURCE); + flintIndexJob.active(); + + // 1. alter index + CreateAsyncQueryResponse response = + asyncQueryExecutorService.createAsyncQuery( + new CreateAsyncQueryRequest( + mockDS.getQuery(), DATASOURCE, LangType.SQL, null)); + + // 2. fetch result + AsyncQueryExecutionResponse asyncQueryExecutionResponse = + asyncQueryExecutorService.getAsyncQueryResults(response.getQueryId()); + assertEquals("FAILED", asyncQueryExecutionResponse.getStatus()); + assertEquals( + "Altering to full refresh only allows: [auto_refresh, incremental_refresh]" + + " options", + asyncQueryExecutionResponse.getError()); + emrsClient.startJobRunCalled(0); + emrsClient.cancelJobRunCalled(0); + flintIndexJob.assertState(FlintIndexState.ACTIVE); + Map mappings = mockDS.getIndexMappings(); + Map meta = (HashMap) mappings.get("_meta"); + Map options = (Map) meta.get("options"); + Assertions.assertEquals("true", options.get("auto_refresh")); + }); + } + + @Test + public void testAlterIndexQueryOfIncrementalRefreshWithInvalidOptions() { + MockFlintIndex ALTER_SKIPPING = + new MockFlintIndex( + client, + "flint_my_glue_mydb_http_logs_skipping_index", + FlintIndexType.SKIPPING, + "ALTER SKIPPING INDEX ON my_glue.mydb.http_logs WITH (auto_refresh=false," + + " incremental_refresh=true, output_mode=\"complete\")"); + MockFlintIndex ALTER_COVERING = + new MockFlintIndex( + client, + "flint_my_glue_mydb_http_logs_covering_index", + 
FlintIndexType.COVERING, + "ALTER INDEX covering ON my_glue.mydb.http_logs WITH (auto_refresh=false," + + " incremental_refresh=true, output_mode=\"complete\")"); + MockFlintIndex ALTER_MV = + new MockFlintIndex( + client, + "flint_my_glue_mydb_mv", + FlintIndexType.MATERIALIZED_VIEW, + "ALTER MATERIALIZED VIEW my_glue.mydb.mv WITH (auto_refresh=false," + + " incremental_refresh=true, output_mode=\"complete\") "); + ImmutableList.of(ALTER_SKIPPING, ALTER_COVERING, ALTER_MV) + .forEach( + mockDS -> { + LocalEMRSClient emrsClient = + new LocalEMRSClient() { + @Override + public GetJobRunResult getJobRunResult(String applicationId, String jobId) { + super.getJobRunResult(applicationId, jobId); + JobRun jobRun = new JobRun(); + jobRun.setState("cancelled"); + return new GetJobRunResult().withJobRun(jobRun); + } + }; + EMRServerlessClientFactory emrServerlessClientFactory = () -> emrsClient; + AsyncQueryExecutorService asyncQueryExecutorService = + createAsyncQueryExecutorService(emrServerlessClientFactory); + // Mock flint index + mockDS.createIndex(); + HashMap existingOptions = new HashMap<>(); + existingOptions.put("auto_refresh", "true"); + mockDS.updateIndexOptions(existingOptions, false); + // Mock index state + MockFlintSparkJob flintIndexJob = + new MockFlintSparkJob(stateStore, mockDS.getLatestId(), DATASOURCE); + flintIndexJob.active(); + + // 1. alter index + CreateAsyncQueryResponse response = + asyncQueryExecutorService.createAsyncQuery( + new CreateAsyncQueryRequest( + mockDS.getQuery(), DATASOURCE, LangType.SQL, null)); + + // 2. 
fetch result + AsyncQueryExecutionResponse asyncQueryExecutionResponse = + asyncQueryExecutorService.getAsyncQueryResults(response.getQueryId()); + assertEquals("FAILED", asyncQueryExecutionResponse.getStatus()); + assertEquals( + "Altering to incremental refresh only allows: [auto_refresh, incremental_refresh," + + " watermark_delay, checkpoint_location] options", + asyncQueryExecutionResponse.getError()); + emrsClient.startJobRunCalled(0); + emrsClient.cancelJobRunCalled(0); + flintIndexJob.assertState(FlintIndexState.ACTIVE); + Map mappings = mockDS.getIndexMappings(); + Map meta = (HashMap) mappings.get("_meta"); + Map options = (Map) meta.get("options"); + Assertions.assertEquals("true", options.get("auto_refresh")); + }); + } + + @Test + public void testAlterIndexQueryOfIncrementalRefreshWithInsufficientOptions() { + MockFlintIndex ALTER_SKIPPING = + new MockFlintIndex( + client, + "flint_my_glue_mydb_http_logs_skipping_index", + FlintIndexType.SKIPPING, + "ALTER SKIPPING INDEX ON my_glue.mydb.http_logs WITH (auto_refresh=false," + + " incremental_refresh=true)"); + MockFlintIndex ALTER_COVERING = + new MockFlintIndex( + client, + "flint_my_glue_mydb_http_logs_covering_index", + FlintIndexType.COVERING, + "ALTER INDEX covering ON my_glue.mydb.http_logs WITH (auto_refresh=false," + + " incremental_refresh=true)"); + ImmutableList.of(ALTER_SKIPPING, ALTER_COVERING) + .forEach( + mockDS -> { + LocalEMRSClient emrsClient = + new LocalEMRSClient() { + @Override + public GetJobRunResult getJobRunResult(String applicationId, String jobId) { + super.getJobRunResult(applicationId, jobId); + JobRun jobRun = new JobRun(); + jobRun.setState("cancelled"); + return new GetJobRunResult().withJobRun(jobRun); + } + }; + EMRServerlessClientFactory emrServerlessClientFactory = () -> emrsClient; + AsyncQueryExecutorService asyncQueryExecutorService = + createAsyncQueryExecutorService(emrServerlessClientFactory); + // Mock flint index + mockDS.createIndex(); + HashMap 
existingOptions = new HashMap<>(); + existingOptions.put("auto_refresh", "true"); + existingOptions.put("incremental_refresh", "false"); + mockDS.updateIndexOptions(existingOptions, true); + // Mock index state + MockFlintSparkJob flintIndexJob = + new MockFlintSparkJob(stateStore, mockDS.getLatestId(), DATASOURCE); + flintIndexJob.active(); + + // 1. alter index + CreateAsyncQueryResponse response = + asyncQueryExecutorService.createAsyncQuery( + new CreateAsyncQueryRequest( + mockDS.getQuery(), DATASOURCE, LangType.SQL, null)); + + // 2. fetch result + AsyncQueryExecutionResponse asyncQueryExecutionResponse = + asyncQueryExecutorService.getAsyncQueryResults(response.getQueryId()); + assertEquals("FAILED", asyncQueryExecutionResponse.getStatus()); + assertEquals( + "Conversion to incremental refresh index cannot proceed due to missing" + + " attributes: checkpoint_location.", + asyncQueryExecutionResponse.getError()); + emrsClient.startJobRunCalled(0); + emrsClient.cancelJobRunCalled(0); + flintIndexJob.assertState(FlintIndexState.ACTIVE); + Map mappings = mockDS.getIndexMappings(); + Map meta = (HashMap) mappings.get("_meta"); + Map options = (Map) meta.get("options"); + Assertions.assertEquals("true", options.get("auto_refresh")); + }); + } + + @Test + public void testAlterIndexQueryOfIncrementalRefreshWithInsufficientOptionsForMV() { + MockFlintIndex ALTER_MV = + new MockFlintIndex( + client, + "flint_my_glue_mydb_mv", + FlintIndexType.MATERIALIZED_VIEW, + "ALTER MATERIALIZED VIEW my_glue.mydb.mv WITH (auto_refresh=false," + + " incremental_refresh=true) "); + ImmutableList.of(ALTER_MV) + .forEach( + mockDS -> { + LocalEMRSClient emrsClient = + new LocalEMRSClient() { + @Override + public GetJobRunResult getJobRunResult(String applicationId, String jobId) { + super.getJobRunResult(applicationId, jobId); + JobRun jobRun = new JobRun(); + jobRun.setState("cancelled"); + return new GetJobRunResult().withJobRun(jobRun); + } + }; + EMRServerlessClientFactory 
emrServerlessClientFactory = () -> emrsClient; + AsyncQueryExecutorService asyncQueryExecutorService = + createAsyncQueryExecutorService(emrServerlessClientFactory); + // Mock flint index + mockDS.createIndex(); + HashMap existingOptions = new HashMap<>(); + existingOptions.put("auto_refresh", "true"); + existingOptions.put("incremental_refresh", "false"); + mockDS.updateIndexOptions(existingOptions, true); + // Mock index state + MockFlintSparkJob flintIndexJob = + new MockFlintSparkJob(stateStore, mockDS.getLatestId(), DATASOURCE); + flintIndexJob.active(); + + // 1. alter index + CreateAsyncQueryResponse response = + asyncQueryExecutorService.createAsyncQuery( + new CreateAsyncQueryRequest( + mockDS.getQuery(), DATASOURCE, LangType.SQL, null)); + + // 2. fetch result + AsyncQueryExecutionResponse asyncQueryExecutionResponse = + asyncQueryExecutorService.getAsyncQueryResults(response.getQueryId()); + assertEquals("FAILED", asyncQueryExecutionResponse.getStatus()); + assertEquals( + "Conversion to incremental refresh index cannot proceed due to missing" + + " attributes: checkpoint_location, watermark_delay.", + asyncQueryExecutionResponse.getError()); + emrsClient.startJobRunCalled(0); + emrsClient.cancelJobRunCalled(0); + flintIndexJob.assertState(FlintIndexState.ACTIVE); + Map mappings = mockDS.getIndexMappings(); + Map meta = (HashMap) mappings.get("_meta"); + Map options = (Map) meta.get("options"); + Assertions.assertEquals("true", options.get("auto_refresh")); + }); + } + + @Test + public void testAlterIndexQueryOfIncrementalRefreshWithEmptyExistingOptionsForMV() { + MockFlintIndex ALTER_MV = + new MockFlintIndex( + client, + "flint_my_glue_mydb_mv", + FlintIndexType.MATERIALIZED_VIEW, + "ALTER MATERIALIZED VIEW my_glue.mydb.mv WITH (auto_refresh=false," + + " incremental_refresh=true) "); + ImmutableList.of(ALTER_MV) + .forEach( + mockDS -> { + LocalEMRSClient emrsClient = + new LocalEMRSClient() { + @Override + public GetJobRunResult 
getJobRunResult(String applicationId, String jobId) { + super.getJobRunResult(applicationId, jobId); + JobRun jobRun = new JobRun(); + jobRun.setState("cancelled"); + return new GetJobRunResult().withJobRun(jobRun); + } + }; + EMRServerlessClientFactory emrServerlessClientFactory = () -> emrsClient; + AsyncQueryExecutorService asyncQueryExecutorService = + createAsyncQueryExecutorService(emrServerlessClientFactory); + // Mock flint index + mockDS.createIndex(); + HashMap existingOptions = new HashMap<>(); + existingOptions.put("auto_refresh", "true"); + existingOptions.put("incremental_refresh", "false"); + existingOptions.put("watermark_delay", ""); + existingOptions.put("checkpoint_location", ""); + mockDS.updateIndexOptions(existingOptions, true); + // Mock index state + MockFlintSparkJob flintIndexJob = + new MockFlintSparkJob(stateStore, mockDS.getLatestId(), DATASOURCE); + flintIndexJob.active(); + + // 1. alter index + CreateAsyncQueryResponse response = + asyncQueryExecutorService.createAsyncQuery( + new CreateAsyncQueryRequest( + mockDS.getQuery(), DATASOURCE, LangType.SQL, null)); + + // 2. 
fetch result + AsyncQueryExecutionResponse asyncQueryExecutionResponse = + asyncQueryExecutorService.getAsyncQueryResults(response.getQueryId()); + assertEquals("FAILED", asyncQueryExecutionResponse.getStatus()); + assertEquals( + "Conversion to incremental refresh index cannot proceed due to missing" + + " attributes: checkpoint_location, watermark_delay.", + asyncQueryExecutionResponse.getError()); + emrsClient.startJobRunCalled(0); + emrsClient.cancelJobRunCalled(0); + flintIndexJob.assertState(FlintIndexState.ACTIVE); + Map mappings = mockDS.getIndexMappings(); + Map meta = (HashMap) mappings.get("_meta"); + Map options = (Map) meta.get("options"); + Assertions.assertEquals("true", options.get("auto_refresh")); + }); + } + + @Test + public void testAlterIndexQueryOfIncrementalRefresh() { + MockFlintIndex ALTER_MV = + new MockFlintIndex( + client, + "flint_my_glue_mydb_mv", + FlintIndexType.MATERIALIZED_VIEW, + "ALTER MATERIALIZED VIEW my_glue.mydb.mv WITH (auto_refresh=false," + + " incremental_refresh=true) "); + ImmutableList.of(ALTER_MV) + .forEach( + mockDS -> { + LocalEMRSClient emrsClient = + new LocalEMRSClient() { + @Override + public GetJobRunResult getJobRunResult(String applicationId, String jobId) { + super.getJobRunResult(applicationId, jobId); + JobRun jobRun = new JobRun(); + jobRun.setState("cancelled"); + return new GetJobRunResult().withJobRun(jobRun); + } + }; + EMRServerlessClientFactory emrServerlessClientFactory = () -> emrsClient; + AsyncQueryExecutorService asyncQueryExecutorService = + createAsyncQueryExecutorService(emrServerlessClientFactory); + // Mock flint index + mockDS.createIndex(); + HashMap existingOptions = new HashMap<>(); + existingOptions.put("auto_refresh", "true"); + existingOptions.put("incremental_refresh", "false"); + existingOptions.put("watermark_delay", "watermark_delay"); + existingOptions.put("checkpoint_location", "s3://checkpoint/location"); + mockDS.updateIndexOptions(existingOptions, true); + // Mock index 
state + MockFlintSparkJob flintIndexJob = + new MockFlintSparkJob(stateStore, mockDS.getLatestId(), DATASOURCE); + flintIndexJob.refreshing(); + + // 1. alter index + CreateAsyncQueryResponse response = + asyncQueryExecutorService.createAsyncQuery( + new CreateAsyncQueryRequest( + mockDS.getQuery(), DATASOURCE, LangType.SQL, null)); + + // 2. fetch result + AsyncQueryExecutionResponse asyncQueryExecutionResponse = + asyncQueryExecutorService.getAsyncQueryResults(response.getQueryId()); + assertEquals("SUCCESS", asyncQueryExecutionResponse.getStatus()); + emrsClient.startJobRunCalled(0); + emrsClient.getJobRunResultCalled(1); + emrsClient.cancelJobRunCalled(1); + flintIndexJob.assertState(FlintIndexState.ACTIVE); + Map mappings = mockDS.getIndexMappings(); + Map meta = (HashMap) mappings.get("_meta"); + Map options = (Map) meta.get("options"); + Assertions.assertEquals("false", options.get("auto_refresh")); + Assertions.assertEquals("true", options.get("incremental_refresh")); + }); + } + + @Test + public void testAlterIndexQueryWithIncrementalRefreshAlreadyExisting() { + MockFlintIndex ALTER_MV = + new MockFlintIndex( + client, + "flint_my_glue_mydb_mv", + FlintIndexType.MATERIALIZED_VIEW, + "ALTER MATERIALIZED VIEW my_glue.mydb.mv WITH (auto_refresh=false) "); + ImmutableList.of(ALTER_MV) + .forEach( + mockDS -> { + LocalEMRSClient emrsClient = + new LocalEMRSClient() { + @Override + public GetJobRunResult getJobRunResult(String applicationId, String jobId) { + super.getJobRunResult(applicationId, jobId); + JobRun jobRun = new JobRun(); + jobRun.setState("cancelled"); + return new GetJobRunResult().withJobRun(jobRun); + } + }; + EMRServerlessClientFactory emrServerlessClientFactory = () -> emrsClient; + AsyncQueryExecutorService asyncQueryExecutorService = + createAsyncQueryExecutorService(emrServerlessClientFactory); + // Mock flint index + mockDS.createIndex(); + HashMap existingOptions = new HashMap<>(); + existingOptions.put("auto_refresh", "true"); + 
existingOptions.put("incremental_refresh", "true"); + existingOptions.put("watermark_delay", "watermark_delay"); + existingOptions.put("checkpoint_location", "s3://checkpoint/location"); + mockDS.updateIndexOptions(existingOptions, true); + // Mock index state + MockFlintSparkJob flintIndexJob = + new MockFlintSparkJob(stateStore, mockDS.getLatestId(), DATASOURCE); + flintIndexJob.refreshing(); + + // 1. alter index + CreateAsyncQueryResponse response = + asyncQueryExecutorService.createAsyncQuery( + new CreateAsyncQueryRequest( + mockDS.getQuery(), DATASOURCE, LangType.SQL, null)); + + // 2. fetch result + AsyncQueryExecutionResponse asyncQueryExecutionResponse = + asyncQueryExecutorService.getAsyncQueryResults(response.getQueryId()); + assertEquals("SUCCESS", asyncQueryExecutionResponse.getStatus()); + emrsClient.startJobRunCalled(0); + emrsClient.getJobRunResultCalled(1); + emrsClient.cancelJobRunCalled(1); + flintIndexJob.assertState(FlintIndexState.ACTIVE); + Map mappings = mockDS.getIndexMappings(); + Map meta = (HashMap) mappings.get("_meta"); + Map options = (Map) meta.get("options"); + Assertions.assertEquals("false", options.get("auto_refresh")); + Assertions.assertEquals("true", options.get("incremental_refresh")); + }); + } + + @Test + public void testAlterIndexQueryWithInvalidInitialState() { + MockFlintIndex ALTER_SKIPPING = + new MockFlintIndex( + client, + "flint_my_glue_mydb_http_logs_skipping_index", + FlintIndexType.SKIPPING, + "ALTER SKIPPING INDEX ON my_glue.mydb.http_logs WITH (auto_refresh=false," + + " incremental_refresh=false)"); + ImmutableList.of(ALTER_SKIPPING) + .forEach( + mockDS -> { + LocalEMRSClient emrsClient = + new LocalEMRSClient() { + @Override + public GetJobRunResult getJobRunResult(String applicationId, String jobId) { + super.getJobRunResult(applicationId, jobId); + JobRun jobRun = new JobRun(); + jobRun.setState("cancelled"); + return new GetJobRunResult().withJobRun(jobRun); + } + }; + EMRServerlessClientFactory 
emrServerlessClientFactory = () -> emrsClient; + AsyncQueryExecutorService asyncQueryExecutorService = + createAsyncQueryExecutorService(emrServerlessClientFactory); + // Mock flint index + mockDS.createIndex(); + HashMap existingOptions = new HashMap<>(); + existingOptions.put("auto_refresh", "true"); + mockDS.updateIndexOptions(existingOptions, false); + // Mock index state + MockFlintSparkJob flintIndexJob = + new MockFlintSparkJob(stateStore, mockDS.getLatestId(), DATASOURCE); + flintIndexJob.updating(); + + // 1. alter index + CreateAsyncQueryResponse response = + asyncQueryExecutorService.createAsyncQuery( + new CreateAsyncQueryRequest( + mockDS.getQuery(), DATASOURCE, LangType.SQL, null)); + + // 2. fetch result + AsyncQueryExecutionResponse asyncQueryExecutionResponse = + asyncQueryExecutorService.getAsyncQueryResults(response.getQueryId()); + assertEquals("FAILED", asyncQueryExecutionResponse.getStatus()); + assertEquals( + "Transaction failed as flint index is not in a valid state.", + asyncQueryExecutionResponse.getError()); + emrsClient.startJobRunCalled(0); + emrsClient.cancelJobRunCalled(0); + flintIndexJob.assertState(FlintIndexState.UPDATING); + Map mappings = mockDS.getIndexMappings(); + Map meta = (HashMap) mappings.get("_meta"); + Map options = (Map) meta.get("options"); + Assertions.assertEquals("true", options.get("auto_refresh")); + }); + } } diff --git a/spark/src/test/java/org/opensearch/sql/spark/asyncquery/model/MockFlintIndex.java b/spark/src/test/java/org/opensearch/sql/spark/asyncquery/model/MockFlintIndex.java new file mode 100644 index 0000000000..554de586b4 --- /dev/null +++ b/spark/src/test/java/org/opensearch/sql/spark/asyncquery/model/MockFlintIndex.java @@ -0,0 +1,72 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.asyncquery.model; + +import java.util.HashMap; +import java.util.Map; +import lombok.Getter; +import lombok.SneakyThrows; +import 
org.opensearch.action.admin.indices.delete.DeleteIndexRequest; +import org.opensearch.action.admin.indices.mapping.get.GetMappingsResponse; +import org.opensearch.client.Client; +import org.opensearch.sql.spark.flint.FlintIndexType; +import org.opensearch.sql.spark.utils.TestUtils; + +@Getter +public class MockFlintIndex { + private final String indexName; + private final Client client; + private final FlintIndexType flintIndexType; + private final String query; + + public MockFlintIndex( + Client client, String indexName, FlintIndexType flintIndexType, String query) { + this.client = client; + this.indexName = indexName; + this.flintIndexType = flintIndexType; + this.query = query; + } + + public void createIndex() { + String mappingFile = String.format("flint-index-mappings/%s_mapping.json", indexName); + TestUtils.createIndexWithMappings(client, indexName, mappingFile); + } + + public String getLatestId() { + return this.indexName + "_latest_id"; + } + + @SneakyThrows + public void deleteIndex() { + client.admin().indices().delete(new DeleteIndexRequest().indices(indexName)).get(); + } + + public Map getIndexMappings() { + return client + .admin() + .indices() + .prepareGetMappings(indexName) + .get() + .getMappings() + .get(indexName) + .getSourceAsMap(); + } + + public void updateIndexOptions(HashMap newOptions, Boolean replaceCompletely) { + GetMappingsResponse mappingsResponse = + client.admin().indices().prepareGetMappings().setIndices(indexName).get(); + Map flintMetadataMap = + mappingsResponse.getMappings().get(indexName).getSourceAsMap(); + Map meta = (Map) flintMetadataMap.get("_meta"); + Map options = (Map) meta.get("options"); + if (replaceCompletely) { + meta.put("options", newOptions); + } else { + options.putAll(newOptions); + } + client.admin().indices().preparePutMapping(indexName).setSource(flintMetadataMap).get(); + } +} diff --git a/spark/src/test/java/org/opensearch/sql/spark/asyncquery/model/MockFlintSparkJob.java 
b/spark/src/test/java/org/opensearch/sql/spark/asyncquery/model/MockFlintSparkJob.java new file mode 100644 index 0000000000..0840ce975c --- /dev/null +++ b/spark/src/test/java/org/opensearch/sql/spark/asyncquery/model/MockFlintSparkJob.java @@ -0,0 +1,83 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.asyncquery.model; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.util.Optional; +import org.opensearch.index.seqno.SequenceNumbers; +import org.opensearch.sql.spark.execution.statestore.StateStore; +import org.opensearch.sql.spark.flint.FlintIndexState; +import org.opensearch.sql.spark.flint.FlintIndexStateModel; + +public class MockFlintSparkJob { + private FlintIndexStateModel stateModel; + private StateStore stateStore; + private String datasource; + + public MockFlintSparkJob(StateStore stateStore, String latestId, String datasource) { + assertNotNull(latestId); + this.stateStore = stateStore; + this.datasource = datasource; + stateModel = + new FlintIndexStateModel( + FlintIndexState.EMPTY, + "mockAppId", + "mockJobId", + latestId, + datasource, + System.currentTimeMillis(), + "", + SequenceNumbers.UNASSIGNED_SEQ_NO, + SequenceNumbers.UNASSIGNED_PRIMARY_TERM); + stateModel = StateStore.createFlintIndexState(stateStore, datasource).apply(stateModel); + } + + public void refreshing() { + stateModel = + StateStore.updateFlintIndexState(stateStore, datasource) + .apply(stateModel, FlintIndexState.REFRESHING); + } + + public void active() { + stateModel = + StateStore.updateFlintIndexState(stateStore, datasource) + .apply(stateModel, FlintIndexState.ACTIVE); + } + + public void creating() { + stateModel = + StateStore.updateFlintIndexState(stateStore, datasource) + .apply(stateModel, FlintIndexState.CREATING); + } + + public void 
updating() { + stateModel = + StateStore.updateFlintIndexState(stateStore, datasource) + .apply(stateModel, FlintIndexState.UPDATING); + } + + public void deleting() { + stateModel = + StateStore.updateFlintIndexState(stateStore, datasource) + .apply(stateModel, FlintIndexState.DELETING); + } + + public void deleted() { + stateModel = + StateStore.updateFlintIndexState(stateStore, datasource) + .apply(stateModel, FlintIndexState.DELETED); + } + + public void assertState(FlintIndexState expected) { + Optional stateModelOpt = + StateStore.getFlintIndexState(stateStore, datasource).apply(stateModel.getId()); + assertTrue((stateModelOpt.isPresent())); + assertEquals(expected, stateModelOpt.get().getIndexState()); + } +} diff --git a/spark/src/test/java/org/opensearch/sql/spark/dispatcher/IndexDMLHandlerTest.java b/spark/src/test/java/org/opensearch/sql/spark/dispatcher/IndexDMLHandlerTest.java index ec82488749..ac03e817dd 100644 --- a/spark/src/test/java/org/opensearch/sql/spark/dispatcher/IndexDMLHandlerTest.java +++ b/spark/src/test/java/org/opensearch/sql/spark/dispatcher/IndexDMLHandlerTest.java @@ -6,19 +6,133 @@ package org.opensearch.sql.spark.dispatcher; import static org.junit.jupiter.api.Assertions.*; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; +import static org.opensearch.sql.datasource.model.DataSourceStatus.ACTIVE; +import static org.opensearch.sql.spark.constants.TestConstants.EMRS_APPLICATION_ID; +import static org.opensearch.sql.spark.constants.TestConstants.EMRS_EXECUTION_ROLE; +import static org.opensearch.sql.spark.constants.TestConstants.TEST_CLUSTER_NAME; import static org.opensearch.sql.spark.data.constants.SparkConstants.ERROR_FIELD; import static org.opensearch.sql.spark.data.constants.SparkConstants.STATUS_FIELD; +import java.util.HashMap; import org.json.JSONObject; +import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; +import 
org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mock; +import org.mockito.Mockito; +import org.mockito.junit.jupiter.MockitoExtension; +import org.opensearch.sql.datasource.model.DataSourceMetadata; +import org.opensearch.sql.datasource.model.DataSourceType; +import org.opensearch.sql.spark.client.EMRServerlessClient; +import org.opensearch.sql.spark.dispatcher.model.DispatchQueryContext; +import org.opensearch.sql.spark.dispatcher.model.DispatchQueryRequest; +import org.opensearch.sql.spark.dispatcher.model.DispatchQueryResponse; +import org.opensearch.sql.spark.dispatcher.model.IndexQueryActionType; +import org.opensearch.sql.spark.dispatcher.model.IndexQueryDetails; +import org.opensearch.sql.spark.execution.statestore.StateStore; +import org.opensearch.sql.spark.flint.FlintIndexMetadata; +import org.opensearch.sql.spark.flint.FlintIndexMetadataService; +import org.opensearch.sql.spark.flint.FlintIndexType; +import org.opensearch.sql.spark.response.JobExecutionResponseReader; +import org.opensearch.sql.spark.rest.model.LangType; +@ExtendWith(MockitoExtension.class) class IndexDMLHandlerTest { + + @Mock private EMRServerlessClient emrServerlessClient; + @Mock private JobExecutionResponseReader jobExecutionResponseReader; + @Mock private FlintIndexMetadataService flintIndexMetadataService; + @Mock private StateStore stateStore; + @Test public void getResponseFromExecutor() { - JSONObject result = - new IndexDMLHandler(null, null, null, null, null).getResponseFromExecutor(null); + JSONObject result = new IndexDMLHandler(null, null, null, null).getResponseFromExecutor(null); assertEquals("running", result.getString(STATUS_FIELD)); assertEquals("", result.getString(ERROR_FIELD)); } + + @Test + public void testWhenIndexDetailsAreNotFound() { + IndexDMLHandler indexDMLHandler = + new IndexDMLHandler( + emrServerlessClient, jobExecutionResponseReader, flintIndexMetadataService, stateStore); + DispatchQueryRequest dispatchQueryRequest = + new 
DispatchQueryRequest( + EMRS_APPLICATION_ID, + "DROP INDEX", + "my_glue", + LangType.SQL, + EMRS_EXECUTION_ROLE, + TEST_CLUSTER_NAME); + DataSourceMetadata metadata = + new DataSourceMetadata.Builder() + .setName("mys3") + .setDescription("test description") + .setConnector(DataSourceType.S3GLUE) + .setDataSourceStatus(ACTIVE) + .build(); + IndexQueryDetails indexQueryDetails = + IndexQueryDetails.builder() + .mvName("mys3.default.http_logs_metrics") + .indexType(FlintIndexType.MATERIALIZED_VIEW) + .build(); + DispatchQueryContext dispatchQueryContext = + DispatchQueryContext.builder() + .dataSourceMetadata(metadata) + .indexQueryDetails(indexQueryDetails) + .build(); + Mockito.when(flintIndexMetadataService.getFlintIndexMetadata(any())) + .thenReturn(new HashMap<>()); + DispatchQueryResponse dispatchQueryResponse = + indexDMLHandler.submit(dispatchQueryRequest, dispatchQueryContext); + Assertions.assertNotNull(dispatchQueryResponse.getQueryId()); + } + + @Test + public void testWhenIndexDetailsWithInvalidQueryActionType() { + FlintIndexMetadata flintIndexMetadata = mock(FlintIndexMetadata.class); + IndexDMLHandler indexDMLHandler = + new IndexDMLHandler( + emrServerlessClient, jobExecutionResponseReader, flintIndexMetadataService, stateStore); + DispatchQueryRequest dispatchQueryRequest = + new DispatchQueryRequest( + EMRS_APPLICATION_ID, + "CREATE INDEX", + "my_glue", + LangType.SQL, + EMRS_EXECUTION_ROLE, + TEST_CLUSTER_NAME); + DataSourceMetadata metadata = + new DataSourceMetadata.Builder() + .setName("mys3") + .setDescription("test description") + .setConnector(DataSourceType.S3GLUE) + .setDataSourceStatus(ACTIVE) + .build(); + IndexQueryDetails indexQueryDetails = + IndexQueryDetails.builder() + .mvName("mys3.default.http_logs_metrics") + .indexQueryActionType(IndexQueryActionType.CREATE) + .indexType(FlintIndexType.MATERIALIZED_VIEW) + .build(); + DispatchQueryContext dispatchQueryContext = + DispatchQueryContext.builder() + .dataSourceMetadata(metadata) + 
.indexQueryDetails(indexQueryDetails) + .build(); + HashMap flintMetadataMap = new HashMap<>(); + flintMetadataMap.put(indexQueryDetails.openSearchIndexName(), flintIndexMetadata); + when(flintIndexMetadataService.getFlintIndexMetadata(indexQueryDetails.openSearchIndexName())) + .thenReturn(flintMetadataMap); + indexDMLHandler.submit(dispatchQueryRequest, dispatchQueryContext); + } + + @Test + public void testStaticMethods() { + Assertions.assertTrue(IndexDMLHandler.isIndexDMLQuery("dropIndexJobId")); + } } diff --git a/spark/src/test/java/org/opensearch/sql/spark/dispatcher/SparkQueryDispatcherTest.java b/spark/src/test/java/org/opensearch/sql/spark/dispatcher/SparkQueryDispatcherTest.java index bca1bb229a..d1d5033ee0 100644 --- a/spark/src/test/java/org/opensearch/sql/spark/dispatcher/SparkQueryDispatcherTest.java +++ b/spark/src/test/java/org/opensearch/sql/spark/dispatcher/SparkQueryDispatcherTest.java @@ -74,7 +74,7 @@ import org.opensearch.sql.spark.execution.statement.StatementId; import org.opensearch.sql.spark.execution.statement.StatementState; import org.opensearch.sql.spark.execution.statestore.StateStore; -import org.opensearch.sql.spark.flint.FlintIndexMetadataReader; +import org.opensearch.sql.spark.flint.FlintIndexMetadataService; import org.opensearch.sql.spark.leasemanager.LeaseManager; import org.opensearch.sql.spark.response.JobExecutionResponseReader; import org.opensearch.sql.spark.rest.model.LangType; @@ -87,7 +87,7 @@ public class SparkQueryDispatcherTest { @Mock private DataSourceService dataSourceService; @Mock private JobExecutionResponseReader jobExecutionResponseReader; @Mock private DataSourceUserAuthorizationHelperImpl dataSourceUserAuthorizationHelper; - @Mock private FlintIndexMetadataReader flintIndexMetadataReader; + @Mock private FlintIndexMetadataService flintIndexMetadataService; @Mock(answer = RETURNS_DEEP_STUBS) private Client openSearchClient; @@ -118,7 +118,7 @@ void setUp() { dataSourceService, 
dataSourceUserAuthorizationHelper, jobExecutionResponseReader, - flintIndexMetadataReader, + flintIndexMetadataService, openSearchClient, sessionManager, leaseManager, @@ -168,7 +168,7 @@ void testDispatchSelectQuery() { verify(emrServerlessClient, times(1)).startJobRun(startJobRequestArgumentCaptor.capture()); Assertions.assertEquals(expected, startJobRequestArgumentCaptor.getValue()); Assertions.assertEquals(EMR_JOB_ID, dispatchQueryResponse.getJobId()); - verifyNoInteractions(flintIndexMetadataReader); + verifyNoInteractions(flintIndexMetadataService); } @Test @@ -213,7 +213,7 @@ void testDispatchSelectQueryWithBasicAuthIndexStoreDatasource() { verify(emrServerlessClient, times(1)).startJobRun(startJobRequestArgumentCaptor.capture()); Assertions.assertEquals(expected, startJobRequestArgumentCaptor.getValue()); Assertions.assertEquals(EMR_JOB_ID, dispatchQueryResponse.getJobId()); - verifyNoInteractions(flintIndexMetadataReader); + verifyNoInteractions(flintIndexMetadataService); } @Test @@ -257,7 +257,7 @@ void testDispatchSelectQueryWithNoAuthIndexStoreDatasource() { verify(emrServerlessClient, times(1)).startJobRun(startJobRequestArgumentCaptor.capture()); Assertions.assertEquals(expected, startJobRequestArgumentCaptor.getValue()); Assertions.assertEquals(EMR_JOB_ID, dispatchQueryResponse.getJobId()); - verifyNoInteractions(flintIndexMetadataReader); + verifyNoInteractions(flintIndexMetadataService); } @Test @@ -371,7 +371,7 @@ void testDispatchIndexQuery() { verify(emrServerlessClient, times(1)).startJobRun(startJobRequestArgumentCaptor.capture()); Assertions.assertEquals(expected, startJobRequestArgumentCaptor.getValue()); Assertions.assertEquals(EMR_JOB_ID, dispatchQueryResponse.getJobId()); - verifyNoInteractions(flintIndexMetadataReader); + verifyNoInteractions(flintIndexMetadataService); } @Test @@ -415,7 +415,7 @@ void testDispatchWithPPLQuery() { verify(emrServerlessClient, times(1)).startJobRun(startJobRequestArgumentCaptor.capture()); 
Assertions.assertEquals(expected, startJobRequestArgumentCaptor.getValue()); Assertions.assertEquals(EMR_JOB_ID, dispatchQueryResponse.getJobId()); - verifyNoInteractions(flintIndexMetadataReader); + verifyNoInteractions(flintIndexMetadataService); } @Test @@ -460,7 +460,7 @@ void testDispatchQueryWithoutATableAndDataSourceName() { verify(emrServerlessClient, times(1)).startJobRun(startJobRequestArgumentCaptor.capture()); Assertions.assertEquals(expected, startJobRequestArgumentCaptor.getValue()); Assertions.assertEquals(EMR_JOB_ID, dispatchQueryResponse.getJobId()); - verifyNoInteractions(flintIndexMetadataReader); + verifyNoInteractions(flintIndexMetadataService); } @Test @@ -509,7 +509,7 @@ void testDispatchIndexQueryWithoutADatasourceName() { verify(emrServerlessClient, times(1)).startJobRun(startJobRequestArgumentCaptor.capture()); Assertions.assertEquals(expected, startJobRequestArgumentCaptor.getValue()); Assertions.assertEquals(EMR_JOB_ID, dispatchQueryResponse.getJobId()); - verifyNoInteractions(flintIndexMetadataReader); + verifyNoInteractions(flintIndexMetadataService); } @Test @@ -558,7 +558,7 @@ void testDispatchMaterializedViewQuery() { verify(emrServerlessClient, times(1)).startJobRun(startJobRequestArgumentCaptor.capture()); Assertions.assertEquals(expected, startJobRequestArgumentCaptor.getValue()); Assertions.assertEquals(EMR_JOB_ID, dispatchQueryResponse.getJobId()); - verifyNoInteractions(flintIndexMetadataReader); + verifyNoInteractions(flintIndexMetadataService); } @Test @@ -603,7 +603,7 @@ void testDispatchShowMVQuery() { verify(emrServerlessClient, times(1)).startJobRun(startJobRequestArgumentCaptor.capture()); Assertions.assertEquals(expected, startJobRequestArgumentCaptor.getValue()); Assertions.assertEquals(EMR_JOB_ID, dispatchQueryResponse.getJobId()); - verifyNoInteractions(flintIndexMetadataReader); + verifyNoInteractions(flintIndexMetadataService); } @Test @@ -648,7 +648,7 @@ void testRefreshIndexQuery() { verify(emrServerlessClient, 
times(1)).startJobRun(startJobRequestArgumentCaptor.capture()); Assertions.assertEquals(expected, startJobRequestArgumentCaptor.getValue()); Assertions.assertEquals(EMR_JOB_ID, dispatchQueryResponse.getJobId()); - verifyNoInteractions(flintIndexMetadataReader); + verifyNoInteractions(flintIndexMetadataService); } @Test @@ -693,7 +693,7 @@ void testDispatchDescribeIndexQuery() { verify(emrServerlessClient, times(1)).startJobRun(startJobRequestArgumentCaptor.capture()); Assertions.assertEquals(expected, startJobRequestArgumentCaptor.getValue()); Assertions.assertEquals(EMR_JOB_ID, dispatchQueryResponse.getJobId()); - verifyNoInteractions(flintIndexMetadataReader); + verifyNoInteractions(flintIndexMetadataService); } @Test diff --git a/spark/src/test/java/org/opensearch/sql/spark/flint/FlintIndexMetadataReaderImplTest.java b/spark/src/test/java/org/opensearch/sql/spark/flint/FlintIndexMetadataReaderImplTest.java deleted file mode 100644 index 4d809c31dc..0000000000 --- a/spark/src/test/java/org/opensearch/sql/spark/flint/FlintIndexMetadataReaderImplTest.java +++ /dev/null @@ -1,117 +0,0 @@ -package org.opensearch.sql.spark.flint; - -import static org.mockito.Answers.RETURNS_DEEP_STUBS; -import static org.mockito.ArgumentMatchers.any; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; - -import com.google.common.base.Charsets; -import com.google.common.io.Resources; -import java.io.IOException; -import java.net.URL; -import java.util.Map; -import lombok.SneakyThrows; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.ExtendWith; -import org.mockito.Mock; -import org.mockito.junit.jupiter.MockitoExtension; -import org.opensearch.action.admin.indices.mapping.get.GetMappingsResponse; -import org.opensearch.client.Client; -import org.opensearch.cluster.metadata.IndexMetadata; -import org.opensearch.cluster.metadata.MappingMetadata; -import 
org.opensearch.common.xcontent.XContentType; -import org.opensearch.core.xcontent.DeprecationHandler; -import org.opensearch.core.xcontent.NamedXContentRegistry; -import org.opensearch.core.xcontent.XContentParser; -import org.opensearch.sql.spark.dispatcher.model.FullyQualifiedTableName; -import org.opensearch.sql.spark.dispatcher.model.IndexQueryActionType; -import org.opensearch.sql.spark.dispatcher.model.IndexQueryDetails; - -@ExtendWith(MockitoExtension.class) -public class FlintIndexMetadataReaderImplTest { - @Mock(answer = RETURNS_DEEP_STUBS) - private Client client; - - @SneakyThrows - @Test - void testGetJobIdFromFlintSkippingIndexMetadata() { - URL url = - Resources.getResource( - "flint-index-mappings/flint_mys3_default_http_logs_skipping_index.json"); - String mappings = Resources.toString(url, Charsets.UTF_8); - String indexName = "flint_mys3_default_http_logs_skipping_index"; - mockNodeClientIndicesMappings(indexName, mappings); - FlintIndexMetadataReader flintIndexMetadataReader = new FlintIndexMetadataReaderImpl(client); - FlintIndexMetadata indexMetadata = - flintIndexMetadataReader.getFlintIndexMetadata( - IndexQueryDetails.builder() - .fullyQualifiedTableName(new FullyQualifiedTableName("mys3.default.http_logs")) - .autoRefresh(false) - .indexQueryActionType(IndexQueryActionType.DROP) - .indexType(FlintIndexType.SKIPPING) - .build()); - Assertions.assertEquals("00fdmvv9hp8u0o0q", indexMetadata.getJobId()); - } - - @SneakyThrows - @Test - void testGetJobIdFromFlintCoveringIndexMetadata() { - URL url = - Resources.getResource("flint-index-mappings/flint_mys3_default_http_logs_cv1_index.json"); - String mappings = Resources.toString(url, Charsets.UTF_8); - String indexName = "flint_mys3_default_http_logs_cv1_index"; - mockNodeClientIndicesMappings(indexName, mappings); - FlintIndexMetadataReader flintIndexMetadataReader = new FlintIndexMetadataReaderImpl(client); - FlintIndexMetadata indexMetadata = - flintIndexMetadataReader.getFlintIndexMetadata( 
- IndexQueryDetails.builder() - .indexName("cv1") - .fullyQualifiedTableName(new FullyQualifiedTableName("mys3.default.http_logs")) - .autoRefresh(false) - .indexQueryActionType(IndexQueryActionType.DROP) - .indexType(FlintIndexType.COVERING) - .build()); - Assertions.assertEquals("00fdmvv9hp8u0o0q", indexMetadata.getJobId()); - } - - @SneakyThrows - @Test - void testGetJobIDWithNPEException() { - URL url = Resources.getResource("flint-index-mappings/npe_mapping.json"); - String mappings = Resources.toString(url, Charsets.UTF_8); - String indexName = "flint_mys3_default_http_logs_cv1_index"; - mockNodeClientIndicesMappings(indexName, mappings); - FlintIndexMetadataReader flintIndexMetadataReader = new FlintIndexMetadataReaderImpl(client); - IllegalArgumentException illegalArgumentException = - Assertions.assertThrows( - IllegalArgumentException.class, - () -> - flintIndexMetadataReader.getFlintIndexMetadata( - IndexQueryDetails.builder() - .indexName("cv1") - .fullyQualifiedTableName( - new FullyQualifiedTableName("mys3.default.http_logs")) - .autoRefresh(false) - .indexQueryActionType(IndexQueryActionType.DROP) - .indexType(FlintIndexType.COVERING) - .build())); - Assertions.assertEquals("Provided Index doesn't exist", illegalArgumentException.getMessage()); - } - - @SneakyThrows - public void mockNodeClientIndicesMappings(String indexName, String mappings) { - GetMappingsResponse mockResponse = mock(GetMappingsResponse.class); - when(client.admin().indices().prepareGetMappings(any()).get()).thenReturn(mockResponse); - Map metadata; - metadata = Map.of(indexName, IndexMetadata.fromXContent(createParser(mappings)).mapping()); - when(mockResponse.mappings()).thenReturn(metadata); - } - - private XContentParser createParser(String mappings) throws IOException { - return XContentType.JSON - .xContent() - .createParser( - NamedXContentRegistry.EMPTY, DeprecationHandler.THROW_UNSUPPORTED_OPERATION, mappings); - } -} diff --git 
a/spark/src/test/java/org/opensearch/sql/spark/flint/FlintIndexMetadataServiceImplTest.java b/spark/src/test/java/org/opensearch/sql/spark/flint/FlintIndexMetadataServiceImplTest.java new file mode 100644 index 0000000000..f6baa82dd2 --- /dev/null +++ b/spark/src/test/java/org/opensearch/sql/spark/flint/FlintIndexMetadataServiceImplTest.java @@ -0,0 +1,190 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.flint; + +import static org.mockito.Answers.RETURNS_DEEP_STUBS; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import com.google.common.base.Charsets; +import com.google.common.io.Resources; +import java.io.IOException; +import java.net.URL; +import java.util.HashMap; +import java.util.Map; +import lombok.SneakyThrows; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; +import org.opensearch.action.admin.indices.mapping.get.GetMappingsResponse; +import org.opensearch.client.Client; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.metadata.MappingMetadata; +import org.opensearch.common.xcontent.XContentType; +import org.opensearch.core.xcontent.DeprecationHandler; +import org.opensearch.core.xcontent.NamedXContentRegistry; +import org.opensearch.core.xcontent.XContentParser; +import org.opensearch.sql.spark.dispatcher.model.FlintIndexOptions; +import org.opensearch.sql.spark.dispatcher.model.FullyQualifiedTableName; +import org.opensearch.sql.spark.dispatcher.model.IndexQueryActionType; +import org.opensearch.sql.spark.dispatcher.model.IndexQueryDetails; + +@ExtendWith(MockitoExtension.class) +public class FlintIndexMetadataServiceImplTest { + @Mock(answer = RETURNS_DEEP_STUBS) + private Client client; + + @SneakyThrows + @Test + void 
testGetJobIdFromFlintSkippingIndexMetadata() { + URL url = + Resources.getResource( + "flint-index-mappings/flint_mys3_default_http_logs_skipping_index.json"); + String mappings = Resources.toString(url, Charsets.UTF_8); + String indexName = "flint_mys3_default_http_logs_skipping_index"; + mockNodeClientIndicesMappings(indexName, mappings); + FlintIndexMetadataService flintIndexMetadataService = new FlintIndexMetadataServiceImpl(client); + IndexQueryDetails indexQueryDetails = + IndexQueryDetails.builder() + .fullyQualifiedTableName(new FullyQualifiedTableName("mys3.default.http_logs")) + .indexOptions(new FlintIndexOptions()) + .indexQueryActionType(IndexQueryActionType.DROP) + .indexType(FlintIndexType.SKIPPING) + .build(); + Map indexMetadataMap = + flintIndexMetadataService.getFlintIndexMetadata(indexQueryDetails.openSearchIndexName()); + Assertions.assertEquals( + "00fhelvq7peuao0", + indexMetadataMap.get(indexQueryDetails.openSearchIndexName()).getJobId()); + } + + @SneakyThrows + @Test + void testGetJobIdFromFlintSkippingIndexMetadataWithIndexState() { + URL url = + Resources.getResource( + "flint-index-mappings/flint_mys3_default_http_logs_skipping_index.json"); + String mappings = Resources.toString(url, Charsets.UTF_8); + String indexName = "flint_mys3_default_http_logs_skipping_index"; + mockNodeClientIndicesMappings(indexName, mappings); + FlintIndexMetadataService flintIndexMetadataService = new FlintIndexMetadataServiceImpl(client); + IndexQueryDetails indexQueryDetails = + IndexQueryDetails.builder() + .fullyQualifiedTableName(new FullyQualifiedTableName("mys3.default.http_logs")) + .indexOptions(new FlintIndexOptions()) + .indexQueryActionType(IndexQueryActionType.DROP) + .indexType(FlintIndexType.SKIPPING) + .build(); + Map indexMetadataMap = + flintIndexMetadataService.getFlintIndexMetadata(indexQueryDetails.openSearchIndexName()); + FlintIndexMetadata metadata = indexMetadataMap.get(indexQueryDetails.openSearchIndexName()); + 
Assertions.assertEquals("00fhelvq7peuao0", metadata.getJobId()); + } + + @SneakyThrows + @Test + void testGetJobIdFromFlintCoveringIndexMetadata() { + URL url = + Resources.getResource("flint-index-mappings/flint_mys3_default_http_logs_cv1_index.json"); + String mappings = Resources.toString(url, Charsets.UTF_8); + String indexName = "flint_mys3_default_http_logs_cv1_index"; + mockNodeClientIndicesMappings(indexName, mappings); + IndexQueryDetails indexQueryDetails = + IndexQueryDetails.builder() + .indexName("cv1") + .fullyQualifiedTableName(new FullyQualifiedTableName("mys3.default.http_logs")) + .indexOptions(new FlintIndexOptions()) + .indexQueryActionType(IndexQueryActionType.DROP) + .indexType(FlintIndexType.COVERING) + .build(); + FlintIndexMetadataService flintIndexMetadataService = new FlintIndexMetadataServiceImpl(client); + Map indexMetadataMap = + flintIndexMetadataService.getFlintIndexMetadata(indexQueryDetails.openSearchIndexName()); + Assertions.assertEquals( + "00fdmvv9hp8u0o0q", + indexMetadataMap.get(indexQueryDetails.openSearchIndexName()).getJobId()); + } + + @SneakyThrows + @Test + void testGetJobIDWithNPEException() { + URL url = Resources.getResource("flint-index-mappings/npe_mapping.json"); + String mappings = Resources.toString(url, Charsets.UTF_8); + String indexName = "flint_mys3_default_http_logs_cv1_index"; + mockNodeClientIndicesMappings(indexName, mappings); + FlintIndexMetadataService flintIndexMetadataService = new FlintIndexMetadataServiceImpl(client); + IndexQueryDetails indexQueryDetails = + IndexQueryDetails.builder() + .indexName("cv1") + .fullyQualifiedTableName(new FullyQualifiedTableName("mys3.default.http_logs")) + .indexOptions(new FlintIndexOptions()) + .indexQueryActionType(IndexQueryActionType.DROP) + .indexType(FlintIndexType.COVERING) + .build(); + Map flintIndexMetadataMap = + flintIndexMetadataService.getFlintIndexMetadata(indexQueryDetails.openSearchIndexName()); + Assertions.assertFalse( + 
flintIndexMetadataMap.containsKey("flint_mys3_default_http_logs_cv1_index")); + } + + @SneakyThrows + @Test + void testGetJobIDWithNPEExceptionForMultipleIndices() { + HashMap indexMappingsMap = new HashMap<>(); + URL url = Resources.getResource("flint-index-mappings/npe_mapping.json"); + String mappings = Resources.toString(url, Charsets.UTF_8); + String indexName = "flint_mys3_default_http_logs_cv1_index"; + indexMappingsMap.put(indexName, mappings); + url = + Resources.getResource( + "flint-index-mappings/flint_mys3_default_http_logs_skipping_index.json"); + mappings = Resources.toString(url, Charsets.UTF_8); + indexName = "flint_mys3_default_http_logs_skipping_index"; + indexMappingsMap.put(indexName, mappings); + mockNodeClientIndicesMappings("flint_mys3*", indexMappingsMap); + FlintIndexMetadataService flintIndexMetadataService = new FlintIndexMetadataServiceImpl(client); + Map flintIndexMetadataMap = + flintIndexMetadataService.getFlintIndexMetadata("flint_mys3*"); + Assertions.assertFalse( + flintIndexMetadataMap.containsKey("flint_mys3_default_http_logs_cv1_index")); + Assertions.assertTrue( + flintIndexMetadataMap.containsKey("flint_mys3_default_http_logs_skipping_index")); + } + + @SneakyThrows + public void mockNodeClientIndicesMappings(String indexName, String mappings) { + GetMappingsResponse mockResponse = mock(GetMappingsResponse.class); + when(client.admin().indices().prepareGetMappings().setIndices(indexName).get()) + .thenReturn(mockResponse); + Map metadata; + metadata = Map.of(indexName, IndexMetadata.fromXContent(createParser(mappings)).mapping()); + when(mockResponse.getMappings()).thenReturn(metadata); + } + + @SneakyThrows + public void mockNodeClientIndicesMappings( + String indexPattern, HashMap indexMappingsMap) { + GetMappingsResponse mockResponse = mock(GetMappingsResponse.class); + when(client.admin().indices().prepareGetMappings().setIndices(indexPattern).get()) + .thenReturn(mockResponse); + Map metadataMap = new HashMap<>(); + for 
(String indexName : indexMappingsMap.keySet()) { + metadataMap.put( + indexName, + IndexMetadata.fromXContent(createParser(indexMappingsMap.get(indexName))).mapping()); + } + when(mockResponse.getMappings()).thenReturn(metadataMap); + } + + private XContentParser createParser(String mappings) throws IOException { + return XContentType.JSON + .xContent() + .createParser( + NamedXContentRegistry.EMPTY, DeprecationHandler.THROW_UNSUPPORTED_OPERATION, mappings); + } +} diff --git a/spark/src/test/java/org/opensearch/sql/spark/flint/FlintIndexMetadataServiceSpecTest.java b/spark/src/test/java/org/opensearch/sql/spark/flint/FlintIndexMetadataServiceSpecTest.java new file mode 100644 index 0000000000..70640bb4c1 --- /dev/null +++ b/spark/src/test/java/org/opensearch/sql/spark/flint/FlintIndexMetadataServiceSpecTest.java @@ -0,0 +1,115 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.flint; + +import java.util.Map; +import org.junit.Before; +import org.junit.Test; +import org.junit.jupiter.api.Assertions; +import org.opensearch.client.node.NodeClient; +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.sql.spark.asyncquery.model.MockFlintIndex; +import org.opensearch.sql.spark.asyncquery.model.MockFlintSparkJob; +import org.opensearch.sql.spark.execution.statestore.StateStore; +import org.opensearch.test.OpenSearchIntegTestCase; + +public class FlintIndexMetadataServiceSpecTest extends OpenSearchIntegTestCase { + + protected ClusterService clusterService; + protected NodeClient client; + protected StateStore stateStore; + private MockFlintIndex skippingIndex; + private MockFlintSparkJob skippingIndexState; + private MockFlintIndex coveringIndex; + private MockFlintSparkJob coveringIndexState; + private MockFlintIndex mvIndex; + private MockFlintSparkJob mvIndexState; + + @Before + public void setup() { + clusterService = clusterService(); + client = (NodeClient) 
cluster().client(); + stateStore = new StateStore(client, clusterService); + + skippingIndex = + new MockFlintIndex( + client, "flint_my_glue_mydb_http_logs_skipping_index", FlintIndexType.SKIPPING, ""); + skippingIndexState = + new MockFlintSparkJob( + stateStore, "flint_my_glue_mydb_http_logs_skipping_index_latest_id", "my_glue"); + coveringIndex = + new MockFlintIndex( + client, "flint_my_glue_mydb_http_logs_covering_index", FlintIndexType.COVERING, ""); + coveringIndexState = + new MockFlintSparkJob( + stateStore, "flint_my_glue_mydb_http_logs_covering_index_latest_id", "my_glue"); + mvIndex = + new MockFlintIndex(client, "flint_my_glue_mydb_mv", FlintIndexType.MATERIALIZED_VIEW, ""); + mvIndexState = new MockFlintSparkJob(stateStore, "flint_my_glue_mydb_mv_latest_id", "my_glue"); + } + + @Test + public void testGetFlintIndexMetadataWithIndexState() { + + skippingIndex.createIndex(); + skippingIndexState.refreshing(); + coveringIndex.createIndex(); + coveringIndexState.refreshing(); + mvIndex.createIndex(); + mvIndexState.active(); + try { + FlintIndexMetadataService flintIndexMetadataService = + new FlintIndexMetadataServiceImpl(client); + Map indexMetadataMap = + flintIndexMetadataService.getFlintIndexMetadata("flint_my_glue*"); + Assertions.assertEquals(3, indexMetadataMap.size()); + + Assertions.assertTrue( + indexMetadataMap.containsKey("flint_my_glue_mydb_http_logs_skipping_index")); + Assertions.assertTrue( + indexMetadataMap.containsKey("flint_my_glue_mydb_http_logs_covering_index")); + Assertions.assertTrue(indexMetadataMap.containsKey("flint_my_glue_mydb_mv")); + } finally { + skippingIndex.deleteIndex(); + skippingIndexState.deleted(); + coveringIndex.deleteIndex(); + coveringIndexState.deleted(); + mvIndex.deleteIndex(); + mvIndexState.deleted(); + } + } + + @Test + public void testGetFlintIndexMetadataWithOutIndexState() { + skippingIndex.createIndex(); + skippingIndexState.refreshing(); + coveringIndex.createIndex(); + 
coveringIndexState.refreshing(); + mvIndex.createIndex(); + mvIndexState.active(); + try { + FlintIndexMetadataService flintIndexMetadataService = + new FlintIndexMetadataServiceImpl(client); + Map indexMetadataMap = + flintIndexMetadataService.getFlintIndexMetadata("flint_my_glue*"); + Assertions.assertEquals(3, indexMetadataMap.size()); + + Assertions.assertTrue( + indexMetadataMap.containsKey("flint_my_glue_mydb_http_logs_skipping_index")); + + Assertions.assertTrue( + indexMetadataMap.containsKey("flint_my_glue_mydb_http_logs_covering_index")); + Assertions.assertTrue(indexMetadataMap.containsKey("flint_my_glue_mydb_mv")); + } finally { + skippingIndex.deleteIndex(); + skippingIndexState.deleted(); + coveringIndex.deleteIndex(); + coveringIndexState.deleted(); + mvIndex.deleteIndex(); + mvIndexState.deleted(); + } + } +} diff --git a/spark/src/test/java/org/opensearch/sql/spark/flint/FlintIndexMetadataTest.java b/spark/src/test/java/org/opensearch/sql/spark/flint/FlintIndexMetadataTest.java deleted file mode 100644 index 808b80766e..0000000000 --- a/spark/src/test/java/org/opensearch/sql/spark/flint/FlintIndexMetadataTest.java +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.sql.spark.flint; - -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; -import static org.opensearch.sql.spark.constants.TestConstants.EMR_JOB_ID; -import static org.opensearch.sql.spark.flint.FlintIndexMetadata.AUTO_REFRESH; -import static org.opensearch.sql.spark.flint.FlintIndexMetadata.ENV_KEY; -import static org.opensearch.sql.spark.flint.FlintIndexMetadata.OPTIONS_KEY; -import static org.opensearch.sql.spark.flint.FlintIndexMetadata.PROPERTIES_KEY; -import static org.opensearch.sql.spark.flint.FlintIndexMetadata.SERVERLESS_EMR_JOB_ID; - -import java.util.HashMap; -import java.util.Map; -import org.junit.jupiter.api.Test; - -public class 
FlintIndexMetadataTest { - - @Test - public void testAutoRefreshSetToTrue() { - FlintIndexMetadata indexMetadata = - FlintIndexMetadata.fromMetatdata( - new Metadata() - .addEnv(SERVERLESS_EMR_JOB_ID, EMR_JOB_ID) - .addOptions(AUTO_REFRESH, "true") - .metadata()); - assertTrue(indexMetadata.isAutoRefresh()); - } - - @Test - public void testAutoRefreshSetToFalse() { - FlintIndexMetadata indexMetadata = - FlintIndexMetadata.fromMetatdata( - new Metadata() - .addEnv(SERVERLESS_EMR_JOB_ID, EMR_JOB_ID) - .addOptions(AUTO_REFRESH, "false") - .metadata()); - assertFalse(indexMetadata.isAutoRefresh()); - } - - @Test - public void testWithOutAutoRefresh() { - FlintIndexMetadata indexMetadata = - FlintIndexMetadata.fromMetatdata( - new Metadata() - .addEnv(SERVERLESS_EMR_JOB_ID, EMR_JOB_ID) - .addOptions(AUTO_REFRESH, "false") - .metadata()); - assertFalse(indexMetadata.isAutoRefresh()); - } - - static class Metadata { - private final Map properties; - private final Map env; - private final Map options; - - private Metadata() { - properties = new HashMap<>(); - env = new HashMap<>(); - options = new HashMap<>(); - } - - public Metadata addEnv(String key, String value) { - env.put(key, value); - return this; - } - - public Metadata addOptions(String key, String value) { - options.put(key, value); - return this; - } - - public Map metadata() { - Map result = new HashMap<>(); - properties.put(ENV_KEY, env); - result.put(OPTIONS_KEY, options); - result.put(PROPERTIES_KEY, properties); - return result; - } - } -} diff --git a/spark/src/test/java/org/opensearch/sql/spark/flint/IndexQueryDetailsTest.java b/spark/src/test/java/org/opensearch/sql/spark/flint/IndexQueryDetailsTest.java index 6299dee0ca..cddc790d5e 100644 --- a/spark/src/test/java/org/opensearch/sql/spark/flint/IndexQueryDetailsTest.java +++ b/spark/src/test/java/org/opensearch/sql/spark/flint/IndexQueryDetailsTest.java @@ -8,6 +8,7 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import 
org.junit.jupiter.api.Test; +import org.opensearch.sql.spark.dispatcher.model.FlintIndexOptions; import org.opensearch.sql.spark.dispatcher.model.FullyQualifiedTableName; import org.opensearch.sql.spark.dispatcher.model.IndexQueryActionType; import org.opensearch.sql.spark.dispatcher.model.IndexQueryDetails; @@ -20,7 +21,7 @@ public void skippingIndexName() { IndexQueryDetails.builder() .indexName("invalid") .fullyQualifiedTableName(new FullyQualifiedTableName("mys3.default.http_logs")) - .autoRefresh(false) + .indexOptions(new FlintIndexOptions()) .indexQueryActionType(IndexQueryActionType.DROP) .indexType(FlintIndexType.SKIPPING) .build() diff --git a/spark/src/test/java/org/opensearch/sql/spark/flint/operation/FlintIndexOpTest.java b/spark/src/test/java/org/opensearch/sql/spark/flint/operation/FlintIndexOpTest.java index 5b3c1d74db..5755d03baa 100644 --- a/spark/src/test/java/org/opensearch/sql/spark/flint/operation/FlintIndexOpTest.java +++ b/spark/src/test/java/org/opensearch/sql/spark/flint/operation/FlintIndexOpTest.java @@ -1,14 +1,10 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - package org.opensearch.sql.spark.flint.operation; -import static org.junit.jupiter.api.Assertions.assertThrows; import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; -import static org.opensearch.sql.spark.asyncquery.AsyncQueryExecutorServiceSpec.DATASOURCE; +import static org.opensearch.sql.spark.execution.statestore.StateStore.DATASOURCE_TO_REQUEST_INDEX; import java.util.Optional; import org.junit.jupiter.api.Assertions; @@ -16,46 +12,121 @@ import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; +import org.opensearch.index.seqno.SequenceNumbers; import org.opensearch.sql.spark.execution.statestore.StateStore; import 
org.opensearch.sql.spark.flint.FlintIndexMetadata; import org.opensearch.sql.spark.flint.FlintIndexState; import org.opensearch.sql.spark.flint.FlintIndexStateModel; @ExtendWith(MockitoExtension.class) -class FlintIndexOpTest { - @Mock private StateStore stateStore; +public class FlintIndexOpTest { - @Mock private FlintIndexMetadata flintIndexMetadata; + @Mock private StateStore mockStateStore; - @Mock private FlintIndexStateModel model; + @Test + public void testApplyWithTransitioningStateFailure() { + FlintIndexMetadata metadata = mock(FlintIndexMetadata.class); + when(metadata.getLatestId()).thenReturn(Optional.of("latestId")); + FlintIndexStateModel fakeModel = + new FlintIndexStateModel( + FlintIndexState.ACTIVE, + metadata.getAppId(), + metadata.getJobId(), + "latestId", + "myS3", + System.currentTimeMillis(), + "", + SequenceNumbers.UNASSIGNED_SEQ_NO, + SequenceNumbers.UNASSIGNED_PRIMARY_TERM); + when(mockStateStore.get(eq("latestId"), any(), eq(DATASOURCE_TO_REQUEST_INDEX.apply("myS3")))) + .thenReturn(Optional.of(fakeModel)); + when(mockStateStore.updateState(any(), any(), any(), any())) + .thenThrow(new RuntimeException("Transitioning state failed")); + FlintIndexOp flintIndexOp = new TestFlintIndexOp(mockStateStore, "myS3"); + IllegalStateException illegalStateException = + Assertions.assertThrows(IllegalStateException.class, () -> flintIndexOp.apply(metadata)); + Assertions.assertEquals( + "Moving to transition state:DELETING failed.", illegalStateException.getMessage()); + } @Test - public void beginFailed() { - when(stateStore.updateState(any(), any(), any(), any())).thenThrow(RuntimeException.class); - when(stateStore.get(any(), any(), any())).thenReturn(Optional.of(model)); - when(model.getIndexState()).thenReturn(FlintIndexState.ACTIVE); - when(flintIndexMetadata.getLatestId()).thenReturn(Optional.of("latestId")); - - FlintIndexOpDelete indexOp = new FlintIndexOpDelete(stateStore, DATASOURCE); - IllegalStateException exception = - 
assertThrows(IllegalStateException.class, () -> indexOp.apply(flintIndexMetadata)); + public void testApplyWithCommitFailure() { + FlintIndexMetadata metadata = mock(FlintIndexMetadata.class); + when(metadata.getLatestId()).thenReturn(Optional.of("latestId")); + FlintIndexStateModel fakeModel = + new FlintIndexStateModel( + FlintIndexState.ACTIVE, + metadata.getAppId(), + metadata.getJobId(), + "latestId", + "myS3", + System.currentTimeMillis(), + "", + SequenceNumbers.UNASSIGNED_SEQ_NO, + SequenceNumbers.UNASSIGNED_PRIMARY_TERM); + when(mockStateStore.get(eq("latestId"), any(), eq(DATASOURCE_TO_REQUEST_INDEX.apply("myS3")))) + .thenReturn(Optional.of(fakeModel)); + when(mockStateStore.updateState(any(), any(), any(), any())) + .thenReturn(FlintIndexStateModel.copy(fakeModel, 1, 2)) + .thenThrow(new RuntimeException("Commit state failed")) + .thenReturn(FlintIndexStateModel.copy(fakeModel, 1, 3)); + FlintIndexOp flintIndexOp = new TestFlintIndexOp(mockStateStore, "myS3"); + IllegalStateException illegalStateException = + Assertions.assertThrows(IllegalStateException.class, () -> flintIndexOp.apply(metadata)); Assertions.assertEquals( - "begin failed. target transitioning state: [DELETING]", exception.getMessage()); + "commit failed. 
target stable state: [DELETED]", illegalStateException.getMessage()); } @Test - public void commitFailed() { - when(stateStore.updateState(any(), any(), any(), any())) - .thenReturn(model) - .thenThrow(RuntimeException.class); - when(stateStore.get(any(), any(), any())).thenReturn(Optional.of(model)); - when(model.getIndexState()).thenReturn(FlintIndexState.EMPTY); - when(flintIndexMetadata.getLatestId()).thenReturn(Optional.of("latestId")); - - FlintIndexOpDelete indexOp = new FlintIndexOpDelete(stateStore, DATASOURCE); - IllegalStateException exception = - assertThrows(IllegalStateException.class, () -> indexOp.apply(flintIndexMetadata)); + public void testApplyWithRollBackFailure() { + FlintIndexMetadata metadata = mock(FlintIndexMetadata.class); + when(metadata.getLatestId()).thenReturn(Optional.of("latestId")); + FlintIndexStateModel fakeModel = + new FlintIndexStateModel( + FlintIndexState.ACTIVE, + metadata.getAppId(), + metadata.getJobId(), + "latestId", + "myS3", + System.currentTimeMillis(), + "", + SequenceNumbers.UNASSIGNED_SEQ_NO, + SequenceNumbers.UNASSIGNED_PRIMARY_TERM); + when(mockStateStore.get(eq("latestId"), any(), eq(DATASOURCE_TO_REQUEST_INDEX.apply("myS3")))) + .thenReturn(Optional.of(fakeModel)); + when(mockStateStore.updateState(any(), any(), any(), any())) + .thenReturn(FlintIndexStateModel.copy(fakeModel, 1, 2)) + .thenThrow(new RuntimeException("Commit state failed")) + .thenThrow(new RuntimeException("Rollback failure")); + FlintIndexOp flintIndexOp = new TestFlintIndexOp(mockStateStore, "myS3"); + IllegalStateException illegalStateException = + Assertions.assertThrows(IllegalStateException.class, () -> flintIndexOp.apply(metadata)); Assertions.assertEquals( - "commit failed. target stable state: [DELETED]", exception.getMessage()); + "commit failed. 
target stable state: [DELETED]", illegalStateException.getMessage()); + } + + static class TestFlintIndexOp extends FlintIndexOp { + + public TestFlintIndexOp(StateStore stateStore, String datasourceName) { + super(stateStore, datasourceName); + } + + @Override + boolean validate(FlintIndexState state) { + return state == FlintIndexState.ACTIVE || state == FlintIndexState.EMPTY; + } + + @Override + FlintIndexState transitioningState() { + return FlintIndexState.DELETING; + } + + @Override + void runOp(FlintIndexMetadata flintIndexMetadata, FlintIndexStateModel flintIndex) {} + + @Override + FlintIndexState stableState() { + return FlintIndexState.DELETED; + } } } diff --git a/spark/src/test/java/org/opensearch/sql/spark/utils/SQLQueryUtilsTest.java b/spark/src/test/java/org/opensearch/sql/spark/utils/SQLQueryUtilsTest.java index f5226206ab..505acf0afb 100644 --- a/spark/src/test/java/org/opensearch/sql/spark/utils/SQLQueryUtilsTest.java +++ b/spark/src/test/java/org/opensearch/sql/spark/utils/SQLQueryUtilsTest.java @@ -259,50 +259,68 @@ void testRefreshIndex() { @Test void testAutoRefresh() { Assertions.assertFalse( - SQLQueryUtils.extractIndexDetails(skippingIndex().getQuery()).isAutoRefresh()); + SQLQueryUtils.extractIndexDetails(skippingIndex().getQuery()) + .getFlintIndexOptions() + .autoRefresh()); Assertions.assertFalse( SQLQueryUtils.extractIndexDetails( skippingIndex().withProperty("auto_refresh", "false").getQuery()) - .isAutoRefresh()); + .getFlintIndexOptions() + .autoRefresh()); Assertions.assertTrue( SQLQueryUtils.extractIndexDetails( skippingIndex().withProperty("auto_refresh", "true").getQuery()) - .isAutoRefresh()); + .getFlintIndexOptions() + .autoRefresh()); Assertions.assertTrue( SQLQueryUtils.extractIndexDetails( skippingIndex().withProperty("\"auto_refresh\"", "true").getQuery()) - .isAutoRefresh()); + .getFlintIndexOptions() + .autoRefresh()); Assertions.assertTrue( SQLQueryUtils.extractIndexDetails( 
skippingIndex().withProperty("\"auto_refresh\"", "\"true\"").getQuery()) - .isAutoRefresh()); + .getFlintIndexOptions() + .autoRefresh()); Assertions.assertFalse( SQLQueryUtils.extractIndexDetails( skippingIndex().withProperty("auto_refresh", "1").getQuery()) - .isAutoRefresh()); + .getFlintIndexOptions() + .autoRefresh()); Assertions.assertFalse( SQLQueryUtils.extractIndexDetails(skippingIndex().withProperty("interval", "1").getQuery()) - .isAutoRefresh()); + .getFlintIndexOptions() + .autoRefresh()); - Assertions.assertFalse(SQLQueryUtils.extractIndexDetails(index().getQuery()).isAutoRefresh()); + Assertions.assertFalse( + SQLQueryUtils.extractIndexDetails( + skippingIndex().withProperty("\"\"", "\"true\"").getQuery()) + .getFlintIndexOptions() + .autoRefresh()); + + Assertions.assertFalse( + SQLQueryUtils.extractIndexDetails(index().getQuery()).getFlintIndexOptions().autoRefresh()); Assertions.assertFalse( SQLQueryUtils.extractIndexDetails(index().withProperty("auto_refresh", "false").getQuery()) - .isAutoRefresh()); + .getFlintIndexOptions() + .autoRefresh()); Assertions.assertTrue( SQLQueryUtils.extractIndexDetails(index().withProperty("auto_refresh", "true").getQuery()) - .isAutoRefresh()); + .getFlintIndexOptions() + .autoRefresh()); Assertions.assertTrue( SQLQueryUtils.extractIndexDetails(mv().withProperty("auto_refresh", "true").getQuery()) - .isAutoRefresh()); + .getFlintIndexOptions() + .autoRefresh()); } @Getter diff --git a/spark/src/test/java/org/opensearch/sql/spark/utils/TestUtils.java b/spark/src/test/java/org/opensearch/sql/spark/utils/TestUtils.java index ca77006d9c..4cab6afa9c 100644 --- a/spark/src/test/java/org/opensearch/sql/spark/utils/TestUtils.java +++ b/spark/src/test/java/org/opensearch/sql/spark/utils/TestUtils.java @@ -5,8 +5,15 @@ package org.opensearch.sql.spark.utils; +import com.google.common.base.Charsets; +import com.google.common.io.Resources; import java.io.IOException; +import java.net.URL; import java.util.Objects; +import 
lombok.SneakyThrows; +import org.opensearch.action.admin.indices.create.CreateIndexRequest; +import org.opensearch.client.Client; +import org.opensearch.common.xcontent.XContentType; public class TestUtils { @@ -22,4 +29,17 @@ public static String getJson(String filename) throws IOException { return new String( Objects.requireNonNull(classLoader.getResourceAsStream(filename)).readAllBytes()); } + + @SneakyThrows + public static String loadMappings(String path) { + URL url = Resources.getResource(path); + return Resources.toString(url, Charsets.UTF_8); + } + + public static void createIndexWithMappings( + Client client, String indexName, String metadataFileLocation) { + CreateIndexRequest request = new CreateIndexRequest(indexName); + request.mapping(loadMappings(metadataFileLocation), XContentType.JSON); + client.admin().indices().create(request).actionGet(); + } } diff --git a/spark/src/test/resources/flint-index-mappings/flint_my_glue_mydb_http_logs_covering_corrupted_index_mapping.json b/spark/src/test/resources/flint-index-mappings/flint_my_glue_mydb_http_logs_covering_corrupted_index_mapping.json new file mode 100644 index 0000000000..90d37c3e79 --- /dev/null +++ b/spark/src/test/resources/flint-index-mappings/flint_my_glue_mydb_http_logs_covering_corrupted_index_mapping.json @@ -0,0 +1,33 @@ +{ + "_meta": { + "latestId": "flint_my_glue_mydb_http_logs_covering_corrupted_index_latest_id", + "kind": "covering", + "indexedColumns": [ + { + "columnType": "string", + "columnName": "clientip" + }, + { + "columnType": "int", + "columnName": "status" + } + ], + "name": "covering", + "options": { + "auto_refresh": "true", + "incremental_refresh": "false", + "index_settings": "{\"number_of_shards\":5,\"number_of_replicas\":1}", + "checkpoint_location": "s3://vamsicheckpoint/cv/" + }, + "source": "my_glue.mydb.http_logs", + "version": "0.2.0" + }, + "properties": { + "clientip": { + "type": "keyword" + }, + "status": { + "type": "integer" + } + } +} \ No newline at end 
of file diff --git a/spark/src/test/resources/flint-index-mappings/flint_my_glue_mydb_http_logs_covering_index_mapping.json b/spark/src/test/resources/flint-index-mappings/flint_my_glue_mydb_http_logs_covering_index_mapping.json new file mode 100644 index 0000000000..cb4a6b5366 --- /dev/null +++ b/spark/src/test/resources/flint-index-mappings/flint_my_glue_mydb_http_logs_covering_index_mapping.json @@ -0,0 +1,39 @@ +{ + "_meta": { + "latestId": "flint_my_glue_mydb_http_logs_covering_index_latest_id", + "kind": "covering", + "indexedColumns": [ + { + "columnType": "string", + "columnName": "clientip" + }, + { + "columnType": "int", + "columnName": "status" + } + ], + "name": "covering", + "options": { + "auto_refresh": "true", + "incremental_refresh": "false", + "index_settings": "{\"number_of_shards\":5,\"number_of_replicas\":1}", + "checkpoint_location": "s3://vamsicheckpoint/cv/" + }, + "source": "my_glue.mydb.http_logs", + "version": "0.2.0", + "properties": { + "env": { + "SERVERLESS_EMR_VIRTUAL_CLUSTER_ID": "00fhh7frokkf0k0l", + "SERVERLESS_EMR_JOB_ID": "00fhoag6i0671o0m" + } + } + }, + "properties": { + "clientip": { + "type": "keyword" + }, + "status": { + "type": "integer" + } + } +} \ No newline at end of file diff --git a/spark/src/test/resources/flint-index-mappings/flint_my_glue_mydb_http_logs_skipping_index_mapping.json b/spark/src/test/resources/flint-index-mappings/flint_my_glue_mydb_http_logs_skipping_index_mapping.json new file mode 100644 index 0000000000..4ffd73bf9c --- /dev/null +++ b/spark/src/test/resources/flint-index-mappings/flint_my_glue_mydb_http_logs_skipping_index_mapping.json @@ -0,0 +1,39 @@ +{ + "_meta": { + "latestId": "flint_my_glue_mydb_http_logs_skipping_index_latest_id", + "kind": "skipping", + "indexedColumns": [ + { + "columnType": "int", + "kind": "VALUE_SET", + "parameters": { + "max_size": "100" + }, + "columnName": "status" + } + ], + "name": "flint_my_glue_mydb_http_logs_skipping_index", + "options": { + "auto_refresh": 
"true", + "incremental_refresh": "false", + "index_settings": "{\"number_of_shards\":5, \"number_of_replicas\":1}", + "checkpoint_location": "s3://vamsicheckpoint/skp/" + }, + "source": "my_glue.mydb.http_logs", + "version": "0.3.0", + "properties": { + "env": { + "SERVERLESS_EMR_VIRTUAL_CLUSTER_ID": "00fhe6d5jpah090l", + "SERVERLESS_EMR_JOB_ID": "00fhelvq7peuao0m" + } + } + }, + "properties": { + "file_path": { + "type": "keyword" + }, + "status": { + "type": "integer" + } + } +} \ No newline at end of file diff --git a/spark/src/test/resources/flint-index-mappings/flint_my_glue_mydb_mv_mapping.json b/spark/src/test/resources/flint-index-mappings/flint_my_glue_mydb_mv_mapping.json new file mode 100644 index 0000000000..0fcbf299ec --- /dev/null +++ b/spark/src/test/resources/flint-index-mappings/flint_my_glue_mydb_mv_mapping.json @@ -0,0 +1,33 @@ +{ + "_meta": { + "latestId": "flint_my_glue_mydb_mv_latest_id", + "kind": "mv", + "indexedColumns": [ + { + "columnType": "bigint", + "columnName": "counter1" + } + ], + "name": "my_glue.mydb.mv", + "options": { + "auto_refresh": "true", + "incremental_refresh": "false", + "index_settings": "{\"number_of_shards\":5,\"number_of_replicas\":1}", + "checkpoint_location": "s3://vamsicheckpoint/mv/", + "watermark_delay": "10 seconds" + }, + "source": "SELECT count(`@timestamp`) AS `counter1` FROM my_glue.mydb.http_logs GROUP BY TUMBLE (`@timestamp`, '1 second')", + "version": "0.2.0", + "properties": { + "env": { + "SERVERLESS_EMR_VIRTUAL_CLUSTER_ID": "00fhh7frokkf0k0l", + "SERVERLESS_EMR_JOB_ID": "00fhob01oa7fu00m" + } + } + }, + "properties": { + "counter1": { + "type": "long" + } + } +} \ No newline at end of file diff --git a/spark/src/test/resources/flint-index-mappings/flint_mys3_default_http_logs_skipping_index.json b/spark/src/test/resources/flint-index-mappings/flint_mys3_default_http_logs_skipping_index.json index 24e14c12ba..1438b257d1 100644 --- 
a/spark/src/test/resources/flint-index-mappings/flint_mys3_default_http_logs_skipping_index.json +++ b/spark/src/test/resources/flint-index-mappings/flint_mys3_default_http_logs_skipping_index.json @@ -2,23 +2,32 @@ "flint_mys3_default_http_logs_skipping_index": { "mappings": { "_doc": { - "_meta": { + "_meta": { + "latestId": "ZmxpbnRfdmFtc2lfZ2x1ZV92YW1zaWRiX2h0dHBfbG9nc19za2lwcGluZ19pbmRleA==", "kind": "skipping", "indexedColumns": [ { "columnType": "int", "kind": "VALUE_SET", + "parameters": { + "max_size": "100" + }, "columnName": "status" } ], - "name": "flint_mys3_default_http_logs_skipping_index", - "options": {}, - "source": "mys3.default.http_logs", - "version": "0.1.0", + "name": "flint_vamsi_glue_vamsidb_http_logs_skipping_index", + "options": { + "auto_refresh": "true", + "incremental_refresh": "false", + "index_settings": "{\"number_of_shards\":5,\"number_of_replicas\":1}", + "checkpoint_location": "s3://vamsicheckpoint/skp/" + }, + "source": "vamsi_glue.vamsidb.http_logs", + "version": "0.3.0", "properties": { "env": { - "SERVERLESS_EMR_VIRTUAL_CLUSTER_ID": "00fd777k3k3ls20p", - "SERVERLESS_EMR_JOB_ID": "00fdmvv9hp8u0o0q" + "SERVERLESS_EMR_VIRTUAL_CLUSTER_ID": "00fhe6d5jpah090l", + "SERVERLESS_EMR_JOB_ID": "00fhelvq7peuao0" } } }