Changing the array behavior to start the index from 1. #1682

Closed
19 changes: 17 additions & 2 deletions docs/syntax-reference.rst
@@ -166,13 +166,18 @@ The supported column data types are:
- ``BIGINT``
- ``DOUBLE``
- ``VARCHAR`` (or ``STRING``)
- ``ARRAY<ArrayType>`` (JSON and AVRO only. Index starts from 0)
- ``ARRAY<ArrayType>`` (JSON and AVRO only. Indexes start from 0 by default, but this is configurable; see below for more details.)
- ``MAP<VARCHAR, ValueType>`` (JSON and AVRO only)
- ``STRUCT<FieldName FieldType, ...>`` (JSON and AVRO only) The STRUCT type requires you to specify a list of fields.
For each field you must specify the field name (FieldName) and field type (FieldType). The field type can be any of
the supported KSQL types, including the complex types ``MAP``, ``ARRAY``, and ``STRUCT``. ``STRUCT`` fields can be
accessed in expressions using the struct dereference (``->``) operator. See :ref:`operators` for more details.

You can configure the base index for arrays to start from 0 or 1 using the ``ksql.functions.array.legacy.base`` configuration value.
Contributor:
Why can't we just name this ksql.functions.array.zero.base? It seems like a much more self-explanatory name.

The current default value of ``ksql.functions.array.legacy.base`` is ``true``, indicating that array indexes start from 0.
You can set ``ksql.functions.array.legacy.base`` to ``false`` to have array indexes start from 1.
Contributor:
typo: starting

We plan to set the default base for array indexes to 1 in future releases.
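As a concrete example (a sketch; the property file name and location are illustrative, not from this PR), enabling one-based array indexes in a server configuration would look like:

```
# ksql-server.properties (illustrative file name)
# Disable the legacy zero-based behavior so array subscripts start from 1.
ksql.functions.array.legacy.base=false
```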

KSQL adds the implicit columns ``ROWTIME`` and ``ROWKEY`` to every
stream and table, which represent the corresponding Kafka message
timestamp and message key, respectively. The timestamp has milliseconds accuracy.
@@ -243,13 +248,18 @@ The supported column data types are:
- ``BIGINT``
- ``DOUBLE``
- ``VARCHAR`` (or ``STRING``)
- ``ARRAY<ArrayType>`` (JSON and AVRO only. Index starts from 0)
- ``ARRAY<ArrayType>`` (JSON and AVRO only. Indexes start from 0 by default, but this is configurable; see below for more details.)
- ``MAP<VARCHAR, ValueType>`` (JSON and AVRO only)
- ``STRUCT<FieldName FieldType, ...>`` (JSON and AVRO only) The STRUCT type requires you to specify a list of fields.
For each field you must specify the field name (FieldName) and field type (FieldType). The field type can be any of
the supported KSQL types, including the complex types ``MAP``, ``ARRAY``, and ``STRUCT``. ``STRUCT`` fields can be
accessed in expressions using the struct dereference (``->``) operator. See :ref:`operators` for more details.

You can configure the base index for arrays to start from 0 or 1 using the ``ksql.functions.array.legacy.base`` configuration value.
The current default value of ``ksql.functions.array.legacy.base`` is ``true``, indicating that array indexes start from 0.
You can set ``ksql.functions.array.legacy.base`` to ``false`` to have array indexes start from 1.
We plan to set the default base for array indexes to 1 in future releases.

KSQL adds the implicit columns ``ROWTIME`` and ``ROWKEY`` to every
stream and table, which represent the corresponding Kafka message
timestamp and message key, respectively. The timestamp has milliseconds accuracy.
@@ -988,6 +998,11 @@ The explanation for each operator includes a supporting example based on the fol
- Subscript (``[subscript_expr]``) The subscript operator is used to reference the value at
an array index or a map key.

You can configure the base index for arrays to start from 0 or 1 using the ``ksql.functions.array.legacy.base`` configuration value.
The current default value of ``ksql.functions.array.legacy.base`` is ``true``, indicating that array indexes start from 0.
You can set ``ksql.functions.array.legacy.base`` to ``false`` to have array indexes start from 1.
We plan to set the default base for array indexes to 1 in future releases.

.. code:: sql

SELECT NICKNAMES[0] FROM USERS;
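For comparison (a sketch, assuming ``ksql.functions.array.legacy.base=false`` as described above), the same query under one-based indexing would select the first nickname with index 1:

```sql
-- One-based mode: [1] refers to the first element of NICKNAMES.
SELECT NICKNAMES[1] FROM USERS;
```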
@@ -104,6 +104,15 @@ public class KsqlConfig extends AbstractConfig implements Cloneable {
+ " VARCHAR SUBSTRING(str VARCHAR, pos INT, length INT), where pos is base-one indexed,"
+ " and the last argument is the length of the substring to extract.";

public static final String KSQL_FUNCTIONS_ARRAY_LEGACY_BASE_CONFIG =
KSQ_FUNCTIONS_PROPERTY_PREFIX + "array.legacy.base";
private static final String
KSQL_FUNCTIONS_ARRAY_LEGACY_BASE_DOCS = "Switch the array base into legacy mode,"
+ " i.e., array indexes starting from 0 as it was in version 5.0 and earlier of KSQL."
+ " Up to version 5.0.x array indexes started from 0. By setting this config variable to"
+ " false the array indexes will start from one. The default for this is true until we"
+ " switch to arrays with index starting from one.";

public static final String
defaultSchemaRegistryUrl = "http://localhost:8081";

@@ -139,7 +148,14 @@ public class KsqlConfig extends AbstractConfig implements Cloneable {
true,
false,
ConfigDef.Importance.LOW,
KSQL_FUNCTIONS_SUBSTRING_LEGACY_ARGS_DOCS)
KSQL_FUNCTIONS_SUBSTRING_LEGACY_ARGS_DOCS),
new CompatibilityBreakingConfigDef(
KSQL_FUNCTIONS_ARRAY_LEGACY_BASE_CONFIG,
ConfigDef.Type.BOOLEAN,
true,
false,
ConfigDef.Importance.LOW,
KSQL_FUNCTIONS_ARRAY_LEGACY_BASE_DOCS)
);

private static class CompatibilityBreakingConfigDef {
@@ -57,7 +57,8 @@ public class CodeGenRunner {
"java.util.HashMap",
"java.util.Map",
"java.util.List",
"java.util.ArrayList");
"java.util.ArrayList",
"io.confluent.ksql.codegen.helpers.*");

private final Schema schema;
private final FunctionRegistry functionRegistry;
@@ -102,7 +103,8 @@ public ExpressionMetadata buildCodeGenFromParseTree(
index++;
}

final String javaCode = new SqlToJavaVisitor(schema, functionRegistry).process(expression);
final String javaCode = new SqlToJavaVisitor(schema, functionRegistry, ksqlConfig)
.process(expression);

final IExpressionEvaluator ee =
CompilerFactoryFactory.getDefaultCompilerFactory().newExpressionEvaluator();
@@ -54,6 +54,7 @@
import io.confluent.ksql.parser.tree.SubscriptExpression;
import io.confluent.ksql.parser.tree.SymbolReference;
import io.confluent.ksql.util.ExpressionTypeManager;
import io.confluent.ksql.util.KsqlConfig;
import io.confluent.ksql.util.KsqlException;
import io.confluent.ksql.util.Pair;
import io.confluent.ksql.util.SchemaUtil;
@@ -66,14 +67,19 @@

public class SqlToJavaVisitor {

private Schema schema;
private FunctionRegistry functionRegistry;
private final Schema schema;
private final FunctionRegistry functionRegistry;
private final KsqlConfig ksqlConfig;

private final ExpressionTypeManager expressionTypeManager;

public SqlToJavaVisitor(final Schema schema, final FunctionRegistry functionRegistry) {
public SqlToJavaVisitor(
final Schema schema,
final FunctionRegistry functionRegistry,
final KsqlConfig ksqlConfig) {
this.schema = schema;
this.functionRegistry = functionRegistry;
this.ksqlConfig = ksqlConfig;
this.expressionTypeManager =
new ExpressionTypeManager(schema, functionRegistry);
}
@@ -539,11 +545,12 @@ protected Pair<String, Schema> visitSubscriptExpression(
switch (internalSchema.type()) {
case ARRAY:
return new Pair<>(
String.format("((%s) ((%s)%s).get((int)(%s)))",
String.format("((%s) (ArrayGet.getItem(((%s) %s), (int)(%s), %s)))",
Contributor:
If index is null, then this will throw NPE. The index may be derived from the source query e.g.

SELECT array_field[index_field] FROM ...

So we may want to consider handling a null index by just returning null. (I've not checked what other vendors do, but most seem to err on the side of not-logging-loads-of-errors and just returning null)

Contributor (author):
One problem with returning null is that we won't know if the value of the array item is null or there has been an error. We do return null for the column in case of exception anyway, but I think having the error cause in the log is informative, although I agree that we may end up having lots of errors in the log.

Contributor (quoting the above):

I'd normally agree with you. I'm a fan of fail hard and fail fast. However, the pattern for most SQL implementations is to be pretty lax about error reporting. For example, substring just returns null if the start index is out of bounds.

At the very least, we should be logging a more informative error than an NPE.

Contributor:
I commented in another thread, but I think it would be better form to log the exception explicitly at a debug level so that we can control what goes into our logs.

Contributor:
If we throw an exception it will be logged in the processing log...

SchemaUtil.getJavaType(internalSchema.valueSchema()).getSimpleName(),
internalSchemaJavaType,
process(node.getBase(), unmangleNames).getLeft(),
process(node.getIndex(), unmangleNames).getLeft()
process(node.getIndex(), unmangleNames).getLeft(),
ksqlConfig.getBoolean(KsqlConfig.KSQL_FUNCTIONS_ARRAY_LEGACY_BASE_CONFIG)
),
internalSchema.valueSchema()
);
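To make the generated code concrete, here is a small self-contained sketch of the Java expression this branch emits. Only the format string is taken from the diff; the type, column, and index values below are hypothetical stand-ins for what the visitor actually computes:

```java
public class SubscriptCodegenSketch {

    // Mirrors the String.format call in visitSubscriptExpression. All argument
    // values passed in from main() are hypothetical placeholders.
    public static String generate(final String valueType, final String listType,
                                  final String baseExpr, final String indexExpr,
                                  final boolean legacyBase) {
        return String.format(
            "((%s) (ArrayGet.getItem(((%s) %s), (int)(%s), %s)))",
            valueType, listType, baseExpr, indexExpr, legacyBase);
    }

    public static void main(final String[] args) {
        // A VARCHAR array column COL14 subscripted with 1, one-based mode:
        System.out.println(generate("String", "java.util.List", "COL14", "1", false));
        // prints: ((String) (ArrayGet.getItem(((java.util.List) COL14), (int)(1), false)))
    }
}
```

The boolean config value is baked into the generated source as a literal, so each compiled expression is fixed to one indexing mode.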
@@ -0,0 +1,31 @@
/*
* Copyright 2018 Confluent Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
**/

package io.confluent.ksql.codegen.helpers;

import java.util.List;

public final class ArrayGet {

private ArrayGet() {}

public static Object getItem(final List<?> array, final int index, final boolean isLegacy) {
final int correctIndex = isLegacy ? index : index - 1;
return array.get(correctIndex);
big-andy-coates (Contributor), Aug 23, 2018:
You've removed the out-of-bounds handling that was here before. Just wondering why?

I'm guessing this is in line with your comment above, and you want the exception to be logged? If so, then I think this should be a general question to the team to come to a decision on how we handle such issues: do we silently handle them and return null, or do we throw an exception, log it and then return null?

Personally, I'm with you and prefer the logging approach. But if this is not what our users expect then they might not thank us for spamming the logs. Likewise, in cloud we're going to end up with a lot of noise in the logs (though we can turn it off, I guess).

Whichever way we decide to go we should be consistent. So, for example, if we go with throwing exceptions, then we should change SubString to follow the same pattern.

Contributor (author):
I think for now we can throw an exception and return null. This way we make sure we don't lose information that the user may need. This is the same behavior as for other types of expressions. Let's make Substring behave the same.
I agree about the possibility of trashing the log, and I think @blueedgenick's suggestion (#1795) of having a data error log file would alleviate this problem.

Contributor:
I am not a fan of just dumping exception traces to the log by default. Even if we have #1795, it would be much better to explicitly log this at some sort of verbose logging level (like debug) so that we can control what is logged by setting the class level log level in the config.

If these types of things were logged at debug level, then in most cases we won't see them until we want to debug something, at which point we can change the log level for the class (potentially at run time through JMX) and then collect the necessary information.

If you look at our soak cluster, we already have many NPE stack traces in the logs. These are really hard to work with because:

  1. It is hard to grep these out given that they have many lines.
  2. The sheer number crowds out everything else.

I vote for not adding to the problem in this patch.

}


}
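The helper above can be exercised as follows (a self-contained sketch that inlines a replica of ``getItem`` so it compiles on its own; the list contents are made up):

```java
import java.util.Arrays;
import java.util.List;

public class ArrayGetSketch {

    // Replica of ArrayGet.getItem from the diff, included only so this
    // sketch is self-contained.
    public static Object getItem(final List<?> array, final int index, final boolean isLegacy) {
        final int correctIndex = isLegacy ? index : index - 1;
        return array.get(correctIndex);
    }

    public static void main(final String[] args) {
        final List<String> nicknames = Arrays.asList("ksql-fan", "streamer");

        // Legacy (zero-based) mode: index 0 is the first element.
        System.out.println(getItem(nicknames, 0, true));   // prints ksql-fan

        // One-based mode: index 1 is the first element.
        System.out.println(getItem(nicknames, 1, false));  // prints ksql-fan
    }
}
```

Note that, as discussed in the review thread, an out-of-range or one-based index of 0 falls through to ``List.get`` and throws, rather than being handled here.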
@@ -87,7 +87,8 @@ public class SqlPredicate {

final String expressionStr = new SqlToJavaVisitor(
schema,
functionRegistry
functionRegistry,
ksqlConfig
).process(filterExpression);

ee.cook(expressionStr);
@@ -53,12 +53,18 @@
import java.util.stream.Collectors;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.data.SchemaBuilder;
import org.easymock.EasyMock;
import org.easymock.EasyMockRunner;
import org.easymock.Mock;
import org.easymock.MockType;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;


@SuppressWarnings("SameParameterValue")
@RunWith(EasyMockRunner.class)
public class CodeGenRunnerTest {

private static final int INT64_INDEX1 = 0;
@@ -80,6 +86,10 @@ public class CodeGenRunnerTest {
private final InternalFunctionRegistry functionRegistry = new InternalFunctionRegistry();
private GenericRowValueTypeEnforcer genericRowValueTypeEnforcer;
private final KsqlConfig ksqlConfig = new KsqlConfig(Collections.emptyMap());
private Schema schema;

@Mock(type = MockType.NICE)
KsqlConfig localKsqlConfig;

@Before
public void init() {
@@ -90,7 +100,7 @@ public void init() {
final Schema arraySchema = SchemaBuilder.array(Schema.STRING_SCHEMA).optional().build();


final Schema schema = SchemaBuilder.struct()
schema = SchemaBuilder.struct()
.field("CODEGEN_TEST.COL0", SchemaBuilder.OPTIONAL_INT64_SCHEMA)
.field("CODEGEN_TEST.COL1", SchemaBuilder.OPTIONAL_STRING_SCHEMA)
.field("CODEGEN_TEST.COL2", SchemaBuilder.OPTIONAL_STRING_SCHEMA)
@@ -158,6 +168,7 @@ public void testIsDistinctFrom() throws Exception {

@Test
public void testIsNull() throws Exception {
// Given:
final String simpleQuery = "SELECT col0 IS NULL FROM CODEGEN_TEST;";
final Analysis analysis = analyzeQuery(simpleQuery, metaStore);

@@ -179,7 +190,7 @@ public void testIsNull() throws Exception {

@Test
public void shouldHandleMultiDimensionalArray() throws Exception {
final String simpleQuery = "SELECT col14[0][0] FROM CODEGEN_TEST;";
final String simpleQuery = "SELECT col14[1][1] FROM CODEGEN_TEST;";
final Analysis analysis = analyzeQuery(simpleQuery, metaStore);
final ExpressionMetadata expressionEvaluatorMetadata = codeGenRunner.buildCodeGenFromParseTree
(analysis.getSelectExpressions().get(0));
@@ -191,6 +202,36 @@ public void shouldHandleMultiDimensionalArray() throws Exception {
assertThat(result, equalTo("item_11"));
}

@Test
public void shouldHandleMultiDimensionalArrayWithBaseStartingFrom1() throws Exception {
Contributor:
Nit: spell out the 1, i.e. "One". I don't know if it is in our code style, but having digits in our function names seems a bit weird.

// Given:

EasyMock.expect(localKsqlConfig.getBoolean(EasyMock.eq(KsqlConfig.KSQL_FUNCTIONS_ARRAY_LEGACY_BASE_CONFIG))).andReturn(false).times(2);
EasyMock.replay(localKsqlConfig);
final CodeGenRunner localCodeGenRunner = new CodeGenRunner(schema, localKsqlConfig, functionRegistry);
final String simpleQuery = "SELECT col14[1][1] FROM CODEGEN_TEST;";
final Analysis analysis = analyzeQuery(simpleQuery, metaStore);

final Object[] arguments = {Arrays.asList(
Arrays.asList("item_11", "item_12"),
Arrays.asList("item_21", "item_22"))};

// When:
final Object result = executeExpression(simpleQuery, arguments, localCodeGenRunner);

// Then:
EasyMock.verify(localKsqlConfig);
assertThat(result, instanceOf(String.class));
assertThat(result, equalTo("item_11"));
}

private Object executeExpression(final String simpleQuery, final Object[] arguments, final CodeGenRunner localCodeGenRunner) throws Exception {
final Analysis analysis = analyzeQuery(simpleQuery, metaStore);
final ExpressionMetadata expressionEvaluatorMetadata = localCodeGenRunner.buildCodeGenFromParseTree
(analysis.getSelectExpressions().get(0));
return expressionEvaluatorMetadata.getExpressionEvaluator().evaluate(arguments);
}

@Test
public void testIsNotNull() throws Exception {
final String simpleQuery = "SELECT col0 IS NOT NULL FROM CODEGEN_TEST;";