Skip to content

Commit

Permalink
consolidate json and auto indexers, remove v4 nested column serializer
Browse files Browse the repository at this point in the history
  • Loading branch information
clintropolis authored Aug 23, 2023
1 parent 6817de9 commit fb053c3
Show file tree
Hide file tree
Showing 33 changed files with 72 additions and 2,320 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@
import org.apache.druid.query.QueryContexts;
import org.apache.druid.query.QueryRunnerFactoryConglomerate;
import org.apache.druid.query.expression.TestExprMacroTable;
import org.apache.druid.segment.AutoTypeColumnSchema;
import org.apache.druid.segment.IndexSpec;
import org.apache.druid.segment.NestedDataDimensionSchema;
import org.apache.druid.segment.QueryableIndex;
import org.apache.druid.segment.column.StringEncodingStrategy;
import org.apache.druid.segment.data.FrontCodedIndexed;
Expand Down Expand Up @@ -298,7 +298,7 @@ public void setup()
);
List<DimensionSchema> dims = ImmutableList.<DimensionSchema>builder()
.addAll(schemaInfo.getDimensionsSpec().getDimensions())
.add(new NestedDataDimensionSchema("nested"))
.add(new AutoTypeColumnSchema("nested"))
.build();
DimensionsSpec dimsSpec = new DimensionsSpec(dims);

Expand Down
4 changes: 3 additions & 1 deletion docs/querying/nested-columns.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,14 @@ import TabItem from '@theme/TabItem';
~ under the License.
-->

Apache Druid supports directly storing nested data structures in `COMPLEX<json>` columns. `COMPLEX<json>` columns store a copy of the structured data in JSON format and specialized internal columns and indexes for nested literal values&mdash;STRING, LONG, and DOUBLE types. An optimized [virtual column](./virtual-columns.md#nested-field-virtual-column) allows Druid to read and filter these values at speeds consistent with standard Druid LONG, DOUBLE, and STRING columns.
Apache Druid supports directly storing nested data structures in `COMPLEX<json>` columns. `COMPLEX<json>` columns store a copy of the structured data in JSON format, along with specialized internal columns and indexes for nested literal values&mdash;STRING, LONG, and DOUBLE types, as well as ARRAYs of STRING, LONG, and DOUBLE values. An optimized [virtual column](./virtual-columns.md#nested-field-virtual-column) allows Druid to read and filter these values at speeds consistent with standard Druid LONG, DOUBLE, and STRING columns.

Druid [SQL JSON functions](./sql-json-functions.md) allow you to extract, transform, and create `COMPLEX<json>` values in SQL queries, using the specialized virtual columns where appropriate. You can use the [JSON nested columns functions](math-expr.md#json-functions) in [native queries](./querying.md) using [expression virtual columns](./virtual-columns.md#expression-virtual-column), and in native ingestion with a [`transformSpec`](../ingestion/ingestion-spec.md#transformspec).

You can use the JSON functions in INSERT and REPLACE statements in SQL-based ingestion, or in a `transformSpec` in native ingestion as an alternative to using a [`flattenSpec`](../ingestion/data-formats.md#flattenspec) object to "flatten" nested data for ingestion.

Columns ingested as `COMPLEX<json>` are automatically optimized to store the most appropriate physical column based on the data processed. For example, if only LONG values are processed, Druid stores a LONG column; if the data consists of arrays, Druid stores an ARRAY column; and in the general case, where the data is actually nested, Druid stores a `COMPLEX<json>` column. This is the same functionality that powers ['type aware' schema discovery](../ingestion/schema-design.md#type-aware-schema-discovery).

Druid supports directly ingesting nested data in the following formats: JSON, Parquet, Avro, ORC, and Protobuf.

## Example nested data
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@
import org.apache.druid.java.util.common.parsers.JSONPathFieldType;
import org.apache.druid.java.util.common.parsers.JSONPathSpec;
import org.apache.druid.query.expression.TestExprMacroTable;
import org.apache.druid.segment.NestedDataDimensionSchema;
import org.apache.druid.segment.AutoTypeColumnSchema;
import org.apache.druid.segment.nested.StructuredData;
import org.apache.druid.segment.transform.ExpressionTransform;
import org.apache.druid.segment.transform.TransformSpec;
Expand Down Expand Up @@ -303,15 +303,15 @@ public void testParseTransformNested() throws SchemaValidationException, IOExcep

DimensionsSpec dimensionsSpec = new DimensionsSpec(
ImmutableList.of(
new NestedDataDimensionSchema("someIntValueMap"),
new NestedDataDimensionSchema("someStringValueMap"),
new NestedDataDimensionSchema("someRecord"),
new NestedDataDimensionSchema("someRecordArray"),
new AutoTypeColumnSchema("someIntValueMap"),
new AutoTypeColumnSchema("someStringValueMap"),
new AutoTypeColumnSchema("someRecord"),
new AutoTypeColumnSchema("someRecordArray"),
new LongDimensionSchema("tSomeIntValueMap8"),
new LongDimensionSchema("tSomeIntValueMap8_2"),
new StringDimensionSchema("tSomeStringValueMap8"),
new LongDimensionSchema("tSomeRecordSubLong"),
new NestedDataDimensionSchema("tSomeRecordArray0"),
new AutoTypeColumnSchema("tSomeRecordArray0"),
new StringDimensionSchema("tSomeRecordArray0nestedString")
)
);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
import org.apache.druid.java.util.common.parsers.JSONPathFieldType;
import org.apache.druid.java.util.common.parsers.JSONPathSpec;
import org.apache.druid.query.expression.TestExprMacroTable;
import org.apache.druid.segment.NestedDataDimensionSchema;
import org.apache.druid.segment.AutoTypeColumnSchema;
import org.apache.druid.segment.transform.ExpressionTransform;
import org.apache.druid.segment.transform.TransformSpec;
import org.apache.druid.segment.transform.TransformingInputEntityReader;
Expand Down Expand Up @@ -333,9 +333,9 @@ public void testNestedColumn() throws IOException
new TimestampSpec("ts", "millis", null),
new DimensionsSpec(
ImmutableList.of(
new NestedDataDimensionSchema("middle"),
new NestedDataDimensionSchema("list"),
new NestedDataDimensionSchema("map")
new AutoTypeColumnSchema("middle"),
new AutoTypeColumnSchema("list"),
new AutoTypeColumnSchema("map")
)
),
inputFormat,
Expand Down Expand Up @@ -542,8 +542,8 @@ public void testListMap() throws IOException
new TimestampSpec("timestamp", "auto", null),
new DimensionsSpec(
ImmutableList.of(
new NestedDataDimensionSchema("a"),
new NestedDataDimensionSchema("b")
new AutoTypeColumnSchema("a"),
new AutoTypeColumnSchema("b")
)
),
inputFormat,
Expand Down Expand Up @@ -608,11 +608,11 @@ public void testNestedArray() throws IOException
new TimestampSpec("timestamp", "auto", null),
new DimensionsSpec(
ImmutableList.of(
new NestedDataDimensionSchema("a"),
new NestedDataDimensionSchema("b"),
new NestedDataDimensionSchema("c"),
new NestedDataDimensionSchema("d"),
new NestedDataDimensionSchema("t_d_0")
new AutoTypeColumnSchema("a"),
new AutoTypeColumnSchema("b"),
new AutoTypeColumnSchema("c"),
new AutoTypeColumnSchema("d"),
new AutoTypeColumnSchema("t_d_0")
)
),
inputFormat,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
import org.apache.druid.data.input.impl.TimestampSpec;
import org.apache.druid.java.util.common.parsers.JSONPathSpec;
import org.apache.druid.query.expression.TestExprMacroTable;
import org.apache.druid.segment.NestedDataDimensionSchema;
import org.apache.druid.segment.AutoTypeColumnSchema;
import org.apache.druid.segment.transform.ExpressionTransform;
import org.apache.druid.segment.transform.TransformSpec;
import org.apache.druid.segment.transform.TransformingInputEntityReader;
Expand All @@ -51,8 +51,8 @@ public void testNestedColumnTransformsNestedTestFile() throws IOException
new TimestampSpec("timestamp", "auto", null),
new DimensionsSpec(
ImmutableList.of(
new NestedDataDimensionSchema("nestedData"),
new NestedDataDimensionSchema("t_nestedData_listDim"),
new AutoTypeColumnSchema("nestedData"),
new AutoTypeColumnSchema("t_nestedData_listDim"),
new StringDimensionSchema("t_nestedData_listDim_string"),
new StringDimensionSchema("t_nestedData_dim2"),
new LongDimensionSchema("t_nestedData_dim3"),
Expand Down Expand Up @@ -105,10 +105,10 @@ public void testNestedColumnTransformsNestedNullableListFile() throws IOExceptio
new TimestampSpec("timestamp", "auto", null),
new DimensionsSpec(
ImmutableList.of(
new NestedDataDimensionSchema("a1"),
new NestedDataDimensionSchema("a2"),
new NestedDataDimensionSchema("t_a2"),
new NestedDataDimensionSchema("t_a1_b1"),
new AutoTypeColumnSchema("a1"),
new AutoTypeColumnSchema("a2"),
new AutoTypeColumnSchema("t_a2"),
new AutoTypeColumnSchema("t_a1_b1"),
new LongDimensionSchema("t_a1_b1_c1"),
new LongDimensionSchema("t_e2_0_b1"),
new LongDimensionSchema("tt_a2_0_b1")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@
import org.apache.druid.jackson.DefaultObjectMapper;
import org.apache.druid.math.expr.ExpressionProcessing;
import org.apache.druid.segment.AutoTypeColumnSchema;
import org.apache.druid.segment.NestedDataDimensionSchema;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.column.RowSignature;
import org.apache.druid.segment.indexing.DataSchema;
Expand Down Expand Up @@ -257,9 +256,9 @@ public void testTypesNoDiscoveryExplicitSchema()
new LongDimensionSchema("long"),
new DoubleDimensionSchema("double"),
new StringDimensionSchema("bool"),
new NestedDataDimensionSchema("variant"),
new NestedDataDimensionSchema("array"),
new NestedDataDimensionSchema("nested")
new AutoTypeColumnSchema("variant"),
new AutoTypeColumnSchema("array"),
new AutoTypeColumnSchema("nested")
)
).build(),
null,
Expand Down Expand Up @@ -292,8 +291,8 @@ public void testTypesNoDiscoveryExplicitSchema()
.add("long", ColumnType.LONG)
.add("double", ColumnType.DOUBLE)
.add("bool", ColumnType.STRING)
.add("variant", ColumnType.NESTED_DATA)
.add("array", ColumnType.NESTED_DATA)
.add("variant", ColumnType.STRING)
.add("array", ColumnType.LONG_ARRAY)
.add("nested", ColumnType.NESTED_DATA)
.build(),
response.getLogicalSegmentSchema()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
import com.google.common.collect.ImmutableMap;
import org.apache.druid.client.indexing.SamplerResponse;
import org.apache.druid.data.input.impl.StringDimensionSchema;
import org.apache.druid.segment.NestedDataDimensionSchema;
import org.apache.druid.segment.AutoTypeColumnSchema;
import org.apache.druid.segment.TestHelper;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.column.RowSignature;
Expand Down Expand Up @@ -65,7 +65,7 @@ public void testSerde() throws IOException
new StringDimensionSchema("dim1")
),
ImmutableList.of(
new NestedDataDimensionSchema("dim1")
new AutoTypeColumnSchema("dim1")
),
RowSignature.builder().addTimeColumn().add("dim1", ColumnType.STRING).add("met1", ColumnType.LONG).build(),
data
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@
import org.apache.druid.segment.AutoTypeColumnSchema;
import org.apache.druid.segment.DimensionHandler;
import org.apache.druid.segment.DimensionHandlerUtils;
import org.apache.druid.segment.NestedDataDimensionSchema;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.column.TypeSignature;
import org.apache.druid.segment.column.ValueType;
Expand All @@ -51,7 +50,7 @@
@JsonSubTypes.Type(name = DimensionSchema.FLOAT_TYPE_NAME, value = FloatDimensionSchema.class),
@JsonSubTypes.Type(name = DimensionSchema.DOUBLE_TYPE_NAME, value = DoubleDimensionSchema.class),
@JsonSubTypes.Type(name = DimensionSchema.SPATIAL_TYPE_NAME, value = NewSpatialDimensionSchema.class),
@JsonSubTypes.Type(name = NestedDataComplexTypeSerde.TYPE_NAME, value = NestedDataDimensionSchema.class),
@JsonSubTypes.Type(name = NestedDataComplexTypeSerde.TYPE_NAME, value = AutoTypeColumnSchema.class),
@JsonSubTypes.Type(name = AutoTypeColumnSchema.TYPE, value = AutoTypeColumnSchema.class)
})
public abstract class DimensionSchema
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
import com.google.inject.Binder;
import org.apache.druid.initialization.DruidModule;
import org.apache.druid.segment.DimensionHandlerUtils;
import org.apache.druid.segment.NestedDataDimensionHandler;
import org.apache.druid.segment.NestedCommonFormatColumnHandler;
import org.apache.druid.segment.nested.NestedDataComplexTypeSerde;
import org.apache.druid.segment.nested.StructuredData;
import org.apache.druid.segment.nested.StructuredDataJsonSerializer;
Expand Down Expand Up @@ -56,21 +56,18 @@ public static void registerHandlersAndSerde()
{
if (ComplexMetrics.getSerdeForType(NestedDataComplexTypeSerde.TYPE_NAME) == null) {
ComplexMetrics.registerSerde(NestedDataComplexTypeSerde.TYPE_NAME, NestedDataComplexTypeSerde.INSTANCE);

}
DimensionHandlerUtils.registerDimensionHandlerProvider(
NestedDataComplexTypeSerde.TYPE_NAME,
NestedDataDimensionHandler::new
NestedCommonFormatColumnHandler::new
);
}

public static List<SimpleModule> getJacksonModulesList()
{
return Collections.singletonList(
new SimpleModule("NestedDataModule")
.registerSubtypes(
new NamedType(NestedFieldVirtualColumn.class, "nested-field")
)
.registerSubtypes(new NamedType(NestedFieldVirtualColumn.class, "nested-field"))
.addSerializer(StructuredData.class, new StructuredDataJsonSerializer())
);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,7 @@
*/
public class AutoTypeColumnMerger implements DimensionMergerV9
{
private static final Logger log = new Logger(NestedDataColumnMerger.class);

private static final Logger log = new Logger(AutoTypeColumnMerger.class);
public static final Comparator<PeekingIterator<String>> STRING_MERGING_COMPARATOR =
SimpleDictionaryMergingIterator.makePeekingComparator();
public static final Comparator<PeekingIterator<Long>> LONG_MERGING_COMPARATOR =
Expand Down
Loading

0 comments on commit fb053c3

Please sign in to comment.