Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Add support for geometry type #2032

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions java/core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,11 @@
<groupId>com.aayushatharva.brotli4j</groupId>
<artifactId>brotli4j</artifactId>
</dependency>
<dependency>
<groupId>org.locationtech.jts</groupId>
<artifactId>jts-core</artifactId>
<version>${jts.version}</version>
</dependency>

<!-- test inter-project -->
<dependency>
Expand Down
26 changes: 26 additions & 0 deletions java/core/src/java/org/apache/orc/GeometryColumnStatistics.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.orc;

import org.apache.orc.geometry.BoundingBox;

/**
 * Column statistics for geometry columns. In addition to everything exposed by
 * {@link ColumnStatistics}, these statistics provide a {@link BoundingBox}.
 */
public interface GeometryColumnStatistics extends ColumnStatistics {

  /**
   * Get the bounding box recorded in these statistics.
   * NOTE(review): presumably this is the spatial extent of the geometry values
   * in the column - confirm against the implementing class.
   * @return the bounding box
   */
BoundingBox getBoundingBox();
}
49 changes: 49 additions & 0 deletions java/core/src/java/org/apache/orc/OrcUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,33 @@ private static void appendOrcTypes(List<OrcProto.Type> result, TypeDescription t
type.setPrecision(typeDescr.getPrecision());
type.setScale(typeDescr.getScale());
break;
case Geometry:
  // Geometry columns are serialized as kind GEOMETRY plus a nested
  // OrcProto.GeometryType message built from TypeDescription.GeometryType.
  type.setKind(OrcProto.Type.Kind.GEOMETRY);
  OrcProto.GeometryType.Builder builder = OrcProto.GeometryType.newBuilder();
  TypeDescription.GeometryType geometryType = typeDescr.getGeometryType();
  // withGeometryType() does not reject null, so the description may be absent.
  // Report that explicitly instead of failing with a bare NullPointerException.
  if (geometryType == null) {
    throw new IllegalArgumentException("Geometry type description is missing for geometry column");
  }
  // set encoding (WKB is the only encoding that can be written)
  if (geometryType.getEncoding() == TypeDescription.GeometryType.GeometryEncoding.WKB) {
    builder.setEncoding(OrcProto.GeometryType.GeometryEncoding.WKB);
  } else {
    throw new IllegalArgumentException("Unsupported geometry encoding: " + geometryType.getEncoding());
  }
  // set edges (TypeDescription's PLANNER constant maps to the protobuf PLANAR value)
  switch (geometryType.getEdges()) {
    case PLANNER -> builder.setEdges(OrcProto.GeometryType.Edges.PLANAR);
    case SPHERICAL -> builder.setEdges(OrcProto.GeometryType.Edges.SPHERICAL);
    default -> throw new IllegalArgumentException("Unsupported geometry edges: " + geometryType.getEdges());
  }
  // The remaining attributes are optional: only copy the ones that are present.
  if (geometryType.getCrs() != null) {
    builder.setCrs(geometryType.getCrs());
  }
  if (geometryType.getCrs_encoding() != null) {
    builder.setCrsEncoding(geometryType.getCrs_encoding());
  }
  if (geometryType.getMetadata() != null) {
    builder.setMetadata(geometryType.getMetadata());
  }
  type.setGeometry(builder);
  break;
case LIST:
type.setKind(OrcProto.Type.Kind.LIST);
type.addSubtypes(children.get(0).getId());
Expand Down Expand Up @@ -325,6 +352,28 @@ TypeDescription convertTypeFromProtobuf(List<OrcProto.Type> types,
result.withPrecision(type.getPrecision());
}
break;
case GEOMETRY:
  // Rebuild the TypeDescription for a geometry column from the nested
  // OrcProto.GeometryType message.
  result = TypeDescription.createGeometry();
  OrcProto.GeometryType orcGeometryType = type.getGeometry();
  TypeDescription.GeometryType.Edges edges;
  TypeDescription.GeometryType.GeometryEncoding encoding;
  // WKB is the only encoding that can be read back.
  if (orcGeometryType.getEncoding() == OrcProto.GeometryType.GeometryEncoding.WKB) {
    encoding = TypeDescription.GeometryType.GeometryEncoding.WKB;
  } else {
    throw new IllegalArgumentException("Unsupported geometry encoding: " + orcGeometryType.getEncoding());
  }
  // The protobuf PLANAR value maps to TypeDescription's PLANNER constant.
  switch (orcGeometryType.getEdges()) {
    case PLANAR -> edges = TypeDescription.GeometryType.Edges.PLANNER;
    case SPHERICAL -> edges = TypeDescription.GeometryType.Edges.SPHERICAL;
    default -> throw new FileFormatException("Unrecognized geometry edges: " + orcGeometryType.getEdges());
  }
  // NOTE(review): for fields that were never written, the generated getters
  // presumably return an empty string rather than null - confirm whether null
  // should be restored here so that a write/read round trip is lossless.
  TypeDescription.GeometryType geometryType = new TypeDescription.GeometryType(encoding,
      edges,
      orcGeometryType.getCrs(),
      orcGeometryType.getCrsEncoding(),
      orcGeometryType.getMetadata());
  result.withGeometryType(geometryType);
  break;
case LIST:
if (type.getSubtypesCount() != 1) {
throw new FileFormatException("LIST type should contain exactly " +
Expand Down
84 changes: 83 additions & 1 deletion java/core/src/java/org/apache/orc/TypeDescription.java
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,57 @@ public class TypeDescription
public static final String ENCRYPT_ATTRIBUTE = "encrypt";
public static final String MASK_ATTRIBUTE = "mask";

/**
 * Immutable description of the parameters of a geometry column: the encoding
 * of the values, how edges are interpreted, and the optional coordinate
 * reference system (CRS), CRS encoding and metadata strings.
 * Instances are value objects: two descriptions with the same parameters are
 * equal and have the same hash code.
 */
public static final class GeometryType {

  /** The supported encodings of geometry values. */
  public enum GeometryEncoding {
    WKB
  }

  /**
   * How the edges of a geometry are interpreted. The PLANNER constant is the
   * counterpart of the PLANAR value in the protobuf definition.
   */
  public enum Edges {
    PLANNER,
    SPHERICAL
  }

  /**
   * Name of the default CRS encoding. The constructor does not apply it
   * automatically; callers that want the default pass it explicitly.
   */
  public static final String CRS_ENCODING_DEFAULT = "PROJJSON";

  private final GeometryEncoding encoding;
  private final Edges edges;
  private final String crs;
  private final String crs_encoding;
  private final String metadata;

  /**
   * Create a geometry type description.
   * @param encoding the encoding of the geometry values, required
   * @param edges the edge interpretation, required
   * @param crs the coordinate reference system, may be null
   * @param crs_encoding the encoding of the crs string, may be null
   * @param metadata additional metadata, may be null
   * @throws IllegalArgumentException if encoding or edges is null
   */
  public GeometryType(
      GeometryEncoding encoding, Edges edges, String crs, String crs_encoding, String metadata) {
    if (encoding == null) {
      throw new IllegalArgumentException("Geometry encoding is required");
    }
    if (edges == null) {
      throw new IllegalArgumentException("Edges is required");
    }
    this.encoding = encoding;
    this.edges = edges;
    this.crs = crs;
    this.crs_encoding = crs_encoding;
    this.metadata = metadata;
  }

  /**
   * Get the encoding of the geometry values.
   * @return the encoding, never null
   */
  public GeometryEncoding getEncoding() {
    return encoding;
  }

  /**
   * Get the edge interpretation.
   * @return the edges, never null
   */
  public Edges getEdges() {
    return edges;
  }

  /**
   * Get the coordinate reference system.
   * @return the crs or null if none was given
   */
  public String getCrs() {
    return crs;
  }

  /**
   * Get the encoding of the crs string.
   * @return the crs encoding or null if none was given
   */
  public String getCrs_encoding() {
    return crs_encoding;
  }

  /**
   * Get the metadata string.
   * @return the metadata or null if none was given
   */
  public String getMetadata() {
    return metadata;
  }

  /**
   * Compare by value: all five parameters must match.
   */
  @Override
  public boolean equals(Object other) {
    if (this == other) {
      return true;
    }
    if (!(other instanceof GeometryType)) {
      return false;
    }
    GeometryType that = (GeometryType) other;
    return encoding == that.encoding &&
        edges == that.edges &&
        java.util.Objects.equals(crs, that.crs) &&
        java.util.Objects.equals(crs_encoding, that.crs_encoding) &&
        java.util.Objects.equals(metadata, that.metadata);
  }

  @Override
  public int hashCode() {
    return java.util.Objects.hash(encoding, edges, crs, crs_encoding, metadata);
  }

  @Override
  public String toString() {
    return "GeometryType{encoding=" + encoding +
        ", edges=" + edges +
        ", crs=" + crs +
        ", crs_encoding=" + crs_encoding +
        ", metadata=" + metadata + "}";
  }
}

@Override
public int compareTo(TypeDescription other) {
if (this == other) {
Expand Down Expand Up @@ -116,7 +167,8 @@ public enum Category {
MAP("map", false),
STRUCT("struct", false),
UNION("uniontype", false),
TIMESTAMP_INSTANT("timestamp with local time zone", true);
TIMESTAMP_INSTANT("timestamp with local time zone", true),
Geometry("geometry", true);

Category(String name, boolean isPrimitive) {
this.name = name;
Expand Down Expand Up @@ -187,6 +239,8 @@ public static TypeDescription createDecimal() {
return new TypeDescription(Category.DECIMAL);
}

/**
 * Create a new type description of the geometry category.
 * @return the new geometry type description
 */
public static TypeDescription createGeometry() {
  TypeDescription geometry = new TypeDescription(Category.Geometry);
  return geometry;
}

/**
* Parse TypeDescription from the Hive type names. This is the inverse
* of TypeDescription.toString()
Expand Down Expand Up @@ -239,6 +293,20 @@ public TypeDescription withScale(int scale) {
return this;
}

/**
 * Attach the geometry parameters to this type. Only types of the geometry
 * category may carry a GeometryType.
 * @param geometryType the GeometryType to store on this type
 * @return this
 * @throws IllegalArgumentException if this type is not a geometry type
 */
public TypeDescription withGeometryType(GeometryType geometryType) {
  if (category == Category.Geometry) {
    this.geometryType = geometryType;
    return this;
  }
  String message = "GeometryType is only allowed on geometry" +
      " and not " + category.name;
  throw new IllegalArgumentException(message);
}

/**
* Set an attribute on this type.
* @param key the attribute name
Expand Down Expand Up @@ -366,6 +434,13 @@ public TypeDescription clone() {
result.maxLength = maxLength;
result.precision = precision;
result.scale = scale;
if (geometryType != null) {
result.geometryType = new GeometryType(geometryType.getEncoding(),
geometryType.getEdges(),
geometryType.getCrs(),
geometryType.getCrs_encoding(),
geometryType.getMetadata());
}
if (fieldNames != null) {
result.fieldNames.addAll(fieldNames);
}
Expand Down Expand Up @@ -557,6 +632,12 @@ public int getScale() {
return scale;
}

/**
 * Get the geometry parameters stored on this type.
 * @return the GeometryType of this type, which may be null
 */
public GeometryType getGeometryType() {
  return this.geometryType;
}

/**
* For struct types, get the list of field names.
* @return the list of field names.
Expand Down Expand Up @@ -664,6 +745,7 @@ public TypeDescription(Category category) {
private int maxLength = DEFAULT_LENGTH;
private int precision = DEFAULT_PRECISION;
private int scale = DEFAULT_SCALE;
private GeometryType geometryType;

static void printFieldName(StringBuilder buffer, String name) {
if (UNQUOTED_NAMES.matcher(name).matches()) {
Expand Down
Loading
Loading