Skip to content

Commit

Permalink
BigQuery: Add Clustering support (#3415)
Browse files Browse the repository at this point in the history
* BigQuery: Add Clustering support to library.

Initial changes for table.  Next: plumb in configurations for
load/query destination.

* Plumb configuration options in.

* add missing license header, remove unused import

* Address reviewer comments: list immutability
  • Loading branch information
shollyman authored Jul 30, 2018
1 parent b3e9a4c commit 49f93c8
Show file tree
Hide file tree
Showing 11 changed files with 273 additions and 5 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
/*
* Copyright 2018 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.cloud.bigquery;


import com.google.auto.value.AutoValue;
import com.google.common.collect.ImmutableList;
import java.io.Serializable;
import java.util.List;
import javax.annotation.Nullable;

@AutoValue
public abstract class Clustering implements Serializable {

private static final long serialVersionUID = 1L;


@Nullable
abstract ImmutableList<String> getFieldsImmut();

public List<String> getFields() {return getFieldsImmut();}


public abstract Builder toBuilder();

@AutoValue.Builder
public abstract static class Builder {

abstract Builder setFieldsImmut(ImmutableList<String> fieldsImmut);

public Builder setFields(List<String> fields) {
return setFieldsImmut(ImmutableList.copyOf(fields));
}

public abstract Clustering build();
}

public static Builder newBuilder() {
return new AutoValue_Clustering.Builder();
}

com.google.api.services.bigquery.model.Clustering toPb() {
com.google.api.services.bigquery.model.Clustering clusterPb =
new com.google.api.services.bigquery.model.Clustering();
clusterPb.setFields(getFields());
return clusterPb;
}

static Clustering fromPb(com.google.api.services.bigquery.model.Clustering clusterPb) {
return newBuilder().setFields(clusterPb.getFields()).build();
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,16 @@ interface Builder {
*/
Builder setAutodetect(Boolean autodetect);

/**
* Sets the time partitioning specification for the destination table.
*/
Builder setTimePartitioning(TimePartitioning timePartitioning);

/**
* Sets the clustering specification for the destination table.
*/
Builder setClustering(Clustering clustering);

LoadConfiguration build();
}

Expand Down Expand Up @@ -211,6 +221,16 @@ interface Builder {
*/
Boolean getAutodetect();

/**
* Returns the time partitioning specification defined for the destination table.
*/
TimePartitioning getTimePartitioning();

/**
* Returns the clustering specification for the definition table.
*/
Clustering getClustering();

/**
* Returns a builder for the load configuration object.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ public final class LoadJobConfiguration extends JobConfiguration implements Load
private final Boolean ignoreUnknownValues;
private final List<JobInfo.SchemaUpdateOption> schemaUpdateOptions;
private final Boolean autodetect;
private final TimePartitioning timePartitioning;
private final Clustering clustering;

public static final class Builder
extends JobConfiguration.Builder<LoadJobConfiguration, Builder>
Expand All @@ -64,6 +66,8 @@ public static final class Builder
private List<String> projectionFields;
private List<JobInfo.SchemaUpdateOption> schemaUpdateOptions;
private Boolean autodetect;
private TimePartitioning timePartitioning;
private Clustering clustering;

private Builder() {
super(Type.LOAD);
Expand All @@ -84,6 +88,8 @@ private Builder(LoadJobConfiguration loadConfiguration) {
this.autodetect = loadConfiguration.autodetect;
this.destinationEncryptionConfiguration =
loadConfiguration.destinationEncryptionConfiguration;
this.timePartitioning = loadConfiguration.timePartitioning;
this.clustering = loadConfiguration.clustering;
}

private Builder(com.google.api.services.bigquery.model.JobConfiguration configurationPb) {
Expand Down Expand Up @@ -141,6 +147,12 @@ private Builder(com.google.api.services.bigquery.model.JobConfiguration configur
}
this.schemaUpdateOptions = schemaUpdateOptionsBuilder.build();
}
if (loadConfigurationPb.getTimePartitioning() != null) {
this.timePartitioning = TimePartitioning.fromPb(loadConfigurationPb.getTimePartitioning());
}
if (loadConfigurationPb.getClustering() != null) {
this.clustering = Clustering.fromPb(loadConfigurationPb.getClustering());
}
this.autodetect = loadConfigurationPb.getAutodetect();
if (loadConfigurationPb.getDestinationEncryptionConfiguration() != null) {
this.destinationEncryptionConfiguration = new EncryptionConfiguration.Builder(
Expand Down Expand Up @@ -211,6 +223,18 @@ public Builder setIgnoreUnknownValues(Boolean ignoreUnknownValues) {
return this;
}

@Override
public Builder setTimePartitioning(TimePartitioning timePartitioning) {
this.timePartitioning = timePartitioning;
return this;
}

@Override
public Builder setClustering(Clustering clustering) {
this.clustering = clustering;
return this;
}

/**
* Sets the fully-qualified URIs that point to source data in Google Cloud Storage (e.g.
* gs://bucket/path). Each URI can contain one '*' wildcard character and it must come after the
Expand Down Expand Up @@ -253,6 +277,8 @@ private LoadJobConfiguration(Builder builder) {
this.schemaUpdateOptions = builder.schemaUpdateOptions;
this.autodetect = builder.autodetect;
this.destinationEncryptionConfiguration = builder.destinationEncryptionConfiguration;
this.timePartitioning = builder.timePartitioning;
this.clustering = builder.clustering;
}


Expand Down Expand Up @@ -333,6 +359,12 @@ public Boolean getAutodetect() {
return autodetect;
}

@Override
public TimePartitioning getTimePartitioning() { return timePartitioning; }

@Override
public Clustering getClustering() { return clustering; }

@Override
public List<JobInfo.SchemaUpdateOption> getSchemaUpdateOptions() {
return schemaUpdateOptions;
Expand All @@ -357,7 +389,9 @@ ToStringHelper toStringHelper() {
.add("ignoreUnknownValue", ignoreUnknownValues)
.add("sourceUris", sourceUris)
.add("schemaUpdateOptions", schemaUpdateOptions)
.add("autodetect", autodetect);
.add("autodetect", autodetect)
.add("timePartitioning", timePartitioning)
.add("clustering", clustering);
}

@Override
Expand Down Expand Up @@ -429,6 +463,12 @@ com.google.api.services.bigquery.model.JobConfiguration toPb() {
loadConfigurationPb.setDestinationEncryptionConfiguration(
destinationEncryptionConfiguration.toPb());
}
if (timePartitioning != null) {
loadConfigurationPb.setTimePartitioning(timePartitioning.toPb());
}
if (clustering != null) {
loadConfigurationPb.setClustering(clustering.toPb());
}
return new com.google.api.services.bigquery.model.JobConfiguration()
.setLoad(loadConfigurationPb);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@ public final class QueryJobConfiguration extends JobConfiguration {
private final Integer maximumBillingTier;
private final List<SchemaUpdateOption> schemaUpdateOptions;
private final EncryptionConfiguration destinationEncryptionConfiguration;
private final TimePartitioning timePartitioning;
private final Clustering clustering;

/**
* Priority levels for a query. If not specified the priority is assumed to be
Expand Down Expand Up @@ -104,6 +106,8 @@ public static final class Builder
private Integer maximumBillingTier;
private List<SchemaUpdateOption> schemaUpdateOptions;
private EncryptionConfiguration destinationEncryptionConfiguration;
private TimePartitioning timePartitioning;
private Clustering clustering;

private Builder() {
super(Type.QUERY);
Expand All @@ -129,6 +133,8 @@ private Builder(QueryJobConfiguration jobConfiguration) {
this.maximumBillingTier = jobConfiguration.maximumBillingTier;
this.schemaUpdateOptions = jobConfiguration.schemaUpdateOptions;
this.destinationEncryptionConfiguration = jobConfiguration.destinationEncryptionConfiguration;
this.timePartitioning = jobConfiguration.timePartitioning;
this.clustering = jobConfiguration.clustering;
}

private Builder(com.google.api.services.bigquery.model.JobConfiguration configurationPb) {
Expand Down Expand Up @@ -195,6 +201,12 @@ private Builder(com.google.api.services.bigquery.model.JobConfiguration configur
this.destinationEncryptionConfiguration = new EncryptionConfiguration.Builder(
queryConfigurationPb.getDestinationEncryptionConfiguration()).build();
}
if (queryConfigurationPb.getTimePartitioning() != null) {
this.timePartitioning = TimePartitioning.fromPb(queryConfigurationPb.getTimePartitioning());
}
if (queryConfigurationPb.getClustering() != null) {
this.clustering = Clustering.fromPb(queryConfigurationPb.getClustering());
}
}


Expand Down Expand Up @@ -488,6 +500,22 @@ public Builder setSchemaUpdateOptions(List<SchemaUpdateOption> schemaUpdateOptio
return this;
}

/**
* Sets the time partitioning specification for the destination table.
*/
public Builder setTimePartitioning(TimePartitioning timePartitioning) {
this.timePartitioning = timePartitioning;
return this;
}

/**
* Sets the clustering specification for the destination table.
*/
public Builder setClustering(Clustering clustering) {
this.clustering = clustering;
return this;
}

public QueryJobConfiguration build() {
return new QueryJobConfiguration(this);
}
Expand Down Expand Up @@ -522,6 +550,8 @@ private QueryJobConfiguration(Builder builder) {
this.maximumBillingTier = builder.maximumBillingTier;
this.schemaUpdateOptions = builder.schemaUpdateOptions;
this.destinationEncryptionConfiguration = builder.destinationEncryptionConfiguration;
this.timePartitioning = builder.timePartitioning;
this.clustering = builder.clustering;
}

/**
Expand Down Expand Up @@ -693,6 +723,16 @@ public List<SchemaUpdateOption> getSchemaUpdateOptions() {
return schemaUpdateOptions;
}

/**
* Returns the time partitioning specification for the destination table.
*/
public TimePartitioning getTimePartitioning() { return timePartitioning; }

/**
* Returns the clustering specification for the destination table.
*/
public Clustering getClustering() { return clustering; }

@Override
public Builder toBuilder() {
return new Builder(this);
Expand All @@ -718,7 +758,9 @@ ToStringHelper toStringHelper() {
.add("dryRun", dryRun)
.add("useLegacySql", useLegacySql)
.add("maximumBillingTier", maximumBillingTier)
.add("schemaUpdateOptions", schemaUpdateOptions);
.add("schemaUpdateOptions", schemaUpdateOptions)
.add("timePartitioning", timePartitioning)
.add("clustering", clustering);
}

@Override
Expand All @@ -734,7 +776,7 @@ public int hashCode() {
defaultDataset, flattenResults, priority, query, positionalParameters,
namedParameters, tableDefinitions, useQueryCache,
userDefinedFunctions, writeDisposition, dryRun, useLegacySql, maximumBillingTier,
schemaUpdateOptions);
schemaUpdateOptions, timePartitioning, clustering);
}

@Override
Expand Down Expand Up @@ -813,6 +855,12 @@ com.google.api.services.bigquery.model.JobConfiguration toPb() {
if (destinationEncryptionConfiguration != null) {
queryConfigurationPb.setDestinationEncryptionConfiguration(destinationEncryptionConfiguration.toPb());
}
if (timePartitioning != null) {
queryConfigurationPb.setTimePartitioning(timePartitioning.toPb());
}
if (clustering != null) {
queryConfigurationPb.setClustering(clustering.toPb());
}
return configurationPb.setQuery(queryConfigurationPb);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,13 @@ public abstract static class Builder
*/
public abstract Builder setTimePartitioning(TimePartitioning timePartitioning);

/**
* Set the clustering configuration for the table. If not set, the table is not
* clustered. Clustering is only available for partitioned tables.
*/
public abstract Builder setClustering(Clustering clustering);


/** Creates a {@code StandardTableDefinition} object. */
public abstract StandardTableDefinition build();
}
Expand Down Expand Up @@ -179,6 +186,14 @@ public abstract static class Builder
@Nullable
public abstract TimePartitioning getTimePartitioning();


/**
* Returns the clustering configuration for this table. If {@code null}, the table is not
* clustered.
*/
@Nullable
public abstract Clustering getClustering();

/**
* Returns a builder for a BigQuery standard table definition.
*/
Expand Down Expand Up @@ -212,6 +227,9 @@ Table toPb() {
if (getTimePartitioning() != null) {
tablePb.setTimePartitioning(getTimePartitioning().toPb());
}
if (getClustering() != null) {
tablePb.setClustering(getClustering().toPb());
}
return tablePb;
}

Expand All @@ -227,6 +245,9 @@ static StandardTableDefinition fromPb(Table tablePb) {
if (tablePb.getTimePartitioning() != null) {
builder.setTimePartitioning(TimePartitioning.fromPb(tablePb.getTimePartitioning()));
}
if (tablePb.getClustering() != null) {
builder.setClustering(Clustering.fromPb(tablePb.getClustering()));
}
return builder.setNumBytes(tablePb.getNumBytes()).setLocation(tablePb.getLocation()).build();
}
}
Loading

0 comments on commit 49f93c8

Please sign in to comment.