-
Notifications
You must be signed in to change notification settings - Fork 1.1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
BigQuery: Add ORC format support for load jobs, missing bigtable support. (#3391)
* BigQuery: Add ORC format support for load jobs. Additionally, plumb in the (missing) Bigtable format support for federated tables. * add overrides, unit testing * Wire bigtable up into formatoptions * add copyright headers. * Convert BigtableColumn and BigtableColumnFamily to autovalue generation. * excise unused imports, address codacy kvetching about declaration order. * Address reviewer comments: formatting/whitespace, serializable, asserts * unused imports (asserts)
- Loading branch information
Showing
6 changed files
with
597 additions
and
4 deletions.
There are no files selected for viewing
137 changes: 137 additions & 0 deletions
137
...clients/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/BigtableColumn.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,137 @@ | ||
/* | ||
* Copyright 2018 Google LLC | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package com.google.cloud.bigquery; | ||
|
||
import com.google.common.base.Function; | ||
import com.google.auto.value.AutoValue; | ||
import java.io.Serializable; | ||
import javax.annotation.Nullable; | ||
|
||
@AutoValue | ||
public abstract class BigtableColumn implements Serializable { | ||
|
||
private static final long serialVersionUID = 1L; | ||
|
||
@Nullable | ||
public abstract String getQualifierEncoded(); | ||
|
||
@Nullable | ||
public abstract String getFieldName(); | ||
|
||
@Nullable | ||
public abstract Boolean getOnlyReadLatest(); | ||
|
||
@Nullable | ||
public abstract String getEncoding(); | ||
|
||
@Nullable | ||
public abstract String getType(); | ||
|
||
@AutoValue.Builder | ||
public abstract static class Builder { | ||
|
||
/** | ||
* Qualifier of the column. | ||
* | ||
* Columns in the parent column family that has this exact qualifier are exposed as . field. If | ||
* the qualifier is valid UTF-8 string, it can be specified in the qualifier_string field. | ||
* Otherwise, a base-64 encoded value must be set to qualifier_encoded. The column field name is | ||
* the same as the column qualifier. However, if the qualifier is not a valid BigQuery field | ||
* identifier, a valid identifier must be provided as field_name. | ||
*/ | ||
public abstract Builder setQualifierEncoded(String qualifierEncoded); | ||
|
||
/** | ||
* If the qualifier is not a valid BigQuery field identifier, a valid identifier must be | ||
* provided as the column field name and is used as field name in queries. | ||
*/ | ||
public abstract Builder setFieldName(String fieldName); | ||
|
||
/** | ||
* If this is set, only the latest version of value in this column are exposed. | ||
* | ||
* 'onlyReadLatest' can also be set at the column family level. However, the setting at the | ||
* column level takes precedence if 'onlyReadLatest' is set at both levels. | ||
*/ | ||
public abstract Builder setOnlyReadLatest(Boolean onlyReadLatest); | ||
|
||
/** | ||
* The encoding of the values when the type is not STRING. Acceptable encoding values are: TEXT | ||
* - indicates values are alphanumeric text strings. BINARY - indicates values are encoded using | ||
* HBase Bytes.toBytes family of functions. | ||
* | ||
* Encoding can also be set at the column family level. However, the setting at the column level | ||
* takes precedence if 'encoding' is set at both levels. | ||
*/ | ||
public abstract Builder setEncoding(String encoding); | ||
|
||
/** | ||
* The type to convert the value in cells of this column. | ||
* | ||
* The values are expected to be encoded using HBase Bytes.toBytes function when using the | ||
* BINARY encoding value. Following BigQuery types are allowed (case-sensitive): BYTES STRING | ||
* INTEGER FLOAT BOOLEAN Default type is BYTES. | ||
* | ||
* 'type' can also be set at the column family level. However, the setting at the column level | ||
* takes precedence if 'type' is set at both levels. | ||
*/ | ||
public abstract Builder setType(String type); | ||
|
||
public abstract BigtableColumn build(); | ||
} | ||
|
||
static Builder newBuilder() { | ||
return new AutoValue_BigtableColumn.Builder(); | ||
} | ||
|
||
static BigtableColumn fromPb(com.google.api.services.bigquery.model.BigtableColumn column) { | ||
Builder builder = newBuilder(); | ||
builder.setQualifierEncoded(column.getQualifierEncoded()); | ||
builder.setFieldName(column.getFieldName()); | ||
builder.setOnlyReadLatest(column.getOnlyReadLatest()); | ||
builder.setEncoding(column.getEncoding()); | ||
builder.setType(column.getType()); | ||
return builder.build(); | ||
} | ||
|
||
com.google.api.services.bigquery.model.BigtableColumn toPb() { | ||
com.google.api.services.bigquery.model.BigtableColumn column = new com.google.api.services.bigquery.model.BigtableColumn() | ||
.setQualifierEncoded(getQualifierEncoded()) | ||
.setFieldName(getFieldName()) | ||
.setOnlyReadLatest(getOnlyReadLatest()) | ||
.setEncoding(getEncoding()) | ||
.setType(getType()); | ||
return column; | ||
} | ||
|
||
static final Function<com.google.api.services.bigquery.model.BigtableColumn, BigtableColumn> FROM_PB_FUNCTION = | ||
new Function<com.google.api.services.bigquery.model.BigtableColumn, BigtableColumn>() { | ||
@Override | ||
public BigtableColumn apply( | ||
com.google.api.services.bigquery.model.BigtableColumn pb) { | ||
return BigtableColumn.fromPb(pb); | ||
} | ||
}; | ||
|
||
static final Function<BigtableColumn, com.google.api.services.bigquery.model.BigtableColumn> TO_PB_FUNCTION = | ||
new Function<BigtableColumn, com.google.api.services.bigquery.model.BigtableColumn>() { | ||
@Override | ||
public com.google.api.services.bigquery.model.BigtableColumn apply( | ||
BigtableColumn column) { | ||
return column.toPb(); | ||
} | ||
}; | ||
} |
144 changes: 144 additions & 0 deletions
144
...s/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/BigtableColumnFamily.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,144 @@ | ||
/* | ||
* Copyright 2018 Google LLC | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package com.google.cloud.bigquery; | ||
|
||
import com.google.auto.value.AutoValue;
import com.google.common.base.Function;
import com.google.common.collect.Lists;
import java.io.Serializable;
import java.util.List;
import javax.annotation.Nullable;
|
||
/** | ||
* List of column families to expose in the table schema along with their types. This list restricts | ||
* the column families that can be referenced in queries and specifies their value types. | ||
* | ||
* You can use this list to do type conversions - see the 'type' field for more details. If you | ||
* leave this list empty, all column families are present in the table schema and their values are | ||
* read as BYTES. During a query only the column families referenced in that query are read from | ||
* Bigtable. | ||
*/ | ||
|
||
@AutoValue | ||
public abstract class BigtableColumnFamily implements Serializable { | ||
|
||
private static final long serialVersionUID = 1L; | ||
|
||
public abstract String getFamilyID(); | ||
|
||
public abstract List<BigtableColumn> getColumns(); | ||
|
||
public abstract String getEncoding(); | ||
|
||
public abstract Boolean getOnlyReadLatest(); | ||
|
||
public abstract String getType(); | ||
|
||
@AutoValue.Builder | ||
public abstract static class Builder { | ||
|
||
/** | ||
* Identifier of the column family. | ||
*/ | ||
public abstract Builder setFamilyID(String familyID); | ||
|
||
/** | ||
* Lists of columns that should be exposed as individual fields as opposed to a list of (column | ||
* name, value) pairs. All columns whose qualifier matches a qualifier in this list can be | ||
* accessed as .. Other columns can be accessed as a list through .Column field. | ||
*/ | ||
public abstract Builder setColumns(List<BigtableColumn> columns); | ||
|
||
/** | ||
* The encoding of the values when the type is not STRING. | ||
* | ||
* Acceptable encoding values are: TEXT - indicates values are alphanumeric text strings. BINARY | ||
* - indicates values are encoded using HBase Bytes.toBytes family of functions. | ||
* | ||
* This can be overridden for a specific column by listing that column in 'columns' and | ||
* specifying an encoding for it. | ||
*/ | ||
public abstract Builder setEncoding(String encoding); | ||
|
||
/** | ||
* If true, only the latest version of values are exposed for all columns in this column family. | ||
* This can be overridden for a specific column by listing that column in 'columns' and | ||
* specifying a different setting for that column. | ||
*/ | ||
public abstract Builder setOnlyReadLatest(Boolean onlyReadLatest); | ||
|
||
/** | ||
* The type to convert the value in cells of this column family. The values are expected to be | ||
* encoded using HBase Bytes.toBytes function when using the BINARY encoding value. | ||
* | ||
* Following BigQuery types are allowed (case-sensitive): BYTES STRING INTEGER FLOAT BOOLEAN. | ||
* | ||
* The default type is BYTES. This can be overridden for a specific column by listing that | ||
* column in 'columns' and specifying a type for it. | ||
*/ | ||
public abstract Builder setType(String type); | ||
|
||
public abstract BigtableColumnFamily build(); | ||
} | ||
|
||
static Builder newBuilder() { | ||
return new AutoValue_BigtableColumnFamily.Builder(); | ||
} | ||
|
||
static BigtableColumnFamily fromPb( | ||
com.google.api.services.bigquery.model.BigtableColumnFamily columnFamily) { | ||
Builder builder = newBuilder(); | ||
builder.setFamilyID(columnFamily.getFamilyId()); | ||
builder.setColumns(Lists.transform(columnFamily.getColumns(), BigtableColumn.FROM_PB_FUNCTION)); | ||
builder.setEncoding(columnFamily.getEncoding()); | ||
builder.setOnlyReadLatest(columnFamily.getOnlyReadLatest()); | ||
builder.setType(columnFamily.getType()); | ||
return builder.build(); | ||
|
||
} | ||
|
||
com.google.api.services.bigquery.model.BigtableColumnFamily toPb() { | ||
com.google.api.services.bigquery.model.BigtableColumnFamily colFamilyPb = new com.google.api.services.bigquery.model.BigtableColumnFamily() | ||
.setFamilyId(getFamilyID()) | ||
.setEncoding(getEncoding()) | ||
.setOnlyReadLatest(getOnlyReadLatest()) | ||
.setType(getType()); | ||
if (getColumns() != null) { | ||
colFamilyPb.setColumns(Lists.transform(getColumns(), BigtableColumn.TO_PB_FUNCTION)); | ||
} | ||
return colFamilyPb; | ||
} | ||
|
||
static final Function<com.google.api.services.bigquery.model.BigtableColumnFamily, BigtableColumnFamily> FROM_PB_FUNCTION = | ||
new Function<com.google.api.services.bigquery.model.BigtableColumnFamily, BigtableColumnFamily>() { | ||
@Override | ||
public BigtableColumnFamily apply( | ||
com.google.api.services.bigquery.model.BigtableColumnFamily pb) { | ||
return BigtableColumnFamily.fromPb(pb); | ||
} | ||
}; | ||
|
||
static final Function<BigtableColumnFamily, com.google.api.services.bigquery.model.BigtableColumnFamily> TO_PB_FUNCTION = | ||
new Function<BigtableColumnFamily, com.google.api.services.bigquery.model.BigtableColumnFamily>() { | ||
@Override | ||
public com.google.api.services.bigquery.model.BigtableColumnFamily apply( | ||
BigtableColumnFamily columnFamily) { | ||
return columnFamily.toPb(); | ||
} | ||
}; | ||
} | ||
|
||
|
||
|
Oops, something went wrong.