Skip to content

Commit

Permalink
BigQuery: Add ORC format support for load jobs, missing bigtable supp…
Browse files Browse the repository at this point in the history
…ort. (#3391)

* BigQuery: Add ORC format support for load jobs.

Additionally, plumb in the (missing) Bigtable format support for
federated tables.

* add overrides, unit testing

* Wire bigtable up into formatoptions

* add copyright headers.

* Convert BigtableColumn and BigtableColumnFamily to autovalue generation.

* excise unused imports, address codacy kvetching about declaration order.

* Address reviewer comments: formatting/whitespace, serializable, asserts

* unused imports (asserts)
  • Loading branch information
shollyman authored Jun 19, 2018
1 parent a2a9bba commit 5cfc619
Show file tree
Hide file tree
Showing 6 changed files with 597 additions and 4 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
/*
* Copyright 2018 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.cloud.bigquery;

import com.google.common.base.Function;
import com.google.auto.value.AutoValue;
import java.io.Serializable;
import javax.annotation.Nullable;

/**
 * Describes a single Bigtable column that is exposed as an individual field when a Bigtable table
 * is used as a federated BigQuery table.
 *
 * <p>Every property is optional and may be {@code null}; several of them can also be set on the
 * enclosing column family, in which case the column-level setting takes precedence.
 */
@AutoValue
public abstract class BigtableColumn implements Serializable {

  private static final long serialVersionUID = 1L;

  /** Returns the base-64 encoded qualifier of the column, or {@code null} if not set. */
  @Nullable
  public abstract String getQualifierEncoded();

  /** Returns the field name used for this column in queries, or {@code null} if not set. */
  @Nullable
  public abstract String getFieldName();

  /** Returns whether only the latest version of the value is exposed, or {@code null} if not set. */
  @Nullable
  public abstract Boolean getOnlyReadLatest();

  /** Returns the encoding of the values ({@code TEXT} or {@code BINARY}), or {@code null}. */
  @Nullable
  public abstract String getEncoding();

  /** Returns the BigQuery type the cell values are converted to, or {@code null} if not set. */
  @Nullable
  public abstract String getType();

  /**
   * Builder for {@link BigtableColumn}.
   *
   * <p>Setter parameters are annotated {@code @Nullable} to match the nullable getters. AutoValue
   * generates a null check for setter parameters that are not {@code @Nullable}, which would make
   * {@link BigtableColumn#fromPb} throw for any field the API model leaves unset.
   */
  @AutoValue.Builder
  public abstract static class Builder {

    /**
     * Qualifier of the column.
     *
     * <p>Columns in the parent column family that have this exact qualifier are exposed as an
     * individual field. If the qualifier is a valid UTF-8 string, it can be specified in the
     * qualifier_string field. Otherwise, a base-64 encoded value must be set to qualifier_encoded.
     * The column field name is the same as the column qualifier. However, if the qualifier is not
     * a valid BigQuery field identifier, a valid identifier must be provided as field_name.
     */
    public abstract Builder setQualifierEncoded(@Nullable String qualifierEncoded);

    /**
     * If the qualifier is not a valid BigQuery field identifier, a valid identifier must be
     * provided as the column field name and is used as field name in queries.
     */
    public abstract Builder setFieldName(@Nullable String fieldName);

    /**
     * If this is set, only the latest version of the value in this column is exposed.
     *
     * <p>'onlyReadLatest' can also be set at the column family level. However, the setting at the
     * column level takes precedence if 'onlyReadLatest' is set at both levels.
     */
    public abstract Builder setOnlyReadLatest(@Nullable Boolean onlyReadLatest);

    /**
     * The encoding of the values when the type is not STRING. Acceptable encoding values are:
     *
     * <ul>
     *   <li>TEXT - indicates values are alphanumeric text strings.
     *   <li>BINARY - indicates values are encoded using HBase Bytes.toBytes family of functions.
     * </ul>
     *
     * <p>Encoding can also be set at the column family level. However, the setting at the column
     * level takes precedence if 'encoding' is set at both levels.
     */
    public abstract Builder setEncoding(@Nullable String encoding);

    /**
     * The type to convert the value in cells of this column.
     *
     * <p>The values are expected to be encoded using HBase Bytes.toBytes function when using the
     * BINARY encoding value. Following BigQuery types are allowed (case-sensitive): BYTES STRING
     * INTEGER FLOAT BOOLEAN. Default type is BYTES.
     *
     * <p>'type' can also be set at the column family level. However, the setting at the column
     * level takes precedence if 'type' is set at both levels.
     */
    public abstract Builder setType(@Nullable String type);

    /** Creates the immutable {@link BigtableColumn} from the values set on this builder. */
    public abstract BigtableColumn build();
  }

  /** Returns a new, empty builder. */
  static Builder newBuilder() {
    return new AutoValue_BigtableColumn.Builder();
  }

  /**
   * Converts an API model column into a {@link BigtableColumn}, copying each field as-is
   * (including fields that are {@code null} in the model).
   */
  static BigtableColumn fromPb(com.google.api.services.bigquery.model.BigtableColumn column) {
    return newBuilder()
        .setQualifierEncoded(column.getQualifierEncoded())
        .setFieldName(column.getFieldName())
        .setOnlyReadLatest(column.getOnlyReadLatest())
        .setEncoding(column.getEncoding())
        .setType(column.getType())
        .build();
  }

  /** Converts this column into its API model representation, copying each field as-is. */
  com.google.api.services.bigquery.model.BigtableColumn toPb() {
    return new com.google.api.services.bigquery.model.BigtableColumn()
        .setQualifierEncoded(getQualifierEncoded())
        .setFieldName(getFieldName())
        .setOnlyReadLatest(getOnlyReadLatest())
        .setEncoding(getEncoding())
        .setType(getType());
  }

  /** Function form of {@link #fromPb}, for use with collection transforms. */
  static final Function<com.google.api.services.bigquery.model.BigtableColumn, BigtableColumn>
      FROM_PB_FUNCTION =
          new Function<com.google.api.services.bigquery.model.BigtableColumn, BigtableColumn>() {
            @Override
            public BigtableColumn apply(com.google.api.services.bigquery.model.BigtableColumn pb) {
              return BigtableColumn.fromPb(pb);
            }
          };

  /** Function form of {@link #toPb}, for use with collection transforms. */
  static final Function<BigtableColumn, com.google.api.services.bigquery.model.BigtableColumn>
      TO_PB_FUNCTION =
          new Function<BigtableColumn, com.google.api.services.bigquery.model.BigtableColumn>() {
            @Override
            public com.google.api.services.bigquery.model.BigtableColumn apply(
                BigtableColumn column) {
              return column.toPb();
            }
          };
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
/*
* Copyright 2018 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.cloud.bigquery;

import com.google.common.base.Function;
import com.google.common.collect.Lists;
import com.google.auto.value.AutoValue;
import java.io.Serializable;
import java.util.List;

/**
 * A Bigtable column family to expose in the table schema along with its type.
 *
 * <p>The list of column families restricts the column families that can be referenced in queries
 * and specifies their value types. You can use this list to do type conversions - see the 'type'
 * field for more details. If you leave this list empty, all column families are present in the
 * table schema and their values are read as BYTES. During a query only the column families
 * referenced in that query are read from Bigtable.
 */
@AutoValue
public abstract class BigtableColumnFamily implements Serializable {

  private static final long serialVersionUID = 1L;

  /** Returns the identifier of the column family. */
  public abstract String getFamilyID();

  /** Returns the columns that are exposed as individual fields. */
  public abstract List<BigtableColumn> getColumns();

  /** Returns the encoding of the values ({@code TEXT} or {@code BINARY}). */
  public abstract String getEncoding();

  /** Returns whether only the latest version of values is exposed for this column family. */
  public abstract Boolean getOnlyReadLatest();

  /** Returns the BigQuery type the cell values of this column family are converted to. */
  public abstract String getType();

  /** Builder for {@link BigtableColumnFamily}. */
  @AutoValue.Builder
  public abstract static class Builder {

    /** Identifier of the column family. */
    public abstract Builder setFamilyID(String familyID);

    /**
     * Lists of columns that should be exposed as individual fields as opposed to a list of (column
     * name, value) pairs. All columns whose qualifier matches a qualifier in this list can be
     * accessed as individual fields of the column family. Other columns can be accessed as a list
     * through the {@code Column} field of the column family.
     */
    public abstract Builder setColumns(List<BigtableColumn> columns);

    /**
     * The encoding of the values when the type is not STRING.
     *
     * <p>Acceptable encoding values are:
     *
     * <ul>
     *   <li>TEXT - indicates values are alphanumeric text strings.
     *   <li>BINARY - indicates values are encoded using HBase Bytes.toBytes family of functions.
     * </ul>
     *
     * <p>This can be overridden for a specific column by listing that column in 'columns' and
     * specifying an encoding for it.
     */
    public abstract Builder setEncoding(String encoding);

    /**
     * If true, only the latest version of values are exposed for all columns in this column
     * family. This can be overridden for a specific column by listing that column in 'columns' and
     * specifying a different setting for that column.
     */
    public abstract Builder setOnlyReadLatest(Boolean onlyReadLatest);

    /**
     * The type to convert the value in cells of this column family. The values are expected to be
     * encoded using HBase Bytes.toBytes function when using the BINARY encoding value.
     *
     * <p>Following BigQuery types are allowed (case-sensitive): BYTES STRING INTEGER FLOAT BOOLEAN.
     *
     * <p>The default type is BYTES. This can be overridden for a specific column by listing that
     * column in 'columns' and specifying a type for it.
     */
    public abstract Builder setType(String type);

    /** Creates the immutable {@link BigtableColumnFamily} from the values set on this builder. */
    public abstract BigtableColumnFamily build();
  }

  /** Returns a new, empty builder. */
  static Builder newBuilder() {
    return new AutoValue_BigtableColumnFamily.Builder();
  }

  /**
   * Converts an API model column family into a {@link BigtableColumnFamily}.
   *
   * <p>The columns are copied eagerly into a concrete, unmodifiable list. A lazy {@code
   * Lists.transform} view would hold a reference to the non-serializable conversion function
   * (breaking serialization of this {@link Serializable} value) and would re-convert every element
   * on each access. A model without columns yields an empty column list instead of a {@link
   * NullPointerException}.
   *
   * <p>NOTE(review): encoding, onlyReadLatest and type are passed through unchanged; since their
   * getters are not {@code @Nullable}, a model that leaves them unset is presumably still rejected
   * by the generated builder - confirm whether those properties should become nullable.
   */
  static BigtableColumnFamily fromPb(
      com.google.api.services.bigquery.model.BigtableColumnFamily columnFamily) {
    List<com.google.api.services.bigquery.model.BigtableColumn> columnsPb =
        columnFamily.getColumns();
    List<BigtableColumn> columns;
    if (columnsPb == null) {
      columns = java.util.Collections.<BigtableColumn>emptyList();
    } else {
      columns =
          java.util.Collections.unmodifiableList(
              Lists.newArrayList(Lists.transform(columnsPb, BigtableColumn.FROM_PB_FUNCTION)));
    }
    return newBuilder()
        .setFamilyID(columnFamily.getFamilyId())
        .setColumns(columns)
        .setEncoding(columnFamily.getEncoding())
        .setOnlyReadLatest(columnFamily.getOnlyReadLatest())
        .setType(columnFamily.getType())
        .build();
  }

  /**
   * Converts this column family into its API model representation.
   *
   * <p>The converted columns are copied into a concrete list so the model does not hold a lazy
   * view over this object's column list.
   */
  com.google.api.services.bigquery.model.BigtableColumnFamily toPb() {
    com.google.api.services.bigquery.model.BigtableColumnFamily colFamilyPb =
        new com.google.api.services.bigquery.model.BigtableColumnFamily()
            .setFamilyId(getFamilyID())
            .setEncoding(getEncoding())
            .setOnlyReadLatest(getOnlyReadLatest())
            .setType(getType());
    List<BigtableColumn> columns = getColumns();
    // Defensive guard kept from the original implementation.
    if (columns != null) {
      colFamilyPb.setColumns(
          Lists.newArrayList(Lists.transform(columns, BigtableColumn.TO_PB_FUNCTION)));
    }
    return colFamilyPb;
  }

  /** Function form of {@link #fromPb}, for use with collection transforms. */
  static final Function<
          com.google.api.services.bigquery.model.BigtableColumnFamily, BigtableColumnFamily>
      FROM_PB_FUNCTION =
          new Function<
              com.google.api.services.bigquery.model.BigtableColumnFamily, BigtableColumnFamily>() {
            @Override
            public BigtableColumnFamily apply(
                com.google.api.services.bigquery.model.BigtableColumnFamily pb) {
              return BigtableColumnFamily.fromPb(pb);
            }
          };

  /** Function form of {@link #toPb}, for use with collection transforms. */
  static final Function<
          BigtableColumnFamily, com.google.api.services.bigquery.model.BigtableColumnFamily>
      TO_PB_FUNCTION =
          new Function<
              BigtableColumnFamily, com.google.api.services.bigquery.model.BigtableColumnFamily>() {
            @Override
            public com.google.api.services.bigquery.model.BigtableColumnFamily apply(
                BigtableColumnFamily columnFamily) {
              return columnFamily.toPb();
            }
          };
}



Loading

0 comments on commit 5cfc619

Please sign in to comment.