Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BigQuery: Add ORC format support for load jobs, missing bigtable support. #3391

Merged
merged 8 commits into from
Jun 19, 2018
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
/*
* Copyright 2018 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.cloud.bigquery;

import com.google.common.base.Function;
import com.google.auto.value.AutoValue;
import java.io.Serializable;
import javax.annotation.Nullable;

/**
 * A single Bigtable column that should be exposed as an individual field, together with the
 * per-column settings (field name, encoding, type, latest-version-only) used when reading it.
 *
 * <p>Every property is optional; getters return {@code null} for properties that were never set.
 * Instances are immutable and are created through {@link #newBuilder()}.
 */
@AutoValue
public abstract class BigtableColumn implements Serializable {

  private static final long serialVersionUID = 1L;

  /** Returns the base-64 encoded qualifier of the column, or {@code null} if not set. */
  @Nullable
  public abstract String getQualifierEncoded();

  /** Returns the field name used for this column in queries, or {@code null} if not set. */
  @Nullable
  public abstract String getFieldName();

  /**
   * Returns whether only the latest version of the value in this column is exposed, or {@code
   * null} if not set.
   */
  @Nullable
  public abstract Boolean getOnlyReadLatest();

  /** Returns the encoding of the values (TEXT or BINARY), or {@code null} if not set. */
  @Nullable
  public abstract String getEncoding();

  /** Returns the type the cell values are converted to, or {@code null} if not set. */
  @Nullable
  public abstract String getType();

  /** Builder for {@link BigtableColumn} objects. */
  @AutoValue.Builder
  public abstract static class Builder {

    /**
     * Sets the qualifier of the column.
     *
     * <p>Columns in the parent column family that have this exact qualifier are exposed as an
     * individual field. If the qualifier is a valid UTF-8 string, the service also accepts it in
     * the qualifier_string field; otherwise a base-64 encoded value must be set as
     * qualifier_encoded. The column field name is the same as the column qualifier. However, if
     * the qualifier is not a valid BigQuery field identifier, a valid identifier must be provided
     * as field_name.
     */
    public abstract Builder setQualifierEncoded(String qualifierEncoded);

    /**
     * Sets the column field name. If the qualifier is not a valid BigQuery field identifier, a
     * valid identifier must be provided here; it is then used as the field name in queries.
     */
    public abstract Builder setFieldName(String fieldName);

    /**
     * If this is set, only the latest version of the value in this column is exposed.
     *
     * <p>'onlyReadLatest' can also be set at the column family level. However, the setting at the
     * column level takes precedence if 'onlyReadLatest' is set at both levels.
     */
    public abstract Builder setOnlyReadLatest(Boolean onlyReadLatest);

    /**
     * Sets the encoding of the values when the type is not STRING. Acceptable encoding values are:
     *
     * <ul>
     *   <li>TEXT - indicates values are alphanumeric text strings.
     *   <li>BINARY - indicates values are encoded using HBase Bytes.toBytes family of functions.
     * </ul>
     *
     * <p>Encoding can also be set at the column family level. However, the setting at the column
     * level takes precedence if 'encoding' is set at both levels.
     */
    public abstract Builder setEncoding(String encoding);

    /**
     * Sets the type to convert the value in cells of this column.
     *
     * <p>The values are expected to be encoded using HBase Bytes.toBytes function when using the
     * BINARY encoding value. Following BigQuery types are allowed (case-sensitive): BYTES, STRING,
     * INTEGER, FLOAT, BOOLEAN. Default type is BYTES.
     *
     * <p>'type' can also be set at the column family level. However, the setting at the column
     * level takes precedence if 'type' is set at both levels.
     */
    public abstract Builder setType(String type);

    /** Creates a {@link BigtableColumn} from the values set on this builder. */
    public abstract BigtableColumn build();
  }

  /**
   * Returns a new, empty builder for {@link BigtableColumn} objects.
   *
   * <p>Public so that code outside this package can create columns; the builder type and all of
   * its setters are already public.
   */
  public static Builder newBuilder() {
    return new AutoValue_BigtableColumn.Builder();
  }

  /** Converts the service model object into a {@link BigtableColumn}, copying every property. */
  static BigtableColumn fromPb(com.google.api.services.bigquery.model.BigtableColumn column) {
    return newBuilder()
        .setQualifierEncoded(column.getQualifierEncoded())
        .setFieldName(column.getFieldName())
        .setOnlyReadLatest(column.getOnlyReadLatest())
        .setEncoding(column.getEncoding())
        .setType(column.getType())
        .build();
  }

  /** Converts this column into its service model representation, copying every property. */
  com.google.api.services.bigquery.model.BigtableColumn toPb() {
    return new com.google.api.services.bigquery.model.BigtableColumn()
        .setQualifierEncoded(getQualifierEncoded())
        .setFieldName(getFieldName())
        .setOnlyReadLatest(getOnlyReadLatest())
        .setEncoding(getEncoding())
        .setType(getType());
  }

  /** Function form of {@link #fromPb}, for use with collection transforms. */
  static final Function<com.google.api.services.bigquery.model.BigtableColumn, BigtableColumn>
      FROM_PB_FUNCTION =
          new Function<com.google.api.services.bigquery.model.BigtableColumn, BigtableColumn>() {
            @Override
            public BigtableColumn apply(com.google.api.services.bigquery.model.BigtableColumn pb) {
              return BigtableColumn.fromPb(pb);
            }
          };

  /** Function form of {@link #toPb}, for use with collection transforms. */
  static final Function<BigtableColumn, com.google.api.services.bigquery.model.BigtableColumn>
      TO_PB_FUNCTION =
          new Function<BigtableColumn, com.google.api.services.bigquery.model.BigtableColumn>() {
            @Override
            public com.google.api.services.bigquery.model.BigtableColumn apply(
                BigtableColumn column) {
              return column.toPb();
            }
          };
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
/*
* Copyright 2018 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.cloud.bigquery;

import com.google.common.base.Function;
import com.google.common.collect.Lists;
import com.google.auto.value.AutoValue;
import java.io.Serializable;
import java.util.List;

/**
* List of column families to expose in the table schema along with their types. This list restricts
* the column families that can be referenced in queries and specifies their value types.
*
* You can use this list to do type conversions - see the 'type' field for more details. If you
* leave this list empty, all column families are present in the table schema and their values are
* read as BYTES. During a query only the column families referenced in that query are read from
* Bigtable.
*/

@AutoValue
public abstract class BigtableColumnFamily implements Serializable {

private static final long serialVersionUID = 1L;

public abstract String getFamilyID();

public abstract List<BigtableColumn> getColumns();

public abstract String getEncoding();

public abstract Boolean getOnlyReadLatest();

public abstract String getType();

@AutoValue.Builder
public abstract static class Builder {

/**
* Identifier of the column family.
*/
public abstract Builder setFamilyID(String familyID);

/**
* Lists of columns that should be exposed as individual fields as opposed to a list of (column
* name, value) pairs. All columns whose qualifier matches a qualifier in this list can be
* accessed as .. Other columns can be accessed as a list through .Column field.
*/
public abstract Builder setColumns(List<BigtableColumn> columns);

/**
* The encoding of the values when the type is not STRING.
*
* Acceptable encoding values are: TEXT - indicates values are alphanumeric text strings. BINARY
* - indicates values are encoded using HBase Bytes.toBytes family of functions.
*
* This can be overridden for a specific column by listing that column in 'columns' and
* specifying an encoding for it.
*/
public abstract Builder setEncoding(String encoding);

/**
* If true, only the latest version of values are exposed for all columns in this column family.
* This can be overridden for a specific column by listing that column in 'columns' and
* specifying a different setting for that column.
*/
public abstract Builder setOnlyReadLatest(Boolean onlyReadLatest);

/**
* The type to convert the value in cells of this column family. The values are expected to be
* encoded using HBase Bytes.toBytes function when using the BINARY encoding value.
*
* Following BigQuery types are allowed (case-sensitive): BYTES STRING INTEGER FLOAT BOOLEAN.
*
* The default type is BYTES. This can be overridden for a specific column by listing that
* column in 'columns' and specifying a type for it.
*/
public abstract Builder setType(String type);

public abstract BigtableColumnFamily build();
}

static Builder newBuilder() {
return new AutoValue_BigtableColumnFamily.Builder();
}

static BigtableColumnFamily fromPb(
com.google.api.services.bigquery.model.BigtableColumnFamily columnFamily) {
Builder builder = newBuilder();
builder.setFamilyID(columnFamily.getFamilyId());
builder.setColumns(Lists.transform(columnFamily.getColumns(), BigtableColumn.FROM_PB_FUNCTION));
builder.setEncoding(columnFamily.getEncoding());
builder.setOnlyReadLatest(columnFamily.getOnlyReadLatest());
builder.setType(columnFamily.getType());
return builder.build();

}

com.google.api.services.bigquery.model.BigtableColumnFamily toPb() {
com.google.api.services.bigquery.model.BigtableColumnFamily colFamilyPb = new com.google.api.services.bigquery.model.BigtableColumnFamily()
.setFamilyId(getFamilyID())
.setEncoding(getEncoding())
.setOnlyReadLatest(getOnlyReadLatest())
.setType(getType());
if (getColumns() != null) {
colFamilyPb.setColumns(Lists.transform(getColumns(), BigtableColumn.TO_PB_FUNCTION));
}
return colFamilyPb;
}

static final Function<com.google.api.services.bigquery.model.BigtableColumnFamily, BigtableColumnFamily> FROM_PB_FUNCTION =
new Function<com.google.api.services.bigquery.model.BigtableColumnFamily, BigtableColumnFamily>() {
@Override
public BigtableColumnFamily apply(
com.google.api.services.bigquery.model.BigtableColumnFamily pb) {
return BigtableColumnFamily.fromPb(pb);
}
};

static final Function<BigtableColumnFamily, com.google.api.services.bigquery.model.BigtableColumnFamily> TO_PB_FUNCTION =
new Function<BigtableColumnFamily, com.google.api.services.bigquery.model.BigtableColumnFamily>() {
@Override
public com.google.api.services.bigquery.model.BigtableColumnFamily apply(
BigtableColumnFamily columnFamily) {
return columnFamily.toPb();
}
};
}



Loading