Externalised destination table cache expiry duration for BigQuery Connector #8283

Merged
2 commits merged on Jun 29, 2021
BigQueryClient.java
@@ -13,6 +13,7 @@
*/
package io.trino.plugin.bigquery;

Member

Please update the commit message to "Move DestinationTableBuilder to BigQueryClient".

Contributor Author

Done.

import com.google.cloud.BaseServiceException;
import com.google.cloud.bigquery.BigQuery;
import com.google.cloud.bigquery.BigQueryException;
import com.google.cloud.bigquery.Dataset;
@@ -31,6 +32,7 @@
import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import com.google.common.collect.ImmutableSet;
import io.airlift.log.Logger;
import io.airlift.units.Duration;
import io.trino.spi.TrinoException;
import io.trino.spi.connector.TableNotFoundException;
@@ -41,6 +43,9 @@
import java.util.Map;
import java.util.Optional;
Member

Please update the commit message to "Make BigQuery views cache ttl configurable". In general we try to follow https://chris.beams.io/posts/git-commit/ for commit messages.

Contributor Author

Done.

import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.function.Supplier;

import static com.google.cloud.bigquery.TableDefinition.Type.TABLE;
@@ -50,6 +55,8 @@
import static com.google.common.collect.Iterables.getOnlyElement;
import static com.google.common.collect.Streams.stream;
import static io.trino.plugin.bigquery.BigQueryErrorCode.BIGQUERY_AMBIGUOUS_OBJECT_NAME;
import static io.trino.plugin.bigquery.BigQueryErrorCode.BIGQUERY_VIEW_DESTINATION_TABLE_CREATION_FAILED;
import static io.trino.plugin.bigquery.BigQueryUtil.convertToBigQueryException;
import static java.lang.String.format;
import static java.util.Locale.ENGLISH;
import static java.util.Objects.requireNonNull;
@@ -59,12 +66,15 @@

class BigQueryClient
{
private static final Logger log = Logger.get(BigQueryClient.class);

private final BigQuery bigQuery;
private final Optional<String> viewMaterializationProject;
private final Optional<String> viewMaterializationDataset;
private final boolean caseInsensitiveNameMatching;
private final Cache<String, Optional<RemoteDatabaseObject>> remoteDatasets;
private final Cache<TableId, Optional<RemoteDatabaseObject>> remoteTables;
private final Cache<String, TableInfo> destinationTableCache;

BigQueryClient(BigQuery bigQuery, BigQueryConfig config)
{
@@ -78,6 +88,10 @@ class BigQueryClient
.expireAfterWrite(caseInsensitiveNameMatchingCacheTtl.toMillis(), MILLISECONDS);
this.remoteDatasets = remoteNamesCacheBuilder.build();
this.remoteTables = remoteNamesCacheBuilder.build();
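// Destination tables created when materializing views, keyed by the generated SQL query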
this.destinationTableCache = CacheBuilder.newBuilder()
.expireAfterWrite(config.getViewsCacheTtl().toMillis(), MILLISECONDS)
.maximumSize(1000)
.build();
}

Optional<RemoteDatabaseObject> toRemoteDataset(String projectId, String datasetName)
@@ -175,6 +189,19 @@ TableInfo getTable(TableId remoteTableId)
return bigQuery.getTable(remoteTableId);
}

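// Returns the destination table that holds the materialized results of the given view query,
// creating and caching it on first use; entries expire after bigquery.views-cache-ttl.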
TableInfo getCachedTable(ReadSessionCreatorConfig config, TableId tableId, List<String> requiredColumns)
{
String query = selectSql(tableId, requiredColumns);
log.debug("query is %s", query);
try {
return destinationTableCache.get(query,
Member

Just noting that the cache key seems to be the entire query string (which can be large). No changes requested at the moment, though.
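A compact key could instead be derived by hashing the generated SQL, for example with Guava's Hashing (Guava is already on the classpath for CacheBuilder). A minimal sketch, not part of this change, that would stand in for query as the cache key in the lookup below:

// Sketch only: fixed-size cache key derived from the (possibly very large) SQL text.
// Assumes com.google.common.hash.Hashing and java.nio.charset.StandardCharsets.UTF_8 are imported.
String cacheKey = Hashing.sha256().hashString(query, UTF_8).toString();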

new DestinationTableBuilder(this, config, query, tableId));
}
catch (ExecutionException e) {
throw new TrinoException(BIGQUERY_VIEW_DESTINATION_TABLE_CREATION_FAILED, "Error creating destination table", e);
}
}

String getProjectId()
{
return bigQuery.getOptions().getProjectId();
@@ -337,4 +364,63 @@ public boolean isAmbiguous()
return remoteNames.size() > 1;
}
}

static class DestinationTableBuilder
Member

Can the movement of this class be done in a preparatory first commit? That would make it easier to see from the diff that this is just a move, with no code changes.

Contributor Author

Do you want me to make that change now? Or is it fine to go ahead?

Member

It would be better to have that, since it makes the history easier to make sense of in the future. The move-only changes would then be extracted into the first commit.

Contributor Author

Done.

implements Callable<TableInfo>
{
private final BigQueryClient bigQueryClient;
private final ReadSessionCreatorConfig config;
private final String query;
private final TableId table;

DestinationTableBuilder(BigQueryClient bigQueryClient, ReadSessionCreatorConfig config, String query, TableId table)
{
this.bigQueryClient = requireNonNull(bigQueryClient, "bigQueryClient is null");
this.config = requireNonNull(config, "config is null");
this.query = requireNonNull(query, "query is null");
this.table = requireNonNull(table, "table is null");
}

@Override
public TableInfo call()
{
return createTableFromQuery();
}

TableInfo createTableFromQuery()
{
TableId destinationTable = bigQueryClient.createDestinationTable(table);
log.debug("destinationTable is %s", destinationTable);
JobInfo jobInfo = JobInfo.of(
QueryJobConfiguration
.newBuilder(query)
.setDestinationTable(destinationTable)
.build());
log.debug("running query %s", jobInfo);
Job job = waitForJob(bigQueryClient.create(jobInfo));
log.debug("job has finished. %s", job);
if (job.getStatus().getError() != null) {
throw convertToBigQueryException(job.getStatus().getError());
}
// add expiration time to the table
TableInfo createdTable = bigQueryClient.getTable(destinationTable);
long expirationTime = createdTable.getCreationTime() +
TimeUnit.HOURS.toMillis(config.viewExpirationTimeInHours);
Table updatedTable = bigQueryClient.update(createdTable.toBuilder()
.setExpirationTime(expirationTime)
.build());
return updatedTable;
}

Job waitForJob(Job job)
{
try {
return job.waitFor();
}
catch (InterruptedException e) {
Thread.currentThread().interrupt();
throw new BigQueryException(BaseServiceException.UNKNOWN_CODE, format("Job %s has been interrupted", job.getJobId()), e);
}
}
}
}
BigQueryConfig.java
@@ -47,6 +47,7 @@ public class BigQueryConfig
private int maxReadRowsRetries = DEFAULT_MAX_READ_ROWS_RETRIES;
private boolean caseInsensitiveNameMatching;
private Duration caseInsensitiveNameMatchingCacheTtl = new Duration(1, MINUTES);
private Duration viewsCacheTtl = new Duration(15, MINUTES);

@AssertTrue(message = "Exactly one of 'bigquery.credentials-key' or 'bigquery.credentials-file' must be specified, or the default GoogleCredentials could be created")
public boolean isCredentialsConfigurationValid()
@@ -219,6 +220,21 @@ public BigQueryConfig setCaseInsensitiveNameMatchingCacheTtl(Duration caseInsens
return this;
}

@NotNull
@MinDuration("0m")
public Duration getViewsCacheTtl()
{
return viewsCacheTtl;
}

@Config("bigquery.views-cache-ttl")
@ConfigDescription("Duration for which the results of querying a view will be cached")
public BigQueryConfig setViewsCacheTtl(Duration viewsCacheTtl)
{
this.viewsCacheTtl = viewsCacheTtl;
return this;
}

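For reference, a minimal sketch of how the new property binds through airlift's configuration machinery (the standalone usage and the 5m value are illustrative; the config test below exercises the same path):

import com.google.common.collect.ImmutableMap;
import io.airlift.configuration.ConfigurationFactory;
import io.airlift.units.Duration;

import static java.util.concurrent.TimeUnit.MINUTES;

// Sketch: "bigquery.views-cache-ttl" is parsed into an airlift Duration and
// lands in BigQueryConfig.viewsCacheTtl via the @Config setter above.
ConfigurationFactory configurationFactory = new ConfigurationFactory(
        ImmutableMap.of("bigquery.views-cache-ttl", "5m"));
BigQueryConfig config = configurationFactory.build(BigQueryConfig.class);
assert config.getViewsCacheTtl().equals(new Duration(5, MINUTES));
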
ReadSessionCreatorConfig createReadSessionCreatorConfig()
{
return new ReadSessionCreatorConfig(
ReadSessionCreator.java
@@ -13,48 +13,25 @@
*/
package io.trino.plugin.bigquery;

import com.google.cloud.BaseServiceException;
import com.google.cloud.bigquery.BigQueryException;
import com.google.cloud.bigquery.Job;
import com.google.cloud.bigquery.JobInfo;
import com.google.cloud.bigquery.QueryJobConfiguration;
import com.google.cloud.bigquery.Table;
import com.google.cloud.bigquery.TableDefinition;
import com.google.cloud.bigquery.TableId;
import com.google.cloud.bigquery.TableInfo;
import com.google.cloud.bigquery.storage.v1beta1.BigQueryStorageClient;
import com.google.cloud.bigquery.storage.v1beta1.ReadOptions;
import com.google.cloud.bigquery.storage.v1beta1.Storage;
import com.google.cloud.bigquery.storage.v1beta1.TableReferenceProto;
import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import io.airlift.log.Logger;
import io.trino.spi.TrinoException;

import java.util.List;
import java.util.Optional;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;

import static io.trino.plugin.bigquery.BigQueryErrorCode.BIGQUERY_VIEW_DESTINATION_TABLE_CREATION_FAILED;
import static io.trino.plugin.bigquery.BigQueryUtil.convertToBigQueryException;
import static io.trino.spi.StandardErrorCode.NOT_SUPPORTED;
import static java.lang.String.format;
import static java.util.Objects.requireNonNull;
import static java.util.stream.Collectors.toList;

// A helper class, also handles view materialization
public class ReadSessionCreator
{
private static final Logger log = Logger.get(ReadSessionCreator.class);

private static final Cache<String, TableInfo> destinationTableCache =
CacheBuilder.newBuilder()
.expireAfterWrite(15, TimeUnit.MINUTES)
.maximumSize(1000)
.build();

private final ReadSessionCreatorConfig config;
private final BigQueryClient bigQueryClient;
private final BigQueryStorageClientFactory bigQueryStorageClientFactory;
@@ -127,78 +104,12 @@ private TableInfo getActualTable(
BigQueryConfig.VIEWS_ENABLED));
}
// get it from the view
String query = bigQueryClient.selectSql(table.getTableId(), requiredColumns);
log.debug("query is %s", query);
try {
return destinationTableCache.get(query, new DestinationTableBuilder(bigQueryClient, config, query, table.getTableId()));
}
catch (ExecutionException e) {
throw new TrinoException(BIGQUERY_VIEW_DESTINATION_TABLE_CREATION_FAILED, "Error creating destination table", e);
}
return bigQueryClient.getCachedTable(config, table.getTableId(), requiredColumns);
}
else {
// not regular table or a view
throw new TrinoException(NOT_SUPPORTED, format("Table type '%s' of table '%s.%s' is not supported",
tableType, table.getTableId().getDataset(), table.getTableId().getTable()));
}
}

private static class DestinationTableBuilder
implements Callable<TableInfo>
{
private final BigQueryClient bigQueryClient;
private final ReadSessionCreatorConfig config;
private final String query;
private final TableId table;

DestinationTableBuilder(BigQueryClient bigQueryClient, ReadSessionCreatorConfig config, String query, TableId table)
{
this.bigQueryClient = requireNonNull(bigQueryClient, "bigQueryClient is null");
this.config = requireNonNull(config, "config is null");
this.query = requireNonNull(query, "query is null");
this.table = requireNonNull(table, "table is null");
}

@Override
public TableInfo call()
{
return createTableFromQuery();
}

TableInfo createTableFromQuery()
{
TableId destinationTable = bigQueryClient.createDestinationTable(table);
log.debug("destinationTable is %s", destinationTable);
JobInfo jobInfo = JobInfo.of(
QueryJobConfiguration
.newBuilder(query)
.setDestinationTable(destinationTable)
.build());
log.debug("running query %s", jobInfo);
Job job = waitForJob(bigQueryClient.create(jobInfo));
log.debug("job has finished. %s", job);
if (job.getStatus().getError() != null) {
throw convertToBigQueryException(job.getStatus().getError());
}
// add expiration time to the table
TableInfo createdTable = bigQueryClient.getTable(destinationTable);
long expirationTime = createdTable.getCreationTime() +
TimeUnit.HOURS.toMillis(config.viewExpirationTimeInHours);
Table updatedTable = bigQueryClient.update(createdTable.toBuilder()
.setExpirationTime(expirationTime)
.build());
return updatedTable;
}

Job waitForJob(Job job)
{
try {
return job.waitFor();
}
catch (InterruptedException e) {
Thread.currentThread().interrupt();
throw new BigQueryException(BaseServiceException.UNKNOWN_CODE, format("Job %s has been interrupted", job.getJobId()), e);
}
}
}
}
TestBigQueryConfig.java
@@ -44,7 +44,8 @@ public void testDefaults()
.setViewMaterializationDataset("vmdataset")
.setMaxReadRowsRetries(10)
.setCaseInsensitiveNameMatching(false)
.setCaseInsensitiveNameMatchingCacheTtl(new Duration(1, MINUTES));
.setCaseInsensitiveNameMatchingCacheTtl(new Duration(1, MINUTES))
.setViewsCacheTtl(new Duration(15, MINUTES));

assertEquals(config.getCredentialsKey(), Optional.of("key"));
assertEquals(config.getCredentialsFile(), Optional.of("cfile"));
@@ -56,6 +57,7 @@ public void testDefaults()
assertEquals(config.getMaxReadRowsRetries(), 10);
assertEquals(config.isCaseInsensitiveNameMatching(), false);
assertEquals(config.getCaseInsensitiveNameMatchingCacheTtl(), new Duration(1, MINUTES));
assertEquals(config.getViewsCacheTtl(), new Duration(15, MINUTES));
}

@Test
@@ -72,6 +74,7 @@ public void testExplicitPropertyMappingsWithCredentialsKey()
.put("bigquery.max-read-rows-retries", "10")
.put("bigquery.case-insensitive-name-matching", "true")
.put("bigquery.case-insensitive-name-matching.cache-ttl", "1s")
.put("bigquery.views-cache-ttl", "1m")
.build();

ConfigurationFactory configurationFactory = new ConfigurationFactory(properties);
@@ -87,6 +90,7 @@ public void testExplicitPropertyMappingsWithCredentialsKey()
assertEquals(config.getMaxReadRowsRetries(), 10);
assertEquals(config.isCaseInsensitiveNameMatching(), true);
assertEquals(config.getCaseInsensitiveNameMatchingCacheTtl(), new Duration(1, SECONDS));
assertEquals(config.getViewsCacheTtl(), new Duration(1, MINUTES));
}

@Test
Expand Down