From 34e2016765de95b564d961517663ca41324c6236 Mon Sep 17 00:00:00 2001 From: KaanTolunayKilicOG Date: Sat, 2 Nov 2024 16:43:20 +0100 Subject: [PATCH 1/3] GBICSGO-2361: optimize count partition query --- frontend/src/components/PricePrediction.vue | 2 +- pkg/service/bigquery/bigquery_client.go | 76 +++------------------ 2 files changed, 11 insertions(+), 67 deletions(-) diff --git a/frontend/src/components/PricePrediction.vue b/frontend/src/components/PricePrediction.vue index 6bf2b626..bd74865a 100644 --- a/frontend/src/components/PricePrediction.vue +++ b/frontend/src/components/PricePrediction.vue @@ -50,7 +50,7 @@ const pricePredictionOptions = ref({ }); const backupStatusIsDeleted = computed(() => { - return props.backup?.status === BackupStatus.FINISHED || props.backup?.status === BackupStatus.BACKUP_DELETED; + return props.backup?.status === BackupStatus.FINISHED || props.backup?.status === BackupStatus.BACKUP_DELETED || props.backup?.status === BackupStatus.BACKUP_SOURCE_DELETED }); const updateData = () => { diff --git a/pkg/service/bigquery/bigquery_client.go b/pkg/service/bigquery/bigquery_client.go index f1df3e97..ea8a29a5 100644 --- a/pkg/service/bigquery/bigquery_client.go +++ b/pkg/service/bigquery/bigquery_client.go @@ -2,16 +2,14 @@ package bigquery import ( "context" + "errors" "fmt" "net/http" - "strconv" "time" bq "cloud.google.com/go/bigquery" - "cloud.google.com/go/civil" "github.com/ottogroup/penelope/pkg/config" "github.com/ottogroup/penelope/pkg/http/impersonate" - "github.com/pkg/errors" "go.opencensus.io/trace" gimpersonate "google.golang.org/api/impersonate" "google.golang.org/api/iterator" @@ -215,35 +213,8 @@ func (d *defaultBigQueryClient) HasTablePartitions(ctxIn context.Context, projec } type tablePartition struct { - Total int64 `bigquery:"total"` - TIMESTAMP time.Time `bigquery:"T_TIMESTAMP"` - DATE civil.Date `bigquery:"T_DATE"` - TIME civil.Time `bigquery:"T_TIME"` - DATETIME civil.DateTime `bigquery:"T_DATETIME"` -} - -func (t *tablePartition) getPartitionFor(targetField string) (string, error) { - var partitionName string - switch targetField { - case "T_TIMESTAMP": - partitionName = fmt.Sprintf("%s%s%s", - zerofill(t.TIMESTAMP.Year()), - zerofill(int(t.TIMESTAMP.Month())), - zerofill(t.TIMESTAMP.Day())) - case "T_DATE": - partitionName = fmt.Sprintf("%s%s%s", - zerofill(t.DATE.Year), - zerofill(int(t.DATE.Month)), - zerofill(t.DATE.Day)) - case "T_DATETIME": - partitionName = fmt.Sprintf("%s%s%s", - zerofill(t.DATETIME.Date.Year), - zerofill(int(t.DATETIME.Date.Month)), - zerofill(t.DATETIME.Date.Day)) - default: - return "", fmt.Errorf("partition for target field %q is not supported", targetField) - } - return partitionName, nil + Total int64 `bigquery:"total"` + PartitionID string `bigquery:"partition_id"` } // GetTablePartitions list all partitions in table @@ -260,24 +231,9 @@ func (d *defaultBigQueryClient) GetTablePartitions(ctxIn context.Context, projec return nil, fmt.Errorf("GetTablePartitions failed for `%s.%s.%s`, because partition other then DAY is not supported", project, dataset, table) } - timePartitioningField := "_PARTITIONTIME" - targetFieldInTablePartition := "T_TIMESTAMP" - if metadata.TimePartitioning.Field != "" { - timePartitioningField = metadata.TimePartitioning.Field - for _, schema := range metadata.Schema.Relax() { - if schema.Name == metadata.TimePartitioning.Field { - targetFieldInTablePartition = fmt.Sprintf("T_%s", schema.Type) - break - } - } - } - var partitions []*Table - q := fmt.Sprintf("SELECT COUNT(*) AS total, 
%s AS %s FROM `%s.%s.%s` WHERE %s IS NOT NULL GROUP BY %s", - timePartitioningField, targetFieldInTablePartition, + q := fmt.Sprintf("SELECT total_rows AS total, partition_id FROM `%s.%s.INFORMATION_SCHEMA.PARTITIONS` WHERE TABLE_NAME = '%s'", project, dataset, table, - timePartitioningField, - targetFieldInTablePartition, ) run, err := d.client.Query(q).Run(ctx) @@ -292,20 +248,16 @@ func (d *defaultBigQueryClient) GetTablePartitions(ctxIn context.Context, projec for { var s tablePartition err := rowIt.Next(&s) - if err == iterator.Done { + if errors.Is(err, iterator.Done) { break - } - if err != iterator.Done && err != nil { + } else if err != nil { return nil, err } if s.Total == 0 { continue } - partition, err := s.getPartitionFor(targetFieldInTablePartition) - if err != nil { - return nil, fmt.Errorf("GetTablePartitions failed for `%s.%s.%s`, because partition with %s is not supported", project, dataset, table, targetFieldInTablePartition) - } + partition := s.PartitionID if _, exists := partitionMetadataCollected[partition]; exists { // tables that where updated multiple times in the same day are skipped continue @@ -330,11 +282,10 @@ func (d *defaultBigQueryClient) GetDatasets(ctxIn context.Context, project strin it.ProjectID = project for { dataset, err := it.Next() - if err == iterator.Done { + if errors.Is(err, iterator.Done) { break - } - if err != nil && err != iterator.Done { - return []string{}, errors.Wrap(err, fmt.Sprintf("Datasets.Next() failed for project %s", project)) + } else if err != nil { + return []string{}, errors.Join(err, fmt.Errorf("Datasets.Next() failed for project %s", project)) } if dataset == nil { return datasets, fmt.Errorf("datasets are nil for project %s", project) @@ -344,13 +295,6 @@ func (d *defaultBigQueryClient) GetDatasets(ctxIn context.Context, project strin return datasets, err } -func zerofill(intToFill int) string { - if intToFill < 10 { - return "0" + strconv.Itoa(intToFill) - } - return strconv.Itoa(intToFill) -} - // GetDatasetDetails get the details of a bigquery dataset func (d *defaultBigQueryClient) GetDatasetDetails(ctxIn context.Context, datasetId string) (*bq.DatasetMetadata, error) { ctx, span := trace.StartSpan(ctxIn, "(*defaultBigQueryClient).GetDatasetDetails") From 26da7c77df4c7153a98c0e02f86a5eb057886687 Mon Sep 17 00:00:00 2001 From: KaanTolunayKilicOG Date: Wed, 6 Nov 2024 09:00:35 +0100 Subject: [PATCH 2/3] GBICSGO-2313: catch null for partiton id --- pkg/service/bigquery/bigquery_client.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pkg/service/bigquery/bigquery_client.go b/pkg/service/bigquery/bigquery_client.go index ea8a29a5..85a0250b 100644 --- a/pkg/service/bigquery/bigquery_client.go +++ b/pkg/service/bigquery/bigquery_client.go @@ -258,6 +258,10 @@ func (d *defaultBigQueryClient) GetTablePartitions(ctxIn context.Context, projec } partition := s.PartitionID + if partition == "" { + return nil, fmt.Errorf("GetTablePartitions failed for `%s.%s.%s`, because partition_id is empty", project, dataset, table) + } + if _, exists := partitionMetadataCollected[partition]; exists { // tables that where updated multiple times in the same day are skipped continue From 4161f33aefb2242593f9ec4227bde733236c1e6e Mon Sep 17 00:00:00 2001 From: KaanTolunayKilicOG Date: Thu, 7 Nov 2024 09:24:28 +0100 Subject: [PATCH 3/3] GBICSGO-2361: adds test for GetTablePartitions --- pkg/http/mock/fixtures.go | 8 ++++ pkg/service/bigquery/bigquery_client_test.go | 43 ++++++++++++++++++++ 2 files changed, 51 insertions(+) create mode 100644 
pkg/service/bigquery/bigquery_client_test.go diff --git a/pkg/http/mock/fixtures.go b/pkg/http/mock/fixtures.go index 0907355b..8517d983 100644 --- a/pkg/http/mock/fixtures.go +++ b/pkg/http/mock/fixtures.go @@ -63,6 +63,8 @@ var ( GetBackUpSourceNotFoundForBigQueryMock = NewMockedHTTPRequest("GET", "bigquery/v2/projects/.*/datasets/notExistingDataset", getBackUpSourceNotFoundResponseForBigQuery) GetBackUpSourceNotFoundForCloudStorageMock = NewMockedHTTPRequest("GET", "/storage/v1/b/notExistingBucket", getBackUpSourceNotFoundResponseForCloudStorage) + + TableMetadataPartitionResultHTTPMock = NewMockedHTTPRequest("GET", "/bigquery/v2/projects/.*/queries", getMetadataTablePartitionsQueryResponse) ) const ( @@ -178,6 +180,12 @@ Content-Type: application/json; charset=UTF-8 {"kind":"bigquery#getQueryResultsResponse","etag":"P0Ea2Yx1PihRPFrsH/Q3fA==","schema":{"fields":[{"name":"total","type":"INTEGER","mode":"NULLABLE"},{"name":"p","type":"TIMESTAMP","mode":"NULLABLE"}]},"jobReference":{"projectId":"local-ability-backup","jobID":"PYRwoNSDhUNuqUtVbxtkjbSvmx7","location":"EU"},"totalRows":"76","totalBytesProcessed":"0","jobComplete":true,"cacheHit":false}` + getMetadataTablePartitionsQueryResponse = `HTTP/2.0 200 OK +Content-Length: 1462 +Content-Type: application/json; charset=UTF-8 + +{"kind": "bigquery#getQueryResultsResponse", "etag": "P0Ea2Yx1PihRPFrsH/Q3fA==", "schema": {"fields": [{"name": "total", "fullname": "total", "type": "INTEGER", "mode": "NULLABLE", "description": "", "fields": [], "hasPermission": true, "dataPolicies": [], "maxLength": "0", "precision": "0", "scale": "0", "roundingMode": "ROUNDING_MODE_UNSPECIFIED", "collation": "", "defaultValueExpression": "", "isMeasure": false, "rangeElementType": {"type": ""}, "foreignTypeDefinition": "", "dataGovernanceTags": [], "identityColumnInfo": {"generatedMode": "GENERATED_MODE_UNSPECIFIED", "start": "", "increment": ""}}, {"name": "partition_id", "fullname": "partition_id", "type": "STRING", "mode": "NULLABLE", "description": "", "fields": [], "hasPermission": true, "dataPolicies": [], "maxLength": "0", "precision": "0", "scale": "0", "roundingMode": "ROUNDING_MODE_UNSPECIFIED", "collation": "", "defaultValueExpression": "", "isMeasure": false, "rangeElementType": {"type": ""}, "foreignTypeDefinition": "", "dataGovernanceTags": [], "identityColumnInfo": {"generatedMode": "GENERATED_MODE_UNSPECIFIED", "start": "", "increment": ""}}]}, "rows": [{"f": [{"v": "22498"}, {"v": "20190110"}]}, {"f": [{"v": "65"}, {"v": "20200229"}]}], "jobReference": {"projectId": "local-ability-backup", "jobID": "PYRwoNSDhUNuqUtVbxtkjbSvmx7", "location": "EU"}, "totalRows": "2", "totalBytesProcessed": "0", "jobComplete": true, "cacheHit": false}` + getExtractJobResultOkResponse = `HTTP/2.0 200 OK Content-Length: 1191 Content-Type: application/json; charset=UTF-8 diff --git a/pkg/service/bigquery/bigquery_client_test.go b/pkg/service/bigquery/bigquery_client_test.go new file mode 100644 index 00000000..34c88eaf --- /dev/null +++ b/pkg/service/bigquery/bigquery_client_test.go @@ -0,0 +1,43 @@ +package bigquery + +import ( + "context" + "github.com/ottogroup/penelope/pkg/http/mock" + "github.com/stretchr/testify/assert" + "os" + "testing" +) + +func init() { + os.Setenv("PENELOPE_USE_DEFAULT_HTTP_CLIENT", "true") +} + +func TestNewBigQueryClient(t *testing.T) { + httpMockHandler := mock.NewHTTPMockHandler() + httpMocks := []mock.MockedHTTPRequest{ // /bigquery/v2/projects/.*/datasets/unknown-dataset + mock.ImpersonationHTTPMock, 
mock.RetrieveAccessTokenHTTPMock, + mock.DatasetInfoHTTPMock, mock.TableInfoHTTPMock, + mock.TablePartitionJobHTTPMock, mock.TableMetadataPartitionResultHTTPMock, + mock.ExtractJobResultOkHTTPMock, + } + httpMockHandler.Register(httpMocks...) + httpMockHandler.Start() + defer httpMockHandler.Stop() + + ctx := context.Background() + provider := &mockImpersonatedTokenConfigProvider{} + client, err := NewBigQueryClient(ctx, provider, "test-project", "test-project-backup") + assert.NoError(t, err) + + partitions, err := client.GetTablePartitions(ctx, "test-project", "example-dataset", "example-table") + assert.NoError(t, err) + assert.NotEmpty(t, partitions) + assert.Len(t, partitions, 2) +} + +type mockImpersonatedTokenConfigProvider struct { +} + +func (mi *mockImpersonatedTokenConfigProvider) GetTargetPrincipalForProject(ctxIn context.Context, projectID string) (string, []string, error) { + return "example@test-project-backup.iam.gserviceaccount.com", nil, nil +}
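
Note on the GetTablePartitions change in PATCH 1/3: per-partition row counts are now read from the table's INFORMATION_SCHEMA.PARTITIONS view instead of a COUNT(*) ... GROUP BY over the partitioning column, so the query no longer scans the table data itself, and PATCH 2/3 additionally rejects an empty partition_id. For reference, below is a minimal, self-contained sketch of the same query pattern; it is not code from the penelope repository, and the project, dataset, and table names are placeholders.

package main

import (
	"context"
	"errors"
	"fmt"
	"log"

	"cloud.google.com/go/bigquery"
	"google.golang.org/api/iterator"
)

// partitionRow mirrors the two columns selected from INFORMATION_SCHEMA.PARTITIONS.
// NULL-aware types are used here because partition_id and total_rows may come back
// NULL in some cases (the second commit in this series guards against that).
type partitionRow struct {
	Total       bigquery.NullInt64  `bigquery:"total"`
	PartitionID bigquery.NullString `bigquery:"partition_id"`
}

func main() {
	ctx := context.Background()
	// "my-project", "my_dataset" and "my_table" are hypothetical identifiers.
	client, err := bigquery.NewClient(ctx, "my-project")
	if err != nil {
		log.Fatal(err)
	}
	defer client.Close()

	// Metadata-only lookup: one row per partition with its row count.
	q := client.Query(
		"SELECT total_rows AS total, partition_id " +
			"FROM `my-project.my_dataset.INFORMATION_SCHEMA.PARTITIONS` " +
			"WHERE table_name = 'my_table'")
	it, err := q.Read(ctx)
	if err != nil {
		log.Fatal(err)
	}
	for {
		var row partitionRow
		err := it.Next(&row)
		if errors.Is(err, iterator.Done) {
			break
		} else if err != nil {
			log.Fatal(err)
		}
		fmt.Printf("partition=%s rows=%d\n", row.PartitionID.StringVal, row.Total.Int64)
	}
}

The partition_id values returned for a daily-partitioned table have the same YYYYMMDD shape as the names the old zerofill-based code assembled by hand, which is why the handler in the patch can use them directly.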