-
Notifications
You must be signed in to change notification settings - Fork 11
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Adaptive Parallelism in import-data (#1726)
import-data will periodically fetch resource usage metrics from the target YugabyteDB and adapt the parallelism it uses (i.e. the parallel-jobs value) accordingly. The aim is to keep the cluster stable throughout, while keeping data ingestion as efficient as possible.
- Loading branch information
1 parent
42064d8
commit b465788
Showing
14 changed files
with
666 additions
and
13 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
106 changes: 106 additions & 0 deletions
106
yb-voyager/src/adaptiveparallelism/adaptive_parallelism.go
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,106 @@ | ||
/* | ||
Copyright (c) YugabyteDB, Inc. | ||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
*/ | ||
package adaptiveparallelism | ||
|
||
import ( | ||
"fmt" | ||
"strconv" | ||
"time" | ||
|
||
"github.com/davecgh/go-spew/spew" | ||
log "github.com/sirupsen/logrus" | ||
) | ||
|
||
// Keys looked up in the per-node metrics map returned by GetClusterMetrics.
const (
	CPU_USAGE_USER   = "cpu_usage_user"
	CPU_USAGE_SYSTEM = "cpu_usage_system"
)

// Tuning knobs of the adaptation loop.
const (
	// MAX_CPU_THRESHOLD is the CPU usage percentage above which parallelism
	// is reduced; at or below it, parallelism is increased.
	MAX_CPU_THRESHOLD = 70
	// ADAPTIVE_PARALLELISM_FREQUENCY is the pause before each adaptation step.
	ADAPTIVE_PARALLELISM_FREQUENCY = 10 * time.Second
)
|
||
// TargetYugabyteDBWithConnectionPool is the view of the target database that
// the adaptive-parallelism loop depends on: a source of cluster metrics plus
// a connection pool whose size can be inspected and adjusted.
type TargetYugabyteDBWithConnectionPool interface {
	// IsAdaptiveParallelismSupported reports whether the target can be used
	// with adaptive parallelism at all.
	IsAdaptiveParallelismSupported() bool

	// GetClusterMetrics returns the metrics as
	// node_uuid -> metric_name -> metric_value.
	GetClusterMetrics() (map[string]map[string]string, error)

	// GetNumConnectionsInPool returns the pool's connection count
	// (presumably the current size; confirm against the implementation).
	GetNumConnectionsInPool() int

	// GetNumMaxConnectionsInPool returns the pool's maximum connection count
	// (presumably the upper bound for the size; confirm against the
	// implementation).
	GetNumMaxConnectionsInPool() int

	// UpdateNumConnectionsInPool changes the pool size by delta (not to an
	// absolute value).
	UpdateNumConnectionsInPool(delta int) error
}
|
||
func AdaptParallelism(yb TargetYugabyteDBWithConnectionPool) error { | ||
if !yb.IsAdaptiveParallelismSupported() { | ||
return fmt.Errorf("adaptive parallelism not supported in target YB database") | ||
} | ||
for { | ||
time.Sleep(ADAPTIVE_PARALLELISM_FREQUENCY) | ||
err := fetchClusterMetricsAndUpdateParallelism(yb) | ||
if err != nil { | ||
log.Warnf("adaptive: error updating parallelism: %v", err) | ||
} | ||
} | ||
} | ||
|
||
func fetchClusterMetricsAndUpdateParallelism(yb TargetYugabyteDBWithConnectionPool) error { | ||
clusterMetrics, err := yb.GetClusterMetrics() | ||
log.Infof("adaptive: clusterMetrics: %v", spew.Sdump(clusterMetrics)) // TODO: move to debug? | ||
if err != nil { | ||
return fmt.Errorf("getting cluster metrics: %w", err) | ||
} | ||
|
||
// get max CPU | ||
// Note that right now, voyager ingests data into the target in parallel, | ||
// but one table at a time. Therefore, in cases where there is a single tablet for a table, | ||
// either due to pre-split or colocated table, it is possible that the load on the cluster | ||
// will be uneven. Nevertheless, we still want to ensure that the cluster is not overloaded, | ||
// therefore we use the max CPU usage across all nodes in the cluster. | ||
maxCpuUsage, err := getMaxCpuUsageInCluster(clusterMetrics) | ||
if err != nil { | ||
return fmt.Errorf("getting max cpu usage in cluster: %w", err) | ||
} | ||
log.Infof("adaptive: max cpu usage in cluster = %d", maxCpuUsage) | ||
|
||
if maxCpuUsage > MAX_CPU_THRESHOLD { | ||
log.Infof("adaptive: found CPU usage = %d > %d, reducing parallelism to %d", maxCpuUsage, MAX_CPU_THRESHOLD, yb.GetNumConnectionsInPool()-1) | ||
err = yb.UpdateNumConnectionsInPool(-1) | ||
if err != nil { | ||
return fmt.Errorf("updating parallelism with -1: %w", err) | ||
} | ||
} else { | ||
log.Infof("adaptive: found CPU usage = %d <= %d, increasing parallelism to %d", maxCpuUsage, MAX_CPU_THRESHOLD, yb.GetNumConnectionsInPool()+1) | ||
err := yb.UpdateNumConnectionsInPool(1) | ||
if err != nil { | ||
return fmt.Errorf("updating parallelism with +1 : %w", err) | ||
} | ||
} | ||
return nil | ||
} | ||
|
||
func getMaxCpuUsageInCluster(clusterMetrics map[string]map[string]string) (int, error) { | ||
var maxCpuPct int | ||
for _, nodeMetrics := range clusterMetrics { | ||
cpuUsageUser, err := strconv.ParseFloat(nodeMetrics[CPU_USAGE_USER], 64) | ||
if err != nil { | ||
return -1, fmt.Errorf("parsing cpu usage user as float: %w", err) | ||
} | ||
cpuUsageSystem, err := strconv.ParseFloat(nodeMetrics[CPU_USAGE_SYSTEM], 64) | ||
if err != nil { | ||
return -1, fmt.Errorf("parsing cpu usage system as float: %w", err) | ||
} | ||
|
||
cpuUsagePct := int((cpuUsageUser + cpuUsageSystem) * 100) | ||
maxCpuPct = max(maxCpuPct, cpuUsagePct) | ||
} | ||
return maxCpuPct, nil | ||
} |
106 changes: 106 additions & 0 deletions
106
yb-voyager/src/adaptiveparallelism/adaptive_parallelism_test.go
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,106 @@ | ||
/* | ||
Copyright (c) YugabyteDB, Inc. | ||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
*/ | ||
package adaptiveparallelism | ||
|
||
import ( | ||
"strconv" | ||
"testing" | ||
|
||
"github.com/stretchr/testify/assert" | ||
) | ||
|
||
// dummyTargetYugabyteDB is a test double holding a connection-pool size, its
// maximum, and the CPU figures reported for two nodes ("node1" and "node2").
type dummyTargetYugabyteDB struct {
	// Connection-pool state.
	size, maxSize int

	// CPU usage reported for node1.
	cpuUsageUser1, cpuUsageSys1 float64

	// CPU usage reported for node2.
	cpuUsageUser2, cpuUsageSys2 float64
}
|
||
func (d *dummyTargetYugabyteDB) IsAdaptiveParallelismSupported() bool { | ||
return true | ||
} | ||
|
||
func (d *dummyTargetYugabyteDB) GetClusterMetrics() (map[string]map[string]string, error) { | ||
result := make(map[string]map[string]string) | ||
result["node1"] = make(map[string]string) | ||
result["node1"]["cpu_usage_user"] = strconv.FormatFloat(d.cpuUsageUser1, 'f', -1, 64) | ||
result["node1"]["cpu_usage_system"] = strconv.FormatFloat(d.cpuUsageSys1, 'f', -1, 64) | ||
result["node2"] = make(map[string]string) | ||
result["node2"]["cpu_usage_user"] = strconv.FormatFloat(d.cpuUsageUser2, 'f', -1, 64) | ||
result["node2"]["cpu_usage_system"] = strconv.FormatFloat(d.cpuUsageSys2, 'f', -1, 64) | ||
return result, nil | ||
} | ||
|
||
func (d *dummyTargetYugabyteDB) GetNumConnectionsInPool() int { | ||
return d.size | ||
} | ||
|
||
func (d *dummyTargetYugabyteDB) GetNumMaxConnectionsInPool() int { | ||
return d.maxSize | ||
} | ||
|
||
func (d *dummyTargetYugabyteDB) UpdateNumConnectionsInPool(delta int) error { | ||
d.size += delta | ||
return nil | ||
} | ||
|
||
func TestMaxCpuUsage(t *testing.T) { | ||
yb := &dummyTargetYugabyteDB{ | ||
size: 3, | ||
maxSize: 6, | ||
cpuUsageUser1: 0.5, | ||
cpuUsageSys1: 0.1, | ||
cpuUsageUser2: 0.1, | ||
cpuUsageSys2: 0.1, | ||
} | ||
|
||
clusterMetrics, _ := yb.GetClusterMetrics() | ||
maxCpuUsage, err := getMaxCpuUsageInCluster(clusterMetrics) | ||
assert.NoError(t, err) | ||
assert.Equal(t, 60, maxCpuUsage) | ||
} | ||
|
||
func TestIncreaseParallelism(t *testing.T) { | ||
yb := &dummyTargetYugabyteDB{ | ||
size: 3, | ||
maxSize: 6, | ||
cpuUsageUser1: 0.5, | ||
cpuUsageSys1: 0.1, | ||
cpuUsageUser2: 0.5, | ||
cpuUsageSys2: 0.1, | ||
} | ||
|
||
err := fetchClusterMetricsAndUpdateParallelism(yb) | ||
assert.NoErrorf(t, err, "failed to fetch cluster metrics and update parallelism") | ||
assert.Equal(t, 4, yb.GetNumConnectionsInPool()) | ||
} | ||
|
||
func TestDecreaseParallelismBasedOnCpu(t *testing.T) { | ||
yb := &dummyTargetYugabyteDB{ | ||
size: 3, | ||
maxSize: 6, | ||
cpuUsageUser1: 0.8, // above threshold | ||
cpuUsageSys1: 0.1, | ||
cpuUsageUser2: 0.5, | ||
cpuUsageSys2: 0.1, | ||
} | ||
|
||
err := fetchClusterMetricsAndUpdateParallelism(yb) | ||
assert.NoErrorf(t, err, "failed to fetch cluster metrics and update parallelism") | ||
assert.Equal(t, 2, yb.GetNumConnectionsInPool()) | ||
} |
Binary file modified
BIN
-375 Bytes
(96%)
yb-voyager/src/srcdb/data/gather-assessment-metadata.tar.gz
Binary file not shown.
Oops, something went wrong.