-
Notifications
You must be signed in to change notification settings - Fork 2.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Tablet throttler: empty list of probes on non-leader #13926
Changes from all commits
cc833cd
e9ad8c1
247134c
03ece21
c76ff36
dd5ed79
6336ad6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,13 +7,57 @@ | |
package throttle | ||
|
||
import ( | ||
"context" | ||
"fmt" | ||
"sync/atomic" | ||
"testing" | ||
"time" | ||
|
||
"github.com/patrickmn/go-cache" | ||
"github.com/stretchr/testify/assert" | ||
"github.com/stretchr/testify/require" | ||
|
||
"vitess.io/vitess/go/vt/topo" | ||
"vitess.io/vitess/go/vt/vttablet/tabletserver/throttle/config" | ||
"vitess.io/vitess/go/vt/vttablet/tabletserver/throttle/mysql" | ||
|
||
topodatapb "vitess.io/vitess/go/vt/proto/topodata" | ||
) | ||
|
||
const ( | ||
waitForProbesTimeout = 30 * time.Second | ||
) | ||
|
||
type FakeTopoServer struct { | ||
} | ||
Comment on lines
+31
to
+32
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. FWIW, you could probably use the existing There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I tried but it did not make much sense to me. I need a specific fake behavior for specific functions? |
||
|
||
func (ts *FakeTopoServer) GetTablet(ctx context.Context, alias *topodatapb.TabletAlias) (*topo.TabletInfo, error) { | ||
tablet := &topo.TabletInfo{ | ||
Tablet: &topodatapb.Tablet{ | ||
Alias: alias, | ||
Hostname: "127.0.0.1", | ||
MysqlHostname: "127.0.0.1", | ||
MysqlPort: 3306, | ||
PortMap: map[string]int32{"vt": 5000}, | ||
Type: topodatapb.TabletType_REPLICA, | ||
}, | ||
} | ||
return tablet, nil | ||
} | ||
|
||
func (ts *FakeTopoServer) FindAllTabletAliasesInShard(ctx context.Context, keyspace, shard string) ([]*topodatapb.TabletAlias, error) { | ||
aliases := []*topodatapb.TabletAlias{ | ||
{Cell: "zone1", Uid: 100}, | ||
{Cell: "zone2", Uid: 101}, | ||
} | ||
return aliases, nil | ||
} | ||
|
||
func (ts *FakeTopoServer) GetSrvKeyspace(ctx context.Context, cell, keyspace string) (*topodatapb.SrvKeyspace, error) { | ||
ks := &topodatapb.SrvKeyspace{} | ||
return ks, nil | ||
} | ||
|
||
type FakeHeartbeatWriter struct { | ||
} | ||
|
||
|
@@ -50,6 +94,7 @@ func TestIsAppThrottled(t *testing.T) { | |
} | ||
|
||
func TestIsAppExempted(t *testing.T) { | ||
|
||
throttler := Throttler{ | ||
throttledApps: cache.New(cache.NoExpiration, 0), | ||
heartbeatWriter: FakeHeartbeatWriter{}, | ||
|
@@ -75,3 +120,102 @@ func TestIsAppExempted(t *testing.T) { | |
throttler.UnthrottleApp("schema-tracker") // meaningless. App is statically exempted | ||
assert.True(t, throttler.IsAppExempted("schema-tracker")) | ||
} | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. A short comment would still be nice here. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added. |
||
// TestRefreshMySQLInventory tests the behavior of the throttler's RefreshMySQLInventory() function, which | ||
// is called periodically in actual throttler. For a given cluster name, it generates a list of probes | ||
// the throttler will use to check metrics. | ||
// On a "self" cluster, that list is expect to probe the tablet itself. | ||
// On any other cluster, the list is expected to be empty if non-leader (only leader throttler, on a | ||
// `PRIMARY` tablet, probes other tablets). On the leader, the list is expected to be non-empty. | ||
func TestRefreshMySQLInventory(t *testing.T) { | ||
mattlord marked this conversation as resolved.
Show resolved
Hide resolved
|
||
metricsQuery := "select 1" | ||
config.Settings().Stores.MySQL.Clusters = map[string]*config.MySQLClusterConfigurationSettings{ | ||
selfStoreName: {}, | ||
"ks1": {}, | ||
"ks2": {}, | ||
} | ||
clusters := config.Settings().Stores.MySQL.Clusters | ||
for _, s := range clusters { | ||
s.MetricQuery = metricsQuery | ||
s.ThrottleThreshold = &atomic.Uint64{} | ||
s.ThrottleThreshold.Store(1) | ||
} | ||
|
||
throttler := &Throttler{ | ||
mysqlClusterProbesChan: make(chan *mysql.ClusterProbes), | ||
mysqlClusterThresholds: cache.New(cache.NoExpiration, 0), | ||
ts: &FakeTopoServer{}, | ||
mysqlInventory: mysql.NewInventory(), | ||
} | ||
throttler.metricsQuery.Store(metricsQuery) | ||
throttler.initThrottleTabletTypes() | ||
|
||
validateClusterProbes := func(t *testing.T, ctx context.Context) { | ||
testName := fmt.Sprintf("leader=%t", throttler.isLeader.Load()) | ||
t.Run(testName, func(t *testing.T) { | ||
// validateProbesCount expectes number of probes according to cluster name and throttler's leadership status | ||
validateProbesCount := func(t *testing.T, clusterName string, probes *mysql.Probes) { | ||
if clusterName == selfStoreName { | ||
assert.Equal(t, 1, len(*probes)) | ||
} else if throttler.isLeader.Load() { | ||
assert.NotZero(t, len(*probes)) | ||
} else { | ||
assert.Empty(t, *probes) | ||
} | ||
} | ||
t.Run("waiting for probes", func(t *testing.T) { | ||
ctx, cancel := context.WithTimeout(ctx, waitForProbesTimeout) | ||
defer cancel() | ||
numClusterProbesResults := 0 | ||
for { | ||
select { | ||
case probes := <-throttler.mysqlClusterProbesChan: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. IMO it's worth a comment that we're NOT competing with the main There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added comment. |
||
// Worth noting that in this unit test, the throttler is _closed_. Its own Operate() function does | ||
// not run, and therefore there is none but us to both populate `mysqlClusterProbesChan` as well as | ||
// read from it. We do not compete here with any other goroutine. | ||
assert.NotNil(t, probes) | ||
|
||
throttler.updateMySQLClusterProbes(ctx, probes) | ||
|
||
numClusterProbesResults++ | ||
validateProbesCount(t, probes.ClusterName, probes.InstanceProbes) | ||
|
||
if numClusterProbesResults == len(clusters) { | ||
// Achieved our goal | ||
return | ||
} | ||
case <-ctx.Done(): | ||
assert.FailNowf(t, ctx.Err().Error(), "waiting for %d cluster probes", len(clusters)) | ||
} | ||
} | ||
}) | ||
t.Run("validating probes", func(t *testing.T) { | ||
for clusterName := range clusters { | ||
probes, ok := throttler.mysqlInventory.ClustersProbes[clusterName] | ||
require.True(t, ok) | ||
validateProbesCount(t, clusterName, probes) | ||
} | ||
}) | ||
}) | ||
} | ||
// | ||
ctx := context.Background() | ||
|
||
t.Run("initial, not leader", func(t *testing.T) { | ||
throttler.isLeader.Store(false) | ||
throttler.refreshMySQLInventory(ctx) | ||
validateClusterProbes(t, ctx) | ||
}) | ||
|
||
t.Run("promote", func(t *testing.T) { | ||
throttler.isLeader.Store(true) | ||
throttler.refreshMySQLInventory(ctx) | ||
validateClusterProbes(t, ctx) | ||
}) | ||
|
||
t.Run("demote, expect cleanup", func(t *testing.T) { | ||
throttler.isLeader.Store(false) | ||
throttler.refreshMySQLInventory(ctx) | ||
validateClusterProbes(t, ctx) | ||
}) | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I thought that we had moved this to gRPC?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For backwards compatibility, we need to consider the option of a
v18
PRIMARY
withv17
replicas: those will not be able to serve gRPC, and therefore thev18
PRIMARY
falls back to probing them via HTTP.