Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CBG-3277 add a stat for mismatched bucket names in dbconfigs #6410

Merged
merged 10 commits into from
Sep 15, 2023
42 changes: 37 additions & 5 deletions base/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ const (

NamespaceKey = "sgw"
ResourceUtilizationSubsystem = "resource_utilization"
ConfigSubsystem = "config"

SubsystemCacheKey = "cache"
SubsystemDatabaseKey = "database"
Expand Down Expand Up @@ -79,6 +80,7 @@ const (

StatAddedVersion3dot0dot0 = "3.0.0"
StatAddedVersion3dot1dot0 = "3.1.0"
StatAddedVersion3dot1dot2 = "3.1.2"
StatAddedVersion3dot2dot0 = "3.2.0"

StatDeprecatedVersionNotDeprecated = ""
Expand All @@ -105,15 +107,15 @@ var SyncGatewayStats *SgwStats
var SkipPrometheusStatsRegistration bool

func NewSyncGatewayStats() (*SgwStats, error) {
globalStats, err := newGlobalStat()
if err != nil {
return nil, err
}
sgwStats := SgwStats{
GlobalStats: &GlobalStat{},
GlobalStats: globalStats,
DbStats: map[string]*DbStats{},
}

err := sgwStats.GlobalStats.initResourceUtilizationStats()
if err != nil {
return nil, err
}
err = sgwStats.initReplicationStats()
if err != nil {
return nil, err
Expand Down Expand Up @@ -158,6 +160,31 @@ func (s *SgwStats) String() string {

// GlobalStat is the value stored in SgwStats.GlobalStats, alongside the per-database DbStats map.
type GlobalStat struct {
// ResourceUtilization is serialized under the "resource_utilization" JSON key.
ResourceUtilization *ResourceUtilization `json:"resource_utilization"`
// ConfigStat is serialized under the "config" JSON key.
ConfigStat *ConfigStat `json:"config"`
}

// newGlobalStat builds a GlobalStat and initializes its resource utilization
// stats followed by its config stats. The first initialization error aborts
// construction and is returned with a nil GlobalStat.
func newGlobalStat() (*GlobalStat, error) {
	stat := &GlobalStat{}
	if err := stat.initResourceUtilizationStats(); err != nil {
		return nil, err
	}
	if err := stat.initConfigStats(); err != nil {
		return nil, err
	}
	return stat, nil
}

// initConfigStats creates the config stats and stores them in g.ConfigStat.
//
// It returns the error from NewIntStat unchanged; on error g.ConfigStat is
// left as it was before the call.
func (g *GlobalStat) initConfigStats() error {
	// The stat counts mismatch events, so it is unitless rather than a byte quantity.
	mismatches, err := NewIntStat(ConfigSubsystem, "database_config_bucket_mismatches", StatUnitNoUnits, DatabaseBucketMismatchesDesc, StatAddedVersion3dot1dot2, StatDeprecatedVersionNotDeprecated, StatStabilityCommitted, nil, nil, prometheus.CounterValue, 0)
	if err != nil {
		return err
	}
	g.ConfigStat = &ConfigStat{DatabaseBucketMismatches: mismatches}
	return nil
}

func (g *GlobalStat) initResourceUtilizationStats() error {
Expand Down Expand Up @@ -303,6 +330,11 @@ type ResourceUtilization struct {
Uptime *SgwDurStat `json:"uptime"`
}

// ConfigStat holds the config-related stats exposed through GlobalStat.ConfigStat.
type ConfigStat struct {
// The number of times the bucket specified in a database config doesn't match the bucket it's found in.
DatabaseBucketMismatches *SgwIntStat `json:"database_config_bucket_mismatches"`
}

type DbStats struct {
dbName string
CacheStats *CacheStats `json:"cache,omitempty"`
Expand Down
5 changes: 5 additions & 0 deletions base/stats_descriptions.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,11 @@ const (
UptimeDesc = "The total uptime."
)

// config stats descriptions
const (
DatabaseBucketMismatchesDesc = "The total number of times a database config is polled from a bucket that doesn't match the bucket specified in the database config."
)

// cache stats descriptions
const (
AbandonedSequencesDesc = "The total number of skipped sequences that were not found after 60 minutes and were abandoned."
Expand Down
14 changes: 8 additions & 6 deletions rest/adminapitest/admin_api_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1599,7 +1599,7 @@ func TestBadConfigInsertionToBucket(t *testing.T) {
// assert that a request to the database fails with correct error message
resp := rt.SendAdminRequest(http.MethodGet, "/db1/_config", "")
rest.RequireStatus(t, resp, http.StatusNotFound)
assert.Contains(t, resp.Body.String(), "Must update database config immediately")
assert.Contains(t, resp.Body.String(), "You must update database config immediately")
}

// TestMismatchedBucketNameOnDbConfigUpdate:
Expand Down Expand Up @@ -1660,12 +1660,12 @@ func TestMultipleBucketWithBadDbConfigScenario1(t *testing.T) {
config.Bootstrap.ConfigGroupID = groupID
},
})
defer rt1.Close()

// create a db config that has bucket C in the config and persist to rt1 bucket
dbConfig := rt1.NewDbConfig()
dbConfig.Name = "db1"
rt1.PersistDbConfigToBucket(dbConfig, tb3.GetName())
defer rt1.Close()

rt2 := rest.NewRestTester(t, &rest.RestTesterConfig{
CustomTestBucket: tb2,
Expand Down Expand Up @@ -1711,7 +1711,7 @@ func TestMultipleBucketWithBadDbConfigScenario1(t *testing.T) {
// assert a request to the db fails with correct error message
resp := rt3.SendAdminRequest(http.MethodGet, "/db1/_config", "")
rest.RequireStatus(t, resp, http.StatusNotFound)
assert.Contains(t, resp.Body.String(), "Must update database config immediately")
assert.Contains(t, resp.Body.String(), "You must update database config immediately")
}

// TestMultipleBucketWithBadDbConfigScenario2:
Expand All @@ -1734,11 +1734,12 @@ func TestMultipleBucketWithBadDbConfigScenario2(t *testing.T) {
config.Bootstrap.ConfigGroupID = "60ce5544-c368-4b08-b0ed-4ca3b37973f9"
},
})
defer rt1.Close()

// create a db config pointing to bucket C and persist to bucket A
dbConfig := rt1.NewDbConfig()
dbConfig.Name = "db1"
rt1.PersistDbConfigToBucket(dbConfig, rt1.CustomTestBucket.GetName())
defer rt1.Close()

rt2 := rest.NewRestTester(t, &rest.RestTesterConfig{
CustomTestBucket: tb2,
Expand Down Expand Up @@ -1835,6 +1836,7 @@ func TestMultipleBucketWithBadDbConfigScenario3(t *testing.T) {
return len(invalidDatabases) == 1
}, 200, 1000)
require.NoError(t, err)

}

func TestResyncStopUsingDCPStream(t *testing.T) {
Expand Down Expand Up @@ -3707,7 +3709,7 @@ func TestConfigsIncludeDefaults(t *testing.T) {
assert.Equal(t, db.DefaultChannelCacheMaxNumber, *dbConfig.CacheConfig.ChannelCacheConfig.MaxNumber)
assert.Equal(t, base.DefaultOldRevExpirySeconds, *dbConfig.OldRevExpirySeconds)
assert.Equal(t, false, *dbConfig.StartOffline)
assert.Equal(t, db.DefaultCompactInterval, uint32(*dbConfig.CompactIntervalDays))
assert.Equal(t, db.DefaultCompactInterval, time.Duration(*dbConfig.CompactIntervalDays)*24*time.Hour)

assert.Equal(t, dbConfig.Logging.Console.LogLevel.String(), base.LevelDebug.String())
assert.Equal(t, dbConfig.Logging.Console.LogKeys, []string{base.KeyDCP.String()})
Expand All @@ -3724,7 +3726,7 @@ func TestConfigsIncludeDefaults(t *testing.T) {
assert.Equal(t, db.DefaultChannelCacheMaxNumber, *runtimeServerConfigDatabase.CacheConfig.ChannelCacheConfig.MaxNumber)
assert.Equal(t, base.DefaultOldRevExpirySeconds, *runtimeServerConfigDatabase.OldRevExpirySeconds)
assert.Equal(t, false, *runtimeServerConfigDatabase.StartOffline)
assert.Equal(t, db.DefaultCompactInterval, uint32(*runtimeServerConfigDatabase.CompactIntervalDays))
assert.Equal(t, db.DefaultCompactInterval, time.Duration(*runtimeServerConfigDatabase.CompactIntervalDays)*24*time.Hour)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are we rebasing this to remove this change in this PR?


// Test unsupported options
tb2 := base.GetTestBucket(t)
Expand Down
23 changes: 18 additions & 5 deletions rest/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,7 @@ func (d *invalidDatabaseConfigs) addInvalidDatabase(ctx context.Context, dbname
// already logged this entry at warning so need to log at info now
base.InfofCtx(ctx, base.KeyConfig, logMessage)
}
base.SyncGatewayStats.GlobalStats.ConfigStat.DatabaseBucketMismatches.Add(1)
}

func (d *invalidDatabaseConfigs) exists(dbname string) (*invalidConfigInfo, bool) {
Expand Down Expand Up @@ -1369,6 +1370,7 @@ func (sc *ServerContext) fetchAndLoadConfigs(ctx context.Context, isInitialStart
}
for dbName, fetchedConfig := range fetchedConfigs {
if dbConfig, ok := sc.dbConfigs[dbName]; ok && dbConfig.cfgCas >= fetchedConfig.cfgCas {
sc.invalidDatabaseConfigTracking.remove(dbName)
base.DebugfCtx(ctx, base.KeyConfig, "Database %q bucket %q config has not changed since last update", fetchedConfig.Name, *fetchedConfig.Bucket)
delete(fetchedConfigs, dbName)
}
Expand All @@ -1388,7 +1390,7 @@ func (sc *ServerContext) fetchAndLoadConfigs(ctx context.Context, isInitialStart
for _, dbName := range deletedDatabases {
// It's possible that the "deleted" database was not written to the server until after sc.FetchConfigs had returned...
// we'll need to pay for the cost of getting the config again now that we've got the write lock to double-check this db is definitely ok to remove...
found, _, err := sc.fetchDatabase(ctx, dbName)
found, _, err := sc._fetchDatabase(ctx, dbName)
if err != nil {
base.InfofCtx(ctx, base.KeyConfig, "Error fetching config for database %q to check whether we need to remove it: %v", dbName, err)
}
Expand Down Expand Up @@ -1416,15 +1418,13 @@ func (sc *ServerContext) fetchAndLoadDatabaseSince(ctx context.Context, dbName s
}

// fetchAndLoadDatabase takes sc.lock for the duration of the call and delegates to
// _fetchAndLoadDatabase, returning its results unchanged.
func (sc *ServerContext) fetchAndLoadDatabase(nonContextStruct base.NonCancellableContext, dbName string) (found bool, err error) {
sc.lock.Lock()
defer sc.lock.Unlock()
return sc._fetchAndLoadDatabase(nonContextStruct, dbName)
}

// _fetchAndLoadDatabase will attempt to find the given database name first in a matching bucket name,
// but then fall back to searching through configs in each bucket to try and find a config.
func (sc *ServerContext) _fetchAndLoadDatabase(nonContextStruct base.NonCancellableContext, dbName string) (found bool, err error) {
found, dbConfig, err := sc.fetchDatabase(nonContextStruct.Ctx, dbName)
found, dbConfig, err := sc._fetchDatabase(nonContextStruct.Ctx, dbName)
if err != nil || !found {
return false, err
}
Expand Down Expand Up @@ -1492,6 +1492,13 @@ func (sc *ServerContext) findBucketWithCallback(callback func(bucket string) (ex
}

// fetchDatabase takes sc.lock for the duration of the call and delegates to
// _fetchDatabase, returning its results unchanged.
func (sc *ServerContext) fetchDatabase(ctx context.Context, dbName string) (found bool, dbConfig *DatabaseConfig, err error) {
// the fetch will update the databases, so hold the lock for the whole call
sc.lock.Lock()
defer sc.lock.Unlock()
return sc._fetchDatabase(ctx, dbName)
}

func (sc *ServerContext) _fetchDatabase(ctx context.Context, dbName string) (found bool, dbConfig *DatabaseConfig, err error) {
// loop code moved to foreachDbConfig
var cnf DatabaseConfig
callback := func(bucket string) (exit bool, err error) {
Expand Down Expand Up @@ -1525,7 +1532,7 @@ func (sc *ServerContext) fetchDatabase(ctx context.Context, dbName string) (foun
// bucket name we got the config from we need to mark this db context as corrupt. Then remove the context and
// in memory representation on the server context.
if bucket != *cnf.Bucket {
sc.handleInvalidDatabaseConfig(ctx, bucket, cnf)
sc._handleInvalidDatabaseConfig(ctx, bucket, cnf)
return true, fmt.Errorf("mismatch in persisted database bucket name %q vs the actual bucket name %q. Please correct db %q's config, groupID %q.", base.MD(cnf.Bucket), base.MD(bucket), base.MD(cnf.Name), base.MD(sc.Config.Bootstrap.ConfigGroupID))
}
bucketCopy := bucket
Expand Down Expand Up @@ -1553,6 +1560,12 @@ func (sc *ServerContext) fetchDatabase(ctx context.Context, dbName string) (foun
}

// handleInvalidDatabaseConfig takes sc.lock for the duration of the call and delegates to
// _handleInvalidDatabaseConfig with the same arguments.
func (sc *ServerContext) handleInvalidDatabaseConfig(ctx context.Context, bucket string, cnf DatabaseConfig) {
sc.lock.Lock()
defer sc.lock.Unlock()
sc._handleInvalidDatabaseConfig(ctx, bucket, cnf)
}

func (sc *ServerContext) _handleInvalidDatabaseConfig(ctx context.Context, bucket string, cnf DatabaseConfig) {
// track corrupt database context
sc.invalidDatabaseConfigTracking.addInvalidDatabase(ctx, cnf.Name, cnf, bucket)
// don't load config + remove from server context (apart from corrupt database map)
Expand Down
11 changes: 7 additions & 4 deletions rest/utilities_testing.go
Original file line number Diff line number Diff line change
Expand Up @@ -2511,13 +2511,16 @@ func DropAllTestIndexes(t *testing.T, tb *base.TestBucket) {
}
}

func (sc *ServerContext) RequireInvalidDatabaseConfigNames(t *testing.T, dbNames []string) {
func (sc *ServerContext) RequireInvalidDatabaseConfigNames(t *testing.T, expectedDbNames []string) {
sc.invalidDatabaseConfigTracking.m.RLock()
defer sc.invalidDatabaseConfigTracking.m.RUnlock()
require.Equal(t, len(dbNames), len(sc.invalidDatabaseConfigTracking.dbNames))
for _, v := range dbNames {
require.NotNil(t, sc.invalidDatabaseConfigTracking.dbNames[v])

dbNames := make([]string, 0, len(sc.invalidDatabaseConfigTracking.dbNames))

for name := range sc.invalidDatabaseConfigTracking.dbNames {
dbNames = append(dbNames, name)
}
require.EqualValues(t, expectedDbNames, dbNames)
}

// Calls DropAllIndexes to remove all indexes, then restores the primary index for TestBucketPool readier requirements
Expand Down