From 03794e0c8a35fabf74df5e28a0b8fb403ed92e80 Mon Sep 17 00:00:00 2001 From: Adam Fraser Date: Fri, 13 Oct 2023 14:29:39 -0700 Subject: [PATCH] CBG-3384 Avoid triggering unnecessary resync when working with default collection (#6504) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * CBG-3384 Modify computation of metadataID - Don’t persist syncInfo for empty metadataID (legacy config) - Modify requirements for assignment of default metadata ID to a database: 1. Remove requirement for presence of _sync:seq in default collection 2. Allow assignment when default collection’s syncInfo is using defaultMetadataID These avoid triggering resync for databases using only the default collection, in legacy and non-legacy mode. If database already exists in registry, new databases (in different config groups) should use the same metadataID. --- base/bootstrap.go | 35 ++++- base/constants_syncdocs.go | 8 + rest/adminapitest/admin_api_test.go | 13 -- rest/config_manager.go | 37 +++-- rest/config_manager_test.go | 21 ++- rest/upgradetest/upgrade_registry_test.go | 175 +++++++++++++++++++++- rest/utilities_testing_resttester.go | 8 + 7 files changed, 261 insertions(+), 36 deletions(-) diff --git a/base/bootstrap.go b/base/bootstrap.go index f10852fefa..ac5b036763 100644 --- a/base/bootstrap.go +++ b/base/bootstrap.go @@ -42,8 +42,12 @@ type BootstrapConnection interface { // TouchMetadataDocument sets the specified property in a bootstrap metadata document for a given bucket and key. Used to // trigger CAS update on the document, to block any racing updates. Does not retry on CAS failure. TouchMetadataDocument(ctx context.Context, bucket, key string, property string, value string, cas uint64) (casOut uint64, err error) - // KeyExists checks whether the specified key exists + // KeyExists checks whether the specified key exists in the bucket's default collection KeyExists(ctx context.Context, bucket, key string) (exists bool, err error) + // GetDocument retrieves the document with the specified key from the bucket's default collection. + // Returns exists=false if key is not found, returns error for any other error. + GetDocument(ctx context.Context, bucket, docID string, rv interface{}) (exists bool, err error) + // Returns the bootstrap connection's cluster connection as N1QLStore for the specified bucket/scope/collection. // Does NOT establish a bucket connection, the bucketName/scopeName/collectionName is for query scoping only GetClusterN1QLStore(bucketName, scopeName, collectionName string) (*ClusterOnlyN1QLStore, error) @@ -447,6 +451,7 @@ func (cc *CouchbaseCluster) UpdateMetadataDocument(ctx context.Context, location } +// KeyExists checks whether a key exists in the default collection for the specified bucket func (cc *CouchbaseCluster) KeyExists(ctx context.Context, location, docID string) (exists bool, err error) { if cc == nil { return false, errors.New("nil CouchbaseCluster") @@ -463,6 +468,34 @@ func (cc *CouchbaseCluster) KeyExists(ctx context.Context, location, docID strin return cc.configPersistence.keyExists(b.DefaultCollection(), docID) } +// GetDocument fetches a document from the default collection. Does not use configPersistence - callers +// requiring configPersistence handling should use GetMetadataDocument. +func (cc *CouchbaseCluster) GetDocument(ctx context.Context, bucketName, docID string, rv interface{}) (exists bool, err error) { + if cc == nil { + return false, errors.New("nil CouchbaseCluster") + } + + b, teardown, err := cc.getBucket(ctx, bucketName) + if err != nil { + return false, err + } + + defer teardown() + + getOptions := &gocb.GetOptions{ + Transcoder: NewSGJSONTranscoder(), + } + getResult, err := b.DefaultCollection().Get(docID, getOptions) + if err != nil { + if errors.Is(err, gocb.ErrDocumentNotFound) { + return false, nil + } + return false, err + } + err = getResult.Content(rv) + return true, err +} + // Close calls teardown for any cached buckets and removes from cachedBucketConnections func (cc *CouchbaseCluster) Close() { diff --git a/base/constants_syncdocs.go b/base/constants_syncdocs.go index a81a80b3a5..c5e086d902 100644 --- a/base/constants_syncdocs.go +++ b/base/constants_syncdocs.go @@ -375,6 +375,9 @@ func InitSyncInfo(ds DataStore, metadataID string) (requiresResync bool, err err var syncInfo SyncInfo _, fetchErr := ds.Get(SGSyncInfo, &syncInfo) if IsKeyNotFoundError(ds, fetchErr) { + if metadataID == "" { + return false, nil + } newSyncInfo := &SyncInfo{MetadataID: metadataID} _, addErr := ds.Add(SGSyncInfo, 0, newSyncInfo) if IsCasMismatch(addErr) { @@ -397,6 +400,11 @@ func InitSyncInfo(ds DataStore, metadataID string) (requiresResync bool, err err // SetSyncInfo sets syncInfo in a DataStore to the specified metadataID func SetSyncInfo(ds DataStore, metadataID string) error { + + // If the metadataID isn't defined, don't persist SyncInfo. Defensive handling for legacy use cases. + if metadataID == "" { + return nil + } syncInfo := &SyncInfo{ MetadataID: metadataID, } diff --git a/rest/adminapitest/admin_api_test.go b/rest/adminapitest/admin_api_test.go index 4b48b868d7..6142e316f5 100644 --- a/rest/adminapitest/admin_api_test.go +++ b/rest/adminapitest/admin_api_test.go @@ -4434,19 +4434,6 @@ func TestDeleteDatabasePointingAtSameBucketPersistent(t *testing.T) { resp = rest.BootstrapAdminRequest(t, http.MethodPut, "/db2/", fmt.Sprintf(dbConfig, "db2")) resp.RequireStatus(http.StatusCreated) - // because we moved database - resync is required for the default collection before we're able to bring db2 online - resp = rest.BootstrapAdminRequest(t, http.MethodPost, "/db2/_resync?regenerate_sequences=true", "") - resp.RequireStatus(http.StatusOK) - - // after resync is done, state will flip back to offline - BootstrapWaitForDatabaseState(t, "db2", db.DBOffline) - - // now bring the db online so we're able to check dest factory - resp = rest.BootstrapAdminRequest(t, http.MethodPost, "/db2/_online", "") - resp.RequireStatus(http.StatusOK) - - BootstrapWaitForDatabaseState(t, "db2", db.DBOnline) - scopeName := "" collectionNames := []string{} // Validate that deleted database is no longer in dest factory set diff --git a/rest/config_manager.go b/rest/config_manager.go index 78bccc61fe..1cab53d351 100644 --- a/rest/config_manager.go +++ b/rest/config_manager.go @@ -721,23 +721,32 @@ func (b *bootstrapContext) ComputeMetadataIDForDbConfig(ctx context.Context, con // computeMetadataID determines whether the database should use the default metadata storage location (to support configurations upgrading with // existing sync metadata in the default collection). The default metadataID is only used when all of the following // conditions are met: -// 1. The default metadataID isn't already in use by another database -// 2. The database includes _default._default -// 3. The _default._default collection isn't already associated with a different metadata ID (_sync:syncInfo is not present) -// 4. The _default._default collection has legacy data (_sync:seq is present) +// 1. There isn't already a metadataID defined for the database name in another config group +// 2. The default metadataID isn't already in use by another database in the registry +// 3. The database includes _default._default +// 4. The _default._default collection isn't already associated with a different metadata ID (syncInfo document is not present, or has a value of defaultMetadataID) func (b *bootstrapContext) computeMetadataID(ctx context.Context, registry *GatewayRegistry, config *DbConfig) string { standardMetadataID := b.standardMetadataID(config.Name) - // If the default metadata ID is already in use in the registry, use standard ID + // If there's already a metadataID assigned to this database in the registry (including other config groups), use that + defaultMetadataIDInUse := false for _, cg := range registry.ConfigGroups { - for _, db := range cg.Databases { + for dbName, db := range cg.Databases { + if dbName == config.Name { + return db.MetadataID + } if db.MetadataID == defaultMetadataID { - return standardMetadataID + defaultMetadataIDInUse = true } } } + // If the default metadata ID is already in use in the registry by a different database, use standard ID. + if defaultMetadataIDInUse { + return standardMetadataID + } + // If the database config doesn't include _default._default, use standard ID if config.Scopes != nil { defaultFound := false @@ -753,21 +762,19 @@ func (b *bootstrapContext) computeMetadataID(ctx context.Context, registry *Gate } } + // If _default._default is already associated with a non-default metadataID, use the standard ID bucketName := config.GetBucketName() - exists, err := b.Connection.KeyExists(ctx, bucketName, base.SGSyncInfo) + var syncInfo base.SyncInfo + exists, err := b.Connection.GetDocument(ctx, bucketName, base.SGSyncInfo, &syncInfo) if err != nil { - base.WarnfCtx(ctx, "Error checking whether metadataID is already defined for default collection - using standard metadataID. Error: %v", err) - return standardMetadataID - } - if exists { + base.WarnfCtx(ctx, "Error checking syncInfo metadataID in default collection - using standard metadataID. Error: %v", err) return standardMetadataID } - // If legacy _sync:seq doesn't exist, use the standard ID - legacySyncSeqExists, _ := b.Connection.KeyExists(ctx, bucketName, base.DefaultMetadataKeys.SyncSeqKey()) - if !legacySyncSeqExists { + if exists && syncInfo.MetadataID != defaultMetadataID { return standardMetadataID } + return defaultMetadataID } diff --git a/rest/config_manager_test.go b/rest/config_manager_test.go index f61ac3ae69..57b2722546 100644 --- a/rest/config_manager_test.go +++ b/rest/config_manager_test.go @@ -89,11 +89,11 @@ func TestComputeMetadataID(t *testing.T) { defaultVersion := "1-abc" defaultDbConfig := makeDbConfig(tb.GetName(), dbName, nil) - // No sync data in default collection, so should use standard ID + // Use defaultMetadataID if database targets the default collection metadataID := bootstrapContext.computeMetadataID(ctx, registry, &defaultDbConfig) - assert.Equal(t, standardMetadataID, metadataID) + assert.Equal(t, defaultMetadataID, metadataID) - // Set _sync:seq in default collection, verify computeMetadataID returns default ID + // Set _sync:seq in default collection, verify computeMetadataID still returns default ID defaultStore := tb.Bucket.DefaultDataStore() syncSeqKey := base.DefaultMetadataKeys.SyncSeqKey() _, err = defaultStore.Incr(syncSeqKey, 1, 0, 0) @@ -124,6 +124,21 @@ func TestComputeMetadataID(t *testing.T) { metadataID = bootstrapContext.computeMetadataID(ctx, registry, &defaultDbConfig) assert.Equal(t, defaultMetadataID, metadataID) + // If the database has been assigned a metadataID in another config group, that should be used + multiConfigGroupDbName := "multiConfigGroupDb" + existingDbConfigOtherConfigGroup := makeDbConfig(tb.GetName(), multiConfigGroupDbName, nil) + existingDatabaseConfigOtherConfigGroup := &DatabaseConfig{ + DbConfig: existingDbConfigOtherConfigGroup, + Version: defaultVersion, + MetadataID: multiConfigGroupDbName, + } + _, err = registry.upsertDatabaseConfig(ctx, "differentConfigGroup", existingDatabaseConfigOtherConfigGroup) + require.NoError(t, err) + + newDbConfig := makeDbConfig(tb.GetName(), multiConfigGroupDbName, nil) + metadataID = bootstrapContext.computeMetadataID(ctx, registry, &newDbConfig) + require.Equal(t, multiConfigGroupDbName, metadataID) + // Single, non-default collection should use standard metadata ID namedOnlyScopesConfig := ScopesConfig{base.DefaultScope: ScopeConfig{map[string]*CollectionConfig{"collection1": {}}}} defaultDbConfig.Scopes = namedOnlyScopesConfig diff --git a/rest/upgradetest/upgrade_registry_test.go b/rest/upgradetest/upgrade_registry_test.go index c15e13ae13..2a039b42c3 100644 --- a/rest/upgradetest/upgrade_registry_test.go +++ b/rest/upgradetest/upgrade_registry_test.go @@ -15,7 +15,9 @@ import ( "testing" "github.com/couchbase/sync_gateway/base" + "github.com/couchbase/sync_gateway/db" "github.com/couchbase/sync_gateway/rest" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -88,8 +90,8 @@ func TestDefaultMetadataIDDefaultToNamed(t *testing.T) { _ = rt.Bucket() dbName := "db" - // Create a database with named collections - // Update config to remove named collections + // Create a database with default collection, + // Update config to switch to named collections scopesConfig := rest.GetCollectionsConfig(t, rt.TestBucket, 2) dbConfig := rt.NewDbConfig() @@ -103,10 +105,10 @@ func TestDefaultMetadataIDDefaultToNamed(t *testing.T) { putResponse := rt.SendAdminRequest("PUT", "/"+dbName+"/_user/bob", userPayload) rest.RequireStatus(t, putResponse, 201) - bobDocName := "_sync:user:db:bob" + bobDocName := "_sync:user:bob" requireBobUserLocation(rt, bobDocName) - // Update database to only target default collection + // Update database to only target named collection dbConfig.Scopes = scopesConfig resp = rt.ReplaceDbConfig(dbName, dbConfig) rest.RequireStatus(t, resp, http.StatusCreated) @@ -160,6 +162,171 @@ func TestUpgradeDatabasePreHelium(t *testing.T) { } +func TestLegacyMetadataID(t *testing.T) { + + if base.TestsUseNamedCollections() { + t.Skip("This test covers legacy interaction with the default collection") + } + testCtx := base.TestCtx(t) + + tb1 := base.GetPersistentTestBucket(t) + // Create a non-persistent rest tester. Standard RestTester + // creates a database 'db' targeting the default collection (when !TestUseNamedCollections) + legacyRT := rest.NewRestTester(t, &rest.RestTesterConfig{ + CustomTestBucket: tb1.NoCloseClone(), + PersistentConfig: false, + }) + + // Create a document in the collection to trigger creation of _sync:seq + resp := legacyRT.SendAdminRequest("PUT", "/db/testLegacyMetadataID", `{"test":"test"}`) + assert.Equal(t, http.StatusCreated, resp.Code) + + // Get the legacy config for upgrade test below + resp = legacyRT.SendAdminRequest("GET", "/_config?include_runtime=true", "") + legacyConfigBytes := resp.BodyBytes() + + var legacyConfig rest.LegacyServerConfig + err := base.JSONUnmarshal(legacyConfigBytes, &legacyConfig) + assert.NoError(t, err) + legacyRT.Close() + + persistentRT := rest.NewRestTester(t, &rest.RestTesterConfig{ + CustomTestBucket: tb1, + PersistentConfig: true, + }) + defer persistentRT.Close() + + // Generate a dbConfig from the legacy startup config using ToStartupConfig, and use it to create a database + _, dbMap, err := legacyConfig.ToStartupConfig(testCtx) + require.NoError(t, err) + + dbConfig, ok := dbMap["db"] + require.True(t, ok) + + // Need to sanitize the db config, but can't use sanitizeDbConfigs because it assumes non-empty server address + dbConfig.Username = "" + dbConfig.Password = "" + dbConfigBytes, err := base.JSONMarshal(dbConfig) + + require.NoError(t, err) + resp = persistentRT.SendAdminRequest("PUT", "/db/", string(dbConfigBytes)) + assert.Equal(t, http.StatusCreated, resp.Code) + + // check if database is online + dbRoot := persistentRT.GetDatabaseRoot("db") + require.Equal(t, db.RunStateString[db.DBOnline], dbRoot.State) +} + +// TestMetadataIDRenameDatabase verifies that resync is not required when deleting and recreating a database (with a +// different name) targeting only the default collection. +func TestMetadataIDRenameDatabase(t *testing.T) { + + if base.TestsUseNamedCollections() { + t.Skip("This test covers legacy interaction with the default collection") + } + + // Create a persistent rest tester. + rt := rest.NewRestTester(t, &rest.RestTesterConfig{ + PersistentConfig: true, + }) + defer rt.Close() + + dbConfig := rt.NewDbConfig() + dbName := "db" + rt.CreateDatabase(dbName, dbConfig) + + // Write a document to ensure _sync:seq is initialized + resp := rt.SendAdminRequest("PUT", "/"+dbName+"/testRenameDatabase", `{"test":"test"}`) + require.Equal(t, http.StatusCreated, resp.Code) + + // Delete database + resp = rt.SendAdminRequest("DELETE", "/"+dbName+"/", "") + require.Equal(t, http.StatusOK, resp.Code) + + newDbName := "newdb" + dbConfig.Name = newDbName + + // Create a new db targeting the same bucket + rt.CreateDatabase(newDbName, dbConfig) + + // check if database is online + dbRoot := rt.GetDatabaseRoot(newDbName) + require.Equal(t, db.RunStateString[db.DBOnline], dbRoot.State) +} + +// Verifies that matching metadataIDs are computed if two config groups for the same database are upgraded +func TestMetadataIDWithConfigGroups(t *testing.T) { + + if base.TestsUseNamedCollections() { + t.Skip("This test covers legacy interaction with the default collection") + } + + testCtx := base.TestCtx(t) + + tb1 := base.GetPersistentTestBucket(t) + // Create a non-persistent rest tester. Standard RestTester + // creates a database 'db' targeting the default collection (when !TestUseNamedCollections) + legacyRT := rest.NewRestTester(t, &rest.RestTesterConfig{ + CustomTestBucket: tb1.NoCloseClone(), + PersistentConfig: false, + }) + + // Create a document in the collection to trigger creation of _sync:seq + resp := legacyRT.SendAdminRequest("PUT", "/db/testLegacyMetadataID", `{"test":"test"}`) + assert.Equal(t, http.StatusCreated, resp.Code) + + // Get the legacy config for upgrade test below + resp = legacyRT.SendAdminRequest("GET", "/_config?include_runtime=true", "") + legacyConfigBytes := resp.BodyBytes() + + var legacyConfig rest.LegacyServerConfig + err := base.JSONUnmarshal(legacyConfigBytes, &legacyConfig) + assert.NoError(t, err) + legacyRT.Close() + + group1RT := rest.NewRestTester(t, &rest.RestTesterConfig{ + CustomTestBucket: tb1, + PersistentConfig: true, + GroupID: base.StringPtr("group1"), + }) + defer group1RT.Close() + + group2RT := rest.NewRestTester(t, &rest.RestTesterConfig{ + CustomTestBucket: tb1, + PersistentConfig: true, + GroupID: base.StringPtr("group2"), + }) + defer group2RT.Close() + + // Generate a dbConfig from the legacy startup config using ToStartupConfig, and use it to create a database + _, dbMap, err := legacyConfig.ToStartupConfig(testCtx) + require.NoError(t, err) + + dbConfig, ok := dbMap["db"] + require.True(t, ok) + + // Need to sanitize the db config, but can't use sanitizeDbConfigs because it assumes non-empty server address + dbConfig.Username = "" + dbConfig.Password = "" + dbConfigBytes, err := base.JSONMarshal(dbConfig) + + // Create the database in both RTs, verify that it comes online in both with matching metadata IDs + require.NoError(t, err) + resp = group1RT.SendAdminRequest("PUT", "/db/", string(dbConfigBytes)) + assert.Equal(t, http.StatusCreated, resp.Code) + + require.NoError(t, err) + resp = group2RT.SendAdminRequest("PUT", "/db/", string(dbConfigBytes)) + assert.Equal(t, http.StatusCreated, resp.Code) + + // check if databases are online + dbRoot := group1RT.GetDatabaseRoot("db") + require.Equal(t, db.RunStateString[db.DBOnline], dbRoot.State) + + dbRoot = group2RT.GetDatabaseRoot("db") + require.Equal(t, db.RunStateString[db.DBOnline], dbRoot.State) +} + func requireBobUserLocation(rt *rest.RestTester, docName string) { metadataStore := rt.GetDatabase().Bucket.DefaultDataStore() diff --git a/rest/utilities_testing_resttester.go b/rest/utilities_testing_resttester.go index 5fbf2772ae..d5bbd5564b 100644 --- a/rest/utilities_testing_resttester.go +++ b/rest/utilities_testing_resttester.go @@ -104,6 +104,14 @@ func (rt *RestTester) DeleteDoc(docID, revID string) { fmt.Sprintf("/%s/%s?rev=%s", rt.GetSingleKeyspace(), docID, revID), ""), http.StatusOK) } +func (rt *RestTester) GetDatabaseRoot(dbname string) DatabaseRoot { + var dbroot DatabaseRoot + resp := rt.SendAdminRequest("GET", "/"+dbname+"/", "") + RequireStatus(rt.TB, resp, 200) + require.NoError(rt.TB, base.JSONUnmarshal(resp.BodyBytes(), &dbroot)) + return dbroot +} + func (rt *RestTester) WaitForRev(docID string, revID string) error { return rt.WaitForCondition(func() bool { rawResponse := rt.SendAdminRequest("GET", "/{{.keyspace}}/"+docID, "")