From b1a90b913d2c3a98a0c68428137a6cafee69bf76 Mon Sep 17 00:00:00 2001 From: Anubhav Srivastava Date: Thu, 1 Aug 2024 19:52:52 -0700 Subject: [PATCH] [#23428] docdb: Remove non-transactional snapshot code Summary: Transactional snapshots have been supported (and the default) since D8100 and D8349. The non-transactional snapshot flow is untested and clutters the transactional snapshots. To the best of our knowledge, it is unused. This diff removes the non-transactional snapshot code. **Upgrade/Rollback safety:** This diff deprecates the `transaction_aware` field in `CreateSnapshotRequestPB` and the `current_snapshot_id` field in `ListSnapshotsResponsePB`. Since masters are upgraded before tservers, the `transaction_aware` aware field will always be ignored (the new behavior) on an upgrade / downgrade. The `current_snapshot_id` field is only set for non-transactional snapshots (which are not being used), so that field's behavior does not change. Jira: DB-12349 Test Plan: Existing tests Reviewers: zdrudi, hsunder, loginov Reviewed By: zdrudi, hsunder, loginov Subscribers: loginov, ybase Differential Revision: https://phorge.dev.yugabyte.com/D37010 --- src/yb/client/client-internal.cc | 1 - src/yb/client/snapshot_test_util.cc | 1 - src/yb/integration-tests/snapshot-test.cc | 4 - src/yb/master/async_snapshot_tasks.cc | 39 -- src/yb/master/catalog_manager.cc | 49 -- src/yb/master/catalog_manager.h | 21 - src/yb/master/catalog_manager_ext.cc | 511 +----------------- src/yb/master/catalog_manager_if.h | 7 - src/yb/master/clone/clone_state_manager.cc | 1 - src/yb/master/master_backup.proto | 10 +- .../xcluster/xcluster_bootstrap_helper.cc | 1 - src/yb/tools/test_admin_client.cc | 1 - src/yb/tools/yb-admin_cli.cc | 11 - src/yb/tools/yb-admin_client.cc | 2 - 14 files changed, 21 insertions(+), 638 deletions(-) diff --git a/src/yb/client/client-internal.cc b/src/yb/client/client-internal.cc index aa9da83dd210..9996f617dbe0 100644 --- a/src/yb/client/client-internal.cc +++ b/src/yb/client/client-internal.cc @@ -1819,7 +1819,6 @@ class CreateSnapshotRpc table.SetIntoTableIdentifierPB(&id); req_.mutable_tables()->Add()->Swap(&id); } - req_.set_transaction_aware(true); return Status::OK(); } diff --git a/src/yb/client/snapshot_test_util.cc b/src/yb/client/snapshot_test_util.cc index db348109289f..2375b9f3f6c4 100644 --- a/src/yb/client/snapshot_test_util.cc +++ b/src/yb/client/snapshot_test_util.cc @@ -247,7 +247,6 @@ Result SnapshotTestUtil::DoStartSnapshot(const F& fill_tables) { rpc::RpcController controller; controller.set_timeout(60s); master::CreateSnapshotRequestPB req; - req.set_transaction_aware(true); fill_tables(&req); master::CreateSnapshotResponsePB resp; RETURN_NOT_OK(VERIFY_RESULT(MakeBackupServiceProxy()).CreateSnapshot(req, &resp, &controller)); diff --git a/src/yb/integration-tests/snapshot-test.cc b/src/yb/integration-tests/snapshot-test.cc index 30a64d5b9c79..fe637580f281 100644 --- a/src/yb/integration-tests/snapshot-test.cc +++ b/src/yb/integration-tests/snapshot-test.cc @@ -180,9 +180,6 @@ class SnapshotTest : public YBMiniClusterTestBase { LOG(INFO) << "Number of snapshots: " << list_resp.snapshots_size(); ASSERT_EQ(list_resp.snapshots_size(), snapshot_info.size()); - // Current snapshot is available for non-transaction aware snapshots only. - ASSERT_FALSE(list_resp.has_current_snapshot_id()); - for (int i = 0; i < list_resp.snapshots_size(); ++i) { LOG(INFO) << "Snapshot " << i << ": " << list_resp.snapshots(i).DebugString(); auto id = ASSERT_RESULT(FullyDecodeTxnSnapshotId(list_resp.snapshots(i).id())); @@ -270,7 +267,6 @@ class SnapshotTest : public YBMiniClusterTestBase { std::optional imported = std::nullopt) { CreateSnapshotRequestPB req; CreateSnapshotResponsePB resp; - req.set_transaction_aware(true); TableIdentifierPB* const table = req.mutable_tables()->Add(); table->set_table_name(kTableName.table_name()); table->mutable_namespace_()->set_name(kTableName.namespace_name()); diff --git a/src/yb/master/async_snapshot_tasks.cc b/src/yb/master/async_snapshot_tasks.cc index dac53f0ffd92..ac9f41735de4 100644 --- a/src/yb/master/async_snapshot_tasks.cc +++ b/src/yb/master/async_snapshot_tasks.cc @@ -140,46 +140,7 @@ void AsyncTabletSnapshotOp::HandleResponse(int attempt) { if (state() != server::MonitoredTaskState::kComplete) { VLOG_WITH_PREFIX(1) << "TabletSnapshotOp task is not completed"; - return; - } - - switch (operation_) { - case tserver::TabletSnapshotOpRequestPB::CREATE_ON_TABLET: { - // TODO: this class should not know CatalogManager API, - // remove circular dependency between classes. - master_->catalog_manager()->HandleCreateTabletSnapshotResponse( - tablet_.get(), resp_.has_error()); - return; - } - case tserver::TabletSnapshotOpRequestPB::RESTORE_ON_TABLET: { - // TODO: this class should not know CatalogManager API, - // remove circular dependency between classes. - master_->catalog_manager()->HandleRestoreTabletSnapshotResponse( - tablet_.get(), resp_.has_error()); - return; - } - case tserver::TabletSnapshotOpRequestPB::DELETE_ON_TABLET: { - // TODO: this class should not know CatalogManager API, - // remove circular dependency between classes. - // HandleDeleteTabletSnapshotResponse handles only non transaction aware snapshots. - // So prevent log flooding for transaction aware snapshots. - if (!TryFullyDecodeTxnSnapshotId(snapshot_id_)) { - master_->catalog_manager()->HandleDeleteTabletSnapshotResponse( - snapshot_id_, tablet_.get(), resp_.has_error()); - } - return; - } - case tserver::TabletSnapshotOpRequestPB::RESTORE_FINISHED: - return; - case tserver::TabletSnapshotOpRequestPB::CREATE_ON_MASTER: FALLTHROUGH_INTENDED; - case tserver::TabletSnapshotOpRequestPB::DELETE_ON_MASTER: FALLTHROUGH_INTENDED; - case tserver::TabletSnapshotOpRequestPB::RESTORE_SYS_CATALOG: FALLTHROUGH_INTENDED; - case google::protobuf::kint32min: FALLTHROUGH_INTENDED; - case google::protobuf::kint32max: FALLTHROUGH_INTENDED; - case tserver::TabletSnapshotOpRequestPB::UNKNOWN: break; // Not handled. } - - FATAL_INVALID_ENUM_VALUE(tserver::TabletSnapshotOpRequestPB::Operation, operation_); } bool AsyncTabletSnapshotOp::SendRequest(int attempt) { diff --git a/src/yb/master/catalog_manager.cc b/src/yb/master/catalog_manager.cc index cbe76c7c247a..392cc8645f4f 100644 --- a/src/yb/master/catalog_manager.cc +++ b/src/yb/master/catalog_manager.cc @@ -891,47 +891,6 @@ Result ToQLStmtType( } // anonymous namespace -//////////////////////////////////////////////////////////// -// Snapshot Loader -//////////////////////////////////////////////////////////// - -class SnapshotLoader : public Visitor { - public: - explicit SnapshotLoader(CatalogManager* catalog_manager) : catalog_manager_(catalog_manager) {} - - Status Visit(const SnapshotId& snapshot_id, const SysSnapshotEntryPB& metadata) override { - if (TryFullyDecodeTxnSnapshotId(snapshot_id)) { - // Transaction aware snapshots should be already loaded. - return Status::OK(); - } - return VisitNonTransactionAwareSnapshot(snapshot_id, metadata); - } - - Status VisitNonTransactionAwareSnapshot( - const SnapshotId& snapshot_id, const SysSnapshotEntryPB& metadata) { - // Setup the snapshot info. - auto snapshot_info = make_scoped_refptr(snapshot_id); - auto l = snapshot_info->LockForWrite(); - l.mutable_data()->pb.CopyFrom(metadata); - - // Add the snapshot to the IDs map (if the snapshot is not deleted). - auto emplace_result = - catalog_manager_->non_txn_snapshot_ids_map_.emplace(snapshot_id, std::move(snapshot_info)); - CHECK(emplace_result.second) << "Snapshot already exists: " << snapshot_id; - - LOG(INFO) << "Loaded metadata for snapshot (id=" << snapshot_id - << "): " << emplace_result.first->second->ToString() << ": " - << metadata.ShortDebugString(); - l.Commit(); - return Status::OK(); - } - - private: - CatalogManager* catalog_manager_; - - DISALLOW_COPY_AND_ASSIGN(SnapshotLoader); -}; - //////////////////////////////////////////////////////////// // CatalogManager //////////////////////////////////////////////////////////// @@ -1504,14 +1463,6 @@ Status CatalogManager::RunLoaders(SysCatalogLoadingState* state) { RETURN_NOT_OK(InitializeTransactionTablesConfig(state->epoch.leader_term)); } - // Clear the snapshots. - non_txn_snapshot_ids_map_.clear(); - - LOG_WITH_FUNC(INFO) << "Loading snapshots into memory."; - unique_ptr snapshot_loader(new SnapshotLoader(this)); - RETURN_NOT_OK_PREPEND( - sys_catalog_->Visit(snapshot_loader.get()), "Failed while visiting snapshots in sys catalog"); - RETURN_NOT_OK(LoadXReplStream()); RETURN_NOT_OK(LoadUniverseReplication()); RETURN_NOT_OK(LoadUniverseReplicationBootstrap()); diff --git a/src/yb/master/catalog_manager.h b/src/yb/master/catalog_manager.h index 2ef538911151..ffd20041df81 100644 --- a/src/yb/master/catalog_manager.h +++ b/src/yb/master/catalog_manager.h @@ -1290,13 +1290,6 @@ class CatalogManager : public tserver::TabletPeerLookupIf, rpc::RpcContext* rpc, const LeaderEpoch& epoch); - void HandleCreateTabletSnapshotResponse(TabletInfo* tablet, bool error) override; - - void HandleRestoreTabletSnapshotResponse(TabletInfo* tablet, bool error) override; - - void HandleDeleteTabletSnapshotResponse( - const SnapshotId& snapshot_id, TabletInfo* tablet, bool error) override; - // Is encryption at rest enabled for this cluster. Status IsEncryptionEnabled( const IsEncryptionEnabledRequestPB* req, IsEncryptionEnabledResponsePB* resp); @@ -2395,7 +2388,6 @@ class CatalogManager : public tserver::TabletPeerLookupIf, const TSDescriptorVector& ts_descs); private: - friend class SnapshotLoader; friend class yb::master::ClusterLoadBalancer; friend class CDCStreamLoader; friend class UniverseReplicationLoader; @@ -2764,14 +2756,6 @@ class CatalogManager : public tserver::TabletPeerLookupIf, std::unordered_set GetCDCSDKStreamsForTable(const TableId& table_id) const; - Status CreateNonTransactionAwareSnapshot( - const CreateSnapshotRequestPB* req, CreateSnapshotResponsePB* resp, const LeaderEpoch& epoch); - - Status RestoreNonTransactionAwareSnapshot( - const SnapshotId& snapshot_id, const LeaderEpoch& epoch); - - Status DeleteNonTransactionAwareSnapshot(const SnapshotId& snapshot_id, const LeaderEpoch& epoch); - Status AddNamespaceEntriesToPB( const std::vector& tables, google::protobuf::RepeatedPtrField* out, @@ -2944,11 +2928,6 @@ class CatalogManager : public tserver::TabletPeerLookupIf, std::unordered_map> delete_replica_task_throttler_per_ts_ GUARDED_BY(delete_replica_task_throttler_per_ts_mutex_); - // Snapshot map: snapshot-id -> SnapshotInfo. - typedef std::unordered_map> SnapshotInfoMap; - SnapshotInfoMap non_txn_snapshot_ids_map_; - SnapshotId current_snapshot_id_; - // mutex on should_send_universe_key_registry_mutex_. mutable simple_spinlock should_send_universe_key_registry_mutex_; // Should catalog manager resend latest universe key registry to tserver. diff --git a/src/yb/master/catalog_manager_ext.cc b/src/yb/master/catalog_manager_ext.cc index 94f636209277..1dce75a5d4b3 100644 --- a/src/yb/master/catalog_manager_ext.cc +++ b/src/yb/master/catalog_manager_ext.cc @@ -128,10 +128,7 @@ using strings::Substitute; DECLARE_int32(master_rpc_timeout_ms); -DEFINE_RUNTIME_bool(enable_transaction_snapshots, true, - "The flag enables usage of transaction aware snapshots."); -TAG_FLAG(enable_transaction_snapshots, hidden); -TAG_FLAG(enable_transaction_snapshots, advanced); +DEPRECATE_FLAG(bool, enable_transaction_snapshots, "08_2024"); DEPRECATE_FLAG(bool, allow_consecutive_restore, "10_2022"); @@ -229,10 +226,6 @@ Status CatalogManager::DoCreateSnapshot(const CreateSnapshotRequestPB* req, CoarseTimePoint deadline, const LeaderEpoch& epoch) { LOG(INFO) << "Servicing CreateSnapshot request: " << req->ShortDebugString(); - if (FLAGS_enable_transaction_snapshots && req->transaction_aware()) { - return CreateTransactionAwareSnapshot(*req, resp, deadline); - } - if (req->has_schedule_id()) { auto schedule_id = VERIFY_RESULT(FullyDecodeSnapshotScheduleId(req->schedule_id())); auto snapshot_id = master_->snapshot_coordinator().CreateForSchedule( @@ -245,88 +238,7 @@ Status CatalogManager::DoCreateSnapshot(const CreateSnapshotRequestPB* req, return Status::OK(); } - return CreateNonTransactionAwareSnapshot(req, resp, epoch); -} - -Status CatalogManager::CreateNonTransactionAwareSnapshot( - const CreateSnapshotRequestPB* req, - CreateSnapshotResponsePB* resp, - const LeaderEpoch& epoch) { - SnapshotId snapshot_id; - { - LockGuard lock(mutex_); - TRACE("Acquired catalog manager lock"); - - // Verify that the system is not in snapshot creating/restoring state. - if (!current_snapshot_id_.empty()) { - return STATUS(IllegalState, - Format( - "Current snapshot id: $0. Parallel snapshot operations are not supported" - ": $1", current_snapshot_id_, req), - MasterError(MasterErrorPB::PARALLEL_SNAPSHOT_OPERATION)); - } - - // Create a new snapshot UUID. - snapshot_id = GenerateIdUnlocked(SysRowEntryType::SNAPSHOT); - } - - vector all_tablets; - - // Create in memory snapshot data descriptor. - scoped_refptr snapshot(new SnapshotInfo(snapshot_id)); - snapshot->mutable_metadata()->StartMutation(); - snapshot->mutable_metadata()->mutable_dirty()->pb.set_state(SysSnapshotEntryPB::CREATING); - - auto tables = VERIFY_RESULT(CollectTables(req->tables(), - req->add_indexes(), - true /* include_parent_colocated_table */)); - unordered_set added_namespaces; - SysSnapshotEntryPB& pb = snapshot->mutable_metadata()->mutable_dirty()->pb; - // Note: SysSnapshotEntryPB includes PBs for stored (1) namespaces (2) tables (3) tablets. - RETURN_NOT_OK(AddNamespaceEntriesToPB(tables, pb.mutable_entries(), &added_namespaces)); - RETURN_NOT_OK(AddTableAndTabletEntriesToPB( - tables, pb.mutable_entries(), pb.mutable_tablet_snapshots(), &all_tablets)); - - VLOG(1) << "Snapshot " << snapshot->ToString() - << ": PB=" << snapshot->mutable_metadata()->mutable_dirty()->pb.DebugString(); - - // Write the snapshot data descriptor to the system catalog (in "creating" state). - RETURN_NOT_OK(CheckLeaderStatus( - sys_catalog_->Upsert(leader_ready_term(), snapshot), - "inserting snapshot into sys-catalog")); - TRACE("Wrote snapshot to system catalog"); - - // Commit in memory snapshot data descriptor. - snapshot->mutable_metadata()->CommitMutation(); - - // Put the snapshot data descriptor to the catalog manager. - { - LockGuard lock(mutex_); - TRACE("Acquired catalog manager lock"); - - // Verify that the snapshot does not exist. - auto inserted = non_txn_snapshot_ids_map_.emplace(snapshot_id, snapshot).second; - RSTATUS_DCHECK(inserted, IllegalState, Format("Snapshot already exists: $0", snapshot_id)); - current_snapshot_id_ = snapshot_id; - } - - // Send CreateSnapshot requests to all TServers (one tablet - one request). - for (const TabletInfoPtr& tablet : all_tablets) { - TRACE("Locking tablet"); - auto l = tablet->LockForRead(); - - LOG(INFO) << "Sending CreateTabletSnapshot to tablet: " << tablet->ToString(); - - // Send Create Tablet Snapshot request to each tablet leader. - auto call = CreateAsyncTabletSnapshotOp( - tablet, snapshot_id, tserver::TabletSnapshotOpRequestPB::CREATE_ON_TABLET, - epoch, TabletSnapshotOperationCallback()); - ScheduleTabletSnapshotOp(call); - } - - resp->set_snapshot_id(snapshot_id); - LOG(INFO) << "Successfully started snapshot " << snapshot_id << " creation"; - return Status::OK(); + return CreateTransactionAwareSnapshot(*req, resp, deadline); } Status CatalogManager::Submit(std::unique_ptr operation, int64_t leader_term) { @@ -516,42 +428,7 @@ Status CatalogManager::CreateTransactionAwareSnapshot( Status CatalogManager::ListSnapshots(const ListSnapshotsRequestPB* req, ListSnapshotsResponsePB* resp) { auto txn_snapshot_id = TryFullyDecodeTxnSnapshotId(req->snapshot_id()); - { - SharedLock lock(mutex_); - TRACE("Acquired catalog manager lock"); - - if (!current_snapshot_id_.empty()) { - resp->set_current_snapshot_id(current_snapshot_id_); - } - - auto setup_snapshot_pb_lambda = [resp](scoped_refptr snapshot_info) { - auto snapshot_lock = snapshot_info->LockForRead(); - - SnapshotInfoPB* const snapshot = resp->add_snapshots(); - snapshot->set_id(snapshot_info->id()); - *snapshot->mutable_entry() = snapshot_info->metadata().state().pb; - }; - - if (req->has_snapshot_id()) { - if (!txn_snapshot_id) { - TRACE("Looking up snapshot"); - scoped_refptr snapshot_info = - FindPtrOrNull(non_txn_snapshot_ids_map_, req->snapshot_id()); - if (snapshot_info == nullptr) { - return STATUS(InvalidArgument, "Could not find snapshot", req->snapshot_id(), - MasterError(MasterErrorPB::SNAPSHOT_NOT_FOUND)); - } - - setup_snapshot_pb_lambda(snapshot_info); - } - } else { - for (const SnapshotInfoMap::value_type& entry : non_txn_snapshot_ids_map_) { - setup_snapshot_pb_lambda(entry.second); - } - } - } - - if (req->prepare_for_backup() && (!req->has_snapshot_id() || !txn_snapshot_id)) { + if (req->prepare_for_backup() && !txn_snapshot_id) { return STATUS( InvalidArgument, "Request must have correct snapshot_id", (req->has_snapshot_id() ? req->snapshot_id() : "None"), MasterError(MasterErrorPB::SNAPSHOT_FAILED)); @@ -666,81 +543,14 @@ Status CatalogManager::RestoreSnapshot( const RestoreSnapshotRequestPB* req, RestoreSnapshotResponsePB* resp, rpc::RpcContext* rpc, const LeaderEpoch& epoch) { LOG(INFO) << "Servicing RestoreSnapshot request: " << req->ShortDebugString(); - - auto txn_snapshot_id = TryFullyDecodeTxnSnapshotId(req->snapshot_id()); - if (txn_snapshot_id) { - HybridTime ht; - if (req->has_restore_ht()) { - ht = HybridTime(req->restore_ht()); - } - TxnSnapshotRestorationId id = VERIFY_RESULT( - master_->snapshot_coordinator().Restore(txn_snapshot_id, ht, epoch.leader_term)); - resp->set_restoration_id(id.data(), id.size()); - return Status::OK(); - } - - return RestoreNonTransactionAwareSnapshot(req->snapshot_id(), epoch); -} - -Status CatalogManager::RestoreNonTransactionAwareSnapshot( - const string& snapshot_id, const LeaderEpoch& epoch) { - LockGuard lock(mutex_); - TRACE("Acquired catalog manager lock"); - - if (!current_snapshot_id_.empty()) { - return STATUS( - IllegalState, - Format( - "Current snapshot id: $0. Parallel snapshot operations are not supported: $1", - current_snapshot_id_, snapshot_id), - MasterError(MasterErrorPB::PARALLEL_SNAPSHOT_OPERATION)); - } - - TRACE("Looking up snapshot"); - scoped_refptr snapshot = FindPtrOrNull(non_txn_snapshot_ids_map_, snapshot_id); - if (snapshot == nullptr) { - return STATUS(InvalidArgument, "Could not find snapshot", snapshot_id, - MasterError(MasterErrorPB::SNAPSHOT_NOT_FOUND)); - } - - auto snapshot_lock = snapshot->LockForWrite(); - - if (snapshot_lock->started_deleting()) { - return STATUS(NotFound, "The snapshot was deleted", snapshot_id, - MasterError(MasterErrorPB::SNAPSHOT_NOT_FOUND)); - } - - if (!snapshot_lock->is_complete()) { - return STATUS(IllegalState, "The snapshot state is not complete", snapshot_id, - MasterError(MasterErrorPB::SNAPSHOT_IS_NOT_READY)); - } - - TRACE("Updating snapshot metadata on disk"); - SysSnapshotEntryPB& snapshot_pb = snapshot_lock.mutable_data()->pb; - snapshot_pb.set_state(SysSnapshotEntryPB::RESTORING); - - // Update tablet states. - SetTabletSnapshotsState(SysSnapshotEntryPB::RESTORING, &snapshot_pb); - - // Update sys-catalog with the updated snapshot state. - // The mutation will be aborted when 'l' exits the scope on early return. - RETURN_NOT_OK(CheckLeaderStatus( - sys_catalog_->Upsert(leader_ready_term(), snapshot), - "updating snapshot in sys-catalog")); - - // CatalogManager lock 'lock_' is still locked here. - current_snapshot_id_ = snapshot_id; - - // Restore all entries. - for (const SysRowEntry& entry : snapshot_pb.entries()) { - RETURN_NOT_OK(RestoreEntry(entry, snapshot_id, epoch)); - } - - // Commit in memory snapshot data descriptor. - TRACE("Committing in-memory snapshot state"); - snapshot_lock.Commit(); - - LOG(INFO) << "Successfully started snapshot " << snapshot->ToString() << " restoring"; + auto txn_snapshot_id = VERIFY_RESULT(FullyDecodeTxnSnapshotId(req->snapshot_id())); + HybridTime ht; + if (req->has_restore_ht()) { + ht = HybridTime(req->restore_ht()); + } + TxnSnapshotRestorationId id = VERIFY_RESULT( + master_->snapshot_coordinator().Restore(txn_snapshot_id, ht, epoch.leader_term)); + resp->set_restoration_id(id.data(), id.size()); return Status::OK(); } @@ -826,82 +636,11 @@ Status CatalogManager::DeleteSnapshot( DeleteSnapshotResponsePB* resp, rpc::RpcContext* rpc, const LeaderEpoch& epoch) { - auto txn_snapshot_id = TryFullyDecodeTxnSnapshotId(req->snapshot_id()); - if (txn_snapshot_id) { - LOG(INFO) << "Servicing DeleteSnapshot request. id: " << txn_snapshot_id - << ", request: " << req->ShortDebugString(); - return master_->snapshot_coordinator().Delete( - txn_snapshot_id, epoch.leader_term, rpc->GetClientDeadline()); - } - LOG(INFO) << "Servicing DeleteSnapshot request: " << req->ShortDebugString(); - return DeleteNonTransactionAwareSnapshot(req->snapshot_id(), epoch); -} - -Status CatalogManager::DeleteNonTransactionAwareSnapshot( - const SnapshotId& snapshot_id, const LeaderEpoch& epoch) { - LockGuard lock(mutex_); - TRACE("Acquired catalog manager lock"); - - TRACE("Looking up snapshot"); - scoped_refptr snapshot = FindPtrOrNull( - non_txn_snapshot_ids_map_, snapshot_id); - if (snapshot == nullptr) { - return STATUS(InvalidArgument, "Could not find snapshot", snapshot_id, - MasterError(MasterErrorPB::SNAPSHOT_NOT_FOUND)); - } - - auto snapshot_lock = snapshot->LockForWrite(); - - if (snapshot_lock->started_deleting()) { - return STATUS(NotFound, "The snapshot was deleted", snapshot_id, - MasterError(MasterErrorPB::SNAPSHOT_NOT_FOUND)); - } - - if (snapshot_lock->is_restoring()) { - return STATUS(InvalidArgument, "The snapshot is being restored now", snapshot_id, - MasterError(MasterErrorPB::PARALLEL_SNAPSHOT_OPERATION)); - } - - TRACE("Updating snapshot metadata on disk"); - SysSnapshotEntryPB& snapshot_pb = snapshot_lock.mutable_data()->pb; - snapshot_pb.set_state(SysSnapshotEntryPB::DELETING); - - // Update tablet states. - SetTabletSnapshotsState(SysSnapshotEntryPB::DELETING, &snapshot_pb); - - // Update sys-catalog with the updated snapshot state. - // The mutation will be aborted when 'l' exits the scope on early return. - RETURN_NOT_OK(CheckStatus( - sys_catalog_->Upsert(leader_ready_term(), snapshot), - "updating snapshot in sys-catalog")); - - // Send DeleteSnapshot requests to all TServers (one tablet - one request). - for (const SysRowEntry& entry : snapshot_pb.entries()) { - if (entry.type() == SysRowEntryType::TABLET) { - TRACE("Looking up tablet"); - TabletInfoPtr tablet = FindPtrOrNull(*tablet_map_, entry.id()); - if (tablet == nullptr) { - LOG(WARNING) << "Deleting tablet not found " << entry.id(); - } else { - TRACE("Locking tablet"); - auto l = tablet->LockForRead(); - - LOG(INFO) << "Sending DeleteTabletSnapshot to tablet: " << tablet->ToString(); - // Send DeleteSnapshot requests to all TServers (one tablet - one request). - auto task = CreateAsyncTabletSnapshotOp( - tablet, snapshot_id, tserver::TabletSnapshotOpRequestPB::DELETE_ON_TABLET, - epoch, TabletSnapshotOperationCallback()); - ScheduleTabletSnapshotOp(task); - } - } - } - - // Commit in memory snapshot data descriptor. - TRACE("Committing in-memory snapshot state"); - snapshot_lock.Commit(); - - LOG(INFO) << "Successfully started snapshot " << snapshot->ToString() << " deletion"; - return Status::OK(); + auto txn_snapshot_id = VERIFY_RESULT(FullyDecodeTxnSnapshotId(req->snapshot_id())); + LOG(INFO) << "Servicing DeleteSnapshot request. id: " << txn_snapshot_id + << ", request: " << req->ShortDebugString(); + return master_->snapshot_coordinator().Delete( + txn_snapshot_id, epoch.leader_term, rpc->GetClientDeadline()); } Status CatalogManager::DoImportSnapshotMeta( @@ -3133,224 +2872,6 @@ int64_t CatalogManager::LeaderTerm() { return consensus_result.get()->GetLeaderState(/* allow_stale= */ true).term; } -void CatalogManager::HandleCreateTabletSnapshotResponse(TabletInfo *tablet, bool error) { - LOG(INFO) << "Handling Create Tablet Snapshot Response for tablet " - << DCHECK_NOTNULL(tablet)->ToString() << (error ? " ERROR" : " OK"); - - // Get the snapshot data descriptor from the catalog manager. - scoped_refptr snapshot; - { - LockGuard lock(mutex_); - TRACE("Acquired catalog manager lock"); - - if (current_snapshot_id_.empty()) { - return; - } - - snapshot = FindPtrOrNull(non_txn_snapshot_ids_map_, current_snapshot_id_); - - if (!snapshot) { - LOG(WARNING) << "Snapshot not found: " << current_snapshot_id_; - return; - } - } - - if (!snapshot->IsCreateInProgress()) { - LOG(WARNING) << "Snapshot is not in creating state: " << snapshot->id(); - return; - } - - auto tablet_l = tablet->LockForRead(); - auto l = snapshot->LockForWrite(); - auto* tablet_snapshots = l.mutable_data()->pb.mutable_tablet_snapshots(); - int num_tablets_complete = 0; - - for (int i = 0; i < tablet_snapshots->size(); ++i) { - SysSnapshotEntryPB_TabletSnapshotPB* tablet_info = tablet_snapshots->Mutable(i); - - if (tablet_info->id() == tablet->id()) { - tablet_info->set_state(error ? SysSnapshotEntryPB::FAILED : SysSnapshotEntryPB::COMPLETE); - } - - if (tablet_info->state() == SysSnapshotEntryPB::COMPLETE) { - ++num_tablets_complete; - } - } - - // Finish the snapshot. - bool finished = true; - if (error) { - l.mutable_data()->pb.set_state(SysSnapshotEntryPB::FAILED); - LOG(WARNING) << "Failed snapshot " << snapshot->id() << " on tablet " << tablet->id(); - } else if (num_tablets_complete == tablet_snapshots->size()) { - l.mutable_data()->pb.set_state(SysSnapshotEntryPB::COMPLETE); - LOG(INFO) << "Completed snapshot " << snapshot->id(); - } else { - finished = false; - } - - if (finished) { - LockGuard lock(mutex_); - TRACE("Acquired catalog manager lock"); - current_snapshot_id_ = ""; - } - - VLOG(1) << "Snapshot: " << snapshot->id() - << " PB: " << l.mutable_data()->pb.DebugString() - << " Complete " << num_tablets_complete << " tablets from " << tablet_snapshots->size(); - - const Status s = sys_catalog_->Upsert(leader_ready_term(), snapshot); - - l.CommitOrWarn(s, "updating snapshot in sys-catalog"); -} - -void CatalogManager::HandleRestoreTabletSnapshotResponse(TabletInfo *tablet, bool error) { - LOG(INFO) << "Handling Restore Tablet Snapshot Response for tablet " - << DCHECK_NOTNULL(tablet)->ToString() << (error ? " ERROR" : " OK"); - - // Get the snapshot data descriptor from the catalog manager. - scoped_refptr snapshot; - { - LockGuard lock(mutex_); - TRACE("Acquired catalog manager lock"); - - if (current_snapshot_id_.empty()) { - LOG(WARNING) << "No restoring snapshot: " << current_snapshot_id_; - return; - } - - snapshot = FindPtrOrNull(non_txn_snapshot_ids_map_, current_snapshot_id_); - - if (!snapshot) { - LOG(WARNING) << "Restoring snapshot not found: " << current_snapshot_id_; - return; - } - } - - if (!snapshot->IsRestoreInProgress()) { - LOG(WARNING) << "Snapshot is not in restoring state: " << snapshot->id(); - return; - } - - auto tablet_l = tablet->LockForRead(); - auto l = snapshot->LockForWrite(); - auto* tablet_snapshots = l.mutable_data()->pb.mutable_tablet_snapshots(); - int num_tablets_complete = 0; - - for (int i = 0; i < tablet_snapshots->size(); ++i) { - SysSnapshotEntryPB_TabletSnapshotPB* tablet_info = tablet_snapshots->Mutable(i); - - if (tablet_info->id() == tablet->id()) { - tablet_info->set_state(error ? SysSnapshotEntryPB::FAILED : SysSnapshotEntryPB::COMPLETE); - } - - if (tablet_info->state() == SysSnapshotEntryPB::COMPLETE) { - ++num_tablets_complete; - } - } - - // Finish the snapshot. - if (error || num_tablets_complete == tablet_snapshots->size()) { - if (error) { - l.mutable_data()->pb.set_state(SysSnapshotEntryPB::FAILED); - LOG(WARNING) << "Failed restoring snapshot " << snapshot->id() - << " on tablet " << tablet->id(); - } else { - LOG_IF(DFATAL, num_tablets_complete != tablet_snapshots->size()) - << "Wrong number of tablets"; - l.mutable_data()->pb.set_state(SysSnapshotEntryPB::COMPLETE); - LOG(INFO) << "Restored snapshot " << snapshot->id(); - } - - LockGuard lock(mutex_); - TRACE("Acquired catalog manager lock"); - current_snapshot_id_ = ""; - } - - VLOG(1) << "Snapshot: " << snapshot->id() - << " PB: " << l.mutable_data()->pb.DebugString() - << " Complete " << num_tablets_complete << " tablets from " << tablet_snapshots->size(); - - const Status s = sys_catalog_->Upsert(leader_ready_term(), snapshot); - - l.CommitOrWarn(s, "updating snapshot in sys-catalog"); -} - -void CatalogManager::HandleDeleteTabletSnapshotResponse( - const SnapshotId& snapshot_id, TabletInfo *tablet, bool error) { - LOG(INFO) << "Handling Delete Tablet Snapshot Response for tablet " - << DCHECK_NOTNULL(tablet)->ToString() << (error ? " ERROR" : " OK"); - - // Get the snapshot data descriptor from the catalog manager. - scoped_refptr snapshot; - { - LockGuard lock(mutex_); - TRACE("Acquired catalog manager lock"); - - snapshot = FindPtrOrNull(non_txn_snapshot_ids_map_, snapshot_id); - - if (!snapshot) { - LOG(WARNING) << __func__ << " Snapshot not found: " << snapshot_id; - return; - } - } - - if (!snapshot->IsDeleteInProgress()) { - LOG(WARNING) << "Snapshot is not in deleting state: " << snapshot->id(); - return; - } - - auto tablet_l = tablet->LockForRead(); - auto l = snapshot->LockForWrite(); - auto* tablet_snapshots = l.mutable_data()->pb.mutable_tablet_snapshots(); - int num_tablets_complete = 0; - - for (int i = 0; i < tablet_snapshots->size(); ++i) { - SysSnapshotEntryPB_TabletSnapshotPB* tablet_info = tablet_snapshots->Mutable(i); - - if (tablet_info->id() == tablet->id()) { - tablet_info->set_state(error ? SysSnapshotEntryPB::FAILED : SysSnapshotEntryPB::DELETED); - } - - if (tablet_info->state() != SysSnapshotEntryPB::DELETING) { - ++num_tablets_complete; - } - } - - if (num_tablets_complete == tablet_snapshots->size()) { - // Delete the snapshot. - l.mutable_data()->pb.set_state(SysSnapshotEntryPB::DELETED); - LOG(INFO) << "Deleted snapshot " << snapshot->id(); - - const Status s = sys_catalog_->Delete(leader_ready_term(), snapshot); - - LockGuard lock(mutex_); - TRACE("Acquired catalog manager lock"); - - if (current_snapshot_id_ == snapshot_id) { - current_snapshot_id_ = ""; - } - - // Remove it from the maps. - TRACE("Removing from maps"); - if (non_txn_snapshot_ids_map_.erase(snapshot_id) < 1) { - LOG(WARNING) << "Could not remove snapshot " << snapshot_id << " from map"; - } - - l.CommitOrWarn(s, "deleting snapshot from sys-catalog"); - } else if (error) { - l.mutable_data()->pb.set_state(SysSnapshotEntryPB::FAILED); - LOG(WARNING) << "Failed snapshot " << snapshot->id() << " deletion on tablet " << tablet->id(); - - const Status s = sys_catalog_->Upsert(leader_ready_term(), snapshot); - l.CommitOrWarn(s, "updating snapshot in sys-catalog"); - } - - VLOG(1) << "Deleting snapshot: " << snapshot->id() - << " PB: " << l.mutable_data()->pb.DebugString() - << " Complete " << num_tablets_complete << " tablets from " << tablet_snapshots->size(); -} - Status CatalogManager::CreateSnapshotSchedule(const CreateSnapshotScheduleRequestPB* req, CreateSnapshotScheduleResponsePB* resp, rpc::RpcContext* rpc) { diff --git a/src/yb/master/catalog_manager_if.h b/src/yb/master/catalog_manager_if.h index 5adf630dbfbd..acec165a67c7 100644 --- a/src/yb/master/catalog_manager_if.h +++ b/src/yb/master/catalog_manager_if.h @@ -220,13 +220,6 @@ class CatalogManagerIf { const SnapshotScheduleId& snapshot_schedule_id, HybridTime export_time, CoarseTimePoint deadline) = 0; - virtual void HandleCreateTabletSnapshotResponse(TabletInfo *tablet, bool error) = 0; - - virtual void HandleRestoreTabletSnapshotResponse(TabletInfo *tablet, bool error) = 0; - - virtual void HandleDeleteTabletSnapshotResponse( - const SnapshotId& snapshot_id, TabletInfo *tablet, bool error) = 0; - virtual Status GetTableLocations(const GetTableLocationsRequestPB* req, GetTableLocationsResponsePB* resp) = 0; diff --git a/src/yb/master/clone/clone_state_manager.cc b/src/yb/master/clone/clone_state_manager.cc index 3de4355d9630..f16d1c07540e 100644 --- a/src/yb/master/clone/clone_state_manager.cc +++ b/src/yb/master/clone/clone_state_manager.cc @@ -346,7 +346,6 @@ Status CloneStateManager::StartTabletsCloning( create_snapshot_req.mutable_tables()->Add()->set_table_id(new_table_id); } create_snapshot_req.set_add_indexes(false); - create_snapshot_req.set_transaction_aware(true); create_snapshot_req.set_imported(true); RETURN_NOT_OK(external_funcs_->DoCreateSnapshot( &create_snapshot_req, &create_snapshot_resp, deadline, clone_state->Epoch())); diff --git a/src/yb/master/master_backup.proto b/src/yb/master/master_backup.proto index e3a38c3a25d0..84f4e6954ff7 100644 --- a/src/yb/master/master_backup.proto +++ b/src/yb/master/master_backup.proto @@ -35,7 +35,9 @@ message BackupRowEntryPB { message CreateSnapshotRequestPB { repeated TableIdentifierPB tables = 1; - optional bool transaction_aware = 2; + + // Reserved for non-transactional snapshot deprecated field 'transaction_aware'. + reserved 2; // Automatically add table indexes into the snapshot. optional bool add_indexes = 3 [default = false]; @@ -122,10 +124,8 @@ message ListSnapshotsRequestPB { message ListSnapshotsResponsePB { optional MasterErrorPB error = 1; - - // Non-transactional snapshot currently being created/restored. - optional bytes current_snapshot_id = 2; - + // Reserved for non-transactional snapshot deprecated field 'current_snapshot_id'. + reserved 2; repeated SnapshotInfoPB snapshots = 3; } diff --git a/src/yb/master/xcluster/xcluster_bootstrap_helper.cc b/src/yb/master/xcluster/xcluster_bootstrap_helper.cc index 9b6fce624c6c..5bd052dd0702 100644 --- a/src/yb/master/xcluster/xcluster_bootstrap_helper.cc +++ b/src/yb/master/xcluster/xcluster_bootstrap_helper.cc @@ -385,7 +385,6 @@ SetupUniverseReplicationWithBootstrapHelper::DoReplicationBootstrapImportSnapsho } snapshot_req.set_add_indexes(false); - snapshot_req.set_transaction_aware(true); snapshot_req.set_imported(true); RETURN_NOT_OK( catalog_manager_.CreateTransactionAwareSnapshot(snapshot_req, &snapshot_resp, deadline)); diff --git a/src/yb/tools/test_admin_client.cc b/src/yb/tools/test_admin_client.cc index 2c473bfb4454..3bc908acf0e1 100644 --- a/src/yb/tools/test_admin_client.cc +++ b/src/yb/tools/test_admin_client.cc @@ -174,7 +174,6 @@ Result TestAdminClient::CreateSnapshot( req.set_schedule_id(schedule_id.data(), schedule_id.size()); } else { req.add_tables()->CopyFrom(tables); - req.set_transaction_aware(true); if (retention_duration_hours) { req.set_retention_duration_hours(*retention_duration_hours); } diff --git a/src/yb/tools/yb-admin_cli.cc b/src/yb/tools/yb-admin_cli.cc index b2dd849d4743..eac98498d1a8 100644 --- a/src/yb/tools/yb-admin_cli.cc +++ b/src/yb/tools/yb-admin_cli.cc @@ -216,17 +216,6 @@ Status ListSnapshots(ClusterAdminClient* client, const EnumBitSet