diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index b7333190be52..930d5e38c0e7 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -162,3 +162,4 @@ This is a list of people who have contributed code to the [YugabyteDB](https://g * [asrinivasanyb](https://github.com/asrinivasanyb) * [devansh-ism](https://github.com/devansh-ism) * [swapshivam3](https://github.com/swapshivam3) +* [utkarsh-um-yb](https://github.com/utkarsh-um-yb) diff --git a/bin/yugabyted b/bin/yugabyted index 98c883e5cc9d..6cb33ee5ccbd 100755 --- a/bin/yugabyted +++ b/bin/yugabyted @@ -5145,7 +5145,7 @@ class ControlScript(object): user_configs = Configs.parse_user_config_file(args.config) # User should not be able to override data_dir if (self.configs.saved_data.get("cluster_member") - and args.data_dir is None): + and args.data_dir is None and user_configs.get("data_dir")): user_configs_data_dir = [ path for path in user_configs.get("data_dir").split(',') ] diff --git a/java/yb-pgsql/src/test/java/org/yb/pgsql/TestYbAsh.java b/java/yb-pgsql/src/test/java/org/yb/pgsql/TestYbAsh.java index a0e6f5f7fea0..0647cd3fb51c 100644 --- a/java/yb-pgsql/src/test/java/org/yb/pgsql/TestYbAsh.java +++ b/java/yb-pgsql/src/test/java/org/yb/pgsql/TestYbAsh.java @@ -19,9 +19,14 @@ import java.sql.ResultSet; import java.sql.Statement; +import java.util.ArrayList; import java.util.Collections; -import java.util.concurrent.TimeUnit; +import java.util.List; import java.util.Map; +import java.util.concurrent.Executors; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; import org.junit.Test; import org.junit.runner.RunWith; @@ -280,4 +285,50 @@ public void testCatalogRequests() throws Exception { assertGreaterThan(res1, 0); } } + + /** + * Test that we don't capture more than 'ysql_yb_ash_sample_size' number of samples + */ + @Test + public void testSampleSize() throws Exception { + final int sample_size = 3; + 
setAshConfigAndRestartCluster(ASH_SAMPLING_INTERVAL, sample_size); + try (Statement statement = connection.createStatement()) { + statement.execute("CREATE TABLE test_table(k INT, v TEXT)"); + } + final int NUM_THREADS = 5; + final int NUM_INSERTS_PER_THREAD = 100; + ExecutorService ecs = Executors.newFixedThreadPool(NUM_THREADS); + List> futures = new ArrayList<>(); + for (int i = 1; i <= NUM_THREADS; ++i) { + final int threadIndex = i; + Future future = ecs.submit(() -> { + try (Statement statement = connection.createStatement()) { + for (int j = 0; j < NUM_INSERTS_PER_THREAD; ++j) { + statement.execute(String.format("INSERT INTO test_table VALUES(%d, 'v-%d')", + threadIndex, j)); + } + } catch (Exception e) { + fail(e.getMessage()); + } + }); + futures.add(future); + } + for (Future future : futures) { + future.get(); + } + ecs.shutdown(); + ecs.awaitTermination(30, TimeUnit.SECONDS); + try (Statement statement = connection.createStatement()) { + ResultSet rs = statement.executeQuery("SELECT sample_time, wait_event_component, " + + "count(*) FROM " + ASH_VIEW + " GROUP BY sample_time, wait_event_component"); + while (rs.next()) { + assertLessThanOrEqualTo(rs.getLong("count"), Long.valueOf(sample_size)); + } + rs = statement.executeQuery("SELECT sample_weight FROM " + ASH_VIEW); + while (rs.next()) { + assertGreaterThanOrEqualTo(rs.getDouble("sample_weight"), Double.valueOf(1.0)); + } + } + } } diff --git a/managed/.sbtopts b/managed/.sbtopts index 6b2710ca3ec0..0fa348fccf44 100644 --- a/managed/.sbtopts +++ b/managed/.sbtopts @@ -1,6 +1,6 @@ -J-XX:MinHeapFreeRatio=10 -J-XX:MaxHeapFreeRatio=20 --J-Xmx2g +-J-Xmx3g -J-XX:MaxMetaspaceSize=1g -J-XX:MetaspaceSize=500m -Dlogback.configurationFile=logback-sbt.xml diff --git a/managed/build.sbt b/managed/build.sbt index 2e1d6cd7152d..3011f6ed2dbc 100644 --- a/managed/build.sbt +++ b/managed/build.sbt @@ -203,7 +203,7 @@ libraryDependencies ++= Seq( "org.pac4j" %% "play-pac4j" % "9.0.2", "org.pac4j" % "pac4j-oauth" % 
"4.5.7" exclude("commons-io" , "commons-io"), "org.pac4j" % "pac4j-oidc" % "4.5.7" exclude("commons-io" , "commons-io"), - "org.playframework" %% "play-json" % "3.0.1", + "org.playframework" %% "play-json" % "3.0.4", "commons-validator" % "commons-validator" % "1.8.0", "org.apache.velocity" % "velocity-engine-core" % "2.3", "com.fasterxml.woodstox" % "woodstox-core" % "6.4.0", @@ -968,7 +968,28 @@ dependencyOverrides += "jakarta.annotation" % "jakarta.annotation-api" % "1.3.5" dependencyOverrides += "jakarta.ws.rs" % "jakarta.ws.rs-api" % "2.1.6" % Test dependencyOverrides += "com.fasterxml.jackson.module" % "jackson-module-jaxb-annotations" % "2.10.1" % Test -val jacksonVersion = "2.15.3" +// This is a custom version, built based on 1.0.3 with the following commit added on top: +// https://github.com/apache/pekko/commit/1e41829bf7abeec268b9a409f35051ed7f4e0090. +// This is required to fix TLS infinite loop issue, which causes high CPU usage. +// We can't use 1.1.0-M1 version yet, as it has the following issue: +// https://github.com/playframework/playframework/pull/12662 +// Once the issue is fixed we should migrate back on stable version. 
+val pekkoVersion = "1.0.3-tls-loop-fix" + +val pekkoLibs = Seq( + "org.apache.pekko" %% "pekko-actor-typed", + "org.apache.pekko" %% "pekko-actor", + "org.apache.pekko" %% "pekko-protobuf-v3", + "org.apache.pekko" %% "pekko-serialization-jackson", + "org.apache.pekko" %% "pekko-slf4j", + "org.apache.pekko" %% "pekko-stream", +) + +val pekkoOverrides = pekkoLibs.map(_ % pekkoVersion) + +dependencyOverrides ++= pekkoOverrides + +val jacksonVersion = "2.17.1" val jacksonLibs = Seq( "com.fasterxml.jackson.core" % "jackson-core", @@ -1107,9 +1128,6 @@ val swaggerGenTest: TaskKey[Unit] = taskKey[Unit]( "test generate swagger.json" ) -val swaggerJacksonVersion = "2.11.1" -val swaggerJacksonOverrides = jacksonLibs.map(_ % swaggerJacksonVersion) - lazy val swagger = project .dependsOn(root % "compile->compile;test->test") .settings(commonSettings) @@ -1121,7 +1139,8 @@ lazy val swagger = project "com.github.dwickern" %% "swagger-play3.0" % "4.0.0" ), - dependencyOverrides ++= swaggerJacksonOverrides, + dependencyOverrides ++= pekkoOverrides, + dependencyOverrides ++= jacksonOverrides, dependencyOverrides += "org.scala-lang.modules" %% "scala-xml" % "2.1.0", swaggerGen := Def.taskDyn { diff --git a/managed/devops/pex/Dockerfile b/managed/devops/pex/Dockerfile index 3235bf4d3cb5..484c24c4c2fb 100755 --- a/managed/devops/pex/Dockerfile +++ b/managed/devops/pex/Dockerfile @@ -1,6 +1,12 @@ # Get the latest docker image FROM quay.io/pypa/manylinux2014_x86_64 + +RUN sed -i 's/mirrorlist=/#mirrorlist=/g' /etc/yum.repos.d/CentOS-* +RUN sed -i 's|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g' \ + /etc/yum.repos.d/CentOS-* + + # Perform general yum updates RUN yum --enablerepo=extras -y install epel-release python3-pip RUN pip3 install --upgrade pip diff --git a/managed/devops/yb_release b/managed/devops/yb_release index 3d56544d506a..4c64ccb7d34b 100755 --- a/managed/devops/yb_release +++ b/managed/devops/yb_release @@ -28,9 +28,9 @@ activate_virtualenv 
bin/install_ansible_requirements.sh --force # Python modules build for yugabundle. -cd "$yb_devops_home" -docker build -t "$DOCKER_VENV_IMAGE_NAME" . -docker run -v "$yb_devops_home:/devops" -u "$UID:$(id -g $UID)" "$DOCKER_VENV_IMAGE_NAME" +# cd "$yb_devops_home" +# docker build -t "$DOCKER_VENV_IMAGE_NAME" . +# docker run -v "$yb_devops_home:/devops" -u "$UID:$(id -g $UID)" "$DOCKER_VENV_IMAGE_NAME" # PEX virtual env build for other deployments. cd "$yb_devops_home/pex" diff --git a/managed/devops/yb_release_manifest.json b/managed/devops/yb_release_manifest.json index ce54ca21e3ce..5a007db096f4 100644 --- a/managed/devops/yb_release_manifest.json +++ b/managed/devops/yb_release_manifest.json @@ -6,7 +6,6 @@ "configure-cluster-server.yml", "destroy-instance.yml", "python3_requirements_frozen.txt", - "python*_modules.tar.gz", "preprovision.yml", "use_custom_ssh_port.yml", "yb-otel-collector.yml", diff --git a/managed/project/plugins.sbt b/managed/project/plugins.sbt index 953e0e57fe80..6422980cbe08 100644 --- a/managed/project/plugins.sbt +++ b/managed/project/plugins.sbt @@ -22,7 +22,7 @@ dependencyOverrides += "com.google.googlejavaformat" % "google-java-format" % "1 libraryDependencies += "ch.qos.logback" % "logback-classic" % "1.4.14" // The Play plugin -addSbtPlugin("org.playframework" % "sbt-plugin" % "3.0.0") +addSbtPlugin("org.playframework" % "sbt-plugin" % "3.0.4") // ORM addSbtPlugin("org.playframework" % "sbt-play-ebean" % "8.0.0") diff --git a/managed/src/main/java/MainModule.java b/managed/src/main/java/MainModule.java index f214bdc701ca..8569ae3a172a 100644 --- a/managed/src/main/java/MainModule.java +++ b/managed/src/main/java/MainModule.java @@ -25,7 +25,7 @@ import com.yugabyte.yw.commissioner.SupportBundleCleanup; import com.yugabyte.yw.commissioner.TaskExecutor; import com.yugabyte.yw.commissioner.TaskGarbageCollector; -import com.yugabyte.yw.commissioner.XClusterSyncScheduler; +import com.yugabyte.yw.commissioner.XClusterScheduler; import 
com.yugabyte.yw.commissioner.YbcUpgrade; import com.yugabyte.yw.common.AccessKeyRotationUtil; import com.yugabyte.yw.common.AccessManager; @@ -220,7 +220,7 @@ public void configure() { bind(AccessKeyRotationUtil.class).asEagerSingleton(); bind(GcpEARServiceUtil.class).asEagerSingleton(); bind(YbcUpgrade.class).asEagerSingleton(); - bind(XClusterSyncScheduler.class).asEagerSingleton(); + bind(XClusterScheduler.class).asEagerSingleton(); bind(PerfAdvisorScheduler.class).asEagerSingleton(); bind(PermissionUtil.class).asEagerSingleton(); bind(RoleUtil.class).asEagerSingleton(); diff --git a/managed/src/main/java/com/yugabyte/yw/commissioner/XClusterSyncScheduler.java b/managed/src/main/java/com/yugabyte/yw/commissioner/XClusterScheduler.java similarity index 64% rename from managed/src/main/java/com/yugabyte/yw/commissioner/XClusterSyncScheduler.java rename to managed/src/main/java/com/yugabyte/yw/commissioner/XClusterScheduler.java index ef5e6bd3fc6d..ef03fdeeccad 100644 --- a/managed/src/main/java/com/yugabyte/yw/commissioner/XClusterSyncScheduler.java +++ b/managed/src/main/java/com/yugabyte/yw/commissioner/XClusterScheduler.java @@ -10,15 +10,22 @@ import com.yugabyte.yw.common.config.GlobalConfKeys; import com.yugabyte.yw.common.config.RuntimeConfGetter; import com.yugabyte.yw.common.config.UniverseConfKeys; +import com.yugabyte.yw.common.metrics.MetricService; import com.yugabyte.yw.common.services.YBClientService; import com.yugabyte.yw.common.table.TableInfoUtil; import com.yugabyte.yw.models.HighAvailabilityConfig; +import com.yugabyte.yw.models.Metric; import com.yugabyte.yw.models.Universe; import com.yugabyte.yw.models.XClusterConfig; import com.yugabyte.yw.models.XClusterConfig.XClusterConfigStatusType; import com.yugabyte.yw.models.XClusterTableConfig; -import io.jsonwebtoken.lang.Collections; +import com.yugabyte.yw.models.filters.MetricFilter; +import com.yugabyte.yw.models.helpers.CommonUtils; +import com.yugabyte.yw.models.helpers.KnownAlertLabels; 
+import com.yugabyte.yw.models.helpers.PlatformMetrics; import java.time.Duration; +import java.time.temporal.ChronoUnit; +import java.util.ArrayList; import java.util.Collection; import java.util.List; import java.util.Optional; @@ -32,37 +39,53 @@ @Singleton @Slf4j -public class XClusterSyncScheduler { +public class XClusterScheduler { private final PlatformScheduler platformScheduler; private final RuntimeConfGetter confGetter; private final YBClientService ybClientService; private final XClusterUniverseService xClusterUniverseService; + private final MetricService metricService; @Inject - public XClusterSyncScheduler( + public XClusterScheduler( PlatformScheduler platformScheduler, RuntimeConfGetter confGetter, YBClientService ybClientService, - XClusterUniverseService xClusterUniverseService) { + XClusterUniverseService xClusterUniverseService, + MetricService metricService) { this.platformScheduler = platformScheduler; this.confGetter = confGetter; this.ybClientService = ybClientService; this.xClusterUniverseService = xClusterUniverseService; + this.metricService = metricService; } - private Duration getSchedulerInterval() { + private Duration getSyncSchedulerInterval() { return confGetter.getGlobalConf(GlobalConfKeys.xClusterSyncSchedulerInterval); } + private Duration getMetricsSchedulerInterval() { + return confGetter.getGlobalConf(GlobalConfKeys.xClusterMetricsSchedulerInterval); + } + public void start() { platformScheduler.schedule( - "XClusterSyncScheduler", + "XCluster-Sync-Scheduler", + Duration.ZERO /* initialDelay */, + this.getSyncSchedulerInterval(), + this::syncScheduleRunner); + + platformScheduler.schedule( + "XCluster-Metrics-Scheduler", Duration.ZERO /* initialDelay */, - this.getSchedulerInterval(), - this::scheduleRunner); + this.getMetricsSchedulerInterval(), + this::metricsScheduleRunner); } + // Sync XCluster config methods. 
+ // -------------------------------------------------------------------------------- + private Set filterIndexTables( List allTableInfoList, Collection filterTableIds) { @@ -116,7 +139,7 @@ private Set getTableIdsToRemove( Universe sourceUniverse) { Set cdcStreamsInUniverse = - xClusterUniverseService.getAllCDCStreamsInUniverse(ybClientService, sourceUniverse); + xClusterUniverseService.getAllCDCStreamIdsInUniverse(ybClientService, sourceUniverse); Set tableIdsToRemove = tableIdsInYbaXClusterConfig.stream() @@ -158,13 +181,13 @@ private void updateXClusterConfig( Set tableIdsToRemove) { // remove tables - if (!Collections.isEmpty(tableIdsToRemove)) { + if (!CollectionUtils.isEmpty(tableIdsToRemove)) { log.info("Tables to remove {}", tableIdsToRemove); config.removeTables(tableIdsToRemove); } // add tables - if (!Collections.isEmpty(tableIdsToAdd)) { + if (!CollectionUtils.isEmpty(tableIdsToAdd)) { Set indexTableIdsToAdd = filterIndexTables(sourceUniverseTableInfoList, tableIdsToAdd); Set nonIndexTableIdsToAdd = @@ -172,11 +195,11 @@ private void updateXClusterConfig( .filter(tableId -> !indexTableIdsToAdd.contains(tableId)) .collect(Collectors.toSet()); - if (!Collections.isEmpty(indexTableIdsToAdd)) { + if (!CollectionUtils.isEmpty(indexTableIdsToAdd)) { log.info("Index tables to add {}", indexTableIdsToAdd); config.addTablesIfNotExist(indexTableIdsToAdd, null, true); } - if (!Collections.isEmpty(nonIndexTableIdsToAdd)) { + if (!CollectionUtils.isEmpty(nonIndexTableIdsToAdd)) { log.info("Non index Tables to add {}", nonIndexTableIdsToAdd); config.addTablesIfNotExist(nonIndexTableIdsToAdd, null, false); } @@ -248,7 +271,7 @@ public synchronized void syncXClusterConfig(XClusterConfig config) { } } - private void scheduleRunner() { + private void syncScheduleRunner() { if (HighAvailabilityConfig.isFollower()) { log.debug("Skipping scheduler for follower platform"); return; @@ -264,4 +287,91 @@ private void scheduleRunner() { log.error("Error running xCluster Sync 
Scheduler: {}", e); } } + + // -------------------------------------------------------------------------------- + // End of Sync XCluster config methods. + + // Publish XCluster config metrics methods. + // -------------------------------------------------------------------------------- + + private Metric buildMetricTemplate( + XClusterConfig xClusterConfig, XClusterTableConfig xClusterTableConfig) { + XClusterTableConfig.Status tableStatus = xClusterTableConfig.getStatus(); + if (xClusterTableConfig.getStatus().equals(XClusterTableConfig.Status.Running)) { + if (xClusterTableConfig.getReplicationStatusErrors().size() > 0) { + tableStatus = XClusterTableConfig.Status.ReplicationError; + } + } + + double value = tableStatus.getCode(); + + return MetricService.buildMetricTemplate(PlatformMetrics.XCLUSTER_TABLE_STATUS) + .setExpireTime( + CommonUtils.nowPlusWithoutMillis( + MetricService.DEFAULT_METRIC_EXPIRY_SEC, ChronoUnit.SECONDS)) + .setKeyLabel(KnownAlertLabels.TABLE_UUID, xClusterTableConfig.getTableId()) + .setSourceUuid(xClusterConfig.getUuid()) + .setLabel( + KnownAlertLabels.SOURCE_UNIVERSE_UUID, + xClusterConfig.getSourceUniverseUUID().toString()) + .setLabel( + KnownAlertLabels.TARGET_UNIVERSE_UUID, + xClusterConfig.getTargetUniverseUUID().toString()) + .setLabel(KnownAlertLabels.TABLE_TYPE, xClusterConfig.getTableType().toString()) + .setLabel( + KnownAlertLabels.XCLUSTER_REPLICATION_GROUP_NAME, + xClusterConfig.getReplicationGroupName()) + .setValue(value); + } + + private List collectMetrics(XClusterConfig xClusterConfig) { + List metricsList = new ArrayList<>(); + Universe targetUniverse = Universe.getOrBadRequest(xClusterConfig.getTargetUniverseUUID()); + Universe sourceUniverse = Universe.getOrBadRequest(xClusterConfig.getSourceUniverseUUID()); + if (!xClusterConfig.getStatus().equals(XClusterConfigStatusType.Running)) { + return metricsList; + } + if (sourceUniverse.getUniverseDetails().updateInProgress + || 
targetUniverse.getUniverseDetails().updateInProgress) { + return metricsList; + } + if (sourceUniverse.getUniverseDetails().universePaused + || targetUniverse.getUniverseDetails().universePaused) { + return metricsList; + } + + XClusterConfigTaskBase.setReplicationStatus( + xClusterUniverseService, ybClientService, xClusterConfig); + Set xClusterTableConfigs = xClusterConfig.getTableDetails(); + xClusterTableConfigs.forEach( + tableConfig -> { + metricsList.add(buildMetricTemplate(xClusterConfig, tableConfig)); + }); + + return metricsList; + } + + private void metricsScheduleRunner() { + if (HighAvailabilityConfig.isFollower()) { + log.debug("Skipping scheduler for follower platform"); + return; + } + log.info("Running xCluster Metrics Scheduler..."); + try { + List xClusterConfigs = XClusterConfig.getAllXClusterConfigs(); + List metricsList = new ArrayList<>(); + for (XClusterConfig config : xClusterConfigs) { + metricsList.addAll(collectMetrics(config)); + } + MetricFilter toClean = + MetricFilter.builder().metricName(PlatformMetrics.XCLUSTER_TABLE_STATUS).build(); + metricService.cleanAndSave(metricsList, toClean); + metricService.setOkStatusMetric( + MetricService.buildMetricTemplate(PlatformMetrics.XCLUSTER_METRIC_PROCESSOR_STATUS)); + } catch (Exception e) { + metricService.setFailureStatusMetric( + MetricService.buildMetricTemplate(PlatformMetrics.XCLUSTER_METRIC_PROCESSOR_STATUS)); + log.error("Error running xCluster Metrics Scheduler: {}", e); + } + } } diff --git a/managed/src/main/java/com/yugabyte/yw/commissioner/tasks/CreateBackup.java b/managed/src/main/java/com/yugabyte/yw/commissioner/tasks/CreateBackup.java index d2d9c0409c29..c9d53550c5b8 100644 --- a/managed/src/main/java/com/yugabyte/yw/commissioner/tasks/CreateBackup.java +++ b/managed/src/main/java/com/yugabyte/yw/commissioner/tasks/CreateBackup.java @@ -263,6 +263,21 @@ public void runScheduledBackup( schedule.updateIncrementBacklogStatus(false); log.debug("Schedule {} increment backlog 
status is set to false", schedule.getScheduleUUID()); } + if (baseBackupUUID == null + && ScheduleUtil.isIncrementalBackupSchedule(schedule.getScheduleUUID())) { + // Update incremental backup task cycle while executing full backups. + long incrementalBackupFrequency = ScheduleUtil.getIncrementalBackupFrequency(schedule); + if (incrementalBackupFrequency != 0L) { + Date updatedNextIncrementalBackupTime = + new Date(new Date().getTime() + incrementalBackupFrequency); + log.debug( + "Updating next incremental backup task time for schedule {} to {} as full backup is" + + " preformed.", + schedule.getScheduleUUID(), + updatedNextIncrementalBackupTime); + schedule.updateNextIncrementScheduleTaskTime(updatedNextIncrementalBackupTime); + } + } log.info( "Submitted backup for universe: {}, task uuid = {}.", taskParams.getUniverseUUID(), diff --git a/managed/src/main/java/com/yugabyte/yw/commissioner/tasks/XClusterConfigTaskBase.java b/managed/src/main/java/com/yugabyte/yw/commissioner/tasks/XClusterConfigTaskBase.java index aeedb1be20ef..1dbed1019190 100644 --- a/managed/src/main/java/com/yugabyte/yw/commissioner/tasks/XClusterConfigTaskBase.java +++ b/managed/src/main/java/com/yugabyte/yw/commissioner/tasks/XClusterConfigTaskBase.java @@ -732,6 +732,17 @@ private static Set getConsumerTableIdsFromClusterConfig( return consumerTableIdsFromClusterConfig; } + private static Set getConsumerTableIdsFromClusterConfig( + CatalogEntityInfo.SysClusterConfigEntryPB clusterConfig, String replicationGroupName) { + Set consumerTableIdsFromClusterConfig = new HashSet<>(); + ProducerEntryPB replicationGroup = + getReplicationGroupEntry(clusterConfig, replicationGroupName); + replicationGroup.getStreamMapMap().values().stream() + .map(StreamEntryPB::getConsumerTableId) + .forEach(consumerTableIdsFromClusterConfig::add); + return consumerTableIdsFromClusterConfig; + } + public static ProducerEntryPB getReplicationGroupEntry( CatalogEntityInfo.SysClusterConfigEntryPB clusterConfig, String 
replicationGroupName) { return clusterConfig.getConsumerRegistry().getProducerMapOrThrow(replicationGroupName); @@ -1508,6 +1519,11 @@ public static String getTableId(MasterDdlOuterClass.ListTablesResponsePB.TableIn return tableInfo.getId().toStringUtf8(); } + public static String getNamespaceId( + MasterDdlOuterClass.ListTablesResponsePB.TableInfo tableInfo) { + return tableInfo.getNamespace().getId().toStringUtf8(); + } + public static Set getTableIds( Collection tablesInfoList) { if (tablesInfoList == null) { @@ -1722,7 +1738,9 @@ public static void verifyTablesNotInReplication( } public static void setReplicationStatus( - XClusterUniverseService xClusterUniverseService, XClusterConfig xClusterConfig) { + XClusterUniverseService xClusterUniverseService, + YBClientService ybClientService, + XClusterConfig xClusterConfig) { Optional targetUniverseOptional = Objects.isNull(xClusterConfig.getTargetUniverseUUID()) ? Optional.empty() @@ -1834,6 +1852,164 @@ public static void setReplicationStatus( log.error("XClusterConfigTaskBase.isBootstrapRequired hit error : {}", e.getMessage()); } } + + // Update the xCluster config intermittently with table details not in replication. 
+ if (xClusterConfig.getTableType().equals(XClusterConfig.TableType.YSQL) + && xClusterConfig.getStatus().equals(XClusterConfig.XClusterConfigStatusType.Running)) { + try { + + Universe sourceUniverse = Universe.getOrBadRequest(xClusterConfig.getSourceUniverseUUID()); + List sourceUniverseTableInfoList = + XClusterConfigTaskBase.getTableInfoList(ybClientService, sourceUniverse); + + Set sourceUniverseTableIds = + sourceUniverseTableInfoList.stream() + .map(tableInfo -> XClusterConfigTaskBase.getTableId(tableInfo)) + .collect(Collectors.toSet()); + + xClusterConfig.getTableDetails().stream() + .filter(tableConfig -> tableConfig.getStatus() == XClusterTableConfig.Status.Running) + .forEach( + tableConfig -> { + if (!sourceUniverseTableIds.contains(tableConfig.getTableId())) { + tableConfig.setStatus(XClusterTableConfig.Status.DroppedFromSource); + } + }); + + List tableConfigs = + getXClusterTableConfigFromReplication( + xClusterUniverseService, ybClientService, xClusterConfig); + tableConfigs.forEach( + tableConfig -> { + Optional xClusterTableConfig = + xClusterConfig.getTableDetails().stream() + .filter(tConfig -> tConfig.getTableId().equals(tableConfig.getTableId())) + .findFirst(); + if (xClusterTableConfig.isPresent()) { + xClusterTableConfig.get().setStatus(tableConfig.getStatus()); + } else { + xClusterConfig.addTableConfig(tableConfig); + } + }); + } catch (Exception e) { + log.error( + "Error getting table details not in replication for xCluster config {}", + xClusterConfig.getUuid(), + e); + } + } + } + + public static List getXClusterTableConfigFromReplication( + XClusterUniverseService xClusterUniverseService, + YBClientService ybClientService, + XClusterConfig xClusterConfig) { + List tableConfigs = new ArrayList<>(); + if (!xClusterConfig.getTableType().equals(XClusterConfig.TableType.YSQL)) { + return tableConfigs; + } + + Universe targetUniverse = Universe.getOrBadRequest(xClusterConfig.getTargetUniverseUUID()); + 
CatalogEntityInfo.SysClusterConfigEntryPB clusterConfig; + try (YBClient client = + ybClientService.getClient( + targetUniverse.getMasterAddresses(), targetUniverse.getCertificateNodetoNode())) { + clusterConfig = getClusterConfig(client, targetUniverse.getUniverseUUID()); + } catch (Exception e) { + log.error( + "Error getting cluster config for universe {}", targetUniverse.getUniverseUUID(), e); + return tableConfigs; + } + + tableConfigs.addAll( + getTargetOnlyTable( + xClusterUniverseService, ybClientService, xClusterConfig, clusterConfig)); + tableConfigs.addAll( + getSourceOnlyTable( + xClusterUniverseService, ybClientService, xClusterConfig, clusterConfig)); + + return tableConfigs; + } + + public static List getTargetOnlyTable( + XClusterUniverseService xClusterUniverseService, + YBClientService ybClientService, + XClusterConfig xClusterConfig, + CatalogEntityInfo.SysClusterConfigEntryPB clusterConfig) { + + try { + Set tableIdsInReplicationOnTargetUniverse = + getConsumerTableIdsFromClusterConfig( + clusterConfig, xClusterConfig.getReplicationGroupName()); + + List targetUniverseTableInfoList = + getTableInfoList( + ybClientService, Universe.getOrBadRequest(xClusterConfig.getTargetUniverseUUID())); + + return extractTablesNotInReplication( + tableIdsInReplicationOnTargetUniverse, + targetUniverseTableInfoList, + XClusterTableConfig.Status.ExtraTableOnTarget); + } catch (Exception e) { + log.error( + "Error getting target only table for xCluster config {}", xClusterConfig.getUuid(), e); + return new ArrayList<>(); + } + } + + public static List getSourceOnlyTable( + XClusterUniverseService xClusterUniverseService, + YBClientService ybClientService, + XClusterConfig xClusterConfig, + CatalogEntityInfo.SysClusterConfigEntryPB clusterConfig) { + + try { + Set tableIdsInReplicationOnSourceUniverse = + getProducerTableIdsFromClusterConfig( + clusterConfig, xClusterConfig.getReplicationGroupName()); + + List sourceUniverseTableInfoList = + getTableInfoList( + 
ybClientService, Universe.getOrBadRequest(xClusterConfig.getSourceUniverseUUID())); + + return extractTablesNotInReplication( + tableIdsInReplicationOnSourceUniverse, + sourceUniverseTableInfoList, + XClusterTableConfig.Status.ExtraTableOnSource); + } catch (Exception e) { + log.error( + "Error getting source only table for xCluster config {}", xClusterConfig.getUuid(), e); + return new ArrayList<>(); + } + } + + public static List extractTablesNotInReplication( + Set tablesIdsInReplication, + List allTables, + XClusterTableConfig.Status missingTableStatus) { + + Set namespaceIdsInReplication = + allTables.stream() + .filter(tableInfo -> tablesIdsInReplication.contains(getTableId(tableInfo))) + .map(tableInfo -> tableInfo.getNamespace().getId().toStringUtf8()) + .collect(Collectors.toSet()); + + Set tableIdsNotInReplication = + allTables.stream() + .filter(tableInfo -> namespaceIdsInReplication.contains(getNamespaceId(tableInfo))) + .filter(tableInfo -> isXClusterSupported(tableInfo)) + .filter(tableInfo -> !tablesIdsInReplication.contains(getTableId(tableInfo))) + .map(tableInfo -> getTableId(tableInfo)) + .collect(Collectors.toSet()); + + List tableConfigNotInReplication = new ArrayList<>(); + for (String tableId : tableIdsNotInReplication) { + XClusterTableConfig tableConfig = new XClusterTableConfig(); + tableConfig.setTableId(tableId); + tableConfig.setStatus(missingTableStatus); + tableConfigNotInReplication.add(tableConfig); + } + return tableConfigNotInReplication; } public Set getNamespaces( diff --git a/managed/src/main/java/com/yugabyte/yw/commissioner/tasks/subtasks/xcluster/XClusterConfigSync.java b/managed/src/main/java/com/yugabyte/yw/commissioner/tasks/subtasks/xcluster/XClusterConfigSync.java index 7a91e55ceaca..04db58865726 100644 --- a/managed/src/main/java/com/yugabyte/yw/commissioner/tasks/subtasks/xcluster/XClusterConfigSync.java +++ b/managed/src/main/java/com/yugabyte/yw/commissioner/tasks/subtasks/xcluster/XClusterConfigSync.java @@ -3,7 
+3,6 @@ import com.google.common.net.HostAndPort; import com.yugabyte.yw.commissioner.BaseTaskDependencies; -import com.yugabyte.yw.commissioner.XClusterSyncScheduler; import com.yugabyte.yw.commissioner.tasks.XClusterConfigTaskBase; import com.yugabyte.yw.common.XClusterUniverseService; import com.yugabyte.yw.common.table.TableInfoUtil; @@ -34,15 +33,10 @@ @Slf4j public class XClusterConfigSync extends XClusterConfigTaskBase { - private final XClusterSyncScheduler xClusterSyncScheduler; - @Inject protected XClusterConfigSync( - BaseTaskDependencies baseTaskDependencies, - XClusterUniverseService xClusterUniverseService, - XClusterSyncScheduler xClusterSyncScheduler) { + BaseTaskDependencies baseTaskDependencies, XClusterUniverseService xClusterUniverseService) { super(baseTaskDependencies, xClusterUniverseService); - this.xClusterSyncScheduler = xClusterSyncScheduler; } @Override diff --git a/managed/src/main/java/com/yugabyte/yw/common/AppInit.java b/managed/src/main/java/com/yugabyte/yw/common/AppInit.java index 31a86746d5b2..1b1cc4b60894 100644 --- a/managed/src/main/java/com/yugabyte/yw/common/AppInit.java +++ b/managed/src/main/java/com/yugabyte/yw/common/AppInit.java @@ -22,7 +22,7 @@ import com.yugabyte.yw.commissioner.SetUniverseKey; import com.yugabyte.yw.commissioner.SupportBundleCleanup; import com.yugabyte.yw.commissioner.TaskGarbageCollector; -import com.yugabyte.yw.commissioner.XClusterSyncScheduler; +import com.yugabyte.yw.commissioner.XClusterScheduler; import com.yugabyte.yw.commissioner.YbcUpgrade; import com.yugabyte.yw.commissioner.tasks.subtasks.cloud.CloudImageBundleSetup; import com.yugabyte.yw.common.ConfigHelper.ConfigType; @@ -108,7 +108,7 @@ public AppInit( SupportBundleCleanup supportBundleCleanup, NodeAgentPoller nodeAgentPoller, YbcUpgrade ybcUpgrade, - XClusterSyncScheduler xClusterSyncScheduler, + XClusterScheduler xClusterScheduler, PerfAdvisorGarbageCollector perfRecGC, SnapshotCleanup snapshotCleanup, FileDataService 
fileDataService, @@ -328,8 +328,8 @@ public AppInit( shellLogsManager.startLogsGC(); nodeAgentPoller.init(); pitrConfigPoller.start(); - xClusterSyncScheduler.start(); autoMasterFailoverScheduler.init(); + xClusterScheduler.start(); ybcUpgrade.start(); diff --git a/managed/src/main/java/com/yugabyte/yw/common/ScheduleUtil.java b/managed/src/main/java/com/yugabyte/yw/common/ScheduleUtil.java index 1865e79e9b83..a3043dd1df68 100644 --- a/managed/src/main/java/com/yugabyte/yw/common/ScheduleUtil.java +++ b/managed/src/main/java/com/yugabyte/yw/common/ScheduleUtil.java @@ -47,6 +47,18 @@ public static Backup fetchLatestSuccessfulBackupForSchedule( .orElse(null); } + public static Backup fetchInProgressBackupForSchedule(UUID customerUUID, UUID scheduleUUID) { + Schedule schedule = Schedule.getOrBadRequest(customerUUID, scheduleUUID); + ScheduleTask scheduleTask = ScheduleTask.getLastTask(schedule.getScheduleUUID()); + if (scheduleTask == null) { + return null; + } + return Backup.fetchAllBackupsByTaskUUID(scheduleTask.getTaskUUID()).stream() + .filter(bkp -> bkp.getState().equals(BackupState.InProgress)) + .findFirst() + .orElse(null); + } + public static long getIncrementalBackupFrequency(Schedule schedule) { BackupRequestParams scheduleParams = Json.fromJson(schedule.getTaskParams(), BackupRequestParams.class); diff --git a/managed/src/main/java/com/yugabyte/yw/common/XClusterUniverseService.java b/managed/src/main/java/com/yugabyte/yw/common/XClusterUniverseService.java index 609437cfa2f2..fe26a0d9015e 100644 --- a/managed/src/main/java/com/yugabyte/yw/common/XClusterUniverseService.java +++ b/managed/src/main/java/com/yugabyte/yw/common/XClusterUniverseService.java @@ -45,6 +45,7 @@ import org.apache.commons.collections4.CollectionUtils; import org.yb.cdc.CdcConsumer; import org.yb.cdc.CdcConsumer.StreamEntryPB; +import org.yb.client.CDCStreamInfo; import org.yb.client.GetMasterClusterConfigResponse; import org.yb.client.GetReplicationStatusResponse; import 
org.yb.client.GetXClusterSafeTimeResponse; @@ -472,7 +473,14 @@ public Map isBootstrapRequired( * @return A set of strings representing the CDC stream IDs in the universe. * @throws RuntimeException if there is an error listing the CDC streams. */ - public Set getAllCDCStreamsInUniverse( + public Set getAllCDCStreamIdsInUniverse( + YBClientService ybClientService, Universe universe) { + return getAllCDCStreamInfoInUniverse(ybClientService, universe).stream() + .map(CDCStreamInfo::getStreamId) + .collect(Collectors.toSet()); + } + + public Set getAllCDCStreamInfoInUniverse( YBClientService ybClientService, Universe universe) { try (YBClient client = ybClientService.getClient( @@ -484,9 +492,7 @@ public Set getAllCDCStreamsInUniverse( "Error listing cdc streams for universe %s. Error: %s", universe.getName(), cdcStreamsResponse.errorMessage())); } - return cdcStreamsResponse.getStreams().stream() - .map(cdcStream -> cdcStream.getStreamId()) - .collect(Collectors.toSet()); + return cdcStreamsResponse.getStreams().stream().collect(Collectors.toSet()); } catch (Exception e) { log.error("XClusterUniverseService.getCDCStreams hit error : {}", e.getMessage()); throw new RuntimeException(e); diff --git a/managed/src/main/java/com/yugabyte/yw/common/config/GlobalConfKeys.java b/managed/src/main/java/com/yugabyte/yw/common/config/GlobalConfKeys.java index e1036fe73f8c..4247326f9333 100644 --- a/managed/src/main/java/com/yugabyte/yw/common/config/GlobalConfKeys.java +++ b/managed/src/main/java/com/yugabyte/yw/common/config/GlobalConfKeys.java @@ -1355,4 +1355,12 @@ public class GlobalConfKeys extends RuntimeConfigKeysModule { "Interval at which the XCluster Sync Scheduler runs", ConfDataType.DurationType, ImmutableList.of(ConfKeyTags.BETA)); + public static final ConfKeyInfo xClusterMetricsSchedulerInterval = + new ConfKeyInfo<>( + "yb.xcluster.xcluster_metrics_scheduler_interval", + ScopeType.GLOBAL, + "XCluster Metrics Scheduler Interval", + "Interval at which the XCluster 
Metrics Scheduler runs", + ConfDataType.DurationType, + ImmutableList.of(ConfKeyTags.BETA)); } diff --git a/managed/src/main/java/com/yugabyte/yw/common/gflags/GFlagsValidation.java b/managed/src/main/java/com/yugabyte/yw/common/gflags/GFlagsValidation.java index fb33d0a3ac6e..1fd30e85de3a 100644 --- a/managed/src/main/java/com/yugabyte/yw/common/gflags/GFlagsValidation.java +++ b/managed/src/main/java/com/yugabyte/yw/common/gflags/GFlagsValidation.java @@ -86,7 +86,7 @@ public class GFlagsValidation { Util.DB_VERSION_METADATA_FILENAME, YSQL_MIGRATION_FILES_LIST_FILE_NAME); - public static final String DB_BUILD_WITH_FLAG_FILES = "2.17.0.0-b1"; + public static final String DB_BUILD_WITH_FLAG_FILES = "2.16.0.0-b1"; @Inject public GFlagsValidation( diff --git a/managed/src/main/java/com/yugabyte/yw/controllers/DrConfigController.java b/managed/src/main/java/com/yugabyte/yw/controllers/DrConfigController.java index 048139bc753c..aa26edf9299f 100644 --- a/managed/src/main/java/com/yugabyte/yw/controllers/DrConfigController.java +++ b/managed/src/main/java/com/yugabyte/yw/controllers/DrConfigController.java @@ -3,7 +3,7 @@ import com.google.common.collect.Sets; import com.google.inject.Inject; import com.yugabyte.yw.commissioner.Commissioner; -import com.yugabyte.yw.commissioner.XClusterSyncScheduler; +import com.yugabyte.yw.commissioner.XClusterScheduler; import com.yugabyte.yw.commissioner.tasks.XClusterConfigTaskBase; import com.yugabyte.yw.common.DrConfigStates.State; import com.yugabyte.yw.common.PlatformServiceException; @@ -98,7 +98,7 @@ public class DrConfigController extends AuthenticatedController { private final RuntimeConfGetter confGetter; private final XClusterUniverseService xClusterUniverseService; private final AutoFlagUtil autoFlagUtil; - private final XClusterSyncScheduler xClusterSyncScheduler; + private final XClusterScheduler xClusterScheduler; @Inject public DrConfigController( @@ -110,7 +110,7 @@ public DrConfigController( RuntimeConfGetter 
confGetter, XClusterUniverseService xClusterUniverseService, AutoFlagUtil autoFlagUtil, - XClusterSyncScheduler xClusterSyncScheduler) { + XClusterScheduler xClusterScheduler) { this.commissioner = commissioner; this.metricQueryHelper = metricQueryHelper; this.backupHelper = backupHelper; @@ -119,7 +119,7 @@ public DrConfigController( this.confGetter = confGetter; this.xClusterUniverseService = xClusterUniverseService; this.autoFlagUtil = autoFlagUtil; - this.xClusterSyncScheduler = xClusterSyncScheduler; + this.xClusterScheduler = xClusterScheduler; } /** @@ -935,11 +935,12 @@ public Result get(UUID customerUUID, UUID drUUID) { DrConfig drConfig = DrConfig.getValidConfigOrBadRequest(customer, drUUID); for (XClusterConfig xClusterConfig : drConfig.getXClusterConfigs()) { - XClusterConfigTaskBase.setReplicationStatus(this.xClusterUniverseService, xClusterConfig); + XClusterConfigTaskBase.setReplicationStatus( + this.xClusterUniverseService, this.ybService, xClusterConfig); } XClusterConfig activeXClusterConfig = drConfig.getActiveXClusterConfig(); - xClusterSyncScheduler.syncXClusterConfig(activeXClusterConfig); + xClusterScheduler.syncXClusterConfig(activeXClusterConfig); activeXClusterConfig.refresh(); DrConfigGetResp resp = new DrConfigGetResp(drConfig, activeXClusterConfig); diff --git a/managed/src/main/java/com/yugabyte/yw/controllers/XClusterConfigController.java b/managed/src/main/java/com/yugabyte/yw/controllers/XClusterConfigController.java index 0baab1a1067a..9664a454379a 100644 --- a/managed/src/main/java/com/yugabyte/yw/controllers/XClusterConfigController.java +++ b/managed/src/main/java/com/yugabyte/yw/controllers/XClusterConfigController.java @@ -15,7 +15,7 @@ import com.google.common.collect.HashBiMap; import com.google.inject.Inject; import com.yugabyte.yw.commissioner.Commissioner; -import com.yugabyte.yw.commissioner.XClusterSyncScheduler; +import com.yugabyte.yw.commissioner.XClusterScheduler; import 
com.yugabyte.yw.commissioner.tasks.XClusterConfigTaskBase; import com.yugabyte.yw.common.PlatformServiceException; import com.yugabyte.yw.common.Util; @@ -104,7 +104,7 @@ public class XClusterConfigController extends AuthenticatedController { private final RuntimeConfGetter confGetter; private final XClusterUniverseService xClusterUniverseService; private final AutoFlagUtil autoFlagUtil; - private final XClusterSyncScheduler xClusterSyncScheduler; + private final XClusterScheduler xClusterScheduler; @Inject public XClusterConfigController( @@ -116,7 +116,7 @@ public XClusterConfigController( RuntimeConfGetter confGetter, XClusterUniverseService xClusterUniverseService, AutoFlagUtil autoFlagUtil, - XClusterSyncScheduler xClusterSyncScheduler) { + XClusterScheduler xClusterScheduler) { this.commissioner = commissioner; this.metricQueryHelper = metricQueryHelper; this.backupHelper = backupHelper; @@ -125,7 +125,7 @@ public XClusterConfigController( this.confGetter = confGetter; this.xClusterUniverseService = xClusterUniverseService; this.autoFlagUtil = autoFlagUtil; - this.xClusterSyncScheduler = xClusterSyncScheduler; + this.xClusterScheduler = xClusterScheduler; } /** @@ -324,7 +324,7 @@ public Result get(UUID customerUUID, UUID xclusterConfigUUID) { XClusterConfig xClusterConfig = XClusterConfig.getValidConfigOrBadRequest(customer, xclusterConfigUUID); - xClusterSyncScheduler.syncXClusterConfig(xClusterConfig); + xClusterScheduler.syncXClusterConfig(xClusterConfig); xClusterConfig.refresh(); // Set tableType if it is UNKNOWN. 
This is useful for xCluster configs that were created in old @@ -380,7 +380,8 @@ public Result get(UUID customerUUID, UUID xclusterConfigUUID) { lagMetricData = Json.newObject().put("error", errorMsg); } - XClusterConfigTaskBase.setReplicationStatus(this.xClusterUniverseService, xClusterConfig); + XClusterConfigTaskBase.setReplicationStatus( + this.xClusterUniverseService, this.ybService, xClusterConfig); // Wrap XClusterConfig with lag metric data. XClusterConfigGetResp resp = new XClusterConfigGetResp(); diff --git a/managed/src/main/java/com/yugabyte/yw/forms/BackupTableParams.java b/managed/src/main/java/com/yugabyte/yw/forms/BackupTableParams.java index c9d34a5fb286..525d9c7ae106 100644 --- a/managed/src/main/java/com/yugabyte/yw/forms/BackupTableParams.java +++ b/managed/src/main/java/com/yugabyte/yw/forms/BackupTableParams.java @@ -3,6 +3,7 @@ package com.yugabyte.yw.forms; import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.annotation.JsonProperty; import com.yugabyte.yw.common.Util; import com.yugabyte.yw.common.backuprestore.BackupUtil; import com.yugabyte.yw.models.Backup.StorageConfigType; @@ -340,10 +341,12 @@ public List getTableNameList() { return new ArrayList(); } + @JsonProperty("isFullBackup") public boolean isFullBackup() { return isFullBackup; } + @JsonProperty("isFullBackup") public void setFullBackup(boolean isFullBackup) { this.isFullBackup = isFullBackup; } diff --git a/managed/src/main/java/com/yugabyte/yw/forms/MetricQueryParams.java b/managed/src/main/java/com/yugabyte/yw/forms/MetricQueryParams.java index 48c7bf5201de..29e933d4faa3 100644 --- a/managed/src/main/java/com/yugabyte/yw/forms/MetricQueryParams.java +++ b/managed/src/main/java/com/yugabyte/yw/forms/MetricQueryParams.java @@ -2,6 +2,7 @@ package com.yugabyte.yw.forms; +import com.fasterxml.jackson.annotation.JsonProperty; import com.yugabyte.yw.commissioner.tasks.UniverseTaskBase; import com.yugabyte.yw.metrics.MetricSettings; import 
com.yugabyte.yw.models.common.YbaApi; @@ -67,6 +68,7 @@ public class MetricQueryParams { @ApiModelProperty(value = "YbaApi Internal. Is Recharts") @YbaApi(visibility = YbaApiVisibility.INTERNAL, sinceYBAVersion = "2.14.0.0") + @JsonProperty("isRecharts") private boolean isRecharts; @ApiModelProperty(value = "YbaApi Internal. List of metrics with custom settings") diff --git a/managed/src/main/java/com/yugabyte/yw/models/Backup.java b/managed/src/main/java/com/yugabyte/yw/models/Backup.java index 1865f0516ca7..c607856c32fe 100644 --- a/managed/src/main/java/com/yugabyte/yw/models/Backup.java +++ b/managed/src/main/java/com/yugabyte/yw/models/Backup.java @@ -139,6 +139,7 @@ public enum BackupState { .put(BackupState.Failed, BackupState.QueuedForDeletion) .put(BackupState.Stopped, BackupState.QueuedForDeletion) .put(BackupState.Stopped, BackupState.InProgress) + .put(BackupState.Stopped, BackupState.FailedToDelete) .put(BackupState.Stopping, BackupState.QueuedForDeletion) .put(BackupState.InProgress, BackupState.QueuedForDeletion) .put(BackupState.Completed, BackupState.QueuedForDeletion) diff --git a/managed/src/main/java/com/yugabyte/yw/models/BackupResp.java b/managed/src/main/java/com/yugabyte/yw/models/BackupResp.java index f03729744213..e731aa47efb9 100644 --- a/managed/src/main/java/com/yugabyte/yw/models/BackupResp.java +++ b/managed/src/main/java/com/yugabyte/yw/models/BackupResp.java @@ -1,6 +1,7 @@ package com.yugabyte.yw.models; import com.fasterxml.jackson.annotation.JsonFormat; +import com.fasterxml.jackson.annotation.JsonProperty; import com.yugabyte.yw.models.Backup.BackupCategory; import com.yugabyte.yw.models.Backup.BackupState; import com.yugabyte.yw.models.Backup.StorageConfigType; @@ -36,7 +37,10 @@ public class BackupResp { Boolean onDemand; StorageConfigType storageConfigType; BackupCategory category; + + @JsonProperty("isFullBackup") Boolean isFullBackup; + TableType backupType; CommonBackupInfo commonBackupInfo; String scheduleName; diff --git 
a/managed/src/main/java/com/yugabyte/yw/models/Schedule.java b/managed/src/main/java/com/yugabyte/yw/models/Schedule.java index 51e9441758af..60f4ab3a083b 100644 --- a/managed/src/main/java/com/yugabyte/yw/models/Schedule.java +++ b/managed/src/main/java/com/yugabyte/yw/models/Schedule.java @@ -13,8 +13,6 @@ import com.cronutils.model.time.ExecutionTime; import com.cronutils.parser.CronParser; import com.fasterxml.jackson.annotation.JsonFormat; -import com.fasterxml.jackson.annotation.JsonIgnore; -import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.node.ObjectNode; @@ -126,8 +124,6 @@ public SortByIF getOrderField() { @Column(nullable = false) private UUID customerUUID; - @JsonProperty - @JsonIgnore public void setCustomerUUID(UUID customerUUID) { this.customerUUID = customerUUID; ObjectNode scheduleTaskParams = (ObjectNode) getTaskParams(); diff --git a/managed/src/main/java/com/yugabyte/yw/models/Users.java b/managed/src/main/java/com/yugabyte/yw/models/Users.java index ddbe080fc9a7..2e48bc504c1c 100644 --- a/managed/src/main/java/com/yugabyte/yw/models/Users.java +++ b/managed/src/main/java/com/yugabyte/yw/models/Users.java @@ -8,6 +8,7 @@ import com.fasterxml.jackson.annotation.JsonFormat; import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.annotation.JsonProperty; import com.yugabyte.yw.common.PlatformServiceException; import com.yugabyte.yw.common.concurrent.KeyLock; import com.yugabyte.yw.common.encryption.HashBuilder; @@ -172,6 +173,7 @@ public void setPassword(String password) { private Role role; @ApiModelProperty(value = "True if the user is the primary user") + @JsonProperty("isPrimary") private boolean isPrimary; @ApiModelProperty(value = "User Type") diff --git a/managed/src/main/java/com/yugabyte/yw/models/XClusterConfig.java 
b/managed/src/main/java/com/yugabyte/yw/models/XClusterConfig.java index b0d994451bd2..4c1c40e05a4c 100644 --- a/managed/src/main/java/com/yugabyte/yw/models/XClusterConfig.java +++ b/managed/src/main/java/com/yugabyte/yw/models/XClusterConfig.java @@ -1102,7 +1102,7 @@ public static void checkXClusterConfigInCustomer( } } - private void addTableConfig(XClusterTableConfig tableConfig) { + public void addTableConfig(XClusterTableConfig tableConfig) { if (!this.getTables().add(tableConfig)) { log.debug( "Table with id {} already exists in xCluster config ({})", diff --git a/managed/src/main/java/com/yugabyte/yw/models/XClusterTableConfig.java b/managed/src/main/java/com/yugabyte/yw/models/XClusterTableConfig.java index 09c97b000dcf..9a5bc7ec770e 100644 --- a/managed/src/main/java/com/yugabyte/yw/models/XClusterTableConfig.java +++ b/managed/src/main/java/com/yugabyte/yw/models/XClusterTableConfig.java @@ -118,19 +118,26 @@ public class XClusterTableConfig extends Model { // Statuses are declared in reverse severity for showing tables in UI with specific order. public enum Status { - Failed("Failed"), - Error("Error"), // Not stored in YBA DB. - Warning("Warning"), // Not stored in YBA DB. - UnableToFetch("UnableToFetch"), // Not stored in YBA DB. - Updating("Updating"), - Bootstrapping("Bootstrapping"), - Validated("Validated"), - Running("Running"); + Failed("Failed", -1), + Error("Error", -2), // Not stored in YBA DB. + Warning("Warning", -3), // Not stored in YBA DB. + UnableToFetch("UnableToFetch", -4), // Not stored in YBA DB. + Updating("Updating", 1), + Bootstrapping("Bootstrapping", 2), + Validated("Validated", 3), + Running("Running", 0), + DroppedFromSource("DroppedFromSource", -5), // Not stored in YBA DB. + DroppedFromTarget("DroppedFromTarget", -6), // Not stored in YBA DB. + ExtraTableOnSource("ExtraTableOnSource", -7), // Not stored in YBA DB. + ExtraTableOnTarget("ExtraTableOnTarget", -8), // Not stored in YBA DB. 
+ ReplicationError("ReplicationError", -9); // Not stored in YBA DB. private final String status; + private final int code; - Status(String status) { + Status(String status, int code) { this.status = status; + this.code = code; } @Override @@ -138,6 +145,10 @@ public enum Status { public String toString() { return this.status; } + + public int getCode() { + return this.code; + } } // TODO move API response attributes out of the DB model diff --git a/managed/src/main/java/com/yugabyte/yw/models/filters/MetricFilter.java b/managed/src/main/java/com/yugabyte/yw/models/filters/MetricFilter.java index 92da34139d88..8d91659c93dc 100644 --- a/managed/src/main/java/com/yugabyte/yw/models/filters/MetricFilter.java +++ b/managed/src/main/java/com/yugabyte/yw/models/filters/MetricFilter.java @@ -154,10 +154,10 @@ public boolean match(Metric metric) { return false; } if (expired != null) { - if (expired && metric.getExpireTime().after(new Date())) { + if (expired && metric.getExpireTime() != null && metric.getExpireTime().after(new Date())) { return false; } - if (!expired && metric.getExpireTime().before(new Date())) { + if (!expired && metric.getExpireTime() != null && metric.getExpireTime().before(new Date())) { return false; } } diff --git a/managed/src/main/java/com/yugabyte/yw/models/helpers/KnownAlertLabels.java b/managed/src/main/java/com/yugabyte/yw/models/helpers/KnownAlertLabels.java index 5142fdd58d17..ee09da492106 100644 --- a/managed/src/main/java/com/yugabyte/yw/models/helpers/KnownAlertLabels.java +++ b/managed/src/main/java/com/yugabyte/yw/models/helpers/KnownAlertLabels.java @@ -53,7 +53,12 @@ public enum KnownAlertLabels { PARENT_TASK_TYPE, YBA_VERSION, DB_VERSION, - TASK_UUID; + TASK_UUID, + TABLE_UUID, + SOURCE_UNIVERSE_UUID, + TARGET_UNIVERSE_UUID, + XCLUSTER_CONFIG_UUID, + XCLUSTER_REPLICATION_GROUP_NAME; public String labelName() { return name().toLowerCase(); diff --git a/managed/src/main/java/com/yugabyte/yw/models/helpers/NodeConfig.java 
b/managed/src/main/java/com/yugabyte/yw/models/helpers/NodeConfig.java index aa56c17330f1..ca54797cb869 100644 --- a/managed/src/main/java/com/yugabyte/yw/models/helpers/NodeConfig.java +++ b/managed/src/main/java/com/yugabyte/yw/models/helpers/NodeConfig.java @@ -2,6 +2,7 @@ package com.yugabyte.yw.models.helpers; +import com.fasterxml.jackson.annotation.JsonProperty; import io.swagger.annotations.ApiModel; import io.swagger.annotations.ApiModelProperty; import javax.validation.constraints.NotNull; @@ -34,10 +35,22 @@ public class NodeConfig { @ApiModel(description = "Validation result of a node config") public static class ValidationResult { private Type type; + private boolean isValid; private boolean isRequired; + private String description; private String value; + + @JsonProperty("isValid") + public boolean isValid() { + return isValid; + } + + @JsonProperty("isRequired") + public boolean isRequired() { + return isRequired; + } } /** diff --git a/managed/src/main/java/com/yugabyte/yw/models/helpers/PlatformMetrics.java b/managed/src/main/java/com/yugabyte/yw/models/helpers/PlatformMetrics.java index 119f6e9d0d77..29212651225c 100644 --- a/managed/src/main/java/com/yugabyte/yw/models/helpers/PlatformMetrics.java +++ b/managed/src/main/java/com/yugabyte/yw/models/helpers/PlatformMetrics.java @@ -115,7 +115,10 @@ public enum PlatformMetrics { UNIVERSE_OS_UPDATE_REQUIRED( "More recent OS version is recommended for this universe", Unit.STATUS, false), UNIVERSE_RELEASE_FILES_STATUS( - "Local filepath for universe DB version is missing", Unit.STATUS, false); + "Local filepath for universe DB version is missing", Unit.STATUS, false), + // XCluster + XCLUSTER_METRIC_PROCESSOR_STATUS("xCluster metrics processor status", Unit.STATUS), + XCLUSTER_TABLE_STATUS("XCluster table status", Unit.STATUS); private final String help; private final Unit unit; private final Set validForSourceStates; diff --git 
a/managed/src/main/java/com/yugabyte/yw/models/helpers/provider/KubernetesInfo.java b/managed/src/main/java/com/yugabyte/yw/models/helpers/provider/KubernetesInfo.java index fafd177ab5d4..cf4b248106a8 100644 --- a/managed/src/main/java/com/yugabyte/yw/models/helpers/provider/KubernetesInfo.java +++ b/managed/src/main/java/com/yugabyte/yw/models/helpers/provider/KubernetesInfo.java @@ -1,9 +1,6 @@ package com.yugabyte.yw.models.helpers.provider; -import com.fasterxml.jackson.annotation.JsonAlias; -import com.fasterxml.jackson.annotation.JsonIgnore; -import com.fasterxml.jackson.annotation.JsonIgnoreProperties; -import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.*; import com.yugabyte.yw.common.CloudProviderHelper.EditableInUseProvider; import com.yugabyte.yw.models.common.YbaApi; import com.yugabyte.yw.models.common.YbaApi.YbaApiVisibility; @@ -80,6 +77,7 @@ public class KubernetesInfo implements CloudInfoInterface { private String kubernetesPullSecretName; @ApiModelProperty(accessMode = AccessMode.READ_ONLY) + @JsonProperty("isKubernetesOperatorControlled") public boolean isKubernetesOperatorControlled = false; // Flag for identifying the legacy k8s providers created before release 2.18. diff --git a/managed/src/main/java/com/yugabyte/yw/scheduler/Scheduler.java b/managed/src/main/java/com/yugabyte/yw/scheduler/Scheduler.java index 0dcefbb453be..4f1906c6a901 100644 --- a/managed/src/main/java/com/yugabyte/yw/scheduler/Scheduler.java +++ b/managed/src/main/java/com/yugabyte/yw/scheduler/Scheduler.java @@ -231,18 +231,19 @@ void scheduleRunner() { boolean shouldRunTask = isExpectedScheduleTaskTimeExpired || backlogStatus; UUID baseBackupUUID = null; if (isIncrementalBackupSchedule) { + // fetch last successful full backup for the schedule on which incremental + // backup can be taken. 
baseBackupUUID = fetchBaseBackupUUIDfromLatestSuccessfulBackup(schedule); if (shouldRunTask || baseBackupUUID == null) { - // Update incremental backup task cycle while for full backups. - long incrementalBackupFrequency = - ScheduleUtil.getIncrementalBackupFrequency(schedule); - if (incrementalBackupFrequency != 0L) { - schedule.updateNextIncrementScheduleTaskTime( - new Date(new Date().getTime() + incrementalBackupFrequency)); - } // We won't do incremental backups if a full backup is due since // full backups take priority but make sure to take an incremental backup // either when it's scheduled or to catch up on any backlog. + if (baseBackupUUID == null) { + // Force a full backup only when no scheduled backup is already in progress, to avoid starting a duplicate one. + if (!verifyScheduledBackupInProgress(schedule)) { + shouldRunTask = true; + } + } baseBackupUUID = null; log.debug("Scheduling a full backup for schedule {}", schedule.getScheduleUUID()); } else if (isExpectedIncrementScheduleTaskTime || incrementBacklogStatus) { @@ -307,6 +308,13 @@ private UUID fetchBaseBackupUUIDfromLatestSuccessfulBackup(Schedule schedule) { return backup == null ? null : backup.getBaseBackupUUID(); } + private boolean verifyScheduledBackupInProgress(Schedule schedule) { + Backup backup = + ScheduleUtil.fetchInProgressBackupForSchedule( + schedule.getCustomerUUID(), schedule.getScheduleUUID()); + return backup == null ? 
false : true; + } + private void runBackupTask(Schedule schedule, boolean alreadyRunning) { BackupUniverse backupUniverse = AbstractTaskBase.createTask(BackupUniverse.class); backupUniverse.runScheduledBackup(schedule, commissioner, alreadyRunning); diff --git a/managed/src/main/resources/reference.conf b/managed/src/main/resources/reference.conf index fa073ce9052e..b0bc1c9b745f 100644 --- a/managed/src/main/resources/reference.conf +++ b/managed/src/main/resources/reference.conf @@ -272,6 +272,7 @@ yb { ensure_sync_get_replication_status = false bootstrap_producer_timeout_ms = 120000 xcluster_sync_scheduler_interval = 10 minutes + xcluster_metrics_scheduler_interval = 2 minutes xcluster_sync_on_universe = true } diff --git a/managed/src/main/resources/swagger-strict.json b/managed/src/main/resources/swagger-strict.json index 8958a662ff57..e7e6e5ee826f 100644 --- a/managed/src/main/resources/swagger-strict.json +++ b/managed/src/main/resources/swagger-strict.json @@ -2392,6 +2392,7 @@ "type" : "boolean" }, "isFullBackup" : { + "readOnly" : true, "type" : "boolean" }, "isStorageConfigPresent" : { @@ -2435,7 +2436,7 @@ "type" : "boolean" } }, - "required" : [ "backupType", "category", "commonBackupInfo", "customerUUID", "expiryTimeUnit", "fullChainSizeInBytes", "hasIncrementalBackups", "isFullBackup", "isStorageConfigPresent", "isUniversePresent", "lastBackupState", "onDemand", "scheduleName", "scheduleUUID", "storageConfigType", "universeName", "universeUUID", "useTablespaces" ], + "required" : [ "backupType", "category", "commonBackupInfo", "customerUUID", "expiryTimeUnit", "fullChainSizeInBytes", "hasIncrementalBackups", "isStorageConfigPresent", "isUniversePresent", "lastBackupState", "onDemand", "scheduleName", "scheduleUUID", "storageConfigType", "universeName", "universeUUID", "useTablespaces" ], "type" : "object" }, "BackupStorageInfo" : { @@ -8033,9 +8034,6 @@ "example" : "{\"ip\":\"1.1.1.1\",\"sshUser\":\"centos\"}", "type" : "string" }, - "inUse" : { - 
"type" : "boolean" - }, "instanceName" : { "description" : "The node instance's name", "example" : "Mumbai instance", @@ -15199,9 +15197,11 @@ "type" : "string" }, "isRequired" : { + "readOnly" : true, "type" : "boolean" }, "isValid" : { + "readOnly" : true, "type" : "boolean" }, "type" : { @@ -15212,7 +15212,7 @@ "type" : "string" } }, - "required" : [ "description", "isRequired", "isValid", "type", "value" ], + "required" : [ "description", "type", "value" ], "type" : "object" }, "VolumeDetails" : { diff --git a/managed/src/main/resources/swagger.json b/managed/src/main/resources/swagger.json index 6a89e51bb924..18a215206b1a 100644 --- a/managed/src/main/resources/swagger.json +++ b/managed/src/main/resources/swagger.json @@ -2408,6 +2408,7 @@ "type" : "boolean" }, "isFullBackup" : { + "readOnly" : true, "type" : "boolean" }, "isStorageConfigPresent" : { @@ -2451,7 +2452,7 @@ "type" : "boolean" } }, - "required" : [ "backupType", "category", "commonBackupInfo", "customerUUID", "expiryTimeUnit", "fullChainSizeInBytes", "hasIncrementalBackups", "isFullBackup", "isStorageConfigPresent", "isUniversePresent", "lastBackupState", "onDemand", "scheduleName", "scheduleUUID", "storageConfigType", "universeName", "universeUUID", "useTablespaces" ], + "required" : [ "backupType", "category", "commonBackupInfo", "customerUUID", "expiryTimeUnit", "fullChainSizeInBytes", "hasIncrementalBackups", "isStorageConfigPresent", "isUniversePresent", "lastBackupState", "onDemand", "scheduleName", "scheduleUUID", "storageConfigType", "universeName", "universeUUID", "useTablespaces" ], "type" : "object" }, "BackupStorageInfo" : { @@ -8081,6 +8082,7 @@ "type" : "string" }, "inUse" : { + "description" : "True if the node is in use Deprecated since YBA version 2024.1.0.0. 
Use NodeInstance.state instead", "type" : "boolean" }, "instanceName" : { @@ -15344,9 +15346,11 @@ "type" : "string" }, "isRequired" : { + "readOnly" : true, "type" : "boolean" }, "isValid" : { + "readOnly" : true, "type" : "boolean" }, "type" : { @@ -15357,7 +15361,7 @@ "type" : "string" } }, - "required" : [ "description", "isRequired", "isValid", "type", "value" ], + "required" : [ "description", "type", "value" ], "type" : "object" }, "VolumeDetails" : { diff --git a/managed/src/test/java/com/yugabyte/yw/common/FakeDBApplication.java b/managed/src/test/java/com/yugabyte/yw/common/FakeDBApplication.java index 0809c066a87a..35870a15de1c 100644 --- a/managed/src/test/java/com/yugabyte/yw/common/FakeDBApplication.java +++ b/managed/src/test/java/com/yugabyte/yw/common/FakeDBApplication.java @@ -11,7 +11,7 @@ import com.yugabyte.yw.commissioner.CallHome; import com.yugabyte.yw.commissioner.Commissioner; import com.yugabyte.yw.commissioner.SetUniverseKey; -import com.yugabyte.yw.commissioner.XClusterSyncScheduler; +import com.yugabyte.yw.commissioner.XClusterScheduler; import com.yugabyte.yw.commissioner.YbcUpgrade; import com.yugabyte.yw.common.alerts.AlertConfigurationService; import com.yugabyte.yw.common.alerts.AlertDefinitionService; @@ -96,7 +96,7 @@ public class FakeDBApplication extends PlatformGuiceApplicationBaseTest { public GetTableSchemaResponse mockSchemaResponse = mock(GetTableSchemaResponse.class); public AutoFlagUtil mockAutoFlagUtil = mock(AutoFlagUtil.class); public ReleasesUtils mockReleasesUtils = mock(ReleasesUtils.class); - public XClusterSyncScheduler mockXClusterSyncScheduler = mock(XClusterSyncScheduler.class); + public XClusterScheduler mockXClusterScheduler = mock(XClusterScheduler.class); public MetricService metricService; public AlertService alertService; @@ -173,7 +173,7 @@ public Application provideApplication( .overrides( bind(PrometheusConfigManager.class).toInstance(mockPrometheusConfigManager))) 
.overrides(bind(FileHelperService.class).toInstance(mockFileHelperService)) - .overrides(bind(XClusterSyncScheduler.class).toInstance(mockXClusterSyncScheduler)) + .overrides(bind(XClusterScheduler.class).toInstance(mockXClusterScheduler)) .build(); } diff --git a/managed/src/test/java/com/yugabyte/yw/controllers/XClusterConfigControllerTest.java b/managed/src/test/java/com/yugabyte/yw/controllers/XClusterConfigControllerTest.java index 02f09bd66378..0d29c963001a 100644 --- a/managed/src/test/java/com/yugabyte/yw/controllers/XClusterConfigControllerTest.java +++ b/managed/src/test/java/com/yugabyte/yw/controllers/XClusterConfigControllerTest.java @@ -729,7 +729,7 @@ public void testGetUsesStreamIDCache() { setupMockMetricQueryHelperResponse(); - Mockito.doNothing().when(mockXClusterSyncScheduler).syncXClusterConfig(any()); + Mockito.doNothing().when(mockXClusterScheduler).syncXClusterConfig(any()); String getAPIEndpoint = apiEndpoint + "/" + xClusterConfig.getUuid(); diff --git a/managed/ui/src/components/config/Security/certificates/CertificateDetails.js b/managed/ui/src/components/config/Security/certificates/CertificateDetails.js index 077a16cfef18..18caf54e0e44 100644 --- a/managed/ui/src/components/config/Security/certificates/CertificateDetails.js +++ b/managed/ui/src/components/config/Security/certificates/CertificateDetails.js @@ -25,7 +25,7 @@ export const CertificateDetails = ({ certificate, visible, onHide }) => {
{certExpiry}
  • - +
    {certificate.certificate}
  • {certificate.privateKey && ( @@ -58,6 +58,28 @@ export const CertificateDetails = ({ certificate, visible, onHide }) => { )} + { + certificate.type === 'CustomCertHostPath' && ( + <> +
  • + +
    {certificate.customCertInfo?.nodeCertPath ?? '-'}
    +
  • +
  • + +
    {certificate.customCertInfo?.nodeKeyPath ?? '-'}
    +
  • +
  • + +
    {certificate.customCertInfo?.clientCertPath ?? '-'}
    +
  • +
  • + +
    {certificate.customCertInfo?.clientKeyPath ?? '-'}
    +
  • + + ) + } ); diff --git a/managed/ui/src/components/xcluster/XClusterTableStatusLabel.tsx b/managed/ui/src/components/xcluster/XClusterTableStatusLabel.tsx index 6cebc2c54556..505aa9e5b464 100644 --- a/managed/ui/src/components/xcluster/XClusterTableStatusLabel.tsx +++ b/managed/ui/src/components/xcluster/XClusterTableStatusLabel.tsx @@ -92,6 +92,11 @@ export const XClusterTableStatusLabel = ({ status, errors }: XClusterTableStatus ); case XClusterTableStatus.DROPPED: + case XClusterTableStatus.DROPPED_FROM_SOURCE: + case XClusterTableStatus.DROPPED_FROM_TARGET: + case XClusterTableStatus.REPLICATION_ERROR: + case XClusterTableStatus.EXTRA_TABLE_ON_TARGET: + case XClusterTableStatus.EXTRA_TABLE_ON_SOURCE: return ( {t(status)} diff --git a/managed/ui/src/components/xcluster/constants.ts b/managed/ui/src/components/xcluster/constants.ts index 06a02ead9fc0..c7c7a32587f7 100644 --- a/managed/ui/src/components/xcluster/constants.ts +++ b/managed/ui/src/components/xcluster/constants.ts @@ -40,7 +40,12 @@ export const XClusterTableStatus = { UNABLE_TO_FETCH: 'UnableToFetch', // DROPPPED - Client internal status. Does not exist on the backend. // Used to mark tables which are dropped on the source universe. 
- DROPPED: 'Dropped' + DROPPED: 'Dropped', + EXTRA_TABLE_ON_SOURCE: 'ExtraTableOnSource', + EXTRA_TABLE_ON_TARGET: 'ExtraTableOnTarget', + DROPPED_FROM_SOURCE: 'DroppedFromSource', + DROPPED_FROM_TARGET: 'DroppedFromTarget', + REPLICATION_ERROR: 'ReplicationError' } as const; export type XClusterTableStatus = typeof XClusterTableStatus[keyof typeof XClusterTableStatus]; //------------------------------------------------------------------------------------ diff --git a/src/odyssey/sources/backend.c b/src/odyssey/sources/backend.c index b204f7856880..fac07f711f6a 100644 --- a/src/odyssey/sources/backend.c +++ b/src/odyssey/sources/backend.c @@ -19,6 +19,7 @@ void od_backend_close(od_server_t *server) assert(server->tls == NULL); server->is_transaction = 0; server->yb_sticky_connection = false; + server->reset_timeout = false; server->idle_time = 0; kiwi_key_init(&server->key); kiwi_key_init(&server->key_client); @@ -736,7 +737,8 @@ int od_backend_ready_wait(od_server_t *server, char *context, int count, "read error: %s", od_io_error(&server->io)); } - return -1; + /* return new status if timeout error */ + return -2; } kiwi_be_type_t type = *(char *)machine_msg_data(msg); od_debug(&instance->logger, context, server->client, server, diff --git a/src/odyssey/sources/reset.c b/src/odyssey/sources/reset.c index c783cd403822..b0e504570cfa 100644 --- a/src/odyssey/sources/reset.c +++ b/src/odyssey/sources/reset.c @@ -48,7 +48,7 @@ int od_reset(od_server_t *server) * * 3. 
Continue with (1) */ - int wait_timeout = 1000; + int wait_timeout = 5000; int wait_try = 0; int wait_try_cancel = 0; int wait_cancel_limit = 1; @@ -66,7 +66,8 @@ int od_reset(od_server_t *server) wait_try++; rc = od_backend_ready_wait(server, "reset", 1, wait_timeout); - if (rc == -1) + /* can be -1 or -2 */ + if (rc < 0) break; } if (rc == -1) { @@ -113,7 +114,7 @@ int od_reset(od_server_t *server) query_rlb, NULL, sizeof(query_rlb), wait_timeout, 1); - if (rc == -1) + if (rc < 0) goto error; assert(!server->is_transaction); } @@ -125,7 +126,7 @@ int od_reset(od_server_t *server) rc = od_backend_query(server, "reset-discard", query_discard, NULL, sizeof(query_discard), wait_timeout, 1); - if (rc == NOT_OK_RESPONSE) + if (rc < 0) goto error; } @@ -136,7 +137,7 @@ int od_reset(od_server_t *server) rc = od_backend_query(server, "reset-discard-smart", query_discard, NULL, sizeof(query_discard), wait_timeout, 1); - if (rc == NOT_OK_RESPONSE) + if (rc < 0) goto error; } @@ -147,6 +148,9 @@ int od_reset(od_server_t *server) NULL, sizeof(query_reset), wait_timeout, 1); if (rc == -1) goto error; + /* reset timeout */ + if (rc == -2) + server->reset_timeout = true; } /* ready */ diff --git a/src/odyssey/sources/router.c b/src/odyssey/sources/router.c index 485615ba930d..5855815c4dd1 100644 --- a/src/odyssey/sources/router.c +++ b/src/odyssey/sources/router.c @@ -725,8 +725,11 @@ void od_router_detach(od_router_t *router, od_client_t *client) * a. Creating TEMP TABLES. * b. Use of WITH HOLD CURSORS. * c. Client connection is a logical or physical replication connection + * d. It took too long to reset state on the server. 
*/ - if (od_likely(!server->offline) && !server->yb_sticky_connection) { + if (od_likely(!server->offline) && + !server->yb_sticky_connection && + !server->reset_timeout) { od_instance_t *instance = server->global->instance; if (route->id.physical_rep || route->id.logical_rep) { od_debug(&instance->logger, "expire-replication", NULL, diff --git a/src/odyssey/sources/server.h b/src/odyssey/sources/server.h index f65123d6b6a4..1580720af92a 100644 --- a/src/odyssey/sources/server.h +++ b/src/odyssey/sources/server.h @@ -58,7 +58,9 @@ struct od_server { od_list_t link; + /* YB */ bool yb_sticky_connection; + bool reset_timeout; }; static const size_t OD_SERVER_DEFAULT_HASHMAP_SZ = 420; @@ -84,6 +86,7 @@ static inline void od_server_init(od_server_t *server, int reserve_prep_stmts) server->endpoint_selector = 0; od_stat_state_init(&server->stats_state); server->yb_sticky_connection = false; + server->reset_timeout = false; #ifdef USE_SCRAM od_scram_state_init(&server->scram_state); diff --git a/src/postgres/src/backend/storage/ipc/procarray.c b/src/postgres/src/backend/storage/ipc/procarray.c index aadfa7df6435..59bec5ca56c6 100644 --- a/src/postgres/src/backend/storage/ipc/procarray.c +++ b/src/postgres/src/backend/storage/ipc/procarray.c @@ -5289,16 +5289,12 @@ void YbStorePgAshSamples(TimestampTz sample_time) { int i; - int samples_stored = 0; + int samples_considered = 0; ProcArrayStruct *arrayP = procArray; LWLockAcquire(ProcArrayLock, LW_SHARED); - /* - * TODO: Add sampling logic to take random samples instead of - * the first 'N'. 
- */ for (i = 0; i < arrayP->numProcs; ++i) { int pgprocno = arrayP->pgprocnos[i]; @@ -5321,10 +5317,11 @@ YbStorePgAshSamples(TimestampTz sample_time) YbAshShouldIgnoreWaitEvent(proc->wait_event_info)) continue; - if (YbAshStoreSample(proc, arrayP->numProcs, sample_time, - &samples_stored) == 0) - break; + YbAshMaybeIncludeSample(proc, arrayP->numProcs, sample_time, + &samples_considered); } LWLockRelease(ProcArrayLock); + + YbAshFillSampleWeight(samples_considered); } diff --git a/src/postgres/src/backend/utils/misc/yb_ash.c b/src/postgres/src/backend/utils/misc/yb_ash.c index b1e0c737ba97..9f574025f07e 100644 --- a/src/postgres/src/backend/utils/misc/yb_ash.c +++ b/src/postgres/src/backend/utils/misc/yb_ash.c @@ -124,8 +124,10 @@ static void yb_ash_ProcessUtility(PlannedStmt *pstmt, const char *queryString, QueryCompletion *qc); static const unsigned char *get_yql_endpoint_tserver_uuid(); -static void copy_pgproc_sample_fields(PGPROC *proc); -static void copy_non_pgproc_sample_fields(float8 sample_weight, TimestampTz sample_time); +static void YbAshMaybeReplaceSample(PGPROC *proc, int num_procs, TimestampTz sample_time, + int samples_considered); +static void copy_pgproc_sample_fields(PGPROC *proc, int index); +static void copy_non_pgproc_sample_fields(TimestampTz sample_time, int index); static void YbAshIncrementCircularBufferIndex(void); static YBCAshSample *YbAshGetNextCircularBufferSlot(void); @@ -670,34 +672,52 @@ YbAshIncrementCircularBufferIndex(void) yb_ash->index = 0; } -/* - * Returns true if another sample should be stored in the circular buffer. 
- */ -bool -YbAshStoreSample(PGPROC *proc, int num_procs, TimestampTz sample_time, - int *samples_stored) +static void +YbAshMaybeReplaceSample(PGPROC *proc, int num_procs, TimestampTz sample_time, + int samples_considered) { + int random_index; + int replace_index; + + random_index = YBCGetRandomUniformInt(1, samples_considered); + + if (random_index > yb_ash_sample_size) + return; + /* - * If there are less samples available than the sample size, the sample - * weight must be 1. + * -1 because yb_ash->index points to where the next sample should + * be stored. */ - float8 sample_weight = Max(num_procs, yb_ash_sample_size) * 1.0 / yb_ash_sample_size; + replace_index = yb_ash->index - (yb_ash_sample_size - random_index) - 1; - copy_pgproc_sample_fields(proc); - copy_non_pgproc_sample_fields(sample_weight, sample_time); + if (replace_index < 0) + replace_index += yb_ash->max_entries; - YbAshIncrementCircularBufferIndex(); + YbAshStoreSample(proc, num_procs, sample_time, replace_index); +} - if (++(*samples_stored) == yb_ash_sample_size) - return false; +void +YbAshMaybeIncludeSample(PGPROC *proc, int num_procs, TimestampTz sample_time, + int *samples_considered) +{ + if (++(*samples_considered) <= yb_ash_sample_size) + YbAshStoreSample(proc, num_procs, sample_time, yb_ash->index); + else + YbAshMaybeReplaceSample(proc, num_procs, sample_time, *samples_considered); +} - return true; +void +YbAshStoreSample(PGPROC *proc, int num_procs, TimestampTz sample_time, int index) +{ + copy_pgproc_sample_fields(proc, index); + copy_non_pgproc_sample_fields(sample_time, index); + if (index == yb_ash->index) YbAshIncrementCircularBufferIndex(); /* advance only on append; a replacement reuses an existing slot */ } static void -copy_pgproc_sample_fields(PGPROC *proc) +copy_pgproc_sample_fields(PGPROC *proc, int index) { - YBCAshSample *cb_sample = &yb_ash->circular_buffer[yb_ash->index]; + YBCAshSample *cb_sample = &yb_ash->circular_buffer[index]; LWLockAcquire(&proc->yb_ash_metadata_lock, LW_SHARED); memcpy(&cb_sample->metadata, &proc->yb_ash_metadata, 
sizeof(YBCAshMetadata)); @@ -706,10 +726,11 @@ copy_pgproc_sample_fields(PGPROC *proc) cb_sample->encoded_wait_event_code = proc->wait_event_info; } +/* We don't fill the sample weight here. Check YbAshFillSampleWeight */ static void -copy_non_pgproc_sample_fields(float8 sample_weight, TimestampTz sample_time) +copy_non_pgproc_sample_fields(TimestampTz sample_time, int index) { - YBCAshSample *cb_sample = &yb_ash->circular_buffer[yb_ash->index]; + YBCAshSample *cb_sample = &yb_ash->circular_buffer[index]; /* yql_endpoint_tserver_uuid is constant for all PG samples */ if (get_yql_endpoint_tserver_uuid()) @@ -721,10 +742,34 @@ copy_non_pgproc_sample_fields(float8 sample_weight, TimestampTz sample_time) cb_sample->rpc_request_id = 0; /* TODO(asaha): Add aux info to circular buffer once it's available */ cb_sample->aux_info[0] = '\0'; - cb_sample->sample_weight = sample_weight; cb_sample->sample_time = sample_time; } +/* + * While inserting samples into the circular buffer, we don't know the actual + * number of samples considered. So after inserting all the samples, we go back + * and update the sample weight + */ +void +YbAshFillSampleWeight(int samples_considered) +{ + int samples_inserted; + float sample_weight; + int index; + + samples_inserted = Min(samples_considered, yb_ash_sample_size); + sample_weight = Max(samples_considered, yb_ash_sample_size) * 1.0 / yb_ash_sample_size; + index = yb_ash->index - 1; + + while (samples_inserted--) + { + if (index < 0) + index += yb_ash->max_entries; + + yb_ash->circular_buffer[index--].sample_weight = sample_weight; + } +} + /* * Returns a pointer to the circular buffer slot where the sample should be * inserted and increments the index. 
diff --git a/src/postgres/src/include/yb_ash.h b/src/postgres/src/include/yb_ash.h index 2240d6675b0b..3bda5ddcf4d8 100644 --- a/src/postgres/src/include/yb_ash.h +++ b/src/postgres/src/include/yb_ash.h @@ -54,9 +54,13 @@ extern void YbAshSetSessionId(uint64 session_id); extern void YbAshSetDatabaseId(Oid database_id); extern bool YbAshShouldIgnoreWaitEvent(uint32 wait_event_info); -extern bool YbAshStoreSample(PGPROC *proc, int num_procs, +extern void YbAshMaybeIncludeSample(PGPROC *proc, int num_procs, + TimestampTz sample_time, + int *samples_considered); +extern void YbAshStoreSample(PGPROC *proc, int num_procs, TimestampTz sample_time, - int *samples_stored); + int index); +extern void YbAshFillSampleWeight(int samples_considered); extern bool yb_enable_ash_check_hook(bool *newval, void **extra, diff --git a/src/yb/ash/wait_state.cc b/src/yb/ash/wait_state.cc index 2b2cd56734fe..9a493c91ff31 100644 --- a/src/yb/ash/wait_state.cc +++ b/src/yb/ash/wait_state.cc @@ -53,6 +53,16 @@ DEFINE_RUNTIME_PG_PREVIEW_FLAG(bool, yb_enable_ash, false, "and various background activities. 
This does nothing if " "ysql_yb_enable_ash_infra is disabled."); +DEFINE_NON_RUNTIME_PG_FLAG(int32, yb_ash_circular_buffer_size, 16 * 1024, + "Size (in KiBs) of ASH circular buffer that stores the samples"); + +DEFINE_RUNTIME_PG_FLAG(int32, yb_ash_sampling_interval_ms, 1000, + "Time (in milliseconds) between two consecutive sampling events"); +DEPRECATE_FLAG(int32, ysql_yb_ash_sampling_interval, "2024_03"); + +DEFINE_RUNTIME_PG_FLAG(int32, yb_ash_sample_size, 500, + "Number of samples captured from each component per sampling event"); + DEFINE_test_flag(bool, export_wait_state_names, yb::kIsDebug, "Exports wait-state name as a human understandable string."); DEFINE_test_flag(bool, trace_ash_wait_code_updates, yb::kIsDebug, diff --git a/src/yb/cdc/CMakeLists.txt b/src/yb/cdc/CMakeLists.txt index b8e5e89474f5..c762a1ebbcea 100644 --- a/src/yb/cdc/CMakeLists.txt +++ b/src/yb/cdc/CMakeLists.txt @@ -89,7 +89,6 @@ ADD_YB_LIBRARY(xcluster_producer_proto set(CDC_UTIL_SRCS cdc_util.cc xcluster_types.cc - xcluster_util.cc ) ADD_YB_LIBRARY( diff --git a/src/yb/cdc/cdc_service.cc b/src/yb/cdc/cdc_service.cc index 23941b49d146..9be085ce3712 100644 --- a/src/yb/cdc/cdc_service.cc +++ b/src/yb/cdc/cdc_service.cc @@ -96,9 +96,7 @@ using std::vector; constexpr uint32_t kUpdateIntervalMs = 15 * 1000; -DEFINE_NON_RUNTIME_int32(cdc_read_rpc_timeout_ms, 30 * 1000, - "Timeout used for CDC read rpc calls. Reads normally occur cross-cluster."); -TAG_FLAG(cdc_read_rpc_timeout_ms, advanced); +DECLARE_int32(cdc_read_rpc_timeout_ms); DEFINE_NON_RUNTIME_int32(cdc_write_rpc_timeout_ms, 30 * 1000, "Timeout used for CDC write rpc calls. 
Writes normally occur intra-cluster."); @@ -109,12 +107,6 @@ DEPRECATE_FLAG(int32, cdc_ybclient_reactor_threads, "09_2023"); DEFINE_RUNTIME_int32(cdc_state_checkpoint_update_interval_ms, kUpdateIntervalMs, "Rate at which CDC state's checkpoint is updated."); -DEFINE_NON_RUNTIME_string(certs_for_cdc_dir, "", - "The parent directory of where all certificates for xCluster producer universes will " - "be stored, for when the producer and consumer clusters use different certificates. " - "Place the certificates for each producer cluster in " - "//*."); - DEFINE_RUNTIME_int32(update_min_cdc_indices_interval_secs, 60, "How often to read cdc_state table to get the minimum applied index for each tablet " "across all streams. This information is used to correctly keep log files that " diff --git a/src/yb/client/CMakeLists.txt b/src/yb/client/CMakeLists.txt index 04b32b439fc7..b306a8337a56 100644 --- a/src/yb/client/CMakeLists.txt +++ b/src/yb/client/CMakeLists.txt @@ -77,12 +77,12 @@ set(CLIENT_LIBS tserver_proto tserver_service_proto pg_auto_analyze_service_proto + gutil test_echo_service_proto tserver_util yb_ql_expr yb_ash yb_util - gutil yrpc yb_dockv) @@ -96,6 +96,19 @@ ADD_YB_LIBRARY(yb_client SRCS ${CLIENT_SRCS} DEPS ${CLIENT_LIBS}) +set(CLIENT_MOCK_SRCS + xcluster_client_mock.cc +) + +set(CLIENT_MOCK_LIBS + yb_client + gmock + gtest) + +ADD_YB_LIBRARY(yb_client_mock + SRCS ${CLIENT_MOCK_SRCS} + DEPS ${CLIENT_MOCK_LIBS}) + if(NOT APPLE) # Localize thirdparty symbols using a linker version script. This hides them # from the client application. 
The OS X linker does not support the diff --git a/src/yb/client/client-internal.cc b/src/yb/client/client-internal.cc index f100fdb7235e..aa9da83dd210 100644 --- a/src/yb/client/client-internal.cc +++ b/src/yb/client/client-internal.cc @@ -1151,7 +1151,7 @@ Status YBClient::Data::IsCloneNamespaceInProgress( } auto state = resp.entries(0).aggregate_state(); *create_in_progress = - !(state == master::SysCloneStatePB::ABORTED || state == master::SysCloneStatePB::RESTORED); + !(state == master::SysCloneStatePB::ABORTED || state == master::SysCloneStatePB::COMPLETE); if (state == master::SysCloneStatePB_State_ABORTED) { return STATUS_FORMAT( diff --git a/src/yb/client/client.cc b/src/yb/client/client.cc index 1edaf1d74028..39811a5bebce 100644 --- a/src/yb/client/client.cc +++ b/src/yb/client/client.cc @@ -2346,7 +2346,7 @@ std::pair YBClient::NextRequestIdAndMinR return std::make_pair(id, *requests.running_requests.begin()); } -void YBClient::AddMetaCacheInfo(JsonWriter* writer) { +void YBClient::AddMetaCacheInfo(JsonWriter* writer) const { data_->meta_cache_->AddAllTabletInfo(writer); } diff --git a/src/yb/client/client.h b/src/yb/client/client.h index 3e01b3576d83..d3585cb7d353 100644 --- a/src/yb/client/client.h +++ b/src/yb/client/client.h @@ -313,7 +313,7 @@ class YBClientBuilder { // This class is thread-safe. 
class YBClient { public: - ~YBClient(); + virtual ~YBClient(); std::unique_ptr NewTableCreator(); @@ -1007,7 +1007,7 @@ class YBClient { std::pair NextRequestIdAndMinRunningRequestId(); - void AddMetaCacheInfo(JsonWriter* writer); + void AddMetaCacheInfo(JsonWriter* writer) const; void RequestsFinished(const RetryableRequestIdRange& request_id_range); @@ -1032,6 +1032,7 @@ class YBClient { private: class Data; + friend class MockYBClient; friend class YBClientBuilder; friend class YBNoOp; friend class YBTable; @@ -1051,7 +1052,7 @@ class YBClient { friend class internal::ClientMasterRpcBase; friend class PlacementInfoTest; friend class XClusterClient; - friend class XClusterRemoteClient; + friend class XClusterRemoteClientHolder; FRIEND_TEST(ClientTest, TestGetTabletServerBlacklist); FRIEND_TEST(ClientTest, TestMasterDown); @@ -1095,6 +1096,15 @@ class YBClient { DISALLOW_COPY_AND_ASSIGN(YBClient); }; +// A mock YBClient that can be used for testing. +// Currently it only allows us to create a MockYBClient object , and does not mock any member +// functions. 
+class MockYBClient : public YBClient { + public: + MockYBClient() = default; + virtual ~MockYBClient() = default; +}; + Result GetTableId(YBClient* client, const YBTableName& table_name); } // namespace client diff --git a/src/yb/client/clone_namespace-test.cc b/src/yb/client/clone_namespace-test.cc index 497896f47dfc..7b3e95999b0e 100644 --- a/src/yb/client/clone_namespace-test.cc +++ b/src/yb/client/clone_namespace-test.cc @@ -71,7 +71,7 @@ class CloneNamespaceTest : public SnapshotScheduleTest { Format("Expected 1 clone entry, got $0", done_resp.entries_size())); auto state = done_resp.entries(0).aggregate_state(); return state == master::SysCloneStatePB::ABORTED || - state == master::SysCloneStatePB::RESTORED; + state == master::SysCloneStatePB::COMPLETE; }, 60s, "Wait for clone to finish")); return Status::OK(); } diff --git a/src/yb/client/meta_cache.cc b/src/yb/client/meta_cache.cc index 808a8f613e02..d6389807a1b9 100644 --- a/src/yb/client/meta_cache.cc +++ b/src/yb/client/meta_cache.cc @@ -1370,7 +1370,7 @@ void MetaCache::InvalidateTableCache(const YBTable& table) { } } -void MetaCache::AddAllTabletInfo(JsonWriter* writer) { +void MetaCache::AddAllTabletInfo(JsonWriter* writer) const { SharedLock lock(mutex_); writer->StartObject(); writer->String("tablets"); diff --git a/src/yb/client/meta_cache.h b/src/yb/client/meta_cache.h index 8e6246176a38..23ef9373a4fb 100644 --- a/src/yb/client/meta_cache.h +++ b/src/yb/client/meta_cache.h @@ -621,7 +621,7 @@ class MetaCache : public RefCountedThreadSafe { void InvalidateTableCache(const YBTable& table); - void AddAllTabletInfo(JsonWriter* writer); + void AddAllTabletInfo(JsonWriter* writer) const; const std::string& LogPrefix() const { return log_prefix_; } @@ -736,7 +736,7 @@ class MetaCache : public RefCountedThreadSafe { YBClient* const client_; - std::shared_timed_mutex mutex_; + mutable std::shared_timed_mutex mutex_; // Cache of Tablet Server locations: TS UUID -> RemoteTabletServer*. 
// diff --git a/src/yb/client/xcluster_client.cc b/src/yb/client/xcluster_client.cc index df359516d597..c7d7759f29b0 100644 --- a/src/yb/client/xcluster_client.cc +++ b/src/yb/client/xcluster_client.cc @@ -14,6 +14,7 @@ #include "yb/client/xcluster_client.h" #include "yb/cdc/cdc_service.pb.h" +#include "yb/common/xcluster_util.h" #include "yb/client/client.h" #include "yb/client/client-internal.h" #include "yb/master/master_defaults.h" @@ -26,6 +27,8 @@ #include "yb/util/path_util.h" DECLARE_bool(use_node_to_node_encryption); +DECLARE_string(certs_for_cdc_dir); +DECLARE_int32(cdc_read_rpc_timeout_ms); #define CALL_SYNC_LEADER_MASTER_RPC(method, req) \ VERIFY_RESULT(SyncLeaderMasterRpc( \ @@ -33,6 +36,113 @@ DECLARE_bool(use_node_to_node_encryption); BOOST_PP_STRINGIZE(method), &master::MasterReplicationProxy::BOOST_PP_CAT(method, Async))) namespace yb::client { + +// XClusterRemoteClientHolder + +XClusterRemoteClientHolder::XClusterRemoteClientHolder( + const xcluster::ReplicationGroupId& replication_group_id) + : replication_group_id_(xcluster::GetOriginalReplicationGroupId(replication_group_id)) {} + +XClusterRemoteClientHolder::~XClusterRemoteClientHolder() { Shutdown(); } + +void XClusterRemoteClientHolder::Shutdown() { + if (yb_client_) { + yb_client_->Shutdown(); + } + if (messenger_) { + messenger_->Shutdown(); + } +} + +Status XClusterRemoteClientHolder::Init(const std::vector& remote_masters) { + SCHECK(!remote_masters.empty(), InvalidArgument, "No master addresses provided"); + const auto master_addrs = HostPort::ToCommaSeparatedString(remote_masters); + + rpc::MessengerBuilder messenger_builder("xcluster-remote"); + std::string certs_dir; + + if (FLAGS_use_node_to_node_encryption) { + if (!FLAGS_certs_for_cdc_dir.empty()) { + certs_dir = JoinPathSegments(FLAGS_certs_for_cdc_dir, replication_group_id_.ToString()); + } + secure_context_ = VERIFY_RESULT(rpc::SetupSecureContext( + certs_dir, /*root_dir=*/"", /*name=*/"", rpc::SecureContextType::kInternal, + 
&messenger_builder)); + } + messenger_ = VERIFY_RESULT(messenger_builder.Build()); + + yb_client_ = VERIFY_RESULT(YBClientBuilder() + .set_client_name(kClientName) + .add_master_server_addr(master_addrs) + .default_admin_operation_timeout( + MonoDelta::FromMilliseconds(FLAGS_cdc_read_rpc_timeout_ms)) + .Build(messenger_.get())); + xcluster_client_ = std::make_unique(*yb_client_); + + return Status::OK(); +} + +Result> XClusterRemoteClientHolder::Create( + const xcluster::ReplicationGroupId& replication_group_id, + const std::vector& remote_masters) { + auto client = std::shared_ptr( + new XClusterRemoteClientHolder(replication_group_id)); + RETURN_NOT_OK(client->Init(remote_masters)); + return client; +} + +Status XClusterRemoteClientHolder::SetMasterAddresses(const std::vector& remote_masters) { + return yb_client_->SetMasterAddresses(HostPort::ToCommaSeparatedString(remote_masters)); +} + +Status XClusterRemoteClientHolder::ReloadCertificates() { + if (!secure_context_) { + return Status::OK(); + } + + std::string cert_dir; + if (!FLAGS_certs_for_cdc_dir.empty()) { + cert_dir = JoinPathSegments(FLAGS_certs_for_cdc_dir, replication_group_id_.ToString()); + } + + return rpc::ReloadSecureContextKeysAndCertificates( + secure_context_.get(), cert_dir, "" /* node_name */); +} + +XClusterClient& XClusterRemoteClientHolder::GetXClusterClient() { + CHECK_NOTNULL(xcluster_client_); + return *xcluster_client_.get(); +} + +client::YBClient& XClusterRemoteClientHolder::GetYbClient() { + CHECK_NOTNULL(yb_client_); + return *yb_client_; +} + +google::protobuf::RepeatedPtrField GetXClusterStreamOptions() { + google::protobuf::RepeatedPtrField<::yb::master::CDCStreamOptionsPB> options; + options.Reserve(4); + auto source_type = options.Add(); + source_type->set_key(cdc::kSourceType); + source_type->set_value(CDCRequestSource_Name(cdc::CDCRequestSource::XCLUSTER)); + + auto record_type = options.Add(); + record_type->set_key(cdc::kRecordType); + 
record_type->set_value(CDCRecordType_Name(cdc::CDCRecordType::CHANGE)); + + auto record_format = options.Add(); + record_format->set_key(cdc::kRecordFormat); + record_format->set_value(CDCRecordFormat_Name(cdc::CDCRecordFormat::WAL)); + + auto checkpoint_type = options.Add(); + checkpoint_type->set_key(cdc::kCheckpointType); + checkpoint_type->set_value(CDCCheckpointType_Name(cdc::CDCCheckpointType::IMPLICIT)); + + return options; +} + +// XClusterClient + XClusterClient::XClusterClient(client::YBClient& yb_client) : yb_client_(yb_client) {} CoarseTimePoint XClusterClient::GetDeadline() const { @@ -366,7 +476,7 @@ void XClusterClient::CreateXClusterStreamAsync( const TableId& table_id, bool active, cdc::StreamModeTransactional transactional, CreateCDCStreamCallback callback) { yb_client_.data_->CreateXClusterStream( - &yb_client_, table_id, GetXClusterStreamOptions(), + &yb_client_, table_id, client::GetXClusterStreamOptions(), (active ? master::SysCDCStreamEntryPB::ACTIVE : master::SysCDCStreamEntryPB::INITIATED), transactional, GetDeadline(), std::move(callback)); } @@ -458,61 +568,7 @@ XClusterClient::GetUniverseReplicationInfo( return result; } -// XClusterRemoteClient - -XClusterRemoteClient::XClusterRemoteClient(const std::string& certs_for_cdc_dir, MonoDelta timeout) - : certs_for_cdc_dir_(certs_for_cdc_dir), timeout_(timeout) {} - -XClusterRemoteClient::~XClusterRemoteClient() { - if (messenger_) { - messenger_->Shutdown(); - } -} - -Status XClusterRemoteClient::Init( - const xcluster::ReplicationGroupId& replication_group_id, - const std::vector& remote_masters) { - SCHECK(!remote_masters.empty(), InvalidArgument, "No master addresses provided"); - const auto master_addrs = HostPort::ToCommaSeparatedString(remote_masters); - - rpc::MessengerBuilder messenger_builder("xcluster-remote"); - std::string certs_dir; - - if (FLAGS_use_node_to_node_encryption) { - if (!certs_for_cdc_dir_.empty()) { - certs_dir = JoinPathSegments(certs_for_cdc_dir_, 
replication_group_id.ToString()); - } - secure_context_ = VERIFY_RESULT(rpc::SetupSecureContext( - certs_dir, /*root_dir=*/"", /*name=*/"", rpc::SecureContextType::kInternal, - &messenger_builder)); - } - messenger_ = VERIFY_RESULT(messenger_builder.Build()); - - yb_client_ = VERIFY_RESULT(YBClientBuilder() - .add_master_server_addr(master_addrs) - .default_admin_operation_timeout(timeout_) - .Build(messenger_.get())); - xcluster_client_ = std::make_unique(*yb_client_); - - return Status::OK(); -} - -XClusterClient* XClusterRemoteClient::GetXClusterClient() { - CHECK_NOTNULL(xcluster_client_); - return xcluster_client_.get(); -} - -template -Result XClusterRemoteClient::SyncLeaderMasterRpc( - const RequestPB& req, const char* method_name, const Method& method) { - ResponsePB resp; - RETURN_NOT_OK(yb_client_->data_->SyncLeaderMasterRpc( - CoarseMonoClock::Now() + yb_client_->default_admin_operation_timeout(), req, &resp, - method_name, method)); - return resp; -} - -Result XClusterRemoteClient::SetupDbScopedUniverseReplication( +Result XClusterClient::SetupDbScopedUniverseReplication( const xcluster::ReplicationGroupId& replication_group_id, const std::vector& source_master_addresses, const std::vector& namespace_names, @@ -559,7 +615,7 @@ Result XClusterRemoteClient::SetupDbScopedUniverseReplication( return UniverseUuid::FromString(resp.universe_uuid()); } -Result XClusterRemoteClient::IsSetupUniverseReplicationDone( +Result XClusterClient::IsSetupUniverseReplicationDone( const xcluster::ReplicationGroupId& replication_group_id) { master::IsSetupUniverseReplicationDoneRequestPB req; req.set_replication_group_id(replication_group_id.ToString()); @@ -610,35 +666,32 @@ GetXClusterStreamsCallback CreateXClusterStreamsCallback(BootstrapProducerCallba }; } -Status XClusterRemoteClient::GetXClusterTableCheckpointInfos( +Status XClusterClient::GetXClusterTableCheckpointInfos( const xcluster::ReplicationGroupId& replication_group_id, const NamespaceId& namespace_id, const 
std::vector& table_names, const std::vector& pg_schema_names, BootstrapProducerCallback user_callback) { auto callback = CreateXClusterStreamsCallback(user_callback); - RETURN_NOT_OK(XClusterClient(*yb_client_) - .GetXClusterStreams( - CoarseMonoClock::Now() + yb_client_->default_admin_operation_timeout(), - replication_group_id, namespace_id, table_names, pg_schema_names, - std::move(callback))); + RETURN_NOT_OK(GetXClusterStreams( + CoarseMonoClock::Now() + yb_client_.default_admin_operation_timeout(), replication_group_id, + namespace_id, table_names, pg_schema_names, std::move(callback))); return Status::OK(); } -Status XClusterRemoteClient::GetXClusterTableCheckpointInfos( +Status XClusterClient::GetXClusterTableCheckpointInfos( const xcluster::ReplicationGroupId& replication_group_id, const NamespaceId& namespace_id, const std::vector& table_ids, BootstrapProducerCallback user_callback) { auto callback = CreateXClusterStreamsCallback(user_callback); - RETURN_NOT_OK(XClusterClient(*yb_client_) - .GetXClusterStreams( - CoarseMonoClock::Now() + yb_client_->default_admin_operation_timeout(), - replication_group_id, namespace_id, table_ids, std::move(callback))); + RETURN_NOT_OK(GetXClusterStreams( + CoarseMonoClock::Now() + yb_client_.default_admin_operation_timeout(), replication_group_id, + namespace_id, table_ids, std::move(callback))); return Status::OK(); } -Status XClusterRemoteClient::AddNamespaceToDbScopedUniverseReplication( +Status XClusterClient::AddNamespaceToDbScopedUniverseReplication( const xcluster::ReplicationGroupId& replication_group_id, const UniverseUuid& target_universe_uuid, const NamespaceName& namespace_name, const NamespaceId& source_namespace_id, const std::vector& source_table_ids, @@ -674,26 +727,4 @@ Status XClusterRemoteClient::AddNamespaceToDbScopedUniverseReplication( return Status::OK(); } -google::protobuf::RepeatedPtrField GetXClusterStreamOptions() { - google::protobuf::RepeatedPtrField<::yb::master::CDCStreamOptionsPB> 
options; - options.Reserve(4); - auto source_type = options.Add(); - source_type->set_key(cdc::kSourceType); - source_type->set_value(CDCRequestSource_Name(cdc::CDCRequestSource::XCLUSTER)); - - auto record_type = options.Add(); - record_type->set_key(cdc::kRecordType); - record_type->set_value(CDCRecordType_Name(cdc::CDCRecordType::CHANGE)); - - auto record_format = options.Add(); - record_format->set_key(cdc::kRecordFormat); - record_format->set_value(CDCRecordFormat_Name(cdc::CDCRecordFormat::WAL)); - - auto checkpoint_type = options.Add(); - checkpoint_type->set_key(cdc::kCheckpointType); - checkpoint_type->set_value(CDCCheckpointType_Name(cdc::CDCCheckpointType::IMPLICIT)); - - return options; -} - } // namespace yb::client diff --git a/src/yb/client/xcluster_client.h b/src/yb/client/xcluster_client.h index d3985e8a667a..3d85c7d11871 100644 --- a/src/yb/client/xcluster_client.h +++ b/src/yb/client/xcluster_client.h @@ -44,11 +44,49 @@ class SecureContext; namespace client { class YBClient; +class XClusterClient; using GetXClusterStreamsCallback = std::function)>; using IsXClusterBootstrapRequiredCallback = std::function)>; +// This class creates and holds a dedicated YbClient, XClusterClient and their dependant objects +// messenger and secure context. The client connects to a remote yb xCluster universe. 
+class XClusterRemoteClientHolder { + public: + static constexpr auto kClientName = "XClusterRemote"; + + static Result> Create( + const xcluster::ReplicationGroupId& replication_group_id, + const std::vector& remote_masters); + + virtual ~XClusterRemoteClientHolder(); + + virtual void Shutdown(); + + Status SetMasterAddresses(const std::vector& remote_masters); + + Status ReloadCertificates(); + + XClusterClient& GetXClusterClient(); + client::YBClient& GetYbClient(); + + private: + friend class MockXClusterRemoteClientHolder; + + explicit XClusterRemoteClientHolder(const xcluster::ReplicationGroupId& replication_group_id); + Status Init(const std::vector& remote_masters); + + const xcluster::ReplicationGroupId replication_group_id_; + std::unique_ptr secure_context_; + std::unique_ptr messenger_; + + std::unique_ptr yb_client_; + std::unique_ptr xcluster_client_; + + DISALLOW_COPY_AND_ASSIGN(XClusterRemoteClientHolder); +}; + // A wrapper over YBClient to handle xCluster related RPCs. // This class performs serialization of C++ objects to PBs and vice versa. class XClusterClient { @@ -170,30 +208,6 @@ class XClusterClient { Result GetUniverseReplicationInfo( const xcluster::ReplicationGroupId& replication_group_id); - private: - CoarseTimePoint GetDeadline() const; - - template - Result SyncLeaderMasterRpc( - const RequestPB& req, const char* method_name, const Method& method); - - client::YBClient& yb_client_; -}; - -// A wrapper over YBClient to handle xCluster related RPCs sent to a different yb universe. -// This class performs serialization of C++ objects to PBs and vice versa. 
-class XClusterRemoteClient { - public: - XClusterRemoteClient(const std::string& certs_for_cdc_dir, MonoDelta timeout); - virtual ~XClusterRemoteClient(); - - virtual Status Init( - const xcluster::ReplicationGroupId& replication_group_id, - const std::vector& remote_masters); - - XClusterClient* operator->() {return GetXClusterClient();} - XClusterClient* GetXClusterClient(); - // This requires flag enable_xcluster_api_v2 to be set. virtual Result SetupDbScopedUniverseReplication( const xcluster::ReplicationGroupId& replication_group_id, @@ -222,21 +236,16 @@ class XClusterRemoteClient { const std::vector& bootstrap_ids); private: + CoarseTimePoint GetDeadline() const; + template Result SyncLeaderMasterRpc( const RequestPB& req, const char* method_name, const Method& method); - const std::string certs_for_cdc_dir_; - const MonoDelta timeout_; - std::unique_ptr secure_context_; - std::unique_ptr messenger_; - - std::unique_ptr yb_client_; - std::unique_ptr xcluster_client_; + client::YBClient& yb_client_; }; -// TODO: Move xcluster_util to common and this into it. -google::protobuf::RepeatedPtrField GetXClusterStreamOptions(); +google::protobuf::RepeatedPtrField GetXClusterStreamOptions(); } // namespace client } // namespace yb diff --git a/src/yb/client/xcluster_client_mock.cc b/src/yb/client/xcluster_client_mock.cc new file mode 100644 index 000000000000..9a2541ea361a --- /dev/null +++ b/src/yb/client/xcluster_client_mock.cc @@ -0,0 +1,34 @@ +// Copyright (c) YugabyteDB, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. 
See the License for the specific language governing permissions and limitations +// under the License. +// + +#include "yb/client/xcluster_client_mock.h" +#include "yb/client/client.h" + +namespace yb::client { + +const xcluster::ReplicationGroupId kDummyReplicationGroupId = + xcluster::ReplicationGroupId("dummy-replication-group-id"); + +MockXClusterRemoteClientHolder::MockXClusterRemoteClientHolder() + : XClusterRemoteClientHolder(kDummyReplicationGroupId) { + yb_client_ = std::make_unique(); + xcluster_client_ = std::make_unique(*yb_client_); +} + +MockXClusterRemoteClientHolder::~MockXClusterRemoteClientHolder() {} + +MockXClusterClient::MockXClusterClient(YBClient& yb_client) : XClusterClient(yb_client) {} + +MockXClusterClient::~MockXClusterClient() {} + +} // namespace yb::client diff --git a/src/yb/client/xcluster_client_mock.h b/src/yb/client/xcluster_client_mock.h new file mode 100644 index 000000000000..7a269ca538f7 --- /dev/null +++ b/src/yb/client/xcluster_client_mock.h @@ -0,0 +1,62 @@ +// Copyright (c) YugabyteDB, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. See the License for the specific language governing permissions and limitations +// under the License. 
+// + +#pragma once + +#include + +#include "yb/client/xcluster_client.h" +#include "yb/util/is_operation_done_result.h" + +namespace yb::client { + +class MockXClusterClient : public XClusterClient { + public: + explicit MockXClusterClient(YBClient& yb_client); + + virtual ~MockXClusterClient(); + + MOCK_METHOD( + Result, SetupDbScopedUniverseReplication, + (const xcluster::ReplicationGroupId&, const std::vector&, + const std::vector&, const std::vector&, + const std::vector&, const std::vector&), + (override)); + + MOCK_METHOD( + Result, IsSetupUniverseReplicationDone, + (const xcluster::ReplicationGroupId&), (override)); + + MOCK_METHOD( + Status, AddNamespaceToDbScopedUniverseReplication, + (const xcluster::ReplicationGroupId& replication_group_id, + const UniverseUuid& target_universe_uuid, const NamespaceName& namespace_name, + const NamespaceId& source_namespace_id, const std::vector& source_table_ids, + const std::vector& bootstrap_ids), + (override)); +}; + +class MockXClusterRemoteClientHolder : public XClusterRemoteClientHolder { + public: + MockXClusterRemoteClientHolder(); + + virtual ~MockXClusterRemoteClientHolder(); + + MockXClusterClient& GetMockXClusterClient() { + return *static_cast(xcluster_client_.get()); + } + + MOCK_METHOD(void, Shutdown, (), (override)); +}; + +} // namespace yb::client diff --git a/src/yb/common/CMakeLists.txt b/src/yb/common/CMakeLists.txt index 79f0cfc6d75a..d0b5d04437d7 100644 --- a/src/yb/common/CMakeLists.txt +++ b/src/yb/common/CMakeLists.txt @@ -105,6 +105,7 @@ set(COMMON_SRCS transaction_error.cc wire_protocol.cc llvm_profile_dumper.cc + xcluster_util.cc ) set(COMMON_LIBS diff --git a/src/yb/common/common_flags.cc b/src/yb/common/common_flags.cc index 5cbffdd6ed55..738492ffd38d 100644 --- a/src/yb/common/common_flags.cc +++ b/src/yb/common/common_flags.cc @@ -193,6 +193,16 @@ DEFINE_NON_RUNTIME_PREVIEW_bool(enable_pg_cron, false, "Enables the pg_cron extension. Jobs will be run on a single tserver node. 
The node should be " "assumed to be selected randomly."); +DEFINE_NON_RUNTIME_string(certs_for_cdc_dir, "", + "The parent directory of where all certificates for xCluster source universes will " + "be stored, for when the source and target universes use different certificates. " + "Place the certificates for each source universe in " + "//*."); + +DEFINE_NON_RUNTIME_int32(cdc_read_rpc_timeout_ms, 30 * 1000, + "Timeout used for CDC read rpc calls. Reads normally occur cross-cluster."); +TAG_FLAG(cdc_read_rpc_timeout_ms, advanced); + namespace yb { void InitCommonFlags() { diff --git a/src/yb/cdc/xcluster_util.cc b/src/yb/common/xcluster_util.cc similarity index 97% rename from src/yb/cdc/xcluster_util.cc rename to src/yb/common/xcluster_util.cc index 4fa22a316371..3112d2364794 100644 --- a/src/yb/cdc/xcluster_util.cc +++ b/src/yb/common/xcluster_util.cc @@ -11,7 +11,8 @@ // under the License. // -#include "yb/cdc/xcluster_util.h" +#include "yb/common/xcluster_util.h" + #include "yb/gutil/strings/util.h" namespace yb::xcluster { @@ -42,4 +43,5 @@ std::string ShortReplicationType(XClusterReplicationType type) { XClusterReplicationType_Name(type), "XCLUSTER_", "", /*replace_all=*/false); } + } // namespace yb::xcluster diff --git a/src/yb/cdc/xcluster_util.h b/src/yb/common/xcluster_util.h similarity index 100% rename from src/yb/cdc/xcluster_util.h rename to src/yb/common/xcluster_util.h diff --git a/src/yb/integration-tests/minicluster-snapshot-test.cc b/src/yb/integration-tests/minicluster-snapshot-test.cc index 7dce7f000c25..e24cce974ff0 100644 --- a/src/yb/integration-tests/minicluster-snapshot-test.cc +++ b/src/yb/integration-tests/minicluster-snapshot-test.cc @@ -81,6 +81,7 @@ DECLARE_int32(pgsql_proxy_webserver_port); DECLARE_uint64(snapshot_coordinator_poll_interval_ms); DECLARE_string(ysql_hba_conf_csv); DECLARE_bool(TEST_fail_clone_pg_schema); +DECLARE_bool(TEST_fail_clone_tablets); DECLARE_string(TEST_mini_cluster_pg_host_port); namespace yb { @@ -715,5 
+716,20 @@ TEST_F(PgCloneTest, YB_DISABLE_TEST_IN_SANITIZERS(UserIsSet)) { ASSERT_EQ(owner[0], "test_user"); } +TEST_F(PgCloneTest, YB_DISABLE_TEST_IN_SANITIZERS(PreventConnectionsUntilCloneSuccessful)) { + // Test that we prevent connections to the target DB until the clone operation is successful. + ANNOTATE_UNPROTECTED_WRITE(FLAGS_TEST_fail_clone_tablets) = true; + auto status = source_conn_->ExecuteFormat( + "CREATE DATABASE $0 TEMPLATE $1", kTargetNamespaceName1, kSourceNamespaceName); + ASSERT_NOK(status); + ASSERT_STR_CONTAINS(status.message().ToBuffer(), "fail_clone_tablets"); + + auto result = ConnectToDB(kTargetNamespaceName1, 3 /* connection timeout */); + ASSERT_NOK(result); + ASSERT_STR_CONTAINS( + result.status().message().ToBuffer(), + Format("database \"$0\" is not currently accepting connections", kTargetNamespaceName1)); +} + } // namespace master } // namespace yb diff --git a/src/yb/integration-tests/wait_states-itest.cc b/src/yb/integration-tests/wait_states-itest.cc index 91e286d3260b..c86d3a4c1871 100644 --- a/src/yb/integration-tests/wait_states-itest.cc +++ b/src/yb/integration-tests/wait_states-itest.cc @@ -49,6 +49,7 @@ using namespace std::literals; DECLARE_bool(ysql_yb_ash_enable_infra); DECLARE_bool(ysql_yb_enable_ash); +DECLARE_int32(ysql_yb_ash_sample_size); DECLARE_bool(allow_index_table_read_write); DECLARE_int32(client_read_write_timeout_ms); @@ -329,8 +330,7 @@ void WaitStateTestCheckMethodCounts::UpdateCounts( std::lock_guard lock(mutex_); VLOG(1) << "Received " << resp.ShortDebugString(); for (auto& container : - {resp.tserver_wait_states(), resp.cql_wait_states(), resp.flush_and_compaction_wait_states(), - resp.raft_log_appender_wait_states()}) { + {resp.tserver_wait_states(), resp.cql_wait_states()}) { for (auto& entry : container.wait_states()) { VLOG(2) << "Entry " << ++idx << " : " << yb::ToString(entry); const auto& method = @@ -363,6 +363,7 @@ void WaitStateTestCheckMethodCounts::DoAshCalls(std::atomic& stop) { 
req.set_fetch_flush_and_compaction_states(true); req.set_fetch_raft_log_appender_states(true); req.set_fetch_cql_states(true); + req.set_sample_size(FLAGS_ysql_yb_ash_sample_size); tserver::PgActiveSessionHistoryResponsePB resp; rpc::RpcController controller; while (!stop) { diff --git a/src/yb/integration-tests/xcluster/xcluster-test.cc b/src/yb/integration-tests/xcluster/xcluster-test.cc index e2edadd47200..bc96e352b8aa 100644 --- a/src/yb/integration-tests/xcluster/xcluster-test.cc +++ b/src/yb/integration-tests/xcluster/xcluster-test.cc @@ -22,7 +22,8 @@ #include "yb/cdc/cdc_service.pb.h" #include "yb/cdc/cdc_service.proxy.h" #include "yb/cdc/cdc_state_table.h" -#include "yb/cdc/xcluster_util.h" +#include "yb/client/xcluster_client.h" +#include "yb/common/xcluster_util.h" #include "yb/cdc/xrepl_stream_metadata.h" #include "yb/client/client-test-util.h" @@ -3310,25 +3311,28 @@ Status VerifyMetaCacheObjectIsValid( return json_reader.ExtractObjectArray(meta_cache, "tablets", &tablets); } -void VerifyMetaCacheWithXClusterConsumerSetUp(const std::string& produced_json) { +Status VerifyMetaCacheWithXClusterConsumerSetUp(const std::string& produced_json) { JsonReader json_reader(produced_json); - EXPECT_OK(json_reader.Init()); + RETURN_NOT_OK(json_reader.Init()); const rapidjson::Value* object = nullptr; - EXPECT_OK(json_reader.ExtractObject(json_reader.root(), nullptr, &object)); - EXPECT_EQ(rapidjson::kObjectType, CHECK_NOTNULL(object)->GetType()); + RETURN_NOT_OK(json_reader.ExtractObject(json_reader.root(), nullptr, &object)); + SCHECK_EQ( + CHECK_NOTNULL(object)->GetType(), rapidjson::kObjectType, IllegalState, "Not an JSON object"); - EXPECT_OK(VerifyMetaCacheObjectIsValid(object, json_reader, "MainMetaCache")); + RETURN_NOT_OK(VerifyMetaCacheObjectIsValid(object, json_reader, "MainMetaCache")); bool found_xcluster_member = false; - for (auto it = object->MemberBegin(); it != object->MemberEnd(); - ++it) { + for (auto it = object->MemberBegin(); it != 
object->MemberEnd(); ++it) { std::string member_name = it->name.GetString(); - if (member_name.starts_with("XClusterConsumerRemote_")) { + if (member_name.starts_with(client::XClusterRemoteClientHolder::kClientName)) { found_xcluster_member = true; } - EXPECT_OK(VerifyMetaCacheObjectIsValid(object, json_reader, member_name.c_str())); + RETURN_NOT_OK(VerifyMetaCacheObjectIsValid(object, json_reader, member_name.c_str())); } - EXPECT_TRUE(found_xcluster_member) - << "No member name starting with XClusterConsumerRemote_ is found"; + SCHECK_FORMAT( + found_xcluster_member, IllegalState, "No member name starting with $0 found", + client::XClusterRemoteClientHolder::kClientName); + + return Status::OK(); } TEST_F_EX(XClusterTest, ListMetaCacheAfterXClusterSetup, XClusterTestNoParam) { @@ -3352,8 +3356,21 @@ TEST_F_EX(XClusterTest, ListMetaCacheAfterXClusterSetup, XClusterTestNoParam) { auto tserver_endpoint = tserver->bound_http_addr(); auto query_endpoint = "http://" + AsString(tserver_endpoint) + "/api/v1/meta-cache"; faststring result; - ASSERT_OK(EasyCurl().FetchURL(query_endpoint, &result)); - VerifyMetaCacheWithXClusterConsumerSetUp(result.ToString()); + + // Attempts to fetch url until a response with status OK, or until timeout. + // On mac the curl command fails with error "A libcurl function was given a bad argument", but + // succeeds on retries. 
+ ASSERT_OK(WaitFor( + [&]() -> bool { + EasyCurl curl; + auto status = curl.FetchURL(query_endpoint, &result); + YB_LOG_IF_EVERY_N(WARNING, !status.ok(), 5) << status; + + return status.ok(); + }, + 30s, "Wait for curl response to return with status OK")); + + ASSERT_OK(VerifyMetaCacheWithXClusterConsumerSetUp(result.ToString())); } ASSERT_OK(DeleteUniverseReplication()); } diff --git a/src/yb/integration-tests/xcluster/xcluster_db_scoped-test.cc b/src/yb/integration-tests/xcluster/xcluster_db_scoped-test.cc index 825e2c89801e..b709c0661784 100644 --- a/src/yb/integration-tests/xcluster/xcluster_db_scoped-test.cc +++ b/src/yb/integration-tests/xcluster/xcluster_db_scoped-test.cc @@ -11,7 +11,7 @@ // under the License. // -#include "yb/cdc/xcluster_util.h" +#include "yb/common/xcluster_util.h" #include "yb/client/table.h" #include "yb/client/xcluster_client.h" #include "yb/client/yb_table_name.h" diff --git a/src/yb/integration-tests/xcluster/xcluster_test_base.cc b/src/yb/integration-tests/xcluster/xcluster_test_base.cc index aa0c53fe94ea..ff24bd334894 100644 --- a/src/yb/integration-tests/xcluster/xcluster_test_base.cc +++ b/src/yb/integration-tests/xcluster/xcluster_test_base.cc @@ -15,7 +15,7 @@ #include -#include "yb/cdc/xcluster_util.h" +#include "yb/common/xcluster_util.h" #include "yb/client/client.h" #include "yb/client/table.h" diff --git a/src/yb/master/CMakeLists.txt b/src/yb/master/CMakeLists.txt index 6699708c18f3..b9faacdf2603 100644 --- a/src/yb/master/CMakeLists.txt +++ b/src/yb/master/CMakeLists.txt @@ -260,6 +260,7 @@ ADD_YB_TEST(table_index_test) ADD_YB_TEST(tablet_creation_limits_test) ADD_YB_TEST(xcluster/xcluster_safe_time_service-test) ADD_YB_TEST(xcluster/xcluster_outbound_replication_group-test) +YB_TEST_TARGET_LINK_LIBRARIES(xcluster/xcluster_outbound_replication_group-test yb_client_mock) # Actual master executable. In LTO mode, can also act as the tablet server if executed through a # symlink named as the tablet server executable. 
diff --git a/src/yb/master/async_rpc_tasks.cc b/src/yb/master/async_rpc_tasks.cc index eec08d07f90b..daa054c6d0ad 100644 --- a/src/yb/master/async_rpc_tasks.cc +++ b/src/yb/master/async_rpc_tasks.cc @@ -2014,10 +2014,9 @@ void AsyncClonePgSchema::HandleResponse(int attempt) { << " failed: " << resp_status; TransitionToFailedState(state(), resp_status); } else { - resp_status = Status::OK(); TransitionToCompleteState(); } - WARN_NOT_OK(callback_(resp_status), "Failed to execute the call back of AsyncClonePgSchema"); + WARN_NOT_OK(callback_(resp_status), "Failed to execute the callback of AsyncClonePgSchema"); } bool AsyncClonePgSchema::SendRequest(int attempt) { @@ -2034,5 +2033,40 @@ bool AsyncClonePgSchema::SendRequest(int attempt) { MonoTime AsyncClonePgSchema::ComputeDeadline() { return deadline_; } +// ============================================================================ +// Class AsyncEnableDbConns. +// ============================================================================ +AsyncEnableDbConns::AsyncEnableDbConns( + Master* master, ThreadPool* callback_pool, const std::string& permanent_uuid, + const std::string& target_db_name, EnableDbConnsCallbackType callback) + : RetrySpecificTSRpcTask( + master, callback_pool, std::move(permanent_uuid), /* async_task_throttler */ nullptr), + target_db_name_(target_db_name), + callback_(std::move(callback)) {} + +std::string AsyncEnableDbConns::description() const { + return "Enable connections on cloned database " + target_db_name_; +} + +void AsyncEnableDbConns::HandleResponse(int attempt) { + Status resp_status; + if (resp_.has_error()) { + resp_status = StatusFromPB(resp_.error().status()); + LOG(WARNING) << "Failed to enable connections on cloned database " << target_db_name_ + << ". 
Status: " << resp_status; + TransitionToFailedState(state(), resp_status); + } else { + TransitionToCompleteState(); + } + WARN_NOT_OK(callback_(resp_status), "Failed to execute callback of AsyncEnableDbConns"); +} + +bool AsyncEnableDbConns::SendRequest(int attempt) { + tserver::EnableDbConnsRequestPB req; + req.set_target_db_name(target_db_name_); + ts_admin_proxy_->EnableDbConnsAsync(req, &resp_, &rpc_, BindRpcCallback()); + return true; +} + } // namespace master } // namespace yb diff --git a/src/yb/master/async_rpc_tasks.h b/src/yb/master/async_rpc_tasks.h index 1580d5262407..aa2127fa8f9f 100644 --- a/src/yb/master/async_rpc_tasks.h +++ b/src/yb/master/async_rpc_tasks.h @@ -1096,5 +1096,32 @@ class AsyncClonePgSchema : public RetrySpecificTSRpcTask { ClonePgSchemaCallbackType callback_; }; +class AsyncEnableDbConns : public RetrySpecificTSRpcTask { + public: + using EnableDbConnsCallbackType = std::function; + AsyncEnableDbConns( + Master* master, ThreadPool* callback_pool, const std::string& permanent_uuid, + const std::string& target_db_name, EnableDbConnsCallbackType callback); + + server::MonitoredTaskType type() const override { + return server::MonitoredTaskType::kEnableDbConns; + } + + std::string type_name() const override { return "Enable DB connections"; } + + std::string description() const override; + + protected: + void HandleResponse(int attempt) override; + bool SendRequest(int attempt) override; + // Not associated with a tablet. 
+ TabletId tablet_id() const override { return TabletId(); } + + private: + std::string target_db_name_; + tserver::EnableDbConnsResponsePB resp_; + EnableDbConnsCallbackType callback_; +}; + } // namespace master } // namespace yb diff --git a/src/yb/master/catalog_entity_info.proto b/src/yb/master/catalog_entity_info.proto index 248d8555fda2..a08e6c06b708 100644 --- a/src/yb/master/catalog_entity_info.proto +++ b/src/yb/master/catalog_entity_info.proto @@ -606,13 +606,16 @@ message SysCloneStatePB { // ImportSnapshot (YCQL). Clone tablet RPCs are scheduled for all tablets. // CREATING --> RESTORING: All tablets in clone namespace are created and in a running // state. Restore snapshot is issued. - // RESTORING --> RESTORED: Restore snapshot completed. + // RESTORING --> COMPLETE (YCQL): Restore snapshot completed and DB is ready to be accessed. + // RESTORING --> RESTORED (YSQL): Restore snapshot completed. + // RESTORED --> COMPLETE (YSQL): DB is ready to be accessed. enum State { CLONE_SCHEMA_STARTED = 1; CREATING = 2; RESTORING = 3; RESTORED = 4; ABORTED = 5; + COMPLETE = 6; } optional State aggregate_state = 1; diff --git a/src/yb/master/clone/clone_state_entity.cc b/src/yb/master/clone/clone_state_entity.cc index 0b9239ef4078..946025c16f85 100644 --- a/src/yb/master/clone/clone_state_entity.cc +++ b/src/yb/master/clone/clone_state_entity.cc @@ -38,6 +38,16 @@ void CloneStateInfo::AddTabletData(TabletData tablet_data) { tablet_data_.push_back(std::move(tablet_data)); } +YQLDatabase CloneStateInfo::DatabaseType() { + std::lock_guard l(mutex_); + return database_type_; +} + +void CloneStateInfo::SetDatabaseType(YQLDatabase database_type) { + std::lock_guard l(mutex_); + database_type_ = database_type; +} + const TxnSnapshotId& CloneStateInfo::SourceSnapshotId() { std::lock_guard l(mutex_); return source_snapshot_id_; diff --git a/src/yb/master/clone/clone_state_entity.h b/src/yb/master/clone/clone_state_entity.h index 5be5a8e28d76..7388d43367e2 100644 --- 
a/src/yb/master/clone/clone_state_entity.h +++ b/src/yb/master/clone/clone_state_entity.h @@ -24,7 +24,7 @@ namespace yb::master { struct PersistentCloneStateInfo : public Persistent { bool IsDone() const { - return pb.aggregate_state() == SysCloneStatePB::RESTORED || + return pb.aggregate_state() == SysCloneStatePB::COMPLETE || pb.aggregate_state() == SysCloneStatePB::ABORTED; } }; @@ -45,6 +45,9 @@ class CloneStateInfo : public MetadataCowWrapper { std::vector GetTabletData(); void AddTabletData(CloneStateInfo::TabletData tablet_data); + YQLDatabase DatabaseType(); + void SetDatabaseType(YQLDatabase database_type); + const TxnSnapshotId& SourceSnapshotId(); void SetSourceSnapshotId(const TxnSnapshotId& source_snapshot_id); @@ -58,6 +61,8 @@ class CloneStateInfo : public MetadataCowWrapper { // The ID field is used in the sys_catalog table. const std::string clone_request_id_; + YQLDatabase database_type_ GUARDED_BY(mutex_) = YQL_DATABASE_UNKNOWN; + // These fields are set before the clone state is set to CREATING. 
std::vector tablet_data_ GUARDED_BY(mutex_); TxnSnapshotId source_snapshot_id_ GUARDED_BY(mutex_) = TxnSnapshotId::Nil(); diff --git a/src/yb/master/clone/clone_state_manager-test.cc b/src/yb/master/clone/clone_state_manager-test.cc index 2c39e60f5921..da6feee28421 100644 --- a/src/yb/master/clone/clone_state_manager-test.cc +++ b/src/yb/master/clone/clone_state_manager-test.cc @@ -111,6 +111,10 @@ class CloneStateManagerTest : public YBTest { const std::string& target_db_name, const std::string& source_owner, const std::string& target_owner, HybridTime restore_ht, AsyncClonePgSchema::ClonePgSchemaCallbackType callback, MonoTime deadline), (override)); + MOCK_METHOD( + Status, ScheduleEnableDbConnectionsTask, + (const std::string& permanent_uuid, const std::string& target_db_name, + AsyncEnableDbConns::EnableDbConnsCallbackType callback), (override)); MOCK_METHOD( Status, Upsert, (const CloneStateInfoPtr& clone_state), (override)); @@ -216,7 +220,7 @@ class CloneStateManagerTest : public YBTest { Result CreateCloneState( uint32_t seq_no, const ExternalTableSnapshotDataMap& table_snapshot_data) { auto clone_state = VERIFY_RESULT(clone_state_manager_->CreateCloneState( - seq_no, kSourceNamespaceId, kTargetNamespaceName, kRestoreTime)); + seq_no, kSourceNamespaceId, GetDatabaseType(), kTargetNamespaceName, kRestoreTime)); RETURN_NOT_OK(clone_state_manager_->UpdateCloneStateWithSnapshotInfo( clone_state, kSourceSnapshotId, kTargetSnapshotId, table_snapshot_data)); @@ -525,10 +529,10 @@ TEST_F(CloneStateManagerTest, HandleRestoringStateRestored) { .WillOnce(DoAll(SetArgPointee<1>(resp), Return(Status::OK()))); EXPECT_CALL(MockFuncs(), Upsert); - // Should transition the clone to the RESTORED state. + // Should transition the clone to the COMPLETE state. 
ASSERT_OK(HandleRestoringState(clone_state)); - ASSERT_EQ(clone_state->LockForRead()->pb.aggregate_state(), SysCloneStatePB::RESTORED); + ASSERT_EQ(clone_state->LockForRead()->pb.aggregate_state(), SysCloneStatePB::COMPLETE); } TEST_F(CloneStateManagerTest, AbortInStartTabletsCloning) { @@ -625,7 +629,7 @@ TEST_F(CloneStateManagerTest, AbortInRestoringState) { TEST_F(CloneStateManagerTest, Load) { // Check that multiple clone states are all loaded and can be queried with ListClones. SysCloneStatePB clone_state1; - clone_state1.set_aggregate_state(SysCloneStatePB::RESTORED); + clone_state1.set_aggregate_state(SysCloneStatePB::COMPLETE); clone_state1.set_source_namespace_id(kSourceNamespaceId); clone_state1.set_target_namespace_name(kTargetNamespaceName); clone_state1.set_restore_time(kRestoreTime.ToUint64()); @@ -707,8 +711,8 @@ TEST_F(CloneStateManagerTest, AbortIncompleteCloneOnLoad) { ASSERT_EQ(loaded_lock->pb.aggregate_state(), SysCloneStatePB::ABORTED); ASSERT_EQ(loaded_lock->pb.abort_message(), kSampleAbortMessage); break; - case SysCloneStatePB_State_RESTORED: - ASSERT_EQ(loaded_lock->pb.aggregate_state(), SysCloneStatePB::RESTORED); + case SysCloneStatePB_State_COMPLETE: + ASSERT_EQ(loaded_lock->pb.aggregate_state(), SysCloneStatePB::COMPLETE); ASSERT_FALSE(loaded_lock->pb.has_abort_message()); break; default: diff --git a/src/yb/master/clone/clone_state_manager.cc b/src/yb/master/clone/clone_state_manager.cc index 10f74142cc68..997bf6cac15b 100644 --- a/src/yb/master/clone/clone_state_manager.cc +++ b/src/yb/master/clone/clone_state_manager.cc @@ -51,6 +51,7 @@ DEFINE_RUNTIME_PREVIEW_bool(enable_db_clone, false, "Enable DB cloning."); DECLARE_int32(ysql_clone_pg_schema_rpc_timeout_ms); DEFINE_test_flag(bool, fail_clone_pg_schema, false, "Fail clone pg schema operation for testing"); +DEFINE_test_flag(bool, fail_clone_tablets, false, "Fail StartTabletsCloning for testing"); namespace yb { namespace master { @@ -118,6 +119,14 @@ class 
CloneStateManagerExternalFunctions : public CloneStateManagerExternalFunct return catalog_manager_->ScheduleTask(task); } + Status ScheduleEnableDbConnectionsTask( + const TabletServerId& ts_uuid, const std::string& target_db_name, + AsyncEnableDbConns::EnableDbConnsCallbackType callback) override { + auto task = std::make_shared( + master_, catalog_manager_->AsyncTaskPool(), ts_uuid, target_db_name, callback); + return catalog_manager_->ScheduleTask(task); + } + Status DoCreateSnapshot( const CreateSnapshotRequestPB* req, CreateSnapshotResponsePB* resp, CoarseTimePoint deadline, const LeaderEpoch& epoch) override { @@ -266,8 +275,9 @@ Result> CloneStateManager::CloneNamespace( // Set up clone state. // Past this point, we should abort the clone state if we get a non-OK status from any step. - auto clone_state = VERIFY_RESULT( - CreateCloneState(seq_no, source_namespace_id, target_namespace_name, restore_time)); + auto clone_state = VERIFY_RESULT(CreateCloneState( + seq_no, source_namespace_id, source_namespace_identifier.database_type(), + target_namespace_name, restore_time)); // Clone PG Schema objects first in case of PGSQL databases. Tablets cloning is initiated in the // callback of ClonePgSchemaObjects async task. @@ -294,6 +304,10 @@ Status CloneStateManager::StartTabletsCloning( const std::string& target_namespace_name, CoarseTimePoint deadline, const LeaderEpoch& epoch) { + if (FLAGS_TEST_fail_clone_tablets) { + return STATUS_FORMAT(RuntimeError, "Failing clone due to test flag fail_clone_tablets"); + } + // Export snapshot info. 
auto [snapshot_info, not_snapshotted_tablets] = VERIFY_RESULT( external_funcs_->GenerateSnapshotInfoFromScheduleForClone( @@ -414,6 +428,7 @@ Status CloneStateManager::LoadCloneState(const std::string& id, const SysCloneSt Result CloneStateManager::CreateCloneState( uint32_t seq_no, const NamespaceId& source_namespace_id, + YQLDatabase database_type, const std::string& target_namespace_name, const HybridTime& restore_time) { // Check if there is an ongoing clone for the source namespace. @@ -433,6 +448,7 @@ Result CloneStateManager::CreateCloneState( } auto clone_state = std::make_shared(GenerateObjectId()); + clone_state->SetDatabaseType(database_type); clone_state->mutable_metadata()->StartMutation(); auto* pb = &clone_state->mutable_metadata()->mutable_dirty()->pb; pb->set_aggregate_state(SysCloneStatePB::CLONE_SCHEMA_STARTED); @@ -570,6 +586,33 @@ Status CloneStateManager::HandleCreatingState(const CloneStateInfoPtr& clone_sta return Status::OK(); } +Status CloneStateManager::EnableDbConnections(const CloneStateInfoPtr& clone_state) { + auto callback = [this, clone_state](const Status& enable_db_conns_status) -> Status { + + auto status = enable_db_conns_status; + if (status.ok()) { + auto lock = clone_state->LockForWrite(); + SCHECK_EQ(lock->pb.aggregate_state(), SysCloneStatePB::RESTORED, IllegalState, + "Expected clone to be in restored state"); + lock.mutable_data()->pb.set_aggregate_state(SysCloneStatePB::COMPLETE); + auto status = external_funcs_->Upsert(clone_state); + if (status.ok()) { + lock.Commit(); + } + } + if (!status.ok()) { + RETURN_NOT_OK(MarkCloneAborted(clone_state, status.ToString())); + } + return Status::OK(); + }; + + auto ts = external_funcs_->PickTserver(); + auto ts_permanent_uuid = ts->permanent_uuid(); + RETURN_NOT_OK(external_funcs_->ScheduleEnableDbConnectionsTask( + ts_permanent_uuid, clone_state->LockForRead()->pb.target_namespace_name(), callback)); + return Status::OK(); +} + Status 
CloneStateManager::HandleRestoringState(const CloneStateInfoPtr& clone_state) { auto lock = clone_state->LockForWrite(); SCHECK_EQ(lock->pb.aggregate_state(), SysCloneStatePB::RESTORING, IllegalState, @@ -584,10 +627,17 @@ Status CloneStateManager::HandleRestoringState(const CloneStateInfoPtr& clone_st return Status::OK(); } - lock.mutable_data()->pb.set_aggregate_state(SysCloneStatePB::RESTORED); - RETURN_NOT_OK(external_funcs_->Upsert(clone_state)); - lock.Commit(); - return Status::OK(); + if (clone_state->DatabaseType() == YQL_DATABASE_PGSQL) { + lock.mutable_data()->pb.set_aggregate_state(SysCloneStatePB::RESTORED); + RETURN_NOT_OK(external_funcs_->Upsert(clone_state)); + lock.Commit(); + return EnableDbConnections(clone_state); + } else { + lock.mutable_data()->pb.set_aggregate_state(SysCloneStatePB::COMPLETE); + RETURN_NOT_OK(external_funcs_->Upsert(clone_state)); + lock.Commit(); + return Status::OK(); + } } Status CloneStateManager::MarkCloneAborted( @@ -625,6 +675,7 @@ Status CloneStateManager::Run() { break; case SysCloneStatePB::CLONE_SCHEMA_STARTED: FALLTHROUGH_INTENDED; case SysCloneStatePB::RESTORED: FALLTHROUGH_INTENDED; + case SysCloneStatePB::COMPLETE: FALLTHROUGH_INTENDED; case SysCloneStatePB::ABORTED: break; } diff --git a/src/yb/master/clone/clone_state_manager.h b/src/yb/master/clone/clone_state_manager.h index 59c961efb993..3cd03fc89989 100644 --- a/src/yb/master/clone/clone_state_manager.h +++ b/src/yb/master/clone/clone_state_manager.h @@ -66,7 +66,7 @@ class CloneStateManager { const LeaderEpoch& epoch); Result CreateCloneState( - uint32_t seq_no, const NamespaceId& source_namespace_id, + uint32_t seq_no, const NamespaceId& source_namespace_id, YQLDatabase database_type, const std::string& target_namespace_name, const HybridTime& restore_time); Status UpdateCloneStateWithSnapshotInfo( @@ -105,6 +105,8 @@ class CloneStateManager { const std::string& target_namespace_name, CoarseTimePoint deadline, const LeaderEpoch& epoch); + Status 
EnableDbConnections(const CloneStateInfoPtr& clone_state); + Status HandleCreatingState(const CloneStateInfoPtr& clone_state); Status HandleRestoringState(const CloneStateInfoPtr& clone_state); Result IsDeleteNamespaceDone(const CloneStateInfoPtr& clone_state); diff --git a/src/yb/master/clone/external_functions.h b/src/yb/master/clone/external_functions.h index fa6fc8093ddb..cfa753175c35 100644 --- a/src/yb/master/clone/external_functions.h +++ b/src/yb/master/clone/external_functions.h @@ -61,6 +61,10 @@ class CloneStateManagerExternalFunctionsBase { AsyncClonePgSchema::ClonePgSchemaCallbackType callback, MonoTime deadline) = 0; + virtual Status ScheduleEnableDbConnectionsTask( + const std::string& permanent_uuid, const std::string& target_db_name, + AsyncEnableDbConns::EnableDbConnsCallbackType callback) = 0; + virtual Status DoCreateSnapshot( const CreateSnapshotRequestPB* req, CreateSnapshotResponsePB* resp, CoarseTimePoint deadline, const LeaderEpoch& epoch) = 0; diff --git a/src/yb/master/master-path-handlers.cc b/src/yb/master/master-path-handlers.cc index ea5bf1287d99..9bdbaa73e969 100644 --- a/src/yb/master/master-path-handlers.cc +++ b/src/yb/master/master-path-handlers.cc @@ -42,7 +42,7 @@ #include -#include "yb/cdc/xcluster_util.h" +#include "yb/common/xcluster_util.h" #include "yb/common/common_types_util.h" #include "yb/common/hybrid_time.h" diff --git a/src/yb/master/xcluster/add_table_to_xcluster_target_task.cc b/src/yb/master/xcluster/add_table_to_xcluster_target_task.cc index ce35af7275eb..cce3d53d09d8 100644 --- a/src/yb/master/xcluster/add_table_to_xcluster_target_task.cc +++ b/src/yb/master/xcluster/add_table_to_xcluster_target_task.cc @@ -13,7 +13,7 @@ #include "yb/master/xcluster/add_table_to_xcluster_target_task.h" -#include "yb/cdc/xcluster_util.h" +#include "yb/common/xcluster_util.h" #include "yb/client/xcluster_client.h" #include "yb/master/catalog_manager.h" #include "yb/util/is_operation_done_result.h" @@ -23,7 +23,6 @@ #include 
"yb/rpc/messenger.h" #include "yb/util/logging.h" #include "yb/util/sync_point.h" -#include "yb/util/trace.h" DEFINE_test_flag(bool, xcluster_fail_table_create_during_bootstrap, false, "Fail the table or index creation during xcluster bootstrap stage."); @@ -101,8 +100,8 @@ Status AddTableToXClusterTargetTask::FirstStep() { VERIFY_RESULT(GetProducerNamespaceId(*universe_, table_info_->namespace_id())); // We need to keep the client alive until the callback is invoked. - remote_client_ = VERIFY_RESULT(GetXClusterRemoteClient(*universe_)); - return remote_client_->GetXClusterTableCheckpointInfos( + remote_client_ = VERIFY_RESULT(GetXClusterRemoteClientHolder(*universe_)); + return remote_client_->GetXClusterClient().GetXClusterTableCheckpointInfos( universe_->ReplicationGroupId(), producer_namespace_id, {table_info_->name()}, {table_info_->pgschema_name()}, std::move(callback)); } diff --git a/src/yb/master/xcluster/add_table_to_xcluster_target_task.h b/src/yb/master/xcluster/add_table_to_xcluster_target_task.h index 93827d3bb285..33ab7b416705 100644 --- a/src/yb/master/xcluster/add_table_to_xcluster_target_task.h +++ b/src/yb/master/xcluster/add_table_to_xcluster_target_task.h @@ -24,7 +24,7 @@ namespace yb { namespace client { -class XClusterRemoteClient; +class XClusterRemoteClientHolder; } // namespace client namespace master { @@ -64,7 +64,7 @@ class AddTableToXClusterTargetTask : public PostTabletCreateTaskBase { HybridTime bootstrap_time_ = HybridTime::kInvalid; HybridTime initial_xcluster_safe_time_ = HybridTime::kInvalid; scoped_refptr universe_; - std::shared_ptr remote_client_; + std::shared_ptr remote_client_; XClusterManagerIf& xcluster_manager_; bool is_db_scoped_ = false; }; diff --git a/src/yb/master/xcluster/xcluster_outbound_replication_group-test.cc b/src/yb/master/xcluster/xcluster_outbound_replication_group-test.cc index 4a75af9b208f..99f00a3e489c 100644 --- a/src/yb/master/xcluster/xcluster_outbound_replication_group-test.cc +++ 
b/src/yb/master/xcluster/xcluster_outbound_replication_group-test.cc @@ -15,7 +15,7 @@ #include -#include "yb/client/xcluster_client.h" +#include "yb/client/xcluster_client_mock.h" #include "yb/master/catalog_entity_info.h" #include "yb/master/xcluster/xcluster_outbound_replication_group_tasks.h" @@ -39,41 +39,12 @@ using testing::Return; namespace yb::master { -const UniverseUuid kTargetUniverseUuid = UniverseUuid::GenerateRandom(); -const LeaderEpoch kEpoch = LeaderEpoch(1, 1); +const auto kEpoch = master::LeaderEpoch(1, 1); inline bool operator==(const NamespaceCheckpointInfo& lhs, const NamespaceCheckpointInfo& rhs) { return YB_STRUCT_EQUALS(initial_bootstrap_required, table_infos); } -class XClusterRemoteClientMocked : public client::XClusterRemoteClient { - public: - XClusterRemoteClientMocked() : client::XClusterRemoteClient("na", MonoDelta::kMax) { - DefaultValue>::Set(kTargetUniverseUuid); - DefaultValue>::Set(IsOperationDoneResult::Done()); - } - - MOCK_METHOD2(Init, Status(const xcluster::ReplicationGroupId&, const std::vector&)); - MOCK_METHOD6( - SetupDbScopedUniverseReplication, - Result( - const xcluster::ReplicationGroupId&, const std::vector&, - const std::vector&, const std::vector&, - const std::vector&, const std::vector&)); - - MOCK_METHOD1( - IsSetupUniverseReplicationDone, - Result(const xcluster::ReplicationGroupId&)); - - MOCK_METHOD6( - AddNamespaceToDbScopedUniverseReplication, - Status( - const xcluster::ReplicationGroupId& replication_group_id, - const UniverseUuid& target_universe_uuid, const NamespaceName& namespace_name, - const NamespaceId& source_namespace_id, const std::vector& source_table_ids, - const std::vector& bootstrap_ids)); -}; - Status ValidateEpoch(const LeaderEpoch& epoch) { SCHECK_EQ(epoch, kEpoch, IllegalState, "Epoch does not match"); return Status::OK(); @@ -96,11 +67,11 @@ class XClusterOutboundReplicationGroupMocked : public XClusterOutboundReplicatio : XClusterOutboundReplicationGroup( replication_group_id, {}, 
std::move(helper_functions), /*tasks_tracker=*/nullptr, task_factory) { - remote_client_ = std::make_shared(); + remote_client_ = std::make_shared(); } - void SetRemoteClient(std::shared_ptr remote_client) { - remote_client_ = remote_client; + client::MockXClusterClient& GetMockXClusterClient() { + return remote_client_->GetMockXClusterClient(); } bool IsDeleted() const { @@ -151,16 +122,17 @@ class XClusterOutboundReplicationGroupMocked : public XClusterOutboundReplicatio } private: - virtual Result> GetRemoteClient( + virtual Result> GetRemoteClient( const std::vector& remote_masters) const override { return remote_client_; } - std::shared_ptr remote_client_; + std::shared_ptr remote_client_; }; class XClusterOutboundReplicationGroupMockedTest : public YBTest { public: + const UniverseUuid kTargetUniverseUuid = UniverseUuid::GenerateRandom(); const NamespaceName kNamespaceName = "db1"; const NamespaceId kNamespaceId = "db1_id"; const PgSchemaName kPgSchemaName = "public", kPgSchemaName2 = "public2"; @@ -172,6 +144,9 @@ class XClusterOutboundReplicationGroupMockedTest : public YBTest { XClusterOutboundReplicationGroupMockedTest() { google::SetVLOGLevel("*", 4); + DefaultValue>::Set(kTargetUniverseUuid); + DefaultValue>::Set(IsOperationDoneResult::Done()); + ThreadPoolBuilder thread_pool_builder("Test"); CHECK_OK(thread_pool_builder.Build(&thread_pool)); @@ -473,14 +448,13 @@ TEST_F(XClusterOutboundReplicationGroupMockedTest, CreateTargetReplicationGroup) auto outbound_rg_ptr = CreateReplicationGroup(); auto& outbound_rg = *outbound_rg_ptr; - auto remote_client = std::make_shared(); - outbound_rg.SetRemoteClient(remote_client); + auto& xcluster_client = outbound_rg.GetMockXClusterClient(); ASSERT_OK(outbound_rg.AddNamespaceSync(kEpoch, kNamespaceId, kTimeout)); std::vector streams{xcluster_streams.begin(), xcluster_streams.end()}; EXPECT_CALL( - *remote_client, + xcluster_client, SetupDbScopedUniverseReplication( kReplicationGroupId, _, std::vector{kNamespaceName}, 
std::vector{kNamespaceId}, std::vector{kTableId1}, streams)) @@ -488,7 +462,7 @@ TEST_F(XClusterOutboundReplicationGroupMockedTest, CreateTargetReplicationGroup) ASSERT_OK(outbound_rg.CreateXClusterReplication({}, {}, kEpoch)); - EXPECT_CALL(*remote_client, IsSetupUniverseReplicationDone(_)) + EXPECT_CALL(xcluster_client, IsSetupUniverseReplicationDone(_)) .WillOnce(Return(IsOperationDoneResult::NotDone())); auto create_result = ASSERT_RESULT(outbound_rg.IsCreateXClusterReplicationDone({}, kEpoch)); @@ -496,7 +470,7 @@ TEST_F(XClusterOutboundReplicationGroupMockedTest, CreateTargetReplicationGroup) // Fail the Setup. const auto error_str = "Failed by test"; - EXPECT_CALL(*remote_client, IsSetupUniverseReplicationDone(_)) + EXPECT_CALL(xcluster_client, IsSetupUniverseReplicationDone(_)) .WillOnce(Return(STATUS(IllegalState, error_str))); auto result = outbound_rg.IsCreateXClusterReplicationDone({}, kEpoch); ASSERT_NOK(result); @@ -509,7 +483,7 @@ TEST_F(XClusterOutboundReplicationGroupMockedTest, CreateTargetReplicationGroup) pb.target_universe_info().state(), SysXClusterOutboundReplicationGroupEntryPB::TargetUniverseInfoPB::CREATING_REPLICATION_GROUP); - EXPECT_CALL(*remote_client, IsSetupUniverseReplicationDone(_)) + EXPECT_CALL(xcluster_client, IsSetupUniverseReplicationDone(_)) .WillOnce(Return(IsOperationDoneResult::Done(STATUS(IllegalState, error_str)))); create_result = ASSERT_RESULT(outbound_rg.IsCreateXClusterReplicationDone({}, kEpoch)); ASSERT_TRUE(create_result.done()); @@ -519,10 +493,10 @@ TEST_F(XClusterOutboundReplicationGroupMockedTest, CreateTargetReplicationGroup) ASSERT_FALSE(pb.has_target_universe_info()); // Success case. 
- EXPECT_CALL(*remote_client, IsSetupUniverseReplicationDone(_)) + EXPECT_CALL(xcluster_client, IsSetupUniverseReplicationDone(_)) .WillOnce(Return(IsOperationDoneResult::Done())); - EXPECT_CALL(*remote_client, SetupDbScopedUniverseReplication(_, _, _, _, _, _)); + EXPECT_CALL(xcluster_client, SetupDbScopedUniverseReplication(_, _, _, _, _, _)); // Calling create again should not do anything. ASSERT_OK(outbound_rg.CreateXClusterReplication({}, {}, kEpoch)); diff --git a/src/yb/master/xcluster/xcluster_outbound_replication_group.cc b/src/yb/master/xcluster/xcluster_outbound_replication_group.cc index 2928f115f609..165cccf7aea4 100644 --- a/src/yb/master/xcluster/xcluster_outbound_replication_group.cc +++ b/src/yb/master/xcluster/xcluster_outbound_replication_group.cc @@ -12,7 +12,7 @@ // #include "yb/master/xcluster/xcluster_outbound_replication_group.h" -#include "yb/cdc/xcluster_util.h" +#include "yb/common/xcluster_util.h" #include "yb/client/xcluster_client.h" #include "yb/common/colocated_util.h" #include "yb/master/catalog_entity_info.h" @@ -24,9 +24,6 @@ DEFINE_RUNTIME_uint32(max_xcluster_streams_to_checkpoint_in_parallel, 200, "Maximum number of xCluster streams to checkpoint in parallel"); -DECLARE_int32(cdc_read_rpc_timeout_ms); -DECLARE_string(certs_for_cdc_dir); - using namespace std::placeholders; namespace yb::master { @@ -502,8 +499,8 @@ Status XClusterOutboundReplicationGroup::RemoveNamespace( UniverseUuid::FromString(outbound_group_pb.target_universe_info().universe_uuid())); auto remote_client = VERIFY_RESULT(GetRemoteClient(target_master_addresses)); - RETURN_NOT_OK( - (*remote_client)->RemoveNamespaceFromUniverseReplication(Id(), namespace_id, target_uuid)); + RETURN_NOT_OK(remote_client->GetXClusterClient().RemoveNamespaceFromUniverseReplication( + Id(), namespace_id, target_uuid)); } RETURN_NOT_OK(DeleteNamespaceStreams(epoch, namespace_id, outbound_group_pb)); @@ -533,8 +530,8 @@ Status XClusterOutboundReplicationGroup::Delete( 
UniverseUuid::FromString(outbound_group_pb.target_universe_info().universe_uuid())); auto remote_client = VERIFY_RESULT(GetRemoteClient(target_master_addresses)); - RETURN_NOT_OK( - (*remote_client)->DeleteUniverseReplication(Id(), /*ignore_errors=*/true, target_uuid)); + RETURN_NOT_OK(remote_client->GetXClusterClient().DeleteUniverseReplication( + Id(), /*ignore_errors=*/true, target_uuid)); } for (const auto& [namespace_id, _] : *outbound_group_pb.mutable_namespace_infos()) { @@ -678,13 +675,10 @@ XClusterOutboundReplicationGroup::GetNamespaceCheckpointInfoForTableIds( return ns_info; } -Result> +Result> XClusterOutboundReplicationGroup::GetRemoteClient( const std::vector& remote_masters) const { - auto client = std::make_shared( - FLAGS_certs_for_cdc_dir, MonoDelta::FromMilliseconds(FLAGS_cdc_read_rpc_timeout_ms)); - RETURN_NOT_OK(client->Init(Id(), remote_masters)); - return client; + return client::XClusterRemoteClientHolder::Create(Id(), remote_masters); } Status XClusterOutboundReplicationGroup::CreateXClusterReplication( @@ -729,9 +723,10 @@ Status XClusterOutboundReplicationGroup::CreateXClusterReplication( auto remote_client = VERIFY_RESULT(GetRemoteClient(target_master_addresses)); - auto target_uuid = VERIFY_RESULT(remote_client->SetupDbScopedUniverseReplication( - Id(), source_master_addresses, namespace_names, namespace_ids, source_table_ids, - bootstrap_ids)); + auto target_uuid = + VERIFY_RESULT(remote_client->GetXClusterClient().SetupDbScopedUniverseReplication( + Id(), source_master_addresses, namespace_names, namespace_ids, source_table_ids, + bootstrap_ids)); auto* target_universe_info = l.mutable_data()->pb.mutable_target_universe_info(); @@ -778,7 +773,8 @@ Result XClusterOutboundReplicationGroup::IsCreateXCluster // TODO(#20810): Remove this once async task that polls for IsCreateXClusterReplicationDone gets // added. 
auto remote_client = VERIFY_RESULT(GetRemoteClient(target_master_addresses)); - setup_result = VERIFY_RESULT(remote_client->IsSetupUniverseReplicationDone(Id())); + setup_result = + VERIFY_RESULT(remote_client->GetXClusterClient().IsSetupUniverseReplicationDone(Id())); } if (!setup_result.done()) { @@ -842,7 +838,7 @@ Status XClusterOutboundReplicationGroup::AddNamespaceToTarget( auto remote_client = VERIFY_RESULT(GetRemoteClient(target_master_addresses)); - RETURN_NOT_OK(remote_client->AddNamespaceToDbScopedUniverseReplication( + RETURN_NOT_OK(remote_client->GetXClusterClient().AddNamespaceToDbScopedUniverseReplication( Id(), target_uuid, namespace_name, source_namespace_id, source_table_ids, bootstrap_ids)); // TODO(#20810): Start a async task that will poll for IsCreateXClusterReplicationDone and update @@ -854,7 +850,8 @@ Status XClusterOutboundReplicationGroup::AddNamespaceToTarget( Result XClusterOutboundReplicationGroup::IsAlterXClusterReplicationDone( const std::vector& target_master_addresses, const LeaderEpoch& epoch) { auto remote_client = VERIFY_RESULT(GetRemoteClient(target_master_addresses)); - return remote_client->IsSetupUniverseReplicationDone(xcluster::GetAlterReplicationGroupId(Id())); + return remote_client->GetXClusterClient().IsSetupUniverseReplicationDone( + xcluster::GetAlterReplicationGroupId(Id())); } bool XClusterOutboundReplicationGroup::HasNamespace(const NamespaceId& namespace_id) const { diff --git a/src/yb/master/xcluster/xcluster_outbound_replication_group.h b/src/yb/master/xcluster/xcluster_outbound_replication_group.h index f77098f314e2..7719e42462b4 100644 --- a/src/yb/master/xcluster/xcluster_outbound_replication_group.h +++ b/src/yb/master/xcluster/xcluster_outbound_replication_group.h @@ -25,7 +25,7 @@ namespace yb { class IsOperationDoneResult; namespace client { -class XClusterRemoteClient; +class XClusterRemoteClientHolder; } // namespace client namespace master { @@ -167,7 +167,7 @@ class XClusterOutboundReplicationGroup 
const LeaderEpoch& epoch, const NamespaceId& namespace_id, const SysXClusterOutboundReplicationGroupEntryPB& pb) REQUIRES(mutex_); - virtual Result> GetRemoteClient( + virtual Result> GetRemoteClient( const std::vector& remote_masters) const; // Checks if the namespace is part of this replication group. Caller must hold the read or write diff --git a/src/yb/master/xcluster/xcluster_replication_group.cc b/src/yb/master/xcluster/xcluster_replication_group.cc index 62d750626ca4..e99b3ea2d6c1 100644 --- a/src/yb/master/xcluster/xcluster_replication_group.cc +++ b/src/yb/master/xcluster/xcluster_replication_group.cc @@ -24,14 +24,11 @@ #include "yb/util/is_operation_done_result.h" #include "yb/master/xcluster_rpc_tasks.h" #include "yb/master/xcluster/xcluster_manager_if.h" -#include "yb/cdc/xcluster_util.h" +#include "yb/common/xcluster_util.h" #include "yb/master/sys_catalog.h" #include "yb/util/flags/auto_flags_util.h" #include "yb/util/result.h" -DECLARE_int32(cdc_read_rpc_timeout_ms); -DECLARE_string(certs_for_cdc_dir); - DEFINE_RUNTIME_bool(xcluster_skip_health_check_on_replication_setup, false, "Skip health check on xCluster replication setup"); @@ -398,16 +395,13 @@ bool IncludesConsumerNamespace( return opt_namespace_id.has_value(); } -Result> GetXClusterRemoteClient( +Result> GetXClusterRemoteClientHolder( UniverseReplicationInfo& universe) { auto master_addresses = universe.LockForRead()->pb.producer_master_addresses(); std::vector hp; HostPortsFromPBs(master_addresses, &hp); - auto xcluster_client = std::make_shared( - FLAGS_certs_for_cdc_dir, MonoDelta::FromMilliseconds(FLAGS_cdc_read_rpc_timeout_ms)); - RETURN_NOT_OK(xcluster_client->Init(universe.ReplicationGroupId(), hp)); - return xcluster_client; + return client::XClusterRemoteClientHolder::Create(universe.ReplicationGroupId(), hp); } Result IsSetupUniverseReplicationDone( diff --git a/src/yb/master/xcluster/xcluster_replication_group.h b/src/yb/master/xcluster/xcluster_replication_group.h index 
84665ad8f6be..d37d9c93a53f 100644 --- a/src/yb/master/xcluster/xcluster_replication_group.h +++ b/src/yb/master/xcluster/xcluster_replication_group.h @@ -23,7 +23,7 @@ class IsOperationDoneResult; class SysCatalogTable; namespace client { -class XClusterRemoteClient; +class XClusterRemoteClientHolder; } // namespace client namespace master { @@ -72,7 +72,7 @@ Result GetProducerNamespaceId( bool IncludesConsumerNamespace( UniverseReplicationInfo& universe, const NamespaceId& consumer_namespace_id); -Result> GetXClusterRemoteClient( +Result> GetXClusterRemoteClientHolder( UniverseReplicationInfo& universe); // Returns (false, Status::OK()) if the universe setup is still in progress. diff --git a/src/yb/master/xcluster/xcluster_source_manager.cc b/src/yb/master/xcluster/xcluster_source_manager.cc index 958a577c2acc..c2b83f0289f4 100644 --- a/src/yb/master/xcluster/xcluster_source_manager.cc +++ b/src/yb/master/xcluster/xcluster_source_manager.cc @@ -18,6 +18,7 @@ #include "yb/cdc/cdc_state_table.h" #include "yb/cdc/xcluster_types.h" #include "yb/client/xcluster_client.h" +#include "yb/common/xcluster_util.h" #include "yb/master/catalog_manager.h" #include "yb/master/master.h" #include "yb/master/xcluster/master_xcluster_util.h" diff --git a/src/yb/master/xcluster_rpc_tasks.cc b/src/yb/master/xcluster_rpc_tasks.cc index f8902fec3b1c..bf73227186af 100644 --- a/src/yb/master/xcluster_rpc_tasks.cc +++ b/src/yb/master/xcluster_rpc_tasks.cc @@ -16,7 +16,7 @@ #include "yb/client/client.h" #include "yb/client/yb_table_name.h" -#include "yb/cdc/xcluster_util.h" +#include "yb/common/xcluster_util.h" #include "yb/gutil/callback.h" diff --git a/src/yb/master/xrepl_catalog_manager.cc b/src/yb/master/xrepl_catalog_manager.cc index e97d2e9388df..89e7fb002603 100644 --- a/src/yb/master/xrepl_catalog_manager.cc +++ b/src/yb/master/xrepl_catalog_manager.cc @@ -12,7 +12,7 @@ #include "yb/cdc/cdc_service.h" #include "yb/cdc/cdc_state_table.h" -#include "yb/cdc/xcluster_util.h" +#include 
"yb/common/xcluster_util.h" #include "yb/client/meta_cache.h" #include "yb/client/schema.h" @@ -161,6 +161,17 @@ DEFINE_RUNTIME_bool(cdcsdk_enable_cleanup_of_non_eligible_tables_from_stream, fa "materialised view etc. in their stream metadata and these tables will be marked for removal " "by catalog manager background thread."); +DEFINE_RUNTIME_AUTO_bool(cdcsdk_enable_identification_of_non_eligible_tables, + kLocalPersisted, + false, + true, + "This flag, when true, identifies all non-eligible tables that are part of" + " a CDC stream metadata while loading the CDC streams on a master " + "restart/leadership change. This identification happens on all CDC " + "streams in the universe"); +TAG_FLAG(cdcsdk_enable_identification_of_non_eligible_tables, advanced); +TAG_FLAG(cdcsdk_enable_identification_of_non_eligible_tables, hidden); + DECLARE_bool(xcluster_wait_on_ddl_alter); DECLARE_int32(master_rpc_timeout_ms); DECLARE_bool(ysql_yb_enable_replication_commands); @@ -345,7 +356,7 @@ class CDCStreamLoader : public Visitor { // Check for any non-eligible tables like indexes, matview etc in CDC stream only if the // stream is not associated with a replication slot. 
- if (FLAGS_cdcsdk_enable_cleanup_of_non_eligible_tables_from_stream && + if (FLAGS_cdcsdk_enable_identification_of_non_eligible_tables && stream->GetCdcsdkYsqlReplicationSlotName().empty()) { catalog_manager_->FindAllNonEligibleTablesInCDCSDKStream( stream_id, metadata.table_id(), eligible_tables_info); @@ -7652,7 +7663,7 @@ CatalogManager::UpdateCheckpointForTabletEntriesInCDCState( "from CDC stream $1", table_to_be_removed, stream_id); - LOG_WITH_FUNC(INFO) << "Deleting cdc state table entry (tablet,stream) - " + LOG_WITH_FUNC(INFO) << "Deleting cdc state table entry (tablet, stream, table) - " << cdc_state_entries_to_be_deleted[0].ToString(); RETURN_NOT_OK_PREPEND( cdc_state_table_->DeleteEntries(cdc_state_entries_to_be_deleted), diff --git a/src/yb/server/monitored_task.h b/src/yb/server/monitored_task.h index 3d8fde44e2c3..bdf46ea7508a 100644 --- a/src/yb/server/monitored_task.h +++ b/src/yb/server/monitored_task.h @@ -70,6 +70,7 @@ YB_DEFINE_ENUM(MonitoredTaskType, (kCloneTablet) (kCreateReplica) (kDeleteReplica) + (kEnableDbConns) (kFlushTablets) (kGetSafeTime) (kGetTabletSplitKey) diff --git a/src/yb/tools/yb-admin-snapshot-schedule-test.cc b/src/yb/tools/yb-admin-snapshot-schedule-test.cc index 5637810a25f4..6b9674979560 100644 --- a/src/yb/tools/yb-admin-snapshot-schedule-test.cc +++ b/src/yb/tools/yb-admin-snapshot-schedule-test.cc @@ -267,7 +267,7 @@ class YbAdminSnapshotScheduleTest : public AdminTestBase { master::SysCloneStatePB::State_Parse( std::string(VERIFY_RESULT(GetMemberAsStr(entries[0], "aggregate_state"))), &state); return state == master::SysCloneStatePB::ABORTED || - state == master::SysCloneStatePB::RESTORED; + state == master::SysCloneStatePB::COMPLETE; }, timeout, "Wait for clone to complete")); return Status::OK(); } diff --git a/src/yb/tools/yb-admin_cli.cc b/src/yb/tools/yb-admin_cli.cc index 348a408c8250..84e00b505888 100644 --- a/src/yb/tools/yb-admin_cli.cc +++ b/src/yb/tools/yb-admin_cli.cc @@ -38,7 +38,7 @@ #include #include 
-#include "yb/cdc/xcluster_util.h" +#include "yb/common/xcluster_util.h" #include "yb/client/xcluster_client.h" #include "yb/common/hybrid_time.h" #include "yb/common/json_util.h" diff --git a/src/yb/tools/yb-admin_client.cc b/src/yb/tools/yb-admin_client.cc index 5862be7ec695..ec0a3d253637 100644 --- a/src/yb/tools/yb-admin_client.cc +++ b/src/yb/tools/yb-admin_client.cc @@ -46,7 +46,7 @@ #include #include "yb/cdc/cdc_service.h" -#include "yb/cdc/xcluster_util.h" +#include "yb/common/xcluster_util.h" #include "yb/client/client.h" #include "yb/client/table.h" #include "yb/client/table_creator.h" diff --git a/src/yb/tserver/pg_client.proto b/src/yb/tserver/pg_client.proto index da594418cac6..ad04f8f60381 100644 --- a/src/yb/tserver/pg_client.proto +++ b/src/yb/tserver/pg_client.proto @@ -865,6 +865,7 @@ message PgCancelTransactionResponsePB { } message PgActiveSessionHistoryRequestPB { + uint32 sample_size = 7; bool fetch_tserver_states = 1; bool fetch_flush_and_compaction_states = 2; bool fetch_cql_states = 3; @@ -876,13 +877,16 @@ message PgActiveSessionHistoryRequestPB { message WaitStatesPB { repeated WaitStateInfoPB wait_states = 1; uint32 component = 2; + float sample_weight = 3; } message PgActiveSessionHistoryResponsePB { AppStatusPB status = 1; WaitStatesPB tserver_wait_states = 2; - WaitStatesPB flush_and_compaction_wait_states = 3; - WaitStatesPB raft_log_appender_wait_states = 4; + // flush_and_compaction_wait_states and raft_log_appender_wait_states + // are included in tserver_wait_states + WaitStatesPB DEPRECATED_flush_and_compaction_wait_states = 3; + WaitStatesPB DEPRECATED_raft_log_appender_wait_states = 4; WaitStatesPB cql_wait_states = 5; } diff --git a/src/yb/tserver/pg_client_service.cc b/src/yb/tserver/pg_client_service.cc index fb6171fcf653..ea7874b08693 100644 --- a/src/yb/tserver/pg_client_service.cc +++ b/src/yb/tserver/pg_client_service.cc @@ -1389,22 +1389,34 @@ class PgClientServiceImpl::Impl { call.wait_state().aux_info().method() == 
"Perform"))); } + void MaybeIncludeSample( + tserver::WaitStatesPB* resp, const WaitStateInfoPB& wait_state_pb, int sample_size, + int& samples_considered) { + if (++samples_considered <= sample_size) { + resp->add_wait_states()->CopyFrom(wait_state_pb); + } else { + int random_index = RandomUniformInt(1, samples_considered); + if (random_index <= sample_size) { + resp->mutable_wait_states(random_index - 1)->CopyFrom(wait_state_pb); + } + } + } + void PopulateWaitStates( const PgActiveSessionHistoryRequestPB& req, const yb::rpc::RpcConnectionPB& conn, - tserver::WaitStatesPB* resp) { + tserver::WaitStatesPB* resp, int sample_size, int& samples_considered) { for (const auto& call : conn.calls_in_flight()) { if (ShouldIgnoreCall(req, call)) { VLOG(3) << "Ignoring " << call.wait_state().DebugString(); continue; } - auto* wait_state = resp->add_wait_states(); - wait_state->CopyFrom(call.wait_state()); + MaybeIncludeSample(resp, call.wait_state(), sample_size, samples_considered); } } void GetRpcsWaitStates( const PgActiveSessionHistoryRequestPB& req, ash::Component component, - tserver::WaitStatesPB* resp) { + tserver::WaitStatesPB* resp, int sample_size, int& samples_considered) { auto* messenger = tablet_server_.GetMessenger(component); if (!messenger) { LOG_WITH_FUNC(ERROR) << "got no messenger for " << yb::ToString(component); @@ -1424,11 +1436,11 @@ class PgClientServiceImpl::Impl { WARN_NOT_OK(messenger->DumpRunningRpcs(dump_req, &dump_resp), "DumpRunningRpcs failed"); for (const auto& conn : dump_resp.inbound_connections()) { - PopulateWaitStates(req, conn, resp); + PopulateWaitStates(req, conn, resp, sample_size, samples_considered); } if (dump_resp.has_local_calls()) { - PopulateWaitStates(req, dump_resp.local_calls(), resp); + PopulateWaitStates(req, dump_resp.local_calls(), resp, sample_size, samples_considered); } VLOG(3) << __PRETTY_FUNCTION__ << " wait-states: " << yb::ToString(resp->wait_states()); @@ -1436,7 +1448,7 @@ class PgClientServiceImpl::Impl { 
void AddWaitStatesToResponse( const ash::WaitStateTracker& tracker, bool export_wait_state_names, - tserver::WaitStatesPB* resp) { + tserver::WaitStatesPB* resp, int sample_size, int& samples_considered) { Result local_uuid = Uuid::FromHexStringBigEndian(instance_id_); DCHECK_OK(local_uuid); resp->set_component(yb::to_underlying(ash::Component::kTServer)); @@ -1452,7 +1464,7 @@ class PgClientServiceImpl::Impl { if (local_uuid) { local_uuid->ToBytes(wait_state_pb.mutable_metadata()->mutable_yql_endpoint_tserver_uuid()); } - resp->add_wait_states()->CopyFrom(wait_state_pb); + MaybeIncludeSample(resp, wait_state_pb, sample_size, samples_considered); } VLOG(2) << "Tracker call sending " << resp->DebugString(); } @@ -1460,25 +1472,35 @@ class PgClientServiceImpl::Impl { Status ActiveSessionHistory( const PgActiveSessionHistoryRequestPB& req, PgActiveSessionHistoryResponsePB* resp, rpc::RpcContext* context) { + int tserver_samples_considered = 0; + int cql_samples_considered = 0; + int sample_size = req.sample_size(); if (req.fetch_tserver_states()) { - GetRpcsWaitStates(req, ash::Component::kTServer, resp->mutable_tserver_wait_states()); + GetRpcsWaitStates(req, ash::Component::kTServer, resp->mutable_tserver_wait_states(), + sample_size, tserver_samples_considered); AddWaitStatesToResponse( ash::SharedMemoryPgPerformTracker(), req.export_wait_state_code_as_string(), - resp->mutable_tserver_wait_states()); + resp->mutable_tserver_wait_states(), sample_size, tserver_samples_considered); } if (req.fetch_flush_and_compaction_states()) { AddWaitStatesToResponse( ash::FlushAndCompactionWaitStatesTracker(), req.export_wait_state_code_as_string(), - resp->mutable_flush_and_compaction_wait_states()); + resp->mutable_tserver_wait_states(), sample_size, tserver_samples_considered); } if (req.fetch_raft_log_appender_states()) { AddWaitStatesToResponse( ash::RaftLogWaitStatesTracker(), req.export_wait_state_code_as_string(), - resp->mutable_raft_log_appender_wait_states()); + 
resp->mutable_tserver_wait_states(), sample_size, tserver_samples_considered); } if (req.fetch_cql_states()) { - GetRpcsWaitStates(req, ash::Component::kYCQL, resp->mutable_cql_wait_states()); - } + GetRpcsWaitStates(req, ash::Component::kYCQL, resp->mutable_cql_wait_states(), + sample_size, cql_samples_considered); + } + float tserver_sample_weight = + std::max(tserver_samples_considered, sample_size) * 1.0 / sample_size; + float cql_sample_weight = std::max(cql_samples_considered, sample_size) * 1.0 / sample_size; + resp->mutable_tserver_wait_states()->set_sample_weight(tserver_sample_weight); + resp->mutable_cql_wait_states()->set_sample_weight(cql_sample_weight); return Status::OK(); } @@ -1954,4 +1976,67 @@ void PgClientServiceImpl::method( \ BOOST_PP_SEQ_FOR_EACH(YB_PG_CLIENT_METHOD_DEFINE, ~, YB_PG_CLIENT_METHODS); BOOST_PP_SEQ_FOR_EACH(YB_PG_CLIENT_ASYNC_METHOD_DEFINE, ~, YB_PG_CLIENT_ASYNC_METHODS); +PgClientServiceMockImpl::PgClientServiceMockImpl( + const scoped_refptr& entity, PgClientServiceIf* impl) + : PgClientServiceIf(entity), impl_(impl) {} + +PgClientServiceMockImpl::Handle PgClientServiceMockImpl::SetMock( + const std::string& method, SharedFunctor&& mock) { + { + std::lock_guard lock(mutex_); + mocks_[method] = mock; + } + + return Handle{std::move(mock)}; +} + +Result PgClientServiceMockImpl::DispatchMock( + const std::string& method, const void* req, void* resp, rpc::RpcContext* context) { + SharedFunctor mock; + { + SharedLock lock(mutex_); + auto it = mocks_.find(method); + if (it != mocks_.end()) { + mock = it->second.lock(); + } + } + + if (!mock) { + return false; + } + RETURN_NOT_OK((*mock)(req, resp, context)); + return true; +} + +#define YB_PG_CLIENT_MOCK_METHOD_DEFINE(r, data, method) \ + void PgClientServiceMockImpl::method( \ + const BOOST_PP_CAT(BOOST_PP_CAT(Pg, method), RequestPB) * req, \ + BOOST_PP_CAT(BOOST_PP_CAT(Pg, method), ResponsePB) * resp, rpc::RpcContext context) { \ + auto result = 
DispatchMock(BOOST_PP_STRINGIZE(method), req, resp, &context); \ + if (!result.ok() || *result) { \ + Respond(ResultToStatus(result), resp, &context); \ + return; \ + } \ + impl_->method(req, resp, std::move(context)); \ + } + +template +auto MakeSharedFunctor(const std::function& func) { + return std::make_shared( + [func](const void* req, void* resp, rpc::RpcContext* context) { + return func(pointer_cast(req), pointer_cast(resp), context); + }); +} + +#define YB_PG_CLIENT_MOCK_METHOD_SETTER_DEFINE(r, data, method) \ + PgClientServiceMockImpl::Handle BOOST_PP_CAT(PgClientServiceMockImpl::Mock, method)( \ + const std::function& mock) { \ + return SetMock(BOOST_PP_STRINGIZE(method), MakeSharedFunctor(mock)); \ + } + +BOOST_PP_SEQ_FOR_EACH(YB_PG_CLIENT_MOCK_METHOD_DEFINE, ~, YB_PG_CLIENT_MOCKABLE_METHODS); +BOOST_PP_SEQ_FOR_EACH(YB_PG_CLIENT_MOCK_METHOD_SETTER_DEFINE, ~, YB_PG_CLIENT_MOCKABLE_METHODS); + } // namespace yb::tserver diff --git a/src/yb/tserver/pg_client_service.h b/src/yb/tserver/pg_client_service.h index 75448cd8ab17..dd77ba7d7434 100644 --- a/src/yb/tserver/pg_client_service.h +++ b/src/yb/tserver/pg_client_service.h @@ -17,6 +17,8 @@ #include #include #include +#include +#include #include "yb/client/client_fwd.h" @@ -135,5 +137,48 @@ class PgClientServiceImpl : public PgClientServiceIf { std::unique_ptr impl_; }; +#define YB_PG_CLIENT_MOCKABLE_METHODS \ + (Perform) \ + YB_PG_CLIENT_METHODS \ + YB_PG_CLIENT_ASYNC_METHODS \ + /**/ + +// PgClientServiceMockImpl implements the PgClientService interface to allow for mocking of tserver +// responses in MiniCluster tests. This implementation defaults to forwarding calls to +// PgClientServiceImpl if a suitable mock is not available. Usage of this implementation can be +// toggled via the test tserver gflag 'FLAGS_TEST_enable_pg_client_mock'. 
+class PgClientServiceMockImpl : public PgClientServiceIf { + public: + using Functor = std::function; + using SharedFunctor = std::shared_ptr; + + PgClientServiceMockImpl(const scoped_refptr& entity, PgClientServiceIf* impl); + + class Handle { + explicit Handle(SharedFunctor&& mock) : mock_(std::move(mock)) {} + SharedFunctor mock_; + + friend class PgClientServiceMockImpl; + }; + +#define YB_PG_CLIENT_MOCK_METHOD_SETTER_DECLARE(r, data, method) \ + [[nodiscard]] Handle BOOST_PP_CAT(Mock, method)( \ + const std::function& mock); + + BOOST_PP_SEQ_FOR_EACH(YB_PG_CLIENT_METHOD_DECLARE, ~, YB_PG_CLIENT_MOCKABLE_METHODS); + BOOST_PP_SEQ_FOR_EACH(YB_PG_CLIENT_MOCK_METHOD_SETTER_DECLARE, ~, YB_PG_CLIENT_MOCKABLE_METHODS); + + private: + PgClientServiceIf* impl_; + std::unordered_map mocks_; + rw_spinlock mutex_; + + Result DispatchMock( + const std::string& method, const void* req, void* resp, rpc::RpcContext* context); + Handle SetMock(const std::string& method, SharedFunctor&& mock); +}; + } // namespace tserver } // namespace yb diff --git a/src/yb/tserver/tablet_server.cc b/src/yb/tserver/tablet_server.cc index 8c3ee6379375..bc7e9080277d 100644 --- a/src/yb/tserver/tablet_server.cc +++ b/src/yb/tserver/tablet_server.cc @@ -233,6 +233,8 @@ DEFINE_RUNTIME_uint32(ysql_min_new_version_ignored_count, 10, DECLARE_bool(enable_pg_cron); +DEFINE_test_flag(bool, enable_pg_client_mock, false, "Enable mocking of PgClient service in tests"); + namespace yb::tserver { namespace { @@ -615,13 +617,25 @@ Status TabletServer::RegisterServices() { remote_bootstrap_service.get(); RETURN_NOT_OK(RegisterService( FLAGS_ts_remote_bootstrap_svc_queue_length, std::move(remote_bootstrap_service))); - auto pg_client_service = std::make_shared( - *this, tablet_manager_->client_future(), clock(), - std::bind(&TabletServer::TransactionPool, this), mem_tracker(), metric_entity(), messenger(), - permanent_uuid(), &options(), xcluster_context_.get(), &pg_node_level_mutation_counter_); - 
pg_client_service_ = pg_client_service; - LOG(INFO) << "yb::tserver::PgClientServiceImpl created at " << pg_client_service.get(); - RETURN_NOT_OK(RegisterService(FLAGS_pg_client_svc_queue_length, std::move(pg_client_service))); + + auto pg_client_service_holder = std::make_shared( + *this, tablet_manager_->client_future(), clock(), + std::bind(&TabletServer::TransactionPool, this), mem_tracker(), metric_entity(), + messenger(), permanent_uuid(), &options(), xcluster_context_.get(), + &pg_node_level_mutation_counter_); + PgClientServiceIf* pg_client_service_if = &pg_client_service_holder->impl; + LOG(INFO) << "yb::tserver::PgClientServiceImpl created at " << pg_client_service_if; + + if (PREDICT_FALSE(FLAGS_TEST_enable_pg_client_mock)) { + pg_client_service_holder->mock.emplace(metric_entity(), pg_client_service_if); + pg_client_service_if = &pg_client_service_holder->mock.value(); + LOG(INFO) << "Mock created for yb::tserver::PgClientServiceImpl"; + } + + pg_client_service_ = pg_client_service_holder; + RETURN_NOT_OK(RegisterService( + FLAGS_pg_client_svc_queue_length, std::shared_ptr( + std::move(pg_client_service_holder), pg_client_service_if))); if (FLAGS_TEST_echo_service_enabled) { auto test_echo_service = std::make_unique( @@ -1117,14 +1131,12 @@ void TabletServer::SetYsqlDBCatalogVersions( void TabletServer::WriteServerMetaCacheAsJson(JsonWriter* writer) { writer->StartObject(); + DbServerBase::WriteMainMetaCacheAsJson(writer); if (auto xcluster_consumer = GetXClusterConsumer()) { - auto clients = xcluster_consumer->GetYbClientsList(); - for (auto client : clients) { - writer->String(client->client_name()); - client->AddMetaCacheInfo(writer); - } + xcluster_consumer->WriteServerMetaCacheAsJson(*writer); } + writer->EndObject(); } @@ -1220,10 +1232,12 @@ Status TabletServer::ListMasterServers(const ListMasterServersRequestPB* req, void TabletServer::InvalidatePgTableCache() { auto pg_client_service = pg_client_service_.lock(); - if (pg_client_service) { - 
LOG(INFO) << "Invalidating all PgTableCache caches since catalog version incremented"; - pg_client_service->InvalidateTableCache(); + if (!pg_client_service) { + return; } + + LOG(INFO) << "Invalidating the entire PgTableCache cache since catalog version incremented"; + pg_client_service->impl.InvalidateTableCache(); } void TabletServer::InvalidatePgTableCache( @@ -1239,7 +1253,7 @@ void TabletServer::InvalidatePgTableCache( msg += Format("databases $0 are removed", yb::ToString(db_oids_deleted)); } LOG(INFO) << msg; - pg_client_service->InvalidateTableCache(db_oids_updated, db_oids_deleted); + pg_client_service->impl.InvalidateTableCache(db_oids_updated, db_oids_deleted); } } Status TabletServer::SetupMessengerBuilder(rpc::MessengerBuilder* builder) { diff --git a/src/yb/tserver/tablet_server.h b/src/yb/tserver/tablet_server.h index 821a93cfc80a..f2dd3cf6dced 100644 --- a/src/yb/tserver/tablet_server.h +++ b/src/yb/tserver/tablet_server.h @@ -58,6 +58,7 @@ #include "yb/master/master_heartbeat.pb.h" #include "yb/server/webserver_options.h" #include "yb/tserver/db_server_base.h" +#include "yb/tserver/pg_client_service.h" #include "yb/tserver/pg_mutation_counter.h" #include "yb/tserver/remote_bootstrap_service.h" #include "yb/tserver/tserver_shared_mem.h" @@ -337,7 +338,13 @@ class TabletServer : public DbServerBase, public TabletServerIf { std::string GetCertificateDetails() override; PgClientServiceImpl* TEST_GetPgClientService() { - return pg_client_service_.lock().get(); + auto holder = pg_client_service_.lock(); + return holder ? &holder->impl : nullptr; + } + + PgClientServiceMockImpl* TEST_GetPgClientServiceMock() { + auto holder = pg_client_service_.lock(); + return holder && holder->mock.has_value() ? 
&holder->mock.value() : nullptr; } RemoteBootstrapServiceImpl* GetRemoteBootstrapService() { @@ -366,6 +373,14 @@ class TabletServer : public DbServerBase, public TabletServerIf { void TEST_SetIsCronLeader(bool is_cron_leader); + struct PgClientServiceHolder { + template + explicit PgClientServiceHolder(Args&&... args) : impl(std::forward(args)...) {} + + PgClientServiceImpl impl; + std::optional mock; + }; + protected: virtual Status RegisterServices(); @@ -469,7 +484,7 @@ class TabletServer : public DbServerBase, public TabletServerIf { // An instance to pg client service. This pointer is no longer valid after RpcAndWebServerBase // is shut down. - std::weak_ptr pg_client_service_; + std::weak_ptr pg_client_service_; // Key to shared memory for ysql connection manager stats key_t ysql_conn_mgr_stats_shmem_key_ = 0; diff --git a/src/yb/tserver/tablet_service.cc b/src/yb/tserver/tablet_service.cc index 7cf9889856ce..93ebc949fb6e 100644 --- a/src/yb/tserver/tablet_service.cc +++ b/src/yb/tserver/tablet_service.cc @@ -1952,6 +1952,18 @@ void TabletServiceAdminImpl::CloneTablet( }); } +Result TabletServiceAdminImpl::GetLocalPgHostPort() { + HostPort local_pg_host_port; + if (!FLAGS_TEST_mini_cluster_pg_host_port.empty()) { + RETURN_NOT_OK(local_pg_host_port.ParseString( + FLAGS_TEST_mini_cluster_pg_host_port, pgwrapper::PgProcessConf::kDefaultPort)); + } else { + local_pg_host_port = server_->pgsql_proxy_bind_address(); + } + std::string unix_domain_socket = PgDeriveSocketDir(local_pg_host_port); + return HostPort(unix_domain_socket, local_pg_host_port.port()); +} + void TabletServiceAdminImpl::ClonePgSchema( const ClonePgSchemaRequestPB* req, ClonePgSchemaResponsePB* resp, rpc::RpcContext context) { auto status = DoClonePgSchema(req, resp); @@ -1965,17 +1977,9 @@ void TabletServiceAdminImpl::ClonePgSchema( Status TabletServiceAdminImpl::DoClonePgSchema( const ClonePgSchemaRequestPB* req, ClonePgSchemaResponsePB* resp) { // Run ysql_dump to generate the schema of the 
clone database as of restore time. - HostPort local_pg_host_port; - if (!FLAGS_TEST_mini_cluster_pg_host_port.empty()) { - RETURN_NOT_OK(local_pg_host_port.ParseString( - FLAGS_TEST_mini_cluster_pg_host_port, pgwrapper::PgProcessConf::kDefaultPort)); - } else { - local_pg_host_port = server_->pgsql_proxy_bind_address(); - } const std::string& target_db_name = req->target_db_name(); - std::string unix_domain_socket = PgDeriveSocketDir(local_pg_host_port); - HostPort local_hostport(unix_domain_socket, local_pg_host_port.port()); + auto local_hostport = VERIFY_RESULT(GetLocalPgHostPort()); YsqlDumpRunner ysql_dump_runner = VERIFY_RESULT(YsqlDumpRunner::GetYsqlDumpRunner(local_hostport)); std::string dump_output = VERIFY_RESULT(ysql_dump_runner.RunAndModifyForClone( @@ -1983,32 +1987,42 @@ Status TabletServiceAdminImpl::DoClonePgSchema( HybridTime(req->restore_ht()))); VLOG(2) << "Dump output: " << dump_output; - // Write the dump output to a file in order to execute it using ysqlsh. - std::unique_ptr dump_output_file; - std::string tmp_file_name; - RETURN_NOT_OK(Env::Default()->NewTempWritableFile( - WritableFileOptions(), target_db_name + "_ysql_dump_XXXXXX", &tmp_file_name, - &dump_output_file)); - RETURN_NOT_OK(dump_output_file->Append(dump_output)); - RETURN_NOT_OK(dump_output_file->Close()); - auto scope_exit = ScopeExit([tmp_file_name] { - if (Env::Default()->FileExists(tmp_file_name)) { - WARN_NOT_OK( - Env::Default()->DeleteFile(tmp_file_name), - Format("Failed to delete ysql_dump_file $0 as a cloning cleanup.", tmp_file_name)); - } - }); - // Execute the sql script to generate the PG database. 
- YsqlshRunner ysqlsh_runner = - VERIFY_RESULT(YsqlshRunner::GetYsqlshRunner(HostPort::FromPB(local_hostport))); - Result ysqlsh_output = VERIFY_RESULT(ysqlsh_runner.ExecuteSqlScript(tmp_file_name)); + YsqlshRunner ysqlsh_runner = VERIFY_RESULT(YsqlshRunner::GetYsqlshRunner(local_hostport)); + RETURN_NOT_OK(ysqlsh_runner.ExecuteSqlScript(dump_output, "ysql_dump" /* tmp_file_prefix */)); LOG(INFO) << Format( "Clone Pg Schema Objects for source database: $0 to clone database: $1 done successfully", req->source_db_name(), target_db_name); return Status::OK(); } +void TabletServiceAdminImpl::EnableDbConns( + const EnableDbConnsRequestPB* req, EnableDbConnsResponsePB* resp, + rpc::RpcContext context) { + auto status = DoEnableDbConns(req, resp); + if (!status.ok()) { + SetupErrorAndRespond(resp->mutable_error(), status, &context); + } else { + context.RespondSuccess(); + } +} + +Status TabletServiceAdminImpl::DoEnableDbConns( + const EnableDbConnsRequestPB* req, EnableDbConnsResponsePB* resp) { + const std::string script = Format( + "SET yb_non_ddl_txn_for_sys_tables_allowed = true;\n" + "UPDATE pg_database SET datallowconn = true WHERE datname = '$0'", req->target_db_name()); + + auto local_hostport = VERIFY_RESULT(GetLocalPgHostPort()); + YsqlshRunner ysqlsh_runner = + VERIFY_RESULT(YsqlshRunner::GetYsqlshRunner(HostPort::FromPB(local_hostport))); + RETURN_NOT_OK(ysqlsh_runner.ExecuteSqlScript(script, "enable_connections" /* tmp_file_prefix */)); + + LOG(INFO) << Format( + "Successfully enabled connections to clone target database $0", req->target_db_name()); + return Status::OK(); +} + void TabletServiceAdminImpl::UpgradeYsql( const UpgradeYsqlRequestPB* req, UpgradeYsqlResponsePB* resp, diff --git a/src/yb/tserver/tablet_service.h b/src/yb/tserver/tablet_service.h index 288464b8171a..eb15f8ebc2a2 100644 --- a/src/yb/tserver/tablet_service.h +++ b/src/yb/tserver/tablet_service.h @@ -322,6 +322,10 @@ class TabletServiceAdminImpl : public TabletServerAdminServiceIf { 
const ClonePgSchemaRequestPB* req, ClonePgSchemaResponsePB* resp, rpc::RpcContext context) override; + void EnableDbConns( + const EnableDbConnsRequestPB* req, EnableDbConnsResponsePB* resp, + rpc::RpcContext context) override; + void TestRetry( const TestRetryRequestPB* req, TestRetryResponsePB* resp, rpc::RpcContext context) override; @@ -331,8 +335,13 @@ class TabletServiceAdminImpl : public TabletServerAdminServiceIf { Status DoCreateTablet( const CreateTabletRequestPB* req, CreateTabletResponsePB* resp, const MonoDelta& timeout); + Result GetLocalPgHostPort(); + Status DoClonePgSchema(const ClonePgSchemaRequestPB* req, ClonePgSchemaResponsePB* resp); + Status DoEnableDbConns( + const EnableDbConnsRequestPB* req, EnableDbConnsResponsePB* resp); + Status SetupCDCSDKRetention( const tablet::ChangeMetadataRequestPB* req, ChangeMetadataResponsePB* resp, const tablet::TabletPeerPtr& peer); diff --git a/src/yb/tserver/tserver_admin.proto b/src/yb/tserver/tserver_admin.proto index 8e88347dc32b..a88aa5b2ec7b 100644 --- a/src/yb/tserver/tserver_admin.proto +++ b/src/yb/tserver/tserver_admin.proto @@ -354,7 +354,15 @@ message ClonePgSchemaRequestPB { optional bytes target_owner = 5; } -message ClonePgSchemaResponsePB{ +message ClonePgSchemaResponsePB { + optional TabletServerErrorPB error = 1; +} + +message EnableDbConnsRequestPB { + optional bytes target_db_name = 1; +} + +message EnableDbConnsResponsePB { optional TabletServerErrorPB error = 1; } @@ -408,6 +416,8 @@ service TabletServerAdminService { // Create PG objects (database, tables and indexes) of the clone database. rpc ClonePgSchema(ClonePgSchemaRequestPB) returns (ClonePgSchemaResponsePB); + rpc EnableDbConns(EnableDbConnsRequestPB) returns (EnableDbConnsResponsePB); + // For test purposes: returns TryAgain error until specified number of calls is done. 
rpc TestRetry(TestRetryRequestPB) returns (TestRetryResponsePB); } diff --git a/src/yb/tserver/xcluster_consumer.cc b/src/yb/tserver/xcluster_consumer.cc index 5fdbcfeccc3a..0e56228d82a2 100644 --- a/src/yb/tserver/xcluster_consumer.cc +++ b/src/yb/tserver/xcluster_consumer.cc @@ -11,12 +11,11 @@ // under the License. // -#include - #include "yb/cdc/xcluster_types.h" -#include "yb/cdc/xcluster_util.h" +#include "yb/client/error.h" #include "yb/client/session.h" #include "yb/client/table_handle.h" +#include "yb/client/xcluster_client.h" #include "yb/client/yb_op.h" #include "yb/client/yb_table_name.h" @@ -25,9 +24,7 @@ #include "yb/master/master_defaults.h" #include "yb/master/master_heartbeat.pb.h" -#include "yb/rpc/messenger.h" #include "yb/rpc/rpc.h" -#include "yb/rpc/secure_stream.h" #include "yb/tserver/xcluster_consumer.h" #include "yb/tserver/tserver_xcluster_context_if.h" #include "yb/tserver/xcluster_consumer_auto_flags_info.h" @@ -36,18 +33,15 @@ #include "yb/cdc/cdc_consumer.pb.h" -#include "yb/client/error.h" #include "yb/client/client.h" #include "yb/rocksdb/rate_limiter.h" #include "yb/gutil/map-util.h" -#include "yb/rpc/secure.h" #include "yb/util/callsite_profiling.h" #include "yb/util/flags.h" #include "yb/util/logging.h" -#include "yb/util/path_util.h" #include "yb/util/shared_lock.h" #include "yb/util/size_literals.h" #include "yb/util/status_log.h" @@ -103,11 +97,6 @@ DEFINE_test_flag(bool, xcluster_disable_delete_old_pollers, false, DEFINE_test_flag(bool, xcluster_enable_ddl_replication, false, "Enables xCluster automatic DDL replication."); -DECLARE_int32(cdc_read_rpc_timeout_ms); -DECLARE_int32(cdc_write_rpc_timeout_ms); -DECLARE_bool(use_node_to_node_encryption); -DECLARE_string(certs_for_cdc_dir); - using namespace std::chrono_literals; #define ACQUIRE_SHARED_LOCK_IF_ONLINE \ @@ -122,21 +111,6 @@ namespace yb { namespace tserver { -XClusterClient::~XClusterClient() { - if (messenger) { - messenger->Shutdown(); - } -} - -void 
XClusterClient::Shutdown() { - if (client) { - client->Shutdown(); - } - if (messenger) { - messenger->Shutdown(); - } -} - Result> CreateXClusterConsumer( std::function get_leader_term, const std::string& ts_uuid, client::YBClient& local_client, ConnectToPostgresFunc connect_to_pg_func, @@ -192,15 +166,14 @@ XClusterConsumer::~XClusterConsumer() { } Status XClusterConsumer::Init() { - // TODO(NIC): Unify xcluster_consumer thread_pool & remote_client_ threadpools RETURN_NOT_OK(yb::Thread::Create( "XClusterConsumer", "Poll", &XClusterConsumer::RunThread, this, &run_trigger_poll_thread_)); - ThreadPoolBuilder cdc_consumer_thread_pool_builder("XClusterConsumerHandler"); + ThreadPoolBuilder thread_pool_builder("XClusterConsumerHandler"); if (FLAGS_xcluster_consumer_thread_pool_size > 0) { - cdc_consumer_thread_pool_builder.set_max_threads(FLAGS_xcluster_consumer_thread_pool_size); + thread_pool_builder.set_max_threads(FLAGS_xcluster_consumer_thread_pool_size); } - return cdc_consumer_thread_pool_builder.Build(&thread_pool_); + return thread_pool_builder.Build(&thread_pool_); } void XClusterConsumer::Shutdown() { @@ -352,13 +325,12 @@ void XClusterConsumer::HandleMasterHeartbeatResponse( std::vector hp; HostPortsFromPBs(producer_entry_pb.master_addrs(), &hp); - auto master_addrs = HostPort::ToCommaSeparatedString(std::move(hp)); if (ContainsKey(old_uuid_master_addrs, replication_group_id) && - old_uuid_master_addrs[replication_group_id] != master_addrs) { + old_uuid_master_addrs[replication_group_id] != hp) { // If master addresses changed, mark for YBClient update. changed_master_addrs_.insert(replication_group_id); } - uuid_master_addrs_[replication_group_id] = std::move(master_addrs); + uuid_master_addrs_[replication_group_id] = std::move(hp); UpdateReplicationGroupInMemState(replication_group_id, producer_entry_pb); } @@ -495,7 +467,7 @@ void XClusterConsumer::TriggerPollForNewTablets() { // Update the Master Addresses, if altered after setup. 
if (ContainsKey(remote_clients_, replication_group_id) && changed_master_addrs_.count(replication_group_id) > 0) { - auto status = remote_clients_[replication_group_id]->client->SetMasterAddresses( + auto status = remote_clients_[replication_group_id]->SetMasterAddresses( uuid_master_addrs_[replication_group_id]); if (status.ok()) { changed_master_addrs_.erase(replication_group_id); @@ -515,53 +487,20 @@ void XClusterConsumer::TriggerPollForNewTablets() { if (start_polling) { // This is a new tablet, trigger a poll. // See if we need to create a new client connection - if (!ContainsKey(remote_clients_, replication_group_id)) { - CHECK(ContainsKey(uuid_master_addrs_, replication_group_id)); - - auto remote_client = std::make_unique(); - std::string dir; - if (FLAGS_use_node_to_node_encryption) { - rpc::MessengerBuilder messenger_builder("xcluster-consumer"); - if (!FLAGS_certs_for_cdc_dir.empty()) { - dir = JoinPathSegments( - FLAGS_certs_for_cdc_dir, - xcluster::GetOriginalReplicationGroupId(replication_group_id).ToString()); - } - - auto secure_context_result = rpc::SetupSecureContext( - dir, /*root_dir=*/"", /*name=*/"", rpc::SecureContextType::kInternal, - &messenger_builder); - if (!secure_context_result.ok()) { - LOG(WARNING) << "Could not create secure context for " << replication_group_id << ": " - << secure_context_result.status().ToString(); - return; // Don't finish creation. Try again on the next heartbeat. - } - remote_client->secure_context = std::move(*secure_context_result); - - auto messenger_result = messenger_builder.Build(); - if (!messenger_result.ok()) { - LOG(WARNING) << "Could not build messenger for " << replication_group_id << ": " - << secure_context_result.status().ToString(); - return; // Don't finish creation. Try again on the next heartbeat. 
- } - remote_client->messenger = std::move(*messenger_result); + if (!remote_clients_.contains(replication_group_id)) { + if (!uuid_master_addrs_.contains(replication_group_id)) { + LOG(DFATAL) << "Master address not found for " << replication_group_id; + return; // Don't finish creation. Try again on the next heartbeat. } - auto client_result = - yb::client::YBClientBuilder() - .set_client_name("XClusterConsumerRemote") - .add_master_server_addr(uuid_master_addrs_[replication_group_id]) - .skip_master_flagfile() - .default_rpc_timeout(MonoDelta::FromMilliseconds(FLAGS_cdc_read_rpc_timeout_ms)) - .Build(remote_client->messenger.get()); - if (!client_result.ok()) { - LOG(WARNING) << "Could not create a new YBClient for " << replication_group_id << ": " - << client_result.status().ToString(); + auto remote_client = client::XClusterRemoteClientHolder::Create( + replication_group_id, uuid_master_addrs_[replication_group_id]); + if (!remote_client) { + LOG(WARNING) << "Could not build messenger for " << replication_group_id << ": " + << remote_client.status(); return; // Don't finish creation. Try again on the next heartbeat. } - - remote_client->client = std::move(*client_result); - remote_clients_[replication_group_id] = std::move(remote_client); + remote_clients_[replication_group_id] = std::move(*remote_client); } SchemaVersion last_compatible_consumer_schema_version = cdc::kInvalidSchemaVersion; @@ -644,7 +583,7 @@ void XClusterConsumer::UpdatePollerSchemaVersionMaps( void XClusterConsumer::TriggerDeletionOfOldPollers() { // Shutdown outside of master_data_mutex_ lock, to not block any heartbeats. 
- std::vector> clients_to_delete; + std::vector> clients_to_delete; std::vector> pollers_to_shutdown; { ACQUIRE_SHARED_LOCK_IF_ONLINE; @@ -736,19 +675,8 @@ int32_t XClusterConsumer::cluster_config_version() const { Status XClusterConsumer::ReloadCertificates() { SharedLock read_lock(pollers_map_mutex_); - for (const auto& [replication_group_id, client] : remote_clients_) { - if (!client->secure_context) { - continue; - } - - std::string cert_dir; - if (!FLAGS_certs_for_cdc_dir.empty()) { - cert_dir = JoinPathSegments( - FLAGS_certs_for_cdc_dir, - xcluster::GetOriginalReplicationGroupId(replication_group_id).ToString()); - } - RETURN_NOT_OK(rpc::ReloadSecureContextKeysAndCertificates( - client->secure_context.get(), cert_dir, "" /* node_name */)); + for (const auto& [_, client] : remote_clients_) { + RETURN_NOT_OK(client->ReloadCertificates()); } return Status::OK(); @@ -870,17 +798,17 @@ Status XClusterConsumer::ReportNewAutoFlagConfigVersion( void XClusterConsumer::ClearAllClientMetaCaches() const { std::lock_guard write_lock_pollers(pollers_map_mutex_); for (auto& [group_id, xcluster_client] : remote_clients_) { - xcluster_client->client->ClearAllMetaCachesOnServer(); + xcluster_client->GetYbClient().ClearAllMetaCachesOnServer(); } } -std::vector> XClusterConsumer::GetYbClientsList() const { +void XClusterConsumer::WriteServerMetaCacheAsJson(JsonWriter& writer) const { SharedLock read_lock(pollers_map_mutex_); - std::vector> result; - for (auto& [_, remote_client] : remote_clients_) { - result.push_back(remote_client->client); + for (const auto& [_, remote_client] : remote_clients_) { + const auto& client = remote_client->GetYbClient(); + writer.String(client.client_name()); + client.AddMetaCacheInfo(&writer); } - return result; } } // namespace tserver diff --git a/src/yb/tserver/xcluster_consumer.h b/src/yb/tserver/xcluster_consumer.h index f3b43468a4ab..0a4af3849eaf 100644 --- a/src/yb/tserver/xcluster_consumer.h +++ b/src/yb/tserver/xcluster_consumer.h @@ 
-43,22 +43,18 @@ class RateLimiter; } // namespace rocksdb namespace yb { +class HostPort; class Thread; class ThreadPool; namespace rpc { class Messenger; class Rpcs; -class SecureContext; } // namespace rpc -namespace cdc { -class ConsumerRegistryPB; -} // namespace cdc - -namespace master { -class TSHeartbeatRequestPB; -} // namespace master +namespace client { +class XClusterRemoteClientHolder; +} // namespace client namespace tserver { class AutoFlagsVersionHandler; @@ -66,15 +62,6 @@ class XClusterPoller; class TabletServer; class TserverXClusterContextIf; -struct XClusterClient { - std::unique_ptr messenger; - std::unique_ptr secure_context; - std::shared_ptr client; - - ~XClusterClient(); - void Shutdown(); -}; - class XClusterConsumer : public XClusterConsumerIf { public: XClusterConsumer( @@ -110,7 +97,7 @@ class XClusterConsumer : public XClusterConsumerIf { return TEST_num_successful_write_rpcs_.load(std::memory_order_acquire); } - std::vector> GetYbClientsList() const override; + void WriteServerMetaCacheAsJson(JsonWriter& writer) const override; Status ReloadCertificates() override; @@ -235,9 +222,10 @@ class XClusterConsumer : public XClusterConsumerIf { client::YBClient& local_client_; // map: {replication_group_id : ...}. 
- std::unordered_map> remote_clients_ - GUARDED_BY(pollers_map_mutex_); - std::unordered_map uuid_master_addrs_ + std::unordered_map< + xcluster::ReplicationGroupId, std::shared_ptr> + remote_clients_ GUARDED_BY(pollers_map_mutex_); + std::unordered_map> uuid_master_addrs_ GUARDED_BY(master_data_mutex_); std::unordered_set changed_master_addrs_ GUARDED_BY(master_data_mutex_); diff --git a/src/yb/tserver/xcluster_consumer_if.h b/src/yb/tserver/xcluster_consumer_if.h index 37de9ceee113..bcc70d85bd9a 100644 --- a/src/yb/tserver/xcluster_consumer_if.h +++ b/src/yb/tserver/xcluster_consumer_if.h @@ -67,7 +67,7 @@ class XClusterConsumerIf { virtual std::vector TEST_producer_tablets_running() const = 0; virtual uint32_t TEST_GetNumSuccessfulWriteRpcs() = 0; virtual std::vector> TEST_ListPollers() const = 0; - virtual std::vector> GetYbClientsList() const = 0; + virtual void WriteServerMetaCacheAsJson(JsonWriter& writer) const = 0; virtual void ClearAllClientMetaCaches() const = 0; virtual scoped_refptr TEST_metric_replication_error_count() const = 0; virtual scoped_refptr TEST_metric_apply_failure_count() const = 0; diff --git a/src/yb/tserver/xcluster_poller.cc b/src/yb/tserver/xcluster_poller.cc index f38bfd05c3c9..5a35bfaf46b0 100644 --- a/src/yb/tserver/xcluster_poller.cc +++ b/src/yb/tserver/xcluster_poller.cc @@ -13,6 +13,7 @@ #include "yb/tserver/xcluster_poller.h" #include "yb/client/client_fwd.h" +#include "yb/client/xcluster_client.h" #include "yb/common/wire_protocol.h" #include "yb/gutil/strings/split.h" #include "yb/tserver/xcluster_consumer.h" @@ -112,9 +113,9 @@ XClusterPoller::XClusterPoller( const NamespaceId& consumer_namespace_id, std::shared_ptr auto_flags_version, ThreadPool* thread_pool, rpc::Rpcs* rpcs, client::YBClient& local_client, - const std::shared_ptr& producer_client, XClusterConsumer* xcluster_consumer, - SchemaVersion last_compatible_consumer_schema_version, int64_t leader_term, - std::function get_leader_term) + const std::shared_ptr& 
source_client, + XClusterConsumer* xcluster_consumer, SchemaVersion last_compatible_consumer_schema_version, + int64_t leader_term, std::function get_leader_term) : XClusterAsyncExecutor(thread_pool, local_client.messenger(), rpcs), producer_tablet_info_(producer_tablet_info), consumer_tablet_info_(consumer_tablet_info), @@ -128,7 +129,7 @@ XClusterPoller::XClusterPoller( last_compatible_consumer_schema_version_(last_compatible_consumer_schema_version), get_leader_term_(std::move(get_leader_term)), local_client_(local_client), - producer_client_(producer_client), + source_client_(source_client), xcluster_consumer_(xcluster_consumer), producer_safe_time_(HybridTime::kInvalid) { DCHECK_NE(GetLeaderTerm(), yb::OpId::kUnknownTerm); @@ -377,7 +378,7 @@ void XClusterPoller::DoPoll() { *handle = rpc::xcluster::CreateGetChangesRpc( CoarseMonoClock::now() + MonoDelta::FromMilliseconds(FLAGS_cdc_read_rpc_timeout_ms), nullptr, /* RemoteTablet: will get this from 'req' */ - producer_client_->client.get(), &req, + &source_client_->GetYbClient(), &req, [weak_ptr = weak_from_this(), this, handle, rpcs = rpcs_]( const Status& status, cdc::GetChangesResponsePB&& resp) { RpcCallback( diff --git a/src/yb/tserver/xcluster_poller.h b/src/yb/tserver/xcluster_poller.h index c0021d0e2e48..dd9e7b8b00c4 100644 --- a/src/yb/tserver/xcluster_poller.h +++ b/src/yb/tserver/xcluster_poller.h @@ -41,6 +41,10 @@ class CDCServiceProxy; } // namespace cdc +namespace client { +class XClusterRemoteClientHolder; +} // namespace client + namespace tserver { class AutoFlagsCompatibleVersion; @@ -55,9 +59,9 @@ class XClusterPoller : public XClusterAsyncExecutor { const NamespaceId& consumer_namespace_id, std::shared_ptr auto_flags_version, ThreadPool* thread_pool, rpc::Rpcs* rpcs, client::YBClient& local_client, - const std::shared_ptr& producer_client, XClusterConsumer* xcluster_consumer, - SchemaVersion last_compatible_consumer_schema_version, int64_t leader_term, - std::function get_leader_term); + 
const std::shared_ptr& source_client, + XClusterConsumer* xcluster_consumer, SchemaVersion last_compatible_consumer_schema_version, + int64_t leader_term, std::function get_leader_term); ~XClusterPoller(); void Init(bool use_local_tserver, rocksdb::RateLimiter* rate_limiter); @@ -161,7 +165,7 @@ class XClusterPoller : public XClusterAsyncExecutor { client::YBClient& local_client_; std::shared_ptr output_client_; - std::shared_ptr producer_client_; + std::shared_ptr source_client_; std::shared_ptr ddl_queue_handler_; // Unsafe to use after shutdown. diff --git a/src/yb/util/ysql_binary_runner.cc b/src/yb/util/ysql_binary_runner.cc index 44430504a6c7..fcb8ed5857af 100644 --- a/src/yb/util/ysql_binary_runner.cc +++ b/src/yb/util/ysql_binary_runner.cc @@ -14,6 +14,7 @@ #include #include "yb/util/env.h" +#include "yb/util/scope_exit.h" #include "yb/util/subprocess.h" #include "yb/util/ysql_binary_runner.h" @@ -77,6 +78,12 @@ const boost::regex UNQUOTED_DATABASE_RE("(^.*)\\s+DATABASE\\s+(\\S+)\\s+(.*)$"); const boost::regex QUOTED_CONNECT_RE("^\\\\connect -reuse-previous=on \"dbname='(.*)'\"$"); const boost::regex UNQUOTED_CONNECT_RE("^\\\\connect\\s+(\\S+)$"); const boost::regex TABLESPACE_RE("^\\s*SET\\s+default_tablespace\\s*=.*$"); + +std::string MakeDisallowConnectionsString(const std::string& new_db) { + return Format( + "SET yb_non_ddl_txn_for_sys_tables_allowed = true;\n" + "UPDATE pg_database SET datallowconn = false WHERE datname = '$0';", new_db); +} } // namespace std::string YsqlDumpRunner::ModifyLine( @@ -94,12 +101,14 @@ std::string YsqlDumpRunner::ModifyLine( values.clear(); if (boost::regex_split(std::back_inserter(values), modified_line, QUOTED_CONNECT_RE)) { std::string s = boost::replace_all_copy(new_db, "'", "\\'"); - return "\\connect -reuse-previous=on \"dbname='" + s + "'\""; + return "\\connect -reuse-previous=on \"dbname='" + s + "'\"" + "\n" + + MakeDisallowConnectionsString(new_db); } values.clear(); if 
(boost::regex_split(std::back_inserter(values), modified_line, UNQUOTED_CONNECT_RE)) { std::string s = boost::replace_all_copy(new_db, "'", "\\'"); - return "\\connect -reuse-previous=on \"dbname='" + s + "'\""; + return "\\connect -reuse-previous=on \"dbname='" + s + "'\"" + "\n" + + MakeDisallowConnectionsString(new_db); } return modified_line; } @@ -108,9 +117,25 @@ std::string YsqlDumpRunner::ModifyLine( // Class YsqlshRunner. // ============================================================================ -Result YsqlshRunner::ExecuteSqlScript(const std::string& sql_script_path) { - std::vector args = {"--file=" + sql_script_path}; - return VERIFY_RESULT(this->Run(args)); +Result YsqlshRunner::ExecuteSqlScript( + const std::string& sql_script, const std::string& tmp_file_prefix) { + // Write the dump output to a file in order to execute it using ysqlsh. + std::unique_ptr script_file; + std::string tmp_file_name; + RETURN_NOT_OK(Env::Default()->NewTempWritableFile( + WritableFileOptions(), tmp_file_prefix + "_XXXXXX", &tmp_file_name, &script_file)); + RETURN_NOT_OK(script_file->Append(sql_script)); + RETURN_NOT_OK(script_file->Close()); + auto scope_exit = ScopeExit([tmp_file_name] { + if (Env::Default()->FileExists(tmp_file_name)) { + WARN_NOT_OK( + Env::Default()->DeleteFile(tmp_file_name), + Format("Failed to delete temporary sql script file $0.", tmp_file_name)); + } + }); + + std::vector args = {"--file=" + tmp_file_name, "--set", "ON_ERROR_STOP=on"}; + return this->Run(args); } } // namespace yb diff --git a/src/yb/util/ysql_binary_runner.h b/src/yb/util/ysql_binary_runner.h index 60196512d5cd..971d068213ea 100644 --- a/src/yb/util/ysql_binary_runner.h +++ b/src/yb/util/ysql_binary_runner.h @@ -67,7 +67,8 @@ class YsqlshRunner : public YsqlBinaryRunner { return YsqlshRunner(tool_path, pg_host_port); } - Result ExecuteSqlScript(const std::string& sql_script_path); + Result ExecuteSqlScript( + const std::string& sql_script, const std::string& file_prefix); 
private: YsqlshRunner(std::string tool_path, HostPort pg_host_port) diff --git a/src/yb/yql/pggate/pg_client.cc b/src/yb/yql/pggate/pg_client.cc index a47de76a14f4..20f33cd91c4a 100644 --- a/src/yb/yql/pggate/pg_client.cc +++ b/src/yb/yql/pggate/pg_client.cc @@ -65,6 +65,7 @@ DECLARE_bool(TEST_ash_fetch_wait_states_for_raft_log); DECLARE_bool(TEST_ash_fetch_wait_states_for_rocksdb_flush_and_compaction); DECLARE_bool(TEST_export_wait_state_names); DECLARE_bool(ysql_enable_db_catalog_version_mode); +DECLARE_int32(ysql_yb_ash_sample_size); extern int yb_locks_min_txn_age; extern int yb_locks_max_transactions; @@ -1134,6 +1135,7 @@ class PgClient::Impl : public BigDataFetcher { req.set_fetch_cql_states(true); req.set_ignore_ash_and_perform_calls(true); req.set_export_wait_state_code_as_string(FLAGS_TEST_export_wait_state_names); + req.set_sample_size(FLAGS_ysql_yb_ash_sample_size); tserver::PgActiveSessionHistoryResponsePB resp; RETURN_NOT_OK(proxy_->ActiveSessionHistory(req, &resp, PrepareController())); diff --git a/src/yb/yql/pggate/util/ybc_util.cc b/src/yb/yql/pggate/util/ybc_util.cc index 416783f17879..2cc47c05e48e 100644 --- a/src/yb/yql/pggate/util/ybc_util.cc +++ b/src/yb/yql/pggate/util/ybc_util.cc @@ -487,6 +487,11 @@ uint8_t YBCGetQueryIdForCatalogRequests() { return static_cast(ash::FixedQueryId::kQueryIdForCatalogRequests); } +// Get a random integer between a and b +int YBCGetRandomUniformInt(int a, int b) { + return RandomUniformInt(a, b); +} + int YBCGetCallStackFrames(void** result, int max_depth, int skip_count) { return google::GetStackTrace(result, max_depth, skip_count); } diff --git a/src/yb/yql/pggate/util/ybc_util.h b/src/yb/yql/pggate/util/ybc_util.h index 6379a9681524..a26dcc0e683a 100644 --- a/src/yb/yql/pggate/util/ybc_util.h +++ b/src/yb/yql/pggate/util/ybc_util.h @@ -336,6 +336,7 @@ const char* YBCGetWaitEventClass(uint32_t wait_event_info); const char* YBCGetWaitEventComponent(uint32_t wait_event_info); const char* 
YBCGetWaitEventType(uint32_t wait_event_info); uint8_t YBCGetQueryIdForCatalogRequests(); +int YBCGetRandomUniformInt(int a, int b); int YBCGetCallStackFrames(void** result, int max_depth, int skip_count); diff --git a/src/yb/yql/pggate/ybc_pg_typedefs.h b/src/yb/yql/pggate/ybc_pg_typedefs.h index e7b671bedede..8cdbd6262601 100644 --- a/src/yb/yql/pggate/ybc_pg_typedefs.h +++ b/src/yb/yql/pggate/ybc_pg_typedefs.h @@ -727,7 +727,7 @@ typedef struct AshSample { // If a certain number of samples are available and we capture a portion of // them, the sample weight is the reciprocal of the captured portion or 1, // whichever is maximum. - double sample_weight; + float sample_weight; // Timestamp when the sample was captured. uint64_t sample_time; diff --git a/src/yb/yql/pggate/ybc_pggate.cc b/src/yb/yql/pggate/ybc_pggate.cc index 89344120b90e..60bb98058bcd 100644 --- a/src/yb/yql/pggate/ybc_pggate.cc +++ b/src/yb/yql/pggate/ybc_pggate.cc @@ -349,7 +349,7 @@ void AshCopyAuxInfo( void AshCopyTServerSample( YBCAshSample* cb_sample, uint32_t component, const WaitStateInfoPB& tserver_sample, - uint64_t sample_time) { + uint64_t sample_time, float sample_weight) { auto* cb_metadata = &cb_sample->metadata; const auto& tserver_metadata = tserver_sample.metadata(); @@ -359,7 +359,7 @@ void AshCopyTServerSample( cb_sample->rpc_request_id = tserver_metadata.rpc_request_id(); cb_sample->encoded_wait_event_code = AshEncodeWaitStateCodeWithComponent(component, tserver_sample.wait_state_code()); - cb_sample->sample_weight = 1; // TODO: Change this once sampling is done at tserver side + cb_sample->sample_weight = sample_weight; cb_sample->sample_time = sample_time; std::memcpy(cb_metadata->root_request_id, @@ -382,7 +382,8 @@ void AshCopyTServerSamples( YBCAshGetNextCircularBufferSlot get_cb_slot_fn, const tserver::WaitStatesPB& samples, uint64_t sample_time) { for (const auto& sample : samples.wait_states()) { - AshCopyTServerSample(get_cb_slot_fn(), samples.component(), sample, 
sample_time); + AshCopyTServerSample(get_cb_slot_fn(), samples.component(), sample, sample_time, + samples.sample_weight()); } } @@ -2325,8 +2326,6 @@ void YBCStoreTServerAshSamples( LOG(ERROR) << result.status(); } else { AshCopyTServerSamples(get_cb_slot_fn, result->tserver_wait_states(), sample_time); - AshCopyTServerSamples(get_cb_slot_fn, result->flush_and_compaction_wait_states(), sample_time); - AshCopyTServerSamples(get_cb_slot_fn, result->raft_log_appender_wait_states(), sample_time); AshCopyTServerSamples(get_cb_slot_fn, result->cql_wait_states(), sample_time); } } diff --git a/src/yb/yql/pgwrapper/pg_mini-test.cc b/src/yb/yql/pgwrapper/pg_mini-test.cc index 1a4b60657e37..46f26a5f4745 100644 --- a/src/yb/yql/pgwrapper/pg_mini-test.cc +++ b/src/yb/yql/pgwrapper/pg_mini-test.cc @@ -63,6 +63,8 @@ #include "yb/util/test_thread_holder.h" #include "yb/util/tsan_util.h" +#include "yb/rpc/rpc_context.h" + #include "yb/yql/pggate/pggate_flags.h" #include "yb/yql/pgwrapper/pg_mini_test_base.h" @@ -79,6 +81,7 @@ DECLARE_bool(flush_rocksdb_on_shutdown); DECLARE_bool(enable_wait_queues); DECLARE_bool(pg_client_use_shared_memory); DECLARE_bool(ysql_yb_enable_replica_identity); +DECLARE_bool(TEST_enable_pg_client_mock); DECLARE_double(TEST_respond_write_failed_probability); DECLARE_double(TEST_transaction_ignore_applying_probability); @@ -119,6 +122,7 @@ DECLARE_uint64(pg_client_heartbeat_interval_ms); DECLARE_bool(ysql_yb_ash_enable_infra); DECLARE_bool(ysql_yb_enable_ash); +DECLARE_int32(ysql_yb_ash_sample_size); METRIC_DECLARE_entity(tablet); METRIC_DECLARE_gauge_uint64(aborted_transactions_pending_cleanup); @@ -452,18 +456,19 @@ TEST_F_EX(PgMiniTest, YB_DISABLE_TEST_IN_TSAN(Ash), PgMiniAshTest) { req.set_fetch_tserver_states(true); req.set_fetch_flush_and_compaction_states(true); req.set_fetch_cql_states(true); + req.set_sample_size(FLAGS_ysql_yb_ash_sample_size); tserver::PgActiveSessionHistoryResponsePB resp; rpc::RpcController controller; std::unordered_map 
method_counts; int calls_without_aux_info_details = 0; for (int i = 0; i < kNumCalls; ++i) { ASSERT_OK(pg_proxy->ActiveSessionHistory(req, &resp, &controller)); - VLOG(1) << "Call " << i << " got " << yb::ToString(resp); + VLOG(0) << "Call " << i << " got " << yb::ToString(resp); controller.Reset(); SleepFor(10ms); int idx = 0; for (auto& entry : resp.tserver_wait_states().wait_states()) { - VLOG(2) << "Entry " << ++idx << " : " << yb::ToString(entry); + VLOG(0) << "Entry " << ++idx << " : " << yb::ToString(entry); if (entry.has_aux_info() && entry.aux_info().has_method()) { ++method_counts[entry.aux_info().method()]; } else { @@ -2232,4 +2237,56 @@ TEST_F_EX(PgMiniTest, DISABLED_ReadsDuringRBS, PgMiniStreamCompressionTest) { thread_holder.Stop(); } +Status MockAbortFailure( + const yb::tserver::PgFinishTransactionRequestPB* req, + yb::tserver::PgFinishTransactionResponsePB* resp, yb::rpc::RpcContext* context) { + LOG(INFO) << "FinishTransaction called for session: " << req->session_id(); + + if (req->session_id() == 1) { + context->CloseConnection(); + // The return status should not matter here. + return Status::OK(); + } else if (req->session_id() == 2) { + return STATUS(NetworkError, "Mocking network failure on FinishTransaction"); + } + + return Status::OK(); +} + +class PgRecursiveAbortTest : public PgMiniTestSingleNode { + public: + void SetUp() override { + ANNOTATE_UNPROTECTED_WRITE(FLAGS_TEST_enable_pg_client_mock) = true; + PgMiniTest::SetUp(); + } + + template + tserver::PgClientServiceMockImpl::Handle MockFinishTransaction(const F& mock) { + auto* client = cluster_->mini_tablet_server(0)->server()->TEST_GetPgClientServiceMock(); + return client->MockFinishTransaction(mock); + } +}; + +TEST_F(PgRecursiveAbortTest, AbortOnTserverFailure) { + PGConn conn1 = ASSERT_RESULT(Connect()); + ASSERT_OK(conn1.Execute("CREATE TABLE t1 (k INT)")); + + // Validate that "connection refused" from tserver during a transaction does not produce a PANIC. 
+ ASSERT_OK(conn1.StartTransaction(SNAPSHOT_ISOLATION)); + // Run a command to ensure that the transaction is created in the backend. + ASSERT_OK(conn1.Execute("INSERT INTO t1 VALUES (1)")); + auto handle = MockFinishTransaction(MockAbortFailure); + auto status = conn1.Execute("CREATE TABLE t2 (k INT)"); + ASSERT_TRUE(status.IsNetworkError()); + ASSERT_EQ(conn1.ConnStatus(), CONNECTION_BAD); + + // Validate that aborting a transaction does not produce a PANIC. + PGConn conn2 = ASSERT_RESULT(Connect()); + ASSERT_OK(conn2.StartTransaction(SNAPSHOT_ISOLATION)); + ASSERT_OK(conn2.Execute("INSERT INTO t1 VALUES (1)")); + status = conn2.Execute("ABORT"); + ASSERT_TRUE(status.IsNetworkError()); + ASSERT_EQ(conn1.ConnStatus(), CONNECTION_BAD); +} + } // namespace yb::pgwrapper diff --git a/src/yb/yql/pgwrapper/pg_mini_test_base.cc b/src/yb/yql/pgwrapper/pg_mini_test_base.cc index 6556fd67c4ce..820b915bacac 100644 --- a/src/yb/yql/pgwrapper/pg_mini_test_base.cc +++ b/src/yb/yql/pgwrapper/pg_mini_test_base.cc @@ -173,8 +173,10 @@ PGConnSettings PgMiniTestBase::MakeConnSettings(const std::string& dbname) const }; } -Result PgMiniTestBase::ConnectToDB(const std::string& dbname) const { - auto result = VERIFY_RESULT(PGConnBuilder(MakeConnSettings(dbname)).Connect()); +Result PgMiniTestBase::ConnectToDB(const std::string& dbname, size_t timeout) const { + auto settings = MakeConnSettings(dbname); + settings.connect_timeout = timeout; + auto result = VERIFY_RESULT(PGConnBuilder(settings).Connect()); RETURN_NOT_OK(SetupConnection(&result)); return result; } diff --git a/src/yb/yql/pgwrapper/pg_mini_test_base.h b/src/yb/yql/pgwrapper/pg_mini_test_base.h index 385b3098046c..01b0833d57a6 100644 --- a/src/yb/yql/pgwrapper/pg_mini_test_base.h +++ b/src/yb/yql/pgwrapper/pg_mini_test_base.h @@ -63,7 +63,7 @@ class PgMiniTestBase : public MiniClusterTestWithClient { return ConnectToDB(std::string() /* db_name */); } - Result ConnectToDB(const std::string& dbname) const; + Result 
ConnectToDB(const std::string& dbname, size_t timeout = 0) const; Status RestartCluster(); diff --git a/src/yb/yql/pgwrapper/pg_wrapper.cc b/src/yb/yql/pgwrapper/pg_wrapper.cc index 75b30a12617f..2ff1b33c4afc 100644 --- a/src/yb/yql/pgwrapper/pg_wrapper.cc +++ b/src/yb/yql/pgwrapper/pg_wrapper.cc @@ -305,16 +305,6 @@ DEFINE_validator(ysql_yb_xcluster_consistency_level, &ValidateXclusterConsistenc DEFINE_NON_RUNTIME_string(ysql_conn_mgr_warmup_db, "yugabyte", "Database for which warmup needs to be done."); -DEFINE_NON_RUNTIME_PG_FLAG(int32, yb_ash_circular_buffer_size, 16 * 1024, - "Size (in KiBs) of ASH circular buffer that stores the samples"); - -DEFINE_RUNTIME_PG_FLAG(int32, yb_ash_sampling_interval_ms, 1000, - "Time (in milliseconds) between two consecutive sampling events"); -DEPRECATE_FLAG(int32, ysql_yb_ash_sampling_interval, "2024_03"); - -DEFINE_RUNTIME_PG_FLAG(int32, yb_ash_sample_size, 500, - "Number of samples captured from each component per sampling event"); - DEFINE_NON_RUNTIME_string(ysql_cron_database_name, "yugabyte", "Database in which pg_cron metadata is kept.");