From 62d30d5b826a75825ddbe4539fae02fd0366356d Mon Sep 17 00:00:00 2001 From: Muthu Chidambaram Date: Wed, 4 Sep 2024 17:53:13 +0000 Subject: [PATCH] [PLAT-15091][PLAT-14092] Improve HA logging and backup metric Summary: Adding more logging to HA promotion, demotion, and switching prometheus config. Also changed the backup size metric to correctly maintain precision when converting to MB. Test Plan: HA setup and promotion, check logs. Verify backup metric is not zero on new YBA Reviewers: dshubin, sanketh Reviewed By: dshubin Subscribers: yugaware Differential Revision: https://phorge.dev.yugabyte.com/D37674 --- .../yugabyte/yw/common/ha/PlatformReplicationHelper.java | 5 +++++ .../yugabyte/yw/common/ha/PlatformReplicationManager.java | 6 +++++- .../com/yugabyte/yw/controllers/InternalHAController.java | 1 + 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/managed/src/main/java/com/yugabyte/yw/common/ha/PlatformReplicationHelper.java b/managed/src/main/java/com/yugabyte/yw/common/ha/PlatformReplicationHelper.java index a07f869af856..88a1cbd3a75c 100644 --- a/managed/src/main/java/com/yugabyte/yw/common/ha/PlatformReplicationHelper.java +++ b/managed/src/main/java/com/yugabyte/yw/common/ha/PlatformReplicationHelper.java @@ -304,6 +304,7 @@ boolean exportPlatformInstances(HighAvailabilityConfig config, String remoteInst void switchPrometheusToFederated(URL remoteAddr) { try { + LOG.info("Switching local prometheus to federated"); File configFile = prometheusConfigHelper.getPrometheusConfigFile(); File configDir = configFile.getParentFile(); File previousConfigFile = new File(configDir, "previous_prometheus.yml"); @@ -315,6 +316,7 @@ void switchPrometheusToFederated(URL remoteAddr) { // Move the old file if it hasn't already been moved. 
if (configFile.exists() && !previousConfigFile.exists()) { + LOG.info("Creating previous_prometheus.yml from existing prometheus.yml"); FileUtils.moveFile(configFile.toPath(), previousConfigFile.toPath()); } @@ -327,6 +329,7 @@ void switchPrometheusToFederated(URL remoteAddr) { String federatedPoint = remoteAddr.getHost() + ":" + federatedURL.getPort(); boolean https = federatedURL.getScheme().equalsIgnoreCase("https"); this.writeFederatedPrometheusConfig(federatedPoint, configFile, https); + LOG.info("Wrote federated prometheus config."); // Reload the config. prometheusConfigHelper.reloadPrometheusConfig(); @@ -337,6 +340,7 @@ void switchPrometheusToFederated(URL remoteAddr) { void switchPrometheusToStandalone() { try { + LOG.info("Switching prometheus to standalone."); File configFile = prometheusConfigHelper.getPrometheusConfigFile(); File configDir = configFile.getParentFile(); File previousConfigFile = new File(configDir, "previous_prometheus.yml"); @@ -348,6 +352,7 @@ void switchPrometheusToStandalone() { FileUtils.moveFile(previousConfigFile.toPath(), configFile.toPath()); prometheusConfigHelper.reloadPrometheusConfig(); prometheusConfigManager.updateK8sScrapeConfigs(); + LOG.info("Moved previous_prometheus.yml to prometheus.yml"); } catch (Exception e) { LOG.error("Error switching prometheus config to standalone", e); } diff --git a/managed/src/main/java/com/yugabyte/yw/common/ha/PlatformReplicationManager.java b/managed/src/main/java/com/yugabyte/yw/common/ha/PlatformReplicationManager.java index 57e076b2a637..fb00fd4ddcec 100644 --- a/managed/src/main/java/com/yugabyte/yw/common/ha/PlatformReplicationManager.java +++ b/managed/src/main/java/com/yugabyte/yw/common/ha/PlatformReplicationManager.java @@ -181,6 +181,7 @@ public JsonNode getBackupInfo() { public void demoteLocalInstance(PlatformInstance localInstance, String leaderAddr) throws MalformedURLException { + log.info("Demoting local instance."); if (!localInstance.getIsLocal()) { throw new 
RuntimeException("Cannot perform this action on a remote instance"); } @@ -196,6 +197,7 @@ public void demoteLocalInstance(PlatformInstance localInstance, String leaderAdd } public void promoteLocalInstance(PlatformInstance newLeader) { + log.info("Promoting local instance to active."); HighAvailabilityConfig config = newLeader.getConfig(); Optional previousLocal = config.getLocal(); @@ -212,6 +214,7 @@ public void promoteLocalInstance(PlatformInstance newLeader) { i.updateIsLocal(i.getUuid().equals(newLeader.getUuid())); try { // Clear out any old backups. + log.info("Cleaning up received backups."); replicationHelper.cleanupReceivedBackups(new URL(i.getAddress()), 0); } catch (MalformedURLException ignored) { } @@ -225,6 +228,7 @@ public void promoteLocalInstance(PlatformInstance newLeader) { .getRemoteInstances() .forEach( instance -> { + log.info("Demoting remote instance {}", instance.getAddress()); replicationHelper.demoteRemoteInstance(instance, true); }); // Promote the new local leader. 
@@ -296,7 +300,7 @@ public boolean sendBackup(PlatformInstance remoteInstance) { .getMostRecentBackup() .map( backup -> { - HA_LAST_BACKUP_SIZE.set(backup.length() / (1024 * 1024)); + HA_LAST_BACKUP_SIZE.set(backup.length() / (1024.0 * 1024.0)); return replicationHelper.exportBackups( config, clusterKey, remoteInstance.getAddress(), backup); }) diff --git a/managed/src/main/java/com/yugabyte/yw/controllers/InternalHAController.java b/managed/src/main/java/com/yugabyte/yw/controllers/InternalHAController.java index 1b8ec52fa32c..0e1866e5ca15 100644 --- a/managed/src/main/java/com/yugabyte/yw/controllers/InternalHAController.java +++ b/managed/src/main/java/com/yugabyte/yw/controllers/InternalHAController.java @@ -195,6 +195,7 @@ public Result syncBackups(Http.Request request) throws Exception { public Result demoteLocalLeader(long timestamp, boolean promote, Http.Request request) { try { + LOG.info("Received request to demote local instance."); Optional config = HighAvailabilityConfig.getByClusterKey(this.getClusterKey(request)); if (!config.isPresent()) {