Skip to content

Commit

Permalink
[PLAT-15091][PLAT-14092] Improve HA logging and backup metric
Browse files Browse the repository at this point in the history
Summary:
Adding more logging to HA promotion, demotion, and Prometheus config switching.

Also changed the backup size metric to keep fractional precision when converting bytes to MB (floating-point division instead of integer division).

Test Plan:
Set up HA and perform a promotion, then check the logs.

Verify the backup metric is not zero on a new YBA.

Reviewers: dshubin, sanketh

Reviewed By: dshubin

Subscribers: yugaware

Differential Revision: https://phorge.dev.yugabyte.com/D37674
  • Loading branch information
mchiddy committed Sep 6, 2024
1 parent b02f6f7 commit 62d30d5
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,7 @@ boolean exportPlatformInstances(HighAvailabilityConfig config, String remoteInst

void switchPrometheusToFederated(URL remoteAddr) {
try {
LOG.info("Switching local prometheus to federated");
File configFile = prometheusConfigHelper.getPrometheusConfigFile();
File configDir = configFile.getParentFile();
File previousConfigFile = new File(configDir, "previous_prometheus.yml");
Expand All @@ -315,6 +316,7 @@ void switchPrometheusToFederated(URL remoteAddr) {

// Move the old file if it hasn't already been moved.
if (configFile.exists() && !previousConfigFile.exists()) {
LOG.info("Creating previous_prometheus.yml from existing prometheus.yml");
FileUtils.moveFile(configFile.toPath(), previousConfigFile.toPath());
}

Expand All @@ -327,6 +329,7 @@ void switchPrometheusToFederated(URL remoteAddr) {
String federatedPoint = remoteAddr.getHost() + ":" + federatedURL.getPort();
boolean https = federatedURL.getScheme().equalsIgnoreCase("https");
this.writeFederatedPrometheusConfig(federatedPoint, configFile, https);
LOG.info("Wrote federated prometheus config.");

// Reload the config.
prometheusConfigHelper.reloadPrometheusConfig();
Expand All @@ -337,6 +340,7 @@ void switchPrometheusToFederated(URL remoteAddr) {

void switchPrometheusToStandalone() {
try {
LOG.info("Switching prometheus to standalone.");
File configFile = prometheusConfigHelper.getPrometheusConfigFile();
File configDir = configFile.getParentFile();
File previousConfigFile = new File(configDir, "previous_prometheus.yml");
Expand All @@ -348,6 +352,7 @@ void switchPrometheusToStandalone() {
FileUtils.moveFile(previousConfigFile.toPath(), configFile.toPath());
prometheusConfigHelper.reloadPrometheusConfig();
prometheusConfigManager.updateK8sScrapeConfigs();
LOG.info("Moved previous_prometheus.yml to prometheus.yml");
} catch (Exception e) {
LOG.error("Error switching prometheus config to standalone", e);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,7 @@ public JsonNode getBackupInfo() {

public void demoteLocalInstance(PlatformInstance localInstance, String leaderAddr)
throws MalformedURLException {
log.info("Demoting local instance.");
if (!localInstance.getIsLocal()) {
throw new RuntimeException("Cannot perform this action on a remote instance");
}
Expand All @@ -196,6 +197,7 @@ public void demoteLocalInstance(PlatformInstance localInstance, String leaderAdd
}

public void promoteLocalInstance(PlatformInstance newLeader) {
log.info("Promoting local instance to active.");
HighAvailabilityConfig config = newLeader.getConfig();
Optional<PlatformInstance> previousLocal = config.getLocal();

Expand All @@ -212,6 +214,7 @@ public void promoteLocalInstance(PlatformInstance newLeader) {
i.updateIsLocal(i.getUuid().equals(newLeader.getUuid()));
try {
// Clear out any old backups.
log.info("Cleaning up received backups.");
replicationHelper.cleanupReceivedBackups(new URL(i.getAddress()), 0);
} catch (MalformedURLException ignored) {
}
Expand All @@ -225,6 +228,7 @@ public void promoteLocalInstance(PlatformInstance newLeader) {
.getRemoteInstances()
.forEach(
instance -> {
log.info("Demoting remote instance {}", instance.getAddress());
replicationHelper.demoteRemoteInstance(instance, true);
});
// Promote the new local leader.
Expand Down Expand Up @@ -296,7 +300,7 @@ public boolean sendBackup(PlatformInstance remoteInstance) {
.getMostRecentBackup()
.map(
backup -> {
HA_LAST_BACKUP_SIZE.set(backup.length() / (1024 * 1024));
HA_LAST_BACKUP_SIZE.set(backup.length() / (1024.0 * 1024.0));
return replicationHelper.exportBackups(
config, clusterKey, remoteInstance.getAddress(), backup);
})
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,7 @@ public Result syncBackups(Http.Request request) throws Exception {

public Result demoteLocalLeader(long timestamp, boolean promote, Http.Request request) {
try {
LOG.info("Received request to demote local instance.");
Optional<HighAvailabilityConfig> config =
HighAvailabilityConfig.getByClusterKey(this.getClusterKey(request));
if (!config.isPresent()) {
Expand Down

0 comments on commit 62d30d5

Please sign in to comment.