Skip to content

Commit

Permalink
[#6506] Platform: Prometheus: Changes for Federation
Browse files Browse the repository at this point in the history
Summary:
Add the ability to build a federated Prometheus model for the HA cluster, so that each follower platform's Prometheus reads from the leader's Prometheus instead of scraping nodes directly.

When an instance is promoted to leader, we switch it back to the normal Prometheus config.

Since we're overwriting a file that is technically managed by Replicated, we have to "wrestle with Replicated" and keep ensuring that the file the platform expects is actually in place. So during app startup and during each sync schedule we ensure the correct file is present. This handles the (hopefully infrequent) scenario of Replicated re-deploying the containers during events such as YW upgrades or maintenance.

Test Plan:
Tested some of the bug fixes in here in the platform HA sandbox by manually patching them.

Tested switching the Prometheus config locally by writing a simple test API that called the code to generate the federated config, swap out the old one, and reload the config through the API.

Reviewers: sanketh, sb-yb

Reviewed By: sb-yb

Subscribers: jenkins-bot

Differential Revision: https://phabricator.dev.yugabyte.com/D10576
  • Loading branch information
daniel-yb committed Feb 18, 2021
1 parent ad28b20 commit b1a2b78
Show file tree
Hide file tree
Showing 25 changed files with 599 additions and 267 deletions.
4 changes: 3 additions & 1 deletion managed/build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,9 @@ libraryDependencies ++= Seq(
"org.hamcrest" % "hamcrest-core" % "2.2" % Test,
"pl.pragmatists" % "JUnitParams" % "1.1.1" % Test,
"com.icegreen" % "greenmail" % "1.6.1" % Test,
"com.icegreen" % "greenmail-junit4" % "1.6.1" % Test
"com.icegreen" % "greenmail-junit4" % "1.6.1" % Test,
"org.apache.velocity" % "velocity" % "1.7",
"org.apache.velocity" % "velocity-tools" % "2.0"
)
// Clear default resolvers.
appResolvers := None
Expand Down
6 changes: 3 additions & 3 deletions managed/devops/bin/yb_platform_backup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -103,9 +103,9 @@ create_postgres_backup() {
db_port="$4"
verbose="$5"
if [[ "${verbose}" = true ]]; then
backup_cmd="pg_dump -h ${db_host} -p ${db_port} -U ${db_username} -Fc -v ${PLATFORM_DB_NAME}"
backup_cmd="pg_dump -h ${db_host} -p ${db_port} -U ${db_username} -Fc -v --clean ${PLATFORM_DB_NAME}"
else
backup_cmd="pg_dump -h ${db_host} -p ${db_port} -U ${db_username} -Fc ${PLATFORM_DB_NAME}"
backup_cmd="pg_dump -h ${db_host} -p ${db_port} -U ${db_username} -Fc --clean ${PLATFORM_DB_NAME}"
fi
# Run pg_dump.
echo "Creating Yugabyte Platform DB backup ${backup_path}..."
Expand Down Expand Up @@ -162,7 +162,7 @@ create_backup() {
exclude_releases_flag="--exclude release*"
fi

exclude_dirs="--exclude postgresql --exclude devops --exclude yugaware/lib \
exclude_dirs="--exclude postgres* --exclude devops --exclude yugaware/lib \
--exclude yugaware/logs --exclude yugaware/README.md --exclude yugaware/bin \
--exclude yugaware/conf --exclude backup_*.tgz --exclude helm"

Expand Down
9 changes: 7 additions & 2 deletions managed/devops/replicated.yml
Original file line number Diff line number Diff line change
Expand Up @@ -83,10 +83,13 @@ components:
permission: "0777"
- host_path: '{{repl ConfigOption "storage_path"}}/yugaware/swamper_targets'
container_path: /opt/yugabyte/prometheus/targets
# Host directory holding prometheus.yml, mounted at /prometheus_configs (see --config.file below).
- host_path: '{{repl ConfigOption "storage_path"}}/prometheus_configs'
container_path: /prometheus_configs
permission: "0777"
ports:
- private_port: "9090"
public_port: "9090"
cmd: '["--config.file=/etc/prometheus/prometheus.yml", "--storage.tsdb.path=/prometheus", "--storage.tsdb.retention={{repl ConfigOption \"prometheus_retention\"}}", "--web.enable-admin-api"]'
cmd: '["--config.file=/prometheus_configs/prometheus.yml", "--storage.tsdb.path=/prometheus", "--storage.tsdb.retention={{repl ConfigOption \"prometheus_retention\"}}", "--web.enable-admin-api", "--web.enable-lifecycle"]'
publish_events:
- name: Prometheus started
trigger: container-start
Expand All @@ -95,7 +98,7 @@ components:
container: postgres
action: start
config_files:
- filename: /etc/prometheus/prometheus.yml
- filename: /prometheus_configs/prometheus.yml
contents: |
global:
scrape_interval: {{repl ConfigOption "prometheus_scrape_interval"}}
Expand Down Expand Up @@ -163,6 +166,8 @@ components:
container_path: /opt/yugabyte/releases
- host_path: '{{repl ConfigOption "storage_path"}}/prometheusv2'
container_path: /prometheus
# Same host directory as the Prometheus container's /prometheus_configs mount.
- host_path: '{{repl ConfigOption "storage_path"}}/prometheus_configs'
container_path: /prometheus_configs
env_vars:
- name: JAVA_OPTS
static_val: '-XX:PermSize=1024m -XX:MaxPermSize=1024m'
Expand Down
12 changes: 4 additions & 8 deletions managed/src/main/java/AppInit.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,14 @@

import com.yugabyte.yw.commissioner.TaskGarbageCollector;
import com.yugabyte.yw.common.*;
import com.yugabyte.yw.common.ha.PlatformReplicationManager;
import com.yugabyte.yw.models.*;
import io.ebean.Ebean;
import com.google.common.collect.ImmutableMap;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import com.yugabyte.yw.cloud.AWSInitializer;

import com.yugabyte.yw.models.Customer;
import com.yugabyte.yw.models.ExtraMigration;
import com.yugabyte.yw.models.InstanceType;
import com.yugabyte.yw.models.MetricConfig;
import com.yugabyte.yw.models.Provider;

import play.Application;
import play.Configuration;
import play.Environment;
Expand Down Expand Up @@ -130,8 +126,8 @@ public AppInit(Environment environment, Application application,
// Schedule garbage collection of old completed tasks in database.
taskGC.start();

// Start periodic platform backups
replicationManager.start();
// Startup platform HA.
replicationManager.init();

// Add checksums for all certificates that don't have a checksum.
CertificateHelper.createChecksums();
Expand Down
3 changes: 3 additions & 0 deletions managed/src/main/java/Module.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,12 @@
import com.yugabyte.yw.common.*;
import com.yugabyte.yw.common.config.RuntimeConfigFactory;
import com.yugabyte.yw.common.config.impl.SettableRuntimeConfigFactory;
import com.yugabyte.yw.common.ha.PlatformReplicationManager;
import com.yugabyte.yw.common.kms.EncryptionAtRestManager;
import com.yugabyte.yw.common.kms.util.EncryptionAtRestUniverseKeyCache;
import com.yugabyte.yw.common.services.LocalYBClientService;
import com.yugabyte.yw.common.services.YBClientService;
import com.yugabyte.yw.common.ha.PlatformReplicationHelper;
import com.yugabyte.yw.controllers.PlatformHttpActionAdapter;
import com.yugabyte.yw.metrics.MetricQueryHelper;
import com.yugabyte.yw.queries.QueryHelper;
Expand Down Expand Up @@ -84,6 +86,7 @@ public void configure() {
bind(QueryAlerts.class).asEagerSingleton();
bind(PlatformReplicationManager.class).asEagerSingleton();
bind(PlatformInstanceClientFactory.class).asEagerSingleton();
bind(PlatformReplicationHelper.class).asEagerSingleton();

final CallbackController callbackController = new CallbackController();
callbackController.setDefaultUrl(config.getString("yb.url", ""));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import akka.stream.javadsl.Source;
import akka.util.ByteString;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.common.collect.ImmutableMap;
import com.yugabyte.yw.controllers.HAAuthenticator;
import com.yugabyte.yw.controllers.ReverseInternalHAController;
Expand Down Expand Up @@ -109,8 +110,9 @@ public void syncInstances(long timestamp, JsonNode payload) {
* {@link com.yugabyte.yw.controllers.InternalHAController#demoteLocalLeader(long timestamp)}
* on remote platform instance
*/
public void demoteInstance(long timestamp) {
this.makeRequest(this.controller.demoteLocalLeader(timestamp), Json.newObject());
public void demoteInstance(String localAddr, long timestamp) {
  // Request body carries the caller-supplied address under the "leader_address" key.
  ObjectNode payload = Json.newObject();
  payload.put("leader_address", localAddr);
  this.makeRequest(this.controller.demoteLocalLeader(timestamp), payload);
}

public JsonNode syncBackups(
Expand Down
14 changes: 7 additions & 7 deletions managed/src/main/java/com/yugabyte/yw/common/Util.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,6 @@
import com.fasterxml.jackson.databind.ObjectMapper;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Functions;
import com.google.common.base.Joiner;
import com.google.common.collect.Lists;
import com.google.common.net.HostAndPort;
import com.yugabyte.yw.forms.UniverseDefinitionTaskParams;
import com.yugabyte.yw.forms.UniverseDefinitionTaskParams.Cluster;
import com.yugabyte.yw.forms.UniverseDefinitionTaskParams.ClusterType;
Expand All @@ -26,7 +22,6 @@
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.*;
import java.text.SimpleDateFormat;
import java.security.MessageDigest;
Expand All @@ -38,6 +33,7 @@
import org.slf4j.LoggerFactory;

import static com.yugabyte.yw.common.PlacementInfoUtil.getNumMasters;
import static java.nio.file.StandardCopyOption.REPLACE_EXISTING;

public class Util {
public static final Logger LOG = LoggerFactory.getLogger(Util.class);
Expand Down Expand Up @@ -307,16 +303,20 @@ public static String getFileChecksum(String file) throws IOException, NoSuchAlgo

byte[] bytes = digest.digest();
StringBuilder sb = new StringBuilder();
for(int i = 0; i < bytes.length; i++) {
for (int i = 0; i < bytes.length; i++) {
sb.append(Integer.toString((bytes[i] & 0xff) + 0x100, 16).substring(1));
}
return sb.toString();
}

static List<File> listFiles(Path backupDir, String pattern) throws IOException {
/**
 * Lists the entries directly inside a directory whose names match a glob pattern.
 *
 * @param backupDir directory to scan
 * @param pattern glob pattern handed to {@link Files#newDirectoryStream(Path, String)}
 * @return the matching entries converted to {@link File} objects
 * @throws IOException if the directory cannot be opened
 */
public static List<File> listFiles(Path backupDir, String pattern) throws IOException {
  // A DirectoryStream keeps a directory handle open until closed, so release it
  // as soon as the matches have been collected.
  try (java.nio.file.DirectoryStream<Path> entries =
      Files.newDirectoryStream(backupDir, pattern)) {
    return StreamSupport.stream(entries.spliterator(), false)
      .map(Path::toFile)
      .collect(Collectors.toList());
  }
}

/**
 * Moves {@code source} to {@code destination}, replacing the destination if it already exists.
 *
 * @param source path of the file to move
 * @param destination path the file is moved to
 * @throws IOException propagated from {@link Files#move}
 */
public static void moveFile(Path source, Path destination) throws IOException {
Files.move(source, destination, REPLACE_EXISTING);
}
}
Loading

0 comments on commit b1a2b78

Please sign in to comment.