Skip to content

Commit

Permalink
[PLAT-7423] Make all inter-container communication go through docker …
Browse files Browse the repository at this point in the history
…gateway + fix YBA scraping

Summary:
Currently we're using host interface for yugaware<->prometheus communications.
We shouldn't do that.
Hence, we need to expose the prometheus and yugaware containers on both the docker gateway and host interfaces in the Replicated environment.
And we need the rest of the code to use internal/external urls/ips/ports where needed.
Another issue was that the prometheus port was hardcoded in some places in the backup script - the configured port is now passed everywhere.

As a result - yugaware and prometheus containers will continue to listen on ports 9000/9443 and 9090 on the host interface - for external communications.
But they will also listen on ports 9100/9543 and 9190 on the docker gateway interface for all inter-container communications.

Test Plan:
Installed Replicated YBA.
Make sure yugaware and prometheus targets are scraped by Prometheus.
Make sure yugaware queries metrics and alerts from prometheus successfully.
Make sure host IP is used in Prometheus links in YBA UI.

Start YBA locally.
Make sure local prometheus URL is used in links in YBA UI.

Reviewers: vbansal, sanketh, sb-yb

Reviewed By: sb-yb

Subscribers: jenkins-bot, yugaware

Differential Revision: https://phabricator.dev.yugabyte.com/D23312
  • Loading branch information
anmalysh-yb committed Mar 10, 2023
1 parent e1ef49f commit 4467546
Show file tree
Hide file tree
Showing 24 changed files with 163 additions and 94 deletions.
24 changes: 16 additions & 8 deletions managed/devops/bin/yb_platform_backup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -86,11 +86,12 @@ run_sudo_cmd() {
# Query prometheus for its data directory and set as env var
set_prometheus_data_dir() {
prometheus_host="$1"
prometheus_port="$2"
data_dir="$2"
if [[ "$DOCKER_BASED" = true ]]; then
PROMETHEUS_DATA_DIR="${data_dir}/prometheusv2"
else
PROMETHEUS_DATA_DIR=$(curl "http://${prometheus_host}:9090/api/v1/status/flags" |
PROMETHEUS_DATA_DIR=$(curl "http://${prometheus_host}:${prometheus_port}/api/v1/status/flags" |
${PYTHON_EXECUTABLE} -c "import sys, json; print(json.load(sys.stdin)['data']['storage.tsdb.path'])")
fi
if [[ -z "$PROMETHEUS_DATA_DIR" ]]; then
Expand Down Expand Up @@ -295,7 +296,7 @@ create_backup() {
if [[ "$exclude_prometheus" = false ]]; then
trap 'run_sudo_cmd "rm -rf ${data_dir}/${PROMETHEUS_SNAPSHOT_DIR}"' RETURN
echo "Creating prometheus snapshot..."
set_prometheus_data_dir "${prometheus_host}" "${data_dir}"
set_prometheus_data_dir "${prometheus_host}" "${prometheus_port}" "${data_dir}"
snapshot_dir=$(curl -X POST "http://${prometheus_host}:${prometheus_port}/api/v1/admin/tsdb/snapshot" |
${PYTHON_EXECUTABLE} -c "import sys, json; print(json.load(sys.stdin)['data']['name'])")
mkdir -p "$data_dir/$PROMETHEUS_SNAPSHOT_DIR"
Expand Down Expand Up @@ -338,10 +339,11 @@ restore_backup() {
db_username="${5}"
verbose="${6}"
prometheus_host="${7}"
data_dir="${8}"
k8s_namespace="${9}"
k8s_pod="${10}"
disable_version_check="${11}"
prometheus_port="${8}"
data_dir="${9}"
k8s_namespace="${10}"
k8s_pod="${11}"
disable_version_check="${12}"
prometheus_dir_regex="^${PROMETHEUS_SNAPSHOT_DIR}/$"
if [[ "${yba_installer}" = true ]]; then
prometheus_dir_regex="${PROMETHEUS_SNAPSHOT_DIR}"
Expand Down Expand Up @@ -466,7 +468,7 @@ restore_backup() {
# Restore prometheus data.
if tar -tf "${input_path}" | grep $prometheus_dir_regex; then
echo "Restoring prometheus snapshot..."
set_prometheus_data_dir "${prometheus_host}" "${data_dir}"
set_prometheus_data_dir "${prometheus_host}" "${prometheus_port}" "${data_dir}"
modify_service prometheus stop
run_sudo_cmd "rm -rf ${PROMETHEUS_DATA_DIR}/*"
if [[ "${yba_installer}" = true ]]; then
Expand Down Expand Up @@ -538,6 +540,7 @@ print_restore_usage() {
echo " -h, --db_host=HOST postgres host (default: localhost)"
echo " -P, --db_port=PORT postgres port (default: 5432)"
echo " -n, --prometheus_host=HOST prometheus host (default: localhost)"
echo " -t, --prometheus_port=PORT prometheus port (default: 9090)"
echo " -e, --prometheus_user=USERNAME prometheus user (default: prometheus)"
echo " --k8s_namespace kubernetes namespace"
echo " --k8s_pod kubernetes pod"
Expand Down Expand Up @@ -737,6 +740,10 @@ case $command in
prometheus_host=$2
shift 2
;;
-t|--prometheus_port)
prometheus_port=$2
shift 2
;;
-e|--prometheus_user)
prometheus_user=$2
shift 2
Expand Down Expand Up @@ -784,7 +791,8 @@ case $command in
validate_k8s_args "${k8s_namespace}" "${k8s_pod}"

restore_backup "$input_path" "$destination" "$db_host" "$db_port" "$db_username" "$verbose" \
"$prometheus_host" "$data_dir" "$k8s_namespace" "$k8s_pod" "$disable_version_check"
"$prometheus_host" "$prometheus_port" "$data_dir" "$k8s_namespace" "$k8s_pod" \
"$disable_version_check"
exit 0
;;
*)
Expand Down
22 changes: 18 additions & 4 deletions managed/devops/replicated.yml
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,9 @@ components:
ports:
- private_port: "9090"
public_port: '{{repl ConfigOption "prometheus_external_port"}}'
- private_port: "9090"
public_port: "9190"
interface: docker0
entrypoint: [ "/bin/sh", "-c", "rm -rf /prometheus/lock && \
cp /prometheus_configs/default_prometheus.yml /prometheus_configs/prometheus.yml && \
exec /bin/prometheus \
Expand Down Expand Up @@ -275,7 +278,7 @@ components:
scrape_timeout: {{repl ConfigOption "prometheus_scrape_timeout"}}
static_configs:
- targets: [
'{{repl HostPrivateIpAddress "prometheus" "prom/prometheus" }}:{{repl ConfigOption "prometheus_external_port"}}'
'{{repl ConfigOption "docker_gateway" }}:9190'
]
- job_name: 'platform'
Expand All @@ -288,7 +291,7 @@ components:
{{repl end}}
static_configs:
- targets: [
'{{repl ConfigOption "docker_gateway" }}:{{repl if ConfigOptionEquals "https_enabled" "1"}}9443{{repl else}}9000{{repl end}}'
'{{repl ConfigOption "docker_gateway" }}:{{repl if ConfigOptionEquals "https_enabled" "1"}}9543{{repl else}}9100{{repl end}}'
]
- job_name: "node"
Expand Down Expand Up @@ -458,10 +461,20 @@ components:
public_port: '{{repl ConfigOption "ui_https_port"}}'
port_type: tcp
when: '{{repl ConfigOptionEquals "https_enabled" "1"}}'
- private_port: "9443"
public_port: "9543"
port_type: tcp
when: '{{repl ConfigOptionEquals "https_enabled" "1"}}'
interface: docker0
- private_port: "9000"
public_port: '{{repl ConfigOption "ui_http_port"}}'
port_type: tcp
when: '{{repl ConfigOptionEquals "http_enabled" "1"}}'
- private_port: "9000"
public_port: "9100"
port_type: tcp
when: '{{repl ConfigOptionEquals "http_enabled" "1"}}'
interface: docker0
config_files:
- filename: /opt/yugabyte/yugaware/conf/server.pem
contents: |
Expand Down Expand Up @@ -529,9 +542,10 @@ components:
yb {
devops.home = /opt/yugabyte/devops
metrics.host="{{repl HostPrivateIpAddress "prometheus" "prom/prometheus" }}"
metrics.port="{{repl ConfigOption "prometheus_external_port"}}"
metrics.host = "{{repl ConfigOption "docker_gateway" }}"
metrics.port = 9190
metrics.scrape_interval = "{{repl ConfigOption "prometheus_scrape_interval" }}"
metrics.external.url = "http://{{repl HostPrivateIpAddress "prometheus" "prom/prometheus" }}:{{repl ConfigOption "prometheus_external_port"}}"
storage.path = /opt/yugabyte/yugaware/data
docker.network = bridge
seedData = false
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,12 @@
import com.yugabyte.yw.common.ApiHelper;
import com.yugabyte.yw.common.ShellProcessHandler;
import com.yugabyte.yw.common.ShellResponse;
import com.yugabyte.yw.common.config.GlobalConfKeys;
import com.yugabyte.yw.common.config.RuntimeConfGetter;
import com.yugabyte.yw.common.config.impl.SettableRuntimeConfigFactory;
import com.yugabyte.yw.common.ha.PlatformReplicationManager.PlatformBackupParams;
import com.yugabyte.yw.common.utils.FileUtils;
import com.yugabyte.yw.metrics.MetricUrlProvider;
import com.yugabyte.yw.models.HighAvailabilityConfig;
import com.yugabyte.yw.models.PlatformInstance;
import java.io.BufferedWriter;
Expand Down Expand Up @@ -68,54 +71,65 @@ public class PlatformReplicationHelper {
static final String DB_HOST_CONFIG_KEY = "db.default.host";
static final String DB_PORT_CONFIG_KEY = "db.default.port";

private final RuntimeConfGetter confGetter;

private final SettableRuntimeConfigFactory runtimeConfigFactory;

private final ApiHelper apiHelper;

private final PlatformInstanceClientFactory remoteClientFactory;

private final MetricUrlProvider metricUrlProvider;

@VisibleForTesting ShellProcessHandler shellProcessHandler;

@Inject
public PlatformReplicationHelper(
RuntimeConfGetter confGetter,
SettableRuntimeConfigFactory runtimeConfigFactory,
ApiHelper apiHelper,
PlatformInstanceClientFactory remoteClientFactory,
ShellProcessHandler shellProcessHandler) {
ShellProcessHandler shellProcessHandler,
MetricUrlProvider metricUrlProvider) {
this.confGetter = confGetter;
this.runtimeConfigFactory = runtimeConfigFactory;
this.apiHelper = apiHelper;
this.remoteClientFactory = remoteClientFactory;
this.shellProcessHandler = shellProcessHandler;
this.metricUrlProvider = metricUrlProvider;
}

Path getBackupDir() {
return Paths.get(
runtimeConfigFactory.globalRuntimeConf().getString(STORAGE_PATH_KEY), BACKUP_DIR)
return Paths.get(confGetter.getStaticConf().getString(STORAGE_PATH_KEY), BACKUP_DIR)
.toAbsolutePath();
}

String getPrometheusHost() {
return runtimeConfigFactory.globalRuntimeConf().getString(PROMETHEUS_HOST_CONFIG_KEY);
return confGetter.getStaticConf().getString(PROMETHEUS_HOST_CONFIG_KEY);
}

int getPrometheusPort() {
return confGetter.getStaticConf().getInt(PROMETHEUS_PORT_CONFIG_KEY);
}

int getNumBackupsRetention() {
return Math.max(0, runtimeConfigFactory.globalRuntimeConf().getInt(NUM_BACKUP_RETENTION_KEY));
return Math.max(0, confGetter.getStaticConf().getInt(NUM_BACKUP_RETENTION_KEY));
}

String getDBUser() {
return runtimeConfigFactory.globalRuntimeConf().getString(DB_USERNAME_CONFIG_KEY);
return confGetter.getStaticConf().getString(DB_USERNAME_CONFIG_KEY);
}

String getDBPassword() {
return runtimeConfigFactory.globalRuntimeConf().getString(DB_PASSWORD_CONFIG_KEY);
return confGetter.getStaticConf().getString(DB_PASSWORD_CONFIG_KEY);
}

String getDBHost() {
return runtimeConfigFactory.globalRuntimeConf().getString(DB_HOST_CONFIG_KEY);
return confGetter.getStaticConf().getString(DB_HOST_CONFIG_KEY);
}

int getDBPort() {
return runtimeConfigFactory.globalRuntimeConf().getInt(DB_PORT_CONFIG_KEY);
return confGetter.getStaticConf().getInt(DB_PORT_CONFIG_KEY);
}

boolean isBackupScheduleEnabled() {
Expand All @@ -134,7 +148,7 @@ boolean isBackupScheduleRunning(Cancellable schedule) {

private File getPrometheusConfigDir() {
String outputDirString =
runtimeConfigFactory.globalRuntimeConf().getString(PROMETHEUS_FEDERATED_CONFIG_DIR_KEY);
confGetter.getStaticConf().getString(PROMETHEUS_FEDERATED_CONFIG_DIR_KEY);

return new File(outputDirString);
}
Expand All @@ -146,7 +160,7 @@ private File getPrometheusConfigFile() {
}

boolean isBackupScriptOutputEnabled() {
return runtimeConfigFactory.globalRuntimeConf().getBoolean(LOG_SHELL_CMD_OUTPUT_KEY);
return confGetter.getGlobalConf(GlobalConfKeys.logScriptOutput);
}

Duration getBackupFrequency() {
Expand All @@ -164,7 +178,7 @@ JsonNode getBackupInfoJson(long frequency, boolean isRunning) {
}

Path getReplicationDirFor(String leader) {
String storagePath = runtimeConfigFactory.globalRuntimeConf().getString(STORAGE_PATH_KEY);
String storagePath = confGetter.getStaticConf().getString(STORAGE_PATH_KEY);
return Paths.get(storagePath, REPLICATION_DIR, leader);
}

Expand Down Expand Up @@ -192,14 +206,11 @@ private void writeFederatedPrometheusConfig(String remoteAddr, File file) {

private void reloadPrometheusConfig() {
try {
String localPromHost =
runtimeConfigFactory.globalRuntimeConf().getString(PROMETHEUS_HOST_CONFIG_KEY);
int localPromPort =
runtimeConfigFactory.globalRuntimeConf().getInt(PROMETHEUS_PORT_CONFIG_KEY);
URL reloadEndpoint = new URL("http", localPromHost, localPromPort, "/-/reload");
String baseUrl = metricUrlProvider.getMetricsInternalUrl();
String reloadUrl = baseUrl + "/-/reload";

// Send the reload request.
this.apiHelper.postRequest(reloadEndpoint.toString(), Json.newObject());
this.apiHelper.postRequest(reloadUrl, Json.newObject());
} catch (Exception e) {
LOG.error("Error reloading prometheus config", e);
}
Expand Down Expand Up @@ -273,8 +284,7 @@ void switchPrometheusToFederated(URL remoteAddr) {
// Write the filled in template to disk.
// TBD: Need to fetch the Prometheus port from the remote PlatformInstance and use that here.
// For now we assume that the remote instance also uses the same port as the local one.
int remotePort = runtimeConfigFactory.globalRuntimeConf().getInt(PROMETHEUS_PORT_CONFIG_KEY);
String federatedAddr = remoteAddr.getHost() + ":" + remotePort;
String federatedAddr = metricUrlProvider.getMetricsExternalUrl();
this.writeFederatedPrometheusConfig(federatedAddr, configFile);

// Reload the config.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,9 @@ abstract class PlatformBackupParams {

// The addr that the prometheus server is running on.
private final String prometheusHost;

// The port that the prometheus server is running on.
private final int prometheusPort;
// The username that YW uses to connect to its DB.
private final String dbUsername;
// The password that YW uses to authenticate connections to its DB.
Expand All @@ -380,6 +383,7 @@ abstract class PlatformBackupParams {

protected PlatformBackupParams() {
this.prometheusHost = replicationHelper.getPrometheusHost();
this.prometheusPort = replicationHelper.getPrometheusPort();
this.dbUsername = replicationHelper.getDBUser();
this.dbPassword = replicationHelper.getDBPassword();
this.dbHost = replicationHelper.getDBHost();
Expand All @@ -400,6 +404,8 @@ List<String> getCommandArgs() {
commandArgs.add(Integer.toString(dbPort));
commandArgs.add("--prometheus_host");
commandArgs.add(prometheusHost);
commandArgs.add("--prometheus_port");
commandArgs.add(String.valueOf(prometheusPort));
commandArgs.add("--verbose");
commandArgs.add("--skip_restart");

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,9 @@ private JsonNode getMetrics() {
private JsonNode getMetrics(Map<String, String> queryParam) {
String queryUrl;
if (queryParam.containsKey("end")) {
queryUrl = metricUrlProvider.getMetricsUrl() + "/query_range";
queryUrl = metricUrlProvider.getMetricsApiUrl() + "/query_range";
} else {
queryUrl = metricUrlProvider.getMetricsUrl() + "/query";
queryUrl = metricUrlProvider.getMetricsApiUrl() + "/query";
}

log.trace("Executing metric query {}: {}", queryUrl, queryParam);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,6 @@ public class MetricQueryHelper {
public static final String ALERTS_PATH = "alerts";

public static final String MANAGEMENT_COMMAND_RELOAD = "reload";
private static final String PROMETHEUS_METRICS_URL_PATH = "yb.metrics.url";
private static final String PROMETHEUS_MANAGEMENT_URL_PATH = "yb.metrics.management.url";
public static final String PROMETHEUS_MANAGEMENT_ENABLED = "yb.metrics.management.enabled";

private static final String CONTAINER_METRIC_PREFIX = "container";
Expand Down Expand Up @@ -387,12 +385,6 @@ public JsonNode query(
}
}

String metricsUrl = appConfig.getString(PROMETHEUS_METRICS_URL_PATH);
if ((null == metricsUrl || metricsUrl.isEmpty())) {
LOG.error("Error fetching metrics data: no prometheus metrics URL configured");
return Json.newObject();
}

ExecutorService threadPool =
platformExecutorFactory.createFixedExecutor(
getClass().getSimpleName(),
Expand Down Expand Up @@ -485,7 +477,7 @@ public List<AlertData> queryAlerts() {
}

public void postManagementCommand(String command) {
final String queryUrl = getPrometheusManagementUrl(command);
final String queryUrl = metricUrlProvider.getMetricsManagementUrl() + "/" + command;
if (!apiHelper.postRequest(queryUrl)) {
throw new RuntimeException(
"Failed to perform " + command + " on prometheus instance " + queryUrl);
Expand All @@ -496,20 +488,8 @@ public boolean isPrometheusManagementEnabled() {
return appConfig.getBoolean(PROMETHEUS_MANAGEMENT_ENABLED);
}

private String getPrometheusManagementUrl(String path) {
final String prometheusManagementUrl = appConfig.getString(PROMETHEUS_MANAGEMENT_URL_PATH);
if (StringUtils.isEmpty(prometheusManagementUrl)) {
throw new RuntimeException(PROMETHEUS_MANAGEMENT_URL_PATH + " not set");
}
return prometheusManagementUrl + "/" + path;
}

private String getPrometheusQueryUrl(String path) {
final String metricsUrl = appConfig.getString(PROMETHEUS_METRICS_URL_PATH);
if (StringUtils.isEmpty(metricsUrl)) {
throw new RuntimeException(PROMETHEUS_METRICS_URL_PATH + " not set");
}
return metricsUrl + "/" + path;
return metricUrlProvider.getMetricsApiUrl() + "/" + path;
}

// Return a regex string for filtering the metrics based on
Expand Down
Loading

0 comments on commit 4467546

Please sign in to comment.