Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(dashmate): invalid drive status check #2248

Merged
merged 4 commits into from
Oct 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 5 additions & 1 deletion packages/dashmate/src/commands/status/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -108,8 +108,12 @@ export default class StatusCommand extends ConfigBaseCommand {

plain['Platform Enabled'] = platform.enabled || 'n/a';

const platformStatus = platform.tenderdash.serviceStatus !== ServiceStatusEnum.up
? platform.tenderdash.serviceStatus
: platform.drive.serviceStatus;

if (platform.enabled) {
plain['Platform Status'] = colors.status(platform.tenderdash.serviceStatus)(platform.tenderdash.serviceStatus) || 'n/a';
plain['Platform Status'] = colors.status(platformStatus)(platformStatus) || 'n/a';

if (platform.tenderdash.serviceStatus === ServiceStatusEnum.up) {
plain['Platform Version'] = platform.tenderdash.version || 'n/a';
Expand Down
1 change: 1 addition & 0 deletions packages/dashmate/src/config/generateEnvsFactory.js
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ export default function generateEnvsFactory(configFile, homeDir, getConfigProfil

let driveAbciMetricsUrl = '';
if (config.get('platform.drive.abci.metrics.enabled')) {
// IP and port inside container
driveAbciMetricsUrl = 'http://0.0.0.0:29090';
}

Expand Down
4 changes: 4 additions & 0 deletions packages/dashmate/src/status/determineStatus.js
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,10 @@ export default {
return coreIsSynced ? ServiceStatusEnum.up : ServiceStatusEnum.wait_for_core;
}

if (dockerStatus === DockerStatusEnum.not_started) {
return ServiceStatusEnum.stopped;
}

return ServiceStatusEnum.error;
},
};
3 changes: 2 additions & 1 deletion packages/dashmate/src/status/scopes/overview.js
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,9 @@ export default function getOverviewScopeFactory(
}

if (config.get('platform.enable')) {
const { tenderdash } = await getPlatformScope(config);
const { drive, tenderdash } = await getPlatformScope(config);

platform.drive = drive;
platform.tenderdash = tenderdash;
}

Expand Down
38 changes: 22 additions & 16 deletions packages/dashmate/src/status/scopes/platform.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import prettyMs from 'pretty-ms';
import DockerComposeError from '../../docker/errors/DockerComposeError.js';
import providers from '../providers.js';
import { DockerStatusEnum } from '../enums/dockerStatus.js';
import { ServiceStatusEnum } from '../enums/serviceStatus.js';
Expand Down Expand Up @@ -162,31 +163,36 @@ export default function getPlatformScopeFactory(

try {
info.dockerStatus = await determineStatus.docker(dockerCompose, config, 'drive_abci');
info.serviceStatus = determineStatus.platform(info.dockerStatus, isCoreSynced, mnRRSoftFork);
} catch (e) {
if (e instanceof ContainerIsNotPresentError) {
info.dockerStatus = DockerStatusEnum.not_started;
} else {
throw e;
}
}

info.serviceStatus = determineStatus.platform(info.dockerStatus, isCoreSynced, mnRRSoftFork);

if (info.serviceStatus === ServiceStatusEnum.up) {
const driveEchoResult = await dockerCompose.execCommand(
// Get Drive status to make sure it's responding
if (info.serviceStatus === ServiceStatusEnum.up) {
try {
await dockerCompose.execCommand(
config,
'drive_abci',
'drive-abci status',
);

if (driveEchoResult.exitCode !== 0) {
} catch (e) {
if (e instanceof DockerComposeError
&& e.dockerComposeExecutionResult
&& e.dockerComposeExecutionResult.exitCode !== 0) {
info.serviceStatus = ServiceStatusEnum.error;
} else {
throw e;
}
}

return info;
} catch (e) {
if (e instanceof ContainerIsNotPresentError) {
return {
dockerStatus: DockerStatusEnum.not_started,
serviceStatus: ServiceStatusEnum.stopped,
};
}

return info;
}

return info;
};

/**
Expand Down
8 changes: 5 additions & 3 deletions packages/dashmate/test/unit/status/scopes/platform.spec.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import ContainerIsNotPresentError
from '../../../../src/docker/errors/ContainerIsNotPresentError.js';
import providers from '../../../../src/status/providers.js';
import determineStatus from '../../../../src/status/determineStatus.js';
import getConfigMock from '../../../../src/test/mock/getConfigMock.js';
Expand Down Expand Up @@ -377,7 +379,7 @@ describe('getPlatformScopeFactory', () => {
mockDetermineDockerStatus.withArgs(mockDockerCompose, config, 'drive_tenderdash')
.returns(DockerStatusEnum.running);
mockDetermineDockerStatus.withArgs(mockDockerCompose, config, 'drive_abci')
.throws();
.throws(new ContainerIsNotPresentError('drive_abci'));
mockDockerCompose.execCommand.returns({ exitCode: 0, out: '' });
mockMNOWatchProvider.returns(Promise.resolve('OPEN'));

Expand Down Expand Up @@ -430,8 +432,8 @@ describe('getPlatformScopeFactory', () => {
network: 'test',
},
drive: {
dockerStatus: null,
serviceStatus: null,
dockerStatus: DockerStatusEnum.not_started,
serviceStatus: ServiceStatusEnum.stopped,
},
};

Expand Down
1 change: 0 additions & 1 deletion packages/rs-drive-abci/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,6 @@ metrics-exporter-prometheus = { version = "0.15", default-features = false, feat
"http-listener",
] }
url = { version = "2.3.1" }
ureq = { "version" = "2.6.2" }
tokio = { version = "1.40", features = [
"macros",
"signal",
Expand Down
82 changes: 41 additions & 41 deletions packages/rs-drive-abci/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,15 @@
//! RS-Drive-ABCI server starts a single-threaded server and listens to connections from Tenderdash.

use clap::{Parser, Subcommand};
use dapi_grpc::platform::v0::get_status_request;
use dapi_grpc::platform::v0::get_status_request::GetStatusRequestV0;
use dapi_grpc::platform::v0::platform_client::PlatformClient;
use dapi_grpc::tonic::transport::Uri;
use dpp::version::PlatformVersion;
use drive_abci::config::{FromEnv, PlatformConfig};
use drive_abci::core::wait_for_core_to_sync::v0::wait_for_core_to_sync_v0;
use drive_abci::logging::{LogBuilder, LogConfig, LogDestination, Loggers};
use drive_abci::metrics::{Prometheus, DEFAULT_PROMETHEUS_PORT};
use drive_abci::metrics::Prometheus;
use drive_abci::platform_types::platform::Platform;
use drive_abci::rpc::core::DefaultCoreRPC;
use drive_abci::{logging, server};
Expand All @@ -17,6 +21,7 @@ use std::fs::remove_file;
use std::net::SocketAddr;
use std::path::PathBuf;
use std::process::ExitCode;
use std::str::FromStr;
use std::sync::Arc;
use tokio::runtime::{Builder, Runtime};
use tokio::signal::unix::{signal, SignalKind};
Expand Down Expand Up @@ -98,6 +103,13 @@ impl Cli {
) -> Result<(), String> {
match self.command {
Commands::Start => {
tracing::info!(
version = env!("CARGO_PKG_VERSION"),
features = list_enabled_features().join(","),
rust = env!("CARGO_PKG_RUST_VERSION"),
"drive-abci server initializing",
);
Comment on lines +106 to +111
Copy link
Contributor

@coderabbitai coderabbitai bot Oct 17, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Potential undefined environment variable CARGO_PKG_RUST_VERSION

The use of env!("CARGO_PKG_RUST_VERSION") may result in a compilation error because CARGO_PKG_RUST_VERSION is not a standard Cargo-provided environment variable. This could cause the build to fail if the environment variable isn't defined.

Consider using the rustc_version crate to obtain the Rust compiler version:

  1. Add rustc_version to your Cargo.toml dependencies:

    [dependencies]
    rustc_version = "0.4"
  2. Modify the code to use rustc_version:

        tracing::info!(
            version = env!("CARGO_PKG_VERSION"),
            features = list_enabled_features().join(","),
    -       rust = env!("CARGO_PKG_RUST_VERSION"),
    +       rust = rustc_version::version().unwrap().to_string(),
            "drive-abci server initializing",
        );

Copy link
Member Author

@shumkov shumkov Oct 17, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a standard env.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@shumkov, understood. Thank you for the clarification.

(・ω・)?


✏️ Learnings added
Learnt from: shumkov
PR: dashpay/platform#2248
File: packages/rs-drive-abci/src/main.rs:106-111
Timestamp: 2024-10-17T08:52:54.159Z
Learning: In this project, the environment variable `CARGO_PKG_RUST_VERSION` is defined and can be used safely with the `env!` macro.

Note: Learnings are effective only in the context of similar code segments. To apply general coding guidelines, please configure review instructions. You can manage existing learnings in the app.


if config.drive.grovedb_verify_on_startup {
verify_grovedb(&config.db_path, false)?;
}
Expand Down Expand Up @@ -129,10 +141,12 @@ impl Cli {

server::start(runtime, Arc::new(platform), config, cancel);

tracing::info!("drive-abci server is stopped");

return Ok(());
}
Commands::Config => dump_config(&config)?,
Commands::Status => check_status(&config)?,
Commands::Status => runtime.block_on(check_status(&config))?,
Commands::Verify => verify_grovedb(&config.db_path, true)?,
};

Expand Down Expand Up @@ -202,13 +216,6 @@ fn main() -> Result<(), ExitCode> {
install_panic_hook(cancel.clone());

// Start runtime in the main thread
tracing::info!(
version = env!("CARGO_PKG_VERSION"),
features = list_enabled_features().join(","),
rust = env!("CARGO_PKG_RUST_VERSION"),
"drive-abci server initializing",
);

let runtime_guard = runtime.enter();

runtime.spawn(handle_signals(cancel.clone(), loggers));
Expand All @@ -219,15 +226,13 @@ fn main() -> Result<(), ExitCode> {
Ok(())
}
Err(e) => {
tracing::error!(error = e, "drive-abci failed");
tracing::error!(error = e, "drive-abci failed: {e}");
Err(ExitCode::FAILURE)
}
};

drop(runtime_guard);
runtime.shutdown_timeout(Duration::from_millis(SHUTDOWN_TIMEOUT_MILIS));
tracing::info!("drive-abci server is stopped");

result
}

Expand Down Expand Up @@ -303,31 +308,27 @@ fn list_enabled_features() -> Vec<&'static str> {
}

/// Check status of ABCI server.
fn check_status(config: &PlatformConfig) -> Result<(), String> {
if let Some(prometheus_addr) = &config.prometheus_bind_address {
let url =
url::Url::parse(prometheus_addr).expect("cannot parse ABCI_PROMETHEUS_BIND_ADDRESS");

let addr = format!(
"{}://{}:{}/metrics",
url.scheme(),
url.host()
.ok_or("ABCI_PROMETHEUS_BIND_ADDRESS must contain valid host".to_string())?,
url.port().unwrap_or(DEFAULT_PROMETHEUS_PORT)
);

let body: String = ureq::get(&addr)
.set("Content-type", "text/plain")
.call()
.map_err(|e| e.to_string())?
.into_string()
.map_err(|e| e.to_string())?;
async fn check_status(config: &PlatformConfig) -> Result<(), String> {
// Convert the gRPC bind address string to a Uri
let uri = Uri::from_str(&format!("http://{}", config.grpc_bind_address))
.map_err(|e| format!("invalid url: {e}"))?;

// Connect to the gRPC server
let mut client = PlatformClient::connect(uri.clone())
.await
.map_err(|e| format!("can't connect to grpc server {uri}: {e}"))?;

// Make a request to the server
let request = dapi_grpc::platform::v0::GetStatusRequest {
version: Some(get_status_request::Version::V0(GetStatusRequestV0 {})),
};

println!("{}", body);
Ok(())
} else {
Err("ABCI_PROMETHEUS_BIND_ADDRESS not defined, cannot check status".to_string())
}
// Should return non-zero error code if Drive is not responding
client
.get_status(request)
.await
.map(|_| ())
.map_err(|e| format!("can't request status: {e}"))
Comment on lines +311 to +331
Copy link
Contributor

@coderabbitai coderabbitai bot Oct 17, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Enhance error handling in check_status function

The current error handling in the check_status function could be improved to provide more context and robustness. Adding timeouts and handling specific error cases can make the function more reliable.

Consider implementing timeouts and more detailed error messages:

use tokio::time::{timeout, Duration};

async fn check_status(config: &PlatformConfig) -> Result<(), String> {
    // Convert the gRPC bind address string to a Uri
    let uri = Uri::from_str(&format!("http://{}", config.grpc_bind_address))
        .map_err(|e| format!("Invalid gRPC URL '{}': {}", config.grpc_bind_address, e))?;

    // Set a timeout duration
    let timeout_duration = Duration::from_secs(5);

    // Connect to the gRPC server within the timeout period
    let mut client = timeout(
        timeout_duration,
        PlatformClient::connect(uri.clone())
    )
    .await
    .map_err(|_| format!("Timeout connecting to gRPC server at {}", uri))?
    .map_err(|e| format!("Can't connect to gRPC server {}: {}", uri, e))?;

    // Create the request
    let request = dapi_grpc::platform::v0::GetStatusRequest {
        version: Some(get_status_request::Version::V0(GetStatusRequestV0 {})),
    };

    // Send the request within the timeout period
    timeout(timeout_duration, client.get_status(request))
        .await
        .map_err(|_| "Timeout requesting status from gRPC server".to_string())?
        .map(|_| ())
        .map_err(|e| format!("Can't request status: {}", e))
}

This adds a timeout to both the connection and the request, providing better control over potential network issues.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This connection happening in local network. Either port is listening or not. We do not expect timeouts.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@shumkov, understood. Since the connection is within the local network and timeouts are not expected, the current implementation should suffice.


✏️ Learnings added
Learnt from: shumkov
PR: dashpay/platform#2248
File: packages/rs-drive-abci/src/main.rs:311-331
Timestamp: 2024-10-17T08:52:46.706Z
Learning: In our codebase, connections to the gRPC server happen within the local network, so timeouts are not expected, and adding timeouts is unnecessary.

Note: Learnings are effective only in the context of similar code segments. To apply general coding guidelines, please configure review instructions. You can manage existing learnings in the app.

}

/// Verify GroveDB integrity.
Expand Down Expand Up @@ -415,7 +416,7 @@ fn configure_logging(cli: &Cli, config: &PlatformConfig) -> Result<Loggers, logg
if configs.is_empty() || cli.verbose > 0 {
let cli_config = LogConfig {
destination: LogDestination::StdOut,
level: cli.verbose.try_into().unwrap(),
level: cli.verbose.try_into()?,
color: cli.color,
..Default::default()
};
Expand All @@ -442,15 +443,14 @@ fn install_panic_hook(cancel: CancellationToken) {

#[cfg(test)]
mod test {
use ::drive::{drive::Drive, query::Element};
use dpp::block::epoch::Epoch;
use drive::drive::credit_pools::epochs::epoch_key_constants;
use std::{
fs,
path::{Path, PathBuf},
};

use ::drive::{drive::Drive, query::Element};
use dpp::block::epoch::Epoch;
use drive::drive::credit_pools::epochs::epoch_key_constants;

use dpp::version::PlatformVersion;
use drive::drive::credit_pools::epochs::paths::EpochProposers;
use drive_abci::logging::LogLevel;
Expand Down
Loading