feat(metadata_json): port the backfill command. refactor to a worker struct to be used by the backfill and ingest commands. setup receiver for sending assets that need to be indexed to a redis stream.

kespinola committed Jan 11, 2024
1 parent 0cc0482 commit f4dbe2c
Showing 15 changed files with 672 additions and 2 deletions.
83 changes: 82 additions & 1 deletion Cargo.lock

(Generated file; diff not rendered.)

1 change: 1 addition & 0 deletions Cargo.toml
@@ -1,6 +1,7 @@
[workspace]
members = [
    "digital_asset_types",
    "metadata_json",
    "metaplex-rpc-proxy",
    "nft_ingester",
    "tree_backfiller",
97 changes: 97 additions & 0 deletions metadata_json/Cargo.toml
@@ -0,0 +1,97 @@
[package]
name = "das-metadata-json"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[lib]

[[bin]]
name = "das-metadata-json"

[dependencies]

backon = "0.4.1"
log = "0.4.17"
env_logger = "0.10.0"
anyhow = "1.0.75"
derive_more = "0.99.17"
redis = { version = "0.22.3", features = [
    "aio",
    "tokio-comp",
    "streams",
    "tokio-native-tls-comp",
] }
futures = { version = "0.3.25" }
futures-util = "0.3.27"
base64 = "0.21.0"
indicatif = "0.17.5"
thiserror = "1.0.31"
serde_json = "1.0.81"
cadence = "0.29.0"
cadence-macros = "0.29.0"
hyper = "0.14.23"
anchor-client = "0.28.0"
das-tree-backfiller = { path = "../tree_backfiller" }
tokio = { version = "1.26.0", features = ["full", "tracing"] }
sqlx = { version = "0.6.2", features = [
    "macros",
    "runtime-tokio-rustls",
    "postgres",
    "uuid",
    "offline",
    "json",
] }
sea-orm = { version = "0.10.6", features = [
    "macros",
    "runtime-tokio-rustls",
    "sqlx-postgres",
    "with-chrono",
    "mock",
] }
sea-query = { version = "0.28.1", features = ["postgres-array"] }
chrono = "0.4.19"
tokio-postgres = "0.7.7"
serde = "1.0.136"
bs58 = "0.4.0"
reqwest = "0.11.11"
plerkle_messenger = { version = "1.6.0", features = ['redis'] }
plerkle_serialization = { version = "1.6.0" }
flatbuffers = "23.1.21"
lazy_static = "1.4.0"
regex = "1.5.5"
digital_asset_types = { path = "../digital_asset_types", features = [
    "json_types",
    "sql_types",
] }
mpl-bubblegum = "1.0.1-beta.3"
spl-account-compression = { version = "0.2.0", features = ["no-entrypoint"] }
spl-concurrent-merkle-tree = "0.2.0"
uuid = "1.0.0"
async-trait = "0.1.53"
num-traits = "0.2.15"
blockbuster = "0.9.0-beta.1"
figment = { version = "0.10.6", features = ["env", "toml", "yaml"] }
solana-sdk = "~1.16.16"
solana-client = "~1.16.16"
spl-token = { version = ">= 3.5.0, < 5.0", features = ["no-entrypoint"] }
solana-transaction-status = "~1.16.16"
solana-account-decoder = "~1.16.16"
solana-geyser-plugin-interface = "~1.16.16"
solana-sdk-macro = "~1.16.16"
rand = "0.8.5"
rust-crypto = "0.2.36"
url = "2.3.1"
anchor-lang = "0.28.0"
borsh = "~0.10.3"
stretto = { version = "0.7", features = ["async"] }
tokio-stream = "0.1.12"
tracing-subscriber = { version = "0.3.16", features = [
    "json",
    "env-filter",
    "ansi",
] }
clap = { version = "4.2.2", features = ["derive", "cargo", "env"] }

[lints]
workspace = true
89 changes: 89 additions & 0 deletions metadata_json/src/cmds/backfill.rs
@@ -0,0 +1,89 @@
use {
    crate::worker::{Worker, WorkerArgs},
    clap::Parser,
    das_tree_backfiller::db,
    das_tree_backfiller::metrics::{Metrics, MetricsArgs},
    digital_asset_types::dao::asset_data,
    log::info,
    reqwest::ClientBuilder,
    sea_orm::{entity::*, prelude::*, query::*, SqlxPostgresConnector},
    tokio::time::Duration,
};

#[derive(Parser, Clone, Debug)]
pub struct BackfillArgs {
    #[clap(flatten)]
    database: db::PoolArgs,

    #[command(flatten)]
    metrics: MetricsArgs,

    #[command(flatten)]
    worker: WorkerArgs,

    #[arg(long, default_value = "1000")]
    timeout: u64,

    #[arg(long, default_value = "1000")]
    batch_size: u64,
}

pub async fn run(args: BackfillArgs) -> Result<(), anyhow::Error> {
    let batch_size = args.batch_size;

    let pool = db::connect(args.database).await?;

    let metrics = Metrics::try_from_config(args.metrics)?;

    let client = ClientBuilder::new()
        .timeout(Duration::from_millis(args.timeout))
        .build()?;

    let worker = Worker::from(args.worker);

    let (tx, handle) = worker.start(pool.clone(), metrics.clone(), client.clone());

    let conn = SqlxPostgresConnector::from_sqlx_postgres_pool(pool);

    // Select only assets flagged for reindexing, in stable id order for
    // keyset pagination.
    let query = asset_data::Entity::find()
        .filter(asset_data::Column::Reindex.eq(true))
        .order_by(asset_data::Column::Id, Order::Asc);

    let mut after = None;

    loop {
        let mut cursor = query.clone().cursor_by(asset_data::Column::Id);
        cursor.first(batch_size);

        if let Some(after) = after {
            cursor.after(after);
        }

        let assets = cursor.all(&conn).await?;
        let assets_count = assets.len();

        after = assets.last().map(|asset| asset.id.clone());

        for asset in assets {
            tx.send(asset.id).await?;
        }

        // A short page means the table has been exhausted.
        if u64::try_from(assets_count)? < batch_size {
            break;
        }
    }

    drop(tx);

    info!("Waiting for tasks to finish");
    handle.await?;

    info!("Tasks finished");
    Ok(())
}
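
The `worker` module referenced above is among the files not rendered in this view. What follows is a minimal sketch of the interface both commands program against, inferred from the call sites (`Worker::from(args.worker)`, and `worker.start(pool, metrics, client)` returning a sender plus a join handle). The field names, defaults, and bounded-concurrency strategy are assumptions, not the commit's actual implementation.

use {
    clap::Parser,
    das_tree_backfiller::metrics::Metrics,
    futures::{stream::FuturesUnordered, StreamExt},
    tokio::{sync::mpsc, task::JoinHandle},
};

#[derive(Parser, Clone, Debug)]
pub struct WorkerArgs {
    /// Channel capacity before senders back-pressure (name and default assumed).
    #[arg(long, env, default_value = "1000")]
    queue_size: usize,

    /// Maximum number of concurrent fetches (name and default assumed).
    #[arg(long, env, default_value = "25")]
    worker_count: usize,
}

pub struct Worker {
    queue_size: usize,
    worker_count: usize,
}

impl From<WorkerArgs> for Worker {
    fn from(args: WorkerArgs) -> Self {
        Self {
            queue_size: args.queue_size,
            worker_count: args.worker_count,
        }
    }
}

impl Worker {
    /// Fan asset ids out to a bounded set of concurrent fetch tasks. The
    /// returned handle resolves once the sender is dropped and all in-flight
    /// tasks have drained, which is what lets the commands `drop(tx)` and
    /// then `handle.await?`.
    pub fn start(
        &self,
        pool: sqlx::PgPool,
        metrics: Metrics,
        client: reqwest::Client,
    ) -> (mpsc::Sender<Vec<u8>>, JoinHandle<()>) {
        let (tx, mut rx) = mpsc::channel::<Vec<u8>>(self.queue_size);
        let worker_count = self.worker_count;

        let handle = tokio::spawn(async move {
            let mut tasks = FuturesUnordered::new();

            while let Some(asset_id) = rx.recv().await {
                // Cap in-flight downloads at `worker_count`.
                if tasks.len() >= worker_count {
                    tasks.next().await;
                }

                let (pool, metrics, client) = (pool.clone(), metrics.clone(), client.clone());
                tasks.push(tokio::spawn(async move {
                    // Fetch the off-chain JSON for `asset_id` with `client`,
                    // persist it via `pool`, and record timings on `metrics`;
                    // the real body lives in the unrendered worker module.
                    let _ = (asset_id, pool, metrics, client);
                }));
            }

            // Sender dropped: drain whatever is still in flight.
            while tasks.next().await.is_some() {}
        });

        (tx, handle)
    }
}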
62 changes: 62 additions & 0 deletions metadata_json/src/cmds/ingest.rs
@@ -0,0 +1,62 @@
use crate::stream::{Receiver, ReceiverArgs};
use crate::worker::{Worker, WorkerArgs};
use clap::Parser;
use das_tree_backfiller::{
    db,
    metrics::{Metrics, MetricsArgs},
};
use log::info;
use reqwest::ClientBuilder;
use tokio::time::Duration;

#[derive(Parser, Clone, Debug)]
pub struct IngestArgs {
    #[clap(flatten)]
    receiver: ReceiverArgs,

    #[clap(flatten)]
    metrics: MetricsArgs,

    #[clap(flatten)]
    database: db::PoolArgs,

    #[arg(long, default_value = "1000")]
    timeout: u64,

    #[clap(flatten)]
    worker: WorkerArgs,
}

pub async fn run(args: IngestArgs) -> Result<(), anyhow::Error> {
    let rx = Receiver::try_from_config(args.receiver.into()).await?;

    let pool = db::connect(args.database).await?;

    let metrics = Metrics::try_from_config(args.metrics)?;

    let client = ClientBuilder::new()
        .timeout(Duration::from_millis(args.timeout))
        .build()?;

    let worker = Worker::from(args.worker);

    let (tx, handle) = worker.start(pool.clone(), metrics.clone(), client.clone());

    while let Ok(messages) = rx.recv().await {
        for message in &messages {
            tx.send(message.data.clone()).await?;
        }

        // Acknowledge the batch only after every message has been queued.
        let ids: Vec<String> = messages.into_iter().map(|m| m.id).collect();
        rx.ack(&ids).await?;
    }

    drop(tx);

    handle.await?;

    info!("Ingesting stopped");

    Ok(())
}
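
The `stream` module that provides `Receiver` is likewise not rendered here. From the call sites — `recv()` yielding a batch of messages carrying `id` and `data`, and `ack(&ids)` after processing — it reads from a redis stream via a consumer group; given the Cargo.toml above it plausibly wraps `plerkle_messenger`. Below is a minimal sketch of that surface written directly against the `redis` crate's streams API; the stream key handling, group naming, batch size, and the `data` field name are all assumptions.

use redis::{
    aio::MultiplexedConnection,
    streams::{StreamReadOptions, StreamReadReply},
    AsyncCommands,
};

#[derive(Clone, Debug)]
pub struct Message {
    pub id: String,
    pub data: Vec<u8>,
}

pub struct Receiver {
    connection: MultiplexedConnection,
    stream: String,
    group: String,
    consumer: String,
}

impl Receiver {
    /// Read the next batch of entries for this consumer group. Assumes the
    /// group was created beforehand (XGROUP CREATE, elided here).
    pub async fn recv(&self) -> Result<Vec<Message>, redis::RedisError> {
        let opts = StreamReadOptions::default()
            .group(&self.group, &self.consumer)
            .count(100)
            .block(1000);

        let mut connection = self.connection.clone();
        let reply: StreamReadReply = connection
            .xread_options(&[&self.stream], &[">"], &opts)
            .await?;

        Ok(reply
            .keys
            .into_iter()
            .flat_map(|key| key.ids)
            .map(|entry| Message {
                data: entry.get::<Vec<u8>>("data").unwrap_or_default(),
                id: entry.id,
            })
            .collect())
    }

    /// Acknowledge processed entries so the group will not redeliver them.
    pub async fn ack(&self, ids: &[String]) -> Result<(), redis::RedisError> {
        let mut connection = self.connection.clone();
        connection.xack(&self.stream, &self.group, ids).await
    }
}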
2 changes: 2 additions & 0 deletions metadata_json/src/cmds/mod.rs
@@ -0,0 +1,2 @@
pub mod backfill;
pub mod ingest;
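
The binary's entry point is not among the rendered files either. Wiring these two subcommands with clap's derive API would plausibly look like the following; the `Args`/`Command` names and doc strings are illustrative, not taken from the commit.

mod cmds;
mod stream;
mod worker;

use clap::{Parser, Subcommand};
use cmds::{backfill::BackfillArgs, ingest::IngestArgs};

#[derive(Parser)]
#[command(name = "das-metadata-json")]
struct Args {
    #[command(subcommand)]
    command: Command,
}

#[derive(Subcommand)]
enum Command {
    /// Re-fetch metadata JSON for assets flagged `reindex = true`.
    Backfill(BackfillArgs),
    /// Drain asset ids from the redis stream as they are produced.
    Ingest(IngestArgs),
}

#[tokio::main]
async fn main() -> Result<(), anyhow::Error> {
    env_logger::init();

    match Args::parse().command {
        Command::Backfill(args) => cmds::backfill::run(args).await,
        Command::Ingest(args) => cmds::ingest::run(args).await,
    }
}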
3 changes: 3 additions & 0 deletions metadata_json/src/lib.rs
@@ -0,0 +1,3 @@
mod stream;

pub use stream::*;
(The remaining changed files in this commit are not rendered.)
