Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat/add stocks dataset example #319

Merged
merged 15 commits into from
Oct 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 20 additions & 12 deletions crates/proof-of-sql/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -89,47 +89,55 @@ required-features = ["test"]

[[example]]
name = "posql_db"
required-features = [ "arrow" ]
required-features = ["arrow"]

[[example]]
name = "space"
required-features = [ "arrow" ]
required-features = ["arrow"]

[[example]]
name = "dog_breeds"
required-features = [ "arrow" ]
required-features = ["arrow"]

[[example]]
name = "wood_types"
required-features = ["arrow"]

[[example]]
name = "movies"
required-features = [ "arrow" ]

[[example]]
name = "dinosaurs"
required-features = [ "arrow" ]
required-features = ["arrow"]

[[example]]
name = "books"
required-features = [ "arrow" ]
required-features = ["arrow"]

[[example]]
name = "programming_books"
required-features = ["arrow"]

[[example]]
name = "brands"
required-features = [ "arrow" ]
required-features = ["arrow"]

[[example]]
name = "plastics"
required-features = [ "arrow" ]
required-features = ["arrow"]

[[example]]
name = "avocado-prices"
required-features = [ "arrow" ]
required-features = ["arrow"]

[[example]]
name = "sushi"
required-features = [ "arrow" ]
required-features = ["arrow"]

[[example]]
name = "stocks"
required-features = ["arrow"]

[[example]]
name = "countries"
Expand All @@ -142,14 +150,14 @@ required-features = [ "arrow" ]
[[bench]]
name = "posql_benches"
harness = false
required-features = [ "blitzar" ]
required-features = ["blitzar"]

[[bench]]
name = "bench_append_rows"
harness = false
required-features = [ "test" ]
required-features = ["test"]

[[bench]]
name = "jaeger_benches"
harness = false
required-features = [ "blitzar" ]
required-features = ["blitzar"]
120 changes: 120 additions & 0 deletions crates/proof-of-sql/examples/movies/main.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
//! This is a non-interactive example of using Proof of SQL with a movies dataset.
//! To run this, use `cargo run --release --example movies`.
//!
//! NOTE: If this doesn't work because you do not have the appropriate GPU drivers installed,
//! you can run `cargo run --release --example movies --no-default-features --features="arrow cpu-perf"` instead. It will be slower for proof generation.

use arrow::datatypes::SchemaRef;
use arrow_csv::{infer_schema_from_files, ReaderBuilder};
use proof_of_sql::{
base::database::{
arrow_schema_utility::get_posql_compatible_schema, OwnedTable, OwnedTableTestAccessor,
TestAccessor,
},
proof_primitive::dory::{
DynamicDoryCommitment, DynamicDoryEvaluationProof, ProverSetup, PublicParameters,
VerifierSetup,
},
sql::{parse::QueryExpr, postprocessing::apply_postprocessing_steps, proof::QueryProof},
};
use rand::{rngs::StdRng, SeedableRng};
use std::{fs::File, time::Instant};

// We generate the public parameters and the setups used by the prover and verifier for the Dory PCS.
// The `max_nu` should be set such that the maximum table size is less than `2^(2*max_nu-1)`.
const DORY_SETUP_MAX_NU: usize = 8;
// This should be a "nothing-up-my-sleeve" phrase or number.
const DORY_SEED: [u8; 32] = *b"ebab60d58dee4cc69658939b7c2a582d";

/// Parses `sql`, generates a proof of its result, verifies that proof, and prints the
/// verified (post-processed) result along with how long each stage took.
///
/// * `sql` - text of the query to prove; it is parsed with `"movies"` passed to
///   `QueryExpr::try_new` as the schema identifier.
/// * `accessor` - table accessor handed to the parser, the prover and the verifier.
/// * `prover_setup` - Dory setup passed to `QueryProof::new`.
/// * `verifier_setup` - Dory setup passed to `QueryProof::verify`.
///
/// # Panics
/// Will panic if the query does not parse or the proof fails to verify.
fn prove_and_verify_query(
    sql: &str,
    accessor: &OwnedTableTestAccessor<DynamicDoryEvaluationProof>,
    prover_setup: &ProverSetup,
    verifier_setup: &VerifierSetup,
) {
    // Parse the query:
    println!("Parsing the query: {sql}...");
    let now = Instant::now();
    let query_plan = QueryExpr::<DynamicDoryCommitment>::try_new(
        sql.parse().unwrap(),
        "movies".parse().unwrap(),
        accessor,
    )
    .unwrap();
    println!("Done in {} ms.", now.elapsed().as_secs_f64() * 1000.);

    // Generate the proof and result:
    print!("Generating proof...");
    // `print!` does not end the line, and stdout is typically line-buffered, so flush
    // explicitly to make the progress message visible *before* the slow step runs.
    // A failed flush only delays the message, so the error is deliberately ignored.
    let _ = std::io::Write::flush(&mut std::io::stdout());
    let now = Instant::now();
    let (proof, provable_result) = QueryProof::<DynamicDoryEvaluationProof>::new(
        query_plan.proof_expr(),
        accessor,
        &prover_setup,
    );
    println!("Done in {} ms.", now.elapsed().as_secs_f64() * 1000.);

    // Verify the result with the proof:
    print!("Verifying proof...");
    // Same reasoning as above: show the message before verification starts.
    let _ = std::io::Write::flush(&mut std::io::stdout());
    let now = Instant::now();
    let result = proof
        .verify(
            query_plan.proof_expr(),
            accessor,
            &provable_result,
            &verifier_setup,
        )
        .unwrap();
    // Post-processing runs inside the timed section, so it is included in the
    // "Verified in" figure printed below.
    let result = apply_postprocessing_steps(result.table, query_plan.postprocessing());
    println!("Verified in {} ms.", now.elapsed().as_secs_f64() * 1000.);

    // Display the result
    println!("Query Result:");
    println!("{result:?}");
}

/// Runs the movies example: builds the Dory setups from a fixed seed, loads the movies
/// CSV into an accessor under the table name `movies.movies`, then proves and verifies
/// two queries against it.
///
/// # Panics
/// Panics if the CSV file cannot be opened, its schema cannot be inferred, it cannot be
/// read or converted into an `OwnedTable`, or if any query fails to parse or verify.
fn main() {
    let mut rng = StdRng::from_seed(DORY_SEED);
    let public_parameters = PublicParameters::rand(DORY_SETUP_MAX_NU, &mut rng);
    let prover_setup = ProverSetup::from(&public_parameters);
    let verifier_setup = VerifierSetup::from(&public_parameters);

    // NOTE: this path is relative to the current working directory, so the example
    // has to be launched from a directory where it resolves.
    let filename = "./crates/proof-of-sql/examples/movies/movies.csv";
    let inferred_schema = SchemaRef::new(
        infer_schema_from_files(&[filename.to_string()], b',', None, true).unwrap_or_else(|e| {
            panic!("failed to infer a schema from {filename} (the path is relative to the current directory): {e}")
        }),
    );
    let posql_compatible_schema = get_posql_compatible_schema(&inferred_schema);

    let csv_file = File::open(filename).unwrap_or_else(|e| {
        panic!("failed to open {filename} (the path is relative to the current directory): {e}")
    });
    // Only the first record batch yielded by the reader is loaded.
    let movies_batch = ReaderBuilder::new(posql_compatible_schema)
        .with_header(true)
        .build(csv_file)
        .unwrap_or_else(|e| panic!("failed to create a CSV reader for {filename}: {e}"))
        .next()
        .unwrap_or_else(|| panic!("{filename} did not yield any record batch"))
        .unwrap_or_else(|e| panic!("failed to read a record batch from {filename}: {e}"));

    // Load the table into an "Accessor" so that the prover and verifier can access the data/commitments.
    let mut accessor =
        OwnedTableTestAccessor::<DynamicDoryEvaluationProof>::new_empty_with_setup(&prover_setup);
    accessor.add_table(
        "movies.movies".parse().unwrap(),
        OwnedTable::try_from(movies_batch)
            .expect("the movies record batch should convert into an OwnedTable"),
        0,
    );

    // Query 1: Count the total number of movies
    prove_and_verify_query(
        "SELECT COUNT(*) AS total_movies FROM movies",
        &accessor,
        &prover_setup,
        &verifier_setup,
    );

    // Query 2: Find the top 5 highest-rated movies
    prove_and_verify_query(
        "SELECT title, rating FROM movies ORDER BY rating DESC LIMIT 5",
        &accessor,
        &prover_setup,
        &verifier_setup,
    );
}
31 changes: 31 additions & 0 deletions crates/proof-of-sql/examples/movies/movies.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
id, title, year, genre, rating, duration
1, The Matrix, 1999, Action, 8.7, 136
2, The Matrix Reloaded, 2003, Action, 7.2, 138
3, The Matrix Revolutions, 2003, Action, 6.7, 129
4, Inception, 2010, Sci-Fi, 8.8, 148
5, The Dark Knight, 2008, Action, 9.0, 152
6, Pulp Fiction, 1994, Crime, 8.9, 154
7, Forrest Gump, 1994, Drama, 8.8, 142
8, The Shawshank Redemption, 1994, Drama, 9.3, 142
9, The Godfather, 1972, Crime, 9.2, 175
10, Fight Club, 1999, Drama, 8.8, 139
11, The Lord of the Rings: The Fellowship of the Ring, 2001, Adventure, 8.8, 178
12, Goodfellas, 1990, Crime, 8.7, 146
13, The Silence of the Lambs, 1991, Thriller, 8.6, 118
14, Schindler's List, 1993, Biography, 8.9, 195
15, Interstellar, 2014, Sci-Fi, 8.6, 169
16, The Green Mile, 1999, Crime, 8.6, 189
17, Saving Private Ryan, 1998, War, 8.6, 169
18, Gladiator, 2000, Action, 8.5, 155
19, The Avengers, 2012, Action, 8.0, 143
20, Jurassic Park, 1993, Adventure, 8.1, 127
21, The Lion King, 1994, Animation, 8.5, 88
22, Titanic, 1997, Romance, 7.8, 194
23, The Departed, 2006, Crime, 8.5, 151
24, The Prestige, 2006, Mystery, 8.5, 130
25, The Social Network, 2010, Biography, 7.7, 120
26, Avatar, 2009, Action, 7.8, 162
27, The Sixth Sense, 1999, Thriller, 8.1, 107
28, The Usual Suspects, 1995, Crime, 8.5, 106
29, Memento, 2000, Mystery, 8.4, 113
30, Eternal Sunshine of the Spotless Mind, 2004, Romance, 8.3, 108
144 changes: 144 additions & 0 deletions crates/proof-of-sql/examples/stocks/main.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
//! This is a non-interactive example of using Proof of SQL with a stocks dataset.
//! To run this, use `cargo run --release --example stocks`.
//!
//! NOTE: If this doesn't work because you do not have the appropriate GPU drivers installed,
//! you can run `cargo run --release --example stocks --no-default-features --features="arrow cpu-perf"` instead. It will be slower for proof generation.

use arrow::datatypes::SchemaRef;
use arrow_csv::{infer_schema_from_files, ReaderBuilder};
use proof_of_sql::{
base::database::{
arrow_schema_utility::get_posql_compatible_schema, OwnedTable, OwnedTableTestAccessor,
TestAccessor,
},
proof_primitive::dory::{
DynamicDoryCommitment, DynamicDoryEvaluationProof, ProverSetup, PublicParameters,
VerifierSetup,
},
sql::{parse::QueryExpr, postprocessing::apply_postprocessing_steps, proof::QueryProof},
};
use rand::{rngs::StdRng, SeedableRng};
use std::{fs::File, time::Instant};

// We generate the public parameters and the setups used by the prover and verifier for the Dory PCS.
// The `max_nu` should be set such that the maximum table size is less than `2^(2*max_nu-1)`.
const DORY_SETUP_MAX_NU: usize = 8;
// This should be a "nothing-up-my-sleeve" phrase or number.
const DORY_SEED: [u8; 32] = *b"f9d2e8c1b7a654309cfe81d2b7a3c940";

/// Parses `sql`, generates a proof of its result, verifies that proof, and prints the
/// verified (post-processed) result along with how long each stage took.
///
/// * `sql` - text of the query to prove; it is parsed with `"stocks"` passed to
///   `QueryExpr::try_new` as the schema identifier.
/// * `accessor` - table accessor handed to the parser, the prover and the verifier.
/// * `prover_setup` - Dory setup passed to `QueryProof::new`.
/// * `verifier_setup` - Dory setup passed to `QueryProof::verify`.
///
/// # Panics
/// Will panic if the query does not parse or the proof fails to verify.
fn prove_and_verify_query(
    sql: &str,
    accessor: &OwnedTableTestAccessor<DynamicDoryEvaluationProof>,
    prover_setup: &ProverSetup,
    verifier_setup: &VerifierSetup,
) {
    // Parse the query:
    println!("Parsing the query: {sql}...");
    let now = Instant::now();
    let query_plan = QueryExpr::<DynamicDoryCommitment>::try_new(
        sql.parse().unwrap(),
        "stocks".parse().unwrap(),
        accessor,
    )
    .unwrap();
    println!("Done in {} ms.", now.elapsed().as_secs_f64() * 1000.);

    // Generate the proof and result:
    print!("Generating proof...");
    // `print!` does not end the line, and stdout is typically line-buffered, so flush
    // explicitly to make the progress message visible *before* the slow step runs.
    // A failed flush only delays the message, so the error is deliberately ignored.
    let _ = std::io::Write::flush(&mut std::io::stdout());
    let now = Instant::now();
    let (proof, provable_result) = QueryProof::<DynamicDoryEvaluationProof>::new(
        query_plan.proof_expr(),
        accessor,
        &prover_setup,
    );
    println!("Done in {} ms.", now.elapsed().as_secs_f64() * 1000.);

    // Verify the result with the proof:
    print!("Verifying proof...");
    // Same reasoning as above: show the message before verification starts.
    let _ = std::io::Write::flush(&mut std::io::stdout());
    let now = Instant::now();
    let result = proof
        .verify(
            query_plan.proof_expr(),
            accessor,
            &provable_result,
            &verifier_setup,
        )
        .unwrap();
    // Post-processing runs inside the timed section, so it is included in the
    // "Verified in" figure printed below.
    let result = apply_postprocessing_steps(result.table, query_plan.postprocessing());
    println!("Verified in {} ms.", now.elapsed().as_secs_f64() * 1000.);

    // Display the result
    println!("Query Result:");
    println!("{result:?}");
}

/// Runs the stocks example: builds the Dory setups from a fixed seed, loads the stocks
/// CSV into an accessor under the table name `stocks.stocks`, then proves and verifies
/// four queries against it.
///
/// # Panics
/// Panics if the CSV file cannot be opened, its schema cannot be inferred, it cannot be
/// read or converted into an `OwnedTable`, or if any query fails to parse or verify.
fn main() {
    let mut rng = StdRng::from_seed(DORY_SEED);
    let public_parameters = PublicParameters::rand(DORY_SETUP_MAX_NU, &mut rng);
    let prover_setup = ProverSetup::from(&public_parameters);
    let verifier_setup = VerifierSetup::from(&public_parameters);

    // NOTE: this path is relative to the current working directory, so the example
    // has to be launched from a directory where it resolves.
    let filename = "./crates/proof-of-sql/examples/stocks/stocks.csv";
    let schema = get_posql_compatible_schema(&SchemaRef::new(
        infer_schema_from_files(&[filename.to_string()], b',', None, true).unwrap_or_else(|e| {
            panic!("failed to infer a schema from {filename} (the path is relative to the current directory): {e}")
        }),
    ));
    let csv_file = File::open(filename).unwrap_or_else(|e| {
        panic!("failed to open {filename} (the path is relative to the current directory): {e}")
    });
    // Only the first record batch yielded by the reader is loaded.
    let stocks_batch = ReaderBuilder::new(schema)
        .with_header(true)
        .build(csv_file)
        .unwrap_or_else(|e| panic!("failed to create a CSV reader for {filename}: {e}"))
        .next()
        .unwrap_or_else(|| panic!("{filename} did not yield any record batch"))
        .unwrap_or_else(|e| panic!("failed to read a record batch from {filename}: {e}"));

    // Load the table into an "Accessor" so that the prover and verifier can access the data/commitments.
    let mut accessor =
        OwnedTableTestAccessor::<DynamicDoryEvaluationProof>::new_empty_with_setup(&prover_setup);
    accessor.add_table(
        "stocks.stocks".parse().unwrap(),
        OwnedTable::try_from(stocks_batch)
            .expect("the stocks record batch should convert into an OwnedTable"),
        0,
    );

    // Query 1: Calculate total market cap and count of stocks
    prove_and_verify_query(
        "SELECT SUM(MarketCap) as total_market_cap, COUNT(*) as c FROM stocks",
        &accessor,
        &prover_setup,
        &verifier_setup,
    );

    // Query 2: Find technology stocks with PE ratio under 30 and dividend yield > 0
    prove_and_verify_query(
        "SELECT Symbol, Company, PE_Ratio, DividendYield
FROM stocks
WHERE Sector = 'Technology' AND PE_Ratio < 30 AND DividendYield > 0
ORDER BY PE_Ratio DESC",
        &accessor,
        &prover_setup,
        &verifier_setup,
    );

    // Query 3: Average market cap by sector (using SUM/COUNT instead of AVG)
    prove_and_verify_query(
        "SELECT Sector, SUM(MarketCap)/COUNT(*) as avg_market_cap, COUNT(*) as c
FROM stocks
GROUP BY Sector
ORDER BY avg_market_cap DESC",
        &accessor,
        &prover_setup,
        &verifier_setup,
    );

    // Query 4: High value stocks with significant volume and dividend yield
    prove_and_verify_query(
        "SELECT Symbol, Company, Price, Volume, DividendYield
FROM stocks
WHERE Volume > 20000000 AND DividendYield > 0 AND Price > 100
ORDER BY Volume DESC",
        &accessor,
        &prover_setup,
        &verifier_setup,
    );
}
Loading
Loading