From 1c85c18519fe4166da6803e6f4e33a5526adf7e7 Mon Sep 17 00:00:00 2001 From: Evgenii Khramkov Date: Tue, 18 Jun 2024 08:59:06 +0900 Subject: [PATCH] Update from readme and acknowledgements (#286) * update acknowledgements * updte readme --- spiceaidocs/docs/acknowledgements/index.md | 43 ++++++++++++++++------ spiceaidocs/docs/index.md | 28 ++++++++++---- 2 files changed, 53 insertions(+), 18 deletions(-) diff --git a/spiceaidocs/docs/acknowledgements/index.md b/spiceaidocs/docs/acknowledgements/index.md index f3bbbe0e..7447d144 100644 --- a/spiceaidocs/docs/acknowledgements/index.md +++ b/spiceaidocs/docs/acknowledgements/index.md @@ -93,21 +93,27 @@ gopkg.in/yaml.v3, https://github.com/go-yaml/yaml/blob/v3.0.1/LICENSE, MIT - anyhow 1.0.86, Apache-2.0 OR MIT
https://github.com/dtolnay/anyhow -- arrow 51.0.0, Apache-2.0 +- arrow 52.0.0, Apache-2.0
https://github.com/apache/arrow-rs -- arrow-flight 51.0.0, Apache-2.0 +- arrow-flight 52.0.0, Apache-2.0
https://github.com/apache/arrow-rs -- arrow-ipc 51.0.0, Apache-2.0 +- arrow-ipc 52.0.0, Apache-2.0
https://github.com/apache/arrow-rs -- arrow-json 51.0.0, Apache-2.0 +- arrow-json 52.0.0, Apache-2.0
https://github.com/apache/arrow-rs -- arrow-odbc 9.0.0, MIT +- arrow-odbc 11.1.0, MIT
https://github.com/pacman82/arrow-odbc +- async-graphql 7.0.5, Apache-2.0 OR MIT +
https://github.com/async-graphql/async-graphql + +- async-graphql-axum 7.0.5, Apache-2.0 OR MIT +
https://github.com/async-graphql/async-graphql + - async-openai 0.21.0, MIT
https://github.com/64bit/async-openai @@ -183,6 +189,9 @@ gopkg.in/yaml.v3, https://github.com/go-yaml/yaml/blob/v3.0.1/LICENSE, MIT - chrono-tz 0.8.6, Apache-2.0 OR MIT
https://github.com/chronotope/chrono-tz +- chrono-tz 0.9.0, Apache-2.0 OR MIT +
https://github.com/chronotope/chrono-tz + - clap 4.5.4, Apache-2.0 OR MIT
https://github.com/clap-rs/clap @@ -195,7 +204,7 @@ gopkg.in/yaml.v3, https://github.com/go-yaml/yaml/blob/v3.0.1/LICENSE, MIT - dashmap 5.5.3, MIT
https://github.com/xacrimon/dashmap -- datafusion 38.0.0, Apache-2.0 +- datafusion 39.0.0, Apache-2.0
https://github.com/apache/datafusion - datafusion-federation 0.1.3, Apache-2.0 @@ -204,13 +213,16 @@ gopkg.in/yaml.v3, https://github.com/go-yaml/yaml/blob/v3.0.1/LICENSE, MIT - datafusion-federation-sql 0.1.3, Apache-2.0
-- deltalake 0.17.3, Apache-2.0 +- deltalake 0.18.0, Apache-2.0
https://github.com/delta-io/delta.rs +- derive_builder 0.20.0, Apache-2.0 OR MIT +
https://github.com/colin-kiegel/rust-derive-builder + - dirs 5.0.1, Apache-2.0 OR MIT
https://github.com/soc/dirs-rs -- duckdb 0.10.2, MIT +- duckdb 1.0.0, MIT
https://github.com/wangfenjin/duckdb-rs - fundu 2.0.0, MIT @@ -237,6 +249,9 @@ gopkg.in/yaml.v3, https://github.com/go-yaml/yaml/blob/v3.0.1/LICENSE, MIT - itertools 0.12.1, Apache-2.0 OR MIT
https://github.com/rust-itertools/itertools +- itertools 0.13.0, Apache-2.0 OR MIT +
https://github.com/rust-itertools/itertools + - keyring 2.3.2, Apache-2.0 OR MIT
https://github.com/hwchen/keyring-rs.git @@ -276,6 +291,9 @@ gopkg.in/yaml.v3, https://github.com/go-yaml/yaml/blob/v3.0.1/LICENSE, MIT - object_store 0.9.1, Apache-2.0 OR MIT
https://github.com/apache/arrow-rs/tree/master/object_store +- object_store 0.10.1, Apache-2.0 OR MIT +
https://github.com/apache/arrow-rs/tree/master/object_store + - odbc-api 7.0.0, MIT
https://github.com/pacman82/odbc-api @@ -285,7 +303,7 @@ gopkg.in/yaml.v3, https://github.com/go-yaml/yaml/blob/v3.0.1/LICENSE, MIT - opentelemetry-proto 0.4.0, Apache-2.0
https://github.com/open-telemetry/opentelemetry-rust/tree/main/opentelemetry-proto -- parquet 51.0.0, Apache-2.0 +- parquet 52.0.0, Apache-2.0
https://github.com/apache/arrow-rs - pem 1.1.1, MIT @@ -318,7 +336,10 @@ gopkg.in/yaml.v3, https://github.com/go-yaml/yaml/blob/v3.0.1/LICENSE, MIT - r2d2 0.8.10, Apache-2.0 OR MIT
https://github.com/sfackler/r2d2 -- regex 1.10.4, Apache-2.0 OR MIT +- rand 0.8.5, Apache-2.0 OR MIT +
https://github.com/rust-random/rand + +- regex 1.10.5, Apache-2.0 OR MIT
https://github.com/rust-lang/regex - reqwest 0.11.27, Apache-2.0 OR MIT @@ -366,7 +387,7 @@ gopkg.in/yaml.v3, https://github.com/go-yaml/yaml/blob/v3.0.1/LICENSE, MIT - snafu 0.8.2, Apache-2.0 OR MIT
https://github.com/shepmaster/snafu -- snowflake-api 0.8.0, Apache-2.0 +- snowflake-api 0.9.0, Apache-2.0
https://github.com/mycelial/snowflake-rs - spark-connect-rs 0.0.1-beta.4, Apache-2.0 diff --git a/spiceaidocs/docs/index.md b/spiceaidocs/docs/index.md index 2e359853..107cc77f 100644 --- a/spiceaidocs/docs/index.md +++ b/spiceaidocs/docs/index.md @@ -10,27 +10,41 @@ import ThemeBasedImage from '@site/src/components/ThemeBasedImage'; ## What is Spice? -**Spice** is a portable runtime providing developers with a unified SQL interface to materialize, accelerate, and query data sourced from any database, data warehouse, or data lake. +**Spice** is a portable runtime offering developers a unified SQL interface to materialize, accelerate, and query data from any database, data warehouse, or data lake. 📣 Read the [Spice.ai OSS announcement blog post](https://blog.spiceai.org/posts/2024/03/28/adding-spice-the-next-generation-of-spice.ai-oss/). -Spice connects, fuses, and delivers data to applications and AI, acting as an application-specific, tier-optimized Database CDN. +Spice connects, fuses, and delivers data to applications, machine-learning models, and AI backends, functioning as an application-specific, tier-optimized Database CDN. -The Spice runtime is written in Rust and is built-with industry leading technologies like [Apache DataFusion](https://datafusion.apache.org), Apache Arrow, Apache Arrow Flight, SQlite, and DuckDB. +The Spice runtime, written in Rust, is built with industry-leading technologies such as [Apache DataFusion](https://datafusion.apache.org), Apache Arrow, Apache Arrow Flight, SQLite, and DuckDB. ## Why Spice? -Spice makes querying data by SQL across one or more data sources simple and fast. Easily co-locate a managed working set of data with your application or ML, accelerated with in-memory Arrow, with SQLite/DuckDB, or with attached PostgreSQL for high-performance, low-latency queries. Accelerated engines run tier-native in your infrastructure giving you flexibility and control over cost and performance. 
+Spice makes it easy and fast to query data from one or more sources using SQL. You can co-locate a managed dataset with your application or machine learning model, and accelerate it with Arrow in-memory, SQLite/DuckDB, or with attached PostgreSQL for fast, high-concurrency, low-latency queries. Accelerated engines give you flexibility and control over query cost and performance. ### How is Spice different? -1. Tier-optimized Acceleration with **both OLAP (Arrow/DuckDB) and OLTP (SQLite/PostgreSQL)** databases at dataset granularity compared to other OLAP only or OLTP only systems. +1. **Application-focused:** Spice is designed to integrate at the application level, with a 1:1 or 1:N application-to-Spice mapping, whereas most other data systems are designed for multiple applications to share a single database or data warehouse. It's not uncommon to have many Spice instances, even down to one for each tenant or customer. -2. **Separation of materialization and storage/compute** compared with monolith data systems and data lakes. Keep compute colocated with source data while bringing a materialized working set next to your application, dashboard, or data/ML pipeline. +2. **Dual-Engine Acceleration:** Spice supports both **OLAP** (Arrow/DuckDB) and **OLTP** (SQLite/PostgreSQL) databases at the dataset level, unlike other systems that support only one type. -3. **Edge to cloud native**. Designed to be deployed standalone, as a container sidecar, as a microservice, in a cluster across laptops, the Edge, On-Prem, to a POP, and to all public clouds. Spice instances can also be chained, and deployed distributed across tiers of infrastructure. +3. **Separation of Materialization and Storage/Compute:** Spice separates materialization from storage and compute, allowing you to keep data close to its source and bring a materialized working set next to your application, dashboard, or data/ML pipeline. + +4. **Edge to Cloud Native:** 
Spice is designed to be deployed anywhere, from a standalone instance to a Kubernetes container sidecar, microservice, or cluster at the Edge/POP, On-Prem, or in public clouds. You can also chain Spice instances and deploy them across multiple infrastructure tiers. + +### How does Spice compare? + +| | Spice | Trino/Presto | Dremio | ClickHouse | +| -------------------------- | ---------------------------------- | -------------------------------- | -------------------------------- | ----------------------- | +| Primary Use-Case | Data & AI Applications | Big Data Analytics | Interactive Analytics | Real-Time Analytics | +| Typical Deployment | Colocated with application | Cloud Cluster | Cloud Cluster | On-Prem/Cloud Cluster | +| Application-to-Data System | One-to-One/Many | Many-to-One | Many-to-One | Many-to-One | +| Query Federation | Native with query push-down | Supported with push-down | Supported with limited push-down | Limited | +| Materialization | Arrow/SQLite/DuckDB/PostgreSQL | Intermediate Storage | Reflections (Iceberg) | Views & MergeTree | +| Query Result Caching | Supported | Supported | Supported | Supported | +| Typical Configuration | Single-Binary/Sidecar/Microservice | Coordinator+Executor w/ ZooKeeper | Coordinator+Executor w/ ZooKeeper | ClickHouse Keeper+Nodes | ### Before Spice