Skip to content

Commit

Permalink
Add Prophet algorithm in augurs-prophet crate (#118)
Browse files Browse the repository at this point in the history
Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
  • Loading branch information
sd2k and coderabbitai[bot] authored Oct 10, 2024
1 parent 253f6d9 commit d9f79a7
Show file tree
Hide file tree
Showing 27 changed files with 5,438 additions and 3 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
/target
/Cargo.lock
.bacon-locations
.vscode
8 changes: 8 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,20 @@ augurs-ets = { version = "0.3.1", path = "crates/augurs-ets" }
augurs-forecaster = { path = "crates/augurs-forecaster" }
augurs-mstl = { version = "0.3.1", path = "crates/augurs-mstl" }
augurs-outlier = { version = "0.3.1", path = "crates/augurs-outlier" }
augurs-prophet = { version = "0.3.1", path = "crates/augurs-prophet" }
augurs-seasons = { version = "0.3.1", path = "crates/augurs-seasons" }
augurs-testing = { path = "crates/augurs-testing" }

anyhow = "1.0.89"
bytemuck = "1.18.0"
chrono = "0.4.38"
distrs = "0.2.1"
itertools = "0.13.0"
num-traits = "0.2.19"
rand = "0.8.5"
roots = "0.0.8"
serde = { version = "1.0.166", features = ["derive"] }
statrs = "0.17.1"
thiserror = "1.0.40"
tinyvec = "1.6.0"
tracing = "0.1.37"
Expand All @@ -46,6 +53,7 @@ assert_approx_eq = "1.1.0"
criterion = "0.5.1"
iai = "0.1.1"
pprof = { version = "0.13.0", features = ["criterion", "frame-pointer", "prost-codec"] }
pretty_assertions = "1.4.1"

# See https://nnethercote.github.io/perf-book/build-configuration.html
# for more information on why we're using these settings.
Expand Down
11 changes: 11 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,19 @@ APIs are subject to change, and functionality may not be fully implemented.
| [`augurs-ets`][] | Automatic exponential smoothing models | alpha - non-seasonal models working and tested against statsforecast |
| [`augurs-mstl`][] | Multiple Seasonal Trend Decomposition using LOESS (MSTL) | beta - working and tested against R |
| [`augurs-outlier`][] | Outlier detection for time series | alpha |
| [`augurs-prophet`][] | The Prophet time series forecasting algorithm | alpha |
| [`augurs-seasons`][] | Seasonality detection using periodograms | alpha - working and tested against Python in limited scenarios |
| [`augurs-testing`][] | Testing data and, eventually, evaluation harness for implementations | alpha - just data right now |
| [`augurs-js`][] | WASM bindings to augurs | alpha |
| [`pyaugurs`][] | Python bindings to augurs | alpha |

## Developing

This project uses [`just`] as a command runner; this will need to be installed separately.
See the [`justfile`](./justfile) for more information.

Some of the tasks require [`bacon`], which will also need to be installed separately.

## Releasing

Releases are made using `release-plz`: a PR should be automatically created for each release, and merging will perform the release and publish automatically.
Expand Down Expand Up @@ -73,6 +81,9 @@ Licensed under the Apache License, Version 2.0 `<http://www.apache.org/licenses/
[`augurs-mstl`]: https://crates.io/crates/augurs-mstl
[`augurs-js`]: https://crates.io/crates/augurs-js
[`augurs-outlier`]: https://crates.io/crates/augurs-outlier
[`augurs-prophet`]: https://crates.io/crates/augurs-prophet
[`augurs-seasons`]: https://crates.io/crates/augurs-seasons
[`augurs-testing`]: https://crates.io/crates/augurs-testing
[`pyaugurs`]: https://crates.io/crates/pyaugurs
[`just`]: https://just.systems/man/en/
[`bacon`]: https://dystroy.org/bacon
113 changes: 113 additions & 0 deletions bacon.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
# This is a configuration file for the bacon tool
#
# Bacon repository: https://github.com/Canop/bacon
# Complete help on configuration: https://dystroy.org/bacon/config/
# You can also check bacon's own bacon.toml file
# as an example: https://github.com/Canop/bacon/blob/main/bacon.toml

# Job selected by default; its definition is the `[jobs.clippy]` table below.
default_job = "clippy"
# NOTE(review): presumably makes bacon start in its summary display mode —
# confirm against bacon's configuration documentation.
summary = true

# Run `cargo check` with cargo's default target selection.
[jobs.check]
command = [
    "cargo", "check",
    "--color", "always",
]
need_stdout = false

# Run `cargo check` on all targets.
[jobs.check-all]
command = [
    "cargo", "check",
    "--all-targets",
    "--color", "always",
]
need_stdout = false

# Lint the default target with clippy.
[jobs.clippy]
command = ["cargo", "clippy", "--color", "always"]
need_stdout = false

# Lint every target with clippy.
# To disable some lints, the job may be changed along these lines:
# [jobs.clippy-all]
# command = [
#     "cargo", "clippy",
#     "--all-targets",
#     "--color", "always",
#     "--",
#     "-A", "clippy::bool_to_int_with_if",
#     "-A", "clippy::collapsible_if",
#     "-A", "clippy::derive_partial_eq_without_eq",
# ]
# need_stdout = false
[jobs.clippy-all]
command = ["cargo", "clippy", "--all-targets", "--color", "always"]
need_stdout = false

# Runs the whole workspace's tests, with all features enabled, through
# `cargo nextest run`.
#
# This job lets you run
# - all tests: bacon test
# - a specific test: bacon test -- config::test_default_files
# - the tests of a package: bacon test -- -- -p config
#
# NOTE(review): the example names above (`config`, `test_default_files`) do
# not match anything in this workspace and appear to be carried over from a
# `cargo test`-based template. Since this job wraps `cargo nextest run`,
# confirm that the argument forms (in particular the doubled `--` before
# `-p`) still select what they claim to.
[jobs.test]
command = [
"cargo", "nextest", "--color", "always",
"run", "--all-features", "--workspace",
]
need_stdout = true
analyzer = "nextest"

# Run the documentation tests of the workspace with all features enabled.
# `augurs-js` and `pyaugurs` are left out — presumably because they are the
# WASM and Python bindings crates; confirm the exact reason.
[jobs.doc-test]
command = [
    "cargo", "test",
    "--doc",
    "--color", "always",
    "--all-features",
    "--workspace",
    "--exclude", "augurs-js",
    "--exclude", "pyaugurs",
]
need_stdout = true

# Build the documentation, without the dependencies' docs.
[jobs.doc]
command = [
    "cargo", "doc",
    "--color", "always",
    "--no-deps",
]
need_stdout = false

# Builds the docs and, if that succeeds, opens them in your browser;
# bacon then switches to the previous job.
[jobs.doc-open]
command = [
    "cargo", "doc",
    "--color", "always",
    "--no-deps",
    "--open",
]
need_stdout = false
# Go back to the previous job so that the browser isn't opened at each change.
on_success = "back"

# Runs the application and shows its output inside bacon, *if* that makes
# sense for this crate.
# Keep the `--color always` arguments, otherwise the errors won't be
# properly parsed.
# For a program that never stops (eg a server), `background` may be set
# to false so that the cargo run output is displayed immediately instead
# of only once the program ends.
[jobs.run]
command = [
    "cargo",
    "run",
    "--color",
    "always",
    # put launch parameters for your program behind a `--` separator
]
need_stdout = true
allow_warnings = true
background = true

# Parameterized job: runs the example you name as soon as the code compiles.
# Invoke it as
#     bacon ex -- my-example
[jobs.ex]
command = [
    "cargo", "run",
    "--color", "always",
    "--example",
]
need_stdout = true
allow_warnings = true

# Project-specific keybindings, for example a shortcut that launches a
# specific job. Shortcuts to internal functions (scrolling, toggling, etc.)
# belong in your personal global prefs.toml file instead.
[keybindings]
# alt-m = "job:my-job"
# Comment out the next line to have 'c' run clippy on only the default target.
c = "job:clippy-all"
d = "job:doc-test"
2 changes: 1 addition & 1 deletion crates/augurs-ets/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ distrs.workspace = true
itertools.workspace = true
lstsq = "0.6.0"
nalgebra = "0.33.0"
rand = "0.8.5"
rand.workspace = true
rand_distr = "0.4.3"
roots.workspace = true
serde = { workspace = true, optional = true, features = ["derive"] }
Expand Down
2 changes: 1 addition & 1 deletion crates/augurs-outlier/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ bench = false
itertools.workspace = true
rayon = { version = "1.10.0", optional = true }
roots.workspace = true
rand = "0.8.5"
rand.workspace = true
rustc-hash = "2.0.0"
rv = { version = "0.17.0", default-features = false }
serde = { workspace = true, features = ["derive"], optional = true }
Expand Down
26 changes: 26 additions & 0 deletions crates/augurs-prophet/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
[package]
name = "augurs-prophet"
version.workspace = true
authors.workspace = true
documentation.workspace = true
edition.workspace = true
keywords.workspace = true
license.workspace = true
repository.workspace = true
# crates.io rejects uploads that have no description, and this manifest does
# not inherit one from the workspace, so it is set explicitly here.
description = "The Prophet time series forecasting algorithm"

# Versions of all dependencies are inherited from the workspace manifest.
[dependencies]
anyhow.workspace = true
# Optional: only pulled in when this crate's `bytemuck` feature is enabled.
bytemuck = { workspace = true, features = ["derive"], optional = true }
itertools.workspace = true
num-traits.workspace = true
rand.workspace = true
statrs.workspace = true
thiserror.workspace = true

# Development-only dependencies; versions are inherited from the workspace.
[dev-dependencies]
augurs-testing.workspace = true
chrono.workspace = true
pretty_assertions.workspace = true

[features]
# Enables the optional `bytemuck` dependency declared under `[dependencies]`.
bytemuck = ["dep:bytemuck"]
1 change: 1 addition & 0 deletions crates/augurs-prophet/LICENSE-APACHE
1 change: 1 addition & 0 deletions crates/augurs-prophet/LICENSE-MIT
72 changes: 72 additions & 0 deletions crates/augurs-prophet/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
# Prophet: forecasting at scale

`augurs-prophet` contains an implementation of the [Prophet]
time series forecasting library.

This crate aims to be low-dependency to enable it to run in as
many places as possible. With that said, we need to talk about
optimizers…

## Optimizers

The original Prophet library uses [Stan] to handle optimization and MCMC sampling.
Stan is a platform for statistical modeling which can perform Bayesian statistical
inference as well as maximum likelihood estimation using optimizers such as L-BFGS.
However, it is written in C++ and has non-trivial dependencies, which makes it
difficult to interface with from Rust (or, indeed, Python).

`augurs-prophet` (similar to the Python library) abstracts optimization
and sampling implementations using the `Optimizer` and `Sampler` traits.
No implementations of these traits exist yet, but I have a few ideas:

### `cmdstan`

This is the approach now taken by the Python implementation, which uses
the `cmdstanpy` package and compiles the Stan program into a standalone
binary on installation. It then executes that binary during the fitting
stage to perform optimization or sampling, passing the data and
parameters between Stan and Python using files on the filesystem.

This works fine if you're operating in a desktop or server environment,
but poses issues when running in more esoteric environments such as
WebAssembly.

### `libstan`

We could choose to write a `libstan` crate which uses [`cxx`][cxx] to
interface directly with the C++ library generated by Stan. Since the
model code is constant (unless we upgrade the version of `stanc` used to
generate it), we could also write a small amount of C++ to make it
possible for us to pass data directly to it from Rust.

In theory this should work OK for any target which Stan can compile to.
The problem I've noticed is that Stan isn't particularly careful about
which headers it imports, so even when compiling just the `model.hpp` library,
you end up with a bunch of I/O- and filesystem-related headers imported,
which aren't available when using standard WASM.

Perhaps we could clean Stan up so it didn't import those things? We should
be able to target most environments in that case.

### WASM Components

For WASM, we could abstract the C++ side of things behind a
[WASM component] which exposes an `optimize` interface,
and create a second Prophet component which imports that
interface to implement the `Optimizer` trait of this crate.

### A reimplementation of Stan

We could re-implement Stan in a new Rust crate and use that
here. This is likely to be by far the largest amount of work!

## Credits

This implementation is based heavily on the original [Prophet] Python
package. Some changes have been made to make the APIs more idiomatic
Rust or to take advantage of the type system.

[Prophet]: https://facebook.github.io/prophet/
[Stan]: https://mc-stan.org/
[cxx]: https://cxx.rs/
[WASM component]: https://component-model.bytecodealliance.org/
Loading

0 comments on commit d9f79a7

Please sign in to comment.