Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add more benchmarks for rust-enzyme #1832

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 49 additions & 0 deletions enzyme/benchmarks/ReverseMode/adbench/lstm.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,20 @@ struct LSTMOutput {
};

extern "C" {
// Declaration of the LSTM-objective derivative entry point provided by the
// Rust benchmark library (linked in as a static library by the Makefile).
// It is used below as the template argument of calculate_jacobian<>.
//
// NOTE(review): the Rust definition is not visible from this header. The
// parameter notes below are inferred from the names and from the parallel
// dlstm_objective declaration that follows - confirm against the Rust side,
// in particular that the integer parameters are 32-bit C ints there as well.
void rust_dlstm_objective(
int l, // presumably the layer count (LSTMInput::l) - TODO confirm
int c, // presumably the sequence length (LSTMInput::c) - TODO confirm
int b, // presumably the hidden size (LSTMInput::b) - TODO confirm
double const* main_params, // read-only primal parameters
double* dmain_params, // presumably the shadow/gradient of main_params
double const* extra_params, // read-only primal parameters
double* dextra_params, // presumably the shadow/gradient of extra_params
double* state, // mutable state buffer
double const* sequence, // read-only input sequence
double* loss, // primal loss output
double* dloss // presumably the seed/shadow of loss
);

void dlstm_objective(
int l,
int c,
Expand Down Expand Up @@ -291,6 +305,41 @@ int main(const int argc, const char* argv[]) {
}

}

// Benchmark scope for the Rust-compiled Enzyme gradient of the LSTM objective.
// Everything is declared inside its own braces so the names do not collide
// with the other benchmark scopes in main.
{

struct LSTMInput input = {};

// Read instance
read_lstm_instance("data/" + path, &input.l, &input.c, &input.b, input.main_params, input.extra_params, input.state,
input.sequence);

// NOTE(review): `state` is sized like input.state but is not referenced again
// within this scope.
std::vector<double> state = std::vector<double>(input.state.size());

// Length of the gradient vector handed to LSTMOutput below.
int Jcols = 8 * input.l * input.b + 3 * input.b;
struct LSTMOutput result = { 0, std::vector<double>(Jcols) };

{
// Wall-clock time of a single calculate_jacobian call that uses the Rust
// derivative entry point.
struct timeval start, end;
gettimeofday(&start, NULL);
calculate_jacobian<rust_dlstm_objective>(input, result);
gettimeofday(&end, NULL);
printf("Enzyme (Rust) combined %0.6f\n", tdiff(&start, &end));
json enzyme;
enzyme["name"] = "Enzyme (Rust) combined";
enzyme["runtime"] = tdiff(&start, &end);
// Print and record only the last five gradient entries.
// NOTE(review): the unsigned start index wraps around if the gradient has
// fewer than five entries.
for (unsigned i = result.gradient.size() - 5;
i < result.gradient.size(); i++) {
printf("%f ", result.gradient[i]);
enzyme["result"].push_back(result.gradient[i]);
}
test_suite["tools"].push_back(enzyme);

printf("\n");
}

}

test_suite["llvm-version"] = __clang_version__;
test_suite["mode"] = "ReverseMode";
test_suite["batch-size"] = 1;
Expand Down
7 changes: 7 additions & 0 deletions enzyme/benchmarks/ReverseMode/fft/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

18 changes: 18 additions & 0 deletions enzyme/benchmarks/ReverseMode/fft/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Cargo manifest for the Rust port of the FFT reverse-mode benchmark.
[package]
name = "fft"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

# No external crates are used.
[dependencies]

[lib]
# Default crate type; the benchmark Makefile overrides it on the command line
# with `--crate-type=staticlib` to produce libfft.a.
crate-type = ["lib"]

[profile.release]
# Fat LTO at the highest optimization level.
# NOTE(review): presumably required by the autodiff toolchain - confirm.
lto = "fat"
opt-level = 3

[profile.dev]
# Debug builds use fat LTO as well.
lto = "fat"
20 changes: 7 additions & 13 deletions enzyme/benchmarks/ReverseMode/fft/Makefile.make
Original file line number Diff line number Diff line change
@@ -1,23 +1,17 @@
# RUN: cd %S && LD_LIBRARY_PATH="%bldpath:$LD_LIBRARY_PATH" BENCH="%bench" BENCHLINK="%blink" LOAD="%loadEnzyme" make -B fft-unopt.ll fft-raw.ll fft-opt.ll results.txt VERBOSE=1 -f %s
# RUN: cd %S && LD_LIBRARY_PATH="%bldpath:$LD_LIBRARY_PATH" BENCH="%bench" BENCHLINK="%blink" LOAD="%newLoadClangEnzyme" make -B fft.o results.txt VERBOSE=1 -f %s

.PHONY: clean

dir := $(abspath $(lastword $(MAKEFILE_LIST))/../../../..)

clean:
rm -f *.ll *.o results.txt

%-unopt.ll: %.cpp
clang++ $(BENCH) $^ -O2 -fno-use-cxa-atexit -fno-vectorize -fno-slp-vectorize -ffast-math -fno-unroll-loops -o $@ -S -emit-llvm
#clang++ $(BENCH) $^ -O1 -Xclang -disable-llvm-passes -fno-use-cxa-atexit -fno-vectorize -fno-slp-vectorize -ffast-math -fno-unroll-loops -o $@ -S -emit-llvm

%-raw.ll: %-unopt.ll
opt $^ $(LOAD) -enzyme -o $@ -S

%-opt.ll: %-raw.ll
opt $^ -o $@ -S
#opt $^ -O2 -o $@ -S
$(dir)/benchmarks/ReverseMode/fft/target/release/libfft.a: src/lib.rs Cargo.toml
cargo +enzyme rustc --release --lib --crate-type=staticlib

fft.o: fft-opt.ll
clang++ -O2 $^ -o $@ $(BENCHLINK) -lm
fft.o: fft.cpp $(dir)/benchmarks/ReverseMode/fft/target/release/libfft.a
clang++ $(LOAD) $(BENCH) fft.cpp -I /usr/include/c++/11 -I/usr/include/x86_64-linux-gnu/c++/11 -O2 -o fft.o -lpthread $(BENCHLINK) -lm -lfft -L $(dir)/benchmarks/ReverseMode/fft/target/release/ -L /usr/lib/gcc/x86_64-linux-gnu/11

results.txt: fft.o
./$^ 1048576 | tee $@
56 changes: 56 additions & 0 deletions enzyme/benchmarks/ReverseMode/fft/fft.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,21 @@ extern "C" {
int enzyme_dupnoneed;
}

// Entry points exported by the Rust `fft` crate (src/lib.rs, #[no_mangle]).
// `n` is the number of doubles in each buffer. It is declared as size_t
// because the Rust definitions take `n: usize`; declaring it as `int` makes
// caller and callee disagree on the width of the first argument.
// rust_dfoobar runs the reverse-mode derivative (ddata is the shadow buffer),
// rust_foobar runs the primal only.
extern "C" void rust_dfoobar(size_t n, double* data, double* ddata);
extern "C" void rust_foobar(size_t n, double* data);

// Runs the Rust reverse-mode derivative of the FFT round trip once.
//
// Allocates an input buffer of 2*len doubles filled with 2.0 and a shadow
// buffer of the same size seeded with 1.0, calls rust_dfoobar on both, and
// returns the first entry of the shadow buffer afterwards.
static double rust_foobar_and_gradient(unsigned len) {
    // Use an unsigned count and index: the original compared a signed `int`
    // index against the unsigned `2*len`, which overflows for large `len`.
    const unsigned count = 2 * len;

    double *inp = new double[count];
    for (unsigned i = 0; i < count; i++) inp[i] = 2.0;

    double *dinp = new double[count];
    for (unsigned i = 0; i < count; i++) dinp[i] = 1.0;

    rust_dfoobar(count, inp, dinp);
    double res = dinp[0];

    delete[] dinp;
    delete[] inp;
    return res;
}

static double foobar_and_gradient(unsigned len) {
double *inp = new double[2*len];
for(int i=0; i<2*len; i++) inp[i] = 2.0;
Expand Down Expand Up @@ -202,6 +217,46 @@ static void enzyme_sincos(double inp, unsigned len) {
}
}

// Times the Rust implementation of the FFT benchmark and prints three lines:
// "real" and "forward" (both time the primal rust_foobar) and "combined"
// (primal + reverse-mode gradient via rust_foobar_and_gradient).
//
// `len` is the problem size; every buffer holds 2*len doubles. `inp` is
// unused and kept only so the signature matches the other *_sincos drivers.
static void enzyme_rust_sincos(double inp, unsigned len) {
    // The Rust entry points take the number of doubles in the buffer, so the
    // primal must be given 2*len - the same count the gradient path passes to
    // rust_dfoobar. Passing `len` would transform only half of the buffer.
    const unsigned count = 2 * len;

    {
        struct timeval start, end;
        gettimeofday(&start, NULL);

        double *x = new double[count];
        for (unsigned i = 0; i < count; i++) x[i] = 2.0;
        rust_foobar(count, x);
        double res = x[0];

        gettimeofday(&end, NULL);
        printf("Enzyme (Rust) real %0.6f res=%f\n", tdiff(&start, &end), res);
        delete[] x;
    }

    {
        struct timeval start, end;
        gettimeofday(&start, NULL);

        double *x = new double[count];
        for (unsigned i = 0; i < count; i++) x[i] = 2.0;
        rust_foobar(count, x);
        double res = x[0];

        gettimeofday(&end, NULL);
        printf("Enzyme (Rust) forward %0.6f res=%f\n", tdiff(&start, &end), res);
        delete[] x;
    }

    {
        struct timeval start, end;
        gettimeofday(&start, NULL);

        double res2 = rust_foobar_and_gradient(len);

        gettimeofday(&end, NULL);
        printf("Enzyme (Rust) combined %0.6f res'=%f\n", tdiff(&start, &end), res2);
    }
}

/* Function to check if x is power of 2*/
bool isPowerOfTwo (int x)
Expand Down Expand Up @@ -233,5 +288,6 @@ int main(int argc, char** argv) {
adept_sincos(inp, iters);
tapenade_sincos(inp, iters);
enzyme_sincos(inp, iters);
enzyme_rust_sincos(inp, iters);
}
}
106 changes: 106 additions & 0 deletions enzyme/benchmarks/ReverseMode/fft/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
#![feature(autodiff)]

use std::slice;
use std::f64::consts::PI;

/// Reorders `data` in place, swapping adjacent element pairs
/// (`data[k - 1]`, `data[k]`) according to the index sequence generated below.
///
/// `data.len() / 2` pairs are processed; an empty slice is left untouched.
fn bitreversal_perm<T>(data: &mut [T]) {
    let half = data.len() / 2;
    // 1-based position of the second element of the partner pair.
    let mut partner = 1;

    // Visit the second element of every pair: 1, 3, 5, ...
    for pos in (1..2 * half).step_by(2) {
        // Swap each pair at most once: only when its partner lies ahead.
        if partner > pos {
            data.swap(partner - 1, pos - 1);
            data.swap(partner, pos);
        }

        // Advance `partner` to the next index of the sequence.
        let mut step = half;
        while step >= 2 && partner > step {
            partner -= step;
            step >>= 1;
        }
        partner += step;
    }
}

/// Recursive radix-2 butterfly pass over `data`, performed in place.
///
/// * `data`   - buffer of at least `2 * n` values, processed as adjacent
///              pairs (`data[i]`, `data[i + 1]`).
/// * `i_sign` - sign applied to the rotation factors (`fft` passes `1.0`,
///              `ifft` passes `-1.0`).
/// * `n`      - half the number of values handled at this recursion level.
///
/// Returns immediately for `n <= 1`. The original only checked `n == 1`, so a
/// call with `n == 0` (an empty or single-element buffer) recursed forever.
fn radix2(data: &mut [f64], i_sign: f64, n: usize) {
    if n <= 1 {
        return;
    }

    // Process both halves first, then combine them below.
    {
        let (lower, upper) = data.split_at_mut(n);
        radix2(lower, i_sign, n / 2);
        radix2(upper, i_sign, n / 2);
    }

    // The rotation factor (wr, wi) is advanced by a recurrence instead of
    // calling sin/cos inside the loop.
    let wtemp = i_sign * (PI / n as f64).sin();
    let wpi = -i_sign * (2.0 * PI / n as f64).sin();
    let wpr = -2.0 * wtemp * wtemp;
    let mut wr = 1.0;
    let mut wi = 0.0;

    for i in (0..n).step_by(2) {
        let in_n = i + n;

        // Rotate the upper-half pair by (wr, wi).
        let tempr = data[in_n] * wr - data[in_n + 1] * wi;
        let tempi = data[in_n] * wi + data[in_n + 1] * wr;

        // Butterfly: upper = lower - rotated, lower = lower + rotated.
        data[in_n] = data[i] - tempr;
        data[in_n + 1] = data[i + 1] - tempi;
        data[i] += tempr;
        data[i + 1] += tempi;

        // Advance the rotation factor; wi must use the pre-update wr.
        let wr_prev = wr;
        wr += wr * wpr - wi * wpi;
        wi += wi * wpr + wr_prev * wpi;
    }
}

/// Divides every element of `data` by `scale`, in place.
///
/// The reciprocal is computed once and applied by multiplication.
fn rescale(data: &mut [f64], scale: f64) {
    let factor = 1. / scale;
    data.iter_mut().for_each(|value| *value *= factor);
}

/// In-place forward transform: reorders `data` with `bitreversal_perm`, then
/// runs `radix2` with sign `1.0` and `n = data.len() / 2`.
fn fft(data: &mut [f64]) {
    let half_len = data.len() / 2;
    bitreversal_perm(data);
    radix2(data, 1.0, half_len);
}

/// In-place inverse transform: same steps as `fft` but with sign `-1.0`,
/// followed by dividing every element by `data.len() / 2`.
fn ifft(data: &mut [f64]) {
    let half_len = data.len() / 2;
    bitreversal_perm(data);
    radix2(data, -1.0, half_len);

    let norm = data.len() as f64 / 2.;
    rescale(data, norm);
}

// Benchmark kernel: applies `fft` followed by `ifft` to `data` in place.
//
// The `autodiff` attribute requests a reverse-mode derivative named `dfoobar`
// with `data` marked `Duplicated`; elsewhere in this crate it is called as
// `dfoobar(data, ddata)` with a second, equally sized slice.
#[autodiff(dfoobar, Reverse, Duplicated)]
pub fn foobar(data: &mut [f64]) {
fft(data);
ifft(data);
}

/// C entry point for the generated derivative `dfoobar`.
///
/// `n` is the number of `f64` values in each buffer. `data` is the primal
/// buffer and `ddata` the buffer passed as the second argument of `dfoobar`;
/// both are modified in place.
///
/// The caller must pass two valid, non-overlapping allocations of at least
/// `n` values each.
#[no_mangle]
pub extern "C" fn rust_dfoobar(n: usize, data: *mut f64, ddata: *mut f64) {
    // SAFETY: relies on the caller upholding the contract described above.
    let primal = unsafe { slice::from_raw_parts_mut(data, n) };
    let shadow = unsafe { slice::from_raw_parts_mut(ddata, n) };

    dfoobar(primal, shadow);
}

/// C entry point for the primal kernel `foobar`.
///
/// `n` is the number of `f64` values in `data`, which is transformed in
/// place. The caller must pass a valid allocation of at least `n` values.
#[no_mangle]
pub extern "C" fn rust_foobar(n: usize, data: *mut f64) {
    // SAFETY: relies on the caller upholding the contract described above.
    let values = unsafe { slice::from_raw_parts_mut(data, n) };
    foobar(values);
}
14 changes: 14 additions & 0 deletions enzyme/benchmarks/ReverseMode/fft/src/main.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
use fft::dfoobar;

fn main() { // Small driver: runs the generated derivative `dfoobar` once and dumps both buffers.
let mut data = vec![1.0; 32]; // 32 input values, initially all 1.0
for i in 0..16 { // overwrite the first half with 2.0
data[i] = 2.0;
}
let mut data_d = vec![1.0; data.len()]; // second buffer of the same length, seeded with 1.0

dfoobar(&mut data, &mut data_d); // both buffers are passed mutably

dbg!(&data_d); // buffer passed as the second argument, after the call
dbg!(&data); // buffer passed as the first argument, after the call
}
7 changes: 7 additions & 0 deletions enzyme/benchmarks/ReverseMode/lstm/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

18 changes: 18 additions & 0 deletions enzyme/benchmarks/ReverseMode/lstm/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Cargo manifest for the Rust port of the LSTM reverse-mode benchmark.
[package]
name = "lstm"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

# No external crates are used.
[dependencies]

[lib]
# Default crate type; the benchmark Makefile overrides it on the command line
# with `--crate-type=staticlib` to produce liblstm.a.
crate-type = ["lib"]

[profile.release]
# Fat LTO at the highest optimization level.
# NOTE(review): presumably required by the autodiff toolchain - confirm.
lto = "fat"
opt-level = 3

[profile.dev]
# Debug builds use fat LTO as well.
lto = "fat"
19 changes: 12 additions & 7 deletions enzyme/benchmarks/ReverseMode/lstm/Makefile.make
Original file line number Diff line number Diff line change
@@ -1,23 +1,28 @@
# RUN: cd %S && LD_LIBRARY_PATH="%bldpath:$LD_LIBRARY_PATH" BENCH="%bench" BENCHLINK="%blink" LOAD="%loadEnzyme" make -B lstm-raw.ll results.json -f %s
# RUN: cd %S && LD_LIBRARY_PATH="%bldpath:$LD_LIBRARY_PATH" BENCH="%bench" BENCHLINK="%blink" LOAD="%newLoadEnzyme %enzyme" make -B lstm-raw.ll results.json -f %s

.PHONY: clean

dir := $(abspath $(lastword $(MAKEFILE_LIST))/../../../..)

clean:
rm -f *.ll *.o results.txt
rm -f *.ll *.o results.json

$(dir)/benchmarks/ReverseMode/lstm/target/release/liblstm.a: src/lib.rs Cargo.toml
cargo +enzyme rustc --release --lib --crate-type=staticlib

%-unopt.ll: %.cpp
clang++ $(BENCH) $^ -O2 -fno-vectorize -fno-slp-vectorize -ffast-math -fno-unroll-loops -o $@ -S -emit-llvm
#clang++ $(BENCH) $^ -O1 -Xclang -disable-llvm-passes -fno-vectorize -fno-slp-vectorize -ffast-math -fno-unroll-loops -o $@ -S -emit-llvm
clang++ $(BENCH) $^ -O2 --gcc-install-dir=/usr/lib/gcc/x86_64-linux-gnu/11 -fno-vectorize -fno-slp-vectorize -ffast-math -fno-unroll-loops -o $@ -S -emit-llvm

%-raw.ll: %-unopt.ll
opt $^ $(LOAD) -enzyme -o $@ -S
@echo $(LOAD)
opt $^ $(LOAD) -o $@ -S

%-opt.ll: %-raw.ll
opt $^ -o $@ -S
#opt $^ -O2 -o $@ -S

lstm.o: lstm-opt.ll
clang++ -O2 $^ -o $@ $(BENCHLINK) -lm
lstm.o: lstm-opt.ll $(dir)/benchmarks/ReverseMode/lstm/target/release/liblstm.a
clang++ --gcc-install-dir=/usr/lib/gcc/x86_64-linux-gnu/11 -O2 $^ -o $@ $(BENCHLINK) -lm $(dir)/benchmarks/ReverseMode/lstm/target/release/liblstm.a

results.json: lstm.o
./$^
Loading
Loading