Skip to content

Commit

Permalink
first pass a C core decoder and ctests
Browse files Browse the repository at this point in the history
  • Loading branch information
drowe67 committed Nov 14, 2024
1 parent 878af3e commit c748fbd
Show file tree
Hide file tree
Showing 9 changed files with 298 additions and 10 deletions.
22 changes: 20 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -515,17 +515,35 @@ add_test(NAME bbfm_sc_bpf_loss
# Core C encoder test for model05: features_in.f32 is piped through test_rade_enc, which
# is given the bin/model05.bin weights file, and the latents it writes (z_c.f32) are read
# back by stateful_encoder.py via --read_latent with --loss_test 0.2. The test passes when
# the command output contains "PASS".
# compiled-in weights are for model19_check3
# NOTE(review): inference.sh and lpcnet_demo -features are both run on the same wav file
# before features_in.f32 is consumed - confirm both steps are still required.
add_test(NAME c_encoder_model5
COMMAND sh -c "cd ${CMAKE_SOURCE_DIR};
./inference.sh model05/checkpoints/checkpoint_epoch_100.pth wav/brian_g8sez.wav /dev/null; \
${CMAKE_CURRENT_BINARY_DIR}/src/lpcnet_demo -features wav/brian_g8sez.wav features_in.f32; \
cat features_in.f32 | ${CMAKE_CURRENT_BINARY_DIR}/src/test_rade_enc 1 0 ${CMAKE_SOURCE_DIR}/bin/model05.bin > z_c.f32; \
python3 stateful_encoder.py model05/checkpoints/checkpoint_epoch_100.pth features_in.f32 /dev/null --read_latent z_c.f32 --loss_test 0.2")
set_tests_properties(c_encoder_model5 PROPERTIES PASS_REGULAR_EXPRESSION "PASS")

# we need the DSP code in the loop to test the core encoder with model19_check3 as the bottleneck (3) is at rate Fs
# Two feature streams are produced from the same features_in.f32 and handed to loss.py --compare:
#   features_out.f32    : radae_txe.py | radae_rxe.py
#   features_rx_out.f32 : test_rade_enc (no weights file argument) | radae_txe.py --bypass_enc | radae_rxe.py
# The test passes when the command output contains "PASS".
# NOTE(review): the inference.sh step uses the model05 checkpoint and writes z.f32, but no
# later step of this command reads z.f32, and features_in.f32/features_out.f32 are written
# again by the following steps - confirm whether that step is a leftover.
add_test(NAME c_encoder_model19_check3
COMMAND sh -c "cd ${CMAKE_SOURCE_DIR};
./inference.sh model05/checkpoints/checkpoint_epoch_100.pth wav/brian_g8sez.wav /dev/null --write_latent z.f32; \
${CMAKE_CURRENT_BINARY_DIR}/src/lpcnet_demo -features wav/brian_g8sez.wav features_in.f32; \
cat features_in.f32 | python3 radae_txe.py | python3 radae_rxe.py > features_out.f32; \
cat features_in.f32 | ${CMAKE_CURRENT_BINARY_DIR}/src/test_rade_enc 3 1 > z_c.f32; \
cat z_c.f32 | python3 radae_txe.py --bypass_enc | python3 radae_rxe.py > features_rx_out.f32; \
python3 loss.py features_in.f32 features_out.f32 --features_hat2 features_rx_out.f32 --compare")
set_tests_properties(c_encoder_model19_check3 PROPERTIES PASS_REGULAR_EXPRESSION "PASS")

# Compare the Python decoder output (features_out.f32) with the C decoder output
# (features_c.f32) for the same latent input z.f32.
# Note: inference.sh creates features_in.f32 & features_out.f32.
# test_rade_dec is run with auxdata=0 and the bin/model05.bin weights file. The executable
# is located with $<TARGET_FILE:...> so the test does not depend on where the generator
# places the binary. The test passes when the command output contains "PASS".
add_test(NAME c_decoder_model5
         COMMAND sh -c "cd ${CMAKE_SOURCE_DIR};
                        ./inference.sh model05/checkpoints/checkpoint_epoch_100.pth wav/brian_g8sez.wav /dev/null --write_latent z.f32; \
                        cat z.f32 | $<TARGET_FILE:test_rade_dec> 0 ${CMAKE_SOURCE_DIR}/bin/model05.bin > features_c.f32; \
                        python3 loss.py features_in.f32 features_out.f32 --features_hat2 features_c.f32 --compare")
set_tests_properties(c_decoder_model5 PROPERTIES PASS_REGULAR_EXPRESSION "PASS")

# Compare the Python decoder output (features_out.f32) with the C decoder output
# (features_c.f32) for model19_check3, using the latents z_hat.f32 written by inference.sh.
# test_rade_dec is run with auxdata=1 and no weights file argument, so it initialises from
# its built-in weight arrays. The executable is located with $<TARGET_FILE:...> so the test
# does not depend on where the generator places the binary.
# The test passes when the command output contains "PASS".
add_test(NAME c_decoder_model19_check3
         COMMAND sh -c "cd ${CMAKE_SOURCE_DIR};
                        ./inference.sh model19_check3/checkpoints/checkpoint_epoch_100.pth wav/brian_g8sez.wav /dev/null \
                        --rate_Fs --pilots --pilot_eq --eq_ls --cp 0.004 --bottleneck 3 --auxdata --correct_freq_offset \
                        --write_latent z_hat.f32; \
                        cat z_hat.f32 | $<TARGET_FILE:test_rade_dec> 1 > features_c.f32; \
                        python3 loss.py features_in.f32 features_out.f32 --features_hat2 features_c.f32 --compare")
set_tests_properties(c_decoder_model19_check3 PROPERTIES PASS_REGULAR_EXPRESSION "PASS")
5 changes: 4 additions & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ target_link_libraries(radae_rx rade Python3::Python)
target_include_directories(radae_rx PRIVATE
"$<TARGET_PROPERTY:Python3::NumPy,INTERFACE_INCLUDE_DIRECTORIES>")

add_library(radecore rade_enc.c rade_enc_data.c)
add_library(radecore rade_enc.c rade_dec.c rade_enc_data.c rade_dec_data.c)
set_target_properties(radecore PROPERTIES
SOVERSION "0.1"
PUBLIC_HEADER "rade_api.h"
Expand All @@ -32,5 +32,8 @@ set_target_properties(radecore PROPERTIES
# Command-line programs built on the radecore library. They are leaf executables, so
# their link dependencies are PRIVATE (nothing consumes their usage requirements).

# Core encoder test program.
add_executable(test_rade_enc test_rade_enc.c)
target_link_libraries(test_rade_enc PRIVATE radecore opus m)

# Core decoder test program (latents on stdin, features on stdout).
add_executable(test_rade_dec test_rade_dec.c)
target_link_libraries(test_rade_dec PRIVATE radecore opus m)

add_executable(write_rade_weights write_rade_weights.c)
target_link_libraries(write_rade_weights PRIVATE radecore opus m)
3 changes: 2 additions & 1 deletion src/rade_core.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,9 @@ void rade_init_encoder(RADEEncState *enc_state);
void rade_core_encoder(RADEEncState *enc_state, const RADEEnc *model, float *z, const float *features, int arch, int bottleneck);

void rade_init_decoder(RADEDecState *dec_state);
void rade_core_decoder(RADEDecState *dec_state, const RADEDec *model, float *features, const float *z_hat);
void rade_core_decoder(RADEDecState *dec_state, const RADEDec *model, float *features, const float *z_hat, int arch);

extern const WeightArray radeenc_arrays[];
extern const WeightArray radedec_arrays[];

#endif
102 changes: 102 additions & 0 deletions src/rade_dec.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
/* Copyright (c) 2022 Amazon
Written by Jan Buethe */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "rade_dec.h"
#include "rade_constants.h"
#include "os_support.h"

/* Put a decoder state into its start-up condition by zero-filling the whole
   structure (the initialized flag and every GRU/conv state buffer). */
void rade_init_decoder(RADEDecState *dec_state)
{
    const size_t state_bytes = sizeof *dec_state;

    memset(dec_state, 0, state_bytes);
}

/* One-shot clearing of a conv1d history buffer.
   When *init is zero, `dilation` consecutive chunks of `len` floats starting at
   `mem` are cleared.  *init is set to 1 on every call, so later calls that share
   the same flag leave `mem` untouched. */
static void conv1_cond_init(float *mem, int len, int dilation, int *init)
{
    const int needs_clear = (*init == 0);

    if (needs_clear) {
        int chunk = 0;
        while (chunk < dilation) {
            OPUS_CLEAR(&mem[chunk*len], len);
            chunk++;
        }
    }
    *init = 1;
}

/* Run one step of the core decoder: the latent vector is passed through
   dec_dense1, then five GRU -> GLU -> conv1d stages, and finally dec_output.
   Every layer appends its output to a local concatenation buffer; each later
   layer is handed the start of that buffer as its input, and dec_output writes
   its result to `features`.  GRU and conv state is kept in dec_state between
   calls.  `arch` is forwarded unchanged to every compute_* call. */
void rade_core_decoder(
RADEDecState *dec_state,
const RADEDec *model,
float *features, /* o: four concatenated feature vecs */
const float *latents, /* i: latent vector */
int arch
)
{
/* one slot per layer output, laid out in the order the layers run */
float buffer[DEC_DENSE1_OUT_SIZE + DEC_GRU1_OUT_SIZE + DEC_GRU2_OUT_SIZE + DEC_GRU3_OUT_SIZE + DEC_GRU4_OUT_SIZE + DEC_GRU5_OUT_SIZE
+ DEC_CONV1_OUT_SIZE + DEC_CONV2_OUT_SIZE + DEC_CONV3_OUT_SIZE + DEC_CONV4_OUT_SIZE + DEC_CONV5_OUT_SIZE];
/* number of floats written to buffer so far; also the conv1d input size at each stage */
int output_index = 0;

/* run decoder stack and concatenate output in buffer */
compute_generic_dense(&model->dec_dense1, &buffer[output_index], latents, ACTIVATION_TANH, arch);
output_index += DEC_DENSE1_OUT_SIZE;

/* stage 1 */
compute_generic_gru(&model->dec_gru1_input, &model->dec_gru1_recurrent, dec_state->gru1_state, buffer, arch);
compute_glu(&model->dec_glu1, &buffer[output_index], dec_state->gru1_state, arch);
output_index += DEC_GRU1_OUT_SIZE;
/* NOTE(review): conv1_cond_init() sets dec_state->initialized to 1 on this first
   call, so the conv2..conv5 calls below never clear anything; their state is only
   zero if the caller used rade_init_decoder() (which memsets the whole struct). */
conv1_cond_init(dec_state->conv1_state, output_index, 1, &dec_state->initialized);
compute_generic_conv1d(&model->dec_conv1, &buffer[output_index], dec_state->conv1_state, buffer, output_index, ACTIVATION_TANH, arch);
output_index += DEC_CONV1_OUT_SIZE;

/* stage 2 */
compute_generic_gru(&model->dec_gru2_input, &model->dec_gru2_recurrent, dec_state->gru2_state, buffer, arch);
compute_glu(&model->dec_glu2, &buffer[output_index], dec_state->gru2_state, arch);
output_index += DEC_GRU2_OUT_SIZE;
conv1_cond_init(dec_state->conv2_state, output_index, 1, &dec_state->initialized);
compute_generic_conv1d(&model->dec_conv2, &buffer[output_index], dec_state->conv2_state, buffer, output_index, ACTIVATION_TANH, arch);
output_index += DEC_CONV2_OUT_SIZE;

/* stage 3 */
compute_generic_gru(&model->dec_gru3_input, &model->dec_gru3_recurrent, dec_state->gru3_state, buffer, arch);
compute_glu(&model->dec_glu3, &buffer[output_index], dec_state->gru3_state, arch);
output_index += DEC_GRU3_OUT_SIZE;
conv1_cond_init(dec_state->conv3_state, output_index, 1, &dec_state->initialized);
compute_generic_conv1d(&model->dec_conv3, &buffer[output_index], dec_state->conv3_state, buffer, output_index, ACTIVATION_TANH, arch);
output_index += DEC_CONV3_OUT_SIZE;

/* stage 4 */
compute_generic_gru(&model->dec_gru4_input, &model->dec_gru4_recurrent, dec_state->gru4_state, buffer, arch);
compute_glu(&model->dec_glu4, &buffer[output_index], dec_state->gru4_state, arch);
output_index += DEC_GRU4_OUT_SIZE;
conv1_cond_init(dec_state->conv4_state, output_index, 1, &dec_state->initialized);
compute_generic_conv1d(&model->dec_conv4, &buffer[output_index], dec_state->conv4_state, buffer, output_index, ACTIVATION_TANH, arch);
output_index += DEC_CONV4_OUT_SIZE;

/* stage 5 */
compute_generic_gru(&model->dec_gru5_input, &model->dec_gru5_recurrent, dec_state->gru5_state, buffer, arch);
compute_glu(&model->dec_glu5, &buffer[output_index], dec_state->gru5_state, arch);
output_index += DEC_GRU5_OUT_SIZE;
conv1_cond_init(dec_state->conv5_state, output_index, 1, &dec_state->initialized);
compute_generic_conv1d(&model->dec_conv5, &buffer[output_index], dec_state->conv5_state, buffer, output_index, ACTIVATION_TANH, arch);
output_index += DEC_CONV5_OUT_SIZE;

/* output layer: linear projection of the full concatenation buffer into features */
compute_generic_dense(&model->dec_output, features, buffer, ACTIVATION_LINEAR, arch);
}
48 changes: 48 additions & 0 deletions src/rade_dec.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
/* Copyright (c) 2022 Amazon
Written by Jan Buethe */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#ifndef RADE_DEC_H
#define RADE_DEC_H

#include "rade_core.h"
#include "rade_dec_data.h"

/* State of the core decoder that persists between rade_core_decoder() calls.
   rade_init_decoder() zero-fills the whole structure.
   The *_STATE_SIZE macros are presumably provided by rade_dec_data.h - confirm. */
struct RADEDecStruct {
/* latch passed to conv1_cond_init() in rade_dec.c; set to 1 by the first decoder call */
int initialized;
/* state buffers handed to compute_generic_gru() for GRU layers 1..5 */
float gru1_state[DEC_GRU1_STATE_SIZE];
float gru2_state[DEC_GRU2_STATE_SIZE];
float gru3_state[DEC_GRU3_STATE_SIZE];
float gru4_state[DEC_GRU4_STATE_SIZE];
float gru5_state[DEC_GRU5_STATE_SIZE];
/* state buffers handed to compute_generic_conv1d() for conv layers 1..5 */
float conv1_state[DEC_CONV1_STATE_SIZE];
float conv2_state[DEC_CONV2_STATE_SIZE];
float conv3_state[DEC_CONV3_STATE_SIZE];
float conv4_state[DEC_CONV4_STATE_SIZE];
float conv5_state[DEC_CONV5_STATE_SIZE];
};

#endif
4 changes: 2 additions & 2 deletions src/rade_dec_data.c
Original file line number Diff line number Diff line change
Expand Up @@ -222148,14 +222148,14 @@ const WeightArray radedec_arrays[] = {
#endif /* USE_WEIGHTS_FILE */

#ifndef DUMP_BINARY_WEIGHTS
int init_radedec(RADEDec *model, const WeightArray *arrays) {
int init_radedec(RADEDec *model, const WeightArray *arrays, int output_dim) {
if (linear_init(&model->dec_dense1, arrays, "dec_dense1_bias", NULL, NULL,"dec_dense1_weights_float", NULL, NULL, NULL, 80, 96)) return 1;
if (linear_init(&model->dec_glu1, arrays, "dec_glu1_bias", "dec_glu1_subias", "dec_glu1_weights_int8","dec_glu1_weights_float", NULL, NULL, "dec_glu1_scale", 96, 96)) return 1;
if (linear_init(&model->dec_glu2, arrays, "dec_glu2_bias", "dec_glu2_subias", "dec_glu2_weights_int8","dec_glu2_weights_float", NULL, NULL, "dec_glu2_scale", 96, 96)) return 1;
if (linear_init(&model->dec_glu3, arrays, "dec_glu3_bias", "dec_glu3_subias", "dec_glu3_weights_int8","dec_glu3_weights_float", NULL, NULL, "dec_glu3_scale", 96, 96)) return 1;
if (linear_init(&model->dec_glu4, arrays, "dec_glu4_bias", "dec_glu4_subias", "dec_glu4_weights_int8","dec_glu4_weights_float", NULL, NULL, "dec_glu4_scale", 96, 96)) return 1;
if (linear_init(&model->dec_glu5, arrays, "dec_glu5_bias", "dec_glu5_subias", "dec_glu5_weights_int8","dec_glu5_weights_float", NULL, NULL, "dec_glu5_scale", 96, 96)) return 1;
if (linear_init(&model->dec_output, arrays, "dec_output_bias", NULL, NULL,"dec_output_weights_float", NULL, NULL, NULL, 736, 84)) return 1;
if (linear_init(&model->dec_output, arrays, "dec_output_bias", NULL, NULL,"dec_output_weights_float", NULL, NULL, NULL, 736, output_dim)) return 1;
if (linear_init(&model->dec_gru1_input, arrays, "dec_gru1_input_bias", "dec_gru1_input_subias", "dec_gru1_input_weights_int8","dec_gru1_input_weights_float", "dec_gru1_input_weights_idx", NULL, "dec_gru1_input_scale", 96, 288)) return 1;
if (linear_init(&model->dec_gru1_recurrent, arrays, "dec_gru1_recurrent_bias", "dec_gru1_recurrent_subias", "dec_gru1_recurrent_weights_int8","dec_gru1_recurrent_weights_float", NULL, NULL, "dec_gru1_recurrent_scale", 96, 288)) return 1;
if (linear_init(&model->dec_gru2_input, arrays, "dec_gru2_input_bias", "dec_gru2_input_subias", "dec_gru2_input_weights_int8","dec_gru2_input_weights_float", "dec_gru2_input_weights_idx", NULL, "dec_gru2_input_scale", 224, 288)) return 1;
Expand Down
2 changes: 1 addition & 1 deletion src/rade_dec_data.h
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,6 @@ struct RADEDec {
LinearLayer dec_conv5;
};

int init_radedec(RADEDec *model, const WeightArray *arrays);
int init_radedec(RADEDec *model, const WeightArray *arrays, int output_dim);

#endif /* RADE_DEC_DATA_H */
3 changes: 0 additions & 3 deletions src/rade_enc.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,4 @@ struct RADEEncStruct {
float conv5_state[2*ENC_CONV5_STATE_SIZE];
};

void rade_encode_dframe(RADEEncState *enc_state, const RADEEnc *model, float *latents, float *initial_state, const float *input);


#endif
119 changes: 119 additions & 0 deletions src/test_rade_dec.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
/* RADE core decoder test program, z_hat.f32 on stdin, features_out.f32 on stdout */

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <sys/stat.h>

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "rade_core.h"
#include "rade_dec.h"
#include "rade_dec_data.h"

int opus_select_arch(void);

/*
 * RADE core decoder test driver.
 *
 * Reads latent vectors (RADE_LATENT_DIM floats each) from stdin, decodes each
 * one with rade_core_decoder(), and for every latent vector writes
 * frames_per_step frames of nb_total_features floats to stdout.  Only the
 * first num_used_features entries of each frame (plus one auxdata entry when
 * enabled) are filled from the decoder output; the rest stay 0.
 *
 *   argv[1]  auxdata flag (0 or 1); non-zero adds one decoded feature per frame
 *   argv[2]  optional weights blob; when absent the built-in radedec_arrays
 *            are used
 *
 * Returns 0 on success.  Exits with status 1 on a usage error or when the
 * weights cannot be loaded; returns 1 when writing to stdout fails.
 * Runtime failures are reported explicitly rather than via assert(), so they
 * are still detected in builds compiled with NDEBUG.
 */
int main(int argc, char *argv[])
{
    RADEDec dec_model;
    RADEDecState dec_state;

    if (argc < 2) {
        fprintf(stderr, "usage: %s auxdata[0-1] [weights_blob.bin]\n", argv[0]);
        exit(1);
    }

    int auxdata = atoi(argv[1]);
    int nb_total_features = 36;
    int num_features = 20;
    int num_used_features = 20;
    int frames_per_step = RADE_FRAMES_PER_STEP;

    if (auxdata) {
        num_features += 1;
    }
    int output_dim = num_features*frames_per_step;

    int fd = -1;
    void *data = MAP_FAILED;
    int len = 0;
    int nb_arrays;
    struct stat st;
    WeightArray *list = NULL;
    int exit_code = 0;

    if (argc == 3) {
        const char *filename = argv[2];
        fprintf(stderr, "loading %s ....\n", filename);
        if (stat(filename, &st) == -1) {
            perror(filename);
            exit(1);
        }
        len = (int)st.st_size;
        fprintf(stderr, "size is %d\n", len);
        fd = open(filename, O_RDONLY);
        if (fd == -1) {
            perror(filename);
            exit(1);
        }
        // note this needs to stay mapped at run time
        data = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
        if (data == MAP_FAILED) {
            // mmap signals failure with MAP_FAILED, not NULL (also hit for an empty file)
            perror("mmap");
            close(fd);
            exit(1);
        }
        nb_arrays = parse_weights(&list, data, len);
        if (nb_arrays < 0 || list == NULL) {
            // a negative count or NULL list means the blob could not be parsed
            fprintf(stderr, "Error parsing weights from %s\n", filename);
            munmap(data, len);
            close(fd);
            exit(1);
        }
        for (int i=0;i<nb_arrays;i++) {
            fprintf(stderr, "found %s: size %d\n", list[i].name, list[i].size);
        }
        if (init_radedec(&dec_model, list, output_dim) != 0) {
            fprintf(stderr, "Error initialising decoder model from %s\n", argv[2]);
            exit(1);
        }
    } else if (init_radedec(&dec_model, radedec_arrays, output_dim) != 0) {
        fprintf(stderr, "Error initialising built-in decoder model\n");
        exit(1);
    }
    rade_init_decoder(&dec_state);

    // the loaded model must accept exactly one latent vector per step
    if (dec_model.dec_dense1.nb_inputs != RADE_LATENT_DIM) {
        fprintf(stderr, "Error: model expects %d latent inputs, RADE_LATENT_DIM is %d\n",
                dec_model.dec_dense1.nb_inputs, RADE_LATENT_DIM);
        exit(1);
    }

    float features_write[frames_per_step*nb_total_features];
    float features[output_dim];
    float z_hat[RADE_LATENT_DIM];

    // From celt/cpu_support.h:
    /* We currently support 5 x86 variants:
     * arch[0] -> non-sse
     * arch[1] -> sse
     * arch[2] -> sse2
     * arch[3] -> sse4.1
     * arch[4] -> avx
     */
    int arch = 0;

    // This auto-magically selects best arch
    // arch = opus_select_arch();

    fprintf(stderr, "arch: %d auxdata: %d output_dim: %d n_z_in: %d\n",
            arch, auxdata, output_dim, dec_model.dec_dense1.nb_inputs);
    int nb_latent_vecs = 0;
    size_t to_write = frames_per_step*nb_total_features;
    // entries that are never filled from the decoder output stay at zero
    for(int i=0; i<frames_per_step*nb_total_features; i++) features_write[i] = 0.0;

    while(fread(z_hat, sizeof(float), RADE_LATENT_DIM, stdin) == RADE_LATENT_DIM) {
        rade_core_decoder(&dec_state, &dec_model, features, z_hat, arch);
        // repack num_features-wide decoder frames into nb_total_features-wide output frames
        for (int i=0; i<frames_per_step; i++) {
            for(int j=0; j<num_used_features; j++)
                features_write[i*nb_total_features+j] = features[i*num_features+j];
            if (auxdata)
                features_write[i*nb_total_features+num_used_features] = features[i*num_features+num_used_features];
        }
        if (fwrite(features_write, sizeof(float), to_write, stdout) != to_write) {
            perror("fwrite");
            exit_code = 1;
            break;
        }
        fflush(stdout);
        nb_latent_vecs++;
    }
    fprintf(stderr, "%d latent vectors processed\n", nb_latent_vecs);

    if (argc == 3) {
        munmap(data, len);
        close(fd);
        free(list);
    }

    return exit_code;
}

0 comments on commit c748fbd

Please sign in to comment.