Skip to content

Commit

Permalink
Merge pull request #33 from drowe67/dr-cport
Browse files Browse the repository at this point in the history
C port of core encoder and decoder
  • Loading branch information
drowe67 authored Nov 25, 2024
2 parents 490dfcc + 98cf722 commit d05ed99
Show file tree
Hide file tree
Showing 48 changed files with 452,493 additions and 305 deletions.
2 changes: 1 addition & 1 deletion BBFM.md
Original file line number Diff line number Diff line change
Expand Up @@ -79,5 +79,5 @@ A single carrier PSK modem "back end" that connects the ML symbols to the radio.
python3 loss.py features_in.f32 features_rx_out.f32
loss: 0.035
```
This is a really good result, and likely inaudible. The `feature*.f32` files are produced as intermediate outputs form the `bbfm_inference.sh` and `bbfm_rx.sh` scripts.
This is a really good result, and likely inaudible. The `feature*.f32` files are produced as intermediate outputs from the `bbfm_inference.sh` and `bbfm_rx.sh` scripts.
99 changes: 88 additions & 11 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}")
endif("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}")

# Set default flags (from opus-ng build)
set(CMAKE_C_FLAGS "-O2 -fvisibility=hidden -fstack-protector-strong -W -Wall -Wextra -Wcast-align -Wnested-externs -Wshadow -Wstrict-prototypes")
set(CMAKE_C_FLAGS "-O2 -fstack-protector-strong -W -Wall -Wextra -Wcast-align -Wnested-externs -Wshadow -Wstrict-prototypes -DHAVE_CONFIG_H")

if(NOT CMAKE_CROSSCOMPILING)
# Python tells us the CFLAGS we need for Embedding Python in a C lib.
Expand Down Expand Up @@ -307,7 +307,7 @@ add_test(NAME radae_rx_awgn
./inference.sh model19_check3/checkpoints/checkpoint_epoch_100.pth wav/all.wav /dev/null \
--EbNodB 1 --freq_offset 13 \
--rate_Fs --pilots --pilot_eq --eq_ls --cp 0.004 --bottleneck 3 --time_offset -16 --write_rx rx.f32 \
--prepend_noise 1 --append_noise 3 --end_of_over --auxdata; \
--prepend_noise 1 --append_noise 3 --end_of_over --auxdata --correct_freq_offset; \
cat rx.f32 | python3 radae_rxe.py --model model19_check3/checkpoints/checkpoint_epoch_100.pth -v 2 > features_rx_out.f32; \
python3 loss.py features_in.f32 features_rx_out.f32 --loss 0.3 --acq_time_test 1.0 --clip_end 100")
set_tests_properties(radae_rx_awgn PROPERTIES PASS_REGULAR_EXPRESSION "PASS")
Expand Down Expand Up @@ -401,20 +401,40 @@ add_test(NAME radae_rx_slip_minus
# profiles a run with a 50 second file (no pass/fail, run with -V to get a rough idea of execution time)
add_test(NAME radae_rx_profile
COMMAND sh -c "cd ${CMAKE_SOURCE_DIR}; \
./inference.sh model17/checkpoints/checkpoint_epoch_100.pth wav/all.wav /dev/null \
--EbNodB 1 --freq_offset 13 --df_dt 0.1 \
./inference.sh model19_check3/checkpoints/checkpoint_epoch_100.pth wav/all.wav /dev/null \
--EbNodB 1 --freq_offset 13 --auxdata \
--rate_Fs --pilots --pilot_eq --eq_ls --cp 0.004 --bottleneck 3 --time_offset -16 --write_rx rx.f32 \
--prepend_noise 1 --append_noise 3 --end_of_over; \
cat rx.f32 | python3 -m cProfile -s time radae_rxe.py --model model17/checkpoints/checkpoint_epoch_100.pth -v 0 --noauxdata --no_stdout | head -n20")
cat rx.f32 | python3 -m cProfile -s time radae_rxe.py -v 1 --no_stdout | head -n20")

# Characterise run time using full simplex Tx stack (C core encoder version). No pass/fail, just for characterisation of run time
#
# Pipeline (run from ${CMAKE_SOURCE_DIR}):
#   lpcnet_demo -features $WAV -   : reads the wav file and writes its output to stdout
#   radae_tx                       : reads that stream on stdin; its output is discarded (/dev/null)
# "\\time" reaches the shell as "\time"; presumably the backslash is there to skip the bash
# "time" keyword so the external time utility (which supports -o/-f) is used - TODO confirm.
# With -o log.txt -f '%e' the elapsed time in seconds is written to log.txt.
# NOTE(review): time is the command of the first pipeline stage only (lpcnet_demo), so the
# figure is an approximation of the run time of the whole pipeline.
# The last two lines print RUN_TIME, the wav duration DUR (from soxi -D) and
# percent = 100*RUN_TIME/DUR.
add_test(NAME radae_tx_stack_c
COMMAND bash -c "WAV='wav/all.wav'; cd ${CMAKE_SOURCE_DIR}; \
\\time -o log.txt -f '%e' ${CMAKE_CURRENT_BINARY_DIR}/src/lpcnet_demo -features $WAV - |
PYTHONPATH='.' ${CMAKE_CURRENT_BINARY_DIR}/src/radae_tx > /dev/null; \
RUN_TIME=$(cat log.txt); DUR=$(soxi -D $WAV); percent=$(python3 -c \"percent=100*$RUN_TIME/$DUR; print('%f' % percent)\"); \
printf \"\nrun time: %5.2f duration: %5.2f percent CPU: %5.2f\n\n\" $RUN_TIME $DUR $percent ")

# performs a run using the streaming FARGAN decoder, ie the full simplex rx decode stack. No pass/fail, just for characterisation of run time
add_test(NAME radae_rx_fargan
# Characterise run time using full simplex Rx stack (Python core decoder version). No pass/fail, just for characterisation of run time
add_test(NAME radae_rx_stack_py
COMMAND bash -c "WAV='wav/all.wav'; cd ${CMAKE_SOURCE_DIR}; \
./inference.sh model17/checkpoints/checkpoint_epoch_100.pth $WAV /dev/null \
--EbNodB 10 --freq_offset 13 --df_dt -0.1 \
./inference.sh model19_check3/checkpoints/checkpoint_epoch_100.pth $WAV /dev/null \
--EbNodB 10 --freq_offset 13 --df_dt -0.1 --auxdata \
--rate_Fs --pilots --pilot_eq --eq_ls --cp 0.004 --bottleneck 3 --time_offset -16 --write_rx rx.f32 \
--prepend_noise 1 --append_noise 3 --end_of_over; \
\\time -o log.txt -f '%e' cat rx.f32 | python3 radae_rxe.py --model model17/checkpoints/checkpoint_epoch_100.pth -v 0 --noauxdata | \
--prepend_noise 1 --append_noise 3 --end_of_over --correct_freq_offset ; \
\\time -o log.txt -f '%e' cat rx.f32 | python3 radae_rxe.py -v 0 | \
${CMAKE_CURRENT_BINARY_DIR}/src/lpcnet_demo -fargan-synthesis - /dev/null; \
RUN_TIME=$(cat log.txt); DUR=$(soxi -D $WAV); percent=$(python3 -c \"percent=100*$RUN_TIME/$DUR; print('%f' % percent)\"); \
printf \"\nrun time: %5.2f duration: %5.2f percent CPU: %5.2f\n\n\" $RUN_TIME $DUR $percent ")

# Characterise run time using full simplex Rx stack, using the C core decoder (radae_rx binary)
# rather than the Python radae_rxe.py, for comparison. No pass/fail, just for characterisation
# of run time.
#
# Steps (run from ${CMAKE_SOURCE_DIR}):
#   1. inference.sh with model19_check3 and --write_rx rx.f32 produces the received signal file
#      (audio output goes to /dev/null).
#   2. rx.f32 is piped through radae_rx and then lpcnet_demo -fargan-synthesis, whose output is
#      discarded (/dev/null).
#   3. RUN_TIME (from log.txt), the wav duration DUR (from soxi -D) and
#      percent = 100*RUN_TIME/DUR are printed.
# NOTE(review): time is the command of the first pipeline stage only ("cat rx.f32"), so the
# elapsed figure in log.txt is an approximation of the run time of the whole pipeline.
add_test(NAME radae_rx_stack_c
COMMAND bash -c "WAV='wav/all.wav'; cd ${CMAKE_SOURCE_DIR}; \
./inference.sh model19_check3/checkpoints/checkpoint_epoch_100.pth $WAV /dev/null \
--EbNodB 10 --freq_offset 13 --df_dt -0.1 --auxdata \
--rate_Fs --pilots --pilot_eq --eq_ls --cp 0.004 --bottleneck 3 --time_offset -16 --write_rx rx.f32 \
--prepend_noise 1 --append_noise 3 --end_of_over --correct_freq_offset ; \
\\time -o log.txt -f '%e' cat rx.f32 | PYTHONPATH='.' ${CMAKE_CURRENT_BINARY_DIR}/src/radae_rx | \
${CMAKE_CURRENT_BINARY_DIR}/src/lpcnet_demo -fargan-synthesis - /dev/null; \
RUN_TIME=$(cat log.txt); DUR=$(soxi -D $WAV); percent=$(python3 -c \"percent=100*$RUN_TIME/$DUR; print('%f' % percent)\"); \
printf \"\nrun time: %5.2f duration: %5.2f percent CPU: %5.2f\n\n\" $RUN_TIME $DUR $percent ")
Expand Down Expand Up @@ -474,6 +494,63 @@ add_test(NAME radae_rx_embed_c
set_tests_properties(radae_rx_embed_c PROPERTIES PASS_REGULAR_EXPRESSION "PASS")


# C Port of Core Encoder/decoder ------------------------------------------------------------------------------------

if (NOT WIN32)
# Test the C core encoder with model05. We test by comparing the loss of features_in/features_out,
# which can all happen at rate Rs. The model05.bin weights are loaded at run time, as the
# compiled-in weights are for model19_check3.
#
# Steps (run from ${CMAKE_SOURCE_DIR}):
#   1. lpcnet_demo -features writes features for wav/brian_g8sez.wav to features_in.f32.
#   2. test_rade_enc reads features_in.f32 on stdin and writes z_c.f32.
#      NOTE(review): the meaning of the "1 0" arguments is not visible here - confirm against
#      the test_rade_enc source.
#   3. stateful_encoder.py is run with --read_latent z_c.f32 and --loss_test 0.2.
# The test passes when the output matches "PASS".
add_test(NAME c_encoder_model5
COMMAND sh -c "cd ${CMAKE_SOURCE_DIR};
${CMAKE_CURRENT_BINARY_DIR}/src/lpcnet_demo -features wav/brian_g8sez.wav features_in.f32; \
cat features_in.f32 | ${CMAKE_CURRENT_BINARY_DIR}/src/test_rade_enc 1 0 ${CMAKE_SOURCE_DIR}/bin/model05.bin > z_c.f32; \
python3 stateful_encoder.py model05/checkpoints/checkpoint_epoch_100.pth features_in.f32 /dev/null --read_latent z_c.f32 --loss_test 0.2")
set_tests_properties(c_encoder_model5 PROPERTIES PASS_REGULAR_EXPRESSION "PASS")

# Test the C core encoder with model19_check3. We need the DSP code in the loop to test the
# core encoder with this model, as the bottleneck (3) is at rate Fs.
#
# Steps (run from ${CMAKE_SOURCE_DIR}):
#   1. lpcnet_demo -features writes features for wav/brian_g8sez.wav to features_in.f32.
#   2. Reference path: features_in.f32 | radae_txe.py | radae_rxe.py > features_out.f32.
#   3. C encoder path: features_in.f32 | test_rade_enc 3 1 > z_c.f32, then
#      z_c.f32 | radae_txe.py --bypass_enc | radae_rxe.py > features_rx_out.f32.
#      NOTE(review): "3 1" presumably selects bottleneck 3 with auxdata enabled - confirm
#      against the test_rade_enc source.
#   4. loss.py --compare is given features_in.f32, features_out.f32 and (via --features_hat2)
#      features_rx_out.f32.
# The test passes when the output matches "PASS".
add_test(NAME c_encoder_model19_check3
COMMAND sh -c "cd ${CMAKE_SOURCE_DIR};
${CMAKE_CURRENT_BINARY_DIR}/src/lpcnet_demo -features wav/brian_g8sez.wav features_in.f32; \
cat features_in.f32 | python3 radae_txe.py | python3 radae_rxe.py > features_out.f32; \
cat features_in.f32 | ${CMAKE_CURRENT_BINARY_DIR}/src/test_rade_enc 3 1 > z_c.f32; \
cat z_c.f32 | python3 radae_txe.py --bypass_enc | python3 radae_rxe.py > features_rx_out.f32; \
python3 loss.py features_in.f32 features_out.f32 --features_hat2 features_rx_out.f32 --compare")
set_tests_properties(c_encoder_model19_check3 PROPERTIES PASS_REGULAR_EXPRESSION "PASS")

# Test the C core decoder with model05: compare the Python decoder output (features_out.f32)
# with the C decoder output (features_c.f32), for the same latent inputs z.f32.
# Note inference.sh creates features_in.f32 & features_out.f32.
#
# Steps (run from ${CMAKE_SOURCE_DIR}):
#   1. inference.sh with model05 and --write_latent z.f32 writes the latents (audio output goes
#      to /dev/null).
#   2. test_rade_dec reads z.f32 on stdin, is given bin/model05.bin, and writes features_c.f32.
#      NOTE(review): the meaning of the leading "0" argument is not visible here - confirm
#      against the test_rade_dec source.
#   3. loss.py --compare is given features_in.f32, features_out.f32 and (via --features_hat2)
#      features_c.f32.
# The test passes when the output matches "PASS".
add_test(NAME c_decoder_model5
COMMAND sh -c "cd ${CMAKE_SOURCE_DIR};
./inference.sh model05/checkpoints/checkpoint_epoch_100.pth wav/brian_g8sez.wav /dev/null --write_latent z.f32; \
cat z.f32 | ${CMAKE_CURRENT_BINARY_DIR}/src/test_rade_dec 0 ${CMAKE_SOURCE_DIR}/bin/model05.bin > features_c.f32; \
python3 loss.py features_in.f32 features_out.f32 --features_hat2 features_c.f32 --compare")
set_tests_properties(c_decoder_model5 PROPERTIES PASS_REGULAR_EXPRESSION "PASS")

# Test the C core decoder with model19_check3. As well as testing the C core decoder, this test
# also uses radae_rxe.py in bypass mode (--bypass_dec) to produce the latents fed to it.
#
# Steps (run from ${CMAKE_SOURCE_DIR}):
#   1. inference.sh with model19_check3 and --write_rx rx.f32 writes the received signal
#      (audio output goes to /dev/null).
#   2. rx.f32 | radae_rxe.py --bypass_dec > z_hat.f32.
#   3. z_hat.f32 | test_rade_dec 1 > features_c.f32.
#      NOTE(review): no weights file is passed, so presumably the compiled-in weights are used,
#      and "1" presumably enables auxdata - confirm against the test_rade_dec source.
#   4. loss.py --compare is given features_in.f32, features_out.f32 and (via --features_hat2)
#      features_c.f32.
# The test passes when the output matches "PASS".
#
# Every command in the sh -c string is terminated with "; \" so the script is a single logical
# line with explicit separators, rather than relying on literal newlines embedded in the
# quoted argument.
add_test(NAME c_decoder_model19_check3
COMMAND sh -c "cd ${CMAKE_SOURCE_DIR}; \
./inference.sh model19_check3/checkpoints/checkpoint_epoch_100.pth wav/brian_g8sez.wav /dev/null \
--rate_Fs --pilots --pilot_eq --eq_ls --cp 0.004 --bottleneck 3 --auxdata --correct_freq_offset \
--write_rx rx.f32; \
cat rx.f32 | python3 radae_rxe.py --bypass_dec > z_hat.f32; \
cat z_hat.f32 | ${CMAKE_CURRENT_BINARY_DIR}/src/test_rade_dec 1 > features_c.f32; \
python3 loss.py features_in.f32 features_out.f32 --features_hat2 features_c.f32 --compare")
set_tests_properties(c_decoder_model19_check3 PROPERTIES PASS_REGULAR_EXPRESSION "PASS")
endif(NOT WIN32)

# Test embedded data (--auxdata) use for false sync detection, with C core decoder, with C callable embedded Python API.
# --foff_err forces a false sync state after first sync. See also "radae_rx_aux_mpp" above. Tests uw_errors are being passed
# back from C decoder to Python state machine
#
# Steps (run from ${CMAKE_SOURCE_DIR}):
#   1. test/make_g.sh is run first; presumably it generates the g_mpp.f32 file passed to
#      --g_file below - TODO confirm.
#   2. inference.sh with model19_check3 and --write_rx rx.f32 writes the received signal
#      (audio output goes to /dev/null).
#   3. rx.f32 | radae_rx 1 > features_c.f32, with PYTHONPATH='.' set for the radae_rx process.
#      NOTE(review): the meaning of the "1" argument is not visible here - confirm against the
#      radae_rx source.
#   4. loss.py compares features_in.f32 with features_c.f32 using --loss 0.3 --clip_start 300.
# NOTE(review): --foff_err is mentioned above but does not appear in the command below - confirm
# whether it is still required for this test.
# The test passes when the output matches "PASS".
add_test(NAME c_decoder_aux_mpp
COMMAND sh -c "cd ${CMAKE_SOURCE_DIR}; \
test/make_g.sh; \
./inference.sh model19_check3/checkpoints/checkpoint_epoch_100.pth wav/all.wav /dev/null \
--rate_Fs --pilots --pilot_eq --eq_ls --cp 0.004 --bottleneck 3 --time_offset -16 --auxdata \
--EbNodB 4 --freq_offset -11 --g_file g_mpp.f32 --write_rx rx.f32 \
--prepend_noise 1 --append_noise 3 --end_of_over --correct_freq_offset; \
cat rx.f32 | PYTHONPATH='.' ${CMAKE_CURRENT_BINARY_DIR}/src/radae_rx 1 > features_c.f32; \
python3 loss.py features_in.f32 features_c.f32 --loss 0.3 --clip_start 300")
set_tests_properties(c_decoder_aux_mpp PROPERTIES PASS_REGULAR_EXPRESSION "PASS")

# BBFM -----------------------------------------------------------------------------------------------

# single carrier modem internal (inside single_carrier class) tests
Expand Down
26 changes: 26 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -572,3 +572,29 @@ Work in progress notes, needs a clean up once this settles down.
```
sox ~/Downloads/sdr.ironstonerange.com_2024-08-19T22_03_13Z_7185.00_lsb.wav -t .s16 -r 8000 -c 1 - | python3 int16tof32.py --zeropad | python3 radae_rx.py model19_check3/checkpoints/checkpoint_epoch_100.pth -v 2 --auxdata | ./build/src/lpcnet_demo -fargan-synthesis - - | aplay -f S16_LE -r 16000
```
# C Port of Core Encoder and Decoder
The model weights can be compiled in or loaded at init-time from a binary blob. The actual model is hard coded in `rade_enc.c` and `rade_dec.c`, and can't be easily changed.
To compile-in the weights:
1. Export weights:
```
cd radae
python3 export_rade_weights.py model19_check3/checkpoints/checkpoint_epoch_100.pth src
```
1. We need to make some manual changes to the weight files to support changing input dimension at run time. In `rade_enc_dat.c`, the first call to `linear_init()` should look like:
```
int init_radeenc(RADEEnc *model, const WeightArray *arrays, int input_dim) {
if (linear_init(&model->enc_dense1, arrays, "enc_dense1_bias", NULL, NULL,"enc_dense1_weights_float", NULL, NULL, NULL, input_dim, 64)) return 1;
```
i.e. the fixed input dimension (84 for `model19_check3`, 80 for earlier models without auxdata) should be changed to the `input_dim` variable. This allows us to enable/disable `auxdata` at init time, without changing the C code for the model.
1. Also make manual changes to support `output_dim` in `rade_dec_dat.c`, `init_radedec()`.
3. Build C code.
4. Run ctests.
To export the compiled-in weights to a binary blob:
```
cd radae/build
./src/write_rade_weights ../bin/model05.bin
```
These can then be loaded at init-time, see examples in `src/test_rade_enc.c` and `src/test_rade_dec.c`.
2 changes: 1 addition & 1 deletion bbfm_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@

# load model from a checkpoint file
model = BBFM(num_features, latent_dim, args.CNRdB)
checkpoint = torch.load(args.model_name, map_location='cpu')
checkpoint = torch.load(args.model_name, map_location='cpu', weights_only=True)
model.load_state_dict(checkpoint['state_dict'], strict=False)
checkpoint['state_dict'] = model.state_dict()

Expand Down
2 changes: 1 addition & 1 deletion bbfm_rx.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@

# load model from a checkpoint file
model = BBFM(num_features, latent_dim, CNRdB=100)
checkpoint = torch.load(args.model_name, map_location='cpu')
checkpoint = torch.load(args.model_name, map_location='cpu', weights_only=True)
model.load_state_dict(checkpoint['state_dict'], strict=False)
checkpoint['state_dict'] = model.state_dict()

Expand Down
Binary file added bin/model05.bin
Binary file not shown.
Binary file added bin/model19_check3.bin
Binary file not shown.
8 changes: 5 additions & 3 deletions cmake/BuildOpus.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ if(APPLE AND BUILD_OSX_UNIVERSAL)
ExternalProject_Add(build_opus_x86
DOWNLOAD_EXTRACT_TIMESTAMP NO
BUILD_IN_SOURCE 1
PATCH_COMMAND sh -c "patch dnn/nnet.h < ${CMAKE_SOURCE_DIR}/src/opus-nnet.h.diff"
CONFIGURE_COMMAND ${CONFIGURE_COMMAND} --host=x86_64-apple-darwin --target=x86_64-apple-darwin CFLAGS=-arch\ x86_64\ -O2\ -mmacosx-version-min=10.11
BUILD_COMMAND $(MAKE)
INSTALL_COMMAND ""
Expand All @@ -23,6 +24,7 @@ ExternalProject_Add(build_opus_x86
ExternalProject_Add(build_opus_arm
DOWNLOAD_EXTRACT_TIMESTAMP NO
BUILD_IN_SOURCE 1
PATCH_COMMAND sh -c "patch dnn/nnet.h < ${CMAKE_SOURCE_DIR}/src/opus-nnet.h.diff"
CONFIGURE_COMMAND ${CONFIGURE_COMMAND} --host=aarch64-apple-darwin --target=aarch64-apple-darwin CFLAGS=-arch\ arm64\ -O2\ -mmacosx-version-min=10.11
BUILD_COMMAND $(MAKE)
INSTALL_COMMAND ""
Expand All @@ -44,7 +46,7 @@ add_custom_target(
libopus.a
DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/libopus${CMAKE_STATIC_LIBRARY_SUFFIX})

include_directories(${SOURCE_DIR}/dnn ${SOURCE_DIR}/celt ${SOURCE_DIR}/include)
include_directories(${SOURCE_DIR}/dnn ${SOURCE_DIR}/celt ${SOURCE_DIR}/include ${SOURCE_DIR})

add_library(opus STATIC IMPORTED)
add_dependencies(opus libopus.a)
Expand All @@ -54,8 +56,8 @@ set_target_properties(opus PROPERTIES

else(APPLE AND BUILD_OSX_UNIVERSAL)
ExternalProject_Add(build_opus
DOWNLOAD_EXTRACT_TIMESTAMP NO
BUILD_IN_SOURCE 1
PATCH_COMMAND sh -c "patch dnn/nnet.h < ${CMAKE_SOURCE_DIR}/src/opus-nnet.h.diff"
CONFIGURE_COMMAND ${CONFIGURE_COMMAND}
BUILD_COMMAND $(MAKE)
INSTALL_COMMAND ""
Expand All @@ -72,5 +74,5 @@ set_target_properties(opus PROPERTIES
IMPORTED_IMPLIB "${BINARY_DIR}/.libs/libopus${CMAKE_STATIC_LIBRARY_SUFFIX}"
)

include_directories(${SOURCE_DIR}/dnn ${SOURCE_DIR}/celt ${SOURCE_DIR}/include)
include_directories(${SOURCE_DIR}/dnn ${SOURCE_DIR}/celt ${SOURCE_DIR}/include ${SOURCE_DIR})
endif(APPLE AND BUILD_OSX_UNIVERSAL)
Loading

0 comments on commit d05ed99

Please sign in to comment.