Skip to content

Commit

Permalink
feat(rtc): add AudioResampler, combineAudioFrames (#286)
Browse files Browse the repository at this point in the history
  • Loading branch information
nbsp authored Oct 3, 2024
1 parent 5d0edf3 commit 9881d52
Show file tree
Hide file tree
Showing 6 changed files with 756 additions and 2 deletions.
5 changes: 5 additions & 0 deletions .changeset/smooth-lies-tie.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@livekit/rtc-node": minor
---

add AudioResampler, combineAudioFrames
2 changes: 1 addition & 1 deletion packages/livekit-rtc/rust-sdks
Submodule rust-sdks updated 95 files
+1 −1 .github/workflows/gen-protocol.yaml
+23 −12 .github/workflows/publish.yml
+1 −0 .nanparc
+16 −1 Cargo.lock
+1 −0 Cargo.toml
+2 −0 libwebrtc/.nanparc
+2 −0 livekit-api/.nanparc
+5 −5 livekit-api/src/signal_client/mod.rs
+4 −2 livekit-api/src/signal_client/signal_stream.rs
+2 −0 livekit-ffi/.nanparc
+2 −1 livekit-ffi/Cargo.toml
+78 −0 livekit-ffi/protocol/audio_frame.proto
+7 −1 livekit-ffi/protocol/ffi.proto
+5 −0 livekit-ffi/src/cabi.rs
+1 −0 livekit-ffi/src/conversion/mod.rs
+1 −0 livekit-ffi/src/conversion/resampler.rs
+207 −2 livekit-ffi/src/livekit.proto.rs
+6 −2 livekit-ffi/src/server/mod.rs
+119 −1 livekit-ffi/src/server/requests.rs
+147 −0 livekit-ffi/src/server/resampler.rs
+2 −0 livekit-protocol/.nanparc
+3 −0 livekit-protocol/generate_proto.sh
+68 −3 livekit-protocol/src/livekit.serde.rs
+2 −0 livekit-runtime/.nanparc
+2 −0 livekit/.nanparc
+24 −7 livekit/src/rtc_engine/mod.rs
+76 −71 livekit/src/rtc_engine/rtc_session.rs
+2 −0 soxr-sys/.nanparc
+32 −0 soxr-sys/Cargo.lock
+14 −0 soxr-sys/Cargo.toml
+46 −0 soxr-sys/build.rs
+1 −0 soxr-sys/generate_bindings.sh
+23 −0 soxr-sys/src/LICENCE
+39 −0 soxr-sys/src/aliases.h
+33 −0 soxr-sys/src/avfft32.c
+32 −0 soxr-sys/src/avfft32s.c
+75 −0 soxr-sys/src/ccrw2.h
+314 −0 soxr-sys/src/cr-core.c
+588 −0 soxr-sys/src/cr.c
+178 −0 soxr-sys/src/cr.h
+8 −0 soxr-sys/src/cr32.c
+8 −0 soxr-sys/src/cr32s.c
+8 −0 soxr-sys/src/cr64.c
+8 −0 soxr-sys/src/cr64s.c
+223 −0 soxr-sys/src/data-io.c
+39 −0 soxr-sys/src/data-io.h
+149 −0 soxr-sys/src/dbesi0.c
+54 −0 soxr-sys/src/dev32s.h
+42 −0 soxr-sys/src/dev64s.h
+1,346 −0 soxr-sys/src/fft4g.c
+23 −0 soxr-sys/src/fft4g.h
+36 −0 soxr-sys/src/fft4g32.c
+31 −0 soxr-sys/src/fft4g32s.c
+35 −0 soxr-sys/src/fft4g64.c
+92 −0 soxr-sys/src/fft4g_cache.h
+125 −0 soxr-sys/src/fifo.h
+277 −0 soxr-sys/src/filter.c
+44 −0 soxr-sys/src/filter.h
+75 −0 soxr-sys/src/half-coefs.h
+61 −0 soxr-sys/src/half-fir.h
+84 −0 soxr-sys/src/internal.h
+150 −0 soxr-sys/src/lib.rs
+31 −0 soxr-sys/src/math-wrap.h
+40 −0 soxr-sys/src/pffft-avx.h
+110 −0 soxr-sys/src/pffft-wrap.c
+1,946 −0 soxr-sys/src/pffft.c
+197 −0 soxr-sys/src/pffft.h
+39 −0 soxr-sys/src/pffft32.c
+34 −0 soxr-sys/src/pffft32s.c
+34 −0 soxr-sys/src/pffft64s.c
+150 −0 soxr-sys/src/poly-fir.h
+56 −0 soxr-sys/src/poly-fir0.h
+31 −0 soxr-sys/src/rdft.h
+24 −0 soxr-sys/src/rdft_t.h
+158 −0 soxr-sys/src/rint-clip.h
+102 −0 soxr-sys/src/rint.h
+1 −0 soxr-sys/src/samplerate.h
+28 −0 soxr-sys/src/soxr-config.h
+198 −0 soxr-sys/src/soxr-lsr.c
+78 −0 soxr-sys/src/soxr-lsr.h
+842 −0 soxr-sys/src/soxr.c
+344 −0 soxr-sys/src/soxr.h
+343 −0 soxr-sys/src/soxr.rs
+48 −0 soxr-sys/src/std-types.h
+89 −0 soxr-sys/src/util-simd.c
+8 −0 soxr-sys/src/util32s.c
+23 −0 soxr-sys/src/util32s.h
+8 −0 soxr-sys/src/util64s.c
+23 −0 soxr-sys/src/util64s.h
+115 −0 soxr-sys/src/vr-coefs.c
+94 −0 soxr-sys/src/vr-coefs.h
+651 −0 soxr-sys/src/vr32.c
+2 −0 webrtc-sys/.nanparc
+2 −0 webrtc-sys/build/.nanparc
+3 −3 webrtc-sys/src/audio_track.cpp
39 changes: 39 additions & 0 deletions packages/livekit-rtc/src/audio_frame.ts
Original file line number Diff line number Diff line change
Expand Up @@ -52,3 +52,42 @@ export class AudioFrame {
});
}
}

/**
 * Combines one or more `rtc.AudioFrame` objects into a single `rtc.AudioFrame`.
 *
 * A lone frame is returned unchanged. For a list, the sample data of every frame is
 * concatenated in order into one preallocated buffer; every frame must have the same sample
 * rate and number of channels as the first one.
 *
 * @param buffer - a single AudioFrame or list thereof
 * @returns the input frame itself when a single frame is passed, otherwise a new frame
 * containing the concatenated samples
 * @throws Error if the list is empty, or if any frame's sample rate or channel count differs
 * from the first frame's
 */
export const combineAudioFrames = (buffer: AudioFrame | AudioFrame[]): AudioFrame => {
  // Use Array.isArray rather than a truthiness test on `length`: an empty list has a falsy
  // length and would otherwise be handed back to the caller as if it were a frame.
  if (!Array.isArray(buffer)) {
    return buffer;
  }

  const first = buffer[0];
  if (first === undefined) {
    throw new Error('buffer is empty');
  }
  const { sampleRate, channels } = first;

  // First pass: validate every frame and measure the output.
  let totalSamplesPerChannel = 0;
  let totalSamples = 0;
  for (const frame of buffer) {
    if (frame.sampleRate !== sampleRate) {
      throw new Error(`sample rate mismatch: expected ${sampleRate}, got ${frame.sampleRate}`);
    }

    if (frame.channels !== channels) {
      throw new Error(`channel mismatch: expected ${channels}, got ${frame.channels}`);
    }

    totalSamplesPerChannel += frame.samplesPerChannel;
    totalSamples += frame.data.length;
  }

  // Second pass: copy each frame's samples straight into the preallocated buffer, avoiding
  // the intermediate per-sample JS arrays.
  const data = new Int16Array(totalSamples);
  let offset = 0;
  for (const frame of buffer) {
    data.set(frame.data, offset);
    offset += frame.data.length;
  }

  return new AudioFrame(data, sampleRate, channels, totalSamplesPerChannel);
};
166 changes: 166 additions & 0 deletions packages/livekit-rtc/src/audio_resampler.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
// SPDX-FileCopyrightText: 2024 LiveKit, Inc.
//
// SPDX-License-Identifier: Apache-2.0
import { AudioFrame } from './audio_frame';
import { FfiClient, FfiHandle } from './ffi_client';
import type {
FlushSoxResamplerResponse,
NewSoxResamplerResponse,
PushSoxResamplerResponse,
} from './proto/audio_frame_pb';
import {
FlushSoxResamplerRequest,
NewSoxResamplerRequest,
PushSoxResamplerRequest,
SoxQualityRecipe,
SoxResamplerDataType,
} from './proto/audio_frame_pb';

/**
 * Resampler quality. Higher quality settings result in better audio quality but require more
 * processing power.
 *
 * Every member reuses the value of the correspondingly named `SoxQualityRecipe` entry from the
 * generated FFI protocol, so a value of this enum can be forwarded as the `qualityRecipe` of a
 * `NewSoxResamplerRequest`.
 */
export enum AudioResamplerQuality {
QUICK = SoxQualityRecipe.SOXR_QUALITY_QUICK,
LOW = SoxQualityRecipe.SOXR_QUALITY_LOW,
MEDIUM = SoxQualityRecipe.SOXR_QUALITY_MEDIUM,
HIGH = SoxQualityRecipe.SOXR_QUALITY_HIGH,
// Note the spelling difference: the protocol entry is `VERYHIGH`, without an underscore.
VERY_HIGH = SoxQualityRecipe.SOXR_QUALITY_VERYHIGH,
}

/**
 * AudioResampler provides functionality to resample audio data from an input sample rate to
 * an output sample rate using the Sox resampling library. It supports multiple channels and
 * configurable resampling quality.
 *
 * All work is delegated to the native side through `FfiClient`; this class only builds the
 * requests and wraps the returned bytes in {@link AudioFrame} objects. Samples are exchanged as
 * interleaved signed 16-bit integers (`SOXR_DATATYPE_INT16I`) in both directions.
 */
export class AudioResampler {
  #inputRate: number;
  #outputRate: number;
  #channels: number;
  #ffiHandle: FfiHandle;

  /**
   * Initializes a new AudioResampler.
   *
   * @param inputRate - The sample rate of the input audio data (in Hz).
   * @param outputRate - The desired sample rate of the output audio data (in Hz).
   * @param channels - The number of audio channels (e.g., 1 for mono, 2 for stereo). Defaults to 1.
   * @param quality - The quality setting for the resampler. Defaults to
   * `AudioResamplerQuality.MEDIUM`.
   * @throws Error if the FFI response reports an error while creating the resampler.
   */
  constructor(
    inputRate: number,
    outputRate: number,
    channels = 1,
    quality = AudioResamplerQuality.MEDIUM,
  ) {
    this.#inputRate = inputRate;
    this.#outputRate = outputRate;
    this.#channels = channels;

    const req = new NewSoxResamplerRequest({
      inputRate,
      outputRate,
      numChannels: channels,
      // AudioResamplerQuality members are defined as the SoxQualityRecipe values, so this cast
      // only changes the static type.
      qualityRecipe: quality as unknown as SoxQualityRecipe,
      inputDataType: SoxResamplerDataType.SOXR_DATATYPE_INT16I,
      outputDataType: SoxResamplerDataType.SOXR_DATATYPE_INT16I,
      flags: 0,
    });

    const res = FfiClient.instance.request<NewSoxResamplerResponse>({
      message: {
        case: 'newSoxResampler',
        value: req,
      },
    });

    if (res.error) {
      throw new Error(res.error);
    }

    this.#ffiHandle = new FfiHandle(res.resampler.handle.id);
  }

  /**
   * Push audio data into the resampler and retrieve any available resampled data.
   *
   * This method accepts audio data, resamples it according to the configured input and output rates,
   * and returns any resampled data that is available after processing the input.
   *
   * @param data - The audio frame to resample
   *
   * @returns A list of {@link AudioFrame} objects containing the resampled audio data. The list may
   * be empty if no output data is available yet.
   * @throws Error if the FFI response reports an error.
   */
  push(data: AudioFrame): AudioFrame[] {
    const req = new PushSoxResamplerRequest({
      resamplerHandle: this.#ffiHandle.handle,
      dataPtr: data.protoInfo().dataPtr,
      // The buffer size is given in bytes, matching the byte-sized `size` that comes back with
      // the output (see #framesFromResponse). NOTE(review): confirm against the FFI protocol
      // definition of PushSoxResamplerRequest.size.
      size: data.data.byteLength,
    });

    const res = FfiClient.instance.request<PushSoxResamplerResponse>({
      message: {
        case: 'pushSoxResampler',
        value: req,
      },
    });

    return this.#framesFromResponse(res);
  }

  /**
   * Flush any remaining audio data through the resampler and retrieve the resampled data.
   *
   * @remarks
   * This method should be called when no more input data will be provided to ensure that all
   * internal buffers are processed and all resampled data is output.
   *
   * @returns A list of {@link AudioFrame} objects containing the remaining resampled audio data.
   * The list is empty if the resampler had nothing left to output.
   * @throws Error if the FFI response reports an error.
   */
  flush(): AudioFrame[] {
    const req = new FlushSoxResamplerRequest({
      resamplerHandle: this.#ffiHandle.handle,
    });

    const res = FfiClient.instance.request<FlushSoxResamplerResponse>({
      message: {
        case: 'flushSoxResampler',
        value: req,
      },
    });

    return this.#framesFromResponse(res);
  }

  /**
   * Converts a push/flush response into output frames.
   *
   * @param res - The response returned by the FFI for a push or flush request.
   * @returns An empty list when the response carries no output pointer, otherwise a single
   * frame at the configured output rate and channel count.
   * @throws Error if the response reports an error.
   */
  #framesFromResponse(res: PushSoxResamplerResponse | FlushSoxResamplerResponse): AudioFrame[] {
    if (res.error) {
      throw new Error(res.error);
    }

    // No pointer means the resampler produced nothing this time; only copy when there is one.
    if (!res.outputPtr) {
      return [];
    }

    const bytes = FfiClient.instance.copyBuffer(res.outputPtr, res.size);

    // Reinterpret the raw bytes as 16-bit samples (two bytes each). Passing the byte array
    // directly to the Int16Array constructor would instead turn every byte into its own sample.
    // Copying into a fresh Int16Array's backing store also sidesteps any alignment concerns
    // about the source buffer's offset.
    const sampleCount = Math.trunc(bytes.byteLength / 2);
    const samples = new Int16Array(sampleCount);
    new Uint8Array(samples.buffer).set(bytes.subarray(0, sampleCount * 2));

    return [
      new AudioFrame(
        samples,
        this.#outputRate,
        this.#channels,
        Math.trunc(sampleCount / this.#channels),
      ),
    ];
  }
}
Loading

0 comments on commit 9881d52

Please sign in to comment.