audio_renderer.js
import { MP4PullDemuxer } from "./mp4_pull_demuxer.js";
import { RingBuffer } from "./ringbuf.js";
// Seconds of decoded audio to keep buffered ahead of playback.
const DATA_BUFFER_DECODE_TARGET_DURATION = 0.3;
// Total capacity, in seconds, of the ring buffer shared with the rendering thread.
const DATA_BUFFER_DURATION = 0.6;
// Maximum number of chunks queued in the AudioDecoder before we stop feeding it.
const DECODER_QUEUE_SIZE_MAX = 5;
const ENABLE_DEBUG_LOGGING = false;
function debugLog(msg) {
if (!ENABLE_DEBUG_LOGGING) {
return;
}
console.debug(msg);
}
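// Fetch several script files and concatenate their text into a single Blob
// URL, so that addModule() below can load ringbuf.js and audiosink.js as one
// worklet module script.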
function URLFromFiles(files) {
const promises = files.map(file =>
fetch(file).then(response => response.text())
);
return Promise.all(promises).then(texts => {
const text = texts.join("");
const blob = new Blob([text], { type: "application/javascript" });
return URL.createObjectURL(blob);
});
}
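// Decodes the audio track of an MP4 file with WebCodecs and renders it via an
// AudioWorklet. A minimal usage sketch (the file name and button wiring here
// are hypothetical):
//
//   const renderer = new AudioRenderer();
//   await renderer.initialize("media.mp4");
//   playButton.onclick = () => renderer.play();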
export class AudioRenderer {
async initialize(fileUri) {
this.fillInProgress = false;
this.playing = false;
this.demuxer = new MP4PullDemuxer(fileUri);
let trackInfo = await this.demuxer.getAudioTrackInfo();
this.demuxer.selectAudio();
this.decoder = new AudioDecoder({
output: this.bufferAudioData.bind(this),
error: e => console.error(e)
});
const config = {
codec: trackInfo.codec,
sampleRate: trackInfo.sampleRate,
numberOfChannels: trackInfo.numberOfChannels,
description: trackInfo.extradata
};
this.sampleRate = trackInfo.sampleRate;
this.channelCount = trackInfo.numberOfChannels;
debugLog(config);
console.assert((await AudioDecoder.isConfigSupported(config)).supported);
this.decoder.configure(config);
// Set up an AudioContext to host the AudioSink AudioWorkletProcessor.
this.audioContext = new AudioContext({ sampleRate: trackInfo.sampleRate, latencyHint: "playback" });
this.audioContext.suspend();
// Initialize the ring buffer between the decoder and the real-time audio
// rendering thread. The AudioRenderer has buffer space for approximately
// 600ms (DATA_BUFFER_DURATION) of decoded audio ahead.
let sampleCountInBuffer =
DATA_BUFFER_DURATION * this.audioContext.sampleRate * trackInfo.numberOfChannels;
let sab = RingBuffer.getStorageForCapacity(
sampleCountInBuffer,
Float32Array
);
this.ringbuffer = new RingBuffer(sab, Float32Array);
this.interleavingBuffers = [];
// Get an instance of the AudioSink worklet, passing it the memory for a
// ringbuffer, connect it to a GainNode for volume. This GainNode is in
// turn connected to the destination.
const workletSource = await URLFromFiles(["ringbuf.js", "audiosink.js"]);
await this.audioContext.audioWorklet.addModule(workletSource);
this.audioSink = new AudioWorkletNode(this.audioContext, "AudioSink", {
processorOptions: { sab: sab, mediaChannelCount: this.channelCount },
outputChannelCount: [trackInfo.numberOfChannels]
});
this.volume = new GainNode(this.audioContext);
this.audioSink.connect(this.volume).connect(this.audioContext.destination);
this.init_resolver = null;
let promise = new Promise(resolver => (this.init_resolver = resolver));
this.fillDataBuffer();
return promise;
}
setVolume(volume) {
if (volume < 0.0 || volume > 1.0) {
return;
}
// Smooth exponential volume ramps on change
this.volume.gain.setTargetAtTime(
volume,
this.audioContext.currentTime,
0.3
);
}
play() {
// The returned promise resolves when audio has effectively started; this can
// take some time if using Bluetooth, for example.
debugLog("playback start");
this.playing = true;
this.fillDataBuffer();
return this.audioContext.resume();
}
pause() {
// The returned promise resolves when audio has effectively stopped; this can
// take some time if using Bluetooth, for example.
debugLog("playback stop");
this.playing = false;
return this.audioContext.suspend();
}
getTotalOutputLatencyInSeconds(useAudioContextOutputLatency) {
let totalOutputLatency = 0.0;
if (!useAudioContextOutputLatency || this.audioContext.outputLatency === undefined) {
// outputLatency is not reported by all browsers (e.g., Chromium); the real
// value is OS-dependent and certainly hardware-dependent. Assume 40ms.
totalOutputLatency += 0.04;
} else {
totalOutputLatency += this.audioContext.outputLatency;
}
// baseLatency is supported by Chromium, where it is always 128 / sampleRate.
totalOutputLatency += this.audioContext.baseLatency;
return totalOutputLatency;
}
getMediaTimeInMicroSeconds(totalOutputLatency) {
// The currently rendered audio sample is the current time of the
// AudioContext, offset by the total output latency, that is composed of
// the internal buffering of the AudioContext (e.g., double buffering), and
// the inherent latency of the audio playback system: OS buffering,
// hardware buffering, etc. This starts out negative, because it takes some
// time to buffer, and crosses zero as the first audio sample is produced
// by the audio output device.
let time = Math.max(
this.audioContext.currentTime - totalOutputLatency,
0.0
);
return time * 1000 * 1000; // microseconds
}
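// Convert a sample from the demuxer (timestamps expressed in the track's
// timescale) into an EncodedAudioChunk with microsecond timestamps.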
makeChunk(sample) {
const type = sample.is_sync ? "key" : "delta";
const pts_us = (sample.cts * 1000000) / sample.timescale;
const duration_us = (sample.duration * 1000000) / sample.timescale;
return new EncodedAudioChunk({
type: type,
timestamp: pts_us,
duration: duration_us,
data: sample.data
});
}
async fillDataBuffer() {
// This method is called from multiple places to ensure the buffer stays
// healthy. Sometimes these calls may overlap, but at any given point only
// one call is desired.
if (this.fillInProgress)
return;
this.fillInProgress = true;
// This should be this file's ONLY call to the *Internal() variant of this method.
await this.fillDataBufferInternal();
this.fillInProgress = false;
}
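// Core buffering loop: feed the decoder until the ring buffer holds
// DATA_BUFFER_DECODE_TARGET_DURATION seconds of audio, or until the decoder's
// input queue is saturated.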
async fillDataBufferInternal() {
debugLog(`fillDataBufferInternal()`);
if (this.decoder.decodeQueueSize >= DECODER_QUEUE_SIZE_MAX) {
debugLog('\tdecoder saturated');
// Some audio decoders are known to delay output until the next input.
// Make sure the DECODER_QUEUE_SIZE is big enough to avoid stalling on the
// return below. We're relying on decoder output callback to trigger
// another call to fillDataBuffer().
console.assert(DECODER_QUEUE_SIZE_MAX >= 2);
return;
}
let usedBufferElements = this.ringbuffer.capacity() - this.ringbuffer.available_write();
let usedBufferSecs = usedBufferElements / (this.channelCount * this.sampleRate);
let pcntOfTarget = 100 * usedBufferSecs / DATA_BUFFER_DECODE_TARGET_DURATION;
if (usedBufferSecs >= DATA_BUFFER_DECODE_TARGET_DURATION) {
debugLog(`\taudio buffer full usedBufferSecs: ${usedBufferSecs} pcntOfTarget: ${pcntOfTarget}`);
// When playing, schedule timeout to periodically refill buffer. Don't
// bother scheduling timeout if decoder already saturated. The output
// callback will call us back to keep filling.
if (this.playing)
// Timeout to arrive when buffer is half empty.
window.setTimeout(this.fillDataBuffer.bind(this), 1000 * usedBufferSecs / 2);
// Initialize() is done when the buffer fills for the first time.
if (this.init_resolver) {
this.init_resolver();
this.init_resolver = null;
}
// Buffer full, so no further work to do now.
return;
}
// Decode up to the buffering target or until decoder is saturated.
while (usedBufferSecs < DATA_BUFFER_DECODE_TARGET_DURATION &&
this.decoder.decodeQueueSize < DECODER_QUEUE_SIZE_MAX) {
debugLog(`\tmoar samples. usedBufferSecs:${usedBufferSecs} < target:${DATA_BUFFER_DECODE_TARGET_DURATION}.`);
let sample = await this.demuxer.readSample();
this.decoder.decode(this.makeChunk(sample));
// NOTE: awaiting the demuxer.readSample() above also gives the decoder's
// output callback a chance to run, so we may see usedBufferSecs increase.
usedBufferElements = this.ringbuffer.capacity() - this.ringbuffer.available_write();
usedBufferSecs = usedBufferElements / (this.channelCount * this.sampleRate);
}
if (ENABLE_DEBUG_LOGGING) {
let logPrefix = usedBufferSecs >= DATA_BUFFER_DECODE_TARGET_DURATION ?
'\tbuffered enough' : '\tdecoder saturated';
pcntOfTarget = 100 * usedBufferSecs / DATA_BUFFER_DECODE_TARGET_DURATION;
debugLog(logPrefix + `; bufferedSecs:${usedBufferSecs} pcntOfTarget: ${pcntOfTarget}`);
}
}
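// Returns the percentage of the ring buffer currently filled with decoded audio.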
bufferHealth() {
return (1 - this.ringbuffer.available_write() / this.ringbuffer.capacity()) * 100;
}
// From an array of Float32Arrays containing planar audio data (`inputs`),
// writes interleaved audio data to `output`.
// `inputOffset`: index of the sample to start the copy from
// `inputSamplesToCopy`: number of input samples to copy
// `output`: a Float32Array to write the samples to
// `outputSampleOffset`: an offset in `output` to start writing
interleave(inputs, inputOffset, inputSamplesToCopy, output, outputSampleOffset) {
if (inputs.length * inputs[0].length < output.length) {
throw new Error(`not enough input samples to fill destination (${inputs.length * inputs[0].length} < ${output.length})`);
}
let channelCount = inputs.length;
let outIdx = outputSampleOffset;
let inputIdx = Math.floor(inputOffset / channelCount);
let channel = inputOffset % channelCount;
for (let i = 0; i < inputSamplesToCopy; i++) {
output[outIdx++] = inputs[channel][inputIdx];
if (++channel == inputs.length) {
channel = 0;
inputIdx++;
}
}
}
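// AudioDecoder output callback: interleaves the decoded planar AudioData into
// the ring buffer shared with the AudioWorklet, then keeps the decode
// pipeline moving.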
bufferAudioData(data) {
// (Re)allocate the temporary planar buffers if the channel count or frame
// count changed since the last AudioData.
if (this.interleavingBuffers.length != data.numberOfChannels ||
this.interleavingBuffers[0].length < data.numberOfFrames) {
this.interleavingBuffers = new Array(data.numberOfChannels);
for (let i = 0; i < this.interleavingBuffers.length; i++) {
this.interleavingBuffers[i] = new Float32Array(data.numberOfFrames);
}
}
debugLog(`bufferAudioData() ts:${data.timestamp} durationSec:${data.duration / 1000000}`);
// Write to temporary planar arrays, then interleave into the ring buffer.
for (let i = 0; i < data.numberOfChannels; i++) {
data.copyTo(this.interleavingBuffers[i], { planeIndex: i });
}
// Write the data to the ring buffer. Because it wraps around, there are
// potentially two copies to perform.
let wrote = this.ringbuffer.writeCallback(
data.numberOfFrames * data.numberOfChannels,
(first_part, second_part) => {
this.interleave(this.interleavingBuffers, 0, first_part.length, first_part, 0);
this.interleave(this.interleavingBuffers, first_part.length, second_part.length, second_part, 0);
}
);
// FIXME - this could theoretically happen since we're pretty aggressive
// about saturating the decoder without knowing each AudioData's duration
// relative to the ring buffer capacity.
console.assert(wrote == data.numberOfChannels * data.numberOfFrames, 'Buffer full, dropping data!');
// Logging maxBufferHealth below shows we currently max around 73%, so we're
// safe from the assert above *for now*. We should add an overflow buffer
// just to be safe.
// let bufferHealth = this.bufferHealth();
// if (!('maxBufferHealth' in this))
// this.maxBufferHealth = 0;
// if (bufferHealth > this.maxBufferHealth) {
// this.maxBufferHealth = bufferHealth;
// console.log(`new maxBufferHealth:${this.maxBufferHealth}`);
// }
// fillDataBuffer() gives up if too much decode work is queued. Keep trying
// now that we've finished some.
this.fillDataBuffer();
}
}