Break Free of the Realtime Jail

Cliff Hall
Nov 5, 2016 · 17 min read
Realtime, who needs it? Photo by Kevin on Unsplash

Serendipitous Asynchronicity

Setting Up the Audio Nodes

Web Audio API Node Graph
// Set up audio nodes for playback and analysis
function setupAudioNodes() {
  // If the AudioContext is available, initialize the audioVO's context
  window.AudioContext = window.AudioContext || window.webkitAudioContext;
  if (!window.AudioContext) {
    console.log('No AudioContext found.');
  } else {
    // Set up the audio context
    audioVO.context = new AudioContext();
    // Set up a script processor node
    audioVO.javascriptNode = audioVO.context.createScriptProcessor(2048, 1, 1);
    // Set up channel and frequency analysers
    audioVO.analyser = audioVO.context.createAnalyser();
    audioVO.analyser.smoothingTimeConstant = 0.5;
    audioVO.analyser.fftSize = 1024;
    audioVO.analyser2 = audioVO.context.createAnalyser();
    audioVO.analyser2.smoothingTimeConstant = 0.5;
    audioVO.analyser2.fftSize = 1024;
    audioVO.analyserFreq = audioVO.context.createAnalyser();
    audioVO.analyserFreq.smoothingTimeConstant = 0.3;
    audioVO.analyserFreq.fftSize = 256;
    // Create a buffer source node and a channel splitter
    audioVO.sourceNode = audioVO.context.createBufferSource();
    audioVO.splitter = audioVO.context.createChannelSplitter();
    // Connect the buffer source node to the frequency analyser and the splitter
    audioVO.sourceNode.connect(audioVO.splitter);
    audioVO.sourceNode.connect(audioVO.analyserFreq);
    audioVO.sourceNode.onended = onPlaybackComplete;
    // Connect the splitter's outputs to the channel analysers
    audioVO.splitter.connect(audioVO.analyser, 0, 0);
    audioVO.splitter.connect(audioVO.analyser2, 1, 0);
    // Connect the analysers to the javascriptNode
    audioVO.analyser.connect(audioVO.javascriptNode);
    audioVO.analyser2.connect(audioVO.javascriptNode);
  }
}
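Note that this listing wires the graph but doesn't start anything: a ScriptProcessorNode only fires while it's connected to the destination, and the source needs a decoded buffer before it can play. A minimal sketch of that kickoff (the startPlayback name and its wiring are my assumption, not the article's):

// Hypothetical kickoff, not shown in the listing above: route the script
// processor to the destination so onaudioprocess fires, then start the
// decoded buffer playing.
function startPlayback(buffer) {
  audioVO.sourceNode.buffer = buffer;
  audioVO.javascriptNode.onaudioprocess = process;          // sampling callback below
  audioVO.javascriptNode.connect(audioVO.context.destination);
  audioVO.sourceNode.connect(audioVO.context.destination);  // audible playback
  audioVO.sourceNode.start(0);
}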

Pre-processing the Audio

How Audio Buffer Size Affects Frame Rate

With a buffer size of 512, calls per second is about 80
With a buffer size of 1024, calls per second is about 40
With a buffer size of 2048, calls per second is about 20
With a buffer size of 4096, calls per second is about 10
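These rates fall out of simple arithmetic: a ScriptProcessorNode fires once per buffer, so calls per second is roughly sampleRate / bufferSize. A quick check, assuming the common 44.1 kHz sample rate (the measured numbers above run a touch lower, consistent with scheduling overhead):

// Calls per second for a ScriptProcessorNode is sampleRate / bufferSize.
[512, 1024, 2048, 4096].forEach(function (size) {
  console.log(size + ' -> ' + (44100 / size).toFixed(1) + ' calls/sec');
});
// 512 -> 86.1, 1024 -> 43.1, 2048 -> 21.5, 4096 -> 10.8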
// Audio value object.
// Values extracted on each audio sample that affect the rendered scene.
var audioVO = {
  context: null,
  sourceNode: null,
  javascriptNode: null,
  splitter: null,
  analyser: null,
  analyser2: null,
  analyserFreq: null,
  audio_freqArray: [],
  audio_lAverage: 0,
  audio_rAverage: 0,
  audioVolume: 0,
  audioBass: 0,
  audioHigh: 0,
  audioMid: 0
};
// Audio processing callback
//
// Collects an array of arrays: seconds and snapshots.
// We don't know the duration of the audio, so we track
// each second, collecting an array of snapshots,
// each taken at its calculated offset within the second.
// Since there could be hiccups in timing, and we have
// no assurance that we'll be called exactly on time for
// each snapshot, we take a snapshot (in audioVO) each
// time we're called, and when we reach (or pass) the
// next target frame time, we use the last one taken.
function process() {
  // On the first call, get the offset since the context was created
  if (!offset) offset = audioVO.context.currentTime;
  // Get the adjusted time in seconds since playback started
  currentTime = audioVO.context.currentTime - offset;
  // Get the array of frames for the current second
  currentSecond = Math.floor(currentTime);
  if (seconds.length > currentSecond) {
    currentFrameSet = seconds[currentSecond];
  } else {
    currentFrameSet = new Array(FPS);
    seconds.push(currentFrameSet);
    currentFrame = 0;
  }
  // Calculate the target time for this frame
  // Frame frequency of 0.03333 yields 30 fps
  // Frame frequency of 0.02083 yields 48 fps
  // Frame frequency of 0.01666 yields 60 fps
  targetTime = currentSecond + (currentFrame * frameFreq);
  // Have we passed the target time?
  // If so, store the last snapshot taken for the target frame.
  if (currentTime > targetTime) {
    displayMessage('Processing audio for second: ' + snapshot.second + ' / frame: ' + snapshot.frame);
    currentFrameSet[currentFrame] = snapshot;
    currentFrame++;
  }
  // Get the average for the first channel
  var array = new Uint8Array(audioVO.analyser.frequencyBinCount);
  audioVO.analyser.getByteFrequencyData(array);
  var average = getAverageVolume(array);
  // Get the average for the second channel
  var array2 = new Uint8Array(audioVO.analyser2.frequencyBinCount);
  audioVO.analyser2.getByteFrequencyData(array2);
  var average2 = getAverageVolume(array2);
  // Get the frequency spectrum
  var freqArray = new Uint8Array(audioVO.analyserFreq.frequencyBinCount);
  audioVO.analyserFreq.getByteFrequencyData(freqArray);
  // Load the audioVO with the actionable values
  audioVO.audio_lAverage = average;
  audioVO.audio_rAverage = average2;
  audioVO.audioVolume = (average + average2) / 2 / 100;
  audioVO.audio_freqArray = freqArray;
  audioVO.audioBass = getAverageVolume(freqArray.slice(0, 7)) / 200;
  audioVO.audioMid = getAverageVolume(freqArray.slice(8, 65)) / 125;
  audioVO.audioHigh = getAverageVolume(freqArray.slice(65, 128)) / 75;
  // Snapshot just the actionable values and timing info
  snapshot = {
    second: currentSecond,
    frame: currentFrame,
    currentTime: currentTime,
    targetTime: targetTime,
    audio_lAverage: audioVO.audio_lAverage,
    audio_rAverage: audioVO.audio_rAverage,
    audioVolume: audioVO.audioVolume,
    audioBass: audioVO.audioBass,
    audioHigh: audioVO.audioHigh,
    audioMid: audioVO.audioMid
  };
  // Uncomment to determine how often this method is called
  // console.log('processing ' + currentSecond);
  // Inner private function to get the average volume of a bin array
  function getAverageVolume(array) {
    var values = 0;
    var length = array.length;
    // Sum all the frequency amplitudes
    for (var i = 0; i < length; i++) {
      values += array[i];
    }
    return values / length;
  }
}
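With the seconds array filled, the render pass can fetch audio values by frame number instead of by clock, which is the whole point of the pre-processing. A lookup sketch (the getSnapshot helper is hypothetical, not from the article):

// Hypothetical render-time lookup: frame numbers map straight into the
// seconds/frames arrays built during pre-processing.
function getSnapshot(frameNumber) {
  var second = Math.floor(frameNumber / FPS);
  var frame = frameNumber % FPS;
  return seconds[second] && seconds[second][frame];
}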
// The audio playback complete handler
function onPlaybackComplete() {
  displayMessage('Audio playback complete');
  audioVO.sourceNode.stop(0);
  audioVO.sourceNode.disconnect(audioVO.context.destination);
  audioVO.javascriptNode.disconnect(audioVO.context.destination);
  audioVO.javascriptNode.onaudioprocess = null;
  postProcessAudio();
}
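postProcessAudio isn't shown in this listing; given the timing hiccups noted above, a plausible job for it is back-filling any frame slots the callback never landed on. A sketch under that assumption:

// Hypothetical post-processing pass: any frame slot left undefined by a
// timing hiccup inherits the nearest preceding snapshot.
function postProcessAudio() {
  var last = null;
  seconds.forEach(function (frameSet) {
    for (var f = 0; f < frameSet.length; f++) {
      if (frameSet[f]) {
        last = frameSet[f];
      } else if (last) {
        frameSet[f] = last;
      }
    }
  });
}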

Rendering the Frames

// The render loop
var done = false;
var frame = 0;
var clones = [];
var i;
// Note: relies on a Number.prototype.mod helper (true modulo), assumed
// defined earlier in the series, e.g.:
// Number.prototype.mod = function (n) { return ((this % n) + n) % n; };
function render() {
  if (!done) {
    // Tie cube location to audio low, mid, high and l/r averages
    // Also, rock it around the clock
    var radius = 2;
    var angle = frame.mod(360);
    var newX = (radius + audioVO.audioHigh) * Math.cos(angle / audioVO.audio_lAverage);
    var newY = (radius + audioVO.audioMid) * Math.sin(angle / audioVO.audio_rAverage);
    var newZ = (radius - audioVO.audioBass) / Math.tan(angle * audioVO.audioVolume);
    cube.position.set(newX, newY, newZ);
    // Tie rotation to averages
    cube.rotation.x += audioVO.audio_lAverage;
    cube.rotation.y += audioVO.audio_rAverage;
    // Tie cube scale to audio bass
    var scale = frame.mod(audioVO.audioBass);
    var newScale = scale / audioVO.audioBass;
    cube.scale.set(newScale, newScale, newScale);
    // Throw the occasional clone
    if (Math.floor(Math.random() * 2)) {
      var clone = new THREE.Mesh(geometry, material);
      clone.scale.set(newScale / 5, newScale / 5, newScale / 5);
      clone.position.set(newX, newY, newZ);
      clone.rotation.x -= audioVO.audio_rAverage;
      clone.rotation.y -= audioVO.audio_lAverage;
      clone.velocity = {
        x: (Math.random() * audioVO.audioBass) / 2,
        y: (Math.random() * audioVO.audioMid) / 2,
        z: (Math.random() * audioVO.audioHigh) / 2
      };
      scene.add(clone);
      clones.push(clone);
    }
    // Adjust the clones
    for (i = 0; i < clones.length; i++) {
      clone = clones[i];
      clone.position.z -= (clone.velocity.z - angle.mod(-newZ / i));
      clone.position.y += (clone.velocity.y + angle.mod(-newY / i));
      clone.position.x -= (clone.velocity.x - angle.mod(newX / i));
      newScale = audioVO.audioVolume.mod(i) / 2;
      clone.scale.set(newScale, newScale, newScale);
    }
    // Kick the camera rotation based on volume
    camera.rotation.z -= audioVO.audioVolume / 2;
    // Render and increment frame
    requestAnimationFrame(render);
    renderer.render(scene, camera);
    frame++;
  } else {
    // Clear the canvas after the final frame is rendered
    renderer.clear();
  }
}
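The code that ships each rendered frame to the server isn't shown in this listing. One plausible capture path, assuming the renderer was created with preserveDrawingBuffer: true and a worker variable that relays traffic to the server (see the worker sketch below):

// Hypothetical frame capture: grab the WebGL canvas as a PNG data URL
// and hand it to the worker, which relays it to the server.
function sendFrame(frameNumber) {
  var dataURL = renderer.domElement.toDataURL('image/png');
  worker.postMessage({ type: 'frame', frame: frameNumber, png: dataURL });
}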

Actors and Communication Protocol

Demo Architecture

The Server

Server is Listening
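As a rough sketch of that listening role (the port, file layout, and message shapes are my assumptions, not the article's), a minimal Node.js server using the ws package might look like this:

// Minimal sketch of the listening server, assuming the 'ws' npm package.
var WebSocketServer = require('ws').Server;
var fs = require('fs');
var wss = new WebSocketServer({ port: 8080 });
console.log('Server is listening on port 8080...');
wss.on('connection', function (socket) {
  socket.on('message', function (message) {
    if (typeof message !== 'string') {
      // Binary payload: the uploaded audio file (path is hypothetical)
      fs.writeFileSync('audio/source.mp3', message);
      return;
    }
    var msg = JSON.parse(message);
    if (msg.type === 'frame') {
      // Strip the data URL prefix and persist the frame as a numbered PNG
      var png = msg.png.replace(/^data:image\/png;base64,/, '');
      fs.writeFileSync('frames/frame-' + msg.frame + '.png', png, 'base64');
    }
  });
});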

The Web Worker
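No worker code appears in this excerpt; a plausible role, given the architecture diagram, is owning the socket so the page thread stays free to render. A sketch under that assumption:

// Hypothetical worker: holds the WebSocket and relays traffic in both
// directions between the page and the server.
var socket = new WebSocket('ws://localhost:8080'); // address is an assumption
socket.onmessage = function (event) {
  postMessage(JSON.parse(event.data)); // server -> page
};
onmessage = function (event) {
  var msg = event.data;
  // Binary audio goes as-is; everything else as JSON
  socket.send(msg.buffer ? msg.buffer : JSON.stringify(msg));
};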

The Page

The Conversation

Step 1 - Prompt for Audio File

User chooses a file

Step 2 — Process Audio

Page loads the audio file, sends it to the server, and starts processing it
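A sketch of this step (the element ID, message shape, and worker variable are my assumptions): read the chosen file, ship the raw bytes to the server by way of the worker, and decode a copy locally for the Web Audio graph:

// Hypothetical file-load step for the page.
document.getElementById('fileInput').addEventListener('change', function (event) {
  var file = event.target.files[0];
  var reader = new FileReader();
  reader.onload = function () {
    // Relay the raw bytes to the server via the worker
    worker.postMessage({ type: 'audio', name: file.name, buffer: reader.result });
    // Decode a copy locally for analysis and playback
    audioVO.context.decodeAudioData(reader.result, function (decoded) {
      startPlayback(decoded); // kickoff sketched earlier
    });
  };
  reader.readAsArrayBuffer(file);
});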

Step 3 — Render Frames

Page renders the frames using the pre-processed audio, sending each one to the server

Step 4 — Create Video

Server creates the video
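The encoding command isn't shown in this excerpt; stitching numbered frames and the original audio into a video is a classic job for ffmpeg. A sketch, with paths and settings hypothetical:

// Hypothetical encode step on the server: hand the numbered frames and
// the original audio to ffmpeg to mux the final video.
var spawn = require('child_process').spawn;
var ffmpeg = spawn('ffmpeg', [
  '-framerate', '30',          // must match the pre-processing FPS
  '-i', 'frames/frame-%d.png', // the numbered frames from the page
  '-i', 'audio/source.mp3',    // the uploaded audio
  '-c:v', 'libx264',
  '-pix_fmt', 'yuv420p',
  'output/video.mp4'
]);
ffmpeg.on('close', function (code) {
  console.log('ffmpeg exited with code ' + code);
});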

Step 5 — Mission Accomplished

Server reports the location of the final video file. Page calculates and displays the total build time

Conclusion

The previous article in this series is: WebGL Performance Challenge: Render Thirty Frames per Second and Send them to the Server

This article has been reblogged at the following sites:

DZone: http://bit.ly/break-free-of-the-realtime-jail

Sinewav3

A web-based music visualization and video creation tool for artists and developers. Artists: Easily choreograph 3D visual accompaniment for your music. Developers: Use Three.js, WebGL, and our API to create amazing plugins and share the revenue when they’re used commercially.
