CS 470: Project #2 “Featured Artist”

Tamish
15 min read · Feb 8, 2024


WHO needs a band anyways? — Tamish Pulappadi

This project uses audio samples I recorded of the different musical elements that make up a backing track, run through a slightly modified mosaic generator that uses the mic as input. The idea is that I use my guitar as the performance tool, and a musical backing is generated from whatever a room mic is hearing.
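Conceptually, the signal flow looks like this (a minimal ChucK sketch of the idea, not the full system; the feature extraction and KNN matching come later in the code below):

// analysis side: the room mic feeds an FFT (features come from here)
adc => FFT fft => blackhole;
// synthesis side: backing samples picked by similarity play out here
SndBuf buf => dac;
// every analysis window, take one FFT frame
while( true )
{
    fft.upchuck();
    fft.size()::samp => now;
}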

Phase 1 Code:

// output file (if empty, will print to console)
"" => string OUTPUT_FILE;
// get from arguments
if( me.args() > 0 ) me.arg(0) => OUTPUT_FILE;

// check
if( Machine.silent() == false )
{
    // print helpful message
    <<< "-----------------", "" >>>;
    <<< "[feature-extract]: chuck is currently running in REAL-TIME mode;", "" >>>;
    <<< "[feature-extract]: this step has no audio; may run much faster in SILENT mode!", "" >>>;
    <<< "[feature-extract]: to run in SILENT mode, restart chuck with --silent flag", "" >>>;
    <<< "-----------------", "" >>>;
}


//---------------------------------------------------------------------
// analysis network -- this determines which feature will be extracted
//---------------------------------------------------------------------
// audio input into a FFT
SndBuf audioFile => FFT fft;
// a thing for collecting multiple features into one vector
FeatureCollector combo => blackhole;
// add spectral feature: Centroid
fft =^ Centroid centroid =^ combo;
// add spectral feature: Flux
fft =^ Flux flux =^ combo;
// add spectral feature: RMS
fft =^ RMS rms =^ combo;
// add spectral feature: MFCC
fft =^ MFCC mfcc =^ combo;


//---------------------------------------------------------------------
// setting analysis parameters -- important for tuning your extraction
//---------------------------------------------------------------------
// set number of coefficients in MFCC (how many we get out)
20 => mfcc.numCoeffs;
// set number of mel filters in MFCC (internal to MFCC)
10 => mfcc.numFilters;

// do one .upchuck() so FeatureCollector knows how many total dimensions
combo.upchuck();
// get number of total feature dimensions
combo.fvals().size() => int NUM_DIMENSIONS;

// set FFT size
2048 => fft.size;
// set window type and size
Windowing.hann(fft.size()) => fft.window;
// our hop size (how often to perform analysis)
2048::samp => dur HOP;


//---------------------------------------------------------------------
// training data -- preparation specific to a train dataset
//---------------------------------------------------------------------
// labels (and filename roots)
["blues", "classical", "country", "disco", "hiphop",
"jazz", "metal", "pop", "reggae", "rock"] @=> string labels[];
// how many per label
100 => int NUM_EXAMPLES_PER_LABEL;
// how much time to aggregate features for each file
30::second => dur EXTRACT_TIME;
// given EXTRACT_TIME and HOP, how many frames per file?
(EXTRACT_TIME / HOP) $ int => int numFrames;
// relative path
"gtzan/genres_original/" => string PATH;

// a feature frame
float featureFrame[NUM_DIMENSIONS];
// how many input files
0 => int NUM_FILES;

// output reference, default is error stream (cherr)
cherr @=> IO @ theOut;
// instantiate
FileIO fout;
// output file
if( OUTPUT_FILE != "" )
{
    // print
    <<< "opening file for output:", OUTPUT_FILE >>>;
    // open
    fout.open( OUTPUT_FILE, FileIO.WRITE );
    // test
    if( !fout.good() )
    {
        <<< "cannot open file for writing...", "" >>>;
        me.exit();
    }
    // override
    fout @=> theOut;
}


//---------------------------------------------------------------------
// extraction -- iterating over entire training dataset
//---------------------------------------------------------------------

// filename
string filename;
// loop over labels
for( int i; i < labels.size(); i++ )
{
    // get current label
    labels[i] => string label;
    // loop over examples under each label
    for( int j; j < NUM_EXAMPLES_PER_LABEL; j++ )
    {
        // construct filepath
        me.dir() + PATH + label + "/" + label + ".000" + (j<10?"0":"") + j + ".wav" => filename;
        // extract the file
        if( !extractFeatures( filename, label, theOut ) )
        {
            // issue warning
            cherr <= "PROBLEM during extraction: " <= filename <= IO.newline();
            // bail out
            me.exit();
        }
    }
}

// flush the output
theOut.flush();


//---------------------------------------------------------------------
// function: extract and print features from a single file
//---------------------------------------------------------------------
fun int extractFeatures( string inputFilePath, string label, IO out )
{
    // increment
    NUM_FILES++;
    // log
    cherr <= "[" <= NUM_FILES <= "] extracting features: " <= inputFilePath <= IO.newline();

    // load by block to speed up IO
    2048 => audioFile.chunks;
    // read the audio file
    inputFilePath => audioFile.read;
    // zero out
    featureFrame.zero();

    // let one FFT-size of time pass (to buffer)
    fft.size()::samp => now;
    // loop over frames
    for( int i; i < numFrames; i++ )
    {
        //-------------------------------------------------------------
        // a single upchuck() will trigger analysis on everything
        // connected upstream from combo via the upchuck operator (=^)
        // the total number of output dimensions is the sum of
        // dimensions of all the connected unit analyzers
        //-------------------------------------------------------------
        combo.upchuck();
        // for each dimension
        for( int d; d < NUM_DIMENSIONS; d++ )
        {
            // accumulate
            combo.fval(d) +=> featureFrame[d];
        }
        // advance time
        HOP => now;
    }

    //-------------------------------------------------------------
    // average into a single feature vector per file
    // NOTE: this can be easily modified to N feature vectors
    // spread out over the length of an audio file; for now
    // we will just do one feature vector per file
    //-------------------------------------------------------------
    for( int d; d < NUM_DIMENSIONS; d++ )
    {
        // average by total number of frames
        numFrames /=> featureFrame[d];
        // print the feature value
        out <= featureFrame[d] <= " ";
    }
    // print label name and endline
    out <= label <= IO.newline();

    // done
    return true;
}
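Since this step produces no audio, I run it in silent mode as the printed message suggests (the script and output filenames here are assumptions):

chuck --silent feature-extract.ck:genre-features.txt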

For Phase 2, my code remained mostly the same, with minor changes to the audio snippet length, and the output file is opened in append mode so that I could run the command for each of the different loops I had created and have them all accumulate in the same text file (see the example invocations after the code):

// input audio file
"" => string INPUT;
// output file (if empty, will print to console)
"" => string OUTPUT_FILE;
// get from arguments
if( me.args() > 0 ) me.arg(0) => INPUT;
// get from arguments
if( me.args() > 1 ) me.arg(1) => OUTPUT_FILE;

// print usage
if( me.args() == 0 )
{
    <<< "usage: chuck --silent mosaic-extract.ck:INPUT:OUTPUT", "" >>>;
    <<< " |- INPUT: audio file (.wav), or text file (.txt) listing audio files", "" >>>;
    <<< " |- OUTPUT: model file (.txt) to contain extracted feature vectors", "" >>>;
    me.exit();
}

// detect; print helpful message
if( Machine.silent() == false )
{
    <<< "-----------------", "" >>>;
    <<< "[mosaic-extract]: chuck is currently running in REAL-TIME mode;", "" >>>;
    <<< "[mosaic-extract]: this step has no audio output; may run faster in SILENT mode!", "" >>>;
    <<< "[mosaic-extract]: to run in SILENT mode, restart chuck with --silent flag", "" >>>;
    <<< "-----------------", "" >>>;
}


//------------------------------------------------------------------------------
// analysis network -- this determines which feature will be extracted
// NOTE: see examples/ai/features for examples of different features
//------------------------------------------------------------------------------
// audio input into a FFT
SndBuf audioFile => FFT fft;
44100 => int srate;
srate => int windowSize; // 1-second frame, which is 44100 samples
// a thing for collecting multiple features into one vector
FeatureCollector combo => blackhole;
// add spectral feature: Centroid
fft =^ Centroid centroid =^ combo;
// add spectral feature: Flux
fft =^ Flux flux =^ combo;
// add spectral feature: RMS
fft =^ RMS rms =^ combo;
// add spectral feature: MFCC
fft =^ MFCC mfcc =^ combo;


//------------------------------------------------------------------------------
// analysis parameters -- useful for tuning your extraction
//------------------------------------------------------------------------------
// set number of coefficients in MFCC (how many we get out)
20 => mfcc.numCoeffs;
// set number of mel filters in MFCC (internal to MFCC)
10 => mfcc.numFilters;

// do one .upchuck() so FeatureCollector knows how many total dimensions
combo.upchuck();
// get number of total feature dimensions
combo.fvals().size() => int NUM_DIMENSIONS;

// set FFT size
windowSize => fft.size;
// set window type and size
Windowing.hann(fft.size()) => fft.window;
// our hop size (how often to perform analysis)
windowSize => int hopSize;
hopSize::samp => dur HOP;
// how many frames to aggregate before averaging?
4 => int NUM_FRAMES;


//------------------------------------------------------------------------------
// OUTPUT: prepare for output
//------------------------------------------------------------------------------
// a feature frame
float featureFrame[NUM_DIMENSIONS];
// how many input files
0 => int NUM_FILES;

// output reference, default is error stream (cherr)
cherr @=> IO @ theOut;
// instantiate
FileIO fout;
// output file
if( OUTPUT_FILE != "" )
{
    // print
    <<< "opening file for output:", OUTPUT_FILE >>>;
    // open
    fout.open( OUTPUT_FILE, FileIO.APPEND );
    // test
    if( !fout.good() )
    {
        <<< " |- cannot open file for writing...", "" >>>;
        me.exit();
    }
    // override
    fout @=> theOut;
}


//------------------------------------------------------------------------------
// INPUT: prepare for iterating over input data and extract features
//------------------------------------------------------------------------------

// array of input audio files
string filenames[0];
// parse INPUT, which may be an audio file (.wav) or a list of filenames (.txt)
if( !parseInput( INPUT, filenames ) ) me.exit();

// loop over filenames
for( int i; i < filenames.size(); i++ )
{
    // extract the file
    if( !extractTrajectory( me.dir()+filenames[i], filenames[i], i, theOut ) )
    {
        // issue warning
        cherr <= "[mosaic-extract]: problem extracting (and skipping): " <= filenames[i] <= IO.newline();
        // skip
        continue;
    }
}

// flush output
theOut.flush();
// close
theOut.close();


//------------------------------------------------------------------------------
// extractTrajectory() -- extracts and outputs feature vectors from a single file
//------------------------------------------------------------------------------
fun int extractTrajectory( string inputFilePath, string shortName, int fileIndex, IO out )
{
    // increment
    NUM_FILES++;
    // log
    cherr <= "[" <= NUM_FILES <= "] extracting features: " <= inputFilePath <= IO.newline();

    // load by block to speed up IO
    fft.size() => audioFile.chunks;
    // read the audio file
    inputFilePath => audioFile.read;
    // file position (in samples)
    int pos;
    // frame index
    int index;

    while( audioFile.pos() < audioFile.samples() )
    {
        // remember the starting pos of each vector
        audioFile.pos() => pos;
        // let one FFT-size of time pass (to buffer)
        fft.size()::samp => now;
        // zero out
        featureFrame.zero();
        // loop over frames
        for( int i; i < NUM_FRAMES; i++ )
        {
            //-------------------------------------------------------------
            // a single upchuck() will trigger analysis on everything
            // connected upstream from combo via the upchuck operator (=^)
            // the total number of output dimensions is the sum of
            // dimensions of all the connected unit analyzers
            //-------------------------------------------------------------
            combo.upchuck();
            // for each dimension
            for( int d; d < NUM_DIMENSIONS; d++ )
            {
                // accumulate
                combo.fval(d) +=> featureFrame[d];
            }
            // advance time
            HOP => now;
        }

        // print file name and window start time (in seconds)
        out <= shortName <= " " <= (pos::samp)/second <= " ";

        //-------------------------------------------------------------
        // average into a single feature vector per window
        //-------------------------------------------------------------
        for( int d; d < NUM_DIMENSIONS; d++ )
        {
            // average by number of aggregated frames
            NUM_FRAMES /=> featureFrame[d];
            // print the feature value
            out <= featureFrame[d] <= " ";
        }

        out <= IO.newline();

        // print progress dot
        if( out != cherr ) { cherr <= "."; cherr.flush(); }

        // increment index
        index++;
    }

    // print newline to screen
    if( out != cherr ) cherr <= IO.newline();

    // done
    return true;
}


//------------------------------------------------------------------------------
// parse INPUT argument -- either single audio file or a text file containing a list
//------------------------------------------------------------------------------
fun int parseInput( string input, string results[] )
{
    // clear results
    results.clear();
    // see if input is an audio file name
    if( input.rfind( ".wav" ) > 0 || input.rfind( ".aiff" ) > 0 )
    {
        // make new string (since << appends by reference)
        input => string sss;
        // append
        results << sss;
    }
    else
    {
        // load data
        FileIO fio;
        if( !fio.open( me.dir() + input, FileIO.READ ) )
        {
            // error
            <<< "cannot open file:", me.dir() + input >>>;
            // close
            fio.close();
            // return done
            return false;
        }

        // read each filename
        while( fio.more() )
        {
            // read each line
            fio.readLine().trim() => string line;
            // if not empty
            if( line != "" )
            {
                results << line;
            }
        }
    }

    return true;
}
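Because the output is opened with FileIO.APPEND, the extractor can be run once per loop and every window's feature vector lands in the same model file, for example (loop and model filenames here are hypothetical):

chuck --silent mosaic-extract.ck:loop-drums.wav:mosaic-model.txt
chuck --silent mosaic-extract.ck:loop-bass.wav:mosaic-model.txt
chuck --silent mosaic-extract.ck:loop-keys.wav:mosaic-model.txt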

For Phase 3, the main thing I was trying to achieve was parallel processing with the different instruments. I experimented with adding shreds but ultimately decided that running the code in three separate terminals was the easiest and best-sounding option. I also made sure that all the loops I had created were at the same BPM, in case that would help the layers line up.
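Concretely, that meant one synthesis instance per instrument, each in its own terminal pointed at its own model file (model filenames hypothetical):

# terminal 1
chuck mosaic-synth-mic.ck:drums-model.txt
# terminal 2
chuck mosaic-synth-mic.ck:bass-model.txt
# terminal 3
chuck mosaic-synth-mic.ck:keys-model.txt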

In reflection, this was quite a fun process. I recorded a bunch of loops in different scales and found that, for some reason, minor scales worked better for me than major scales, but I was still able to play modally on my guitar and get it to sound decent. Along the same lines, I tried staying on “wrong” notes to see if it would sound any different, and played with volume swells and the whammy bar to get as many different sounds as I could out of the mosaic generator.

I found myself limited on the guitar in terms of expression compared to the voice. I tried to tailor the code a little to be more sensitive to pitch and volume, which I could play with more on the guitar:

// NOTE: conceptual sketch -- pitchToFrequency() and mapRMSLevelToGain()
// are helpers (sketched below); osc and synth stand in for whatever
// the synthesis network uses
{
    // convert targetPitch to a frequency (if not already in Hz)
    pitchToFrequency( targetPitch ) => float frequency;

    // map targetLoudness (e.g., RMS level) to gain
    mapRMSLevelToGain( targetLoudness ) => float gain;

    // use frequency and gain to control a synthesizer or playback
    frequency => osc.freq;
    gain => synth.gain;
}

{
    // get the current RMS level from the analysis network
    rms.upchuck().fval(0) => float currentRMS;

    // map this value to a suitable gain level
    mapRMSLevelToGain( currentRMS ) => float gain;

    // apply this gain to the synthesis output
    // (conceptual -- adapt to your synthesis setup)
    gain => synth.gain;
}
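The pitchToFrequency() and mapRMSLevelToGain() helpers aren't shown in my snippet; here is a minimal sketch of what they could look like (the MIDI-pitch input and the 0..1 mapping range are assumptions):

// convert a MIDI pitch number to a frequency in Hz
fun float pitchToFrequency( float midiPitch )
{
    return Std.mtof( midiPitch );
}

// map an RMS level to a 0..1 gain (the scale factor is a guess, to taste)
fun float mapRMSLevelToGain( float rmsLevel )
{
    return Math.min( rmsLevel * 4, 1.0 );
}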

This helped me get around some of the rigidity of a stringed, fretted instrument and get some interesting sounds.

In the end, I was happy with how it turned out, and it was a lot of fun to play with. If I had to make changes, I would probably add more loops in a diverse range of keys and scales, then try modulating keys as I performed on my guitar to see how the code would handle that situation. Here is my video demo and the code used:


// input: pre-extracted model file
string FEATURES_FILE;
// if have arguments, override filename
if( me.args() > 0 )
{
    me.arg(0) => FEATURES_FILE;
}
else
{
    // print usage
    <<< "usage: chuck mosaic-synth-mic.ck:INPUT", "" >>>;
    <<< " |- INPUT: model file (.txt) containing extracted feature vectors", "" >>>;
}
//------------------------------------------------------------------------------
// expected model file format; each VALUE is a feature value
// (feel free to adapt and modify the file format as needed)
//------------------------------------------------------------------------------
// filePath windowStartTime VALUE VALUE ... VALUE
// filePath windowStartTime VALUE VALUE ... VALUE
// ...
// filePath windowStartTime VALUE VALUE ... VALUE
//------------------------------------------------------------------------------
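// e.g., a line might look like this (filename hypothetical, feature
// values illustrative; a real line has NUM_DIMENSIONS of them):
// loop-drums.wav 2.0 0.137 0.004 0.062 ...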


//------------------------------------------------------------------------------
// unit analyzer network: *** this must match the features in the features file
//------------------------------------------------------------------------------
// audio input into a FFT
adc => FFT fft;
// a thing for collecting multiple features into one vector
FeatureCollector combo => blackhole;
// add spectral feature: Centroid
fft =^ Centroid centroid =^ combo;
// add spectral feature: Flux
fft =^ Flux flux =^ combo;
// add spectral feature: RMS
fft =^ RMS rms =^ combo;
// add spectral feature: MFCC
fft =^ MFCC mfcc =^ combo;


//-----------------------------------------------------------------------------
// setting analysis parameters -- also should match what was used during extraction
//-----------------------------------------------------------------------------
// set number of coefficients in MFCC (how many we get out)
// 20 here, matching what was used during extraction
20 => mfcc.numCoeffs;
// set number of mel filters in MFCC
10 => mfcc.numFilters;

// do one .upchuck() so FeatureCollector knows how many total dimensions
combo.upchuck();
// get number of total feature dimensions
combo.fvals().size() => int NUM_DIMENSIONS;

// set FFT size
4096 => fft.size;
// set window type and size
Windowing.hann(fft.size()) => fft.window;
// our hop size (how often to perform analysis)
(fft.size()/2)::samp => dur HOP;
// how many frames to aggregate before averaging?
// (this does not need to match extraction; might play with this number)
4 => int NUM_FRAMES;
// how much time to aggregate features for each file
fft.size()::samp * NUM_FRAMES => dur EXTRACT_TIME;


//------------------------------------------------------------------------------
// unit generator network: for real-time sound synthesis
//------------------------------------------------------------------------------
// how many max at any time?
16 => int NUM_VOICES;
// a number of audio buffers to cycle between
SndBuf buffers[NUM_VOICES]; ADSR envs[NUM_VOICES]; Pan2 pans[NUM_VOICES];
// set parameters
for( int i; i < NUM_VOICES; i++ )
{
    // connect audio
    buffers[i] => envs[i] => pans[i] => dac;
    // set chunk size (how much to load at a time)
    // this is important when reading from large files
    // if this is not set, SndBuf.read() will load the entire file immediately
    fft.size() => buffers[i].chunks;
    // randomize pan
    Math.random2f(-.75,.75) => pans[i].pan;
    // set envelope parameters
    envs[i].set( EXTRACT_TIME, EXTRACT_TIME/2, 1, EXTRACT_TIME );
}


//------------------------------------------------------------------------------
// load feature data; read important global values like numPoints and numCoeffs
//------------------------------------------------------------------------------
// values to be read from file
0 => int numPoints; // number of points in data
0 => int numCoeffs; // number of dimensions in data
// file read PART 1: read over the file to get numPoints and numCoeffs
loadFile( FEATURES_FILE ) @=> FileIO @ fin;
// check
if( !fin.good() ) me.exit();
// check dimension at least
if( numCoeffs != NUM_DIMENSIONS )
{
    // error
    <<< "[error] expecting:", NUM_DIMENSIONS, "dimensions; but features file has:", numCoeffs >>>;
    // stop
    me.exit();
}


//------------------------------------------------------------------------------
// each Point corresponds to one line in the input file, which is one audio window
//------------------------------------------------------------------------------
class AudioWindow
{
    // unique point index (use this to look up feature vector)
    int uid;
    // which file this came from (index into the files array)
    int fileIndex;
    // starting time in that file (in seconds)
    float windowTime;

    // set
    fun void set( int id, int fi, float wt )
    {
        id => uid;
        fi => fileIndex;
        wt => windowTime;
    }
}

// array of all points in model file
AudioWindow windows[numPoints];
// unique filenames; we will append to this
string files[0];
// map of filenames loaded
int filename2state[0];
// feature vectors of data points
float inFeatures[numPoints][numCoeffs];
// generate array of unique indices
int uids[numPoints]; for( int i; i < numPoints; i++ ) i => uids[i];

// use this for new input
float features[NUM_FRAMES][numCoeffs];
// average values of coefficients across frames
float featureMean[numCoeffs];


//------------------------------------------------------------------------------
// read the data
//------------------------------------------------------------------------------
readData( fin );


//------------------------------------------------------------------------------
// set up our KNN object to use for classification
// (KNN2 is a fancier version of the KNN object)
// -- run KNN2.help(); in a separate program to see its available functions --
//------------------------------------------------------------------------------
KNN2 knn;
// k nearest neighbors
2 => int K;
// results vector (indices of k nearest points)
int knnResult[K];
// knn train
knn.train( inFeatures, uids );


// used to rotate sound buffers
0 => int which;

//------------------------------------------------------------------------------
// SYNTHESIS!!
// this function is meant to be sporked so it can be stacked in time
//------------------------------------------------------------------------------
fun void synthesize( int uid )
{
    // get the buffer to use
    buffers[which] @=> SndBuf @ sound;
    // get the envelope to use
    envs[which] @=> ADSR @ envelope;
    // increment and wrap if needed
    which++; if( which >= buffers.size() ) 0 => which;

    // get a reference to the audio fragment to synthesize
    windows[uid] @=> AudioWindow @ win;
    // get filename
    files[win.fileIndex] => string filename;
    // load into sound buffer
    filename => sound.read;
    // seek to the window start time
    ((win.windowTime::second)/samp) $ int => sound.pos;

    // print what we are about to play
    chout <= "synthesizing window: ";
    // print label
    chout <= win.uid <= "["
          <= win.fileIndex <= ":"
          <= win.windowTime <= ":POSITION="
          <= sound.pos() <= "]";
    // endline
    chout <= IO.newline();

    // open the envelope, overlap-add this into the overall audio
    envelope.keyOn();
    // wait
    (EXTRACT_TIME*3)-envelope.releaseTime() => now;
    // start the release
    envelope.keyOff();
    // wait
    envelope.releaseTime() => now;
}


//------------------------------------------------------------------------------
// real-time similarity retrieval loop
//------------------------------------------------------------------------------
while( true )
{
    // aggregate features over a period of time
    for( int frame; frame < NUM_FRAMES; frame++ )
    {
        //-------------------------------------------------------------
        // a single upchuck() will trigger analysis on everything
        // connected upstream from combo via the upchuck operator (=^)
        // the total number of output dimensions is the sum of
        // dimensions of all the connected unit analyzers
        //-------------------------------------------------------------
        combo.upchuck();
        // get features
        for( int d; d < NUM_DIMENSIONS; d++ )
        {
            // store them in current frame
            combo.fval(d) => features[frame][d];
        }
        // advance time
        HOP => now;
    }

    // compute means for each coefficient across frames
    for( int d; d < NUM_DIMENSIONS; d++ )
    {
        // zero out
        0.0 => featureMean[d];
        // loop over frames
        for( int j; j < NUM_FRAMES; j++ )
        {
            // add
            features[j][d] +=> featureMean[d];
        }
        // average
        NUM_FRAMES /=> featureMean[d];
    }

    //-------------------------------------------------
    // search using KNN2; results filled in knnResult,
    // which should hold the indices of k nearest points
    //-------------------------------------------------
    knn.search( featureMean, K, knnResult );

    // SYNTHESIZE THIS
    spork ~ synthesize( knnResult[Math.random2(0,knnResult.size()-1)] );
}
//------------------------------------------------------------------------------
// end of real-time similarity retrieval loop
//------------------------------------------------------------------------------




//------------------------------------------------------------------------------
// function: load data file
//------------------------------------------------------------------------------
fun FileIO loadFile( string filepath )
{
    // reset
    0 => numPoints;
    0 => numCoeffs;

    // load data
    FileIO fio;
    if( !fio.open( filepath, FileIO.READ ) )
    {
        // error
        <<< "cannot open file:", filepath >>>;
        // close
        fio.close();
        // return
        return fio;
    }

    string str;
    string line;
    // count the non-empty lines; remember the last one
    while( fio.more() )
    {
        // read each line
        fio.readLine().trim() => str;
        // check if empty line
        if( str != "" )
        {
            numPoints++;
            str => line;
        }
    }

    // a string tokenizer
    StringTokenizer tokenizer;
    // set to last non-empty line
    tokenizer.set( line );
    // start at -2 (to account for the filePath and windowTime fields)
    -2 => numCoeffs;
    // count the remaining tokens (the feature dimensions)
    while( tokenizer.more() )
    {
        tokenizer.next();
        numCoeffs++;
    }

    // see if we made it past the initial fields
    if( numCoeffs < 0 ) 0 => numCoeffs;

    // check
    if( numPoints == 0 || numCoeffs <= 0 )
    {
        <<< "no data in file:", filepath >>>;
        fio.close();
        return fio;
    }

    // print
    <<< "# of data points:", numPoints, "dimensions:", numCoeffs >>>;

    // done for now
    return fio;
}


//------------------------------------------------------------------------------
// function: read the data
//------------------------------------------------------------------------------
fun void readData( FileIO fio )
{
    // rewind the file reader
    fio.seek( 0 );

    // a line
    string line;
    // a string tokenizer
    StringTokenizer tokenizer;

    // points index
    0 => int index;
    // file index
    0 => int fileIndex;
    // file name
    string filename;
    // window start time
    float windowTime;
    // coefficient
    int c;

    // read over each line
    while( fio.more() )
    {
        // read each line
        fio.readLine().trim() => line;
        // check if empty line
        if( line != "" )
        {
            // tokenize the line
            tokenizer.set( line );
            // file name
            tokenizer.next() => filename;
            // window start time
            tokenizer.next() => Std.atof => windowTime;
            // have we seen this filename yet?
            if( filename2state[filename] == 0 )
            {
                // make a new string (<< appends by reference)
                filename => string sss;
                // append
                files << sss;
                // new id
                files.size() => filename2state[filename];
            }
            // get fileindex
            filename2state[filename]-1 => fileIndex;
            // set
            windows[index].set( index, fileIndex, windowTime );

            // zero out
            0 => c;
            // for each dimension in the data
            repeat( numCoeffs )
            {
                // read next coefficient
                tokenizer.next() => Std.atof => inFeatures[index][c];
                // increment
                c++;
            }

            // increment global index
            index++;
        }
    }
}
