Convert speech from an audio file to text using Google Speech API

Julien
Julien
Jan 24, 2018 · 6 min read

The backstory

The tasks

'use strict';const ffmpeg = require('fluent-ffmpeg');  
const mime = require('mime');
const fs = require('fs');
module.exports = (filePathIn, filePathOut) => new Promise((resolve, reject) => {
if (!filePathIn || !filePathOut) {
throw new Error('You must specify a path for both input and output files.');
}
if (!fs.existsSync(filePathIn)) {
throw new Error('Input file must exist.');
}
if (mime.lookup(filePathIn).indexOf('audio') > -1) {
try {
ffmpeg()
.input(filePathIn)
.outputOptions([
'-f s16le',
'-acodec pcm_s16le',
'-vn',
'-ac 1',
'-ar 16k',
'-map_metadata -1'
])
.save(filePathOut)
.on('end', () => resolve(filePathOut));
} catch (e) {
reject(e);
}
} else {
throw new Error('File must have audio mime.');
}
});
npm i --save linear16
const gcs = require('@google-cloud/storage')({  
projectId: 'your-projectid-12345',
keyFilename: './credentials.json'
});
const bucket = gcs.bucket('your-bucket-name');module.exports = filePath => new Promise((resolve, reject) =>
bucket.upload(filePath, function (err, file) {
if (err) {
reject(err);
} else {
resolve(file);
}
})
);
gs://your-bucket-name/your-file-name.ext
const speechClient = require('@google-cloud/speech')({  
projectId: 'your-projectid-12345',
keyFilename: './credentials.json'
});
// Recognition settings passed to the Speech client with every request.
// `sampleRate` has to describe the audio actually being sent: the conversion
// step of this article resamples to 16 kHz (`-ar 16k`), so the value is 16000.
const options = {
  languageCode: 'en-US',
  sampleRate: 16000,
  encoding: 'LINEAR16'
};
module.exports = fileName =>
new Promise((resolve, reject) => {
speechClient.startRecognition(fileName, options, function (err, operation) {
if (err) {
return reject(err)
}
operation
.on('error', function (err) {
return reject(err);
})
.on('complete', function (results) {
return resolve(results);
});
});
}
);
'use strict';const linear16 = require('linear16');  
const Spinner = require('clui').Spinner;
const cloudStore = require('./libs/cloud-storage');
const cloudSpeech = require('./libs/cloud-speech');
const path = require('path');
const chalk = require('chalk');
// Pipeline: convert the input to LINEAR16, upload the result to cloud storage,
// transcribe it, then print the transcription. A spinner reports the current step.
try {
  const countdown = new Spinner(`Starting...`);
  countdown.start();

  const params = {
    input: './input/input.m4a',
    output: './output/output.wav'
  };

  Promise.resolve(params)
    .then(paths => {
      countdown.message(`Converting ${path.basename(paths.input)} to ${path.basename(paths.output)}...`);
      return linear16(paths.input, paths.output);
    })
    .then(wavFile => {
      countdown.message(`Storing ${path.basename(wavFile)}...`);
      return cloudStore(wavFile);
    })
    .then(storageFile => {
      countdown.message(`Transcribing ${storageFile.name}...`);
      // NOTE(review): this bucket name presumably has to be the bucket that
      // ./libs/cloud-storage uploads to - confirm they match.
      return cloudSpeech(`gs://messages-audio/${storageFile.name}`);
    })
    .then(transcription => {
      countdown.stop();
      console.log(chalk.green(transcription));
    })
    .catch(err => {
      // Stop the spinner on failure too; otherwise it keeps animating after
      // the error has been printed.
      countdown.stop();
      console.error(err);
    });
} catch (err) {
  // Only synchronous failures (e.g. while creating the spinner) end up here.
  console.log(chalk.red(err.message));
  console.error(err);
}

cod3

Stuff I wish had been on top of Google results when I was stuck, mostly about web development. Also anything I feel like sharing.

Julien

Written by

Julien

Entrepreneur / App developer & designer / Desktop, web & mobile

cod3

cod3

Stuff I wish had been on top of Google results when I was stuck, mostly about web development. Also anything I feel like sharing.