Krotos Modules 3
Loading...
Searching...
No Matches
krotos::AudioEmbedding Class Reference

#include <AudioEmbedding.h>

Public Member Functions

 AudioEmbedding ()
 
void initialise ()
 
std::vector< float > forward (const AudioSampleBuffer &buffer)
 
std::vector< float > hz_to_mel (std::vector< float > freqs, bool htk=false)
 
std::vector< float > mel_to_hz (std::vector< float > mels, bool htk=false)
 
std::vector< float > mel_frequencies (float fmin, float fmax, int n_mels, bool htk=false)
 
std::vector< std::vector< float > > mel (int sr, int n_fft, int n_mels, float fmin, float fmax, bool htk=false)
 

Private Member Functions

void L2Normalise (std::vector< float > &x, float eps=1e-5f)
 
std::vector< float > applyMelFilterbank (const std::vector< float > &x)
 
std::vector< float > applyAutoEncoder (const std::vector< float > &x)
 
void writeMatrixToFile (const std::vector< std::vector< float > > &matrix, const std::string &filename)
 

Private Attributes

bool m_initialised
 
int m_fftSize
 
int m_hopSize
 
juce::dsp::FFT m_fft
 
juce::dsp::WindowingFunction< float > m_window
 
std::vector< std::vector< float > > m_mel
 
std::vector< std::vector< float > > m_weight
 

Constructor & Destructor Documentation

◆ AudioEmbedding()

krotos::AudioEmbedding::AudioEmbedding ( )

Constructor

Member Function Documentation

◆ applyAutoEncoder()

std::vector< float > krotos::AudioEmbedding::applyAutoEncoder ( const std::vector< float > & x)
private

Applies an autoencoder to the input.

Parameters
x: input to the autoencoder

◆ applyMelFilterbank()

std::vector< float > krotos::AudioEmbedding::applyMelFilterbank ( const std::vector< float > & x)
private

Applies a filterbank to the input.

Parameters
x: input to which the filterbank is applied

◆ forward()

std::vector< float > krotos::AudioEmbedding::forward ( const AudioSampleBuffer & buffer)

Computes an audio embedding.

Parameters
buffer: buffer of audio samples to analyse
Returns
audio embedding vector

◆ hz_to_mel()

std::vector< float > krotos::AudioEmbedding::hz_to_mel ( std::vector< float > freqs,
bool htk = false )

Convert Hz to Mels.

Parameters
freqs: vector of frequencies
htk: use HTK formula instead of Slaney
Returns
input frequencies in Mels

◆ initialise()

void krotos::AudioEmbedding::initialise ( )

Initialise with default parameters

◆ L2Normalise()

void krotos::AudioEmbedding::L2Normalise ( std::vector< float > & x,
float eps = 1e-5f )
private

Normalises a vector using the L2 norm.

Parameters
x: vector to normalise
eps: epsilon to avoid division by zero

◆ mel()

std::vector< std::vector< float > > krotos::AudioEmbedding::mel ( int sr,
int n_fft,
int n_mels,
float fmin,
float fmax,
bool htk = false )

Create a Mel filter-bank.

Parameters
sr: sampling rate of the incoming signal
n_fft: number of FFT components
n_mels: number of mel bins
fmin: minimum frequency (Hz).
fmax: maximum frequency (Hz).
htk: use HTK formula instead of Slaney
Returns
Mel transform matrix

◆ mel_frequencies()

std::vector< float > krotos::AudioEmbedding::mel_frequencies ( float fmin,
float fmax,
int n_mels,
bool htk = false )

Compute an array of acoustic frequencies tuned to the mel scale.

Parameters
n_mels: number of mel bins
fmin: minimum frequency (Hz).
fmax: maximum frequency (Hz).
htk: if true, use HTK formula to convert Hz to mel. Otherwise (false), use Slaney’s Auditory Toolbox.
Returns
vector of n_mels frequencies in Hz which are uniformly spaced on the Mel axis

◆ mel_to_hz()

std::vector< float > krotos::AudioEmbedding::mel_to_hz ( std::vector< float > mels,
bool htk = false )

Convert mel bin numbers to frequencies.

Parameters
mels: mel bins to convert
htk: use HTK formula instead of Slaney
Returns
input mels in Hz

◆ writeMatrixToFile()

void krotos::AudioEmbedding::writeMatrixToFile ( const std::vector< std::vector< float > > & matrix,
const std::string & filename )
private

This function exists as a debugging mechanism to compare the spectrogram created here with its equivalent in MATLAB.

Member Data Documentation

◆ m_fft

juce::dsp::FFT krotos::AudioEmbedding::m_fft
private

◆ m_fftSize

int krotos::AudioEmbedding::m_fftSize
private

◆ m_hopSize

int krotos::AudioEmbedding::m_hopSize
private

◆ m_initialised

bool krotos::AudioEmbedding::m_initialised
private

◆ m_mel

std::vector<std::vector<float> > krotos::AudioEmbedding::m_mel
private

◆ m_weight

std::vector<std::vector<float> > krotos::AudioEmbedding::m_weight
private

linear autoencoder weights exported from PyTorch

◆ m_window

juce::dsp::WindowingFunction<float> krotos::AudioEmbedding::m_window
private

The documentation for this class was generated from the following files: