#include <AudioEmbedding.h>
|
| | AudioEmbedding () |
| |
| void | initialise () |
| |
| std::vector< float > | forward (const AudioSampleBuffer &buffer) |
| |
| std::vector< float > | hz_to_mel (std::vector< float > freqs, bool htk=false) |
| |
| std::vector< float > | mel_to_hz (std::vector< float > mels, bool htk=false) |
| |
| std::vector< float > | mel_frequencies (float fmin, float fmax, int n_mels, bool htk=false) |
| |
| std::vector< std::vector< float > > | mel (int sr, int n_fft, int n_mels, float fmin, float fmax, bool htk=false) |
| |
◆ AudioEmbedding()
| krotos::AudioEmbedding::AudioEmbedding |
( |
| ) |
|
◆ applyAutoEncoder()
| std::vector< float > krotos::AudioEmbedding::applyAutoEncoder |
( |
const std::vector< float > & | x | ) |
|
|
private |
Applies an autoencoder to the input.
- Parameters
-
| x | input to apply the autoencoder to |
◆ applyMelFilterbank()
| std::vector< float > krotos::AudioEmbedding::applyMelFilterbank |
( |
const std::vector< float > & | x | ) |
|
|
private |
Applies a filterbank to the input.
- Parameters
-
| x | input to apply the filterbank to |
◆ forward()
| std::vector< float > krotos::AudioEmbedding::forward |
( |
const AudioSampleBuffer & | buffer | ) |
|
Computes an audio embedding.
- Parameters
-
| buffer | buffer of audio samples to analyse |
- Returns
- audio embedding vector
◆ hz_to_mel()
| std::vector< float > krotos::AudioEmbedding::hz_to_mel |
( |
std::vector< float > | freqs, |
|
|
bool | htk = false ) |
Convert Hz to Mels.
- Parameters
-
| freqs | vector of frequencies |
| htk | use HTK formula instead of Slaney |
- Returns
- input frequencies in Mels
◆ initialise()
| void krotos::AudioEmbedding::initialise |
( |
| ) |
|
Initialise with default parameters.
◆ L2Normalise()
| void krotos::AudioEmbedding::L2Normalise |
( |
std::vector< float > & | x, |
|
|
float | eps = 1e-5f ) |
|
private |
Normalises a vector using the L2 norm.
- Parameters
-
| x | vector to normalise |
| eps | epsilon to avoid division by zero |
◆ mel()
| std::vector< std::vector< float > > krotos::AudioEmbedding::mel |
( |
int | sr, |
|
|
int | n_fft, |
|
|
int | n_mels, |
|
|
float | fmin, |
|
|
float | fmax, |
|
|
bool | htk = false ) |
Create a Mel filter-bank.
- Parameters
-
| sr | sampling rate of the incoming signal |
| n_fft | number of FFT components |
| n_mels | number of mel bins |
| fmin | minimum frequency (Hz). |
| fmax | maximum frequency (Hz). |
| htk | use HTK formula instead of Slaney |
- Returns
- Mel transform matrix
◆ mel_frequencies()
| std::vector< float > krotos::AudioEmbedding::mel_frequencies |
( |
float | fmin, |
|
|
float | fmax, |
|
|
int | n_mels, |
|
|
bool | htk = false ) |
Compute an array of acoustic frequencies tuned to the mel scale.
- Parameters
-
| n_mels | number of mel bins |
| fmin | minimum frequency (Hz). |
| fmax | maximum frequency (Hz). |
| htk | if true, use the HTK formula to convert Hz to mel; otherwise (false), use the formula from Slaney’s Auditory Toolbox |
- Returns
- vector of n_mels frequencies in Hz which are uniformly spaced on the Mel axis
◆ mel_to_hz()
| std::vector< float > krotos::AudioEmbedding::mel_to_hz |
( |
std::vector< float > | mels, |
|
|
bool | htk = false ) |
Convert mel bin numbers to frequencies.
- Parameters
-
| mels | mel bins to convert |
| htk | use HTK formula instead of Slaney |
- Returns
- input mels in Hz
◆ writeMatrixToFile()
| void krotos::AudioEmbedding::writeMatrixToFile |
( |
const std::vector< std::vector< float > > & | matrix, |
|
|
const std::string & | filename ) |
|
private |
This function exists as a debugging mechanism to compare the spectrogram created here with its equivalent in MATLAB.
◆ m_fft
| juce::dsp::FFT krotos::AudioEmbedding::m_fft |
|
private |
◆ m_fftSize
| int krotos::AudioEmbedding::m_fftSize |
|
private |
◆ m_hopSize
| int krotos::AudioEmbedding::m_hopSize |
|
private |
◆ m_initialised
| bool krotos::AudioEmbedding::m_initialised |
|
private |
◆ m_mel
| std::vector<std::vector<float> > krotos::AudioEmbedding::m_mel |
|
private |
◆ m_weight
| std::vector<std::vector<float> > krotos::AudioEmbedding::m_weight |
|
private |
Linear autoencoder weights exported from PyTorch.
◆ m_window
| juce::dsp::WindowingFunction<float> krotos::AudioEmbedding::m_window |
|
private |
The documentation for this class was generated from the following files: