8 : m_fftSize(2048), m_hopSize(1024), m_fft(static_cast<int>(std::log2(m_fftSize))),
9 m_window(m_fftSize + 1, dsp::WindowingFunction<float>::hann, false)
// Fragment of AudioEmbedding::initialise(). The leading numbers are line
// numbers carried over from the original source listing.
// Only the mel filterbank settings are visible here: number of bands,
// lower/upper band edge, and the htk flag.
// NOTE(review): presumably these are passed to mel() to fill m_mel; that call
// is not part of the visible lines - confirm.
18 const int n_mels = 40;
19 const float fmin = 20.f;
20 const float fmax = 8000.f;
21 const bool htk =
false;
// Fragment of AudioEmbedding::forward(const AudioSampleBuffer& buffer).
// Visible flow: walk the input in hops of m_hopSize, transform each frame,
// sum the per-frame mel vectors into `embedding`, then divide by the frame
// count. Several lines (inner loop header, windowing, mel projection,
// numFrames update, braces) are not shown in this listing.

// Only the read pointer for channel index 0 is taken.
30 const auto inputSize = buffer.getNumSamples();
31 const auto inputData = buffer.getReadPointer(0);
// Work buffer of 2 * m_fftSize floats.
// NOTE(review): presumably sized for JUCE's in-place frequency-only
// transform - confirm against the JUCE dsp::FFT documentation.
33 std::vector<float> sigFrame(2 *
m_fftSize);
// Accumulator with one zero-initialised entry per row of m_mel.
34 std::vector<float> embedding(
m_mel.size(), 0.f);
36 std::size_t numFrames = 0;
37 for (
int posin = 0; posin < inputSize; posin +=
m_hopSize)
// Bounds-checked copy: only samples that lie inside the input are written.
// NOTE(review): what happens for positions past the end (e.g. zero padding)
// is on lines not shown.
42 if (posin + i < inputSize)
43 sigFrame.at(i) = inputData[posin + i];
// In-place transform of the frame; see the JUCE docs for the meaning of the
// boolean argument.
52 m_fft.performFrequencyOnlyForwardTransform(sigFrame.data(),
false);
// Element-wise add of this frame's mel vector into the running sum.
58 std::transform(embedding.begin(), embedding.end(), melFrame.begin(), embedding.begin(), std::plus<float>());
// Guard against dividing by zero, then turn the sum into a per-frame average.
// NOTE(review): the increment of numFrames is not visible here - confirm it
// happens once per processed frame.
64 numFrames = numFrames > 0 ? numFrames : 1;
65 std::for_each(embedding.begin(), embedding.end(), [numFrames](
float& v) { v = v / numFrames; });
// NOTE(review): 40 is hard-coded; presumably it mirrors n_mels from
// initialise() - confirm the two stay in sync.
67 assert(embedding.size() == 40);
// NOTE(review): the size changes from 40 to 20 between these two asserts;
// presumably applyAutoEncoder() runs on a line not shown - confirm.
72 assert(embedding.size() == 20);
// Fragment of AudioEmbedding::L2Normalise(x, eps): scales x in place.
// eps is used as the initial value of the inner product, so the divisor is
// sqrt(eps + sum of x[i]^2) rather than the plain Euclidean norm.
79 const auto norm = std::sqrt(std::inner_product(x.begin(), x.end(), x.begin(), eps));
80 std::for_each(x.begin(), x.end(), [norm](
float& v) { v = v / norm; });
// Fragment of AudioEmbedding::applyMelFilterbank(x): produces one output
// value per row of m_mel.
86 std::vector<float> melFrame(
m_mel.size(), 0.f);
87 for (std::size_t i = 0; i <
m_mel.size(); ++i)
// Dot product of the first n elements of x with filter row i.
// NOTE(review): the definition of n is not visible; m_mel[i] must hold at
// least n values for this read to stay in bounds - confirm.
89 melFrame[i] = std::inner_product(x.begin(), x.begin() + n,
m_mel[i].begin(), 0.f);
// Fragment of AudioEmbedding::applyAutoEncoder(x): produces one output value
// per row of m_weight.
96 std::vector<float> latent(
m_weight.size(), 0.f);
97 for (std::size_t i = 0; i <
m_weight.size(); ++i)
// Dot product of the whole of x with weight row i. No bias term or
// non-linearity appears in the visible lines.
// NOTE(review): m_weight[i] must hold at least x.size() values - confirm.
99 latent[i] = std::inner_product(x.begin(), x.end(),
m_weight[i].begin(), 0.f);
// Fragment of AudioEmbedding::hz_to_mel(freqs, htk): converts each frequency
// to a mel value, writing into a result vector of the same length.
// Two conversions are visible; the branch selecting between them is not shown
// (presumably the htk flag - confirm).
106 std::vector<float> mels(freqs.size());
// First conversion: mel = 2595 * log10(1 + f / 700).
// NOTE(review): some standard library versions do not declare std::log10f in
// namespace std; std::log10 on a float argument may be the more portable
// spelling - verify on the target toolchains.
109 for (std::size_t i = 0; i < mels.size(); ++i)
111 mels[i] = 2595.0f * std::log10f(1.0f + freqs[i] / 700.0f);
// Second conversion, part 1: linear mapping, mel = (f - fmin) / f_sp.
116 const float fmin = 0.0f;
117 const float f_sp = 200.0f / 3.0f;
119 for (std::size_t i = 0; i < mels.size(); ++i)
121 mels[i] = (freqs[i] - fmin) / f_sp;
// Second conversion, part 2: frequencies at or above min_log_hz are
// overwritten with a logarithmic mapping that starts at min_log_mel.
124 const float min_log_hz = 1000.0f;
125 const float min_log_mel = (min_log_hz - fmin) / f_sp;
126 const float logstep = std::log(6.4f) / 27.0f;
128 for (std::size_t i = 0; i < mels.size(); ++i)
130 if (freqs[i] >= min_log_hz)
132 mels[i] = min_log_mel + std::log(freqs[i] / min_log_hz) / logstep;
// Fragment of AudioEmbedding::mel_to_hz(mels, htk): converts each mel value
// to a frequency, writing into a result vector of the same length.
// Two conversions are visible; the branch selecting between them is not shown
// (presumably the htk flag - confirm).
141 std::vector<float> freqs(mels.size());
// First conversion: f = 700 * (10^(mel / 2595) - 1).
144 for (std::size_t i = 0; i < mels.size(); ++i)
146 freqs[i] = 700.0f * (std::pow(10.0f, mels[i] / 2595.0f) - 1.0f);
// Second conversion, part 1: linear mapping, f = f_min + f_sp * mel.
151 const float f_min = 0.0f;
152 const float f_sp = 200.0f / 3.0f;
154 for (std::size_t i = 0; i < mels.size(); ++i)
156 freqs[i] = f_min + f_sp * mels[i];
// Second conversion, part 2: mel values at or above min_log_mel are
// overwritten with an exponential mapping that starts at min_log_hz.
159 const float min_log_hz = 1000.0f;
160 const float min_log_mel = (min_log_hz - f_min) / f_sp;
161 const float logstep = std::log(6.4f) / 27.0f;
163 for (std::size_t i = 0; i < mels.size(); ++i)
165 if (mels[i] >= min_log_mel)
167 freqs[i] = min_log_hz * std::exp(logstep * (mels[i] - min_log_mel));
// Fragment of AudioEmbedding::mel_frequencies(fmin, fmax, n_mels, htk).
// hz_to_mel takes a vector, so each band edge is wrapped in a one-element
// vector and the single result is read back with [0].
176 const auto fmin_v = std::vector<float>(1, fmin);
177 const auto fmax_v = std::vector<float>(1, fmax);
178 const float min_mel =
hz_to_mel(fmin_v, htk)[0];
179 const float max_mel =
hz_to_mel(fmax_v, htk)[0];
// n_mels evenly spaced mel values from min_mel to max_mel inclusive.
// NOTE(review): n_mels == 1 makes the divisor zero - confirm callers never
// pass it.
181 const auto step = (max_mel - min_mel) /
static_cast<float>(n_mels - 1);
182 std::vector<float> mels = std::vector<float>(n_mels);
183 for (
int i = 0; i < n_mels; ++i)
185 mels[i] = min_mel + step *
static_cast<float>(i);
// NOTE(review): the return statement is not visible; presumably the mel
// values are converted back with mel_to_hz - confirm.
// Builds a filter matrix with n_mels rows and (1 + n_fft / 2) columns.
// Each row is a triangular weighting over the FFT bin frequencies, scaled by
// 2 / (width of the triangle in Hz).
// Parameters (as used in the visible lines):
//   sr     - sample rate; sets the bin frequencies and the fmax fallback
//   n_fft  - FFT size; sets the number of columns
//   n_mels - number of rows
//   fmin, fmax, htk - presumably forwarded to the band-edge computation,
//                     which is on lines not shown - confirm
// NOTE(review): the listing omits several lines (braces, the condition
// guarding the fmax fallback, the definition of mel_f, the else keywords and
// the return statement).
191std::vector<std::vector<float>>
AudioEmbedding::mel(
int sr,
int n_fft,
int n_mels,
float fmin,
float fmax,
bool htk)
193 const int length = 1 + n_fft / 2;
// Fallback upper edge: half the sample rate (the guarding condition is not
// shown).
196 fmax =
static_cast<float>(sr) / 2.0f;
199 std::vector<std::vector<float>> weights(n_mels, std::vector<float>(length));
// Frequency of each FFT bin: i * sr / n_fft.
201 std::vector<float> fft_freqs(length);
202 for (
int i = 0; i < length; ++i)
204 fft_freqs[i] =
static_cast<float>(sr) /
static_cast<float>(n_fft) *
static_cast<float>(i);
// Spacing between consecutive entries of mel_f.
// NOTE(review): the indexing below (mel_f[2 + i], ramps[i + 2]) needs mel_f
// to hold at least n_mels + 2 values - confirm where it is built.
209 std::vector<float> fdiff(mel_f.size() - 1);
210 for (std::size_t i = 0; i < fdiff.size(); ++i)
212 fdiff[i] = mel_f[i + 1] - mel_f[i];
// ramps[i][j]: signed distance from bin frequency j to band edge i.
215 std::vector<std::vector<float>> ramps(mel_f.size(), std::vector<float>(fft_freqs.size()));
216 for (std::size_t i = 0; i < mel_f.size(); ++i)
218 for (std::size_t j = 0; j < fft_freqs.size(); ++j)
220 ramps[i][j] = mel_f[i] - fft_freqs[j];
224 auto lower = std::vector<float>(fft_freqs.size());
225 auto upper = std::vector<float>(fft_freqs.size());
226 for (
int i = 0; i < n_mels; ++i)
// Rising slope of triangle i, measured from edge i.
228 for (std::size_t j = 0; j < lower.size(); j++)
230 lower[j] = -1 * ramps[i][j] / fdiff[i];
// Falling slope of triangle i, measured from edge i + 2.
233 for (std::size_t j = 0; j < lower.size(); ++j)
235 upper[j] = ramps[i + 2][j] / fdiff[i + 1];
// Weight is the smaller of the two slopes, clamped at zero.
238 for (std::size_t j = 0; j < lower.size(); ++j)
240 auto lower_upper_minimum = 0.0f;
241 if (lower[j] > upper[j])
243 lower_upper_minimum = upper[j];
247 lower_upper_minimum = lower[j];
250 if (lower_upper_minimum > 0.0f)
252 weights[i][j] = lower_upper_minimum;
256 weights[i][j] = 0.0f;
// Scale every row by 2 / (mel_f[i + 2] - mel_f[i]).
261 for (
int i = 0; i < n_mels; ++i)
263 const auto enorm = 2.0f / (mel_f[2 + i] - mel_f[i]);
264 for (
int j = 0; j < length; ++j)
266 weights[i][j] = enorm * weights[i][j];
// Fragment of AudioEmbedding::writeMatrixToFile(matrix, filename): writes the
// matrix values as text to the file at `filename`.
275 std::ofstream outputFile(filename);
276 if (!outputFile.is_open())
// NOTE(review): the visible message ends after the colon and does not include
// the filename; what follows the log call (e.g. an early return) is not
// shown - confirm.
278 DBG(
"Failed to open file: ");
// Each value is followed by a single space.
// NOTE(review): the separator written between rows is on lines not shown.
282 for (
const auto& row : matrix)
284 for (
const auto& value : row)
286 outputFile << value <<
" ";
std::vector< float > forward(const AudioSampleBuffer &buffer)
Definition AudioEmbedding.cpp:26
std::vector< float > applyAutoEncoder(const std::vector< float > &x)
Definition AudioEmbedding.cpp:94
int m_hopSize
Definition AudioEmbedding.h:78
std::vector< float > mel_to_hz(std::vector< float > mels, bool htk=false)
Definition AudioEmbedding.cpp:139
int m_fftSize
Definition AudioEmbedding.h:77
void writeMatrixToFile(const std::vector< std::vector< float > > &matrix, const std::string &filename)
Definition AudioEmbedding.cpp:273
void initialise()
Definition AudioEmbedding.cpp:14
std::vector< std::vector< float > > m_mel
Definition AudioEmbedding.h:81
juce::dsp::FFT m_fft
Definition AudioEmbedding.h:79
bool m_initialised
Definition AudioEmbedding.h:76
void L2Normalise(std::vector< float > &x, float eps=1e-5f)
Definition AudioEmbedding.cpp:77
juce::dsp::WindowingFunction< float > m_window
Definition AudioEmbedding.h:80
std::vector< float > hz_to_mel(std::vector< float > freqs, bool htk=false)
Definition AudioEmbedding.cpp:104
std::vector< float > applyMelFilterbank(const std::vector< float > &x)
Definition AudioEmbedding.cpp:83
AudioEmbedding()
Definition AudioEmbedding.cpp:7
std::vector< std::vector< float > > m_weight
Definition AudioEmbedding.h:84
std::vector< std::vector< float > > mel(int sr, int n_fft, int n_mels, float fmin, float fmax, bool htk=false)
Definition AudioEmbedding.cpp:191
std::vector< float > mel_frequencies(float fmin, float fmax, int n_mels, bool htk=false)
Definition AudioEmbedding.cpp:174
Definition AirAbsorptionFilter.cpp:2