Krotos Modules 3
Loading...
Searching...
No Matches
ERB_FFTSpectrogram.cpp
Go to the documentation of this file.
1//=====================================================================================
7//=====================================================================================
8
9//=====================================================================================
11{
12 // create the vector of N ERB-scale frequencies spaced between m_lo and m_hi
13 // TODO: Tsovolas!
15}
16
17//=====================================================================================
19{
20 m_sampleRate = fs;
21
22 // calculate hopSize in samples from sampling rate
23 m_hopSizeSamples = static_cast<int>(round(m_sampleRate * m_hopSizeSec));
24
25 // configure earFilter
26 m_earFilter.configure(fs);
27
28 // limit to 1/2 erb below Nyquist
29 m_hi = std::min(m_hi, m_sampleRate / 2.0f - cferb(m_sampleRate / 2.0f) / 2.0f);
30
31 // winSize param
32 m_wSize = static_cast<int>(std::pow(2.0f, getNextPowerOfTwo(m_ERD * m_sampleRate * 2.0f)));
33
34 // create gammaTone window
35 m_gtWin = gtWindow(m_wSize, (float)m_wSize / (m_ERD * m_sampleRate), 4);
36}
37
38std::vector<std::vector<float>> krotos::ERB_FFTSpectrogram::filterSpectrum(std::vector<float>& inputSignal)
39{
40 // apply the mid outer ear filter to the signal
41 std::vector<float> filteredSignal = applyEarFilter(inputSignal);
42 int length = static_cast<int>(filteredSignal.size());
43
44 // pad signal with zeros to align analysis point with window power centroid
45 // square gammatone window
46 std::vector<float> gtWinSquared;
47 gtWinSquared.reserve(m_gtWin.size());
48
49 std::transform(m_gtWin.begin(), m_gtWin.end(), std::back_inserter(gtWinSquared), [](float n) { return n * n; });
50 // calculate centroid and spread - take ceil to provide zeropad offset
51 int zpOffest = static_cast<int>(std::ceil(centroid(gtWinSquared) - 1.0f));
52 // zeroPad the signal
53 filteredSignal.insert(filteredSignal.begin(), zpOffest, 0.0f);
54 // calculate last hopIndex where the window will fit within the end of the
55 // signal
56 int lastIndex = (length - (m_wSize - zpOffest)) / m_hopSizeSamples * m_hopSizeSamples + 1;
57 std::vector<float> startSamples;
58 for (int i = 0; i <= lastIndex; i += m_hopSizeSamples)
59 {
60 startSamples.push_back(static_cast<float>(i));
61 }
62
63 // build the filterBank
64 buildFilterBank(static_cast<int>(startSamples.size()));
65
66 // matrix of windowed slices of signal
67 std::vector<std::vector<float>> fr(m_wSize, std::vector<float>(startSamples.size()));
68
69 // obtain windowed segments of the signal
70 for (int i = 0; i < startSamples.size(); ++i)
71 {
72 for (int j = 0; j < m_wSize; ++j)
73 {
74 fr[j][i] = filteredSignal[static_cast<int>(startSamples[i]) + j] * m_gtWin[j];
75 }
76 }
77
78 // power spectrum
79 std::vector<std::vector<float>> powerSpec = calculatePowerSpectrum(fr);
80
81 std::vector<std::vector<float>> tmp(powerSpec[0].size(), std::vector<float>(m_wfunct[0].size(), 0.0f));
82
83 for (size_t i = 0; i < m_wfunct[0].size(); ++i)
84 {
85 for (size_t j = 0; j < powerSpec[0].size(); ++j)
86 {
87 for (size_t k = 0; k < m_wfunct.size(); ++k)
88 {
89 tmp[j][i] += static_cast<float>(m_wfunct[k][i]) * powerSpec[k][j];
90 }
91 tmp[j][i] = std::pow(tmp[j][i], 0.25f);
92 }
93 }
94
95 // write to file to import to MATLAB and compare against the timbretoolbox
96 // implementation std::string filename = "matrix_data.txt";
97 // writeMatrixToFile(tmp, filename);
98
99 return tmp;
100}
101
102std::vector<float> krotos::ERB_FFTSpectrogram::applyEarFilter(std::vector<float>& inputSignal)
103{
104 jassert(m_sampleRate != -1.0f);
105
106 // TODO: rewrite to not copy memory around !!
107 std::vector<float> earFilteredSignal = m_earFilter.processSignal(inputSignal);
108
109 return earFilteredSignal;
110}
111
113{
114 // array of kernel bandwidth coeffs
115 std::vector<float> b = cferb(m_cfArray);
116
117 // ERB to gammatone b parameter
118 float scalar = 0.982f; // keep here for now
119 transform(b.begin(), b.end(), b.begin(), [scalar](float value) { return value / scalar; });
120
121 std::vector<float> bb(b.size());
122 transform(b.begin(), b.end(), bb.begin(), [=](float x) {
123 float diffSquared = x * x - m_b0 * m_b0;
124 return std::sqrt(diffSquared);
125 });
126
127 // matrix of kernels(array of gammatone power tfs sampled at fft spectrum
128 // frequencies).
129 std::vector<std::vector<float>> fSupport(m_wSize / 2, std::vector<float>(m_numChans));
130 for (int i = 0; i < m_wSize / 2; i++)
131 {
132 for (int j = 0; j < m_numChans; j++)
133 {
134 fSupport[i][j] = static_cast<float>(i + 1) * m_sampleRate / m_wSize;
135 }
136 }
137
138 std::vector<std::vector<float>> cf(m_wSize / 2, std::vector<float>(m_cfArray.size()));
139 for (int i = 0; i < m_wSize / 2; i++)
140 {
141 for (int j = 0; j < m_cfArray.size(); j++)
142 {
143 cf[i][j] = m_cfArray[j];
144 }
145 }
146
147 std::vector<std::vector<float>> repeated_bb(m_wSize / 2, std::vector<float>(bb.size()));
148 for (int i = 0; i < m_wSize / 2; ++i)
149 {
150 repeated_bb[i] = bb;
151 }
152
153 // power transfer functions- need to cast to double to avoid underflow -- DAMN
154 // IT!!!
155 size_t rows = fSupport.size();
156 size_t cols = cf[0].size();
157 m_wfunct.resize(rows, std::vector<double>(cols));
158
159 for (size_t i = 0; i < rows; ++i)
160 {
161 for (size_t j = 0; j < cols; ++j)
162 {
163 double term = std::pow((1.0 / (startSamplesSize * (fSupport[i][j] - cf[i][j]) + repeated_bb[i][j])), 4.0);
164 m_wfunct[i][j] = std::abs(std::pow(term, 2.0));
165 }
166 }
167
168 // adjust so weight == ERB
169 std::vector<double> adjustweight(m_cfArray.size());
170 for (size_t j = 0; j < m_wfunct[0].size(); ++j)
171 {
172 double sum_wfunct = 0.0;
173 for (size_t i = 0; i < m_wfunct.size(); ++i)
174 {
175 sum_wfunct += m_wfunct[i][j];
176 }
177
178 adjustweight[j] = cferb(static_cast<double>(m_cfArray[j])) / sum_wfunct;
179 }
180
181 // TODO::Ask if this would work: Optimize it by using SIMD (Single
182 // Instruction, Multiple Data) instructions to perform parallel computations on
183 // multiple elements simultaneously. perhaps using: Intel's SIMD-oriented Fast
184 // Mersenne Twister (SIMD-optimized PRNG) or Intel Intrinsics.??????????
185 size_t adjustweight_size = adjustweight.size();
186 for (size_t i = 0; i < m_wfunct.size(); ++i)
187 {
188 for (size_t j = 0; j < m_wfunct[0].size(); ++j)
189 {
190 m_wfunct[i][j] *= adjustweight[j % adjustweight_size];
191 }
192 }
193
194 // find the maximum value in wfunct
195 double maxVal = 0.0;
196 for (const auto& row : m_wfunct)
197 {
198 auto maxElement = std::max_element(row.begin(), row.end());
199 if (maxElement != row.end() && *maxElement > maxVal)
200 {
201 maxVal = *maxElement;
202 }
203 }
204
205 // divide each element of wfunct by the maximum value
206 for (auto& row : m_wfunct)
207 {
208 std::transform(row.begin(), row.end(), row.begin(), [maxVal](double val) { return val / maxVal; });
209 }
210}
211
212std::vector<float> krotos::ERB_FFTSpectrogram::gtWindow(int n, float b, int order)
213{
214 std::vector<float> y(n);
215
216 for (int i = 0; i < n; ++i)
217 {
218 float t = static_cast<float>(i) / n;
219 y[i] = std::pow(b, (float)order) * std::pow(t, (float)order - 1.0f) * std::exp(-2.0f * float(M_PI) * b * t);
220 }
221
222 std::reverse(y.begin(), y.end());
223
224 // normalize the window
225 float maxVal = *std::max_element(y.begin(), y.end());
226 for (auto& val : y)
227 {
228 val /= maxVal;
229 }
230
231 return y;
232}
233
234std::vector<float> krotos::ERB_FFTSpectrogram::cferb(std::vector<float> cf)
235{
236 std::vector<float> bw;
237
238 for (float cfValue : cf)
239 {
240 cfValue = (24.7f * (1.0f + 4.37f * cfValue / 1000.0f));
241 bw.push_back(cfValue);
242 }
243
244 return bw;
245}
246
248{
249 float bw = 24.7f * (1.0f + 4.37f * cf / 1000.0f);
250 return bw;
251}
252
254{
255 double bw = 24.7 * (1.0 + 4.37 * cf / 1000.0);
256 return bw;
257}
258
259std::vector<float> krotos::ERB_FFTSpectrogram::erbSpace(float low, float hi, int N)
260{
261 // change the following parameters if you wish to use a different
262 // ERB scale. Must change in makeERBCoeffs too.
263 float EarQ = 9.26449f; // Glasberg and Moore Parameters
264 float minBW = 24.7f;
265
266 std::vector<float> y(N);
267
268 float a = EarQ * minBW;
269 for (int i = 0; i < N; ++i)
270 {
271 float cf = -a + std::exp(i * (-std::log(hi + a) + std::log(low + a)) / (N - 1)) * (hi + a);
272 y[N - i - 1] = cf;
273 }
274
275 return y;
276}
277
278float krotos::ERB_FFTSpectrogram::centroid(const std::vector<float>& x)
279{
280 std::size_t n = x.size();
281 float sum = 0.0f;
282 float totalWeight = 0.0f;
283
284 // compute the weighted sum and total weight
285 for (std::size_t i = 0; i < n; i++)
286 {
287 sum += static_cast<float>(i + 1) * x[i];
288 totalWeight += x[i];
289 }
290
291 // normalize and return the centroid
292 return (sum + std::numeric_limits<float>::epsilon()) / (totalWeight + std::numeric_limits<float>::epsilon());
293}
294
296 const std::vector<std::vector<float>>& inFrames)
297{
298 size_t numRows = inFrames.size();
299 size_t numCols = inFrames[0].size();
300 size_t fftSize = numRows;
301
302 std::vector<std::vector<float>> powerSpectrum(numRows, std::vector<float>(numCols));
303
304 // create the dsp::FFT object
305 int fftOrder = static_cast<int>(log2(fftSize));
306 dsp::FFT fft(fftOrder);
307 m_fftSize = static_cast<int>(fftSize);
308
309 // create input and output buffers for the FFT
310 std::vector<float> inputBuffer(2 * fftSize, 0.0f);
311
312 for (size_t col = 0; col < numCols; ++col)
313 {
314 // copy column data to the input buffer
315 for (size_t row = 0; row < numRows; ++row)
316 {
317 inputBuffer[row] = inFrames[row][col];
318 }
319
320 // perform frequency-only forward transform
321 fft.performFrequencyOnlyForwardTransform(inputBuffer.data());
322
323 // get the magnitude squared of the FFT coefficients
324 for (size_t row = 0; row < numRows; ++row)
325 {
326 float magnitudeSquared = std::norm(inputBuffer[row]);
327 powerSpectrum[row][col] = magnitudeSquared;
328 }
329
330 // zero inputBuffer to prepare for next frame
331 std::fill(inputBuffer.begin(), inputBuffer.end(), 0.0f);
332 }
333
334 return powerSpectrum;
335}
336
337void krotos::ERB_FFTSpectrogram::drawSpectrogram(juce::Image& image, const std::vector<std::vector<float>>& stftMatrix)
338{
339 auto numOfFrames = stftMatrix.size();
340
341 auto imageWidth = image.getWidth();
342 auto imageHeight = image.getHeight();
343
344 jassert(imageWidth > 0 && imageHeight > 0);
345
346 float factor = (static_cast<float>(numOfFrames)) / (static_cast<float>(imageWidth));
347
348 for (int x = 0; x < imageWidth; x++)
349 {
350 int frameIndex = static_cast<int>(factor * static_cast<float>(x));
351
352 jassert(frameIndex <= numOfFrames);
353
354 for (auto y = 1; y < imageHeight; ++y)
355 {
356 auto maxLevel = juce::FloatVectorOperations::findMinAndMax(stftMatrix.at(frameIndex).data(), m_fftSize);
357
358 // proportion of the image that this y pixel corresponds to ?
359 auto skewedProportionY = 1.0f - std::exp(std::log((float)y / (float)imageHeight) * m_scalingConstant);
360 // proportion to fft Index
361 auto fftDataIndex = (size_t)juce::jlimit(
362 /*lowerLimit*/ 0,
363 /*upperLimit*/ m_fftSize,
364 /*value to constrain*/ (int)(skewedProportionY * m_fftSize));
365
366 // maps from Magnitude of spectrum to 0...1
367 auto level = juce::jmap(/*sourceValue*/ stftMatrix.at(frameIndex).at(fftDataIndex),
368 /*sourceRangeMin*/ 0.0f,
369 /*sourceRangeMax*/ juce::jmax(maxLevel.getEnd(), 1e-5f),
370 /*targetRangeMin*/ 0.0f,
371 /*targetRangeMax*/ 1.0f);
372
373 image.setPixelAt(x, y, juce::Colour::fromHSV(level, 1.0f, level, 1.0f));
374 }
375 }
376}
377
379{
380 if (value <= 0.0f)
381 return 0.0f;
382
383 float exponent = std::ceilf(std::log2f(value));
384
385 return exponent;
386}
387
388std::vector<float> krotos::ERB_FFTSpectrogram::erbs2Hz(const std::vector<float>& erbsVector)
389{
390 std::vector<float> hzVector(erbsVector.size());
391
392 std::transform(erbsVector.begin(), erbsVector.end(), hzVector.begin(),
393 [](float hz) { return std::pow(2.0f, (hz / 6.44f) + 7.84f) - 229.0f; });
394
395 return hzVector;
396}
397
398Eigen::VectorXf krotos::ERB_FFTSpectrogram::erbs2Hz(const Eigen::VectorXf& erbsVector)
399{
400 return Eigen::pow(2.0f, erbsVector.array() / 6.44f + 7.84f) - 229.0f;
401}
402
403std::vector<float> krotos::ERB_FFTSpectrogram::hz2Erbs(const std::vector<float>& hzVector)
404{
405 std::vector<float> erbsVector(hzVector.size());
406
407 std::transform(hzVector.begin(), hzVector.end(), erbsVector.begin(),
408 [](float hz) { return 6.44f * (std::log2(229.0f + hz) - 7.84f); });
409
410 return erbsVector;
411}
412
413float krotos::ERB_FFTSpectrogram::hz2Erbs(const float hz) { return 6.44f * (std::log2(229.0f + hz) - 7.84f); }
414
415Eigen::VectorXf krotos::ERB_FFTSpectrogram::hz2Erbs(const Eigen::VectorXf& hzVector)
416{
417 return 6.44f * (Eigen::log2(229.0f + hzVector.array()) - 7.84f);
418}
static std::vector< float > hz2Erbs(const std::vector< float > &hzVector)
Definition ERB_FFTSpectrogram.cpp:403
float centroid(const std::vector< float > &x)
Definition ERB_FFTSpectrogram.cpp:278
ERB_FFTSpectrogram()
Definition ERB_FFTSpectrogram.cpp:10
void drawSpectrogram(juce::Image &image, const std::vector< std::vector< float > > &erbSTFTPowMatrix)
Definition ERB_FFTSpectrogram.cpp:337
int m_numChans
Definition ERB_FFTSpectrogram.h:160
void setSampleRate(float fs)
Definition ERB_FFTSpectrogram.cpp:18
std::vector< float > cferb(std::vector< float > cf)
Definition ERB_FFTSpectrogram.cpp:234
std::vector< float > erbSpace(float low, float hi, int N)
Definition ERB_FFTSpectrogram.cpp:259
std::vector< std::vector< float > > filterSpectrum(std::vector< float > &inputSignal)
Definition ERB_FFTSpectrogram.cpp:38
float getNextPowerOfTwo(float value)
Definition ERB_FFTSpectrogram.cpp:378
std::vector< float > applyEarFilter(std::vector< float > &inputSignal)
Definition ERB_FFTSpectrogram.cpp:102
static std::vector< float > erbs2Hz(const std::vector< float > &erbsVector)
Definition ERB_FFTSpectrogram.cpp:388
void buildFilterBank(int startSamplesSize)
Definition ERB_FFTSpectrogram.cpp:112
std::vector< std::vector< float > > calculatePowerSpectrum(const std::vector< std::vector< float > > &inFrames)
Definition ERB_FFTSpectrogram.cpp:295
std::vector< float > gtWindow(int numberOfPoints, float bParam, int order)
Definition ERB_FFTSpectrogram.cpp:212
std::vector< float > m_cfArray
Definition ERB_FFTSpectrogram.h:144
float m_lo
Definition ERB_FFTSpectrogram.h:156
float m_hi
Definition ERB_FFTSpectrogram.h:158
#define M_PI
Definition windowing.h:9