3#include <openssl/evp.h>
4#include <openssl/err.h>
14 if (ggmlDataFile.existsAsFile())
26 if (!threadShouldExit())
53 for (File& file : filesToProcess)
56 if (!success || Thread::currentThreadShouldExit())
71 return appendedDataToEmbeddingsFile;
77 if (!presetFile.exists())
109 String presetTagsString;
122 if (Thread::currentThreadShouldExit())
128 StringArray assetNameTokens;
129 assetNameTokens.addTokens(assetName, StringRef(
delimiters), StringRef(
"'"));
132 for (
auto token : assetNameTokens)
137 if (
excludeTerms.contains(token.toLowerCase()) ==
false)
139 if (token ==
"FOLEYFeet")
143 if (
m_UCS->isValid(token))
147 token =
m_UCS->getCategorySubCategory(token);
148 token = token.removeCharacters(
",");
157#if AUDIO_METADATA_TAGS
162 if (Thread::currentThreadShouldExit())
170 AudioFormatManager formatManager;
171 formatManager.registerBasicFormats();
172 std::unique_ptr<AudioFormatReader> reader;
175 if (assetPath.startsWith(
".") ==
true)
182 File assetFile(assetPath);
183 if (assetFile.exists() ==
false)
185 assetPath = assetPath.upToFirstOccurrenceOf(
".wav",
false,
true) +
".kaf";
187 if (assetPath.startsWith(
".") ==
true)
191 assetFile = File(assetPath);
201 for (String key : reader->metadataValues.getAllKeys())
205 auto description = reader->metadataValues.getValue(key,
"");
207 tokens.addTokens(description,
",", StringRef(
""));
208 for (
auto token : tokens)
213 token.containsIgnoreCase(
"krt") ==
false &&
214 token.containsIgnoreCase(
"krotos") ==
false && token.startsWith(
"\\") ==
false &&
215 token.contains(
m_presetFile.getFileNameWithoutExtension().toLowerCase()) ==
false)
219 if (token.startsWith(
" "))
221 token = token.fromFirstOccurrenceOf(
" ",
false,
true);
233 std::array<unsigned char, BUF_SIZE> m_sampleData;
235 if (assetFile.existsAsFile() ==
false)
240 std::ifstream inputFile(assetPath.toStdString(), std::ios::binary);
247 int64 assetFileSize = assetFile.getSize();
248 std::unique_ptr<juce::MemoryBlock> outputDataBlock =
249 std::make_unique<juce::MemoryBlock>(assetFile.getSize());
251 juce::MemoryOutputStream outputDataStream(*outputDataBlock,
false);
254 std::vector<unsigned char> plaintext(
BUFFER_SIZE + EVP_MAX_BLOCK_LENGTH);
256 std::vector<unsigned char> iv(
IV_SIZE);
257 inputFile.read(
reinterpret_cast<char*
>(iv.data()),
IV_SIZE);
260 std::string encKey(
"5128bd6c2bc42ed6835d9d24ee0e7200cc8aeae68e18e9f1ea3f6cd52cc082a8");
267 if (!(ctx = EVP_CIPHER_CTX_new()))
273 if (EVP_DecryptInit_ex(ctx, EVP_aes_256_cbc(), NULL,
m_data, iv.data()) != 1)
277 auto* thread = Thread::getCurrentThread();
280 int totalBytesProcessed = 0;
281 while (inputFile.read(
reinterpret_cast<char*
>(buffer.data()), buffer.size()))
283 auto bytesRead = inputFile.gcount();
285 if (thread && thread->threadShouldExit())
290 if (EVP_DecryptUpdate(ctx, plaintext.data(), &len, buffer.data(),
static_cast<int>(bytesRead)) != 1)
295 outputDataStream.write(
reinterpret_cast<char*
>(plaintext.data()), len);
296 totalBytesProcessed += len;
300 auto remainingBytes = inputFile.gcount();
301 if (remainingBytes > 0)
303 if (EVP_DecryptUpdate(ctx, plaintext.data(), &len, buffer.data(),
304 static_cast<int>(remainingBytes)) != 1)
307 outputDataStream.write(
reinterpret_cast<char*
>(plaintext.data()), len);
309 outputDataStream.flush();
310 totalBytesProcessed += len;
311 EVP_CIPHER_CTX_free(ctx);
313 std::unique_ptr<MemoryInputStream> audioDataStream =
314 std::make_unique<MemoryInputStream>(*outputDataBlock,
true);
316 reader.reset(formatManager.createReaderFor(std::move(audioDataStream)));
321 for (String key : reader->metadataValues.getAllKeys())
325 auto description = reader->metadataValues.getValue(key,
"");
327 tokens.addTokens(description,
",", StringRef(
""));
328 for (
auto token : tokens)
333 token.containsIgnoreCase(
"krt") ==
false &&
334 token.containsIgnoreCase(
"krotos") ==
false && token.startsWith(
"\\") ==
false &&
335 token.containsAnyOf(
m_presetName.toLowerCase()) ==
false)
339 if (token.startsWith(
" "))
341 token = token.fromFirstOccurrenceOf(
" ",
false,
true);
354#if PRESET_FILENAME_TAGS
356 StringArray presetNameTokens;
357 presetNameTokens.addTokens(
m_presetFile.getFileNameWithoutExtension(),
" _", StringRef(
""));
358 for (
auto& presetNameToken : presetNameTokens)
360 if (presetNameToken.isEmpty() ==
false && presetNameToken.containsOnly(
ttpNumericalMetadata) ==
false)
362 if (!
excludeTerms.contains(presetNameToken.toLowerCase()))
372 if (eliminateDuplicates ==
true)
374 presetTagsString.clear();
376 StringArray uniqueTags;
381 tag = tag.toLowerCase();
382 if (!uniqueTags.contains(tag))
390 presetTagsString = uniqueTags.joinIntoString(
", ");
394 if (Thread::currentThreadShouldExit())
400 std::vector<float> embeddings;
430 int numChildrean = root.getNumChildren();
432 while ((child = root.getChild(index)).isValid())
434 if (child.hasType(tag))
436 if (child.getProperty(Identifier(
"id")).toString() == property.toString())
438 String samplePath = child.getPropertyAsValue(Identifier(
"value"),
nullptr).toString();
439 if (samplePath.isNotEmpty())
449 String sampleName = File(resultingCurrentSamplePath).getFileName();
470StringArray
TextToPreset::combineSearch(
const String& searchTerm,
const StringArray& classicResults,
const StringArray& ttpResults, std::map<String, std::vector<float>>& embeddingsCache,
const size_t& maxResults)
474 return classicResults;
477 StringArray combinedResults;
478 std::vector<std::pair<String, float>> scoredResults;
479 std::vector<std::vector<float>> combinedEmbeddings;
480 std::vector<String> combinedNames;
483 scoredResults.reserve(maxResults);
484 combinedEmbeddings.reserve(maxResults);
485 combinedNames.reserve(maxResults);
491 auto getEmbedding = [&embeddingsCache,
this](
const String& result) -> std::vector<float> {
492 if (embeddingsCache.find(result) == embeddingsCache.end())
496 return embeddingsCache[result];
500 for (
const auto& result : classicResults)
502 if (combinedEmbeddings.size() >= maxResults)
break;
503 combinedEmbeddings.push_back(getEmbedding(result));
504 combinedNames.push_back(result);
508 for (
const auto& result : ttpResults)
510 if (combinedEmbeddings.size() >= maxResults)
break;
511 combinedEmbeddings.push_back(getEmbedding(result));
512 combinedNames.push_back(result);
516 for (
size_t i = 0; i < combinedNames.size(); ++i)
519 scoredResults.push_back({combinedNames[i], similarity});
523 std::partial_sort(scoredResults.begin(), scoredResults.begin() + std::min(scoredResults.size(), maxResults), scoredResults.end(), [](
const auto& a,
const auto& b) {
524 return a.second > b.second;
528 for (
size_t i = 0; i < std::min(scoredResults.size(), maxResults); ++i)
530 combinedResults.add(scoredResults[i].first);
533 return combinedResults;
538 float dotProduct = std::inner_product(queryEmbedding.begin(), queryEmbedding.end(), resultEmbedding.begin(), 0.0f);
539 float queryNorm = std::sqrt(std::inner_product(queryEmbedding.begin(), queryEmbedding.end(), queryEmbedding.begin(), 0.0f));
540 float resultNorm = std::sqrt(std::inner_product(resultEmbedding.begin(), resultEmbedding.end(), resultEmbedding.begin(), 0.0f));
543 float denominator = std::max(queryNorm * resultNorm, 1e-8f);
545 return dotProduct / denominator;
550 StringArray closestPresets;
557 auto indices =
m_kNN.
knnQuery(queryEmbedding, maxNumPresetsToFind);
558 for (
auto index : indices)
563 return closestPresets;
568 String stringWithNumbers;
569 for (
auto& number : numbers)
571 stringWithNumbers.append(String(number) + String(
", "), 100);
573 return stringWithNumbers;
578 auto values = StringArray::fromTokens(stringWithNumbers,
",",
"");
579 std::vector<float> numbers;
580 for (
auto value : values)
582 if (value.containsNonWhitespaceChars())
584 numbers.push_back(value.getFloatValue());
597 DynamicObject* dataset =
new DynamicObject();
602 const auto presetName = element.first;
603 const auto tags = element.second.tags;
604 const auto embeddings = element.second.embeddings;
606 DynamicObject* metadata =
new DynamicObject();
609 Array<var> tagsArray;
610 for (
auto value : tags)
611 tagsArray.add(value);
614 Array<var> embeddingsArray;
615 for (
auto value : embeddings)
616 embeddingsArray.add(value);
622 dataset->setProperty(
"PresetsTagEmbeddings", rows);
624 FileOutputStream stream(file);
625 if (stream.openedOk())
628 stream.setPosition(0);
630 JSON::writeToStream(stream, dataset);
639 if (file.existsAsFile() ==
false)
647 const Result jsonParseResult = JSON::parse(file.loadFileAsString(), parsedJson);
649 if (jsonParseResult.failed())
655 var presetTagEmbeddings = parsedJson.getProperty(Identifier(
"PresetsTagEmbeddings"), 0);
657 const auto resultSize = presetTagEmbeddings.size();
658 for (
int i = 0; i < resultSize; ++i)
660 auto presetName = presetTagEmbeddings[i].getProperty(Identifier(
presetNameField), 0).toString();
661 var tagsArray = presetTagEmbeddings[i].getProperty(Identifier(
tagsNameField), 0);
662 var embeddingsArray = presetTagEmbeddings[i].getProperty(Identifier(
embeddingsNameField), 0);
665 for (
int j = 0; j < tagsArray.size(); ++j)
666 tags.add(tagsArray[j]);
668 std::vector<float> embeddings;
669 for (
int j = 0; j < embeddingsArray.size(); ++j)
670 embeddings.push_back(embeddingsArray[j]);
674 Data data(tags, embeddings);
692 std::map<String, Data> presetData;
693 std::vector<String> presetNames;
695 if (embeddingsFile.existsAsFile())
697 auto json = JSON::parse(embeddingsFile);
698 var result = json.getProperty(Identifier(
"PresetsTagEmbeddings"), 0);
699 for (
int i = 0; i < result.size(); ++i)
701 auto presetName = result[i].getProperty(Identifier(
presetNameField), 0).toString();
702 var tagsArray = result[i].getProperty(Identifier(
tagsNameField), 0);
706 for (
int j = 0; j < tagsArray.size(); ++j)
708 tags.add(tagsArray[j]);
711 std::vector<float> embeddings;
712 for (
int j = 0; j < embeddingsArray.size(); ++j)
714 embeddings.push_back(embeddingsArray[j]);
717 Data data(tags, embeddings);
718 presetData[presetName] = data;
719 presetNames.push_back(presetName);
724 embeddingsFile.create();
730 const auto presetName = element.first;
731 const auto tags = element.second.tags;
732 const auto embeddings = element.second.embeddings;
733 Data data(tags, embeddings);
734 presetData[presetName] = data;
735 presetNames.push_back(presetName);
739 DynamicObject* dataset =
new DynamicObject();
742 for (
const auto& element : presetData)
744 const auto presetName = element.first;
745 const auto tags = element.second.tags;
746 const auto embeddings = element.second.embeddings;
748 DynamicObject* metadata =
new DynamicObject();
751 Array<var> tagsArray;
752 for (
auto value : tags)
753 tagsArray.add(value);
756 Array<var> embeddingsArray;
757 for (
auto value : embeddings)
758 embeddingsArray.add(value);
764 dataset->setProperty(
"PresetsTagEmbeddings", rows);
770 TemporaryFile tempEmbeddingsFile(embeddingsFile, TemporaryFile::useHiddenFile);
771 if (
auto stream = tempEmbeddingsFile.getFile().createOutputStream())
774 stream->setPosition(0);
776 JSON::writeToStream(*stream, dataset);
782 const bool success = tempEmbeddingsFile.overwriteTargetFileWithTemporary();
806 if (presetTagEmbeddingsPath.existsAsFile())
812 Thread* currentThread = getCurrentThread();
813 if (currentThread !=
nullptr)
817 signalThreadShouldExit();
827 return StringArray();
831 String searchTerm = query;
833 StringArray presetRecommendations;
834 bool search = !isThreadRunning();
838 int maxNumResults = 10;
841 return presetRecommendations;
846 thread.addTimeSliceClient(
this);
853 thread.signalThreadShouldExit();
860 thread.removeTimeSliceClient(
this);
862 bool threadExitedSafely =
thread.stopThread(2000);
863 if (!threadExitedSafely)
877 if (file.existsAsFile())
879 auto json = JSON::parse(file);
880 var result = json.getProperty(Identifier(
"PresetsTagEmbeddings"), 0);
882 for (
int i = 0; i < result.size(); ++i)
884 if (
thread.threadShouldExit())
889 auto presetName = result[i].getProperty(Identifier(
presetNameField), 0).toString();
894 if (
thread.threadShouldExit())
903 if (
thread.threadShouldExit())
909 String filenameToCheck = file.getFileName();
917 if (
thread.threadShouldExit())
925 return fileProcessingSuccessful;
930 DirectoryIterator iter(directory,
true,
"*", File::findFilesAndDirectories);
933 auto file = iter.getFile();
934 if (file.isDirectory())
947 String dirModificationDate =
m_directory.getLastModificationTime().toString(
true,
true);
948 String lastRecordedModificationDate;
955 if (ggmlDataFile.existsAsFile() ==
false ||
thread.threadShouldExit())
961 if (textEmbeddingsStatusFile.exists() ==
false)
963 textEmbeddingsStatusFile.create();
970 DynamicObject* ttpStatus =
new DynamicObject();
971 var ttpStatusObj(ttpStatus);
973 FileOutputStream stream(textEmbeddingsStatusFile);
974 if (stream.openedOk())
976 stream.setPosition(0);
978 JSON::writeToStream(stream, ttpStatus);
983 DBG(
"TTP embeddings update aborted");
984 textEmbeddingsStatusFile.deleteFile();
990 auto ttpStatusJSON = JSON::parse(textEmbeddingsStatusFile);
1000 !
thread.threadShouldExit())
1007 DynamicObject* ttpStatus =
new DynamicObject();
1009 FileOutputStream stream(textEmbeddingsStatusFile);
1010 if (stream.openedOk())
1012 stream.setPosition(0);
1014 JSON::writeToStream(stream, ttpStatus);
static File getPluginDirectory()
Definition AssetManager.cpp:392
static File convertFilePath(const String &)
Definition AssetManager.cpp:604
static String convertFilePathString(const String &)
Definition AssetManager.cpp:583
static File getAssetDirectory()
Definition AssetManager.cpp:383
static String readFactorySamplesPath()
Definition AssetManager.cpp:112
int useTimeSlice() override
timeslice thread checks the directory modification date and handles it accordingly
Definition TextToPreset.cpp:945
File m_directory
Definition TextToPreset.h:250
bool handleContentUpdate()
handles content update and triggers ttp embeddings calculation accordingly
Definition TextToPreset.cpp:869
DirectoryWatcher(const File &dirToWatch)
Definition TextToPreset.cpp:844
void getAllPresetFiles(const File &directory, Array< File > &fileArray)
gets all presets under a directory recursively
Definition TextToPreset.cpp:928
String m_dirModificationDate
Definition TextToPreset.h:247
Array< File > m_presetFilesToProcess
Definition TextToPreset.h:262
~DirectoryWatcher() override
Definition TextToPreset.cpp:850
TextToPresetShared m_textToPresetSharedInstance
Definition TextToPreset.h:253
TimeSliceThread thread
Definition TextToPreset.h:241
StringArray m_presetsWithEmbeddings
Definition TextToPreset.h:259
Array< File > m_presetFilesArray
Definition TextToPreset.h:256
void buildIndex()
Definition KDTreeND.cpp:6
std::vector< std::size_t > knnQuery(const std::vector< float > &x, std::size_t k) const
Definition KDTreeND.cpp:18
void addDatasetItem(const std::vector< float > &x)
Definition KDTreeND.cpp:4
void clear()
Definition KDTreeND.h:37
static ValueTree getValueTreeFromFile(const File &presetFile)
Definition PresetManager.cpp:154
bool calculateEmbeddingsForPresetFile(File presetFile)
create text embeddings file for the given preset file
Definition TextToPreset.cpp:74
unsigned char m_data[BUFFER_SIZE]
Definition TextToPreset.h:206
String assetPathsDelimiter
Definition TextToPreset.h:182
~TextToPreset()
Definition TextToPreset.cpp:22
bool processFileList(Array< File > &filesToProcess)
processes a file list to create text embeddings
Definition TextToPreset.cpp:41
KDTreeND m_kNN
Definition TextToPreset.h:197
StringArray m_sampleFilenames
Definition TextToPreset.h:157
bool m_modelFileAvailable
Definition TextToPreset.h:209
StringArray m_samplePaths
Definition TextToPreset.h:154
TextToPreset()
Definition TextToPreset.cpp:9
File m_presetFile
Definition TextToPreset.h:160
StringArray m_presetTags
Definition TextToPreset.h:166
StringArray excludeTerms
Definition TextToPreset.h:175
SentenceTransformer m_sentenceTransformer
Definition TextToPreset.h:191
StringArray combineSearch(const String &searchTerm, const StringArray &classicResults, const StringArray &ttpResults, std::map< String, std::vector< float > > &embeddingsCache, const size_t &maxResults=25)
combine multiple StringArrays, merge and rerank the items based on cosine similarity
Definition TextToPreset.cpp:470
void searchForTagAndProperty(ValueTree &root, Identifier tag, Identifier property)
this will recursively search in a preset ValueTree structure for a given identifier and property na...
Definition TextToPreset.cpp:426
std::vector< float > destringify(String stringWithNumbers)
destringifies embeddings
Definition TextToPreset.cpp:576
bool readFromFile(File file)
load embeddings from file
Definition TextToPreset.cpp:637
String delimiters
Definition TextToPreset.h:188
void getPresetAssetNames(ValueTree &root)
will get preset asset paths and names into the m_samplePaths and m_sampleFilenames arrays
Definition TextToPreset.cpp:421
std::vector< String > m_presetNames
Definition TextToPreset.h:200
void clear()
clears internal data
Definition TextToPreset.cpp:32
void loadEmbeddings()
will load embeddings data from file to all modules that need it
Definition TextToPreset.cpp:801
StringArray performSearch(String query)
performs search based on text embeddings and KNN
Definition TextToPreset.cpp:823
float computeSimilarity(const std::vector< float > &queryEmbedding, const std::vector< float > &resultEmbedding) const
compute cosine similarity based of result to query
Definition TextToPreset.cpp:536
String m_presetName
Definition TextToPreset.h:163
std::map< String, Data > m_presetData
Definition TextToPreset.h:169
std::map< std::string, std::string > m_presetNameAndPresetPathMap
Definition TextToPreset.h:172
StringArray m_treatedAssetPaths
Definition TextToPreset.h:185
String stringify(std::vector< T > numbers)
Definition TextToPreset.cpp:566
SharedResourcePointer< UniversalCategorySystem > m_UCS
Definition TextToPreset.h:194
bool appendDataToEmbeddingsFile()
appends data to the main embeddings file
Definition TextToPreset.cpp:685
bool assetNamesToTags(bool eliminateDuplicates)
Splits the asset names, in respect to the excluded terms, and pushes the remaining terms into a new s...
Definition TextToPreset.cpp:104
void run() override
Definition TextToPreset.cpp:24
File createFilePath(String filename)
creates the path for the embeddings file
Definition TextToPreset.cpp:590
bool saveToFile(File file)
saves embeddings to file
Definition TextToPreset.cpp:595
void getSampleMetadata(const std::string &input, unsigned char *byteArray)
Definition TextToPreset.h:93
CriticalSection m_cs
Definition TextToPreset.h:203
StringArray findClosestPresets(String searchTerm, int maxNumPresetsToFind) const
returns a String array that contains K closest presets,
Definition TextToPreset.cpp:548
SharedResourcePointer< TextToPreset > m_textToPreset
Definition TextToPreset.h:216
String StringsIntoPath(Args... args)
Joins multiple string arguments into a path string.
Definition helpers.h:25
Definition AirAbsorptionFilter.cpp:2
constexpr size_t IV_SIZE
Definition TextToPreset.h:21
constexpr char tagsNameField[]
Definition TextToPreset.h:16
constexpr char ttpStatusDateProperty[]
Definition TextToPreset.h:11
const String kafFileExtension("kaf")
constexpr char ttpNumericalMetadata[]
Definition TextToPreset.h:12
constexpr char embeddingsNameField[]
Definition TextToPreset.h:15
constexpr char wavMetadataFieldInFile[]
Definition TextToPreset.h:13
constexpr char modelFileName[]
Definition SentenceTransformer.h:7
constexpr char ttpStatusFileName[]
Definition TextToPreset.h:10
constexpr char presetNameField[]
Definition TextToPreset.h:14
static const String ttpThreadName
Definition TextToPreset.h:17
constexpr char embeddingsFileName[]
Definition TextToPreset.h:9
constexpr size_t BUFFER_SIZE
Definition TextToPreset.h:20
Definition TextToPreset.h:144