117 URL url(
"https://api.anthropic.com/v1/messages");
120 const int ivLength{32};
121 const int claudeApiKeyLength{108};
123 std::vector<unsigned char> ivBytes, ciphertextBytes, encryptionKeyBytes;
125 std::vector<unsigned char> plaintextBytes;
127 const std::string encKey(
"23c3b67142161bb753706f4673f234906a811e10cf434e832790a4f120ab90e1");
140 catch (
const std::invalid_argument& e)
142 DBG(
"TextToAssets: Failed to open Claude API Key\n\t" + String(e.what()));
144 return StringArray();
148 EVP_CIPHER_CTX* cipherContext;
152 plaintextBytes.resize(ciphertextBytes.size());
155 if (!(cipherContext = EVP_CIPHER_CTX_new()))
157 DBG(
"TextToAssets: Could not create new EVP context");
161 if (EVP_DecryptInit_ex(cipherContext, EVP_aes_256_cbc(), NULL, encryptionKeyBytes.data(), ivBytes.data()) != 1)
163 DBG(
"TextToAssets: Could not initialise EVP context with the given parameters");
167 if (EVP_DecryptUpdate(cipherContext, plaintextBytes.data(), &outputLen, ciphertextBytes.data(),
168 ciphertextBytes.size()) != 1)
170 DBG(
"TextToAssets: Could not perform decryption");
172 plaintextLen = outputLen;
175 if (EVP_DecryptFinal_ex(cipherContext, plaintextBytes.data() + outputLen, &outputLen) != 1)
177 DBG(
"TextToAssets: Could not finalise decryption");
179 plaintextLen += outputLen;
182 EVP_CIPHER_CTX_free(cipherContext);
185 std::string apiKey(plaintextBytes.begin(), plaintextBytes.end());
186 apiKey = apiKey.substr(0, claudeApiKeyLength);
189 String requestBody = R
"({
190 "model": "claude-3-haiku-20240307",
193 "system": "Your task is to design an ambience preset for a given soundscape.\nAmbience presets should contain 4 layers.\nEach layer should contribute something unique and meaningful to the preset while being relevent to the requested soundscape.\nExample layers: Birds, Wind, Rain, Stream, Crickets, Animal calls, Waterfall, Campfire, Thunder\nHere is an example ambience preset in JSON format:\n{\"Soundscape\": \"Forest\", \"Layer-1\": \"Birds\", \"Layer-2\": \"Wind\", \"Layer-3\": \"Crickets\", \"Layer-4\": \"Stream\"}\nReturn the designed preset in JSON format and no other output.",
195 {"role": "user", "content": "{query}"}
200 requestBody = requestBody.replace(
"{query}", query);
201 url = url.withPOSTData(requestBody);
205 StringPairArray responseHeaders;
207 auto stream = url.createInputStream(
208 URL::InputStreamOptions(URL::ParameterHandling::inAddress)
209 .withHttpRequestCmd(
"POST")
210 .withExtraHeaders({
"x-api-key: " + apiKey +
211 "\nanthropic-version: 2023-06-01\ncontent-type: application/json"})
212 .withConnectionTimeoutMs(10000)
213 .withResponseHeaders(&responseHeaders)
214 .withStatusCode(&statusCode));
223 const char* jsonBinaryData =
reinterpret_cast<const char*
>(KrotosBinaryData::preset_layers_json);
226 String jsonString(jsonBinaryData);
229 auto parsedData = JSON::parse(jsonString);
231 if (!parsedData.isVoid())
233 const auto presetLayersIdentifier = Identifier(
"presetLayers");
234 const auto parentCatIDIdentifier = Identifier(
"parentCatID");
235 const auto collectionIdentifier = Identifier(
"collection");
236 const auto layerNameIdentifier = Identifier(
"layerName");
237 const auto layerDescriptionIdentifier = Identifier(
"layerDescription");
238 const auto assetCatIDIdentifier = Identifier(
"assetCatID");
239 const auto assetDescriptionIdentifier = Identifier(
"assetDescription");
240 const auto engineIdentifier = Identifier(
"engine");
241 const auto layerEmbeddingIdentifier = Identifier(
"layerEmbedding");
242 const auto assetEmbeddingIdentifier = Identifier(
"assetEmbedding");
244 var result = parsedData.getProperty(presetLayersIdentifier, 0);
245 for (
int i = 0; i < result.size(); ++i)
248 layer.isValid =
true;
250 layer.parentCatID = result[i].getProperty(parentCatIDIdentifier, 0).toString();
251 layer.collection = result[i].getProperty(collectionIdentifier, 0).toString();
252 layer.layerName = result[i].getProperty(layerNameIdentifier, 0).toString();
253 layer.layerDescription = result[i].getProperty(layerDescriptionIdentifier, 0).toString();
254 layer.assetCatID = result[i].getProperty(assetCatIDIdentifier, 0).toString();
255 layer.assetDescription = result[i].getProperty(assetDescriptionIdentifier, 0).toString();
256 layer.engine = result[i].getProperty(engineIdentifier, 0).toString();
259 auto layerEmbeddingArray = result[i].getProperty(layerEmbeddingIdentifier, 0);
260 layer.layerEmbedding.resize(layerEmbeddingArray.size(), 0.0f);
261 for (
int j = 0; j < layerEmbeddingArray.size(); ++j)
263 layer.layerEmbedding[j] = layerEmbeddingArray[j];
267 auto assetEmbeddingArray = result[i].getProperty(assetEmbeddingIdentifier, 0);
268 layer.assetEmbedding.resize(assetEmbeddingArray.size(), 0.0f);
269 for (
int j = 0; j < assetEmbeddingArray.size(); ++j)
271 layer.assetEmbedding[j] = assetEmbeddingArray[j];
280 DBG(
"Failed to load KrotosBinaryData::preset_layers_json");
288 const char* jsonBinaryData =
reinterpret_cast<const char*
>(KrotosBinaryData::query_embeddings_json);
291 String jsonString(jsonBinaryData);
294 auto parsedData = JSON::parse(jsonString);
296 if (!parsedData.isVoid())
298 const auto queryEmbeddingsIdentifier = Identifier(
"queryEmbeddings");
299 const auto catIDIdentifier = Identifier(
"catID");
300 const auto embeddingIdentifier = Identifier(
"embedding");
302 var result = parsedData.getProperty(queryEmbeddingsIdentifier, 0);
304 for (
int i = 0; i < result.size(); ++i)
306 auto catID = result[i].getProperty(catIDIdentifier, 0).toString();
307 auto embeddingArray = result[i].getProperty(embeddingIdentifier, 0);
308 std::vector<float> embedding(embeddingArray.size(), 0.0f);
309 for (
int j = 0; j < embeddingArray.size(); ++j)
311 embedding[j] = embeddingArray[j];
318 DBG(
"Failed to load KrotosBinaryData::query_embeddings_json");
324 const std::vector<float>& queryEmbedding)
333 bool isTropical = query.contains(
"tropical");
336 auto maxScore = std::numeric_limits<float>::lowest();
337 std::vector<float> maxEmbedding(queryEmbedding.size(), 0.0f);
340 const auto name = element.first;
341 if ((!isTropical) && (name ==
"AMBTrop"))
346 const auto embedding = element.second;
347 const auto score =
dot(embedding, queryEmbedding);
348 if (score >= maxScore)
352 maxEmbedding = embedding;
358 float maxScore2 = maxScore;
361 const auto name = element.first;
362 if ((!isTropical) && (name ==
"AMBTrop"))
369 auto embedding = element.second;
370 std::transform(embedding.begin(), embedding.end(), maxEmbedding.begin(), embedding.begin(), std::plus<>{});
372 const auto score =
dot(embedding, queryEmbedding);
373 if (score >= maxScore2)
387 constexpr float threshold = 0.1f;
388 const auto isValid = maxScore2 >= (maxScore + threshold) ?
true : false;
389 CatID2 = isValid ? CatID2 :
"";
391 return {CatID, CatID2};
396 std::vector<int> indices(x.size());
397 std::iota(indices.begin(), indices.end(), 0);
398 std::sort(indices.begin(), indices.end(), [&x](
int left,
int right) ->
bool { return x[left] > x[right]; });
403 const std::vector<float>& presetEmbedding,
404 const std::vector<Layer>& layers,
405 const std::vector<float>& layerScoresLLM)
407 auto layerScore = std::numeric_limits<float>::lowest();
409 bool isValid =
false;
410 for (
int i = 0; i < (int)layers.size(); ++i)
412 auto layer = layers[i];
419 auto embedding = layer.layerEmbedding;
422 std::transform(embedding.begin(), embedding.end(), presetEmbedding.begin(), embedding.begin(),
429 auto simScore =
dot(embedding, queryEmbedding);
437 simScore = 0.5f * simScore + 0.5f * layerScoresLLM[i];
439 if (simScore >= layerScore)
441 layerScore = simScore;
446 return {layerScore, layerIdx, isValid};
451 const auto catID = layer.assetCatID;
452 const auto description = layer.assetDescription;
453 const auto embedding = layer.assetEmbedding;
465 const auto bm25Scores =
m_bm25->getBatchScores(description, ids);
470 auto maxScore = std::numeric_limits<float>::lowest();
471 std::size_t maxIdx = 0;
472 auto cosineScore = std::numeric_limits<float>::lowest();
473 for (std::size_t i = 0; i < ids.size(); ++i)
475 auto score = rrfScores[i];
476 if (score >= maxScore)
480 cosineScore = vectorScores[i];
484 return {path, cosineScore};
489 for (
const auto& keyword : keywords)
491 if (text.contains(keyword))
501 std::for_each(layers.begin(), layers.end(), [tag](Layer& layer) {
502 layer.isValid = (layer.assetCatID.contains(tag)) ? false : layer.isValid;
508 return text.substring(0, 1).toUpperCase() + text.substring(1).toLowerCase();
512 const StringArray& generatedLayerNames)
516 std::vector<std::vector<float>> embeddings;
517 embeddings.reserve(generatedLayerNames.size());
518 for (
const auto& name : generatedLayerNames)
521 embeddings.push_back(embedding);
524 std::vector<float> layerScoresLLM;
525 layerScoresLLM.reserve(layers.size());
526 for (
const auto& layer : layers)
528 const auto layerEmbedding = layer.layerEmbedding;
529 const auto layerBias = layer.bias;
530 float maxScore = std::numeric_limits<float>::lowest();
531 for (std::size_t i = 0; i < generatedLayerNames.size(); ++i)
533 auto score =
dot(embeddings[i], layerEmbedding) + layerBias;
534 maxScore = std::max(maxScore, score);
536 layerScoresLLM.push_back(maxScore);
538 return layerScoresLLM;
544 const std::unordered_map<String, std::vector<String>> collectionKeywords = {
545 {
"aeroplane interior", {
"aeroplane interior",
"airplane interior",
"commercial flight"}},
546 {
"airport", {
"airport"}},
547 {
"apartment", {
"apartment"}},
548 {
"basement", {
"basement"}},
549 {
"bathroom", {
"bathroom"}},
550 {
"beach", {
"beach",
"seaside"}},
551 {
"bus interior", {
"bus interior"}},
552 {
"city", {
"city",
"urban"}},
553 {
"construction", {
"construction",
"building site"}},
554 {
"countryside", {
"countryside",
"rural"}},
555 {
"desert", {
"desert"}},
556 {
"forest", {
"forest",
"woodland"}},
557 {
"garage", {
"garage"}},
558 {
"hospital", {
"hospital"}},
559 {
"hotel", {
"hotel"}},
560 {
"kitchen", {
"kitchen"}},
561 {
"office", {
"office",
"workplace"}},
563 {
"restaurant", {
"restaurant",
"cafe"}},
564 {
"sci-fi", {
"starship",
"spaceship"}},
565 {
"sewer", {
"sewer"}},
566 {
"suburban", {
"suburban",
"suburbs"}},
567 {
"subway", {
"subway"}},
568 {
"swamp", {
"swamp",
"marsh"}},
569 {
"town", {
"town",
"village"}},
570 {
"underwater", {
"underwater"}}};
573 std::vector<String> detectedCollections;
576 for (
const auto& [collection, keywords] : collectionKeywords)
579 auto currentCollection = collection;
580 bool collectionDetected =
581 std::any_of(keywords.begin(), keywords.end(), [&query, ¤tCollection](
const String& keyword) {
583 bool isDetected = (currentCollection ==
"desert") ? query.containsWholeWord(keyword)
584 : query.contains(keyword);
588 DBG(
"[collections] " + currentCollection);
592 if (collectionDetected)
594 detectedCollections.push_back(collection);
602 for (
auto& layer : layers)
604 if (std::find(detectedCollections.begin(), detectedCollections.end(), layer.collection) !=
605 detectedCollections.end())
// Builds a four-slot preset for a free-text search term.
//
// Returns a tuple of (category/sub-category string from m_UCS, array of four TTPAsset).
// On the early-exit paths visible below, the first element is "" and/or the array is
// returned as default-constructed.
//
// Visible pipeline: preprocess the query -> embed it -> pick the closest ambience CatID(s)
// -> gather candidate layers -> bias layers -> ask the LLM for layer names -> score layers
// -> iteratively pick a layer and a matching local asset.
// NOTE(review): several brace/branch lines are not visible in this view, so the exact
// nesting of the statements below should be confirmed against the full file.
614 std::tuple<String, std::array<TextToAssets::TTPAsset, 4>> TextToAssets::findPresetFiles(String searchTerm)
619 std::array<TTPAsset, 4> results;
// Assets whose score (second value returned by findAsset) is below this are rejected.
622 constexpr float cosineSimilarityThreshold = 0.5f;
// Splits the search term into the query text and a list of excluded tags.
625 auto [query, excludeList] = preprocessQuery(searchTerm);
628 auto queryEmbedding = m_sentenceTransformer.encode(query.toStdString());
631 auto [closestCatID, secondaryCatID] = findClosestAmbienceCatID(query, queryEmbedding);
// No category found: bail out with an empty category string.
632 if (closestCatID.isEmpty())
634 return {
"", results};
// NOTE(review): m_UCS is dereferenced here without a null check, whereas
// calculateEmbeddingsForFile explicitly tests m_UCS against nullptr — confirm m_UCS is
// guaranteed non-null by the time this is called.
636 auto categorySubCategory = m_UCS->getCategorySubCategory(closestCatID);
638 DBG(
"[closestCatID] " << closestCatID);
639 DBG(
"[secondaryCatID] " << secondaryCatID);
641 std::vector<Layer> layers;
// NOTE(review): this declares a second variable named `layers`; if it sits in a nested
// scope it shadows the vector declared above. The enclosing lines are not visible — confirm.
646 auto layers = m_layersMap[closestCatID];
649 return {categorySubCategory, results};
// Append the layers of the secondary category, when one was selected.
653 if (secondaryCatID.isNotEmpty())
655 const auto secondaryLayers = m_layersMap[secondaryCatID];
656 layers.insert(layers.end(), secondaryLayers.begin(), secondaryLayers.end());
660 for (
const auto& layer : layers)
662 DBG(layer.layerName <<
": " << layer.layerDescription);
// Collects the layers of every category in m_layersMap.
// NOTE(review): `auto [..]` binds by value, so each map entry is copied per iteration.
668 for (
auto [categoryName, categoryLayers] : m_layersMap)
670 layers.insert(layers.end(), categoryLayers.begin(), categoryLayers.end());
674 addBiasToLayers(query, layers);
// The LLM is queried with the original search term (not the preprocessed query);
// anything other than exactly four names is treated as a failure.
680 auto generatedLayerNames = anthropicAPICall(searchTerm);
681 if (generatedLayerNames.size() != 4)
683 DBG(
"LLM generation failed");
684 return {
"", results};
688 auto layerScoresLLM = calculateLLMLayerScores(layers, generatedLayerNames);
// Reset every layer to valid before applying the user's exclusions.
692 std::for_each(layers.begin(), layers.end(), [](Layer& layer) { layer.isValid = true; });
695 excludeLayers(excludeList, layers);
// Running element-wise sum of the embeddings of the layers accepted so far; starts at zero.
698 std::vector<float> presetEmbedding(queryEmbedding.size(), 0.0f);
// std::set::contains (used below) requires C++20.
701 std::set<String> uniqueAssets;
// Upper bound on selection attempts.
702 const int maximumSearchIterations = 20;
703 std::size_t resultsIndex = 0;
704 for (
int i = 0; i < maximumSearchIterations; ++i)
707 auto [layerScore, layerIdx, layerValid] =
708 findLayer(queryEmbedding, presetEmbedding, layers, layerScoresLLM);
// NOTE(review): any check of layerValid / layerIdx before this indexing is not visible here.
714 auto layer = layers[layerIdx];
715 auto name = layer.layerName;
// Invalidate every layer sharing the chosen name so it is not picked again.
718 std::for_each(layers.begin(), layers.end(),
719 [name](Layer& layer) { layer.isValid = (layer.layerName == name) ? false : layer.isValid; });
722 auto [asset, assetScore] = findAsset(layer);
// Reject weak matches and assets that were already selected.
725 if (assetScore < cosineSimilarityThreshold)
728 DBG(
"missing local asset for layer with name: " << name);
731 else if (uniqueAssets.contains(asset))
734 DBG(
"skipping asset as already selected: " << asset);
// Accepted: fold the layer embedding into the running preset embedding.
// NOTE(review): std::transform reads presetEmbedding.size() elements from
// layer.layerEmbedding — assumes both have the same length; confirm.
739 std::transform(presetEmbedding.begin(), presetEmbedding.end(), layer.layerEmbedding.begin(),
740 presetEmbedding.begin(), std::plus<>{});
743 results[resultsIndex].path = asset;
744 results[resultsIndex].label = capitalize(name);
745 results[resultsIndex].engine = layer.engine;
748 uniqueAssets.insert(asset);
// All result slots filled. NOTE(review): the increment of resultsIndex and the body of
// this condition (presumably a loop exit) are not visible here.
754 if (resultsIndex == results.size())
760 return {categorySubCategory, results};
763 bool TextToAssets::isUCSValid(String catID)
765 jassert(m_UCS !=
nullptr);
766 return m_UCS->isValid(catID);
// Loads the query embeddings and then the file embeddings, provided the thread has not
// already been asked to exit.
// NOTE(review): presumably the juce::Thread::run override (it calls threadShouldExit()) —
// confirm against the class declaration. One original line between the two load calls is
// not visible in this view.
769 void TextToAssets::run()
// Exit is only checked once, up front; no check between the two loads is visible here.
771 if (!threadShouldExit())
773 loadQueryEmbeddings();
775 loadFileEmbeddings();
779 File TextToAssets::createFilePath(String filename)
781 return utils::StringsIntoPath(AssetManager::getPluginDirectory().getFullPathName(),
"ttpResources", filename);
// Merges the embeddings already stored in the embeddings file with the in-memory
// m_datasetMap, writes the merged set back through a temporary file, and finally re-reads
// the file via readFromFile().
// NOTE(review): the return statements and the handling of `success` are not visible in this
// view, so the meaning of the bool result cannot be stated from here.
784 bool TextToAssets::appendDataToEmbeddingsFile()
// Counters and corpus are emptied here; readFromFile() (called at the end) repopulates them.
787 m_assetCounter.clear();
790 m_bm25Corpus.clear();
793 auto embeddingsFile = createFilePath(ttfEmbeddingsFileName);
// Local merge target, keyed by catID.
795 std::unordered_map<String, std::vector<Data>> datasetMap;
// Step 1: load whatever is already persisted on disk.
797 if (embeddingsFile.existsAsFile())
799 auto json = JSON::parse(embeddingsFile);
800 var result = json.getProperty(FilesEmbeddingsIdentifier, 0);
801 for (
int i = 0; i < result.size(); ++i)
803 auto fileName = result[i].getProperty(fileNameIdentifier, 0).toString();
804 auto catID = result[i].getProperty(catIDNameIdentifier, 0).toString();
805 var embeddingsArray = result[i].getProperty(ttfEmbeddingsNameIdentifier, 0);
807 std::vector<float> embeddings;
808 for (
int j = 0; j < embeddingsArray.size(); ++j)
810 embeddings.push_back(embeddingsArray[j]);
813 Data data(catID, fileName, embeddings);
814 datasetMap[catID].push_back(data);
// NOTE(review): presumably the else-branch of the existence check above — confirm.
819 embeddingsFile.create();
// Step 2: append the in-memory entries after the persisted ones.
// NOTE(review): no de-duplication is visible here; if m_datasetMap already contains entries
// that were loaded from this same file, they would be stored twice — confirm against callers.
823 for (
auto const& [key, dataset] : m_datasetMap)
825 for (
const auto& element : dataset)
827 datasetMap[key].push_back(element);
// Step 3: build the JSON document. The DynamicObject is wrapped in a var right away.
832 DynamicObject* datasetObject =
new DynamicObject();
833 var datasetObjectVar(datasetObject);
836 for (
auto const& [key, dataset] : datasetMap)
838 for (
const auto& element : dataset)
840 const auto fileName = element.path;
841 const auto catID = element.catID;
842 const auto embeddings = element.embeddings;
// One JSON object per asset: file name, catID and embedding array.
844 DynamicObject* metadata =
new DynamicObject();
845 var metadataVar(metadata);
846 metadata->setProperty(fileNameIdentifier, var(fileName));
848 metadata->setProperty(catIDNameIdentifier, var(catID));
850 Array<var> embeddingsArray;
851 for (
auto value : embeddings)
853 embeddingsArray.add(value);
855 metadata->setProperty(ttfEmbeddingsNameIdentifier, embeddingsArray);
// NOTE(review): `rows` is declared and filled on lines not visible in this view.
861 datasetObject->setProperty(FilesEmbeddingsIdentifier, rows);
// The merged map replaces the in-memory dataset.
864 m_datasetMap = datasetMap;
// Step 4: write to a hidden temporary file, then swap it over the real file.
866 TemporaryFile tempEmbeddingsFile(embeddingsFile, TemporaryFile::useHiddenFile);
867 if (
auto stream = tempEmbeddingsFile.getFile().createOutputStream())
870 stream->setPosition(0);
872 JSON::writeToStream(*stream, datasetObject);
878 const bool success = tempEmbeddingsFile.overwriteTargetFileWithTemporary();
// Step 5: reload from disk, which rebuilds the corpus and counters cleared at the top.
892 readFromFile(embeddingsFile);
// Locates the embeddings file via createFilePath(ttfEmbeddingsFileName) and acts on it only
// if it exists on disk.
// NOTE(review): the body of the existence branch is not visible in this view (presumably a
// readFromFile(path) call) — confirm.
897 void TextToAssets::loadFileEmbeddings()
901 auto path = createFilePath(ttfEmbeddingsFileName);
902 if (path.existsAsFile())
// Normalises a file-name-derived string before it is embedded / added to the BM25 corpus.
// Visible steps, in order: underscores -> spaces, drop "RXd", cut everything from "SNDBTS"
// onwards, strip all digits, lower-case.
// NOTE(review): the return statement is not visible in this view (presumably `return text;`).
909 String TextToAssets::sanitizeString(String text)
911 text = text.replace(
"_",
" ");
// Removes the literal marker "RXd" (case-sensitive replace).
912 text = text.replace(
"RXd",
"");
// Keep only the part before "SNDBTS"; the marker itself is excluded and the match is
// case-sensitive (both flags are false).
913 if (text.contains(
"SNDBTS"))
915 text = text.upToFirstOccurrenceOf(
"SNDBTS",
false,
false);
917 text = text.removeCharacters(
"0123456789");
// Lower-casing happens last, so the case-sensitive marker checks above see the original case.
919 text = text.toLowerCase();
// Loads asset embeddings from a JSON file into m_datasetMap and rebuilds the BM25 corpus,
// the catID -> corpus-index map and the per-catID asset counters.
// NOTE(review): the return statements are not visible in this view, so the exact meaning of
// the bool result cannot be stated from here.
923 bool TextToAssets::readFromFile(File file)
// Missing file: handled by a branch whose body is not visible here.
925 if (file.existsAsFile() ==
false)
932 auto json = JSON::parse(file);
933 var result = json.getProperty(FilesEmbeddingsIdentifier, 0);
934 auto resultSize = result.size();
935 for (
int i = 0; i < resultSize; ++i)
937 auto fileName = result[i].getProperty(fileNameIdentifier, 0).toString();
938 auto catID = result[i].getProperty(catIDNameIdentifier, 0).toString();
939 var embeddingsArray = result[i].getProperty(ttfEmbeddingsNameIdentifier, 0);
941 std::vector<float> embeddings;
942 for (
int j = 0; j < embeddingsArray.size(); ++j)
944 embeddings.push_back(embeddingsArray[j]);
// Derive the BM25 document text from the file name, using the same
// replace(catID) + sanitizeString steps as calculateEmbeddingsForFile.
947 auto name = File(fileName).getFileNameWithoutExtension();
948 name = name.replace(catID,
"");
949 name = sanitizeString(name);
// Record the corpus index (current corpus size) under this catID before appending the text.
950 m_datasetIDMap[catID].push_back(m_bm25Corpus.size());
951 m_bm25Corpus.push_back(name);
952 ++m_assetCounter[catID];
954 Data data(catID, fileName, embeddings);
955 m_datasetMap[catID].push_back(data);
// Build the BM25 index only when there is at least one document; otherwise m_bm25 is
// left as it was.
959 if (!m_bm25Corpus.empty())
961 m_bm25 = std::make_unique<BM25>(m_bm25Corpus);
967 void TextToAssets::clear()
969 m_datasetMap.clear();
970 m_datasetIDMap.clear();
971 m_assetCounter.clear();
972 m_bm25Corpus.clear();
973 m_samplePaths.clear();
974 m_sampleFilenames.clear();
// Computes embeddings for every file in filesToProcess, rebuilds the BM25 index and persists
// the result through appendDataToEmbeddingsFile(), whose result is returned on the normal path.
// NOTE(review): the early-exit bodies, the increment of processedFiles and the progress
// notification call are not visible in this view.
977 bool TextToAssets::processFileList(Array<File>& filesToProcess)
// Without the model file nothing can be embedded; the branch body is not visible here.
979 if (!isModelFileAvailable())
// Signal "not available" while processing; reset to true below on exit and on completion.
986 notifyAvailabilityStatus(
false);
992 const int totalFiles = filesToProcess.size();
993 int processedFiles = 0;
994 const Time startTime = Time::getCurrentTime();
995 Time lastUpdateTime = startTime;
997 for (File& file : filesToProcess)
999 calculateEmbeddingsForFile(file);
// Cooperative cancellation: checked once per file.
1000 if (Thread::currentThreadShouldExit())
1005 notifyAvailabilityStatus(
true);
1011 const Time currentTime = Time::getCurrentTime();
1012 const RelativeTime elapsedTime = currentTime - startTime;
1013 const RelativeTime timeSinceLastUpdate = currentTime - lastUpdateTime;
// Progress estimate is refreshed at most every 500 ms.
1016 if (timeSinceLastUpdate.inMilliseconds() >= 500)
// NOTE(review): processedFiles is initialised to 0 and its increment is not visible here —
// confirm it is incremented before this division.
1018 const double averageTimePerFile = elapsedTime.inSeconds() / processedFiles;
1019 const int remainingFiles = totalFiles - processedFiles;
1020 const double estimatedRemainingSeconds = averageTimePerFile * remainingFiles;
1023 const String estimatedTimeRemaining = RelativeTime(estimatedRemainingSeconds).getDescription();
1030 lastUpdateTime = currentTime;
// Rebuild the BM25 index from the corpus accumulated by calculateEmbeddingsForFile.
1035 if (!m_bm25Corpus.empty())
1037 m_bm25 = std::make_unique<BM25>(m_bm25Corpus);
1040 notifyAvailabilityStatus(
true);
1043 auto appendSuccessful = appendDataToEmbeddingsFile();
1044 return appendSuccessful;
// Derives a catID and a sanitized description from a file's name, embeds the description and
// records the result in m_datasetMap, m_assetCounter, m_datasetIDMap and m_bm25Corpus.
// The whole function runs under the m_cs lock.
// NOTE(review): the bodies of the guard conditions and the use of `valid` are on lines not
// visible in this view.
1047 void TextToAssets::calculateEmbeddingsForFile(
const File& file)
1049 ScopedLock sl(m_cs);
// m_UCS may be null here; the branch body is not visible (presumably an early return).
1051 if (m_UCS ==
nullptr)
// The catID is the part of the file name before the first underscore.
1056 auto name = file.getFileNameWithoutExtension();
1057 auto catID = name.upToFirstOccurrenceOf(
"_",
false,
false);
// Map two known non-canonical spellings onto the spellings used elsewhere.
1058 catID = (catID ==
"AMBROOM") ?
"AMBRoom" : catID;
1059 catID = (catID ==
"AMBUrb") ?
"AMBUrbn" : catID;
1060 const auto valid = isUCSValid(catID);
// Size filter: (16 / 8) * 2 * (5 * 48000) = 960000 bytes. Only the file size is examined;
// the audio format is not read, so these constants are an assumed format, not a measured one.
1064 const auto sampleRate = 48000;
1065 const auto bitDepth = 16;
1066 const auto channelCount = 2;
1067 const auto durationSeconds = 5;
1068 const auto bytesThreshold = (bitDepth / 8) * channelCount * (durationSeconds * sampleRate);
1069 auto sizeInBytes = file.getSize();
1070 if (sizeInBytes >= bytesThreshold)
// Strip the catID from the name, then normalise it for embedding and BM25.
// NOTE(review): `catID` may have been remapped above, in which case replace() no longer
// matches the original spelling inside `name` — confirm this is intended.
1072 name = name.replace(catID,
"");
1073 name = sanitizeString(name);
1074 auto embedding = m_sentenceTransformer.encode(name.toStdString());
1077 Data data(catID, file.getFullPathName().toStdString(), embedding);
1078 m_datasetMap[catID].push_back(data);
1081 ++m_assetCounter[catID];
// Record the corpus index (current corpus size) under this catID before appending the text.
1084 m_datasetIDMap[catID].push_back(m_bm25Corpus.size());
1085 m_bm25Corpus.push_back(name);
// Tests catID against each entry of tags using a prefix match (String::startsWith).
// NOTE(review): the return statements are not visible in this view; presumably true on the
// first matching tag and false otherwise — confirm.
1089 bool TextToAssets::matchesCategory(
const String& catID, StringArray tags)
const
// Each tag is copied per iteration (`auto tag`).
1091 for (
auto tag : tags)
1093 if (catID.startsWith(tag))
// Converts a vector of scores into a vector of ranks, written back at each score's original
// index. Scores are sorted in descending order, so the highest score is visited first.
// NOTE(review): the outer loop, the updates of rank / m / i and the return statement are on
// lines not visible in this view.
1099 std::vector<float> TextToAssets::rankify(
const std::vector<float>& x)
const
1101 const auto n = (int)x.size();
// R[idx] receives the rank of x[idx].
1104 std::vector<float> R(n, 0);
// Pair each score with its original index so ranks can be written back after sorting.
1106 std::vector<std::pair<float, int>> T(n);
1107 for (
int i = 0; i < n; i++)
1109 T[i] = std::make_pair(x[i], i);
// Descending by score.
1112 std::sort(T.begin(), T.end(),
1113 [](
const std::pair<float, int>& a,
const std::pair<float, int>& b) { return a.first > b.first; });
// NOTE(review): `i` and `m` are floats but are used below as an index offset and a loop bound.
1115 float rank = 1.0f, m = 1.0f, i = 0.0f;
// Advance j across a run of equal scores (a tie group).
1120 while (j < n - 1 && T[j].first == T[j + 1].first)
// Every element in the run of m entries starting at i gets the same value
// rank + (m - 1) * 0.5; presumably m is the tie-group size, making this the group's
// average rank — confirm against the hidden lines.
1127 for (
int k = 0; k < m; ++k)
1131 int idx = T[i + k].second;
// Computed in double, then narrowed to float on assignment into R.
1132 R[idx] = (double)(rank + (m - 1) * 0.5);
// Works on its own copy of data (taken by value), with the requested count clamped to the
// number of available elements.
// NOTE(review): the name of the call that receives the arguments below and the return
// statement are not visible in this view. The argument shape (first, middle, last,
// comparator) matches std::partial_sort — confirm.
1142 std::vector<std::pair<float, int>> TextToAssets::topk(std::vector<std::pair<float, int>> data,
int topk)
const
// Never ask for more elements than exist.
1145 topk = std::min(topk, (
int)data.size());
// Comparator orders by score (pair.first), largest first.
1147 data.begin(), data.begin() + topk, data.end(),
1148 [](
const std::pair<float, int>& a,
const std::pair<float, int>& b) { return a.first > b.first; });
// Fuses two score lists into one by combining reciprocal ranks:
//   fused[i] = alpha * 1 / (rank1[i] + k) + (1 - alpha) * 1 / (rank2[i] + k)
// where rank1 / rank2 come from rankify() on each input.
// NOTE(review): the end of the signature (where `alpha` is presumably declared) and the
// return statement are not visible in this view.
1153 std::vector<float> TextToAssets::calculateReciprocalRankFusion(
const std::vector<float>& scores1,
1154 const std::vector<float>& scores2,
float k,
1157 const auto ranks1 = rankify(scores1);
1158 const auto ranks2 = rankify(scores2);
1159 std::vector<float> scores;
// Iterates over ranks1's length and indexes ranks2 with the same i — assumes scores1 and
// scores2 have equal size; TODO confirm at call sites.
1160 for (
int i = 0; i < (int)ranks1.size(); ++i)
1162 const auto score1 = 1.0f / (ranks1[i] + k);
1163 const auto score2 = 1.0f / (ranks2[i] + k);
// alpha weights the first list, (1 - alpha) the second.
1164 const auto score = alpha * score1 + (1.0f - alpha) * score2;
1165 scores.push_back(score);
// Splits a raw query into (query text, exclusion list).
// Tokens are taken from the lower-cased query, separated by spaces, with '"' as the quote
// character. A token starting with '-' is added to the exclusion list without its first
// character.
// NOTE(review): lines between the exclusion branch and the return are not visible in this
// view, so whether `query` is modified before being returned cannot be confirmed from here.
1170 std::tuple<String, StringArray> TextToAssets::preprocessQuery(String query)
const
1172 StringArray queryTokens;
1173 queryTokens.addTokens(query.toLowerCase(),
" ",
"\"");
1176 StringArray excludeList;
1178 for (
const auto& token : queryTokens)
1180 if (token.startsWithChar(
'-'))
// Drop the first character (the '-') and store the rest; duplicates are ignored.
1182 excludeList.addIfNotAlreadyThere(token.replaceSection(0, 1,
""));
1190 return {query, excludeList};