Krotos Modules 3
Loading...
Searching...
No Matches
TextToFile.cpp
Go to the documentation of this file.
1namespace krotos
2{
3
4TextToFile::TextToFile() : Thread("TextToFileThread")
5{
6 startThread(); // set a priority?
7}
8
9TextToFile::~TextToFile() { stopThread(4000); }
10
12{
13 bool shouldSave = true;
14
15 // load embeddings from json file if it exists
16 auto pathToEmbeddingsFile = embeddingsFilePath(textToFileEmbeddingsFilename);
17 std::map<String, std::vector<float>> fileNameEmbeddingsMap;
18 if (pathToEmbeddingsFile.existsAsFile())
19 {
20 fileNameEmbeddingsMap = readFromFile(pathToEmbeddingsFile);
21 shouldSave = false;
22 }
23
24 std::set<std::string> filenames;
25 std::size_t count = 0;
27 for (const auto& path : File(root).findChildFiles(2, true, "*.wav"))
28 {
29 if (threadShouldExit())
30 break;
31
32 auto basename = path.getFileName().toStdString();
33
34 // try to avoid duplicate files coming from different folders
35 if (filenames.count(basename) == 0)
36 {
37 auto fullPathName = path.getFullPathName();
38
39 // load previously calculated embeddings if they are available
40 std::vector<float> embedding;
41 if (fileNameEmbeddingsMap.count(fullPathName) > 0)
42 {
43 embedding = fileNameEmbeddingsMap[fullPathName];
44 }
45 else
46 {
47 auto query = path.getFileNameWithoutExtension();
48 auto catID = query.upToFirstOccurrenceOf("_", false, false);
49 if (catID == "FOLEYFeet")
50 catID = "FOLYFeet"; // fix for typo in our factory assets
51 auto valid = m_UCS->isValid(catID);
52 if (valid)
53 {
54 // replace "CatID_" with "Category, Subcategory"
55 auto category = m_UCS->getCategorySubCategory(catID);
56 query = query.replace(catID, category);
57 query = query.replaceCharacters("_", " ");
58 }
59 embedding = m_sentenceTransformer.encode(query.toStdString());
60 fileNameEmbeddingsMap[fullPathName] = embedding;
61 shouldSave = true;
62 }
63 m_tree.addDatasetItem(embedding);
64 m_files.push_back(path);
65 ++count;
66 }
67 filenames.insert(basename);
68 }
69 filenames.clear();
70
71 if (shouldSave)
72 {
73 // if there were new files, save their paths/embeddings
74 // TODO: remove missing files?
75 auto success = saveToFile(pathToEmbeddingsFile, fileNameEmbeddingsMap);
76 jassert(success); // TODO: handle failure :'(
77 }
78
79 if (!m_files.empty())
80 {
82 }
83}
84
85std::vector<File> TextToFile::search(String query, std::size_t k)
86{
87 std::vector<File> results;
88 if (m_files.empty())
89 return results;
90
91 const String excludeOperator = "NOT";
92 if (query.containsWholeWord(excludeOperator))
93 {
94 auto exclude = query.fromFirstOccurrenceOf(excludeOperator, false, false).trim();
95 StringArray excludeTokens;
96 excludeTokens.addTokens(exclude, " ", StringRef(""));
97
98 query = query.upToFirstOccurrenceOf(excludeOperator, false, false);
99
100 auto embedding = m_sentenceTransformer.encode(query.toStdString());
101 // search for more results than needed, so we can filter and return the top-k
102 auto indices = m_tree.knnQuery(embedding, 4 * k);
103 for (const auto& index : indices)
104 {
105 auto text = m_files[index].getFileNameWithoutExtension().replaceCharacters("_", " ");
106 bool shouldFilter = false;
107 for (const auto& token : excludeTokens)
108 {
109 shouldFilter = text.containsWholeWordIgnoreCase(token) ? true : shouldFilter;
110 }
111 if (!shouldFilter)
112 {
113 results.push_back(m_files[index]);
114 if (results.size() == k)
115 break; // maximum of top-k results
116 }
117 }
118 }
119 else
120 {
121 // simple search without exclude tags
122 auto embedding = m_sentenceTransformer.encode(query.toStdString());
123 auto indices = m_tree.knnQuery(embedding, k);
124 for (const auto& index : indices)
125 {
126 results.push_back(m_files[index]);
127 }
128 }
129 return results;
130}
131
132File TextToFile::embeddingsFilePath(String filename) const
133{
134 return utils::StringsIntoPath(AssetManager::getPluginDirectory().getFullPathName(), "ttpResources", filename);
135}
136
137} // namespace krotos
static File getPluginDirectory()
Definition AssetManager.cpp:392
static String readFactorySamplesPath()
Definition AssetManager.cpp:112
void buildIndex()
Definition KDTreeND.cpp:6
std::vector< std::size_t > knnQuery(const std::vector< float > &x, std::size_t k) const
Definition KDTreeND.cpp:18
void addDatasetItem(const std::vector< float > &x)
Definition KDTreeND.cpp:4
std::vector< float > encode(std::string sentence) const
Definition SentenceTransformer.cpp:49
void run() override
Definition TextToFile.cpp:11
bool saveToFile(File file, const std::map< String, std::vector< float > > &data)
Definition TextToFile.h:20
std::vector< File > m_files
Definition TextToFile.h:81
TextToFile()
Definition TextToFile.cpp:4
KDTreeND m_tree
Definition TextToFile.h:80
std::map< String, std::vector< float > > readFromFile(File file)
Definition TextToFile.h:56
SharedResourcePointer< UniversalCategorySystem > m_UCS
Definition TextToFile.h:82
SentenceTransformer m_sentenceTransformer
Definition TextToFile.h:79
File embeddingsFilePath(String filename) const
Definition TextToFile.cpp:132
~TextToFile()
Definition TextToFile.cpp:9
std::vector< File > search(String query, std::size_t k=25)
Definition TextToFile.cpp:85
String StringsIntoPath(Args... args)
Joins multiple string arguments into a path string.
Definition helpers.h:25
Definition AirAbsorptionFilter.cpp:2
constexpr char textToFileEmbeddingsFilename[]
Definition TextToFile.h:7