Krotos Modules 3
Loading...
Searching...
No Matches
TextToPreset.h
Go to the documentation of this file.
1#pragma once
2
3namespace krotos
4{
5
6#define AUDIO_METADATA_TAGS 1
7#define PRESET_FILENAME_TAGS 0
8
9constexpr char embeddingsFileName[] = ".ttpEmbCache";
10constexpr char ttpStatusFileName[] = ".ttpStatus";
11constexpr char ttpStatusDateProperty[] = "date modified";
12constexpr char ttpNumericalMetadata[] = "-0123456789";
13constexpr char wavMetadataFieldInFile[] = "bwav description";
14constexpr char presetNameField[] = "presetName";
15constexpr char embeddingsNameField[] = "embeddings";
16constexpr char tagsNameField[] = "tags";
17const static String ttpThreadName{"TTPThread"};
18
19static constexpr size_t BUF_SIZE = 32; // Key size - must be 32
20constexpr size_t BUFFER_SIZE = 1024;
21constexpr size_t IV_SIZE = 16;
22
25class TextToPreset : public Thread
26{
27 public:
30
36 float computeSimilarity(const std::vector<float>& queryEmbedding, const std::vector<float>& resultEmbedding) const;
37
45 StringArray combineSearch(const String& searchTerm, const StringArray& classicResults, const StringArray& ttpResults, std::map<String, std::vector<float>>& embeddingsCache, const size_t& maxResults = 25);
46
52 StringArray findClosestPresets(String searchTerm, int maxNumPresetsToFind) const;
53
58 std::string getPresetPath(std::string presetNameToLoad) const
59 {
60 return m_presetNameAndPresetPathMap.at(presetNameToLoad);
61 }
62
64 String logString() const { return m_logString; }
65
70 bool processFileList(Array<File>& filesToProcess);
71
75 bool calculateEmbeddingsForPresetFile(File presetFile);
76
80 bool appendDataToEmbeddingsFile(/*CriticalSection& cs*/);
81
83 File createFilePath(String filename);
84
86 void loadEmbeddings();
87
89 StringArray performSearch(String query);
90
// TODO: this should be picked up from the decryption code; it was copied here until
// there is a consistent way to inter-include km3 modules.
/**
 * Decodes a string of hexadecimal digits into raw bytes.
 *
 * The input is consumed two characters at a time; each pair is parsed as one
 * hexadecimal number and stored, truncated to a single byte, in the next slot
 * of byteArray. A trailing single character (odd-length input) is parsed on
 * its own as a one-digit value. A pair that cannot be parsed yields 0.
 *
 * @param input     text holding the hexadecimal digits
 * @param byteArray destination buffer; the caller must provide room for at
 *                  least (input.length() + 1) / 2 bytes because this function
 *                  has no way to check the buffer size. A null pointer makes
 *                  the call a no-op.
 */
void getSampleMetadata(const std::string& input, unsigned char* byteArray)
{
    if (byteArray == nullptr)
        return;

    for (size_t i = 0, j = 0; i < input.length(); i += 2, j++)
    {
        // Zero-initialised on purpose: when the stream's sentry fails (e.g. the
        // pair is only whitespace) the extraction leaves the variable untouched,
        // which previously meant writing an indeterminate value to the output.
        unsigned int byteValue = 0;
        std::istringstream(input.substr(i, 2)) >> std::hex >> byteValue;
        byteArray[j] = static_cast<unsigned char>(byteValue);
    }
}
103
104 private:
106 void clear();
107
108 // on a thread
109 void run() override;
110
111 // log string
113
115 void getPresetAssetNames(ValueTree& root);
116
124 bool assetNamesToTags(bool eliminateDuplicates);
125
128 void searchForTagAndProperty(ValueTree& root, Identifier tag, Identifier property);
129
131 std::vector<float> destringify(String stringWithNumbers);
132
134 bool saveToFile(File file);
135
137 bool readFromFile(File file);
138
140 template <class T> String stringify(std::vector<T> numbers);
141
142 // simple struct to hold preset tags and embeddings
143 struct Data
144 {
145 StringArray tags;
146 std::vector<float> embeddings;
147 Data(StringArray tags = StringArray(), std::vector<float> embeddings = std::vector<float>())
149 {
150 }
151 };
152
153 // asset sample paths
154 StringArray m_samplePaths;
155
156 // asset file names
157 StringArray m_sampleFilenames;
158
159 // preset file to process
161
162 // preset name to process
164
165 // will keep the tags for each preset file
166 StringArray m_presetTags;
167
168 // map that holds preset tags and embeddings
169 std::map<String, Data> m_presetData;
170
171 // map that holds preset name and preset path
172 std::map<std::string, std::string> m_presetNameAndPresetPathMap;
173
174 // includes words within preset asset names that are tag irrelevant and need to be excluded
175 StringArray excludeTerms{"Woje", "woje", "SNDBTS_CAS1", "SNDBTS_CAS4", ".wav", "wav", "kaf", ".kaf",
176 "sndbts", "krotos", "krt", "krts", "mf", "ks", "mono", "stereo",
177 "ambisonic", "and", "in", "on", "up", "or", "i", "ii",
178 "type1", "type2", "type3", "type4", "type5"};
179
180 // Multiple asset paths per engine are currently split by the following string; this is a temporary fix until it is
181 // sorted out
183
184 // split by the delimiter above if necessary and keep treated paths inside
186
187 // delimiters TODO: see if more needed by inspecting asset file names and paths
188 String delimiters{",|._- "};
189
190 // the sentence transformer (bert)
192
193 // class that holds UCS CatIDs
194 SharedResourcePointer<UniversalCategorySystem> m_UCS;
195
196 // KD-Tree for nearest neighbour search
198
199 // vector of preset names
200 std::vector<String> m_presetNames;
201
202 // critical section for thread safety
203 CriticalSection m_cs;
204
205 // data will hold the hex representation of the encryption key
206 unsigned char m_data[BUFFER_SIZE];
207
208 // model file available
210};
211
214{
215 public:
216 SharedResourcePointer<TextToPreset> m_textToPreset;
217};
218
221class DirectoryWatcher : private TimeSliceClient
222{
223 public:
224 DirectoryWatcher(const File& dirToWatch);
225
226 ~DirectoryWatcher() override;
227
231 bool handleContentUpdate();
232
237 void getAllPresetFiles(const File& directory, Array<File>& fileArray);
238
239 private:
240 // the thread the watcher runs on
241 TimeSliceThread thread{" Directory Watcher Thread"};
242
244 int useTimeSlice() override;
245
246 // dir modification date
248
249 // root dir
251
252 // shared ttp instance
254
255 // presets file array
257
258 // presets already existing in the embeddings file
260
261 // list of presets file to process
263};
264
265} // namespace krotos
directory watcher class for text to preset module to process data when there are changes in the preset...
Definition TextToPreset.h:222
int useTimeSlice() override
timeslice thread checks the directory modification date and handles it accordingly
Definition TextToPreset.cpp:945
File m_directory
Definition TextToPreset.h:250
bool handleContentUpdate()
handles content update and triggers ttp embeddings calculation accordingly
Definition TextToPreset.cpp:869
DirectoryWatcher(const File &dirToWatch)
Definition TextToPreset.cpp:844
void getAllPresetFiles(const File &directory, Array< File > &fileArray)
gets all presets under a directory recursively
Definition TextToPreset.cpp:928
String m_dirModificationDate
Definition TextToPreset.h:247
Array< File > m_presetFilesToProcess
Definition TextToPreset.h:262
~DirectoryWatcher() override
Definition TextToPreset.cpp:850
TextToPresetShared m_textToPresetSharedInstance
Definition TextToPreset.h:253
TimeSliceThread thread
Definition TextToPreset.h:241
StringArray m_presetsWithEmbeddings
Definition TextToPreset.h:259
Array< File > m_presetFilesArray
Definition TextToPreset.h:256
Definition KDTreeND.h:30
Definition SentenceTransformer.h:9
text to preset class for text to preset embeddings calculation and dictionary creation
Definition TextToPreset.h:26
bool calculateEmbeddingsForPresetFile(File presetFile)
create text embeddings file for the given preset file
Definition TextToPreset.cpp:74
unsigned char m_data[BUFFER_SIZE]
Definition TextToPreset.h:206
String assetPathsDelimiter
Definition TextToPreset.h:182
~TextToPreset()
Definition TextToPreset.cpp:22
bool processFileList(Array< File > &filesToProcess)
processes a file list to create text embeddings
Definition TextToPreset.cpp:41
KDTreeND m_kNN
Definition TextToPreset.h:197
StringArray m_sampleFilenames
Definition TextToPreset.h:157
bool m_modelFileAvailable
Definition TextToPreset.h:209
StringArray m_samplePaths
Definition TextToPreset.h:154
TextToPreset()
Definition TextToPreset.cpp:9
File m_presetFile
Definition TextToPreset.h:160
StringArray m_presetTags
Definition TextToPreset.h:166
StringArray excludeTerms
Definition TextToPreset.h:175
SentenceTransformer m_sentenceTransformer
Definition TextToPreset.h:191
StringArray combineSearch(const String &searchTerm, const StringArray &classicResults, const StringArray &ttpResults, std::map< String, std::vector< float > > &embeddingsCache, const size_t &maxResults=25)
combine multiple StringArrays, merge and rerank the items based on cosine similarity
Definition TextToPreset.cpp:470
void searchForTagAndProperty(ValueTree &root, Identifier tag, Identifier property)
this will recursively search in a preset ValueTree structure for a given identifier and property na...
Definition TextToPreset.cpp:426
String m_logString
Definition TextToPreset.h:112
std::vector< float > destringify(String stringWithNumbers)
destringifies embeddings
Definition TextToPreset.cpp:576
bool readFromFile(File file)
load embeddings from file
Definition TextToPreset.cpp:637
String delimiters
Definition TextToPreset.h:188
void getPresetAssetNames(ValueTree &root)
will get preset asset paths and names to the m_samplePaths and m_sampleFilenames arrays
Definition TextToPreset.cpp:421
std::vector< String > m_presetNames
Definition TextToPreset.h:200
void clear()
clears internal data
Definition TextToPreset.cpp:32
void loadEmbeddings()
will load embeddings data from file to all modules that need it
Definition TextToPreset.cpp:801
StringArray performSearch(String query)
performs search based on text embeddings and KNN
Definition TextToPreset.cpp:823
std::string getPresetPath(std::string presetNameToLoad) const
return the preset path
Definition TextToPreset.h:58
float computeSimilarity(const std::vector< float > &queryEmbedding, const std::vector< float > &resultEmbedding) const
compute cosine similarity of result to query
Definition TextToPreset.cpp:536
String m_presetName
Definition TextToPreset.h:163
std::map< String, Data > m_presetData
Definition TextToPreset.h:169
std::map< std::string, std::string > m_presetNameAndPresetPathMap
Definition TextToPreset.h:172
StringArray m_treatedAssetPaths
Definition TextToPreset.h:185
String stringify(std::vector< T > numbers)
Definition TextToPreset.cpp:566
SharedResourcePointer< UniversalCategorySystem > m_UCS
Definition TextToPreset.h:194
bool appendDataToEmbeddingsFile()
appends data to the main embeddings file
Definition TextToPreset.cpp:685
String logString() const
returns the log string
Definition TextToPreset.h:64
bool assetNamesToTags(bool eliminateDuplicates)
Splits the asset names, in respect to the excluded terms, and pushes the remaining terms into a new s...
Definition TextToPreset.cpp:104
void run() override
Definition TextToPreset.cpp:24
File createFilePath(String filename)
creates the path for the embeddings file
Definition TextToPreset.cpp:590
bool saveToFile(File file)
saves embeddings to file
Definition TextToPreset.cpp:595
void getSampleMetadata(const std::string &input, unsigned char *byteArray)
Definition TextToPreset.h:93
CriticalSection m_cs
Definition TextToPreset.h:203
StringArray findClosestPresets(String searchTerm, int maxNumPresetsToFind) const
returns a String array that contains K closest presets,
Definition TextToPreset.cpp:548
wrapper class for text to preset module shared resource pointer
Definition TextToPreset.h:214
SharedResourcePointer< TextToPreset > m_textToPreset
Definition TextToPreset.h:216
Definition AirAbsorptionFilter.cpp:2
constexpr size_t IV_SIZE
Definition TextToPreset.h:21
constexpr char tagsNameField[]
Definition TextToPreset.h:16
constexpr char ttpStatusDateProperty[]
Definition TextToPreset.h:11
constexpr char ttpNumericalMetadata[]
Definition TextToPreset.h:12
constexpr char embeddingsNameField[]
Definition TextToPreset.h:15
static constexpr size_t BUF_SIZE
Definition TextToPreset.h:19
constexpr char wavMetadataFieldInFile[]
Definition TextToPreset.h:13
constexpr char ttpStatusFileName[]
Definition TextToPreset.h:10
constexpr char presetNameField[]
Definition TextToPreset.h:14
static const String ttpThreadName
Definition TextToPreset.h:17
constexpr char embeddingsFileName[]
Definition TextToPreset.h:9
constexpr size_t BUFFER_SIZE
Definition TextToPreset.h:20
Definition TextToPreset.h:144
std::vector< float > embeddings
Definition TextToPreset.h:146
Data(StringArray tags=StringArray(), std::vector< float > embeddings=std::vector< float >())
Definition TextToPreset.h:147
StringArray tags
Definition TextToPreset.h:145