Krotos Modules 3
Loading...
Searching...
No Matches
SentenceTransformer.cpp
Go to the documentation of this file.
1#include "bert.h"
2#include <cstdio>
3#include <unistd.h>
4
5namespace krotos
6{
7
// Constructor body. NOTE(review): the signature line (listing line 8) is not
// present in this listing.
// Visible steps: redirect stdout to a temporary log file, try to load the BERT
// model found by getModelPath(), then restore stdout and delete the log file.
9{
10
11 // Hush noisy bert init log
// stdoutFd starts at -1 and is only filled in by redirectPrint(), which is
// called only when the temporary log file could be created.
12 File tempLogFile = File::createTempFile(".log");
13 auto createLogResult = tempLogFile.create();
14 int stdoutFd(-1);
15 if (createLogResult.wasOk())
16 {
17 redirectPrint(tempLogFile, stdoutFd);
18 }
19
// getModelPath() returns a default-constructed File when the model file does
// not exist; the load below is still attempted with that File's path name.
20 bert_params params;
21 const auto path = getModelPath();
22 // jassert(path.existsAsFile()); // need to have model file locally
// NOTE(review): params.model stores a raw pointer obtained via toRawUTF8();
// confirm the underlying string outlives the bert_load_from_file() call.
23 params.model = path.getFullPathName().toRawUTF8();
// Token limit is hard-coded here; the commented-out bert_n_max_tokens() call
// at the end of this body is the alternative referenced by the linked issue.
24 n_max_tokens = 256;
25
// bctx is assigned the loader's result, so it is nullptr when loading fails.
// NOTE(review): the listing jumps from line 27 to line 29, so whatever this
// success branch does is not visible here — check the original file.
26 if ((bctx = bert_load_from_file(params.model)) != nullptr)
27 {
29 }
30
// Undo the redirection under the same condition that enabled it, then remove
// the temporary log file.
31 if (createLogResult.wasOk())
32 {
33 restorePrint(stdoutFd);
34 tempLogFile.deleteFile();
35 }
36 // https://github.com/skeskinen/bert.cpp/issues/37
37 // n_max_tokens = bert_n_max_tokens(bctx);
38}
39
// Destructor body. NOTE(review): the signature line (listing line 40) is not
// present in this listing.
// Only acts when a BERT context was loaded (bctx != nullptr).
41{
42
43 if (bctx != nullptr)
44 {
// NOTE(review): the listing skips line 45, so the statement executed here is
// not visible — presumably it releases bctx; confirm against the original file.
46 }
47}
48
49std::vector<float> SentenceTransformer::encode(std::string sentence) const
50{
51
52 if (m_modelFileAvailable == false)
53 {
54 return std::vector<float>();
55 }
56
57 // tokenize the prompt
58 std::vector<bert_vocab_id> tokens(n_max_tokens);
59 int n_tokens;
60 bert_tokenize(bctx, sentence.c_str(), tokens.data(), &n_tokens, n_max_tokens);
61 tokens.resize(n_tokens);
62
63 std::vector<float> embeddings(bert_n_embd(bctx));
64#ifdef JUCE_MAC
65 const int n_threads = 6;
66#else
67 const int n_threads = 1;
68#endif
69 bert_eval(bctx, n_threads, tokens.data(), n_tokens, embeddings.data());
70 return embeddings; // 384
71}
72
// Body of getModelPath(). NOTE(review): the signature line (listing line 73)
// is not present in this listing.
// Builds the expected model location — the plugin directory, then
// "ttpResources", then modelFileName — and returns it only if it exists on disk.
74{
75 // ggml model file
76 File ggmlDataFile = File(utils::StringsIntoPath(AssetManager::getPluginDirectory().getFullPathName(),
77 "ttpResources", modelFileName));
78
79 // get existing one
80 if (ggmlDataFile.exists() == true)
81 {
82 return ggmlDataFile;
83 }
// Missing model: hand back a default-constructed File so the caller can detect it.
84 else
85 return File();
86}
87
88void SentenceTransformer::redirectPrint(File outputFile, int& stdoutFd)
89{
90 auto filePath = AssetManager::convertFilePathToOSX(outputFile.getFullPathName()).toStdString();
91
92 // Redirect standard output to given file
93 fflush(stdout);
94 stdoutFd = dup(fileno(stdout));
95 if (freopen(filePath.c_str(), "w", stdout) == NULL)
96 {
97 close(stdoutFd);
98 std::cout << "Failed to redirect standard output to " << filePath << std::endl;
99 }
100}
101
// Body of restorePrint(int stdoutFd). NOTE(review): the signature line
// (listing line 102) is not present in this listing.
// Points stdout back at the descriptor saved before redirection.
// stdoutFd is the saved descriptor; a negative value means there is nothing
// to restore from.
103{
// Push out anything still buffered before the descriptor is swapped.
104 fflush(stdout);
105 if (stdoutFd >= 0)
106 {
// Duplicate the saved descriptor onto stdout's descriptor, close the saved
// copy, then clear the stream's error/EOF indicators.
107 dup2(stdoutFd, fileno(stdout));
108 close(stdoutFd);
109 clearerr(stdout);
110 }
111 else
112 {
// NOTE(review): this message is written to std::cout, i.e. the stream that
// could not be restored.
113 std::cout << "Failed to restore redirected printf" << std::endl;
114 }
115}
116
117} // namespace krotos
struct bert_ctx * bert_load_from_file(const char *fname)
void bert_tokenize(struct bert_ctx *ctx, const char *text, bert_vocab_id *tokens, int32_t *n_tokens, int32_t n_max_tokens)
int32_t bert_n_embd(bert_ctx *ctx)
void bert_eval(struct bert_ctx *ctx, int32_t n_threads, bert_vocab_id *tokens, int32_t n_tokens, float *embeddings)
void bert_free(bert_ctx *ctx)
static String convertFilePathToOSX(const String &path)
Definition AssetManager.cpp:596
static File getPluginDirectory()
Definition AssetManager.cpp:392
void restorePrint(int stdoutFd)
Definition SentenceTransformer.cpp:102
~SentenceTransformer()
Definition SentenceTransformer.cpp:40
File getModelPath() const
Definition SentenceTransformer.cpp:73
int n_max_tokens
Definition SentenceTransformer.h:24
SentenceTransformer()
Definition SentenceTransformer.cpp:8
bert_ctx * bctx
Definition SentenceTransformer.h:23
std::vector< float > encode(std::string sentence) const
Definition SentenceTransformer.cpp:49
bool m_modelFileAvailable
Definition SentenceTransformer.h:18
void redirectPrint(File filePath, int &stdoutFd)
Definition SentenceTransformer.cpp:88
String StringsIntoPath(Args... args)
Joins multiple string arguments into a path string.
Definition helpers.h:25
Definition AirAbsorptionFilter.cpp:2
constexpr char modelFileName[]
Definition SentenceTransformer.h:7
Definition bert.h:12
const char * model
Definition bert.h:16