Krotos Modules 3
Loading...
Searching...
No Matches
bert.h
Go to the documentation of this file.
1#ifndef BERT_H
2#define BERT_H
3
4
5
6#ifdef __cplusplus
7extern "C"
8{
9#endif
10
// NOTE(review): the line that opens this declaration (listing line 11) is missing
// from this extract; presumably `struct bert_params`, the type taken by
// bert_params_parse — confirm against the original header.
// Each member below carries an in-class default value (C++-only syntax).
12 {
13 int32_t n_threads = 6; // defaults to 6
14 int32_t port = 8080; // server mode port to bind
15
16 const char* model = "models/all-MiniLM-L6-v2/ggml-model-q4_0.bin"; // model path
17 const char* prompt = "test prompt"; // defaults to a placeholder prompt string
18 };
19
// Presumably parses the command-line arguments argc/argv into `params` and
// reports success through the bool result — confirm against the implementation.
// NOTE(review): the reference parameter (`bert_params&`) makes this declaration
// C++-only even though it sits inside the extern "C" block.
20 bool bert_params_parse(int argc, char** argv, bert_params& params);
21
// Forward declaration only: the layout of bert_ctx is not visible in this header,
// and every function below receives it by pointer.
22 struct bert_ctx;
23
// 32-bit integer type used for token ids by bert_tokenize, bert_eval and
// bert_vocab_id_to_token.
24 typedef int32_t bert_vocab_id;
25
// Presumably loads a model from the file at `fname` and returns a new context;
// the failure behaviour is not visible here — confirm against the implementation.
26 struct bert_ctx* bert_load_from_file(const char* fname);
// Presumably releases a context obtained from bert_load_from_file — confirm.
// NOTE(review): `bert_ctx` is written without the `struct` keyword and no typedef
// for it is visible in this header, so this declaration only compiles as C++.
27 void bert_free(bert_ctx* ctx);
28
29 // Main API: performs both tokenization and evaluation.
30
// Single-input variant: despite the plural name, `texts` is one C string here.
// The result is written through `embeddings`; presumably the buffer must hold
// bert_n_embd(ctx) floats — TODO confirm.
31 void bert_encode(struct bert_ctx* ctx, int32_t n_threads, const char* texts, float* embeddings);
32
// Batch variant: `texts` and `embeddings` are arrays of pointers.
33 // n_batch_size - how many inputs to process at a time
34 // n_inputs - total size of the texts and embeddings arrays
35 void bert_encode_batch(struct bert_ctx* ctx, int32_t n_threads, int32_t n_batch_size, int32_t n_inputs,
36 const char** texts, float** embeddings);
37
38 // API for separate tokenization and evaluation
39
// Presumably writes at most n_max_tokens ids for `text` into `tokens` and stores
// the number written in *n_tokens — confirm against the implementation.
40 void bert_tokenize(struct bert_ctx* ctx, const char* text, bert_vocab_id* tokens, int32_t* n_tokens,
41 int32_t n_max_tokens);
42
// Presumably evaluates the n_tokens ids in `tokens` and writes the result to
// `embeddings` — confirm against the implementation.
43 void bert_eval(struct bert_ctx* ctx, int32_t n_threads, bert_vocab_id* tokens, int32_t n_tokens, float* embeddings);
44
// Batch variant: here n_tokens is a pointer, presumably one count per batch entry — confirm.
45 // NOTE: for batch processing, the longest input must come first
46 void bert_eval_batch(struct bert_ctx* ctx, int32_t n_threads, int32_t n_batch_size, bert_vocab_id** batch_tokens,
47 int32_t* n_tokens, float** batch_embeddings);
48
// Integer queries on a context. Presumably the embedding dimension and the
// maximum number of tokens per input, going by the names — confirm.
// NOTE(review): these three declarations use `bert_ctx` without the `struct`
// keyword, unlike the encode/eval declarations earlier in the header.
49 int32_t bert_n_embd(bert_ctx* ctx);
50 int32_t bert_n_max_tokens(bert_ctx* ctx);
51
// Returns a C string for the given vocabulary id; presumably the token text,
// with ownership of the string not visible here — confirm.
52 const char* bert_vocab_id_to_token(bert_ctx* ctx, bert_vocab_id id);
53
54#ifdef __cplusplus
55}
56#endif
57
58#endif // BERT_H
void bert_encode(struct bert_ctx *ctx, int32_t n_threads, const char *texts, float *embeddings)
void bert_eval_batch(struct bert_ctx *ctx, int32_t n_threads, int32_t n_batch_size, bert_vocab_id **batch_tokens, int32_t *n_tokens, float **batch_embeddings)
struct bert_ctx * bert_load_from_file(const char *fname)
const char * bert_vocab_id_to_token(bert_ctx *ctx, bert_vocab_id id)
void bert_tokenize(struct bert_ctx *ctx, const char *text, bert_vocab_id *tokens, int32_t *n_tokens, int32_t n_max_tokens)
int32_t bert_n_embd(bert_ctx *ctx)
bool bert_params_parse(int argc, char **argv, bert_params &params)
void bert_eval(struct bert_ctx *ctx, int32_t n_threads, bert_vocab_id *tokens, int32_t n_tokens, float *embeddings)
void bert_encode_batch(struct bert_ctx *ctx, int32_t n_threads, int32_t n_batch_size, int32_t n_inputs, const char **texts, float **embeddings)
void bert_free(bert_ctx *ctx)
int32_t bert_vocab_id
Definition bert.h:24
int32_t bert_n_max_tokens(bert_ctx *ctx)
bert_params
Definition bert.h:12
const char * model
Definition bert.h:16
const char * prompt
Definition bert.h:17
int32_t port
Definition bert.h:14
int32_t n_threads
Definition bert.h:13