16 const char*
model =
"models/all-MiniLM-L6-v2/ggml-model-q4_0.bin";
31 void bert_encode(
struct bert_ctx* ctx, int32_t n_threads,
const char* texts,
float* embeddings);
35 void bert_encode_batch(
struct bert_ctx* ctx, int32_t n_threads, int32_t n_batch_size, int32_t n_inputs,
36 const char** texts,
float** embeddings);
41 int32_t n_max_tokens);
47 int32_t* n_tokens,
float** batch_embeddings);
void bert_encode(struct bert_ctx *ctx, int32_t n_threads, const char *texts, float *embeddings)
void bert_eval_batch(struct bert_ctx *ctx, int32_t n_threads, int32_t n_batch_size, bert_vocab_id **batch_tokens, int32_t *n_tokens, float **batch_embeddings)
struct bert_ctx * bert_load_from_file(const char *fname)
const char * bert_vocab_id_to_token(bert_ctx *ctx, bert_vocab_id id)
void bert_tokenize(struct bert_ctx *ctx, const char *text, bert_vocab_id *tokens, int32_t *n_tokens, int32_t n_max_tokens)
int32_t bert_n_embd(bert_ctx *ctx)
bool bert_params_parse(int argc, char **argv, bert_params &params)
void bert_eval(struct bert_ctx *ctx, int32_t n_threads, bert_vocab_id *tokens, int32_t n_tokens, float *embeddings)
void bert_encode_batch(struct bert_ctx *ctx, int32_t n_threads, int32_t n_batch_size, int32_t n_inputs, const char **texts, float **embeddings)
void bert_free(bert_ctx *ctx)
int32_t bert_vocab_id
Definition bert.h:24
int32_t bert_n_max_tokens(bert_ctx *ctx)
const char * model
Definition bert.h:16
const char * prompt
Definition bert.h:17
int32_t port
Definition bert.h:14
int32_t n_threads
Definition bert.h:13