bert_8h_source.html

#ifndef BERT_H

#define BERT_H


#ifdef __cplusplus

extern "C"

{

#endif


    // Run-time options together with their built-in defaults.
    // NOTE(review): the default member initializers are C++-only syntax, so this
    // struct is not usable from a plain C translation unit.
    struct bert_params {
        int32_t n_threads = 6; // defaults to 6; presumably passed on as the n_threads argument of the encode/eval calls - confirm
        int32_t port = 8080;   // server mode port to bind

        const char *model = "models/all-MiniLM-L6-v2/ggml-model-q4_0.bin"; // model path
        const char *prompt = "test prompt"; // defaults to a placeholder; presumably the text to embed - confirm
    };


    // Presumably fills `params` from the command-line arguments argc/argv.
    // NOTE(review): the recognised options and the meaning of the bool result
    // (likely true on success) are not visible in this header - confirm in the implementation.
    // `params` is taken by C++ reference, so this declaration is C++-only.
    bool bert_params_parse(int argc, char **argv, bert_params &params);


    // Opaque model context: only forward-declared here and always passed by pointer.
    struct bert_ctx;

    // Integer id of a vocabulary entry (see bert_vocab_id_to_token()).
    typedef int32_t bert_vocab_id;

    // Returns a context for the model file at `fname`.
    // NOTE(review): failure behaviour (presumably a NULL return) and ownership
    // (presumably released with bert_free()) are not visible here - confirm.
    struct bert_ctx *bert_load_from_file(const char *fname);

    // Counterpart of bert_load_from_file(); presumably releases the context - confirm.
    // The parameter uses the `struct` keyword like the other context-taking
    // declarations, so the prototype is also valid when read by a C compiler.
    void bert_free(struct bert_ctx *ctx);


    // Main api, does both tokenizing and evaluation

    // Single-input form: one text in, one embedding buffer out.
    // NOTE(review): `embeddings` presumably needs room for bert_n_embd(ctx) floats - confirm.
    void bert_encode(struct bert_ctx *ctx,
                     int32_t n_threads,
                     const char *texts,
                     float *embeddings);


    // Multi-input form of bert_encode(): `texts` and `embeddings` are arrays.
    // n_batch_size - how many to process at a time
    // n_inputs     - total size of texts and embeddings arrays
    void bert_encode_batch(struct bert_ctx *ctx,
                           int32_t n_threads,
                           int32_t n_batch_size,
                           int32_t n_inputs,
                           const char **texts,
                           float **embeddings);


    // Api for separate tokenization & eval

    // Tokenization half of the split API.
    // NOTE(review): from the signature, `tokens` looks like an output buffer with
    // capacity `n_max_tokens` and `*n_tokens` the count actually written - confirm
    // in the implementation.
    void bert_tokenize(struct bert_ctx *ctx,
                       const char *text,
                       bert_vocab_id *tokens,
                       int32_t *n_tokens,
                       int32_t n_max_tokens);


    // Evaluation half of the split API: takes `n_tokens` token ids and an output buffer.
    // NOTE(review): `embeddings` presumably receives bert_n_embd(ctx) floats - confirm.
    void bert_eval(struct bert_ctx *ctx,
                   int32_t n_threads,
                   bert_vocab_id *tokens,
                   int32_t n_tokens,
                   float *embeddings);


    // Batched evaluation over `n_batch_size` token sequences.
    // NOTE: for batch processing the longest input must be first
    // NOTE(review): `n_tokens` is presumably an array with one length per entry of
    // `batch_tokens`, and `batch_embeddings` one output buffer per entry - confirm.
    void bert_eval_batch(struct bert_ctx *ctx,
                         int32_t n_threads,
                         int32_t n_batch_size,
                         bert_vocab_id **batch_tokens,
                         int32_t *n_tokens,
                         float **batch_embeddings);


    // Presumably the embedding size of the loaded model, i.e. the number of
    // floats per output embedding - confirm in the implementation.
    // The context parameter uses the `struct` keyword, matching the encode/eval
    // declarations and keeping the prototype valid for a C compiler.
    int32_t bert_n_embd(struct bert_ctx *ctx);

    // Presumably the maximum number of tokens the loaded model accepts per
    // input - confirm in the implementation.
    // The context parameter uses the `struct` keyword, matching the encode/eval
    // declarations and keeping the prototype valid for a C compiler.
    int32_t bert_n_max_tokens(struct bert_ctx *ctx);


    const char* bert_vocab_id_to_token(bert_ctx* ctx, bert_vocab_id id);


#ifdef __cplusplus

}

#endif


#endif // BERT_H

bert_encode
void bert_encode(struct bert_ctx *ctx, int32_t n_threads, const char *texts, float *embeddings)

bert_eval_batch
void bert_eval_batch(struct bert_ctx *ctx, int32_t n_threads, int32_t n_batch_size, bert_vocab_id **batch_tokens, int32_t *n_tokens, float **batch_embeddings)

bert_load_from_file
struct bert_ctx * bert_load_from_file(const char *fname)

bert_vocab_id_to_token
const char * bert_vocab_id_to_token(bert_ctx *ctx, bert_vocab_id id)

bert_tokenize
void bert_tokenize(struct bert_ctx *ctx, const char *text, bert_vocab_id *tokens, int32_t *n_tokens, int32_t n_max_tokens)

bert_n_embd
int32_t bert_n_embd(bert_ctx *ctx)

bert_params_parse
bool bert_params_parse(int argc, char **argv, bert_params &params)

bert_eval
void bert_eval(struct bert_ctx *ctx, int32_t n_threads, bert_vocab_id *tokens, int32_t n_tokens, float *embeddings)

bert_encode_batch
void bert_encode_batch(struct bert_ctx *ctx, int32_t n_threads, int32_t n_batch_size, int32_t n_inputs, const char **texts, float **embeddings)

bert_free
void bert_free(bert_ctx *ctx)

bert_vocab_id
int32_t bert_vocab_id
Definition bert.h:24

bert_n_max_tokens
int32_t bert_n_max_tokens(bert_ctx *ctx)

bert_params
Definition bert.h:12

bert_params::model
const char * model
Definition bert.h:16

bert_params::prompt
const char * prompt
Definition bert.h:17

bert_params::port
int32_t port
Definition bert.h:14

bert_params::n_threads
int32_t n_threads
Definition bert.h:13