173#if defined(_WIN32) && !defined(__MINGW32__)
175#define GGML_API __declspec(dllexport)
177#define GGML_API __declspec(dllimport)
180#define GGML_API __attribute__((visibility("default")))
190#define GGML_FILE_MAGIC 0x67676d6c
191#define GGML_FILE_VERSION 1
193#define GGML_MAX_DIMS 4
194#define GGML_MAX_NODES 4096
195#define GGML_MAX_PARAMS 16
196#define GGML_MAX_CONTEXTS 64
197#define GGML_MAX_OPT 4
198#define GGML_DEFAULT_N_THREADS 4
200#define GGML_ASSERT(x) \
205 fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", __FILE__, __LINE__, #x); \
447 int64_t ne1, int64_t ne2);
450 int64_t ne1, int64_t ne2, int64_t ne3);
557 int64_t ne1, int64_t ne2);
574 int axis2,
int axis3);
793#define GGML_RESTRICT restrict
GGML_API void ggml_fp32_to_fp16_row(const float *x, ggml_fp16_t *y, size_t n)
GGML_API int ggml_cpu_has_f16c(void)
GGML_API int ggml_cpu_has_vsx(void)
GGML_API struct ggml_tensor * ggml_new_tensor_2d(struct ggml_context *ctx, enum ggml_type type, int64_t ne0, int64_t ne1)
struct ggml_tensor * ggml_alibi(struct ggml_context *ctx, struct ggml_tensor *a, int n_past, int n_head)
GGML_API void ggml_set_i32_1d(const struct ggml_tensor *tensor, int i, int32_t value)
GGML_API struct ggml_tensor * ggml_set_zero(struct ggml_tensor *tensor)
GGML_API size_t ggml_set_scratch(struct ggml_context *ctx, struct ggml_scratch scratch)
GGML_API struct ggml_tensor * ggml_map_unary_f32(struct ggml_context *ctx, struct ggml_tensor *a, const ggml_unary_op_f32_t fun)
GGML_API struct ggml_context * ggml_init(struct ggml_init_params params)
GGML_API int ggml_cpu_has_clblast(void)
GGML_API void * ggml_get_data(const struct ggml_tensor *tensor)
#define GGML_MAX_NODES
Definition ggml.h:194
GGML_API struct ggml_tensor * ggml_norm(struct ggml_context *ctx, struct ggml_tensor *a)
GGML_API void ggml_graph_reset(struct ggml_cgraph *cgraph)
GGML_API int ggml_cpu_has_neon(void)
GGML_API struct ggml_tensor * ggml_map_binary_f32(struct ggml_context *ctx, struct ggml_tensor *a, struct ggml_tensor *b, const ggml_binary_op_f32_t fun)
void(* ggml_binary_op_f32_t)(const int, float *, const float *, const float *)
Definition ggml.h:618
GGML_API int ggml_cpu_has_avx512(void)
GGML_API size_t ggml_quantize_q4_0(const float *src, void *dst, int n, int k, int64_t *hist)
ggml_opt_result
Definition ggml.h:669
@ GGML_OPT_OK
Definition ggml.h:670
@ GGML_LINESEARCH_MINIMUM_STEP
Definition ggml.h:677
@ GGML_OPT_DID_NOT_CONVERGE
Definition ggml.h:671
@ GGML_OPT_INVALID_WOLFE
Definition ggml.h:673
@ GGML_OPT_NO_CONTEXT
Definition ggml.h:672
@ GGML_OPT_FAIL
Definition ggml.h:674
@ GGML_LINESEARCH_MAXIMUM_ITERATIONS
Definition ggml.h:679
@ GGML_LINESEARCH_MAXIMUM_STEP
Definition ggml.h:678
@ GGML_LINESEARCH_FAIL
Definition ggml.h:676
@ GGML_LINESEARCH_INVALID_PARAMETERS
Definition ggml.h:680
GGML_API ggml_fp16_t ggml_fp32_to_fp16(float x)
GGML_API struct ggml_tensor * ggml_reshape_2d(struct ggml_context *ctx, struct ggml_tensor *a, int64_t ne0, int64_t ne1)
GGML_API struct ggml_tensor * ggml_new_f32(struct ggml_context *ctx, float value)
GGML_API struct ggml_tensor * ggml_add_inplace(struct ggml_context *ctx, struct ggml_tensor *a, struct ggml_tensor *b)
GGML_API void ggml_free(struct ggml_context *ctx)
ggml_opt_type
Definition ggml.h:652
@ GGML_OPT_ADAM
Definition ggml.h:653
@ GGML_OPT_LBFGS
Definition ggml.h:654
GGML_API void ggml_print_object(const struct ggml_object *obj)
GGML_API const char * ggml_type_name(enum ggml_type type)
GGML_API size_t ggml_nbytes(const struct ggml_tensor *tensor)
ggml_op
Definition ggml.h:267
@ GGML_OP_MAP_UNARY
Definition ggml.h:310
@ GGML_OP_DUP
Definition ggml.h:270
@ GGML_OP_ROPE
Definition ggml.h:302
@ GGML_OP_CONT
Definition ggml.h:294
@ GGML_OP_COUNT
Definition ggml.h:313
@ GGML_OP_MUL_MAT
Definition ggml.h:290
@ GGML_OP_ALIBI
Definition ggml.h:303
@ GGML_OP_SILU
Definition ggml.h:286
@ GGML_OP_CPY
Definition ggml.h:293
@ GGML_OP_SQR
Definition ggml.h:275
@ GGML_OP_MEAN
Definition ggml.h:278
@ GGML_OP_VIEW
Definition ggml.h:296
@ GGML_OP_NONE
Definition ggml.h:268
@ GGML_OP_ABS
Definition ggml.h:280
@ GGML_OP_ADD
Definition ggml.h:271
@ GGML_OP_GET_ROWS
Definition ggml.h:299
@ GGML_OP_DIV
Definition ggml.h:274
@ GGML_OP_SUB
Definition ggml.h:272
@ GGML_OP_RMS_NORM
Definition ggml.h:288
@ GGML_OP_SGN
Definition ggml.h:281
@ GGML_OP_FLASH_ATTN
Definition ggml.h:307
@ GGML_OP_PERMUTE
Definition ggml.h:297
@ GGML_OP_MUL
Definition ggml.h:273
@ GGML_OP_FLASH_FF
Definition ggml.h:308
@ GGML_OP_RELU
Definition ggml.h:284
@ GGML_OP_NORM
Definition ggml.h:287
@ GGML_OP_CONV_1D_2S
Definition ggml.h:305
@ GGML_OP_STEP
Definition ggml.h:283
@ GGML_OP_SOFT_MAX
Definition ggml.h:301
@ GGML_OP_DIAG_MASK_INF
Definition ggml.h:300
@ GGML_OP_SCALE
Definition ggml.h:292
@ GGML_OP_TRANSPOSE
Definition ggml.h:298
@ GGML_OP_CONV_1D_1S
Definition ggml.h:304
@ GGML_OP_SQRT
Definition ggml.h:276
@ GGML_OP_GELU
Definition ggml.h:285
@ GGML_OP_REPEAT
Definition ggml.h:279
@ GGML_OP_NEG
Definition ggml.h:282
@ GGML_OP_SUM
Definition ggml.h:277
@ GGML_OP_RESHAPE
Definition ggml.h:295
@ GGML_OP_MAP_BINARY
Definition ggml.h:311
GGML_API size_t ggml_quantize_q5_0(const float *src, void *dst, int n, int k, int64_t *hist)
GGML_API int ggml_cpu_has_wasm_simd(void)
GGML_API size_t ggml_type_size(enum ggml_type type)
GGML_API struct ggml_tensor * ggml_get_rows(struct ggml_context *ctx, struct ggml_tensor *a, struct ggml_tensor *b)
GGML_API struct ggml_tensor * ggml_mul_mat(struct ggml_context *ctx, struct ggml_tensor *a, struct ggml_tensor *b)
GGML_API size_t ggml_quantize_q5_1(const float *src, void *dst, int n, int k, int64_t *hist)
GGML_API enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype)
static const size_t GGML_OBJECT_SIZE
Definition ggml.h:327
GGML_API void ggml_graph_compute(struct ggml_context *ctx, struct ggml_cgraph *cgraph)
GGML_API int ggml_cpu_has_fp16_va(void)
GGML_API int ggml_cpu_has_avx512_vnni(void)
GGML_API struct ggml_tensor * ggml_mul(struct ggml_context *ctx, struct ggml_tensor *a, struct ggml_tensor *b)
GGML_API struct ggml_tensor * ggml_neg(struct ggml_context *ctx, struct ggml_tensor *a)
GGML_API struct ggml_tensor * ggml_cpy(struct ggml_context *ctx, struct ggml_tensor *a, struct ggml_tensor *b)
GGML_API void ggml_set_param(struct ggml_context *ctx, struct ggml_tensor *tensor)
GGML_API size_t ggml_used_mem(const struct ggml_context *ctx)
GGML_API void ggml_time_init(void)
GGML_API struct ggml_tensor * ggml_rope(struct ggml_context *ctx, struct ggml_tensor *a, int n_past, int n_dims, int mode)
GGML_API struct ggml_tensor * ggml_view_1d(struct ggml_context *ctx, struct ggml_tensor *a, int64_t ne0, size_t offset)
GGML_API struct ggml_tensor * ggml_soft_max(struct ggml_context *ctx, struct ggml_tensor *a)
GGML_API struct ggml_tensor * ggml_sum(struct ggml_context *ctx, struct ggml_tensor *a)
#define GGML_RESTRICT
Definition ggml.h:793
GGML_API struct ggml_tensor * ggml_silu(struct ggml_context *ctx, struct ggml_tensor *a)
GGML_API int64_t ggml_time_us(void)
GGML_API int64_t ggml_cycles_per_ms(void)
GGML_API size_t ggml_element_size(const struct ggml_tensor *tensor)
ggml_type
Definition ggml.h:233
@ GGML_TYPE_Q4_2
Definition ggml.h:238
@ GGML_TYPE_Q8_1
Definition ggml.h:243
@ GGML_TYPE_F32
Definition ggml.h:234
@ GGML_TYPE_I16
Definition ggml.h:245
@ GGML_TYPE_Q5_0
Definition ggml.h:240
@ GGML_TYPE_I8
Definition ggml.h:244
@ GGML_TYPE_F16
Definition ggml.h:235
@ GGML_TYPE_Q4_1
Definition ggml.h:237
@ GGML_TYPE_Q8_0
Definition ggml.h:242
@ GGML_TYPE_I32
Definition ggml.h:246
@ GGML_TYPE_Q5_1
Definition ggml.h:241
@ GGML_TYPE_COUNT
Definition ggml.h:247
@ GGML_TYPE_Q4_0
Definition ggml.h:236
GGML_API float * ggml_get_data_f32(const struct ggml_tensor *tensor)
GGML_API int ggml_cpu_has_avx512_vbmi(void)
GGML_API struct ggml_tensor * ggml_gelu(struct ggml_context *ctx, struct ggml_tensor *a)
GGML_API int ggml_cpu_has_blas(void)
GGML_API struct ggml_tensor * ggml_sgn(struct ggml_context *ctx, struct ggml_tensor *a)
GGML_API struct ggml_tensor * ggml_mean(struct ggml_context *ctx, struct ggml_tensor *a)
GGML_API struct ggml_tensor * ggml_add(struct ggml_context *ctx, struct ggml_tensor *a, struct ggml_tensor *b)
GGML_API float ggml_get_f32_1d(const struct ggml_tensor *tensor, int i)
GGML_API struct ggml_tensor * ggml_dup_tensor(struct ggml_context *ctx, const struct ggml_tensor *src)
GGML_API enum ggml_opt_result ggml_opt(struct ggml_context *ctx, struct ggml_opt_params params, struct ggml_tensor *f)
GGML_API struct ggml_tensor * ggml_step(struct ggml_context *ctx, struct ggml_tensor *a)
GGML_API struct ggml_tensor * ggml_scale(struct ggml_context *ctx, struct ggml_tensor *a, struct ggml_tensor *b)
GGML_API void ggml_graph_print(const struct ggml_cgraph *cgraph)
#define GGML_MAX_DIMS
Definition ggml.h:193
GGML_API struct ggml_tensor * ggml_sqrt(struct ggml_context *ctx, struct ggml_tensor *a)
GGML_API struct ggml_tensor * ggml_repeat(struct ggml_context *ctx, struct ggml_tensor *a, struct ggml_tensor *b)
GGML_API struct ggml_tensor * ggml_sub(struct ggml_context *ctx, struct ggml_tensor *a, struct ggml_tensor *b)
GGML_API struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type)
uint16_t ggml_fp16_t
Definition ggml.h:219
GGML_API struct ggml_tensor * ggml_view_tensor(struct ggml_context *ctx, const struct ggml_tensor *src)
GGML_API struct ggml_tensor * ggml_rms_norm(struct ggml_context *ctx, struct ggml_tensor *a)
GGML_API int ggml_cpu_has_avx(void)
GGML_API int64_t ggml_cycles(void)
void(* dequantize_row_q_t)(const void *GGML_RESTRICT x, float *GGML_RESTRICT y, int k)
Definition ggml.h:795
GGML_API struct ggml_cgraph ggml_build_forward(struct ggml_tensor *tensor)
GGML_API struct ggml_tensor * ggml_new_tensor(struct ggml_context *ctx, enum ggml_type type, int n_dims, const int64_t *ne)
GGML_API int64_t ggml_time_ms(void)
GGML_API int64_t ggml_nelements(const struct ggml_tensor *tensor)
GGML_API void ggml_print_objects(const struct ggml_context *ctx)
GGML_API struct ggml_tensor * ggml_flash_attn(struct ggml_context *ctx, struct ggml_tensor *q, struct ggml_tensor *k, struct ggml_tensor *v, bool masked)
GGML_API float ggml_fp16_to_fp32(ggml_fp16_t x)
GGML_API struct ggml_tensor * ggml_flash_ff(struct ggml_context *ctx, struct ggml_tensor *a, struct ggml_tensor *b0, struct ggml_tensor *b1, struct ggml_tensor *c0, struct ggml_tensor *c1)
GGML_API struct ggml_tensor * ggml_dup(struct ggml_context *ctx, struct ggml_tensor *a)
GGML_API struct ggml_tensor * ggml_relu(struct ggml_context *ctx, struct ggml_tensor *a)
GGML_API size_t ggml_quantize_q4_1(const float *src, void *dst, int n, int k, int64_t *hist)
GGML_API void ggml_set_name(struct ggml_tensor *tensor, const char *name)
GGML_API int ggml_cpu_has_fma(void)
GGML_API int ggml_blck_size(enum ggml_type type)
GGML_API struct ggml_tensor * ggml_div(struct ggml_context *ctx, struct ggml_tensor *a, struct ggml_tensor *b)
GGML_API int ggml_cpu_has_avx2(void)
void(* vec_dot_q_t)(const int n, float *GGML_RESTRICT s, const void *GGML_RESTRICT x, const void *GGML_RESTRICT y)
Definition ggml.h:797
GGML_API struct ggml_tensor * ggml_conv_1d_2s(struct ggml_context *ctx, struct ggml_tensor *a, struct ggml_tensor *b)
GGML_API struct ggml_tensor * ggml_abs(struct ggml_context *ctx, struct ggml_tensor *a)
GGML_API struct ggml_tensor * ggml_transpose(struct ggml_context *ctx, struct ggml_tensor *a)
GGML_API struct ggml_tensor * ggml_new_tensor_4d(struct ggml_context *ctx, enum ggml_type type, int64_t ne0, int64_t ne1, int64_t ne2, int64_t ne3)
GGML_API struct ggml_tensor * ggml_set_i32(struct ggml_tensor *tensor, int32_t value)
GGML_API struct ggml_tensor * ggml_view_2d(struct ggml_context *ctx, struct ggml_tensor *a, int64_t ne0, int64_t ne1, size_t nb1, size_t offset)
GGML_API struct ggml_tensor * ggml_reshape(struct ggml_context *ctx, struct ggml_tensor *a, struct ggml_tensor *b)
GGML_API struct ggml_tensor * ggml_new_i32(struct ggml_context *ctx, int32_t value)
GGML_API int ggml_cpu_has_sse3(void)
GGML_API int32_t ggml_get_i32_1d(const struct ggml_tensor *tensor, int i)
GGML_API size_t ggml_quantize_chunk(enum ggml_type type, const float *src, void *dst, int start, int n, int64_t *hist)
GGML_API const char * ggml_get_name(const struct ggml_tensor *tensor)
ggml_ftype
Definition ggml.h:252
@ GGML_FTYPE_MOSTLY_Q4_1_SOME_F16
Definition ggml.h:258
@ GGML_FTYPE_MOSTLY_Q4_2
Definition ggml.h:259
@ GGML_FTYPE_MOSTLY_Q8_0
Definition ggml.h:260
@ GGML_FTYPE_UNKNOWN
Definition ggml.h:253
@ GGML_FTYPE_ALL_F32
Definition ggml.h:254
@ GGML_FTYPE_MOSTLY_Q4_0
Definition ggml.h:256
@ GGML_FTYPE_MOSTLY_Q4_1
Definition ggml.h:257
@ GGML_FTYPE_MOSTLY_Q5_0
Definition ggml.h:261
@ GGML_FTYPE_MOSTLY_F16
Definition ggml.h:255
@ GGML_FTYPE_MOSTLY_Q5_1
Definition ggml.h:262
GGML_API struct ggml_tensor * ggml_new_tensor_1d(struct ggml_context *ctx, enum ggml_type type, int64_t ne0)
GGML_API struct ggml_tensor * ggml_new_tensor_3d(struct ggml_context *ctx, enum ggml_type type, int64_t ne0, int64_t ne1, int64_t ne2)
void(* quantize_row_q_t)(const float *GGML_RESTRICT x, void *GGML_RESTRICT y, int k)
Definition ggml.h:796
GGML_API void ggml_graph_dump_dot(const struct ggml_cgraph *gb, const struct ggml_cgraph *gf, const char *filename)
GGML_API struct ggml_tensor * ggml_reshape_3d(struct ggml_context *ctx, struct ggml_tensor *a, int64_t ne0, int64_t ne1, int64_t ne2)
GGML_API bool ggml_is_quantized(enum ggml_type type)
quantize_fns_t ggml_internal_get_quantize_fn(size_t i)
GGML_API struct ggml_tensor * ggml_conv_1d_1s(struct ggml_context *ctx, struct ggml_tensor *a, struct ggml_tensor *b)
GGML_API struct ggml_tensor * ggml_permute(struct ggml_context *ctx, struct ggml_tensor *a, int axis0, int axis1, int axis2, int axis3)
GGML_API struct ggml_tensor * ggml_view_3d(struct ggml_context *ctx, struct ggml_tensor *a, int64_t ne0, int64_t ne1, int64_t ne2, size_t nb1, size_t nb2, size_t offset)
void(* ggml_unary_op_f32_t)(const int, float *, const float *)
Definition ggml.h:617
GGML_API struct ggml_cgraph ggml_build_backward(struct ggml_context *ctx, struct ggml_cgraph *gf, bool keep)
#define GGML_API
Definition ggml.h:183
GGML_API struct ggml_tensor * ggml_set_f32(struct ggml_tensor *tensor, float value)
GGML_API struct ggml_tensor * ggml_diag_mask_inf(struct ggml_context *ctx, struct ggml_tensor *a, int n_past)
GGML_API size_t ggml_quantize_q8_0(const float *src, void *dst, int n, int k, int64_t *hist)
GGML_API struct ggml_tensor * ggml_cont(struct ggml_context *ctx, struct ggml_tensor *a)
GGML_API void ggml_set_f32_1d(const struct ggml_tensor *tensor, int i, float value)
GGML_API float ggml_type_sizef(enum ggml_type type)
GGML_API void ggml_fp16_to_fp32_row(const ggml_fp16_t *x, float *y, size_t n)
GGML_API size_t ggml_quantize_q4_2(const float *src, void *dst, int n, int k, int64_t *hist)
GGML_API int ggml_cpu_has_gpublas(void)
GGML_API int ggml_cpu_has_cublas(void)
GGML_API struct ggml_tensor * ggml_sqr(struct ggml_context *ctx, struct ggml_tensor *a)
GGML_API int ggml_cpu_has_arm_fma(void)
ggml_linesearch
Definition ggml.h:659
@ GGML_LINESEARCH_BACKTRACKING_ARMIJO
Definition ggml.h:662
@ GGML_LINESEARCH_BACKTRACKING_STRONG_WOLFE
Definition ggml.h:664
@ GGML_LINESEARCH_DEFAULT
Definition ggml.h:660
@ GGML_LINESEARCH_BACKTRACKING_WOLFE
Definition ggml.h:663
#define GGML_MAX_OPT
Definition ggml.h:197
GGML_API void ggml_build_forward_expand(struct ggml_cgraph *cgraph, struct ggml_tensor *tensor)
int n_leafs
Definition ggml.h:370
struct ggml_tensor * nodes[GGML_MAX_NODES]
Definition ggml.h:376
struct ggml_tensor * leafs[GGML_MAX_NODES]
Definition ggml.h:378
struct ggml_tensor * work
Definition ggml.h:374
size_t work_size
Definition ggml.h:373
int n_nodes
Definition ggml.h:369
struct ggml_tensor * grads[GGML_MAX_NODES]
Definition ggml.h:377
int64_t perf_cycles
Definition ggml.h:382
int64_t perf_time_us
Definition ggml.h:383
int perf_runs
Definition ggml.h:381
int n_threads
Definition ggml.h:371
bool no_alloc
Definition ggml.h:399
size_t mem_size
Definition ggml.h:397
void * mem_buffer
Definition ggml.h:398
char padding[8]
Definition ggml.h:324
struct ggml_object * next
Definition ggml.h:322
size_t size
Definition ggml.h:320
size_t offs
Definition ggml.h:319
float delta
Definition ggml.h:700
bool print_forward_graph
Definition ggml.h:710
enum ggml_opt_type type
Definition ggml.h:689
int past
Definition ggml.h:699
bool print_backward_graph
Definition ggml.h:711
float ftol
Definition ggml.h:734
enum ggml_linesearch linesearch
Definition ggml.h:739
float wolfe
Definition ggml.h:735
struct ggml_opt_params::@1 adam
int max_no_improvement
Definition ggml.h:708
float alpha
Definition ggml.h:718
float min_step
Definition ggml.h:736
struct ggml_opt_params::@2 lbfgs
float beta2
Definition ggml.h:720
float eps
Definition ggml.h:721
int n_iter
Definition ggml.h:716
float eps_g
Definition ggml.h:723
float beta1
Definition ggml.h:719
int n_threads
Definition ggml.h:691
float eps_f
Definition ggml.h:722
int max_linesearch
Definition ggml.h:731
float max_step
Definition ggml.h:737
int m
Definition ggml.h:729
size_t size
Definition ggml.h:390
void * data
Definition ggml.h:391
size_t offs
Definition ggml.h:389
enum ggml_op op
Definition ggml.h:342
void * data
Definition ggml.h:359
size_t nb[GGML_MAX_DIMS]
Definition ggml.h:336
int64_t ne[GGML_MAX_DIMS]
Definition ggml.h:335
int n_dims
Definition ggml.h:334
int64_t perf_time_us
Definition ggml.h:357
int64_t perf_cycles
Definition ggml.h:356
enum ggml_type type
Definition ggml.h:332
struct ggml_tensor * opt[GGML_MAX_OPT]
Definition ggml.h:349
struct ggml_tensor * grad
Definition ggml.h:346
struct ggml_tensor * src0
Definition ggml.h:347
int n_tasks
Definition ggml.h:352
char name[32]
Definition ggml.h:361
char padding[8]
Definition ggml.h:363
int perf_runs
Definition ggml.h:355
struct ggml_tensor * src1
Definition ggml.h:348
bool is_param
Definition ggml.h:344
dequantize_row_q_t dequantize_row_q
Definition ggml.h:802
vec_dot_q_t vec_dot_q
Definition ggml.h:806
quantize_row_q_t quantize_row_q_reference
Definition ggml.h:804
quantize_row_q_t quantize_row_q_dot
Definition ggml.h:805
quantize_row_q_t quantize_row_q
Definition ggml.h:803
enum ggml_type vec_dot_type
Definition ggml.h:807