From 4c3a7cdd46ca747c8cfceedb38af6b041230b496 Mon Sep 17 00:00:00 2001 From: l3utterfly Date: Sat, 7 Oct 2023 22:34:33 +0800 Subject: [PATCH 1/8] fixed bad memory access exception on ios 17 --- ggml-alloc.c | 54 ++++++++++++++++++++++++---------------------------- ggml.c | 53 ++++++++++++++++++++++++++------------------------- llama.cpp | 48 ++++++++++++++++++++++++++++++++++++++-------- 3 files changed, 92 insertions(+), 63 deletions(-) diff --git a/ggml-alloc.c b/ggml-alloc.c index 805759db74fef..66e1eee5b9357 100644 --- a/ggml-alloc.c +++ b/ggml-alloc.c @@ -291,21 +291,17 @@ void ggml_allocr_reset(struct ggml_allocr * alloc) { struct ggml_allocr * ggml_allocr_new(void * data, size_t size, size_t alignment) { struct ggml_allocr * alloc = (struct ggml_allocr *)malloc(sizeof(struct ggml_allocr) /* + n_free_blocks * sizeof(struct free_block) */); - *alloc = (struct ggml_allocr){ - /*.data = */ data, - /*.size = */ size, - /*.alignment = */ alignment, - /*.n_free_blocks = */ 0, - /*.free_blocks = */ {{0}}, - /*.hash_table = */ {{0}}, - /*.max_size = */ 0, - /*.measure = */ false, - /*.parse_seq = */ {0}, - /*.parse_seq_len = */ 0, -#ifdef GGML_ALLOCATOR_DEBUG - /*.allocated_tensors = */ {0}, -#endif - }; + (*alloc).data = data; + (*alloc).size = size; + (*alloc).alignment = alignment; + (*alloc).n_free_blocks = 0; + (*alloc).max_size = 0; + (*alloc).measure = false; + (*alloc).parse_seq_len = 0; + + memset((*alloc).free_blocks, 0, sizeof((*alloc).free_blocks)); + memset((*alloc).hash_table, 0, sizeof((*alloc).hash_table)); + memset((*alloc).parse_seq, 0, sizeof((*alloc).parse_seq)); ggml_allocr_reset(alloc); @@ -370,22 +366,22 @@ struct ggml_allocr * ggml_allocr_new_measure(size_t alignment) { size_t size; alloc_measure_vmem(&base_addr, &size); - - *alloc = (struct ggml_allocr){ - /*.data = */ base_addr, - /*.size = */ size, - /*.alignment = */ alignment, - /*.n_free_blocks = */ 0, - /*.free_blocks = */ {{0}}, - /*.hash_table = */ {{0}}, - /*.max_size = */ 0, - /*.measure = */ true, - /*.parse_seq = */ {0}, - /*.parse_seq_len = */ 0, + + (*alloc).data = base_addr; + (*alloc).size = size; + (*alloc).alignment = alignment; + (*alloc).n_free_blocks = 0; + (*alloc).max_size = 0; + (*alloc).measure = true; + (*alloc).parse_seq_len = 0; + + memset((*alloc).free_blocks, 0, sizeof((*alloc).free_blocks)); + memset((*alloc).hash_table, 0, sizeof((*alloc).hash_table)); + memset((*alloc).parse_seq, 0, sizeof((*alloc).parse_seq)); + #ifdef GGML_ALLOCATOR_DEBUG - /*.allocated_tensors = */ {0}, + (*alloc).allocated_tensors = {0}; #endif - }; ggml_allocr_reset(alloc); diff --git a/ggml.c b/ggml.c index 911a63988e027..a288895ff6d08 100644 --- a/ggml.c +++ b/ggml.c @@ -4722,19 +4722,21 @@ struct ggml_context * ggml_init(struct ggml_init_params params) { } const size_t mem_size = params.mem_buffer ? params.mem_size : GGML_PAD(params.mem_size, GGML_MEM_ALIGN); - - *ctx = (struct ggml_context) { - /*.mem_size =*/ mem_size, - /*.mem_buffer =*/ params.mem_buffer ? params.mem_buffer : GGML_ALIGNED_MALLOC(mem_size), - /*.mem_buffer_owned =*/ params.mem_buffer ? false : true, - /*.no_alloc =*/ params.no_alloc, - /*.no_alloc_save =*/ params.no_alloc, - /*.n_objects =*/ 0, - /*.objects_begin =*/ NULL, - /*.objects_end =*/ NULL, - /*.scratch =*/ { 0, 0, NULL, }, - /*.scratch_save =*/ { 0, 0, NULL, }, - }; + + ctx = (struct ggml_context *)malloc(sizeof(struct ggml_context)); + + struct ggml_scratch empty_scratch = { 0, 0, NULL }; + + (*ctx).mem_size = mem_size; + (*ctx).mem_buffer = params.mem_buffer ? params.mem_buffer : GGML_ALIGNED_MALLOC(mem_size); + (*ctx).mem_buffer_owned = params.mem_buffer ? false : true; + (*ctx).no_alloc = params.no_alloc; + (*ctx).no_alloc_save = params.no_alloc; + (*ctx).n_objects = 0; + (*ctx).objects_begin = NULL; + (*ctx).objects_end = NULL; + (*ctx).scratch = empty_scratch; + (*ctx).scratch_save = empty_scratch; GGML_ASSERT(ctx->mem_buffer != NULL); @@ -18078,19 +18080,18 @@ struct ggml_cgraph ggml_build_backward(struct ggml_context * ctx, struct ggml_cg struct ggml_cgraph * ggml_new_graph(struct ggml_context * ctx) { struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_GRAPH, GGML_GRAPH_SIZE); struct ggml_cgraph * cgraph = (struct ggml_cgraph *) ((char *) ctx->mem_buffer + obj->offs); - - *cgraph = (struct ggml_cgraph) { - /*.n_nodes =*/ 0, - /*.n_leafs =*/ 0, - /*.nodes =*/ { NULL }, - /*.grads =*/ { NULL }, - /*.leafs =*/ { NULL }, - /*.hash_table =*/ { NULL }, - /*.order =*/ GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT, - /*.perf_runs =*/ 0, - /*.perf_cycles =*/ 0, - /*.perf_time_us =*/ 0, - }; + + (*cgraph).n_nodes = 0; + (*cgraph).n_leafs = 0; + (*cgraph).order = GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT; + (*cgraph).perf_runs = 0; + (*cgraph).perf_cycles = 0; + (*cgraph).perf_time_us = 0; + + memset((*cgraph).nodes, 0, sizeof((*cgraph).nodes)); + memset((*cgraph).grads, 0, sizeof((*cgraph).grads)); + memset((*cgraph).leafs, 0, sizeof((*cgraph).leafs)); + memset((*cgraph).visited_hash_table, 0, sizeof((*cgraph).visited_hash_table)); return cgraph; } diff --git a/llama.cpp b/llama.cpp index d10656bb801db..4d5fb7e4be308 100644 --- a/llama.cpp +++ b/llama.cpp @@ -8222,7 +8222,21 @@ static void llama_copy_state_data_internal(struct llama_context * ctx, llama_dat const size_t elt_size = ggml_element_size(kv_self.k); ggml_context * cpy_ctx = ggml_init({ 4096, NULL, /* no_alloc */ true }); - ggml_cgraph gf{}; + + // create a temporary cgraph without initialising ggml objects, code inspired from `ggml.c:ggml_new_graph` + struct ggml_cgraph * gf = (struct ggml_cgraph *) (malloc(sizeof(ggml_cgraph))); + + (*gf).n_nodes = 0; + (*gf).n_leafs = 0; + (*gf).order = GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT; + (*gf).perf_runs = 0; + (*gf).perf_cycles = 0; + (*gf).perf_time_us = 0; + + memset((*gf).nodes, 0, sizeof((*gf).nodes)); + memset((*gf).grads, 0, sizeof((*gf).grads)); + memset((*gf).leafs, 0, sizeof((*gf).leafs)); + memset((*gf).visited_hash_table, 0, sizeof((*gf).visited_hash_table)); ggml_tensor * kout3d = ggml_new_tensor_3d(cpy_ctx, kv_self.k->type, n_embd, kv_head, n_layer); std::vector kout3d_data(ggml_nbytes(kout3d), 0); @@ -8240,9 +8254,9 @@ static void llama_copy_state_data_internal(struct llama_context * ctx, llama_dat kv_head, n_embd, n_layer, elt_size*n_ctx, elt_size*n_ctx*n_embd, 0); - ggml_build_forward_expand(&gf, ggml_cpy(cpy_ctx, k3d, kout3d)); - ggml_build_forward_expand(&gf, ggml_cpy(cpy_ctx, v3d, vout3d)); - ggml_graph_compute_helper(ctx->work_buffer, &gf, /*n_threads*/ 1); + ggml_build_forward_expand(gf, ggml_cpy(cpy_ctx, k3d, kout3d)); + ggml_build_forward_expand(gf, ggml_cpy(cpy_ctx, v3d, vout3d)); + ggml_graph_compute_helper(ctx->work_buffer, gf, /*n_threads*/ 1); ggml_free(cpy_ctx); @@ -8250,6 +8264,10 @@ static void llama_copy_state_data_internal(struct llama_context * ctx, llama_dat // write them to file data_ctx->write(kout3d_data.data(), kout3d_data.size()); data_ctx->write(vout3d_data.data(), vout3d_data.size()); + + // free our allocated graph + free(gf); + gf = NULL; } for (uint32_t i = 0; i < kv_size; ++i) { @@ -8350,7 +8368,21 @@ size_t llama_set_state_data(struct llama_context * ctx, uint8_t * src) { const size_t elt_size = ggml_element_size(kv_self.k); ggml_context * cpy_ctx = ggml_init({ 4096, NULL, /* no_alloc */ true }); - ggml_cgraph gf{}; + + // create a temporary cgraph without initialising ggml objects, code inspired from `ggml.c:ggml_new_graph` + struct ggml_cgraph * gf = (struct ggml_cgraph *) (malloc(sizeof(ggml_cgraph))); + + (*gf).n_nodes = 0; + (*gf).n_leafs = 0; + (*gf).order = GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT; + (*gf).perf_runs = 0; + (*gf).perf_cycles = 0; + (*gf).perf_time_us = 0; + + memset((*gf).nodes, 0, sizeof((*gf).nodes)); + memset((*gf).grads, 0, sizeof((*gf).grads)); + memset((*gf).leafs, 0, sizeof((*gf).leafs)); + memset((*gf).visited_hash_table, 0, sizeof((*gf).visited_hash_table)); ggml_tensor * kin3d = ggml_new_tensor_3d(cpy_ctx, kv_self.k->type, n_embd, kv_head, n_layer); kin3d->data = (void *) inp; @@ -8368,9 +8400,9 @@ size_t llama_set_state_data(struct llama_context * ctx, uint8_t * src) { kv_head, n_embd, n_layer, elt_size*n_ctx, elt_size*n_ctx*n_embd, 0); - ggml_build_forward_expand(&gf, ggml_cpy(cpy_ctx, kin3d, k3d)); - ggml_build_forward_expand(&gf, ggml_cpy(cpy_ctx, vin3d, v3d)); - ggml_graph_compute_helper(ctx->work_buffer, &gf, /*n_threads*/ 1); + ggml_build_forward_expand(gf, ggml_cpy(cpy_ctx, kin3d, k3d)); + ggml_build_forward_expand(gf, ggml_cpy(cpy_ctx, vin3d, v3d)); + ggml_graph_compute_helper(ctx->work_buffer, gf, /*n_threads*/ 1); ggml_free(cpy_ctx); } From cd0022f70f7f93220db3146cae401d20df9100d4 Mon Sep 17 00:00:00 2001 From: l3utterfly Date: Sat, 7 Oct 2023 22:55:16 +0800 Subject: [PATCH 2/8] added GGML_ALLOCATOR_DEBUG code back --- ggml-alloc.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ggml-alloc.c b/ggml-alloc.c index 66e1eee5b9357..8d1117b203f84 100644 --- a/ggml-alloc.c +++ b/ggml-alloc.c @@ -303,6 +303,10 @@ struct ggml_allocr * ggml_allocr_new(void * data, size_t size, size_t alignment) memset((*alloc).hash_table, 0, sizeof((*alloc).hash_table)); memset((*alloc).parse_seq, 0, sizeof((*alloc).parse_seq)); +#ifdef GGML_ALLOCATOR_DEBUG + memset((*alloc).allocated_tensors, 0, sizeof((*alloc).allocated_tensors)); +#endif + ggml_allocr_reset(alloc); return alloc; @@ -380,7 +384,7 @@ struct ggml_allocr * ggml_allocr_new_measure(size_t alignment) { memset((*alloc).parse_seq, 0, sizeof((*alloc).parse_seq)); #ifdef GGML_ALLOCATOR_DEBUG - (*alloc).allocated_tensors = {0}; + memset((*alloc).allocated_tensors, 0, sizeof((*alloc).allocated_tensors)); #endif ggml_allocr_reset(alloc); From 88a14fcfefd6ae5816bbda2da2a30f91890a01c4 Mon Sep 17 00:00:00 2001 From: l3utterfly Date: Sun, 8 Oct 2023 00:33:29 +0800 Subject: [PATCH 3/8] fixed logic error, should not allocate more memory, and instead update existing parts --- ggml.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/ggml.c b/ggml.c index a288895ff6d08..6864bd161c0b8 100644 --- a/ggml.c +++ b/ggml.c @@ -4723,20 +4723,18 @@ struct ggml_context * ggml_init(struct ggml_init_params params) { const size_t mem_size = params.mem_buffer ? params.mem_size : GGML_PAD(params.mem_size, GGML_MEM_ALIGN); - ctx = (struct ggml_context *)malloc(sizeof(struct ggml_context)); - struct ggml_scratch empty_scratch = { 0, 0, NULL }; - (*ctx).mem_size = mem_size; - (*ctx).mem_buffer = params.mem_buffer ? params.mem_buffer : GGML_ALIGNED_MALLOC(mem_size); - (*ctx).mem_buffer_owned = params.mem_buffer ? false : true; - (*ctx).no_alloc = params.no_alloc; - (*ctx).no_alloc_save = params.no_alloc; - (*ctx).n_objects = 0; - (*ctx).objects_begin = NULL; - (*ctx).objects_end = NULL; - (*ctx).scratch = empty_scratch; - (*ctx).scratch_save = empty_scratch; + ctx->mem_size = mem_size; + ctx->mem_buffer = params.mem_buffer ? params.mem_buffer : GGML_ALIGNED_MALLOC(mem_size); + ctx->mem_buffer_owned = params.mem_buffer ? false : true; + ctx->no_alloc = params.no_alloc; + ctx->no_alloc_save = params.no_alloc; + ctx->n_objects = 0; + ctx->objects_begin = NULL; + ctx->objects_end = NULL; + ctx->scratch = empty_scratch; + ctx->scratch_save = empty_scratch; GGML_ASSERT(ctx->mem_buffer != NULL); From 9ee8aeccd73dbc4a000eb93c1c08eea843c752a3 Mon Sep 17 00:00:00 2001 From: l3utterfly Date: Sun, 8 Oct 2023 00:36:20 +0800 Subject: [PATCH 4/8] fixed memory leak by freeing temporary graph during session load --- llama.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/llama.cpp b/llama.cpp index 4d5fb7e4be308..242815af2471f 100644 --- a/llama.cpp +++ b/llama.cpp @@ -8405,6 +8405,10 @@ size_t llama_set_state_data(struct llama_context * ctx, uint8_t * src) { ggml_graph_compute_helper(ctx->work_buffer, gf, /*n_threads*/ 1); ggml_free(cpy_ctx); + + // free our allocated graph + free(gf); + gf = NULL; } ctx->kv_self.head = kv_head; From cb5869057985a2b036f8c8ea6795336c5a8f26cc Mon Sep 17 00:00:00 2001 From: l3utterfly Date: Thu, 12 Oct 2023 15:54:01 +0800 Subject: [PATCH 5/8] reverted changes in ggml.c --- ggml.c | 53 +++++++++++++++++++++++++++-------------------------- 1 file changed, 27 insertions(+), 26 deletions(-) diff --git a/ggml.c b/ggml.c index 8747eca4d7494..d16233f12c999 100644 --- a/ggml.c +++ b/ggml.c @@ -4698,19 +4698,19 @@ struct ggml_context * ggml_init(struct ggml_init_params params) { } const size_t mem_size = params.mem_buffer ? params.mem_size : GGML_PAD(params.mem_size, GGML_MEM_ALIGN); - - struct ggml_scratch empty_scratch = { 0, 0, NULL }; - - ctx->mem_size = mem_size; - ctx->mem_buffer = params.mem_buffer ? params.mem_buffer : GGML_ALIGNED_MALLOC(mem_size); - ctx->mem_buffer_owned = params.mem_buffer ? false : true; - ctx->no_alloc = params.no_alloc; - ctx->no_alloc_save = params.no_alloc; - ctx->n_objects = 0; - ctx->objects_begin = NULL; - ctx->objects_end = NULL; - ctx->scratch = empty_scratch; - ctx->scratch_save = empty_scratch; + + *ctx = (struct ggml_context) { + /*.mem_size =*/ mem_size, + /*.mem_buffer =*/ params.mem_buffer ? params.mem_buffer : GGML_ALIGNED_MALLOC(mem_size), + /*.mem_buffer_owned =*/ params.mem_buffer ? false : true, + /*.no_alloc =*/ params.no_alloc, + /*.no_alloc_save =*/ params.no_alloc, + /*.n_objects =*/ 0, + /*.objects_begin =*/ NULL, + /*.objects_end =*/ NULL, + /*.scratch =*/ { 0, 0, NULL, }, + /*.scratch_save =*/ { 0, 0, NULL, }, + }; GGML_ASSERT(ctx->mem_buffer != NULL); @@ -18052,18 +18052,19 @@ struct ggml_cgraph ggml_build_backward(struct ggml_context * ctx, struct ggml_cg struct ggml_cgraph * ggml_new_graph(struct ggml_context * ctx) { struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_GRAPH, GGML_GRAPH_SIZE); struct ggml_cgraph * cgraph = (struct ggml_cgraph *) ((char *) ctx->mem_buffer + obj->offs); - - (*cgraph).n_nodes = 0; - (*cgraph).n_leafs = 0; - (*cgraph).order = GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT; - (*cgraph).perf_runs = 0; - (*cgraph).perf_cycles = 0; - (*cgraph).perf_time_us = 0; - - memset((*cgraph).nodes, 0, sizeof((*cgraph).nodes)); - memset((*cgraph).grads, 0, sizeof((*cgraph).grads)); - memset((*cgraph).leafs, 0, sizeof((*cgraph).leafs)); - memset((*cgraph).visited_hash_table, 0, sizeof((*cgraph).visited_hash_table)); + + *cgraph = (struct ggml_cgraph) { + /*.n_nodes =*/ 0, + /*.n_leafs =*/ 0, + /*.nodes =*/ { NULL }, + /*.grads =*/ { NULL }, + /*.leafs =*/ { NULL }, + /*.hash_table =*/ { NULL }, + /*.order =*/ GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT, + /*.perf_runs =*/ 0, + /*.perf_cycles =*/ 0, + /*.perf_time_us =*/ 0, + }; return cgraph; } @@ -22003,4 +22004,4 @@ int ggml_cpu_has_vsx(void) { #endif } -//////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////// \ No newline at end of file From 6f553d9183e2a256f480260700bb4304d44f2c19 Mon Sep 17 00:00:00 2001 From: l3utterfly Date: Thu, 12 Oct 2023 15:58:10 +0800 Subject: [PATCH 6/8] added trailing newlines --- ggml-alloc.c | 2 +- ggml.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ggml-alloc.c b/ggml-alloc.c index 49657b79760e3..34eba3f830e84 100644 --- a/ggml-alloc.c +++ b/ggml-alloc.c @@ -591,4 +591,4 @@ size_t ggml_allocr_alloc_graph(struct ggml_allocr * alloc, struct ggml_cgraph * size_t ggml_allocr_max_size(struct ggml_allocr * alloc) { return alloc->max_size; -} \ No newline at end of file +} diff --git a/ggml.c b/ggml.c index d16233f12c999..1f5598fa6af8f 100644 --- a/ggml.c +++ b/ggml.c @@ -22004,4 +22004,4 @@ int ggml_cpu_has_vsx(void) { #endif } -//////////////////////////////////////////////////////////////////////////////// \ No newline at end of file +//////////////////////////////////////////////////////////////////////////////// From c8159ead24d212675f5e6fa4cbd191ef70b729e5 Mon Sep 17 00:00:00 2001 From: l3utterfly Date: Thu, 12 Oct 2023 15:59:18 +0800 Subject: [PATCH 7/8] removed trailing whitespaces --- llama.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llama.cpp b/llama.cpp index 56474c68cdf29..81671031004b7 100644 --- a/llama.cpp +++ b/llama.cpp @@ -9001,10 +9001,10 @@ static void llama_copy_state_data_internal(struct llama_context * ctx, llama_dat const size_t elt_size = ggml_element_size(kv_self.k); ggml_context * cpy_ctx = ggml_init({ 4096, NULL, /* no_alloc */ true }); - + // create a temporary cgraph without initialising ggml objects, code inspired from `ggml.c:ggml_new_graph` struct ggml_cgraph * gf = (struct ggml_cgraph *) (malloc(sizeof(ggml_cgraph))); - + (*gf).n_nodes = 0; (*gf).n_leafs = 0; (*gf).order = GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT; @@ -9043,7 +9043,7 @@ static void llama_copy_state_data_internal(struct llama_context * ctx, llama_dat // write them to file data_ctx->write(kout3d_data.data(), kout3d_data.size()); data_ctx->write(vout3d_data.data(), vout3d_data.size()); - + // free our allocated graph free(gf); gf = NULL; @@ -9150,7 +9150,7 @@ size_t llama_set_state_data(struct llama_context * ctx, uint8_t * src) { // create a temporary cgraph without initialising ggml objects, code inspired from `ggml.c:ggml_new_graph` struct ggml_cgraph * gf = (struct ggml_cgraph *) (malloc(sizeof(ggml_cgraph))); - + (*gf).n_nodes = 0; (*gf).n_leafs = 0; (*gf).order = GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT; From 0ae5b1aa62b26ba389b61a822c38e4b332a40736 Mon Sep 17 00:00:00 2001 From: l3utterfly Date: Thu, 12 Oct 2023 16:11:21 +0800 Subject: [PATCH 8/8] removed trailing whitespace --- llama.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llama.cpp b/llama.cpp index 81671031004b7..c7deb3fb5fe92 100644 --- a/llama.cpp +++ b/llama.cpp @@ -9147,7 +9147,7 @@ size_t llama_set_state_data(struct llama_context * ctx, uint8_t * src) { const size_t elt_size = ggml_element_size(kv_self.k); ggml_context * cpy_ctx = ggml_init({ 4096, NULL, /* no_alloc */ true }); - + // create a temporary cgraph without initialising ggml objects, code inspired from `ggml.c:ggml_new_graph` struct ggml_cgraph * gf = (struct ggml_cgraph *) (malloc(sizeof(ggml_cgraph)));