diff --git a/Makefile b/Makefile index 8d2ccddc469f97..7e015af3e9bfed 100644 --- a/Makefile +++ b/Makefile @@ -325,9 +325,9 @@ ifdef LLAMA_DEBUG endif else MK_CPPFLAGS += -DNDEBUG - MK_CFLAGS += -O3 - MK_CXXFLAGS += -O3 - MK_NVCCFLAGS += -O3 + MK_CFLAGS += -O3 -g + MK_CXXFLAGS += -O3 -g + MK_NVCCFLAGS += -O3 -g endif ifdef LLAMA_SANITIZE_THREAD diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h index 548661b9bb6368..d0311f6cacab2a 100644 --- a/ggml/include/ggml.h +++ b/ggml/include/ggml.h @@ -254,18 +254,8 @@ #define GGML_PAD(x, n) (((x) + (n) - 1) & ~((n) - 1)) -#define GGML_ASSERT(x) \ - do { \ - if (!(x)) { \ - fflush(stdout); \ - fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", __FILE__, __LINE__, #x); \ - ggml_print_backtrace(); \ - abort(); \ - } \ - } while (0) - #ifndef NDEBUG -#define GGML_UNREACHABLE() GGML_ASSERT(!"statement should not be reached") +#define GGML_UNREACHABLE() do { fprintf(stderr, "statement should be unreachable\n"); abort(); } while(0) #elif defined(__GNUC__) #define GGML_UNREACHABLE() __builtin_unreachable() #elif defined(_MSC_VER) @@ -274,6 +264,16 @@ #define GGML_UNREACHABLE() ((void) 0) #endif +#ifdef __cplusplus +#define GGML_NORETURN [[noreturn]] +#elif defined(_MSC_VER) +#define GGML_NORETURN __declspec(noreturn) +#else +#define GGML_NORETURN _Noreturn +#endif + +#define GGML_ASSERT(x) if (!(x)) ggml_abort(__FILE__, __LINE__, #x) + // used to copy the number of elements and stride in bytes of tensors into local variables. // main purpose is to reduce code duplication and improve readability. // @@ -322,6 +322,8 @@ extern "C" { #endif + GGML_API GGML_NORETURN void ggml_abort(const char * file, int line, const char * expr); + enum ggml_status { GGML_STATUS_ALLOC_FAILED = -2, GGML_STATUS_FAILED = -1, @@ -636,8 +638,11 @@ extern "C" { GGML_CGRAPH_EVAL_ORDER_COUNT }; + typedef uint32_t ggml_bitset_t; + struct ggml_hash_set { size_t size; + ggml_bitset_t * used; struct ggml_tensor ** keys; }; @@ -651,7 +656,7 @@ extern "C" { struct ggml_tensor ** grads; struct ggml_tensor ** leafs; - struct ggml_hash_set visited_hash_table; + struct ggml_hash_set visited_hash_set; enum ggml_cgraph_eval_order order; }; @@ -698,8 +703,6 @@ extern "C" { GGML_API int64_t ggml_cycles(void); GGML_API int64_t ggml_cycles_per_ms(void); - GGML_API void ggml_print_backtrace(void); - // accepts a UTF-8 path, even on Windows GGML_API FILE * ggml_fopen(const char * fname, const char * mode); @@ -2005,8 +2008,8 @@ extern "C" { // ggml_graph_plan() has to be called before ggml_graph_compute() // when plan.work_size > 0, caller must allocate memory for plan.work_data - GGML_API struct ggml_cplan ggml_graph_plan (const struct ggml_cgraph * cgraph, int n_threads /*= GGML_DEFAULT_N_THREADS*/); - GGML_API enum ggml_status ggml_graph_compute ( struct ggml_cgraph * cgraph, struct ggml_cplan * cplan); + GGML_API struct ggml_cplan ggml_graph_plan (const struct ggml_cgraph * cgraph, int n_threads /*= GGML_DEFAULT_N_THREADS*/); + GGML_API enum ggml_status ggml_graph_compute( struct ggml_cgraph * cgraph, struct ggml_cplan * cplan); // same as ggml_graph_compute() but the work data is allocated as a part of the context // note: the drawback of this API is that you must have ensured that the context has enough memory for the work data GGML_API enum ggml_status ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads); diff --git a/ggml/src/ggml-alloc.c b/ggml/src/ggml-alloc.c index e176b883e38c50..43378302e50c25 100644 --- a/ggml/src/ggml-alloc.c +++ b/ggml/src/ggml-alloc.c @@ -443,7 +443,7 @@ void ggml_gallocr_free(ggml_gallocr_t galloc) { } } - free(galloc->hash_set.keys); + ggml_hash_set_free(&galloc->hash_set); free(galloc->hash_values); free(galloc->bufts); free(galloc->buffers); @@ -456,7 +456,7 @@ void ggml_gallocr_free(ggml_gallocr_t galloc) { typedef struct ggml_gallocr * ggml_gallocr_t; static struct hash_node * ggml_gallocr_hash_get(ggml_gallocr_t galloc, struct ggml_tensor * t) { - size_t i = ggml_hash_find_or_insert(galloc->hash_set, t); + size_t i = ggml_hash_find_or_insert(&galloc->hash_set, t); return &galloc->hash_values[i]; } @@ -565,8 +565,8 @@ static int get_node_buffer_id(const int * node_buffer_ids, int i) { static void ggml_gallocr_alloc_graph_impl(ggml_gallocr_t galloc, struct ggml_cgraph * graph, const int * node_buffer_ids, const int * leaf_buffer_ids) { // clear hash tables - memset(galloc->hash_set.keys, 0, galloc->hash_set.size * sizeof(struct ggml_tensor *)); - memset(galloc->hash_values, 0, galloc->hash_set.size * sizeof(struct hash_node)); + ggml_hash_set_reset(&galloc->hash_set); + memset(galloc->hash_values, 0, sizeof(struct hash_node) * galloc->hash_set.size); // allocate leafs // these may be tensors that the application is not using in the graph, but may still want to allocate for other purposes @@ -671,21 +671,19 @@ static void ggml_gallocr_alloc_graph_impl(ggml_gallocr_t galloc, struct ggml_cgr } bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, const int * node_buffer_ids, const int * leaf_buffer_ids) { - size_t hash_size = graph->visited_hash_table.size; + size_t min_hash_size = graph->n_nodes + graph->n_leafs; + // add 25% margin to avoid hash collisions + min_hash_size += min_hash_size / 4; // initialize hash table - if (galloc->hash_set.size < hash_size) { - free(galloc->hash_set.keys); - free(galloc->hash_values); - galloc->hash_set.size = hash_size; - galloc->hash_set.keys = calloc(hash_size, sizeof(struct ggml_tensor *)); - galloc->hash_values = calloc(hash_size, sizeof(struct hash_node)); + if (galloc->hash_set.size < min_hash_size) { + ggml_hash_set_free(&galloc->hash_set); + galloc->hash_set = ggml_hash_set_new(min_hash_size); GGML_ASSERT(galloc->hash_set.keys != NULL); + + free(galloc->hash_values); + galloc->hash_values = malloc(sizeof(struct hash_node) * galloc->hash_set.size); GGML_ASSERT(galloc->hash_values != NULL); - } else { - // reset hash table - memset(galloc->hash_set.keys, 0, sizeof(struct ggml_tensor *) * galloc->hash_set.size); - memset(galloc->hash_values, 0, sizeof(struct hash_node) * galloc->hash_set.size); } // reset allocators @@ -817,7 +815,7 @@ static void ggml_gallocr_init_tensor(ggml_gallocr_t galloc, struct ggml_tensor * } static bool ggml_gallocr_node_needs_realloc(ggml_gallocr_t galloc, struct ggml_tensor * node, struct tensor_alloc * talloc) { - ggml_backend_buffer_type_t buft = talloc->buffer_id != -1 ? galloc->bufts[talloc->buffer_id] : NULL; + ggml_backend_buffer_type_t buft = galloc->bufts[talloc->buffer_id]; size_t node_size = (node->data || node->view_src) ? 0 : ggml_backend_buft_get_alloc_size(buft, node); return talloc->size_max >= node_size; } diff --git a/ggml/src/ggml-backend.c b/ggml/src/ggml-backend.c index d39cfed8886f42..240136a003c1e5 100644 --- a/ggml/src/ggml-backend.c +++ b/ggml/src/ggml-backend.c @@ -1055,11 +1055,10 @@ struct ggml_backend_sched { ggml_backend_buffer_type_t bufts[GGML_SCHED_MAX_BACKENDS]; ggml_gallocr_t galloc; - // hash keys of the nodes in the graph - struct ggml_hash_set hash_set; - // hash values - int * tensor_backend_id; - struct ggml_tensor * (* tensor_copies)[GGML_SCHED_MAX_BACKENDS][GGML_SCHED_MAX_COPIES]; + // hash map of the nodes in the graph + struct ggml_hash_set hash_set; + int * hv_tensor_backend_ids; // [hash_set.size] + struct ggml_tensor ** hv_tensor_copies; // [hash_set.size][n_backends][n_copies] int * node_backend_ids; // [graph_size] int * leaf_backend_ids; // [graph_size] @@ -1068,7 +1067,7 @@ struct ggml_backend_sched { int * prev_leaf_backend_ids; // [graph_size] // copy of the graph with modified inputs - struct ggml_cgraph * graph; + struct ggml_cgraph graph; // graph splits struct ggml_backend_sched_split * splits; @@ -1087,19 +1086,16 @@ struct ggml_backend_sched { ggml_backend_sched_eval_callback callback_eval; void * callback_eval_user_data; - bool debug; + char * context_buffer; + size_t context_buffer_size; - // align context_buffer to GGML_MEM_ALIGN -#ifdef _MSC_VER - __declspec(align(GGML_MEM_ALIGN)) -#else - __attribute__((aligned(GGML_MEM_ALIGN))) -#endif - char context_buffer[GGML_SCHED_MAX_SPLITS*GGML_SCHED_MAX_SPLIT_INPUTS*2*sizeof(struct ggml_tensor) + sizeof(struct ggml_cgraph)]; + bool debug; }; -#define hash_id(tensor) ggml_hash_find_or_insert(sched->hash_set, tensor) -#define tensor_backend_id(tensor) sched->tensor_backend_id[hash_id(tensor)] +#define hash_id(tensor) ggml_hash_find_or_insert(&sched->hash_set, tensor) +#define tensor_backend_id(tensor) sched->hv_tensor_backend_ids[hash_id(tensor)] +#define tensor_id_copy(id, backend_id, copy_id) sched->hv_tensor_copies[(id) * sched->n_backends * sched->n_copies + (backend_id) * sched->n_copies + (copy_id)] +#define tensor_copy(tensor, backend_id, copy_id) tensor_id_copy(hash_id(tensor), backend_id, copy_id) // returns the priority of the backend, lower id is higher priority static int ggml_backend_sched_backend_id(ggml_backend_sched_t sched, ggml_backend_t backend) { @@ -1169,7 +1165,6 @@ static int ggml_backend_sched_backend_id_from_cur(ggml_backend_sched_t sched, st return cur_backend_id; } - // assign nodes that use weights to the backend of the weights // operations with weights are preferably run on the same backend as the weights for (int i = 0; i < GGML_MAX_SRC; i++) { const struct ggml_tensor * src = tensor->src[i]; @@ -1275,7 +1270,7 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg sched->is_reset = false; struct ggml_init_params params = { - /* .mem_size = */ sizeof(sched->context_buffer), + /* .mem_size = */ sched->context_buffer_size, /* .mem_buffer = */ sched->context_buffer, /* .no_alloc = */ true }; @@ -1292,31 +1287,36 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg for (int i = 0; i < graph->n_leafs; i++) { struct ggml_tensor * leaf = graph->leafs[i]; int * leaf_backend_id = &tensor_backend_id(leaf); - if (*leaf_backend_id != -1) { - // do not overwrite user assignments - continue; + // do not overwrite user assignments + if (*leaf_backend_id == -1) { + *leaf_backend_id = ggml_backend_sched_backend_id_from_cur(sched, leaf); } - *leaf_backend_id = ggml_backend_sched_backend_id_from_cur(sched, leaf); } for (int i = 0; i < graph->n_nodes; i++) { struct ggml_tensor * node = graph->nodes[i]; int * node_backend_id = &tensor_backend_id(node); - if (*node_backend_id != -1) { - // do not overwrite user assignments - continue; - } - *node_backend_id = ggml_backend_sched_backend_id_from_cur(sched, node); - // src - for (int j = 0; j < GGML_MAX_SRC; j++) { - struct ggml_tensor * src = node->src[j]; - if (src == NULL) { + // do not overwrite user assignments + if (*node_backend_id == -1) { + *node_backend_id = ggml_backend_sched_backend_id_from_cur(sched, node); + +#if 0 + // src + if (node->op == GGML_OP_NONE) { continue; } - int * src_backend_id = &tensor_backend_id(src); - if (*src_backend_id == -1) { - *src_backend_id = ggml_backend_sched_backend_id_from_cur(sched, src); + + for (int j = 0; j < GGML_MAX_SRC; j++) { + struct ggml_tensor * src = node->src[j]; + if (src == NULL) { + continue; + } + int * src_backend_id = &tensor_backend_id(src); + if (*src_backend_id == -1) { + *src_backend_id = ggml_backend_sched_backend_id_from_cur(sched, src); + } } +#endif } } @@ -1488,12 +1488,13 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg } } - // pass 4: split graph, find tensors that need to be copied + // pass 5: split graph, find tensors that need to be copied { int i_split = 0; struct ggml_backend_sched_split * split = &sched->splits[0]; // find the backend of the first split, skipping view ops - for (int i = 0; i < graph->n_nodes; i++) { + int i = 0; + for (; i < graph->n_nodes; i++) { struct ggml_tensor * node = graph->nodes[i]; if (!ggml_is_view_op(node->op)) { split->backend_id = tensor_backend_id(node); @@ -1502,9 +1503,8 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg } split->i_start = 0; split->n_inputs = 0; - memset(split->inputs, 0, sizeof(split->inputs)); //HACK int cur_backend_id = split->backend_id; - for (int i = 0; i < graph->n_nodes; i++) { + for (; i < graph->n_nodes; i++) { struct ggml_tensor * node = graph->nodes[i]; if (ggml_is_view_op(node->op)) { @@ -1513,7 +1513,7 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg const int node_backend_id = tensor_backend_id(node); - GGML_ASSERT(node_backend_id != -1); // all nodes should be assigned by now + assert(node_backend_id != -1); // all nodes should be assigned by now // check if we should start a new split based on the sources of the current node bool need_new_split = false; @@ -1527,7 +1527,7 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg // by starting a new split, the memory of the previously offloaded weights can be reused if (src->buffer != NULL && src->buffer->usage == GGML_BACKEND_BUFFER_USAGE_WEIGHTS) { int src_backend_id = tensor_backend_id(src); - if (src_backend_id != -1 && src_backend_id != cur_backend_id) { + if (src_backend_id != cur_backend_id) { need_new_split = true; break; } @@ -1536,9 +1536,9 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg // FIXME: count the number of inputs instead of only checking when full if (split->n_inputs == GGML_SCHED_MAX_SPLIT_INPUTS) { const size_t id = hash_id(src); - int src_backend_id = sched->tensor_backend_id[id]; + int src_backend_id = sched->hv_tensor_backend_ids[id]; bool supported = ggml_backend_sched_buffer_supported(sched, src, cur_backend_id); - if (src_backend_id != cur_backend_id && sched->tensor_copies[hash_id(src)][cur_backend_id][0] == NULL && !supported) { + if (src_backend_id != cur_backend_id && tensor_id_copy(id, cur_backend_id, 0) == NULL && !supported) { //printf("starting new split because of too many inputs: node %s, input %s\n", node->name, src->name); need_new_split = true; break; @@ -1570,12 +1570,12 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg continue; } - const int src_backend_id = tensor_backend_id(src); + size_t src_id = hash_id(src); + const int src_backend_id = sched->hv_tensor_backend_ids[src_id]; assert(src_backend_id != -1); // all inputs should be assigned by now if (src->flags & GGML_TENSOR_FLAG_INPUT && sched->n_copies > 1) { - size_t id = hash_id(src); - if (sched->tensor_copies[id][src_backend_id][0] == NULL) { + if (tensor_id_copy(src_id, src_backend_id, 0) == NULL) { ggml_backend_t backend = sched->backends[src_backend_id]; for (int c = 0; c < sched->n_copies; c++) { struct ggml_tensor * tensor_copy; @@ -1589,7 +1589,7 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg ggml_set_input(tensor_copy); ggml_set_output(tensor_copy); // prevent ggml-alloc from overwriting the tensor } - sched->tensor_copies[id][src_backend_id][c] = tensor_copy; + tensor_id_copy(src_id, src_backend_id, c) = tensor_copy; SET_CAUSE(tensor_copy, "4.cpy"); } int n_graph_inputs = sched->n_graph_inputs++; @@ -1598,11 +1598,9 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg } } - bool supported = ggml_backend_sched_buffer_supported(sched, src, cur_backend_id); - if (src_backend_id != cur_backend_id && !supported) { + if (src_backend_id != cur_backend_id && !ggml_backend_sched_buffer_supported(sched, src, cur_backend_id)) { // create a copy of the input in the split's backend - const size_t id = hash_id(src); - if (sched->tensor_copies[id][cur_backend_id][0] == NULL) { + if (tensor_id_copy(src_id, cur_backend_id, 0) == NULL) { ggml_backend_t backend = sched->backends[cur_backend_id]; for (int c = 0; c < sched->n_copies; c++) { struct ggml_tensor * tensor_copy = ggml_dup_tensor_layout(sched->ctx, src); @@ -1611,14 +1609,14 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg ggml_set_input(tensor_copy); ggml_set_output(tensor_copy); // prevent ggml-alloc from overwriting the tensor } - sched->tensor_copies[id][cur_backend_id][c] = tensor_copy; + tensor_id_copy(src_id, cur_backend_id, c) = tensor_copy; SET_CAUSE(tensor_copy, "4.cpy"); } int n_inputs = split->n_inputs++; GGML_ASSERT(n_inputs < GGML_SCHED_MAX_SPLIT_INPUTS); split->inputs[n_inputs] = src; } - node->src[j] = sched->tensor_copies[id][cur_backend_id][sched->cur_copy]; + node->src[j] = tensor_id_copy(src_id, cur_backend_id, sched->cur_copy); } } } @@ -1630,7 +1628,7 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg ggml_backend_sched_print_assignments(sched, graph); } - // swap node_backend_ids and leaf_backend_ids and prevs + // swap node_backend_ids and leaf _backend_ids with prevs { int * tmp = sched->node_backend_ids; sched->node_backend_ids = sched->prev_node_backend_ids; @@ -1641,9 +1639,19 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg sched->prev_leaf_backend_ids = tmp; } - // create copies of the graph for each split - // TODO: avoid this copy - struct ggml_cgraph * graph_copy = ggml_new_graph_custom(sched->ctx, graph->n_nodes + sched->n_splits*GGML_SCHED_MAX_SPLIT_INPUTS*2, false); + int graph_size = graph->n_nodes + sched->n_splits*GGML_SCHED_MAX_SPLIT_INPUTS*2; + if (sched->graph.size < graph_size) { + sched->graph.size = graph_size; + sched->graph.nodes = realloc(sched->graph.nodes, graph_size * sizeof(struct ggml_tensor *)); + sched->graph.leafs = realloc(sched->graph.leafs, graph_size * sizeof(struct ggml_tensor *)); + GGML_ASSERT(sched->graph.nodes != NULL); + GGML_ASSERT(sched->graph.leafs != NULL); + } + sched->graph.n_nodes = 0; + sched->graph.n_leafs = 0; + + struct ggml_cgraph * graph_copy = &sched->graph; + for (int i = 0; i < sched->n_splits; i++) { struct ggml_backend_sched_split * split = &sched->splits[i]; split->graph = ggml_graph_view(graph, split->i_start, split->i_end); @@ -1654,12 +1662,12 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg struct ggml_tensor * input = split->inputs[j]; const size_t input_id = hash_id(input); - struct ggml_tensor * input_cpy = sched->tensor_copies[input_id][split->backend_id][sched->cur_copy]; + struct ggml_tensor * input_cpy = tensor_id_copy(input_id, split->backend_id, sched->cur_copy); // add a dependency to the input source so that it is not freed before the copy is done struct ggml_tensor * input_dep = ggml_view_tensor(sched->ctx, input); input_dep->src[0] = input; - sched->node_backend_ids[graph_copy->n_nodes] = sched->tensor_backend_id[input_id]; + sched->node_backend_ids[graph_copy->n_nodes] = sched->hv_tensor_backend_ids[input_id]; graph_copy->nodes[graph_copy->n_nodes++] = input_dep; // add a dependency to the input copy so that it is allocated at the start of the split @@ -1681,7 +1689,7 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg size_t id = hash_id(input); int backend_id = tensor_backend_id(input); for (int c = 0; c < sched->n_copies; c++) { - struct ggml_tensor * input_cpy = sched->tensor_copies[id][backend_id][c]; + struct ggml_tensor * input_cpy = tensor_id_copy(id, backend_id, c); sched->leaf_backend_ids[graph_copy->n_leafs] = backend_id; graph_copy->leafs[graph_copy->n_leafs++] = input_cpy; } @@ -1694,7 +1702,7 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg struct ggml_tensor * input = split->inputs[j]; size_t id = hash_id(input); for (int c = 0; c < sched->n_copies; c++) { - struct ggml_tensor * input_cpy = sched->tensor_copies[id][backend_id][c]; + struct ggml_tensor * input_cpy = tensor_id_copy(id, backend_id, c); sched->leaf_backend_ids[graph_copy->n_leafs] = backend_id; graph_copy->leafs[graph_copy->n_leafs++] = input_cpy; } @@ -1708,13 +1716,11 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg sched->leaf_backend_ids[graph_copy->n_leafs] = tensor_backend_id(leaf); graph_copy->leafs[graph_copy->n_leafs++] = leaf; } - - sched->graph = graph_copy; } static bool ggml_backend_sched_alloc_splits(ggml_backend_sched_t sched) { bool backend_ids_changed = false; - for (int i = 0; i < sched->graph->n_nodes; i++) { + for (int i = 0; i < sched->graph.n_nodes; i++) { if (sched->node_backend_ids[i] != sched->prev_node_backend_ids[i] && sched->bufts[sched->node_backend_ids[i]] != sched->bufts[sched->prev_node_backend_ids[i]]) { backend_ids_changed = true; @@ -1722,7 +1728,7 @@ static bool ggml_backend_sched_alloc_splits(ggml_backend_sched_t sched) { } } if (!backend_ids_changed) { - for (int i = 0; i < sched->graph->n_leafs; i++) { + for (int i = 0; i < sched->graph.n_leafs; i++) { if (sched->leaf_backend_ids[i] != sched->prev_leaf_backend_ids[i] && sched->bufts[sched->leaf_backend_ids[i]] != sched->bufts[sched->prev_leaf_backend_ids[i]]) { backend_ids_changed = true; @@ -1732,14 +1738,14 @@ static bool ggml_backend_sched_alloc_splits(ggml_backend_sched_t sched) { } // allocate graph - if (backend_ids_changed || !ggml_gallocr_alloc_graph(sched->galloc, sched->graph)) { + if (backend_ids_changed || !ggml_gallocr_alloc_graph(sched->galloc, &sched->graph)) { // the re-allocation may cause the split inputs to be moved to a different address ggml_backend_sched_synchronize(sched); #ifndef NDEBUG - fprintf(stderr, "%s: failed to allocate graph, reserving\n", __func__); + fprintf(stderr, "%s: failed to allocate graph, reserving (backend_ids_changed = %d)\n", __func__, backend_ids_changed); #endif - ggml_gallocr_reserve_n(sched->galloc, sched->graph, sched->node_backend_ids, sched->leaf_backend_ids); - if (!ggml_gallocr_alloc_graph(sched->galloc, sched->graph)) { + ggml_gallocr_reserve_n(sched->galloc, &sched->graph, sched->node_backend_ids, sched->leaf_backend_ids); + if (!ggml_gallocr_alloc_graph(sched->galloc, &sched->graph)) { fprintf(stderr, "%s: failed to allocate graph\n", __func__); return false; } @@ -1760,7 +1766,7 @@ static enum ggml_status ggml_backend_sched_compute_splits(ggml_backend_sched_t s for (int j = 0; j < split->n_inputs; j++) { ggml_backend_t input_backend = ggml_backend_sched_get_tensor_backend(sched, split->inputs[j]); struct ggml_tensor * input = split->inputs[j]; - struct ggml_tensor * input_cpy = sched->tensor_copies[hash_id(input)][split_backend_id][sched->cur_copy]; + struct ggml_tensor * input_cpy = tensor_copy(input, split_backend_id, sched->cur_copy); if (input->flags & GGML_TENSOR_FLAG_INPUT) { // inputs from the user must be copied immediately to prevent the user overwriting the data before the copy is done @@ -1846,21 +1852,23 @@ ggml_backend_sched_t ggml_backend_sched_new( struct ggml_backend_sched * sched = calloc(1, sizeof(struct ggml_backend_sched)); sched->debug = getenv("GGML_SCHED_DEBUG") != NULL; + sched->n_backends = n_backends; + sched->n_copies = parallel ? GGML_SCHED_MAX_COPIES : 1; // initialize hash table - sched->hash_set = ggml_hash_set_new(graph_size); - sched->tensor_backend_id = calloc(sched->hash_set.size, sizeof(sched->tensor_backend_id[0])); - sched->tensor_copies = calloc(sched->hash_set.size, sizeof(sched->tensor_copies[0])); + // FIXME: needs to be size*2 to account for leafs (do it in graph_split instead) + sched->hash_set = ggml_hash_set_new(graph_size); + sched->hv_tensor_backend_ids = malloc(sched->hash_set.size * sizeof(sched->hv_tensor_backend_ids[0])); + sched->hv_tensor_copies = malloc(sched->hash_set.size * sched->n_backends * sched->n_copies * sizeof(struct ggml_tensor *)); const size_t nodes_size = graph_size + GGML_SCHED_MAX_SPLITS*GGML_SCHED_MAX_SPLIT_INPUTS*2; - sched->node_backend_ids = calloc(nodes_size, sizeof(sched->node_backend_ids[0])); - sched->leaf_backend_ids = calloc(nodes_size, sizeof(sched->leaf_backend_ids[0])); + sched->node_backend_ids = calloc(nodes_size, sizeof(sched->node_backend_ids[0])); + sched->leaf_backend_ids = calloc(nodes_size, sizeof(sched->leaf_backend_ids[0])); sched->prev_node_backend_ids = calloc(nodes_size, sizeof(sched->prev_node_backend_ids[0])); sched->prev_leaf_backend_ids = calloc(nodes_size, sizeof(sched->prev_leaf_backend_ids[0])); - sched->n_backends = n_backends; - - sched->n_copies = parallel ? GGML_SCHED_MAX_COPIES : 1; + sched->context_buffer_size = GGML_SCHED_MAX_SPLITS*GGML_SCHED_MAX_SPLIT_INPUTS*2*sizeof(struct ggml_tensor) + ggml_graph_overhead_custom(graph_size, false); + sched->context_buffer = malloc(sched->context_buffer_size); const int initial_splits_capacity = 16; sched->splits = calloc(initial_splits_capacity, sizeof(sched->splits[0])); @@ -1895,37 +1903,37 @@ void ggml_backend_sched_free(ggml_backend_sched_t sched) { } ggml_gallocr_free(sched->galloc); ggml_free(sched->ctx); + ggml_hash_set_free(&sched->hash_set); free(sched->splits); - free(sched->hash_set.keys); - free(sched->tensor_backend_id); - free(sched->tensor_copies); + free(sched->hv_tensor_backend_ids); + free(sched->hv_tensor_copies); free(sched->node_backend_ids); free(sched->leaf_backend_ids); free(sched->prev_node_backend_ids); free(sched->prev_leaf_backend_ids); + free(sched->context_buffer); + free(sched->graph.nodes); + free(sched->graph.leafs); free(sched); } void ggml_backend_sched_reset(ggml_backend_sched_t sched) { // reset state for the next run if (!sched->is_reset) { - size_t hash_size = sched->hash_set.size; - memset(sched->hash_set.keys, 0, sizeof(sched->hash_set.keys[0]) * hash_size); // NOLINT - memset(sched->tensor_backend_id, -1, sizeof(sched->tensor_backend_id[0]) * hash_size); - memset(sched->tensor_copies, 0, sizeof(sched->tensor_copies[0]) * hash_size); - + ggml_hash_set_reset(&sched->hash_set); + memset(sched->hv_tensor_backend_ids, -1, sched->hash_set.size * sizeof(sched->hv_tensor_backend_ids[0])); + memset(sched->hv_tensor_copies, 0, sched->hash_set.size * sched->n_backends * sched->n_copies * sizeof(struct ggml_tensor *)); sched->is_reset = true; } sched->is_alloc = false; } bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph) { - GGML_ASSERT((int)sched->hash_set.size >= measure_graph->n_nodes); + GGML_ASSERT((int)sched->hash_set.size >= measure_graph->n_nodes + measure_graph->n_leafs); ggml_backend_sched_split_graph(sched, measure_graph); - // TODO: extract this to a separate function - if (!ggml_gallocr_reserve_n(sched->galloc, sched->graph, sched->node_backend_ids, sched->leaf_backend_ids)) { + if (!ggml_gallocr_reserve_n(sched->galloc, &sched->graph, sched->node_backend_ids, sched->leaf_backend_ids)) { return false; } @@ -1936,10 +1944,11 @@ bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph * } bool ggml_backend_sched_alloc_graph(ggml_backend_sched_t sched, struct ggml_cgraph * graph) { - GGML_ASSERT((int)sched->hash_set.size >= graph->n_nodes); + GGML_ASSERT((int)sched->hash_set.size >= graph->n_nodes + graph->n_leafs); ggml_backend_sched_split_graph(sched, graph); + if (!ggml_backend_sched_alloc_splits(sched)) { return false; } @@ -2009,6 +2018,7 @@ void ggml_backend_sched_set_tensor_backend(ggml_backend_sched_t sched, struct gg GGML_ASSERT(backend_index >= 0 && backend_index < sched->n_backends); tensor_backend_id(node) = backend_index; SET_CAUSE(node, "usr"); + sched->is_reset = false; } ggml_backend_t ggml_backend_sched_get_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node) { @@ -2051,9 +2061,9 @@ static struct ggml_tensor * graph_copy_dup_tensor(struct ggml_hash_set hash_set, GGML_ASSERT(src != NULL); GGML_ASSERT(src->data && "graph must be allocated"); - size_t id = ggml_hash_insert(hash_set, src); - if (id == GGML_HASHTABLE_ALREADY_EXISTS) { - return node_copies[ggml_hash_find(hash_set, src)]; + size_t id = ggml_hash_insert(&hash_set, src); + if (id == GGML_HASHSET_ALREADY_EXISTS) { + return node_copies[ggml_hash_find(&hash_set, src)]; } struct ggml_tensor * dst = ggml_dup_tensor_layout(src->data && !src->view_src ? ctx_allocated : ctx_unallocated, src); @@ -2078,7 +2088,7 @@ static struct ggml_tensor * graph_copy_dup_tensor(struct ggml_hash_set hash_set, return dst; } -static void graph_copy_init_tensor(struct ggml_hash_set hash_set, struct ggml_tensor ** node_copies, bool * node_init, struct ggml_tensor * src) { +static void graph_copy_init_tensor(struct ggml_hash_set * hash_set, struct ggml_tensor ** node_copies, bool * node_init, struct ggml_tensor * src) { size_t id = ggml_hash_find(hash_set, src); if (node_init[id]) { return; @@ -2105,10 +2115,7 @@ static void graph_copy_init_tensor(struct ggml_hash_set hash_set, struct ggml_te } struct ggml_backend_graph_copy ggml_backend_graph_copy(ggml_backend_t backend, struct ggml_cgraph * graph) { - struct ggml_hash_set hash_set = { - /* .size = */ graph->visited_hash_table.size, - /* .keys = */ calloc(graph->visited_hash_table.size, sizeof(hash_set.keys[0])) // NOLINT - }; + struct ggml_hash_set hash_set = ggml_hash_set_new(graph->visited_hash_set.size); struct ggml_tensor ** node_copies = calloc(hash_set.size, sizeof(node_copies[0])); // NOLINT bool * node_init = calloc(hash_set.size, sizeof(node_init[0])); @@ -2123,7 +2130,7 @@ struct ggml_backend_graph_copy ggml_backend_graph_copy(ggml_backend_t backend, s if (ctx_allocated == NULL || ctx_unallocated == NULL) { fprintf(stderr, "failed to allocate context for graph copy\n"); - free(hash_set.keys); + ggml_hash_set_free(&hash_set); free(node_copies); free(node_init); ggml_free(ctx_allocated); @@ -2146,7 +2153,7 @@ struct ggml_backend_graph_copy ggml_backend_graph_copy(ggml_backend_t backend, s ggml_backend_buffer_t buffer = ggml_backend_alloc_ctx_tensors(ctx_allocated, backend); if (buffer == NULL) { fprintf(stderr, "failed to allocate buffer for graph copy\n"); - free(hash_set.keys); + ggml_hash_set_free(&hash_set); free(node_copies); free(node_init); ggml_free(ctx_allocated); @@ -2164,19 +2171,19 @@ struct ggml_backend_graph_copy ggml_backend_graph_copy(ggml_backend_t backend, s // copy data and init views for (int i = 0; i < graph->n_nodes; i++) { struct ggml_tensor * node = graph->nodes[i]; - graph_copy_init_tensor(hash_set, node_copies, node_init, node); + graph_copy_init_tensor(&hash_set, node_copies, node_init, node); } // build graph copy struct ggml_cgraph * graph_copy = ggml_new_graph_custom(ctx_allocated, graph->size, false); for (int i = 0; i < graph->n_nodes; i++) { struct ggml_tensor * node = graph->nodes[i]; - struct ggml_tensor * node_copy = node_copies[ggml_hash_find(hash_set, node)]; + struct ggml_tensor * node_copy = node_copies[ggml_hash_find(&hash_set, node)]; graph_copy->nodes[i] = node_copy; } graph_copy->n_nodes = graph->n_nodes; - free(hash_set.keys); + ggml_hash_set_free(&hash_set); free(node_copies); free(node_init); diff --git a/ggml/src/ggml-impl.h b/ggml/src/ggml-impl.h index a2c8dbec0824f0..0b856a7b32a95e 100644 --- a/ggml/src/ggml-impl.h +++ b/ggml/src/ggml-impl.h @@ -634,21 +634,121 @@ inline static float ggml_lookup_fp16_to_fp32(ggml_fp16_t f) { #define GGML_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x) #endif -#define GGML_HASHTABLE_FULL ((size_t)-1) -#define GGML_HASHTABLE_ALREADY_EXISTS ((size_t)-2) +// bitset + +static_assert(sizeof(ggml_bitset_t) == 4, "bitset_t constants must be updated"); +#define BITSET_SHR 5 // log2(sizeof(ggml_bitset_t)*8) +#define BITSET_MASK (sizeof(ggml_bitset_t)*8 - 1) + +static size_t ggml_bitset_size(size_t n) { + return (n + BITSET_MASK) >> BITSET_SHR; +} + +static inline bool ggml_bitset_get(const ggml_bitset_t * bitset, int n) { + return !!(bitset[n >> BITSET_SHR] & (1u << (n & BITSET_MASK))); +} + +static inline void ggml_bitset_set(ggml_bitset_t * bitset, int n) { + bitset[n >> BITSET_SHR] |= (1u << (n & BITSET_MASK)); +} + +static inline void ggml_bitset_clear(ggml_bitset_t * bitset, int n) { + bitset[n >> BITSET_SHR] &= ~(1u << (n & BITSET_MASK)); +} + +// hash set + +#define GGML_HASHSET_FULL ((size_t)-1) +#define GGML_HASHSET_ALREADY_EXISTS ((size_t)-2) struct ggml_hash_set ggml_hash_set_new(size_t size); +void ggml_hash_set_free(struct ggml_hash_set * hash_set); + +// returns the minimum size for a hash set that can hold min_sz elements +size_t ggml_hash_size(size_t min_sz); -bool ggml_hash_contains (const struct ggml_hash_set hash_set, struct ggml_tensor * key); +// remove all elements from the hash set +void ggml_hash_set_reset(struct ggml_hash_set * hash_set); -// returns GGML_HASHTABLE_FULL if table is full, otherwise the current index of the key or where it should be inserted -size_t ggml_hash_find (const struct ggml_hash_set hash_set, struct ggml_tensor * key); +// returns true if key is in the hash set +static bool ggml_hash_contains(const struct ggml_hash_set * hash_set, struct ggml_tensor * key); -// returns GGML_HASHTABLE_ALREADY_EXISTS if key already exists, index otherwise, asserts if table is full -size_t ggml_hash_insert ( struct ggml_hash_set hash_set, struct ggml_tensor * key); +// returns GGML_HASHSET_FULL if table is full, otherwise the current index of the key or where it should be inserted +static size_t ggml_hash_find(const struct ggml_hash_set * hash_set, struct ggml_tensor * key); + +// returns GGML_HASHSET_ALREADY_EXISTS if key already exists, index otherwise, asserts if table is full +static size_t ggml_hash_insert(struct ggml_hash_set * hash_set, struct ggml_tensor * key); // return index, asserts if table is full -size_t ggml_hash_find_or_insert( struct ggml_hash_set hash_set, struct ggml_tensor * key); +static size_t ggml_hash_find_or_insert(struct ggml_hash_set * hash_set, struct ggml_tensor * key); + +// hash function for ggml_tensor +static inline size_t ggml_hash(const struct ggml_tensor * p) { + // the last 4 bits are always zero due to alignment + return (size_t)(uintptr_t)p >> 4; +} + +static size_t ggml_hash_find(const struct ggml_hash_set * hash_set, struct ggml_tensor * key) { + size_t h = ggml_hash(key) % hash_set->size; + + // linear probing + size_t i = h; + while (ggml_bitset_get(hash_set->used, i) && hash_set->keys[i] != key) { + i = (i + 1) % hash_set->size; + if (i == h) { + // visited all hash table entries -> not found + return GGML_HASHSET_FULL; + } + } + return i; +} + +static bool ggml_hash_contains(const struct ggml_hash_set * hash_set, struct ggml_tensor * key) { + size_t i = ggml_hash_find(hash_set, key); + return i != GGML_HASHSET_FULL && ggml_bitset_get(hash_set->used, i); +} + +static size_t ggml_hash_insert(struct ggml_hash_set * hash_set, struct ggml_tensor * key) { + size_t h = ggml_hash(key) % hash_set->size; + + // linear probing + size_t i = h; + do { + if (!ggml_bitset_get(hash_set->used, i)) { + ggml_bitset_set(hash_set->used, i); + hash_set->keys[i] = key; + return i; + } + if (hash_set->keys[i] == key) { + return GGML_HASHSET_ALREADY_EXISTS; + } + i = (i + 1) % hash_set->size; + } while (i != h); + + // visited all hash table entries -> not found + GGML_ASSERT(false); +} + +static size_t ggml_hash_find_or_insert(struct ggml_hash_set * hash_set, struct ggml_tensor * key) { + size_t h = ggml_hash(key) % hash_set->size; + + // linear probing + size_t i = h; + do { + if (!ggml_bitset_get(hash_set->used, i)) { + ggml_bitset_set(hash_set->used, i); + hash_set->keys[i] = key; + return i; + } + if (hash_set->keys[i] == key) { + return i; + } + i = (i + 1) % hash_set->size; + } while (i != h); + + // visited all hash table entries -> not found + GGML_ASSERT(false); +} #ifdef __cplusplus } diff --git a/ggml/src/ggml-quants.c b/ggml/src/ggml-quants.c index 47418597c00d8f..091a55e2cde0b0 100644 --- a/ggml/src/ggml-quants.c +++ b/ggml/src/ggml-quants.c @@ -14623,7 +14623,7 @@ bool ggml_validate_row_data(enum ggml_type type, const void * data, size_t nbyte } if (nbytes % ggml_type_size(type) != 0) { - fprintf(stderr, "%s: invalid size %zu for type %d\n", __func__, nbytes, type); + fprintf(stderr, "%s: invalid size %zu for type %s (type size = %zu)\n", __func__, nbytes, ggml_type_name(type), ggml_type_size(type)); return false; } diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c index 29afcc7f8978bd..09bc0fa3273f12 100644 --- a/ggml/src/ggml.c +++ b/ggml/src/ggml.c @@ -141,23 +141,25 @@ typedef pthread_t ggml_thread_t; #include -void ggml_print_backtrace(void) { - /* - #include - #include - +#if defined(__linux__) +#include +static void ggml_print_backtrace_symbols(void) { void * trace[100]; - int nptrs = backtrace(trace, sizeof(trace)/sizeof(trace[0])); - backtrace_symbols_fd(trace, nptrs, STDERR_FILENO); - */ +} +#else +static void ggml_print_backtrace_symbols(void) { + // platform not supported +} +#endif - // backtrack_symbols does not show line numbers, use gdb instead +static void ggml_print_backtrace(void) { char attach[32]; snprintf(attach, sizeof(attach), "attach %d", getpid()); int pid = fork(); if (pid == 0) { + // try gdb execlp("gdb", "gdb", "--batch", "-ex", "set style enabled on", "-ex", attach, @@ -165,16 +167,37 @@ void ggml_print_backtrace(void) { "-ex", "detach", "-ex", "quit", (char *) NULL); + // try lldb + execlp("lldb", "lldb", "--batch", + "-o", "bt", + "-o", "quit", + "-p", attach, + (char *) NULL); + exit(EXIT_FAILURE); } else { - waitpid(pid, NULL, 0); + int wstatus; + waitpid(pid, &wstatus, 0); + if (WIFEXITED(wstatus)) { + if (WEXITSTATUS(wstatus) == EXIT_FAILURE) { + // gdb failed, fallback to backtrace_symbols + ggml_print_backtrace_symbols(); + } + } } } #else -void ggml_print_backtrace(void) { +static void ggml_print_backtrace(void) { // platform not supported } #endif +void ggml_abort(const char * file, int line, const char * expr) { + fflush(stdout); + fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", file, line, expr); + ggml_print_backtrace(); + abort(); +} + #define GGML_DEBUG 0 #define GGML_GELU_FP16 #define GGML_GELU_QUICK_FP16 @@ -3372,7 +3395,7 @@ static inline int ggml_up(int n, int m) { } // assert that pointer is aligned to GGML_MEM_ALIGN -#define ggml_assert_aligned(ptr) \ +#define GGML_ASSERT_ALIGNED(ptr) \ GGML_ASSERT(((uintptr_t) (ptr))%GGML_MEM_ALIGN == 0) //////////////////////////////////////////////////////////////////////////////// @@ -3473,7 +3496,7 @@ struct ggml_context * ggml_init(struct ggml_init_params params) { GGML_ASSERT(ctx->mem_buffer != NULL); - ggml_assert_aligned(ctx->mem_buffer); + GGML_ASSERT_ALIGNED(ctx->mem_buffer); GGML_PRINT_DEBUG("%s: context initialized\n", __func__); @@ -3605,7 +3628,7 @@ static struct ggml_object * ggml_new_object(struct ggml_context * ctx, enum ggml .type = type, }; - ggml_assert_aligned(mem_buffer + obj_new->offs); + GGML_ASSERT_ALIGNED(mem_buffer + obj_new->offs); if (obj_cur != NULL) { obj_cur->next = obj_new; @@ -3706,7 +3729,7 @@ static struct ggml_tensor * ggml_new_tensor_impl( #endif // TODO: this should not be needed as long as we don't rely on aligned SIMD loads - //ggml_assert_aligned(result->data); + //GGML_ASSERT_ALIGNED(result->data); for (int i = 0; i < n_dims; i++) { result->ne[i] = ne[i]; @@ -4270,8 +4293,14 @@ const char * ggml_get_name(const struct ggml_tensor * tensor) { } struct ggml_tensor * ggml_set_name(struct ggml_tensor * tensor, const char * name) { - strncpy(tensor->name, name, sizeof(tensor->name) - 1); +#ifdef _MSC_VER + strcpy_s(tensor->name, sizeof(tensor->name), name); +#elif defined(__APPLE__) || __GLIBC_PREREQ(2, 38) + strlcpy(tensor->name, name, sizeof(tensor->name)); +#else + strncpy(tensor->name, name, sizeof(tensor->name)); tensor->name[sizeof(tensor->name) - 1] = '\0'; +#endif return tensor; } @@ -16963,7 +16992,25 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm //////////////////////////////////////////////////////////////////////////////// -static size_t ggml_hash_size(size_t min_sz) { +struct ggml_hash_set ggml_hash_set_new(size_t size) { + size = ggml_hash_size(size); + struct ggml_hash_set result; + result.size = size; + result.keys = GGML_MALLOC(sizeof(struct ggml_tensor *) * size); + result.used = GGML_CALLOC(ggml_bitset_size(size), sizeof(ggml_bitset_t)); + return result; +} + +void ggml_hash_set_reset(struct ggml_hash_set * hash_set) { + memset(hash_set->used, 0, sizeof(ggml_bitset_t) * ggml_bitset_size(hash_set->size)); +} + +void ggml_hash_set_free(struct ggml_hash_set * hash_set) { + GGML_FREE(hash_set->used); + GGML_FREE(hash_set->keys); +} + +size_t ggml_hash_size(size_t min_sz) { // next primes after powers of two static const size_t primes[] = { 2, 3, 5, 11, 17, 37, 67, 131, 257, 521, 1031, @@ -16974,7 +17021,7 @@ static size_t ggml_hash_size(size_t min_sz) { }; static const size_t n_primes = sizeof(primes)/sizeof(primes[0]); - // find the smallest prime that is larger or equal to min_sz + // find the smallest prime that is larger or equal than min_sz size_t l = 0; size_t r = n_primes; while (l < r) { @@ -16989,67 +17036,6 @@ static size_t ggml_hash_size(size_t min_sz) { return sz; } -static size_t ggml_hash(const void * p) { - return (size_t)p; -} - -size_t ggml_hash_find(const struct ggml_hash_set hash_set, struct ggml_tensor * key) { - size_t h = ggml_hash(key) % hash_set.size; - - // linear probing - size_t i = h; - while (hash_set.keys[i] != NULL && hash_set.keys[i] != key) { - i = (i + 1) % hash_set.size; - if (i == h) { - // visited all hash table entries -> not found - return GGML_HASHTABLE_FULL; - } - } - return i; -} - -bool ggml_hash_contains(struct ggml_hash_set hash_set, struct ggml_tensor * key) { - size_t i = ggml_hash_find(hash_set, key); - return i != GGML_HASHTABLE_FULL && hash_set.keys[i] == key; -} - -size_t ggml_hash_insert(struct ggml_hash_set hash_set, struct ggml_tensor * key) { - size_t i = ggml_hash_find(hash_set, key); - - GGML_ASSERT(i != GGML_HASHTABLE_FULL); - - if (hash_set.keys[i] == key) { - return GGML_HASHTABLE_ALREADY_EXISTS; - } - - // insert - GGML_ASSERT(hash_set.keys[i] == NULL); - hash_set.keys[i] = key; - return i; -} - -size_t ggml_hash_find_or_insert(struct ggml_hash_set hash_set, struct ggml_tensor * key) { - size_t i = ggml_hash_find(hash_set, key); - - GGML_ASSERT(i != GGML_HASHTABLE_FULL); - - hash_set.keys[i] = key; - return i; -} - -struct ggml_hash_set ggml_hash_set_new(size_t size) { - size = ggml_hash_size(size); - struct ggml_hash_set result; - result.size = size; - result.keys = GGML_MALLOC(sizeof(struct ggml_tensor *) * size); - memset(result.keys, 0, sizeof(struct ggml_tensor *) * size); - return result; -} - -static void ggml_hash_set_free(struct ggml_hash_set hash_set) { - GGML_FREE(hash_set.keys); -} - struct hash_map { struct ggml_hash_set set; struct ggml_tensor ** vals; @@ -17058,13 +17044,12 @@ struct hash_map { static struct hash_map * ggml_new_hash_map(size_t size) { struct hash_map * result = GGML_MALLOC(sizeof(struct hash_map)); result->set = ggml_hash_set_new(size); - result->vals = GGML_MALLOC(sizeof(struct ggml_tensor *) * result->set.size); - memset(result->vals, 0, sizeof(struct ggml_tensor *) * result->set.size); + result->vals = GGML_CALLOC(result->set.size, sizeof(struct ggml_tensor *)); return result; } static void ggml_hash_map_free(struct hash_map * map) { - ggml_hash_set_free(map->set); + ggml_hash_set_free(&map->set); GGML_FREE(map->vals); GGML_FREE(map); } @@ -17085,7 +17070,7 @@ static struct ggml_tensor * ggml_recompute_graph_node( return node; } - if (!ggml_hash_contains(graph->visited_hash_table, node)) { + if (!ggml_hash_contains(&graph->visited_hash_set, node)) { return node; } @@ -17100,8 +17085,8 @@ static struct ggml_tensor * ggml_recompute_graph_node( return node; } - size_t i = ggml_hash_find(replacements->set, node); - GGML_ASSERT(i != GGML_HASHTABLE_FULL); // assert that not full + size_t i = ggml_hash_find(&replacements->set, node); + GGML_ASSERT(i != GGML_HASHSET_FULL); // assert that not full if (replacements->set.keys[i] == node) { return replacements->vals[i]; } @@ -17159,8 +17144,8 @@ void ggml_build_backward_gradient_checkpointing( // insert checkpoints in replacements for (int i = 0; i < n_checkpoints; ++i) { - size_t k = ggml_hash_find(replacements->set, checkpoints[i]); - GGML_ASSERT(k != GGML_HASHTABLE_FULL); // assert that not full + size_t k = ggml_hash_find(&replacements->set, checkpoints[i]); + GGML_ASSERT(k != GGML_HASHSET_FULL); // assert that not full GGML_ASSERT(replacements->set.keys[k] == NULL); // assert that we don't overwrite replacements->set.keys[k] = checkpoints[i]; replacements->vals[k] = checkpoints[i]; @@ -17188,7 +17173,7 @@ void ggml_build_backward_gradient_checkpointing( // functions to change gradients considering the case that input a might be initial gradient with zero value -static struct ggml_tensor * ggml_add_or_set(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, struct ggml_hash_set zero_table) { +static struct ggml_tensor * ggml_add_or_set(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, struct ggml_hash_set * zero_table) { if (ggml_hash_contains(zero_table, a)) { return b; } else { @@ -17196,7 +17181,7 @@ static struct ggml_tensor * ggml_add_or_set(struct ggml_context * ctx, struct gg } } -static struct ggml_tensor * ggml_acc_or_set(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, size_t nb1, size_t nb2, size_t nb3, size_t offset, struct ggml_hash_set zero_table) { +static struct ggml_tensor * ggml_acc_or_set(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, size_t nb1, size_t nb2, size_t nb3, size_t offset, struct ggml_hash_set * zero_table) { if (ggml_hash_contains(zero_table, a)) { struct ggml_tensor * a_zero = ggml_scale(ctx, a, 0.0f); return ggml_acc_impl(ctx, a_zero, b, nb1, nb2, nb3, offset, false); @@ -17205,7 +17190,7 @@ static struct ggml_tensor * ggml_acc_or_set(struct ggml_context * ctx, struct gg } } -static struct ggml_tensor * ggml_add1_or_set(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, struct ggml_hash_set zero_table) { +static struct ggml_tensor * ggml_add1_or_set(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, struct ggml_hash_set * zero_table) { if (ggml_hash_contains(zero_table, a)) { return ggml_repeat(ctx, b, a); } else { @@ -17213,7 +17198,7 @@ static struct ggml_tensor * ggml_add1_or_set(struct ggml_context * ctx, struct g } } -static struct ggml_tensor * ggml_sub_or_set(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, struct ggml_hash_set zero_table) { +static struct ggml_tensor * ggml_sub_or_set(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, struct ggml_hash_set * zero_table) { if (ggml_hash_contains(zero_table, a)) { return ggml_neg(ctx, b); } else { @@ -17221,7 +17206,7 @@ static struct ggml_tensor * ggml_sub_or_set(struct ggml_context * ctx, struct gg } } -static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor * tensor, struct ggml_hash_set zero_table) { +static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor * tensor, struct ggml_hash_set * zero_table) { struct ggml_tensor * src0 = tensor->src[0]; struct ggml_tensor * src1 = tensor->src[1]; struct ggml_tensor * src2 = tensor->src[2]; @@ -18049,7 +18034,7 @@ static void ggml_visit_parents(struct ggml_cgraph * cgraph, struct ggml_tensor * } // check if already visited - if (ggml_hash_insert(cgraph->visited_hash_table, node) == GGML_HASHTABLE_ALREADY_EXISTS) { + if (ggml_hash_insert(&cgraph->visited_hash_set, node) == GGML_HASHSET_ALREADY_EXISTS) { return; } @@ -18131,7 +18116,7 @@ void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph * struct ggml_hash_set zero_table = ggml_hash_set_new(gf->size); for (int i = 0; i < gf->n_nodes; i++) { if (gf->grads[i]) { - ggml_hash_insert(zero_table, gf->grads[i]); + ggml_hash_insert(&zero_table, gf->grads[i]); } } @@ -18141,7 +18126,7 @@ void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph * // inplace operations to add gradients are not created by ggml_compute_backward // use allocator to automatically make inplace operations if (node->grad) { - ggml_compute_backward(ctx, node, zero_table); + ggml_compute_backward(ctx, node, &zero_table); } } @@ -18154,16 +18139,29 @@ void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph * } } - ggml_hash_set_free(zero_table); + ggml_hash_set_free(&zero_table); +} + +static void * incr_ptr_aligned(void ** p, size_t size, size_t align) { + void * ptr = *p; + ptr = (void *) GGML_PAD((uintptr_t) ptr, align); + *p = (void *) ((char *) ptr + size); + return ptr; } static size_t ggml_graph_nbytes(size_t size, bool grads) { - size_t nbytes = sizeof(struct ggml_cgraph); - nbytes += size * sizeof(struct ggml_tensor *) * 2; // leafs + nodes + size_t hash_size = ggml_hash_size(size * 2); + void * p = 0; + incr_ptr_aligned(&p, sizeof(struct ggml_cgraph), 1); + incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // nodes + incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // leafs + incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // hash keys if (grads) { - nbytes += size * sizeof(struct ggml_tensor *); // grads + incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // grads } - nbytes += ggml_hash_size(size * 2) * sizeof(struct ggml_tensor *); // hash set + incr_ptr_aligned(&p, ggml_bitset_size(hash_size) * sizeof(ggml_bitset_t), sizeof(ggml_bitset_t)); + + size_t nbytes = (size_t) p; return nbytes; } @@ -18180,19 +18178,19 @@ struct ggml_cgraph * ggml_new_graph_custom(struct ggml_context * ctx, size_t siz struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_GRAPH, obj_size); struct ggml_cgraph * cgraph = (struct ggml_cgraph *) ((char *) ctx->mem_buffer + obj->offs); - struct ggml_tensor ** data_start = (struct ggml_tensor **) (cgraph + 1); - + // the size of the hash table is doubled since it needs to hold both nodes and leafs size_t hash_size = ggml_hash_size(size * 2); - struct ggml_tensor ** nodes_ptr = data_start; - struct ggml_tensor ** leafs_ptr = nodes_ptr + size; - struct ggml_tensor ** hash_keys_ptr = leafs_ptr + size; - struct ggml_tensor ** grads_ptr = grads ? hash_keys_ptr + hash_size : NULL; - // check that we allocated the correct amount of memory - assert(obj_size == (size_t) ( - (grads ? (char *)(grads_ptr + size) : (char *)(hash_keys_ptr + hash_size)) - (char *)cgraph)); + void * p = cgraph + 1; + + struct ggml_tensor ** nodes_ptr = incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); + struct ggml_tensor ** leafs_ptr = incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); + struct ggml_tensor ** hash_keys_ptr = incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); + struct ggml_tensor ** grads_ptr = grads ? incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)) : NULL; + ggml_bitset_t * hash_used = incr_ptr_aligned(&p, ggml_bitset_size(hash_size) * sizeof(ggml_bitset_t), sizeof(ggml_bitset_t)); - memset(hash_keys_ptr, 0, hash_size * sizeof(struct ggml_tensor *)); + // check that we allocated the correct amount of memory + assert(obj_size == (size_t)((char *)p - (char *)cgraph)); *cgraph = (struct ggml_cgraph) { /*.size =*/ size, @@ -18201,10 +18199,12 @@ struct ggml_cgraph * ggml_new_graph_custom(struct ggml_context * ctx, size_t siz /*.nodes =*/ nodes_ptr, /*.grads =*/ grads_ptr, /*.leafs =*/ leafs_ptr, - /*.hash_table =*/ { hash_size, hash_keys_ptr }, + /*.hash_table =*/ { hash_size, hash_used, hash_keys_ptr }, /*.order =*/ GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT, }; + ggml_hash_set_reset(&cgraph->visited_hash_set); + return cgraph; } @@ -18220,7 +18220,7 @@ struct ggml_cgraph ggml_graph_view(struct ggml_cgraph * cgraph0, int i0, int i1) /*.nodes =*/ cgraph0->nodes + i0, /*.grads =*/ cgraph0->grads ? cgraph0->grads + i0 : NULL, /*.leafs =*/ NULL, - /*.hash_table =*/ { 0, NULL }, + /*.hash_table =*/ { 0, NULL, NULL }, /*.order =*/ cgraph0->order, }; @@ -18230,7 +18230,7 @@ struct ggml_cgraph ggml_graph_view(struct ggml_cgraph * cgraph0, int i0, int i1) void ggml_graph_cpy(struct ggml_cgraph * src, struct ggml_cgraph * dst) { GGML_ASSERT(dst->size >= src->n_leafs); GGML_ASSERT(dst->size >= src->n_nodes); - GGML_ASSERT(dst->visited_hash_table.size >= src->visited_hash_table.size); + GGML_ASSERT(dst->visited_hash_set.size >= src->visited_hash_set.size); dst->n_leafs = src->n_leafs; dst->n_nodes = src->n_nodes; @@ -18251,9 +18251,9 @@ void ggml_graph_cpy(struct ggml_cgraph * src, struct ggml_cgraph * dst) { } } - for (size_t i = 0; i < src->visited_hash_table.size; ++i) { - if (src->visited_hash_table.keys[i]) { - ggml_hash_insert(dst->visited_hash_table, src->visited_hash_table.keys[i]); + for (size_t i = 0; i < src->visited_hash_set.size; ++i) { + if (src->visited_hash_set.keys[i]) { + ggml_hash_insert(&dst->visited_hash_set, src->visited_hash_set.keys[i]); } } } @@ -18279,7 +18279,7 @@ void ggml_graph_reset(struct ggml_cgraph * cgraph) { void ggml_graph_clear(struct ggml_cgraph * cgraph) { cgraph->n_leafs = 0; cgraph->n_nodes = 0; - memset(cgraph->visited_hash_table.keys, 0, cgraph->visited_hash_table.size * sizeof(struct ggml_tensor *)); + ggml_hash_set_reset(&cgraph->visited_hash_set); } //