Skip to content

Commit

Permalink
ggml : reduce hash table reset cost
Browse files Browse the repository at this point in the history
  • Loading branch information
slaren committed Jul 26, 2024
1 parent 01aec4a commit 7d5a8c1
Show file tree
Hide file tree
Showing 7 changed files with 371 additions and 263 deletions.
6 changes: 3 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -325,9 +325,9 @@ ifdef LLAMA_DEBUG
endif
else
MK_CPPFLAGS += -DNDEBUG
MK_CFLAGS += -O3
MK_CXXFLAGS += -O3
MK_NVCCFLAGS += -O3
MK_CFLAGS += -O3 -g
MK_CXXFLAGS += -O3 -g
MK_NVCCFLAGS += -O3 -g
endif

ifdef LLAMA_SANITIZE_THREAD
Expand Down
35 changes: 19 additions & 16 deletions ggml/include/ggml.h
Original file line number Diff line number Diff line change
Expand Up @@ -254,18 +254,8 @@

#define GGML_PAD(x, n) (((x) + (n) - 1) & ~((n) - 1))

#define GGML_ASSERT(x) \
do { \
if (!(x)) { \
fflush(stdout); \
fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", __FILE__, __LINE__, #x); \
ggml_print_backtrace(); \
abort(); \
} \
} while (0)

#ifndef NDEBUG
#define GGML_UNREACHABLE() GGML_ASSERT(!"statement should not be reached")
#define GGML_UNREACHABLE() do { fprintf(stderr, "statement should be unreachable\n"); abort(); } while(0)
#elif defined(__GNUC__)
#define GGML_UNREACHABLE() __builtin_unreachable()
#elif defined(_MSC_VER)
Expand All @@ -274,6 +264,16 @@
#define GGML_UNREACHABLE() ((void) 0)
#endif

#ifdef __cplusplus
#define GGML_NORETURN [[noreturn]]
#elif defined(_MSC_VER)
#define GGML_NORETURN __declspec(noreturn)
#else
#define GGML_NORETURN _Noreturn
#endif

#define GGML_ASSERT(x) if (!(x)) ggml_abort(__FILE__, __LINE__, #x)

// used to copy the number of elements and stride in bytes of tensors into local variables.
// main purpose is to reduce code duplication and improve readability.
//
Expand Down Expand Up @@ -322,6 +322,8 @@
extern "C" {
#endif

GGML_API GGML_NORETURN void ggml_abort(const char * file, int line, const char * expr);

enum ggml_status {
GGML_STATUS_ALLOC_FAILED = -2,
GGML_STATUS_FAILED = -1,
Expand Down Expand Up @@ -636,8 +638,11 @@ extern "C" {
GGML_CGRAPH_EVAL_ORDER_COUNT
};

typedef uint32_t ggml_bitset_t;

struct ggml_hash_set {
size_t size;
ggml_bitset_t * used;
struct ggml_tensor ** keys;
};

Expand All @@ -651,7 +656,7 @@ extern "C" {
struct ggml_tensor ** grads;
struct ggml_tensor ** leafs;

struct ggml_hash_set visited_hash_table;
struct ggml_hash_set visited_hash_set;

enum ggml_cgraph_eval_order order;
};
Expand Down Expand Up @@ -698,8 +703,6 @@ extern "C" {
GGML_API int64_t ggml_cycles(void);
GGML_API int64_t ggml_cycles_per_ms(void);

GGML_API void ggml_print_backtrace(void);

// accepts a UTF-8 path, even on Windows
GGML_API FILE * ggml_fopen(const char * fname, const char * mode);

Expand Down Expand Up @@ -2005,8 +2008,8 @@ extern "C" {

// ggml_graph_plan() has to be called before ggml_graph_compute()
// when plan.work_size > 0, caller must allocate memory for plan.work_data
GGML_API struct ggml_cplan ggml_graph_plan (const struct ggml_cgraph * cgraph, int n_threads /*= GGML_DEFAULT_N_THREADS*/);
GGML_API enum ggml_status ggml_graph_compute ( struct ggml_cgraph * cgraph, struct ggml_cplan * cplan);
GGML_API struct ggml_cplan ggml_graph_plan (const struct ggml_cgraph * cgraph, int n_threads /*= GGML_DEFAULT_N_THREADS*/);
GGML_API enum ggml_status ggml_graph_compute( struct ggml_cgraph * cgraph, struct ggml_cplan * cplan);
// same as ggml_graph_compute() but the work data is allocated as a part of the context
// note: the drawback of this API is that you must have ensured that the context has enough memory for the work data
GGML_API enum ggml_status ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads);
Expand Down
30 changes: 14 additions & 16 deletions ggml/src/ggml-alloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -443,7 +443,7 @@ void ggml_gallocr_free(ggml_gallocr_t galloc) {
}
}

free(galloc->hash_set.keys);
ggml_hash_set_free(&galloc->hash_set);
free(galloc->hash_values);
free(galloc->bufts);
free(galloc->buffers);
Expand All @@ -456,7 +456,7 @@ void ggml_gallocr_free(ggml_gallocr_t galloc) {
typedef struct ggml_gallocr * ggml_gallocr_t;

static struct hash_node * ggml_gallocr_hash_get(ggml_gallocr_t galloc, struct ggml_tensor * t) {
size_t i = ggml_hash_find_or_insert(galloc->hash_set, t);
size_t i = ggml_hash_find_or_insert(&galloc->hash_set, t);
return &galloc->hash_values[i];
}

Expand Down Expand Up @@ -565,8 +565,8 @@ static int get_node_buffer_id(const int * node_buffer_ids, int i) {

static void ggml_gallocr_alloc_graph_impl(ggml_gallocr_t galloc, struct ggml_cgraph * graph, const int * node_buffer_ids, const int * leaf_buffer_ids) {
// clear hash tables
memset(galloc->hash_set.keys, 0, galloc->hash_set.size * sizeof(struct ggml_tensor *));
memset(galloc->hash_values, 0, galloc->hash_set.size * sizeof(struct hash_node));
ggml_hash_set_reset(&galloc->hash_set);
memset(galloc->hash_values, 0, sizeof(struct hash_node) * galloc->hash_set.size);

// allocate leafs
// these may be tensors that the application is not using in the graph, but may still want to allocate for other purposes
Expand Down Expand Up @@ -671,21 +671,19 @@ static void ggml_gallocr_alloc_graph_impl(ggml_gallocr_t galloc, struct ggml_cgr
}

bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, const int * node_buffer_ids, const int * leaf_buffer_ids) {
size_t hash_size = graph->visited_hash_table.size;
size_t min_hash_size = graph->n_nodes + graph->n_leafs;
// add 25% margin to avoid hash collisions
min_hash_size += min_hash_size / 4;

// initialize hash table
if (galloc->hash_set.size < hash_size) {
free(galloc->hash_set.keys);
free(galloc->hash_values);
galloc->hash_set.size = hash_size;
galloc->hash_set.keys = calloc(hash_size, sizeof(struct ggml_tensor *));
galloc->hash_values = calloc(hash_size, sizeof(struct hash_node));
if (galloc->hash_set.size < min_hash_size) {
ggml_hash_set_free(&galloc->hash_set);
galloc->hash_set = ggml_hash_set_new(min_hash_size);
GGML_ASSERT(galloc->hash_set.keys != NULL);

free(galloc->hash_values);
galloc->hash_values = malloc(sizeof(struct hash_node) * galloc->hash_set.size);
GGML_ASSERT(galloc->hash_values != NULL);
} else {
// reset hash table
memset(galloc->hash_set.keys, 0, sizeof(struct ggml_tensor *) * galloc->hash_set.size);
memset(galloc->hash_values, 0, sizeof(struct hash_node) * galloc->hash_set.size);
}

// reset allocators
Expand Down Expand Up @@ -817,7 +815,7 @@ static void ggml_gallocr_init_tensor(ggml_gallocr_t galloc, struct ggml_tensor *
}

static bool ggml_gallocr_node_needs_realloc(ggml_gallocr_t galloc, struct ggml_tensor * node, struct tensor_alloc * talloc) {
ggml_backend_buffer_type_t buft = talloc->buffer_id != -1 ? galloc->bufts[talloc->buffer_id] : NULL;
ggml_backend_buffer_type_t buft = galloc->bufts[talloc->buffer_id];
size_t node_size = (node->data || node->view_src) ? 0 : ggml_backend_buft_get_alloc_size(buft, node);
return talloc->size_max >= node_size;
}
Expand Down
Loading

0 comments on commit 7d5a8c1

Please sign in to comment.