diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0ddcead86f0259..3a498abee2f52c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -344,7 +344,7 @@ if (LLAMA_MPI)
     if (MPI_C_FOUND)
         message(STATUS "MPI found")
         set(GGML_HEADERS_MPI ggml-mpi.h)
-        set(GGML_SOURCES_MPI ggml-mpi.c ggml-mpi.h)
+        set(GGML_SOURCES_MPI ggml-mpi.cpp ggml-mpi.h)
        add_compile_definitions(GGML_USE_MPI)
        add_compile_definitions(${MPI_C_COMPILE_DEFINITIONS})
        if (NOT MSVC)
diff --git a/Makefile b/Makefile
index 240744ea2c50e4..c61580b7fef65e 100644
--- a/Makefile
+++ b/Makefile
@@ -502,7 +502,7 @@ ggml-metal.o: ggml-metal.m ggml-metal.h
 endif # LLAMA_METAL
 
 ifdef LLAMA_MPI
-ggml-mpi.o: ggml-mpi.c ggml-mpi.h
-	$(CC)  $(CFLAGS)   -c $< -o $@
+ggml-mpi.o: ggml-mpi.cpp ggml-mpi.h
+	$(CXX) $(CXXFLAGS) -c $< -o $@
 endif # LLAMA_MPI
 
@@ -537,16 +537,16 @@ $(info )
 #
 # Build library
 #
 
-ggml.o: ggml.c ggml.h ggml-cuda.h
+ggml.o: ggml.cpp ggml.h ggml-cuda.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@
 
-ggml-alloc.o: ggml-alloc.c ggml.h ggml-alloc.h
+ggml-alloc.o: ggml-alloc.cpp ggml.h ggml-alloc.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@
 
-ggml-backend.o: ggml-backend.c ggml.h ggml-backend.h
+ggml-backend.o: ggml-backend.cpp ggml.h ggml-backend.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@
 
-ggml-quants.o: ggml-quants.c ggml.h ggml-quants.h
+ggml-quants.o: ggml-quants.cpp ggml.h ggml-quants.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@
 
 OBJS += ggml-alloc.o ggml-backend.o ggml-quants.o
diff --git a/ggml-quants.cpp b/ggml-quants.cpp
index a084f66c9c5860..094fb8ccb6c9c1 100644
--- a/ggml-quants.cpp
+++ b/ggml-quants.cpp
@@ -6595,8 +6595,8 @@ void ggml_vec_dot_q5_K_q8_K(const int n, float * __restrict__ s, const void * __restrict__ vx, const void * __restrict__ vy) {
 void ggml_vec_dot_q6_K_q8_K(const int n, float * __restrict__ s, const void * __restrict__ vx, const void * __restrict__ vy) {
     assert(n % QK_K == 0);
 
-    const block_q6_K * __restrict__ x = vx;
-    const block_q8_K * __restrict__ y = vy;
+    const block_q6_K * __restrict__ x = (const block_q6_K *)vx;
+    const block_q8_K * __restrict__ y = (const block_q8_K *)vy;
 
     const int nb = n / QK_K;
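Note: the casts added above are the pattern that repeats throughout this patch. C implicitly converts void * to any object pointer type; C++ does not, so every void * coming through ggml (tensor data, malloc results, scratch buffers) now needs an explicit cast. A minimal standalone sketch of the difference, with block_q6 standing in for the real block_q6_K:

    #include <cstdlib>

    struct block_q6 { int x; };

    int main() {
        void * vx = std::malloc(sizeof(block_q6));
        // const block_q6 * x = vx;                  // accepted by C, ill-formed in C++
        const block_q6 * x = (const block_q6 *) vx;  // explicit cast, valid in both
        (void) x;
        std::free(vx);
        return 0;
    }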
diff --git a/ggml.cpp b/ggml.cpp
index 2ccf51fe989b22..f3cb26f952f897 100644
--- a/ggml.cpp
+++ b/ggml.cpp
@@ -1818,12 +1818,31 @@ struct ggml_context {
     struct ggml_scratch scratch;
     struct ggml_scratch scratch_save;
+
+    // zero-initialize all members; C++ forbids the C99 compound-literal
+    // assignment previously used in ggml_init
+    ggml_context() :
+        mem_size(0),
+        mem_buffer(nullptr),
+        mem_buffer_owned(false),
+        no_alloc(false),
+        no_alloc_save(false),
+        n_objects(0),
+        objects_begin(nullptr),
+        objects_end(nullptr),
+        scratch(),
+        scratch_save()
+    {}
 };
 
 struct ggml_context_container {
     bool used;
 
     struct ggml_context context;
+
+    ggml_context_container() : used(false), context() {}
 };
 
 //
@@ -1851,6 +1870,11 @@ struct ggml_numa_nodes {
 struct ggml_state {
     struct ggml_context_container contexts[GGML_MAX_CONTEXTS];
     struct ggml_numa_nodes numa;
+
+    ggml_state() : contexts(), numa() {}
 };
 
 // global state
@@ -2222,12 +2246,13 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
-        // TODOFIXME
-        // g_state = (struct ggml_state) {
-        //     /*.contexts =*/ { { 0 } },
-        //     /*.numa =*/ {
-        //         .n_nodes = 0,
-        //         .total_cpus = 0,
-        //     },
-        // };
+        // C++ has no designated initializers; g_state is default-constructed
+        // (see the new ggml_state constructor), so only the NUMA bookkeeping
+        // needs an explicit reset here.
+        g_state.numa.n_nodes    = 0;
+        g_state.numa.total_cpus = 0;
 
         for (int i = 0; i < GGML_MAX_CONTEXTS; ++i) {
             g_state.contexts[i].used = false;
@@ -2277,18 +2302,19 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
     const size_t mem_size = params.mem_buffer ? params.mem_size : GGML_PAD(params.mem_size, GGML_MEM_ALIGN);
 
-    *ctx = (struct ggml_context) {
-        /*.mem_size         =*/ mem_size,
-        /*.mem_buffer       =*/ params.mem_buffer ? params.mem_buffer : GGML_ALIGNED_MALLOC(mem_size),
-        /*.mem_buffer_owned =*/ params.mem_buffer ? false : true,
-        /*.no_alloc         =*/ params.no_alloc,
-        /*.no_alloc_save    =*/ params.no_alloc,
-        /*.n_objects        =*/ 0,
-        /*.objects_begin    =*/ NULL,
-        /*.objects_end      =*/ NULL,
-        /*.scratch          =*/ { 0, 0, NULL, },
-        /*.scratch_save     =*/ { 0, 0, NULL, },
-    };
+    // C++ cannot assign a compound literal; initialize the context
+    // member-by-member instead. All members are reset so a context slot
+    // can be reused after ggml_free.
+    ctx->mem_size         = mem_size;
+    ctx->mem_buffer       = params.mem_buffer ? params.mem_buffer : GGML_ALIGNED_MALLOC(mem_size);
+    ctx->mem_buffer_owned = params.mem_buffer ? false : true;
+    ctx->no_alloc         = params.no_alloc;
+    ctx->no_alloc_save    = params.no_alloc;
+    ctx->n_objects        = 0;
+    ctx->objects_begin    = NULL;
+    ctx->objects_end      = NULL;
+    ctx->scratch          = ggml_scratch();
+    ctx->scratch_save     = ggml_scratch();
 
     GGML_ASSERT(ctx->mem_buffer != NULL);
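Note: the hunks above replace two C99 features that C++ lacks: compound-literal assignment (*ctx = (struct ggml_context){ ... }) and designated initializers (.n_nodes = 0). The two portable replacements used in this patch are default constructors (as added to ggml_context) and member-by-member assignment. A minimal sketch, with ctx_like as a hypothetical stand-in for ggml_context:

    #include <cstddef>

    struct ctx_like {
        size_t mem_size;
        void * mem_buffer;
        bool   mem_buffer_owned;
    };

    void init_ctx(ctx_like * c, size_t n, void * buf, bool owned) {
        // C99 only: *c = (struct ctx_like) { .mem_size = n, .mem_buffer = buf, .mem_buffer_owned = owned };
        // C++-portable replacement: assign each member explicitly
        c->mem_size         = n;
        c->mem_buffer       = buf;
        c->mem_buffer_owned = owned;
    }

    int main() {
        ctx_like c;
        init_ctx(&c, 0, NULL, false);
        return 0;
    }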
@@ -2488,7 +2514,7 @@ static struct ggml_tensor * ggml_new_tensor_impl(
             return NULL;
         }
 
-        data = (char * const) ctx->scratch.data + ctx->scratch.offs;
+        data = (void *)((char *)ctx->scratch.data + ctx->scratch.offs);
 
         ctx->scratch.offs += data_size;
     } else {
@@ -2502,29 +2528,29 @@ static struct ggml_tensor * ggml_new_tensor_impl(
     // TODO: for recoverable errors, we would need to free the data allocated from the scratch buffer here
 
     struct ggml_tensor * const result = (struct ggml_tensor *)((char *)ctx->mem_buffer + obj_new->offs);
 
-    *result = (struct ggml_tensor) {
-        /*.type         =*/ type,
-        /*.backend      =*/ GGML_BACKEND_CPU,
-        /*.buffer       =*/ NULL,
-        /*.n_dims       =*/ n_dims,
-        /*.ne           =*/ { 1, 1, 1, 1 },
-        /*.nb           =*/ { 0, 0, 0, 0 },
-        /*.op           =*/ GGML_OP_NONE,
-        /*.op_params    =*/ { 0 },
-        /*.is_param     =*/ false,
-        /*.grad         =*/ NULL,
-        /*.src          =*/ { NULL },
-        /*.perf_runs    =*/ 0,
-        /*.perf_cycles  =*/ 0,
-        /*.perf_time_us =*/ 0,
-        /*.view_src     =*/ view_src,
-        /*.view_offs    =*/ view_offs,
-        /*.data         =*/ obj_alloc_size > 0 ? (void *)(result + 1) : data,
-        /*.name         =*/ { 0 },
-        /*.extra        =*/ NULL,
-        /*.padding      =*/ { 0 },
-    };
+    // C++ again rules out the compound-literal assignment. memset covers every
+    // zero/NULL field (nb, op_params, is_param, grad, src, perf counters, name,
+    // extra, padding); the few non-zero fields are then set explicitly.
+    memset(result, 0, sizeof(struct ggml_tensor));
+    result->type    = type;
+    result->backend = GGML_BACKEND_CPU;
+    result->n_dims  = n_dims;
+    for (int i = 0; i < GGML_MAX_DIMS; ++i) {
+        result->ne[i] = 1;
+    }
+    result->op        = GGML_OP_NONE;
+    result->view_src  = view_src;
+    result->view_offs = view_offs;
+    result->data      = obj_alloc_size > 0 ? (void *)(result + 1) : data;
 
     // TODO: this should not be needed as long as we don't rely on aligned SIMD loads
     //ggml_assert_aligned(result->data);
@@ -2643,7 +2669,7 @@ struct ggml_tensor * ggml_set_i32 (struct ggml_tensor * tensor, int32_t value) {
     const int nc = tensor->ne[0];
     const size_t n1 = tensor->nb[1];
 
-    char * const data = tensor->data;
+    char * const data = (char *)tensor->data;
 
     switch (tensor->type) {
         case GGML_TYPE_I8:
@@ -2695,7 +2721,7 @@ struct ggml_tensor * ggml_set_f32(struct ggml_tensor * tensor, float value) {
     const int nc = tensor->ne[0];
     const size_t n1 = tensor->nb[1];
 
-    char * const data = tensor->data;
+    char * const data = (char *)tensor->data;
 
     switch (tensor->type) {
         case GGML_TYPE_I8:
@@ -2807,6 +2833,7 @@ int32_t ggml_get_i32_1d(const struct ggml_tensor * tensor, int i) {
     return 0.0f;
 }
 
+void ggml_tensor_checksum(const struct ggml_tensor * tensor);
 void ggml_tensor_checksum(const struct ggml_tensor * tensor) {
     const int64_t ne = ggml_nelements(tensor) ;
     float fmin=0;
@@ -2839,7 +2866,7 @@ void ggml_tensor_checksum(const struct ggml_tensor * tensor) {
         fmin,
         fsum,
         tensor->name,
-        type_name
+        std::string(type_name).c_str()
     );
 }
@@ -3125,7 +3152,7 @@ struct ggml_tensor * ggml_view_tensor(
 struct ggml_tensor * ggml_get_first_tensor(struct ggml_context * ctx) {
     struct ggml_object * obj = ctx->objects_begin;
 
-    char * const mem_buffer = ctx->mem_buffer;
+    char * const mem_buffer = (char *)ctx->mem_buffer;
 
     while (obj != NULL) {
         if (obj->type == GGML_OBJECT_TENSOR) {
@@ -3142,7 +3169,7 @@ struct ggml_tensor * ggml_get_next_tensor(struct ggml_context * ctx, struct ggml_tensor * tensor) {
     struct ggml_object * obj = (struct ggml_object *) ((char *)tensor - GGML_OBJECT_SIZE);
     obj = obj->next;
 
-    char * const mem_buffer = ctx->mem_buffer;
+    char * const mem_buffer = (char *)ctx->mem_buffer;
 
     while (obj != NULL) {
         if (obj->type == GGML_OBJECT_TENSOR) {
@@ -3158,7 +3185,7 @@ struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name) {
     struct ggml_object * obj = ctx->objects_begin;
 
-    char * const mem_buffer = ctx->mem_buffer;
+    char * const mem_buffer = (char *)ctx->mem_buffer;
 
     while (obj != NULL) {
         if (obj->type == GGML_OBJECT_TENSOR) {
@@ -3354,7 +3381,7 @@ static struct ggml_tensor * ggml_acc_impl(
     struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
 
-    int32_t params[] = { nb1, nb2, nb3, offset, inplace ? 1 : 0 };
+    int32_t params[] = { (int32_t)nb1, (int32_t)nb2, (int32_t)nb3, (int32_t)offset, inplace ? 1 : 0 };
     ggml_set_op_params(result, params, sizeof(params));
 
     result->op = GGML_OP_ACC;
@@ -4207,7 +4234,7 @@ static struct ggml_tensor * ggml_set_impl(
     // make a view of the destination
     struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
 
-    int32_t params[] = { nb1, nb2, nb3, offset, inplace ? 1 : 0 };
+    int32_t params[] = { (int32_t)nb1, (int32_t)nb2, (int32_t)nb3, (int32_t)offset, inplace ? 1 : 0 };
     ggml_set_op_params(result, params, sizeof(params));
 
     result->op = GGML_OP_SET;
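Note: the (int32_t) casts above are needed because nb1/nb2/nb3/offset are size_t, and C++11 list-initialization rejects narrowing conversions that C accepts silently. The casts restore the old behavior, including its caveat: values above INT32_MAX are truncated. Sketch:

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    int main() {
        size_t offset = 12;
        // int32_t params[] = { offset };         // ill-formed in C++: narrowing in a braced list
        int32_t params[] = { (int32_t) offset };  // what the patch does; truncates above INT32_MAX
        std::printf("%d\n", params[0]);
        return 0;
    }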
@@ -5464,7 +5491,7 @@ struct ggml_tensor * ggml_pool_2d(
     };
     struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne);
 
-    int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
+    int32_t params[] = { op, k0, k1, s0, s1, (int32_t)p0, (int32_t)p1 };
     ggml_set_op_params(result, params, sizeof(params));
 
     result->op = GGML_OP_POOL_2D;
@@ -8324,7 +8351,7 @@ static void ggml_compute_forward_repeat_back_f32(
     GGML_ASSERT(nb00 == sizeof(float));
 
     if (ggml_is_contiguous(dst)) {
-        ggml_vec_set_f32(ne0*ne1*ne2*ne3, dst->data, 0);
+        ggml_vec_set_f32(ne0*ne1*ne2*ne3, (float *)dst->data, 0);
     } else {
         for (int k3 = 0; k3 < ne3; k3++) {
             for (int k2 = 0; k2 < ne2; k2++) {
@@ -9490,7 +9517,7 @@ static void ggml_compute_forward_mul_mat(
     // nb01 >= nb00 - src0 is not transposed
     //   compute by src0 rows
 
-    fprintf(stderr, "%s: params_type:%d src0:%p ->data %p src1:%p ->data %p\n", __func__, params->type, (void*)src0, src0->data, (void*)src1, src1->data);
+    fprintf(stderr, "%s: params_type:%d src0:%p ->data %p src1:%p ->data %p\n", __func__, params->type, (const void*)src0, src0->data, (const void*)src1, src1->data);
 
 #if defined(GGML_USE_CLBLAST)
     if (ggml_cl_can_mul_mat(src0, src1, dst)) {
@@ -9556,7 +9583,7 @@ static void ggml_compute_forward_mul_mat(
     if (params->type == GGML_TASK_INIT) {
         if (src1->type != vec_dot_type) {
-            char * wdata = params->wdata;
+            char * wdata = (char *)params->wdata;
             const size_t row_size = ne10*ggml_type_size(vec_dot_type)/ggml_blck_size(vec_dot_type);
 
             for (int64_t i13 = 0; i13 < ne13; ++i13) {
@@ -9582,7 +9609,7 @@ static void ggml_compute_forward_mul_mat(
     const int64_t nr0 = ne01;           // src0 rows
     const int64_t nr1 = ne11*ne12*ne13; // src1 rows
 
-    printf("nr0 = %lld, nr1 = %lld\n", nr0, nr1);
+    printf("nr0 = %lld, nr1 = %lld\n", (long long)nr0, (long long)nr1);
 
     // distribute the thread work across the inner or outer loop based on which one is larger
 
@@ -9601,7 +9628,7 @@ static void ggml_compute_forward_mul_mat(
     const int64_t ir110 = dr1*ith1;
     const int64_t ir111 = MIN(ir110 + dr1, nr1);
 
-    printf("ir010 = %6lld, ir011 = %6lld, ir110 = %6lld, ir111 = %6lld\n", ir010, ir011, ir110, ir111);
+    printf("ir010 = %6lld, ir011 = %6lld, ir110 = %6lld, ir111 = %6lld\n", (long long)ir010, (long long)ir011, (long long)ir110, (long long)ir111);
 
     // threads with no work simply yield (not sure if it helps)
     if (ir010 >= ir011 || ir110 >= ir111) {
@@ -9710,7 +9737,7 @@ static void ggml_compute_forward_out_prod_f32(
         return;
     }
 #endif
-        ggml_vec_set_f32(ne0*ne1*ne2*ne3, dst->data, 0);
+        ggml_vec_set_f32(ne0*ne1*ne2*ne3, (float *)dst->data, 0);
         return;
     }
@@ -9893,7 +9920,7 @@ static void ggml_compute_forward_out_prod_q_f32(
     // TODO: #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CLBLAST)
 
     if (params->type == GGML_TASK_INIT) {
-        ggml_vec_set_f32(ne0*ne1*ne2*ne3, dst->data, 0);
+        ggml_vec_set_f32(ne0*ne1*ne2*ne3, (float *)dst->data, 0);
         return;
     }
@@ -11907,7 +11934,7 @@ static void ggml_compute_forward_pool_1d(
         struct ggml_tensor * dst) {
 
     const int32_t * opts = (const int32_t *)dst->op_params;
-    enum ggml_op_pool op = opts[0];
+    enum ggml_op_pool op = (ggml_op_pool)opts[0];
     const int k0 = opts[1];
     const int s0 = opts[2];
     const int p0 = opts[3];
@@ -11931,7 +11958,7 @@ static void ggml_compute_forward_pool_2d(
     }
 
     const int32_t * opts = (const int32_t *)dst->op_params;
-    enum ggml_op_pool op = opts[0];
+    enum ggml_op_pool op = (ggml_op_pool)opts[0];
     const int k0 = opts[1];
     const int k1 = opts[2];
     const int s0 = opts[3];
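Note: the (ggml_op_pool) casts are the enum side of the same strictness: C lets an int initialize an enum, C++ requires an explicit conversion. Sketch, with ggml_op_pool_like standing in for the real enum:

    #include <cstdint>

    enum ggml_op_pool_like { POOL_MAX, POOL_AVG };

    int main() {
        const int32_t opts[] = { POOL_AVG };               // packed as int32, like op_params
        // ggml_op_pool_like op = opts[0];                 // valid C, error in C++
        ggml_op_pool_like op = (ggml_op_pool_like) opts[0];
        return op == POOL_AVG ? 0 : 1;
    }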
@@ -13866,50 +13893,50 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor) {
         return;
     }
 
-    float fmin1=0;
-    float ffirst1=0;
-    float fmax1=0;
-    float fsum1=0;
+    // float fmin1=0;
+    // float ffirst1=0;
+    // float fmax1=0;
+    // float fsum1=0;
 
-    float fmin0=0;
-    float ffirst0=0;
-    float fmax0=0;
-    float fsum0=0;
+    // float fmin0=0;
+    // float ffirst0=0;
+    // float fmax0=0;
+    // float fsum0=0;
 
-    float fmin2=0;
-    float ffirst2=0;
-    float fmax2=0;
-    float fsum2=0;
+    // float fmin2=0;
+    // float ffirst2=0;
+    // float fmax2=0;
+    // float fsum2=0;
 
-    int64_t elem_src  = ggml_nelements(tensor->src[0]);
-    int64_t elem_src1 = 0; //ggml_nelements(tensor->src[1]);
+    // int64_t elem_src  = ggml_nelements(tensor->src[0]);
+    // int64_t elem_src1 = 0; //ggml_nelements(tensor->src[1]);
 
-    if (tensor->src[0]) {
-        const size_t size = ggml_nbytes(tensor->src[0])/sizeof(float);
-        for (int i = 0; i < size; i++) {
-            float f = *(((float*)(tensor->src[0]->data))+i);
-        }
-    }
+    // if (tensor->src[0]) {
+    //     const size_t size = ggml_nbytes(tensor->src[0])/sizeof(float);
+    //     for (size_t i = 0; i < size; i++) {
+    //         float f = *(((float*)(tensor->src[0]->data))+i);
+    //     }
+    // }
 
-    if (tensor->src[1]) {
-        elem_src1 = ggml_nelements(tensor->src[1]);
-        const size_t size = ggml_nbytes(tensor->src[1])/sizeof(float);
-        for (int i = 0; i < size; i++) {
-            float f = *(((float*)(tensor->src[1]->data))+i);
-            if (i == 0) {
-                ffirst1 = f;
-                fmin1 = f;
-                fmax1 = f;
-            }
-            fsum1 += f;
-            if (f < fmin1) {
-                fmin1 = f;
-            }
-            if (f > fmax1) {
-                fmax1 = f;
-            }
-        }
-    }
+    // if (tensor->src[1]) {
+    //     elem_src1 = ggml_nelements(tensor->src[1]);
+    //     const size_t size = ggml_nbytes(tensor->src[1])/sizeof(float);
+    //     for (size_t i = 0; i < size; i++) {
+    //         float f = *(((float*)(tensor->src[1]->data))+i);
+    //         if (i == 0) {
+    //             ffirst1 = f;
+    //             fmin1 = f;
+    //             fmax1 = f;
+    //         }
+    //         fsum1 += f;
+    //         if (f < fmin1) {
+    //             fmin1 = f;
+    //         }
+    //         if (f > fmax1) {
+    //             fmax1 = f;
+    //         }
+    //     }
+    // }
 
 #ifdef GGML_USE_CUBLAS
     bool skip_cpu = ggml_cuda_compute_forward(params, tensor);
@@ -14271,25 +14298,25 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor) {
     }
 
     // now report
-    int64_t elem_dst = ggml_nelements(tensor);
+    // int64_t elem_dst = ggml_nelements(tensor);
 
-    const size_t size = ggml_nbytes(tensor)/sizeof(float);
+    // const size_t size = ggml_nbytes(tensor)/sizeof(float);
 
-    for (int i = 0; i < size; i++) {
-        float f = *(((float*)(tensor->data))+i);
-        if (i == 0) {
-            ffirst2 = f;
-            fmin2 = f;
-            fmax2 = f;
-        }
-        fsum2 += f;
-        if (f < fmin2) {
-            fmin2 = f;
-        }
-        if (f > fmax2) {
-            fmax2 = f;
-        }
-    }
+    // for (size_t i = 0; i < size; i++) {
+    //     float f = *(((float*)(tensor->data))+i);
+    //     if (i == 0) {
+    //         ffirst2 = f;
+    //         fmin2 = f;
+    //         fmax2 = f;
+    //     }
+    //     fsum2 += f;
+    //     if (f < fmin2) {
+    //         fmin2 = f;
+    //     }
+    //     if (f > fmax2) {
+    //         fmax2 = f;
+    //     }
+    // }
 
     if (tensor->src[1]) {
         ggml_tensor_checksum(tensor->src[0]);
@@ -14427,7 +14454,7 @@ static struct ggml_hash_set ggml_hash_set_new(size_t size) {
     size = ggml_hash_size(size);
     struct ggml_hash_set result;
     result.size = size;
-    result.keys = malloc(sizeof(struct ggml_tensor *) * size);
+    result.keys = (ggml_tensor **)malloc(sizeof(struct ggml_tensor *) * size);
     memset(result.keys, 0, sizeof(struct ggml_tensor *) * size);
     return result;
 }
@@ -14442,9 +14469,9 @@ struct hash_map {
 };
 
 static struct hash_map * ggml_new_hash_map(size_t size) {
-    struct hash_map * result = malloc(sizeof(struct hash_map));
+    struct hash_map * result = (hash_map *)malloc(sizeof(struct hash_map));
     result->set = ggml_hash_set_new(size);
-    result->vals = malloc(sizeof(struct ggml_tensor *) * result->set.size);
+    result->vals = (ggml_tensor **)malloc(sizeof(struct ggml_tensor *) * result->set.size);
     memset(result->vals, 0, sizeof(struct ggml_tensor *) * result->set.size);
     return result;
 }
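Note: malloc is the most common source of the void * casts in this patch. Keeping malloc/free here (rather than moving to new[]/delete[]) preserves the existing free()-based cleanup paths in the C code. Sketch, with ggml_tensor_like as a stand-in:

    #include <cstdlib>

    struct ggml_tensor_like { int dummy; };

    int main() {
        // C: keys = malloc(...);  C++: the void * result must be cast
        ggml_tensor_like ** keys = (ggml_tensor_like **) std::malloc(4 * sizeof(ggml_tensor_like *));
        if (keys == NULL) { return 1; }
        std::free(keys);
        return 0;
    }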
@@ -15564,20 +15591,20 @@ struct ggml_cgraph * ggml_new_graph_custom(struct ggml_context * ctx, size_t size, bool grads) {
             (grads ? (char *)(grads_ptr + size) : (char *)(hash_keys_ptr + hash_size)) - (char *)cgraph));
 
     memset(hash_keys_ptr, 0, hash_size * sizeof(struct ggml_tensor *));
 
-    *cgraph = (struct ggml_cgraph) {
-        /*.size         =*/ size,
-        /*.n_nodes      =*/ 0,
-        /*.n_leafs      =*/ 0,
-        /*.nodes        =*/ nodes_ptr,
-        /*.grads        =*/ grads_ptr,
-        /*.leafs        =*/ leafs_ptr,
-        /*.hash_table   =*/ { hash_size, hash_keys_ptr },
-        /*.order        =*/ GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT,
-        /*.perf_runs    =*/ 0,
-        /*.perf_cycles  =*/ 0,
-        /*.perf_time_us =*/ 0,
-    };
+    // member-wise init replaces the C99 compound literal
+    // (field names as in the old initializer's comments)
+    cgraph->size         = size;
+    cgraph->n_nodes      = 0;
+    cgraph->n_leafs      = 0;
+    cgraph->nodes        = nodes_ptr;
+    cgraph->grads        = grads_ptr;
+    cgraph->leafs        = leafs_ptr;
+    cgraph->hash_table   = { hash_size, hash_keys_ptr };
+    cgraph->order        = GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT;
+    cgraph->perf_runs    = 0;
+    cgraph->perf_cycles  = 0;
+    cgraph->perf_time_us = 0;
 
     return cgraph;
 }
@@ -15591,19 +15618,19 @@ struct ggml_cgraph * ggml_graph_view(struct ggml_context * ctx, struct ggml_cgraph * cgraph0, int i0, int i1) {
     struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_GRAPH, obj_size);
     struct ggml_cgraph * cgraph = (struct ggml_cgraph *) ((char *) ctx->mem_buffer + obj->offs);
 
-    *cgraph = (struct ggml_cgraph) {
-        /*.size         =*/ 0,
-        /*.n_nodes      =*/ i1 - i0,
-        /*.n_leafs      =*/ 0,
-        /*.nodes        =*/ cgraph0->nodes + i0,
-        /*.grads        =*/ cgraph0->grads ? cgraph0->grads + i0 : NULL,
-        /*.leafs        =*/ NULL,
-        /*.hash_table   =*/ { 0, NULL },
-        /*.order        =*/ cgraph0->order,
-        /*.perf_runs    =*/ 0,
-        /*.perf_cycles  =*/ 0,
-        /*.perf_time_us =*/ 0,
-    };
+    cgraph->size         = 0;
+    cgraph->n_nodes      = i1 - i0;
+    cgraph->n_leafs      = 0;
+    cgraph->nodes        = cgraph0->nodes + i0;
+    cgraph->grads        = cgraph0->grads ? cgraph0->grads + i0 : NULL;
+    cgraph->leafs        = NULL;
+    cgraph->hash_table   = { 0, NULL };
+    cgraph->order        = cgraph0->order;
+    cgraph->perf_runs    = 0;
+    cgraph->perf_cycles  = 0;
+    cgraph->perf_time_us = 0;
 
     return cgraph;
 }
@@ -16368,11 +16395,12 @@ int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) {
     // create thread pool
     if (n_threads > 1) {
         for (int j = 1; j < n_threads; ++j) {
-            workers[j] = (struct ggml_compute_state) {
-                .thrd   = 0,
-                .ith    = j,
-                .shared = &state_shared,
-            };
+            // designated initializers are C-only; assign the members directly
+            workers[j].thrd   = 0;
+            workers[j].ith    = j;
+            workers[j].shared = &state_shared;
 
             const int rc = ggml_thread_create(&workers[j].thrd, NULL, ggml_graph_compute_thread, &workers[j]);
             GGML_ASSERT(rc == 0);
@@ -16692,11 +16720,12 @@ struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context ** ctx_data, struct ggml_context ** ctx_eval) {
     {
         const size_t overhead = 1*ggml_tensor_overhead();
 
-        struct ggml_init_params params = {
-            .mem_size   = fsize + overhead,
-            .mem_buffer = NULL,
-            .no_alloc   = false,
-        };
+        struct ggml_init_params params;
+        params.mem_size   = fsize + overhead;
+        params.mem_buffer = NULL;
+        params.no_alloc   = false;
 
         *ctx_data = ggml_init(params);
@@ -16748,11 +16777,12 @@ struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context ** ctx_data, struct ggml_context ** ctx_eval) {
     {
         const size_t overhead = (n_leafs + n_nodes)*ggml_tensor_overhead() + ggml_graph_overhead_custom(graph_size, false);
 
-        struct ggml_init_params params = {
-            .mem_size   = size_eval + overhead,
-            .mem_buffer = NULL,
-            .no_alloc   = true,
-        };
+        struct ggml_init_params params;
+        params.mem_size   = size_eval + overhead;
+        params.mem_buffer = NULL;
+        params.no_alloc   = true;
 
         *ctx_eval = ggml_init(params);
@@ -16960,7 +16990,7 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {
             continue;
         }
 
-        GGML_PRINT("perf_total_per_op_us[%16s] = %7.3f ms\n", ggml_op_name(i), (double) perf_total_per_op_us[i] / 1000.0);
+        GGML_PRINT("perf_total_per_op_us[%16s] = %7.3f ms\n", ggml_op_name((ggml_op)i), (double) perf_total_per_op_us[i] / 1000.0);
     }
 
     GGML_PRINT("========================================\n");
@@ -17233,10 +17263,10 @@ static enum ggml_opt_result ggml_opt_adam(
     const float accum_norm = 1.0f / (float) n_accum;
 
     float * g = (float*)opt->adam.g->data; // gradients
-    float * m = opt->adam.m->data; // first moment
-    float * v = opt->adam.v->data; // second moment
+    float * m = (float*)opt->adam.m->data; // first moment
+    float * v = (float*)opt->adam.v->data; // second moment
 
-    float * pf = params.past > 0 ? opt->adam.pf->data : NULL; // past function values
+    float * pf = params.past > 0 ? (float*)opt->adam.pf->data : NULL; // past function values
 
     struct ggml_cplan cplan = ggml_graph_plan(gb, params.n_threads);
     struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_WORK_BUFFER, cplan.work_size);
@@ -17504,7 +17534,7 @@ static enum ggml_opt_result linesearch_backtracking(
         } else {
             // Armijo condition is satisfied
             if (params->lbfgs.linesearch == GGML_LINESEARCH_BACKTRACKING_ARMIJO) {
-                return count;
+                return (ggml_opt_result)count;
             }
 
             ggml_vec_dot_f32(nx, &dg, g, d);
@@ -17522,7 +17552,7 @@ static enum ggml_opt_result linesearch_backtracking(
                     width = dec;
                 } else {
                     // strong Wolfe condition (GGML_LINESEARCH_BACKTRACKING_STRONG_WOLFE)
-                    return count;
+                    return (ggml_opt_result)count;
                 }
             }
         }
@@ -17587,11 +17617,11 @@ static enum ggml_opt_result ggml_opt_lbfgs(
     struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_WORK_BUFFER, cplan.work_size);
     cplan.work_data = (uint8_t *)ctx->mem_buffer + obj->offs;
 
-    float * x  = opt->lbfgs.x->data;  // current parameters
-    float * xp = opt->lbfgs.xp->data; // previous parameters
-    float * g  = opt->lbfgs.g->data;  // current gradient
-    float * gp = opt->lbfgs.gp->data; // previous gradient
-    float * d  = opt->lbfgs.d->data;  // search direction
+    float * x  = (float*)opt->lbfgs.x->data;  // current parameters
+    float * xp = (float*)opt->lbfgs.xp->data; // previous parameters
+    float * g  = (float*)opt->lbfgs.g->data;  // current gradient
+    float * gp = (float*)opt->lbfgs.gp->data; // previous gradient
+    float * d  = (float*)opt->lbfgs.d->data;  // search direction
 
-    float * pf = params.past > 0 ? opt->lbfgs.pf->data : NULL; // past function values
+    float * pf = params.past > 0 ? (float*)opt->lbfgs.pf->data : NULL; // past function values
@@ -17606,10 +17636,10 @@ static enum ggml_opt_result ggml_opt_lbfgs(
     ggml_opt_get_params(np, ps, x);
 
     // the L-BFGS memory
-    float * lm_alpha = opt->lbfgs.lmal->data;
-    float * lm_ys    = opt->lbfgs.lmys->data;
-    float * lm_s     = opt->lbfgs.lms->data;
-    float * lm_y     = opt->lbfgs.lmy->data;
+    float * lm_alpha = (float*)opt->lbfgs.lmal->data;
+    float * lm_ys    = (float*)opt->lbfgs.lmys->data;
+    float * lm_s     = (float*)opt->lbfgs.lms->data;
+    float * lm_y     = (float*)opt->lbfgs.lmy->data;
 
     bool cancel = false;
@@ -17706,7 +17736,7 @@ static enum ggml_opt_result ggml_opt_lbfgs(
                 ggml_vec_cpy_f32(nx, x, xp);
                 ggml_vec_cpy_f32(nx, g, gp);
 
-                return ls;
+                return (ggml_opt_result)ls;
             }
 
             opt->loss_after = fx;
@@ -17817,34 +17847,35 @@ struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type) {
     switch (type) {
         case GGML_OPT_ADAM:
             {
-                result = (struct ggml_opt_params) {
-                    .type       = GGML_OPT_ADAM,
-                    .graph_size = GGML_DEFAULT_GRAPH_SIZE,
-                    .n_threads  = 1, // FIXME: GGML_DEFAULT_N_THREADS ?
-                    .past       = 0,
-                    .delta      = 1e-5f,
-
-                    .max_no_improvement = 100,
-
-                    .print_forward_graph  = true,
-                    .print_backward_graph = true,
-
-                    .n_gradient_accumulation = 1,
-
-                    .adam = {
-                        .n_iter = 10000,
-                        .sched  = 1.000f,
-                        .decay  = 0.0f,
-                        .decay_min_ndim = 2,
-                        .alpha  = 0.001f,
-                        .beta1  = 0.9f,
-                        .beta2  = 0.999f,
-                        .eps    = 1e-8f,
-                        .eps_f  = 1e-5f,
-                        .eps_g  = 1e-3f,
-                        .gclip  = 0.0f,
-                    },
-                };
+                // designated initializers again; set the fields one by one
+                result.type       = GGML_OPT_ADAM;
+                result.graph_size = GGML_DEFAULT_GRAPH_SIZE;
+                result.n_threads  = 1; // FIXME: GGML_DEFAULT_N_THREADS ?
+                result.past       = 0;
+                result.delta      = 1e-5f;
+
+                result.max_no_improvement = 100;
+
+                result.print_forward_graph  = true;
+                result.print_backward_graph = true;
+
+                result.n_gradient_accumulation = 1;
+
+                result.adam.n_iter = 10000;
+                result.adam.sched  = 1.000f;
+                result.adam.decay  = 0.0f;
+                result.adam.decay_min_ndim = 2;
+                result.adam.alpha  = 0.001f;
+                result.adam.beta1  = 0.9f;
+                result.adam.beta2  = 0.999f;
+                result.adam.eps    = 1e-8f;
+                result.adam.eps_f  = 1e-5f;
+                result.adam.eps_g  = 1e-3f;
+                result.adam.gclip  = 0.0f;
             } break;
         case GGML_OPT_LBFGS:
             break;
@@ -17964,11 +17995,11 @@ enum ggml_opt_result ggml_opt(
         struct ggml_tensor * f) {
 
     bool free_ctx = false;
     if (ctx == NULL) {
-        struct ggml_init_params params_ctx = {
-            .mem_size   = 16*1024*1024,
-            .mem_buffer = NULL,
-            .no_alloc   = false,
-        };
+        struct ggml_init_params params_ctx;
+        params_ctx.mem_size   = 16*1024*1024;
+        params_ctx.mem_buffer = NULL;
+        params_ctx.no_alloc   = false;
 
         ctx = ggml_init(params_ctx);
         if (ctx == NULL) {
@@ -18615,11 +18646,13 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params) {
             (ctx->header.n_tensors    )*ggml_tensor_overhead() :
             (ctx->header.n_tensors + 1)*ggml_tensor_overhead() + ctx->size;
 
-        struct ggml_init_params pdata = {
-            .mem_size   = mem_size,
-            .mem_buffer = NULL,
-            .no_alloc   = params.no_alloc,
-        };
+        struct ggml_init_params pdata;
+        pdata.mem_size   = mem_size;
+        pdata.mem_buffer = NULL;
+        pdata.no_alloc   = params.no_alloc;
 
         *params.ctx = ggml_init(pdata);
diff --git a/ggml.h b/ggml.h
index f2fce0f22d357a..6d625fd6773560 100644
--- a/ggml.h
+++ b/ggml.h
@@ -571,6 +571,12 @@ extern "C" {
         size_t offs;
         size_t size;
        void * data;
+
+        // NOTE: a constructor makes this header C++-only
+        ggml_scratch()
+            : offs(0),
+              size(0),
+              data(0)
+        {}
     };
 
     struct ggml_init_params {
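Note: since struct ggml_init_params also loses its designated initializers under this patch, callers must populate it field-by-field as well. A hypothetical smoke test (not part of the patch; it uses only fields and functions that appear in the diff above):

    // caller.cpp
    #include "ggml.h"

    int main() {
        struct ggml_init_params params;
        params.mem_size   = 16*1024*1024;  // same default ggml_opt uses above
        params.mem_buffer = NULL;          // let ggml allocate the pool
        params.no_alloc   = false;

        struct ggml_context * ctx = ggml_init(params);
        if (ctx == NULL) { return 1; }
        ggml_free(ctx);
        return 0;
    }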