[feature] load fake model weight.
marvin-Yu committed Nov 13, 2023
1 parent c294e7c commit 2d0f4dc
Showing 21 changed files with 203 additions and 0 deletions.
17 changes: 17 additions & 0 deletions examples/model_config/baichuan2-13b/config.ini
@@ -0,0 +1,17 @@
[baichuan]
model_name = /data/models/Baichuan2-13B-Chat
head_num = 40
size_per_head = 128
inter_size = 13696
max_pos_seq_len = 0
model_max_length = 4096
num_layer = 40
rms_norm_eps = 1e-6
layernorm_type = pre_layernorm
activation_type = silu
has_post_decoder_layernorm = 1
vocab_size = 125696
start_id = 1
end_id = 2
weight_data_type = fp16

Binary file not shown.
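Each of these config.ini files is a flat key = value list under a single model-family section (here [baichuan]); the same keys repeat in every config below, only the values change per model. As a rough illustrative sketch (not the project's actual config reader), the snippet below parses such a file and derives the hidden size from head_num and size_per_head; the path is the file added in this commit.

```cpp
// Minimal sketch of reading one of these config.ini files (illustrative only,
// not the repository's actual loader). It collects key = value pairs and
// derives the hidden size.
#include <fstream>
#include <iostream>
#include <map>
#include <string>

static std::map<std::string, std::string> parseIni(const std::string &path) {
    std::map<std::string, std::string> kv;
    std::ifstream file(path);
    std::string line;
    auto trim = [](std::string s) {
        s.erase(0, s.find_first_not_of(" \t"));
        s.erase(s.find_last_not_of(" \t\r") + 1);
        return s;
    };
    while (std::getline(file, line)) {
        auto eq = line.find('=');
        if (eq == std::string::npos) continue; // skips "[baichuan]" and blank lines
        kv[trim(line.substr(0, eq))] = trim(line.substr(eq + 1));
    }
    return kv;
}

int main() {
    auto cfg = parseIni("examples/model_config/baichuan2-13b/config.ini");
    int headNum = std::stoi(cfg["head_num"]);          // 40
    int sizePerHead = std::stoi(cfg["size_per_head"]); // 128
    std::cout << "hidden size = " << headNum * sizePerHead << std::endl; // 5120
    return 0;
}
```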
16 changes: 16 additions & 0 deletions examples/model_config/baichuan2-7b/config.ini
@@ -0,0 +1,16 @@
[baichuan]
model_name = /data/models/Baichuan2-7B-Chat
head_num = 32
size_per_head = 128
inter_size = 11008
max_pos_seq_len = 4096
model_max_length = 4096
num_layer = 32
rms_norm_eps = 1e-6
layernorm_type = pre_layernorm
activation_type = silu
has_post_decoder_layernorm = 1
vocab_size = 125696
start_id = 1
end_id = 2
weight_data_type = fp16
Binary file not shown.
16 changes: 16 additions & 0 deletions examples/model_config/chatglm-6b/config.ini
@@ -0,0 +1,16 @@
[chatglm]
model_name = /data/models/chatglm-6b-hf/
head_num = 32
size_per_head = 128
inter_size = 16384
max_pos_seq_len = 2048
num_layer = 28
layernorm_eps = 1e-5
layernorm_type = pre_layernorm
activation_type = Gelu
has_post_decoder_layernorm = 0
vocab_size = 130528
start_id = 130004
end_id = 130005
weight_data_type = fp16

Binary file added examples/model_config/chatglm-6b/tokenizer.model
Binary file not shown.
22 changes: 22 additions & 0 deletions examples/model_config/chatglm2-6b/config.ini
@@ -0,0 +1,22 @@
[chatglm2]
model_name = /data/chatglm2-6b-hf/
head_num = 32
size_per_head = 128
inter_size = 13696
max_pos_seq_len = 32768
num_layer = 28
layernorm_eps = 1e-05
layernorm_type = pre_layernorm
activation_type = swiglu
has_post_decoder_layernorm = 1
vocab_size = 65024
start_id = None
end_id = 2
weight_data_type = fp16
kv_channels = 128
rmsnorm = 1
apply_residual_connection_post_layernorm = 0
multi_query_attention = 1
kv_head_num = 2
pad_id = None

Binary file not shown.
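Unlike the configs above, chatglm2-6b enables multi_query_attention with only kv_head_num = 2 K/V heads next to 32 query heads. A back-of-the-envelope sketch, using the values from the config above and fp16 (2 bytes per element), of how much that shrinks the per-token KV cache:

```cpp
// Rough KV-cache sizing for the chatglm2-6b config above: multi-query
// attention stores kv_head_num heads per K and V instead of head_num.
#include <cstdio>

int main() {
    const long long headNum = 32, kvHeadNum = 2, sizePerHead = 128, numLayer = 28;
    const long long bytesPerElem = 2; // fp16 (weight_data_type above)

    // K and V each hold kv_head_num * size_per_head values per token per layer.
    long long mqa = 2 * kvHeadNum * sizePerHead * bytesPerElem * numLayer; // ~28 KB
    long long mha = 2 * headNum * sizePerHead * bytesPerElem * numLayer;   // ~448 KB

    std::printf("KV cache per token: %lld bytes with MQA vs %lld with full MHA (%lldx)\n",
            mqa, mha, mha / mqa);
    return 0;
}
```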
16 changes: 16 additions & 0 deletions examples/model_config/llama-13b/config.ini
@@ -0,0 +1,16 @@
[llama]
model_name = /data/Chinese-Alpaca-13B-HF
head_num = 40
size_per_head = 128
inter_size = 13824
max_pos_seq_len = 2048
num_layer = 40
rms_norm_eps = 1e-6
layernorm_type = pre_layernorm
activation_type = silu
has_post_decoder_layernorm = 1
vocab_size = 49954
start_id = 1
end_id = 2
weight_data_type = fp16

Binary file added examples/model_config/llama-13b/tokenizer.model
Binary file not shown.
16 changes: 16 additions & 0 deletions examples/model_config/llama-2-13b/config.ini
@@ -0,0 +1,16 @@
[llama]
model_name = /data/models/llama-2-13b-chat-hf/
head_num = 40
size_per_head = 128
inter_size = 13824
max_pos_seq_len = 2048
num_layer = 40
rms_norm_eps = 1e-6
layernorm_type = pre_layernorm
activation_type = silu
has_post_decoder_layernorm = 1
vocab_size = 32000
start_id = 1
end_id = 2
weight_data_type = fp16

17 changes: 17 additions & 0 deletions examples/model_config/llama-2-70b/config.ini
@@ -0,0 +1,17 @@
[llama]
model_name = /data/models/llama-2-70b-chat-hf/
head_num = 64
kv_head_num = 8
size_per_head = 128
inter_size = 28672
max_pos_seq_len = 2048
num_layer = 80
rms_norm_eps = 1e-6
layernorm_type = pre_layernorm
activation_type = silu
has_post_decoder_layernorm = 1
vocab_size = 32000
start_id = 1
end_id = 2
weight_data_type = fp16

Binary file not shown.
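llama-2-70b is the only llama config here with an explicit kv_head_num (8 K/V heads for 64 query heads, i.e. grouped-query attention). As a sanity check on the dimensions above, a rough parameter count, assuming the usual LLaMA layout of a gated SiLU MLP (gate/up/down projections) and untied embeddings and ignoring norm weights (assumptions, since the config only lists sizes), lands close to the expected 70B:

```cpp
// Rough parameter count from the llama-2-70b config above. The MLP/embedding
// layout is assumed (standard LLaMA), not stated in the config itself.
#include <cstdio>

int main() {
    const long long headNum = 64, kvHeadNum = 8, sizePerHead = 128;
    const long long numLayer = 80, interSize = 28672, vocabSize = 32000;
    const long long hidden = headNum * sizePerHead;   // 8192
    const long long kvDim = kvHeadNum * sizePerHead;  // 1024: K/V projections are 8x narrower

    long long attn = hidden * hidden * 2   // Wq, Wo
                   + hidden * kvDim * 2;   // Wk, Wv (grouped-query attention)
    long long mlp = hidden * interSize * 2 // gate and up projections
                  + interSize * hidden;    // down projection
    long long embed = vocabSize * hidden * 2; // input embedding + output head

    std::printf("~%.1fB parameters\n", ((attn + mlp) * numLayer + embed) / 1e9); // ~69.0B
    return 0;
}
```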
16 changes: 16 additions & 0 deletions examples/model_config/llama-2-7b/config.ini
@@ -0,0 +1,16 @@
[llama]
model_name = /data/Llama-2-7b-chat-hf/
head_num = 32
size_per_head = 128
inter_size = 11008
max_pos_seq_len = 4096
num_layer = 32
rms_norm_eps = 1e-6
layernorm_type = pre_layernorm
activation_type = silu
has_post_decoder_layernorm = 1
vocab_size = 32000
start_id = 1
end_id = 2
weight_data_type = fp16

Binary file added examples/model_config/llama-2-7b/tokenizer.model
Binary file not shown.
16 changes: 16 additions & 0 deletions examples/model_config/llama-7b/config.ini
@@ -0,0 +1,16 @@
[llama]
model_name = /data/models/llama-7b/
head_num = 32
size_per_head = 128
inter_size = 11008
max_pos_seq_len = 2048
num_layer = 32
rms_norm_eps = 1e-6
layernorm_type = pre_layernorm
activation_type = silu
has_post_decoder_layernorm = 1
vocab_size = 32000
start_id = 1
end_id = 2
weight_data_type = fp16

Binary file added examples/model_config/llama-7b/tokenizer.model
Binary file not shown.
15 changes: 15 additions & 0 deletions examples/model_config/opt-30b/config.ini
@@ -0,0 +1,15 @@
[gpt]
model_name = /data/models/opt-30b/
head_num = 56
size_per_head = 128
inter_size = 28672
max_pos_seq_len = 2048
num_layer = 48
layernorm_eps = 1e-5
layernorm_type = pre_layernorm
activation_type = Relu
has_post_decoder_layernorm = 1
vocab_size = 50272
start_id = 2
end_id = 2
weight_data_type = fp16
16 changes: 16 additions & 0 deletions examples/model_config/opt-6.7b/config.ini
@@ -0,0 +1,16 @@
[gpt]
model_name = /mnt/home/opt-6.7b/
head_num = 32
size_per_head = 128
inter_size = 16384
max_pos_seq_len = 2048
num_layer = 32
layernorm_eps = 1e-5
layernorm_type = pre_layernorm
activation_type = Relu
has_post_decoder_layernorm = 1
vocab_size = 50272
start_id = 2
end_id = 2
weight_data_type = fp16

15 changes: 15 additions & 0 deletions examples/model_config/opt-66b/config.ini
@@ -0,0 +1,15 @@
[gpt]
model_name = /data/models/opt-66b/
head_num = 72
size_per_head = 128
inter_size = 36864
max_pos_seq_len = 2048
num_layer = 64
layernorm_eps = 1e-5
layernorm_type = pre_layernorm
activation_type = Relu
has_post_decoder_layernorm = 1
vocab_size = 50272
start_id = 2
end_id = 2
weight_data_type = fp16
5 changes: 5 additions & 0 deletions src/utils/weight_util.h
@@ -55,6 +55,11 @@ int readFile(const std::string &path, T *values, int size) {
    int count = 0;
    int nthreads = std::min(omp_get_max_threads(), 16);
    int chunk_size = (size + nthreads - 1) / nthreads;
    int enable = (getenv("XFT_FAKE_MODEL") ? atoi(getenv("XFT_FAKE_MODEL")) : 0);
    if (enable) {
        printf("Loading fake model file %s.\n", path.c_str());
        return size;
    }

    {
        std::ifstream file(path, std::ios::binary);
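The new gate in readFile is controlled by the XFT_FAKE_MODEL environment variable: when it is set to a non-zero value, the function prints "Loading fake model file ..." and immediately returns size as if the whole file had been read, without opening it, so the destination buffer keeps whatever it already contained. Judging by the commit title, the intent appears to be letting the runtime come up (for smoke tests or performance runs) without real weight files on disk; launching a run with XFT_FAKE_MODEL=1 in the environment should print one such message per weight file instead of loading it. getenv is re-checked on every call, which is negligible next to an actual weight read.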
