feat: Support DBRX model in Llama
reneleonhardt committed Apr 21, 2024
1 parent 39679d9 commit b9421ad
Showing 2 changed files with 25 additions and 3 deletions.
@@ -52,7 +52,14 @@ public enum HuggingFaceModel {
   LLAMA_3_8B_Q8_0(8, 8, "Meta-Llama-3-8B-Instruct-Q8_0.gguf", "lmstudio-community"),
   LLAMA_3_70B_IQ1(70, 1, "Meta-Llama-3-70B-Instruct-IQ1_M.gguf", "lmstudio-community"),
   LLAMA_3_70B_IQ2_XS(70, 2, "Meta-Llama-3-70B-Instruct-IQ2_XS.gguf", "lmstudio-community"),
-  LLAMA_3_70B_Q4_K_M(70, 4, "Meta-Llama-3-70B-Instruct-Q4_K_M.gguf", "lmstudio-community");
+  LLAMA_3_70B_Q4_K_M(70, 4, "Meta-Llama-3-70B-Instruct-Q4_K_M.gguf", "lmstudio-community"),
+
+  DBRX_12B_Q3_K_M(12, 3, "dbrx-16x12b-instruct-q3_k_m-gguf", "phymbert"),
+  DBRX_12B_Q4_0(12, 4, "dbrx-16x12b-instruct-q4_0-gguf", "phymbert"),
+  DBRX_12B_Q6_K(12, 6, "dbrx-16x12b-instruct-q6_k-gguf", "phymbert"),
+  DBRX_12B_Q8_0(12, 8, "dbrx-16x12b-instruct-q8_0-gguf", "phymbert"),
+  DBRX_12B_Q3_S(12, 3, "dbrx-16x12b-instruct-iq3_s-gguf", "phymbert"),
+  DBRX_12B_Q3_XXS(12, 3, "dbrx-16x12b-instruct-iq3_xxs-gguf", "phymbert");

   private final int parameterSize;
   private final int quantization;
@@ -86,7 +93,8 @@ public String getFileName() {
if ("TheBloke".equals(user)) {
return modelName.toLowerCase().replace("-gguf", format(".Q%d_K_M.gguf", quantization));
}
return modelName;
// TODO: Download all 10 files ;(
return modelName.toLowerCase().replace("-gguf", "-00001-of-00010.gguf");
}

public URL getFileURL() {
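The new fallback in getFileName() resolves only the first of the ten split GGUF parts, which is what the TODO comment flags. For illustration, here is a minimal sketch of expanding a model name into every part using the same "-NNNNN-of-NNNNN.gguf" naming convention; the SplitGgufParts class and partFileNames method are hypothetical helpers, not part of this commit:

// Hypothetical sketch, not part of this commit: expanding a DBRX model name
// into all ten split-GGUF part names using the "-%05d-of-%05d.gguf" pattern
// that the new getFileName() fallback produces for part 1.
import java.util.ArrayList;
import java.util.List;

import static java.lang.String.format;

final class SplitGgufParts {

  static List<String> partFileNames(String modelName, int totalParts) {
    String base = modelName.toLowerCase().replace("-gguf", "");
    List<String> names = new ArrayList<>(totalParts);
    for (int part = 1; part <= totalParts; part++) {
      names.add(format("%s-%05d-of-%05d.gguf", base, part, totalParts));
    }
    return names;
  }

  public static void main(String[] args) {
    // Prints dbrx-16x12b-instruct-q4_0-00001-of-00010.gguf
    // through dbrx-16x12b-instruct-q4_0-00010-of-00010.gguf
    partFileNames("dbrx-16x12b-instruct-q4_0-gguf", 10).forEach(System.out::println);
  }
}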
@@ -99,7 +99,21 @@ public enum LlamaModel {
           HuggingFaceModel.LLAMA_3_8B_Q8_0,
           HuggingFaceModel.LLAMA_3_70B_IQ1,
           HuggingFaceModel.LLAMA_3_70B_IQ2_XS,
-          HuggingFaceModel.LLAMA_3_70B_Q4_K_M));
+          HuggingFaceModel.LLAMA_3_70B_Q4_K_M)),
+  DBRX(
+      "DBRX",
+      "DBRX is a Mixture-of-Experts (MoE) model with 132B total parameters, 36B of which are "
+          + "active for any given input. Generation speed is significantly faster than "
+          + "LLaMA2-70B's, while it beats open-source models such as LLaMA2-70B, Mixtral, "
+          + "and Grok-1 on language understanding, programming, math, and logic.",
+      PromptTemplate.CHAT_ML,
+      List.of(
+          HuggingFaceModel.DBRX_12B_Q3_K_M,
+          HuggingFaceModel.DBRX_12B_Q4_0,
+          HuggingFaceModel.DBRX_12B_Q6_K,
+          HuggingFaceModel.DBRX_12B_Q8_0,
+          HuggingFaceModel.DBRX_12B_Q3_S,
+          HuggingFaceModel.DBRX_12B_Q3_XXS));

   private final String label;
   private final String description;
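For context, the new DBRX entry wires the six quantization variants into the model list alongside its label, description, and prompt template. A hedged usage sketch follows; the getHuggingFaceModels() accessor is an assumption (only the constructor arguments appear in this diff):

// Hypothetical sketch, not part of this commit: listing the GGUF file each
// DBRX quantization variant resolves to. Assumes LlamaModel exposes its
// model list via a getHuggingFaceModels() getter, which this diff does not show.
public class ListDbrxOptions {

  public static void main(String[] args) {
    for (HuggingFaceModel model : LlamaModel.DBRX.getHuggingFaceModels()) {
      // e.g. DBRX_12B_Q4_0 -> dbrx-16x12b-instruct-q4_0-00001-of-00010.gguf
      System.out.printf("%s -> %s%n", model, model.getFileName());
    }
  }
}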
