Skip to content

Commit

Permalink
feat: Support DBRX model in Llama
Browse files Browse the repository at this point in the history
  • Loading branch information
reneleonhardt committed Apr 15, 2024
1 parent 5f16213 commit 27b9d62
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -43,16 +43,29 @@ public enum HuggingFaceModel {
WIZARD_CODER_PYTHON_13B_Q5(13, 5, "WizardCoder-Python-13B-V1.0-GGUF"),
WIZARD_CODER_PYTHON_34B_Q3(34, 3, "WizardCoder-Python-34B-V1.0-GGUF"),
WIZARD_CODER_PYTHON_34B_Q4(34, 4, "WizardCoder-Python-34B-V1.0-GGUF"),
WIZARD_CODER_PYTHON_34B_Q5(34, 5, "WizardCoder-Python-34B-V1.0-GGUF");
WIZARD_CODER_PYTHON_34B_Q5(34, 5, "WizardCoder-Python-34B-V1.0-GGUF"),

// DBRX builds hosted under the "phymbert" Hugging Face user instead of the default one.
// Arguments are (parameterSize, quantization, modelName, user).
// NOTE(review): Q3_K_M, Q3_S and Q3_XXS all carry (12, 3); confirm no caller needs
// (parameterSize, quantization) to identify a single entry.
DBRX_12B_Q3_K_M(12, 3, "dbrx-16x12b-instruct-q3_k_m-gguf", "phymbert"),
DBRX_12B_Q4_0(12, 4, "dbrx-16x12b-instruct-q4_0-gguf", "phymbert"),
DBRX_12B_Q6_K(12, 6, "dbrx-16x12b-instruct-q6_k-gguf", "phymbert"),
DBRX_12B_Q8_0(12, 8, "dbrx-16x12b-instruct-q8_0-gguf", "phymbert"),
DBRX_12B_Q3_S(12, 3, "dbrx-16x12b-instruct-iq3_s-gguf", "phymbert"),
DBRX_12B_Q3_XXS(12, 3, "dbrx-16x12b-instruct-iq3_xxs-gguf", "phymbert");

// Model size as given to the constructor (presumably billions of parameters, matching the
// "13B"/"34B" in the model names; confirm).
private final int parameterSize;
// Quantization level; for "TheBloke" models it is inserted into the ".Q%d_K_M.gguf" file suffix.
private final int quantization;
// Hugging Face repository name; used verbatim in URLs and lower-cased to derive the file name.
private final String modelName;
// Hugging Face account that owns the repository; "TheBloke" when not given explicitly.
private final String user;

/**
 * Creates a model entry whose repository belongs to the default "TheBloke" user.
 *
 * @param parameterSize model size figure stored for this entry
 * @param quantization quantization level stored for this entry
 * @param modelName Hugging Face repository name
 */
HuggingFaceModel(int parameterSize, int quantization, String modelName) {
this(parameterSize, quantization, modelName, "TheBloke");
}

/**
 * Creates a model entry for a repository owned by an explicit Hugging Face user.
 *
 * @param parameterSize model size figure stored for this entry
 * @param quantization quantization level stored for this entry
 * @param modelName Hugging Face repository name
 * @param user Hugging Face account that owns the repository; becomes part of the generated URLs
 */
HuggingFaceModel(int parameterSize, int quantization, String modelName, String user) {
this.parameterSize = parameterSize;
this.quantization = quantization;
this.modelName = modelName;
this.user = user;
}

public int getParameterSize() {
Expand All @@ -68,21 +81,25 @@ public String getCode() {
}

/**
 * Returns the name of the GGUF file to download from this model's repository.
 *
 * <p>For repositories of the default "TheBloke" user, the "-gguf" part of the lower-cased
 * repository name is replaced by ".Q&lt;quantization&gt;_K_M.gguf". For any other user the first
 * part of a ten-part split ("-00001-of-00010.gguf") is returned.
 *
 * @return the file name derived from the repository name
 */
public String getFileName() {
  // File names are identifiers, not display text: lower-case with Locale.ROOT so that e.g. a
  // Turkish default locale cannot turn "I" into a dotless "ı" and break the download URL.
  String lowerCasedName = modelName.toLowerCase(java.util.Locale.ROOT);
  if ("TheBloke".equals(user)) {
    // Plain concatenation keeps the digit ASCII regardless of the default locale.
    return lowerCasedName.replace("-gguf", ".Q" + quantization + "_K_M.gguf");
  }
  // TODO: Download all 10 files ;(
  return lowerCasedName.replace("-gguf", "-00001-of-00010.gguf");
}

public URL getFileURL() {
try {
return new URL(
format("https://huggingface.co/TheBloke/%s/resolve/main/%s", modelName, getFileName()));
"https://huggingface.co/%s/%s/resolve/main/%s".formatted(user, modelName, getFileName()));
} catch (MalformedURLException ex) {
throw new RuntimeException(ex);
}
}

public URL getHuggingFaceURL() {
try {
return new URL("https://huggingface.co/TheBloke/" + modelName);
return new URL("https://huggingface.co/%s/%s".formatted(user, modelName));
} catch (MalformedURLException ex) {
throw new RuntimeException(ex);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,21 @@ public enum LlamaModel {
HuggingFaceModel.WIZARD_CODER_PYTHON_13B_Q5,
HuggingFaceModel.WIZARD_CODER_PYTHON_34B_Q3,
HuggingFaceModel.WIZARD_CODER_PYTHON_34B_Q4,
HuggingFaceModel.WIZARD_CODER_PYTHON_34B_Q5));
HuggingFaceModel.WIZARD_CODER_PYTHON_34B_Q5)),
// DBRX model family. The description is assembled from adjacent string literals, so every
// fragment must end with a space to keep the sentences separated.
DBRX(
    "DBRX",
    "DBRX is a Mixture-of-Experts (MoE) model with 132B total parameters and 36B live parameters. "
        + "Generation speed is significantly faster than LLaMA2-70B, while at the same time "
        + "beating other open source models, such as, LLaMA2-70B, Mixtral, and Grok-1 on "
        + "language understanding, programming, math, and logic.",
    PromptTemplate.LLAMA,
    List.of(
        HuggingFaceModel.DBRX_12B_Q3_K_M,
        HuggingFaceModel.DBRX_12B_Q4_0,
        HuggingFaceModel.DBRX_12B_Q6_K,
        HuggingFaceModel.DBRX_12B_Q8_0,
        HuggingFaceModel.DBRX_12B_Q3_S,
        HuggingFaceModel.DBRX_12B_Q3_XXS));

private final String label;
private final String description;
Expand Down

0 comments on commit 27b9d62

Please sign in to comment.