Skip to content

Commit

Permalink
Add support for Radeon VII
Browse files (browse the repository at this point in the history)
  • Loading branch information
anthonix committed Jul 11, 2024
1 parent fdb47ca commit b47f854
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 2 deletions.
6 changes: 5 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ endif

# AMD flags
ROCM_PATH ?= /opt/rocm
AMDGPU_TARGETS ?= $(shell $(ROCM_PATH)/llvm/bin/amdgpu-offload-arch -a)
AMDGPU_TARGETS ?= $(shell $(ROCM_PATH)/llvm/bin/amdgpu-arch)
HIPCC := $(shell which hipcc 2>/dev/null)
HIPIFY := $(shell which hipify-perl 2>/dev/null)
HIPCC_FLAGS = -O3 -march=native -I$(BUILD_DIR)/hip -fno-strict-aliasing
Expand All @@ -69,6 +69,10 @@ ifneq ($(filter gfx1100,$(AMDGPU_TARGETS)),)
USE_HIPBLAS ?= 1
USE_CK ?= 1
AMDGPU_TARGETS := gfx1100
else ifneq ($(filter gfx906,$(AMDGPU_TARGETS)),)
WAVEFRONTSIZE64 ?= 1
USE_HIPBLAS ?= 1
AMDGPU_TARGETS := gfx906
else ifneq ($(filter gfx90a,$(AMDGPU_TARGETS)),)
WAVEFRONTSIZE64 ?= 1
BUILD_XDL ?= 1
Expand Down
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# llm.c for AMD devices
This is a fork of [Andrej Karpathy's llm.c](https://github.com/karpathy/llm.c) with support for AMD's RDNA and CDNA devices.
This is a fork of [Andrej Karpathy's llm.c](https://github.com/karpathy/llm.c) with support for AMD devices.

It has been tested on Radeon VII (aka gfx906), MI250X (aka gfx90a), and 7900 XTX (aka gfx1100).

## Performance

Expand Down
2 changes: 2 additions & 0 deletions llmc/mfu.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ static const PerfData HOPPER = {378.f, 756.f, 756.f, 756.f, 1513.f, 1513.f, 1620
// Per-architecture reference figures: one PerfData initializer per GPU family.
// NOTE(review): the PerfData field layout is declared outside this excerpt.
// The first six values are presumably peak throughput figures per precision
// mode, and -1.f presumably marks a mode with no figure available -- confirm
// against the struct definition before relying on either assumption.
// For RDNA3, CDNA2 and GCN5 the last two values of each initializer reappear,
// in swapped order, as the two trailing integers of the matching gpu_db row.
static const PerfData ADA = {82.6f, 165.2f, 165.2f, 330.3f, 330.3f, 660.6f, 2520.f, 512.f};
static const PerfData RDNA3 = {61.42f, 122.8f, 122.8f, -1.f, -1.f, -1.f, 2500.f, 384.f};
static const PerfData CDNA2 = {95.7f, 383.0f, 383.0f, -1.f, -1.f, -1.f, 1690.f, 208.f};
// NOTE(review): unlike RDNA3/CDNA2 (where the 2nd and 3rd values match), GCN5
// has equal 1st and 2nd values and a doubled 3rd value -- verify that this slot
// assignment is intentional for this architecture.
static const PerfData GCN5 = {10.75f, 10.75f, 21.5f, -1.f, -1.f, -1.f, 1750.f, 60.f};

typedef struct {
const char* name;
Expand Down Expand Up @@ -79,6 +80,7 @@ static GPUEntry gpu_db[] = {
{"NVIDIA H100 80GB HBM3", &HOPPER, 528, 1830}, // HBM3 = SXM5
{"Radeon RX 7900 XTX", &RDNA3, 384, 2500},
{"AMD Instinct MI250X/MI250", &CDNA2, 208, 1690},
{"AMD Radeon VII", &GCN5, 60, 1750},
};

float get_flops_promised(const char* device, int precision_mode) {
Expand Down

0 comments on commit b47f854

Please sign in to comment.