Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/master'
Browse files Browse the repository at this point in the history
  • Loading branch information
anthonix committed Jul 10, 2024
2 parents 72ce690 + db2454f commit d680fbc
Show file tree
Hide file tree
Showing 5 changed files with 42 additions and 42 deletions.
36 changes: 18 additions & 18 deletions test_gpt2.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ int check_tensor(float *a, float *b, int n, const char* label) {
int print_upto = 5;
int ok = 1;
float maxdiff = 0.0f;
float tol = 2e-2;
float tol = 2e-2f;
printf("%s\n", label);
for (int i = 0; i < n; i++) {
// look at the difference at position i of these two tensors
Expand Down Expand Up @@ -52,7 +52,7 @@ int main(int argc, char *argv[]) {
FILE *state_file = fopen("gpt2_124M_debug_state.bin", "rb");
if (state_file == NULL) { printf("Error opening state file\n"); return 1; }
int state_header[256];
fread(state_header, sizeof(int), 256, state_file);
freadCheck(state_header, sizeof(int), 256, state_file);
if (state_header[0] != 20240327) { printf("Bad magic state file\n"); return 1; }
if (state_header[1] != 2) {
printf("Bad version in state file\n");
Expand All @@ -75,28 +75,28 @@ int main(int argc, char *argv[]) {
float* expected_loss = (float*) malloc(1 * sizeof(float));

// read reference information from Python
fread(x, sizeof(int), B*T, state_file);
fread(y, sizeof(int), B*T, state_file);
fread(expected_logits, sizeof(float), B*T*V, state_file);
fread(expected_loss, sizeof(float), 1, state_file);
fread(expected_grads_memory, sizeof(float), model.num_parameters, state_file);
fclose(state_file);
freadCheck(x, sizeof(int), B*T, state_file);
freadCheck(y, sizeof(int), B*T, state_file);
freadCheck(expected_logits, sizeof(float), B*T*V, state_file);
freadCheck(expected_loss, sizeof(float), 1, state_file);
freadCheck(expected_grads_memory, sizeof(float), model.num_parameters, state_file);
fcloseCheck(state_file);

// overall OK signal for the test
int allok = 1;

// let's do 10 training iterations, following the pytorch code
float expected_losses[10] = {
5.270007133483887,
4.059706687927246,
3.3751230239868164,
2.8007826805114746,
2.315382242202759,
1.8490285873413086,
1.3946564197540283,
0.9991465210914612,
0.6240804195404053,
0.37651097774505615
5.270007133483887f,
4.059706687927246f,
3.3751230239868164f,
2.8007826805114746f,
2.315382242202759f,
1.8490285873413086f,
1.3946564197540283f,
0.9991465210914612f,
0.6240804195404053f,
0.37651097774505615f
};
for (int step = 0; step < 10; step++) {

Expand Down
20 changes: 10 additions & 10 deletions test_gpt2.cu
Original file line number Diff line number Diff line change
Expand Up @@ -304,16 +304,16 @@ int main(int argc, char *argv[]) {

// expected losses are as follows, from Python
float expected_losses[10] = {
5.270009,
4.060681,
3.320085,
2.717550,
2.181066,
1.653923,
1.168050,
0.736873,
0.401021,
0.187493
5.270009f,
4.060681f,
3.320085f,
2.717550f,
2.181066f,
1.653923f,
1.168050f,
0.736873f,
0.401021f,
0.187493f
};

// compare
Expand Down
22 changes: 11 additions & 11 deletions test_gpt2_fp32.cu
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ int main(int argc, char *argv[]) {
// at this point, target should be equal to expected_logits, let's compare
// copy logits to CPU so we can compare them
float* logits_cpu = (float*)mallocCheck(B * T * Vp * sizeof(float));
cudaMemcpy(logits_cpu, model.acts.output, B * T * Vp * sizeof(float), cudaMemcpyDeviceToHost);
cudaCheck(cudaMemcpy(logits_cpu, model.acts.output, B * T * Vp * sizeof(float), cudaMemcpyDeviceToHost));

// compare the output logits from the forward pass
// also careful that we don't access and compare the padded columns of logits
Expand Down Expand Up @@ -200,16 +200,16 @@ int main(int argc, char *argv[]) {

// expected losses are as follows, from Python
float expected_losses[10] = {
5.270007133483887,
4.059706687927246,
3.3751230239868164,
2.8007826805114746,
2.315382242202759,
1.8490285873413086,
1.3946564197540283,
0.9991465210914612,
0.6240804195404053,
0.37651097774505615
5.270007133483887f,
4.059706687927246f,
3.3751230239868164f,
2.8007826805114746f,
2.315382242202759f,
1.8490285873413086f,
1.3946564197540283f,
0.9991465210914612f,
0.6240804195404053f,
0.37651097774505615f
};

// compare
Expand Down
2 changes: 1 addition & 1 deletion train_gpt2.c
Original file line number Diff line number Diff line change
Expand Up @@ -352,7 +352,7 @@ void attention_backward(float* dinp, float* dpreatt, float* datt,
// dout is (B, T, C)
int C3 = C*3;
int hs = C / NH; // head size
float scale = 1.0 / sqrtf(hs);
float scale = 1.f / sqrtf(hs);

for (int b = 0; b < B; b++) {
for (int t = 0; t < T; t++) {
Expand Down
4 changes: 2 additions & 2 deletions train_gpt2.cu
Original file line number Diff line number Diff line change
Expand Up @@ -1638,8 +1638,8 @@ int main(int argc, char *argv[]) {
// build an EvalLoader for HellaSwag
EvalLoader eval_loader;
const char* hellaswag_path = "dev/data/hellaswag/hellaswag_val.bin";
const char hellaswag_available = access(hellaswag_path, F_OK) == 0;
const char run_hellaswag = hellaswag_eval && hellaswag_available;
const bool hellaswag_available = access(hellaswag_path, F_OK) == 0;
const bool run_hellaswag = hellaswag_eval && hellaswag_available;
if (run_hellaswag) {
evalloader_init(&eval_loader, hellaswag_path, B, T, multi_gpu_config.process_rank, multi_gpu_config.num_processes);
}
Expand Down

0 comments on commit d680fbc

Please sign in to comment.