Page align weights file for ~8% boost
cgbur committed Aug 16, 2023
1 parent 6d408e4 commit 23c0711
Showing 2 changed files with 10 additions and 10 deletions.
4 changes: 2 additions & 2 deletions README.md
@@ -78,7 +78,7 @@ language implementations (no OpenBlas, etc.).
 | Implementation | Tokens/s |
 | --------------------------------------------------- | -------- |
-| llama2.zig (this repo) | 612 |
+| llama2.zig (this repo) | 656 |
 | llama2.c `make runfast -march=native` | 548 |
 | [llama2.zig](https://github.com/clebert/llama2.zig) | 496 |
 | llama2.c `make run -march=native` | 122 |
@@ -92,7 +92,7 @@ language implementations (no OpenBlas, etc.).
 | Implementation | Tokens/s |
 | ------------------------------------- | -------- |
-| llama2.zig (this repo) | 579 |
+| llama2.zig (this repo) | 596 |
 | llama2.c `make runfast -march=native` | 241 |
 
 ## Multi-threaded
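For reference, the deltas in the two tables above work out to 656/612 ≈ 1.07, a roughly 7% single-threaded gain (the "~8% boost" of the commit title), and 596/579 ≈ 1.03, about 3% multi-threaded.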
16 changes: 8 additions & 8 deletions src/main.zig
@@ -834,13 +834,13 @@ pub fn main() !void {
     }
 
     // read the config from the checkpoint
-    var checkpoint = try std.fs.cwd().openFile(bin_path.?, .{}); // close this by hand
+    var checkpoint = try std.fs.cwd().openFile(bin_path.?, .{});
+    // close by hand
     var config_read: ConfigReader = try checkpoint.reader().readStruct(ConfigReader);
     // negative vocab size is hacky way of signaling unshared weights. bit yikes.
     const shared_weights: bool = config_read.vocab_size > 0;
     config_read.vocab_size = try std.math.absInt(config_read.vocab_size);
     const file_size = (try checkpoint.stat()).size;
-    checkpoint.close();
     const config = config_read.config(); // convert to usize version
 
     log("config: {any}\n", .{config});
@@ -850,22 +850,22 @@ pub fn main() !void {
log("SIMD vector size: {d}\n", .{DEFAULT_VECTOR_WIDTH});
log("\n", .{});

const mapped_checkpoint = try std.fs.cwd().openFile(bin_path.?, .{});
defer mapped_checkpoint.close();
const data: []align(mem.page_size) u8 = blk: {
const buffer = try allocator.alignedAlloc(u8, mem.page_size, file_size);
const read_len = try mapped_checkpoint.readAll(buffer);
if (read_len != file_size) {
const weights_size: usize = file_size - @sizeOf(ConfigReader);
const buffer = try allocator.alignedAlloc(u8, mem.page_size, weights_size);
const read_len = try checkpoint.readAll(buffer);
if (read_len != weights_size) {
std.debug.print("error: failed to read checkpoint file\n", .{});
std.process.exit(1);
}
checkpoint.close();
break :blk buffer;
// mmap seems slower
// break :blk try std.os.mmap(null, file_size, std.os.PROT.READ, std.os.MAP.PRIVATE, mapped_checkpoint.handle, 0);
};
defer allocator.free(data);

const weights = Weights.init(&config, data[@sizeOf(ConfigReader)..], shared_weights);
const weights = Weights.init(&config, data, shared_weights);

// load the tokens for the model
const tokenizer = try Tokenizer.fromFile("tokenizer.bin", config.vocab_size, allocator);
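In short: the old code opened the checkpoint a second time and read the entire file, config header included, into one aligned buffer, then handed the model a slice offset by `@sizeOf(ConfigReader)`, so the weights themselves started mid-page. The new code reads the header from the already-open handle, sizes the buffer to the weights alone, and reads them into a page-aligned allocation, so the first tensor byte lands on a page boundary. A minimal standalone sketch of that pattern, with a hypothetical `Header` struct and `model.bin` path standing in for the repo's `ConfigReader` and checkpoint file:

```zig
const std = @import("std");
const mem = std.mem;

// Hypothetical stand-in for the repo's ConfigReader header.
const Header = extern struct {
    dim: i32,
    vocab_size: i32, // negative vocab size signals unshared weights
};

pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa.deinit();
    const allocator = gpa.allocator();

    const file = try std.fs.cwd().openFile("model.bin", .{}); // assumed path
    defer file.close();

    // Consume the fixed-size header; the file cursor now points at the weights.
    var header = try file.reader().readStruct(Header);
    const shared_weights = header.vocab_size > 0;
    header.vocab_size = try std.math.absInt(header.vocab_size);
    _ = shared_weights;

    // Size the buffer to the weights alone and page-align it, so the first
    // tensor byte lands on a page boundary rather than at @sizeOf(Header).
    const file_size = (try file.stat()).size;
    const weights_size = file_size - @sizeOf(Header);
    const weights = try allocator.alignedAlloc(u8, mem.page_size, weights_size);
    defer allocator.free(weights);

    if (try file.readAll(weights) != weights_size) return error.ShortRead;

    // `weights` is now ready to hand to the model with no header offset.
    std.debug.print("loaded {d} bytes of weights\n", .{weights.len});
}
```

The commented-out `std.os.mmap` line in the diff records the alternative the author tried; per the comment, mmap proved slower here than a straight read into an aligned heap buffer.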

1 comment on commit 23c0711

@cgbur (Owner, Author) commented on 23c0711, Aug 16, 2023


[image]
