Skip to content

Commit

Permalink
Merge pull request #9 from perftool-incubator/pp-python
Browse files Browse the repository at this point in the history
Use python for post-processing
  • Loading branch information
atheurer authored Aug 7, 2024
2 parents d7ce43d + 2bea211 commit 27ec6c3
Showing 1 changed file with 73 additions and 58 deletions.
131 changes: 73 additions & 58 deletions ilab-post-process
Original file line number Diff line number Diff line change
@@ -1,62 +1,77 @@
#!/usr/bin/perl
## -*- mode: perl; indent-tabs-mode: nil; perl-indent-level: 4 -*-
## vim: autoindent tabstop=4 shiftwidth=4 expandtab softtabstop=4 filetype=perl

use strict;
use warnings;
use JSON::XS;
use Data::Dumper;
use Time::Piece;
BEGIN {
    # The toolbox libraries are loaded via 'use lib' further down, so the
    # location has to be validated at compile time, before those 'use'
    # statements are processed.
    my $toolbox_perl_found = exists $ENV{'TOOLBOX_HOME'}
        && -d "$ENV{'TOOLBOX_HOME'}/perl";

    unless ($toolbox_perl_found) {
        print "This script requires libraries that are provided by the toolbox project.\n",
              "Toolbox can be acquired from https://github.com/perftool-incubator/toolbox and\n",
              "then use 'export TOOLBOX_HOME=/path/to/toolbox' so that it can be located.\n";
        exit 1;
    }
}
use lib "$ENV{'TOOLBOX_HOME'}/perl";
use toolbox::json;
use toolbox::metrics;

# Read the ilab training log (one JSON document per line) and record one
# 'train-samples-sec' throughput sample for every line carrying an "epoch" key.
my $coder = JSON::XS->new;
my $log_file = "training_params_and_metrics_global0.jsonl";
my %names = ();
my %desc = ('source' => 'ilab', 'class' => 'throughput', 'type' => 'train-samples-sec');
my ($rc, $log_fh) = open_read_text_file($log_file);
# Without a usable handle there is nothing to read, whatever $rc means,
# so stop with a clear message instead of silently producing no samples.
if (!defined $log_fh) {
    die "Could not open $log_file for reading (open_read_text_file returned "
        . (defined $rc ? $rc : 'undef') . ")\n";
}
# file contents to parse:
#{"epoch": 0, "step": 1, "rank": 0, "loss": 0.18146394193172455, "overall_throughput": 3.5244029279710176, "lr": 0.0, "cuda_mem_allocated": 14.08400821685791, "cuda_malloc_retries": 0, "num_loss_counted_tokens": 4940, "batch_size": 14, "total_loss": 0.4069821238517761, "gradnorm": null, "weight_norm": 557.9681396484375, "timestamp": "2024-07-18T22:46:41.628932"}
LINE:
while (my $line = <$log_fh>) {
    my $json_ref = $coder->decode($line);
    next LINE unless exists $json_ref->{"epoch"};
    next LINE unless defined $json_ref->{"timestamp"};

    # Strptime does not recognize fractional seconds, so the timestamp is
    # split into the date/time part and an optional fraction.  The fraction
    # is optional so that a timestamp falling on an exact second
    # ("2024-07-18T22:46:41") is not dropped.
    my ($datetime, $fraction) =
        $json_ref->{"timestamp"} =~ m{ \A ([^.]+) (?: [.] (\d+) )? }x;
    next LINE unless defined $datetime;

    # Keep exactly the first three fractional digits (milliseconds), padding
    # with zeros when fewer are present; this truncates like the int() of
    # the microsecond arithmetic it replaces, for any number of digits.
    my $msec = defined $fraction ? substr($fraction . '000', 0, 3) : 0;

    my $epoch = Time::Piece->strptime($datetime, '%Y-%m-%dT%T')->epoch;
    my $epoch_ms = $epoch * 1000 + $msec;
    my %s = ('end' => int $epoch_ms, 'value' => $json_ref->{"overall_throughput"});
    log_sample("0", \%desc, \%names, \%s);
}
close($log_fh);
my $metric_data_name = finish_samples();
#!/usr/bin/env python3
# -*- mode: python; indent-tabs-mode: nil; python-indent-level: 4 -*-
# vim: autoindent tabstop=4 shiftwidth=4 expandtab softtabstop=4 filetype=python

import sys
import os
import lzma
import re
import copy
import math
import json
from datetime import datetime
from pathlib import Path

# Locate the toolbox python libraries through the TOOLBOX_HOME environment
# variable and add them to the module search path.  Exits with status 1 when
# the variable is unset and with status 2 when <TOOLBOX_HOME>/python is not
# a directory.
TOOLBOX_HOME = os.environ.get('TOOLBOX_HOME')
if TOOLBOX_HOME is None:
    print("This script requires libraries that are provided by the toolbox project.")
    print("Toolbox can be acquired from https://github.com/perftool-incubator/toolbox and")
    print("then use 'export TOOLBOX_HOME=/path/to/toolbox' so that it can be located.")
    # sys.exit() rather than the exit() helper: the latter is injected by the
    # 'site' module for interactive use and is not guaranteed to exist.
    sys.exit(1)
else:
    p = Path(TOOLBOX_HOME) / 'python'
    # is_dir() is already False for a path that does not exist.
    if not p.is_dir():
        print("ERROR: <TOOLBOX_HOME>/python ('%s') does not exist!" % (p))
        sys.exit(2)
    sys.path.append(str(p))
from toolbox.metrics import log_sample
from toolbox.metrics import finish_samples

# In any benchmark post-process script, the metrics generated need to be attributed to a
# time-period (AKA benchmark-phase). The period which is used to report an official
# result for the benchmark is the 'measurement' period. Ohter periods thay may exist
# result for the benchmark is the 'measurement' period. Other periods that may exist
# could be "warm-up", "prep", etc.
# Describe the single 'measurement' period, attach the metric file produced
# above to it, and write the resulting sample description as JSON to
# post-process-data.json.
my %period = (
    'name'         => 'measurement',
    'metric-files' => [ $metric_data_name ],
);
my %sample = (
    'primary-metric'        => "train-samples-sec",
    'primary-period'        => "measurement",
    'benchmark'             => "ilab",
    'periods'               => [ \%period ],
    'rickshaw-bench-metric' => { 'schema' => { 'version' => "2021.04.12" } },
);

my $json_file = "post-process-data.json";
# Three-argument open with a lexical handle; the mode can no longer be
# confused with the file name and the handle does not leak into the package.
open(my $json_fh, '>', $json_file)
    or die("Could not open file $json_file for writing: $!\n");
print {$json_fh} $coder->encode(\%sample)
    or die("Could not write to file $json_file: $!\n");
# Buffered write errors only surface at close time, so check it.
close($json_fh)
    or die("Could not close file $json_file: $!\n");

# Build the post-process description for this iteration/sample: read the
# xz-compressed ilab training log, log one throughput sample per line that
# carries an 'epoch' key, and write the result to post-process-data.json.
iter_sample = {
    'primary-metric': "train-samples-sec",  # will [eventually] vary depending on what was done
    'primary-period': "measurement",
    'benchmark': "ilab",
    'periods': [],
    'rickshaw-bench-metric': {'schema': {'version': '2021.04.12'}},
}

metric_files = []

period = {'name': 'measurement', 'metric-files': []}
file_id = 'global0'

jsonl_desc = {'source': 'ilab', 'type': 'train-samples-sec', 'class': 'throughput'}
names = {}
filename = 'training_params_and_metrics_global0.jsonl.xz'
print('Opening file {0:s}'.format(filename))
with lzma.open(filename, 'rt') as file:
    for line in file:
        d = json.loads(line)
        # file contents to parse (per line):
        #{"epoch": 0, "step": 1, "rank": 0,
        # "loss": 0.18146394193172455,
        # "overall_throughput": 3.5244029279710176,
        # "lr": 0.0, "cuda_mem_allocated": 14.08400821685791,
        # "cuda_malloc_retries": 0,
        # "num_loss_counted_tokens": 4940, "batch_size": 14,
        # "total_loss": 0.4069821238517761, "gradnorm": null,
        # "weight_norm": 557.9681396484375,
        # "timestamp": "2024-07-18T22:46:41.628932"}
        if 'epoch' in d:
            ts_text = d['timestamp']
            # Explicit %H:%M:%S instead of %X: %X is the locale's time
            # representation and need not be HH:MM:SS.  A timestamp without
            # a fractional part (what datetime.isoformat() emits when the
            # microsecond field is 0) is parsed without the '.%f' suffix.
            if '.' in ts_text:
                ts_format = '%Y-%m-%dT%H:%M:%S.%f'
            else:
                ts_format = '%Y-%m-%dT%H:%M:%S'
            dt = datetime.strptime(ts_text, ts_format)
            # Sample end time in whole milliseconds since the epoch.
            ts = math.floor(dt.timestamp() * 1000)
            sample = {'end': ts, 'value': d['overall_throughput']}
            log_sample(file_id, jsonl_desc, names, sample)

metric_file_name = finish_samples()

period['metric-files'].append(metric_file_name)
iter_sample['periods'].append(period)

# The context manager guarantees the file is flushed and closed.
with open('post-process-data.json', 'w') as f:
    f.write(json.dumps(iter_sample))

0 comments on commit 27ec6c3

Please sign in to comment.