Merge pull request #9 from perftool-incubator/pp-python
Use python for post-processing
Showing 1 changed file with 73 additions and 58 deletions.
@@ -1,62 +1,77 @@
#!/usr/bin/perl
## -*- mode: perl; indent-tabs-mode: nil; perl-indent-level: 4 -*-
## vim: autoindent tabstop=4 shiftwidth=4 expandtab softtabstop=4 filetype=perl

use strict;
use warnings;
use JSON::XS;
use Data::Dumper;
use Time::Piece;
BEGIN {
    if (!(exists $ENV{'TOOLBOX_HOME'} && -d "$ENV{'TOOLBOX_HOME'}/perl")) {
        print "This script requires libraries that are provided by the toolbox project.\n";
        print "Toolbox can be acquired from https://github.com/perftool-incubator/toolbox and\n";
        print "then use 'export TOOLBOX_HOME=/path/to/toolbox' so that it can be located.\n";
        exit 1;
    }
}
use lib "$ENV{'TOOLBOX_HOME'}/perl";
use toolbox::json;
use toolbox::metrics;

my $coder = JSON::XS->new;
my $log_file = "training_params_and_metrics_global0.jsonl";
my @logfile_metrics;
my %names = ();
my %desc = ('source' => 'ilab', 'class' => 'throughput', 'type' => 'train-samples-sec');
(my $rc, my $log_fh) = open_read_text_file($log_file);
# file contents to parse:
#{"epoch": 0, "step": 1, "rank": 0, "loss": 0.18146394193172455, "overall_throughput": 3.5244029279710176, "lr": 0.0, "cuda_mem_allocated": 14.08400821685791, "cuda_malloc_retries": 0, "num_loss_counted_tokens": 4940, "batch_size": 14, "total_loss": 0.4069821238517761, "gradnorm": null, "weight_norm": 557.9681396484375, "timestamp": "2024-07-18T22:46:41.628932"}
while (<$log_fh>) {
    my $json_ref;
    $json_ref = $coder->decode($_);
    # Strptime does not recognize microseconds, so we split the timestamp into two sections
    if ( exists $$json_ref{"epoch"} and $$json_ref{"timestamp"} =~ /([^\.]*)\.(\d+)/ ) {
        my $timestamp = $1;
        my $msec = $2 / 1000;
        my $epoch = Time::Piece->strptime($timestamp, '%Y-%m-%dT%T')->epoch;
        my $epoch_ms = $epoch * 1000 + $msec;
        my %s = ('end' => int $epoch_ms, 'value' => $$json_ref{"overall_throughput"});
        log_sample("0", \%desc, \%names, \%s);
    }
}
close($log_fh);
my $metric_data_name = finish_samples();
#!/usr/bin/env python3
# -*- mode: python; indent-tabs-mode: nil; python-indent-level: 4 -*-
# vim: autoindent tabstop=4 shiftwidth=4 expandtab softtabstop=4 filetype=python

import sys
import os
import lzma
import re
import copy
import math
import json
from datetime import datetime
from pathlib import Path

TOOLBOX_HOME = os.environ.get('TOOLBOX_HOME')
if TOOLBOX_HOME is None:
    print("This script requires libraries that are provided by the toolbox project.")
    print("Toolbox can be acquired from https://github.com/perftool-incubator/toolbox and")
    print("then use 'export TOOLBOX_HOME=/path/to/toolbox' so that it can be located.")
    exit(1)
else:
    p = Path(TOOLBOX_HOME) / 'python'
    if not p.exists() or not p.is_dir():
        print("ERROR: <TOOLBOX_HOME>/python ('%s') does not exist!" % (p))
        exit(2)
    sys.path.append(str(p))
from toolbox.metrics import log_sample
from toolbox.metrics import finish_samples
# In any benchmark post-process script, the metrics generated need to be attributed to a
# time-period (AKA benchmark-phase). The period which is used to report an official
# result for the benchmark is the 'measurement' period. Other periods that may exist
# could be "warm-up", "prep", etc.
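To illustrate the comment above: a script that also tracked a warm-up phase would build one entry per period, each pointing at its own metric files, and only the 'measurement' entry would feed the reported result. A hypothetical sketch in the style of the new Python code (the file names here are placeholders for whatever finish_samples() would return, not part of this change):

    # Hypothetical sketch only: two periods, each with its own metric-file list.
    periods = [
        { 'name': 'warm-up',     'metric-files': [ 'warmup-metric-data-placeholder' ] },
        { 'name': 'measurement', 'metric-files': [ 'measurement-metric-data-placeholder' ] },
    ]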
my %sample;
my @periods;
my %period = ('name' => 'measurement');
my @metric_files = ( $metric_data_name );
$period{'metric-files'} = \@metric_files;
push(@periods, \%period);
$sample{'primary-metric'} = "train-samples-sec";
$sample{'primary-period'} = "measurement";
$sample{'benchmark'} = "ilab";
$sample{'periods'} = \@periods;
$sample{'rickshaw-bench-metric'}{'schema'}{'version'} = "2021.04.12";
open(JSON_FH, ">post-process-data.json") || die("Could not open file post-process-data.json for writing\n");
print JSON_FH $coder->encode(\%sample);

iter_sample = { 'primary-metric': "train-samples-sec", # will [eventually] vary depending on what was done
                'primary-period': "measurement",
                'benchmark': "ilab",
                'periods': [],
                'rickshaw-bench-metric': { 'schema': { 'version': '2021.04.12' } }
              }

metric_files = []

period = { 'name': 'measurement', 'metric-files': [] }
file_id = 'global0'

jsonl_desc = {'source' : 'ilab', 'type': 'train-samples-sec', 'class': 'throughput'}
names = {}
filename = 'training_params_and_metrics_global0.jsonl.xz'
print('Opening file {0:s}'.format(filename))
with lzma.open(filename, 'rt') as file:
    for line in file:
        d = json.loads(line)
        # file contents to parse (per line):
        #{"epoch": 0, "step": 1, "rank": 0,
        # "loss": 0.18146394193172455,
        # "overall_throughput": 3.5244029279710176,
        # "lr": 0.0, "cuda_mem_allocated": 14.08400821685791,
        # "cuda_malloc_retries": 0,
        # "num_loss_counted_tokens": 4940, "batch_size": 14,
        # "total_loss": 0.4069821238517761, "gradnorm": null,
        # "weight_norm": 557.9681396484375,
        # "timestamp": "2024-07-18T22:46:41.628932"}
        if 'epoch' in d.keys():
            dt = datetime.strptime(d['timestamp'], '%Y-%m-%dT%X.%f')
            ts = math.floor(dt.timestamp() * 1000)
            sample = {'end': ts, 'value': d['overall_throughput']}
            log_sample(file_id, jsonl_desc, names, sample)

metric_file_name = finish_samples()

period['metric-files'].append(metric_file_name)
iter_sample['periods'].append(period)

with open('post-process-data.json', 'w') as f:
    f.write(json.dumps(iter_sample))
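For reference, at this point iter_sample, as serialized to post-process-data.json, holds roughly the structure below (a sketch based on the fields set above; the entry in 'metric-files' is whatever name finish_samples() returned, shown here as a placeholder):

    {
        'primary-metric': 'train-samples-sec',
        'primary-period': 'measurement',
        'benchmark': 'ilab',
        'periods': [
            { 'name': 'measurement', 'metric-files': [ '<name returned by finish_samples()>' ] }
        ],
        'rickshaw-bench-metric': { 'schema': { 'version': '2021.04.12' } }
    }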