Adding script to post-process apex_profiles.csv and provide same output as what we get at the end of the run, but with greater flexibility.
UO-OACISS · Feb 9, 2023 · 0b69993 · 0b69993
1 parent c2e9a26
commit 0b69993
Showing 1 changed file with 93 additions and 0 deletions.
diff --git a/src/scripts/apex-summary.py b/src/scripts/apex-summary.py
@@ -0,0 +1,93 @@
+#!/usr/bin/env python3
+
+#print('Importing modules...')
+import pandas as pd
+import numpy as np
+import argparse
+
+# The output header looks like this:
+#"rank","name","type","num samples/calls","minimum","mean","maximum","stddev","total","inclusive (ns)","num threads","total per thread"
+
+def parseArgs():
+    parser = argparse.ArgumentParser(description='Post-process APEX flat profiles.')
+    parser.add_argument('--filename', type=str, required=False,
+        help='The filename to parse (default: ./apex_profiles.csv)', default='./apex_profiles.csv')
+    parser.add_argument('--counters', dest='counters', action='store_true',
+        help='Print the counter data (default: false)', default=False)
+    parser.add_argument('--timers', dest='timers', action='store_true',
+        help='Print the timer data (default: false)', default=False)
+    parser.add_argument('--other', dest='other', action='store_true',
+        help='Aggregate all other timers and show value (default: false)', default=False)
+    parser.add_argument('--limit', dest='timer_limit', type=int, default=30, required=False,
+        metavar='N', help='Limit timers to top N timers (default: 30)')
+    parser.add_argument('--agg', dest='timer_agg', type=str, default='mean', required=False,
+        metavar='A', help='Aggregation operation for timers and counters (default: mean)')
+    parser.add_argument('--sort', dest='sort_by', type=str, default='tot/thr', required=False,
+        metavar='C', help='Column to sort timers (default: tot/thr)')
+    args = parser.parse_args()
+    if not args.timers and not args.counters:
+        args.timers = True
+        args.counters = True
+    return args
+
+def showCounters(counters, args):
+    counters = counters.rename(columns={'name': 'Counter', 'num samples/calls': 'samples' })
+    df = counters.groupby('Counter').agg(args.timer_agg, numeric_only=True)
+    pd.set_option('display.float_format', lambda x: '%.2f' % x)
+    print('-'*100)
+    print('APEX Counters aggregated by', args.timer_agg)
+    print('-'*100)
+    print(df[['samples', 'minimum', 'mean', 'maximum', 'stddev']])
+    print()
+
+def showMeans(timers, args):
+    timers = timers.rename(columns={'name': 'Timer', 'num samples/calls': 'calls', 'num threads': 'threads' })
+    if 'yields' not in timers:
+        timers['yields'] = 0
+    timers['tot/call'] = timers['total'] / timers['calls']
+    timers['tot/thr'] = timers['total'] / timers['threads']
+    df = timers.groupby('Timer').agg(args.timer_agg, numeric_only=True)
+    topN = df.nlargest(args.timer_limit,args.sort_by)
+    top1 = df.nlargest(1,'tot/call')
+    topN['%total'] = (topN['total'] / top1.iloc[0]['total']) * 100.0
+    topN['%wall'] = (topN['tot/thr'] / top1.iloc[0]['total']) * 100.0
+    # Aggregate all others?
+    allTimers = df.agg('sum', numeric_only=True)
+    allTopN = topN.agg('sum', numeric_only=True)
+    if args.other:
+        other = pd.Series({'calls':allTimers['calls']-allTopN['calls'],
+            'threads':allTimers['calls']-allTopN['calls'],
+            'tot/call':allTimers['tot/call']-allTopN['tot/call'],
+            'total':allTimers['total']-allTopN['total'],
+            'tot/thr':allTimers['tot/thr']-allTopN['tot/thr']
+            }, name='other')
+        topN = topN.append(other)
+    # scale all values to seconds
+    topN['total'] = topN['total'] * 1.0e-9
+    topN['tot/call'] = topN['tot/call'] * 1.0e-9
+    topN['tot/thr'] = topN['tot/thr'] * 1.0e-9
+    pd.set_option('display.float_format', lambda x: '%.2f' % x)
+    print('-'*100)
+    print('Top',args.timer_limit,'APEX Timers sorted by',args.sort_by, 'aggregated by', args.timer_agg)
+    print('-'*100)
+    print(topN[['total', 'calls', 'tot/call', 'yields', 'threads', 'tot/thr','%total','%wall']])
+    print()
+
+def main():
+    args = parseArgs()
+    #print('Reading profiles...')
+    df = pd.read_csv('apex_profiles.csv') #, index_col=[0,1])
+    df = df.fillna(0)
+    print()
+    if (args.counters):
+        # get the counters
+        counters = df[df['type'] == 'counter']
+        showCounters(counters, args)
+    if (args.timers):
+        timers = df[df['type'] == 'timer']
+        # Get the means
+        showMeans(timers, args)
+    #print('done.')
+
+if __name__ == '__main__':
+    main()