Skip to content

Commit

Permalink
Merge pull request #557 from rafaelsteil/export-csv-pr
Browse files Browse the repository at this point in the history
Add command to export log files to CSV
altendky authored Jun 12, 2021

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature.
2 parents e6ece02 + f658093 commit 5a9b631
Showing 2 changed files with 138 additions and 1 deletion.
121 changes: 121 additions & 0 deletions src/plotman/csv_exporter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
import csv
import sys
from dateutil.parser import parse as parse_date

import attr

from plotman.log_parser import PlotLogParser

def row_ib(name):
return attr.ib(converter=str, metadata={'name': name})

@attr.frozen
class Row:
plot_id: str = row_ib(name='Plot ID')
started_at: str = row_ib(name='Started at')
date: str = row_ib(name='Date')
size: str = row_ib(name='Size')
buffer: str = row_ib(name='Buffer')
buckets: str = row_ib(name='Buckets')
threads: str = row_ib(name='Threads')
tmp_dir_1: str = row_ib(name='Tmp dir 1')
tmp_dir_2: str = row_ib(name='Tmp dir 2')
phase_1_duration_raw: str = row_ib(name='Phase 1 duration (raw)')
phase_1_duration: str = row_ib(name='Phase 1 duration')
phase_1_duration_minutes: str = row_ib(name='Phase 1 duration (minutes)')
phase_1_duration_hours: str = row_ib(name='Phase 1 duration (hours)')
phase_2_duration_raw: str = row_ib(name='Phase 2 duration (raw)')
phase_2_duration: str = row_ib(name='Phase 2 duration')
phase_2_duration_minutes: str = row_ib(name='Phase 2 duration (minutes)')
phase_2_duration_hours: str = row_ib(name='Phase 2 duration (hours)')
phase_3_duration_raw: str = row_ib(name='Phase 3 duration (raw)')
phase_3_duration: str = row_ib(name='Phase 3 duration')
phase_3_duration_minutes: str = row_ib(name='Phase 3 duration (minutes)')
phase_3_duration_hours: str = row_ib(name='Phase 3 duration (hours)')
phase_4_duration_raw: str = row_ib(name='Phase 4 duration (raw)')
phase_4_duration: str = row_ib(name='Phase 4 duration')
phase_4_duration_minutes: str = row_ib(name='Phase 4 duration (minutes)')
phase_4_duration_hours: str = row_ib(name='Phase 4 duration (hours)')
total_time_raw: str = row_ib(name='Total time (raw)')
total_time: str = row_ib(name='Total time')
total_time_minutes: str = row_ib(name='Total time (minutes)')
total_time_hours: str = row_ib(name='Total time (hours)')
copy_time_raw: str = row_ib(name='Copy time (raw)')
copy_time: str = row_ib(name='Copy time')
copy_time_minutes: str = row_ib(name='Copy time (minutes)')
copy_time_hours: str = row_ib(name='Copy time (hours)')
filename: str = row_ib(name='Filename')

@classmethod
def names(cls):
return [field.metadata['name'] for field in attr.fields(cls)]

@classmethod
def from_info(cls, info):
return cls(
plot_id=info.plot_id,
started_at=info.started_at.isoformat(),
date=info.started_at.date().isoformat(),
size=info.plot_size,
buffer=info.buffer,
buckets=info.buckets,
threads=info.threads,
tmp_dir_1=info.tmp_dir1,
tmp_dir_2=info.tmp_dir2,
phase_1_duration_raw=info.phase1_duration_raw,
phase_1_duration=info.phase1_duration,
phase_1_duration_minutes=info.phase1_duration_minutes,
phase_1_duration_hours=info.phase1_duration_hours,
phase_2_duration_raw=info.phase2_duration_raw,
phase_2_duration=info.phase2_duration,
phase_2_duration_minutes=info.phase2_duration_minutes,
phase_2_duration_hours=info.phase2_duration_hours,
phase_3_duration_raw=info.phase3_duration_raw,
phase_3_duration=info.phase3_duration,
phase_3_duration_minutes=info.phase3_duration_minutes,
phase_3_duration_hours=info.phase3_duration_hours,
phase_4_duration_raw=info.phase4_duration_raw,
phase_4_duration=info.phase4_duration,
phase_4_duration_minutes=info.phase4_duration_minutes,
phase_4_duration_hours=info.phase4_duration_hours,
total_time_raw=info.total_time_raw,
total_time=info.total_time,
total_time_minutes=info.total_time_minutes,
total_time_hours=info.total_time_hours,
copy_time_raw=info.copy_time_raw,
copy_time=info.copy_time,
copy_time_minutes=info.copy_time_minutes,
copy_time_hours=info.copy_time_hours,
filename=info.filename,
)

def name_dict(self):
return {
field.metadata['name']: value
for field, value in zip(attr.fields(type(self)), attr.astuple(self))
}

def parse_logs(logfilenames):
parser = PlotLogParser()
result = []

for filename in logfilenames:
with open(filename) as file:
info = parser.parse(file)

if not info.in_progress():
result.append(info)

result.sort(key=lambda element: element.started_at)
return result


def generate(logfilenames, file):
writer = csv.DictWriter(file, fieldnames=Row.names())
writer.writeheader()

logs = parse_logs(logfilenames)

for info in logs:
row = Row.from_info(info=info)
writer.writerow(rowdict=row.name_dict())
18 changes: 17 additions & 1 deletion src/plotman/plotman.py
Original file line number Diff line number Diff line change
@@ -5,14 +5,16 @@
import logging
import logging.handlers
import os
import glob
import random
from shutil import copyfile
import sys
import time

import pendulum

# Plotman libraries
from plotman import analyzer, archive, configuration, interactive, manager, plot_util, reporting
from plotman import analyzer, archive, configuration, interactive, manager, plot_util, reporting, csv_exporter
from plotman import resources as plotman_resources
from plotman.job import Job

@@ -46,6 +48,9 @@ def parse_args(self):

sp.add_parser('archive', help='move completed plots to farming location')

p_export = sp.add_parser('export', help='exports metadata from the plot logs as CSV')
p_export.add_argument('-o', dest='save_to', default=None, type=str, help='save to file. Optional, prints to stdout by default')

p_config = sp.add_parser('config', help='display or generate plotman.yaml configuration')
sp_config = p_config.add_subparsers(dest='config_subcommand')
sp_config.add_parser('generate', help='generate a default plotman.yaml file and print path')
@@ -188,6 +193,17 @@ def main():
analyzer.analyze(args.logfile, args.clipterminals,
args.bytmp, args.bybitfield)

#
# Exports log metadata to CSV
#
elif args.cmd == 'export':
logfilenames = glob.glob(os.path.join(cfg.logging.plots, '*.plot.log'))
if args.save_to is None:
csv_exporter.generate(logfilenames=logfilenames, file=sys.stdout)
else:
with open(args.save_to, 'w', encoding='utf-8') as file:
csv_exporter.generate(logfilenames=logfilenames, file=file)

else:
jobs = Job.get_running_jobs(cfg.logging.plots)

0 comments on commit 5a9b631

Please sign in to comment.