From 010ae58a148d79161cdc6169fa505f3e77a42c57 Mon Sep 17 00:00:00 2001
From: Nick Dokos
Date: Fri, 29 Apr 2022 14:12:07 -0400
Subject: [PATCH] Delete deprecated bench-scripts from agent Makefile

Fixes #2801

PR #2782 deleted deprecated bench scripts from the tree, but did not
delete them from the Makefile. Additional problems were uncovered in
code review:

- pyproject.toml contained a couple of references to no-longer-existing
  files and a reference to dbench-postprocess.

- dbench-postprocess is no longer necessary and is deleted (both the
  "real" version and the mocked test version).

- pbench-agent-default.cfg still contained references to the deleted
  bench-scripts, so it is cleaned up.

docs/pbench-agent.org still mentions dbench (and maybe other deleted
bench-scripts) but I declared it out of scope for this PR. We'll take
care of it in the documentation cleanup.
---
 agent/Makefile                            |   6 -
 .../postprocess/dbench-postprocess        | 135 ------------------
 .../mock-postprocess/dbench-postprocess   |  17 ---
 agent/config/pbench-agent-default.cfg     |  12 --
 pyproject.toml                            |   3 -
 5 files changed, 173 deletions(-)
 delete mode 100755 agent/bench-scripts/postprocess/dbench-postprocess
 delete mode 100755 agent/bench-scripts/test-bin/mock-postprocess/dbench-postprocess

diff --git a/agent/Makefile b/agent/Makefile
index fbf62824f0..09acf2b822 100644
--- a/agent/Makefile
+++ b/agent/Makefile
@@ -72,15 +72,10 @@ click-scripts = \
 	pbench-register-tool-trigger
 
 bench-scripts = \
-	pbench-cyclictest \
-	pbench-dbench \
 	pbench-fio \
 	pbench-fio.md \
 	pbench-gen-iterations \
-	pbench-iozone \
 	pbench-linpack \
-	pbench-migrate \
-	pbench-netperf \
 	pbench-run-benchmark \
 	pbench-run-benchmark.pl \
 	pbench-run-benchmark-sample \
@@ -93,7 +88,6 @@ bench-scripts = \
 bench-postprocess = \
 	BenchPostprocess.pm \
 	compare-bench-results \
-	dbench-postprocess \
 	fio-postprocess \
 	fio-postprocess-cdm \
 	fio-postprocess-viz.py \
diff --git a/agent/bench-scripts/postprocess/dbench-postprocess b/agent/bench-scripts/postprocess/dbench-postprocess
deleted file mode 100755
index 128ddf07ad..0000000000
--- a/agent/bench-scripts/postprocess/dbench-postprocess
+++ /dev/null
@@ -1,135 +0,0 @@
-#!/usr/bin/perl
-
-use strict;
-use warnings;
-
-use lib $ENV{'pbench_lib_dir'};
-use lib $ENV{'pbench_bspp_dir'};
-no lib ".";
-use GenData qw(gen_data);
-use BenchPostprocess qw(get_cpubusy_series calc_ratio_series calc_sum_series);
-use File::Basename;
-
-my $script = basename($0);
-my $dir = $ARGV[0];
-my $tool_label_pattern = $ARGV[1];
-my $tool_group = $ARGV[2];
-my %sample;
-my %rate;
-my $timestamp_ms = 0;
-my $prev_timestamp_ms = 0;
-
-# Load the data from dbench output and create throughput metrics
-# There can be several result files, once for each copy of dbench
-opendir(my $dh, $dir) || die "$script: could not open directory $dir: $!\n";
-my $nr_result_files=0;
-foreach my $result_file ( readdir($dh) ) {
-	if ( $result_file =~ /^result-(.+).txt$/) {
-		my $client_id = $1;
-		my $this_nr_bytes_label = $client_id . "-bytes"; # a number of bytes (not a througput metric)
-		my $this_tput_label = $client_id . "-GiB_sec"; # a number of 1024^3 bytes over time
-		open(IN_FILE, "$dir/$result_file") || die "$script: could not open file $dir/$result_file: $!\n";
-		$prev_timestamp_ms = 0;
-		while (<IN_FILE>) {
-			my $line = "$_";
-			chomp($line);
-			# for each sample in the dbench result file, we get a number of bytes and a timestamp
-			# example of a sample: "time:1395170796 name:Txn2 nr_bytes:22037790720 nr_ops:43042560"
-			if ($line =~ /^execute total_bytes:\s(\d+)\sepochtime_ms:\s(\d+)\slatency_ms:\s(\d+\.\d+)/){
-				my $bytes = $1;
-				$timestamp_ms = $2;
-				$sample{$this_nr_bytes_label}{$timestamp_ms} = $bytes;
-				if ($prev_timestamp_ms != 0) {
-					my $timestamp_s_diff = ($timestamp_ms - $prev_timestamp_ms)/1000;
-					my $bytes_diff = $sample{$this_nr_bytes_label}{$timestamp_ms} -
-						$sample{$this_nr_bytes_label}{$prev_timestamp_ms};
-					$rate{"dbench"}{"Per_Instance_Throughput"}{$this_tput_label}{$timestamp_ms} = {
-						"date" => int $timestamp_ms,
-						"value" => $bytes_diff / (1024 * 1024 * 1024) / $timestamp_s_diff
-					};
-				}
-				$prev_timestamp_ms = $timestamp_ms;
-			}
-		}
-		$nr_result_files++;
-		close(IN_FILE);
-	}
-}
-closedir $dh;
-if ($nr_result_files == 0) {
-	print STDERR "$script: could not find any result files to process, exiting\n";
-	exit;
-}
-
-# Define a set of tool directories which we want to use to report CPU and efficiency metrics
-# Search for tool directories which match the $tool_label_pattern
-my %tool_ids;
-my $tool_group_dir = "$dir/tools-$tool_group";
-if (opendir(my $dh, $tool_group_dir)) {
-	foreach my $this_tool_dir (readdir($dh)) {
-		if ($this_tool_dir =~ /^$tool_label_pattern/) {
-			my $tool_dir_id = $this_tool_dir;
-			$tool_dir_id =~ s/^$tool_label_pattern//;
-			$this_tool_dir = $tool_group_dir . "/" . $this_tool_dir;
-			$tool_ids{$this_tool_dir} = $tool_dir_id;
-		}
-	}
-} else {
-	print STDERR "$script: could not find any directories in $tool_group_dir which matched $tool_label_pattern\n";
-}
-
-# If there are multiple result files (multiple dbench copies), we compose a
-# throughput metric by adding the values across all the results. We seed the
-# sum with the values from the first result and then add in the values from
-# the rest.
-my @per_server_results = (keys %{ $rate{"dbench"}{"Per_Instance_Throughput"} });
-$rate{"dbench"}{"Throughput"}{"GiB_sec"} = {
-	(%{$rate{"dbench"}{"Per_Instance_Throughput"}{$per_server_results[0]}})
-};
-shift @per_server_results;
-foreach my $per_server_result (@per_server_results) {
-	calc_sum_series(
-		\%{$rate{"dbench"}{"Per_Instance_Throughput"}{$per_server_result}},
-		\%{$rate{"dbench"}{"Throughput"}{"GiB_sec"}}
-	);
-}
-
-# At the same time, generate an efficiency (GiB_sec/CPU) series for both the client and server CPU data
-foreach my $this_tool_dir (keys %tool_ids) {
-	my $this_tool_id = $tool_ids{$this_tool_dir};
-	my $this_cpu_label = "CPU_" . $this_tool_id;
-	my $this_eff_label = "GiB_sec/" . $this_cpu_label;
-	my $res = get_cpubusy_series(
-		$this_tool_dir,
-		\%{ $rate{"dbench"}{"CPU_usage"}{$this_cpu_label} },
-	);
-	if ($res == 0) {
-		calc_ratio_series(
-			\%{ $rate{"dbench"}{"Throughput"}{"GiB_sec"} },
-			\%{ $rate{"dbench"}{"CPU_usage"}{$this_cpu_label} },
-			\%{ $rate{"dbench"}{"Effiency"}{$this_eff_label} });
-	}
-}
-
-# Convert the timeseries data back to the form which gen_data() expects.
-foreach my $htmlpage (values %rate) {
-	foreach my $chart (values %{$htmlpage}) {
-		foreach my $series_key (values %{$chart}) {
-			foreach my $timestamp_ms (values %{$series_key}) {
-				$timestamp_ms = $timestamp_ms->{'value'};
-			}
-		}
-	}
-}
-
-# define the graph types
-# if you want something other than lineChart, put it here
-my %graph_type;
-
-# threshold for displying a series in a graph
-my %graph_threshold;
-
-# N.B. Final parameter of 1 tells gen_data to do the expensive
-# combinatorial check of timestamps. dbench-postprocess is the
-# only script that makes use of it.
-gen_data(\%rate, \%graph_type, \%graph_threshold, $dir, 1);
diff --git a/agent/bench-scripts/test-bin/mock-postprocess/dbench-postprocess b/agent/bench-scripts/test-bin/mock-postprocess/dbench-postprocess
deleted file mode 100755
index 38f13cbbe3..0000000000
--- a/agent/bench-scripts/test-bin/mock-postprocess/dbench-postprocess
+++ /dev/null
@@ -1,17 +0,0 @@
-#!/usr/bin/env python3
-
-import os
-import sys
-
-outfn = os.environ.get("_testlog")
-if not outfn:
-    sys.stderr.write("WARNING _testlog environment variable not present\n")
-    sys.exit(1)
-else:
-    with open(outfn, "a") as outf:
-        outf.write(" ".join(sys.argv) + "\n")
-
-with open(os.path.join(sys.argv[1], "dbench-average.txt"), "w") as avgf:
-    avgf.write("Per_Instance_Throughput-1-127.0.0.1-GiB_sec=3.538694\n")
-    avgf.write("Per_Instance_Throughput-2-127.0.0.1-GiB_sec=3.213416\n")
-    avgf.write("Throughput-GiB_sec=6.749179\n")
diff --git a/agent/config/pbench-agent-default.cfg b/agent/config/pbench-agent-default.cfg
index e723398e74..8fd393066b 100644
--- a/agent/config/pbench-agent-default.cfg
+++ b/agent/config/pbench-agent-default.cfg
@@ -43,28 +43,16 @@ interval = 3
 # to try to avoid large and unweildy data sets.
 interval = 30
 
-[dbench]
-version = 4.00
-match = gte
-
 [fio]
 version = 3.21
 match = gte
 server_port = 8765
 histogram_interval_msec = 10000
 
-[iozone]
-version = 3.430
-match = gte
-
 [linpack]
 version = 11.1.3
 match = equ
 
-[netperf]
-version = 2.7.0
-match = gte
-
 [uperf]
 version = 1.0.7
 match = gte
diff --git a/pyproject.toml b/pyproject.toml
index 55b9acf533..289cd3fe69 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -10,7 +10,6 @@ skip-string-normalization = false
 include = '''
 \.pyi?$
 | ^/agent/bench-scripts/postprocess/user-benchmark-wrapper$
-| ^/agent/bench-scripts/test-bin/mock-postprocess/dbench-postprocess$
 | ^/agent/bench-scripts/test-bin/mock-postprocess/uperf-postprocess$
 | ^/agent/bench-scripts/test-bin/java$
 | ^/agent/tool-scripts/datalog/haproxy-ocp-datalog$
@@ -42,8 +41,6 @@ include = '''
 extend-exclude = '''
 (
   fio-histo-log-pctiles\.py$
-| binary-search\.py$
-| profile-builder\.py$
 | agent/stockpile
 | web-server/v0\.3/demo\.py$
 )
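
Note: a quick way to double-check that nothing outside docs/ still
references the deleted pieces is a tree-wide grep along these lines
(an illustrative command, not part of the patch; the pattern list and
pathspecs are assumptions, adjust as needed -- it should come back
empty after the patch is applied):

    git grep -nE 'pbench-(cyclictest|dbench|iozone|migrate|netperf)|dbench-postprocess' -- agent pyproject.toml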