diff --git a/evaluation/eval.ipynb b/evaluation/eval.ipynb
index 1fa1735..d22f039 100644
--- a/evaluation/eval.ipynb
+++ b/evaluation/eval.ipynb
@@ -26,6 +26,135 @@
     "!TEST_SUBSET=True TEST_NAME=jqkungfu python run_slicedice.py"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Failed to run boa - wasm-slice\n",
+      "timeout 120s python /home/don/wasm-r3/evaluation/run_reduction_tool.py wasm-slice /home/don/wasm-r3/benchmarks/boa/boa.wasm\n",
+      "Failed to run boa - wasm-reduce\n",
+      "timeout 120s python /home/don/wasm-r3/evaluation/run_reduction_tool.py wasm-reduce /home/don/wasm-r3/benchmarks/boa/boa.wasm\n",
+      "Failed to run funky-kart - wasm-reduce\n",
+      "timeout 120s python /home/don/wasm-r3/evaluation/run_reduction_tool.py wasm-reduce /home/don/wasm-r3/benchmarks/funky-kart/funky-kart.wasm\n",
+      "Failed to run rfxgen - wasm-reduce\n",
+      "timeout 120s python /home/don/wasm-r3/evaluation/run_reduction_tool.py wasm-reduce /home/don/wasm-r3/benchmarks/rfxgen/rfxgen.wasm\n",
+      "Failed to run rguilayout - wasm-reduce\n",
+      "timeout 120s python /home/don/wasm-r3/evaluation/run_reduction_tool.py wasm-reduce /home/don/wasm-r3/benchmarks/rguilayout/rguilayout.wasm\n",
+      "Failed to run rguistyler - wasm-reduce\n",
+      "timeout 120s python /home/don/wasm-r3/evaluation/run_reduction_tool.py wasm-reduce /home/don/wasm-r3/benchmarks/rguistyler/rguistyler.wasm\n",
+      "Failed to run riconpacker - wasm-reduce\n",
+      "timeout 120s python /home/don/wasm-r3/evaluation/run_reduction_tool.py wasm-reduce /home/don/wasm-r3/benchmarks/riconpacker/riconpacker.wasm\n",
+      "Failed to run rguilayout - wasm-shrink\n",
+      "timeout 120s python /home/don/wasm-r3/evaluation/run_reduction_tool.py wasm-shrink /home/don/wasm-r3/benchmarks/rguilayout/rguilayout.wasm\n",
+      "Failed to run sqlgui - wasm-reduce\n",
+      "timeout 120s python /home/don/wasm-r3/evaluation/run_reduction_tool.py wasm-reduce /home/don/wasm-r3/benchmarks/sqlgui/sqlgui.wasm\n",
+      "Failed to run commanderkeen - wasm-reduce\n",
+      "timeout 120s python /home/don/wasm-r3/evaluation/run_reduction_tool.py wasm-reduce /home/don/wasm-r3/benchmarks/commanderkeen/commanderkeen.wasm\n",
+      "Failed to run hydro - wasm-reduce\n",
+      "timeout 120s python /home/don/wasm-r3/evaluation/run_reduction_tool.py wasm-reduce /home/don/wasm-r3/benchmarks/hydro/hydro.wasm\n",
+      "Failed to run rtexviewer - wasm-reduce\n",
+      "timeout 120s python /home/don/wasm-r3/evaluation/run_reduction_tool.py wasm-reduce /home/don/wasm-r3/benchmarks/rtexviewer/rtexviewer.wasm\n",
+      "Failed to run mandelbrot - wasm-reduce\n",
+      "timeout 120s python /home/don/wasm-r3/evaluation/run_reduction_tool.py wasm-reduce /home/don/wasm-r3/benchmarks/mandelbrot/mandelbrot.wasm\n",
+      "Failed to run mandelbrot - wasm-shrink\n",
+      "timeout 120s python /home/don/wasm-r3/evaluation/run_reduction_tool.py wasm-shrink /home/don/wasm-r3/benchmarks/mandelbrot/mandelbrot.wasm\n"
+     ]
+    }
+   ],
+   "source": [
+    "import time, subprocess, json, os, concurrent.futures  # `import concurrent` alone does not load the futures submodule\n",
+    "\n",
+    "TIMEOUT = 120  # seconds allowed per tool run (passed to timeout(1))\n",
+    "WASMR3_PATH = os.path.expanduser(os.getenv(\"WASMR3_PATH\", \"~/wasm-r3\"))  # os.path.getsize() does not expand \"~\"\n",
+    "\n",
+    "with open(\"metrics.json\", \"r\") as f:  # existing per-benchmark metrics, extended below\n",
+    "    metrics = json.load(f)\n",
+    "\n",
+    "testset = [  # benchmark names; each one is read from benchmarks/<name>/<name>.wasm\n",
+    "    'boa',  # wasm-slice does not work on this benchmark\n",
+    "    \"guiicons\",\n",
+    "    \"funky-kart\",\n",
+    "    \"jsc\",\n",
+    "    \"rfxgen\",\n",
+    "    \"rguilayout\",\n",
+    "    \"rguistyler\",\n",
+    "    \"riconpacker\",\n",
+    "    \"sqlgui\",\n",
+    "    \"commanderkeen\",\n",
+    "    \"hydro\",\n",
+    "    \"rtexviewer\",\n",
+    "    \"mandelbrot\",\n",
+    "]\n",
+    "\n",
+    "our_tool = [\"wasm-slice\"]  # run first, in a thread pool of its own\n",
+    "\n",
+    "toolset = [  # run afterwards, in a second thread pool\n",
+    "    \"wasm-reduce\",\n",
+    "    \"wasm-shrink\",\n",
+    "]\n",
+    "\n",
+    "tool_to_suffix = {  # tool name -> infix of the output file measured at benchmarks/<name>/<name>.<suffix>.wasm\n",
+    "    \"wasm-slice\": \"sliced\",\n",
+    "    \"wasm-reduce\": \"reduced\",\n",
+    "    \"wasm-shrink\": \"shrunken\",\n",
+    "}\n",
+    "\n",
+    "\n",
+    "def run_reduction_tool(testname, tool):\n",
+    "    \"\"\"Run `tool` on one benchmark; return [testname, tool, seconds, output_bytes], or \"fail\" for the last two.\"\"\"\n",
+    "    root = os.path.expanduser(WASMR3_PATH)  # os.path.getsize() does not expand \"~\"\n",
+    "    command = f\"timeout {TIMEOUT}s python {root}/evaluation/run_reduction_tool.py {tool} {root}/benchmarks/{testname}/{testname}.wasm\"\n",
+    "    try:\n",
+    "        start_time = time.time()\n",
+    "        # The exit status is not checked; only a missing output file counts as failure.\n",
+    "        subprocess.run(command, shell=True, capture_output=True, text=True)\n",
+    "        elapsed = time.time() - start_time\n",
+    "        reduced_size = os.path.getsize(f\"{root}/benchmarks/{testname}/{testname}.{tool_to_suffix[tool]}.wasm\")\n",
+    "        return [testname, tool, elapsed, reduced_size]\n",
+    "    except Exception as e:  # worker boundary: record the failure instead of aborting the whole sweep\n",
+    "        print(f\"Failed to run {testname} - {tool}: {e!r}\")\n",
+    "        print(command)\n",
+    "        return [testname, tool, \"fail\", \"fail\"]\n",
+    "\n",
+    "\n",
+    "for testname in testset:\n",
+    "    # expanduser() guards against a \"~\" in WASMR3_PATH, which os.path.getsize() would not expand.\n",
+    "    original_wasm = os.path.expanduser(f\"{WASMR3_PATH}/benchmarks/{testname}/{testname}.wasm\")\n",
+    "    metrics[testname][\"reduction_comparison\"] = {\"original_size\": os.path.getsize(original_wasm)}\n",
+    "\n",
+    "def run_tools(tools):\n",
+    "    \"\"\"Run every (benchmark, tool) pair on an 8-worker thread pool; return the results in completion order.\"\"\"\n",
+    "    with concurrent.futures.ThreadPoolExecutor(max_workers=8) as executor:\n",
+    "        futures = [\n",
+    "            executor.submit(run_reduction_tool, testname, tool)\n",
+    "            for testname in testset\n",
+    "            for tool in tools\n",
+    "        ]\n",
+    "        return [\n",
+    "            future.result()\n",
+    "            for future in concurrent.futures.as_completed(futures)\n",
+    "        ]\n",
+    "\n",
+    "\n",
+    "# The pools run one after the other: every our_tool job finishes before\n",
+    "# the first toolset job is submitted.\n",
+    "results = run_tools(our_tool) + run_tools(toolset)\n",
+    "\n",
+    "for testname, tool, elapsed, reduced_size in results:\n",
+    "    comparison = metrics[testname][\"reduction_comparison\"]\n",
+    "    comparison[f\"{tool}-size\"] = reduced_size\n",
+    "    comparison[f\"{tool}-time\"] = elapsed\n",
+    "\n",
+    "by_name = {name: metrics[name] for name in sorted(metrics)}  # top-level keys in sorted order\n",
+    "with open(\"metrics.json\", \"w\") as f:\n",
+    "    json.dump(by_name, f, indent=4)"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 4,
diff --git a/evaluation/interesting.py b/evaluation/interesting.py
index 36eb772..19f1597 100755
--- a/evaluation/interesting.py
+++ b/evaluation/interesting.py
@@ -50,15 +50,17 @@ def run_command(engine, test_input):
 
 def crash_on_wizard():
     result = run_command(ENGINE, WASM)
+    # A non-zero exit status counts only when stdout lacks 'no main export from module'.
     return result.returncode != 0 and result.stdout.find('no main export from module') == -1
 
 def ok_on_wasmtime():
-    command = ['timeout', '1', 'wasmtime', '--invoke', 'main', WASM]
+    command = ['timeout', '5', 'wasmtime', '--invoke', 'main', WASM]  # timeout(1) stops wasmtime after 5 s
     result = subprocess.run(
         command,
         capture_output=True,
         text=True,
     )
+    # True only when the command exits 0 (timeout(1) exits non-zero when the limit is hit).
     return result.returncode == 0
 
 if crash_on_wizard() and ok_on_wasmtime():
diff --git a/evaluation/metrics.json b/evaluation/metrics.json
index c9b0d2e..fdbd82f 100644
--- a/evaluation/metrics.json
+++ b/evaluation/metrics.json
@@ -1847,6 +1847,15 @@
         "4558": {
             "slice-dice": 7313,
             "wasm-r3": 11075
+        },
+        "reduction_comparison": {
+            "original_size": 22264896,
+            "wasm-slice-size": "fail",
+            "wasm-slice-time": "fail",
+            "wasm-reduce-size": 22180313,
+            "wasm-reduce-time": 120,
+            "wasm-shrink-size": 22216362,
+            "wasm-shrink-time": 120.01304936408997
         }
     },
     "bullet": {
@@ -9175,6 +9184,15 @@
         "1968": {
             "slice-dice": 6984,
             "wasm-r3": 94225
+        },
+        "reduction_comparison": {
+            "original_size": 4331017,
+            "wasm-slice-size": 995605,
+            "wasm-slice-time": 90.20325756072998,
+            "wasm-reduce-size": 676743,
+            "wasm-reduce-time": 120,
+            "wasm-shrink-size": 4328380,
+            "wasm-shrink-time": 120.00192189216614
         }
     },
     "factorial": {
@@ -13425,6 +13443,15 @@
         "1586": {
             "slice-dice": 1228,
             "wasm-r3": 80029
+        },
+        "reduction_comparison": {
+            "original_size": 3725095,
+            "wasm-slice-size": 2053785,
+            "wasm-slice-time": 69.60664987564087,
+            "wasm-reduce-size": 2299553,
+            "wasm-reduce-time": 120,
+            "wasm-shrink-size": 3675826,
+            "wasm-shrink-time": 120.00278520584106
         }
     },
     "game-of-life": {
@@ -14105,6 +14132,15 @@
         "504": {
             "slice-dice": 446,
             "wasm-r3": 14199
+        },
+        "reduction_comparison": {
+            "original_size": 376838,
+            "wasm-slice-size": 452103,
+            "wasm-slice-time": 30.84325075149536,
+            "wasm-reduce-size": 375265,
+            "wasm-reduce-time": 120,
+            "wasm-shrink-size": 315993,
+            "wasm-shrink-time": 120.00191926956177
         }
     },
     "hydro": {
@@ -14319,6 +14355,15 @@
         "1586": {
             "slice-dice": 893,
             "wasm-r3": 2385
+        },
+        "reduction_comparison": {
+            "original_size": 824085,
+            "wasm-slice-size": 380686,
+            "wasm-slice-time": 6.540843486785889,
+            "wasm-reduce-size": 645689,
+            "wasm-reduce-time": 120,
+            "wasm-shrink-size": 734868,
+            "wasm-shrink-time": 120.00204253196716
         }
     },
     "jqkungfu": {
@@ -24091,6 +24136,15 @@
         "20796": {
             "slice-dice": 6564,
             "wasm-r3": 24387
+        },
+        "reduction_comparison": {
+            "original_size": 5470509,
+            "wasm-slice-size": 4900022,
+            "wasm-slice-time": 44.25752401351929,
+            "wasm-reduce-size": 5286120,
+            "wasm-reduce-time": 120,
+            "wasm-shrink-size": 5159781,
+            "wasm-shrink-time": 120.00331735610962
         }
     },
     "mandelbrot": {
@@ -24241,6 +24295,15 @@
         "164": {
             "slice-dice": 174,
             "wasm-r3": 31976
+        },
+        "reduction_comparison": {
+            "original_size": 240403,
+            "wasm-slice-size": 404385,
+            "wasm-slice-time": 97.6673104763031,
+            "wasm-reduce-size": 237739,
+            "wasm-reduce-time": 120,
+            "wasm-shrink-size": 232459,
+            "wasm-shrink-time": 120
         }
     },
     "multiplyDouble": {
@@ -26257,6 +26320,15 @@
         "572": {
             "slice-dice": 568,
             "wasm-r3": 11958
+        },
+        "reduction_comparison": {
+            "original_size": 502035,
+            "wasm-slice-size": 540367,
+            "wasm-slice-time": 53.18854331970215,
+            "wasm-reduce-size": 485322,
+            "wasm-reduce-time": 120,
+            "wasm-shrink-size": 492377,
+            "wasm-shrink-time": 120.00193786621094
         }
     },
     "rguilayout": {
@@ -27003,6 +27075,15 @@
         "502": {
             "slice-dice": 464,
             "wasm-r3": 15897
+        },
+        "reduction_comparison": {
+            "original_size": 516369,
+            "wasm-slice-size": 596183,
+            "wasm-slice-time": 31.644596576690674,
+            "wasm-reduce-size": 514791,
+            "wasm-reduce-time": 120,
+            "wasm-shrink-size": 514240,
+            "wasm-shrink-time": 120
         }
     },
     "rguistyler": {
@@ -27765,6 +27846,15 @@
         "573": {
             "slice-dice": 627,
             "wasm-r3": 11898
+        },
+        "reduction_comparison": {
+            "original_size": 558009,
+            "wasm-slice-size": 845480,
+            "wasm-slice-time": 32.91276717185974,
+            "wasm-reduce-size": 551339,
+            "wasm-reduce-time": 120,
+            "wasm-shrink-size": 546181,
+            "wasm-shrink-time": 120.00490522384644
         }
     },
     "riconpacker": {
@@ -28415,6 +28505,15 @@
         "575": {
             "slice-dice": 582,
             "wasm-r3": 1990
+        },
+        "reduction_comparison": {
+            "original_size": 486543,
+            "wasm-slice-size": 425501,
+            "wasm-slice-time": 6.774238348007202,
+            "wasm-reduce-size": 486626,
+            "wasm-reduce-time": 120,
+            "wasm-shrink-size": 457528,
+            "wasm-shrink-time": 120.00188422203064
         }
     },
     "rtexpacker": {
@@ -28435,6 +28534,15 @@
         "365": {
             "slice-dice": 381,
             "wasm-r3": 3042
+        },
+        "reduction_comparison": {
+            "original_size": 386612,
+            "wasm-slice-size": 209441,
+            "wasm-slice-time": 3.9366116523742676,
+            "wasm-reduce-size": 239415,
+            "wasm-reduce-time": 120,
+            "wasm-shrink-size": 217342,
+            "wasm-shrink-time": 120.00243353843689
         }
     },
     "sandspiel": {
@@ -31051,6 +31159,15 @@
         "1893": {
             "slice-dice": 1058,
             "wasm-r3": 3728
+        },
+        "reduction_comparison": {
+            "original_size": 712714,
+            "wasm-slice-size": 551302,
+            "wasm-slice-time": 7.06463360786438,
+            "wasm-reduce-size": 648272,
+            "wasm-reduce-time": 120,
+            "wasm-shrink-size": 664983,
+            "wasm-shrink-time": 120.00179672241211
         }
     }
 }
\ No newline at end of file
diff --git a/evaluation/run_reduction_tool.py b/evaluation/run_reduction_tool.py
index c57b6a5..2caa92e 100755
--- a/evaluation/run_reduction_tool.py
+++ b/evaluation/run_reduction_tool.py
@@ -5,6 +5,8 @@
 import subprocess
 import shutil
 
+WASMR3_PATH = os.getenv("WASMR3_PATH", "/home/wasm-r3")  # NOTE(review): evaluation/eval.ipynb falls back to "~/wasm-r3" instead; confirm the two defaults are meant to differ
+
 # Exit if BINARYEN_ROOT is not set
 if "BINARYEN_ROOT" not in os.environ:
     print("Error: BINARYEN_ROOT environment variable is not set")
@@ -31,16 +33,21 @@
     'mandelbrot': 'MODE=spc',
 }
 
-tool_to_command = {
-    "wasm-reduce": "wasm-reduce -b $BINARYEN_ROOT/bin '--command=./evaluation/interesting.py test.shrunken.wasm' -t test.shrunken.wasm -w work.shrunken.wasm",
-    "wasm-shrink": "wasm-tools shrink ./evaluation/interesting.py",
-    "wasm-slice": "wasm-slice ./evaluation/interesting.py",
-}
+def tool_to_command(tool, test_name):
+    """Return the shell command line that runs `tool` for `test_name`, or '' if `tool` is unknown."""
+    if tool == "wasm-reduce":
+        test_path = f'./benchmarks/{test_name}/{test_name}.reduced.wasm'
+        return f"wasm-reduce -to 10 -b $BINARYEN_ROOT/bin '--command={WASMR3_PATH}/evaluation/interesting.py {test_path}' -t {test_path} -w work.reduced.wasm"
+    if tool == "wasm-shrink":
+        return f"wasm-tools shrink {WASMR3_PATH}/evaluation/interesting.py"
+    if tool == "wasm-slice":
+        return f"wasm-slice {WASMR3_PATH}/evaluation/interesting.py"
+    return ""  # falsy, so run_command's `if not command` check reports the unknown tool
 
 def run_command(tool, test_input):
     test_name = os.path.splitext(os.path.basename(test_input))[0]
     mode = test_to_mode.get(test_name, '')
-    command = tool_to_command.get(tool, '')
+    command = tool_to_command(tool, test_name)
     
     if not command:
         print(f"Error: Unknown tool '{tool}'")
diff --git a/wasm-slice b/wasm-slice
index 925f1d0..bcb7fb8 100755
--- a/wasm-slice
+++ b/wasm-slice
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 
-import sys, subprocess, concurrent.futures, re, os, re, subprocess, csv
+import sys, subprocess, concurrent.futures, re, os, re, subprocess, csv, shutil
 
 # this takes upto 150GB of memory.
 # TODO: why?
@@ -133,6 +133,7 @@ def run_slicedice(testname, fidx):
         output = subprocess.check_output(command, shell=True, text=True)
         # TODO: make this configurable
         replay_wasm_path = f'{WASMR3_PATH}/benchmarks/{test_name}/out/{fidx}/benchmarks/bin_1/replay.wasm'
+        shutil.copy(replay_wasm_path, f'{WASMR3_PATH}/benchmarks/{test_name}/{test_name}.sliced.wasm')
         interestingness_command = [interesting_script, replay_wasm_path]
         result = subprocess.run(interestingness_command, check=False)
         test_input_size = os.path.getsize(test_input)