From 772646b95a69fbbf236cd5391032d97a9cb118f4 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 17 Jun 2024 21:12:16 +0000
Subject: [PATCH] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 echofilter/inference.py                       |    8 +-
 echofilter/nn/wrapper.py                      |    2 +-
 notebooks/Building Data Loader 2.ipynb        | 1360 +++++------
 notebooks/Building Data Loader 3.ipynb        | 1662 ++++++-------
 notebooks/Building Data Loader.ipynb          | 1194 ++++-----
 notebooks/Completely decomposing mask.ipynb   | 1056 ++++----
 notebooks/Data Loader - Stationary.ipynb      |  994 ++++----
 notebooks/Estimate mean and stdev.ipynb       | 1194 ++++-----
 notebooks/Finding mask all removed.ipynb      |  706 +++---
 .../Generating lines from masked csv.ipynb    | 1042 ++++----
 notebooks/Passive metadata labelling.ipynb    | 2152 ++++++++---------
 notebooks/Plot Metrics Distribution.ipynb     |  284 +--
 notebooks/Plot results.ipynb                  |  646 ++---
 notebooks/Splitting Passive Data.ipynb        | 1712 ++++++-------
 notebooks/Surface anomaly removal.ipynb       | 1146 ++++-----
 notebooks/check chunking.ipynb                |  548 ++---
 .../check making lines from masked csv.ipynb  |  414 ++--
 notebooks/check splitting passive data.ipynb  |  884 +++----
 18 files changed, 8503 insertions(+), 8501 deletions(-)

diff --git a/echofilter/inference.py b/echofilter/inference.py
index 1f3c3f41..b5b12417 100755
--- a/echofilter/inference.py
+++ b/echofilter/inference.py
@@ -703,9 +703,11 @@ def run_inference(
             print(
                 "Echoview application would{} be opened {}.".format(
                     "" if do_open else " not",
-                    "to convert EV files to CSV"
-                    if do_open
-                    else "(no EV files to process)",
+                    (
+                        "to convert EV files to CSV"
+                        if do_open
+                        else "(no EV files to process)"
+                    ),
                 )
             )
         do_open = False
diff --git a/echofilter/nn/wrapper.py b/echofilter/nn/wrapper.py
index 2abd2a3c..8e29426c 100644
--- a/echofilter/nn/wrapper.py
+++ b/echofilter/nn/wrapper.py
@@ -119,7 +119,7 @@ def __init__(
         mapping_extra = {}
         for key in mapping:
             for alias_map in self.aliases:
-                for (alias_a, alias_b) in [alias_map, alias_map[::-1]]:
+                for alias_a, alias_b in [alias_map, alias_map[::-1]]:
                     if "_" + alias_a not in key:
                         continue
                     alt_key = key.replace("_" + alias_a, "_" + alias_b)
diff --git a/notebooks/Building Data Loader 2.ipynb b/notebooks/Building Data Loader 2.ipynb
index ce320fbf..1534c03f 100644
--- a/notebooks/Building Data Loader 2.ipynb	
+++ b/notebooks/Building Data Loader 2.ipynb	
@@ -1,682 +1,682 @@
 {
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "cd .."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import csv\n",
-    "import os"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import numpy as np"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import pandas as pd"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import matplotlib.pyplot as plt\n",
-    "import seaborn as sns\n",
-    "\n",
-    "sns.set()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from echofilter.raw.loader import evl_loader, transect_loader"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "ROOT_DATA_DIR = \"/media/scott/scratch/Datasets/dsforce\"\n",
-    "ROOT_DATA_DIR = \"/data/dsforce\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def load_transect_data(\n",
-    "    transect_pth, dataset=\"surveyExports\", root_data_dir=ROOT_DATA_DIR\n",
-    "):\n",
-    "\n",
-    "    dirname = os.path.join(root_data_dir, dataset)\n",
-    "    raw_fname = os.path.join(dirname, transect_pth + \"_Sv_raw.csv\")\n",
-    "    bot_fname = os.path.join(dirname, transect_pth + \"_bottom.evl\")\n",
-    "    top_fname = os.path.join(dirname, transect_pth + \"_turbulence.evl\")\n",
-    "\n",
-    "    timestamps, depths, signals = transect_loader(raw_fname)\n",
-    "    t_bot, d_bot = evl_loader(bot_fname)\n",
-    "    t_top, d_top = evl_loader(top_fname)\n",
-    "\n",
-    "    return (\n",
-    "        timestamps,\n",
-    "        depths,\n",
-    "        signals,\n",
-    "        np.interp(timestamps, t_top, d_top),\n",
-    "        np.interp(timestamps, t_bot, d_bot),\n",
-    "    )"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def load_transect_data2(\n",
-    "    survey, transect_name, dataset=\"surveyExports\", root_data_dir=ROOT_DATA_DIR\n",
-    "):\n",
-    "\n",
-    "    return load_transect_data(\n",
-    "        os.path.join(\n",
-    "            \"Survey{}\".format(survey), \"Survey{}_{}\".format(survey, transect_name)\n",
-    "        ),\n",
-    "        dataset=dataset,\n",
-    "        root_data_dir=root_data_dir,\n",
-    "    )"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def plot_transect_data(\n",
-    "    transect_pth, dataset=\"surveyExports\", root_data_dir=ROOT_DATA_DIR\n",
-    "):\n",
-    "\n",
-    "    timestamps, depths, signals, d_top, d_bot = load_transect_data(\n",
-    "        transect_pth, dataset, root_data_dir\n",
-    "    )\n",
-    "\n",
-    "    plt.figure(figsize=(12, 12))\n",
-    "    plt.pcolormesh(timestamps, -depths, signals.T)\n",
-    "    plt.plot(timestamps, -d_bot, \"b\")\n",
-    "    plt.plot(timestamps, -d_top, \"c\")\n",
-    "    plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def plot_transect_data2(\n",
-    "    survey, transect_name, dataset=\"surveyExports\", root_data_dir=ROOT_DATA_DIR\n",
-    "):\n",
-    "\n",
-    "    timestamps, depths, signals, d_top, d_bot = load_transect_data2(\n",
-    "        survey, transect_name, dataset, root_data_dir\n",
-    "    )\n",
-    "\n",
-    "    plt.figure(figsize=(12, 12))\n",
-    "    plt.pcolormesh(timestamps, -depths, signals.T)\n",
-    "    plt.plot(timestamps, -d_bot, \"b\")\n",
-    "    plt.plot(timestamps, -d_top, \"c\")\n",
-    "    plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plot_transect_data(\"Survey17/Survey17_GR1_N0A_E\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "survey = 17\n",
-    "transect_name = \"GR1_N0A_E\"\n",
-    "plot_transect_data2(survey, transect_name)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def get_partition_data(\n",
-    "    partition,\n",
-    "    dataset=\"surveyExports\",\n",
-    "    partitioning_version=\"firstpass\",\n",
-    "    root_data_dir=ROOT_DATA_DIR,\n",
-    "):\n",
-    "\n",
-    "    dirname = os.path.join(root_data_dir, dataset, \"sets\", partitioning_version)\n",
-    "    fname_partition = os.path.join(dirname, partition + \".txt\")\n",
-    "    fname_header = os.path.join(dirname, \"header\" + \".txt\")\n",
-    "\n",
-    "    with open(fname_header, \"r\") as hf:\n",
-    "        for row in csv.reader(hf):\n",
-    "            header = [entry.strip() for entry in row]\n",
-    "            break\n",
-    "\n",
-    "    df = pd.read_csv(fname_partition, header=None, names=header)\n",
-    "    return df"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "get_partition_data(\"train\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def get_partition_list(\n",
-    "    partition,\n",
-    "    dataset=\"surveyExports\",\n",
-    "    full_path=False,\n",
-    "    partitioning_version=\"firstpass\",\n",
-    "    root_data_dir=ROOT_DATA_DIR,\n",
-    "):\n",
-    "    df = get_partition_data(\n",
-    "        partition,\n",
-    "        dataset=dataset,\n",
-    "        partitioning_version=partitioning_version,\n",
-    "        root_data_dir=root_data_dir,\n",
-    "    )\n",
-    "    fnames = df[\"Filename\"]\n",
-    "    fnames = [\n",
-    "        os.path.join(f.split(\"_\")[0], f.strip().replace(\"_Sv_raw.csv\", \"\"))\n",
-    "        for f in fnames\n",
-    "    ]\n",
-    "    if full_path:\n",
-    "        fnames = [os.path.join(root_data_dir, dataset, f) for f in fnames]\n",
-    "    return fnames"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "get_partition_list(\"train\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "get_partition_list(\"train\", full_path=True)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "print(\"{:<40s}{:>6s} {:>6s}\".format(\"Filename\", \"Min D\", \"Max D\"))\n",
-    "\n",
-    "for fname in sorted(get_partition_list(\"train\", full_path=True)):\n",
-    "    fname = fname + \"_bottom.evl\"\n",
-    "    try:\n",
-    "        depths = evl_loader(fname)[1]\n",
-    "    except Exception:\n",
-    "        continue\n",
-    "    print(\n",
-    "        \"{:<40s}{:6.1f} {:6.1f}  {}\".format(\n",
-    "            os.path.split(fname)[1],\n",
-    "            min(depths),\n",
-    "            max(depths),\n",
-    "            \"*\" if max(depths) > 62 else \"\",\n",
-    "        )\n",
-    "    )"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "print(\"{:<40s}{:>6s} {:>6s}\".format(\"Filename\", \"Min D\", \"Max D\"))\n",
-    "\n",
-    "for fname in sorted(get_partition_list(\"validate\", full_path=True)):\n",
-    "    fname = fname + \"_bottom.evl\"\n",
-    "    try:\n",
-    "        depths = evl_loader(fname)[1]\n",
-    "    except Exception:\n",
-    "        continue\n",
-    "    print(\n",
-    "        \"{:<40s}{:6.1f} {:6.1f}  {}\".format(\n",
-    "            os.path.split(fname)[1],\n",
-    "            min(depths),\n",
-    "            max(depths),\n",
-    "            \"*\" if max(depths) > 62 else \"\",\n",
-    "        )\n",
-    "    )"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "print(\"{:<40s}{:>6s} {:>6s}\".format(\"Filename\", \"Min D\", \"Max D\"))\n",
-    "\n",
-    "for fname in sorted(get_partition_list(\"test\", full_path=True)):\n",
-    "    fname = fname + \"_bottom.evl\"\n",
-    "    try:\n",
-    "        depths = evl_loader(fname)[1]\n",
-    "    except Exception:\n",
-    "        continue\n",
-    "    print(\n",
-    "        \"{:<40s}{:6.1f} {:6.1f}  {}\".format(\n",
-    "            os.path.split(fname)[1],\n",
-    "            min(depths),\n",
-    "            max(depths),\n",
-    "            \"*\" if max(depths) > 62 else \"\",\n",
-    "        )\n",
-    "    )"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "print(\"{:<40s}{:>6s} {:>6s}\".format(\"Filename\", \"Min D\", \"Max D\"))\n",
-    "\n",
-    "for fname in sorted(get_partition_list(\"leaveout\", full_path=True)):\n",
-    "    fname = fname + \"_bottom.evl\"\n",
-    "    try:\n",
-    "        depths = evl_loader(fname)[1]\n",
-    "    except Exception:\n",
-    "        continue\n",
-    "    print(\n",
-    "        \"{:<40s}{:6.1f} {:6.1f}  {}\".format(\n",
-    "            os.path.split(fname)[1],\n",
-    "            min(depths),\n",
-    "            max(depths),\n",
-    "            \"*\" if max(depths) > 62 else \"\",\n",
-    "        )\n",
-    "    )"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# One weird survey\n",
-    "plot_transect_data(\"Survey17/Survey17_GR4_S3A_E\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "print(\"{:<40s}{:>6s} {:>6s}\".format(\"Filename\", \"Min D\", \"Max D\"))\n",
-    "\n",
-    "for fname in sorted(get_partition_list(\"train\", full_path=True)):\n",
-    "    fname = fname + \"_turbulence.evl\"\n",
-    "    try:\n",
-    "        depths = evl_loader(fname)[1]\n",
-    "    except Exception:\n",
-    "        continue\n",
-    "    print(\n",
-    "        \"{:<40s}{:6.1f} {:6.1f}  {}\".format(\n",
-    "            os.path.split(fname)[1],\n",
-    "            min(depths),\n",
-    "            max(depths),\n",
-    "            \"*\" if max(depths) > 62 else \"\",\n",
-    "        )\n",
-    "    )"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plot_transect_data(\"Survey17/Survey17_GR4_N5A_E\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plot_transect_data(\"Survey17/Survey17_GR1_S3W_F\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plot_transect_data(\"Survey03/Survey03_GR2_S1A_survey3\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "40, 62, 96"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plot_transect_data(\"Survey17/Survey17_GR1_S3W_F\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "transect_pth = \"Survey17/Survey17_GR1_S3W_F\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "timestamps, depths, signals, d_top, d_bot = load_transect_data(\n",
-    "    transect_pth, dataset=\"surveyExports\", root_data_dir=ROOT_DATA_DIR\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "timestamps"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "depths"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "signals"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "d_top"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "d_bot"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.pcolormesh(timestamps[:128], -depths[:2000], signals[:128, :2000].T)\n",
-    "plt.plot(timestamps[:128], -d_bot[:128], \"b\")\n",
-    "plt.plot(timestamps[:128], -d_top[:128], \"c\")\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "dwn_sig = signals[:128, :2000].reshape(128, 200, 10).mean(-1).reshape(128, 200)\n",
-    "\n",
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.pcolormesh(timestamps[:128], -depths[:2000:10], dwn_sig.T)\n",
-    "plt.plot(timestamps[:128], -d_bot[:128], \"b\")\n",
-    "plt.plot(timestamps[:128], -d_top[:128], \"c\")\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def shard_transect(\n",
-    "    transect_pth,\n",
-    "    dataset=\"surveyExports\",\n",
-    "    max_depth=100,\n",
-    "    shard_len=128,\n",
-    "    root_data_dir=ROOT_DATA_DIR,\n",
-    "):\n",
-    "    root_shard_dir = os.path.join(root_data_dir, dataset + \"_sharded\")\n",
-    "    timestamps, depths, signals, d_top, d_bot = load_transect_data(\n",
-    "        transect_pth, dataset, root_data_dir\n",
-    "    )\n",
-    "    depth_mask = depths <= 100\n",
-    "    indices = range(128, signals.shape[0], 128)\n",
-    "    dirname = os.path.join(root_shard_dir, transect_pth)\n",
-    "    os.makedirs(dirname, exist_ok=True)\n",
-    "    with open(os.path.join(dirname, \"shard_size.txt\"), \"w\") as hf:\n",
-    "        print(\"{},{}\".format(len(timestamps), shard_len), file=hf)\n",
-    "    for i, (ts_i, sig_i, top_i, bot_i) in enumerate(\n",
-    "        zip(\n",
-    "            np.split(timestamps, indices),\n",
-    "            np.split(np.single(signals[:, depth_mask]), indices),\n",
-    "            np.split(np.single(d_top), indices),\n",
-    "            np.split(np.single(d_bot), indices),\n",
-    "        )\n",
-    "    ):\n",
-    "        os.makedirs(os.path.join(dirname, str(i)), exist_ok=True)\n",
-    "        for obj, fname in (\n",
-    "            (depths[depth_mask], \"depths\"),\n",
-    "            (ts_i, \"timestamps\"),\n",
-    "            (sig_i, \"Sv\"),\n",
-    "            (top_i, \"top\"),\n",
-    "            (bot_i, \"bottom\"),\n",
-    "        ):\n",
-    "            obj.dump(os.path.join(dirname, str(i), fname + \".npy\"))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def load_transect_from_shards(\n",
-    "    transect_pth, i1=0, i2=None, dataset=\"surveyExports\", root_data_dir=ROOT_DATA_DIR\n",
-    "):\n",
-    "    root_shard_dir = os.path.join(root_data_dir, dataset + \"_sharded\")\n",
-    "    dirname = os.path.join(root_shard_dir, transect_pth)\n",
-    "    with open(os.path.join(dirname, \"shard_size.txt\"), \"r\") as f:\n",
-    "        n_timestamps, shard_len = f.readline().strip().split(\",\")\n",
-    "        n_timestamps = int(n_timestamps)\n",
-    "        shard_len = int(shard_len)\n",
-    "    if i2 is None:\n",
-    "        i2 = n_timestamps\n",
-    "    j1 = max(0, int(i1 / shard_len))\n",
-    "    j2 = int(min(i2, n_timestamps - 1) / shard_len)\n",
-    "\n",
-    "    depths = np.load(os.path.join(dirname, str(j1), \"depths.npy\"), allow_pickle=True)\n",
-    "\n",
-    "    def load_shard(fname):\n",
-    "        return np.concatenate(\n",
-    "            [\n",
-    "                np.load(\n",
-    "                    os.path.join(dirname, str(j), fname + \".npy\"), allow_pickle=True\n",
-    "                )\n",
-    "                for j in range(j1, j2 + 1)\n",
-    "            ]\n",
-    "        )[(i1 - j1 * shard_len) : (i2 - j1 * shard_len)]\n",
-    "\n",
-    "    timestamps = load_shard(\"timestamps\")\n",
-    "    signals = load_shard(\"Sv\")\n",
-    "    d_top = load_shard(\"top\")\n",
-    "    d_bot = load_shard(\"bottom\")\n",
-    "\n",
-    "    return timestamps, depths, signals, d_top, d_bot"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "shard_transect(transect_pth)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "o = load_transect_from_shards(transect_pth)\n",
-    "for io in o:\n",
-    "    print(io.shape)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "o = load_transect_from_shards(transect_pth, 200, 500)\n",
-    "for io in o:\n",
-    "    print(io.shape)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "timestamps, depths, signals, d_top, d_bot = load_transect_from_shards(\n",
-    "    transect_pth, 100, 800\n",
-    ")\n",
-    "\n",
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.pcolormesh(timestamps, -depths, signals.T)\n",
-    "plt.plot(timestamps, -d_bot, \"b\")\n",
-    "plt.plot(timestamps, -d_top, \"c\")\n",
-    "plt.show()"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.7.15"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
+  "cells": [
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "cd .."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import csv\n",
+        "import os"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import numpy as np"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import pandas as pd"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import matplotlib.pyplot as plt\n",
+        "import seaborn as sns\n",
+        "\n",
+        "sns.set()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "from echofilter.raw.loader import evl_loader, transect_loader"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "ROOT_DATA_DIR = \"/media/scott/scratch/Datasets/dsforce\"\n",
+        "ROOT_DATA_DIR = \"/data/dsforce\""
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "def load_transect_data(\n",
+        "    transect_pth, dataset=\"surveyExports\", root_data_dir=ROOT_DATA_DIR\n",
+        "):\n",
+        "\n",
+        "    dirname = os.path.join(root_data_dir, dataset)\n",
+        "    raw_fname = os.path.join(dirname, transect_pth + \"_Sv_raw.csv\")\n",
+        "    bot_fname = os.path.join(dirname, transect_pth + \"_bottom.evl\")\n",
+        "    top_fname = os.path.join(dirname, transect_pth + \"_turbulence.evl\")\n",
+        "\n",
+        "    timestamps, depths, signals = transect_loader(raw_fname)\n",
+        "    t_bot, d_bot = evl_loader(bot_fname)\n",
+        "    t_top, d_top = evl_loader(top_fname)\n",
+        "\n",
+        "    return (\n",
+        "        timestamps,\n",
+        "        depths,\n",
+        "        signals,\n",
+        "        np.interp(timestamps, t_top, d_top),\n",
+        "        np.interp(timestamps, t_bot, d_bot),\n",
+        "    )"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "def load_transect_data2(\n",
+        "    survey, transect_name, dataset=\"surveyExports\", root_data_dir=ROOT_DATA_DIR\n",
+        "):\n",
+        "\n",
+        "    return load_transect_data(\n",
+        "        os.path.join(\n",
+        "            \"Survey{}\".format(survey), \"Survey{}_{}\".format(survey, transect_name)\n",
+        "        ),\n",
+        "        dataset=dataset,\n",
+        "        root_data_dir=root_data_dir,\n",
+        "    )"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "def plot_transect_data(\n",
+        "    transect_pth, dataset=\"surveyExports\", root_data_dir=ROOT_DATA_DIR\n",
+        "):\n",
+        "\n",
+        "    timestamps, depths, signals, d_top, d_bot = load_transect_data(\n",
+        "        transect_pth, dataset, root_data_dir\n",
+        "    )\n",
+        "\n",
+        "    plt.figure(figsize=(12, 12))\n",
+        "    plt.pcolormesh(timestamps, -depths, signals.T)\n",
+        "    plt.plot(timestamps, -d_bot, \"b\")\n",
+        "    plt.plot(timestamps, -d_top, \"c\")\n",
+        "    plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "def plot_transect_data2(\n",
+        "    survey, transect_name, dataset=\"surveyExports\", root_data_dir=ROOT_DATA_DIR\n",
+        "):\n",
+        "\n",
+        "    timestamps, depths, signals, d_top, d_bot = load_transect_data2(\n",
+        "        survey, transect_name, dataset, root_data_dir\n",
+        "    )\n",
+        "\n",
+        "    plt.figure(figsize=(12, 12))\n",
+        "    plt.pcolormesh(timestamps, -depths, signals.T)\n",
+        "    plt.plot(timestamps, -d_bot, \"b\")\n",
+        "    plt.plot(timestamps, -d_top, \"c\")\n",
+        "    plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plot_transect_data(\"Survey17/Survey17_GR1_N0A_E\")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "survey = 17\n",
+        "transect_name = \"GR1_N0A_E\"\n",
+        "plot_transect_data2(survey, transect_name)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "def get_partition_data(\n",
+        "    partition,\n",
+        "    dataset=\"surveyExports\",\n",
+        "    partitioning_version=\"firstpass\",\n",
+        "    root_data_dir=ROOT_DATA_DIR,\n",
+        "):\n",
+        "\n",
+        "    dirname = os.path.join(root_data_dir, dataset, \"sets\", partitioning_version)\n",
+        "    fname_partition = os.path.join(dirname, partition + \".txt\")\n",
+        "    fname_header = os.path.join(dirname, \"header\" + \".txt\")\n",
+        "\n",
+        "    with open(fname_header, \"r\") as hf:\n",
+        "        for row in csv.reader(hf):\n",
+        "            header = [entry.strip() for entry in row]\n",
+        "            break\n",
+        "\n",
+        "    df = pd.read_csv(fname_partition, header=None, names=header)\n",
+        "    return df"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "get_partition_data(\"train\")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "def get_partition_list(\n",
+        "    partition,\n",
+        "    dataset=\"surveyExports\",\n",
+        "    full_path=False,\n",
+        "    partitioning_version=\"firstpass\",\n",
+        "    root_data_dir=ROOT_DATA_DIR,\n",
+        "):\n",
+        "    df = get_partition_data(\n",
+        "        partition,\n",
+        "        dataset=dataset,\n",
+        "        partitioning_version=partitioning_version,\n",
+        "        root_data_dir=root_data_dir,\n",
+        "    )\n",
+        "    fnames = df[\"Filename\"]\n",
+        "    fnames = [\n",
+        "        os.path.join(f.split(\"_\")[0], f.strip().replace(\"_Sv_raw.csv\", \"\"))\n",
+        "        for f in fnames\n",
+        "    ]\n",
+        "    if full_path:\n",
+        "        fnames = [os.path.join(root_data_dir, dataset, f) for f in fnames]\n",
+        "    return fnames"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "get_partition_list(\"train\")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "get_partition_list(\"train\", full_path=True)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "print(\"{:<40s}{:>6s} {:>6s}\".format(\"Filename\", \"Min D\", \"Max D\"))\n",
+        "\n",
+        "for fname in sorted(get_partition_list(\"train\", full_path=True)):\n",
+        "    fname = fname + \"_bottom.evl\"\n",
+        "    try:\n",
+        "        depths = evl_loader(fname)[1]\n",
+        "    except Exception:\n",
+        "        continue\n",
+        "    print(\n",
+        "        \"{:<40s}{:6.1f} {:6.1f}  {}\".format(\n",
+        "            os.path.split(fname)[1],\n",
+        "            min(depths),\n",
+        "            max(depths),\n",
+        "            \"*\" if max(depths) > 62 else \"\",\n",
+        "        )\n",
+        "    )"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "print(\"{:<40s}{:>6s} {:>6s}\".format(\"Filename\", \"Min D\", \"Max D\"))\n",
+        "\n",
+        "for fname in sorted(get_partition_list(\"validate\", full_path=True)):\n",
+        "    fname = fname + \"_bottom.evl\"\n",
+        "    try:\n",
+        "        depths = evl_loader(fname)[1]\n",
+        "    except Exception:\n",
+        "        continue\n",
+        "    print(\n",
+        "        \"{:<40s}{:6.1f} {:6.1f}  {}\".format(\n",
+        "            os.path.split(fname)[1],\n",
+        "            min(depths),\n",
+        "            max(depths),\n",
+        "            \"*\" if max(depths) > 62 else \"\",\n",
+        "        )\n",
+        "    )"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "print(\"{:<40s}{:>6s} {:>6s}\".format(\"Filename\", \"Min D\", \"Max D\"))\n",
+        "\n",
+        "for fname in sorted(get_partition_list(\"test\", full_path=True)):\n",
+        "    fname = fname + \"_bottom.evl\"\n",
+        "    try:\n",
+        "        depths = evl_loader(fname)[1]\n",
+        "    except Exception:\n",
+        "        continue\n",
+        "    print(\n",
+        "        \"{:<40s}{:6.1f} {:6.1f}  {}\".format(\n",
+        "            os.path.split(fname)[1],\n",
+        "            min(depths),\n",
+        "            max(depths),\n",
+        "            \"*\" if max(depths) > 62 else \"\",\n",
+        "        )\n",
+        "    )"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "print(\"{:<40s}{:>6s} {:>6s}\".format(\"Filename\", \"Min D\", \"Max D\"))\n",
+        "\n",
+        "for fname in sorted(get_partition_list(\"leaveout\", full_path=True)):\n",
+        "    fname = fname + \"_bottom.evl\"\n",
+        "    try:\n",
+        "        depths = evl_loader(fname)[1]\n",
+        "    except Exception:\n",
+        "        continue\n",
+        "    print(\n",
+        "        \"{:<40s}{:6.1f} {:6.1f}  {}\".format(\n",
+        "            os.path.split(fname)[1],\n",
+        "            min(depths),\n",
+        "            max(depths),\n",
+        "            \"*\" if max(depths) > 62 else \"\",\n",
+        "        )\n",
+        "    )"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# One weird survey\n",
+        "plot_transect_data(\"Survey17/Survey17_GR4_S3A_E\")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "print(\"{:<40s}{:>6s} {:>6s}\".format(\"Filename\", \"Min D\", \"Max D\"))\n",
+        "\n",
+        "for fname in sorted(get_partition_list(\"train\", full_path=True)):\n",
+        "    fname = fname + \"_turbulence.evl\"\n",
+        "    try:\n",
+        "        depths = evl_loader(fname)[1]\n",
+        "    except Exception:\n",
+        "        continue\n",
+        "    print(\n",
+        "        \"{:<40s}{:6.1f} {:6.1f}  {}\".format(\n",
+        "            os.path.split(fname)[1],\n",
+        "            min(depths),\n",
+        "            max(depths),\n",
+        "            \"*\" if max(depths) > 62 else \"\",\n",
+        "        )\n",
+        "    )"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plot_transect_data(\"Survey17/Survey17_GR4_N5A_E\")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plot_transect_data(\"Survey17/Survey17_GR1_S3W_F\")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plot_transect_data(\"Survey03/Survey03_GR2_S1A_survey3\")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "40, 62, 96"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plot_transect_data(\"Survey17/Survey17_GR1_S3W_F\")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "transect_pth = \"Survey17/Survey17_GR1_S3W_F\""
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "timestamps, depths, signals, d_top, d_bot = load_transect_data(\n",
+        "    transect_pth, dataset=\"surveyExports\", root_data_dir=ROOT_DATA_DIR\n",
+        ")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "timestamps"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "depths"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "signals"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "d_top"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "d_bot"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.pcolormesh(timestamps[:128], -depths[:2000], signals[:128, :2000].T)\n",
+        "plt.plot(timestamps[:128], -d_bot[:128], \"b\")\n",
+        "plt.plot(timestamps[:128], -d_top[:128], \"c\")\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "dwn_sig = signals[:128, :2000].reshape(128, 200, 10).mean(-1).reshape(128, 200)\n",
+        "\n",
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.pcolormesh(timestamps[:128], -depths[:2000:10], dwn_sig.T)\n",
+        "plt.plot(timestamps[:128], -d_bot[:128], \"b\")\n",
+        "plt.plot(timestamps[:128], -d_top[:128], \"c\")\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "def shard_transect(\n",
+        "    transect_pth,\n",
+        "    dataset=\"surveyExports\",\n",
+        "    max_depth=100,\n",
+        "    shard_len=128,\n",
+        "    root_data_dir=ROOT_DATA_DIR,\n",
+        "):\n",
+        "    root_shard_dir = os.path.join(root_data_dir, dataset + \"_sharded\")\n",
+        "    timestamps, depths, signals, d_top, d_bot = load_transect_data(\n",
+        "        transect_pth, dataset, root_data_dir\n",
+        "    )\n",
+        "    depth_mask = depths <= 100\n",
+        "    indices = range(128, signals.shape[0], 128)\n",
+        "    dirname = os.path.join(root_shard_dir, transect_pth)\n",
+        "    os.makedirs(dirname, exist_ok=True)\n",
+        "    with open(os.path.join(dirname, \"shard_size.txt\"), \"w\") as hf:\n",
+        "        print(\"{},{}\".format(len(timestamps), shard_len), file=hf)\n",
+        "    for i, (ts_i, sig_i, top_i, bot_i) in enumerate(\n",
+        "        zip(\n",
+        "            np.split(timestamps, indices),\n",
+        "            np.split(np.single(signals[:, depth_mask]), indices),\n",
+        "            np.split(np.single(d_top), indices),\n",
+        "            np.split(np.single(d_bot), indices),\n",
+        "        )\n",
+        "    ):\n",
+        "        os.makedirs(os.path.join(dirname, str(i)), exist_ok=True)\n",
+        "        for obj, fname in (\n",
+        "            (depths[depth_mask], \"depths\"),\n",
+        "            (ts_i, \"timestamps\"),\n",
+        "            (sig_i, \"Sv\"),\n",
+        "            (top_i, \"top\"),\n",
+        "            (bot_i, \"bottom\"),\n",
+        "        ):\n",
+        "            obj.dump(os.path.join(dirname, str(i), fname + \".npy\"))"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "def load_transect_from_shards(\n",
+        "    transect_pth, i1=0, i2=None, dataset=\"surveyExports\", root_data_dir=ROOT_DATA_DIR\n",
+        "):\n",
+        "    root_shard_dir = os.path.join(root_data_dir, dataset + \"_sharded\")\n",
+        "    dirname = os.path.join(root_shard_dir, transect_pth)\n",
+        "    with open(os.path.join(dirname, \"shard_size.txt\"), \"r\") as f:\n",
+        "        n_timestamps, shard_len = f.readline().strip().split(\",\")\n",
+        "        n_timestamps = int(n_timestamps)\n",
+        "        shard_len = int(shard_len)\n",
+        "    if i2 is None:\n",
+        "        i2 = n_timestamps\n",
+        "    j1 = max(0, int(i1 / shard_len))\n",
+        "    j2 = int(min(i2, n_timestamps - 1) / shard_len)\n",
+        "\n",
+        "    depths = np.load(os.path.join(dirname, str(j1), \"depths.npy\"), allow_pickle=True)\n",
+        "\n",
+        "    def load_shard(fname):\n",
+        "        return np.concatenate(\n",
+        "            [\n",
+        "                np.load(\n",
+        "                    os.path.join(dirname, str(j), fname + \".npy\"), allow_pickle=True\n",
+        "                )\n",
+        "                for j in range(j1, j2 + 1)\n",
+        "            ]\n",
+        "        )[(i1 - j1 * shard_len) : (i2 - j1 * shard_len)]\n",
+        "\n",
+        "    timestamps = load_shard(\"timestamps\")\n",
+        "    signals = load_shard(\"Sv\")\n",
+        "    d_top = load_shard(\"top\")\n",
+        "    d_bot = load_shard(\"bottom\")\n",
+        "\n",
+        "    return timestamps, depths, signals, d_top, d_bot"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "shard_transect(transect_pth)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "o = load_transect_from_shards(transect_pth)\n",
+        "for io in o:\n",
+        "    print(io.shape)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "o = load_transect_from_shards(transect_pth, 200, 500)\n",
+        "for io in o:\n",
+        "    print(io.shape)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "timestamps, depths, signals, d_top, d_bot = load_transect_from_shards(\n",
+        "    transect_pth, 100, 800\n",
+        ")\n",
+        "\n",
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.pcolormesh(timestamps, -depths, signals.T)\n",
+        "plt.plot(timestamps, -d_bot, \"b\")\n",
+        "plt.plot(timestamps, -d_top, \"c\")\n",
+        "plt.show()"
+      ]
+    }
+  ],
+  "metadata": {
+    "kernelspec": {
+      "display_name": "Python 3 (ipykernel)",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.7.15"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 2
 }
diff --git a/notebooks/Building Data Loader 3.ipynb b/notebooks/Building Data Loader 3.ipynb
index 7161302d..892c0436 100644
--- a/notebooks/Building Data Loader 3.ipynb	
+++ b/notebooks/Building Data Loader 3.ipynb	
@@ -1,833 +1,833 @@
 {
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "cd .."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import os\n",
-    "import random"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import numpy as np"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import matplotlib.pyplot as plt\n",
-    "import seaborn as sns\n",
-    "\n",
-    "sns.set()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import echofilter.raw.shardloader"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "ROOT_DATA_DIR = \"/media/scott/scratch/Datasets/dsforce/\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "transect_pth = \"Survey17/Survey17_GR1_S3W_F\"\n",
-    "(\n",
-    "    timestamps,\n",
-    "    depths,\n",
-    "    signals,\n",
-    "    d_top,\n",
-    "    d_bot,\n",
-    ") = echofilter.raw.shardloader.load_transect_from_shards_rel(\n",
-    "    transect_pth,\n",
-    "    100,\n",
-    "    800,\n",
-    "    root_data_dir=ROOT_DATA_DIR,\n",
-    ")\n",
-    "\n",
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.pcolormesh(timestamps, -depths, signals.T)\n",
-    "plt.plot(timestamps, -d_bot, \"b\")\n",
-    "plt.plot(timestamps, -d_top, \"c\")\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "transect_pth = \"Survey17/Survey17_GR1_S3W_F\"\n",
-    "(\n",
-    "    timestamps,\n",
-    "    depths,\n",
-    "    signals,\n",
-    "    d_top,\n",
-    "    d_bot,\n",
-    ") = echofilter.raw.shardloader.load_transect_from_shards_rel(\n",
-    "    transect_pth,\n",
-    "    -100,\n",
-    "    800,\n",
-    "    root_data_dir=ROOT_DATA_DIR,\n",
-    ")\n",
-    "\n",
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.pcolormesh(timestamps, -depths, signals.T)\n",
-    "plt.plot(timestamps, -d_bot, \"b\")\n",
-    "plt.plot(timestamps, -d_top, \"c\")\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "transect_pth = \"Survey17/Survey17_GR1_S3W_F\"\n",
-    "(\n",
-    "    timestamps,\n",
-    "    depths,\n",
-    "    signals,\n",
-    "    d_top,\n",
-    "    d_bot,\n",
-    ") = echofilter.raw.shardloader.load_transect_from_shards_rel(\n",
-    "    transect_pth,\n",
-    "    0,\n",
-    "    128,\n",
-    "    root_data_dir=ROOT_DATA_DIR,\n",
-    ")\n",
-    "\n",
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.pcolormesh(timestamps, -depths, signals.T)\n",
-    "plt.plot(timestamps, -d_bot, \"b\")\n",
-    "plt.plot(timestamps, -d_top, \"c\")\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import torch.utils.data"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "class TransectDataset(torch.utils.data.Dataset):\n",
-    "    def __init__(\n",
-    "        self,\n",
-    "        transect_paths,\n",
-    "        window_len=128,\n",
-    "        crop_depth=70,\n",
-    "        num_windows_per_transect=0,\n",
-    "        use_dynamic_offsets=True,\n",
-    "        transform_pre=None,\n",
-    "        transform_post=None,\n",
-    "    ):\n",
-    "        \"\"\"\n",
-    "        TransectDataset\n",
-    "\n",
-    "        Parameters\n",
-    "        ----------\n",
-    "        transect_paths : list\n",
-    "            Absolute paths to transects.\n",
-    "        window_len : int\n",
-    "            Width (number of timestamps) to load. Default is `128`.\n",
-    "        crop_depth : float\n",
-    "            Maximum depth to include, in metres. Deeper data will be cropped away.\n",
-    "            Default is `70`.\n",
-    "        num_windows_per_transect : int\n",
-    "            Number of windows to extract for each transect. Start indices for the\n",
-    "            windows will be equally spaced across the total width of the transect.\n",
-    "            If this is `0`, the number of windows will be inferred automatically\n",
-    "            based on `window_len` and the total width of the transect, resulting\n",
-    "            in a different number of windows for each transect. Default is `0`.\n",
-    "        use_dynamic_offsets : bool\n",
-    "            Whether starting indices for each window should be randomly offset.\n",
-    "            Set to `True` for training and `False` for testing. Default is `True`.\n",
-    "        transform_pre : callable\n",
-    "            Operations to perform to the dictionary containing a single sample.\n",
-    "            These are performed before generating the masks. Default is `None`.\n",
-    "        transform_post : callable\n",
-    "            Operations to perform to the dictionary containing a single sample.\n",
-    "            These are performed after generating the masks. Default is `None`.\n",
-    "        \"\"\"\n",
-    "        super(TransectDataset, self).__init__()\n",
-    "        self.window_len = window_len\n",
-    "        self.crop_depth = crop_depth\n",
-    "        self.num_windows = num_windows_per_transect\n",
-    "        self.use_dynamic_offsets = use_dynamic_offsets\n",
-    "        self.transform_pre = transform_pre\n",
-    "        self.transform_post = transform_post\n",
-    "\n",
-    "        self.datapoints = []\n",
-    "\n",
-    "        for transect_path in transect_paths:\n",
-    "            # Lookup the number of rows in the transect\n",
-    "            # Load the sharding metadata\n",
-    "            with open(os.path.join(transect_path, \"shard_size.txt\"), \"r\") as f:\n",
-    "                n_timestamps, shard_len = f.readline().strip().split(\",\")\n",
-    "                n_timestamps = int(n_timestamps)\n",
-    "            # Generate an array for window centers within the transect\n",
-    "            # - if this is for training, we want to randomise the offsets\n",
-    "            # - if this is for validation, we want stable windows\n",
-    "            num_windows = self.num_windows\n",
-    "            if self.num_windows is None or self.num_windows == 0:\n",
-    "                # Load enough windows to include all datapoints\n",
-    "                num_windows = int(np.ceil(n_timestamps / self.window_len))\n",
-    "            centers = np.linspace(0, n_timestamps, num_windows + 1)[:num_windows]\n",
-    "            if len(centers) > 1:\n",
-    "                max_dy_offset = centers[1] - centers[0]\n",
-    "            else:\n",
-    "                max_dy_offset = n_timestamps\n",
-    "            if self.use_dynamic_offsets:\n",
-    "                centers += np.random.rand() * max_dy_offset\n",
-    "            else:\n",
-    "                centers += max_dy_offset / 2\n",
-    "            centers = np.round(centers)\n",
-    "            # Add each (transect, center) to the list for this epoch\n",
-    "            for center_idx in centers:\n",
-    "                self.datapoints.append((transect_path, int(center_idx)))\n",
-    "\n",
-    "    def __getitem__(self, index):\n",
-    "        transect_pth, center_idx = self.datapoints[index]\n",
-    "        # Load data from shards\n",
-    "        (\n",
-    "            timestamps,\n",
-    "            depths,\n",
-    "            signals,\n",
-    "            d_top,\n",
-    "            d_bot,\n",
-    "        ) = echofilter.raw.shardloader.load_transect_from_shards_abs(\n",
-    "            transect_pth,\n",
-    "            center_idx - int(self.window_len / 2),\n",
-    "            center_idx - int(self.window_len / 2) + self.window_len,\n",
-    "        )\n",
-    "        sample = {\n",
-    "            \"timestamps\": timestamps,\n",
-    "            \"depths\": depths,\n",
-    "            \"signals\": signals,\n",
-    "            \"d_top\": d_top,\n",
-    "            \"d_bot\": d_bot,\n",
-    "        }\n",
-    "        if self.transform_pre is not None:\n",
-    "            sample = self.transform_pre(sample)\n",
-    "        # Apply depth crop\n",
-    "        depth_crop_mask = sample[\"depths\"] <= self.crop_depth\n",
-    "        sample[\"depths\"] = sample[\"depths\"][depth_crop_mask]\n",
-    "        sample[\"signals\"] = sample[\"signals\"][:, depth_crop_mask]\n",
-    "        # Convert lines to masks\n",
-    "        ddepths = np.broadcast_to(sample[\"depths\"], sample[\"signals\"].shape)\n",
-    "        mask_top = np.single(ddepths < np.expand_dims(sample[\"d_top\"], -1))\n",
-    "        mask_bot = np.single(ddepths > np.expand_dims(sample[\"d_bot\"], -1))\n",
-    "        sample[\"mask_top\"] = mask_top\n",
-    "        sample[\"mask_bot\"] = mask_bot\n",
-    "        sample[\"r_top\"] = sample[\"d_top\"] / abs(\n",
-    "            sample[\"depths\"][-1] - sample[\"depths\"][0]\n",
-    "        )\n",
-    "        sample[\"r_bot\"] = sample[\"d_bot\"] / abs(\n",
-    "            sample[\"depths\"][-1] - sample[\"depths\"][0]\n",
-    "        )\n",
-    "        if self.transform_post is not None:\n",
-    "            sample = self.transform_post(sample)\n",
-    "        return sample\n",
-    "\n",
-    "    def __len__(self):\n",
-    "        return len(self.datapoints)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "transect_paths = [\n",
-    "    os.path.join(ROOT_DATA_DIR, \"surveyExports_sharded/Survey17/Survey17_GR1_S3W_F\")\n",
-    "] * 2"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "dataset = TransectDataset(transect_paths)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "dataset.datapoints"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "sample = dataset[0]\n",
-    "\n",
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.imshow(sample[\"signals\"])\n",
-    "plt.show()\n",
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.imshow(sample[\"mask_top\"])\n",
-    "plt.show()\n",
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.imshow(sample[\"mask_bot\"])\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "sample[\"signals\"].shape"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "loader = torch.utils.data.DataLoader(dataset, batch_size=2)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "for sample in loader:\n",
-    "    print(sample[\"signals\"].shape)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import skimage.transform"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "class Rescale(object):\n",
-    "    \"\"\"\n",
-    "    Rescale the image(s) in a sample to a given size.\n",
-    "\n",
-    "    Parameters\n",
-    "    ----------\n",
-    "    output_size : tuple or int\n",
-    "        Desired output size. If tuple, output is matched to output_size. If int,\n",
-    "        output is square.\n",
-    "    \"\"\"\n",
-    "\n",
-    "    def __init__(self, output_size):\n",
-    "        assert isinstance(output_size, (int, tuple))\n",
-    "        if isinstance(output_size, int):\n",
-    "            output_size = (output_size, output_size)\n",
-    "        self.output_size = output_size\n",
-    "\n",
-    "    def __call__(self, sample):\n",
-    "\n",
-    "        for key in (\"signals\", \"mask_top\", \"mask_bot\"):\n",
-    "            if key in sample:\n",
-    "                sample[key] = skimage.transform.resize(\n",
-    "                    sample[key],\n",
-    "                    self.output_size,\n",
-    "                    clip=False,\n",
-    "                    preserve_range=False,\n",
-    "                )\n",
-    "\n",
-    "        return sample"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "class Normalize(object):\n",
-    "    \"\"\"\n",
-    "    Normalize mean and standard deviation of image.\n",
-    "\n",
-    "    Note that changes are made inplace.\n",
-    "\n",
-    "    Parameters\n",
-    "    ----------\n",
-    "    mean : float\n",
-    "        Expected sample pixel mean.\n",
-    "    stdev : float\n",
-    "        Expected sample standard deviation of pixel intensities.\n",
-    "    \"\"\"\n",
-    "\n",
-    "    def __init__(self, mean, stdev):\n",
-    "        self.mean = mean\n",
-    "        self.stdev = stdev\n",
-    "\n",
-    "    def __call__(self, sample):\n",
-    "\n",
-    "        sample[\"signals\"] -= self.mean\n",
-    "        sample[\"signals\"] /= self.stdev\n",
-    "\n",
-    "        return sample"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "class RandomReflection(object):\n",
-    "    \"\"\"\n",
-    "    Randomly reflect a sample.\n",
-    "\n",
-    "    Parameters\n",
-    "    ----------\n",
-    "    axis : int, optional\n",
-    "        Axis to reflect. Default is 0.\n",
-    "    p : float, optional\n",
-    "        Probability of reflection. Default is 0.5.\n",
-    "    \"\"\"\n",
-    "\n",
-    "    def __init__(self, axis=0, p=0.5):\n",
-    "        self.axis = axis\n",
-    "        self.p = p\n",
-    "\n",
-    "    def __call__(self, sample):\n",
-    "\n",
-    "        if random.random() > self.p:\n",
-    "            # Nothing to do\n",
-    "            return sample\n",
-    "\n",
-    "        # Reflect x co-ordinates\n",
-    "        sample[\"timestamps\"] = sample[\"timestamps\"][::-1]\n",
-    "\n",
-    "        # Reflect data\n",
-    "        for key in (\"signals\", \"d_top\", \"d_bot\", \"mask_top\", \"mask_bot\"):\n",
-    "            if key in sample:\n",
-    "                sample[key] = np.flip(sample[key], self.axis)\n",
-    "\n",
-    "        return sample"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "class RandomStretchDepth(object):\n",
-    "    \"\"\"\n",
-    "    Rescale a set of images in a sample to a given size.\n",
-    "\n",
-    "    Note that this transform doesn't change images, just the `depth`, `d_top`, and `d_bot`.\n",
-    "    Note that changes are made inplace.\n",
-    "\n",
-    "    Parameters\n",
-    "    ----------\n",
-    "    max_factor : float\n",
-    "        Maximum stretch factor. A number between `[1, 1 + max_factor]` will be generated,\n",
-    "        and the depth will either be divided or multiplied by the generated stretch\n",
-    "        factor.\n",
-    "    expected_bottom_gap : float\n",
-    "        Expected gap between actual ocean floor and target bottom line.\n",
-    "    \"\"\"\n",
-    "\n",
-    "    def __init__(self, max_factor, expected_bottom_gap=1):\n",
-    "        self.max_factor = max_factor\n",
-    "        self.expected_bottom_gap = expected_bottom_gap\n",
-    "\n",
-    "    def __call__(self, sample):\n",
-    "\n",
-    "        factor = random.uniform(1.0, 1.0 + self.max_factor)\n",
-    "\n",
-    "        if random.random() > 0.5:\n",
-    "            factor = 1.0 / factor\n",
-    "\n",
-    "        sample[\"d_bot\"] += self.expected_bottom_gap\n",
-    "        for key in (\"depths\", \"d_top\", \"d_bot\"):\n",
-    "            sample[key] *= factor\n",
-    "        sample[\"d_bot\"] -= self.expected_bottom_gap\n",
-    "\n",
-    "        return sample"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "class RandomCropWidth(object):\n",
-    "    \"\"\"\n",
-    "    Randomly crop a sample in the width dimension.\n",
-    "\n",
-    "    Parameters\n",
-    "    ----------\n",
-    "    max_crop_fraction : float\n",
-    "        Maximum amount of material to crop away, as a fraction of the total width.\n",
-    "        The `crop_fraction` will be sampled uniformly from the range\n",
-    "        `[0, max_crop_fraction]`. The crop is always centred.\n",
-    "    \"\"\"\n",
-    "\n",
-    "    def __init__(self, max_crop_fraction):\n",
-    "        self.max_crop_fraction = max_crop_fraction\n",
-    "\n",
-    "    def __call__(self, sample):\n",
-    "\n",
-    "        width = sample[\"signals\"].shape[0]\n",
-    "\n",
-    "        crop_fraction = random.uniform(0.0, self.max_crop_fraction)\n",
-    "        crop_amount = crop_fraction * width\n",
-    "\n",
-    "        lft = int(crop_amount / 2)\n",
-    "        rgt = lft + width - int(crop_amount)\n",
-    "\n",
-    "        # Crop data\n",
-    "        for key in (\"timestamps\", \"signals\", \"d_top\", \"d_bot\", \"mask_top\", \"mask_bot\"):\n",
-    "            if key in sample:\n",
-    "                sample[key] = sample[key][lft:rgt]\n",
-    "\n",
-    "        return sample"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "class ColorJitter(object):\n",
-    "    \"\"\"\n",
-    "    Randomly change the brightness and contrast of a normalized image.\n",
-    "\n",
-    "    Note that changes are made inplace.\n",
-    "\n",
-    "    Parameters\n",
-    "    ----------\n",
-    "    brightness : float or tuple of float (min, max)\n",
-    "        How much to jitter brightness. `brightness_factor` is chosen uniformly from\n",
-    "        `[-brightness, brightness]`\n",
-    "        or the given `[min, max]`. `brightness_factor` is then added to the image.\n",
-    "    contrast : (float or tuple of float (min, max))\n",
-    "        How much to jitter contrast. `contrast_factor` is chosen uniformly from\n",
-    "        `[max(0, 1 - contrast), 1 + contrast]`\n",
-    "        or the given `[min, max]`. Should be non negative numbers.\n",
-    "    \"\"\"\n",
-    "\n",
-    "    def __init__(self, brightness=0, contrast=0):\n",
-    "        self.brightness = self._check_input(\n",
-    "            brightness,\n",
-    "            \"brightness\",\n",
-    "            center=0,\n",
-    "            bound=(float(\"-inf\"), float(\"inf\")),\n",
-    "            clip_first_on_zero=False,\n",
-    "        )\n",
-    "        self.contrast = self._check_input(contrast, \"contrast\")\n",
-    "\n",
-    "    def _check_input(\n",
-    "        self, value, name, center=1, bound=(0, float(\"inf\")), clip_first_on_zero=True\n",
-    "    ):\n",
-    "        if isinstance(value, (float, int)):\n",
-    "            if value < 0:\n",
-    "                raise ValueError(\n",
-    "                    \"If {} is a single number, it must be non negative.\".format(name)\n",
-    "                )\n",
-    "            value = [center - value, center + value]\n",
-    "            if clip_first_on_zero:\n",
-    "                value[0] = max(value[0], 0)\n",
-    "        elif isinstance(value, (tuple, list)) and len(value) == 2:\n",
-    "            if not bound[0] <= value[0] <= value[1] <= bound[1]:\n",
-    "                raise ValueError(\"{} values should be between {}\".format(name, bound))\n",
-    "        else:\n",
-    "            raise TypeError(\n",
-    "                \"{} should be a single number or a list/tuple with length 2.\".format(\n",
-    "                    name\n",
-    "                )\n",
-    "            )\n",
-    "\n",
-    "        if value[0] == value[1] == center:\n",
-    "            value = None\n",
-    "        return value\n",
-    "\n",
-    "    def __call__(self, sample):\n",
-    "        init_op = random.randint(0, 1)\n",
-    "        for i_op in range(2):\n",
-    "            op_num = (init_op + i_op) % 2\n",
-    "            if op_num == 0 and self.brightness is not None:\n",
-    "                brightness_factor = random.uniform(\n",
-    "                    self.brightness[0], self.brightness[1]\n",
-    "                )\n",
-    "                sample[\"signals\"] += brightness_factor\n",
-    "            elif op_num == 1 and self.contrast is not None:\n",
-    "                contrast_factor = random.uniform(self.contrast[0], self.contrast[1])\n",
-    "                sample[\"signals\"] *= contrast_factor\n",
-    "        return sample\n",
-    "\n",
-    "    def __repr__(self):\n",
-    "        format_string = self.__class__.__name__ + \"(\"\n",
-    "        format_string += \"brightness={0}\".format(self.brightness)\n",
-    "        format_string += \", contrast={0})\".format(self.contrast)\n",
-    "        format_string += \")\"\n",
-    "        return format_string"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import torchvision.transforms"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "train_transform_pre = torchvision.transforms.Compose(\n",
-    "    [\n",
-    "        RandomCropWidth(0.5),\n",
-    "        RandomStretchDepth(0.5),\n",
-    "        RandomReflection(),\n",
-    "    ]\n",
-    ")\n",
-    "train_transform_post = torchvision.transforms.Compose(\n",
-    "    [\n",
-    "        Rescale((128, 512)),\n",
-    "        Normalize(-70, 22),\n",
-    "        ColorJitter(0.5, 0.3),\n",
-    "    ]\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "dataset_train = TransectDataset(\n",
-    "    transect_paths,\n",
-    "    window_len=192,\n",
-    "    crop_depth=70,\n",
-    "    num_windows_per_transect=10,\n",
-    "    use_dynamic_offsets=True,\n",
-    "    transform_pre=train_transform_pre,\n",
-    "    transform_post=train_transform_post,\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "sample = dataset_train[0]\n",
-    "\n",
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.pcolormesh(\n",
-    "    np.linspace(*sample[\"timestamps\"][[0, -1]], sample[\"signals\"].shape[0]),\n",
-    "    -np.linspace(sample[\"depths\"][0], sample[\"depths\"][-1], sample[\"signals\"].shape[1]),\n",
-    "    sample[\"signals\"].T,\n",
-    ")\n",
-    "plt.plot(\n",
-    "    np.linspace(*sample[\"timestamps\"][[0, -1]], sample[\"d_bot\"].shape[0]),\n",
-    "    -sample[\"d_bot\"],\n",
-    "    \"b\",\n",
-    ")\n",
-    "plt.plot(\n",
-    "    np.linspace(*sample[\"timestamps\"][[0, -1]], sample[\"d_top\"].shape[0]),\n",
-    "    -sample[\"d_top\"],\n",
-    "    \"c\",\n",
-    ")\n",
-    "plt.show()\n",
-    "\n",
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.imshow(sample[\"signals\"])\n",
-    "plt.show()\n",
-    "\n",
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.imshow(sample[\"mask_top\"])\n",
-    "plt.show()\n",
-    "\n",
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.imshow(sample[\"mask_bot\"])\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "sample[\"r_top\"]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "sample[\"r_bot\"]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "val_transform = torchvision.transforms.Compose(\n",
-    "    [\n",
-    "        Rescale((128, 512)),\n",
-    "        Normalize(-70, 22),\n",
-    "    ]\n",
-    ")\n",
-    "\n",
-    "dataset_val = TransectDataset(\n",
-    "    transect_paths,\n",
-    "    window_len=128,\n",
-    "    crop_depth=70,\n",
-    "    num_windows_per_transect=20,\n",
-    "    use_dynamic_offsets=False,\n",
-    "    transform_post=val_transform,\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "sample = dataset_val[0]\n",
-    "\n",
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.pcolormesh(\n",
-    "    np.linspace(*sample[\"timestamps\"][[0, -1]], sample[\"signals\"].shape[0]),\n",
-    "    -np.linspace(sample[\"depths\"][0], sample[\"depths\"][-1], sample[\"signals\"].shape[1]),\n",
-    "    sample[\"signals\"].T,\n",
-    ")\n",
-    "plt.plot(\n",
-    "    np.linspace(*sample[\"timestamps\"][[0, -1]], sample[\"d_bot\"].shape[0]),\n",
-    "    -sample[\"d_bot\"],\n",
-    "    \"b\",\n",
-    ")\n",
-    "plt.plot(\n",
-    "    np.linspace(*sample[\"timestamps\"][[0, -1]], sample[\"d_top\"].shape[0]),\n",
-    "    -sample[\"d_top\"],\n",
-    "    \"c\",\n",
-    ")\n",
-    "plt.show()\n",
-    "\n",
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.imshow(sample[\"signals\"])\n",
-    "plt.show()\n",
-    "\n",
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.imshow(sample[\"mask_top\"])\n",
-    "plt.show()\n",
-    "\n",
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.imshow(sample[\"mask_bot\"])\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "dataset_val.datapoints"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.7.15"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
+  "cells": [
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "cd .."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import os\n",
+        "import random"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import numpy as np"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import matplotlib.pyplot as plt\n",
+        "import seaborn as sns\n",
+        "\n",
+        "sns.set()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import echofilter.raw.shardloader"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "ROOT_DATA_DIR = \"/media/scott/scratch/Datasets/dsforce/\""
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "transect_pth = \"Survey17/Survey17_GR1_S3W_F\"\n",
+        "(\n",
+        "    timestamps,\n",
+        "    depths,\n",
+        "    signals,\n",
+        "    d_top,\n",
+        "    d_bot,\n",
+        ") = echofilter.raw.shardloader.load_transect_from_shards_rel(\n",
+        "    transect_pth,\n",
+        "    100,\n",
+        "    800,\n",
+        "    root_data_dir=ROOT_DATA_DIR,\n",
+        ")\n",
+        "\n",
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.pcolormesh(timestamps, -depths, signals.T)\n",
+        "plt.plot(timestamps, -d_bot, \"b\")\n",
+        "plt.plot(timestamps, -d_top, \"c\")\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "transect_pth = \"Survey17/Survey17_GR1_S3W_F\"\n",
+        "(\n",
+        "    timestamps,\n",
+        "    depths,\n",
+        "    signals,\n",
+        "    d_top,\n",
+        "    d_bot,\n",
+        ") = echofilter.raw.shardloader.load_transect_from_shards_rel(\n",
+        "    transect_pth,\n",
+        "    -100,\n",
+        "    800,\n",
+        "    root_data_dir=ROOT_DATA_DIR,\n",
+        ")\n",
+        "\n",
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.pcolormesh(timestamps, -depths, signals.T)\n",
+        "plt.plot(timestamps, -d_bot, \"b\")\n",
+        "plt.plot(timestamps, -d_top, \"c\")\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "transect_pth = \"Survey17/Survey17_GR1_S3W_F\"\n",
+        "(\n",
+        "    timestamps,\n",
+        "    depths,\n",
+        "    signals,\n",
+        "    d_top,\n",
+        "    d_bot,\n",
+        ") = echofilter.raw.shardloader.load_transect_from_shards_rel(\n",
+        "    transect_pth,\n",
+        "    0,\n",
+        "    128,\n",
+        "    root_data_dir=ROOT_DATA_DIR,\n",
+        ")\n",
+        "\n",
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.pcolormesh(timestamps, -depths, signals.T)\n",
+        "plt.plot(timestamps, -d_bot, \"b\")\n",
+        "plt.plot(timestamps, -d_top, \"c\")\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import torch.utils.data"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "class TransectDataset(torch.utils.data.Dataset):\n",
+        "    def __init__(\n",
+        "        self,\n",
+        "        transect_paths,\n",
+        "        window_len=128,\n",
+        "        crop_depth=70,\n",
+        "        num_windows_per_transect=0,\n",
+        "        use_dynamic_offsets=True,\n",
+        "        transform_pre=None,\n",
+        "        transform_post=None,\n",
+        "    ):\n",
+        "        \"\"\"\n",
+        "        TransectDataset\n",
+        "\n",
+        "        Parameters\n",
+        "        ----------\n",
+        "        transect_paths : list\n",
+        "            Absolute paths to transects.\n",
+        "        window_len : int\n",
+        "            Width (number of timestamps) to load. Default is `128`.\n",
+        "        crop_depth : float\n",
+        "            Maximum depth to include, in metres. Deeper data will be cropped away.\n",
+        "            Default is `70`.\n",
+        "        num_windows_per_transect : int\n",
+        "            Number of windows to extract for each transect. Start indices for the\n",
+        "            windows will be equally spaced across the total width of the transect.\n",
+        "            If this is `0`, the number of windows will be inferred automatically\n",
+        "            based on `window_len` and the total width of the transect, resulting\n",
+        "            in a different number of windows for each transect. Default is `0`.\n",
+        "        use_dynamic_offsets : bool\n",
+        "            Whether starting indices for each window should be randomly offset.\n",
+        "            Set to `True` for training and `False` for testing. Default is `True`.\n",
+        "        transform_pre : callable\n",
+        "            Operations to perform to the dictionary containing a single sample.\n",
+        "            These are performed before generating the masks. Default is `None`.\n",
+        "        transform_post : callable\n",
+        "            Operations to perform to the dictionary containing a single sample.\n",
+        "            These are performed after generating the masks. Default is `None`.\n",
+        "        \"\"\"\n",
+        "        super(TransectDataset, self).__init__()\n",
+        "        self.window_len = window_len\n",
+        "        self.crop_depth = crop_depth\n",
+        "        self.num_windows = num_windows_per_transect\n",
+        "        self.use_dynamic_offsets = use_dynamic_offsets\n",
+        "        self.transform_pre = transform_pre\n",
+        "        self.transform_post = transform_post\n",
+        "\n",
+        "        self.datapoints = []\n",
+        "\n",
+        "        for transect_path in transect_paths:\n",
+        "            # Lookup the number of rows in the transect\n",
+        "            # Load the sharding metadata\n",
+        "            with open(os.path.join(transect_path, \"shard_size.txt\"), \"r\") as f:\n",
+        "                n_timestamps, shard_len = f.readline().strip().split(\",\")\n",
+        "                n_timestamps = int(n_timestamps)\n",
+        "            # Generate an array for window centers within the transect\n",
+        "            # - if this is for training, we want to randomise the offsets\n",
+        "            # - if this is for validation, we want stable windows\n",
+        "            num_windows = self.num_windows\n",
+        "            if self.num_windows is None or self.num_windows == 0:\n",
+        "                # Load enough windows to include all datapoints\n",
+        "                num_windows = int(np.ceil(n_timestamps / self.window_len))\n",
+        "            centers = np.linspace(0, n_timestamps, num_windows + 1)[:num_windows]\n",
+        "            if len(centers) > 1:\n",
+        "                max_dy_offset = centers[1] - centers[0]\n",
+        "            else:\n",
+        "                max_dy_offset = n_timestamps\n",
+        "            if self.use_dynamic_offsets:\n",
+        "                centers += np.random.rand() * max_dy_offset\n",
+        "            else:\n",
+        "                centers += max_dy_offset / 2\n",
+        "            centers = np.round(centers)\n",
+        "            # Add each (transect, center) to the list for this epoch\n",
+        "            for center_idx in centers:\n",
+        "                self.datapoints.append((transect_path, int(center_idx)))\n",
+        "\n",
+        "    def __getitem__(self, index):\n",
+        "        transect_pth, center_idx = self.datapoints[index]\n",
+        "        # Load data from shards\n",
+        "        (\n",
+        "            timestamps,\n",
+        "            depths,\n",
+        "            signals,\n",
+        "            d_top,\n",
+        "            d_bot,\n",
+        "        ) = echofilter.raw.shardloader.load_transect_from_shards_abs(\n",
+        "            transect_pth,\n",
+        "            center_idx - int(self.window_len / 2),\n",
+        "            center_idx - int(self.window_len / 2) + self.window_len,\n",
+        "        )\n",
+        "        sample = {\n",
+        "            \"timestamps\": timestamps,\n",
+        "            \"depths\": depths,\n",
+        "            \"signals\": signals,\n",
+        "            \"d_top\": d_top,\n",
+        "            \"d_bot\": d_bot,\n",
+        "        }\n",
+        "        if self.transform_pre is not None:\n",
+        "            sample = self.transform_pre(sample)\n",
+        "        # Apply depth crop\n",
+        "        depth_crop_mask = sample[\"depths\"] <= self.crop_depth\n",
+        "        sample[\"depths\"] = sample[\"depths\"][depth_crop_mask]\n",
+        "        sample[\"signals\"] = sample[\"signals\"][:, depth_crop_mask]\n",
+        "        # Convert lines to masks\n",
+        "        ddepths = np.broadcast_to(sample[\"depths\"], sample[\"signals\"].shape)\n",
+        "        mask_top = np.single(ddepths < np.expand_dims(sample[\"d_top\"], -1))\n",
+        "        mask_bot = np.single(ddepths > np.expand_dims(sample[\"d_bot\"], -1))\n",
+        "        sample[\"mask_top\"] = mask_top\n",
+        "        sample[\"mask_bot\"] = mask_bot\n",
+        "        sample[\"r_top\"] = sample[\"d_top\"] / abs(\n",
+        "            sample[\"depths\"][-1] - sample[\"depths\"][0]\n",
+        "        )\n",
+        "        sample[\"r_bot\"] = sample[\"d_bot\"] / abs(\n",
+        "            sample[\"depths\"][-1] - sample[\"depths\"][0]\n",
+        "        )\n",
+        "        if self.transform_post is not None:\n",
+        "            sample = self.transform_post(sample)\n",
+        "        return sample\n",
+        "\n",
+        "    def __len__(self):\n",
+        "        return len(self.datapoints)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "transect_paths = [\n",
+        "    os.path.join(ROOT_DATA_DIR, \"surveyExports_sharded/Survey17/Survey17_GR1_S3W_F\")\n",
+        "] * 2"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "dataset = TransectDataset(transect_paths)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "dataset.datapoints"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "sample = dataset[0]\n",
+        "\n",
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.imshow(sample[\"signals\"])\n",
+        "plt.show()\n",
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.imshow(sample[\"mask_top\"])\n",
+        "plt.show()\n",
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.imshow(sample[\"mask_bot\"])\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "sample[\"signals\"].shape"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "loader = torch.utils.data.DataLoader(dataset, batch_size=2)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "for sample in loader:\n",
+        "    print(sample[\"signals\"].shape)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import skimage.transform"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "class Rescale(object):\n",
+        "    \"\"\"\n",
+        "    Rescale the image(s) in a sample to a given size.\n",
+        "\n",
+        "    Parameters\n",
+        "    ----------\n",
+        "    output_size : tuple or int\n",
+        "        Desired output size. If tuple, output is matched to output_size. If int,\n",
+        "        output is square.\n",
+        "    \"\"\"\n",
+        "\n",
+        "    def __init__(self, output_size):\n",
+        "        assert isinstance(output_size, (int, tuple))\n",
+        "        if isinstance(output_size, int):\n",
+        "            output_size = (output_size, output_size)\n",
+        "        self.output_size = output_size\n",
+        "\n",
+        "    def __call__(self, sample):\n",
+        "\n",
+        "        for key in (\"signals\", \"mask_top\", \"mask_bot\"):\n",
+        "            if key in sample:\n",
+        "                sample[key] = skimage.transform.resize(\n",
+        "                    sample[key],\n",
+        "                    self.output_size,\n",
+        "                    clip=False,\n",
+        "                    preserve_range=False,\n",
+        "                )\n",
+        "\n",
+        "        return sample"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "class Normalize(object):\n",
+        "    \"\"\"\n",
+        "    Normalize mean and standard deviation of image.\n",
+        "\n",
+        "    Note that changes are made inplace.\n",
+        "\n",
+        "    Parameters\n",
+        "    ----------\n",
+        "    mean : float\n",
+        "        Expected sample pixel mean.\n",
+        "    stdev : float\n",
+        "        Expected sample standard deviation of pixel intensities.\n",
+        "    \"\"\"\n",
+        "\n",
+        "    def __init__(self, mean, stdev):\n",
+        "        self.mean = mean\n",
+        "        self.stdev = stdev\n",
+        "\n",
+        "    def __call__(self, sample):\n",
+        "\n",
+        "        sample[\"signals\"] -= self.mean\n",
+        "        sample[\"signals\"] /= self.stdev\n",
+        "\n",
+        "        return sample"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "class RandomReflection(object):\n",
+        "    \"\"\"\n",
+        "    Randomly reflect a sample.\n",
+        "\n",
+        "    Parameters\n",
+        "    ----------\n",
+        "    axis : int, optional\n",
+        "        Axis to reflect. Default is 0.\n",
+        "    p : float, optional\n",
+        "        Probability of reflection. Default is 0.5.\n",
+        "    \"\"\"\n",
+        "\n",
+        "    def __init__(self, axis=0, p=0.5):\n",
+        "        self.axis = axis\n",
+        "        self.p = p\n",
+        "\n",
+        "    def __call__(self, sample):\n",
+        "\n",
+        "        if random.random() > self.p:\n",
+        "            # Nothing to do\n",
+        "            return sample\n",
+        "\n",
+        "        # Reflect x co-ordinates\n",
+        "        sample[\"timestamps\"] = sample[\"timestamps\"][::-1]\n",
+        "\n",
+        "        # Reflect data\n",
+        "        for key in (\"signals\", \"d_top\", \"d_bot\", \"mask_top\", \"mask_bot\"):\n",
+        "            if key in sample:\n",
+        "                sample[key] = np.flip(sample[key], self.axis)\n",
+        "\n",
+        "        return sample"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "class RandomStretchDepth(object):\n",
+        "    \"\"\"\n",
+        "    Rescale a set of images in a sample to a given size.\n",
+        "\n",
+        "    Note that this transform doesn't change images, just the `depth`, `d_top`, and `d_bot`.\n",
+        "    Note that changes are made inplace.\n",
+        "\n",
+        "    Parameters\n",
+        "    ----------\n",
+        "    max_factor : float\n",
+        "        Maximum stretch factor. A number between `[1, 1 + max_factor]` will be generated,\n",
+        "        and the depth will either be divided or multiplied by the generated stretch\n",
+        "        factor.\n",
+        "    expected_bottom_gap : float\n",
+        "        Expected gap between actual ocean floor and target bottom line.\n",
+        "    \"\"\"\n",
+        "\n",
+        "    def __init__(self, max_factor, expected_bottom_gap=1):\n",
+        "        self.max_factor = max_factor\n",
+        "        self.expected_bottom_gap = expected_bottom_gap\n",
+        "\n",
+        "    def __call__(self, sample):\n",
+        "\n",
+        "        factor = random.uniform(1.0, 1.0 + self.max_factor)\n",
+        "\n",
+        "        if random.random() > 0.5:\n",
+        "            factor = 1.0 / factor\n",
+        "\n",
+        "        sample[\"d_bot\"] += self.expected_bottom_gap\n",
+        "        for key in (\"depths\", \"d_top\", \"d_bot\"):\n",
+        "            sample[key] *= factor\n",
+        "        sample[\"d_bot\"] -= self.expected_bottom_gap\n",
+        "\n",
+        "        return sample"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "class RandomCropWidth(object):\n",
+        "    \"\"\"\n",
+        "    Randomly crop a sample in the width dimension.\n",
+        "\n",
+        "    Parameters\n",
+        "    ----------\n",
+        "    max_crop_fraction : float\n",
+        "        Maximum amount of material to crop away, as a fraction of the total width.\n",
+        "        The `crop_fraction` will be sampled uniformly from the range\n",
+        "        `[0, max_crop_fraction]`. The crop is always centred.\n",
+        "    \"\"\"\n",
+        "\n",
+        "    def __init__(self, max_crop_fraction):\n",
+        "        self.max_crop_fraction = max_crop_fraction\n",
+        "\n",
+        "    def __call__(self, sample):\n",
+        "\n",
+        "        width = sample[\"signals\"].shape[0]\n",
+        "\n",
+        "        crop_fraction = random.uniform(0.0, self.max_crop_fraction)\n",
+        "        crop_amount = crop_fraction * width\n",
+        "\n",
+        "        lft = int(crop_amount / 2)\n",
+        "        rgt = lft + width - int(crop_amount)\n",
+        "\n",
+        "        # Crop data\n",
+        "        for key in (\"timestamps\", \"signals\", \"d_top\", \"d_bot\", \"mask_top\", \"mask_bot\"):\n",
+        "            if key in sample:\n",
+        "                sample[key] = sample[key][lft:rgt]\n",
+        "\n",
+        "        return sample"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "class ColorJitter(object):\n",
+        "    \"\"\"\n",
+        "    Randomly change the brightness and contrast of a normalized image.\n",
+        "\n",
+        "    Note that changes are made inplace.\n",
+        "\n",
+        "    Parameters\n",
+        "    ----------\n",
+        "    brightness : float or tuple of float (min, max)\n",
+        "        How much to jitter brightness. `brightness_factor` is chosen uniformly from\n",
+        "        `[-brightness, brightness]`\n",
+        "        or the given `[min, max]`. `brightness_factor` is then added to the image.\n",
+        "    contrast : (float or tuple of float (min, max))\n",
+        "        How much to jitter contrast. `contrast_factor` is chosen uniformly from\n",
+        "        `[max(0, 1 - contrast), 1 + contrast]`\n",
+        "        or the given `[min, max]`. Should be non negative numbers.\n",
+        "    \"\"\"\n",
+        "\n",
+        "    def __init__(self, brightness=0, contrast=0):\n",
+        "        self.brightness = self._check_input(\n",
+        "            brightness,\n",
+        "            \"brightness\",\n",
+        "            center=0,\n",
+        "            bound=(float(\"-inf\"), float(\"inf\")),\n",
+        "            clip_first_on_zero=False,\n",
+        "        )\n",
+        "        self.contrast = self._check_input(contrast, \"contrast\")\n",
+        "\n",
+        "    def _check_input(\n",
+        "        self, value, name, center=1, bound=(0, float(\"inf\")), clip_first_on_zero=True\n",
+        "    ):\n",
+        "        if isinstance(value, (float, int)):\n",
+        "            if value < 0:\n",
+        "                raise ValueError(\n",
+        "                    \"If {} is a single number, it must be non negative.\".format(name)\n",
+        "                )\n",
+        "            value = [center - value, center + value]\n",
+        "            if clip_first_on_zero:\n",
+        "                value[0] = max(value[0], 0)\n",
+        "        elif isinstance(value, (tuple, list)) and len(value) == 2:\n",
+        "            if not bound[0] <= value[0] <= value[1] <= bound[1]:\n",
+        "                raise ValueError(\"{} values should be between {}\".format(name, bound))\n",
+        "        else:\n",
+        "            raise TypeError(\n",
+        "                \"{} should be a single number or a list/tuple with length 2.\".format(\n",
+        "                    name\n",
+        "                )\n",
+        "            )\n",
+        "\n",
+        "        if value[0] == value[1] == center:\n",
+        "            value = None\n",
+        "        return value\n",
+        "\n",
+        "    def __call__(self, sample):\n",
+        "        init_op = random.randint(0, 1)\n",
+        "        for i_op in range(2):\n",
+        "            op_num = (init_op + i_op) % 2\n",
+        "            if op_num == 0 and self.brightness is not None:\n",
+        "                brightness_factor = random.uniform(\n",
+        "                    self.brightness[0], self.brightness[1]\n",
+        "                )\n",
+        "                sample[\"signals\"] += brightness_factor\n",
+        "            elif op_num == 1 and self.contrast is not None:\n",
+        "                contrast_factor = random.uniform(self.contrast[0], self.contrast[1])\n",
+        "                sample[\"signals\"] *= contrast_factor\n",
+        "        return sample\n",
+        "\n",
+        "    def __repr__(self):\n",
+        "        format_string = self.__class__.__name__ + \"(\"\n",
+        "        format_string += \"brightness={0}\".format(self.brightness)\n",
+        "        format_string += \", contrast={0})\".format(self.contrast)\n",
+        "        format_string += \")\"\n",
+        "        return format_string"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import torchvision.transforms"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "train_transform_pre = torchvision.transforms.Compose(\n",
+        "    [\n",
+        "        RandomCropWidth(0.5),\n",
+        "        RandomStretchDepth(0.5),\n",
+        "        RandomReflection(),\n",
+        "    ]\n",
+        ")\n",
+        "train_transform_post = torchvision.transforms.Compose(\n",
+        "    [\n",
+        "        Rescale((128, 512)),\n",
+        "        Normalize(-70, 22),\n",
+        "        ColorJitter(0.5, 0.3),\n",
+        "    ]\n",
+        ")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "dataset_train = TransectDataset(\n",
+        "    transect_paths,\n",
+        "    window_len=192,\n",
+        "    crop_depth=70,\n",
+        "    num_windows_per_transect=10,\n",
+        "    use_dynamic_offsets=True,\n",
+        "    transform_pre=train_transform_pre,\n",
+        "    transform_post=train_transform_post,\n",
+        ")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "sample = dataset_train[0]\n",
+        "\n",
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.pcolormesh(\n",
+        "    np.linspace(*sample[\"timestamps\"][[0, -1]], sample[\"signals\"].shape[0]),\n",
+        "    -np.linspace(sample[\"depths\"][0], sample[\"depths\"][-1], sample[\"signals\"].shape[1]),\n",
+        "    sample[\"signals\"].T,\n",
+        ")\n",
+        "plt.plot(\n",
+        "    np.linspace(*sample[\"timestamps\"][[0, -1]], sample[\"d_bot\"].shape[0]),\n",
+        "    -sample[\"d_bot\"],\n",
+        "    \"b\",\n",
+        ")\n",
+        "plt.plot(\n",
+        "    np.linspace(*sample[\"timestamps\"][[0, -1]], sample[\"d_top\"].shape[0]),\n",
+        "    -sample[\"d_top\"],\n",
+        "    \"c\",\n",
+        ")\n",
+        "plt.show()\n",
+        "\n",
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.imshow(sample[\"signals\"])\n",
+        "plt.show()\n",
+        "\n",
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.imshow(sample[\"mask_top\"])\n",
+        "plt.show()\n",
+        "\n",
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.imshow(sample[\"mask_bot\"])\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "sample[\"r_top\"]"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "sample[\"r_bot\"]"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "val_transform = torchvision.transforms.Compose(\n",
+        "    [\n",
+        "        Rescale((128, 512)),\n",
+        "        Normalize(-70, 22),\n",
+        "    ]\n",
+        ")\n",
+        "\n",
+        "dataset_val = TransectDataset(\n",
+        "    transect_paths,\n",
+        "    window_len=128,\n",
+        "    crop_depth=70,\n",
+        "    num_windows_per_transect=20,\n",
+        "    use_dynamic_offsets=False,\n",
+        "    transform_post=val_transform,\n",
+        ")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "sample = dataset_val[0]\n",
+        "\n",
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.pcolormesh(\n",
+        "    np.linspace(*sample[\"timestamps\"][[0, -1]], sample[\"signals\"].shape[0]),\n",
+        "    -np.linspace(sample[\"depths\"][0], sample[\"depths\"][-1], sample[\"signals\"].shape[1]),\n",
+        "    sample[\"signals\"].T,\n",
+        ")\n",
+        "plt.plot(\n",
+        "    np.linspace(*sample[\"timestamps\"][[0, -1]], sample[\"d_bot\"].shape[0]),\n",
+        "    -sample[\"d_bot\"],\n",
+        "    \"b\",\n",
+        ")\n",
+        "plt.plot(\n",
+        "    np.linspace(*sample[\"timestamps\"][[0, -1]], sample[\"d_top\"].shape[0]),\n",
+        "    -sample[\"d_top\"],\n",
+        "    \"c\",\n",
+        ")\n",
+        "plt.show()\n",
+        "\n",
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.imshow(sample[\"signals\"])\n",
+        "plt.show()\n",
+        "\n",
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.imshow(sample[\"mask_top\"])\n",
+        "plt.show()\n",
+        "\n",
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.imshow(sample[\"mask_bot\"])\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "dataset_val.datapoints"
+      ]
+    }
+  ],
+  "metadata": {
+    "kernelspec": {
+      "display_name": "Python 3 (ipykernel)",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.7.15"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 2
 }
diff --git a/notebooks/Building Data Loader.ipynb b/notebooks/Building Data Loader.ipynb
index 5bd832dc..4f8c9dde 100644
--- a/notebooks/Building Data Loader.ipynb	
+++ b/notebooks/Building Data Loader.ipynb	
@@ -1,599 +1,599 @@
 {
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import csv\n",
-    "import datetime\n",
-    "import os\n",
-    "from collections import OrderedDict"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# import pandas as pd\n",
-    "import numpy as np\n",
-    "import torch"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import matplotlib.pyplot as plt\n",
-    "import seaborn as sns\n",
-    "\n",
-    "sns.set()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "root_dir = \"/media/scott/scratch/Datasets/dsforce/\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "fname = os.path.join(root_dir, \"surveyExports/Survey17/Survey17_GR1_N0A_E_Sv_raw.csv\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# df = pd.read_csv(fname)\n",
-    "#\n",
-    "# Can't use pandas because of inconsistent columns. Attempting to do so generates this error:\n",
-    "#\n",
-    "# ParserError: Error tokenizing data. C error: Expected 2544 fields in line 3, saw 5977"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "SURVEY_FIELD_TYPES = {\n",
-    "    \"Ping_index\": int,\n",
-    "    \"Distance_gps\": float,\n",
-    "    \"Distance_vl\": float,\n",
-    "    \"Ping_date\": str,\n",
-    "    \"Ping_time\": str,\n",
-    "    \"Ping_milliseconds\": float,\n",
-    "    \"Latitude\": float,\n",
-    "    \"Longitude\": float,\n",
-    "    \"Depth_start\": float,\n",
-    "    \"Depth_stop\": float,\n",
-    "    \"Range_start\": float,\n",
-    "    \"Range_stop\": float,\n",
-    "    \"Sample_count\": int,\n",
-    "}\n",
-    "\n",
-    "\n",
-    "def survey_reader(fname):\n",
-    "    \"\"\"\n",
-    "    Creates a generator which iterates through a survey csv file.\n",
-    "\n",
-    "    Parameters\n",
-    "    ----------\n",
-    "    fname: str\n",
-    "        Path to survey CSV file.\n",
-    "\n",
-    "    Returns\n",
-    "    -------\n",
-    "    generator\n",
-    "        Yields a tupule of `(metadata, data)`, where metadata is a dict,\n",
-    "        and data is a `numpy.ndarray`. Each yield corresponds to a single\n",
-    "        row in the data. Every row (except for the header) is yielded.\n",
-    "    \"\"\"\n",
-    "    metadata_header = []\n",
-    "    with open(fname, \"r\", encoding=\"utf-8-sig\") as hf:\n",
-    "        for i_row, row in enumerate(csv.reader(hf)):\n",
-    "            row = [entry.strip() for entry in row]\n",
-    "            if i_row == 0:\n",
-    "                metadata_header = row\n",
-    "                continue\n",
-    "            metadata = row[: len(metadata_header)]\n",
-    "            metadata_d = OrderedDict()\n",
-    "            for k, v in zip(metadata_header, metadata):\n",
-    "                if k in SURVEY_FIELD_TYPES:\n",
-    "                    metadata_d[k] = SURVEY_FIELD_TYPES[k](v)\n",
-    "                else:\n",
-    "                    metadata_d[k] = v\n",
-    "            data = np.array([float(x) for x in row[len(metadata_header) :]])\n",
-    "            yield metadata_d, data"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def count_lines(filename):\n",
-    "    \"\"\"\n",
-    "    Count the number of lines in a file.\n",
-    "\n",
-    "    Credit: https://stackoverflow.com/a/27518377\n",
-    "\n",
-    "    Parameters\n",
-    "    ----------\n",
-    "    filename : str\n",
-    "        Path to file.\n",
-    "\n",
-    "    Returns\n",
-    "    int\n",
-    "        Number of lines in file.\n",
-    "    \"\"\"\n",
-    "    f = open(filename)\n",
-    "    lines = 0\n",
-    "    buf_size = 1024 * 1024\n",
-    "    read_f = f.read  # loop optimization\n",
-    "\n",
-    "    buf = read_f(buf_size)\n",
-    "    while buf:\n",
-    "        lines += buf.count(\"\\n\")\n",
-    "        buf = read_f(buf_size)\n",
-    "\n",
-    "    return lines"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "for meta, data in survey_reader(fname):\n",
-    "    print(meta, data)\n",
-    "    break"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "count_lines(fname)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def survey_loader(fname, skip_lines=1, warn_row_overflow=True):\n",
-    "    \"\"\"\n",
-    "    Loads an entire survey CSV.\n",
-    "\n",
-    "    Parameters\n",
-    "    ----------\n",
-    "    fname : str\n",
-    "        Path to survey CSV file.\n",
-    "    skip_lines : int, optional\n",
-    "        Number of initial entries to skip. Default is 1.\n",
-    "\n",
-    "    Returns\n",
-    "    -------\n",
-    "    numpy.ndarray\n",
-    "        Timestamps for each row, in seconds. Note: not corrected for timezone.\n",
-    "    numpy.ndarray\n",
-    "        Depth of each column, in metres.\n",
-    "    numpy.ndarray\n",
-    "        Survey signal (echo strength, units unknown).\n",
-    "    \"\"\"\n",
-    "\n",
-    "    # We remove one from the line count because of the header\n",
-    "    # which is excluded from output\n",
-    "    n_lines = count_lines(fname) - 1\n",
-    "    # n_distances = 0\n",
-    "    depth_start = None\n",
-    "    depth_stop = None\n",
-    "\n",
-    "    # Initialise output array\n",
-    "    for i_line, (meta, row) in enumerate(survey_reader(fname)):\n",
-    "        if i_line < skip_lines:\n",
-    "            continue\n",
-    "        n_depths = len(row)\n",
-    "        depth_start = meta[\"Depth_start\"]\n",
-    "        depth_stop = meta[\"Depth_stop\"]\n",
-    "        break\n",
-    "\n",
-    "    data = np.empty((n_lines - skip_lines, n_depths))\n",
-    "    timestamps = np.empty((n_lines - skip_lines))\n",
-    "    depths = np.linspace(depth_start, depth_stop, n_depths)\n",
-    "\n",
-    "    for i_line, (meta, row) in enumerate(survey_reader(fname)):\n",
-    "        if i_line < skip_lines:\n",
-    "            continue\n",
-    "        i_entry = i_line - skip_lines\n",
-    "        if warn_row_overflow and len(row) > n_depths:\n",
-    "            print(\n",
-    "                \"Row {} of {} exceeds expected n_depths of {} with {}\".format(\n",
-    "                    i_line, fname, n_depths, len(row)\n",
-    "                )\n",
-    "            )\n",
-    "        data[i_entry, :] = row[:n_depths]\n",
-    "        timestamps[i_entry] = datetime.datetime.strptime(\n",
-    "            \"{}T{}.{:06d}\".format(\n",
-    "                meta[\"Ping_date\"],\n",
-    "                meta[\"Ping_time\"],\n",
-    "                int(1000 * float(meta[\"Ping_milliseconds\"])),\n",
-    "            ),\n",
-    "            \"%Y-%m-%dT%H:%M:%S.%f\",\n",
-    "        ).timestamp()\n",
-    "\n",
-    "    # Turn NaNs into NaNs (instead of extremely negative number)\n",
-    "    data[data < -1e6] = np.nan\n",
-    "\n",
-    "    return timestamps, depths, data"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "fname2 = os.path.join(\n",
-    "    root_dir, \"surveyExports\", \"Survey03/Survey03_GR2_S1A_survey3_Sv_raw.csv\"\n",
-    ")\n",
-    "timestamps, depths, signals = survey_loader(fname2)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "timestamps, depths, signals = survey_loader(fname)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "timestamps"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "depths"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "signals"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "len(signals)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.imshow(signals)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "signals.shape"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "np.prod(signals.shape)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.hist(np.reshape(signals, -1), bins=100, density=True)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.figure(figsize=(20, 20))\n",
-    "plt.imshow(signals.T)\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.pcolormesh(timestamps, -depths, signals.T)\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def evl_reader(fname):\n",
-    "    \"\"\"\n",
-    "    EVL file reader\n",
-    "\n",
-    "    Parameters\n",
-    "    ----------\n",
-    "    fname : str\n",
-    "        Path to .evl file.\n",
-    "\n",
-    "    Returns\n",
-    "    -------\n",
-    "    generator\n",
-    "        A generator which yields the timestamp (in seconds) and depth (in metres)\n",
-    "        for each entry. Note that the timestamp is not corrected for timezone\n",
-    "        (so make sure your timezones are internally consistent).\n",
-    "    \"\"\"\n",
-    "    with open(fname, \"r\") as hf:\n",
-    "        continuance = True\n",
-    "        for i_row, row in enumerate(csv.reader(hf, delimiter=\" \")):\n",
-    "            if i_row == 0:\n",
-    "                continue\n",
-    "            if len(row) < 4:\n",
-    "                if not continuance:\n",
-    "                    raise ValueError(\"Trying to skip data after parsing began\")\n",
-    "                continue\n",
-    "            continuance = False\n",
-    "\n",
-    "            timestamp = datetime.datetime.strptime(\n",
-    "                row[0] + \"T\" + row[1],\n",
-    "                \"%Y%m%dT%H%M%S%f\",\n",
-    "            ).timestamp()\n",
-    "\n",
-    "            if len(row[2]) > 0:\n",
-    "                raise ValueError(\"row[2] was non-empty: {}\".format(row[2]))\n",
-    "\n",
-    "            yield timestamp, float(row[3])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def evl_loader(fname):\n",
-    "    \"\"\"\n",
-    "    EVL file loader\n",
-    "\n",
-    "    Parameters\n",
-    "    ----------\n",
-    "    fname : str\n",
-    "        Path to .evl file.\n",
-    "\n",
-    "    Returns\n",
-    "    -------\n",
-    "    numpy.ndarray\n",
-    "        Timestamps, in seconds.\n",
-    "    numpy.ndarary\n",
-    "        Depth, in metres.\n",
-    "    \"\"\"\n",
-    "    timestamps = []\n",
-    "    values = []\n",
-    "    for timestamp, value in evl_reader(fname):\n",
-    "        timestamps.append(timestamp)\n",
-    "        values.append(value)\n",
-    "    return np.array(timestamps), np.array(values)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "bottom_fname = os.path.join(\n",
-    "    root_dir, \"surveyExports/Survey17/Survey17_GR1_N0A_E_bottom.evl\"\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "for t, v in evl_reader(bottom_fname):\n",
-    "    print(t, v)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "evl_loader(bottom_fname)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "top_fname = os.path.join(\n",
-    "    root_dir, \"surveyExports/Survey17/Survey17_GR1_N0A_E_turbulence.evl\"\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "evl_loader(top_fname)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.figure(figsize=(12, 12))\n",
-    "\n",
-    "plt.pcolormesh(timestamps, -depths, signals.T)\n",
-    "\n",
-    "t_bottom, d_bottom = evl_loader(bottom_fname)\n",
-    "t_top, d_top = evl_loader(top_fname)\n",
-    "\n",
-    "plt.plot(t_bottom, -d_bottom, \"b\")\n",
-    "plt.plot(t_top, -d_top, \"c\")\n",
-    "\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def plot_transect_data(survey, transect_name, root_dir):\n",
-    "\n",
-    "    dirname = os.path.join(root_dir, \"surveyExports\", \"Survey\" + str(survey))\n",
-    "    raw_fname = os.path.join(\n",
-    "        dirname, \"Survey{}_{}_Sv_raw.csv\".format(survey, transect_name)\n",
-    "    )\n",
-    "    bot_fname = os.path.join(\n",
-    "        dirname, \"Survey{}_{}_bottom.evl\".format(survey, transect_name)\n",
-    "    )\n",
-    "    top_fname = os.path.join(\n",
-    "        dirname, \"Survey{}_{}_turbulence.evl\".format(survey, transect_name)\n",
-    "    )\n",
-    "\n",
-    "    timestamps, depths, signals = survey_loader(raw_fname)\n",
-    "    t_bottom, d_bottom = evl_loader(bot_fname)\n",
-    "    t_top, d_top = evl_loader(top_fname)\n",
-    "\n",
-    "    plt.figure(figsize=(12, 12))\n",
-    "    plt.pcolormesh(timestamps, -depths, signals.T)\n",
-    "    plt.plot(t_bottom, -d_bottom, \"b\")\n",
-    "    plt.plot(t_top, -d_top, \"c\")\n",
-    "    plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "survey = 17\n",
-    "transect_name = \"GR1_N0A_E\"\n",
-    "\n",
-    "plot_transect_data(survey, transect_name, root_dir)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "survey = 17\n",
-    "transect_name = \"GR1_N2W_E\"\n",
-    "\n",
-    "plot_transect_data(survey, transect_name, root_dir)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "survey = 17\n",
-    "transect_name = \"GR1_N3A_F\"\n",
-    "\n",
-    "plot_transect_data(survey, transect_name, root_dir)"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.7.15"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
+  "cells": [
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import csv\n",
+        "import datetime\n",
+        "import os\n",
+        "from collections import OrderedDict"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# import pandas as pd\n",
+        "import numpy as np\n",
+        "import torch"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import matplotlib.pyplot as plt\n",
+        "import seaborn as sns\n",
+        "\n",
+        "sns.set()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "root_dir = \"/media/scott/scratch/Datasets/dsforce/\""
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "fname = os.path.join(root_dir, \"surveyExports/Survey17/Survey17_GR1_N0A_E_Sv_raw.csv\")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# df = pd.read_csv(fname)\n",
+        "#\n",
+        "# Can't use pandas because of inconsistent columns. Attempting to do so generates this error:\n",
+        "#\n",
+        "# ParserError: Error tokenizing data. C error: Expected 2544 fields in line 3, saw 5977"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "SURVEY_FIELD_TYPES = {\n",
+        "    \"Ping_index\": int,\n",
+        "    \"Distance_gps\": float,\n",
+        "    \"Distance_vl\": float,\n",
+        "    \"Ping_date\": str,\n",
+        "    \"Ping_time\": str,\n",
+        "    \"Ping_milliseconds\": float,\n",
+        "    \"Latitude\": float,\n",
+        "    \"Longitude\": float,\n",
+        "    \"Depth_start\": float,\n",
+        "    \"Depth_stop\": float,\n",
+        "    \"Range_start\": float,\n",
+        "    \"Range_stop\": float,\n",
+        "    \"Sample_count\": int,\n",
+        "}\n",
+        "\n",
+        "\n",
+        "def survey_reader(fname):\n",
+        "    \"\"\"\n",
+        "    Creates a generator which iterates through a survey csv file.\n",
+        "\n",
+        "    Parameters\n",
+        "    ----------\n",
+        "    fname: str\n",
+        "        Path to survey CSV file.\n",
+        "\n",
+        "    Returns\n",
+        "    -------\n",
+        "    generator\n",
+        "        Yields a tuple of `(metadata, data)`, where metadata is a dict,\n",
+        "        and data is a `numpy.ndarray`. Each yield corresponds to a single\n",
+        "        row in the data. Every row (except for the header) is yielded.\n",
+        "    \"\"\"\n",
+        "    metadata_header = []\n",
+        "    with open(fname, \"r\", encoding=\"utf-8-sig\") as hf:\n",
+        "        for i_row, row in enumerate(csv.reader(hf)):\n",
+        "            row = [entry.strip() for entry in row]\n",
+        "            if i_row == 0:\n",
+        "                metadata_header = row\n",
+        "                continue\n",
+        "            metadata = row[: len(metadata_header)]\n",
+        "            metadata_d = OrderedDict()\n",
+        "            for k, v in zip(metadata_header, metadata):\n",
+        "                if k in SURVEY_FIELD_TYPES:\n",
+        "                    metadata_d[k] = SURVEY_FIELD_TYPES[k](v)\n",
+        "                else:\n",
+        "                    metadata_d[k] = v\n",
+        "            data = np.array([float(x) for x in row[len(metadata_header) :]])\n",
+        "            yield metadata_d, data"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "def count_lines(filename):\n",
+        "    \"\"\"\n",
+        "    Count the number of lines in a file.\n",
+        "\n",
+        "    Credit: https://stackoverflow.com/a/27518377\n",
+        "\n",
+        "    Parameters\n",
+        "    ----------\n",
+        "    filename : str\n",
+        "        Path to file.\n",
+        "\n",
+        "    Returns\n",
+        "    int\n",
+        "        Number of lines in file.\n",
+        "    \"\"\"\n",
+        "    f = open(filename)\n",
+        "    lines = 0\n",
+        "    buf_size = 1024 * 1024\n",
+        "    read_f = f.read  # loop optimization\n",
+        "\n",
+        "    buf = read_f(buf_size)\n",
+        "    while buf:\n",
+        "        lines += buf.count(\"\\n\")\n",
+        "        buf = read_f(buf_size)\n",
+        "\n",
+        "    return lines"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "for meta, data in survey_reader(fname):\n",
+        "    print(meta, data)\n",
+        "    break"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "count_lines(fname)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "def survey_loader(fname, skip_lines=1, warn_row_overflow=True):\n",
+        "    \"\"\"\n",
+        "    Loads an entire survey CSV.\n",
+        "\n",
+        "    Parameters\n",
+        "    ----------\n",
+        "    fname : str\n",
+        "        Path to survey CSV file.\n",
+        "    skip_lines : int, optional\n",
+        "        Number of initial entries to skip. Default is 1.\n",
+        "\n",
+        "    Returns\n",
+        "    -------\n",
+        "    numpy.ndarray\n",
+        "        Timestamps for each row, in seconds. Note: not corrected for timezone.\n",
+        "    numpy.ndarray\n",
+        "        Depth of each column, in metres.\n",
+        "    numpy.ndarray\n",
+        "        Survey signal (echo strength, units unknown).\n",
+        "    \"\"\"\n",
+        "\n",
+        "    # We remove one from the line count because of the header\n",
+        "    # which is excluded from output\n",
+        "    n_lines = count_lines(fname) - 1\n",
+        "    # n_distances = 0\n",
+        "    depth_start = None\n",
+        "    depth_stop = None\n",
+        "\n",
+        "    # Initialise output array\n",
+        "    for i_line, (meta, row) in enumerate(survey_reader(fname)):\n",
+        "        if i_line < skip_lines:\n",
+        "            continue\n",
+        "        n_depths = len(row)\n",
+        "        depth_start = meta[\"Depth_start\"]\n",
+        "        depth_stop = meta[\"Depth_stop\"]\n",
+        "        break\n",
+        "\n",
+        "    data = np.empty((n_lines - skip_lines, n_depths))\n",
+        "    timestamps = np.empty((n_lines - skip_lines))\n",
+        "    depths = np.linspace(depth_start, depth_stop, n_depths)\n",
+        "\n",
+        "    for i_line, (meta, row) in enumerate(survey_reader(fname)):\n",
+        "        if i_line < skip_lines:\n",
+        "            continue\n",
+        "        i_entry = i_line - skip_lines\n",
+        "        if warn_row_overflow and len(row) > n_depths:\n",
+        "            print(\n",
+        "                \"Row {} of {} exceeds expected n_depths of {} with {}\".format(\n",
+        "                    i_line, fname, n_depths, len(row)\n",
+        "                )\n",
+        "            )\n",
+        "        data[i_entry, :] = row[:n_depths]\n",
+        "        timestamps[i_entry] = datetime.datetime.strptime(\n",
+        "            \"{}T{}.{:06d}\".format(\n",
+        "                meta[\"Ping_date\"],\n",
+        "                meta[\"Ping_time\"],\n",
+        "                int(1000 * float(meta[\"Ping_milliseconds\"])),\n",
+        "            ),\n",
+        "            \"%Y-%m-%dT%H:%M:%S.%f\",\n",
+        "        ).timestamp()\n",
+        "\n",
+        "    # Turn NaN placeholders (extremely negative numbers) into actual NaNs\n",
+        "    data[data < -1e6] = np.nan\n",
+        "\n",
+        "    return timestamps, depths, data"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "fname2 = os.path.join(\n",
+        "    root_dir, \"surveyExports\", \"Survey03/Survey03_GR2_S1A_survey3_Sv_raw.csv\"\n",
+        ")\n",
+        "timestamps, depths, signals = survey_loader(fname2)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "timestamps, depths, signals = survey_loader(fname)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "timestamps"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "depths"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "signals"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "len(signals)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.imshow(signals)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "signals.shape"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "np.prod(signals.shape)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.hist(np.reshape(signals, -1), bins=100, density=True)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.figure(figsize=(20, 20))\n",
+        "plt.imshow(signals.T)\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.pcolormesh(timestamps, -depths, signals.T)\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "def evl_reader(fname):\n",
+        "    \"\"\"\n",
+        "    EVL file reader\n",
+        "\n",
+        "    Parameters\n",
+        "    ----------\n",
+        "    fname : str\n",
+        "        Path to .evl file.\n",
+        "\n",
+        "    Returns\n",
+        "    -------\n",
+        "    generator\n",
+        "        A generator which yields the timestamp (in seconds) and depth (in metres)\n",
+        "        for each entry. Note that the timestamp is not corrected for timezone\n",
+        "        (so make sure your timezones are internally consistent).\n",
+        "    \"\"\"\n",
+        "    with open(fname, \"r\") as hf:\n",
+        "        continuance = True\n",
+        "        for i_row, row in enumerate(csv.reader(hf, delimiter=\" \")):\n",
+        "            if i_row == 0:\n",
+        "                continue\n",
+        "            if len(row) < 4:\n",
+        "                if not continuance:\n",
+        "                    raise ValueError(\"Trying to skip data after parsing began\")\n",
+        "                continue\n",
+        "            continuance = False\n",
+        "\n",
+        "            timestamp = datetime.datetime.strptime(\n",
+        "                row[0] + \"T\" + row[1],\n",
+        "                \"%Y%m%dT%H%M%S%f\",\n",
+        "            ).timestamp()\n",
+        "\n",
+        "            if len(row[2]) > 0:\n",
+        "                raise ValueError(\"row[2] was non-empty: {}\".format(row[2]))\n",
+        "\n",
+        "            yield timestamp, float(row[3])"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "def evl_loader(fname):\n",
+        "    \"\"\"\n",
+        "    EVL file loader\n",
+        "\n",
+        "    Parameters\n",
+        "    ----------\n",
+        "    fname : str\n",
+        "        Path to .evl file.\n",
+        "\n",
+        "    Returns\n",
+        "    -------\n",
+        "    numpy.ndarray\n",
+        "        Timestamps, in seconds.\n",
+        "    numpy.ndarray\n",
+        "        Depth, in metres.\n",
+        "    \"\"\"\n",
+        "    timestamps = []\n",
+        "    values = []\n",
+        "    for timestamp, value in evl_reader(fname):\n",
+        "        timestamps.append(timestamp)\n",
+        "        values.append(value)\n",
+        "    return np.array(timestamps), np.array(values)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "bottom_fname = os.path.join(\n",
+        "    root_dir, \"surveyExports/Survey17/Survey17_GR1_N0A_E_bottom.evl\"\n",
+        ")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "for t, v in evl_reader(bottom_fname):\n",
+        "    print(t, v)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "evl_loader(bottom_fname)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "top_fname = os.path.join(\n",
+        "    root_dir, \"surveyExports/Survey17/Survey17_GR1_N0A_E_turbulence.evl\"\n",
+        ")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "evl_loader(top_fname)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.figure(figsize=(12, 12))\n",
+        "\n",
+        "plt.pcolormesh(timestamps, -depths, signals.T)\n",
+        "\n",
+        "t_bottom, d_bottom = evl_loader(bottom_fname)\n",
+        "t_top, d_top = evl_loader(top_fname)\n",
+        "\n",
+        "plt.plot(t_bottom, -d_bottom, \"b\")\n",
+        "plt.plot(t_top, -d_top, \"c\")\n",
+        "\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "def plot_transect_data(survey, transect_name, root_dir):\n",
+        "\n",
+        "    dirname = os.path.join(root_dir, \"surveyExports\", \"Survey\" + str(survey))\n",
+        "    raw_fname = os.path.join(\n",
+        "        dirname, \"Survey{}_{}_Sv_raw.csv\".format(survey, transect_name)\n",
+        "    )\n",
+        "    bot_fname = os.path.join(\n",
+        "        dirname, \"Survey{}_{}_bottom.evl\".format(survey, transect_name)\n",
+        "    )\n",
+        "    top_fname = os.path.join(\n",
+        "        dirname, \"Survey{}_{}_turbulence.evl\".format(survey, transect_name)\n",
+        "    )\n",
+        "\n",
+        "    timestamps, depths, signals = survey_loader(raw_fname)\n",
+        "    t_bottom, d_bottom = evl_loader(bot_fname)\n",
+        "    t_top, d_top = evl_loader(top_fname)\n",
+        "\n",
+        "    plt.figure(figsize=(12, 12))\n",
+        "    plt.pcolormesh(timestamps, -depths, signals.T)\n",
+        "    plt.plot(t_bottom, -d_bottom, \"b\")\n",
+        "    plt.plot(t_top, -d_top, \"c\")\n",
+        "    plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "survey = 17\n",
+        "transect_name = \"GR1_N0A_E\"\n",
+        "\n",
+        "plot_transect_data(survey, transect_name, root_dir)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "survey = 17\n",
+        "transect_name = \"GR1_N2W_E\"\n",
+        "\n",
+        "plot_transect_data(survey, transect_name, root_dir)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "survey = 17\n",
+        "transect_name = \"GR1_N3A_F\"\n",
+        "\n",
+        "plot_transect_data(survey, transect_name, root_dir)"
+      ]
+    }
+  ],
+  "metadata": {
+    "kernelspec": {
+      "display_name": "Python 3 (ipykernel)",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.7.15"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 2
 }
diff --git a/notebooks/Completely decomposing mask.ipynb b/notebooks/Completely decomposing mask.ipynb
index 40fe328a..2d2d5073 100644
--- a/notebooks/Completely decomposing mask.ipynb	
+++ b/notebooks/Completely decomposing mask.ipynb	
@@ -1,531 +1,531 @@
 {
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "cd .."
-   ]
+  "cells": [
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "cd .."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import echofilter.raw"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import copy\n",
+        "import os"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import numpy as np"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import matplotlib.pyplot as plt\n",
+        "import seaborn as sns\n",
+        "\n",
+        "sns.set()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "turbulence_color = \"c\"\n",
+        "bottom_color = \"#00dd00\""
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "root_data_dir = \"/media/scott/scratch/Datasets/dsforce/surveyExports\"\n",
+        "\n",
+        "# example with only passive period\n",
+        "# sample = 'mobile/Survey17/Survey17_GR4_T1W_E'\n",
+        "\n",
+        "# example with 1 passive period, 1 turbulence cut out\n",
+        "# sample = 'mobile/Survey16/Survey16_GR3_N1W_E'\n",
+        "\n",
+        "# example with lots of short passive periods, 1 proper passive period, 1 turbulence cut out\n",
+        "# sample = 'mobile/Survey17/Survey17_GR1_N0W_E'\n",
+        "\n",
+        "# example with passive, removed, and patches\n",
+        "sample = \"mobile/Survey16/Survey16_GR1_N3A_F\"\n",
+        "\n",
+        "# example with passive, removed, and patches\n",
+        "sample = \"mobile/Survey16/Survey16_GR3_N3A_F\"\n",
+        "\n",
+        "# example where passive detection went wrong\n",
+        "# sample = 'MinasPassage/march2018/march2018_D20180330-T202218_D20180331-T015214'\n",
+        "\n",
+        "# Load raw data\n",
+        "fname_raw = os.path.join(root_data_dir, sample + \"_Sv_raw.csv\")\n",
+        "fname_masked = os.path.join(root_data_dir, sample + \"_Sv.csv\")\n",
+        "\n",
+        "ts_raw, depths_raw, signals_raw = echofilter.raw.loader.transect_loader(fname_raw)\n",
+        "ts_mskd, depths_mskd, signals_mskd = echofilter.raw.loader.transect_loader(fname_masked)\n",
+        "mask = ~np.isnan(signals_mskd)\n",
+        "\n",
+        "fname_top1 = os.path.join(root_data_dir, sample + \"_turbulence.evl\")\n",
+        "fname_top2 = os.path.join(root_data_dir, sample + \"_air.evl\")\n",
+        "fname_bot = os.path.join(root_data_dir, sample + \"_bottom.evl\")\n",
+        "if os.path.isfile(fname_top1):\n",
+        "    t_top, d_top = echofilter.raw.loader.evl_loader(fname_top1)\n",
+        "elif os.path.isfile(fname_top2):\n",
+        "    t_top, d_top = echofilter.raw.loader.evl_loader(fname_top2)\n",
+        "else:\n",
+        "    t_top = d_top = None\n",
+        "if os.path.isfile(fname_bot):\n",
+        "    t_bot, d_bot = echofilter.raw.loader.evl_loader(fname_bot)\n",
+        "else:\n",
+        "    t_bot = d_bot = None"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "(\n",
+        "    d_top_new,\n",
+        "    d_bot_new,\n",
+        "    passive_starts,\n",
+        "    passive_ends,\n",
+        ") = echofilter.raw.manipulate.fixup_lines(\n",
+        "    ts_raw,\n",
+        "    depths_raw,\n",
+        "    signals_raw,\n",
+        "    mask,\n",
+        "    t_top=t_top,\n",
+        "    d_top=d_top,\n",
+        "    t_bot=t_bot,\n",
+        "    d_bot=d_bot,\n",
+        "    return_passive_boundaries=True,\n",
+        ")\n",
+        "ts_new = ts_raw"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "t_top[-1] - t_top[0]"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "ts_raw[-1] - ts_raw[0]"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "ts_mskd[-1] - ts_mskd[0]"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.plot(t_top, d_top)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "is_passive = np.zeros(ts_raw.shape, dtype=bool)\n",
+        "\n",
+        "for pass_start, pass_end in zip(passive_starts, passive_ends):\n",
+        "    is_passive[pass_start:pass_end] = True"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "allnan = np.all(np.isnan(signals_mskd), axis=1)\n",
+        "\n",
+        "is_removed = allnan & ~is_passive\n",
+        "\n",
+        "removed_starts = np.nonzero(np.diff(is_removed.astype(np.float)) > 0)[0]\n",
+        "removed_ends = np.nonzero(np.diff(is_removed.astype(np.float)) < 0)[0]\n",
+        "\n",
+        "if len(removed_starts) > 0:\n",
+        "    removed_starts += 1\n",
+        "if len(removed_ends) > 0:\n",
+        "    removed_ends += 1\n",
+        "\n",
+        "if len(removed_ends) > 0 and (\n",
+        "    len(removed_starts) == 0 or removed_ends[0] < removed_starts[0]\n",
+        "):\n",
+        "    removed_starts = np.concatenate(([0], removed_starts))\n",
+        "\n",
+        "if len(removed_starts) > 0 and (\n",
+        "    len(removed_ends) == 0 or removed_starts[-1] > removed_ends[-1]\n",
+        "):\n",
+        "    removed_ends = np.concatenate((removed_ends, [len(is_removed)]))"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# Plot with time on x-axis\n",
+        "for ts, depths, signals in (\n",
+        "    (ts_raw, depths_raw, signals_raw),\n",
+        "    (ts_mskd, depths_mskd, signals_mskd),\n",
+        "):\n",
+        "    plt.figure(figsize=(12, 12))\n",
+        "    plt.pcolormesh(ts, depths, signals.T)\n",
+        "    if d_top is not None:\n",
+        "        li = t_top <= ts[-1]\n",
+        "        eliminated_line_points = np.sum(~li)\n",
+        "        if eliminated_line_points > 1:\n",
+        "            print(\n",
+        "                \"Removed {} point from top line which extend past signal recording\".format(\n",
+        "                    eliminated_line_points\n",
+        "                )\n",
+        "            )\n",
+        "        plt.plot(t_top[li], d_top[li], turbulence_color)\n",
+        "    if d_bot is not None:\n",
+        "        plt.plot(t_bot, d_bot, bottom_color)\n",
+        "    plt.gca().invert_yaxis()\n",
+        "    plt.xlabel(\"Timestamp (s)\")\n",
+        "    plt.ylabel(\"Depth (m)\")\n",
+        "    plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# Plot with index on x-axis\n",
+        "for ts, depths, signals in (\n",
+        "    (np.arange(signals_raw.shape[0]), depths_raw, signals_raw),\n",
+        "    (np.arange(signals_raw.shape[0]), depths_mskd, signals_mskd),\n",
+        "):\n",
+        "    plt.figure(figsize=(12, 12))\n",
+        "    plt.pcolormesh(ts, depths, signals.T)\n",
+        "    if d_top is not None:\n",
+        "        plt.plot(ts, np.interp(ts_raw, t_top, d_top), turbulence_color)\n",
+        "    if d_bot is not None:\n",
+        "        plt.plot(ts, np.interp(ts_raw, t_bot, d_bot), bottom_color)\n",
+        "    plt.gca().invert_yaxis()\n",
+        "    plt.xlabel(\"Sample index\")\n",
+        "    plt.ylabel(\"Depth (m)\")\n",
+        "    plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "bad_mask = np.ones(signals_raw.shape, dtype=bool)\n",
+        "\n",
+        "ddepths = np.broadcast_to(depths_raw, signals_raw.shape)\n",
+        "\n",
+        "is_top = ddepths < np.expand_dims(np.interp(ts_raw, t_top, d_top), -1)\n",
+        "bad_mask[is_top] = False\n",
+        "\n",
+        "if d_bot is not None:\n",
+        "    is_bot = ddepths > np.expand_dims(np.interp(ts_raw, t_bot, d_bot), -1)\n",
+        "    bad_mask[is_bot] = False\n",
+        "\n",
+        "signals_badly = copy.deepcopy(signals_raw)\n",
+        "signals_badly[~bad_mask] = np.NaN"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.pcolormesh(ts_raw, depths_raw, signals_badly.T)\n",
+        "plt.plot(t_top, d_top, turbulence_color)\n",
+        "plt.plot(t_bot, d_bot, bottom_color)\n",
+        "plt.gca().invert_yaxis()\n",
+        "plt.xlabel(\"Timestamp (s)\")\n",
+        "plt.ylabel(\"Depth (m)\")\n",
+        "plt.title(\"Bad mask (old)\")\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "for ts, depths, signals in (\n",
+        "    (ts_raw, depths_raw, signals_raw),\n",
+        "    (ts_mskd, depths_mskd, signals_mskd),\n",
+        "):\n",
+        "    plt.figure(figsize=(12, 12))\n",
+        "    plt.pcolormesh(ts, depths, signals.T)\n",
+        "    plt.plot(ts_new, d_top_new, turbulence_color)\n",
+        "    plt.plot(ts_new, d_bot_new, bottom_color)\n",
+        "\n",
+        "    for r_start, r_end in zip(passive_starts, passive_ends):\n",
+        "        plt.fill_between(\n",
+        "            ts_raw[[r_start, r_end]],\n",
+        "            depths_raw[[0, 0]],\n",
+        "            depths_raw[[-1, -1]],\n",
+        "            facecolor=\"none\",\n",
+        "            hatch=\"//\",\n",
+        "            edgecolor=\"k\",\n",
+        "            linewidth=0.0,\n",
+        "        )\n",
+        "\n",
+        "    for r_start, r_end in zip(removed_starts, removed_ends):\n",
+        "        plt.fill_between(\n",
+        "            ts_raw[[r_start, r_end]],\n",
+        "            depths_raw[[0, 0]],\n",
+        "            depths_raw[[-1, -1]],\n",
+        "            facecolor=\"none\",\n",
+        "            hatch=\"\\\\\\\\\",\n",
+        "            edgecolor=[0, 0, 1],\n",
+        "            linewidth=0.0,\n",
+        "        )\n",
+        "\n",
+        "    plt.xlabel(\"Timestamp (s)\")\n",
+        "    plt.ylabel(\"Depth (m)\")\n",
+        "    plt.gca().invert_yaxis()\n",
+        "    plt.show()"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "## Recompose mask from parts"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "new_mask = np.ones(signals_raw.shape, dtype=bool)\n",
+        "\n",
+        "new_mask[is_passive, :] = False\n",
+        "new_mask[is_removed, :] = False\n",
+        "\n",
+        "ddepths = np.broadcast_to(depths_raw, signals_raw.shape)\n",
+        "is_top = ddepths < np.expand_dims(np.nan_to_num(d_top_new), -1)\n",
+        "is_bot = ddepths > np.expand_dims(np.nan_to_num(d_bot_new), -1)\n",
+        "\n",
+        "new_mask[is_top] = False\n",
+        "new_mask[is_bot] = False"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.pcolormesh(ts_raw, depths_raw, signals_raw.T)\n",
+        "plt.gca().invert_yaxis()\n",
+        "plt.xlabel(\"Timestamp (s)\")\n",
+        "plt.ylabel(\"Depth (m)\")\n",
+        "plt.title(\"Raw data\")\n",
+        "plt.show()\n",
+        "\n",
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.pcolormesh(ts_raw, depths_raw, signals_mskd.T)\n",
+        "plt.gca().invert_yaxis()\n",
+        "plt.xlabel(\"Timestamp (s)\")\n",
+        "plt.ylabel(\"Depth (m)\")\n",
+        "plt.title(\"Masked data\")\n",
+        "plt.show()\n",
+        "\n",
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.pcolormesh(ts_raw, depths_raw, mask.T)\n",
+        "plt.gca().invert_yaxis()\n",
+        "plt.xlabel(\"Timestamp (s)\")\n",
+        "plt.ylabel(\"Depth (m)\")\n",
+        "plt.title(\"Original mask\")\n",
+        "plt.show()\n",
+        "\n",
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.pcolormesh(ts_raw, depths_raw, new_mask.T)\n",
+        "plt.gca().invert_yaxis()\n",
+        "plt.xlabel(\"Timestamp (s)\")\n",
+        "plt.ylabel(\"Depth (m)\")\n",
+        "plt.title(\"Decomposed and recomposed mask\")\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "### Check recomposed mask matches the original mask"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.pcolormesh(ts_raw, depths_raw, (np.single(mask) - np.single(new_mask)).T)\n",
+        "plt.gca().invert_yaxis()\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "np.nonzero(np.single(mask) - np.single(new_mask))"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "np.max(np.single(mask) - np.single(new_mask))"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "np.min(np.single(mask) - np.single(new_mask))"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "np.unique(np.nonzero(np.single(mask) - np.single(new_mask))[0])"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "for idx in np.unique(np.nonzero(np.single(mask) - np.single(new_mask))[0]):\n",
+        "    for data in (signals_raw, mask, new_mask):\n",
+        "        plt.figure(figsize=(12, 12))\n",
+        "        plt.pcolormesh(\n",
+        "            ts_raw[max(0, idx - 4) : min(len(ts_raw), idx + 5)],\n",
+        "            depths_raw[:80],\n",
+        "            data[max(0, idx - 4) : min(len(ts_raw), idx + 5), :80].T,\n",
+        "        )\n",
+        "        plt.gca().invert_yaxis()\n",
+        "        plt.show()"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "### Visually inspect starts and ends of passive and removed segments"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "for idx in np.concatenate((passive_starts, passive_ends, removed_starts, removed_ends)):\n",
+        "    for data in (signals_raw, mask, new_mask):\n",
+        "        plt.figure(figsize=(12, 12))\n",
+        "        plt.pcolormesh(\n",
+        "            ts_raw[idx - 4 : idx + 5], depths_raw[:500], data[idx - 4 : idx + 5, :500].T\n",
+        "        )\n",
+        "        plt.gca().invert_yaxis()\n",
+        "        plt.show()"
+      ]
+    }
+  ],
+  "metadata": {
+    "kernelspec": {
+      "display_name": "Python 3",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.6.9"
+    }
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import echofilter.raw"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import copy\n",
-    "import os"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import numpy as np"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import matplotlib.pyplot as plt\n",
-    "import seaborn as sns\n",
-    "\n",
-    "sns.set()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "turbulence_color = \"c\"\n",
-    "bottom_color = \"#00dd00\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "root_data_dir = \"/media/scott/scratch/Datasets/dsforce/surveyExports\"\n",
-    "\n",
-    "# example with only passive period\n",
-    "# sample = 'mobile/Survey17/Survey17_GR4_T1W_E'\n",
-    "\n",
-    "# example with 1 passive period, 1 turbulence cut out\n",
-    "# sample = 'mobile/Survey16/Survey16_GR3_N1W_E'\n",
-    "\n",
-    "# example with lots of short passive periods, 1 proper passive period, 1 turbulence cut out\n",
-    "# sample = 'mobile/Survey17/Survey17_GR1_N0W_E'\n",
-    "\n",
-    "# example with passive, removed, and patches\n",
-    "sample = \"mobile/Survey16/Survey16_GR1_N3A_F\"\n",
-    "\n",
-    "# example with passive, removed, and patches\n",
-    "sample = \"mobile/Survey16/Survey16_GR3_N3A_F\"\n",
-    "\n",
-    "# example where passive detection went wrong\n",
-    "# sample = 'MinasPassage/march2018/march2018_D20180330-T202218_D20180331-T015214'\n",
-    "\n",
-    "# Load raw data\n",
-    "fname_raw = os.path.join(root_data_dir, sample + \"_Sv_raw.csv\")\n",
-    "fname_masked = os.path.join(root_data_dir, sample + \"_Sv.csv\")\n",
-    "\n",
-    "ts_raw, depths_raw, signals_raw = echofilter.raw.loader.transect_loader(fname_raw)\n",
-    "ts_mskd, depths_mskd, signals_mskd = echofilter.raw.loader.transect_loader(fname_masked)\n",
-    "mask = ~np.isnan(signals_mskd)\n",
-    "\n",
-    "fname_top1 = os.path.join(root_data_dir, sample + \"_turbulence.evl\")\n",
-    "fname_top2 = os.path.join(root_data_dir, sample + \"_air.evl\")\n",
-    "fname_bot = os.path.join(root_data_dir, sample + \"_bottom.evl\")\n",
-    "if os.path.isfile(fname_top1):\n",
-    "    t_top, d_top = echofilter.raw.loader.evl_loader(fname_top1)\n",
-    "elif os.path.isfile(fname_top2):\n",
-    "    t_top, d_top = echofilter.raw.loader.evl_loader(fname_top2)\n",
-    "else:\n",
-    "    t_top = d_top = None\n",
-    "if os.path.isfile(fname_bot):\n",
-    "    t_bot, d_bot = echofilter.raw.loader.evl_loader(fname_bot)\n",
-    "else:\n",
-    "    t_bot = d_bot = None"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "(\n",
-    "    d_top_new,\n",
-    "    d_bot_new,\n",
-    "    passive_starts,\n",
-    "    passive_ends,\n",
-    ") = echofilter.raw.manipulate.fixup_lines(\n",
-    "    ts_raw,\n",
-    "    depths_raw,\n",
-    "    signals_raw,\n",
-    "    mask,\n",
-    "    t_top=t_top,\n",
-    "    d_top=d_top,\n",
-    "    t_bot=t_bot,\n",
-    "    d_bot=d_bot,\n",
-    "    return_passive_boundaries=True,\n",
-    ")\n",
-    "ts_new = ts_raw"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "t_top[-1] - t_top[0]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "ts_raw[-1] - ts_raw[0]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "ts_mskd[-1] - ts_mskd[0]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.plot(t_top, d_top)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "is_passive = np.zeros(ts_raw.shape, dtype=bool)\n",
-    "\n",
-    "for pass_start, pass_end in zip(passive_starts, passive_ends):\n",
-    "    is_passive[pass_start:pass_end] = True"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "allnan = np.all(np.isnan(signals_mskd), axis=1)\n",
-    "\n",
-    "is_removed = allnan & ~is_passive\n",
-    "\n",
-    "removed_starts = np.nonzero(np.diff(is_removed.astype(np.float)) > 0)[0]\n",
-    "removed_ends = np.nonzero(np.diff(is_removed.astype(np.float)) < 0)[0]\n",
-    "\n",
-    "if len(removed_starts) > 0:\n",
-    "    removed_starts += 1\n",
-    "if len(removed_ends) > 0:\n",
-    "    removed_ends += 1\n",
-    "\n",
-    "if len(removed_ends) > 0 and (\n",
-    "    len(removed_starts) == 0 or removed_ends[0] < removed_starts[0]\n",
-    "):\n",
-    "    removed_starts = np.concatenate(([0], removed_starts))\n",
-    "\n",
-    "if len(removed_starts) > 0 and (\n",
-    "    len(removed_ends) == 0 or removed_starts[-1] > removed_ends[-1]\n",
-    "):\n",
-    "    removed_ends = np.concatenate((removed_ends, [len(is_removed)]))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Plot with time on x-axis\n",
-    "for ts, depths, signals in (\n",
-    "    (ts_raw, depths_raw, signals_raw),\n",
-    "    (ts_mskd, depths_mskd, signals_mskd),\n",
-    "):\n",
-    "    plt.figure(figsize=(12, 12))\n",
-    "    plt.pcolormesh(ts, depths, signals.T)\n",
-    "    if d_top is not None:\n",
-    "        li = t_top <= ts[-1]\n",
-    "        eliminated_line_points = np.sum(~li)\n",
-    "        if eliminated_line_points > 1:\n",
-    "            print(\n",
-    "                \"Removed {} point from top line which extend past signal recording\".format(\n",
-    "                    eliminated_line_points\n",
-    "                )\n",
-    "            )\n",
-    "        plt.plot(t_top[li], d_top[li], turbulence_color)\n",
-    "    if d_bot is not None:\n",
-    "        plt.plot(t_bot, d_bot, bottom_color)\n",
-    "    plt.gca().invert_yaxis()\n",
-    "    plt.xlabel(\"Timestamp (s)\")\n",
-    "    plt.ylabel(\"Depth (m)\")\n",
-    "    plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Plot with index on x-axis\n",
-    "for ts, depths, signals in (\n",
-    "    (np.arange(signals_raw.shape[0]), depths_raw, signals_raw),\n",
-    "    (np.arange(signals_raw.shape[0]), depths_mskd, signals_mskd),\n",
-    "):\n",
-    "    plt.figure(figsize=(12, 12))\n",
-    "    plt.pcolormesh(ts, depths, signals.T)\n",
-    "    if d_top is not None:\n",
-    "        plt.plot(ts, np.interp(ts_raw, t_top, d_top), turbulence_color)\n",
-    "    if d_bot is not None:\n",
-    "        plt.plot(ts, np.interp(ts_raw, t_bot, d_bot), bottom_color)\n",
-    "    plt.gca().invert_yaxis()\n",
-    "    plt.xlabel(\"Sample index\")\n",
-    "    plt.ylabel(\"Depth (m)\")\n",
-    "    plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "bad_mask = np.ones(signals_raw.shape, dtype=bool)\n",
-    "\n",
-    "ddepths = np.broadcast_to(depths_raw, signals_raw.shape)\n",
-    "\n",
-    "is_top = ddepths < np.expand_dims(np.interp(ts_raw, t_top, d_top), -1)\n",
-    "bad_mask[is_top] = False\n",
-    "\n",
-    "if d_bot is not None:\n",
-    "    is_bot = ddepths > np.expand_dims(np.interp(ts_raw, t_bot, d_bot), -1)\n",
-    "    bad_mask[is_bot] = False\n",
-    "\n",
-    "signals_badly = copy.deepcopy(signals_raw)\n",
-    "signals_badly[~bad_mask] = np.NaN"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.pcolormesh(ts_raw, depths_raw, signals_badly.T)\n",
-    "plt.plot(t_top, d_top, turbulence_color)\n",
-    "plt.plot(t_bot, d_bot, bottom_color)\n",
-    "plt.gca().invert_yaxis()\n",
-    "plt.xlabel(\"Timestamp (s)\")\n",
-    "plt.ylabel(\"Depth (m)\")\n",
-    "plt.title(\"Bad mask (old)\")\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "for ts, depths, signals in (\n",
-    "    (ts_raw, depths_raw, signals_raw),\n",
-    "    (ts_mskd, depths_mskd, signals_mskd),\n",
-    "):\n",
-    "    plt.figure(figsize=(12, 12))\n",
-    "    plt.pcolormesh(ts, depths, signals.T)\n",
-    "    plt.plot(ts_new, d_top_new, turbulence_color)\n",
-    "    plt.plot(ts_new, d_bot_new, bottom_color)\n",
-    "\n",
-    "    for r_start, r_end in zip(passive_starts, passive_ends):\n",
-    "        plt.fill_between(\n",
-    "            ts_raw[[r_start, r_end]],\n",
-    "            depths_raw[[0, 0]],\n",
-    "            depths_raw[[-1, -1]],\n",
-    "            facecolor=\"none\",\n",
-    "            hatch=\"//\",\n",
-    "            edgecolor=\"k\",\n",
-    "            linewidth=0.0,\n",
-    "        )\n",
-    "\n",
-    "    for r_start, r_end in zip(removed_starts, removed_ends):\n",
-    "        plt.fill_between(\n",
-    "            ts_raw[[r_start, r_end]],\n",
-    "            depths_raw[[0, 0]],\n",
-    "            depths_raw[[-1, -1]],\n",
-    "            facecolor=\"none\",\n",
-    "            hatch=\"\\\\\\\\\",\n",
-    "            edgecolor=[0, 0, 1],\n",
-    "            linewidth=0.0,\n",
-    "        )\n",
-    "\n",
-    "    plt.xlabel(\"Timestamp (s)\")\n",
-    "    plt.ylabel(\"Depth (m)\")\n",
-    "    plt.gca().invert_yaxis()\n",
-    "    plt.show()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Recompose mask from parts"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "new_mask = np.ones(signals_raw.shape, dtype=bool)\n",
-    "\n",
-    "new_mask[is_passive, :] = False\n",
-    "new_mask[is_removed, :] = False\n",
-    "\n",
-    "ddepths = np.broadcast_to(depths_raw, signals_raw.shape)\n",
-    "is_top = ddepths < np.expand_dims(np.nan_to_num(d_top_new), -1)\n",
-    "is_bot = ddepths > np.expand_dims(np.nan_to_num(d_bot_new), -1)\n",
-    "\n",
-    "new_mask[is_top] = False\n",
-    "new_mask[is_bot] = False"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.pcolormesh(ts_raw, depths_raw, signals_raw.T)\n",
-    "plt.gca().invert_yaxis()\n",
-    "plt.xlabel(\"Timestamp (s)\")\n",
-    "plt.ylabel(\"Depth (m)\")\n",
-    "plt.title(\"Raw data\")\n",
-    "plt.show()\n",
-    "\n",
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.pcolormesh(ts_raw, depths_raw, signals_mskd.T)\n",
-    "plt.gca().invert_yaxis()\n",
-    "plt.xlabel(\"Timestamp (s)\")\n",
-    "plt.ylabel(\"Depth (m)\")\n",
-    "plt.title(\"Masked data\")\n",
-    "plt.show()\n",
-    "\n",
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.pcolormesh(ts_raw, depths_raw, mask.T)\n",
-    "plt.gca().invert_yaxis()\n",
-    "plt.xlabel(\"Timestamp (s)\")\n",
-    "plt.ylabel(\"Depth (m)\")\n",
-    "plt.title(\"Original mask\")\n",
-    "plt.show()\n",
-    "\n",
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.pcolormesh(ts_raw, depths_raw, new_mask.T)\n",
-    "plt.gca().invert_yaxis()\n",
-    "plt.xlabel(\"Timestamp (s)\")\n",
-    "plt.ylabel(\"Depth (m)\")\n",
-    "plt.title(\"Decomposed and recomposed mask\")\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Check recomposed mask matches the original mask"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.pcolormesh(ts_raw, depths_raw, (np.single(mask) - np.single(new_mask)).T)\n",
-    "plt.gca().invert_yaxis()\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "np.nonzero(np.single(mask) - np.single(new_mask))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "np.max(np.single(mask) - np.single(new_mask))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "np.min(np.single(mask) - np.single(new_mask))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "np.unique(np.nonzero(np.single(mask) - np.single(new_mask))[0])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "for idx in np.unique(np.nonzero(np.single(mask) - np.single(new_mask))[0]):\n",
-    "    for data in (signals_raw, mask, new_mask):\n",
-    "        plt.figure(figsize=(12, 12))\n",
-    "        plt.pcolormesh(\n",
-    "            ts_raw[max(0, idx - 4) : min(len(ts_raw), idx + 5)],\n",
-    "            depths_raw[:80],\n",
-    "            data[max(0, idx - 4) : min(len(ts_raw), idx + 5), :80].T,\n",
-    "        )\n",
-    "        plt.gca().invert_yaxis()\n",
-    "        plt.show()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Visually inspect starts and ends of removed segments"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "for idx in np.concatenate((passive_starts, passive_ends, removed_starts, removed_ends)):\n",
-    "    for data in (signals_raw, mask, new_mask):\n",
-    "        plt.figure(figsize=(12, 12))\n",
-    "        plt.pcolormesh(\n",
-    "            ts_raw[idx - 4 : idx + 5], depths_raw[:500], data[idx - 4 : idx + 5, :500].T\n",
-    "        )\n",
-    "        plt.gca().invert_yaxis()\n",
-    "        plt.show()"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.6.9"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
+  "nbformat": 4,
+  "nbformat_minor": 2
 }
diff --git a/notebooks/Data Loader - Stationary.ipynb b/notebooks/Data Loader - Stationary.ipynb
index c8d585c7..f19cd03a 100644
--- a/notebooks/Data Loader - Stationary.ipynb	
+++ b/notebooks/Data Loader - Stationary.ipynb	
@@ -1,499 +1,499 @@
 {
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "cd .."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import os"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import numpy as np"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import echofilter.raw"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import matplotlib.pyplot as plt\n",
-    "import seaborn as sns\n",
-    "\n",
-    "sns.set()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "turbulence_color = \"c\"\n",
-    "bottom_color = \"#00dd00\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "ROOT_DATA_DIR = \"/media/scott/scratch/Datasets/dsforce/surveyExports\"\n",
-    "dataset = \"MinasPassage\"\n",
-    "# has removed window\n",
-    "sample = \"december2017/december2017_D20171214-T202211_D20171215-T015215\"\n",
-    "# has passive recording\n",
-    "sample = \"september2018/september2018_D20181116-T205220_D20181117-T022218\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "sv_path = os.path.join(ROOT_DATA_DIR, dataset, sample + \"_Sv_raw.csv\")\n",
-    "evl_path = os.path.join(ROOT_DATA_DIR, dataset, sample + \"_air.evl\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "timestamps, depths, signals = echofilter.raw.loader.transect_loader(sv_path)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "print(timestamps.shape)\n",
-    "print(depths.shape)\n",
-    "print(signals.shape)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "timestamps"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "depths"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "min(depths), max(depths)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "signals"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "t_top, d_top = echofilter.raw.loader.evl_loader(evl_path)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "print(t_top.shape)\n",
-    "print(d_top.shape)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "t_top"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "d_top"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "min(d_top), max(d_top)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.pcolormesh(timestamps, depths, signals.T)\n",
-    "# plt.plot(t_bot, d_top, 'c')\n",
-    "plt.gca().invert_yaxis()\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.plot(t_top, d_top, \"b\")\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Splitting stationary recordings into continguous periods with gaps between them"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "dt = np.diff(timestamps)\n",
-    "min(dt), max(dt)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "np.median(dt)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "sum(dt > np.median(dt) * 50)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "break_indices = np.where(dt > np.median(dt) * 50)[0]\n",
-    "\n",
-    "if len(break_indices) > 0:\n",
-    "    break_indices += 1\n",
-    "\n",
-    "break_indices"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "timestamps[296:302] - timestamps[0]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "timestamps[596:600] - timestamps[0]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "for chunk_start, chunk_end in zip(\n",
-    "    np.concatenate(([0], break_indices)),\n",
-    "    np.concatenate((break_indices, [len(timestamps)])),\n",
-    "):\n",
-    "    plt.figure(figsize=(12, 12))\n",
-    "    plt.pcolormesh(\n",
-    "        timestamps[chunk_start:chunk_end], depths, signals[chunk_start:chunk_end, :].T\n",
-    "    )\n",
-    "    li = np.logical_and(\n",
-    "        timestamps[chunk_start] <= t_top, t_top <= timestamps[chunk_end - 1]\n",
-    "    )\n",
-    "    plt.plot(t_top[li], d_top[li], turbulence_color)\n",
-    "    plt.gca().invert_yaxis()\n",
-    "    plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "depths[0] - depths[-1]"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Try using existing mask decomposer"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "transect = echofilter.raw.manipulate.load_decomposed_transect_mask(\n",
-    "    os.path.join(ROOT_DATA_DIR, dataset, sample),\n",
-    "    dataset,\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "transect"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.pcolormesh(transect[\"timestamps\"], transect[\"depths\"], transect[\"Sv\"].T)\n",
-    "plt.plot(transect[\"timestamps\"], transect[\"top\"], turbulence_color)\n",
-    "plt.plot(transect[\"timestamps\"], transect[\"bottom\"], bottom_color)\n",
-    "plt.gca().invert_yaxis()\n",
-    "plt.xlabel(\"Timestamp (s)\")\n",
-    "plt.ylabel(\"Depth (m)\")\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "for i_chunk, (ck_start, ck_end) in enumerate(\n",
-    "    zip(\n",
-    "        np.concatenate(([0], break_indices)),\n",
-    "        np.concatenate((break_indices, [len(timestamps)])),\n",
-    "    )\n",
-    "):\n",
-    "    for signal_name in (\"Sv\", \"mask\"):\n",
-    "        plt.figure(figsize=(12, 12))\n",
-    "        plt.pcolormesh(\n",
-    "            transect[\"timestamps\"][ck_start:ck_end],\n",
-    "            transect[\"depths\"],\n",
-    "            transect[signal_name][ck_start:ck_end, :].T,\n",
-    "        )\n",
-    "        plt.plot(\n",
-    "            transect[\"timestamps\"][ck_start:ck_end],\n",
-    "            transect[\"top\"][ck_start:ck_end],\n",
-    "            turbulence_color,\n",
-    "        )\n",
-    "        plt.plot(\n",
-    "            transect[\"timestamps\"][ck_start:ck_end],\n",
-    "            transect[\"bottom\"][ck_start:ck_end],\n",
-    "            bottom_color,\n",
-    "        )\n",
-    "\n",
-    "        indices = np.nonzero(transect[\"is_passive\"][ck_start:ck_end])[0]\n",
-    "        if len(indices) > 0:\n",
-    "            r_starts = [indices[0]]\n",
-    "            r_ends = [indices[-1]]\n",
-    "            breaks = np.nonzero(indices[1:] - indices[:-1] > 1)[0]\n",
-    "            for break_idx in breaks:\n",
-    "                r_ends.append(indices[break_idx + 1])\n",
-    "                r_starts.append(indices[break_idx + 2])\n",
-    "            for r_start, r_end in zip(r_starts, r_ends):\n",
-    "                plt.fill_between(\n",
-    "                    transect[\"timestamps\"][ck_start:ck_end][[r_start, r_end]],\n",
-    "                    transect[\"depths\"][[0, 0]],\n",
-    "                    transect[\"depths\"][[-1, -1]],\n",
-    "                    facecolor=\"none\",\n",
-    "                    hatch=\"//\",\n",
-    "                    edgecolor=[0.4, 0.4, 0.4],\n",
-    "                    linewidth=0.0,\n",
-    "                )\n",
-    "\n",
-    "        indices = np.nonzero(transect[\"is_removed\"][ck_start:ck_end])[0]\n",
-    "        if len(indices) > 0:\n",
-    "            r_starts = [indices[0]]\n",
-    "            r_ends = [indices[-1]]\n",
-    "            breaks = np.nonzero(indices[1:] - indices[:-1] > 1)[0]\n",
-    "            for break_idx in breaks:\n",
-    "                r_ends.append(indices[break_idx + 1])\n",
-    "                r_starts.append(indices[break_idx + 2])\n",
-    "            for r_start, r_end in zip(r_starts, r_ends):\n",
-    "                plt.fill_between(\n",
-    "                    transect[\"timestamps\"][ck_start:ck_end][[r_start, r_end]],\n",
-    "                    transect[\"depths\"][[0, 0]],\n",
-    "                    transect[\"depths\"][[-1, -1]],\n",
-    "                    facecolor=\"none\",\n",
-    "                    hatch=\"\\\\\\\\\",\n",
-    "                    edgecolor=[0, 0, 1],\n",
-    "                    linewidth=0.0,\n",
-    "                )\n",
-    "\n",
-    "        plt.gca().invert_yaxis()\n",
-    "        plt.xlabel(\"Timestamp (s)\")\n",
-    "        plt.ylabel(\"Depth (m)\")\n",
-    "        plt.title(\"Chunk {}\".format(i_chunk))\n",
-    "        plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.pcolormesh(\n",
-    "    transect[\"timestamps\"][ck_start:ck_end],\n",
-    "    transect[\"depths\"][:200],\n",
-    "    transect[\"Sv\"][ck_start:ck_end, :200].T,\n",
-    ")\n",
-    "# plt.plot(transect['timestamps'][ck_start:ck_end], transect['top'][ck_start:ck_end], turbulence_color)\n",
-    "plt.plot(\n",
-    "    transect[\"timestamps\"][ck_start:ck_end],\n",
-    "    transect[\"bottom\"][ck_start:ck_end],\n",
-    "    bottom_color,\n",
-    ")\n",
-    "\n",
-    "plt.gca().invert_yaxis()\n",
-    "plt.xlabel(\"Timestamp (s)\")\n",
-    "plt.ylabel(\"Depth (m)\")\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "ck_start = break_indices[6]\n",
-    "ck_end = break_indices[7]\n",
-    "\n",
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.pcolormesh(\n",
-    "    transect[\"timestamps\"][ck_start:ck_end],\n",
-    "    transect[\"depths\"][:200],\n",
-    "    transect[\"Sv\"][ck_start:ck_end, :200].T,\n",
-    ")\n",
-    "# plt.plot(transect['timestamps'][ck_start:ck_end], transect['top'][ck_start:ck_end], turbulence_color)\n",
-    "# plt.plot(transect['timestamps'][ck_start:ck_end], transect['bottom'][ck_start:ck_end], bottom_color)\n",
-    "\n",
-    "plt.gca().invert_yaxis()\n",
-    "plt.xlabel(\"Timestamp (s)\")\n",
-    "plt.ylabel(\"Depth (m)\")\n",
-    "plt.show()"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.6.9"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
+  "cells": [
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "cd .."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import os"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import numpy as np"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import echofilter.raw"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import matplotlib.pyplot as plt\n",
+        "import seaborn as sns\n",
+        "\n",
+        "sns.set()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "turbulence_color = \"c\"\n",
+        "bottom_color = \"#00dd00\""
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "ROOT_DATA_DIR = \"/media/scott/scratch/Datasets/dsforce/surveyExports\"\n",
+        "dataset = \"MinasPassage\"\n",
+        "# has removed window\n",
+        "sample = \"december2017/december2017_D20171214-T202211_D20171215-T015215\"\n",
+        "# has passive recording\n",
+        "sample = \"september2018/september2018_D20181116-T205220_D20181117-T022218\""
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "sv_path = os.path.join(ROOT_DATA_DIR, dataset, sample + \"_Sv_raw.csv\")\n",
+        "evl_path = os.path.join(ROOT_DATA_DIR, dataset, sample + \"_air.evl\")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "timestamps, depths, signals = echofilter.raw.loader.transect_loader(sv_path)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "print(timestamps.shape)\n",
+        "print(depths.shape)\n",
+        "print(signals.shape)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "timestamps"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "depths"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "min(depths), max(depths)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "signals"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "t_top, d_top = echofilter.raw.loader.evl_loader(evl_path)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "print(t_top.shape)\n",
+        "print(d_top.shape)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "t_top"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "d_top"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "min(d_top), max(d_top)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.pcolormesh(timestamps, depths, signals.T)\n",
+        "# plt.plot(t_bot, d_top, 'c')\n",
+        "plt.gca().invert_yaxis()\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.plot(t_top, d_top, \"b\")\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "## Splitting stationary recordings into contiguous periods with gaps between them"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "dt = np.diff(timestamps)\n",
+        "min(dt), max(dt)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "np.median(dt)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "sum(dt > np.median(dt) * 50)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "break_indices = np.where(dt > np.median(dt) * 50)[0]\n",
+        "\n",
+        "if len(break_indices) > 0:\n",
+        "    break_indices += 1\n",
+        "\n",
+        "break_indices"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "timestamps[296:302] - timestamps[0]"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "timestamps[596:600] - timestamps[0]"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "for chunk_start, chunk_end in zip(\n",
+        "    np.concatenate(([0], break_indices)),\n",
+        "    np.concatenate((break_indices, [len(timestamps)])),\n",
+        "):\n",
+        "    plt.figure(figsize=(12, 12))\n",
+        "    plt.pcolormesh(\n",
+        "        timestamps[chunk_start:chunk_end], depths, signals[chunk_start:chunk_end, :].T\n",
+        "    )\n",
+        "    li = np.logical_and(\n",
+        "        timestamps[chunk_start] <= t_top, t_top <= timestamps[chunk_end - 1]\n",
+        "    )\n",
+        "    plt.plot(t_top[li], d_top[li], turbulence_color)\n",
+        "    plt.gca().invert_yaxis()\n",
+        "    plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "depths[0] - depths[-1]"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "## Try using existing mask decomposer"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "transect = echofilter.raw.manipulate.load_decomposed_transect_mask(\n",
+        "    os.path.join(ROOT_DATA_DIR, dataset, sample),\n",
+        "    dataset,\n",
+        ")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "transect"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.pcolormesh(transect[\"timestamps\"], transect[\"depths\"], transect[\"Sv\"].T)\n",
+        "plt.plot(transect[\"timestamps\"], transect[\"top\"], turbulence_color)\n",
+        "plt.plot(transect[\"timestamps\"], transect[\"bottom\"], bottom_color)\n",
+        "plt.gca().invert_yaxis()\n",
+        "plt.xlabel(\"Timestamp (s)\")\n",
+        "plt.ylabel(\"Depth (m)\")\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "for i_chunk, (ck_start, ck_end) in enumerate(\n",
+        "    zip(\n",
+        "        np.concatenate(([0], break_indices)),\n",
+        "        np.concatenate((break_indices, [len(timestamps)])),\n",
+        "    )\n",
+        "):\n",
+        "    for signal_name in (\"Sv\", \"mask\"):\n",
+        "        plt.figure(figsize=(12, 12))\n",
+        "        plt.pcolormesh(\n",
+        "            transect[\"timestamps\"][ck_start:ck_end],\n",
+        "            transect[\"depths\"],\n",
+        "            transect[signal_name][ck_start:ck_end, :].T,\n",
+        "        )\n",
+        "        plt.plot(\n",
+        "            transect[\"timestamps\"][ck_start:ck_end],\n",
+        "            transect[\"top\"][ck_start:ck_end],\n",
+        "            turbulence_color,\n",
+        "        )\n",
+        "        plt.plot(\n",
+        "            transect[\"timestamps\"][ck_start:ck_end],\n",
+        "            transect[\"bottom\"][ck_start:ck_end],\n",
+        "            bottom_color,\n",
+        "        )\n",
+        "\n",
+        "        indices = np.nonzero(transect[\"is_passive\"][ck_start:ck_end])[0]\n",
+        "        if len(indices) > 0:\n",
+        "            r_starts = [indices[0]]\n",
+        "            r_ends = [indices[-1]]\n",
+        "            breaks = np.nonzero(indices[1:] - indices[:-1] > 1)[0]\n",
+        "            for break_idx in breaks:\n",
+        "                r_ends.append(indices[break_idx + 1])\n",
+        "                r_starts.append(indices[break_idx + 2])\n",
+        "            for r_start, r_end in zip(r_starts, r_ends):\n",
+        "                plt.fill_between(\n",
+        "                    transect[\"timestamps\"][ck_start:ck_end][[r_start, r_end]],\n",
+        "                    transect[\"depths\"][[0, 0]],\n",
+        "                    transect[\"depths\"][[-1, -1]],\n",
+        "                    facecolor=\"none\",\n",
+        "                    hatch=\"//\",\n",
+        "                    edgecolor=[0.4, 0.4, 0.4],\n",
+        "                    linewidth=0.0,\n",
+        "                )\n",
+        "\n",
+        "        indices = np.nonzero(transect[\"is_removed\"][ck_start:ck_end])[0]\n",
+        "        if len(indices) > 0:\n",
+        "            r_starts = [indices[0]]\n",
+        "            r_ends = [indices[-1]]\n",
+        "            breaks = np.nonzero(indices[1:] - indices[:-1] > 1)[0]\n",
+        "            for break_idx in breaks:\n",
+        "                r_ends.append(indices[break_idx + 1])\n",
+        "                r_starts.append(indices[break_idx + 2])\n",
+        "            for r_start, r_end in zip(r_starts, r_ends):\n",
+        "                plt.fill_between(\n",
+        "                    transect[\"timestamps\"][ck_start:ck_end][[r_start, r_end]],\n",
+        "                    transect[\"depths\"][[0, 0]],\n",
+        "                    transect[\"depths\"][[-1, -1]],\n",
+        "                    facecolor=\"none\",\n",
+        "                    hatch=\"\\\\\\\\\",\n",
+        "                    edgecolor=[0, 0, 1],\n",
+        "                    linewidth=0.0,\n",
+        "                )\n",
+        "\n",
+        "        plt.gca().invert_yaxis()\n",
+        "        plt.xlabel(\"Timestamp (s)\")\n",
+        "        plt.ylabel(\"Depth (m)\")\n",
+        "        plt.title(\"Chunk {}\".format(i_chunk))\n",
+        "        plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.pcolormesh(\n",
+        "    transect[\"timestamps\"][ck_start:ck_end],\n",
+        "    transect[\"depths\"][:200],\n",
+        "    transect[\"Sv\"][ck_start:ck_end, :200].T,\n",
+        ")\n",
+        "# plt.plot(transect['timestamps'][ck_start:ck_end], transect['top'][ck_start:ck_end], turbulence_color)\n",
+        "plt.plot(\n",
+        "    transect[\"timestamps\"][ck_start:ck_end],\n",
+        "    transect[\"bottom\"][ck_start:ck_end],\n",
+        "    bottom_color,\n",
+        ")\n",
+        "\n",
+        "plt.gca().invert_yaxis()\n",
+        "plt.xlabel(\"Timestamp (s)\")\n",
+        "plt.ylabel(\"Depth (m)\")\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "ck_start = break_indices[6]\n",
+        "ck_end = break_indices[7]\n",
+        "\n",
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.pcolormesh(\n",
+        "    transect[\"timestamps\"][ck_start:ck_end],\n",
+        "    transect[\"depths\"][:200],\n",
+        "    transect[\"Sv\"][ck_start:ck_end, :200].T,\n",
+        ")\n",
+        "# plt.plot(transect['timestamps'][ck_start:ck_end], transect['top'][ck_start:ck_end], turbulence_color)\n",
+        "# plt.plot(transect['timestamps'][ck_start:ck_end], transect['bottom'][ck_start:ck_end], bottom_color)\n",
+        "\n",
+        "plt.gca().invert_yaxis()\n",
+        "plt.xlabel(\"Timestamp (s)\")\n",
+        "plt.ylabel(\"Depth (m)\")\n",
+        "plt.show()"
+      ]
+    }
+  ],
+  "metadata": {
+    "kernelspec": {
+      "display_name": "Python 3",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.6.9"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 2
 }
diff --git a/notebooks/Estimate mean and stdev.ipynb b/notebooks/Estimate mean and stdev.ipynb
index 42d3ce30..eb2aec2c 100644
--- a/notebooks/Estimate mean and stdev.ipynb	
+++ b/notebooks/Estimate mean and stdev.ipynb	
@@ -1,599 +1,599 @@
 {
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "cd .."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import os"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import numpy as np\n",
-    "import scipy.stats"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from tqdm.autonotebook import tqdm"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import echofilter.raw\n",
-    "import echofilter.raw.shardloader"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "root_data_dir = echofilter.raw.loader.ROOT_DATA_DIR"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "partition = \"train\"\n",
-    "partitioning_version = \"firstpass\"\n",
-    "dataset = \"mobile\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "max_depth = 70"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "transect_pths = echofilter.raw.loader.get_partition_list(\n",
-    "    partition,\n",
-    "    dataset=dataset,\n",
-    "    partitioning_version=partitioning_version,\n",
-    "    root_data_dir=root_data_dir,\n",
-    "    full_path=True,\n",
-    "    sharded=True,\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import matplotlib.pyplot as plt\n",
-    "import seaborn as sns\n",
-    "\n",
-    "sns.set()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "transect_pth = transect_pths[0]\n",
-    "with open(os.path.join(transect_pth, \"n_segment.txt\"), \"r\") as f:\n",
-    "    n_segment = int(f.readline().strip())\n",
-    "\n",
-    "i_seg = 0\n",
-    "transect = echofilter.raw.shardloader.load_transect_from_shards_abs(\n",
-    "    os.path.join(transect_pth, str(i_seg))\n",
-    ")\n",
-    "transect[\"Sv\"] = transect[\"Sv\"][1:, transect[\"depths\"] <= max_depth]\n",
-    "transect[\"Sv\"] = transect[\"Sv\"].astype(np.float32)\n",
-    "print(\"mean\", np.mean(transect[\"Sv\"]))\n",
-    "print(\"median\", np.median(transect[\"Sv\"]))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "np.nanpercentile([5, 3, 5, 4, np.nan], 50)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "transect[\"Sv\"]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "sns.distplot(transect[\"Sv\"])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "qs = [0, 0.1, 1, 5, 7, 10, 25, 50, 75, 90, 93, 95, 99, 99.9, 100]\n",
-    "ps = np.percentile(transect[\"Sv\"], qs)\n",
-    "for q, p in zip(qs, ps):\n",
-    "    print(\"{:5.1f} {:7.2f}\".format(q, p))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "np.diff(np.percentile(transect[\"Sv\"], [10, 90])) / 2.56"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "np.diff(np.percentile(transect[\"Sv\"], [7, 93])) / 3"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "np.diff(np.percentile(transect[\"Sv\"], [25, 75])) / 1.35"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "np.std(transect[\"Sv\"][1:])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "mad = np.median(np.abs(transect[\"Sv\"] - np.median(transect[\"Sv\"])))\n",
-    "print(mad)\n",
-    "print(mad * 1.4826)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "np.percentile(transect[\"Sv\"], [60, 55, 50, 45, 40, 35, 30, 25, 20, 15, 10])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "np.abs(\n",
-    "    np.diff(np.percentile(transect[\"Sv\"], [60, 55, 50, 45, 40, 35, 30, 25, 20, 15, 10]))\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "np.percentile(transect[\"Sv\"], [40, 35, 30])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "for i in tqdm(range(10)):\n",
-    "    transect_pth = transect_pths[i]\n",
-    "    with open(os.path.join(transect_pth, \"n_segment.txt\"), \"r\") as f:\n",
-    "        n_segment = int(f.readline().strip())\n",
-    "\n",
-    "    i_seg = 0\n",
-    "    transect = echofilter.raw.shardloader.load_transect_from_shards_abs(\n",
-    "        os.path.join(transect_pth, str(i_seg))\n",
-    "    )\n",
-    "    transect[\"Sv\"] = transect[\"Sv\"][1:, transect[\"depths\"] <= max_depth]\n",
-    "    transect[\"Sv\"] = transect[\"Sv\"].astype(np.float32)\n",
-    "\n",
-    "    plt.figure(figsize=(12, 9))\n",
-    "    sns.distplot(transect[\"Sv\"])\n",
-    "    plt.show()\n",
-    "\n",
-    "    print(\"{:6s} {:7.2f}\".format(\"mean\", np.mean(transect[\"Sv\"])))\n",
-    "    print(\"{:6s} {:7.2f}\".format(\"median\", np.median(transect[\"Sv\"])))\n",
-    "    print(\"{:6s} {:7.2f}\".format(\"stdev\", np.std(transect[\"Sv\"])))\n",
-    "    print(\n",
-    "        \"{:6s} {:7.2f}\".format(\n",
-    "            \"mad\", np.median(np.abs(transect[\"Sv\"][1:] - np.median(transect[\"Sv\"])))\n",
-    "        )\n",
-    "    )\n",
-    "    print(\n",
-    "        \"{:6s} {:7.2f}\".format(\n",
-    "            \"iqr\", np.diff(np.percentile(transect[\"Sv\"], [25, 75]))[0]\n",
-    "        )\n",
-    "    )\n",
-    "    print(\n",
-    "        \"{:6s} {:7.2f}\".format(\n",
-    "            \"idr\", np.diff(np.percentile(transect[\"Sv\"], [10, 90]))[0]\n",
-    "        )\n",
-    "    )\n",
-    "    print(\n",
-    "        \"{:6s} {:7.2f}\".format(\n",
-    "            \"i7r\", np.diff(np.percentile(transect[\"Sv\"], [7, 93]))[0]\n",
-    "        )\n",
-    "    )"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "means = []\n",
-    "stdevs = []\n",
-    "medians = []\n",
-    "mads = []\n",
-    "percentiles = []\n",
-    "std25 = []\n",
-    "\n",
-    "qs = [0, 0.1, 1, 5, 7, 10, 15, 20, 25, 30, 35, 40, 50, 75, 90, 93, 95, 99, 99.9, 100]\n",
-    "\n",
-    "for transect_pth in tqdm(transect_pths):\n",
-    "\n",
-    "    try:\n",
-    "        # Check how many segments the transect was divided into\n",
-    "        with open(os.path.join(transect_pth, \"n_segment.txt\"), \"r\") as f:\n",
-    "            n_segment = int(f.readline().strip())\n",
-    "\n",
-    "        for i_seg in range(n_segment):\n",
-    "            transect = echofilter.raw.shardloader.load_transect_from_shards_abs(\n",
-    "                os.path.join(transect_pth, str(i_seg))\n",
-    "            )\n",
-    "            transect[\"Sv\"] = transect[\"Sv\"][1:, transect[\"depths\"] <= max_depth]\n",
-    "            if len(transect[\"Sv\"]) < 2:\n",
-    "                continue\n",
-    "            transect[\"Sv\"] = transect[\"Sv\"].astype(np.float32)\n",
-    "            means.append(np.nanmean(transect[\"Sv\"]))\n",
-    "            stdevs.append(np.nanstd(transect[\"Sv\"]))\n",
-    "            median = np.nanmedian(transect[\"Sv\"])\n",
-    "            medians.append(median)\n",
-    "            mads.append(np.nanmedian(np.abs(transect[\"Sv\"] - median)))\n",
-    "            percentiles.append(np.nanpercentile(transect[\"Sv\"], qs))\n",
-    "            pc25 = np.nanpercentile(transect[\"Sv\"], 25)\n",
-    "            std25.append(np.sqrt(np.nanmean(np.power(transect[\"Sv\"] - pc25, 2))))\n",
-    "\n",
-    "    except Exception as ex:\n",
-    "        print(\"Error loading shard from {}\".format(transect_pth))\n",
-    "        print(ex)\n",
-    "\n",
-    "MEAN = np.nanmean(means)\n",
-    "print(\"mean = {}\".format(MEAN))\n",
-    "print(\"mean of medians = {}\".format(np.nanmean(medians)))\n",
-    "\n",
-    "qs = np.array(qs)\n",
-    "percentiles = np.array(percentiles)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "variances = []\n",
-    "\n",
-    "for transect_pth in tqdm(transect_pths):\n",
-    "\n",
-    "    try:\n",
-    "        # Check how many segments the transect was divided into\n",
-    "        with open(os.path.join(transect_pth, \"n_segment.txt\"), \"r\") as f:\n",
-    "            n_segment = int(f.readline().strip())\n",
-    "\n",
-    "        for i_seg in range(n_segment):\n",
-    "            transect = echofilter.raw.shardloader.load_transect_from_shards_abs(\n",
-    "                os.path.join(transect_pth, str(i_seg))\n",
-    "            )\n",
-    "            transect[\"Sv\"] = transect[\"Sv\"][1:, transect[\"depths\"] <= max_depth]\n",
-    "            if len(transect[\"Sv\"]) < 2:\n",
-    "                continue\n",
-    "            transect[\"Sv\"] = transect[\"Sv\"].astype(np.float32)\n",
-    "            variances.append(np.nanmean(np.power(transect[\"Sv\"] - MEAN, 2)))\n",
-    "    except Exception as ex:\n",
-    "        print(\"Error loading shard from {}\".format(transect_pth))\n",
-    "        print(ex)\n",
-    "\n",
-    "\n",
-    "VARIANCE = np.mean(variances)\n",
-    "print(\"variance = {}\".format(VARIANCE))\n",
-    "print(\"stdev = {}\".format(np.sqrt(VARIANCE)))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "iqrs = (\n",
-    "    percentiles[:, np.nonzero(qs == 75)[0][0]]\n",
-    "    - percentiles[:, np.nonzero(qs == 25)[0][0]]\n",
-    ")\n",
-    "IQR = np.mean(iqrs)\n",
-    "print(IQR)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "idrs = (\n",
-    "    percentiles[:, np.nonzero(qs == 90)[0][0]]\n",
-    "    - percentiles[:, np.nonzero(qs == 10)[0][0]]\n",
-    ")\n",
-    "IDR = np.mean(idrs)\n",
-    "print(IDR)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "i7rs = (\n",
-    "    percentiles[:, np.nonzero(qs == 93)[0][0]]\n",
-    "    - percentiles[:, np.nonzero(qs == 7)[0][0]]\n",
-    ")\n",
-    "I7R = np.mean(i7rs)\n",
-    "print(I7R)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "print(np.sqrt(VARIANCE))\n",
-    "print(np.mean(stdevs))\n",
-    "\n",
-    "print(np.mean(mads) * 1.4826)\n",
-    "print(IQR / 1.35)\n",
-    "print(IDR / 2.56)\n",
-    "print(I7R / 3.0)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "print(\n",
-    "    \"{:6s} {:6s}  {:6s}  {:6s}  {:6s}  {:5s}  {:5s}  {:5s}\".format(\n",
-    "        \"name\", \"SEM\", \"mean\", \"min\", \"max\", \"pcerr\", \"pcstd\", \"pcran\"\n",
-    "    )\n",
-    ")\n",
-    "for name, estimator in [\n",
-    "    (\"mean\", means),\n",
-    "    (\"median\", medians),\n",
-    "    (\"stdev\", stdevs),\n",
-    "    (\"MAD\", mads),\n",
-    "    (\"IQR\", iqrs),\n",
-    "    (\"IDR\", idrs),\n",
-    "    (\"I7R\", i7rs),\n",
-    "    (\"std25\", std25),\n",
-    "]:\n",
-    "    print(\n",
-    "        \"{:6s} {:6.4f}  {:6.1f}  {:6.1f}  {:6.1f}  {:5.3f}  {:5.2f}  {:5.1f}\".format(\n",
-    "            name,\n",
-    "            scipy.stats.sem(estimator),\n",
-    "            np.mean(estimator),\n",
-    "            np.min(estimator),\n",
-    "            np.max(estimator),\n",
-    "            scipy.stats.sem(estimator) / np.abs(np.mean(estimator)) * 100,\n",
-    "            np.std(estimator) / np.abs(np.mean(estimator)) * 100,\n",
-    "            (np.max(estimator) - np.min(estimator)) / np.abs(np.mean(estimator)) * 100,\n",
-    "        )\n",
-    "    )"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "print(\n",
-    "    \"{:6s}  {:6s}  {:6s}  {:6s}  {:6s}  {:5s}  {:5s}  {:5s}\".format(\n",
-    "        \"percentile\", \"SEM\", \"mean\", \"min\", \"max\", \"pcerr\", \"pcstd\", \"pcran\"\n",
-    "    )\n",
-    ")\n",
-    "for iq, q in enumerate(qs):\n",
-    "    estimator = percentiles[:, iq]\n",
-    "    print(\n",
-    "        \"{:10.1f}  {:6.4f}  {:6.1f}  {:6.1f}  {:6.1f}  {:5.3f}  {:5.2f}  {:5.1f}\".format(\n",
-    "            q,\n",
-    "            scipy.stats.sem(estimator),\n",
-    "            np.mean(estimator),\n",
-    "            np.min(estimator),\n",
-    "            np.max(estimator),\n",
-    "            scipy.stats.sem(estimator) / np.abs(np.mean(estimator)) * 100,\n",
-    "            np.std(estimator) / np.abs(np.mean(estimator)) * 100,\n",
-    "            (np.max(estimator) - np.min(estimator)) / np.abs(np.mean(estimator)) * 100,\n",
-    "        )\n",
-    "    )"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.figure(figsize=(15, 9))\n",
-    "sns.distplot(means)\n",
-    "plt.title(\"mean estimates\")\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.figure(figsize=(15, 9))\n",
-    "sns.distplot(medians)\n",
-    "plt.title(\"median estimates\")\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.figure(figsize=(15, 9))\n",
-    "sns.distplot(stdevs)\n",
-    "plt.title(\"standard deviation estimates\")\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.figure(figsize=(15, 9))\n",
-    "sns.distplot(mads)\n",
-    "plt.title(\"MAD estimates\")\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.figure(figsize=(15, 9))\n",
-    "sns.distplot(iqrs)\n",
-    "plt.title(\"IQR estimates\")\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.figure(figsize=(15, 9))\n",
-    "sns.distplot(idrs)\n",
-    "plt.title(\"IDR estimates\")\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.figure(figsize=(15, 9))\n",
-    "sns.distplot(i7rs)\n",
-    "plt.title(\"7-93 estimates\")\n",
-    "plt.show()"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.6.9"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
+  "cells": [
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "cd .."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import os"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import numpy as np\n",
+        "import scipy.stats"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "from tqdm.autonotebook import tqdm"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import echofilter.raw\n",
+        "import echofilter.raw.shardloader"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "root_data_dir = echofilter.raw.loader.ROOT_DATA_DIR"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "partition = \"train\"\n",
+        "partitioning_version = \"firstpass\"\n",
+        "dataset = \"mobile\""
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "max_depth = 70"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "transect_pths = echofilter.raw.loader.get_partition_list(\n",
+        "    partition,\n",
+        "    dataset=dataset,\n",
+        "    partitioning_version=partitioning_version,\n",
+        "    root_data_dir=root_data_dir,\n",
+        "    full_path=True,\n",
+        "    sharded=True,\n",
+        ")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import matplotlib.pyplot as plt\n",
+        "import seaborn as sns\n",
+        "\n",
+        "sns.set()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "transect_pth = transect_pths[0]\n",
+        "with open(os.path.join(transect_pth, \"n_segment.txt\"), \"r\") as f:\n",
+        "    n_segment = int(f.readline().strip())\n",
+        "\n",
+        "i_seg = 0\n",
+        "transect = echofilter.raw.shardloader.load_transect_from_shards_abs(\n",
+        "    os.path.join(transect_pth, str(i_seg))\n",
+        ")\n",
+        "transect[\"Sv\"] = transect[\"Sv\"][1:, transect[\"depths\"] <= max_depth]\n",
+        "transect[\"Sv\"] = transect[\"Sv\"].astype(np.float32)\n",
+        "print(\"mean\", np.mean(transect[\"Sv\"]))\n",
+        "print(\"median\", np.median(transect[\"Sv\"]))"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "np.nanpercentile([5, 3, 5, 4, np.nan], 50)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "transect[\"Sv\"]"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "sns.distplot(transect[\"Sv\"])"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "qs = [0, 0.1, 1, 5, 7, 10, 25, 50, 75, 90, 93, 95, 99, 99.9, 100]\n",
+        "ps = np.percentile(transect[\"Sv\"], qs)\n",
+        "for q, p in zip(qs, ps):\n",
+        "    print(\"{:5.1f} {:7.2f}\".format(q, p))"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "np.diff(np.percentile(transect[\"Sv\"], [10, 90])) / 2.56"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "np.diff(np.percentile(transect[\"Sv\"], [7, 93])) / 3"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "np.diff(np.percentile(transect[\"Sv\"], [25, 75])) / 1.35"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "np.std(transect[\"Sv\"][1:])"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "mad = np.median(np.abs(transect[\"Sv\"] - np.median(transect[\"Sv\"])))\n",
+        "print(mad)\n",
+        "print(mad * 1.4826)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "np.percentile(transect[\"Sv\"], [60, 55, 50, 45, 40, 35, 30, 25, 20, 15, 10])"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "np.abs(\n",
+        "    np.diff(np.percentile(transect[\"Sv\"], [60, 55, 50, 45, 40, 35, 30, 25, 20, 15, 10]))\n",
+        ")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "np.percentile(transect[\"Sv\"], [40, 35, 30])"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "for i in tqdm(range(10)):\n",
+        "    transect_pth = transect_pths[i]\n",
+        "    with open(os.path.join(transect_pth, \"n_segment.txt\"), \"r\") as f:\n",
+        "        n_segment = int(f.readline().strip())\n",
+        "\n",
+        "    i_seg = 0\n",
+        "    transect = echofilter.raw.shardloader.load_transect_from_shards_abs(\n",
+        "        os.path.join(transect_pth, str(i_seg))\n",
+        "    )\n",
+        "    transect[\"Sv\"] = transect[\"Sv\"][1:, transect[\"depths\"] <= max_depth]\n",
+        "    transect[\"Sv\"] = transect[\"Sv\"].astype(np.float32)\n",
+        "\n",
+        "    plt.figure(figsize=(12, 9))\n",
+        "    sns.distplot(transect[\"Sv\"])\n",
+        "    plt.show()\n",
+        "\n",
+        "    print(\"{:6s} {:7.2f}\".format(\"mean\", np.mean(transect[\"Sv\"])))\n",
+        "    print(\"{:6s} {:7.2f}\".format(\"median\", np.median(transect[\"Sv\"])))\n",
+        "    print(\"{:6s} {:7.2f}\".format(\"stdev\", np.std(transect[\"Sv\"])))\n",
+        "    print(\n",
+        "        \"{:6s} {:7.2f}\".format(\n",
+        "            \"mad\", np.median(np.abs(transect[\"Sv\"][1:] - np.median(transect[\"Sv\"])))\n",
+        "        )\n",
+        "    )\n",
+        "    print(\n",
+        "        \"{:6s} {:7.2f}\".format(\n",
+        "            \"iqr\", np.diff(np.percentile(transect[\"Sv\"], [25, 75]))[0]\n",
+        "        )\n",
+        "    )\n",
+        "    print(\n",
+        "        \"{:6s} {:7.2f}\".format(\n",
+        "            \"idr\", np.diff(np.percentile(transect[\"Sv\"], [10, 90]))[0]\n",
+        "        )\n",
+        "    )\n",
+        "    print(\n",
+        "        \"{:6s} {:7.2f}\".format(\n",
+        "            \"i7r\", np.diff(np.percentile(transect[\"Sv\"], [7, 93]))[0]\n",
+        "        )\n",
+        "    )"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "means = []\n",
+        "stdevs = []\n",
+        "medians = []\n",
+        "mads = []\n",
+        "percentiles = []\n",
+        "std25 = []\n",
+        "\n",
+        "qs = [0, 0.1, 1, 5, 7, 10, 15, 20, 25, 30, 35, 40, 50, 75, 90, 93, 95, 99, 99.9, 100]\n",
+        "\n",
+        "for transect_pth in tqdm(transect_pths):\n",
+        "\n",
+        "    try:\n",
+        "        # Check how many segments the transect was divided into\n",
+        "        with open(os.path.join(transect_pth, \"n_segment.txt\"), \"r\") as f:\n",
+        "            n_segment = int(f.readline().strip())\n",
+        "\n",
+        "        for i_seg in range(n_segment):\n",
+        "            transect = echofilter.raw.shardloader.load_transect_from_shards_abs(\n",
+        "                os.path.join(transect_pth, str(i_seg))\n",
+        "            )\n",
+        "            transect[\"Sv\"] = transect[\"Sv\"][1:, transect[\"depths\"] <= max_depth]\n",
+        "            if len(transect[\"Sv\"]) < 2:\n",
+        "                continue\n",
+        "            transect[\"Sv\"] = transect[\"Sv\"].astype(np.float32)\n",
+        "            means.append(np.nanmean(transect[\"Sv\"]))\n",
+        "            stdevs.append(np.nanstd(transect[\"Sv\"]))\n",
+        "            median = np.nanmedian(transect[\"Sv\"])\n",
+        "            medians.append(median)\n",
+        "            mads.append(np.nanmedian(np.abs(transect[\"Sv\"] - median)))\n",
+        "            percentiles.append(np.nanpercentile(transect[\"Sv\"], qs))\n",
+        "            pc25 = np.nanpercentile(transect[\"Sv\"], 25)\n",
+        "            std25.append(np.sqrt(np.nanmean(np.power(transect[\"Sv\"] - pc25, 2))))\n",
+        "\n",
+        "    except Exception as ex:\n",
+        "        print(\"Error loading shard from {}\".format(transect_pth))\n",
+        "        print(ex)\n",
+        "\n",
+        "MEAN = np.nanmean(means)\n",
+        "print(\"mean = {}\".format(MEAN))\n",
+        "print(\"mean of medians = {}\".format(np.nanmean(medians)))\n",
+        "\n",
+        "qs = np.array(qs)\n",
+        "percentiles = np.array(percentiles)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "variances = []\n",
+        "\n",
+        "for transect_pth in tqdm(transect_pths):\n",
+        "\n",
+        "    try:\n",
+        "        # Check how many segments the transect was divided into\n",
+        "        with open(os.path.join(transect_pth, \"n_segment.txt\"), \"r\") as f:\n",
+        "            n_segment = int(f.readline().strip())\n",
+        "\n",
+        "        for i_seg in range(n_segment):\n",
+        "            transect = echofilter.raw.shardloader.load_transect_from_shards_abs(\n",
+        "                os.path.join(transect_pth, str(i_seg))\n",
+        "            )\n",
+        "            transect[\"Sv\"] = transect[\"Sv\"][1:, transect[\"depths\"] <= max_depth]\n",
+        "            if len(transect[\"Sv\"]) < 2:\n",
+        "                continue\n",
+        "            transect[\"Sv\"] = transect[\"Sv\"].astype(np.float32)\n",
+        "            variances.append(np.nanmean(np.power(transect[\"Sv\"] - MEAN, 2)))\n",
+        "    except Exception as ex:\n",
+        "        print(\"Error loading shard from {}\".format(transect_pth))\n",
+        "        print(ex)\n",
+        "\n",
+        "\n",
+        "VARIANCE = np.mean(variances)\n",
+        "print(\"variance = {}\".format(VARIANCE))\n",
+        "print(\"stdev = {}\".format(np.sqrt(VARIANCE)))"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "iqrs = (\n",
+        "    percentiles[:, np.nonzero(qs == 75)[0][0]]\n",
+        "    - percentiles[:, np.nonzero(qs == 25)[0][0]]\n",
+        ")\n",
+        "IQR = np.mean(iqrs)\n",
+        "print(IQR)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "idrs = (\n",
+        "    percentiles[:, np.nonzero(qs == 90)[0][0]]\n",
+        "    - percentiles[:, np.nonzero(qs == 10)[0][0]]\n",
+        ")\n",
+        "IDR = np.mean(idrs)\n",
+        "print(IDR)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "i7rs = (\n",
+        "    percentiles[:, np.nonzero(qs == 93)[0][0]]\n",
+        "    - percentiles[:, np.nonzero(qs == 7)[0][0]]\n",
+        ")\n",
+        "I7R = np.mean(i7rs)\n",
+        "print(I7R)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "print(np.sqrt(VARIANCE))\n",
+        "print(np.mean(stdevs))\n",
+        "\n",
+        "print(np.mean(mads) * 1.4826)\n",
+        "print(IQR / 1.35)\n",
+        "print(IDR / 2.56)\n",
+        "print(I7R / 3.0)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "print(\n",
+        "    \"{:6s} {:6s}  {:6s}  {:6s}  {:6s}  {:5s}  {:5s}  {:5s}\".format(\n",
+        "        \"name\", \"SEM\", \"mean\", \"min\", \"max\", \"pcerr\", \"pcstd\", \"pcran\"\n",
+        "    )\n",
+        ")\n",
+        "for name, estimator in [\n",
+        "    (\"mean\", means),\n",
+        "    (\"median\", medians),\n",
+        "    (\"stdev\", stdevs),\n",
+        "    (\"MAD\", mads),\n",
+        "    (\"IQR\", iqrs),\n",
+        "    (\"IDR\", idrs),\n",
+        "    (\"I7R\", i7rs),\n",
+        "    (\"std25\", std25),\n",
+        "]:\n",
+        "    print(\n",
+        "        \"{:6s} {:6.4f}  {:6.1f}  {:6.1f}  {:6.1f}  {:5.3f}  {:5.2f}  {:5.1f}\".format(\n",
+        "            name,\n",
+        "            scipy.stats.sem(estimator),\n",
+        "            np.mean(estimator),\n",
+        "            np.min(estimator),\n",
+        "            np.max(estimator),\n",
+        "            scipy.stats.sem(estimator) / np.abs(np.mean(estimator)) * 100,\n",
+        "            np.std(estimator) / np.abs(np.mean(estimator)) * 100,\n",
+        "            (np.max(estimator) - np.min(estimator)) / np.abs(np.mean(estimator)) * 100,\n",
+        "        )\n",
+        "    )"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "print(\n",
+        "    \"{:6s}  {:6s}  {:6s}  {:6s}  {:6s}  {:5s}  {:5s}  {:5s}\".format(\n",
+        "        \"percentile\", \"SEM\", \"mean\", \"min\", \"max\", \"pcerr\", \"pcstd\", \"pcran\"\n",
+        "    )\n",
+        ")\n",
+        "for iq, q in enumerate(qs):\n",
+        "    estimator = percentiles[:, iq]\n",
+        "    print(\n",
+        "        \"{:10.1f}  {:6.4f}  {:6.1f}  {:6.1f}  {:6.1f}  {:5.3f}  {:5.2f}  {:5.1f}\".format(\n",
+        "            q,\n",
+        "            scipy.stats.sem(estimator),\n",
+        "            np.mean(estimator),\n",
+        "            np.min(estimator),\n",
+        "            np.max(estimator),\n",
+        "            scipy.stats.sem(estimator) / np.abs(np.mean(estimator)) * 100,\n",
+        "            np.std(estimator) / np.abs(np.mean(estimator)) * 100,\n",
+        "            (np.max(estimator) - np.min(estimator)) / np.abs(np.mean(estimator)) * 100,\n",
+        "        )\n",
+        "    )"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.figure(figsize=(15, 9))\n",
+        "sns.distplot(means)\n",
+        "plt.title(\"mean estimates\")\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.figure(figsize=(15, 9))\n",
+        "sns.distplot(medians)\n",
+        "plt.title(\"median estimates\")\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.figure(figsize=(15, 9))\n",
+        "sns.distplot(stdevs)\n",
+        "plt.title(\"standard deviation estimates\")\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.figure(figsize=(15, 9))\n",
+        "sns.distplot(mads)\n",
+        "plt.title(\"MAD estimates\")\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.figure(figsize=(15, 9))\n",
+        "sns.distplot(iqrs)\n",
+        "plt.title(\"IQR estimates\")\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.figure(figsize=(15, 9))\n",
+        "sns.distplot(idrs)\n",
+        "plt.title(\"IDR estimates\")\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.figure(figsize=(15, 9))\n",
+        "sns.distplot(i7rs)\n",
+        "plt.title(\"7-93 estimates\")\n",
+        "plt.show()"
+      ]
+    }
+  ],
+  "metadata": {
+    "kernelspec": {
+      "display_name": "Python 3",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.6.9"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 2
 }
diff --git a/notebooks/Finding mask all removed.ipynb b/notebooks/Finding mask all removed.ipynb
index 2df01e2e..5c1fd219 100644
--- a/notebooks/Finding mask all removed.ipynb	
+++ b/notebooks/Finding mask all removed.ipynb	
@@ -1,356 +1,356 @@
 {
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "cd .."
-   ]
+  "cells": [
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "cd .."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import echofilter.raw.loader"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import os"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import numpy as np"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import matplotlib.pyplot as plt\n",
+        "import seaborn as sns\n",
+        "\n",
+        "sns.set()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "ROOT_DATA_DIR = \"/media/scott/scratch/Datasets/dsforce/surveyExports\"\n",
+        "# example with 1 passive period, 1 turbulence cut out\n",
+        "sample = \"mobile/Survey16/Survey16_GR3_N1W_E\"\n",
+        "# example with lots of short passive periods, 1 proper passive period, 1 turbulence cut out\n",
+        "# sample = 'mobile/Survey17/Survey17_GR1_N0W_E'\n",
+        "\n",
+        "fname_raw = os.path.join(ROOT_DATA_DIR, sample + \"_Sv_raw.csv\")\n",
+        "fname_masked = os.path.join(ROOT_DATA_DIR, sample + \"_Sv.csv\")\n",
+        "\n",
+        "ts_raw, depths_raw, signals_raw = echofilter.raw.loader.transect_loader(fname_raw)\n",
+        "ts_mskd, depths_mskd, signals_mskd = echofilter.raw.loader.transect_loader(fname_masked)\n",
+        "\n",
+        "t_bot, d_bot = echofilter.raw.loader.evl_loader(\n",
+        "    os.path.join(ROOT_DATA_DIR, sample + \"_bottom.evl\")\n",
+        ")\n",
+        "t_top, d_top = echofilter.raw.loader.evl_loader(\n",
+        "    os.path.join(ROOT_DATA_DIR, sample + \"_turbulence.evl\")\n",
+        ")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "ts_new, d_top_new, d_bot_new = echofilter.raw.manipulate.make_lines_from_masked_csv(\n",
+        "    fname_masked\n",
+        ")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "np.min(signals_raw), np.max(signals_raw)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "ts_raw.shape, depths_raw.shape, signals_raw.shape"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "turbulence_color = \"c\"\n",
+        "bottom_color = \"#00ee00\""
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.pcolormesh(ts_raw, depths_raw, signals_raw.T)\n",
+        "plt.plot(t_top, d_top, \"k\")\n",
+        "plt.plot(t_bot, d_bot, \"w\")\n",
+        "plt.plot(ts_new, d_top_new, turbulence_color)\n",
+        "plt.plot(ts_new, d_bot_new, bottom_color)\n",
+        "plt.gca().invert_yaxis()\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.pcolormesh(ts_mskd, depths_mskd, signals_mskd.T)\n",
+        "plt.plot(t_top, d_top, turbulence_color)\n",
+        "plt.plot(t_bot, d_bot, bottom_color)\n",
+        "plt.gca().invert_yaxis()\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "## Finding rows which are fully removed from masked output"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "np.all(np.isnan(signals_mskd), axis=1)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "allnan = np.all(np.isnan(signals_mskd), axis=1)\n",
+        "removed_starts = np.nonzero(np.diff(allnan.astype(np.float)) > 0)[0] + 1\n",
+        "removed_ends = np.nonzero(np.diff(allnan.astype(np.float)) < 0)[0] + 1\n",
+        "\n",
+        "if removed_ends[0] < removed_starts[0]:\n",
+        "    removed_starts = np.concatenate(([0], removed_starts))\n",
+        "\n",
+        "if removed_starts[-1] > removed_ends[-1]:\n",
+        "    removed_ends = np.concatenate((removed_ends, [len(allnan)]))"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "print(removed_starts)\n",
+        "print(removed_ends)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "for index_start, index_end in zip(removed_starts, removed_ends):\n",
+        "    for ts, depths, signals in (\n",
+        "        (ts_mskd, depths_mskd, signals_mskd),\n",
+        "        (ts_raw, depths_raw, signals_raw),\n",
+        "    ):\n",
+        "        an = np.all(np.isnan(signals[index_start:index_end, :]), axis=1)\n",
+        "        print(np.sum(an), np.sum(~an))\n",
+        "        plt.figure(figsize=(12, 12))\n",
+        "        plt.pcolormesh(\n",
+        "            ts[index_start:index_end], depths, signals[index_start:index_end, :].T\n",
+        "        )\n",
+        "        li = np.all([t_top >= ts[index_start], t_top <= ts[index_end - 1]], axis=0)\n",
+        "        plt.plot(t_top[li], d_top[li], \"k\")\n",
+        "        li = np.all([t_bot >= ts[index_start], t_bot <= ts[index_end - 1]], axis=0)\n",
+        "        plt.plot(t_bot[li], d_bot[li], \"w\")\n",
+        "        plt.plot(\n",
+        "            ts_new[index_start:index_end],\n",
+        "            d_top_new[index_start:index_end],\n",
+        "            turbulence_color,\n",
+        "        )\n",
+        "        plt.plot(\n",
+        "            ts_new[index_start:index_end],\n",
+        "            d_bot_new[index_start:index_end],\n",
+        "            bottom_color,\n",
+        "        )\n",
+        "        plt.gca().invert_yaxis()\n",
+        "        plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "for index_start, index_end in zip(\n",
+        "    np.concatenate(([0], removed_ends)),\n",
+        "    np.concatenate((removed_starts, [signals_raw.shape[0]])),\n",
+        "):\n",
+        "    if index_start == index_end:\n",
+        "        continue\n",
+        "    for ts, depths, signals in (\n",
+        "        (ts_mskd, depths_mskd, signals_mskd),\n",
+        "        (ts_raw, depths_raw, signals_raw),\n",
+        "    ):\n",
+        "        an = np.all(np.isnan(signals[index_start:index_end, :]), axis=1)\n",
+        "        print(np.sum(an), np.sum(~an))\n",
+        "        plt.figure(figsize=(12, 12))\n",
+        "        plt.pcolormesh(\n",
+        "            ts[index_start:index_end], depths, signals[index_start:index_end, :].T\n",
+        "        )\n",
+        "        li = np.all([t_top >= ts[index_start], t_top <= ts[index_end - 1]], axis=0)\n",
+        "        plt.plot(t_top[li], d_top[li], \"k\")\n",
+        "        li = np.all([t_bot >= ts[index_start], t_bot <= ts[index_end - 1]], axis=0)\n",
+        "        plt.plot(t_bot[li], d_bot[li], \"w\")\n",
+        "        plt.plot(\n",
+        "            ts_new[index_start:index_end],\n",
+        "            d_top_new[index_start:index_end],\n",
+        "            turbulence_color,\n",
+        "        )\n",
+        "        plt.plot(\n",
+        "            ts_new[index_start:index_end],\n",
+        "            d_bot_new[index_start:index_end],\n",
+        "            bottom_color,\n",
+        "        )\n",
+        "        plt.gca().invert_yaxis()\n",
+        "        plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "print(signals_raw.shape)\n",
+        "print(signals_mskd.shape)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.pcolormesh(ts_raw[234:257], depths_raw[:34], signals_raw[234:257, :34].T)\n",
+        "plt.gca().invert_yaxis()\n",
+        "plt.show()\n",
+        "\n",
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.pcolormesh(ts_raw[235:256], depths_raw[:34], signals_raw[235:256, :34].T)\n",
+        "plt.gca().invert_yaxis()\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.pcolormesh(ts_mskd[234:257], depths_mskd[:34], signals_mskd[234:257, :34].T)\n",
+        "plt.gca().invert_yaxis()\n",
+        "plt.show()\n",
+        "\n",
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.pcolormesh(ts_mskd[235:256], depths_mskd[:34], signals_mskd[235:256, :34].T)\n",
+        "plt.gca().invert_yaxis()\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.pcolormesh(ts_mskd[234:257], depths_mskd, signals_mskd[234:257, :].T)\n",
+        "plt.gca().invert_yaxis()\n",
+        "plt.show()\n",
+        "\n",
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.pcolormesh(ts_mskd[235:256], depths_mskd, signals_mskd[235:256, :].T)\n",
+        "plt.gca().invert_yaxis()\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "np.nonzero(allnan)"
+      ]
+    }
+  ],
+  "metadata": {
+    "kernelspec": {
+      "display_name": "Python 3",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.6.9"
+    }
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import echofilter.raw.loader"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import os"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import numpy as np"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import matplotlib.pyplot as plt\n",
-    "import seaborn as sns\n",
-    "\n",
-    "sns.set()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "ROOT_DATA_DIR = \"/media/scott/scratch/Datasets/dsforce/surveyExports\"\n",
-    "# example with 1 passive period, 1 turbulence cut out\n",
-    "sample = \"mobile/Survey16/Survey16_GR3_N1W_E\"\n",
-    "# example with lots of short passive periods, 1 proper passive period, 1 turbulence cut out\n",
-    "# sample = 'mobile/Survey17/Survey17_GR1_N0W_E'\n",
-    "\n",
-    "fname_raw = os.path.join(ROOT_DATA_DIR, sample + \"_Sv_raw.csv\")\n",
-    "fname_masked = os.path.join(ROOT_DATA_DIR, sample + \"_Sv.csv\")\n",
-    "\n",
-    "ts_raw, depths_raw, signals_raw = echofilter.raw.loader.transect_loader(fname_raw)\n",
-    "ts_mskd, depths_mskd, signals_mskd = echofilter.raw.loader.transect_loader(fname_masked)\n",
-    "\n",
-    "t_bot, d_bot = echofilter.raw.loader.evl_loader(\n",
-    "    os.path.join(ROOT_DATA_DIR, sample + \"_bottom.evl\")\n",
-    ")\n",
-    "t_top, d_top = echofilter.raw.loader.evl_loader(\n",
-    "    os.path.join(ROOT_DATA_DIR, sample + \"_turbulence.evl\")\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "ts_new, d_top_new, d_bot_new = echofilter.raw.manipulate.make_lines_from_masked_csv(\n",
-    "    fname_masked\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "np.min(signals_raw), np.max(signals_raw)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "ts_raw.shape, depths_raw.shape, signals_raw.shape"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "turbulence_color = \"c\"\n",
-    "bottom_color = \"#00ee00\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.pcolormesh(ts_raw, depths_raw, signals_raw.T)\n",
-    "plt.plot(t_top, d_top, \"k\")\n",
-    "plt.plot(t_bot, d_bot, \"w\")\n",
-    "plt.plot(ts_new, d_top_new, turbulence_color)\n",
-    "plt.plot(ts_new, d_bot_new, bottom_color)\n",
-    "plt.gca().invert_yaxis()\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.pcolormesh(ts_mskd, depths_mskd, signals_mskd.T)\n",
-    "plt.plot(t_top, d_top, turbulence_color)\n",
-    "plt.plot(t_bot, d_bot, bottom_color)\n",
-    "plt.gca().invert_yaxis()\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Finding rows which are fully removed from masked output"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "np.all(np.isnan(signals_mskd), axis=1)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "allnan = np.all(np.isnan(signals_mskd), axis=1)\n",
-    "removed_starts = np.nonzero(np.diff(allnan.astype(np.float)) > 0)[0] + 1\n",
-    "removed_ends = np.nonzero(np.diff(allnan.astype(np.float)) < 0)[0] + 1\n",
-    "\n",
-    "if removed_ends[0] < removed_starts[0]:\n",
-    "    removed_starts = np.concatenate(([0], removed_starts))\n",
-    "\n",
-    "if removed_starts[-1] > removed_ends[-1]:\n",
-    "    removed_ends = np.concatenate((removed_ends, [len(allnan)]))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "print(removed_starts)\n",
-    "print(removed_ends)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "for index_start, index_end in zip(removed_starts, removed_ends):\n",
-    "    for ts, depths, signals in (\n",
-    "        (ts_mskd, depths_mskd, signals_mskd),\n",
-    "        (ts_raw, depths_raw, signals_raw),\n",
-    "    ):\n",
-    "        an = np.all(np.isnan(signals[index_start:index_end, :]), axis=1)\n",
-    "        print(np.sum(an), np.sum(~an))\n",
-    "        plt.figure(figsize=(12, 12))\n",
-    "        plt.pcolormesh(\n",
-    "            ts[index_start:index_end], depths, signals[index_start:index_end, :].T\n",
-    "        )\n",
-    "        li = np.all([t_top >= ts[index_start], t_top <= ts[index_end - 1]], axis=0)\n",
-    "        plt.plot(t_top[li], d_top[li], \"k\")\n",
-    "        li = np.all([t_bot >= ts[index_start], t_bot <= ts[index_end - 1]], axis=0)\n",
-    "        plt.plot(t_bot[li], d_bot[li], \"w\")\n",
-    "        plt.plot(\n",
-    "            ts_new[index_start:index_end],\n",
-    "            d_top_new[index_start:index_end],\n",
-    "            turbulence_color,\n",
-    "        )\n",
-    "        plt.plot(\n",
-    "            ts_new[index_start:index_end],\n",
-    "            d_bot_new[index_start:index_end],\n",
-    "            bottom_color,\n",
-    "        )\n",
-    "        plt.gca().invert_yaxis()\n",
-    "        plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "for index_start, index_end in zip(\n",
-    "    np.concatenate(([0], removed_ends)),\n",
-    "    np.concatenate((removed_starts, [signals_raw.shape[0]])),\n",
-    "):\n",
-    "    if index_start == index_end:\n",
-    "        continue\n",
-    "    for ts, depths, signals in (\n",
-    "        (ts_mskd, depths_mskd, signals_mskd),\n",
-    "        (ts_raw, depths_raw, signals_raw),\n",
-    "    ):\n",
-    "        an = np.all(np.isnan(signals[index_start:index_end, :]), axis=1)\n",
-    "        print(np.sum(an), np.sum(~an))\n",
-    "        plt.figure(figsize=(12, 12))\n",
-    "        plt.pcolormesh(\n",
-    "            ts[index_start:index_end], depths, signals[index_start:index_end, :].T\n",
-    "        )\n",
-    "        li = np.all([t_top >= ts[index_start], t_top <= ts[index_end - 1]], axis=0)\n",
-    "        plt.plot(t_top[li], d_top[li], \"k\")\n",
-    "        li = np.all([t_bot >= ts[index_start], t_bot <= ts[index_end - 1]], axis=0)\n",
-    "        plt.plot(t_bot[li], d_bot[li], \"w\")\n",
-    "        plt.plot(\n",
-    "            ts_new[index_start:index_end],\n",
-    "            d_top_new[index_start:index_end],\n",
-    "            turbulence_color,\n",
-    "        )\n",
-    "        plt.plot(\n",
-    "            ts_new[index_start:index_end],\n",
-    "            d_bot_new[index_start:index_end],\n",
-    "            bottom_color,\n",
-    "        )\n",
-    "        plt.gca().invert_yaxis()\n",
-    "        plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "print(signals_raw.shape)\n",
-    "print(signals_mskd.shape)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.pcolormesh(ts_raw[234:257], depths_raw[:34], signals_raw[234:257, :34].T)\n",
-    "plt.gca().invert_yaxis()\n",
-    "plt.show()\n",
-    "\n",
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.pcolormesh(ts_raw[235:256], depths_raw[:34], signals_raw[235:256, :34].T)\n",
-    "plt.gca().invert_yaxis()\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.pcolormesh(ts_mskd[234:257], depths_mskd[:34], signals_mskd[234:257, :34].T)\n",
-    "plt.gca().invert_yaxis()\n",
-    "plt.show()\n",
-    "\n",
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.pcolormesh(ts_mskd[235:256], depths_mskd[:34], signals_mskd[235:256, :34].T)\n",
-    "plt.gca().invert_yaxis()\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.pcolormesh(ts_mskd[234:257], depths_mskd, signals_mskd[234:257, :].T)\n",
-    "plt.gca().invert_yaxis()\n",
-    "plt.show()\n",
-    "\n",
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.pcolormesh(ts_mskd[235:256], depths_mskd, signals_mskd[235:256, :].T)\n",
-    "plt.gca().invert_yaxis()\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "np.nonzero(allnan)"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.6.9"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
+  "nbformat": 4,
+  "nbformat_minor": 2
 }
diff --git a/notebooks/Generating lines from masked csv.ipynb b/notebooks/Generating lines from masked csv.ipynb
index 8ebc8ee7..cd3f8fdc 100644
--- a/notebooks/Generating lines from masked csv.ipynb	
+++ b/notebooks/Generating lines from masked csv.ipynb	
@@ -1,523 +1,523 @@
 {
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "cd .."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import echofilter.raw.loader"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import os"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import numpy as np"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import matplotlib.pyplot as plt\n",
-    "import seaborn as sns\n",
-    "\n",
-    "sns.set()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "ROOT_DATA_DIR = \"/media/scott/scratch/Datasets/dsforce/surveyExports\"\n",
-    "\n",
-    "# example with 1 passive period, 1 turbulence cut out\n",
-    "sample = \"mobile/Survey16/Survey16_GR3_N1W_E\"\n",
-    "# example with lots of short passive periods, 1 proper passive period, 1 turbulence cut out\n",
-    "# sample = 'mobile/Survey17/Survey17_GR1_N0W_E'\n",
-    "\n",
-    "fname_raw = os.path.join(ROOT_DATA_DIR, sample + \"_Sv_raw.csv\")\n",
-    "fname_masked = os.path.join(ROOT_DATA_DIR, sample + \"_Sv.csv\")\n",
-    "\n",
-    "ts_raw, depths_raw, signals_raw = echofilter.raw.loader.transect_loader(fname_raw)\n",
-    "ts_mskd, depths_mskd, signals_mskd = echofilter.raw.loader.transect_loader(fname_masked)\n",
-    "\n",
-    "t_bot, d_bot = echofilter.raw.loader.evl_loader(\n",
-    "    os.path.join(ROOT_DATA_DIR, sample + \"_bottom.evl\")\n",
-    ")\n",
-    "t_top, d_top = echofilter.raw.loader.evl_loader(\n",
-    "    os.path.join(ROOT_DATA_DIR, sample + \"_turbulence.evl\")\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "np.min(signals_raw), np.max(signals_raw)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "ts_raw.shape, depths_raw.shape, signals_raw.shape"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "turbulence_color = \"c\"\n",
-    "bottom_color = \"#00ee00\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.pcolormesh(ts_raw, depths_raw, signals_raw.T)\n",
-    "plt.plot(t_top, d_top, turbulence_color)\n",
-    "plt.plot(t_bot, d_bot, bottom_color)\n",
-    "plt.gca().invert_yaxis()\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Finding boxes to merge"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.pcolormesh(ts_mskd, depths_mskd, signals_mskd.T)\n",
-    "plt.plot(t_top, d_top, turbulence_color)\n",
-    "plt.plot(t_bot, d_bot, bottom_color)\n",
-    "plt.gca().invert_yaxis()\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.pcolormesh(ts_mskd, depths_mskd, ~np.isnan(signals_mskd).T)\n",
-    "plt.plot(t_top, d_top, turbulence_color)\n",
-    "plt.plot(t_bot, d_bot, bottom_color)\n",
-    "plt.gca().invert_yaxis()\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "indices = np.tile(np.arange(signals_mskd.shape[1]), (signals_mskd.shape[0], 1)).astype(\n",
-    "    \"float\"\n",
-    ")\n",
-    "indices[np.isnan(signals_mskd)] = np.nan"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "np.nanmin(indices, axis=1)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "np.nanmax(indices, axis=1)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "cut_off_indices = np.round(np.nanmedian(indices, axis=1))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "cut_off_indices"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "depths_mskd"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "top_depths = np.tile(\n",
-    "    np.concatenate([(depths_mskd[:-1] + depths_mskd[1:]) / 2, depths_mskd[-1:]]),\n",
-    "    (signals_mskd.shape[0], 1),\n",
-    ")\n",
-    "top_depths[~np.isnan(signals_mskd)] = np.nan"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "top_depths"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "np.expand_dims(np.arange(signals_mskd.shape[1]), 0)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "np.expand_dims(cut_off_indices, -1)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.imshow(top_depths.T)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "li = np.expand_dims(np.arange(signals_mskd.shape[1]), 0) > np.expand_dims(\n",
-    "    cut_off_indices, -1\n",
-    ")\n",
-    "plt.imshow(li.T)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "top_depths[li] = np.nan\n",
-    "plt.imshow(top_depths.T)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "d_top_new = np.nanmax(top_depths, axis=1)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "bot_depths = np.tile(\n",
-    "    np.concatenate([depths_mskd[:1], (depths_mskd[:-1] + depths_mskd[1:]) / 2]),\n",
-    "    (signals_mskd.shape[0], 1),\n",
-    ")\n",
-    "bot_depths[~np.isnan(signals_mskd)] = np.nan\n",
-    "li = np.expand_dims(np.arange(signals_mskd.shape[1]), 0) < np.expand_dims(\n",
-    "    cut_off_indices, -1\n",
-    ")\n",
-    "bot_depths[li] = np.nan\n",
-    "d_bot_new = np.nanmin(bot_depths, axis=1)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.imshow(bot_depths.T)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.plot(d_top_new)\n",
-    "plt.plot(d_bot_new)\n",
-    "plt.gca().invert_yaxis()\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.pcolormesh(ts_mskd, depths_mskd, signals_mskd.T)\n",
-    "plt.plot(t_top, d_top, turbulence_color)\n",
-    "plt.plot(t_bot, d_bot, bottom_color)\n",
-    "plt.gca().invert_yaxis()\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.pcolormesh(ts_mskd, depths_mskd, signals_mskd.T)\n",
-    "plt.plot(ts_mskd, d_top_new, turbulence_color)\n",
-    "plt.plot(ts_mskd, d_bot_new, bottom_color)\n",
-    "plt.gca().invert_yaxis()\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "d_bot"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "d_bot_new"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "ts_mskd.shape"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "t_bot.shape"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.plot(t_bot, d_bot)\n",
-    "plt.plot(ts_mskd, d_bot_new)\n",
-    "plt.gca().invert_yaxis()\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.plot(t_top, d_top)\n",
-    "plt.plot(ts_mskd, d_top_new)\n",
-    "plt.gca().invert_yaxis()\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Finding rows which are fully removed from masked output"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "np.all(np.isnan(signals_mskd), axis=1)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "allnan = np.all(np.isnan(signals_mskd), axis=1)\n",
-    "removed_starts = np.nonzero(np.diff(allnan.astype(np.float)) > 0)[0] + 1\n",
-    "removed_ends = np.nonzero(np.diff(allnan.astype(np.float)) < 0)[0] + 1\n",
-    "\n",
-    "if removed_ends[0] < removed_starts[0]:\n",
-    "    removed_starts = np.concatenate(([0], removed_starts))\n",
-    "\n",
-    "if removed_starts[-1] > removed_ends[-1]:\n",
-    "    removed_ends = np.concatenate((removed_ends, [len(allnan)]))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "print(removed_starts)\n",
-    "print(removed_ends)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "for index_start, index_end in zip(removed_starts, removed_ends):\n",
-    "    plt.figure(figsize=(12, 12))\n",
-    "    plt.pcolormesh(\n",
-    "        ts_raw[index_start:index_end],\n",
-    "        depths_raw,\n",
-    "        signals_raw[index_start:index_end, :].T,\n",
-    "    )\n",
-    "    plt.gca().invert_yaxis()\n",
-    "    plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "for index_start, index_end in zip(\n",
-    "    np.concatenate(([0], removed_ends)),\n",
-    "    np.concatenate((removed_starts, [signals_raw.shape[0]])),\n",
-    "):\n",
-    "    if index_start == index_end:\n",
-    "        continue\n",
-    "    plt.figure(figsize=(12, 12))\n",
-    "    plt.pcolormesh(\n",
-    "        ts_raw[index_start:index_end],\n",
-    "        depths_raw,\n",
-    "        signals_raw[index_start:index_end, :].T,\n",
-    "    )\n",
-    "    plt.gca().invert_yaxis()\n",
-    "    plt.show()"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.6.9"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
+  "cells": [
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "cd .."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import echofilter.raw.loader"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import os"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import numpy as np"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import matplotlib.pyplot as plt\n",
+        "import seaborn as sns\n",
+        "\n",
+        "sns.set()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "ROOT_DATA_DIR = \"/media/scott/scratch/Datasets/dsforce/surveyExports\"\n",
+        "\n",
+        "# example with 1 passive period, 1 turbulence cut out\n",
+        "sample = \"mobile/Survey16/Survey16_GR3_N1W_E\"\n",
+        "# example with lots of short passive periods, 1 proper passive period, 1 turbulence cut out\n",
+        "# sample = 'mobile/Survey17/Survey17_GR1_N0W_E'\n",
+        "\n",
+        "fname_raw = os.path.join(ROOT_DATA_DIR, sample + \"_Sv_raw.csv\")\n",
+        "fname_masked = os.path.join(ROOT_DATA_DIR, sample + \"_Sv.csv\")\n",
+        "\n",
+        "ts_raw, depths_raw, signals_raw = echofilter.raw.loader.transect_loader(fname_raw)\n",
+        "ts_mskd, depths_mskd, signals_mskd = echofilter.raw.loader.transect_loader(fname_masked)\n",
+        "\n",
+        "t_bot, d_bot = echofilter.raw.loader.evl_loader(\n",
+        "    os.path.join(ROOT_DATA_DIR, sample + \"_bottom.evl\")\n",
+        ")\n",
+        "t_top, d_top = echofilter.raw.loader.evl_loader(\n",
+        "    os.path.join(ROOT_DATA_DIR, sample + \"_turbulence.evl\")\n",
+        ")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "np.min(signals_raw), np.max(signals_raw)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "ts_raw.shape, depths_raw.shape, signals_raw.shape"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "turbulence_color = \"c\"\n",
+        "bottom_color = \"#00ee00\""
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.pcolormesh(ts_raw, depths_raw, signals_raw.T)\n",
+        "plt.plot(t_top, d_top, turbulence_color)\n",
+        "plt.plot(t_bot, d_bot, bottom_color)\n",
+        "plt.gca().invert_yaxis()\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "## Finding boxes to merge"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.pcolormesh(ts_mskd, depths_mskd, signals_mskd.T)\n",
+        "plt.plot(t_top, d_top, turbulence_color)\n",
+        "plt.plot(t_bot, d_bot, bottom_color)\n",
+        "plt.gca().invert_yaxis()\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.pcolormesh(ts_mskd, depths_mskd, ~np.isnan(signals_mskd).T)\n",
+        "plt.plot(t_top, d_top, turbulence_color)\n",
+        "plt.plot(t_bot, d_bot, bottom_color)\n",
+        "plt.gca().invert_yaxis()\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "indices = np.tile(np.arange(signals_mskd.shape[1]), (signals_mskd.shape[0], 1)).astype(\n",
+        "    \"float\"\n",
+        ")\n",
+        "indices[np.isnan(signals_mskd)] = np.nan"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "np.nanmin(indices, axis=1)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "np.nanmax(indices, axis=1)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "cut_off_indices = np.round(np.nanmedian(indices, axis=1))"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "cut_off_indices"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "depths_mskd"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "top_depths = np.tile(\n",
+        "    np.concatenate([(depths_mskd[:-1] + depths_mskd[1:]) / 2, depths_mskd[-1:]]),\n",
+        "    (signals_mskd.shape[0], 1),\n",
+        ")\n",
+        "top_depths[~np.isnan(signals_mskd)] = np.nan"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "top_depths"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "np.expand_dims(np.arange(signals_mskd.shape[1]), 0)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "np.expand_dims(cut_off_indices, -1)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.imshow(top_depths.T)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "li = np.expand_dims(np.arange(signals_mskd.shape[1]), 0) > np.expand_dims(\n",
+        "    cut_off_indices, -1\n",
+        ")\n",
+        "plt.imshow(li.T)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "top_depths[li] = np.nan\n",
+        "plt.imshow(top_depths.T)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "d_top_new = np.nanmax(top_depths, axis=1)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "bot_depths = np.tile(\n",
+        "    np.concatenate([depths_mskd[:1], (depths_mskd[:-1] + depths_mskd[1:]) / 2]),\n",
+        "    (signals_mskd.shape[0], 1),\n",
+        ")\n",
+        "bot_depths[~np.isnan(signals_mskd)] = np.nan\n",
+        "li = np.expand_dims(np.arange(signals_mskd.shape[1]), 0) < np.expand_dims(\n",
+        "    cut_off_indices, -1\n",
+        ")\n",
+        "bot_depths[li] = np.nan\n",
+        "d_bot_new = np.nanmin(bot_depths, axis=1)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.imshow(bot_depths.T)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.plot(d_top_new)\n",
+        "plt.plot(d_bot_new)\n",
+        "plt.gca().invert_yaxis()\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.pcolormesh(ts_mskd, depths_mskd, signals_mskd.T)\n",
+        "plt.plot(t_top, d_top, turbulence_color)\n",
+        "plt.plot(t_bot, d_bot, bottom_color)\n",
+        "plt.gca().invert_yaxis()\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.pcolormesh(ts_mskd, depths_mskd, signals_mskd.T)\n",
+        "plt.plot(ts_mskd, d_top_new, turbulence_color)\n",
+        "plt.plot(ts_mskd, d_bot_new, bottom_color)\n",
+        "plt.gca().invert_yaxis()\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "d_bot"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "d_bot_new"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "ts_mskd.shape"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "t_bot.shape"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.plot(t_bot, d_bot)\n",
+        "plt.plot(ts_mskd, d_bot_new)\n",
+        "plt.gca().invert_yaxis()\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.plot(t_top, d_top)\n",
+        "plt.plot(ts_mskd, d_top_new)\n",
+        "plt.gca().invert_yaxis()\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "## Finding rows which are fully removed from masked output"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "np.all(np.isnan(signals_mskd), axis=1)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "allnan = np.all(np.isnan(signals_mskd), axis=1)\n",
+        "removed_starts = np.nonzero(np.diff(allnan.astype(np.float)) > 0)[0] + 1\n",
+        "removed_ends = np.nonzero(np.diff(allnan.astype(np.float)) < 0)[0] + 1\n",
+        "\n",
+        "if removed_ends[0] < removed_starts[0]:\n",
+        "    removed_starts = np.concatenate(([0], removed_starts))\n",
+        "\n",
+        "if removed_starts[-1] > removed_ends[-1]:\n",
+        "    removed_ends = np.concatenate((removed_ends, [len(allnan)]))"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "print(removed_starts)\n",
+        "print(removed_ends)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "for index_start, index_end in zip(removed_starts, removed_ends):\n",
+        "    plt.figure(figsize=(12, 12))\n",
+        "    plt.pcolormesh(\n",
+        "        ts_raw[index_start:index_end],\n",
+        "        depths_raw,\n",
+        "        signals_raw[index_start:index_end, :].T,\n",
+        "    )\n",
+        "    plt.gca().invert_yaxis()\n",
+        "    plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "for index_start, index_end in zip(\n",
+        "    np.concatenate(([0], removed_ends)),\n",
+        "    np.concatenate((removed_starts, [signals_raw.shape[0]])),\n",
+        "):\n",
+        "    if index_start == index_end:\n",
+        "        continue\n",
+        "    plt.figure(figsize=(12, 12))\n",
+        "    plt.pcolormesh(\n",
+        "        ts_raw[index_start:index_end],\n",
+        "        depths_raw,\n",
+        "        signals_raw[index_start:index_end, :].T,\n",
+        "    )\n",
+        "    plt.gca().invert_yaxis()\n",
+        "    plt.show()"
+      ]
+    }
+  ],
+  "metadata": {
+    "kernelspec": {
+      "display_name": "Python 3",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.6.9"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 2
 }
diff --git a/notebooks/Passive metadata labelling.ipynb b/notebooks/Passive metadata labelling.ipynb
index 4b091313..7a8b8d9e 100644
--- a/notebooks/Passive metadata labelling.ipynb	
+++ b/notebooks/Passive metadata labelling.ipynb	
@@ -1,1079 +1,1079 @@
 {
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "cd .."
-   ]
+  "cells": [
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "cd .."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import copy\n",
+        "import datetime\n",
+        "import os\n",
+        "import warnings"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import numpy as np"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import matplotlib.pyplot as plt\n",
+        "import seaborn as sns\n",
+        "\n",
+        "sns.set()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import echofilter.plotting\n",
+        "import echofilter.raw"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "top_color = \"c\"\n",
+        "bot_color = \"#00ee00\""
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "root_data_dir = \"/data/dsforce/surveyExports\""
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import scipy.interpolate\n",
+        "import scipy.ndimage\n",
+        "\n",
+        "from echofilter.raw import loader, utils"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "from echofilter.raw.manipulate import find_passive_data"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "ROOT_DATA_DIR = loader.ROOT_DATA_DIR"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "def find_passive_data_v2(\n",
+        "    signals,\n",
+        "    n_depth_use=38,\n",
+        "    threshold_inner=None,\n",
+        "    threshold_init=None,\n",
+        "    deviation=None,\n",
+        "    sigma_depth=0,\n",
+        "    sigma_time=1,\n",
+        "):\n",
+        "    \"\"\"\n",
+        "    Find segments of Sv recording which correspond to passive recording.\n",
+        "\n",
+        "    Parameters\n",
+        "    ----------\n",
+        "    signals : array_like\n",
+        "        Two-dimensional array of Sv values, shaped `[timestamps, depths]`.\n",
+        "    n_depth_use : int, optional\n",
+        "        How many Sv depths to use, starting with the first depths (closest\n",
+        "        to the sounder device). If `None` all depths are used. Default is `38`.\n",
+        "        The median is taken across the depths, after taking the temporal\n",
+        "        derivative.\n",
+        "    threshold_inner : float, optional\n",
+        "        Threshold to apply to the temporal derivative of the signal when\n",
+        "        detecting the fine-tuned start/end of passive regions.\n",
+        "        Default behaviour is to use a threshold automatically determined using\n",
+        "        `deviation` if it is set, and otherwise use a threshold of `35.0`.\n",
+        "    threshold_init : float, optional\n",
+        "        Threshold to apply during the initial scan of the start/end of passive\n",
+        "        regions, which seeds the fine-tuning search.\n",
+        "        Default behaviour is to use a threshold automatically determined using\n",
+        "        `deviation` if it is set, and otherwise use a threshold of `12.0`.\n",
+        "    deviation : float, optional\n",
+        "        Set `threshold_inner` to be `deviation` times the standard deviation of\n",
+        "        the temporal derivative of the signal. The standard deviation is\n",
+        "        robustly estimated based on the interquartile range.\n",
+        "        If this is set, `threshold_inner` must be `None`.\n",
+        "        Default is `None`.\n",
+        "    sigma_depth : float, optional\n",
+        "        Width of kernel for filtering signals across second dimension (depth).\n",
+        "        Default is `0` (no filter).\n",
+        "    sigma_time : float, optional\n",
+        "        Width of kernel for filtering signals across first dimension (time).\n",
+        "        Default is `1`. Set to `0` to not filter.\n",
+        "\n",
+        "    Returns\n",
+        "    -------\n",
+        "    passive_start : numpy.ndarray\n",
+        "        Indices of rows of `signals` at which passive segments start.\n",
+        "    passive_end : numpy.ndarray\n",
+        "        Indices of rows of `signals` at which passive segments end.\n",
+        "\n",
+        "    Notes\n",
+        "    -----\n",
+        "    Works by looking at the difference between consecutive recordings and\n",
+        "    finding large deviations.\n",
+        "    \"\"\"\n",
+        "    # Ensure signals is numpy array\n",
+        "    signals = np.asarray(signals)\n",
+        "\n",
+        "    if n_depth_use is None:\n",
+        "        n_depth_use = signals.shape[1]\n",
+        "\n",
+        "    if sigma_depth > 0:\n",
+        "        signals_smooth = scipy.ndimage.gaussian_filter1d(\n",
+        "            signals.astype(np.float32), sigma_depth, axis=-1\n",
+        "        )\n",
+        "    else:\n",
+        "        signals_smooth = signals\n",
+        "\n",
+        "    md_inner = np.median(np.diff(signals_smooth[:, :n_depth_use], axis=0), axis=1)\n",
+        "\n",
+        "    if sigma_time > 0:\n",
+        "        signals_init = scipy.ndimage.gaussian_filter1d(\n",
+        "            signals_smooth.astype(np.float32), sigma_time, axis=0\n",
+        "        )\n",
+        "        md_init = np.median(np.diff(signals_init[:, :n_depth_use], axis=0), axis=1)\n",
+        "    else:\n",
+        "        signals_init = signals\n",
+        "        md_init = md_inner\n",
+        "\n",
+        "    if threshold_inner is not None and deviation is not None:\n",
+        "        raise ValueError(\"Only one of `threshold_inner` and `deviation` should be set.\")\n",
+        "    if threshold_init is None:\n",
+        "        if deviation is None:\n",
+        "            threshold_init = 12.0\n",
+        "        else:\n",
+        "            threshold_inner = (\n",
+        "                (np.percentile(md_init, 75) - np.percentile(md_init, 25))\n",
+        "                / 1.35\n",
+        "                * deviation\n",
+        "            )\n",
+        "    if threshold_inner is None:\n",
+        "        if deviation is None:\n",
+        "            threshold_inner = 35.0\n",
+        "        else:\n",
+        "            threshold_inner = (\n",
+        "                (np.percentile(md_inner, 75) - np.percentile(md_inner, 25))\n",
+        "                / 1.35\n",
+        "                * deviation\n",
+        "            )\n",
+        "\n",
+        "    threshold_high_inner = threshold_inner\n",
+        "    # threshold_low_inner = -threshold_inner\n",
+        "    threshold_high_init = threshold_init\n",
+        "    threshold_low_init = -threshold_init\n",
+        "    indices_possible_start_init = np.nonzero(md_init < threshold_low_init)[0]\n",
+        "    indices_possible_end_init = np.nonzero(md_init > threshold_high_init)[0]\n",
+        "\n",
+        "    if len(indices_possible_start_init) == 0 and len(indices_possible_end_init) == 0:\n",
+        "        return np.array([]), np.array([])\n",
+        "\n",
+        "    # Fine tune indices without smoothing\n",
+        "    indices_possible_start = []\n",
+        "    indices_possible_end = []\n",
+        "\n",
+        "    capture_start = None\n",
+        "    for i, index_p in enumerate(indices_possible_start_init):\n",
+        "        if capture_start is None:\n",
+        "            capture_start = index_p\n",
+        "        if (\n",
+        "            i + 1 >= len(indices_possible_start_init)\n",
+        "            or indices_possible_start_init[i + 1] > index_p + 3\n",
+        "        ):\n",
+        "            # break capture\n",
+        "            capture_end = index_p\n",
+        "            capture = np.arange(capture_start, capture_end + 1)\n",
+        "            indices_possible_start.append(capture[np.argmin(md_init[capture])])\n",
+        "            capture_start = None\n",
+        "\n",
+        "    capture_start = None\n",
+        "    for i, index_p in enumerate(indices_possible_end_init):\n",
+        "        if capture_start is None:\n",
+        "            capture_start = index_p\n",
+        "        if (\n",
+        "            i + 1 >= len(indices_possible_end_init)\n",
+        "            or indices_possible_end_init[i + 1] > index_p + 3\n",
+        "        ):\n",
+        "            # break capture\n",
+        "            capture_end = index_p\n",
+        "            capture = np.arange(capture_start, capture_end + 1)\n",
+        "            indices_possible_end.append(capture[np.argmax(md_init[capture])])\n",
+        "            capture_start = None\n",
+        "\n",
+        "    indices_possible_start = np.array(indices_possible_start)\n",
+        "    indices_possible_end = np.array(indices_possible_end)\n",
+        "\n",
+        "    current_index = 0\n",
+        "    indices_passive_start = []\n",
+        "    indices_passive_end = []\n",
+        "\n",
+        "    if len(indices_possible_start) > 0:\n",
+        "        indices_possible_start += 1\n",
+        "\n",
+        "    if len(indices_possible_end) > 0:\n",
+        "        indices_possible_end += 1\n",
+        "\n",
+        "    if len(indices_possible_end) > 0 and (\n",
+        "        len(indices_possible_start) == 0\n",
+        "        or indices_possible_end[0] < indices_possible_start[0]\n",
+        "    ):\n",
+        "        indices_passive_start.append(0)\n",
+        "        current_index = indices_possible_end[0]\n",
+        "        indices_passive_end.append(current_index)\n",
+        "        indices_possible_start = indices_possible_start[\n",
+        "            indices_possible_start > current_index\n",
+        "        ]\n",
+        "        indices_possible_end = indices_possible_end[\n",
+        "            indices_possible_end > current_index\n",
+        "        ]\n",
+        "\n",
+        "    while len(indices_possible_start) > 0:\n",
+        "        current_index = indices_possible_start[0]\n",
+        "        indices_passive_start.append(current_index)\n",
+        "        baseline_index = max(0, current_index - 2)\n",
+        "        baseline = signals[baseline_index, :n_depth_use]\n",
+        "\n",
+        "        # Find first column which returns to the baseline value seen before passive region\n",
+        "        offsets = np.nonzero(\n",
+        "            np.median(baseline - signals[current_index:, :n_depth_use], axis=1)\n",
+        "            < threshold_high_inner\n",
+        "        )[0]\n",
+        "        if len(offsets) == 0:\n",
+        "            current_index = signals.shape[0]\n",
+        "        else:\n",
+        "            current_index += offsets[0]\n",
+        "        indices_passive_end.append(current_index)\n",
+        "\n",
+        "        # Remove preceding indices from the list of candidates\n",
+        "        indices_possible_start = indices_possible_start[\n",
+        "            indices_possible_start > current_index\n",
+        "        ]\n",
+        "        indices_possible_end = indices_possible_end[\n",
+        "            indices_possible_end > current_index\n",
+        "        ]\n",
+        "\n",
+        "        # Check the start was sufficiently inclusive\n",
+        "        if current_index < signals.shape[0]:\n",
+        "            baseline_index = min(signals.shape[0] - 1, current_index + 1)\n",
+        "            baseline = signals[baseline_index, :n_depth_use]\n",
+        "            nonpassives = np.nonzero(\n",
+        "                np.median(baseline - signals[:current_index, :n_depth_use], axis=1)\n",
+        "                < threshold_high_inner\n",
+        "            )[0]\n",
+        "            if len(nonpassives) == 0:\n",
+        "                indices_passive_start[-1] = 0\n",
+        "            else:\n",
+        "                indices_passive_start[-1] = min(\n",
+        "                    indices_passive_start[-1],\n",
+        "                    nonpassives[-1] + 1,\n",
+        "                )\n",
+        "\n",
+        "        # Combine with preceding passive segments if they overlap\n",
+        "        while (\n",
+        "            len(indices_passive_start) > 1\n",
+        "            and indices_passive_start[-1] <= indices_passive_end[-2]\n",
+        "        ):\n",
+        "            indices_passive_start = indices_passive_start[:-1]\n",
+        "            indices_passive_end = indices_passive_end[:-2] + indices_passive_end[-1:]\n",
+        "\n",
+        "    return np.array(indices_passive_start), np.array(indices_passive_end)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "sample_paths = []\n",
+        "for dataset in [\"MinasPassage\", \"GrandPassage\", \"mobile\"]:\n",
+        "    for partition in [\"train\", \"validate\", \"test\"]:\n",
+        "        sample_paths += [\n",
+        "            os.path.join(dataset, pth)\n",
+        "            for pth in loader.get_partition_list(partition, dataset=dataset)\n",
+        "        ]\n",
+        "sample_paths = sorted(sample_paths)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "sample_paths"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "print(\"Begin running on {} paths\\n\".format(len(sample_paths)))\n",
+        "\n",
+        "for i_sample, sample_path in enumerate(sample_paths):\n",
+        "\n",
+        "    print(\n",
+        "        \"{:4d}/{:4d} {}\".format(\n",
+        "            i_sample + 1,\n",
+        "            len(sample_paths),\n",
+        "            datetime.datetime.now().strftime(\"%A, %B %d, %Y at %H:%M:%S\"),\n",
+        "        )\n",
+        "    )\n",
+        "    print(sample_path)\n",
+        "\n",
+        "    fname_raw = os.path.join(root_data_dir, sample_path + \"_Sv_raw.csv\")\n",
+        "    ts_raw, depths_raw, signals_raw = echofilter.raw.loader.transect_loader(\n",
+        "        fname_raw, warn_row_overflow=0\n",
+        "    )\n",
+        "    is_upward_facing = depths_raw[-1] < depths_raw[0]\n",
+        "\n",
+        "    nt = len(ts_raw)\n",
+        "    print(\"length: {}\".format(nt))\n",
+        "    its_raw = np.arange(len(ts_raw))\n",
+        "\n",
+        "    if \"december2017\" in sample_path:\n",
+        "        psx = np.array([])\n",
+        "        pex = np.array([])\n",
+        "    elif \"march2018\" in sample_path:\n",
+        "        psx = np.arange(0, nt, 360)\n",
+        "        pex = psx + 60\n",
+        "    elif \"september2018\" in sample_path:\n",
+        "        psx = np.arange(300, nt, 360)\n",
+        "        pex = psx + 60\n",
+        "    elif \"GrandPassage\" in sample_path:\n",
+        "        psx = np.array([0, 3120, 6540, 9960, 13380])\n",
+        "        psx = psx[psx < nt]\n",
+        "        pex = np.r_[120, psx[1:] + 420]\n",
+        "        pex = np.minimum(pex, nt)\n",
+        "    else:\n",
+        "        psx = None\n",
+        "        pex = None\n",
+        "\n",
+        "    def tidy_up_line(t, d):\n",
+        "        if d is None:\n",
+        "            return np.nan * np.ones_like(ts_raw)\n",
+        "        is_usable = np.isfinite(d)\n",
+        "        if np.sum(is_usable) > 0:\n",
+        "            t = t[is_usable]\n",
+        "            d = d[is_usable]\n",
+        "        return np.interp(ts_raw, t, d)\n",
+        "\n",
+        "    ps1, pe1 = find_passive_data(signals_raw)\n",
+        "    ps2, pe2 = find_passive_data_v2(signals_raw)\n",
+        "\n",
+        "    fname_surface = os.path.join(root_data_dir, sample_path + \"_surface.evl\")\n",
+        "\n",
+        "    if os.path.isfile(fname_surface):\n",
+        "        t_surface, d_surface = loader.evl_loader(fname_surface)\n",
+        "    elif is_upward_facing:\n",
+        "        print(\n",
+        "            \"ERROR: Expected {} to exist when transect is upfacing.\".format(\n",
+        "                fname_surface\n",
+        "            )\n",
+        "        )\n",
+        "        t_surface = ts_raw\n",
+        "        d_surface = np.zeros_like(ts_raw)\n",
+        "    else:\n",
+        "        # Default surface depth of 0m for downward facing data\n",
+        "        t_surface = ts_raw\n",
+        "        d_surface = np.zeros_like(ts_raw)\n",
+        "\n",
+        "    # Find location of passive data.\n",
+        "    # Try to determine passive data as whenever the surface line is undefined.\n",
+        "    d_surface[np.isclose(d_surface, -10000.99)] = np.nan\n",
+        "    is_passive = np.isnan(d_surface)\n",
+        "    ps3, pe3 = echofilter.utils.get_indicator_onoffsets(is_passive > 0.5)\n",
+        "    ps3 = np.asarray(ps3)\n",
+        "    pe3 = np.asarray(pe3) + 1\n",
+        "    pl3 = pe3 - ps3\n",
+        "    li = pl3 >= 3\n",
+        "    ps3 = ps3[li]\n",
+        "    pe3 = pe3[li]\n",
+        "    if np.sum(~li) > 0:\n",
+        "        print(\"popped {} from v3, with lengths {}\".format(np.sum(~li), pl3[~li]))\n",
+        "\n",
+        "    print(\"starts:\")\n",
+        "    print(\"xp:\", psx)\n",
+        "    print(\"v1:\", ps1)\n",
+        "    print(\"v2:\", ps2)\n",
+        "    print(\"v3:\", ps3)\n",
+        "    print(\"ends:\")\n",
+        "    print(\"xp:\", pex)\n",
+        "    print(\"v1:\", pe1)\n",
+        "    print(\"v2:\", pe2)\n",
+        "    print(\"v3:\", pe3)\n",
+        "    print(\"durations:\")\n",
+        "    if pex is not None:\n",
+        "        print(\"xp:\", pex - psx)\n",
+        "    print(\"v1:\", pe1 - ps1)\n",
+        "    print(\"v2:\", pe2 - ps2)\n",
+        "    if ps3 is not None:\n",
+        "        print(\"v3:\", pe3 - ps3)\n",
+        "    print(\"\")\n",
+        "\n",
+        "    if (\n",
+        "        len(ps1) != len(ps2)\n",
+        "        or len(pe1) != len(pe2)\n",
+        "        or not np.allclose(ps1, ps2)\n",
+        "        or not np.allclose(pe1, pe2)\n",
+        "    ):\n",
+        "        print(\"Warning: DIFFERENT PASSIVE PREDICTIONS v1/v2\")\n",
+        "    if (\n",
+        "        ps3 is not None\n",
+        "        and pe3 is not None\n",
+        "        and (\n",
+        "            len(ps3) != len(ps2)\n",
+        "            or len(pe3) != len(pe2)\n",
+        "            or not np.allclose(ps3, ps2)\n",
+        "            or not np.allclose(pe3, pe2)\n",
+        "        )\n",
+        "    ):\n",
+        "        print(\"Warning: DIFFERENT PASSIVE PREDICTIONS v2/v3\")\n",
+        "    if (\n",
+        "        psx is not None\n",
+        "        and pex is not None\n",
+        "        and (\n",
+        "            len(psx) != len(ps1)\n",
+        "            or len(pex) != len(pe1)\n",
+        "            or not np.allclose(psx, ps1)\n",
+        "            or not np.allclose(pex, pe1)\n",
+        "        )\n",
+        "    ):\n",
+        "        print(\"Warning: PASSIVE v1 DISAGREES WITH EXPECTED\")\n",
+        "    if (\n",
+        "        psx is not None\n",
+        "        and pex is not None\n",
+        "        and (\n",
+        "            len(psx) != len(ps2)\n",
+        "            or len(pex) != len(pe2)\n",
+        "            or not np.allclose(psx, ps2)\n",
+        "            or not np.allclose(pex, pe2)\n",
+        "        )\n",
+        "    ):\n",
+        "        print(\"Warning: PASSIVE v2 DISAGREES WITH EXPECTED\")\n",
+        "    if (\n",
+        "        ps3 is not None\n",
+        "        and pe3 is not None\n",
+        "        and psx is not None\n",
+        "        and pex is not None\n",
+        "        and (\n",
+        "            len(psx) != len(ps3)\n",
+        "            or len(pex) != len(pe3)\n",
+        "            or not np.allclose(psx, ps3)\n",
+        "            or not np.allclose(pex, pe3)\n",
+        "        )\n",
+        "    ):\n",
+        "        print(\"Warning: PASSIVE v3 DISAGREES WITH EXPECTED\")\n",
+        "\n",
+        "    print(\"\")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "sample_paths = []\n",
+        "dataset = \"mobile\"\n",
+        "sample_paths = [\n",
+        "    os.path.join(dataset, pth)\n",
+        "    for pth in loader.get_partition_list(\"leaveout\", dataset=dataset)\n",
+        "]\n",
+        "sample_paths = sorted(sample_paths)\n",
+        "\n",
+        "\n",
+        "print(\"Begin running on {} paths\\n\".format(len(sample_paths)))\n",
+        "\n",
+        "for i_sample, sample_path in enumerate(sample_paths):\n",
+        "\n",
+        "    print(\n",
+        "        \"{:4d}/{:4d} {}\".format(\n",
+        "            i_sample + 1,\n",
+        "            len(sample_paths),\n",
+        "            datetime.datetime.now().strftime(\"%A, %B %d, %Y at %H:%M:%S\"),\n",
+        "        )\n",
+        "    )\n",
+        "    print(sample_path)\n",
+        "\n",
+        "    fname_raw = os.path.join(root_data_dir, sample_path + \"_Sv_raw.csv\")\n",
+        "    ts_raw, depths_raw, signals_raw = echofilter.raw.loader.transect_loader(\n",
+        "        fname_raw, warn_row_overflow=0\n",
+        "    )\n",
+        "    is_upward_facing = depths_raw[-1] < depths_raw[0]\n",
+        "\n",
+        "    nt = len(ts_raw)\n",
+        "    print(\"length: {}\".format(nt))\n",
+        "    its_raw = np.arange(len(ts_raw))\n",
+        "\n",
+        "    if \"december2017\" in sample_path:\n",
+        "        psx = np.array([])\n",
+        "        pex = np.array([])\n",
+        "    elif \"march2018\" in sample_path:\n",
+        "        psx = np.arange(0, nt, 360)\n",
+        "        pex = psx + 60\n",
+        "    elif \"september2018\" in sample_path:\n",
+        "        psx = np.arange(300, nt, 360)\n",
+        "        pex = psx + 60\n",
+        "    elif \"GrandPassage\" in sample_path:\n",
+        "        psx = np.array([0, 3120, 6540, 9960, 13380])\n",
+        "        psx = psx[psx < nt]\n",
+        "        pex = np.r_[120, psx[1:] + 420]\n",
+        "        pex = np.minimum(pex, nt)\n",
+        "    else:\n",
+        "        psx = None\n",
+        "        pex = None\n",
+        "\n",
+        "    ps1, pe1 = find_passive_data(signals_raw)\n",
+        "    ps2, pe2 = find_passive_data_v2(signals_raw)\n",
+        "\n",
+        "    fname_surface = os.path.join(root_data_dir, sample_path + \"_surface.evl\")\n",
+        "\n",
+        "    if os.path.isfile(fname_surface):\n",
+        "        t_surface, d_surface = loader.evl_loader(fname_surface)\n",
+        "    elif is_upward_facing:\n",
+        "        print(\n",
+        "            \"ERROR: Expected {} to exist when transect is upfacing.\".format(\n",
+        "                fname_surface\n",
+        "            )\n",
+        "        )\n",
+        "        t_surface = ts_raw\n",
+        "        d_surface = np.zeros_like(ts_raw)\n",
+        "    else:\n",
+        "        # Default surface depth of 0m for downward facing data\n",
+        "        t_surface = ts_raw\n",
+        "        d_surface = np.zeros_like(ts_raw)\n",
+        "\n",
+        "    # Find location of passive data.\n",
+        "    # Try to determine passive data as whenever the surface line is undefined.\n",
+        "    d_surface[np.isclose(d_surface, -10000.99)] = np.nan\n",
+        "    is_passive = np.isnan(d_surface)\n",
+        "    ps3, pe3 = echofilter.utils.get_indicator_onoffsets(is_passive > 0.5)\n",
+        "    ps3 = np.asarray(ps3)\n",
+        "    pe3 = np.asarray(pe3) + 1\n",
+        "    pl3 = pe3 - ps3\n",
+        "    li = pl3 >= 3\n",
+        "    ps3 = ps3[li]\n",
+        "    pe3 = pe3[li]\n",
+        "    if np.sum(~li) > 0:\n",
+        "        print(\"popped {} from v3, with lengths {}\".format(np.sum(~li), pl3[~li]))\n",
+        "\n",
+        "    print(\"starts:\")\n",
+        "    print(\"xp:\", psx)\n",
+        "    print(\"v1:\", ps1)\n",
+        "    print(\"v2:\", ps2)\n",
+        "    print(\"v3:\", ps3)\n",
+        "    print(\"ends:\")\n",
+        "    print(\"xp:\", pex)\n",
+        "    print(\"v1:\", pe1)\n",
+        "    print(\"v2:\", pe2)\n",
+        "    print(\"v3:\", pe3)\n",
+        "    print(\"durations:\")\n",
+        "    if pex is not None:\n",
+        "        print(\"xp:\", pex - psx)\n",
+        "    print(\"v1:\", pe1 - ps1)\n",
+        "    print(\"v2:\", pe2 - ps2)\n",
+        "    if ps3 is not None:\n",
+        "        print(\"v3:\", pe3 - ps3)\n",
+        "    print(\"\")\n",
+        "\n",
+        "    if (\n",
+        "        len(ps1) != len(ps2)\n",
+        "        or len(pe1) != len(pe2)\n",
+        "        or not np.allclose(ps1, ps2)\n",
+        "        or not np.allclose(pe1, pe2)\n",
+        "    ):\n",
+        "        print(\"Warning: DIFFERENT PASSIVE PREDICTIONS v1/v2\")\n",
+        "    if (\n",
+        "        ps3 is not None\n",
+        "        and pe3 is not None\n",
+        "        and (\n",
+        "            len(ps3) != len(ps2)\n",
+        "            or len(pe3) != len(pe2)\n",
+        "            or not np.allclose(ps3, ps2)\n",
+        "            or not np.allclose(pe3, pe2)\n",
+        "        )\n",
+        "    ):\n",
+        "        print(\"Warning: DIFFERENT PASSIVE PREDICTIONS v2/v3\")\n",
+        "    if (\n",
+        "        psx is not None\n",
+        "        and pex is not None\n",
+        "        and (\n",
+        "            len(psx) != len(ps1)\n",
+        "            or len(pex) != len(pe1)\n",
+        "            or not np.allclose(psx, ps1)\n",
+        "            or not np.allclose(pex, pe1)\n",
+        "        )\n",
+        "    ):\n",
+        "        print(\"Warning: PASSIVE v1 DISAGREES WITH EXPECTED\")\n",
+        "    if (\n",
+        "        psx is not None\n",
+        "        and pex is not None\n",
+        "        and (\n",
+        "            len(psx) != len(ps2)\n",
+        "            or len(pex) != len(pe2)\n",
+        "            or not np.allclose(psx, ps2)\n",
+        "            or not np.allclose(pex, pe2)\n",
+        "        )\n",
+        "    ):\n",
+        "        print(\"Warning: PASSIVE v2 DISAGREES WITH EXPECTED\")\n",
+        "    if (\n",
+        "        ps3 is not None\n",
+        "        and pe3 is not None\n",
+        "        and psx is not None\n",
+        "        and pex is not None\n",
+        "        and (\n",
+        "            len(psx) != len(ps3)\n",
+        "            or len(pex) != len(pe3)\n",
+        "            or not np.allclose(psx, ps3)\n",
+        "            or not np.allclose(pex, pe3)\n",
+        "        )\n",
+        "    ):\n",
+        "        print(\"Warning: PASSIVE v3 DISAGREES WITH EXPECTED\")\n",
+        "\n",
+        "    print(\"\")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "bad_sample_paths = [\n",
+        "    \"MinasPassage/september2018/september2018_D20181021-T165220_D20181021-T222221\",\n",
+        "    \"MinasPassage/september2018/september2018_D20181022-T105220_D20181022-T162217\",\n",
+        "    \"MinasPassage/september2018/september2018_D20181022-T172213_D20181022-T232217\",\n",
+        "    \"MinasPassage/september2018/september2018_D20181026-T082220_D20181026-T135213\",\n",
+        "    \"MinasPassage/september2018/september2018_D20181026-T142217_D20181026-T195218\",\n",
+        "]\n",
+        "# bad_sample_paths = [\n",
+        "# \"MinasPassage/september2018/september2018_D20180928-T202217_D20180929-T015217\",\n",
+        "# \"MinasPassage/september2018/september2018_D20181008-T235218_D20181009-T052220\",\n",
+        "# \"MinasPassage/september2018/september2018_D20181021-T045220_D20181021-T102218\",\n",
+        "# ]\n",
+        "# bad_sample_paths = [\n",
+        "#    \"GrandPassage/phase2/GrandPassage_WBAT_2B_20200130_UTC020017_floodhigh\",\n",
+        "# ]"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "print(\"Begin running on {} paths\\n\".format(len(bad_sample_paths)))\n",
+        "\n",
+        "for i_sample, sample_path in enumerate(bad_sample_paths):\n",
+        "\n",
+        "    print(\n",
+        "        \"{:4d}/{:4d} {}\".format(\n",
+        "            i_sample + 1,\n",
+        "            len(bad_sample_paths),\n",
+        "            datetime.datetime.now().strftime(\"%A, %B %d, %Y at %H:%M:%S\"),\n",
+        "        )\n",
+        "    )\n",
+        "    print(sample_path)\n",
+        "\n",
+        "    fname_raw = os.path.join(root_data_dir, sample_path + \"_Sv_raw.csv\")\n",
+        "    ts_raw, depths_raw, signals_raw = echofilter.raw.loader.transect_loader(\n",
+        "        fname_raw, warn_row_overflow=0\n",
+        "    )\n",
+        "    is_upward_facing = depths_raw[-1] < depths_raw[0]\n",
+        "\n",
+        "    nt = len(ts_raw)\n",
+        "    print(\"length: {}\".format(nt))\n",
+        "    its_raw = np.arange(len(ts_raw))\n",
+        "\n",
+        "    if \"december2017\" in sample_path:\n",
+        "        psx = np.array([])\n",
+        "        pex = np.array([])\n",
+        "    elif \"march2018\" in sample_path:\n",
+        "        psx = np.arange(0, nt, 360)\n",
+        "        pex = psx + 60\n",
+        "    elif \"september2018\" in sample_path:\n",
+        "        psx = np.arange(300, nt, 360)\n",
+        "        pex = psx + 60\n",
+        "    elif \"GrandPassage\" in sample_path:\n",
+        "        psx = np.array([0, 3120, 6540, 9960, 13380])\n",
+        "        psx = psx[psx < nt]\n",
+        "        pex = np.r_[120, psx[1:] + 420]\n",
+        "        pex = np.minimum(pex, nt)\n",
+        "    else:\n",
+        "        psx = None\n",
+        "        pex = None\n",
+        "\n",
+        "    ps1, pe1 = find_passive_data(signals_raw)\n",
+        "    ps2, pe2 = find_passive_data_v2(signals_raw)\n",
+        "\n",
+        "    fname_surface = os.path.join(root_data_dir, sample_path + \"_surface.evl\")\n",
+        "\n",
+        "    if os.path.isfile(fname_surface):\n",
+        "        t_surface, d_surface = loader.evl_loader(fname_surface)\n",
+        "    elif is_upward_facing:\n",
+        "        print(\n",
+        "            \"ERROR: Expected {} to exist when transect is upfacing.\".format(\n",
+        "                fname_surface\n",
+        "            )\n",
+        "        )\n",
+        "        t_surface = ts_raw\n",
+        "        d_surface = np.zeros_like(ts_raw)\n",
+        "    else:\n",
+        "        # Default surface depth of 0m for downward facing data\n",
+        "        t_surface = ts_raw\n",
+        "        d_surface = np.zeros_like(ts_raw)\n",
+        "\n",
+        "    # Find location of passive data.\n",
+        "    # Try to determine passive data as whenever the surface line is undefined.\n",
+        "    d_surface[np.isclose(d_surface, -10000.99)] = np.nan\n",
+        "    is_passive = np.isnan(d_surface)\n",
+        "    ps3, pe3 = echofilter.utils.get_indicator_onoffsets(is_passive > 0.5)\n",
+        "    ps3 = np.asarray(ps3)\n",
+        "    pe3 = np.asarray(pe3) + 1\n",
+        "    pl3 = pe3 - ps3\n",
+        "    li = pl3 >= 3\n",
+        "    ps3 = ps3[li]\n",
+        "    pe3 = pe3[li]\n",
+        "    if np.sum(~li) > 0:\n",
+        "        print(\"popped {} from v3, with lengths {}\".format(np.sum(~li), pl3[~li]))\n",
+        "\n",
+        "    print(\"starts:\")\n",
+        "    print(\"xp:\", psx)\n",
+        "    print(\"v1:\", ps1)\n",
+        "    print(\"v2:\", ps2)\n",
+        "    print(\"v3:\", ps3)\n",
+        "    print(\"ends:\")\n",
+        "    print(\"xp:\", pex)\n",
+        "    print(\"v1:\", pe1)\n",
+        "    print(\"v2:\", pe2)\n",
+        "    print(\"v3:\", pe3)\n",
+        "    print(\"durations:\")\n",
+        "    if pex is not None:\n",
+        "        print(\"xp:\", pex - psx)\n",
+        "    print(\"v1:\", pe1 - ps1)\n",
+        "    print(\"v2:\", pe2 - ps2)\n",
+        "    if ps3 is not None:\n",
+        "        print(\"v3:\", pe3 - ps3)\n",
+        "    print(\"\")\n",
+        "\n",
+        "    if (\n",
+        "        len(ps1) != len(ps2)\n",
+        "        or len(pe1) != len(pe2)\n",
+        "        or not np.allclose(ps1, ps2)\n",
+        "        or not np.allclose(pe1, pe2)\n",
+        "    ):\n",
+        "        print(\"Warning: DIFFERENT PASSIVE PREDICTIONS v1/v2\")\n",
+        "    if (\n",
+        "        ps3 is not None\n",
+        "        and pe3 is not None\n",
+        "        and (\n",
+        "            len(ps3) != len(ps2)\n",
+        "            or len(pe3) != len(pe2)\n",
+        "            or not np.allclose(ps3, ps2)\n",
+        "            or not np.allclose(pe3, pe2)\n",
+        "        )\n",
+        "    ):\n",
+        "        print(\"Warning: DIFFERENT PASSIVE PREDICTIONS v2/v3\")\n",
+        "    if (\n",
+        "        psx is not None\n",
+        "        and pex is not None\n",
+        "        and (\n",
+        "            len(psx) != len(ps1)\n",
+        "            or len(pex) != len(pe1)\n",
+        "            or not np.allclose(psx, ps1)\n",
+        "            or not np.allclose(pex, pe1)\n",
+        "        )\n",
+        "    ):\n",
+        "        print(\"Warning: PASSIVE v1 DISAGREES WITH EXPECTED\")\n",
+        "    if (\n",
+        "        psx is not None\n",
+        "        and pex is not None\n",
+        "        and (\n",
+        "            len(psx) != len(ps2)\n",
+        "            or len(pex) != len(pe2)\n",
+        "            or not np.allclose(psx, ps2)\n",
+        "            or not np.allclose(pex, pe2)\n",
+        "        )\n",
+        "    ):\n",
+        "        print(\"Warning: PASSIVE v2 DISAGREES WITH EXPECTED\")\n",
+        "    if (\n",
+        "        ps3 is not None\n",
+        "        and pe3 is not None\n",
+        "        and psx is not None\n",
+        "        and pex is not None\n",
+        "        and (\n",
+        "            len(psx) != len(ps3)\n",
+        "            or len(pex) != len(pe3)\n",
+        "            or not np.allclose(psx, ps3)\n",
+        "            or not np.allclose(pex, pe3)\n",
+        "        )\n",
+        "    ):\n",
+        "        print(\"Warning: PASSIVE v3 DISAGREES WITH EXPECTED\")\n",
+        "\n",
+        "    best_ps = ps1\n",
+        "    best_pe = pe1\n",
+        "\n",
+        "    for i in range(min(len(best_ps), len(psx))):\n",
+        "        if best_ps[i] == psx[i] and best_pe[i] == pex[i]:\n",
+        "            continue\n",
+        "\n",
+        "        for ps, pe, tit in (\n",
+        "            (psx[i], pex[i], \"expected\"),\n",
+        "            (best_ps[i], best_pe[i], \"v1\"),\n",
+        "        ):\n",
+        "            plt.figure(figsize=(12, 9))\n",
+        "            i0 = max(0, ps - 1)\n",
+        "            i1 = pe + 2\n",
+        "            if i1 >= len(its_raw):\n",
+        "                i1 = None\n",
+        "            plt.pcolormesh(its_raw[i0:i1], depths_raw[:50], signals_raw[i0:i1, :50].T)\n",
+        "            plt.gca().invert_yaxis()\n",
+        "            plt.title(\"passive #{}, {}\".format(i, tit))\n",
+        "            plt.show()\n",
+        "\n",
+        "    print(\"\")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.figure(figsize=(12, 9))\n",
+        "plt.pcolormesh(its_raw, depths_raw, signals_raw.T)\n",
+        "plt.gca().invert_yaxis()\n",
+        "plt.title(sample_path)\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "_ = \"\"\"\n",
+        "length: 10259\n",
+        "starts:\n",
+        "xp: [   0 3120 6540 9960]\n",
+        "v1: [   0 3120 6539 9959]\n",
+        "v2: [   0 3120 6539 9959]\n",
+        "v3: [   0 3120 6540 9960]\n",
+        "ends:\n",
+        "xp: [  120  3540  6960 10259]\n",
+        "v1: [  120  3540  6959 10259]\n",
+        "v2: [  120  3540  6701 10259]\n",
+        "v3: [  120  3540  6960 10260]\n",
+        "durations:\n",
+        "xp: [120 420 420 299]\n",
+        "v1: [120 420 420 300]\n",
+        "v2: [120 420 162 300]\n",
+        "v3: [120 420 420 300]\n",
+        "\"\"\""
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "i0 = 10200\n",
+        "i1 = 10259\n",
+        "i0 -= 1\n",
+        "i1 += 2\n",
+        "plt.figure(figsize=(12, 9))\n",
+        "plt.pcolormesh(its_raw[i0:i1], depths_raw[:50], signals_raw[i0:i1, :50].T)\n",
+        "plt.gca().invert_yaxis()\n",
+        "plt.title(\"{}-{}\".format(i0, i1 - 1))\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "sample_path = (\n",
+        "    \"MinasPassage/december2017/december2017_D20180222-T145219_D20180222-T142214\"\n",
+        ")\n",
+        "\n",
+        "fname_raw = os.path.join(root_data_dir, sample_path + \"_Sv_raw.csv\")\n",
+        "ts_raw, depths_raw, signals_raw = echofilter.raw.loader.transect_loader(\n",
+        "    fname_raw, warn_row_overflow=0\n",
+        ")\n",
+        "is_upward_facing = depths_raw[-1] < depths_raw[0]\n",
+        "\n",
+        "nt = len(ts_raw)\n",
+        "print(\"length: {}\".format(nt))\n",
+        "its_raw = np.arange(len(ts_raw))"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.figure(figsize=(12, 9))\n",
+        "plt.pcolormesh(its_raw, depths_raw, signals_raw.T)\n",
+        "plt.gca().invert_yaxis()\n",
+        "plt.title(sample_path)\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "i0 = 250\n",
+        "i1 = 350\n",
+        "i0 -= 1\n",
+        "i1 += 2\n",
+        "plt.figure(figsize=(12, 9))\n",
+        "plt.pcolormesh(its_raw[i0:i1], depths_raw[:50], signals_raw[i0:i1, :50].T)\n",
+        "plt.gca().invert_yaxis()\n",
+        "plt.title(\"{}-{}\".format(i0, i1 - 1))\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "bad_sample_paths = [\n",
+        "    \"mobile/Survey01/Survey01_GR1_S1A_E\",\n",
+        "    \"mobile/Survey03/Survey03_GR2_N5W_survey3\",\n",
+        "    \"mobile/Survey03/Survey03_GR4_N0A_survey3\",\n",
+        "    \"mobile/Survey04/Survey04_GR1_N3A\",\n",
+        "    \"mobile/Survey04/Survey04_GR2_N5A\",\n",
+        "    \"mobile/Survey05/Survey05_GR1_N1A_survey5\",\n",
+        "    \"mobile/Survey07/Survey07_GR2_N1W_survey7\",\n",
+        "    \"mobile/Survey10/Survey10_GR1_N0A_E\",\n",
+        "    \"mobile/Survey12/Survey12_GR4_N5A_E\",\n",
+        "    \"mobile/Survey01/Survey01_GR1_S2A_E\",\n",
+        "    \"mobile/Survey01/Survey01_GR1_S2W_E\",\n",
+        "    \"mobile/Survey11/Survey11_GR1_S2A_E\",\n",
+        "]"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "print(\"Begin running on {} paths\\n\".format(len(bad_sample_paths)))\n",
+        "\n",
+        "for i_sample, sample_path in enumerate(bad_sample_paths):\n",
+        "\n",
+        "    print(\n",
+        "        \"{:4d}/{:4d} {}\".format(\n",
+        "            i_sample + 1,\n",
+        "            len(bad_sample_paths),\n",
+        "            datetime.datetime.now().strftime(\"%A, %B %d, %Y at %H:%M:%S\"),\n",
+        "        )\n",
+        "    )\n",
+        "    print(sample_path)\n",
+        "\n",
+        "    fname_raw = os.path.join(root_data_dir, sample_path + \"_Sv_raw.csv\")\n",
+        "    ts_raw, depths_raw, signals_raw = echofilter.raw.loader.transect_loader(\n",
+        "        fname_raw, warn_row_overflow=0\n",
+        "    )\n",
+        "    is_upward_facing = depths_raw[-1] < depths_raw[0]\n",
+        "\n",
+        "    nt = len(ts_raw)\n",
+        "    print(\"length: {}\".format(nt))\n",
+        "    its_raw = np.arange(len(ts_raw))\n",
+        "\n",
+        "    plt.figure(figsize=(12, 9))\n",
+        "    plt.pcolormesh(its_raw, depths_raw, signals_raw.T)\n",
+        "    plt.gca().invert_yaxis()\n",
+        "    plt.title(sample_path)\n",
+        "    plt.show()\n",
+        "\n",
+        "    print(\"\")"
+      ]
+    }
+  ],
+  "metadata": {
+    "kernelspec": {
+      "display_name": "Python 3 (ipykernel)",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.7.15"
+    }
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import copy\n",
-    "import datetime\n",
-    "import os\n",
-    "import warnings"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import numpy as np"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import matplotlib.pyplot as plt\n",
-    "import seaborn as sns\n",
-    "\n",
-    "sns.set()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import echofilter.plotting\n",
-    "import echofilter.raw"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "top_color = \"c\"\n",
-    "bot_color = \"#00ee00\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "root_data_dir = \"/data/dsforce/surveyExports\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import scipy.interpolate\n",
-    "import scipy.ndimage\n",
-    "\n",
-    "from echofilter.raw import loader, utils"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from echofilter.raw.manipulate import find_passive_data"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "ROOT_DATA_DIR = loader.ROOT_DATA_DIR"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def find_passive_data_v2(\n",
-    "    signals,\n",
-    "    n_depth_use=38,\n",
-    "    threshold_inner=None,\n",
-    "    threshold_init=None,\n",
-    "    deviation=None,\n",
-    "    sigma_depth=0,\n",
-    "    sigma_time=1,\n",
-    "):\n",
-    "    \"\"\"\n",
-    "    Find segments of Sv recording which correspond to passive recording.\n",
-    "\n",
-    "    Parameters\n",
-    "    ----------\n",
-    "    signals : array_like\n",
-    "        Two-dimensional array of Sv values, shaped `[timestamps, depths]`.\n",
-    "    n_depth_use : int, optional\n",
-    "        How many Sv depths to use, starting with the first depths (closest\n",
-    "        to the sounder device). If `None` all depths are used. Default is `38`.\n",
-    "        The median is taken across the depths, after taking the temporal\n",
-    "        derivative.\n",
-    "    threshold_inner : float, optional\n",
-    "        Theshold to apply to the temporal derivative of the signal when\n",
-    "        detected fine-tuned start/end of passive regions.\n",
-    "        Default behaviour is to use a threshold automatically determined using\n",
-    "        `deviation` if it is set, and otherwise use a threshold of `35.0`.\n",
-    "    threshold_init : float, optional\n",
-    "        Theshold to apply during the initial scan of the start/end of passive\n",
-    "        regions, which seeds the fine-tuning search.\n",
-    "        Default behaviour is to use a threshold automatically determined using\n",
-    "        `deviation` if it is set, and otherwise use a threshold of `12.0`.\n",
-    "    deviation : float, optional\n",
-    "        Set `threshold_inner` to be `deviation` times the standard deviation of\n",
-    "        the temporal derivative of the signal. The standard deviation is\n",
-    "        robustly estimated based on the interquartile range.\n",
-    "        If this is set, `threshold_inner` must not be `None`.\n",
-    "        Default is `None`\n",
-    "    sigma_depth : float, optional\n",
-    "        Width of kernel for filtering signals across second dimension (depth).\n",
-    "        Default is `0` (no filter).\n",
-    "    sigma_time : float, optional\n",
-    "        Width of kernel for filtering signals across second dimension (time).\n",
-    "        Default is `1`. Set to `0` to not filter.\n",
-    "\n",
-    "    Returns\n",
-    "    -------\n",
-    "    passive_start : numpy.ndarray\n",
-    "        Indices of rows of `signals` at which passive segments start.\n",
-    "    passive_end : numpy.ndarray\n",
-    "        Indices of rows of `signals` at which passive segments end.\n",
-    "\n",
-    "    Notes\n",
-    "    -----\n",
-    "    Works by looking at the difference between consecutive recordings and\n",
-    "    finding large deviations.\n",
-    "    \"\"\"\n",
-    "    # Ensure signals is numpy array\n",
-    "    signals = np.asarray(signals)\n",
-    "\n",
-    "    if n_depth_use is None:\n",
-    "        n_depth_use = signals.shape[1]\n",
-    "\n",
-    "    if sigma_depth > 0:\n",
-    "        signals_smooth = scipy.ndimage.gaussian_filter1d(\n",
-    "            signals.astype(np.float32), sigma_depth, axis=-1\n",
-    "        )\n",
-    "    else:\n",
-    "        signals_smooth = signals\n",
-    "\n",
-    "    md_inner = np.median(np.diff(signals_smooth[:, :n_depth_use], axis=0), axis=1)\n",
-    "\n",
-    "    if sigma_time > 0:\n",
-    "        signals_init = scipy.ndimage.gaussian_filter1d(\n",
-    "            signals_smooth.astype(np.float32), sigma_time, axis=0\n",
-    "        )\n",
-    "        md_init = np.median(np.diff(signals_init[:, :n_depth_use], axis=0), axis=1)\n",
-    "    else:\n",
-    "        signals_init = signals\n",
-    "        md_init = md_inner\n",
-    "\n",
-    "    if threshold_inner is not None and deviation is not None:\n",
-    "        raise ValueError(\"Only one of `threshold_inner` and `deviation` should be set.\")\n",
-    "    if threshold_init is None:\n",
-    "        if deviation is None:\n",
-    "            threshold_init = 12.0\n",
-    "        else:\n",
-    "            threshold_inner = (\n",
-    "                (np.percentile(md_init, 75) - np.percentile(md_init, 25))\n",
-    "                / 1.35\n",
-    "                * deviation\n",
-    "            )\n",
-    "    if threshold_inner is None:\n",
-    "        if deviation is None:\n",
-    "            threshold_inner = 35.0\n",
-    "        else:\n",
-    "            threshold_inner = (\n",
-    "                (np.percentile(md_inner, 75) - np.percentile(md_inner, 25))\n",
-    "                / 1.35\n",
-    "                * deviation\n",
-    "            )\n",
-    "\n",
-    "    threshold_high_inner = threshold_inner\n",
-    "    # threshold_low_inner = -threshold_inner\n",
-    "    threshold_high_init = threshold_init\n",
-    "    threshold_low_init = -threshold_init\n",
-    "    indices_possible_start_init = np.nonzero(md_init < threshold_low_init)[0]\n",
-    "    indices_possible_end_init = np.nonzero(md_init > threshold_high_init)[0]\n",
-    "\n",
-    "    if len(indices_possible_start_init) == 0 and len(indices_possible_end_init) == 0:\n",
-    "        return np.array([]), np.array([])\n",
-    "\n",
-    "    # Fine tune indices without smoothing\n",
-    "    indices_possible_start = []\n",
-    "    indices_possible_end = []\n",
-    "\n",
-    "    capture_start = None\n",
-    "    for i, index_p in enumerate(indices_possible_start_init):\n",
-    "        if capture_start is None:\n",
-    "            capture_start = index_p\n",
-    "        if (\n",
-    "            i + 1 >= len(indices_possible_start_init)\n",
-    "            or indices_possible_start_init[i + 1] > index_p + 3\n",
-    "        ):\n",
-    "            # break capture\n",
-    "            capture_end = index_p\n",
-    "            capture = np.arange(capture_start, capture_end + 1)\n",
-    "            indices_possible_start.append(capture[np.argmin(md_init[capture])])\n",
-    "            capture_start = None\n",
-    "\n",
-    "    capture_start = None\n",
-    "    for i, index_p in enumerate(indices_possible_end_init):\n",
-    "        if capture_start is None:\n",
-    "            capture_start = index_p\n",
-    "        if (\n",
-    "            i + 1 >= len(indices_possible_end_init)\n",
-    "            or indices_possible_end_init[i + 1] > index_p + 3\n",
-    "        ):\n",
-    "            # break capture\n",
-    "            capture_end = index_p\n",
-    "            capture = np.arange(capture_start, capture_end + 1)\n",
-    "            indices_possible_end.append(capture[np.argmax(md_init[capture])])\n",
-    "            capture_start = None\n",
-    "\n",
-    "    indices_possible_start = np.array(indices_possible_start)\n",
-    "    indices_possible_end = np.array(indices_possible_end)\n",
-    "\n",
-    "    current_index = 0\n",
-    "    indices_passive_start = []\n",
-    "    indices_passive_end = []\n",
-    "\n",
-    "    if len(indices_possible_start) > 0:\n",
-    "        indices_possible_start += 1\n",
-    "\n",
-    "    if len(indices_possible_end) > 0:\n",
-    "        indices_possible_end += 1\n",
-    "\n",
-    "    if len(indices_possible_end) > 0 and (\n",
-    "        len(indices_possible_start) == 0\n",
-    "        or indices_possible_end[0] < indices_possible_start[0]\n",
-    "    ):\n",
-    "        indices_passive_start.append(0)\n",
-    "        current_index = indices_possible_end[0]\n",
-    "        indices_passive_end.append(current_index)\n",
-    "        indices_possible_start = indices_possible_start[\n",
-    "            indices_possible_start > current_index\n",
-    "        ]\n",
-    "        indices_possible_end = indices_possible_end[\n",
-    "            indices_possible_end > current_index\n",
-    "        ]\n",
-    "\n",
-    "    while len(indices_possible_start) > 0:\n",
-    "        current_index = indices_possible_start[0]\n",
-    "        indices_passive_start.append(current_index)\n",
-    "        baseline_index = max(0, current_index - 2)\n",
-    "        baseline = signals[baseline_index, :n_depth_use]\n",
-    "\n",
-    "        # Find first column which returns to the baseline value seen before passive region\n",
-    "        offsets = np.nonzero(\n",
-    "            np.median(baseline - signals[current_index:, :n_depth_use], axis=1)\n",
-    "            < threshold_high_inner\n",
-    "        )[0]\n",
-    "        if len(offsets) == 0:\n",
-    "            current_index = signals.shape[0]\n",
-    "        else:\n",
-    "            current_index += offsets[0]\n",
-    "        indices_passive_end.append(current_index)\n",
-    "\n",
-    "        # Remove preceding indices from the list of candidates\n",
-    "        indices_possible_start = indices_possible_start[\n",
-    "            indices_possible_start > current_index\n",
-    "        ]\n",
-    "        indices_possible_end = indices_possible_end[\n",
-    "            indices_possible_end > current_index\n",
-    "        ]\n",
-    "\n",
-    "        # Check the start was sufficiently inclusive\n",
-    "        if current_index < signals.shape[0]:\n",
-    "            baseline_index = min(signals.shape[0] - 1, current_index + 1)\n",
-    "            baseline = signals[baseline_index, :n_depth_use]\n",
-    "            nonpassives = np.nonzero(\n",
-    "                np.median(baseline - signals[:current_index, :n_depth_use], axis=1)\n",
-    "                < threshold_high_inner\n",
-    "            )[0]\n",
-    "            if len(nonpassives) == 0:\n",
-    "                indices_passive_start[-1] = 0\n",
-    "            else:\n",
-    "                indices_passive_start[-1] = min(\n",
-    "                    indices_passive_start[-1],\n",
-    "                    nonpassives[-1] + 1,\n",
-    "                )\n",
-    "\n",
-    "        # Combine with preceding passive segments if they overlap\n",
-    "        while (\n",
-    "            len(indices_passive_start) > 1\n",
-    "            and indices_passive_start[-1] <= indices_passive_end[-2]\n",
-    "        ):\n",
-    "            indices_passive_start = indices_passive_start[:-1]\n",
-    "            indices_passive_end = indices_passive_end[:-2] + indices_passive_end[-1:]\n",
-    "\n",
-    "    return np.array(indices_passive_start), np.array(indices_passive_end)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "sample_paths = []\n",
-    "for dataset in [\"MinasPassage\", \"GrandPassage\", \"mobile\"]:\n",
-    "    for partition in [\"train\", \"validate\", \"test\"]:\n",
-    "        sample_paths += [\n",
-    "            os.path.join(dataset, pth)\n",
-    "            for pth in loader.get_partition_list(partition, dataset=dataset)\n",
-    "        ]\n",
-    "sample_paths = sorted(sample_paths)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "sample_paths"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "print(\"Begin running on {} paths\\n\".format(len(sample_paths)))\n",
-    "\n",
-    "for i_sample, sample_path in enumerate(sample_paths):\n",
-    "\n",
-    "    print(\n",
-    "        \"{:4d}/{:4d} {}\".format(\n",
-    "            i_sample + 1,\n",
-    "            len(sample_paths),\n",
-    "            datetime.datetime.now().strftime(\"%A, %B %d, %Y at %H:%M:%S\"),\n",
-    "        )\n",
-    "    )\n",
-    "    print(sample_path)\n",
-    "\n",
-    "    fname_raw = os.path.join(root_data_dir, sample_path + \"_Sv_raw.csv\")\n",
-    "    ts_raw, depths_raw, signals_raw = echofilter.raw.loader.transect_loader(\n",
-    "        fname_raw, warn_row_overflow=0\n",
-    "    )\n",
-    "    is_upward_facing = depths_raw[-1] < depths_raw[0]\n",
-    "\n",
-    "    nt = len(ts_raw)\n",
-    "    print(\"length: {}\".format(nt))\n",
-    "    its_raw = np.arange(len(ts_raw))\n",
-    "\n",
-    "    if \"december2017\" in sample_path:\n",
-    "        psx = np.array([])\n",
-    "        pex = np.array([])\n",
-    "    elif \"march2018\" in sample_path:\n",
-    "        psx = np.arange(0, nt, 360)\n",
-    "        pex = psx + 60\n",
-    "    elif \"september2018\" in sample_path:\n",
-    "        psx = np.arange(300, nt, 360)\n",
-    "        pex = psx + 60\n",
-    "    elif \"GrandPassage\" in sample_path:\n",
-    "        psx = np.array([0, 3120, 6540, 9960, 13380])\n",
-    "        psx = psx[psx < nt]\n",
-    "        pex = np.r_[120, psx[1:] + 420]\n",
-    "        pex = np.minimum(pex, nt)\n",
-    "    else:\n",
-    "        psx = None\n",
-    "        pex = None\n",
-    "\n",
-    "    def tidy_up_line(t, d):\n",
-    "        if d is None:\n",
-    "            return np.nan * np.ones_like(ts_raw)\n",
-    "        is_usable = np.isfinite(d)\n",
-    "        if np.sum(is_usable) > 0:\n",
-    "            t = t[is_usable]\n",
-    "            d = d[is_usable]\n",
-    "        return np.interp(ts_raw, t, d)\n",
-    "\n",
-    "    ps1, pe1 = find_passive_data(signals_raw)\n",
-    "    ps2, pe2 = find_passive_data_v2(signals_raw)\n",
-    "\n",
-    "    fname_surface = os.path.join(root_data_dir, sample_path + \"_surface.evl\")\n",
-    "\n",
-    "    if os.path.isfile(fname_surface):\n",
-    "        t_surface, d_surface = loader.evl_loader(fname_surface)\n",
-    "    elif is_upward_facing:\n",
-    "        print(\n",
-    "            \"ERROR: Expected {} to exist when transect is upfacing.\".format(\n",
-    "                fname_surface\n",
-    "            )\n",
-    "        )\n",
-    "        t_surface = ts_raw\n",
-    "        d_surface = np.zeros_like(ts_raw)\n",
-    "    else:\n",
-    "        # Default surface depth of 0m for downward facing data\n",
-    "        t_surface = ts_raw\n",
-    "        d_surface = np.zeros_like(ts_raw)\n",
-    "\n",
-    "    # Find location of passive data.\n",
-    "    # Try to determine passive data as whenever the surface line is undefined.\n",
-    "    d_surface[np.isclose(d_surface, -10000.99)] = np.nan\n",
-    "    is_passive = np.isnan(d_surface)\n",
-    "    ps3, pe3 = echofilter.utils.get_indicator_onoffsets(is_passive > 0.5)\n",
-    "    ps3 = np.asarray(ps3)\n",
-    "    pe3 = np.asarray(pe3) + 1\n",
-    "    pl3 = pe3 - ps3\n",
-    "    li = pl3 >= 3\n",
-    "    ps3 = ps3[li]\n",
-    "    pe3 = pe3[li]\n",
-    "    if np.sum(~li) > 0:\n",
-    "        print(\"popped {} from v3, with lengths {}\".format(np.sum(~li), pl3[~li]))\n",
-    "\n",
-    "    print(\"starts:\")\n",
-    "    print(\"xp:\", psx)\n",
-    "    print(\"v1:\", ps1)\n",
-    "    print(\"v2:\", ps2)\n",
-    "    print(\"v3:\", ps3)\n",
-    "    print(\"ends:\")\n",
-    "    print(\"xp:\", pex)\n",
-    "    print(\"v1:\", pe1)\n",
-    "    print(\"v2:\", pe2)\n",
-    "    print(\"v3:\", pe3)\n",
-    "    print(\"durations:\")\n",
-    "    if pex is not None:\n",
-    "        print(\"xp:\", pex - psx)\n",
-    "    print(\"v1:\", pe1 - ps1)\n",
-    "    print(\"v2:\", pe2 - ps2)\n",
-    "    if ps3 is not None:\n",
-    "        print(\"v3:\", pe3 - ps3)\n",
-    "    print(\"\")\n",
-    "\n",
-    "    if (\n",
-    "        len(ps1) != len(ps2)\n",
-    "        or len(pe1) != len(pe2)\n",
-    "        or not np.allclose(ps1, ps2)\n",
-    "        or not np.allclose(pe1, pe2)\n",
-    "    ):\n",
-    "        print(\"Warning: DIFFERENT PASSIVE PREDICTIONS v1/v2\")\n",
-    "    if (\n",
-    "        ps3 is not None\n",
-    "        and pe3 is not None\n",
-    "        and (\n",
-    "            len(ps3) != len(ps2)\n",
-    "            or len(pe3) != len(pe2)\n",
-    "            or not np.allclose(ps3, ps2)\n",
-    "            or not np.allclose(pe3, pe2)\n",
-    "        )\n",
-    "    ):\n",
-    "        print(\"Warning: DIFFERENT PASSIVE PREDICTIONS v2/v3\")\n",
-    "    if (\n",
-    "        psx is not None\n",
-    "        and pex is not None\n",
-    "        and (\n",
-    "            len(psx) != len(ps1)\n",
-    "            or len(pex) != len(pe1)\n",
-    "            or not np.allclose(psx, ps1)\n",
-    "            or not np.allclose(pex, pe1)\n",
-    "        )\n",
-    "    ):\n",
-    "        print(\"Warning: PASSIVE v1 DISAGREES WITH EXPECTED\")\n",
-    "    if (\n",
-    "        psx is not None\n",
-    "        and pex is not None\n",
-    "        and (\n",
-    "            len(psx) != len(ps2)\n",
-    "            or len(pex) != len(pe2)\n",
-    "            or not np.allclose(psx, ps2)\n",
-    "            or not np.allclose(pex, pe2)\n",
-    "        )\n",
-    "    ):\n",
-    "        print(\"Warning: PASSIVE v2 DISAGREES WITH EXPECTED\")\n",
-    "    if (\n",
-    "        ps3 is not None\n",
-    "        and pe3 is not None\n",
-    "        and psx is not None\n",
-    "        and pex is not None\n",
-    "        and (\n",
-    "            len(psx) != len(ps3)\n",
-    "            or len(pex) != len(pe3)\n",
-    "            or not np.allclose(psx, ps3)\n",
-    "            or not np.allclose(pex, pe3)\n",
-    "        )\n",
-    "    ):\n",
-    "        print(\"Warning: PASSIVE v3 DISAGREES WITH EXPECTED\")\n",
-    "\n",
-    "    print(\"\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "sample_paths = []\n",
-    "dataset = \"mobile\"\n",
-    "sample_paths = [\n",
-    "    os.path.join(dataset, pth)\n",
-    "    for pth in loader.get_partition_list(\"leaveout\", dataset=dataset)\n",
-    "]\n",
-    "sample_paths = sorted(sample_paths)\n",
-    "\n",
-    "\n",
-    "print(\"Begin running on {} paths\\n\".format(len(sample_paths)))\n",
-    "\n",
-    "for i_sample, sample_path in enumerate(sample_paths):\n",
-    "\n",
-    "    print(\n",
-    "        \"{:4d}/{:4d} {}\".format(\n",
-    "            i_sample + 1,\n",
-    "            len(sample_paths),\n",
-    "            datetime.datetime.now().strftime(\"%A, %B %d, %Y at %H:%M:%S\"),\n",
-    "        )\n",
-    "    )\n",
-    "    print(sample_path)\n",
-    "\n",
-    "    fname_raw = os.path.join(root_data_dir, sample_path + \"_Sv_raw.csv\")\n",
-    "    ts_raw, depths_raw, signals_raw = echofilter.raw.loader.transect_loader(\n",
-    "        fname_raw, warn_row_overflow=0\n",
-    "    )\n",
-    "    is_upward_facing = depths_raw[-1] < depths_raw[0]\n",
-    "\n",
-    "    nt = len(ts_raw)\n",
-    "    print(\"length: {}\".format(nt))\n",
-    "    its_raw = np.arange(len(ts_raw))\n",
-    "\n",
-    "    if \"december2017\" in sample_path:\n",
-    "        psx = np.array([])\n",
-    "        pex = np.array([])\n",
-    "    elif \"march2018\" in sample_path:\n",
-    "        psx = np.arange(0, nt, 360)\n",
-    "        pex = psx + 60\n",
-    "    elif \"september2018\" in sample_path:\n",
-    "        psx = np.arange(300, nt, 360)\n",
-    "        pex = psx + 60\n",
-    "    elif \"GrandPassage\" in sample_path:\n",
-    "        psx = np.array([0, 3120, 6540, 9960, 13380])\n",
-    "        psx = psx[psx < nt]\n",
-    "        pex = np.r_[120, psx[1:] + 420]\n",
-    "        pex = np.minimum(pex, nt)\n",
-    "    else:\n",
-    "        psx = None\n",
-    "        pex = None\n",
-    "\n",
-    "    ps1, pe1 = find_passive_data(signals_raw)\n",
-    "    ps2, pe2 = find_passive_data_v2(signals_raw)\n",
-    "\n",
-    "    fname_surface = os.path.join(root_data_dir, sample_path + \"_surface.evl\")\n",
-    "\n",
-    "    if os.path.isfile(fname_surface):\n",
-    "        t_surface, d_surface = loader.evl_loader(fname_surface)\n",
-    "    elif is_upward_facing:\n",
-    "        print(\n",
-    "            \"ERROR: Expected {} to exist when transect is upfacing.\".format(\n",
-    "                fname_surface\n",
-    "            )\n",
-    "        )\n",
-    "        t_surface = ts_raw\n",
-    "        d_surface = np.zeros_like(ts_raw)\n",
-    "    else:\n",
-    "        # Default surface depth of 0m for downward facing data\n",
-    "        t_surface = ts_raw\n",
-    "        d_surface = np.zeros_like(ts_raw)\n",
-    "\n",
-    "    # Find location of passive data.\n",
-    "    # Try to determine passive data as whenever the surface line is undefined.\n",
-    "    d_surface[np.isclose(d_surface, -10000.99)] = np.nan\n",
-    "    is_passive = np.isnan(d_surface)\n",
-    "    ps3, pe3 = echofilter.utils.get_indicator_onoffsets(is_passive > 0.5)\n",
-    "    ps3 = np.asarray(ps3)\n",
-    "    pe3 = np.asarray(pe3) + 1\n",
-    "    pl3 = pe3 - ps3\n",
-    "    li = pl3 >= 3\n",
-    "    ps3 = ps3[li]\n",
-    "    pe3 = pe3[li]\n",
-    "    if np.sum(~li) > 0:\n",
-    "        print(\"popped {} from v3, with lengths {}\".format(np.sum(~li), pl3[~li]))\n",
-    "\n",
-    "    print(\"starts:\")\n",
-    "    print(\"xp:\", psx)\n",
-    "    print(\"v1:\", ps1)\n",
-    "    print(\"v2:\", ps2)\n",
-    "    print(\"v3:\", ps3)\n",
-    "    print(\"ends:\")\n",
-    "    print(\"xp:\", pex)\n",
-    "    print(\"v1:\", pe1)\n",
-    "    print(\"v2:\", pe2)\n",
-    "    print(\"v3:\", pe3)\n",
-    "    print(\"durations:\")\n",
-    "    if pex is not None:\n",
-    "        print(\"xp:\", pex - psx)\n",
-    "    print(\"v1:\", pe1 - ps1)\n",
-    "    print(\"v2:\", pe2 - ps2)\n",
-    "    if ps3 is not None:\n",
-    "        print(\"v3:\", pe3 - ps3)\n",
-    "    print(\"\")\n",
-    "\n",
-    "    if (\n",
-    "        len(ps1) != len(ps2)\n",
-    "        or len(pe1) != len(pe2)\n",
-    "        or not np.allclose(ps1, ps2)\n",
-    "        or not np.allclose(pe1, pe2)\n",
-    "    ):\n",
-    "        print(\"Warning: DIFFERENT PASSIVE PREDICTIONS v1/v2\")\n",
-    "    if (\n",
-    "        ps3 is not None\n",
-    "        and pe3 is not None\n",
-    "        and (\n",
-    "            len(ps3) != len(ps2)\n",
-    "            or len(pe3) != len(pe2)\n",
-    "            or not np.allclose(ps3, ps2)\n",
-    "            or not np.allclose(pe3, pe2)\n",
-    "        )\n",
-    "    ):\n",
-    "        print(\"Warning: DIFFERENT PASSIVE PREDICTIONS v2/v3\")\n",
-    "    if (\n",
-    "        psx is not None\n",
-    "        and pex is not None\n",
-    "        and (\n",
-    "            len(psx) != len(ps1)\n",
-    "            or len(pex) != len(pe1)\n",
-    "            or not np.allclose(psx, ps1)\n",
-    "            or not np.allclose(pex, pe1)\n",
-    "        )\n",
-    "    ):\n",
-    "        print(\"Warning: PASSIVE v1 DISAGREES WITH EXPECTED\")\n",
-    "    if (\n",
-    "        psx is not None\n",
-    "        and pex is not None\n",
-    "        and (\n",
-    "            len(psx) != len(ps2)\n",
-    "            or len(pex) != len(pe2)\n",
-    "            or not np.allclose(psx, ps2)\n",
-    "            or not np.allclose(pex, pe2)\n",
-    "        )\n",
-    "    ):\n",
-    "        print(\"Warning: PASSIVE v2 DISAGREES WITH EXPECTED\")\n",
-    "    if (\n",
-    "        ps3 is not None\n",
-    "        and pe3 is not None\n",
-    "        and psx is not None\n",
-    "        and pex is not None\n",
-    "        and (\n",
-    "            len(psx) != len(ps3)\n",
-    "            or len(pex) != len(pe3)\n",
-    "            or not np.allclose(psx, ps3)\n",
-    "            or not np.allclose(pex, pe3)\n",
-    "        )\n",
-    "    ):\n",
-    "        print(\"Warning: PASSIVE v3 DISAGREES WITH EXPECTED\")\n",
-    "\n",
-    "    print(\"\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "bad_sample_paths = [\n",
-    "    \"MinasPassage/september2018/september2018_D20181021-T165220_D20181021-T222221\",\n",
-    "    \"MinasPassage/september2018/september2018_D20181022-T105220_D20181022-T162217\",\n",
-    "    \"MinasPassage/september2018/september2018_D20181022-T172213_D20181022-T232217\",\n",
-    "    \"MinasPassage/september2018/september2018_D20181026-T082220_D20181026-T135213\",\n",
-    "    \"MinasPassage/september2018/september2018_D20181026-T142217_D20181026-T195218\",\n",
-    "]\n",
-    "# bad_sample_paths = [\n",
-    "# \"MinasPassage/september2018/september2018_D20180928-T202217_D20180929-T015217\",\n",
-    "# \"MinasPassage/september2018/september2018_D20181008-T235218_D20181009-T052220\",\n",
-    "# \"MinasPassage/september2018/september2018_D20181021-T045220_D20181021-T102218\",\n",
-    "# ]\n",
-    "# bad_sample_paths = [\n",
-    "#    \"GrandPassage/phase2/GrandPassage_WBAT_2B_20200130_UTC020017_floodhigh\",\n",
-    "# ]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "print(\"Begin running on {} paths\\n\".format(len(bad_sample_paths)))\n",
-    "\n",
-    "for i_sample, sample_path in enumerate(bad_sample_paths):\n",
-    "\n",
-    "    print(\n",
-    "        \"{:4d}/{:4d} {}\".format(\n",
-    "            i_sample + 1,\n",
-    "            len(bad_sample_paths),\n",
-    "            datetime.datetime.now().strftime(\"%A, %B %d, %Y at %H:%M:%S\"),\n",
-    "        )\n",
-    "    )\n",
-    "    print(sample_path)\n",
-    "\n",
-    "    fname_raw = os.path.join(root_data_dir, sample_path + \"_Sv_raw.csv\")\n",
-    "    ts_raw, depths_raw, signals_raw = echofilter.raw.loader.transect_loader(\n",
-    "        fname_raw, warn_row_overflow=0\n",
-    "    )\n",
-    "    is_upward_facing = depths_raw[-1] < depths_raw[0]\n",
-    "\n",
-    "    nt = len(ts_raw)\n",
-    "    print(\"length: {}\".format(nt))\n",
-    "    its_raw = np.arange(len(ts_raw))\n",
-    "\n",
-    "    if \"december2017\" in sample_path:\n",
-    "        psx = np.array([])\n",
-    "        pex = np.array([])\n",
-    "    elif \"march2018\" in sample_path:\n",
-    "        psx = np.arange(0, nt, 360)\n",
-    "        pex = psx + 60\n",
-    "    elif \"september2018\" in sample_path:\n",
-    "        psx = np.arange(300, nt, 360)\n",
-    "        pex = psx + 60\n",
-    "    elif \"GrandPassage\" in sample_path:\n",
-    "        psx = np.array([0, 3120, 6540, 9960, 13380])\n",
-    "        psx = psx[psx < nt]\n",
-    "        pex = np.r_[120, psx[1:] + 420]\n",
-    "        pex = np.minimum(pex, nt)\n",
-    "    else:\n",
-    "        psx = None\n",
-    "        pex = None\n",
-    "\n",
-    "    ps1, pe1 = find_passive_data(signals_raw)\n",
-    "    ps2, pe2 = find_passive_data_v2(signals_raw)\n",
-    "\n",
-    "    fname_surface = os.path.join(root_data_dir, sample_path + \"_surface.evl\")\n",
-    "\n",
-    "    if os.path.isfile(fname_surface):\n",
-    "        t_surface, d_surface = loader.evl_loader(fname_surface)\n",
-    "    elif is_upward_facing:\n",
-    "        print(\n",
-    "            \"ERROR: Expected {} to exist when transect is upfacing.\".format(\n",
-    "                fname_surface\n",
-    "            )\n",
-    "        )\n",
-    "        t_surface = ts_raw\n",
-    "        d_surface = np.zeros_like(ts_raw)\n",
-    "    else:\n",
-    "        # Default surface depth of 0m for downward facing data\n",
-    "        t_surface = ts_raw\n",
-    "        d_surface = np.zeros_like(ts_raw)\n",
-    "\n",
-    "    # Find location of passive data.\n",
-    "    # Try to determine passive data as whenever the surface line is undefined.\n",
-    "    d_surface[np.isclose(d_surface, -10000.99)] = np.nan\n",
-    "    is_passive = np.isnan(d_surface)\n",
-    "    ps3, pe3 = echofilter.utils.get_indicator_onoffsets(is_passive > 0.5)\n",
-    "    ps3 = np.asarray(ps3)\n",
-    "    pe3 = np.asarray(pe3) + 1\n",
-    "    pl3 = pe3 - ps3\n",
-    "    li = pl3 >= 3\n",
-    "    ps3 = ps3[li]\n",
-    "    pe3 = pe3[li]\n",
-    "    if np.sum(~li) > 0:\n",
-    "        print(\"popped {} from v3, with lengths {}\".format(np.sum(~li), pl3[~li]))\n",
-    "\n",
-    "    print(\"starts:\")\n",
-    "    print(\"xp:\", psx)\n",
-    "    print(\"v1:\", ps1)\n",
-    "    print(\"v2:\", ps2)\n",
-    "    print(\"v3:\", ps3)\n",
-    "    print(\"ends:\")\n",
-    "    print(\"xp:\", pex)\n",
-    "    print(\"v1:\", pe1)\n",
-    "    print(\"v2:\", pe2)\n",
-    "    print(\"v3:\", pe3)\n",
-    "    print(\"durations:\")\n",
-    "    if pex is not None:\n",
-    "        print(\"xp:\", pex - psx)\n",
-    "    print(\"v1:\", pe1 - ps1)\n",
-    "    print(\"v2:\", pe2 - ps2)\n",
-    "    if ps3 is not None:\n",
-    "        print(\"v3:\", pe3 - ps3)\n",
-    "    print(\"\")\n",
-    "\n",
-    "    if (\n",
-    "        len(ps1) != len(ps2)\n",
-    "        or len(pe1) != len(pe2)\n",
-    "        or not np.allclose(ps1, ps2)\n",
-    "        or not np.allclose(pe1, pe2)\n",
-    "    ):\n",
-    "        print(\"Warning: DIFFERENT PASSIVE PREDICTIONS v1/v2\")\n",
-    "    if (\n",
-    "        ps3 is not None\n",
-    "        and pe3 is not None\n",
-    "        and (\n",
-    "            len(ps3) != len(ps2)\n",
-    "            or len(pe3) != len(pe2)\n",
-    "            or not np.allclose(ps3, ps2)\n",
-    "            or not np.allclose(pe3, pe2)\n",
-    "        )\n",
-    "    ):\n",
-    "        print(\"Warning: DIFFERENT PASSIVE PREDICTIONS v2/v3\")\n",
-    "    if (\n",
-    "        psx is not None\n",
-    "        and pex is not None\n",
-    "        and (\n",
-    "            len(psx) != len(ps1)\n",
-    "            or len(pex) != len(pe1)\n",
-    "            or not np.allclose(psx, ps1)\n",
-    "            or not np.allclose(pex, pe1)\n",
-    "        )\n",
-    "    ):\n",
-    "        print(\"Warning: PASSIVE v1 DISAGREES WITH EXPECTED\")\n",
-    "    if (\n",
-    "        psx is not None\n",
-    "        and pex is not None\n",
-    "        and (\n",
-    "            len(psx) != len(ps2)\n",
-    "            or len(pex) != len(pe2)\n",
-    "            or not np.allclose(psx, ps2)\n",
-    "            or not np.allclose(pex, pe2)\n",
-    "        )\n",
-    "    ):\n",
-    "        print(\"Warning: PASSIVE v2 DISAGREES WITH EXPECTED\")\n",
-    "    if (\n",
-    "        ps3 is not None\n",
-    "        and pe3 is not None\n",
-    "        and psx is not None\n",
-    "        and pex is not None\n",
-    "        and (\n",
-    "            len(psx) != len(ps3)\n",
-    "            or len(pex) != len(pe3)\n",
-    "            or not np.allclose(psx, ps3)\n",
-    "            or not np.allclose(pex, pe3)\n",
-    "        )\n",
-    "    ):\n",
-    "        print(\"Warning: PASSIVE v3 DISAGREES WITH EXPECTED\")\n",
-    "\n",
-    "    best_ps = ps1\n",
-    "    best_pe = pe1\n",
-    "\n",
-    "    for i in range(min(len(best_ps), len(psx))):\n",
-    "        if best_ps[i] == psx[i] and best_pe[i] == pex[i]:\n",
-    "            continue\n",
-    "\n",
-    "        for ps, pe, tit in (\n",
-    "            (psx[i], pex[i], \"expected\"),\n",
-    "            (best_ps[i], best_pe[i], \"v1\"),\n",
-    "        ):\n",
-    "            plt.figure(figsize=(12, 9))\n",
-    "            i0 = max(0, ps - 1)\n",
-    "            i1 = pe + 2\n",
-    "            if i1 >= len(its_raw):\n",
-    "                i1 = None\n",
-    "            plt.pcolormesh(its_raw[i0:i1], depths_raw[:50], signals_raw[i0:i1, :50].T)\n",
-    "            plt.gca().invert_yaxis()\n",
-    "            plt.title(\"passive #{}, {}\".format(i, tit))\n",
-    "            plt.show()\n",
-    "\n",
-    "    print(\"\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.figure(figsize=(12, 9))\n",
-    "plt.pcolormesh(its_raw, depths_raw, signals_raw.T)\n",
-    "plt.gca().invert_yaxis()\n",
-    "plt.title(sample_path)\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "_ = \"\"\"\n",
-    "length: 10259\n",
-    "starts:\n",
-    "xp: [   0 3120 6540 9960]\n",
-    "v1: [   0 3120 6539 9959]\n",
-    "v2: [   0 3120 6539 9959]\n",
-    "v3: [   0 3120 6540 9960]\n",
-    "ends:\n",
-    "xp: [  120  3540  6960 10259]\n",
-    "v1: [  120  3540  6959 10259]\n",
-    "v2: [  120  3540  6701 10259]\n",
-    "v3: [  120  3540  6960 10260]\n",
-    "durations:\n",
-    "xp: [120 420 420 299]\n",
-    "v1: [120 420 420 300]\n",
-    "v2: [120 420 162 300]\n",
-    "v3: [120 420 420 300]\n",
-    "\"\"\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "i0 = 10200\n",
-    "i1 = 10259\n",
-    "i0 -= 1\n",
-    "i1 += 2\n",
-    "plt.figure(figsize=(12, 9))\n",
-    "plt.pcolormesh(its_raw[i0:i1], depths_raw[:50], signals_raw[i0:i1, :50].T)\n",
-    "plt.gca().invert_yaxis()\n",
-    "plt.title(\"{}-{}\".format(i0, i1 - 1))\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "sample_path = (\n",
-    "    \"MinasPassage/december2017/december2017_D20180222-T145219_D20180222-T142214\"\n",
-    ")\n",
-    "\n",
-    "fname_raw = os.path.join(root_data_dir, sample_path + \"_Sv_raw.csv\")\n",
-    "ts_raw, depths_raw, signals_raw = echofilter.raw.loader.transect_loader(\n",
-    "    fname_raw, warn_row_overflow=0\n",
-    ")\n",
-    "is_upward_facing = depths_raw[-1] < depths_raw[0]\n",
-    "\n",
-    "nt = len(ts_raw)\n",
-    "print(\"length: {}\".format(nt))\n",
-    "its_raw = np.arange(len(ts_raw))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.figure(figsize=(12, 9))\n",
-    "plt.pcolormesh(its_raw, depths_raw, signals_raw.T)\n",
-    "plt.gca().invert_yaxis()\n",
-    "plt.title(sample_path)\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "i0 = 250\n",
-    "i1 = 350\n",
-    "i0 -= 1\n",
-    "i1 += 2\n",
-    "plt.figure(figsize=(12, 9))\n",
-    "plt.pcolormesh(its_raw[i0:i1], depths_raw[:50], signals_raw[i0:i1, :50].T)\n",
-    "plt.gca().invert_yaxis()\n",
-    "plt.title(\"{}-{}\".format(i0, i1 - 1))\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "bad_sample_paths = [\n",
-    "    \"mobile/Survey01/Survey01_GR1_S1A_E\",\n",
-    "    \"mobile/Survey03/Survey03_GR2_N5W_survey3\",\n",
-    "    \"mobile/Survey03/Survey03_GR4_N0A_survey3\",\n",
-    "    \"mobile/Survey04/Survey04_GR1_N3A\",\n",
-    "    \"mobile/Survey04/Survey04_GR2_N5A\",\n",
-    "    \"mobile/Survey05/Survey05_GR1_N1A_survey5\",\n",
-    "    \"mobile/Survey07/Survey07_GR2_N1W_survey7\",\n",
-    "    \"mobile/Survey10/Survey10_GR1_N0A_E\",\n",
-    "    \"mobile/Survey12/Survey12_GR4_N5A_E\",\n",
-    "    \"mobile/Survey01/Survey01_GR1_S2A_E\",\n",
-    "    \"mobile/Survey01/Survey01_GR1_S2W_E\",\n",
-    "    \"mobile/Survey11/Survey11_GR1_S2A_E\",\n",
-    "]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "print(\"Begin running on {} paths\\n\".format(len(bad_sample_paths)))\n",
-    "\n",
-    "for i_sample, sample_path in enumerate(bad_sample_paths):\n",
-    "\n",
-    "    print(\n",
-    "        \"{:4d}/{:4d} {}\".format(\n",
-    "            i_sample + 1,\n",
-    "            len(bad_sample_paths),\n",
-    "            datetime.datetime.now().strftime(\"%A, %B %d, %Y at %H:%M:%S\"),\n",
-    "        )\n",
-    "    )\n",
-    "    print(sample_path)\n",
-    "\n",
-    "    fname_raw = os.path.join(root_data_dir, sample_path + \"_Sv_raw.csv\")\n",
-    "    ts_raw, depths_raw, signals_raw = echofilter.raw.loader.transect_loader(\n",
-    "        fname_raw, warn_row_overflow=0\n",
-    "    )\n",
-    "    is_upward_facing = depths_raw[-1] < depths_raw[0]\n",
-    "\n",
-    "    nt = len(ts_raw)\n",
-    "    print(\"length: {}\".format(nt))\n",
-    "    its_raw = np.arange(len(ts_raw))\n",
-    "\n",
-    "    plt.figure(figsize=(12, 9))\n",
-    "    plt.pcolormesh(its_raw, depths_raw, signals_raw.T)\n",
-    "    plt.gca().invert_yaxis()\n",
-    "    plt.title(sample_path)\n",
-    "    plt.show()\n",
-    "\n",
-    "    print(\"\")"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.7.15"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 4
+  "nbformat": 4,
+  "nbformat_minor": 4
 }
diff --git a/notebooks/Plot Metrics Distribution.ipynb b/notebooks/Plot Metrics Distribution.ipynb
index a1e0418e..6dbe7a4a 100644
--- a/notebooks/Plot Metrics Distribution.ipynb	
+++ b/notebooks/Plot Metrics Distribution.ipynb	
@@ -1,145 +1,145 @@
 {
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import pandas as pd\n",
-    "import scipy.stats"
-   ]
+  "cells": [
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import pandas as pd\n",
+        "import scipy.stats"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import matplotlib.pyplot as plt\n",
+        "import seaborn as sns\n",
+        "\n",
+        "sns.set()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "df = pd.read_csv(\"model_best.meters.csv\")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "df"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "df = df.drop(columns=[\"Accuracy\", \"Precision\", \"Recall\", \"F1\", \"Jaccard\"])"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "df"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "df.columns"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "for col_name in df.columns:\n",
+        "    sns.distplot(df[col_name])\n",
+        "    plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.figure(figsize=(10, 10))\n",
+        "plt.plot([5, 85], [5, 85], \"-\", color=(0.3, 0.3, 0.3))\n",
+        "sns.scatterplot(df[\"Active target (bottom)\"], df[\"Active output (bottom)\"])\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "scipy.stats.pearsonr(df[\"Active target (bottom)\"], df[\"Active output (bottom)\"])"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.figure(figsize=(10, 10))\n",
+        "plt.plot([0, 50], [0, 50], \"-\", color=(0.3, 0.3, 0.3))\n",
+        "sns.scatterplot(df[\"Active target (top)\"], df[\"Active output (top)\"])\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "scipy.stats.pearsonr(df[\"Active target (top)\"], df[\"Active output (top)\"])"
+      ]
+    }
+  ],
+  "metadata": {
+    "kernelspec": {
+      "display_name": "Python 3",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.6.9"
+    }
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import matplotlib.pyplot as plt\n",
-    "import seaborn as sns\n",
-    "\n",
-    "sns.set()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "df = pd.read_csv(\"model_best.meters.csv\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "df"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "df = df.drop(columns=[\"Accuracy\", \"Precision\", \"Recall\", \"F1\", \"Jaccard\"])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "df"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "df.columns"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "for col_name in df.columns:\n",
-    "    sns.distplot(df[col_name])\n",
-    "    plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.figure(figsize=(10, 10))\n",
-    "plt.plot([5, 85], [5, 85], \"-\", color=(0.3, 0.3, 0.3))\n",
-    "sns.scatterplot(df[\"Active target (bottom)\"], df[\"Active output (bottom)\"])\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "scipy.stats.pearsonr(df[\"Active target (bottom)\"], df[\"Active output (bottom)\"])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.figure(figsize=(10, 10))\n",
-    "plt.plot([0, 50], [0, 50], \"-\", color=(0.3, 0.3, 0.3))\n",
-    "sns.scatterplot(df[\"Active target (top)\"], df[\"Active output (top)\"])\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "scipy.stats.pearsonr(df[\"Active target (top)\"], df[\"Active output (top)\"])"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.6.9"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
+  "nbformat": 4,
+  "nbformat_minor": 2
 }
diff --git a/notebooks/Plot results.ipynb b/notebooks/Plot results.ipynb
index 0d8a9c9f..2706ccfe 100644
--- a/notebooks/Plot results.ipynb	
+++ b/notebooks/Plot results.ipynb	
@@ -1,326 +1,326 @@
 {
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "cd .."
-   ]
+  "cells": [
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "cd .."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import copy\n",
+        "import os"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import numpy as np"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import matplotlib.pyplot as plt\n",
+        "import seaborn as sns\n",
+        "\n",
+        "sns.set()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import echofilter.plotting\n",
+        "import echofilter.raw"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "def declare_cc_cmap(cmap, name):\n",
+        "    \"\"\"\n",
+        "    Register a colorcet colormap in matplotlib.pyplot.\n",
+        "\n",
+        "    Parameters\n",
+        "    ----------\n",
+        "    cmap : list of hex str\n",
+        "        List of colors in the colormap, with equispaced\n",
+        "        samples. Each entry should be a hexadecimal\n",
+        "        string.\n",
+        "    name : str\n",
+        "        Name of the colormap. The colormap will be\n",
+        "        available in matplotlib with this name, and can\n",
+        "        be used with `plt.set_cmap(name)`.\n",
+        "    \"\"\"\n",
+        "    n = len(cmap)\n",
+        "    R = [int(h.lstrip(\"#\")[:2], 16) / 255 for h in cmap]\n",
+        "    G = [int(h.lstrip(\"#\")[2:4], 16) / 255 for h in cmap]\n",
+        "    B = [int(h.lstrip(\"#\")[4:], 16) / 255 for h in cmap]\n",
+        "\n",
+        "    R = [(i / (n - 1), v, v) for i, v in enumerate(R)]\n",
+        "    G = [(i / (n - 1), v, v) for i, v in enumerate(G)]\n",
+        "    B = [(i / (n - 1), v, v) for i, v in enumerate(B)]\n",
+        "\n",
+        "    cdict = {\"red\": R, \"green\": G, \"blue\": B}\n",
+        "\n",
+        "    plt.register_cmap(name=name, data=cdict, lut=n)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "try:\n",
+        "    import colorcet as cc\n",
+        "\n",
+        "    declare_cc_cmap(cc.fire, \"fire\")\n",
+        "    print(\"Declared fire colormap.\")\n",
+        "    declare_cc_cmap(cc.rainbow, \"rainbow\")\n",
+        "    print(\"Declared rainbow colormap.\")\n",
+        "except ImportError:\n",
+        "    print(\"colorcet not installed\")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "turbulence_color2 = echofilter.plotting.TURBULENCE_COLOR_DARK\n",
+        "bottom_color2 = echofilter.plotting.BOTTOM_COLOR_DARK"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "root_data_dir = \"/media/scott/scratch/Datasets/dsforce/surveyExports\"\n",
+        "\n",
+        "# first val sample for stationary\n",
+        "dataset = \"MinasPassage\"\n",
+        "sample = \"december2017/december2017_D20180108-T045216_D20180108-T102216\"\n",
+        "\n",
+        "# first val sample for mobile\n",
+        "# dataset = 'mobile'\n",
+        "# sample = 'Survey05/Survey05_GR1_S1W_survey5'\n",
+        "\n",
+        "# sample = 'Survey07/Survey07_GR4_N5W_survey7'"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "transect = echofilter.raw.manipulate.load_decomposed_transect_mask(\n",
+        "    os.path.join(root_data_dir, dataset, sample),\n",
+        ")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.figure(figsize=(15, 9))\n",
+        "echofilter.plotting.plot_transect(transect, x_scale=\"index\")\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "fname_top1 = os.path.join(root_data_dir, dataset, sample + \"_turbulence.evl\")\n",
+        "fname_top2 = os.path.join(root_data_dir, dataset, sample + \"_air.evl\")\n",
+        "fname_bot = os.path.join(root_data_dir, dataset, sample + \"_bottom.evl\")\n",
+        "\n",
+        "if os.path.isfile(fname_top1):\n",
+        "    t_top, d_top = echofilter.raw.loader.evl_loader(fname_top1)\n",
+        "elif os.path.isfile(fname_top2):\n",
+        "    t_top, d_top = echofilter.raw.loader.evl_loader(fname_top2)\n",
+        "else:\n",
+        "    t_top = d_top = None\n",
+        "if os.path.isfile(fname_bot):\n",
+        "    t_bot, d_bot = echofilter.raw.loader.evl_loader(fname_bot)\n",
+        "else:\n",
+        "    t_bot = d_bot = None"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "processed_dir = \"/home/scott/Documents/git/deepsense/dsforce/echofilter/processed/\"\n",
+        "fname_top = os.path.join(processed_dir, dataset, sample + \"_Sv_raw.csv.top.evl\")\n",
+        "fname_bot = os.path.join(processed_dir, dataset, sample + \"_Sv_raw.csv.bottom.evl\")\n",
+        "\n",
+        "t_top_gen, d_top_gen = echofilter.raw.loader.evl_loader(fname_top)\n",
+        "t_bot_gen, d_bot_gen = echofilter.raw.loader.evl_loader(fname_bot)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.plot(\n",
+        "    np.arange(transect[\"timestamps\"].shape[0]),\n",
+        "    np.interp(transect[\"timestamps\"], t_top_gen, d_top_gen),\n",
+        "    turbulence_color2,\n",
+        ")\n",
+        "plt.plot(\n",
+        "    np.arange(transect[\"timestamps\"].shape[0]),\n",
+        "    np.interp(transect[\"timestamps\"], t_bot_gen, d_bot_gen),\n",
+        "    bottom_color2,\n",
+        ")\n",
+        "plt.gca().invert_yaxis()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.figure(figsize=(15, 9))\n",
+        "echofilter.plotting.plot_transect(\n",
+        "    transect, x_scale=\"time\" if dataset == \"mobile\" else \"index\"\n",
+        ")\n",
+        "\n",
+        "if dataset == \"mobile\":\n",
+        "    tt = transect[\"timestamps\"] - transect[\"timestamps\"][0]\n",
+        "else:\n",
+        "    tt = np.arange(transect[\"timestamps\"].shape[0])\n",
+        "plt.plot(\n",
+        "    tt,\n",
+        "    np.interp(transect[\"timestamps\"], t_top_gen, d_top_gen),\n",
+        "    turbulence_color2,\n",
+        "    linewidth=2,\n",
+        ")\n",
+        "plt.plot(\n",
+        "    tt,\n",
+        "    np.interp(transect[\"timestamps\"], t_bot_gen, d_bot_gen),\n",
+        "    bottom_color2,\n",
+        "    linewidth=2,\n",
+        ")\n",
+        "if dataset == \"mobile\":\n",
+        "    plt.ylim([0, 67])\n",
+        "    plt.gca().invert_yaxis()\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "cmap = \"viridis\""
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.figure(figsize=(15, 9))\n",
+        "echofilter.plotting.plot_transect(\n",
+        "    transect,\n",
+        "    x_scale=\"time\" if dataset == \"mobile\" else \"index\",\n",
+        "    turbulence_color=\"k\",\n",
+        "    bottom_color=\"k\",\n",
+        "    surface_color=\"k\",\n",
+        "    passive_color=\"k\",\n",
+        "    removed_color=\"k\",\n",
+        "    cmap=cmap,\n",
+        ")\n",
+        "\n",
+        "if dataset == \"mobile\":\n",
+        "    tt = transect[\"timestamps\"] - transect[\"timestamps\"][0]\n",
+        "else:\n",
+        "    tt = np.arange(transect[\"timestamps\"].shape[0])\n",
+        "plt.plot(\n",
+        "    tt,\n",
+        "    np.interp(transect[\"timestamps\"], t_top_gen, d_top_gen),\n",
+        "    \"w\",\n",
+        "    linewidth=2,\n",
+        ")\n",
+        "plt.plot(\n",
+        "    tt,\n",
+        "    np.interp(transect[\"timestamps\"], t_bot_gen, d_bot_gen),\n",
+        "    \"w\",\n",
+        "    linewidth=2,\n",
+        ")\n",
+        "if dataset == \"mobile\":\n",
+        "    plt.ylim([0, 67])\n",
+        "else:\n",
+        "    plt.ylim([5, 50])\n",
+        "    plt.xlim([0, 1000])\n",
+        "\n",
+        "plt.gca().invert_yaxis()\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": []
+    }
+  ],
+  "metadata": {
+    "kernelspec": {
+      "display_name": "Python 3",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.6.9"
+    }
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import copy\n",
-    "import os"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import numpy as np"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import matplotlib.pyplot as plt\n",
-    "import seaborn as sns\n",
-    "\n",
-    "sns.set()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import echofilter.plotting\n",
-    "import echofilter.raw"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def declare_cc_cmap(cmap, name):\n",
-    "    \"\"\"\n",
-    "    Register a colorcet colormap in matplotlib.pyplot.\n",
-    "\n",
-    "    Parameters\n",
-    "    ----------\n",
-    "    cmap : list of hex str\n",
-    "        List of colors in the colormap, with equispaced\n",
-    "        samples. Each entry should be a hexadecimal\n",
-    "        string.\n",
-    "    name : str\n",
-    "        Name of the colormap. The colormap will be\n",
-    "        available in matplotlib with this name, and can\n",
-    "        be used with `plt.set_cmap(name)`.\n",
-    "    \"\"\"\n",
-    "    n = len(cmap)\n",
-    "    R = [int(h.lstrip(\"#\")[:2], 16) / 255 for h in cmap]\n",
-    "    G = [int(h.lstrip(\"#\")[2:4], 16) / 255 for h in cmap]\n",
-    "    B = [int(h.lstrip(\"#\")[4:], 16) / 255 for h in cmap]\n",
-    "\n",
-    "    R = [(i / (n - 1), v, v) for i, v in enumerate(R)]\n",
-    "    G = [(i / (n - 1), v, v) for i, v in enumerate(G)]\n",
-    "    B = [(i / (n - 1), v, v) for i, v in enumerate(B)]\n",
-    "\n",
-    "    cdict = {\"red\": R, \"green\": G, \"blue\": B}\n",
-    "\n",
-    "    plt.register_cmap(name=name, data=cdict, lut=n)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "try:\n",
-    "    import colorcet as cc\n",
-    "\n",
-    "    declare_cc_cmap(cc.fire, \"fire\")\n",
-    "    print(\"Declared fire colormap.\")\n",
-    "    declare_cc_cmap(cc.rainbow, \"rainbow\")\n",
-    "    print(\"Declared rainbow colormap.\")\n",
-    "except ImportError:\n",
-    "    print(\"colorcet not installed\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "turbulence_color2 = echofilter.plotting.TURBULENCE_COLOR_DARK\n",
-    "bottom_color2 = echofilter.plotting.BOTTOM_COLOR_DARK"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "root_data_dir = \"/media/scott/scratch/Datasets/dsforce/surveyExports\"\n",
-    "\n",
-    "# first val sample for stationary\n",
-    "dataset = \"MinasPassage\"\n",
-    "sample = \"december2017/december2017_D20180108-T045216_D20180108-T102216\"\n",
-    "\n",
-    "# first val sample for mobile\n",
-    "# dataset = 'mobile'\n",
-    "# sample = 'Survey05/Survey05_GR1_S1W_survey5'\n",
-    "\n",
-    "# sample = 'Survey07/Survey07_GR4_N5W_survey7'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "transect = echofilter.raw.manipulate.load_decomposed_transect_mask(\n",
-    "    os.path.join(root_data_dir, dataset, sample),\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.figure(figsize=(15, 9))\n",
-    "echofilter.plotting.plot_transect(transect, x_scale=\"index\")\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "fname_top1 = os.path.join(root_data_dir, dataset, sample + \"_turbulence.evl\")\n",
-    "fname_top2 = os.path.join(root_data_dir, dataset, sample + \"_air.evl\")\n",
-    "fname_bot = os.path.join(root_data_dir, dataset, sample + \"_bottom.evl\")\n",
-    "\n",
-    "if os.path.isfile(fname_top1):\n",
-    "    t_top, d_top = echofilter.raw.loader.evl_loader(fname_top1)\n",
-    "elif os.path.isfile(fname_top2):\n",
-    "    t_top, d_top = echofilter.raw.loader.evl_loader(fname_top2)\n",
-    "else:\n",
-    "    t_top = d_top = None\n",
-    "if os.path.isfile(fname_bot):\n",
-    "    t_bot, d_bot = echofilter.raw.loader.evl_loader(fname_bot)\n",
-    "else:\n",
-    "    t_bot = d_bot = None"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "processed_dir = \"/home/scott/Documents/git/deepsense/dsforce/echofilter/processed/\"\n",
-    "fname_top = os.path.join(processed_dir, dataset, sample + \"_Sv_raw.csv.top.evl\")\n",
-    "fname_bot = os.path.join(processed_dir, dataset, sample + \"_Sv_raw.csv.bottom.evl\")\n",
-    "\n",
-    "t_top_gen, d_top_gen = echofilter.raw.loader.evl_loader(fname_top)\n",
-    "t_bot_gen, d_bot_gen = echofilter.raw.loader.evl_loader(fname_bot)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.plot(\n",
-    "    np.arange(transect[\"timestamps\"].shape[0]),\n",
-    "    np.interp(transect[\"timestamps\"], t_top_gen, d_top_gen),\n",
-    "    turbulence_color2,\n",
-    ")\n",
-    "plt.plot(\n",
-    "    np.arange(transect[\"timestamps\"].shape[0]),\n",
-    "    np.interp(transect[\"timestamps\"], t_bot_gen, d_bot_gen),\n",
-    "    bottom_color2,\n",
-    ")\n",
-    "plt.gca().invert_yaxis()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.figure(figsize=(15, 9))\n",
-    "echofilter.plotting.plot_transect(\n",
-    "    transect, x_scale=\"time\" if dataset == \"mobile\" else \"index\"\n",
-    ")\n",
-    "\n",
-    "if dataset == \"mobile\":\n",
-    "    tt = transect[\"timestamps\"] - transect[\"timestamps\"][0]\n",
-    "else:\n",
-    "    tt = np.arange(transect[\"timestamps\"].shape[0])\n",
-    "plt.plot(\n",
-    "    tt,\n",
-    "    np.interp(transect[\"timestamps\"], t_top_gen, d_top_gen),\n",
-    "    turbulence_color2,\n",
-    "    linewidth=2,\n",
-    ")\n",
-    "plt.plot(\n",
-    "    tt,\n",
-    "    np.interp(transect[\"timestamps\"], t_bot_gen, d_bot_gen),\n",
-    "    bottom_color2,\n",
-    "    linewidth=2,\n",
-    ")\n",
-    "if dataset == \"mobile\":\n",
-    "    plt.ylim([0, 67])\n",
-    "    plt.gca().invert_yaxis()\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "cmap = \"viridis\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.figure(figsize=(15, 9))\n",
-    "echofilter.plotting.plot_transect(\n",
-    "    transect,\n",
-    "    x_scale=\"time\" if dataset == \"mobile\" else \"index\",\n",
-    "    turbulence_color=\"k\",\n",
-    "    bottom_color=\"k\",\n",
-    "    surface_color=\"k\",\n",
-    "    passive_color=\"k\",\n",
-    "    removed_color=\"k\",\n",
-    "    cmap=cmap,\n",
-    ")\n",
-    "\n",
-    "if dataset == \"mobile\":\n",
-    "    tt = transect[\"timestamps\"] - transect[\"timestamps\"][0]\n",
-    "else:\n",
-    "    tt = np.arange(transect[\"timestamps\"].shape[0])\n",
-    "plt.plot(\n",
-    "    tt,\n",
-    "    np.interp(transect[\"timestamps\"], t_top_gen, d_top_gen),\n",
-    "    \"w\",\n",
-    "    linewidth=2,\n",
-    ")\n",
-    "plt.plot(\n",
-    "    tt,\n",
-    "    np.interp(transect[\"timestamps\"], t_bot_gen, d_bot_gen),\n",
-    "    \"w\",\n",
-    "    linewidth=2,\n",
-    ")\n",
-    "if dataset == \"mobile\":\n",
-    "    plt.ylim([0, 67])\n",
-    "else:\n",
-    "    plt.ylim([5, 50])\n",
-    "    plt.xlim([0, 1000])\n",
-    "\n",
-    "plt.gca().invert_yaxis()\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.6.9"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
+  "nbformat": 4,
+  "nbformat_minor": 2
 }
diff --git a/notebooks/Splitting Passive Data.ipynb b/notebooks/Splitting Passive Data.ipynb
index 3478c24d..03108a91 100644
--- a/notebooks/Splitting Passive Data.ipynb	
+++ b/notebooks/Splitting Passive Data.ipynb	
@@ -1,858 +1,858 @@
 {
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "cd .."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import copy\n",
-    "import os"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import numpy as np"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import matplotlib.pyplot as plt\n",
-    "import seaborn as sns\n",
-    "\n",
-    "sns.set()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import echofilter.raw"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "turbulence_color = \"c\"\n",
-    "bottom_color = \"#00ee00\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "root_data_dir = \"/media/scott/scratch/Datasets/dsforce/surveyExports\"\n",
-    "\n",
-    "# example with 1 passive period, 1 turbulence cut out\n",
-    "# sample = 'mobile/Survey16/Survey16_GR3_N1W_E'\n",
-    "\n",
-    "# example with lots of short passive periods, 1 proper passive period, 1 turbulence cut out\n",
-    "sample = \"mobile/Survey17/Survey17_GR1_N0W_E\"\n",
-    "\n",
-    "# sample done incorrectly\n",
-    "# sample = 'MinasPassage/march2018/march2018_D20180330-T202218_D20180331-T015214'\n",
-    "\n",
-    "fname_raw = os.path.join(root_data_dir, sample + \"_Sv_raw.csv\")\n",
-    "fname_masked = os.path.join(root_data_dir, sample + \"_Sv.csv\")\n",
-    "\n",
-    "ts_raw, depths_raw, signals_raw = echofilter.raw.loader.transect_loader(fname_raw)\n",
-    "ts_mskd, depths_mskd, signals_mskd = echofilter.raw.loader.transect_loader(fname_masked)\n",
-    "\n",
-    "fname_top1 = os.path.join(root_data_dir, sample + \"_turbulence.evl\")\n",
-    "fname_top2 = os.path.join(root_data_dir, sample + \"_air.evl\")\n",
-    "fname_bot = os.path.join(root_data_dir, sample + \"_bottom.evl\")\n",
-    "if os.path.isfile(fname_top1):\n",
-    "    t_top, d_top = echofilter.raw.loader.evl_loader(fname_top1)\n",
-    "elif os.path.isfile(fname_top2):\n",
-    "    t_top, d_top = echofilter.raw.loader.evl_loader(fname_top2)\n",
-    "else:\n",
-    "    t_top = d_top = np.nan\n",
-    "if os.path.isfile(fname_bot):\n",
-    "    t_bot, d_bot = echofilter.raw.loader.evl_loader(fname_bot)\n",
-    "else:\n",
-    "    t_bot = d_bot = np.nan"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "ts_new, d_top_new, d_bot_new = echofilter.raw.manipulate.make_lines_from_masked_csv(\n",
-    "    fname_masked\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "np.min(signals_raw), np.max(signals_raw)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "ts_raw.shape, depths_raw.shape, signals_raw.shape"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.pcolormesh(ts_raw, depths_raw, signals_raw.T)\n",
-    "plt.plot(t_top, d_top, turbulence_color)\n",
-    "plt.plot(t_bot, d_bot, bottom_color)\n",
-    "plt.plot(ts_new, d_top_new, \"k\")\n",
-    "plt.plot(ts_new, d_bot_new, \"w\")\n",
-    "plt.gca().invert_yaxis()\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Finding passive data"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "sns.distplot(np.reshape(signals_raw, (-1,)))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "np.percentile(signals_raw, 95)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "np.percentile(signals_raw, 99.5)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.pcolormesh(ts_raw, depths_raw, (signals_raw > np.percentile(signals_raw, 95)).T)\n",
-    "plt.plot(t_top, d_top, turbulence_color)\n",
-    "plt.plot(t_bot, d_bot, bottom_color)\n",
-    "plt.gca().invert_yaxis()\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "sum(~np.any(signals_raw > np.percentile(signals_raw, 95), axis=1))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.pcolormesh(ts_raw, depths_raw, (signals_raw > np.percentile(signals_raw, 97)).T)\n",
-    "plt.plot(t_top, d_top, turbulence_color)\n",
-    "plt.plot(t_bot, d_bot, bottom_color)\n",
-    "plt.gca().invert_yaxis()\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.pcolormesh(ts_raw, depths_raw, (signals_raw > 0).T)\n",
-    "plt.plot(t_top, d_top, turbulence_color)\n",
-    "plt.plot(t_bot, d_bot, bottom_color)\n",
-    "plt.gca().invert_yaxis()\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.pcolormesh(ts_raw, depths_raw[:100], signals_raw[:, :100].T)\n",
-    "plt.gca().invert_yaxis()\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.pcolormesh(ts_raw, depths_raw[:34], signals_raw[:, :34].T)\n",
-    "plt.gca().invert_yaxis()\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.pcolormesh(ts_raw[234:257], depths_raw[:34], signals_raw[234:257, :34].T)\n",
-    "plt.gca().invert_yaxis()\n",
-    "plt.show()\n",
-    "\n",
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.pcolormesh(ts_raw[235:256], depths_raw[:34], signals_raw[235:256, :34].T)\n",
-    "plt.gca().invert_yaxis()\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.pcolormesh(ts_raw, depths_raw[:26], signals_raw[:, :26].T)\n",
-    "plt.gca().invert_yaxis()\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.plot(np.median(signals_raw[:, :45], axis=0))\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "np.min(d_top)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "sum(depths_raw <= np.min(d_top))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "sns.distplot(np.reshape(signals_raw[:, :34], (-1,)))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "sns.distplot(np.reshape(signals_raw[:, 34:], (-1,)))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.pcolormesh(ts_raw, depths_raw[:34], signals_raw[:, :34].T > 0)\n",
-    "plt.gca().invert_yaxis()\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.pcolormesh(ts_raw, depths_raw, (signals_raw > 0).T)\n",
-    "plt.plot(t_top, d_top, turbulence_color)\n",
-    "plt.plot(t_bot, d_bot, bottom_color)\n",
-    "plt.gca().invert_yaxis()\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "for pc in range(100):\n",
-    "    cut_off = np.percentile(signals_raw[:, 34:], pc)\n",
-    "    print(\n",
-    "        \"{:3d}\\t {:7.2f}\\t {:3d}\".format(\n",
-    "            pc, cut_off, sum(~np.any(signals_raw[:, 34:] > cut_off, axis=1))\n",
-    "        )\n",
-    "    )"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "for pc in range(100):\n",
-    "    cut_off = np.percentile(signals_raw[:, :34], pc)\n",
-    "    print(\n",
-    "        \"{:3d}\\t {:7.2f}\\t {:3d}\".format(\n",
-    "            pc, cut_off, sum(~np.any(signals_raw[:, :34] > cut_off, axis=1))\n",
-    "        )\n",
-    "    )"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.plot(np.max(signals_raw[:, :34], axis=1))\n",
-    "plt.plot(np.min(signals_raw[:, :34], axis=1))\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.plot(np.max(signals_raw[:, :25], axis=1))\n",
-    "plt.plot(np.min(signals_raw[:, :25], axis=1))\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.plot(np.mean(np.diff(signals_raw, axis=0), axis=1))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.plot(np.median(np.diff(signals_raw, axis=0), axis=1))\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.plot(np.median(np.diff(signals_raw[220:280, :], axis=0), axis=1))\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "md = np.median(np.diff(signals_raw[220:280, :], axis=0), axis=1)\n",
-    "print(np.median(md) + (np.percentile(md, 75) - np.percentile(md, 25)) * 8)\n",
-    "print(np.median(md) - (np.percentile(md, 75) - np.percentile(md, 25)) * 8)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.plot(np.median(np.diff(signals_raw[280:, :], axis=0), axis=1))\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "md = np.median(np.diff(signals_raw[280:, :], axis=0), axis=1)\n",
-    "print(np.median(md) + (np.percentile(md, 75) - np.percentile(md, 25)) * 8)\n",
-    "print(np.median(md) - (np.percentile(md, 75) - np.percentile(md, 25)) * 8)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "md = np.median(np.diff(signals_raw, axis=0), axis=1)\n",
-    "print(np.median(md) + (np.percentile(md, 75) - np.percentile(md, 25)) * 8)\n",
-    "print(np.median(md) - (np.percentile(md, 75) - np.percentile(md, 25)) * 8)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.plot(np.median(np.diff(signals_raw[:, :34], axis=0), axis=1))\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.plot(np.median(np.diff(signals_raw[:, :34], axis=0), axis=1))\n",
-    "plt.ylim([-1, 1])\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.plot(np.median(np.diff(signals_raw[230:260, :34], axis=0), axis=1))\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.plot(np.median(np.diff(signals_raw[230:260, :34], axis=0), axis=1))\n",
-    "plt.ylim([-10, 10])\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.plot(np.median(np.diff(signals_raw[230:260, :26], axis=0), axis=1))\n",
-    "plt.ylim([-5, 5])\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.plot(np.median(np.diff(signals_raw[230:260, :26], axis=0), axis=1))\n",
-    "# plt.ylim([-5, 5])\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.plot(np.median(np.diff(signals_raw[230:260, :26], axis=0), axis=1))\n",
-    "plt.ylim([-5, 5])\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "md = np.median(np.diff(signals_raw[230:260, :34], axis=0), axis=1)\n",
-    "print(np.median(md) + (np.percentile(md, 75) - np.percentile(md, 25)) * 8)\n",
-    "print(np.median(md) - (np.percentile(md, 75) - np.percentile(md, 25)) * 8)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "md = np.median(np.diff(signals_raw[:, :34], axis=0), axis=1)\n",
-    "print(np.median(md) + (np.percentile(md, 75) - np.percentile(md, 25)) * 8)\n",
-    "print(np.median(md) - (np.percentile(md, 75) - np.percentile(md, 25)) * 8)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "threshold_low = np.median(md) - (np.percentile(md, 75) - np.percentile(md, 25)) * 8\n",
-    "threshold_high = np.median(md) + (np.percentile(md, 75) - np.percentile(md, 25)) * 8\n",
-    "indices_possible_start = np.nonzero(md < threshold_low)[0]\n",
-    "indices_possible_end = np.nonzero(md > threshold_high)[0]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "indices_possible_start"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def find_passive_edges(signals_raw, n_depth_use=None, deviation=8):\n",
-    "\n",
-    "    # n_depth_use = 34\n",
-    "\n",
-    "    if n_depth_use is None:\n",
-    "        n_depth_use = signals_raw.shape[1]\n",
-    "\n",
-    "    indices_passive_start = []\n",
-    "    indices_passive_end = []\n",
-    "\n",
-    "    md = np.median(np.diff(signals_raw[:, :n_depth_use], axis=0), axis=1)\n",
-    "\n",
-    "    threshold_low = (\n",
-    "        np.median(md) - (np.percentile(md, 75) - np.percentile(md, 25)) * deviation\n",
-    "    )\n",
-    "    threshold_high = (\n",
-    "        np.median(md) + (np.percentile(md, 75) - np.percentile(md, 25)) * deviation\n",
-    "    )\n",
-    "    indices_possible_start = np.nonzero(md < threshold_low)[0]\n",
-    "    indices_possible_end = np.nonzero(md > threshold_high)[0]\n",
-    "    print(\"a\", indices_possible_end)\n",
-    "\n",
-    "    current_index = 0\n",
-    "\n",
-    "    if len(indices_possible_start) == 0 and len(indices_possible_end) == 0:\n",
-    "        return np.array(indices_passive_start), np.array(indices_passive_end)\n",
-    "\n",
-    "    if len(indices_possible_start) > 0:\n",
-    "        indices_possible_start += 1\n",
-    "\n",
-    "    if len(indices_possible_end) > 0:\n",
-    "        indices_possible_end += 1\n",
-    "\n",
-    "    print(\"b\", indices_possible_end)\n",
-    "\n",
-    "    if (\n",
-    "        len(indices_possible_start) == 0\n",
-    "        or indices_possible_end[0] < indices_possible_start[0]\n",
-    "    ):\n",
-    "        indices_passive_start.append(0)\n",
-    "        current_index = indices_possible_end[0]\n",
-    "        indices_passive_end.append(current_index)\n",
-    "        indices_possible_start = indices_possible_start[\n",
-    "            indices_possible_start > current_index\n",
-    "        ]\n",
-    "        indices_possible_end = indices_possible_end[\n",
-    "            indices_possible_end > current_index\n",
-    "        ]\n",
-    "\n",
-    "    print(\"c\", indices_possible_end)\n",
-    "    print(\"c2\", indices_passive_end)\n",
-    "\n",
-    "    while len(indices_possible_start) > 0:\n",
-    "        current_index = indices_possible_start[0]\n",
-    "        indices_passive_start.append(current_index)\n",
-    "        baseline = signals_raw[current_index - 1, :n_depth_use]\n",
-    "\n",
-    "        # Find first column which returns to the baseline value seen before passive region\n",
-    "        offsets = np.nonzero(\n",
-    "            np.median(baseline - signals_raw[current_index:, :n_depth_use], axis=1)\n",
-    "            < threshold_high\n",
-    "        )[0]\n",
-    "        if len(offsets) == 0:\n",
-    "            current_index = signals_raw.shape[0]\n",
-    "        else:\n",
-    "            current_index = current_index + offsets[0]\n",
-    "        indices_passive_end.append(current_index)\n",
-    "\n",
-    "        print(\"d\", indices_passive_end)\n",
-    "\n",
-    "        # Remove preceding indices from the list of candidates\n",
-    "        indices_possible_start = indices_possible_start[\n",
-    "            indices_possible_start > current_index\n",
-    "        ]\n",
-    "        indices_possible_end = indices_possible_end[\n",
-    "            indices_possible_end > current_index\n",
-    "        ]\n",
-    "\n",
-    "        print(\"e\", indices_passive_end)\n",
-    "\n",
-    "        # Check the start was sufficiently inclusive.\n",
-    "        if current_index < signals_raw.shape[0]:\n",
-    "            baseline = signals_raw[current_index, :n_depth_use]\n",
-    "            nonpassives = np.nonzero(\n",
-    "                np.median(baseline - signals_raw[:current_index, :n_depth_use], axis=1)\n",
-    "                < threshold_high\n",
-    "            )[0]\n",
-    "            if len(nonpassives) == 0:\n",
-    "                indices_passive_start[-1] = 0\n",
-    "            else:\n",
-    "                indices_passive_start[-1] = min(\n",
-    "                    indices_passive_start[-1], nonpassives[-1] + 1\n",
-    "                )\n",
-    "\n",
-    "        print(\"f\", indices_passive_end)\n",
-    "\n",
-    "        if (\n",
-    "            len(indices_passive_start) > 1\n",
-    "            and indices_passive_start[-1] <= indices_passive_end[-2]\n",
-    "        ):\n",
-    "            indices_passive_start = indices_passive_start[:-1]\n",
-    "            indices_passive_end = indices_passive_end[:-2] + indices_passive_end[-1:]\n",
-    "\n",
-    "        print(\"g\", indices_passive_end)\n",
-    "\n",
-    "    return np.array(indices_passive_start), np.array(indices_passive_end)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "init_idx = 0\n",
-    "indices_passive_start, indices_passive_end = find_passive_edges(\n",
-    "    signals_raw[init_idx:, :]\n",
-    ")\n",
-    "indices_passive_start += init_idx\n",
-    "indices_passive_end += init_idx"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "indices_passive_start"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "indices_passive_end"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "for index_start, index_end in zip(indices_passive_start, indices_passive_end):\n",
-    "    plt.figure(figsize=(12, 12))\n",
-    "    plt.pcolormesh(\n",
-    "        ts_raw[index_start:index_end],\n",
-    "        depths_raw[:34],\n",
-    "        signals_raw[index_start:index_end, :34].T,\n",
-    "    )\n",
-    "    plt.gca().invert_yaxis()\n",
-    "    plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "for index_start, index_end in zip(\n",
-    "    np.concatenate(([0], indices_passive_end)),\n",
-    "    np.concatenate((indices_passive_start, [signals_raw.shape[0]])),\n",
-    "):\n",
-    "    if index_start == index_end:\n",
-    "        continue\n",
-    "    plt.figure(figsize=(12, 12))\n",
-    "    plt.pcolormesh(\n",
-    "        ts_raw[index_start:index_end],\n",
-    "        depths_raw[:34],\n",
-    "        signals_raw[index_start:index_end, :34].T,\n",
-    "    )\n",
-    "    plt.gca().invert_yaxis()\n",
-    "    plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "for index_start, index_end in zip(indices_passive_start, indices_passive_end):\n",
-    "    plt.figure(figsize=(12, 12))\n",
-    "    plt.pcolormesh(\n",
-    "        ts_raw[index_start:index_end],\n",
-    "        depths_raw,\n",
-    "        signals_raw[index_start:index_end, :].T,\n",
-    "    )\n",
-    "    plt.gca().invert_yaxis()\n",
-    "    plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "for index_start, index_end in zip(\n",
-    "    np.concatenate(([0], indices_passive_end)),\n",
-    "    np.concatenate((indices_passive_start, [signals_raw.shape[0]])),\n",
-    "):\n",
-    "    if index_start == index_end:\n",
-    "        continue\n",
-    "    plt.figure(figsize=(12, 12))\n",
-    "    plt.pcolormesh(\n",
-    "        ts_raw[index_start:index_end],\n",
-    "        depths_raw,\n",
-    "        signals_raw[index_start:index_end, :].T,\n",
-    "    )\n",
-    "    plt.gca().invert_yaxis()\n",
-    "    plt.show()"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.6.9"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
+  "cells": [
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "cd .."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import copy\n",
+        "import os"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import numpy as np"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import matplotlib.pyplot as plt\n",
+        "import seaborn as sns\n",
+        "\n",
+        "sns.set()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import echofilter.raw"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "turbulence_color = \"c\"\n",
+        "bottom_color = \"#00ee00\""
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "root_data_dir = \"/media/scott/scratch/Datasets/dsforce/surveyExports\"\n",
+        "\n",
+        "# example with 1 passive period, 1 turbulence cut out\n",
+        "# sample = 'mobile/Survey16/Survey16_GR3_N1W_E'\n",
+        "\n",
+        "# example with lots of short passive periods, 1 proper passive period, 1 turbulence cut out\n",
+        "sample = \"mobile/Survey17/Survey17_GR1_N0W_E\"\n",
+        "\n",
+        "# sample done incorrectly\n",
+        "# sample = 'MinasPassage/march2018/march2018_D20180330-T202218_D20180331-T015214'\n",
+        "\n",
+        "fname_raw = os.path.join(root_data_dir, sample + \"_Sv_raw.csv\")\n",
+        "fname_masked = os.path.join(root_data_dir, sample + \"_Sv.csv\")\n",
+        "\n",
+        "ts_raw, depths_raw, signals_raw = echofilter.raw.loader.transect_loader(fname_raw)\n",
+        "ts_mskd, depths_mskd, signals_mskd = echofilter.raw.loader.transect_loader(fname_masked)\n",
+        "\n",
+        "fname_top1 = os.path.join(root_data_dir, sample + \"_turbulence.evl\")\n",
+        "fname_top2 = os.path.join(root_data_dir, sample + \"_air.evl\")\n",
+        "fname_bot = os.path.join(root_data_dir, sample + \"_bottom.evl\")\n",
+        "if os.path.isfile(fname_top1):\n",
+        "    t_top, d_top = echofilter.raw.loader.evl_loader(fname_top1)\n",
+        "elif os.path.isfile(fname_top2):\n",
+        "    t_top, d_top = echofilter.raw.loader.evl_loader(fname_top2)\n",
+        "else:\n",
+        "    t_top = d_top = np.nan\n",
+        "if os.path.isfile(fname_bot):\n",
+        "    t_bot, d_bot = echofilter.raw.loader.evl_loader(fname_bot)\n",
+        "else:\n",
+        "    t_bot = d_bot = np.nan"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "ts_new, d_top_new, d_bot_new = echofilter.raw.manipulate.make_lines_from_masked_csv(\n",
+        "    fname_masked\n",
+        ")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "np.min(signals_raw), np.max(signals_raw)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "ts_raw.shape, depths_raw.shape, signals_raw.shape"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.pcolormesh(ts_raw, depths_raw, signals_raw.T)\n",
+        "plt.plot(t_top, d_top, turbulence_color)\n",
+        "plt.plot(t_bot, d_bot, bottom_color)\n",
+        "plt.plot(ts_new, d_top_new, \"k\")\n",
+        "plt.plot(ts_new, d_bot_new, \"w\")\n",
+        "plt.gca().invert_yaxis()\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# Finding passive data"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "sns.distplot(np.reshape(signals_raw, (-1,)))"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "np.percentile(signals_raw, 95)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "np.percentile(signals_raw, 99.5)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.pcolormesh(ts_raw, depths_raw, (signals_raw > np.percentile(signals_raw, 95)).T)\n",
+        "plt.plot(t_top, d_top, turbulence_color)\n",
+        "plt.plot(t_bot, d_bot, bottom_color)\n",
+        "plt.gca().invert_yaxis()\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "sum(~np.any(signals_raw > np.percentile(signals_raw, 95), axis=1))"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.pcolormesh(ts_raw, depths_raw, (signals_raw > np.percentile(signals_raw, 97)).T)\n",
+        "plt.plot(t_top, d_top, turbulence_color)\n",
+        "plt.plot(t_bot, d_bot, bottom_color)\n",
+        "plt.gca().invert_yaxis()\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.pcolormesh(ts_raw, depths_raw, (signals_raw > 0).T)\n",
+        "plt.plot(t_top, d_top, turbulence_color)\n",
+        "plt.plot(t_bot, d_bot, bottom_color)\n",
+        "plt.gca().invert_yaxis()\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.pcolormesh(ts_raw, depths_raw[:100], signals_raw[:, :100].T)\n",
+        "plt.gca().invert_yaxis()\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.pcolormesh(ts_raw, depths_raw[:34], signals_raw[:, :34].T)\n",
+        "plt.gca().invert_yaxis()\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.pcolormesh(ts_raw[234:257], depths_raw[:34], signals_raw[234:257, :34].T)\n",
+        "plt.gca().invert_yaxis()\n",
+        "plt.show()\n",
+        "\n",
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.pcolormesh(ts_raw[235:256], depths_raw[:34], signals_raw[235:256, :34].T)\n",
+        "plt.gca().invert_yaxis()\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.pcolormesh(ts_raw, depths_raw[:26], signals_raw[:, :26].T)\n",
+        "plt.gca().invert_yaxis()\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.plot(np.median(signals_raw[:, :45], axis=0))\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "np.min(d_top)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "sum(depths_raw <= np.min(d_top))"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "sns.distplot(np.reshape(signals_raw[:, :34], (-1,)))"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "sns.distplot(np.reshape(signals_raw[:, 34:], (-1,)))"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.pcolormesh(ts_raw, depths_raw[:34], signals_raw[:, :34].T > 0)\n",
+        "plt.gca().invert_yaxis()\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.pcolormesh(ts_raw, depths_raw, (signals_raw > 0).T)\n",
+        "plt.plot(t_top, d_top, turbulence_color)\n",
+        "plt.plot(t_bot, d_bot, bottom_color)\n",
+        "plt.gca().invert_yaxis()\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "for pc in range(100):\n",
+        "    cut_off = np.percentile(signals_raw[:, 34:], pc)\n",
+        "    print(\n",
+        "        \"{:3d}\\t {:7.2f}\\t {:3d}\".format(\n",
+        "            pc, cut_off, sum(~np.any(signals_raw[:, 34:] > cut_off, axis=1))\n",
+        "        )\n",
+        "    )"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "for pc in range(100):\n",
+        "    cut_off = np.percentile(signals_raw[:, :34], pc)\n",
+        "    print(\n",
+        "        \"{:3d}\\t {:7.2f}\\t {:3d}\".format(\n",
+        "            pc, cut_off, sum(~np.any(signals_raw[:, :34] > cut_off, axis=1))\n",
+        "        )\n",
+        "    )"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.plot(np.max(signals_raw[:, :34], axis=1))\n",
+        "plt.plot(np.min(signals_raw[:, :34], axis=1))\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.plot(np.max(signals_raw[:, :25], axis=1))\n",
+        "plt.plot(np.min(signals_raw[:, :25], axis=1))\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.plot(np.mean(np.diff(signals_raw, axis=0), axis=1))"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.plot(np.median(np.diff(signals_raw, axis=0), axis=1))\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.plot(np.median(np.diff(signals_raw[220:280, :], axis=0), axis=1))\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "md = np.median(np.diff(signals_raw[220:280, :], axis=0), axis=1)\n",
+        "print(np.median(md) + (np.percentile(md, 75) - np.percentile(md, 25)) * 8)\n",
+        "print(np.median(md) - (np.percentile(md, 75) - np.percentile(md, 25)) * 8)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.plot(np.median(np.diff(signals_raw[280:, :], axis=0), axis=1))\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "md = np.median(np.diff(signals_raw[280:, :], axis=0), axis=1)\n",
+        "print(np.median(md) + (np.percentile(md, 75) - np.percentile(md, 25)) * 8)\n",
+        "print(np.median(md) - (np.percentile(md, 75) - np.percentile(md, 25)) * 8)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "md = np.median(np.diff(signals_raw, axis=0), axis=1)\n",
+        "print(np.median(md) + (np.percentile(md, 75) - np.percentile(md, 25)) * 8)\n",
+        "print(np.median(md) - (np.percentile(md, 75) - np.percentile(md, 25)) * 8)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.plot(np.median(np.diff(signals_raw[:, :34], axis=0), axis=1))\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.plot(np.median(np.diff(signals_raw[:, :34], axis=0), axis=1))\n",
+        "plt.ylim([-1, 1])\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.plot(np.median(np.diff(signals_raw[230:260, :34], axis=0), axis=1))\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.plot(np.median(np.diff(signals_raw[230:260, :34], axis=0), axis=1))\n",
+        "plt.ylim([-10, 10])\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.plot(np.median(np.diff(signals_raw[230:260, :26], axis=0), axis=1))\n",
+        "plt.ylim([-5, 5])\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.plot(np.median(np.diff(signals_raw[230:260, :26], axis=0), axis=1))\n",
+        "# plt.ylim([-5, 5])\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.plot(np.median(np.diff(signals_raw[230:260, :26], axis=0), axis=1))\n",
+        "plt.ylim([-5, 5])\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "md = np.median(np.diff(signals_raw[230:260, :34], axis=0), axis=1)\n",
+        "print(np.median(md) + (np.percentile(md, 75) - np.percentile(md, 25)) * 8)\n",
+        "print(np.median(md) - (np.percentile(md, 75) - np.percentile(md, 25)) * 8)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "md = np.median(np.diff(signals_raw[:, :34], axis=0), axis=1)\n",
+        "print(np.median(md) + (np.percentile(md, 75) - np.percentile(md, 25)) * 8)\n",
+        "print(np.median(md) - (np.percentile(md, 75) - np.percentile(md, 25)) * 8)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "threshold_low = np.median(md) - (np.percentile(md, 75) - np.percentile(md, 25)) * 8\n",
+        "threshold_high = np.median(md) + (np.percentile(md, 75) - np.percentile(md, 25)) * 8\n",
+        "indices_possible_start = np.nonzero(md < threshold_low)[0]\n",
+        "indices_possible_end = np.nonzero(md > threshold_high)[0]"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "indices_possible_start"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "def find_passive_edges(signals_raw, n_depth_use=None, deviation=8):\n",
+        "\n",
+        "    # n_depth_use = 34\n",
+        "\n",
+        "    if n_depth_use is None:\n",
+        "        n_depth_use = signals_raw.shape[1]\n",
+        "\n",
+        "    indices_passive_start = []\n",
+        "    indices_passive_end = []\n",
+        "\n",
+        "    md = np.median(np.diff(signals_raw[:, :n_depth_use], axis=0), axis=1)\n",
+        "\n",
+        "    threshold_low = (\n",
+        "        np.median(md) - (np.percentile(md, 75) - np.percentile(md, 25)) * deviation\n",
+        "    )\n",
+        "    threshold_high = (\n",
+        "        np.median(md) + (np.percentile(md, 75) - np.percentile(md, 25)) * deviation\n",
+        "    )\n",
+        "    indices_possible_start = np.nonzero(md < threshold_low)[0]\n",
+        "    indices_possible_end = np.nonzero(md > threshold_high)[0]\n",
+        "    print(\"a\", indices_possible_end)\n",
+        "\n",
+        "    current_index = 0\n",
+        "\n",
+        "    if len(indices_possible_start) == 0 and len(indices_possible_end) == 0:\n",
+        "        return np.array(indices_passive_start), np.array(indices_passive_end)\n",
+        "\n",
+        "    if len(indices_possible_start) > 0:\n",
+        "        indices_possible_start += 1\n",
+        "\n",
+        "    if len(indices_possible_end) > 0:\n",
+        "        indices_possible_end += 1\n",
+        "\n",
+        "    print(\"b\", indices_possible_end)\n",
+        "\n",
+        "    if (\n",
+        "        len(indices_possible_start) == 0\n",
+        "        or indices_possible_end[0] < indices_possible_start[0]\n",
+        "    ):\n",
+        "        indices_passive_start.append(0)\n",
+        "        current_index = indices_possible_end[0]\n",
+        "        indices_passive_end.append(current_index)\n",
+        "        indices_possible_start = indices_possible_start[\n",
+        "            indices_possible_start > current_index\n",
+        "        ]\n",
+        "        indices_possible_end = indices_possible_end[\n",
+        "            indices_possible_end > current_index\n",
+        "        ]\n",
+        "\n",
+        "    print(\"c\", indices_possible_end)\n",
+        "    print(\"c2\", indices_passive_end)\n",
+        "\n",
+        "    while len(indices_possible_start) > 0:\n",
+        "        current_index = indices_possible_start[0]\n",
+        "        indices_passive_start.append(current_index)\n",
+        "        baseline = signals_raw[current_index - 1, :n_depth_use]\n",
+        "\n",
+        "        # Find first column which returns to the baseline value seen before passive region\n",
+        "        offsets = np.nonzero(\n",
+        "            np.median(baseline - signals_raw[current_index:, :n_depth_use], axis=1)\n",
+        "            < threshold_high\n",
+        "        )[0]\n",
+        "        if len(offsets) == 0:\n",
+        "            current_index = signals_raw.shape[0]\n",
+        "        else:\n",
+        "            current_index = current_index + offsets[0]\n",
+        "        indices_passive_end.append(current_index)\n",
+        "\n",
+        "        print(\"d\", indices_passive_end)\n",
+        "\n",
+        "        # Remove preceding indices from the list of candidates\n",
+        "        indices_possible_start = indices_possible_start[\n",
+        "            indices_possible_start > current_index\n",
+        "        ]\n",
+        "        indices_possible_end = indices_possible_end[\n",
+        "            indices_possible_end > current_index\n",
+        "        ]\n",
+        "\n",
+        "        print(\"e\", indices_passive_end)\n",
+        "\n",
+        "        # Check the start was sufficiently inclusive.\n",
+        "        if current_index < signals_raw.shape[0]:\n",
+        "            baseline = signals_raw[current_index, :n_depth_use]\n",
+        "            nonpassives = np.nonzero(\n",
+        "                np.median(baseline - signals_raw[:current_index, :n_depth_use], axis=1)\n",
+        "                < threshold_high\n",
+        "            )[0]\n",
+        "            if len(nonpassives) == 0:\n",
+        "                indices_passive_start[-1] = 0\n",
+        "            else:\n",
+        "                indices_passive_start[-1] = min(\n",
+        "                    indices_passive_start[-1], nonpassives[-1] + 1\n",
+        "                )\n",
+        "\n",
+        "        print(\"f\", indices_passive_end)\n",
+        "\n",
+        "        if (\n",
+        "            len(indices_passive_start) > 1\n",
+        "            and indices_passive_start[-1] <= indices_passive_end[-2]\n",
+        "        ):\n",
+        "            indices_passive_start = indices_passive_start[:-1]\n",
+        "            indices_passive_end = indices_passive_end[:-2] + indices_passive_end[-1:]\n",
+        "\n",
+        "        print(\"g\", indices_passive_end)\n",
+        "\n",
+        "    return np.array(indices_passive_start), np.array(indices_passive_end)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "init_idx = 0\n",
+        "indices_passive_start, indices_passive_end = find_passive_edges(\n",
+        "    signals_raw[init_idx:, :]\n",
+        ")\n",
+        "indices_passive_start += init_idx\n",
+        "indices_passive_end += init_idx"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "indices_passive_start"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "indices_passive_end"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "for index_start, index_end in zip(indices_passive_start, indices_passive_end):\n",
+        "    plt.figure(figsize=(12, 12))\n",
+        "    plt.pcolormesh(\n",
+        "        ts_raw[index_start:index_end],\n",
+        "        depths_raw[:34],\n",
+        "        signals_raw[index_start:index_end, :34].T,\n",
+        "    )\n",
+        "    plt.gca().invert_yaxis()\n",
+        "    plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "for index_start, index_end in zip(\n",
+        "    np.concatenate(([0], indices_passive_end)),\n",
+        "    np.concatenate((indices_passive_start, [signals_raw.shape[0]])),\n",
+        "):\n",
+        "    if index_start == index_end:\n",
+        "        continue\n",
+        "    plt.figure(figsize=(12, 12))\n",
+        "    plt.pcolormesh(\n",
+        "        ts_raw[index_start:index_end],\n",
+        "        depths_raw[:34],\n",
+        "        signals_raw[index_start:index_end, :34].T,\n",
+        "    )\n",
+        "    plt.gca().invert_yaxis()\n",
+        "    plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "for index_start, index_end in zip(indices_passive_start, indices_passive_end):\n",
+        "    plt.figure(figsize=(12, 12))\n",
+        "    plt.pcolormesh(\n",
+        "        ts_raw[index_start:index_end],\n",
+        "        depths_raw,\n",
+        "        signals_raw[index_start:index_end, :].T,\n",
+        "    )\n",
+        "    plt.gca().invert_yaxis()\n",
+        "    plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "for index_start, index_end in zip(\n",
+        "    np.concatenate(([0], indices_passive_end)),\n",
+        "    np.concatenate((indices_passive_start, [signals_raw.shape[0]])),\n",
+        "):\n",
+        "    if index_start == index_end:\n",
+        "        continue\n",
+        "    plt.figure(figsize=(12, 12))\n",
+        "    plt.pcolormesh(\n",
+        "        ts_raw[index_start:index_end],\n",
+        "        depths_raw,\n",
+        "        signals_raw[index_start:index_end, :].T,\n",
+        "    )\n",
+        "    plt.gca().invert_yaxis()\n",
+        "    plt.show()"
+      ]
+    }
+  ],
+  "metadata": {
+    "kernelspec": {
+      "display_name": "Python 3",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.6.9"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 2
 }
diff --git a/notebooks/Surface anomaly removal.ipynb b/notebooks/Surface anomaly removal.ipynb
index 494b9f00..ca670f1e 100644
--- a/notebooks/Surface anomaly removal.ipynb	
+++ b/notebooks/Surface anomaly removal.ipynb	
@@ -1,576 +1,576 @@
 {
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "cd .."
-   ]
+  "cells": [
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "cd .."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import copy\n",
+        "import os"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import numpy as np"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import scipy.ndimage\n",
+        "import scipy.signal"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import matplotlib.pyplot as plt\n",
+        "import seaborn as sns\n",
+        "\n",
+        "sns.set()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import echofilter.plotting\n",
+        "import echofilter.raw"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "root_data_dir = \"/media/scott/scratch/Datasets/dsforce/surveyExports\""
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# first val sample for stationary\n",
+        "sample = \"MinasPassage/december2017/december2017_D20180108-T045216_D20180108-T102216\"\n",
+        "sample = \"MinasPassage/march2018/march2018_D20180330-T202218_D20180331-T015214\""
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "transect = echofilter.raw.manipulate.load_decomposed_transect_mask(\n",
+        "    os.path.join(root_data_dir, sample),\n",
+        ")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.figure(figsize=(15, 9))\n",
+        "echofilter.plotting.plot_transect(transect)\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "fname_surface = os.path.join(root_data_dir, sample + \"_surface.evl\")\n",
+        "t_surface, d_surface = echofilter.raw.loader.evl_loader(fname_surface)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "ts_raw = transect[\"timestamps\"]\n",
+        "d_surface = transect[\"surface\"]"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "ts_raw"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.figure(figsize=(15, 9))\n",
+        "plt.plot(d_surface)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "segments = list(echofilter.raw.manipulate.split_transect(**transect))"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "for i_segment, segment in enumerate(segments):\n",
+        "    plt.figure(figsize=(15, 9))\n",
+        "    plt.plot(segment[\"surface\"])\n",
+        "    plt.title(\"{}  #{}\".format(sample, i_segment))"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "i_segment = 8\n",
+        "segment = segments[i_segment]\n",
+        "# Remove passive data from the signal\n",
+        "signal = segment[\"surface\"][segment[\"is_passive\"] < 0.5]"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "sigma = 50\n",
+        "smoothed = scipy.ndimage.gaussian_filter1d(signal, sigma, axis=0)\n",
+        "\n",
+        "ks = 175\n",
+        "offset = ks // 2\n",
+        "medfiltered = scipy.signal.medfilt(\n",
+        "    np.pad(signal, (offset, offset), mode=\"reflect\"), ks\n",
+        ")[offset:-offset]\n",
+        "\n",
+        "savgoled = scipy.signal.savgol_filter(signal, ks, 3)\n",
+        "\n",
+        "plt.figure(figsize=(15, 9))\n",
+        "plt.plot(signal, label=\"original\")\n",
+        "plt.plot(smoothed, label=\"gaussian, sigma={}\".format(sigma))\n",
+        "plt.plot(medfiltered, label=\"median, kernel={}\".format(ks))\n",
+        "plt.plot(savgoled, label=\"SavGol, kernel={}\".format(ks))\n",
+        "plt.legend()\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "residual = signal - medfiltered\n",
+        "\n",
+        "stdev = np.diff(np.percentile(residual, [25, 75])).item() / 1.35\n",
+        "print(stdev)\n",
+        "\n",
+        "plt.figure(figsize=(15, 9))\n",
+        "plt.plot(residual, label=\"residual\")\n",
+        "plt.axhline(stdev, color=\"g\", ls=\":\")\n",
+        "plt.axhline(-stdev, color=\"g\", ls=\":\")\n",
+        "plt.axhline(stdev * 5, color=\"r\", ls=\":\")\n",
+        "plt.axhline(-stdev * 5, color=\"r\", ls=\":\")\n",
+        "plt.legend()\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "is_good_line = np.abs(residual) < 5 * stdev"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "ii = np.arange(len(signal))\n",
+        "\n",
+        "new_line = signal.copy()\n",
+        "new_line[~is_good_line] = np.interp(\n",
+        "    ii[~is_good_line], ii[is_good_line], medfiltered[is_good_line]\n",
+        ")\n",
+        "\n",
+        "plt.figure(figsize=(15, 9))\n",
+        "plt.plot(signal, label=\"original\")\n",
+        "plt.plot(new_line, label=\"new\")\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "sigma = 5\n",
+        "new_smoothed = scipy.ndimage.gaussian_filter1d(new_line, sigma, axis=0)\n",
+        "\n",
+        "ks = 31\n",
+        "offset = ks // 2\n",
+        "new_medfiltered = scipy.signal.medfilt(\n",
+        "    np.pad(new_line, (offset, offset), mode=\"reflect\"), ks\n",
+        ")[offset:-offset]\n",
+        "\n",
+        "new_savgoled = scipy.signal.savgol_filter(new_line, ks, 2)\n",
+        "\n",
+        "plt.figure(figsize=(15, 9))\n",
+        "plt.plot(new_line, label=\"new_line\")\n",
+        "plt.plot(new_smoothed, label=\"gaussian, sigma={}\".format(sigma))\n",
+        "plt.plot(new_medfiltered, label=\"median, kernel={}\".format(ks))\n",
+        "plt.plot(new_savgoled, label=\"SavGol, kernel={}\".format(ks))\n",
+        "plt.legend()\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "new_residual = new_line - new_smoothed\n",
+        "\n",
+        "stdev = np.diff(np.percentile(new_residual[is_good_line], [25, 75])).item() / 1.35\n",
+        "print(stdev)\n",
+        "\n",
+        "stdev = np.diff(np.percentile(new_residual[is_good_line], [10, 90])).item() / 2.56\n",
+        "print(stdev)\n",
+        "\n",
+        "plt.figure(figsize=(15, 9))\n",
+        "plt.plot(new_residual, label=\"smoothed-residual\")\n",
+        "plt.axhline(stdev, color=\"g\", ls=\":\")\n",
+        "plt.axhline(-stdev, color=\"g\", ls=\":\")\n",
+        "plt.axhline(stdev * 4, color=\"r\", ls=\":\")\n",
+        "plt.axhline(-stdev * 4, color=\"r\", ls=\":\")\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "new_residual = new_line - new_medfiltered\n",
+        "\n",
+        "stdev = np.diff(np.percentile(new_residual[is_good_line], [25, 75])).item() / 1.35\n",
+        "print(stdev)\n",
+        "\n",
+        "stdev = np.diff(np.percentile(new_residual[is_good_line], [10, 90])).item() / 2.56\n",
+        "print(stdev)\n",
+        "\n",
+        "plt.figure(figsize=(15, 9))\n",
+        "plt.plot(new_residual, label=\"smoothed-residual\")\n",
+        "plt.axhline(stdev, color=\"g\", ls=\":\")\n",
+        "plt.axhline(-stdev, color=\"g\", ls=\":\")\n",
+        "plt.axhline(stdev * 4, color=\"r\", ls=\":\")\n",
+        "plt.axhline(-stdev * 4, color=\"r\", ls=\":\")\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "def _remove_anomalies_1d_v1(signal, thr=4, median_kernel=51, gaussian_sigma=5):\n",
+        "    \"\"\"\n",
+        "    remove anomalies from signal\n",
+        "    \"\"\"\n",
+        "    signal = np.copy(signal)\n",
+        "\n",
+        "    # Median filtering, with reflection padding\n",
+        "    offset = median_kernel // 2\n",
+        "    smoothed = scipy.signal.medfilt(\n",
+        "        np.pad(signal, (offset, offset), mode=\"reflect\"),\n",
+        "        median_kernel,\n",
+        "    )[offset:-offset]\n",
+        "    # Measure the residual between the original and median filtered signal\n",
+        "    residual = signal - smoothed\n",
+        "    # Replace datapoints more than thr sigma away from the median filter\n",
+        "    # with the filtered signal\n",
+        "    stdev = np.diff(np.percentile(residual, [25, 75])).item() / 1.35\n",
+        "    is_fixed = np.abs(residual) > thr * stdev\n",
+        "    signal[is_fixed] = smoothed[is_fixed]\n",
+        "\n",
+        "    # Smooth signal with a gaussian kernel\n",
+        "    while True:\n",
+        "        smoothed = scipy.ndimage.gaussian_filter1d(signal, gaussian_sigma, axis=0)\n",
+        "        # Measure new residual\n",
+        "        residual = signal - smoothed\n",
+        "        stdev = np.diff(np.percentile(residual[~is_fixed], [10, 90])).item() / 2.56\n",
+        "        is_fixed_now = np.abs(residual) > thr * stdev\n",
+        "        is_fixed |= is_fixed_now\n",
+        "        signal[is_fixed] = smoothed[is_fixed]\n",
+        "        if not np.any(is_fixed_now):\n",
+        "            break\n",
+        "\n",
+        "    return signal, is_fixed"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "from echofilter.raw.utils import pad1d\n",
+        "\n",
+        "\n",
+        "def medfilt1d(signal, kernel_size, axis=-1, pad_mode=\"reflect\"):\n",
+        "    \"\"\"\n",
+        "    Median filter in 1d, with support for selecting padding mode.\n",
+        "\n",
+        "    Parameters\n",
+        "    ----------\n",
+        "    signal : array_like\n",
+        "        The signal to filter.\n",
+        "    kernel_size\n",
+        "        Size of the median kernel to use.\n",
+        "    axis : int, optional\n",
+        "        Which axis to operate along. Default is `-1`.\n",
+        "    pad_mode : str, optional\n",
+        "        Method with which to pad the vector at the edges.\n",
+        "        Must be supported by `numpy.pad`. Default is `\"reflect\"`.\n",
+        "\n",
+        "    Returns\n",
+        "    -------\n",
+        "    filtered : array_like\n",
+        "        The filtered signal.\n",
+        "\n",
+        "    See Also\n",
+        "    --------\n",
+        "    - `scipy.signal.medfilt`\n",
+        "    - `pad1d`\n",
+        "    \"\"\"\n",
+        "    offset = kernel_size // 2\n",
+        "    signal = pad1d(signal, offset, axis=axis, mode=pad_mode)\n",
+        "    filtered = scipy.signal.medfilt(signal, kernel_size)[offset:-offset]\n",
+        "    return filtered\n",
+        "\n",
+        "\n",
+        "def remove_anomalies_1d(signal, thr=5, thr2=4, kernel=201, kernel2=31):\n",
+        "    \"\"\"\n",
+        "    Remove anomalies from a temporal signal.\n",
+        "\n",
+        "    Applies a median filter to the data, and replaces datapoints which\n",
+        "    deviate from the median filtered signal by more than some threshold\n",
+        "    with the median filtered data. This process is repeated until no\n",
+        "    datapoints deviate from the filtered line by more than the threshold.\n",
+        "\n",
+        "    Parameters\n",
+        "    ----------\n",
+        "    signal : array_like\n",
+        "        The signal to filter.\n",
+        "    thr : float, optional\n",
+        "        The initial threshold will be `thr` times the standard deviation of the residuals.\n",
+        "        The standard deviation is robustly estimated from the interquartile range.\n",
+        "        Default is `5`.\n",
+        "    thr2 : float, optional\n",
+        "        The threshold for repeated iterations will be `thr2` times the standard deviation\n",
+        "        of the remaining residuals. The standard deviation is robustly estimated from\n",
+        "        the interdecile range. Default is `4`.\n",
+        "    kernel : int, optional\n",
+        "        The kernel size for the initial median filter. Default is `201`.\n",
+        "    kernel2 : int, optional\n",
+        "        The kernel size for subsequent median filters. Default is `31`.\n",
+        "\n",
+        "    Returns\n",
+        "    -------\n",
+        "    filtered : numpy.ndarray like signal\n",
+        "        The input signal with anomalies replaced with median values.\n",
+        "    is_fixed : bool numpy.ndarray shaped like signal\n",
+        "        Indicator for which datapoints were replaced.\n",
+        "\n",
+        "    See Also\n",
+        "    --------\n",
+        "    `medfilt1d`\n",
+        "    \"\"\"\n",
+        "    signal = np.copy(signal)\n",
+        "\n",
+        "    # Median filtering, with reflection padding\n",
+        "    smoothed = medfilt1d(signal, kernel)\n",
+        "    # Measure the residual between the original and median filtered signal\n",
+        "    residual = signal - smoothed\n",
+        "    # Replace datapoints more than thr sigma away from the median filter\n",
+        "    # with the filtered signal. We use a robust estimate of the standard\n",
+        "    # deviation, using the central 50% of datapoints.\n",
+        "    stdev = np.diff(np.percentile(residual, [25, 75])).item() / 1.35\n",
+        "    is_fixed = np.abs(residual) > thr * stdev\n",
+        "    signal[is_fixed] = smoothed[is_fixed]\n",
+        "\n",
+        "    # Filter again, with a narrower kernel but tighter threshold\n",
+        "    while True:\n",
+        "        smoothed = medfilt1d(signal, kernel2)\n",
+        "        # Measure new residual\n",
+        "        residual = signal - smoothed\n",
+        "        # Make sure to only include original data points when determining\n",
+        "        # the standard deviation. We use the interdecile range.\n",
+        "        stdev = np.diff(np.percentile(residual[~is_fixed], [10, 90])).item() / 2.56\n",
+        "        is_fixed_now = np.abs(residual) > thr2 * stdev\n",
+        "        is_fixed |= is_fixed_now\n",
+        "        signal[is_fixed] = smoothed[is_fixed]\n",
+        "        # We are done when no more datapoints had to be replaced\n",
+        "        if not np.any(is_fixed_now):\n",
+        "            break\n",
+        "\n",
+        "    return signal, is_fixed"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "for i_segment, segment in enumerate(segments):\n",
+        "    plt.figure(figsize=(15, 9))\n",
+        "    plt.plot(segment[\"surface\"])\n",
+        "\n",
+        "    # Handle passive data\n",
+        "    is_passive = segment[\"is_passive\"] > 0.5\n",
+        "    _smoothed, _is_fixed = remove_anomalies_1d(segment[\"surface\"][~is_passive])\n",
+        "    smoothed = np.interp(\n",
+        "        segment[\"timestamps\"], segment[\"timestamps\"][~is_passive], _smoothed\n",
+        "    )\n",
+        "    is_fixed = np.zeros_like(is_passive)\n",
+        "    is_fixed[~is_passive] = _is_fixed\n",
+        "\n",
+        "    print(\"{} datapoints were fixed\".format(np.sum(is_fixed)))\n",
+        "    plt.plot(smoothed)\n",
+        "    plt.title(\"{}  #{}, {} removed\".format(sample, i_segment, np.sum(is_fixed)))\n",
+        "    plt.show()\n",
+        "    if np.sum(is_fixed) > 0:\n",
+        "        plt.figure(figsize=(15, 9))\n",
+        "        echofilter.plotting.plot_transect(segment)\n",
+        "        plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "sample_paths = [\n",
+        "    \"MinasPassage/december2017/december2017_D20171214-T202211_D20171215-T015215\",\n",
+        "    \"MinasPassage/december2017/december2017_D20180108-T045216_D20180108-T102216\",\n",
+        "    \"MinasPassage/december2017/december2017_D20180222-T145219_D20180222-T142214\",\n",
+        "    \"MinasPassage/march2018/march2018_D20180330-T202218_D20180331-T015214\",\n",
+        "    \"MinasPassage/march2018/march2018_D20180513-T015216_D20180513-T072215\",\n",
+        "    \"MinasPassage/march2018/march2018_D20180523-T175215_D20180523-T172215\",\n",
+        "    \"MinasPassage/september2018/september2018_D20180915-T202216_D20180916-T015217\",\n",
+        "    \"MinasPassage/september2018/september2018_D20181027-T022221_D20181027-T075217\",\n",
+        "    \"MinasPassage/september2018/september2018_D20181116-T205220_D20181117-T022218\",\n",
+        "    \"MinasPassage/september2018/september2018_D20181119-T195217_D20181119-T195217\",\n",
+        "]\n",
+        "\n",
+        "for sample in sample_paths:\n",
+        "    print(sample)\n",
+        "\n",
+        "    transect = echofilter.raw.manipulate.load_decomposed_transect_mask(\n",
+        "        os.path.join(root_data_dir, sample),\n",
+        "    )\n",
+        "\n",
+        "    for i_segment, segment in enumerate(\n",
+        "        echofilter.raw.manipulate.split_transect(**transect)\n",
+        "    ):\n",
+        "        plt.figure(figsize=(15, 9))\n",
+        "        plt.plot(segment[\"surface\"])\n",
+        "\n",
+        "        # Handle passive data\n",
+        "        is_passive = segment[\"is_passive\"] > 0.5\n",
+        "        _smoothed, _is_fixed = remove_anomalies_1d(segment[\"surface\"][~is_passive])\n",
+        "        smoothed = np.interp(\n",
+        "            segment[\"timestamps\"], segment[\"timestamps\"][~is_passive], _smoothed\n",
+        "        )\n",
+        "        is_fixed = np.zeros_like(is_passive)\n",
+        "        is_fixed[~is_passive] = _is_fixed\n",
+        "\n",
+        "        print(\"{} datapoints were fixed\".format(np.sum(is_fixed)))\n",
+        "        plt.plot(smoothed)\n",
+        "        plt.title(\"{}  #{}, {} removed\".format(sample, i_segment, np.sum(is_fixed)))\n",
+        "        plt.show()\n",
+        "        if np.sum(is_fixed) > 0:\n",
+        "            plt.figure(figsize=(15, 9))\n",
+        "            echofilter.plotting.plot_transect(segment)\n",
+        "            plt.show()"
+      ]
+    }
+  ],
+  "metadata": {
+    "kernelspec": {
+      "display_name": "Python 3 (ipykernel)",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.7.15"
+    }
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import copy\n",
-    "import os"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import numpy as np"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import scipy.ndimage\n",
-    "import scipy.signal"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import matplotlib.pyplot as plt\n",
-    "import seaborn as sns\n",
-    "\n",
-    "sns.set()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import echofilter.plotting\n",
-    "import echofilter.raw"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "root_data_dir = \"/media/scott/scratch/Datasets/dsforce/surveyExports\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# first val sample for stationary\n",
-    "sample = \"MinasPassage/december2017/december2017_D20180108-T045216_D20180108-T102216\"\n",
-    "sample = \"MinasPassage/march2018/march2018_D20180330-T202218_D20180331-T015214\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "transect = echofilter.raw.manipulate.load_decomposed_transect_mask(\n",
-    "    os.path.join(root_data_dir, sample),\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.figure(figsize=(15, 9))\n",
-    "echofilter.plotting.plot_transect(transect)\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "fname_surface = os.path.join(root_data_dir, sample + \"_surface.evl\")\n",
-    "t_surface, d_surface = echofilter.raw.loader.evl_loader(fname_surface)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "ts_raw = transect[\"timestamps\"]\n",
-    "d_surface = transect[\"surface\"]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "ts_raw"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.figure(figsize=(15, 9))\n",
-    "plt.plot(d_surface)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "segments = list(echofilter.raw.manipulate.split_transect(**transect))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "for i_segment, segment in enumerate(segments):\n",
-    "    plt.figure(figsize=(15, 9))\n",
-    "    plt.plot(segment[\"surface\"])\n",
-    "    plt.title(\"{}  #{}\".format(sample, i_segment))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "i_segment = 8\n",
-    "segment = segments[i_segment]\n",
-    "# Remove passive data from the signal\n",
-    "signal = segment[\"surface\"][segment[\"is_passive\"] < 0.5]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "sigma = 50\n",
-    "smoothed = scipy.ndimage.gaussian_filter1d(signal, sigma, axis=0)\n",
-    "\n",
-    "ks = 175\n",
-    "offset = ks // 2\n",
-    "medfiltered = scipy.signal.medfilt(\n",
-    "    np.pad(signal, (offset, offset), mode=\"reflect\"), ks\n",
-    ")[offset:-offset]\n",
-    "\n",
-    "savgoled = scipy.signal.savgol_filter(signal, ks, 3)\n",
-    "\n",
-    "plt.figure(figsize=(15, 9))\n",
-    "plt.plot(signal, label=\"original\")\n",
-    "plt.plot(smoothed, label=\"gaussian, sigma={}\".format(sigma))\n",
-    "plt.plot(medfiltered, label=\"median, kernel={}\".format(ks))\n",
-    "plt.plot(savgoled, label=\"SavGol, kernel={}\".format(ks))\n",
-    "plt.legend()\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "residual = signal - medfiltered\n",
-    "\n",
-    "stdev = np.diff(np.percentile(residual, [25, 75])).item() / 1.35\n",
-    "print(stdev)\n",
-    "\n",
-    "plt.figure(figsize=(15, 9))\n",
-    "plt.plot(residual, label=\"residual\")\n",
-    "plt.axhline(stdev, color=\"g\", ls=\":\")\n",
-    "plt.axhline(-stdev, color=\"g\", ls=\":\")\n",
-    "plt.axhline(stdev * 5, color=\"r\", ls=\":\")\n",
-    "plt.axhline(-stdev * 5, color=\"r\", ls=\":\")\n",
-    "plt.legend()\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "is_good_line = np.abs(residual) < 5 * stdev"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "ii = np.arange(len(signal))\n",
-    "\n",
-    "new_line = signal.copy()\n",
-    "new_line[~is_good_line] = np.interp(\n",
-    "    ii[~is_good_line], ii[is_good_line], medfiltered[is_good_line]\n",
-    ")\n",
-    "\n",
-    "plt.figure(figsize=(15, 9))\n",
-    "plt.plot(signal, label=\"original\")\n",
-    "plt.plot(new_line, label=\"new\")\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "sigma = 5\n",
-    "new_smoothed = scipy.ndimage.gaussian_filter1d(new_line, sigma, axis=0)\n",
-    "\n",
-    "ks = 31\n",
-    "offset = ks // 2\n",
-    "new_medfiltered = scipy.signal.medfilt(\n",
-    "    np.pad(new_line, (offset, offset), mode=\"reflect\"), ks\n",
-    ")[offset:-offset]\n",
-    "\n",
-    "new_savgoled = scipy.signal.savgol_filter(new_line, ks, 2)\n",
-    "\n",
-    "plt.figure(figsize=(15, 9))\n",
-    "plt.plot(new_line, label=\"new_line\")\n",
-    "plt.plot(new_smoothed, label=\"gaussian, sigma={}\".format(sigma))\n",
-    "plt.plot(new_medfiltered, label=\"median, kernel={}\".format(ks))\n",
-    "plt.plot(new_savgoled, label=\"SavGol, kernel={}\".format(ks))\n",
-    "plt.legend()\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "new_residual = new_line - new_smoothed\n",
-    "\n",
-    "stdev = np.diff(np.percentile(new_residual[is_good_line], [25, 75])).item() / 1.35\n",
-    "print(stdev)\n",
-    "\n",
-    "stdev = np.diff(np.percentile(new_residual[is_good_line], [10, 90])).item() / 2.56\n",
-    "print(stdev)\n",
-    "\n",
-    "plt.figure(figsize=(15, 9))\n",
-    "plt.plot(new_residual, label=\"smoothed-residual\")\n",
-    "plt.axhline(stdev, color=\"g\", ls=\":\")\n",
-    "plt.axhline(-stdev, color=\"g\", ls=\":\")\n",
-    "plt.axhline(stdev * 4, color=\"r\", ls=\":\")\n",
-    "plt.axhline(-stdev * 4, color=\"r\", ls=\":\")\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "new_residual = new_line - new_medfiltered\n",
-    "\n",
-    "stdev = np.diff(np.percentile(new_residual[is_good_line], [25, 75])).item() / 1.35\n",
-    "print(stdev)\n",
-    "\n",
-    "stdev = np.diff(np.percentile(new_residual[is_good_line], [10, 90])).item() / 2.56\n",
-    "print(stdev)\n",
-    "\n",
-    "plt.figure(figsize=(15, 9))\n",
-    "plt.plot(new_residual, label=\"smoothed-residual\")\n",
-    "plt.axhline(stdev, color=\"g\", ls=\":\")\n",
-    "plt.axhline(-stdev, color=\"g\", ls=\":\")\n",
-    "plt.axhline(stdev * 4, color=\"r\", ls=\":\")\n",
-    "plt.axhline(-stdev * 4, color=\"r\", ls=\":\")\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def _remove_anomalies_1d_v1(signal, thr=4, median_kernel=51, gaussian_sigma=5):\n",
-    "    \"\"\"\n",
-    "    remove anomalies from signal\n",
-    "    \"\"\"\n",
-    "    signal = np.copy(signal)\n",
-    "\n",
-    "    # Median filtering, with reflection padding\n",
-    "    offset = median_kernel // 2\n",
-    "    smoothed = scipy.signal.medfilt(\n",
-    "        np.pad(signal, (offset, offset), mode=\"reflect\"),\n",
-    "        median_kernel,\n",
-    "    )[offset:-offset]\n",
-    "    # Measure the residual between the original and median filtered signal\n",
-    "    residual = signal - smoothed\n",
-    "    # Replace datapoints more than 4 sigma away from the median filter\n",
-    "    # with the filtered signal\n",
-    "    stdev = np.diff(np.percentile(residual, [25, 75])).item() / 1.35\n",
-    "    is_fixed = np.abs(residual) > thr * stdev\n",
-    "    signal[is_fixed] = smoothed[is_fixed]\n",
-    "\n",
-    "    # Smooth signal with a gaussian kernel\n",
-    "    while True:\n",
-    "        smoothed = scipy.ndimage.gaussian_filter1d(signal, gaussian_sigma, axis=0)\n",
-    "        # Mesure new residual\n",
-    "        residual = signal - smoothed\n",
-    "        stdev = np.diff(np.percentile(residual[~is_fixed], [10, 90])).item() / 2.56\n",
-    "        is_fixed_now = np.abs(residual) > thr * stdev\n",
-    "        is_fixed |= is_fixed_now\n",
-    "        signal[is_fixed] = smoothed[is_fixed]\n",
-    "        if not np.any(is_fixed_now):\n",
-    "            break\n",
-    "\n",
-    "    return signal, is_fixed"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from echofilter.raw.utils import pad1d\n",
-    "\n",
-    "\n",
-    "def medfilt1d(signal, kernel_size, axis=-1, pad_mode=\"reflect\"):\n",
-    "    \"\"\"\n",
-    "    Median filter in 1d, with support for selecting padding mode.\n",
-    "\n",
-    "    Parameters\n",
-    "    ----------\n",
-    "    signal : array_like\n",
-    "        The signal to filter.\n",
-    "    kernel_size\n",
-    "        Size of the median kernel to use.\n",
-    "    axis : int, optional\n",
-    "        Which axis to operate along. Default is `-1`.\n",
-    "    pad_mode : str, optional\n",
-    "        Method with which to pad the vector at the edges.\n",
-    "        Must be supported by `numpy.pad`. Default is `\"reflect\"`.\n",
-    "\n",
-    "    Returns\n",
-    "    -------\n",
-    "    filtered : array_like\n",
-    "        The filtered signal.\n",
-    "\n",
-    "    See Also\n",
-    "    --------\n",
-    "    - `scipy.signal.medfilt`\n",
-    "    - `pad1d`\n",
-    "    \"\"\"\n",
-    "    offset = kernel_size // 2\n",
-    "    signal = pad1d(signal, offset, axis=axis, mode=pad_mode)\n",
-    "    filtered = scipy.signal.medfilt(signal, kernel_size)[offset:-offset]\n",
-    "    return filtered\n",
-    "\n",
-    "\n",
-    "def remove_anomalies_1d(signal, thr=5, thr2=4, kernel=201, kernel2=31):\n",
-    "    \"\"\"\n",
-    "    Remove anomalies from a temporal signal.\n",
-    "\n",
-    "    Applies a median filter to the data, and replaces datapoints which\n",
-    "    deviate from the median filtered signal by more than some threshold\n",
-    "    with the median filtered data. This process is repeated until no\n",
-    "    datapoints deviate from the filtered line by more than the threshold.\n",
-    "\n",
-    "    Parameters\n",
-    "    ----------\n",
-    "    signal : array_like\n",
-    "        The signal to filter.\n",
-    "    thr : float, optional\n",
-    "        The initial threshold will be `thr` times the standard deviation of the residuals.\n",
-    "        The standard deviation is robustly estimated from the interquartile range.\n",
-    "        Default is `5`.\n",
-    "    thr2 : float, optional\n",
-    "        The threshold for repeated iterations will be `thr2` times the standard deviation\n",
-    "        of the remaining residuals. The standard deviation is robustly estimated from\n",
-    "        interdecile range. Default is `4`.\n",
-    "    kernel : int, optional\n",
-    "        The kernel size for the initial median filter. Default is `201`.\n",
-    "    kernel2 : int, optional\n",
-    "        The kernel size for subsequent median filters. Default is `31`.\n",
-    "\n",
-    "    Returns\n",
-    "    -------\n",
-    "    filtered : numpy.ndarray like signal\n",
-    "        The input signal with anomalies replaced with median values.\n",
-    "    is_fixed : bool numpy.ndarray shaped like signal\n",
-    "        Indicator for which datapoints were replaced.\n",
-    "\n",
-    "    See Also\n",
-    "    --------\n",
-    "    `medfilt1d`\n",
-    "    \"\"\"\n",
-    "    signal = np.copy(signal)\n",
-    "\n",
-    "    # Median filtering, with reflection padding\n",
-    "    smoothed = medfilt1d(signal, kernel)\n",
-    "    # Measure the residual between the original and median filtered signal\n",
-    "    residual = signal - smoothed\n",
-    "    # Replace datapoints more than thr sigma away from the median filter\n",
-    "    # with the filtered signal. We use a robust estimate of the standard\n",
-    "    # deviation, using the central 50% of datapoints.\n",
-    "    stdev = np.diff(np.percentile(residual, [25, 75])).item() / 1.35\n",
-    "    is_fixed = np.abs(residual) > thr * stdev\n",
-    "    signal[is_fixed] = smoothed[is_fixed]\n",
-    "\n",
-    "    # Filter again, with a narrower kernel but tighter threshold\n",
-    "    while True:\n",
-    "        smoothed = medfilt1d(signal, kernel2)\n",
-    "        # Mesure new residual\n",
-    "        residual = signal - smoothed\n",
-    "        # Make sure to only include original data points when determining\n",
-    "        # the standard deviation. We use the interdecile range.\n",
-    "        stdev = np.diff(np.percentile(residual[~is_fixed], [10, 90])).item() / 2.56\n",
-    "        is_fixed_now = np.abs(residual) > thr2 * stdev\n",
-    "        is_fixed |= is_fixed_now\n",
-    "        signal[is_fixed] = smoothed[is_fixed]\n",
-    "        # We are done when no more datapoints had to be replaced\n",
-    "        if not np.any(is_fixed_now):\n",
-    "            break\n",
-    "\n",
-    "    return signal, is_fixed"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "for i_segment, segment in enumerate(segments):\n",
-    "    plt.figure(figsize=(15, 9))\n",
-    "    plt.plot(segment[\"surface\"])\n",
-    "\n",
-    "    # Handle passive data\n",
-    "    is_passive = segment[\"is_passive\"] > 0.5\n",
-    "    _smoothed, _is_fixed = remove_anomalies_1d(segment[\"surface\"][~is_passive])\n",
-    "    smoothed = np.interp(\n",
-    "        segment[\"timestamps\"], segment[\"timestamps\"][~is_passive], _smoothed\n",
-    "    )\n",
-    "    is_fixed = np.zeros_like(is_passive)\n",
-    "    is_fixed[~is_passive] = _is_fixed\n",
-    "\n",
-    "    print(\"{} datapoints were fixed\".format(np.sum(is_fixed)))\n",
-    "    plt.plot(smoothed)\n",
-    "    plt.title(\"{}  #{}, {} removed\".format(sample, i_segment, np.sum(is_fixed)))\n",
-    "    plt.show()\n",
-    "    if np.sum(is_fixed) > 0:\n",
-    "        plt.figure(figsize=(15, 9))\n",
-    "        echofilter.plotting.plot_transect(segment)\n",
-    "        plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "sample_paths = [\n",
-    "    \"MinasPassage/december2017/december2017_D20171214-T202211_D20171215-T015215\",\n",
-    "    \"MinasPassage/december2017/december2017_D20180108-T045216_D20180108-T102216\",\n",
-    "    \"MinasPassage/december2017/december2017_D20180222-T145219_D20180222-T142214\",\n",
-    "    \"MinasPassage/march2018/march2018_D20180330-T202218_D20180331-T015214\",\n",
-    "    \"MinasPassage/march2018/march2018_D20180513-T015216_D20180513-T072215\",\n",
-    "    \"MinasPassage/march2018/march2018_D20180523-T175215_D20180523-T172215\",\n",
-    "    \"MinasPassage/september2018/september2018_D20180915-T202216_D20180916-T015217\",\n",
-    "    \"MinasPassage/september2018/september2018_D20181027-T022221_D20181027-T075217\",\n",
-    "    \"MinasPassage/september2018/september2018_D20181116-T205220_D20181117-T022218\",\n",
-    "    \"MinasPassage/september2018/september2018_D20181119-T195217_D20181119-T195217\",\n",
-    "]\n",
-    "\n",
-    "for sample in sample_paths:\n",
-    "    print(sample)\n",
-    "\n",
-    "    transect = echofilter.raw.manipulate.load_decomposed_transect_mask(\n",
-    "        os.path.join(root_data_dir, sample),\n",
-    "    )\n",
-    "\n",
-    "    for i_segment, segment in enumerate(\n",
-    "        echofilter.raw.manipulate.split_transect(**transect)\n",
-    "    ):\n",
-    "        plt.figure(figsize=(15, 9))\n",
-    "        plt.plot(segment[\"surface\"])\n",
-    "\n",
-    "        # Handle passive data\n",
-    "        is_passive = segment[\"is_passive\"] > 0.5\n",
-    "        _smoothed, _is_fixed = remove_anomalies_1d(segment[\"surface\"][~is_passive])\n",
-    "        smoothed = np.interp(\n",
-    "            segment[\"timestamps\"], segment[\"timestamps\"][~is_passive], _smoothed\n",
-    "        )\n",
-    "        is_fixed = np.zeros_like(is_passive)\n",
-    "        is_fixed[~is_passive] = _is_fixed\n",
-    "\n",
-    "        print(\"{} datapoints were fixed\".format(np.sum(is_fixed)))\n",
-    "        plt.plot(smoothed)\n",
-    "        plt.title(\"{}  #{}, {} removed\".format(sample, i_segment, np.sum(is_fixed)))\n",
-    "        plt.show()\n",
-    "        if np.sum(is_fixed) > 0:\n",
-    "            plt.figure(figsize=(15, 9))\n",
-    "            echofilter.plotting.plot_transect(segment)\n",
-    "            plt.show()"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.7.15"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
+  "nbformat": 4,
+  "nbformat_minor": 2
 }
diff --git a/notebooks/check chunking.ipynb b/notebooks/check chunking.ipynb
index f2d52bf2..511a7565 100644
--- a/notebooks/check chunking.ipynb	
+++ b/notebooks/check chunking.ipynb	
@@ -1,277 +1,277 @@
 {
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "cd .."
-   ]
+  "cells": [
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "cd .."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import copy\n",
+        "import os"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import numpy as np"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import echofilter.raw"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import matplotlib.pyplot as plt\n",
+        "import seaborn as sns\n",
+        "\n",
+        "sns.set()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "turbulence_color = \"c\"\n",
+        "bottom_color = \"#00dd00\""
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "def plot_transect(transect, i_transect=None):\n",
+        "    tt = transect[\"timestamps\"]\n",
+        "    for signal_name in (\"Sv\", \"Sv_masked\", \"mask\"):\n",
+        "        if signal_name == \"Sv_masked\":\n",
+        "            signal = copy.deepcopy(transect[\"Sv\"])\n",
+        "            signal[~transect[\"mask\"]] = np.nan\n",
+        "        else:\n",
+        "            signal = transect[signal_name]\n",
+        "        plt.figure(figsize=(12, 12))\n",
+        "        plt.pcolormesh(\n",
+        "            transect[\"timestamps\"],\n",
+        "            transect[\"depths\"],\n",
+        "            signal.T,\n",
+        "        )\n",
+        "        plt.plot(transect[\"timestamps\"], transect[\"top\"], turbulence_color)\n",
+        "        plt.plot(transect[\"timestamps\"], transect[\"bottom\"], bottom_color)\n",
+        "\n",
+        "        indices = np.nonzero(transect[\"is_passive\"])[0]\n",
+        "        if len(indices) > 0:\n",
+        "            r_starts = [indices[0]]\n",
+        "            r_ends = []\n",
+        "            breaks = np.nonzero(indices[1:] - indices[:-1] > 1)[0]\n",
+        "            for break_idx in breaks:\n",
+        "                r_ends.append(indices[break_idx])\n",
+        "                r_starts.append(indices[break_idx + 1])\n",
+        "            r_ends.append(indices[-1])\n",
+        "            for r_start, r_end in zip(r_starts, r_ends):\n",
+        "                plt.fill_between(\n",
+        "                    tt[[r_start, r_end]],\n",
+        "                    transect[\"depths\"][[0, 0]],\n",
+        "                    transect[\"depths\"][[-1, -1]],\n",
+        "                    facecolor=\"none\",\n",
+        "                    hatch=\"//\",\n",
+        "                    edgecolor=[0.4, 0.4, 0.4],\n",
+        "                    linewidth=0.0,\n",
+        "                )\n",
+        "\n",
+        "        indices = np.nonzero(transect[\"is_removed\"])[0]\n",
+        "        if len(indices) > 0:\n",
+        "            r_starts = [indices[0]]\n",
+        "            r_ends = []\n",
+        "            breaks = np.nonzero(indices[1:] - indices[:-1] > 1)[0]\n",
+        "            for break_idx in breaks:\n",
+        "                r_ends.append(indices[break_idx])\n",
+        "                r_starts.append(indices[break_idx + 1])\n",
+        "            r_ends.append(indices[-1])\n",
+        "            for r_start, r_end in zip(r_starts, r_ends):\n",
+        "                plt.fill_between(\n",
+        "                    tt[[r_start, r_end]],\n",
+        "                    transect[\"depths\"][[0, 0]],\n",
+        "                    transect[\"depths\"][[-1, -1]],\n",
+        "                    facecolor=\"none\",\n",
+        "                    hatch=\"\\\\\\\\\",\n",
+        "                    edgecolor=[0, 0, 1],\n",
+        "                    linewidth=0.0,\n",
+        "                )\n",
+        "\n",
+        "        plt.gca().invert_yaxis()\n",
+        "        plt.xlabel(\"Timestamp (s)\")\n",
+        "        plt.ylabel(\"Depth (m)\")\n",
+        "        plt.title(\"{}  Chunk {}\".format(signal_name, i_transect))\n",
+        "        plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "root_data_dir = \"/media/scott/scratch/Datasets/dsforce/surveyExports\"\n",
+        "dataset = \"MinasPassage\"\n",
+        "# has removed window\n",
+        "sample = \"december2017/december2017_D20171214-T202211_D20171215-T015215\"\n",
+        "# has passive recording\n",
+        "# sample = 'september2018/september2018_D20181116-T205220_D20181117-T022218'"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "transect = echofilter.raw.manipulate.load_decomposed_transect_mask(\n",
+        "    os.path.join(root_data_dir, dataset, sample),\n",
+        ")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "chunks = echofilter.raw.manipulate.split_transect(**transect)\n",
+        "\n",
+        "for i_chunk, chunk in enumerate(chunks):\n",
+        "    plot_transect(chunk, i_chunk)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "root_data_dir = \"/media/scott/scratch/Datasets/dsforce/surveyExports\"\n",
+        "dataset = \"MinasPassage\"\n",
+        "\n",
+        "for sample in [\n",
+        "    \"december2017/december2017_D20171214-T202211_D20171215-T015215\",\n",
+        "    \"december2017/december2017_D20180222-T145219_D20180222-T142214\",\n",
+        "    \"march2018/march2018_D20180330-T202218_D20180331-T015214\",\n",
+        "    \"march2018/march2018_D20180523-T175215_D20180523-T172215\",\n",
+        "    \"september2018/september2018_D20180915-T202216_D20180916-T015217\",\n",
+        "    \"september2018/september2018_D20181116-T205220_D20181117-T022218\",\n",
+        "    \"september2018/september2018_D20181119-T195217_D20181119-T195217\",\n",
+        "]:\n",
+        "    print(sample)\n",
+        "\n",
+        "    transect = echofilter.raw.manipulate.load_decomposed_transect_mask(\n",
+        "        os.path.join(root_data_dir, dataset, sample),\n",
+        "    )\n",
+        "\n",
+        "    print(sample)\n",
+        "\n",
+        "    chunks = echofilter.raw.manipulate.split_transect(**transect)\n",
+        "\n",
+        "    for i_chunk, chunk in enumerate(chunks):\n",
+        "        plot_transect(chunk, i_chunk)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "### Compare against a mobile transect"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "dataset = \"mobile\"\n",
+        "\n",
+        "# example with only passive period\n",
+        "# sample = 'Survey17/Survey17_GR4_T1W_E'\n",
+        "\n",
+        "# example with 1 passive period, 1 turbulence cut out\n",
+        "# sample = 'Survey16/Survey16_GR3_N1W_E'\n",
+        "\n",
+        "# example with lots of short passive periods, 1 proper passive period, 1 turbulence cut out\n",
+        "# sample = 'Survey17/Survey17_GR1_N0W_E'\n",
+        "\n",
+        "# example with passive, removed, and patches\n",
+        "# sample = 'Survey16/Survey16_GR1_N3A_F'\n",
+        "\n",
+        "# example with passive, removed, and patches\n",
+        "sample = \"Survey16/Survey16_GR3_N3A_F\""
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "transect = echofilter.raw.manipulate.load_decomposed_transect_mask(\n",
+        "    os.path.join(root_data_dir, dataset, sample),\n",
+        ")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "chunks = echofilter.raw.manipulate.split_transect(**transect)\n",
+        "\n",
+        "for i_chunk, chunk in enumerate(chunks):\n",
+        "    plot_transect(chunk, i_chunk)"
+      ]
+    }
+  ],
+  "metadata": {
+    "kernelspec": {
+      "display_name": "Python 3",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.6.9"
+    }
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import copy\n",
-    "import os"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import numpy as np"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import echofilter.raw"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import matplotlib.pyplot as plt\n",
-    "import seaborn as sns\n",
-    "\n",
-    "sns.set()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "turbulence_color = \"c\"\n",
-    "bottom_color = \"#00dd00\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def plot_transect(transect, i_transect=None):\n",
-    "    tt = transect[\"timestamps\"]\n",
-    "    for signal_name in (\"Sv\", \"Sv_masked\", \"mask\"):\n",
-    "        if signal_name == \"Sv_masked\":\n",
-    "            signal = copy.deepcopy(transect[\"Sv\"])\n",
-    "            signal[~transect[\"mask\"]] = np.nan\n",
-    "        else:\n",
-    "            signal = transect[signal_name]\n",
-    "        plt.figure(figsize=(12, 12))\n",
-    "        plt.pcolormesh(\n",
-    "            transect[\"timestamps\"],\n",
-    "            transect[\"depths\"],\n",
-    "            signal.T,\n",
-    "        )\n",
-    "        plt.plot(transect[\"timestamps\"], transect[\"top\"], turbulence_color)\n",
-    "        plt.plot(transect[\"timestamps\"], transect[\"bottom\"], bottom_color)\n",
-    "\n",
-    "        indices = np.nonzero(transect[\"is_passive\"])[0]\n",
-    "        if len(indices) > 0:\n",
-    "            r_starts = [indices[0]]\n",
-    "            r_ends = []\n",
-    "            breaks = np.nonzero(indices[1:] - indices[:-1] > 1)[0]\n",
-    "            for break_idx in breaks:\n",
-    "                r_ends.append(indices[break_idx])\n",
-    "                r_starts.append(indices[break_idx + 1])\n",
-    "            r_ends.append(indices[-1])\n",
-    "            for r_start, r_end in zip(r_starts, r_ends):\n",
-    "                plt.fill_between(\n",
-    "                    tt[[r_start, r_end]],\n",
-    "                    transect[\"depths\"][[0, 0]],\n",
-    "                    transect[\"depths\"][[-1, -1]],\n",
-    "                    facecolor=\"none\",\n",
-    "                    hatch=\"//\",\n",
-    "                    edgecolor=[0.4, 0.4, 0.4],\n",
-    "                    linewidth=0.0,\n",
-    "                )\n",
-    "\n",
-    "        indices = np.nonzero(transect[\"is_removed\"])[0]\n",
-    "        if len(indices) > 0:\n",
-    "            r_starts = [indices[0]]\n",
-    "            r_ends = []\n",
-    "            breaks = np.nonzero(indices[1:] - indices[:-1] > 1)[0]\n",
-    "            for break_idx in breaks:\n",
-    "                r_ends.append(indices[break_idx])\n",
-    "                r_starts.append(indices[break_idx + 1])\n",
-    "            r_ends.append(indices[-1])\n",
-    "            for r_start, r_end in zip(r_starts, r_ends):\n",
-    "                plt.fill_between(\n",
-    "                    tt[[r_start, r_end]],\n",
-    "                    transect[\"depths\"][[0, 0]],\n",
-    "                    transect[\"depths\"][[-1, -1]],\n",
-    "                    facecolor=\"none\",\n",
-    "                    hatch=\"\\\\\\\\\",\n",
-    "                    edgecolor=[0, 0, 1],\n",
-    "                    linewidth=0.0,\n",
-    "                )\n",
-    "\n",
-    "        plt.gca().invert_yaxis()\n",
-    "        plt.xlabel(\"Timestamp (s)\")\n",
-    "        plt.ylabel(\"Depth (m)\")\n",
-    "        plt.title(\"{}  Chunk {}\".format(signal_name, i_transect))\n",
-    "        plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "root_data_dir = \"/media/scott/scratch/Datasets/dsforce/surveyExports\"\n",
-    "dataset = \"MinasPassage\"\n",
-    "# has removed window\n",
-    "sample = \"december2017/december2017_D20171214-T202211_D20171215-T015215\"\n",
-    "# has passive recording\n",
-    "# sample = 'september2018/september2018_D20181116-T205220_D20181117-T022218'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "transect = echofilter.raw.manipulate.load_decomposed_transect_mask(\n",
-    "    os.path.join(root_data_dir, dataset, sample),\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "chunks = echofilter.raw.manipulate.split_transect(**transect)\n",
-    "\n",
-    "for i_chunk, chunk in enumerate(chunks):\n",
-    "    plot_transect(chunk, i_chunk)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "root_data_dir = \"/media/scott/scratch/Datasets/dsforce/surveyExports\"\n",
-    "dataset = \"MinasPassage\"\n",
-    "\n",
-    "for sample in [\n",
-    "    \"december2017/december2017_D20171214-T202211_D20171215-T015215\",\n",
-    "    \"december2017/december2017_D20180222-T145219_D20180222-T142214\",\n",
-    "    \"march2018/march2018_D20180330-T202218_D20180331-T015214\",\n",
-    "    \"march2018/march2018_D20180523-T175215_D20180523-T172215\",\n",
-    "    \"september2018/september2018_D20180915-T202216_D20180916-T015217\",\n",
-    "    \"september2018/september2018_D20181116-T205220_D20181117-T022218\",\n",
-    "    \"september2018/september2018_D20181119-T195217_D20181119-T195217\",\n",
-    "]:\n",
-    "    print(sample)\n",
-    "\n",
-    "    transect = echofilter.raw.manipulate.load_decomposed_transect_mask(\n",
-    "        os.path.join(root_data_dir, dataset, sample),\n",
-    "    )\n",
-    "\n",
-    "    print(sample)\n",
-    "\n",
-    "    chunks = echofilter.raw.manipulate.split_transect(**transect)\n",
-    "\n",
-    "    for i_chunk, chunk in enumerate(chunks):\n",
-    "        plot_transect(chunk, i_chunk)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Compare against a mobile transect"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "dataset = \"mobile\"\n",
-    "\n",
-    "# example with only passive period\n",
-    "# sample = 'Survey17/Survey17_GR4_T1W_E'\n",
-    "\n",
-    "# example with 1 passive period, 1 turbulence cut out\n",
-    "# sample = 'Survey16/Survey16_GR3_N1W_E'\n",
-    "\n",
-    "# example with lots of short passive periods, 1 proper passive period, 1 turbulence cut out\n",
-    "# sample = 'Survey17/Survey17_GR1_N0W_E'\n",
-    "\n",
-    "# example with passive, removed, and patches\n",
-    "# sample = 'Survey16/Survey16_GR1_N3A_F'\n",
-    "\n",
-    "# example with passive, removed, and patches\n",
-    "sample = \"Survey16/Survey16_GR3_N3A_F\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "transect = echofilter.raw.manipulate.load_decomposed_transect_mask(\n",
-    "    os.path.join(root_data_dir, dataset, sample),\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "chunks = echofilter.raw.manipulate.split_transect(**transect)\n",
-    "\n",
-    "for i_chunk, chunk in enumerate(chunks):\n",
-    "    plot_transect(chunk, i_chunk)"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.6.9"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
+  "nbformat": 4,
+  "nbformat_minor": 2
 }
diff --git a/notebooks/check making lines from masked csv.ipynb b/notebooks/check making lines from masked csv.ipynb
index 94c9e37f..069b76b4 100644
--- a/notebooks/check making lines from masked csv.ipynb	
+++ b/notebooks/check making lines from masked csv.ipynb	
@@ -1,210 +1,210 @@
 {
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "cd .."
-   ]
+  "cells": [
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "cd .."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import echofilter.raw"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import os\n",
+        "\n",
+        "import numpy as np"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import matplotlib.pyplot as plt\n",
+        "import seaborn as sns\n",
+        "\n",
+        "sns.set()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "turbulence_color = \"c\"\n",
+        "bottom_color = \"#00ee00\""
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "ROOT_DATA_DIR = \"/media/scott/scratch/Datasets/dsforce/surveyExports\"\n",
+        "\n",
+        "sample = \"mobile/Survey17/Survey17_GR4_T1W_E\"\n",
+        "\n",
+        "# example with a gap\n",
+        "sample = \"mobile/Survey16/Survey16_GR2_S3W_E\"  # 115 apart = 3.47m\n",
+        "sample = \"mobile/Survey16/Survey16_GR2_N1A_F\"  # 045 apart = 1.36m\n",
+        "\n",
+        "fname_raw = os.path.join(ROOT_DATA_DIR, sample + \"_Sv_raw.csv\")\n",
+        "fname_masked = os.path.join(ROOT_DATA_DIR, sample + \"_Sv.csv\")\n",
+        "\n",
+        "ts_raw, depths_raw, signals_raw = echofilter.raw.loader.transect_loader(fname_raw)\n",
+        "ts_mskd, depths_mskd, signals_mskd = echofilter.raw.loader.transect_loader(fname_masked)\n",
+        "\n",
+        "t_bot, d_bot = echofilter.raw.loader.evl_loader(\n",
+        "    os.path.join(ROOT_DATA_DIR, sample + \"_bottom.evl\")\n",
+        ")\n",
+        "t_top, d_top = echofilter.raw.loader.evl_loader(\n",
+        "    os.path.join(ROOT_DATA_DIR, sample + \"_turbulence.evl\")\n",
+        ")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "ts_new, d_top_new, d_bot_new = echofilter.raw.manipulate.make_lines_from_masked_csv(\n",
+        "    fname_masked\n",
+        ")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.pcolormesh(ts_mskd, depths_mskd, signals_mskd.T)\n",
+        "plt.plot(ts_new, d_top_new, turbulence_color)\n",
+        "plt.plot(ts_new, d_bot_new, bottom_color)\n",
+        "plt.gca().invert_yaxis()\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# Zoomed in on top\n",
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.pcolormesh(ts_mskd[-450:], depths_mskd[:500], signals_mskd[-450:, :500].T)\n",
+        "plt.plot(ts_new[-450:], d_top_new[-450:], turbulence_color)\n",
+        "# plt.plot(ts_new, d_bot_new, bottom_color)\n",
+        "plt.gca().invert_yaxis()\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "### Check the resulting masks agree"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "ddepths = np.broadcast_to(depths_raw, signals_raw.shape)\n",
+        "mask_top = np.single(ddepths < np.expand_dims(d_top_new, -1))\n",
+        "mask_bot = np.single(ddepths > np.expand_dims(d_bot_new, -1))\n",
+        "mask_new = ~np.any([mask_top, mask_bot], axis=0)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.pcolormesh(ts_mskd, depths_mskd[:2000], ~np.isnan(signals_mskd)[:, :2000].T)\n",
+        "plt.plot(t_top, d_top, turbulence_color)\n",
+        "plt.plot(t_bot, d_bot, bottom_color)\n",
+        "plt.gca().invert_yaxis()\n",
+        "plt.title(\"Original mask and lines\")\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.pcolormesh(ts_mskd, depths_mskd[:2000], mask_new[:, :2000].T)\n",
+        "plt.plot(ts_new, d_top_new, turbulence_color)\n",
+        "plt.plot(ts_new, d_bot_new, bottom_color)\n",
+        "plt.gca().invert_yaxis()\n",
+        "plt.title(\"New mask and lines\")\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.pcolormesh(\n",
+        "    ts_mskd, depths_mskd[:2000], (mask_new != ~np.isnan(signals_mskd))[:, :2000].T\n",
+        ")\n",
+        "# plt.plot(ts_new, d_top_new, turbulence_color)\n",
+        "# plt.plot(ts_new, d_bot_new, bottom_color)\n",
+        "plt.gca().invert_yaxis()\n",
+        "plt.title(\"Mask difference\")\n",
+        "plt.show()"
+      ]
+    }
+  ],
+  "metadata": {
+    "kernelspec": {
+      "display_name": "Python 3 (ipykernel)",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.7.15"
+    }
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import echofilter.raw"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import os\n",
-    "\n",
-    "import numpy as np"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import matplotlib.pyplot as plt\n",
-    "import seaborn as sns\n",
-    "\n",
-    "sns.set()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "turbulence_color = \"c\"\n",
-    "bottom_color = \"#00ee00\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "ROOT_DATA_DIR = \"/media/scott/scratch/Datasets/dsforce/surveyExports\"\n",
-    "\n",
-    "sample = \"mobile/Survey17/Survey17_GR4_T1W_E\"\n",
-    "\n",
-    "# example with a gap\n",
-    "sample = \"mobile/Survey16/Survey16_GR2_S3W_E\"  # 115 apart = 3.47m\n",
-    "sample = \"mobile/Survey16/Survey16_GR2_N1A_F\"  # 045 apart = 1.36m\n",
-    "\n",
-    "fname_raw = os.path.join(ROOT_DATA_DIR, sample + \"_Sv_raw.csv\")\n",
-    "fname_masked = os.path.join(ROOT_DATA_DIR, sample + \"_Sv.csv\")\n",
-    "\n",
-    "ts_raw, depths_raw, signals_raw = echofilter.raw.loader.transect_loader(fname_raw)\n",
-    "ts_mskd, depths_mskd, signals_mskd = echofilter.raw.loader.transect_loader(fname_masked)\n",
-    "\n",
-    "t_bot, d_bot = echofilter.raw.loader.evl_loader(\n",
-    "    os.path.join(ROOT_DATA_DIR, sample + \"_bottom.evl\")\n",
-    ")\n",
-    "t_top, d_top = echofilter.raw.loader.evl_loader(\n",
-    "    os.path.join(ROOT_DATA_DIR, sample + \"_turbulence.evl\")\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "ts_new, d_top_new, d_bot_new = echofilter.raw.manipulate.make_lines_from_masked_csv(\n",
-    "    fname_masked\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.pcolormesh(ts_mskd, depths_mskd, signals_mskd.T)\n",
-    "plt.plot(ts_new, d_top_new, turbulence_color)\n",
-    "plt.plot(ts_new, d_bot_new, bottom_color)\n",
-    "plt.gca().invert_yaxis()\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Zoomed in on top\n",
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.pcolormesh(ts_mskd[-450:], depths_mskd[:500], signals_mskd[-450:, :500].T)\n",
-    "plt.plot(ts_new[-450:], d_top_new[-450:], turbulence_color)\n",
-    "# plt.plot(ts_new, d_bot_new, bottom_color)\n",
-    "plt.gca().invert_yaxis()\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Check the resulting masks agree"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "ddepths = np.broadcast_to(depths_raw, signals_raw.shape)\n",
-    "mask_top = np.single(ddepths < np.expand_dims(d_top_new, -1))\n",
-    "mask_bot = np.single(ddepths > np.expand_dims(d_bot_new, -1))\n",
-    "mask_new = ~np.any([mask_top, mask_bot], axis=0)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.pcolormesh(ts_mskd, depths_mskd[:2000], ~np.isnan(signals_mskd)[:, :2000].T)\n",
-    "plt.plot(t_top, d_top, turbulence_color)\n",
-    "plt.plot(t_bot, d_bot, bottom_color)\n",
-    "plt.gca().invert_yaxis()\n",
-    "plt.title(\"Original mask and lines\")\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.pcolormesh(ts_mskd, depths_mskd[:2000], mask_new[:, :2000].T)\n",
-    "plt.plot(ts_new, d_top_new, turbulence_color)\n",
-    "plt.plot(ts_new, d_bot_new, bottom_color)\n",
-    "plt.gca().invert_yaxis()\n",
-    "plt.title(\"New mask and lines\")\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.pcolormesh(\n",
-    "    ts_mskd, depths_mskd[:2000], (mask_new != ~np.isnan(signals_mskd))[:, :2000].T\n",
-    ")\n",
-    "# plt.plot(ts_new, d_top_new, turbulence_color)\n",
-    "# plt.plot(ts_new, d_bot_new, bottom_color)\n",
-    "plt.gca().invert_yaxis()\n",
-    "plt.title(\"Mask difference\")\n",
-    "plt.show()"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.7.15"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
+  "nbformat": 4,
+  "nbformat_minor": 2
 }
diff --git a/notebooks/check splitting passive data.ipynb b/notebooks/check splitting passive data.ipynb
index 6d483477..bfa57132 100644
--- a/notebooks/check splitting passive data.ipynb	
+++ b/notebooks/check splitting passive data.ipynb	
@@ -1,445 +1,445 @@
 {
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "cd .."
-   ]
+  "cells": [
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "cd .."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import os\n",
+        "\n",
+        "import numpy as np"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import matplotlib.pyplot as plt\n",
+        "import seaborn as sns\n",
+        "\n",
+        "sns.set()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import echofilter.raw"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "turbulence_color = \"c\"\n",
+        "bottom_color = \"#00ee00\""
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "root_data_dir = \"/media/scott/scratch/Datasets/dsforce/surveyExports\"\n",
+        "# example with 1 passive period, 1 turbulence cut out\n",
+        "# sample = 'mobile/Survey16/Survey16_GR3_N1W_E'\n",
+        "# example with lots of short passive periods, 1 proper passive period, 1 turbulence cut out\n",
+        "sample = \"mobile/Survey17/Survey17_GR1_N0W_E\"\n",
+        "# sample done incorrectly\n",
+        "# sample = 'MinasPassage/march2018/march2018_D20180330-T202218_D20180331-T015214'\n",
+        "\n",
+        "# sample = 'MinasPassage/december2017/december2017_D20171214-T202211_D20171215-T015215'\n",
+        "\n",
+        "fname_raw = os.path.join(root_data_dir, sample + \"_Sv_raw.csv\")\n",
+        "fname_masked = os.path.join(root_data_dir, sample + \"_Sv.csv\")\n",
+        "\n",
+        "ts_raw, depths_raw, signals_raw = echofilter.raw.loader.transect_loader(fname_raw)\n",
+        "# ts_mskd, depths_mskd, signals_mskd = echofilter.raw.loader.transect_loader(fname_masked)\n",
+        "\n",
+        "# t_bot, d_bot = echofilter.raw.loader.evl_loader(os.path.join(root_data_dir, sample + '_bottom.evl'))\n",
+        "# t_top, d_top = echofilter.raw.loader.evl_loader(os.path.join(root_data_dir, sample + '_turbulence.evl'))"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "passive_starts, passive_ends = echofilter.raw.manipulate.find_passive_data(signals_raw)\n",
+        "print(passive_starts)\n",
+        "print(passive_ends)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.pcolormesh(np.arange(0, signals_raw.shape[0]), depths_raw, signals_raw.T)\n",
+        "\n",
+        "for r_start, r_end in zip(passive_starts, passive_ends):\n",
+        "    plt.fill_between(\n",
+        "        [r_start, r_end],\n",
+        "        depths_raw[[0, 0]],\n",
+        "        depths_raw[[-1, -1]],\n",
+        "        facecolor=\"none\",\n",
+        "        hatch=\"//\",\n",
+        "        edgecolor=\"k\",\n",
+        "        linewidth=0.0,\n",
+        "    )\n",
+        "\n",
+        "plt.xlabel(\"Timestamp (s)\")\n",
+        "plt.ylabel(\"Depth (m)\")\n",
+        "plt.gca().invert_yaxis()\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "for index_start, index_end in zip(passive_starts, passive_ends):\n",
+        "    plt.figure(figsize=(12, 12))\n",
+        "    plt.pcolormesh(\n",
+        "        ts_raw[index_start:index_end],\n",
+        "        depths_raw,\n",
+        "        signals_raw[index_start:index_end, :].T,\n",
+        "    )\n",
+        "    plt.gca().invert_yaxis()\n",
+        "    plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "for index_start, index_end in zip(\n",
+        "    np.concatenate(([0], passive_ends)),\n",
+        "    np.concatenate((passive_starts, [signals_raw.shape[0]])),\n",
+        "):\n",
+        "    index_start = int(index_start)\n",
+        "    index_end = int(index_end)\n",
+        "    if index_start == index_end:\n",
+        "        continue\n",
+        "    plt.figure(figsize=(12, 12))\n",
+        "    plt.pcolormesh(\n",
+        "        ts_raw[index_start:index_end],\n",
+        "        depths_raw,\n",
+        "        signals_raw[index_start:index_end, :].T,\n",
+        "    )\n",
+        "    plt.gca().invert_yaxis()\n",
+        "    plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "n_depth = 50\n",
+        "\n",
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.pcolormesh(\n",
+        "    np.arange(0, signals_raw.shape[0]), depths_raw[:n_depth], signals_raw[:, :n_depth].T\n",
+        ")\n",
+        "\n",
+        "for r_start, r_end in zip(passive_starts, passive_ends):\n",
+        "    plt.fill_between(\n",
+        "        [r_start, r_end],\n",
+        "        depths_raw[[0, 0]],\n",
+        "        depths_raw[[n_depth - 1, n_depth - 1]],\n",
+        "        facecolor=\"none\",\n",
+        "        hatch=\"//\",\n",
+        "        edgecolor=\"k\",\n",
+        "        linewidth=0.0,\n",
+        "    )\n",
+        "\n",
+        "plt.xlabel(\"Timestamp (s)\")\n",
+        "plt.ylabel(\"Depth (m)\")\n",
+        "plt.gca().invert_yaxis()\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "n_depth = 40\n",
+        "\n",
+        "for idx in np.concatenate((passive_starts, passive_ends)):\n",
+        "    plt.figure(figsize=(12, 12))\n",
+        "    plt.pcolormesh(\n",
+        "        np.arange(max(0, idx - 4), min(len(ts_raw), idx + 5)),\n",
+        "        depths_raw[:n_depth],\n",
+        "        signals_raw[max(0, idx - 4) : min(len(ts_raw), idx + 5), :n_depth].T,\n",
+        "    )\n",
+        "    plt.gca().invert_yaxis()\n",
+        "    plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "t0 = 230  # 190  #  0\n",
+        "t1 = 255  # 220  # 65  # signals_raw.shape[0]"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "n_times = 2"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "n_depth = len(depths_raw)\n",
+        "\n",
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.pcolormesh(np.arange(t0, t1), depths_raw[:n_depth], signals_raw[t0:t1, :n_depth].T)\n",
+        "\n",
+        "for r_start, r_end in zip(passive_starts, passive_ends):\n",
+        "    if r_end > n_times:\n",
+        "        continue\n",
+        "    plt.fill_between(\n",
+        "        [r_start, r_end],\n",
+        "        depths_raw[[0, 0]],\n",
+        "        depths_raw[[n_depth - 1, n_depth - 1]],\n",
+        "        facecolor=\"none\",\n",
+        "        hatch=\"//\",\n",
+        "        edgecolor=\"k\",\n",
+        "        linewidth=0.0,\n",
+        "    )\n",
+        "\n",
+        "plt.xlabel(\"Timestamp (s)\")\n",
+        "plt.ylabel(\"Depth (m)\")\n",
+        "plt.gca().invert_yaxis()\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "n_depth = 50\n",
+        "\n",
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.pcolormesh(np.arange(t0, t1), depths_raw[:n_depth], signals_raw[t0:t1, :n_depth].T)\n",
+        "\n",
+        "for r_start, r_end in zip(passive_starts, passive_ends):\n",
+        "    if r_end > n_times:\n",
+        "        continue\n",
+        "    plt.fill_between(\n",
+        "        [r_start, r_end],\n",
+        "        depths_raw[[0, 0]],\n",
+        "        depths_raw[[n_depth - 1, n_depth - 1]],\n",
+        "        facecolor=\"none\",\n",
+        "        hatch=\"//\",\n",
+        "        edgecolor=\"k\",\n",
+        "        linewidth=0.0,\n",
+        "    )\n",
+        "\n",
+        "plt.xlabel(\"Timestamp (s)\")\n",
+        "plt.ylabel(\"Depth (m)\")\n",
+        "plt.gca().invert_yaxis()\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "n_depth = 65\n",
+        "\n",
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.pcolormesh(\n",
+        "    np.arange(t0, t1),\n",
+        "    depths_raw[:n_depth],\n",
+        "    np.diff(signals_raw[t0:t1, :n_depth], axis=0).T,\n",
+        ")\n",
+        "plt.colorbar()\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "n_depth = 40\n",
+        "\n",
+        "plt.figure(figsize=(12, 12))\n",
+        "plt.pcolormesh(\n",
+        "    np.arange(t0, t1),\n",
+        "    depths_raw[:n_depth],\n",
+        "    np.diff(signals_raw[t0:t1, :n_depth], axis=0).T,\n",
+        ")\n",
+        "plt.colorbar()\n",
+        "plt.show()\n",
+        "\n",
+        "plt.figure(figsize=(12, 9))\n",
+        "plt.plot(np.median(np.diff(signals_raw[t0:t1, :n_depth], axis=0), axis=1))\n",
+        "plt.show()\n",
+        "\n",
+        "plt.figure(figsize=(12, 9))\n",
+        "plt.plot(np.mean(np.diff(signals_raw[t0:t1, :n_depth], axis=0), axis=1))\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.figure(figsize=(12, 9))\n",
+        "for n_depth in [20, 26, 27, 30, 35, 40, 45, 50, 60, signals_raw.shape[1]]:\n",
+        "    yy = np.median(np.diff(signals_raw[t0:t1, :n_depth], axis=0), axis=1)\n",
+        "    plt.plot(yy, label=\"{}: {}\".format(n_depth, np.max(np.abs(yy))))\n",
+        "plt.legend()\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.figure(figsize=(12, 9))\n",
+        "for n_depth in [20, 26, 27, 30, 35, 40, 45, 50, 60, signals_raw.shape[1]]:\n",
+        "    yy = np.median(np.diff(signals_raw[t0:t1, :n_depth], axis=0), axis=1)\n",
+        "    plt.plot(yy, label=\"{}: {}\".format(n_depth, np.max(np.abs(yy))))\n",
+        "plt.legend()\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "n_depths = [\n",
+        "    10,\n",
+        "    20,\n",
+        "    25,\n",
+        "    26,\n",
+        "    29,\n",
+        "    30,\n",
+        "    31,\n",
+        "    32,\n",
+        "    33,\n",
+        "    34,\n",
+        "    35,\n",
+        "    36,\n",
+        "    37,\n",
+        "    38,\n",
+        "    39,\n",
+        "    40,\n",
+        "    41,\n",
+        "    42,\n",
+        "    43,\n",
+        "    44,\n",
+        "    45,\n",
+        "    50,\n",
+        "    60,\n",
+        "    100,\n",
+        "]\n",
+        "medians = []\n",
+        "means = []\n",
+        "\n",
+        "for n_depth in n_depths:\n",
+        "    yy = np.diff(signals_raw[t0:t1, :n_depth], axis=0)\n",
+        "    medians.append(np.max(np.abs(np.median(yy, axis=1))))\n",
+        "    means.append(np.max(np.abs(np.mean(yy, axis=1))))\n",
+        "\n",
+        "plt.plot(n_depths, medians, label=\"median\")\n",
+        "plt.plot(n_depths, means, label=\"mean\")\n",
+        "\n",
+        "plt.legend()\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "medians"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "n_depths"
+      ]
+    }
+  ],
+  "metadata": {
+    "kernelspec": {
+      "display_name": "Python 3 (ipykernel)",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.7.15"
+    }
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import os\n",
-    "\n",
-    "import numpy as np"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import matplotlib.pyplot as plt\n",
-    "import seaborn as sns\n",
-    "\n",
-    "sns.set()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import echofilter.raw"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "turbulence_color = \"c\"\n",
-    "bottom_color = \"#00ee00\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "root_data_dir = \"/media/scott/scratch/Datasets/dsforce/surveyExports\"\n",
-    "# example with 1 passive period, 1 turbulence cut out\n",
-    "# sample = 'mobile/Survey16/Survey16_GR3_N1W_E'\n",
-    "# example with lots of short passive periods, 1 proper passive period, 1 turbulence cut out\n",
-    "sample = \"mobile/Survey17/Survey17_GR1_N0W_E\"\n",
-    "# sample done incorrectly\n",
-    "# sample = 'MinasPassage/march2018/march2018_D20180330-T202218_D20180331-T015214'\n",
-    "\n",
-    "# sample = 'MinasPassage/december2017/december2017_D20171214-T202211_D20171215-T015215'\n",
-    "\n",
-    "fname_raw = os.path.join(root_data_dir, sample + \"_Sv_raw.csv\")\n",
-    "fname_masked = os.path.join(root_data_dir, sample + \"_Sv.csv\")\n",
-    "\n",
-    "ts_raw, depths_raw, signals_raw = echofilter.raw.loader.transect_loader(fname_raw)\n",
-    "# ts_mskd, depths_mskd, signals_mskd = echofilter.raw.loader.transect_loader(fname_masked)\n",
-    "\n",
-    "# t_bot, d_bot = echofilter.raw.loader.evl_loader(os.path.join(root_data_dir, sample + '_bottom.evl'))\n",
-    "# t_top, d_top = echofilter.raw.loader.evl_loader(os.path.join(root_data_dir, sample + '_turbulence.evl'))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "passive_starts, passive_ends = echofilter.raw.manipulate.find_passive_data(signals_raw)\n",
-    "print(passive_starts)\n",
-    "print(passive_ends)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.pcolormesh(np.arange(0, signals_raw.shape[0]), depths_raw, signals_raw.T)\n",
-    "\n",
-    "for r_start, r_end in zip(passive_starts, passive_ends):\n",
-    "    plt.fill_between(\n",
-    "        [r_start, r_end],\n",
-    "        depths_raw[[0, 0]],\n",
-    "        depths_raw[[-1, -1]],\n",
-    "        facecolor=\"none\",\n",
-    "        hatch=\"//\",\n",
-    "        edgecolor=\"k\",\n",
-    "        linewidth=0.0,\n",
-    "    )\n",
-    "\n",
-    "plt.xlabel(\"Timestamp (s)\")\n",
-    "plt.ylabel(\"Depth (m)\")\n",
-    "plt.gca().invert_yaxis()\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "for index_start, index_end in zip(passive_starts, passive_ends):\n",
-    "    plt.figure(figsize=(12, 12))\n",
-    "    plt.pcolormesh(\n",
-    "        ts_raw[index_start:index_end],\n",
-    "        depths_raw,\n",
-    "        signals_raw[index_start:index_end, :].T,\n",
-    "    )\n",
-    "    plt.gca().invert_yaxis()\n",
-    "    plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "for index_start, index_end in zip(\n",
-    "    np.concatenate(([0], passive_ends)),\n",
-    "    np.concatenate((passive_starts, [signals_raw.shape[0]])),\n",
-    "):\n",
-    "    index_start = int(index_start)\n",
-    "    index_end = int(index_end)\n",
-    "    if index_start == index_end:\n",
-    "        continue\n",
-    "    plt.figure(figsize=(12, 12))\n",
-    "    plt.pcolormesh(\n",
-    "        ts_raw[index_start:index_end],\n",
-    "        depths_raw,\n",
-    "        signals_raw[index_start:index_end, :].T,\n",
-    "    )\n",
-    "    plt.gca().invert_yaxis()\n",
-    "    plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "n_depth = 50\n",
-    "\n",
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.pcolormesh(\n",
-    "    np.arange(0, signals_raw.shape[0]), depths_raw[:n_depth], signals_raw[:, :n_depth].T\n",
-    ")\n",
-    "\n",
-    "for r_start, r_end in zip(passive_starts, passive_ends):\n",
-    "    plt.fill_between(\n",
-    "        [r_start, r_end],\n",
-    "        depths_raw[[0, 0]],\n",
-    "        depths_raw[[n_depth - 1, n_depth - 1]],\n",
-    "        facecolor=\"none\",\n",
-    "        hatch=\"//\",\n",
-    "        edgecolor=\"k\",\n",
-    "        linewidth=0.0,\n",
-    "    )\n",
-    "\n",
-    "plt.xlabel(\"Timestamp (s)\")\n",
-    "plt.ylabel(\"Depth (m)\")\n",
-    "plt.gca().invert_yaxis()\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "n_depth = 40\n",
-    "\n",
-    "for idx in np.concatenate((passive_starts, passive_ends)):\n",
-    "    plt.figure(figsize=(12, 12))\n",
-    "    plt.pcolormesh(\n",
-    "        np.arange(max(0, idx - 4), min(len(ts_raw), idx + 5)),\n",
-    "        depths_raw[:n_depth],\n",
-    "        signals_raw[max(0, idx - 4) : min(len(ts_raw), idx + 5), :n_depth].T,\n",
-    "    )\n",
-    "    plt.gca().invert_yaxis()\n",
-    "    plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "t0 = 230  # 190  #  0\n",
-    "t1 = 255  # 220  # 65  # signals_raw.shape[0]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "n_times = 2"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "n_depth = len(depths_raw)\n",
-    "\n",
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.pcolormesh(np.arange(t0, t1), depths_raw[:n_depth], signals_raw[t0:t1, :n_depth].T)\n",
-    "\n",
-    "for r_start, r_end in zip(passive_starts, passive_ends):\n",
-    "    if r_end > n_times:\n",
-    "        continue\n",
-    "    plt.fill_between(\n",
-    "        [r_start, r_end],\n",
-    "        depths_raw[[0, 0]],\n",
-    "        depths_raw[[n_depth - 1, n_depth - 1]],\n",
-    "        facecolor=\"none\",\n",
-    "        hatch=\"//\",\n",
-    "        edgecolor=\"k\",\n",
-    "        linewidth=0.0,\n",
-    "    )\n",
-    "\n",
-    "plt.xlabel(\"Timestamp (s)\")\n",
-    "plt.ylabel(\"Depth (m)\")\n",
-    "plt.gca().invert_yaxis()\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "n_depth = 50\n",
-    "\n",
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.pcolormesh(np.arange(t0, t1), depths_raw[:n_depth], signals_raw[t0:t1, :n_depth].T)\n",
-    "\n",
-    "for r_start, r_end in zip(passive_starts, passive_ends):\n",
-    "    if r_end > n_times:\n",
-    "        continue\n",
-    "    plt.fill_between(\n",
-    "        [r_start, r_end],\n",
-    "        depths_raw[[0, 0]],\n",
-    "        depths_raw[[n_depth - 1, n_depth - 1]],\n",
-    "        facecolor=\"none\",\n",
-    "        hatch=\"//\",\n",
-    "        edgecolor=\"k\",\n",
-    "        linewidth=0.0,\n",
-    "    )\n",
-    "\n",
-    "plt.xlabel(\"Timestamp (s)\")\n",
-    "plt.ylabel(\"Depth (m)\")\n",
-    "plt.gca().invert_yaxis()\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "n_depth = 65\n",
-    "\n",
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.pcolormesh(\n",
-    "    np.arange(t0, t1),\n",
-    "    depths_raw[:n_depth],\n",
-    "    np.diff(signals_raw[t0:t1, :n_depth], axis=0).T,\n",
-    ")\n",
-    "plt.colorbar()\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "n_depth = 40\n",
-    "\n",
-    "plt.figure(figsize=(12, 12))\n",
-    "plt.pcolormesh(\n",
-    "    np.arange(t0, t1),\n",
-    "    depths_raw[:n_depth],\n",
-    "    np.diff(signals_raw[t0:t1, :n_depth], axis=0).T,\n",
-    ")\n",
-    "plt.colorbar()\n",
-    "plt.show()\n",
-    "\n",
-    "plt.figure(figsize=(12, 9))\n",
-    "plt.plot(np.median(np.diff(signals_raw[t0:t1, :n_depth], axis=0), axis=1))\n",
-    "plt.show()\n",
-    "\n",
-    "plt.figure(figsize=(12, 9))\n",
-    "plt.plot(np.mean(np.diff(signals_raw[t0:t1, :n_depth], axis=0), axis=1))\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.figure(figsize=(12, 9))\n",
-    "for n_depth in [20, 26, 27, 30, 35, 40, 45, 50, 60, signals_raw.shape[1]]:\n",
-    "    yy = np.median(np.diff(signals_raw[t0:t1, :n_depth], axis=0), axis=1)\n",
-    "    plt.plot(yy, label=\"{}: {}\".format(n_depth, np.max(np.abs(yy))))\n",
-    "plt.legend()\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.figure(figsize=(12, 9))\n",
-    "for n_depth in [20, 26, 27, 30, 35, 40, 45, 50, 60, signals_raw.shape[1]]:\n",
-    "    yy = np.median(np.diff(signals_raw[t0:t1, :n_depth], axis=0), axis=1)\n",
-    "    plt.plot(yy, label=\"{}: {}\".format(n_depth, np.max(np.abs(yy))))\n",
-    "plt.legend()\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "n_depths = [\n",
-    "    10,\n",
-    "    20,\n",
-    "    25,\n",
-    "    26,\n",
-    "    29,\n",
-    "    30,\n",
-    "    31,\n",
-    "    32,\n",
-    "    33,\n",
-    "    34,\n",
-    "    35,\n",
-    "    36,\n",
-    "    37,\n",
-    "    38,\n",
-    "    39,\n",
-    "    40,\n",
-    "    41,\n",
-    "    42,\n",
-    "    43,\n",
-    "    44,\n",
-    "    45,\n",
-    "    50,\n",
-    "    60,\n",
-    "    100,\n",
-    "]\n",
-    "medians = []\n",
-    "means = []\n",
-    "\n",
-    "for n_depth in n_depths:\n",
-    "    yy = np.diff(signals_raw[t0:t1, :n_depth], axis=0)\n",
-    "    medians.append(np.max(np.abs(np.median(yy, axis=1))))\n",
-    "    means.append(np.max(np.abs(np.mean(yy, axis=1))))\n",
-    "\n",
-    "plt.plot(n_depths, medians, label=\"median\")\n",
-    "plt.plot(n_depths, means, label=\"mean\")\n",
-    "\n",
-    "plt.legend()\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "medians"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "n_depths"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.7.15"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
+  "nbformat": 4,
+  "nbformat_minor": 2
 }