From 772646b95a69fbbf236cd5391032d97a9cb118f4 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 17 Jun 2024 21:12:16 +0000 Subject: [PATCH] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- echofilter/inference.py | 8 +- echofilter/nn/wrapper.py | 2 +- notebooks/Building Data Loader 2.ipynb | 1360 +++++------ notebooks/Building Data Loader 3.ipynb | 1662 ++++++------- notebooks/Building Data Loader.ipynb | 1194 ++++----- notebooks/Completely decomposing mask.ipynb | 1056 ++++---- notebooks/Data Loader - Stationary.ipynb | 994 ++++---- notebooks/Estimate mean and stdev.ipynb | 1194 ++++----- notebooks/Finding mask all removed.ipynb | 706 +++--- .../Generating lines from masked csv.ipynb | 1042 ++++---- notebooks/Passive metadata labelling.ipynb | 2152 ++++++++--------- notebooks/Plot Metrics Distribution.ipynb | 284 +-- notebooks/Plot results.ipynb | 646 ++--- notebooks/Splitting Passive Data.ipynb | 1712 ++++++------- notebooks/Surface anomaly removal.ipynb | 1146 ++++----- notebooks/check chunking.ipynb | 548 ++--- .../check making lines from masked csv.ipynb | 414 ++-- notebooks/check splitting passive data.ipynb | 884 +++---- 18 files changed, 8503 insertions(+), 8501 deletions(-) diff --git a/echofilter/inference.py b/echofilter/inference.py index 1f3c3f41..b5b12417 100755 --- a/echofilter/inference.py +++ b/echofilter/inference.py @@ -703,9 +703,11 @@ def run_inference( print( "Echoview application would{} be opened {}.".format( "" if do_open else " not", - "to convert EV files to CSV" - if do_open - else "(no EV files to process)", + ( + "to convert EV files to CSV" + if do_open + else "(no EV files to process)" + ), ) ) do_open = False diff --git a/echofilter/nn/wrapper.py b/echofilter/nn/wrapper.py index 2abd2a3c..8e29426c 100644 --- a/echofilter/nn/wrapper.py +++ b/echofilter/nn/wrapper.py @@ -119,7 +119,7 @@ def __init__( mapping_extra = {} for key in mapping: for alias_map in self.aliases: - for (alias_a, alias_b) in [alias_map, alias_map[::-1]]: + for alias_a, alias_b in [alias_map, alias_map[::-1]]: if "_" + alias_a not in key: continue alt_key = key.replace("_" + alias_a, "_" + alias_b) diff --git a/notebooks/Building Data Loader 2.ipynb b/notebooks/Building Data Loader 2.ipynb index ce320fbf..1534c03f 100644 --- a/notebooks/Building Data Loader 2.ipynb +++ b/notebooks/Building Data Loader 2.ipynb @@ -1,682 +1,682 @@ { - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cd .." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import csv\n", - "import os" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import matplotlib.pyplot as plt\n", - "import seaborn as sns\n", - "\n", - "sns.set()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from echofilter.raw.loader import evl_loader, transect_loader" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "ROOT_DATA_DIR = \"/media/scott/scratch/Datasets/dsforce\"\n", - "ROOT_DATA_DIR = \"/data/dsforce\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def load_transect_data(\n", - " transect_pth, dataset=\"surveyExports\", root_data_dir=ROOT_DATA_DIR\n", - "):\n", - "\n", - " dirname = os.path.join(root_data_dir, dataset)\n", - " raw_fname = os.path.join(dirname, transect_pth + \"_Sv_raw.csv\")\n", - " bot_fname = os.path.join(dirname, transect_pth + \"_bottom.evl\")\n", - " top_fname = os.path.join(dirname, transect_pth + \"_turbulence.evl\")\n", - "\n", - " timestamps, depths, signals = transect_loader(raw_fname)\n", - " t_bot, d_bot = evl_loader(bot_fname)\n", - " t_top, d_top = evl_loader(top_fname)\n", - "\n", - " return (\n", - " timestamps,\n", - " depths,\n", - " signals,\n", - " np.interp(timestamps, t_top, d_top),\n", - " np.interp(timestamps, t_bot, d_bot),\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def load_transect_data2(\n", - " survey, transect_name, dataset=\"surveyExports\", root_data_dir=ROOT_DATA_DIR\n", - "):\n", - "\n", - " return load_transect_data(\n", - " os.path.join(\n", - " \"Survey{}\".format(survey), \"Survey{}_{}\".format(survey, transect_name)\n", - " ),\n", - " dataset=dataset,\n", - " root_data_dir=root_data_dir,\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def plot_transect_data(\n", - " transect_pth, dataset=\"surveyExports\", root_data_dir=ROOT_DATA_DIR\n", - "):\n", - "\n", - " timestamps, depths, signals, d_top, d_bot = load_transect_data(\n", - " transect_pth, dataset, root_data_dir\n", - " )\n", - "\n", - " plt.figure(figsize=(12, 12))\n", - " plt.pcolormesh(timestamps, -depths, signals.T)\n", - " plt.plot(timestamps, -d_bot, \"b\")\n", - " plt.plot(timestamps, -d_top, \"c\")\n", - " plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def plot_transect_data2(\n", - " survey, transect_name, dataset=\"surveyExports\", root_data_dir=ROOT_DATA_DIR\n", - "):\n", - "\n", - " timestamps, depths, signals, d_top, d_bot = load_transect_data2(\n", - " survey, transect_name, dataset, root_data_dir\n", - " )\n", - "\n", - " plt.figure(figsize=(12, 12))\n", - " plt.pcolormesh(timestamps, -depths, signals.T)\n", - " plt.plot(timestamps, -d_bot, \"b\")\n", - " plt.plot(timestamps, -d_top, \"c\")\n", - " plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plot_transect_data(\"Survey17/Survey17_GR1_N0A_E\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "survey = 17\n", - "transect_name = \"GR1_N0A_E\"\n", - "plot_transect_data2(survey, transect_name)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def get_partition_data(\n", - " partition,\n", - " dataset=\"surveyExports\",\n", - " partitioning_version=\"firstpass\",\n", - " root_data_dir=ROOT_DATA_DIR,\n", - "):\n", - "\n", - " dirname = os.path.join(root_data_dir, dataset, \"sets\", partitioning_version)\n", - " fname_partition = os.path.join(dirname, partition + \".txt\")\n", - " fname_header = os.path.join(dirname, \"header\" + \".txt\")\n", - "\n", - " with open(fname_header, \"r\") as hf:\n", - " for row in csv.reader(hf):\n", - " header = [entry.strip() for entry in row]\n", - " break\n", - "\n", - " df = pd.read_csv(fname_partition, header=None, names=header)\n", - " return df" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "get_partition_data(\"train\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def get_partition_list(\n", - " partition,\n", - " dataset=\"surveyExports\",\n", - " full_path=False,\n", - " partitioning_version=\"firstpass\",\n", - " root_data_dir=ROOT_DATA_DIR,\n", - "):\n", - " df = get_partition_data(\n", - " partition,\n", - " dataset=dataset,\n", - " partitioning_version=partitioning_version,\n", - " root_data_dir=root_data_dir,\n", - " )\n", - " fnames = df[\"Filename\"]\n", - " fnames = [\n", - " os.path.join(f.split(\"_\")[0], f.strip().replace(\"_Sv_raw.csv\", \"\"))\n", - " for f in fnames\n", - " ]\n", - " if full_path:\n", - " fnames = [os.path.join(root_data_dir, dataset, f) for f in fnames]\n", - " return fnames" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "get_partition_list(\"train\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "get_partition_list(\"train\", full_path=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(\"{:<40s}{:>6s} {:>6s}\".format(\"Filename\", \"Min D\", \"Max D\"))\n", - "\n", - "for fname in sorted(get_partition_list(\"train\", full_path=True)):\n", - " fname = fname + \"_bottom.evl\"\n", - " try:\n", - " depths = evl_loader(fname)[1]\n", - " except Exception:\n", - " continue\n", - " print(\n", - " \"{:<40s}{:6.1f} {:6.1f} {}\".format(\n", - " os.path.split(fname)[1],\n", - " min(depths),\n", - " max(depths),\n", - " \"*\" if max(depths) > 62 else \"\",\n", - " )\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(\"{:<40s}{:>6s} {:>6s}\".format(\"Filename\", \"Min D\", \"Max D\"))\n", - "\n", - "for fname in sorted(get_partition_list(\"validate\", full_path=True)):\n", - " fname = fname + \"_bottom.evl\"\n", - " try:\n", - " depths = evl_loader(fname)[1]\n", - " except Exception:\n", - " continue\n", - " print(\n", - " \"{:<40s}{:6.1f} {:6.1f} {}\".format(\n", - " os.path.split(fname)[1],\n", - " min(depths),\n", - " max(depths),\n", - " \"*\" if max(depths) > 62 else \"\",\n", - " )\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(\"{:<40s}{:>6s} {:>6s}\".format(\"Filename\", \"Min D\", \"Max D\"))\n", - "\n", - "for fname in sorted(get_partition_list(\"test\", full_path=True)):\n", - " fname = fname + \"_bottom.evl\"\n", - " try:\n", - " depths = evl_loader(fname)[1]\n", - " except Exception:\n", - " continue\n", - " print(\n", - " \"{:<40s}{:6.1f} {:6.1f} {}\".format(\n", - " os.path.split(fname)[1],\n", - " min(depths),\n", - " max(depths),\n", - " \"*\" if max(depths) > 62 else \"\",\n", - " )\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(\"{:<40s}{:>6s} {:>6s}\".format(\"Filename\", \"Min D\", \"Max D\"))\n", - "\n", - "for fname in sorted(get_partition_list(\"leaveout\", full_path=True)):\n", - " fname = fname + \"_bottom.evl\"\n", - " try:\n", - " depths = evl_loader(fname)[1]\n", - " except Exception:\n", - " continue\n", - " print(\n", - " \"{:<40s}{:6.1f} {:6.1f} {}\".format(\n", - " os.path.split(fname)[1],\n", - " min(depths),\n", - " max(depths),\n", - " \"*\" if max(depths) > 62 else \"\",\n", - " )\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# One weird survey\n", - "plot_transect_data(\"Survey17/Survey17_GR4_S3A_E\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(\"{:<40s}{:>6s} {:>6s}\".format(\"Filename\", \"Min D\", \"Max D\"))\n", - "\n", - "for fname in sorted(get_partition_list(\"train\", full_path=True)):\n", - " fname = fname + \"_turbulence.evl\"\n", - " try:\n", - " depths = evl_loader(fname)[1]\n", - " except Exception:\n", - " continue\n", - " print(\n", - " \"{:<40s}{:6.1f} {:6.1f} {}\".format(\n", - " os.path.split(fname)[1],\n", - " min(depths),\n", - " max(depths),\n", - " \"*\" if max(depths) > 62 else \"\",\n", - " )\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plot_transect_data(\"Survey17/Survey17_GR4_N5A_E\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plot_transect_data(\"Survey17/Survey17_GR1_S3W_F\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plot_transect_data(\"Survey03/Survey03_GR2_S1A_survey3\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "40, 62, 96" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plot_transect_data(\"Survey17/Survey17_GR1_S3W_F\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "transect_pth = \"Survey17/Survey17_GR1_S3W_F\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "timestamps, depths, signals, d_top, d_bot = load_transect_data(\n", - " transect_pth, dataset=\"surveyExports\", root_data_dir=ROOT_DATA_DIR\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "timestamps" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "depths" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "signals" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "d_top" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "d_bot" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(12, 12))\n", - "plt.pcolormesh(timestamps[:128], -depths[:2000], signals[:128, :2000].T)\n", - "plt.plot(timestamps[:128], -d_bot[:128], \"b\")\n", - "plt.plot(timestamps[:128], -d_top[:128], \"c\")\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "dwn_sig = signals[:128, :2000].reshape(128, 200, 10).mean(-1).reshape(128, 200)\n", - "\n", - "plt.figure(figsize=(12, 12))\n", - "plt.pcolormesh(timestamps[:128], -depths[:2000:10], dwn_sig.T)\n", - "plt.plot(timestamps[:128], -d_bot[:128], \"b\")\n", - "plt.plot(timestamps[:128], -d_top[:128], \"c\")\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def shard_transect(\n", - " transect_pth,\n", - " dataset=\"surveyExports\",\n", - " max_depth=100,\n", - " shard_len=128,\n", - " root_data_dir=ROOT_DATA_DIR,\n", - "):\n", - " root_shard_dir = os.path.join(root_data_dir, dataset + \"_sharded\")\n", - " timestamps, depths, signals, d_top, d_bot = load_transect_data(\n", - " transect_pth, dataset, root_data_dir\n", - " )\n", - " depth_mask = depths <= 100\n", - " indices = range(128, signals.shape[0], 128)\n", - " dirname = os.path.join(root_shard_dir, transect_pth)\n", - " os.makedirs(dirname, exist_ok=True)\n", - " with open(os.path.join(dirname, \"shard_size.txt\"), \"w\") as hf:\n", - " print(\"{},{}\".format(len(timestamps), shard_len), file=hf)\n", - " for i, (ts_i, sig_i, top_i, bot_i) in enumerate(\n", - " zip(\n", - " np.split(timestamps, indices),\n", - " np.split(np.single(signals[:, depth_mask]), indices),\n", - " np.split(np.single(d_top), indices),\n", - " np.split(np.single(d_bot), indices),\n", - " )\n", - " ):\n", - " os.makedirs(os.path.join(dirname, str(i)), exist_ok=True)\n", - " for obj, fname in (\n", - " (depths[depth_mask], \"depths\"),\n", - " (ts_i, \"timestamps\"),\n", - " (sig_i, \"Sv\"),\n", - " (top_i, \"top\"),\n", - " (bot_i, \"bottom\"),\n", - " ):\n", - " obj.dump(os.path.join(dirname, str(i), fname + \".npy\"))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def load_transect_from_shards(\n", - " transect_pth, i1=0, i2=None, dataset=\"surveyExports\", root_data_dir=ROOT_DATA_DIR\n", - "):\n", - " root_shard_dir = os.path.join(root_data_dir, dataset + \"_sharded\")\n", - " dirname = os.path.join(root_shard_dir, transect_pth)\n", - " with open(os.path.join(dirname, \"shard_size.txt\"), \"r\") as f:\n", - " n_timestamps, shard_len = f.readline().strip().split(\",\")\n", - " n_timestamps = int(n_timestamps)\n", - " shard_len = int(shard_len)\n", - " if i2 is None:\n", - " i2 = n_timestamps\n", - " j1 = max(0, int(i1 / shard_len))\n", - " j2 = int(min(i2, n_timestamps - 1) / shard_len)\n", - "\n", - " depths = np.load(os.path.join(dirname, str(j1), \"depths.npy\"), allow_pickle=True)\n", - "\n", - " def load_shard(fname):\n", - " return np.concatenate(\n", - " [\n", - " np.load(\n", - " os.path.join(dirname, str(j), fname + \".npy\"), allow_pickle=True\n", - " )\n", - " for j in range(j1, j2 + 1)\n", - " ]\n", - " )[(i1 - j1 * shard_len) : (i2 - j1 * shard_len)]\n", - "\n", - " timestamps = load_shard(\"timestamps\")\n", - " signals = load_shard(\"Sv\")\n", - " d_top = load_shard(\"top\")\n", - " d_bot = load_shard(\"bottom\")\n", - "\n", - " return timestamps, depths, signals, d_top, d_bot" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "shard_transect(transect_pth)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "o = load_transect_from_shards(transect_pth)\n", - "for io in o:\n", - " print(io.shape)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "o = load_transect_from_shards(transect_pth, 200, 500)\n", - "for io in o:\n", - " print(io.shape)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "timestamps, depths, signals, d_top, d_bot = load_transect_from_shards(\n", - " transect_pth, 100, 800\n", - ")\n", - "\n", - "plt.figure(figsize=(12, 12))\n", - "plt.pcolormesh(timestamps, -depths, signals.T)\n", - "plt.plot(timestamps, -d_bot, \"b\")\n", - "plt.plot(timestamps, -d_top, \"c\")\n", - "plt.show()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.15" - } - }, - "nbformat": 4, - "nbformat_minor": 2 + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cd .." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import csv\n", + "import os" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "\n", + "sns.set()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from echofilter.raw.loader import evl_loader, transect_loader" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ROOT_DATA_DIR = \"/media/scott/scratch/Datasets/dsforce\"\n", + "ROOT_DATA_DIR = \"/data/dsforce\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def load_transect_data(\n", + " transect_pth, dataset=\"surveyExports\", root_data_dir=ROOT_DATA_DIR\n", + "):\n", + "\n", + " dirname = os.path.join(root_data_dir, dataset)\n", + " raw_fname = os.path.join(dirname, transect_pth + \"_Sv_raw.csv\")\n", + " bot_fname = os.path.join(dirname, transect_pth + \"_bottom.evl\")\n", + " top_fname = os.path.join(dirname, transect_pth + \"_turbulence.evl\")\n", + "\n", + " timestamps, depths, signals = transect_loader(raw_fname)\n", + " t_bot, d_bot = evl_loader(bot_fname)\n", + " t_top, d_top = evl_loader(top_fname)\n", + "\n", + " return (\n", + " timestamps,\n", + " depths,\n", + " signals,\n", + " np.interp(timestamps, t_top, d_top),\n", + " np.interp(timestamps, t_bot, d_bot),\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def load_transect_data2(\n", + " survey, transect_name, dataset=\"surveyExports\", root_data_dir=ROOT_DATA_DIR\n", + "):\n", + "\n", + " return load_transect_data(\n", + " os.path.join(\n", + " \"Survey{}\".format(survey), \"Survey{}_{}\".format(survey, transect_name)\n", + " ),\n", + " dataset=dataset,\n", + " root_data_dir=root_data_dir,\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def plot_transect_data(\n", + " transect_pth, dataset=\"surveyExports\", root_data_dir=ROOT_DATA_DIR\n", + "):\n", + "\n", + " timestamps, depths, signals, d_top, d_bot = load_transect_data(\n", + " transect_pth, dataset, root_data_dir\n", + " )\n", + "\n", + " plt.figure(figsize=(12, 12))\n", + " plt.pcolormesh(timestamps, -depths, signals.T)\n", + " plt.plot(timestamps, -d_bot, \"b\")\n", + " plt.plot(timestamps, -d_top, \"c\")\n", + " plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def plot_transect_data2(\n", + " survey, transect_name, dataset=\"surveyExports\", root_data_dir=ROOT_DATA_DIR\n", + "):\n", + "\n", + " timestamps, depths, signals, d_top, d_bot = load_transect_data2(\n", + " survey, transect_name, dataset, root_data_dir\n", + " )\n", + "\n", + " plt.figure(figsize=(12, 12))\n", + " plt.pcolormesh(timestamps, -depths, signals.T)\n", + " plt.plot(timestamps, -d_bot, \"b\")\n", + " plt.plot(timestamps, -d_top, \"c\")\n", + " plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plot_transect_data(\"Survey17/Survey17_GR1_N0A_E\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "survey = 17\n", + "transect_name = \"GR1_N0A_E\"\n", + "plot_transect_data2(survey, transect_name)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def get_partition_data(\n", + " partition,\n", + " dataset=\"surveyExports\",\n", + " partitioning_version=\"firstpass\",\n", + " root_data_dir=ROOT_DATA_DIR,\n", + "):\n", + "\n", + " dirname = os.path.join(root_data_dir, dataset, \"sets\", partitioning_version)\n", + " fname_partition = os.path.join(dirname, partition + \".txt\")\n", + " fname_header = os.path.join(dirname, \"header\" + \".txt\")\n", + "\n", + " with open(fname_header, \"r\") as hf:\n", + " for row in csv.reader(hf):\n", + " header = [entry.strip() for entry in row]\n", + " break\n", + "\n", + " df = pd.read_csv(fname_partition, header=None, names=header)\n", + " return df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "get_partition_data(\"train\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def get_partition_list(\n", + " partition,\n", + " dataset=\"surveyExports\",\n", + " full_path=False,\n", + " partitioning_version=\"firstpass\",\n", + " root_data_dir=ROOT_DATA_DIR,\n", + "):\n", + " df = get_partition_data(\n", + " partition,\n", + " dataset=dataset,\n", + " partitioning_version=partitioning_version,\n", + " root_data_dir=root_data_dir,\n", + " )\n", + " fnames = df[\"Filename\"]\n", + " fnames = [\n", + " os.path.join(f.split(\"_\")[0], f.strip().replace(\"_Sv_raw.csv\", \"\"))\n", + " for f in fnames\n", + " ]\n", + " if full_path:\n", + " fnames = [os.path.join(root_data_dir, dataset, f) for f in fnames]\n", + " return fnames" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "get_partition_list(\"train\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "get_partition_list(\"train\", full_path=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"{:<40s}{:>6s} {:>6s}\".format(\"Filename\", \"Min D\", \"Max D\"))\n", + "\n", + "for fname in sorted(get_partition_list(\"train\", full_path=True)):\n", + " fname = fname + \"_bottom.evl\"\n", + " try:\n", + " depths = evl_loader(fname)[1]\n", + " except Exception:\n", + " continue\n", + " print(\n", + " \"{:<40s}{:6.1f} {:6.1f} {}\".format(\n", + " os.path.split(fname)[1],\n", + " min(depths),\n", + " max(depths),\n", + " \"*\" if max(depths) > 62 else \"\",\n", + " )\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"{:<40s}{:>6s} {:>6s}\".format(\"Filename\", \"Min D\", \"Max D\"))\n", + "\n", + "for fname in sorted(get_partition_list(\"validate\", full_path=True)):\n", + " fname = fname + \"_bottom.evl\"\n", + " try:\n", + " depths = evl_loader(fname)[1]\n", + " except Exception:\n", + " continue\n", + " print(\n", + " \"{:<40s}{:6.1f} {:6.1f} {}\".format(\n", + " os.path.split(fname)[1],\n", + " min(depths),\n", + " max(depths),\n", + " \"*\" if max(depths) > 62 else \"\",\n", + " )\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"{:<40s}{:>6s} {:>6s}\".format(\"Filename\", \"Min D\", \"Max D\"))\n", + "\n", + "for fname in sorted(get_partition_list(\"test\", full_path=True)):\n", + " fname = fname + \"_bottom.evl\"\n", + " try:\n", + " depths = evl_loader(fname)[1]\n", + " except Exception:\n", + " continue\n", + " print(\n", + " \"{:<40s}{:6.1f} {:6.1f} {}\".format(\n", + " os.path.split(fname)[1],\n", + " min(depths),\n", + " max(depths),\n", + " \"*\" if max(depths) > 62 else \"\",\n", + " )\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"{:<40s}{:>6s} {:>6s}\".format(\"Filename\", \"Min D\", \"Max D\"))\n", + "\n", + "for fname in sorted(get_partition_list(\"leaveout\", full_path=True)):\n", + " fname = fname + \"_bottom.evl\"\n", + " try:\n", + " depths = evl_loader(fname)[1]\n", + " except Exception:\n", + " continue\n", + " print(\n", + " \"{:<40s}{:6.1f} {:6.1f} {}\".format(\n", + " os.path.split(fname)[1],\n", + " min(depths),\n", + " max(depths),\n", + " \"*\" if max(depths) > 62 else \"\",\n", + " )\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# One weird survey\n", + "plot_transect_data(\"Survey17/Survey17_GR4_S3A_E\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"{:<40s}{:>6s} {:>6s}\".format(\"Filename\", \"Min D\", \"Max D\"))\n", + "\n", + "for fname in sorted(get_partition_list(\"train\", full_path=True)):\n", + " fname = fname + \"_turbulence.evl\"\n", + " try:\n", + " depths = evl_loader(fname)[1]\n", + " except Exception:\n", + " continue\n", + " print(\n", + " \"{:<40s}{:6.1f} {:6.1f} {}\".format(\n", + " os.path.split(fname)[1],\n", + " min(depths),\n", + " max(depths),\n", + " \"*\" if max(depths) > 62 else \"\",\n", + " )\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plot_transect_data(\"Survey17/Survey17_GR4_N5A_E\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plot_transect_data(\"Survey17/Survey17_GR1_S3W_F\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plot_transect_data(\"Survey03/Survey03_GR2_S1A_survey3\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "40, 62, 96" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plot_transect_data(\"Survey17/Survey17_GR1_S3W_F\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "transect_pth = \"Survey17/Survey17_GR1_S3W_F\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "timestamps, depths, signals, d_top, d_bot = load_transect_data(\n", + " transect_pth, dataset=\"surveyExports\", root_data_dir=ROOT_DATA_DIR\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "timestamps" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "depths" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "signals" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "d_top" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "d_bot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(12, 12))\n", + "plt.pcolormesh(timestamps[:128], -depths[:2000], signals[:128, :2000].T)\n", + "plt.plot(timestamps[:128], -d_bot[:128], \"b\")\n", + "plt.plot(timestamps[:128], -d_top[:128], \"c\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dwn_sig = signals[:128, :2000].reshape(128, 200, 10).mean(-1).reshape(128, 200)\n", + "\n", + "plt.figure(figsize=(12, 12))\n", + "plt.pcolormesh(timestamps[:128], -depths[:2000:10], dwn_sig.T)\n", + "plt.plot(timestamps[:128], -d_bot[:128], \"b\")\n", + "plt.plot(timestamps[:128], -d_top[:128], \"c\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def shard_transect(\n", + " transect_pth,\n", + " dataset=\"surveyExports\",\n", + " max_depth=100,\n", + " shard_len=128,\n", + " root_data_dir=ROOT_DATA_DIR,\n", + "):\n", + " root_shard_dir = os.path.join(root_data_dir, dataset + \"_sharded\")\n", + " timestamps, depths, signals, d_top, d_bot = load_transect_data(\n", + " transect_pth, dataset, root_data_dir\n", + " )\n", + " depth_mask = depths <= 100\n", + " indices = range(128, signals.shape[0], 128)\n", + " dirname = os.path.join(root_shard_dir, transect_pth)\n", + " os.makedirs(dirname, exist_ok=True)\n", + " with open(os.path.join(dirname, \"shard_size.txt\"), \"w\") as hf:\n", + " print(\"{},{}\".format(len(timestamps), shard_len), file=hf)\n", + " for i, (ts_i, sig_i, top_i, bot_i) in enumerate(\n", + " zip(\n", + " np.split(timestamps, indices),\n", + " np.split(np.single(signals[:, depth_mask]), indices),\n", + " np.split(np.single(d_top), indices),\n", + " np.split(np.single(d_bot), indices),\n", + " )\n", + " ):\n", + " os.makedirs(os.path.join(dirname, str(i)), exist_ok=True)\n", + " for obj, fname in (\n", + " (depths[depth_mask], \"depths\"),\n", + " (ts_i, \"timestamps\"),\n", + " (sig_i, \"Sv\"),\n", + " (top_i, \"top\"),\n", + " (bot_i, \"bottom\"),\n", + " ):\n", + " obj.dump(os.path.join(dirname, str(i), fname + \".npy\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def load_transect_from_shards(\n", + " transect_pth, i1=0, i2=None, dataset=\"surveyExports\", root_data_dir=ROOT_DATA_DIR\n", + "):\n", + " root_shard_dir = os.path.join(root_data_dir, dataset + \"_sharded\")\n", + " dirname = os.path.join(root_shard_dir, transect_pth)\n", + " with open(os.path.join(dirname, \"shard_size.txt\"), \"r\") as f:\n", + " n_timestamps, shard_len = f.readline().strip().split(\",\")\n", + " n_timestamps = int(n_timestamps)\n", + " shard_len = int(shard_len)\n", + " if i2 is None:\n", + " i2 = n_timestamps\n", + " j1 = max(0, int(i1 / shard_len))\n", + " j2 = int(min(i2, n_timestamps - 1) / shard_len)\n", + "\n", + " depths = np.load(os.path.join(dirname, str(j1), \"depths.npy\"), allow_pickle=True)\n", + "\n", + " def load_shard(fname):\n", + " return np.concatenate(\n", + " [\n", + " np.load(\n", + " os.path.join(dirname, str(j), fname + \".npy\"), allow_pickle=True\n", + " )\n", + " for j in range(j1, j2 + 1)\n", + " ]\n", + " )[(i1 - j1 * shard_len) : (i2 - j1 * shard_len)]\n", + "\n", + " timestamps = load_shard(\"timestamps\")\n", + " signals = load_shard(\"Sv\")\n", + " d_top = load_shard(\"top\")\n", + " d_bot = load_shard(\"bottom\")\n", + "\n", + " return timestamps, depths, signals, d_top, d_bot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "shard_transect(transect_pth)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "o = load_transect_from_shards(transect_pth)\n", + "for io in o:\n", + " print(io.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "o = load_transect_from_shards(transect_pth, 200, 500)\n", + "for io in o:\n", + " print(io.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "timestamps, depths, signals, d_top, d_bot = load_transect_from_shards(\n", + " transect_pth, 100, 800\n", + ")\n", + "\n", + "plt.figure(figsize=(12, 12))\n", + "plt.pcolormesh(timestamps, -depths, signals.T)\n", + "plt.plot(timestamps, -d_bot, \"b\")\n", + "plt.plot(timestamps, -d_top, \"c\")\n", + "plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.15" + } + }, + "nbformat": 4, + "nbformat_minor": 2 } diff --git a/notebooks/Building Data Loader 3.ipynb b/notebooks/Building Data Loader 3.ipynb index 7161302d..892c0436 100644 --- a/notebooks/Building Data Loader 3.ipynb +++ b/notebooks/Building Data Loader 3.ipynb @@ -1,833 +1,833 @@ { - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cd .." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "import random" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import matplotlib.pyplot as plt\n", - "import seaborn as sns\n", - "\n", - "sns.set()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import echofilter.raw.shardloader" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "ROOT_DATA_DIR = \"/media/scott/scratch/Datasets/dsforce/\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "transect_pth = \"Survey17/Survey17_GR1_S3W_F\"\n", - "(\n", - " timestamps,\n", - " depths,\n", - " signals,\n", - " d_top,\n", - " d_bot,\n", - ") = echofilter.raw.shardloader.load_transect_from_shards_rel(\n", - " transect_pth,\n", - " 100,\n", - " 800,\n", - " root_data_dir=ROOT_DATA_DIR,\n", - ")\n", - "\n", - "plt.figure(figsize=(12, 12))\n", - "plt.pcolormesh(timestamps, -depths, signals.T)\n", - "plt.plot(timestamps, -d_bot, \"b\")\n", - "plt.plot(timestamps, -d_top, \"c\")\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "transect_pth = \"Survey17/Survey17_GR1_S3W_F\"\n", - "(\n", - " timestamps,\n", - " depths,\n", - " signals,\n", - " d_top,\n", - " d_bot,\n", - ") = echofilter.raw.shardloader.load_transect_from_shards_rel(\n", - " transect_pth,\n", - " -100,\n", - " 800,\n", - " root_data_dir=ROOT_DATA_DIR,\n", - ")\n", - "\n", - "plt.figure(figsize=(12, 12))\n", - "plt.pcolormesh(timestamps, -depths, signals.T)\n", - "plt.plot(timestamps, -d_bot, \"b\")\n", - "plt.plot(timestamps, -d_top, \"c\")\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "transect_pth = \"Survey17/Survey17_GR1_S3W_F\"\n", - "(\n", - " timestamps,\n", - " depths,\n", - " signals,\n", - " d_top,\n", - " d_bot,\n", - ") = echofilter.raw.shardloader.load_transect_from_shards_rel(\n", - " transect_pth,\n", - " 0,\n", - " 128,\n", - " root_data_dir=ROOT_DATA_DIR,\n", - ")\n", - "\n", - "plt.figure(figsize=(12, 12))\n", - "plt.pcolormesh(timestamps, -depths, signals.T)\n", - "plt.plot(timestamps, -d_bot, \"b\")\n", - "plt.plot(timestamps, -d_top, \"c\")\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import torch.utils.data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "class TransectDataset(torch.utils.data.Dataset):\n", - " def __init__(\n", - " self,\n", - " transect_paths,\n", - " window_len=128,\n", - " crop_depth=70,\n", - " num_windows_per_transect=0,\n", - " use_dynamic_offsets=True,\n", - " transform_pre=None,\n", - " transform_post=None,\n", - " ):\n", - " \"\"\"\n", - " TransectDataset\n", - "\n", - " Parameters\n", - " ----------\n", - " transect_paths : list\n", - " Absolute paths to transects.\n", - " window_len : int\n", - " Width (number of timestamps) to load. Default is `128`.\n", - " crop_depth : float\n", - " Maximum depth to include, in metres. Deeper data will be cropped away.\n", - " Default is `70`.\n", - " num_windows_per_transect : int\n", - " Number of windows to extract for each transect. Start indices for the\n", - " windows will be equally spaced across the total width of the transect.\n", - " If this is `0`, the number of windows will be inferred automatically\n", - " based on `window_len` and the total width of the transect, resulting\n", - " in a different number of windows for each transect. Default is `0`.\n", - " use_dynamic_offsets : bool\n", - " Whether starting indices for each window should be randomly offset.\n", - " Set to `True` for training and `False` for testing. Default is `True`.\n", - " transform_pre : callable\n", - " Operations to perform to the dictionary containing a single sample.\n", - " These are performed before generating the masks. Default is `None`.\n", - " transform_post : callable\n", - " Operations to perform to the dictionary containing a single sample.\n", - " These are performed after generating the masks. Default is `None`.\n", - " \"\"\"\n", - " super(TransectDataset, self).__init__()\n", - " self.window_len = window_len\n", - " self.crop_depth = crop_depth\n", - " self.num_windows = num_windows_per_transect\n", - " self.use_dynamic_offsets = use_dynamic_offsets\n", - " self.transform_pre = transform_pre\n", - " self.transform_post = transform_post\n", - "\n", - " self.datapoints = []\n", - "\n", - " for transect_path in transect_paths:\n", - " # Lookup the number of rows in the transect\n", - " # Load the sharding metadata\n", - " with open(os.path.join(transect_path, \"shard_size.txt\"), \"r\") as f:\n", - " n_timestamps, shard_len = f.readline().strip().split(\",\")\n", - " n_timestamps = int(n_timestamps)\n", - " # Generate an array for window centers within the transect\n", - " # - if this is for training, we want to randomise the offsets\n", - " # - if this is for validation, we want stable windows\n", - " num_windows = self.num_windows\n", - " if self.num_windows is None or self.num_windows == 0:\n", - " # Load enough windows to include all datapoints\n", - " num_windows = int(np.ceil(n_timestamps / self.window_len))\n", - " centers = np.linspace(0, n_timestamps, num_windows + 1)[:num_windows]\n", - " if len(centers) > 1:\n", - " max_dy_offset = centers[1] - centers[0]\n", - " else:\n", - " max_dy_offset = n_timestamps\n", - " if self.use_dynamic_offsets:\n", - " centers += np.random.rand() * max_dy_offset\n", - " else:\n", - " centers += max_dy_offset / 2\n", - " centers = np.round(centers)\n", - " # Add each (transect, center) to the list for this epoch\n", - " for center_idx in centers:\n", - " self.datapoints.append((transect_path, int(center_idx)))\n", - "\n", - " def __getitem__(self, index):\n", - " transect_pth, center_idx = self.datapoints[index]\n", - " # Load data from shards\n", - " (\n", - " timestamps,\n", - " depths,\n", - " signals,\n", - " d_top,\n", - " d_bot,\n", - " ) = echofilter.raw.shardloader.load_transect_from_shards_abs(\n", - " transect_pth,\n", - " center_idx - int(self.window_len / 2),\n", - " center_idx - int(self.window_len / 2) + self.window_len,\n", - " )\n", - " sample = {\n", - " \"timestamps\": timestamps,\n", - " \"depths\": depths,\n", - " \"signals\": signals,\n", - " \"d_top\": d_top,\n", - " \"d_bot\": d_bot,\n", - " }\n", - " if self.transform_pre is not None:\n", - " sample = self.transform_pre(sample)\n", - " # Apply depth crop\n", - " depth_crop_mask = sample[\"depths\"] <= self.crop_depth\n", - " sample[\"depths\"] = sample[\"depths\"][depth_crop_mask]\n", - " sample[\"signals\"] = sample[\"signals\"][:, depth_crop_mask]\n", - " # Convert lines to masks\n", - " ddepths = np.broadcast_to(sample[\"depths\"], sample[\"signals\"].shape)\n", - " mask_top = np.single(ddepths < np.expand_dims(sample[\"d_top\"], -1))\n", - " mask_bot = np.single(ddepths > np.expand_dims(sample[\"d_bot\"], -1))\n", - " sample[\"mask_top\"] = mask_top\n", - " sample[\"mask_bot\"] = mask_bot\n", - " sample[\"r_top\"] = sample[\"d_top\"] / abs(\n", - " sample[\"depths\"][-1] - sample[\"depths\"][0]\n", - " )\n", - " sample[\"r_bot\"] = sample[\"d_bot\"] / abs(\n", - " sample[\"depths\"][-1] - sample[\"depths\"][0]\n", - " )\n", - " if self.transform_post is not None:\n", - " sample = self.transform_post(sample)\n", - " return sample\n", - "\n", - " def __len__(self):\n", - " return len(self.datapoints)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "transect_paths = [\n", - " os.path.join(ROOT_DATA_DIR, \"surveyExports_sharded/Survey17/Survey17_GR1_S3W_F\")\n", - "] * 2" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "dataset = TransectDataset(transect_paths)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "dataset.datapoints" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sample = dataset[0]\n", - "\n", - "plt.figure(figsize=(12, 12))\n", - "plt.imshow(sample[\"signals\"])\n", - "plt.show()\n", - "plt.figure(figsize=(12, 12))\n", - "plt.imshow(sample[\"mask_top\"])\n", - "plt.show()\n", - "plt.figure(figsize=(12, 12))\n", - "plt.imshow(sample[\"mask_bot\"])\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sample[\"signals\"].shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "loader = torch.utils.data.DataLoader(dataset, batch_size=2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "for sample in loader:\n", - " print(sample[\"signals\"].shape)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import skimage.transform" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "class Rescale(object):\n", - " \"\"\"\n", - " Rescale the image(s) in a sample to a given size.\n", - "\n", - " Parameters\n", - " ----------\n", - " output_size : tuple or int\n", - " Desired output size. If tuple, output is matched to output_size. If int,\n", - " output is square.\n", - " \"\"\"\n", - "\n", - " def __init__(self, output_size):\n", - " assert isinstance(output_size, (int, tuple))\n", - " if isinstance(output_size, int):\n", - " output_size = (output_size, output_size)\n", - " self.output_size = output_size\n", - "\n", - " def __call__(self, sample):\n", - "\n", - " for key in (\"signals\", \"mask_top\", \"mask_bot\"):\n", - " if key in sample:\n", - " sample[key] = skimage.transform.resize(\n", - " sample[key],\n", - " self.output_size,\n", - " clip=False,\n", - " preserve_range=False,\n", - " )\n", - "\n", - " return sample" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "class Normalize(object):\n", - " \"\"\"\n", - " Normalize mean and standard deviation of image.\n", - "\n", - " Note that changes are made inplace.\n", - "\n", - " Parameters\n", - " ----------\n", - " mean : float\n", - " Expected sample pixel mean.\n", - " stdev : float\n", - " Expected sample standard deviation of pixel intensities.\n", - " \"\"\"\n", - "\n", - " def __init__(self, mean, stdev):\n", - " self.mean = mean\n", - " self.stdev = stdev\n", - "\n", - " def __call__(self, sample):\n", - "\n", - " sample[\"signals\"] -= self.mean\n", - " sample[\"signals\"] /= self.stdev\n", - "\n", - " return sample" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "class RandomReflection(object):\n", - " \"\"\"\n", - " Randomly reflect a sample.\n", - "\n", - " Parameters\n", - " ----------\n", - " axis : int, optional\n", - " Axis to reflect. Default is 0.\n", - " p : float, optional\n", - " Probability of reflection. Default is 0.5.\n", - " \"\"\"\n", - "\n", - " def __init__(self, axis=0, p=0.5):\n", - " self.axis = axis\n", - " self.p = p\n", - "\n", - " def __call__(self, sample):\n", - "\n", - " if random.random() > self.p:\n", - " # Nothing to do\n", - " return sample\n", - "\n", - " # Reflect x co-ordinates\n", - " sample[\"timestamps\"] = sample[\"timestamps\"][::-1]\n", - "\n", - " # Reflect data\n", - " for key in (\"signals\", \"d_top\", \"d_bot\", \"mask_top\", \"mask_bot\"):\n", - " if key in sample:\n", - " sample[key] = np.flip(sample[key], self.axis)\n", - "\n", - " return sample" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "class RandomStretchDepth(object):\n", - " \"\"\"\n", - " Rescale a set of images in a sample to a given size.\n", - "\n", - " Note that this transform doesn't change images, just the `depth`, `d_top`, and `d_bot`.\n", - " Note that changes are made inplace.\n", - "\n", - " Parameters\n", - " ----------\n", - " max_factor : float\n", - " Maximum stretch factor. A number between `[1, 1 + max_factor]` will be generated,\n", - " and the depth will either be divided or multiplied by the generated stretch\n", - " factor.\n", - " expected_bottom_gap : float\n", - " Expected gap between actual ocean floor and target bottom line.\n", - " \"\"\"\n", - "\n", - " def __init__(self, max_factor, expected_bottom_gap=1):\n", - " self.max_factor = max_factor\n", - " self.expected_bottom_gap = expected_bottom_gap\n", - "\n", - " def __call__(self, sample):\n", - "\n", - " factor = random.uniform(1.0, 1.0 + self.max_factor)\n", - "\n", - " if random.random() > 0.5:\n", - " factor = 1.0 / factor\n", - "\n", - " sample[\"d_bot\"] += self.expected_bottom_gap\n", - " for key in (\"depths\", \"d_top\", \"d_bot\"):\n", - " sample[key] *= factor\n", - " sample[\"d_bot\"] -= self.expected_bottom_gap\n", - "\n", - " return sample" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "class RandomCropWidth(object):\n", - " \"\"\"\n", - " Randomly crop a sample in the width dimension.\n", - "\n", - " Parameters\n", - " ----------\n", - " max_crop_fraction : float\n", - " Maximum amount of material to crop away, as a fraction of the total width.\n", - " The `crop_fraction` will be sampled uniformly from the range\n", - " `[0, max_crop_fraction]`. The crop is always centred.\n", - " \"\"\"\n", - "\n", - " def __init__(self, max_crop_fraction):\n", - " self.max_crop_fraction = max_crop_fraction\n", - "\n", - " def __call__(self, sample):\n", - "\n", - " width = sample[\"signals\"].shape[0]\n", - "\n", - " crop_fraction = random.uniform(0.0, self.max_crop_fraction)\n", - " crop_amount = crop_fraction * width\n", - "\n", - " lft = int(crop_amount / 2)\n", - " rgt = lft + width - int(crop_amount)\n", - "\n", - " # Crop data\n", - " for key in (\"timestamps\", \"signals\", \"d_top\", \"d_bot\", \"mask_top\", \"mask_bot\"):\n", - " if key in sample:\n", - " sample[key] = sample[key][lft:rgt]\n", - "\n", - " return sample" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "class ColorJitter(object):\n", - " \"\"\"\n", - " Randomly change the brightness and contrast of a normalized image.\n", - "\n", - " Note that changes are made inplace.\n", - "\n", - " Parameters\n", - " ----------\n", - " brightness : float or tuple of float (min, max)\n", - " How much to jitter brightness. `brightness_factor` is chosen uniformly from\n", - " `[-brightness, brightness]`\n", - " or the given `[min, max]`. `brightness_factor` is then added to the image.\n", - " contrast : (float or tuple of float (min, max))\n", - " How much to jitter contrast. `contrast_factor` is chosen uniformly from\n", - " `[max(0, 1 - contrast), 1 + contrast]`\n", - " or the given `[min, max]`. Should be non negative numbers.\n", - " \"\"\"\n", - "\n", - " def __init__(self, brightness=0, contrast=0):\n", - " self.brightness = self._check_input(\n", - " brightness,\n", - " \"brightness\",\n", - " center=0,\n", - " bound=(float(\"-inf\"), float(\"inf\")),\n", - " clip_first_on_zero=False,\n", - " )\n", - " self.contrast = self._check_input(contrast, \"contrast\")\n", - "\n", - " def _check_input(\n", - " self, value, name, center=1, bound=(0, float(\"inf\")), clip_first_on_zero=True\n", - " ):\n", - " if isinstance(value, (float, int)):\n", - " if value < 0:\n", - " raise ValueError(\n", - " \"If {} is a single number, it must be non negative.\".format(name)\n", - " )\n", - " value = [center - value, center + value]\n", - " if clip_first_on_zero:\n", - " value[0] = max(value[0], 0)\n", - " elif isinstance(value, (tuple, list)) and len(value) == 2:\n", - " if not bound[0] <= value[0] <= value[1] <= bound[1]:\n", - " raise ValueError(\"{} values should be between {}\".format(name, bound))\n", - " else:\n", - " raise TypeError(\n", - " \"{} should be a single number or a list/tuple with length 2.\".format(\n", - " name\n", - " )\n", - " )\n", - "\n", - " if value[0] == value[1] == center:\n", - " value = None\n", - " return value\n", - "\n", - " def __call__(self, sample):\n", - " init_op = random.randint(0, 1)\n", - " for i_op in range(2):\n", - " op_num = (init_op + i_op) % 2\n", - " if op_num == 0 and self.brightness is not None:\n", - " brightness_factor = random.uniform(\n", - " self.brightness[0], self.brightness[1]\n", - " )\n", - " sample[\"signals\"] += brightness_factor\n", - " elif op_num == 1 and self.contrast is not None:\n", - " contrast_factor = random.uniform(self.contrast[0], self.contrast[1])\n", - " sample[\"signals\"] *= contrast_factor\n", - " return sample\n", - "\n", - " def __repr__(self):\n", - " format_string = self.__class__.__name__ + \"(\"\n", - " format_string += \"brightness={0}\".format(self.brightness)\n", - " format_string += \", contrast={0})\".format(self.contrast)\n", - " format_string += \")\"\n", - " return format_string" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import torchvision.transforms" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "train_transform_pre = torchvision.transforms.Compose(\n", - " [\n", - " RandomCropWidth(0.5),\n", - " RandomStretchDepth(0.5),\n", - " RandomReflection(),\n", - " ]\n", - ")\n", - "train_transform_post = torchvision.transforms.Compose(\n", - " [\n", - " Rescale((128, 512)),\n", - " Normalize(-70, 22),\n", - " ColorJitter(0.5, 0.3),\n", - " ]\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "dataset_train = TransectDataset(\n", - " transect_paths,\n", - " window_len=192,\n", - " crop_depth=70,\n", - " num_windows_per_transect=10,\n", - " use_dynamic_offsets=True,\n", - " transform_pre=train_transform_pre,\n", - " transform_post=train_transform_post,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sample = dataset_train[0]\n", - "\n", - "plt.figure(figsize=(12, 12))\n", - "plt.pcolormesh(\n", - " np.linspace(*sample[\"timestamps\"][[0, -1]], sample[\"signals\"].shape[0]),\n", - " -np.linspace(sample[\"depths\"][0], sample[\"depths\"][-1], sample[\"signals\"].shape[1]),\n", - " sample[\"signals\"].T,\n", - ")\n", - "plt.plot(\n", - " np.linspace(*sample[\"timestamps\"][[0, -1]], sample[\"d_bot\"].shape[0]),\n", - " -sample[\"d_bot\"],\n", - " \"b\",\n", - ")\n", - "plt.plot(\n", - " np.linspace(*sample[\"timestamps\"][[0, -1]], sample[\"d_top\"].shape[0]),\n", - " -sample[\"d_top\"],\n", - " \"c\",\n", - ")\n", - "plt.show()\n", - "\n", - "plt.figure(figsize=(12, 12))\n", - "plt.imshow(sample[\"signals\"])\n", - "plt.show()\n", - "\n", - "plt.figure(figsize=(12, 12))\n", - "plt.imshow(sample[\"mask_top\"])\n", - "plt.show()\n", - "\n", - "plt.figure(figsize=(12, 12))\n", - "plt.imshow(sample[\"mask_bot\"])\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sample[\"r_top\"]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sample[\"r_bot\"]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "val_transform = torchvision.transforms.Compose(\n", - " [\n", - " Rescale((128, 512)),\n", - " Normalize(-70, 22),\n", - " ]\n", - ")\n", - "\n", - "dataset_val = TransectDataset(\n", - " transect_paths,\n", - " window_len=128,\n", - " crop_depth=70,\n", - " num_windows_per_transect=20,\n", - " use_dynamic_offsets=False,\n", - " transform_post=val_transform,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sample = dataset_val[0]\n", - "\n", - "plt.figure(figsize=(12, 12))\n", - "plt.pcolormesh(\n", - " np.linspace(*sample[\"timestamps\"][[0, -1]], sample[\"signals\"].shape[0]),\n", - " -np.linspace(sample[\"depths\"][0], sample[\"depths\"][-1], sample[\"signals\"].shape[1]),\n", - " sample[\"signals\"].T,\n", - ")\n", - "plt.plot(\n", - " np.linspace(*sample[\"timestamps\"][[0, -1]], sample[\"d_bot\"].shape[0]),\n", - " -sample[\"d_bot\"],\n", - " \"b\",\n", - ")\n", - "plt.plot(\n", - " np.linspace(*sample[\"timestamps\"][[0, -1]], sample[\"d_top\"].shape[0]),\n", - " -sample[\"d_top\"],\n", - " \"c\",\n", - ")\n", - "plt.show()\n", - "\n", - "plt.figure(figsize=(12, 12))\n", - "plt.imshow(sample[\"signals\"])\n", - "plt.show()\n", - "\n", - "plt.figure(figsize=(12, 12))\n", - "plt.imshow(sample[\"mask_top\"])\n", - "plt.show()\n", - "\n", - "plt.figure(figsize=(12, 12))\n", - "plt.imshow(sample[\"mask_bot\"])\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "dataset_val.datapoints" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.15" - } - }, - "nbformat": 4, - "nbformat_minor": 2 + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cd .." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import random" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "\n", + "sns.set()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import echofilter.raw.shardloader" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ROOT_DATA_DIR = \"/media/scott/scratch/Datasets/dsforce/\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "transect_pth = \"Survey17/Survey17_GR1_S3W_F\"\n", + "(\n", + " timestamps,\n", + " depths,\n", + " signals,\n", + " d_top,\n", + " d_bot,\n", + ") = echofilter.raw.shardloader.load_transect_from_shards_rel(\n", + " transect_pth,\n", + " 100,\n", + " 800,\n", + " root_data_dir=ROOT_DATA_DIR,\n", + ")\n", + "\n", + "plt.figure(figsize=(12, 12))\n", + "plt.pcolormesh(timestamps, -depths, signals.T)\n", + "plt.plot(timestamps, -d_bot, \"b\")\n", + "plt.plot(timestamps, -d_top, \"c\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "transect_pth = \"Survey17/Survey17_GR1_S3W_F\"\n", + "(\n", + " timestamps,\n", + " depths,\n", + " signals,\n", + " d_top,\n", + " d_bot,\n", + ") = echofilter.raw.shardloader.load_transect_from_shards_rel(\n", + " transect_pth,\n", + " -100,\n", + " 800,\n", + " root_data_dir=ROOT_DATA_DIR,\n", + ")\n", + "\n", + "plt.figure(figsize=(12, 12))\n", + "plt.pcolormesh(timestamps, -depths, signals.T)\n", + "plt.plot(timestamps, -d_bot, \"b\")\n", + "plt.plot(timestamps, -d_top, \"c\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "transect_pth = \"Survey17/Survey17_GR1_S3W_F\"\n", + "(\n", + " timestamps,\n", + " depths,\n", + " signals,\n", + " d_top,\n", + " d_bot,\n", + ") = echofilter.raw.shardloader.load_transect_from_shards_rel(\n", + " transect_pth,\n", + " 0,\n", + " 128,\n", + " root_data_dir=ROOT_DATA_DIR,\n", + ")\n", + "\n", + "plt.figure(figsize=(12, 12))\n", + "plt.pcolormesh(timestamps, -depths, signals.T)\n", + "plt.plot(timestamps, -d_bot, \"b\")\n", + "plt.plot(timestamps, -d_top, \"c\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import torch.utils.data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class TransectDataset(torch.utils.data.Dataset):\n", + " def __init__(\n", + " self,\n", + " transect_paths,\n", + " window_len=128,\n", + " crop_depth=70,\n", + " num_windows_per_transect=0,\n", + " use_dynamic_offsets=True,\n", + " transform_pre=None,\n", + " transform_post=None,\n", + " ):\n", + " \"\"\"\n", + " TransectDataset\n", + "\n", + " Parameters\n", + " ----------\n", + " transect_paths : list\n", + " Absolute paths to transects.\n", + " window_len : int\n", + " Width (number of timestamps) to load. Default is `128`.\n", + " crop_depth : float\n", + " Maximum depth to include, in metres. Deeper data will be cropped away.\n", + " Default is `70`.\n", + " num_windows_per_transect : int\n", + " Number of windows to extract for each transect. Start indices for the\n", + " windows will be equally spaced across the total width of the transect.\n", + " If this is `0`, the number of windows will be inferred automatically\n", + " based on `window_len` and the total width of the transect, resulting\n", + " in a different number of windows for each transect. Default is `0`.\n", + " use_dynamic_offsets : bool\n", + " Whether starting indices for each window should be randomly offset.\n", + " Set to `True` for training and `False` for testing. Default is `True`.\n", + " transform_pre : callable\n", + " Operations to perform to the dictionary containing a single sample.\n", + " These are performed before generating the masks. Default is `None`.\n", + " transform_post : callable\n", + " Operations to perform to the dictionary containing a single sample.\n", + " These are performed after generating the masks. Default is `None`.\n", + " \"\"\"\n", + " super(TransectDataset, self).__init__()\n", + " self.window_len = window_len\n", + " self.crop_depth = crop_depth\n", + " self.num_windows = num_windows_per_transect\n", + " self.use_dynamic_offsets = use_dynamic_offsets\n", + " self.transform_pre = transform_pre\n", + " self.transform_post = transform_post\n", + "\n", + " self.datapoints = []\n", + "\n", + " for transect_path in transect_paths:\n", + " # Lookup the number of rows in the transect\n", + " # Load the sharding metadata\n", + " with open(os.path.join(transect_path, \"shard_size.txt\"), \"r\") as f:\n", + " n_timestamps, shard_len = f.readline().strip().split(\",\")\n", + " n_timestamps = int(n_timestamps)\n", + " # Generate an array for window centers within the transect\n", + " # - if this is for training, we want to randomise the offsets\n", + " # - if this is for validation, we want stable windows\n", + " num_windows = self.num_windows\n", + " if self.num_windows is None or self.num_windows == 0:\n", + " # Load enough windows to include all datapoints\n", + " num_windows = int(np.ceil(n_timestamps / self.window_len))\n", + " centers = np.linspace(0, n_timestamps, num_windows + 1)[:num_windows]\n", + " if len(centers) > 1:\n", + " max_dy_offset = centers[1] - centers[0]\n", + " else:\n", + " max_dy_offset = n_timestamps\n", + " if self.use_dynamic_offsets:\n", + " centers += np.random.rand() * max_dy_offset\n", + " else:\n", + " centers += max_dy_offset / 2\n", + " centers = np.round(centers)\n", + " # Add each (transect, center) to the list for this epoch\n", + " for center_idx in centers:\n", + " self.datapoints.append((transect_path, int(center_idx)))\n", + "\n", + " def __getitem__(self, index):\n", + " transect_pth, center_idx = self.datapoints[index]\n", + " # Load data from shards\n", + " (\n", + " timestamps,\n", + " depths,\n", + " signals,\n", + " d_top,\n", + " d_bot,\n", + " ) = echofilter.raw.shardloader.load_transect_from_shards_abs(\n", + " transect_pth,\n", + " center_idx - int(self.window_len / 2),\n", + " center_idx - int(self.window_len / 2) + self.window_len,\n", + " )\n", + " sample = {\n", + " \"timestamps\": timestamps,\n", + " \"depths\": depths,\n", + " \"signals\": signals,\n", + " \"d_top\": d_top,\n", + " \"d_bot\": d_bot,\n", + " }\n", + " if self.transform_pre is not None:\n", + " sample = self.transform_pre(sample)\n", + " # Apply depth crop\n", + " depth_crop_mask = sample[\"depths\"] <= self.crop_depth\n", + " sample[\"depths\"] = sample[\"depths\"][depth_crop_mask]\n", + " sample[\"signals\"] = sample[\"signals\"][:, depth_crop_mask]\n", + " # Convert lines to masks\n", + " ddepths = np.broadcast_to(sample[\"depths\"], sample[\"signals\"].shape)\n", + " mask_top = np.single(ddepths < np.expand_dims(sample[\"d_top\"], -1))\n", + " mask_bot = np.single(ddepths > np.expand_dims(sample[\"d_bot\"], -1))\n", + " sample[\"mask_top\"] = mask_top\n", + " sample[\"mask_bot\"] = mask_bot\n", + " sample[\"r_top\"] = sample[\"d_top\"] / abs(\n", + " sample[\"depths\"][-1] - sample[\"depths\"][0]\n", + " )\n", + " sample[\"r_bot\"] = sample[\"d_bot\"] / abs(\n", + " sample[\"depths\"][-1] - sample[\"depths\"][0]\n", + " )\n", + " if self.transform_post is not None:\n", + " sample = self.transform_post(sample)\n", + " return sample\n", + "\n", + " def __len__(self):\n", + " return len(self.datapoints)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "transect_paths = [\n", + " os.path.join(ROOT_DATA_DIR, \"surveyExports_sharded/Survey17/Survey17_GR1_S3W_F\")\n", + "] * 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dataset = TransectDataset(transect_paths)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dataset.datapoints" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sample = dataset[0]\n", + "\n", + "plt.figure(figsize=(12, 12))\n", + "plt.imshow(sample[\"signals\"])\n", + "plt.show()\n", + "plt.figure(figsize=(12, 12))\n", + "plt.imshow(sample[\"mask_top\"])\n", + "plt.show()\n", + "plt.figure(figsize=(12, 12))\n", + "plt.imshow(sample[\"mask_bot\"])\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sample[\"signals\"].shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "loader = torch.utils.data.DataLoader(dataset, batch_size=2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for sample in loader:\n", + " print(sample[\"signals\"].shape)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import skimage.transform" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class Rescale(object):\n", + " \"\"\"\n", + " Rescale the image(s) in a sample to a given size.\n", + "\n", + " Parameters\n", + " ----------\n", + " output_size : tuple or int\n", + " Desired output size. If tuple, output is matched to output_size. If int,\n", + " output is square.\n", + " \"\"\"\n", + "\n", + " def __init__(self, output_size):\n", + " assert isinstance(output_size, (int, tuple))\n", + " if isinstance(output_size, int):\n", + " output_size = (output_size, output_size)\n", + " self.output_size = output_size\n", + "\n", + " def __call__(self, sample):\n", + "\n", + " for key in (\"signals\", \"mask_top\", \"mask_bot\"):\n", + " if key in sample:\n", + " sample[key] = skimage.transform.resize(\n", + " sample[key],\n", + " self.output_size,\n", + " clip=False,\n", + " preserve_range=False,\n", + " )\n", + "\n", + " return sample" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class Normalize(object):\n", + " \"\"\"\n", + " Normalize mean and standard deviation of image.\n", + "\n", + " Note that changes are made inplace.\n", + "\n", + " Parameters\n", + " ----------\n", + " mean : float\n", + " Expected sample pixel mean.\n", + " stdev : float\n", + " Expected sample standard deviation of pixel intensities.\n", + " \"\"\"\n", + "\n", + " def __init__(self, mean, stdev):\n", + " self.mean = mean\n", + " self.stdev = stdev\n", + "\n", + " def __call__(self, sample):\n", + "\n", + " sample[\"signals\"] -= self.mean\n", + " sample[\"signals\"] /= self.stdev\n", + "\n", + " return sample" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class RandomReflection(object):\n", + " \"\"\"\n", + " Randomly reflect a sample.\n", + "\n", + " Parameters\n", + " ----------\n", + " axis : int, optional\n", + " Axis to reflect. Default is 0.\n", + " p : float, optional\n", + " Probability of reflection. Default is 0.5.\n", + " \"\"\"\n", + "\n", + " def __init__(self, axis=0, p=0.5):\n", + " self.axis = axis\n", + " self.p = p\n", + "\n", + " def __call__(self, sample):\n", + "\n", + " if random.random() > self.p:\n", + " # Nothing to do\n", + " return sample\n", + "\n", + " # Reflect x co-ordinates\n", + " sample[\"timestamps\"] = sample[\"timestamps\"][::-1]\n", + "\n", + " # Reflect data\n", + " for key in (\"signals\", \"d_top\", \"d_bot\", \"mask_top\", \"mask_bot\"):\n", + " if key in sample:\n", + " sample[key] = np.flip(sample[key], self.axis)\n", + "\n", + " return sample" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class RandomStretchDepth(object):\n", + " \"\"\"\n", + " Rescale a set of images in a sample to a given size.\n", + "\n", + " Note that this transform doesn't change images, just the `depth`, `d_top`, and `d_bot`.\n", + " Note that changes are made inplace.\n", + "\n", + " Parameters\n", + " ----------\n", + " max_factor : float\n", + " Maximum stretch factor. A number between `[1, 1 + max_factor]` will be generated,\n", + " and the depth will either be divided or multiplied by the generated stretch\n", + " factor.\n", + " expected_bottom_gap : float\n", + " Expected gap between actual ocean floor and target bottom line.\n", + " \"\"\"\n", + "\n", + " def __init__(self, max_factor, expected_bottom_gap=1):\n", + " self.max_factor = max_factor\n", + " self.expected_bottom_gap = expected_bottom_gap\n", + "\n", + " def __call__(self, sample):\n", + "\n", + " factor = random.uniform(1.0, 1.0 + self.max_factor)\n", + "\n", + " if random.random() > 0.5:\n", + " factor = 1.0 / factor\n", + "\n", + " sample[\"d_bot\"] += self.expected_bottom_gap\n", + " for key in (\"depths\", \"d_top\", \"d_bot\"):\n", + " sample[key] *= factor\n", + " sample[\"d_bot\"] -= self.expected_bottom_gap\n", + "\n", + " return sample" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class RandomCropWidth(object):\n", + " \"\"\"\n", + " Randomly crop a sample in the width dimension.\n", + "\n", + " Parameters\n", + " ----------\n", + " max_crop_fraction : float\n", + " Maximum amount of material to crop away, as a fraction of the total width.\n", + " The `crop_fraction` will be sampled uniformly from the range\n", + " `[0, max_crop_fraction]`. The crop is always centred.\n", + " \"\"\"\n", + "\n", + " def __init__(self, max_crop_fraction):\n", + " self.max_crop_fraction = max_crop_fraction\n", + "\n", + " def __call__(self, sample):\n", + "\n", + " width = sample[\"signals\"].shape[0]\n", + "\n", + " crop_fraction = random.uniform(0.0, self.max_crop_fraction)\n", + " crop_amount = crop_fraction * width\n", + "\n", + " lft = int(crop_amount / 2)\n", + " rgt = lft + width - int(crop_amount)\n", + "\n", + " # Crop data\n", + " for key in (\"timestamps\", \"signals\", \"d_top\", \"d_bot\", \"mask_top\", \"mask_bot\"):\n", + " if key in sample:\n", + " sample[key] = sample[key][lft:rgt]\n", + "\n", + " return sample" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class ColorJitter(object):\n", + " \"\"\"\n", + " Randomly change the brightness and contrast of a normalized image.\n", + "\n", + " Note that changes are made inplace.\n", + "\n", + " Parameters\n", + " ----------\n", + " brightness : float or tuple of float (min, max)\n", + " How much to jitter brightness. `brightness_factor` is chosen uniformly from\n", + " `[-brightness, brightness]`\n", + " or the given `[min, max]`. `brightness_factor` is then added to the image.\n", + " contrast : (float or tuple of float (min, max))\n", + " How much to jitter contrast. `contrast_factor` is chosen uniformly from\n", + " `[max(0, 1 - contrast), 1 + contrast]`\n", + " or the given `[min, max]`. Should be non negative numbers.\n", + " \"\"\"\n", + "\n", + " def __init__(self, brightness=0, contrast=0):\n", + " self.brightness = self._check_input(\n", + " brightness,\n", + " \"brightness\",\n", + " center=0,\n", + " bound=(float(\"-inf\"), float(\"inf\")),\n", + " clip_first_on_zero=False,\n", + " )\n", + " self.contrast = self._check_input(contrast, \"contrast\")\n", + "\n", + " def _check_input(\n", + " self, value, name, center=1, bound=(0, float(\"inf\")), clip_first_on_zero=True\n", + " ):\n", + " if isinstance(value, (float, int)):\n", + " if value < 0:\n", + " raise ValueError(\n", + " \"If {} is a single number, it must be non negative.\".format(name)\n", + " )\n", + " value = [center - value, center + value]\n", + " if clip_first_on_zero:\n", + " value[0] = max(value[0], 0)\n", + " elif isinstance(value, (tuple, list)) and len(value) == 2:\n", + " if not bound[0] <= value[0] <= value[1] <= bound[1]:\n", + " raise ValueError(\"{} values should be between {}\".format(name, bound))\n", + " else:\n", + " raise TypeError(\n", + " \"{} should be a single number or a list/tuple with length 2.\".format(\n", + " name\n", + " )\n", + " )\n", + "\n", + " if value[0] == value[1] == center:\n", + " value = None\n", + " return value\n", + "\n", + " def __call__(self, sample):\n", + " init_op = random.randint(0, 1)\n", + " for i_op in range(2):\n", + " op_num = (init_op + i_op) % 2\n", + " if op_num == 0 and self.brightness is not None:\n", + " brightness_factor = random.uniform(\n", + " self.brightness[0], self.brightness[1]\n", + " )\n", + " sample[\"signals\"] += brightness_factor\n", + " elif op_num == 1 and self.contrast is not None:\n", + " contrast_factor = random.uniform(self.contrast[0], self.contrast[1])\n", + " sample[\"signals\"] *= contrast_factor\n", + " return sample\n", + "\n", + " def __repr__(self):\n", + " format_string = self.__class__.__name__ + \"(\"\n", + " format_string += \"brightness={0}\".format(self.brightness)\n", + " format_string += \", contrast={0})\".format(self.contrast)\n", + " format_string += \")\"\n", + " return format_string" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import torchvision.transforms" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "train_transform_pre = torchvision.transforms.Compose(\n", + " [\n", + " RandomCropWidth(0.5),\n", + " RandomStretchDepth(0.5),\n", + " RandomReflection(),\n", + " ]\n", + ")\n", + "train_transform_post = torchvision.transforms.Compose(\n", + " [\n", + " Rescale((128, 512)),\n", + " Normalize(-70, 22),\n", + " ColorJitter(0.5, 0.3),\n", + " ]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dataset_train = TransectDataset(\n", + " transect_paths,\n", + " window_len=192,\n", + " crop_depth=70,\n", + " num_windows_per_transect=10,\n", + " use_dynamic_offsets=True,\n", + " transform_pre=train_transform_pre,\n", + " transform_post=train_transform_post,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sample = dataset_train[0]\n", + "\n", + "plt.figure(figsize=(12, 12))\n", + "plt.pcolormesh(\n", + " np.linspace(*sample[\"timestamps\"][[0, -1]], sample[\"signals\"].shape[0]),\n", + " -np.linspace(sample[\"depths\"][0], sample[\"depths\"][-1], sample[\"signals\"].shape[1]),\n", + " sample[\"signals\"].T,\n", + ")\n", + "plt.plot(\n", + " np.linspace(*sample[\"timestamps\"][[0, -1]], sample[\"d_bot\"].shape[0]),\n", + " -sample[\"d_bot\"],\n", + " \"b\",\n", + ")\n", + "plt.plot(\n", + " np.linspace(*sample[\"timestamps\"][[0, -1]], sample[\"d_top\"].shape[0]),\n", + " -sample[\"d_top\"],\n", + " \"c\",\n", + ")\n", + "plt.show()\n", + "\n", + "plt.figure(figsize=(12, 12))\n", + "plt.imshow(sample[\"signals\"])\n", + "plt.show()\n", + "\n", + "plt.figure(figsize=(12, 12))\n", + "plt.imshow(sample[\"mask_top\"])\n", + "plt.show()\n", + "\n", + "plt.figure(figsize=(12, 12))\n", + "plt.imshow(sample[\"mask_bot\"])\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sample[\"r_top\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sample[\"r_bot\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "val_transform = torchvision.transforms.Compose(\n", + " [\n", + " Rescale((128, 512)),\n", + " Normalize(-70, 22),\n", + " ]\n", + ")\n", + "\n", + "dataset_val = TransectDataset(\n", + " transect_paths,\n", + " window_len=128,\n", + " crop_depth=70,\n", + " num_windows_per_transect=20,\n", + " use_dynamic_offsets=False,\n", + " transform_post=val_transform,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sample = dataset_val[0]\n", + "\n", + "plt.figure(figsize=(12, 12))\n", + "plt.pcolormesh(\n", + " np.linspace(*sample[\"timestamps\"][[0, -1]], sample[\"signals\"].shape[0]),\n", + " -np.linspace(sample[\"depths\"][0], sample[\"depths\"][-1], sample[\"signals\"].shape[1]),\n", + " sample[\"signals\"].T,\n", + ")\n", + "plt.plot(\n", + " np.linspace(*sample[\"timestamps\"][[0, -1]], sample[\"d_bot\"].shape[0]),\n", + " -sample[\"d_bot\"],\n", + " \"b\",\n", + ")\n", + "plt.plot(\n", + " np.linspace(*sample[\"timestamps\"][[0, -1]], sample[\"d_top\"].shape[0]),\n", + " -sample[\"d_top\"],\n", + " \"c\",\n", + ")\n", + "plt.show()\n", + "\n", + "plt.figure(figsize=(12, 12))\n", + "plt.imshow(sample[\"signals\"])\n", + "plt.show()\n", + "\n", + "plt.figure(figsize=(12, 12))\n", + "plt.imshow(sample[\"mask_top\"])\n", + "plt.show()\n", + "\n", + "plt.figure(figsize=(12, 12))\n", + "plt.imshow(sample[\"mask_bot\"])\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dataset_val.datapoints" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.15" + } + }, + "nbformat": 4, + "nbformat_minor": 2 } diff --git a/notebooks/Building Data Loader.ipynb b/notebooks/Building Data Loader.ipynb index 5bd832dc..4f8c9dde 100644 --- a/notebooks/Building Data Loader.ipynb +++ b/notebooks/Building Data Loader.ipynb @@ -1,599 +1,599 @@ { - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import csv\n", - "import datetime\n", - "import os\n", - "from collections import OrderedDict" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# import pandas as pd\n", - "import numpy as np\n", - "import torch" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import matplotlib.pyplot as plt\n", - "import seaborn as sns\n", - "\n", - "sns.set()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "root_dir = \"/media/scott/scratch/Datasets/dsforce/\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fname = os.path.join(root_dir, \"surveyExports/Survey17/Survey17_GR1_N0A_E_Sv_raw.csv\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# df = pd.read_csv(fname)\n", - "#\n", - "# Can't use pandas because of inconsistent columns. Attempting to do so generates this error:\n", - "#\n", - "# ParserError: Error tokenizing data. C error: Expected 2544 fields in line 3, saw 5977" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "SURVEY_FIELD_TYPES = {\n", - " \"Ping_index\": int,\n", - " \"Distance_gps\": float,\n", - " \"Distance_vl\": float,\n", - " \"Ping_date\": str,\n", - " \"Ping_time\": str,\n", - " \"Ping_milliseconds\": float,\n", - " \"Latitude\": float,\n", - " \"Longitude\": float,\n", - " \"Depth_start\": float,\n", - " \"Depth_stop\": float,\n", - " \"Range_start\": float,\n", - " \"Range_stop\": float,\n", - " \"Sample_count\": int,\n", - "}\n", - "\n", - "\n", - "def survey_reader(fname):\n", - " \"\"\"\n", - " Creates a generator which iterates through a survey csv file.\n", - "\n", - " Parameters\n", - " ----------\n", - " fname: str\n", - " Path to survey CSV file.\n", - "\n", - " Returns\n", - " -------\n", - " generator\n", - " Yields a tupule of `(metadata, data)`, where metadata is a dict,\n", - " and data is a `numpy.ndarray`. Each yield corresponds to a single\n", - " row in the data. Every row (except for the header) is yielded.\n", - " \"\"\"\n", - " metadata_header = []\n", - " with open(fname, \"r\", encoding=\"utf-8-sig\") as hf:\n", - " for i_row, row in enumerate(csv.reader(hf)):\n", - " row = [entry.strip() for entry in row]\n", - " if i_row == 0:\n", - " metadata_header = row\n", - " continue\n", - " metadata = row[: len(metadata_header)]\n", - " metadata_d = OrderedDict()\n", - " for k, v in zip(metadata_header, metadata):\n", - " if k in SURVEY_FIELD_TYPES:\n", - " metadata_d[k] = SURVEY_FIELD_TYPES[k](v)\n", - " else:\n", - " metadata_d[k] = v\n", - " data = np.array([float(x) for x in row[len(metadata_header) :]])\n", - " yield metadata_d, data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def count_lines(filename):\n", - " \"\"\"\n", - " Count the number of lines in a file.\n", - "\n", - " Credit: https://stackoverflow.com/a/27518377\n", - "\n", - " Parameters\n", - " ----------\n", - " filename : str\n", - " Path to file.\n", - "\n", - " Returns\n", - " int\n", - " Number of lines in file.\n", - " \"\"\"\n", - " f = open(filename)\n", - " lines = 0\n", - " buf_size = 1024 * 1024\n", - " read_f = f.read # loop optimization\n", - "\n", - " buf = read_f(buf_size)\n", - " while buf:\n", - " lines += buf.count(\"\\n\")\n", - " buf = read_f(buf_size)\n", - "\n", - " return lines" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "for meta, data in survey_reader(fname):\n", - " print(meta, data)\n", - " break" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "count_lines(fname)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def survey_loader(fname, skip_lines=1, warn_row_overflow=True):\n", - " \"\"\"\n", - " Loads an entire survey CSV.\n", - "\n", - " Parameters\n", - " ----------\n", - " fname : str\n", - " Path to survey CSV file.\n", - " skip_lines : int, optional\n", - " Number of initial entries to skip. Default is 1.\n", - "\n", - " Returns\n", - " -------\n", - " numpy.ndarray\n", - " Timestamps for each row, in seconds. Note: not corrected for timezone.\n", - " numpy.ndarray\n", - " Depth of each column, in metres.\n", - " numpy.ndarray\n", - " Survey signal (echo strength, units unknown).\n", - " \"\"\"\n", - "\n", - " # We remove one from the line count because of the header\n", - " # which is excluded from output\n", - " n_lines = count_lines(fname) - 1\n", - " # n_distances = 0\n", - " depth_start = None\n", - " depth_stop = None\n", - "\n", - " # Initialise output array\n", - " for i_line, (meta, row) in enumerate(survey_reader(fname)):\n", - " if i_line < skip_lines:\n", - " continue\n", - " n_depths = len(row)\n", - " depth_start = meta[\"Depth_start\"]\n", - " depth_stop = meta[\"Depth_stop\"]\n", - " break\n", - "\n", - " data = np.empty((n_lines - skip_lines, n_depths))\n", - " timestamps = np.empty((n_lines - skip_lines))\n", - " depths = np.linspace(depth_start, depth_stop, n_depths)\n", - "\n", - " for i_line, (meta, row) in enumerate(survey_reader(fname)):\n", - " if i_line < skip_lines:\n", - " continue\n", - " i_entry = i_line - skip_lines\n", - " if warn_row_overflow and len(row) > n_depths:\n", - " print(\n", - " \"Row {} of {} exceeds expected n_depths of {} with {}\".format(\n", - " i_line, fname, n_depths, len(row)\n", - " )\n", - " )\n", - " data[i_entry, :] = row[:n_depths]\n", - " timestamps[i_entry] = datetime.datetime.strptime(\n", - " \"{}T{}.{:06d}\".format(\n", - " meta[\"Ping_date\"],\n", - " meta[\"Ping_time\"],\n", - " int(1000 * float(meta[\"Ping_milliseconds\"])),\n", - " ),\n", - " \"%Y-%m-%dT%H:%M:%S.%f\",\n", - " ).timestamp()\n", - "\n", - " # Turn NaNs into NaNs (instead of extremely negative number)\n", - " data[data < -1e6] = np.nan\n", - "\n", - " return timestamps, depths, data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fname2 = os.path.join(\n", - " root_dir, \"surveyExports\", \"Survey03/Survey03_GR2_S1A_survey3_Sv_raw.csv\"\n", - ")\n", - "timestamps, depths, signals = survey_loader(fname2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "timestamps, depths, signals = survey_loader(fname)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "timestamps" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "depths" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "signals" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "len(signals)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.imshow(signals)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "signals.shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "np.prod(signals.shape)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.hist(np.reshape(signals, -1), bins=100, density=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(20, 20))\n", - "plt.imshow(signals.T)\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(12, 12))\n", - "plt.pcolormesh(timestamps, -depths, signals.T)\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def evl_reader(fname):\n", - " \"\"\"\n", - " EVL file reader\n", - "\n", - " Parameters\n", - " ----------\n", - " fname : str\n", - " Path to .evl file.\n", - "\n", - " Returns\n", - " -------\n", - " generator\n", - " A generator which yields the timestamp (in seconds) and depth (in metres)\n", - " for each entry. Note that the timestamp is not corrected for timezone\n", - " (so make sure your timezones are internally consistent).\n", - " \"\"\"\n", - " with open(fname, \"r\") as hf:\n", - " continuance = True\n", - " for i_row, row in enumerate(csv.reader(hf, delimiter=\" \")):\n", - " if i_row == 0:\n", - " continue\n", - " if len(row) < 4:\n", - " if not continuance:\n", - " raise ValueError(\"Trying to skip data after parsing began\")\n", - " continue\n", - " continuance = False\n", - "\n", - " timestamp = datetime.datetime.strptime(\n", - " row[0] + \"T\" + row[1],\n", - " \"%Y%m%dT%H%M%S%f\",\n", - " ).timestamp()\n", - "\n", - " if len(row[2]) > 0:\n", - " raise ValueError(\"row[2] was non-empty: {}\".format(row[2]))\n", - "\n", - " yield timestamp, float(row[3])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def evl_loader(fname):\n", - " \"\"\"\n", - " EVL file loader\n", - "\n", - " Parameters\n", - " ----------\n", - " fname : str\n", - " Path to .evl file.\n", - "\n", - " Returns\n", - " -------\n", - " numpy.ndarray\n", - " Timestamps, in seconds.\n", - " numpy.ndarary\n", - " Depth, in metres.\n", - " \"\"\"\n", - " timestamps = []\n", - " values = []\n", - " for timestamp, value in evl_reader(fname):\n", - " timestamps.append(timestamp)\n", - " values.append(value)\n", - " return np.array(timestamps), np.array(values)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "bottom_fname = os.path.join(\n", - " root_dir, \"surveyExports/Survey17/Survey17_GR1_N0A_E_bottom.evl\"\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "for t, v in evl_reader(bottom_fname):\n", - " print(t, v)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "evl_loader(bottom_fname)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "top_fname = os.path.join(\n", - " root_dir, \"surveyExports/Survey17/Survey17_GR1_N0A_E_turbulence.evl\"\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "evl_loader(top_fname)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(12, 12))\n", - "\n", - "plt.pcolormesh(timestamps, -depths, signals.T)\n", - "\n", - "t_bottom, d_bottom = evl_loader(bottom_fname)\n", - "t_top, d_top = evl_loader(top_fname)\n", - "\n", - "plt.plot(t_bottom, -d_bottom, \"b\")\n", - "plt.plot(t_top, -d_top, \"c\")\n", - "\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def plot_transect_data(survey, transect_name, root_dir):\n", - "\n", - " dirname = os.path.join(root_dir, \"surveyExports\", \"Survey\" + str(survey))\n", - " raw_fname = os.path.join(\n", - " dirname, \"Survey{}_{}_Sv_raw.csv\".format(survey, transect_name)\n", - " )\n", - " bot_fname = os.path.join(\n", - " dirname, \"Survey{}_{}_bottom.evl\".format(survey, transect_name)\n", - " )\n", - " top_fname = os.path.join(\n", - " dirname, \"Survey{}_{}_turbulence.evl\".format(survey, transect_name)\n", - " )\n", - "\n", - " timestamps, depths, signals = survey_loader(raw_fname)\n", - " t_bottom, d_bottom = evl_loader(bot_fname)\n", - " t_top, d_top = evl_loader(top_fname)\n", - "\n", - " plt.figure(figsize=(12, 12))\n", - " plt.pcolormesh(timestamps, -depths, signals.T)\n", - " plt.plot(t_bottom, -d_bottom, \"b\")\n", - " plt.plot(t_top, -d_top, \"c\")\n", - " plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "survey = 17\n", - "transect_name = \"GR1_N0A_E\"\n", - "\n", - "plot_transect_data(survey, transect_name, root_dir)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "survey = 17\n", - "transect_name = \"GR1_N2W_E\"\n", - "\n", - "plot_transect_data(survey, transect_name, root_dir)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "survey = 17\n", - "transect_name = \"GR1_N3A_F\"\n", - "\n", - "plot_transect_data(survey, transect_name, root_dir)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.15" - } - }, - "nbformat": 4, - "nbformat_minor": 2 + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import csv\n", + "import datetime\n", + "import os\n", + "from collections import OrderedDict" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# import pandas as pd\n", + "import numpy as np\n", + "import torch" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "\n", + "sns.set()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "root_dir = \"/media/scott/scratch/Datasets/dsforce/\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fname = os.path.join(root_dir, \"surveyExports/Survey17/Survey17_GR1_N0A_E_Sv_raw.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# df = pd.read_csv(fname)\n", + "#\n", + "# Can't use pandas because of inconsistent columns. Attempting to do so generates this error:\n", + "#\n", + "# ParserError: Error tokenizing data. C error: Expected 2544 fields in line 3, saw 5977" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "SURVEY_FIELD_TYPES = {\n", + " \"Ping_index\": int,\n", + " \"Distance_gps\": float,\n", + " \"Distance_vl\": float,\n", + " \"Ping_date\": str,\n", + " \"Ping_time\": str,\n", + " \"Ping_milliseconds\": float,\n", + " \"Latitude\": float,\n", + " \"Longitude\": float,\n", + " \"Depth_start\": float,\n", + " \"Depth_stop\": float,\n", + " \"Range_start\": float,\n", + " \"Range_stop\": float,\n", + " \"Sample_count\": int,\n", + "}\n", + "\n", + "\n", + "def survey_reader(fname):\n", + " \"\"\"\n", + " Creates a generator which iterates through a survey csv file.\n", + "\n", + " Parameters\n", + " ----------\n", + " fname: str\n", + " Path to survey CSV file.\n", + "\n", + " Returns\n", + " -------\n", + " generator\n", + " Yields a tupule of `(metadata, data)`, where metadata is a dict,\n", + " and data is a `numpy.ndarray`. Each yield corresponds to a single\n", + " row in the data. Every row (except for the header) is yielded.\n", + " \"\"\"\n", + " metadata_header = []\n", + " with open(fname, \"r\", encoding=\"utf-8-sig\") as hf:\n", + " for i_row, row in enumerate(csv.reader(hf)):\n", + " row = [entry.strip() for entry in row]\n", + " if i_row == 0:\n", + " metadata_header = row\n", + " continue\n", + " metadata = row[: len(metadata_header)]\n", + " metadata_d = OrderedDict()\n", + " for k, v in zip(metadata_header, metadata):\n", + " if k in SURVEY_FIELD_TYPES:\n", + " metadata_d[k] = SURVEY_FIELD_TYPES[k](v)\n", + " else:\n", + " metadata_d[k] = v\n", + " data = np.array([float(x) for x in row[len(metadata_header) :]])\n", + " yield metadata_d, data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def count_lines(filename):\n", + " \"\"\"\n", + " Count the number of lines in a file.\n", + "\n", + " Credit: https://stackoverflow.com/a/27518377\n", + "\n", + " Parameters\n", + " ----------\n", + " filename : str\n", + " Path to file.\n", + "\n", + " Returns\n", + " int\n", + " Number of lines in file.\n", + " \"\"\"\n", + " f = open(filename)\n", + " lines = 0\n", + " buf_size = 1024 * 1024\n", + " read_f = f.read # loop optimization\n", + "\n", + " buf = read_f(buf_size)\n", + " while buf:\n", + " lines += buf.count(\"\\n\")\n", + " buf = read_f(buf_size)\n", + "\n", + " return lines" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for meta, data in survey_reader(fname):\n", + " print(meta, data)\n", + " break" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "count_lines(fname)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def survey_loader(fname, skip_lines=1, warn_row_overflow=True):\n", + " \"\"\"\n", + " Loads an entire survey CSV.\n", + "\n", + " Parameters\n", + " ----------\n", + " fname : str\n", + " Path to survey CSV file.\n", + " skip_lines : int, optional\n", + " Number of initial entries to skip. Default is 1.\n", + "\n", + " Returns\n", + " -------\n", + " numpy.ndarray\n", + " Timestamps for each row, in seconds. Note: not corrected for timezone.\n", + " numpy.ndarray\n", + " Depth of each column, in metres.\n", + " numpy.ndarray\n", + " Survey signal (echo strength, units unknown).\n", + " \"\"\"\n", + "\n", + " # We remove one from the line count because of the header\n", + " # which is excluded from output\n", + " n_lines = count_lines(fname) - 1\n", + " # n_distances = 0\n", + " depth_start = None\n", + " depth_stop = None\n", + "\n", + " # Initialise output array\n", + " for i_line, (meta, row) in enumerate(survey_reader(fname)):\n", + " if i_line < skip_lines:\n", + " continue\n", + " n_depths = len(row)\n", + " depth_start = meta[\"Depth_start\"]\n", + " depth_stop = meta[\"Depth_stop\"]\n", + " break\n", + "\n", + " data = np.empty((n_lines - skip_lines, n_depths))\n", + " timestamps = np.empty((n_lines - skip_lines))\n", + " depths = np.linspace(depth_start, depth_stop, n_depths)\n", + "\n", + " for i_line, (meta, row) in enumerate(survey_reader(fname)):\n", + " if i_line < skip_lines:\n", + " continue\n", + " i_entry = i_line - skip_lines\n", + " if warn_row_overflow and len(row) > n_depths:\n", + " print(\n", + " \"Row {} of {} exceeds expected n_depths of {} with {}\".format(\n", + " i_line, fname, n_depths, len(row)\n", + " )\n", + " )\n", + " data[i_entry, :] = row[:n_depths]\n", + " timestamps[i_entry] = datetime.datetime.strptime(\n", + " \"{}T{}.{:06d}\".format(\n", + " meta[\"Ping_date\"],\n", + " meta[\"Ping_time\"],\n", + " int(1000 * float(meta[\"Ping_milliseconds\"])),\n", + " ),\n", + " \"%Y-%m-%dT%H:%M:%S.%f\",\n", + " ).timestamp()\n", + "\n", + " # Turn NaNs into NaNs (instead of extremely negative number)\n", + " data[data < -1e6] = np.nan\n", + "\n", + " return timestamps, depths, data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fname2 = os.path.join(\n", + " root_dir, \"surveyExports\", \"Survey03/Survey03_GR2_S1A_survey3_Sv_raw.csv\"\n", + ")\n", + "timestamps, depths, signals = survey_loader(fname2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "timestamps, depths, signals = survey_loader(fname)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "timestamps" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "depths" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "signals" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "len(signals)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.imshow(signals)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "signals.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "np.prod(signals.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.hist(np.reshape(signals, -1), bins=100, density=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(20, 20))\n", + "plt.imshow(signals.T)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(12, 12))\n", + "plt.pcolormesh(timestamps, -depths, signals.T)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def evl_reader(fname):\n", + " \"\"\"\n", + " EVL file reader\n", + "\n", + " Parameters\n", + " ----------\n", + " fname : str\n", + " Path to .evl file.\n", + "\n", + " Returns\n", + " -------\n", + " generator\n", + " A generator which yields the timestamp (in seconds) and depth (in metres)\n", + " for each entry. Note that the timestamp is not corrected for timezone\n", + " (so make sure your timezones are internally consistent).\n", + " \"\"\"\n", + " with open(fname, \"r\") as hf:\n", + " continuance = True\n", + " for i_row, row in enumerate(csv.reader(hf, delimiter=\" \")):\n", + " if i_row == 0:\n", + " continue\n", + " if len(row) < 4:\n", + " if not continuance:\n", + " raise ValueError(\"Trying to skip data after parsing began\")\n", + " continue\n", + " continuance = False\n", + "\n", + " timestamp = datetime.datetime.strptime(\n", + " row[0] + \"T\" + row[1],\n", + " \"%Y%m%dT%H%M%S%f\",\n", + " ).timestamp()\n", + "\n", + " if len(row[2]) > 0:\n", + " raise ValueError(\"row[2] was non-empty: {}\".format(row[2]))\n", + "\n", + " yield timestamp, float(row[3])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def evl_loader(fname):\n", + " \"\"\"\n", + " EVL file loader\n", + "\n", + " Parameters\n", + " ----------\n", + " fname : str\n", + " Path to .evl file.\n", + "\n", + " Returns\n", + " -------\n", + " numpy.ndarray\n", + " Timestamps, in seconds.\n", + " numpy.ndarary\n", + " Depth, in metres.\n", + " \"\"\"\n", + " timestamps = []\n", + " values = []\n", + " for timestamp, value in evl_reader(fname):\n", + " timestamps.append(timestamp)\n", + " values.append(value)\n", + " return np.array(timestamps), np.array(values)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "bottom_fname = os.path.join(\n", + " root_dir, \"surveyExports/Survey17/Survey17_GR1_N0A_E_bottom.evl\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for t, v in evl_reader(bottom_fname):\n", + " print(t, v)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "evl_loader(bottom_fname)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "top_fname = os.path.join(\n", + " root_dir, \"surveyExports/Survey17/Survey17_GR1_N0A_E_turbulence.evl\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "evl_loader(top_fname)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(12, 12))\n", + "\n", + "plt.pcolormesh(timestamps, -depths, signals.T)\n", + "\n", + "t_bottom, d_bottom = evl_loader(bottom_fname)\n", + "t_top, d_top = evl_loader(top_fname)\n", + "\n", + "plt.plot(t_bottom, -d_bottom, \"b\")\n", + "plt.plot(t_top, -d_top, \"c\")\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def plot_transect_data(survey, transect_name, root_dir):\n", + "\n", + " dirname = os.path.join(root_dir, \"surveyExports\", \"Survey\" + str(survey))\n", + " raw_fname = os.path.join(\n", + " dirname, \"Survey{}_{}_Sv_raw.csv\".format(survey, transect_name)\n", + " )\n", + " bot_fname = os.path.join(\n", + " dirname, \"Survey{}_{}_bottom.evl\".format(survey, transect_name)\n", + " )\n", + " top_fname = os.path.join(\n", + " dirname, \"Survey{}_{}_turbulence.evl\".format(survey, transect_name)\n", + " )\n", + "\n", + " timestamps, depths, signals = survey_loader(raw_fname)\n", + " t_bottom, d_bottom = evl_loader(bot_fname)\n", + " t_top, d_top = evl_loader(top_fname)\n", + "\n", + " plt.figure(figsize=(12, 12))\n", + " plt.pcolormesh(timestamps, -depths, signals.T)\n", + " plt.plot(t_bottom, -d_bottom, \"b\")\n", + " plt.plot(t_top, -d_top, \"c\")\n", + " plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "survey = 17\n", + "transect_name = \"GR1_N0A_E\"\n", + "\n", + "plot_transect_data(survey, transect_name, root_dir)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "survey = 17\n", + "transect_name = \"GR1_N2W_E\"\n", + "\n", + "plot_transect_data(survey, transect_name, root_dir)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "survey = 17\n", + "transect_name = \"GR1_N3A_F\"\n", + "\n", + "plot_transect_data(survey, transect_name, root_dir)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.15" + } + }, + "nbformat": 4, + "nbformat_minor": 2 } diff --git a/notebooks/Completely decomposing mask.ipynb b/notebooks/Completely decomposing mask.ipynb index 40fe328a..2d2d5073 100644 --- a/notebooks/Completely decomposing mask.ipynb +++ b/notebooks/Completely decomposing mask.ipynb @@ -1,531 +1,531 @@ { - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cd .." - ] + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cd .." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import echofilter.raw" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import copy\n", + "import os" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "\n", + "sns.set()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "turbulence_color = \"c\"\n", + "bottom_color = \"#00dd00\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "root_data_dir = \"/media/scott/scratch/Datasets/dsforce/surveyExports\"\n", + "\n", + "# example with only passive period\n", + "# sample = 'mobile/Survey17/Survey17_GR4_T1W_E'\n", + "\n", + "# example with 1 passive period, 1 turbulence cut out\n", + "# sample = 'mobile/Survey16/Survey16_GR3_N1W_E'\n", + "\n", + "# example with lots of short passive periods, 1 proper passive period, 1 turbulence cut out\n", + "# sample = 'mobile/Survey17/Survey17_GR1_N0W_E'\n", + "\n", + "# example with passive, removed, and patches\n", + "sample = \"mobile/Survey16/Survey16_GR1_N3A_F\"\n", + "\n", + "# example with passive, removed, and patches\n", + "sample = \"mobile/Survey16/Survey16_GR3_N3A_F\"\n", + "\n", + "# example where passive detection went wrong\n", + "# sample = 'MinasPassage/march2018/march2018_D20180330-T202218_D20180331-T015214'\n", + "\n", + "# Load raw data\n", + "fname_raw = os.path.join(root_data_dir, sample + \"_Sv_raw.csv\")\n", + "fname_masked = os.path.join(root_data_dir, sample + \"_Sv.csv\")\n", + "\n", + "ts_raw, depths_raw, signals_raw = echofilter.raw.loader.transect_loader(fname_raw)\n", + "ts_mskd, depths_mskd, signals_mskd = echofilter.raw.loader.transect_loader(fname_masked)\n", + "mask = ~np.isnan(signals_mskd)\n", + "\n", + "fname_top1 = os.path.join(root_data_dir, sample + \"_turbulence.evl\")\n", + "fname_top2 = os.path.join(root_data_dir, sample + \"_air.evl\")\n", + "fname_bot = os.path.join(root_data_dir, sample + \"_bottom.evl\")\n", + "if os.path.isfile(fname_top1):\n", + " t_top, d_top = echofilter.raw.loader.evl_loader(fname_top1)\n", + "elif os.path.isfile(fname_top2):\n", + " t_top, d_top = echofilter.raw.loader.evl_loader(fname_top2)\n", + "else:\n", + " t_top = d_top = None\n", + "if os.path.isfile(fname_bot):\n", + " t_bot, d_bot = echofilter.raw.loader.evl_loader(fname_bot)\n", + "else:\n", + " t_bot = d_bot = None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " d_top_new,\n", + " d_bot_new,\n", + " passive_starts,\n", + " passive_ends,\n", + ") = echofilter.raw.manipulate.fixup_lines(\n", + " ts_raw,\n", + " depths_raw,\n", + " signals_raw,\n", + " mask,\n", + " t_top=t_top,\n", + " d_top=d_top,\n", + " t_bot=t_bot,\n", + " d_bot=d_bot,\n", + " return_passive_boundaries=True,\n", + ")\n", + "ts_new = ts_raw" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "t_top[-1] - t_top[0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ts_raw[-1] - ts_raw[0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ts_mskd[-1] - ts_mskd[0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.plot(t_top, d_top)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "is_passive = np.zeros(ts_raw.shape, dtype=bool)\n", + "\n", + "for pass_start, pass_end in zip(passive_starts, passive_ends):\n", + " is_passive[pass_start:pass_end] = True" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "allnan = np.all(np.isnan(signals_mskd), axis=1)\n", + "\n", + "is_removed = allnan & ~is_passive\n", + "\n", + "removed_starts = np.nonzero(np.diff(is_removed.astype(np.float)) > 0)[0]\n", + "removed_ends = np.nonzero(np.diff(is_removed.astype(np.float)) < 0)[0]\n", + "\n", + "if len(removed_starts) > 0:\n", + " removed_starts += 1\n", + "if len(removed_ends) > 0:\n", + " removed_ends += 1\n", + "\n", + "if len(removed_ends) > 0 and (\n", + " len(removed_starts) == 0 or removed_ends[0] < removed_starts[0]\n", + "):\n", + " removed_starts = np.concatenate(([0], removed_starts))\n", + "\n", + "if len(removed_starts) > 0 and (\n", + " len(removed_ends) == 0 or removed_starts[-1] > removed_ends[-1]\n", + "):\n", + " removed_ends = np.concatenate((removed_ends, [len(is_removed)]))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Plot with time on x-axis\n", + "for ts, depths, signals in (\n", + " (ts_raw, depths_raw, signals_raw),\n", + " (ts_mskd, depths_mskd, signals_mskd),\n", + "):\n", + " plt.figure(figsize=(12, 12))\n", + " plt.pcolormesh(ts, depths, signals.T)\n", + " if d_top is not None:\n", + " li = t_top <= ts[-1]\n", + " eliminated_line_points = np.sum(~li)\n", + " if eliminated_line_points > 1:\n", + " print(\n", + " \"Removed {} point from top line which extend past signal recording\".format(\n", + " eliminated_line_points\n", + " )\n", + " )\n", + " plt.plot(t_top[li], d_top[li], turbulence_color)\n", + " if d_bot is not None:\n", + " plt.plot(t_bot, d_bot, bottom_color)\n", + " plt.gca().invert_yaxis()\n", + " plt.xlabel(\"Timestamp (s)\")\n", + " plt.ylabel(\"Depth (m)\")\n", + " plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Plot with index on x-axis\n", + "for ts, depths, signals in (\n", + " (np.arange(signals_raw.shape[0]), depths_raw, signals_raw),\n", + " (np.arange(signals_raw.shape[0]), depths_mskd, signals_mskd),\n", + "):\n", + " plt.figure(figsize=(12, 12))\n", + " plt.pcolormesh(ts, depths, signals.T)\n", + " if d_top is not None:\n", + " plt.plot(ts, np.interp(ts_raw, t_top, d_top), turbulence_color)\n", + " if d_bot is not None:\n", + " plt.plot(ts, np.interp(ts_raw, t_bot, d_bot), bottom_color)\n", + " plt.gca().invert_yaxis()\n", + " plt.xlabel(\"Sample index\")\n", + " plt.ylabel(\"Depth (m)\")\n", + " plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "bad_mask = np.ones(signals_raw.shape, dtype=bool)\n", + "\n", + "ddepths = np.broadcast_to(depths_raw, signals_raw.shape)\n", + "\n", + "is_top = ddepths < np.expand_dims(np.interp(ts_raw, t_top, d_top), -1)\n", + "bad_mask[is_top] = False\n", + "\n", + "if d_bot is not None:\n", + " is_bot = ddepths > np.expand_dims(np.interp(ts_raw, t_bot, d_bot), -1)\n", + " bad_mask[is_bot] = False\n", + "\n", + "signals_badly = copy.deepcopy(signals_raw)\n", + "signals_badly[~bad_mask] = np.NaN" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(12, 12))\n", + "plt.pcolormesh(ts_raw, depths_raw, signals_badly.T)\n", + "plt.plot(t_top, d_top, turbulence_color)\n", + "plt.plot(t_bot, d_bot, bottom_color)\n", + "plt.gca().invert_yaxis()\n", + "plt.xlabel(\"Timestamp (s)\")\n", + "plt.ylabel(\"Depth (m)\")\n", + "plt.title(\"Bad mask (old)\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for ts, depths, signals in (\n", + " (ts_raw, depths_raw, signals_raw),\n", + " (ts_mskd, depths_mskd, signals_mskd),\n", + "):\n", + " plt.figure(figsize=(12, 12))\n", + " plt.pcolormesh(ts, depths, signals.T)\n", + " plt.plot(ts_new, d_top_new, turbulence_color)\n", + " plt.plot(ts_new, d_bot_new, bottom_color)\n", + "\n", + " for r_start, r_end in zip(passive_starts, passive_ends):\n", + " plt.fill_between(\n", + " ts_raw[[r_start, r_end]],\n", + " depths_raw[[0, 0]],\n", + " depths_raw[[-1, -1]],\n", + " facecolor=\"none\",\n", + " hatch=\"//\",\n", + " edgecolor=\"k\",\n", + " linewidth=0.0,\n", + " )\n", + "\n", + " for r_start, r_end in zip(removed_starts, removed_ends):\n", + " plt.fill_between(\n", + " ts_raw[[r_start, r_end]],\n", + " depths_raw[[0, 0]],\n", + " depths_raw[[-1, -1]],\n", + " facecolor=\"none\",\n", + " hatch=\"\\\\\\\\\",\n", + " edgecolor=[0, 0, 1],\n", + " linewidth=0.0,\n", + " )\n", + "\n", + " plt.xlabel(\"Timestamp (s)\")\n", + " plt.ylabel(\"Depth (m)\")\n", + " plt.gca().invert_yaxis()\n", + " plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Recompose mask from parts" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "new_mask = np.ones(signals_raw.shape, dtype=bool)\n", + "\n", + "new_mask[is_passive, :] = False\n", + "new_mask[is_removed, :] = False\n", + "\n", + "ddepths = np.broadcast_to(depths_raw, signals_raw.shape)\n", + "is_top = ddepths < np.expand_dims(np.nan_to_num(d_top_new), -1)\n", + "is_bot = ddepths > np.expand_dims(np.nan_to_num(d_bot_new), -1)\n", + "\n", + "new_mask[is_top] = False\n", + "new_mask[is_bot] = False" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(12, 12))\n", + "plt.pcolormesh(ts_raw, depths_raw, signals_raw.T)\n", + "plt.gca().invert_yaxis()\n", + "plt.xlabel(\"Timestamp (s)\")\n", + "plt.ylabel(\"Depth (m)\")\n", + "plt.title(\"Raw data\")\n", + "plt.show()\n", + "\n", + "plt.figure(figsize=(12, 12))\n", + "plt.pcolormesh(ts_raw, depths_raw, signals_mskd.T)\n", + "plt.gca().invert_yaxis()\n", + "plt.xlabel(\"Timestamp (s)\")\n", + "plt.ylabel(\"Depth (m)\")\n", + "plt.title(\"Masked data\")\n", + "plt.show()\n", + "\n", + "plt.figure(figsize=(12, 12))\n", + "plt.pcolormesh(ts_raw, depths_raw, mask.T)\n", + "plt.gca().invert_yaxis()\n", + "plt.xlabel(\"Timestamp (s)\")\n", + "plt.ylabel(\"Depth (m)\")\n", + "plt.title(\"Original mask\")\n", + "plt.show()\n", + "\n", + "plt.figure(figsize=(12, 12))\n", + "plt.pcolormesh(ts_raw, depths_raw, new_mask.T)\n", + "plt.gca().invert_yaxis()\n", + "plt.xlabel(\"Timestamp (s)\")\n", + "plt.ylabel(\"Depth (m)\")\n", + "plt.title(\"Decomposed and recomposed mask\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Check recomposed mask matches the original mask" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(12, 12))\n", + "plt.pcolormesh(ts_raw, depths_raw, (np.single(mask) - np.single(new_mask)).T)\n", + "plt.gca().invert_yaxis()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "np.nonzero(np.single(mask) - np.single(new_mask))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "np.max(np.single(mask) - np.single(new_mask))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "np.min(np.single(mask) - np.single(new_mask))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "np.unique(np.nonzero(np.single(mask) - np.single(new_mask))[0])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for idx in np.unique(np.nonzero(np.single(mask) - np.single(new_mask))[0]):\n", + " for data in (signals_raw, mask, new_mask):\n", + " plt.figure(figsize=(12, 12))\n", + " plt.pcolormesh(\n", + " ts_raw[max(0, idx - 4) : min(len(ts_raw), idx + 5)],\n", + " depths_raw[:80],\n", + " data[max(0, idx - 4) : min(len(ts_raw), idx + 5), :80].T,\n", + " )\n", + " plt.gca().invert_yaxis()\n", + " plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Visually inspect starts and ends of removed segments" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for idx in np.concatenate((passive_starts, passive_ends, removed_starts, removed_ends)):\n", + " for data in (signals_raw, mask, new_mask):\n", + " plt.figure(figsize=(12, 12))\n", + " plt.pcolormesh(\n", + " ts_raw[idx - 4 : idx + 5], depths_raw[:500], data[idx - 4 : idx + 5, :500].T\n", + " )\n", + " plt.gca().invert_yaxis()\n", + " plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import echofilter.raw" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import copy\n", - "import os" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import matplotlib.pyplot as plt\n", - "import seaborn as sns\n", - "\n", - "sns.set()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "turbulence_color = \"c\"\n", - "bottom_color = \"#00dd00\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "root_data_dir = \"/media/scott/scratch/Datasets/dsforce/surveyExports\"\n", - "\n", - "# example with only passive period\n", - "# sample = 'mobile/Survey17/Survey17_GR4_T1W_E'\n", - "\n", - "# example with 1 passive period, 1 turbulence cut out\n", - "# sample = 'mobile/Survey16/Survey16_GR3_N1W_E'\n", - "\n", - "# example with lots of short passive periods, 1 proper passive period, 1 turbulence cut out\n", - "# sample = 'mobile/Survey17/Survey17_GR1_N0W_E'\n", - "\n", - "# example with passive, removed, and patches\n", - "sample = \"mobile/Survey16/Survey16_GR1_N3A_F\"\n", - "\n", - "# example with passive, removed, and patches\n", - "sample = \"mobile/Survey16/Survey16_GR3_N3A_F\"\n", - "\n", - "# example where passive detection went wrong\n", - "# sample = 'MinasPassage/march2018/march2018_D20180330-T202218_D20180331-T015214'\n", - "\n", - "# Load raw data\n", - "fname_raw = os.path.join(root_data_dir, sample + \"_Sv_raw.csv\")\n", - "fname_masked = os.path.join(root_data_dir, sample + \"_Sv.csv\")\n", - "\n", - "ts_raw, depths_raw, signals_raw = echofilter.raw.loader.transect_loader(fname_raw)\n", - "ts_mskd, depths_mskd, signals_mskd = echofilter.raw.loader.transect_loader(fname_masked)\n", - "mask = ~np.isnan(signals_mskd)\n", - "\n", - "fname_top1 = os.path.join(root_data_dir, sample + \"_turbulence.evl\")\n", - "fname_top2 = os.path.join(root_data_dir, sample + \"_air.evl\")\n", - "fname_bot = os.path.join(root_data_dir, sample + \"_bottom.evl\")\n", - "if os.path.isfile(fname_top1):\n", - " t_top, d_top = echofilter.raw.loader.evl_loader(fname_top1)\n", - "elif os.path.isfile(fname_top2):\n", - " t_top, d_top = echofilter.raw.loader.evl_loader(fname_top2)\n", - "else:\n", - " t_top = d_top = None\n", - "if os.path.isfile(fname_bot):\n", - " t_bot, d_bot = echofilter.raw.loader.evl_loader(fname_bot)\n", - "else:\n", - " t_bot = d_bot = None" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "(\n", - " d_top_new,\n", - " d_bot_new,\n", - " passive_starts,\n", - " passive_ends,\n", - ") = echofilter.raw.manipulate.fixup_lines(\n", - " ts_raw,\n", - " depths_raw,\n", - " signals_raw,\n", - " mask,\n", - " t_top=t_top,\n", - " d_top=d_top,\n", - " t_bot=t_bot,\n", - " d_bot=d_bot,\n", - " return_passive_boundaries=True,\n", - ")\n", - "ts_new = ts_raw" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "t_top[-1] - t_top[0]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "ts_raw[-1] - ts_raw[0]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "ts_mskd[-1] - ts_mskd[0]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.plot(t_top, d_top)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "is_passive = np.zeros(ts_raw.shape, dtype=bool)\n", - "\n", - "for pass_start, pass_end in zip(passive_starts, passive_ends):\n", - " is_passive[pass_start:pass_end] = True" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "allnan = np.all(np.isnan(signals_mskd), axis=1)\n", - "\n", - "is_removed = allnan & ~is_passive\n", - "\n", - "removed_starts = np.nonzero(np.diff(is_removed.astype(np.float)) > 0)[0]\n", - "removed_ends = np.nonzero(np.diff(is_removed.astype(np.float)) < 0)[0]\n", - "\n", - "if len(removed_starts) > 0:\n", - " removed_starts += 1\n", - "if len(removed_ends) > 0:\n", - " removed_ends += 1\n", - "\n", - "if len(removed_ends) > 0 and (\n", - " len(removed_starts) == 0 or removed_ends[0] < removed_starts[0]\n", - "):\n", - " removed_starts = np.concatenate(([0], removed_starts))\n", - "\n", - "if len(removed_starts) > 0 and (\n", - " len(removed_ends) == 0 or removed_starts[-1] > removed_ends[-1]\n", - "):\n", - " removed_ends = np.concatenate((removed_ends, [len(is_removed)]))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Plot with time on x-axis\n", - "for ts, depths, signals in (\n", - " (ts_raw, depths_raw, signals_raw),\n", - " (ts_mskd, depths_mskd, signals_mskd),\n", - "):\n", - " plt.figure(figsize=(12, 12))\n", - " plt.pcolormesh(ts, depths, signals.T)\n", - " if d_top is not None:\n", - " li = t_top <= ts[-1]\n", - " eliminated_line_points = np.sum(~li)\n", - " if eliminated_line_points > 1:\n", - " print(\n", - " \"Removed {} point from top line which extend past signal recording\".format(\n", - " eliminated_line_points\n", - " )\n", - " )\n", - " plt.plot(t_top[li], d_top[li], turbulence_color)\n", - " if d_bot is not None:\n", - " plt.plot(t_bot, d_bot, bottom_color)\n", - " plt.gca().invert_yaxis()\n", - " plt.xlabel(\"Timestamp (s)\")\n", - " plt.ylabel(\"Depth (m)\")\n", - " plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Plot with index on x-axis\n", - "for ts, depths, signals in (\n", - " (np.arange(signals_raw.shape[0]), depths_raw, signals_raw),\n", - " (np.arange(signals_raw.shape[0]), depths_mskd, signals_mskd),\n", - "):\n", - " plt.figure(figsize=(12, 12))\n", - " plt.pcolormesh(ts, depths, signals.T)\n", - " if d_top is not None:\n", - " plt.plot(ts, np.interp(ts_raw, t_top, d_top), turbulence_color)\n", - " if d_bot is not None:\n", - " plt.plot(ts, np.interp(ts_raw, t_bot, d_bot), bottom_color)\n", - " plt.gca().invert_yaxis()\n", - " plt.xlabel(\"Sample index\")\n", - " plt.ylabel(\"Depth (m)\")\n", - " plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "bad_mask = np.ones(signals_raw.shape, dtype=bool)\n", - "\n", - "ddepths = np.broadcast_to(depths_raw, signals_raw.shape)\n", - "\n", - "is_top = ddepths < np.expand_dims(np.interp(ts_raw, t_top, d_top), -1)\n", - "bad_mask[is_top] = False\n", - "\n", - "if d_bot is not None:\n", - " is_bot = ddepths > np.expand_dims(np.interp(ts_raw, t_bot, d_bot), -1)\n", - " bad_mask[is_bot] = False\n", - "\n", - "signals_badly = copy.deepcopy(signals_raw)\n", - "signals_badly[~bad_mask] = np.NaN" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(12, 12))\n", - "plt.pcolormesh(ts_raw, depths_raw, signals_badly.T)\n", - "plt.plot(t_top, d_top, turbulence_color)\n", - "plt.plot(t_bot, d_bot, bottom_color)\n", - "plt.gca().invert_yaxis()\n", - "plt.xlabel(\"Timestamp (s)\")\n", - "plt.ylabel(\"Depth (m)\")\n", - "plt.title(\"Bad mask (old)\")\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "for ts, depths, signals in (\n", - " (ts_raw, depths_raw, signals_raw),\n", - " (ts_mskd, depths_mskd, signals_mskd),\n", - "):\n", - " plt.figure(figsize=(12, 12))\n", - " plt.pcolormesh(ts, depths, signals.T)\n", - " plt.plot(ts_new, d_top_new, turbulence_color)\n", - " plt.plot(ts_new, d_bot_new, bottom_color)\n", - "\n", - " for r_start, r_end in zip(passive_starts, passive_ends):\n", - " plt.fill_between(\n", - " ts_raw[[r_start, r_end]],\n", - " depths_raw[[0, 0]],\n", - " depths_raw[[-1, -1]],\n", - " facecolor=\"none\",\n", - " hatch=\"//\",\n", - " edgecolor=\"k\",\n", - " linewidth=0.0,\n", - " )\n", - "\n", - " for r_start, r_end in zip(removed_starts, removed_ends):\n", - " plt.fill_between(\n", - " ts_raw[[r_start, r_end]],\n", - " depths_raw[[0, 0]],\n", - " depths_raw[[-1, -1]],\n", - " facecolor=\"none\",\n", - " hatch=\"\\\\\\\\\",\n", - " edgecolor=[0, 0, 1],\n", - " linewidth=0.0,\n", - " )\n", - "\n", - " plt.xlabel(\"Timestamp (s)\")\n", - " plt.ylabel(\"Depth (m)\")\n", - " plt.gca().invert_yaxis()\n", - " plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Recompose mask from parts" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "new_mask = np.ones(signals_raw.shape, dtype=bool)\n", - "\n", - "new_mask[is_passive, :] = False\n", - "new_mask[is_removed, :] = False\n", - "\n", - "ddepths = np.broadcast_to(depths_raw, signals_raw.shape)\n", - "is_top = ddepths < np.expand_dims(np.nan_to_num(d_top_new), -1)\n", - "is_bot = ddepths > np.expand_dims(np.nan_to_num(d_bot_new), -1)\n", - "\n", - "new_mask[is_top] = False\n", - "new_mask[is_bot] = False" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(12, 12))\n", - "plt.pcolormesh(ts_raw, depths_raw, signals_raw.T)\n", - "plt.gca().invert_yaxis()\n", - "plt.xlabel(\"Timestamp (s)\")\n", - "plt.ylabel(\"Depth (m)\")\n", - "plt.title(\"Raw data\")\n", - "plt.show()\n", - "\n", - "plt.figure(figsize=(12, 12))\n", - "plt.pcolormesh(ts_raw, depths_raw, signals_mskd.T)\n", - "plt.gca().invert_yaxis()\n", - "plt.xlabel(\"Timestamp (s)\")\n", - "plt.ylabel(\"Depth (m)\")\n", - "plt.title(\"Masked data\")\n", - "plt.show()\n", - "\n", - "plt.figure(figsize=(12, 12))\n", - "plt.pcolormesh(ts_raw, depths_raw, mask.T)\n", - "plt.gca().invert_yaxis()\n", - "plt.xlabel(\"Timestamp (s)\")\n", - "plt.ylabel(\"Depth (m)\")\n", - "plt.title(\"Original mask\")\n", - "plt.show()\n", - "\n", - "plt.figure(figsize=(12, 12))\n", - "plt.pcolormesh(ts_raw, depths_raw, new_mask.T)\n", - "plt.gca().invert_yaxis()\n", - "plt.xlabel(\"Timestamp (s)\")\n", - "plt.ylabel(\"Depth (m)\")\n", - "plt.title(\"Decomposed and recomposed mask\")\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Check recomposed mask matches the original mask" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(12, 12))\n", - "plt.pcolormesh(ts_raw, depths_raw, (np.single(mask) - np.single(new_mask)).T)\n", - "plt.gca().invert_yaxis()\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "np.nonzero(np.single(mask) - np.single(new_mask))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "np.max(np.single(mask) - np.single(new_mask))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "np.min(np.single(mask) - np.single(new_mask))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "np.unique(np.nonzero(np.single(mask) - np.single(new_mask))[0])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "for idx in np.unique(np.nonzero(np.single(mask) - np.single(new_mask))[0]):\n", - " for data in (signals_raw, mask, new_mask):\n", - " plt.figure(figsize=(12, 12))\n", - " plt.pcolormesh(\n", - " ts_raw[max(0, idx - 4) : min(len(ts_raw), idx + 5)],\n", - " depths_raw[:80],\n", - " data[max(0, idx - 4) : min(len(ts_raw), idx + 5), :80].T,\n", - " )\n", - " plt.gca().invert_yaxis()\n", - " plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Visually inspect starts and ends of removed segments" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "for idx in np.concatenate((passive_starts, passive_ends, removed_starts, removed_ends)):\n", - " for data in (signals_raw, mask, new_mask):\n", - " plt.figure(figsize=(12, 12))\n", - " plt.pcolormesh(\n", - " ts_raw[idx - 4 : idx + 5], depths_raw[:500], data[idx - 4 : idx + 5, :500].T\n", - " )\n", - " plt.gca().invert_yaxis()\n", - " plt.show()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.9" - } - }, - "nbformat": 4, - "nbformat_minor": 2 + "nbformat": 4, + "nbformat_minor": 2 } diff --git a/notebooks/Data Loader - Stationary.ipynb b/notebooks/Data Loader - Stationary.ipynb index c8d585c7..f19cd03a 100644 --- a/notebooks/Data Loader - Stationary.ipynb +++ b/notebooks/Data Loader - Stationary.ipynb @@ -1,499 +1,499 @@ { - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cd .." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import echofilter.raw" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import matplotlib.pyplot as plt\n", - "import seaborn as sns\n", - "\n", - "sns.set()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "turbulence_color = \"c\"\n", - "bottom_color = \"#00dd00\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "ROOT_DATA_DIR = \"/media/scott/scratch/Datasets/dsforce/surveyExports\"\n", - "dataset = \"MinasPassage\"\n", - "# has removed window\n", - "sample = \"december2017/december2017_D20171214-T202211_D20171215-T015215\"\n", - "# has passive recording\n", - "sample = \"september2018/september2018_D20181116-T205220_D20181117-T022218\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sv_path = os.path.join(ROOT_DATA_DIR, dataset, sample + \"_Sv_raw.csv\")\n", - "evl_path = os.path.join(ROOT_DATA_DIR, dataset, sample + \"_air.evl\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "timestamps, depths, signals = echofilter.raw.loader.transect_loader(sv_path)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(timestamps.shape)\n", - "print(depths.shape)\n", - "print(signals.shape)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "timestamps" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "depths" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "min(depths), max(depths)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "signals" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "t_top, d_top = echofilter.raw.loader.evl_loader(evl_path)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(t_top.shape)\n", - "print(d_top.shape)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "t_top" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "d_top" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "min(d_top), max(d_top)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(12, 12))\n", - "plt.pcolormesh(timestamps, depths, signals.T)\n", - "# plt.plot(t_bot, d_top, 'c')\n", - "plt.gca().invert_yaxis()\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(12, 12))\n", - "plt.plot(t_top, d_top, \"b\")\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Splitting stationary recordings into continguous periods with gaps between them" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "dt = np.diff(timestamps)\n", - "min(dt), max(dt)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "np.median(dt)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sum(dt > np.median(dt) * 50)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "break_indices = np.where(dt > np.median(dt) * 50)[0]\n", - "\n", - "if len(break_indices) > 0:\n", - " break_indices += 1\n", - "\n", - "break_indices" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "timestamps[296:302] - timestamps[0]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "timestamps[596:600] - timestamps[0]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "for chunk_start, chunk_end in zip(\n", - " np.concatenate(([0], break_indices)),\n", - " np.concatenate((break_indices, [len(timestamps)])),\n", - "):\n", - " plt.figure(figsize=(12, 12))\n", - " plt.pcolormesh(\n", - " timestamps[chunk_start:chunk_end], depths, signals[chunk_start:chunk_end, :].T\n", - " )\n", - " li = np.logical_and(\n", - " timestamps[chunk_start] <= t_top, t_top <= timestamps[chunk_end - 1]\n", - " )\n", - " plt.plot(t_top[li], d_top[li], turbulence_color)\n", - " plt.gca().invert_yaxis()\n", - " plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "depths[0] - depths[-1]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Try using existing mask decomposer" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "transect = echofilter.raw.manipulate.load_decomposed_transect_mask(\n", - " os.path.join(ROOT_DATA_DIR, dataset, sample),\n", - " dataset,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "transect" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(12, 12))\n", - "plt.pcolormesh(transect[\"timestamps\"], transect[\"depths\"], transect[\"Sv\"].T)\n", - "plt.plot(transect[\"timestamps\"], transect[\"top\"], turbulence_color)\n", - "plt.plot(transect[\"timestamps\"], transect[\"bottom\"], bottom_color)\n", - "plt.gca().invert_yaxis()\n", - "plt.xlabel(\"Timestamp (s)\")\n", - "plt.ylabel(\"Depth (m)\")\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "for i_chunk, (ck_start, ck_end) in enumerate(\n", - " zip(\n", - " np.concatenate(([0], break_indices)),\n", - " np.concatenate((break_indices, [len(timestamps)])),\n", - " )\n", - "):\n", - " for signal_name in (\"Sv\", \"mask\"):\n", - " plt.figure(figsize=(12, 12))\n", - " plt.pcolormesh(\n", - " transect[\"timestamps\"][ck_start:ck_end],\n", - " transect[\"depths\"],\n", - " transect[signal_name][ck_start:ck_end, :].T,\n", - " )\n", - " plt.plot(\n", - " transect[\"timestamps\"][ck_start:ck_end],\n", - " transect[\"top\"][ck_start:ck_end],\n", - " turbulence_color,\n", - " )\n", - " plt.plot(\n", - " transect[\"timestamps\"][ck_start:ck_end],\n", - " transect[\"bottom\"][ck_start:ck_end],\n", - " bottom_color,\n", - " )\n", - "\n", - " indices = np.nonzero(transect[\"is_passive\"][ck_start:ck_end])[0]\n", - " if len(indices) > 0:\n", - " r_starts = [indices[0]]\n", - " r_ends = [indices[-1]]\n", - " breaks = np.nonzero(indices[1:] - indices[:-1] > 1)[0]\n", - " for break_idx in breaks:\n", - " r_ends.append(indices[break_idx + 1])\n", - " r_starts.append(indices[break_idx + 2])\n", - " for r_start, r_end in zip(r_starts, r_ends):\n", - " plt.fill_between(\n", - " transect[\"timestamps\"][ck_start:ck_end][[r_start, r_end]],\n", - " transect[\"depths\"][[0, 0]],\n", - " transect[\"depths\"][[-1, -1]],\n", - " facecolor=\"none\",\n", - " hatch=\"//\",\n", - " edgecolor=[0.4, 0.4, 0.4],\n", - " linewidth=0.0,\n", - " )\n", - "\n", - " indices = np.nonzero(transect[\"is_removed\"][ck_start:ck_end])[0]\n", - " if len(indices) > 0:\n", - " r_starts = [indices[0]]\n", - " r_ends = [indices[-1]]\n", - " breaks = np.nonzero(indices[1:] - indices[:-1] > 1)[0]\n", - " for break_idx in breaks:\n", - " r_ends.append(indices[break_idx + 1])\n", - " r_starts.append(indices[break_idx + 2])\n", - " for r_start, r_end in zip(r_starts, r_ends):\n", - " plt.fill_between(\n", - " transect[\"timestamps\"][ck_start:ck_end][[r_start, r_end]],\n", - " transect[\"depths\"][[0, 0]],\n", - " transect[\"depths\"][[-1, -1]],\n", - " facecolor=\"none\",\n", - " hatch=\"\\\\\\\\\",\n", - " edgecolor=[0, 0, 1],\n", - " linewidth=0.0,\n", - " )\n", - "\n", - " plt.gca().invert_yaxis()\n", - " plt.xlabel(\"Timestamp (s)\")\n", - " plt.ylabel(\"Depth (m)\")\n", - " plt.title(\"Chunk {}\".format(i_chunk))\n", - " plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(12, 12))\n", - "plt.pcolormesh(\n", - " transect[\"timestamps\"][ck_start:ck_end],\n", - " transect[\"depths\"][:200],\n", - " transect[\"Sv\"][ck_start:ck_end, :200].T,\n", - ")\n", - "# plt.plot(transect['timestamps'][ck_start:ck_end], transect['top'][ck_start:ck_end], turbulence_color)\n", - "plt.plot(\n", - " transect[\"timestamps\"][ck_start:ck_end],\n", - " transect[\"bottom\"][ck_start:ck_end],\n", - " bottom_color,\n", - ")\n", - "\n", - "plt.gca().invert_yaxis()\n", - "plt.xlabel(\"Timestamp (s)\")\n", - "plt.ylabel(\"Depth (m)\")\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "ck_start = break_indices[6]\n", - "ck_end = break_indices[7]\n", - "\n", - "plt.figure(figsize=(12, 12))\n", - "plt.pcolormesh(\n", - " transect[\"timestamps\"][ck_start:ck_end],\n", - " transect[\"depths\"][:200],\n", - " transect[\"Sv\"][ck_start:ck_end, :200].T,\n", - ")\n", - "# plt.plot(transect['timestamps'][ck_start:ck_end], transect['top'][ck_start:ck_end], turbulence_color)\n", - "# plt.plot(transect['timestamps'][ck_start:ck_end], transect['bottom'][ck_start:ck_end], bottom_color)\n", - "\n", - "plt.gca().invert_yaxis()\n", - "plt.xlabel(\"Timestamp (s)\")\n", - "plt.ylabel(\"Depth (m)\")\n", - "plt.show()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.9" - } - }, - "nbformat": 4, - "nbformat_minor": 2 + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cd .." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import echofilter.raw" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "\n", + "sns.set()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "turbulence_color = \"c\"\n", + "bottom_color = \"#00dd00\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ROOT_DATA_DIR = \"/media/scott/scratch/Datasets/dsforce/surveyExports\"\n", + "dataset = \"MinasPassage\"\n", + "# has removed window\n", + "sample = \"december2017/december2017_D20171214-T202211_D20171215-T015215\"\n", + "# has passive recording\n", + "sample = \"september2018/september2018_D20181116-T205220_D20181117-T022218\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sv_path = os.path.join(ROOT_DATA_DIR, dataset, sample + \"_Sv_raw.csv\")\n", + "evl_path = os.path.join(ROOT_DATA_DIR, dataset, sample + \"_air.evl\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "timestamps, depths, signals = echofilter.raw.loader.transect_loader(sv_path)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(timestamps.shape)\n", + "print(depths.shape)\n", + "print(signals.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "timestamps" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "depths" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "min(depths), max(depths)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "signals" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "t_top, d_top = echofilter.raw.loader.evl_loader(evl_path)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(t_top.shape)\n", + "print(d_top.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "t_top" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "d_top" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "min(d_top), max(d_top)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(12, 12))\n", + "plt.pcolormesh(timestamps, depths, signals.T)\n", + "# plt.plot(t_bot, d_top, 'c')\n", + "plt.gca().invert_yaxis()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(12, 12))\n", + "plt.plot(t_top, d_top, \"b\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Splitting stationary recordings into continguous periods with gaps between them" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dt = np.diff(timestamps)\n", + "min(dt), max(dt)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "np.median(dt)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sum(dt > np.median(dt) * 50)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "break_indices = np.where(dt > np.median(dt) * 50)[0]\n", + "\n", + "if len(break_indices) > 0:\n", + " break_indices += 1\n", + "\n", + "break_indices" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "timestamps[296:302] - timestamps[0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "timestamps[596:600] - timestamps[0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for chunk_start, chunk_end in zip(\n", + " np.concatenate(([0], break_indices)),\n", + " np.concatenate((break_indices, [len(timestamps)])),\n", + "):\n", + " plt.figure(figsize=(12, 12))\n", + " plt.pcolormesh(\n", + " timestamps[chunk_start:chunk_end], depths, signals[chunk_start:chunk_end, :].T\n", + " )\n", + " li = np.logical_and(\n", + " timestamps[chunk_start] <= t_top, t_top <= timestamps[chunk_end - 1]\n", + " )\n", + " plt.plot(t_top[li], d_top[li], turbulence_color)\n", + " plt.gca().invert_yaxis()\n", + " plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "depths[0] - depths[-1]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Try using existing mask decomposer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "transect = echofilter.raw.manipulate.load_decomposed_transect_mask(\n", + " os.path.join(ROOT_DATA_DIR, dataset, sample),\n", + " dataset,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "transect" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(12, 12))\n", + "plt.pcolormesh(transect[\"timestamps\"], transect[\"depths\"], transect[\"Sv\"].T)\n", + "plt.plot(transect[\"timestamps\"], transect[\"top\"], turbulence_color)\n", + "plt.plot(transect[\"timestamps\"], transect[\"bottom\"], bottom_color)\n", + "plt.gca().invert_yaxis()\n", + "plt.xlabel(\"Timestamp (s)\")\n", + "plt.ylabel(\"Depth (m)\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for i_chunk, (ck_start, ck_end) in enumerate(\n", + " zip(\n", + " np.concatenate(([0], break_indices)),\n", + " np.concatenate((break_indices, [len(timestamps)])),\n", + " )\n", + "):\n", + " for signal_name in (\"Sv\", \"mask\"):\n", + " plt.figure(figsize=(12, 12))\n", + " plt.pcolormesh(\n", + " transect[\"timestamps\"][ck_start:ck_end],\n", + " transect[\"depths\"],\n", + " transect[signal_name][ck_start:ck_end, :].T,\n", + " )\n", + " plt.plot(\n", + " transect[\"timestamps\"][ck_start:ck_end],\n", + " transect[\"top\"][ck_start:ck_end],\n", + " turbulence_color,\n", + " )\n", + " plt.plot(\n", + " transect[\"timestamps\"][ck_start:ck_end],\n", + " transect[\"bottom\"][ck_start:ck_end],\n", + " bottom_color,\n", + " )\n", + "\n", + " indices = np.nonzero(transect[\"is_passive\"][ck_start:ck_end])[0]\n", + " if len(indices) > 0:\n", + " r_starts = [indices[0]]\n", + " r_ends = [indices[-1]]\n", + " breaks = np.nonzero(indices[1:] - indices[:-1] > 1)[0]\n", + " for break_idx in breaks:\n", + " r_ends.append(indices[break_idx + 1])\n", + " r_starts.append(indices[break_idx + 2])\n", + " for r_start, r_end in zip(r_starts, r_ends):\n", + " plt.fill_between(\n", + " transect[\"timestamps\"][ck_start:ck_end][[r_start, r_end]],\n", + " transect[\"depths\"][[0, 0]],\n", + " transect[\"depths\"][[-1, -1]],\n", + " facecolor=\"none\",\n", + " hatch=\"//\",\n", + " edgecolor=[0.4, 0.4, 0.4],\n", + " linewidth=0.0,\n", + " )\n", + "\n", + " indices = np.nonzero(transect[\"is_removed\"][ck_start:ck_end])[0]\n", + " if len(indices) > 0:\n", + " r_starts = [indices[0]]\n", + " r_ends = [indices[-1]]\n", + " breaks = np.nonzero(indices[1:] - indices[:-1] > 1)[0]\n", + " for break_idx in breaks:\n", + " r_ends.append(indices[break_idx + 1])\n", + " r_starts.append(indices[break_idx + 2])\n", + " for r_start, r_end in zip(r_starts, r_ends):\n", + " plt.fill_between(\n", + " transect[\"timestamps\"][ck_start:ck_end][[r_start, r_end]],\n", + " transect[\"depths\"][[0, 0]],\n", + " transect[\"depths\"][[-1, -1]],\n", + " facecolor=\"none\",\n", + " hatch=\"\\\\\\\\\",\n", + " edgecolor=[0, 0, 1],\n", + " linewidth=0.0,\n", + " )\n", + "\n", + " plt.gca().invert_yaxis()\n", + " plt.xlabel(\"Timestamp (s)\")\n", + " plt.ylabel(\"Depth (m)\")\n", + " plt.title(\"Chunk {}\".format(i_chunk))\n", + " plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(12, 12))\n", + "plt.pcolormesh(\n", + " transect[\"timestamps\"][ck_start:ck_end],\n", + " transect[\"depths\"][:200],\n", + " transect[\"Sv\"][ck_start:ck_end, :200].T,\n", + ")\n", + "# plt.plot(transect['timestamps'][ck_start:ck_end], transect['top'][ck_start:ck_end], turbulence_color)\n", + "plt.plot(\n", + " transect[\"timestamps\"][ck_start:ck_end],\n", + " transect[\"bottom\"][ck_start:ck_end],\n", + " bottom_color,\n", + ")\n", + "\n", + "plt.gca().invert_yaxis()\n", + "plt.xlabel(\"Timestamp (s)\")\n", + "plt.ylabel(\"Depth (m)\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ck_start = break_indices[6]\n", + "ck_end = break_indices[7]\n", + "\n", + "plt.figure(figsize=(12, 12))\n", + "plt.pcolormesh(\n", + " transect[\"timestamps\"][ck_start:ck_end],\n", + " transect[\"depths\"][:200],\n", + " transect[\"Sv\"][ck_start:ck_end, :200].T,\n", + ")\n", + "# plt.plot(transect['timestamps'][ck_start:ck_end], transect['top'][ck_start:ck_end], turbulence_color)\n", + "# plt.plot(transect['timestamps'][ck_start:ck_end], transect['bottom'][ck_start:ck_end], bottom_color)\n", + "\n", + "plt.gca().invert_yaxis()\n", + "plt.xlabel(\"Timestamp (s)\")\n", + "plt.ylabel(\"Depth (m)\")\n", + "plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 2 } diff --git a/notebooks/Estimate mean and stdev.ipynb b/notebooks/Estimate mean and stdev.ipynb index 42d3ce30..eb2aec2c 100644 --- a/notebooks/Estimate mean and stdev.ipynb +++ b/notebooks/Estimate mean and stdev.ipynb @@ -1,599 +1,599 @@ { - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cd .." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "import scipy.stats" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from tqdm.autonotebook import tqdm" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import echofilter.raw\n", - "import echofilter.raw.shardloader" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "root_data_dir = echofilter.raw.loader.ROOT_DATA_DIR" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "partition = \"train\"\n", - "partitioning_version = \"firstpass\"\n", - "dataset = \"mobile\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "max_depth = 70" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "transect_pths = echofilter.raw.loader.get_partition_list(\n", - " partition,\n", - " dataset=dataset,\n", - " partitioning_version=partitioning_version,\n", - " root_data_dir=root_data_dir,\n", - " full_path=True,\n", - " sharded=True,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import matplotlib.pyplot as plt\n", - "import seaborn as sns\n", - "\n", - "sns.set()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "transect_pth = transect_pths[0]\n", - "with open(os.path.join(transect_pth, \"n_segment.txt\"), \"r\") as f:\n", - " n_segment = int(f.readline().strip())\n", - "\n", - "i_seg = 0\n", - "transect = echofilter.raw.shardloader.load_transect_from_shards_abs(\n", - " os.path.join(transect_pth, str(i_seg))\n", - ")\n", - "transect[\"Sv\"] = transect[\"Sv\"][1:, transect[\"depths\"] <= max_depth]\n", - "transect[\"Sv\"] = transect[\"Sv\"].astype(np.float32)\n", - "print(\"mean\", np.mean(transect[\"Sv\"]))\n", - "print(\"median\", np.median(transect[\"Sv\"]))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "np.nanpercentile([5, 3, 5, 4, np.nan], 50)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "transect[\"Sv\"]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sns.distplot(transect[\"Sv\"])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "qs = [0, 0.1, 1, 5, 7, 10, 25, 50, 75, 90, 93, 95, 99, 99.9, 100]\n", - "ps = np.percentile(transect[\"Sv\"], qs)\n", - "for q, p in zip(qs, ps):\n", - " print(\"{:5.1f} {:7.2f}\".format(q, p))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "np.diff(np.percentile(transect[\"Sv\"], [10, 90])) / 2.56" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "np.diff(np.percentile(transect[\"Sv\"], [7, 93])) / 3" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "np.diff(np.percentile(transect[\"Sv\"], [25, 75])) / 1.35" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "np.std(transect[\"Sv\"][1:])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "mad = np.median(np.abs(transect[\"Sv\"] - np.median(transect[\"Sv\"])))\n", - "print(mad)\n", - "print(mad * 1.4826)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "np.percentile(transect[\"Sv\"], [60, 55, 50, 45, 40, 35, 30, 25, 20, 15, 10])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "np.abs(\n", - " np.diff(np.percentile(transect[\"Sv\"], [60, 55, 50, 45, 40, 35, 30, 25, 20, 15, 10]))\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "np.percentile(transect[\"Sv\"], [40, 35, 30])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "for i in tqdm(range(10)):\n", - " transect_pth = transect_pths[i]\n", - " with open(os.path.join(transect_pth, \"n_segment.txt\"), \"r\") as f:\n", - " n_segment = int(f.readline().strip())\n", - "\n", - " i_seg = 0\n", - " transect = echofilter.raw.shardloader.load_transect_from_shards_abs(\n", - " os.path.join(transect_pth, str(i_seg))\n", - " )\n", - " transect[\"Sv\"] = transect[\"Sv\"][1:, transect[\"depths\"] <= max_depth]\n", - " transect[\"Sv\"] = transect[\"Sv\"].astype(np.float32)\n", - "\n", - " plt.figure(figsize=(12, 9))\n", - " sns.distplot(transect[\"Sv\"])\n", - " plt.show()\n", - "\n", - " print(\"{:6s} {:7.2f}\".format(\"mean\", np.mean(transect[\"Sv\"])))\n", - " print(\"{:6s} {:7.2f}\".format(\"median\", np.median(transect[\"Sv\"])))\n", - " print(\"{:6s} {:7.2f}\".format(\"stdev\", np.std(transect[\"Sv\"])))\n", - " print(\n", - " \"{:6s} {:7.2f}\".format(\n", - " \"mad\", np.median(np.abs(transect[\"Sv\"][1:] - np.median(transect[\"Sv\"])))\n", - " )\n", - " )\n", - " print(\n", - " \"{:6s} {:7.2f}\".format(\n", - " \"iqr\", np.diff(np.percentile(transect[\"Sv\"], [25, 75]))[0]\n", - " )\n", - " )\n", - " print(\n", - " \"{:6s} {:7.2f}\".format(\n", - " \"idr\", np.diff(np.percentile(transect[\"Sv\"], [10, 90]))[0]\n", - " )\n", - " )\n", - " print(\n", - " \"{:6s} {:7.2f}\".format(\n", - " \"i7r\", np.diff(np.percentile(transect[\"Sv\"], [7, 93]))[0]\n", - " )\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "means = []\n", - "stdevs = []\n", - "medians = []\n", - "mads = []\n", - "percentiles = []\n", - "std25 = []\n", - "\n", - "qs = [0, 0.1, 1, 5, 7, 10, 15, 20, 25, 30, 35, 40, 50, 75, 90, 93, 95, 99, 99.9, 100]\n", - "\n", - "for transect_pth in tqdm(transect_pths):\n", - "\n", - " try:\n", - " # Check how many segments the transect was divided into\n", - " with open(os.path.join(transect_pth, \"n_segment.txt\"), \"r\") as f:\n", - " n_segment = int(f.readline().strip())\n", - "\n", - " for i_seg in range(n_segment):\n", - " transect = echofilter.raw.shardloader.load_transect_from_shards_abs(\n", - " os.path.join(transect_pth, str(i_seg))\n", - " )\n", - " transect[\"Sv\"] = transect[\"Sv\"][1:, transect[\"depths\"] <= max_depth]\n", - " if len(transect[\"Sv\"]) < 2:\n", - " continue\n", - " transect[\"Sv\"] = transect[\"Sv\"].astype(np.float32)\n", - " means.append(np.nanmean(transect[\"Sv\"]))\n", - " stdevs.append(np.nanstd(transect[\"Sv\"]))\n", - " median = np.nanmedian(transect[\"Sv\"])\n", - " medians.append(median)\n", - " mads.append(np.nanmedian(np.abs(transect[\"Sv\"] - median)))\n", - " percentiles.append(np.nanpercentile(transect[\"Sv\"], qs))\n", - " pc25 = np.nanpercentile(transect[\"Sv\"], 25)\n", - " std25.append(np.sqrt(np.nanmean(np.power(transect[\"Sv\"] - pc25, 2))))\n", - "\n", - " except Exception as ex:\n", - " print(\"Error loading shard from {}\".format(transect_pth))\n", - " print(ex)\n", - "\n", - "MEAN = np.nanmean(means)\n", - "print(\"mean = {}\".format(MEAN))\n", - "print(\"mean of medians = {}\".format(np.nanmean(medians)))\n", - "\n", - "qs = np.array(qs)\n", - "percentiles = np.array(percentiles)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "variances = []\n", - "\n", - "for transect_pth in tqdm(transect_pths):\n", - "\n", - " try:\n", - " # Check how many segments the transect was divided into\n", - " with open(os.path.join(transect_pth, \"n_segment.txt\"), \"r\") as f:\n", - " n_segment = int(f.readline().strip())\n", - "\n", - " for i_seg in range(n_segment):\n", - " transect = echofilter.raw.shardloader.load_transect_from_shards_abs(\n", - " os.path.join(transect_pth, str(i_seg))\n", - " )\n", - " transect[\"Sv\"] = transect[\"Sv\"][1:, transect[\"depths\"] <= max_depth]\n", - " if len(transect[\"Sv\"]) < 2:\n", - " continue\n", - " transect[\"Sv\"] = transect[\"Sv\"].astype(np.float32)\n", - " variances.append(np.nanmean(np.power(transect[\"Sv\"] - MEAN, 2)))\n", - " except Exception as ex:\n", - " print(\"Error loading shard from {}\".format(transect_pth))\n", - " print(ex)\n", - "\n", - "\n", - "VARIANCE = np.mean(variances)\n", - "print(\"variance = {}\".format(VARIANCE))\n", - "print(\"stdev = {}\".format(np.sqrt(VARIANCE)))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "iqrs = (\n", - " percentiles[:, np.nonzero(qs == 75)[0][0]]\n", - " - percentiles[:, np.nonzero(qs == 25)[0][0]]\n", - ")\n", - "IQR = np.mean(iqrs)\n", - "print(IQR)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "idrs = (\n", - " percentiles[:, np.nonzero(qs == 90)[0][0]]\n", - " - percentiles[:, np.nonzero(qs == 10)[0][0]]\n", - ")\n", - "IDR = np.mean(idrs)\n", - "print(IDR)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "i7rs = (\n", - " percentiles[:, np.nonzero(qs == 93)[0][0]]\n", - " - percentiles[:, np.nonzero(qs == 7)[0][0]]\n", - ")\n", - "I7R = np.mean(i7rs)\n", - "print(I7R)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(np.sqrt(VARIANCE))\n", - "print(np.mean(stdevs))\n", - "\n", - "print(np.mean(mads) * 1.4826)\n", - "print(IQR / 1.35)\n", - "print(IDR / 2.56)\n", - "print(I7R / 3.0)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(\n", - " \"{:6s} {:6s} {:6s} {:6s} {:6s} {:5s} {:5s} {:5s}\".format(\n", - " \"name\", \"SEM\", \"mean\", \"min\", \"max\", \"pcerr\", \"pcstd\", \"pcran\"\n", - " )\n", - ")\n", - "for name, estimator in [\n", - " (\"mean\", means),\n", - " (\"median\", medians),\n", - " (\"stdev\", stdevs),\n", - " (\"MAD\", mads),\n", - " (\"IQR\", iqrs),\n", - " (\"IDR\", idrs),\n", - " (\"I7R\", i7rs),\n", - " (\"std25\", std25),\n", - "]:\n", - " print(\n", - " \"{:6s} {:6.4f} {:6.1f} {:6.1f} {:6.1f} {:5.3f} {:5.2f} {:5.1f}\".format(\n", - " name,\n", - " scipy.stats.sem(estimator),\n", - " np.mean(estimator),\n", - " np.min(estimator),\n", - " np.max(estimator),\n", - " scipy.stats.sem(estimator) / np.abs(np.mean(estimator)) * 100,\n", - " np.std(estimator) / np.abs(np.mean(estimator)) * 100,\n", - " (np.max(estimator) - np.min(estimator)) / np.abs(np.mean(estimator)) * 100,\n", - " )\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(\n", - " \"{:6s} {:6s} {:6s} {:6s} {:6s} {:5s} {:5s} {:5s}\".format(\n", - " \"percentile\", \"SEM\", \"mean\", \"min\", \"max\", \"pcerr\", \"pcstd\", \"pcran\"\n", - " )\n", - ")\n", - "for iq, q in enumerate(qs):\n", - " estimator = percentiles[:, iq]\n", - " print(\n", - " \"{:10.1f} {:6.4f} {:6.1f} {:6.1f} {:6.1f} {:5.3f} {:5.2f} {:5.1f}\".format(\n", - " q,\n", - " scipy.stats.sem(estimator),\n", - " np.mean(estimator),\n", - " np.min(estimator),\n", - " np.max(estimator),\n", - " scipy.stats.sem(estimator) / np.abs(np.mean(estimator)) * 100,\n", - " np.std(estimator) / np.abs(np.mean(estimator)) * 100,\n", - " (np.max(estimator) - np.min(estimator)) / np.abs(np.mean(estimator)) * 100,\n", - " )\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(15, 9))\n", - "sns.distplot(means)\n", - "plt.title(\"mean estimates\")\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(15, 9))\n", - "sns.distplot(medians)\n", - "plt.title(\"median estimates\")\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(15, 9))\n", - "sns.distplot(stdevs)\n", - "plt.title(\"standard deviation estimates\")\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(15, 9))\n", - "sns.distplot(mads)\n", - "plt.title(\"MAD estimates\")\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(15, 9))\n", - "sns.distplot(iqrs)\n", - "plt.title(\"IQR estimates\")\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(15, 9))\n", - "sns.distplot(idrs)\n", - "plt.title(\"IDR estimates\")\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(15, 9))\n", - "sns.distplot(i7rs)\n", - "plt.title(\"7-93 estimates\")\n", - "plt.show()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.9" - } - }, - "nbformat": 4, - "nbformat_minor": 2 + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cd .." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import scipy.stats" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from tqdm.autonotebook import tqdm" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import echofilter.raw\n", + "import echofilter.raw.shardloader" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "root_data_dir = echofilter.raw.loader.ROOT_DATA_DIR" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "partition = \"train\"\n", + "partitioning_version = \"firstpass\"\n", + "dataset = \"mobile\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "max_depth = 70" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "transect_pths = echofilter.raw.loader.get_partition_list(\n", + " partition,\n", + " dataset=dataset,\n", + " partitioning_version=partitioning_version,\n", + " root_data_dir=root_data_dir,\n", + " full_path=True,\n", + " sharded=True,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "\n", + "sns.set()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "transect_pth = transect_pths[0]\n", + "with open(os.path.join(transect_pth, \"n_segment.txt\"), \"r\") as f:\n", + " n_segment = int(f.readline().strip())\n", + "\n", + "i_seg = 0\n", + "transect = echofilter.raw.shardloader.load_transect_from_shards_abs(\n", + " os.path.join(transect_pth, str(i_seg))\n", + ")\n", + "transect[\"Sv\"] = transect[\"Sv\"][1:, transect[\"depths\"] <= max_depth]\n", + "transect[\"Sv\"] = transect[\"Sv\"].astype(np.float32)\n", + "print(\"mean\", np.mean(transect[\"Sv\"]))\n", + "print(\"median\", np.median(transect[\"Sv\"]))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "np.nanpercentile([5, 3, 5, 4, np.nan], 50)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "transect[\"Sv\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.distplot(transect[\"Sv\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "qs = [0, 0.1, 1, 5, 7, 10, 25, 50, 75, 90, 93, 95, 99, 99.9, 100]\n", + "ps = np.percentile(transect[\"Sv\"], qs)\n", + "for q, p in zip(qs, ps):\n", + " print(\"{:5.1f} {:7.2f}\".format(q, p))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "np.diff(np.percentile(transect[\"Sv\"], [10, 90])) / 2.56" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "np.diff(np.percentile(transect[\"Sv\"], [7, 93])) / 3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "np.diff(np.percentile(transect[\"Sv\"], [25, 75])) / 1.35" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "np.std(transect[\"Sv\"][1:])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mad = np.median(np.abs(transect[\"Sv\"] - np.median(transect[\"Sv\"])))\n", + "print(mad)\n", + "print(mad * 1.4826)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "np.percentile(transect[\"Sv\"], [60, 55, 50, 45, 40, 35, 30, 25, 20, 15, 10])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "np.abs(\n", + " np.diff(np.percentile(transect[\"Sv\"], [60, 55, 50, 45, 40, 35, 30, 25, 20, 15, 10]))\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "np.percentile(transect[\"Sv\"], [40, 35, 30])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for i in tqdm(range(10)):\n", + " transect_pth = transect_pths[i]\n", + " with open(os.path.join(transect_pth, \"n_segment.txt\"), \"r\") as f:\n", + " n_segment = int(f.readline().strip())\n", + "\n", + " i_seg = 0\n", + " transect = echofilter.raw.shardloader.load_transect_from_shards_abs(\n", + " os.path.join(transect_pth, str(i_seg))\n", + " )\n", + " transect[\"Sv\"] = transect[\"Sv\"][1:, transect[\"depths\"] <= max_depth]\n", + " transect[\"Sv\"] = transect[\"Sv\"].astype(np.float32)\n", + "\n", + " plt.figure(figsize=(12, 9))\n", + " sns.distplot(transect[\"Sv\"])\n", + " plt.show()\n", + "\n", + " print(\"{:6s} {:7.2f}\".format(\"mean\", np.mean(transect[\"Sv\"])))\n", + " print(\"{:6s} {:7.2f}\".format(\"median\", np.median(transect[\"Sv\"])))\n", + " print(\"{:6s} {:7.2f}\".format(\"stdev\", np.std(transect[\"Sv\"])))\n", + " print(\n", + " \"{:6s} {:7.2f}\".format(\n", + " \"mad\", np.median(np.abs(transect[\"Sv\"][1:] - np.median(transect[\"Sv\"])))\n", + " )\n", + " )\n", + " print(\n", + " \"{:6s} {:7.2f}\".format(\n", + " \"iqr\", np.diff(np.percentile(transect[\"Sv\"], [25, 75]))[0]\n", + " )\n", + " )\n", + " print(\n", + " \"{:6s} {:7.2f}\".format(\n", + " \"idr\", np.diff(np.percentile(transect[\"Sv\"], [10, 90]))[0]\n", + " )\n", + " )\n", + " print(\n", + " \"{:6s} {:7.2f}\".format(\n", + " \"i7r\", np.diff(np.percentile(transect[\"Sv\"], [7, 93]))[0]\n", + " )\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "means = []\n", + "stdevs = []\n", + "medians = []\n", + "mads = []\n", + "percentiles = []\n", + "std25 = []\n", + "\n", + "qs = [0, 0.1, 1, 5, 7, 10, 15, 20, 25, 30, 35, 40, 50, 75, 90, 93, 95, 99, 99.9, 100]\n", + "\n", + "for transect_pth in tqdm(transect_pths):\n", + "\n", + " try:\n", + " # Check how many segments the transect was divided into\n", + " with open(os.path.join(transect_pth, \"n_segment.txt\"), \"r\") as f:\n", + " n_segment = int(f.readline().strip())\n", + "\n", + " for i_seg in range(n_segment):\n", + " transect = echofilter.raw.shardloader.load_transect_from_shards_abs(\n", + " os.path.join(transect_pth, str(i_seg))\n", + " )\n", + " transect[\"Sv\"] = transect[\"Sv\"][1:, transect[\"depths\"] <= max_depth]\n", + " if len(transect[\"Sv\"]) < 2:\n", + " continue\n", + " transect[\"Sv\"] = transect[\"Sv\"].astype(np.float32)\n", + " means.append(np.nanmean(transect[\"Sv\"]))\n", + " stdevs.append(np.nanstd(transect[\"Sv\"]))\n", + " median = np.nanmedian(transect[\"Sv\"])\n", + " medians.append(median)\n", + " mads.append(np.nanmedian(np.abs(transect[\"Sv\"] - median)))\n", + " percentiles.append(np.nanpercentile(transect[\"Sv\"], qs))\n", + " pc25 = np.nanpercentile(transect[\"Sv\"], 25)\n", + " std25.append(np.sqrt(np.nanmean(np.power(transect[\"Sv\"] - pc25, 2))))\n", + "\n", + " except Exception as ex:\n", + " print(\"Error loading shard from {}\".format(transect_pth))\n", + " print(ex)\n", + "\n", + "MEAN = np.nanmean(means)\n", + "print(\"mean = {}\".format(MEAN))\n", + "print(\"mean of medians = {}\".format(np.nanmean(medians)))\n", + "\n", + "qs = np.array(qs)\n", + "percentiles = np.array(percentiles)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "variances = []\n", + "\n", + "for transect_pth in tqdm(transect_pths):\n", + "\n", + " try:\n", + " # Check how many segments the transect was divided into\n", + " with open(os.path.join(transect_pth, \"n_segment.txt\"), \"r\") as f:\n", + " n_segment = int(f.readline().strip())\n", + "\n", + " for i_seg in range(n_segment):\n", + " transect = echofilter.raw.shardloader.load_transect_from_shards_abs(\n", + " os.path.join(transect_pth, str(i_seg))\n", + " )\n", + " transect[\"Sv\"] = transect[\"Sv\"][1:, transect[\"depths\"] <= max_depth]\n", + " if len(transect[\"Sv\"]) < 2:\n", + " continue\n", + " transect[\"Sv\"] = transect[\"Sv\"].astype(np.float32)\n", + " variances.append(np.nanmean(np.power(transect[\"Sv\"] - MEAN, 2)))\n", + " except Exception as ex:\n", + " print(\"Error loading shard from {}\".format(transect_pth))\n", + " print(ex)\n", + "\n", + "\n", + "VARIANCE = np.mean(variances)\n", + "print(\"variance = {}\".format(VARIANCE))\n", + "print(\"stdev = {}\".format(np.sqrt(VARIANCE)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "iqrs = (\n", + " percentiles[:, np.nonzero(qs == 75)[0][0]]\n", + " - percentiles[:, np.nonzero(qs == 25)[0][0]]\n", + ")\n", + "IQR = np.mean(iqrs)\n", + "print(IQR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "idrs = (\n", + " percentiles[:, np.nonzero(qs == 90)[0][0]]\n", + " - percentiles[:, np.nonzero(qs == 10)[0][0]]\n", + ")\n", + "IDR = np.mean(idrs)\n", + "print(IDR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "i7rs = (\n", + " percentiles[:, np.nonzero(qs == 93)[0][0]]\n", + " - percentiles[:, np.nonzero(qs == 7)[0][0]]\n", + ")\n", + "I7R = np.mean(i7rs)\n", + "print(I7R)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(np.sqrt(VARIANCE))\n", + "print(np.mean(stdevs))\n", + "\n", + "print(np.mean(mads) * 1.4826)\n", + "print(IQR / 1.35)\n", + "print(IDR / 2.56)\n", + "print(I7R / 3.0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\n", + " \"{:6s} {:6s} {:6s} {:6s} {:6s} {:5s} {:5s} {:5s}\".format(\n", + " \"name\", \"SEM\", \"mean\", \"min\", \"max\", \"pcerr\", \"pcstd\", \"pcran\"\n", + " )\n", + ")\n", + "for name, estimator in [\n", + " (\"mean\", means),\n", + " (\"median\", medians),\n", + " (\"stdev\", stdevs),\n", + " (\"MAD\", mads),\n", + " (\"IQR\", iqrs),\n", + " (\"IDR\", idrs),\n", + " (\"I7R\", i7rs),\n", + " (\"std25\", std25),\n", + "]:\n", + " print(\n", + " \"{:6s} {:6.4f} {:6.1f} {:6.1f} {:6.1f} {:5.3f} {:5.2f} {:5.1f}\".format(\n", + " name,\n", + " scipy.stats.sem(estimator),\n", + " np.mean(estimator),\n", + " np.min(estimator),\n", + " np.max(estimator),\n", + " scipy.stats.sem(estimator) / np.abs(np.mean(estimator)) * 100,\n", + " np.std(estimator) / np.abs(np.mean(estimator)) * 100,\n", + " (np.max(estimator) - np.min(estimator)) / np.abs(np.mean(estimator)) * 100,\n", + " )\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\n", + " \"{:6s} {:6s} {:6s} {:6s} {:6s} {:5s} {:5s} {:5s}\".format(\n", + " \"percentile\", \"SEM\", \"mean\", \"min\", \"max\", \"pcerr\", \"pcstd\", \"pcran\"\n", + " )\n", + ")\n", + "for iq, q in enumerate(qs):\n", + " estimator = percentiles[:, iq]\n", + " print(\n", + " \"{:10.1f} {:6.4f} {:6.1f} {:6.1f} {:6.1f} {:5.3f} {:5.2f} {:5.1f}\".format(\n", + " q,\n", + " scipy.stats.sem(estimator),\n", + " np.mean(estimator),\n", + " np.min(estimator),\n", + " np.max(estimator),\n", + " scipy.stats.sem(estimator) / np.abs(np.mean(estimator)) * 100,\n", + " np.std(estimator) / np.abs(np.mean(estimator)) * 100,\n", + " (np.max(estimator) - np.min(estimator)) / np.abs(np.mean(estimator)) * 100,\n", + " )\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(15, 9))\n", + "sns.distplot(means)\n", + "plt.title(\"mean estimates\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(15, 9))\n", + "sns.distplot(medians)\n", + "plt.title(\"median estimates\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(15, 9))\n", + "sns.distplot(stdevs)\n", + "plt.title(\"standard deviation estimates\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(15, 9))\n", + "sns.distplot(mads)\n", + "plt.title(\"MAD estimates\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(15, 9))\n", + "sns.distplot(iqrs)\n", + "plt.title(\"IQR estimates\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(15, 9))\n", + "sns.distplot(idrs)\n", + "plt.title(\"IDR estimates\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(15, 9))\n", + "sns.distplot(i7rs)\n", + "plt.title(\"7-93 estimates\")\n", + "plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 2 } diff --git a/notebooks/Finding mask all removed.ipynb b/notebooks/Finding mask all removed.ipynb index 2df01e2e..5c1fd219 100644 --- a/notebooks/Finding mask all removed.ipynb +++ b/notebooks/Finding mask all removed.ipynb @@ -1,356 +1,356 @@ { - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cd .." - ] + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cd .." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import echofilter.raw.loader" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "\n", + "sns.set()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ROOT_DATA_DIR = \"/media/scott/scratch/Datasets/dsforce/surveyExports\"\n", + "# example with 1 passive period, 1 turbulence cut out\n", + "sample = \"mobile/Survey16/Survey16_GR3_N1W_E\"\n", + "# example with lots of short passive periods, 1 proper passive period, 1 turbulence cut out\n", + "# sample = 'mobile/Survey17/Survey17_GR1_N0W_E'\n", + "\n", + "fname_raw = os.path.join(ROOT_DATA_DIR, sample + \"_Sv_raw.csv\")\n", + "fname_masked = os.path.join(ROOT_DATA_DIR, sample + \"_Sv.csv\")\n", + "\n", + "ts_raw, depths_raw, signals_raw = echofilter.raw.loader.transect_loader(fname_raw)\n", + "ts_mskd, depths_mskd, signals_mskd = echofilter.raw.loader.transect_loader(fname_masked)\n", + "\n", + "t_bot, d_bot = echofilter.raw.loader.evl_loader(\n", + " os.path.join(ROOT_DATA_DIR, sample + \"_bottom.evl\")\n", + ")\n", + "t_top, d_top = echofilter.raw.loader.evl_loader(\n", + " os.path.join(ROOT_DATA_DIR, sample + \"_turbulence.evl\")\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ts_new, d_top_new, d_bot_new = echofilter.raw.manipulate.make_lines_from_masked_csv(\n", + " fname_masked\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "np.min(signals_raw), np.max(signals_raw)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ts_raw.shape, depths_raw.shape, signals_raw.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "turbulence_color = \"c\"\n", + "bottom_color = \"#00ee00\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(12, 12))\n", + "plt.pcolormesh(ts_raw, depths_raw, signals_raw.T)\n", + "plt.plot(t_top, d_top, \"k\")\n", + "plt.plot(t_bot, d_bot, \"w\")\n", + "plt.plot(ts_new, d_top_new, turbulence_color)\n", + "plt.plot(ts_new, d_bot_new, bottom_color)\n", + "plt.gca().invert_yaxis()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(12, 12))\n", + "plt.pcolormesh(ts_mskd, depths_mskd, signals_mskd.T)\n", + "plt.plot(t_top, d_top, turbulence_color)\n", + "plt.plot(t_bot, d_bot, bottom_color)\n", + "plt.gca().invert_yaxis()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Finding rows which are fully removed from masked output" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "np.all(np.isnan(signals_mskd), axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "allnan = np.all(np.isnan(signals_mskd), axis=1)\n", + "removed_starts = np.nonzero(np.diff(allnan.astype(np.float)) > 0)[0] + 1\n", + "removed_ends = np.nonzero(np.diff(allnan.astype(np.float)) < 0)[0] + 1\n", + "\n", + "if removed_ends[0] < removed_starts[0]:\n", + " removed_starts = np.concatenate(([0], removed_starts))\n", + "\n", + "if removed_starts[-1] > removed_ends[-1]:\n", + " removed_ends = np.concatenate((removed_ends, [len(allnan)]))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(removed_starts)\n", + "print(removed_ends)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for index_start, index_end in zip(removed_starts, removed_ends):\n", + " for ts, depths, signals in (\n", + " (ts_mskd, depths_mskd, signals_mskd),\n", + " (ts_raw, depths_raw, signals_raw),\n", + " ):\n", + " an = np.all(np.isnan(signals[index_start:index_end, :]), axis=1)\n", + " print(np.sum(an), np.sum(~an))\n", + " plt.figure(figsize=(12, 12))\n", + " plt.pcolormesh(\n", + " ts[index_start:index_end], depths, signals[index_start:index_end, :].T\n", + " )\n", + " li = np.all([t_top >= ts[index_start], t_top <= ts[index_end - 1]], axis=0)\n", + " plt.plot(t_top[li], d_top[li], \"k\")\n", + " li = np.all([t_bot >= ts[index_start], t_bot <= ts[index_end - 1]], axis=0)\n", + " plt.plot(t_bot[li], d_bot[li], \"w\")\n", + " plt.plot(\n", + " ts_new[index_start:index_end],\n", + " d_top_new[index_start:index_end],\n", + " turbulence_color,\n", + " )\n", + " plt.plot(\n", + " ts_new[index_start:index_end],\n", + " d_bot_new[index_start:index_end],\n", + " bottom_color,\n", + " )\n", + " plt.gca().invert_yaxis()\n", + " plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for index_start, index_end in zip(\n", + " np.concatenate(([0], removed_ends)),\n", + " np.concatenate((removed_starts, [signals_raw.shape[0]])),\n", + "):\n", + " if index_start == index_end:\n", + " continue\n", + " for ts, depths, signals in (\n", + " (ts_mskd, depths_mskd, signals_mskd),\n", + " (ts_raw, depths_raw, signals_raw),\n", + " ):\n", + " an = np.all(np.isnan(signals[index_start:index_end, :]), axis=1)\n", + " print(np.sum(an), np.sum(~an))\n", + " plt.figure(figsize=(12, 12))\n", + " plt.pcolormesh(\n", + " ts[index_start:index_end], depths, signals[index_start:index_end, :].T\n", + " )\n", + " li = np.all([t_top >= ts[index_start], t_top <= ts[index_end - 1]], axis=0)\n", + " plt.plot(t_top[li], d_top[li], \"k\")\n", + " li = np.all([t_bot >= ts[index_start], t_bot <= ts[index_end - 1]], axis=0)\n", + " plt.plot(t_bot[li], d_bot[li], \"w\")\n", + " plt.plot(\n", + " ts_new[index_start:index_end],\n", + " d_top_new[index_start:index_end],\n", + " turbulence_color,\n", + " )\n", + " plt.plot(\n", + " ts_new[index_start:index_end],\n", + " d_bot_new[index_start:index_end],\n", + " bottom_color,\n", + " )\n", + " plt.gca().invert_yaxis()\n", + " plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(signals_raw.shape)\n", + "print(signals_mskd.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(12, 12))\n", + "plt.pcolormesh(ts_raw[234:257], depths_raw[:34], signals_raw[234:257, :34].T)\n", + "plt.gca().invert_yaxis()\n", + "plt.show()\n", + "\n", + "plt.figure(figsize=(12, 12))\n", + "plt.pcolormesh(ts_raw[235:256], depths_raw[:34], signals_raw[235:256, :34].T)\n", + "plt.gca().invert_yaxis()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(12, 12))\n", + "plt.pcolormesh(ts_mskd[234:257], depths_mskd[:34], signals_mskd[234:257, :34].T)\n", + "plt.gca().invert_yaxis()\n", + "plt.show()\n", + "\n", + "plt.figure(figsize=(12, 12))\n", + "plt.pcolormesh(ts_mskd[235:256], depths_mskd[:34], signals_mskd[235:256, :34].T)\n", + "plt.gca().invert_yaxis()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(12, 12))\n", + "plt.pcolormesh(ts_mskd[234:257], depths_mskd, signals_mskd[234:257, :].T)\n", + "plt.gca().invert_yaxis()\n", + "plt.show()\n", + "\n", + "plt.figure(figsize=(12, 12))\n", + "plt.pcolormesh(ts_mskd[235:256], depths_mskd, signals_mskd[235:256, :].T)\n", + "plt.gca().invert_yaxis()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "np.nonzero(allnan)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import echofilter.raw.loader" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import matplotlib.pyplot as plt\n", - "import seaborn as sns\n", - "\n", - "sns.set()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "ROOT_DATA_DIR = \"/media/scott/scratch/Datasets/dsforce/surveyExports\"\n", - "# example with 1 passive period, 1 turbulence cut out\n", - "sample = \"mobile/Survey16/Survey16_GR3_N1W_E\"\n", - "# example with lots of short passive periods, 1 proper passive period, 1 turbulence cut out\n", - "# sample = 'mobile/Survey17/Survey17_GR1_N0W_E'\n", - "\n", - "fname_raw = os.path.join(ROOT_DATA_DIR, sample + \"_Sv_raw.csv\")\n", - "fname_masked = os.path.join(ROOT_DATA_DIR, sample + \"_Sv.csv\")\n", - "\n", - "ts_raw, depths_raw, signals_raw = echofilter.raw.loader.transect_loader(fname_raw)\n", - "ts_mskd, depths_mskd, signals_mskd = echofilter.raw.loader.transect_loader(fname_masked)\n", - "\n", - "t_bot, d_bot = echofilter.raw.loader.evl_loader(\n", - " os.path.join(ROOT_DATA_DIR, sample + \"_bottom.evl\")\n", - ")\n", - "t_top, d_top = echofilter.raw.loader.evl_loader(\n", - " os.path.join(ROOT_DATA_DIR, sample + \"_turbulence.evl\")\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "ts_new, d_top_new, d_bot_new = echofilter.raw.manipulate.make_lines_from_masked_csv(\n", - " fname_masked\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "np.min(signals_raw), np.max(signals_raw)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "ts_raw.shape, depths_raw.shape, signals_raw.shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "turbulence_color = \"c\"\n", - "bottom_color = \"#00ee00\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(12, 12))\n", - "plt.pcolormesh(ts_raw, depths_raw, signals_raw.T)\n", - "plt.plot(t_top, d_top, \"k\")\n", - "plt.plot(t_bot, d_bot, \"w\")\n", - "plt.plot(ts_new, d_top_new, turbulence_color)\n", - "plt.plot(ts_new, d_bot_new, bottom_color)\n", - "plt.gca().invert_yaxis()\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(12, 12))\n", - "plt.pcolormesh(ts_mskd, depths_mskd, signals_mskd.T)\n", - "plt.plot(t_top, d_top, turbulence_color)\n", - "plt.plot(t_bot, d_bot, bottom_color)\n", - "plt.gca().invert_yaxis()\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Finding rows which are fully removed from masked output" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "np.all(np.isnan(signals_mskd), axis=1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "allnan = np.all(np.isnan(signals_mskd), axis=1)\n", - "removed_starts = np.nonzero(np.diff(allnan.astype(np.float)) > 0)[0] + 1\n", - "removed_ends = np.nonzero(np.diff(allnan.astype(np.float)) < 0)[0] + 1\n", - "\n", - "if removed_ends[0] < removed_starts[0]:\n", - " removed_starts = np.concatenate(([0], removed_starts))\n", - "\n", - "if removed_starts[-1] > removed_ends[-1]:\n", - " removed_ends = np.concatenate((removed_ends, [len(allnan)]))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(removed_starts)\n", - "print(removed_ends)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "for index_start, index_end in zip(removed_starts, removed_ends):\n", - " for ts, depths, signals in (\n", - " (ts_mskd, depths_mskd, signals_mskd),\n", - " (ts_raw, depths_raw, signals_raw),\n", - " ):\n", - " an = np.all(np.isnan(signals[index_start:index_end, :]), axis=1)\n", - " print(np.sum(an), np.sum(~an))\n", - " plt.figure(figsize=(12, 12))\n", - " plt.pcolormesh(\n", - " ts[index_start:index_end], depths, signals[index_start:index_end, :].T\n", - " )\n", - " li = np.all([t_top >= ts[index_start], t_top <= ts[index_end - 1]], axis=0)\n", - " plt.plot(t_top[li], d_top[li], \"k\")\n", - " li = np.all([t_bot >= ts[index_start], t_bot <= ts[index_end - 1]], axis=0)\n", - " plt.plot(t_bot[li], d_bot[li], \"w\")\n", - " plt.plot(\n", - " ts_new[index_start:index_end],\n", - " d_top_new[index_start:index_end],\n", - " turbulence_color,\n", - " )\n", - " plt.plot(\n", - " ts_new[index_start:index_end],\n", - " d_bot_new[index_start:index_end],\n", - " bottom_color,\n", - " )\n", - " plt.gca().invert_yaxis()\n", - " plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "for index_start, index_end in zip(\n", - " np.concatenate(([0], removed_ends)),\n", - " np.concatenate((removed_starts, [signals_raw.shape[0]])),\n", - "):\n", - " if index_start == index_end:\n", - " continue\n", - " for ts, depths, signals in (\n", - " (ts_mskd, depths_mskd, signals_mskd),\n", - " (ts_raw, depths_raw, signals_raw),\n", - " ):\n", - " an = np.all(np.isnan(signals[index_start:index_end, :]), axis=1)\n", - " print(np.sum(an), np.sum(~an))\n", - " plt.figure(figsize=(12, 12))\n", - " plt.pcolormesh(\n", - " ts[index_start:index_end], depths, signals[index_start:index_end, :].T\n", - " )\n", - " li = np.all([t_top >= ts[index_start], t_top <= ts[index_end - 1]], axis=0)\n", - " plt.plot(t_top[li], d_top[li], \"k\")\n", - " li = np.all([t_bot >= ts[index_start], t_bot <= ts[index_end - 1]], axis=0)\n", - " plt.plot(t_bot[li], d_bot[li], \"w\")\n", - " plt.plot(\n", - " ts_new[index_start:index_end],\n", - " d_top_new[index_start:index_end],\n", - " turbulence_color,\n", - " )\n", - " plt.plot(\n", - " ts_new[index_start:index_end],\n", - " d_bot_new[index_start:index_end],\n", - " bottom_color,\n", - " )\n", - " plt.gca().invert_yaxis()\n", - " plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(signals_raw.shape)\n", - "print(signals_mskd.shape)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(12, 12))\n", - "plt.pcolormesh(ts_raw[234:257], depths_raw[:34], signals_raw[234:257, :34].T)\n", - "plt.gca().invert_yaxis()\n", - "plt.show()\n", - "\n", - "plt.figure(figsize=(12, 12))\n", - "plt.pcolormesh(ts_raw[235:256], depths_raw[:34], signals_raw[235:256, :34].T)\n", - "plt.gca().invert_yaxis()\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(12, 12))\n", - "plt.pcolormesh(ts_mskd[234:257], depths_mskd[:34], signals_mskd[234:257, :34].T)\n", - "plt.gca().invert_yaxis()\n", - "plt.show()\n", - "\n", - "plt.figure(figsize=(12, 12))\n", - "plt.pcolormesh(ts_mskd[235:256], depths_mskd[:34], signals_mskd[235:256, :34].T)\n", - "plt.gca().invert_yaxis()\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(12, 12))\n", - "plt.pcolormesh(ts_mskd[234:257], depths_mskd, signals_mskd[234:257, :].T)\n", - "plt.gca().invert_yaxis()\n", - "plt.show()\n", - "\n", - "plt.figure(figsize=(12, 12))\n", - "plt.pcolormesh(ts_mskd[235:256], depths_mskd, signals_mskd[235:256, :].T)\n", - "plt.gca().invert_yaxis()\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "np.nonzero(allnan)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.9" - } - }, - "nbformat": 4, - "nbformat_minor": 2 + "nbformat": 4, + "nbformat_minor": 2 } diff --git a/notebooks/Generating lines from masked csv.ipynb b/notebooks/Generating lines from masked csv.ipynb index 8ebc8ee7..cd3f8fdc 100644 --- a/notebooks/Generating lines from masked csv.ipynb +++ b/notebooks/Generating lines from masked csv.ipynb @@ -1,523 +1,523 @@ { - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cd .." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import echofilter.raw.loader" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import matplotlib.pyplot as plt\n", - "import seaborn as sns\n", - "\n", - "sns.set()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "ROOT_DATA_DIR = \"/media/scott/scratch/Datasets/dsforce/surveyExports\"\n", - "\n", - "# example with 1 passive period, 1 turbulence cut out\n", - "sample = \"mobile/Survey16/Survey16_GR3_N1W_E\"\n", - "# example with lots of short passive periods, 1 proper passive period, 1 turbulence cut out\n", - "# sample = 'mobile/Survey17/Survey17_GR1_N0W_E'\n", - "\n", - "fname_raw = os.path.join(ROOT_DATA_DIR, sample + \"_Sv_raw.csv\")\n", - "fname_masked = os.path.join(ROOT_DATA_DIR, sample + \"_Sv.csv\")\n", - "\n", - "ts_raw, depths_raw, signals_raw = echofilter.raw.loader.transect_loader(fname_raw)\n", - "ts_mskd, depths_mskd, signals_mskd = echofilter.raw.loader.transect_loader(fname_masked)\n", - "\n", - "t_bot, d_bot = echofilter.raw.loader.evl_loader(\n", - " os.path.join(ROOT_DATA_DIR, sample + \"_bottom.evl\")\n", - ")\n", - "t_top, d_top = echofilter.raw.loader.evl_loader(\n", - " os.path.join(ROOT_DATA_DIR, sample + \"_turbulence.evl\")\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "np.min(signals_raw), np.max(signals_raw)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "ts_raw.shape, depths_raw.shape, signals_raw.shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "turbulence_color = \"c\"\n", - "bottom_color = \"#00ee00\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(12, 12))\n", - "plt.pcolormesh(ts_raw, depths_raw, signals_raw.T)\n", - "plt.plot(t_top, d_top, turbulence_color)\n", - "plt.plot(t_bot, d_bot, bottom_color)\n", - "plt.gca().invert_yaxis()\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Finding boxes to merge" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(12, 12))\n", - "plt.pcolormesh(ts_mskd, depths_mskd, signals_mskd.T)\n", - "plt.plot(t_top, d_top, turbulence_color)\n", - "plt.plot(t_bot, d_bot, bottom_color)\n", - "plt.gca().invert_yaxis()\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(12, 12))\n", - "plt.pcolormesh(ts_mskd, depths_mskd, ~np.isnan(signals_mskd).T)\n", - "plt.plot(t_top, d_top, turbulence_color)\n", - "plt.plot(t_bot, d_bot, bottom_color)\n", - "plt.gca().invert_yaxis()\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "indices = np.tile(np.arange(signals_mskd.shape[1]), (signals_mskd.shape[0], 1)).astype(\n", - " \"float\"\n", - ")\n", - "indices[np.isnan(signals_mskd)] = np.nan" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "np.nanmin(indices, axis=1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "np.nanmax(indices, axis=1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cut_off_indices = np.round(np.nanmedian(indices, axis=1))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cut_off_indices" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "depths_mskd" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "top_depths = np.tile(\n", - " np.concatenate([(depths_mskd[:-1] + depths_mskd[1:]) / 2, depths_mskd[-1:]]),\n", - " (signals_mskd.shape[0], 1),\n", - ")\n", - "top_depths[~np.isnan(signals_mskd)] = np.nan" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "top_depths" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "np.expand_dims(np.arange(signals_mskd.shape[1]), 0)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "np.expand_dims(cut_off_indices, -1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.imshow(top_depths.T)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "li = np.expand_dims(np.arange(signals_mskd.shape[1]), 0) > np.expand_dims(\n", - " cut_off_indices, -1\n", - ")\n", - "plt.imshow(li.T)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "top_depths[li] = np.nan\n", - "plt.imshow(top_depths.T)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "d_top_new = np.nanmax(top_depths, axis=1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "bot_depths = np.tile(\n", - " np.concatenate([depths_mskd[:1], (depths_mskd[:-1] + depths_mskd[1:]) / 2]),\n", - " (signals_mskd.shape[0], 1),\n", - ")\n", - "bot_depths[~np.isnan(signals_mskd)] = np.nan\n", - "li = np.expand_dims(np.arange(signals_mskd.shape[1]), 0) < np.expand_dims(\n", - " cut_off_indices, -1\n", - ")\n", - "bot_depths[li] = np.nan\n", - "d_bot_new = np.nanmin(bot_depths, axis=1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.imshow(bot_depths.T)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.plot(d_top_new)\n", - "plt.plot(d_bot_new)\n", - "plt.gca().invert_yaxis()\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(12, 12))\n", - "plt.pcolormesh(ts_mskd, depths_mskd, signals_mskd.T)\n", - "plt.plot(t_top, d_top, turbulence_color)\n", - "plt.plot(t_bot, d_bot, bottom_color)\n", - "plt.gca().invert_yaxis()\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(12, 12))\n", - "plt.pcolormesh(ts_mskd, depths_mskd, signals_mskd.T)\n", - "plt.plot(ts_mskd, d_top_new, turbulence_color)\n", - "plt.plot(ts_mskd, d_bot_new, bottom_color)\n", - "plt.gca().invert_yaxis()\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "d_bot" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "d_bot_new" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "ts_mskd.shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "t_bot.shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.plot(t_bot, d_bot)\n", - "plt.plot(ts_mskd, d_bot_new)\n", - "plt.gca().invert_yaxis()\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.plot(t_top, d_top)\n", - "plt.plot(ts_mskd, d_top_new)\n", - "plt.gca().invert_yaxis()\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Finding rows which are fully removed from masked output" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "np.all(np.isnan(signals_mskd), axis=1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "allnan = np.all(np.isnan(signals_mskd), axis=1)\n", - "removed_starts = np.nonzero(np.diff(allnan.astype(np.float)) > 0)[0] + 1\n", - "removed_ends = np.nonzero(np.diff(allnan.astype(np.float)) < 0)[0] + 1\n", - "\n", - "if removed_ends[0] < removed_starts[0]:\n", - " removed_starts = np.concatenate(([0], removed_starts))\n", - "\n", - "if removed_starts[-1] > removed_ends[-1]:\n", - " removed_ends = np.concatenate((removed_ends, [len(allnan)]))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(removed_starts)\n", - "print(removed_ends)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "for index_start, index_end in zip(removed_starts, removed_ends):\n", - " plt.figure(figsize=(12, 12))\n", - " plt.pcolormesh(\n", - " ts_raw[index_start:index_end],\n", - " depths_raw,\n", - " signals_raw[index_start:index_end, :].T,\n", - " )\n", - " plt.gca().invert_yaxis()\n", - " plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "for index_start, index_end in zip(\n", - " np.concatenate(([0], removed_ends)),\n", - " np.concatenate((removed_starts, [signals_raw.shape[0]])),\n", - "):\n", - " if index_start == index_end:\n", - " continue\n", - " plt.figure(figsize=(12, 12))\n", - " plt.pcolormesh(\n", - " ts_raw[index_start:index_end],\n", - " depths_raw,\n", - " signals_raw[index_start:index_end, :].T,\n", - " )\n", - " plt.gca().invert_yaxis()\n", - " plt.show()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.9" - } - }, - "nbformat": 4, - "nbformat_minor": 2 + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cd .." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import echofilter.raw.loader" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "\n", + "sns.set()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ROOT_DATA_DIR = \"/media/scott/scratch/Datasets/dsforce/surveyExports\"\n", + "\n", + "# example with 1 passive period, 1 turbulence cut out\n", + "sample = \"mobile/Survey16/Survey16_GR3_N1W_E\"\n", + "# example with lots of short passive periods, 1 proper passive period, 1 turbulence cut out\n", + "# sample = 'mobile/Survey17/Survey17_GR1_N0W_E'\n", + "\n", + "fname_raw = os.path.join(ROOT_DATA_DIR, sample + \"_Sv_raw.csv\")\n", + "fname_masked = os.path.join(ROOT_DATA_DIR, sample + \"_Sv.csv\")\n", + "\n", + "ts_raw, depths_raw, signals_raw = echofilter.raw.loader.transect_loader(fname_raw)\n", + "ts_mskd, depths_mskd, signals_mskd = echofilter.raw.loader.transect_loader(fname_masked)\n", + "\n", + "t_bot, d_bot = echofilter.raw.loader.evl_loader(\n", + " os.path.join(ROOT_DATA_DIR, sample + \"_bottom.evl\")\n", + ")\n", + "t_top, d_top = echofilter.raw.loader.evl_loader(\n", + " os.path.join(ROOT_DATA_DIR, sample + \"_turbulence.evl\")\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "np.min(signals_raw), np.max(signals_raw)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ts_raw.shape, depths_raw.shape, signals_raw.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "turbulence_color = \"c\"\n", + "bottom_color = \"#00ee00\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(12, 12))\n", + "plt.pcolormesh(ts_raw, depths_raw, signals_raw.T)\n", + "plt.plot(t_top, d_top, turbulence_color)\n", + "plt.plot(t_bot, d_bot, bottom_color)\n", + "plt.gca().invert_yaxis()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Finding boxes to merge" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(12, 12))\n", + "plt.pcolormesh(ts_mskd, depths_mskd, signals_mskd.T)\n", + "plt.plot(t_top, d_top, turbulence_color)\n", + "plt.plot(t_bot, d_bot, bottom_color)\n", + "plt.gca().invert_yaxis()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(12, 12))\n", + "plt.pcolormesh(ts_mskd, depths_mskd, ~np.isnan(signals_mskd).T)\n", + "plt.plot(t_top, d_top, turbulence_color)\n", + "plt.plot(t_bot, d_bot, bottom_color)\n", + "plt.gca().invert_yaxis()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "indices = np.tile(np.arange(signals_mskd.shape[1]), (signals_mskd.shape[0], 1)).astype(\n", + " \"float\"\n", + ")\n", + "indices[np.isnan(signals_mskd)] = np.nan" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "np.nanmin(indices, axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "np.nanmax(indices, axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cut_off_indices = np.round(np.nanmedian(indices, axis=1))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cut_off_indices" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "depths_mskd" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "top_depths = np.tile(\n", + " np.concatenate([(depths_mskd[:-1] + depths_mskd[1:]) / 2, depths_mskd[-1:]]),\n", + " (signals_mskd.shape[0], 1),\n", + ")\n", + "top_depths[~np.isnan(signals_mskd)] = np.nan" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "top_depths" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "np.expand_dims(np.arange(signals_mskd.shape[1]), 0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "np.expand_dims(cut_off_indices, -1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.imshow(top_depths.T)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "li = np.expand_dims(np.arange(signals_mskd.shape[1]), 0) > np.expand_dims(\n", + " cut_off_indices, -1\n", + ")\n", + "plt.imshow(li.T)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "top_depths[li] = np.nan\n", + "plt.imshow(top_depths.T)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "d_top_new = np.nanmax(top_depths, axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "bot_depths = np.tile(\n", + " np.concatenate([depths_mskd[:1], (depths_mskd[:-1] + depths_mskd[1:]) / 2]),\n", + " (signals_mskd.shape[0], 1),\n", + ")\n", + "bot_depths[~np.isnan(signals_mskd)] = np.nan\n", + "li = np.expand_dims(np.arange(signals_mskd.shape[1]), 0) < np.expand_dims(\n", + " cut_off_indices, -1\n", + ")\n", + "bot_depths[li] = np.nan\n", + "d_bot_new = np.nanmin(bot_depths, axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.imshow(bot_depths.T)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.plot(d_top_new)\n", + "plt.plot(d_bot_new)\n", + "plt.gca().invert_yaxis()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(12, 12))\n", + "plt.pcolormesh(ts_mskd, depths_mskd, signals_mskd.T)\n", + "plt.plot(t_top, d_top, turbulence_color)\n", + "plt.plot(t_bot, d_bot, bottom_color)\n", + "plt.gca().invert_yaxis()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(12, 12))\n", + "plt.pcolormesh(ts_mskd, depths_mskd, signals_mskd.T)\n", + "plt.plot(ts_mskd, d_top_new, turbulence_color)\n", + "plt.plot(ts_mskd, d_bot_new, bottom_color)\n", + "plt.gca().invert_yaxis()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "d_bot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "d_bot_new" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ts_mskd.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "t_bot.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.plot(t_bot, d_bot)\n", + "plt.plot(ts_mskd, d_bot_new)\n", + "plt.gca().invert_yaxis()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.plot(t_top, d_top)\n", + "plt.plot(ts_mskd, d_top_new)\n", + "plt.gca().invert_yaxis()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Finding rows which are fully removed from masked output" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "np.all(np.isnan(signals_mskd), axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "allnan = np.all(np.isnan(signals_mskd), axis=1)\n", + "removed_starts = np.nonzero(np.diff(allnan.astype(np.float)) > 0)[0] + 1\n", + "removed_ends = np.nonzero(np.diff(allnan.astype(np.float)) < 0)[0] + 1\n", + "\n", + "if removed_ends[0] < removed_starts[0]:\n", + " removed_starts = np.concatenate(([0], removed_starts))\n", + "\n", + "if removed_starts[-1] > removed_ends[-1]:\n", + " removed_ends = np.concatenate((removed_ends, [len(allnan)]))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(removed_starts)\n", + "print(removed_ends)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for index_start, index_end in zip(removed_starts, removed_ends):\n", + " plt.figure(figsize=(12, 12))\n", + " plt.pcolormesh(\n", + " ts_raw[index_start:index_end],\n", + " depths_raw,\n", + " signals_raw[index_start:index_end, :].T,\n", + " )\n", + " plt.gca().invert_yaxis()\n", + " plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for index_start, index_end in zip(\n", + " np.concatenate(([0], removed_ends)),\n", + " np.concatenate((removed_starts, [signals_raw.shape[0]])),\n", + "):\n", + " if index_start == index_end:\n", + " continue\n", + " plt.figure(figsize=(12, 12))\n", + " plt.pcolormesh(\n", + " ts_raw[index_start:index_end],\n", + " depths_raw,\n", + " signals_raw[index_start:index_end, :].T,\n", + " )\n", + " plt.gca().invert_yaxis()\n", + " plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 2 } diff --git a/notebooks/Passive metadata labelling.ipynb b/notebooks/Passive metadata labelling.ipynb index 4b091313..7a8b8d9e 100644 --- a/notebooks/Passive metadata labelling.ipynb +++ b/notebooks/Passive metadata labelling.ipynb @@ -1,1079 +1,1079 @@ { - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cd .." - ] + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cd .." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import copy\n", + "import datetime\n", + "import os\n", + "import warnings" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "\n", + "sns.set()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import echofilter.plotting\n", + "import echofilter.raw" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "top_color = \"c\"\n", + "bot_color = \"#00ee00\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "root_data_dir = \"/data/dsforce/surveyExports\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import scipy.interpolate\n", + "import scipy.ndimage\n", + "\n", + "from echofilter.raw import loader, utils" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from echofilter.raw.manipulate import find_passive_data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ROOT_DATA_DIR = loader.ROOT_DATA_DIR" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def find_passive_data_v2(\n", + " signals,\n", + " n_depth_use=38,\n", + " threshold_inner=None,\n", + " threshold_init=None,\n", + " deviation=None,\n", + " sigma_depth=0,\n", + " sigma_time=1,\n", + "):\n", + " \"\"\"\n", + " Find segments of Sv recording which correspond to passive recording.\n", + "\n", + " Parameters\n", + " ----------\n", + " signals : array_like\n", + " Two-dimensional array of Sv values, shaped `[timestamps, depths]`.\n", + " n_depth_use : int, optional\n", + " How many Sv depths to use, starting with the first depths (closest\n", + " to the sounder device). If `None` all depths are used. Default is `38`.\n", + " The median is taken across the depths, after taking the temporal\n", + " derivative.\n", + " threshold_inner : float, optional\n", + " Theshold to apply to the temporal derivative of the signal when\n", + " detected fine-tuned start/end of passive regions.\n", + " Default behaviour is to use a threshold automatically determined using\n", + " `deviation` if it is set, and otherwise use a threshold of `35.0`.\n", + " threshold_init : float, optional\n", + " Theshold to apply during the initial scan of the start/end of passive\n", + " regions, which seeds the fine-tuning search.\n", + " Default behaviour is to use a threshold automatically determined using\n", + " `deviation` if it is set, and otherwise use a threshold of `12.0`.\n", + " deviation : float, optional\n", + " Set `threshold_inner` to be `deviation` times the standard deviation of\n", + " the temporal derivative of the signal. The standard deviation is\n", + " robustly estimated based on the interquartile range.\n", + " If this is set, `threshold_inner` must not be `None`.\n", + " Default is `None`\n", + " sigma_depth : float, optional\n", + " Width of kernel for filtering signals across second dimension (depth).\n", + " Default is `0` (no filter).\n", + " sigma_time : float, optional\n", + " Width of kernel for filtering signals across second dimension (time).\n", + " Default is `1`. Set to `0` to not filter.\n", + "\n", + " Returns\n", + " -------\n", + " passive_start : numpy.ndarray\n", + " Indices of rows of `signals` at which passive segments start.\n", + " passive_end : numpy.ndarray\n", + " Indices of rows of `signals` at which passive segments end.\n", + "\n", + " Notes\n", + " -----\n", + " Works by looking at the difference between consecutive recordings and\n", + " finding large deviations.\n", + " \"\"\"\n", + " # Ensure signals is numpy array\n", + " signals = np.asarray(signals)\n", + "\n", + " if n_depth_use is None:\n", + " n_depth_use = signals.shape[1]\n", + "\n", + " if sigma_depth > 0:\n", + " signals_smooth = scipy.ndimage.gaussian_filter1d(\n", + " signals.astype(np.float32), sigma_depth, axis=-1\n", + " )\n", + " else:\n", + " signals_smooth = signals\n", + "\n", + " md_inner = np.median(np.diff(signals_smooth[:, :n_depth_use], axis=0), axis=1)\n", + "\n", + " if sigma_time > 0:\n", + " signals_init = scipy.ndimage.gaussian_filter1d(\n", + " signals_smooth.astype(np.float32), sigma_time, axis=0\n", + " )\n", + " md_init = np.median(np.diff(signals_init[:, :n_depth_use], axis=0), axis=1)\n", + " else:\n", + " signals_init = signals\n", + " md_init = md_inner\n", + "\n", + " if threshold_inner is not None and deviation is not None:\n", + " raise ValueError(\"Only one of `threshold_inner` and `deviation` should be set.\")\n", + " if threshold_init is None:\n", + " if deviation is None:\n", + " threshold_init = 12.0\n", + " else:\n", + " threshold_inner = (\n", + " (np.percentile(md_init, 75) - np.percentile(md_init, 25))\n", + " / 1.35\n", + " * deviation\n", + " )\n", + " if threshold_inner is None:\n", + " if deviation is None:\n", + " threshold_inner = 35.0\n", + " else:\n", + " threshold_inner = (\n", + " (np.percentile(md_inner, 75) - np.percentile(md_inner, 25))\n", + " / 1.35\n", + " * deviation\n", + " )\n", + "\n", + " threshold_high_inner = threshold_inner\n", + " # threshold_low_inner = -threshold_inner\n", + " threshold_high_init = threshold_init\n", + " threshold_low_init = -threshold_init\n", + " indices_possible_start_init = np.nonzero(md_init < threshold_low_init)[0]\n", + " indices_possible_end_init = np.nonzero(md_init > threshold_high_init)[0]\n", + "\n", + " if len(indices_possible_start_init) == 0 and len(indices_possible_end_init) == 0:\n", + " return np.array([]), np.array([])\n", + "\n", + " # Fine tune indices without smoothing\n", + " indices_possible_start = []\n", + " indices_possible_end = []\n", + "\n", + " capture_start = None\n", + " for i, index_p in enumerate(indices_possible_start_init):\n", + " if capture_start is None:\n", + " capture_start = index_p\n", + " if (\n", + " i + 1 >= len(indices_possible_start_init)\n", + " or indices_possible_start_init[i + 1] > index_p + 3\n", + " ):\n", + " # break capture\n", + " capture_end = index_p\n", + " capture = np.arange(capture_start, capture_end + 1)\n", + " indices_possible_start.append(capture[np.argmin(md_init[capture])])\n", + " capture_start = None\n", + "\n", + " capture_start = None\n", + " for i, index_p in enumerate(indices_possible_end_init):\n", + " if capture_start is None:\n", + " capture_start = index_p\n", + " if (\n", + " i + 1 >= len(indices_possible_end_init)\n", + " or indices_possible_end_init[i + 1] > index_p + 3\n", + " ):\n", + " # break capture\n", + " capture_end = index_p\n", + " capture = np.arange(capture_start, capture_end + 1)\n", + " indices_possible_end.append(capture[np.argmax(md_init[capture])])\n", + " capture_start = None\n", + "\n", + " indices_possible_start = np.array(indices_possible_start)\n", + " indices_possible_end = np.array(indices_possible_end)\n", + "\n", + " current_index = 0\n", + " indices_passive_start = []\n", + " indices_passive_end = []\n", + "\n", + " if len(indices_possible_start) > 0:\n", + " indices_possible_start += 1\n", + "\n", + " if len(indices_possible_end) > 0:\n", + " indices_possible_end += 1\n", + "\n", + " if len(indices_possible_end) > 0 and (\n", + " len(indices_possible_start) == 0\n", + " or indices_possible_end[0] < indices_possible_start[0]\n", + " ):\n", + " indices_passive_start.append(0)\n", + " current_index = indices_possible_end[0]\n", + " indices_passive_end.append(current_index)\n", + " indices_possible_start = indices_possible_start[\n", + " indices_possible_start > current_index\n", + " ]\n", + " indices_possible_end = indices_possible_end[\n", + " indices_possible_end > current_index\n", + " ]\n", + "\n", + " while len(indices_possible_start) > 0:\n", + " current_index = indices_possible_start[0]\n", + " indices_passive_start.append(current_index)\n", + " baseline_index = max(0, current_index - 2)\n", + " baseline = signals[baseline_index, :n_depth_use]\n", + "\n", + " # Find first column which returns to the baseline value seen before passive region\n", + " offsets = np.nonzero(\n", + " np.median(baseline - signals[current_index:, :n_depth_use], axis=1)\n", + " < threshold_high_inner\n", + " )[0]\n", + " if len(offsets) == 0:\n", + " current_index = signals.shape[0]\n", + " else:\n", + " current_index += offsets[0]\n", + " indices_passive_end.append(current_index)\n", + "\n", + " # Remove preceding indices from the list of candidates\n", + " indices_possible_start = indices_possible_start[\n", + " indices_possible_start > current_index\n", + " ]\n", + " indices_possible_end = indices_possible_end[\n", + " indices_possible_end > current_index\n", + " ]\n", + "\n", + " # Check the start was sufficiently inclusive\n", + " if current_index < signals.shape[0]:\n", + " baseline_index = min(signals.shape[0] - 1, current_index + 1)\n", + " baseline = signals[baseline_index, :n_depth_use]\n", + " nonpassives = np.nonzero(\n", + " np.median(baseline - signals[:current_index, :n_depth_use], axis=1)\n", + " < threshold_high_inner\n", + " )[0]\n", + " if len(nonpassives) == 0:\n", + " indices_passive_start[-1] = 0\n", + " else:\n", + " indices_passive_start[-1] = min(\n", + " indices_passive_start[-1],\n", + " nonpassives[-1] + 1,\n", + " )\n", + "\n", + " # Combine with preceding passive segments if they overlap\n", + " while (\n", + " len(indices_passive_start) > 1\n", + " and indices_passive_start[-1] <= indices_passive_end[-2]\n", + " ):\n", + " indices_passive_start = indices_passive_start[:-1]\n", + " indices_passive_end = indices_passive_end[:-2] + indices_passive_end[-1:]\n", + "\n", + " return np.array(indices_passive_start), np.array(indices_passive_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sample_paths = []\n", + "for dataset in [\"MinasPassage\", \"GrandPassage\", \"mobile\"]:\n", + " for partition in [\"train\", \"validate\", \"test\"]:\n", + " sample_paths += [\n", + " os.path.join(dataset, pth)\n", + " for pth in loader.get_partition_list(partition, dataset=dataset)\n", + " ]\n", + "sample_paths = sorted(sample_paths)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sample_paths" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"Begin running on {} paths\\n\".format(len(sample_paths)))\n", + "\n", + "for i_sample, sample_path in enumerate(sample_paths):\n", + "\n", + " print(\n", + " \"{:4d}/{:4d} {}\".format(\n", + " i_sample + 1,\n", + " len(sample_paths),\n", + " datetime.datetime.now().strftime(\"%A, %B %d, %Y at %H:%M:%S\"),\n", + " )\n", + " )\n", + " print(sample_path)\n", + "\n", + " fname_raw = os.path.join(root_data_dir, sample_path + \"_Sv_raw.csv\")\n", + " ts_raw, depths_raw, signals_raw = echofilter.raw.loader.transect_loader(\n", + " fname_raw, warn_row_overflow=0\n", + " )\n", + " is_upward_facing = depths_raw[-1] < depths_raw[0]\n", + "\n", + " nt = len(ts_raw)\n", + " print(\"length: {}\".format(nt))\n", + " its_raw = np.arange(len(ts_raw))\n", + "\n", + " if \"december2017\" in sample_path:\n", + " psx = np.array([])\n", + " pex = np.array([])\n", + " elif \"march2018\" in sample_path:\n", + " psx = np.arange(0, nt, 360)\n", + " pex = psx + 60\n", + " elif \"september2018\" in sample_path:\n", + " psx = np.arange(300, nt, 360)\n", + " pex = psx + 60\n", + " elif \"GrandPassage\" in sample_path:\n", + " psx = np.array([0, 3120, 6540, 9960, 13380])\n", + " psx = psx[psx < nt]\n", + " pex = np.r_[120, psx[1:] + 420]\n", + " pex = np.minimum(pex, nt)\n", + " else:\n", + " psx = None\n", + " pex = None\n", + "\n", + " def tidy_up_line(t, d):\n", + " if d is None:\n", + " return np.nan * np.ones_like(ts_raw)\n", + " is_usable = np.isfinite(d)\n", + " if np.sum(is_usable) > 0:\n", + " t = t[is_usable]\n", + " d = d[is_usable]\n", + " return np.interp(ts_raw, t, d)\n", + "\n", + " ps1, pe1 = find_passive_data(signals_raw)\n", + " ps2, pe2 = find_passive_data_v2(signals_raw)\n", + "\n", + " fname_surface = os.path.join(root_data_dir, sample_path + \"_surface.evl\")\n", + "\n", + " if os.path.isfile(fname_surface):\n", + " t_surface, d_surface = loader.evl_loader(fname_surface)\n", + " elif is_upward_facing:\n", + " print(\n", + " \"ERROR: Expected {} to exist when transect is upfacing.\".format(\n", + " fname_surface\n", + " )\n", + " )\n", + " t_surface = ts_raw\n", + " d_surface = np.zeros_like(ts_raw)\n", + " else:\n", + " # Default surface depth of 0m for downward facing data\n", + " t_surface = ts_raw\n", + " d_surface = np.zeros_like(ts_raw)\n", + "\n", + " # Find location of passive data.\n", + " # Try to determine passive data as whenever the surface line is undefined.\n", + " d_surface[np.isclose(d_surface, -10000.99)] = np.nan\n", + " is_passive = np.isnan(d_surface)\n", + " ps3, pe3 = echofilter.utils.get_indicator_onoffsets(is_passive > 0.5)\n", + " ps3 = np.asarray(ps3)\n", + " pe3 = np.asarray(pe3) + 1\n", + " pl3 = pe3 - ps3\n", + " li = pl3 >= 3\n", + " ps3 = ps3[li]\n", + " pe3 = pe3[li]\n", + " if np.sum(~li) > 0:\n", + " print(\"popped {} from v3, with lengths {}\".format(np.sum(~li), pl3[~li]))\n", + "\n", + " print(\"starts:\")\n", + " print(\"xp:\", psx)\n", + " print(\"v1:\", ps1)\n", + " print(\"v2:\", ps2)\n", + " print(\"v3:\", ps3)\n", + " print(\"ends:\")\n", + " print(\"xp:\", pex)\n", + " print(\"v1:\", pe1)\n", + " print(\"v2:\", pe2)\n", + " print(\"v3:\", pe3)\n", + " print(\"durations:\")\n", + " if pex is not None:\n", + " print(\"xp:\", pex - psx)\n", + " print(\"v1:\", pe1 - ps1)\n", + " print(\"v2:\", pe2 - ps2)\n", + " if ps3 is not None:\n", + " print(\"v3:\", pe3 - ps3)\n", + " print(\"\")\n", + "\n", + " if (\n", + " len(ps1) != len(ps2)\n", + " or len(pe1) != len(pe2)\n", + " or not np.allclose(ps1, ps2)\n", + " or not np.allclose(pe1, pe2)\n", + " ):\n", + " print(\"Warning: DIFFERENT PASSIVE PREDICTIONS v1/v2\")\n", + " if (\n", + " ps3 is not None\n", + " and pe3 is not None\n", + " and (\n", + " len(ps3) != len(ps2)\n", + " or len(pe3) != len(pe2)\n", + " or not np.allclose(ps3, ps2)\n", + " or not np.allclose(pe3, pe2)\n", + " )\n", + " ):\n", + " print(\"Warning: DIFFERENT PASSIVE PREDICTIONS v2/v3\")\n", + " if (\n", + " psx is not None\n", + " and pex is not None\n", + " and (\n", + " len(psx) != len(ps1)\n", + " or len(pex) != len(pe1)\n", + " or not np.allclose(psx, ps1)\n", + " or not np.allclose(pex, pe1)\n", + " )\n", + " ):\n", + " print(\"Warning: PASSIVE v1 DISAGREES WITH EXPECTED\")\n", + " if (\n", + " psx is not None\n", + " and pex is not None\n", + " and (\n", + " len(psx) != len(ps2)\n", + " or len(pex) != len(pe2)\n", + " or not np.allclose(psx, ps2)\n", + " or not np.allclose(pex, pe2)\n", + " )\n", + " ):\n", + " print(\"Warning: PASSIVE v2 DISAGREES WITH EXPECTED\")\n", + " if (\n", + " ps3 is not None\n", + " and pe3 is not None\n", + " and psx is not None\n", + " and pex is not None\n", + " and (\n", + " len(psx) != len(ps3)\n", + " or len(pex) != len(pe3)\n", + " or not np.allclose(psx, ps3)\n", + " or not np.allclose(pex, pe3)\n", + " )\n", + " ):\n", + " print(\"Warning: PASSIVE v3 DISAGREES WITH EXPECTED\")\n", + "\n", + " print(\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sample_paths = []\n", + "dataset = \"mobile\"\n", + "sample_paths = [\n", + " os.path.join(dataset, pth)\n", + " for pth in loader.get_partition_list(\"leaveout\", dataset=dataset)\n", + "]\n", + "sample_paths = sorted(sample_paths)\n", + "\n", + "\n", + "print(\"Begin running on {} paths\\n\".format(len(sample_paths)))\n", + "\n", + "for i_sample, sample_path in enumerate(sample_paths):\n", + "\n", + " print(\n", + " \"{:4d}/{:4d} {}\".format(\n", + " i_sample + 1,\n", + " len(sample_paths),\n", + " datetime.datetime.now().strftime(\"%A, %B %d, %Y at %H:%M:%S\"),\n", + " )\n", + " )\n", + " print(sample_path)\n", + "\n", + " fname_raw = os.path.join(root_data_dir, sample_path + \"_Sv_raw.csv\")\n", + " ts_raw, depths_raw, signals_raw = echofilter.raw.loader.transect_loader(\n", + " fname_raw, warn_row_overflow=0\n", + " )\n", + " is_upward_facing = depths_raw[-1] < depths_raw[0]\n", + "\n", + " nt = len(ts_raw)\n", + " print(\"length: {}\".format(nt))\n", + " its_raw = np.arange(len(ts_raw))\n", + "\n", + " if \"december2017\" in sample_path:\n", + " psx = np.array([])\n", + " pex = np.array([])\n", + " elif \"march2018\" in sample_path:\n", + " psx = np.arange(0, nt, 360)\n", + " pex = psx + 60\n", + " elif \"september2018\" in sample_path:\n", + " psx = np.arange(300, nt, 360)\n", + " pex = psx + 60\n", + " elif \"GrandPassage\" in sample_path:\n", + " psx = np.array([0, 3120, 6540, 9960, 13380])\n", + " psx = psx[psx < nt]\n", + " pex = np.r_[120, psx[1:] + 420]\n", + " pex = np.minimum(pex, nt)\n", + " else:\n", + " psx = None\n", + " pex = None\n", + "\n", + " ps1, pe1 = find_passive_data(signals_raw)\n", + " ps2, pe2 = find_passive_data_v2(signals_raw)\n", + "\n", + " fname_surface = os.path.join(root_data_dir, sample_path + \"_surface.evl\")\n", + "\n", + " if os.path.isfile(fname_surface):\n", + " t_surface, d_surface = loader.evl_loader(fname_surface)\n", + " elif is_upward_facing:\n", + " print(\n", + " \"ERROR: Expected {} to exist when transect is upfacing.\".format(\n", + " fname_surface\n", + " )\n", + " )\n", + " t_surface = ts_raw\n", + " d_surface = np.zeros_like(ts_raw)\n", + " else:\n", + " # Default surface depth of 0m for downward facing data\n", + " t_surface = ts_raw\n", + " d_surface = np.zeros_like(ts_raw)\n", + "\n", + " # Find location of passive data.\n", + " # Try to determine passive data as whenever the surface line is undefined.\n", + " d_surface[np.isclose(d_surface, -10000.99)] = np.nan\n", + " is_passive = np.isnan(d_surface)\n", + " ps3, pe3 = echofilter.utils.get_indicator_onoffsets(is_passive > 0.5)\n", + " ps3 = np.asarray(ps3)\n", + " pe3 = np.asarray(pe3) + 1\n", + " pl3 = pe3 - ps3\n", + " li = pl3 >= 3\n", + " ps3 = ps3[li]\n", + " pe3 = pe3[li]\n", + " if np.sum(~li) > 0:\n", + " print(\"popped {} from v3, with lengths {}\".format(np.sum(~li), pl3[~li]))\n", + "\n", + " print(\"starts:\")\n", + " print(\"xp:\", psx)\n", + " print(\"v1:\", ps1)\n", + " print(\"v2:\", ps2)\n", + " print(\"v3:\", ps3)\n", + " print(\"ends:\")\n", + " print(\"xp:\", pex)\n", + " print(\"v1:\", pe1)\n", + " print(\"v2:\", pe2)\n", + " print(\"v3:\", pe3)\n", + " print(\"durations:\")\n", + " if pex is not None:\n", + " print(\"xp:\", pex - psx)\n", + " print(\"v1:\", pe1 - ps1)\n", + " print(\"v2:\", pe2 - ps2)\n", + " if ps3 is not None:\n", + " print(\"v3:\", pe3 - ps3)\n", + " print(\"\")\n", + "\n", + " if (\n", + " len(ps1) != len(ps2)\n", + " or len(pe1) != len(pe2)\n", + " or not np.allclose(ps1, ps2)\n", + " or not np.allclose(pe1, pe2)\n", + " ):\n", + " print(\"Warning: DIFFERENT PASSIVE PREDICTIONS v1/v2\")\n", + " if (\n", + " ps3 is not None\n", + " and pe3 is not None\n", + " and (\n", + " len(ps3) != len(ps2)\n", + " or len(pe3) != len(pe2)\n", + " or not np.allclose(ps3, ps2)\n", + " or not np.allclose(pe3, pe2)\n", + " )\n", + " ):\n", + " print(\"Warning: DIFFERENT PASSIVE PREDICTIONS v2/v3\")\n", + " if (\n", + " psx is not None\n", + " and pex is not None\n", + " and (\n", + " len(psx) != len(ps1)\n", + " or len(pex) != len(pe1)\n", + " or not np.allclose(psx, ps1)\n", + " or not np.allclose(pex, pe1)\n", + " )\n", + " ):\n", + " print(\"Warning: PASSIVE v1 DISAGREES WITH EXPECTED\")\n", + " if (\n", + " psx is not None\n", + " and pex is not None\n", + " and (\n", + " len(psx) != len(ps2)\n", + " or len(pex) != len(pe2)\n", + " or not np.allclose(psx, ps2)\n", + " or not np.allclose(pex, pe2)\n", + " )\n", + " ):\n", + " print(\"Warning: PASSIVE v2 DISAGREES WITH EXPECTED\")\n", + " if (\n", + " ps3 is not None\n", + " and pe3 is not None\n", + " and psx is not None\n", + " and pex is not None\n", + " and (\n", + " len(psx) != len(ps3)\n", + " or len(pex) != len(pe3)\n", + " or not np.allclose(psx, ps3)\n", + " or not np.allclose(pex, pe3)\n", + " )\n", + " ):\n", + " print(\"Warning: PASSIVE v3 DISAGREES WITH EXPECTED\")\n", + "\n", + " print(\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "bad_sample_paths = [\n", + " \"MinasPassage/september2018/september2018_D20181021-T165220_D20181021-T222221\",\n", + " \"MinasPassage/september2018/september2018_D20181022-T105220_D20181022-T162217\",\n", + " \"MinasPassage/september2018/september2018_D20181022-T172213_D20181022-T232217\",\n", + " \"MinasPassage/september2018/september2018_D20181026-T082220_D20181026-T135213\",\n", + " \"MinasPassage/september2018/september2018_D20181026-T142217_D20181026-T195218\",\n", + "]\n", + "# bad_sample_paths = [\n", + "# \"MinasPassage/september2018/september2018_D20180928-T202217_D20180929-T015217\",\n", + "# \"MinasPassage/september2018/september2018_D20181008-T235218_D20181009-T052220\",\n", + "# \"MinasPassage/september2018/september2018_D20181021-T045220_D20181021-T102218\",\n", + "# ]\n", + "# bad_sample_paths = [\n", + "# \"GrandPassage/phase2/GrandPassage_WBAT_2B_20200130_UTC020017_floodhigh\",\n", + "# ]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"Begin running on {} paths\\n\".format(len(bad_sample_paths)))\n", + "\n", + "for i_sample, sample_path in enumerate(bad_sample_paths):\n", + "\n", + " print(\n", + " \"{:4d}/{:4d} {}\".format(\n", + " i_sample + 1,\n", + " len(bad_sample_paths),\n", + " datetime.datetime.now().strftime(\"%A, %B %d, %Y at %H:%M:%S\"),\n", + " )\n", + " )\n", + " print(sample_path)\n", + "\n", + " fname_raw = os.path.join(root_data_dir, sample_path + \"_Sv_raw.csv\")\n", + " ts_raw, depths_raw, signals_raw = echofilter.raw.loader.transect_loader(\n", + " fname_raw, warn_row_overflow=0\n", + " )\n", + " is_upward_facing = depths_raw[-1] < depths_raw[0]\n", + "\n", + " nt = len(ts_raw)\n", + " print(\"length: {}\".format(nt))\n", + " its_raw = np.arange(len(ts_raw))\n", + "\n", + " if \"december2017\" in sample_path:\n", + " psx = np.array([])\n", + " pex = np.array([])\n", + " elif \"march2018\" in sample_path:\n", + " psx = np.arange(0, nt, 360)\n", + " pex = psx + 60\n", + " elif \"september2018\" in sample_path:\n", + " psx = np.arange(300, nt, 360)\n", + " pex = psx + 60\n", + " elif \"GrandPassage\" in sample_path:\n", + " psx = np.array([0, 3120, 6540, 9960, 13380])\n", + " psx = psx[psx < nt]\n", + " pex = np.r_[120, psx[1:] + 420]\n", + " pex = np.minimum(pex, nt)\n", + " else:\n", + " psx = None\n", + " pex = None\n", + "\n", + " ps1, pe1 = find_passive_data(signals_raw)\n", + " ps2, pe2 = find_passive_data_v2(signals_raw)\n", + "\n", + " fname_surface = os.path.join(root_data_dir, sample_path + \"_surface.evl\")\n", + "\n", + " if os.path.isfile(fname_surface):\n", + " t_surface, d_surface = loader.evl_loader(fname_surface)\n", + " elif is_upward_facing:\n", + " print(\n", + " \"ERROR: Expected {} to exist when transect is upfacing.\".format(\n", + " fname_surface\n", + " )\n", + " )\n", + " t_surface = ts_raw\n", + " d_surface = np.zeros_like(ts_raw)\n", + " else:\n", + " # Default surface depth of 0m for downward facing data\n", + " t_surface = ts_raw\n", + " d_surface = np.zeros_like(ts_raw)\n", + "\n", + " # Find location of passive data.\n", + " # Try to determine passive data as whenever the surface line is undefined.\n", + " d_surface[np.isclose(d_surface, -10000.99)] = np.nan\n", + " is_passive = np.isnan(d_surface)\n", + " ps3, pe3 = echofilter.utils.get_indicator_onoffsets(is_passive > 0.5)\n", + " ps3 = np.asarray(ps3)\n", + " pe3 = np.asarray(pe3) + 1\n", + " pl3 = pe3 - ps3\n", + " li = pl3 >= 3\n", + " ps3 = ps3[li]\n", + " pe3 = pe3[li]\n", + " if np.sum(~li) > 0:\n", + " print(\"popped {} from v3, with lengths {}\".format(np.sum(~li), pl3[~li]))\n", + "\n", + " print(\"starts:\")\n", + " print(\"xp:\", psx)\n", + " print(\"v1:\", ps1)\n", + " print(\"v2:\", ps2)\n", + " print(\"v3:\", ps3)\n", + " print(\"ends:\")\n", + " print(\"xp:\", pex)\n", + " print(\"v1:\", pe1)\n", + " print(\"v2:\", pe2)\n", + " print(\"v3:\", pe3)\n", + " print(\"durations:\")\n", + " if pex is not None:\n", + " print(\"xp:\", pex - psx)\n", + " print(\"v1:\", pe1 - ps1)\n", + " print(\"v2:\", pe2 - ps2)\n", + " if ps3 is not None:\n", + " print(\"v3:\", pe3 - ps3)\n", + " print(\"\")\n", + "\n", + " if (\n", + " len(ps1) != len(ps2)\n", + " or len(pe1) != len(pe2)\n", + " or not np.allclose(ps1, ps2)\n", + " or not np.allclose(pe1, pe2)\n", + " ):\n", + " print(\"Warning: DIFFERENT PASSIVE PREDICTIONS v1/v2\")\n", + " if (\n", + " ps3 is not None\n", + " and pe3 is not None\n", + " and (\n", + " len(ps3) != len(ps2)\n", + " or len(pe3) != len(pe2)\n", + " or not np.allclose(ps3, ps2)\n", + " or not np.allclose(pe3, pe2)\n", + " )\n", + " ):\n", + " print(\"Warning: DIFFERENT PASSIVE PREDICTIONS v2/v3\")\n", + " if (\n", + " psx is not None\n", + " and pex is not None\n", + " and (\n", + " len(psx) != len(ps1)\n", + " or len(pex) != len(pe1)\n", + " or not np.allclose(psx, ps1)\n", + " or not np.allclose(pex, pe1)\n", + " )\n", + " ):\n", + " print(\"Warning: PASSIVE v1 DISAGREES WITH EXPECTED\")\n", + " if (\n", + " psx is not None\n", + " and pex is not None\n", + " and (\n", + " len(psx) != len(ps2)\n", + " or len(pex) != len(pe2)\n", + " or not np.allclose(psx, ps2)\n", + " or not np.allclose(pex, pe2)\n", + " )\n", + " ):\n", + " print(\"Warning: PASSIVE v2 DISAGREES WITH EXPECTED\")\n", + " if (\n", + " ps3 is not None\n", + " and pe3 is not None\n", + " and psx is not None\n", + " and pex is not None\n", + " and (\n", + " len(psx) != len(ps3)\n", + " or len(pex) != len(pe3)\n", + " or not np.allclose(psx, ps3)\n", + " or not np.allclose(pex, pe3)\n", + " )\n", + " ):\n", + " print(\"Warning: PASSIVE v3 DISAGREES WITH EXPECTED\")\n", + "\n", + " best_ps = ps1\n", + " best_pe = pe1\n", + "\n", + " for i in range(min(len(best_ps), len(psx))):\n", + " if best_ps[i] == psx[i] and best_pe[i] == pex[i]:\n", + " continue\n", + "\n", + " for ps, pe, tit in (\n", + " (psx[i], pex[i], \"expected\"),\n", + " (best_ps[i], best_pe[i], \"v1\"),\n", + " ):\n", + " plt.figure(figsize=(12, 9))\n", + " i0 = max(0, ps - 1)\n", + " i1 = pe + 2\n", + " if i1 >= len(its_raw):\n", + " i1 = None\n", + " plt.pcolormesh(its_raw[i0:i1], depths_raw[:50], signals_raw[i0:i1, :50].T)\n", + " plt.gca().invert_yaxis()\n", + " plt.title(\"passive #{}, {}\".format(i, tit))\n", + " plt.show()\n", + "\n", + " print(\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(12, 9))\n", + "plt.pcolormesh(its_raw, depths_raw, signals_raw.T)\n", + "plt.gca().invert_yaxis()\n", + "plt.title(sample_path)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "_ = \"\"\"\n", + "length: 10259\n", + "starts:\n", + "xp: [ 0 3120 6540 9960]\n", + "v1: [ 0 3120 6539 9959]\n", + "v2: [ 0 3120 6539 9959]\n", + "v3: [ 0 3120 6540 9960]\n", + "ends:\n", + "xp: [ 120 3540 6960 10259]\n", + "v1: [ 120 3540 6959 10259]\n", + "v2: [ 120 3540 6701 10259]\n", + "v3: [ 120 3540 6960 10260]\n", + "durations:\n", + "xp: [120 420 420 299]\n", + "v1: [120 420 420 300]\n", + "v2: [120 420 162 300]\n", + "v3: [120 420 420 300]\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "i0 = 10200\n", + "i1 = 10259\n", + "i0 -= 1\n", + "i1 += 2\n", + "plt.figure(figsize=(12, 9))\n", + "plt.pcolormesh(its_raw[i0:i1], depths_raw[:50], signals_raw[i0:i1, :50].T)\n", + "plt.gca().invert_yaxis()\n", + "plt.title(\"{}-{}\".format(i0, i1 - 1))\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sample_path = (\n", + " \"MinasPassage/december2017/december2017_D20180222-T145219_D20180222-T142214\"\n", + ")\n", + "\n", + "fname_raw = os.path.join(root_data_dir, sample_path + \"_Sv_raw.csv\")\n", + "ts_raw, depths_raw, signals_raw = echofilter.raw.loader.transect_loader(\n", + " fname_raw, warn_row_overflow=0\n", + ")\n", + "is_upward_facing = depths_raw[-1] < depths_raw[0]\n", + "\n", + "nt = len(ts_raw)\n", + "print(\"length: {}\".format(nt))\n", + "its_raw = np.arange(len(ts_raw))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(12, 9))\n", + "plt.pcolormesh(its_raw, depths_raw, signals_raw.T)\n", + "plt.gca().invert_yaxis()\n", + "plt.title(sample_path)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "i0 = 250\n", + "i1 = 350\n", + "i0 -= 1\n", + "i1 += 2\n", + "plt.figure(figsize=(12, 9))\n", + "plt.pcolormesh(its_raw[i0:i1], depths_raw[:50], signals_raw[i0:i1, :50].T)\n", + "plt.gca().invert_yaxis()\n", + "plt.title(\"{}-{}\".format(i0, i1 - 1))\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "bad_sample_paths = [\n", + " \"mobile/Survey01/Survey01_GR1_S1A_E\",\n", + " \"mobile/Survey03/Survey03_GR2_N5W_survey3\",\n", + " \"mobile/Survey03/Survey03_GR4_N0A_survey3\",\n", + " \"mobile/Survey04/Survey04_GR1_N3A\",\n", + " \"mobile/Survey04/Survey04_GR2_N5A\",\n", + " \"mobile/Survey05/Survey05_GR1_N1A_survey5\",\n", + " \"mobile/Survey07/Survey07_GR2_N1W_survey7\",\n", + " \"mobile/Survey10/Survey10_GR1_N0A_E\",\n", + " \"mobile/Survey12/Survey12_GR4_N5A_E\",\n", + " \"mobile/Survey01/Survey01_GR1_S2A_E\",\n", + " \"mobile/Survey01/Survey01_GR1_S2W_E\",\n", + " \"mobile/Survey11/Survey11_GR1_S2A_E\",\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"Begin running on {} paths\\n\".format(len(bad_sample_paths)))\n", + "\n", + "for i_sample, sample_path in enumerate(bad_sample_paths):\n", + "\n", + " print(\n", + " \"{:4d}/{:4d} {}\".format(\n", + " i_sample + 1,\n", + " len(bad_sample_paths),\n", + " datetime.datetime.now().strftime(\"%A, %B %d, %Y at %H:%M:%S\"),\n", + " )\n", + " )\n", + " print(sample_path)\n", + "\n", + " fname_raw = os.path.join(root_data_dir, sample_path + \"_Sv_raw.csv\")\n", + " ts_raw, depths_raw, signals_raw = echofilter.raw.loader.transect_loader(\n", + " fname_raw, warn_row_overflow=0\n", + " )\n", + " is_upward_facing = depths_raw[-1] < depths_raw[0]\n", + "\n", + " nt = len(ts_raw)\n", + " print(\"length: {}\".format(nt))\n", + " its_raw = np.arange(len(ts_raw))\n", + "\n", + " plt.figure(figsize=(12, 9))\n", + " plt.pcolormesh(its_raw, depths_raw, signals_raw.T)\n", + " plt.gca().invert_yaxis()\n", + " plt.title(sample_path)\n", + " plt.show()\n", + "\n", + " print(\"\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.15" + } }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import copy\n", - "import datetime\n", - "import os\n", - "import warnings" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import matplotlib.pyplot as plt\n", - "import seaborn as sns\n", - "\n", - "sns.set()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import echofilter.plotting\n", - "import echofilter.raw" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "top_color = \"c\"\n", - "bot_color = \"#00ee00\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "root_data_dir = \"/data/dsforce/surveyExports\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import scipy.interpolate\n", - "import scipy.ndimage\n", - "\n", - "from echofilter.raw import loader, utils" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from echofilter.raw.manipulate import find_passive_data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "ROOT_DATA_DIR = loader.ROOT_DATA_DIR" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def find_passive_data_v2(\n", - " signals,\n", - " n_depth_use=38,\n", - " threshold_inner=None,\n", - " threshold_init=None,\n", - " deviation=None,\n", - " sigma_depth=0,\n", - " sigma_time=1,\n", - "):\n", - " \"\"\"\n", - " Find segments of Sv recording which correspond to passive recording.\n", - "\n", - " Parameters\n", - " ----------\n", - " signals : array_like\n", - " Two-dimensional array of Sv values, shaped `[timestamps, depths]`.\n", - " n_depth_use : int, optional\n", - " How many Sv depths to use, starting with the first depths (closest\n", - " to the sounder device). If `None` all depths are used. Default is `38`.\n", - " The median is taken across the depths, after taking the temporal\n", - " derivative.\n", - " threshold_inner : float, optional\n", - " Theshold to apply to the temporal derivative of the signal when\n", - " detected fine-tuned start/end of passive regions.\n", - " Default behaviour is to use a threshold automatically determined using\n", - " `deviation` if it is set, and otherwise use a threshold of `35.0`.\n", - " threshold_init : float, optional\n", - " Theshold to apply during the initial scan of the start/end of passive\n", - " regions, which seeds the fine-tuning search.\n", - " Default behaviour is to use a threshold automatically determined using\n", - " `deviation` if it is set, and otherwise use a threshold of `12.0`.\n", - " deviation : float, optional\n", - " Set `threshold_inner` to be `deviation` times the standard deviation of\n", - " the temporal derivative of the signal. The standard deviation is\n", - " robustly estimated based on the interquartile range.\n", - " If this is set, `threshold_inner` must not be `None`.\n", - " Default is `None`\n", - " sigma_depth : float, optional\n", - " Width of kernel for filtering signals across second dimension (depth).\n", - " Default is `0` (no filter).\n", - " sigma_time : float, optional\n", - " Width of kernel for filtering signals across second dimension (time).\n", - " Default is `1`. Set to `0` to not filter.\n", - "\n", - " Returns\n", - " -------\n", - " passive_start : numpy.ndarray\n", - " Indices of rows of `signals` at which passive segments start.\n", - " passive_end : numpy.ndarray\n", - " Indices of rows of `signals` at which passive segments end.\n", - "\n", - " Notes\n", - " -----\n", - " Works by looking at the difference between consecutive recordings and\n", - " finding large deviations.\n", - " \"\"\"\n", - " # Ensure signals is numpy array\n", - " signals = np.asarray(signals)\n", - "\n", - " if n_depth_use is None:\n", - " n_depth_use = signals.shape[1]\n", - "\n", - " if sigma_depth > 0:\n", - " signals_smooth = scipy.ndimage.gaussian_filter1d(\n", - " signals.astype(np.float32), sigma_depth, axis=-1\n", - " )\n", - " else:\n", - " signals_smooth = signals\n", - "\n", - " md_inner = np.median(np.diff(signals_smooth[:, :n_depth_use], axis=0), axis=1)\n", - "\n", - " if sigma_time > 0:\n", - " signals_init = scipy.ndimage.gaussian_filter1d(\n", - " signals_smooth.astype(np.float32), sigma_time, axis=0\n", - " )\n", - " md_init = np.median(np.diff(signals_init[:, :n_depth_use], axis=0), axis=1)\n", - " else:\n", - " signals_init = signals\n", - " md_init = md_inner\n", - "\n", - " if threshold_inner is not None and deviation is not None:\n", - " raise ValueError(\"Only one of `threshold_inner` and `deviation` should be set.\")\n", - " if threshold_init is None:\n", - " if deviation is None:\n", - " threshold_init = 12.0\n", - " else:\n", - " threshold_inner = (\n", - " (np.percentile(md_init, 75) - np.percentile(md_init, 25))\n", - " / 1.35\n", - " * deviation\n", - " )\n", - " if threshold_inner is None:\n", - " if deviation is None:\n", - " threshold_inner = 35.0\n", - " else:\n", - " threshold_inner = (\n", - " (np.percentile(md_inner, 75) - np.percentile(md_inner, 25))\n", - " / 1.35\n", - " * deviation\n", - " )\n", - "\n", - " threshold_high_inner = threshold_inner\n", - " # threshold_low_inner = -threshold_inner\n", - " threshold_high_init = threshold_init\n", - " threshold_low_init = -threshold_init\n", - " indices_possible_start_init = np.nonzero(md_init < threshold_low_init)[0]\n", - " indices_possible_end_init = np.nonzero(md_init > threshold_high_init)[0]\n", - "\n", - " if len(indices_possible_start_init) == 0 and len(indices_possible_end_init) == 0:\n", - " return np.array([]), np.array([])\n", - "\n", - " # Fine tune indices without smoothing\n", - " indices_possible_start = []\n", - " indices_possible_end = []\n", - "\n", - " capture_start = None\n", - " for i, index_p in enumerate(indices_possible_start_init):\n", - " if capture_start is None:\n", - " capture_start = index_p\n", - " if (\n", - " i + 1 >= len(indices_possible_start_init)\n", - " or indices_possible_start_init[i + 1] > index_p + 3\n", - " ):\n", - " # break capture\n", - " capture_end = index_p\n", - " capture = np.arange(capture_start, capture_end + 1)\n", - " indices_possible_start.append(capture[np.argmin(md_init[capture])])\n", - " capture_start = None\n", - "\n", - " capture_start = None\n", - " for i, index_p in enumerate(indices_possible_end_init):\n", - " if capture_start is None:\n", - " capture_start = index_p\n", - " if (\n", - " i + 1 >= len(indices_possible_end_init)\n", - " or indices_possible_end_init[i + 1] > index_p + 3\n", - " ):\n", - " # break capture\n", - " capture_end = index_p\n", - " capture = np.arange(capture_start, capture_end + 1)\n", - " indices_possible_end.append(capture[np.argmax(md_init[capture])])\n", - " capture_start = None\n", - "\n", - " indices_possible_start = np.array(indices_possible_start)\n", - " indices_possible_end = np.array(indices_possible_end)\n", - "\n", - " current_index = 0\n", - " indices_passive_start = []\n", - " indices_passive_end = []\n", - "\n", - " if len(indices_possible_start) > 0:\n", - " indices_possible_start += 1\n", - "\n", - " if len(indices_possible_end) > 0:\n", - " indices_possible_end += 1\n", - "\n", - " if len(indices_possible_end) > 0 and (\n", - " len(indices_possible_start) == 0\n", - " or indices_possible_end[0] < indices_possible_start[0]\n", - " ):\n", - " indices_passive_start.append(0)\n", - " current_index = indices_possible_end[0]\n", - " indices_passive_end.append(current_index)\n", - " indices_possible_start = indices_possible_start[\n", - " indices_possible_start > current_index\n", - " ]\n", - " indices_possible_end = indices_possible_end[\n", - " indices_possible_end > current_index\n", - " ]\n", - "\n", - " while len(indices_possible_start) > 0:\n", - " current_index = indices_possible_start[0]\n", - " indices_passive_start.append(current_index)\n", - " baseline_index = max(0, current_index - 2)\n", - " baseline = signals[baseline_index, :n_depth_use]\n", - "\n", - " # Find first column which returns to the baseline value seen before passive region\n", - " offsets = np.nonzero(\n", - " np.median(baseline - signals[current_index:, :n_depth_use], axis=1)\n", - " < threshold_high_inner\n", - " )[0]\n", - " if len(offsets) == 0:\n", - " current_index = signals.shape[0]\n", - " else:\n", - " current_index += offsets[0]\n", - " indices_passive_end.append(current_index)\n", - "\n", - " # Remove preceding indices from the list of candidates\n", - " indices_possible_start = indices_possible_start[\n", - " indices_possible_start > current_index\n", - " ]\n", - " indices_possible_end = indices_possible_end[\n", - " indices_possible_end > current_index\n", - " ]\n", - "\n", - " # Check the start was sufficiently inclusive\n", - " if current_index < signals.shape[0]:\n", - " baseline_index = min(signals.shape[0] - 1, current_index + 1)\n", - " baseline = signals[baseline_index, :n_depth_use]\n", - " nonpassives = np.nonzero(\n", - " np.median(baseline - signals[:current_index, :n_depth_use], axis=1)\n", - " < threshold_high_inner\n", - " )[0]\n", - " if len(nonpassives) == 0:\n", - " indices_passive_start[-1] = 0\n", - " else:\n", - " indices_passive_start[-1] = min(\n", - " indices_passive_start[-1],\n", - " nonpassives[-1] + 1,\n", - " )\n", - "\n", - " # Combine with preceding passive segments if they overlap\n", - " while (\n", - " len(indices_passive_start) > 1\n", - " and indices_passive_start[-1] <= indices_passive_end[-2]\n", - " ):\n", - " indices_passive_start = indices_passive_start[:-1]\n", - " indices_passive_end = indices_passive_end[:-2] + indices_passive_end[-1:]\n", - "\n", - " return np.array(indices_passive_start), np.array(indices_passive_end)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sample_paths = []\n", - "for dataset in [\"MinasPassage\", \"GrandPassage\", \"mobile\"]:\n", - " for partition in [\"train\", \"validate\", \"test\"]:\n", - " sample_paths += [\n", - " os.path.join(dataset, pth)\n", - " for pth in loader.get_partition_list(partition, dataset=dataset)\n", - " ]\n", - "sample_paths = sorted(sample_paths)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sample_paths" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(\"Begin running on {} paths\\n\".format(len(sample_paths)))\n", - "\n", - "for i_sample, sample_path in enumerate(sample_paths):\n", - "\n", - " print(\n", - " \"{:4d}/{:4d} {}\".format(\n", - " i_sample + 1,\n", - " len(sample_paths),\n", - " datetime.datetime.now().strftime(\"%A, %B %d, %Y at %H:%M:%S\"),\n", - " )\n", - " )\n", - " print(sample_path)\n", - "\n", - " fname_raw = os.path.join(root_data_dir, sample_path + \"_Sv_raw.csv\")\n", - " ts_raw, depths_raw, signals_raw = echofilter.raw.loader.transect_loader(\n", - " fname_raw, warn_row_overflow=0\n", - " )\n", - " is_upward_facing = depths_raw[-1] < depths_raw[0]\n", - "\n", - " nt = len(ts_raw)\n", - " print(\"length: {}\".format(nt))\n", - " its_raw = np.arange(len(ts_raw))\n", - "\n", - " if \"december2017\" in sample_path:\n", - " psx = np.array([])\n", - " pex = np.array([])\n", - " elif \"march2018\" in sample_path:\n", - " psx = np.arange(0, nt, 360)\n", - " pex = psx + 60\n", - " elif \"september2018\" in sample_path:\n", - " psx = np.arange(300, nt, 360)\n", - " pex = psx + 60\n", - " elif \"GrandPassage\" in sample_path:\n", - " psx = np.array([0, 3120, 6540, 9960, 13380])\n", - " psx = psx[psx < nt]\n", - " pex = np.r_[120, psx[1:] + 420]\n", - " pex = np.minimum(pex, nt)\n", - " else:\n", - " psx = None\n", - " pex = None\n", - "\n", - " def tidy_up_line(t, d):\n", - " if d is None:\n", - " return np.nan * np.ones_like(ts_raw)\n", - " is_usable = np.isfinite(d)\n", - " if np.sum(is_usable) > 0:\n", - " t = t[is_usable]\n", - " d = d[is_usable]\n", - " return np.interp(ts_raw, t, d)\n", - "\n", - " ps1, pe1 = find_passive_data(signals_raw)\n", - " ps2, pe2 = find_passive_data_v2(signals_raw)\n", - "\n", - " fname_surface = os.path.join(root_data_dir, sample_path + \"_surface.evl\")\n", - "\n", - " if os.path.isfile(fname_surface):\n", - " t_surface, d_surface = loader.evl_loader(fname_surface)\n", - " elif is_upward_facing:\n", - " print(\n", - " \"ERROR: Expected {} to exist when transect is upfacing.\".format(\n", - " fname_surface\n", - " )\n", - " )\n", - " t_surface = ts_raw\n", - " d_surface = np.zeros_like(ts_raw)\n", - " else:\n", - " # Default surface depth of 0m for downward facing data\n", - " t_surface = ts_raw\n", - " d_surface = np.zeros_like(ts_raw)\n", - "\n", - " # Find location of passive data.\n", - " # Try to determine passive data as whenever the surface line is undefined.\n", - " d_surface[np.isclose(d_surface, -10000.99)] = np.nan\n", - " is_passive = np.isnan(d_surface)\n", - " ps3, pe3 = echofilter.utils.get_indicator_onoffsets(is_passive > 0.5)\n", - " ps3 = np.asarray(ps3)\n", - " pe3 = np.asarray(pe3) + 1\n", - " pl3 = pe3 - ps3\n", - " li = pl3 >= 3\n", - " ps3 = ps3[li]\n", - " pe3 = pe3[li]\n", - " if np.sum(~li) > 0:\n", - " print(\"popped {} from v3, with lengths {}\".format(np.sum(~li), pl3[~li]))\n", - "\n", - " print(\"starts:\")\n", - " print(\"xp:\", psx)\n", - " print(\"v1:\", ps1)\n", - " print(\"v2:\", ps2)\n", - " print(\"v3:\", ps3)\n", - " print(\"ends:\")\n", - " print(\"xp:\", pex)\n", - " print(\"v1:\", pe1)\n", - " print(\"v2:\", pe2)\n", - " print(\"v3:\", pe3)\n", - " print(\"durations:\")\n", - " if pex is not None:\n", - " print(\"xp:\", pex - psx)\n", - " print(\"v1:\", pe1 - ps1)\n", - " print(\"v2:\", pe2 - ps2)\n", - " if ps3 is not None:\n", - " print(\"v3:\", pe3 - ps3)\n", - " print(\"\")\n", - "\n", - " if (\n", - " len(ps1) != len(ps2)\n", - " or len(pe1) != len(pe2)\n", - " or not np.allclose(ps1, ps2)\n", - " or not np.allclose(pe1, pe2)\n", - " ):\n", - " print(\"Warning: DIFFERENT PASSIVE PREDICTIONS v1/v2\")\n", - " if (\n", - " ps3 is not None\n", - " and pe3 is not None\n", - " and (\n", - " len(ps3) != len(ps2)\n", - " or len(pe3) != len(pe2)\n", - " or not np.allclose(ps3, ps2)\n", - " or not np.allclose(pe3, pe2)\n", - " )\n", - " ):\n", - " print(\"Warning: DIFFERENT PASSIVE PREDICTIONS v2/v3\")\n", - " if (\n", - " psx is not None\n", - " and pex is not None\n", - " and (\n", - " len(psx) != len(ps1)\n", - " or len(pex) != len(pe1)\n", - " or not np.allclose(psx, ps1)\n", - " or not np.allclose(pex, pe1)\n", - " )\n", - " ):\n", - " print(\"Warning: PASSIVE v1 DISAGREES WITH EXPECTED\")\n", - " if (\n", - " psx is not None\n", - " and pex is not None\n", - " and (\n", - " len(psx) != len(ps2)\n", - " or len(pex) != len(pe2)\n", - " or not np.allclose(psx, ps2)\n", - " or not np.allclose(pex, pe2)\n", - " )\n", - " ):\n", - " print(\"Warning: PASSIVE v2 DISAGREES WITH EXPECTED\")\n", - " if (\n", - " ps3 is not None\n", - " and pe3 is not None\n", - " and psx is not None\n", - " and pex is not None\n", - " and (\n", - " len(psx) != len(ps3)\n", - " or len(pex) != len(pe3)\n", - " or not np.allclose(psx, ps3)\n", - " or not np.allclose(pex, pe3)\n", - " )\n", - " ):\n", - " print(\"Warning: PASSIVE v3 DISAGREES WITH EXPECTED\")\n", - "\n", - " print(\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sample_paths = []\n", - "dataset = \"mobile\"\n", - "sample_paths = [\n", - " os.path.join(dataset, pth)\n", - " for pth in loader.get_partition_list(\"leaveout\", dataset=dataset)\n", - "]\n", - "sample_paths = sorted(sample_paths)\n", - "\n", - "\n", - "print(\"Begin running on {} paths\\n\".format(len(sample_paths)))\n", - "\n", - "for i_sample, sample_path in enumerate(sample_paths):\n", - "\n", - " print(\n", - " \"{:4d}/{:4d} {}\".format(\n", - " i_sample + 1,\n", - " len(sample_paths),\n", - " datetime.datetime.now().strftime(\"%A, %B %d, %Y at %H:%M:%S\"),\n", - " )\n", - " )\n", - " print(sample_path)\n", - "\n", - " fname_raw = os.path.join(root_data_dir, sample_path + \"_Sv_raw.csv\")\n", - " ts_raw, depths_raw, signals_raw = echofilter.raw.loader.transect_loader(\n", - " fname_raw, warn_row_overflow=0\n", - " )\n", - " is_upward_facing = depths_raw[-1] < depths_raw[0]\n", - "\n", - " nt = len(ts_raw)\n", - " print(\"length: {}\".format(nt))\n", - " its_raw = np.arange(len(ts_raw))\n", - "\n", - " if \"december2017\" in sample_path:\n", - " psx = np.array([])\n", - " pex = np.array([])\n", - " elif \"march2018\" in sample_path:\n", - " psx = np.arange(0, nt, 360)\n", - " pex = psx + 60\n", - " elif \"september2018\" in sample_path:\n", - " psx = np.arange(300, nt, 360)\n", - " pex = psx + 60\n", - " elif \"GrandPassage\" in sample_path:\n", - " psx = np.array([0, 3120, 6540, 9960, 13380])\n", - " psx = psx[psx < nt]\n", - " pex = np.r_[120, psx[1:] + 420]\n", - " pex = np.minimum(pex, nt)\n", - " else:\n", - " psx = None\n", - " pex = None\n", - "\n", - " ps1, pe1 = find_passive_data(signals_raw)\n", - " ps2, pe2 = find_passive_data_v2(signals_raw)\n", - "\n", - " fname_surface = os.path.join(root_data_dir, sample_path + \"_surface.evl\")\n", - "\n", - " if os.path.isfile(fname_surface):\n", - " t_surface, d_surface = loader.evl_loader(fname_surface)\n", - " elif is_upward_facing:\n", - " print(\n", - " \"ERROR: Expected {} to exist when transect is upfacing.\".format(\n", - " fname_surface\n", - " )\n", - " )\n", - " t_surface = ts_raw\n", - " d_surface = np.zeros_like(ts_raw)\n", - " else:\n", - " # Default surface depth of 0m for downward facing data\n", - " t_surface = ts_raw\n", - " d_surface = np.zeros_like(ts_raw)\n", - "\n", - " # Find location of passive data.\n", - " # Try to determine passive data as whenever the surface line is undefined.\n", - " d_surface[np.isclose(d_surface, -10000.99)] = np.nan\n", - " is_passive = np.isnan(d_surface)\n", - " ps3, pe3 = echofilter.utils.get_indicator_onoffsets(is_passive > 0.5)\n", - " ps3 = np.asarray(ps3)\n", - " pe3 = np.asarray(pe3) + 1\n", - " pl3 = pe3 - ps3\n", - " li = pl3 >= 3\n", - " ps3 = ps3[li]\n", - " pe3 = pe3[li]\n", - " if np.sum(~li) > 0:\n", - " print(\"popped {} from v3, with lengths {}\".format(np.sum(~li), pl3[~li]))\n", - "\n", - " print(\"starts:\")\n", - " print(\"xp:\", psx)\n", - " print(\"v1:\", ps1)\n", - " print(\"v2:\", ps2)\n", - " print(\"v3:\", ps3)\n", - " print(\"ends:\")\n", - " print(\"xp:\", pex)\n", - " print(\"v1:\", pe1)\n", - " print(\"v2:\", pe2)\n", - " print(\"v3:\", pe3)\n", - " print(\"durations:\")\n", - " if pex is not None:\n", - " print(\"xp:\", pex - psx)\n", - " print(\"v1:\", pe1 - ps1)\n", - " print(\"v2:\", pe2 - ps2)\n", - " if ps3 is not None:\n", - " print(\"v3:\", pe3 - ps3)\n", - " print(\"\")\n", - "\n", - " if (\n", - " len(ps1) != len(ps2)\n", - " or len(pe1) != len(pe2)\n", - " or not np.allclose(ps1, ps2)\n", - " or not np.allclose(pe1, pe2)\n", - " ):\n", - " print(\"Warning: DIFFERENT PASSIVE PREDICTIONS v1/v2\")\n", - " if (\n", - " ps3 is not None\n", - " and pe3 is not None\n", - " and (\n", - " len(ps3) != len(ps2)\n", - " or len(pe3) != len(pe2)\n", - " or not np.allclose(ps3, ps2)\n", - " or not np.allclose(pe3, pe2)\n", - " )\n", - " ):\n", - " print(\"Warning: DIFFERENT PASSIVE PREDICTIONS v2/v3\")\n", - " if (\n", - " psx is not None\n", - " and pex is not None\n", - " and (\n", - " len(psx) != len(ps1)\n", - " or len(pex) != len(pe1)\n", - " or not np.allclose(psx, ps1)\n", - " or not np.allclose(pex, pe1)\n", - " )\n", - " ):\n", - " print(\"Warning: PASSIVE v1 DISAGREES WITH EXPECTED\")\n", - " if (\n", - " psx is not None\n", - " and pex is not None\n", - " and (\n", - " len(psx) != len(ps2)\n", - " or len(pex) != len(pe2)\n", - " or not np.allclose(psx, ps2)\n", - " or not np.allclose(pex, pe2)\n", - " )\n", - " ):\n", - " print(\"Warning: PASSIVE v2 DISAGREES WITH EXPECTED\")\n", - " if (\n", - " ps3 is not None\n", - " and pe3 is not None\n", - " and psx is not None\n", - " and pex is not None\n", - " and (\n", - " len(psx) != len(ps3)\n", - " or len(pex) != len(pe3)\n", - " or not np.allclose(psx, ps3)\n", - " or not np.allclose(pex, pe3)\n", - " )\n", - " ):\n", - " print(\"Warning: PASSIVE v3 DISAGREES WITH EXPECTED\")\n", - "\n", - " print(\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "bad_sample_paths = [\n", - " \"MinasPassage/september2018/september2018_D20181021-T165220_D20181021-T222221\",\n", - " \"MinasPassage/september2018/september2018_D20181022-T105220_D20181022-T162217\",\n", - " \"MinasPassage/september2018/september2018_D20181022-T172213_D20181022-T232217\",\n", - " \"MinasPassage/september2018/september2018_D20181026-T082220_D20181026-T135213\",\n", - " \"MinasPassage/september2018/september2018_D20181026-T142217_D20181026-T195218\",\n", - "]\n", - "# bad_sample_paths = [\n", - "# \"MinasPassage/september2018/september2018_D20180928-T202217_D20180929-T015217\",\n", - "# \"MinasPassage/september2018/september2018_D20181008-T235218_D20181009-T052220\",\n", - "# \"MinasPassage/september2018/september2018_D20181021-T045220_D20181021-T102218\",\n", - "# ]\n", - "# bad_sample_paths = [\n", - "# \"GrandPassage/phase2/GrandPassage_WBAT_2B_20200130_UTC020017_floodhigh\",\n", - "# ]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(\"Begin running on {} paths\\n\".format(len(bad_sample_paths)))\n", - "\n", - "for i_sample, sample_path in enumerate(bad_sample_paths):\n", - "\n", - " print(\n", - " \"{:4d}/{:4d} {}\".format(\n", - " i_sample + 1,\n", - " len(bad_sample_paths),\n", - " datetime.datetime.now().strftime(\"%A, %B %d, %Y at %H:%M:%S\"),\n", - " )\n", - " )\n", - " print(sample_path)\n", - "\n", - " fname_raw = os.path.join(root_data_dir, sample_path + \"_Sv_raw.csv\")\n", - " ts_raw, depths_raw, signals_raw = echofilter.raw.loader.transect_loader(\n", - " fname_raw, warn_row_overflow=0\n", - " )\n", - " is_upward_facing = depths_raw[-1] < depths_raw[0]\n", - "\n", - " nt = len(ts_raw)\n", - " print(\"length: {}\".format(nt))\n", - " its_raw = np.arange(len(ts_raw))\n", - "\n", - " if \"december2017\" in sample_path:\n", - " psx = np.array([])\n", - " pex = np.array([])\n", - " elif \"march2018\" in sample_path:\n", - " psx = np.arange(0, nt, 360)\n", - " pex = psx + 60\n", - " elif \"september2018\" in sample_path:\n", - " psx = np.arange(300, nt, 360)\n", - " pex = psx + 60\n", - " elif \"GrandPassage\" in sample_path:\n", - " psx = np.array([0, 3120, 6540, 9960, 13380])\n", - " psx = psx[psx < nt]\n", - " pex = np.r_[120, psx[1:] + 420]\n", - " pex = np.minimum(pex, nt)\n", - " else:\n", - " psx = None\n", - " pex = None\n", - "\n", - " ps1, pe1 = find_passive_data(signals_raw)\n", - " ps2, pe2 = find_passive_data_v2(signals_raw)\n", - "\n", - " fname_surface = os.path.join(root_data_dir, sample_path + \"_surface.evl\")\n", - "\n", - " if os.path.isfile(fname_surface):\n", - " t_surface, d_surface = loader.evl_loader(fname_surface)\n", - " elif is_upward_facing:\n", - " print(\n", - " \"ERROR: Expected {} to exist when transect is upfacing.\".format(\n", - " fname_surface\n", - " )\n", - " )\n", - " t_surface = ts_raw\n", - " d_surface = np.zeros_like(ts_raw)\n", - " else:\n", - " # Default surface depth of 0m for downward facing data\n", - " t_surface = ts_raw\n", - " d_surface = np.zeros_like(ts_raw)\n", - "\n", - " # Find location of passive data.\n", - " # Try to determine passive data as whenever the surface line is undefined.\n", - " d_surface[np.isclose(d_surface, -10000.99)] = np.nan\n", - " is_passive = np.isnan(d_surface)\n", - " ps3, pe3 = echofilter.utils.get_indicator_onoffsets(is_passive > 0.5)\n", - " ps3 = np.asarray(ps3)\n", - " pe3 = np.asarray(pe3) + 1\n", - " pl3 = pe3 - ps3\n", - " li = pl3 >= 3\n", - " ps3 = ps3[li]\n", - " pe3 = pe3[li]\n", - " if np.sum(~li) > 0:\n", - " print(\"popped {} from v3, with lengths {}\".format(np.sum(~li), pl3[~li]))\n", - "\n", - " print(\"starts:\")\n", - " print(\"xp:\", psx)\n", - " print(\"v1:\", ps1)\n", - " print(\"v2:\", ps2)\n", - " print(\"v3:\", ps3)\n", - " print(\"ends:\")\n", - " print(\"xp:\", pex)\n", - " print(\"v1:\", pe1)\n", - " print(\"v2:\", pe2)\n", - " print(\"v3:\", pe3)\n", - " print(\"durations:\")\n", - " if pex is not None:\n", - " print(\"xp:\", pex - psx)\n", - " print(\"v1:\", pe1 - ps1)\n", - " print(\"v2:\", pe2 - ps2)\n", - " if ps3 is not None:\n", - " print(\"v3:\", pe3 - ps3)\n", - " print(\"\")\n", - "\n", - " if (\n", - " len(ps1) != len(ps2)\n", - " or len(pe1) != len(pe2)\n", - " or not np.allclose(ps1, ps2)\n", - " or not np.allclose(pe1, pe2)\n", - " ):\n", - " print(\"Warning: DIFFERENT PASSIVE PREDICTIONS v1/v2\")\n", - " if (\n", - " ps3 is not None\n", - " and pe3 is not None\n", - " and (\n", - " len(ps3) != len(ps2)\n", - " or len(pe3) != len(pe2)\n", - " or not np.allclose(ps3, ps2)\n", - " or not np.allclose(pe3, pe2)\n", - " )\n", - " ):\n", - " print(\"Warning: DIFFERENT PASSIVE PREDICTIONS v2/v3\")\n", - " if (\n", - " psx is not None\n", - " and pex is not None\n", - " and (\n", - " len(psx) != len(ps1)\n", - " or len(pex) != len(pe1)\n", - " or not np.allclose(psx, ps1)\n", - " or not np.allclose(pex, pe1)\n", - " )\n", - " ):\n", - " print(\"Warning: PASSIVE v1 DISAGREES WITH EXPECTED\")\n", - " if (\n", - " psx is not None\n", - " and pex is not None\n", - " and (\n", - " len(psx) != len(ps2)\n", - " or len(pex) != len(pe2)\n", - " or not np.allclose(psx, ps2)\n", - " or not np.allclose(pex, pe2)\n", - " )\n", - " ):\n", - " print(\"Warning: PASSIVE v2 DISAGREES WITH EXPECTED\")\n", - " if (\n", - " ps3 is not None\n", - " and pe3 is not None\n", - " and psx is not None\n", - " and pex is not None\n", - " and (\n", - " len(psx) != len(ps3)\n", - " or len(pex) != len(pe3)\n", - " or not np.allclose(psx, ps3)\n", - " or not np.allclose(pex, pe3)\n", - " )\n", - " ):\n", - " print(\"Warning: PASSIVE v3 DISAGREES WITH EXPECTED\")\n", - "\n", - " best_ps = ps1\n", - " best_pe = pe1\n", - "\n", - " for i in range(min(len(best_ps), len(psx))):\n", - " if best_ps[i] == psx[i] and best_pe[i] == pex[i]:\n", - " continue\n", - "\n", - " for ps, pe, tit in (\n", - " (psx[i], pex[i], \"expected\"),\n", - " (best_ps[i], best_pe[i], \"v1\"),\n", - " ):\n", - " plt.figure(figsize=(12, 9))\n", - " i0 = max(0, ps - 1)\n", - " i1 = pe + 2\n", - " if i1 >= len(its_raw):\n", - " i1 = None\n", - " plt.pcolormesh(its_raw[i0:i1], depths_raw[:50], signals_raw[i0:i1, :50].T)\n", - " plt.gca().invert_yaxis()\n", - " plt.title(\"passive #{}, {}\".format(i, tit))\n", - " plt.show()\n", - "\n", - " print(\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(12, 9))\n", - "plt.pcolormesh(its_raw, depths_raw, signals_raw.T)\n", - "plt.gca().invert_yaxis()\n", - "plt.title(sample_path)\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "_ = \"\"\"\n", - "length: 10259\n", - "starts:\n", - "xp: [ 0 3120 6540 9960]\n", - "v1: [ 0 3120 6539 9959]\n", - "v2: [ 0 3120 6539 9959]\n", - "v3: [ 0 3120 6540 9960]\n", - "ends:\n", - "xp: [ 120 3540 6960 10259]\n", - "v1: [ 120 3540 6959 10259]\n", - "v2: [ 120 3540 6701 10259]\n", - "v3: [ 120 3540 6960 10260]\n", - "durations:\n", - "xp: [120 420 420 299]\n", - "v1: [120 420 420 300]\n", - "v2: [120 420 162 300]\n", - "v3: [120 420 420 300]\n", - "\"\"\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "i0 = 10200\n", - "i1 = 10259\n", - "i0 -= 1\n", - "i1 += 2\n", - "plt.figure(figsize=(12, 9))\n", - "plt.pcolormesh(its_raw[i0:i1], depths_raw[:50], signals_raw[i0:i1, :50].T)\n", - "plt.gca().invert_yaxis()\n", - "plt.title(\"{}-{}\".format(i0, i1 - 1))\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sample_path = (\n", - " \"MinasPassage/december2017/december2017_D20180222-T145219_D20180222-T142214\"\n", - ")\n", - "\n", - "fname_raw = os.path.join(root_data_dir, sample_path + \"_Sv_raw.csv\")\n", - "ts_raw, depths_raw, signals_raw = echofilter.raw.loader.transect_loader(\n", - " fname_raw, warn_row_overflow=0\n", - ")\n", - "is_upward_facing = depths_raw[-1] < depths_raw[0]\n", - "\n", - "nt = len(ts_raw)\n", - "print(\"length: {}\".format(nt))\n", - "its_raw = np.arange(len(ts_raw))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(12, 9))\n", - "plt.pcolormesh(its_raw, depths_raw, signals_raw.T)\n", - "plt.gca().invert_yaxis()\n", - "plt.title(sample_path)\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "i0 = 250\n", - "i1 = 350\n", - "i0 -= 1\n", - "i1 += 2\n", - "plt.figure(figsize=(12, 9))\n", - "plt.pcolormesh(its_raw[i0:i1], depths_raw[:50], signals_raw[i0:i1, :50].T)\n", - "plt.gca().invert_yaxis()\n", - "plt.title(\"{}-{}\".format(i0, i1 - 1))\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "bad_sample_paths = [\n", - " \"mobile/Survey01/Survey01_GR1_S1A_E\",\n", - " \"mobile/Survey03/Survey03_GR2_N5W_survey3\",\n", - " \"mobile/Survey03/Survey03_GR4_N0A_survey3\",\n", - " \"mobile/Survey04/Survey04_GR1_N3A\",\n", - " \"mobile/Survey04/Survey04_GR2_N5A\",\n", - " \"mobile/Survey05/Survey05_GR1_N1A_survey5\",\n", - " \"mobile/Survey07/Survey07_GR2_N1W_survey7\",\n", - " \"mobile/Survey10/Survey10_GR1_N0A_E\",\n", - " \"mobile/Survey12/Survey12_GR4_N5A_E\",\n", - " \"mobile/Survey01/Survey01_GR1_S2A_E\",\n", - " \"mobile/Survey01/Survey01_GR1_S2W_E\",\n", - " \"mobile/Survey11/Survey11_GR1_S2A_E\",\n", - "]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(\"Begin running on {} paths\\n\".format(len(bad_sample_paths)))\n", - "\n", - "for i_sample, sample_path in enumerate(bad_sample_paths):\n", - "\n", - " print(\n", - " \"{:4d}/{:4d} {}\".format(\n", - " i_sample + 1,\n", - " len(bad_sample_paths),\n", - " datetime.datetime.now().strftime(\"%A, %B %d, %Y at %H:%M:%S\"),\n", - " )\n", - " )\n", - " print(sample_path)\n", - "\n", - " fname_raw = os.path.join(root_data_dir, sample_path + \"_Sv_raw.csv\")\n", - " ts_raw, depths_raw, signals_raw = echofilter.raw.loader.transect_loader(\n", - " fname_raw, warn_row_overflow=0\n", - " )\n", - " is_upward_facing = depths_raw[-1] < depths_raw[0]\n", - "\n", - " nt = len(ts_raw)\n", - " print(\"length: {}\".format(nt))\n", - " its_raw = np.arange(len(ts_raw))\n", - "\n", - " plt.figure(figsize=(12, 9))\n", - " plt.pcolormesh(its_raw, depths_raw, signals_raw.T)\n", - " plt.gca().invert_yaxis()\n", - " plt.title(sample_path)\n", - " plt.show()\n", - "\n", - " print(\"\")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.15" - } - }, - "nbformat": 4, - "nbformat_minor": 4 + "nbformat": 4, + "nbformat_minor": 4 } diff --git a/notebooks/Plot Metrics Distribution.ipynb b/notebooks/Plot Metrics Distribution.ipynb index a1e0418e..6dbe7a4a 100644 --- a/notebooks/Plot Metrics Distribution.ipynb +++ b/notebooks/Plot Metrics Distribution.ipynb @@ -1,145 +1,145 @@ { - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import scipy.stats" - ] + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import scipy.stats" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "\n", + "sns.set()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv(\"model_best.meters.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df = df.drop(columns=[\"Accuracy\", \"Precision\", \"Recall\", \"F1\", \"Jaccard\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.columns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for col_name in df.columns:\n", + " sns.distplot(df[col_name])\n", + " plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(10, 10))\n", + "plt.plot([5, 85], [5, 85], \"-\", color=(0.3, 0.3, 0.3))\n", + "sns.scatterplot(df[\"Active target (bottom)\"], df[\"Active output (bottom)\"])\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "scipy.stats.pearsonr(df[\"Active target (bottom)\"], df[\"Active output (bottom)\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(10, 10))\n", + "plt.plot([0, 50], [0, 50], \"-\", color=(0.3, 0.3, 0.3))\n", + "sns.scatterplot(df[\"Active target (top)\"], df[\"Active output (top)\"])\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "scipy.stats.pearsonr(df[\"Active target (top)\"], df[\"Active output (top)\"])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import matplotlib.pyplot as plt\n", - "import seaborn as sns\n", - "\n", - "sns.set()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df = pd.read_csv(\"model_best.meters.csv\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df = df.drop(columns=[\"Accuracy\", \"Precision\", \"Recall\", \"F1\", \"Jaccard\"])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df.columns" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "for col_name in df.columns:\n", - " sns.distplot(df[col_name])\n", - " plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(10, 10))\n", - "plt.plot([5, 85], [5, 85], \"-\", color=(0.3, 0.3, 0.3))\n", - "sns.scatterplot(df[\"Active target (bottom)\"], df[\"Active output (bottom)\"])\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "scipy.stats.pearsonr(df[\"Active target (bottom)\"], df[\"Active output (bottom)\"])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(10, 10))\n", - "plt.plot([0, 50], [0, 50], \"-\", color=(0.3, 0.3, 0.3))\n", - "sns.scatterplot(df[\"Active target (top)\"], df[\"Active output (top)\"])\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "scipy.stats.pearsonr(df[\"Active target (top)\"], df[\"Active output (top)\"])" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.9" - } - }, - "nbformat": 4, - "nbformat_minor": 2 + "nbformat": 4, + "nbformat_minor": 2 } diff --git a/notebooks/Plot results.ipynb b/notebooks/Plot results.ipynb index 0d8a9c9f..2706ccfe 100644 --- a/notebooks/Plot results.ipynb +++ b/notebooks/Plot results.ipynb @@ -1,326 +1,326 @@ { - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cd .." - ] + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cd .." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import copy\n", + "import os" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "\n", + "sns.set()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import echofilter.plotting\n", + "import echofilter.raw" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def declare_cc_cmap(cmap, name):\n", + " \"\"\"\n", + " Register a colorcet colormap in matplotlib.pyplot.\n", + "\n", + " Parameters\n", + " ----------\n", + " cmap : list of hex str\n", + " List of colors in the colormap, with equispaced\n", + " samples. Each entry should be a hexadecimal\n", + " string.\n", + " name : str\n", + " Name of the colormap. The colormap will be\n", + " available in matplotlib with this name, and can\n", + " be used with `plt.set_cmap(name)`.\n", + " \"\"\"\n", + " n = len(cmap)\n", + " R = [int(h.lstrip(\"#\")[:2], 16) / 255 for h in cmap]\n", + " G = [int(h.lstrip(\"#\")[2:4], 16) / 255 for h in cmap]\n", + " B = [int(h.lstrip(\"#\")[4:], 16) / 255 for h in cmap]\n", + "\n", + " R = [(i / (n - 1), v, v) for i, v in enumerate(R)]\n", + " G = [(i / (n - 1), v, v) for i, v in enumerate(G)]\n", + " B = [(i / (n - 1), v, v) for i, v in enumerate(B)]\n", + "\n", + " cdict = {\"red\": R, \"green\": G, \"blue\": B}\n", + "\n", + " plt.register_cmap(name=name, data=cdict, lut=n)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " import colorcet as cc\n", + "\n", + " declare_cc_cmap(cc.fire, \"fire\")\n", + " print(\"Declared fire colormap.\")\n", + " declare_cc_cmap(cc.rainbow, \"rainbow\")\n", + " print(\"Declared rainbow colormap.\")\n", + "except ImportError:\n", + " print(\"colorcet not installed\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "turbulence_color2 = echofilter.plotting.TURBULENCE_COLOR_DARK\n", + "bottom_color2 = echofilter.plotting.BOTTOM_COLOR_DARK" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "root_data_dir = \"/media/scott/scratch/Datasets/dsforce/surveyExports\"\n", + "\n", + "# first val sample for stationary\n", + "dataset = \"MinasPassage\"\n", + "sample = \"december2017/december2017_D20180108-T045216_D20180108-T102216\"\n", + "\n", + "# first val sample for mobile\n", + "# dataset = 'mobile'\n", + "# sample = 'Survey05/Survey05_GR1_S1W_survey5'\n", + "\n", + "# sample = 'Survey07/Survey07_GR4_N5W_survey7'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "transect = echofilter.raw.manipulate.load_decomposed_transect_mask(\n", + " os.path.join(root_data_dir, dataset, sample),\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(15, 9))\n", + "echofilter.plotting.plot_transect(transect, x_scale=\"index\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fname_top1 = os.path.join(root_data_dir, dataset, sample + \"_turbulence.evl\")\n", + "fname_top2 = os.path.join(root_data_dir, dataset, sample + \"_air.evl\")\n", + "fname_bot = os.path.join(root_data_dir, dataset, sample + \"_bottom.evl\")\n", + "\n", + "if os.path.isfile(fname_top1):\n", + " t_top, d_top = echofilter.raw.loader.evl_loader(fname_top1)\n", + "elif os.path.isfile(fname_top2):\n", + " t_top, d_top = echofilter.raw.loader.evl_loader(fname_top2)\n", + "else:\n", + " t_top = d_top = None\n", + "if os.path.isfile(fname_bot):\n", + " t_bot, d_bot = echofilter.raw.loader.evl_loader(fname_bot)\n", + "else:\n", + " t_bot = d_bot = None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "processed_dir = \"/home/scott/Documents/git/deepsense/dsforce/echofilter/processed/\"\n", + "fname_top = os.path.join(processed_dir, dataset, sample + \"_Sv_raw.csv.top.evl\")\n", + "fname_bot = os.path.join(processed_dir, dataset, sample + \"_Sv_raw.csv.bottom.evl\")\n", + "\n", + "t_top_gen, d_top_gen = echofilter.raw.loader.evl_loader(fname_top)\n", + "t_bot_gen, d_bot_gen = echofilter.raw.loader.evl_loader(fname_bot)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.plot(\n", + " np.arange(transect[\"timestamps\"].shape[0]),\n", + " np.interp(transect[\"timestamps\"], t_top_gen, d_top_gen),\n", + " turbulence_color2,\n", + ")\n", + "plt.plot(\n", + " np.arange(transect[\"timestamps\"].shape[0]),\n", + " np.interp(transect[\"timestamps\"], t_bot_gen, d_bot_gen),\n", + " bottom_color2,\n", + ")\n", + "plt.gca().invert_yaxis()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(15, 9))\n", + "echofilter.plotting.plot_transect(\n", + " transect, x_scale=\"time\" if dataset == \"mobile\" else \"index\"\n", + ")\n", + "\n", + "if dataset == \"mobile\":\n", + " tt = transect[\"timestamps\"] - transect[\"timestamps\"][0]\n", + "else:\n", + " tt = np.arange(transect[\"timestamps\"].shape[0])\n", + "plt.plot(\n", + " tt,\n", + " np.interp(transect[\"timestamps\"], t_top_gen, d_top_gen),\n", + " turbulence_color2,\n", + " linewidth=2,\n", + ")\n", + "plt.plot(\n", + " tt,\n", + " np.interp(transect[\"timestamps\"], t_bot_gen, d_bot_gen),\n", + " bottom_color2,\n", + " linewidth=2,\n", + ")\n", + "if dataset == \"mobile\":\n", + " plt.ylim([0, 67])\n", + " plt.gca().invert_yaxis()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cmap = \"viridis\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(15, 9))\n", + "echofilter.plotting.plot_transect(\n", + " transect,\n", + " x_scale=\"time\" if dataset == \"mobile\" else \"index\",\n", + " turbulence_color=\"k\",\n", + " bottom_color=\"k\",\n", + " surface_color=\"k\",\n", + " passive_color=\"k\",\n", + " removed_color=\"k\",\n", + " cmap=cmap,\n", + ")\n", + "\n", + "if dataset == \"mobile\":\n", + " tt = transect[\"timestamps\"] - transect[\"timestamps\"][0]\n", + "else:\n", + " tt = np.arange(transect[\"timestamps\"].shape[0])\n", + "plt.plot(\n", + " tt,\n", + " np.interp(transect[\"timestamps\"], t_top_gen, d_top_gen),\n", + " \"w\",\n", + " linewidth=2,\n", + ")\n", + "plt.plot(\n", + " tt,\n", + " np.interp(transect[\"timestamps\"], t_bot_gen, d_bot_gen),\n", + " \"w\",\n", + " linewidth=2,\n", + ")\n", + "if dataset == \"mobile\":\n", + " plt.ylim([0, 67])\n", + "else:\n", + " plt.ylim([5, 50])\n", + " plt.xlim([0, 1000])\n", + "\n", + "plt.gca().invert_yaxis()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import copy\n", - "import os" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import matplotlib.pyplot as plt\n", - "import seaborn as sns\n", - "\n", - "sns.set()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import echofilter.plotting\n", - "import echofilter.raw" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def declare_cc_cmap(cmap, name):\n", - " \"\"\"\n", - " Register a colorcet colormap in matplotlib.pyplot.\n", - "\n", - " Parameters\n", - " ----------\n", - " cmap : list of hex str\n", - " List of colors in the colormap, with equispaced\n", - " samples. Each entry should be a hexadecimal\n", - " string.\n", - " name : str\n", - " Name of the colormap. The colormap will be\n", - " available in matplotlib with this name, and can\n", - " be used with `plt.set_cmap(name)`.\n", - " \"\"\"\n", - " n = len(cmap)\n", - " R = [int(h.lstrip(\"#\")[:2], 16) / 255 for h in cmap]\n", - " G = [int(h.lstrip(\"#\")[2:4], 16) / 255 for h in cmap]\n", - " B = [int(h.lstrip(\"#\")[4:], 16) / 255 for h in cmap]\n", - "\n", - " R = [(i / (n - 1), v, v) for i, v in enumerate(R)]\n", - " G = [(i / (n - 1), v, v) for i, v in enumerate(G)]\n", - " B = [(i / (n - 1), v, v) for i, v in enumerate(B)]\n", - "\n", - " cdict = {\"red\": R, \"green\": G, \"blue\": B}\n", - "\n", - " plt.register_cmap(name=name, data=cdict, lut=n)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "try:\n", - " import colorcet as cc\n", - "\n", - " declare_cc_cmap(cc.fire, \"fire\")\n", - " print(\"Declared fire colormap.\")\n", - " declare_cc_cmap(cc.rainbow, \"rainbow\")\n", - " print(\"Declared rainbow colormap.\")\n", - "except ImportError:\n", - " print(\"colorcet not installed\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "turbulence_color2 = echofilter.plotting.TURBULENCE_COLOR_DARK\n", - "bottom_color2 = echofilter.plotting.BOTTOM_COLOR_DARK" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "root_data_dir = \"/media/scott/scratch/Datasets/dsforce/surveyExports\"\n", - "\n", - "# first val sample for stationary\n", - "dataset = \"MinasPassage\"\n", - "sample = \"december2017/december2017_D20180108-T045216_D20180108-T102216\"\n", - "\n", - "# first val sample for mobile\n", - "# dataset = 'mobile'\n", - "# sample = 'Survey05/Survey05_GR1_S1W_survey5'\n", - "\n", - "# sample = 'Survey07/Survey07_GR4_N5W_survey7'" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "transect = echofilter.raw.manipulate.load_decomposed_transect_mask(\n", - " os.path.join(root_data_dir, dataset, sample),\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(15, 9))\n", - "echofilter.plotting.plot_transect(transect, x_scale=\"index\")\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fname_top1 = os.path.join(root_data_dir, dataset, sample + \"_turbulence.evl\")\n", - "fname_top2 = os.path.join(root_data_dir, dataset, sample + \"_air.evl\")\n", - "fname_bot = os.path.join(root_data_dir, dataset, sample + \"_bottom.evl\")\n", - "\n", - "if os.path.isfile(fname_top1):\n", - " t_top, d_top = echofilter.raw.loader.evl_loader(fname_top1)\n", - "elif os.path.isfile(fname_top2):\n", - " t_top, d_top = echofilter.raw.loader.evl_loader(fname_top2)\n", - "else:\n", - " t_top = d_top = None\n", - "if os.path.isfile(fname_bot):\n", - " t_bot, d_bot = echofilter.raw.loader.evl_loader(fname_bot)\n", - "else:\n", - " t_bot = d_bot = None" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "processed_dir = \"/home/scott/Documents/git/deepsense/dsforce/echofilter/processed/\"\n", - "fname_top = os.path.join(processed_dir, dataset, sample + \"_Sv_raw.csv.top.evl\")\n", - "fname_bot = os.path.join(processed_dir, dataset, sample + \"_Sv_raw.csv.bottom.evl\")\n", - "\n", - "t_top_gen, d_top_gen = echofilter.raw.loader.evl_loader(fname_top)\n", - "t_bot_gen, d_bot_gen = echofilter.raw.loader.evl_loader(fname_bot)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.plot(\n", - " np.arange(transect[\"timestamps\"].shape[0]),\n", - " np.interp(transect[\"timestamps\"], t_top_gen, d_top_gen),\n", - " turbulence_color2,\n", - ")\n", - "plt.plot(\n", - " np.arange(transect[\"timestamps\"].shape[0]),\n", - " np.interp(transect[\"timestamps\"], t_bot_gen, d_bot_gen),\n", - " bottom_color2,\n", - ")\n", - "plt.gca().invert_yaxis()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(15, 9))\n", - "echofilter.plotting.plot_transect(\n", - " transect, x_scale=\"time\" if dataset == \"mobile\" else \"index\"\n", - ")\n", - "\n", - "if dataset == \"mobile\":\n", - " tt = transect[\"timestamps\"] - transect[\"timestamps\"][0]\n", - "else:\n", - " tt = np.arange(transect[\"timestamps\"].shape[0])\n", - "plt.plot(\n", - " tt,\n", - " np.interp(transect[\"timestamps\"], t_top_gen, d_top_gen),\n", - " turbulence_color2,\n", - " linewidth=2,\n", - ")\n", - "plt.plot(\n", - " tt,\n", - " np.interp(transect[\"timestamps\"], t_bot_gen, d_bot_gen),\n", - " bottom_color2,\n", - " linewidth=2,\n", - ")\n", - "if dataset == \"mobile\":\n", - " plt.ylim([0, 67])\n", - " plt.gca().invert_yaxis()\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cmap = \"viridis\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(15, 9))\n", - "echofilter.plotting.plot_transect(\n", - " transect,\n", - " x_scale=\"time\" if dataset == \"mobile\" else \"index\",\n", - " turbulence_color=\"k\",\n", - " bottom_color=\"k\",\n", - " surface_color=\"k\",\n", - " passive_color=\"k\",\n", - " removed_color=\"k\",\n", - " cmap=cmap,\n", - ")\n", - "\n", - "if dataset == \"mobile\":\n", - " tt = transect[\"timestamps\"] - transect[\"timestamps\"][0]\n", - "else:\n", - " tt = np.arange(transect[\"timestamps\"].shape[0])\n", - "plt.plot(\n", - " tt,\n", - " np.interp(transect[\"timestamps\"], t_top_gen, d_top_gen),\n", - " \"w\",\n", - " linewidth=2,\n", - ")\n", - "plt.plot(\n", - " tt,\n", - " np.interp(transect[\"timestamps\"], t_bot_gen, d_bot_gen),\n", - " \"w\",\n", - " linewidth=2,\n", - ")\n", - "if dataset == \"mobile\":\n", - " plt.ylim([0, 67])\n", - "else:\n", - " plt.ylim([5, 50])\n", - " plt.xlim([0, 1000])\n", - "\n", - "plt.gca().invert_yaxis()\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.9" - } - }, - "nbformat": 4, - "nbformat_minor": 2 + "nbformat": 4, + "nbformat_minor": 2 } diff --git a/notebooks/Splitting Passive Data.ipynb b/notebooks/Splitting Passive Data.ipynb index 3478c24d..03108a91 100644 --- a/notebooks/Splitting Passive Data.ipynb +++ b/notebooks/Splitting Passive Data.ipynb @@ -1,858 +1,858 @@ { - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cd .." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import copy\n", - "import os" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import matplotlib.pyplot as plt\n", - "import seaborn as sns\n", - "\n", - "sns.set()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import echofilter.raw" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "turbulence_color = \"c\"\n", - "bottom_color = \"#00ee00\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "root_data_dir = \"/media/scott/scratch/Datasets/dsforce/surveyExports\"\n", - "\n", - "# example with 1 passive period, 1 turbulence cut out\n", - "# sample = 'mobile/Survey16/Survey16_GR3_N1W_E'\n", - "\n", - "# example with lots of short passive periods, 1 proper passive period, 1 turbulence cut out\n", - "sample = \"mobile/Survey17/Survey17_GR1_N0W_E\"\n", - "\n", - "# sample done incorrectly\n", - "# sample = 'MinasPassage/march2018/march2018_D20180330-T202218_D20180331-T015214'\n", - "\n", - "fname_raw = os.path.join(root_data_dir, sample + \"_Sv_raw.csv\")\n", - "fname_masked = os.path.join(root_data_dir, sample + \"_Sv.csv\")\n", - "\n", - "ts_raw, depths_raw, signals_raw = echofilter.raw.loader.transect_loader(fname_raw)\n", - "ts_mskd, depths_mskd, signals_mskd = echofilter.raw.loader.transect_loader(fname_masked)\n", - "\n", - "fname_top1 = os.path.join(root_data_dir, sample + \"_turbulence.evl\")\n", - "fname_top2 = os.path.join(root_data_dir, sample + \"_air.evl\")\n", - "fname_bot = os.path.join(root_data_dir, sample + \"_bottom.evl\")\n", - "if os.path.isfile(fname_top1):\n", - " t_top, d_top = echofilter.raw.loader.evl_loader(fname_top1)\n", - "elif os.path.isfile(fname_top2):\n", - " t_top, d_top = echofilter.raw.loader.evl_loader(fname_top2)\n", - "else:\n", - " t_top = d_top = np.nan\n", - "if os.path.isfile(fname_bot):\n", - " t_bot, d_bot = echofilter.raw.loader.evl_loader(fname_bot)\n", - "else:\n", - " t_bot = d_bot = np.nan" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "ts_new, d_top_new, d_bot_new = echofilter.raw.manipulate.make_lines_from_masked_csv(\n", - " fname_masked\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "np.min(signals_raw), np.max(signals_raw)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "ts_raw.shape, depths_raw.shape, signals_raw.shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(12, 12))\n", - "plt.pcolormesh(ts_raw, depths_raw, signals_raw.T)\n", - "plt.plot(t_top, d_top, turbulence_color)\n", - "plt.plot(t_bot, d_bot, bottom_color)\n", - "plt.plot(ts_new, d_top_new, \"k\")\n", - "plt.plot(ts_new, d_bot_new, \"w\")\n", - "plt.gca().invert_yaxis()\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Finding passive data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sns.distplot(np.reshape(signals_raw, (-1,)))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "np.percentile(signals_raw, 95)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "np.percentile(signals_raw, 99.5)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(12, 12))\n", - "plt.pcolormesh(ts_raw, depths_raw, (signals_raw > np.percentile(signals_raw, 95)).T)\n", - "plt.plot(t_top, d_top, turbulence_color)\n", - "plt.plot(t_bot, d_bot, bottom_color)\n", - "plt.gca().invert_yaxis()\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sum(~np.any(signals_raw > np.percentile(signals_raw, 95), axis=1))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(12, 12))\n", - "plt.pcolormesh(ts_raw, depths_raw, (signals_raw > np.percentile(signals_raw, 97)).T)\n", - "plt.plot(t_top, d_top, turbulence_color)\n", - "plt.plot(t_bot, d_bot, bottom_color)\n", - "plt.gca().invert_yaxis()\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(12, 12))\n", - "plt.pcolormesh(ts_raw, depths_raw, (signals_raw > 0).T)\n", - "plt.plot(t_top, d_top, turbulence_color)\n", - "plt.plot(t_bot, d_bot, bottom_color)\n", - "plt.gca().invert_yaxis()\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(12, 12))\n", - "plt.pcolormesh(ts_raw, depths_raw[:100], signals_raw[:, :100].T)\n", - "plt.gca().invert_yaxis()\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(12, 12))\n", - "plt.pcolormesh(ts_raw, depths_raw[:34], signals_raw[:, :34].T)\n", - "plt.gca().invert_yaxis()\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(12, 12))\n", - "plt.pcolormesh(ts_raw[234:257], depths_raw[:34], signals_raw[234:257, :34].T)\n", - "plt.gca().invert_yaxis()\n", - "plt.show()\n", - "\n", - "plt.figure(figsize=(12, 12))\n", - "plt.pcolormesh(ts_raw[235:256], depths_raw[:34], signals_raw[235:256, :34].T)\n", - "plt.gca().invert_yaxis()\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(12, 12))\n", - "plt.pcolormesh(ts_raw, depths_raw[:26], signals_raw[:, :26].T)\n", - "plt.gca().invert_yaxis()\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(12, 12))\n", - "plt.plot(np.median(signals_raw[:, :45], axis=0))\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "np.min(d_top)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sum(depths_raw <= np.min(d_top))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sns.distplot(np.reshape(signals_raw[:, :34], (-1,)))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sns.distplot(np.reshape(signals_raw[:, 34:], (-1,)))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(12, 12))\n", - "plt.pcolormesh(ts_raw, depths_raw[:34], signals_raw[:, :34].T > 0)\n", - "plt.gca().invert_yaxis()\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(12, 12))\n", - "plt.pcolormesh(ts_raw, depths_raw, (signals_raw > 0).T)\n", - "plt.plot(t_top, d_top, turbulence_color)\n", - "plt.plot(t_bot, d_bot, bottom_color)\n", - "plt.gca().invert_yaxis()\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "for pc in range(100):\n", - " cut_off = np.percentile(signals_raw[:, 34:], pc)\n", - " print(\n", - " \"{:3d}\\t {:7.2f}\\t {:3d}\".format(\n", - " pc, cut_off, sum(~np.any(signals_raw[:, 34:] > cut_off, axis=1))\n", - " )\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "for pc in range(100):\n", - " cut_off = np.percentile(signals_raw[:, :34], pc)\n", - " print(\n", - " \"{:3d}\\t {:7.2f}\\t {:3d}\".format(\n", - " pc, cut_off, sum(~np.any(signals_raw[:, :34] > cut_off, axis=1))\n", - " )\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(12, 12))\n", - "plt.plot(np.max(signals_raw[:, :34], axis=1))\n", - "plt.plot(np.min(signals_raw[:, :34], axis=1))\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(12, 12))\n", - "plt.plot(np.max(signals_raw[:, :25], axis=1))\n", - "plt.plot(np.min(signals_raw[:, :25], axis=1))\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.plot(np.mean(np.diff(signals_raw, axis=0), axis=1))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(12, 12))\n", - "plt.plot(np.median(np.diff(signals_raw, axis=0), axis=1))\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(12, 12))\n", - "plt.plot(np.median(np.diff(signals_raw[220:280, :], axis=0), axis=1))\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "md = np.median(np.diff(signals_raw[220:280, :], axis=0), axis=1)\n", - "print(np.median(md) + (np.percentile(md, 75) - np.percentile(md, 25)) * 8)\n", - "print(np.median(md) - (np.percentile(md, 75) - np.percentile(md, 25)) * 8)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(12, 12))\n", - "plt.plot(np.median(np.diff(signals_raw[280:, :], axis=0), axis=1))\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "md = np.median(np.diff(signals_raw[280:, :], axis=0), axis=1)\n", - "print(np.median(md) + (np.percentile(md, 75) - np.percentile(md, 25)) * 8)\n", - "print(np.median(md) - (np.percentile(md, 75) - np.percentile(md, 25)) * 8)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "md = np.median(np.diff(signals_raw, axis=0), axis=1)\n", - "print(np.median(md) + (np.percentile(md, 75) - np.percentile(md, 25)) * 8)\n", - "print(np.median(md) - (np.percentile(md, 75) - np.percentile(md, 25)) * 8)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(12, 12))\n", - "plt.plot(np.median(np.diff(signals_raw[:, :34], axis=0), axis=1))\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(12, 12))\n", - "plt.plot(np.median(np.diff(signals_raw[:, :34], axis=0), axis=1))\n", - "plt.ylim([-1, 1])\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(12, 12))\n", - "plt.plot(np.median(np.diff(signals_raw[230:260, :34], axis=0), axis=1))\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(12, 12))\n", - "plt.plot(np.median(np.diff(signals_raw[230:260, :34], axis=0), axis=1))\n", - "plt.ylim([-10, 10])\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(12, 12))\n", - "plt.plot(np.median(np.diff(signals_raw[230:260, :26], axis=0), axis=1))\n", - "plt.ylim([-5, 5])\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(12, 12))\n", - "plt.plot(np.median(np.diff(signals_raw[230:260, :26], axis=0), axis=1))\n", - "# plt.ylim([-5, 5])\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(12, 12))\n", - "plt.plot(np.median(np.diff(signals_raw[230:260, :26], axis=0), axis=1))\n", - "plt.ylim([-5, 5])\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "md = np.median(np.diff(signals_raw[230:260, :34], axis=0), axis=1)\n", - "print(np.median(md) + (np.percentile(md, 75) - np.percentile(md, 25)) * 8)\n", - "print(np.median(md) - (np.percentile(md, 75) - np.percentile(md, 25)) * 8)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "md = np.median(np.diff(signals_raw[:, :34], axis=0), axis=1)\n", - "print(np.median(md) + (np.percentile(md, 75) - np.percentile(md, 25)) * 8)\n", - "print(np.median(md) - (np.percentile(md, 75) - np.percentile(md, 25)) * 8)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "threshold_low = np.median(md) - (np.percentile(md, 75) - np.percentile(md, 25)) * 8\n", - "threshold_high = np.median(md) + (np.percentile(md, 75) - np.percentile(md, 25)) * 8\n", - "indices_possible_start = np.nonzero(md < threshold_low)[0]\n", - "indices_possible_end = np.nonzero(md > threshold_high)[0]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "indices_possible_start" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def find_passive_edges(signals_raw, n_depth_use=None, deviation=8):\n", - "\n", - " # n_depth_use = 34\n", - "\n", - " if n_depth_use is None:\n", - " n_depth_use = signals_raw.shape[1]\n", - "\n", - " indices_passive_start = []\n", - " indices_passive_end = []\n", - "\n", - " md = np.median(np.diff(signals_raw[:, :n_depth_use], axis=0), axis=1)\n", - "\n", - " threshold_low = (\n", - " np.median(md) - (np.percentile(md, 75) - np.percentile(md, 25)) * deviation\n", - " )\n", - " threshold_high = (\n", - " np.median(md) + (np.percentile(md, 75) - np.percentile(md, 25)) * deviation\n", - " )\n", - " indices_possible_start = np.nonzero(md < threshold_low)[0]\n", - " indices_possible_end = np.nonzero(md > threshold_high)[0]\n", - " print(\"a\", indices_possible_end)\n", - "\n", - " current_index = 0\n", - "\n", - " if len(indices_possible_start) == 0 and len(indices_possible_end) == 0:\n", - " return np.array(indices_passive_start), np.array(indices_passive_end)\n", - "\n", - " if len(indices_possible_start) > 0:\n", - " indices_possible_start += 1\n", - "\n", - " if len(indices_possible_end) > 0:\n", - " indices_possible_end += 1\n", - "\n", - " print(\"b\", indices_possible_end)\n", - "\n", - " if (\n", - " len(indices_possible_start) == 0\n", - " or indices_possible_end[0] < indices_possible_start[0]\n", - " ):\n", - " indices_passive_start.append(0)\n", - " current_index = indices_possible_end[0]\n", - " indices_passive_end.append(current_index)\n", - " indices_possible_start = indices_possible_start[\n", - " indices_possible_start > current_index\n", - " ]\n", - " indices_possible_end = indices_possible_end[\n", - " indices_possible_end > current_index\n", - " ]\n", - "\n", - " print(\"c\", indices_possible_end)\n", - " print(\"c2\", indices_passive_end)\n", - "\n", - " while len(indices_possible_start) > 0:\n", - " current_index = indices_possible_start[0]\n", - " indices_passive_start.append(current_index)\n", - " baseline = signals_raw[current_index - 1, :n_depth_use]\n", - "\n", - " # Find first column which returns to the baseline value seen before passive region\n", - " offsets = np.nonzero(\n", - " np.median(baseline - signals_raw[current_index:, :n_depth_use], axis=1)\n", - " < threshold_high\n", - " )[0]\n", - " if len(offsets) == 0:\n", - " current_index = signals_raw.shape[0]\n", - " else:\n", - " current_index = current_index + offsets[0]\n", - " indices_passive_end.append(current_index)\n", - "\n", - " print(\"d\", indices_passive_end)\n", - "\n", - " # Remove preceding indices from the list of candidates\n", - " indices_possible_start = indices_possible_start[\n", - " indices_possible_start > current_index\n", - " ]\n", - " indices_possible_end = indices_possible_end[\n", - " indices_possible_end > current_index\n", - " ]\n", - "\n", - " print(\"e\", indices_passive_end)\n", - "\n", - " # Check the start was sufficiently inclusive.\n", - " if current_index < signals_raw.shape[0]:\n", - " baseline = signals_raw[current_index, :n_depth_use]\n", - " nonpassives = np.nonzero(\n", - " np.median(baseline - signals_raw[:current_index, :n_depth_use], axis=1)\n", - " < threshold_high\n", - " )[0]\n", - " if len(nonpassives) == 0:\n", - " indices_passive_start[-1] = 0\n", - " else:\n", - " indices_passive_start[-1] = min(\n", - " indices_passive_start[-1], nonpassives[-1] + 1\n", - " )\n", - "\n", - " print(\"f\", indices_passive_end)\n", - "\n", - " if (\n", - " len(indices_passive_start) > 1\n", - " and indices_passive_start[-1] <= indices_passive_end[-2]\n", - " ):\n", - " indices_passive_start = indices_passive_start[:-1]\n", - " indices_passive_end = indices_passive_end[:-2] + indices_passive_end[-1:]\n", - "\n", - " print(\"g\", indices_passive_end)\n", - "\n", - " return np.array(indices_passive_start), np.array(indices_passive_end)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "init_idx = 0\n", - "indices_passive_start, indices_passive_end = find_passive_edges(\n", - " signals_raw[init_idx:, :]\n", - ")\n", - "indices_passive_start += init_idx\n", - "indices_passive_end += init_idx" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "indices_passive_start" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "indices_passive_end" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "for index_start, index_end in zip(indices_passive_start, indices_passive_end):\n", - " plt.figure(figsize=(12, 12))\n", - " plt.pcolormesh(\n", - " ts_raw[index_start:index_end],\n", - " depths_raw[:34],\n", - " signals_raw[index_start:index_end, :34].T,\n", - " )\n", - " plt.gca().invert_yaxis()\n", - " plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "for index_start, index_end in zip(\n", - " np.concatenate(([0], indices_passive_end)),\n", - " np.concatenate((indices_passive_start, [signals_raw.shape[0]])),\n", - "):\n", - " if index_start == index_end:\n", - " continue\n", - " plt.figure(figsize=(12, 12))\n", - " plt.pcolormesh(\n", - " ts_raw[index_start:index_end],\n", - " depths_raw[:34],\n", - " signals_raw[index_start:index_end, :34].T,\n", - " )\n", - " plt.gca().invert_yaxis()\n", - " plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "for index_start, index_end in zip(indices_passive_start, indices_passive_end):\n", - " plt.figure(figsize=(12, 12))\n", - " plt.pcolormesh(\n", - " ts_raw[index_start:index_end],\n", - " depths_raw,\n", - " signals_raw[index_start:index_end, :].T,\n", - " )\n", - " plt.gca().invert_yaxis()\n", - " plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "for index_start, index_end in zip(\n", - " np.concatenate(([0], indices_passive_end)),\n", - " np.concatenate((indices_passive_start, [signals_raw.shape[0]])),\n", - "):\n", - " if index_start == index_end:\n", - " continue\n", - " plt.figure(figsize=(12, 12))\n", - " plt.pcolormesh(\n", - " ts_raw[index_start:index_end],\n", - " depths_raw,\n", - " signals_raw[index_start:index_end, :].T,\n", - " )\n", - " plt.gca().invert_yaxis()\n", - " plt.show()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.9" - } - }, - "nbformat": 4, - "nbformat_minor": 2 + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cd .." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import copy\n", + "import os" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "\n", + "sns.set()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import echofilter.raw" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "turbulence_color = \"c\"\n", + "bottom_color = \"#00ee00\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "root_data_dir = \"/media/scott/scratch/Datasets/dsforce/surveyExports\"\n", + "\n", + "# example with 1 passive period, 1 turbulence cut out\n", + "# sample = 'mobile/Survey16/Survey16_GR3_N1W_E'\n", + "\n", + "# example with lots of short passive periods, 1 proper passive period, 1 turbulence cut out\n", + "sample = \"mobile/Survey17/Survey17_GR1_N0W_E\"\n", + "\n", + "# sample done incorrectly\n", + "# sample = 'MinasPassage/march2018/march2018_D20180330-T202218_D20180331-T015214'\n", + "\n", + "fname_raw = os.path.join(root_data_dir, sample + \"_Sv_raw.csv\")\n", + "fname_masked = os.path.join(root_data_dir, sample + \"_Sv.csv\")\n", + "\n", + "ts_raw, depths_raw, signals_raw = echofilter.raw.loader.transect_loader(fname_raw)\n", + "ts_mskd, depths_mskd, signals_mskd = echofilter.raw.loader.transect_loader(fname_masked)\n", + "\n", + "fname_top1 = os.path.join(root_data_dir, sample + \"_turbulence.evl\")\n", + "fname_top2 = os.path.join(root_data_dir, sample + \"_air.evl\")\n", + "fname_bot = os.path.join(root_data_dir, sample + \"_bottom.evl\")\n", + "if os.path.isfile(fname_top1):\n", + " t_top, d_top = echofilter.raw.loader.evl_loader(fname_top1)\n", + "elif os.path.isfile(fname_top2):\n", + " t_top, d_top = echofilter.raw.loader.evl_loader(fname_top2)\n", + "else:\n", + " t_top = d_top = np.nan\n", + "if os.path.isfile(fname_bot):\n", + " t_bot, d_bot = echofilter.raw.loader.evl_loader(fname_bot)\n", + "else:\n", + " t_bot = d_bot = np.nan" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ts_new, d_top_new, d_bot_new = echofilter.raw.manipulate.make_lines_from_masked_csv(\n", + " fname_masked\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "np.min(signals_raw), np.max(signals_raw)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ts_raw.shape, depths_raw.shape, signals_raw.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(12, 12))\n", + "plt.pcolormesh(ts_raw, depths_raw, signals_raw.T)\n", + "plt.plot(t_top, d_top, turbulence_color)\n", + "plt.plot(t_bot, d_bot, bottom_color)\n", + "plt.plot(ts_new, d_top_new, \"k\")\n", + "plt.plot(ts_new, d_bot_new, \"w\")\n", + "plt.gca().invert_yaxis()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Finding passive data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.distplot(np.reshape(signals_raw, (-1,)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "np.percentile(signals_raw, 95)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "np.percentile(signals_raw, 99.5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(12, 12))\n", + "plt.pcolormesh(ts_raw, depths_raw, (signals_raw > np.percentile(signals_raw, 95)).T)\n", + "plt.plot(t_top, d_top, turbulence_color)\n", + "plt.plot(t_bot, d_bot, bottom_color)\n", + "plt.gca().invert_yaxis()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sum(~np.any(signals_raw > np.percentile(signals_raw, 95), axis=1))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(12, 12))\n", + "plt.pcolormesh(ts_raw, depths_raw, (signals_raw > np.percentile(signals_raw, 97)).T)\n", + "plt.plot(t_top, d_top, turbulence_color)\n", + "plt.plot(t_bot, d_bot, bottom_color)\n", + "plt.gca().invert_yaxis()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(12, 12))\n", + "plt.pcolormesh(ts_raw, depths_raw, (signals_raw > 0).T)\n", + "plt.plot(t_top, d_top, turbulence_color)\n", + "plt.plot(t_bot, d_bot, bottom_color)\n", + "plt.gca().invert_yaxis()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(12, 12))\n", + "plt.pcolormesh(ts_raw, depths_raw[:100], signals_raw[:, :100].T)\n", + "plt.gca().invert_yaxis()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(12, 12))\n", + "plt.pcolormesh(ts_raw, depths_raw[:34], signals_raw[:, :34].T)\n", + "plt.gca().invert_yaxis()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(12, 12))\n", + "plt.pcolormesh(ts_raw[234:257], depths_raw[:34], signals_raw[234:257, :34].T)\n", + "plt.gca().invert_yaxis()\n", + "plt.show()\n", + "\n", + "plt.figure(figsize=(12, 12))\n", + "plt.pcolormesh(ts_raw[235:256], depths_raw[:34], signals_raw[235:256, :34].T)\n", + "plt.gca().invert_yaxis()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(12, 12))\n", + "plt.pcolormesh(ts_raw, depths_raw[:26], signals_raw[:, :26].T)\n", + "plt.gca().invert_yaxis()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(12, 12))\n", + "plt.plot(np.median(signals_raw[:, :45], axis=0))\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "np.min(d_top)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sum(depths_raw <= np.min(d_top))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.distplot(np.reshape(signals_raw[:, :34], (-1,)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.distplot(np.reshape(signals_raw[:, 34:], (-1,)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(12, 12))\n", + "plt.pcolormesh(ts_raw, depths_raw[:34], signals_raw[:, :34].T > 0)\n", + "plt.gca().invert_yaxis()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(12, 12))\n", + "plt.pcolormesh(ts_raw, depths_raw, (signals_raw > 0).T)\n", + "plt.plot(t_top, d_top, turbulence_color)\n", + "plt.plot(t_bot, d_bot, bottom_color)\n", + "plt.gca().invert_yaxis()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for pc in range(100):\n", + " cut_off = np.percentile(signals_raw[:, 34:], pc)\n", + " print(\n", + " \"{:3d}\\t {:7.2f}\\t {:3d}\".format(\n", + " pc, cut_off, sum(~np.any(signals_raw[:, 34:] > cut_off, axis=1))\n", + " )\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for pc in range(100):\n", + " cut_off = np.percentile(signals_raw[:, :34], pc)\n", + " print(\n", + " \"{:3d}\\t {:7.2f}\\t {:3d}\".format(\n", + " pc, cut_off, sum(~np.any(signals_raw[:, :34] > cut_off, axis=1))\n", + " )\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(12, 12))\n", + "plt.plot(np.max(signals_raw[:, :34], axis=1))\n", + "plt.plot(np.min(signals_raw[:, :34], axis=1))\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(12, 12))\n", + "plt.plot(np.max(signals_raw[:, :25], axis=1))\n", + "plt.plot(np.min(signals_raw[:, :25], axis=1))\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.plot(np.mean(np.diff(signals_raw, axis=0), axis=1))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(12, 12))\n", + "plt.plot(np.median(np.diff(signals_raw, axis=0), axis=1))\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(12, 12))\n", + "plt.plot(np.median(np.diff(signals_raw[220:280, :], axis=0), axis=1))\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "md = np.median(np.diff(signals_raw[220:280, :], axis=0), axis=1)\n", + "print(np.median(md) + (np.percentile(md, 75) - np.percentile(md, 25)) * 8)\n", + "print(np.median(md) - (np.percentile(md, 75) - np.percentile(md, 25)) * 8)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(12, 12))\n", + "plt.plot(np.median(np.diff(signals_raw[280:, :], axis=0), axis=1))\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "md = np.median(np.diff(signals_raw[280:, :], axis=0), axis=1)\n", + "print(np.median(md) + (np.percentile(md, 75) - np.percentile(md, 25)) * 8)\n", + "print(np.median(md) - (np.percentile(md, 75) - np.percentile(md, 25)) * 8)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "md = np.median(np.diff(signals_raw, axis=0), axis=1)\n", + "print(np.median(md) + (np.percentile(md, 75) - np.percentile(md, 25)) * 8)\n", + "print(np.median(md) - (np.percentile(md, 75) - np.percentile(md, 25)) * 8)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(12, 12))\n", + "plt.plot(np.median(np.diff(signals_raw[:, :34], axis=0), axis=1))\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(12, 12))\n", + "plt.plot(np.median(np.diff(signals_raw[:, :34], axis=0), axis=1))\n", + "plt.ylim([-1, 1])\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(12, 12))\n", + "plt.plot(np.median(np.diff(signals_raw[230:260, :34], axis=0), axis=1))\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(12, 12))\n", + "plt.plot(np.median(np.diff(signals_raw[230:260, :34], axis=0), axis=1))\n", + "plt.ylim([-10, 10])\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(12, 12))\n", + "plt.plot(np.median(np.diff(signals_raw[230:260, :26], axis=0), axis=1))\n", + "plt.ylim([-5, 5])\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(12, 12))\n", + "plt.plot(np.median(np.diff(signals_raw[230:260, :26], axis=0), axis=1))\n", + "# plt.ylim([-5, 5])\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(12, 12))\n", + "plt.plot(np.median(np.diff(signals_raw[230:260, :26], axis=0), axis=1))\n", + "plt.ylim([-5, 5])\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "md = np.median(np.diff(signals_raw[230:260, :34], axis=0), axis=1)\n", + "print(np.median(md) + (np.percentile(md, 75) - np.percentile(md, 25)) * 8)\n", + "print(np.median(md) - (np.percentile(md, 75) - np.percentile(md, 25)) * 8)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "md = np.median(np.diff(signals_raw[:, :34], axis=0), axis=1)\n", + "print(np.median(md) + (np.percentile(md, 75) - np.percentile(md, 25)) * 8)\n", + "print(np.median(md) - (np.percentile(md, 75) - np.percentile(md, 25)) * 8)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "threshold_low = np.median(md) - (np.percentile(md, 75) - np.percentile(md, 25)) * 8\n", + "threshold_high = np.median(md) + (np.percentile(md, 75) - np.percentile(md, 25)) * 8\n", + "indices_possible_start = np.nonzero(md < threshold_low)[0]\n", + "indices_possible_end = np.nonzero(md > threshold_high)[0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "indices_possible_start" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def find_passive_edges(signals_raw, n_depth_use=None, deviation=8):\n", + "\n", + " # n_depth_use = 34\n", + "\n", + " if n_depth_use is None:\n", + " n_depth_use = signals_raw.shape[1]\n", + "\n", + " indices_passive_start = []\n", + " indices_passive_end = []\n", + "\n", + " md = np.median(np.diff(signals_raw[:, :n_depth_use], axis=0), axis=1)\n", + "\n", + " threshold_low = (\n", + " np.median(md) - (np.percentile(md, 75) - np.percentile(md, 25)) * deviation\n", + " )\n", + " threshold_high = (\n", + " np.median(md) + (np.percentile(md, 75) - np.percentile(md, 25)) * deviation\n", + " )\n", + " indices_possible_start = np.nonzero(md < threshold_low)[0]\n", + " indices_possible_end = np.nonzero(md > threshold_high)[0]\n", + " print(\"a\", indices_possible_end)\n", + "\n", + " current_index = 0\n", + "\n", + " if len(indices_possible_start) == 0 and len(indices_possible_end) == 0:\n", + " return np.array(indices_passive_start), np.array(indices_passive_end)\n", + "\n", + " if len(indices_possible_start) > 0:\n", + " indices_possible_start += 1\n", + "\n", + " if len(indices_possible_end) > 0:\n", + " indices_possible_end += 1\n", + "\n", + " print(\"b\", indices_possible_end)\n", + "\n", + " if (\n", + " len(indices_possible_start) == 0\n", + " or indices_possible_end[0] < indices_possible_start[0]\n", + " ):\n", + " indices_passive_start.append(0)\n", + " current_index = indices_possible_end[0]\n", + " indices_passive_end.append(current_index)\n", + " indices_possible_start = indices_possible_start[\n", + " indices_possible_start > current_index\n", + " ]\n", + " indices_possible_end = indices_possible_end[\n", + " indices_possible_end > current_index\n", + " ]\n", + "\n", + " print(\"c\", indices_possible_end)\n", + " print(\"c2\", indices_passive_end)\n", + "\n", + " while len(indices_possible_start) > 0:\n", + " current_index = indices_possible_start[0]\n", + " indices_passive_start.append(current_index)\n", + " baseline = signals_raw[current_index - 1, :n_depth_use]\n", + "\n", + " # Find first column which returns to the baseline value seen before passive region\n", + " offsets = np.nonzero(\n", + " np.median(baseline - signals_raw[current_index:, :n_depth_use], axis=1)\n", + " < threshold_high\n", + " )[0]\n", + " if len(offsets) == 0:\n", + " current_index = signals_raw.shape[0]\n", + " else:\n", + " current_index = current_index + offsets[0]\n", + " indices_passive_end.append(current_index)\n", + "\n", + " print(\"d\", indices_passive_end)\n", + "\n", + " # Remove preceding indices from the list of candidates\n", + " indices_possible_start = indices_possible_start[\n", + " indices_possible_start > current_index\n", + " ]\n", + " indices_possible_end = indices_possible_end[\n", + " indices_possible_end > current_index\n", + " ]\n", + "\n", + " print(\"e\", indices_passive_end)\n", + "\n", + " # Check the start was sufficiently inclusive.\n", + " if current_index < signals_raw.shape[0]:\n", + " baseline = signals_raw[current_index, :n_depth_use]\n", + " nonpassives = np.nonzero(\n", + " np.median(baseline - signals_raw[:current_index, :n_depth_use], axis=1)\n", + " < threshold_high\n", + " )[0]\n", + " if len(nonpassives) == 0:\n", + " indices_passive_start[-1] = 0\n", + " else:\n", + " indices_passive_start[-1] = min(\n", + " indices_passive_start[-1], nonpassives[-1] + 1\n", + " )\n", + "\n", + " print(\"f\", indices_passive_end)\n", + "\n", + " if (\n", + " len(indices_passive_start) > 1\n", + " and indices_passive_start[-1] <= indices_passive_end[-2]\n", + " ):\n", + " indices_passive_start = indices_passive_start[:-1]\n", + " indices_passive_end = indices_passive_end[:-2] + indices_passive_end[-1:]\n", + "\n", + " print(\"g\", indices_passive_end)\n", + "\n", + " return np.array(indices_passive_start), np.array(indices_passive_end)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "init_idx = 0\n", + "indices_passive_start, indices_passive_end = find_passive_edges(\n", + " signals_raw[init_idx:, :]\n", + ")\n", + "indices_passive_start += init_idx\n", + "indices_passive_end += init_idx" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "indices_passive_start" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "indices_passive_end" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for index_start, index_end in zip(indices_passive_start, indices_passive_end):\n", + " plt.figure(figsize=(12, 12))\n", + " plt.pcolormesh(\n", + " ts_raw[index_start:index_end],\n", + " depths_raw[:34],\n", + " signals_raw[index_start:index_end, :34].T,\n", + " )\n", + " plt.gca().invert_yaxis()\n", + " plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for index_start, index_end in zip(\n", + " np.concatenate(([0], indices_passive_end)),\n", + " np.concatenate((indices_passive_start, [signals_raw.shape[0]])),\n", + "):\n", + " if index_start == index_end:\n", + " continue\n", + " plt.figure(figsize=(12, 12))\n", + " plt.pcolormesh(\n", + " ts_raw[index_start:index_end],\n", + " depths_raw[:34],\n", + " signals_raw[index_start:index_end, :34].T,\n", + " )\n", + " plt.gca().invert_yaxis()\n", + " plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for index_start, index_end in zip(indices_passive_start, indices_passive_end):\n", + " plt.figure(figsize=(12, 12))\n", + " plt.pcolormesh(\n", + " ts_raw[index_start:index_end],\n", + " depths_raw,\n", + " signals_raw[index_start:index_end, :].T,\n", + " )\n", + " plt.gca().invert_yaxis()\n", + " plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for index_start, index_end in zip(\n", + " np.concatenate(([0], indices_passive_end)),\n", + " np.concatenate((indices_passive_start, [signals_raw.shape[0]])),\n", + "):\n", + " if index_start == index_end:\n", + " continue\n", + " plt.figure(figsize=(12, 12))\n", + " plt.pcolormesh(\n", + " ts_raw[index_start:index_end],\n", + " depths_raw,\n", + " signals_raw[index_start:index_end, :].T,\n", + " )\n", + " plt.gca().invert_yaxis()\n", + " plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 2 } diff --git a/notebooks/Surface anomaly removal.ipynb b/notebooks/Surface anomaly removal.ipynb index 494b9f00..ca670f1e 100644 --- a/notebooks/Surface anomaly removal.ipynb +++ b/notebooks/Surface anomaly removal.ipynb @@ -1,576 +1,576 @@ { - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cd .." - ] + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cd .." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import copy\n", + "import os" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import scipy.ndimage\n", + "import scipy.signal" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "\n", + "sns.set()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import echofilter.plotting\n", + "import echofilter.raw" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "root_data_dir = \"/media/scott/scratch/Datasets/dsforce/surveyExports\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# first val sample for stationary\n", + "sample = \"MinasPassage/december2017/december2017_D20180108-T045216_D20180108-T102216\"\n", + "sample = \"MinasPassage/march2018/march2018_D20180330-T202218_D20180331-T015214\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "transect = echofilter.raw.manipulate.load_decomposed_transect_mask(\n", + " os.path.join(root_data_dir, sample),\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(15, 9))\n", + "echofilter.plotting.plot_transect(transect)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fname_surface = os.path.join(root_data_dir, sample + \"_surface.evl\")\n", + "t_surface, d_surface = echofilter.raw.loader.evl_loader(fname_surface)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ts_raw = transect[\"timestamps\"]\n", + "d_surface = transect[\"surface\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ts_raw" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(15, 9))\n", + "plt.plot(d_surface)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "segments = list(echofilter.raw.manipulate.split_transect(**transect))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for i_segment, segment in enumerate(segments):\n", + " plt.figure(figsize=(15, 9))\n", + " plt.plot(segment[\"surface\"])\n", + " plt.title(\"{} #{}\".format(sample, i_segment))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "i_segment = 8\n", + "segment = segments[i_segment]\n", + "# Remove passive data from the signal\n", + "signal = segment[\"surface\"][segment[\"is_passive\"] < 0.5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sigma = 50\n", + "smoothed = scipy.ndimage.gaussian_filter1d(signal, sigma, axis=0)\n", + "\n", + "ks = 175\n", + "offset = ks // 2\n", + "medfiltered = scipy.signal.medfilt(\n", + " np.pad(signal, (offset, offset), mode=\"reflect\"), ks\n", + ")[offset:-offset]\n", + "\n", + "savgoled = scipy.signal.savgol_filter(signal, ks, 3)\n", + "\n", + "plt.figure(figsize=(15, 9))\n", + "plt.plot(signal, label=\"original\")\n", + "plt.plot(smoothed, label=\"gaussian, sigma={}\".format(sigma))\n", + "plt.plot(medfiltered, label=\"median, kernel={}\".format(ks))\n", + "plt.plot(savgoled, label=\"SavGol, kernel={}\".format(ks))\n", + "plt.legend()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "residual = signal - medfiltered\n", + "\n", + "stdev = np.diff(np.percentile(residual, [25, 75])).item() / 1.35\n", + "print(stdev)\n", + "\n", + "plt.figure(figsize=(15, 9))\n", + "plt.plot(residual, label=\"residual\")\n", + "plt.axhline(stdev, color=\"g\", ls=\":\")\n", + "plt.axhline(-stdev, color=\"g\", ls=\":\")\n", + "plt.axhline(stdev * 5, color=\"r\", ls=\":\")\n", + "plt.axhline(-stdev * 5, color=\"r\", ls=\":\")\n", + "plt.legend()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "is_good_line = np.abs(residual) < 5 * stdev" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ii = np.arange(len(signal))\n", + "\n", + "new_line = signal.copy()\n", + "new_line[~is_good_line] = np.interp(\n", + " ii[~is_good_line], ii[is_good_line], medfiltered[is_good_line]\n", + ")\n", + "\n", + "plt.figure(figsize=(15, 9))\n", + "plt.plot(signal, label=\"original\")\n", + "plt.plot(new_line, label=\"new\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sigma = 5\n", + "new_smoothed = scipy.ndimage.gaussian_filter1d(new_line, sigma, axis=0)\n", + "\n", + "ks = 31\n", + "offset = ks // 2\n", + "new_medfiltered = scipy.signal.medfilt(\n", + " np.pad(new_line, (offset, offset), mode=\"reflect\"), ks\n", + ")[offset:-offset]\n", + "\n", + "new_savgoled = scipy.signal.savgol_filter(new_line, ks, 2)\n", + "\n", + "plt.figure(figsize=(15, 9))\n", + "plt.plot(new_line, label=\"new_line\")\n", + "plt.plot(new_smoothed, label=\"gaussian, sigma={}\".format(sigma))\n", + "plt.plot(new_medfiltered, label=\"median, kernel={}\".format(ks))\n", + "plt.plot(new_savgoled, label=\"SavGol, kernel={}\".format(ks))\n", + "plt.legend()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "new_residual = new_line - new_smoothed\n", + "\n", + "stdev = np.diff(np.percentile(new_residual[is_good_line], [25, 75])).item() / 1.35\n", + "print(stdev)\n", + "\n", + "stdev = np.diff(np.percentile(new_residual[is_good_line], [10, 90])).item() / 2.56\n", + "print(stdev)\n", + "\n", + "plt.figure(figsize=(15, 9))\n", + "plt.plot(new_residual, label=\"smoothed-residual\")\n", + "plt.axhline(stdev, color=\"g\", ls=\":\")\n", + "plt.axhline(-stdev, color=\"g\", ls=\":\")\n", + "plt.axhline(stdev * 4, color=\"r\", ls=\":\")\n", + "plt.axhline(-stdev * 4, color=\"r\", ls=\":\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "new_residual = new_line - new_medfiltered\n", + "\n", + "stdev = np.diff(np.percentile(new_residual[is_good_line], [25, 75])).item() / 1.35\n", + "print(stdev)\n", + "\n", + "stdev = np.diff(np.percentile(new_residual[is_good_line], [10, 90])).item() / 2.56\n", + "print(stdev)\n", + "\n", + "plt.figure(figsize=(15, 9))\n", + "plt.plot(new_residual, label=\"smoothed-residual\")\n", + "plt.axhline(stdev, color=\"g\", ls=\":\")\n", + "plt.axhline(-stdev, color=\"g\", ls=\":\")\n", + "plt.axhline(stdev * 4, color=\"r\", ls=\":\")\n", + "plt.axhline(-stdev * 4, color=\"r\", ls=\":\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def _remove_anomalies_1d_v1(signal, thr=4, median_kernel=51, gaussian_sigma=5):\n", + " \"\"\"\n", + " remove anomalies from signal\n", + " \"\"\"\n", + " signal = np.copy(signal)\n", + "\n", + " # Median filtering, with reflection padding\n", + " offset = median_kernel // 2\n", + " smoothed = scipy.signal.medfilt(\n", + " np.pad(signal, (offset, offset), mode=\"reflect\"),\n", + " median_kernel,\n", + " )[offset:-offset]\n", + " # Measure the residual between the original and median filtered signal\n", + " residual = signal - smoothed\n", + " # Replace datapoints more than 4 sigma away from the median filter\n", + " # with the filtered signal\n", + " stdev = np.diff(np.percentile(residual, [25, 75])).item() / 1.35\n", + " is_fixed = np.abs(residual) > thr * stdev\n", + " signal[is_fixed] = smoothed[is_fixed]\n", + "\n", + " # Smooth signal with a gaussian kernel\n", + " while True:\n", + " smoothed = scipy.ndimage.gaussian_filter1d(signal, gaussian_sigma, axis=0)\n", + " # Mesure new residual\n", + " residual = signal - smoothed\n", + " stdev = np.diff(np.percentile(residual[~is_fixed], [10, 90])).item() / 2.56\n", + " is_fixed_now = np.abs(residual) > thr * stdev\n", + " is_fixed |= is_fixed_now\n", + " signal[is_fixed] = smoothed[is_fixed]\n", + " if not np.any(is_fixed_now):\n", + " break\n", + "\n", + " return signal, is_fixed" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from echofilter.raw.utils import pad1d\n", + "\n", + "\n", + "def medfilt1d(signal, kernel_size, axis=-1, pad_mode=\"reflect\"):\n", + " \"\"\"\n", + " Median filter in 1d, with support for selecting padding mode.\n", + "\n", + " Parameters\n", + " ----------\n", + " signal : array_like\n", + " The signal to filter.\n", + " kernel_size\n", + " Size of the median kernel to use.\n", + " axis : int, optional\n", + " Which axis to operate along. Default is `-1`.\n", + " pad_mode : str, optional\n", + " Method with which to pad the vector at the edges.\n", + " Must be supported by `numpy.pad`. Default is `\"reflect\"`.\n", + "\n", + " Returns\n", + " -------\n", + " filtered : array_like\n", + " The filtered signal.\n", + "\n", + " See Also\n", + " --------\n", + " - `scipy.signal.medfilt`\n", + " - `pad1d`\n", + " \"\"\"\n", + " offset = kernel_size // 2\n", + " signal = pad1d(signal, offset, axis=axis, mode=pad_mode)\n", + " filtered = scipy.signal.medfilt(signal, kernel_size)[offset:-offset]\n", + " return filtered\n", + "\n", + "\n", + "def remove_anomalies_1d(signal, thr=5, thr2=4, kernel=201, kernel2=31):\n", + " \"\"\"\n", + " Remove anomalies from a temporal signal.\n", + "\n", + " Applies a median filter to the data, and replaces datapoints which\n", + " deviate from the median filtered signal by more than some threshold\n", + " with the median filtered data. This process is repeated until no\n", + " datapoints deviate from the filtered line by more than the threshold.\n", + "\n", + " Parameters\n", + " ----------\n", + " signal : array_like\n", + " The signal to filter.\n", + " thr : float, optional\n", + " The initial threshold will be `thr` times the standard deviation of the residuals.\n", + " The standard deviation is robustly estimated from the interquartile range.\n", + " Default is `5`.\n", + " thr2 : float, optional\n", + " The threshold for repeated iterations will be `thr2` times the standard deviation\n", + " of the remaining residuals. The standard deviation is robustly estimated from\n", + " interdecile range. Default is `4`.\n", + " kernel : int, optional\n", + " The kernel size for the initial median filter. Default is `201`.\n", + " kernel2 : int, optional\n", + " The kernel size for subsequent median filters. Default is `31`.\n", + "\n", + " Returns\n", + " -------\n", + " filtered : numpy.ndarray like signal\n", + " The input signal with anomalies replaced with median values.\n", + " is_fixed : bool numpy.ndarray shaped like signal\n", + " Indicator for which datapoints were replaced.\n", + "\n", + " See Also\n", + " --------\n", + " `medfilt1d`\n", + " \"\"\"\n", + " signal = np.copy(signal)\n", + "\n", + " # Median filtering, with reflection padding\n", + " smoothed = medfilt1d(signal, kernel)\n", + " # Measure the residual between the original and median filtered signal\n", + " residual = signal - smoothed\n", + " # Replace datapoints more than thr sigma away from the median filter\n", + " # with the filtered signal. We use a robust estimate of the standard\n", + " # deviation, using the central 50% of datapoints.\n", + " stdev = np.diff(np.percentile(residual, [25, 75])).item() / 1.35\n", + " is_fixed = np.abs(residual) > thr * stdev\n", + " signal[is_fixed] = smoothed[is_fixed]\n", + "\n", + " # Filter again, with a narrower kernel but tighter threshold\n", + " while True:\n", + " smoothed = medfilt1d(signal, kernel2)\n", + " # Mesure new residual\n", + " residual = signal - smoothed\n", + " # Make sure to only include original data points when determining\n", + " # the standard deviation. We use the interdecile range.\n", + " stdev = np.diff(np.percentile(residual[~is_fixed], [10, 90])).item() / 2.56\n", + " is_fixed_now = np.abs(residual) > thr2 * stdev\n", + " is_fixed |= is_fixed_now\n", + " signal[is_fixed] = smoothed[is_fixed]\n", + " # We are done when no more datapoints had to be replaced\n", + " if not np.any(is_fixed_now):\n", + " break\n", + "\n", + " return signal, is_fixed" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for i_segment, segment in enumerate(segments):\n", + " plt.figure(figsize=(15, 9))\n", + " plt.plot(segment[\"surface\"])\n", + "\n", + " # Handle passive data\n", + " is_passive = segment[\"is_passive\"] > 0.5\n", + " _smoothed, _is_fixed = remove_anomalies_1d(segment[\"surface\"][~is_passive])\n", + " smoothed = np.interp(\n", + " segment[\"timestamps\"], segment[\"timestamps\"][~is_passive], _smoothed\n", + " )\n", + " is_fixed = np.zeros_like(is_passive)\n", + " is_fixed[~is_passive] = _is_fixed\n", + "\n", + " print(\"{} datapoints were fixed\".format(np.sum(is_fixed)))\n", + " plt.plot(smoothed)\n", + " plt.title(\"{} #{}, {} removed\".format(sample, i_segment, np.sum(is_fixed)))\n", + " plt.show()\n", + " if np.sum(is_fixed) > 0:\n", + " plt.figure(figsize=(15, 9))\n", + " echofilter.plotting.plot_transect(segment)\n", + " plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sample_paths = [\n", + " \"MinasPassage/december2017/december2017_D20171214-T202211_D20171215-T015215\",\n", + " \"MinasPassage/december2017/december2017_D20180108-T045216_D20180108-T102216\",\n", + " \"MinasPassage/december2017/december2017_D20180222-T145219_D20180222-T142214\",\n", + " \"MinasPassage/march2018/march2018_D20180330-T202218_D20180331-T015214\",\n", + " \"MinasPassage/march2018/march2018_D20180513-T015216_D20180513-T072215\",\n", + " \"MinasPassage/march2018/march2018_D20180523-T175215_D20180523-T172215\",\n", + " \"MinasPassage/september2018/september2018_D20180915-T202216_D20180916-T015217\",\n", + " \"MinasPassage/september2018/september2018_D20181027-T022221_D20181027-T075217\",\n", + " \"MinasPassage/september2018/september2018_D20181116-T205220_D20181117-T022218\",\n", + " \"MinasPassage/september2018/september2018_D20181119-T195217_D20181119-T195217\",\n", + "]\n", + "\n", + "for sample in sample_paths:\n", + " print(sample)\n", + "\n", + " transect = echofilter.raw.manipulate.load_decomposed_transect_mask(\n", + " os.path.join(root_data_dir, sample),\n", + " )\n", + "\n", + " for i_segment, segment in enumerate(\n", + " echofilter.raw.manipulate.split_transect(**transect)\n", + " ):\n", + " plt.figure(figsize=(15, 9))\n", + " plt.plot(segment[\"surface\"])\n", + "\n", + " # Handle passive data\n", + " is_passive = segment[\"is_passive\"] > 0.5\n", + " _smoothed, _is_fixed = remove_anomalies_1d(segment[\"surface\"][~is_passive])\n", + " smoothed = np.interp(\n", + " segment[\"timestamps\"], segment[\"timestamps\"][~is_passive], _smoothed\n", + " )\n", + " is_fixed = np.zeros_like(is_passive)\n", + " is_fixed[~is_passive] = _is_fixed\n", + "\n", + " print(\"{} datapoints were fixed\".format(np.sum(is_fixed)))\n", + " plt.plot(smoothed)\n", + " plt.title(\"{} #{}, {} removed\".format(sample, i_segment, np.sum(is_fixed)))\n", + " plt.show()\n", + " if np.sum(is_fixed) > 0:\n", + " plt.figure(figsize=(15, 9))\n", + " echofilter.plotting.plot_transect(segment)\n", + " plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.15" + } }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import copy\n", - "import os" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import scipy.ndimage\n", - "import scipy.signal" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import matplotlib.pyplot as plt\n", - "import seaborn as sns\n", - "\n", - "sns.set()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import echofilter.plotting\n", - "import echofilter.raw" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "root_data_dir = \"/media/scott/scratch/Datasets/dsforce/surveyExports\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# first val sample for stationary\n", - "sample = \"MinasPassage/december2017/december2017_D20180108-T045216_D20180108-T102216\"\n", - "sample = \"MinasPassage/march2018/march2018_D20180330-T202218_D20180331-T015214\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "transect = echofilter.raw.manipulate.load_decomposed_transect_mask(\n", - " os.path.join(root_data_dir, sample),\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(15, 9))\n", - "echofilter.plotting.plot_transect(transect)\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fname_surface = os.path.join(root_data_dir, sample + \"_surface.evl\")\n", - "t_surface, d_surface = echofilter.raw.loader.evl_loader(fname_surface)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "ts_raw = transect[\"timestamps\"]\n", - "d_surface = transect[\"surface\"]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "ts_raw" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(15, 9))\n", - "plt.plot(d_surface)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "segments = list(echofilter.raw.manipulate.split_transect(**transect))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "for i_segment, segment in enumerate(segments):\n", - " plt.figure(figsize=(15, 9))\n", - " plt.plot(segment[\"surface\"])\n", - " plt.title(\"{} #{}\".format(sample, i_segment))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "i_segment = 8\n", - "segment = segments[i_segment]\n", - "# Remove passive data from the signal\n", - "signal = segment[\"surface\"][segment[\"is_passive\"] < 0.5]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sigma = 50\n", - "smoothed = scipy.ndimage.gaussian_filter1d(signal, sigma, axis=0)\n", - "\n", - "ks = 175\n", - "offset = ks // 2\n", - "medfiltered = scipy.signal.medfilt(\n", - " np.pad(signal, (offset, offset), mode=\"reflect\"), ks\n", - ")[offset:-offset]\n", - "\n", - "savgoled = scipy.signal.savgol_filter(signal, ks, 3)\n", - "\n", - "plt.figure(figsize=(15, 9))\n", - "plt.plot(signal, label=\"original\")\n", - "plt.plot(smoothed, label=\"gaussian, sigma={}\".format(sigma))\n", - "plt.plot(medfiltered, label=\"median, kernel={}\".format(ks))\n", - "plt.plot(savgoled, label=\"SavGol, kernel={}\".format(ks))\n", - "plt.legend()\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "residual = signal - medfiltered\n", - "\n", - "stdev = np.diff(np.percentile(residual, [25, 75])).item() / 1.35\n", - "print(stdev)\n", - "\n", - "plt.figure(figsize=(15, 9))\n", - "plt.plot(residual, label=\"residual\")\n", - "plt.axhline(stdev, color=\"g\", ls=\":\")\n", - "plt.axhline(-stdev, color=\"g\", ls=\":\")\n", - "plt.axhline(stdev * 5, color=\"r\", ls=\":\")\n", - "plt.axhline(-stdev * 5, color=\"r\", ls=\":\")\n", - "plt.legend()\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "is_good_line = np.abs(residual) < 5 * stdev" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "ii = np.arange(len(signal))\n", - "\n", - "new_line = signal.copy()\n", - "new_line[~is_good_line] = np.interp(\n", - " ii[~is_good_line], ii[is_good_line], medfiltered[is_good_line]\n", - ")\n", - "\n", - "plt.figure(figsize=(15, 9))\n", - "plt.plot(signal, label=\"original\")\n", - "plt.plot(new_line, label=\"new\")\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sigma = 5\n", - "new_smoothed = scipy.ndimage.gaussian_filter1d(new_line, sigma, axis=0)\n", - "\n", - "ks = 31\n", - "offset = ks // 2\n", - "new_medfiltered = scipy.signal.medfilt(\n", - " np.pad(new_line, (offset, offset), mode=\"reflect\"), ks\n", - ")[offset:-offset]\n", - "\n", - "new_savgoled = scipy.signal.savgol_filter(new_line, ks, 2)\n", - "\n", - "plt.figure(figsize=(15, 9))\n", - "plt.plot(new_line, label=\"new_line\")\n", - "plt.plot(new_smoothed, label=\"gaussian, sigma={}\".format(sigma))\n", - "plt.plot(new_medfiltered, label=\"median, kernel={}\".format(ks))\n", - "plt.plot(new_savgoled, label=\"SavGol, kernel={}\".format(ks))\n", - "plt.legend()\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "new_residual = new_line - new_smoothed\n", - "\n", - "stdev = np.diff(np.percentile(new_residual[is_good_line], [25, 75])).item() / 1.35\n", - "print(stdev)\n", - "\n", - "stdev = np.diff(np.percentile(new_residual[is_good_line], [10, 90])).item() / 2.56\n", - "print(stdev)\n", - "\n", - "plt.figure(figsize=(15, 9))\n", - "plt.plot(new_residual, label=\"smoothed-residual\")\n", - "plt.axhline(stdev, color=\"g\", ls=\":\")\n", - "plt.axhline(-stdev, color=\"g\", ls=\":\")\n", - "plt.axhline(stdev * 4, color=\"r\", ls=\":\")\n", - "plt.axhline(-stdev * 4, color=\"r\", ls=\":\")\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "new_residual = new_line - new_medfiltered\n", - "\n", - "stdev = np.diff(np.percentile(new_residual[is_good_line], [25, 75])).item() / 1.35\n", - "print(stdev)\n", - "\n", - "stdev = np.diff(np.percentile(new_residual[is_good_line], [10, 90])).item() / 2.56\n", - "print(stdev)\n", - "\n", - "plt.figure(figsize=(15, 9))\n", - "plt.plot(new_residual, label=\"smoothed-residual\")\n", - "plt.axhline(stdev, color=\"g\", ls=\":\")\n", - "plt.axhline(-stdev, color=\"g\", ls=\":\")\n", - "plt.axhline(stdev * 4, color=\"r\", ls=\":\")\n", - "plt.axhline(-stdev * 4, color=\"r\", ls=\":\")\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def _remove_anomalies_1d_v1(signal, thr=4, median_kernel=51, gaussian_sigma=5):\n", - " \"\"\"\n", - " remove anomalies from signal\n", - " \"\"\"\n", - " signal = np.copy(signal)\n", - "\n", - " # Median filtering, with reflection padding\n", - " offset = median_kernel // 2\n", - " smoothed = scipy.signal.medfilt(\n", - " np.pad(signal, (offset, offset), mode=\"reflect\"),\n", - " median_kernel,\n", - " )[offset:-offset]\n", - " # Measure the residual between the original and median filtered signal\n", - " residual = signal - smoothed\n", - " # Replace datapoints more than 4 sigma away from the median filter\n", - " # with the filtered signal\n", - " stdev = np.diff(np.percentile(residual, [25, 75])).item() / 1.35\n", - " is_fixed = np.abs(residual) > thr * stdev\n", - " signal[is_fixed] = smoothed[is_fixed]\n", - "\n", - " # Smooth signal with a gaussian kernel\n", - " while True:\n", - " smoothed = scipy.ndimage.gaussian_filter1d(signal, gaussian_sigma, axis=0)\n", - " # Mesure new residual\n", - " residual = signal - smoothed\n", - " stdev = np.diff(np.percentile(residual[~is_fixed], [10, 90])).item() / 2.56\n", - " is_fixed_now = np.abs(residual) > thr * stdev\n", - " is_fixed |= is_fixed_now\n", - " signal[is_fixed] = smoothed[is_fixed]\n", - " if not np.any(is_fixed_now):\n", - " break\n", - "\n", - " return signal, is_fixed" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from echofilter.raw.utils import pad1d\n", - "\n", - "\n", - "def medfilt1d(signal, kernel_size, axis=-1, pad_mode=\"reflect\"):\n", - " \"\"\"\n", - " Median filter in 1d, with support for selecting padding mode.\n", - "\n", - " Parameters\n", - " ----------\n", - " signal : array_like\n", - " The signal to filter.\n", - " kernel_size\n", - " Size of the median kernel to use.\n", - " axis : int, optional\n", - " Which axis to operate along. Default is `-1`.\n", - " pad_mode : str, optional\n", - " Method with which to pad the vector at the edges.\n", - " Must be supported by `numpy.pad`. Default is `\"reflect\"`.\n", - "\n", - " Returns\n", - " -------\n", - " filtered : array_like\n", - " The filtered signal.\n", - "\n", - " See Also\n", - " --------\n", - " - `scipy.signal.medfilt`\n", - " - `pad1d`\n", - " \"\"\"\n", - " offset = kernel_size // 2\n", - " signal = pad1d(signal, offset, axis=axis, mode=pad_mode)\n", - " filtered = scipy.signal.medfilt(signal, kernel_size)[offset:-offset]\n", - " return filtered\n", - "\n", - "\n", - "def remove_anomalies_1d(signal, thr=5, thr2=4, kernel=201, kernel2=31):\n", - " \"\"\"\n", - " Remove anomalies from a temporal signal.\n", - "\n", - " Applies a median filter to the data, and replaces datapoints which\n", - " deviate from the median filtered signal by more than some threshold\n", - " with the median filtered data. This process is repeated until no\n", - " datapoints deviate from the filtered line by more than the threshold.\n", - "\n", - " Parameters\n", - " ----------\n", - " signal : array_like\n", - " The signal to filter.\n", - " thr : float, optional\n", - " The initial threshold will be `thr` times the standard deviation of the residuals.\n", - " The standard deviation is robustly estimated from the interquartile range.\n", - " Default is `5`.\n", - " thr2 : float, optional\n", - " The threshold for repeated iterations will be `thr2` times the standard deviation\n", - " of the remaining residuals. The standard deviation is robustly estimated from\n", - " interdecile range. Default is `4`.\n", - " kernel : int, optional\n", - " The kernel size for the initial median filter. Default is `201`.\n", - " kernel2 : int, optional\n", - " The kernel size for subsequent median filters. Default is `31`.\n", - "\n", - " Returns\n", - " -------\n", - " filtered : numpy.ndarray like signal\n", - " The input signal with anomalies replaced with median values.\n", - " is_fixed : bool numpy.ndarray shaped like signal\n", - " Indicator for which datapoints were replaced.\n", - "\n", - " See Also\n", - " --------\n", - " `medfilt1d`\n", - " \"\"\"\n", - " signal = np.copy(signal)\n", - "\n", - " # Median filtering, with reflection padding\n", - " smoothed = medfilt1d(signal, kernel)\n", - " # Measure the residual between the original and median filtered signal\n", - " residual = signal - smoothed\n", - " # Replace datapoints more than thr sigma away from the median filter\n", - " # with the filtered signal. We use a robust estimate of the standard\n", - " # deviation, using the central 50% of datapoints.\n", - " stdev = np.diff(np.percentile(residual, [25, 75])).item() / 1.35\n", - " is_fixed = np.abs(residual) > thr * stdev\n", - " signal[is_fixed] = smoothed[is_fixed]\n", - "\n", - " # Filter again, with a narrower kernel but tighter threshold\n", - " while True:\n", - " smoothed = medfilt1d(signal, kernel2)\n", - " # Mesure new residual\n", - " residual = signal - smoothed\n", - " # Make sure to only include original data points when determining\n", - " # the standard deviation. We use the interdecile range.\n", - " stdev = np.diff(np.percentile(residual[~is_fixed], [10, 90])).item() / 2.56\n", - " is_fixed_now = np.abs(residual) > thr2 * stdev\n", - " is_fixed |= is_fixed_now\n", - " signal[is_fixed] = smoothed[is_fixed]\n", - " # We are done when no more datapoints had to be replaced\n", - " if not np.any(is_fixed_now):\n", - " break\n", - "\n", - " return signal, is_fixed" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "for i_segment, segment in enumerate(segments):\n", - " plt.figure(figsize=(15, 9))\n", - " plt.plot(segment[\"surface\"])\n", - "\n", - " # Handle passive data\n", - " is_passive = segment[\"is_passive\"] > 0.5\n", - " _smoothed, _is_fixed = remove_anomalies_1d(segment[\"surface\"][~is_passive])\n", - " smoothed = np.interp(\n", - " segment[\"timestamps\"], segment[\"timestamps\"][~is_passive], _smoothed\n", - " )\n", - " is_fixed = np.zeros_like(is_passive)\n", - " is_fixed[~is_passive] = _is_fixed\n", - "\n", - " print(\"{} datapoints were fixed\".format(np.sum(is_fixed)))\n", - " plt.plot(smoothed)\n", - " plt.title(\"{} #{}, {} removed\".format(sample, i_segment, np.sum(is_fixed)))\n", - " plt.show()\n", - " if np.sum(is_fixed) > 0:\n", - " plt.figure(figsize=(15, 9))\n", - " echofilter.plotting.plot_transect(segment)\n", - " plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sample_paths = [\n", - " \"MinasPassage/december2017/december2017_D20171214-T202211_D20171215-T015215\",\n", - " \"MinasPassage/december2017/december2017_D20180108-T045216_D20180108-T102216\",\n", - " \"MinasPassage/december2017/december2017_D20180222-T145219_D20180222-T142214\",\n", - " \"MinasPassage/march2018/march2018_D20180330-T202218_D20180331-T015214\",\n", - " \"MinasPassage/march2018/march2018_D20180513-T015216_D20180513-T072215\",\n", - " \"MinasPassage/march2018/march2018_D20180523-T175215_D20180523-T172215\",\n", - " \"MinasPassage/september2018/september2018_D20180915-T202216_D20180916-T015217\",\n", - " \"MinasPassage/september2018/september2018_D20181027-T022221_D20181027-T075217\",\n", - " \"MinasPassage/september2018/september2018_D20181116-T205220_D20181117-T022218\",\n", - " \"MinasPassage/september2018/september2018_D20181119-T195217_D20181119-T195217\",\n", - "]\n", - "\n", - "for sample in sample_paths:\n", - " print(sample)\n", - "\n", - " transect = echofilter.raw.manipulate.load_decomposed_transect_mask(\n", - " os.path.join(root_data_dir, sample),\n", - " )\n", - "\n", - " for i_segment, segment in enumerate(\n", - " echofilter.raw.manipulate.split_transect(**transect)\n", - " ):\n", - " plt.figure(figsize=(15, 9))\n", - " plt.plot(segment[\"surface\"])\n", - "\n", - " # Handle passive data\n", - " is_passive = segment[\"is_passive\"] > 0.5\n", - " _smoothed, _is_fixed = remove_anomalies_1d(segment[\"surface\"][~is_passive])\n", - " smoothed = np.interp(\n", - " segment[\"timestamps\"], segment[\"timestamps\"][~is_passive], _smoothed\n", - " )\n", - " is_fixed = np.zeros_like(is_passive)\n", - " is_fixed[~is_passive] = _is_fixed\n", - "\n", - " print(\"{} datapoints were fixed\".format(np.sum(is_fixed)))\n", - " plt.plot(smoothed)\n", - " plt.title(\"{} #{}, {} removed\".format(sample, i_segment, np.sum(is_fixed)))\n", - " plt.show()\n", - " if np.sum(is_fixed) > 0:\n", - " plt.figure(figsize=(15, 9))\n", - " echofilter.plotting.plot_transect(segment)\n", - " plt.show()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.15" - } - }, - "nbformat": 4, - "nbformat_minor": 2 + "nbformat": 4, + "nbformat_minor": 2 } diff --git a/notebooks/check chunking.ipynb b/notebooks/check chunking.ipynb index f2d52bf2..511a7565 100644 --- a/notebooks/check chunking.ipynb +++ b/notebooks/check chunking.ipynb @@ -1,277 +1,277 @@ { - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cd .." - ] + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cd .." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import copy\n", + "import os" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import echofilter.raw" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "\n", + "sns.set()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "turbulence_color = \"c\"\n", + "bottom_color = \"#00dd00\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def plot_transect(transect, i_transect=None):\n", + " tt = transect[\"timestamps\"]\n", + " for signal_name in (\"Sv\", \"Sv_masked\", \"mask\"):\n", + " if signal_name == \"Sv_masked\":\n", + " signal = copy.deepcopy(transect[\"Sv\"])\n", + " signal[~transect[\"mask\"]] = np.nan\n", + " else:\n", + " signal = transect[signal_name]\n", + " plt.figure(figsize=(12, 12))\n", + " plt.pcolormesh(\n", + " transect[\"timestamps\"],\n", + " transect[\"depths\"],\n", + " signal.T,\n", + " )\n", + " plt.plot(transect[\"timestamps\"], transect[\"top\"], turbulence_color)\n", + " plt.plot(transect[\"timestamps\"], transect[\"bottom\"], bottom_color)\n", + "\n", + " indices = np.nonzero(transect[\"is_passive\"])[0]\n", + " if len(indices) > 0:\n", + " r_starts = [indices[0]]\n", + " r_ends = []\n", + " breaks = np.nonzero(indices[1:] - indices[:-1] > 1)[0]\n", + " for break_idx in breaks:\n", + " r_ends.append(indices[break_idx])\n", + " r_starts.append(indices[break_idx + 1])\n", + " r_ends.append(indices[-1])\n", + " for r_start, r_end in zip(r_starts, r_ends):\n", + " plt.fill_between(\n", + " tt[[r_start, r_end]],\n", + " transect[\"depths\"][[0, 0]],\n", + " transect[\"depths\"][[-1, -1]],\n", + " facecolor=\"none\",\n", + " hatch=\"//\",\n", + " edgecolor=[0.4, 0.4, 0.4],\n", + " linewidth=0.0,\n", + " )\n", + "\n", + " indices = np.nonzero(transect[\"is_removed\"])[0]\n", + " if len(indices) > 0:\n", + " r_starts = [indices[0]]\n", + " r_ends = []\n", + " breaks = np.nonzero(indices[1:] - indices[:-1] > 1)[0]\n", + " for break_idx in breaks:\n", + " r_ends.append(indices[break_idx])\n", + " r_starts.append(indices[break_idx + 1])\n", + " r_ends.append(indices[-1])\n", + " for r_start, r_end in zip(r_starts, r_ends):\n", + " plt.fill_between(\n", + " tt[[r_start, r_end]],\n", + " transect[\"depths\"][[0, 0]],\n", + " transect[\"depths\"][[-1, -1]],\n", + " facecolor=\"none\",\n", + " hatch=\"\\\\\\\\\",\n", + " edgecolor=[0, 0, 1],\n", + " linewidth=0.0,\n", + " )\n", + "\n", + " plt.gca().invert_yaxis()\n", + " plt.xlabel(\"Timestamp (s)\")\n", + " plt.ylabel(\"Depth (m)\")\n", + " plt.title(\"{} Chunk {}\".format(signal_name, i_transect))\n", + " plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "root_data_dir = \"/media/scott/scratch/Datasets/dsforce/surveyExports\"\n", + "dataset = \"MinasPassage\"\n", + "# has removed window\n", + "sample = \"december2017/december2017_D20171214-T202211_D20171215-T015215\"\n", + "# has passive recording\n", + "# sample = 'september2018/september2018_D20181116-T205220_D20181117-T022218'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "transect = echofilter.raw.manipulate.load_decomposed_transect_mask(\n", + " os.path.join(root_data_dir, dataset, sample),\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "chunks = echofilter.raw.manipulate.split_transect(**transect)\n", + "\n", + "for i_chunk, chunk in enumerate(chunks):\n", + " plot_transect(chunk, i_chunk)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "root_data_dir = \"/media/scott/scratch/Datasets/dsforce/surveyExports\"\n", + "dataset = \"MinasPassage\"\n", + "\n", + "for sample in [\n", + " \"december2017/december2017_D20171214-T202211_D20171215-T015215\",\n", + " \"december2017/december2017_D20180222-T145219_D20180222-T142214\",\n", + " \"march2018/march2018_D20180330-T202218_D20180331-T015214\",\n", + " \"march2018/march2018_D20180523-T175215_D20180523-T172215\",\n", + " \"september2018/september2018_D20180915-T202216_D20180916-T015217\",\n", + " \"september2018/september2018_D20181116-T205220_D20181117-T022218\",\n", + " \"september2018/september2018_D20181119-T195217_D20181119-T195217\",\n", + "]:\n", + " print(sample)\n", + "\n", + " transect = echofilter.raw.manipulate.load_decomposed_transect_mask(\n", + " os.path.join(root_data_dir, dataset, sample),\n", + " )\n", + "\n", + " print(sample)\n", + "\n", + " chunks = echofilter.raw.manipulate.split_transect(**transect)\n", + "\n", + " for i_chunk, chunk in enumerate(chunks):\n", + " plot_transect(chunk, i_chunk)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Compare against a mobile transect" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dataset = \"mobile\"\n", + "\n", + "# example with only passive period\n", + "# sample = 'Survey17/Survey17_GR4_T1W_E'\n", + "\n", + "# example with 1 passive period, 1 turbulence cut out\n", + "# sample = 'Survey16/Survey16_GR3_N1W_E'\n", + "\n", + "# example with lots of short passive periods, 1 proper passive period, 1 turbulence cut out\n", + "# sample = 'Survey17/Survey17_GR1_N0W_E'\n", + "\n", + "# example with passive, removed, and patches\n", + "# sample = 'Survey16/Survey16_GR1_N3A_F'\n", + "\n", + "# example with passive, removed, and patches\n", + "sample = \"Survey16/Survey16_GR3_N3A_F\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "transect = echofilter.raw.manipulate.load_decomposed_transect_mask(\n", + " os.path.join(root_data_dir, dataset, sample),\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "chunks = echofilter.raw.manipulate.split_transect(**transect)\n", + "\n", + "for i_chunk, chunk in enumerate(chunks):\n", + " plot_transect(chunk, i_chunk)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import copy\n", - "import os" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import echofilter.raw" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import matplotlib.pyplot as plt\n", - "import seaborn as sns\n", - "\n", - "sns.set()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "turbulence_color = \"c\"\n", - "bottom_color = \"#00dd00\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def plot_transect(transect, i_transect=None):\n", - " tt = transect[\"timestamps\"]\n", - " for signal_name in (\"Sv\", \"Sv_masked\", \"mask\"):\n", - " if signal_name == \"Sv_masked\":\n", - " signal = copy.deepcopy(transect[\"Sv\"])\n", - " signal[~transect[\"mask\"]] = np.nan\n", - " else:\n", - " signal = transect[signal_name]\n", - " plt.figure(figsize=(12, 12))\n", - " plt.pcolormesh(\n", - " transect[\"timestamps\"],\n", - " transect[\"depths\"],\n", - " signal.T,\n", - " )\n", - " plt.plot(transect[\"timestamps\"], transect[\"top\"], turbulence_color)\n", - " plt.plot(transect[\"timestamps\"], transect[\"bottom\"], bottom_color)\n", - "\n", - " indices = np.nonzero(transect[\"is_passive\"])[0]\n", - " if len(indices) > 0:\n", - " r_starts = [indices[0]]\n", - " r_ends = []\n", - " breaks = np.nonzero(indices[1:] - indices[:-1] > 1)[0]\n", - " for break_idx in breaks:\n", - " r_ends.append(indices[break_idx])\n", - " r_starts.append(indices[break_idx + 1])\n", - " r_ends.append(indices[-1])\n", - " for r_start, r_end in zip(r_starts, r_ends):\n", - " plt.fill_between(\n", - " tt[[r_start, r_end]],\n", - " transect[\"depths\"][[0, 0]],\n", - " transect[\"depths\"][[-1, -1]],\n", - " facecolor=\"none\",\n", - " hatch=\"//\",\n", - " edgecolor=[0.4, 0.4, 0.4],\n", - " linewidth=0.0,\n", - " )\n", - "\n", - " indices = np.nonzero(transect[\"is_removed\"])[0]\n", - " if len(indices) > 0:\n", - " r_starts = [indices[0]]\n", - " r_ends = []\n", - " breaks = np.nonzero(indices[1:] - indices[:-1] > 1)[0]\n", - " for break_idx in breaks:\n", - " r_ends.append(indices[break_idx])\n", - " r_starts.append(indices[break_idx + 1])\n", - " r_ends.append(indices[-1])\n", - " for r_start, r_end in zip(r_starts, r_ends):\n", - " plt.fill_between(\n", - " tt[[r_start, r_end]],\n", - " transect[\"depths\"][[0, 0]],\n", - " transect[\"depths\"][[-1, -1]],\n", - " facecolor=\"none\",\n", - " hatch=\"\\\\\\\\\",\n", - " edgecolor=[0, 0, 1],\n", - " linewidth=0.0,\n", - " )\n", - "\n", - " plt.gca().invert_yaxis()\n", - " plt.xlabel(\"Timestamp (s)\")\n", - " plt.ylabel(\"Depth (m)\")\n", - " plt.title(\"{} Chunk {}\".format(signal_name, i_transect))\n", - " plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "root_data_dir = \"/media/scott/scratch/Datasets/dsforce/surveyExports\"\n", - "dataset = \"MinasPassage\"\n", - "# has removed window\n", - "sample = \"december2017/december2017_D20171214-T202211_D20171215-T015215\"\n", - "# has passive recording\n", - "# sample = 'september2018/september2018_D20181116-T205220_D20181117-T022218'" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "transect = echofilter.raw.manipulate.load_decomposed_transect_mask(\n", - " os.path.join(root_data_dir, dataset, sample),\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "chunks = echofilter.raw.manipulate.split_transect(**transect)\n", - "\n", - "for i_chunk, chunk in enumerate(chunks):\n", - " plot_transect(chunk, i_chunk)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "root_data_dir = \"/media/scott/scratch/Datasets/dsforce/surveyExports\"\n", - "dataset = \"MinasPassage\"\n", - "\n", - "for sample in [\n", - " \"december2017/december2017_D20171214-T202211_D20171215-T015215\",\n", - " \"december2017/december2017_D20180222-T145219_D20180222-T142214\",\n", - " \"march2018/march2018_D20180330-T202218_D20180331-T015214\",\n", - " \"march2018/march2018_D20180523-T175215_D20180523-T172215\",\n", - " \"september2018/september2018_D20180915-T202216_D20180916-T015217\",\n", - " \"september2018/september2018_D20181116-T205220_D20181117-T022218\",\n", - " \"september2018/september2018_D20181119-T195217_D20181119-T195217\",\n", - "]:\n", - " print(sample)\n", - "\n", - " transect = echofilter.raw.manipulate.load_decomposed_transect_mask(\n", - " os.path.join(root_data_dir, dataset, sample),\n", - " )\n", - "\n", - " print(sample)\n", - "\n", - " chunks = echofilter.raw.manipulate.split_transect(**transect)\n", - "\n", - " for i_chunk, chunk in enumerate(chunks):\n", - " plot_transect(chunk, i_chunk)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Compare against a mobile transect" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "dataset = \"mobile\"\n", - "\n", - "# example with only passive period\n", - "# sample = 'Survey17/Survey17_GR4_T1W_E'\n", - "\n", - "# example with 1 passive period, 1 turbulence cut out\n", - "# sample = 'Survey16/Survey16_GR3_N1W_E'\n", - "\n", - "# example with lots of short passive periods, 1 proper passive period, 1 turbulence cut out\n", - "# sample = 'Survey17/Survey17_GR1_N0W_E'\n", - "\n", - "# example with passive, removed, and patches\n", - "# sample = 'Survey16/Survey16_GR1_N3A_F'\n", - "\n", - "# example with passive, removed, and patches\n", - "sample = \"Survey16/Survey16_GR3_N3A_F\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "transect = echofilter.raw.manipulate.load_decomposed_transect_mask(\n", - " os.path.join(root_data_dir, dataset, sample),\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "chunks = echofilter.raw.manipulate.split_transect(**transect)\n", - "\n", - "for i_chunk, chunk in enumerate(chunks):\n", - " plot_transect(chunk, i_chunk)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.9" - } - }, - "nbformat": 4, - "nbformat_minor": 2 + "nbformat": 4, + "nbformat_minor": 2 } diff --git a/notebooks/check making lines from masked csv.ipynb b/notebooks/check making lines from masked csv.ipynb index 94c9e37f..069b76b4 100644 --- a/notebooks/check making lines from masked csv.ipynb +++ b/notebooks/check making lines from masked csv.ipynb @@ -1,210 +1,210 @@ { - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cd .." - ] + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cd .." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import echofilter.raw" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "\n", + "sns.set()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "turbulence_color = \"c\"\n", + "bottom_color = \"#00ee00\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ROOT_DATA_DIR = \"/media/scott/scratch/Datasets/dsforce/surveyExports\"\n", + "\n", + "sample = \"mobile/Survey17/Survey17_GR4_T1W_E\"\n", + "\n", + "# example with a gap\n", + "sample = \"mobile/Survey16/Survey16_GR2_S3W_E\" # 115 apart = 3.47m\n", + "sample = \"mobile/Survey16/Survey16_GR2_N1A_F\" # 045 apart = 1.36m\n", + "\n", + "fname_raw = os.path.join(ROOT_DATA_DIR, sample + \"_Sv_raw.csv\")\n", + "fname_masked = os.path.join(ROOT_DATA_DIR, sample + \"_Sv.csv\")\n", + "\n", + "ts_raw, depths_raw, signals_raw = echofilter.raw.loader.transect_loader(fname_raw)\n", + "ts_mskd, depths_mskd, signals_mskd = echofilter.raw.loader.transect_loader(fname_masked)\n", + "\n", + "t_bot, d_bot = echofilter.raw.loader.evl_loader(\n", + " os.path.join(ROOT_DATA_DIR, sample + \"_bottom.evl\")\n", + ")\n", + "t_top, d_top = echofilter.raw.loader.evl_loader(\n", + " os.path.join(ROOT_DATA_DIR, sample + \"_turbulence.evl\")\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ts_new, d_top_new, d_bot_new = echofilter.raw.manipulate.make_lines_from_masked_csv(\n", + " fname_masked\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(12, 12))\n", + "plt.pcolormesh(ts_mskd, depths_mskd, signals_mskd.T)\n", + "plt.plot(ts_new, d_top_new, turbulence_color)\n", + "plt.plot(ts_new, d_bot_new, bottom_color)\n", + "plt.gca().invert_yaxis()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Zoomed in on top\n", + "plt.figure(figsize=(12, 12))\n", + "plt.pcolormesh(ts_mskd[-450:], depths_mskd[:500], signals_mskd[-450:, :500].T)\n", + "plt.plot(ts_new[-450:], d_top_new[-450:], turbulence_color)\n", + "# plt.plot(ts_new, d_bot_new, bottom_color)\n", + "plt.gca().invert_yaxis()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Check the resulting masks agree" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ddepths = np.broadcast_to(depths_raw, signals_raw.shape)\n", + "mask_top = np.single(ddepths < np.expand_dims(d_top_new, -1))\n", + "mask_bot = np.single(ddepths > np.expand_dims(d_bot_new, -1))\n", + "mask_new = ~np.any([mask_top, mask_bot], axis=0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(12, 12))\n", + "plt.pcolormesh(ts_mskd, depths_mskd[:2000], ~np.isnan(signals_mskd)[:, :2000].T)\n", + "plt.plot(t_top, d_top, turbulence_color)\n", + "plt.plot(t_bot, d_bot, bottom_color)\n", + "plt.gca().invert_yaxis()\n", + "plt.title(\"Original mask and lines\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(12, 12))\n", + "plt.pcolormesh(ts_mskd, depths_mskd[:2000], mask_new[:, :2000].T)\n", + "plt.plot(ts_new, d_top_new, turbulence_color)\n", + "plt.plot(ts_new, d_bot_new, bottom_color)\n", + "plt.gca().invert_yaxis()\n", + "plt.title(\"New mask and lines\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(12, 12))\n", + "plt.pcolormesh(\n", + " ts_mskd, depths_mskd[:2000], (mask_new != ~np.isnan(signals_mskd))[:, :2000].T\n", + ")\n", + "# plt.plot(ts_new, d_top_new, turbulence_color)\n", + "# plt.plot(ts_new, d_bot_new, bottom_color)\n", + "plt.gca().invert_yaxis()\n", + "plt.title(\"Mask difference\")\n", + "plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.15" + } }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import echofilter.raw" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "\n", - "import numpy as np" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import matplotlib.pyplot as plt\n", - "import seaborn as sns\n", - "\n", - "sns.set()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "turbulence_color = \"c\"\n", - "bottom_color = \"#00ee00\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "ROOT_DATA_DIR = \"/media/scott/scratch/Datasets/dsforce/surveyExports\"\n", - "\n", - "sample = \"mobile/Survey17/Survey17_GR4_T1W_E\"\n", - "\n", - "# example with a gap\n", - "sample = \"mobile/Survey16/Survey16_GR2_S3W_E\" # 115 apart = 3.47m\n", - "sample = \"mobile/Survey16/Survey16_GR2_N1A_F\" # 045 apart = 1.36m\n", - "\n", - "fname_raw = os.path.join(ROOT_DATA_DIR, sample + \"_Sv_raw.csv\")\n", - "fname_masked = os.path.join(ROOT_DATA_DIR, sample + \"_Sv.csv\")\n", - "\n", - "ts_raw, depths_raw, signals_raw = echofilter.raw.loader.transect_loader(fname_raw)\n", - "ts_mskd, depths_mskd, signals_mskd = echofilter.raw.loader.transect_loader(fname_masked)\n", - "\n", - "t_bot, d_bot = echofilter.raw.loader.evl_loader(\n", - " os.path.join(ROOT_DATA_DIR, sample + \"_bottom.evl\")\n", - ")\n", - "t_top, d_top = echofilter.raw.loader.evl_loader(\n", - " os.path.join(ROOT_DATA_DIR, sample + \"_turbulence.evl\")\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "ts_new, d_top_new, d_bot_new = echofilter.raw.manipulate.make_lines_from_masked_csv(\n", - " fname_masked\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(12, 12))\n", - "plt.pcolormesh(ts_mskd, depths_mskd, signals_mskd.T)\n", - "plt.plot(ts_new, d_top_new, turbulence_color)\n", - "plt.plot(ts_new, d_bot_new, bottom_color)\n", - "plt.gca().invert_yaxis()\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Zoomed in on top\n", - "plt.figure(figsize=(12, 12))\n", - "plt.pcolormesh(ts_mskd[-450:], depths_mskd[:500], signals_mskd[-450:, :500].T)\n", - "plt.plot(ts_new[-450:], d_top_new[-450:], turbulence_color)\n", - "# plt.plot(ts_new, d_bot_new, bottom_color)\n", - "plt.gca().invert_yaxis()\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Check the resulting masks agree" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "ddepths = np.broadcast_to(depths_raw, signals_raw.shape)\n", - "mask_top = np.single(ddepths < np.expand_dims(d_top_new, -1))\n", - "mask_bot = np.single(ddepths > np.expand_dims(d_bot_new, -1))\n", - "mask_new = ~np.any([mask_top, mask_bot], axis=0)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(12, 12))\n", - "plt.pcolormesh(ts_mskd, depths_mskd[:2000], ~np.isnan(signals_mskd)[:, :2000].T)\n", - "plt.plot(t_top, d_top, turbulence_color)\n", - "plt.plot(t_bot, d_bot, bottom_color)\n", - "plt.gca().invert_yaxis()\n", - "plt.title(\"Original mask and lines\")\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(12, 12))\n", - "plt.pcolormesh(ts_mskd, depths_mskd[:2000], mask_new[:, :2000].T)\n", - "plt.plot(ts_new, d_top_new, turbulence_color)\n", - "plt.plot(ts_new, d_bot_new, bottom_color)\n", - "plt.gca().invert_yaxis()\n", - "plt.title(\"New mask and lines\")\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(12, 12))\n", - "plt.pcolormesh(\n", - " ts_mskd, depths_mskd[:2000], (mask_new != ~np.isnan(signals_mskd))[:, :2000].T\n", - ")\n", - "# plt.plot(ts_new, d_top_new, turbulence_color)\n", - "# plt.plot(ts_new, d_bot_new, bottom_color)\n", - "plt.gca().invert_yaxis()\n", - "plt.title(\"Mask difference\")\n", - "plt.show()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.15" - } - }, - "nbformat": 4, - "nbformat_minor": 2 + "nbformat": 4, + "nbformat_minor": 2 } diff --git a/notebooks/check splitting passive data.ipynb b/notebooks/check splitting passive data.ipynb index 6d483477..bfa57132 100644 --- a/notebooks/check splitting passive data.ipynb +++ b/notebooks/check splitting passive data.ipynb @@ -1,445 +1,445 @@ { - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cd .." - ] + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cd .." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "\n", + "sns.set()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import echofilter.raw" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "turbulence_color = \"c\"\n", + "bottom_color = \"#00ee00\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "root_data_dir = \"/media/scott/scratch/Datasets/dsforce/surveyExports\"\n", + "# example with 1 passive period, 1 turbulence cut out\n", + "# sample = 'mobile/Survey16/Survey16_GR3_N1W_E'\n", + "# example with lots of short passive periods, 1 proper passive period, 1 turbulence cut out\n", + "sample = \"mobile/Survey17/Survey17_GR1_N0W_E\"\n", + "# sample done incorrectly\n", + "# sample = 'MinasPassage/march2018/march2018_D20180330-T202218_D20180331-T015214'\n", + "\n", + "# sample = 'MinasPassage/december2017/december2017_D20171214-T202211_D20171215-T015215'\n", + "\n", + "fname_raw = os.path.join(root_data_dir, sample + \"_Sv_raw.csv\")\n", + "fname_masked = os.path.join(root_data_dir, sample + \"_Sv.csv\")\n", + "\n", + "ts_raw, depths_raw, signals_raw = echofilter.raw.loader.transect_loader(fname_raw)\n", + "# ts_mskd, depths_mskd, signals_mskd = echofilter.raw.loader.transect_loader(fname_masked)\n", + "\n", + "# t_bot, d_bot = echofilter.raw.loader.evl_loader(os.path.join(root_data_dir, sample + '_bottom.evl'))\n", + "# t_top, d_top = echofilter.raw.loader.evl_loader(os.path.join(root_data_dir, sample + '_turbulence.evl'))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "passive_starts, passive_ends = echofilter.raw.manipulate.find_passive_data(signals_raw)\n", + "print(passive_starts)\n", + "print(passive_ends)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(12, 12))\n", + "plt.pcolormesh(np.arange(0, signals_raw.shape[0]), depths_raw, signals_raw.T)\n", + "\n", + "for r_start, r_end in zip(passive_starts, passive_ends):\n", + " plt.fill_between(\n", + " [r_start, r_end],\n", + " depths_raw[[0, 0]],\n", + " depths_raw[[-1, -1]],\n", + " facecolor=\"none\",\n", + " hatch=\"//\",\n", + " edgecolor=\"k\",\n", + " linewidth=0.0,\n", + " )\n", + "\n", + "plt.xlabel(\"Timestamp (s)\")\n", + "plt.ylabel(\"Depth (m)\")\n", + "plt.gca().invert_yaxis()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for index_start, index_end in zip(passive_starts, passive_ends):\n", + " plt.figure(figsize=(12, 12))\n", + " plt.pcolormesh(\n", + " ts_raw[index_start:index_end],\n", + " depths_raw,\n", + " signals_raw[index_start:index_end, :].T,\n", + " )\n", + " plt.gca().invert_yaxis()\n", + " plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for index_start, index_end in zip(\n", + " np.concatenate(([0], passive_ends)),\n", + " np.concatenate((passive_starts, [signals_raw.shape[0]])),\n", + "):\n", + " index_start = int(index_start)\n", + " index_end = int(index_end)\n", + " if index_start == index_end:\n", + " continue\n", + " plt.figure(figsize=(12, 12))\n", + " plt.pcolormesh(\n", + " ts_raw[index_start:index_end],\n", + " depths_raw,\n", + " signals_raw[index_start:index_end, :].T,\n", + " )\n", + " plt.gca().invert_yaxis()\n", + " plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "n_depth = 50\n", + "\n", + "plt.figure(figsize=(12, 12))\n", + "plt.pcolormesh(\n", + " np.arange(0, signals_raw.shape[0]), depths_raw[:n_depth], signals_raw[:, :n_depth].T\n", + ")\n", + "\n", + "for r_start, r_end in zip(passive_starts, passive_ends):\n", + " plt.fill_between(\n", + " [r_start, r_end],\n", + " depths_raw[[0, 0]],\n", + " depths_raw[[n_depth - 1, n_depth - 1]],\n", + " facecolor=\"none\",\n", + " hatch=\"//\",\n", + " edgecolor=\"k\",\n", + " linewidth=0.0,\n", + " )\n", + "\n", + "plt.xlabel(\"Timestamp (s)\")\n", + "plt.ylabel(\"Depth (m)\")\n", + "plt.gca().invert_yaxis()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "n_depth = 40\n", + "\n", + "for idx in np.concatenate((passive_starts, passive_ends)):\n", + " plt.figure(figsize=(12, 12))\n", + " plt.pcolormesh(\n", + " np.arange(max(0, idx - 4), min(len(ts_raw), idx + 5)),\n", + " depths_raw[:n_depth],\n", + " signals_raw[max(0, idx - 4) : min(len(ts_raw), idx + 5), :n_depth].T,\n", + " )\n", + " plt.gca().invert_yaxis()\n", + " plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "t0 = 230 # 190 # 0\n", + "t1 = 255 # 220 # 65 # signals_raw.shape[0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "n_times = 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "n_depth = len(depths_raw)\n", + "\n", + "plt.figure(figsize=(12, 12))\n", + "plt.pcolormesh(np.arange(t0, t1), depths_raw[:n_depth], signals_raw[t0:t1, :n_depth].T)\n", + "\n", + "for r_start, r_end in zip(passive_starts, passive_ends):\n", + " if r_end > n_times:\n", + " continue\n", + " plt.fill_between(\n", + " [r_start, r_end],\n", + " depths_raw[[0, 0]],\n", + " depths_raw[[n_depth - 1, n_depth - 1]],\n", + " facecolor=\"none\",\n", + " hatch=\"//\",\n", + " edgecolor=\"k\",\n", + " linewidth=0.0,\n", + " )\n", + "\n", + "plt.xlabel(\"Timestamp (s)\")\n", + "plt.ylabel(\"Depth (m)\")\n", + "plt.gca().invert_yaxis()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "n_depth = 50\n", + "\n", + "plt.figure(figsize=(12, 12))\n", + "plt.pcolormesh(np.arange(t0, t1), depths_raw[:n_depth], signals_raw[t0:t1, :n_depth].T)\n", + "\n", + "for r_start, r_end in zip(passive_starts, passive_ends):\n", + " if r_end > n_times:\n", + " continue\n", + " plt.fill_between(\n", + " [r_start, r_end],\n", + " depths_raw[[0, 0]],\n", + " depths_raw[[n_depth - 1, n_depth - 1]],\n", + " facecolor=\"none\",\n", + " hatch=\"//\",\n", + " edgecolor=\"k\",\n", + " linewidth=0.0,\n", + " )\n", + "\n", + "plt.xlabel(\"Timestamp (s)\")\n", + "plt.ylabel(\"Depth (m)\")\n", + "plt.gca().invert_yaxis()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "n_depth = 65\n", + "\n", + "plt.figure(figsize=(12, 12))\n", + "plt.pcolormesh(\n", + " np.arange(t0, t1),\n", + " depths_raw[:n_depth],\n", + " np.diff(signals_raw[t0:t1, :n_depth], axis=0).T,\n", + ")\n", + "plt.colorbar()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "n_depth = 40\n", + "\n", + "plt.figure(figsize=(12, 12))\n", + "plt.pcolormesh(\n", + " np.arange(t0, t1),\n", + " depths_raw[:n_depth],\n", + " np.diff(signals_raw[t0:t1, :n_depth], axis=0).T,\n", + ")\n", + "plt.colorbar()\n", + "plt.show()\n", + "\n", + "plt.figure(figsize=(12, 9))\n", + "plt.plot(np.median(np.diff(signals_raw[t0:t1, :n_depth], axis=0), axis=1))\n", + "plt.show()\n", + "\n", + "plt.figure(figsize=(12, 9))\n", + "plt.plot(np.mean(np.diff(signals_raw[t0:t1, :n_depth], axis=0), axis=1))\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(12, 9))\n", + "for n_depth in [20, 26, 27, 30, 35, 40, 45, 50, 60, signals_raw.shape[1]]:\n", + " yy = np.median(np.diff(signals_raw[t0:t1, :n_depth], axis=0), axis=1)\n", + " plt.plot(yy, label=\"{}: {}\".format(n_depth, np.max(np.abs(yy))))\n", + "plt.legend()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(12, 9))\n", + "for n_depth in [20, 26, 27, 30, 35, 40, 45, 50, 60, signals_raw.shape[1]]:\n", + " yy = np.median(np.diff(signals_raw[t0:t1, :n_depth], axis=0), axis=1)\n", + " plt.plot(yy, label=\"{}: {}\".format(n_depth, np.max(np.abs(yy))))\n", + "plt.legend()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "n_depths = [\n", + " 10,\n", + " 20,\n", + " 25,\n", + " 26,\n", + " 29,\n", + " 30,\n", + " 31,\n", + " 32,\n", + " 33,\n", + " 34,\n", + " 35,\n", + " 36,\n", + " 37,\n", + " 38,\n", + " 39,\n", + " 40,\n", + " 41,\n", + " 42,\n", + " 43,\n", + " 44,\n", + " 45,\n", + " 50,\n", + " 60,\n", + " 100,\n", + "]\n", + "medians = []\n", + "means = []\n", + "\n", + "for n_depth in n_depths:\n", + " yy = np.diff(signals_raw[t0:t1, :n_depth], axis=0)\n", + " medians.append(np.max(np.abs(np.median(yy, axis=1))))\n", + " means.append(np.max(np.abs(np.mean(yy, axis=1))))\n", + "\n", + "plt.plot(n_depths, medians, label=\"median\")\n", + "plt.plot(n_depths, means, label=\"mean\")\n", + "\n", + "plt.legend()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "medians" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "n_depths" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.15" + } }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "\n", - "import numpy as np" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import matplotlib.pyplot as plt\n", - "import seaborn as sns\n", - "\n", - "sns.set()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import echofilter.raw" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "turbulence_color = \"c\"\n", - "bottom_color = \"#00ee00\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "root_data_dir = \"/media/scott/scratch/Datasets/dsforce/surveyExports\"\n", - "# example with 1 passive period, 1 turbulence cut out\n", - "# sample = 'mobile/Survey16/Survey16_GR3_N1W_E'\n", - "# example with lots of short passive periods, 1 proper passive period, 1 turbulence cut out\n", - "sample = \"mobile/Survey17/Survey17_GR1_N0W_E\"\n", - "# sample done incorrectly\n", - "# sample = 'MinasPassage/march2018/march2018_D20180330-T202218_D20180331-T015214'\n", - "\n", - "# sample = 'MinasPassage/december2017/december2017_D20171214-T202211_D20171215-T015215'\n", - "\n", - "fname_raw = os.path.join(root_data_dir, sample + \"_Sv_raw.csv\")\n", - "fname_masked = os.path.join(root_data_dir, sample + \"_Sv.csv\")\n", - "\n", - "ts_raw, depths_raw, signals_raw = echofilter.raw.loader.transect_loader(fname_raw)\n", - "# ts_mskd, depths_mskd, signals_mskd = echofilter.raw.loader.transect_loader(fname_masked)\n", - "\n", - "# t_bot, d_bot = echofilter.raw.loader.evl_loader(os.path.join(root_data_dir, sample + '_bottom.evl'))\n", - "# t_top, d_top = echofilter.raw.loader.evl_loader(os.path.join(root_data_dir, sample + '_turbulence.evl'))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "passive_starts, passive_ends = echofilter.raw.manipulate.find_passive_data(signals_raw)\n", - "print(passive_starts)\n", - "print(passive_ends)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(12, 12))\n", - "plt.pcolormesh(np.arange(0, signals_raw.shape[0]), depths_raw, signals_raw.T)\n", - "\n", - "for r_start, r_end in zip(passive_starts, passive_ends):\n", - " plt.fill_between(\n", - " [r_start, r_end],\n", - " depths_raw[[0, 0]],\n", - " depths_raw[[-1, -1]],\n", - " facecolor=\"none\",\n", - " hatch=\"//\",\n", - " edgecolor=\"k\",\n", - " linewidth=0.0,\n", - " )\n", - "\n", - "plt.xlabel(\"Timestamp (s)\")\n", - "plt.ylabel(\"Depth (m)\")\n", - "plt.gca().invert_yaxis()\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "for index_start, index_end in zip(passive_starts, passive_ends):\n", - " plt.figure(figsize=(12, 12))\n", - " plt.pcolormesh(\n", - " ts_raw[index_start:index_end],\n", - " depths_raw,\n", - " signals_raw[index_start:index_end, :].T,\n", - " )\n", - " plt.gca().invert_yaxis()\n", - " plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "for index_start, index_end in zip(\n", - " np.concatenate(([0], passive_ends)),\n", - " np.concatenate((passive_starts, [signals_raw.shape[0]])),\n", - "):\n", - " index_start = int(index_start)\n", - " index_end = int(index_end)\n", - " if index_start == index_end:\n", - " continue\n", - " plt.figure(figsize=(12, 12))\n", - " plt.pcolormesh(\n", - " ts_raw[index_start:index_end],\n", - " depths_raw,\n", - " signals_raw[index_start:index_end, :].T,\n", - " )\n", - " plt.gca().invert_yaxis()\n", - " plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "n_depth = 50\n", - "\n", - "plt.figure(figsize=(12, 12))\n", - "plt.pcolormesh(\n", - " np.arange(0, signals_raw.shape[0]), depths_raw[:n_depth], signals_raw[:, :n_depth].T\n", - ")\n", - "\n", - "for r_start, r_end in zip(passive_starts, passive_ends):\n", - " plt.fill_between(\n", - " [r_start, r_end],\n", - " depths_raw[[0, 0]],\n", - " depths_raw[[n_depth - 1, n_depth - 1]],\n", - " facecolor=\"none\",\n", - " hatch=\"//\",\n", - " edgecolor=\"k\",\n", - " linewidth=0.0,\n", - " )\n", - "\n", - "plt.xlabel(\"Timestamp (s)\")\n", - "plt.ylabel(\"Depth (m)\")\n", - "plt.gca().invert_yaxis()\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "n_depth = 40\n", - "\n", - "for idx in np.concatenate((passive_starts, passive_ends)):\n", - " plt.figure(figsize=(12, 12))\n", - " plt.pcolormesh(\n", - " np.arange(max(0, idx - 4), min(len(ts_raw), idx + 5)),\n", - " depths_raw[:n_depth],\n", - " signals_raw[max(0, idx - 4) : min(len(ts_raw), idx + 5), :n_depth].T,\n", - " )\n", - " plt.gca().invert_yaxis()\n", - " plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "t0 = 230 # 190 # 0\n", - "t1 = 255 # 220 # 65 # signals_raw.shape[0]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "n_times = 2" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "n_depth = len(depths_raw)\n", - "\n", - "plt.figure(figsize=(12, 12))\n", - "plt.pcolormesh(np.arange(t0, t1), depths_raw[:n_depth], signals_raw[t0:t1, :n_depth].T)\n", - "\n", - "for r_start, r_end in zip(passive_starts, passive_ends):\n", - " if r_end > n_times:\n", - " continue\n", - " plt.fill_between(\n", - " [r_start, r_end],\n", - " depths_raw[[0, 0]],\n", - " depths_raw[[n_depth - 1, n_depth - 1]],\n", - " facecolor=\"none\",\n", - " hatch=\"//\",\n", - " edgecolor=\"k\",\n", - " linewidth=0.0,\n", - " )\n", - "\n", - "plt.xlabel(\"Timestamp (s)\")\n", - "plt.ylabel(\"Depth (m)\")\n", - "plt.gca().invert_yaxis()\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "n_depth = 50\n", - "\n", - "plt.figure(figsize=(12, 12))\n", - "plt.pcolormesh(np.arange(t0, t1), depths_raw[:n_depth], signals_raw[t0:t1, :n_depth].T)\n", - "\n", - "for r_start, r_end in zip(passive_starts, passive_ends):\n", - " if r_end > n_times:\n", - " continue\n", - " plt.fill_between(\n", - " [r_start, r_end],\n", - " depths_raw[[0, 0]],\n", - " depths_raw[[n_depth - 1, n_depth - 1]],\n", - " facecolor=\"none\",\n", - " hatch=\"//\",\n", - " edgecolor=\"k\",\n", - " linewidth=0.0,\n", - " )\n", - "\n", - "plt.xlabel(\"Timestamp (s)\")\n", - "plt.ylabel(\"Depth (m)\")\n", - "plt.gca().invert_yaxis()\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "n_depth = 65\n", - "\n", - "plt.figure(figsize=(12, 12))\n", - "plt.pcolormesh(\n", - " np.arange(t0, t1),\n", - " depths_raw[:n_depth],\n", - " np.diff(signals_raw[t0:t1, :n_depth], axis=0).T,\n", - ")\n", - "plt.colorbar()\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "n_depth = 40\n", - "\n", - "plt.figure(figsize=(12, 12))\n", - "plt.pcolormesh(\n", - " np.arange(t0, t1),\n", - " depths_raw[:n_depth],\n", - " np.diff(signals_raw[t0:t1, :n_depth], axis=0).T,\n", - ")\n", - "plt.colorbar()\n", - "plt.show()\n", - "\n", - "plt.figure(figsize=(12, 9))\n", - "plt.plot(np.median(np.diff(signals_raw[t0:t1, :n_depth], axis=0), axis=1))\n", - "plt.show()\n", - "\n", - "plt.figure(figsize=(12, 9))\n", - "plt.plot(np.mean(np.diff(signals_raw[t0:t1, :n_depth], axis=0), axis=1))\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(12, 9))\n", - "for n_depth in [20, 26, 27, 30, 35, 40, 45, 50, 60, signals_raw.shape[1]]:\n", - " yy = np.median(np.diff(signals_raw[t0:t1, :n_depth], axis=0), axis=1)\n", - " plt.plot(yy, label=\"{}: {}\".format(n_depth, np.max(np.abs(yy))))\n", - "plt.legend()\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(12, 9))\n", - "for n_depth in [20, 26, 27, 30, 35, 40, 45, 50, 60, signals_raw.shape[1]]:\n", - " yy = np.median(np.diff(signals_raw[t0:t1, :n_depth], axis=0), axis=1)\n", - " plt.plot(yy, label=\"{}: {}\".format(n_depth, np.max(np.abs(yy))))\n", - "plt.legend()\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "n_depths = [\n", - " 10,\n", - " 20,\n", - " 25,\n", - " 26,\n", - " 29,\n", - " 30,\n", - " 31,\n", - " 32,\n", - " 33,\n", - " 34,\n", - " 35,\n", - " 36,\n", - " 37,\n", - " 38,\n", - " 39,\n", - " 40,\n", - " 41,\n", - " 42,\n", - " 43,\n", - " 44,\n", - " 45,\n", - " 50,\n", - " 60,\n", - " 100,\n", - "]\n", - "medians = []\n", - "means = []\n", - "\n", - "for n_depth in n_depths:\n", - " yy = np.diff(signals_raw[t0:t1, :n_depth], axis=0)\n", - " medians.append(np.max(np.abs(np.median(yy, axis=1))))\n", - " means.append(np.max(np.abs(np.mean(yy, axis=1))))\n", - "\n", - "plt.plot(n_depths, medians, label=\"median\")\n", - "plt.plot(n_depths, means, label=\"mean\")\n", - "\n", - "plt.legend()\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "medians" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "n_depths" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.15" - } - }, - "nbformat": 4, - "nbformat_minor": 2 + "nbformat": 4, + "nbformat_minor": 2 }