From 8a6db70f1a9786007705a3888300028af4044de1 Mon Sep 17 00:00:00 2001 From: Will Schlitzer Date: Thu, 26 Aug 2021 19:38:36 +0100 Subject: [PATCH] Add function to import hotspot dataset (#1386) Co-authored-by: Wei Ji <23487320+weiji14@users.noreply.github.com> Co-authored-by: Meghan Jones --- doc/api/index.rst | 1 + pygmt/datasets/__init__.py | 1 + pygmt/datasets/samples.py | 26 ++++++++++++++++++++++++++ pygmt/helpers/testing.py | 1 + pygmt/tests/test_datasets_samples.py | 17 +++++++++++++++++ 5 files changed, 46 insertions(+) diff --git a/doc/api/index.rst b/doc/api/index.rst index a9bec80af88..74bd2f182c4 100644 --- a/doc/api/index.rst +++ b/doc/api/index.rst @@ -161,6 +161,7 @@ and store them in the GMT cache folder. datasets.load_sample_bathymetry datasets.load_usgs_quakes datasets.load_fractures_compilation + datasets.load_hotspots .. automodule:: pygmt.exceptions diff --git a/pygmt/datasets/__init__.py b/pygmt/datasets/__init__.py index 1d06e7f08fe..1ac58d23b87 100644 --- a/pygmt/datasets/__init__.py +++ b/pygmt/datasets/__init__.py @@ -5,6 +5,7 @@ from pygmt.datasets.earth_relief import load_earth_relief from pygmt.datasets.samples import ( load_fractures_compilation, + load_hotspots, load_japan_quakes, load_ocean_ridge_points, load_sample_bathymetry, diff --git a/pygmt/datasets/samples.py b/pygmt/datasets/samples.py index 6df2c43354c..18ce24f0a30 100644 --- a/pygmt/datasets/samples.py +++ b/pygmt/datasets/samples.py @@ -123,3 +123,29 @@ def load_fractures_compilation(): fname = which("@fractures_06.txt", download="c") data = pd.read_csv(fname, header=None, sep=r"\s+", names=["azimuth", "length"]) return data[["length", "azimuth"]] + + +def load_hotspots(): + """ + Load a table with the locations, names, and suggested symbol sizes of + hotspots. + + This is the ``@hotspots.txt`` dataset used in the GMT tutorials, with data + from Mueller, Royer, and Lawver, 1993, Geology, vol. 21, pp. 275-278. The + main 5 hotspots used by Doubrovine et al. 
[2012] have symbol sizes twice + the size of all other hotspots. + + The data are downloaded to a cache directory (usually ``~/.gmt/cache``) the + first time you invoke this function. Afterwards, it will load the data from + the cache. So you'll need an internet connection the first time around. + + Returns + ------- + data : pandas.DataFrame + The data table with columns "longitude", "latitude", "symbol_size", and + "place_name". + """ + fname = which("@hotspots.txt", download="c") + columns = ["longitude", "latitude", "symbol_size", "place_name"] + data = pd.read_table(filepath_or_buffer=fname, sep="\t", skiprows=3, names=columns) + return data diff --git a/pygmt/helpers/testing.py b/pygmt/helpers/testing.py index a7aabe16e89..9dceefe2a87 100644 --- a/pygmt/helpers/testing.py +++ b/pygmt/helpers/testing.py @@ -164,6 +164,7 @@ def download_test_data(): "@N00W090.earth_relief_03m_p.nc", # Other cache files "@fractures_06.txt", + "@hotspots.txt", "@ridge.txt", "@srtm_tiles.nc", # needed for 03s and 01s relief data "@Table_5_11.txt", diff --git a/pygmt/tests/test_datasets_samples.py b/pygmt/tests/test_datasets_samples.py index ff55c64d652..ef598215822 100644 --- a/pygmt/tests/test_datasets_samples.py +++ b/pygmt/tests/test_datasets_samples.py @@ -1,8 +1,10 @@ """ Test basic functionality for loading sample datasets. """ +import pandas as pd from pygmt.datasets import ( load_fractures_compilation, + load_hotspots, load_japan_quakes, load_ocean_ridge_points, load_sample_bathymetry, @@ -72,3 +74,18 @@ def test_fractures_compilation(): assert summary.loc["max", "length"] == 984.652 assert summary.loc["min", "azimuth"] == 0.0 assert summary.loc["max", "azimuth"] == 360.0 + + +def test_hotspots(): """ Check that the @hotspots.txt dataset loads without errors. """ data = load_hotspots() assert data.shape == (55, 4) assert data.columns.values.tolist() == [ "longitude", "latitude", "symbol_size", "place_name", ] assert isinstance(data, pd.DataFrame)