Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor blockm* to use virtualfile_from_data and improve i/o #1280

Merged
merged 10 commits into from
May 24, 2021
48 changes: 19 additions & 29 deletions pygmt/src/blockm.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,9 @@
"""
import pandas as pd
from pygmt.clib import Session
from pygmt.exceptions import GMTInvalidInput
from pygmt.helpers import (
GMTTempFile,
build_arg_string,
data_kind,
dummy_context,
fmt_docstring,
kwargs_to_strings,
use_alias,
Expand Down Expand Up @@ -41,29 +38,24 @@ def _blockm(block_method, table, outfile, **kwargs):
set by ``outfile``)
"""

kind = data_kind(table)
with GMTTempFile(suffix=".csv") as tmpfile:
with Session() as lib:
if kind == "matrix":
if not hasattr(table, "values"):
raise GMTInvalidInput(f"Unrecognized data type: {type(table)}")
file_context = lib.virtualfile_from_matrix(table.values)
elif kind == "file":
if outfile is None:
raise GMTInvalidInput("Please pass in a str to 'outfile'")
file_context = dummy_context(table)
else:
raise GMTInvalidInput(f"Unrecognized data type: {type(table)}")

with file_context as infile:
# Choose how data will be passed into the module
table_context = lib.virtualfile_from_data(check_kind="vector", data=table)
# Run blockm* on data table
with table_context as infile:
if outfile is None:
outfile = tmpfile.name
arg_str = " ".join([infile, build_arg_string(kwargs), "->" + outfile])
lib.call_module(module=block_method, args=arg_str)

# Read temporary csv output to a pandas table
if outfile == tmpfile.name: # if user did not set outfile, return pd.DataFrame
result = pd.read_csv(tmpfile.name, sep="\t", names=table.columns)
try:
column_names = table.columns.to_list()
result = pd.read_csv(tmpfile.name, sep="\t", names=column_names)
except AttributeError: # 'str' object has no attribute 'columns'
result = pd.read_csv(tmpfile.name, sep="\t", header=None, comment=">")
elif outfile != tmpfile.name: # return None if outfile set, output in outfile
result = None

Expand Down Expand Up @@ -95,10 +87,10 @@ def blockmean(table, outfile=None, **kwargs):

Parameters
----------
table : pandas.DataFrame or str
Either a pandas dataframe with (x, y, z) or (longitude, latitude,
elevation) values in the first three columns, or a file name to an
ASCII data table.
table : str or {table-like}
Pass in (x, y, z) or (longitude, latitude, elevation) values by
providing a file name to an ASCII data table, a 2D
{table-classes}.

spacing : str
*xinc*\[\ *unit*\][**+e**\|\ **n**]
Expand All @@ -110,8 +102,7 @@ def blockmean(table, outfile=None, **kwargs):
Specify the region of interest.

outfile : str
Required if ``table`` is a file. The file name for the output ASCII
file.
The file name for the output ASCII file.

{V}
{a}
Expand Down Expand Up @@ -156,10 +147,10 @@ def blockmedian(table, outfile=None, **kwargs):

Parameters
----------
table : pandas.DataFrame or str
Either a pandas dataframe with (x, y, z) or (longitude, latitude,
elevation) values in the first three columns, or a file name to an
ASCII data table.
table : str or {table-like}
Pass in (x, y, z) or (longitude, latitude, elevation) values by
providing a file name to an ASCII data table, a 2D
{table-classes}.

spacing : str
*xinc*\[\ *unit*\][**+e**\|\ **n**]
Expand All @@ -171,8 +162,7 @@ def blockmedian(table, outfile=None, **kwargs):
Specify the region of interest.

outfile : str
Required if ``table`` is a file. The file name for the output ASCII
file.
The file name for the output ASCII file.

{V}
{a}
Expand Down
36 changes: 20 additions & 16 deletions pygmt/tests/test_blockmean.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,38 +12,42 @@
from pygmt.helpers import GMTTempFile, data_kind


def test_blockmean_input_dataframe():
@pytest.fixture(scope="module", name="dataframe")
def fixture_dataframe():
"""
Load the table data from the sample bathymetry dataset.
"""
return load_sample_bathymetry()


def test_blockmean_input_dataframe(dataframe):
"""
Run blockmean by passing in a pandas.DataFrame as input.
"""
dataframe = load_sample_bathymetry()
output = blockmean(table=dataframe, spacing="5m", region=[245, 255, 20, 30])
assert isinstance(output, pd.DataFrame)
assert all(dataframe.columns == output.columns)
assert output.shape == (5849, 3)
npt.assert_allclose(output.iloc[0], [245.888877, 29.978707, -384.0])

return output


def test_blockmean_wrong_kind_of_input_table_matrix():
def test_blockmean_input_table_matrix(dataframe):
"""
Run blockmean using table input that is not a pandas.DataFrame but still a
matrix.
"""
dataframe = load_sample_bathymetry()
invalid_table = dataframe.values
assert data_kind(invalid_table) == "matrix"
with pytest.raises(GMTInvalidInput):
blockmean(table=invalid_table, spacing="5m", region=[245, 255, 20, 30])
table = dataframe.values
output = blockmean(table=table, spacing="5m", region=[245, 255, 20, 30])
assert isinstance(output, pd.DataFrame)
assert output.shape == (5849, 3)
npt.assert_allclose(output.iloc[0], [245.888877, 29.978707, -384.0])


def test_blockmean_wrong_kind_of_input_table_grid():
def test_blockmean_wrong_kind_of_input_table_grid(dataframe):
"""
Run blockmean using table input that is not a pandas.DataFrame or file but
a grid.
"""
dataframe = load_sample_bathymetry()
invalid_table = dataframe.bathymetry.to_xarray()
assert data_kind(invalid_table) == "grid"
with pytest.raises(GMTInvalidInput):
Expand All @@ -67,12 +71,12 @@ def test_blockmean_input_filename():
assert output.shape == (5849, 3)
npt.assert_allclose(output.iloc[0], [245.888877, 29.978707, -384.0])

return output


def test_blockmean_without_outfile_setting():
"""
Run blockmean without setting the outfile parameter.
"""
with pytest.raises(GMTInvalidInput):
blockmean(table="@tut_ship.xyz", spacing="5m", region=[245, 255, 20, 30])
output = blockmean(table="@tut_ship.xyz", spacing="5m", region=[245, 255, 20, 30])
assert isinstance(output, pd.DataFrame)
assert output.shape == (5849, 3)
npt.assert_allclose(output.iloc[0], [245.888877, 29.978707, -384.0])
36 changes: 20 additions & 16 deletions pygmt/tests/test_blockmedian.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,38 +12,42 @@
from pygmt.helpers import GMTTempFile, data_kind


def test_blockmedian_input_dataframe():
@pytest.fixture(scope="module", name="dataframe")
def fixture_dataframe():
"""
Load the table data from the sample bathymetry dataset.
"""
return load_sample_bathymetry()


def test_blockmedian_input_dataframe(dataframe):
"""
Run blockmedian by passing in a pandas.DataFrame as input.
"""
dataframe = load_sample_bathymetry()
output = blockmedian(table=dataframe, spacing="5m", region=[245, 255, 20, 30])
assert isinstance(output, pd.DataFrame)
assert all(dataframe.columns == output.columns)
assert output.shape == (5849, 3)
npt.assert_allclose(output.iloc[0], [245.88819, 29.97895, -385.0])

return output


def test_blockmedian_wrong_kind_of_input_table_matrix():
def test_blockmedian_wrong_kind_of_input_table_matrix(dataframe):
"""
Run blockmedian using table input that is not a pandas.DataFrame but still
a matrix.
"""
dataframe = load_sample_bathymetry()
invalid_table = dataframe.values
assert data_kind(invalid_table) == "matrix"
with pytest.raises(GMTInvalidInput):
blockmedian(table=invalid_table, spacing="5m", region=[245, 255, 20, 30])
table = dataframe.values
output = blockmedian(table=table, spacing="5m", region=[245, 255, 20, 30])
assert isinstance(output, pd.DataFrame)
assert output.shape == (5849, 3)
npt.assert_allclose(output.iloc[0], [245.88819, 29.97895, -385.0])


def test_blockmedian_wrong_kind_of_input_table_grid():
def test_blockmedian_wrong_kind_of_input_table_grid(dataframe):
"""
Run blockmedian using table input that is not a pandas.DataFrame or file
but a grid.
"""
dataframe = load_sample_bathymetry()
invalid_table = dataframe.bathymetry.to_xarray()
assert data_kind(invalid_table) == "grid"
with pytest.raises(GMTInvalidInput):
Expand All @@ -67,12 +71,12 @@ def test_blockmedian_input_filename():
assert output.shape == (5849, 3)
npt.assert_allclose(output.iloc[0], [245.88819, 29.97895, -385.0])

return output


def test_blockmedian_without_outfile_setting():
"""
Run blockmedian without setting the outfile parameter.
"""
with pytest.raises(GMTInvalidInput):
blockmedian(table="@tut_ship.xyz", spacing="5m", region=[245, 255, 20, 30])
output = blockmedian(table="@tut_ship.xyz", spacing="5m", region=[245, 255, 20, 30])
assert isinstance(output, pd.DataFrame)
assert output.shape == (5849, 3)
npt.assert_allclose(output.iloc[0], [245.88819, 29.97895, -385.0])