Skip to content

Commit

Permalink
Allow passing in pandas dataframes to x2sys_cross (#591)
Browse files Browse the repository at this point in the history
Run crossover analysis directly on pandas.DataFrame inputs
instead of having to write to tab-separated value (TSV) files first!
Implemented by storing pandas.DataFrame data in a temporary file
and passing this intermediate file to x2sys_cross. Need to do some
file parsing to get the right file extension (suffix) for this to work.

* Use tempfile_from_dftrack instead of tempfile_from_buffer
* Don't use GMTTempFile, just generate random filename and write to it
* Reduce git diff and make Windows tests pass by ignoring permission error
* Test input two pandas dataframes to x2sys_cross with time column

Renamed 'result' to 'table' to prevent pylint complaining about
R0914: Too many local variables (16/15) (too-many-locals)

* Improve docstring of x2sys_cross and tempfile_from_dftrack
  • Loading branch information
weiji14 authored Sep 10, 2020
1 parent c06fa44 commit 6deb388
Show file tree
Hide file tree
Showing 2 changed files with 104 additions and 21 deletions.
61 changes: 47 additions & 14 deletions pygmt/tests/test_x2sys_cross.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@ def fixture_tracks():
Load track data from the sample bathymetry file
"""
dataframe = load_sample_bathymetry()
return [dataframe.query(expr="bathymetry > -20")] # reduce size of dataset
dataframe.columns = ["x", "y", "z"] # longitude, latitude, bathymetry
return [dataframe.query(expr="z > -20")] # reduce size of dataset


def test_x2sys_cross_input_file_output_file(mock_x2sys_home):
Expand Down Expand Up @@ -76,25 +77,57 @@ def test_x2sys_cross_input_file_output_dataframe(mock_x2sys_home):
def test_x2sys_cross_input_dataframe_output_dataframe(mock_x2sys_home, tracks):
    """
    Run x2sys_cross by passing in one dataframe, and output internal crossovers
    to a pandas.DataFrame.
    """
    with TemporaryDirectory(prefix="X2SYS", dir=os.getcwd()) as tmpdir:
        # The temporary directory's basename doubles as the x2sys TAG name
        tag = os.path.basename(tmpdir)
        x2sys_init(tag=tag, fmtfile="xyz", force=True)

        # coe="i" asks for internal crossovers, the only kind a single track
        # can produce
        output = x2sys_cross(tracks=tracks, tag=tag, coe="i", verbose="i")

        assert isinstance(output, pd.DataFrame)
        assert output.shape == (14, 12)
        columns = list(output.columns)
        assert columns[:6] == ["x", "y", "i_1", "i_2", "dist_1", "dist_2"]
        assert columns[6:] == ["head_1", "head_2", "vel_1", "vel_2", "z_X", "z_M"]
        # The 3rd and 4th columns are expected to be "i_1"/"i_2" with object
        # dtype here, rather than datetime "t_1"/"t_2" columns
        assert output.dtypes["i_1"].type == np.object_
        assert output.dtypes["i_2"].type == np.object_

    return output

def test_x2sys_cross_input_two_dataframes(mock_x2sys_home):
    """
    Feed two pandas.DataFrame track tables, each carrying a time column, into
    x2sys_cross and check that the external crossovers are returned as a
    pandas.DataFrame.
    """
    with TemporaryDirectory(prefix="X2SYS", dir=os.getcwd()) as tmpdir:
        tag = os.path.basename(tmpdir)
        x2sys_init(
            tag=tag, fmtfile="xyz", suffix="xyzt", units=["de", "se"], force=True
        )

        # Append a time column definition to the x2sys format file
        fmt_path = os.path.join(tmpdir, "xyz.fmt")
        with open(file=fmt_path, mode="a") as fmtfile:
            fmtfile.write("time\ta\tN\t0\t1\t0\t%g\n")

        # Build two reproducible random tracks, each stamped with its own
        # millisecond-spaced time series
        tracks = []
        for seed in (0, 1):
            np.random.seed(seed=seed)
            values = np.random.rand(10, 3)
            frame = pd.DataFrame(data=values, columns=("x", "y", "z"))
            frame["time"] = pd.date_range(
                start=f"2020-{seed}1-01", periods=10, freq="ms"
            )
            tracks.append(frame)

        crossovers = x2sys_cross(tracks=tracks, tag=tag, coe="e", verbose="i")

        assert isinstance(crossovers, pd.DataFrame)
        assert crossovers.shape == (30, 12)
        names = list(crossovers.columns)
        assert names[:6] == ["x", "y", "t_1", "t_2", "dist_1", "dist_2"]
        assert names[6:] == ["head_1", "head_2", "vel_1", "vel_2", "z_X", "z_M"]
        # Both time columns must come back as datetimes
        for time_column in ("t_1", "t_2"):
            assert crossovers.dtypes[time_column].type == np.datetime64


def test_x2sys_cross_input_two_filenames(mock_x2sys_home):
Expand Down Expand Up @@ -131,7 +164,7 @@ def test_x2sys_cross_invalid_tracks_input_type(tracks):
Run x2sys_cross using tracks input that is not a pandas.DataFrame (matrix)
or str (file) type, which would raise a GMTInvalidInput error.
"""
invalid_tracks = tracks[0].to_xarray().bathymetry
invalid_tracks = tracks[0].to_xarray().z
assert data_kind(invalid_tracks) == "grid"
with pytest.raises(GMTInvalidInput):
x2sys_cross(tracks=[invalid_tracks])
Expand Down
64 changes: 57 additions & 7 deletions pygmt/x2sys.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
GMT supplementary X2SYS module for crossover analysis.
"""
import contextlib
import os
from pathlib import Path

import pandas as pd

Expand All @@ -14,10 +16,45 @@
dummy_context,
fmt_docstring,
kwargs_to_strings,
unique_name,
use_alias,
)


@contextlib.contextmanager
def tempfile_from_dftrack(track, suffix):
    """
    Save a pandas.DataFrame track table to a temporary tab-separated ASCII
    text file with a unique name (to prevent clashes when running
    x2sys_cross), adding a suffix extension to the end.

    The file name has no directory component, so the file is written
    relative to the current working directory. It is removed again when the
    context exits.

    Parameters
    ----------
    track : pandas.DataFrame
        A table holding track data with coordinate (x, y) or (lon, lat) values,
        and (optionally) time (t).
    suffix : str
        File extension, e.g. xyz, tsv, etc.

    Yields
    ------
    tmpfilename : str
        A temporary tab-separated value file with a unique name holding the
        track data. E.g. 'track-1a2b3c4.tsv'.
    """
    # Build the name before entering ``try`` so the cleanup in ``finally``
    # can never reference an unbound ``tmpfilename``.
    tmpfilename = f"track-{unique_name()[:7]}.{suffix}"
    try:
        track.to_csv(
            path_or_buf=tmpfilename,
            sep="\t",
            index=False,  # do not write the DataFrame index as a column
            date_format="%Y-%m-%dT%H:%M:%S.%fZ",
        )
        yield tmpfilename
    finally:
        # The file may be missing if ``to_csv`` failed before creating it;
        # do not let the cleanup mask the original exception.
        with contextlib.suppress(FileNotFoundError):
            os.remove(tmpfilename)


@fmt_docstring
@use_alias(
D="fmtfile",
Expand Down Expand Up @@ -158,9 +195,10 @@ def x2sys_cross(tracks=None, outfile=None, **kwargs):
Parameters
----------
tracks : str or list
tracks : pandas.DataFrame or str or list
A table or a list of tables with (x, y) or (lon, lat) values in the
first two columns. Supported formats are ASCII, native binary, or
first two columns. Track(s) can be provided as pandas DataFrame tables
or file names. Supported file formats are ASCII, native binary, or
COARDS netCDF 1-D data. More columns may also be present.
If the filenames are missing their file extension, we will append the
Expand Down Expand Up @@ -263,8 +301,20 @@ def x2sys_cross(tracks=None, outfile=None, **kwargs):
if kind == "file":
file_contexts.append(dummy_context(track))
elif kind == "matrix":
raise NotImplementedError(f"{type(track)} inputs are not supported yet")
# file_contexts.append(lib.virtualfile_from_matrix(track.values))
# find suffix (-E) of trackfiles used (e.g. xyz, csv, etc) from
# $X2SYS_HOME/TAGNAME/TAGNAME.tag file
lastline = (
Path(os.environ["X2SYS_HOME"], kwargs["T"], f"{kwargs['T']}.tag")
.read_text()
.strip()
.split("\n")[-1]
) # e.g. "-Dxyz -Etsv -I1/1"
for item in sorted(lastline.split()): # sort list alphabetically
if item.startswith(("-E", "-D")): # prefer -Etsv over -Dxyz
suffix = item[2:] # e.g. tsv (1st choice) or xyz (2nd choice)

# Save pandas.DataFrame track data to temporary file
file_contexts.append(tempfile_from_dftrack(track=track, suffix=suffix))
else:
raise GMTInvalidInput(f"Unrecognized data type: {type(track)}")

Expand All @@ -287,8 +337,8 @@ def x2sys_cross(tracks=None, outfile=None, **kwargs):
parse_dates=[2, 3], # Datetimes on 3rd and 4th column
)
# Remove the "# " from "# x" in the first column
result = table.rename(columns={table.columns[0]: table.columns[0][2:]})
table = table.rename(columns={table.columns[0]: table.columns[0][2:]})
elif outfile != tmpfile.name: # if outfile is set, output in outfile only
result = None
table = None

return result
return table

0 comments on commit 6deb388

Please sign in to comment.