Skip to content

Commit

Permalink
Merge pull request #133 from osyris-project/as-pandas
Browse files Browse the repository at this point in the history
Add method `to_pandas` to convert a Datagroup to a pandas DataFrame
  • Loading branch information
nvaytet authored Jul 1, 2024
2 parents 780aea8 + 1a6760e commit 76eeba1
Show file tree
Hide file tree
Showing 5 changed files with 158 additions and 2 deletions.
1 change: 1 addition & 0 deletions docs/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ numpy
sphinx
nbsphinx
jupyter
pandas
pint
pythreejs
sphinx-copybutton
Expand Down
32 changes: 32 additions & 0 deletions src/osyris/core/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,9 @@ def copy(self):

@property
def values(self):
"""
The values of the array.
"""
if not self._array.shape:
return self._array[()]
else:
Expand All @@ -104,6 +107,9 @@ def values(self, values_):

@property
def unit(self):
    """
    Unit attached to this array (read from the private ``_unit`` attribute).
    """
    current_unit = self._unit
    return current_unit

@unit.setter
Expand All @@ -112,18 +118,30 @@ def unit(self, unit_):

@property
def norm(self):
    """
    Norm of the array.

    For a plain array this is the array itself: the very same object is
    returned, not a copy.
    """
    return self

@property
def ndim(self):
    """
    Number of dimensions, as reported by the underlying ``_array``.
    """
    underlying = self._array
    return underlying.ndim

@property
def shape(self):
    """
    Shape of the array, as reported by the underlying ``_array``.
    """
    underlying = self._array
    return underlying.shape

@property
def dtype(self):
    """
    Data type of the elements, as reported by the underlying ``_array``.
    """
    underlying = self._array
    return underlying.dtype

def __add__(self, other):
Expand Down Expand Up @@ -193,6 +211,9 @@ def __invert__(self):
return np.logical_not(self)

def to(self, unit):
"""
Convert the array to a new unit.
"""
new_unit = units(unit)
if self.unit == new_unit:
return self
Expand Down Expand Up @@ -248,8 +269,19 @@ def _wrap_numpy(self, func, *args, **kwargs):
return self.__class__(values=result, unit=unit)

def reshape(self, *shape):
    """
    Return a new array holding the same values arranged in a new shape.

    Parameters
    ----------
    *shape :
        The new shape. The arguments are forwarded unchanged to the
        ``reshape`` method of the underlying ``_array``.

    Returns
    -------
    A new instance of the same class, built from the reshaped values and
    the unit of this array.
    """
    reshaped = self._array.reshape(*shape)
    return self.__class__(values=reshaped, unit=self.unit)

@property
def nbytes(self):
    """
    Number of bytes used by the array, as reported by the underlying
    ``_array``.
    """
    underlying = self._array
    return underlying.nbytes
59 changes: 58 additions & 1 deletion src/osyris/core/datagroup.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from .layer import Layer
from .tools import bytes_to_human_readable
from .vector import Vector


class Datagroup:
Expand Down Expand Up @@ -62,58 +63,114 @@ def __copy__(self):
return self.copy()

def copy(self):
    """
    Create a shallow copy of the Datagroup.

    A new instance of the same class is built from the current
    ``(key, entry)`` pairs; the entries themselves are passed along as-is
    and are therefore shared with the original.
    """
    entries = dict(self.items())
    return self.__class__(**entries)

def keys(self):
    """
    Iterable over the keys of the entries held in the Datagroup.
    """
    container = self._container
    return container.keys()

def items(self):
    """
    Iterable over the ``(key, entry)`` pairs held in the Datagroup.
    """
    container = self._container
    return container.items()

def values(self):
    """
    Iterable over the entries held in the Datagroup (without their keys).
    """
    container = self._container
    return container.values()

def nbytes(self):
    """
    Total number of bytes used by all entries of the Datagroup.

    The ``nbytes`` of every stored entry is added up with the built-in
    ``sum``, so an empty Datagroup yields the integer ``0`` (``np.sum`` over
    an empty list would yield the float ``0.0``).

    Returns
    -------
    The summed byte count; ``0`` when the Datagroup holds no entries.
    """
    return sum(item.nbytes for item in self.values())

def print_size(self):
    """
    Return the size of the Datagroup in human readable format.

    The total byte count from ``self.nbytes()`` is passed through
    ``bytes_to_human_readable``; the result is returned, not printed.
    """
    total_bytes = self.nbytes()
    return bytes_to_human_readable(total_bytes)

@property
def shape(self):
    """
    Shape of the Datagroup.

    This is the shape of the first entry; an empty Datagroup has shape
    ``()``.
    """
    if len(self) > 0:
        first_key = next(iter(self.keys()))
        return self[first_key].shape
    return ()

def sortby(self, key):
    """
    Reorder every entry of the Datagroup in place.

    Parameters
    ----------
    key : str or list
        When a string, the entry stored under that name is arg-sorted and the
        resulting indices are used for the reordering. Anything else is used
        directly as the index applied to every entry. ``None`` leaves the
        Datagroup untouched.
    """
    if key is None:
        return
    # A name is first turned into the index order that sorts that entry.
    order = np.argsort(self[key]).values if isinstance(key, str) else key
    for name in self.keys():
        self[name] = self[name][order]

def clear(self):
    """
    Remove every entry from the Datagroup by emptying its container.
    """
    container = self._container
    container.clear()

def get(self, key, default=None):
    """
    Get the value of a key in the Datagroup, with a fallback.

    Parameters
    ----------
    key :
        The key to look up.
    default : optional
        Value returned when ``key`` is not present. Defaults to ``None``,
        mirroring ``dict.get``.

    Returns
    -------
    The entry stored under ``key``, or ``default`` when there is none.
    """
    return self._container.get(key, default)

def pop(self, key):
    """
    Remove the entry stored under ``key`` and return it.

    The call is delegated to the ``pop`` of the underlying container, which
    decides what happens when ``key`` is missing.
    """
    removed = self._container.pop(key)
    return removed

def update(self, *args, **kwargs):
    """
    Update the Datagroup with new values.

    The arguments are interpreted exactly like those of the ``dict``
    constructor. Each resulting pair is then stored through item assignment
    on the Datagroup, one key at a time.
    """
    incoming = dict(*args, **kwargs)
    for name in incoming:
        self[name] = incoming[name]

def layer(self, key: str, **kwargs) -> Layer:
    """
    Make a layer for map plots which contains mesh information.

    The entry stored under ``key`` becomes the layer data. Whichever of the
    ``position``, ``dx``, ``mass`` and ``velocity`` entries are present in
    the Datagroup are attached as auxiliary data. Any extra keyword
    arguments are forwarded to ``Layer``.
    """
    data = self[key]
    aux = {}
    for name in ("position", "dx", "mass", "velocity"):
        if name in self:
            aux[name] = self[name]
    return Layer(data=data, aux=aux, **kwargs)

def to_pandas(self):
    """
    Convert the Datagroup to a pandas DataFrame.

    Every entry contributes one column, named after its key, holding the
    values of its norm. A ``Vector`` entry additionally contributes one
    column per component, named ``<key>_<component>``. Only the raw values
    are exported; units are not carried over to the DataFrame.
    """
    # Imported here so that pandas is only needed when this method is called.
    import pandas as pd

    columns = {}
    for name, entry in self.items():
        columns[name] = entry.norm.values
        if not isinstance(entry, Vector):
            continue
        columns.update(
            {f"{name}_{c}": comp.values for c, comp in entry._xyz.items()}
        )
    return pd.DataFrame(columns)
46 changes: 46 additions & 0 deletions src/osyris/core/vector.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,12 +105,18 @@ def __str__(self):
return str(self.norm) + comps_str

def copy(self):
    """
    Create a copy of the vector.

    Each component is duplicated through its own ``copy`` method, and the
    name is carried over as a string.
    """
    components = {}
    for c, xyz in self._xyz.items():
        components[c] = xyz.copy()
    return self.__class__(**components, name=str(self._name))

@property
def norm(self):
"""
Compute the norm of the vector.
"""
if (self.y is None) and (self.z is None):
return self.x
out = self.x.values * self.x.values
Expand All @@ -121,6 +127,9 @@ def norm(self):

@property
def unit(self):
    """
    Unit of the vector, taken from its ``x`` component.
    """
    first_component = self.x
    return first_component.unit

@unit.setter
Expand All @@ -134,10 +143,17 @@ def unit(self, unit_):

@property
def ndim(self):
    """
    Number of dimensions of the vector array, taken from its ``x``
    component. This is not the number of vector components.
    """
    first_component = self.x
    return first_component.ndim

@property
def nvec(self):
"""
The number of components of the vector.
"""
if (self.y is None) and (self.z is None):
return 1
if self.z is None:
Expand All @@ -146,14 +162,24 @@ def nvec(self):

@property
def shape(self):
    """
    Shape of the vector array, taken from its ``x`` component. The number
    of vector components is not part of this shape.
    """
    first_component = self.x
    return first_component.shape

@property
def dtype(self):
    """
    Data type of the vector array, taken from its ``x`` component.
    """
    first_component = self.x
    return first_component.dtype

@property
def name(self):
    """
    Name of the vector (read from the private ``_name`` attribute).
    """
    current_name = self._name
    return current_name

@name.setter
Expand Down Expand Up @@ -235,6 +261,9 @@ def __invert__(self):
return np.logical_not(self)

def to(self, unit):
    """
    Convert the vector to a new unit.

    Every component is converted through its own ``to`` method, and a new
    instance of the same class is built from the converted components.
    """
    converted = {}
    for c, xyz in self._xyz.items():
        converted[c] = xyz.to(unit)
    return self.__class__(**converted)

def _wrap_numpy(self, func, *args, **kwargs):
Expand All @@ -255,21 +284,38 @@ def _wrap_numpy(self, func, *args, **kwargs):
return self.__class__(**out)

def reshape(self, *shape):
    """
    Return a new vector whose component arrays have a new shape.

    Parameters
    ----------
    *shape :
        The new shape. The arguments are forwarded unchanged to the
        ``reshape`` method of every component.

    Returns
    -------
    A new instance of the same class built from the reshaped components.
    """
    reshaped = {}
    for c, xyz in self._xyz.items():
        reshaped[c] = xyz.reshape(*shape)
    return self.__class__(**reshaped)

@property
def nbytes(self):
    """
    Number of bytes used by the vector, i.e. the sum of the ``nbytes`` of
    all its components.
    """
    per_component = [xyz.nbytes for xyz in self._xyz.values()]
    return np.sum(per_component)

def dot(self, other):
    """
    Compute the dot product of two vectors.

    The component-wise products are accumulated into a zero-initialised
    buffer with the shape of this vector. Components are paired in order,
    and pairing stops at the shorter of the two component lists.

    Returns
    -------
    An ``Array`` holding the accumulated values, with the product of both
    vectors' units as its unit.
    """
    total = np.zeros(self.shape)
    component_pairs = zip(self._xyz.values(), other._xyz.values())
    for mine, theirs in component_pairs:
        total += (mine * theirs).values
    return Array(values=total, unit=self.unit * other.unit)

def cross(self, other):
"""
Compute the cross product of two vectors.
"""
x = self.y * other.z
x -= self.z * other.y
y = self.z * other.x
Expand Down
22 changes: 21 additions & 1 deletion test/test_datagroup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@
# Copyright (c) 2024 Osyris contributors (https://github.com/osyris-project/osyris)
from copy import copy, deepcopy

import numpy as np
import pandas as pd
import pytest
from common import arrayequal

from osyris import Array, Datagroup
from osyris import Array, Datagroup, Vector


def test_datagroup_creation():
Expand Down Expand Up @@ -201,3 +203,21 @@ def test_can_share_arrays_between_datagroups():
dg2["a2"] /= 10.0
assert arrayequal(dg1["a1"], Array(values=[1.0, 2.0, 3.0, 4.0, 5.0], unit="m"))
assert arrayequal(dg2["a2"], Array(values=[1.0, 2.0, 3.0, 4.0, 5.0], unit="m"))


def test_convert_to_pandas():
    """
    A Datagroup holding two arrays and one vector converts to a DataFrame
    with one column per array, one per vector component, and one for the
    vector norm.
    """
    a = Array(values=[1.0, 2.0, 3.0, 4.0, 5.0], unit="m")
    b = Array(values=[6.0, 7.0, 8.0, 9.0, 10.0], unit="s")
    x = Array(values=[1.0, 2.0, 3.0, 4.0, 5.0], unit="cm")
    y = Array(values=[6.0, 7.0, 8.0, 9.0, 10.0], unit="cm")
    z = Array(values=[11.0, 12.0, 13.0, 14.0, 15.0], unit="cm")
    v = Vector(x=x, y=y, z=z)
    df = Datagroup({"a": a, "b": b, "v": v}).to_pandas()
    assert isinstance(df, pd.DataFrame)
    expected_columns = {
        "a": a.values,
        "b": b.values,
        "v_x": x.values,
        "v_y": y.values,
        "v_z": z.values,
        "v": v.norm.values,
    }
    for column, expected in expected_columns.items():
        assert np.array_equal(df[column], expected)

0 comments on commit 76eeba1

Please sign in to comment.