Skip to content

Commit

Permalink
Initial support of light-curve
Browse files Browse the repository at this point in the history
  • Loading branch information
hombit committed Aug 22, 2023
1 parent 0a29578 commit 3814574
Show file tree
Hide file tree
Showing 4 changed files with 154 additions and 0 deletions.
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ dependencies = [
'coverage',
"deprecated",
"ipykernel", # Support for Jupyter notebooks
"light-curve>=0.7.3,<0.8.0",
]

# On a mac, install optional dependencies with `pip install '.[dev]'` (include the single quotes)
Expand Down
1 change: 1 addition & 0 deletions src/tape/analysis/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from .feature_extraction import extract_features # noqa
from .light_curve import LightCurve # noqa
from .stetsonj import * # noqa
from .structurefunction2 import * # noqa
94 changes: 94 additions & 0 deletions src/tape/analysis/feature_extraction.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
"""
Auxiliary code for time-series feature extraction with "light-curve" package
"""

from typing import List

import numpy as np
import pandas as pd
from light_curve.light_curve_ext import _FeatureEvaluator as BaseLightCurveFeature


__all__ = ["cols", "meta", "extract_features", "BaseLightCurveFeature"]


def cols(ens: "Ensemble") -> List[str]:
"""Return the columns required for the time-series feature extraction
Parameters
----------
ens : `Ensemble`
Ensemble of light curves
Returns
-------
cols : `list` of `str`
List of input column names
"""
return [ens._time_col, ens._flux_col, ens._err_col, ens._band_col]


def meta(feature: BaseLightCurveFeature) -> pd.DataFrame:
"""Return the meta required by Dask
Parameters
----------
feature : `BaseLightCurveFeature`
Feature extractor in use, its `.names` attribute will be used
Returns
-------
meta : `list` of `str`
List of output column names
"""
return pd.DataFrame(dtype=np.float64, columns=feature.names)


def extract_features(
time, flux, err, band, *, feature: BaseLightCurveFeature, band_to_calc: str, **kwargs
) -> pd.Series:
"""
Apply a feature extractor to a light curve, concatenating the results over
all bands.
Parameters
----------
feature : `BaseLightCurveFeature`
Feature extractor to apply, see "light-curve" package for more details.
time : `numpy.ndarray`
Time values
flux : `numpy.ndarray`
Brightness values, flux or magnitudes
err : `numpy.ndarray`
Errors for "flux"
band : `numpy.ndarray`
Passband names.
**kwargs : `dict`
Additional keyword arguments to pass to the feature extractor.
Returns
-------
features : pandas.DataFrame
Feature values for each band, dtype is a common type for input arrays.
"""

# Select passband to calculate
band_mask = band == band_to_calc
time, flux, err = (a[band_mask] for a in (time, flux, err))

# Sort inputs by time if not already sorted
if not kwargs.get("sorted", False):
sort_idx = np.argsort(time)
time, flux, err, band = (a[sort_idx] for a in (time, flux, err, band))
# Now we can update the kwargs for better performance
kwargs = kwargs.copy()
kwargs["sorted"] = True

# Convert the numerical arrays to a common dtype
dtype = np.find_common_type([a.dtype for a in (time, flux, err)], [])
time, flux, err = (a.astype(dtype) for a in (time, flux, err))

values = feature(time, flux, err, **kwargs)

series = pd.Series(dict(zip(feature.names, values)))
return series
58 changes: 58 additions & 0 deletions tests/tape_tests/test_feature_extraction.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
"""Test feature extraction with light_curve package"""

import light_curve as licu
import numpy as np
from numpy.testing import assert_array_equal, assert_allclose

from tape import Ensemble
from tape.analysis.feature_extraction import extract_features, cols, meta
from tape.utils import ColumnMapper


def test_stetsonk():
stetson_k = licu.StetsonK()

time = np.array([5.0, 4.0, 3.0, 2.0, 1.0, 0.0] * 2)
flux = 1.0 + time**2.0
err = np.full_like(time, 0.1, dtype=np.float32)
band = np.r_[["g"] * 6, ["r"] * 6]

result = extract_features(feature=stetson_k, time=time, flux=flux, err=err, band=band, band_to_calc="g")
assert result.shape == (1,)
assert_array_equal(result.index, ["stetson_K"])
assert_allclose(result.values, 0.84932, rtol=1e-5)
assert_array_equal(result.dtypes, np.float64)


def test_stetsonk_with_ensemble():
n = 5

object1 = {
"id": np.full(n, 1),
"time": np.arange(n, dtype=np.float64),
"flux": np.linspace(1.0, 2.0, n),
"err": np.full(n, 0.1),
"band": np.full(n, "g"),
}
object2 = {
"id": np.full(2 * n, 2),
"time": np.arange(2 * n, dtype=np.float64),
"flux": np.r_[np.linspace(1.0, 2.0, n), np.linspace(1.0, 2.0, n)],
"err": np.full(2 * n, 0.01),
"band": np.r_[np.full(n, "g"), np.full(n, "r")],
}
rows = {column: np.concatenate([object1[column], object2[column]]) for column in object1}

cmap = ColumnMapper(id_col="id", time_col="time", flux_col="flux", err_col="err", band_col="band")
ens = Ensemble().from_source_dict(rows, cmap)

stetson_k = licu.StetsonK()
result = ens.batch(
extract_features,
*cols(ens),
meta=meta(stetson_k),
feature=stetson_k,
band_to_calc="g",
)

assert_allclose(result, 0.848528, rtol=1e-5)

0 comments on commit 3814574

Please sign in to comment.