Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Centralize data label generation #1672

Merged
merged 6 commits into from
Oct 11, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ New Features

- Add support for nonstandard viewer reference names [#1681]

- Centralize data label generation if user does not provide a label with data load. Also
prevent duplicate data labels from being added to data collection. [#1672]

Cubeviz
^^^^^^^

Expand Down
133 changes: 103 additions & 30 deletions jdaviz/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,15 @@
import pathlib
import re
import uuid
import warnings
from inspect import isclass

from ipywidgets import widget_serialization
import ipyvue

from astropy.nddata import CCDData
from astropy.nddata import CCDData, NDData
from astropy.io import fits

from echo import CallbackProperty, DictCallbackProperty, ListCallbackProperty
from ipygoldenlayout import GoldenLayout
from ipysplitpanes import SplitPanes
Expand Down Expand Up @@ -490,6 +493,7 @@ def load_data(self, file_obj, parser_reference=None, **kwargs):
**kwargs : dict
Additional keywords to be passed into the parser defined by
``parser_reference``.

"""
self.loading = True
try:
Expand Down Expand Up @@ -896,7 +900,7 @@ def add_data(self, data, data_label=None, notify_done=True):

if not data_label and hasattr(data, "label"):
data_label = data.label
data_label = data_label or "New Data"
data_label = self.return_unique_name(data_label)
self.data_collection[data_label] = data

# Send out a toast message
Expand All @@ -905,36 +909,99 @@ def add_data(self, data, data_label=None, notify_done=True):
f"Data '{data_label}' successfully added.", sender=self, color="success")
self.hub.broadcast(snackbar_message)

@staticmethod
def _build_data_label(path, ext=None):
""" Build a data label from a filename and data extension

Builds a data_label out of a filename and data extension.
If the input string path already ends with a data
extension, the label is returned directly. Otherwise a valid
extension must be specified to append to the file stem.
def return_data_label(self, loaded_object, ext=None, alt_name=None, check_unique=True):
"""
Returns a unique data label that can be safely used to load data into data collection.

Parameters
----------
path : str
The data filename to use as a
ext : str
The data extension to access from the file.
loaded_object : str or object
The path to a data file or FITS HDUList or image object or Spectrum1D or
NDData array or numpy.ndarray.
ext : str, optional
The extension (or other distinguishing feature) of data used to identify it.
For example, "filename[FLUX]" where "FLUX" is the ext value.
alt_name : str, optional
Alternate names that can be used if none of the options provided are valid.
check_unique : bool
Included so that this method can be used with data label retrieval in addition
to generation.

Returns
-------
A string data label used by Glue

"""

# if path is not a file or already ends in [ ] extension, assume it is a data-label
if not os.path.isfile(path) or re.search(r'(.+)(\[(.*?)\])$', path):
return path
else:
assert ext, 'A data extension must be specified'
p = pathlib.Path(path)
stem = p.stem.split(os.extsep)[0]
label = f'{stem}[{ext}]'
return label
data_label : str
A unique data label that at its root is either given by the user at load time
or created by Jdaviz using a description of the loaded filetype.
"""
data_label = None

if loaded_object is None:
pass
elif isinstance(loaded_object, str):
# This is either the full file path or the basename or the user input data label
if loaded_object.lower().endswith(('.jpg', '.jpeg', '.png')):
file_format = loaded_object.lower().split(".")[-1]
loaded_object = os.path.splitext(os.path.basename(loaded_object))[0]
data_label = f"{loaded_object}[{file_format}]"
elif os.path.isfile(loaded_object) or os.path.isdir(loaded_object):
# File that is not an image file
data_label = os.path.splitext(os.path.basename(loaded_object))[0]
else:
# Not a file path so assumed to be a data label
data_label = loaded_object
elif isinstance(loaded_object, fits.hdu.hdulist.HDUList):
if hasattr(loaded_object, 'file_name'):
data_label = f"{loaded_object.file_name}[HDU object]"
else:
data_label = "Unknown HDU object"
elif isinstance(loaded_object, Spectrum1D):
data_label = "Spectrum1D"
elif isinstance(loaded_object, NDData):
data_label = "NDData"
elif isinstance(loaded_object, np.ndarray):
data_label = "Array"

if data_label is None and alt_name is not None and isinstance(alt_name, str):
data_label = alt_name
elif data_label is None:
data_label = "Unknown"

if check_unique:
data_label = self.return_unique_name(data_label, ext=ext)

return data_label

def return_unique_name(self, data_label, ext=None):
if data_label is None:
data_label = "Unknown"

# This regex checks for any length of characters that end
# with a space followed by parenthesis with a number inside.
# If there is a match, the space and parenthesis section will be
# removed so that the remainder of the label can be checked
# against the data_label.
check_if_dup = re.compile(r"(.*)(\s\(\d*\))$")
labels = self.data_collection.labels
number_of_duplicates = 0
for label in labels:
# If label is a duplicate of another label
if re.fullmatch(check_if_dup, label):
label_split = label.split(" ")
label_without_dup = " ".join(label_split[:-1])
label = label_without_dup
kecnry marked this conversation as resolved.
Show resolved Hide resolved

if ext and f"{data_label}[{ext}]" == label:
number_of_duplicates += 1
elif ext is None and data_label == label:
number_of_duplicates += 1

if ext:
data_label = f"{data_label}[{ext}]"

if number_of_duplicates > 0:
data_label = f"{data_label} ({number_of_duplicates})"

return data_label

def add_data_to_viewer(self, viewer_reference, data_path,
clear_other_data=False, ext=None):
Expand All @@ -960,7 +1027,7 @@ def add_data_to_viewer(self, viewer_reference, data_path,
viewer_item = self._viewer_item_by_id(viewer_reference)
if viewer_item is None:
raise ValueError(f"Could not identify viewer with reference {viewer_reference}")
data_label = self._build_data_label(data_path, ext=ext)
data_label = self.return_data_label(data_path, ext=ext, check_unique=False)
data_id = self._data_id_from_label(data_label)

if clear_other_data:
Expand Down Expand Up @@ -1006,7 +1073,7 @@ def remove_data_from_viewer(self, viewer_reference, data_path, ext=None):
is required.
"""
viewer_item = self._viewer_item_by_reference(viewer_reference)
data_label = self._build_data_label(data_path, ext=ext)
data_label = self.return_data_label(data_path, ext=ext, check_unique=False)
data_id = self._data_id_from_label(data_label)

selected_items = viewer_item['selected_data_items']
Expand Down Expand Up @@ -1356,8 +1423,14 @@ def _update_selected_data_items(self, viewer_id, selected_items):

# Include any selected data in the viewer
for data_id, visibility in selected_items.items():
label = self._get_data_item_by_id(data_id)['name']
data_item = self._get_data_item_by_id(data_id)

if data_item is None or data_item['name'] is None:
warnings.warn(f"No data item with id {data_id} found in "
f"viewer {viewer_id}.")
continue
else:
label = self._get_data_item_by_id(data_id)['name']
active_data_labels.append(label)

[data] = [x for x in self.data_collection if x.label == label]
Expand Down
15 changes: 7 additions & 8 deletions jdaviz/configs/cubeviz/plugins/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def parse_data(app, file_obj, data_type=None, data_label=None):
for ext, viewer_name in (('SCI', flux_viewer_reference_name),
('ERR', uncert_viewer_reference_name),
('DQ', None)):
data_label = f'{file_name}[{ext}]'
data_label = app.return_data_label(file_name, ext)
_parse_jwst_s3d(
app, hdulist, data_label, ext=ext, viewer_name=viewer_name,
flux_viewer_reference_name=flux_viewer_reference_name,
Expand All @@ -84,13 +84,12 @@ def parse_data(app, file_obj, data_type=None, data_label=None):
for ext, viewer_name in (('DATA', flux_viewer_reference_name),
('ERR', uncert_viewer_reference_name),
('QUALITY', None)):
data_label = f'{file_name}[{ext}]'
data_label = app.return_data_label(file_name, ext)
_parse_esa_s3d(
app, hdulist, data_label, ext=ext, viewer_name=viewer_name,
flux_viewer_reference_name=flux_viewer_reference_name,
spectrum_viewer_reference_name=spectrum_viewer_reference_name
)

else:
_parse_hdulist(
app, hdulist, file_name=data_label or file_name,
Expand Down Expand Up @@ -183,7 +182,7 @@ def _parse_hdulist(app, hdulist, file_name=None,
continue

is_loaded.append(data_type)
data_label = f"{file_name}[{hdu.name}]"
data_label = app.return_data_label(file_name, hdu.name)

if data_type == 'flux':
wcs = WCS(hdu.header, hdulist)
Expand Down Expand Up @@ -346,7 +345,7 @@ def _parse_spectrum1d_3d(app, file_obj, data_label=None,
s1d = Spectrum1D(flux=flux, wcs=file_obj.wcs,
meta=standardize_metadata(file_obj.meta))

cur_data_label = f"{data_label}[{attr.upper()}]"
cur_data_label = app.return_data_label(data_label, attr.upper())
app.add_data(s1d, cur_data_label)

if attr == 'flux':
Expand All @@ -361,13 +360,13 @@ def _parse_spectrum1d_3d(app, file_obj, data_label=None,

def _parse_spectrum1d(app, file_obj, data_label=None, spectrum_viewer_reference_name=None):
if data_label is None:
data_label = "Unknown spectrum object"
data_label = app.return_data_label(file_obj)

# TODO: glue-astronomy translators only look at the flux property of
# specutils Spectrum1D objects. Fix to support uncertainties and masks.

app.add_data(file_obj, f"{data_label}[FLUX]")
app.add_data_to_viewer(spectrum_viewer_reference_name, f"{data_label}[FLUX]")
app.add_data(file_obj, data_label)
app.add_data_to_viewer(spectrum_viewer_reference_name, data_label)


def _get_data_type_by_hdu(hdu):
Expand Down
8 changes: 4 additions & 4 deletions jdaviz/configs/cubeviz/plugins/tests/test_parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def test_fits_image_hdu_parse(image_cube_hdu_obj, cubeviz_helper):
cubeviz_helper.load_data(image_cube_hdu_obj)

assert len(cubeviz_helper.app.data_collection) == 3
assert cubeviz_helper.app.data_collection[0].label.endswith('[FLUX]')
assert cubeviz_helper.app.data_collection[0].label == "Unknown HDU object[FLUX]"

# first load should be successful; subsequent attempts should fail
with pytest.raises(RuntimeError, match=r".?only one (data)?cube.?"):
Expand Down Expand Up @@ -88,7 +88,7 @@ def test_fits_image_hdu_parse_from_file(tmpdir, image_cube_hdu_obj, cubeviz_help
cubeviz_helper.load_data(path)

assert len(cubeviz_helper.app.data_collection) == 3
assert cubeviz_helper.app.data_collection[0].label.endswith('[FLUX]')
assert cubeviz_helper.app.data_collection[0].label == "test_fits_image.fits[FLUX]"

# This tests the same data as test_fits_image_hdu_parse above.

Expand Down Expand Up @@ -121,7 +121,7 @@ def test_spectrum3d_parse(image_cube_hdu_obj, cubeviz_helper):
cubeviz_helper.load_data(sc)

assert len(cubeviz_helper.app.data_collection) == 1
assert cubeviz_helper.app.data_collection[0].label.endswith('[FLUX]')
assert cubeviz_helper.app.data_collection[0].label == "Unknown spectrum object[FLUX]"

# Same as flux viewer data in test_fits_image_hdu_parse_from_file
flux_viewer = cubeviz_helper.app.get_viewer(cubeviz_helper._default_flux_viewer_reference_name)
Expand All @@ -142,7 +142,7 @@ def test_spectrum1d_parse(spectrum1d, cubeviz_helper):
cubeviz_helper.load_data(spectrum1d)

assert len(cubeviz_helper.app.data_collection) == 1
assert cubeviz_helper.app.data_collection[0].label.endswith('[FLUX]')
assert cubeviz_helper.app.data_collection[0].label.endswith('Spectrum1D')
assert cubeviz_helper.app.data_collection[0].meta['uncertainty_type'] == 'std'

# Coordinate display is only for spatial image, which is missing here.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -760,8 +760,10 @@ def _fit_model_to_cube(self, add_data):
count = max(map(lambda s: int(next(iter(re.findall(r"\d$", s)), 0)),
self.data_collection.labels)) + 1

label = self.app.return_data_label(f"{self.results_label}[Cube]", ext=count)

# Create new glue data object
output_cube = Data(label=f"{self.results_label} [Cube] {count}",
output_cube = Data(label=label,
coords=data.coords)
output_cube['flux'] = fitted_spectrum.flux.value
output_cube.get_component('flux').units = fitted_spectrum.flux.unit.to_string()
Expand Down
5 changes: 2 additions & 3 deletions jdaviz/configs/imviz/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ def load_data(self, data, data_label=None, do_link=True, show_in_viewer=True, **

# This will only overwrite if not provided.
if not data_label:
kw['data_label'] = cur_data_label
kw['data_label'] = None
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why this change?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Trying to centralize data label generation, so by setting the kw value to None if no data_label was provided, it would default to whatever is in the app.return_data_label method rather than a config specific helper method. It seems the methods do the same thing though so I'll leave it out of the next commit but can include it in the one after if other reviewers prefer that.

else:
kw['data_label'] = data_label

Expand All @@ -163,9 +163,8 @@ def load_data(self, data, data_label=None, do_link=True, show_in_viewer=True, **

kw = deepcopy(kwargs)

# This will only append index to data label if provided.
if data_label:
kw['data_label'] = f'{data_label}_{i}'
kw['data_label'] = data_label

self.app.load_data(data[i, :, :], parser_reference='imviz-data-parser', **kw)

Expand Down
18 changes: 4 additions & 14 deletions jdaviz/configs/imviz/plugins/parsers.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
import base64
import os
import uuid

import numpy as np
from asdf.fits_embed import AsdfInFits
Expand Down Expand Up @@ -56,8 +54,6 @@ def parse_data(app, file_obj, ext=None, data_label=None):
with fits.open(file_obj) as pf:
_parse_image(app, pf, data_label, ext=ext)
else:
if data_label is None:
data_label = f'imviz_data|{str(base64.b85encode(uuid.uuid4().bytes), "utf-8")}'
_parse_image(app, file_obj, data_label, ext=ext)


Expand Down Expand Up @@ -121,20 +117,14 @@ def get_image_data_iterator(app, file_obj, data_label, ext=None):


def _parse_image(app, file_obj, data_label, ext=None):
from jdaviz.core.helpers import _next_subset_num

if app is None:
raise ValueError("app is None, cannot proceed")
if data_label is None:
raise NotImplementedError('data_label should be set by now')

data_label = app.return_data_label(file_obj, ext, alt_name="image_data")
data_iter = get_image_data_iterator(app, file_obj, data_label, ext=ext)

for data, data_label in data_iter:

# avoid duplicate data labels in collection
if data_label in app.data_collection.labels:
i = _next_subset_num(data_label, app.data_collection)
data_label = f'{data_label} {i}'

data_label = app.return_data_label(data_label, alt_name="image_data")
app.add_data(data, data_label)

# Do not run link_image_data here. We do it at the end in Imviz.load_data()
Expand Down
Loading