Skip to content

Commit

Permalink
Adding guards in HSCDataSet for the manifest representation of un-downloaded files (#130)
Browse files Browse the repository at this point in the history

- Should fix issue #127.
- Moved removal of incomplete downloads from the prune stage to the filesystem (f/s) read stage
- Added a clearer error for the case where HSCDataSet arrives at an absurdly small
  image size to crop to.

Co-authored-by: Drew Oldag <[email protected]>
  • Loading branch information
mtauraso and drewoldag authored Dec 11, 2024
1 parent 02c7561 commit cc2f12b
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 9 deletions.
25 changes: 17 additions & 8 deletions src/fibad/data_sets/hsc_data_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -520,17 +520,24 @@ def _read_filter_catalog(
object_id = row["object_id"]
filter = row["filter"]
filename = row["filename"]
if "dim" in colnames:
dim = tuple(row["dim"])

# Skip over any files that are marked as didn't download.
# or have a dimension listed less than 1px x 1px
if filename == "Attempted" or min(dim) < 1:
continue

# Insert into the filter catalog.
if object_id not in filter_catalog:
filter_catalog[object_id] = {}

filter_catalog[object_id][filter] = filename

# Dimension is optional
# Dimension is optional, insert into dimension catalog.
if "dim" in colnames:
if object_id not in dim_catalog:
dim_catalog[object_id] = []
dim_catalog[object_id].append(tuple(row["dim"]))
dim_catalog[object_id].append(dim)

return (filter_catalog, dim_catalog) if "dim" in colnames else filter_catalog

Expand Down Expand Up @@ -632,11 +639,6 @@ def _prune_objects(self, filters_ref: list[str]):
filters_ref = sorted(filters_ref)
self.prune_count = 0
for index, (object_id, filters) in enumerate(self.files.items()):
# Drop objects that failed to download
if any("Attempted" in v for v in filters.items()):
msg = f"Attempted to download {object_id} but failed. Pruning."
self._mark_for_prune(object_id, msg)

# Drop objects with missing filters
filters = sorted(list(filters))
if filters != filters_ref:
Expand Down Expand Up @@ -726,6 +728,13 @@ def _check_file_dimensions(self) -> tuple[int, int]:
finally:
logger.warning(msg)

if min(cutout_height, cutout_width) < 1:
msg = "Automatic determination found an absurd dimension of "
msg += f"({cutout_width}px, {cutout_height}px)\n"
msg += "Please either correct the data source or set a static cutout size with the \n"
msg += "crop_to configuration in the [data_set] section of your fibad config.\n"
raise RuntimeError(msg)

return cutout_width, cutout_height

def _rebuild_manifest(self, config):
Expand Down
2 changes: 1 addition & 1 deletion src/fibad/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ def _batched(iterable, n):
while batch := tuple(itertools.islice(iterator, n)):
yield batch

if len(self.rects) > num_threads:
if len(self.rects) < num_threads:
msg = f"Only {len(self.rects)} sky locations, which is less than the number of threads, so we "
msg += "will use only one thread."
logger.info(msg)
Expand Down

0 comments on commit cc2f12b

Please sign in to comment.