Refactoring pre v0.0.2 (#27)

* Refactor NWB * Refactor label studio * Refactor high level API * Bump version * Add basic NWB reading * Add high level NWB loading API * Use context manager for NWB append * Add roundtrip tests * Use context manager in NWB * Add conversion from numpy arrays to skeleton * Fix skeleton creation from numpy arrays * Fix labelstudio serialization * Fix loading mixed timestamp format predictions in NWB * Correctly append multiple videos to NWB * Relax coverage failure condition
talmolab · Dec 13, 2022 · c55f64e · c55f64e
1 parent 9d77d07
commit c55f64e
Show file tree

Hide file tree

Showing 12 changed files with 518 additions and 189 deletions.
diff --git a/codecov.yml b/codecov.yml
@@ -0,0 +1,12 @@
+coverage:
+  status:
+    project: # Measures overall project coverage.
+      default:
+        target: auto
+        threshold: 1%
+        informational: true # Project coverage for stats only.
+    patch: #  Only measures lines adjusted in the pull request.
+      default:
+        target: auto
+        threshold: 5%
+        informational: false # true: Patch coverage for stats only.
diff --git a/setup.cfg b/setup.cfg
@@ -23,6 +23,7 @@ install_requires =
     pynwb
     ndx-pose
     pandas
+    simplejson
 
 [options.extras_require]
 dev =

diff --git a/sleap_io/__init__.py b/sleap_io/__init__.py
@@ -2,7 +2,7 @@
 
 # Define package version.
 # This is read dynamically by setuptools in setup.cfg to determine the release version.
-__version__ = "0.0.1"
+__version__ = "0.0.2"
 
 from sleap_io.model.skeleton import Node, Edge, Skeleton, Symmetry
 from sleap_io.model.video import Video
@@ -15,5 +15,10 @@
 )
 from sleap_io.model.labeled_frame import LabeledFrame
 from sleap_io.model.labels import Labels
-from sleap_io.io.main import load_slp
-from sleap_io.io.nwb import write_labels_to_nwb, append_labels_data_to_nwb
+from sleap_io.io.main import (
+    load_slp,
+    load_nwb,
+    save_nwb,
+    load_labelstudio,
+    save_labelstudio,
+)
diff --git a/sleap_io/io/labelstudio.py b/sleap_io/io/labelstudio.py
@@ -1,43 +1,84 @@
-"""This module handles direct I/O operations for working with .slp files.
+"""This module handles direct I/O operations for working with Labelstudio files.
 
 Some important nomenclature:
-  - `tasks`: typically maps to a single frame of data to be annotated, closest correspondance is to `LabeledFrame`
-  - `annotations`: collection of points, polygons, relations, etc. corresponds to `Instance`s and `Point`s, but a flattened hierarchy
+  - `tasks`: typically maps to a single frame of data to be annotated, closest
+    correspondence is to `LabeledFrame`
+  - `annotations`: collection of points, polygons, relations, etc. corresponds to
+    `Instance`s and `Point`s, but a flattened hierarchy
 
 """
 
-
 import datetime
-import json
+import simplejson as json
 import math
 import uuid
-from typing import Dict, Iterable, List, Tuple
+from typing import Dict, Iterable, List, Tuple, Optional, Union
 
 from sleap_io import Instance, LabeledFrame, Labels, Node, Point, Video, Skeleton
 
 
-def read_labels(labels_path: str, skeleton: Skeleton) -> Labels:
-    """Read label-studio style annotations from a file and return a `Labels` object.
+def read_labels(
+    labels_path: str, skeleton: Optional[Union[Skeleton, List[str]]] = None
+) -> Labels:
+    """Read Label Studio style annotations from a file and return a `Labels` object.
 
     Args:
-        labels_path: Path to the label-studio annotation file, in json format.
-        skeleton: Skeleton
+        labels_path: Path to the Label Studio annotation file, in json format.
+        skeleton: An optional `Skeleton` object or list of node names. If not provided
+            (the default), skeleton will be inferred from the data. It may be useful to
+            provide this so the keypoint label types can be filtered to just the ones in
+            the skeleton.
 
     Returns:
         Parsed labels as a `Labels` instance.
+
+    Raises:
+        TypeError: If `skeleton` is not `None`, a list, or a `Skeleton`.
     """
     with open(labels_path, "r") as task_file:
         tasks = json.load(task_file)
 
+    # Normalize the accepted `skeleton` forms to a `Skeleton` instance.
+    if isinstance(skeleton, list):
+        skeleton = Skeleton(nodes=skeleton)  # type: ignore[arg-type]
+    elif skeleton is None:
+        skeleton = infer_nodes(tasks)
+    elif not isinstance(skeleton, Skeleton):
+        # Explicit raise rather than `assert`, which is stripped under `python -O`.
+        raise TypeError(
+            "skeleton must be a Skeleton, a list of node names or None, "
+            f"got {type(skeleton).__name__}"
+        )
+
     return parse_tasks(tasks, skeleton)
 
 
+def infer_nodes(tasks: List[Dict]) -> Skeleton:
+    """Parse the loaded JSON tasks to create a minimal skeleton.
+
+    Node names are collected from every `keypointlabels` result of every annotation
+    and kept in first-seen order, so the same input always yields the same node
+    order.
+
+    Args:
+        tasks: Collection of tasks loaded from Label Studio JSON.
+
+    Returns:
+        The inferred `Skeleton`, built from the unique keypoint label names.
+
+    Raises:
+        ValueError: If a task has neither an `annotations` nor a `completions` key.
+    """
+    # A dict is used as an insertion-ordered set. Iterating a `set` of strings can
+    # give a different order on every interpreter run (string hash randomization),
+    # which would make the node order of the skeleton non-reproducible.
+    node_names: Dict[str, None] = {}
+    for entry in tasks:
+        if "annotations" in entry:
+            key = "annotations"
+        elif "completions" in entry:
+            key = "completions"
+        else:
+            raise ValueError("Cannot find annotation data for entry!")
+
+        for annotation in entry[key]:
+            for datum in annotation["result"]:
+                if datum["type"] == "keypointlabels":
+                    for node_name in datum["value"]["keypointlabels"]:
+                        node_names[node_name] = None
+
+    return Skeleton(nodes=list(node_names))
+
+
 def parse_tasks(tasks: List[Dict], skeleton: Skeleton) -> Labels:
-    """Read label-studio style annotations from a file and return a `Labels` object
+    """Convert loaded Label Studio tasks into a `Labels` object.
 
     Args:
-        tasks: collection of tasks to be concerted to `Labels`
-        skeleton: Skeleton
+        tasks: Collection of tasks to be converted to `Labels`.
+        skeleton: `Skeleton` with the nodes and edges to be used.
 
     Returns:
         Parsed labels as a `Labels` instance.
@@ -57,14 +98,14 @@ def parse_tasks(tasks: List[Dict], skeleton: Skeleton) -> Labels:
     return Labels(frames)
 
 
-def write_labels(labels: Labels) -> List[dict]:
-    """Convert a `Labels` object into label-studio annotations
+def convert_labels(labels: Labels) -> List[dict]:
+    """Convert a `Labels` object into Label Studio-formatted annotations.
 
     Args:
-        labels: Labels to be converted to label-studio task format
+        labels: SLEAP `Labels` to be converted to Label Studio task format.
 
     Returns:
-        label-studio version of `Labels`
+        Label Studio dictionaries of the `Labels` data.
     """
 
     out = []
@@ -79,7 +120,7 @@ def write_labels(labels: Labels) -> List[dict]:
         frame_annots = []
 
         for instance in frame.instances:
-            inst_id = uuid.uuid4()
+            inst_id = str(uuid.uuid4())
             frame_annots.append(
                 {
                     "original_width": width,
@@ -103,7 +144,7 @@ def write_labels(labels: Labels) -> List[dict]:
             )
 
             for node, point in instance.points.items():
-                point_id = uuid.uuid4()
+                point_id = str(uuid.uuid4())
 
                 # add this point
                 frame_annots.append(
@@ -167,6 +208,23 @@ def write_labels(labels: Labels) -> List[dict]:
     return out
 
 
+def write_labels(labels: Labels, filename: str):
+    """Convert and save a SLEAP `Labels` object to a Label Studio `.json` file.
+
+    Args:
+        labels: SLEAP `Labels` to be converted to Label Studio task format.
+        filename: Path to save Label Studio annotations (`.json`).
+
+    Raises:
+        TypeError: If the converted data contains a value that cannot be encoded
+            as JSON.
+    """
+    import numbers
+
+    def _encode(obj):
+        # Fallback hook: the JSON encoder only calls this for objects it cannot
+        # serialize natively (presumably NumPy scalars coming from the labels).
+        if type(obj).__name__ == "uint64" or isinstance(obj, numbers.Integral):
+            return int(obj)
+        if isinstance(obj, numbers.Real):
+            return float(obj)
+        # Falling through would return `None`, which is silently written as
+        # `null`; raise instead so unsupported values are not lost unnoticed.
+        raise TypeError(
+            f"Object of type {type(obj).__name__} is not JSON serializable"
+        )
+
+    ls_dicts = convert_labels(labels)
+    with open(filename, "w") as f:
+        json.dump(ls_dicts, f, indent=4, default=_encode)
+
+
 def task_to_labeled_frame(
     task: dict, skeleton: Skeleton, key: str = "annotations"
 ) -> LabeledFrame:
@@ -179,56 +237,49 @@ def task_to_labeled_frame(
             )
         )
 
-    try:
-        # only parse the first entry result
-        to_parse = task[key][0]["result"]
-
-        individuals = filter_and_index(to_parse, "rectanglelabels")
-        keypoints = filter_and_index(to_parse, "keypointlabels")
-        relations = build_relation_map(to_parse)
-        instances = []
-
-        if len(individuals) > 0:
-            # multi animal case:
-            for indv_id, indv in individuals.items():
-                points = {}
-                for rel in relations[indv_id]:
-                    kpt = keypoints.pop(rel)
-                    node = Node(kpt["value"]["keypointlabels"][0])
-                    x_pos = (kpt["value"]["x"] * kpt["original_width"]) / 100
-                    y_pos = (kpt["value"]["y"] * kpt["original_height"]) / 100
-
-                    # If the value is a NAN, the user did not mark this keypoint
-                    if math.isnan(x_pos) or math.isnan(y_pos):
-                        continue
-
-                    points[node] = Point(x_pos, y_pos)
-
-                if len(points) > 0:
-                    instances.append(Instance(points, skeleton))
-
-        # If this is multi-animal, any leftover keypoints should be unique bodyparts, and will be collected here
-        # if single-animal, we only have 'unique bodyparts' [in a way] and the process is identical
-        points = {}
-        for _, kpt in keypoints.items():
-            node = Node(kpt["value"]["keypointlabels"][0])
-            points[node] = Point(
-                (kpt["value"]["x"] * kpt["original_width"]) / 100,
-                (kpt["value"]["y"] * kpt["original_height"]) / 100,
-                visible=True,
-            )
-        if len(points) > 0:
-            instances.append(Instance(points, skeleton))
+    # only parse the first entry result
+    to_parse = task[key][0]["result"]
+
+    individuals = filter_and_index(to_parse, "rectanglelabels")
+    keypoints = filter_and_index(to_parse, "keypointlabels")
+    relations = build_relation_map(to_parse)
+    instances = []
+
+    if len(individuals) > 0:
+        # multi animal case:
+        for indv_id, indv in individuals.items():
+            points = {}
+            for rel in relations[indv_id]:
+                kpt = keypoints.pop(rel)
+                node = Node(kpt["value"]["keypointlabels"][0])
+                x_pos = (kpt["value"]["x"] * kpt["original_width"]) / 100
+                y_pos = (kpt["value"]["y"] * kpt["original_height"]) / 100
+
+                # If the value is a NAN, the user did not mark this keypoint
+                if math.isnan(x_pos) or math.isnan(y_pos):
+                    continue
+
+                points[node] = Point(x_pos, y_pos)
+
+            if len(points) > 0:
+                instances.append(Instance(points, skeleton))
+
+    # If this is multi-animal, any leftover keypoints should be unique bodyparts, and will be collected here
+    # if single-animal, we only have 'unique bodyparts' [in a way] and the process is identical
+    points = {}
+    for _, kpt in keypoints.items():
+        node = Node(kpt["value"]["keypointlabels"][0])
+        points[node] = Point(
+            (kpt["value"]["x"] * kpt["original_width"]) / 100,
+            (kpt["value"]["y"] * kpt["original_height"]) / 100,
+            visible=True,
+        )
+    if len(points) > 0:
+        instances.append(Instance(points, skeleton))
 
-        video, frame_idx = video_from_task(task)
+    video, frame_idx = video_from_task(task)
 
-        return LabeledFrame(video, frame_idx, instances)
-    except Exception as excpt:
-        raise RuntimeError(
-            "While working on Task #{}, encountered the following error:".format(
-                task.get("id", "??")
-            )
-        ) from excpt
+    return LabeledFrame(video, frame_idx, instances)
 
 
 def filter_and_index(annotations: Iterable[dict], annot_type: str) -> Dict[str, dict]:
@@ -239,16 +290,16 @@ def filter_and_index(annotations: Iterable[dict], annot_type: str) -> Dict[str,
         annot_type: annotation type to filter e.g. 'keypointlabels' or 'rectanglelabels'
 
     Returns:
-        Dict[str, dict] - indexed and filtered annotations. Only annotations of type `annot_type`
-        will survive, and annotations are indexed by ID
+        Dict[str, dict] - indexed and filtered annotations. Only annotations of type
+        `annot_type` will survive, and annotations are indexed by ID.
     """
     filtered = list(filter(lambda d: d["type"] == annot_type, annotations))
     indexed = {item["id"]: item for item in filtered}
     return indexed
 
 
 def build_relation_map(annotations: Iterable[dict]) -> Dict[str, List[str]]:
-    """Build a two-way relationship map between annotations
+    """Build a two-way relationship map between annotations.
 
     Args:
         annotations: annotations, presumably, containing relation types
@@ -270,10 +321,10 @@ def build_relation_map(annotations: Iterable[dict]) -> Dict[str, List[str]]:
 
 
 def video_from_task(task: dict) -> Tuple[Video, int]:
-    """Given a label-studio task, retrieve video information
+    """Given a Label Studio task, retrieve video information.
 
     Args:
-        task: label-studio task
+        task: Label Studio task
 
     Returns:
         Video and frame index for this task
-Original file line number
+Diff line change
@@ Expand Up / @@ -23,6 +23,7 @@ install_requires = @@
         pynwb
         ndx-pose
         pandas
+        simplejson
     [options.extras_require]
     dev =
@@ Expand Down @@