Merge pull request #506 from dirac-institute/analysis_utils
Remove analysis_utils.py
jeremykubica authored Mar 5, 2024
2 parents d2e3c54 + b4334e6 commit f39e379
Showing 8 changed files with 177 additions and 326 deletions.
5 changes: 4 additions & 1 deletion src/kbmod/__init__.py
@@ -5,9 +5,12 @@
except ImportError:
warnings.warn("Unable to determine the package version. " "This is likely a broken installation.")

# This is needed for compatibility with some older compilers: erfinv needs to be
# imported before other packages, though I'm not sure why.
from scipy.special import erfinv

from . import (
analysis,
analysis_utils,
data_interface,
file_utils,
filters,
118 changes: 0 additions & 118 deletions src/kbmod/analysis_utils.py

This file was deleted.
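Since this commit deletes the module outright, any downstream code that imports `kbmod.analysis_utils` will now raise an `ImportError`. A minimal migration sketch, assuming only what this diff shows (the `PostProcess` class goes away and its `load_and_filter_results` logic moves onto the search runner in `kbmod.run_search`):

```python
# Sketch: detect the removed module and fall back to the new code path.
# PostProcess is the class removed along with analysis_utils.py here.
try:
    from kbmod.analysis_utils import PostProcess  # pre-#506 releases only
    HAVE_LEGACY_POST_PROCESS = True
except ImportError:
    HAVE_LEGACY_POST_PROCESS = False
    # Post-processing (chunked result loading + filtering) now lives in
    # kbmod.run_search; see load_and_filter_results in the diff below.
    from kbmod import run_search
```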

8 changes: 7 additions & 1 deletion src/kbmod/filters/__init__.py
@@ -1 +1,7 @@
from . import base_filter, stamp_filters, stats_filters
from . import (
base_filter,
clustering_filters,
sigma_g_filter,
stamp_filters,
stats_filters,
)
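The newly exported modules correspond to the filtering steps that `run_search.py` now calls directly (see the next file). A rough usage sketch based on the calls visible in this diff; the observation times, percentile bounds, thresholds, and core count are illustrative placeholders:

```python
from kbmod.filters.sigma_g_filter import SigmaGClipping, apply_clipped_sigma_g
from kbmod.filters.stats_filters import CombinedStatsFilter
from kbmod.result_list import ResultList

mjds = [60000.0, 60001.0, 60002.0]  # placeholder observation times
result_batch = ResultList(mjds)     # normally filled with ResultRow entries

# Clipped-sigmaG filtering over the default [25, 75] percentile bounds,
# mirroring the positional call in load_and_filter_results below.
clipper = SigmaGClipping(25, 75, 2, False)
apply_clipped_sigma_g(clipper, result_batch, 4)  # 4 worker cores

# Keep only rows with enough observations and a high enough likelihood.
result_batch.apply_filter(CombinedStatsFilter(min_obs=10, min_lh=10.0))
```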
148 changes: 115 additions & 33 deletions src/kbmod/run_search.py
@@ -7,12 +7,12 @@

import kbmod.search as kb

from .analysis_utils import PostProcess
from .configuration import SearchConfiguration
from .data_interface import load_input_from_config, load_input_from_file
from .filters.clustering_filters import apply_clustering
from .filters.sigma_g_filter import SigmaGClipping
from .filters.sigma_g_filter import apply_clipped_sigma_g, SigmaGClipping
from .filters.stamp_filters import append_all_stamps, get_coadds_and_filter
from .filters.stats_filters import CombinedStatsFilter
from .masking import apply_mask_operations
from .result_list import *
from .trajectory_generator import KBMODV1Search
@@ -43,24 +43,121 @@ def get_angle_limits(self, config):
ang_max = config["average_angle"] + config["ang_arr"][1]
return [ang_min, ang_max]

def do_gpu_search(self, config, search, trj_generator):
def load_and_filter_results(self, search, config):
"""This function loads results that are output by the gpu grid search.
Results are loaded in chunks and evaluated to see if the minimum
likelihood level has been reached. If not, another chunk of results is
fetched. The results are filtered using a clipped-sigmaG filter as they
are loaded and only the passing results are kept.
Parameters
----------
search : `kbmod.search`
The search function object.
config : `SearchConfiguration`
The configuration parameters
chunk_size : int
The number of results to load at a given time from search.
Returns
-------
keep : `ResultList`
A ResultList object containing values from trajectories.
"""
Performs search on the GPU.
# Parse and check the configuration parameters.
num_obs = config["num_obs"]
sigmaG_lims = config["sigmaG_lims"]
clip_negative = config["clip_negative"]
lh_level = config["lh_level"]
max_lh = config["max_lh"]
chunk_size = config["chunk_size"]
if chunk_size <= 0:
raise ValueError(f"Invalid chunk size {chunk_size}")
num_cores = config["num_cores"]
if num_cores <= 0:
raise ValueError(f"Invalid number of cores {num_cores}")

# Set up the list of results.
img_stack = search.get_imagestack()
num_times = img_stack.img_count()
mjds = [img_stack.get_obstime(t) for t in range(num_times)]
keep = ResultList(mjds)

# Set up the clipped sigmaG filter.
if sigmaG_lims is not None:
bnds = sigmaG_lims
else:
bnds = [25, 75]
clipper = SigmaGClipping(bnds[0], bnds[1], 2, clip_negative)

# Set up the combined stats filter.
if lh_level > 0.0:
stats_filter = CombinedStatsFilter(min_obs=num_obs, min_lh=lh_level)
else:
stats_filter = CombinedStatsFilter(min_obs=num_obs)

print("---------------------------------------")
print("Retrieving Results")
print("---------------------------------------")
likelihood_limit = False
res_num = 0
total_count = 0
while likelihood_limit is False:
print("Getting results...")
results = search.get_results(res_num, chunk_size)
print("---------------------------------------")
print("Chunk Start = %i" % res_num)
print("Chunk Max Likelihood = %.2f" % results[0].lh)
print("Chunk Min. Likelihood = %.2f" % results[-1].lh)
print("---------------------------------------")

result_batch = ResultList(mjds)
for i, trj in enumerate(results):
# Stop as soon as we hit a result below our limit, because anything after
# that is not guaranteed to be valid due to potential on-GPU filtering.
if trj.lh < lh_level:
likelihood_limit = True
break

if trj.lh < max_lh:
row = ResultRow(trj, num_times)
psi_curve = np.array(search.get_psi_curves(trj))
phi_curve = np.array(search.get_phi_curves(trj))
row.set_psi_phi(psi_curve, phi_curve)
result_batch.append_result(row)
total_count += 1

batch_size = result_batch.num_results()
print("Extracted batch of %i results for total of %i" % (batch_size, total_count))
if batch_size > 0:
apply_clipped_sigma_g(clipper, result_batch, num_cores)
result_batch.apply_filter(stats_filter)

# Add the results to the final set.
keep.extend(result_batch)
res_num += chunk_size
return keep

def do_gpu_search(self, config, stack, trj_generator):
"""Performs search on the GPU.
Parameters
----------
config : `SearchConfiguration`
The configuration parameters
search : `StackSearch`
The C++ object that holds data and does searching.
trj_generator : `TrajectoryGenerator`, optional
stack : `ImageStack`
The stack before the masks have been applied. Modified in-place.
trj_generator : `TrajectoryGenerator`
The object to generate the candidate trajectories for each pixel.
Returns
-------
search : `StackSearch`
The C++ object holding the data and results.
keep : `ResultList`
The results.
"""
# Create the search object which will hold intermediate data and results.
search = kb.StackSearch(stack)

width = search.get_image_width()
height = search.get_image_height()
debug = config["debug"]
@@ -77,9 +77,6 @@ def do_gpu_search(self, config, search, trj_generator):
search.set_start_bounds_y(-config["y_pixel_buffer"], height + config["y_pixel_buffer"])

search_timer = kb.DebugTimer("Grid Search", debug)
if debug:
print(f"Average Angle = {config['average_angle']}")
print(f"Velocity Limits = {config['v_arr']}")

# If we are using gpu_filtering, enable it and set the parameters.
if config["gpu_filter"]:
@@ -108,7 +108,10 @@
candidates = [trj for trj in trj_generator]
search.search(candidates, int(config["num_obs"]))
search_timer.stop()
return search

# Load the results.
keep = self.load_and_filter_results(search, config)
return keep

def run_search(self, config, stack, trj_generator=None):
"""This function serves as the highest-level python interface for starting
@@ -131,20 +131,11 @@
"""
full_timer = kb.DebugTimer("KBMOD", config["debug"])

# Collect the MJDs.
mjds = []
for i in range(stack.img_count()):
mjds.append(stack.get_obstime(i))

# Set up the post processing data structure.
kb_post_process = PostProcess(config, mjds)

# Apply the mask to the images.
if config["do_mask"]:
stack = apply_mask_operations(config, stack)

# Perform the actual search.
search = kb.StackSearch(stack)
if trj_generator is None:
ang_limits = self.get_angle_limits(config)
trj_generator = KBMODV1Search(
@@ -155,27 +155,21 @@
ang_limits[0],
ang_limits[1],
)
search = self.do_gpu_search(config, search, trj_generator)

# Load the KBMOD results into Python and apply a filter based on 'filter_type'.
keep = kb_post_process.load_and_filter_results(
search,
config["lh_level"],
chunk_size=config["chunk_size"],
max_lh=config["max_lh"],
)
keep = self.do_gpu_search(config, stack, trj_generator)

if config["do_stamp_filter"]:
stamp_timer = kb.DebugTimer("stamp filtering", config["debug"])
get_coadds_and_filter(
keep,
search.get_imagestack(),
stack,
config,
debug=config["debug"],
)
stamp_timer.stop()

if config["do_clustering"]:
cluster_timer = kb.DebugTimer("clustering", config["debug"])
mjds = [stack.get_obstime(t) for t in range(stack.img_count())]
cluster_params = {
"ang_lims": self.get_angle_limits(config),
"cluster_type": config["cluster_type"],
@@ -191,7 +191,7 @@
# Extract all the stamps for all time steps and append them onto the result rows.
if config["save_all_stamps"]:
stamp_timer = kb.DebugTimer("computing all stamps", config["debug"])
append_all_stamps(keep, search.get_imagestack(), config["stamp_radius"])
append_all_stamps(keep, stack, config["stamp_radius"])
stamp_timer.stop()

# TODO - Re-enable the known object counting once we have a way to pass
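For reference, a sketch of driving the slimmed-down pipeline end to end, based only on the signatures visible in this diff (`run_search(self, config, stack, trj_generator=None)`); the `SearchRunner` class name and the stack construction are assumptions not shown in this excerpt:

```python
import kbmod.search as kb
from kbmod.configuration import SearchConfiguration
from kbmod.run_search import SearchRunner  # assumed name; the diff shows only its methods

config = SearchConfiguration()  # default parameters; override via config["key"] = value
stack = kb.ImageStack([])       # placeholder: a real ImageStack of science images

runner = SearchRunner()
# After this commit, masking, the GPU search, sigmaG/stats filtering,
# stamp filtering, and clustering all run inside this one call.
results = runner.run_search(config, stack)  # trj_generator defaults to KBMODV1Search
print(f"Kept {results.num_results()} trajectories.")
```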
