diff --git a/README.md b/README.md index ab0f8fe..1754a95 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,44 @@ -# extragalactic_hostless -Repository for the extragalactic hostless project from CRP7 +# ELEPHANT: ExtragaLactic Pipeline for Hostless AstroNomical Transients +This repository contains the pipeline for detecting potential hostless transients, as discussed in the paper (link). + + +Install the requirements with the command below + + pip install -r requirements.txt + +The pipeline parameters can be configured in the pipeline_config.json file + + { + "parquet_files_list": "data/*.parquet", + "save_directory": "/path/to/save/results/", + ... + } + +To start the pipeline, use the command below + + python run_pipeline.py + +The pipeline generates a result parquet file with the following columns for each input parquet file + +- **b:cutoutScience_stampData_stacked:** stacked science images +- **b:cutoutTemplate_stampData_stacked:** stacked template images +- **b:cutoutDifference_stampData_stacked:** stacked difference images +- **science_clipped:** stacked sigma clipped science image +- **template_clipped:** stacked sigma clipped template image +- **number_of_stamps_in_stacking:** number of images used for stacking after FWHM stamp preprocessing +- **is_hostless_candidate_clipping:** True if the candidate is flagged as hostless by the sigma clipping approach +- **distance_science:** distance from transient to nearest mask in pixels +- **anderson-darling_SCIENCE_N_statistic:** Anderson-Darling test statistic value for N x N cutout science image +- **anderson-darling_SCIENCE_N_pvalue:** Anderson-Darling test p-value for N x N cutout science image +- **anderson-darling_TEMPLATE_N_statistic:** Anderson-Darling test statistic value for N x N cutout template image +- **anderson-darling_TEMPLATE_N_pvalue:** Anderson-Darling test p-value for N x N cutout template image +- **kstest_SCIENCE_N_statistic:** Kolmogorov-Smirnov test statistic value for N x N cutout science image +- 
**kstest_SCIENCE_N_pvalue:** Kolmogorov-Smirnov test p-value for N x N cutout science image +- **kstest_TEMPLATE_N_statistic:** Kolmogorov-Smirnov test statistic value for N x N cutout template image +- **kstest_TEMPLATE_N_pvalue:** Kolmogorov-Smirnov test p-value for N x N cutout template image + +The project is part of [COIN Residence Program #7, Portugal, 2023](https://cosmostatistics-initiative.org/residence-programs/crp7/) + + + + diff --git a/src/pipeline_config.json b/src/pipeline_config.json index 1fd20b5..bb243ec 100644 --- a/src/pipeline_config.json +++ b/src/pipeline_config.json @@ -13,5 +13,5 @@ "max_number_of_pixels_clipped": 5, "min_number_of_pixels_clipped": 3 }, - "number_of_processes": 20 + "number_of_processes": 1 } diff --git a/src/pipeline_utils.py b/src/pipeline_utils.py index 97cfb5d..8d4abee 100644 --- a/src/pipeline_utils.py +++ b/src/pipeline_utils.py @@ -458,10 +458,10 @@ def run_powerspectrum_analysis( template_data = create_noise_filled_mask( template_image, template_mask, image_size) _, anderson_results_dict = ps.detect_host_with_powerspectrum( - science_data, template_data, N_iter=number_of_iterations, + science_data, template_data, number_of_iterations=number_of_iterations, metric="anderson-darling") _, kstest_results_dict = ps.detect_host_with_powerspectrum( - science_data, template_data, N_iter=number_of_iterations, + science_data, template_data, number_of_iterations=number_of_iterations, metric="kstest") out_results = {**anderson_results_dict, **kstest_results_dict} return out_results diff --git a/src/powerspectrum.py b/src/powerspectrum.py index c4fcadd..f46300d 100644 --- a/src/powerspectrum.py +++ b/src/powerspectrum.py @@ -3,7 +3,7 @@ from scipy.stats import binned_statistic, wasserstein_distance, anderson_ksamp, kstest -def detect_host_with_powerspectrum(sci_image=None, tpl_image=None, N_iter=1000, cutout_sizes=[7, 15, 29], +def detect_host_with_powerspectrum(sci_image=None, tpl_image=None, number_of_iterations=1000, 
cutout_sizes=[7, 15, 29], metric='anderson-darling'): """ Function to detect host with power spectrum analysis. @@ -11,7 +11,7 @@ def detect_host_with_powerspectrum(sci_image=None, tpl_image=None, N_iter=1000, Parameters: - sci_image: Science image (default: None) - tpl_image: Template image (default: None) - - N_iter: Number of iterations for shuffling (default: 1000) + - number_of_iterations: Number of iterations for shuffling (default: 1000) - cutout_sizes: List of cutout sizes for analysis (default: [7, 15, 29]) - metric: Metric for comparison ('anderson-darling' or 'kstest') (default: 'anderson-darling') @@ -63,7 +63,7 @@ def get_powerspectrum(data, size): shuffled_Abins_dict = {} # Dictionary to store shuffled Abins # Iterate through shuffling process - for n in range(N_iter): + for n in range(number_of_iterations): copy = np.copy(image) copy = copy.reshape(full_len * full_len) np.random.shuffle(copy) @@ -76,7 +76,7 @@ def get_powerspectrum(data, size): N_bins = len(np.arange(0.5, size // 2 + 1, 1.)) - 1 if n == 0: - shuffled_Abins_dict[size] = np.zeros((N_iter, N_bins)) + shuffled_Abins_dict[size] = np.zeros((number_of_iterations, N_bins)) image_resized = image[start: stop, start: stop] Abins = get_powerspectrum(image_resized, size) @@ -92,13 +92,13 @@ def get_powerspectrum(data, size): WD_dist_real_to_shuffled = [] WD_dist_shuffled_to_shuffled = [] - for n in range(N_iter): + for n in range(number_of_iterations): iter1 = shuffled_Abins_dict[size][n] wd = wasserstein_distance(iter1, real_Abins_dict[size]) WD_dist_real_to_shuffled.append(wd) - for m in range(N_iter): + for m in range(number_of_iterations): if m >= n: continue iter2 = shuffled_Abins_dict[size][m]