Skip to content

Commit

Permalink
Merge pull request #347 from nasa-fornax/delete_data_files
Browse files Browse the repository at this point in the history
add parameters to delete downloaded data from spectroscopy queries
  • Loading branch information
bsipocz authored Sep 30, 2024
2 parents 016a786 + cf75254 commit df06e2f
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 49 deletions.
7 changes: 4 additions & 3 deletions spectroscopy/code_src/herschel_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ def find_max_flux_column(df):
return max_flux_col


def Herschel_get_spec(sample_table, search_radius_arcsec, datadir, delete_tarfiles = False):
def Herschel_get_spec(sample_table, search_radius_arcsec, datadir,
delete_downloaded_data=True):
'''
Retrieves Herschel spectra from a subset of modes for a list of sources.
Expand All @@ -52,7 +53,7 @@ def Herschel_get_spec(sample_table, search_radius_arcsec, datadir, delete_tarfil
datadir : `str`
Data directory where to store the data. Each function will create a
separate data directory (for example "[datadir]/HST/" for HST data).
delete_tarfiles: True/False
delete_downloaded_data: `bool`, optional
Should the tarfiles be deleted after spectra are extracted?
Returns
Expand Down Expand Up @@ -143,7 +144,7 @@ def Herschel_get_spec(sample_table, search_radius_arcsec, datadir, delete_tarfil
print(f"Tarfile ReadError. This tarfile may be corrupt {path_to_file}")

#delete tar files
if delete_tarfiles:
if delete_downloaded_data:
filename_tar = f"data/herschel/{objectid_table[tab_id]['observation_id']}.tar"
print('filename_tar', filename_tar)
if os.path.exists(filename_tar):
Expand Down
29 changes: 23 additions & 6 deletions spectroscopy/code_src/mast_functions.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import os, sys, io

import shutil
import numpy as np
from contextlib import redirect_stdout

Expand All @@ -19,7 +19,8 @@
import matplotlib.pyplot as plt


def JWST_get_spec(sample_table, search_radius_arcsec, datadir, verbose):
def JWST_get_spec(sample_table, search_radius_arcsec, datadir, verbose,
delete_downloaded_data=True):
'''
Retrieves JWST spectra for a list of sources and groups/stacks them.
This main function runs two sub-functions:
Expand All @@ -37,6 +38,8 @@ def JWST_get_spec(sample_table, search_radius_arcsec, datadir, verbose):
separate data directory (for example "[datadir]/HST/" for HST data).
verbose : `bool`
Verbosity level. Set to True for extra talking.
delete_downloaded_data : `bool`, optional
If True, delete the downloaded data files. Default is True.
Returns
-------
Expand All @@ -47,7 +50,7 @@ def JWST_get_spec(sample_table, search_radius_arcsec, datadir, verbose):

## Get the spectra
print("Searching and Downloading Spectra... ")
df_jwst_all = JWST_get_spec_helper(sample_table, search_radius_arcsec, datadir, verbose)
df_jwst_all = JWST_get_spec_helper(sample_table, search_radius_arcsec, datadir, verbose, delete_downloaded_data)
print("done")

## Group
Expand All @@ -58,7 +61,8 @@ def JWST_get_spec(sample_table, search_radius_arcsec, datadir, verbose):
return(df_jwst_group)


def JWST_get_spec_helper(sample_table, search_radius_arcsec, datadir, verbose):
def JWST_get_spec_helper(sample_table, search_radius_arcsec, datadir, verbose,
delete_downloaded_data=True):
'''
Retrieves JWST spectra for a list of sources.
Expand All @@ -73,6 +77,8 @@ def JWST_get_spec_helper(sample_table, search_radius_arcsec, datadir, verbose):
separate data directory (for example "[datadir]/HST/" for HST data).
verbose : `bool`
Verbosity level. Set to True for extra talking.
delete_downloaded_data : `bool`, optional
If True, delete the downloaded data files. Default is True.
Returns
-------
Expand Down Expand Up @@ -171,6 +177,10 @@ def JWST_get_spec_helper(sample_table, search_radius_arcsec, datadir, verbose):
filter=[tab["filters"][jj]],
)).set_index(["objectid", "label", "filter", "mission"])
df_spec.append(dfsingle)

if delete_downloaded_data:
shutil.rmtree(this_data_dir)


else:
print("Nothing to download for source {}.".format(stab["label"]))
Expand Down Expand Up @@ -266,7 +276,8 @@ def JWST_group_spectra(df, verbose, quickplot):

return(df_spec)

def HST_get_spec(sample_table, search_radius_arcsec, datadir, verbose):
def HST_get_spec(sample_table, search_radius_arcsec, datadir, verbose,
delete_downloaded_data=True):
'''
Retrieves HST spectra for a list of sources.
Expand All @@ -281,6 +292,9 @@ def HST_get_spec(sample_table, search_radius_arcsec, datadir, verbose):
separate data directory (for example "[datadir]/HST/" for HST data).
verbose : `bool`
Verbosity level. Set to True for extra talking.
delete_downloaded_data : `bool`, optional
If True, delete the downloaded data files. Default is True.
Returns
-------
Expand Down Expand Up @@ -366,7 +380,10 @@ def HST_get_spec(sample_table, search_radius_arcsec, datadir, verbose):
filter=[tab["filters"][jj]],
)).set_index(["objectid", "label", "filter", "mission"])
df_spec.append(dfsingle)


if delete_downloaded_data:
shutil.rmtree(this_data_dir)

else:
print("Nothing to download for source {}.".format(stab["label"]))
else:
Expand Down
77 changes: 37 additions & 40 deletions spectroscopy/spectra_generator.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@ jupytext:
format_version: 0.13
jupytext_version: 1.16.4
kernelspec:
display_name: Python 3 (ipykernel)
display_name: science_demo
language: python
name: python3
name: conda-env-science_demo-py
---

# Extract Multi-Wavelength Spectroscopy from Archival Data
Expand Down Expand Up @@ -70,21 +70,14 @@ The ones with an asterisk (*) are the challenging ones.
• ...
## Runtime

As of 2024 August, this notebook takes ~300s to run to completion on Fornax using the 'Astrophysics Default Image' and the 'Large' server with 16GB RAM/ 4CPU.
As of 2024 August, this notebook takes ~330s to run to completion on Fornax using the 'Astrophysics Default Image' and the 'Large' server with 16GB RAM/ 4CPU.

## Authors:
Andreas Faisst, Jessica Krick, Shoubaneh Hemmati, Troy Raen, Brigitta Sipőcz, David Shupe

## Acknowledgements:
...

## Open Issues:

• Implement queries for: Herschel, Euclid (use mock data), SPHEREx (use mock data)
• Match to HEASARC
• Make more efficient (especially MAST searches)


+++

### Datasets that were considered but didn't end up being used:
Expand All @@ -99,12 +92,12 @@ Andreas Faisst, Jessica Krick, Shoubaneh Hemmati, Troy Raen, Brigitta Sipőcz, D

This cell will install them if needed:

```{code-cell}
```{code-cell} ipython3
# Uncomment the next line to install dependencies if needed.
# !pip install -r requirements_spectra_generator.txt
```

```{code-cell}
```{code-cell} ipython3
import sys
import numpy as np
import os
Expand Down Expand Up @@ -133,7 +126,7 @@ from herschel_functions import Herschel_get_spec

Here we will define the sample of galaxies. For now, we just enter some "random" coordinates to test the code.

```{code-cell}
```{code-cell} ipython3
coords = []
labels = []
Expand Down Expand Up @@ -173,7 +166,7 @@ At this point you may wish to write out your sample to disk and reuse that in fu

For the format of the saved file, we suggest choosing one of the formats that fully support astropy objects (e.g., SkyCoord). One example that works is Enhanced Character-Separated Values or ['ecsv'](https://docs.astropy.org/en/stable/io/ascii/ecsv.html).

```{code-cell}
```{code-cell} ipython3
if not os.path.exists("./data"):
os.mkdir("./data")
sample_table.write('data/input_sample.ecsv', format='ascii.ecsv', overwrite = True)
Expand All @@ -183,14 +176,14 @@ sample_table.write('data/input_sample.ecsv', format='ascii.ecsv', overwrite = Tr

If you already have a previously generated sample table, run only this step from this section.

```{code-cell}
```{code-cell} ipython3
sample_table = Table.read('data/input_sample.ecsv', format='ascii.ecsv')
```

### 1.4 Initialize data structure to hold the spectra
Here, we initialize the MultiIndex data structure that will hold the spectra.

```{code-cell}
```{code-cell} ipython3
df_spec = MultiIndexDFObject()
```

Expand All @@ -208,16 +201,14 @@ This archive includes spectra taken by

• Spitzer/IRS



```{code-cell}
```{code-cell} ipython3
%%time
## Get Keck Spectra (COSMOS only)
df_spec_DEIMOS = KeckDEIMOS_get_spec(sample_table = sample_table, search_radius_arcsec=1)
df_spec.append(df_spec_DEIMOS)
```

```{code-cell}
```{code-cell} ipython3
%%time
## Get Spitzer IRS Spectra
df_spec_IRS = SpitzerIRS_get_spec(sample_table, search_radius_arcsec=1 , COMBINESPEC=False)
Expand All @@ -232,39 +223,49 @@ This archive includes spectra taken by

• JWST (including MSA and slit spectroscopy)

```{code-cell}
```{code-cell} ipython3
%%time
## Get Spectra for HST
df_spec_HST = HST_get_spec(sample_table , search_radius_arcsec = 0.5, datadir = "./data/", verbose = False)
df_spec_HST = HST_get_spec(
sample_table ,
search_radius_arcsec=0.5,
datadir="./data/",
verbose=False,
delete_downloaded_data=True
)
df_spec.append(df_spec_HST)
```

```{code-cell} ipython3
%%time
## Get Spectra for JWST
df_jwst = JWST_get_spec(
sample_table ,
search_radius_arcsec=0.5,
datadir="./data/",
verbose=False,
delete_downloaded_data=True
)
df_spec.append(df_jwst)
```

### 2.3 ESA Archive

```{code-cell}
```{code-cell} ipython3
# Herschel PACS & SPIRE from ESA TAP using astroquery
#This search is fully functional, but is commented out because it takes ~4 hours to run to completion
herschel_radius = 1.1
herschel_download_directory = 'data/herschel'
#if not os.path.exists(herschel_download_directory):
# os.makedirs(herschel_download_directory, exist_ok=True)
#df_spec_herschel = Herschel_get_spec(sample_table, herschel_radius, herschel_download_directory, delete_tarfiles = True)
#df_spec_herschel = Herschel_get_spec(sample_table, herschel_radius, herschel_download_directory, delete_downloaded_data=True)
#df_spec.append(df_spec_herschel)
```

### 2.4 SDSS Archive

```{code-cell}
%%time
## Get Spectra for JWST
df_jwst = JWST_get_spec(sample_table , search_radius_arcsec = 0.5, datadir = "./data/", verbose = False)
df_spec.append(df_jwst)
```

This includes SDSS spectra.

```{code-cell}
```{code-cell} ipython3
%%time
## Get SDSS Spectra
df_spec_SDSS = SDSS_get_spec(sample_table , search_radius_arcsec=5, data_release=17)
Expand All @@ -276,7 +277,7 @@ df_spec.append(df_spec_SDSS)
This includes DESI spectra. Here, we use the `SPARCL` query. Note that this can also be used
for SDSS searches; however, according to the SPARCL webpage, only data up to DR16 is included. Therefore, we will not include SDSS DR16 here (this is treated in the SDSS search above).

```{code-cell}
```{code-cell} ipython3
%%time
## Get DESI and BOSS spectra with SPARCL
df_spec_DESIBOSS = DESIBOSS_get_spec(sample_table, search_radius_arcsec=5)
Expand All @@ -286,8 +287,7 @@ df_spec.append(df_spec_DESIBOSS)
## 3. Make plots of luminosity as a function of time
We show flux in mJy as a function of time for all available bands for each object. `show_nbr_figures` controls how many plots are actually generated and returned to the screen. If you choose to save the plots with `save_output`, they will be put in the output directory and labelled by sample number.


```{code-cell}
```{code-cell} ipython3
### Plotting ####
create_figures(df_spec = df_spec,
bin_factor=5,
Expand All @@ -296,6 +296,3 @@ create_figures(df_spec = df_spec,
)
```

```{raw-cell}
```

0 comments on commit df06e2f

Please sign in to comment.