Skip to content

Commit

Permalink
Merge pull request #53 from jgieseler/main
Browse files Browse the repository at this point in the history
Unify pos_timestamp usage & improve warnings
  • Loading branch information
jgieseler authored Mar 5, 2024
2 parents a2d326f + 7937a3b commit 857d32a
Show file tree
Hide file tree
Showing 6 changed files with 144 additions and 49 deletions.
50 changes: 34 additions & 16 deletions seppy/loader/soho.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,25 @@
# Licensed under a 3-clause BSD style license - see LICENSE.rst

import cdflib
import datetime as dt
import glob
import numpy as np
import os
import warnings

import cdflib
import numpy as np
import pandas as pd
import pooch
import requests
import sunpy
import warnings

from sunpy.net import Fido
from sunpy.net import attrs as a
from sunpy.timeseries import TimeSeries

from seppy.util import resample_df
from seppy.util import custom_warning, resample_df


logger = pooch.get_logger()
logger.setLevel("WARNING")


def _get_metadata(dataset, path_to_cdf):
Expand Down Expand Up @@ -68,9 +72,9 @@ def _get_metadata(dataset, path_to_cdf):
return metadata


def soho_load(dataset, startdate, enddate, path=None, resample=None, pos_timestamp=None, max_conn=5):
def soho_load(dataset, startdate, enddate, path=None, resample=None, pos_timestamp='center', max_conn=5):
"""
Downloads CDF files via SunPy/Fido from CDAWeb for CELIAS, EPHIN, ERNE onboard SOHO
Download CDF files via SunPy/Fido from CDAWeb for CELIAS, EPHIN, ERNE onboard SOHO
Parameters
----------
Expand All @@ -93,7 +97,8 @@ def soho_load(dataset, startdate, enddate, path=None, resample=None, pos_timesta
resample : {str}, optional
Resample frequency in format understandable by Pandas, e.g. '1min', by default None
pos_timestamp : {str}, optional
Change the position of the timestamp: 'center' or 'start' of the accumulation interval, by default None
change the position of the timestamp: 'center' or 'start' of the accumulation interval,
or 'original' to do nothing, by default 'center'.
max_conn : {int}, optional
The number of parallel download slots used by Fido.fetch, by default 5
Expand All @@ -104,8 +109,12 @@ def soho_load(dataset, startdate, enddate, path=None, resample=None, pos_timesta
metadata : {dict}
Dictionary containing different metadata, e.g., energy channels
"""
if not (pos_timestamp=='center' or pos_timestamp=='start' or pos_timestamp is None):
raise ValueError(f'"pos_timestamp" must be either None, "center", or "start"!')
# Catch old default value for pos_timestamp
if pos_timestamp is None:
pos_timestamp = 'center'

if not (pos_timestamp=='center' or pos_timestamp=='start' or pos_timestamp=='original'):
raise ValueError(f'"pos_timestamp" must be either "original", "center", or "start"!')

if dataset == 'SOHO_COSTEP-EPHIN_L2-1MIN':
df, metadata = soho_ephin_loader(startdate, enddate, resample=resample, path=path, all_columns=False, pos_timestamp=pos_timestamp)
Expand Down Expand Up @@ -201,7 +210,8 @@ def calc_av_en_flux_ERNE(df, channels_dict_df, avg_channels, species='p', sensor


def soho_ephin_download(date, path=None):
"""Download SOHO/EPHIN level 2 data file from Kiel university to local path
"""
Download SOHO/EPHIN level 2 ascii data file from Kiel university to local path
Parameters
----------
Expand Down Expand Up @@ -241,12 +251,14 @@ def soho_ephin_download(date, path=None):
except requests.HTTPError:
print(f'No corresponding EPHIN data found at {url}')
downloaded_file = []
print('')

return downloaded_file


def soho_ephin_loader(startdate, enddate, resample=None, path=None, all_columns=False, pos_timestamp=None, use_uncorrected_data_on_own_risk=False):
"""Loads SOHO/EPHIN data and returns it as Pandas dataframe together with a dictionary providing the energy ranges per channel
def soho_ephin_loader(startdate, enddate, resample=None, path=None, all_columns=False, pos_timestamp='center', use_uncorrected_data_on_own_risk=False):
"""
Load SOHO/EPHIN level 2 ascii data and return it as Pandas dataframe together with a dictionary providing the energy ranges per channel
Parameters
----------
Expand All @@ -260,6 +272,9 @@ def soho_ephin_loader(startdate, enddate, resample=None, path=None, all_columns=
local path where the files are/should be stored, by default None
all_columns : boolean, optional
if True provide all available columns in returned dataframe, by default False
pos_timestamp : {str}, optional
change the position of the timestamp: 'center' or 'start' of the accumulation interval,
or 'original' to do nothing, by default 'center'.
Returns
-------
Expand Down Expand Up @@ -333,7 +348,8 @@ def soho_ephin_loader(startdate, enddate, resample=None, path=None, all_columns=
# Setting use_uncorrected_data_on_own_risk=True skips this replacement, so that the uncorrected
# data can be obtained at own risk!
if use_uncorrected_data_on_own_risk:
warnings.warn("Proton and helium data is still uncorrected! Know what you're doing and use at own risk!")
# warnings.warn("Proton and helium data is still uncorrected! Know what you're doing and use at own risk!")
custom_warning("Proton and helium data is still uncorrected! Know what you're doing and use at own risk!")
else:
df.P4 = -9e9
df.P8 = -9e9
Expand Down Expand Up @@ -375,7 +391,8 @@ def soho_ephin_loader(startdate, enddate, resample=None, path=None, all_columns=
cs_p25 = '25 - 53 MeV'
cs_he25 = '25 - 53 MeV/n'
if max(fmodes)==2:
warnings.warn('Careful: EPHIN ring off!')
# warnings.warn('Careful: EPHIN ring off!')
custom_warning('Careful: EPHIN ring off!')

# failure mode D since 4 Oct 2017:
# dates[-1].date() is enddate, used to catch cases when enddate is a string
Expand All @@ -384,7 +401,8 @@ def soho_ephin_loader(startdate, enddate, resample=None, path=None, all_columns=
cs_e1300 = "0.67 - 10.4 MeV"
# dates[0].date() is startdate, used to catch cases when startdate is a string
if dates[0].date() <= dt.date(2017, 10, 4):
warnings.warn('EPHIN instrument status (i.e., electron energy channels) changed during selected period (on Oct 4, 2017)!')
# warnings.warn('EPHIN instrument status (i.e., electron energy channels) changed during selected period (on Oct 4, 2017)!')
custom_warning('EPHIN instrument status (i.e., electron energy channels) changed during selected period (on Oct 4, 2017)!')

# careful!
# adjusting the position of the timestamp manually.
Expand Down
21 changes: 16 additions & 5 deletions seppy/loader/stereo.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)


logger = pooch.get_logger()
logger.setLevel("WARNING")


def stereo_sept_download(date, spacecraft, species, viewing, path=None):
"""Download STEREO/SEPT level 2 data file from Kiel university to local path
Expand Down Expand Up @@ -70,11 +74,12 @@ def stereo_sept_download(date, spacecraft, species, viewing, path=None):
except requests.HTTPError:
print(f'No corresponding SEPT data found at {url}')
downloaded_file = []
print('')

return downloaded_file


def stereo_sept_loader(startdate, enddate, spacecraft, species, viewing, resample=None, path=None, all_columns=False, pos_timestamp=None):
def stereo_sept_loader(startdate, enddate, spacecraft, species, viewing, resample=None, path=None, all_columns=False, pos_timestamp='center'):
"""Loads STEREO/SEPT data and returns it as Pandas dataframe together with a dictionary providing the energy ranges per channel
Parameters
Expand Down Expand Up @@ -285,7 +290,7 @@ def _get_metadata(dataset, path_to_cdf):
return metadata


def stereo_load(instrument, startdate, enddate, spacecraft='ahead', mag_coord='RTN', sept_species='e', sept_viewing='sun', path=None, resample=None, pos_timestamp=None, max_conn=5):
def stereo_load(instrument, startdate, enddate, spacecraft='ahead', mag_coord='RTN', sept_species='e', sept_viewing='sun', path=None, resample=None, pos_timestamp='center', max_conn=5):
"""
Downloads CDF files via SunPy/Fido from CDAWeb for HET, LET, MAG, and SEPT onboard STEREO
Expand Down Expand Up @@ -322,7 +327,8 @@ def stereo_load(instrument, startdate, enddate, spacecraft='ahead', mag_coord='R
resample : {str}, optional
resample frequency in format understandable by Pandas, e.g. '1min', by default None
pos_timestamp : {str}, optional
change the position of the timestamp: 'center' or 'start' of the accumulation interval, by default None
change the position of the timestamp: 'center' or 'start' of the accumulation interval,
or 'original' to do nothing, by default 'center'.
max_conn : {int}, optional
The number of parallel download slots used by Fido.fetch, by default 5
Expand All @@ -337,8 +343,13 @@ def stereo_load(instrument, startdate, enddate, spacecraft='ahead', mag_coord='R
trange = a.Time(startdate, enddate)
if trange.min==trange.max:
print(f'"startdate" and "enddate" might need to be different!')
if not (pos_timestamp=='center' or pos_timestamp=='start' or pos_timestamp is None):
raise ValueError(f'"pos_timestamp" must be either None, "center", or "start"!')

# Catch old default value for pos_timestamp
if pos_timestamp is None:
pos_timestamp = 'center'

if not (pos_timestamp=='center' or pos_timestamp=='start' or pos_timestamp=='original'):
raise ValueError(f'"pos_timestamp" must be either "original", "center", or "start"!')

# find name variations
if spacecraft.lower()=='a' or spacecraft.lower()=='sta':
Expand Down
6 changes: 6 additions & 0 deletions seppy/loader/wind.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@
from seppy.util import resample_df


logger = pooch.get_logger()
logger.setLevel("WARNING")


def _download_metafile(dataset, path=None):
"""
Download master cdf file from cdaweb for 'dataset'
Expand All @@ -30,6 +34,7 @@ def _download_metafile(dataset, path=None):
downloaded_file = pooch.retrieve(url=url, known_hash=None, fname=fname, path=path, progressbar=True)
except ModuleNotFoundError:
downloaded_file = pooch.retrieve(url=url, known_hash=None, fname=fname, path=path, progressbar=False)
print('')
return downloaded_file


Expand Down Expand Up @@ -123,6 +128,7 @@ def wind3dp_single_download(file, path=None):
except requests.HTTPError:
print(f'No corresponding data found at {url}')
downloaded_file = []
print('')

return downloaded_file

Expand Down
6 changes: 3 additions & 3 deletions seppy/tests/test_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def test_psp_load_offline():

def test_soho_ephin_load_online():
df, meta = soho_load(dataset='SOHO_COSTEP-EPHIN_L2-1MIN', startdate="2021/04/16", enddate="2021/04/16",
path=None, resample="1min", pos_timestamp=None)
path=None, resample="1min", pos_timestamp='center')
assert isinstance(df, pd.DataFrame)
assert df.shape == (1145, 14)
assert meta['E1300'] == '0.67 - 10.4 MeV'
Expand Down Expand Up @@ -89,7 +89,7 @@ def test_solo_mag_load_offline():

def test_stereo_het_load_online():
df, meta = stereo_load(instrument="HET", startdate="2021/10/28", enddate="2021/10/29",
path=None, resample="1min", pos_timestamp=None)
path=None, resample="1min", pos_timestamp='center')
assert isinstance(df, pd.DataFrame)
assert df.shape == (1440, 28)
assert meta['Proton_Bins_Text'][0][0] == '13.6 - 15.1 MeV '
Expand All @@ -110,7 +110,7 @@ def test_stereo_het_load_offline():

def test_stereo_sept_load_online():
df, meta = stereo_load(instrument="SEPT", startdate="2006/11/14", enddate="2006/11/14",
path=None, resample="1min", pos_timestamp=None)
path=None, resample="1min", pos_timestamp='center')
assert isinstance(df, pd.DataFrame)
assert df.shape == (371, 30)
assert meta.ch_strings[meta.index==2].values[0] == '45.0-55.0 keV'
Expand Down
46 changes: 32 additions & 14 deletions seppy/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,10 +92,8 @@ def __init__(self, start_date, end_date, spacecraft, sensor,
# names from the dataframe.
self.load_all_viewing()

# JG: It is NOT enough to check this only at this position! The process needs to be aborted if the specific combination is chosen!
# JG: Removed here and moved to appropriate positions.
# # Check that the data that was loaded is valid. If not, give a warning.
# self.validate_data()
self.validate_data()

# Download radio cdf files ONLY if asked to
if self.radio_spacecraft is not None:
Expand All @@ -107,15 +105,21 @@ def validate_data(self):
Provide an error msg if this object is initialized with a combination that yields invalid data products.
"""

# Data products for SolO/STEP before 22 Oct 2021 are no reliable for non-Pixel Averaged data
# SolO/STEP data before 22 Oct 2021 is not supported yet for non-'Pixel averaged' viewing
warn_mess_step_pixels_old = "SolO/STEP data is not included yet for individual Pixels for dates preceding Oct 22, 2021. Only 'Pixel averaged' is supported."
if self.spacecraft == "solo" and self.sensor == "step":
if self.start_date < pd.to_datetime("2021-10-22").date():
if not self.viewing == 'Pixel averaged':
raise Warning("WARNING! SolO/STEP data is not included yet for individual Pixels for dates preceding Oct 22, 2021.")
# when 'viewing' is undefined, only give a warning; if it's wrongly defined, abort with warning
if not self.viewing:
# warnings.warn(message=warn_mess_step_pixels_old)
custom_warning(message=warn_mess_step_pixels_old)
else:
raise Warning(warn_mess_step_pixels_old)

# Electron data for SolO/STEP is removed for now (Feb 2024, JG)
if self.spacecraft == "solo" and self.sensor == "step" and self.species.lower()[0] == 'e':
raise Warning("WARNING! SolO/STEP electron data is not implemented yet!")
raise Warning("SolO/STEP electron data is not implemented yet!")

def update_onset_attributes(self, flux_series, onset_stats, onset_found, peak_flux, peak_time, fig, bg_mean):
"""
Expand Down Expand Up @@ -269,6 +273,10 @@ def load_data(self, spacecraft, sensor, viewing, data_level,
meta = {"E5": "0.45 - 0.50 MeV",
"E15": "0.70 - 1.10 MeV"}

# TODO:
# - add resample_df here?
# - add pos_timestamp here

self.update_viewing(viewing)
return df, meta

Expand Down Expand Up @@ -341,7 +349,8 @@ def load_data(self, spacecraft, sensor, viewing, data_level,
df, meta = bepi_sixs_load(startdate=self.start_date,
enddate=self.end_date,
side=viewing,
path=self.data_path)
path=self.data_path,
pos_timestamp='center')
df_i = df[[f"P{i}" for i in range(1, 10)]]
df_e = df[[f"E{i}" for i in range(1, 8)]]
return df_i, df_e, meta
Expand Down Expand Up @@ -472,7 +481,10 @@ def choose_data(self, viewing):
self.update_viewing(viewing)

if self.spacecraft == 'solo':
if viewing == 'sun':
if not viewing:
raise Exception("For this operation, the instrument's 'viewing' direction must be defined in the call of 'Event'!")

elif viewing == 'sun':

self.current_df_i = self.df_i_sun
self.current_df_e = self.df_e_sun
Expand Down Expand Up @@ -1094,7 +1106,7 @@ def onset_analysis(self, df_flux, windowstart, windowlen, windowrange, channels_

if (self.spacecraft == 'solo' or self.spacecraft == 'psp'):
plabel = AnchoredText(f"Onset time: {str(onset_stats[-1])[:19]}\n"
f"Peak flux: {df_flux_peak['flux'][0]:.2E}",
f"Peak flux: {df_flux_peak['flux'].iloc[0]:.2E}",
prop=dict(size=13), frameon=True,
loc=(4))
# if(self.spacecraft[:2].lower() == 'st' or self.spacecraft == 'soho' or self.spacecraft == 'wind'):
Expand Down Expand Up @@ -1178,8 +1190,9 @@ def find_onset(self, viewing, bg_start=None, bg_length=None, background_range=No
# Check if background is separated from plot range by over a day, issue a warning if so, but don't abort
if (background_range[0] < xlim[0] - datetime.timedelta(days=1) and background_range[0] < xlim[1] - datetime.timedelta(days=1)) or \
(background_range[1] > xlim[0] + datetime.timedelta(days=1) and background_range[1] > xlim[1] + datetime.timedelta(days=1)):
background_warning = "NOTICE that your background_range is separated from plot_range by over a day.\nIf this was intentional you may ignore this warning."
warnings.warn(message=background_warning)
background_warning = "Your background_range is separated from plot_range by over a day. If this was intentional you may ignore this warning."
# warnings.warn(message=background_warning)
custom_warning(message=background_warning)

if (self.spacecraft[:2].lower() == 'st' and self.sensor == 'sept') \
or (self.spacecraft.lower() == 'psp' and self.sensor.startswith('isois')) \
Expand Down Expand Up @@ -2442,9 +2455,13 @@ def print_energies(self):
from IPython.display import display

# This has to be run first, otherwise self.current_df does not exist
# Note that PSP will by default have its viewing=="all", which does not yield proper dataframes
if self.viewing != "all":
self.choose_data(self.viewing)
# Note that PSP will by default have its viewing=='all', which does not yield proper dataframes
if self.viewing != 'all':
if self.spacecraft == 'solo' and not self.viewing:
raise Warning("For this operation the instrument's 'viewing' direction must be defined in the call of 'Event'! Please define and re-run.")
return
else:
self.choose_data(self.viewing)
else:
if self.sensor == "isois-epihi":
# Just choose data with either ´A´ or ´B´. I'm not sure if there's a difference
Expand Down Expand Up @@ -2521,6 +2538,7 @@ def print_energies(self):
'display.max_columns', None,
):
display(df)
return

def save_and_update_rcparams(self, plotting_function: str):
"""
Expand Down
Loading

0 comments on commit 857d32a

Please sign in to comment.