From a1553439d06a3ded758a8332697f2aa220cc317b Mon Sep 17 00:00:00 2001 From: Daniel Regenass Date: Mon, 8 Apr 2024 18:20:28 +0200 Subject: [PATCH 01/51] AMAROC-768 add heigh cloud flag and allow NSC --- src/ampycloud/data.py | 48 ++++++++++++++++++++++++++++---- test/ampycloud/test_core.py | 2 +- test/ampycloud/test_data.py | 55 +++++++++++++++++++++++++++++++++++++ 3 files changed, 99 insertions(+), 6 deletions(-) diff --git a/src/ampycloud/data.py b/src/ampycloud/data.py index 6633dfa..644adf0 100644 --- a/src/ampycloud/data.py +++ b/src/ampycloud/data.py @@ -13,6 +13,7 @@ import logging import copy from abc import ABC, abstractmethod +import warnings import numpy as np import numpy.typing as npt import pandas as pd @@ -93,16 +94,31 @@ def _cleanup_pdf(self, data: pd.DataFrame) -> pd.DataFrame: # Begin with a thorough inspection of the dataset data = utils.check_data_consistency(data, req_cols=self.DATA_COLS) + # By default we set NSC to false, NOTE: attrs is still experimental in pandas! + try: + data.attrs['high_clouds_detected'] = False + except AttributeError: + # NOTE: Untestable as we cannot monkeypatch attrs as it would result in side-effects for __repr__ + # which break other tests + with warnings.catch_warnings(): + warnings.simplefilter("ignore") # pandas assumes we are trying to assign a column, which is not the case + data.attrs = {'high_clouds_detected': False} + # Then also drop any hits that is too high if self.msa is not None: hit_alt_lim = self.msa + self.msa_hit_buffer logger.info('Cropping hits above MSA+buffer: %s ft', str(hit_alt_lim)) # Type 1 or less hits above the cut threshold get turned to NaNs, to signal a # non-detection below the MSA. Also change the hit type to 0 accordingly ! 
- data.loc[data[(data.alt > hit_alt_lim) & (data.type <= 1)].index, 'type'] = 0 - data.loc[data[(data.alt > hit_alt_lim) & (data.type <= 1)].index, 'alt'] = np.nan + above_msa_t1_or_less = data[(data.alt > hit_alt_lim) & (data.type <= 1)].index + data.loc[above_msa_t1_or_less, 'type'] = 0 + data.loc[above_msa_t1_or_less, 'alt'] = np.nan # Type 2 or more hits get cropped (there should be only 1 non-detection per time-stamp). - data = data.drop(data[(data.alt > hit_alt_lim) & (data.type > 1)].index) + above_msa_t2_or_more = data[(data.alt > hit_alt_lim) & (data.type > 1)].index + data = data.drop(above_msa_t2_or_more) + if len(above_msa_t1_or_less) + len(above_msa_t2_or_more) > self._prms['MAX_HITS_OKTA0']: + logger.info("Hits above MSA exceeded threshold for okta 0, will add NSC flag") + data.attrs['high_clouds_detected'] = True return data @@ -992,6 +1008,28 @@ def layers(self) -> pd.DataFrame: identified by the layering algorithm. """ return self._layers + @property + def high_clouds_detected(self) -> bool: + """ Returns whether high clouds were detected in the data. + + Returns: + bool: whether high clouds were detected. + + """ + return self.data.attrs['high_clouds_detected'] + + def _ncd_or_nsc(self) -> str: + """ Return the METAR code for No Cloud Detected / No Significant Cloud based on the attribute set in data.attrs. + + Returns: + str: 'NCD' or 'NSC' + + """ + if self.high_clouds_detected: + return 'NSC' + else: + return 'NCD' + def metar_msg(self, which: str = 'layers') -> str: """ Construct a METAR-like message for the identified cloud slices, groups, or layers. @@ -1025,7 +1063,7 @@ def metar_msg(self, which: str = 'layers') -> str: # Deal with the 0 layer situation if getattr(self, f'n_{which}') == 0: - return 'NCD' + return self._ncd_or_nsc() # Deal with the situation where layers have been found ... 
msg = sligrolay['code'] @@ -1036,6 +1074,6 @@ def metar_msg(self, which: str = 'layers') -> str: # Here, deal with the situations when all clouds are above the MSA if len(msg) == 0: - return 'NCD' + return self._ncd_or_nsc() return msg diff --git a/test/ampycloud/test_core.py b/test/ampycloud/test_core.py index 75dc17c..1737ef4 100644 --- a/test/ampycloud/test_core.py +++ b/test/ampycloud/test_core.py @@ -82,7 +82,7 @@ def test_run(): # Test the ability to specific parameters locally only out = run(mock_data, prms={'MSA': 0}) - assert out.metar_msg() == 'NCD' + assert out.metar_msg() == 'NSC' assert dynamic.AMPYCLOUD_PRMS['MSA'] is None # Test that warnings are being raised if a bad parameter is being given diff --git a/test/ampycloud/test_data.py b/test/ampycloud/test_data.py index 75be198..fdc665d 100644 --- a/test/ampycloud/test_data.py +++ b/test/ampycloud/test_data.py @@ -80,6 +80,31 @@ def test_ceilochunk_init(): reset_prms() +@mark.parametrize('alt,expected_flag', [ + param(1000, False, id='low clouds'), + param(15000, True, id='high clouds'), +]) +def test_high_clouds_flag(alt: int, expected_flag: bool): + """ Test the high clouds flagging routine. """ + + dynamic.AMPYCLOUD_PRMS['MAX_HITS_OKTA0'] = 3 + dynamic.AMPYCLOUD_PRMS['MSA'] = 10000 + + n_ceilos = 4 + lookback_time = 1200 + rate = 30 + # Create some fake data to get started + # 1 very flat layer with no gaps + mock_data = mocker.mock_layers( + n_ceilos, lookback_time, rate, + [{'alt': alt, 'alt_std': 10, 'sky_cov_frac': 0.1, 'period': 100, 'amplitude': 0}] + ) + chunk = CeiloChunk(mock_data) + assert chunk.high_clouds_detected == expected_flag + + reset_prms() + + def test_ceilochunk_basic(): """ Test the basic methods of the CeiloChunk class. """ @@ -228,6 +253,36 @@ def test_ceilochunk_nocld(): assert chunk.metar_msg() == 'NCD' +def test_ceilochunk_highcld(): + """ Test the methods of CeiloChunks when high clouds are seen in the interval. 
""" + + dynamic.AMPYCLOUD_PRMS['MAX_HITS_OKTA0'] = 3 + dynamic.AMPYCLOUD_PRMS['MSA'] = 10000 + + n_ceilos = 4 + lookback_time = 1200 + rate = 30 + # Create some fake data to get started + # 1 very flat layer with no gaps + mock_data = mocker.mock_layers( + n_ceilos, lookback_time, rate, + [{'alt': 15000, 'alt_std': 10, 'sky_cov_frac': 0.1, 'period': 100, 'amplitude': 0}] + ) + + # Instantiate a CeiloChunk entity ... + chunk = CeiloChunk(mock_data) + + # Do the dance ... + chunk.find_slices() + chunk.find_groups() + chunk.find_layers() + + # Assert the final METAR code is correct + assert chunk.metar_msg() == 'NSC' + + reset_prms() + + def test_ceilochunk_2lay(): """ Test the methods of CeiloChunks when 2 layers are seen in the interval. """ From 2e2e2b0cb20dccd80071d833b2ccf11a9247e9b2 Mon Sep 17 00:00:00 2001 From: Daniel Regenass Date: Tue, 9 Apr 2024 09:51:39 +0200 Subject: [PATCH 02/51] AMAROC-768 formatting (easy fixes) --- src/ampycloud/data.py | 44 +++++++++++++++++++------------------ src/ampycloud/scaler.py | 12 +++++----- test/ampycloud/test_data.py | 5 +++-- 3 files changed, 32 insertions(+), 29 deletions(-) diff --git a/src/ampycloud/data.py b/src/ampycloud/data.py index 644adf0..ff704c7 100644 --- a/src/ampycloud/data.py +++ b/src/ampycloud/data.py @@ -98,10 +98,12 @@ def _cleanup_pdf(self, data: pd.DataFrame) -> pd.DataFrame: try: data.attrs['high_clouds_detected'] = False except AttributeError: - # NOTE: Untestable as we cannot monkeypatch attrs as it would result in side-effects for __repr__ - # which break other tests + # Catch for older versions of pandas that do not support DataFrame.attrs + # NOTE: Untestable as we cannot monkeypatch attrs as it would result + # in side-effects for __repr__ which break other tests with warnings.catch_warnings(): - warnings.simplefilter("ignore") # pandas assumes we are trying to assign a column, which is not the case + # pandas assumes we are trying to assign a column, which is not the case + 
warnings.simplefilter("ignore") data.attrs = {'high_clouds_detected': False} # Then also drop any hits that is too high @@ -202,8 +204,10 @@ def __init__(self, data: pd.DataFrame, prms: Optional[dict] = None, self._layers = None @log_func_call(logger) - def data_rescaled(self, dt_mode: Optional[str] = None, alt_mode: Optional[str] = None, - dt_kwargs: Optional[dict] = None, alt_kwargs: Optional[dict] = None) -> pd.DataFrame: + def data_rescaled( + self, dt_mode: Optional[str] = None, alt_mode: Optional[str] = None, + dt_kwargs: Optional[dict] = None, alt_kwargs: Optional[dict] = None + ) -> pd.DataFrame: """ Returns a copy of the data, rescaled according to the provided parameters. Args: @@ -312,13 +316,12 @@ def _get_min_sep_for_altitude(self, altitude: float) -> float: MIN_SEP_VALS """ - if len(self.prms['MIN_SEP_LIMS']) != \ - len(self.prms['MIN_SEP_VALS']) - 1: - raise AmpycloudError( - '"MIN_SEP_LIMS" must have one less item than "MIN_SEP_VALS".' - 'Got MIN_SEP_LIMS %i and MIN_SEP_VALS %i', - (self.prms['MIN_SEP_LIMS'], self.prms['MIN_SEP_VALS']) - ) + if len(self.prms['MIN_SEP_LIMS']) != len(self.prms['MIN_SEP_VALS']) - 1: + raise AmpycloudError( + '"MIN_SEP_LIMS" must have one less item than "MIN_SEP_VALS".' + f'Got MIN_SEP_LIMS {self.prms['MIN_SEP_LIMS']} ' + f'and MIN_SEP_VALS {self.prms['MIN_SEP_VALS']}.', + ) min_sep_val_id = np.searchsorted(self.prms['MIN_SEP_LIMS'], altitude) @@ -377,12 +380,11 @@ def _setup_sligrolay_pdf(self, which: str = 'slices') -> tuple[pd.DataFrame, npt ] # We want to raise early if 'which' is unknown. - if not which in ['slices', 'groups', 'layers']: + if which not in ['slices', 'groups', 'layers']: raise AmpycloudError( - 'Trying to initialize a data frame for %s ' + f'Trying to initialize a data frame for {which}, ' 'which is unknown. Keyword arg "which" must be one of' '"slices", "groups" or "layers"' - %which ) # If I am looking at the slices, also keep track of whether they are isolated, or not. 
@@ -535,9 +537,9 @@ def _calculate_sligrolay_base_height( # Which hits are in this sli/gro/lay ? in_sligrolay = self.data[which[:-1]+'_id'] == cid # Compute the base altitude - pdf.iloc[ind, pdf.columns.get_loc('alt_base')] = self._calculate_base_height_for_selection( - in_sligrolay, - ) + pdf.iloc[ + ind, pdf.columns.get_loc('alt_base') + ] = self._calculate_base_height_for_selection(in_sligrolay,) return pdf @log_func_call(logger) @@ -1019,7 +1021,8 @@ def high_clouds_detected(self) -> bool: return self.data.attrs['high_clouds_detected'] def _ncd_or_nsc(self) -> str: - """ Return the METAR code for No Cloud Detected / No Significant Cloud based on the attribute set in data.attrs. + """ Return the METAR code for No Cloud Detected / No Significant Cloud. + Decision based on the attribute set in data.attrs. Returns: str: 'NCD' or 'NSC' @@ -1027,8 +1030,7 @@ def _ncd_or_nsc(self) -> str: """ if self.high_clouds_detected: return 'NSC' - else: - return 'NCD' + return 'NCD' def metar_msg(self, which: str = 'layers') -> str: """ Construct a METAR-like message for the identified cloud slices, groups, or layers. diff --git a/src/ampycloud/scaler.py b/src/ampycloud/scaler.py index ee115d5..e752319 100644 --- a/src/ampycloud/scaler.py +++ b/src/ampycloud/scaler.py @@ -221,10 +221,10 @@ def convert_kwargs(vals: np.ndarray, fct: str, **kwargs: dict) -> dict: if fct == 'shift-and-scale': # In this case, the only data I may need to derive from the data is the shift. - if 'shift' in kwargs.keys(): + if 'shift' in kwargs: # Already set - do nothing return kwargs - if 'mode' in kwargs.keys(): + if 'mode' in kwargs: if kwargs['mode'] == 'do': kwargs['shift'] = np.nanmax(vals) elif kwargs['mode'] == 'undo': @@ -240,12 +240,12 @@ def convert_kwargs(vals: np.ndarray, fct: str, **kwargs: dict) -> dict: if fct == 'minmax-scale': # In this case, the challenge lies with identifying min_val and max_val, knowing that the # user may specify a min_range value. 
- if 'min_val' in kwargs.keys() and 'max_val' in kwargs.keys(): + if 'min_val' in kwargs and 'max_val' in kwargs: # Already specified ... do nothing return kwargs - if 'mode' in kwargs.keys(): + if 'mode' in kwargs: if kwargs['mode'] == 'do': - if 'min_range' in kwargs.keys(): + if 'min_range' in kwargs: min_range = kwargs['min_range'] kwargs.pop('min_range', None) else: @@ -260,7 +260,7 @@ def convert_kwargs(vals: np.ndarray, fct: str, **kwargs: dict) -> dict: raise AmpycloudError(f"Mode unknown: {kwargs['mode']}") # 'mode' not set -> will default to 'do' - if 'min_range' in kwargs.keys(): + if 'min_range' in kwargs: min_range = kwargs['min_range'] kwargs.pop('min_range', None) else: diff --git a/test/ampycloud/test_data.py b/test/ampycloud/test_data.py index fdc665d..8ac8b2b 100644 --- a/test/ampycloud/test_data.py +++ b/test/ampycloud/test_data.py @@ -96,8 +96,9 @@ def test_high_clouds_flag(alt: int, expected_flag: bool): # Create some fake data to get started # 1 very flat layer with no gaps mock_data = mocker.mock_layers( - n_ceilos, lookback_time, rate, - [{'alt': alt, 'alt_std': 10, 'sky_cov_frac': 0.1, 'period': 100, 'amplitude': 0}] + n_ceilos, lookback_time, rate, [ + {'alt': alt, 'alt_std': 10, 'sky_cov_frac': 0.1, 'period': 100, 'amplitude': 0} + ] ) chunk = CeiloChunk(mock_data) assert chunk.high_clouds_detected == expected_flag From 4ffcc62143c23156c57d0d369594b2519c0ec2ed Mon Sep 17 00:00:00 2001 From: Daniel Regenass Date: Tue, 9 Apr 2024 09:57:35 +0200 Subject: [PATCH 03/51] AMAROC-768 update copyright years --- src/ampycloud/data.py | 2 +- test/ampycloud/test_core.py | 2 +- test/ampycloud/test_data.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ampycloud/data.py b/src/ampycloud/data.py index ff704c7..feb4786 100644 --- a/src/ampycloud/data.py +++ b/src/ampycloud/data.py @@ -1,5 +1,5 @@ """ -Copyright (c) 2021-2023 MeteoSwiss, contributors listed in AUTHORS. 
+Copyright (c) 2021-2024 MeteoSwiss, contributors listed in AUTHORS. Distributed under the terms of the 3-Clause BSD License. diff --git a/test/ampycloud/test_core.py b/test/ampycloud/test_core.py index 1737ef4..1908d61 100644 --- a/test/ampycloud/test_core.py +++ b/test/ampycloud/test_core.py @@ -1,5 +1,5 @@ """ -Copyright (c) 2021-2022 MeteoSwiss, contributors listed in AUTHORS. +Copyright (c) 2021-2024 MeteoSwiss, contributors listed in AUTHORS. Distributed under the terms of the 3-Clause BSD License. diff --git a/test/ampycloud/test_data.py b/test/ampycloud/test_data.py index 8ac8b2b..b34721f 100644 --- a/test/ampycloud/test_data.py +++ b/test/ampycloud/test_data.py @@ -1,5 +1,5 @@ """ -Copyright (c) 2021-2022 MeteoSwiss, contributors listed in AUTHORS. +Copyright (c) 2021-2024 MeteoSwiss, contributors listed in AUTHORS. Distributed under the terms of the 3-Clause BSD License. From cdbb945605b35e221621506784eb3fec851c7de4 Mon Sep 17 00:00:00 2001 From: Daniel Regenass Date: Tue, 9 Apr 2024 09:59:51 +0200 Subject: [PATCH 04/51] AMAROC-768 update changelog --- CHANGELOG | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG b/CHANGELOG index 1c1a1b7..777da1c 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -3,6 +3,8 @@ All notable changes to ampycloud will be documented in this file. The format is inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
+## [v1.1.0] + - [regDaniel, 2024-04-09] Add flag for high clouds and allow for NSC in METAR message ## [v1.0.0] ### Added - [regDaniel, 2023-11-09] Add minimum separation condition for grouping step From 8b180667dc00c1b6cc4661742c4f8f156aad35f0 Mon Sep 17 00:00:00 2001 From: Daniel Regenass Date: Tue, 9 Apr 2024 10:04:02 +0200 Subject: [PATCH 05/51] AMAROC-768 fix f-string --- src/ampycloud/data.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ampycloud/data.py b/src/ampycloud/data.py index feb4786..57baf4a 100644 --- a/src/ampycloud/data.py +++ b/src/ampycloud/data.py @@ -319,8 +319,8 @@ def _get_min_sep_for_altitude(self, altitude: float) -> float: if len(self.prms['MIN_SEP_LIMS']) != len(self.prms['MIN_SEP_VALS']) - 1: raise AmpycloudError( '"MIN_SEP_LIMS" must have one less item than "MIN_SEP_VALS".' - f'Got MIN_SEP_LIMS {self.prms['MIN_SEP_LIMS']} ' - f'and MIN_SEP_VALS {self.prms['MIN_SEP_VALS']}.', + f'Got MIN_SEP_LIMS {self.prms["MIN_SEP_LIMS"]} ' + f'and MIN_SEP_VALS {self.prms["MIN_SEP_VALS"]}.', ) min_sep_val_id = np.searchsorted(self.prms['MIN_SEP_LIMS'], From fa4db2a631563870caebd20e0ead5975ef2dff43 Mon Sep 17 00:00:00 2001 From: Daniel Regenass Date: Tue, 9 Apr 2024 10:22:00 +0200 Subject: [PATCH 06/51] AMAROC-768 NSC if sligrolay in buffer --- src/ampycloud/data.py | 3 +++ test/ampycloud/test_data.py | 9 +++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/ampycloud/data.py b/src/ampycloud/data.py index 57baf4a..6846ed3 100644 --- a/src/ampycloud/data.py +++ b/src/ampycloud/data.py @@ -1076,6 +1076,9 @@ def metar_msg(self, which: str = 'layers') -> str: # Here, deal with the situations when all clouds are above the MSA if len(msg) == 0: + sligrolay_in_buffer = sligrolay['significant'] * (sligrolay['alt_base'] >= msa_val) + if sligrolay_in_buffer.any(): # clouds within the MSA buffer zone + return 'NSC' return self._ncd_or_nsc() return msg diff --git a/test/ampycloud/test_data.py 
b/test/ampycloud/test_data.py index b34721f..52bc644 100644 --- a/test/ampycloud/test_data.py +++ b/test/ampycloud/test_data.py @@ -254,11 +254,16 @@ def test_ceilochunk_nocld(): assert chunk.metar_msg() == 'NCD' -def test_ceilochunk_highcld(): +@mark.parametrize('alt', [ + param(10500, id='in buffer'), + param(15000, id='above buffer'), +]) +def test_ceilochunk_highcld(alt): """ Test the methods of CeiloChunks when high clouds are seen in the interval. """ dynamic.AMPYCLOUD_PRMS['MAX_HITS_OKTA0'] = 3 dynamic.AMPYCLOUD_PRMS['MSA'] = 10000 + dynamic.AMPYCLOUD_PRMS['MSA_HIT_BUFFER'] = 1000 n_ceilos = 4 lookback_time = 1200 @@ -267,7 +272,7 @@ def test_ceilochunk_highcld(): # 1 very flat layer with no gaps mock_data = mocker.mock_layers( n_ceilos, lookback_time, rate, - [{'alt': 15000, 'alt_std': 10, 'sky_cov_frac': 0.1, 'period': 100, 'amplitude': 0}] + [{'alt': alt, 'alt_std': 10, 'sky_cov_frac': 0.1, 'period': 100, 'amplitude': 0}] ) # Instantiate a CeiloChunk entity ... From 798f5b70753e0c34aeca76e0cd4cd332aa805731 Mon Sep 17 00:00:00 2001 From: Daniel Regenass Date: Tue, 9 Apr 2024 11:04:39 +0200 Subject: [PATCH 07/51] AMAROC-768 Restrict python versions for pylinter --- .github/workflows/CI_pylinter.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/CI_pylinter.yml b/.github/workflows/CI_pylinter.yml index 9c0b2a5..3e7b014 100644 --- a/.github/workflows/CI_pylinter.yml +++ b/.github/workflows/CI_pylinter.yml @@ -18,7 +18,7 @@ jobs: strategy: matrix: os: [ubuntu-latest] - python-version: ['3.x'] + python-version: ['3.9', '3.10', '3.11'] steps: # Checkout our repository From 745a50ac3a0666e390600e3fddce6c1bf2d952a9 Mon Sep 17 00:00:00 2001 From: Daniel Regenass Date: Tue, 9 Apr 2024 11:14:19 +0200 Subject: [PATCH 08/51] AMAROC-768 install setuptools for pylinter --- .github/workflows/CI_pylinter.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/CI_pylinter.yml 
b/.github/workflows/CI_pylinter.yml index 3e7b014..9da6ac9 100644 --- a/.github/workflows/CI_pylinter.yml +++ b/.github/workflows/CI_pylinter.yml @@ -18,7 +18,7 @@ jobs: strategy: matrix: os: [ubuntu-latest] - python-version: ['3.9', '3.10', '3.11'] + python-version: ['3.x'] steps: # Checkout our repository @@ -32,9 +32,10 @@ jobs: python-version: ${{ matrix.python-version }} # Install any dependency we require - - name: Install dependancies + - name: Install dependencies run: | python -m pip install --upgrade pip + python -m pip install setuptools shell: bash # Here, let's install our module to make sure all the dependencies specified in setup.py are From c22cd99dd1d70fee81fea03f5b400a5cac240073 Mon Sep 17 00:00:00 2001 From: Daniel Regenass Date: Wed, 10 Apr 2024 13:38:05 +0200 Subject: [PATCH 09/51] AMAROC-768 add comments --- src/ampycloud/data.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/src/ampycloud/data.py b/src/ampycloud/data.py index 6846ed3..72ff39e 100644 --- a/src/ampycloud/data.py +++ b/src/ampycloud/data.py @@ -102,7 +102,8 @@ def _cleanup_pdf(self, data: pd.DataFrame) -> pd.DataFrame: # NOTE: Untestable as we cannot monkeypatch attrs as it would result # in side-effects for __repr__ which break other tests with warnings.catch_warnings(): - # pandas assumes we are trying to assign a column, which is not the case + # pandas assumes we are trying to assign a column, but as this is not the case + # we catch its warning and ignore it. warnings.simplefilter("ignore") data.attrs = {'high_clouds_detected': False} @@ -110,8 +111,9 @@ def _cleanup_pdf(self, data: pd.DataFrame) -> pd.DataFrame: if self.msa is not None: hit_alt_lim = self.msa + self.msa_hit_buffer logger.info('Cropping hits above MSA+buffer: %s ft', str(hit_alt_lim)) - # Type 1 or less hits above the cut threshold get turned to NaNs, to signal a - # non-detection below the MSA. Also change the hit type to 0 accordingly ! 
+ # First layer and vervis hits above the cut threshold get turned to NaNs, to signal a + # non-detection below the MSA. Also change the hit type to 0 accordingly in order + # to create a "no hit detected" in the range of interest (i.e. below MSA). above_msa_t1_or_less = data[(data.alt > hit_alt_lim) & (data.type <= 1)].index data.loc[above_msa_t1_or_less, 'type'] = 0 data.loc[above_msa_t1_or_less, 'alt'] = np.nan @@ -119,7 +121,10 @@ def _cleanup_pdf(self, data: pd.DataFrame) -> pd.DataFrame: above_msa_t2_or_more = data[(data.alt > hit_alt_lim) & (data.type > 1)].index data = data.drop(above_msa_t2_or_more) if len(above_msa_t1_or_less) + len(above_msa_t2_or_more) > self._prms['MAX_HITS_OKTA0']: - logger.info("Hits above MSA exceeded threshold for okta 0, will add NSC flag") + logger.info( + "Hits above MSA + MSA_HIT_BUFFER exceeded threshold MAX_HITS_OKTA0. Will add " + "flag 'high_clouds_detected' to indicate the presence of high clouds." + ) data.attrs['high_clouds_detected'] = True return data @@ -1076,9 +1081,10 @@ def metar_msg(self, which: str = 'layers') -> str: # Here, deal with the situations when all clouds are above the MSA if len(msg) == 0: + # first check if any significant clouds are in the interval [MSA, MSA+MSA_HIT_BUFFER] sligrolay_in_buffer = sligrolay['significant'] * (sligrolay['alt_base'] >= msa_val) - if sligrolay_in_buffer.any(): # clouds within the MSA buffer zone - return 'NSC' - return self._ncd_or_nsc() + if sligrolay_in_buffer.any(): + return 'NSC' # and return a NSC as it implies that the cloud is above the MSA + return self._ncd_or_nsc() # else, check for CBH above MSA + MSA_HIT_BUFFER return msg From 17930a587f90d9b6be0184521500d753cf7da48f Mon Sep 17 00:00:00 2001 From: Sophie Rethore Date: Wed, 10 Apr 2024 14:52:07 +0200 Subject: [PATCH 10/51] AMAROC-765 Add mypy to the setup --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index a324922..a712759 100644 --- a/setup.py +++ b/setup.py 
@@ -52,7 +52,7 @@ "ruamel.yaml" ], extras_require={ - 'dev': ['sphinx', 'sphinx-rtd-theme', 'pylint', 'pytest'] + 'dev': ['sphinx', 'sphinx-rtd-theme', 'pylint', 'pytest', 'mypy'] }, # Setup entry points to use ampycloud directly from a terminal entry_points={ From ba885da7e3b81f3134d5a68a6e40b392ac591a59 Mon Sep 17 00:00:00 2001 From: Daniel Regenass Date: Wed, 10 Apr 2024 14:55:21 +0200 Subject: [PATCH 11/51] AMAROC-768 drop attrs, add CeiloChunk attribute --- src/ampycloud/data.py | 32 ++++++++++++-------------------- test/ampycloud/test_data.py | 5 +++-- 2 files changed, 15 insertions(+), 22 deletions(-) diff --git a/src/ampycloud/data.py b/src/ampycloud/data.py index 72ff39e..52ca514 100644 --- a/src/ampycloud/data.py +++ b/src/ampycloud/data.py @@ -94,20 +94,11 @@ def _cleanup_pdf(self, data: pd.DataFrame) -> pd.DataFrame: # Begin with a thorough inspection of the dataset data = utils.check_data_consistency(data, req_cols=self.DATA_COLS) - # By default we set NSC to false, NOTE: attrs is still experimental in pandas! - try: - data.attrs['high_clouds_detected'] = False - except AttributeError: - # Catch for older versions of pandas that do not support DataFrame.attrs - # NOTE: Untestable as we cannot monkeypatch attrs as it would result - # in side-effects for __repr__ which break other tests - with warnings.catch_warnings(): - # pandas assumes we are trying to assign a column, but as this is not the case - # we catch its warning and ignore it. 
- warnings.simplefilter("ignore") - data.attrs = {'high_clouds_detected': False} - - # Then also drop any hits that is too high + # By default we set this flag to false and overwrite if enough hits are present + self._clouds_above_msa_buffer = False + + # Drop any hits that are too high and check if they exceed the threshold for 1 OKTA + # if yes, set the flag clouds_above_msa_buffer to True if self.msa is not None: hit_alt_lim = self.msa + self.msa_hit_buffer logger.info('Cropping hits above MSA+buffer: %s ft', str(hit_alt_lim)) @@ -125,7 +116,7 @@ def _cleanup_pdf(self, data: pd.DataFrame) -> pd.DataFrame: "Hits above MSA + MSA_HIT_BUFFER exceeded threshold MAX_HITS_OKTA0. Will add " "flag 'high_clouds_detected' to indicate the presence of high clouds." ) - data.attrs['high_clouds_detected'] = True + self._clouds_above_msa_buffer = True return data @@ -1016,24 +1007,25 @@ def layers(self) -> pd.DataFrame: return self._layers @property - def high_clouds_detected(self) -> bool: - """ Returns whether high clouds were detected in the data. + def clouds_above_msa_buffer(self) -> bool: + """ Returns whether a number of hits exceeding the threshold for 1 okta is detected above + MSA + MSA_HIT_BUFFER. Returns: bool: whether high clouds were detected. """ - return self.data.attrs['high_clouds_detected'] + return self._clouds_above_msa_buffer def _ncd_or_nsc(self) -> str: """ Return the METAR code for No Cloud Detected / No Significant Cloud. - Decision based on the attribute set in data.attrs. + Decision based on the attribute self._clouds_above_msa_buffer. 
Returns: str: 'NCD' or 'NSC' """ - if self.high_clouds_detected: + if self._clouds_above_msa_buffer: return 'NSC' return 'NCD' diff --git a/test/ampycloud/test_data.py b/test/ampycloud/test_data.py index 52bc644..0cbe18f 100644 --- a/test/ampycloud/test_data.py +++ b/test/ampycloud/test_data.py @@ -84,11 +84,12 @@ def test_ceilochunk_init(): param(1000, False, id='low clouds'), param(15000, True, id='high clouds'), ]) -def test_high_clouds_flag(alt: int, expected_flag: bool): +def test_clouds_above_msa_buffer_flag(alt: int, expected_flag: bool): """ Test the high clouds flagging routine. """ dynamic.AMPYCLOUD_PRMS['MAX_HITS_OKTA0'] = 3 dynamic.AMPYCLOUD_PRMS['MSA'] = 10000 + dynamic.AMPYCLOUD_PRMS['MSA_HIT_BUFFER'] = 1000 n_ceilos = 4 lookback_time = 1200 @@ -101,7 +102,7 @@ def test_high_clouds_flag(alt: int, expected_flag: bool): ] ) chunk = CeiloChunk(mock_data) - assert chunk.high_clouds_detected == expected_flag + assert chunk.clouds_above_msa_buffer == expected_flag reset_prms() From 5aa7fae57d01a1432de0a8d51f449947698fe174 Mon Sep 17 00:00:00 2001 From: Daniel Regenass Date: Wed, 10 Apr 2024 15:00:02 +0200 Subject: [PATCH 12/51] AMAROC-768 remove unused import --- src/ampycloud/data.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/ampycloud/data.py b/src/ampycloud/data.py index 52ca514..56e7126 100644 --- a/src/ampycloud/data.py +++ b/src/ampycloud/data.py @@ -13,7 +13,6 @@ import logging import copy from abc import ABC, abstractmethod -import warnings import numpy as np import numpy.typing as npt import pandas as pd From 456dbf559f3474539d36d57795aa1a72f7f33df7 Mon Sep 17 00:00:00 2001 From: Daniel Regenass Date: Wed, 10 Apr 2024 15:09:16 +0200 Subject: [PATCH 13/51] AMAROC-768 add NSC warning to scope.rst --- docs/source/scope.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docs/source/scope.rst b/docs/source/scope.rst index fe23e06..5825706 100644 --- a/docs/source/scope.rst +++ b/docs/source/scope.rst @@ -9,6 +9,16 @@ This has 
the following implications for ampycloud: passing them to ampycloud, e.g. by removing them or by converting them to cloud base heights. + * Note that regulation says that "if there are no clouds of operational significance + and no restriction on vertical visibility and the abbreviation 'CAVOK' is not + appropriate, the abbreviation 'NSC' should be used" (AMC1 MET.TR.205(e)(1)). + ampycloud cannot decide whether a 'CAVOK' is appropriate, and will therefore + always return 'NSC' if no clouds of operational significance are found. If no clouds + are detected at all by the ceilometers, ampycloud will return 'NCD'. Importantly, + users should bear in mind that ampycloud cannot handle CB and TCU cases, + such that any 'NCD'/'NSC' codes issued may need to be overwritten by the user in + certain situations. + * ampycloud can evidently be used for R&D work, but the code itself should not be seen as an R&D platform. From d09d1f53a1762a74ee28c09b98ee2d51b9e9ddc2 Mon Sep 17 00:00:00 2001 From: Dani Date: Wed, 10 Apr 2024 15:46:26 +0200 Subject: [PATCH 14/51] AMAROC-768 improve message in CHANGELOG --- CHANGELOG | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG b/CHANGELOG index 777da1c..934390b 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -4,7 +4,7 @@ The format is inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0 This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
## [v1.1.0] - - [regDaniel, 2024-04-09] Add flag for high clouds and allow for NSC in METAR message + - [regDaniel, 2024-04-09] Add flag for clouds above (MSA + MSA_HIT_BUFFER) and allow for NSC in METAR message ## [v1.0.0] ### Added - [regDaniel, 2023-11-09] Add minimum separation condition for grouping step From 804015dcb32d203e417c51af3c775b5edb512d56 Mon Sep 17 00:00:00 2001 From: 13974615+fpavogt <13974615+fpavogt@users.noreply.github.com> Date: Sat, 23 Mar 2024 08:36:16 +0100 Subject: [PATCH 15/51] Set mock exemple to be 900s long --- src/ampycloud/utils/mocker.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ampycloud/utils/mocker.py b/src/ampycloud/utils/mocker.py index 7617aa5..50c05fd 100644 --- a/src/ampycloud/utils/mocker.py +++ b/src/ampycloud/utils/mocker.py @@ -194,12 +194,12 @@ def canonical_demo_data() -> pd.DataFrame: # Create the "famous" mock dataset n_ceilos = 4 - lookback_time = 1200 - hit_gap = 30 + lookback_time = 900 + hit_gap = 15 lyrs = [{'alt': 1000, 'alt_std': 100, 'sky_cov_frac': 0.1, 'period': 10, 'amplitude': 0}, {'alt': 2000, 'alt_std': 100, 'sky_cov_frac': 0.5, 'period': 10, 'amplitude': 0}, - {'alt': 5000, 'alt_std': 200, 'sky_cov_frac': 1, 'period': 2400, 'amplitude': 1000}, + {'alt': 5000, 'alt_std': 200, 'sky_cov_frac': 1, 'period': 1800, 'amplitude': 1000}, ] # Reset the random seed, but only do this temporarily, so as to not mess things up for the user. 
From 281be4ecfdfe7d7282e5af0ed1392d93552e9d81 Mon Sep 17 00:00:00 2001 From: 13974615+fpavogt <13974615+fpavogt@users.noreply.github.com> Date: Tue, 26 Mar 2024 09:46:43 +0100 Subject: [PATCH 16/51] Use 'height' in 'ft aal' throughout --- CHANGELOG | 26 +- docs/source/running.rst | 2 +- src/ampycloud/core.py | 32 ++- src/ampycloud/data.py | 264 +++++++++--------- src/ampycloud/fluffer.py | 7 +- src/ampycloud/hardcoded.py | 4 +- src/ampycloud/layer.py | 13 +- src/ampycloud/plots/diagnostics.py | 58 ++-- src/ampycloud/plots/secondary.py | 14 +- src/ampycloud/prms/ampycloud_default_prms.yml | 40 +-- src/ampycloud/utils/mocker.py | 75 ++--- src/ampycloud/utils/utils.py | 51 ++-- src/ampycloud/wmo.py | 8 +- test/ampycloud/plots/test_core.py | 14 +- ...neva_2018.04.30-18.17.00_SCT100-BKN120.csv | 2 +- ...neva_2019.01.10-04.45.34_FEW040-BKN070.csv | 2 +- ...neva_2019.01.10-04.50.00_SCT040-BKN070.csv | 2 +- ...eva_2020.10.22.-21.15.00_SCT004-SCT007.csv | 2 +- ...neva_2020.12.29-04.50.00_FEW044-BKN055.csv | 2 +- ...neva_2021.05.01-03.50.00_SCT004-SCT013.csv | 2 +- .../Geneva_2021.05.23-04.20.00_FEW048.csv | 2 +- ...va_2021.09.09-07.50.00_FEW089_MSA10000.csv | 2 +- .../Geneva_2021.09.19-11.50.00_OVC032.csv | 2 +- ...neva_2021.10.05-03.50.00_BKN014-SCT029.csv | 2 +- ....10.06-19.20.00_FEW066-SCT073_MSA10000.csv | 2 +- ...neva_2021.11.27-16.50.00_FEW037-SCT059.csv | 2 +- .../Geneva_2021.12.15-06.50.00_OVC001.csv | 2 +- .../Geneva_2023.07.03-09.35.30_SCT044.csv | 2 +- ...oten_2020.12.24-01.20.00_FEW018-BKN051.csv | 2 +- ...g107_2020.01.01-00.01.01_SCT024-SCT031.csv | 2 +- ...g107_2020.01.01-00.16.01_BKN010-BKN025.csv | 2 +- test/ampycloud/test_core.py | 23 +- test/ampycloud/test_data.py | 42 +-- test/ampycloud/test_layer.py | 12 +- test/ampycloud/test_wmo.py | 14 +- test/ampycloud/utils/test_mocker.py | 24 +- test/ampycloud/utils/test_utils.py | 38 +-- 37 files changed, 418 insertions(+), 377 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 934390b..c7cd5c4 100644 --- 
a/CHANGELOG +++ b/CHANGELOG @@ -3,17 +3,25 @@ All notable changes to ampycloud will be documented in this file. The format is inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [v1.1.0] - - [regDaniel, 2024-04-09] Add flag for clouds above (MSA + MSA_HIT_BUFFER) and allow for NSC in METAR message +## [v2.0.0] +### Added: + - [regDaniel, 2024-04-09] Add flag for clouds above (MSA + MSA_HIT_BUFFER) and allow for NSC in METAR message. + - [fpavogt, 2024-03-26] Add option to reset only a single parameter. +### Fixed: + - [regDaniel, 2024-04-09] Minor adaptions to better comply with PEP standards. + - [fpavogt, 2024-03-26] Use "height" in "ft aal" throughout. +### Changed: + - [fpavogt, 2024-03-23] Changed default mock dataset to be 900s long (instead of 2400s). + ## [v1.0.0] -### Added - - [regDaniel, 2023-11-09] Add minimum separation condition for grouping step -### Fixed - - [regDaniel, 2023-11-09] Fix #107 (no more reporting of two layers at same height) -### Changed +### Added: + - [regDaniel, 2023-11-09] Add minimum separation condition for grouping step. +### Fixed: + - [regDaniel, 2023-11-09] Fix #107 (no more reporting of two layers at same height). +### Changed: - [fpavogt, 2023-11-05] Updated version to 1.0.0 for first full release, incl. misc documentation changes. -- [regDaniel, 2023-11-09] Changed min sep in layering from layer mean to layer base altitude -- [regDaniel, 2023-11-10] Refactoring (modularization) of metarize method +- [regDaniel, 2023-11-09] Changed min sep in layering from layer mean to layer base altitude. +- [regDaniel, 2023-11-10] Refactoring (modularization) of metarize method. 
## [v0.6.0.dev0] ### Added: diff --git a/docs/source/running.rst b/docs/source/running.rst index f0d941e..e2cf8a8 100644 --- a/docs/source/running.rst +++ b/docs/source/running.rst @@ -19,7 +19,7 @@ A no-words example for those that want to get started quickly # Your data should have *exactly* this structure mock_data = mocker.canonical_demo_data() - # Run the ampycloud algorithm on it, setting the MSA to 10'000 ft + # Run the ampycloud algorithm on it, setting the MSA to 10'000 ft aal chunk = ampycloud.run(mock_data, prms={'MSA': 10000}, geoloc='Mock data', ref_dt=datetime.now()) diff --git a/src/ampycloud/core.py b/src/ampycloud/core.py index 1e52de8..278c194 100644 --- a/src/ampycloud/core.py +++ b/src/ampycloud/core.py @@ -1,5 +1,5 @@ """ -Copyright (c) 2021-2022 MeteoSwiss, contributors listed in AUTHORS. +Copyright (c) 2021-2024 MeteoSwiss, contributors listed in AUTHORS. Distributed under the terms of the BSD-3-Clause license. @@ -131,9 +131,13 @@ def set_prms(pth: Union[str, Path]) -> None: @log_func_call(logger) -def reset_prms() -> None: +def reset_prms(which: Union[str, list] = None) -> None: """ Reset the ampycloud dynamic=scientific parameters to their default values. + Args: + which (str|list, optional): (list of) names of parameters to reset specifically. + If not set (by default), all parameters will be reset. + Example: :: @@ -148,7 +152,21 @@ def reset_prms() -> None: """ - dynamic.AMPYCLOUD_PRMS = dynamic.get_default_prms() + if which is None: + dynamic.AMPYCLOUD_PRMS = dynamic.get_default_prms() + return + + # Ok, reset the parameters one at a time + default_prms = dynamic.get_default_prms() + + # Clean up which + which = [which] if isinstance(which, str) else which + + for prm in which: + if prm not in default_prms.keys(): + raise AmpycloudError(f'Unknown parameter name: {prm}') + + dynamic.AMPYCLOUD_PRMS[prm] = default_prms[prm] @log_func_call(logger) @@ -179,7 +197,7 @@ def run(data: pd.DataFrame, prms: dict = None, geoloc: str = None, .. 
important :: ampycloud treats Vertical Visibility hits no differently than any other hit. Hence, it is up - to the user to adjust the Vertical Visibility hit altitude (and/or ignore some of them, for + to the user to adjust the Vertical Visibility hit height (and/or ignore some of them, for example) prior to feeding them to ampycloud, so that it can be used as a cloud hit. .. important:: @@ -193,7 +211,7 @@ def run(data: pd.DataFrame, prms: dict = None, geoloc: str = None, All the scientific parameters of the algorithm are set dynamically in the :py:mod:`.dynamic` module. From within a Python session all these parameters can be changed directly. For example, - to change the Minimum Sector Altitude, one would do: + to change the Minimum Sector Altitude (to be specified in ft aal), one would do: :: from ampycloud import dynamic @@ -223,7 +241,7 @@ def run(data: pd.DataFrame, prms: dict = None, geoloc: str = None, The :py:class:`.data.CeiloChunk` instance returned by this function contains all the information associated to the ampycloud algorithm, inclduing the raw data and slicing/grouping/layering info. Its method :py:meth:`.data.CeiloChunk.metar_msg` provides direct access to the resulting - METAR-like message. Users that require the altitude, okta amount, and/or exact sky coverage + METAR-like message. Users that require the height, okta amount, and/or exact sky coverage fraction of layers can get them via the :py:attr:`.data.CeiloChunk.layers` class property. Example: @@ -239,7 +257,7 @@ def run(data: pd.DataFrame, prms: dict = None, geoloc: str = None, # Generate the canonical demo dataset for ampycloud mock_data = mocker.canonical_demo_data() - # Run the ampycloud algorithm on it, setting the MSA to 10'000 ft + # Run the ampycloud algorithm on it, setting the MSA to 10'000 ft aal. 
chunk = ampycloud.run(mock_data, prms={'MSA':10000}, geoloc='Mock data', ref_dt=datetime.now()) diff --git a/src/ampycloud/data.py b/src/ampycloud/data.py index 56e7126..a0ba87b 100644 --- a/src/ampycloud/data.py +++ b/src/ampycloud/data.py @@ -53,12 +53,13 @@ def __init__(self, data: pd.DataFrame, prms: Optional[dict] = None, @property def msa(self) -> float: - """ The Minimum Sector Altitude set when initializing this specific instance. """ + """ The Minimum Sector Altitude set when initializing this specific instance, in ft aal. """ return self.prms['MSA'] @property def msa_hit_buffer(self) -> float: - """ The Minimum Sector Altitude hit buffer set when initializing this specific instance. """ + """ The Minimum Sector Altitude hit buffer set when initializing this specific instance, + in ft. """ return self.prms['MSA_HIT_BUFFER'] @property @@ -99,16 +100,16 @@ def _cleanup_pdf(self, data: pd.DataFrame) -> pd.DataFrame: # Drop any hits that are too high and check if they exceed the threshold for 1 OKTA # if yes, set the flag clouds_above_msa_buffer to True if self.msa is not None: - hit_alt_lim = self.msa + self.msa_hit_buffer - logger.info('Cropping hits above MSA+buffer: %s ft', str(hit_alt_lim)) + hit_height_lim = self.msa + self.msa_hit_buffer + logger.info('Cropping hits above MSA+buffer: %s ft', str(hit_height_lim)) # First layer and vervis hits above the cut threshold get turned to NaNs, to signal a # non-detection below the MSA. Also change the hit type to 0 accordingly in order # to create a "no hit detected" in the range of interest (i.e. below MSA). - above_msa_t1_or_less = data[(data.alt > hit_alt_lim) & (data.type <= 1)].index + above_msa_t1_or_less = data[(data.height > hit_height_lim) & (data.type <= 1)].index data.loc[above_msa_t1_or_less, 'type'] = 0 - data.loc[above_msa_t1_or_less, 'alt'] = np.nan + data.loc[above_msa_t1_or_less, 'height'] = np.nan # Type 2 or more hits get cropped (there should be only 1 non-detection per time-stamp). 
- above_msa_t2_or_more = data[(data.alt > hit_alt_lim) & (data.type > 1)].index + above_msa_t2_or_more = data[(data.height > hit_height_lim) & (data.type > 1)].index data = data.drop(above_msa_t2_or_more) if len(above_msa_t1_or_less) + len(above_msa_t2_or_more) > self._prms['MAX_HITS_OKTA0']: logger.info( @@ -163,7 +164,7 @@ def __init__(self, data: pd.DataFrame, prms: Optional[dict] = None, CeiloChunk.DATA_COLS, i.e. : :: - ['ceilo', 'dt', 'alt', 'type'] + ['ceilo', 'dt', 'height', 'type'] Specifically: @@ -176,8 +177,8 @@ def __init__(self, data: pd.DataFrame, prms: Optional[dict] = None, hit time in s, **as float**. This should typically be a negative number (because METARs are assembled using *existing=past* ceilometer observations). - - ``alt``: cloud base hit altitude in ft, **as float**. The cloud base altitude computed - by the ceilometer. + - ``height``: cloud base hit height in ft above aerodrome level (aal), **as float**. + The cloud base height computed by the ceilometer. - ``type``: cloud hit type, **as int**. A value n>0 indicates that the hit is the n-th (from the ground) that was reported by this specific ceilometer for this specific @@ -199,18 +200,17 @@ def __init__(self, data: pd.DataFrame, prms: Optional[dict] = None, self._layers = None @log_func_call(logger) - def data_rescaled( - self, dt_mode: Optional[str] = None, alt_mode: Optional[str] = None, - dt_kwargs: Optional[dict] = None, alt_kwargs: Optional[dict] = None - ) -> pd.DataFrame: + def data_rescaled(self, dt_mode: Optional[str] = None, height_mode: Optional[str] = None, + dt_kwargs: Optional[dict] = None, + height_kwargs: Optional[dict] = None) -> pd.DataFrame: """ Returns a copy of the data, rescaled according to the provided parameters. Args: dt_mode (str, optional): scaling rule for the time deltas. Defaults to None. - alt_mode (str, optional): scaling rule for the altitudes. Defaults to None. + height_mode (str, optional): scaling rule for the heights. Defaults to None. 
dt_kwargs (dict, optional): dict of arguments to be fed to the chosen dt scaling routine. Defaults to None. - alt_kwargs (dict, optinal): dict of arguments to be fed to the chosen alt scaling + height_kwargs (dict, optinal): dict of arguments to be fed to the chosen height scaling routine. Defaults to None. Returns: @@ -225,8 +225,8 @@ def data_rescaled( # Deal with the default NoneType if dt_kwargs is None: dt_kwargs = {} - if alt_kwargs is None: - alt_kwargs = {} + if height_kwargs is None: + height_kwargs = {} # Make a deep copy of the data, to avoid messing it up out = copy.deepcopy(self.data) @@ -234,8 +234,8 @@ def data_rescaled( # Deal with the dt first out['dt'] = scaler.apply_scaling(out['dt'], dt_mode, **dt_kwargs) - # Then the altitudes - out['alt'] = scaler.apply_scaling(out['alt'], alt_mode, **alt_kwargs) + # Then the heights + out['height'] = scaler.apply_scaling(out['height'], height_mode, **height_kwargs) return out @@ -287,24 +287,23 @@ def _calculate_base_height_for_selection( float: The base height of the given data selection. """ - # Start computing the base altitude + # Start computing the base height # First, compute which points should be considered in terms of lookback time - return utils.calc_base_alt( - self.data.sort_values('dt').loc[data_indexer]['alt'].values, + return utils.calc_base_height( + self.data.sort_values('dt').loc[data_indexer]['height'].values, self.prms['BASE_LVL_LOOKBACK_PERC'], - self.prms['BASE_LVL_ALT_PERC'], + self.prms['BASE_LVL_HEIGHT_PERC'], ) - - def _get_min_sep_for_altitude(self, altitude: float) -> float: - """Get the minimum separation for a given altitude. + def _get_min_sep_for_height(self, height: float) -> float: + """Get the minimum separation for a given height. Args: - altitude (float): The altitude for which we want to know the minimum - separation + height (float): The height for which we want to know the minimum + separation. Returns: - float: The minimum separation for the given altitude. 
+ float: The minimum separation for the given height. Raises: AmpycloudError: If the length of MIN_SEP_LIMS is not one less than @@ -319,9 +318,9 @@ def _get_min_sep_for_altitude(self, altitude: float) -> float: ) min_sep_val_id = np.searchsorted(self.prms['MIN_SEP_LIMS'], - altitude) + height) min_sep = self.prms['MIN_SEP_VALS'][min_sep_val_id] - logger.info('Alt: %.1f', altitude) + logger.info('Height: %.1f', height) logger.info('min_sep value: %.1f', min_sep) return min_sep @@ -362,11 +361,11 @@ def _setup_sligrolay_pdf(self, which: str = 'slices') -> tuple[pd.DataFrame, npt cols = ['n_hits', # Duplicate-corrected number of hits 'perc', # Duplicate-corrected hit percentage (in %) 'okta', # Corresponding okta value - 'alt_base', # Slice/Group/Layer base altitude - 'alt_mean', # Slice/Group/Layer mean altitude - 'alt_std', # Slice/Group/Layer altitude std - 'alt_min', # Slice/Group/Layer min altitude - 'alt_max', # Slice/Group/Layer max altitude + 'height_base', # Slice/Group/Layer base height + 'height_mean', # Slice/Group/Layer mean height + 'height_std', # Slice/Group/Layer height std + 'height_min', # Slice/Group/Layer min height + 'height_max', # Slice/Group/Layer max height 'thickness', # Slice/Group/Layer thickness 'fluffiness', # Slice/Group/Layer fluffiness 'code', # METAR code @@ -486,29 +485,29 @@ def _add_sligrolay_information( groups/ layers. Returns: - pd.DataFrame: with additional results in the columns alt_min, - alt_mean, alt_max, alt_std, thickness, fluffiness. + pd.DataFrame: with additional results in the columns height_min, + height_mean, height_max, height_std, thickness, fluffiness. """ for ind, cid in enumerate(cluster_ids): # Which hits are in this sli/gro/lay ? 
in_sligrolay = self.data[which[:-1]+'_id'] == cid - # Measure the mean altitude and associated std of the layer - pdf.iloc[ind, pdf.columns.get_loc('alt_mean')] = \ - self.data.loc[in_sligrolay, 'alt'].mean(skipna=True) - pdf.iloc[ind, pdf.columns.get_loc('alt_std')] = \ - self.data.loc[in_sligrolay, 'alt'].std(skipna=True) + # Measure the mean height and associated std of the layer + pdf.iloc[ind, pdf.columns.get_loc('height_mean')] = \ + self.data.loc[in_sligrolay, 'height'].mean(skipna=True) + pdf.iloc[ind, pdf.columns.get_loc('height_std')] = \ + self.data.loc[in_sligrolay, 'height'].std(skipna=True) # Let's also keep track of the min, max, thickness, and fluffiness values - pdf.iloc[ind, pdf.columns.get_loc('alt_min')] = \ - self.data.loc[in_sligrolay, 'alt'].min(skipna=True) - pdf.iloc[ind, pdf.columns.get_loc('alt_max')] = \ - self.data.loc[in_sligrolay, 'alt'].max(skipna=True) + pdf.iloc[ind, pdf.columns.get_loc('height_min')] = \ + self.data.loc[in_sligrolay, 'height'].min(skipna=True) + pdf.iloc[ind, pdf.columns.get_loc('height_max')] = \ + self.data.loc[in_sligrolay, 'height'].max(skipna=True) pdf.iloc[ind, pdf.columns.get_loc('thickness')] = \ - pdf.iloc[ind, pdf.columns.get_loc('alt_max')] - \ - pdf.iloc[ind, pdf.columns.get_loc('alt_min')] + pdf.iloc[ind, pdf.columns.get_loc('height_max')] - \ + pdf.iloc[ind, pdf.columns.get_loc('height_min')] pdf.iloc[ind, pdf.columns.get_loc('fluffiness')], _ = \ fluffer.get_fluffiness( - self.data.loc[in_sligrolay, ['dt', 'alt']].values, + self.data.loc[in_sligrolay, ['dt', 'height']].values, boost=self.prms['GROUPING_PRMS']['fluffiness_boost'], **self.prms['LOWESS']) @@ -525,21 +524,19 @@ def _calculate_sligrolay_base_height( cluster_ids (npt.ArrayLike): Original IDs of slices/ groups/ layers. Returns: - pd.DataFrame: with calculatio results in columnm alt_base + pd.DataFrame: with calculatio results in column height_base """ for ind, cid in enumerate(cluster_ids): # Which hits are in this sli/gro/lay ? 
in_sligrolay = self.data[which[:-1]+'_id'] == cid - # Compute the base altitude - pdf.iloc[ - ind, pdf.columns.get_loc('alt_base') - ] = self._calculate_base_height_for_selection(in_sligrolay,) + # Compute the base height + pdf.iloc[ind, pdf.columns.get_loc('height_base')] = \ + self._calculate_base_height_for_selection(in_sligrolay) return pdf @log_func_call(logger) - def metarize( - self, which: str = 'slices') -> None: + def metarize(self, which: str = 'slices') -> None: """ Assembles a :py:class:`pandas.DataFrame` of slice/group/layer METAR properties of interest. @@ -556,13 +553,13 @@ def metarize( * ``n_hits (int)``: duplicate-corrected number of hits * ``perc (float)``: sky coverage percentage (between 0-100) * ``okta (int)``: okta count - * ``alt_base (float)``: base altitude - * ``alt_mean (float)``: mean altitude - * ``alt_std (float)``: altitude standard deviation - * ``alt_min (float)``: minimum altitude - * ``alt_max (float)``: maximum altitude + * ``height_base (float)``: base height + * ``height_mean (float)``: mean height + * ``height_std (float)``: height standard deviation + * ``height_min (float)``: minimum height + * ``height_max (float)``: maximum height * ``thickness (float)``: thickness - * ``fluffiness (float)``: fluffiness (expressed in altitude units, i.e. ft) + * ``fluffiness (float)``: fluffiness (expressed in height units, i.e. ft) * ``code (str)``: METAR-like code * ``significant (bool)``: whether the layer is significant according to the ICAO rules. See :py:func:`.icao.significant_cloud` for details. @@ -576,7 +573,7 @@ def metarize( same ceilometer* are counted as one only. In other words, if a Type ``1`` and ``2`` hits **from the same ceilometer, at the same observation time** are included in a given slice/group/layer, they are counted as one hit only. This is a direct consequence of the - fact that clouds have a single base altitude at any given time [*citation needed*]. 
+ fact that clouds have a single base height at any given time [*citation needed*]. Note: The metarize function is modularized in private submethods defined above. @@ -589,7 +586,7 @@ def metarize( # calculate cloud amount in okta pdf = self._calculate_cloud_amount(which, pdf, cids) - # calculate slice/ group/ layer base altitude + # calculate slice/ group/ layer base height pdf = self._calculate_sligrolay_base_height(which, pdf, cids) # collect some more information, including fluffiness @@ -600,12 +597,13 @@ def metarize( pdf.iloc[ind, pdf.columns.get_loc('code')] = \ wmo.okta2code(pdf.iloc[ind, pdf.columns.get_loc('okta')]) + \ - wmo.alt2code(pdf.iloc[ind, pdf.columns.get_loc('alt_base')]) + wmo.height2code(pdf.iloc[ind, pdf.columns.get_loc('height_base')]) # Set the proper column types for cname in ['n_hits', 'okta', 'cluster_id']: pdf[cname] = pdf[cname].astype(int) - for cname in ['perc', 'alt_base', 'alt_mean', 'alt_std', 'alt_min', 'alt_max', 'thickness', + for cname in ['perc', 'height_base', 'height_mean', 'height_std', 'height_min', + 'height_max', 'thickness', 'fluffiness']: pdf[cname] = pdf[cname].astype(float) for cname in ['code']: @@ -618,9 +616,9 @@ def metarize( if which == 'groups': pdf['ncomp'] = pdf['ncomp'].astype(int) - # Sort the table as a function of the base altitude of the sli/gro/lay. + # Sort the table as a function of the base height of the sli/gro/lay. # This is why having the 'cluster_id' info is useful (so I remember which they are). - pdf.sort_values('alt_base', inplace=True) + pdf.sort_values('height_base', inplace=True) # Reset the index, 'cause I only need the one. pdf.reset_index(drop=True, inplace=True) @@ -633,7 +631,7 @@ def metarize( @log_func_call(logger) def find_slices(self) -> None: - """ Identify general altitude slices in the chunk data. Intended as the first stage towards + """ Identify general height slices in the chunk data. Intended as the first stage towards the identification of cloud layers. 
Important: @@ -644,12 +642,12 @@ def find_slices(self) -> None: # Get a scaled **copy** of the data to feed the clustering algorithm tmp = self.data_rescaled(dt_mode='shift-and-scale', dt_kwargs={'scale': self.prms['SLICING_PRMS']['dt_scale']}, - alt_mode=self.prms['SLICING_PRMS']['alt_scale_mode'], - alt_kwargs=self.prms['SLICING_PRMS']['alt_scale_kwargs'], + height_mode=self.prms['SLICING_PRMS']['height_scale_mode'], + height_kwargs=self.prms['SLICING_PRMS']['height_scale_kwargs'], ) # What are the valid points ? - valids = tmp['alt'].notna() + valids = tmp['height'].notna() # Add a column to the original data to keep track of the slice id. # First, set them all to -1 and force the correct dtype. I hate pandas for this ... @@ -662,12 +660,12 @@ def find_slices(self) -> None: elif len(valids[valids]) > 1: # ... run the clustering on them ... _, labels = cluster.clusterize( - tmp[['dt', 'alt']][valids].to_numpy(), algo='agglomerative', + tmp[['dt', 'height']][valids].to_numpy(), algo='agglomerative', **{'linkage': 'average', 'metric': 'manhattan', 'distance_threshold': self.prms['SLICING_PRMS']['distance_threshold']}) # ... and set the labels in the original data - self.data.loc[self.data['alt'].notna(), ['slice_id']] = labels + self.data.loc[self.data['height'].notna(), ['slice_id']] = labels else: # If I get no valid points, do nothing at all pass @@ -679,18 +677,18 @@ def find_slices(self) -> None: def _merge_close_groups(self) -> None: """Merge groups that are closer than the minimum separation at their - respective altitudes. + respective heights. Algorithm steps: - 1. Calculate preliminary group base altitude + 1. Calculate preliminary group base height 2. Start from bottom and search for the first two groups that are too close. - 3. Merge these groups and recalculate all base altitudes. + 3. Merge these groups and recalculate all base heights. 4. Repeat 2. and 3. until all groups are enough separated. 
The reason this is done iteratively is that there can be multiple overlaps. In these cases merging two groups often separates their new (combined) - base altitude far enough from the base altitude of the next layer above. + base height far enough from the base height of the next layer above. Doing the merging all at once would thus lead to unwanted overgrouping. Attention: This method only recalculates height, not amount! If you want @@ -698,24 +696,24 @@ def _merge_close_groups(self) -> None: _calculate_cloud_amount or the public metarize() method. """ - # Let's setup a groups df and get the groups base altitude. + # Let's setup a groups df and get the groups base height. # setup pd.DataFrame to store slices/ groups/ layers prelim_groups, cids = self._setup_sligrolay_pdf('groups') - # calculate bae altitude + # calculate bae height prelim_groups = self._calculate_sligrolay_base_height( 'groups', prelim_groups, cids ) - # Ordering by altitude - prelim_groups.sort_values('alt_base', inplace=True) + # Ordering by height + prelim_groups.sort_values('height_base', inplace=True) prelim_groups.reset_index(drop=True, inplace=True) #self.metarize('groups') #prelim_groups = self.groups - min_seps_grp = prelim_groups['alt_base'].apply(self._get_min_sep_for_altitude) - base_alt_diffs = prelim_groups['alt_base'].diff() + min_seps_grp = prelim_groups['height_base'].apply(self._get_min_sep_for_height) + base_height_diffs = prelim_groups['height_base'].diff() # create a boolean series to select values, fillna is necessary as # first entry of diff will be nan - lt_min_sep_indexer = (base_alt_diffs < min_seps_grp).fillna(False) + lt_min_sep_indexer = (base_height_diffs < min_seps_grp).fillna(False) while len(prelim_groups[lt_min_sep_indexer]) > 0: # start with the two lowest layers that are too close idx = prelim_groups[lt_min_sep_indexer].index[0] @@ -729,16 +727,16 @@ def _merge_close_groups(self) -> None: prelim_groups.drop(index=idx, inplace=True) # resetting because we must 
not have index gaps in the next iteration prelim_groups.reset_index(drop=True, inplace=True) - # now we recalculate the base alt for the merged supergroup + # now we recalculate the base height for the merged supergroup data_idxer = self.data['group_id'] == prelim_groups['cluster_id'].iloc[idx - 1] prelim_groups.iloc[ - idx - 1, prelim_groups.columns.get_loc('alt_base') + idx - 1, prelim_groups.columns.get_loc('height_base') ] = self._calculate_base_height_for_selection(data_idxer) - # as this changes base alt, it is possible that we now are closer + # as this changes base height, it is possible that we now are closer # to another group, so we have to continue iteratively. - min_seps_grp = prelim_groups['alt_base'].apply(self._get_min_sep_for_altitude) - base_alt_diffs = prelim_groups['alt_base'].diff() - lt_min_sep_indexer = (base_alt_diffs < min_seps_grp).fillna(False) + min_seps_grp = prelim_groups['height_base'].apply(self._get_min_sep_for_height) + base_height_diffs = prelim_groups['height_base'].diff() + lt_min_sep_indexer = (base_height_diffs < min_seps_grp).fillna(False) @log_func_call(logger) def find_groups(self) -> None: @@ -774,15 +772,15 @@ def find_groups(self) -> None: # Let's get ready to measure the slice separation above and below with respect to the # other ones. 
- alt_pad = self.prms['GROUPING_PRMS']['alt_pad_perc']/100 - m_lim = row['alt_min'] - alt_pad * row['thickness'] - p_lim = row['alt_max'] + alt_pad * row['thickness'] + height_pad = self.prms['GROUPING_PRMS']['height_pad_perc']/100 + m_lim = row['height_min'] - height_pad * row['thickness'] + p_lim = row['height_max'] + height_pad * row['thickness'] # For each other slice below and above, figure out if it is overlapping or not - seps_m = [m_lim < self.slices.iloc[item]['alt_max'] + - alt_pad * self.slices.iloc[item]['thickness'] for item in range(ind)] - seps_p = [p_lim > self.slices.iloc[item]['alt_min'] - - alt_pad * self.slices.iloc[item]['thickness'] + seps_m = [m_lim < self.slices.iloc[item]['height_max'] + + height_pad * self.slices.iloc[item]['thickness'] for item in range(ind)] + seps_p = [p_lim > self.slices.iloc[item]['height_min'] - + height_pad * self.slices.iloc[item]['thickness'] for item in range(ind+1, len(self.slices), 1)] # If the slice is isolated, I can stop here and move on ... @@ -819,29 +817,29 @@ def find_groups(self) -> None: valids = self.data['slice_id'].isin([self.slices.iloc[ind]['cluster_id'] for ind in grp]) - # Rescale these points in dt and alt prior to running the single-linkage clustering. + # Rescale these points in dt and height prior to running the single-linkage clustering. # dt scaling is provided by the user. - # alt scaling is derived from the smallest fluffiness of the slice bundle. - grp_alt_scale = self.slices.loc[grp, 'fluffiness'].min(skipna=True) - logger.debug('Bundle min. fluffiness: %.1f ft', grp_alt_scale) + # height scaling is derived from the smallest fluffiness of the slice bundle. + grp_height_scale = self.slices.loc[grp, 'fluffiness'].min(skipna=True) + logger.debug('Bundle min. fluffiness: %.1f ft', grp_height_scale) # ... 
and check against the allowed scaling range - grp_alt_scale = np.max([np.min(self.prms['GROUPING_PRMS']['alt_scale_range']), - grp_alt_scale]) - grp_alt_scale = np.min([np.max(self.prms['GROUPING_PRMS']['alt_scale_range']), - grp_alt_scale]) - logger.debug('Bundle alt. scale: %.1f ft', grp_alt_scale) + grp_height_scale = np.max([np.min(self.prms['GROUPING_PRMS']['height_scale_range']), + grp_height_scale]) + grp_height_scale = np.min([np.max(self.prms['GROUPING_PRMS']['height_scale_range']), + grp_height_scale]) + logger.debug('Bundle height scale: %.1f ft', grp_height_scale) # Ready to trigger the data rescaling tmp = self.data_rescaled(dt_mode='shift-and-scale', dt_kwargs={'scale': self.prms['GROUPING_PRMS']['dt_scale']}, - alt_mode='shift-and-scale', - alt_kwargs={'shift': 0, 'scale': grp_alt_scale}) + height_mode='shift-and-scale', + height_kwargs={'shift': 0, 'scale': grp_height_scale}) # What are the valid points ? - valids = tmp['alt'].notna() * valids + valids = tmp['height'].notna() * valids # Run the clustering nlabels, labels = cluster.clusterize( - tmp[['dt', 'alt']][valids].to_numpy(), algo='agglomerative', + tmp[['dt', 'height']][valids].to_numpy(), algo='agglomerative', **{'linkage': 'single', 'metric': 'euclidean', 'distance_threshold': 1}) # Based on the clustering, assign each element to a group. The group id is the slice_id @@ -857,7 +855,7 @@ def find_groups(self) -> None: # Now we need to separate layers if they are closer than the set # minimum separation self._merge_close_groups() - #Let's metarize the merged groups + # Let's metarize the merged groups self.metarize(which='groups') @log_func_call(logger) @@ -884,18 +882,17 @@ def find_layers(self) -> None: # Loop through every group, and look for sub-layers in it ... for ind in range(len(self.groups)): - # Let's extract the altitudes of all the hits in this group ... 
- gro_alts = self.data.loc[self.data.loc[:, 'group_id'] == - self._groups.at[ind, 'cluster_id'], - 'alt'].to_numpy() + # Let's extract the heights of all the hits in this group ... + gro_heights = self.data.loc[self.data.loc[:, 'group_id'] == + self._groups.at[ind, 'cluster_id'], 'height'].to_numpy() # Only look for multiple layers if it is worth it ... # 1) Layer density is large enough cond1 = self.groups.at[ind, 'okta'] < self.prms['LAYERING_PRMS']['min_okta_to_split'] # 2) I have more than 30 valid points (GMM is unstable below this amount). - cond2 = len(gro_alts[~np.isnan(gro_alts)]) < 30 - # 3) Not all the altitudes are the same - cond3 = len(np.unique(gro_alts[~np.isnan(gro_alts)])) == 1 + cond2 = len(gro_heights[~np.isnan(gro_heights)]) < 30 + # 3) Not all the heights are the same + cond3 = len(np.unique(gro_heights[~np.isnan(gro_heights)])) == 1 if cond1 or cond2 or cond3: # Add some useful info to the log logger.info( @@ -906,22 +903,22 @@ def find_layers(self) -> None: continue # Reshape the array in anticipation of the GMM routine ... - gro_alts = gro_alts.reshape(-1, 1) + gro_heights = gro_heights.reshape(-1, 1) - # Identify the minimum layer separation given the overall group base altitude - min_sep = self._get_min_sep_for_altitude(self.groups.at[ind, 'alt_base']) + # Identify the minimum layer separation given the overall group base height + min_sep = self._get_min_sep_for_height(self.groups.at[ind, 'height_base']) - # Handle #78: if the data is comprised of only two distinct altitudes, only look for + # Handle #78: if the data is comprised of only two distinct heights, only look for # up to 2 Gaussian components. Else, up to 3. - ncomp_max = np.min([len(np.unique(gro_alts[~np.isnan(gro_alts)])), 3]) + ncomp_max = np.min([len(np.unique(gro_heights[~np.isnan(gro_heights)])), 3]) logger.debug('Setting ncomp_max to: %i', ncomp_max) # And feed them to a Gaussian Mixture Model to figure out how many components it has ... 
ncomp, sub_layers_id, _ = layer.ncomp_from_gmm( - gro_alts, ncomp_max=ncomp_max, min_sep=min_sep, + gro_heights, ncomp_max=ncomp_max, min_sep=min_sep, layer_base_params={ 'lookback_perc': self.prms['BASE_LVL_LOOKBACK_PERC'], - 'alt_perc': self.prms['BASE_LVL_ALT_PERC'] + 'height_perc': self.prms['BASE_LVL_HEIGHT_PERC'] }, **self.prms['LAYERING_PRMS']['gmm_kwargs']) @@ -1043,9 +1040,10 @@ def metar_msg(self, which: str = 'layers') -> str: the resulting ``str`` ! See :py:func:`.icao.significant_cloud` for details. .. Caution:: - The Minimum Sector Altitude value set when the :py:class:`.CeiloChunk` instance **was - initialized** will be applied ! If in doubt, the value used by this method is that set - in the (parent) class attribute :py:attr:`.AbstractChunk.msa`. + The Minimum Sector Altitude values set when + the :py:class:`.CeiloChunk` instance **was initialized** will be applied ! If in doubt, + the values used by this method are those set in the (parent) class + attribute :py:attr:`.AbstractChunk.msa` """ @@ -1066,14 +1064,14 @@ def metar_msg(self, which: str = 'layers') -> str: # Deal with the situation where layers have been found ... msg = sligrolay['code'] # What layers are significant *AND* below the MSA ? 
- report = sligrolay['significant']*(sligrolay['alt_base'] < msa_val) + report = sligrolay['significant']*(sligrolay['height_base'] < msa_val) msg = sligrolay['code'][report] msg = ' '.join(msg.to_list()) # Here, deal with the situations when all clouds are above the MSA if len(msg) == 0: # first check if any significant clouds are in the interval [MSA, MSA+MSA_HIT_BUFFER] - sligrolay_in_buffer = sligrolay['significant'] * (sligrolay['alt_base'] >= msa_val) + sligrolay_in_buffer = sligrolay['significant'] * (sligrolay['height_base'] >= msa_val) if sligrolay_in_buffer.any(): return 'NSC' # and return a NSC as it implies that the cloud is above the MSA return self._ncd_or_nsc() # else, check for CBH above MSA + MSA_HIT_BUFFER diff --git a/src/ampycloud/fluffer.py b/src/ampycloud/fluffer.py index 0b5ea41..fa12230 100644 --- a/src/ampycloud/fluffer.py +++ b/src/ampycloud/fluffer.py @@ -1,5 +1,5 @@ """ -Copyright (c) 2021-2022 MeteoSwiss, contributors listed in AUTHORS. +Copyright (c) 2021-2024 MeteoSwiss, contributors listed in AUTHORS. Distributed under the terms of the 3-Clause BSD License. @@ -27,12 +27,13 @@ def get_fluffiness(pts, boost=2, **kwargs): """ Utility functions to compute the fluffiness of a set of ceilometer hits. Args: - pts (ndarray): 2D array of [dt, alt] ceilometer hits. None must have NaNs altitudes. + pts (ndarray): 2D array of [dt, height] ceilometer hits. None must have NaNs heights. boost (float): the fluffiness boost factor. Defaults to 2. **kwargs (optional): additional arguments to be fed to statsmodels.nonparameteric.lowess(). Returns: - float, ndarray: the fluffiness (in alt units) and LOWESS-smoothed (dt, alt) values (sorted). + float, ndarray: the fluffiness (in height units) and LOWESS-smoothed (dt, height) values + (sorted). The *fluffiness* is computed as `boost * mean(abs(y - lowess))`, where lowess is the smooth LOWESS fit to the ceilometer hits. 
diff --git a/src/ampycloud/hardcoded.py b/src/ampycloud/hardcoded.py index 4c4f5f1..2d652ad 100644 --- a/src/ampycloud/hardcoded.py +++ b/src/ampycloud/hardcoded.py @@ -1,5 +1,5 @@ """ -Copyright (c) 2022 MeteoSwiss, contributors listed in AUTHORS. +Copyright (c) 2022-2024 MeteoSwiss, contributors listed in AUTHORS. Distributed under the terms of the 3-Clause BSD License. @@ -11,4 +11,4 @@ from pandas import StringDtype #: dict: the columns & associated types required for the pandas DataFrame fed to ampycloud. -REQ_DATA_COLS = {'ceilo': StringDtype(), 'dt': float, 'alt': float, 'type': int} +REQ_DATA_COLS = {'ceilo': StringDtype(), 'dt': float, 'height': float, 'type': int} diff --git a/src/ampycloud/layer.py b/src/ampycloud/layer.py index 7791629..465e29c 100644 --- a/src/ampycloud/layer.py +++ b/src/ampycloud/layer.py @@ -1,5 +1,5 @@ """ -Copyright (c) 2021-2022 MeteoSwiss, contributors listed in AUTHORS. +Copyright (c) 2021-2024 MeteoSwiss, contributors listed in AUTHORS. Distributed under the terms of the 3-CLause BSD License. @@ -142,7 +142,7 @@ def best_gmm(abics: np.ndarray, mode: str = 'delta', def ncomp_from_gmm(vals: np.ndarray, ncomp_max: int = 3, min_sep: Union[int, float] = 0, - layer_base_params: dict[str, int] = {'lookback_perc': 100, 'alt_perc': 5}, + layer_base_params: dict[str, int] = None, scores: str = 'BIC', rescale_0_to_x: float = None, random_seed: int = 42, @@ -180,6 +180,9 @@ def ncomp_from_gmm(vals: np.ndarray, ``_ """ + if layer_base_params is None: + layer_base_params = {'lookback_perc': 100, 'height_perc': 5} + # If I get a 1-D array, deal with it. if np.ndim(vals) == 1: vals = vals.reshape(-1, 1) @@ -244,12 +247,12 @@ def ncomp_from_gmm(vals: np.ndarray, return best_ncomp, best_ids, abics # If I found more than one component, let's make sure that they are sufficiently far apart. 
- # First, let's compute the component base altitude + # First, let's compute the component base height base_comp_heights = [ - utils.calc_base_alt( + utils.calc_base_height( vals_orig[best_ids == i].flatten(), layer_base_params['lookback_perc'], - layer_base_params['alt_perc'] + layer_base_params['height_perc'] ) for i in range(ncomp[best_model_ind]) ] diff --git a/src/ampycloud/plots/diagnostics.py b/src/ampycloud/plots/diagnostics.py index 6e65492..639fd9f 100644 --- a/src/ampycloud/plots/diagnostics.py +++ b/src/ampycloud/plots/diagnostics.py @@ -1,5 +1,5 @@ """ -Copyright (c) 2021-2023 MeteoSwiss, contributors listed in AUTHORS. +Copyright (c) 2021-2024 MeteoSwiss, contributors listed in AUTHORS. Distributed under the terms of the 3-Clause BSD License. @@ -123,7 +123,7 @@ def show_hits_only(self, show_ceilos: bool = False) -> None: self._axs[0].clear() # Add the points - self._axs[0].scatter(self._chunk.data['dt'], self._chunk.data['alt'], + self._axs[0].scatter(self._chunk.data['dt'], self._chunk.data['height'], marker='o', s=10, edgecolor=symb_clrs, facecolor=list(fcs)) @@ -161,7 +161,7 @@ def show_slices(self) -> None: # First plot anything that is not assigned to a slice self._axs[0].scatter(self._chunk.data[self._chunk.data['slice_id'] == -1]['dt'], - self._chunk.data[self._chunk.data['slice_id'] == -1]['alt'], + self._chunk.data[self._chunk.data['slice_id'] == -1]['height'], marker='s', s=15, facecolor=fcs[self._chunk.data['slice_id'] == -1], edgecolor='k') @@ -187,27 +187,27 @@ def show_slices(self) -> None: # I can finally show the points ... self._axs[0].scatter(self._chunk.data.loc[in_slice, 'dt'], - self._chunk.data.loc[in_slice, 'alt'], + self._chunk.data.loc[in_slice, 'height'], marker='o', s=10, c=fcs[in_slice], edgecolor=base_clr) # ... 
and the corresponding LOWESS-fit used to derive their fluffiness _, lowess_pts = fluffer.get_fluffiness( - self._chunk.data.loc[in_slice, ['dt', 'alt']].values, **self._chunk.prms['LOWESS']) + self._chunk.data.loc[in_slice, ['dt', 'height']].values, **self._chunk.prms['LOWESS']) self._axs[0].plot(lowess_pts[:, 0], lowess_pts[:, 1], ls='-', lw=1.5, c=base_clr, drawstyle='steps-mid', zorder=0) # Let's also plot the overlap area of the slice - slice_min = self._chunk.slices.loc[ind, 'alt_min'] - slice_max = self._chunk.slices.loc[ind, 'alt_max'] + slice_min = self._chunk.slices.loc[ind, 'height_min'] + slice_max = self._chunk.slices.loc[ind, 'height_max'] thickness = self._chunk.slices.loc[ind, 'thickness'] - alt_pad = self._chunk.prms['GROUPING_PRMS']['alt_pad_perc']/100 + height_pad = self._chunk.prms['GROUPING_PRMS']['height_pad_perc']/100 # Get some fake data spanning the entire data range misc = np.linspace(self._chunk.data['dt'].min(skipna=True), self._chunk.data['dt'].max(skipna=True), 3) self._axs[0].fill_between(misc, - np.ones_like(misc) * (slice_min - alt_pad * thickness), - np.ones_like(misc) * (slice_max + alt_pad * thickness), + np.ones_like(misc) * (slice_min - height_pad * thickness), + np.ones_like(misc) * (slice_max + height_pad * thickness), edgecolor='none', alpha=0.1, zorder=0, facecolor=base_clr) @@ -230,11 +230,11 @@ def show_slices(self) -> None: msg = r'\smaller ' msg += wmo.okta2symb( self._chunk.slices.iloc[ind]['okta'], - use_metsymb = dynamic.AMPYCLOUD_PRMS['MPL_STYLE'] == 'metsymb') + use_metsymb=dynamic.AMPYCLOUD_PRMS['MPL_STYLE'] == 'metsymb') msg += ' ' + self._chunk.slices.iloc[ind]['code'] + \ rf' $f$:{self._chunk.slices.loc[ind, "fluffiness"]:.0f} ft' msg += warn - self._axs[1].text(0.5, self._chunk.slices.loc[ind, 'alt_base'], + self._axs[1].text(0.5, self._chunk.slices.loc[ind, 'height_base'], texify(msg), va='center', ha='center', color=base_clr, bbox={'facecolor': 'none', 'edgecolor': base_clr, @@ -265,7 +265,7 @@ def 
show_groups(self, show_points: bool = False) -> None: # I can finally show the points ... self._axs[0].scatter(self._chunk.data[in_group]['dt'], - self._chunk.data[in_group]['alt'], + self._chunk.data[in_group]['height'], marker=MRKS[ind % len(MRKS)], s=40, c='none', edgecolor='gray', lw=1, zorder=10, alpha=0.5) @@ -289,7 +289,7 @@ def show_groups(self, show_points: bool = False) -> None: self._chunk.groups.iloc[ind]['okta'], use_metsymb=(dynamic.AMPYCLOUD_PRMS['MPL_STYLE'] == 'metsymb') ) + ' ' + self._chunk.groups.iloc[ind]['code'] + warn - self._axs[2].text(0.5, self._chunk.groups.iloc[ind]['alt_base'], + self._axs[2].text(0.5, self._chunk.groups.iloc[ind]['height_base'], texify(msg), va='center', ha='center', color='gray', bbox={'facecolor': 'none', 'edgecolor': 'gray', @@ -312,7 +312,7 @@ def show_layers(self) -> None: # I can finally show the points ... self._axs[0].scatter(self._chunk.data[in_layer]['dt'], - self._chunk.data[in_layer]['alt'], + self._chunk.data[in_layer]['height'], marker=MRKS[ind % len(MRKS)], s=40, c='none', edgecolor='k', lw=1, zorder=10, alpha=0.5) @@ -322,7 +322,7 @@ def show_layers(self) -> None: else: lls = '--' - self._axs[0].axhline(self._chunk.layers.iloc[ind]['alt_base'], xmax=1, c='k', + self._axs[0].axhline(self._chunk.layers.iloc[ind]['height_base'], xmax=1, c='k', lw=1, zorder=0, ls=lls, clip_on=False) # Stop here for empty layers @@ -341,7 +341,7 @@ def show_layers(self) -> None: self._chunk.layers.iloc[ind]['okta'], use_metsymb=(dynamic.AMPYCLOUD_PRMS['MPL_STYLE'] == 'metsymb') ) + ' ' + self._chunk.layers.iloc[ind]['code'] - self._axs[3].text(0.5, self._chunk.layers.iloc[ind]['alt_base'], + self._axs[3].text(0.5, self._chunk.layers.iloc[ind]['height_base'], texify(msg), va='center', ha='center', color='k', bbox={'facecolor': 'none', 'edgecolor': 'k', 'alpha': alpha}) @@ -410,7 +410,7 @@ def add_metar(self) -> None: msg = r'\smaller \bf ampycloud: ' + self._chunk.metar_msg() if self._chunk.msa is not None: - msg += '\n' + 
rf'\smaller\smaller MSA: {self._chunk.msa} ft' + msg += '\n' + rf'\smaller\smaller MSA: {self._chunk.msa} ft aal' # Show the msg ... self._axs[2].text(0.5, 1.25, texify(msg), @@ -424,7 +424,7 @@ def format_primary_axes(self) -> None: # Add the axes labels self._axs[0].set_xlabel(r'$\Delta t$ [s]', labelpad=10) - self._axs[0].set_ylabel(r'Alt. [ft]', labelpad=10) + self._axs[0].set_ylabel(r'Height [ft aal]', labelpad=10) def format_slice_axes(self) -> None: """ Format the duplicate axes related to the slicing part. @@ -442,10 +442,10 @@ def format_slice_axes(self) -> None: 'shift-and-scale', {'scale': self._chunk.prms['SLICING_PRMS']['dt_scale']}) - (alt_scale_kwargs, alt_descale_kwargs) = \ - get_scaling_kwargs(self._chunk.data['alt'].values, - self._chunk.prms['SLICING_PRMS']['alt_scale_mode'], - self._chunk.prms['SLICING_PRMS']['alt_scale_kwargs']) + (height_scale_kwargs, height_descale_kwargs) = \ + get_scaling_kwargs(self._chunk.data['height'].values, + self._chunk.prms['SLICING_PRMS']['height_scale_mode'], + self._chunk.prms['SLICING_PRMS']['height_scale_kwargs']) # Then add the secondary axis, using partial function to define the back-and-forth # conversion functions. @@ -458,15 +458,15 @@ def format_slice_axes(self) -> None: secax_y = self._axs[0].secondary_yaxis( 1.03, functions=(partial(scaler.apply_scaling, - fct=self._chunk.prms['SLICING_PRMS']['alt_scale_mode'], - **alt_scale_kwargs), + fct=self._chunk.prms['SLICING_PRMS']['height_scale_mode'], + **height_scale_kwargs), partial(scaler.apply_scaling, - fct=self._chunk.prms['SLICING_PRMS']['alt_scale_mode'], - **alt_descale_kwargs))) + fct=self._chunk.prms['SLICING_PRMS']['height_scale_mode'], + **height_descale_kwargs))) # Add the axis labels secax_x.set_xlabel(texify(r'\smaller Slicing $\Delta t$')) - secax_y.set_ylabel(texify(r'\smaller Slicing Alt.')) + secax_y.set_ylabel(texify(r'\smaller Slicing height')) # And reduce the fontsize while we're at it ... 
secax_x.tick_params(axis='x', which='both', labelsize=rcParams['font.size']-2) @@ -475,7 +475,7 @@ def format_slice_axes(self) -> None: def format_group_axes(self) -> None: """ Format the duplicate axes related to the grouping part. - TODO: add secondary axis for the altitude rescaling as well. See #91. + TODO: add secondary axis for the height rescaling as well. See #91. """ diff --git a/src/ampycloud/plots/secondary.py b/src/ampycloud/plots/secondary.py index 8325863..9030f3f 100644 --- a/src/ampycloud/plots/secondary.py +++ b/src/ampycloud/plots/secondary.py @@ -1,5 +1,5 @@ """ -Copyright (c) 2021-2023 MeteoSwiss, contributors listed in AUTHORS. +Copyright (c) 2021-2024 MeteoSwiss, contributors listed in AUTHORS. Distributed under the terms of the 3-Clause BSD License. @@ -38,7 +38,7 @@ def scaling_fcts(show: bool = True, save_fmts (list|str, optional): a list of file formats to export the plot to. Defaults to None = ['png']. - This is a small utility routine to rapidly see the different altitude scaling options used by + This is a small utility routine to rapidly see the different height scaling options used by ampycloud. For the "step" scaling plot, the parameters are taken straight from dynamic.GROUPING_PRMS. 
@@ -65,17 +65,17 @@ def scaling_fcts(show: bool = True, ax3 = fig.add_subplot(fig_gs[0, 2]) # Plot the different scaling laws - alts = np.arange(0, 25000, 10) + heights = np.arange(0, 25000, 10) # Plot the slicing scale - ax1.plot(alts, apply_scaling(alts, fct='shift-and-scale', **{'scale': 1000}), c='k', lw=2) + ax1.plot(heights, apply_scaling(heights, fct='shift-and-scale', **{'scale': 1000}), c='k', lw=2) ax1.set_title(texify(r'\smaller shift-and-scale')) - ax2.plot(alts, apply_scaling(alts, fct='minmax-scale'), c='k', lw=2) + ax2.plot(heights, apply_scaling(heights, fct='minmax-scale'), c='k', lw=2) ax2.set_title(texify(r'\smaller minmax-scale')) - ax3.plot(alts, apply_scaling( - alts, fct='step-scale', + ax3.plot(heights, apply_scaling( + heights, fct='step-scale', **{'steps': [3000, 8000], 'scales': [100, 500, 1000]}), c='k', lw=2) ax3.set_title(texify(r'\smaller step-scale')) diff --git a/src/ampycloud/prms/ampycloud_default_prms.yml b/src/ampycloud/prms/ampycloud_default_prms.yml index 90d8c00..7ce6661 100644 --- a/src/ampycloud/prms/ampycloud_default_prms.yml +++ b/src/ampycloud/prms/ampycloud_default_prms.yml @@ -13,8 +13,8 @@ # Plotting style. Can be one of 'base', 'latex', 'metsymb' MPL_STYLE: 'base' -# Minimum Sector Altitude, in ft. No cloud layers with a base above this value will be reported -# in the ampycloud METAR messages. Set it to null for no limit. +# Minimum Sector Altitude, in ft above aerodrome level. No cloud layers with a base above this value +# will be reported in the ampycloud METAR messages. Set it to null for no limit. 
MSA: null # Additional distance above the MSA value (in ft) beyond which hits will be ignored (=cropped) by @@ -29,32 +29,32 @@ MAX_HITS_OKTA0: 3 MAX_HOLES_OKTA8: 1 # Number (in %) of the cloud hit heights (sorted from smallest to highest) to ignore when -# computing the cloud altitude base, taken to be the min altitude of the +# computing the cloud base height, taken to be the min height of the # 100-LAYER_BASE_LVL_PERC % remaining points. -BASE_LVL_ALT_PERC: 5 +BASE_LVL_HEIGHT_PERC: 5 # Number of Slice/Group/Layer points (in %) - starting from the most recent ones - -# to use to compute the cloud base altitude. 30 would use only the 30% youngest hits. +# to use to compute the cloud base height. 30 would use only the 30% youngest hits. # Set to 100 to use all the points. BASE_LVL_LOOKBACK_PERC: 100 -# LOWESS parameters used to fit Slice/Groups/Layers altitude trends +# LOWESS parameters used to fit Slice/Groups/Layers height trends LOWESS: # Fraction of the slice/group/layer points to consider when deriving the LOWESS smooth fit. # Values too small will result in overfitting. Values too large will miss rapid - # altitude fluctuations. + # height fluctuations. frac: 0.35 # Number of LOWESS fitting iterations it: 3 -# Minimum separation (in ft) between the base altitudes of identified layers inside groups -# with a base altitude in the bins set by min_sep_lims. Any layers separated by less than this +# Minimum separation (in ft) between the base heights of identified layers inside groups +# with a base height in the bins set by min_sep_lims. Any layers separated by less than this # value will be remerged into one layer. Any value < than 5 times the vertical resolution of the data will # raise a warning, as it may lead to an over-layering of groups. For the ampycloud METAR # messages to be consistent with the ICAO rules of layer selection, this should be >=100 ft # below 10'000 ft. 
MIN_SEP_VALS: [250, 1000] -# Altitude threshold (in ft) separating the min_sep_val elements. +# Height threshold (in ft) separating the min_sep_val elements. # The 0 and + infty extrema will be automatically added to the list by ampycloud. # Just list here the intermediate steps. MIN_SEP_LIMS: [10000] @@ -65,24 +65,24 @@ SLICING_PRMS: distance_threshold: 0.2 # Time scaling factor dt_scale: 100000 - # Altitude scaling mode - alt_scale_mode: minmax-scale - # Altitude scaling parameters (in ft) - alt_scale_kwargs: + # Height scaling mode + height_scale_mode: minmax-scale + # Height scaling parameters (in ft) + height_scale_kwargs: min_range: 1000 # Clustering parameters GROUPING_PRMS: - # Padding (in % of the slice altitude range) to add above and below slices + # Padding (in % of the slice height range) to add above and below slices # before looking for overlaps - alt_pad_perc: +10 + height_pad_perc: +10 # Time scaling factor (in s) dt_scale: 180 - # Boost factor, applied to the slice fluffiness to derive the altitude scaling factor + # Boost factor, applied to the slice fluffiness to derive the height scaling factor fluffiness_boost: 2 - # Range of allowed altitude scaling factor (in ft). The fluffiness_boost will never be allowed - # to push the alt_scaling beyond this range. - alt_scale_range: [100, 500] + # Range of allowed height scaling factor (in ft). The fluffiness_boost will never be allowed + # to push the height_scaling beyond this range. + height_scale_range: [100, 500] # Layering parameters LAYERING_PRMS: diff --git a/src/ampycloud/utils/mocker.py b/src/ampycloud/utils/mocker.py index 50c05fd..a85e242 100644 --- a/src/ampycloud/utils/mocker.py +++ b/src/ampycloud/utils/mocker.py @@ -1,5 +1,5 @@ """ -Copyright (c) 2021-2022 MeteoSwiss, contributors listed in AUTHORS. +Copyright (c) 2021-2024 MeteoSwiss, contributors listed in AUTHORS. Distributed under the terms of the 3-Clause BSD License. 
@@ -25,37 +25,38 @@ @log_func_call(logger) -def flat_layer(dts: np.array, alt: float, alt_std: float, sky_cov_frac: float) -> pd.DataFrame: - """ Generates a mock, flat, Gaussian cloud layer around a given altitude. +def flat_layer(dts: np.array, height: float, height_std: float, + sky_cov_frac: float) -> pd.DataFrame: + """ Generates a mock, flat, Gaussian cloud layer around a given height. Args: dts (np.array of float): time deltas, in s, for the simulated ceilometer hits. - alt (float): layer mean altitude, in ft. - alt_std (float): layer altitude standard deviation, in ft. + height (float): layer mean height, in ft above aerodrome level (aal). + height_std (float): layer height standard deviation, in ft. sky_cov_frac (float): Sky coverage fraction. Random hits will be set to NaN to reach this value. Must be 0 <= x <= 1. Returns: - :py:class:`pandas.DataFrame`: the simulated layer with columns ['dt', 'alt']. + :py:class:`pandas.DataFrame`: the simulated layer with columns ['dt', 'height']. """ # How many points do I need to generate ? n_pts = len(dts) # Create the storage structure - out = pd.DataFrame(columns=['dt', 'alt'], dtype=float) + out = pd.DataFrame(columns=['dt', 'height'], dtype=float) - # Generate the random altitude data - out['alt'] = np.random.normal(loc=alt, scale=alt_std, size=n_pts) - # Cleanup any negative altitudes, if warranted. - out.loc[out['alt'] <= 0, 'alt'] = np.nan + # Generate the random height data + out['height'] = np.random.normal(loc=height, scale=height_std, size=n_pts) + # Cleanup any negative heights, if warranted. + out.loc[out['height'] <= 0, 'height'] = np.nan out['dt'] = dts # Empty hits to get the requested sky coverage fraction # First extract the hits I want to keep ... to_keep = out.sample(frac=sky_cov_frac) - # ... then get rid of the alt values everywhere ... - out.loc[:, 'alt'] = np.nan + # ... then get rid of the height values everywhere ... + out.loc[:, 'height'] = np.nan # ... 
and re-set the values I choose to keep. out.loc[to_keep.index] = to_keep @@ -63,28 +64,29 @@ def flat_layer(dts: np.array, alt: float, alt_std: float, sky_cov_frac: float) - @log_func_call(logger) -def sin_layer(dts: np.array, alt: float, alt_std: float, sky_cov_frac: float, +def sin_layer(dts: np.array, height: float, height_std: float, sky_cov_frac: float, period: Union[int, float], amplitude: Union[int, float]) -> pd.DataFrame: """ Generates a sinusoidal cloud layer. Args: dts (np.array of float): time deltas, in s, for the simulated ceilometer hits. - alt (float): layer mean altitude, in ft. - alt_std (float): layer altitude standard deviation, in ft. + height (float): layer mean height, in ft above aerodrome level (aal). + height_std (float): layer height standard deviation, in ft. sky_cov_frac (float, optional): Sky coverage fraction. Random hits will be set to NaN to reach this value. Must be 0 <= x <= 1. period (int|float): period of the sine-wave, in s. amplitude (int|float): amplitude of the sine-wave, in ft. Returns: - :py:class:`pandas.DataFrame`: the simulated layer with columns ['alt', 'dt']. + :py:class:`pandas.DataFrame`: the simulated layer with columns ['height', 'dt']. """ # First, get a flat layer - out = flat_layer(dts, alt, alt_std, sky_cov_frac) + out = flat_layer(dts, height, height_std, sky_cov_frac) # And add to it a sinusoidal fluctuations. Note that nan should stay nan.
- out.loc[:, 'alt'] = out.loc[:, 'alt'] + np.sin(-np.pi/2 + out['dt']/period*2*np.pi) * amplitude + out.loc[:, 'height'] = out.loc[:, 'height'] + \ + np.sin(-np.pi/2 + out['dt']/period*2*np.pi) * amplitude return out @@ -103,13 +105,13 @@ def mock_layers(n_ceilos: int, lookback_time: float, hit_gap: float, from ``lookback_time`` and ``hit_gap``): :: - {'alt':1000, 'alt_std': 100, 'sky_cov_frac': 1, + {'height':1000, 'height_std': 100, 'sky_cov_frac': 1, 'period': 100, 'amplitude': 0} Returns: :py:class:`pandas.DataFrame`: a pandas DataFrame with the mock data, ready to be fed to - ampycloud. Columns ['ceilo', 'dt', 'alt', 'type'] correspond to 1) ceilo names, 2) time - deltas in s, 3) hit altitudes in ft, and 4) hit type. + ampycloud. Columns ['ceilo', 'dt', 'height', 'type'] correspond to 1) ceilo names, 2) time + deltas in s, 3) hit heights in ft aal, and 4) hit type. TODO: - add the possibility to set some VV hits in the mix @@ -124,10 +126,10 @@ def mock_layers(n_ceilos: int, lookback_time: float, hit_gap: float, if not isinstance(item, dict): raise AmpycloudError(f'Element {ind} from layer_prms should be a dict,' + f' not: {type(item)}') - if not all(key in item.keys() for key in ['alt', 'alt_std', 'sky_cov_frac', + if not all(key in item.keys() for key in ['height', 'height_std', 'sky_cov_frac', 'period', 'amplitude']): raise AmpycloudError('One or more of the following dict keys are missing in ' + - f"layer_prms[{ind}]: 'alt', 'alt_std', 'sky_cov_frac'," + + f"layer_prms[{ind}]: 'height', 'height_std', 'sky_cov_frac'," + "'period', 'amplitude'.") # Let's create the layers individually for each ceilometer @@ -149,21 +151,21 @@ def mock_layers(n_ceilos: int, lookback_time: float, hit_gap: float, # Here, adjust the types so that it ranks lowest to highest for every dt step. # This needs to be done on a point by point basis, given that layers can cross each other. 
for dt in np.unique(layers['dt']): - # Get the hit altitudes, and sort them from lowest to highest - alts = layers[layers['dt'] == dt]['alt'].sort_values(axis=0) + # Get the hit heights, and sort them from lowest to highest + heights = layers[layers['dt'] == dt]['height'].sort_values(axis=0) # Then deal with the other ones - for (a, alt) in enumerate(alts): + for (a, height) in enumerate(heights): # Except for the first one, any NaN hit gets dropped - if a > 0 and np.isnan(alt): - layers.drop(index=alts.index[a], + if a > 0 and np.isnan(height): + layers.drop(index=heights.index[a], inplace=True) - elif np.isnan(alt): + elif np.isnan(height): # A non-detection should be type 0 - layers.loc[alts.index[a], 'type'] = 0 + layers.loc[heights.index[a], 'type'] = 0 else: - layers.loc[alts.index[a], 'type'] = a+1 + layers.loc[heights.index[a], 'type'] = a+1 # Add the ceilo info as an int layers['ceilo'] = str(ceilo) @@ -174,7 +176,7 @@ def mock_layers(n_ceilos: int, lookback_time: float, hit_gap: float, # Merge it all out = pd.concat(ceilos) # Sort the timesteps in order, and reset the index - out = out.sort_values(['dt', 'alt']).reset_index(drop=True) + out = out.sort_values(['dt', 'height']).reset_index(drop=True) # Fix the dtypes for (col, tpe) in hardcoded.REQ_DATA_COLS.items(): @@ -197,9 +199,10 @@ def canonical_demo_data() -> pd.DataFrame: lookback_time = 900 hit_gap = 15 - lyrs = [{'alt': 1000, 'alt_std': 100, 'sky_cov_frac': 0.1, 'period': 10, 'amplitude': 0}, - {'alt': 2000, 'alt_std': 100, 'sky_cov_frac': 0.5, 'period': 10, 'amplitude': 0}, - {'alt': 5000, 'alt_std': 200, 'sky_cov_frac': 1, 'period': 1800, 'amplitude': 1000}, + lyrs = [{'height': 1000, 'height_std': 100, 'sky_cov_frac': 0.1, 'period': 10, 'amplitude': 0}, + {'height': 2000, 'height_std': 100, 'sky_cov_frac': 0.5, 'period': 10, 'amplitude': 0}, + {'height': 5000, 'height_std': 200, 'sky_cov_frac': 1, 'period': 1800, + 'amplitude': 1000}, ] # Reset the random seed, but only do this temporarily, so 
as to not mess things up for the user. diff --git a/src/ampycloud/utils/utils.py b/src/ampycloud/utils/utils.py index 072b78c..371384b 100644 --- a/src/ampycloud/utils/utils.py +++ b/src/ampycloud/utils/utils.py @@ -1,5 +1,5 @@ """ -Copyright (c) 2022 MeteoSwiss, contributors listed in AUTHORS. +Copyright (c) 2022-2024 MeteoSwiss, contributors listed in AUTHORS. Distributed under the terms of the 3-Clause BSD License. @@ -50,7 +50,7 @@ def check_data_consistency(pdf: pd.DataFrame, column names/types (formally defined in :py:data:`ampycloud.hardcoded.REQ_DATA_COLS`): :: - 'ceilo'/pd.StringDtype(), 'dt'/float, 'alt'/float, 'type'/int + 'ceilo'/pd.StringDtype(), 'dt'/float, 'height'/float, 'type'/int The ``ceilo`` column contains the names/ids of the ceilometers as ``pd.StringDtype()``. See the pandas @@ -62,8 +62,8 @@ def check_data_consistency(pdf: pd.DataFrame, time of the METAR message, such that ``dt`` values are negative, with the smallest one corresponding to the oldest measurement. - The ``alt`` column contains the cloud base hit altitudes reported by the ceilometers, in ft - above ground. + The ``height`` column contains the cloud base hit heights reported by the ceilometers, in ft + above aerodrome level. The ``type`` column contains integers that correspond to the hit *sequence id*. If a given ceilometer is reporting multiple hits for a given timestep (corresponding to a cloud level 1, @@ -71,11 +71,11 @@ def check_data_consistency(pdf: pd.DataFrame, ``2``, ``3``, etc ... Any data point with a ``type`` of ``-1`` will be flagged in the ampycloud plots as a vertical Visibility (VV) hit, **but it will not be treated any differently than any other regular hit**. Type ``0`` corresponds to no (cloud) detection, in which case the - corresponding hit altitude should be a NaN. + corresponding hit height should be a NaN. Important: A **non-detection** corresponds to a valid measurement with a ``dt`` value, a ``type 0``, - and ``NaN`` as the altitude. 
It should not be confused with a **non-observation**, + and ``NaN`` as the height. It should not be confused with a **non-observation**, when no data was acquired at all ! If it all sounds confusing, it is possible to obtain an example of the required data format @@ -102,9 +102,9 @@ def check_data_consistency(pdf: pd.DataFrame, * any of ``pdf`` column type is not as expected. Note that in this case, the code will try to correct the type on the fly. * ``pdf`` has any superfluous columns. In this case, the code will drop them automatically. - * Any hit altitude is negative. - * Any ``type 0`` hit has a non-NaN altitude. - * Any ``type 1`` hit has a NaN altitude. + * Any hit height is negative. + * Any ``type 0`` hit has a non-NaN height. + * Any ``type 1`` hit has a NaN height. * Any ``type 2`` hit does not have a coincident ``type 1`` hit. * Any ``type 3`` hit does not have a coincident ``type 2`` hit. @@ -150,15 +150,15 @@ def check_data_consistency(pdf: pd.DataFrame, logger.warning('Dropping the superfluous %s column from the input data.', key) data.drop((key), axis=1, inplace=True) - # A brief sanity check of the altitudes. We do not issue Errors, since the code can cope + # A brief sanity check of the heights. We do not issue Errors, since the code can cope # with those elements: we simply raise Warnings. 
msgs = [] - if np.any(data.loc[:, 'alt'].values < 0): - msgs += ['Some hit altitudes are negative ?!'] - if not np.all(np.isnan(data.loc[data.type == 0, 'alt'])): - msgs += ['Some type=0 hits have non-NaNs altitude values ?!'] - if np.any(np.isnan(data.loc[data.type == 1, 'alt'])): - msgs += ['Some type=1 hits have NaNs altitude values ?!'] + if np.any(data.loc[:, 'height'].values < 0): + msgs += ['Some hit heights are negative ?!'] + if not np.all(np.isnan(data.loc[data.type == 0, 'height'])): + msgs += ['Some type=0 hits have non-NaNs height values ?!'] + if np.any(np.isnan(data.loc[data.type == 1, 'height'])): + msgs += ['Some type=1 hits have NaNs height values ?!'] if not np.all(np.in1d(data.loc[data.type == 2, 'dt'].values, data.loc[data.type == 1, 'dt'].values)): msgs += ['Some type=2 hits have no coincident type=1 hits ?!'] @@ -241,23 +241,22 @@ def adjust_nested_dict(ref_dict: dict, new_dict: dict, lvls: list = None) -> dic return ref_dict -def calc_base_alt( - vals: npt.ArrayLike, - lookback_perc: int, - alt_perc: int, - ) -> float: - """Calculate the layer base altitude. +def calc_base_height(vals: npt.ArrayLike, + lookback_perc: int, + height_perc: int, + ) -> float: + """Calculate the layer base height. Args: vals (npt.ArrayLike): Ceilometer hits of a given layer. Must be a flat array/ Series of scalars and ordered in time, most recent entries last. lookback_perc (int): Percentage of points to take into account. 100% would correspond to all points, 50% to the recent half, etc. - alt_perc (int): Percentage of points that should be neglected when calculating + height_perc (int): Percentage of points that should be neglected when calculating the base height. Base height will be the minimum of the remaining points. Returns: - float: The layer base altitude. + float: The layer base height. 
Raises: AmpycloudError: Raised if the array passed to the n_largest percentile calculation @@ -268,6 +267,6 @@ def calc_base_alt( if len(n_latest_elements) == 0: raise AmpycloudError( 'Cloud base calculation got an empty array.' - 'Maybe check lookback percentage (is set to %i)' %lookback_perc + f'Maybe check lookback percentage (is set to {lookback_perc})' ) - return np.percentile(n_latest_elements, alt_perc) + return np.percentile(n_latest_elements, height_perc) diff --git a/src/ampycloud/wmo.py b/src/ampycloud/wmo.py index d5a7597..57b71f2 100644 --- a/src/ampycloud/wmo.py +++ b/src/ampycloud/wmo.py @@ -1,5 +1,5 @@ """ -Copyright (c) 2021-2022 MeteoSwiss, contributors listed in AUTHORS. +Copyright (c) 2021-2024 MeteoSwiss, contributors listed in AUTHORS. Distributed under the terms of the 3-Clause BSD License. @@ -165,12 +165,12 @@ def okta2symb(val: int, use_metsymb: bool = False) -> str: @log_func_call(logger) -def alt2code(val: Union[int, float]) -> str: - """ Function that converts a given altitude in hundreds of ft (3 digit number), +def height2code(val: Union[int, float]) -> str: + """ Function that converts a given height in hundreds of ft (3 digit number), e.g. 5000 ft -> 050, 500 ft -> 005. Args: - val (int, float): the altitude to convert, in feet. + val (int, float): the height to convert, in feet. Returns: str: the corresponding METAR code chunk diff --git a/test/ampycloud/plots/test_core.py b/test/ampycloud/plots/test_core.py index c43a186..a39ed15 100644 --- a/test/ampycloud/plots/test_core.py +++ b/test/ampycloud/plots/test_core.py @@ -1,5 +1,5 @@ """ -Copyright (c) 2021-2022 MeteoSwiss, contributors listed in AUTHORS. +Copyright (c) 2021-2024 MeteoSwiss, contributors listed in AUTHORS. Distributed under the terms of the 3-Clause BSD License. 
@@ -34,10 +34,10 @@ def test_large_dts(mpls): # Let's create some data with only NaNs data = pd.DataFrame([['1', 7e5, 1000, 1], ['1', 7e5+1, 1001, 1], ['1', 7e5+2, 1002, 1]], - columns=['ceilo', 'dt', 'alt', 'type']) + columns=['ceilo', 'dt', 'height', 'type']) data['ceilo'] = data['ceilo'].astype(pd.StringDtype()) data['dt'] = data['dt'].astype(float) - data['alt'] = data['alt'].astype(float) + data['height'] = data['height'].astype(float) data['type'] = data['type'].astype(int) # Run ampycloud @@ -68,10 +68,10 @@ def test_direct_prms(mpls): # Let's create some data with only NaNs data = pd.DataFrame([['1', 0, 1000, 1], ['1', 60, 1001, 1], ['1', 120, 1002, 1]], - columns=['ceilo', 'dt', 'alt', 'type']) + columns=['ceilo', 'dt', 'height', 'type']) data['ceilo'] = data['ceilo'].astype(pd.StringDtype()) data['dt'] = data['dt'].astype(float) - data['alt'] = data['alt'].astype(float) + data['height'] = data['height'].astype(float) data['type'] = data['type'].astype(int) # Run ampycloud @@ -137,10 +137,10 @@ def test_empty_plot(mpls): # Let's create some data with only NaNs data = pd.DataFrame([['1', -100, np.nan, 0], ['1', -99, np.nan, 0]], - columns=['ceilo', 'dt', 'alt', 'type']) + columns=['ceilo', 'dt', 'height', 'type']) data['ceilo'] = data['ceilo'].astype(pd.StringDtype()) data['dt'] = data['dt'].astype(float) - data['alt'] = data['alt'].astype(float) + data['height'] = data['height'].astype(float) data['type'] = data['type'].astype(int) # Run ampycloud diff --git a/test/ampycloud/ref_data/Geneva_2018.04.30-18.17.00_SCT100-BKN120.csv b/test/ampycloud/ref_data/Geneva_2018.04.30-18.17.00_SCT100-BKN120.csv index 100eeec..df8d761 100644 --- a/test/ampycloud/ref_data/Geneva_2018.04.30-18.17.00_SCT100-BKN120.csv +++ b/test/ampycloud/ref_data/Geneva_2018.04.30-18.17.00_SCT100-BKN120.csv @@ -1,4 +1,4 @@ -ceilo,dt,alt,type +ceilo,dt,height,type Final05,-897.0,11310.0,1 Final05,-897.0,12270.0,2 Final05,-882.0,11280.0,1 diff --git 
a/test/ampycloud/ref_data/Geneva_2019.01.10-04.45.34_FEW040-BKN070.csv b/test/ampycloud/ref_data/Geneva_2019.01.10-04.45.34_FEW040-BKN070.csv index dbca8c4..1181f72 100644 --- a/test/ampycloud/ref_data/Geneva_2019.01.10-04.45.34_FEW040-BKN070.csv +++ b/test/ampycloud/ref_data/Geneva_2019.01.10-04.45.34_FEW040-BKN070.csv @@ -1,4 +1,4 @@ -ceilo,dt,alt,type +ceilo,dt,height,type PO,-1189.0,7110.0,1 PO,-1174.0,7140.0,1 PO,-1159.0,7170.0,1 diff --git a/test/ampycloud/ref_data/Geneva_2019.01.10-04.50.00_SCT040-BKN070.csv b/test/ampycloud/ref_data/Geneva_2019.01.10-04.50.00_SCT040-BKN070.csv index 00209b8..ec10b06 100644 --- a/test/ampycloud/ref_data/Geneva_2019.01.10-04.50.00_SCT040-BKN070.csv +++ b/test/ampycloud/ref_data/Geneva_2019.01.10-04.50.00_SCT040-BKN070.csv @@ -1,4 +1,4 @@ -ceilo,dt,alt,type +ceilo,dt,height,type PO,-889.0,7200.0,1 PO,-874.0,7200.0,1 PO,-859.0,7200.0,1 diff --git a/test/ampycloud/ref_data/Geneva_2020.10.22.-21.15.00_SCT004-SCT007.csv b/test/ampycloud/ref_data/Geneva_2020.10.22.-21.15.00_SCT004-SCT007.csv index 82a1912..94f7ba0 100644 --- a/test/ampycloud/ref_data/Geneva_2020.10.22.-21.15.00_SCT004-SCT007.csv +++ b/test/ampycloud/ref_data/Geneva_2020.10.22.-21.15.00_SCT004-SCT007.csv @@ -1,4 +1,4 @@ -ceilo,dt,alt,type +ceilo,dt,height,type Final05,-886.0,570.0,1 Final05,-871.0,570.0,1 Final05,-856.0,570.0,1 diff --git a/test/ampycloud/ref_data/Geneva_2020.12.29-04.50.00_FEW044-BKN055.csv b/test/ampycloud/ref_data/Geneva_2020.12.29-04.50.00_FEW044-BKN055.csv index 15d7ff6..67d7bb6 100644 --- a/test/ampycloud/ref_data/Geneva_2020.12.29-04.50.00_FEW044-BKN055.csv +++ b/test/ampycloud/ref_data/Geneva_2020.12.29-04.50.00_FEW044-BKN055.csv @@ -1,4 +1,4 @@ -ceilo,dt,alt,type +ceilo,dt,height,type Final23,-889.0,6570.0,1 Final23,-874.0,6540.0,1 Final23,-859.0,6600.0,1 diff --git a/test/ampycloud/ref_data/Geneva_2021.05.01-03.50.00_SCT004-SCT013.csv b/test/ampycloud/ref_data/Geneva_2021.05.01-03.50.00_SCT004-SCT013.csv index 059b5d1..99f97a2 100644 --- 
a/test/ampycloud/ref_data/Geneva_2021.05.01-03.50.00_SCT004-SCT013.csv +++ b/test/ampycloud/ref_data/Geneva_2021.05.01-03.50.00_SCT004-SCT013.csv @@ -1,4 +1,4 @@ -ceilo,dt,alt,type +ceilo,dt,height,type PO,-889.0,480.0,1 PO,-874.0,480.0,1 PO,-859.0,480.0,1 diff --git a/test/ampycloud/ref_data/Geneva_2021.05.23-04.20.00_FEW048.csv b/test/ampycloud/ref_data/Geneva_2021.05.23-04.20.00_FEW048.csv index d9c457e..a5d2848 100644 --- a/test/ampycloud/ref_data/Geneva_2021.05.23-04.20.00_FEW048.csv +++ b/test/ampycloud/ref_data/Geneva_2021.05.23-04.20.00_FEW048.csv @@ -1,4 +1,4 @@ -ceilo,dt,alt,type +ceilo,dt,height,type PO,-897.0,,0 PO,-882.0,,0 PO,-867.0,,0 diff --git a/test/ampycloud/ref_data/Geneva_2021.09.09-07.50.00_FEW089_MSA10000.csv b/test/ampycloud/ref_data/Geneva_2021.09.09-07.50.00_FEW089_MSA10000.csv index c179467..eb04a67 100644 --- a/test/ampycloud/ref_data/Geneva_2021.09.09-07.50.00_FEW089_MSA10000.csv +++ b/test/ampycloud/ref_data/Geneva_2021.09.09-07.50.00_FEW089_MSA10000.csv @@ -1,4 +1,4 @@ -ceilo,dt,alt,type +ceilo,dt,height,type PO,-1200.0,11880.0,1 PO,-1185.0,11670.0,1 PO,-1170.0,11400.0,1 diff --git a/test/ampycloud/ref_data/Geneva_2021.09.19-11.50.00_OVC032.csv b/test/ampycloud/ref_data/Geneva_2021.09.19-11.50.00_OVC032.csv index 0db0e0a..26ef5c6 100644 --- a/test/ampycloud/ref_data/Geneva_2021.09.19-11.50.00_OVC032.csv +++ b/test/ampycloud/ref_data/Geneva_2021.09.19-11.50.00_OVC032.csv @@ -1,4 +1,4 @@ -ceilo,dt,alt,type +ceilo,dt,height,type Final23,-896.0,3330.0,1 Final23,-881.0,3330.0,1 Final23,-866.0,3390.0,1 diff --git a/test/ampycloud/ref_data/Geneva_2021.10.05-03.50.00_BKN014-SCT029.csv b/test/ampycloud/ref_data/Geneva_2021.10.05-03.50.00_BKN014-SCT029.csv index 2a0b216..6e18ec8 100644 --- a/test/ampycloud/ref_data/Geneva_2021.10.05-03.50.00_BKN014-SCT029.csv +++ b/test/ampycloud/ref_data/Geneva_2021.10.05-03.50.00_BKN014-SCT029.csv @@ -1,4 +1,4 @@ -ceilo,dt,alt,type +ceilo,dt,height,type RWY05,-898.0,3120.0,1 RWY05,-883.0,3120.0,1 
RWY05,-868.0,3120.0,1 diff --git a/test/ampycloud/ref_data/Geneva_2021.10.06-19.20.00_FEW066-SCT073_MSA10000.csv b/test/ampycloud/ref_data/Geneva_2021.10.06-19.20.00_FEW066-SCT073_MSA10000.csv index 9f9531f..8a56458 100644 --- a/test/ampycloud/ref_data/Geneva_2021.10.06-19.20.00_FEW066-SCT073_MSA10000.csv +++ b/test/ampycloud/ref_data/Geneva_2021.10.06-19.20.00_FEW066-SCT073_MSA10000.csv @@ -1,4 +1,4 @@ -ceilo,dt,alt,type +ceilo,dt,height,type Final05,-242.0,,0 Final05,-227.0,,0 Final05,-92.0,,0 diff --git a/test/ampycloud/ref_data/Geneva_2021.11.27-16.50.00_FEW037-SCT059.csv b/test/ampycloud/ref_data/Geneva_2021.11.27-16.50.00_FEW037-SCT059.csv index 3f63d4f..fbf9016 100644 --- a/test/ampycloud/ref_data/Geneva_2021.11.27-16.50.00_FEW037-SCT059.csv +++ b/test/ampycloud/ref_data/Geneva_2021.11.27-16.50.00_FEW037-SCT059.csv @@ -1,4 +1,4 @@ -ceilo,dt,alt,type +ceilo,dt,height,type PO,-900.0,5850.0,1 PO,-885.0,5970.0,1 PO,-870.0,5820.0,1 diff --git a/test/ampycloud/ref_data/Geneva_2021.12.15-06.50.00_OVC001.csv b/test/ampycloud/ref_data/Geneva_2021.12.15-06.50.00_OVC001.csv index 5ebd888..12bd4ac 100644 --- a/test/ampycloud/ref_data/Geneva_2021.12.15-06.50.00_OVC001.csv +++ b/test/ampycloud/ref_data/Geneva_2021.12.15-06.50.00_OVC001.csv @@ -1,4 +1,4 @@ -ceilo,dt,alt,type +ceilo,dt,height,type RWY05,-891.0,330.0,-1 RWY05,-876.0,330.0,-1 RWY05,-861.0,330.0,-1 diff --git a/test/ampycloud/ref_data/Geneva_2023.07.03-09.35.30_SCT044.csv b/test/ampycloud/ref_data/Geneva_2023.07.03-09.35.30_SCT044.csv index ee02838..b73d3d7 100644 --- a/test/ampycloud/ref_data/Geneva_2023.07.03-09.35.30_SCT044.csv +++ b/test/ampycloud/ref_data/Geneva_2023.07.03-09.35.30_SCT044.csv @@ -1,4 +1,4 @@ -ceilo,dt,alt,type +ceilo,dt,height,type CL,1688369725.213,4620.0,1 CL,1688369710.293,4620.0,1 CL,1688369695.173,4620.0,1 diff --git a/test/ampycloud/ref_data/Kloten_2020.12.24-01.20.00_FEW018-BKN051.csv b/test/ampycloud/ref_data/Kloten_2020.12.24-01.20.00_FEW018-BKN051.csv index 841375a..636e647 
100644 --- a/test/ampycloud/ref_data/Kloten_2020.12.24-01.20.00_FEW018-BKN051.csv +++ b/test/ampycloud/ref_data/Kloten_2020.12.24-01.20.00_FEW018-BKN051.csv @@ -1,4 +1,4 @@ -ceilo,dt,alt,type +ceilo,dt,height,type Final_14,-891.0,3540.0,1 Final_14,-876.0,1980.0,1 Final_14,-861.0,5430.0,1 diff --git a/test/ampycloud/ref_data/debug107_2020.01.01-00.01.01_SCT024-SCT031.csv b/test/ampycloud/ref_data/debug107_2020.01.01-00.01.01_SCT024-SCT031.csv index 90809f0..0bf567f 100644 --- a/test/ampycloud/ref_data/debug107_2020.01.01-00.01.01_SCT024-SCT031.csv +++ b/test/ampycloud/ref_data/debug107_2020.01.01-00.01.01_SCT024-SCT031.csv @@ -1,4 +1,4 @@ -ceilo,dt,alt,type +ceilo,dt,height,type 3,-1181.5671950570443,2672.561745382283,1 0,-1181.4743327263454,2740.7140169791196,1 1,-1172.3053207784762,,0 diff --git a/test/ampycloud/ref_data/debug107_2020.01.01-00.16.01_BKN010-BKN025.csv b/test/ampycloud/ref_data/debug107_2020.01.01-00.16.01_BKN010-BKN025.csv index 6e72ebf..62bdf55 100644 --- a/test/ampycloud/ref_data/debug107_2020.01.01-00.16.01_BKN010-BKN025.csv +++ b/test/ampycloud/ref_data/debug107_2020.01.01-00.16.01_BKN010-BKN025.csv @@ -1,4 +1,4 @@ -ceilo,dt,alt,type +ceilo,dt,height,type 0,-1197.5049952437246,3262.4809940488567,1 0,-1197.5049952437246,4483.394304396294,2 2,-1197.1492609319444,3073.9448243991233,1 diff --git a/test/ampycloud/test_core.py b/test/ampycloud/test_core.py index 1908d61..a703a20 100644 --- a/test/ampycloud/test_core.py +++ b/test/ampycloud/test_core.py @@ -45,15 +45,24 @@ def test_reset_prms(): """ Test the reset_prms routine. 
""" # First, let's change one of the dynamic parameter - ref_val = dynamic.AMPYCLOUD_PRMS['MAX_HITS_OKTA0'] + ref_val0 = dynamic.AMPYCLOUD_PRMS['MAX_HITS_OKTA0'] + ref_val8 = dynamic.AMPYCLOUD_PRMS['MAX_HOLES_OKTA8'] + dynamic.AMPYCLOUD_PRMS['MAX_HITS_OKTA0'] = -1 assert dynamic.AMPYCLOUD_PRMS['MAX_HITS_OKTA0'] == -1 - assert dynamic.AMPYCLOUD_PRMS['SLICING_PRMS']['alt_scale_mode'] == 'minmax-scale' - # Then try to reset it + # Try to reset this prm specifically + reset_prms(which='MAX_HITS_OKTA0') + assert dynamic.AMPYCLOUD_PRMS['MAX_HITS_OKTA0'] == ref_val0 + assert dynamic.AMPYCLOUD_PRMS['SLICING_PRMS']['height_scale_mode'] == 'minmax-scale' + + # Then try to reset all of them at once + dynamic.AMPYCLOUD_PRMS['MAX_HITS_OKTA0'] = -1 + dynamic.AMPYCLOUD_PRMS['MAX_HOLES_OKTA8'] = -1 reset_prms() - assert dynamic.AMPYCLOUD_PRMS['MAX_HITS_OKTA0'] == ref_val + assert dynamic.AMPYCLOUD_PRMS['MAX_HITS_OKTA0'] == ref_val0 + assert dynamic.AMPYCLOUD_PRMS['MAX_HOLES_OKTA8'] == ref_val8 def test_run(): @@ -67,9 +76,9 @@ def test_run(): # Create some fake data to get started # 1 very flat layer with no gaps mock_data = mocker.mock_layers(n_ceilos, lookback_time, rate, - [{'alt': 1000, 'alt_std': 10, 'sky_cov_frac': 0.5, + [{'height': 1000, 'height_std': 10, 'sky_cov_frac': 0.5, 'period': 100, 'amplitude': 0}, - {'alt': 2000, 'alt_std': 10, 'sky_cov_frac': 0.5, + {'height': 2000, 'height_std': 10, 'sky_cov_frac': 0.5, 'period': 100, 'amplitude': 0}]) out = run(mock_data) @@ -99,7 +108,7 @@ def test_run_single_point(): # Let's create some data with a single valid point data = pd.DataFrame([['1', -100, 2000, 1], ['1', -99, np.nan, 0]], - columns=['ceilo', 'dt', 'alt', 'type']) + columns=['ceilo', 'dt', 'height', 'type']) # Set the proper column types for (col, tpe) in hardcoded.REQ_DATA_COLS.items(): data[col] = data.loc[:, col].astype(tpe) diff --git a/test/ampycloud/test_data.py b/test/ampycloud/test_data.py index 0cbe18f..0478b59 100644 --- a/test/ampycloud/test_data.py +++ 
b/test/ampycloud/test_data.py @@ -31,7 +31,7 @@ def test_ceilochunk_init(): # Create some fake data to get started # 1 very flat layer with no gaps mock_data = mocker.mock_layers(n_ceilos, lookback_time, rate, - [{'alt': 1000, 'alt_std': 10, 'sky_cov_frac': 1, + [{'height': 1000, 'height_std': 10, 'sky_cov_frac': 1, 'period': 100, 'amplitude': 0}]) # The following line is required as long as the mocker module issues mock data with type 99. mock_data.iloc[-1, mock_data.columns.get_loc('type')] = 1 @@ -53,7 +53,7 @@ def test_ceilochunk_init(): # And now again, but this time with a large hit buffer that coverts all the data dynamic.AMPYCLOUD_PRMS['MSA'] = 0 - dynamic.AMPYCLOUD_PRMS['MSA_HIT_BUFFER'] = mock_data['alt'].max() + 10 + dynamic.AMPYCLOUD_PRMS['MSA_HIT_BUFFER'] = mock_data['height'].max() + 10 chunk = CeiloChunk(mock_data) assert len(chunk.data) == len(mock_data) @@ -67,9 +67,9 @@ def test_ceilochunk_init(): assert dynamic.AMPYCLOUD_PRMS['MSA'] == 0 # Check the ability to set nested parameters in one go - chunk = CeiloChunk(mock_data, prms={'GROUPING_PRMS': {'alt_pad_perc': 'test'}}) - assert chunk.prms['GROUPING_PRMS']['alt_pad_perc'] == 'test' - assert dynamic.AMPYCLOUD_PRMS['GROUPING_PRMS']['alt_pad_perc'] == +10 + chunk = CeiloChunk(mock_data, prms={'GROUPING_PRMS': {'height_pad_perc': 'test'}}) + assert chunk.prms['GROUPING_PRMS']['height_pad_perc'] == 'test' + assert dynamic.AMPYCLOUD_PRMS['GROUPING_PRMS']['height_pad_perc'] == +10 # Check that warnings are raised in case bad parameters are given with warns(AmpycloudWarning): @@ -117,7 +117,7 @@ def test_ceilochunk_basic(): # Create some fake data to get started # 1 very flat layer with no gaps mock_data = mocker.mock_layers(n_ceilos, lookback_time, rate, - [{'alt': 1000, 'alt_std': 10, 'sky_cov_frac': 1, + [{'height': 1000, 'height_std': 10, 'sky_cov_frac': 1, 'period': 100, 'amplitude': 0}]) # Instantiate a CeiloChunk entity ... 
@@ -166,7 +166,7 @@ def test_ceilochunk_basic(): assert chunk.metar_msg(which='groups') == 'OVC009' -@mark.parametrize('altitude1,altitude2,altitude3,ngroups_expected', [ +@mark.parametrize('height1,height2,height3,ngroups_expected', [ param(250., 800., 1200., 3, id='gt min sep'), param(250., 500., 1200., 3, id='eq min sep'), param(250., 350., 1200., 2, id='gt min sep'), @@ -174,16 +174,16 @@ def test_ceilochunk_basic(): param(250., 350., 400., 1, id='multiple overlaps with second merge'), ]) def test_group_separation( - altitude1: float, altitude2: float, altitude3: float, ngroups_expected: int + height1: float, height2: float, height3: float, ngroups_expected: int ): """Test if the separation of close groups works as intended.""" - #create some fake data: - fake_hits = [altitude1] * 50 + [altitude2] * 50 + [altitude3] * 50 + # create some fake data: + fake_hits = [height1] * 50 + [height2] * 50 + [height3] * 50 fake_data = pd.DataFrame({ 'ceilo': ['Ceilometer.PO'] * 150, 'dt': [t for t in range(-1500, 0, 10)], - 'alt': fake_hits, + 'height': fake_hits, 'type': [1] * 150, }) fake_grp_ids = [1] * 50 + [2] * 50 + [3] * 50 @@ -214,7 +214,7 @@ def test_bad_layer_sep_lims(): # Create some fake data to get started # 1 very flat layer with no gaps mock_data = mocker.mock_layers(n_ceilos, lookback_time, rate, - [{'alt': 1000, 'alt_std': 10, 'sky_cov_frac': 1, + [{'height': 1000, 'height_std': 10, 'sky_cov_frac': 1, 'period': 100, 'amplitude': 0}]) # Instantiate a CeiloChunk entity ... @@ -240,7 +240,7 @@ def test_ceilochunk_nocld(): # Create some fake data to get started # 1 very flat layer with no gaps mock_data = mocker.mock_layers(n_ceilos, lookback_time, rate, - [{'alt': 1000, 'alt_std': 10, 'sky_cov_frac': 0, + [{'height': 1000, 'height_std': 10, 'sky_cov_frac': 0, 'period': 100, 'amplitude': 0}]) # Instantiate a CeiloChunk entity ... 
@@ -299,9 +299,9 @@ def test_ceilochunk_2lay(): # Create some fake data to get started mock_data = mocker.mock_layers(n_ceilos, lookback_time, rate, - [{'alt': 1000, 'alt_std': 10, 'sky_cov_frac': 0.5, + [{'height': 1000, 'height_std': 10, 'sky_cov_frac': 0.5, 'period': 100, 'amplitude': 0}, - {'alt': 2000, 'alt_std': 10, 'sky_cov_frac': 0.5, + {'height': 2000, 'height_std': 10, 'sky_cov_frac': 0.5, 'period': 100, 'amplitude': 0}]) # Instantiate a CeiloChunk entity ... @@ -323,7 +323,7 @@ def test_layering_singlepts(): ['dummy', -1, 4000, 2], ['dummy', -1, 4500, 3], ['dummy', -1, np.nan, 0]]), - columns=['ceilo', 'dt', 'alt', 'type']) + columns=['ceilo', 'dt', 'height', 'type']) # Set the proper column types for (col, tpe) in hardcoded.REQ_DATA_COLS.items(): @@ -344,12 +344,12 @@ def test_layering_singlepts(): def test_layering_singleval(): - """ Test the layering step when there is a single altitude value. See #76 for details. """ + """ Test the layering step when there is a single height value. See #76 for details. """ data = np.array([np.ones(30), np.arange(0, 30, 1), np.ones(30)*170000, np.ones(30)]) mock_data = pd.DataFrame(data.T, - columns=['ceilo', 'dt', 'alt', 'type']) + columns=['ceilo', 'dt', 'height', 'type']) # Set the proper column types for (col, tpe) in hardcoded.REQ_DATA_COLS.items(): @@ -373,7 +373,7 @@ def test_coplanar_hull(): data = np.array([np.ones(10), np.arange(0, 10, 1), np.arange(0, 10, 1), np.ones(10)]) mock_data = pd.DataFrame(data.T, - columns=['ceilo', 'dt', 'alt', 'type']) + columns=['ceilo', 'dt', 'height', 'type']) # Set the proper column types for (col, tpe) in hardcoded.REQ_DATA_COLS.items(): @@ -390,13 +390,13 @@ def test_coplanar_hull(): def test_layering_dualeval(): - """ Test the layering step when there are two single altitude values. See #78 for details. """ + """ Test the layering step when there are two single height values. See #78 for details. 
""" data1 = np.array([np.ones(30), np.arange(0, 30, 1), np.ones(30)*120, np.ones(30)*1]) data2 = np.array([np.ones(30), np.arange(0, 30, 1), np.ones(30)*150, np.ones(30)*2]) mock_data = pd.DataFrame(np.concatenate([data1, data2], axis=1).T, - columns=['ceilo', 'dt', 'alt', 'type']) + columns=['ceilo', 'dt', 'height', 'type']) # Set the proper column types for (col, tpe) in hardcoded.REQ_DATA_COLS.items(): diff --git a/test/ampycloud/test_layer.py b/test/ampycloud/test_layer.py index a217335..27e0683 100644 --- a/test/ampycloud/test_layer.py +++ b/test/ampycloud/test_layer.py @@ -1,5 +1,5 @@ """ -Copyright (c) 2021-2023 MeteoSwiss, contributors listed in AUTHORS. +Copyright (c) 2021-2024 MeteoSwiss, contributors listed in AUTHORS. Distributed under the terms of the 3-Clause BSD License. @@ -93,10 +93,10 @@ def test_unstable_layers(): data = pd.read_csv(f) # Drop anything below 6500 ft - data = data.drop(data[data['alt'] < 6500].index) + data = data.drop(data[data['height'] < 6500].index) # Let's run the the Gaussian Mixture Modelling 100 times ... - out = [ncomp_from_gmm(data['alt'].to_numpy(), + out = [ncomp_from_gmm(data['height'].to_numpy(), scores='BIC', rescale_0_to_x=100, min_sep=200, random_seed=45, @@ -108,7 +108,7 @@ def test_unstable_layers(): # Now do it once, but checking that overly-thin layers do not get split-up # Do I issue a warning if the min_sep is dangerously small ? 
with pytest.warns(AmpycloudWarning): - best_ncomp, _, _ = ncomp_from_gmm(data['alt'].to_numpy(), + best_ncomp, _, _ = ncomp_from_gmm(data['height'].to_numpy(), scores='BIC', rescale_0_to_x=100, min_sep=0, random_seed=39, @@ -116,7 +116,7 @@ def test_unstable_layers(): # With this specific seed, I should be finding 3 layers assert best_ncomp == 3 # With this other seed, I should get 2 components - best_ncomp, _, _ = ncomp_from_gmm(data['alt'].to_numpy(), + best_ncomp, _, _ = ncomp_from_gmm(data['height'].to_numpy(), scores='BIC', rescale_0_to_x=100, min_sep=0, random_seed=45, @@ -124,7 +124,7 @@ def test_unstable_layers(): assert best_ncomp == 2 # Once I take into account a suitable min_sep, do the layers not get split anymore ? - best_ncomp, _, _ = ncomp_from_gmm(data['alt'].to_numpy(), + best_ncomp, _, _ = ncomp_from_gmm(data['height'].to_numpy(), scores='BIC', rescale_0_to_x=100, min_sep=200, random_seed=45, diff --git a/test/ampycloud/test_wmo.py b/test/ampycloud/test_wmo.py index 9356143..772a6a7 100644 --- a/test/ampycloud/test_wmo.py +++ b/test/ampycloud/test_wmo.py @@ -1,5 +1,5 @@ """ -Copyright (c) 2021-2022 MeteoSwiss, contributors listed in AUTHORS. +Copyright (c) 2021-2024 MeteoSwiss, contributors listed in AUTHORS. Distributed under the terms of the 3-Clause BSD License. @@ -12,7 +12,7 @@ import numpy as np # Import from this package -from ampycloud.wmo import perc2okta, alt2code +from ampycloud.wmo import perc2okta, height2code def test_perc2okta(): @@ -24,10 +24,10 @@ def test_perc2okta(): np.array([0, 1, 2, 3, 4, 5, 6, 7, 8])) -def test_alt2code(): +def test_height2code(): """ Test the alt2code function, inculding the descaling mode. 
""" - assert alt2code(99.9) == '000' - assert alt2code(100) == '001' - assert alt2code(5555) == '055' - assert alt2code(12345) == '120' + assert height2code(99.9) == '000' + assert height2code(100) == '001' + assert height2code(5555) == '055' + assert height2code(12345) == '120' diff --git a/test/ampycloud/utils/test_mocker.py b/test/ampycloud/utils/test_mocker.py index 333452d..a7c311e 100644 --- a/test/ampycloud/utils/test_mocker.py +++ b/test/ampycloud/utils/test_mocker.py @@ -1,5 +1,5 @@ """ -Copyright (c) 2021-2022 MeteoSwiss, contributors listed in AUTHORS. +Copyright (c) 2021-2024 MeteoSwiss, contributors listed in AUTHORS. Distributed under the terms of the 3-Clause BSD License. @@ -23,7 +23,8 @@ def test_mock_layers(): n_ceilos = 1 lookback_time = 1200 hit_gap = 60 - layer_prms = [{'alt': 1000, 'alt_std': 100, 'sky_cov_frac': 1, 'period': 100, 'amplitude': 0}] + layer_prms = [{'height': 1000, 'height_std': 100, 'sky_cov_frac': 1, 'period': 100, + 'amplitude': 0}] out = mock_layers(n_ceilos, lookback_time, hit_gap, layer_prms) # Correct type ? @@ -32,7 +33,7 @@ def test_mock_layers(): # Correct number of points ? assert len(out) == 1200/60 # No holes ? - assert not np.any(out['alt'].isna()) + assert not np.any(out['height'].isna()) assert not np.any(out['dt'].isna()) # Ordered chronologically ? assert out['dt'].is_monotonic_increasing @@ -41,24 +42,25 @@ def test_mock_layers(): n_ceilos = 2 lookback_time = 1200 hit_gap = 60 - layer_prms = [{'alt': 1000, 'alt_std': 100, 'sky_cov_frac': 0.5, 'period': 100, 'amplitude': 0}] + layer_prms = [{'height': 1000, 'height_std': 100, 'sky_cov_frac': 0.5, 'period': 100, + 'amplitude': 0}] out = mock_layers(n_ceilos, lookback_time, hit_gap, layer_prms) # Correct number of points ? assert len(out) == 1200/60 * n_ceilos # Holes present ? - assert np.any(out['alt'].isna()) + assert np.any(out['height'].isna()) assert not np.any(out['dt'].isna()) # In good numbers ? 
- assert len(out[out['alt'].isna()]) == len(out)/2 + assert len(out[out['height'].isna()]) == len(out)/2 # Now with more than 1 layer n_ceilos = 2 lookback_time = 1200 hit_gap = 60 - layer_prms = [{'alt': 1000, 'alt_std': 100, 'sky_cov_frac': 1, + layer_prms = [{'height': 1000, 'height_std': 100, 'sky_cov_frac': 1, 'period': 100, 'amplitude': 0}, - {'alt': 10000, 'alt_std': 200, 'sky_cov_frac': 1, + {'height': 10000, 'height_std': 200, 'sky_cov_frac': 1, 'period': 100, 'amplitude': 0}] out = mock_layers(n_ceilos, lookback_time, hit_gap, layer_prms) @@ -69,14 +71,14 @@ def test_mock_layers(): n_ceilos = 2 lookback_time = 1200 hit_gap = 60 - layer_prms = [{'alt': 1000, 'alt_std': 100, 'sky_cov_frac': 1, + layer_prms = [{'height': 1000, 'height_std': 100, 'sky_cov_frac': 1, 'period': 100, 'amplitude': 0}, - {'alt': 15000, 'alt_std': 200, 'sky_cov_frac': 1, + {'height': 15000, 'height_std': 200, 'sky_cov_frac': 1, 'period': 100, 'amplitude': 0}] out = mock_layers(n_ceilos, lookback_time, hit_gap, layer_prms) # Holes present ? There should be None, since we have a second layer complete - assert np.any(~out['alt'].isna()) + assert np.any(~out['height'].isna()) assert not np.any(out['dt'].isna()) # Make sur I have the correct number of timesteps diff --git a/test/ampycloud/utils/test_utils.py b/test/ampycloud/utils/test_utils.py index a1908c0..ea67c9e 100644 --- a/test/ampycloud/utils/test_utils.py +++ b/test/ampycloud/utils/test_utils.py @@ -1,5 +1,5 @@ """ -Copyright (c) 2021-2022 MeteoSwiss, contributors listed in AUTHORS. +Copyright (c) 2021-2024 MeteoSwiss, contributors listed in AUTHORS. Distributed under the terms of the 3-Clause BSD License. 
@@ -16,7 +16,7 @@ # Import from ampycloud from ampycloud.utils.utils import (check_data_consistency, tmp_seed, - adjust_nested_dict, calc_base_alt) + adjust_nested_dict, calc_base_height) from ampycloud.utils.mocker import canonical_demo_data from ampycloud.errors import AmpycloudError, AmpycloudWarning from ampycloud import hardcoded @@ -37,57 +37,57 @@ def test_check_data_consistency(): # Now, let's check specific elements that should raise errors or warnings with raises(AmpycloudError): # Empty DataFrame - data = pd.DataFrame(columns=['ceilo', 'dt', 'alt', 'type']) + data = pd.DataFrame(columns=['ceilo', 'dt', 'height', 'type']) check_data_consistency(data) with raises(AmpycloudError): # Missing column - data = pd.DataFrame(np.array([['a', 1, 1]]), columns=['ceilo', 'alt', 'type']) - for col in ['ceilo', 'alt', 'type']: + data = pd.DataFrame(np.array([['a', 1, 1]]), columns=['ceilo', 'height', 'type']) + for col in ['ceilo', 'height', 'type']: data[col] = data.loc[:, col].astype(hardcoded.REQ_DATA_COLS[col]) check_data_consistency(data) with warns(AmpycloudWarning): # Bad data type - data = pd.DataFrame(np.array([['a', 0, 1, 1]]), columns=['ceilo', 'dt', 'alt', 'type']) + data = pd.DataFrame(np.array([['a', 0, 1, 1]]), columns=['ceilo', 'dt', 'height', 'type']) check_data_consistency(data) with warns(AmpycloudWarning): # Extra key data = pd.DataFrame(np.array([['a', 0, 1, 1, 99]]), - columns=['ceilo', 'dt', 'alt', 'type', 'extra']) + columns=['ceilo', 'dt', 'height', 'type', 'extra']) for (col, tpe) in hardcoded.REQ_DATA_COLS.items(): data[col] = data.loc[:, col].astype(tpe) check_data_consistency(data) with warns(AmpycloudWarning): - # Negative alts + # Negative heights data = pd.DataFrame(np.array([['a', 0, -1, 1]]), - columns=['ceilo', 'dt', 'alt', 'type']) + columns=['ceilo', 'dt', 'height', 'type']) for (col, tpe) in hardcoded.REQ_DATA_COLS.items(): data[col] = data.loc[:, col].astype(tpe) check_data_consistency(data) with warns(AmpycloudWarning): # Type 0 
should be NaN data = pd.DataFrame(np.array([['a', 0, 1, 0]]), - columns=['ceilo', 'dt', 'alt', 'type']) + columns=['ceilo', 'dt', 'height', 'type']) for (col, tpe) in hardcoded.REQ_DATA_COLS.items(): data[col] = data.loc[:, col].astype(tpe) check_data_consistency(data) with warns(AmpycloudWarning): # Type 1 should not be NaN data = pd.DataFrame(np.array([['a', 0, np.nan, 1]]), - columns=['ceilo', 'dt', 'alt', 'type']) + columns=['ceilo', 'dt', 'height', 'type']) for (col, tpe) in hardcoded.REQ_DATA_COLS.items(): data[col] = data.loc[:, col].astype(tpe) check_data_consistency(data) with warns(AmpycloudWarning): # Missing type 1 pts data = pd.DataFrame(np.array([['a', 0, 1, 2]]), - columns=['ceilo', 'dt', 'alt', 'type']) + columns=['ceilo', 'dt', 'height', 'type']) for (col, tpe) in hardcoded.REQ_DATA_COLS.items(): data[col] = data.loc[:, col].astype(tpe) check_data_consistency(data) with warns(AmpycloudWarning): # Missing type 2 pts data = pd.DataFrame(np.array([['a', 0, 1, 3]]), - columns=['ceilo', 'dt', 'alt', 'type']) + columns=['ceilo', 'dt', 'height', 'type']) for (col, tpe) in hardcoded.REQ_DATA_COLS.items(): data[col] = data.loc[:, col].astype(tpe) check_data_consistency(data) @@ -142,17 +142,17 @@ def test_adjust_nested_dict(): assert out == new_dict -@mark.parametrize('lookback_perc,alt_perc,q_expected', [ +@mark.parametrize('lookback_perc,height_perc,q_expected', [ param(50, 90, 95, id='both params'), - param(100, 98, 98, id='alt_perc only'), + param(100, 98, 98, id='height_perc only'), param(42, 100, 100., id='lookback_only'), ]) -def test_calc_base_alt( +def test_calc_base_height( lookback_perc: int, - alt_perc: int, + height_perc: int, q_expected: np.float64, ): - """Test the calculation of the slice/ group/ layer base altitude.""" + """Test the calculation of the slice/ group/ layer base height.""" vals = np.arange(1., 101.) 
- q = calc_base_alt(vals, lookback_perc, alt_perc) + q = calc_base_height(vals, lookback_perc, height_perc) np.testing.assert_almost_equal(q, q_expected, decimal=1) From 6108174a054e0c6a94dab6c9a1c65fbaf56aee50 Mon Sep 17 00:00:00 2001 From: 13974615+fpavogt <13974615+fpavogt@users.noreply.github.com> Date: Mon, 8 Apr 2024 15:58:04 +0200 Subject: [PATCH 17/51] As of Python 3.12, setuptools needs to be explicitely requested as an 'extras' in setup.py for dev work --- README.md | 2 +- setup.py | 16 ++++++++++------ 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index cd33d24..26efe44 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ For the full documentation, installation instructions, etc ..., go to: https://m ### License & Copyright -ampycloud is released under the terms of **the 3-Clause BSD license**. The copyright (2021-2023) belongs to MeteoSwiss. +ampycloud is released under the terms of **the 3-Clause BSD license**. The copyright (2021-2024) belongs to MeteoSwiss. ### Contributing to ampycloud diff --git a/setup.py b/setup.py index a324922..31feab3 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,5 @@ """ -Copyright (c) 2021-2022 MeteoSwiss, contributors listed in AUTHORS. +Copyright (c) 2021-2024 MeteoSwiss, contributors listed in AUTHORS. Distributed under the terms of the 3-Clause BSD License. @@ -35,9 +35,10 @@ 'Changelog': 'https://meteoswiss.github.io/ampycloud/changelog.html', 'Issues': 'https://github.com/MeteoSwiss/ampycloud/issues' }, - author="Frédéric P.A. Vogt", + author="Frédéric P.A. 
Vogt, Daniel Regenass", author_email="frederic.vogt@meteoswiss.ch", - description="Characterization of cloud layers from ceilometer measurements", + description="Determining the sky coverage fraction and base height of cloud layers using" + "ceilometer data", long_description=long_description, long_description_content_type="text/markdown", python_requires='>=3.9.0', @@ -52,7 +53,7 @@ "ruamel.yaml" ], extras_require={ - 'dev': ['sphinx', 'sphinx-rtd-theme', 'pylint', 'pytest'] + 'dev': ['sphinx', 'sphinx-rtd-theme', 'pylint', 'pytest', 'setuptools'] }, # Setup entry points to use ampycloud directly from a terminal entry_points={ @@ -63,7 +64,7 @@ # 3 - Alpha # 4 - Beta # 5 - Production/Stable - 'Development Status :: 3 - Alpha', + 'Development Status :: 3 - Beta', # Indicate who your project is intended for 'Intended Audience :: Science/Research', 'Topic :: Scientific/Engineering :: Atmospheric Science', @@ -71,7 +72,10 @@ 'License :: OSI Approved :: BSD License', # Specify the Python versions you support here. In particular, ensure # that you indicate whether you support Python 2, Python 3 or both. - 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python', + 'Programming Language :: Python :: 3.9', + 'Programming Language :: Python :: 3.10', + 'Programming Language :: Python :: 3.11', ], # Let's make sure the parameter non-py files get included in the wheels on pypi. 
include_package_data=True From c4d9847bbd6952382e3e5d7fc8194ffb1d49da64 Mon Sep 17 00:00:00 2001 From: 13974615+fpavogt <13974615+fpavogt@users.noreply.github.com> Date: Tue, 9 Apr 2024 16:24:15 +0200 Subject: [PATCH 18/51] Bump version to reflect the breaking API changes --- src/ampycloud/version.py | 2 +- test/ampycloud/test_data.py | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/ampycloud/version.py b/src/ampycloud/version.py index 8b104d3..21f9e0c 100644 --- a/src/ampycloud/version.py +++ b/src/ampycloud/version.py @@ -9,4 +9,4 @@ """ #:str: the one-and-only place where the ampycloud version is set. -VERSION = '1.0.0' +VERSION = '2.0.0' diff --git a/test/ampycloud/test_data.py b/test/ampycloud/test_data.py index 0478b59..ee8d598 100644 --- a/test/ampycloud/test_data.py +++ b/test/ampycloud/test_data.py @@ -80,11 +80,11 @@ def test_ceilochunk_init(): reset_prms() -@mark.parametrize('alt,expected_flag', [ +@mark.parametrize('height,expected_flag', [ param(1000, False, id='low clouds'), param(15000, True, id='high clouds'), ]) -def test_clouds_above_msa_buffer_flag(alt: int, expected_flag: bool): +def test_clouds_above_msa_buffer_flag(height: int, expected_flag: bool): """ Test the high clouds flagging routine. 
""" dynamic.AMPYCLOUD_PRMS['MAX_HITS_OKTA0'] = 3 @@ -97,9 +97,9 @@ def test_clouds_above_msa_buffer_flag(alt: int, expected_flag: bool): # Create some fake data to get started # 1 very flat layer with no gaps mock_data = mocker.mock_layers( - n_ceilos, lookback_time, rate, [ - {'alt': alt, 'alt_std': 10, 'sky_cov_frac': 0.1, 'period': 100, 'amplitude': 0} - ] + n_ceilos, lookback_time, rate, [{ + 'height': height, 'height_std': 10, 'sky_cov_frac': 0.1, 'period': 100, 'amplitude': 0 + }] ) chunk = CeiloChunk(mock_data) assert chunk.clouds_above_msa_buffer == expected_flag @@ -255,11 +255,11 @@ def test_ceilochunk_nocld(): assert chunk.metar_msg() == 'NCD' -@mark.parametrize('alt', [ +@mark.parametrize('height', [ param(10500, id='in buffer'), param(15000, id='above buffer'), ]) -def test_ceilochunk_highcld(alt): +def test_ceilochunk_highcld(height): """ Test the methods of CeiloChunks when high clouds are seen in the interval. """ dynamic.AMPYCLOUD_PRMS['MAX_HITS_OKTA0'] = 3 @@ -273,7 +273,7 @@ def test_ceilochunk_highcld(alt): # 1 very flat layer with no gaps mock_data = mocker.mock_layers( n_ceilos, lookback_time, rate, - [{'alt': alt, 'alt_std': 10, 'sky_cov_frac': 0.1, 'period': 100, 'amplitude': 0}] + [{'height': height, 'height_std': 10, 'sky_cov_frac': 0.1, 'period': 100, 'amplitude': 0}] ) # Instantiate a CeiloChunk entity ... 
From c732f2b4cb155cb8cf25a5ad05be78804623fd89 Mon Sep 17 00:00:00 2001 From: Daniel Regenass Date: Wed, 10 Apr 2024 16:14:30 +0200 Subject: [PATCH 19/51] AMAROC-768 remove explicit build of setuptools --- .github/workflows/CI_pylinter.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/CI_pylinter.yml b/.github/workflows/CI_pylinter.yml index 9da6ac9..0895c40 100644 --- a/.github/workflows/CI_pylinter.yml +++ b/.github/workflows/CI_pylinter.yml @@ -35,7 +35,6 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - python -m pip install setuptools shell: bash # Here, let's install our module to make sure all the dependencies specified in setup.py are From cc35cfd1ff393ee10fe265059327079a16015169 Mon Sep 17 00:00:00 2001 From: Sophie Rethore Date: Wed, 17 Apr 2024 11:54:50 +0200 Subject: [PATCH 20/51] AMAROC-765 Remove not necessary converting data --- src/ampycloud/utils/performance.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/ampycloud/utils/performance.py b/src/ampycloud/utils/performance.py index 655e2d3..bbf74a5 100644 --- a/src/ampycloud/utils/performance.py +++ b/src/ampycloud/utils/performance.py @@ -44,8 +44,6 @@ def get_speed_benchmark(niter: int = 10) -> tuple: _, _ = demo() dts += [(datetime.now()-start).total_seconds()] - dts = np.array(dts) - logger.info('ampycloud demo() exec-time from %i runs:', niter) logger.info(' mean [std]: %.2fs [%.2fs]', np.mean(dts), np.std(dts)) logger.info(' median [min; max]: %.2f [%.2fs; %.2fs]', From 39908d083dd1c551d7b1c694732bf55a2e724c1b Mon Sep 17 00:00:00 2001 From: Sophie Rethore Date: Wed, 17 Apr 2024 17:00:55 +0200 Subject: [PATCH 21/51] AMAROC-765 Correct easy type issues --- src/ampycloud/cluster.py | 6 +- src/ampycloud/core.py | 16 +- src/ampycloud/data.py | 23 ++- src/ampycloud/icao.py | 2 +- src/ampycloud/layer.py | 3 +- src/ampycloud/plots/core.py | 9 +- src/ampycloud/plots/diagnostics.py | 303 +++++++++++++++-------------- 
src/ampycloud/plots/secondary.py | 2 +- src/ampycloud/scaler.py | 8 +- src/ampycloud/utils/utils.py | 8 +- src/ampycloud/wmo.py | 4 +- 11 files changed, 196 insertions(+), 188 deletions(-) diff --git a/src/ampycloud/cluster.py b/src/ampycloud/cluster.py index b847fc8..36ad336 100644 --- a/src/ampycloud/cluster.py +++ b/src/ampycloud/cluster.py @@ -10,7 +10,7 @@ # Import from Python import logging -from typing import Union +from typing import (Optional, Union) import numpy as np from sklearn.cluster import AgglomerativeClustering @@ -23,7 +23,7 @@ @log_func_call(logger) -def agglomerative_cluster(data: np.ndarray, n_clusters: int = None, +def agglomerative_cluster(data: np.ndarray, n_clusters: Union[int, None] = None, metric: str = 'euclidean', linkage: str = 'single', distance_threshold: Union[int, float] = 1) -> tuple: """ Function that wraps arround :py:class:`sklearn.cluster.AgglomerativeClustering`. @@ -54,7 +54,7 @@ def agglomerative_cluster(data: np.ndarray, n_clusters: int = None, @log_func_call(logger) -def clusterize(data: np.ndarray, algo: str = None, **kwargs: dict) -> tuple: +def clusterize(data: np.ndarray, algo: Union[str, None] = None, **kwargs: dict) -> Optional[tuple]: """ Umbrella clustering routine, that provides a single access point to the different clustering algorithms. diff --git a/src/ampycloud/core.py b/src/ampycloud/core.py index 1e52de8..2d13a23 100644 --- a/src/ampycloud/core.py +++ b/src/ampycloud/core.py @@ -56,11 +56,11 @@ def copy_prm_file(save_loc: str = './', which: str = 'defaults') -> None: """ # Let's take a look at the path I was given - save_loc = Path(save_loc) + given_path = Path(save_loc) # I won't create stuff for the users. ampycloud is not that nice. 
- if not save_loc.exists(): + if not given_path.exists(): raise AmpycloudError('save_loc does not appear to exist !') - if not save_loc.is_dir(): + if not given_path.is_dir(): raise AmpycloudError('save_loc does not appear to be a directory !') # Next, let's look at all the parameter files available ... @@ -73,11 +73,11 @@ def copy_prm_file(save_loc: str = './', which: str = 'defaults') -> None: if (fname := f'ampycloud_{which}_prms.yml') not in ref_files: raise AmpycloudError(f'Parameter file {fname} not found.') - if (save_loc / fname).exists(): - raise AmpycloudError(f'File {fname} already exists at save_loc={save_loc}') + if (given_path / fname).exists(): + raise AmpycloudError(f'File {fname} already exists at save_loc={given_path}') # All looks good, let's copy the file - copy(ref_loc / fname, save_loc / fname) + copy(ref_loc / fname, given_path / fname) @log_func_call(logger) @@ -152,8 +152,8 @@ def reset_prms() -> None: @log_func_call(logger) -def run(data: pd.DataFrame, prms: dict = None, geoloc: str = None, - ref_dt: Union[str, datetime] = None) -> CeiloChunk: +def run(data: pd.DataFrame, prms: Union[dict, None] = None, geoloc: Union[str, None] = None, + ref_dt: Union[str, datetime, None] = None) -> CeiloChunk: """ Runs the ampycloud algorithm on a given dataset. Args: diff --git a/src/ampycloud/data.py b/src/ampycloud/data.py index 6633dfa..1f999b2 100644 --- a/src/ampycloud/data.py +++ b/src/ampycloud/data.py @@ -260,7 +260,7 @@ def max_hits_per_layer(self) -> int: def _calculate_base_height_for_selection( self, - data_indexer: pd.Series(dtype=bool), + data_indexer # type: pd.Series[bool] ) -> float: """Calculate the cloud base height for a selection of data. @@ -297,12 +297,12 @@ def _get_min_sep_for_altitude(self, altitude: float) -> float: """ if len(self.prms['MIN_SEP_LIMS']) != \ - len(self.prms['MIN_SEP_VALS']) - 1: - raise AmpycloudError( - '"MIN_SEP_LIMS" must have one less item than "MIN_SEP_VALS".' 
- 'Got MIN_SEP_LIMS %i and MIN_SEP_VALS %i', - (self.prms['MIN_SEP_LIMS'], self.prms['MIN_SEP_VALS']) - ) + len(self.prms['MIN_SEP_VALS']) - 1: + raise AmpycloudError( + '"MIN_SEP_LIMS" must have one less item than "MIN_SEP_VALS".' + 'Got MIN_SEP_LIMS %i and MIN_SEP_VALS %i', + (self.prms['MIN_SEP_LIMS'], self.prms['MIN_SEP_VALS']) + ) min_sep_val_id = np.searchsorted(self.prms['MIN_SEP_LIMS'], altitude) @@ -361,7 +361,7 @@ def _setup_sligrolay_pdf(self, which: str = 'slices') -> tuple[pd.DataFrame, npt ] # We want to raise early if 'which' is unknown. - if not which in ['slices', 'groups', 'layers']: + if which not in ['slices', 'groups', 'layers']: raise AmpycloudError( 'Trying to initialize a data frame for %s ' 'which is unknown. Keyword arg "which" must be one of' @@ -390,7 +390,6 @@ def _setup_sligrolay_pdf(self, which: str = 'slices') -> tuple[pd.DataFrame, npt pdf = pd.DataFrame(index=range(n_ind), columns=cols) cluster_ids = self._get_cluster_ids(which) - for ind, cid in enumerate(cluster_ids): if which == 'groups': # Here, check if the layering was already done ... in which case one should NOT @@ -427,7 +426,7 @@ def _calculate_cloud_amount( Results are written to the "okta" column of the DF. """ - for ind, cid in enumerate(cluster_ids): + for ind, cid in enumerate(cluster_ids): # Which hits are in this sli/gro/lay ? in_sligrolay = self.data[which[:-1]+'_id'] == cid # Compute the number of hits of this slice/group/layer for each ceilometer, @@ -515,7 +514,7 @@ def _calculate_sligrolay_base_height( pd.DataFrame: with calculatio results in columnm alt_base """ - for ind, cid in enumerate(cluster_ids): + for ind, cid in enumerate(cluster_ids): # Which hits are in this sli/gro/lay ? 
in_sligrolay = self.data[which[:-1]+'_id'] == cid # Compute the base altitude @@ -583,7 +582,7 @@ def metarize( pdf = self._add_sligrolay_information(which, pdf, cids) # Then loop through all of the layers/ groups/ slices and add METAR codes - for ind, _ in enumerate(cids): + for ind, _ in enumerate(cids): pdf.iloc[ind, pdf.columns.get_loc('code')] = \ wmo.okta2code(pdf.iloc[ind, pdf.columns.get_loc('okta')]) + \ diff --git a/src/ampycloud/icao.py b/src/ampycloud/icao.py index c13b9db..37498e8 100644 --- a/src/ampycloud/icao.py +++ b/src/ampycloud/icao.py @@ -44,7 +44,7 @@ def significant_cloud(oktas: list) -> list: """ sig_level = 0 - sig = [] + sig: list[bool] = [] for okta in oktas: # There can be no more than 3 significant cloud layers. # See that footnote 14 in the ICAO doc ! diff --git a/src/ampycloud/layer.py b/src/ampycloud/layer.py index 7791629..70ed176 100644 --- a/src/ampycloud/layer.py +++ b/src/ampycloud/layer.py @@ -144,7 +144,7 @@ def ncomp_from_gmm(vals: np.ndarray, min_sep: Union[int, float] = 0, layer_base_params: dict[str, int] = {'lookback_perc': 100, 'alt_perc': 5}, scores: str = 'BIC', - rescale_0_to_x: float = None, + rescale_0_to_x: Union[float, None] = None, random_seed: int = 42, **kwargs: dict) -> tuple: """ Runs a Gaussian Mixture Model on 1-D data, to determine if it contains 1, 2, or 3 @@ -160,6 +160,7 @@ def ncomp_from_gmm(vals: np.ndarray, first decide how many components looks "best", at which point these may get merged depending on min_sep. I.e. min_sep does not lead to re-running the GMM, it only merges the identified layers if required. + layer_base_params: Defined ampycloud parameters. scores (str, optional): either 'BIC' or 'AIC', to use Baysian Information Criterion or Akaike Information criterion scores. 
rescale_0_to_x (float, optional): if set, vals will be rescaled between 0 and this value diff --git a/src/ampycloud/plots/core.py b/src/ampycloud/plots/core.py index 5ba0f3c..8d6a407 100644 --- a/src/ampycloud/plots/core.py +++ b/src/ampycloud/plots/core.py @@ -24,10 +24,13 @@ @set_mplstyle @log_func_call(logger) -def diagnostic(chunk: CeiloChunk, upto: str = 'layers', show_ceilos: bool = False, - ref_metar: str = None, ref_metar_origin: str = None, +def diagnostic(chunk: CeiloChunk, upto: str = 'layers', + show_ceilos: bool = False, + ref_metar: Union[str, None] = None, + ref_metar_origin: Union[str, None] = None, show: bool = True, - save_stem: str = None, save_fmts: Union[list, str] = None) -> None: + save_stem: Union[str, None] = None, + save_fmts: Union[list, str, None] = None) -> None: """ A function to create the ampycloud diagnostic plot all the way to the layering step (included). This is the ultimate ampycloud plot that shows it all (or not - you choose !). diff --git a/src/ampycloud/plots/diagnostics.py b/src/ampycloud/plots/diagnostics.py index 6e65492..b32bd9c 100644 --- a/src/ampycloud/plots/diagnostics.py +++ b/src/ampycloud/plots/diagnostics.py @@ -12,11 +12,11 @@ import logging from functools import partial from copy import deepcopy +from typing import Union import numpy as np import matplotlib.pyplot as plt -from matplotlib import gridspec +from matplotlib import (gridspec, rcParams) from matplotlib.lines import Line2D -from matplotlib import rcParams # Import from this package from .. import dynamic, scaler, fluffer @@ -174,71 +174,72 @@ def show_slices(self) -> None: all_clrs = plt.rcParams['axes.prop_cycle'].by_key()['color'] # Then loop through each slice - for ind in range(self._chunk.n_slices): - - # Which hits are in the slice ? - in_slice = np.array(self._chunk.data['slice_id'] == - self._chunk.slices.at[ind, 'cluster_id']) - - # Create an array of facecolors ... 
choose them from my set of colors - base_clr = all_clrs[ind % len(all_clrs)] - fcs = np.array([base_clr] * len(self._chunk.data)) - fcs[is_vv] = 'none' - - # I can finally show the points ... - self._axs[0].scatter(self._chunk.data.loc[in_slice, 'dt'], - self._chunk.data.loc[in_slice, 'alt'], - marker='o', s=10, c=fcs[in_slice], edgecolor=base_clr) - - # ... and the corresponding LOWESS-fit used to derive their fluffiness - _, lowess_pts = fluffer.get_fluffiness( - self._chunk.data.loc[in_slice, ['dt', 'alt']].values, **self._chunk.prms['LOWESS']) - self._axs[0].plot(lowess_pts[:, 0], lowess_pts[:, 1], - ls='-', lw=1.5, c=base_clr, drawstyle='steps-mid', zorder=0) - - # Let's also plot the overlap area of the slice - slice_min = self._chunk.slices.loc[ind, 'alt_min'] - slice_max = self._chunk.slices.loc[ind, 'alt_max'] - thickness = self._chunk.slices.loc[ind, 'thickness'] - alt_pad = self._chunk.prms['GROUPING_PRMS']['alt_pad_perc']/100 - - # Get some fake data spanning the entire data range - misc = np.linspace(self._chunk.data['dt'].min(skipna=True), - self._chunk.data['dt'].max(skipna=True), 3) - self._axs[0].fill_between(misc, - np.ones_like(misc) * (slice_min - alt_pad * thickness), - np.ones_like(misc) * (slice_max + alt_pad * thickness), - edgecolor='none', alpha=0.1, zorder=0, - facecolor=base_clr) - - # Stop here if that slice has 0 okta. - if self._chunk.slices.iloc[ind]['okta'] == 0: - continue - - # Prepare to display the METAR codes for the slices. - # First, check if it is isolated or not, and significant or not. 
- if self._chunk.slices.iloc[ind]['significant']: - alpha = 1 - else: - alpha = 0 - if self._chunk.slices.iloc[ind]['isolated'] is False: - warn = r' $\Bumpeq$' - else: - warn = '' - - # Show the slice METAR text, plus the fluffiness, plus the isolation status - msg = r'\smaller ' - msg += wmo.okta2symb( - self._chunk.slices.iloc[ind]['okta'], - use_metsymb = dynamic.AMPYCLOUD_PRMS['MPL_STYLE'] == 'metsymb') - msg += ' ' + self._chunk.slices.iloc[ind]['code'] + \ - rf' $f$:{self._chunk.slices.loc[ind, "fluffiness"]:.0f} ft' - msg += warn - self._axs[1].text(0.5, self._chunk.slices.loc[ind, 'alt_base'], - texify(msg), - va='center', ha='center', color=base_clr, - bbox={'facecolor': 'none', 'edgecolor': base_clr, - 'alpha': alpha, 'ls': '--'}) + if self._chunk.n_slices is not None: + for ind in range(self._chunk.n_slices): + + # Which hits are in the slice ? + in_slice = np.array(self._chunk.data['slice_id'] == + self._chunk.slices.at[ind, 'cluster_id']) + + # Create an array of facecolors ... choose them from my set of colors + base_clr = all_clrs[ind % len(all_clrs)] + fcs = np.array([base_clr] * len(self._chunk.data)) + fcs[is_vv] = 'none' + + # I can finally show the points ... + self._axs[0].scatter(self._chunk.data.loc[in_slice, 'dt'], + self._chunk.data.loc[in_slice, 'alt'], + marker='o', s=10, c=fcs[in_slice], edgecolor=base_clr) + + # ... 
and the corresponding LOWESS-fit used to derive their fluffiness + _, lowess_pts = fluffer.get_fluffiness( + self._chunk.data.loc[in_slice, ['dt', 'alt']].values, **self._chunk.prms['LOWESS']) + self._axs[0].plot(lowess_pts[:, 0], lowess_pts[:, 1], + ls='-', lw=1.5, c=base_clr, drawstyle='steps-mid', zorder=0) + + # Let's also plot the overlap area of the slice + slice_min = self._chunk.slices.loc[ind, 'alt_min'] + slice_max = self._chunk.slices.loc[ind, 'alt_max'] + thickness = self._chunk.slices.loc[ind, 'thickness'] + alt_pad = self._chunk.prms['GROUPING_PRMS']['alt_pad_perc']/100 + + # Get some fake data spanning the entire data range + misc = np.linspace(self._chunk.data['dt'].min(skipna=True), + self._chunk.data['dt'].max(skipna=True), 3) + self._axs[0].fill_between(misc, + np.ones_like(misc) * (slice_min - alt_pad * thickness), + np.ones_like(misc) * (slice_max + alt_pad * thickness), + edgecolor='none', alpha=0.1, zorder=0, + facecolor=base_clr) + + # Stop here if that slice has 0 okta. + if self._chunk.slices.iloc[ind]['okta'] == 0: + continue + + # Prepare to display the METAR codes for the slices. + # First, check if it is isolated or not, and significant or not. 
+ if self._chunk.slices.iloc[ind]['significant']: + alpha = 1 + else: + alpha = 0 + if self._chunk.slices.iloc[ind]['isolated'] is False: + warn = r' $\Bumpeq$' + else: + warn = '' + + # Show the slice METAR text, plus the fluffiness, plus the isolation status + msg = r'\smaller ' + msg += wmo.okta2symb( + self._chunk.slices.iloc[ind]['okta'], + use_metsymb = dynamic.AMPYCLOUD_PRMS['MPL_STYLE'] == 'metsymb') + msg += ' ' + self._chunk.slices.iloc[ind]['code'] + \ + rf' $f$:{self._chunk.slices.loc[ind, "fluffiness"]:.0f} ft' + msg += warn + self._axs[1].text(0.5, self._chunk.slices.loc[ind, 'alt_base'], + texify(msg), + va='center', ha='center', color=base_clr, + bbox={'facecolor': 'none', 'edgecolor': base_clr, + 'alpha': alpha, 'ls': '--'}) def show_groups(self, show_points: bool = False) -> None: """ Show the group data. @@ -256,44 +257,45 @@ def show_groups(self, show_points: bool = False) -> None: transform=self._axs[2].transAxes) # Loop through each identified group - for ind in range(self._chunk.n_groups): - - if show_points: - # Which hits are in the group ? - in_group = np.array(self._chunk.data['group_id'] == - self._chunk.groups.at[ind, 'cluster_id']) - - # I can finally show the points ... - self._axs[0].scatter(self._chunk.data[in_group]['dt'], - self._chunk.data[in_group]['alt'], - marker=MRKS[ind % len(MRKS)], - s=40, c='none', edgecolor='gray', lw=1, zorder=10, alpha=0.5) - - # Stop here if that group has 0 okta. - if self._chunk.groups.iloc[ind]['okta'] == 0: - continue - - # Prepare to display the METAR codes for the groups. - # First, check if it is significant or not. - if self._chunk.groups.iloc[ind]['significant']: - alpha = 1 - else: - alpha = 0 - - # Then also check if these groups contain multiple sub-layers ... 
- symbs = {-1: r'', 1: r'$-$', 2: r'$=$', 3: r'$\equiv$'} - warn = ' ' + symbs[self._chunk.groups.iloc[ind]['ncomp']] - - # Show the group METAR text - msg = r'\smaller ' + wmo.okta2symb( - self._chunk.groups.iloc[ind]['okta'], - use_metsymb=(dynamic.AMPYCLOUD_PRMS['MPL_STYLE'] == 'metsymb') - ) + ' ' + self._chunk.groups.iloc[ind]['code'] + warn - self._axs[2].text(0.5, self._chunk.groups.iloc[ind]['alt_base'], - texify(msg), - va='center', ha='center', color='gray', - bbox={'facecolor': 'none', 'edgecolor': 'gray', - 'alpha': alpha, 'ls': '--'}) + if self._chunk.n_groups is not None: + for ind in range(self._chunk.n_groups): + + if show_points: + # Which hits are in the group ? + in_group = np.array(self._chunk.data['group_id'] == + self._chunk.groups.at[ind, 'cluster_id']) + + # I can finally show the points ... + self._axs[0].scatter(self._chunk.data[in_group]['dt'], + self._chunk.data[in_group]['alt'], + marker=MRKS[ind % len(MRKS)], + s=40, c='none', edgecolor='gray', lw=1, zorder=10, alpha=0.5) + + # Stop here if that group has 0 okta. + if self._chunk.groups.iloc[ind]['okta'] == 0: + continue + + # Prepare to display the METAR codes for the groups. + # First, check if it is significant or not. + if self._chunk.groups.iloc[ind]['significant']: + alpha = 1 + else: + alpha = 0 + + # Then also check if these groups contain multiple sub-layers ... + symbs = {-1: r'', 1: r'$-$', 2: r'$=$', 3: r'$\equiv$'} + warn = ' ' + symbs[self._chunk.groups.iloc[ind]['ncomp']] + + # Show the group METAR text + msg = r'\smaller ' + wmo.okta2symb( + self._chunk.groups.iloc[ind]['okta'], + use_metsymb=(dynamic.AMPYCLOUD_PRMS['MPL_STYLE'] == 'metsymb') + ) + ' ' + self._chunk.groups.iloc[ind]['code'] + warn + self._axs[2].text(0.5, self._chunk.groups.iloc[ind]['alt_base'], + texify(msg), + va='center', ha='center', color='gray', + bbox={'facecolor': 'none', 'edgecolor': 'gray', + 'alpha': alpha, 'ls': '--'}) def show_layers(self) -> None: """ Show the layer data. 
""" @@ -304,47 +306,48 @@ def show_layers(self) -> None: transform=self._axs[3].transAxes) # Start looping through every layer ... - for ind in range(self._chunk.n_layers): - - # Which hits are in the layer? - in_layer = np.array(self._chunk.data['layer_id'] == - self._chunk.layers.at[ind, 'cluster_id']) - - # I can finally show the points ... - self._axs[0].scatter(self._chunk.data[in_layer]['dt'], - self._chunk.data[in_layer]['alt'], - marker=MRKS[ind % len(MRKS)], - s=40, c='none', edgecolor='k', lw=1, zorder=10, alpha=0.5) - - # Draw the line of the layer base - if self._chunk.layers.iloc[ind]['okta'] == 0: - lls = ':' - else: - lls = '--' - - self._axs[0].axhline(self._chunk.layers.iloc[ind]['alt_base'], xmax=1, c='k', - lw=1, zorder=0, ls=lls, clip_on=False) - - # Stop here for empty layers - if self._chunk.layers.iloc[ind]['okta'] == 0: - continue - - # Prepare to display the METAR codes for the layer. - # First, check if it is significant, or not. - if self._chunk.layers.iloc[ind]['significant']: - alpha = 1 - else: - alpha = 0 - - # Display the actual METAR text - msg = r'\smaller ' + wmo.okta2symb( - self._chunk.layers.iloc[ind]['okta'], - use_metsymb=(dynamic.AMPYCLOUD_PRMS['MPL_STYLE'] == 'metsymb') - ) + ' ' + self._chunk.layers.iloc[ind]['code'] - self._axs[3].text(0.5, self._chunk.layers.iloc[ind]['alt_base'], - texify(msg), - va='center', ha='center', color='k', - bbox={'facecolor': 'none', 'edgecolor': 'k', 'alpha': alpha}) + if self._chunk.n_layers is not None: + for ind in range(self._chunk.n_layers): + + # Which hits are in the layer? + in_layer = np.array(self._chunk.data['layer_id'] == + self._chunk.layers.at[ind, 'cluster_id']) + + # I can finally show the points ... 
+ self._axs[0].scatter(self._chunk.data[in_layer]['dt'], + self._chunk.data[in_layer]['alt'], + marker=MRKS[ind % len(MRKS)], + s=40, c='none', edgecolor='k', lw=1, zorder=10, alpha=0.5) + + # Draw the line of the layer base + if self._chunk.layers.iloc[ind]['okta'] == 0: + lls = ':' + else: + lls = '--' + + self._axs[0].axhline(self._chunk.layers.iloc[ind]['alt_base'], xmax=1, c='k', + lw=1, zorder=0, ls=lls, clip_on=False) + + # Stop here for empty layers + if self._chunk.layers.iloc[ind]['okta'] == 0: + continue + + # Prepare to display the METAR codes for the layer. + # First, check if it is significant, or not. + if self._chunk.layers.iloc[ind]['significant']: + alpha = 1 + else: + alpha = 0 + + # Display the actual METAR text + msg = r'\smaller ' + wmo.okta2symb( + self._chunk.layers.iloc[ind]['okta'], + use_metsymb=(dynamic.AMPYCLOUD_PRMS['MPL_STYLE'] == 'metsymb') + ) + ' ' + self._chunk.layers.iloc[ind]['code'] + self._axs[3].text(0.5, self._chunk.layers.iloc[ind]['alt_base'], + texify(msg), + va='center', ha='center', color='k', + bbox={'facecolor': 'none', 'edgecolor': 'k', 'alpha': alpha}) def add_vv_legend(self) -> None: """ Adds a legend about the VV hits.""" @@ -385,7 +388,7 @@ def add_geoloc_and_ref_dt(self) -> None: self._axs[2].text(0.5, -0.02, texify(r'\smaller ' + '\n '.join(msg)), transform=self._axs[2].transAxes, ha='center', va='top') - def add_ref_metar(self, name: str, metar: str) -> None: + def add_ref_metar(self, name: Union[str, None], metar: Union[str, None]) -> None: """ Display a reference METAR, for example from human observers, different code, etc ... Args: @@ -432,7 +435,7 @@ def format_slice_axes(self) -> None: """ # Here, only proceed if I have actually found some slices ! - if self._chunk.n_slices > 0: + if self._chunk.n_slices is not None and self._chunk.n_slices > 0: # In order to show secondary_axis, we need to feed the forward/reverse scaling function # with the actual ones used in the code. 
Since these are dependant upon the data, @@ -480,7 +483,7 @@ def format_group_axes(self) -> None: """ # Only proceed if I have found some clusters ... - if self._chunk.n_groups > 0: + if self._chunk.n_groups is not None and self._chunk.n_groups > 0: # In order to show secondary_axis, we need to feed the forward/reverse scaling function # with the actual ones used in the code. Since these are dependant upon the data, @@ -504,7 +507,7 @@ def format_group_axes(self) -> None: # And reduce the fontsize while we're at it ... secax_x.tick_params(axis='x', which='both', labelsize=rcParams['font.size']-2) - def save(self, fn_out: str, fmts: list = None) -> None: + def save(self, fn_out: str, fmts: Union[list, None] = None) -> None: """ Saves the plot to file. Args: diff --git a/src/ampycloud/plots/secondary.py b/src/ampycloud/plots/secondary.py index 8325863..7e73c41 100644 --- a/src/ampycloud/plots/secondary.py +++ b/src/ampycloud/plots/secondary.py @@ -28,7 +28,7 @@ @set_mplstyle @log_func_call(logger) def scaling_fcts(show: bool = True, - save_stem: str = None, save_fmts: Union[list, str] = None) -> None: + save_stem: Union[str, None] = None, save_fmts: Union[list, str, None] = None) -> None: """ Plots the different scaling functions. Args: diff --git a/src/ampycloud/scaler.py b/src/ampycloud/scaler.py index ee115d5..89814c9 100644 --- a/src/ampycloud/scaler.py +++ b/src/ampycloud/scaler.py @@ -22,7 +22,7 @@ @log_func_call(logger) -def shift_and_scale(vals: np.ndarray, shift: Union[int, float] = None, +def shift_and_scale(vals: np.ndarray, shift: Union[int, float, None] = None, scale: Union[int, float] = 1, mode: str = 'do') -> np.ndarray: """ Shift (by a constant) and scale (by a constant) the data. 
@@ -58,8 +58,8 @@ def shift_and_scale(vals: np.ndarray, shift: Union[int, float] = None, @log_func_call(logger) def minmax_scale(vals: np.ndarray, - min_val: Union[float, int] = None, - max_val: Union[float, int] = None, + min_val: Union[float, int, None] = None, + max_val: Union[float, int, None] = None, mode: str = 'do') -> np.ndarray: """ Rescale the data onto a [0, 1] interval, possibly forcing a specific and/or minimum interval range. @@ -276,7 +276,7 @@ def convert_kwargs(vals: np.ndarray, fct: str, **kwargs: dict) -> dict: @log_func_call(logger) -def apply_scaling(vals: np.ndarray, fct: str = None, **kwargs: dict) -> np.ndarray: +def apply_scaling(vals: np.ndarray, fct: Union[str, None] = None, **kwargs: dict) -> np.ndarray: """ Umbrella scaling routine, that gathers all the individual ones under a single entry point. Args: diff --git a/src/ampycloud/utils/utils.py b/src/ampycloud/utils/utils.py index 072b78c..7778ee0 100644 --- a/src/ampycloud/utils/utils.py +++ b/src/ampycloud/utils/utils.py @@ -10,6 +10,7 @@ # Import from Python import logging +from typing import Union import warnings import contextlib import copy @@ -28,7 +29,7 @@ @log_func_call(logger) def check_data_consistency(pdf: pd.DataFrame, - req_cols: dict = None) -> pd.DataFrame: + req_cols: Union[dict, None] = None) -> pd.DataFrame: """ Assesses whether a given :py:class:`pandas.DataFrame` is compatible with the requirements of ampycloud. @@ -206,7 +207,7 @@ def tmp_seed(seed: int): @log_func_call(logger) -def adjust_nested_dict(ref_dict: dict, new_dict: dict, lvls: list = None) -> dict: +def adjust_nested_dict(ref_dict: dict, new_dict: dict, lvls: Union[list, None] = None) -> dict: """ Update a given (nested) dictionnary given a second (possibly incomplete) one. Args: @@ -270,4 +271,5 @@ def calc_base_alt( 'Cloud base calculation got an empty array.' 
'Maybe check lookback percentage (is set to %i)' %lookback_perc ) - return np.percentile(n_latest_elements, alt_perc) + layer_base_altitude = np.percentile(n_latest_elements, alt_perc) + return layer_base_altitude diff --git a/src/ampycloud/wmo.py b/src/ampycloud/wmo.py index d5a7597..b88c491 100644 --- a/src/ampycloud/wmo.py +++ b/src/ampycloud/wmo.py @@ -10,7 +10,7 @@ # Import from Python import logging -from typing import Union +from typing import Optional, Union import numpy as np # Import from ampycloud @@ -79,7 +79,7 @@ def perc2okta(val: Union[int, float, np.ndarray]) -> np.ndarray: @log_func_call(logger) -def okta2code(val: int) -> str: +def okta2code(val: int) -> Optional[str]: """ Convert an okta value to a METAR code. Args: From 1e48c6915a7a501d9274ae0be368c4554204a5b0 Mon Sep 17 00:00:00 2001 From: Sophie Rethore Date: Mon, 22 Apr 2024 13:37:25 +0200 Subject: [PATCH 22/51] AMAROC-765 solve issue in texify and add test --- src/ampycloud/plots/tools.py | 4 ++-- test/ampycloud/plots/test_tools.py | 11 ++++++++++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/src/ampycloud/plots/tools.py b/src/ampycloud/plots/tools.py index d7b37de..3b9ad52 100644 --- a/src/ampycloud/plots/tools.py +++ b/src/ampycloud/plots/tools.py @@ -135,9 +135,9 @@ def texify(msg: str) -> str: msg = msg.replace('%', r'\%') # Here, I want to clean the underscore, but ONLY outside of math mode. - msg = [item.replace('_', r'\_') if ind % 2 == 0 else item + msg_splitted = [item.replace('_', r'\_') if ind % 2 == 0 else item for (ind, item) in enumerate(msg.split('$'))] - msg = '$'.join(msg) + msg = '$'.join(msg_splitted) # Next cleanup any LaTeX-specific stuff ... 
else: msg = msg.replace(r'\smaller', '') diff --git a/test/ampycloud/plots/test_tools.py b/test/ampycloud/plots/test_tools.py index 60bbe3a..dc9f217 100644 --- a/test/ampycloud/plots/test_tools.py +++ b/test/ampycloud/plots/test_tools.py @@ -13,7 +13,7 @@ # Import from ampycloud from ampycloud import dynamic -from ampycloud.plots.tools import valid_styles, set_mplstyle +from ampycloud.plots.tools import valid_styles, set_mplstyle, texify def test_valid_styles(): @@ -47,3 +47,12 @@ def check_plot_context(): # Finally, let's check that I can also alter the preamble in context dynamic.AMPYCLOUD_PRMS['MPL_STYLE'] = 'latex' assert 'metsymb' not in check_plot_context()[1] + + +def test_texify(): + """ Test the texify function in case of a given string used with Latex context. """ + + mpl.rcParams['text.usetex'] = True + result = texify("some $fancy$ label with % and _") + assert result == "some $fancy$ label with \\% and \\_" + mpl.rcParams['text.usetex'] = False From cf87c4bbc0f5dbfa9042696e6f12269454f256d0 Mon Sep 17 00:00:00 2001 From: Sophie Rethore Date: Mon, 22 Apr 2024 13:57:39 +0200 Subject: [PATCH 23/51] AMAROC-765 fix arraylike type issue --- src/ampycloud/data.py | 11 +++++------ src/ampycloud/utils/utils.py | 7 +++---- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/src/ampycloud/data.py b/src/ampycloud/data.py index 1f999b2..b7b2f40 100644 --- a/src/ampycloud/data.py +++ b/src/ampycloud/data.py @@ -14,7 +14,6 @@ import copy from abc import ABC, abstractmethod import numpy as np -import numpy.typing as npt import pandas as pd # Import from this package @@ -311,7 +310,7 @@ def _get_min_sep_for_altitude(self, altitude: float) -> float: logger.info('min_sep value: %.1f', min_sep) return min_sep - def _get_cluster_ids(self, which: str) -> npt.ArrayLike: + def _get_cluster_ids(self, which: str) -> np.ndarray: """Get the original IDs of slices, groups or layers. 
Args: @@ -329,7 +328,7 @@ def _get_cluster_ids(self, which: str) -> npt.ArrayLike: # of hits that do not get assigned to a sli/gro/lay. return np.delete(cids, np.where(cids == -1)) - def _setup_sligrolay_pdf(self, which: str = 'slices') -> tuple[pd.DataFrame, npt.ArrayLike]: + def _setup_sligrolay_pdf(self, which: str = 'slices') -> tuple[pd.DataFrame, np.ndarray]: """Setup a data frame for slices, groups or layers and keep track of IDs. Args: @@ -410,7 +409,7 @@ def _setup_sligrolay_pdf(self, which: str = 'slices') -> tuple[pd.DataFrame, npt return pdf, cluster_ids def _calculate_cloud_amount( - self, which: str, pdf: pd.DataFrame, cluster_ids: npt.ArrayLike + self, which: str, pdf: pd.DataFrame, cluster_ids: np.ndarray ) -> pd.DataFrame: """Calculate cloud amount for a given slice, group or layer. @@ -461,7 +460,7 @@ def _add_sligrolay_information( self, which: str, pdf: pd.DataFrame, - cluster_ids: npt.ArrayLike + cluster_ids: np.ndarray ) -> pd.DataFrame: """Add statistical properties to slices/ groups/ layers . @@ -501,7 +500,7 @@ def _add_sligrolay_information( return pdf def _calculate_sligrolay_base_height( - self, which: str, pdf: pd.DataFrame, cluster_ids: npt.ArrayLike + self, which: str, pdf: pd.DataFrame, cluster_ids: np.ndarray ) -> pd.DataFrame: """Calculate base height for all slices/ groups/ layers. diff --git a/src/ampycloud/utils/utils.py b/src/ampycloud/utils/utils.py index 7778ee0..5f8090f 100644 --- a/src/ampycloud/utils/utils.py +++ b/src/ampycloud/utils/utils.py @@ -15,7 +15,6 @@ import contextlib import copy import numpy as np -import numpy.typing as npt import pandas as pd # Import from this package @@ -149,7 +148,7 @@ def check_data_consistency(pdf: pd.DataFrame, warnings.warn(f'Column {key} is not required by ampycloud.', AmpycloudWarning) logger.warning('Dropping the superfluous %s column from the input data.', key) - data.drop((key), axis=1, inplace=True) + data.drop(key, axis=1, inplace=True) # A brief sanity check of the altitudes. 
We do not issue Errors, since the code can cope # with those elements: we simply raise Warnings. @@ -243,10 +242,10 @@ def adjust_nested_dict(ref_dict: dict, new_dict: dict, lvls: Union[list, None] = def calc_base_alt( - vals: npt.ArrayLike, + vals: np.ndarray, lookback_perc: int, alt_perc: int, - ) -> float: +) -> float: """Calculate the layer base altitude. Args: From c2d65fe30470a29313275b110d587607390e43c3 Mon Sep 17 00:00:00 2001 From: Sophie Rethore Date: Mon, 22 Apr 2024 14:20:20 +0200 Subject: [PATCH 24/51] AMAROC-765 fix mixing type issues --- src/ampycloud/plots/diagnostics.py | 4 +-- src/ampycloud/scaler.py | 14 +++++------ src/ampycloud/utils/mocker.py | 40 +++++++++++++++--------------- 3 files changed, 29 insertions(+), 29 deletions(-) diff --git a/src/ampycloud/plots/diagnostics.py b/src/ampycloud/plots/diagnostics.py index b32bd9c..cdf2ab9 100644 --- a/src/ampycloud/plots/diagnostics.py +++ b/src/ampycloud/plots/diagnostics.py @@ -109,8 +109,8 @@ def show_hits_only(self, show_ceilos: bool = False) -> None: # Get a list of ceilo colors from the cycler. ceilo_clrs = plt.rcParams['axes.prop_cycle'].by_key()['color'] # Assign them to each hit - symb_clrs = [ceilo_clrs[self._chunk.ceilos.index(item) % len(ceilo_clrs)] - for item in self._chunk.data['ceilo']] + symb_clrs = np.array([ceilo_clrs[self._chunk.ceilos.index(item) % len(ceilo_clrs)] + for item in self._chunk.data['ceilo']]) # What are the VV hits ? is_vv = np.array(self._chunk.data['type'] == -1) diff --git a/src/ampycloud/scaler.py b/src/ampycloud/scaler.py index 89814c9..d0521ed 100644 --- a/src/ampycloud/scaler.py +++ b/src/ampycloud/scaler.py @@ -195,7 +195,7 @@ def step_scale(vals: np.ndarray, @log_func_call(logger) -def convert_kwargs(vals: np.ndarray, fct: str, **kwargs: dict) -> dict: +def convert_kwargs(vals: np.ndarray, fct: str, **kwargs) -> dict: """ Converts the user-input keywords such that they can be fed to the underlying scaling functions. 
@@ -221,10 +221,10 @@ def convert_kwargs(vals: np.ndarray, fct: str, **kwargs: dict) -> dict: if fct == 'shift-and-scale': # In this case, the only data I may need to derive from the data is the shift. - if 'shift' in kwargs.keys(): + if 'shift' in kwargs: # Already set - do nothing return kwargs - if 'mode' in kwargs.keys(): + if 'mode' in kwargs: if kwargs['mode'] == 'do': kwargs['shift'] = np.nanmax(vals) elif kwargs['mode'] == 'undo': @@ -240,12 +240,12 @@ def convert_kwargs(vals: np.ndarray, fct: str, **kwargs: dict) -> dict: if fct == 'minmax-scale': # In this case, the challenge lies with identifying min_val and max_val, knowing that the # user may specify a min_range value. - if 'min_val' in kwargs.keys() and 'max_val' in kwargs.keys(): + if 'min_val' in kwargs and 'max_val' in kwargs: # Already specified ... do nothing return kwargs - if 'mode' in kwargs.keys(): + if 'mode' in kwargs: if kwargs['mode'] == 'do': - if 'min_range' in kwargs.keys(): + if 'min_range' in kwargs: min_range = kwargs['min_range'] kwargs.pop('min_range', None) else: @@ -260,7 +260,7 @@ def convert_kwargs(vals: np.ndarray, fct: str, **kwargs: dict) -> dict: raise AmpycloudError(f"Mode unknown: {kwargs['mode']}") # 'mode' not set -> will default to 'do' - if 'min_range' in kwargs.keys(): + if 'min_range' in kwargs: min_range = kwargs['min_range'] kwargs.pop('min_range', None) else: diff --git a/src/ampycloud/utils/mocker.py b/src/ampycloud/utils/mocker.py index 7617aa5..e8b6f71 100644 --- a/src/ampycloud/utils/mocker.py +++ b/src/ampycloud/utils/mocker.py @@ -13,6 +13,7 @@ from typing import Union import numpy as np import pandas as pd +from pandas import DataFrame # import from ampycloud from ..logger import log_func_call @@ -25,7 +26,7 @@ @log_func_call(logger) -def flat_layer(dts: np.array, alt: float, alt_std: float, sky_cov_frac: float) -> pd.DataFrame: +def flat_layer(dts: np.array, alt: float, alt_std: float, sky_cov_frac: float) -> DataFrame: """ Generates a mock, flat, 
Gaussian cloud layer around a given altitude. Args: @@ -43,7 +44,7 @@ def flat_layer(dts: np.array, alt: float, alt_std: float, sky_cov_frac: float) - n_pts = len(dts) # Create the storage structure - out = pd.DataFrame(columns=['dt', 'alt'], dtype=float) + out = DataFrame(columns=['dt', 'alt'], dtype=float) # Generate the random altitude data out['alt'] = np.random.normal(loc=alt, scale=alt_std, size=n_pts) @@ -64,7 +65,7 @@ def flat_layer(dts: np.array, alt: float, alt_std: float, sky_cov_frac: float) - @log_func_call(logger) def sin_layer(dts: np.array, alt: float, alt_std: float, sky_cov_frac: float, - period: Union[int, float], amplitude: Union[int, float]) -> pd.DataFrame: + period: Union[int, float], amplitude: Union[int, float]) -> DataFrame: """ Generates a sinusoidal cloud layer. Args: @@ -81,7 +82,7 @@ def sin_layer(dts: np.array, alt: float, alt_std: float, sky_cov_frac: float, """ # First, get a flat layer - out = flat_layer(dts, alt, alt_std, sky_cov_frac) + out: DataFrame = flat_layer(dts, alt, alt_std, sky_cov_frac) # And add to it a sinusoidal fluctuations. Note that nan should stay nan. out.loc[:, 'alt'] = out.loc[:, 'alt'] + np.sin(-np.pi/2 + out['dt']/period*2*np.pi) * amplitude @@ -89,8 +90,7 @@ def sin_layer(dts: np.array, alt: float, alt_std: float, sky_cov_frac: float, return out -def mock_layers(n_ceilos: int, lookback_time: float, hit_gap: float, - layer_prms: list) -> pd.DataFrame: +def mock_layers(n_ceilos: int, lookback_time: float, hit_gap: float, layer_prms: list) -> DataFrame: """ Generate a mock set of cloud layers for a specified number of ceilometers. Args: @@ -139,40 +139,40 @@ def mock_layers(n_ceilos: int, lookback_time: float, hit_gap: float, dts = np.random.random(n_pts) * -lookback_time # Let's now loop through each cloud layer and generate them - layers = [sin_layer(dts=dts, **prms) for prms in layer_prms] + layers: list[DataFrame] = [sin_layer(dts=dts, **prms) for prms in layer_prms] # Merge them all into one DataFrame ... 
- layers = pd.concat(layers).reset_index(drop=True) + merged_layers: DataFrame = pd.concat(layers).reset_index(drop=True) # Add the type column while I'm at it. Set it to None for now. - layers['type'] = None + merged_layers['type'] = None # Here, adjust the types so that it ranks lowest to highest for every dt step. # This needs to be done on a point by point basis, given that layers can cross each other. - for dt in np.unique(layers['dt']): + for dt in np.unique(merged_layers['dt']): # Get the hit altitudes, and sort them from lowest to highest - alts = layers[layers['dt'] == dt]['alt'].sort_values(axis=0) + alts = merged_layers[merged_layers['dt'] == dt]['alt'].sort_values(axis=0) # Then deal with the other ones for (a, alt) in enumerate(alts): # Except for the first one, any NaN hit gets dropped if a > 0 and np.isnan(alt): - layers.drop(index=alts.index[a], - inplace=True) + merged_layers.drop(index=alts.index[a], + inplace=True) elif np.isnan(alt): # A non-detection should be type 0 - layers.loc[alts.index[a], 'type'] = 0 + merged_layers.loc[alts.index[a], 'type'] = 0 else: - layers.loc[alts.index[a], 'type'] = a+1 + merged_layers.loc[alts.index[a], 'type'] = a+1 # Add the ceilo info as an int - layers['ceilo'] = str(ceilo) + merged_layers['ceilo'] = str(ceilo) # And store this for later - ceilos += [layers] + ceilos += [merged_layers] # Merge it all - out = pd.concat(ceilos) + out: DataFrame = pd.concat(ceilos) # Sort the timesteps in order, and reset the index out = out.sort_values(['dt', 'alt']).reset_index(drop=True) @@ -183,7 +183,7 @@ def mock_layers(n_ceilos: int, lookback_time: float, hit_gap: float, return out -def canonical_demo_data() -> pd.DataFrame: +def canonical_demo_data() -> DataFrame: """ This function creates the canonical ampycloud demonstration dataset, that can be used to illustrate the full behavior of the algorithm. 
@@ -205,6 +205,6 @@ def canonical_demo_data() -> pd.DataFrame: # Reset the random seed, but only do this temporarily, so as to not mess things up for the user. with utils.tmp_seed(42): # Actually generate the mock data - out = mock_layers(n_ceilos, lookback_time, hit_gap, lyrs) + out: DataFrame = mock_layers(n_ceilos, lookback_time, hit_gap, lyrs) return out From 19d3f25b01ca84699eec45818a5afc86ab6ce75a Mon Sep 17 00:00:00 2001 From: Sophie Rethore Date: Mon, 22 Apr 2024 14:51:03 +0200 Subject: [PATCH 25/51] AMAROC-765 fix leftover issues after merge --- src/ampycloud/core.py | 2 +- src/ampycloud/data.py | 5 ++--- src/ampycloud/layer.py | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/ampycloud/core.py b/src/ampycloud/core.py index 3d2eaa3..2d3b430 100644 --- a/src/ampycloud/core.py +++ b/src/ampycloud/core.py @@ -131,7 +131,7 @@ def set_prms(pth: Union[str, Path]) -> None: @log_func_call(logger) -def reset_prms(which: Union[str, list] = None) -> None: +def reset_prms(which: Union[str, list, None] = None) -> None: """ Reset the ampycloud dynamic=scientific parameters to their default values. Args: diff --git a/src/ampycloud/data.py b/src/ampycloud/data.py index 644b1a3..c29d737 100644 --- a/src/ampycloud/data.py +++ b/src/ampycloud/data.py @@ -14,7 +14,6 @@ import copy from abc import ABC, abstractmethod import numpy as np -import numpy.typing as npt import pandas as pd # Import from this package @@ -275,7 +274,7 @@ def max_hits_per_layer(self) -> int: def _calculate_base_height_for_selection( self, - data_indexer: pd.Series(dtype=bool), + data_indexer # type: pd.Series[bool] ) -> float: """Calculate the cloud base height for a selection of data. @@ -474,7 +473,7 @@ def _add_sligrolay_information( self, which: str, pdf: pd.DataFrame, - cluster_ids: npt.ArrayLike + cluster_ids: np.ndarray ) -> pd.DataFrame: """Add statistical properties to slices/ groups/ layers . 
diff --git a/src/ampycloud/layer.py b/src/ampycloud/layer.py index ecc151d..4b8104d 100644 --- a/src/ampycloud/layer.py +++ b/src/ampycloud/layer.py @@ -142,7 +142,7 @@ def best_gmm(abics: np.ndarray, mode: str = 'delta', def ncomp_from_gmm(vals: np.ndarray, ncomp_max: int = 3, min_sep: Union[int, float] = 0, - layer_base_params: dict[str, int] = None, + layer_base_params: Union[dict[str, int], None] = None, scores: str = 'BIC', rescale_0_to_x: Union[float, None] = None, random_seed: int = 42, From 9b28aec9d5dc87b768c6ceab05603422069b6e6c Mon Sep 17 00:00:00 2001 From: Sophie Rethore Date: Mon, 22 Apr 2024 15:13:52 +0200 Subject: [PATCH 26/51] AMAROC-765 fix conf.py --- docs/source/conf.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index 276f9e8..e876ebd 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -13,6 +13,7 @@ # from pathlib import Path +from typing import Any # Define file_absolute_path = Path(__file__).absolute() @@ -28,7 +29,7 @@ # -- Project information ----------------------------------------------------- project = 'ampycloud' -copyright = '2021-2023, MeteoSwiss' +copyright = '2021-2024, MeteoSwiss' author = 'Frédéric P.A. Vogt' version = vers @@ -63,7 +64,7 @@ # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path. -exclude_patterns = [] +exclude_patterns: list[Any] = [] # The name of the Pygments (syntax highlighting) style to use. 
pygments_style = 'sphinx' From 65e9ce0d5218a085adc90f0e8b2d61866e5c6f91 Mon Sep 17 00:00:00 2001 From: Sophie Rethore Date: Mon, 22 Apr 2024 16:21:17 +0200 Subject: [PATCH 27/51] AMAROC-765 fix np.array in mocker.py --- src/ampycloud/utils/mocker.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ampycloud/utils/mocker.py b/src/ampycloud/utils/mocker.py index 293679d..6f694d3 100644 --- a/src/ampycloud/utils/mocker.py +++ b/src/ampycloud/utils/mocker.py @@ -26,7 +26,7 @@ @log_func_call(logger) -def flat_layer(dts: np.array, height: float, height_std: float, +def flat_layer(dts: np.ndarray, height: float, height_std: float, sky_cov_frac: float) -> DataFrame: """ Generates a mock, flat, Gaussian cloud layer around a given height. @@ -65,13 +65,13 @@ def flat_layer(dts: np.array, height: float, height_std: float, @log_func_call(logger) -def sin_layer(dts: np.array, height: float, height_std: float, sky_cov_frac: float, +def sin_layer(dts: np.ndarray, height: float, height_std: float, sky_cov_frac: float, period: Union[int, float], amplitude: Union[int, float]) -> DataFrame: """ Generates a sinusoidal cloud layer. Args: dts (np.array of float): time deltas, in s, for the simulated ceilometer hits. - height (float): layer mean heaight, in ft above aerodrome level (aal). + height (float): layer mean height, in ft above aerodrome level (aal). height_std (float): layer height standard deviation, in ft. sky_cov_frac (float, optional): Sky coverage fraction. Random hits will be set to NaN to reach this value. Must be 0 <= x <= 1. 
From 9bef3f88692e3024fa2cf8743d118a3ee445d896 Mon Sep 17 00:00:00 2001 From: Sophie Rethore Date: Tue, 23 Apr 2024 10:57:20 +0200 Subject: [PATCH 28/51] AMAROC-765 review suggestions applied --- CHANGELOG | 1 + src/ampycloud/cluster.py | 2 +- src/ampycloud/data.py | 2 +- src/ampycloud/scaler.py | 2 +- 4 files changed, 4 insertions(+), 3 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index c7cd5c4..2ae49d3 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -8,6 +8,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm - [regDaniel, 2024-04-09] Add flag for clouds above (MSA + MSA_HIT_BUFFER) and allow for NSC in METAR message. - [fpavogt, 2024-03-26] Add option to reset only a single parameter. ### Fixed: + - [srethore, 2024-04-23] Fix static types issues. - [regDaniel, 2024-04-09] Minor adaptions to better comply with PEP standards. - [fpavogt, 2024-03-26] Use "height" in "ft aal" throughout. ### Changed: diff --git a/src/ampycloud/cluster.py b/src/ampycloud/cluster.py index 36ad336..221e2fa 100644 --- a/src/ampycloud/cluster.py +++ b/src/ampycloud/cluster.py @@ -10,7 +10,7 @@ # Import from Python import logging -from typing import (Optional, Union) +from typing import Optional, Union import numpy as np from sklearn.cluster import AgglomerativeClustering diff --git a/src/ampycloud/data.py b/src/ampycloud/data.py index c29d737..d1670b8 100644 --- a/src/ampycloud/data.py +++ b/src/ampycloud/data.py @@ -526,7 +526,7 @@ def _calculate_sligrolay_base_height( pd.DataFrame: with calculatio results in column height_base """ - for ind, cid in enumerate(cluster_ids): + for ind, cid in enumerate(cluster_ids): # Which hits are in this sli/gro/lay ? 
in_sligrolay = self.data[which[:-1]+'_id'] == cid # Compute the base height diff --git a/src/ampycloud/scaler.py b/src/ampycloud/scaler.py index d0521ed..65325d8 100644 --- a/src/ampycloud/scaler.py +++ b/src/ampycloud/scaler.py @@ -276,7 +276,7 @@ def convert_kwargs(vals: np.ndarray, fct: str, **kwargs) -> dict: @log_func_call(logger) -def apply_scaling(vals: np.ndarray, fct: Union[str, None] = None, **kwargs: dict) -> np.ndarray: +def apply_scaling(vals: np.ndarray, fct: Union[str, None] = None, **kwargs) -> np.ndarray: """ Umbrella scaling routine, that gathers all the individual ones under a single entry point. Args: From 7f002fa2e922e6da266b46d01352508a6d59a8f4 Mon Sep 17 00:00:00 2001 From: Sophie Rethore Date: Tue, 23 Apr 2024 13:58:21 +0200 Subject: [PATCH 29/51] AMAROC-765 temporary skip test macos python 3.9 --- .github/workflows/CI_pytest.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/CI_pytest.yml b/.github/workflows/CI_pytest.yml index 19a779f..4176507 100644 --- a/.github/workflows/CI_pytest.yml +++ b/.github/workflows/CI_pytest.yml @@ -45,22 +45,25 @@ jobs: echo $CONDA - name: Check the setup (II) - if: matrix.os != 'windows-latest' + if: matrix.os != 'windows-latest' || (matrix.os != 'macos-latest' && matrix.python-version != '3.9') run: | which python which conda # Install all the dependencies we require - name: Install dependencies + if: matrix.os != 'macos-latest' && matrix.python-version != '3.9' run: | python -m pip install --upgrade pip # Here, let's install our module to make sure all the dependencies specified in setup.py are # also installed - name: Install our module + if: matrix.os != 'macos-latest' && matrix.python-version != '3.9' run: pip install -e .[dev] # Read to run all the tests ! 
- name: Run pytest + if: matrix.os != 'macos-latest' && matrix.python-version != '3.9' run: | pytest -s From c1ad8317634be715dba9ada78b50bc27510fc55e Mon Sep 17 00:00:00 2001 From: Sophie Rethore Date: Tue, 23 Apr 2024 14:00:55 +0200 Subject: [PATCH 30/51] AMAROC-765 temporary skip test macos python 3.9 --- .github/workflows/CI_pytest.yml | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/.github/workflows/CI_pytest.yml b/.github/workflows/CI_pytest.yml index 4176507..865c062 100644 --- a/.github/workflows/CI_pytest.yml +++ b/.github/workflows/CI_pytest.yml @@ -21,7 +21,7 @@ jobs: strategy: matrix: os: [ubuntu-latest, macos-latest, windows-latest] - python-version: ['3.9', '3.10', '3.11'] + python-version: ['3.10', '3.11'] steps: # Checkout the repository @@ -45,25 +45,22 @@ jobs: echo $CONDA - name: Check the setup (II) - if: matrix.os != 'windows-latest' || (matrix.os != 'macos-latest' && matrix.python-version != '3.9') + if: matrix.os != 'windows-latest' run: | which python which conda # Install all the dependencies we require - name: Install dependencies - if: matrix.os != 'macos-latest' && matrix.python-version != '3.9' run: | python -m pip install --upgrade pip # Here, let's install our module to make sure all the dependencies specified in setup.py are # also installed - name: Install our module - if: matrix.os != 'macos-latest' && matrix.python-version != '3.9' run: pip install -e .[dev] # Read to run all the tests ! 
- name: Run pytest - if: matrix.os != 'macos-latest' && matrix.python-version != '3.9' run: | pytest -s From 0e1405515da9bd6c093d75f445ac0c82a341ccce Mon Sep 17 00:00:00 2001 From: Sophie Rethore Date: Tue, 23 Apr 2024 15:44:19 +0200 Subject: [PATCH 31/51] AMAROC-765 temporary skip test macos python 3.9 --- .github/workflows/CI_pytest.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/CI_pytest.yml b/.github/workflows/CI_pytest.yml index 865c062..a141385 100644 --- a/.github/workflows/CI_pytest.yml +++ b/.github/workflows/CI_pytest.yml @@ -21,7 +21,11 @@ jobs: strategy: matrix: os: [ubuntu-latest, macos-latest, windows-latest] - python-version: ['3.10', '3.11'] + python-version: ['3.9','3.10', '3.11'] + exclude: + # excludes python 3.9 on macOS-latest + - os: macos-latest + python-version: '3.9' steps: # Checkout the repository From c33405fbf3a936e377c8e9740040f3a247cf5388 Mon Sep 17 00:00:00 2001 From: Sophie Rethore Date: Tue, 23 Apr 2024 16:21:04 +0200 Subject: [PATCH 32/51] AMAROC-765 eclude conda --- .github/workflows/CI_pytest.yml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/workflows/CI_pytest.yml b/.github/workflows/CI_pytest.yml index a141385..41d8010 100644 --- a/.github/workflows/CI_pytest.yml +++ b/.github/workflows/CI_pytest.yml @@ -30,11 +30,11 @@ jobs: steps: # Checkout the repository - name: Checkout current repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 # Setup python - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} @@ -46,13 +46,11 @@ jobs: if: matrix.os == 'windows-latest' run: | Get-Command python | Select-Object -ExpandProperty Definition - echo $CONDA - name: Check the setup (II) if: matrix.os != 'windows-latest' run: | which python - which conda # Install all the dependencies we require - name: Install dependencies From 
3ccbd8048fae9bc6b1dcf522ce5cf2eaa9b1cec5 Mon Sep 17 00:00:00 2001 From: Dani Date: Tue, 23 Apr 2024 16:48:36 +0200 Subject: [PATCH 33/51] AMAROC-784 update setup-python in pytest workflow (#122) * AMAROC-784 update setup-python in pytest workflow * AMAROC-765 temporary skip test macos python 3.9 * AMAROC-765 temporary skip test macos python 3.9 * update python setup in gh actions * AMAROC-784 update CHANGELOG * AMAROC-765 temporary skip test macos python 3.9 * remove which conda from pytest workflow * AMAROC-784 bring back Python 3.9 to test matrix * AMAROC-784 conditional python setup in pytest workflow * AMAROC-784 update actions checkout to latest version * AMAROC-784 globally exclude Py3.9xmacOS_latest from test matrix * AMAROC-784 remove conda check for windows as well --------- Co-authored-by: Sophie Rethore --- .github/workflows/CI_check_version.yml | 4 ++-- .github/workflows/CI_docs_build_and_check.yml | 4 ++-- .github/workflows/CI_docs_build_and_publish.yml | 6 +++--- .github/workflows/CI_pylinter.yml | 4 ++-- .github/workflows/CI_pypi.yml | 4 ++-- .github/workflows/CI_pytest.yml | 9 +++++---- .github/workflows/CI_pytest_weekly.yml | 4 ++-- .github/workflows/CI_speed_check.yml | 4 ++-- CHANGELOG | 1 + 9 files changed, 21 insertions(+), 19 deletions(-) diff --git a/.github/workflows/CI_check_version.yml b/.github/workflows/CI_check_version.yml index b6f64f6..769131c 100644 --- a/.github/workflows/CI_check_version.yml +++ b/.github/workflows/CI_check_version.yml @@ -21,10 +21,10 @@ jobs: steps: - name: Checkout current repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Set up Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: '3.11' diff --git a/.github/workflows/CI_docs_build_and_check.yml b/.github/workflows/CI_docs_build_and_check.yml index ae37ecf..e9ede0d 100644 --- a/.github/workflows/CI_docs_build_and_check.yml +++ b/.github/workflows/CI_docs_build_and_check.yml @@ -21,11 +21,11 @@ 
jobs: steps: # Checkout our repository - name: Checkout current repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 # Set up Python - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} diff --git a/.github/workflows/CI_docs_build_and_publish.yml b/.github/workflows/CI_docs_build_and_publish.yml index e4fd15c..455c86d 100644 --- a/.github/workflows/CI_docs_build_and_publish.yml +++ b/.github/workflows/CI_docs_build_and_publish.yml @@ -28,11 +28,11 @@ jobs: steps: # Checkout our repository - name: Checkout current repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 # Also check out the live docs, placing it into a pseudo "build" folder for the docs - name: Checkout live docs - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: repository: MeteoSwiss/ampycloud ref: gh-pages @@ -51,7 +51,7 @@ jobs: # Set up Python - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} diff --git a/.github/workflows/CI_pylinter.yml b/.github/workflows/CI_pylinter.yml index 0895c40..1496586 100644 --- a/.github/workflows/CI_pylinter.yml +++ b/.github/workflows/CI_pylinter.yml @@ -23,11 +23,11 @@ jobs: steps: # Checkout our repository - name: Checkout current repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 # Set up Python - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} diff --git a/.github/workflows/CI_pypi.yml b/.github/workflows/CI_pypi.yml index fed29bc..d562031 100644 --- a/.github/workflows/CI_pypi.yml +++ b/.github/workflows/CI_pypi.yml @@ -19,10 +19,10 @@ jobs: steps: - name: Checkout current repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Set up Python - uses: 
actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: '3.11' diff --git a/.github/workflows/CI_pytest.yml b/.github/workflows/CI_pytest.yml index 19a779f..2500482 100644 --- a/.github/workflows/CI_pytest.yml +++ b/.github/workflows/CI_pytest.yml @@ -22,15 +22,18 @@ jobs: matrix: os: [ubuntu-latest, macos-latest, windows-latest] python-version: ['3.9', '3.10', '3.11'] + exclude: + - os: macos-latest + python-version: '3.9' steps: # Checkout the repository - name: Checkout current repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 # Setup python - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} @@ -42,13 +45,11 @@ jobs: if: matrix.os == 'windows-latest' run: | Get-Command python | Select-Object -ExpandProperty Definition - echo $CONDA - name: Check the setup (II) if: matrix.os != 'windows-latest' run: | which python - which conda # Install all the dependencies we require - name: Install dependencies diff --git a/.github/workflows/CI_pytest_weekly.yml b/.github/workflows/CI_pytest_weekly.yml index 0ef622f..35ae8cc 100644 --- a/.github/workflows/CI_pytest_weekly.yml +++ b/.github/workflows/CI_pytest_weekly.yml @@ -26,14 +26,14 @@ jobs: # The idea for this Action is to spot issues with new dependency versions as soon as they # are released (and not when we decide to update ampycloud). 
- name: Checkout current repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: repository: MeteoSwiss/ampycloud ref: master # Setup python - name: Set up Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: '3.x' diff --git a/.github/workflows/CI_speed_check.yml b/.github/workflows/CI_speed_check.yml index 2644294..46b7bbe 100644 --- a/.github/workflows/CI_speed_check.yml +++ b/.github/workflows/CI_speed_check.yml @@ -19,11 +19,11 @@ jobs: steps: # Checkout our repository - name: Checkout current repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 # Set up Python - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} diff --git a/CHANGELOG b/CHANGELOG index c7cd5c4..fccce1f 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -8,6 +8,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm - [regDaniel, 2024-04-09] Add flag for clouds above (MSA + MSA_HIT_BUFFER) and allow for NSC in METAR message. - [fpavogt, 2024-03-26] Add option to reset only a single parameter. ### Fixed: + - [regDaniel, 2024-04-23] Fix deprecated github actions worklflow file - [regDaniel, 2024-04-09] Minor adaptions to better comply with PEP standards. - [fpavogt, 2024-03-26] Use "height" in "ft aal" throughout. 
### Changed: From e34426154b1e59289df78e7ba6f99297fa6a9af9 Mon Sep 17 00:00:00 2001 From: 13974615+fpavogt <13974615+fpavogt@users.noreply.github.com> Date: Tue, 16 Apr 2024 15:39:50 +0200 Subject: [PATCH 34/51] Fix #116 --- CHANGELOG | 1 + src/ampycloud/utils/utils.py | 17 +++++++++++++++++ test/ampycloud/test_data.py | 4 ++-- test/ampycloud/utils/test_utils.py | 24 ++++++++++++++++++++++++ 4 files changed, 44 insertions(+), 2 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index fccce1f..128d1a6 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -5,6 +5,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm ## [v2.0.0] ### Added: + - [fpavogt, 2024-04-16] Improve input data consistency check (fix #116). - [regDaniel, 2024-04-09] Add flag for clouds above (MSA + MSA_HIT_BUFFER) and allow for NSC in METAR message. - [fpavogt, 2024-03-26] Add option to reset only a single parameter. ### Fixed: diff --git a/src/ampycloud/utils/utils.py b/src/ampycloud/utils/utils.py index 371384b..f6470aa 100644 --- a/src/ampycloud/utils/utils.py +++ b/src/ampycloud/utils/utils.py @@ -150,6 +150,23 @@ def check_data_consistency(pdf: pd.DataFrame, logger.warning('Dropping the superfluous %s column from the input data.', key) data.drop((key), axis=1, inplace=True) + # Check for any duplicated entry, which would make no sense. 
+ if (duplic := data.duplicated()).any(): + raise AmpycloudError('Duplicated hits in the input data:\n' + f'{data[duplic].to_string(index=False)}') + + # Check for inconsistencies + # 1 - A non-detection should not be coincident with a detection + # 2 - A VV hit should not be coincident with a hit or a non-detection + for hit_type in [0, -1]: + nodets = data[data['type'] == hit_type][['dt', 'ceilo']] + dets = data[data['type'] != hit_type][['dt', 'ceilo']] + merged = dets.merge(nodets, how='inner', on=['dt', 'ceilo']) + if len(merged) > 0: + raise AmpycloudError('Inconsistent input data ' + f'(simultaneous type {hit_type} and !{hit_type}):\n' + f'{merged.to_string(index=False)}') + # A brief sanity check of the heights. We do not issue Errors, since the code can cope # with those elements: we simply raise Warnings. msgs = [] diff --git a/test/ampycloud/test_data.py b/test/ampycloud/test_data.py index ee8d598..204b3de 100644 --- a/test/ampycloud/test_data.py +++ b/test/ampycloud/test_data.py @@ -200,6 +200,7 @@ def test_group_separation( reset_prms() + def test_bad_layer_sep_lims(): """ Test that giving problematic layer separation limits does raise an error. 
""" @@ -321,8 +322,7 @@ def test_layering_singlepts(): mock_data = pd.DataFrame(np.array([['dummy', -1, 2300, 1], ['dummy', -1, 4000, 2], - ['dummy', -1, 4500, 3], - ['dummy', -1, np.nan, 0]]), + ['dummy', -1, 4500, 3]]), columns=['ceilo', 'dt', 'height', 'type']) # Set the proper column types diff --git a/test/ampycloud/utils/test_utils.py b/test/ampycloud/utils/test_utils.py index ea67c9e..8508698 100644 --- a/test/ampycloud/utils/test_utils.py +++ b/test/ampycloud/utils/test_utils.py @@ -35,6 +35,7 @@ def test_check_data_consistency(): assert np.all(out == canonical_demo_data()) # Now, let's check specific elements that should raise errors or warnings + ### ERRORS ### with raises(AmpycloudError): # Empty DataFrame data = pd.DataFrame(columns=['ceilo', 'dt', 'height', 'type']) @@ -45,6 +46,29 @@ def test_check_data_consistency(): for col in ['ceilo', 'height', 'type']: data[col] = data.loc[:, col].astype(hardcoded.REQ_DATA_COLS[col]) check_data_consistency(data) + with raises(AmpycloudError): + # Duplicated hit + data = pd.DataFrame(np.array([['a', 0., 1, 1], ['a', 0., 1, 1]]), + columns=['ceilo', 'dt', 'height', 'type']) + for col in ['ceilo', 'dt', 'height', 'type']: + data[col] = data.loc[:, col].astype(hardcoded.REQ_DATA_COLS[col]) + check_data_consistency(data) + with raises(AmpycloudError): + # Inconsistent hits - type 0 vs type !0 + data = pd.DataFrame(np.array([['a', 0, 1, 1], ['a', 0, np.nan, 0]]), + columns=['ceilo', 'dt', 'height', 'type']) + for col in ['ceilo', 'dt', 'height', 'type']: + data[col] = data.loc[:, col].astype(hardcoded.REQ_DATA_COLS[col]) + check_data_consistency(data) + with raises(AmpycloudError): + # Inconsistent vv hits - it must be either a VV hit, either a hit, but not both. 
+ data = pd.DataFrame(np.array([['a', 0, 1, -1], ['a', 0, 2, 1]]), + columns=['ceilo', 'dt', 'height', 'type']) + for col in ['ceilo', 'dt', 'height', 'type']: + data[col] = data.loc[:, col].astype(hardcoded.REQ_DATA_COLS[col]) + check_data_consistency(data) + + ### WARNINGS ### with warns(AmpycloudWarning): # Bad data type data = pd.DataFrame(np.array([['a', 0, 1, 1]]), columns=['ceilo', 'dt', 'height', 'type']) From 903bc724dcc60247dfe36eefde5f9c0130f3546e Mon Sep 17 00:00:00 2001 From: 13974615+fpavogt <13974615+fpavogt@users.noreply.github.com> Date: Tue, 16 Apr 2024 15:50:22 +0200 Subject: [PATCH 35/51] Update docs --- src/ampycloud/utils/utils.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/ampycloud/utils/utils.py b/src/ampycloud/utils/utils.py index f6470aa..2d61008 100644 --- a/src/ampycloud/utils/utils.py +++ b/src/ampycloud/utils/utils.py @@ -96,6 +96,15 @@ def check_data_consistency(pdf: pd.DataFrame, * ``pdf`` is not a :py:class:`pandas.DataFrame`. * ``pdf`` is missing a required column. * ``pdf`` has a length of 0. + * ``pdf`` has duplicated rows. + * any time step for any ceilometer corresponds to both a type 0 (no hit) and not 0 (some + hit) + * any time step for any ceilometer corresponds to both a type -1 (VV hit) and not -1 (some + hit/no hit) + + The latter check implies that ampycloud cannot be fed a VV hit in parallel to a cloud base hit. + Should a specific ceilometer return VV hits in parallel to cloud base hits, it is up to the user + to decide whether to feed one or the other. 
In addition, this will raise an :py:class:`ampycloud.errors.AmpycloudWarning` if: From 7bd4b28f66c1c726fbd8888191080c94f9f05f6b Mon Sep 17 00:00:00 2001 From: 13974615+fpavogt <13974615+fpavogt@users.noreply.github.com> Date: Tue, 23 Apr 2024 16:39:20 +0200 Subject: [PATCH 36/51] Add additional test case following review --- test/ampycloud/utils/test_utils.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/test/ampycloud/utils/test_utils.py b/test/ampycloud/utils/test_utils.py index 8508698..40569e2 100644 --- a/test/ampycloud/utils/test_utils.py +++ b/test/ampycloud/utils/test_utils.py @@ -61,13 +61,20 @@ def test_check_data_consistency(): data[col] = data.loc[:, col].astype(hardcoded.REQ_DATA_COLS[col]) check_data_consistency(data) with raises(AmpycloudError): - # Inconsistent vv hits - it must be either a VV hit, either a hit, but not both. + # Inconsistent vv hits - it must be either a VV hit, or a hit, but not both. data = pd.DataFrame(np.array([['a', 0, 1, -1], ['a', 0, 2, 1]]), columns=['ceilo', 'dt', 'height', 'type']) for col in ['ceilo', 'dt', 'height', 'type']: data[col] = data.loc[:, col].astype(hardcoded.REQ_DATA_COLS[col]) check_data_consistency(data) + # The following should NOT raise an error, i.e. 
two simultaneous hits from *distinct* parameters
+    data = pd.DataFrame(np.array([['a', 0, 1, -1], ['b', 0, np.nan, 0]]),
+                        columns=['ceilo', 'dt', 'height', 'type'])
+    for col in ['ceilo', 'dt', 'height', 'type']:
+        data[col] = data.loc[:, col].astype(hardcoded.REQ_DATA_COLS[col])
+    check_data_consistency(data)
+
     ### WARNINGS ###
     with warns(AmpycloudWarning):
         # Bad data type

From c4d72409a63dc2fa049c17bbddb91d2182747643 Mon Sep 17 00:00:00 2001
From: 13974615+fpavogt <13974615+fpavogt@users.noreply.github.com>
Date: Wed, 17 Apr 2024 19:18:32 +0200
Subject: [PATCH 37/51] Fix #119

---
 CHANGELOG                    |  1 +
 src/ampycloud/layer.py       | 23 +++++++++++++++++------
 src/ampycloud/utils/utils.py |  4 ++--
 3 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/CHANGELOG b/CHANGELOG
index 128d1a6..3594c0d 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -9,7 +9,8 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
 - [regDaniel, 2024-04-09] Add flag for clouds above (MSA + MSA_HIT_BUFFER) and allow for NSC in METAR message.
 - [fpavogt, 2024-03-26] Add option to reset only a single parameter.
 ### Fixed:
 - [regDaniel, 2024-04-23] Fix deprecated github actions worklflow file
+ - [fpavogt, 2024-04-17] Fix #119
 - [regDaniel, 2024-04-09] Minor adaptions to better comply with PEP standards.
 - [fpavogt, 2024-03-26] Use "height" in "ft aal" throughout.
 ### Changed:
diff --git a/src/ampycloud/layer.py b/src/ampycloud/layer.py
index 465e29c..16f05e3 100644
--- a/src/ampycloud/layer.py
+++ b/src/ampycloud/layer.py
@@ -221,9 +221,10 @@ def ncomp_from_gmm(vals: np.ndarray,
     models = {}
 
     # Run the Gaussian Mixture fit for all cases ... should we do anything more fancy here ?
- with utils.tmp_seed(random_seed):
- for n_val in ncomp:
- models[n_val] = GaussianMixture(n_val, covariance_type='spherical').fit(vals)
+ #with utils.tmp_seed(random_seed):
+ for n_val in ncomp:
+ models[n_val] = GaussianMixture(n_val, covariance_type='spherical',
+ random_state=random_seed).fit(vals)
 # Extract the AICS and BICS scores
 if scores == 'AIC':
@@ -233,14 +234,24 @@ def ncomp_from_gmm(vals: np.ndarray,
 else:
 raise AmpycloudError(f'Unknown scores: {scores}')
+ # Fix #119: at times, not all sub-layers may be populated by GaussianMixture.
+ # To avoid problems down the line, we shall boost the abics score of such cases to make sure
+ # they are NOT taken as the best model
+ for (n_id, n_val) in enumerate(ncomp):
+ if (n_eff := len(np.unique(models[n_val].predict(vals)))) < n_val:
+ logger.warning(' %i out of %i sub-layers populated by GaussianMixture(%i) - see #119. '
+ 'Ruling out %i components as a possibility.',
+ n_eff, n_val, n_val, n_val)
+ abics[n_id] = max(abics) + 1 # The larger the abics score, the worse the fit. 
+ # Get the interesting information out best_model_ind = best_gmm(abics, **kwargs) best_ncomp = ncomp[best_model_ind] best_ids = models[ncomp[best_model_ind]].predict(vals) - logger.debug('%s scores: %s', scores, abics) - logger.debug('best_model_ind (raw): %i', best_model_ind) - logger.debug('best_ncomp (raw): %i', best_ncomp) + logger.debug(' %s scores: %s', scores, abics) + logger.debug(' best_model_ind (raw): %i', best_model_ind) + logger.debug(' best_ncomp (raw): %i', best_ncomp) # If I found only one component, I can stop here if best_ncomp == 1: diff --git a/src/ampycloud/utils/utils.py b/src/ampycloud/utils/utils.py index 2d61008..c90f9b9 100644 --- a/src/ampycloud/utils/utils.py +++ b/src/ampycloud/utils/utils.py @@ -292,7 +292,7 @@ def calc_base_height(vals: npt.ArrayLike, n_latest_elements = vals[- int(len(vals) * lookback_perc / 100):] if len(n_latest_elements) == 0: raise AmpycloudError( - 'Cloud base calculation got an empty array.' - f'Maybe check lookback percentage (is set to {lookback_perc})' + 'Cloud base calculation got an empty array. ' + f'Maybe check lookback percentage ? (currently set to {lookback_perc})' ) return np.percentile(n_latest_elements, height_perc) From 4d05ff4e74bd7bc523ad970a529a49d77e1fdc3f Mon Sep 17 00:00:00 2001 From: 13974615+fpavogt <13974615+fpavogt@users.noreply.github.com> Date: Wed, 17 Apr 2024 19:28:52 +0200 Subject: [PATCH 38/51] Minor code cleanup --- src/ampycloud/layer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/ampycloud/layer.py b/src/ampycloud/layer.py index 16f05e3..0ace7ae 100644 --- a/src/ampycloud/layer.py +++ b/src/ampycloud/layer.py @@ -221,7 +221,6 @@ def ncomp_from_gmm(vals: np.ndarray, models = {} # Run the Gaussian Mixture fit for all cases ... should we do anything more fancy here ? 
- #with utils.tmp_seed(random_seed): for n_val in ncomp: models[n_val] = GaussianMixture(n_val, covariance_type='spherical', random_state=random_seed).fit(vals) From 79413f1c1fbac5c647b9829a433e8f34dd847804 Mon Sep 17 00:00:00 2001 From: 13974615+fpavogt <13974615+fpavogt@users.noreply.github.com> Date: Thu, 25 Apr 2024 12:24:11 +0200 Subject: [PATCH 39/51] Fix #123 --- CHANGELOG | 1 + src/ampycloud/data.py | 1 - src/ampycloud/fluffer.py | 11 +++++++---- src/ampycloud/prms/ampycloud_default_prms.yml | 4 +--- test/ampycloud/test_fluffer.py | 2 +- 5 files changed, 10 insertions(+), 9 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index fe59e9b..e322538 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -15,6 +15,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm - [regDaniel, 2024-04-09] Minor adaptions to better comply with PEP standards. - [fpavogt, 2024-03-26] Use "height" in "ft aal" throughout. ### Changed: + - [fpavogt, 2024-03-25] Set the fluffiness boost to 2 (fixes #123). - [fpavogt, 2024-03-23] Changed default mock dataset to be 900s long (instead of 2400s). ## [v1.0.0] diff --git a/src/ampycloud/data.py b/src/ampycloud/data.py index d1670b8..e3005e4 100644 --- a/src/ampycloud/data.py +++ b/src/ampycloud/data.py @@ -507,7 +507,6 @@ def _add_sligrolay_information( pdf.iloc[ind, pdf.columns.get_loc('fluffiness')], _ = \ fluffer.get_fluffiness( self.data.loc[in_sligrolay, ['dt', 'height']].values, - boost=self.prms['GROUPING_PRMS']['fluffiness_boost'], **self.prms['LOWESS']) return pdf diff --git a/src/ampycloud/fluffer.py b/src/ampycloud/fluffer.py index fa12230..37e5e32 100644 --- a/src/ampycloud/fluffer.py +++ b/src/ampycloud/fluffer.py @@ -23,21 +23,24 @@ @log_func_call(logger) -def get_fluffiness(pts, boost=2, **kwargs): +def get_fluffiness(pts, **kwargs): """ Utility functions to compute the fluffiness of a set of ceilometer hits. Args: pts (ndarray): 2D array of [dt, height] ceilometer hits. None must have NaNs heights. 
- boost (float): the fluffiness boost factor. Defaults to 2. **kwargs (optional): additional arguments to be fed to statsmodels.nonparameteric.lowess(). Returns: float, ndarray: the fluffiness (in height units) and LOWESS-smoothed (dt, height) values (sorted). - The *fluffiness* is computed as `boost * mean(abs(y - lowess))`, where lowess is the smooth + The *fluffiness* is computed as `2 * mean(abs(y - lowess))`, where lowess is the smooth LOWESS fit to the ceilometer hits. + The factor 2 stems from the fact that abs(y-lowess) corresponds to half(-ish) the slice + thickness, that needs to be doubled in order to use the fluffiness to rescale the slice onto the + 0 - 1 range. + To avoid LOWESS warning, hits with identical x coordinates (= time steps) are being offset by a small factor (1e-5). """ @@ -69,5 +72,5 @@ def get_fluffiness(pts, boost=2, **kwargs): lowess_pts = sm.nonparametric.lowess(pts[:, 1], unique_xs, return_sorted=True, is_sorted=True, missing='none', **kwargs) - fluffiness = boost * np.mean(np.abs(pts[:, 1] - lowess_pts[:, 1])) + fluffiness = 2 * np.mean(np.abs(pts[:, 1] - lowess_pts[:, 1])) return fluffiness, lowess_pts diff --git a/src/ampycloud/prms/ampycloud_default_prms.yml b/src/ampycloud/prms/ampycloud_default_prms.yml index 7ce6661..87aae3e 100644 --- a/src/ampycloud/prms/ampycloud_default_prms.yml +++ b/src/ampycloud/prms/ampycloud_default_prms.yml @@ -78,9 +78,7 @@ GROUPING_PRMS: height_pad_perc: +10 # Time scaling factor (in s) dt_scale: 180 - # Boost factor, applied to the slice fluffiness to derive the height scaling factor - fluffiness_boost: 2 - # Range of allowed height scaling factor (in ft). The fluffiness_boost will never be allowed + # Range of allowed height scaling factor (in ft). The fluffiness will never be allowed # to push the height_scaling beyond this range. 
height_scale_range: [100, 500] diff --git a/test/ampycloud/test_fluffer.py b/test/ampycloud/test_fluffer.py index da8d90b..af1f627 100644 --- a/test/ampycloud/test_fluffer.py +++ b/test/ampycloud/test_fluffer.py @@ -30,7 +30,7 @@ def test_get_fluffiness(): assert out[0] == 0 assert np.array_equal(out[1][:, 1], pts[:, 1]) - # Test the LOWESS warning issued with duplicat points + # Test the LOWESS warning issued with duplicate points pts = np.array([[0, 0], [0, 1], [1, 0]]) # For some reason, pytest refuses to convert the LOWESS warning into an error ... # ... so I need to catch it a bit differently ... sigh ! From 5f433e3ec173a64fc0b4265e123be660e21e04f1 Mon Sep 17 00:00:00 2001 From: 13974615+fpavogt <13974615+fpavogt@users.noreply.github.com> Date: Sun, 28 Apr 2024 14:24:09 +0200 Subject: [PATCH 40/51] Add one rogue point to the mock example --- CHANGELOG | 1 + src/ampycloud/utils/mocker.py | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/CHANGELOG b/CHANGELOG index e322538..9464cd3 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -15,6 +15,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm - [regDaniel, 2024-04-09] Minor adaptions to better comply with PEP standards. - [fpavogt, 2024-03-26] Use "height" in "ft aal" throughout. ### Changed: + - [fpavogt, 2024-04-28] Add a rogue point to the mock example. - [fpavogt, 2024-03-25] Set the fluffiness boost to 2 (fixes #123). - [fpavogt, 2024-03-23] Changed default mock dataset to be 900s long (instead of 2400s). 
diff --git a/src/ampycloud/utils/mocker.py b/src/ampycloud/utils/mocker.py index 6f694d3..2e296fe 100644 --- a/src/ampycloud/utils/mocker.py +++ b/src/ampycloud/utils/mocker.py @@ -210,4 +210,8 @@ def canonical_demo_data() -> DataFrame: # Actually generate the mock data out: DataFrame = mock_layers(n_ceilos, lookback_time, hit_gap, lyrs) + # Add a rogue hit, to illustrate the fact that layers that fall below Theta_0 are not + # reported in the diagnostic diagram, but remain "counted". + out.loc[len(out)] = [-800, 3100, 1, 1] + return out From 991c626d2fe2a2c55235cd8b318ef21e2efc50fc Mon Sep 17 00:00:00 2001 From: 13974615+fpavogt <13974615+fpavogt@users.noreply.github.com> Date: Sun, 28 Apr 2024 14:26:29 +0200 Subject: [PATCH 41/51] Correct col type --- src/ampycloud/utils/mocker.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/ampycloud/utils/mocker.py b/src/ampycloud/utils/mocker.py index 2e296fe..52e0ae0 100644 --- a/src/ampycloud/utils/mocker.py +++ b/src/ampycloud/utils/mocker.py @@ -214,4 +214,8 @@ def canonical_demo_data() -> DataFrame: # reported in the diagnostic diagram, but remain "counted". out.loc[len(out)] = [-800, 3100, 1, 1] + # Fix the dtypes + for (col, tpe) in hardcoded.REQ_DATA_COLS.items(): + out[col] = out[col].astype(tpe) + return out From 9aafcb890e43bf334582d12fc02724507294b277 Mon Sep 17 00:00:00 2001 From: 13974615+fpavogt <13974615+fpavogt@users.noreply.github.com> Date: Sun, 28 Apr 2024 16:51:33 +0200 Subject: [PATCH 42/51] Swap instead of add rogue hit --- src/ampycloud/utils/mocker.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/ampycloud/utils/mocker.py b/src/ampycloud/utils/mocker.py index 52e0ae0..10f0ba1 100644 --- a/src/ampycloud/utils/mocker.py +++ b/src/ampycloud/utils/mocker.py @@ -212,10 +212,8 @@ def canonical_demo_data() -> DataFrame: # Add a rogue hit, to illustrate the fact that layers that fall below Theta_0 are not # reported in the diagnostic diagram, but remain "counted". 
- out.loc[len(out)] = [-800, 3100, 1, 1]
-
- # Fix the dtypes
- for (col, tpe) in hardcoded.REQ_DATA_COLS.items():
- out[col] = out[col].astype(tpe)
+ # To avoid confusing users, we will replace an existing hit: pick one closest to -800s.
+ swap_id = (abs(out[(out['ceilo'] == '1')*(out['type'] == 2)]['dt']+800)).idxmin()
+ out.at[swap_id, 'height'] = 3100.0
 return out
From 0674d3e5c366ac42774304ccea87ffb509216ee3 Mon Sep 17 00:00:00 2001
From: 13974615+fpavogt <13974615+fpavogt@users.noreply.github.com>
Date: Tue, 25 Jun 2024 20:08:04 +0200
Subject: [PATCH 43/51] Add support for numpy >2.0
---
 CHANGELOG | 1 +
 src/ampycloud/data.py | 2 +-
 src/ampycloud/scaler.py | 4 ++--
 src/ampycloud/utils/utils.py | 4 ++--
 4 files changed, 6 insertions(+), 5 deletions(-)
diff --git a/CHANGELOG b/CHANGELOG
index 9464cd3..d4c16c8 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -15,6 +15,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
 - [regDaniel, 2024-04-09] Minor adaptions to better comply with PEP standards.
 - [fpavogt, 2024-03-26] Use "height" in "ft aal" throughout.
 ### Changed:
+ - [fpavogt, 2024-06-25] Add support for numpy >2.0
 - [fpavogt, 2024-04-28] Add a rogue point to the mock example.
 - [fpavogt, 2024-03-25] Set the fluffiness boost to 2 (fixes #123).
 - [fpavogt, 2024-03-23] Changed default mock dataset to be 900s long (instead of 2400s). 
diff --git a/src/ampycloud/data.py b/src/ampycloud/data.py index e3005e4..7c01f54 100644 --- a/src/ampycloud/data.py +++ b/src/ampycloud/data.py @@ -1047,7 +1047,7 @@ def metar_msg(self, which: str = 'layers') -> str: # Deal with the MSA: set it to infinity if None was specified if self.msa is None: - msa_val = np.infty + msa_val = np.inf else: msa_val = self.msa diff --git a/src/ampycloud/scaler.py b/src/ampycloud/scaler.py index 65325d8..3cdf27a 100644 --- a/src/ampycloud/scaler.py +++ b/src/ampycloud/scaler.py @@ -154,11 +154,11 @@ def step_scale(vals: np.ndarray, offsets = [0] + steps # Get the bin edges for the scale mode - edges_in = [-np.infty] + steps + [np.infty] + edges_in = [-np.inf] + steps + [np.inf] # Idem for the descale mode ... this is more complex because of the continuity requirement edges_out = [steps[0]/scales[0] + np.sum((np.diff(steps)/scales[1:-1])[:ind]) for ind in range(len(steps))] - edges_out = [-np.infty] + edges_out + [np.infty] + edges_out = [-np.inf] + edges_out + [np.inf] # Prepare the output out = np.full_like(vals, np.nan, dtype=float) diff --git a/src/ampycloud/utils/utils.py b/src/ampycloud/utils/utils.py index db20d01..331de0e 100644 --- a/src/ampycloud/utils/utils.py +++ b/src/ampycloud/utils/utils.py @@ -185,10 +185,10 @@ def check_data_consistency(pdf: pd.DataFrame, msgs += ['Some type=0 hits have non-NaNs height values ?!'] if np.any(np.isnan(data.loc[data.type == 1, 'height'])): msgs += ['Some type=1 hits have NaNs height values ?!'] - if not np.all(np.in1d(data.loc[data.type == 2, 'dt'].values, + if not np.all(np.isin(data.loc[data.type == 2, 'dt'].values, data.loc[data.type == 1, 'dt'].values)): msgs += ['Some type=2 hits have no coincident type=1 hits ?!'] - if not np.all(np.in1d(data.loc[data.type == 3, 'dt'].values, + if not np.all(np.isin(data.loc[data.type == 3, 'dt'].values, data.loc[data.type == 2, 'dt'].values)): msgs += ['Some type=3 hits have no coincident type=2 hits ?!'] From 
8a2b05b3b24a02fcd02ece341b2c851f15d16ff4 Mon Sep 17 00:00:00 2001 From: 13974615+fpavogt <13974615+fpavogt@users.noreply.github.com> Date: Tue, 25 Jun 2024 21:58:55 +0200 Subject: [PATCH 44/51] Update docs --- docs/source/index.rst | 7 +++++-- docs/source/running.rst | 4 ++-- docs/source/scope.rst | 10 +++++----- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/docs/source/index.rst b/docs/source/index.rst index 4c68246..83d7204 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -18,10 +18,13 @@ Welcome to the ampycloud documentation -------------------------------------- * **What:** ampycloud refers to both a **Python package** and the **algorithm** at its core, designed - to characterize cloud layers (i.e. height and sky coverage fraction) using ceilometer measurements - (i.e. automatic cloud base *hits*), and derive a corresponding METAR-like message. + to characterize cloud layers (i.e. sky coverage fraction and base height) using ceilometer measurements + (i.e. cloud base *hits*), and derive a corresponding METAR-like message. A visual illustration of the algorithm is visible in :numref:`fig-demo`. + .. note:: + At the moment, ampycloud cannot use backscatter profiles to derive cloud base hits independantly. + * **Where:** ampycloud lives in `a dedicated repository `_ under the `MeteoSwiss organization `_ on Github, where you can submit all your `questions `_ and diff --git a/docs/source/running.rst b/docs/source/running.rst index e2cf8a8..6846be9 100644 --- a/docs/source/running.rst +++ b/docs/source/running.rst @@ -36,8 +36,8 @@ A no-words example for those that want to get started quickly The input data -------------- -The ampycloud algorithm is meant to process cloud base *hits* from ceilometer observations. A given -set of hits to be processed by the ampycloud package must be stored inside a +The ampycloud algorithm is meant to process cloud base *hits* derived from ceilometer observations. 
+A given set of hits to be processed by the ampycloud package must be stored inside a :py:class:`pandas.DataFrame` with a specific set of characteristics outlined below. Users can use the following utility function to check whether a given :py:class:`pandas.DataFrame` meets all the requirements of ampycloud. diff --git a/docs/source/scope.rst b/docs/source/scope.rst index 5825706..76e80c0 100644 --- a/docs/source/scope.rst +++ b/docs/source/scope.rst @@ -3,11 +3,11 @@ which requires all its dependencies to be robust and stable. This has the following implications for ampycloud: - * The scope of ampycloud will remain limited to the **automatic processing of ceilometer - hits**. In particular, ampycloud does not process Vertical Visibility (VV) measurements. - Depending on the ceilometer type, the user will need to decide how to treat VV hits *before* - passing them to ampycloud, e.g. by removing them or by converting them to cloud base - heights. + * The scope of ampycloud will remain limited to the **automatic processing of cloud base + hits derived using ceilometers**. Furthermore, ampycloud does not process + Vertical Visibility (VV) measurements. Depending on the ceilometer type, the user will need + to decide how to treat VV hits *before* passing them to ampycloud, e.g. by removing them or + by converting them to cloud base heights. 
* Note that regulation says that "if there are no clouds of operational significance and no restriction on vertical visibility and the abbreviation 'CAVOK' is not From 8c3c3a2280ed7d0180fee7b037de4255365209f5 Mon Sep 17 00:00:00 2001 From: 13974615+fpavogt <13974615+fpavogt@users.noreply.github.com> Date: Tue, 25 Jun 2024 22:05:04 +0200 Subject: [PATCH 45/51] Update docs --- docs/source/index.rst | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/docs/source/index.rst b/docs/source/index.rst index 83d7204..b6d7c06 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -23,7 +23,13 @@ Welcome to the ampycloud documentation A visual illustration of the algorithm is visible in :numref:`fig-demo`. .. note:: - At the moment, ampycloud cannot use backscatter profiles to derive cloud base hits independantly. + At the moment, ampycloud **cannot** use backscatter profiles to derive cloud base hits independantly. + + .. note:: + ampycloud does not challenge the quality/nature of the cloud base hits that it is being provided. + It trusts them all fully and equally. The capacity of the algorithm to provide an accurate + assessment of cloud layers above an aerodrome is thus directly limited by the ability of + ceilometers to report clouds up to the aerodrome's Minimum Sector Altitude in the first place. 
* **Where:** ampycloud lives in `a dedicated repository `_ under the `MeteoSwiss organization `_ on Github, where you can From 3bf417ca749f5e7ea5a3068abaec500288f327cf Mon Sep 17 00:00:00 2001 From: 13974615+fpavogt <13974615+fpavogt@users.noreply.github.com> Date: Wed, 26 Jun 2024 10:36:29 +0200 Subject: [PATCH 46/51] Allow to trigger the weekly tests manually --- .github/workflows/CI_pytest_weekly.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/CI_pytest_weekly.yml b/.github/workflows/CI_pytest_weekly.yml index 35ae8cc..ed1ef5a 100644 --- a/.github/workflows/CI_pytest_weekly.yml +++ b/.github/workflows/CI_pytest_weekly.yml @@ -10,6 +10,7 @@ name: CI_pytest_weekly on: + workflow_dispatch: # Allow to trigger the weekly tests manually schedule: - cron: '43 21 * * 1,4' # Run at 21:43 UTC every Monday and Thursday From 9e52f4d3911745f6a01b29c1edef9fc7177de2fb Mon Sep 17 00:00:00 2001 From: 13974615+fpavogt <13974615+fpavogt@users.noreply.github.com> Date: Wed, 26 Jun 2024 10:38:16 +0200 Subject: [PATCH 47/51] Update CHANGELOG --- CHANGELOG | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG b/CHANGELOG index d4c16c8..1bc0137 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -5,6 +5,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm ## [v2.0.0] ### Added: + - [fpavogt, 2024-06-26] Add ability to trigger weekly tests manually. - [fpavogt, 2024-04-16] Improve input data consistency check (fix #116). - [regDaniel, 2024-04-09] Add flag for clouds above (MSA + MSA_HIT_BUFFER) and allow for NSC in METAR message. - [fpavogt, 2024-03-26] Add option to reset only a single parameter. 
From 5843bb79fbe9bcc9f6ed78ef1606fe5c8e05dcbd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20P=2EA=2E=20Vogt?= <13974615+fpavogt@users.noreply.github.com> Date: Wed, 26 Jun 2024 16:34:28 +0200 Subject: [PATCH 48/51] Fix manual trigger of automated tests (#131) * Fix manual trigger --- .github/workflows/CI_pytest_weekly.yml | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/.github/workflows/CI_pytest_weekly.yml b/.github/workflows/CI_pytest_weekly.yml index ed1ef5a..421d69d 100644 --- a/.github/workflows/CI_pytest_weekly.yml +++ b/.github/workflows/CI_pytest_weekly.yml @@ -23,15 +23,23 @@ jobs: os: [ubuntu-latest, macos-latest, windows-latest] steps: - # Checkout the master branch from the repository + # Checkout the master branch if this is a scheduled test # The idea for this Action is to spot issues with new dependency versions as soon as they # are released (and not when we decide to update ampycloud). - - name: Checkout current repository + - name: Checkout current repository (master branch) + if: ${{ github.event_name == 'schedule' }} uses: actions/checkout@v4 with: repository: MeteoSwiss/ampycloud ref: master + # Alternatively, checkout whichever branch was selected by the user upon the manual trigger. + - name: Checkout current repository (custom branch) + if: ${{ github.event_name == 'workflow_dispatch' }} + uses: actions/checkout@v4 + with: + repository: MeteoSwiss/ampycloud + # Setup python - name: Set up Python uses: actions/setup-python@v5 From 4d9ca2784852efc0be919688d238f2d5400a2c4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20P=2EA=2E=20Vogt?= <13974615+fpavogt@users.noreply.github.com> Date: Fri, 28 Jun 2024 09:56:03 +0200 Subject: [PATCH 49/51] Update CONTRIBUTING.md Minor clean-up of the Contributing guidelines. 
--- CONTRIBUTING.md | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 00b3f55..c1c6f69 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -2,15 +2,15 @@ If you: -* :boom: want to **report a bug** with ampycloud: [jump here.](https://github.com/MeteoSwiss/ampycloud/issues) -* :question: have a **question** about ampycloud: [jump here instead.](https://github.com/MeteoSwiss/ampycloud/discussions) -* :construction_worker: want to **contribute** to ampycloud (:heart_eyes: :tada:): read on ! +* :boom: want to **report a bug** with ampycloud: [jump here.](https://github.com/MeteoSwiss/ampycloud/issues) +* :question: have a **question** about ampycloud: [jump here instead.](https://github.com/MeteoSwiss/ampycloud/discussions) +* :construction_worker: want to **contribute** to ampycloud, read on ! ## Table of contents - [Code of conduct](#code-of-conduct) -- [Scope of ampycloud](#scope-of-ampycloud) +- [Scope](#scope) - [Essential things to know about ampycloud for dev work](#essential-things-to-know-about-ampycloud-for-dev-work) - [Branching model](#branching-model) - [Installing from source](#installing-from-source) @@ -31,10 +31,9 @@ If you: ## Code of conduct This project and everyone participating in it is governed by the [ampycloud Code of Conduct](CODE_OF_CONDUCT.md). By participating, you are expected to uphold this code. -Please report unacceptable behavior to [loris.foresti@meteoswiss.ch](mailto:loris.foresti@meteoswiss.ch) and/or [frederic.vogt@meteoswiss.ch](mailto:frederic.vogt@meteoswiss.ch). +Please report unacceptable behavior to [loris.foresti@meteoswiss.ch](mailto:loris.foresti@meteoswiss.ch). - -## Scope of ampycloud +## Scope Please be sure to read (and understand the implications) of the [scope of ampycloud](https://meteoswiss.github.io/ampycloud/index.html#scope-of-ampycloud). 
@@ -86,9 +85,11 @@ Automated CI/CD checks are triggered upon Pull Requests being issued towards the * automatic publication of the Sphinx docs (for a PR to `master` only) * check that the code version was incremented (for PR to `master` only) -To test the code with the latest Python developments, a `pytest-weeklz` workflow runs the +To test the latest release of the code with the latest Python developments, a `pytest-weekly` workflow runs the ampycloud tests twice a week using the latest version of Python and of the ampycloud dependencies. +:warning: This test is being run from the `master` branch. Pushing a bug fix to `develop` will not be sufficient to make it turn green - a new code release is necessary ! + There is another Github action responsible for publishing the code onto pypi, that gets triggered upon a new release or pre-release being published. See the ampycloud [release mechanisms](#release-mechanisms) for details. @@ -331,7 +332,7 @@ create a new release of ampycloud: ## Less-Essential things to know about ampycloud for dev work ### Updating the copyright years -The ampycloud copyright years may need to be updated if the development goes on beyond 2022. If so, +The ampycloud copyright years may need to be updated if the development goes on beyond 2022 (which it already has 😉). 
If so, the copyright years will need to be manually updated in the following locations: * `docs/source/substitutions.rst` (the copyright tag) From cb70e14c810b664d979bd03abb9f33fbc28df2bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20P=2EA=2E=20Vogt?= <13974615+fpavogt@users.noreply.github.com> Date: Tue, 2 Jul 2024 16:04:55 +0200 Subject: [PATCH 50/51] Update CONTRIBUTING.md --- CONTRIBUTING.md | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index c1c6f69..92facbb 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -23,7 +23,7 @@ If you: - [Documentation](#documentation) - [Testing](#testing) - [Plotting](#plotting) - - [Release mechanisms](#release-mechanisms) + - [Release mechanism](#release-mechanism) - [Less-Essential things to know about ampycloud for dev work](#less-essential-things-to-know-about-ampycloud-for-dev-work) - [Updating the copyright years](#updating-the-copyright-years) @@ -294,7 +294,7 @@ def some_plot_function(...): With this decorator, all functions will automatically deploy the effects associated to the value of `dynamic.AMPYCLOUD_PRMS['MPL_STYLE']` which can take one of the following values: `['base', 'latex', 'metsymb']`. -### Release mechanisms +### Release mechanism When changes merged in the `develop` branch are stable and deemed *worthy*, follow these steps to create a new release of ampycloud: @@ -326,8 +326,9 @@ create a new release of ampycloud: - on the [release page](https://github.com/MeteoSwiss/ampycloud/releases), - in the [README](https://github.com/MeteoSwiss/ampycloud/blob/develop/README.md) tags, - on [testpypi](https://test.pypi.org/project/ampycloud/) and [pypi](https://pypi.org/project/ampycloud/), - - on the [`gh-pages` branch](https://github.com/MeteoSwiss/ampycloud/tree/gh-pages), and - - in the [live documentation](https://MeteoSwiss.github.io/ampycloud). 
+ - on the [`gh-pages` branch](https://github.com/MeteoSwiss/ampycloud/tree/gh-pages), + - in the [live documentation](https://MeteoSwiss.github.io/ampycloud), and + - on [Zenodo](https://zenodo.org/doi/10.5281/zenodo.8399683) (for which the connection to this repo is enbaled from Zenodo itself, by the admins of the MeteoSwiss organization on Github). ## Less-Essential things to know about ampycloud for dev work From 6818b2f39a8287b6cbd99e2743056a939d1bda9c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20P=2EA=2E=20Vogt?= <13974615+fpavogt@users.noreply.github.com> Date: Tue, 2 Jul 2024 16:05:51 +0200 Subject: [PATCH 51/51] Update CONTRIBUTING.md --- CONTRIBUTING.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 92facbb..1488b6f 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -328,7 +328,7 @@ create a new release of ampycloud: - on [testpypi](https://test.pypi.org/project/ampycloud/) and [pypi](https://pypi.org/project/ampycloud/), - on the [`gh-pages` branch](https://github.com/MeteoSwiss/ampycloud/tree/gh-pages), - in the [live documentation](https://MeteoSwiss.github.io/ampycloud), and - - on [Zenodo](https://zenodo.org/doi/10.5281/zenodo.8399683) (for which the connection to this repo is enbaled from Zenodo itself, by the admins of the MeteoSwiss organization on Github). + - on [Zenodo](https://zenodo.org/doi/10.5281/zenodo.8399683) (for which the connection to this repo is enabled from Zenodo itself, by the admins of the MeteoSwiss organization on Github). ## Less-Essential things to know about ampycloud for dev work