From b15f4ed4acb3d42eca5fbc6cd8892f5aa676f47b Mon Sep 17 00:00:00 2001 From: Phil Elson Date: Thu, 3 May 2018 06:54:01 +0100 Subject: [PATCH] Prevent creation of invalid CF variable names. (#3009) Prevent creation of invalid CF variable names. --- ...bugfix_2018-May-03_var_name_constraint.txt | 2 ++ lib/iris/fileformats/netcdf.py | 24 +++++++++++++++ .../unit/fileformats/netcdf/test_Saver.py | 29 ++++++++++++++++++- 3 files changed, 54 insertions(+), 1 deletion(-) create mode 100644 docs/iris/src/whatsnew/contributions_2.1/bugfix_2018-May-03_var_name_constraint.txt diff --git a/docs/iris/src/whatsnew/contributions_2.1/bugfix_2018-May-03_var_name_constraint.txt b/docs/iris/src/whatsnew/contributions_2.1/bugfix_2018-May-03_var_name_constraint.txt new file mode 100644 index 0000000000..4ffddcb7be --- /dev/null +++ b/docs/iris/src/whatsnew/contributions_2.1/bugfix_2018-May-03_var_name_constraint.txt @@ -0,0 +1,2 @@ +* All var names being written to NetCDF are now CF compliant. Non-alphanumeric characters are replaced with '_', and names that do not begin with a letter are prefixed with 'var_'. + Ref: https://github.com/SciTools/iris/pull/2930 diff --git a/lib/iris/fileformats/netcdf.py b/lib/iris/fileformats/netcdf.py index 587b1e05f5..5aa3e54a11 100644 --- a/lib/iris/fileformats/netcdf.py +++ b/lib/iris/fileformats/netcdf.py @@ -1339,6 +1339,27 @@ def _get_dim_names(self, cube): dimension_names.append(dim_name) return dimension_names + @staticmethod + def cf_valid_var_name(var_name): + """ + Return a valid CF var_name given a potentially invalid name. + + Args: + + * var_name (str): + The var_name to normalise + + Returns: + A var_name suitable for passing through for variable creation. + + """ + # Replace invalid characters with an underscore ("_"). + var_name = re.sub(r'[^a-zA-Z0-9]', "_", var_name) + # Ensure the variable name starts with a letter. 
+ if re.match(r'^[^a-zA-Z]', var_name): + var_name = 'var_{}'.format(var_name) + return var_name + @staticmethod def _cf_coord_identity(coord): """ @@ -1448,6 +1469,7 @@ def _get_cube_variable_name(self, cube): # Convert to lower case and replace whitespace by underscores. cf_name = '_'.join(cube.name().lower().split()) + cf_name = self.cf_valid_var_name(cf_name) return cf_name def _get_coord_variable_name(self, cube, coord): @@ -1480,6 +1502,8 @@ def _get_coord_variable_name(self, cube, coord): name = 'unknown_scalar' # Convert to lower case and replace whitespace by underscores. cf_name = '_'.join(name.lower().split()) + + cf_name = self.cf_valid_var_name(cf_name) return cf_name def _create_cf_cell_measure_variable(self, cube, dimension_names, diff --git a/lib/iris/tests/unit/fileformats/netcdf/test_Saver.py b/lib/iris/tests/unit/fileformats/netcdf/test_Saver.py index 6cd5b62a3b..f4b77df15c 100644 --- a/lib/iris/tests/unit/fileformats/netcdf/test_Saver.py +++ b/lib/iris/tests/unit/fileformats/netcdf/test_Saver.py @@ -1,4 +1,4 @@ -# (C) British Crown Copyright 2013 - 2017, Met Office +# (C) British Crown Copyright 2013 - 2018, Met Office # # This file is part of Iris. # @@ -472,6 +472,33 @@ def test_masked_byte_fill_value_passed(self): pass +class Test_cf_valid_var_name(tests.IrisTest): + def test_no_replacement(self): + self.assertEqual(Saver.cf_valid_var_name('valid_Nam3'), + 'valid_Nam3') + + def test_special_chars(self): + self.assertEqual(Saver.cf_valid_var_name('inv?alid'), + 'inv_alid') + + def test_leading_underscore(self): + self.assertEqual(Saver.cf_valid_var_name('_invalid'), + 'var__invalid') + + def test_leading_number(self): + self.assertEqual(Saver.cf_valid_var_name('2invalid'), + 'var_2invalid') + + def test_leading_invalid(self): + self.assertEqual(Saver.cf_valid_var_name('?invalid'), + 'var__invalid') + + def test_no_hyphen(self): + # CF explicitly prohibits hyphen, even though it is fine in NetCDF. 
+ self.assertEqual(Saver.cf_valid_var_name('valid-netcdf'), + 'valid_netcdf') + + class _Common__check_attribute_compliance(object): def setUp(self): self.container = mock.Mock(name='container', attributes={})