From f7b71ece2ae146524c927061a6c08d00a329e3b4 Mon Sep 17 00:00:00 2001
From: Alex Ioannidis
Date: Sat, 16 Sep 2023 21:57:20 +0200
Subject: [PATCH] bibcode/ads: normalize unicode

* Closes #85.
---
 idutils/normalizers.py | 3 +++
 idutils/validators.py  | 2 ++
 tests/test_idutils.py  | 6 ++++++
 3 files changed, 11 insertions(+)

diff --git a/idutils/normalizers.py b/idutils/normalizers.py
index baeb3eb..a4c8b07 100644
--- a/idutils/normalizers.py
+++ b/idutils/normalizers.py
@@ -13,6 +13,8 @@
 
 """ID normalizer helper functions."""
 
+import unicodedata
+
 import isbnlib
 
 from .proxies import custom_schemes_registry
@@ -34,6 +36,7 @@ def normalize_handle(val):
 
 def normalize_ads(val):
     """Normalize an ADS bibliographic code."""
+    val = unicodedata.normalize("NFKD", val)
     m = ads_regexp.match(val)
     return m.group(2)
 
diff --git a/idutils/validators.py b/idutils/validators.py
index 364279f..df6e96f 100644
--- a/idutils/validators.py
+++ b/idutils/validators.py
@@ -14,6 +14,7 @@
 
 """Utility file containing ID validators."""
 
+import unicodedata
 from urllib.parse import urlparse
 
 from .utils import *
@@ -187,6 +188,7 @@ def is_urn(val):
 
 def is_ads(val):
     """Test if argument is an ADS bibliographic code."""
+    val = unicodedata.normalize("NFKD", val)
     return ads_regexp.match(val)
 
 
diff --git a/tests/test_idutils.py b/tests/test_idutils.py
index c5eeaa0..1da757c 100644
--- a/tests/test_idutils.py
+++ b/tests/test_idutils.py
@@ -268,6 +268,12 @@
         "2017zndo....495787v",
         "http://ui.adsabs.harvard.edu/#abs/2017zndo....495787v",
     ),
+    (
+        "1992ApJ…400L…1W",
+        ["ads"],
+        "1992ApJ...400L...1W",
+        "http://ui.adsabs.harvard.edu/#abs/1992ApJ...400L...1W",
+    ),
     (
         "0000000218250097",
         ["orcid", "isni"],