From 9c119506a9f9a67929491ad79011bcb323cec39e Mon Sep 17 00:00:00 2001 From: Cory Francis Myers Date: Wed, 6 Apr 2022 19:04:33 -0700 Subject: [PATCH 1/5] feat: validate SDConfig.SUPPORTED_LANGUAGES for *usable* locales A locale is considered usable if it is both (a) available in the filesystem and (b) configured by the administrator in SDConfig.SUPPORTED_LANGUAGES. Once we've determined which configured locales are actually usable, we: 1. warn if a configured locale is not available; 2. fall back to the hard-coded FALLBACK_LOCALE ("en_US") if SDConfig.DEFAULT_LOCALE is not usable; and 3. error out if neither the default nor the fallback locale is usable. --- securedrop/i18n.py | 78 +++++++++++++++++++------- securedrop/sdconfig.py | 5 +- securedrop/tests/test_i18n.py | 101 ++++++++++++++++++++++++++++++---- 3 files changed, 150 insertions(+), 34 deletions(-) diff --git a/securedrop/i18n.py b/securedrop/i18n.py index 4533fac04b..b2d615861a 100644 --- a/securedrop/i18n.py +++ b/securedrop/i18n.py @@ -17,7 +17,7 @@ # import collections -from typing import Dict, List +from typing import Dict, List, Set from babel.core import ( Locale, @@ -29,7 +29,7 @@ from flask import Flask, g, request, session from flask_babel import Babel -from sdconfig import SDConfig +from sdconfig import SDConfig, FALLBACK_LOCALE class RequestLocaleInfo: @@ -126,32 +126,46 @@ def configure_babel(config: SDConfig, app: Flask) -> Babel: return babel +def parse_locale_set(codes: List[str]) -> Set[Locale]: + return {Locale.parse(code) for code in codes} + + def validate_locale_configuration(config: SDConfig, babel: Babel) -> None: """ - Ensure that the configured locales are valid and translated. + Check that configured locales are available in the filesystem and therefore usable by + Babel. Warn about configured locales that are not usable, unless we're left with + no usable default or fallback locale, in which case raise an exception. """ - if config.DEFAULT_LOCALE not in config.SUPPORTED_LOCALES: - raise ValueError( - 'The default locale "{}" is not included in the set of supported locales "{}"'.format( - config.DEFAULT_LOCALE, config.SUPPORTED_LOCALES - ) + # These locales are available and loadable from the filesystem. + available = set(babel.list_translations()) + available.add(Locale.parse(FALLBACK_LOCALE)) + + # These locales were configured via "securedrop-admin sdconfig", meaning + # they were present on the Admin Workstation at "securedrop-admin" runtime. + configured = parse_locale_set(config.SUPPORTED_LOCALES) + + # The intersection of these sets is the set of locales usable by Babel. + usable = available & configured + + missing = configured - usable + if missing: + babel.app.logger.error( + f'Configured locales {missing} are not in the set of usable locales {usable}' ) - translations = babel.list_translations() - for locale in config.SUPPORTED_LOCALES: - if locale == "en_US": - continue + defaults = parse_locale_set([config.DEFAULT_LOCALE, FALLBACK_LOCALE]) + if not defaults & usable: + raise ValueError( + f'None of the default locales {defaults} are in the set of usable locales {usable}' + ) - parsed = Locale.parse(locale) - if parsed not in translations: - raise ValueError( - 'Configured locale "{}" is not in the set of translated locales "{}"'.format( - parsed, translations - ) - ) + global USABLE_LOCALES + USABLE_LOCALES = usable +# TODO(#6420): avoid relying on and manipulating on this global state LOCALES = collections.OrderedDict() # type: collections.OrderedDict[str, RequestLocaleInfo] +USABLE_LOCALES = set() # type: Set[Locale] def map_locale_display_names(config: SDConfig) -> None: @@ -185,6 +199,28 @@ def configure(config: SDConfig, app: Flask) -> None: map_locale_display_names(config) +def resolve_fallback_locale(config: SDConfig) -> str: + """ + Return a *usable* fallback locale. Namely: + + 1. Don't fall back to the configured `DEFAULT_LOCALE` if it isn't available. + + 2. Don't fall back to the hard-coded `FALLBACK_LOCALE` (`en_US`) if it isn't configured. + + NB. If neither the default nor the fallback locale is usable, then we should have crashed + already in `validate_locale_configuration()`. + """ + + if Locale.parse(config.DEFAULT_LOCALE) in USABLE_LOCALES: + return config.DEFAULT_LOCALE + + elif Locale.parse(FALLBACK_LOCALE) in USABLE_LOCALES: + return FALLBACK_LOCALE + + else: + raise ValueError('No usable fallback locale') + + def get_locale(config: SDConfig) -> str: """ Return the best supported locale for a request. @@ -208,8 +244,8 @@ def get_locale(config: SDConfig) -> str: if not locale: locale = negotiate_locale(get_accepted_languages(), LOCALES.keys()) - # Finally, fall back to the default locale if necessary. - return locale or config.DEFAULT_LOCALE + # Finally, if we can't negotiate a requested locale, resolve a fallback. + return locale or resolve_fallback_locale(config) def get_accepted_languages() -> List[str]: diff --git a/securedrop/sdconfig.py b/securedrop/sdconfig.py index 64db5813d3..3c01b990cb 100644 --- a/securedrop/sdconfig.py +++ b/securedrop/sdconfig.py @@ -8,6 +8,9 @@ from typing import Set +FALLBACK_LOCALE = "en_US" + + class SDConfig: def __init__(self) -> None: try: @@ -120,7 +123,7 @@ def __init__(self) -> None: # Config entries used by i18n.py # Use en_US as the default locale if the key is not defined in _config self.DEFAULT_LOCALE = getattr( - _config, "DEFAULT_LOCALE", "en_US" + _config, "DEFAULT_LOCALE", FALLBACK_LOCALE, ) # type: str supported_locales = set(getattr( _config, "SUPPORTED_LOCALES", [self.DEFAULT_LOCALE] diff --git a/securedrop/tests/test_i18n.py b/securedrop/tests/test_i18n.py index 1552e619a5..1ede55eaf7 100644 --- a/securedrop/tests/test_i18n.py +++ b/securedrop/tests/test_i18n.py @@ -32,13 +32,17 @@ from flask import request from flask import session from flask_babel import gettext -from sdconfig import SDConfig +from i18n import parse_locale_set, resolve_fallback_locale +from sdconfig import FALLBACK_LOCALE, SDConfig from sh import pybabel from sh import sed from .utils.env import TESTS_DIR from werkzeug.datastructures import Headers +NEVER_LOCALE = 'eo' # Esperanto + + def verify_i18n(app): not_translated = 'code hello i18n' translated_fr = 'code bonjour' @@ -225,32 +229,105 @@ def test_i18n(journalist_app, config): verify_i18n(app) -def test_supported_locales(config): +def test_parse_locale_set(): + assert parse_locale_set([FALLBACK_LOCALE]) == set([Locale.parse(FALLBACK_LOCALE)]) + + +def test_resolve_fallback_locale(config): + """ + Only a usable default or fallback locale is returned. + """ + i18n.USABLE_LOCALES = parse_locale_set([FALLBACK_LOCALE, 'es_ES']) fake_config = SDConfig() - # Check that an invalid locale raises an error during app - # configuration. - fake_config.SUPPORTED_LOCALES = ['en_US', 'yy_ZZ'] + # The default locale is neither configured nor available. + fake_config.DEFAULT_LOCALE = NEVER_LOCALE + assert resolve_fallback_locale(fake_config) == FALLBACK_LOCALE + + # The default locale is configured but not available. + fake_config.SUPPORTED_LOCALES = [FALLBACK_LOCALE, NEVER_LOCALE] + assert resolve_fallback_locale(fake_config) == FALLBACK_LOCALE + + # The default locale is available but not configured. + fake_config.SUPPORTED_LOCALES = [FALLBACK_LOCALE] + fake_config.DEFAULT_LOCALE = NEVER_LOCALE + assert resolve_fallback_locale(fake_config) == FALLBACK_LOCALE + + # Happy path: a non-fallback default locale is both available and configured. + fake_config.SUPPORTED_LOCALES = [FALLBACK_LOCALE, 'es_ES'] + fake_config.DEFAULT_LOCALE = 'es_ES' + assert resolve_fallback_locale(fake_config) == 'es_ES' + + +def test_no_usable_fallback_locale(journalist_app, config): + """ + The apps fail if neither the default nor the fallback locale is usable. + """ + fake_config = SDConfig() + fake_config.DEFAULT_LOCALE = NEVER_LOCALE + fake_config.SUPPORTED_LOCALES = [NEVER_LOCALE] fake_config.TRANSLATION_DIRS = Path(config.TEMP_DIR) - with pytest.raises(UnknownLocaleError): + i18n.USABLE_LOCALES = set() + with pytest.raises(ValueError, match='No usable fallback locale'): + resolve_fallback_locale(fake_config) + + with pytest.raises(ValueError, match='in the set of usable locales'): journalist_app_module.create_app(fake_config) - with pytest.raises(UnknownLocaleError): + with pytest.raises(ValueError, match='in the set of usable locales'): source_app.create_app(fake_config) - # Check that a valid but unsupported locale raises an error during - # app configuration. - fake_config.SUPPORTED_LOCALES = ['en_US', 'wae_CH'] + +def test_unusable_default_but_usable_fallback_locale(config, caplog): + """ + The apps start even if the default locale is unusable, as along as the fallback locale is + usable, but log an error for OSSEC to pick up. + """ + fake_config = SDConfig() + fake_config.DEFAULT_LOCALE = NEVER_LOCALE + fake_config.SUPPORTED_LOCALES = [NEVER_LOCALE, FALLBACK_LOCALE] + fake_config.TRANSLATION_DIRS = Path(config.TEMP_DIR) + + for app in (journalist_app_module.create_app(fake_config), + source_app.create_app(fake_config)): + with app.app_context(): + assert NEVER_LOCALE in caplog.text + assert 'not in the set of usable locales' in caplog.text + + +def test_invalid_locales(config): + """ + An invalid locale raises an error during app configuration. + """ + fake_config = SDConfig() + fake_config.SUPPORTED_LOCALES = [FALLBACK_LOCALE, 'yy_ZZ'] fake_config.TRANSLATION_DIRS = Path(config.TEMP_DIR) - with pytest.raises(ValueError, match="not in the set of translated locales"): + with pytest.raises(UnknownLocaleError): journalist_app_module.create_app(fake_config) - with pytest.raises(ValueError, match="not in the set of translated locales"): + with pytest.raises(UnknownLocaleError): source_app.create_app(fake_config) +def test_valid_but_unusable_locales(config, caplog): + """ + The apps start with one or more unusable, but still valid, locales, but log an error for + OSSEC to pick up. + """ + fake_config = SDConfig() + + fake_config.SUPPORTED_LOCALES = [FALLBACK_LOCALE, 'wae_CH'] + fake_config.TRANSLATION_DIRS = Path(config.TEMP_DIR) + + for app in (journalist_app_module.create_app(fake_config), + source_app.create_app(fake_config)): + with app.app_context(): + assert 'wae' in caplog.text + assert 'not in the set of usable locales' in caplog.text + + def test_language_tags(): assert i18n.RequestLocaleInfo(Locale.parse('en')).language_tag == 'en' assert i18n.RequestLocaleInfo(Locale.parse('en-US', sep="-")).language_tag == 'en-US' From 20c59fd416b64d18ad4d18be4937f1e386a42c8b Mon Sep 17 00:00:00 2001 From: Cory Francis Myers Date: Tue, 19 Apr 2022 16:02:17 -0700 Subject: [PATCH 2/5] feat: map_locale_display_names(): skip (hide) unusable locales We can't iterate directly over USABLE_LOCALES: Set(Locale) because Locales aren't sortable. Instead, we parse config.SUPPORTED_LOCALES in order and skip those that aren't in USABLE_LOCALES. --- securedrop/i18n.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/securedrop/i18n.py b/securedrop/i18n.py index b2d615861a..8fe04c04f5 100644 --- a/securedrop/i18n.py +++ b/securedrop/i18n.py @@ -179,11 +179,17 @@ def map_locale_display_names(config: SDConfig) -> None: """ language_locale_counts = collections.defaultdict(int) # type: Dict[str, int] for l in sorted(config.SUPPORTED_LOCALES): + if Locale.parse(l) not in USABLE_LOCALES: + continue + locale = RequestLocaleInfo(l) language_locale_counts[locale.language] += 1 locale_map = collections.OrderedDict() for l in sorted(config.SUPPORTED_LOCALES): + if Locale.parse(l) not in USABLE_LOCALES: + continue + locale = RequestLocaleInfo(l) if language_locale_counts[locale.language] > 1: locale.use_display_name = True From 9621639db7499f94da20a241780b870f31149fb8 Mon Sep 17 00:00:00 2001 From: Cory Francis Myers Date: Mon, 2 May 2022 16:49:47 -0700 Subject: [PATCH 3/5] refactor(map_locale_display_names): use set rather than looping twice to disambiguate SUPPORTED_LOCALES thread: https://github.com/freedomofpress/securedrop/pull/6406#discussion_r863117986 --- securedrop/i18n.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/securedrop/i18n.py b/securedrop/i18n.py index 8fe04c04f5..c8ed590995 100644 --- a/securedrop/i18n.py +++ b/securedrop/i18n.py @@ -17,7 +17,7 @@ # import collections -from typing import Dict, List, Set +from typing import List, Set from babel.core import ( Locale, @@ -177,22 +177,19 @@ def map_locale_display_names(config: SDConfig) -> None: to distinguish them. For languages with more than one translation, like Chinese, we do need the additional detail. """ - language_locale_counts = collections.defaultdict(int) # type: Dict[str, int] - for l in sorted(config.SUPPORTED_LOCALES): - if Locale.parse(l) not in USABLE_LOCALES: - continue - - locale = RequestLocaleInfo(l) - language_locale_counts[locale.language] += 1 - + seen: Set[str] = set() locale_map = collections.OrderedDict() for l in sorted(config.SUPPORTED_LOCALES): if Locale.parse(l) not in USABLE_LOCALES: continue locale = RequestLocaleInfo(l) - if language_locale_counts[locale.language] > 1: + if locale.language in seen: + # Disambiguate translations for this language. locale.use_display_name = True + else: + seen.add(locale.language) + locale_map[str(locale)] = locale global LOCALES From eb2a042f59dc353e34ca0444029d154d28b0a7d3 Mon Sep 17 00:00:00 2001 From: Cory Francis Myers Date: Mon, 2 May 2022 17:45:30 -0700 Subject: [PATCH 4/5] fix(get_locale): don't assume the session-provided locale is usable This is a safe assumption except in the corner case where (e.g.): 1. A user begins a session and selects language L. 2. The Application Server upgrades to a version of securedrop-app-code that removes support for language L. 3. The user resumes their session, which still has language L selected. But, since this case has come up in testing[1], best to check for it. [1]: https://github.com/freedomofpress/securedrop/pull/6406#issuecomment-1115257623 --- securedrop/i18n.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/securedrop/i18n.py b/securedrop/i18n.py index c8ed590995..e0df618a84 100644 --- a/securedrop/i18n.py +++ b/securedrop/i18n.py @@ -233,10 +233,12 @@ def get_locale(config: SDConfig) -> str: - browser suggested locale, from the Accept-Languages header - config.DEFAULT_LOCALE """ - # Default to any locale set in the session. + # Default to any usable locale set in the session. locale = session.get("locale") + if locale: + locale = negotiate_locale([locale], LOCALES.keys()) - # A valid locale specified in request.args takes precedence. + # A usable locale specified in request.args takes precedence. if request.args.get("l"): negotiated = negotiate_locale([request.args["l"]], LOCALES.keys()) if negotiated: From 20c7212ffd258fbf38a6b0c5d01272794052ed27 Mon Sep 17 00:00:00 2001 From: Cory Francis Myers Date: Mon, 2 May 2022 18:06:14 -0700 Subject: [PATCH 5/5] refactor(get_locale): resolve locale from single list of preferences As of 9c11950, get_locale() handled user/agent-provided locale preferences and deferred to resolve_fallback_locale() if none could be satisfied. Here we build a single list of locales, beginning with user/agent preferences and ending with the server-side {DEFAULT_FALLBACK}_LOCALEs, and call babel.core.negotiate_locale() once to negotiate one (if possible). --- securedrop/i18n.py | 51 ++++++++++------------------------- securedrop/tests/test_i18n.py | 30 +-------------------- 2 files changed, 15 insertions(+), 66 deletions(-) diff --git a/securedrop/i18n.py b/securedrop/i18n.py index e0df618a84..1169e6762a 100644 --- a/securedrop/i18n.py +++ b/securedrop/i18n.py @@ -202,28 +202,6 @@ def configure(config: SDConfig, app: Flask) -> None: map_locale_display_names(config) -def resolve_fallback_locale(config: SDConfig) -> str: - """ - Return a *usable* fallback locale. Namely: - - 1. Don't fall back to the configured `DEFAULT_LOCALE` if it isn't available. - - 2. Don't fall back to the hard-coded `FALLBACK_LOCALE` (`en_US`) if it isn't configured. - - NB. If neither the default nor the fallback locale is usable, then we should have crashed - already in `validate_locale_configuration()`. - """ - - if Locale.parse(config.DEFAULT_LOCALE) in USABLE_LOCALES: - return config.DEFAULT_LOCALE - - elif Locale.parse(FALLBACK_LOCALE) in USABLE_LOCALES: - return FALLBACK_LOCALE - - else: - raise ValueError('No usable fallback locale') - - def get_locale(config: SDConfig) -> str: """ Return the best supported locale for a request. @@ -232,25 +210,24 @@ def get_locale(config: SDConfig) -> str: - l request argument or session['locale'] - browser suggested locale, from the Accept-Languages header - config.DEFAULT_LOCALE + - config.FALLBACK_LOCALE """ - # Default to any usable locale set in the session. - locale = session.get("locale") - if locale: - locale = negotiate_locale([locale], LOCALES.keys()) - - # A usable locale specified in request.args takes precedence. + preferences = [] + if session.get("locale"): + preferences.append(session.get("locale")) if request.args.get("l"): - negotiated = negotiate_locale([request.args["l"]], LOCALES.keys()) - if negotiated: - locale = negotiated + preferences.insert(0, request.args.get("l")) + if not preferences: + preferences.extend(get_accepted_languages()) + preferences.append(config.DEFAULT_LOCALE) + preferences.append(FALLBACK_LOCALE) + + negotiated = negotiate_locale(preferences, LOCALES.keys()) - # If the locale is not in the session or request.args, negotiate - # the best supported option from the browser's accepted languages. - if not locale: - locale = negotiate_locale(get_accepted_languages(), LOCALES.keys()) + if not negotiated: + raise ValueError("No usable locale") - # Finally, if we can't negotiate a requested locale, resolve a fallback. - return locale or resolve_fallback_locale(config) + return negotiated def get_accepted_languages() -> List[str]: diff --git a/securedrop/tests/test_i18n.py b/securedrop/tests/test_i18n.py index 1ede55eaf7..8c47c15922 100644 --- a/securedrop/tests/test_i18n.py +++ b/securedrop/tests/test_i18n.py @@ -32,7 +32,7 @@ from flask import request from flask import session from flask_babel import gettext -from i18n import parse_locale_set, resolve_fallback_locale +from i18n import parse_locale_set from sdconfig import FALLBACK_LOCALE, SDConfig from sh import pybabel from sh import sed @@ -233,32 +233,6 @@ def test_parse_locale_set(): assert parse_locale_set([FALLBACK_LOCALE]) == set([Locale.parse(FALLBACK_LOCALE)]) -def test_resolve_fallback_locale(config): - """ - Only a usable default or fallback locale is returned. - """ - i18n.USABLE_LOCALES = parse_locale_set([FALLBACK_LOCALE, 'es_ES']) - fake_config = SDConfig() - - # The default locale is neither configured nor available. - fake_config.DEFAULT_LOCALE = NEVER_LOCALE - assert resolve_fallback_locale(fake_config) == FALLBACK_LOCALE - - # The default locale is configured but not available. - fake_config.SUPPORTED_LOCALES = [FALLBACK_LOCALE, NEVER_LOCALE] - assert resolve_fallback_locale(fake_config) == FALLBACK_LOCALE - - # The default locale is available but not configured. - fake_config.SUPPORTED_LOCALES = [FALLBACK_LOCALE] - fake_config.DEFAULT_LOCALE = NEVER_LOCALE - assert resolve_fallback_locale(fake_config) == FALLBACK_LOCALE - - # Happy path: a non-fallback default locale is both available and configured. - fake_config.SUPPORTED_LOCALES = [FALLBACK_LOCALE, 'es_ES'] - fake_config.DEFAULT_LOCALE = 'es_ES' - assert resolve_fallback_locale(fake_config) == 'es_ES' - - def test_no_usable_fallback_locale(journalist_app, config): """ The apps fail if neither the default nor the fallback locale is usable. @@ -269,8 +243,6 @@ def test_no_usable_fallback_locale(journalist_app, config): fake_config.TRANSLATION_DIRS = Path(config.TEMP_DIR) i18n.USABLE_LOCALES = set() - with pytest.raises(ValueError, match='No usable fallback locale'): - resolve_fallback_locale(fake_config) with pytest.raises(ValueError, match='in the set of usable locales'): journalist_app_module.create_app(fake_config)