Skip to content

Commit

Permalink
Using MultiForkByKeyProvider for all datagen (#1615)
Browse files Browse the repository at this point in the history
  • Loading branch information
robertbastian authored Feb 16, 2022
1 parent 68e051a commit fde100e
Show file tree
Hide file tree
Showing 15 changed files with 257 additions and 458 deletions.
2 changes: 1 addition & 1 deletion provider/cldr/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

`icu_provider_cldr` contains implementations of the [`ICU4X`] [data provider] interface
based on the JSON files shipped by CLDR. Create a [`CldrPaths`] and then pass it into
[`CldrJsonDataProvider`].
[`create_exportable_provider`].

This crate contains two implementations of [`CldrPaths`]:

Expand Down
18 changes: 0 additions & 18 deletions provider/cldr/src/cldr_paths.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,6 @@ pub trait CldrPaths: std::fmt::Debug {
// more calendars here
vec
}

/// Path to uprops TOML data, which is required by some CLDR transformers
fn uprops(&self) -> Result<PathBuf, Error>;
}

/// An implementation of [`CldrPaths`] for multiple separate local CLDR JSON directories per
Expand Down Expand Up @@ -112,12 +109,6 @@ impl CldrPaths for CldrPathsLocal {
fn cldr_misc(&self) -> Result<PathBuf, Error> {
self.cldr_misc.clone().map_err(|e| e.into())
}
fn uprops(&self) -> Result<PathBuf, Error> {
Err(Error::Custom(
"This implementation does not know about uprops".to_owned(),
None,
))
}
}

impl Default for CldrPathsLocal {
Expand Down Expand Up @@ -151,7 +142,6 @@ impl Default for CldrPathsLocal {
/// let paths = CldrPathsAllInOne {
/// cldr_json_root: PathBuf::from("/path/to/cldr-json"),
/// locale_subset: "full".to_string(),
/// uprops_root: Some(PathBuf::from("path/to/uprops")),
/// };
///
/// assert_eq!(paths.cldr_misc().unwrap(), PathBuf::from("/path/to/cldr-json/cldr-misc-full"))
Expand All @@ -162,8 +152,6 @@ pub struct CldrPathsAllInOne {
pub cldr_json_root: PathBuf,
/// CLDR JSON directory suffix: probably either "modern" or "full"
pub locale_subset: String,
/// Path to uprops TOML root directory. Required by some CLDR transformers
pub uprops_root: Option<PathBuf>,
}

impl CldrPaths for CldrPathsAllInOne {
Expand Down Expand Up @@ -200,18 +188,12 @@ impl CldrPaths for CldrPathsAllInOne {
.clone()
.join(format!("cldr-misc-{}", self.locale_subset)))
}
fn uprops(&self) -> Result<PathBuf, Error> {
self.uprops_root
.clone()
.ok_or_else(|| Error::Custom("The uprops root has not been set".to_owned(), None))
}
}

#[cfg(test)]
pub(crate) fn for_test() -> CldrPathsAllInOne {
CldrPathsAllInOne {
cldr_json_root: icu_testdata::paths::cldr_json_root(),
locale_subset: "full".to_string(),
uprops_root: Some(icu_testdata::paths::uprops_toml_root()),
}
}
6 changes: 2 additions & 4 deletions provider/cldr/src/download/cldr_allinone.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,7 @@ use std::path::PathBuf;
/// let downloader = CldrAllInOneDownloader::try_new_from_github("38.1.0", "modern")
/// .expect("Cache directory not found");
///
/// let paths: Box<dyn CldrPaths> = Box::new(downloader.download(None)
/// .expect("The data should download successfully"));
/// let paths: Box<dyn CldrPaths> = Box::new(downloader.download().expect("The data should download successfully"));
/// ```
#[derive(Debug)]
pub struct CldrAllInOneDownloader {
Expand Down Expand Up @@ -65,13 +64,12 @@ impl CldrAllInOneDownloader {
})
}

pub fn download(self, uprops_root: Option<PathBuf>) -> Result<CldrPathsAllInOne, Error> {
pub fn download(self) -> Result<CldrPathsAllInOne, Error> {
// TODO(#297): Implement this async.
let downloaded = io_util::download_and_unzip(&self.url, &self.cache_dir)?;
Ok(CldrPathsAllInOne {
cldr_json_root: downloaded,
locale_subset: self.locale_subset,
uprops_root,
})
}
}
81 changes: 79 additions & 2 deletions provider/cldr/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

//! `icu_provider_cldr` contains implementations of the [`ICU4X`] [data provider] interface
//! based on the JSON files shipped by CLDR. Create a [`CldrPaths`] and then pass it into
//! [`CldrJsonDataProvider`].
//! [`create_exportable_provider`].
//!
//! This crate contains two implementations of [`CldrPaths`]:
//!
Expand Down Expand Up @@ -32,4 +32,81 @@ pub use cldr_paths::CldrPaths;
pub use cldr_paths::CldrPathsAllInOne;
pub use cldr_paths::CldrPathsLocal;
pub use error::Error as CldrError;
pub use transform::CldrJsonDataProvider;

use icu_provider::fork::by_key::MultiForkByKeyProvider;
use icu_provider::iter::IterableDynProvider;
use icu_provider::prelude::*;
use std::convert::TryFrom;
use std::path::PathBuf;
use transform::calendar::japanese::JapaneseErasProvider;
use transform::datetime::patterns::DatePatternsProvider;
use transform::datetime::skeletons::DateSkeletonPatternsProvider;
use transform::datetime::symbols::DateSymbolsProvider;
use transform::decimal::NumbersProvider;
#[cfg(feature = "icu_list")]
use transform::list::ListProvider;
use transform::locale_canonicalizer::aliases::AliasesProvider;
use transform::locale_canonicalizer::likely_subtags::LikelySubtagsProvider;
use transform::plurals::PluralsProvider;
use transform::time_zones::TimeZonesProvider;

// `create_exportable_provider` names `ListProvider` in its `where` clause, and part of a bound
// cannot be cfg-excluded. Without the `icu_list` feature the name is therefore aliased to
// `PluralsProvider`, which carries the same bound in that clause, making the extra bound redundant.
#[cfg(not(feature = "icu_list"))]
type ListProvider = PluralsProvider;

/// Builds a [`MultiForkByKeyProvider`] over the CLDR transformers of this crate.
///
/// Every transformer is constructed from `cldr_paths`, boxed as a
/// `dyn IterableDynProvider<T>` and stored in the `providers` list of the returned value.
///
/// `_uprops_root` is forwarded to `ListProvider` only, so it is consumed solely when the
/// `icu_list` feature is enabled. The leading underscore is what keeps the parameter from
/// being reported as unused when that feature is off.
///
/// # Errors
///
/// Returns the first error raised while constructing any of the individual providers,
/// converted into [`CldrError`].
pub fn create_exportable_provider<T: DataMarker>(
    cldr_paths: &dyn CldrPaths,
    _uprops_root: PathBuf,
) -> Result<MultiForkByKeyProvider<Box<dyn IterableDynProvider<T>>>, CldrError>
where
    AliasesProvider: IterableDynProvider<T>,
    DateSymbolsProvider: IterableDynProvider<T>,
    DateSkeletonPatternsProvider: IterableDynProvider<T>,
    DatePatternsProvider: IterableDynProvider<T>,
    JapaneseErasProvider: IterableDynProvider<T>,
    LikelySubtagsProvider: IterableDynProvider<T>,
    NumbersProvider: IterableDynProvider<T>,
    PluralsProvider: IterableDynProvider<T>,
    TimeZonesProvider: IterableDynProvider<T>,
    ListProvider: IterableDynProvider<T>,
{
    Ok(MultiForkByKeyProvider {
        providers: vec![
            Box::new(AliasesProvider::try_from(cldr_paths)?),
            Box::new(DateSymbolsProvider::try_from(cldr_paths)?),
            Box::new(DateSkeletonPatternsProvider::try_from(cldr_paths)?),
            Box::new(DatePatternsProvider::try_from(cldr_paths)?),
            Box::new(JapaneseErasProvider::try_from(cldr_paths)?),
            Box::new(LikelySubtagsProvider::try_from(cldr_paths)?),
            Box::new(NumbersProvider::try_from(cldr_paths)?),
            Box::new(PluralsProvider::try_from(cldr_paths)?),
            Box::new(TimeZonesProvider::try_from(cldr_paths)?),
            // The list transformer is the only one that is handed `_uprops_root`.
            #[cfg(feature = "icu_list")]
            Box::new(ListProvider::try_from(cldr_paths, _uprops_root)?),
        ],
    })
}

/// The [`ResourceKey`]s enumerated by this crate, as a fixed-size array.
///
/// The array holds 18 keys when the `icu_list` feature is enabled and 15 otherwise: the
/// three `icu_list` keys are cfg-gated, and the length expression must be kept in sync with
/// the number of entries whenever a key is added or removed.
// NOTE(review): presumably this is every key `create_exportable_provider` can serve — confirm
// when adding a transformer.
pub const ALL_KEYS: [ResourceKey; if cfg!(feature = "icu_list") { 18 } else { 15 }] = [
icu_calendar::provider::JapaneseErasV1Marker::KEY,
icu_datetime::provider::calendar::DatePatternsV1Marker::KEY,
icu_datetime::provider::calendar::DateSkeletonPatternsV1Marker::KEY,
icu_datetime::provider::calendar::DateSymbolsV1Marker::KEY,
icu_datetime::provider::time_zones::TimeZoneFormatsV1Marker::KEY,
icu_datetime::provider::time_zones::ExemplarCitiesV1Marker::KEY,
icu_datetime::provider::time_zones::MetaZoneGenericNamesLongV1Marker::KEY,
icu_datetime::provider::time_zones::MetaZoneGenericNamesShortV1Marker::KEY,
icu_datetime::provider::time_zones::MetaZoneSpecificNamesLongV1Marker::KEY,
icu_datetime::provider::time_zones::MetaZoneSpecificNamesShortV1Marker::KEY,
icu_decimal::provider::DecimalSymbolsV1Marker::KEY,
#[cfg(feature = "icu_list")]
icu_list::provider::AndListV1Marker::KEY,
#[cfg(feature = "icu_list")]
icu_list::provider::OrListV1Marker::KEY,
#[cfg(feature = "icu_list")]
icu_list::provider::UnitListV1Marker::KEY,
icu_locale_canonicalizer::provider::AliasesV1Marker::KEY,
icu_locale_canonicalizer::provider::LikelySubtagsV1Marker::KEY,
icu_plurals::provider::CardinalV1Marker::KEY,
icu_plurals::provider::OrdinalV1Marker::KEY,
];
17 changes: 8 additions & 9 deletions provider/cldr/src/transform/list/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,22 +10,20 @@ use icu_list::provider::*;
use icu_locid_macros::langid;
use icu_provider::iter::IterableResourceProvider;
use icu_provider::prelude::*;
use std::convert::TryFrom;
use std::path::PathBuf;

/// A data provider reading from CLDR JSON list rule files.
#[derive(Debug)]
pub struct ListProvider {
cldr_misc: PathBuf,
uprops_path: PathBuf,
uprops_root: PathBuf,
}

impl TryFrom<&dyn CldrPaths> for ListProvider {
type Error = Error;
fn try_from(cldr_paths: &dyn CldrPaths) -> Result<Self, Self::Error> {
impl ListProvider {
pub fn try_from(cldr_paths: &dyn CldrPaths, uprops_root: PathBuf) -> Result<Self, Error> {
Ok(Self {
cldr_misc: cldr_paths.cldr_misc()?,
uprops_path: cldr_paths.uprops()?,
uprops_root,
})
}
}
Expand Down Expand Up @@ -116,8 +114,8 @@ impl<M: ResourceMarker<Yokeable = ListFormatterPatternsV1<'static>>> ResourcePro
&format!(
"[^{}]",
icu_properties::sets::get_for_script(
&icu_provider_uprops::PropertiesDataProvider::try_new(
&self.uprops_path
&icu_provider_uprops::EnumeratedPropertyUnicodeSetDataProvider::try_new(
&self.uprops_root
)
.map_err(|e| DataError::custom("Properties data provider error")
.with_display_context(&e))?,
Expand Down Expand Up @@ -179,7 +177,8 @@ mod tests {
macro_rules! test {
($langid:literal, $type:ident, $(($input:expr, $output:literal),)+) => {
let cldr_paths = crate::cldr_paths::for_test();
let provider = ListProvider::try_from(&cldr_paths as &dyn CldrPaths).unwrap();
let provider = ListProvider::try_from(
&cldr_paths as &dyn CldrPaths, icu_testdata::paths::uprops_toml_root()).unwrap();
let f = ListFormatter::$type(langid!($langid), &provider, ListStyle::Wide).unwrap();
$(
assert_writeable_eq!(f.format($input.iter()), $output);
Expand Down
84 changes: 7 additions & 77 deletions provider/cldr/src/transform/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,81 +6,11 @@
//!
//! Every ICU4X component should have its own private submodule and then export the types from here.

mod calendar;
mod datetime;
mod decimal;
pub(crate) mod calendar;
pub(crate) mod datetime;
pub(crate) mod decimal;
#[cfg(feature = "icu_list")]
mod list;
mod locale_canonicalizer;
mod plurals;
mod time_zones;

use crate::error::Error;
use crate::CldrPaths;
use icu_provider::fork::by_key::MultiForkByKeyProvider;
use icu_provider::iter::IterableDynProvider;
use icu_provider::prelude::*;
use icu_provider::serde::SerializeMarker;
use std::convert::TryFrom;

pub struct CldrJsonDataProvider;

impl CldrJsonDataProvider {
pub fn try_new(
cldr_paths: &dyn CldrPaths,
) -> Result<MultiForkByKeyProvider<Box<dyn IterableDynProvider<SerializeMarker>>>, Error> {
Ok(MultiForkByKeyProvider {
providers: vec![
Box::new(locale_canonicalizer::aliases::AliasesProvider::try_from(
cldr_paths,
)?),
Box::new(datetime::symbols::DateSymbolsProvider::try_from(
cldr_paths,
)?),
Box::new(datetime::skeletons::DateSkeletonPatternsProvider::try_from(
cldr_paths,
)?),
Box::new(datetime::patterns::DatePatternsProvider::try_from(
cldr_paths,
)?),
Box::new(calendar::japanese::JapaneseErasProvider::try_from(
cldr_paths,
)?),
Box::new(
locale_canonicalizer::likely_subtags::LikelySubtagsProvider::try_from(
cldr_paths,
)?,
),
Box::new(decimal::NumbersProvider::try_from(cldr_paths)?),
Box::new(plurals::PluralsProvider::try_from(cldr_paths)?),
Box::new(time_zones::TimeZonesProvider::try_from(cldr_paths)?),
#[cfg(feature = "icu_list")]
Box::new(list::ListProvider::try_from(cldr_paths)?),
],
})
}

pub const ALL_KEYS: [ResourceKey; if cfg!(feature = "icu_list") { 18 } else { 15 }] = [
icu_calendar::provider::JapaneseErasV1Marker::KEY,
icu_datetime::provider::calendar::DatePatternsV1Marker::KEY,
icu_datetime::provider::calendar::DateSkeletonPatternsV1Marker::KEY,
icu_datetime::provider::calendar::DateSymbolsV1Marker::KEY,
icu_datetime::provider::time_zones::TimeZoneFormatsV1Marker::KEY,
icu_datetime::provider::time_zones::ExemplarCitiesV1Marker::KEY,
icu_datetime::provider::time_zones::MetaZoneGenericNamesLongV1Marker::KEY,
icu_datetime::provider::time_zones::MetaZoneGenericNamesShortV1Marker::KEY,
icu_datetime::provider::time_zones::MetaZoneSpecificNamesLongV1Marker::KEY,
icu_datetime::provider::time_zones::MetaZoneSpecificNamesShortV1Marker::KEY,
icu_decimal::provider::DecimalSymbolsV1Marker::KEY,
#[cfg(feature = "icu_list")]
icu_list::provider::AndListV1Marker::KEY,
#[cfg(feature = "icu_list")]
icu_list::provider::OrListV1Marker::KEY,
#[cfg(feature = "icu_list")]
icu_list::provider::UnitListV1Marker::KEY,
icu_locale_canonicalizer::provider::AliasesV1Marker::KEY,
icu_locale_canonicalizer::provider::LikelySubtagsV1Marker::KEY,
icu_plurals::provider::CardinalV1Marker::KEY,
icu_plurals::provider::OrdinalV1Marker::KEY,
];
}
pub(crate) mod list;
pub(crate) mod locale_canonicalizer;
pub(crate) mod plurals;
pub(crate) mod time_zones;
18 changes: 7 additions & 11 deletions provider/core/src/dynutil.rs
Original file line number Diff line number Diff line change
Expand Up @@ -159,10 +159,10 @@ where
/// [`SerializeMarker`]: (crate::serde::SerializeMarker)
#[macro_export]
macro_rules! impl_dyn_provider {
($provider:ty, { $($pat:pat => $struct_m:ty),+, }, ANY) => {
($provider:ty, { $($pat:pat $(if $guard:expr)? => $struct_m:ty),+, }, ANY) => {
$crate::impl_dyn_provider!(
$provider,
{ $($pat => $struct_m),+, },
{ $($pat $(if $guard)? => $struct_m),+, },
$crate::any::AnyMarker
);
};
Expand All @@ -173,11 +173,11 @@ macro_rules! impl_dyn_provider {
$crate::any::AnyMarker
);
};
($provider:ty, { $($pat:pat => $struct_m:ty),+, }, SERDE_SE) => {
($provider:ty, { $($pat:pat $(if $guard:expr)? => $struct_m:ty),+, }, SERDE_SE) => {
// If this fails to compile, enable the "serialize" feature on this crate.
$crate::impl_dyn_provider!(
$provider,
{ $($pat => $struct_m),+, },
{ $($pat $(if $guard)? => $struct_m),+, },
$crate::serde::SerializeMarker
);
};
Expand All @@ -189,7 +189,7 @@ macro_rules! impl_dyn_provider {
$crate::serde::SerializeMarker
);
};
($provider:ty, { $($pat:pat => $struct_m:ty),+, }, $dyn_m:path) => {
($provider:ty, { $($pat:pat $(if $guard:expr)? => $struct_m:ty),+, }, $dyn_m:path) => {
impl $crate::DynProvider<$dyn_m> for $provider
{
fn load_payload(
Expand All @@ -202,7 +202,7 @@ macro_rules! impl_dyn_provider {
> {
match key {
$(
$pat => {
$pat $(if $guard)? => {
let result: $crate::DataResponse<$struct_m> =
$crate::DynProvider::<$struct_m>::load_payload(self, key, req)?;
Ok(DataResponse {
Expand All @@ -213,8 +213,6 @@ macro_rules! impl_dyn_provider {
})
}
)+,
// Don't complain if the call site has its own wildcard match
#[allow(unreachable_patterns)]
_ => Err($crate::DataErrorKind::MissingResourceKey.with_req(key, req))
}
}
Expand All @@ -224,12 +222,10 @@ macro_rules! impl_dyn_provider {
fn supported_options_for_key(&self, key: &$crate::ResourceKey) -> Result<Box<dyn Iterator<Item = $crate::ResourceOptions> + '_>, $crate::DataError> {
match *key {
$(
$pat => {
$pat $(if $guard)? => {
$crate::iter::IterableDynProvider::<$struct_m>::supported_options_for_key(self, key)
}
)+,
// Don't complain if the call site has its own wildcard match
#[allow(unreachable_patterns)]
_ => Err($crate::DataErrorKind::MissingResourceKey.with_key(*key))
}
}
Expand Down
Loading

0 comments on commit fde100e

Please sign in to comment.