From ade415f91c6b875411432d064c02d839aa7e68d6 Mon Sep 17 00:00:00 2001 From: Konstantin Pozin Date: Mon, 13 Jan 2020 19:18:08 -0800 Subject: [PATCH] [uloc] Implement `to_language_tag()` Also: - Implement Rust macros for generating wrappers for the numerous `uloc` methods that output strings into a buffer of a fixed size. --- rust_icu_uloc/src/lib.rs | 342 ++++++++++++++++++++++++++------------- 1 file changed, 227 insertions(+), 115 deletions(-) diff --git a/rust_icu_uloc/src/lib.rs b/rust_icu_uloc/src/lib.rs index 9814e7cd..9434ad54 100644 --- a/rust_icu_uloc/src/lib.rs +++ b/rust_icu_uloc/src/lib.rs @@ -63,6 +63,114 @@ impl TryFrom<&ffi::CStr> for ULoc { } } +/// Generates a method to wrap ICU4C `uloc` methods that require a resizable output string buffer. +/// +/// The various `uloc` methods of this type have inconsistent signature patterns, with some putting +/// all their input arguments _before_ the `buffer` and its `capacity`, and some splitting the input +/// arguments. +/// +/// Therefore, the macro supports input arguments in both positions. +/// +/// For an invocation of the form +/// ```ignore +/// buffered_string_method_with_retry!( +/// my_method, +/// BUFFER_CAPACITY, +/// [before_arg_a: before_type_a, before_arg_b: before_type_b,], +/// [after_arg_a: after_type_a, after_arg_b: after_type_b,] +/// ); +/// ``` +/// the generated method has a signature of the form +/// ```ignore +/// fn my_method( +/// uloc_method: unsafe extern "C" fn( +/// before_type_a, +/// before_type_b, +/// *mut raw::c_char, +/// i32, +/// after_type_a, +/// after_type_b, +/// *mut UErrorCode, +/// ) -> i32, +/// before_arg_a: before_type_a, +/// before_arg_b: before_type_b, +/// after_arg_a: after_type_a, +/// after_arg_b: after_type_b +/// ) -> Result<String, common::Error> {} +/// ``` +macro_rules!
buffered_string_method_with_retry { + + ($method_name:ident, $buffer_capacity:expr, + [$($before_arg:ident: $before_arg_type:ty,)*], + [$($after_arg:ident: $after_arg_type:ty,)*]) => { + fn $method_name( + uloc_method: unsafe extern "C" fn( + $($before_arg_type,)* + *mut raw::c_char, + i32, + $($after_arg_type,)* + *mut UErrorCode, + ) -> i32, + $($before_arg: $before_arg_type,)* + $($after_arg: $after_arg_type,)* + ) -> Result<String, common::Error> { + let mut status = common::Error::OK_CODE; + let mut buf: Vec<u8> = vec![0; $buffer_capacity]; + + // Requires that any pointers that are passed in are valid. + let full_len: i32 = unsafe { + assert!(common::Error::is_ok(status)); + uloc_method( + $($before_arg,)* + buf.as_mut_ptr() as *mut raw::c_char, + $buffer_capacity as i32, + $($after_arg,)* + &mut status, + ) + }; + + // `uloc` methods are inconsistent in whether they silently truncate the output or treat + // the overflow as an error, so we need to check both cases. + if status == UErrorCode::U_BUFFER_OVERFLOW_ERROR || + (common::Error::is_ok(status) && + full_len > $buffer_capacity + .try_into() + .map_err(|e| common::Error::wrapper(format!("{:?}", e)))?) { + + assert!(full_len > 0); + let full_len: usize = full_len + .try_into() + .map_err(|e| common::Error::wrapper(format!("{:?}", e)))?; + buf.resize(full_len, 0); + + // Same unsafe requirements as above, plus full_len must be exactly the output + // buffer size. `status` is reset first: ICU4C does nothing when handed a failure code. + unsafe { + status = common::Error::OK_CODE; + uloc_method( + $($before_arg,)* + buf.as_mut_ptr() as *mut raw::c_char, + full_len as i32, + $($after_arg,)* + &mut status, + ) + }; + } + + common::Error::ok_or_warning(status)?; + + // Adjust the size of the buffer here. + if full_len > 0 { + let full_len: usize = full_len + .try_into() + .map_err(|e| common::Error::wrapper(format!("{:?}", e)))?; + buf.resize(full_len, 0); + } + String::from_utf8(buf).map_err(|_| common::Error::string_with_interior_nul()) + } + } +} + impl ULoc { /// Implements `uloc_getLanguage`.
pub fn language(&self) -> Result { @@ -102,6 +210,25 @@ impl ULoc { .map(|repr| ULoc { repr }) } + /// Implements `uloc_toLanguageTag` from ICU4C; `strict` is passed to ICU as a `UBool`. + pub fn to_language_tag(&self, strict: bool) -> Result { + buffered_string_method_with_retry!( + buffered_string_to_language_tag, + LOCALE_CAPACITY, + [locale_id: *const raw::c_char,], + [strict: rust_icu_sys::UBool,] + ); + + let locale_id = self.as_c_str(); + // No `UBool` constants available in rust_icu_sys, unfortunately. + let strict = if strict { 1 } else { 0 }; + buffered_string_to_language_tag( + versioned_function!(uloc_toLanguageTag), + locale_id.as_ptr(), + strict, + ) + } + /// Returns the current label of this locale. pub fn label(&self) -> &str { &self.repr @@ -112,85 +239,16 @@ impl ULoc { ffi::CString::new(self.repr.clone()).expect("ULoc contained interior NUL bytes") } + /// Implements `uloc_acceptLanguage` from ICU4C; delegates to the free `accept_language`. + #[deprecated = "Use `rust_icu_uloc::accept_language`"] pub fn accept_language( accept_list: impl IntoIterator>, available_locales: impl IntoIterator>, ) -> Result<(Option, UAcceptResult), common::Error> { - let mut buf: Vec = vec![0; LOCALE_CAPACITY]; - let mut accept_result: UAcceptResult = UAcceptResult::ULOC_ACCEPT_FAILED; - let mut status = common::Error::OK_CODE; - - let mut accept_list_cstrings: Vec = vec![]; - // This is mutable only to satisfy the missing `const`s in the ICU4C API.
- let mut accept_list: Vec<*const raw::c_char> = accept_list - .into_iter() - .map(|item| { - let uloc: ULoc = item.into(); - accept_list_cstrings.push(uloc.as_c_str()); - accept_list_cstrings - .last() - .expect("non-empty list") - .as_ptr() - }) - .collect(); - - let available_locales: Vec = available_locales - .into_iter() - .map(|item| item.into()) - .collect(); - let available_locales: Vec<&str> = - available_locales.iter().map(|uloc| uloc.label()).collect(); - let mut available_locales = Enumeration::try_from(&available_locales[..])?; - - let full_len = unsafe { - versioned_function!(uloc_acceptLanguage)( - buf.as_mut_ptr() as *mut raw::c_char, - buf.len() as i32, - &mut accept_result, - accept_list.as_mut_ptr(), - accept_list.len() as i32, - available_locales.repr(), - &mut status, - ) - }; - - if status == UErrorCode::U_BUFFER_OVERFLOW_ERROR { - assert!(full_len > 0); - let full_len: usize = full_len - .try_into() - .map_err(|e| common::Error::wrapper(format!("{:?}", e)))?; - buf.resize(full_len, 0); - unsafe { - versioned_function!(uloc_acceptLanguage)( - buf.as_mut_ptr() as *mut raw::c_char, - buf.len() as i32, - &mut accept_result, - accept_list.as_mut_ptr(), - accept_list.len() as i32, - available_locales.repr(), - &mut status, - ); - } - } - - common::Error::ok_or_warning(status)?; - // Having no match is a valid if disappointing result. - if accept_result == UAcceptResult::ULOC_ACCEPT_FAILED { - return Ok((None, accept_result)); - } - - // Adjust the size of the buffer here. - assert!(full_len > 0); - buf.resize(full_len as usize, 0); - - String::from_utf8(buf) - .map_err(|_| common::Error::string_with_interior_nul()) - .and_then(|s| ULoc::try_from(s.as_str())) - .map(|uloc| (Some(uloc), accept_result)) + accept_language(accept_list, available_locales) } - /// Call a `uloc_*` method with a particular signature (that clones and modifies the internal - /// representation of the locale ID and requires a resizable buffer). 
+ /// Calls a `uloc` method that takes this locale's ID as a C string and returns a string. fn call_buffered_string_method( &self, uloc_method: unsafe extern "C" fn( @@ -200,40 +258,14 @@ impl ULoc { *mut UErrorCode, ) -> i32, ) -> Result { - let mut status = common::Error::OK_CODE; - let repr = ffi::CString::new(self.repr.clone()) - .map_err(|_| common::Error::string_with_interior_nul())?; - let mut buf: Vec = vec![0; LOCALE_CAPACITY]; - - // Requires that repr is a valid pointer - let full_len = unsafe { - assert!(common::Error::is_ok(status)); - uloc_method( - repr.as_ptr(), - buf.as_mut_ptr() as *mut raw::c_char, - LOCALE_CAPACITY as i32, - &mut status, - ) - } as usize; - common::Error::ok_or_warning(status)?; - if full_len > LOCALE_CAPACITY { - buf.resize(full_len, 0); - // Same unsafe requirements as above, plus full_len must be exactly - // the output buffer size. - unsafe { - assert!(common::Error::is_ok(status)); - uloc_method( - repr.as_ptr(), - buf.as_mut_ptr() as *mut raw::c_char, - full_len as i32, - &mut status, - ) - }; - common::Error::ok_or_warning(status)?; - } - // Adjust the size of the buffer here. - buf.resize(full_len, 0); - String::from_utf8(buf).map_err(|_| common::Error::string_with_interior_nul()) + buffered_string_method_with_retry!( + buffered_string_char_star, + LOCALE_CAPACITY, + [char_star: *const raw::c_char,], + [] + ); + let asciiz = self.as_c_str(); + buffered_string_char_star(uloc_method, asciiz.as_ptr()) } } @@ -257,6 +289,63 @@ pub fn set_default(loc: &ULoc) -> Result<(), common::Error> { common::Error::ok_or_warning(status) } +/// Implements `uloc_acceptLanguage` from ICU4C.
+pub fn accept_language( + accept_list: impl IntoIterator>, + available_locales: impl IntoIterator>, +) -> Result<(Option, UAcceptResult), common::Error> { + buffered_string_method_with_retry!( + buffered_string_uloc_accept_language, + LOCALE_CAPACITY, + [], + [ + out_result: *mut UAcceptResult, + accept_list: *mut *const ::std::os::raw::c_char, + accept_list_count: i32, + available_locales: *mut UEnumeration, + ] + ); + + let mut accept_result: UAcceptResult = UAcceptResult::ULOC_ACCEPT_FAILED; + let mut accept_list_cstrings: Vec = vec![]; + // This is mutable only to satisfy the missing `const`s in the ICU4C API. + let mut accept_list: Vec<*const raw::c_char> = accept_list + .into_iter() + .map(|item| { + let uloc: ULoc = item.into(); + accept_list_cstrings.push(uloc.as_c_str()); + accept_list_cstrings + .last() + .expect("non-empty list") + .as_ptr() + }) + .collect(); + + let available_locales: Vec = available_locales + .into_iter() + .map(|item| item.into()) + .collect(); + let available_locales: Vec<&str> = available_locales.iter().map(|uloc| uloc.label()).collect(); + let mut available_locales = Enumeration::try_from(&available_locales[..])?; + + let matched_locale = buffered_string_uloc_accept_language( + versioned_function!(uloc_acceptLanguage), + &mut accept_result, + accept_list.as_mut_ptr(), + accept_list.len() as i32, + available_locales.repr(), + ); + + // Having no match is a valid if disappointing result. NOTE(review): this path drops any `Err` held in `matched_locale` — confirm.
+ if accept_result == UAcceptResult::ULOC_ACCEPT_FAILED { + return Ok((None, accept_result)); + } + + matched_locale + .and_then(|s| ULoc::try_from(s.as_str())) + .map(|uloc| (Some(uloc), accept_result)) +} + #[cfg(test)] mod tests { use super::*; @@ -316,23 +405,31 @@ mod tests { assert_eq!(minimized_subtags.label(), expected.label()); } + #[test] + fn test_to_language_tag() { + let loc = ULoc::try_from("sr_Cyrl_RS").expect("get sr_Cyrl_RS locale"); + let language_tag = loc + .to_language_tag(true) + .expect("should convert to language tag"); + assert_eq!(language_tag, "sr-Cyrl-RS".to_string()); + } + #[test] fn test_accept_language_fallback() { let accept_list: Result, _> = vec!["es_MX", "ar_EG", "fr_FR"] .into_iter() - .map(|s| ULoc::try_from(s)) + .map(ULoc::try_from) .collect(); let accept_list = accept_list.expect("make accept_list"); let available_locales: Result, _> = vec!["de_DE", "en_US", "es", "nl_NL", "sr_RS_Cyrl"] .into_iter() - .map(|s| ULoc::try_from(s)) + .map(ULoc::try_from) .collect(); let available_locales = available_locales.expect("make available_locales"); - let actual = - ULoc::accept_language(accept_list, available_locales).expect("call accept_language"); + let actual = accept_language(accept_list, available_locales).expect("call accept_language"); assert_eq!( actual, ( @@ -346,18 +443,17 @@ mod tests { fn test_accept_language_exact_match() { let accept_list: Result, _> = vec!["es_ES", "ar_EG", "fr_FR"] .into_iter() - .map(|s| ULoc::try_from(s)) + .map(ULoc::try_from) .collect(); let accept_list = accept_list.expect("make accept_list"); let available_locales: Result, _> = vec!["de_DE", "en_US", "es_MX", "ar_EG"] .into_iter() - .map(|s| ULoc::try_from(s)) + .map(ULoc::try_from) .collect(); let available_locales = available_locales.expect("make available_locales"); - let actual = - ULoc::accept_language(accept_list, available_locales).expect("call accept_language"); + let actual = accept_language(accept_list, available_locales).expect("call 
accept_language"); assert_eq!( actual, ( @@ -366,4 +462,20 @@ mod tests { ) ); } + + #[test] + fn test_accept_language_no_match() { + let accept_list: Result, _> = vec!["es_ES", "ar_EG", "fr_FR"] + .into_iter() + .map(ULoc::try_from) + .collect(); + let accept_list = accept_list.expect("make accept_list"); + + let available_locales: Result, _> = + vec!["el_GR"].into_iter().map(ULoc::try_from).collect(); + let available_locales = available_locales.expect("make available_locales"); + + let actual = accept_language(accept_list, available_locales).expect("call accept_language"); + assert_eq!(actual, (None, UAcceptResult::ULOC_ACCEPT_FAILED)) + } }