Decode utf16 names

mooman219 · Apr 8, 2021 · aadbbf9 · aadbbf9
1 parent e92547c
commit aadbbf9
Show file tree

Hide file tree

Showing 3 changed files with 45 additions and 6 deletions.
diff --git a/src/font.rs b/src/font.rs
@@ -2,6 +2,7 @@ use crate::layout::GlyphRasterConfig;
 use crate::math::{Geometry, Line};
 use crate::platform::{as_i32, ceil, floor, fract, is_negative};
 use crate::raster::Raster;
+use crate::unicode::decode_utf16;
 use crate::FontResult;
 use alloc::string::String;
 use alloc::vec;
@@ -10,7 +11,7 @@ use core::mem;
 use core::num::NonZeroU16;
 use core::ops::Deref;
 use hashbrown::HashMap;
-use ttf_parser::{Face, FaceParsingError};
+use ttf_parser::{Face, FaceParsingError, PlatformId};
 
 /// Defines the bounds for a glyph's outline in subpixels. A glyph's outline is always contained in
 /// its bitmap.
@@ -208,13 +209,29 @@ fn convert_error(error: FaceParsingError) -> &'static str {
 
 fn convert_name(face: &Face) -> Option<String> {
     for name in face.names() {
-        if name.name_id() == 4 {
-            return name.to_string();
+        if name.name_id() == 4 && is_unicode_encoding(name.platform_id(), name.encoding_id()) {
+            return Some(decode_utf16(name.name()));
         }
     }
     None
 }
 
+#[inline]
+fn is_unicode_encoding(platform_id: PlatformId, encoding_id: u16) -> bool {
+    // https://docs.microsoft.com/en-us/typography/opentype/spec/name#windows-encoding-ids
+    const WINDOWS_SYMBOL_ENCODING_ID: u16 = 0;
+    const WINDOWS_UNICODE_BMP_ENCODING_ID: u16 = 1;
+
+    match platform_id {
+        PlatformId::Unicode => true,
+        PlatformId::Windows => match encoding_id {
+            WINDOWS_SYMBOL_ENCODING_ID | WINDOWS_UNICODE_BMP_ENCODING_ID => true,
+            _ => false,
+        },
+        _ => false,
+    }
+}
+
 impl Font {
     /// Constructs a font from an array of bytes.
     pub fn from_bytes<Data: Deref<Target = [u8]>>(data: Data, settings: FontSettings) -> FontResult<Font> {

diff --git a/src/layout.rs b/src/layout.rs
@@ -363,7 +363,7 @@ impl<'a, U: Copy + Clone> Layout<U> {
             }
         }
         while byte_offset < style.text.len() {
-            let c = read_utf8(style.text, &mut byte_offset);
+            let c = read_utf8(style.text.as_bytes(), &mut byte_offset);
             let char_index = font.borrow().lookup_glyph_index(c);
             let char_data = classify(c, char_index);
             let metrics = if !char_data.is_control() {

diff --git a/src/unicode/mod.rs b/src/unicode/mod.rs
@@ -1,6 +1,7 @@
 mod tables;
 
 use crate::unicode::tables::*;
+use alloc::string::String;
 
 const CONT_MASK: u8 = 0b0011_1111;
 
@@ -9,9 +10,30 @@ fn utf8_acc_cont_byte(ch: u32, byte: u8) -> u32 {
     (ch << 6) | (byte & CONT_MASK) as u32
 }
 
/// Decodes a big-endian UTF-16 byte string into a `String`.
///
/// The input is interpreted as a sequence of two-byte, big-endian code units. Malformed data
/// never panics and never produces an invalid `char`:
///
/// * an unpaired surrogate is replaced with U+FFFD (the Unicode replacement character);
/// * a dangling final byte (odd-length input) is replaced with a single trailing U+FFFD.
///
/// Well-formed input decodes to exactly the characters it encodes.
pub fn decode_utf16(bytes: &[u8]) -> String {
    let pairs = bytes.chunks_exact(2);
    // Record this before `pairs` is consumed by the iterator chain below.
    let has_trailing_byte = !pairs.remainder().is_empty();

    let units = pairs.map(|pair| u16::from_be_bytes([pair[0], pair[1]]));
    let mut output: String = core::char::decode_utf16(units)
        .map(|decoded| decoded.unwrap_or(core::char::REPLACEMENT_CHARACTER))
        .collect();

    if has_trailing_byte {
        output.push(core::char::REPLACEMENT_CHARACTER);
    }
    output
}
+
/// Reads one character from a big-endian UTF-16 byte string, starting at `*offset`, and
/// advances `*offset` past the bytes that were consumed.
///
/// A non-surrogate code unit consumes two bytes; a valid surrogate pair consumes four.
/// Malformed data yields U+FFFD (the Unicode replacement character) instead of panicking or
/// constructing an invalid `char`:
///
/// * a lone low surrogate consumes two bytes;
/// * a high surrogate not followed by a low surrogate consumes only its own two bytes, so the
///   following code unit is decoded by the next call;
/// * if fewer than two bytes remain, `*offset` is moved to the end of `bytes`, so callers
///   looping while `*offset < bytes.len()` always terminate.
pub fn read_utf16(bytes: &[u8], offset: &mut usize) -> char {
    const REPLACEMENT: char = core::char::REPLACEMENT_CHARACTER;

    // Bounds-checked read of the big-endian code unit starting at byte index `at`.
    let unit_at = |at: usize| -> Option<u16> {
        let pair = bytes.get(at..at.checked_add(2)?)?;
        Some(u16::from_be_bytes([pair[0], pair[1]]))
    };

    let first = match unit_at(*offset) {
        Some(unit) => unit,
        None => {
            // Truncated input: swallow whatever is left so the caller makes progress.
            *offset = bytes.len().max(*offset);
            return REPLACEMENT;
        }
    };
    *offset += 2;

    match first {
        // High surrogate: only valid when immediately followed by a low surrogate.
        0xD800..=0xDBFF => match unit_at(*offset) {
            Some(low @ 0xDC00..=0xDFFF) => {
                *offset += 2;
                let scalar =
                    0x1_0000 + ((u32::from(first - 0xD800) << 10) | u32::from(low - 0xDC00));
                core::char::from_u32(scalar).unwrap_or(REPLACEMENT)
            }
            // Leave the following unit (if any) untouched for the next call.
            _ => REPLACEMENT,
        },
        // Low surrogate with no preceding high surrogate.
        0xDC00..=0xDFFF => REPLACEMENT,
        // Any other code unit is a scalar value in the Basic Multilingual Plane.
        _ => core::char::from_u32(u32::from(first)).unwrap_or(REPLACEMENT),
    }
}
+
 /// Returns the character read at `byte_offset` and advances `byte_offset`. Cannot be run at the end of the string.
-pub fn read_utf8(string: &str, byte_offset: &mut usize) -> char {
-    let bytes = string.as_bytes();
+pub fn read_utf8(bytes: &[u8], byte_offset: &mut usize) -> char {
     let x = bytes[*byte_offset];
     *byte_offset += 1;
     if x < 128 {