Skip to content

Commit

Permalink
Decode utf16 names
Browse files Browse the repository at this point in the history
  • Loading branch information
mooman219 committed Apr 8, 2021
1 parent e92547c commit aadbbf9
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 6 deletions.
23 changes: 20 additions & 3 deletions src/font.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use crate::layout::GlyphRasterConfig;
use crate::math::{Geometry, Line};
use crate::platform::{as_i32, ceil, floor, fract, is_negative};
use crate::raster::Raster;
use crate::unicode::decode_utf16;
use crate::FontResult;
use alloc::string::String;
use alloc::vec;
Expand All @@ -10,7 +11,7 @@ use core::mem;
use core::num::NonZeroU16;
use core::ops::Deref;
use hashbrown::HashMap;
use ttf_parser::{Face, FaceParsingError};
use ttf_parser::{Face, FaceParsingError, PlatformId};

/// Defines the bounds for a glyph's outline in subpixels. A glyph's outline is always contained in
/// its bitmap.
Expand Down Expand Up @@ -208,13 +209,29 @@ fn convert_error(error: FaceParsingError) -> &'static str {

/// Walks the face's name records looking for one with name ID 4 and returns it as an owned
/// `String`; returns `None` when the loop finishes without a match.
/// NOTE(review): name ID 4 is presumably the OpenType "full font name" entry — confirm against
/// the `name` table specification.
fn convert_name(face: &Face) -> Option<String> {
for name in face.names() {
if name.name_id() == 4 {
return name.to_string();
// Only records whose platform/encoding pair passes `is_unicode_encoding` are handed to
// `decode_utf16`; every other record is skipped.
if name.name_id() == 4 && is_unicode_encoding(name.platform_id(), name.encoding_id()) {
return Some(decode_utf16(name.name()));
}
}
None
}

/// Reports whether a name record with the given platform and encoding IDs is accepted as
/// Unicode-encoded: every `PlatformId::Unicode` record is, and `PlatformId::Windows` records are
/// when their encoding ID is one of the two constants below. Everything else is rejected.
#[inline]
fn is_unicode_encoding(platform_id: PlatformId, encoding_id: u16) -> bool {
    // https://docs.microsoft.com/en-us/typography/opentype/spec/name#windows-encoding-ids
    const WINDOWS_SYMBOL_ENCODING_ID: u16 = 0;
    const WINDOWS_UNICODE_BMP_ENCODING_ID: u16 = 1;

    match platform_id {
        PlatformId::Unicode => true,
        PlatformId::Windows => {
            encoding_id == WINDOWS_SYMBOL_ENCODING_ID || encoding_id == WINDOWS_UNICODE_BMP_ENCODING_ID
        }
        _ => false,
    }
}

impl Font {
/// Constructs a font from an array of bytes.
pub fn from_bytes<Data: Deref<Target = [u8]>>(data: Data, settings: FontSettings) -> FontResult<Font> {
Expand Down
2 changes: 1 addition & 1 deletion src/layout.rs
Original file line number Diff line number Diff line change
Expand Up @@ -363,7 +363,7 @@ impl<'a, U: Copy + Clone> Layout<U> {
}
}
while byte_offset < style.text.len() {
let c = read_utf8(style.text, &mut byte_offset);
let c = read_utf8(style.text.as_bytes(), &mut byte_offset);
let char_index = font.borrow().lookup_glyph_index(c);
let char_data = classify(c, char_index);
let metrics = if !char_data.is_control() {
Expand Down
26 changes: 24 additions & 2 deletions src/unicode/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
mod tables;

use crate::unicode::tables::*;
use alloc::string::String;

const CONT_MASK: u8 = 0b0011_1111;

Expand All @@ -9,9 +10,30 @@ fn utf8_acc_cont_byte(ch: u32, byte: u8) -> u32 {
(ch << 6) | (byte & CONT_MASK) as u32
}

/// Decodes a big-endian UTF-16 byte sequence into a `String`.
///
/// Decoding is lossy rather than panicking, because the bytes typically come straight from a
/// font file and cannot be trusted:
/// - an unpaired surrogate is replaced with U+FFFD;
/// - a trailing odd byte (half of a code unit) is replaced with a single U+FFFD.
pub fn decode_utf16(bytes: &[u8]) -> String {
    let pairs = bytes.chunks_exact(2);
    // `chunks_exact` silently drops a dangling byte, so remember whether there was one.
    let has_dangling_byte = !pairs.remainder().is_empty();
    let units = pairs.map(|pair| u16::from_be_bytes([pair[0], pair[1]]));

    // Every code unit yields at least one byte of UTF-8, so this is a cheap lower bound.
    let mut output = String::with_capacity(bytes.len() / 2);
    output.extend(
        core::char::decode_utf16(units).map(|unit| unit.unwrap_or(core::char::REPLACEMENT_CHARACTER)),
    );
    if has_dangling_byte {
        output.push(core::char::REPLACEMENT_CHARACTER);
    }
    output
}

/// Decodes one code point of big-endian UTF-16 from `bytes` starting at `*offset`, and advances
/// `*offset` past the bytes that were consumed.
///
/// Malformed input never panics and never yields an invalid `char`; U+FFFD is returned instead:
/// - if fewer than two bytes remain, `*offset` is moved to the end of the slice;
/// - an unpaired surrogate consumes only its own two bytes, so the unit that follows it is
///   decoded by the next call.
pub fn read_utf16(bytes: &[u8], offset: &mut usize) -> char {
    /// Reads the big-endian code unit at `at`, or `None` if it does not fit in `bytes`.
    fn unit_at(bytes: &[u8], at: usize) -> Option<u16> {
        let pair = bytes.get(at..)?.get(..2)?;
        Some(u16::from_be_bytes([pair[0], pair[1]]))
    }

    const REPLACEMENT: char = core::char::REPLACEMENT_CHARACTER;

    let start = *offset;
    let first = match unit_at(bytes, start) {
        Some(unit) => unit,
        None => {
            // Truncated code unit: swallow whatever is left so callers looping on
            // `offset < bytes.len()` still terminate.
            *offset = start.max(bytes.len());
            return REPLACEMENT;
        }
    };
    *offset = start + 2;

    match first {
        // High surrogate: only valid when immediately followed by a low surrogate.
        0xD800..=0xDBFF => match unit_at(bytes, start + 2) {
            Some(second @ 0xDC00..=0xDFFF) => {
                *offset = start + 4;
                let high = u32::from(first - 0xD800);
                let low = u32::from(second - 0xDC00);
                core::char::from_u32(((high << 10) | low) + 0x1_0000).unwrap_or(REPLACEMENT)
            }
            // Leave the following unit untouched; it may be a valid character on its own.
            _ => REPLACEMENT,
        },
        // Low surrogate with no preceding high surrogate.
        0xDC00..=0xDFFF => REPLACEMENT,
        _ => core::char::from_u32(u32::from(first)).unwrap_or(REPLACEMENT),
    }
}

/// Reads the character starting at `byte_offset` and advances `byte_offset` past it. Cannot be run at the end of the string.
pub fn read_utf8(string: &str, byte_offset: &mut usize) -> char {
let bytes = string.as_bytes();
pub fn read_utf8(bytes: &[u8], byte_offset: &mut usize) -> char {
let x = bytes[*byte_offset];
*byte_offset += 1;
if x < 128 {
Expand Down

0 comments on commit aadbbf9

Please sign in to comment.