Skip to content

Commit

Permalink
Add overload to String.from_utf16 with pointer
Browse files Browse the repository at this point in the history
  • Loading branch information
straight-shoota authored and asterite committed Jan 16, 2018
1 parent 244da57 commit 295ddc3
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 1 deletion.
6 changes: 6 additions & 0 deletions spec/std/string/utf16_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -43,5 +43,11 @@ describe "String UTF16" do
input = Slice[0xdc00_u16, 0xd800_u16]
String.from_utf16(input).should eq("\u{fffd}\u{fffd}")
end

it "handles null bytes" do
  # Code units for "hi", a zero unit, then the surrogate pair 0xD800/0xDCA5 (U+100A5).
  code_units = Slice[104_u16, 105_u16, 0_u16, 55296_u16, 56485_u16]
  pointer = code_units.to_unsafe

  # Slice overload: the zero unit is expected to be decoded like any other unit.
  String.from_utf16(code_units).should eq("hi\0000𐂥")

  # Pointer overload: decoding is expected to stop at the zero unit.
  String.from_utf16(pointer).should eq("hi")
end
end
end
35 changes: 34 additions & 1 deletion src/string/utf16.cr
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,15 @@ class String
# slice = Slice[104_u16, 105_u16, 32_u16, 55296_u16, 56485_u16]
# String.from_utf16(slice) # => "hi 𐂥"
# ```
def self.from_utf16(slice : Slice(UInt16)) : String
#
# If *slice* is a pointer, the string ends when a zero value is found.
#
# ```
# slice = Slice[104_u16, 105_u16, 0_u16, 55296_u16, 56485_u16]
# String.from_utf16(slice) # => "hi\0000𐂥"
# String.from_utf16(slice.to_unsafe) # => "hi"
# ```
def self.from_utf16(slice : Slice(UInt16) | Pointer(UInt16)) : String
bytesize = 0
size = 0

Expand Down Expand Up @@ -97,4 +105,29 @@ class String
i += 1
end
end

# Decodes the zero-terminated UTF-16 sequence starting at *pointer* and
# yields one `Char` per decoded code point.
#
# Iteration ends at the first code unit equal to zero; that terminator is
# not yielded. A surrogate that is not part of a well-formed high/low pair
# is yielded as U+FFFD (the replacement character).
private def self.each_utf16_char(pointer : Pointer(UInt16))
  cursor = pointer

  while (unit = cursor.value.to_i) != 0
    codepoint =
      if unit < 0xd800 || unit >= 0xe000
        # Outside the surrogate range: the code unit is the code point itself.
        unit
      elsif unit < 0xdc00 && 0xdc00 <= (cursor + 1).value <= 0xdfff
        # Here *unit* is already known to be >= 0xd800, so it is a high
        # surrogate, and it is followed by a low surrogate. Peeking one unit
        # ahead is safe because *unit* is non-zero, so the terminator has not
        # been reached yet. Step onto the low surrogate and combine the pair.
        cursor += 1
        ((unit - 0xd800) << 10) + (cursor.value - 0xdc00) + 0x10000
      else
        # Lone low surrogate, or high surrogate without a matching low one.
        0xfffd
      end

    yield codepoint.chr

    cursor += 1
  end
end
end

0 comments on commit 295ddc3

Please sign in to comment.