forked from dolphinsmalltalk/Dolphin
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathPreBoot.st
66 lines (62 loc) · 2.42 KB
/
PreBoot.st
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
!Utf8String methodsFor!
decodeAt: anInteger
	"Private - Answer the <Character> whose UTF-8 encoding starts with the code unit at index anInteger of the receiver.
	A code unit below 16r80 is answered directly from the character table. If anInteger designates a continuation byte, i.e. the index falls in the middle of a character, an intra-character index error is raised; this strictness is deliberate so that indexing mistakes in algorithms over multi-byte strings surface early, although badly encoded data can trigger it too. Every other encoding problem (truncated sequence, malformed continuation byte, 5 or 6-byte lead, overlong form, code point rejected by Character) is tolerated by answering the replacement character, Character.Utf8Default."

	| lead trail limit scalar floor |
	lead := self basicAt: anInteger.
	"One code unit (ASCII): look it up in the character table"
	lead < 16r80 ifTrue: [^Character.CharacterSet at: lead + 1].
	"Anything else below 16rC0 is a continuation byte, so the index is inside a character"
	lead < 16rC0 ifTrue: [^self errorIntraCharacterIndex: anInteger].
	"Lead byte: at least one continuation byte must follow"
	limit := self size.
	anInteger >= limit ifTrue: [^Character.Utf8Default].
	trail := self basicAt: anInteger + 1.
	(trail bitAnd: 16rC0) == 16r80 ifFalse: [^Character.Utf8Default].
	scalar := (lead bitAnd: 16r1F) << 6 bitOr: (trail bitAnd: 16r3F).
	"floor tracks the smallest code point that legitimately needs the sequence length seen so far"
	floor := 16r80.
	lead >= 16rE0
		ifTrue: 
			["Three or more code units"
			anInteger + 2 > limit ifTrue: [^Character.Utf8Default].
			trail := self basicAt: anInteger + 2.
			(trail bitAnd: 16rC0) == 16r80 ifFalse: [^Character.Utf8Default].
			"The 10-bit mask drops the length-marker bit that the 5-bit lead mask lets through for leads of 16rF0 and above"
			scalar := (scalar bitAnd: 16r3FF) << 6 bitOr: (trail bitAnd: 16r3F).
			floor := 16r800.
			lead >= 16rF0
				ifTrue: 
					["Four code units"
					anInteger + 3 > limit ifTrue: [^Character.Utf8Default].
					trail := self basicAt: anInteger + 3.
					(trail bitAnd: 16rC0) == 16r80 ifFalse: [^Character.Utf8Default].
					scalar := (scalar bitAnd: 16r7FFF) << 6 bitOr: (trail bitAnd: 16r3F).
					floor := 16r10000.
					"Leads of 16rF8 and above would start a 5 or 6-byte form, which is never valid because four bytes cover the largest Unicode code point"
					lead >= 16rF8 ifTrue: [^Character.Utf8Default]]].
	"A value below the floor for its length is an overlong encoding"
	^scalar < floor
		ifTrue: [Character.Utf8Default]
		ifFalse: [Character codePoint: scalar ifInvalid: [Character.Utf8Default]]! !
"Add the class variable DefaultDecimalExponents to Float, initialised to the interval -3 to 6"
Float addClassVariable: 'DefaultDecimalExponents' value: (-3 to: 6)!
"Remove the thousandSeparator getter and setter selectors from NUMBERFMTW, in that order"
NUMBERFMTW
	removeSelector: #thousandSeparator;
	removeSelector: #thousandSeparator:!