-
Notifications
You must be signed in to change notification settings - Fork 13
/
Copy pathunicode-java.rb
117 lines (91 loc) · 2.62 KB
/
unicode-java.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
# JRuby implementation of the Unicode module API, backed by
# java.text.Normalizer and java.lang.String.
#
# The module extends itself, so every method can be called directly on
# it, e.g. Unicode.nfc(str). Operations that have no Java-backed
# implementation here raise NotImplementedError.
module Unicode
  extend self

  VERSION = '0.4.3'

  Normalizer = Java::JavaText::Normalizer

  # Locale handed to the Java case-mapping calls. Without an explicit
  # locale, String#toUpperCase / #toLowerCase follow the JVM default
  # locale, which changes the mapping of "i"/"I" under e.g. Turkish.
  ROOT_LOCALE = Java::JavaUtil::Locale::ROOT

  # Compare two strings after canonical decomposition (NFD).
  #
  # Returns the Integer produced by java.lang.String#compareTo:
  # negative, zero or positive. Only the sign is meaningful.
  def strcmp(str1, str2)
    decompose(str1).to_java.compare_to(decompose(str2))
  end

  # Compare two strings after compatibility decomposition (NFKD).
  #
  # Returns the Integer produced by java.lang.String#compareTo:
  # negative, zero or positive. Only the sign is meaningful.
  def strcmp_compat(str1, str2)
    decompose_compat(str1).to_java.compare_to(decompose_compat(str2))
  end

  # Normalize the string to Normalization Form D
  # (canonical decomposition).
  def decompose(str)
    Normalizer.normalize(str, Normalizer::Form::NFD)
  end
  alias_method :normalize_D, :decompose
  alias_method :nfd, :decompose

  # Decompose Unicode string with a non-standard mapping.
  #
  # It does not decompose the characters in CompositionExclusions.txt.
  #
  # Not implemented here: always raises NotImplementedError.
  def decompose_safe(str)
    raise NotImplementedError
  end
  alias_method :normalize_D_safe, :decompose_safe
  alias_method :nfd_safe, :decompose_safe

  # Normalize the string to Normalization Form KD
  # (compatibility decomposition).
  def decompose_compat(str)
    Normalizer.normalize(str, Normalizer::Form::NFKD)
  end
  alias_method :normalize_KD, :decompose_compat
  alias_method :nfkd, :decompose_compat

  # Compose Unicode string. Before composing, the trailing
  # characters are sorted in canonical order.
  #
  # The parameter must be decomposed.
  #
  # The composition is based on the reverse of the
  # character decomposition mapping in UnicodeData.txt,
  # CompositionExclusions.txt and the Hangul composition
  # algorithm.
  #
  # Not implemented here: always raises NotImplementedError.
  def compose(str)
    raise NotImplementedError
  end

  # Normalize the string to Normalization Form C
  # (canonical decomposition followed by canonical composition).
  def normalize_C(str)
    Normalizer.normalize(str, Normalizer::Form::NFC)
  end
  alias_method :nfc, :normalize_C

  # Normalize the string to Normalization Form KC
  # (compatibility decomposition followed by canonical composition).
  def normalize_KC(str)
    Normalizer.normalize(str, Normalizer::Form::NFKC)
  end
  alias_method :nfkc, :normalize_KC

  # Compose the result of decompose_safe.
  #
  # Both decompose_safe and compose are unimplemented here, so this
  # currently always raises NotImplementedError.
  def normalize_C_safe(str)
    compose(decompose_safe(str))
  end
  alias_method :nfc_safe, :normalize_C_safe

  # Return an upper-cased copy of the string, using Java's case
  # mapping with the root locale so that the result does not depend
  # on the JVM's default locale.
  def upcase(str)
    str.to_java.to_upper_case(ROOT_LOCALE)
  end

  # Return a lower-cased copy of the string, using Java's case
  # mapping with the root locale so that the result does not depend
  # on the JVM's default locale.
  def downcase(str)
    str.to_java.to_lower_case(ROOT_LOCALE)
  end

  # Return a copy of the string with the first character upper-cased
  # and the remainder lower-cased. An empty string yields an empty
  # string.
  def capitalize(str)
    lowered = downcase(str)
    # An empty string has no first character: lowered[0] would be nil
    # and trying to upcase it would raise.
    return lowered if lowered.empty?

    lowered[0] = upcase(lowered[0])
    lowered
  end

  # Get an array of general category names of the string.
  #
  # Can be called with a block.
  #
  # Not implemented here: always raises NotImplementedError.
  def categories(str)
    raise NotImplementedError
  end

  # Get an array of abbreviated category names of the string.
  #
  # Can be called with a block.
  #
  # Not implemented here: always raises NotImplementedError.
  def abbr_categories(str)
    raise NotImplementedError
  end

  # Get an array of text elements.
  #
  # A text element is a unit that is displayed as a single character.
  #
  # Can be called with a block.
  #
  # Not implemented here: always raises NotImplementedError.
  def text_elements(str)
    raise NotImplementedError
  end

  # Estimate the display width on the fixed pitch text terminal.
  #
  # It is based on Markus Kuhn's mk_wcwidth.
  #
  # If the optional argument 'cjk' is true, East Asian
  # Ambiguous characters are treated as wide characters.
  #
  # Not implemented here: always raises NotImplementedError.
  def width(str, cjk = false)
    raise NotImplementedError
  end
end