-
Notifications
You must be signed in to change notification settings - Fork 166
/
Copy pathBidiBase.java
4783 lines (4495 loc) · 202 KB
/
BidiBase.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
* Copyright (c) 2009, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/*
*******************************************************************************
* Copyright (C) 2001-2014, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
*/
/* FOOD FOR THOUGHT: currently the reordering modes are a mixture of
* algorithm for direct BiDi, algorithm for inverse Bidi and the bizarre
* concept of RUNS_ONLY which is a double operation.
* It could be advantageous to divide this into 3 concepts:
* a) Operation: direct / inverse / RUNS_ONLY
* b) Direct algorithm: default / NUMBERS_SPECIAL / GROUP_NUMBERS_WITH_R
* c) Inverse algorithm: default / INVERSE_LIKE_DIRECT / NUMBERS_SPECIAL
* This would allow combinations not possible today like RUNS_ONLY with
* NUMBERS_SPECIAL.
* Also allow to set INSERT_MARKS for the direct step of RUNS_ONLY and
* REMOVE_CONTROLS for the inverse step.
* Not all combinations would be supported, and probably not all of them make sense.
* This would need to document which ones are supported and what are the
* fallbacks for unsupported combinations.
*/
package jdk.internal.icu.text;
import java.lang.reflect.Array;
import java.text.AttributedCharacterIterator;
import java.text.Bidi;
import java.util.Arrays;
import jdk.internal.access.JavaAWTFontAccess;
import jdk.internal.access.SharedSecrets;
import jdk.internal.icu.lang.UCharacter;
import jdk.internal.icu.impl.UBiDiProps;
/**
*
* <h2>Bidi algorithm for ICU</h2>
*
* This is an implementation of the Unicode Bidirectional Algorithm. The
* algorithm is defined in the
* <a href="http://www.unicode.org/reports/tr9/">Unicode Standard Annex #9:
* Unicode Bidirectional Algorithm</a>.
* <p>
*
* Note: Libraries that perform a bidirectional algorithm and reorder strings
* accordingly are sometimes called "Storage Layout Engines". ICU's Bidi and
* shaping (ArabicShaping) classes can be used at the core of such "Storage
* Layout Engines".
*
* <h3>General remarks about the API:</h3>
*
* The "limit" of a sequence of characters is the position just after
* their last character, i.e., one more than that position.
* <p>
*
* Some of the API methods provide access to "runs". Such a
* "run" is defined as a sequence of characters that are at the same
* embedding level after performing the Bidi algorithm.
*
* <h3>Basic concept: paragraph</h3>
* A piece of text can be divided into several paragraphs by characters
* with the Bidi class <code>Block Separator</code>. For handling of
* paragraphs, see:
* <ul>
* <li>{@link #countParagraphs}
* <li>{@link #getParaLevel}
* <li>{@link #getParagraph}
* <li>{@link #getParagraphByIndex}
* </ul>
*
* <h3>Basic concept: text direction</h3>
* The direction of a piece of text may be:
* <ul>
* <li>{@link #LTR}
* <li>{@link #RTL}
* <li>{@link #MIXED}
* <li>{@link #NEUTRAL}
* </ul>
*
* <h3>Basic concept: levels</h3>
*
* Levels in this API represent embedding levels according to the Unicode
* Bidirectional Algorithm.
* Their low-order bit (even/odd value) indicates the visual direction.<p>
*
* Levels can be abstract values when used for the
* <code>paraLevel</code> and <code>embeddingLevels</code>
* arguments of <code>setPara()</code>; there:
* <ul>
* <li>the high-order bit of an <code>embeddingLevels[]</code>
* value indicates whether the using application is
* specifying the level of a character to <i>override</i> whatever the
* Bidi implementation would resolve it to.</li>
* <li><code>paraLevel</code> can be set to the
* pseudo-level values <code>LEVEL_DEFAULT_LTR</code>
* and <code>LEVEL_DEFAULT_RTL</code>.</li>
* </ul>
*
* <p>The related constants are not real, valid level values.
* <code>LEVEL_DEFAULT_XXX</code> can be used to specify
* a default for the paragraph level for
* when the <code>setPara()</code> method
* shall determine it but there is no
* strongly typed character in the input.<p>
*
* Note that the value for <code>LEVEL_DEFAULT_LTR</code> is even
* and the one for <code>LEVEL_DEFAULT_RTL</code> is odd,
* just like with normal LTR and RTL level values -
* these special values are designed that way. Also, the implementation
* assumes that MAX_EXPLICIT_LEVEL is odd.
*
* <p><b>See Also:</b>
* <ul>
* <li>{@link #LEVEL_DEFAULT_LTR}
* <li>{@link #LEVEL_DEFAULT_RTL}
* <li>{@link #LEVEL_OVERRIDE}
* <li>{@link #MAX_EXPLICIT_LEVEL}
* <li>{@link #setPara}
* </ul>
*
* <h3>Basic concept: Reordering Mode</h3>
* Reordering mode values indicate which variant of the Bidi algorithm to
* use.
*
* <p><b>See Also:</b>
* <ul>
* <li>{@link #setReorderingMode}
* <li>{@link #REORDER_DEFAULT}
* <li>{@link #REORDER_NUMBERS_SPECIAL}
* <li>{@link #REORDER_GROUP_NUMBERS_WITH_R}
* <li>{@link #REORDER_RUNS_ONLY}
* <li>{@link #REORDER_INVERSE_NUMBERS_AS_L}
* <li>{@link #REORDER_INVERSE_LIKE_DIRECT}
* <li>{@link #REORDER_INVERSE_FOR_NUMBERS_SPECIAL}
* </ul>
*
* <h3>Basic concept: Reordering Options</h3>
* Reordering options can be applied during Bidi text transformations.
*
* <p><b>See Also:</b>
* <ul>
* <li>{@link #setReorderingOptions}
* <li>{@link #OPTION_DEFAULT}
* <li>{@link #OPTION_INSERT_MARKS}
* <li>{@link #OPTION_REMOVE_CONTROLS}
* <li>{@link #OPTION_STREAMING}
* </ul>
*
*
* @author Simon Montagu, Matitiahu Allouche (ported from C code written by Markus W. Scherer)
* @stable ICU 3.8
*
*
* <h4> Sample code for the ICU Bidi API </h4>
*
* <h5>Rendering a paragraph with the ICU Bidi API</h5>
*
* This is (hypothetical) sample code that illustrates how the ICU Bidi API
* could be used to render a paragraph of text. Rendering code depends highly on
* the graphics system, therefore this sample code must make a lot of
* assumptions, which may or may not match any existing graphics system's
* properties.
*
* <p>
* The basic assumptions are:
* </p>
* <ul>
* <li>Rendering is done from left to right on a horizontal line.</li>
* <li>A run of single-style, unidirectional text can be rendered at once.
* </li>
* <li>Such a run of text is passed to the graphics system with characters
* (code units) in logical order.</li>
* <li>The line-breaking algorithm is very complicated and Locale-dependent -
* and therefore its implementation omitted from this sample code.</li>
* </ul>
*
* <pre>{@code
*
* package com.ibm.icu.dev.test.bidi;
*
* import com.ibm.icu.text.Bidi;
* import com.ibm.icu.text.BidiRun;
*
* public class Sample {
*
* static final int styleNormal = 0;
* static final int styleSelected = 1;
* static final int styleBold = 2;
* static final int styleItalics = 4;
* static final int styleSuper=8;
* static final int styleSub = 16;
*
* static class StyleRun {
* int limit;
* int style;
*
* public StyleRun(int limit, int style) {
* this.limit = limit;
* this.style = style;
* }
* }
*
* static class Bounds {
* int start;
* int limit;
*
* public Bounds(int start, int limit) {
* this.start = start;
* this.limit = limit;
* }
* }
*
* static int getTextWidth(String text, int start, int limit,
* StyleRun[] styleRuns, int styleRunCount) {
* // simplistic way to compute the width
* return limit - start;
* }
*
* // set limit and StyleRun limit for a line
* // from text[start] and from styleRuns[styleRunStart]
* // using Bidi.getLogicalRun(...)
* // returns line width
* static int getLineBreak(String text, Bounds line, Bidi para,
* StyleRun styleRuns[], Bounds styleRun) {
* // dummy return
* return 0;
* }
*
* // render runs on a line sequentially, always from left to right
*
* // prepare rendering a new line
* static void startLine(byte textDirection, int lineWidth) {
* System.out.println();
* }
*
* // render a run of text and advance to the right by the run width
* // the text[start..limit-1] is always in logical order
* static void renderRun(String text, int start, int limit,
* byte textDirection, int style) {
* }
*
* // We could compute a cross-product
* // from the style runs with the directional runs
* // and then reorder it.
* // Instead, here we iterate over each run type
* // and render the intersections -
* // with shortcuts in simple (and common) cases.
* // renderParagraph() is the main function.
*
* // render a directional run with
* // (possibly) multiple style runs intersecting with it
* static void renderDirectionalRun(String text, int start, int limit,
* byte direction, StyleRun styleRuns[],
* int styleRunCount) {
* int i;
*
* // iterate over style runs
* if (direction == Bidi.LTR) {
* int styleLimit;
* for (i = 0; i < styleRunCount; ++i) {
* styleLimit = styleRuns[i].limit;
* if (start < styleLimit) {
* if (styleLimit > limit) {
* styleLimit = limit;
* }
* renderRun(text, start, styleLimit,
* direction, styleRuns[i].style);
* if (styleLimit == limit) {
* break;
* }
* start = styleLimit;
* }
* }
* } else {
* int styleStart;
*
* for (i = styleRunCount-1; i >= 0; --i) {
* if (i > 0) {
* styleStart = styleRuns[i-1].limit;
* } else {
* styleStart = 0;
* }
* if (limit >= styleStart) {
* if (styleStart < start) {
* styleStart = start;
* }
* renderRun(text, styleStart, limit, direction,
* styleRuns[i].style);
* if (styleStart == start) {
* break;
* }
* limit = styleStart;
* }
* }
* }
* }
*
* // the line object represents text[start..limit-1]
* static void renderLine(Bidi line, String text, int start, int limit,
* StyleRun styleRuns[], int styleRunCount) {
* byte direction = line.getDirection();
* if (direction != Bidi.MIXED) {
* // unidirectional
* if (styleRunCount <= 1) {
* renderRun(text, start, limit, direction, styleRuns[0].style);
* } else {
* renderDirectionalRun(text, start, limit, direction,
* styleRuns, styleRunCount);
* }
* } else {
* // mixed-directional
* int count, i;
* BidiRun run;
*
* try {
* count = line.countRuns();
* } catch (IllegalStateException e) {
* e.printStackTrace();
* return;
* }
* if (styleRunCount <= 1) {
* int style = styleRuns[0].style;
*
* // iterate over directional runs
* for (i = 0; i < count; ++i) {
* run = line.getVisualRun(i);
* renderRun(text, run.getStart(), run.getLimit(),
* run.getDirection(), style);
* }
* } else {
* // iterate over both directional and style runs
* for (i = 0; i < count; ++i) {
* run = line.getVisualRun(i);
* renderDirectionalRun(text, run.getStart(),
* run.getLimit(), run.getDirection(),
* styleRuns, styleRunCount);
* }
* }
* }
* }
*
* static void renderParagraph(String text, byte textDirection,
* StyleRun styleRuns[], int styleRunCount,
* int lineWidth) {
* int length = text.length();
* Bidi para = new Bidi();
* try {
* para.setPara(text,
* textDirection != 0 ? Bidi.LEVEL_DEFAULT_RTL
* : Bidi.LEVEL_DEFAULT_LTR,
* null);
* } catch (Exception e) {
* e.printStackTrace();
* return;
* }
* byte paraLevel = (byte)(1 & para.getParaLevel());
* StyleRun styleRun = new StyleRun(length, styleNormal);
*
* if (styleRuns == null || styleRunCount <= 0) {
* styleRuns = new StyleRun[1];
* styleRunCount = 1;
* styleRuns[0] = styleRun;
* }
* // assume styleRuns[styleRunCount-1].limit>=length
*
* int width = getTextWidth(text, 0, length, styleRuns, styleRunCount);
* if (width <= lineWidth) {
* // everything fits onto one line
*
* // prepare rendering a new line from either left or right
* startLine(paraLevel, width);
*
* renderLine(para, text, 0, length, styleRuns, styleRunCount);
* } else {
* // we need to render several lines
* Bidi line = new Bidi(length, 0);
* int start = 0, limit;
* int styleRunStart = 0, styleRunLimit;
*
* for (;;) {
* limit = length;
* styleRunLimit = styleRunCount;
* width = getLineBreak(text, new Bounds(start, limit),
* para, styleRuns,
* new Bounds(styleRunStart, styleRunLimit));
* try {
* line = para.setLine(start, limit);
* } catch (Exception e) {
* e.printStackTrace();
* return;
* }
* // prepare rendering a new line
* // from either left or right
* startLine(paraLevel, width);
*
* if (styleRunStart > 0) {
* int newRunCount = styleRuns.length - styleRunStart;
* StyleRun[] newRuns = new StyleRun[newRunCount];
* System.arraycopy(styleRuns, styleRunStart, newRuns, 0,
* newRunCount);
* renderLine(line, text, start, limit, newRuns,
* styleRunLimit - styleRunStart);
* } else {
* renderLine(line, text, start, limit, styleRuns,
* styleRunLimit - styleRunStart);
* }
* if (limit == length) {
* break;
* }
* start = limit;
* styleRunStart = styleRunLimit - 1;
* if (start >= styleRuns[styleRunStart].limit) {
* ++styleRunStart;
* }
* }
* }
* }
*
* public static void main(String[] args)
* {
* renderParagraph("Some Latin text...", Bidi.LTR, null, 0, 80);
* renderParagraph("Some Hebrew text...", Bidi.RTL, null, 0, 60);
* }
* }
*
* }</pre>
*/
/*
* General implementation notes:
*
* Throughout the implementation, there are comments like (W2) that refer to
* rules of the BiDi algorithm, in this example to the second rule of the
* resolution of weak types.
*
* For handling surrogate pairs, where two UChar's form one "abstract" (or UTF-32)
* character according to UTF-16, the second UChar gets the directional property of
* the entire character assigned, while the first one gets a BN, a boundary
* neutral, type, which is ignored by most of the algorithm according to
* rule (X9) and the implementation suggestions of the BiDi algorithm.
*
* Later, adjustWSLevels() will set the level for each BN to that of the
* following character (UChar), which results in surrogate pairs getting the
* same level on each of their surrogates.
*
* In a UTF-8 implementation, the same thing could be done: the last byte of
* a multi-byte sequence would get the "real" property, while all previous
* bytes of that sequence would get BN.
*
* It is not possible to assign all those parts of a character the same real
* property because this would fail in the resolution of weak types with rules
* that look at immediately surrounding types.
*
* As a related topic, this implementation does not remove Boundary Neutral
* types from the input, but ignores them wherever this is relevant.
* For example, the loop for the resolution of the weak types reads
* types until it finds a non-BN.
* Also, explicit embedding codes are neither changed into BN nor removed.
* They are only treated the same way real BNs are.
* As stated before, adjustWSLevels() takes care of them at the end.
* For the purpose of conformance, the levels of all these codes
* do not matter.
*
* Note that this implementation modifies the dirProps
* after the initial setup, when applying X5c (replace FSI by LRI or RLI),
* X6, N0 (replace paired brackets by L or R).
*
* In this implementation, the resolution of weak types (W1 to W6),
* neutrals (N1 and N2), and the assignment of the resolved level (In)
* are all done in one single loop, in resolveImplicitLevels().
* Changes of dirProp values are done on the fly, without writing
* them back to the dirProps array.
*
*
* This implementation contains code that allows bypassing steps of the
* algorithm that are not needed on the specific paragraph
* in order to speed up the most common cases considerably,
* like text that is entirely LTR, or RTL text without numbers.
*
* Most of this is done by setting a bit for each directional property
* in a flags variable and later checking for whether there are
* any LTR characters or any RTL characters, or both, whether
* there are any explicit embedding codes, etc.
*
* If the (Xn) steps are performed, then the flags are re-evaluated,
* because they will then not contain the embedding codes any more
* and will be adjusted for override codes, so that subsequently
* more bypassing may be possible than what the initial flags suggested.
*
* If the text is not mixed-directional, then the
* algorithm steps for the weak type resolution are not performed,
* and all levels are set to the paragraph level.
*
* If there are no explicit embedding codes, then the (Xn) steps
* are not performed.
*
* If embedding levels are supplied as a parameter, then all
* explicit embedding codes are ignored, and the (Xn) steps
* are not performed.
*
* White Space types could get the level of the run they belong to,
* and are checked with a test of (flags&MASK_EMBEDDING) to
* consider if the paragraph direction should be considered in
* the flags variable.
*
* If there are no White Space types in the paragraph, then
* (L1) is not necessary in adjustWSLevels().
*/
// Original filename in ICU4J: Bidi.java
public class BidiBase {
/** Pairs a position in the text with an LRM/RLM before/after flag. */
static class Point {
int pos; /* position in text */
int flag; /* flag for LRM/RLM, before/after */
}
/**
 * Holder for an array of {@link Point} entries plus two integer counters.
 * NOTE(review): the fields carry no documentation in the original; the
 * meanings given below are inferred from the names only and must be
 * confirmed against the code that reads and writes them.
 */
static class InsertPoints {
int size; /* presumably the number of entries of points in use - TODO confirm */
int confirmed; /* presumably the number of entries already confirmed - TODO confirm */
Point[] points = new Point[0]; /* starts out as a zero-length array */
}
/**
 * Bookkeeping entry for one opening bracket: where it is, what it matches,
 * and the strong-direction context found before and inside the pair.
 */
static class Opening {
int position; /* position of opening bracket */
int match; /* matching char or -position of closing bracket */
int contextPos; /* position of last strong char found before opening */
short flags; /* bits for L or R/AL found within the pair */
byte contextDir; /* L or R according to last strong char before opening */
}
/**
 * State kept for one run: its level, the bidi classes of the last strong and
 * last base characters seen in it, the context direction for following
 * openings, and the [start, limit) range of opening entries that belong to it.
 */
static class IsoRun {
int contextPos; /* position of char determining context */
short start; /* index of first opening entry for this run */
short limit; /* index after last opening entry for this run */
byte level; /* level of this run */
byte lastStrong; /* bidi class of last strong char found in this run */
byte lastBase; /* bidi class of last base char found in this run */
byte contextDir; /* L or R to use as context for following openings */
}
/**
 * Working data for bracket handling: the array of {@link Opening} entries,
 * the array of nested {@link IsoRun} entries, and a flag for the
 * NUMBERS_SPECIAL reordering mode.
 */
static class BracketData {
/* opening entries; the array is initially SIMPLE_PARAS_COUNT long */
Opening[] openings = new Opening[SIMPLE_PARAS_COUNT];
int isoRunLast; /* index of last used entry (presumably in isoRuns - TODO confirm) */
/* array of nested isolated sequence entries; can never exceed MAX_EXPLICIT_LEVEL
+ 1 for index 0, + 1 for before the first isolated sequence */
IsoRun[] isoRuns = new IsoRun[MAX_EXPLICIT_LEVEL+2];
boolean isNumbersSpecial; /* reordering mode for NUMBERS_SPECIAL */
}
/**
 * Plain holder for two int and two short values.
 * NOTE(review): none of the fields is documented in the original. Judging by
 * the class and field names they hold saved positions and state values
 * related to an isolate - confirm against the code that uses this class.
 */
static class Isolate {
int startON; /* presumably a start position of ON (other neutral) chars - TODO confirm */
int start1; /* meaning not evident from this declaration - TODO confirm */
short stateImp; /* presumably a saved state value - TODO confirm */
short state; /* presumably a saved state value - TODO confirm */
}
/** Paragraph level setting<p>
*
* Constant indicating that the base direction depends on the first strong
* directional character in the text according to the Unicode Bidirectional
* Algorithm. If no strong directional character is present,
* then set the paragraph level to 0 (left-to-right).<p>
*
* If this value is used in conjunction with reordering modes
* <code>REORDER_INVERSE_LIKE_DIRECT</code> or
* <code>REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the text to reorder
* is assumed to be visual LTR, and the text after reordering is required
* to be the corresponding logical string with appropriate contextual
* direction. The direction of the result string will be RTL if either
* the rightmost or leftmost strong character of the source text is RTL
* or Arabic Letter, the direction will be LTR otherwise.<p>
*
* If reordering option <code>OPTION_INSERT_MARKS</code> is set, an RLM may
* be added at the beginning of the result string to ensure round trip
* (that the result string, when reordered back to visual, will produce
* the original source text).
* @see #REORDER_INVERSE_LIKE_DIRECT
* @see #REORDER_INVERSE_FOR_NUMBERS_SPECIAL
* @stable ICU 3.8
*/
public static final byte LEVEL_DEFAULT_LTR = (byte)0x7e;
/** Paragraph level setting<p>
*
* Constant indicating that the base direction depends on the first strong
* directional character in the text according to the Unicode Bidirectional
* Algorithm. If no strong directional character is present,
* then set the paragraph level to 1 (right-to-left).<p>
*
* If this value is used in conjunction with reordering modes
* <code>REORDER_INVERSE_LIKE_DIRECT</code> or
* <code>REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the text to reorder
* is assumed to be visual LTR, and the text after reordering is required
* to be the corresponding logical string with appropriate contextual
* direction. The direction of the result string will be RTL if either
* the rightmost or leftmost strong character of the source text is RTL
* or Arabic Letter, or if the text contains no strong character;
* the direction will be LTR otherwise.<p>
*
* If reordering option <code>OPTION_INSERT_MARKS</code> is set, an RLM may
* be added at the beginning of the result string to ensure round trip
* (that the result string, when reordered back to visual, will produce
* the original source text).
* @see #REORDER_INVERSE_LIKE_DIRECT
* @see #REORDER_INVERSE_FOR_NUMBERS_SPECIAL
* @stable ICU 3.8
*/
public static final byte LEVEL_DEFAULT_RTL = (byte)0x7f;
/**
* Maximum explicit embedding level.
* (The maximum resolved level can be up to <code>MAX_EXPLICIT_LEVEL+1</code>).
* @stable ICU 3.8
*/
public static final byte MAX_EXPLICIT_LEVEL = 125;
/**
* Bit flag for level input.
* Overrides directional properties.
* @stable ICU 3.8
*/
public static final byte LEVEL_OVERRIDE = (byte)0x80;
/**
* Special value which can be returned by the mapping methods when a
* logical index has no corresponding visual index or vice-versa. This may
* happen for the logical-to-visual mapping of a Bidi control when option
* <code>OPTION_REMOVE_CONTROLS</code> is
* specified. This can also happen for the visual-to-logical mapping of a
* Bidi mark (LRM or RLM) inserted by option
* <code>OPTION_INSERT_MARKS</code>.
* @see #getVisualIndex
* @see #getVisualMap
* @see #getLogicalIndex
* @see #getLogicalMap
* @see #OPTION_INSERT_MARKS
* @see #OPTION_REMOVE_CONTROLS
* @stable ICU 3.8
*/
public static final int MAP_NOWHERE = -1;
/**
* Left-to-right text.
* <ul>
* <li>As return value for <code>getDirection()</code>, it means
* that the source string contains no right-to-left characters, or
* that the source string is empty and the paragraph level is even.
* <li>As return value for <code>getBaseDirection()</code>, it
* means that the first strong character of the source string has
* a left-to-right direction.
* </ul>
* @stable ICU 3.8
*/
public static final byte LTR = 0;
/**
* Right-to-left text.
* <ul>
* <li>As return value for <code>getDirection()</code>, it means
* that the source string contains no left-to-right characters, or
* that the source string is empty and the paragraph level is odd.
* <li>As return value for <code>getBaseDirection()</code>, it
* means that the first strong character of the source string has
* a right-to-left direction.
* </ul>
* @stable ICU 3.8
*/
public static final byte RTL = 1;
/**
* Mixed-directional text.
* <p>As return value for <code>getDirection()</code>, it means
* that the source string contains both left-to-right and
* right-to-left characters.
* @stable ICU 3.8
*/
public static final byte MIXED = 2;
/**
* option bit for writeReordered():
* keep combining characters after their base characters in RTL runs
*
* @see #writeReordered
* @stable ICU 3.8
*/
public static final short KEEP_BASE_COMBINING = 1;
/**
* option bit for writeReordered():
* replace characters with the "mirrored" property in RTL runs
* by their mirror-image mappings
*
* @see #writeReordered
* @stable ICU 3.8
*/
public static final short DO_MIRRORING = 2;
/**
* option bit for writeReordered():
* surround the run with LRMs if necessary;
* this is part of the approximate "inverse Bidi" algorithm
*
* <p>This option does not imply corresponding adjustment of the index
* mappings.</p>
*
* @see #setInverse
* @see #writeReordered
* @stable ICU 3.8
*/
public static final short INSERT_LRM_FOR_NUMERIC = 4;
/**
* option bit for writeReordered():
* remove Bidi control characters
* (this does not affect INSERT_LRM_FOR_NUMERIC)
*
* <p>This option does not imply corresponding adjustment of the index
* mappings.</p>
*
* @see #writeReordered
* @see #INSERT_LRM_FOR_NUMERIC
* @stable ICU 3.8
*/
public static final short REMOVE_BIDI_CONTROLS = 8;
/**
* option bit for writeReordered():
* write the output in reverse order
*
* <p>This has the same effect as calling <code>writeReordered()</code>
* first without this option, and then calling
* <code>writeReverse()</code> without mirroring.
* Doing this in the same step is faster and avoids a temporary buffer.
* An example for using this option is output to a character terminal that
* is designed for RTL scripts and stores text in reverse order.</p>
*
* @see #writeReordered
* @stable ICU 3.8
*/
public static final short OUTPUT_REVERSE = 16;
/** Reordering mode: Regular Logical to Visual Bidi algorithm according to Unicode.
* @see #setReorderingMode
* @stable ICU 3.8
*/
private static final short REORDER_DEFAULT = 0;
/** Reordering mode: Logical to Visual algorithm which handles numbers in
* a way which mimics the behavior of Windows XP.
* @see #setReorderingMode
* @stable ICU 3.8
*/
private static final short REORDER_NUMBERS_SPECIAL = 1;
/** Reordering mode: Logical to Visual algorithm grouping numbers with
* adjacent R characters (reversible algorithm).
* @see #setReorderingMode
* @stable ICU 3.8
*/
private static final short REORDER_GROUP_NUMBERS_WITH_R = 2;
/** Reordering mode: Reorder runs only to transform a Logical LTR string
* to the logical RTL string with the same display, or vice-versa.<br>
* If this mode is set together with option
* <code>OPTION_INSERT_MARKS</code>, some Bidi controls in the source
* text may be removed and other controls may be added to produce the
* minimum combination which has the required display.
* @see #OPTION_INSERT_MARKS
* @see #setReorderingMode
* @stable ICU 3.8
*/
static final short REORDER_RUNS_ONLY = 3;
/** Reordering mode: Visual to Logical algorithm which handles numbers
* like L (same algorithm as selected by <code>setInverse(true)</code>).
* @see #setInverse
* @see #setReorderingMode
* @stable ICU 3.8
*/
static final short REORDER_INVERSE_NUMBERS_AS_L = 4;
/** Reordering mode: Visual to Logical algorithm equivalent to the regular
* Logical to Visual algorithm.
* @see #setReorderingMode
* @stable ICU 3.8
*/
static final short REORDER_INVERSE_LIKE_DIRECT = 5;
/** Reordering mode: Inverse Bidi (Visual to Logical) algorithm for the
* <code>REORDER_NUMBERS_SPECIAL</code> Bidi algorithm.
* @see #setReorderingMode
* @stable ICU 3.8
*/
static final short REORDER_INVERSE_FOR_NUMBERS_SPECIAL = 6;
/* Reordering mode values must be ordered so that all the regular logical to
* visual modes come first, and all inverse Bidi modes come last.
*
* NOTE(review): this marker equals REORDER_NUMBERS_SPECIAL (1), yet
* REORDER_GROUP_NUMBERS_WITH_R (2) is also documented as a Logical to Visual
* mode and lies above it. Confirm against the users of this constant that
* leaving it out is intentional before treating the marker as "the last
* direct mode".
*/
private static final short REORDER_LAST_LOGICAL_TO_VISUAL =
REORDER_NUMBERS_SPECIAL;
/**
* Option bit for <code>setReorderingOptions</code>:
* insert Bidi marks (LRM or RLM) when needed to ensure correct result of
* a reordering to a Logical order.
*
* <p>This option must be set or reset before calling
* <code>setPara</code>.</p>
*
* <p>This option is significant only with reordering modes which generate
* a result with Logical order, specifically:</p>
* <ul>
* <li><code>REORDER_RUNS_ONLY</code></li>
* <li><code>REORDER_INVERSE_NUMBERS_AS_L</code></li>
* <li><code>REORDER_INVERSE_LIKE_DIRECT</code></li>
* <li><code>REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code></li>
* </ul>
*
* <p>If this option is set in conjunction with reordering mode
* <code>REORDER_INVERSE_NUMBERS_AS_L</code> or with calling
* <code>setInverse(true)</code>, it implies option
* <code>INSERT_LRM_FOR_NUMERIC</code> in calls to method
* <code>writeReordered()</code>.</p>
*
* <p>For other reordering modes, a minimum number of LRM or RLM characters
* will be added to the source text after reordering it so as to ensure
* round trip, i.e. when applying the inverse reordering mode on the
* resulting logical text with removal of Bidi marks
* (option <code>OPTION_REMOVE_CONTROLS</code> set before calling
* <code>setPara()</code> or option
* <code>REMOVE_BIDI_CONTROLS</code> in
* <code>writeReordered</code>), the result will be identical to the
* source text in the first transformation.</p>
*
* <p>This option will be ignored if specified together with option
* <code>OPTION_REMOVE_CONTROLS</code>. It inhibits option
* <code>REMOVE_BIDI_CONTROLS</code> in calls to method
* <code>writeReordered()</code> and it implies option
* <code>INSERT_LRM_FOR_NUMERIC</code> in calls to method
* <code>writeReordered()</code> if the reordering mode is
* <code>REORDER_INVERSE_NUMBERS_AS_L</code>.</p>
*
* @see #setReorderingMode
* @see #setReorderingOptions
* @see #INSERT_LRM_FOR_NUMERIC
* @see #REMOVE_BIDI_CONTROLS
* @see #OPTION_REMOVE_CONTROLS
* @see #REORDER_RUNS_ONLY
* @see #REORDER_INVERSE_NUMBERS_AS_L
* @see #REORDER_INVERSE_LIKE_DIRECT
* @see #REORDER_INVERSE_FOR_NUMBERS_SPECIAL
* @stable ICU 3.8
*/
static final int OPTION_INSERT_MARKS = 1;
/**
* Option bit (value 2) for <code>setReorderingOptions</code>:
* remove Bidi control characters.
*
* <p>This option must be set or reset before calling
* <code>setPara</code>.</p>
*
* <p>This option nullifies option
* <code>OPTION_INSERT_MARKS</code>. It inhibits option
* <code>INSERT_LRM_FOR_NUMERIC</code> in calls to method
* <code>writeReordered()</code> and it implies option
* <code>REMOVE_BIDI_CONTROLS</code> in calls to that method.</p>
*
* @see #setReorderingMode
* @see #setReorderingOptions
* @see #OPTION_INSERT_MARKS
* @see #INSERT_LRM_FOR_NUMERIC
* @see #REMOVE_BIDI_CONTROLS
* @stable ICU 3.8
*/
static final int OPTION_REMOVE_CONTROLS = 2;
/**
* Option bit (value 4) for <code>setReorderingOptions</code>:
* process the output as part of a stream to be continued.
*
* <p>This option must be set or reset before calling
* <code>setPara</code>.</p>
*
* <p>This option specifies that the caller is interested in processing
* a large text object in parts. The results of the successive calls are
* expected to be concatenated by the caller. Only the call for the last
* part will have this option bit off.</p>
*
* <p>When this option bit is on, <code>setPara()</code> may process
* less than the full source text in order to truncate the text at a
* meaningful boundary. The caller should call
* <code>getProcessedLength()</code> immediately after calling
* <code>setPara()</code> in order to determine how much of the source
* text has been processed. Source text beyond that length should be
* resubmitted in following calls to <code>setPara</code>. The
* processed length may be less than the length of the source text if a
* character preceding the last character of the source text constitutes a
* reasonable boundary (like a block separator) for text to be continued.<br>
* If the last character of the source text constitutes a reasonable
* boundary, the whole text will be processed at once.<br>
* If nowhere in the source text there exists
* such a reasonable boundary, the processed length will be zero.<br>
* The caller should check for such an occurrence and do one of the following:</p>
* <ul><li>submit a larger amount of text with a better chance to include
* a reasonable boundary.</li>
* <li>resubmit the same text after turning off option
* <code>OPTION_STREAMING</code>.</li></ul>
* <p>In all cases, this option should be turned off before processing the last
* part of the text.</p>
*
* <p>When the <code>OPTION_STREAMING</code> option is used, it is
* recommended to call <code>orderParagraphsLTR(true)</code> before calling
* <code>setPara()</code> so that later paragraphs may be concatenated to
* previous paragraphs on the right.
* </p>
*
* @see #setReorderingMode
* @see #setReorderingOptions
* @see #getProcessedLength
* @stable ICU 3.8
*/
private static final int OPTION_STREAMING = 4;
/*
* Comparing the description of the Bidi algorithm with this implementation
* is easier with the same names for the Bidi types in the code as there.
* See UCharacterDirection.
* The expansion noted beside each value is the Bidi class name used in
* Unicode Standard Annex #9.
*/
/* private */ static final byte L = 0; /* Left-to-Right */
private static final byte R = 1; /* Right-to-Left */
private static final byte EN = 2; /* European Number */
private static final byte ES = 3; /* European Number Separator */
private static final byte ET = 4; /* European Number Terminator */
private static final byte AN = 5; /* Arabic Number */
private static final byte CS = 6; /* Common Number Separator */
static final byte B = 7; /* Paragraph Separator */
private static final byte S = 8; /* Segment Separator */
private static final byte WS = 9; /* Whitespace */
private static final byte ON = 10; /* Other Neutrals */
private static final byte LRE = 11; /* Left-to-Right Embedding */
private static final byte LRO = 12; /* Left-to-Right Override */
private static final byte AL = 13; /* Right-to-Left Arabic (Arabic Letter) */
private static final byte RLE = 14; /* Right-to-Left Embedding */
private static final byte RLO = 15; /* Right-to-Left Override */
private static final byte PDF = 16; /* Pop Directional Format */
private static final byte NSM = 17; /* Nonspacing Mark */
private static final byte BN = 18; /* Boundary Neutral */
private static final byte FSI = 19; /* First Strong Isolate */
private static final byte LRI = 20; /* Left-to-Right Isolate */
private static final byte RLI = 21; /* Right-to-Left Isolate */
private static final byte PDI = 22; /* Pop Directional Isolate */
/* The two values below extend the numbering past PDI (23 and 24). */
private static final byte ENL = PDI + 1; /* EN after W7 */
private static final byte ENR = ENL + 1; /* EN not subject to W7 */
// Number of directional types: covers L (0) through PDI (22).
// ENL and ENR (23 and 24) are not included in this count.
private static final int CHAR_DIRECTION_COUNT = 23;
/**
* Enumerated property Bidi_Paired_Bracket_Type (new in Unicode 6.3).
* Used in
* <a href="https://www.unicode.org/reports/tr9/">Unicode Standard Annex #9:
* Unicode Bidirectional Algorithm</a>.
* Returns UCharacter.BidiPairedBracketType values.
* <p>NOTE(review): the value 0x1015 presumably mirrors the property selector
* <code>UProperty.BIDI_PAIRED_BRACKET_TYPE</code> of ICU -- confirm before
* changing it.</p>
* @stable ICU 52
*/
public static final int BIDI_PAIRED_BRACKET_TYPE = 0x1015;
/**
* Bidi Paired Bracket Type constants.
*
* @see UProperty#BIDI_PAIRED_BRACKET_TYPE
* @stable ICU 52
*/