From b0656d8a6d423f0e44e8a455af984939315d699a Mon Sep 17 00:00:00 2001 From: John Wilcock Date: Thu, 6 Jun 2024 14:30:24 -0700 Subject: [PATCH 01/14] Rebase --- .../java/org/unicode/props/UcdProperty.java | 13 +++++++---- .../unicode/props/ExtraPropertyAliases.txt | 14 ++++++++++- .../org/unicode/props/IndexPropertyRegex.txt | 23 +++++++++++++------ .../unicode/props/IndexUnicodeProperties.txt | 13 +++++++++++ 4 files changed, 51 insertions(+), 12 deletions(-) diff --git a/unicodetools/src/main/java/org/unicode/props/UcdProperty.java b/unicodetools/src/main/java/org/unicode/props/UcdProperty.java index fd9e5b7a3..9befbc369 100644 --- a/unicodetools/src/main/java/org/unicode/props/UcdProperty.java +++ b/unicodetools/src/main/java/org/unicode/props/UcdProperty.java @@ -84,12 +84,16 @@ public enum UcdProperty { Emoji_SB(PropertyType.Miscellaneous, "ESB"), ISO_Comment(PropertyType.Miscellaneous, "isc"), Jamo_Short_Name(PropertyType.Miscellaneous, "JSN"), + NC_Corrected(PropertyType.Miscellaneous, "ncCorrected"), + NC_Original(PropertyType.Miscellaneous, "ncOriginal"), + NC_Version(PropertyType.Miscellaneous, "ncVersion"), Name(PropertyType.Miscellaneous, "na"), Name_Alias(PropertyType.Miscellaneous, null, ValueCardinality.Unordered, "Name_Alias"), Named_Sequences(PropertyType.Miscellaneous, "NS"), Named_Sequences_Prov(PropertyType.Miscellaneous, "NSP"), Standardized_Variant(PropertyType.Miscellaneous, null, ValueCardinality.Unordered, "SV"), Unicode_1_Name(PropertyType.Miscellaneous, "na1"), + emoji_variation_sequence(PropertyType.Miscellaneous, "EVS"), kAlternateTotalStrokes(PropertyType.Miscellaneous, "cjkAlternateTotalStrokes"), kBigFive(PropertyType.Miscellaneous, "cjkBigFive"), kCCCII(PropertyType.Miscellaneous, "cjkCCCII"), @@ -190,7 +194,7 @@ public enum UcdProperty { kRSKanWa(PropertyType.Miscellaneous, "cjkRSKanWa"), kRSKangXi(PropertyType.Miscellaneous, "cjkRSKangXi"), kRSKorean(PropertyType.Miscellaneous, "cjkRSKorean"), - 
kRSTUnicode(PropertyType.Miscellaneous, "kRSTUnicode"), + kRSTUnicode(PropertyType.Miscellaneous, "cjkRSTUnicode"), kRSUnicode( PropertyType.Miscellaneous, null, @@ -198,7 +202,7 @@ public enum UcdProperty { "cjkRSUnicode", "Unicode_Radical_Stroke", "URS"), - kReading(PropertyType.Miscellaneous, "kReading"), + kReading(PropertyType.Miscellaneous, "cjkReading"), kSBGY(PropertyType.Miscellaneous, null, ValueCardinality.Unordered, "cjkSBGY"), kSMSZD2003Index(PropertyType.Miscellaneous, "cjkSMSZD2003Index"), kSMSZD2003Readings(PropertyType.Miscellaneous, "cjkSMSZD2003Readings"), @@ -210,11 +214,11 @@ public enum UcdProperty { ValueCardinality.Unordered, "cjkSpecializedSemanticVariant"), kSpoofingVariant(PropertyType.Miscellaneous, "cjkSpoofingVariant"), - kSrc_NushuDuben(PropertyType.Miscellaneous, "kSrc_NushuDuben"), + kSrc_NushuDuben(PropertyType.Miscellaneous, "cjkSrc_NushuDuben"), kStrange(PropertyType.Miscellaneous, "cjkStrange"), kTGH(PropertyType.Miscellaneous, null, ValueCardinality.Unordered, "cjkTGH"), kTGHZ2013(PropertyType.Miscellaneous, "cjkTGHZ2013"), - kTGT_MergedSrc(PropertyType.Miscellaneous, "kTGT_MergedSrc"), + kTGT_MergedSrc(PropertyType.Miscellaneous, "cjkTGT_MergedSrc"), kTaiwanTelegraph(PropertyType.Miscellaneous, "cjkTaiwanTelegraph"), kTang(PropertyType.Miscellaneous, null, ValueCardinality.Unordered, "cjkTang"), kTotalStrokes(PropertyType.Miscellaneous, null, ValueCardinality.Ordered, "cjkTotalStrokes"), @@ -224,6 +228,7 @@ public enum UcdProperty { kXHC1983(PropertyType.Miscellaneous, null, ValueCardinality.Unordered, "cjkXHC1983"), kXerox(PropertyType.Miscellaneous, "cjkXerox"), kZVariant(PropertyType.Miscellaneous, "cjkZVariant"), + kZhuang(PropertyType.Miscellaneous, "cjkZhuang"), kZhuangNumeric(PropertyType.Miscellaneous, "cjkZhuangNumeric"), // Catalog diff --git a/unicodetools/src/main/resources/org/unicode/props/ExtraPropertyAliases.txt b/unicodetools/src/main/resources/org/unicode/props/ExtraPropertyAliases.txt index 
80faee3c7..b2b4fd449 100644 --- a/unicodetools/src/main/resources/org/unicode/props/ExtraPropertyAliases.txt +++ b/unicodetools/src/main/resources/org/unicode/props/ExtraPropertyAliases.txt @@ -66,6 +66,7 @@ CJKR ; CJK_Radical EDCM ; Emoji_DCM EKDDI ; Emoji_KDDI ESB ; Emoji_SB +EVS ; emoji_variation_sequence NS ; Named_Sequences NSP ; Named_Sequences_Prov SV ; Standardized_Variant @@ -149,6 +150,13 @@ cjkJoyoKanji ; kJoyoKanji cjkKoreanEducationHanja ; kKoreanEducationHanja cjkKoreanName ; kKoreanName cjkTGH ; kTGH +cjkRSTUnicode ; kRSTUnicode +cjkReading ; kReading +cjkSrc_NushuDuben ; kSrc_NushuDuben +cjkTGT_MergedSrc ; kTGT_MergedSrc +ncCorrected ; NC_Corrected +ncOriginal ; NC_Original +ncVersion ; NC_Version # 13.0 cjkSpoofingVariant ; kSpoofingVariant cjkTGHZ2013 ; kTGHZ2013 @@ -166,6 +174,7 @@ cjkVietnameseNumeric ; kVietnameseNumeric cjkZhuangNumeric ; kZhuangNumeric # 16.0 cjkFanqie ; kFanqie +<<<<<<< HEAD kTGT_MergedSrc ; kTGT_MergedSrc kRSTUnicode ; kRSTUnicode @@ -175,4 +184,7 @@ kReading ; kReading kEH_Func ; kEH_Func kEH_FVal ; kEH_FVal -kEH_UniK ; kEH_UniK \ No newline at end of file +kEH_UniK ; kEH_UniK +======= +cjkZhuang ; kZhuang +>>>>>>> 2f297052 (Initial checkin for UcdXML) diff --git a/unicodetools/src/main/resources/org/unicode/props/IndexPropertyRegex.txt b/unicodetools/src/main/resources/org/unicode/props/IndexPropertyRegex.txt index 5c8153d33..f3856add4 100644 --- a/unicodetools/src/main/resources/org/unicode/props/IndexPropertyRegex.txt +++ b/unicodetools/src/main/resources/org/unicode/props/IndexPropertyRegex.txt @@ -142,7 +142,7 @@ kHanYu ; MULTI_VALUED ; [1-8][0-9]{4}\.[0-3 kIRGHanyuDaZidian ; SINGLE_VALUED ; [1-8][0-9]{4}\.[0-3][0-9][01] kCNS1992 ; SINGLE_VALUED ; [1-9]-[0-9A-F]{4} kTotalStrokes ; ORDERED ; [1-9][0-9]{0,2} -kRSUnicode ; ORDERED ; [1-9][0-9]{0,2}\'?\.[0-9]{1,2} +kRSUnicode ; ORDERED ; [1-9][0-9]{0,2}\'?\.[0-9]{1,2} kRSJapanese ; EXTENSIBLE ; [1-9][0-9]{0,2}\.[0-9]{1,2} kRSKanWa ; EXTENSIBLE ; 
[1-9][0-9]{0,2}\.[0-9]{1,2} kRSKangXi ; EXTENSIBLE ; [1-9][0-9]{0,2}\.[0-9]{1,2} @@ -170,16 +170,25 @@ kHanyuPinlu ; MULTI_VALUED ; [a-z\x{308}]+[1-5]\ kCantonese ; MULTI_VALUED ; [a-z]{1,6}[1-6] kTang ; MULTI_VALUED ; \*?[A-Za-z()\x{E6}\x{251}\x{259}\x{25B}\x{300}\x{30C}]+ -kJinmeiyoKanji ; MULTI_VALUED ; (20[0-9]{2})(:U\+2?[0-9A-F]{4})? -kJoyoKanji ; MULTI_VALUED ; (20[0-9]{2})|(U\+2?[0-9A-F]{4}) +kJinmeiyoKanji ; MULTI_VALUED ; (20[0-9]{2})(:U\+2?[0-9A-F]{4})? +kJoyoKanji ; MULTI_VALUED ; (20[0-9]{2})|(U\+2?[0-9A-F]{4}) kKoreanEducationHanja ; MULTI_VALUED ; 20[0-9]{2} -kKoreanName ; MULTI_VALUED ; (20[0-9]{2})(:U\+2?[0-9A-F]{4})* -kTGH ; MULTI_VALUED ; 20[0-9]{2}:[1-9][0-9]{0,3} +kKoreanName ; MULTI_VALUED ; (20[0-9]{2})(:U\+2?[0-9A-F]{4})* +kTGH ; MULTI_VALUED ; 20[0-9]{2}:[1-9][0-9]{0,3} -kIRG_UKSource ; SINGLE_VALUED ; V[0-4]-[0-9A-F]{4} +kIRG_UKSource ; SINGLE_VALUED ; V[0-4]-[0-9A-F]{4} kIRG_SSource ; SINGLE_VALUED ; V[0-4]-[0-9A-F]{4} +kSrc_NushuDuben ; SINGLE_VALUED ; [0-9]+\.[0-9]+ +kReading ; SINGLE_VALUED ; [a-z]{1,6}[1-6]+ +kRSTUnicode ; SINGLE_VALUED ; [0-9]+\.[0-9]+ +kTGT_MergedSrc ; SINGLE_VALUED ; L2008-[0-9A-F]{4,5}(-[0-9]{4,5})? 
+ +NC_Original ; SINGLE_VALUED ; [0-9A-F]{4,5} +NC_Corrected ; SINGLE_VALUED ; [0-9A-F]{4,5} +NC_Version ; SINGLE_VALUED ; [0-9]\.[0-9]\.[0-9] + # ============================= # Catalog/Enum/Binary Properties @@ -204,5 +213,5 @@ Confusable_MA ; SINGLE_VALUED ; $codePoints #Emoji ; SINGLE_VALUED ; #Emoji_Presentation ; SINGLE_VALUED ; #Emoji_Modifier ; SINGLE_VALUED ; -#Emoji_Modifier_Base ; SINGLE_VALUED ; +#Emoji_Modifier_Base ; SINGLE_VALUED ; diff --git a/unicodetools/src/main/resources/org/unicode/props/IndexUnicodeProperties.txt b/unicodetools/src/main/resources/org/unicode/props/IndexUnicodeProperties.txt index 018f9614d..c171cdacb 100644 --- a/unicodetools/src/main/resources/org/unicode/props/IndexUnicodeProperties.txt +++ b/unicodetools/src/main/resources/org/unicode/props/IndexUnicodeProperties.txt @@ -36,6 +36,8 @@ FileType ; Unihan_OtherMappings ; PropertyValue FileType ; Unihan_RadicalStrokeCounts ; PropertyValue FileType ; Unihan_Readings ; PropertyValue FileType ; Unihan_Variants ; PropertyValue +FileType ; NushuSources ; PropertyValue +FileType ; TangutSources ; PropertyValue # NameAliases File Type # Contains a multivalued property, where successive values are not in the same line, but are divided out on successive lines with the same code point @@ -43,6 +45,7 @@ FileType ; Unihan_Variants ; PropertyValue FileType ; NameAliases ; NameAliases FileType ; NameAliasesProv ; NameAliases FileType ; StandardizedVariants ; StandardizedVariants +FileType ; emoji-variation-sequences ; StandardizedVariants # CJKRadicals File Type @@ -309,6 +312,15 @@ Unihan_Variants ; kSpoofingVariant Unihan_Variants ; kTraditionalVariant Unihan_Variants ; kZVariant +NushuSources ; kSrc_NushuDuben +NushuSources ; kReading +TangutSources ; kRSTUnicode +TangutSources ; kTGT_MergedSrc + +NormalizationCorrections ; NC_Original +NormalizationCorrections ; NC_Corrected +NormalizationCorrections ; NC_Version + # Extras ScriptExtensions ; Script_Extensions @@ -319,6 +331,7 @@ 
EmojiSources ; Emoji_SB ; 3 NamedSequences ; Named_Sequences NamedSequencesProv ; Named_Sequences_Prov StandardizedVariants ; Standardized_Variant +emoji-variation-sequences ; emoji-variation-sequence DoNotEmit ; Do_Not_Emit_Preferred ; 1 DoNotEmit ; Do_Not_Emit_Type ; 2 From 3ce611a0371f19cb8ddbe8ad45b55b355a3895bc Mon Sep 17 00:00:00 2001 From: John Wilcock Date: Thu, 6 Jun 2024 14:31:05 -0700 Subject: [PATCH 02/14] Initial checkin for UcdXML --- .gitignore | 1 + .../org/unicode/xml/AttributeResolver.java | 404 ++++++++ .../java/org/unicode/xml/UCDDataResolver.java | 194 ++++ .../java/org/unicode/xml/UCDXMLWriter.java | 78 ++ .../org/unicode/xml/UcdPropertyDetail.java | 923 ++++++++++++++++++ .../org/unicode/xml/UcdSectionComponent.java | 28 + .../org/unicode/xml/UcdSectionDetail.java | 153 +++ .../src/main/java/org/unicode/xml/UcdXML.java | 362 +++++++ 8 files changed, 2143 insertions(+) create mode 100644 unicodetools/src/main/java/org/unicode/xml/AttributeResolver.java create mode 100644 unicodetools/src/main/java/org/unicode/xml/UCDDataResolver.java create mode 100644 unicodetools/src/main/java/org/unicode/xml/UCDXMLWriter.java create mode 100644 unicodetools/src/main/java/org/unicode/xml/UcdPropertyDetail.java create mode 100644 unicodetools/src/main/java/org/unicode/xml/UcdSectionComponent.java create mode 100644 unicodetools/src/main/java/org/unicode/xml/UcdSectionDetail.java create mode 100644 unicodetools/src/main/java/org/unicode/xml/UcdXML.java diff --git a/.gitignore b/.gitignore index 60e7ec63e..c6d5a34bd 100644 --- a/.gitignore +++ b/.gitignore @@ -43,6 +43,7 @@ perf-*.xml test-*.xml # Directories +.idea/ .settings/ .vs/ .vscode/ diff --git a/unicodetools/src/main/java/org/unicode/xml/AttributeResolver.java b/unicodetools/src/main/java/org/unicode/xml/AttributeResolver.java new file mode 100644 index 000000000..b5d093635 --- /dev/null +++ b/unicodetools/src/main/java/org/unicode/xml/AttributeResolver.java @@ -0,0 +1,404 @@ +package org.unicode.xml; 
+ +import com.ibm.icu.dev.util.UnicodeMap; +import com.ibm.icu.util.VersionInfo; +import org.unicode.cldr.draft.FileUtilities; +import org.unicode.props.*; + +import java.util.*; + +public class AttributeResolver { + + private final IndexUnicodeProperties indexUnicodeProperties; + private final UnicodeMap map_age; + private final UnicodeMap map_bidi_class; + private final UnicodeMap map_bidi_paired_bracket_type; + private final UnicodeMap map_block; + private final UnicodeMap map_canonical_combining_class; + private final UnicodeMap map_decomposition_type; + private final UnicodeMap map_do_not_emit_type; + private final UnicodeMap map_east_asian_width; + private final UnicodeMap map_general_category; + private final UnicodeMap map_grapheme_cluster_break; + private final UnicodeMap map_hangul_syllable_type; + private final UnicodeMap map_identifier_status; + private final UnicodeMap map_identifier_type; + private final UnicodeMap map_idn_2008; + private final UnicodeMap map_idn_status; + private final UnicodeMap map_indic_conjunct_break; + private final UnicodeMap map_indic_positional_category; + private final UnicodeMap map_indic_syllabic_category; + private final UnicodeMap map_jamo_short_name; + private final UnicodeMap map_joining_group; + private final UnicodeMap map_joining_type; + private final UnicodeMap map_line_break; + private final UnicodeMap map_nfc_quick_check; + private final UnicodeMap map_nfd_quick_check; + private final UnicodeMap map_nfkc_quick_check; + private final UnicodeMap map_nfkd_quick_check; + private final UnicodeMap map_numeric_type; + private final UnicodeMap map_other_joining_type; + private final UnicodeMap map_script; + private final UnicodeMap map_script_extensions; + private final UnicodeMap map_sentence_break; + private final UnicodeMap map_vertical_orientation; + private final UnicodeMap map_word_break; + private final HashMap> map_NameAlias; + + //If there is a change in any of these properties between two adjacent characters, 
it will result in a new range. + private final UcdProperty[] rangeDefiningProperties = { + UcdProperty.Age, + UcdProperty.Bidi_Class, + UcdProperty.Block, + UcdProperty.Decomposition_Mapping, + UcdProperty.Numeric_Type, + UcdProperty.Numeric_Value, + UcdProperty.Vertical_Orientation + }; + + public AttributeResolver(IndexUnicodeProperties iup) { + indexUnicodeProperties = iup; + map_age = indexUnicodeProperties.loadEnum(UcdProperty.Age); + map_bidi_class = indexUnicodeProperties.loadEnum(UcdProperty.Bidi_Class); + map_bidi_paired_bracket_type = indexUnicodeProperties.loadEnum(UcdProperty.Bidi_Paired_Bracket_Type); + map_block = indexUnicodeProperties.loadEnum(UcdProperty.Block); + map_canonical_combining_class = indexUnicodeProperties.loadEnum(UcdProperty.Canonical_Combining_Class); + map_decomposition_type = indexUnicodeProperties.loadEnum(UcdProperty.Decomposition_Type); + map_do_not_emit_type = indexUnicodeProperties.loadEnum(UcdProperty.Do_Not_Emit_Type); + map_east_asian_width = indexUnicodeProperties.loadEnum(UcdProperty.East_Asian_Width); + map_general_category = indexUnicodeProperties.loadEnum(UcdProperty.General_Category); + map_grapheme_cluster_break = indexUnicodeProperties.loadEnum(UcdProperty.Grapheme_Cluster_Break); + map_hangul_syllable_type = indexUnicodeProperties.loadEnum(UcdProperty.Hangul_Syllable_Type); + map_identifier_status = indexUnicodeProperties.loadEnum(UcdProperty.Identifier_Status); + map_identifier_type = indexUnicodeProperties.loadEnum(UcdProperty.Identifier_Type); + map_idn_2008 = indexUnicodeProperties.loadEnum(UcdProperty.Idn_2008); + map_idn_status = indexUnicodeProperties.loadEnum(UcdProperty.Idn_Status); + map_indic_conjunct_break = indexUnicodeProperties.loadEnum(UcdProperty.Indic_Conjunct_Break); + map_indic_positional_category = indexUnicodeProperties.loadEnum(UcdProperty.Indic_Positional_Category); + map_indic_syllabic_category = indexUnicodeProperties.loadEnum(UcdProperty.Indic_Syllabic_Category); + map_jamo_short_name = 
indexUnicodeProperties.loadEnum(UcdProperty.Jamo_Short_Name); + map_joining_group = indexUnicodeProperties.loadEnum(UcdProperty.Joining_Group); + map_joining_type = indexUnicodeProperties.loadEnum(UcdProperty.Joining_Type); + map_line_break = indexUnicodeProperties.loadEnum(UcdProperty.Line_Break); + map_nfc_quick_check = indexUnicodeProperties.loadEnum(UcdProperty.NFC_Quick_Check); + map_nfd_quick_check = indexUnicodeProperties.loadEnum(UcdProperty.NFD_Quick_Check); + map_nfkc_quick_check = indexUnicodeProperties.loadEnum(UcdProperty.NFKC_Quick_Check); + map_nfkd_quick_check = indexUnicodeProperties.loadEnum(UcdProperty.NFKD_Quick_Check); + map_numeric_type = indexUnicodeProperties.loadEnum(UcdProperty.Numeric_Type); + map_other_joining_type = indexUnicodeProperties.loadEnum(UcdProperty.Other_Joining_Type); + map_script = indexUnicodeProperties.loadEnum(UcdProperty.Script); + map_script_extensions = indexUnicodeProperties.getProperty(UcdProperty.Script_Extensions).getUnicodeMap(); + map_sentence_break = indexUnicodeProperties.loadEnum(UcdProperty.Sentence_Break); + map_vertical_orientation = indexUnicodeProperties.loadEnum(UcdProperty.Vertical_Orientation); + map_word_break = indexUnicodeProperties.loadEnum(UcdProperty.Word_Break); + + //UCD code is only set up to read a single Alias value from NameAliases.txt. + //Instead, we load both the Alias and the Type data in the constructor. We keep them in memory because + //NameAliases isn't too large. 
+ map_NameAlias = loadNameAliases(); + } + + private enum AliasType { + ABBREVIATION ("abbreviation"), + ALTERNATE ("alternate"), + CONTROL ("control"), + CORRECTION ("correction"), + FIGMENT ("figment"); + + private final String aliasType; + + AliasType(String aliasType) { + this.aliasType = aliasType; + } + + public String toString() { + return aliasType; + } + } + + private static class NameAlias { + + private String alias; + private final AliasType type; + + private NameAlias(String alias, AliasType type) { + this.alias = alias; + this.type = type; + } + + public String getAlias() { + return alias; + } + public AliasType getType() { + return type; + } + + } + + private static class NameAliasComparator implements java.util.Comparator { + + @Override + public int compare(NameAlias o1, NameAlias o2) { + return o1.getAlias().compareTo(o2.getAlias()); + } + } + + private HashMap> loadNameAliases() { + HashMap> nameAliasesByCodepoint = new HashMap<>(); + final PropertyParsingInfo fileInfo = PropertyParsingInfo.getPropertyInfo(UcdProperty.Name_Alias); + String fullFilename = fileInfo.getFullFileName(indexUnicodeProperties.getUcdVersion()); + UcdLineParser parser = new UcdLineParser(FileUtilities.in("", fullFilename)); + NameAliasComparator nameAliasComparator = new NameAliasComparator(); + + for (UcdLineParser.UcdLine line : parser) { + String[] parts = line.getParts(); + int codepoint = Integer.parseInt(parts[0], 16); + NameAlias nameAlias = new NameAlias( + parts[1], AliasType.valueOf(parts[2].toUpperCase())); + + if (nameAliasesByCodepoint.containsKey(codepoint)) { + LinkedList nameAliases = new LinkedList<>(nameAliasesByCodepoint.get(codepoint)); + nameAliases.add(nameAlias); + nameAliases.sort(nameAliasComparator); + nameAliasesByCodepoint.replace(codepoint, nameAliases); + } + else { + nameAliasesByCodepoint.put(codepoint, new LinkedList<>(List.of(nameAlias))); + } + } + return nameAliasesByCodepoint; + } + + public String getAttributeValue(UcdProperty prop, int 
codepoint) { + String resolvedValue = indexUnicodeProperties.getResolvedValue(prop, codepoint); + switch(prop.getType()) { + case Numeric: + switch(prop) { + case kOtherNumeric: + case kPrimaryNumeric: + case kAccountingNumeric: + return (resolvedValue.equals("NaN")) ? null : resolvedValue; + default: + return Optional.ofNullable(resolvedValue).orElse("NaN"); + } + case String: + switch(prop) { + case Equivalent_Unified_Ideograph: + String EqUIdeo = getMappingValue(codepoint, resolvedValue, false, ""); + return (EqUIdeo.equals("#")) ? null : EqUIdeo; + case kCompatibilityVariant: + String kCompatibilityVariant = getMappingValue(codepoint, resolvedValue, false, "U+"); + return (kCompatibilityVariant.equals("#")) ? "" : kCompatibilityVariant; + case kSimplifiedVariant: + case kTraditionalVariant: + String kVariant = getMappingValue(codepoint, resolvedValue, isUnihanAttributeRange(codepoint), "U+"); + return (kVariant.equals("#")) ? "" : kVariant; + case Bidi_Mirroring_Glyph: + //TODO: Question for PAG - This is probably not the desired behavior, but adding this case to maintain consistent output. + // Check the spec. But otherwise keep consistent. Update this comment to indicate why. + String bmg = getMappingValue(codepoint, resolvedValue, false, ""); + return (bmg.equals("#")) ? 
"" : bmg; + default: + return getMappingValue(codepoint, resolvedValue, false, ""); + } + case Miscellaneous: + switch(prop) { + case Jamo_Short_Name: + //return map_jamo_short_name.get(codepoint).getShortName(); + return Optional.ofNullable(resolvedValue).orElse(""); + case Name: + if(resolvedValue != null && resolvedValue.startsWith("CJK UNIFIED IDEOGRAPH-")) { + return "CJK UNIFIED IDEOGRAPH-#"; + } + if(resolvedValue != null && resolvedValue.startsWith("CJK COMPATIBILITY IDEOGRAPH-")) { + return "CJK COMPATIBILITY IDEOGRAPH-#"; + } + if(resolvedValue != null && resolvedValue.startsWith("TANGUT IDEOGRAPH-")) { + return "TANGUT IDEOGRAPH-#"; + } + if(resolvedValue != null && resolvedValue.startsWith("KHITAN SMALL SCRIPT CHARACTER-")) { + return "KHITAN SMALL SCRIPT CHARACTER-#"; + } + if(resolvedValue != null && resolvedValue.startsWith("NUSHU CHARACTER-")) { + return "NUSHU CHARACTER-#"; + } + if(resolvedValue != null && resolvedValue.startsWith("EGYPTIAN HIEROGLYPH-")) { + return "EGYPTIAN HIEROGLYPH-#"; + } + return Optional.ofNullable(resolvedValue).orElse(""); + case kDefinition: + return resolvedValue; + default: + if (resolvedValue!= null) { + return resolvedValue.replaceAll("\\|", " "); + } + return ""; + } + case Catalog: + switch(prop) { + case Age: + String age = map_age.get(codepoint).getShortName(); + return (age.equals("NA")) ? 
"unassigned" : age; + case Block: + return map_block.get(codepoint).getShortName(); + case Script: + return map_script.get(codepoint).getShortName(); + case Script_Extensions: + StringBuilder extensionBuilder = new StringBuilder(); + String[] extensions = map_script_extensions.get(codepoint).split("\\|", 0); + for (String extension : extensions) { + extensionBuilder.append(UcdPropertyValues.Script_Values.valueOf(extension).getShortName()); + extensionBuilder.append(" "); + } + return extensionBuilder.toString().trim(); + default: + throw new RuntimeException("Missing Catalog case"); + } + case Enumerated: + switch(prop) { + case Bidi_Class: + return map_bidi_class.get(codepoint).getShortName(); + case Bidi_Paired_Bracket_Type: + return map_bidi_paired_bracket_type.get(codepoint).getShortName(); + case Canonical_Combining_Class: + return map_canonical_combining_class.get(codepoint).getShortName(); + case Decomposition_Type: + //TODO: Question for PAG - This is probably not the desired behavior, but specifying lower case to maintain consistent output. + // Check the spec. But otherwise keep consistent. Update this comment to indicate why. 
+ return map_decomposition_type.get(codepoint).getShortName().toLowerCase(); + case Do_Not_Emit_Type: + return map_do_not_emit_type.get(codepoint).getShortName(); + case East_Asian_Width: + return map_east_asian_width.get(codepoint).getShortName(); + case General_Category: + return map_general_category.get(codepoint).getShortName(); + case Grapheme_Cluster_Break: + return map_grapheme_cluster_break.get(codepoint).getShortName(); + case Hangul_Syllable_Type: + return map_hangul_syllable_type.get(codepoint).getShortName(); + case Identifier_Status: + return map_identifier_status.get(codepoint).getShortName(); + case Identifier_Type: + return map_identifier_type.get(codepoint).getShortName(); + case Idn_2008: + return map_idn_2008.get(codepoint).getShortName(); + case Idn_Status: + return map_idn_status.get(codepoint).getShortName(); + case Indic_Conjunct_Break: + return map_indic_conjunct_break.get(codepoint).getShortName(); + case Indic_Positional_Category: + return map_indic_positional_category.get(codepoint).getShortName(); + case Indic_Syllabic_Category: + return map_indic_syllabic_category.get(codepoint).getShortName(); + case Joining_Group: + return map_joining_group.get(codepoint).getShortName(); + case Joining_Type: + return map_joining_type.get(codepoint).getShortName(); + case Line_Break: + return map_line_break.get(codepoint).getShortName(); + case NFC_Quick_Check: + return map_nfc_quick_check.get(codepoint).getShortName(); + case NFD_Quick_Check: + return map_nfd_quick_check.get(codepoint).getShortName(); + case NFKC_Quick_Check: + return map_nfkc_quick_check.get(codepoint).getShortName(); + case NFKD_Quick_Check: + return map_nfkd_quick_check.get(codepoint).getShortName(); + case Numeric_Type: + return map_numeric_type.get(codepoint).getShortName(); + case Other_Joining_Type: + return map_other_joining_type.get(codepoint).getShortName(); + case Sentence_Break: + return map_sentence_break.get(codepoint).getShortName(); + case Vertical_Orientation: + 
return map_vertical_orientation.get(codepoint).getShortName(); + case Word_Break: + return map_word_break.get(codepoint).getShortName(); + default: + throw new RuntimeException("Missing Enumerated case"); + } + case Binary: + { + switch(resolvedValue) { + // Seems overkill to get this from UcdPropertyValues.Binary + case "No": + return "N"; + case "Yes": + return "Y"; + default: + throw new RuntimeException("Unexpected Binary value"); + } + } + default: + throw new RuntimeException("Missing PropertyType case"); + } + } + + public boolean isUnassignedCodepoint(int codepoint) { + return UcdPropertyValues.General_Category_Values.Unassigned.equals(getgc(codepoint)) || + UcdPropertyValues.General_Category_Values.Private_Use.equals(getgc(codepoint)) || + UcdPropertyValues.General_Category_Values.Surrogate.equals(getgc(codepoint)); + } + + public UcdPropertyValues.General_Category_Values getgc(int codepoint) { + return map_general_category.get(codepoint); + } + + public String getNChar(int codepoint) { + return getAttributeValue(UcdProperty.Noncharacter_Code_Point, codepoint); + } + + public HashMap getNameAliases(int codepoint) { + HashMap nameAliases = new LinkedHashMap<>(); + LinkedList nameAliasList = map_NameAlias.get(codepoint); + if (null != nameAliasList && !nameAliasList.isEmpty()) { + for (NameAlias nameAlias : nameAliasList) { + nameAliases.put(nameAlias.getAlias(), nameAlias.getType().toString()); + } + return nameAliases; + } + return null; + } + + private String getMappingValue(int codepoint, String resolvedValue, boolean ignoreUnihanRange, String prefix) { + if (null == resolvedValue) { + return "#"; + } + int[] resolvedValueInts = resolvedValue.codePoints().toArray(); + if (resolvedValueInts.length == 1 && resolvedValueInts[0] == codepoint && !ignoreUnihanRange) { + return "#"; + } + StringBuilder sb = new StringBuilder(); + for (int i : resolvedValueInts) { + sb.append(prefix).append(getCPString(i)).append(" "); + } + return sb.toString().trim(); + } + + 
public boolean isDifferentRange(int codepointA, int codepointB) { + boolean isDifference = false; + for (UcdProperty property : rangeDefiningProperties) { + isDifference = isDifference || + !getAttributeValue(property, codepointA).equals(getAttributeValue(property, codepointB)); + } + return isDifference; + } + + private static String getCPString(int codepoint) { + return String.format("%4s", Integer.toHexString(codepoint)).replace(" ", "0").toUpperCase(); + } + + public String getHexString(int codepoint) { + return getCPString(codepoint); + } + + public boolean isUnihanAttributeRange(int codepoint) { + return getAttributeValue(UcdProperty.Unified_Ideograph, codepoint).equals("Y") || + !getAttributeValue(UcdProperty.kCompatibilityVariant, codepoint).isEmpty(); + } + + public boolean isUnifiedIdeograph(int codepoint) { + return getAttributeValue(UcdProperty.Unified_Ideograph, codepoint).equals("Y") && + getAttributeValue(UcdProperty.Name, codepoint).equals("CJK UNIFIED IDEOGRAPH-#"); + } +} diff --git a/unicodetools/src/main/java/org/unicode/xml/UCDDataResolver.java b/unicodetools/src/main/java/org/unicode/xml/UCDDataResolver.java new file mode 100644 index 000000000..888dedc0a --- /dev/null +++ b/unicodetools/src/main/java/org/unicode/xml/UCDDataResolver.java @@ -0,0 +1,194 @@ +package org.unicode.xml; + +import com.ibm.icu.util.VersionInfo; +import org.unicode.cldr.draft.FileUtilities; +import org.unicode.props.IndexUnicodeProperties; +import org.unicode.props.PropertyParsingInfo; +import org.unicode.props.UcdLineParser; +import org.unicode.props.UcdProperty; +import org.xml.sax.SAXException; +import org.xml.sax.helpers.AttributesImpl; + +import java.util.*; + +public class UCDDataResolver { + + private final IndexUnicodeProperties indexUnicodeProperties; + private final String namespace; + private final UCDXMLWriter writer; + + public UCDDataResolver(IndexUnicodeProperties iup, String namespace, UCDXMLWriter writer) { + indexUnicodeProperties = iup; + 
this.namespace = namespace; + this.writer = writer; + } + + public void buildSection(UcdSectionDetail.UcdSection ucdSection) throws SAXException { + VersionInfo minVersion = ucdSection.getMinVersion(); + VersionInfo maxVersion = ucdSection.getMaxVersion(); + String tag = ucdSection.toString(); + String childTag = ucdSection.getChildTag(); + boolean parserWithRange = ucdSection.getParserWithRange(); + boolean parserWithMissing = ucdSection.getParserWithMissing(); + UcdSectionComponent[] ucdSectionComponents = ucdSection.getUcdSectionDetail().getUcdSectionComponents(); + + if (isCompatibleVersion(minVersion, maxVersion)) { + writer.startElement(tag); { + for (UcdSectionComponent ucdSectionComponent : ucdSectionComponents) { + if (isCompatibleVersion(ucdSectionComponent.getMinVersion(), ucdSectionComponent.getMaxVersion())) { + final PropertyParsingInfo fileInfoEVS = PropertyParsingInfo.getPropertyInfo(ucdSectionComponent.getUcdProperty()); + String fullFilename = fileInfoEVS.getFullFileName(indexUnicodeProperties.getUcdVersion()); + UcdLineParser parser = new UcdLineParser(FileUtilities.in("", fullFilename)); + parser.withRange(parserWithRange); + parser.withMissing(parserWithMissing); + switch (ucdSection) { + case BLOCKS: + for (UcdLineParser.UcdLine line : parser) { + if (!line.getOriginalLine().startsWith("#")) { + AttributesImpl attributes = getBlockAttributes(namespace, line); + writer.startElement(childTag, attributes); { + writer.endElement(childTag); + } + } + } + break; + case NAMEDSEQUENCES: + HashMap namedSequences = new HashMap<>(); + for (UcdLineParser.UcdLine line : parser) { + String[] parts = line.getParts(); + namedSequences.put(parts[0], parts[1]); + } + List names = new ArrayList<>(namedSequences.keySet()); + Collections.sort(names); + for (String name : names) { + AttributesImpl attributes = getNamedSequenceAttributes(namespace, name, namedSequences); + writer.startElement(childTag, attributes); { + writer.endElement(childTag); + } + } + break; + 
default: + for (UcdLineParser.UcdLine line : parser) { + AttributesImpl attributes = getAttributes(ucdSection, namespace, line); + writer.startElement(childTag, attributes); + { + writer.endElement(childTag); + } + } + } + } + } + writer.endElement(tag); + } + } + } + + private AttributesImpl getAttributes(UcdSectionDetail.UcdSection ucdSection, String namespace, UcdLineParser.UcdLine line) { + switch(ucdSection) { + case CJKRADICALS: + return getCJKRadicalAttributes(namespace, line); + case DONOTEMIT: + return getDoNotEmitAttributes(namespace, line); + case EMOJISOURCES: + return getEmojiSourceAttributes(namespace, line); + case NORMALIZATIONCORRECTIONS: + return getNCAttributes(namespace, line); + case STANDARDIZEDVARIANTS: + return getSVAttributes(namespace, line); + default: + throw new IllegalArgumentException("getAttributes failed on an unexpected UcdSection"); + } + } + + private static AttributesImpl getBlockAttributes(String namespace, UcdLineParser.UcdLine line) { + String[] parts = line.getParts(); + String[] range = parts[0].split("\\.\\."); + AttributesImpl attributes = new AttributesImpl(); + attributes.addAttribute( + namespace, "first-cp", "first-cp", "CDATA", range[0]); + attributes.addAttribute( + namespace, "last-cp", "last-cp", "CDATA", range[1]); + attributes.addAttribute( + namespace, "name", "name", "CDATA", parts[1]); + return attributes; + } + + private static AttributesImpl getCJKRadicalAttributes(String namespace, UcdLineParser.UcdLine line) { + String[] parts = line.getParts(); + AttributesImpl attributes = new AttributesImpl(); + attributes.addAttribute( + namespace, "number", "number", "CDATA", parts[0]); + attributes.addAttribute( + namespace, "radical", "radical", "CDATA", parts[1]); + attributes.addAttribute( + namespace, "ideograph", "ideograph", "CDATA", parts[2]); + return attributes; + } + + private static AttributesImpl getDoNotEmitAttributes(String namespace, UcdLineParser.UcdLine line) { + String[] parts = line.getParts(); + 
AttributesImpl attributes = new AttributesImpl(); + attributes.addAttribute( + namespace, "of", "of", "CDATA", parts[0]); + attributes.addAttribute( + namespace, "use", "use", "CDATA", parts[1]); + attributes.addAttribute( + namespace, "because", "because", "CDATA", parts[2]); + return attributes; + } + + private static AttributesImpl getEmojiSourceAttributes(String namespace, UcdLineParser.UcdLine line) { + String[] parts = line.getParts(); + AttributesImpl attributes = new AttributesImpl(); + attributes.addAttribute( + namespace, "unicode", "unicode", "CDATA", parts[0]); + attributes.addAttribute( + namespace, "docomo", "docomo", "CDATA", parts[1]); + attributes.addAttribute( + namespace, "kddi", "kddi", "CDATA", parts[2]); + attributes.addAttribute( + namespace, "softbank", "softbank", "CDATA", parts[3]); + return attributes; + } + + private static AttributesImpl getNamedSequenceAttributes(String namespace, String name, HashMap namedSequences) { + AttributesImpl attributes = new AttributesImpl(); + attributes.addAttribute( + namespace, "name", "name", "CDATA", name); + attributes.addAttribute( + namespace, "cps", "cps", "CDATA", namedSequences.get(name)); + return attributes; + } + + private static AttributesImpl getNCAttributes(String namespace, UcdLineParser.UcdLine line) { + String[] parts = line.getParts(); + AttributesImpl attributes = new AttributesImpl(); + attributes.addAttribute( + namespace, "cp", "cp", "CDATA", parts[0]); + attributes.addAttribute( + namespace, "old", "old", "CDATA", parts[1]); + attributes.addAttribute( + namespace, "new", "new", "CDATA", parts[2]); + attributes.addAttribute( + namespace, "version", "version", "CDATA", parts[3]); + return attributes; + } + + private static AttributesImpl getSVAttributes(String namespace, UcdLineParser.UcdLine line) { + String[] parts = line.getParts(); + AttributesImpl attributes = new AttributesImpl(); + attributes.addAttribute( + namespace, "cps", "cps", "CDATA", parts[0]); + 
attributes.addAttribute( + namespace, "desc", "desc", "CDATA", parts[1]); + attributes.addAttribute( + namespace, "when", "when", "CDATA", + parts[2] != null ? parts[2] : ""); + return attributes; + } + + private boolean isCompatibleVersion(VersionInfo minVersion, VersionInfo maxVersion) { + return (indexUnicodeProperties.getUcdVersion().compareTo(minVersion) >= 0 && ( + maxVersion == null || indexUnicodeProperties.getUcdVersion().compareTo(maxVersion) <= 0)); + } +} diff --git a/unicodetools/src/main/java/org/unicode/xml/UCDXMLWriter.java b/unicodetools/src/main/java/org/unicode/xml/UCDXMLWriter.java new file mode 100644 index 000000000..1c22267b1 --- /dev/null +++ b/unicodetools/src/main/java/org/unicode/xml/UCDXMLWriter.java @@ -0,0 +1,78 @@ +package org.unicode.xml; + +import org.xml.sax.Attributes; +import org.xml.sax.SAXException; +import org.xml.sax.helpers.AttributesImpl; + +import javax.xml.transform.OutputKeys; +import javax.xml.transform.Transformer; +import javax.xml.transform.TransformerConfigurationException; +import javax.xml.transform.TransformerFactory; +import javax.xml.transform.sax.SAXTransformerFactory; +import javax.xml.transform.sax.TransformerHandler; +import javax.xml.transform.stream.StreamResult; +import java.io.FileOutputStream; + +public class UCDXMLWriter { + + public static final String NAMESPACE + = "http://www.unicode.org/ns/2003/ucd/1.0"; + + private final TransformerHandler transformerHandler; + + public TransformerHandler getTransformerHandler() { + return transformerHandler; + } + + public UCDXMLWriter(FileOutputStream f) throws TransformerConfigurationException { + TransformerFactory tfactory = TransformerFactory.newInstance(); + SAXTransformerFactory sfactory = (SAXTransformerFactory) tfactory; + transformerHandler = sfactory.newTransformerHandler (); + Transformer transformer = transformerHandler.getTransformer (); + transformer.setOutputProperty(OutputKeys.ENCODING, "utf-8"); + 
transformer.setOutputProperty(OutputKeys.METHOD, "xml"); + transformer.setOutputProperty(OutputKeys.INDENT, "yes"); + transformer.setOutputProperty(OutputKeys.STANDALONE, "yes"); + transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "3"); + transformer.setOutputProperty("{http://xml.apache.org/xalan}indent-amount", "3"); + transformerHandler.setResult (new StreamResult(f)); + } + + public void startFile() throws SAXException { + transformerHandler.startDocument (); + char[] c = "\n".toCharArray (); + transformerHandler.characters (c, 0, c.length); + c = " \u00A9 2023 Unicode\u00AE, Inc. ".toCharArray (); + transformerHandler.comment (c, 0, c.length); + c = "\n".toCharArray (); + transformerHandler.characters (c, 0, c.length); + c = " For terms of use, see http://www.unicode.org/terms_of_use.html ".toCharArray (); + transformerHandler.comment (c, 0, c.length); + c = "\n\n\n".toCharArray (); + transformerHandler.characters (c, 0, c.length); + + } + public void endFile() throws SAXException { + transformerHandler.endDocument (); + } + + public void startElement(String tagName) throws SAXException { + AttributesImpl attributes = new AttributesImpl (); + startElement(tagName, attributes); + } + + public void startElement(String tagName, AttributesImpl attributes) throws SAXException { + transformerHandler.startElement (NAMESPACE, tagName, tagName, attributes); + } + + public void addContent(String s) throws SAXException { + char[] d = s.toCharArray (); + transformerHandler.characters (d, 0, d.length); + } + + public void endElement(String tagName) throws SAXException { + transformerHandler.endElement (NAMESPACE, tagName, tagName); + } +} + + diff --git a/unicodetools/src/main/java/org/unicode/xml/UcdPropertyDetail.java b/unicodetools/src/main/java/org/unicode/xml/UcdPropertyDetail.java new file mode 100644 index 000000000..5e5c607c9 --- /dev/null +++ b/unicodetools/src/main/java/org/unicode/xml/UcdPropertyDetail.java @@ -0,0 +1,923 @@ +package 
org.unicode.xml; + +import com.ibm.icu.util.VersionInfo; +import org.unicode.props.UcdProperty; + +import java.util.LinkedHashSet; +import java.util.Set; + +public class UcdPropertyDetail { + + static private LinkedHashSet basePropertyDetails = new LinkedHashSet (); + static private LinkedHashSet cjkPropertyDetails = new LinkedHashSet (); + static private LinkedHashSet ucdxmlPropertyDetails = new LinkedHashSet (); + static private LinkedHashSet allPropertyDetails = new LinkedHashSet (); + + public static UcdPropertyDetail Age_Detail = new UcdPropertyDetail ( + UcdProperty.Age, VersionInfo.getInstance(1,1,0), 1, + true, false, false, true); + public static UcdPropertyDetail Name_Detail = new UcdPropertyDetail ( + UcdProperty.Name, VersionInfo.getInstance(1,1,0), 2, + true, false, false, true); + public static UcdPropertyDetail Jamo_Short_Name_Detail = new UcdPropertyDetail ( + UcdProperty.Jamo_Short_Name, VersionInfo.getInstance(1,1,0), 3, + true, false, false, true); + public static UcdPropertyDetail General_Category_Detail = new UcdPropertyDetail ( + UcdProperty.General_Category, VersionInfo.getInstance(1,1,0), 4, + true, false, false, true); + public static UcdPropertyDetail Canonical_Combining_Class_Detail = new UcdPropertyDetail ( + UcdProperty.Canonical_Combining_Class, VersionInfo.getInstance(1,1,0), 5, + true, false, false, true); + public static UcdPropertyDetail Decomposition_Type_Detail = new UcdPropertyDetail ( + UcdProperty.Decomposition_Type, VersionInfo.getInstance(1,1,0), 6, + true, false, false, true); + public static UcdPropertyDetail Decomposition_Mapping_Detail = new UcdPropertyDetail ( + UcdProperty.Decomposition_Mapping, VersionInfo.getInstance(1,1,0), 7, + true, false, false, true); + public static UcdPropertyDetail Numeric_Type_Detail = new UcdPropertyDetail ( + UcdProperty.Numeric_Type, VersionInfo.getInstance(1,1,0), 8, + true, false, false, true); + public static UcdPropertyDetail Numeric_Value_Detail = new UcdPropertyDetail ( + 
UcdProperty.Numeric_Value, VersionInfo.getInstance(1,1,0), 9, + true, false, false, true); + public static UcdPropertyDetail Bidi_Class_Detail = new UcdPropertyDetail ( + UcdProperty.Bidi_Class, VersionInfo.getInstance(1,1,0), 10, + true, false, false, true); + public static UcdPropertyDetail Bidi_Paired_Bracket_Type_Detail = new UcdPropertyDetail ( + UcdProperty.Bidi_Paired_Bracket_Type, VersionInfo.getInstance(6,3,0), 11, + true, false, false, true); + public static UcdPropertyDetail Bidi_Paired_Bracket_Detail = new UcdPropertyDetail ( + UcdProperty.Bidi_Paired_Bracket, VersionInfo.getInstance(6,3,0), 12, + true, false, false, true); + public static UcdPropertyDetail Bidi_Mirrored_Detail = new UcdPropertyDetail ( + UcdProperty.Bidi_Mirrored, VersionInfo.getInstance(1,1,0), 13, + true, false, false, true); + public static UcdPropertyDetail Bidi_Mirroring_Glyph_Detail = new UcdPropertyDetail ( + UcdProperty.Bidi_Mirroring_Glyph, VersionInfo.getInstance(1,1,0), 14, + true, false, false, true); + public static UcdPropertyDetail Simple_Uppercase_Mapping_Detail = new UcdPropertyDetail ( + UcdProperty.Simple_Uppercase_Mapping, VersionInfo.getInstance(1,1,0), 15, + true, false, false, true); + public static UcdPropertyDetail Simple_Lowercase_Mapping_Detail = new UcdPropertyDetail ( + UcdProperty.Simple_Lowercase_Mapping, VersionInfo.getInstance(1,1,0), 16, + true, false, false, true); + public static UcdPropertyDetail Simple_Titlecase_Mapping_Detail = new UcdPropertyDetail ( + UcdProperty.Simple_Titlecase_Mapping, VersionInfo.getInstance(1,1,0), 17, + true, false, false, true); + public static UcdPropertyDetail Uppercase_Mapping_Detail = new UcdPropertyDetail ( + UcdProperty.Uppercase_Mapping, VersionInfo.getInstance(1,1,0), 18, + true, false, false, true); + public static UcdPropertyDetail Lowercase_Mapping_Detail = new UcdPropertyDetail ( + UcdProperty.Lowercase_Mapping, VersionInfo.getInstance(1,1,0), 19, + true, false, false, true); + public static UcdPropertyDetail 
Titlecase_Mapping_Detail = new UcdPropertyDetail ( + UcdProperty.Titlecase_Mapping, VersionInfo.getInstance(1,1,0), 20, + true, false, false, true); +// public static UcdPropertyDetail Special_Case_Condition_Detail = new UcdPropertyDetail ( +// UcdProperty.Special_Case_Condition, VersionInfo.getInstance(1,1,0), 21, +// true, false, false, true); + public static UcdPropertyDetail Simple_Case_Folding_Detail = new UcdPropertyDetail ( + UcdProperty.Simple_Case_Folding, VersionInfo.getInstance(1,1,0), 22, + true, false, false, true); + public static UcdPropertyDetail Case_Folding_Detail = new UcdPropertyDetail ( + UcdProperty.Case_Folding, VersionInfo.getInstance(1,1,0), 23, + true, false, false, true); + public static UcdPropertyDetail Joining_Type_Detail = new UcdPropertyDetail ( + UcdProperty.Joining_Type, VersionInfo.getInstance(1,1,0), 24, + true, false, false, true); + public static UcdPropertyDetail Joining_Group_Detail = new UcdPropertyDetail ( + UcdProperty.Joining_Group, VersionInfo.getInstance(1,1,0), 25, + true, false, false, true); + public static UcdPropertyDetail East_Asian_Width_Detail = new UcdPropertyDetail ( + UcdProperty.East_Asian_Width, VersionInfo.getInstance(1,1,0), 26, + true, false, false, true); + public static UcdPropertyDetail Line_Break_Detail = new UcdPropertyDetail ( + UcdProperty.Line_Break, VersionInfo.getInstance(1,1,0), 27, + true, false, false, true); + public static UcdPropertyDetail Script_Detail = new UcdPropertyDetail ( + UcdProperty.Script, VersionInfo.getInstance(1,1,0), 28, + true, false, false, true); + public static UcdPropertyDetail Script_Extensions_Detail = new UcdPropertyDetail ( + UcdProperty.Script_Extensions, VersionInfo.getInstance(6,1,0), 29, + true, false, false, true); + public static UcdPropertyDetail Dash_Detail = new UcdPropertyDetail ( + UcdProperty.Dash, VersionInfo.getInstance(1,1,0), 30, + true, false, false, true); + public static UcdPropertyDetail White_Space_Detail = new UcdPropertyDetail ( + 
UcdProperty.White_Space, VersionInfo.getInstance(1,1,0), 31, + true, false, false, true); + public static UcdPropertyDetail Hyphen_Detail = new UcdPropertyDetail ( + UcdProperty.Hyphen, VersionInfo.getInstance(1,1,0), 32, + true, false, false, true); + public static UcdPropertyDetail Quotation_Mark_Detail = new UcdPropertyDetail ( + UcdProperty.Quotation_Mark, VersionInfo.getInstance(1,1,0), 33, + true, false, false, true); + public static UcdPropertyDetail Radical_Detail = new UcdPropertyDetail ( + UcdProperty.Radical, VersionInfo.getInstance(1,1,0), 34, + true, false, false, true); + public static UcdPropertyDetail Ideographic_Detail = new UcdPropertyDetail ( + UcdProperty.Ideographic, VersionInfo.getInstance(1,1,0), 35, + true, false, false, true); + public static UcdPropertyDetail Unified_Ideograph_Detail = new UcdPropertyDetail ( + UcdProperty.Unified_Ideograph, VersionInfo.getInstance(1,1,0), 36, + true, false, false, true); + public static UcdPropertyDetail IDS_Binary_Operator_Detail = new UcdPropertyDetail ( + UcdProperty.IDS_Binary_Operator, VersionInfo.getInstance(1,1,0), 37, + true, false, false, true); + public static UcdPropertyDetail IDS_Trinary_Operator_Detail = new UcdPropertyDetail ( + UcdProperty.IDS_Trinary_Operator, VersionInfo.getInstance(1,1,0), 38, + true, false, false, true); + public static UcdPropertyDetail Hangul_Syllable_Type_Detail = new UcdPropertyDetail ( + UcdProperty.Hangul_Syllable_Type, VersionInfo.getInstance(1,1,0), 39, + true, false, false, true); + public static UcdPropertyDetail Default_Ignorable_Code_Point_Detail = new UcdPropertyDetail ( + UcdProperty.Default_Ignorable_Code_Point, VersionInfo.getInstance(1,1,0), 40, + true, false, false, true); + public static UcdPropertyDetail Other_Default_Ignorable_Code_Point_Detail = new UcdPropertyDetail ( + UcdProperty.Other_Default_Ignorable_Code_Point, VersionInfo.getInstance(1,1,0), 41, + true, false, false, true); + public static UcdPropertyDetail Alphabetic_Detail = new 
UcdPropertyDetail ( + UcdProperty.Alphabetic, VersionInfo.getInstance(1,1,0), 42, + true, false, false, true); + public static UcdPropertyDetail Other_Alphabetic_Detail = new UcdPropertyDetail ( + UcdProperty.Other_Alphabetic, VersionInfo.getInstance(1,1,0), 43, + true, false, false, true); + public static UcdPropertyDetail Uppercase_Detail = new UcdPropertyDetail ( + UcdProperty.Uppercase, VersionInfo.getInstance(1,1,0), 44, + true, false, false, true); + public static UcdPropertyDetail Other_Uppercase_Detail = new UcdPropertyDetail ( + UcdProperty.Other_Uppercase, VersionInfo.getInstance(1,1,0), 45, + true, false, false, true); + public static UcdPropertyDetail Lowercase_Detail = new UcdPropertyDetail ( + UcdProperty.Lowercase, VersionInfo.getInstance(1,1,0), 46, + true, false, false, true); + public static UcdPropertyDetail Other_Lowercase_Detail = new UcdPropertyDetail ( + UcdProperty.Other_Lowercase, VersionInfo.getInstance(1,1,0), 47, + true, false, false, true); + public static UcdPropertyDetail Math_Detail = new UcdPropertyDetail ( + UcdProperty.Math, VersionInfo.getInstance(1,1,0), 48, + true, false, false, true); + public static UcdPropertyDetail Other_Math_Detail = new UcdPropertyDetail ( + UcdProperty.Other_Math, VersionInfo.getInstance(1,1,0), 49, + true, false, false, true); + public static UcdPropertyDetail Hex_Digit_Detail = new UcdPropertyDetail ( + UcdProperty.Hex_Digit, VersionInfo.getInstance(1,1,0), 50, + true, false, false, true); + public static UcdPropertyDetail ASCII_Hex_Digit_Detail = new UcdPropertyDetail ( + UcdProperty.ASCII_Hex_Digit, VersionInfo.getInstance(1,1,0), 51, + true, false, false, true); + public static UcdPropertyDetail Noncharacter_Code_Point_Detail = new UcdPropertyDetail ( + UcdProperty.Noncharacter_Code_Point, VersionInfo.getInstance(1,1,0), 52, + true, false, false, true); + public static UcdPropertyDetail Variation_Selector_Detail = new UcdPropertyDetail ( + UcdProperty.Variation_Selector, 
VersionInfo.getInstance(1,1,0), 53, + true, false, false, true); + public static UcdPropertyDetail Bidi_Control_Detail = new UcdPropertyDetail ( + UcdProperty.Bidi_Control, VersionInfo.getInstance(1,1,0), 54, + true, false, false, true); + public static UcdPropertyDetail Join_Control_Detail = new UcdPropertyDetail ( + UcdProperty.Join_Control, VersionInfo.getInstance(1,1,0), 55, + true, false, false, true); + public static UcdPropertyDetail Grapheme_Base_Detail = new UcdPropertyDetail ( + UcdProperty.Grapheme_Base, VersionInfo.getInstance(1,1,0), 56, + true, false, false, true); + public static UcdPropertyDetail Grapheme_Extend_Detail = new UcdPropertyDetail ( + UcdProperty.Grapheme_Extend, VersionInfo.getInstance(1,1,0), 57, + true, false, false, true); + public static UcdPropertyDetail Other_Grapheme_Extend_Detail = new UcdPropertyDetail ( + UcdProperty.Other_Grapheme_Extend, VersionInfo.getInstance(1,1,0), 58, + true, false, false, true); + public static UcdPropertyDetail Grapheme_Link_Detail = new UcdPropertyDetail ( + UcdProperty.Grapheme_Link, VersionInfo.getInstance(1,1,0), 59, + true, false, false, true); + public static UcdPropertyDetail Sentence_Terminal_Detail = new UcdPropertyDetail ( + UcdProperty.Sentence_Terminal, VersionInfo.getInstance(1,1,0), 60, + true, false, false, true); + public static UcdPropertyDetail Extender_Detail = new UcdPropertyDetail ( + UcdProperty.Extender, VersionInfo.getInstance(1,1,0), 61, + true, false, false, true); + public static UcdPropertyDetail Terminal_Punctuation_Detail = new UcdPropertyDetail ( + UcdProperty.Terminal_Punctuation, VersionInfo.getInstance(1,1,0), 62, + true, false, false, true); + public static UcdPropertyDetail Diacritic_Detail = new UcdPropertyDetail ( + UcdProperty.Diacritic, VersionInfo.getInstance(1,1,0), 63, + true, false, false, true); + public static UcdPropertyDetail Deprecated_Detail = new UcdPropertyDetail ( + UcdProperty.Deprecated, VersionInfo.getInstance(1,1,0), 64, + true, false, false, 
true); + public static UcdPropertyDetail ID_Start_Detail = new UcdPropertyDetail ( + UcdProperty.ID_Start, VersionInfo.getInstance(1,1,0), 65, + true, false, false, true); + public static UcdPropertyDetail Other_ID_Start_Detail = new UcdPropertyDetail ( + UcdProperty.Other_ID_Start, VersionInfo.getInstance(1,1,0), 66, + true, false, false, true); + public static UcdPropertyDetail XID_Start_Detail = new UcdPropertyDetail ( + UcdProperty.XID_Start, VersionInfo.getInstance(1,1,0), 67, + true, false, false, true); + public static UcdPropertyDetail ID_Continue_Detail = new UcdPropertyDetail ( + UcdProperty.ID_Continue, VersionInfo.getInstance(1,1,0), 68, + true, false, false, true); + public static UcdPropertyDetail Other_ID_Continue_Detail = new UcdPropertyDetail ( + UcdProperty.Other_ID_Continue, VersionInfo.getInstance(1,1,0), 69, + true, false, false, true); + public static UcdPropertyDetail XID_Continue_Detail = new UcdPropertyDetail ( + UcdProperty.XID_Continue, VersionInfo.getInstance(1,1,0), 70, + true, false, false, true); + public static UcdPropertyDetail Soft_Dotted_Detail = new UcdPropertyDetail ( + UcdProperty.Soft_Dotted, VersionInfo.getInstance(1,1,0), 71, + true, false, false, true); + public static UcdPropertyDetail Logical_Order_Exception_Detail = new UcdPropertyDetail ( + UcdProperty.Logical_Order_Exception, VersionInfo.getInstance(1,1,0), 72, + true, false, false, true); + public static UcdPropertyDetail Pattern_White_Space_Detail = new UcdPropertyDetail ( + UcdProperty.Pattern_White_Space, VersionInfo.getInstance(1,1,0), 73, + true, false, false, true); + public static UcdPropertyDetail Pattern_Syntax_Detail = new UcdPropertyDetail ( + UcdProperty.Pattern_Syntax, VersionInfo.getInstance(1,1,0), 74, + true, false, false, true); + public static UcdPropertyDetail Grapheme_Cluster_Break_Detail = new UcdPropertyDetail ( + UcdProperty.Grapheme_Cluster_Break, VersionInfo.getInstance(1,1,0), 75, + true, false, false, true); + public static UcdPropertyDetail 
Word_Break_Detail = new UcdPropertyDetail ( + UcdProperty.Word_Break, VersionInfo.getInstance(1,1,0), 76, + true, false, false, true); + public static UcdPropertyDetail Sentence_Break_Detail = new UcdPropertyDetail ( + UcdProperty.Sentence_Break, VersionInfo.getInstance(1,1,0), 77, + true, false, false, true); + public static UcdPropertyDetail Composition_Exclusion_Detail = new UcdPropertyDetail ( + UcdProperty.Composition_Exclusion, VersionInfo.getInstance(1,1,0), 78, + true, false, false, true); + public static UcdPropertyDetail Full_Composition_Exclusion_Detail = new UcdPropertyDetail ( + UcdProperty.Full_Composition_Exclusion, VersionInfo.getInstance(1,1,0), 79, + true, false, false, true); + public static UcdPropertyDetail NFC_Quick_Check_Detail = new UcdPropertyDetail ( + UcdProperty.NFC_Quick_Check, VersionInfo.getInstance(1,1,0), 80, + true, false, false, true); + public static UcdPropertyDetail NFD_Quick_Check_Detail = new UcdPropertyDetail ( + UcdProperty.NFD_Quick_Check, VersionInfo.getInstance(1,1,0), 81, + true, false, false, true); + public static UcdPropertyDetail NFKC_Quick_Check_Detail = new UcdPropertyDetail ( + UcdProperty.NFKC_Quick_Check, VersionInfo.getInstance(1,1,0), 82, + true, false, false, true); + public static UcdPropertyDetail NFKD_Quick_Check_Detail = new UcdPropertyDetail ( + UcdProperty.NFKD_Quick_Check, VersionInfo.getInstance(1,1,0), 83, + true, false, false, true); + public static UcdPropertyDetail Expands_On_NFC_Detail = new UcdPropertyDetail ( + UcdProperty.Expands_On_NFC, VersionInfo.getInstance(1,1,0), 84, + true, false, false, true); + public static UcdPropertyDetail Expands_On_NFD_Detail = new UcdPropertyDetail ( + UcdProperty.Expands_On_NFD, VersionInfo.getInstance(1,1,0), 85, + true, false, false, true); + public static UcdPropertyDetail Expands_On_NFKC_Detail = new UcdPropertyDetail ( + UcdProperty.Expands_On_NFKC, VersionInfo.getInstance(1,1,0), 86, + true, false, false, true); + public static UcdPropertyDetail 
Expands_On_NFKD_Detail = new UcdPropertyDetail ( + UcdProperty.Expands_On_NFKD, VersionInfo.getInstance(1,1,0), 87, + true, false, false, true); + public static UcdPropertyDetail FC_NFC_Closure_Detail = new UcdPropertyDetail ( + UcdProperty.FC_NFKC_Closure, VersionInfo.getInstance(1,1,0), 88, + true, false, false, true); + public static UcdPropertyDetail Case_Ignorable_Detail = new UcdPropertyDetail ( + UcdProperty.Case_Ignorable, VersionInfo.getInstance(5,2,0), 89, + true, false, false, true); + public static UcdPropertyDetail Cased_Detail = new UcdPropertyDetail ( + UcdProperty.Cased, VersionInfo.getInstance(5,2,0), 90, + true, false, false, true); + public static UcdPropertyDetail Changes_When_CaseFolded_Detail = new UcdPropertyDetail ( + UcdProperty.Changes_When_Casefolded, VersionInfo.getInstance(5,2,0), 91, + true, false, false, true); + public static UcdPropertyDetail Changes_When_CaseMapped_Detail = new UcdPropertyDetail ( + UcdProperty.Changes_When_Casemapped, VersionInfo.getInstance(5,2,0), 92, + true, false, false, true); + public static UcdPropertyDetail Changes_When_NFKC_Casefolded_Detail = new UcdPropertyDetail ( + UcdProperty.Changes_When_NFKC_Casefolded, VersionInfo.getInstance(5,2,0), 93, + true, false, false, true); + public static UcdPropertyDetail Changes_When_Lowercased_Detail = new UcdPropertyDetail ( + UcdProperty.Changes_When_Lowercased, VersionInfo.getInstance(5,2,0), 94, + true, false, false, true); + public static UcdPropertyDetail Changes_When_Titlecased_Detail = new UcdPropertyDetail ( + UcdProperty.Changes_When_Titlecased, VersionInfo.getInstance(5,2,0), 95, + true, false, false, true); + public static UcdPropertyDetail Changes_When_Uppercased_Detail = new UcdPropertyDetail ( + UcdProperty.Changes_When_Uppercased, VersionInfo.getInstance(5,2,0), 96, + true, false, false, true); + public static UcdPropertyDetail NFKC_Casefold_Detail = new UcdPropertyDetail ( + UcdProperty.NFKC_Casefold, VersionInfo.getInstance(5,2,0), 97, + true, false, 
false, true); + public static UcdPropertyDetail Indic_Syllabic_Category_Detail = new UcdPropertyDetail ( + UcdProperty.Indic_Syllabic_Category, VersionInfo.getInstance(6,0,0), 98, + true, false, false, true); +// public static UcdPropertyDetail Indic_Matra_Category_Detail = new UcdPropertyDetail ( +// UcdProperty.Indic_Matra_Category, VersionInfo.getInstance(6,0,0), VersionInfo.getInstance(7,0,0), 99, +// true, false, false, true); + public static UcdPropertyDetail Indic_Positional_Category_Detail = new UcdPropertyDetail ( + UcdProperty.Indic_Positional_Category, VersionInfo.getInstance(8,0,0), 100, + true, false, false, true); + public static UcdPropertyDetail kJa_Detail = new UcdPropertyDetail ( + UcdProperty.kJa, VersionInfo.getInstance(8,0,0), 101, + false, true, false, true); + public static UcdPropertyDetail Prepended_Concatenation_Mark_Detail = new UcdPropertyDetail ( + UcdProperty.Prepended_Concatenation_Mark, VersionInfo.getInstance(9,0,0), 102, + true, false, false, true); + public static UcdPropertyDetail Vertical_Orientation_Detail = new UcdPropertyDetail ( + UcdProperty.Vertical_Orientation, VersionInfo.getInstance(10,0,0), 103, + true, false, false, true); + public static UcdPropertyDetail Regional_Indicator_Detail = new UcdPropertyDetail ( + UcdProperty.Regional_Indicator, VersionInfo.getInstance(10,0,0), 104, + true, false, false, true); + public static UcdPropertyDetail Block_Detail = new UcdPropertyDetail ( + UcdProperty.Block, VersionInfo.getInstance(10,0,0), 105, + true, false, false, true); + public static UcdPropertyDetail Equivalent_Unified_Ideograph_Detail = new UcdPropertyDetail ( + UcdProperty.Equivalent_Unified_Ideograph, VersionInfo.getInstance(11,0,0), 106, + false, true, false, true); + public static UcdPropertyDetail kCompatibilityVariant_Detail = new UcdPropertyDetail ( + UcdProperty.kCompatibilityVariant, VersionInfo.getInstance(11,0,0), 107, + false, true, true, true); + public static UcdPropertyDetail kRSUnicode_Detail = new 
UcdPropertyDetail ( + UcdProperty.kRSUnicode, VersionInfo.getInstance(11,0,0), 108, + false, true, false, true); +// public static UcdPropertyDetail kIRG_RSIndex_Detail = new UcdPropertyDetail ( +// UcdProperty.kIRG_RSIndex, VersionInfo.getInstance(11,0,0), 109, +// false, true, false, true); + public static UcdPropertyDetail kIRG_GSource_Detail = new UcdPropertyDetail ( + UcdProperty.kIRG_GSource, VersionInfo.getInstance(11,0,0), 110, + false, true, true, true); + public static UcdPropertyDetail kIRG_TSource_Detail = new UcdPropertyDetail ( + UcdProperty.kIRG_TSource, VersionInfo.getInstance(11,0,0), 111, + false, true, true, true); + public static UcdPropertyDetail kIRG_JSource_Detail = new UcdPropertyDetail ( + UcdProperty.kIRG_JSource, VersionInfo.getInstance(11,0,0), 112, + false, true, true, true); + public static UcdPropertyDetail kIRG_KSource_Detail = new UcdPropertyDetail ( + UcdProperty.kIRG_KSource, VersionInfo.getInstance(11,0,0), 113, + false, true, true, true); + public static UcdPropertyDetail kIRG_KPSource_Detail = new UcdPropertyDetail ( + UcdProperty.kIRG_KPSource, VersionInfo.getInstance(11,0,0), 114, + false, true, true, true); + public static UcdPropertyDetail kIRG_VSource_Detail = new UcdPropertyDetail ( + UcdProperty.kIRG_VSource, VersionInfo.getInstance(11,0,0), 115, + false, true, true, true); + public static UcdPropertyDetail kIRG_HSource_Detail = new UcdPropertyDetail ( + UcdProperty.kIRG_HSource, VersionInfo.getInstance(11,0,0), 116, + false, true, true, true); + public static UcdPropertyDetail kIRG_USource_Detail = new UcdPropertyDetail ( + UcdProperty.kIRG_USource, VersionInfo.getInstance(11,0,0), 117, + false, true, true, true); + public static UcdPropertyDetail kIRG_MSource_Detail = new UcdPropertyDetail ( + UcdProperty.kIRG_MSource, VersionInfo.getInstance(11,0,0), 118, + false, true, true, true); + public static UcdPropertyDetail kIRG_UKSource_Detail = new UcdPropertyDetail ( + UcdProperty.kIRG_UKSource, 
VersionInfo.getInstance(13,0,0), 119, + false, true, true, true); + public static UcdPropertyDetail kIRG_SSource_Detail = new UcdPropertyDetail ( + UcdProperty.kIRG_SSource, VersionInfo.getInstance(13,0,0), 120, + false, true, true, true); + public static UcdPropertyDetail kIICore_Detail = new UcdPropertyDetail ( + UcdProperty.kIICore, VersionInfo.getInstance(11,0,0), 121, + false, true, false, true); + public static UcdPropertyDetail kUnihanCore2020_Detail = new UcdPropertyDetail ( + UcdProperty.kUnihanCore2020, VersionInfo.getInstance(11,0,0), 122, + false, true, false, true); + public static UcdPropertyDetail kGB0_Detail = new UcdPropertyDetail ( + UcdProperty.kGB0, VersionInfo.getInstance(11,0,0), 123, + false, true, false, true); + public static UcdPropertyDetail kGB1_Detail = new UcdPropertyDetail ( + UcdProperty.kGB1, VersionInfo.getInstance(11,0,0), 124, + false, true, false, true); + public static UcdPropertyDetail kGB3_Detail = new UcdPropertyDetail ( + UcdProperty.kGB3, VersionInfo.getInstance(11,0,0), 125, + false, true, false, true); + public static UcdPropertyDetail kGB5_Detail = new UcdPropertyDetail ( + UcdProperty.kGB5, VersionInfo.getInstance(11,0,0), 126, + false, true, false, true); + public static UcdPropertyDetail kGB7_Detail = new UcdPropertyDetail ( + UcdProperty.kGB7, VersionInfo.getInstance(11,0,0), 127, + false, true, false, true); + public static UcdPropertyDetail kGB8_Detail = new UcdPropertyDetail ( + UcdProperty.kGB8, VersionInfo.getInstance(11,0,0), 128, + false, true, false, true); + public static UcdPropertyDetail kCNS1986_Detail = new UcdPropertyDetail ( + UcdProperty.kCNS1986, VersionInfo.getInstance(11,0,0), 129, + false, true, false, true); + public static UcdPropertyDetail kCNS1992_Detail = new UcdPropertyDetail ( + UcdProperty.kCNS1992, VersionInfo.getInstance(11,0,0), 130, + false, true, false, true); + public static UcdPropertyDetail kJis0_Detail = new UcdPropertyDetail ( + UcdProperty.kJis0, 
VersionInfo.getInstance(11,0,0), 131, + false, true, false, true); + public static UcdPropertyDetail kJis1_Detail = new UcdPropertyDetail ( + UcdProperty.kJis1, VersionInfo.getInstance(11,0,0), 132, + false, true, false, true); + public static UcdPropertyDetail kJIS0213_Detail = new UcdPropertyDetail ( + UcdProperty.kJIS0213, VersionInfo.getInstance(11,0,0), 133, + false, true, false, true); + public static UcdPropertyDetail kKSC0_Detail = new UcdPropertyDetail ( + UcdProperty.kKSC0, VersionInfo.getInstance(11,0,0), + VersionInfo.getInstance(15,1,0), 134, + false, true, false, true); + public static UcdPropertyDetail kKSC1_Detail = new UcdPropertyDetail ( + UcdProperty.kKSC1, VersionInfo.getInstance(11,0,0), + VersionInfo.getInstance(15,1,0), 135, + false, true, false, true); + public static UcdPropertyDetail kKPS0_Detail = new UcdPropertyDetail ( + UcdProperty.kKPS0, VersionInfo.getInstance(11,0,0), + VersionInfo.getInstance(15,1,0), 136, + false, true, false, true); + public static UcdPropertyDetail kKPS1_Detail = new UcdPropertyDetail ( + UcdProperty.kKPS1, VersionInfo.getInstance(11,0,0), + VersionInfo.getInstance(15,1,0), 137, + false, true, false, true); + public static UcdPropertyDetail kHKSCS_Detail = new UcdPropertyDetail ( + UcdProperty.kHKSCS, VersionInfo.getInstance(11,0,0), + VersionInfo.getInstance(15,1,0), 138, + false, true, false, true); + public static UcdPropertyDetail kCantonese_Detail = new UcdPropertyDetail ( + UcdProperty.kCantonese, VersionInfo.getInstance(11,0,0), 139, + false, true, false, true); + public static UcdPropertyDetail kHangul_Detail = new UcdPropertyDetail ( + UcdProperty.kHangul, VersionInfo.getInstance(11,0,0), 140, + false, true, false, true); + public static UcdPropertyDetail kDefinition_Detail = new UcdPropertyDetail ( + UcdProperty.kDefinition, VersionInfo.getInstance(11,0,0), 141, + false, true, false, true); + public static UcdPropertyDetail kHanYu_Detail = new UcdPropertyDetail ( + UcdProperty.kHanYu, 
VersionInfo.getInstance(11,0,0), 142, + false, true, false, true); +// public static UcdPropertyDetail kAlternateHanYu_Detail = new UcdPropertyDetail ( +// UcdProperty.kAlternateHanYu, VersionInfo.getInstance(11,0,0), 143, +// false, true, false, true); + public static UcdPropertyDetail kMandarin_Detail = new UcdPropertyDetail ( + UcdProperty.kMandarin, VersionInfo.getInstance(11,0,0), 144, + false, true, false, true); + public static UcdPropertyDetail kCihaiT_Detail = new UcdPropertyDetail ( + UcdProperty.kCihaiT, VersionInfo.getInstance(11,0,0), 145, + false, true, false, true); + public static UcdPropertyDetail kSBGY_Detail = new UcdPropertyDetail ( + UcdProperty.kSBGY, VersionInfo.getInstance(11,0,0), 146, + false, true, false, true); + public static UcdPropertyDetail kNelson_Detail = new UcdPropertyDetail ( + UcdProperty.kNelson, VersionInfo.getInstance(11,0,0), 147, + false, true, false, true); + public static UcdPropertyDetail kCowles_Detail = new UcdPropertyDetail ( + UcdProperty.kCowles, VersionInfo.getInstance(11,0,0), 148, + false, true, false, true); + public static UcdPropertyDetail kMatthews_Detail = new UcdPropertyDetail ( + UcdProperty.kMatthews, VersionInfo.getInstance(11,0,0), 149, + false, true, false, true); + public static UcdPropertyDetail kOtherNumeric_Detail = new UcdPropertyDetail ( + UcdProperty.kOtherNumeric, VersionInfo.getInstance(11,0,0), 150, + false, true, false, true); + public static UcdPropertyDetail kPhonetic_Detail = new UcdPropertyDetail ( + UcdProperty.kPhonetic, VersionInfo.getInstance(11,0,0), 151, + false, true, false, true); + public static UcdPropertyDetail kGSR_Detail = new UcdPropertyDetail ( + UcdProperty.kGSR, VersionInfo.getInstance(11,0,0), 152, + false, true, false, true); + public static UcdPropertyDetail kFenn_Detail = new UcdPropertyDetail ( + UcdProperty.kFenn, VersionInfo.getInstance(11,0,0), 153, + false, true, false, true); + public static UcdPropertyDetail kFennIndex_Detail = new UcdPropertyDetail ( + 
UcdProperty.kFennIndex, VersionInfo.getInstance(11,0,0), 154, + false, true, false, true); + public static UcdPropertyDetail kKarlgren_Detail = new UcdPropertyDetail ( + UcdProperty.kKarlgren, VersionInfo.getInstance(11,0,0), 155, + false, true, false, true); + public static UcdPropertyDetail kCangjie_Detail = new UcdPropertyDetail ( + UcdProperty.kCangjie, VersionInfo.getInstance(11,0,0), 156, + false, true, false, true); + public static UcdPropertyDetail kMeyerWempe_Detail = new UcdPropertyDetail ( + UcdProperty.kMeyerWempe, VersionInfo.getInstance(11,0,0), 157, + false, true, false, true); + public static UcdPropertyDetail kSimplifiedVariant_Detail = new UcdPropertyDetail ( + UcdProperty.kSimplifiedVariant, VersionInfo.getInstance(11,0,0), 158, + false, true, false, true); + public static UcdPropertyDetail kTraditionalVariant_Detail = new UcdPropertyDetail ( + UcdProperty.kTraditionalVariant, VersionInfo.getInstance(11,0,0), 159, + false, true, false, true); + public static UcdPropertyDetail kSpecializedSemanticVariant_Detail = new UcdPropertyDetail ( + UcdProperty.kSpecializedSemanticVariant, VersionInfo.getInstance(11,0,0), 160, + false, true, false, true); + public static UcdPropertyDetail kSemanticVariant_Detail = new UcdPropertyDetail ( + UcdProperty.kSemanticVariant, VersionInfo.getInstance(11,0,0), 161, + false, true, false, true); + public static UcdPropertyDetail kVietnamese_Detail = new UcdPropertyDetail ( + UcdProperty.kVietnamese, VersionInfo.getInstance(11,0,0), 162, + false, true, false, true); + public static UcdPropertyDetail kLau_Detail = new UcdPropertyDetail ( + UcdProperty.kLau, VersionInfo.getInstance(11,0,0), 163, + false, true, false, true); + public static UcdPropertyDetail kTang_Detail = new UcdPropertyDetail ( + UcdProperty.kTang, VersionInfo.getInstance(11,0,0), 164, + false, true, false, true); + public static UcdPropertyDetail kZVariant_Detail = new UcdPropertyDetail ( + UcdProperty.kZVariant, VersionInfo.getInstance(11,0,0), 165, + 
false, true, false, true); + public static UcdPropertyDetail kJapaneseKun_Detail = new UcdPropertyDetail ( + UcdProperty.kJapaneseKun, VersionInfo.getInstance(11,0,0), 166, + false, true, false, true); + public static UcdPropertyDetail kJapaneseOn_Detail = new UcdPropertyDetail ( + UcdProperty.kJapaneseOn, VersionInfo.getInstance(11,0,0), 167, + false, true, false, true); + public static UcdPropertyDetail kKangXi_Detail = new UcdPropertyDetail ( + UcdProperty.kKangXi, VersionInfo.getInstance(11,0,0), 168, + false, true, false, true); +// public static UcdPropertyDetail kAlternateKangXi_Detail = new UcdPropertyDetail ( +// UcdProperty.kAlternateKangXi, VersionInfo.getInstance(11,0,0), 169, +// false, true, false, true); + public static UcdPropertyDetail kBigFive_Detail = new UcdPropertyDetail ( + UcdProperty.kBigFive, VersionInfo.getInstance(11,0,0), 170, + false, true, false, true); + public static UcdPropertyDetail kCCCII_Detail = new UcdPropertyDetail ( + UcdProperty.kCCCII, VersionInfo.getInstance(11,0,0), 171, + false, true, false, true); + public static UcdPropertyDetail kDaeJaweon_Detail = new UcdPropertyDetail ( + UcdProperty.kDaeJaweon, VersionInfo.getInstance(11,0,0), 172, + false, true, false, true); + public static UcdPropertyDetail kEACC_Detail = new UcdPropertyDetail ( + UcdProperty.kEACC, VersionInfo.getInstance(11,0,0), 173, + false, true, false, true); + public static UcdPropertyDetail kFrequency_Detail = new UcdPropertyDetail ( + UcdProperty.kFrequency, VersionInfo.getInstance(11,0,0), + VersionInfo.getInstance(16,0,0), 174, + false, true, false, true); + public static UcdPropertyDetail kGradeLevel_Detail = new UcdPropertyDetail ( + UcdProperty.kGradeLevel, VersionInfo.getInstance(11,0,0), 175, + false, true, false, true); + public static UcdPropertyDetail kHDZRadBreak_Detail = new UcdPropertyDetail ( + UcdProperty.kHDZRadBreak, VersionInfo.getInstance(11,0,0), 176, + false, true, false, true); + public static UcdPropertyDetail kHKGlyph_Detail = 
new UcdPropertyDetail ( + UcdProperty.kHKGlyph, VersionInfo.getInstance(11,0,0), 177, + false, true, false, true); + public static UcdPropertyDetail kHanyuPinlu_Detail = new UcdPropertyDetail ( + UcdProperty.kHanyuPinlu, VersionInfo.getInstance(11,0,0), 178, + false, true, false, true); + public static UcdPropertyDetail kHanyuPinyin_Detail = new UcdPropertyDetail ( + UcdProperty.kHanyuPinyin, VersionInfo.getInstance(11,0,0), 179, + false, true, false, true); + public static UcdPropertyDetail kIRGHanyuDaZidian_Detail = new UcdPropertyDetail ( + UcdProperty.kIRGHanyuDaZidian, VersionInfo.getInstance(11,0,0), 180, + false, true, false, true); + public static UcdPropertyDetail kIRGKangXi_Detail = new UcdPropertyDetail ( + UcdProperty.kIRGKangXi, VersionInfo.getInstance(11,0,0), 181, + false, true, false, true); + public static UcdPropertyDetail kIRGDaeJaweon_Detail = new UcdPropertyDetail ( + UcdProperty.kIRGDaeJaweon, VersionInfo.getInstance(11,0,0), 182, + false, true, false, true); + public static UcdPropertyDetail kIRGDaiKanwaZiten_Detail = new UcdPropertyDetail ( + UcdProperty.kIRGDaiKanwaZiten, VersionInfo.getInstance(11,0,0), + VersionInfo.getInstance(15,1,0), 183, + false, true, false, true); + public static UcdPropertyDetail kKorean_Detail = new UcdPropertyDetail ( + UcdProperty.kKorean, VersionInfo.getInstance(11,0,0), 184, + false, true, false, true); + public static UcdPropertyDetail kMainlandTelegraph_Detail = new UcdPropertyDetail ( + UcdProperty.kMainlandTelegraph, VersionInfo.getInstance(11,0,0), 185, + false, true, false, true); + public static UcdPropertyDetail kMorohashi_Detail = new UcdPropertyDetail ( + UcdProperty.kMorohashi, VersionInfo.getInstance(11,0,0), 186, + false, true, false, true); +// public static UcdPropertyDetail kAlternateMorohashi_Detail = new UcdPropertyDetail ( +// UcdProperty.kAlternateMorohashi, VersionInfo.getInstance(11,0,0), 187, +// false, true, false, true); + public static UcdPropertyDetail kPrimaryNumeric_Detail = new 
UcdPropertyDetail ( + UcdProperty.kPrimaryNumeric, VersionInfo.getInstance(11,0,0), 188, + false, true, false, true); + public static UcdPropertyDetail kTaiwanTelegraph_Detail = new UcdPropertyDetail ( + UcdProperty.kTaiwanTelegraph, VersionInfo.getInstance(11,0,0), 189, + false, true, false, true); + public static UcdPropertyDetail kXerox_Detail = new UcdPropertyDetail ( + UcdProperty.kXerox, VersionInfo.getInstance(11,0,0), 190, + false, true, false, true); + public static UcdPropertyDetail kPseudoGB1_Detail = new UcdPropertyDetail ( + UcdProperty.kPseudoGB1, VersionInfo.getInstance(11,0,0), 191, + false, true, false, true); + public static UcdPropertyDetail kIBMJapan_Detail = new UcdPropertyDetail ( + UcdProperty.kIBMJapan, VersionInfo.getInstance(11,0,0), 192, + false, true, false, true); + public static UcdPropertyDetail kAccountingNumeric_Detail = new UcdPropertyDetail ( + UcdProperty.kAccountingNumeric, VersionInfo.getInstance(11,0,0), 193, + false, true, false, true); + public static UcdPropertyDetail kCheungBauer_Detail = new UcdPropertyDetail ( + UcdProperty.kCheungBauer, VersionInfo.getInstance(11,0,0), 194, + false, true, false, true); + public static UcdPropertyDetail kCheungBauerIndex_Detail = new UcdPropertyDetail ( + UcdProperty.kCheungBauerIndex, VersionInfo.getInstance(11,0,0), 195, + false, true, false, true); + public static UcdPropertyDetail kFourCornerCode_Detail = new UcdPropertyDetail ( + UcdProperty.kFourCornerCode, VersionInfo.getInstance(11,0,0), 196, + false, true, false, true); +// public static UcdPropertyDetail kWubi_Detail = new UcdPropertyDetail ( +// UcdProperty.kWubi, VersionInfo.getInstance(11,0,0), 197, +// false, true, false, true); + public static UcdPropertyDetail kXHC1983_Detail = new UcdPropertyDetail ( + UcdProperty.kXHC1983, VersionInfo.getInstance(11,0,0), 198, + false, true, false, true); + public static UcdPropertyDetail kJinmeiyoKanji_Detail = new UcdPropertyDetail ( + UcdProperty.kJinmeiyoKanji, 
VersionInfo.getInstance(11,0,0), 199, + false, true, false, true); + public static UcdPropertyDetail kJoyoKanji_Detail = new UcdPropertyDetail ( + UcdProperty.kJoyoKanji, VersionInfo.getInstance(11,0,0), 200, + false, true, false, true); + public static UcdPropertyDetail kKoreanEducationHanja_Detail = new UcdPropertyDetail ( + UcdProperty.kKoreanEducationHanja, VersionInfo.getInstance(11,0,0), 201, + false, true, false, true); + public static UcdPropertyDetail kKoreanName_Detail = new UcdPropertyDetail ( + UcdProperty.kKoreanName, VersionInfo.getInstance(11,0,0), 202, + false, true, false, true); + public static UcdPropertyDetail kTGH_Detail = new UcdPropertyDetail ( + UcdProperty.kTGH, VersionInfo.getInstance(11,0,0), 203, + false, true, false, true); + public static UcdPropertyDetail kTGHZ2013_Detail = new UcdPropertyDetail ( + UcdProperty.kTGHZ2013, VersionInfo.getInstance(11,0,0), 204, + false, true, false, true); + public static UcdPropertyDetail kSpoofingVariant_Detail = new UcdPropertyDetail ( + UcdProperty.kSpoofingVariant, VersionInfo.getInstance(11,0,0), 205, + false, true, false, true); + public static UcdPropertyDetail kRSKanWa_Detail = new UcdPropertyDetail ( + UcdProperty.kRSKanWa, VersionInfo.getInstance(11,0,0), 206, + false, true, false, true); + public static UcdPropertyDetail kRSJapanese_Detail = new UcdPropertyDetail ( + UcdProperty.kRSJapanese, VersionInfo.getInstance(11,0,0), 207, + false, true, false, true); + public static UcdPropertyDetail kRSKorean_Detail = new UcdPropertyDetail ( + UcdProperty.kRSKorean, VersionInfo.getInstance(11,0,0), 208, + false, true, false, true); + public static UcdPropertyDetail kRSKangXi_Detail = new UcdPropertyDetail ( + UcdProperty.kRSKangXi, VersionInfo.getInstance(11,0,0), + VersionInfo.getInstance(15,1,0), 209, + false, true, false, true); + public static UcdPropertyDetail kRSAdobe_Japan1_6_Detail = new UcdPropertyDetail ( + UcdProperty.kRSAdobe_Japan1_6, VersionInfo.getInstance(11,0,0), 210, + false, true, 
false, true); + public static UcdPropertyDetail kTotalStrokes_Detail = new UcdPropertyDetail ( + UcdProperty.kTotalStrokes, VersionInfo.getInstance(11,0,0), 211, + false, true, false, true); + public static UcdPropertyDetail kRSTUnicode_Detail = new UcdPropertyDetail ( + UcdProperty.kRSTUnicode, VersionInfo.getInstance(9,0,0), 212, + false, true, false, true); + public static UcdPropertyDetail kTGT_MergedSrc_Detail = new UcdPropertyDetail ( + UcdProperty.kTGT_MergedSrc, VersionInfo.getInstance(9,0,0), 213, + false, true, false, true); + public static UcdPropertyDetail kSrc_NushuDuben_Detail = new UcdPropertyDetail ( + UcdProperty.kSrc_NushuDuben, VersionInfo.getInstance(10,0,0), 214, + false, true, false, true); + public static UcdPropertyDetail kReading_Detail = new UcdPropertyDetail ( + UcdProperty.kReading, VersionInfo.getInstance(10,0,0), 215, + false, true, false, true); + public static UcdPropertyDetail ISO_Comment_Detail = new UcdPropertyDetail ( + UcdProperty.ISO_Comment, VersionInfo.getInstance(11,0,0), 216, + true, false, false, true); + public static UcdPropertyDetail Unicode_1_Name_Detail = new UcdPropertyDetail ( + UcdProperty.Unicode_1_Name, VersionInfo.getInstance(11,0,0), 217, + true, false, false, true); + public static UcdPropertyDetail Name_Alias_Detail = new UcdPropertyDetail ( + UcdProperty.Name_Alias, VersionInfo.getInstance(11,0,0), 218, + false, false, false, true); + public static UcdPropertyDetail Emoji_Detail = new UcdPropertyDetail ( + UcdProperty.Emoji, VersionInfo.getInstance(13,0,0), 219, + true, false, false, true); + public static UcdPropertyDetail Emoji_Presentation_Detail = new UcdPropertyDetail ( + UcdProperty.Emoji_Presentation, VersionInfo.getInstance(13,0,0), 220, + true, false, false, true); + public static UcdPropertyDetail Emoji_Modifier_Detail = new UcdPropertyDetail ( + UcdProperty.Emoji_Modifier, VersionInfo.getInstance(13,0,0), 221, + true, false, false, true); + public static UcdPropertyDetail 
Emoji_Modifier_Base_Detail = new UcdPropertyDetail ( + UcdProperty.Emoji_Modifier_Base, VersionInfo.getInstance(13,0,0), 222, + true, false, false, true); + public static UcdPropertyDetail Emoji_Component_Detail = new UcdPropertyDetail ( + UcdProperty.Emoji_Component, VersionInfo.getInstance(13,0,0), 223, + true, false, false, true); + public static UcdPropertyDetail Extended_Pictographic_Detail = new UcdPropertyDetail ( + UcdProperty.Extended_Pictographic, VersionInfo.getInstance(13,0,0), 224, + true, false, false, true); + public static UcdPropertyDetail kStrange_Detail = new UcdPropertyDetail ( + UcdProperty.kStrange, VersionInfo.getInstance(14,0,0), 225, + false, true, false, true); + public static UcdPropertyDetail kAlternateTotalStrokes_Detail = new UcdPropertyDetail ( + UcdProperty.kAlternateTotalStrokes, VersionInfo.getInstance(15,0,0), 226, + false, true, false, true); + public static UcdPropertyDetail NFKC_Simple_Casefold_Detail = new UcdPropertyDetail ( + UcdProperty.NFKC_Simple_Casefold, VersionInfo.getInstance(15,1,0), 227, + true, false, false, true); + public static UcdPropertyDetail ID_Compat_Math_Start_Detail = new UcdPropertyDetail ( + UcdProperty.ID_Compat_Math_Start, VersionInfo.getInstance(15,1,0), 228, + true, false, false, true); + public static UcdPropertyDetail ID_Compat_Math_Continue_Detail = new UcdPropertyDetail ( + UcdProperty.ID_Compat_Math_Continue, VersionInfo.getInstance(15,1,0), 229, + true, false, false, true); + public static UcdPropertyDetail IDS_Unary_Operator_Detail = new UcdPropertyDetail ( + UcdProperty.IDS_Unary_Operator, VersionInfo.getInstance(15,1,0), 230, + true, false, false, true); + public static UcdPropertyDetail kJapanese_Detail = new UcdPropertyDetail ( + UcdProperty.kJapanese, VersionInfo.getInstance(15,1,0), 231, + false, true, false, true); + public static UcdPropertyDetail kMojiJoho_Detail = new UcdPropertyDetail ( + UcdProperty.kMojiJoho, VersionInfo.getInstance(15,1,0), 232, + false, true, false, true); + 
public static UcdPropertyDetail kSMSZD2003Index_Detail = new UcdPropertyDetail ( + UcdProperty.kSMSZD2003Index, VersionInfo.getInstance(15,1,0), 233, + false, true, false, true); + public static UcdPropertyDetail kSMSZD2003Readings_Detail = new UcdPropertyDetail ( + UcdProperty.kSMSZD2003Readings, VersionInfo.getInstance(15,1,0), 234, + false, true, false, true); + public static UcdPropertyDetail kVietnameseNumeric_Detail = new UcdPropertyDetail ( + UcdProperty.kVietnameseNumeric, VersionInfo.getInstance(15,1,0), 235, + false, true, false, true); + public static UcdPropertyDetail kZhuangNumeric_Detail = new UcdPropertyDetail ( + UcdProperty.kZhuangNumeric, VersionInfo.getInstance(15,1,0), 236, + false, true, false, true); + public static UcdPropertyDetail Indic_Conjunct_Break_Detail = new UcdPropertyDetail ( + UcdProperty.Indic_Conjunct_Break, VersionInfo.getInstance(15,1,0), 237, + true, false, false, true); + public static UcdPropertyDetail Modifier_Combining_Mark_Detail = new UcdPropertyDetail ( + UcdProperty.Modifier_Combining_Mark, VersionInfo.getInstance(16,0,0), 238, + true, false, false, true); + public static UcdPropertyDetail kFanqie_Detail = new UcdPropertyDetail ( + UcdProperty.kFanqie, VersionInfo.getInstance(16,0,0), 239, + false, true, false, true); + public static UcdPropertyDetail kZhuang_Detail = new UcdPropertyDetail ( + UcdProperty.kZhuang, VersionInfo.getInstance(16,0,0), 240, + false, true, false, true); + public static UcdPropertyDetail Basic_Emoji_Detail = new UcdPropertyDetail ( + UcdProperty.Basic_Emoji, -1, + false, false, false, false); + public static UcdPropertyDetail CJK_Radical_Detail = new UcdPropertyDetail ( + UcdProperty.CJK_Radical, -2, + false, false, false, false); + public static UcdPropertyDetail Confusable_MA_Detail = new UcdPropertyDetail ( + UcdProperty.Confusable_MA, -3, + false, false, false, false); + public static UcdPropertyDetail Confusable_ML_Detail = new UcdPropertyDetail ( + UcdProperty.Confusable_ML, -4, + false, 
false, false, false); + public static UcdPropertyDetail Confusable_SA_Detail = new UcdPropertyDetail ( + UcdProperty.Confusable_SA, -5, + false, false, false, false); + public static UcdPropertyDetail Confusable_SL_Detail = new UcdPropertyDetail ( + UcdProperty.Confusable_SL, -6, + false, false, false, false); + public static UcdPropertyDetail Do_Not_Emit_Preferred_Detail = new UcdPropertyDetail ( + UcdProperty.Do_Not_Emit_Preferred, -7, + false, false, false, false); + public static UcdPropertyDetail Do_Not_Emit_Type_Detail = new UcdPropertyDetail ( + UcdProperty.Do_Not_Emit_Type, -8, + false, false, false, false); + public static UcdPropertyDetail Emoji_DCM_Detail = new UcdPropertyDetail ( + UcdProperty.Emoji_DCM, VersionInfo.getInstance(6,0,0), -9, + false, false, false, false); + public static UcdPropertyDetail Emoji_KDDI_Detail = new UcdPropertyDetail ( + UcdProperty.Emoji_KDDI, VersionInfo.getInstance(6,0,0), -10, + false, false, false, false); + public static UcdPropertyDetail Emoji_SB_Detail = new UcdPropertyDetail ( + UcdProperty.Emoji_SB, VersionInfo.getInstance(6,0,0), -11, + false, false, false, false); + public static UcdPropertyDetail Identifier_Status_Detail = new UcdPropertyDetail ( + UcdProperty.Identifier_Status, VersionInfo.getInstance(9,0,0), -12, + false, false, false, false); + public static UcdPropertyDetail Identifier_Type_Detail = new UcdPropertyDetail ( + UcdProperty.Identifier_Type, VersionInfo.getInstance(9,0,0), -13, + false, false, false, false); + public static UcdPropertyDetail Idn_2008_Detail = new UcdPropertyDetail ( + UcdProperty.Idn_2008, -14, + false, false, false, false); + public static UcdPropertyDetail Idn_Mapping_Detail = new UcdPropertyDetail ( + UcdProperty.Idn_Mapping, -15, + false, false, false, false); + public static UcdPropertyDetail Idn_Status_Detail = new UcdPropertyDetail ( + UcdProperty.Idn_Status, -16, + false, false, false, false); + public static UcdPropertyDetail Named_Sequences_Detail = new UcdPropertyDetail 
( + UcdProperty.Named_Sequences, -17, + false, false, false, false); + public static UcdPropertyDetail Named_Sequences_Prov_Detail = new UcdPropertyDetail ( + UcdProperty.Named_Sequences_Prov, -18, + false, false, false, false); + public static UcdPropertyDetail Other_Joining_Type_Detail = new UcdPropertyDetail ( + UcdProperty.Other_Joining_Type, -19, + false, false, false, false); + public static UcdPropertyDetail RGI_Emoji_Flag_Sequence_Detail = new UcdPropertyDetail ( + UcdProperty.RGI_Emoji_Flag_Sequence, -20, + false, false, false, false); + public static UcdPropertyDetail RGI_Emoji_Keycap_Sequence_Detail = new UcdPropertyDetail ( + UcdProperty.RGI_Emoji_Keycap_Sequence, -21, + false, false, false, false); + public static UcdPropertyDetail RGI_Emoji_Modifier_Sequence_Detail = new UcdPropertyDetail ( + UcdProperty.RGI_Emoji_Modifier_Sequence, -22, + false, false, false, false); + public static UcdPropertyDetail RGI_Emoji_Tag_Sequence_Detail = new UcdPropertyDetail ( + UcdProperty.RGI_Emoji_Tag_Sequence, -23, + false, false, false, false); + public static UcdPropertyDetail RGI_Emoji_Zwj_Sequence_Detail = new UcdPropertyDetail ( + UcdProperty.RGI_Emoji_Zwj_Sequence, -24, + false, false, false, false); + public static UcdPropertyDetail Standardized_Variant_Detail = new UcdPropertyDetail ( + UcdProperty.Standardized_Variant, -25, + false, false, false, false); + + private UcdProperty ucdProperty; + private VersionInfo minVersion; + private VersionInfo maxVersion; + private int sortOrder; + private boolean isBaseAttribute; + private boolean isCJKAttribute; + private boolean isCJKShowIfEmpty; + private boolean isOrgUCDXMLAttribute; + + private UcdPropertyDetail( + UcdProperty ucdProperty, + VersionInfo minVersion, + int sortOrder, + boolean isBaseAttribute, + boolean isCJKAttribute, + boolean isCJKShowIfEmpty, + boolean isOrgUCDXMLAttribute) { + this ( + ucdProperty, minVersion, null, + sortOrder, isBaseAttribute, isCJKAttribute, isCJKShowIfEmpty, 
isOrgUCDXMLAttribute); + } + + private UcdPropertyDetail( + UcdProperty ucdProperty, + int sortOrder, + boolean isBaseAttribute, + boolean isCJKAttribute, + boolean isCJKShowIfEmpty, + boolean isOrgUCDXMLAttribute) { + this ( + ucdProperty, null, null, + sortOrder, isBaseAttribute, isCJKAttribute, isCJKShowIfEmpty, isOrgUCDXMLAttribute); + } + + private UcdPropertyDetail( + UcdProperty ucdProperty, + VersionInfo minVersion, + VersionInfo maxVersion, + int sortOrder, + boolean isBaseAttribute, + boolean isCJKAttribute, + boolean isCJKShowIfEmpty, + boolean isOrgUCDXMLAttribute) { + this.ucdProperty = ucdProperty; + this.minVersion = minVersion; + this.maxVersion = maxVersion; + this.sortOrder = sortOrder; + this.isBaseAttribute = isBaseAttribute; + this.isCJKAttribute = isCJKAttribute; + this.isCJKShowIfEmpty = isCJKShowIfEmpty; + this.isOrgUCDXMLAttribute = isOrgUCDXMLAttribute; + + allPropertyDetails.add(this); + if(isBaseAttribute) { + basePropertyDetails.add(this); + ucdxmlPropertyDetails.add(this); + } + if(isCJKAttribute) { + cjkPropertyDetails.add(this); + ucdxmlPropertyDetails.add(this); + } + } + + public static Set values () { + return allPropertyDetails; + } + public static Set baseValues () { + return basePropertyDetails; + } + public static Set cjkValues () { + return cjkPropertyDetails; + } + public static Set ucdxmlValues () { + return ucdxmlPropertyDetails; + } + + public UcdProperty getUcdProperty() { + return this.ucdProperty; + } + + public VersionInfo getMinVersion() { + return this.minVersion; + } + + public VersionInfo getMaxVersion() { + return this.maxVersion; + } + + public boolean isBaseAttribute() { + return this.isBaseAttribute; + } + + public boolean isCJKAttribute() { + return this.isCJKAttribute; + } + + public boolean isCJKShowIfEmpty() { + return this.isCJKShowIfEmpty; + } + + public boolean isOrgUCDXMLAttribute() { + return this.isOrgUCDXMLAttribute; + } +} \ No newline at end of file diff --git 
a/unicodetools/src/main/java/org/unicode/xml/UcdSectionComponent.java b/unicodetools/src/main/java/org/unicode/xml/UcdSectionComponent.java new file mode 100644 index 000000000..0773486cc --- /dev/null +++ b/unicodetools/src/main/java/org/unicode/xml/UcdSectionComponent.java @@ -0,0 +1,28 @@ +package org.unicode.xml; + +import com.ibm.icu.util.VersionInfo; +import org.unicode.props.UcdProperty; + +public class UcdSectionComponent { + private final VersionInfo minVersion; + private final VersionInfo maxVersion; + private final UcdProperty ucdProperty; + + UcdSectionComponent(VersionInfo minVersion, VersionInfo maxVersion, UcdProperty ucdProperty) { + this.minVersion = minVersion; + this.maxVersion = maxVersion; + this.ucdProperty = ucdProperty; + } + + public VersionInfo getMinVersion() { + return this.minVersion; + } + + public VersionInfo getMaxVersion() { + return this.maxVersion; + } + + public UcdProperty getUcdProperty() { + return this.ucdProperty; + } +} diff --git a/unicodetools/src/main/java/org/unicode/xml/UcdSectionDetail.java b/unicodetools/src/main/java/org/unicode/xml/UcdSectionDetail.java new file mode 100644 index 000000000..24b9a35a6 --- /dev/null +++ b/unicodetools/src/main/java/org/unicode/xml/UcdSectionDetail.java @@ -0,0 +1,153 @@ +package org.unicode.xml; + +import com.ibm.icu.util.VersionInfo; +import org.unicode.props.UcdProperty; + +import java.util.LinkedHashSet; +import java.util.Set; + +public class UcdSectionDetail { + + public enum UcdSection { + BLOCKS ("blocks", "block", VersionInfo.getInstance(1, 1, 0), null, Blocks_Detail, true, true), + CJKRADICALS ("cjk-radicals", "cjk-radical", VersionInfo.getInstance(1, 1, 0), null, CJKRadicals_Detail, false, false), + DONOTEMIT ("do-not-emit", "instead", VersionInfo.getInstance(16, 0, 0), null, DoNotEmit_Detail, false, false), + EMOJISOURCES ("emoji-sources", "emoji-source", VersionInfo.getInstance(1, 1, 0), null, EmojiSources_Detail, true, false), + NAMEDSEQUENCES ("named-sequences", 
"named-sequence", VersionInfo.getInstance(1, 1, 0), null, NamedSequences_Detail, false, false), + NORMALIZATIONCORRECTIONS ("normalization-corrections", "normalization-correction", VersionInfo.getInstance(1, 1, 0), null, NormalizationCorrections_Detail, true, false), + STANDARDIZEDVARIANTS ("standardized-variants", "standardized-variant", VersionInfo.getInstance(1, 1, 0), null, StandardizedVariants_Detail, true, false); + private final String tag; + private final String childTag; + private final VersionInfo minVersion; + private final VersionInfo maxVersion; + private final UcdSectionDetail ucdSectionDetail; + private final boolean parserWithRange; + private final boolean parserWithMissing; + + UcdSection( + String tag, + String childTag, + VersionInfo minVersion, + VersionInfo maxVersion, + UcdSectionDetail ucdSectionDetail, + boolean parserWithRange, + boolean parserWithMissing) { + this.tag = tag; + this.childTag = childTag; + this.minVersion = minVersion; + this.maxVersion = maxVersion; + this.ucdSectionDetail = ucdSectionDetail; + this.parserWithRange = parserWithRange; + this.parserWithMissing = parserWithMissing; + } + + public String toString() { + return tag; + } + public String getChildTag() { + return childTag; + } + public VersionInfo getMinVersion() { + return minVersion; + } + public VersionInfo getMaxVersion() { + return maxVersion; + } + public UcdSectionDetail getUcdSectionDetail() { + return ucdSectionDetail; + } + public boolean getParserWithRange() { return parserWithRange; } + public boolean getParserWithMissing() { return parserWithMissing; } + } + + public static UcdSectionDetail Blocks_Detail = new UcdSectionDetail( + UcdSection.BLOCKS, + new UcdSectionComponent[]{ + new UcdSectionComponent( + VersionInfo.getInstance(1, 1, 0), + null, + UcdProperty.Block) + }, + 0); + public static UcdSectionDetail NamedSequences_Detail = new UcdSectionDetail( + UcdSection.NAMEDSEQUENCES, + new UcdSectionComponent[]{ + new UcdSectionComponent( + 
VersionInfo.getInstance(1, 1, 0), + null, + UcdProperty.Named_Sequences) + }, + 1); + public static UcdSectionDetail NormalizationCorrections_Detail = new UcdSectionDetail( + UcdSection.NORMALIZATIONCORRECTIONS, + new UcdSectionComponent[]{ + new UcdSectionComponent( + VersionInfo.getInstance(1, 1, 0), + null, + UcdProperty.NC_Original) + }, + 2); + public static UcdSectionDetail StandardizedVariants_Detail = new UcdSectionDetail( + UcdSection.STANDARDIZEDVARIANTS, + new UcdSectionComponent[]{ + new UcdSectionComponent( + VersionInfo.getInstance(1, 1, 0), + null, + UcdProperty.Standardized_Variant), + new UcdSectionComponent( + VersionInfo.getInstance(13, 1, 0), + null, + UcdProperty.emoji_variation_sequence) + }, + 3); + public static UcdSectionDetail CJKRadicals_Detail = new UcdSectionDetail( + UcdSection.CJKRADICALS, + new UcdSectionComponent[]{ + new UcdSectionComponent( + VersionInfo.getInstance(1, 1, 0), + null, + UcdProperty.CJK_Radical) + }, + 4); + public static UcdSectionDetail EmojiSources_Detail = new UcdSectionDetail( + UcdSection.EMOJISOURCES, + new UcdSectionComponent[]{ + new UcdSectionComponent( + VersionInfo.getInstance(1, 1, 0), + null, + UcdProperty.Emoji_DCM) + }, + 5); + public static UcdSectionDetail DoNotEmit_Detail = new UcdSectionDetail( + UcdSection.DONOTEMIT, + new UcdSectionComponent[]{ + new UcdSectionComponent( + VersionInfo.getInstance(1, 1, 0), + null, + UcdProperty.Do_Not_Emit_Type) + }, + 6); + + private final UcdSection ucdSection; + private final UcdSectionComponent[] ucdSectionComponents; + private final int sortOrder; + + private UcdSectionDetail( + UcdSection ucdSection, + UcdSectionComponent[] ucdSectionComponents, + int sortOrder) { + this.ucdSection = ucdSection; + this.ucdSectionComponents = ucdSectionComponents; + this.sortOrder = sortOrder; + } + + public UcdSection getSection() { + return this.ucdSection; + } + public UcdSectionComponent[] getUcdSectionComponents() { + return this.ucdSectionComponents; + } + public int 
getSortOrder() { + return this.sortOrder; + } +} \ No newline at end of file diff --git a/unicodetools/src/main/java/org/unicode/xml/UcdXML.java b/unicodetools/src/main/java/org/unicode/xml/UcdXML.java new file mode 100644 index 000000000..c826b4f40 --- /dev/null +++ b/unicodetools/src/main/java/org/unicode/xml/UcdXML.java @@ -0,0 +1,362 @@ +package org.unicode.xml; + +import java.io.*; +import java.nio.charset.StandardCharsets; +import java.util.*; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import com.ibm.icu.util.VersionInfo; +import com.thaiopensource.resolver.Input; +import org.unicode.props.*; +import org.xml.sax.SAXException; +import org.xml.sax.helpers.AttributesImpl; + +import javax.xml.transform.TransformerConfigurationException; + + +public class UcdXML { + + private static final String NAMESPACE = "http://www.unicode.org/ns/2003/ucd/1.0"; + + private enum OutputType { + STRICT, + COMPATIBLE + } + + private enum UCDXMLOUTPUTRANGE { + ALL, + NOUNIHAN, + UNIHAN; + } + + private enum UCDXMLOUTPUTTYPE { + FLAT, + GROUPED; + } + + private enum Range { + RESERVED ("reserved"), + SURROGATE ("surrogate"), + NONCHARACTER ("noncharacter"), + CHARACTER ("char"), + CJKUNIFIEDIDEOGRAPH ("char"), + NONRANGE ("nonrange"); + + private final String tag; + + Range(String tag) { + this.tag = tag; + } + + public String toString() { + return tag; + } + } + + public static void main(String[] args) throws Exception { + + VersionInfo ucdVersion = VersionInfo.getInstance(15, 1, 0); + File destinationFolder = new File( + "C:\\_git\\Unicode\\ucdxml\\data\\" + + getVersionString(ucdVersion, 3) + "\\xmltest\\"); + if(!destinationFolder.exists()) { + destinationFolder.mkdir(); + } + buildUcdXMLFile(ucdVersion, destinationFolder, UCDXMLOUTPUTRANGE.ALL, UCDXMLOUTPUTTYPE.FLAT); + + System.out.println("end"); + } + + private static void buildUcdXMLFile( + VersionInfo ucdVersion, File destinationFolder, UCDXMLOUTPUTRANGE outputRange, UCDXMLOUTPUTTYPE outputType) + 
throws IOException, TransformerConfigurationException, SAXException { + int lowCodepoint = 0x0; + int highCodepoint = 0x10FFFF; + // Tangut + //int lowCodepoint = 0x17000; + //int highCodepoint = 0x1B2FB; + //0x10FFFF + + File tempFile = new File(destinationFolder, "temp.xml"); + String outputFilename = "ucd." + + outputRange.toString().toLowerCase() + "." + + outputType.toString().toLowerCase() + ".xml"; + File destinationFile = new File(destinationFolder, outputFilename); + + FileOutputStream fileOutputStream = new FileOutputStream(tempFile); + UCDXMLWriter writer = new UCDXMLWriter(fileOutputStream); + + IndexUnicodeProperties iup = IndexUnicodeProperties.make(ucdVersion); + AttributeResolver attributeResolver = new AttributeResolver(iup); + UCDDataResolver ucdDataResolver = new UCDDataResolver(iup, NAMESPACE, writer); + + writer.startFile(); + writer.startElement("ucd"); { + writer.startElement("description"); { + writer.addContent("Unicode " + getVersionString(ucdVersion, 3)); + writer.endElement("description"); + } + buildRepertoire(writer, attributeResolver, ucdVersion, lowCodepoint, highCodepoint, outputRange); + if(outputRange != UCDXMLOUTPUTRANGE.UNIHAN) { + ucdDataResolver.buildSection(UcdSectionDetail.UcdSection.BLOCKS); + ucdDataResolver.buildSection(UcdSectionDetail.UcdSection.NAMEDSEQUENCES); + ucdDataResolver.buildSection(UcdSectionDetail.UcdSection.NORMALIZATIONCORRECTIONS); + ucdDataResolver.buildSection(UcdSectionDetail.UcdSection.STANDARDIZEDVARIANTS); + ucdDataResolver.buildSection(UcdSectionDetail.UcdSection.CJKRADICALS); + ucdDataResolver.buildSection(UcdSectionDetail.UcdSection.EMOJISOURCES); + ucdDataResolver.buildSection(UcdSectionDetail.UcdSection.DONOTEMIT); + } + writer.endElement("ucd"); + } + writer.endFile(); + fileOutputStream.close (); + cleanUcdXMLFile(tempFile, destinationFile); + tempFile.delete(); + } + + private static void cleanUcdXMLFile(File tempFile, File destinationFile) throws IOException { + //XALAN writes out 
characters outside the BMP as entities. + //Use this code to replace the entities with the correct characters. + //See: https://issues.apache.org/jira/browse/XALANJ-2595 + + FileInputStream fileInputStream = new FileInputStream(tempFile); + FileOutputStream fileOutputStream = new FileOutputStream(destinationFile); + + InputStreamReader inputStreamReader = new InputStreamReader(fileInputStream, StandardCharsets.UTF_8); + OutputStreamWriter outputStreamWriter = new OutputStreamWriter(fileOutputStream, StandardCharsets.UTF_8); + + BufferedReader bufferedReader = new BufferedReader(inputStreamReader); + BufferedWriter bufferedWriter = new BufferedWriter(outputStreamWriter); + + String line; + while ((line = bufferedReader.readLine()) != null) { + Matcher matcher = Pattern.compile("&#([\\d]+);").matcher(line); + line = matcher.replaceAll(matchResult -> new String(Character.toChars(Integer.parseInt(matcher.group(1))))); + bufferedWriter.append(line); + bufferedWriter.newLine(); + } + bufferedWriter.flush(); + fileInputStream.close(); + fileOutputStream.close(); + } + + private static void buildRepertoire( + UCDXMLWriter writer, AttributeResolver attributeResolver, VersionInfo ucdVersion, + int lowCodepoint, int highCodepoint, UCDXMLOUTPUTRANGE outputRange) + throws SAXException { + + writer.startElement("repertoire"); { + + + ArrayList range = new ArrayList<>(); + Range rangeType = Range.NONRANGE; + + for (int codepoint = lowCodepoint; codepoint <= highCodepoint; codepoint++) { + if (attributeResolver.isUnassignedCodepoint(codepoint) || + (outputRange == UCDXMLOUTPUTRANGE.NOUNIHAN && attributeResolver.isUnifiedIdeograph(codepoint))) { + Range currentRangeType = getRangeType(attributeResolver, codepoint); + if (!range.isEmpty()){ + if (!currentRangeType.equals(rangeType) || attributeResolver.isDifferentRange(codepoint, codepoint - 1)) { + if (outputRange != UCDXMLOUTPUTRANGE.UNIHAN) { + buildRange(writer, attributeResolver, ucdVersion, range, rangeType); + } + 
range.clear(); + } + } + range.add(codepoint); + rangeType = currentRangeType; + } + else { + if (!range.isEmpty()) { + if (outputRange != UCDXMLOUTPUTRANGE.UNIHAN) { + buildRange(writer, attributeResolver, ucdVersion, range, rangeType); + } + range.clear(); + rangeType = Range.NONRANGE; + } + buildChar(writer, attributeResolver, ucdVersion, codepoint, outputRange); + } + } + //Handle any range before the end of the repertoire element. + if (!range.isEmpty()) { + if (outputRange != UCDXMLOUTPUTRANGE.UNIHAN) { + buildRange(writer, attributeResolver, ucdVersion, range, rangeType); + } + } + writer.endElement("repertoire"); + } + } + + private static void buildChar( + UCDXMLWriter writer, AttributeResolver attributeResolver, VersionInfo ucdVersion, int codepoint, + UCDXMLOUTPUTRANGE outputRange) + throws SAXException { + + if(outputRange != UCDXMLOUTPUTRANGE.UNIHAN || attributeResolver.isUnihanAttributeRange(codepoint)) { + AttributesImpl at = getAttributes(ucdVersion, attributeResolver, codepoint, outputRange); + writer.startElement("char", at); { + HashMap nameAliases = attributeResolver.getNameAliases(codepoint); + if (null != nameAliases && !nameAliases.isEmpty()) { + for (String alias : nameAliases.keySet()) { + AttributesImpl nameAliasAt = new AttributesImpl(); + nameAliasAt.addAttribute( + NAMESPACE, "alias", "alias", "CDATA", alias); + nameAliasAt.addAttribute( + NAMESPACE, "type", "type", "CDATA", nameAliases.get(alias)); + writer.startElement("name-alias", nameAliasAt); { + writer.endElement("name-alias"); + } + } + } + writer.endElement("char"); + } + } + } + + private static void buildRange(UCDXMLWriter writer, AttributeResolver attributeResolver, VersionInfo ucdVersion, + ArrayList range, Range rangeType) + throws SAXException { + AttributesImpl at = getReservedAttributes(ucdVersion, attributeResolver, range); + writer.startElement(rangeType.tag, at); { + writer.endElement(rangeType.tag); + } + } + + private static Range getRangeType(AttributeResolver 
attributeResolver, int codepoint) { + String NChar = attributeResolver.getNChar(codepoint); + UcdPropertyValues.General_Category_Values gc = attributeResolver.getgc(codepoint); + + if (attributeResolver.isUnihanAttributeRange(codepoint)) { + return Range.CJKUNIFIEDIDEOGRAPH; + } + if (gc.equals(UcdPropertyValues.General_Category_Values.Surrogate)) { + return Range.SURROGATE; + } + if (gc.equals(UcdPropertyValues.General_Category_Values.Private_Use)) { + return Range.CHARACTER; + } + if (NChar.equals(UcdPropertyValues.Binary.Yes.getShortName())) { + return Range.NONCHARACTER; + } + return Range.RESERVED; + } + + private static AttributesImpl getAttributes( + VersionInfo version, AttributeResolver attributeResolver, int codepoint, UCDXMLOUTPUTRANGE outputRange) { + AttributesImpl attributes = new AttributesImpl(); + attributes.addAttribute( + NAMESPACE, "cp", "cp", "CDATA", attributeResolver.getHexString(codepoint)); + + for (UcdPropertyDetail propDetail : UcdPropertyDetail.ucdxmlValues()) { + UcdProperty prop = propDetail.getUcdProperty(); + if (version.compareTo(propDetail.getMinVersion()) >= 0 && + (propDetail.getMaxVersion() == null || version.compareTo(propDetail.getMaxVersion()) < 0)) + { + String attrValue = attributeResolver.getAttributeValue(prop, codepoint); + boolean isAttributeIncluded = getIsAttributeIncluded( + attrValue, + attributeResolver.isUnihanAttributeRange(codepoint), + propDetail, prop, + outputRange); + + if(isAttributeIncluded) { + String propName = prop.getShortName(); + if(propName.startsWith("cjk")) { + propName = propName.substring(2); + } + attributes.addAttribute( + NAMESPACE, + propName, + propName, + "CDATA", + attrValue + ); + } + } + } + return attributes; + } + + private static boolean getIsAttributeIncluded( + String attrValue, + boolean isUnihanAttributeRange, + UcdPropertyDetail propDetail, + UcdProperty prop, + UCDXMLOUTPUTRANGE outputRange) { + if (attrValue == null) { return false; } + if (isUnihanAttributeRange) { + if 
(outputRange == UCDXMLOUTPUTRANGE.UNIHAN) { + if (prop.equals(UcdProperty.Numeric_Type) && !attrValue.equals("None")) { + return true; + } + if (prop.equals(UcdProperty.Numeric_Value) && !attrValue.equals("NaN")) { + return true; + } + return propDetail.isCJKAttribute() && (propDetail.isCJKShowIfEmpty() || !attrValue.isEmpty()); + } + if (outputRange == UCDXMLOUTPUTRANGE.NOUNIHAN && propDetail.isCJKAttribute()) { + return false; + } + if (propDetail.isCJKShowIfEmpty()) { + return true; + } + } + if (propDetail.isBaseAttribute()) { + return true; + } + return !attrValue.isEmpty(); + } + + + private static AttributesImpl getReservedAttributes( + VersionInfo version, AttributeResolver attributeResolver, ArrayList range) { + AttributesImpl attributes = new AttributesImpl(); + + if (range.size() == 1) { + attributes.addAttribute( + NAMESPACE, "cp", "cp", "CDATA", + attributeResolver.getHexString(range.get(0))); + } + else { + attributes.addAttribute( + NAMESPACE, "first-cp", "first-cp", "CDATA", + attributeResolver.getHexString(range.get(0))); + attributes.addAttribute( + NAMESPACE, "last-cp", "last-cp", "CDATA", + attributeResolver.getHexString(range.get(range.size() - 1))); + } + for (UcdPropertyDetail propDetail : UcdPropertyDetail.baseValues()) { + UcdProperty prop = propDetail.getUcdProperty(); + if (version.compareTo(propDetail.getMinVersion()) >= 0 && + (propDetail.getMaxVersion() == null || version.compareTo(propDetail.getMaxVersion()) <= 0)) + { + String attrValue = attributeResolver.getAttributeValue(propDetail.getUcdProperty(), range.get(0)); + + attributes.addAttribute( + NAMESPACE, + prop.getShortName(), + prop.getShortName(), + "CDATA", + attrValue + ); + } + } + return attributes; + } + + private static String getVersionString(VersionInfo version, int maxDigits) { + if (maxDigits >= 1 && maxDigits <= 4) { + int[] digits = new int[]{version.getMajor(), version.getMinor(), version.getMilli(), version.getMicro()}; + StringBuilder verStr = new 
StringBuilder(7); + verStr.append(digits[0]); + for(int i = 1; i < maxDigits; ++i) { + verStr.append("."); + verStr.append(digits[i]); + } + return verStr.toString(); + } else { + throw new IllegalArgumentException("Invalid maxDigits range"); + } + } +} \ No newline at end of file From 0ba5996f7678bd858b0a9b52d50b2769fb006981 Mon Sep 17 00:00:00 2001 From: John Wilcock Date: Thu, 6 Jun 2024 15:54:40 -0700 Subject: [PATCH 03/14] Interim checkin: implemented groups --- .../org/unicode/xml/AttributeResolver.java | 63 +- .../java/org/unicode/xml/UCDDataResolver.java | 23 +- .../java/org/unicode/xml/UCDXMLWriter.java | 1 + .../org/unicode/xml/UcdPropertyDetail.java | 1049 +++++++++-------- .../org/unicode/xml/UcdSectionDetail.java | 35 +- .../src/main/java/org/unicode/xml/UcdXML.java | 511 +++++--- 6 files changed, 974 insertions(+), 708 deletions(-) diff --git a/unicodetools/src/main/java/org/unicode/xml/AttributeResolver.java b/unicodetools/src/main/java/org/unicode/xml/AttributeResolver.java index b5d093635..cb173b00c 100644 --- a/unicodetools/src/main/java/org/unicode/xml/AttributeResolver.java +++ b/unicodetools/src/main/java/org/unicode/xml/AttributeResolver.java @@ -1,7 +1,6 @@ package org.unicode.xml; import com.ibm.icu.dev.util.UnicodeMap; -import com.ibm.icu.util.VersionInfo; import org.unicode.cldr.draft.FileUtilities; import org.unicode.props.*; @@ -99,11 +98,11 @@ public AttributeResolver(IndexUnicodeProperties iup) { } private enum AliasType { - ABBREVIATION ("abbreviation"), - ALTERNATE ("alternate"), - CONTROL ("control"), - CORRECTION ("correction"), - FIGMENT ("figment"); + ABBREVIATION("abbreviation"), + ALTERNATE("alternate"), + CONTROL("control"), + CORRECTION("correction"), + FIGMENT("figment"); private final String aliasType; @@ -129,6 +128,7 @@ private NameAlias(String alias, AliasType type) { public String getAlias() { return alias; } + public AliasType getType() { return type; } @@ -154,15 +154,14 @@ private HashMap> loadNameAliases() { 
String[] parts = line.getParts(); int codepoint = Integer.parseInt(parts[0], 16); NameAlias nameAlias = new NameAlias( - parts[1], AliasType.valueOf(parts[2].toUpperCase())); + parts[1], AliasType.valueOf(parts[2].toUpperCase(Locale.ROOT))); if (nameAliasesByCodepoint.containsKey(codepoint)) { LinkedList nameAliases = new LinkedList<>(nameAliasesByCodepoint.get(codepoint)); nameAliases.add(nameAlias); nameAliases.sort(nameAliasComparator); nameAliasesByCodepoint.replace(codepoint, nameAliases); - } - else { + } else { nameAliasesByCodepoint.put(codepoint, new LinkedList<>(List.of(nameAlias))); } } @@ -171,9 +170,9 @@ private HashMap> loadNameAliases() { public String getAttributeValue(UcdProperty prop, int codepoint) { String resolvedValue = indexUnicodeProperties.getResolvedValue(prop, codepoint); - switch(prop.getType()) { + switch (prop.getType()) { case Numeric: - switch(prop) { + switch (prop) { case kOtherNumeric: case kPrimaryNumeric: case kAccountingNumeric: @@ -182,7 +181,7 @@ public String getAttributeValue(UcdProperty prop, int codepoint) { return Optional.ofNullable(resolvedValue).orElse("NaN"); } case String: - switch(prop) { + switch (prop) { case Equivalent_Unified_Ideograph: String EqUIdeo = getMappingValue(codepoint, resolvedValue, false, ""); return (EqUIdeo.equals("#")) ? null : EqUIdeo; @@ -191,51 +190,51 @@ public String getAttributeValue(UcdProperty prop, int codepoint) { return (kCompatibilityVariant.equals("#")) ? "" : kCompatibilityVariant; case kSimplifiedVariant: case kTraditionalVariant: - String kVariant = getMappingValue(codepoint, resolvedValue, isUnihanAttributeRange(codepoint), "U+"); + String kVariant = getMappingValue(codepoint, resolvedValue, isUnihanAttributeRange(codepoint) + , "U+"); return (kVariant.equals("#")) ? "" : kVariant; case Bidi_Mirroring_Glyph: - //TODO: Question for PAG - This is probably not the desired behavior, but adding this case to maintain consistent output. - // Check the spec. 
But otherwise keep consistent. Update this comment to indicate why. + //Returning empty string for bmg to maintain compatibility with older generated files. String bmg = getMappingValue(codepoint, resolvedValue, false, ""); return (bmg.equals("#")) ? "" : bmg; default: return getMappingValue(codepoint, resolvedValue, false, ""); } case Miscellaneous: - switch(prop) { + switch (prop) { case Jamo_Short_Name: //return map_jamo_short_name.get(codepoint).getShortName(); return Optional.ofNullable(resolvedValue).orElse(""); case Name: - if(resolvedValue != null && resolvedValue.startsWith("CJK UNIFIED IDEOGRAPH-")) { + if (resolvedValue != null && resolvedValue.startsWith("CJK UNIFIED IDEOGRAPH-")) { return "CJK UNIFIED IDEOGRAPH-#"; } - if(resolvedValue != null && resolvedValue.startsWith("CJK COMPATIBILITY IDEOGRAPH-")) { + if (resolvedValue != null && resolvedValue.startsWith("CJK COMPATIBILITY IDEOGRAPH-")) { return "CJK COMPATIBILITY IDEOGRAPH-#"; } - if(resolvedValue != null && resolvedValue.startsWith("TANGUT IDEOGRAPH-")) { + if (resolvedValue != null && resolvedValue.startsWith("TANGUT IDEOGRAPH-")) { return "TANGUT IDEOGRAPH-#"; } - if(resolvedValue != null && resolvedValue.startsWith("KHITAN SMALL SCRIPT CHARACTER-")) { + if (resolvedValue != null && resolvedValue.startsWith("KHITAN SMALL SCRIPT CHARACTER-")) { return "KHITAN SMALL SCRIPT CHARACTER-#"; } - if(resolvedValue != null && resolvedValue.startsWith("NUSHU CHARACTER-")) { + if (resolvedValue != null && resolvedValue.startsWith("NUSHU CHARACTER-")) { return "NUSHU CHARACTER-#"; } - if(resolvedValue != null && resolvedValue.startsWith("EGYPTIAN HIEROGLYPH-")) { + if (resolvedValue != null && resolvedValue.startsWith("EGYPTIAN HIEROGLYPH-")) { return "EGYPTIAN HIEROGLYPH-#"; } return Optional.ofNullable(resolvedValue).orElse(""); case kDefinition: return resolvedValue; default: - if (resolvedValue!= null) { + if (resolvedValue != null) { return resolvedValue.replaceAll("\\|", " "); } return ""; } case 
Catalog: - switch(prop) { + switch (prop) { case Age: String age = map_age.get(codepoint).getShortName(); return (age.equals("NA")) ? "unassigned" : age; @@ -245,7 +244,7 @@ public String getAttributeValue(UcdProperty prop, int codepoint) { return map_script.get(codepoint).getShortName(); case Script_Extensions: StringBuilder extensionBuilder = new StringBuilder(); - String[] extensions = map_script_extensions.get(codepoint).split("\\|", 0); + String[] extensions = map_script_extensions.get(codepoint).split("\\|", 0); for (String extension : extensions) { extensionBuilder.append(UcdPropertyValues.Script_Values.valueOf(extension).getShortName()); extensionBuilder.append(" "); @@ -255,7 +254,7 @@ public String getAttributeValue(UcdProperty prop, int codepoint) { throw new RuntimeException("Missing Catalog case"); } case Enumerated: - switch(prop) { + switch (prop) { case Bidi_Class: return map_bidi_class.get(codepoint).getShortName(); case Bidi_Paired_Bracket_Type: @@ -263,9 +262,8 @@ public String getAttributeValue(UcdProperty prop, int codepoint) { case Canonical_Combining_Class: return map_canonical_combining_class.get(codepoint).getShortName(); case Decomposition_Type: - //TODO: Question for PAG - This is probably not the desired behavior, but specifying lower case to maintain consistent output. - // Check the spec. But otherwise keep consistent. Update this comment to indicate why. - return map_decomposition_type.get(codepoint).getShortName().toLowerCase(); + //Returning lower case to maintain compatibility with older generated files. 
+ return map_decomposition_type.get(codepoint).getShortName().toLowerCase(Locale.ROOT); case Do_Not_Emit_Type: return map_do_not_emit_type.get(codepoint).getShortName(); case East_Asian_Width: @@ -317,9 +315,8 @@ public String getAttributeValue(UcdProperty prop, int codepoint) { default: throw new RuntimeException("Missing Enumerated case"); } - case Binary: - { - switch(resolvedValue) { + case Binary: { + switch (resolvedValue) { // Seems overkill to get this from UcdPropertyValues.Binary case "No": return "N"; @@ -385,7 +382,7 @@ public boolean isDifferentRange(int codepointA, int codepointB) { } private static String getCPString(int codepoint) { - return String.format("%4s", Integer.toHexString(codepoint)).replace(" ", "0").toUpperCase(); + return String.format("%4s", Integer.toHexString(codepoint)).replace(" ", "0").toUpperCase(Locale.ROOT); } public String getHexString(int codepoint) { diff --git a/unicodetools/src/main/java/org/unicode/xml/UCDDataResolver.java b/unicodetools/src/main/java/org/unicode/xml/UCDDataResolver.java index 888dedc0a..1baa4131b 100644 --- a/unicodetools/src/main/java/org/unicode/xml/UCDDataResolver.java +++ b/unicodetools/src/main/java/org/unicode/xml/UCDDataResolver.java @@ -33,10 +33,12 @@ public void buildSection(UcdSectionDetail.UcdSection ucdSection) throws SAXExcep UcdSectionComponent[] ucdSectionComponents = ucdSection.getUcdSectionDetail().getUcdSectionComponents(); if (isCompatibleVersion(minVersion, maxVersion)) { - writer.startElement(tag); { + writer.startElement(tag); + { for (UcdSectionComponent ucdSectionComponent : ucdSectionComponents) { if (isCompatibleVersion(ucdSectionComponent.getMinVersion(), ucdSectionComponent.getMaxVersion())) { - final PropertyParsingInfo fileInfoEVS = PropertyParsingInfo.getPropertyInfo(ucdSectionComponent.getUcdProperty()); + final PropertyParsingInfo fileInfoEVS = + PropertyParsingInfo.getPropertyInfo(ucdSectionComponent.getUcdProperty()); String fullFilename = 
fileInfoEVS.getFullFileName(indexUnicodeProperties.getUcdVersion()); UcdLineParser parser = new UcdLineParser(FileUtilities.in("", fullFilename)); parser.withRange(parserWithRange); @@ -46,7 +48,8 @@ public void buildSection(UcdSectionDetail.UcdSection ucdSection) throws SAXExcep for (UcdLineParser.UcdLine line : parser) { if (!line.getOriginalLine().startsWith("#")) { AttributesImpl attributes = getBlockAttributes(namespace, line); - writer.startElement(childTag, attributes); { + writer.startElement(childTag, attributes); + { writer.endElement(childTag); } } @@ -61,8 +64,10 @@ public void buildSection(UcdSectionDetail.UcdSection ucdSection) throws SAXExcep List names = new ArrayList<>(namedSequences.keySet()); Collections.sort(names); for (String name : names) { - AttributesImpl attributes = getNamedSequenceAttributes(namespace, name, namedSequences); - writer.startElement(childTag, attributes); { + AttributesImpl attributes = getNamedSequenceAttributes(namespace, name, + namedSequences); + writer.startElement(childTag, attributes); + { writer.endElement(childTag); } } @@ -83,8 +88,9 @@ public void buildSection(UcdSectionDetail.UcdSection ucdSection) throws SAXExcep } } - private AttributesImpl getAttributes(UcdSectionDetail.UcdSection ucdSection, String namespace, UcdLineParser.UcdLine line) { - switch(ucdSection) { + private AttributesImpl getAttributes(UcdSectionDetail.UcdSection ucdSection, String namespace, + UcdLineParser.UcdLine line) { + switch (ucdSection) { case CJKRADICALS: return getCJKRadicalAttributes(namespace, line); case DONOTEMIT: @@ -151,7 +157,8 @@ private static AttributesImpl getEmojiSourceAttributes(String namespace, UcdLine return attributes; } - private static AttributesImpl getNamedSequenceAttributes(String namespace, String name, HashMap namedSequences) { + private static AttributesImpl getNamedSequenceAttributes(String namespace, String name, + HashMap namedSequences) { AttributesImpl attributes = new AttributesImpl(); 
attributes.addAttribute( namespace, "name", "name", "CDATA", name); diff --git a/unicodetools/src/main/java/org/unicode/xml/UCDXMLWriter.java b/unicodetools/src/main/java/org/unicode/xml/UCDXMLWriter.java index 1c22267b1..27d88a766 100644 --- a/unicodetools/src/main/java/org/unicode/xml/UCDXMLWriter.java +++ b/unicodetools/src/main/java/org/unicode/xml/UCDXMLWriter.java @@ -42,6 +42,7 @@ public void startFile() throws SAXException { transformerHandler.startDocument (); char[] c = "\n".toCharArray (); transformerHandler.characters (c, 0, c.length); + //TODO: JRW change hardcoded 2023 to current year. c = " \u00A9 2023 Unicode\u00AE, Inc. ".toCharArray (); transformerHandler.comment (c, 0, c.length); c = "\n".toCharArray (); diff --git a/unicodetools/src/main/java/org/unicode/xml/UcdPropertyDetail.java b/unicodetools/src/main/java/org/unicode/xml/UcdPropertyDetail.java index 5e5c607c9..594a6f67b 100644 --- a/unicodetools/src/main/java/org/unicode/xml/UcdPropertyDetail.java +++ b/unicodetools/src/main/java/org/unicode/xml/UcdPropertyDetail.java @@ -8,812 +8,812 @@ public class UcdPropertyDetail { - static private LinkedHashSet basePropertyDetails = new LinkedHashSet (); - static private LinkedHashSet cjkPropertyDetails = new LinkedHashSet (); - static private LinkedHashSet ucdxmlPropertyDetails = new LinkedHashSet (); - static private LinkedHashSet allPropertyDetails = new LinkedHashSet (); + static private LinkedHashSet basePropertyDetails = new LinkedHashSet(); + static private LinkedHashSet cjkPropertyDetails = new LinkedHashSet(); + static private LinkedHashSet ucdxmlPropertyDetails = new LinkedHashSet(); + static private LinkedHashSet allPropertyDetails = new LinkedHashSet(); - public static UcdPropertyDetail Age_Detail = new UcdPropertyDetail ( - UcdProperty.Age, VersionInfo.getInstance(1,1,0), 1, + public static UcdPropertyDetail Age_Detail = new UcdPropertyDetail( + UcdProperty.Age, VersionInfo.getInstance(1, 1, 0), 1, true, false, false, true); - public 
static UcdPropertyDetail Name_Detail = new UcdPropertyDetail ( - UcdProperty.Name, VersionInfo.getInstance(1,1,0), 2, + public static UcdPropertyDetail Name_Detail = new UcdPropertyDetail( + UcdProperty.Name, VersionInfo.getInstance(1, 1, 0), 2, true, false, false, true); - public static UcdPropertyDetail Jamo_Short_Name_Detail = new UcdPropertyDetail ( - UcdProperty.Jamo_Short_Name, VersionInfo.getInstance(1,1,0), 3, + public static UcdPropertyDetail Jamo_Short_Name_Detail = new UcdPropertyDetail( + UcdProperty.Jamo_Short_Name, VersionInfo.getInstance(1, 1, 0), 3, true, false, false, true); - public static UcdPropertyDetail General_Category_Detail = new UcdPropertyDetail ( - UcdProperty.General_Category, VersionInfo.getInstance(1,1,0), 4, + public static UcdPropertyDetail General_Category_Detail = new UcdPropertyDetail( + UcdProperty.General_Category, VersionInfo.getInstance(1, 1, 0), 4, true, false, false, true); - public static UcdPropertyDetail Canonical_Combining_Class_Detail = new UcdPropertyDetail ( - UcdProperty.Canonical_Combining_Class, VersionInfo.getInstance(1,1,0), 5, + public static UcdPropertyDetail Canonical_Combining_Class_Detail = new UcdPropertyDetail( + UcdProperty.Canonical_Combining_Class, VersionInfo.getInstance(1, 1, 0), 5, true, false, false, true); - public static UcdPropertyDetail Decomposition_Type_Detail = new UcdPropertyDetail ( - UcdProperty.Decomposition_Type, VersionInfo.getInstance(1,1,0), 6, + public static UcdPropertyDetail Decomposition_Type_Detail = new UcdPropertyDetail( + UcdProperty.Decomposition_Type, VersionInfo.getInstance(1, 1, 0), 6, true, false, false, true); - public static UcdPropertyDetail Decomposition_Mapping_Detail = new UcdPropertyDetail ( - UcdProperty.Decomposition_Mapping, VersionInfo.getInstance(1,1,0), 7, + public static UcdPropertyDetail Decomposition_Mapping_Detail = new UcdPropertyDetail( + UcdProperty.Decomposition_Mapping, VersionInfo.getInstance(1, 1, 0), 7, true, false, false, true); - public static 
UcdPropertyDetail Numeric_Type_Detail = new UcdPropertyDetail ( - UcdProperty.Numeric_Type, VersionInfo.getInstance(1,1,0), 8, + public static UcdPropertyDetail Numeric_Type_Detail = new UcdPropertyDetail( + UcdProperty.Numeric_Type, VersionInfo.getInstance(1, 1, 0), 8, true, false, false, true); - public static UcdPropertyDetail Numeric_Value_Detail = new UcdPropertyDetail ( - UcdProperty.Numeric_Value, VersionInfo.getInstance(1,1,0), 9, + public static UcdPropertyDetail Numeric_Value_Detail = new UcdPropertyDetail( + UcdProperty.Numeric_Value, VersionInfo.getInstance(1, 1, 0), 9, true, false, false, true); - public static UcdPropertyDetail Bidi_Class_Detail = new UcdPropertyDetail ( - UcdProperty.Bidi_Class, VersionInfo.getInstance(1,1,0), 10, + public static UcdPropertyDetail Bidi_Class_Detail = new UcdPropertyDetail( + UcdProperty.Bidi_Class, VersionInfo.getInstance(1, 1, 0), 10, true, false, false, true); - public static UcdPropertyDetail Bidi_Paired_Bracket_Type_Detail = new UcdPropertyDetail ( - UcdProperty.Bidi_Paired_Bracket_Type, VersionInfo.getInstance(6,3,0), 11, + public static UcdPropertyDetail Bidi_Paired_Bracket_Type_Detail = new UcdPropertyDetail( + UcdProperty.Bidi_Paired_Bracket_Type, VersionInfo.getInstance(6, 3, 0), 11, true, false, false, true); - public static UcdPropertyDetail Bidi_Paired_Bracket_Detail = new UcdPropertyDetail ( - UcdProperty.Bidi_Paired_Bracket, VersionInfo.getInstance(6,3,0), 12, + public static UcdPropertyDetail Bidi_Paired_Bracket_Detail = new UcdPropertyDetail( + UcdProperty.Bidi_Paired_Bracket, VersionInfo.getInstance(6, 3, 0), 12, true, false, false, true); - public static UcdPropertyDetail Bidi_Mirrored_Detail = new UcdPropertyDetail ( - UcdProperty.Bidi_Mirrored, VersionInfo.getInstance(1,1,0), 13, + public static UcdPropertyDetail Bidi_Mirrored_Detail = new UcdPropertyDetail( + UcdProperty.Bidi_Mirrored, VersionInfo.getInstance(1, 1, 0), 13, true, false, false, true); - public static UcdPropertyDetail 
Bidi_Mirroring_Glyph_Detail = new UcdPropertyDetail ( - UcdProperty.Bidi_Mirroring_Glyph, VersionInfo.getInstance(1,1,0), 14, + public static UcdPropertyDetail Bidi_Mirroring_Glyph_Detail = new UcdPropertyDetail( + UcdProperty.Bidi_Mirroring_Glyph, VersionInfo.getInstance(1, 1, 0), 14, true, false, false, true); - public static UcdPropertyDetail Simple_Uppercase_Mapping_Detail = new UcdPropertyDetail ( - UcdProperty.Simple_Uppercase_Mapping, VersionInfo.getInstance(1,1,0), 15, + public static UcdPropertyDetail Simple_Uppercase_Mapping_Detail = new UcdPropertyDetail( + UcdProperty.Simple_Uppercase_Mapping, VersionInfo.getInstance(1, 1, 0), 15, true, false, false, true); - public static UcdPropertyDetail Simple_Lowercase_Mapping_Detail = new UcdPropertyDetail ( - UcdProperty.Simple_Lowercase_Mapping, VersionInfo.getInstance(1,1,0), 16, + public static UcdPropertyDetail Simple_Lowercase_Mapping_Detail = new UcdPropertyDetail( + UcdProperty.Simple_Lowercase_Mapping, VersionInfo.getInstance(1, 1, 0), 16, true, false, false, true); - public static UcdPropertyDetail Simple_Titlecase_Mapping_Detail = new UcdPropertyDetail ( - UcdProperty.Simple_Titlecase_Mapping, VersionInfo.getInstance(1,1,0), 17, + public static UcdPropertyDetail Simple_Titlecase_Mapping_Detail = new UcdPropertyDetail( + UcdProperty.Simple_Titlecase_Mapping, VersionInfo.getInstance(1, 1, 0), 17, true, false, false, true); - public static UcdPropertyDetail Uppercase_Mapping_Detail = new UcdPropertyDetail ( - UcdProperty.Uppercase_Mapping, VersionInfo.getInstance(1,1,0), 18, + public static UcdPropertyDetail Uppercase_Mapping_Detail = new UcdPropertyDetail( + UcdProperty.Uppercase_Mapping, VersionInfo.getInstance(1, 1, 0), 18, true, false, false, true); - public static UcdPropertyDetail Lowercase_Mapping_Detail = new UcdPropertyDetail ( - UcdProperty.Lowercase_Mapping, VersionInfo.getInstance(1,1,0), 19, + public static UcdPropertyDetail Lowercase_Mapping_Detail = new UcdPropertyDetail( + 
UcdProperty.Lowercase_Mapping, VersionInfo.getInstance(1, 1, 0), 19, true, false, false, true); - public static UcdPropertyDetail Titlecase_Mapping_Detail = new UcdPropertyDetail ( - UcdProperty.Titlecase_Mapping, VersionInfo.getInstance(1,1,0), 20, + public static UcdPropertyDetail Titlecase_Mapping_Detail = new UcdPropertyDetail( + UcdProperty.Titlecase_Mapping, VersionInfo.getInstance(1, 1, 0), 20, true, false, false, true); -// public static UcdPropertyDetail Special_Case_Condition_Detail = new UcdPropertyDetail ( + // public static UcdPropertyDetail Special_Case_Condition_Detail = new UcdPropertyDetail ( // UcdProperty.Special_Case_Condition, VersionInfo.getInstance(1,1,0), 21, // true, false, false, true); - public static UcdPropertyDetail Simple_Case_Folding_Detail = new UcdPropertyDetail ( - UcdProperty.Simple_Case_Folding, VersionInfo.getInstance(1,1,0), 22, + public static UcdPropertyDetail Simple_Case_Folding_Detail = new UcdPropertyDetail( + UcdProperty.Simple_Case_Folding, VersionInfo.getInstance(1, 1, 0), 22, true, false, false, true); - public static UcdPropertyDetail Case_Folding_Detail = new UcdPropertyDetail ( - UcdProperty.Case_Folding, VersionInfo.getInstance(1,1,0), 23, + public static UcdPropertyDetail Case_Folding_Detail = new UcdPropertyDetail( + UcdProperty.Case_Folding, VersionInfo.getInstance(1, 1, 0), 23, true, false, false, true); - public static UcdPropertyDetail Joining_Type_Detail = new UcdPropertyDetail ( - UcdProperty.Joining_Type, VersionInfo.getInstance(1,1,0), 24, + public static UcdPropertyDetail Joining_Type_Detail = new UcdPropertyDetail( + UcdProperty.Joining_Type, VersionInfo.getInstance(1, 1, 0), 24, true, false, false, true); - public static UcdPropertyDetail Joining_Group_Detail = new UcdPropertyDetail ( - UcdProperty.Joining_Group, VersionInfo.getInstance(1,1,0), 25, + public static UcdPropertyDetail Joining_Group_Detail = new UcdPropertyDetail( + UcdProperty.Joining_Group, VersionInfo.getInstance(1, 1, 0), 25, true, 
false, false, true); - public static UcdPropertyDetail East_Asian_Width_Detail = new UcdPropertyDetail ( - UcdProperty.East_Asian_Width, VersionInfo.getInstance(1,1,0), 26, + public static UcdPropertyDetail East_Asian_Width_Detail = new UcdPropertyDetail( + UcdProperty.East_Asian_Width, VersionInfo.getInstance(1, 1, 0), 26, true, false, false, true); - public static UcdPropertyDetail Line_Break_Detail = new UcdPropertyDetail ( - UcdProperty.Line_Break, VersionInfo.getInstance(1,1,0), 27, + public static UcdPropertyDetail Line_Break_Detail = new UcdPropertyDetail( + UcdProperty.Line_Break, VersionInfo.getInstance(1, 1, 0), 27, true, false, false, true); - public static UcdPropertyDetail Script_Detail = new UcdPropertyDetail ( - UcdProperty.Script, VersionInfo.getInstance(1,1,0), 28, + public static UcdPropertyDetail Script_Detail = new UcdPropertyDetail( + UcdProperty.Script, VersionInfo.getInstance(1, 1, 0), 28, true, false, false, true); - public static UcdPropertyDetail Script_Extensions_Detail = new UcdPropertyDetail ( - UcdProperty.Script_Extensions, VersionInfo.getInstance(6,1,0), 29, + public static UcdPropertyDetail Script_Extensions_Detail = new UcdPropertyDetail( + UcdProperty.Script_Extensions, VersionInfo.getInstance(6, 1, 0), 29, true, false, false, true); - public static UcdPropertyDetail Dash_Detail = new UcdPropertyDetail ( - UcdProperty.Dash, VersionInfo.getInstance(1,1,0), 30, + public static UcdPropertyDetail Dash_Detail = new UcdPropertyDetail( + UcdProperty.Dash, VersionInfo.getInstance(1, 1, 0), 30, true, false, false, true); - public static UcdPropertyDetail White_Space_Detail = new UcdPropertyDetail ( - UcdProperty.White_Space, VersionInfo.getInstance(1,1,0), 31, + public static UcdPropertyDetail White_Space_Detail = new UcdPropertyDetail( + UcdProperty.White_Space, VersionInfo.getInstance(1, 1, 0), 31, true, false, false, true); - public static UcdPropertyDetail Hyphen_Detail = new UcdPropertyDetail ( - UcdProperty.Hyphen, 
VersionInfo.getInstance(1,1,0), 32, + public static UcdPropertyDetail Hyphen_Detail = new UcdPropertyDetail( + UcdProperty.Hyphen, VersionInfo.getInstance(1, 1, 0), 32, true, false, false, true); - public static UcdPropertyDetail Quotation_Mark_Detail = new UcdPropertyDetail ( - UcdProperty.Quotation_Mark, VersionInfo.getInstance(1,1,0), 33, + public static UcdPropertyDetail Quotation_Mark_Detail = new UcdPropertyDetail( + UcdProperty.Quotation_Mark, VersionInfo.getInstance(1, 1, 0), 33, true, false, false, true); - public static UcdPropertyDetail Radical_Detail = new UcdPropertyDetail ( - UcdProperty.Radical, VersionInfo.getInstance(1,1,0), 34, + public static UcdPropertyDetail Radical_Detail = new UcdPropertyDetail( + UcdProperty.Radical, VersionInfo.getInstance(1, 1, 0), 34, true, false, false, true); - public static UcdPropertyDetail Ideographic_Detail = new UcdPropertyDetail ( - UcdProperty.Ideographic, VersionInfo.getInstance(1,1,0), 35, + public static UcdPropertyDetail Ideographic_Detail = new UcdPropertyDetail( + UcdProperty.Ideographic, VersionInfo.getInstance(1, 1, 0), 35, true, false, false, true); - public static UcdPropertyDetail Unified_Ideograph_Detail = new UcdPropertyDetail ( - UcdProperty.Unified_Ideograph, VersionInfo.getInstance(1,1,0), 36, + public static UcdPropertyDetail Unified_Ideograph_Detail = new UcdPropertyDetail( + UcdProperty.Unified_Ideograph, VersionInfo.getInstance(1, 1, 0), 36, true, false, false, true); - public static UcdPropertyDetail IDS_Binary_Operator_Detail = new UcdPropertyDetail ( - UcdProperty.IDS_Binary_Operator, VersionInfo.getInstance(1,1,0), 37, + public static UcdPropertyDetail IDS_Binary_Operator_Detail = new UcdPropertyDetail( + UcdProperty.IDS_Binary_Operator, VersionInfo.getInstance(1, 1, 0), 37, true, false, false, true); - public static UcdPropertyDetail IDS_Trinary_Operator_Detail = new UcdPropertyDetail ( - UcdProperty.IDS_Trinary_Operator, VersionInfo.getInstance(1,1,0), 38, + public static 
UcdPropertyDetail IDS_Trinary_Operator_Detail = new UcdPropertyDetail( + UcdProperty.IDS_Trinary_Operator, VersionInfo.getInstance(1, 1, 0), 38, true, false, false, true); - public static UcdPropertyDetail Hangul_Syllable_Type_Detail = new UcdPropertyDetail ( - UcdProperty.Hangul_Syllable_Type, VersionInfo.getInstance(1,1,0), 39, + public static UcdPropertyDetail Hangul_Syllable_Type_Detail = new UcdPropertyDetail( + UcdProperty.Hangul_Syllable_Type, VersionInfo.getInstance(1, 1, 0), 39, true, false, false, true); - public static UcdPropertyDetail Default_Ignorable_Code_Point_Detail = new UcdPropertyDetail ( - UcdProperty.Default_Ignorable_Code_Point, VersionInfo.getInstance(1,1,0), 40, + public static UcdPropertyDetail Default_Ignorable_Code_Point_Detail = new UcdPropertyDetail( + UcdProperty.Default_Ignorable_Code_Point, VersionInfo.getInstance(1, 1, 0), 40, true, false, false, true); - public static UcdPropertyDetail Other_Default_Ignorable_Code_Point_Detail = new UcdPropertyDetail ( - UcdProperty.Other_Default_Ignorable_Code_Point, VersionInfo.getInstance(1,1,0), 41, + public static UcdPropertyDetail Other_Default_Ignorable_Code_Point_Detail = new UcdPropertyDetail( + UcdProperty.Other_Default_Ignorable_Code_Point, VersionInfo.getInstance(1, 1, 0), 41, true, false, false, true); - public static UcdPropertyDetail Alphabetic_Detail = new UcdPropertyDetail ( - UcdProperty.Alphabetic, VersionInfo.getInstance(1,1,0), 42, + public static UcdPropertyDetail Alphabetic_Detail = new UcdPropertyDetail( + UcdProperty.Alphabetic, VersionInfo.getInstance(1, 1, 0), 42, true, false, false, true); - public static UcdPropertyDetail Other_Alphabetic_Detail = new UcdPropertyDetail ( - UcdProperty.Other_Alphabetic, VersionInfo.getInstance(1,1,0), 43, + public static UcdPropertyDetail Other_Alphabetic_Detail = new UcdPropertyDetail( + UcdProperty.Other_Alphabetic, VersionInfo.getInstance(1, 1, 0), 43, true, false, false, true); - public static UcdPropertyDetail Uppercase_Detail = 
new UcdPropertyDetail ( - UcdProperty.Uppercase, VersionInfo.getInstance(1,1,0), 44, + public static UcdPropertyDetail Uppercase_Detail = new UcdPropertyDetail( + UcdProperty.Uppercase, VersionInfo.getInstance(1, 1, 0), 44, true, false, false, true); - public static UcdPropertyDetail Other_Uppercase_Detail = new UcdPropertyDetail ( - UcdProperty.Other_Uppercase, VersionInfo.getInstance(1,1,0), 45, + public static UcdPropertyDetail Other_Uppercase_Detail = new UcdPropertyDetail( + UcdProperty.Other_Uppercase, VersionInfo.getInstance(1, 1, 0), 45, true, false, false, true); - public static UcdPropertyDetail Lowercase_Detail = new UcdPropertyDetail ( - UcdProperty.Lowercase, VersionInfo.getInstance(1,1,0), 46, + public static UcdPropertyDetail Lowercase_Detail = new UcdPropertyDetail( + UcdProperty.Lowercase, VersionInfo.getInstance(1, 1, 0), 46, true, false, false, true); - public static UcdPropertyDetail Other_Lowercase_Detail = new UcdPropertyDetail ( - UcdProperty.Other_Lowercase, VersionInfo.getInstance(1,1,0), 47, + public static UcdPropertyDetail Other_Lowercase_Detail = new UcdPropertyDetail( + UcdProperty.Other_Lowercase, VersionInfo.getInstance(1, 1, 0), 47, true, false, false, true); - public static UcdPropertyDetail Math_Detail = new UcdPropertyDetail ( - UcdProperty.Math, VersionInfo.getInstance(1,1,0), 48, + public static UcdPropertyDetail Math_Detail = new UcdPropertyDetail( + UcdProperty.Math, VersionInfo.getInstance(1, 1, 0), 48, true, false, false, true); - public static UcdPropertyDetail Other_Math_Detail = new UcdPropertyDetail ( - UcdProperty.Other_Math, VersionInfo.getInstance(1,1,0), 49, + public static UcdPropertyDetail Other_Math_Detail = new UcdPropertyDetail( + UcdProperty.Other_Math, VersionInfo.getInstance(1, 1, 0), 49, true, false, false, true); - public static UcdPropertyDetail Hex_Digit_Detail = new UcdPropertyDetail ( - UcdProperty.Hex_Digit, VersionInfo.getInstance(1,1,0), 50, + public static UcdPropertyDetail Hex_Digit_Detail = new 
UcdPropertyDetail( + UcdProperty.Hex_Digit, VersionInfo.getInstance(1, 1, 0), 50, true, false, false, true); - public static UcdPropertyDetail ASCII_Hex_Digit_Detail = new UcdPropertyDetail ( - UcdProperty.ASCII_Hex_Digit, VersionInfo.getInstance(1,1,0), 51, + public static UcdPropertyDetail ASCII_Hex_Digit_Detail = new UcdPropertyDetail( + UcdProperty.ASCII_Hex_Digit, VersionInfo.getInstance(1, 1, 0), 51, true, false, false, true); - public static UcdPropertyDetail Noncharacter_Code_Point_Detail = new UcdPropertyDetail ( - UcdProperty.Noncharacter_Code_Point, VersionInfo.getInstance(1,1,0), 52, + public static UcdPropertyDetail Noncharacter_Code_Point_Detail = new UcdPropertyDetail( + UcdProperty.Noncharacter_Code_Point, VersionInfo.getInstance(1, 1, 0), 52, true, false, false, true); - public static UcdPropertyDetail Variation_Selector_Detail = new UcdPropertyDetail ( - UcdProperty.Variation_Selector, VersionInfo.getInstance(1,1,0), 53, + public static UcdPropertyDetail Variation_Selector_Detail = new UcdPropertyDetail( + UcdProperty.Variation_Selector, VersionInfo.getInstance(1, 1, 0), 53, true, false, false, true); - public static UcdPropertyDetail Bidi_Control_Detail = new UcdPropertyDetail ( - UcdProperty.Bidi_Control, VersionInfo.getInstance(1,1,0), 54, + public static UcdPropertyDetail Bidi_Control_Detail = new UcdPropertyDetail( + UcdProperty.Bidi_Control, VersionInfo.getInstance(1, 1, 0), 54, true, false, false, true); - public static UcdPropertyDetail Join_Control_Detail = new UcdPropertyDetail ( - UcdProperty.Join_Control, VersionInfo.getInstance(1,1,0), 55, + public static UcdPropertyDetail Join_Control_Detail = new UcdPropertyDetail( + UcdProperty.Join_Control, VersionInfo.getInstance(1, 1, 0), 55, true, false, false, true); - public static UcdPropertyDetail Grapheme_Base_Detail = new UcdPropertyDetail ( - UcdProperty.Grapheme_Base, VersionInfo.getInstance(1,1,0), 56, + public static UcdPropertyDetail Grapheme_Base_Detail = new UcdPropertyDetail( + 
UcdProperty.Grapheme_Base, VersionInfo.getInstance(1, 1, 0), 56, true, false, false, true); - public static UcdPropertyDetail Grapheme_Extend_Detail = new UcdPropertyDetail ( - UcdProperty.Grapheme_Extend, VersionInfo.getInstance(1,1,0), 57, + public static UcdPropertyDetail Grapheme_Extend_Detail = new UcdPropertyDetail( + UcdProperty.Grapheme_Extend, VersionInfo.getInstance(1, 1, 0), 57, true, false, false, true); - public static UcdPropertyDetail Other_Grapheme_Extend_Detail = new UcdPropertyDetail ( - UcdProperty.Other_Grapheme_Extend, VersionInfo.getInstance(1,1,0), 58, + public static UcdPropertyDetail Other_Grapheme_Extend_Detail = new UcdPropertyDetail( + UcdProperty.Other_Grapheme_Extend, VersionInfo.getInstance(1, 1, 0), 58, true, false, false, true); - public static UcdPropertyDetail Grapheme_Link_Detail = new UcdPropertyDetail ( - UcdProperty.Grapheme_Link, VersionInfo.getInstance(1,1,0), 59, + public static UcdPropertyDetail Grapheme_Link_Detail = new UcdPropertyDetail( + UcdProperty.Grapheme_Link, VersionInfo.getInstance(1, 1, 0), 59, true, false, false, true); - public static UcdPropertyDetail Sentence_Terminal_Detail = new UcdPropertyDetail ( - UcdProperty.Sentence_Terminal, VersionInfo.getInstance(1,1,0), 60, + public static UcdPropertyDetail Sentence_Terminal_Detail = new UcdPropertyDetail( + UcdProperty.Sentence_Terminal, VersionInfo.getInstance(1, 1, 0), 60, true, false, false, true); - public static UcdPropertyDetail Extender_Detail = new UcdPropertyDetail ( - UcdProperty.Extender, VersionInfo.getInstance(1,1,0), 61, + public static UcdPropertyDetail Extender_Detail = new UcdPropertyDetail( + UcdProperty.Extender, VersionInfo.getInstance(1, 1, 0), 61, true, false, false, true); - public static UcdPropertyDetail Terminal_Punctuation_Detail = new UcdPropertyDetail ( - UcdProperty.Terminal_Punctuation, VersionInfo.getInstance(1,1,0), 62, + public static UcdPropertyDetail Terminal_Punctuation_Detail = new UcdPropertyDetail( + 
UcdProperty.Terminal_Punctuation, VersionInfo.getInstance(1, 1, 0), 62, true, false, false, true); - public static UcdPropertyDetail Diacritic_Detail = new UcdPropertyDetail ( - UcdProperty.Diacritic, VersionInfo.getInstance(1,1,0), 63, + public static UcdPropertyDetail Diacritic_Detail = new UcdPropertyDetail( + UcdProperty.Diacritic, VersionInfo.getInstance(1, 1, 0), 63, true, false, false, true); - public static UcdPropertyDetail Deprecated_Detail = new UcdPropertyDetail ( - UcdProperty.Deprecated, VersionInfo.getInstance(1,1,0), 64, + public static UcdPropertyDetail Deprecated_Detail = new UcdPropertyDetail( + UcdProperty.Deprecated, VersionInfo.getInstance(1, 1, 0), 64, true, false, false, true); - public static UcdPropertyDetail ID_Start_Detail = new UcdPropertyDetail ( - UcdProperty.ID_Start, VersionInfo.getInstance(1,1,0), 65, + public static UcdPropertyDetail ID_Start_Detail = new UcdPropertyDetail( + UcdProperty.ID_Start, VersionInfo.getInstance(1, 1, 0), 65, true, false, false, true); - public static UcdPropertyDetail Other_ID_Start_Detail = new UcdPropertyDetail ( - UcdProperty.Other_ID_Start, VersionInfo.getInstance(1,1,0), 66, + public static UcdPropertyDetail Other_ID_Start_Detail = new UcdPropertyDetail( + UcdProperty.Other_ID_Start, VersionInfo.getInstance(1, 1, 0), 66, true, false, false, true); - public static UcdPropertyDetail XID_Start_Detail = new UcdPropertyDetail ( - UcdProperty.XID_Start, VersionInfo.getInstance(1,1,0), 67, + public static UcdPropertyDetail XID_Start_Detail = new UcdPropertyDetail( + UcdProperty.XID_Start, VersionInfo.getInstance(1, 1, 0), 67, true, false, false, true); - public static UcdPropertyDetail ID_Continue_Detail = new UcdPropertyDetail ( - UcdProperty.ID_Continue, VersionInfo.getInstance(1,1,0), 68, + public static UcdPropertyDetail ID_Continue_Detail = new UcdPropertyDetail( + UcdProperty.ID_Continue, VersionInfo.getInstance(1, 1, 0), 68, true, false, false, true); - public static UcdPropertyDetail 
Other_ID_Continue_Detail = new UcdPropertyDetail ( - UcdProperty.Other_ID_Continue, VersionInfo.getInstance(1,1,0), 69, + public static UcdPropertyDetail Other_ID_Continue_Detail = new UcdPropertyDetail( + UcdProperty.Other_ID_Continue, VersionInfo.getInstance(1, 1, 0), 69, true, false, false, true); - public static UcdPropertyDetail XID_Continue_Detail = new UcdPropertyDetail ( - UcdProperty.XID_Continue, VersionInfo.getInstance(1,1,0), 70, + public static UcdPropertyDetail XID_Continue_Detail = new UcdPropertyDetail( + UcdProperty.XID_Continue, VersionInfo.getInstance(1, 1, 0), 70, true, false, false, true); - public static UcdPropertyDetail Soft_Dotted_Detail = new UcdPropertyDetail ( - UcdProperty.Soft_Dotted, VersionInfo.getInstance(1,1,0), 71, + public static UcdPropertyDetail Soft_Dotted_Detail = new UcdPropertyDetail( + UcdProperty.Soft_Dotted, VersionInfo.getInstance(1, 1, 0), 71, true, false, false, true); - public static UcdPropertyDetail Logical_Order_Exception_Detail = new UcdPropertyDetail ( - UcdProperty.Logical_Order_Exception, VersionInfo.getInstance(1,1,0), 72, + public static UcdPropertyDetail Logical_Order_Exception_Detail = new UcdPropertyDetail( + UcdProperty.Logical_Order_Exception, VersionInfo.getInstance(1, 1, 0), 72, true, false, false, true); - public static UcdPropertyDetail Pattern_White_Space_Detail = new UcdPropertyDetail ( - UcdProperty.Pattern_White_Space, VersionInfo.getInstance(1,1,0), 73, + public static UcdPropertyDetail Pattern_White_Space_Detail = new UcdPropertyDetail( + UcdProperty.Pattern_White_Space, VersionInfo.getInstance(1, 1, 0), 73, true, false, false, true); - public static UcdPropertyDetail Pattern_Syntax_Detail = new UcdPropertyDetail ( - UcdProperty.Pattern_Syntax, VersionInfo.getInstance(1,1,0), 74, + public static UcdPropertyDetail Pattern_Syntax_Detail = new UcdPropertyDetail( + UcdProperty.Pattern_Syntax, VersionInfo.getInstance(1, 1, 0), 74, true, false, false, true); - public static UcdPropertyDetail 
Grapheme_Cluster_Break_Detail = new UcdPropertyDetail ( - UcdProperty.Grapheme_Cluster_Break, VersionInfo.getInstance(1,1,0), 75, + public static UcdPropertyDetail Grapheme_Cluster_Break_Detail = new UcdPropertyDetail( + UcdProperty.Grapheme_Cluster_Break, VersionInfo.getInstance(1, 1, 0), 75, true, false, false, true); - public static UcdPropertyDetail Word_Break_Detail = new UcdPropertyDetail ( - UcdProperty.Word_Break, VersionInfo.getInstance(1,1,0), 76, + public static UcdPropertyDetail Word_Break_Detail = new UcdPropertyDetail( + UcdProperty.Word_Break, VersionInfo.getInstance(1, 1, 0), 76, true, false, false, true); - public static UcdPropertyDetail Sentence_Break_Detail = new UcdPropertyDetail ( - UcdProperty.Sentence_Break, VersionInfo.getInstance(1,1,0), 77, + public static UcdPropertyDetail Sentence_Break_Detail = new UcdPropertyDetail( + UcdProperty.Sentence_Break, VersionInfo.getInstance(1, 1, 0), 77, true, false, false, true); - public static UcdPropertyDetail Composition_Exclusion_Detail = new UcdPropertyDetail ( - UcdProperty.Composition_Exclusion, VersionInfo.getInstance(1,1,0), 78, + public static UcdPropertyDetail Composition_Exclusion_Detail = new UcdPropertyDetail( + UcdProperty.Composition_Exclusion, VersionInfo.getInstance(1, 1, 0), 78, true, false, false, true); - public static UcdPropertyDetail Full_Composition_Exclusion_Detail = new UcdPropertyDetail ( - UcdProperty.Full_Composition_Exclusion, VersionInfo.getInstance(1,1,0), 79, + public static UcdPropertyDetail Full_Composition_Exclusion_Detail = new UcdPropertyDetail( + UcdProperty.Full_Composition_Exclusion, VersionInfo.getInstance(1, 1, 0), 79, true, false, false, true); - public static UcdPropertyDetail NFC_Quick_Check_Detail = new UcdPropertyDetail ( - UcdProperty.NFC_Quick_Check, VersionInfo.getInstance(1,1,0), 80, + public static UcdPropertyDetail NFC_Quick_Check_Detail = new UcdPropertyDetail( + UcdProperty.NFC_Quick_Check, VersionInfo.getInstance(1, 1, 0), 80, true, false, false, 
true); - public static UcdPropertyDetail NFD_Quick_Check_Detail = new UcdPropertyDetail ( - UcdProperty.NFD_Quick_Check, VersionInfo.getInstance(1,1,0), 81, + public static UcdPropertyDetail NFD_Quick_Check_Detail = new UcdPropertyDetail( + UcdProperty.NFD_Quick_Check, VersionInfo.getInstance(1, 1, 0), 81, true, false, false, true); - public static UcdPropertyDetail NFKC_Quick_Check_Detail = new UcdPropertyDetail ( - UcdProperty.NFKC_Quick_Check, VersionInfo.getInstance(1,1,0), 82, + public static UcdPropertyDetail NFKC_Quick_Check_Detail = new UcdPropertyDetail( + UcdProperty.NFKC_Quick_Check, VersionInfo.getInstance(1, 1, 0), 82, true, false, false, true); - public static UcdPropertyDetail NFKD_Quick_Check_Detail = new UcdPropertyDetail ( - UcdProperty.NFKD_Quick_Check, VersionInfo.getInstance(1,1,0), 83, + public static UcdPropertyDetail NFKD_Quick_Check_Detail = new UcdPropertyDetail( + UcdProperty.NFKD_Quick_Check, VersionInfo.getInstance(1, 1, 0), 83, true, false, false, true); - public static UcdPropertyDetail Expands_On_NFC_Detail = new UcdPropertyDetail ( - UcdProperty.Expands_On_NFC, VersionInfo.getInstance(1,1,0), 84, + public static UcdPropertyDetail Expands_On_NFC_Detail = new UcdPropertyDetail( + UcdProperty.Expands_On_NFC, VersionInfo.getInstance(1, 1, 0), 84, true, false, false, true); - public static UcdPropertyDetail Expands_On_NFD_Detail = new UcdPropertyDetail ( - UcdProperty.Expands_On_NFD, VersionInfo.getInstance(1,1,0), 85, + public static UcdPropertyDetail Expands_On_NFD_Detail = new UcdPropertyDetail( + UcdProperty.Expands_On_NFD, VersionInfo.getInstance(1, 1, 0), 85, true, false, false, true); - public static UcdPropertyDetail Expands_On_NFKC_Detail = new UcdPropertyDetail ( - UcdProperty.Expands_On_NFKC, VersionInfo.getInstance(1,1,0), 86, + public static UcdPropertyDetail Expands_On_NFKC_Detail = new UcdPropertyDetail( + UcdProperty.Expands_On_NFKC, VersionInfo.getInstance(1, 1, 0), 86, true, false, false, true); - public static 
UcdPropertyDetail Expands_On_NFKD_Detail = new UcdPropertyDetail ( - UcdProperty.Expands_On_NFKD, VersionInfo.getInstance(1,1,0), 87, + public static UcdPropertyDetail Expands_On_NFKD_Detail = new UcdPropertyDetail( + UcdProperty.Expands_On_NFKD, VersionInfo.getInstance(1, 1, 0), 87, true, false, false, true); - public static UcdPropertyDetail FC_NFC_Closure_Detail = new UcdPropertyDetail ( - UcdProperty.FC_NFKC_Closure, VersionInfo.getInstance(1,1,0), 88, + public static UcdPropertyDetail FC_NFC_Closure_Detail = new UcdPropertyDetail( + UcdProperty.FC_NFKC_Closure, VersionInfo.getInstance(1, 1, 0), 88, true, false, false, true); - public static UcdPropertyDetail Case_Ignorable_Detail = new UcdPropertyDetail ( - UcdProperty.Case_Ignorable, VersionInfo.getInstance(5,2,0), 89, + public static UcdPropertyDetail Case_Ignorable_Detail = new UcdPropertyDetail( + UcdProperty.Case_Ignorable, VersionInfo.getInstance(5, 2, 0), 89, true, false, false, true); - public static UcdPropertyDetail Cased_Detail = new UcdPropertyDetail ( - UcdProperty.Cased, VersionInfo.getInstance(5,2,0), 90, + public static UcdPropertyDetail Cased_Detail = new UcdPropertyDetail( + UcdProperty.Cased, VersionInfo.getInstance(5, 2, 0), 90, true, false, false, true); - public static UcdPropertyDetail Changes_When_CaseFolded_Detail = new UcdPropertyDetail ( - UcdProperty.Changes_When_Casefolded, VersionInfo.getInstance(5,2,0), 91, + public static UcdPropertyDetail Changes_When_CaseFolded_Detail = new UcdPropertyDetail( + UcdProperty.Changes_When_Casefolded, VersionInfo.getInstance(5, 2, 0), 91, true, false, false, true); - public static UcdPropertyDetail Changes_When_CaseMapped_Detail = new UcdPropertyDetail ( - UcdProperty.Changes_When_Casemapped, VersionInfo.getInstance(5,2,0), 92, + public static UcdPropertyDetail Changes_When_CaseMapped_Detail = new UcdPropertyDetail( + UcdProperty.Changes_When_Casemapped, VersionInfo.getInstance(5, 2, 0), 92, true, false, false, true); - public static 
UcdPropertyDetail Changes_When_NFKC_Casefolded_Detail = new UcdPropertyDetail ( - UcdProperty.Changes_When_NFKC_Casefolded, VersionInfo.getInstance(5,2,0), 93, + public static UcdPropertyDetail Changes_When_NFKC_Casefolded_Detail = new UcdPropertyDetail( + UcdProperty.Changes_When_NFKC_Casefolded, VersionInfo.getInstance(5, 2, 0), 93, true, false, false, true); - public static UcdPropertyDetail Changes_When_Lowercased_Detail = new UcdPropertyDetail ( - UcdProperty.Changes_When_Lowercased, VersionInfo.getInstance(5,2,0), 94, + public static UcdPropertyDetail Changes_When_Lowercased_Detail = new UcdPropertyDetail( + UcdProperty.Changes_When_Lowercased, VersionInfo.getInstance(5, 2, 0), 94, true, false, false, true); - public static UcdPropertyDetail Changes_When_Titlecased_Detail = new UcdPropertyDetail ( - UcdProperty.Changes_When_Titlecased, VersionInfo.getInstance(5,2,0), 95, + public static UcdPropertyDetail Changes_When_Titlecased_Detail = new UcdPropertyDetail( + UcdProperty.Changes_When_Titlecased, VersionInfo.getInstance(5, 2, 0), 95, true, false, false, true); - public static UcdPropertyDetail Changes_When_Uppercased_Detail = new UcdPropertyDetail ( - UcdProperty.Changes_When_Uppercased, VersionInfo.getInstance(5,2,0), 96, + public static UcdPropertyDetail Changes_When_Uppercased_Detail = new UcdPropertyDetail( + UcdProperty.Changes_When_Uppercased, VersionInfo.getInstance(5, 2, 0), 96, true, false, false, true); - public static UcdPropertyDetail NFKC_Casefold_Detail = new UcdPropertyDetail ( - UcdProperty.NFKC_Casefold, VersionInfo.getInstance(5,2,0), 97, + public static UcdPropertyDetail NFKC_Casefold_Detail = new UcdPropertyDetail( + UcdProperty.NFKC_Casefold, VersionInfo.getInstance(5, 2, 0), 97, true, false, false, true); - public static UcdPropertyDetail Indic_Syllabic_Category_Detail = new UcdPropertyDetail ( - UcdProperty.Indic_Syllabic_Category, VersionInfo.getInstance(6,0,0), 98, + public static UcdPropertyDetail Indic_Syllabic_Category_Detail = 
new UcdPropertyDetail( + UcdProperty.Indic_Syllabic_Category, VersionInfo.getInstance(6, 0, 0), 98, true, false, false, true); -// public static UcdPropertyDetail Indic_Matra_Category_Detail = new UcdPropertyDetail ( + // public static UcdPropertyDetail Indic_Matra_Category_Detail = new UcdPropertyDetail ( // UcdProperty.Indic_Matra_Category, VersionInfo.getInstance(6,0,0), VersionInfo.getInstance(7,0,0), 99, // true, false, false, true); - public static UcdPropertyDetail Indic_Positional_Category_Detail = new UcdPropertyDetail ( - UcdProperty.Indic_Positional_Category, VersionInfo.getInstance(8,0,0), 100, + public static UcdPropertyDetail Indic_Positional_Category_Detail = new UcdPropertyDetail( + UcdProperty.Indic_Positional_Category, VersionInfo.getInstance(8, 0, 0), 100, true, false, false, true); - public static UcdPropertyDetail kJa_Detail = new UcdPropertyDetail ( - UcdProperty.kJa, VersionInfo.getInstance(8,0,0), 101, + public static UcdPropertyDetail kJa_Detail = new UcdPropertyDetail( + UcdProperty.kJa, VersionInfo.getInstance(8, 0, 0), 101, false, true, false, true); - public static UcdPropertyDetail Prepended_Concatenation_Mark_Detail = new UcdPropertyDetail ( - UcdProperty.Prepended_Concatenation_Mark, VersionInfo.getInstance(9,0,0), 102, + public static UcdPropertyDetail Prepended_Concatenation_Mark_Detail = new UcdPropertyDetail( + UcdProperty.Prepended_Concatenation_Mark, VersionInfo.getInstance(9, 0, 0), 102, true, false, false, true); - public static UcdPropertyDetail Vertical_Orientation_Detail = new UcdPropertyDetail ( - UcdProperty.Vertical_Orientation, VersionInfo.getInstance(10,0,0), 103, + public static UcdPropertyDetail Vertical_Orientation_Detail = new UcdPropertyDetail( + UcdProperty.Vertical_Orientation, VersionInfo.getInstance(10, 0, 0), 103, true, false, false, true); - public static UcdPropertyDetail Regional_Indicator_Detail = new UcdPropertyDetail ( - UcdProperty.Regional_Indicator, VersionInfo.getInstance(10,0,0), 104, + public 
static UcdPropertyDetail Regional_Indicator_Detail = new UcdPropertyDetail( + UcdProperty.Regional_Indicator, VersionInfo.getInstance(10, 0, 0), 104, true, false, false, true); - public static UcdPropertyDetail Block_Detail = new UcdPropertyDetail ( - UcdProperty.Block, VersionInfo.getInstance(10,0,0), 105, + public static UcdPropertyDetail Block_Detail = new UcdPropertyDetail( + UcdProperty.Block, VersionInfo.getInstance(10, 0, 0), 105, true, false, false, true); - public static UcdPropertyDetail Equivalent_Unified_Ideograph_Detail = new UcdPropertyDetail ( - UcdProperty.Equivalent_Unified_Ideograph, VersionInfo.getInstance(11,0,0), 106, + public static UcdPropertyDetail Equivalent_Unified_Ideograph_Detail = new UcdPropertyDetail( + UcdProperty.Equivalent_Unified_Ideograph, VersionInfo.getInstance(11, 0, 0), 106, false, true, false, true); - public static UcdPropertyDetail kCompatibilityVariant_Detail = new UcdPropertyDetail ( - UcdProperty.kCompatibilityVariant, VersionInfo.getInstance(11,0,0), 107, + public static UcdPropertyDetail kCompatibilityVariant_Detail = new UcdPropertyDetail( + UcdProperty.kCompatibilityVariant, VersionInfo.getInstance(11, 0, 0), 107, false, true, true, true); - public static UcdPropertyDetail kRSUnicode_Detail = new UcdPropertyDetail ( - UcdProperty.kRSUnicode, VersionInfo.getInstance(11,0,0), 108, + public static UcdPropertyDetail kRSUnicode_Detail = new UcdPropertyDetail( + UcdProperty.kRSUnicode, VersionInfo.getInstance(11, 0, 0), 108, false, true, false, true); -// public static UcdPropertyDetail kIRG_RSIndex_Detail = new UcdPropertyDetail ( + // public static UcdPropertyDetail kIRG_RSIndex_Detail = new UcdPropertyDetail ( // UcdProperty.kIRG_RSIndex, VersionInfo.getInstance(11,0,0), 109, // false, true, false, true); - public static UcdPropertyDetail kIRG_GSource_Detail = new UcdPropertyDetail ( - UcdProperty.kIRG_GSource, VersionInfo.getInstance(11,0,0), 110, + public static UcdPropertyDetail kIRG_GSource_Detail = new 
UcdPropertyDetail( + UcdProperty.kIRG_GSource, VersionInfo.getInstance(11, 0, 0), 110, false, true, true, true); - public static UcdPropertyDetail kIRG_TSource_Detail = new UcdPropertyDetail ( - UcdProperty.kIRG_TSource, VersionInfo.getInstance(11,0,0), 111, + public static UcdPropertyDetail kIRG_TSource_Detail = new UcdPropertyDetail( + UcdProperty.kIRG_TSource, VersionInfo.getInstance(11, 0, 0), 111, false, true, true, true); - public static UcdPropertyDetail kIRG_JSource_Detail = new UcdPropertyDetail ( - UcdProperty.kIRG_JSource, VersionInfo.getInstance(11,0,0), 112, + public static UcdPropertyDetail kIRG_JSource_Detail = new UcdPropertyDetail( + UcdProperty.kIRG_JSource, VersionInfo.getInstance(11, 0, 0), 112, false, true, true, true); - public static UcdPropertyDetail kIRG_KSource_Detail = new UcdPropertyDetail ( - UcdProperty.kIRG_KSource, VersionInfo.getInstance(11,0,0), 113, + public static UcdPropertyDetail kIRG_KSource_Detail = new UcdPropertyDetail( + UcdProperty.kIRG_KSource, VersionInfo.getInstance(11, 0, 0), 113, false, true, true, true); - public static UcdPropertyDetail kIRG_KPSource_Detail = new UcdPropertyDetail ( - UcdProperty.kIRG_KPSource, VersionInfo.getInstance(11,0,0), 114, + public static UcdPropertyDetail kIRG_KPSource_Detail = new UcdPropertyDetail( + UcdProperty.kIRG_KPSource, VersionInfo.getInstance(11, 0, 0), 114, false, true, true, true); - public static UcdPropertyDetail kIRG_VSource_Detail = new UcdPropertyDetail ( - UcdProperty.kIRG_VSource, VersionInfo.getInstance(11,0,0), 115, + public static UcdPropertyDetail kIRG_VSource_Detail = new UcdPropertyDetail( + UcdProperty.kIRG_VSource, VersionInfo.getInstance(11, 0, 0), 115, false, true, true, true); - public static UcdPropertyDetail kIRG_HSource_Detail = new UcdPropertyDetail ( - UcdProperty.kIRG_HSource, VersionInfo.getInstance(11,0,0), 116, + public static UcdPropertyDetail kIRG_HSource_Detail = new UcdPropertyDetail( + UcdProperty.kIRG_HSource, VersionInfo.getInstance(11, 0, 0), 
116, false, true, true, true); - public static UcdPropertyDetail kIRG_USource_Detail = new UcdPropertyDetail ( - UcdProperty.kIRG_USource, VersionInfo.getInstance(11,0,0), 117, + public static UcdPropertyDetail kIRG_USource_Detail = new UcdPropertyDetail( + UcdProperty.kIRG_USource, VersionInfo.getInstance(11, 0, 0), 117, false, true, true, true); - public static UcdPropertyDetail kIRG_MSource_Detail = new UcdPropertyDetail ( - UcdProperty.kIRG_MSource, VersionInfo.getInstance(11,0,0), 118, + public static UcdPropertyDetail kIRG_MSource_Detail = new UcdPropertyDetail( + UcdProperty.kIRG_MSource, VersionInfo.getInstance(11, 0, 0), 118, false, true, true, true); - public static UcdPropertyDetail kIRG_UKSource_Detail = new UcdPropertyDetail ( - UcdProperty.kIRG_UKSource, VersionInfo.getInstance(13,0,0), 119, + public static UcdPropertyDetail kIRG_UKSource_Detail = new UcdPropertyDetail( + UcdProperty.kIRG_UKSource, VersionInfo.getInstance(13, 0, 0), 119, false, true, true, true); - public static UcdPropertyDetail kIRG_SSource_Detail = new UcdPropertyDetail ( - UcdProperty.kIRG_SSource, VersionInfo.getInstance(13,0,0), 120, + public static UcdPropertyDetail kIRG_SSource_Detail = new UcdPropertyDetail( + UcdProperty.kIRG_SSource, VersionInfo.getInstance(13, 0, 0), 120, false, true, true, true); - public static UcdPropertyDetail kIICore_Detail = new UcdPropertyDetail ( - UcdProperty.kIICore, VersionInfo.getInstance(11,0,0), 121, + public static UcdPropertyDetail kIICore_Detail = new UcdPropertyDetail( + UcdProperty.kIICore, VersionInfo.getInstance(11, 0, 0), 121, false, true, false, true); - public static UcdPropertyDetail kUnihanCore2020_Detail = new UcdPropertyDetail ( - UcdProperty.kUnihanCore2020, VersionInfo.getInstance(11,0,0), 122, + public static UcdPropertyDetail kUnihanCore2020_Detail = new UcdPropertyDetail( + UcdProperty.kUnihanCore2020, VersionInfo.getInstance(11, 0, 0), 122, false, true, false, true); - public static UcdPropertyDetail kGB0_Detail = new 
UcdPropertyDetail ( - UcdProperty.kGB0, VersionInfo.getInstance(11,0,0), 123, + public static UcdPropertyDetail kGB0_Detail = new UcdPropertyDetail( + UcdProperty.kGB0, VersionInfo.getInstance(11, 0, 0), 123, false, true, false, true); - public static UcdPropertyDetail kGB1_Detail = new UcdPropertyDetail ( - UcdProperty.kGB1, VersionInfo.getInstance(11,0,0), 124, + public static UcdPropertyDetail kGB1_Detail = new UcdPropertyDetail( + UcdProperty.kGB1, VersionInfo.getInstance(11, 0, 0), 124, false, true, false, true); - public static UcdPropertyDetail kGB3_Detail = new UcdPropertyDetail ( - UcdProperty.kGB3, VersionInfo.getInstance(11,0,0), 125, + public static UcdPropertyDetail kGB3_Detail = new UcdPropertyDetail( + UcdProperty.kGB3, VersionInfo.getInstance(11, 0, 0), 125, false, true, false, true); - public static UcdPropertyDetail kGB5_Detail = new UcdPropertyDetail ( - UcdProperty.kGB5, VersionInfo.getInstance(11,0,0), 126, + public static UcdPropertyDetail kGB5_Detail = new UcdPropertyDetail( + UcdProperty.kGB5, VersionInfo.getInstance(11, 0, 0), 126, false, true, false, true); - public static UcdPropertyDetail kGB7_Detail = new UcdPropertyDetail ( - UcdProperty.kGB7, VersionInfo.getInstance(11,0,0), 127, + public static UcdPropertyDetail kGB7_Detail = new UcdPropertyDetail( + UcdProperty.kGB7, VersionInfo.getInstance(11, 0, 0), 127, false, true, false, true); - public static UcdPropertyDetail kGB8_Detail = new UcdPropertyDetail ( - UcdProperty.kGB8, VersionInfo.getInstance(11,0,0), 128, + public static UcdPropertyDetail kGB8_Detail = new UcdPropertyDetail( + UcdProperty.kGB8, VersionInfo.getInstance(11, 0, 0), 128, false, true, false, true); - public static UcdPropertyDetail kCNS1986_Detail = new UcdPropertyDetail ( - UcdProperty.kCNS1986, VersionInfo.getInstance(11,0,0), 129, + public static UcdPropertyDetail kCNS1986_Detail = new UcdPropertyDetail( + UcdProperty.kCNS1986, VersionInfo.getInstance(11, 0, 0), 129, false, true, false, true); - public static 
UcdPropertyDetail kCNS1992_Detail = new UcdPropertyDetail ( - UcdProperty.kCNS1992, VersionInfo.getInstance(11,0,0), 130, + public static UcdPropertyDetail kCNS1992_Detail = new UcdPropertyDetail( + UcdProperty.kCNS1992, VersionInfo.getInstance(11, 0, 0), 130, false, true, false, true); - public static UcdPropertyDetail kJis0_Detail = new UcdPropertyDetail ( - UcdProperty.kJis0, VersionInfo.getInstance(11,0,0), 131, + public static UcdPropertyDetail kJis0_Detail = new UcdPropertyDetail( + UcdProperty.kJis0, VersionInfo.getInstance(11, 0, 0), 131, false, true, false, true); - public static UcdPropertyDetail kJis1_Detail = new UcdPropertyDetail ( - UcdProperty.kJis1, VersionInfo.getInstance(11,0,0), 132, + public static UcdPropertyDetail kJis1_Detail = new UcdPropertyDetail( + UcdProperty.kJis1, VersionInfo.getInstance(11, 0, 0), 132, false, true, false, true); - public static UcdPropertyDetail kJIS0213_Detail = new UcdPropertyDetail ( - UcdProperty.kJIS0213, VersionInfo.getInstance(11,0,0), 133, + public static UcdPropertyDetail kJIS0213_Detail = new UcdPropertyDetail( + UcdProperty.kJIS0213, VersionInfo.getInstance(11, 0, 0), 133, false, true, false, true); - public static UcdPropertyDetail kKSC0_Detail = new UcdPropertyDetail ( - UcdProperty.kKSC0, VersionInfo.getInstance(11,0,0), - VersionInfo.getInstance(15,1,0), 134, + public static UcdPropertyDetail kKSC0_Detail = new UcdPropertyDetail( + UcdProperty.kKSC0, VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(15, 1, 0), 134, false, true, false, true); - public static UcdPropertyDetail kKSC1_Detail = new UcdPropertyDetail ( - UcdProperty.kKSC1, VersionInfo.getInstance(11,0,0), - VersionInfo.getInstance(15,1,0), 135, + public static UcdPropertyDetail kKSC1_Detail = new UcdPropertyDetail( + UcdProperty.kKSC1, VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(15, 1, 0), 135, false, true, false, true); - public static UcdPropertyDetail kKPS0_Detail = new UcdPropertyDetail ( - UcdProperty.kKPS0, 
VersionInfo.getInstance(11,0,0), - VersionInfo.getInstance(15,1,0), 136, + public static UcdPropertyDetail kKPS0_Detail = new UcdPropertyDetail( + UcdProperty.kKPS0, VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(15, 1, 0), 136, false, true, false, true); - public static UcdPropertyDetail kKPS1_Detail = new UcdPropertyDetail ( - UcdProperty.kKPS1, VersionInfo.getInstance(11,0,0), - VersionInfo.getInstance(15,1,0), 137, + public static UcdPropertyDetail kKPS1_Detail = new UcdPropertyDetail( + UcdProperty.kKPS1, VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(15, 1, 0), 137, false, true, false, true); - public static UcdPropertyDetail kHKSCS_Detail = new UcdPropertyDetail ( - UcdProperty.kHKSCS, VersionInfo.getInstance(11,0,0), - VersionInfo.getInstance(15,1,0), 138, + public static UcdPropertyDetail kHKSCS_Detail = new UcdPropertyDetail( + UcdProperty.kHKSCS, VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(15, 1, 0), 138, false, true, false, true); - public static UcdPropertyDetail kCantonese_Detail = new UcdPropertyDetail ( - UcdProperty.kCantonese, VersionInfo.getInstance(11,0,0), 139, + public static UcdPropertyDetail kCantonese_Detail = new UcdPropertyDetail( + UcdProperty.kCantonese, VersionInfo.getInstance(11, 0, 0), 139, false, true, false, true); - public static UcdPropertyDetail kHangul_Detail = new UcdPropertyDetail ( - UcdProperty.kHangul, VersionInfo.getInstance(11,0,0), 140, + public static UcdPropertyDetail kHangul_Detail = new UcdPropertyDetail( + UcdProperty.kHangul, VersionInfo.getInstance(11, 0, 0), 140, false, true, false, true); - public static UcdPropertyDetail kDefinition_Detail = new UcdPropertyDetail ( - UcdProperty.kDefinition, VersionInfo.getInstance(11,0,0), 141, + public static UcdPropertyDetail kDefinition_Detail = new UcdPropertyDetail( + UcdProperty.kDefinition, VersionInfo.getInstance(11, 0, 0), 141, false, true, false, true); - public static UcdPropertyDetail kHanYu_Detail = new 
UcdPropertyDetail ( - UcdProperty.kHanYu, VersionInfo.getInstance(11,0,0), 142, + public static UcdPropertyDetail kHanYu_Detail = new UcdPropertyDetail( + UcdProperty.kHanYu, VersionInfo.getInstance(11, 0, 0), 142, false, true, false, true); -// public static UcdPropertyDetail kAlternateHanYu_Detail = new UcdPropertyDetail ( + // public static UcdPropertyDetail kAlternateHanYu_Detail = new UcdPropertyDetail ( // UcdProperty.kAlternateHanYu, VersionInfo.getInstance(11,0,0), 143, // false, true, false, true); - public static UcdPropertyDetail kMandarin_Detail = new UcdPropertyDetail ( - UcdProperty.kMandarin, VersionInfo.getInstance(11,0,0), 144, + public static UcdPropertyDetail kMandarin_Detail = new UcdPropertyDetail( + UcdProperty.kMandarin, VersionInfo.getInstance(11, 0, 0), 144, false, true, false, true); - public static UcdPropertyDetail kCihaiT_Detail = new UcdPropertyDetail ( - UcdProperty.kCihaiT, VersionInfo.getInstance(11,0,0), 145, + public static UcdPropertyDetail kCihaiT_Detail = new UcdPropertyDetail( + UcdProperty.kCihaiT, VersionInfo.getInstance(11, 0, 0), 145, false, true, false, true); - public static UcdPropertyDetail kSBGY_Detail = new UcdPropertyDetail ( - UcdProperty.kSBGY, VersionInfo.getInstance(11,0,0), 146, + public static UcdPropertyDetail kSBGY_Detail = new UcdPropertyDetail( + UcdProperty.kSBGY, VersionInfo.getInstance(11, 0, 0), 146, false, true, false, true); - public static UcdPropertyDetail kNelson_Detail = new UcdPropertyDetail ( - UcdProperty.kNelson, VersionInfo.getInstance(11,0,0), 147, + public static UcdPropertyDetail kNelson_Detail = new UcdPropertyDetail( + UcdProperty.kNelson, VersionInfo.getInstance(11, 0, 0), 147, false, true, false, true); - public static UcdPropertyDetail kCowles_Detail = new UcdPropertyDetail ( - UcdProperty.kCowles, VersionInfo.getInstance(11,0,0), 148, + public static UcdPropertyDetail kCowles_Detail = new UcdPropertyDetail( + UcdProperty.kCowles, VersionInfo.getInstance(11, 0, 0), 148, false, true, 
false, true); - public static UcdPropertyDetail kMatthews_Detail = new UcdPropertyDetail ( - UcdProperty.kMatthews, VersionInfo.getInstance(11,0,0), 149, + public static UcdPropertyDetail kMatthews_Detail = new UcdPropertyDetail( + UcdProperty.kMatthews, VersionInfo.getInstance(11, 0, 0), 149, false, true, false, true); - public static UcdPropertyDetail kOtherNumeric_Detail = new UcdPropertyDetail ( - UcdProperty.kOtherNumeric, VersionInfo.getInstance(11,0,0), 150, + public static UcdPropertyDetail kOtherNumeric_Detail = new UcdPropertyDetail( + UcdProperty.kOtherNumeric, VersionInfo.getInstance(11, 0, 0), 150, false, true, false, true); - public static UcdPropertyDetail kPhonetic_Detail = new UcdPropertyDetail ( - UcdProperty.kPhonetic, VersionInfo.getInstance(11,0,0), 151, + public static UcdPropertyDetail kPhonetic_Detail = new UcdPropertyDetail( + UcdProperty.kPhonetic, VersionInfo.getInstance(11, 0, 0), 151, false, true, false, true); - public static UcdPropertyDetail kGSR_Detail = new UcdPropertyDetail ( - UcdProperty.kGSR, VersionInfo.getInstance(11,0,0), 152, + public static UcdPropertyDetail kGSR_Detail = new UcdPropertyDetail( + UcdProperty.kGSR, VersionInfo.getInstance(11, 0, 0), 152, false, true, false, true); - public static UcdPropertyDetail kFenn_Detail = new UcdPropertyDetail ( - UcdProperty.kFenn, VersionInfo.getInstance(11,0,0), 153, + public static UcdPropertyDetail kFenn_Detail = new UcdPropertyDetail( + UcdProperty.kFenn, VersionInfo.getInstance(11, 0, 0), 153, false, true, false, true); - public static UcdPropertyDetail kFennIndex_Detail = new UcdPropertyDetail ( - UcdProperty.kFennIndex, VersionInfo.getInstance(11,0,0), 154, + public static UcdPropertyDetail kFennIndex_Detail = new UcdPropertyDetail( + UcdProperty.kFennIndex, VersionInfo.getInstance(11, 0, 0), 154, false, true, false, true); - public static UcdPropertyDetail kKarlgren_Detail = new UcdPropertyDetail ( - UcdProperty.kKarlgren, VersionInfo.getInstance(11,0,0), 155, + public 
static UcdPropertyDetail kKarlgren_Detail = new UcdPropertyDetail( + UcdProperty.kKarlgren, VersionInfo.getInstance(11, 0, 0), 155, false, true, false, true); - public static UcdPropertyDetail kCangjie_Detail = new UcdPropertyDetail ( - UcdProperty.kCangjie, VersionInfo.getInstance(11,0,0), 156, + public static UcdPropertyDetail kCangjie_Detail = new UcdPropertyDetail( + UcdProperty.kCangjie, VersionInfo.getInstance(11, 0, 0), 156, false, true, false, true); - public static UcdPropertyDetail kMeyerWempe_Detail = new UcdPropertyDetail ( - UcdProperty.kMeyerWempe, VersionInfo.getInstance(11,0,0), 157, + public static UcdPropertyDetail kMeyerWempe_Detail = new UcdPropertyDetail( + UcdProperty.kMeyerWempe, VersionInfo.getInstance(11, 0, 0), 157, false, true, false, true); - public static UcdPropertyDetail kSimplifiedVariant_Detail = new UcdPropertyDetail ( - UcdProperty.kSimplifiedVariant, VersionInfo.getInstance(11,0,0), 158, + public static UcdPropertyDetail kSimplifiedVariant_Detail = new UcdPropertyDetail( + UcdProperty.kSimplifiedVariant, VersionInfo.getInstance(11, 0, 0), 158, false, true, false, true); - public static UcdPropertyDetail kTraditionalVariant_Detail = new UcdPropertyDetail ( - UcdProperty.kTraditionalVariant, VersionInfo.getInstance(11,0,0), 159, + public static UcdPropertyDetail kTraditionalVariant_Detail = new UcdPropertyDetail( + UcdProperty.kTraditionalVariant, VersionInfo.getInstance(11, 0, 0), 159, false, true, false, true); - public static UcdPropertyDetail kSpecializedSemanticVariant_Detail = new UcdPropertyDetail ( - UcdProperty.kSpecializedSemanticVariant, VersionInfo.getInstance(11,0,0), 160, + public static UcdPropertyDetail kSpecializedSemanticVariant_Detail = new UcdPropertyDetail( + UcdProperty.kSpecializedSemanticVariant, VersionInfo.getInstance(11, 0, 0), 160, false, true, false, true); - public static UcdPropertyDetail kSemanticVariant_Detail = new UcdPropertyDetail ( - UcdProperty.kSemanticVariant, VersionInfo.getInstance(11,0,0), 
161, + public static UcdPropertyDetail kSemanticVariant_Detail = new UcdPropertyDetail( + UcdProperty.kSemanticVariant, VersionInfo.getInstance(11, 0, 0), 161, false, true, false, true); - public static UcdPropertyDetail kVietnamese_Detail = new UcdPropertyDetail ( - UcdProperty.kVietnamese, VersionInfo.getInstance(11,0,0), 162, + public static UcdPropertyDetail kVietnamese_Detail = new UcdPropertyDetail( + UcdProperty.kVietnamese, VersionInfo.getInstance(11, 0, 0), 162, false, true, false, true); - public static UcdPropertyDetail kLau_Detail = new UcdPropertyDetail ( - UcdProperty.kLau, VersionInfo.getInstance(11,0,0), 163, + public static UcdPropertyDetail kLau_Detail = new UcdPropertyDetail( + UcdProperty.kLau, VersionInfo.getInstance(11, 0, 0), 163, false, true, false, true); - public static UcdPropertyDetail kTang_Detail = new UcdPropertyDetail ( - UcdProperty.kTang, VersionInfo.getInstance(11,0,0), 164, + public static UcdPropertyDetail kTang_Detail = new UcdPropertyDetail( + UcdProperty.kTang, VersionInfo.getInstance(11, 0, 0), 164, false, true, false, true); - public static UcdPropertyDetail kZVariant_Detail = new UcdPropertyDetail ( - UcdProperty.kZVariant, VersionInfo.getInstance(11,0,0), 165, + public static UcdPropertyDetail kZVariant_Detail = new UcdPropertyDetail( + UcdProperty.kZVariant, VersionInfo.getInstance(11, 0, 0), 165, false, true, false, true); - public static UcdPropertyDetail kJapaneseKun_Detail = new UcdPropertyDetail ( - UcdProperty.kJapaneseKun, VersionInfo.getInstance(11,0,0), 166, + public static UcdPropertyDetail kJapaneseKun_Detail = new UcdPropertyDetail( + UcdProperty.kJapaneseKun, VersionInfo.getInstance(11, 0, 0), 166, false, true, false, true); - public static UcdPropertyDetail kJapaneseOn_Detail = new UcdPropertyDetail ( - UcdProperty.kJapaneseOn, VersionInfo.getInstance(11,0,0), 167, + public static UcdPropertyDetail kJapaneseOn_Detail = new UcdPropertyDetail( + UcdProperty.kJapaneseOn, VersionInfo.getInstance(11, 0, 0), 167, 
false, true, false, true); - public static UcdPropertyDetail kKangXi_Detail = new UcdPropertyDetail ( - UcdProperty.kKangXi, VersionInfo.getInstance(11,0,0), 168, + public static UcdPropertyDetail kKangXi_Detail = new UcdPropertyDetail( + UcdProperty.kKangXi, VersionInfo.getInstance(11, 0, 0), 168, false, true, false, true); -// public static UcdPropertyDetail kAlternateKangXi_Detail = new UcdPropertyDetail ( + // public static UcdPropertyDetail kAlternateKangXi_Detail = new UcdPropertyDetail ( // UcdProperty.kAlternateKangXi, VersionInfo.getInstance(11,0,0), 169, // false, true, false, true); - public static UcdPropertyDetail kBigFive_Detail = new UcdPropertyDetail ( - UcdProperty.kBigFive, VersionInfo.getInstance(11,0,0), 170, + public static UcdPropertyDetail kBigFive_Detail = new UcdPropertyDetail( + UcdProperty.kBigFive, VersionInfo.getInstance(11, 0, 0), 170, false, true, false, true); - public static UcdPropertyDetail kCCCII_Detail = new UcdPropertyDetail ( - UcdProperty.kCCCII, VersionInfo.getInstance(11,0,0), 171, + public static UcdPropertyDetail kCCCII_Detail = new UcdPropertyDetail( + UcdProperty.kCCCII, VersionInfo.getInstance(11, 0, 0), 171, false, true, false, true); - public static UcdPropertyDetail kDaeJaweon_Detail = new UcdPropertyDetail ( - UcdProperty.kDaeJaweon, VersionInfo.getInstance(11,0,0), 172, + public static UcdPropertyDetail kDaeJaweon_Detail = new UcdPropertyDetail( + UcdProperty.kDaeJaweon, VersionInfo.getInstance(11, 0, 0), 172, false, true, false, true); - public static UcdPropertyDetail kEACC_Detail = new UcdPropertyDetail ( - UcdProperty.kEACC, VersionInfo.getInstance(11,0,0), 173, + public static UcdPropertyDetail kEACC_Detail = new UcdPropertyDetail( + UcdProperty.kEACC, VersionInfo.getInstance(11, 0, 0), 173, false, true, false, true); - public static UcdPropertyDetail kFrequency_Detail = new UcdPropertyDetail ( - UcdProperty.kFrequency, VersionInfo.getInstance(11,0,0), - VersionInfo.getInstance(16,0,0), 174, + public static 
UcdPropertyDetail kFrequency_Detail = new UcdPropertyDetail( + UcdProperty.kFrequency, VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(16, 0, 0), 174, false, true, false, true); - public static UcdPropertyDetail kGradeLevel_Detail = new UcdPropertyDetail ( - UcdProperty.kGradeLevel, VersionInfo.getInstance(11,0,0), 175, + public static UcdPropertyDetail kGradeLevel_Detail = new UcdPropertyDetail( + UcdProperty.kGradeLevel, VersionInfo.getInstance(11, 0, 0), 175, false, true, false, true); - public static UcdPropertyDetail kHDZRadBreak_Detail = new UcdPropertyDetail ( - UcdProperty.kHDZRadBreak, VersionInfo.getInstance(11,0,0), 176, + public static UcdPropertyDetail kHDZRadBreak_Detail = new UcdPropertyDetail( + UcdProperty.kHDZRadBreak, VersionInfo.getInstance(11, 0, 0), 176, false, true, false, true); - public static UcdPropertyDetail kHKGlyph_Detail = new UcdPropertyDetail ( - UcdProperty.kHKGlyph, VersionInfo.getInstance(11,0,0), 177, + public static UcdPropertyDetail kHKGlyph_Detail = new UcdPropertyDetail( + UcdProperty.kHKGlyph, VersionInfo.getInstance(11, 0, 0), 177, false, true, false, true); - public static UcdPropertyDetail kHanyuPinlu_Detail = new UcdPropertyDetail ( - UcdProperty.kHanyuPinlu, VersionInfo.getInstance(11,0,0), 178, + public static UcdPropertyDetail kHanyuPinlu_Detail = new UcdPropertyDetail( + UcdProperty.kHanyuPinlu, VersionInfo.getInstance(11, 0, 0), 178, false, true, false, true); - public static UcdPropertyDetail kHanyuPinyin_Detail = new UcdPropertyDetail ( - UcdProperty.kHanyuPinyin, VersionInfo.getInstance(11,0,0), 179, + public static UcdPropertyDetail kHanyuPinyin_Detail = new UcdPropertyDetail( + UcdProperty.kHanyuPinyin, VersionInfo.getInstance(11, 0, 0), 179, false, true, false, true); - public static UcdPropertyDetail kIRGHanyuDaZidian_Detail = new UcdPropertyDetail ( - UcdProperty.kIRGHanyuDaZidian, VersionInfo.getInstance(11,0,0), 180, + public static UcdPropertyDetail kIRGHanyuDaZidian_Detail = new 
UcdPropertyDetail( + UcdProperty.kIRGHanyuDaZidian, VersionInfo.getInstance(11, 0, 0), 180, false, true, false, true); - public static UcdPropertyDetail kIRGKangXi_Detail = new UcdPropertyDetail ( - UcdProperty.kIRGKangXi, VersionInfo.getInstance(11,0,0), 181, + public static UcdPropertyDetail kIRGKangXi_Detail = new UcdPropertyDetail( + UcdProperty.kIRGKangXi, VersionInfo.getInstance(11, 0, 0), 181, false, true, false, true); - public static UcdPropertyDetail kIRGDaeJaweon_Detail = new UcdPropertyDetail ( - UcdProperty.kIRGDaeJaweon, VersionInfo.getInstance(11,0,0), 182, + public static UcdPropertyDetail kIRGDaeJaweon_Detail = new UcdPropertyDetail( + UcdProperty.kIRGDaeJaweon, VersionInfo.getInstance(11, 0, 0), 182, false, true, false, true); - public static UcdPropertyDetail kIRGDaiKanwaZiten_Detail = new UcdPropertyDetail ( - UcdProperty.kIRGDaiKanwaZiten, VersionInfo.getInstance(11,0,0), - VersionInfo.getInstance(15,1,0), 183, + public static UcdPropertyDetail kIRGDaiKanwaZiten_Detail = new UcdPropertyDetail( + UcdProperty.kIRGDaiKanwaZiten, VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(15, 1, 0), 183, false, true, false, true); - public static UcdPropertyDetail kKorean_Detail = new UcdPropertyDetail ( - UcdProperty.kKorean, VersionInfo.getInstance(11,0,0), 184, + public static UcdPropertyDetail kKorean_Detail = new UcdPropertyDetail( + UcdProperty.kKorean, VersionInfo.getInstance(11, 0, 0), 184, false, true, false, true); - public static UcdPropertyDetail kMainlandTelegraph_Detail = new UcdPropertyDetail ( - UcdProperty.kMainlandTelegraph, VersionInfo.getInstance(11,0,0), 185, + public static UcdPropertyDetail kMainlandTelegraph_Detail = new UcdPropertyDetail( + UcdProperty.kMainlandTelegraph, VersionInfo.getInstance(11, 0, 0), 185, false, true, false, true); - public static UcdPropertyDetail kMorohashi_Detail = new UcdPropertyDetail ( - UcdProperty.kMorohashi, VersionInfo.getInstance(11,0,0), 186, + public static UcdPropertyDetail 
kMorohashi_Detail = new UcdPropertyDetail( + UcdProperty.kMorohashi, VersionInfo.getInstance(11, 0, 0), 186, false, true, false, true); -// public static UcdPropertyDetail kAlternateMorohashi_Detail = new UcdPropertyDetail ( + // public static UcdPropertyDetail kAlternateMorohashi_Detail = new UcdPropertyDetail ( // UcdProperty.kAlternateMorohashi, VersionInfo.getInstance(11,0,0), 187, // false, true, false, true); - public static UcdPropertyDetail kPrimaryNumeric_Detail = new UcdPropertyDetail ( - UcdProperty.kPrimaryNumeric, VersionInfo.getInstance(11,0,0), 188, + public static UcdPropertyDetail kPrimaryNumeric_Detail = new UcdPropertyDetail( + UcdProperty.kPrimaryNumeric, VersionInfo.getInstance(11, 0, 0), 188, false, true, false, true); - public static UcdPropertyDetail kTaiwanTelegraph_Detail = new UcdPropertyDetail ( - UcdProperty.kTaiwanTelegraph, VersionInfo.getInstance(11,0,0), 189, + public static UcdPropertyDetail kTaiwanTelegraph_Detail = new UcdPropertyDetail( + UcdProperty.kTaiwanTelegraph, VersionInfo.getInstance(11, 0, 0), 189, false, true, false, true); - public static UcdPropertyDetail kXerox_Detail = new UcdPropertyDetail ( - UcdProperty.kXerox, VersionInfo.getInstance(11,0,0), 190, + public static UcdPropertyDetail kXerox_Detail = new UcdPropertyDetail( + UcdProperty.kXerox, VersionInfo.getInstance(11, 0, 0), 190, false, true, false, true); - public static UcdPropertyDetail kPseudoGB1_Detail = new UcdPropertyDetail ( - UcdProperty.kPseudoGB1, VersionInfo.getInstance(11,0,0), 191, + public static UcdPropertyDetail kPseudoGB1_Detail = new UcdPropertyDetail( + UcdProperty.kPseudoGB1, VersionInfo.getInstance(11, 0, 0), 191, false, true, false, true); - public static UcdPropertyDetail kIBMJapan_Detail = new UcdPropertyDetail ( - UcdProperty.kIBMJapan, VersionInfo.getInstance(11,0,0), 192, + public static UcdPropertyDetail kIBMJapan_Detail = new UcdPropertyDetail( + UcdProperty.kIBMJapan, VersionInfo.getInstance(11, 0, 0), 192, false, true, false, 
true); - public static UcdPropertyDetail kAccountingNumeric_Detail = new UcdPropertyDetail ( - UcdProperty.kAccountingNumeric, VersionInfo.getInstance(11,0,0), 193, + public static UcdPropertyDetail kAccountingNumeric_Detail = new UcdPropertyDetail( + UcdProperty.kAccountingNumeric, VersionInfo.getInstance(11, 0, 0), 193, false, true, false, true); - public static UcdPropertyDetail kCheungBauer_Detail = new UcdPropertyDetail ( - UcdProperty.kCheungBauer, VersionInfo.getInstance(11,0,0), 194, + public static UcdPropertyDetail kCheungBauer_Detail = new UcdPropertyDetail( + UcdProperty.kCheungBauer, VersionInfo.getInstance(11, 0, 0), 194, false, true, false, true); - public static UcdPropertyDetail kCheungBauerIndex_Detail = new UcdPropertyDetail ( - UcdProperty.kCheungBauerIndex, VersionInfo.getInstance(11,0,0), 195, + public static UcdPropertyDetail kCheungBauerIndex_Detail = new UcdPropertyDetail( + UcdProperty.kCheungBauerIndex, VersionInfo.getInstance(11, 0, 0), 195, false, true, false, true); - public static UcdPropertyDetail kFourCornerCode_Detail = new UcdPropertyDetail ( - UcdProperty.kFourCornerCode, VersionInfo.getInstance(11,0,0), 196, + public static UcdPropertyDetail kFourCornerCode_Detail = new UcdPropertyDetail( + UcdProperty.kFourCornerCode, VersionInfo.getInstance(11, 0, 0), 196, false, true, false, true); -// public static UcdPropertyDetail kWubi_Detail = new UcdPropertyDetail ( + // public static UcdPropertyDetail kWubi_Detail = new UcdPropertyDetail ( // UcdProperty.kWubi, VersionInfo.getInstance(11,0,0), 197, // false, true, false, true); - public static UcdPropertyDetail kXHC1983_Detail = new UcdPropertyDetail ( - UcdProperty.kXHC1983, VersionInfo.getInstance(11,0,0), 198, + public static UcdPropertyDetail kXHC1983_Detail = new UcdPropertyDetail( + UcdProperty.kXHC1983, VersionInfo.getInstance(11, 0, 0), 198, false, true, false, true); - public static UcdPropertyDetail kJinmeiyoKanji_Detail = new UcdPropertyDetail ( - UcdProperty.kJinmeiyoKanji, 
VersionInfo.getInstance(11,0,0), 199, + public static UcdPropertyDetail kJinmeiyoKanji_Detail = new UcdPropertyDetail( + UcdProperty.kJinmeiyoKanji, VersionInfo.getInstance(11, 0, 0), 199, false, true, false, true); - public static UcdPropertyDetail kJoyoKanji_Detail = new UcdPropertyDetail ( - UcdProperty.kJoyoKanji, VersionInfo.getInstance(11,0,0), 200, + public static UcdPropertyDetail kJoyoKanji_Detail = new UcdPropertyDetail( + UcdProperty.kJoyoKanji, VersionInfo.getInstance(11, 0, 0), 200, false, true, false, true); - public static UcdPropertyDetail kKoreanEducationHanja_Detail = new UcdPropertyDetail ( - UcdProperty.kKoreanEducationHanja, VersionInfo.getInstance(11,0,0), 201, + public static UcdPropertyDetail kKoreanEducationHanja_Detail = new UcdPropertyDetail( + UcdProperty.kKoreanEducationHanja, VersionInfo.getInstance(11, 0, 0), 201, false, true, false, true); - public static UcdPropertyDetail kKoreanName_Detail = new UcdPropertyDetail ( - UcdProperty.kKoreanName, VersionInfo.getInstance(11,0,0), 202, + public static UcdPropertyDetail kKoreanName_Detail = new UcdPropertyDetail( + UcdProperty.kKoreanName, VersionInfo.getInstance(11, 0, 0), 202, false, true, false, true); - public static UcdPropertyDetail kTGH_Detail = new UcdPropertyDetail ( - UcdProperty.kTGH, VersionInfo.getInstance(11,0,0), 203, + public static UcdPropertyDetail kTGH_Detail = new UcdPropertyDetail( + UcdProperty.kTGH, VersionInfo.getInstance(11, 0, 0), 203, false, true, false, true); - public static UcdPropertyDetail kTGHZ2013_Detail = new UcdPropertyDetail ( - UcdProperty.kTGHZ2013, VersionInfo.getInstance(11,0,0), 204, + public static UcdPropertyDetail kTGHZ2013_Detail = new UcdPropertyDetail( + UcdProperty.kTGHZ2013, VersionInfo.getInstance(11, 0, 0), 204, false, true, false, true); - public static UcdPropertyDetail kSpoofingVariant_Detail = new UcdPropertyDetail ( - UcdProperty.kSpoofingVariant, VersionInfo.getInstance(11,0,0), 205, + public static UcdPropertyDetail 
kSpoofingVariant_Detail = new UcdPropertyDetail( + UcdProperty.kSpoofingVariant, VersionInfo.getInstance(11, 0, 0), 205, false, true, false, true); - public static UcdPropertyDetail kRSKanWa_Detail = new UcdPropertyDetail ( - UcdProperty.kRSKanWa, VersionInfo.getInstance(11,0,0), 206, + public static UcdPropertyDetail kRSKanWa_Detail = new UcdPropertyDetail( + UcdProperty.kRSKanWa, VersionInfo.getInstance(11, 0, 0), 206, false, true, false, true); - public static UcdPropertyDetail kRSJapanese_Detail = new UcdPropertyDetail ( - UcdProperty.kRSJapanese, VersionInfo.getInstance(11,0,0), 207, + public static UcdPropertyDetail kRSJapanese_Detail = new UcdPropertyDetail( + UcdProperty.kRSJapanese, VersionInfo.getInstance(11, 0, 0), 207, false, true, false, true); - public static UcdPropertyDetail kRSKorean_Detail = new UcdPropertyDetail ( - UcdProperty.kRSKorean, VersionInfo.getInstance(11,0,0), 208, + public static UcdPropertyDetail kRSKorean_Detail = new UcdPropertyDetail( + UcdProperty.kRSKorean, VersionInfo.getInstance(11, 0, 0), 208, false, true, false, true); - public static UcdPropertyDetail kRSKangXi_Detail = new UcdPropertyDetail ( - UcdProperty.kRSKangXi, VersionInfo.getInstance(11,0,0), - VersionInfo.getInstance(15,1,0), 209, + public static UcdPropertyDetail kRSKangXi_Detail = new UcdPropertyDetail( + UcdProperty.kRSKangXi, VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(15, 1, 0), 209, false, true, false, true); - public static UcdPropertyDetail kRSAdobe_Japan1_6_Detail = new UcdPropertyDetail ( - UcdProperty.kRSAdobe_Japan1_6, VersionInfo.getInstance(11,0,0), 210, + public static UcdPropertyDetail kRSAdobe_Japan1_6_Detail = new UcdPropertyDetail( + UcdProperty.kRSAdobe_Japan1_6, VersionInfo.getInstance(11, 0, 0), 210, false, true, false, true); - public static UcdPropertyDetail kTotalStrokes_Detail = new UcdPropertyDetail ( - UcdProperty.kTotalStrokes, VersionInfo.getInstance(11,0,0), 211, + public static UcdPropertyDetail kTotalStrokes_Detail 
= new UcdPropertyDetail( + UcdProperty.kTotalStrokes, VersionInfo.getInstance(11, 0, 0), 211, false, true, false, true); - public static UcdPropertyDetail kRSTUnicode_Detail = new UcdPropertyDetail ( - UcdProperty.kRSTUnicode, VersionInfo.getInstance(9,0,0), 212, + public static UcdPropertyDetail kRSTUnicode_Detail = new UcdPropertyDetail( + UcdProperty.kRSTUnicode, VersionInfo.getInstance(9, 0, 0), 212, false, true, false, true); - public static UcdPropertyDetail kTGT_MergedSrc_Detail = new UcdPropertyDetail ( - UcdProperty.kTGT_MergedSrc, VersionInfo.getInstance(9,0,0), 213, + public static UcdPropertyDetail kTGT_MergedSrc_Detail = new UcdPropertyDetail( + UcdProperty.kTGT_MergedSrc, VersionInfo.getInstance(9, 0, 0), 213, false, true, false, true); - public static UcdPropertyDetail kSrc_NushuDuben_Detail = new UcdPropertyDetail ( - UcdProperty.kSrc_NushuDuben, VersionInfo.getInstance(10,0,0), 214, + public static UcdPropertyDetail kSrc_NushuDuben_Detail = new UcdPropertyDetail( + UcdProperty.kSrc_NushuDuben, VersionInfo.getInstance(10, 0, 0), 214, false, true, false, true); - public static UcdPropertyDetail kReading_Detail = new UcdPropertyDetail ( - UcdProperty.kReading, VersionInfo.getInstance(10,0,0), 215, + public static UcdPropertyDetail kReading_Detail = new UcdPropertyDetail( + UcdProperty.kReading, VersionInfo.getInstance(10, 0, 0), 215, false, true, false, true); - public static UcdPropertyDetail ISO_Comment_Detail = new UcdPropertyDetail ( - UcdProperty.ISO_Comment, VersionInfo.getInstance(11,0,0), 216, + public static UcdPropertyDetail ISO_Comment_Detail = new UcdPropertyDetail( + UcdProperty.ISO_Comment, VersionInfo.getInstance(11, 0, 0), 216, true, false, false, true); - public static UcdPropertyDetail Unicode_1_Name_Detail = new UcdPropertyDetail ( - UcdProperty.Unicode_1_Name, VersionInfo.getInstance(11,0,0), 217, + public static UcdPropertyDetail Unicode_1_Name_Detail = new UcdPropertyDetail( + UcdProperty.Unicode_1_Name, 
VersionInfo.getInstance(11, 0, 0), 217, true, false, false, true); - public static UcdPropertyDetail Name_Alias_Detail = new UcdPropertyDetail ( - UcdProperty.Name_Alias, VersionInfo.getInstance(11,0,0), 218, + public static UcdPropertyDetail Name_Alias_Detail = new UcdPropertyDetail( + UcdProperty.Name_Alias, VersionInfo.getInstance(11, 0, 0), 218, false, false, false, true); - public static UcdPropertyDetail Emoji_Detail = new UcdPropertyDetail ( - UcdProperty.Emoji, VersionInfo.getInstance(13,0,0), 219, + public static UcdPropertyDetail Emoji_Detail = new UcdPropertyDetail( + UcdProperty.Emoji, VersionInfo.getInstance(13, 0, 0), 219, true, false, false, true); - public static UcdPropertyDetail Emoji_Presentation_Detail = new UcdPropertyDetail ( - UcdProperty.Emoji_Presentation, VersionInfo.getInstance(13,0,0), 220, + public static UcdPropertyDetail Emoji_Presentation_Detail = new UcdPropertyDetail( + UcdProperty.Emoji_Presentation, VersionInfo.getInstance(13, 0, 0), 220, true, false, false, true); - public static UcdPropertyDetail Emoji_Modifier_Detail = new UcdPropertyDetail ( - UcdProperty.Emoji_Modifier, VersionInfo.getInstance(13,0,0), 221, + public static UcdPropertyDetail Emoji_Modifier_Detail = new UcdPropertyDetail( + UcdProperty.Emoji_Modifier, VersionInfo.getInstance(13, 0, 0), 221, true, false, false, true); - public static UcdPropertyDetail Emoji_Modifier_Base_Detail = new UcdPropertyDetail ( - UcdProperty.Emoji_Modifier_Base, VersionInfo.getInstance(13,0,0), 222, + public static UcdPropertyDetail Emoji_Modifier_Base_Detail = new UcdPropertyDetail( + UcdProperty.Emoji_Modifier_Base, VersionInfo.getInstance(13, 0, 0), 222, true, false, false, true); - public static UcdPropertyDetail Emoji_Component_Detail = new UcdPropertyDetail ( - UcdProperty.Emoji_Component, VersionInfo.getInstance(13,0,0), 223, + public static UcdPropertyDetail Emoji_Component_Detail = new UcdPropertyDetail( + UcdProperty.Emoji_Component, VersionInfo.getInstance(13, 0, 0), 223, 
true, false, false, true); - public static UcdPropertyDetail Extended_Pictographic_Detail = new UcdPropertyDetail ( - UcdProperty.Extended_Pictographic, VersionInfo.getInstance(13,0,0), 224, + public static UcdPropertyDetail Extended_Pictographic_Detail = new UcdPropertyDetail( + UcdProperty.Extended_Pictographic, VersionInfo.getInstance(13, 0, 0), 224, true, false, false, true); - public static UcdPropertyDetail kStrange_Detail = new UcdPropertyDetail ( - UcdProperty.kStrange, VersionInfo.getInstance(14,0,0), 225, + public static UcdPropertyDetail kStrange_Detail = new UcdPropertyDetail( + UcdProperty.kStrange, VersionInfo.getInstance(14, 0, 0), 225, false, true, false, true); - public static UcdPropertyDetail kAlternateTotalStrokes_Detail = new UcdPropertyDetail ( - UcdProperty.kAlternateTotalStrokes, VersionInfo.getInstance(15,0,0), 226, + public static UcdPropertyDetail kAlternateTotalStrokes_Detail = new UcdPropertyDetail( + UcdProperty.kAlternateTotalStrokes, VersionInfo.getInstance(15, 0, 0), 226, false, true, false, true); - public static UcdPropertyDetail NFKC_Simple_Casefold_Detail = new UcdPropertyDetail ( - UcdProperty.NFKC_Simple_Casefold, VersionInfo.getInstance(15,1,0), 227, + public static UcdPropertyDetail NFKC_Simple_Casefold_Detail = new UcdPropertyDetail( + UcdProperty.NFKC_Simple_Casefold, VersionInfo.getInstance(15, 1, 0), 227, true, false, false, true); - public static UcdPropertyDetail ID_Compat_Math_Start_Detail = new UcdPropertyDetail ( - UcdProperty.ID_Compat_Math_Start, VersionInfo.getInstance(15,1,0), 228, + public static UcdPropertyDetail ID_Compat_Math_Start_Detail = new UcdPropertyDetail( + UcdProperty.ID_Compat_Math_Start, VersionInfo.getInstance(15, 1, 0), 228, true, false, false, true); - public static UcdPropertyDetail ID_Compat_Math_Continue_Detail = new UcdPropertyDetail ( - UcdProperty.ID_Compat_Math_Continue, VersionInfo.getInstance(15,1,0), 229, + public static UcdPropertyDetail ID_Compat_Math_Continue_Detail = new 
UcdPropertyDetail( + UcdProperty.ID_Compat_Math_Continue, VersionInfo.getInstance(15, 1, 0), 229, true, false, false, true); - public static UcdPropertyDetail IDS_Unary_Operator_Detail = new UcdPropertyDetail ( - UcdProperty.IDS_Unary_Operator, VersionInfo.getInstance(15,1,0), 230, + public static UcdPropertyDetail IDS_Unary_Operator_Detail = new UcdPropertyDetail( + UcdProperty.IDS_Unary_Operator, VersionInfo.getInstance(15, 1, 0), 230, true, false, false, true); - public static UcdPropertyDetail kJapanese_Detail = new UcdPropertyDetail ( - UcdProperty.kJapanese, VersionInfo.getInstance(15,1,0), 231, + public static UcdPropertyDetail kJapanese_Detail = new UcdPropertyDetail( + UcdProperty.kJapanese, VersionInfo.getInstance(15, 1, 0), 231, false, true, false, true); - public static UcdPropertyDetail kMojiJoho_Detail = new UcdPropertyDetail ( - UcdProperty.kMojiJoho, VersionInfo.getInstance(15,1,0), 232, + public static UcdPropertyDetail kMojiJoho_Detail = new UcdPropertyDetail( + UcdProperty.kMojiJoho, VersionInfo.getInstance(15, 1, 0), 232, false, true, false, true); - public static UcdPropertyDetail kSMSZD2003Index_Detail = new UcdPropertyDetail ( - UcdProperty.kSMSZD2003Index, VersionInfo.getInstance(15,1,0), 233, + public static UcdPropertyDetail kSMSZD2003Index_Detail = new UcdPropertyDetail( + UcdProperty.kSMSZD2003Index, VersionInfo.getInstance(15, 1, 0), 233, false, true, false, true); - public static UcdPropertyDetail kSMSZD2003Readings_Detail = new UcdPropertyDetail ( - UcdProperty.kSMSZD2003Readings, VersionInfo.getInstance(15,1,0), 234, + public static UcdPropertyDetail kSMSZD2003Readings_Detail = new UcdPropertyDetail( + UcdProperty.kSMSZD2003Readings, VersionInfo.getInstance(15, 1, 0), 234, false, true, false, true); - public static UcdPropertyDetail kVietnameseNumeric_Detail = new UcdPropertyDetail ( - UcdProperty.kVietnameseNumeric, VersionInfo.getInstance(15,1,0), 235, + public static UcdPropertyDetail kVietnameseNumeric_Detail = new 
UcdPropertyDetail( + UcdProperty.kVietnameseNumeric, VersionInfo.getInstance(15, 1, 0), 235, false, true, false, true); - public static UcdPropertyDetail kZhuangNumeric_Detail = new UcdPropertyDetail ( - UcdProperty.kZhuangNumeric, VersionInfo.getInstance(15,1,0), 236, + public static UcdPropertyDetail kZhuangNumeric_Detail = new UcdPropertyDetail( + UcdProperty.kZhuangNumeric, VersionInfo.getInstance(15, 1, 0), 236, false, true, false, true); - public static UcdPropertyDetail Indic_Conjunct_Break_Detail = new UcdPropertyDetail ( - UcdProperty.Indic_Conjunct_Break, VersionInfo.getInstance(15,1,0), 237, + public static UcdPropertyDetail Indic_Conjunct_Break_Detail = new UcdPropertyDetail( + UcdProperty.Indic_Conjunct_Break, VersionInfo.getInstance(15, 1, 0), 237, true, false, false, true); - public static UcdPropertyDetail Modifier_Combining_Mark_Detail = new UcdPropertyDetail ( - UcdProperty.Modifier_Combining_Mark, VersionInfo.getInstance(16,0,0), 238, + public static UcdPropertyDetail Modifier_Combining_Mark_Detail = new UcdPropertyDetail( + UcdProperty.Modifier_Combining_Mark, VersionInfo.getInstance(16, 0, 0), 238, true, false, false, true); - public static UcdPropertyDetail kFanqie_Detail = new UcdPropertyDetail ( - UcdProperty.kFanqie, VersionInfo.getInstance(16,0,0), 239, + public static UcdPropertyDetail kFanqie_Detail = new UcdPropertyDetail( + UcdProperty.kFanqie, VersionInfo.getInstance(16, 0, 0), 239, false, true, false, true); - public static UcdPropertyDetail kZhuang_Detail = new UcdPropertyDetail ( - UcdProperty.kZhuang, VersionInfo.getInstance(16,0,0), 240, + public static UcdPropertyDetail kZhuang_Detail = new UcdPropertyDetail( + UcdProperty.kZhuang, VersionInfo.getInstance(16, 0, 0), 240, false, true, false, true); - public static UcdPropertyDetail Basic_Emoji_Detail = new UcdPropertyDetail ( + public static UcdPropertyDetail Basic_Emoji_Detail = new UcdPropertyDetail( UcdProperty.Basic_Emoji, -1, false, false, false, false); - public static 
UcdPropertyDetail CJK_Radical_Detail = new UcdPropertyDetail ( + public static UcdPropertyDetail CJK_Radical_Detail = new UcdPropertyDetail( UcdProperty.CJK_Radical, -2, false, false, false, false); - public static UcdPropertyDetail Confusable_MA_Detail = new UcdPropertyDetail ( + public static UcdPropertyDetail Confusable_MA_Detail = new UcdPropertyDetail( UcdProperty.Confusable_MA, -3, false, false, false, false); - public static UcdPropertyDetail Confusable_ML_Detail = new UcdPropertyDetail ( + public static UcdPropertyDetail Confusable_ML_Detail = new UcdPropertyDetail( UcdProperty.Confusable_ML, -4, false, false, false, false); - public static UcdPropertyDetail Confusable_SA_Detail = new UcdPropertyDetail ( + public static UcdPropertyDetail Confusable_SA_Detail = new UcdPropertyDetail( UcdProperty.Confusable_SA, -5, false, false, false, false); - public static UcdPropertyDetail Confusable_SL_Detail = new UcdPropertyDetail ( + public static UcdPropertyDetail Confusable_SL_Detail = new UcdPropertyDetail( UcdProperty.Confusable_SL, -6, false, false, false, false); - public static UcdPropertyDetail Do_Not_Emit_Preferred_Detail = new UcdPropertyDetail ( + public static UcdPropertyDetail Do_Not_Emit_Preferred_Detail = new UcdPropertyDetail( UcdProperty.Do_Not_Emit_Preferred, -7, false, false, false, false); - public static UcdPropertyDetail Do_Not_Emit_Type_Detail = new UcdPropertyDetail ( + public static UcdPropertyDetail Do_Not_Emit_Type_Detail = new UcdPropertyDetail( UcdProperty.Do_Not_Emit_Type, -8, false, false, false, false); - public static UcdPropertyDetail Emoji_DCM_Detail = new UcdPropertyDetail ( - UcdProperty.Emoji_DCM, VersionInfo.getInstance(6,0,0), -9, + public static UcdPropertyDetail Emoji_DCM_Detail = new UcdPropertyDetail( + UcdProperty.Emoji_DCM, VersionInfo.getInstance(6, 0, 0), -9, false, false, false, false); - public static UcdPropertyDetail Emoji_KDDI_Detail = new UcdPropertyDetail ( - UcdProperty.Emoji_KDDI, VersionInfo.getInstance(6,0,0), 
-10, + public static UcdPropertyDetail Emoji_KDDI_Detail = new UcdPropertyDetail( + UcdProperty.Emoji_KDDI, VersionInfo.getInstance(6, 0, 0), -10, false, false, false, false); - public static UcdPropertyDetail Emoji_SB_Detail = new UcdPropertyDetail ( - UcdProperty.Emoji_SB, VersionInfo.getInstance(6,0,0), -11, + public static UcdPropertyDetail Emoji_SB_Detail = new UcdPropertyDetail( + UcdProperty.Emoji_SB, VersionInfo.getInstance(6, 0, 0), -11, false, false, false, false); - public static UcdPropertyDetail Identifier_Status_Detail = new UcdPropertyDetail ( - UcdProperty.Identifier_Status, VersionInfo.getInstance(9,0,0), -12, + public static UcdPropertyDetail Identifier_Status_Detail = new UcdPropertyDetail( + UcdProperty.Identifier_Status, VersionInfo.getInstance(9, 0, 0), -12, false, false, false, false); - public static UcdPropertyDetail Identifier_Type_Detail = new UcdPropertyDetail ( - UcdProperty.Identifier_Type, VersionInfo.getInstance(9,0,0), -13, + public static UcdPropertyDetail Identifier_Type_Detail = new UcdPropertyDetail( + UcdProperty.Identifier_Type, VersionInfo.getInstance(9, 0, 0), -13, false, false, false, false); - public static UcdPropertyDetail Idn_2008_Detail = new UcdPropertyDetail ( + public static UcdPropertyDetail Idn_2008_Detail = new UcdPropertyDetail( UcdProperty.Idn_2008, -14, false, false, false, false); - public static UcdPropertyDetail Idn_Mapping_Detail = new UcdPropertyDetail ( + public static UcdPropertyDetail Idn_Mapping_Detail = new UcdPropertyDetail( UcdProperty.Idn_Mapping, -15, false, false, false, false); - public static UcdPropertyDetail Idn_Status_Detail = new UcdPropertyDetail ( + public static UcdPropertyDetail Idn_Status_Detail = new UcdPropertyDetail( UcdProperty.Idn_Status, -16, false, false, false, false); - public static UcdPropertyDetail Named_Sequences_Detail = new UcdPropertyDetail ( + public static UcdPropertyDetail Named_Sequences_Detail = new UcdPropertyDetail( UcdProperty.Named_Sequences, -17, false, 
false, false, false); - public static UcdPropertyDetail Named_Sequences_Prov_Detail = new UcdPropertyDetail ( + public static UcdPropertyDetail Named_Sequences_Prov_Detail = new UcdPropertyDetail( UcdProperty.Named_Sequences_Prov, -18, false, false, false, false); - public static UcdPropertyDetail Other_Joining_Type_Detail = new UcdPropertyDetail ( + public static UcdPropertyDetail Other_Joining_Type_Detail = new UcdPropertyDetail( UcdProperty.Other_Joining_Type, -19, false, false, false, false); - public static UcdPropertyDetail RGI_Emoji_Flag_Sequence_Detail = new UcdPropertyDetail ( + public static UcdPropertyDetail RGI_Emoji_Flag_Sequence_Detail = new UcdPropertyDetail( UcdProperty.RGI_Emoji_Flag_Sequence, -20, false, false, false, false); - public static UcdPropertyDetail RGI_Emoji_Keycap_Sequence_Detail = new UcdPropertyDetail ( + public static UcdPropertyDetail RGI_Emoji_Keycap_Sequence_Detail = new UcdPropertyDetail( UcdProperty.RGI_Emoji_Keycap_Sequence, -21, false, false, false, false); - public static UcdPropertyDetail RGI_Emoji_Modifier_Sequence_Detail = new UcdPropertyDetail ( + public static UcdPropertyDetail RGI_Emoji_Modifier_Sequence_Detail = new UcdPropertyDetail( UcdProperty.RGI_Emoji_Modifier_Sequence, -22, false, false, false, false); - public static UcdPropertyDetail RGI_Emoji_Tag_Sequence_Detail = new UcdPropertyDetail ( + public static UcdPropertyDetail RGI_Emoji_Tag_Sequence_Detail = new UcdPropertyDetail( UcdProperty.RGI_Emoji_Tag_Sequence, -23, false, false, false, false); - public static UcdPropertyDetail RGI_Emoji_Zwj_Sequence_Detail = new UcdPropertyDetail ( + public static UcdPropertyDetail RGI_Emoji_Zwj_Sequence_Detail = new UcdPropertyDetail( UcdProperty.RGI_Emoji_Zwj_Sequence, -24, false, false, false, false); - public static UcdPropertyDetail Standardized_Variant_Detail = new UcdPropertyDetail ( + public static UcdPropertyDetail Standardized_Variant_Detail = new UcdPropertyDetail( UcdProperty.Standardized_Variant, -25, false, 
false, false, false); @@ -834,7 +834,7 @@ private UcdPropertyDetail( boolean isCJKAttribute, boolean isCJKShowIfEmpty, boolean isOrgUCDXMLAttribute) { - this ( + this( ucdProperty, minVersion, null, sortOrder, isBaseAttribute, isCJKAttribute, isCJKShowIfEmpty, isOrgUCDXMLAttribute); } @@ -846,7 +846,7 @@ private UcdPropertyDetail( boolean isCJKAttribute, boolean isCJKShowIfEmpty, boolean isOrgUCDXMLAttribute) { - this ( + this( ucdProperty, null, null, sortOrder, isBaseAttribute, isCJKAttribute, isCJKShowIfEmpty, isOrgUCDXMLAttribute); } @@ -870,26 +870,29 @@ private UcdPropertyDetail( this.isOrgUCDXMLAttribute = isOrgUCDXMLAttribute; allPropertyDetails.add(this); - if(isBaseAttribute) { + if (isBaseAttribute) { basePropertyDetails.add(this); ucdxmlPropertyDetails.add(this); } - if(isCJKAttribute) { + if (isCJKAttribute) { cjkPropertyDetails.add(this); ucdxmlPropertyDetails.add(this); } } - public static Set values () { + public static Set values() { return allPropertyDetails; } - public static Set baseValues () { + + public static Set baseValues() { return basePropertyDetails; } - public static Set cjkValues () { + + public static Set cjkValues() { return cjkPropertyDetails; } - public static Set ucdxmlValues () { + + public static Set ucdxmlValues() { return ucdxmlPropertyDetails; } diff --git a/unicodetools/src/main/java/org/unicode/xml/UcdSectionDetail.java b/unicodetools/src/main/java/org/unicode/xml/UcdSectionDetail.java index 24b9a35a6..c9f938410 100644 --- a/unicodetools/src/main/java/org/unicode/xml/UcdSectionDetail.java +++ b/unicodetools/src/main/java/org/unicode/xml/UcdSectionDetail.java @@ -9,13 +9,18 @@ public class UcdSectionDetail { public enum UcdSection { - BLOCKS ("blocks", "block", VersionInfo.getInstance(1, 1, 0), null, Blocks_Detail, true, true), - CJKRADICALS ("cjk-radicals", "cjk-radical", VersionInfo.getInstance(1, 1, 0), null, CJKRadicals_Detail, false, false), - DONOTEMIT ("do-not-emit", "instead", VersionInfo.getInstance(16, 0, 0), null, 
DoNotEmit_Detail, false, false), - EMOJISOURCES ("emoji-sources", "emoji-source", VersionInfo.getInstance(1, 1, 0), null, EmojiSources_Detail, true, false), - NAMEDSEQUENCES ("named-sequences", "named-sequence", VersionInfo.getInstance(1, 1, 0), null, NamedSequences_Detail, false, false), - NORMALIZATIONCORRECTIONS ("normalization-corrections", "normalization-correction", VersionInfo.getInstance(1, 1, 0), null, NormalizationCorrections_Detail, true, false), - STANDARDIZEDVARIANTS ("standardized-variants", "standardized-variant", VersionInfo.getInstance(1, 1, 0), null, StandardizedVariants_Detail, true, false); + BLOCKS("blocks", "block", VersionInfo.getInstance(1, 1, 0), null, Blocks_Detail, true, true), + CJKRADICALS("cjk-radicals", "cjk-radical", VersionInfo.getInstance(1, 1, 0), null, CJKRadicals_Detail, false, + false), + DONOTEMIT("do-not-emit", "instead", VersionInfo.getInstance(16, 0, 0), null, DoNotEmit_Detail, false, false), + EMOJISOURCES("emoji-sources", "emoji-source", VersionInfo.getInstance(1, 1, 0), null, EmojiSources_Detail, + true, false), + NAMEDSEQUENCES("named-sequences", "named-sequence", VersionInfo.getInstance(1, 1, 0), null, + NamedSequences_Detail, false, false), + NORMALIZATIONCORRECTIONS("normalization-corrections", "normalization-correction", VersionInfo.getInstance(1, + 1, 0), null, NormalizationCorrections_Detail, true, false), + STANDARDIZEDVARIANTS("standardized-variants", "standardized-variant", VersionInfo.getInstance(1, 1, 0), null, + StandardizedVariants_Detail, true, false); private final String tag; private final String childTag; private final VersionInfo minVersion; @@ -44,20 +49,30 @@ public enum UcdSection { public String toString() { return tag; } + public String getChildTag() { return childTag; } + public VersionInfo getMinVersion() { return minVersion; } + public VersionInfo getMaxVersion() { return maxVersion; } + public UcdSectionDetail getUcdSectionDetail() { return ucdSectionDetail; } - public boolean 
getParserWithRange() { return parserWithRange; } - public boolean getParserWithMissing() { return parserWithMissing; } + + public boolean getParserWithRange() { + return parserWithRange; + } + + public boolean getParserWithMissing() { + return parserWithMissing; + } } public static UcdSectionDetail Blocks_Detail = new UcdSectionDetail( @@ -144,9 +159,11 @@ private UcdSectionDetail( public UcdSection getSection() { return this.ucdSection; } + public UcdSectionComponent[] getUcdSectionComponents() { return this.ucdSectionComponents; } + public int getSortOrder() { return this.sortOrder; } diff --git a/unicodetools/src/main/java/org/unicode/xml/UcdXML.java b/unicodetools/src/main/java/org/unicode/xml/UcdXML.java index c826b4f40..e40cc0d6f 100644 --- a/unicodetools/src/main/java/org/unicode/xml/UcdXML.java +++ b/unicodetools/src/main/java/org/unicode/xml/UcdXML.java @@ -1,29 +1,25 @@ package org.unicode.xml; -import java.io.*; -import java.nio.charset.StandardCharsets; -import java.util.*; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - +import com.ibm.icu.dev.tool.UOption; import com.ibm.icu.util.VersionInfo; -import com.thaiopensource.resolver.Input; -import org.unicode.props.*; +import org.unicode.props.IndexUnicodeProperties; +import org.unicode.props.UcdProperty; +import org.unicode.props.UcdPropertyValues; import org.xml.sax.SAXException; import org.xml.sax.helpers.AttributesImpl; import javax.xml.transform.TransformerConfigurationException; +import java.io.*; +import java.nio.charset.StandardCharsets; +import java.util.*; +import java.util.regex.Matcher; +import java.util.regex.Pattern; public class UcdXML { private static final String NAMESPACE = "http://www.unicode.org/ns/2003/ucd/1.0"; - private enum OutputType { - STRICT, - COMPATIBLE - } - private enum UCDXMLOUTPUTRANGE { ALL, NOUNIHAN, @@ -54,23 +50,111 @@ public String toString() { } } + private static final UOption[] options = { + UOption.HELP_H(), + UOption.create("ucdversion", 'v', 
UOption.REQUIRES_ARG), + UOption.create("range", 'r', UOption.REQUIRES_ARG), + UOption.create("output", 'o', UOption.REQUIRES_ARG), + UOption.create("outputfolder", 'f', UOption.REQUIRES_ARG) + }; + private static final int + HELP = 0, + UCDVERSION = 1, + RANGE = 2, + OUTPUT = 3, + OUTPUTFOLDER = 4; + + public static void main(String[] args) throws Exception { + VersionInfo ucdVersion = null; + UCDXMLOUTPUTRANGE ucdxmloutputrange = null; + UCDXMLOUTPUTTYPE ucdxmloutputtype = null; + File destinationFolder = null; - VersionInfo ucdVersion = VersionInfo.getInstance(15, 1, 0); - File destinationFolder = new File( - "C:\\_git\\Unicode\\ucdxml\\data\\" + - getVersionString(ucdVersion, 3) + "\\xmltest\\"); - if(!destinationFolder.exists()) { - destinationFolder.mkdir(); + UOption.parseArgs(args, options); + + if (options[HELP].doesOccur) { + System.out.println("UcdXML --ucdversion {version number} --outputfolder {destination} " + + "--range [ALL|NOUNIHAN|UNIHAN] --output [FLAT|GROUPED]"); + System.exit(0); } - buildUcdXMLFile(ucdVersion, destinationFolder, UCDXMLOUTPUTRANGE.ALL, UCDXMLOUTPUTTYPE.FLAT); - System.out.println("end"); + try { + if (options[UCDVERSION].doesOccur) { + try { + ucdVersion = VersionInfo.getInstance(options[UCDVERSION].value); + } + catch (Exception e) { + throw new IllegalArgumentException("Could not convert " + options[UCDVERSION].value + + " to a valid UCD version"); + } + } + else { + throw new IllegalArgumentException("Missing command line option: --ucdversion (or -v)"); + } + if (options[RANGE].doesOccur) { + try { + ucdxmloutputrange = UCDXMLOUTPUTRANGE.valueOf(options[RANGE].value.toUpperCase(Locale.ROOT)); + } + catch (Exception e) { + throw new IllegalArgumentException("Could not convert " + options[RANGE].value + + " to one of [ALL|NOUNIHAN|UNIHAN]"); + } + } + else { + throw new IllegalArgumentException("Missing command line option: --range (or -r)"); + } + if (options[OUTPUT].doesOccur) { + try { + ucdxmloutputtype = 
UCDXMLOUTPUTTYPE.valueOf(options[OUTPUT].value.toUpperCase(Locale.ROOT)); + } + catch (Exception e) { + throw new IllegalArgumentException("Could not convert " + options[OUTPUT].value + + " to one of [FLAT|GROUPED]"); + } + } + else { + throw new IllegalArgumentException("Missing command line option: --output (or -o)"); + } + if (options[OUTPUTFOLDER].doesOccur) { + try { + destinationFolder = new File(options[OUTPUTFOLDER].value + getVersionString(ucdVersion, 3) + + "\\xmltest\\"); + if (!destinationFolder.exists()) { + if(!destinationFolder.mkdir()) { + throw new IOException(); + } + } + } + catch (Exception e) { + throw new IllegalArgumentException("Could not find or create " + options[OUTPUTFOLDER].value); + } + } + else { + throw new IllegalArgumentException("Missing command line option: --outputfolder (or -f)"); + } + + } + catch (Exception e) { + System.err.println(e.getMessage()); + System.exit(1); + } + + if (ucdVersion != null && destinationFolder.exists()) { + buildUcdXMLFile(ucdVersion, destinationFolder, ucdxmloutputrange, ucdxmloutputtype); + System.out.println("end"); + System.exit(0); + } + else { + System.err.println("Unexpected error when building UcdXML file."); + System.exit(1); + } + + } - private static void buildUcdXMLFile( - VersionInfo ucdVersion, File destinationFolder, UCDXMLOUTPUTRANGE outputRange, UCDXMLOUTPUTTYPE outputType) - throws IOException, TransformerConfigurationException, SAXException { + private static void buildUcdXMLFile(VersionInfo ucdVersion, File destinationFolder, UCDXMLOUTPUTRANGE outputRange + , UCDXMLOUTPUTTYPE outputType) throws IOException, TransformerConfigurationException, SAXException { int lowCodepoint = 0x0; int highCodepoint = 0x10FFFF; // Tangut @@ -79,9 +163,9 @@ private static void buildUcdXMLFile( //0x10FFFF File tempFile = new File(destinationFolder, "temp.xml"); - String outputFilename = "ucd." + - outputRange.toString().toLowerCase() + "." 
+ - outputType.toString().toLowerCase() + ".xml"; + String outputFilename = + "ucd." + outputRange.toString().toLowerCase(Locale.ROOT) + "." + + outputType.toString().toLowerCase(Locale.ROOT) + ".xml"; File destinationFile = new File(destinationFolder, outputFilename); FileOutputStream fileOutputStream = new FileOutputStream(tempFile); @@ -92,13 +176,16 @@ private static void buildUcdXMLFile( UCDDataResolver ucdDataResolver = new UCDDataResolver(iup, NAMESPACE, writer); writer.startFile(); - writer.startElement("ucd"); { - writer.startElement("description"); { + writer.startElement("ucd"); + { + writer.startElement("description"); + { writer.addContent("Unicode " + getVersionString(ucdVersion, 3)); writer.endElement("description"); } - buildRepertoire(writer, attributeResolver, ucdVersion, lowCodepoint, highCodepoint, outputRange); - if(outputRange != UCDXMLOUTPUTRANGE.UNIHAN) { + buildRepertoire(writer, attributeResolver, ucdVersion, lowCodepoint, highCodepoint, outputRange, + outputType); + if (outputRange != UCDXMLOUTPUTRANGE.UNIHAN) { ucdDataResolver.buildSection(UcdSectionDetail.UcdSection.BLOCKS); ucdDataResolver.buildSection(UcdSectionDetail.UcdSection.NAMEDSEQUENCES); ucdDataResolver.buildSection(UcdSectionDetail.UcdSection.NORMALIZATIONCORRECTIONS); @@ -110,9 +197,11 @@ private static void buildUcdXMLFile( writer.endElement("ucd"); } writer.endFile(); - fileOutputStream.close (); + fileOutputStream.close(); cleanUcdXMLFile(tempFile, destinationFile); - tempFile.delete(); + if(!tempFile.delete()) { + throw new IOException("Could not delete temporary file " + tempFile); + } } private static void cleanUcdXMLFile(File tempFile, File destinationFile) throws IOException { @@ -131,7 +220,7 @@ private static void cleanUcdXMLFile(File tempFile, File destinationFile) throws String line; while ((line = bufferedReader.readLine()) != null) { - Matcher matcher = Pattern.compile("&#([\\d]+);").matcher(line); + Matcher matcher = Pattern.compile("&#(\\d+);").matcher(line); 
line = matcher.replaceAll(matchResult -> new String(Character.toChars(Integer.parseInt(matcher.group(1))))); bufferedWriter.append(line); bufferedWriter.newLine(); @@ -141,88 +230,186 @@ private static void cleanUcdXMLFile(File tempFile, File destinationFile) throws fileOutputStream.close(); } - private static void buildRepertoire( - UCDXMLWriter writer, AttributeResolver attributeResolver, VersionInfo ucdVersion, - int lowCodepoint, int highCodepoint, UCDXMLOUTPUTRANGE outputRange) - throws SAXException { + private static void buildRepertoire(UCDXMLWriter writer, AttributeResolver attributeResolver, + VersionInfo ucdVersion, int lowCodepoint, int highCodepoint, + UCDXMLOUTPUTRANGE outputRange, UCDXMLOUTPUTTYPE outputType) throws SAXException { + + writer.startElement("repertoire"); + { + for (int codepoint = lowCodepoint; codepoint <= highCodepoint; codepoint++) { + if (isWritableCodepoint(codepoint, outputRange, attributeResolver)) { + if (outputType == UCDXMLOUTPUTTYPE.GROUPED) { + codepoint = buildGroup(writer, attributeResolver, ucdVersion, codepoint, highCodepoint, + outputRange, outputType); + } else { + codepoint = buildChars(writer, attributeResolver, ucdVersion, codepoint, highCodepoint, + outputRange, outputType, null); + } + } + } + writer.endElement("repertoire"); + } + } - writer.startElement("repertoire"); { + private static int buildGroup(UCDXMLWriter writer, AttributeResolver attributeResolver, VersionInfo ucdVersion, + int lowCodepoint, int highCodepoint, UCDXMLOUTPUTRANGE outputRange, + UCDXMLOUTPUTTYPE outputType) throws SAXException { + int lastCodepointInGroup = getLastCodepointInGroup(attributeResolver, lowCodepoint, highCodepoint); - ArrayList range = new ArrayList<>(); - Range rangeType = Range.NONRANGE; + AttributesImpl groupAttrs = getGroupAttributes(ucdVersion, attributeResolver, lowCodepoint, + lastCodepointInGroup, outputRange); - for (int codepoint = lowCodepoint; codepoint <= highCodepoint; codepoint++) { - if 
(attributeResolver.isUnassignedCodepoint(codepoint) || - (outputRange == UCDXMLOUTPUTRANGE.NOUNIHAN && attributeResolver.isUnifiedIdeograph(codepoint))) { - Range currentRangeType = getRangeType(attributeResolver, codepoint); - if (!range.isEmpty()){ - if (!currentRangeType.equals(rangeType) || attributeResolver.isDifferentRange(codepoint, codepoint - 1)) { - if (outputRange != UCDXMLOUTPUTRANGE.UNIHAN) { - buildRange(writer, attributeResolver, ucdVersion, range, rangeType); + writer.startElement("group", groupAttrs); + { + buildChars(writer, attributeResolver, ucdVersion, lowCodepoint, lastCodepointInGroup, outputRange, + outputType, groupAttrs); + writer.endElement("group"); + } + return lastCodepointInGroup; + } + + private static int buildChars(UCDXMLWriter writer, AttributeResolver attributeResolver, VersionInfo ucdVersion, + int lowCodepoint, int highCodepoint, UCDXMLOUTPUTRANGE outputRange, + UCDXMLOUTPUTTYPE outputType, AttributesImpl groupAttrs) throws SAXException { + + ArrayList range = new ArrayList<>(); + Range rangeType = Range.NONRANGE; + for (int codepoint = lowCodepoint; codepoint <= highCodepoint; codepoint++) { + if (attributeResolver.isUnassignedCodepoint(codepoint) || + (outputRange == UCDXMLOUTPUTRANGE.NOUNIHAN && attributeResolver.isUnifiedIdeograph(codepoint))) { + Range currentRangeType = getRangeType(attributeResolver, codepoint); + if (!range.isEmpty()) { + if (!currentRangeType.equals(rangeType) || attributeResolver.isDifferentRange(codepoint, + codepoint - 1)) { + if (outputRange != UCDXMLOUTPUTRANGE.UNIHAN) { + if (outputType == UCDXMLOUTPUTTYPE.GROUPED) { + buildGroupedRange(writer, attributeResolver, ucdVersion, range, rangeType, groupAttrs); + } else { + buildUngroupedRange(writer, attributeResolver, ucdVersion, range, rangeType); } - range.clear(); } + range.clear(); } - range.add(codepoint); - rangeType = currentRangeType; } - else { - if (!range.isEmpty()) { - if (outputRange != UCDXMLOUTPUTRANGE.UNIHAN) { - buildRange(writer, 
attributeResolver, ucdVersion, range, rangeType); + range.add(codepoint); + rangeType = currentRangeType; + } else { + if (!range.isEmpty()) { + if (outputRange != UCDXMLOUTPUTRANGE.UNIHAN) { + if (outputType == UCDXMLOUTPUTTYPE.GROUPED) { + buildGroupedRange(writer, attributeResolver, ucdVersion, range, rangeType, groupAttrs); + } else { + buildUngroupedRange(writer, attributeResolver, ucdVersion, range, rangeType); } - range.clear(); - rangeType = Range.NONRANGE; } - buildChar(writer, attributeResolver, ucdVersion, codepoint, outputRange); + range.clear(); + rangeType = Range.NONRANGE; + } + if (isWritableCodepoint(codepoint, outputRange, attributeResolver)) { + if (outputType == UCDXMLOUTPUTTYPE.GROUPED) { + buildGroupedChar(writer, attributeResolver, ucdVersion, codepoint, outputRange, groupAttrs); + } else { + buildUngroupedChar(writer, attributeResolver, ucdVersion, codepoint, outputRange); + } } } - //Handle any range before the end of the repertoire element. - if (!range.isEmpty()) { - if (outputRange != UCDXMLOUTPUTRANGE.UNIHAN) { - buildRange(writer, attributeResolver, ucdVersion, range, rangeType); + } + //Handle any range before the end of the repertoire element. 
+ if (!range.isEmpty()) { + if (outputRange != UCDXMLOUTPUTRANGE.UNIHAN) { + if (outputType == UCDXMLOUTPUTTYPE.GROUPED) { + buildGroupedRange(writer, attributeResolver, ucdVersion, range, rangeType, groupAttrs); + } else { + buildUngroupedRange(writer, attributeResolver, ucdVersion, range, rangeType); } } - writer.endElement("repertoire"); } + return highCodepoint; } - private static void buildChar( - UCDXMLWriter writer, AttributeResolver attributeResolver, VersionInfo ucdVersion, int codepoint, - UCDXMLOUTPUTRANGE outputRange) + private static void buildUngroupedChar(UCDXMLWriter writer, AttributeResolver attributeResolver, + VersionInfo ucdVersion, int codepoint, UCDXMLOUTPUTRANGE outputRange) throws SAXException { - if(outputRange != UCDXMLOUTPUTRANGE.UNIHAN || attributeResolver.isUnihanAttributeRange(codepoint)) { - AttributesImpl at = getAttributes(ucdVersion, attributeResolver, codepoint, outputRange); - writer.startElement("char", at); { - HashMap nameAliases = attributeResolver.getNameAliases(codepoint); - if (null != nameAliases && !nameAliases.isEmpty()) { - for (String alias : nameAliases.keySet()) { - AttributesImpl nameAliasAt = new AttributesImpl(); - nameAliasAt.addAttribute( - NAMESPACE, "alias", "alias", "CDATA", alias); - nameAliasAt.addAttribute( - NAMESPACE, "type", "type", "CDATA", nameAliases.get(alias)); - writer.startElement("name-alias", nameAliasAt); { - writer.endElement("name-alias"); - } + AttributesImpl charAttributes = getAttributes(ucdVersion, attributeResolver, codepoint, outputRange); + buildChar(writer, attributeResolver, codepoint, charAttributes); + } + + private static void buildGroupedChar(UCDXMLWriter writer, AttributeResolver attributeResolver, + VersionInfo ucdVersion, int codepoint, UCDXMLOUTPUTRANGE outputRange, + AttributesImpl groupAttrs) throws SAXException { + + AttributesImpl orgCharAttributes = getAttributes(ucdVersion, attributeResolver, codepoint, outputRange); + AttributesImpl charAttributes = new 
AttributesImpl(); + for (int index = 0; index < orgCharAttributes.getLength(); index++) { + String attributeQName = orgCharAttributes.getQName(index); + String orgCharAttributesValue = orgCharAttributes.getValue(index); + String groupAttributeValue = groupAttrs.getValue(attributeQName); + if (!orgCharAttributesValue.equals(groupAttributeValue)) { + charAttributes.addAttribute(NAMESPACE, attributeQName, attributeQName, "CDATA", orgCharAttributesValue); + } + } + buildChar(writer, attributeResolver, codepoint, charAttributes); + } + + private static void buildChar(UCDXMLWriter writer, AttributeResolver attributeResolver, int codepoint, + AttributesImpl charAttributes) throws SAXException { + writer.startElement("char", charAttributes); + { + HashMap nameAliases = attributeResolver.getNameAliases(codepoint); + if (null != nameAliases && !nameAliases.isEmpty()) { + for (String alias : nameAliases.keySet()) { + AttributesImpl nameAliasAt = new AttributesImpl(); + nameAliasAt.addAttribute(NAMESPACE, "alias", "alias", "CDATA", alias); + nameAliasAt.addAttribute(NAMESPACE, "type", "type", "CDATA", nameAliases.get(alias)); + writer.startElement("name-alias", nameAliasAt); + { + writer.endElement("name-alias"); } } - writer.endElement("char"); } + writer.endElement("char"); + } + } + + private static void buildGroupedRange(UCDXMLWriter writer, AttributeResolver attributeResolver, + VersionInfo ucdVersion, ArrayList range, Range rangeType, + AttributesImpl groupAttrs) throws SAXException { + AttributesImpl orgRangeAttributes = getReservedAttributes(ucdVersion, attributeResolver, range); + AttributesImpl rangeAttributes = new AttributesImpl(); + for (int index = 0; index < orgRangeAttributes.getLength(); index++) { + String attributeQName = orgRangeAttributes.getQName(index); + String orgCharAttributesValue = orgRangeAttributes.getValue(index); + String groupAttributeValue = groupAttrs.getValue(attributeQName); + if (!orgCharAttributesValue.equals(groupAttributeValue)) { + 
rangeAttributes.addAttribute(NAMESPACE, attributeQName, attributeQName, "CDATA", + orgCharAttributesValue); + } + } + writer.startElement(rangeType.tag, rangeAttributes); + { + writer.endElement(rangeType.tag); } } - private static void buildRange(UCDXMLWriter writer, AttributeResolver attributeResolver, VersionInfo ucdVersion, - ArrayList range, Range rangeType) + private static void buildUngroupedRange(UCDXMLWriter writer, AttributeResolver attributeResolver, + VersionInfo ucdVersion, ArrayList range, Range rangeType) throws SAXException { - AttributesImpl at = getReservedAttributes(ucdVersion, attributeResolver, range); - writer.startElement(rangeType.tag, at); { + AttributesImpl rangeAttributes = getReservedAttributes(ucdVersion, attributeResolver, range); + writer.startElement(rangeType.tag, rangeAttributes); + { writer.endElement(rangeType.tag); } } + private static boolean isWritableCodepoint(int codepoint, UCDXMLOUTPUTRANGE outputRange, + AttributeResolver attributeResolver) { + return outputRange == UCDXMLOUTPUTRANGE.ALL || + (outputRange == UCDXMLOUTPUTRANGE.UNIHAN && attributeResolver.isUnihanAttributeRange(codepoint)) || + (outputRange == UCDXMLOUTPUTRANGE.NOUNIHAN && !attributeResolver.isUnifiedIdeograph(codepoint)); + } + private static Range getRangeType(AttributeResolver attributeResolver, int codepoint) { String NChar = attributeResolver.getNChar(codepoint); UcdPropertyValues.General_Category_Values gc = attributeResolver.getgc(codepoint); @@ -242,49 +429,115 @@ private static Range getRangeType(AttributeResolver attributeResolver, int codep return Range.RESERVED; } - private static AttributesImpl getAttributes( - VersionInfo version, AttributeResolver attributeResolver, int codepoint, UCDXMLOUTPUTRANGE outputRange) { + private static int getLastCodepointInGroup(AttributeResolver attributeResolver, int lowCodepoint, + int highCodepoint) { + String blk = attributeResolver.getAttributeValue(UcdProperty.Block, lowCodepoint); + for (int codepoint = 
lowCodepoint; codepoint <= highCodepoint; codepoint++) { + if (!blk.equals(attributeResolver.getAttributeValue(UcdProperty.Block, codepoint))) { + return codepoint - 1; + } + if (codepoint == 0x20 - 1 // put the C0 controls in their own group + || codepoint == 0xa0 - 1 // put the C0 controls in their own group + || codepoint == 0x1160 - 1 // split the jamos into three groups + || codepoint == 0x11a8 - 1 // split the jamos into three groups + || codepoint == 0x1f1e6 - 1 // put the regional indicators in their own group + ) { + return codepoint; + } + } + return highCodepoint; + } + + private static AttributesImpl getAttributes(VersionInfo version, AttributeResolver attributeResolver, + int codepoint, UCDXMLOUTPUTRANGE outputRange) { AttributesImpl attributes = new AttributesImpl(); - attributes.addAttribute( - NAMESPACE, "cp", "cp", "CDATA", attributeResolver.getHexString(codepoint)); + attributes.addAttribute(NAMESPACE, "cp", "cp", "CDATA", attributeResolver.getHexString(codepoint)); for (UcdPropertyDetail propDetail : UcdPropertyDetail.ucdxmlValues()) { UcdProperty prop = propDetail.getUcdProperty(); if (version.compareTo(propDetail.getMinVersion()) >= 0 && - (propDetail.getMaxVersion() == null || version.compareTo(propDetail.getMaxVersion()) < 0)) - { + (propDetail.getMaxVersion() == null || version.compareTo(propDetail.getMaxVersion()) < 0)) { String attrValue = attributeResolver.getAttributeValue(prop, codepoint); - boolean isAttributeIncluded = getIsAttributeIncluded( - attrValue, - attributeResolver.isUnihanAttributeRange(codepoint), - propDetail, prop, - outputRange); - - if(isAttributeIncluded) { + boolean isAttributeIncluded = getIsAttributeIncluded(attrValue, + attributeResolver.isUnihanAttributeRange(codepoint), propDetail, prop, outputRange); + if (isAttributeIncluded) { String propName = prop.getShortName(); - if(propName.startsWith("cjk")) { + if (propName.startsWith("cjk")) { propName = propName.substring(2); } - attributes.addAttribute( - NAMESPACE, 
- propName, - propName, - "CDATA", - attrValue - ); + attributes.addAttribute(NAMESPACE, propName, propName, "CDATA", attrValue); + } + } + } + return attributes; + } + + private static AttributesImpl getGroupAttributes(VersionInfo version, AttributeResolver attributeResolver, + int lowCodepoint, int highCodepoint, + UCDXMLOUTPUTRANGE outputRange) { + AttributesImpl attributes = new AttributesImpl(); + + for (UcdPropertyDetail propDetail : UcdPropertyDetail.ucdxmlValues()) { + UcdProperty prop = propDetail.getUcdProperty(); + if (version.compareTo(propDetail.getMinVersion()) >= 0 && + (propDetail.getMaxVersion() == null || version.compareTo(propDetail.getMaxVersion()) < 0)) { + int totalCount = 0; + Map counters = new LinkedHashMap<>(); + + for (int codepoint = lowCodepoint; codepoint <= highCodepoint; codepoint++) { + if (!attributeResolver.isUnassignedCodepoint(codepoint)) { + String attrValue = attributeResolver.getAttributeValue(prop, codepoint); + int currentCount = (counters.get(attrValue) == null) ? 
0 : counters.get(attrValue); + currentCount++; + totalCount++; + counters.put(attrValue, currentCount); + } + } + int max = Integer.MIN_VALUE; + String bestAttrValue = null; + for (String attrValue : counters.keySet()) { + int thisCount = counters.get(attrValue); + if (thisCount > max) { + max = thisCount; + bestAttrValue = attrValue; + } + } + switch (prop) { + case Decomposition_Mapping: + case Simple_Uppercase_Mapping: + case Simple_Lowercase_Mapping: + case Simple_Titlecase_Mapping: + case Uppercase_Mapping: + case Lowercase_Mapping: + case Titlecase_Mapping: + case Simple_Case_Folding: + case Case_Folding: + if (bestAttrValue != null) { + bestAttrValue = "#"; + } + } + if (max > 0.2 * totalCount && max > 1) { + boolean isAttributeIncluded = getIsAttributeIncluded(bestAttrValue, + attributeResolver.isUnihanAttributeRange(lowCodepoint), propDetail, prop, outputRange); + if (isAttributeIncluded) { + String propName = prop.getShortName(); + if (propName.startsWith("cjk")) { + propName = propName.substring(2); + } + attributes.addAttribute(NAMESPACE, propName, propName, "CDATA", bestAttrValue); + } } } } return attributes; } - private static boolean getIsAttributeIncluded( - String attrValue, - boolean isUnihanAttributeRange, - UcdPropertyDetail propDetail, - UcdProperty prop, - UCDXMLOUTPUTRANGE outputRange) { - if (attrValue == null) { return false; } + private static boolean getIsAttributeIncluded(String attrValue, boolean isUnihanAttributeRange, + UcdPropertyDetail propDetail, UcdProperty prop, + UCDXMLOUTPUTRANGE outputRange) { + if (attrValue == null) { + return false; + } if (isUnihanAttributeRange) { if (outputRange == UCDXMLOUTPUTRANGE.UNIHAN) { if (prop.equals(UcdProperty.Numeric_Type) && !attrValue.equals("None")) { @@ -309,37 +562,25 @@ private static boolean getIsAttributeIncluded( } - private static AttributesImpl getReservedAttributes( - VersionInfo version, AttributeResolver attributeResolver, ArrayList range) { + private static AttributesImpl 
getReservedAttributes(VersionInfo version, AttributeResolver attributeResolver, + ArrayList range) { AttributesImpl attributes = new AttributesImpl(); if (range.size() == 1) { - attributes.addAttribute( - NAMESPACE, "cp", "cp", "CDATA", - attributeResolver.getHexString(range.get(0))); - } - else { - attributes.addAttribute( - NAMESPACE, "first-cp", "first-cp", "CDATA", + attributes.addAttribute(NAMESPACE, "cp", "cp", "CDATA", attributeResolver.getHexString(range.get(0))); + } else { + attributes.addAttribute(NAMESPACE, "first-cp", "first-cp", "CDATA", attributeResolver.getHexString(range.get(0))); - attributes.addAttribute( - NAMESPACE, "last-cp", "last-cp", "CDATA", + attributes.addAttribute(NAMESPACE, "last-cp", "last-cp", "CDATA", attributeResolver.getHexString(range.get(range.size() - 1))); } for (UcdPropertyDetail propDetail : UcdPropertyDetail.baseValues()) { UcdProperty prop = propDetail.getUcdProperty(); if (version.compareTo(propDetail.getMinVersion()) >= 0 && - (propDetail.getMaxVersion() == null || version.compareTo(propDetail.getMaxVersion()) <= 0)) - { + (propDetail.getMaxVersion() == null || version.compareTo(propDetail.getMaxVersion()) <= 0)) { String attrValue = attributeResolver.getAttributeValue(propDetail.getUcdProperty(), range.get(0)); - attributes.addAttribute( - NAMESPACE, - prop.getShortName(), - prop.getShortName(), - "CDATA", - attrValue - ); + attributes.addAttribute(NAMESPACE, prop.getShortName(), prop.getShortName(), "CDATA", attrValue); } } return attributes; @@ -350,7 +591,7 @@ private static String getVersionString(VersionInfo version, int maxDigits) { int[] digits = new int[]{version.getMajor(), version.getMinor(), version.getMilli(), version.getMicro()}; StringBuilder verStr = new StringBuilder(7); verStr.append(digits[0]); - for(int i = 1; i < maxDigits; ++i) { + for (int i = 1; i < maxDigits; ++i) { verStr.append("."); verStr.append(digits[i]); } From 7764f6c3dff01f3eac0a17e57506244b1c97bb5a Mon Sep 17 00:00:00 2001 From: John 
Wilcock Date: Fri, 7 Jun 2024 08:42:43 -0700 Subject: [PATCH 04/14] Rebase --- .../org/unicode/xml/AttributeResolver.java | 184 +- .../java/org/unicode/xml/UCDDataResolver.java | 122 +- .../java/org/unicode/xml/UCDXMLWriter.java | 57 +- .../org/unicode/xml/UcdPropertyDetail.java | 3043 ++++++++++++----- .../org/unicode/xml/UcdSectionDetail.java | 206 +- .../src/main/java/org/unicode/xml/UcdXML.java | 505 ++- 6 files changed, 2884 insertions(+), 1233 deletions(-) diff --git a/unicodetools/src/main/java/org/unicode/xml/AttributeResolver.java b/unicodetools/src/main/java/org/unicode/xml/AttributeResolver.java index cb173b00c..87d88dbde 100644 --- a/unicodetools/src/main/java/org/unicode/xml/AttributeResolver.java +++ b/unicodetools/src/main/java/org/unicode/xml/AttributeResolver.java @@ -1,32 +1,38 @@ package org.unicode.xml; import com.ibm.icu.dev.util.UnicodeMap; +import java.util.*; import org.unicode.cldr.draft.FileUtilities; import org.unicode.props.*; -import java.util.*; - public class AttributeResolver { private final IndexUnicodeProperties indexUnicodeProperties; private final UnicodeMap map_age; private final UnicodeMap map_bidi_class; - private final UnicodeMap map_bidi_paired_bracket_type; + private final UnicodeMap + map_bidi_paired_bracket_type; private final UnicodeMap map_block; - private final UnicodeMap map_canonical_combining_class; + private final UnicodeMap + map_canonical_combining_class; private final UnicodeMap map_decomposition_type; private final UnicodeMap map_do_not_emit_type; private final UnicodeMap map_east_asian_width; private final UnicodeMap map_general_category; - private final UnicodeMap map_grapheme_cluster_break; - private final UnicodeMap map_hangul_syllable_type; + private final UnicodeMap + map_grapheme_cluster_break; + private final UnicodeMap + map_hangul_syllable_type; private final UnicodeMap map_identifier_status; private final UnicodeMap map_identifier_type; private final UnicodeMap map_idn_2008; private final UnicodeMap 
map_idn_status; - private final UnicodeMap map_indic_conjunct_break; - private final UnicodeMap map_indic_positional_category; - private final UnicodeMap map_indic_syllabic_category; + private final UnicodeMap + map_indic_conjunct_break; + private final UnicodeMap + map_indic_positional_category; + private final UnicodeMap + map_indic_syllabic_category; private final UnicodeMap map_jamo_short_name; private final UnicodeMap map_joining_group; private final UnicodeMap map_joining_type; @@ -40,41 +46,50 @@ public class AttributeResolver { private final UnicodeMap map_script; private final UnicodeMap map_script_extensions; private final UnicodeMap map_sentence_break; - private final UnicodeMap map_vertical_orientation; + private final UnicodeMap + map_vertical_orientation; private final UnicodeMap map_word_break; private final HashMap> map_NameAlias; - //If there is a change in any of these properties between two adjacent characters, it will result in a new range. + // If there is a change in any of these properties between two adjacent characters, it will + // result in a new range. 
private final UcdProperty[] rangeDefiningProperties = { - UcdProperty.Age, - UcdProperty.Bidi_Class, - UcdProperty.Block, - UcdProperty.Decomposition_Mapping, - UcdProperty.Numeric_Type, - UcdProperty.Numeric_Value, - UcdProperty.Vertical_Orientation + UcdProperty.Age, + UcdProperty.Bidi_Class, + UcdProperty.Block, + UcdProperty.Decomposition_Mapping, + UcdProperty.Numeric_Type, + UcdProperty.Numeric_Value, + UcdProperty.Vertical_Orientation }; public AttributeResolver(IndexUnicodeProperties iup) { indexUnicodeProperties = iup; map_age = indexUnicodeProperties.loadEnum(UcdProperty.Age); map_bidi_class = indexUnicodeProperties.loadEnum(UcdProperty.Bidi_Class); - map_bidi_paired_bracket_type = indexUnicodeProperties.loadEnum(UcdProperty.Bidi_Paired_Bracket_Type); + map_bidi_paired_bracket_type = + indexUnicodeProperties.loadEnum(UcdProperty.Bidi_Paired_Bracket_Type); map_block = indexUnicodeProperties.loadEnum(UcdProperty.Block); - map_canonical_combining_class = indexUnicodeProperties.loadEnum(UcdProperty.Canonical_Combining_Class); + map_canonical_combining_class = + indexUnicodeProperties.loadEnum(UcdProperty.Canonical_Combining_Class); map_decomposition_type = indexUnicodeProperties.loadEnum(UcdProperty.Decomposition_Type); map_do_not_emit_type = indexUnicodeProperties.loadEnum(UcdProperty.Do_Not_Emit_Type); map_east_asian_width = indexUnicodeProperties.loadEnum(UcdProperty.East_Asian_Width); map_general_category = indexUnicodeProperties.loadEnum(UcdProperty.General_Category); - map_grapheme_cluster_break = indexUnicodeProperties.loadEnum(UcdProperty.Grapheme_Cluster_Break); - map_hangul_syllable_type = indexUnicodeProperties.loadEnum(UcdProperty.Hangul_Syllable_Type); + map_grapheme_cluster_break = + indexUnicodeProperties.loadEnum(UcdProperty.Grapheme_Cluster_Break); + map_hangul_syllable_type = + indexUnicodeProperties.loadEnum(UcdProperty.Hangul_Syllable_Type); map_identifier_status = indexUnicodeProperties.loadEnum(UcdProperty.Identifier_Status); 
map_identifier_type = indexUnicodeProperties.loadEnum(UcdProperty.Identifier_Type); map_idn_2008 = indexUnicodeProperties.loadEnum(UcdProperty.Idn_2008); map_idn_status = indexUnicodeProperties.loadEnum(UcdProperty.Idn_Status); - map_indic_conjunct_break = indexUnicodeProperties.loadEnum(UcdProperty.Indic_Conjunct_Break); - map_indic_positional_category = indexUnicodeProperties.loadEnum(UcdProperty.Indic_Positional_Category); - map_indic_syllabic_category = indexUnicodeProperties.loadEnum(UcdProperty.Indic_Syllabic_Category); + map_indic_conjunct_break = + indexUnicodeProperties.loadEnum(UcdProperty.Indic_Conjunct_Break); + map_indic_positional_category = + indexUnicodeProperties.loadEnum(UcdProperty.Indic_Positional_Category); + map_indic_syllabic_category = + indexUnicodeProperties.loadEnum(UcdProperty.Indic_Syllabic_Category); map_jamo_short_name = indexUnicodeProperties.loadEnum(UcdProperty.Jamo_Short_Name); map_joining_group = indexUnicodeProperties.loadEnum(UcdProperty.Joining_Group); map_joining_type = indexUnicodeProperties.loadEnum(UcdProperty.Joining_Type); @@ -86,14 +101,17 @@ public AttributeResolver(IndexUnicodeProperties iup) { map_numeric_type = indexUnicodeProperties.loadEnum(UcdProperty.Numeric_Type); map_other_joining_type = indexUnicodeProperties.loadEnum(UcdProperty.Other_Joining_Type); map_script = indexUnicodeProperties.loadEnum(UcdProperty.Script); - map_script_extensions = indexUnicodeProperties.getProperty(UcdProperty.Script_Extensions).getUnicodeMap(); + map_script_extensions = + indexUnicodeProperties.getProperty(UcdProperty.Script_Extensions).getUnicodeMap(); map_sentence_break = indexUnicodeProperties.loadEnum(UcdProperty.Sentence_Break); - map_vertical_orientation = indexUnicodeProperties.loadEnum(UcdProperty.Vertical_Orientation); + map_vertical_orientation = + indexUnicodeProperties.loadEnum(UcdProperty.Vertical_Orientation); map_word_break = indexUnicodeProperties.loadEnum(UcdProperty.Word_Break); - //UCD code is only set up to read 
a single Alias value from NameAliases.txt - //Instead, we'll load the Alias and the Type data as part of the constructor. We'll keep in memory as it - //NameAliases isn't too large. + // UCD code is only set up to read a single Alias value from NameAliases.txt + // Instead, we'll load the Alias and the Type data as part of the constructor. We'll keep in + // memory as it + // NameAliases isn't too large. map_NameAlias = loadNameAliases(); } @@ -132,7 +150,6 @@ public String getAlias() { public AliasType getType() { return type; } - } private static class NameAliasComparator implements java.util.Comparator { @@ -145,7 +162,8 @@ public int compare(NameAlias o1, NameAlias o2) { private HashMap> loadNameAliases() { HashMap> nameAliasesByCodepoint = new HashMap<>(); - final PropertyParsingInfo fileInfo = PropertyParsingInfo.getPropertyInfo(UcdProperty.Name_Alias); + final PropertyParsingInfo fileInfo = + PropertyParsingInfo.getPropertyInfo(UcdProperty.Name_Alias); String fullFilename = fileInfo.getFullFileName(indexUnicodeProperties.getUcdVersion()); UcdLineParser parser = new UcdLineParser(FileUtilities.in("", fullFilename)); NameAliasComparator nameAliasComparator = new NameAliasComparator(); @@ -153,11 +171,12 @@ private HashMap> loadNameAliases() { for (UcdLineParser.UcdLine line : parser) { String[] parts = line.getParts(); int codepoint = Integer.parseInt(parts[0], 16); - NameAlias nameAlias = new NameAlias( - parts[1], AliasType.valueOf(parts[2].toUpperCase(Locale.ROOT))); + NameAlias nameAlias = + new NameAlias(parts[1], AliasType.valueOf(parts[2].toUpperCase(Locale.ROOT))); if (nameAliasesByCodepoint.containsKey(codepoint)) { - LinkedList nameAliases = new LinkedList<>(nameAliasesByCodepoint.get(codepoint)); + LinkedList nameAliases = + new LinkedList<>(nameAliasesByCodepoint.get(codepoint)); nameAliases.add(nameAlias); nameAliases.sort(nameAliasComparator); nameAliasesByCodepoint.replace(codepoint, nameAliases); @@ -186,15 +205,21 @@ public String 
getAttributeValue(UcdProperty prop, int codepoint) { String EqUIdeo = getMappingValue(codepoint, resolvedValue, false, ""); return (EqUIdeo.equals("#")) ? null : EqUIdeo; case kCompatibilityVariant: - String kCompatibilityVariant = getMappingValue(codepoint, resolvedValue, false, "U+"); + String kCompatibilityVariant = + getMappingValue(codepoint, resolvedValue, false, "U+"); return (kCompatibilityVariant.equals("#")) ? "" : kCompatibilityVariant; case kSimplifiedVariant: case kTraditionalVariant: - String kVariant = getMappingValue(codepoint, resolvedValue, isUnihanAttributeRange(codepoint) - , "U+"); + String kVariant = + getMappingValue( + codepoint, + resolvedValue, + isUnihanAttributeRange(codepoint), + "U+"); return (kVariant.equals("#")) ? "" : kVariant; case Bidi_Mirroring_Glyph: - //Returning empty string for bmg to maintain compatibility with older generated files. + // Returning empty string for bmg to maintain compatibility with older + // generated files. String bmg = getMappingValue(codepoint, resolvedValue, false, ""); return (bmg.equals("#")) ? 
"" : bmg; default: @@ -203,25 +228,30 @@ public String getAttributeValue(UcdProperty prop, int codepoint) { case Miscellaneous: switch (prop) { case Jamo_Short_Name: - //return map_jamo_short_name.get(codepoint).getShortName(); + // return map_jamo_short_name.get(codepoint).getShortName(); return Optional.ofNullable(resolvedValue).orElse(""); case Name: - if (resolvedValue != null && resolvedValue.startsWith("CJK UNIFIED IDEOGRAPH-")) { + if (resolvedValue != null + && resolvedValue.startsWith("CJK UNIFIED IDEOGRAPH-")) { return "CJK UNIFIED IDEOGRAPH-#"; } - if (resolvedValue != null && resolvedValue.startsWith("CJK COMPATIBILITY IDEOGRAPH-")) { + if (resolvedValue != null + && resolvedValue.startsWith("CJK COMPATIBILITY IDEOGRAPH-")) { return "CJK COMPATIBILITY IDEOGRAPH-#"; } - if (resolvedValue != null && resolvedValue.startsWith("TANGUT IDEOGRAPH-")) { + if (resolvedValue != null + && resolvedValue.startsWith("TANGUT IDEOGRAPH-")) { return "TANGUT IDEOGRAPH-#"; } - if (resolvedValue != null && resolvedValue.startsWith("KHITAN SMALL SCRIPT CHARACTER-")) { + if (resolvedValue != null + && resolvedValue.startsWith("KHITAN SMALL SCRIPT CHARACTER-")) { return "KHITAN SMALL SCRIPT CHARACTER-#"; } if (resolvedValue != null && resolvedValue.startsWith("NUSHU CHARACTER-")) { return "NUSHU CHARACTER-#"; } - if (resolvedValue != null && resolvedValue.startsWith("EGYPTIAN HIEROGLYPH-")) { + if (resolvedValue != null + && resolvedValue.startsWith("EGYPTIAN HIEROGLYPH-")) { return "EGYPTIAN HIEROGLYPH-#"; } return Optional.ofNullable(resolvedValue).orElse(""); @@ -246,7 +276,9 @@ public String getAttributeValue(UcdProperty prop, int codepoint) { StringBuilder extensionBuilder = new StringBuilder(); String[] extensions = map_script_extensions.get(codepoint).split("\\|", 0); for (String extension : extensions) { - extensionBuilder.append(UcdPropertyValues.Script_Values.valueOf(extension).getShortName()); + extensionBuilder.append( + 
UcdPropertyValues.Script_Values.valueOf(extension) + .getShortName()); extensionBuilder.append(" "); } return extensionBuilder.toString().trim(); @@ -262,8 +294,12 @@ public String getAttributeValue(UcdProperty prop, int codepoint) { case Canonical_Combining_Class: return map_canonical_combining_class.get(codepoint).getShortName(); case Decomposition_Type: - //Returning lower case to maintain compatibility with older generated files. - return map_decomposition_type.get(codepoint).getShortName().toLowerCase(Locale.ROOT); + // Returning lower case to maintain compatibility with older generated + // files. + return map_decomposition_type + .get(codepoint) + .getShortName() + .toLowerCase(Locale.ROOT); case Do_Not_Emit_Type: return map_do_not_emit_type.get(codepoint).getShortName(); case East_Asian_Width: @@ -315,26 +351,27 @@ public String getAttributeValue(UcdProperty prop, int codepoint) { default: throw new RuntimeException("Missing Enumerated case"); } - case Binary: { - switch (resolvedValue) { - // Seems overkill to get this from UcdPropertyValues.Binary - case "No": - return "N"; - case "Yes": - return "Y"; - default: - throw new RuntimeException("Unexpected Binary value"); + case Binary: + { + switch (resolvedValue) { + // Seems overkill to get this from UcdPropertyValues.Binary + case "No": + return "N"; + case "Yes": + return "Y"; + default: + throw new RuntimeException("Unexpected Binary value"); + } } - } default: throw new RuntimeException("Missing PropertyType case"); } } public boolean isUnassignedCodepoint(int codepoint) { - return UcdPropertyValues.General_Category_Values.Unassigned.equals(getgc(codepoint)) || - UcdPropertyValues.General_Category_Values.Private_Use.equals(getgc(codepoint)) || - UcdPropertyValues.General_Category_Values.Surrogate.equals(getgc(codepoint)); + return UcdPropertyValues.General_Category_Values.Unassigned.equals(getgc(codepoint)) + || UcdPropertyValues.General_Category_Values.Private_Use.equals(getgc(codepoint)) + || 
UcdPropertyValues.General_Category_Values.Surrogate.equals(getgc(codepoint)); } public UcdPropertyValues.General_Category_Values getgc(int codepoint) { @@ -357,12 +394,15 @@ public HashMap getNameAliases(int codepoint) { return null; } - private String getMappingValue(int codepoint, String resolvedValue, boolean ignoreUnihanRange, String prefix) { + private String getMappingValue( + int codepoint, String resolvedValue, boolean ignoreUnihanRange, String prefix) { if (null == resolvedValue) { return "#"; } int[] resolvedValueInts = resolvedValue.codePoints().toArray(); - if (resolvedValueInts.length == 1 && resolvedValueInts[0] == codepoint && !ignoreUnihanRange) { + if (resolvedValueInts.length == 1 + && resolvedValueInts[0] == codepoint + && !ignoreUnihanRange) { return "#"; } StringBuilder sb = new StringBuilder(); @@ -375,14 +415,18 @@ private String getMappingValue(int codepoint, String resolvedValue, boolean igno public boolean isDifferentRange(int codepointA, int codepointB) { boolean isDifference = false; for (UcdProperty property : rangeDefiningProperties) { - isDifference = isDifference || - !getAttributeValue(property, codepointA).equals(getAttributeValue(property, codepointB)); + isDifference = + isDifference + || !getAttributeValue(property, codepointA) + .equals(getAttributeValue(property, codepointB)); } return isDifference; } private static String getCPString(int codepoint) { - return String.format("%4s", Integer.toHexString(codepoint)).replace(" ", "0").toUpperCase(Locale.ROOT); + return String.format("%4s", Integer.toHexString(codepoint)) + .replace(" ", "0") + .toUpperCase(Locale.ROOT); } public String getHexString(int codepoint) { @@ -390,12 +434,12 @@ public String getHexString(int codepoint) { } public boolean isUnihanAttributeRange(int codepoint) { - return getAttributeValue(UcdProperty.Unified_Ideograph, codepoint).equals("Y") || - !getAttributeValue(UcdProperty.kCompatibilityVariant, codepoint).isEmpty(); + return 
getAttributeValue(UcdProperty.Unified_Ideograph, codepoint).equals("Y") + || !getAttributeValue(UcdProperty.kCompatibilityVariant, codepoint).isEmpty(); } public boolean isUnifiedIdeograph(int codepoint) { - return getAttributeValue(UcdProperty.Unified_Ideograph, codepoint).equals("Y") && - getAttributeValue(UcdProperty.Name, codepoint).equals("CJK UNIFIED IDEOGRAPH-#"); + return getAttributeValue(UcdProperty.Unified_Ideograph, codepoint).equals("Y") + && getAttributeValue(UcdProperty.Name, codepoint).equals("CJK UNIFIED IDEOGRAPH-#"); } } diff --git a/unicodetools/src/main/java/org/unicode/xml/UCDDataResolver.java b/unicodetools/src/main/java/org/unicode/xml/UCDDataResolver.java index 1baa4131b..f4b407106 100644 --- a/unicodetools/src/main/java/org/unicode/xml/UCDDataResolver.java +++ b/unicodetools/src/main/java/org/unicode/xml/UCDDataResolver.java @@ -1,16 +1,14 @@ package org.unicode.xml; import com.ibm.icu.util.VersionInfo; +import java.util.*; import org.unicode.cldr.draft.FileUtilities; import org.unicode.props.IndexUnicodeProperties; import org.unicode.props.PropertyParsingInfo; import org.unicode.props.UcdLineParser; -import org.unicode.props.UcdProperty; import org.xml.sax.SAXException; import org.xml.sax.helpers.AttributesImpl; -import java.util.*; - public class UCDDataResolver { private final IndexUnicodeProperties indexUnicodeProperties; @@ -30,24 +28,31 @@ public void buildSection(UcdSectionDetail.UcdSection ucdSection) throws SAXExcep String childTag = ucdSection.getChildTag(); boolean parserWithRange = ucdSection.getParserWithRange(); boolean parserWithMissing = ucdSection.getParserWithMissing(); - UcdSectionComponent[] ucdSectionComponents = ucdSection.getUcdSectionDetail().getUcdSectionComponents(); + UcdSectionComponent[] ucdSectionComponents = + ucdSection.getUcdSectionDetail().getUcdSectionComponents(); if (isCompatibleVersion(minVersion, maxVersion)) { writer.startElement(tag); { for (UcdSectionComponent ucdSectionComponent : 
ucdSectionComponents) { - if (isCompatibleVersion(ucdSectionComponent.getMinVersion(), ucdSectionComponent.getMaxVersion())) { + if (isCompatibleVersion( + ucdSectionComponent.getMinVersion(), + ucdSectionComponent.getMaxVersion())) { final PropertyParsingInfo fileInfoEVS = - PropertyParsingInfo.getPropertyInfo(ucdSectionComponent.getUcdProperty()); - String fullFilename = fileInfoEVS.getFullFileName(indexUnicodeProperties.getUcdVersion()); - UcdLineParser parser = new UcdLineParser(FileUtilities.in("", fullFilename)); + PropertyParsingInfo.getPropertyInfo( + ucdSectionComponent.getUcdProperty()); + String fullFilename = + fileInfoEVS.getFullFileName(indexUnicodeProperties.getUcdVersion()); + UcdLineParser parser = + new UcdLineParser(FileUtilities.in("", fullFilename)); parser.withRange(parserWithRange); parser.withMissing(parserWithMissing); switch (ucdSection) { case BLOCKS: for (UcdLineParser.UcdLine line : parser) { if (!line.getOriginalLine().startsWith("#")) { - AttributesImpl attributes = getBlockAttributes(namespace, line); + AttributesImpl attributes = + getBlockAttributes(namespace, line); writer.startElement(childTag, attributes); { writer.endElement(childTag); @@ -64,8 +69,9 @@ public void buildSection(UcdSectionDetail.UcdSection ucdSection) throws SAXExcep List names = new ArrayList<>(namedSequences.keySet()); Collections.sort(names); for (String name : names) { - AttributesImpl attributes = getNamedSequenceAttributes(namespace, name, - namedSequences); + AttributesImpl attributes = + getNamedSequenceAttributes( + namespace, name, namedSequences); writer.startElement(childTag, attributes); { writer.endElement(childTag); @@ -74,7 +80,8 @@ public void buildSection(UcdSectionDetail.UcdSection ucdSection) throws SAXExcep break; default: for (UcdLineParser.UcdLine line : parser) { - AttributesImpl attributes = getAttributes(ucdSection, namespace, line); + AttributesImpl attributes = + getAttributes(ucdSection, namespace, line); 
writer.startElement(childTag, attributes); { writer.endElement(childTag); @@ -88,8 +95,8 @@ public void buildSection(UcdSectionDetail.UcdSection ucdSection) throws SAXExcep } } - private AttributesImpl getAttributes(UcdSectionDetail.UcdSection ucdSection, String namespace, - UcdLineParser.UcdLine line) { + private AttributesImpl getAttributes( + UcdSectionDetail.UcdSection ucdSection, String namespace, UcdLineParser.UcdLine line) { switch (ucdSection) { case CJKRADICALS: return getCJKRadicalAttributes(namespace, line); @@ -102,7 +109,8 @@ private AttributesImpl getAttributes(UcdSectionDetail.UcdSection ucdSection, Str case STANDARDIZEDVARIANTS: return getSVAttributes(namespace, line); default: - throw new IllegalArgumentException("getAttributes failed on an unexpected UcdSection"); + throw new IllegalArgumentException( + "getAttributes failed on an unexpected UcdSection"); } } @@ -110,92 +118,74 @@ private static AttributesImpl getBlockAttributes(String namespace, UcdLineParser String[] parts = line.getParts(); String[] range = parts[0].split("\\.\\."); AttributesImpl attributes = new AttributesImpl(); - attributes.addAttribute( - namespace, "first-cp", "first-cp", "CDATA", range[0]); - attributes.addAttribute( - namespace, "last-cp", "last-cp", "CDATA", range[1]); - attributes.addAttribute( - namespace, "name", "name", "CDATA", parts[1]); + attributes.addAttribute(namespace, "first-cp", "first-cp", "CDATA", range[0]); + attributes.addAttribute(namespace, "last-cp", "last-cp", "CDATA", range[1]); + attributes.addAttribute(namespace, "name", "name", "CDATA", parts[1]); return attributes; } - private static AttributesImpl getCJKRadicalAttributes(String namespace, UcdLineParser.UcdLine line) { + private static AttributesImpl getCJKRadicalAttributes( + String namespace, UcdLineParser.UcdLine line) { String[] parts = line.getParts(); AttributesImpl attributes = new AttributesImpl(); - attributes.addAttribute( - namespace, "number", "number", "CDATA", parts[0]); - 
attributes.addAttribute( - namespace, "radical", "radical", "CDATA", parts[1]); - attributes.addAttribute( - namespace, "ideograph", "ideograph", "CDATA", parts[2]); + attributes.addAttribute(namespace, "number", "number", "CDATA", parts[0]); + attributes.addAttribute(namespace, "radical", "radical", "CDATA", parts[1]); + attributes.addAttribute(namespace, "ideograph", "ideograph", "CDATA", parts[2]); return attributes; } - private static AttributesImpl getDoNotEmitAttributes(String namespace, UcdLineParser.UcdLine line) { + private static AttributesImpl getDoNotEmitAttributes( + String namespace, UcdLineParser.UcdLine line) { String[] parts = line.getParts(); AttributesImpl attributes = new AttributesImpl(); - attributes.addAttribute( - namespace, "of", "of", "CDATA", parts[0]); - attributes.addAttribute( - namespace, "use", "use", "CDATA", parts[1]); - attributes.addAttribute( - namespace, "because", "because", "CDATA", parts[2]); + attributes.addAttribute(namespace, "of", "of", "CDATA", parts[0]); + attributes.addAttribute(namespace, "use", "use", "CDATA", parts[1]); + attributes.addAttribute(namespace, "because", "because", "CDATA", parts[2]); return attributes; } - private static AttributesImpl getEmojiSourceAttributes(String namespace, UcdLineParser.UcdLine line) { + private static AttributesImpl getEmojiSourceAttributes( + String namespace, UcdLineParser.UcdLine line) { String[] parts = line.getParts(); AttributesImpl attributes = new AttributesImpl(); - attributes.addAttribute( - namespace, "unicode", "unicode", "CDATA", parts[0]); - attributes.addAttribute( - namespace, "docomo", "docomo", "CDATA", parts[1]); - attributes.addAttribute( - namespace, "kddi", "kddi", "CDATA", parts[2]); - attributes.addAttribute( - namespace, "softbank", "softbank", "CDATA", parts[3]); + attributes.addAttribute(namespace, "unicode", "unicode", "CDATA", parts[0]); + attributes.addAttribute(namespace, "docomo", "docomo", "CDATA", parts[1]); + attributes.addAttribute(namespace, 
"kddi", "kddi", "CDATA", parts[2]); + attributes.addAttribute(namespace, "softbank", "softbank", "CDATA", parts[3]); return attributes; } - private static AttributesImpl getNamedSequenceAttributes(String namespace, String name, - HashMap namedSequences) { + private static AttributesImpl getNamedSequenceAttributes( + String namespace, String name, HashMap namedSequences) { AttributesImpl attributes = new AttributesImpl(); - attributes.addAttribute( - namespace, "name", "name", "CDATA", name); - attributes.addAttribute( - namespace, "cps", "cps", "CDATA", namedSequences.get(name)); + attributes.addAttribute(namespace, "name", "name", "CDATA", name); + attributes.addAttribute(namespace, "cps", "cps", "CDATA", namedSequences.get(name)); return attributes; } private static AttributesImpl getNCAttributes(String namespace, UcdLineParser.UcdLine line) { String[] parts = line.getParts(); AttributesImpl attributes = new AttributesImpl(); - attributes.addAttribute( - namespace, "cp", "cp", "CDATA", parts[0]); - attributes.addAttribute( - namespace, "old", "old", "CDATA", parts[1]); - attributes.addAttribute( - namespace, "new", "new", "CDATA", parts[2]); - attributes.addAttribute( - namespace, "version", "version", "CDATA", parts[3]); + attributes.addAttribute(namespace, "cp", "cp", "CDATA", parts[0]); + attributes.addAttribute(namespace, "old", "old", "CDATA", parts[1]); + attributes.addAttribute(namespace, "new", "new", "CDATA", parts[2]); + attributes.addAttribute(namespace, "version", "version", "CDATA", parts[3]); return attributes; } private static AttributesImpl getSVAttributes(String namespace, UcdLineParser.UcdLine line) { String[] parts = line.getParts(); AttributesImpl attributes = new AttributesImpl(); + attributes.addAttribute(namespace, "cps", "cps", "CDATA", parts[0]); + attributes.addAttribute(namespace, "desc", "desc", "CDATA", parts[1]); attributes.addAttribute( - namespace, "cps", "cps", "CDATA", parts[0]); - attributes.addAttribute( - namespace, "desc", 
"desc", "CDATA", parts[1]); - attributes.addAttribute( - namespace, "when", "when", "CDATA", - parts[2] != null ? parts[2] : ""); + namespace, "when", "when", "CDATA", parts[2] != null ? parts[2] : ""); return attributes; } private boolean isCompatibleVersion(VersionInfo minVersion, VersionInfo maxVersion) { - return (indexUnicodeProperties.getUcdVersion().compareTo(minVersion) >= 0 && ( - maxVersion == null || indexUnicodeProperties.getUcdVersion().compareTo(maxVersion) <= 0)); + return (indexUnicodeProperties.getUcdVersion().compareTo(minVersion) >= 0 + && (maxVersion == null + || indexUnicodeProperties.getUcdVersion().compareTo(maxVersion) <= 0)); } } diff --git a/unicodetools/src/main/java/org/unicode/xml/UCDXMLWriter.java b/unicodetools/src/main/java/org/unicode/xml/UCDXMLWriter.java index 27d88a766..ff31e69c6 100644 --- a/unicodetools/src/main/java/org/unicode/xml/UCDXMLWriter.java +++ b/unicodetools/src/main/java/org/unicode/xml/UCDXMLWriter.java @@ -1,9 +1,6 @@ package org.unicode.xml; -import org.xml.sax.Attributes; -import org.xml.sax.SAXException; -import org.xml.sax.helpers.AttributesImpl; - +import java.io.FileOutputStream; import javax.xml.transform.OutputKeys; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerConfigurationException; @@ -11,12 +8,12 @@ import javax.xml.transform.sax.SAXTransformerFactory; import javax.xml.transform.sax.TransformerHandler; import javax.xml.transform.stream.StreamResult; -import java.io.FileOutputStream; +import org.xml.sax.SAXException; +import org.xml.sax.helpers.AttributesImpl; public class UCDXMLWriter { - public static final String NAMESPACE - = "http://www.unicode.org/ns/2003/ucd/1.0"; + public static final String NAMESPACE = "http://www.unicode.org/ns/2003/ucd/1.0"; private final TransformerHandler transformerHandler; @@ -27,53 +24,51 @@ public TransformerHandler getTransformerHandler() { public UCDXMLWriter(FileOutputStream f) throws TransformerConfigurationException { 
TransformerFactory tfactory = TransformerFactory.newInstance(); SAXTransformerFactory sfactory = (SAXTransformerFactory) tfactory; - transformerHandler = sfactory.newTransformerHandler (); - Transformer transformer = transformerHandler.getTransformer (); + transformerHandler = sfactory.newTransformerHandler(); + Transformer transformer = transformerHandler.getTransformer(); transformer.setOutputProperty(OutputKeys.ENCODING, "utf-8"); transformer.setOutputProperty(OutputKeys.METHOD, "xml"); transformer.setOutputProperty(OutputKeys.INDENT, "yes"); transformer.setOutputProperty(OutputKeys.STANDALONE, "yes"); transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "3"); transformer.setOutputProperty("{http://xml.apache.org/xalan}indent-amount", "3"); - transformerHandler.setResult (new StreamResult(f)); + transformerHandler.setResult(new StreamResult(f)); } public void startFile() throws SAXException { - transformerHandler.startDocument (); - char[] c = "\n".toCharArray (); - transformerHandler.characters (c, 0, c.length); - //TODO: JRW change hardcoded 2023 to current year. - c = " \u00A9 2023 Unicode\u00AE, Inc. ".toCharArray (); - transformerHandler.comment (c, 0, c.length); - c = "\n".toCharArray (); - transformerHandler.characters (c, 0, c.length); - c = " For terms of use, see http://www.unicode.org/terms_of_use.html ".toCharArray (); - transformerHandler.comment (c, 0, c.length); - c = "\n\n\n".toCharArray (); - transformerHandler.characters (c, 0, c.length); - + transformerHandler.startDocument(); + char[] c = "\n".toCharArray(); + transformerHandler.characters(c, 0, c.length); + // TODO: JRW change hardcoded 2023 to current year. + c = " \u00A9 2023 Unicode\u00AE, Inc. 
".toCharArray(); + transformerHandler.comment(c, 0, c.length); + c = "\n".toCharArray(); + transformerHandler.characters(c, 0, c.length); + c = " For terms of use, see http://www.unicode.org/terms_of_use.html ".toCharArray(); + transformerHandler.comment(c, 0, c.length); + c = "\n\n\n".toCharArray(); + transformerHandler.characters(c, 0, c.length); } + public void endFile() throws SAXException { - transformerHandler.endDocument (); + transformerHandler.endDocument(); } public void startElement(String tagName) throws SAXException { - AttributesImpl attributes = new AttributesImpl (); + AttributesImpl attributes = new AttributesImpl(); startElement(tagName, attributes); } public void startElement(String tagName, AttributesImpl attributes) throws SAXException { - transformerHandler.startElement (NAMESPACE, tagName, tagName, attributes); + transformerHandler.startElement(NAMESPACE, tagName, tagName, attributes); } public void addContent(String s) throws SAXException { - char[] d = s.toCharArray (); - transformerHandler.characters (d, 0, d.length); + char[] d = s.toCharArray(); + transformerHandler.characters(d, 0, d.length); } public void endElement(String tagName) throws SAXException { - transformerHandler.endElement (NAMESPACE, tagName, tagName); + transformerHandler.endElement(NAMESPACE, tagName, tagName); } } - - diff --git a/unicodetools/src/main/java/org/unicode/xml/UcdPropertyDetail.java b/unicodetools/src/main/java/org/unicode/xml/UcdPropertyDetail.java index 594a6f67b..57989d255 100644 --- a/unicodetools/src/main/java/org/unicode/xml/UcdPropertyDetail.java +++ b/unicodetools/src/main/java/org/unicode/xml/UcdPropertyDetail.java @@ -1,821 +1,2236 @@ package org.unicode.xml; import com.ibm.icu.util.VersionInfo; -import org.unicode.props.UcdProperty; - import java.util.LinkedHashSet; import java.util.Set; +import org.unicode.props.UcdProperty; public class UcdPropertyDetail { - static private LinkedHashSet basePropertyDetails = new LinkedHashSet(); - static 
private LinkedHashSet cjkPropertyDetails = new LinkedHashSet(); - static private LinkedHashSet ucdxmlPropertyDetails = new LinkedHashSet(); - static private LinkedHashSet allPropertyDetails = new LinkedHashSet(); + private static LinkedHashSet basePropertyDetails = + new LinkedHashSet(); + private static LinkedHashSet cjkPropertyDetails = + new LinkedHashSet(); + private static LinkedHashSet ucdxmlPropertyDetails = + new LinkedHashSet(); + private static LinkedHashSet allPropertyDetails = + new LinkedHashSet(); - public static UcdPropertyDetail Age_Detail = new UcdPropertyDetail( - UcdProperty.Age, VersionInfo.getInstance(1, 1, 0), 1, - true, false, false, true); - public static UcdPropertyDetail Name_Detail = new UcdPropertyDetail( - UcdProperty.Name, VersionInfo.getInstance(1, 1, 0), 2, - true, false, false, true); - public static UcdPropertyDetail Jamo_Short_Name_Detail = new UcdPropertyDetail( - UcdProperty.Jamo_Short_Name, VersionInfo.getInstance(1, 1, 0), 3, - true, false, false, true); - public static UcdPropertyDetail General_Category_Detail = new UcdPropertyDetail( - UcdProperty.General_Category, VersionInfo.getInstance(1, 1, 0), 4, - true, false, false, true); - public static UcdPropertyDetail Canonical_Combining_Class_Detail = new UcdPropertyDetail( - UcdProperty.Canonical_Combining_Class, VersionInfo.getInstance(1, 1, 0), 5, - true, false, false, true); - public static UcdPropertyDetail Decomposition_Type_Detail = new UcdPropertyDetail( - UcdProperty.Decomposition_Type, VersionInfo.getInstance(1, 1, 0), 6, - true, false, false, true); - public static UcdPropertyDetail Decomposition_Mapping_Detail = new UcdPropertyDetail( - UcdProperty.Decomposition_Mapping, VersionInfo.getInstance(1, 1, 0), 7, - true, false, false, true); - public static UcdPropertyDetail Numeric_Type_Detail = new UcdPropertyDetail( - UcdProperty.Numeric_Type, VersionInfo.getInstance(1, 1, 0), 8, - true, false, false, true); - public static UcdPropertyDetail Numeric_Value_Detail = new 
UcdPropertyDetail( - UcdProperty.Numeric_Value, VersionInfo.getInstance(1, 1, 0), 9, - true, false, false, true); - public static UcdPropertyDetail Bidi_Class_Detail = new UcdPropertyDetail( - UcdProperty.Bidi_Class, VersionInfo.getInstance(1, 1, 0), 10, - true, false, false, true); - public static UcdPropertyDetail Bidi_Paired_Bracket_Type_Detail = new UcdPropertyDetail( - UcdProperty.Bidi_Paired_Bracket_Type, VersionInfo.getInstance(6, 3, 0), 11, - true, false, false, true); - public static UcdPropertyDetail Bidi_Paired_Bracket_Detail = new UcdPropertyDetail( - UcdProperty.Bidi_Paired_Bracket, VersionInfo.getInstance(6, 3, 0), 12, - true, false, false, true); - public static UcdPropertyDetail Bidi_Mirrored_Detail = new UcdPropertyDetail( - UcdProperty.Bidi_Mirrored, VersionInfo.getInstance(1, 1, 0), 13, - true, false, false, true); - public static UcdPropertyDetail Bidi_Mirroring_Glyph_Detail = new UcdPropertyDetail( - UcdProperty.Bidi_Mirroring_Glyph, VersionInfo.getInstance(1, 1, 0), 14, - true, false, false, true); - public static UcdPropertyDetail Simple_Uppercase_Mapping_Detail = new UcdPropertyDetail( - UcdProperty.Simple_Uppercase_Mapping, VersionInfo.getInstance(1, 1, 0), 15, - true, false, false, true); - public static UcdPropertyDetail Simple_Lowercase_Mapping_Detail = new UcdPropertyDetail( - UcdProperty.Simple_Lowercase_Mapping, VersionInfo.getInstance(1, 1, 0), 16, - true, false, false, true); - public static UcdPropertyDetail Simple_Titlecase_Mapping_Detail = new UcdPropertyDetail( - UcdProperty.Simple_Titlecase_Mapping, VersionInfo.getInstance(1, 1, 0), 17, - true, false, false, true); - public static UcdPropertyDetail Uppercase_Mapping_Detail = new UcdPropertyDetail( - UcdProperty.Uppercase_Mapping, VersionInfo.getInstance(1, 1, 0), 18, - true, false, false, true); - public static UcdPropertyDetail Lowercase_Mapping_Detail = new UcdPropertyDetail( - UcdProperty.Lowercase_Mapping, VersionInfo.getInstance(1, 1, 0), 19, - true, false, false, true); - 
public static UcdPropertyDetail Titlecase_Mapping_Detail = new UcdPropertyDetail( - UcdProperty.Titlecase_Mapping, VersionInfo.getInstance(1, 1, 0), 20, - true, false, false, true); - // public static UcdPropertyDetail Special_Case_Condition_Detail = new UcdPropertyDetail ( -// UcdProperty.Special_Case_Condition, VersionInfo.getInstance(1,1,0), 21, -// true, false, false, true); - public static UcdPropertyDetail Simple_Case_Folding_Detail = new UcdPropertyDetail( - UcdProperty.Simple_Case_Folding, VersionInfo.getInstance(1, 1, 0), 22, - true, false, false, true); - public static UcdPropertyDetail Case_Folding_Detail = new UcdPropertyDetail( - UcdProperty.Case_Folding, VersionInfo.getInstance(1, 1, 0), 23, - true, false, false, true); - public static UcdPropertyDetail Joining_Type_Detail = new UcdPropertyDetail( - UcdProperty.Joining_Type, VersionInfo.getInstance(1, 1, 0), 24, - true, false, false, true); - public static UcdPropertyDetail Joining_Group_Detail = new UcdPropertyDetail( - UcdProperty.Joining_Group, VersionInfo.getInstance(1, 1, 0), 25, - true, false, false, true); - public static UcdPropertyDetail East_Asian_Width_Detail = new UcdPropertyDetail( - UcdProperty.East_Asian_Width, VersionInfo.getInstance(1, 1, 0), 26, - true, false, false, true); - public static UcdPropertyDetail Line_Break_Detail = new UcdPropertyDetail( - UcdProperty.Line_Break, VersionInfo.getInstance(1, 1, 0), 27, - true, false, false, true); - public static UcdPropertyDetail Script_Detail = new UcdPropertyDetail( - UcdProperty.Script, VersionInfo.getInstance(1, 1, 0), 28, - true, false, false, true); - public static UcdPropertyDetail Script_Extensions_Detail = new UcdPropertyDetail( - UcdProperty.Script_Extensions, VersionInfo.getInstance(6, 1, 0), 29, - true, false, false, true); - public static UcdPropertyDetail Dash_Detail = new UcdPropertyDetail( - UcdProperty.Dash, VersionInfo.getInstance(1, 1, 0), 30, - true, false, false, true); - public static UcdPropertyDetail 
White_Space_Detail = new UcdPropertyDetail( - UcdProperty.White_Space, VersionInfo.getInstance(1, 1, 0), 31, - true, false, false, true); - public static UcdPropertyDetail Hyphen_Detail = new UcdPropertyDetail( - UcdProperty.Hyphen, VersionInfo.getInstance(1, 1, 0), 32, - true, false, false, true); - public static UcdPropertyDetail Quotation_Mark_Detail = new UcdPropertyDetail( - UcdProperty.Quotation_Mark, VersionInfo.getInstance(1, 1, 0), 33, - true, false, false, true); - public static UcdPropertyDetail Radical_Detail = new UcdPropertyDetail( - UcdProperty.Radical, VersionInfo.getInstance(1, 1, 0), 34, - true, false, false, true); - public static UcdPropertyDetail Ideographic_Detail = new UcdPropertyDetail( - UcdProperty.Ideographic, VersionInfo.getInstance(1, 1, 0), 35, - true, false, false, true); - public static UcdPropertyDetail Unified_Ideograph_Detail = new UcdPropertyDetail( - UcdProperty.Unified_Ideograph, VersionInfo.getInstance(1, 1, 0), 36, - true, false, false, true); - public static UcdPropertyDetail IDS_Binary_Operator_Detail = new UcdPropertyDetail( - UcdProperty.IDS_Binary_Operator, VersionInfo.getInstance(1, 1, 0), 37, - true, false, false, true); - public static UcdPropertyDetail IDS_Trinary_Operator_Detail = new UcdPropertyDetail( - UcdProperty.IDS_Trinary_Operator, VersionInfo.getInstance(1, 1, 0), 38, - true, false, false, true); - public static UcdPropertyDetail Hangul_Syllable_Type_Detail = new UcdPropertyDetail( - UcdProperty.Hangul_Syllable_Type, VersionInfo.getInstance(1, 1, 0), 39, - true, false, false, true); - public static UcdPropertyDetail Default_Ignorable_Code_Point_Detail = new UcdPropertyDetail( - UcdProperty.Default_Ignorable_Code_Point, VersionInfo.getInstance(1, 1, 0), 40, - true, false, false, true); - public static UcdPropertyDetail Other_Default_Ignorable_Code_Point_Detail = new UcdPropertyDetail( - UcdProperty.Other_Default_Ignorable_Code_Point, VersionInfo.getInstance(1, 1, 0), 41, - true, false, false, true); - public 
static UcdPropertyDetail Alphabetic_Detail = new UcdPropertyDetail( - UcdProperty.Alphabetic, VersionInfo.getInstance(1, 1, 0), 42, - true, false, false, true); - public static UcdPropertyDetail Other_Alphabetic_Detail = new UcdPropertyDetail( - UcdProperty.Other_Alphabetic, VersionInfo.getInstance(1, 1, 0), 43, - true, false, false, true); - public static UcdPropertyDetail Uppercase_Detail = new UcdPropertyDetail( - UcdProperty.Uppercase, VersionInfo.getInstance(1, 1, 0), 44, - true, false, false, true); - public static UcdPropertyDetail Other_Uppercase_Detail = new UcdPropertyDetail( - UcdProperty.Other_Uppercase, VersionInfo.getInstance(1, 1, 0), 45, - true, false, false, true); - public static UcdPropertyDetail Lowercase_Detail = new UcdPropertyDetail( - UcdProperty.Lowercase, VersionInfo.getInstance(1, 1, 0), 46, - true, false, false, true); - public static UcdPropertyDetail Other_Lowercase_Detail = new UcdPropertyDetail( - UcdProperty.Other_Lowercase, VersionInfo.getInstance(1, 1, 0), 47, - true, false, false, true); - public static UcdPropertyDetail Math_Detail = new UcdPropertyDetail( - UcdProperty.Math, VersionInfo.getInstance(1, 1, 0), 48, - true, false, false, true); - public static UcdPropertyDetail Other_Math_Detail = new UcdPropertyDetail( - UcdProperty.Other_Math, VersionInfo.getInstance(1, 1, 0), 49, - true, false, false, true); - public static UcdPropertyDetail Hex_Digit_Detail = new UcdPropertyDetail( - UcdProperty.Hex_Digit, VersionInfo.getInstance(1, 1, 0), 50, - true, false, false, true); - public static UcdPropertyDetail ASCII_Hex_Digit_Detail = new UcdPropertyDetail( - UcdProperty.ASCII_Hex_Digit, VersionInfo.getInstance(1, 1, 0), 51, - true, false, false, true); - public static UcdPropertyDetail Noncharacter_Code_Point_Detail = new UcdPropertyDetail( - UcdProperty.Noncharacter_Code_Point, VersionInfo.getInstance(1, 1, 0), 52, - true, false, false, true); - public static UcdPropertyDetail Variation_Selector_Detail = new UcdPropertyDetail( - 
UcdProperty.Variation_Selector, VersionInfo.getInstance(1, 1, 0), 53, - true, false, false, true); - public static UcdPropertyDetail Bidi_Control_Detail = new UcdPropertyDetail( - UcdProperty.Bidi_Control, VersionInfo.getInstance(1, 1, 0), 54, - true, false, false, true); - public static UcdPropertyDetail Join_Control_Detail = new UcdPropertyDetail( - UcdProperty.Join_Control, VersionInfo.getInstance(1, 1, 0), 55, - true, false, false, true); - public static UcdPropertyDetail Grapheme_Base_Detail = new UcdPropertyDetail( - UcdProperty.Grapheme_Base, VersionInfo.getInstance(1, 1, 0), 56, - true, false, false, true); - public static UcdPropertyDetail Grapheme_Extend_Detail = new UcdPropertyDetail( - UcdProperty.Grapheme_Extend, VersionInfo.getInstance(1, 1, 0), 57, - true, false, false, true); - public static UcdPropertyDetail Other_Grapheme_Extend_Detail = new UcdPropertyDetail( - UcdProperty.Other_Grapheme_Extend, VersionInfo.getInstance(1, 1, 0), 58, - true, false, false, true); - public static UcdPropertyDetail Grapheme_Link_Detail = new UcdPropertyDetail( - UcdProperty.Grapheme_Link, VersionInfo.getInstance(1, 1, 0), 59, - true, false, false, true); - public static UcdPropertyDetail Sentence_Terminal_Detail = new UcdPropertyDetail( - UcdProperty.Sentence_Terminal, VersionInfo.getInstance(1, 1, 0), 60, - true, false, false, true); - public static UcdPropertyDetail Extender_Detail = new UcdPropertyDetail( - UcdProperty.Extender, VersionInfo.getInstance(1, 1, 0), 61, - true, false, false, true); - public static UcdPropertyDetail Terminal_Punctuation_Detail = new UcdPropertyDetail( - UcdProperty.Terminal_Punctuation, VersionInfo.getInstance(1, 1, 0), 62, - true, false, false, true); - public static UcdPropertyDetail Diacritic_Detail = new UcdPropertyDetail( - UcdProperty.Diacritic, VersionInfo.getInstance(1, 1, 0), 63, - true, false, false, true); - public static UcdPropertyDetail Deprecated_Detail = new UcdPropertyDetail( - UcdProperty.Deprecated, 
VersionInfo.getInstance(1, 1, 0), 64, - true, false, false, true); - public static UcdPropertyDetail ID_Start_Detail = new UcdPropertyDetail( - UcdProperty.ID_Start, VersionInfo.getInstance(1, 1, 0), 65, - true, false, false, true); - public static UcdPropertyDetail Other_ID_Start_Detail = new UcdPropertyDetail( - UcdProperty.Other_ID_Start, VersionInfo.getInstance(1, 1, 0), 66, - true, false, false, true); - public static UcdPropertyDetail XID_Start_Detail = new UcdPropertyDetail( - UcdProperty.XID_Start, VersionInfo.getInstance(1, 1, 0), 67, - true, false, false, true); - public static UcdPropertyDetail ID_Continue_Detail = new UcdPropertyDetail( - UcdProperty.ID_Continue, VersionInfo.getInstance(1, 1, 0), 68, - true, false, false, true); - public static UcdPropertyDetail Other_ID_Continue_Detail = new UcdPropertyDetail( - UcdProperty.Other_ID_Continue, VersionInfo.getInstance(1, 1, 0), 69, - true, false, false, true); - public static UcdPropertyDetail XID_Continue_Detail = new UcdPropertyDetail( - UcdProperty.XID_Continue, VersionInfo.getInstance(1, 1, 0), 70, - true, false, false, true); - public static UcdPropertyDetail Soft_Dotted_Detail = new UcdPropertyDetail( - UcdProperty.Soft_Dotted, VersionInfo.getInstance(1, 1, 0), 71, - true, false, false, true); - public static UcdPropertyDetail Logical_Order_Exception_Detail = new UcdPropertyDetail( - UcdProperty.Logical_Order_Exception, VersionInfo.getInstance(1, 1, 0), 72, - true, false, false, true); - public static UcdPropertyDetail Pattern_White_Space_Detail = new UcdPropertyDetail( - UcdProperty.Pattern_White_Space, VersionInfo.getInstance(1, 1, 0), 73, - true, false, false, true); - public static UcdPropertyDetail Pattern_Syntax_Detail = new UcdPropertyDetail( - UcdProperty.Pattern_Syntax, VersionInfo.getInstance(1, 1, 0), 74, - true, false, false, true); - public static UcdPropertyDetail Grapheme_Cluster_Break_Detail = new UcdPropertyDetail( - UcdProperty.Grapheme_Cluster_Break, VersionInfo.getInstance(1, 1, 
0), 75, - true, false, false, true); - public static UcdPropertyDetail Word_Break_Detail = new UcdPropertyDetail( - UcdProperty.Word_Break, VersionInfo.getInstance(1, 1, 0), 76, - true, false, false, true); - public static UcdPropertyDetail Sentence_Break_Detail = new UcdPropertyDetail( - UcdProperty.Sentence_Break, VersionInfo.getInstance(1, 1, 0), 77, - true, false, false, true); - public static UcdPropertyDetail Composition_Exclusion_Detail = new UcdPropertyDetail( - UcdProperty.Composition_Exclusion, VersionInfo.getInstance(1, 1, 0), 78, - true, false, false, true); - public static UcdPropertyDetail Full_Composition_Exclusion_Detail = new UcdPropertyDetail( - UcdProperty.Full_Composition_Exclusion, VersionInfo.getInstance(1, 1, 0), 79, - true, false, false, true); - public static UcdPropertyDetail NFC_Quick_Check_Detail = new UcdPropertyDetail( - UcdProperty.NFC_Quick_Check, VersionInfo.getInstance(1, 1, 0), 80, - true, false, false, true); - public static UcdPropertyDetail NFD_Quick_Check_Detail = new UcdPropertyDetail( - UcdProperty.NFD_Quick_Check, VersionInfo.getInstance(1, 1, 0), 81, - true, false, false, true); - public static UcdPropertyDetail NFKC_Quick_Check_Detail = new UcdPropertyDetail( - UcdProperty.NFKC_Quick_Check, VersionInfo.getInstance(1, 1, 0), 82, - true, false, false, true); - public static UcdPropertyDetail NFKD_Quick_Check_Detail = new UcdPropertyDetail( - UcdProperty.NFKD_Quick_Check, VersionInfo.getInstance(1, 1, 0), 83, - true, false, false, true); - public static UcdPropertyDetail Expands_On_NFC_Detail = new UcdPropertyDetail( - UcdProperty.Expands_On_NFC, VersionInfo.getInstance(1, 1, 0), 84, - true, false, false, true); - public static UcdPropertyDetail Expands_On_NFD_Detail = new UcdPropertyDetail( - UcdProperty.Expands_On_NFD, VersionInfo.getInstance(1, 1, 0), 85, - true, false, false, true); - public static UcdPropertyDetail Expands_On_NFKC_Detail = new UcdPropertyDetail( - UcdProperty.Expands_On_NFKC, VersionInfo.getInstance(1, 
1, 0), 86, - true, false, false, true); - public static UcdPropertyDetail Expands_On_NFKD_Detail = new UcdPropertyDetail( - UcdProperty.Expands_On_NFKD, VersionInfo.getInstance(1, 1, 0), 87, - true, false, false, true); - public static UcdPropertyDetail FC_NFC_Closure_Detail = new UcdPropertyDetail( - UcdProperty.FC_NFKC_Closure, VersionInfo.getInstance(1, 1, 0), 88, - true, false, false, true); - public static UcdPropertyDetail Case_Ignorable_Detail = new UcdPropertyDetail( - UcdProperty.Case_Ignorable, VersionInfo.getInstance(5, 2, 0), 89, - true, false, false, true); - public static UcdPropertyDetail Cased_Detail = new UcdPropertyDetail( - UcdProperty.Cased, VersionInfo.getInstance(5, 2, 0), 90, - true, false, false, true); - public static UcdPropertyDetail Changes_When_CaseFolded_Detail = new UcdPropertyDetail( - UcdProperty.Changes_When_Casefolded, VersionInfo.getInstance(5, 2, 0), 91, - true, false, false, true); - public static UcdPropertyDetail Changes_When_CaseMapped_Detail = new UcdPropertyDetail( - UcdProperty.Changes_When_Casemapped, VersionInfo.getInstance(5, 2, 0), 92, - true, false, false, true); - public static UcdPropertyDetail Changes_When_NFKC_Casefolded_Detail = new UcdPropertyDetail( - UcdProperty.Changes_When_NFKC_Casefolded, VersionInfo.getInstance(5, 2, 0), 93, - true, false, false, true); - public static UcdPropertyDetail Changes_When_Lowercased_Detail = new UcdPropertyDetail( - UcdProperty.Changes_When_Lowercased, VersionInfo.getInstance(5, 2, 0), 94, - true, false, false, true); - public static UcdPropertyDetail Changes_When_Titlecased_Detail = new UcdPropertyDetail( - UcdProperty.Changes_When_Titlecased, VersionInfo.getInstance(5, 2, 0), 95, - true, false, false, true); - public static UcdPropertyDetail Changes_When_Uppercased_Detail = new UcdPropertyDetail( - UcdProperty.Changes_When_Uppercased, VersionInfo.getInstance(5, 2, 0), 96, - true, false, false, true); - public static UcdPropertyDetail NFKC_Casefold_Detail = new 
UcdPropertyDetail( - UcdProperty.NFKC_Casefold, VersionInfo.getInstance(5, 2, 0), 97, - true, false, false, true); - public static UcdPropertyDetail Indic_Syllabic_Category_Detail = new UcdPropertyDetail( - UcdProperty.Indic_Syllabic_Category, VersionInfo.getInstance(6, 0, 0), 98, - true, false, false, true); + public static UcdPropertyDetail Age_Detail = + new UcdPropertyDetail( + UcdProperty.Age, VersionInfo.getInstance(1, 1, 0), 1, true, false, false, true); + public static UcdPropertyDetail Name_Detail = + new UcdPropertyDetail( + UcdProperty.Name, + VersionInfo.getInstance(1, 1, 0), + 2, + true, + false, + false, + true); + public static UcdPropertyDetail Jamo_Short_Name_Detail = + new UcdPropertyDetail( + UcdProperty.Jamo_Short_Name, + VersionInfo.getInstance(1, 1, 0), + 3, + true, + false, + false, + true); + public static UcdPropertyDetail General_Category_Detail = + new UcdPropertyDetail( + UcdProperty.General_Category, + VersionInfo.getInstance(1, 1, 0), + 4, + true, + false, + false, + true); + public static UcdPropertyDetail Canonical_Combining_Class_Detail = + new UcdPropertyDetail( + UcdProperty.Canonical_Combining_Class, + VersionInfo.getInstance(1, 1, 0), + 5, + true, + false, + false, + true); + public static UcdPropertyDetail Decomposition_Type_Detail = + new UcdPropertyDetail( + UcdProperty.Decomposition_Type, + VersionInfo.getInstance(1, 1, 0), + 6, + true, + false, + false, + true); + public static UcdPropertyDetail Decomposition_Mapping_Detail = + new UcdPropertyDetail( + UcdProperty.Decomposition_Mapping, + VersionInfo.getInstance(1, 1, 0), + 7, + true, + false, + false, + true); + public static UcdPropertyDetail Numeric_Type_Detail = + new UcdPropertyDetail( + UcdProperty.Numeric_Type, + VersionInfo.getInstance(1, 1, 0), + 8, + true, + false, + false, + true); + public static UcdPropertyDetail Numeric_Value_Detail = + new UcdPropertyDetail( + UcdProperty.Numeric_Value, + VersionInfo.getInstance(1, 1, 0), + 9, + true, + false, + false, + 
true); + public static UcdPropertyDetail Bidi_Class_Detail = + new UcdPropertyDetail( + UcdProperty.Bidi_Class, + VersionInfo.getInstance(1, 1, 0), + 10, + true, + false, + false, + true); + public static UcdPropertyDetail Bidi_Paired_Bracket_Type_Detail = + new UcdPropertyDetail( + UcdProperty.Bidi_Paired_Bracket_Type, + VersionInfo.getInstance(6, 3, 0), + 11, + true, + false, + false, + true); + public static UcdPropertyDetail Bidi_Paired_Bracket_Detail = + new UcdPropertyDetail( + UcdProperty.Bidi_Paired_Bracket, + VersionInfo.getInstance(6, 3, 0), + 12, + true, + false, + false, + true); + public static UcdPropertyDetail Bidi_Mirrored_Detail = + new UcdPropertyDetail( + UcdProperty.Bidi_Mirrored, + VersionInfo.getInstance(1, 1, 0), + 13, + true, + false, + false, + true); + public static UcdPropertyDetail Bidi_Mirroring_Glyph_Detail = + new UcdPropertyDetail( + UcdProperty.Bidi_Mirroring_Glyph, + VersionInfo.getInstance(1, 1, 0), + 14, + true, + false, + false, + true); + public static UcdPropertyDetail Simple_Uppercase_Mapping_Detail = + new UcdPropertyDetail( + UcdProperty.Simple_Uppercase_Mapping, + VersionInfo.getInstance(1, 1, 0), + 15, + true, + false, + false, + true); + public static UcdPropertyDetail Simple_Lowercase_Mapping_Detail = + new UcdPropertyDetail( + UcdProperty.Simple_Lowercase_Mapping, + VersionInfo.getInstance(1, 1, 0), + 16, + true, + false, + false, + true); + public static UcdPropertyDetail Simple_Titlecase_Mapping_Detail = + new UcdPropertyDetail( + UcdProperty.Simple_Titlecase_Mapping, + VersionInfo.getInstance(1, 1, 0), + 17, + true, + false, + false, + true); + public static UcdPropertyDetail Uppercase_Mapping_Detail = + new UcdPropertyDetail( + UcdProperty.Uppercase_Mapping, + VersionInfo.getInstance(1, 1, 0), + 18, + true, + false, + false, + true); + public static UcdPropertyDetail Lowercase_Mapping_Detail = + new UcdPropertyDetail( + UcdProperty.Lowercase_Mapping, + VersionInfo.getInstance(1, 1, 0), + 19, + true, + false, + 
false, + true); + public static UcdPropertyDetail Titlecase_Mapping_Detail = + new UcdPropertyDetail( + UcdProperty.Titlecase_Mapping, + VersionInfo.getInstance(1, 1, 0), + 20, + true, + false, + false, + true); + // public static UcdPropertyDetail Special_Case_Condition_Detail = new UcdPropertyDetail + // ( + // UcdProperty.Special_Case_Condition, VersionInfo.getInstance(1,1,0), 21, + // true, false, false, true); + public static UcdPropertyDetail Simple_Case_Folding_Detail = + new UcdPropertyDetail( + UcdProperty.Simple_Case_Folding, + VersionInfo.getInstance(1, 1, 0), + 22, + true, + false, + false, + true); + public static UcdPropertyDetail Case_Folding_Detail = + new UcdPropertyDetail( + UcdProperty.Case_Folding, + VersionInfo.getInstance(1, 1, 0), + 23, + true, + false, + false, + true); + public static UcdPropertyDetail Joining_Type_Detail = + new UcdPropertyDetail( + UcdProperty.Joining_Type, + VersionInfo.getInstance(1, 1, 0), + 24, + true, + false, + false, + true); + public static UcdPropertyDetail Joining_Group_Detail = + new UcdPropertyDetail( + UcdProperty.Joining_Group, + VersionInfo.getInstance(1, 1, 0), + 25, + true, + false, + false, + true); + public static UcdPropertyDetail East_Asian_Width_Detail = + new UcdPropertyDetail( + UcdProperty.East_Asian_Width, + VersionInfo.getInstance(1, 1, 0), + 26, + true, + false, + false, + true); + public static UcdPropertyDetail Line_Break_Detail = + new UcdPropertyDetail( + UcdProperty.Line_Break, + VersionInfo.getInstance(1, 1, 0), + 27, + true, + false, + false, + true); + public static UcdPropertyDetail Script_Detail = + new UcdPropertyDetail( + UcdProperty.Script, + VersionInfo.getInstance(1, 1, 0), + 28, + true, + false, + false, + true); + public static UcdPropertyDetail Script_Extensions_Detail = + new UcdPropertyDetail( + UcdProperty.Script_Extensions, + VersionInfo.getInstance(6, 1, 0), + 29, + true, + false, + false, + true); + public static UcdPropertyDetail Dash_Detail = + new UcdPropertyDetail( + 
UcdProperty.Dash, + VersionInfo.getInstance(1, 1, 0), + 30, + true, + false, + false, + true); + public static UcdPropertyDetail White_Space_Detail = + new UcdPropertyDetail( + UcdProperty.White_Space, + VersionInfo.getInstance(1, 1, 0), + 31, + true, + false, + false, + true); + public static UcdPropertyDetail Hyphen_Detail = + new UcdPropertyDetail( + UcdProperty.Hyphen, + VersionInfo.getInstance(1, 1, 0), + 32, + true, + false, + false, + true); + public static UcdPropertyDetail Quotation_Mark_Detail = + new UcdPropertyDetail( + UcdProperty.Quotation_Mark, + VersionInfo.getInstance(1, 1, 0), + 33, + true, + false, + false, + true); + public static UcdPropertyDetail Radical_Detail = + new UcdPropertyDetail( + UcdProperty.Radical, + VersionInfo.getInstance(1, 1, 0), + 34, + true, + false, + false, + true); + public static UcdPropertyDetail Ideographic_Detail = + new UcdPropertyDetail( + UcdProperty.Ideographic, + VersionInfo.getInstance(1, 1, 0), + 35, + true, + false, + false, + true); + public static UcdPropertyDetail Unified_Ideograph_Detail = + new UcdPropertyDetail( + UcdProperty.Unified_Ideograph, + VersionInfo.getInstance(1, 1, 0), + 36, + true, + false, + false, + true); + public static UcdPropertyDetail IDS_Binary_Operator_Detail = + new UcdPropertyDetail( + UcdProperty.IDS_Binary_Operator, + VersionInfo.getInstance(1, 1, 0), + 37, + true, + false, + false, + true); + public static UcdPropertyDetail IDS_Trinary_Operator_Detail = + new UcdPropertyDetail( + UcdProperty.IDS_Trinary_Operator, + VersionInfo.getInstance(1, 1, 0), + 38, + true, + false, + false, + true); + public static UcdPropertyDetail Hangul_Syllable_Type_Detail = + new UcdPropertyDetail( + UcdProperty.Hangul_Syllable_Type, + VersionInfo.getInstance(1, 1, 0), + 39, + true, + false, + false, + true); + public static UcdPropertyDetail Default_Ignorable_Code_Point_Detail = + new UcdPropertyDetail( + UcdProperty.Default_Ignorable_Code_Point, + VersionInfo.getInstance(1, 1, 0), + 40, + true, + 
false, + false, + true); + public static UcdPropertyDetail Other_Default_Ignorable_Code_Point_Detail = + new UcdPropertyDetail( + UcdProperty.Other_Default_Ignorable_Code_Point, + VersionInfo.getInstance(1, 1, 0), + 41, + true, + false, + false, + true); + public static UcdPropertyDetail Alphabetic_Detail = + new UcdPropertyDetail( + UcdProperty.Alphabetic, + VersionInfo.getInstance(1, 1, 0), + 42, + true, + false, + false, + true); + public static UcdPropertyDetail Other_Alphabetic_Detail = + new UcdPropertyDetail( + UcdProperty.Other_Alphabetic, + VersionInfo.getInstance(1, 1, 0), + 43, + true, + false, + false, + true); + public static UcdPropertyDetail Uppercase_Detail = + new UcdPropertyDetail( + UcdProperty.Uppercase, + VersionInfo.getInstance(1, 1, 0), + 44, + true, + false, + false, + true); + public static UcdPropertyDetail Other_Uppercase_Detail = + new UcdPropertyDetail( + UcdProperty.Other_Uppercase, + VersionInfo.getInstance(1, 1, 0), + 45, + true, + false, + false, + true); + public static UcdPropertyDetail Lowercase_Detail = + new UcdPropertyDetail( + UcdProperty.Lowercase, + VersionInfo.getInstance(1, 1, 0), + 46, + true, + false, + false, + true); + public static UcdPropertyDetail Other_Lowercase_Detail = + new UcdPropertyDetail( + UcdProperty.Other_Lowercase, + VersionInfo.getInstance(1, 1, 0), + 47, + true, + false, + false, + true); + public static UcdPropertyDetail Math_Detail = + new UcdPropertyDetail( + UcdProperty.Math, + VersionInfo.getInstance(1, 1, 0), + 48, + true, + false, + false, + true); + public static UcdPropertyDetail Other_Math_Detail = + new UcdPropertyDetail( + UcdProperty.Other_Math, + VersionInfo.getInstance(1, 1, 0), + 49, + true, + false, + false, + true); + public static UcdPropertyDetail Hex_Digit_Detail = + new UcdPropertyDetail( + UcdProperty.Hex_Digit, + VersionInfo.getInstance(1, 1, 0), + 50, + true, + false, + false, + true); + public static UcdPropertyDetail ASCII_Hex_Digit_Detail = + new UcdPropertyDetail( + 
UcdProperty.ASCII_Hex_Digit, + VersionInfo.getInstance(1, 1, 0), + 51, + true, + false, + false, + true); + public static UcdPropertyDetail Noncharacter_Code_Point_Detail = + new UcdPropertyDetail( + UcdProperty.Noncharacter_Code_Point, + VersionInfo.getInstance(1, 1, 0), + 52, + true, + false, + false, + true); + public static UcdPropertyDetail Variation_Selector_Detail = + new UcdPropertyDetail( + UcdProperty.Variation_Selector, + VersionInfo.getInstance(1, 1, 0), + 53, + true, + false, + false, + true); + public static UcdPropertyDetail Bidi_Control_Detail = + new UcdPropertyDetail( + UcdProperty.Bidi_Control, + VersionInfo.getInstance(1, 1, 0), + 54, + true, + false, + false, + true); + public static UcdPropertyDetail Join_Control_Detail = + new UcdPropertyDetail( + UcdProperty.Join_Control, + VersionInfo.getInstance(1, 1, 0), + 55, + true, + false, + false, + true); + public static UcdPropertyDetail Grapheme_Base_Detail = + new UcdPropertyDetail( + UcdProperty.Grapheme_Base, + VersionInfo.getInstance(1, 1, 0), + 56, + true, + false, + false, + true); + public static UcdPropertyDetail Grapheme_Extend_Detail = + new UcdPropertyDetail( + UcdProperty.Grapheme_Extend, + VersionInfo.getInstance(1, 1, 0), + 57, + true, + false, + false, + true); + public static UcdPropertyDetail Other_Grapheme_Extend_Detail = + new UcdPropertyDetail( + UcdProperty.Other_Grapheme_Extend, + VersionInfo.getInstance(1, 1, 0), + 58, + true, + false, + false, + true); + public static UcdPropertyDetail Grapheme_Link_Detail = + new UcdPropertyDetail( + UcdProperty.Grapheme_Link, + VersionInfo.getInstance(1, 1, 0), + 59, + true, + false, + false, + true); + public static UcdPropertyDetail Sentence_Terminal_Detail = + new UcdPropertyDetail( + UcdProperty.Sentence_Terminal, + VersionInfo.getInstance(1, 1, 0), + 60, + true, + false, + false, + true); + public static UcdPropertyDetail Extender_Detail = + new UcdPropertyDetail( + UcdProperty.Extender, + VersionInfo.getInstance(1, 1, 0), + 61, + 
true, + false, + false, + true); + public static UcdPropertyDetail Terminal_Punctuation_Detail = + new UcdPropertyDetail( + UcdProperty.Terminal_Punctuation, + VersionInfo.getInstance(1, 1, 0), + 62, + true, + false, + false, + true); + public static UcdPropertyDetail Diacritic_Detail = + new UcdPropertyDetail( + UcdProperty.Diacritic, + VersionInfo.getInstance(1, 1, 0), + 63, + true, + false, + false, + true); + public static UcdPropertyDetail Deprecated_Detail = + new UcdPropertyDetail( + UcdProperty.Deprecated, + VersionInfo.getInstance(1, 1, 0), + 64, + true, + false, + false, + true); + public static UcdPropertyDetail ID_Start_Detail = + new UcdPropertyDetail( + UcdProperty.ID_Start, + VersionInfo.getInstance(1, 1, 0), + 65, + true, + false, + false, + true); + public static UcdPropertyDetail Other_ID_Start_Detail = + new UcdPropertyDetail( + UcdProperty.Other_ID_Start, + VersionInfo.getInstance(1, 1, 0), + 66, + true, + false, + false, + true); + public static UcdPropertyDetail XID_Start_Detail = + new UcdPropertyDetail( + UcdProperty.XID_Start, + VersionInfo.getInstance(1, 1, 0), + 67, + true, + false, + false, + true); + public static UcdPropertyDetail ID_Continue_Detail = + new UcdPropertyDetail( + UcdProperty.ID_Continue, + VersionInfo.getInstance(1, 1, 0), + 68, + true, + false, + false, + true); + public static UcdPropertyDetail Other_ID_Continue_Detail = + new UcdPropertyDetail( + UcdProperty.Other_ID_Continue, + VersionInfo.getInstance(1, 1, 0), + 69, + true, + false, + false, + true); + public static UcdPropertyDetail XID_Continue_Detail = + new UcdPropertyDetail( + UcdProperty.XID_Continue, + VersionInfo.getInstance(1, 1, 0), + 70, + true, + false, + false, + true); + public static UcdPropertyDetail Soft_Dotted_Detail = + new UcdPropertyDetail( + UcdProperty.Soft_Dotted, + VersionInfo.getInstance(1, 1, 0), + 71, + true, + false, + false, + true); + public static UcdPropertyDetail Logical_Order_Exception_Detail = + new UcdPropertyDetail( + 
UcdProperty.Logical_Order_Exception, + VersionInfo.getInstance(1, 1, 0), + 72, + true, + false, + false, + true); + public static UcdPropertyDetail Pattern_White_Space_Detail = + new UcdPropertyDetail( + UcdProperty.Pattern_White_Space, + VersionInfo.getInstance(1, 1, 0), + 73, + true, + false, + false, + true); + public static UcdPropertyDetail Pattern_Syntax_Detail = + new UcdPropertyDetail( + UcdProperty.Pattern_Syntax, + VersionInfo.getInstance(1, 1, 0), + 74, + true, + false, + false, + true); + public static UcdPropertyDetail Grapheme_Cluster_Break_Detail = + new UcdPropertyDetail( + UcdProperty.Grapheme_Cluster_Break, + VersionInfo.getInstance(1, 1, 0), + 75, + true, + false, + false, + true); + public static UcdPropertyDetail Word_Break_Detail = + new UcdPropertyDetail( + UcdProperty.Word_Break, + VersionInfo.getInstance(1, 1, 0), + 76, + true, + false, + false, + true); + public static UcdPropertyDetail Sentence_Break_Detail = + new UcdPropertyDetail( + UcdProperty.Sentence_Break, + VersionInfo.getInstance(1, 1, 0), + 77, + true, + false, + false, + true); + public static UcdPropertyDetail Composition_Exclusion_Detail = + new UcdPropertyDetail( + UcdProperty.Composition_Exclusion, + VersionInfo.getInstance(1, 1, 0), + 78, + true, + false, + false, + true); + public static UcdPropertyDetail Full_Composition_Exclusion_Detail = + new UcdPropertyDetail( + UcdProperty.Full_Composition_Exclusion, + VersionInfo.getInstance(1, 1, 0), + 79, + true, + false, + false, + true); + public static UcdPropertyDetail NFC_Quick_Check_Detail = + new UcdPropertyDetail( + UcdProperty.NFC_Quick_Check, + VersionInfo.getInstance(1, 1, 0), + 80, + true, + false, + false, + true); + public static UcdPropertyDetail NFD_Quick_Check_Detail = + new UcdPropertyDetail( + UcdProperty.NFD_Quick_Check, + VersionInfo.getInstance(1, 1, 0), + 81, + true, + false, + false, + true); + public static UcdPropertyDetail NFKC_Quick_Check_Detail = + new UcdPropertyDetail( + 
UcdProperty.NFKC_Quick_Check, + VersionInfo.getInstance(1, 1, 0), + 82, + true, + false, + false, + true); + public static UcdPropertyDetail NFKD_Quick_Check_Detail = + new UcdPropertyDetail( + UcdProperty.NFKD_Quick_Check, + VersionInfo.getInstance(1, 1, 0), + 83, + true, + false, + false, + true); + public static UcdPropertyDetail Expands_On_NFC_Detail = + new UcdPropertyDetail( + UcdProperty.Expands_On_NFC, + VersionInfo.getInstance(1, 1, 0), + 84, + true, + false, + false, + true); + public static UcdPropertyDetail Expands_On_NFD_Detail = + new UcdPropertyDetail( + UcdProperty.Expands_On_NFD, + VersionInfo.getInstance(1, 1, 0), + 85, + true, + false, + false, + true); + public static UcdPropertyDetail Expands_On_NFKC_Detail = + new UcdPropertyDetail( + UcdProperty.Expands_On_NFKC, + VersionInfo.getInstance(1, 1, 0), + 86, + true, + false, + false, + true); + public static UcdPropertyDetail Expands_On_NFKD_Detail = + new UcdPropertyDetail( + UcdProperty.Expands_On_NFKD, + VersionInfo.getInstance(1, 1, 0), + 87, + true, + false, + false, + true); + public static UcdPropertyDetail FC_NFC_Closure_Detail = + new UcdPropertyDetail( + UcdProperty.FC_NFKC_Closure, + VersionInfo.getInstance(1, 1, 0), + 88, + true, + false, + false, + true); + public static UcdPropertyDetail Case_Ignorable_Detail = + new UcdPropertyDetail( + UcdProperty.Case_Ignorable, + VersionInfo.getInstance(5, 2, 0), + 89, + true, + false, + false, + true); + public static UcdPropertyDetail Cased_Detail = + new UcdPropertyDetail( + UcdProperty.Cased, + VersionInfo.getInstance(5, 2, 0), + 90, + true, + false, + false, + true); + public static UcdPropertyDetail Changes_When_CaseFolded_Detail = + new UcdPropertyDetail( + UcdProperty.Changes_When_Casefolded, + VersionInfo.getInstance(5, 2, 0), + 91, + true, + false, + false, + true); + public static UcdPropertyDetail Changes_When_CaseMapped_Detail = + new UcdPropertyDetail( + UcdProperty.Changes_When_Casemapped, + VersionInfo.getInstance(5, 2, 0), + 92, 
+ true, + false, + false, + true); + public static UcdPropertyDetail Changes_When_NFKC_Casefolded_Detail = + new UcdPropertyDetail( + UcdProperty.Changes_When_NFKC_Casefolded, + VersionInfo.getInstance(5, 2, 0), + 93, + true, + false, + false, + true); + public static UcdPropertyDetail Changes_When_Lowercased_Detail = + new UcdPropertyDetail( + UcdProperty.Changes_When_Lowercased, + VersionInfo.getInstance(5, 2, 0), + 94, + true, + false, + false, + true); + public static UcdPropertyDetail Changes_When_Titlecased_Detail = + new UcdPropertyDetail( + UcdProperty.Changes_When_Titlecased, + VersionInfo.getInstance(5, 2, 0), + 95, + true, + false, + false, + true); + public static UcdPropertyDetail Changes_When_Uppercased_Detail = + new UcdPropertyDetail( + UcdProperty.Changes_When_Uppercased, + VersionInfo.getInstance(5, 2, 0), + 96, + true, + false, + false, + true); + public static UcdPropertyDetail NFKC_Casefold_Detail = + new UcdPropertyDetail( + UcdProperty.NFKC_Casefold, + VersionInfo.getInstance(5, 2, 0), + 97, + true, + false, + false, + true); + public static UcdPropertyDetail Indic_Syllabic_Category_Detail = + new UcdPropertyDetail( + UcdProperty.Indic_Syllabic_Category, + VersionInfo.getInstance(6, 0, 0), + 98, + true, + false, + false, + true); // public static UcdPropertyDetail Indic_Matra_Category_Detail = new UcdPropertyDetail ( -// UcdProperty.Indic_Matra_Category, VersionInfo.getInstance(6,0,0), VersionInfo.getInstance(7,0,0), 99, -// true, false, false, true); - public static UcdPropertyDetail Indic_Positional_Category_Detail = new UcdPropertyDetail( - UcdProperty.Indic_Positional_Category, VersionInfo.getInstance(8, 0, 0), 100, - true, false, false, true); - public static UcdPropertyDetail kJa_Detail = new UcdPropertyDetail( - UcdProperty.kJa, VersionInfo.getInstance(8, 0, 0), 101, - false, true, false, true); - public static UcdPropertyDetail Prepended_Concatenation_Mark_Detail = new UcdPropertyDetail( - UcdProperty.Prepended_Concatenation_Mark, 
VersionInfo.getInstance(9, 0, 0), 102, - true, false, false, true); - public static UcdPropertyDetail Vertical_Orientation_Detail = new UcdPropertyDetail( - UcdProperty.Vertical_Orientation, VersionInfo.getInstance(10, 0, 0), 103, - true, false, false, true); - public static UcdPropertyDetail Regional_Indicator_Detail = new UcdPropertyDetail( - UcdProperty.Regional_Indicator, VersionInfo.getInstance(10, 0, 0), 104, - true, false, false, true); - public static UcdPropertyDetail Block_Detail = new UcdPropertyDetail( - UcdProperty.Block, VersionInfo.getInstance(10, 0, 0), 105, - true, false, false, true); - public static UcdPropertyDetail Equivalent_Unified_Ideograph_Detail = new UcdPropertyDetail( - UcdProperty.Equivalent_Unified_Ideograph, VersionInfo.getInstance(11, 0, 0), 106, - false, true, false, true); - public static UcdPropertyDetail kCompatibilityVariant_Detail = new UcdPropertyDetail( - UcdProperty.kCompatibilityVariant, VersionInfo.getInstance(11, 0, 0), 107, - false, true, true, true); - public static UcdPropertyDetail kRSUnicode_Detail = new UcdPropertyDetail( - UcdProperty.kRSUnicode, VersionInfo.getInstance(11, 0, 0), 108, - false, true, false, true); + // UcdProperty.Indic_Matra_Category, VersionInfo.getInstance(6,0,0), + // VersionInfo.getInstance(7,0,0), 99, + // true, false, false, true); + public static UcdPropertyDetail Indic_Positional_Category_Detail = + new UcdPropertyDetail( + UcdProperty.Indic_Positional_Category, + VersionInfo.getInstance(8, 0, 0), + 100, + true, + false, + false, + true); + public static UcdPropertyDetail kJa_Detail = + new UcdPropertyDetail( + UcdProperty.kJa, + VersionInfo.getInstance(8, 0, 0), + 101, + false, + true, + false, + true); + public static UcdPropertyDetail Prepended_Concatenation_Mark_Detail = + new UcdPropertyDetail( + UcdProperty.Prepended_Concatenation_Mark, + VersionInfo.getInstance(9, 0, 0), + 102, + true, + false, + false, + true); + public static UcdPropertyDetail Vertical_Orientation_Detail = + new 
UcdPropertyDetail( + UcdProperty.Vertical_Orientation, + VersionInfo.getInstance(10, 0, 0), + 103, + true, + false, + false, + true); + public static UcdPropertyDetail Regional_Indicator_Detail = + new UcdPropertyDetail( + UcdProperty.Regional_Indicator, + VersionInfo.getInstance(10, 0, 0), + 104, + true, + false, + false, + true); + public static UcdPropertyDetail Block_Detail = + new UcdPropertyDetail( + UcdProperty.Block, + VersionInfo.getInstance(10, 0, 0), + 105, + true, + false, + false, + true); + public static UcdPropertyDetail Equivalent_Unified_Ideograph_Detail = + new UcdPropertyDetail( + UcdProperty.Equivalent_Unified_Ideograph, + VersionInfo.getInstance(11, 0, 0), + 106, + false, + true, + false, + true); + public static UcdPropertyDetail kCompatibilityVariant_Detail = + new UcdPropertyDetail( + UcdProperty.kCompatibilityVariant, + VersionInfo.getInstance(11, 0, 0), + 107, + false, + true, + true, + true); + public static UcdPropertyDetail kRSUnicode_Detail = + new UcdPropertyDetail( + UcdProperty.kRSUnicode, + VersionInfo.getInstance(11, 0, 0), + 108, + false, + true, + false, + true); // public static UcdPropertyDetail kIRG_RSIndex_Detail = new UcdPropertyDetail ( -// UcdProperty.kIRG_RSIndex, VersionInfo.getInstance(11,0,0), 109, -// false, true, false, true); - public static UcdPropertyDetail kIRG_GSource_Detail = new UcdPropertyDetail( - UcdProperty.kIRG_GSource, VersionInfo.getInstance(11, 0, 0), 110, - false, true, true, true); - public static UcdPropertyDetail kIRG_TSource_Detail = new UcdPropertyDetail( - UcdProperty.kIRG_TSource, VersionInfo.getInstance(11, 0, 0), 111, - false, true, true, true); - public static UcdPropertyDetail kIRG_JSource_Detail = new UcdPropertyDetail( - UcdProperty.kIRG_JSource, VersionInfo.getInstance(11, 0, 0), 112, - false, true, true, true); - public static UcdPropertyDetail kIRG_KSource_Detail = new UcdPropertyDetail( - UcdProperty.kIRG_KSource, VersionInfo.getInstance(11, 0, 0), 113, - false, true, true, true); - 
public static UcdPropertyDetail kIRG_KPSource_Detail = new UcdPropertyDetail( - UcdProperty.kIRG_KPSource, VersionInfo.getInstance(11, 0, 0), 114, - false, true, true, true); - public static UcdPropertyDetail kIRG_VSource_Detail = new UcdPropertyDetail( - UcdProperty.kIRG_VSource, VersionInfo.getInstance(11, 0, 0), 115, - false, true, true, true); - public static UcdPropertyDetail kIRG_HSource_Detail = new UcdPropertyDetail( - UcdProperty.kIRG_HSource, VersionInfo.getInstance(11, 0, 0), 116, - false, true, true, true); - public static UcdPropertyDetail kIRG_USource_Detail = new UcdPropertyDetail( - UcdProperty.kIRG_USource, VersionInfo.getInstance(11, 0, 0), 117, - false, true, true, true); - public static UcdPropertyDetail kIRG_MSource_Detail = new UcdPropertyDetail( - UcdProperty.kIRG_MSource, VersionInfo.getInstance(11, 0, 0), 118, - false, true, true, true); - public static UcdPropertyDetail kIRG_UKSource_Detail = new UcdPropertyDetail( - UcdProperty.kIRG_UKSource, VersionInfo.getInstance(13, 0, 0), 119, - false, true, true, true); - public static UcdPropertyDetail kIRG_SSource_Detail = new UcdPropertyDetail( - UcdProperty.kIRG_SSource, VersionInfo.getInstance(13, 0, 0), 120, - false, true, true, true); - public static UcdPropertyDetail kIICore_Detail = new UcdPropertyDetail( - UcdProperty.kIICore, VersionInfo.getInstance(11, 0, 0), 121, - false, true, false, true); - public static UcdPropertyDetail kUnihanCore2020_Detail = new UcdPropertyDetail( - UcdProperty.kUnihanCore2020, VersionInfo.getInstance(11, 0, 0), 122, - false, true, false, true); - public static UcdPropertyDetail kGB0_Detail = new UcdPropertyDetail( - UcdProperty.kGB0, VersionInfo.getInstance(11, 0, 0), 123, - false, true, false, true); - public static UcdPropertyDetail kGB1_Detail = new UcdPropertyDetail( - UcdProperty.kGB1, VersionInfo.getInstance(11, 0, 0), 124, - false, true, false, true); - public static UcdPropertyDetail kGB3_Detail = new UcdPropertyDetail( - UcdProperty.kGB3, 
VersionInfo.getInstance(11, 0, 0), 125, - false, true, false, true); - public static UcdPropertyDetail kGB5_Detail = new UcdPropertyDetail( - UcdProperty.kGB5, VersionInfo.getInstance(11, 0, 0), 126, - false, true, false, true); - public static UcdPropertyDetail kGB7_Detail = new UcdPropertyDetail( - UcdProperty.kGB7, VersionInfo.getInstance(11, 0, 0), 127, - false, true, false, true); - public static UcdPropertyDetail kGB8_Detail = new UcdPropertyDetail( - UcdProperty.kGB8, VersionInfo.getInstance(11, 0, 0), 128, - false, true, false, true); - public static UcdPropertyDetail kCNS1986_Detail = new UcdPropertyDetail( - UcdProperty.kCNS1986, VersionInfo.getInstance(11, 0, 0), 129, - false, true, false, true); - public static UcdPropertyDetail kCNS1992_Detail = new UcdPropertyDetail( - UcdProperty.kCNS1992, VersionInfo.getInstance(11, 0, 0), 130, - false, true, false, true); - public static UcdPropertyDetail kJis0_Detail = new UcdPropertyDetail( - UcdProperty.kJis0, VersionInfo.getInstance(11, 0, 0), 131, - false, true, false, true); - public static UcdPropertyDetail kJis1_Detail = new UcdPropertyDetail( - UcdProperty.kJis1, VersionInfo.getInstance(11, 0, 0), 132, - false, true, false, true); - public static UcdPropertyDetail kJIS0213_Detail = new UcdPropertyDetail( - UcdProperty.kJIS0213, VersionInfo.getInstance(11, 0, 0), 133, - false, true, false, true); - public static UcdPropertyDetail kKSC0_Detail = new UcdPropertyDetail( - UcdProperty.kKSC0, VersionInfo.getInstance(11, 0, 0), - VersionInfo.getInstance(15, 1, 0), 134, - false, true, false, true); - public static UcdPropertyDetail kKSC1_Detail = new UcdPropertyDetail( - UcdProperty.kKSC1, VersionInfo.getInstance(11, 0, 0), - VersionInfo.getInstance(15, 1, 0), 135, - false, true, false, true); - public static UcdPropertyDetail kKPS0_Detail = new UcdPropertyDetail( - UcdProperty.kKPS0, VersionInfo.getInstance(11, 0, 0), - VersionInfo.getInstance(15, 1, 0), 136, - false, true, false, true); - public static 
UcdPropertyDetail kKPS1_Detail = new UcdPropertyDetail( - UcdProperty.kKPS1, VersionInfo.getInstance(11, 0, 0), - VersionInfo.getInstance(15, 1, 0), 137, - false, true, false, true); - public static UcdPropertyDetail kHKSCS_Detail = new UcdPropertyDetail( - UcdProperty.kHKSCS, VersionInfo.getInstance(11, 0, 0), - VersionInfo.getInstance(15, 1, 0), 138, - false, true, false, true); - public static UcdPropertyDetail kCantonese_Detail = new UcdPropertyDetail( - UcdProperty.kCantonese, VersionInfo.getInstance(11, 0, 0), 139, - false, true, false, true); - public static UcdPropertyDetail kHangul_Detail = new UcdPropertyDetail( - UcdProperty.kHangul, VersionInfo.getInstance(11, 0, 0), 140, - false, true, false, true); - public static UcdPropertyDetail kDefinition_Detail = new UcdPropertyDetail( - UcdProperty.kDefinition, VersionInfo.getInstance(11, 0, 0), 141, - false, true, false, true); - public static UcdPropertyDetail kHanYu_Detail = new UcdPropertyDetail( - UcdProperty.kHanYu, VersionInfo.getInstance(11, 0, 0), 142, - false, true, false, true); + // UcdProperty.kIRG_RSIndex, VersionInfo.getInstance(11,0,0), 109, + // false, true, false, true); + public static UcdPropertyDetail kIRG_GSource_Detail = + new UcdPropertyDetail( + UcdProperty.kIRG_GSource, + VersionInfo.getInstance(11, 0, 0), + 110, + false, + true, + true, + true); + public static UcdPropertyDetail kIRG_TSource_Detail = + new UcdPropertyDetail( + UcdProperty.kIRG_TSource, + VersionInfo.getInstance(11, 0, 0), + 111, + false, + true, + true, + true); + public static UcdPropertyDetail kIRG_JSource_Detail = + new UcdPropertyDetail( + UcdProperty.kIRG_JSource, + VersionInfo.getInstance(11, 0, 0), + 112, + false, + true, + true, + true); + public static UcdPropertyDetail kIRG_KSource_Detail = + new UcdPropertyDetail( + UcdProperty.kIRG_KSource, + VersionInfo.getInstance(11, 0, 0), + 113, + false, + true, + true, + true); + public static UcdPropertyDetail kIRG_KPSource_Detail = + new UcdPropertyDetail( + 
UcdProperty.kIRG_KPSource, + VersionInfo.getInstance(11, 0, 0), + 114, + false, + true, + true, + true); + public static UcdPropertyDetail kIRG_VSource_Detail = + new UcdPropertyDetail( + UcdProperty.kIRG_VSource, + VersionInfo.getInstance(11, 0, 0), + 115, + false, + true, + true, + true); + public static UcdPropertyDetail kIRG_HSource_Detail = + new UcdPropertyDetail( + UcdProperty.kIRG_HSource, + VersionInfo.getInstance(11, 0, 0), + 116, + false, + true, + true, + true); + public static UcdPropertyDetail kIRG_USource_Detail = + new UcdPropertyDetail( + UcdProperty.kIRG_USource, + VersionInfo.getInstance(11, 0, 0), + 117, + false, + true, + true, + true); + public static UcdPropertyDetail kIRG_MSource_Detail = + new UcdPropertyDetail( + UcdProperty.kIRG_MSource, + VersionInfo.getInstance(11, 0, 0), + 118, + false, + true, + true, + true); + public static UcdPropertyDetail kIRG_UKSource_Detail = + new UcdPropertyDetail( + UcdProperty.kIRG_UKSource, + VersionInfo.getInstance(13, 0, 0), + 119, + false, + true, + true, + true); + public static UcdPropertyDetail kIRG_SSource_Detail = + new UcdPropertyDetail( + UcdProperty.kIRG_SSource, + VersionInfo.getInstance(13, 0, 0), + 120, + false, + true, + true, + true); + public static UcdPropertyDetail kIICore_Detail = + new UcdPropertyDetail( + UcdProperty.kIICore, + VersionInfo.getInstance(11, 0, 0), + 121, + false, + true, + false, + true); + public static UcdPropertyDetail kUnihanCore2020_Detail = + new UcdPropertyDetail( + UcdProperty.kUnihanCore2020, + VersionInfo.getInstance(11, 0, 0), + 122, + false, + true, + false, + true); + public static UcdPropertyDetail kGB0_Detail = + new UcdPropertyDetail( + UcdProperty.kGB0, + VersionInfo.getInstance(11, 0, 0), + 123, + false, + true, + false, + true); + public static UcdPropertyDetail kGB1_Detail = + new UcdPropertyDetail( + UcdProperty.kGB1, + VersionInfo.getInstance(11, 0, 0), + 124, + false, + true, + false, + true); + public static UcdPropertyDetail kGB3_Detail = + new 
UcdPropertyDetail( + UcdProperty.kGB3, + VersionInfo.getInstance(11, 0, 0), + 125, + false, + true, + false, + true); + public static UcdPropertyDetail kGB5_Detail = + new UcdPropertyDetail( + UcdProperty.kGB5, + VersionInfo.getInstance(11, 0, 0), + 126, + false, + true, + false, + true); + public static UcdPropertyDetail kGB7_Detail = + new UcdPropertyDetail( + UcdProperty.kGB7, + VersionInfo.getInstance(11, 0, 0), + 127, + false, + true, + false, + true); + public static UcdPropertyDetail kGB8_Detail = + new UcdPropertyDetail( + UcdProperty.kGB8, + VersionInfo.getInstance(11, 0, 0), + 128, + false, + true, + false, + true); + public static UcdPropertyDetail kCNS1986_Detail = + new UcdPropertyDetail( + UcdProperty.kCNS1986, + VersionInfo.getInstance(11, 0, 0), + 129, + false, + true, + false, + true); + public static UcdPropertyDetail kCNS1992_Detail = + new UcdPropertyDetail( + UcdProperty.kCNS1992, + VersionInfo.getInstance(11, 0, 0), + 130, + false, + true, + false, + true); + public static UcdPropertyDetail kJis0_Detail = + new UcdPropertyDetail( + UcdProperty.kJis0, + VersionInfo.getInstance(11, 0, 0), + 131, + false, + true, + false, + true); + public static UcdPropertyDetail kJis1_Detail = + new UcdPropertyDetail( + UcdProperty.kJis1, + VersionInfo.getInstance(11, 0, 0), + 132, + false, + true, + false, + true); + public static UcdPropertyDetail kJIS0213_Detail = + new UcdPropertyDetail( + UcdProperty.kJIS0213, + VersionInfo.getInstance(11, 0, 0), + 133, + false, + true, + false, + true); + public static UcdPropertyDetail kKSC0_Detail = + new UcdPropertyDetail( + UcdProperty.kKSC0, + VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(15, 1, 0), + 134, + false, + true, + false, + true); + public static UcdPropertyDetail kKSC1_Detail = + new UcdPropertyDetail( + UcdProperty.kKSC1, + VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(15, 1, 0), + 135, + false, + true, + false, + true); + public static UcdPropertyDetail kKPS0_Detail = + new 
UcdPropertyDetail( + UcdProperty.kKPS0, + VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(15, 1, 0), + 136, + false, + true, + false, + true); + public static UcdPropertyDetail kKPS1_Detail = + new UcdPropertyDetail( + UcdProperty.kKPS1, + VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(15, 1, 0), + 137, + false, + true, + false, + true); + public static UcdPropertyDetail kHKSCS_Detail = + new UcdPropertyDetail( + UcdProperty.kHKSCS, + VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(15, 1, 0), + 138, + false, + true, + false, + true); + public static UcdPropertyDetail kCantonese_Detail = + new UcdPropertyDetail( + UcdProperty.kCantonese, + VersionInfo.getInstance(11, 0, 0), + 139, + false, + true, + false, + true); + public static UcdPropertyDetail kHangul_Detail = + new UcdPropertyDetail( + UcdProperty.kHangul, + VersionInfo.getInstance(11, 0, 0), + 140, + false, + true, + false, + true); + public static UcdPropertyDetail kDefinition_Detail = + new UcdPropertyDetail( + UcdProperty.kDefinition, + VersionInfo.getInstance(11, 0, 0), + 141, + false, + true, + false, + true); + public static UcdPropertyDetail kHanYu_Detail = + new UcdPropertyDetail( + UcdProperty.kHanYu, + VersionInfo.getInstance(11, 0, 0), + 142, + false, + true, + false, + true); // public static UcdPropertyDetail kAlternateHanYu_Detail = new UcdPropertyDetail ( -// UcdProperty.kAlternateHanYu, VersionInfo.getInstance(11,0,0), 143, -// false, true, false, true); - public static UcdPropertyDetail kMandarin_Detail = new UcdPropertyDetail( - UcdProperty.kMandarin, VersionInfo.getInstance(11, 0, 0), 144, - false, true, false, true); - public static UcdPropertyDetail kCihaiT_Detail = new UcdPropertyDetail( - UcdProperty.kCihaiT, VersionInfo.getInstance(11, 0, 0), 145, - false, true, false, true); - public static UcdPropertyDetail kSBGY_Detail = new UcdPropertyDetail( - UcdProperty.kSBGY, VersionInfo.getInstance(11, 0, 0), 146, - false, true, false, true); - public 
static UcdPropertyDetail kNelson_Detail = new UcdPropertyDetail( - UcdProperty.kNelson, VersionInfo.getInstance(11, 0, 0), 147, - false, true, false, true); - public static UcdPropertyDetail kCowles_Detail = new UcdPropertyDetail( - UcdProperty.kCowles, VersionInfo.getInstance(11, 0, 0), 148, - false, true, false, true); - public static UcdPropertyDetail kMatthews_Detail = new UcdPropertyDetail( - UcdProperty.kMatthews, VersionInfo.getInstance(11, 0, 0), 149, - false, true, false, true); - public static UcdPropertyDetail kOtherNumeric_Detail = new UcdPropertyDetail( - UcdProperty.kOtherNumeric, VersionInfo.getInstance(11, 0, 0), 150, - false, true, false, true); - public static UcdPropertyDetail kPhonetic_Detail = new UcdPropertyDetail( - UcdProperty.kPhonetic, VersionInfo.getInstance(11, 0, 0), 151, - false, true, false, true); - public static UcdPropertyDetail kGSR_Detail = new UcdPropertyDetail( - UcdProperty.kGSR, VersionInfo.getInstance(11, 0, 0), 152, - false, true, false, true); - public static UcdPropertyDetail kFenn_Detail = new UcdPropertyDetail( - UcdProperty.kFenn, VersionInfo.getInstance(11, 0, 0), 153, - false, true, false, true); - public static UcdPropertyDetail kFennIndex_Detail = new UcdPropertyDetail( - UcdProperty.kFennIndex, VersionInfo.getInstance(11, 0, 0), 154, - false, true, false, true); - public static UcdPropertyDetail kKarlgren_Detail = new UcdPropertyDetail( - UcdProperty.kKarlgren, VersionInfo.getInstance(11, 0, 0), 155, - false, true, false, true); - public static UcdPropertyDetail kCangjie_Detail = new UcdPropertyDetail( - UcdProperty.kCangjie, VersionInfo.getInstance(11, 0, 0), 156, - false, true, false, true); - public static UcdPropertyDetail kMeyerWempe_Detail = new UcdPropertyDetail( - UcdProperty.kMeyerWempe, VersionInfo.getInstance(11, 0, 0), 157, - false, true, false, true); - public static UcdPropertyDetail kSimplifiedVariant_Detail = new UcdPropertyDetail( - UcdProperty.kSimplifiedVariant, VersionInfo.getInstance(11, 0, 
0), 158, - false, true, false, true); - public static UcdPropertyDetail kTraditionalVariant_Detail = new UcdPropertyDetail( - UcdProperty.kTraditionalVariant, VersionInfo.getInstance(11, 0, 0), 159, - false, true, false, true); - public static UcdPropertyDetail kSpecializedSemanticVariant_Detail = new UcdPropertyDetail( - UcdProperty.kSpecializedSemanticVariant, VersionInfo.getInstance(11, 0, 0), 160, - false, true, false, true); - public static UcdPropertyDetail kSemanticVariant_Detail = new UcdPropertyDetail( - UcdProperty.kSemanticVariant, VersionInfo.getInstance(11, 0, 0), 161, - false, true, false, true); - public static UcdPropertyDetail kVietnamese_Detail = new UcdPropertyDetail( - UcdProperty.kVietnamese, VersionInfo.getInstance(11, 0, 0), 162, - false, true, false, true); - public static UcdPropertyDetail kLau_Detail = new UcdPropertyDetail( - UcdProperty.kLau, VersionInfo.getInstance(11, 0, 0), 163, - false, true, false, true); - public static UcdPropertyDetail kTang_Detail = new UcdPropertyDetail( - UcdProperty.kTang, VersionInfo.getInstance(11, 0, 0), 164, - false, true, false, true); - public static UcdPropertyDetail kZVariant_Detail = new UcdPropertyDetail( - UcdProperty.kZVariant, VersionInfo.getInstance(11, 0, 0), 165, - false, true, false, true); - public static UcdPropertyDetail kJapaneseKun_Detail = new UcdPropertyDetail( - UcdProperty.kJapaneseKun, VersionInfo.getInstance(11, 0, 0), 166, - false, true, false, true); - public static UcdPropertyDetail kJapaneseOn_Detail = new UcdPropertyDetail( - UcdProperty.kJapaneseOn, VersionInfo.getInstance(11, 0, 0), 167, - false, true, false, true); - public static UcdPropertyDetail kKangXi_Detail = new UcdPropertyDetail( - UcdProperty.kKangXi, VersionInfo.getInstance(11, 0, 0), 168, - false, true, false, true); + // UcdProperty.kAlternateHanYu, VersionInfo.getInstance(11,0,0), 143, + // false, true, false, true); + public static UcdPropertyDetail kMandarin_Detail = + new UcdPropertyDetail( + 
UcdProperty.kMandarin, + VersionInfo.getInstance(11, 0, 0), + 144, + false, + true, + false, + true); + public static UcdPropertyDetail kCihaiT_Detail = + new UcdPropertyDetail( + UcdProperty.kCihaiT, + VersionInfo.getInstance(11, 0, 0), + 145, + false, + true, + false, + true); + public static UcdPropertyDetail kSBGY_Detail = + new UcdPropertyDetail( + UcdProperty.kSBGY, + VersionInfo.getInstance(11, 0, 0), + 146, + false, + true, + false, + true); + public static UcdPropertyDetail kNelson_Detail = + new UcdPropertyDetail( + UcdProperty.kNelson, + VersionInfo.getInstance(11, 0, 0), + 147, + false, + true, + false, + true); + public static UcdPropertyDetail kCowles_Detail = + new UcdPropertyDetail( + UcdProperty.kCowles, + VersionInfo.getInstance(11, 0, 0), + 148, + false, + true, + false, + true); + public static UcdPropertyDetail kMatthews_Detail = + new UcdPropertyDetail( + UcdProperty.kMatthews, + VersionInfo.getInstance(11, 0, 0), + 149, + false, + true, + false, + true); + public static UcdPropertyDetail kOtherNumeric_Detail = + new UcdPropertyDetail( + UcdProperty.kOtherNumeric, + VersionInfo.getInstance(11, 0, 0), + 150, + false, + true, + false, + true); + public static UcdPropertyDetail kPhonetic_Detail = + new UcdPropertyDetail( + UcdProperty.kPhonetic, + VersionInfo.getInstance(11, 0, 0), + 151, + false, + true, + false, + true); + public static UcdPropertyDetail kGSR_Detail = + new UcdPropertyDetail( + UcdProperty.kGSR, + VersionInfo.getInstance(11, 0, 0), + 152, + false, + true, + false, + true); + public static UcdPropertyDetail kFenn_Detail = + new UcdPropertyDetail( + UcdProperty.kFenn, + VersionInfo.getInstance(11, 0, 0), + 153, + false, + true, + false, + true); + public static UcdPropertyDetail kFennIndex_Detail = + new UcdPropertyDetail( + UcdProperty.kFennIndex, + VersionInfo.getInstance(11, 0, 0), + 154, + false, + true, + false, + true); + public static UcdPropertyDetail kKarlgren_Detail = + new UcdPropertyDetail( + UcdProperty.kKarlgren, + 
VersionInfo.getInstance(11, 0, 0), + 155, + false, + true, + false, + true); + public static UcdPropertyDetail kCangjie_Detail = + new UcdPropertyDetail( + UcdProperty.kCangjie, + VersionInfo.getInstance(11, 0, 0), + 156, + false, + true, + false, + true); + public static UcdPropertyDetail kMeyerWempe_Detail = + new UcdPropertyDetail( + UcdProperty.kMeyerWempe, + VersionInfo.getInstance(11, 0, 0), + 157, + false, + true, + false, + true); + public static UcdPropertyDetail kSimplifiedVariant_Detail = + new UcdPropertyDetail( + UcdProperty.kSimplifiedVariant, + VersionInfo.getInstance(11, 0, 0), + 158, + false, + true, + false, + true); + public static UcdPropertyDetail kTraditionalVariant_Detail = + new UcdPropertyDetail( + UcdProperty.kTraditionalVariant, + VersionInfo.getInstance(11, 0, 0), + 159, + false, + true, + false, + true); + public static UcdPropertyDetail kSpecializedSemanticVariant_Detail = + new UcdPropertyDetail( + UcdProperty.kSpecializedSemanticVariant, + VersionInfo.getInstance(11, 0, 0), + 160, + false, + true, + false, + true); + public static UcdPropertyDetail kSemanticVariant_Detail = + new UcdPropertyDetail( + UcdProperty.kSemanticVariant, + VersionInfo.getInstance(11, 0, 0), + 161, + false, + true, + false, + true); + public static UcdPropertyDetail kVietnamese_Detail = + new UcdPropertyDetail( + UcdProperty.kVietnamese, + VersionInfo.getInstance(11, 0, 0), + 162, + false, + true, + false, + true); + public static UcdPropertyDetail kLau_Detail = + new UcdPropertyDetail( + UcdProperty.kLau, + VersionInfo.getInstance(11, 0, 0), + 163, + false, + true, + false, + true); + public static UcdPropertyDetail kTang_Detail = + new UcdPropertyDetail( + UcdProperty.kTang, + VersionInfo.getInstance(11, 0, 0), + 164, + false, + true, + false, + true); + public static UcdPropertyDetail kZVariant_Detail = + new UcdPropertyDetail( + UcdProperty.kZVariant, + VersionInfo.getInstance(11, 0, 0), + 165, + false, + true, + false, + true); + public static 
UcdPropertyDetail kJapaneseKun_Detail = + new UcdPropertyDetail( + UcdProperty.kJapaneseKun, + VersionInfo.getInstance(11, 0, 0), + 166, + false, + true, + false, + true); + public static UcdPropertyDetail kJapaneseOn_Detail = + new UcdPropertyDetail( + UcdProperty.kJapaneseOn, + VersionInfo.getInstance(11, 0, 0), + 167, + false, + true, + false, + true); + public static UcdPropertyDetail kKangXi_Detail = + new UcdPropertyDetail( + UcdProperty.kKangXi, + VersionInfo.getInstance(11, 0, 0), + 168, + false, + true, + false, + true); // public static UcdPropertyDetail kAlternateKangXi_Detail = new UcdPropertyDetail ( -// UcdProperty.kAlternateKangXi, VersionInfo.getInstance(11,0,0), 169, -// false, true, false, true); - public static UcdPropertyDetail kBigFive_Detail = new UcdPropertyDetail( - UcdProperty.kBigFive, VersionInfo.getInstance(11, 0, 0), 170, - false, true, false, true); - public static UcdPropertyDetail kCCCII_Detail = new UcdPropertyDetail( - UcdProperty.kCCCII, VersionInfo.getInstance(11, 0, 0), 171, - false, true, false, true); - public static UcdPropertyDetail kDaeJaweon_Detail = new UcdPropertyDetail( - UcdProperty.kDaeJaweon, VersionInfo.getInstance(11, 0, 0), 172, - false, true, false, true); - public static UcdPropertyDetail kEACC_Detail = new UcdPropertyDetail( - UcdProperty.kEACC, VersionInfo.getInstance(11, 0, 0), 173, - false, true, false, true); - public static UcdPropertyDetail kFrequency_Detail = new UcdPropertyDetail( - UcdProperty.kFrequency, VersionInfo.getInstance(11, 0, 0), - VersionInfo.getInstance(16, 0, 0), 174, - false, true, false, true); - public static UcdPropertyDetail kGradeLevel_Detail = new UcdPropertyDetail( - UcdProperty.kGradeLevel, VersionInfo.getInstance(11, 0, 0), 175, - false, true, false, true); - public static UcdPropertyDetail kHDZRadBreak_Detail = new UcdPropertyDetail( - UcdProperty.kHDZRadBreak, VersionInfo.getInstance(11, 0, 0), 176, - false, true, false, true); - public static UcdPropertyDetail kHKGlyph_Detail 
= new UcdPropertyDetail( - UcdProperty.kHKGlyph, VersionInfo.getInstance(11, 0, 0), 177, - false, true, false, true); - public static UcdPropertyDetail kHanyuPinlu_Detail = new UcdPropertyDetail( - UcdProperty.kHanyuPinlu, VersionInfo.getInstance(11, 0, 0), 178, - false, true, false, true); - public static UcdPropertyDetail kHanyuPinyin_Detail = new UcdPropertyDetail( - UcdProperty.kHanyuPinyin, VersionInfo.getInstance(11, 0, 0), 179, - false, true, false, true); - public static UcdPropertyDetail kIRGHanyuDaZidian_Detail = new UcdPropertyDetail( - UcdProperty.kIRGHanyuDaZidian, VersionInfo.getInstance(11, 0, 0), 180, - false, true, false, true); - public static UcdPropertyDetail kIRGKangXi_Detail = new UcdPropertyDetail( - UcdProperty.kIRGKangXi, VersionInfo.getInstance(11, 0, 0), 181, - false, true, false, true); - public static UcdPropertyDetail kIRGDaeJaweon_Detail = new UcdPropertyDetail( - UcdProperty.kIRGDaeJaweon, VersionInfo.getInstance(11, 0, 0), 182, - false, true, false, true); - public static UcdPropertyDetail kIRGDaiKanwaZiten_Detail = new UcdPropertyDetail( - UcdProperty.kIRGDaiKanwaZiten, VersionInfo.getInstance(11, 0, 0), - VersionInfo.getInstance(15, 1, 0), 183, - false, true, false, true); - public static UcdPropertyDetail kKorean_Detail = new UcdPropertyDetail( - UcdProperty.kKorean, VersionInfo.getInstance(11, 0, 0), 184, - false, true, false, true); - public static UcdPropertyDetail kMainlandTelegraph_Detail = new UcdPropertyDetail( - UcdProperty.kMainlandTelegraph, VersionInfo.getInstance(11, 0, 0), 185, - false, true, false, true); - public static UcdPropertyDetail kMorohashi_Detail = new UcdPropertyDetail( - UcdProperty.kMorohashi, VersionInfo.getInstance(11, 0, 0), 186, - false, true, false, true); + // UcdProperty.kAlternateKangXi, VersionInfo.getInstance(11,0,0), 169, + // false, true, false, true); + public static UcdPropertyDetail kBigFive_Detail = + new UcdPropertyDetail( + UcdProperty.kBigFive, + VersionInfo.getInstance(11, 0, 0), + 
170, + false, + true, + false, + true); + public static UcdPropertyDetail kCCCII_Detail = + new UcdPropertyDetail( + UcdProperty.kCCCII, + VersionInfo.getInstance(11, 0, 0), + 171, + false, + true, + false, + true); + public static UcdPropertyDetail kDaeJaweon_Detail = + new UcdPropertyDetail( + UcdProperty.kDaeJaweon, + VersionInfo.getInstance(11, 0, 0), + 172, + false, + true, + false, + true); + public static UcdPropertyDetail kEACC_Detail = + new UcdPropertyDetail( + UcdProperty.kEACC, + VersionInfo.getInstance(11, 0, 0), + 173, + false, + true, + false, + true); + public static UcdPropertyDetail kFrequency_Detail = + new UcdPropertyDetail( + UcdProperty.kFrequency, + VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(16, 0, 0), + 174, + false, + true, + false, + true); + public static UcdPropertyDetail kGradeLevel_Detail = + new UcdPropertyDetail( + UcdProperty.kGradeLevel, + VersionInfo.getInstance(11, 0, 0), + 175, + false, + true, + false, + true); + public static UcdPropertyDetail kHDZRadBreak_Detail = + new UcdPropertyDetail( + UcdProperty.kHDZRadBreak, + VersionInfo.getInstance(11, 0, 0), + 176, + false, + true, + false, + true); + public static UcdPropertyDetail kHKGlyph_Detail = + new UcdPropertyDetail( + UcdProperty.kHKGlyph, + VersionInfo.getInstance(11, 0, 0), + 177, + false, + true, + false, + true); + public static UcdPropertyDetail kHanyuPinlu_Detail = + new UcdPropertyDetail( + UcdProperty.kHanyuPinlu, + VersionInfo.getInstance(11, 0, 0), + 178, + false, + true, + false, + true); + public static UcdPropertyDetail kHanyuPinyin_Detail = + new UcdPropertyDetail( + UcdProperty.kHanyuPinyin, + VersionInfo.getInstance(11, 0, 0), + 179, + false, + true, + false, + true); + public static UcdPropertyDetail kIRGHanyuDaZidian_Detail = + new UcdPropertyDetail( + UcdProperty.kIRGHanyuDaZidian, + VersionInfo.getInstance(11, 0, 0), + 180, + false, + true, + false, + true); + public static UcdPropertyDetail kIRGKangXi_Detail = + new UcdPropertyDetail( 
+ UcdProperty.kIRGKangXi, + VersionInfo.getInstance(11, 0, 0), + 181, + false, + true, + false, + true); + public static UcdPropertyDetail kIRGDaeJaweon_Detail = + new UcdPropertyDetail( + UcdProperty.kIRGDaeJaweon, + VersionInfo.getInstance(11, 0, 0), + 182, + false, + true, + false, + true); + public static UcdPropertyDetail kIRGDaiKanwaZiten_Detail = + new UcdPropertyDetail( + UcdProperty.kIRGDaiKanwaZiten, + VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(15, 1, 0), + 183, + false, + true, + false, + true); + public static UcdPropertyDetail kKorean_Detail = + new UcdPropertyDetail( + UcdProperty.kKorean, + VersionInfo.getInstance(11, 0, 0), + 184, + false, + true, + false, + true); + public static UcdPropertyDetail kMainlandTelegraph_Detail = + new UcdPropertyDetail( + UcdProperty.kMainlandTelegraph, + VersionInfo.getInstance(11, 0, 0), + 185, + false, + true, + false, + true); + public static UcdPropertyDetail kMorohashi_Detail = + new UcdPropertyDetail( + UcdProperty.kMorohashi, + VersionInfo.getInstance(11, 0, 0), + 186, + false, + true, + false, + true); // public static UcdPropertyDetail kAlternateMorohashi_Detail = new UcdPropertyDetail ( -// UcdProperty.kAlternateMorohashi, VersionInfo.getInstance(11,0,0), 187, -// false, true, false, true); - public static UcdPropertyDetail kPrimaryNumeric_Detail = new UcdPropertyDetail( - UcdProperty.kPrimaryNumeric, VersionInfo.getInstance(11, 0, 0), 188, - false, true, false, true); - public static UcdPropertyDetail kTaiwanTelegraph_Detail = new UcdPropertyDetail( - UcdProperty.kTaiwanTelegraph, VersionInfo.getInstance(11, 0, 0), 189, - false, true, false, true); - public static UcdPropertyDetail kXerox_Detail = new UcdPropertyDetail( - UcdProperty.kXerox, VersionInfo.getInstance(11, 0, 0), 190, - false, true, false, true); - public static UcdPropertyDetail kPseudoGB1_Detail = new UcdPropertyDetail( - UcdProperty.kPseudoGB1, VersionInfo.getInstance(11, 0, 0), 191, - false, true, false, true); - public 
static UcdPropertyDetail kIBMJapan_Detail = new UcdPropertyDetail( - UcdProperty.kIBMJapan, VersionInfo.getInstance(11, 0, 0), 192, - false, true, false, true); - public static UcdPropertyDetail kAccountingNumeric_Detail = new UcdPropertyDetail( - UcdProperty.kAccountingNumeric, VersionInfo.getInstance(11, 0, 0), 193, - false, true, false, true); - public static UcdPropertyDetail kCheungBauer_Detail = new UcdPropertyDetail( - UcdProperty.kCheungBauer, VersionInfo.getInstance(11, 0, 0), 194, - false, true, false, true); - public static UcdPropertyDetail kCheungBauerIndex_Detail = new UcdPropertyDetail( - UcdProperty.kCheungBauerIndex, VersionInfo.getInstance(11, 0, 0), 195, - false, true, false, true); - public static UcdPropertyDetail kFourCornerCode_Detail = new UcdPropertyDetail( - UcdProperty.kFourCornerCode, VersionInfo.getInstance(11, 0, 0), 196, - false, true, false, true); + // UcdProperty.kAlternateMorohashi, VersionInfo.getInstance(11,0,0), 187, + // false, true, false, true); + public static UcdPropertyDetail kPrimaryNumeric_Detail = + new UcdPropertyDetail( + UcdProperty.kPrimaryNumeric, + VersionInfo.getInstance(11, 0, 0), + 188, + false, + true, + false, + true); + public static UcdPropertyDetail kTaiwanTelegraph_Detail = + new UcdPropertyDetail( + UcdProperty.kTaiwanTelegraph, + VersionInfo.getInstance(11, 0, 0), + 189, + false, + true, + false, + true); + public static UcdPropertyDetail kXerox_Detail = + new UcdPropertyDetail( + UcdProperty.kXerox, + VersionInfo.getInstance(11, 0, 0), + 190, + false, + true, + false, + true); + public static UcdPropertyDetail kPseudoGB1_Detail = + new UcdPropertyDetail( + UcdProperty.kPseudoGB1, + VersionInfo.getInstance(11, 0, 0), + 191, + false, + true, + false, + true); + public static UcdPropertyDetail kIBMJapan_Detail = + new UcdPropertyDetail( + UcdProperty.kIBMJapan, + VersionInfo.getInstance(11, 0, 0), + 192, + false, + true, + false, + true); + public static UcdPropertyDetail kAccountingNumeric_Detail = + 
new UcdPropertyDetail( + UcdProperty.kAccountingNumeric, + VersionInfo.getInstance(11, 0, 0), + 193, + false, + true, + false, + true); + public static UcdPropertyDetail kCheungBauer_Detail = + new UcdPropertyDetail( + UcdProperty.kCheungBauer, + VersionInfo.getInstance(11, 0, 0), + 194, + false, + true, + false, + true); + public static UcdPropertyDetail kCheungBauerIndex_Detail = + new UcdPropertyDetail( + UcdProperty.kCheungBauerIndex, + VersionInfo.getInstance(11, 0, 0), + 195, + false, + true, + false, + true); + public static UcdPropertyDetail kFourCornerCode_Detail = + new UcdPropertyDetail( + UcdProperty.kFourCornerCode, + VersionInfo.getInstance(11, 0, 0), + 196, + false, + true, + false, + true); // public static UcdPropertyDetail kWubi_Detail = new UcdPropertyDetail ( -// UcdProperty.kWubi, VersionInfo.getInstance(11,0,0), 197, -// false, true, false, true); - public static UcdPropertyDetail kXHC1983_Detail = new UcdPropertyDetail( - UcdProperty.kXHC1983, VersionInfo.getInstance(11, 0, 0), 198, - false, true, false, true); - public static UcdPropertyDetail kJinmeiyoKanji_Detail = new UcdPropertyDetail( - UcdProperty.kJinmeiyoKanji, VersionInfo.getInstance(11, 0, 0), 199, - false, true, false, true); - public static UcdPropertyDetail kJoyoKanji_Detail = new UcdPropertyDetail( - UcdProperty.kJoyoKanji, VersionInfo.getInstance(11, 0, 0), 200, - false, true, false, true); - public static UcdPropertyDetail kKoreanEducationHanja_Detail = new UcdPropertyDetail( - UcdProperty.kKoreanEducationHanja, VersionInfo.getInstance(11, 0, 0), 201, - false, true, false, true); - public static UcdPropertyDetail kKoreanName_Detail = new UcdPropertyDetail( - UcdProperty.kKoreanName, VersionInfo.getInstance(11, 0, 0), 202, - false, true, false, true); - public static UcdPropertyDetail kTGH_Detail = new UcdPropertyDetail( - UcdProperty.kTGH, VersionInfo.getInstance(11, 0, 0), 203, - false, true, false, true); - public static UcdPropertyDetail kTGHZ2013_Detail = new 
UcdPropertyDetail( - UcdProperty.kTGHZ2013, VersionInfo.getInstance(11, 0, 0), 204, - false, true, false, true); - public static UcdPropertyDetail kSpoofingVariant_Detail = new UcdPropertyDetail( - UcdProperty.kSpoofingVariant, VersionInfo.getInstance(11, 0, 0), 205, - false, true, false, true); - public static UcdPropertyDetail kRSKanWa_Detail = new UcdPropertyDetail( - UcdProperty.kRSKanWa, VersionInfo.getInstance(11, 0, 0), 206, - false, true, false, true); - public static UcdPropertyDetail kRSJapanese_Detail = new UcdPropertyDetail( - UcdProperty.kRSJapanese, VersionInfo.getInstance(11, 0, 0), 207, - false, true, false, true); - public static UcdPropertyDetail kRSKorean_Detail = new UcdPropertyDetail( - UcdProperty.kRSKorean, VersionInfo.getInstance(11, 0, 0), 208, - false, true, false, true); - public static UcdPropertyDetail kRSKangXi_Detail = new UcdPropertyDetail( - UcdProperty.kRSKangXi, VersionInfo.getInstance(11, 0, 0), - VersionInfo.getInstance(15, 1, 0), 209, - false, true, false, true); - public static UcdPropertyDetail kRSAdobe_Japan1_6_Detail = new UcdPropertyDetail( - UcdProperty.kRSAdobe_Japan1_6, VersionInfo.getInstance(11, 0, 0), 210, - false, true, false, true); - public static UcdPropertyDetail kTotalStrokes_Detail = new UcdPropertyDetail( - UcdProperty.kTotalStrokes, VersionInfo.getInstance(11, 0, 0), 211, - false, true, false, true); - public static UcdPropertyDetail kRSTUnicode_Detail = new UcdPropertyDetail( - UcdProperty.kRSTUnicode, VersionInfo.getInstance(9, 0, 0), 212, - false, true, false, true); - public static UcdPropertyDetail kTGT_MergedSrc_Detail = new UcdPropertyDetail( - UcdProperty.kTGT_MergedSrc, VersionInfo.getInstance(9, 0, 0), 213, - false, true, false, true); - public static UcdPropertyDetail kSrc_NushuDuben_Detail = new UcdPropertyDetail( - UcdProperty.kSrc_NushuDuben, VersionInfo.getInstance(10, 0, 0), 214, - false, true, false, true); - public static UcdPropertyDetail kReading_Detail = new UcdPropertyDetail( - 
UcdProperty.kReading, VersionInfo.getInstance(10, 0, 0), 215, - false, true, false, true); - public static UcdPropertyDetail ISO_Comment_Detail = new UcdPropertyDetail( - UcdProperty.ISO_Comment, VersionInfo.getInstance(11, 0, 0), 216, - true, false, false, true); - public static UcdPropertyDetail Unicode_1_Name_Detail = new UcdPropertyDetail( - UcdProperty.Unicode_1_Name, VersionInfo.getInstance(11, 0, 0), 217, - true, false, false, true); - public static UcdPropertyDetail Name_Alias_Detail = new UcdPropertyDetail( - UcdProperty.Name_Alias, VersionInfo.getInstance(11, 0, 0), 218, - false, false, false, true); - public static UcdPropertyDetail Emoji_Detail = new UcdPropertyDetail( - UcdProperty.Emoji, VersionInfo.getInstance(13, 0, 0), 219, - true, false, false, true); - public static UcdPropertyDetail Emoji_Presentation_Detail = new UcdPropertyDetail( - UcdProperty.Emoji_Presentation, VersionInfo.getInstance(13, 0, 0), 220, - true, false, false, true); - public static UcdPropertyDetail Emoji_Modifier_Detail = new UcdPropertyDetail( - UcdProperty.Emoji_Modifier, VersionInfo.getInstance(13, 0, 0), 221, - true, false, false, true); - public static UcdPropertyDetail Emoji_Modifier_Base_Detail = new UcdPropertyDetail( - UcdProperty.Emoji_Modifier_Base, VersionInfo.getInstance(13, 0, 0), 222, - true, false, false, true); - public static UcdPropertyDetail Emoji_Component_Detail = new UcdPropertyDetail( - UcdProperty.Emoji_Component, VersionInfo.getInstance(13, 0, 0), 223, - true, false, false, true); - public static UcdPropertyDetail Extended_Pictographic_Detail = new UcdPropertyDetail( - UcdProperty.Extended_Pictographic, VersionInfo.getInstance(13, 0, 0), 224, - true, false, false, true); - public static UcdPropertyDetail kStrange_Detail = new UcdPropertyDetail( - UcdProperty.kStrange, VersionInfo.getInstance(14, 0, 0), 225, - false, true, false, true); - public static UcdPropertyDetail kAlternateTotalStrokes_Detail = new UcdPropertyDetail( - 
UcdProperty.kAlternateTotalStrokes, VersionInfo.getInstance(15, 0, 0), 226, - false, true, false, true); - public static UcdPropertyDetail NFKC_Simple_Casefold_Detail = new UcdPropertyDetail( - UcdProperty.NFKC_Simple_Casefold, VersionInfo.getInstance(15, 1, 0), 227, - true, false, false, true); - public static UcdPropertyDetail ID_Compat_Math_Start_Detail = new UcdPropertyDetail( - UcdProperty.ID_Compat_Math_Start, VersionInfo.getInstance(15, 1, 0), 228, - true, false, false, true); - public static UcdPropertyDetail ID_Compat_Math_Continue_Detail = new UcdPropertyDetail( - UcdProperty.ID_Compat_Math_Continue, VersionInfo.getInstance(15, 1, 0), 229, - true, false, false, true); - public static UcdPropertyDetail IDS_Unary_Operator_Detail = new UcdPropertyDetail( - UcdProperty.IDS_Unary_Operator, VersionInfo.getInstance(15, 1, 0), 230, - true, false, false, true); - public static UcdPropertyDetail kJapanese_Detail = new UcdPropertyDetail( - UcdProperty.kJapanese, VersionInfo.getInstance(15, 1, 0), 231, - false, true, false, true); - public static UcdPropertyDetail kMojiJoho_Detail = new UcdPropertyDetail( - UcdProperty.kMojiJoho, VersionInfo.getInstance(15, 1, 0), 232, - false, true, false, true); - public static UcdPropertyDetail kSMSZD2003Index_Detail = new UcdPropertyDetail( - UcdProperty.kSMSZD2003Index, VersionInfo.getInstance(15, 1, 0), 233, - false, true, false, true); - public static UcdPropertyDetail kSMSZD2003Readings_Detail = new UcdPropertyDetail( - UcdProperty.kSMSZD2003Readings, VersionInfo.getInstance(15, 1, 0), 234, - false, true, false, true); - public static UcdPropertyDetail kVietnameseNumeric_Detail = new UcdPropertyDetail( - UcdProperty.kVietnameseNumeric, VersionInfo.getInstance(15, 1, 0), 235, - false, true, false, true); - public static UcdPropertyDetail kZhuangNumeric_Detail = new UcdPropertyDetail( - UcdProperty.kZhuangNumeric, VersionInfo.getInstance(15, 1, 0), 236, - false, true, false, true); - public static UcdPropertyDetail 
Indic_Conjunct_Break_Detail = new UcdPropertyDetail( - UcdProperty.Indic_Conjunct_Break, VersionInfo.getInstance(15, 1, 0), 237, - true, false, false, true); - public static UcdPropertyDetail Modifier_Combining_Mark_Detail = new UcdPropertyDetail( - UcdProperty.Modifier_Combining_Mark, VersionInfo.getInstance(16, 0, 0), 238, - true, false, false, true); - public static UcdPropertyDetail kFanqie_Detail = new UcdPropertyDetail( - UcdProperty.kFanqie, VersionInfo.getInstance(16, 0, 0), 239, - false, true, false, true); - public static UcdPropertyDetail kZhuang_Detail = new UcdPropertyDetail( - UcdProperty.kZhuang, VersionInfo.getInstance(16, 0, 0), 240, - false, true, false, true); - public static UcdPropertyDetail Basic_Emoji_Detail = new UcdPropertyDetail( - UcdProperty.Basic_Emoji, -1, - false, false, false, false); - public static UcdPropertyDetail CJK_Radical_Detail = new UcdPropertyDetail( - UcdProperty.CJK_Radical, -2, - false, false, false, false); - public static UcdPropertyDetail Confusable_MA_Detail = new UcdPropertyDetail( - UcdProperty.Confusable_MA, -3, - false, false, false, false); - public static UcdPropertyDetail Confusable_ML_Detail = new UcdPropertyDetail( - UcdProperty.Confusable_ML, -4, - false, false, false, false); - public static UcdPropertyDetail Confusable_SA_Detail = new UcdPropertyDetail( - UcdProperty.Confusable_SA, -5, - false, false, false, false); - public static UcdPropertyDetail Confusable_SL_Detail = new UcdPropertyDetail( - UcdProperty.Confusable_SL, -6, - false, false, false, false); - public static UcdPropertyDetail Do_Not_Emit_Preferred_Detail = new UcdPropertyDetail( - UcdProperty.Do_Not_Emit_Preferred, -7, - false, false, false, false); - public static UcdPropertyDetail Do_Not_Emit_Type_Detail = new UcdPropertyDetail( - UcdProperty.Do_Not_Emit_Type, -8, - false, false, false, false); - public static UcdPropertyDetail Emoji_DCM_Detail = new UcdPropertyDetail( - UcdProperty.Emoji_DCM, VersionInfo.getInstance(6, 0, 0), -9, - 
false, false, false, false); - public static UcdPropertyDetail Emoji_KDDI_Detail = new UcdPropertyDetail( - UcdProperty.Emoji_KDDI, VersionInfo.getInstance(6, 0, 0), -10, - false, false, false, false); - public static UcdPropertyDetail Emoji_SB_Detail = new UcdPropertyDetail( - UcdProperty.Emoji_SB, VersionInfo.getInstance(6, 0, 0), -11, - false, false, false, false); - public static UcdPropertyDetail Identifier_Status_Detail = new UcdPropertyDetail( - UcdProperty.Identifier_Status, VersionInfo.getInstance(9, 0, 0), -12, - false, false, false, false); - public static UcdPropertyDetail Identifier_Type_Detail = new UcdPropertyDetail( - UcdProperty.Identifier_Type, VersionInfo.getInstance(9, 0, 0), -13, - false, false, false, false); - public static UcdPropertyDetail Idn_2008_Detail = new UcdPropertyDetail( - UcdProperty.Idn_2008, -14, - false, false, false, false); - public static UcdPropertyDetail Idn_Mapping_Detail = new UcdPropertyDetail( - UcdProperty.Idn_Mapping, -15, - false, false, false, false); - public static UcdPropertyDetail Idn_Status_Detail = new UcdPropertyDetail( - UcdProperty.Idn_Status, -16, - false, false, false, false); - public static UcdPropertyDetail Named_Sequences_Detail = new UcdPropertyDetail( - UcdProperty.Named_Sequences, -17, - false, false, false, false); - public static UcdPropertyDetail Named_Sequences_Prov_Detail = new UcdPropertyDetail( - UcdProperty.Named_Sequences_Prov, -18, - false, false, false, false); - public static UcdPropertyDetail Other_Joining_Type_Detail = new UcdPropertyDetail( - UcdProperty.Other_Joining_Type, -19, - false, false, false, false); - public static UcdPropertyDetail RGI_Emoji_Flag_Sequence_Detail = new UcdPropertyDetail( - UcdProperty.RGI_Emoji_Flag_Sequence, -20, - false, false, false, false); - public static UcdPropertyDetail RGI_Emoji_Keycap_Sequence_Detail = new UcdPropertyDetail( - UcdProperty.RGI_Emoji_Keycap_Sequence, -21, - false, false, false, false); - public static UcdPropertyDetail 
RGI_Emoji_Modifier_Sequence_Detail = new UcdPropertyDetail( - UcdProperty.RGI_Emoji_Modifier_Sequence, -22, - false, false, false, false); - public static UcdPropertyDetail RGI_Emoji_Tag_Sequence_Detail = new UcdPropertyDetail( - UcdProperty.RGI_Emoji_Tag_Sequence, -23, - false, false, false, false); - public static UcdPropertyDetail RGI_Emoji_Zwj_Sequence_Detail = new UcdPropertyDetail( - UcdProperty.RGI_Emoji_Zwj_Sequence, -24, - false, false, false, false); - public static UcdPropertyDetail Standardized_Variant_Detail = new UcdPropertyDetail( - UcdProperty.Standardized_Variant, -25, - false, false, false, false); + // UcdProperty.kWubi, VersionInfo.getInstance(11,0,0), 197, + // false, true, false, true); + public static UcdPropertyDetail kXHC1983_Detail = + new UcdPropertyDetail( + UcdProperty.kXHC1983, + VersionInfo.getInstance(11, 0, 0), + 198, + false, + true, + false, + true); + public static UcdPropertyDetail kJinmeiyoKanji_Detail = + new UcdPropertyDetail( + UcdProperty.kJinmeiyoKanji, + VersionInfo.getInstance(11, 0, 0), + 199, + false, + true, + false, + true); + public static UcdPropertyDetail kJoyoKanji_Detail = + new UcdPropertyDetail( + UcdProperty.kJoyoKanji, + VersionInfo.getInstance(11, 0, 0), + 200, + false, + true, + false, + true); + public static UcdPropertyDetail kKoreanEducationHanja_Detail = + new UcdPropertyDetail( + UcdProperty.kKoreanEducationHanja, + VersionInfo.getInstance(11, 0, 0), + 201, + false, + true, + false, + true); + public static UcdPropertyDetail kKoreanName_Detail = + new UcdPropertyDetail( + UcdProperty.kKoreanName, + VersionInfo.getInstance(11, 0, 0), + 202, + false, + true, + false, + true); + public static UcdPropertyDetail kTGH_Detail = + new UcdPropertyDetail( + UcdProperty.kTGH, + VersionInfo.getInstance(11, 0, 0), + 203, + false, + true, + false, + true); + public static UcdPropertyDetail kTGHZ2013_Detail = + new UcdPropertyDetail( + UcdProperty.kTGHZ2013, + VersionInfo.getInstance(11, 0, 0), + 204, + false, + 
true, + false, + true); + public static UcdPropertyDetail kSpoofingVariant_Detail = + new UcdPropertyDetail( + UcdProperty.kSpoofingVariant, + VersionInfo.getInstance(11, 0, 0), + 205, + false, + true, + false, + true); + public static UcdPropertyDetail kRSKanWa_Detail = + new UcdPropertyDetail( + UcdProperty.kRSKanWa, + VersionInfo.getInstance(11, 0, 0), + 206, + false, + true, + false, + true); + public static UcdPropertyDetail kRSJapanese_Detail = + new UcdPropertyDetail( + UcdProperty.kRSJapanese, + VersionInfo.getInstance(11, 0, 0), + 207, + false, + true, + false, + true); + public static UcdPropertyDetail kRSKorean_Detail = + new UcdPropertyDetail( + UcdProperty.kRSKorean, + VersionInfo.getInstance(11, 0, 0), + 208, + false, + true, + false, + true); + public static UcdPropertyDetail kRSKangXi_Detail = + new UcdPropertyDetail( + UcdProperty.kRSKangXi, + VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(15, 1, 0), + 209, + false, + true, + false, + true); + public static UcdPropertyDetail kRSAdobe_Japan1_6_Detail = + new UcdPropertyDetail( + UcdProperty.kRSAdobe_Japan1_6, + VersionInfo.getInstance(11, 0, 0), + 210, + false, + true, + false, + true); + public static UcdPropertyDetail kTotalStrokes_Detail = + new UcdPropertyDetail( + UcdProperty.kTotalStrokes, + VersionInfo.getInstance(11, 0, 0), + 211, + false, + true, + false, + true); + public static UcdPropertyDetail kRSTUnicode_Detail = + new UcdPropertyDetail( + UcdProperty.kRSTUnicode, + VersionInfo.getInstance(9, 0, 0), + 212, + false, + true, + false, + true); + public static UcdPropertyDetail kTGT_MergedSrc_Detail = + new UcdPropertyDetail( + UcdProperty.kTGT_MergedSrc, + VersionInfo.getInstance(9, 0, 0), + 213, + false, + true, + false, + true); + public static UcdPropertyDetail kSrc_NushuDuben_Detail = + new UcdPropertyDetail( + UcdProperty.kSrc_NushuDuben, + VersionInfo.getInstance(10, 0, 0), + 214, + false, + true, + false, + true); + public static UcdPropertyDetail kReading_Detail = + 
new UcdPropertyDetail( + UcdProperty.kReading, + VersionInfo.getInstance(10, 0, 0), + 215, + false, + true, + false, + true); + public static UcdPropertyDetail ISO_Comment_Detail = + new UcdPropertyDetail( + UcdProperty.ISO_Comment, + VersionInfo.getInstance(11, 0, 0), + 216, + true, + false, + false, + true); + public static UcdPropertyDetail Unicode_1_Name_Detail = + new UcdPropertyDetail( + UcdProperty.Unicode_1_Name, + VersionInfo.getInstance(11, 0, 0), + 217, + true, + false, + false, + true); + public static UcdPropertyDetail Name_Alias_Detail = + new UcdPropertyDetail( + UcdProperty.Name_Alias, + VersionInfo.getInstance(11, 0, 0), + 218, + false, + false, + false, + true); + public static UcdPropertyDetail Emoji_Detail = + new UcdPropertyDetail( + UcdProperty.Emoji, + VersionInfo.getInstance(13, 0, 0), + 219, + true, + false, + false, + true); + public static UcdPropertyDetail Emoji_Presentation_Detail = + new UcdPropertyDetail( + UcdProperty.Emoji_Presentation, + VersionInfo.getInstance(13, 0, 0), + 220, + true, + false, + false, + true); + public static UcdPropertyDetail Emoji_Modifier_Detail = + new UcdPropertyDetail( + UcdProperty.Emoji_Modifier, + VersionInfo.getInstance(13, 0, 0), + 221, + true, + false, + false, + true); + public static UcdPropertyDetail Emoji_Modifier_Base_Detail = + new UcdPropertyDetail( + UcdProperty.Emoji_Modifier_Base, + VersionInfo.getInstance(13, 0, 0), + 222, + true, + false, + false, + true); + public static UcdPropertyDetail Emoji_Component_Detail = + new UcdPropertyDetail( + UcdProperty.Emoji_Component, + VersionInfo.getInstance(13, 0, 0), + 223, + true, + false, + false, + true); + public static UcdPropertyDetail Extended_Pictographic_Detail = + new UcdPropertyDetail( + UcdProperty.Extended_Pictographic, + VersionInfo.getInstance(13, 0, 0), + 224, + true, + false, + false, + true); + public static UcdPropertyDetail kStrange_Detail = + new UcdPropertyDetail( + UcdProperty.kStrange, + VersionInfo.getInstance(14, 0, 0), + 
225, + false, + true, + false, + true); + public static UcdPropertyDetail kAlternateTotalStrokes_Detail = + new UcdPropertyDetail( + UcdProperty.kAlternateTotalStrokes, + VersionInfo.getInstance(15, 0, 0), + 226, + false, + true, + false, + true); + public static UcdPropertyDetail NFKC_Simple_Casefold_Detail = + new UcdPropertyDetail( + UcdProperty.NFKC_Simple_Casefold, + VersionInfo.getInstance(15, 1, 0), + 227, + true, + false, + false, + true); + public static UcdPropertyDetail ID_Compat_Math_Start_Detail = + new UcdPropertyDetail( + UcdProperty.ID_Compat_Math_Start, + VersionInfo.getInstance(15, 1, 0), + 228, + true, + false, + false, + true); + public static UcdPropertyDetail ID_Compat_Math_Continue_Detail = + new UcdPropertyDetail( + UcdProperty.ID_Compat_Math_Continue, + VersionInfo.getInstance(15, 1, 0), + 229, + true, + false, + false, + true); + public static UcdPropertyDetail IDS_Unary_Operator_Detail = + new UcdPropertyDetail( + UcdProperty.IDS_Unary_Operator, + VersionInfo.getInstance(15, 1, 0), + 230, + true, + false, + false, + true); + public static UcdPropertyDetail kJapanese_Detail = + new UcdPropertyDetail( + UcdProperty.kJapanese, + VersionInfo.getInstance(15, 1, 0), + 231, + false, + true, + false, + true); + public static UcdPropertyDetail kMojiJoho_Detail = + new UcdPropertyDetail( + UcdProperty.kMojiJoho, + VersionInfo.getInstance(15, 1, 0), + 232, + false, + true, + false, + true); + public static UcdPropertyDetail kSMSZD2003Index_Detail = + new UcdPropertyDetail( + UcdProperty.kSMSZD2003Index, + VersionInfo.getInstance(15, 1, 0), + 233, + false, + true, + false, + true); + public static UcdPropertyDetail kSMSZD2003Readings_Detail = + new UcdPropertyDetail( + UcdProperty.kSMSZD2003Readings, + VersionInfo.getInstance(15, 1, 0), + 234, + false, + true, + false, + true); + public static UcdPropertyDetail kVietnameseNumeric_Detail = + new UcdPropertyDetail( + UcdProperty.kVietnameseNumeric, + VersionInfo.getInstance(15, 1, 0), + 235, + false, + 
true, + false, + true); + public static UcdPropertyDetail kZhuangNumeric_Detail = + new UcdPropertyDetail( + UcdProperty.kZhuangNumeric, + VersionInfo.getInstance(15, 1, 0), + 236, + false, + true, + false, + true); + public static UcdPropertyDetail Indic_Conjunct_Break_Detail = + new UcdPropertyDetail( + UcdProperty.Indic_Conjunct_Break, + VersionInfo.getInstance(15, 1, 0), + 237, + true, + false, + false, + true); + public static UcdPropertyDetail Modifier_Combining_Mark_Detail = + new UcdPropertyDetail( + UcdProperty.Modifier_Combining_Mark, + VersionInfo.getInstance(16, 0, 0), + 238, + true, + false, + false, + true); + public static UcdPropertyDetail kFanqie_Detail = + new UcdPropertyDetail( + UcdProperty.kFanqie, + VersionInfo.getInstance(16, 0, 0), + 239, + false, + true, + false, + true); + public static UcdPropertyDetail kZhuang_Detail = + new UcdPropertyDetail( + UcdProperty.kZhuang, + VersionInfo.getInstance(16, 0, 0), + 240, + false, + true, + false, + true); + public static UcdPropertyDetail Basic_Emoji_Detail = + new UcdPropertyDetail(UcdProperty.Basic_Emoji, -1, false, false, false, false); + public static UcdPropertyDetail CJK_Radical_Detail = + new UcdPropertyDetail(UcdProperty.CJK_Radical, -2, false, false, false, false); + public static UcdPropertyDetail Confusable_MA_Detail = + new UcdPropertyDetail(UcdProperty.Confusable_MA, -3, false, false, false, false); + public static UcdPropertyDetail Confusable_ML_Detail = + new UcdPropertyDetail(UcdProperty.Confusable_ML, -4, false, false, false, false); + public static UcdPropertyDetail Confusable_SA_Detail = + new UcdPropertyDetail(UcdProperty.Confusable_SA, -5, false, false, false, false); + public static UcdPropertyDetail Confusable_SL_Detail = + new UcdPropertyDetail(UcdProperty.Confusable_SL, -6, false, false, false, false); + public static UcdPropertyDetail Do_Not_Emit_Preferred_Detail = + new UcdPropertyDetail( + UcdProperty.Do_Not_Emit_Preferred, -7, false, false, false, false); + public static 
UcdPropertyDetail Do_Not_Emit_Type_Detail = + new UcdPropertyDetail(UcdProperty.Do_Not_Emit_Type, -8, false, false, false, false); + public static UcdPropertyDetail Emoji_DCM_Detail = + new UcdPropertyDetail( + UcdProperty.Emoji_DCM, + VersionInfo.getInstance(6, 0, 0), + -9, + false, + false, + false, + false); + public static UcdPropertyDetail Emoji_KDDI_Detail = + new UcdPropertyDetail( + UcdProperty.Emoji_KDDI, + VersionInfo.getInstance(6, 0, 0), + -10, + false, + false, + false, + false); + public static UcdPropertyDetail Emoji_SB_Detail = + new UcdPropertyDetail( + UcdProperty.Emoji_SB, + VersionInfo.getInstance(6, 0, 0), + -11, + false, + false, + false, + false); + public static UcdPropertyDetail Identifier_Status_Detail = + new UcdPropertyDetail( + UcdProperty.Identifier_Status, + VersionInfo.getInstance(9, 0, 0), + -12, + false, + false, + false, + false); + public static UcdPropertyDetail Identifier_Type_Detail = + new UcdPropertyDetail( + UcdProperty.Identifier_Type, + VersionInfo.getInstance(9, 0, 0), + -13, + false, + false, + false, + false); + public static UcdPropertyDetail Idn_2008_Detail = + new UcdPropertyDetail(UcdProperty.Idn_2008, -14, false, false, false, false); + public static UcdPropertyDetail Idn_Mapping_Detail = + new UcdPropertyDetail(UcdProperty.Idn_Mapping, -15, false, false, false, false); + public static UcdPropertyDetail Idn_Status_Detail = + new UcdPropertyDetail(UcdProperty.Idn_Status, -16, false, false, false, false); + public static UcdPropertyDetail Named_Sequences_Detail = + new UcdPropertyDetail(UcdProperty.Named_Sequences, -17, false, false, false, false); + public static UcdPropertyDetail Named_Sequences_Prov_Detail = + new UcdPropertyDetail( + UcdProperty.Named_Sequences_Prov, -18, false, false, false, false); + public static UcdPropertyDetail Other_Joining_Type_Detail = + new UcdPropertyDetail(UcdProperty.Other_Joining_Type, -19, false, false, false, false); + public static UcdPropertyDetail 
RGI_Emoji_Flag_Sequence_Detail = + new UcdPropertyDetail( + UcdProperty.RGI_Emoji_Flag_Sequence, -20, false, false, false, false); + public static UcdPropertyDetail RGI_Emoji_Keycap_Sequence_Detail = + new UcdPropertyDetail( + UcdProperty.RGI_Emoji_Keycap_Sequence, -21, false, false, false, false); + public static UcdPropertyDetail RGI_Emoji_Modifier_Sequence_Detail = + new UcdPropertyDetail( + UcdProperty.RGI_Emoji_Modifier_Sequence, -22, false, false, false, false); + public static UcdPropertyDetail RGI_Emoji_Tag_Sequence_Detail = + new UcdPropertyDetail( + UcdProperty.RGI_Emoji_Tag_Sequence, -23, false, false, false, false); + public static UcdPropertyDetail RGI_Emoji_Zwj_Sequence_Detail = + new UcdPropertyDetail( + UcdProperty.RGI_Emoji_Zwj_Sequence, -24, false, false, false, false); + public static UcdPropertyDetail Standardized_Variant_Detail = + new UcdPropertyDetail( + UcdProperty.Standardized_Variant, -25, false, false, false, false); private UcdProperty ucdProperty; private VersionInfo minVersion; @@ -835,8 +2250,14 @@ private UcdPropertyDetail( boolean isCJKShowIfEmpty, boolean isOrgUCDXMLAttribute) { this( - ucdProperty, minVersion, null, - sortOrder, isBaseAttribute, isCJKAttribute, isCJKShowIfEmpty, isOrgUCDXMLAttribute); + ucdProperty, + minVersion, + null, + sortOrder, + isBaseAttribute, + isCJKAttribute, + isCJKShowIfEmpty, + isOrgUCDXMLAttribute); } private UcdPropertyDetail( @@ -847,8 +2268,14 @@ private UcdPropertyDetail( boolean isCJKShowIfEmpty, boolean isOrgUCDXMLAttribute) { this( - ucdProperty, null, null, - sortOrder, isBaseAttribute, isCJKAttribute, isCJKShowIfEmpty, isOrgUCDXMLAttribute); + ucdProperty, + null, + null, + sortOrder, + isBaseAttribute, + isCJKAttribute, + isCJKShowIfEmpty, + isOrgUCDXMLAttribute); } private UcdPropertyDetail( @@ -923,4 +2350,4 @@ public boolean isCJKShowIfEmpty() { public boolean isOrgUCDXMLAttribute() { return this.isOrgUCDXMLAttribute; } -} \ No newline at end of file +} diff --git 
a/unicodetools/src/main/java/org/unicode/xml/UcdSectionDetail.java b/unicodetools/src/main/java/org/unicode/xml/UcdSectionDetail.java index c9f938410..3b93a8520 100644 --- a/unicodetools/src/main/java/org/unicode/xml/UcdSectionDetail.java +++ b/unicodetools/src/main/java/org/unicode/xml/UcdSectionDetail.java @@ -3,24 +3,65 @@ import com.ibm.icu.util.VersionInfo; import org.unicode.props.UcdProperty; -import java.util.LinkedHashSet; -import java.util.Set; - public class UcdSectionDetail { public enum UcdSection { - BLOCKS("blocks", "block", VersionInfo.getInstance(1, 1, 0), null, Blocks_Detail, true, true), - CJKRADICALS("cjk-radicals", "cjk-radical", VersionInfo.getInstance(1, 1, 0), null, CJKRadicals_Detail, false, + BLOCKS( + "blocks", + "block", + VersionInfo.getInstance(1, 1, 0), + null, + Blocks_Detail, + true, + true), + CJKRADICALS( + "cjk-radicals", + "cjk-radical", + VersionInfo.getInstance(1, 1, 0), + null, + CJKRadicals_Detail, + false, + false), + DONOTEMIT( + "do-not-emit", + "instead", + VersionInfo.getInstance(16, 0, 0), + null, + DoNotEmit_Detail, + false, + false), + EMOJISOURCES( + "emoji-sources", + "emoji-source", + VersionInfo.getInstance(1, 1, 0), + null, + EmojiSources_Detail, + true, + false), + NAMEDSEQUENCES( + "named-sequences", + "named-sequence", + VersionInfo.getInstance(1, 1, 0), + null, + NamedSequences_Detail, + false, + false), + NORMALIZATIONCORRECTIONS( + "normalization-corrections", + "normalization-correction", + VersionInfo.getInstance(1, 1, 0), + null, + NormalizationCorrections_Detail, + true, false), - DONOTEMIT("do-not-emit", "instead", VersionInfo.getInstance(16, 0, 0), null, DoNotEmit_Detail, false, false), - EMOJISOURCES("emoji-sources", "emoji-source", VersionInfo.getInstance(1, 1, 0), null, EmojiSources_Detail, - true, false), - NAMEDSEQUENCES("named-sequences", "named-sequence", VersionInfo.getInstance(1, 1, 0), null, - NamedSequences_Detail, false, false), - NORMALIZATIONCORRECTIONS("normalization-corrections", 
"normalization-correction", VersionInfo.getInstance(1, - 1, 0), null, NormalizationCorrections_Detail, true, false), - STANDARDIZEDVARIANTS("standardized-variants", "standardized-variant", VersionInfo.getInstance(1, 1, 0), null, - StandardizedVariants_Detail, true, false); + STANDARDIZEDVARIANTS( + "standardized-variants", + "standardized-variant", + VersionInfo.getInstance(1, 1, 0), + null, + StandardizedVariants_Detail, + true, + false); private final String tag; private final String childTag; private final VersionInfo minVersion; @@ -75,82 +116,77 @@ public boolean getParserWithMissing() { } } - public static UcdSectionDetail Blocks_Detail = new UcdSectionDetail( - UcdSection.BLOCKS, - new UcdSectionComponent[]{ - new UcdSectionComponent( - VersionInfo.getInstance(1, 1, 0), - null, - UcdProperty.Block) - }, - 0); - public static UcdSectionDetail NamedSequences_Detail = new UcdSectionDetail( - UcdSection.NAMEDSEQUENCES, - new UcdSectionComponent[]{ - new UcdSectionComponent( - VersionInfo.getInstance(1, 1, 0), - null, - UcdProperty.Named_Sequences) - }, - 1); - public static UcdSectionDetail NormalizationCorrections_Detail = new UcdSectionDetail( - UcdSection.NORMALIZATIONCORRECTIONS, - new UcdSectionComponent[]{ - new UcdSectionComponent( - VersionInfo.getInstance(1, 1, 0), - null, - UcdProperty.NC_Original) - }, - 2); - public static UcdSectionDetail StandardizedVariants_Detail = new UcdSectionDetail( - UcdSection.STANDARDIZEDVARIANTS, - new UcdSectionComponent[]{ - new UcdSectionComponent( - VersionInfo.getInstance(1, 1, 0), - null, - UcdProperty.Standardized_Variant), - new UcdSectionComponent( - VersionInfo.getInstance(13, 1, 0), - null, - UcdProperty.emoji_variation_sequence) - }, - 3); - public static UcdSectionDetail CJKRadicals_Detail = new UcdSectionDetail( - UcdSection.CJKRADICALS, - new UcdSectionComponent[]{ - new UcdSectionComponent( - VersionInfo.getInstance(1, 1, 0), - null, - UcdProperty.CJK_Radical) - }, - 4); - public static UcdSectionDetail 
EmojiSources_Detail = new UcdSectionDetail( - UcdSection.EMOJISOURCES, - new UcdSectionComponent[]{ - new UcdSectionComponent( - VersionInfo.getInstance(1, 1, 0), - null, - UcdProperty.Emoji_DCM) - }, - 5); - public static UcdSectionDetail DoNotEmit_Detail = new UcdSectionDetail( - UcdSection.DONOTEMIT, - new UcdSectionComponent[]{ - new UcdSectionComponent( - VersionInfo.getInstance(1, 1, 0), - null, - UcdProperty.Do_Not_Emit_Type) - }, - 6); + public static UcdSectionDetail Blocks_Detail = + new UcdSectionDetail( + UcdSection.BLOCKS, + new UcdSectionComponent[] { + new UcdSectionComponent( + VersionInfo.getInstance(1, 1, 0), null, UcdProperty.Block) + }, + 0); + public static UcdSectionDetail NamedSequences_Detail = + new UcdSectionDetail( + UcdSection.NAMEDSEQUENCES, + new UcdSectionComponent[] { + new UcdSectionComponent( + VersionInfo.getInstance(1, 1, 0), null, UcdProperty.Named_Sequences) + }, + 1); + public static UcdSectionDetail NormalizationCorrections_Detail = + new UcdSectionDetail( + UcdSection.NORMALIZATIONCORRECTIONS, + new UcdSectionComponent[] { + new UcdSectionComponent( + VersionInfo.getInstance(1, 1, 0), null, UcdProperty.NC_Original) + }, + 2); + public static UcdSectionDetail StandardizedVariants_Detail = + new UcdSectionDetail( + UcdSection.STANDARDIZEDVARIANTS, + new UcdSectionComponent[] { + new UcdSectionComponent( + VersionInfo.getInstance(1, 1, 0), + null, + UcdProperty.Standardized_Variant), + new UcdSectionComponent( + VersionInfo.getInstance(13, 1, 0), + null, + UcdProperty.emoji_variation_sequence) + }, + 3); + public static UcdSectionDetail CJKRadicals_Detail = + new UcdSectionDetail( + UcdSection.CJKRADICALS, + new UcdSectionComponent[] { + new UcdSectionComponent( + VersionInfo.getInstance(1, 1, 0), null, UcdProperty.CJK_Radical) + }, + 4); + public static UcdSectionDetail EmojiSources_Detail = + new UcdSectionDetail( + UcdSection.EMOJISOURCES, + new UcdSectionComponent[] { + new UcdSectionComponent( + VersionInfo.getInstance(1, 
1, 0), null, UcdProperty.Emoji_DCM) + }, + 5); + public static UcdSectionDetail DoNotEmit_Detail = + new UcdSectionDetail( + UcdSection.DONOTEMIT, + new UcdSectionComponent[] { + new UcdSectionComponent( + VersionInfo.getInstance(1, 1, 0), + null, + UcdProperty.Do_Not_Emit_Type) + }, + 6); private final UcdSection ucdSection; private final UcdSectionComponent[] ucdSectionComponents; private final int sortOrder; private UcdSectionDetail( - UcdSection ucdSection, - UcdSectionComponent[] ucdSectionComponents, - int sortOrder) { + UcdSection ucdSection, UcdSectionComponent[] ucdSectionComponents, int sortOrder) { this.ucdSection = ucdSection; this.ucdSectionComponents = ucdSectionComponents; this.sortOrder = sortOrder; @@ -167,4 +203,4 @@ public UcdSectionComponent[] getUcdSectionComponents() { public int getSortOrder() { return this.sortOrder; } -} \ No newline at end of file +} diff --git a/unicodetools/src/main/java/org/unicode/xml/UcdXML.java b/unicodetools/src/main/java/org/unicode/xml/UcdXML.java index e40cc0d6f..409c6b959 100644 --- a/unicodetools/src/main/java/org/unicode/xml/UcdXML.java +++ b/unicodetools/src/main/java/org/unicode/xml/UcdXML.java @@ -2,19 +2,17 @@ import com.ibm.icu.dev.tool.UOption; import com.ibm.icu.util.VersionInfo; -import org.unicode.props.IndexUnicodeProperties; -import org.unicode.props.UcdProperty; -import org.unicode.props.UcdPropertyValues; -import org.xml.sax.SAXException; -import org.xml.sax.helpers.AttributesImpl; - -import javax.xml.transform.TransformerConfigurationException; import java.io.*; import java.nio.charset.StandardCharsets; import java.util.*; import java.util.regex.Matcher; import java.util.regex.Pattern; - +import javax.xml.transform.TransformerConfigurationException; +import org.unicode.props.IndexUnicodeProperties; +import org.unicode.props.UcdProperty; +import org.unicode.props.UcdPropertyValues; +import org.xml.sax.SAXException; +import org.xml.sax.helpers.AttributesImpl; public class UcdXML { @@ -32,12 +30,12 
@@ private enum UCDXMLOUTPUTTYPE { } private enum Range { - RESERVED ("reserved"), - SURROGATE ("surrogate"), - NONCHARACTER ("noncharacter"), - CHARACTER ("char"), - CJKUNIFIEDIDEOGRAPH ("char"), - NONRANGE ("nonrange"); + RESERVED("reserved"), + SURROGATE("surrogate"), + NONCHARACTER("noncharacter"), + CHARACTER("char"), + CJKUNIFIEDIDEOGRAPH("char"), + NONRANGE("nonrange"); private final String tag; @@ -51,19 +49,13 @@ public String toString() { } private static final UOption[] options = { - UOption.HELP_H(), - UOption.create("ucdversion", 'v', UOption.REQUIRES_ARG), - UOption.create("range", 'r', UOption.REQUIRES_ARG), - UOption.create("output", 'o', UOption.REQUIRES_ARG), - UOption.create("outputfolder", 'f', UOption.REQUIRES_ARG) + UOption.HELP_H(), + UOption.create("ucdversion", 'v', UOption.REQUIRES_ARG), + UOption.create("range", 'r', UOption.REQUIRES_ARG), + UOption.create("output", 'o', UOption.REQUIRES_ARG), + UOption.create("outputfolder", 'f', UOption.REQUIRES_ARG) }; - private static final int - HELP = 0, - UCDVERSION = 1, - RANGE = 2, - OUTPUT = 3, - OUTPUTFOLDER = 4; - + private static final int HELP = 0, UCDVERSION = 1, RANGE = 2, OUTPUT = 3, OUTPUTFOLDER = 4; public static void main(String[] args) throws Exception { VersionInfo ucdVersion = null; @@ -74,8 +66,9 @@ public static void main(String[] args) throws Exception { UOption.parseArgs(args, options); if (options[HELP].doesOccur) { - System.out.println("UcdXML --ucdversion {version number} --outputfolder {destination} " + - "--range [ALL|NOUNIHAN|UNIHAN] --output [FLAT|GROUPED]"); + System.out.println( + "UcdXML --ucdversion {version number} --outputfolder {destination} " + + "--range [ALL|NOUNIHAN|UNIHAN] --output [FLAT|GROUPED]"); System.exit(0); } @@ -83,59 +76,66 @@ public static void main(String[] args) throws Exception { if (options[UCDVERSION].doesOccur) { try { ucdVersion = VersionInfo.getInstance(options[UCDVERSION].value); + } catch (Exception e) { + throw new 
IllegalArgumentException( + "Could not convert " + + options[UCDVERSION].value + + " to a valid UCD version"); } - catch (Exception e) { - throw new IllegalArgumentException("Could not convert " + options[UCDVERSION].value + - " to a valid UCD version"); - } - } - else { - throw new IllegalArgumentException("Missing command line option: --ucdversion (or -v)"); + } else { + throw new IllegalArgumentException( + "Missing command line option: --ucdversion (or -v)"); } if (options[RANGE].doesOccur) { try { - ucdxmloutputrange = UCDXMLOUTPUTRANGE.valueOf(options[RANGE].value.toUpperCase(Locale.ROOT)); + ucdxmloutputrange = + UCDXMLOUTPUTRANGE.valueOf( + options[RANGE].value.toUpperCase(Locale.ROOT)); + } catch (Exception e) { + throw new IllegalArgumentException( + "Could not convert " + + options[RANGE].value + + " to one of [ALL|NOUNIHAN|UNIHAN]"); } - catch (Exception e) { - throw new IllegalArgumentException("Could not convert " + options[RANGE].value + - " to one of [ALL|NOUNIHAN|UNIHAN]"); - } - } - else { + } else { throw new IllegalArgumentException("Missing command line option: --range (or -r)"); } if (options[OUTPUT].doesOccur) { try { - ucdxmloutputtype = UCDXMLOUTPUTTYPE.valueOf(options[OUTPUT].value.toUpperCase(Locale.ROOT)); - } - catch (Exception e) { - throw new IllegalArgumentException("Could not convert " + options[OUTPUT].value + - " to one of [FLAT|GROUPED]"); + ucdxmloutputtype = + UCDXMLOUTPUTTYPE.valueOf( + options[OUTPUT].value.toUpperCase(Locale.ROOT)); + } catch (Exception e) { + throw new IllegalArgumentException( + "Could not convert " + + options[OUTPUT].value + + " to one of [FLAT|GROUPED]"); } - } - else { + } else { throw new IllegalArgumentException("Missing command line option: --output (or -o)"); } if (options[OUTPUTFOLDER].doesOccur) { try { - destinationFolder = new File(options[OUTPUTFOLDER].value + getVersionString(ucdVersion, 3) + - "\\xmltest\\"); + destinationFolder = + new File( + options[OUTPUTFOLDER].value + + 
getVersionString(ucdVersion, 3) + + "\\xmltest\\"); if (!destinationFolder.exists()) { - if(!destinationFolder.mkdir()) { + if (!destinationFolder.mkdir()) { throw new IOException(); } } + } catch (Exception e) { + throw new IllegalArgumentException( + "Could not find or create " + options[OUTPUTFOLDER].value); } - catch (Exception e) { - throw new IllegalArgumentException("Could not find or create " + options[OUTPUTFOLDER].value); - } - } - else { - throw new IllegalArgumentException("Missing command line option: --outputfolder (or -f)"); + } else { + throw new IllegalArgumentException( + "Missing command line option: --outputfolder (or -f)"); } - } - catch (Exception e) { + } catch (Exception e) { System.err.println(e.getMessage()); System.exit(1); } @@ -144,28 +144,32 @@ public static void main(String[] args) throws Exception { buildUcdXMLFile(ucdVersion, destinationFolder, ucdxmloutputrange, ucdxmloutputtype); System.out.println("end"); System.exit(0); - } - else { + } else { System.err.println("Unexpected error when building UcdXML file."); System.exit(1); } - - } - private static void buildUcdXMLFile(VersionInfo ucdVersion, File destinationFolder, UCDXMLOUTPUTRANGE outputRange - , UCDXMLOUTPUTTYPE outputType) throws IOException, TransformerConfigurationException, SAXException { + private static void buildUcdXMLFile( + VersionInfo ucdVersion, + File destinationFolder, + UCDXMLOUTPUTRANGE outputRange, + UCDXMLOUTPUTTYPE outputType) + throws IOException, TransformerConfigurationException, SAXException { int lowCodepoint = 0x0; int highCodepoint = 0x10FFFF; // Tangut - //int lowCodepoint = 0x17000; - //int highCodepoint = 0x1B2FB; - //0x10FFFF + // int lowCodepoint = 0x17000; + // int highCodepoint = 0x1B2FB; + // 0x10FFFF File tempFile = new File(destinationFolder, "temp.xml"); String outputFilename = - "ucd." + outputRange.toString().toLowerCase(Locale.ROOT) + "." + - outputType.toString().toLowerCase(Locale.ROOT) + ".xml"; + "ucd." 
+ + outputRange.toString().toLowerCase(Locale.ROOT) + + "." + + outputType.toString().toLowerCase(Locale.ROOT) + + ".xml"; File destinationFile = new File(destinationFolder, outputFilename); FileOutputStream fileOutputStream = new FileOutputStream(tempFile); @@ -183,7 +187,13 @@ private static void buildUcdXMLFile(VersionInfo ucdVersion, File destinationFold writer.addContent("Unicode " + getVersionString(ucdVersion, 3)); writer.endElement("description"); } - buildRepertoire(writer, attributeResolver, ucdVersion, lowCodepoint, highCodepoint, outputRange, + buildRepertoire( + writer, + attributeResolver, + ucdVersion, + lowCodepoint, + highCodepoint, + outputRange, outputType); if (outputRange != UCDXMLOUTPUTRANGE.UNIHAN) { ucdDataResolver.buildSection(UcdSectionDetail.UcdSection.BLOCKS); @@ -199,21 +209,23 @@ private static void buildUcdXMLFile(VersionInfo ucdVersion, File destinationFold writer.endFile(); fileOutputStream.close(); cleanUcdXMLFile(tempFile, destinationFile); - if(!tempFile.delete()) { + if (!tempFile.delete()) { throw new IOException("Could not delete temporary file " + tempFile); } } private static void cleanUcdXMLFile(File tempFile, File destinationFile) throws IOException { - //XALAN writes out characters outside the BMP as entities. - //Use this code to replace the entities with the correct characters. - //See: https://issues.apache.org/jira/browse/XALANJ-2595 + // XALAN writes out characters outside the BMP as entities. + // Use this code to replace the entities with the correct characters. 
+ // See: https://issues.apache.org/jira/browse/XALANJ-2595 FileInputStream fileInputStream = new FileInputStream(tempFile); FileOutputStream fileOutputStream = new FileOutputStream(destinationFile); - InputStreamReader inputStreamReader = new InputStreamReader(fileInputStream, StandardCharsets.UTF_8); - OutputStreamWriter outputStreamWriter = new OutputStreamWriter(fileOutputStream, StandardCharsets.UTF_8); + InputStreamReader inputStreamReader = + new InputStreamReader(fileInputStream, StandardCharsets.UTF_8); + OutputStreamWriter outputStreamWriter = + new OutputStreamWriter(fileOutputStream, StandardCharsets.UTF_8); BufferedReader bufferedReader = new BufferedReader(inputStreamReader); BufferedWriter bufferedWriter = new BufferedWriter(outputStreamWriter); @@ -221,7 +233,11 @@ private static void cleanUcdXMLFile(File tempFile, File destinationFile) throws String line; while ((line = bufferedReader.readLine()) != null) { Matcher matcher = Pattern.compile("&#(\\d+);").matcher(line); - line = matcher.replaceAll(matchResult -> new String(Character.toChars(Integer.parseInt(matcher.group(1))))); + line = + matcher.replaceAll( + matchResult -> + new String( + Character.toChars(Integer.parseInt(matcher.group(1))))); bufferedWriter.append(line); bufferedWriter.newLine(); } @@ -230,20 +246,41 @@ private static void cleanUcdXMLFile(File tempFile, File destinationFile) throws fileOutputStream.close(); } - private static void buildRepertoire(UCDXMLWriter writer, AttributeResolver attributeResolver, - VersionInfo ucdVersion, int lowCodepoint, int highCodepoint, - UCDXMLOUTPUTRANGE outputRange, UCDXMLOUTPUTTYPE outputType) throws SAXException { + private static void buildRepertoire( + UCDXMLWriter writer, + AttributeResolver attributeResolver, + VersionInfo ucdVersion, + int lowCodepoint, + int highCodepoint, + UCDXMLOUTPUTRANGE outputRange, + UCDXMLOUTPUTTYPE outputType) + throws SAXException { writer.startElement("repertoire"); { for (int codepoint = lowCodepoint; codepoint 
<= highCodepoint; codepoint++) { if (isWritableCodepoint(codepoint, outputRange, attributeResolver)) { if (outputType == UCDXMLOUTPUTTYPE.GROUPED) { - codepoint = buildGroup(writer, attributeResolver, ucdVersion, codepoint, highCodepoint, - outputRange, outputType); + codepoint = + buildGroup( + writer, + attributeResolver, + ucdVersion, + codepoint, + highCodepoint, + outputRange, + outputType); } else { - codepoint = buildChars(writer, attributeResolver, ucdVersion, codepoint, highCodepoint, - outputRange, outputType, null); + codepoint = + buildChars( + writer, + attributeResolver, + ucdVersion, + codepoint, + highCodepoint, + outputRange, + outputType, + null); } } } @@ -251,42 +288,76 @@ private static void buildRepertoire(UCDXMLWriter writer, AttributeResolver attri } } - private static int buildGroup(UCDXMLWriter writer, AttributeResolver attributeResolver, VersionInfo ucdVersion, - int lowCodepoint, int highCodepoint, UCDXMLOUTPUTRANGE outputRange, - UCDXMLOUTPUTTYPE outputType) throws SAXException { + private static int buildGroup( + UCDXMLWriter writer, + AttributeResolver attributeResolver, + VersionInfo ucdVersion, + int lowCodepoint, + int highCodepoint, + UCDXMLOUTPUTRANGE outputRange, + UCDXMLOUTPUTTYPE outputType) + throws SAXException { - int lastCodepointInGroup = getLastCodepointInGroup(attributeResolver, lowCodepoint, highCodepoint); + int lastCodepointInGroup = + getLastCodepointInGroup(attributeResolver, lowCodepoint, highCodepoint); - AttributesImpl groupAttrs = getGroupAttributes(ucdVersion, attributeResolver, lowCodepoint, - lastCodepointInGroup, outputRange); + AttributesImpl groupAttrs = + getGroupAttributes( + ucdVersion, + attributeResolver, + lowCodepoint, + lastCodepointInGroup, + outputRange); writer.startElement("group", groupAttrs); { - buildChars(writer, attributeResolver, ucdVersion, lowCodepoint, lastCodepointInGroup, outputRange, - outputType, groupAttrs); + buildChars( + writer, + attributeResolver, + ucdVersion, + 
lowCodepoint, + lastCodepointInGroup, + outputRange, + outputType, + groupAttrs); writer.endElement("group"); } return lastCodepointInGroup; } - private static int buildChars(UCDXMLWriter writer, AttributeResolver attributeResolver, VersionInfo ucdVersion, - int lowCodepoint, int highCodepoint, UCDXMLOUTPUTRANGE outputRange, - UCDXMLOUTPUTTYPE outputType, AttributesImpl groupAttrs) throws SAXException { + private static int buildChars( + UCDXMLWriter writer, + AttributeResolver attributeResolver, + VersionInfo ucdVersion, + int lowCodepoint, + int highCodepoint, + UCDXMLOUTPUTRANGE outputRange, + UCDXMLOUTPUTTYPE outputType, + AttributesImpl groupAttrs) + throws SAXException { ArrayList range = new ArrayList<>(); Range rangeType = Range.NONRANGE; for (int codepoint = lowCodepoint; codepoint <= highCodepoint; codepoint++) { - if (attributeResolver.isUnassignedCodepoint(codepoint) || - (outputRange == UCDXMLOUTPUTRANGE.NOUNIHAN && attributeResolver.isUnifiedIdeograph(codepoint))) { + if (attributeResolver.isUnassignedCodepoint(codepoint) + || (outputRange == UCDXMLOUTPUTRANGE.NOUNIHAN + && attributeResolver.isUnifiedIdeograph(codepoint))) { Range currentRangeType = getRangeType(attributeResolver, codepoint); if (!range.isEmpty()) { - if (!currentRangeType.equals(rangeType) || attributeResolver.isDifferentRange(codepoint, - codepoint - 1)) { + if (!currentRangeType.equals(rangeType) + || attributeResolver.isDifferentRange(codepoint, codepoint - 1)) { if (outputRange != UCDXMLOUTPUTRANGE.UNIHAN) { if (outputType == UCDXMLOUTPUTTYPE.GROUPED) { - buildGroupedRange(writer, attributeResolver, ucdVersion, range, rangeType, groupAttrs); + buildGroupedRange( + writer, + attributeResolver, + ucdVersion, + range, + rangeType, + groupAttrs); } else { - buildUngroupedRange(writer, attributeResolver, ucdVersion, range, rangeType); + buildUngroupedRange( + writer, attributeResolver, ucdVersion, range, rangeType); } } range.clear(); @@ -298,9 +369,16 @@ private static int 
buildChars(UCDXMLWriter writer, AttributeResolver attributeRe if (!range.isEmpty()) { if (outputRange != UCDXMLOUTPUTRANGE.UNIHAN) { if (outputType == UCDXMLOUTPUTTYPE.GROUPED) { - buildGroupedRange(writer, attributeResolver, ucdVersion, range, rangeType, groupAttrs); + buildGroupedRange( + writer, + attributeResolver, + ucdVersion, + range, + rangeType, + groupAttrs); } else { - buildUngroupedRange(writer, attributeResolver, ucdVersion, range, rangeType); + buildUngroupedRange( + writer, attributeResolver, ucdVersion, range, rangeType); } } range.clear(); @@ -308,18 +386,26 @@ private static int buildChars(UCDXMLWriter writer, AttributeResolver attributeRe } if (isWritableCodepoint(codepoint, outputRange, attributeResolver)) { if (outputType == UCDXMLOUTPUTTYPE.GROUPED) { - buildGroupedChar(writer, attributeResolver, ucdVersion, codepoint, outputRange, groupAttrs); + buildGroupedChar( + writer, + attributeResolver, + ucdVersion, + codepoint, + outputRange, + groupAttrs); } else { - buildUngroupedChar(writer, attributeResolver, ucdVersion, codepoint, outputRange); + buildUngroupedChar( + writer, attributeResolver, ucdVersion, codepoint, outputRange); } } } } - //Handle any range before the end of the repertoire element. + // Handle any range before the end of the repertoire element. 
if (!range.isEmpty()) { if (outputRange != UCDXMLOUTPUTRANGE.UNIHAN) { if (outputType == UCDXMLOUTPUTTYPE.GROUPED) { - buildGroupedRange(writer, attributeResolver, ucdVersion, range, rangeType, groupAttrs); + buildGroupedRange( + writer, attributeResolver, ucdVersion, range, rangeType, groupAttrs); } else { buildUngroupedRange(writer, attributeResolver, ucdVersion, range, rangeType); } @@ -328,33 +414,49 @@ private static int buildChars(UCDXMLWriter writer, AttributeResolver attributeRe return highCodepoint; } - private static void buildUngroupedChar(UCDXMLWriter writer, AttributeResolver attributeResolver, - VersionInfo ucdVersion, int codepoint, UCDXMLOUTPUTRANGE outputRange) + private static void buildUngroupedChar( + UCDXMLWriter writer, + AttributeResolver attributeResolver, + VersionInfo ucdVersion, + int codepoint, + UCDXMLOUTPUTRANGE outputRange) throws SAXException { - AttributesImpl charAttributes = getAttributes(ucdVersion, attributeResolver, codepoint, outputRange); + AttributesImpl charAttributes = + getAttributes(ucdVersion, attributeResolver, codepoint, outputRange); buildChar(writer, attributeResolver, codepoint, charAttributes); } - private static void buildGroupedChar(UCDXMLWriter writer, AttributeResolver attributeResolver, - VersionInfo ucdVersion, int codepoint, UCDXMLOUTPUTRANGE outputRange, - AttributesImpl groupAttrs) throws SAXException { + private static void buildGroupedChar( + UCDXMLWriter writer, + AttributeResolver attributeResolver, + VersionInfo ucdVersion, + int codepoint, + UCDXMLOUTPUTRANGE outputRange, + AttributesImpl groupAttrs) + throws SAXException { - AttributesImpl orgCharAttributes = getAttributes(ucdVersion, attributeResolver, codepoint, outputRange); + AttributesImpl orgCharAttributes = + getAttributes(ucdVersion, attributeResolver, codepoint, outputRange); AttributesImpl charAttributes = new AttributesImpl(); for (int index = 0; index < orgCharAttributes.getLength(); index++) { String attributeQName = 
orgCharAttributes.getQName(index); String orgCharAttributesValue = orgCharAttributes.getValue(index); String groupAttributeValue = groupAttrs.getValue(attributeQName); if (!orgCharAttributesValue.equals(groupAttributeValue)) { - charAttributes.addAttribute(NAMESPACE, attributeQName, attributeQName, "CDATA", orgCharAttributesValue); + charAttributes.addAttribute( + NAMESPACE, attributeQName, attributeQName, "CDATA", orgCharAttributesValue); } } buildChar(writer, attributeResolver, codepoint, charAttributes); } - private static void buildChar(UCDXMLWriter writer, AttributeResolver attributeResolver, int codepoint, - AttributesImpl charAttributes) throws SAXException { + private static void buildChar( + UCDXMLWriter writer, + AttributeResolver attributeResolver, + int codepoint, + AttributesImpl charAttributes) + throws SAXException { writer.startElement("char", charAttributes); { HashMap nameAliases = attributeResolver.getNameAliases(codepoint); @@ -362,7 +464,8 @@ private static void buildChar(UCDXMLWriter writer, AttributeResolver attributeRe for (String alias : nameAliases.keySet()) { AttributesImpl nameAliasAt = new AttributesImpl(); nameAliasAt.addAttribute(NAMESPACE, "alias", "alias", "CDATA", alias); - nameAliasAt.addAttribute(NAMESPACE, "type", "type", "CDATA", nameAliases.get(alias)); + nameAliasAt.addAttribute( + NAMESPACE, "type", "type", "CDATA", nameAliases.get(alias)); writer.startElement("name-alias", nameAliasAt); { writer.endElement("name-alias"); @@ -373,18 +476,24 @@ private static void buildChar(UCDXMLWriter writer, AttributeResolver attributeRe } } - private static void buildGroupedRange(UCDXMLWriter writer, AttributeResolver attributeResolver, - VersionInfo ucdVersion, ArrayList range, Range rangeType, - AttributesImpl groupAttrs) throws SAXException { - AttributesImpl orgRangeAttributes = getReservedAttributes(ucdVersion, attributeResolver, range); + private static void buildGroupedRange( + UCDXMLWriter writer, + AttributeResolver 
attributeResolver, + VersionInfo ucdVersion, + ArrayList range, + Range rangeType, + AttributesImpl groupAttrs) + throws SAXException { + AttributesImpl orgRangeAttributes = + getReservedAttributes(ucdVersion, attributeResolver, range); AttributesImpl rangeAttributes = new AttributesImpl(); for (int index = 0; index < orgRangeAttributes.getLength(); index++) { String attributeQName = orgRangeAttributes.getQName(index); String orgCharAttributesValue = orgRangeAttributes.getValue(index); String groupAttributeValue = groupAttrs.getValue(attributeQName); if (!orgCharAttributesValue.equals(groupAttributeValue)) { - rangeAttributes.addAttribute(NAMESPACE, attributeQName, attributeQName, "CDATA", - orgCharAttributesValue); + rangeAttributes.addAttribute( + NAMESPACE, attributeQName, attributeQName, "CDATA", orgCharAttributesValue); } } writer.startElement(rangeType.tag, rangeAttributes); @@ -393,21 +502,28 @@ private static void buildGroupedRange(UCDXMLWriter writer, AttributeResolver att } } - private static void buildUngroupedRange(UCDXMLWriter writer, AttributeResolver attributeResolver, - VersionInfo ucdVersion, ArrayList range, Range rangeType) + private static void buildUngroupedRange( + UCDXMLWriter writer, + AttributeResolver attributeResolver, + VersionInfo ucdVersion, + ArrayList range, + Range rangeType) throws SAXException { - AttributesImpl rangeAttributes = getReservedAttributes(ucdVersion, attributeResolver, range); + AttributesImpl rangeAttributes = + getReservedAttributes(ucdVersion, attributeResolver, range); writer.startElement(rangeType.tag, rangeAttributes); { writer.endElement(rangeType.tag); } } - private static boolean isWritableCodepoint(int codepoint, UCDXMLOUTPUTRANGE outputRange, - AttributeResolver attributeResolver) { - return outputRange == UCDXMLOUTPUTRANGE.ALL || - (outputRange == UCDXMLOUTPUTRANGE.UNIHAN && attributeResolver.isUnihanAttributeRange(codepoint)) || - (outputRange == UCDXMLOUTPUTRANGE.NOUNIHAN && 
!attributeResolver.isUnifiedIdeograph(codepoint)); + private static boolean isWritableCodepoint( + int codepoint, UCDXMLOUTPUTRANGE outputRange, AttributeResolver attributeResolver) { + return outputRange == UCDXMLOUTPUTRANGE.ALL + || (outputRange == UCDXMLOUTPUTRANGE.UNIHAN + && attributeResolver.isUnihanAttributeRange(codepoint)) + || (outputRange == UCDXMLOUTPUTRANGE.NOUNIHAN + && !attributeResolver.isUnifiedIdeograph(codepoint)); } private static Range getRangeType(AttributeResolver attributeResolver, int codepoint) { @@ -429,17 +545,17 @@ private static Range getRangeType(AttributeResolver attributeResolver, int codep return Range.RESERVED; } - private static int getLastCodepointInGroup(AttributeResolver attributeResolver, int lowCodepoint, - int highCodepoint) { + private static int getLastCodepointInGroup( + AttributeResolver attributeResolver, int lowCodepoint, int highCodepoint) { String blk = attributeResolver.getAttributeValue(UcdProperty.Block, lowCodepoint); for (int codepoint = lowCodepoint; codepoint <= highCodepoint; codepoint++) { if (!blk.equals(attributeResolver.getAttributeValue(UcdProperty.Block, codepoint))) { return codepoint - 1; } - if (codepoint == 0x20 - 1 // put the C0 controls in their own group - || codepoint == 0xa0 - 1 // put the C0 controls in their own group - || codepoint == 0x1160 - 1 // split the jamos into three groups - || codepoint == 0x11a8 - 1 // split the jamos into three groups + if (codepoint == 0x20 - 1 // put the C0 controls in their own group + || codepoint == 0xa0 - 1 // put the C0 controls in their own group + || codepoint == 0x1160 - 1 // split the jamos into three groups + || codepoint == 0x11a8 - 1 // split the jamos into three groups || codepoint == 0x1f1e6 - 1 // put the regional indicators in their own group ) { return codepoint; @@ -448,18 +564,28 @@ private static int getLastCodepointInGroup(AttributeResolver attributeResolver, return highCodepoint; } - private static AttributesImpl getAttributes(VersionInfo 
version, AttributeResolver attributeResolver, - int codepoint, UCDXMLOUTPUTRANGE outputRange) { + private static AttributesImpl getAttributes( + VersionInfo version, + AttributeResolver attributeResolver, + int codepoint, + UCDXMLOUTPUTRANGE outputRange) { AttributesImpl attributes = new AttributesImpl(); - attributes.addAttribute(NAMESPACE, "cp", "cp", "CDATA", attributeResolver.getHexString(codepoint)); + attributes.addAttribute( + NAMESPACE, "cp", "cp", "CDATA", attributeResolver.getHexString(codepoint)); for (UcdPropertyDetail propDetail : UcdPropertyDetail.ucdxmlValues()) { UcdProperty prop = propDetail.getUcdProperty(); - if (version.compareTo(propDetail.getMinVersion()) >= 0 && - (propDetail.getMaxVersion() == null || version.compareTo(propDetail.getMaxVersion()) < 0)) { + if (version.compareTo(propDetail.getMinVersion()) >= 0 + && (propDetail.getMaxVersion() == null + || version.compareTo(propDetail.getMaxVersion()) < 0)) { String attrValue = attributeResolver.getAttributeValue(prop, codepoint); - boolean isAttributeIncluded = getIsAttributeIncluded(attrValue, - attributeResolver.isUnihanAttributeRange(codepoint), propDetail, prop, outputRange); + boolean isAttributeIncluded = + getIsAttributeIncluded( + attrValue, + attributeResolver.isUnihanAttributeRange(codepoint), + propDetail, + prop, + outputRange); if (isAttributeIncluded) { String propName = prop.getShortName(); if (propName.startsWith("cjk")) { @@ -472,22 +598,27 @@ private static AttributesImpl getAttributes(VersionInfo version, AttributeResolv return attributes; } - private static AttributesImpl getGroupAttributes(VersionInfo version, AttributeResolver attributeResolver, - int lowCodepoint, int highCodepoint, - UCDXMLOUTPUTRANGE outputRange) { + private static AttributesImpl getGroupAttributes( + VersionInfo version, + AttributeResolver attributeResolver, + int lowCodepoint, + int highCodepoint, + UCDXMLOUTPUTRANGE outputRange) { AttributesImpl attributes = new AttributesImpl(); for 
(UcdPropertyDetail propDetail : UcdPropertyDetail.ucdxmlValues()) { UcdProperty prop = propDetail.getUcdProperty(); - if (version.compareTo(propDetail.getMinVersion()) >= 0 && - (propDetail.getMaxVersion() == null || version.compareTo(propDetail.getMaxVersion()) < 0)) { + if (version.compareTo(propDetail.getMinVersion()) >= 0 + && (propDetail.getMaxVersion() == null + || version.compareTo(propDetail.getMaxVersion()) < 0)) { int totalCount = 0; Map counters = new LinkedHashMap<>(); for (int codepoint = lowCodepoint; codepoint <= highCodepoint; codepoint++) { if (!attributeResolver.isUnassignedCodepoint(codepoint)) { String attrValue = attributeResolver.getAttributeValue(prop, codepoint); - int currentCount = (counters.get(attrValue) == null) ? 0 : counters.get(attrValue); + int currentCount = + (counters.get(attrValue) == null) ? 0 : counters.get(attrValue); currentCount++; totalCount++; counters.put(attrValue, currentCount); @@ -517,14 +648,20 @@ private static AttributesImpl getGroupAttributes(VersionInfo version, AttributeR } } if (max > 0.2 * totalCount && max > 1) { - boolean isAttributeIncluded = getIsAttributeIncluded(bestAttrValue, - attributeResolver.isUnihanAttributeRange(lowCodepoint), propDetail, prop, outputRange); + boolean isAttributeIncluded = + getIsAttributeIncluded( + bestAttrValue, + attributeResolver.isUnihanAttributeRange(lowCodepoint), + propDetail, + prop, + outputRange); if (isAttributeIncluded) { String propName = prop.getShortName(); if (propName.startsWith("cjk")) { propName = propName.substring(2); } - attributes.addAttribute(NAMESPACE, propName, propName, "CDATA", bestAttrValue); + attributes.addAttribute( + NAMESPACE, propName, propName, "CDATA", bestAttrValue); } } } @@ -532,9 +669,12 @@ private static AttributesImpl getGroupAttributes(VersionInfo version, AttributeR return attributes; } - private static boolean getIsAttributeIncluded(String attrValue, boolean isUnihanAttributeRange, - UcdPropertyDetail propDetail, UcdProperty prop, - 
UCDXMLOUTPUTRANGE outputRange) { + private static boolean getIsAttributeIncluded( + String attrValue, + boolean isUnihanAttributeRange, + UcdPropertyDetail propDetail, + UcdProperty prop, + UCDXMLOUTPUTRANGE outputRange) { if (attrValue == null) { return false; } @@ -546,7 +686,8 @@ private static boolean getIsAttributeIncluded(String attrValue, boolean isUnihan if (prop.equals(UcdProperty.Numeric_Value) && !attrValue.equals("NaN")) { return true; } - return propDetail.isCJKAttribute() && (propDetail.isCJKShowIfEmpty() || !attrValue.isEmpty()); + return propDetail.isCJKAttribute() + && (propDetail.isCJKShowIfEmpty() || !attrValue.isEmpty()); } if (outputRange == UCDXMLOUTPUTRANGE.NOUNIHAN && propDetail.isCJKAttribute()) { return false; @@ -561,26 +702,38 @@ private static boolean getIsAttributeIncluded(String attrValue, boolean isUnihan return !attrValue.isEmpty(); } - - private static AttributesImpl getReservedAttributes(VersionInfo version, AttributeResolver attributeResolver, - ArrayList range) { + private static AttributesImpl getReservedAttributes( + VersionInfo version, AttributeResolver attributeResolver, ArrayList range) { AttributesImpl attributes = new AttributesImpl(); if (range.size() == 1) { - attributes.addAttribute(NAMESPACE, "cp", "cp", "CDATA", attributeResolver.getHexString(range.get(0))); + attributes.addAttribute( + NAMESPACE, "cp", "cp", "CDATA", attributeResolver.getHexString(range.get(0))); } else { - attributes.addAttribute(NAMESPACE, "first-cp", "first-cp", "CDATA", + attributes.addAttribute( + NAMESPACE, + "first-cp", + "first-cp", + "CDATA", attributeResolver.getHexString(range.get(0))); - attributes.addAttribute(NAMESPACE, "last-cp", "last-cp", "CDATA", + attributes.addAttribute( + NAMESPACE, + "last-cp", + "last-cp", + "CDATA", attributeResolver.getHexString(range.get(range.size() - 1))); } for (UcdPropertyDetail propDetail : UcdPropertyDetail.baseValues()) { UcdProperty prop = propDetail.getUcdProperty(); - if 
(version.compareTo(propDetail.getMinVersion()) >= 0 && - (propDetail.getMaxVersion() == null || version.compareTo(propDetail.getMaxVersion()) <= 0)) { - String attrValue = attributeResolver.getAttributeValue(propDetail.getUcdProperty(), range.get(0)); - - attributes.addAttribute(NAMESPACE, prop.getShortName(), prop.getShortName(), "CDATA", attrValue); + if (version.compareTo(propDetail.getMinVersion()) >= 0 + && (propDetail.getMaxVersion() == null + || version.compareTo(propDetail.getMaxVersion()) <= 0)) { + String attrValue = + attributeResolver.getAttributeValue( + propDetail.getUcdProperty(), range.get(0)); + + attributes.addAttribute( + NAMESPACE, prop.getShortName(), prop.getShortName(), "CDATA", attrValue); } } return attributes; @@ -588,7 +741,13 @@ private static AttributesImpl getReservedAttributes(VersionInfo version, Attribu private static String getVersionString(VersionInfo version, int maxDigits) { if (maxDigits >= 1 && maxDigits <= 4) { - int[] digits = new int[]{version.getMajor(), version.getMinor(), version.getMilli(), version.getMicro()}; + int[] digits = + new int[] { + version.getMajor(), + version.getMinor(), + version.getMilli(), + version.getMicro() + }; StringBuilder verStr = new StringBuilder(7); verStr.append(digits[0]); for (int i = 1; i < maxDigits; ++i) { @@ -600,4 +759,4 @@ private static String getVersionString(VersionInfo version, int maxDigits) { throw new IllegalArgumentException("Invalid maxDigits range"); } } -} \ No newline at end of file +} From 7e161a6adcdd48e2c70b32842e9bacae6eb63c61 Mon Sep 17 00:00:00 2001 From: John Wilcock Date: Fri, 7 Jun 2024 08:45:07 -0700 Subject: [PATCH 05/14] Ran GenerateEnums --- .../src/main/java/org/unicode/props/UcdPropertyValues.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java b/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java index 37020e727..5b748c30a 100644 --- 
a/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java +++ b/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java @@ -756,6 +756,7 @@ public static East_Asian_Width_Values forName(String name) { // Emoji_DCM // Emoji_KDDI // Emoji_SB + // emoji_variation_sequence // Equivalent_Unified_Ideograph // FC_NFKC_Closure public enum General_Category_Values implements Named { @@ -1537,6 +1538,7 @@ public static Joining_Type_Values forName(String name) { // kVietnameseNumeric // kXerox // kXHC1983 + // kZhuang // kZhuangNumeric // kZVariant public enum Line_Break_Values implements Named { @@ -1619,6 +1621,9 @@ public static Line_Break_Values forName(String name) { // Name_Alias // Named_Sequences // Named_Sequences_Prov + // NC_Corrected + // NC_Original + // NC_Version public enum NFC_Quick_Check_Values implements Named { Maybe("M"), No("N"), From d609d928818fdacdc23a9b09896aee8716c6d104 Mon Sep 17 00:00:00 2001 From: John Wilcock Date: Wed, 26 Jun 2024 14:22:10 -0700 Subject: [PATCH 06/14] Fixing a broken rebase --- .../org/unicode/props/ExtraPropertyAliases.txt | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/unicodetools/src/main/resources/org/unicode/props/ExtraPropertyAliases.txt b/unicodetools/src/main/resources/org/unicode/props/ExtraPropertyAliases.txt index b2b4fd449..c6a68f7de 100644 --- a/unicodetools/src/main/resources/org/unicode/props/ExtraPropertyAliases.txt +++ b/unicodetools/src/main/resources/org/unicode/props/ExtraPropertyAliases.txt @@ -174,17 +174,7 @@ cjkVietnameseNumeric ; kVietnameseNumeric cjkZhuangNumeric ; kZhuangNumeric # 16.0 cjkFanqie ; kFanqie -<<<<<<< HEAD - -kTGT_MergedSrc ; kTGT_MergedSrc -kRSTUnicode ; kRSTUnicode - -kSrc_NushuDuben ; kSrc_NushuDuben -kReading ; kReading - kEH_Func ; kEH_Func kEH_FVal ; kEH_FVal kEH_UniK ; kEH_UniK -======= cjkZhuang ; kZhuang ->>>>>>> 2f297052 (Initial checkin for UcdXML) From cb314e855a9fe08cced23e10740c4b4980e8a8a5 Mon Sep 17 00:00:00 2001 From: John Wilcock 
Date: Wed, 26 Jun 2024 14:26:43 -0700 Subject: [PATCH 07/14] Fixing a broken rebase --- .../org/unicode/props/IndexUnicodeProperties.txt | 9 --------- 1 file changed, 9 deletions(-) diff --git a/unicodetools/src/main/resources/org/unicode/props/IndexUnicodeProperties.txt b/unicodetools/src/main/resources/org/unicode/props/IndexUnicodeProperties.txt index c171cdacb..26b4cc049 100644 --- a/unicodetools/src/main/resources/org/unicode/props/IndexUnicodeProperties.txt +++ b/unicodetools/src/main/resources/org/unicode/props/IndexUnicodeProperties.txt @@ -382,15 +382,6 @@ emoji/*/emoji-zwj-sequences; RGI_Emoji_Zwj_Sequence #emoji/*/emoji-test ; Emoji_Short_Name - -FileType ; TangutSources ; PropertyValue -TangutSources ; kTGT_MergedSrc -TangutSources ; kRSTUnicode - -FileType ; NushuSources ; PropertyValue -NushuSources ; kSrc_NushuDuben -NushuSources ; kReading - FileType ; Unikemet ; PropertyValue Unikemet ; kEH_Cat Unikemet ; kEH_Core From 776e00e0470c640d828765f17f8a92192a07be73 Mon Sep 17 00:00:00 2001 From: John Wilcock Date: Fri, 23 Aug 2024 08:55:07 -0700 Subject: [PATCH 08/14] Added support for comparing different ucdxml files --- .../unicode/props/PropertyParsingInfo.java | 21 +- .../org/unicode/xml/AttributeResolver.java | 47 +- .../java/org/unicode/xml/CompareUcdXML.java | 180 +++++++ .../java/org/unicode/xml/UCDDataResolver.java | 18 + .../org/unicode/xml/UcdPropertyDetail.java | 344 ++++++------- .../org/unicode/xml/UcdSectionDetail.java | 23 +- .../src/main/java/org/unicode/xml/UcdXML.java | 109 ++-- .../java/org/unicode/xml/XMLProperties.java | 471 ++++++++++++++++++ 8 files changed, 987 insertions(+), 226 deletions(-) create mode 100644 unicodetools/src/main/java/org/unicode/xml/CompareUcdXML.java create mode 100644 unicodetools/src/main/java/org/unicode/xml/XMLProperties.java diff --git a/unicodetools/src/main/java/org/unicode/props/PropertyParsingInfo.java b/unicodetools/src/main/java/org/unicode/props/PropertyParsingInfo.java index 6c794380e..522a6eceb 
100644 --- a/unicodetools/src/main/java/org/unicode/props/PropertyParsingInfo.java +++ b/unicodetools/src/main/java/org/unicode/props/PropertyParsingInfo.java @@ -760,10 +760,23 @@ private static void parsePropertyValueFile( assert propInfo.property.getType() == PropertyType.Binary; value = "Yes"; } else { - value = - propInfo.property.getType() == PropertyType.Binary - ? "Yes" - : line.getParts()[2]; + if (propInfo.property.getType() == PropertyType.Binary) { + //Handle @missing values for binary attributes (see 13.0.0 emoji-data.txt) + if (line.getParts().length == 3) { + if (line.getParts()[2].equals("No")) { + value = "No"; + } + else { + value = "Yes"; + } + } + else { + value = "Yes"; + } + } + else { + value = line.getParts()[2]; + } // The value should not be an empty string. // Exception: NFKC_Casefold does remove some characters by mapping them to nothing. assert !value.isEmpty() diff --git a/unicodetools/src/main/java/org/unicode/xml/AttributeResolver.java b/unicodetools/src/main/java/org/unicode/xml/AttributeResolver.java index 87d88dbde..a9db3392e 100644 --- a/unicodetools/src/main/java/org/unicode/xml/AttributeResolver.java +++ b/unicodetools/src/main/java/org/unicode/xml/AttributeResolver.java @@ -2,6 +2,8 @@ import com.ibm.icu.dev.util.UnicodeMap; import java.util.*; + +import com.ibm.icu.util.VersionInfo; import org.unicode.cldr.draft.FileUtilities; import org.unicode.props.*; @@ -53,14 +55,14 @@ public class AttributeResolver { // If there is a change in any of these properties between two adjacent characters, it will // result in a new range. 
- private final UcdProperty[] rangeDefiningProperties = { - UcdProperty.Age, - UcdProperty.Bidi_Class, - UcdProperty.Block, - UcdProperty.Decomposition_Mapping, - UcdProperty.Numeric_Type, - UcdProperty.Numeric_Value, - UcdProperty.Vertical_Orientation + private final UcdPropertyDetail[] rangeDefiningPropertyDetails = { + UcdPropertyDetail.Age_Detail, + UcdPropertyDetail.Bidi_Class_Detail, + UcdPropertyDetail.Block_Detail, + UcdPropertyDetail.Decomposition_Mapping_Detail, + UcdPropertyDetail.Numeric_Type_Detail, + UcdPropertyDetail.Numeric_Value_Detail, + UcdPropertyDetail.Vertical_Orientation_Detail }; public AttributeResolver(IndexUnicodeProperties iup) { @@ -120,7 +122,8 @@ private enum AliasType { ALTERNATE("alternate"), CONTROL("control"), CORRECTION("correction"), - FIGMENT("figment"); + FIGMENT("figment"), + NONE("none"); private final String aliasType; @@ -171,8 +174,13 @@ private HashMap> loadNameAliases() { for (UcdLineParser.UcdLine line : parser) { String[] parts = line.getParts(); int codepoint = Integer.parseInt(parts[0], 16); - NameAlias nameAlias = - new NameAlias(parts[1], AliasType.valueOf(parts[2].toUpperCase(Locale.ROOT))); + NameAlias nameAlias; + if(parts.length < 3) { + nameAlias = new NameAlias(parts[1], AliasType.NONE); + } + else { + nameAlias = new NameAlias(parts[1], AliasType.valueOf(parts[2].toUpperCase(Locale.ROOT))); + } if (nameAliasesByCodepoint.containsKey(codepoint)) { LinkedList nameAliases = @@ -412,13 +420,18 @@ private String getMappingValue( return sb.toString().trim(); } - public boolean isDifferentRange(int codepointA, int codepointB) { + public boolean isDifferentRange(VersionInfo ucdVersion, int codepointA, int codepointB) { boolean isDifference = false; - for (UcdProperty property : rangeDefiningProperties) { - isDifference = - isDifference - || !getAttributeValue(property, codepointA) - .equals(getAttributeValue(property, codepointB)); + for (UcdPropertyDetail propDetail : rangeDefiningPropertyDetails) { + UcdProperty 
prop = propDetail.getUcdProperty(); + if (ucdVersion.compareTo(propDetail.getMinVersion()) >= 0 + && (propDetail.getMaxVersion() == null + || ucdVersion.compareTo(propDetail.getMaxVersion()) < 0)) { + isDifference = + isDifference + || !getAttributeValue(prop, codepointA) + .equals(getAttributeValue(prop, codepointB)); + } } return isDifference; } diff --git a/unicodetools/src/main/java/org/unicode/xml/CompareUcdXML.java b/unicodetools/src/main/java/org/unicode/xml/CompareUcdXML.java new file mode 100644 index 000000000..122b30a71 --- /dev/null +++ b/unicodetools/src/main/java/org/unicode/xml/CompareUcdXML.java @@ -0,0 +1,180 @@ +package org.unicode.xml; + +import com.ibm.icu.dev.tool.UOption; +import com.ibm.icu.dev.util.UnicodeMap; +import com.ibm.icu.text.UnicodeSet; +import org.unicode.props.UcdProperty; + +import java.io.*; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Objects; + +public class CompareUcdXML { + + private static final String NEWLINE = System.getProperty("line.separator"); + private static final UOption[] options = { + UOption.HELP_H(), + UOption.create("fileA", 'a', UOption.REQUIRES_ARG), + UOption.create("fileB", 'b', UOption.REQUIRES_ARG) + }; + + private static final UcdProperty[] codepointSequenceProperties = new UcdProperty[]{ + UcdProperty.Named_Sequences, + UcdProperty.Named_Sequences_Prov, + UcdProperty.Standardized_Variant, + UcdProperty.Emoji_DCM, + UcdProperty.Emoji_KDDI, + UcdProperty.Emoji_SB, + UcdProperty.Do_Not_Emit_Preferred + }; + + private static final HashMap knownDifferences; + + static { + knownDifferences = new HashMap<>(); + + //https://github.com/unicode-org/properties/issues/296 + knownDifferences.put(0x31E4, new String [] {"Hani", "Zyyy"}); + knownDifferences.put(0x31E5, new String [] {"Hani", "Zyyy"}); + + //https://github.com/unicode-org/unicodetools/issues/325 + knownDifferences.put(0x109F7, new String [] {"1/6", "2/12"}); + knownDifferences.put(0x109F8, new String [] {"1/4", "3/12"}); + 
knownDifferences.put(0x109F9, new String [] {"1/3", "4/12"}); + knownDifferences.put(0x109FB, new String [] {"1/2", "6/12"}); + knownDifferences.put(0x109FD, new String [] {"2/3", "8/12"}); + knownDifferences.put(0x109FE, new String [] {"3/4", "9/12"}); + knownDifferences.put(0x109FF, new String [] {"5/6", "10/12"}); + + //https://github.com/unicode-org/properties/issues/172 + knownDifferences.put(0x5146, new String [] {"1000000", "1000000 1000000000000"}); + knownDifferences.put(0x79ED, new String [] {"1000000000", "1000000000 1000000000000"}); + } + + private static final int HELP = 0, FILE_A = 1, FILE_B = 2, LOGFILE = 3; + + public static void main(String[] args) throws Exception { + File fileA = null; + File fileB = null; + int errorCount = 0; + + UOption.parseArgs(args, options); + + if (options[HELP].doesOccur) { + System.out.println( + "CompareUcdXML --fileA {file path} --fileB {file path}"); + System.exit(0); + } + + if (options[FILE_A].doesOccur) { + try { + fileA = + new File(options[FILE_A].value); + if (!fileA.exists()) { + throw new IOException(); + } + } catch (Exception e) { + throw new IllegalArgumentException( + "Could not find " + options[FILE_A].value); + } + } else { + throw new IllegalArgumentException( + "Missing command line option: --fileA (or -a)"); + } + + if (options[FILE_B].doesOccur) { + try { + fileB = + new File(options[FILE_B].value); + if (!fileB.exists()) { + throw new IOException(); + } + } catch (Exception e) { + throw new IllegalArgumentException( + "Could not find " + options[FILE_B].value); + } + } else { + throw new IllegalArgumentException( + "Missing command line option: --fileB (or -b)"); + } + + System.out.println("Comparing " + fileA + " and " + fileB); + + final XMLProperties xmlPropsA = new XMLProperties(fileA); + final XMLProperties xmlPropsB = new XMLProperties(fileB); + + //First, iterate through the UcdProperties on each codepoint. 
+ for (final UcdProperty prop : UcdProperty.values()) { + UnicodeMap fileAMap = xmlPropsA.getMap(prop); + UnicodeMap fileBMap = xmlPropsB.getMap(prop); + if (!fileAMap.equals(fileBMap)) { + for (int i = 0; i <= 0x10ffff; ++i) { + try { + String xmlValA = fileAMap.get(i); + String xmlValB = fileBMap.get(i); + if(!Objects.equals(xmlValA, xmlValB)) { + //At least one string is != null and the strings are different, but we don't care if one + // is null and one is empty_string + //As far as we care, empty_string == null == "00000" + int lenA = (xmlValA == null ? 0 : (xmlValA.equals("00000") ? 0 : xmlValA.length())); + int lenB = (xmlValB == null ? 0 : (xmlValB.equals("00000") ? 0 : xmlValB.length())); + if (!(lenA == 0 && lenB == 0) && !isKnownDifference(i, xmlValA, xmlValB)) { + errorCount++; + System.out.println("For UCDProperty " + prop.name() + " (" + prop.getShortName() + + ") [" + String.format("0x%04X", i) + "], "); + System.out.println("\t" + fileA + " = " + xmlValA); + System.out.println("\t" + fileB + " = " + xmlValB); + } + } + } + catch (Exception e) { + System.out.println("Exception thrown for " + String.format("0x%04X", i)); + System.out.println(e.getMessage()); + } + } + } + } + //Now handle anything that contains codepoint sequences. + for (UcdProperty prop : codepointSequenceProperties) { + UnicodeMap fileAMap = xmlPropsA.getMap(prop); + UnicodeMap fileBMap = xmlPropsB.getMap(prop); + UnicodeSet differences = fileAMap.keySet().addAll(fileBMap.keySet()); + for (String key : differences) { + try { + String xmlValA = fileAMap.get(key); + String xmlValB = fileBMap.get(key); + if(!Objects.equals(xmlValA, xmlValB)) { + //At least one string is != null and the strings are different, but we don't care if one + // is null and one is empty_string + //As far as we care, empty_string == null == "00000" + int lenA = (xmlValA == null ? 0 : (xmlValA.equals("00000") ? 0 : xmlValA.length())); + int lenB = (xmlValB == null ? 0 : (xmlValB.equals("00000") ? 
0 : xmlValB.length())); + if (!(lenA == 0 && lenB == 0)) { + errorCount++; + System.out.println("For UCDProperty " + prop.name() + " (" + prop.getShortName() + + ") [" + key + "], "); + System.out.println("\t" + fileA + " = " + xmlValA); + System.out.println("\t" + fileB + " = " + xmlValB); + } + } + } + catch (Exception e) { + System.out.println("Exception thrown for " + String.format("0x%04X", key)); + System.out.println(e.getMessage()); + } + } + } + System.exit(errorCount); + } + + private static boolean isKnownDifference(int codepoint, String xmlValA, String xmlValB) { + if (knownDifferences.containsKey(codepoint)) { + String knownValue1 = knownDifferences.get(codepoint)[0]; + String knownValue2 = knownDifferences.get(codepoint)[1]; + return (knownValue1.equals(xmlValA) && knownValue2.equals(xmlValB)) || + (knownValue1.equals(xmlValB) && knownValue2.equals(xmlValA)); + } + return false; + } +} diff --git a/unicodetools/src/main/java/org/unicode/xml/UCDDataResolver.java b/unicodetools/src/main/java/org/unicode/xml/UCDDataResolver.java index f4b407106..b2b979467 100644 --- a/unicodetools/src/main/java/org/unicode/xml/UCDDataResolver.java +++ b/unicodetools/src/main/java/org/unicode/xml/UCDDataResolver.java @@ -78,6 +78,24 @@ public void buildSection(UcdSectionDetail.UcdSection ucdSection) throws SAXExcep } } break; + case PROVISIONALNAMEDSEQUENCES: + HashMap provisionalNamedSequences = new HashMap<>(); + for (UcdLineParser.UcdLine line : parser) { + String[] parts = line.getParts(); + provisionalNamedSequences.put(parts[0], parts[1]); + } + List psNames = new ArrayList<>(provisionalNamedSequences.keySet()); + Collections.sort(psNames); + for (String name : psNames) { + AttributesImpl attributes = + getNamedSequenceAttributes( + namespace, name, provisionalNamedSequences); + writer.startElement(childTag, attributes); + { + writer.endElement(childTag); + } + } + break; default: for (UcdLineParser.UcdLine line : parser) { AttributesImpl attributes = diff --git 
a/unicodetools/src/main/java/org/unicode/xml/UcdPropertyDetail.java b/unicodetools/src/main/java/org/unicode/xml/UcdPropertyDetail.java index 57989d255..08a9edc53 100644 --- a/unicodetools/src/main/java/org/unicode/xml/UcdPropertyDetail.java +++ b/unicodetools/src/main/java/org/unicode/xml/UcdPropertyDetail.java @@ -18,7 +18,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail Age_Detail = new UcdPropertyDetail( - UcdProperty.Age, VersionInfo.getInstance(1, 1, 0), 1, true, false, false, true); + UcdProperty.Age, VersionInfo.getInstance(3, 2, 0), 1, true, false, false, true); public static UcdPropertyDetail Name_Detail = new UcdPropertyDetail( UcdProperty.Name, @@ -31,7 +31,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail Jamo_Short_Name_Detail = new UcdPropertyDetail( UcdProperty.Jamo_Short_Name, - VersionInfo.getInstance(1, 1, 0), + VersionInfo.getInstance(5, 1, 0), 3, true, false, @@ -130,7 +130,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail Bidi_Mirroring_Glyph_Detail = new UcdPropertyDetail( UcdProperty.Bidi_Mirroring_Glyph, - VersionInfo.getInstance(1, 1, 0), + VersionInfo.getInstance(3, 0, 1), 14, true, false, @@ -166,7 +166,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail Uppercase_Mapping_Detail = new UcdPropertyDetail( UcdProperty.Uppercase_Mapping, - VersionInfo.getInstance(1, 1, 0), + VersionInfo.getInstance(2, 1, 8), 18, true, false, @@ -175,7 +175,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail Lowercase_Mapping_Detail = new UcdPropertyDetail( UcdProperty.Lowercase_Mapping, - VersionInfo.getInstance(1, 1, 0), + VersionInfo.getInstance(2, 1, 8), 19, true, false, @@ -184,7 +184,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail Titlecase_Mapping_Detail = new UcdPropertyDetail( UcdProperty.Titlecase_Mapping, - VersionInfo.getInstance(1, 1, 0), + VersionInfo.getInstance(2, 1, 8), 20, true, false, @@ -197,7 +197,7 @@ public class 
UcdPropertyDetail { public static UcdPropertyDetail Simple_Case_Folding_Detail = new UcdPropertyDetail( UcdProperty.Simple_Case_Folding, - VersionInfo.getInstance(1, 1, 0), + VersionInfo.getInstance(3, 0, 1), 22, true, false, @@ -206,7 +206,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail Case_Folding_Detail = new UcdPropertyDetail( UcdProperty.Case_Folding, - VersionInfo.getInstance(1, 1, 0), + VersionInfo.getInstance(3, 0, 1), 23, true, false, @@ -215,7 +215,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail Joining_Type_Detail = new UcdPropertyDetail( UcdProperty.Joining_Type, - VersionInfo.getInstance(1, 1, 0), + VersionInfo.getInstance(2, 0, 0), 24, true, false, @@ -224,7 +224,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail Joining_Group_Detail = new UcdPropertyDetail( UcdProperty.Joining_Group, - VersionInfo.getInstance(1, 1, 0), + VersionInfo.getInstance(2, 0, 0), 25, true, false, @@ -233,7 +233,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail East_Asian_Width_Detail = new UcdPropertyDetail( UcdProperty.East_Asian_Width, - VersionInfo.getInstance(1, 1, 0), + VersionInfo.getInstance(3, 0, 0), 26, true, false, @@ -242,7 +242,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail Line_Break_Detail = new UcdPropertyDetail( UcdProperty.Line_Break, - VersionInfo.getInstance(1, 1, 0), + VersionInfo.getInstance(3, 0, 0), 27, true, false, @@ -251,7 +251,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail Script_Detail = new UcdPropertyDetail( UcdProperty.Script, - VersionInfo.getInstance(1, 1, 0), + VersionInfo.getInstance(3, 1, 0), 28, true, false, @@ -269,7 +269,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail Dash_Detail = new UcdPropertyDetail( UcdProperty.Dash, - VersionInfo.getInstance(1, 1, 0), + VersionInfo.getInstance(2, 0, 0), 30, true, false, @@ -278,7 +278,7 @@ public class UcdPropertyDetail { public static 
UcdPropertyDetail White_Space_Detail = new UcdPropertyDetail( UcdProperty.White_Space, - VersionInfo.getInstance(1, 1, 0), + VersionInfo.getInstance(2, 0, 0), 31, true, false, @@ -287,7 +287,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail Hyphen_Detail = new UcdPropertyDetail( UcdProperty.Hyphen, - VersionInfo.getInstance(1, 1, 0), + VersionInfo.getInstance(2, 0, 0), 32, true, false, @@ -296,7 +296,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail Quotation_Mark_Detail = new UcdPropertyDetail( UcdProperty.Quotation_Mark, - VersionInfo.getInstance(1, 1, 0), + VersionInfo.getInstance(2, 0, 0), 33, true, false, @@ -305,7 +305,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail Radical_Detail = new UcdPropertyDetail( UcdProperty.Radical, - VersionInfo.getInstance(1, 1, 0), + VersionInfo.getInstance(3, 2, 0), 34, true, false, @@ -314,7 +314,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail Ideographic_Detail = new UcdPropertyDetail( UcdProperty.Ideographic, - VersionInfo.getInstance(1, 1, 0), + VersionInfo.getInstance(2, 0, 0), 35, true, false, @@ -323,7 +323,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail Unified_Ideograph_Detail = new UcdPropertyDetail( UcdProperty.Unified_Ideograph, - VersionInfo.getInstance(1, 1, 0), + VersionInfo.getInstance(3, 2, 0), 36, true, false, @@ -332,7 +332,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail IDS_Binary_Operator_Detail = new UcdPropertyDetail( UcdProperty.IDS_Binary_Operator, - VersionInfo.getInstance(1, 1, 0), + VersionInfo.getInstance(3, 2, 0), 37, true, false, @@ -341,7 +341,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail IDS_Trinary_Operator_Detail = new UcdPropertyDetail( UcdProperty.IDS_Trinary_Operator, - VersionInfo.getInstance(1, 1, 0), + VersionInfo.getInstance(3, 2, 0), 38, true, false, @@ -350,7 +350,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail 
Hangul_Syllable_Type_Detail = new UcdPropertyDetail( UcdProperty.Hangul_Syllable_Type, - VersionInfo.getInstance(1, 1, 0), + VersionInfo.getInstance(4, 0, 0), 39, true, false, @@ -359,7 +359,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail Default_Ignorable_Code_Point_Detail = new UcdPropertyDetail( UcdProperty.Default_Ignorable_Code_Point, - VersionInfo.getInstance(1, 1, 0), + VersionInfo.getInstance(3, 2, 0), 40, true, false, @@ -368,7 +368,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail Other_Default_Ignorable_Code_Point_Detail = new UcdPropertyDetail( UcdProperty.Other_Default_Ignorable_Code_Point, - VersionInfo.getInstance(1, 1, 0), + VersionInfo.getInstance(3, 2, 0), 41, true, false, @@ -386,7 +386,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail Other_Alphabetic_Detail = new UcdPropertyDetail( UcdProperty.Other_Alphabetic, - VersionInfo.getInstance(1, 1, 0), + VersionInfo.getInstance(3, 1, 0), 43, true, false, @@ -395,7 +395,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail Uppercase_Detail = new UcdPropertyDetail( UcdProperty.Uppercase, - VersionInfo.getInstance(1, 1, 0), + VersionInfo.getInstance(3, 1, 0), 44, true, false, @@ -404,7 +404,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail Other_Uppercase_Detail = new UcdPropertyDetail( UcdProperty.Other_Uppercase, - VersionInfo.getInstance(1, 1, 0), + VersionInfo.getInstance(3, 1, 0), 45, true, false, @@ -413,7 +413,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail Lowercase_Detail = new UcdPropertyDetail( UcdProperty.Lowercase, - VersionInfo.getInstance(1, 1, 0), + VersionInfo.getInstance(3, 1, 0), 46, true, false, @@ -422,7 +422,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail Other_Lowercase_Detail = new UcdPropertyDetail( UcdProperty.Other_Lowercase, - VersionInfo.getInstance(1, 1, 0), + VersionInfo.getInstance(3, 1, 0), 47, true, false, @@ -431,7 +431,7 @@ 
public class UcdPropertyDetail { public static UcdPropertyDetail Math_Detail = new UcdPropertyDetail( UcdProperty.Math, - VersionInfo.getInstance(1, 1, 0), + VersionInfo.getInstance(2, 0, 0), 48, true, false, @@ -440,7 +440,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail Other_Math_Detail = new UcdPropertyDetail( UcdProperty.Other_Math, - VersionInfo.getInstance(1, 1, 0), + VersionInfo.getInstance(3, 1, 0), 49, true, false, @@ -449,7 +449,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail Hex_Digit_Detail = new UcdPropertyDetail( UcdProperty.Hex_Digit, - VersionInfo.getInstance(1, 1, 0), + VersionInfo.getInstance(2, 0, 0), 50, true, false, @@ -458,7 +458,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail ASCII_Hex_Digit_Detail = new UcdPropertyDetail( UcdProperty.ASCII_Hex_Digit, - VersionInfo.getInstance(1, 1, 0), + VersionInfo.getInstance(3, 1, 1), 51, true, false, @@ -467,7 +467,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail Noncharacter_Code_Point_Detail = new UcdPropertyDetail( UcdProperty.Noncharacter_Code_Point, - VersionInfo.getInstance(1, 1, 0), + VersionInfo.getInstance(3, 0, 1), 52, true, false, @@ -476,7 +476,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail Variation_Selector_Detail = new UcdPropertyDetail( UcdProperty.Variation_Selector, - VersionInfo.getInstance(1, 1, 0), + VersionInfo.getInstance(4, 0, 1), 53, true, false, @@ -485,7 +485,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail Bidi_Control_Detail = new UcdPropertyDetail( UcdProperty.Bidi_Control, - VersionInfo.getInstance(1, 1, 0), + VersionInfo.getInstance(2, 0, 0), 54, true, false, @@ -494,7 +494,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail Join_Control_Detail = new UcdPropertyDetail( UcdProperty.Join_Control, - VersionInfo.getInstance(1, 1, 0), + VersionInfo.getInstance(2, 0, 0), 55, true, false, @@ -503,7 +503,7 @@ public class 
UcdPropertyDetail { public static UcdPropertyDetail Grapheme_Base_Detail = new UcdPropertyDetail( UcdProperty.Grapheme_Base, - VersionInfo.getInstance(1, 1, 0), + VersionInfo.getInstance(3, 2, 0), 56, true, false, @@ -512,7 +512,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail Grapheme_Extend_Detail = new UcdPropertyDetail( UcdProperty.Grapheme_Extend, - VersionInfo.getInstance(1, 1, 0), + VersionInfo.getInstance(3, 2, 0), 57, true, false, @@ -521,7 +521,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail Other_Grapheme_Extend_Detail = new UcdPropertyDetail( UcdProperty.Other_Grapheme_Extend, - VersionInfo.getInstance(1, 1, 0), + VersionInfo.getInstance(3, 2, 0), 58, true, false, @@ -530,7 +530,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail Grapheme_Link_Detail = new UcdPropertyDetail( UcdProperty.Grapheme_Link, - VersionInfo.getInstance(1, 1, 0), + VersionInfo.getInstance(3, 2, 0), 59, true, false, @@ -539,7 +539,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail Sentence_Terminal_Detail = new UcdPropertyDetail( UcdProperty.Sentence_Terminal, - VersionInfo.getInstance(1, 1, 0), + VersionInfo.getInstance(9, 0, 0), 60, true, false, @@ -548,7 +548,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail Extender_Detail = new UcdPropertyDetail( UcdProperty.Extender, - VersionInfo.getInstance(1, 1, 0), + VersionInfo.getInstance(2, 0, 0), 61, true, false, @@ -557,7 +557,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail Terminal_Punctuation_Detail = new UcdPropertyDetail( UcdProperty.Terminal_Punctuation, - VersionInfo.getInstance(1, 1, 0), + VersionInfo.getInstance(2, 0, 0), 62, true, false, @@ -566,7 +566,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail Diacritic_Detail = new UcdPropertyDetail( UcdProperty.Diacritic, - VersionInfo.getInstance(1, 1, 0), + VersionInfo.getInstance(2, 0, 0), 63, true, false, @@ -575,7 +575,7 @@ public class 
UcdPropertyDetail { public static UcdPropertyDetail Deprecated_Detail = new UcdPropertyDetail( UcdProperty.Deprecated, - VersionInfo.getInstance(1, 1, 0), + VersionInfo.getInstance(3, 2, 0), 64, true, false, @@ -584,7 +584,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail ID_Start_Detail = new UcdPropertyDetail( UcdProperty.ID_Start, - VersionInfo.getInstance(1, 1, 0), + VersionInfo.getInstance(3, 1, 0), 65, true, false, @@ -593,7 +593,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail Other_ID_Start_Detail = new UcdPropertyDetail( UcdProperty.Other_ID_Start, - VersionInfo.getInstance(1, 1, 0), + VersionInfo.getInstance(4, 0, 0), 66, true, false, @@ -602,7 +602,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail XID_Start_Detail = new UcdPropertyDetail( UcdProperty.XID_Start, - VersionInfo.getInstance(1, 1, 0), + VersionInfo.getInstance(3, 1, 0), 67, true, false, @@ -611,7 +611,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail ID_Continue_Detail = new UcdPropertyDetail( UcdProperty.ID_Continue, - VersionInfo.getInstance(1, 1, 0), + VersionInfo.getInstance(3, 1, 0), 68, true, false, @@ -620,7 +620,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail Other_ID_Continue_Detail = new UcdPropertyDetail( UcdProperty.Other_ID_Continue, - VersionInfo.getInstance(1, 1, 0), + VersionInfo.getInstance(4, 1, 0), 69, true, false, @@ -629,7 +629,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail XID_Continue_Detail = new UcdPropertyDetail( UcdProperty.XID_Continue, - VersionInfo.getInstance(1, 1, 0), + VersionInfo.getInstance(3, 1, 0), 70, true, false, @@ -638,7 +638,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail Soft_Dotted_Detail = new UcdPropertyDetail( UcdProperty.Soft_Dotted, - VersionInfo.getInstance(1, 1, 0), + VersionInfo.getInstance(3, 2, 0), 71, true, false, @@ -647,7 +647,7 @@ public class UcdPropertyDetail { public static 
UcdPropertyDetail Logical_Order_Exception_Detail = new UcdPropertyDetail( UcdProperty.Logical_Order_Exception, - VersionInfo.getInstance(1, 1, 0), + VersionInfo.getInstance(3, 2, 0), 72, true, false, @@ -656,7 +656,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail Pattern_White_Space_Detail = new UcdPropertyDetail( UcdProperty.Pattern_White_Space, - VersionInfo.getInstance(1, 1, 0), + VersionInfo.getInstance(4, 1, 0), 73, true, false, @@ -665,7 +665,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail Pattern_Syntax_Detail = new UcdPropertyDetail( UcdProperty.Pattern_Syntax, - VersionInfo.getInstance(1, 1, 0), + VersionInfo.getInstance(4, 1, 0), 74, true, false, @@ -674,7 +674,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail Grapheme_Cluster_Break_Detail = new UcdPropertyDetail( UcdProperty.Grapheme_Cluster_Break, - VersionInfo.getInstance(1, 1, 0), + VersionInfo.getInstance(4, 1, 0), 75, true, false, @@ -683,7 +683,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail Word_Break_Detail = new UcdPropertyDetail( UcdProperty.Word_Break, - VersionInfo.getInstance(1, 1, 0), + VersionInfo.getInstance(4, 1, 0), 76, true, false, @@ -692,7 +692,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail Sentence_Break_Detail = new UcdPropertyDetail( UcdProperty.Sentence_Break, - VersionInfo.getInstance(1, 1, 0), + VersionInfo.getInstance(4, 1, 0), 77, true, false, @@ -701,7 +701,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail Composition_Exclusion_Detail = new UcdPropertyDetail( UcdProperty.Composition_Exclusion, - VersionInfo.getInstance(1, 1, 0), + VersionInfo.getInstance(3, 0, 0), 78, true, false, @@ -710,7 +710,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail Full_Composition_Exclusion_Detail = new UcdPropertyDetail( UcdProperty.Full_Composition_Exclusion, - VersionInfo.getInstance(1, 1, 0), + VersionInfo.getInstance(3, 1, 0), 79, true, false, 
@@ -719,7 +719,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail NFC_Quick_Check_Detail = new UcdPropertyDetail( UcdProperty.NFC_Quick_Check, - VersionInfo.getInstance(1, 1, 0), + VersionInfo.getInstance(3, 2, 0), 80, true, false, @@ -728,7 +728,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail NFD_Quick_Check_Detail = new UcdPropertyDetail( UcdProperty.NFD_Quick_Check, - VersionInfo.getInstance(1, 1, 0), + VersionInfo.getInstance(3, 2, 0), 81, true, false, @@ -737,7 +737,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail NFKC_Quick_Check_Detail = new UcdPropertyDetail( UcdProperty.NFKC_Quick_Check, - VersionInfo.getInstance(1, 1, 0), + VersionInfo.getInstance(5, 2, 0), 82, true, false, @@ -746,7 +746,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail NFKD_Quick_Check_Detail = new UcdPropertyDetail( UcdProperty.NFKD_Quick_Check, - VersionInfo.getInstance(1, 1, 0), + VersionInfo.getInstance(3, 2, 0), 83, true, false, @@ -755,7 +755,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail Expands_On_NFC_Detail = new UcdPropertyDetail( UcdProperty.Expands_On_NFC, - VersionInfo.getInstance(1, 1, 0), + VersionInfo.getInstance(3, 2, 0), 84, true, false, @@ -764,7 +764,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail Expands_On_NFD_Detail = new UcdPropertyDetail( UcdProperty.Expands_On_NFD, - VersionInfo.getInstance(1, 1, 0), + VersionInfo.getInstance(3, 2, 0), 85, true, false, @@ -773,7 +773,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail Expands_On_NFKC_Detail = new UcdPropertyDetail( UcdProperty.Expands_On_NFKC, - VersionInfo.getInstance(1, 1, 0), + VersionInfo.getInstance(3, 2, 0), 86, true, false, @@ -782,7 +782,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail Expands_On_NFKD_Detail = new UcdPropertyDetail( UcdProperty.Expands_On_NFKD, - VersionInfo.getInstance(1, 1, 0), + VersionInfo.getInstance(3, 2, 0), 87, 
true, false, @@ -791,7 +791,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail FC_NFC_Closure_Detail = new UcdPropertyDetail( UcdProperty.FC_NFKC_Closure, - VersionInfo.getInstance(1, 1, 0), + VersionInfo.getInstance(3, 1, 0), 88, true, false, @@ -881,14 +881,14 @@ public class UcdPropertyDetail { public static UcdPropertyDetail Indic_Syllabic_Category_Detail = new UcdPropertyDetail( UcdProperty.Indic_Syllabic_Category, - VersionInfo.getInstance(6, 0, 0), + VersionInfo.getInstance(6, 1, 0), 98, true, false, false, true); // public static UcdPropertyDetail Indic_Matra_Category_Detail = new UcdPropertyDetail ( - // UcdProperty.Indic_Matra_Category, VersionInfo.getInstance(6,0,0), + // UcdProperty.Indic_Matra_Category, VersionInfo.getInstance(6,1,0), // VersionInfo.getInstance(7,0,0), 99, // true, false, false, true); public static UcdPropertyDetail Indic_Positional_Category_Detail = @@ -939,7 +939,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail Block_Detail = new UcdPropertyDetail( UcdProperty.Block, - VersionInfo.getInstance(10, 0, 0), + VersionInfo.getInstance(2, 0, 0), 105, true, false, @@ -957,7 +957,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kCompatibilityVariant_Detail = new UcdPropertyDetail( UcdProperty.kCompatibilityVariant, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(3, 2, 0), 107, false, true, @@ -966,7 +966,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kRSUnicode_Detail = new UcdPropertyDetail( UcdProperty.kRSUnicode, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(2, 0, 0), 108, false, true, @@ -978,7 +978,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kIRG_GSource_Detail = new UcdPropertyDetail( UcdProperty.kIRG_GSource, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(3, 0, 0), 110, false, true, @@ -987,7 +987,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail 
kIRG_TSource_Detail = new UcdPropertyDetail( UcdProperty.kIRG_TSource, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(3, 0, 0), 111, false, true, @@ -996,7 +996,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kIRG_JSource_Detail = new UcdPropertyDetail( UcdProperty.kIRG_JSource, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(3, 0, 0), 112, false, true, @@ -1005,7 +1005,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kIRG_KSource_Detail = new UcdPropertyDetail( UcdProperty.kIRG_KSource, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(3, 0, 0), 113, false, true, @@ -1014,7 +1014,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kIRG_KPSource_Detail = new UcdPropertyDetail( UcdProperty.kIRG_KPSource, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(3, 1, 1), 114, false, true, @@ -1023,7 +1023,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kIRG_VSource_Detail = new UcdPropertyDetail( UcdProperty.kIRG_VSource, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(3, 0, 0), 115, false, true, @@ -1032,7 +1032,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kIRG_HSource_Detail = new UcdPropertyDetail( UcdProperty.kIRG_HSource, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(3, 1, 0), 116, false, true, @@ -1041,7 +1041,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kIRG_USource_Detail = new UcdPropertyDetail( UcdProperty.kIRG_USource, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(4, 0, 1), 117, false, true, @@ -1050,7 +1050,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kIRG_MSource_Detail = new UcdPropertyDetail( UcdProperty.kIRG_MSource, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(5, 2, 0), 118, false, true, @@ -1077,7 +1077,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail 
kIICore_Detail = new UcdPropertyDetail( UcdProperty.kIICore, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(4, 1, 0), 121, false, true, @@ -1086,7 +1086,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kUnihanCore2020_Detail = new UcdPropertyDetail( UcdProperty.kUnihanCore2020, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(13, 0, 0), 122, false, true, @@ -1095,7 +1095,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kGB0_Detail = new UcdPropertyDetail( UcdProperty.kGB0, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(2, 0, 0), 123, false, true, @@ -1104,7 +1104,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kGB1_Detail = new UcdPropertyDetail( UcdProperty.kGB1, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(2, 0, 0), 124, false, true, @@ -1113,7 +1113,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kGB3_Detail = new UcdPropertyDetail( UcdProperty.kGB3, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(2, 0, 0), 125, false, true, @@ -1122,7 +1122,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kGB5_Detail = new UcdPropertyDetail( UcdProperty.kGB5, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(2, 0, 0), 126, false, true, @@ -1131,7 +1131,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kGB7_Detail = new UcdPropertyDetail( UcdProperty.kGB7, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(2, 0, 0), 127, false, true, @@ -1140,7 +1140,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kGB8_Detail = new UcdPropertyDetail( UcdProperty.kGB8, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(2, 0, 0), 128, false, true, @@ -1149,7 +1149,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kCNS1986_Detail = new UcdPropertyDetail( UcdProperty.kCNS1986, - VersionInfo.getInstance(11, 0, 0), + 
VersionInfo.getInstance(2, 0, 0), 129, false, true, @@ -1158,7 +1158,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kCNS1992_Detail = new UcdPropertyDetail( UcdProperty.kCNS1992, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(2, 0, 0), 130, false, true, @@ -1167,7 +1167,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kJis0_Detail = new UcdPropertyDetail( UcdProperty.kJis0, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(2, 0, 0), 131, false, true, @@ -1176,7 +1176,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kJis1_Detail = new UcdPropertyDetail( UcdProperty.kJis1, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(2, 0, 0), 132, false, true, @@ -1185,7 +1185,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kJIS0213_Detail = new UcdPropertyDetail( UcdProperty.kJIS0213, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(3, 1, 1), 133, false, true, @@ -1194,7 +1194,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kKSC0_Detail = new UcdPropertyDetail( UcdProperty.kKSC0, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(2, 0, 0), VersionInfo.getInstance(15, 1, 0), 134, false, @@ -1204,7 +1204,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kKSC1_Detail = new UcdPropertyDetail( UcdProperty.kKSC1, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(2, 0, 0), VersionInfo.getInstance(15, 1, 0), 135, false, @@ -1214,7 +1214,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kKPS0_Detail = new UcdPropertyDetail( UcdProperty.kKPS0, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(3, 1, 1), VersionInfo.getInstance(15, 1, 0), 136, false, @@ -1224,7 +1224,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kKPS1_Detail = new UcdPropertyDetail( UcdProperty.kKPS1, - VersionInfo.getInstance(11, 0, 0), + 
VersionInfo.getInstance(3, 1, 1), VersionInfo.getInstance(15, 1, 0), 137, false, @@ -1234,7 +1234,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kHKSCS_Detail = new UcdPropertyDetail( UcdProperty.kHKSCS, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(3, 1, 1), VersionInfo.getInstance(15, 1, 0), 138, false, @@ -1244,7 +1244,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kCantonese_Detail = new UcdPropertyDetail( UcdProperty.kCantonese, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(2, 0, 0), 139, false, true, @@ -1253,7 +1253,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kHangul_Detail = new UcdPropertyDetail( UcdProperty.kHangul, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(5, 0, 0), 140, false, true, @@ -1262,7 +1262,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kDefinition_Detail = new UcdPropertyDetail( UcdProperty.kDefinition, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(2, 0, 0), 141, false, true, @@ -1271,19 +1271,19 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kHanYu_Detail = new UcdPropertyDetail( UcdProperty.kHanYu, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(2, 0, 0), 142, false, true, false, true); // public static UcdPropertyDetail kAlternateHanYu_Detail = new UcdPropertyDetail ( - // UcdProperty.kAlternateHanYu, VersionInfo.getInstance(11,0,0), 143, + // UcdProperty.kAlternateHanYu, VersionInfo.getInstance(2,0,0), VersionInfo.getInstance(3,1,1), 143, // false, true, false, true); public static UcdPropertyDetail kMandarin_Detail = new UcdPropertyDetail( UcdProperty.kMandarin, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(2, 0, 0), 144, false, true, @@ -1292,7 +1292,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kCihaiT_Detail = new UcdPropertyDetail( UcdProperty.kCihaiT, - VersionInfo.getInstance(11, 0, 0), + 
VersionInfo.getInstance(3, 2, 0), 145, false, true, @@ -1301,7 +1301,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kSBGY_Detail = new UcdPropertyDetail( UcdProperty.kSBGY, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(3, 2, 0), 146, false, true, @@ -1310,7 +1310,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kNelson_Detail = new UcdPropertyDetail( UcdProperty.kNelson, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(2, 0, 0), 147, false, true, @@ -1319,7 +1319,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kCowles_Detail = new UcdPropertyDetail( UcdProperty.kCowles, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(3, 1, 1), 148, false, true, @@ -1328,7 +1328,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kMatthews_Detail = new UcdPropertyDetail( UcdProperty.kMatthews, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(2, 0, 0), 149, false, true, @@ -1337,7 +1337,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kOtherNumeric_Detail = new UcdPropertyDetail( UcdProperty.kOtherNumeric, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(3, 2, 0), 150, false, true, @@ -1346,7 +1346,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kPhonetic_Detail = new UcdPropertyDetail( UcdProperty.kPhonetic, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(3, 1, 0), 151, false, true, @@ -1355,7 +1355,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kGSR_Detail = new UcdPropertyDetail( UcdProperty.kGSR, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(4, 0, 1), 152, false, true, @@ -1364,7 +1364,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kFenn_Detail = new UcdPropertyDetail( UcdProperty.kFenn, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(3, 1, 1), 153, false, true, @@ -1373,7 +1373,7 @@ public 
class UcdPropertyDetail { public static UcdPropertyDetail kFennIndex_Detail = new UcdPropertyDetail( UcdProperty.kFennIndex, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(4, 1, 0), 154, false, true, @@ -1382,7 +1382,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kKarlgren_Detail = new UcdPropertyDetail( UcdProperty.kKarlgren, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(3, 1, 1), 155, false, true, @@ -1391,7 +1391,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kCangjie_Detail = new UcdPropertyDetail( UcdProperty.kCangjie, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(3, 1, 1), 156, false, true, @@ -1400,7 +1400,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kMeyerWempe_Detail = new UcdPropertyDetail( UcdProperty.kMeyerWempe, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(3, 1, 0), 157, false, true, @@ -1409,7 +1409,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kSimplifiedVariant_Detail = new UcdPropertyDetail( UcdProperty.kSimplifiedVariant, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(2, 0, 0), 158, false, true, @@ -1418,7 +1418,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kTraditionalVariant_Detail = new UcdPropertyDetail( UcdProperty.kTraditionalVariant, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(2, 0, 0), 159, false, true, @@ -1427,7 +1427,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kSpecializedSemanticVariant_Detail = new UcdPropertyDetail( UcdProperty.kSpecializedSemanticVariant, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(2, 0, 0), 160, false, true, @@ -1436,7 +1436,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kSemanticVariant_Detail = new UcdPropertyDetail( UcdProperty.kSemanticVariant, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(2, 0, 0), 161, 
false, true, @@ -1445,7 +1445,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kVietnamese_Detail = new UcdPropertyDetail( UcdProperty.kVietnamese, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(3, 1, 1), 162, false, true, @@ -1454,7 +1454,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kLau_Detail = new UcdPropertyDetail( UcdProperty.kLau, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(3, 1, 1), 163, false, true, @@ -1463,7 +1463,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kTang_Detail = new UcdPropertyDetail( UcdProperty.kTang, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(2, 0, 0), 164, false, true, @@ -1472,7 +1472,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kZVariant_Detail = new UcdPropertyDetail( UcdProperty.kZVariant, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(2, 0, 0), 165, false, true, @@ -1481,7 +1481,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kJapaneseKun_Detail = new UcdPropertyDetail( UcdProperty.kJapaneseKun, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(2, 0, 0), 166, false, true, @@ -1490,7 +1490,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kJapaneseOn_Detail = new UcdPropertyDetail( UcdProperty.kJapaneseOn, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(2, 0, 0), 167, false, true, @@ -1499,19 +1499,19 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kKangXi_Detail = new UcdPropertyDetail( UcdProperty.kKangXi, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(2, 0, 0), 168, false, true, false, true); // public static UcdPropertyDetail kAlternateKangXi_Detail = new UcdPropertyDetail ( - // UcdProperty.kAlternateKangXi, VersionInfo.getInstance(11,0,0), 169, + // UcdProperty.kAlternateKangXi, VersionInfo.getInstance(2,0,0), VersionInfo.getInstance(4,0,1), 169, // false, true, 
false, true); public static UcdPropertyDetail kBigFive_Detail = new UcdPropertyDetail( UcdProperty.kBigFive, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(2, 0, 0), 170, false, true, @@ -1520,7 +1520,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kCCCII_Detail = new UcdPropertyDetail( UcdProperty.kCCCII, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(2, 0, 0), 171, false, true, @@ -1529,7 +1529,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kDaeJaweon_Detail = new UcdPropertyDetail( UcdProperty.kDaeJaweon, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(2, 0, 0), 172, false, true, @@ -1538,7 +1538,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kEACC_Detail = new UcdPropertyDetail( UcdProperty.kEACC, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(2, 0, 0), 173, false, true, @@ -1547,7 +1547,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kFrequency_Detail = new UcdPropertyDetail( UcdProperty.kFrequency, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(3, 2, 0), VersionInfo.getInstance(16, 0, 0), 174, false, @@ -1557,7 +1557,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kGradeLevel_Detail = new UcdPropertyDetail( UcdProperty.kGradeLevel, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(3, 2, 0), 175, false, true, @@ -1566,7 +1566,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kHDZRadBreak_Detail = new UcdPropertyDetail( UcdProperty.kHDZRadBreak, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(4, 1, 0), 176, false, true, @@ -1575,7 +1575,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kHKGlyph_Detail = new UcdPropertyDetail( UcdProperty.kHKGlyph, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(3, 1, 1), 177, false, true, @@ -1584,7 +1584,7 @@ public class UcdPropertyDetail { public static 
UcdPropertyDetail kHanyuPinlu_Detail = new UcdPropertyDetail( UcdProperty.kHanyuPinlu, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(4, 0, 1), 178, false, true, @@ -1593,7 +1593,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kHanyuPinyin_Detail = new UcdPropertyDetail( UcdProperty.kHanyuPinyin, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(5, 2, 0), 179, false, true, @@ -1602,7 +1602,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kIRGHanyuDaZidian_Detail = new UcdPropertyDetail( UcdProperty.kIRGHanyuDaZidian, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(3, 0, 0), 180, false, true, @@ -1611,7 +1611,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kIRGKangXi_Detail = new UcdPropertyDetail( UcdProperty.kIRGKangXi, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(3, 0, 0), 181, false, true, @@ -1620,7 +1620,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kIRGDaeJaweon_Detail = new UcdPropertyDetail( UcdProperty.kIRGDaeJaweon, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(3, 0, 0), 182, false, true, @@ -1629,7 +1629,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kIRGDaiKanwaZiten_Detail = new UcdPropertyDetail( UcdProperty.kIRGDaiKanwaZiten, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(3, 0, 0), VersionInfo.getInstance(15, 1, 0), 183, false, @@ -1639,7 +1639,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kKorean_Detail = new UcdPropertyDetail( UcdProperty.kKorean, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(2, 0, 0), 184, false, true, @@ -1648,7 +1648,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kMainlandTelegraph_Detail = new UcdPropertyDetail( UcdProperty.kMainlandTelegraph, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(2, 0, 0), 185, false, true, @@ -1657,19 +1657,19 @@ 
public class UcdPropertyDetail { public static UcdPropertyDetail kMorohashi_Detail = new UcdPropertyDetail( UcdProperty.kMorohashi, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(2, 0, 0), 186, false, true, false, true); // public static UcdPropertyDetail kAlternateMorohashi_Detail = new UcdPropertyDetail ( - // UcdProperty.kAlternateMorohashi, VersionInfo.getInstance(11,0,0), 187, + // UcdProperty.kAlternateMorohashi, VersionInfo.getInstance(2,0,0), VersionInfo.getInstance(4,0,1), 187, // false, true, false, true); public static UcdPropertyDetail kPrimaryNumeric_Detail = new UcdPropertyDetail( UcdProperty.kPrimaryNumeric, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(3, 2, 0), 188, false, true, @@ -1678,7 +1678,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kTaiwanTelegraph_Detail = new UcdPropertyDetail( UcdProperty.kTaiwanTelegraph, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(2, 0, 0), 189, false, true, @@ -1687,7 +1687,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kXerox_Detail = new UcdPropertyDetail( UcdProperty.kXerox, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(2, 0, 0), 190, false, true, @@ -1696,7 +1696,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kPseudoGB1_Detail = new UcdPropertyDetail( UcdProperty.kPseudoGB1, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(2, 0, 0), 191, false, true, @@ -1705,7 +1705,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kIBMJapan_Detail = new UcdPropertyDetail( UcdProperty.kIBMJapan, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(2, 0, 0), 192, false, true, @@ -1714,7 +1714,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kAccountingNumeric_Detail = new UcdPropertyDetail( UcdProperty.kAccountingNumeric, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(3, 2, 0), 193, false, true, @@ -1723,7 +1723,7 
@@ public class UcdPropertyDetail { public static UcdPropertyDetail kCheungBauer_Detail = new UcdPropertyDetail( UcdProperty.kCheungBauer, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(5, 0, 0), 194, false, true, @@ -1732,7 +1732,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kCheungBauerIndex_Detail = new UcdPropertyDetail( UcdProperty.kCheungBauerIndex, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(5, 0, 0), 195, false, true, @@ -1741,7 +1741,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kFourCornerCode_Detail = new UcdPropertyDetail( UcdProperty.kFourCornerCode, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(5, 0, 0), 196, false, true, @@ -1753,7 +1753,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kXHC1983_Detail = new UcdPropertyDetail( UcdProperty.kXHC1983, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(5, 1, 0), 198, false, true, @@ -1807,7 +1807,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kTGHZ2013_Detail = new UcdPropertyDetail( UcdProperty.kTGHZ2013, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(13, 0, 0), 204, false, true, @@ -1816,7 +1816,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kSpoofingVariant_Detail = new UcdPropertyDetail( UcdProperty.kSpoofingVariant, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(13, 0, 0), 205, false, true, @@ -1825,7 +1825,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kRSKanWa_Detail = new UcdPropertyDetail( UcdProperty.kRSKanWa, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(2, 0, 0), 206, false, true, @@ -1834,7 +1834,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kRSJapanese_Detail = new UcdPropertyDetail( UcdProperty.kRSJapanese, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(2, 0, 0), 207, false, true, @@ -1843,7 +1843,7 @@ 
public class UcdPropertyDetail { public static UcdPropertyDetail kRSKorean_Detail = new UcdPropertyDetail( UcdProperty.kRSKorean, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(2, 0, 0), 208, false, true, @@ -1852,7 +1852,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kRSKangXi_Detail = new UcdPropertyDetail( UcdProperty.kRSKangXi, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(2, 0, 0), VersionInfo.getInstance(15, 1, 0), 209, false, @@ -1862,7 +1862,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kRSAdobe_Japan1_6_Detail = new UcdPropertyDetail( UcdProperty.kRSAdobe_Japan1_6, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(4, 1, 0), 210, false, true, @@ -1871,7 +1871,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail kTotalStrokes_Detail = new UcdPropertyDetail( UcdProperty.kTotalStrokes, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(3, 1, 0), 211, false, true, @@ -1925,7 +1925,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail Unicode_1_Name_Detail = new UcdPropertyDetail( UcdProperty.Unicode_1_Name, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(2, 0, 0), 217, true, false, @@ -1934,7 +1934,7 @@ public class UcdPropertyDetail { public static UcdPropertyDetail Name_Alias_Detail = new UcdPropertyDetail( UcdProperty.Name_Alias, - VersionInfo.getInstance(11, 0, 0), + VersionInfo.getInstance(5, 0, 0), 218, false, false, diff --git a/unicodetools/src/main/java/org/unicode/xml/UcdSectionDetail.java b/unicodetools/src/main/java/org/unicode/xml/UcdSectionDetail.java index 3b93a8520..070c24fc6 100644 --- a/unicodetools/src/main/java/org/unicode/xml/UcdSectionDetail.java +++ b/unicodetools/src/main/java/org/unicode/xml/UcdSectionDetail.java @@ -46,6 +46,14 @@ public enum UcdSection { NamedSequences_Detail, false, false), + PROVISIONALNAMEDSEQUENCES( + "provisional-named-sequences", + "named-sequence", + 
VersionInfo.getInstance(5, 0, 0), + VersionInfo.getInstance(13, 0, 0), + ProvisionalNamedSequences_Detail, + false, + false), NORMALIZATIONCORRECTIONS( "normalization-corrections", "normalization-correction", @@ -128,8 +136,17 @@ public boolean getParserWithMissing() { new UcdSectionDetail( UcdSection.NAMEDSEQUENCES, new UcdSectionComponent[] { - new UcdSectionComponent( - VersionInfo.getInstance(1, 1, 0), null, UcdProperty.Named_Sequences) + new UcdSectionComponent( + VersionInfo.getInstance(1, 1, 0), null, UcdProperty.Named_Sequences) + }, + 1); + public static UcdSectionDetail ProvisionalNamedSequences_Detail = + new UcdSectionDetail( + UcdSection.PROVISIONALNAMEDSEQUENCES, + new UcdSectionComponent[] { + new UcdSectionComponent( + VersionInfo.getInstance(5, 0, 0), VersionInfo.getInstance(13, 0, 0), + UcdProperty.Named_Sequences_Prov) }, 1); public static UcdSectionDetail NormalizationCorrections_Detail = @@ -149,7 +166,7 @@ public boolean getParserWithMissing() { null, UcdProperty.Standardized_Variant), new UcdSectionComponent( - VersionInfo.getInstance(13, 1, 0), + VersionInfo.getInstance(13, 0, 0), null, UcdProperty.emoji_variation_sequence) }, diff --git a/unicodetools/src/main/java/org/unicode/xml/UcdXML.java b/unicodetools/src/main/java/org/unicode/xml/UcdXML.java index 409c6b959..4366ffc89 100644 --- a/unicodetools/src/main/java/org/unicode/xml/UcdXML.java +++ b/unicodetools/src/main/java/org/unicode/xml/UcdXML.java @@ -58,9 +58,17 @@ public String toString() { private static final int HELP = 0, UCDVERSION = 1, RANGE = 2, OUTPUT = 3, OUTPUTFOLDER = 4; public static void main(String[] args) throws Exception { + VersionInfo ucdVersion = null; - UCDXMLOUTPUTRANGE ucdxmloutputrange = null; - UCDXMLOUTPUTTYPE ucdxmloutputtype = null; + UCDXMLOUTPUTRANGE[] ucdxmloutputranges = new UCDXMLOUTPUTRANGE[] { + UCDXMLOUTPUTRANGE.ALL, + UCDXMLOUTPUTRANGE.NOUNIHAN, + UCDXMLOUTPUTRANGE.UNIHAN + }; + UCDXMLOUTPUTTYPE[] ucdxmloutputtypes = new UCDXMLOUTPUTTYPE[] { + 
UCDXMLOUTPUTTYPE.FLAT, + UCDXMLOUTPUTTYPE.GROUPED + }; File destinationFolder = null; UOption.parseArgs(args, options); @@ -88,31 +96,29 @@ public static void main(String[] args) throws Exception { } if (options[RANGE].doesOccur) { try { - ucdxmloutputrange = + ucdxmloutputranges = new UCDXMLOUTPUTRANGE[]{ UCDXMLOUTPUTRANGE.valueOf( - options[RANGE].value.toUpperCase(Locale.ROOT)); + options[RANGE].value.toUpperCase(Locale.ROOT)) + }; } catch (Exception e) { throw new IllegalArgumentException( "Could not convert " + options[RANGE].value + " to one of [ALL|NOUNIHAN|UNIHAN]"); } - } else { - throw new IllegalArgumentException("Missing command line option: --range (or -r)"); } if (options[OUTPUT].doesOccur) { try { - ucdxmloutputtype = + ucdxmloutputtypes = new UCDXMLOUTPUTTYPE[] { UCDXMLOUTPUTTYPE.valueOf( - options[OUTPUT].value.toUpperCase(Locale.ROOT)); + options[OUTPUT].value.toUpperCase(Locale.ROOT)) + }; } catch (Exception e) { throw new IllegalArgumentException( "Could not convert " + options[OUTPUT].value + " to one of [FLAT|GROUPED]"); } - } else { - throw new IllegalArgumentException("Missing command line option: --output (or -o)"); } if (options[OUTPUTFOLDER].doesOccur) { try { @@ -141,8 +147,14 @@ public static void main(String[] args) throws Exception { } if (ucdVersion != null && destinationFolder.exists()) { - buildUcdXMLFile(ucdVersion, destinationFolder, ucdxmloutputrange, ucdxmloutputtype); - System.out.println("end"); + for (UCDXMLOUTPUTRANGE ucdxmloutputrange : ucdxmloutputranges) { + for (UCDXMLOUTPUTTYPE ucdxmloutputtype: ucdxmloutputtypes) { + System.out.println("Building the " + ucdxmloutputrange + " " + ucdxmloutputtype + + " UcdXML file for " + ucdVersion); + buildUcdXMLFile(ucdVersion, destinationFolder, ucdxmloutputrange, ucdxmloutputtype); + } + } + System.out.println("End"); System.exit(0); } else { System.err.println("Unexpected error when building UcdXML file."); @@ -198,11 +210,18 @@ private static void buildUcdXMLFile( if 
(outputRange != UCDXMLOUTPUTRANGE.UNIHAN) { ucdDataResolver.buildSection(UcdSectionDetail.UcdSection.BLOCKS); ucdDataResolver.buildSection(UcdSectionDetail.UcdSection.NAMEDSEQUENCES); + ucdDataResolver.buildSection(UcdSectionDetail.UcdSection.PROVISIONALNAMEDSEQUENCES); ucdDataResolver.buildSection(UcdSectionDetail.UcdSection.NORMALIZATIONCORRECTIONS); ucdDataResolver.buildSection(UcdSectionDetail.UcdSection.STANDARDIZEDVARIANTS); - ucdDataResolver.buildSection(UcdSectionDetail.UcdSection.CJKRADICALS); - ucdDataResolver.buildSection(UcdSectionDetail.UcdSection.EMOJISOURCES); - ucdDataResolver.buildSection(UcdSectionDetail.UcdSection.DONOTEMIT); + if (ucdVersion.compareTo(VersionInfo.getInstance(5, 2, 0)) >= 0) { + ucdDataResolver.buildSection(UcdSectionDetail.UcdSection.CJKRADICALS); + } + if (ucdVersion.compareTo(VersionInfo.getInstance(6, 0, 0)) >= 0) { + ucdDataResolver.buildSection(UcdSectionDetail.UcdSection.EMOJISOURCES); + } + if (ucdVersion.compareTo(VersionInfo.getInstance(16, 0, 0)) >= 0) { + ucdDataResolver.buildSection(UcdSectionDetail.UcdSection.DONOTEMIT); + } } writer.endElement("ucd"); } @@ -345,7 +364,7 @@ private static int buildChars( Range currentRangeType = getRangeType(attributeResolver, codepoint); if (!range.isEmpty()) { if (!currentRangeType.equals(rangeType) - || attributeResolver.isDifferentRange(codepoint, codepoint - 1)) { + || attributeResolver.isDifferentRange(ucdVersion, codepoint, codepoint - 1)) { if (outputRange != UCDXMLOUTPUTRANGE.UNIHAN) { if (outputType == UCDXMLOUTPUTTYPE.GROUPED) { buildGroupedRange( @@ -439,13 +458,19 @@ private static void buildGroupedChar( AttributesImpl orgCharAttributes = getAttributes(ucdVersion, attributeResolver, codepoint, outputRange); AttributesImpl charAttributes = new AttributesImpl(); - for (int index = 0; index < orgCharAttributes.getLength(); index++) { - String attributeQName = orgCharAttributes.getQName(index); - String orgCharAttributesValue = orgCharAttributes.getValue(index); - String 
groupAttributeValue = groupAttrs.getValue(attributeQName); - if (!orgCharAttributesValue.equals(groupAttributeValue)) { + charAttributes.addAttribute( + NAMESPACE, "cp", "cp", "CDATA", attributeResolver.getHexString(codepoint)); + + for (UcdPropertyDetail propDetail : UcdPropertyDetail.ucdxmlValues()) { + String qName = propDetail.getUcdProperty().getShortName(); + if (qName.startsWith("cjk")) { + qName = qName.substring(2); + } + String orgCharAttributesValue = orgCharAttributes.getValue(qName); + String groupAttributeValue = groupAttrs.getValue(qName); + if (!Objects.equals(orgCharAttributesValue, groupAttributeValue)) { charAttributes.addAttribute( - NAMESPACE, attributeQName, attributeQName, "CDATA", orgCharAttributesValue); + NAMESPACE, qName, qName, "CDATA", Objects.requireNonNullElse(orgCharAttributesValue, "")); } } buildChar(writer, attributeResolver, codepoint, charAttributes); @@ -464,8 +489,11 @@ private static void buildChar( for (String alias : nameAliases.keySet()) { AttributesImpl nameAliasAt = new AttributesImpl(); nameAliasAt.addAttribute(NAMESPACE, "alias", "alias", "CDATA", alias); - nameAliasAt.addAttribute( - NAMESPACE, "type", "type", "CDATA", nameAliases.get(alias)); + String type = nameAliases.get(alias); + if (!Objects.equals(type, "none")) { + nameAliasAt.addAttribute( + NAMESPACE, "type", "type", "CDATA", nameAliases.get(alias)); + } writer.startElement("name-alias", nameAliasAt); { writer.endElement("name-alias"); @@ -487,13 +515,34 @@ private static void buildGroupedRange( AttributesImpl orgRangeAttributes = getReservedAttributes(ucdVersion, attributeResolver, range); AttributesImpl rangeAttributes = new AttributesImpl(); - for (int index = 0; index < orgRangeAttributes.getLength(); index++) { - String attributeQName = orgRangeAttributes.getQName(index); - String orgCharAttributesValue = orgRangeAttributes.getValue(index); - String groupAttributeValue = groupAttrs.getValue(attributeQName); - if 
(!orgCharAttributesValue.equals(groupAttributeValue)) { + if (range.size() == 1) { + rangeAttributes.addAttribute( + NAMESPACE, "cp", "cp", "CDATA", attributeResolver.getHexString(range.get(0))); + } else { + rangeAttributes.addAttribute( + NAMESPACE, + "first-cp", + "first-cp", + "CDATA", + attributeResolver.getHexString(range.get(0))); + rangeAttributes.addAttribute( + NAMESPACE, + "last-cp", + "last-cp", + "CDATA", + attributeResolver.getHexString(range.get(range.size() - 1))); + } + + for (UcdPropertyDetail propDetail : UcdPropertyDetail.ucdxmlValues()) { + String qName = propDetail.getUcdProperty().getShortName(); + if (qName.startsWith("cjk")) { + qName = qName.substring(2); + } + String orgCharAttributesValue = orgRangeAttributes.getValue(qName); + String groupAttributeValue = groupAttrs.getValue(qName); + if (!Objects.equals(orgCharAttributesValue, groupAttributeValue)) { rangeAttributes.addAttribute( - NAMESPACE, attributeQName, attributeQName, "CDATA", orgCharAttributesValue); + NAMESPACE, qName, qName, "CDATA", Objects.requireNonNullElse(orgCharAttributesValue, "")); } } writer.startElement(rangeType.tag, rangeAttributes); diff --git a/unicodetools/src/main/java/org/unicode/xml/XMLProperties.java b/unicodetools/src/main/java/org/unicode/xml/XMLProperties.java new file mode 100644 index 000000000..b9421886d --- /dev/null +++ b/unicodetools/src/main/java/org/unicode/xml/XMLProperties.java @@ -0,0 +1,471 @@ +package org.unicode.xml; + +import com.ibm.icu.dev.util.UnicodeMap; +import org.unicode.cldr.util.XMLFileReader; +import org.unicode.props.IndexUnicodeProperties; +import org.unicode.props.UcdProperty; +import org.unicode.text.utility.Utility; +import org.xml.sax.*; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.util.*; +import java.util.Map.Entry; + +public class XMLProperties { + + enum XmlLeaf { + // Leaf + BLOCK, + BLOCKS, + CHAR, + CJK_RADICAL, + CJK_RADICALS, + DESCRIPTION, + DO_NOT_EMIT, + 
EMOJI_SOURCE, + EMOJI_SOURCES, + GROUP, + INSTEAD, + NAME_ALIAS, + NAMED_SEQUENCE, + NAMED_SEQUENCES, + NONCHARACTER, + NORMALIZATION_CORRECTION, + NORMALIZATION_CORRECTIONS, + PROVISIONAL_NAMED_SEQUENCES, + REPERTOIRE, + RESERVED, + STANDARDIZED_VARIANT, + STANDARDIZED_VARIANTS, + SURROGATE, + UCD; + static final XmlLeaf GREATEST_LEAF = NAME_ALIAS; + static final XmlLeaf GREATEST_BOTH = CHAR; + + static XmlLeaf forString(String source) { + try { + return XmlLeaf.valueOf(source.toUpperCase().replace('-', '_')); + } catch (final Exception e) { + return null; + } + } + } + + static class IntRange { + int start; + int end; + } + + Map> property2data = + new EnumMap>(UcdProperty.class); + { + for (final UcdProperty prop : UcdProperty.values()) { + property2data.put(prop, new UnicodeMap()); + } + } + + Set leavesNotHandled = new LinkedHashSet(); + public XMLProperties(File ucdxmlFile) { + readFile(ucdxmlFile); + + for (final UcdProperty prop : property2data.keySet()) { + final UnicodeMap map = property2data.get(prop); + map.freeze(); + } + } + + public void readFile(File ucdxmlFile) { + try { + System.out.println("Reading: " + ucdxmlFile.toString()); + final FileInputStream fis = new FileInputStream(ucdxmlFile); + final XMLReader xmlReader = XMLFileReader.createXMLReader(false); + xmlReader.setErrorHandler(new MyErrorHandler()); + xmlReader.setContentHandler(new MyContentHandler()); + final InputSource is = new InputSource(fis); + is.setSystemId(ucdxmlFile.toString()); + xmlReader.parse(is); + fis.close(); + } catch (final IOException | SAXException e) { + System.out.println("\t" + "Can't read " + ucdxmlFile); + System.out.println("\t" + e.getClass() + "\t" + e.getMessage()); + } + } + + class MyContentHandler implements ContentHandler { + IntRange cp = new IntRange(); + HashMap attributes = new HashMap(); + HashMap groupAttributes = new HashMap(); + private final List lastElements = new ArrayList(); + + public MyContentHandler() {} + + @Override + public void 
characters(char[] arg0, int arg1, int arg2) throws SAXException { + final String chars = String.valueOf(arg0, arg1, arg2).trim(); + if (!chars.trim().isEmpty() + && lastElements.get(lastElements.size() - 1) != XmlLeaf.DESCRIPTION) { + throw new IllegalArgumentException("Should have no element content"); + } + } + + @Override + public void endElement(String arg0, String arg1, String arg2) throws SAXException { + try { + if (lastElements.isEmpty()) { + System.out.println( + "endElement: can't remove last element. Args: " + + arg0 + + ", " + + arg1 + + ", " + + arg2); + } else { + final XmlLeaf removed = lastElements.remove(lastElements.size() - 1); + } + } catch (ArrayIndexOutOfBoundsException e) { + throw new IllegalArgumentException( + "endElement: can't remove last element. Args: " + + arg0 + + ", " + + arg1 + + ", " + + arg2, + e); + } + } + + @Override + public void endDocument() throws SAXException {} + + @Override + public void endPrefixMapping(String arg0) throws SAXException {} + + @Override + public void ignorableWhitespace(char[] arg0, int arg1, int arg2) throws SAXException {} + + @Override + public void processingInstruction(String arg0, String arg1) throws SAXException {} + + @Override + public void setDocumentLocator(Locator arg0) {} + + @Override + public void skippedEntity(String arg0) throws SAXException {} + + @Override + public void startDocument() throws SAXException {} + + @Override + public void startPrefixMapping(String arg0, String arg1) throws SAXException {} + + @Override + public void startElement(String namespaceURI, String localName, String qName, Attributes atts) { + try { + final XmlLeaf xmlLeaf = XmlLeaf.forString(qName); + if (xmlLeaf == null) { + throw new IllegalArgumentException(qName); + } + lastElements.add(xmlLeaf); + // System.out.println("Added:\t" + lastElements); + + if (xmlLeaf == XmlLeaf.GROUP) { + groupAttributes.clear(); + addAttributes(atts, groupAttributes); + return; + } + attributes.clear(); + 
attributes.putAll(groupAttributes); + addAttributes(atts, attributes); + String cps; + switch (xmlLeaf) { + case CHAR: + case RESERVED: + case SURROGATE: + case NONCHARACTER: + parseCp(attributes); + for (final Entry entry : attributes.entrySet()) { + doAttributes(entry.getKey(), entry.getValue()); + } + if (xmlLeaf == XmlLeaf.NONCHARACTER) { + property2data + .get(UcdProperty.Noncharacter_Code_Point) + .putAll(cp.start, cp.end, "Yes"); + } + break; + case BLOCK: + parseCp(attributes); + property2data + .get(UcdProperty.Block) + .putAll(cp.start, cp.end, attributes.get("name")); + break; + case NAMED_SEQUENCE: + cps = Utility.fromHex(attributes.get("cps")); + property2data + .get(UcdProperty.Named_Sequences) + .put(cps, attributes.get("name")); + break; + case CJK_RADICAL: + final String number = attributes.get("number"); + setProp( + Utility.fromHex(attributes.get("radical")), + UcdProperty.CJK_Radical, + number); + setProp( + Utility.fromHex(attributes.get("ideograph")), + UcdProperty.CJK_Radical, + number); + break; + case EMOJI_SOURCE: + cps = Utility.fromHex(attributes.get("unicode")); + setProp(cps, UcdProperty.Emoji_DCM, attributes.get("docomo")); + setProp(cps, UcdProperty.Emoji_KDDI, attributes.get("kddi")); + setProp(cps, UcdProperty.Emoji_SB, attributes.get("softbank")); + break; + case REPERTOIRE: + case BLOCKS: + case CJK_RADICALS: + case EMOJI_SOURCES: + case NAMED_SEQUENCES: + case PROVISIONAL_NAMED_SEQUENCES: + case NORMALIZATION_CORRECTIONS: + case STANDARDIZED_VARIANTS: + case DESCRIPTION: + case DO_NOT_EMIT: + // non-informational nodes, skip + if (atts.getLength() != 0) { + throw new IllegalArgumentException("Has attributes"); + } + break; + case UCD: + if (atts.getLength() != 0) { + throw new IllegalArgumentException( + "Has wrong number of attributes: " + attributes.entrySet()); + } + break; + case NAME_ALIAS: + final String alias = attributes.get("alias") + "(" + attributes.get("type") + ")"; + appendProp(cp.start, UcdProperty.Name_Alias, 
alias); + break; + case STANDARDIZED_VARIANT: + { + String desc = attributes.get("desc"); + final String when = attributes.get("when"); + if (!when.isEmpty()) { + desc = desc + "(" + when + ")"; + } + cps = Utility.fromHex(attributes.get("cps")); + appendProp(cps, UcdProperty.Standardized_Variant, desc); + break; + } + case NORMALIZATION_CORRECTION: + final String correction = "old: " + attributes.get("old") + + " new: " + attributes.get("new") + + " version: " + attributes.get("version"); + cps = Utility.fromHex(attributes.get("cp")); + appendProp(cps, UcdProperty.NC_Original, correction); + break; + case INSTEAD: + final String instead = "use: " + attributes.get("use") + + " because: " + attributes.get("because"); + cps = attributes.get("of"); + appendProp(cps, UcdProperty.Do_Not_Emit_Preferred, instead); + break; + case GROUP: + break; // handled above. Leaving case for clarity + default: + leavesNotHandled.add(qName); + break; + } + } catch (final Exception e) { + System.out.println( + "Exception: " + + qName + + "\t" + + e.getClass().getName() + + "\t" + + e.getMessage()); + } + } + + public void addAttributes(Attributes atts, Map map) { + for (int i = 0; i < atts.getLength(); ++i) { + map.put(atts.getQName(i), atts.getValue(i)); + } + } + + public void setProp(String cps, UcdProperty ucdProperty, String docomo) { + if (docomo != null) { + property2data.get(ucdProperty).put(cps, docomo); + } + } + + public void setProp(int cps, UcdProperty ucdProperty, String docomo) { + if (docomo != null) { + property2data.get(ucdProperty).put(cps, docomo); + } + } + + public void appendProp(int cps, UcdProperty ucdProperty, String docomo) { + final UnicodeMap unicodeMap = property2data.get(ucdProperty); + final String former = unicodeMap.get(cps); + unicodeMap.put(cps, former == null ? 
docomo : former + "; " + docomo); + } + + public void appendProp(String cps, UcdProperty ucdProperty, String docomo) { + final UnicodeMap unicodeMap = property2data.get(ucdProperty); + final String former = unicodeMap.get(cps); + unicodeMap.put(cps, former == null ? docomo : former + "; " + docomo); + } + + public void parseCp(HashMap attributes2) { + final String cpString = attributes2.get("cp"); + if (cpString != null) { + cp.start = cp.end = Integer.parseInt(cpString, 16); + } else { + cp.start = Integer.parseInt(attributes2.get("first-cp"), 16); + cp.end = Integer.parseInt(attributes2.get("last-cp"), 16); + } + } + + public UnicodeMap doAttributes(String key, String value) { + UcdProperty prop = UcdProperty.forString(key); + // if (prop == UcdProperty.Deprecated && cp.start > 0xE0000 && cp.start < + // 0xE00FF) { + // System.out.println(Utility.hex(cp.start) + "," + Utility.hex(cp.end) + + // "\t" + key + "\t" + value); + // } + if (prop == null) { + if (key.endsWith("cp")) { + if (key.equals("cp") || key.equals("last-cp") || key.equals("first-cp")) { + return null; + } + } else if (key.equals("InSC")) { + prop = UcdProperty.Indic_Syllabic_Category; + } else if (key.equals("InMC")) { + prop = UcdProperty.Indic_Syllabic_Category; + } + if (prop == null) { + return null; + } + } + final UnicodeMap data = property2data.get(prop); + if (data == null) { + System.out.println("can't get data for " + key); + return null; + } + data.putAll(cp.start, cp.end, value.intern()); + return data; + } + } + + static class MyErrorHandler implements ErrorHandler { + @Override + public void error(SAXParseException exception) throws SAXException { + // System.out.println("\nerror: " + XMLFileReader.showSAX(exception)); + throw exception; + } + + @Override + public void fatalError(SAXParseException exception) throws SAXException { + // System.out.println("\nfatalError: " + XMLFileReader.showSAX(exception)); + throw exception; + } + + @Override + public void warning(SAXParseException 
exception) throws SAXException { + // System.out.println("\nwarning: " + XMLFileReader.showSAX(exception)); + throw exception; + } + } + + public UnicodeMap getMap(UcdProperty prop) { + return property2data.get(prop); + } + + public Set getLeavesNotHandled() { + return leavesNotHandled; + } + static String show(String ival) { + if (ival == null) { + return "null"; + } else if (ival.isEmpty()) { + return ""; + } else if (ival.codePointAt(0) < 0x20) { + return "\\u{" + Utility.hex(ival, 4) + "}"; + } + return "«" + ival + "»"; + } + + // private static final String NO_VALUE = + // IndexUnicodeProperties.DefaultValueType.NO_VALUE.toString(); + // private static final String NAN = IndexUnicodeProperties.DefaultValueType.NaN.toString(); + + static final boolean HACK_XML_DEFAULTS = false; + + public static String getXmlResolved(UcdProperty property, int codePoint, String propertyValue) { + if (property == UcdProperty.Name) { + int debug = 0; + } + switch (property.getType()) { + case Binary: + if (HACK_XML_DEFAULTS) { + if (propertyValue == null) { + propertyValue = "No"; + } else { + propertyValue = + IndexUnicodeProperties.normalizeValue(property, propertyValue); + } + break; + } + // $FALL-THROUGH$ + case Enumerated: + case Catalog: + if (propertyValue != null) { + propertyValue = IndexUnicodeProperties.normalizeValue(property, propertyValue); + } + break; + case Numeric: + // if (HACK_XML_DEFAULTS) { + // if (propertyValue == null || propertyValue.isEmpty()) { + // propertyValue = "NaN"; + // } + // } + switch (property) { + case kOtherNumeric: + case kPrimaryNumeric: + case kAccountingNumeric: + if (propertyValue == null || propertyValue.isEmpty()) { + propertyValue = "NaN"; + } + break; + } + break; + case Miscellaneous: + if (propertyValue != null) { + switch (property) { + case Script_Extensions: + propertyValue = + IndexUnicodeProperties.normalizeValue(property, propertyValue); + break; + // case Name: + // break; + default: + propertyValue = 
propertyValue.replace("#", Utility.hex(codePoint)); + } + } + break; + case String: + if (propertyValue != null) { + propertyValue = propertyValue.replace("#", Utility.hex(codePoint)); + propertyValue = Utility.fromHex(propertyValue); + } + break; + default: + break; + } + return propertyValue; + // return propertyValue == null ? "" : propertyValue; + } +} From 8b870a63aa29c6fb75b85758752b1f7c215b1f6d Mon Sep 17 00:00:00 2001 From: John Wilcock Date: Fri, 23 Aug 2024 09:00:31 -0700 Subject: [PATCH 09/14] Ran spotless --- .../unicode/props/PropertyParsingInfo.java | 19 +-- .../org/unicode/xml/AttributeResolver.java | 16 +- .../java/org/unicode/xml/CompareUcdXML.java | 153 ++++++++++-------- .../java/org/unicode/xml/UCDDataResolver.java | 3 +- .../org/unicode/xml/UcdPropertyDetail.java | 9 +- .../org/unicode/xml/UcdSectionDetail.java | 11 +- .../src/main/java/org/unicode/xml/UcdXML.java | 60 ++++--- .../java/org/unicode/xml/XMLProperties.java | 37 +++-- 8 files changed, 176 insertions(+), 132 deletions(-) diff --git a/unicodetools/src/main/java/org/unicode/props/PropertyParsingInfo.java b/unicodetools/src/main/java/org/unicode/props/PropertyParsingInfo.java index 522a6eceb..9e4ebda09 100644 --- a/unicodetools/src/main/java/org/unicode/props/PropertyParsingInfo.java +++ b/unicodetools/src/main/java/org/unicode/props/PropertyParsingInfo.java @@ -761,20 +761,17 @@ private static void parsePropertyValueFile( value = "Yes"; } else { if (propInfo.property.getType() == PropertyType.Binary) { - //Handle @missing values for binary attributes (see 13.0.0 emoji-data.txt) + // Handle @missing values for binary attributes (see 13.0.0 emoji-data.txt) if (line.getParts().length == 3) { - if (line.getParts()[2].equals("No")) { - value = "No"; - } - else { - value = "Yes"; - } - } - else { + if (line.getParts()[2].equals("No")) { + value = "No"; + } else { + value = "Yes"; + } + } else { value = "Yes"; } - } - else { + } else { value = line.getParts()[2]; } // The value should not be 
an empty string. diff --git a/unicodetools/src/main/java/org/unicode/xml/AttributeResolver.java b/unicodetools/src/main/java/org/unicode/xml/AttributeResolver.java index a9db3392e..8fc02a328 100644 --- a/unicodetools/src/main/java/org/unicode/xml/AttributeResolver.java +++ b/unicodetools/src/main/java/org/unicode/xml/AttributeResolver.java @@ -1,9 +1,8 @@ package org.unicode.xml; import com.ibm.icu.dev.util.UnicodeMap; -import java.util.*; - import com.ibm.icu.util.VersionInfo; +import java.util.*; import org.unicode.cldr.draft.FileUtilities; import org.unicode.props.*; @@ -175,11 +174,12 @@ private HashMap> loadNameAliases() { String[] parts = line.getParts(); int codepoint = Integer.parseInt(parts[0], 16); NameAlias nameAlias; - if(parts.length < 3) { + if (parts.length < 3) { nameAlias = new NameAlias(parts[1], AliasType.NONE); - } - else { - nameAlias = new NameAlias(parts[1], AliasType.valueOf(parts[2].toUpperCase(Locale.ROOT))); + } else { + nameAlias = + new NameAlias( + parts[1], AliasType.valueOf(parts[2].toUpperCase(Locale.ROOT))); } if (nameAliasesByCodepoint.containsKey(codepoint)) { @@ -426,11 +426,11 @@ public boolean isDifferentRange(VersionInfo ucdVersion, int codepointA, int code UcdProperty prop = propDetail.getUcdProperty(); if (ucdVersion.compareTo(propDetail.getMinVersion()) >= 0 && (propDetail.getMaxVersion() == null - || ucdVersion.compareTo(propDetail.getMaxVersion()) < 0)) { + || ucdVersion.compareTo(propDetail.getMaxVersion()) < 0)) { isDifference = isDifference || !getAttributeValue(prop, codepointA) - .equals(getAttributeValue(prop, codepointB)); + .equals(getAttributeValue(prop, codepointB)); } } return isDifference; diff --git a/unicodetools/src/main/java/org/unicode/xml/CompareUcdXML.java b/unicodetools/src/main/java/org/unicode/xml/CompareUcdXML.java index 122b30a71..52d3421e2 100644 --- a/unicodetools/src/main/java/org/unicode/xml/CompareUcdXML.java +++ b/unicodetools/src/main/java/org/unicode/xml/CompareUcdXML.java @@ -3,53 +3,52 
@@ import com.ibm.icu.dev.tool.UOption; import com.ibm.icu.dev.util.UnicodeMap; import com.ibm.icu.text.UnicodeSet; -import org.unicode.props.UcdProperty; - import java.io.*; import java.util.HashMap; -import java.util.HashSet; import java.util.Objects; +import org.unicode.props.UcdProperty; public class CompareUcdXML { private static final String NEWLINE = System.getProperty("line.separator"); private static final UOption[] options = { - UOption.HELP_H(), - UOption.create("fileA", 'a', UOption.REQUIRES_ARG), - UOption.create("fileB", 'b', UOption.REQUIRES_ARG) + UOption.HELP_H(), + UOption.create("fileA", 'a', UOption.REQUIRES_ARG), + UOption.create("fileB", 'b', UOption.REQUIRES_ARG) }; - private static final UcdProperty[] codepointSequenceProperties = new UcdProperty[]{ - UcdProperty.Named_Sequences, - UcdProperty.Named_Sequences_Prov, - UcdProperty.Standardized_Variant, - UcdProperty.Emoji_DCM, - UcdProperty.Emoji_KDDI, - UcdProperty.Emoji_SB, - UcdProperty.Do_Not_Emit_Preferred - }; + private static final UcdProperty[] codepointSequenceProperties = + new UcdProperty[] { + UcdProperty.Named_Sequences, + UcdProperty.Named_Sequences_Prov, + UcdProperty.Standardized_Variant, + UcdProperty.Emoji_DCM, + UcdProperty.Emoji_KDDI, + UcdProperty.Emoji_SB, + UcdProperty.Do_Not_Emit_Preferred + }; private static final HashMap knownDifferences; static { knownDifferences = new HashMap<>(); - //https://github.com/unicode-org/properties/issues/296 - knownDifferences.put(0x31E4, new String [] {"Hani", "Zyyy"}); - knownDifferences.put(0x31E5, new String [] {"Hani", "Zyyy"}); - - //https://github.com/unicode-org/unicodetools/issues/325 - knownDifferences.put(0x109F7, new String [] {"1/6", "2/12"}); - knownDifferences.put(0x109F8, new String [] {"1/4", "3/12"}); - knownDifferences.put(0x109F9, new String [] {"1/3", "4/12"}); - knownDifferences.put(0x109FB, new String [] {"1/2", "6/12"}); - knownDifferences.put(0x109FD, new String [] {"2/3", "8/12"}); - 
knownDifferences.put(0x109FE, new String [] {"3/4", "9/12"}); - knownDifferences.put(0x109FF, new String [] {"5/6", "10/12"}); - - //https://github.com/unicode-org/properties/issues/172 - knownDifferences.put(0x5146, new String [] {"1000000", "1000000 1000000000000"}); - knownDifferences.put(0x79ED, new String [] {"1000000000", "1000000000 1000000000000"}); + // https://github.com/unicode-org/properties/issues/296 + knownDifferences.put(0x31E4, new String[] {"Hani", "Zyyy"}); + knownDifferences.put(0x31E5, new String[] {"Hani", "Zyyy"}); + + // https://github.com/unicode-org/unicodetools/issues/325 + knownDifferences.put(0x109F7, new String[] {"1/6", "2/12"}); + knownDifferences.put(0x109F8, new String[] {"1/4", "3/12"}); + knownDifferences.put(0x109F9, new String[] {"1/3", "4/12"}); + knownDifferences.put(0x109FB, new String[] {"1/2", "6/12"}); + knownDifferences.put(0x109FD, new String[] {"2/3", "8/12"}); + knownDifferences.put(0x109FE, new String[] {"3/4", "9/12"}); + knownDifferences.put(0x109FF, new String[] {"5/6", "10/12"}); + + // https://github.com/unicode-org/properties/issues/172 + knownDifferences.put(0x5146, new String[] {"1000000", "1000000 1000000000000"}); + knownDifferences.put(0x79ED, new String[] {"1000000000", "1000000000 1000000000000"}); } private static final int HELP = 0, FILE_A = 1, FILE_B = 2, LOGFILE = 3; @@ -62,41 +61,34 @@ public static void main(String[] args) throws Exception { UOption.parseArgs(args, options); if (options[HELP].doesOccur) { - System.out.println( - "CompareUcdXML --fileA {file path} --fileB {file path}"); + System.out.println("CompareUcdXML --fileA {file path} --fileB {file path}"); System.exit(0); } if (options[FILE_A].doesOccur) { try { - fileA = - new File(options[FILE_A].value); + fileA = new File(options[FILE_A].value); if (!fileA.exists()) { throw new IOException(); } } catch (Exception e) { - throw new IllegalArgumentException( - "Could not find " + options[FILE_A].value); + throw new 
IllegalArgumentException("Could not find " + options[FILE_A].value); } } else { - throw new IllegalArgumentException( - "Missing command line option: --fileA (or -a)"); + throw new IllegalArgumentException("Missing command line option: --fileA (or -a)"); } if (options[FILE_B].doesOccur) { try { - fileB = - new File(options[FILE_B].value); + fileB = new File(options[FILE_B].value); if (!fileB.exists()) { throw new IOException(); } } catch (Exception e) { - throw new IllegalArgumentException( - "Could not find " + options[FILE_B].value); + throw new IllegalArgumentException("Could not find " + options[FILE_B].value); } } else { - throw new IllegalArgumentException( - "Missing command line option: --fileB (or -b)"); + throw new IllegalArgumentException("Missing command line option: --fileB (or -b)"); } System.out.println("Comparing " + fileA + " and " + fileB); @@ -104,7 +96,7 @@ public static void main(String[] args) throws Exception { final XMLProperties xmlPropsA = new XMLProperties(fileA); final XMLProperties xmlPropsB = new XMLProperties(fileB); - //First, iterate through the UcdProperties on each codepoint. + // First, iterate through the UcdProperties on each codepoint. for (final UcdProperty prop : UcdProperty.values()) { UnicodeMap fileAMap = xmlPropsA.getMap(prop); UnicodeMap fileBMap = xmlPropsB.getMap(prop); @@ -113,29 +105,42 @@ public static void main(String[] args) throws Exception { try { String xmlValA = fileAMap.get(i); String xmlValB = fileBMap.get(i); - if(!Objects.equals(xmlValA, xmlValB)) { - //At least one string is != null and the strings are different, but we don't care if one + if (!Objects.equals(xmlValA, xmlValB)) { + // At least one string is != null and the strings are different, but we + // don't care if one // is null and one is empty_string - //As far as we care, empty_string == null == "00000" - int lenA = (xmlValA == null ? 0 : (xmlValA.equals("00000") ? 0 : xmlValA.length())); - int lenB = (xmlValB == null ? 
0 : (xmlValB.equals("00000") ? 0 : xmlValB.length())); - if (!(lenA == 0 && lenB == 0) && !isKnownDifference(i, xmlValA, xmlValB)) { + // As far as we care, empty_string == null == "00000" + int lenA = + (xmlValA == null + ? 0 + : (xmlValA.equals("00000") ? 0 : xmlValA.length())); + int lenB = + (xmlValB == null + ? 0 + : (xmlValB.equals("00000") ? 0 : xmlValB.length())); + if (!(lenA == 0 && lenB == 0) + && !isKnownDifference(i, xmlValA, xmlValB)) { errorCount++; - System.out.println("For UCDProperty " + prop.name() + " (" + prop.getShortName() + - ") [" + String.format("0x%04X", i) + "], "); + System.out.println( + "For UCDProperty " + + prop.name() + + " (" + + prop.getShortName() + + ") [" + + String.format("0x%04X", i) + + "], "); System.out.println("\t" + fileA + " = " + xmlValA); System.out.println("\t" + fileB + " = " + xmlValB); } } - } - catch (Exception e) { + } catch (Exception e) { System.out.println("Exception thrown for " + String.format("0x%04X", i)); System.out.println(e.getMessage()); } } } } - //Now handle anything that contains codepoint sequences. + // Now handle anything that contains codepoint sequences. for (UcdProperty prop : codepointSequenceProperties) { UnicodeMap fileAMap = xmlPropsA.getMap(prop); UnicodeMap fileBMap = xmlPropsB.getMap(prop); @@ -144,22 +149,34 @@ public static void main(String[] args) throws Exception { try { String xmlValA = fileAMap.get(key); String xmlValB = fileBMap.get(key); - if(!Objects.equals(xmlValA, xmlValB)) { - //At least one string is != null and the strings are different, but we don't care if one + if (!Objects.equals(xmlValA, xmlValB)) { + // At least one string is != null and the strings are different, but we + // don't care if one // is null and one is empty_string - //As far as we care, empty_string == null == "00000" - int lenA = (xmlValA == null ? 0 : (xmlValA.equals("00000") ? 0 : xmlValA.length())); - int lenB = (xmlValB == null ? 0 : (xmlValB.equals("00000") ? 
0 : xmlValB.length())); + // As far as we care, empty_string == null == "00000" + int lenA = + (xmlValA == null + ? 0 + : (xmlValA.equals("00000") ? 0 : xmlValA.length())); + int lenB = + (xmlValB == null + ? 0 + : (xmlValB.equals("00000") ? 0 : xmlValB.length())); if (!(lenA == 0 && lenB == 0)) { errorCount++; - System.out.println("For UCDProperty " + prop.name() + " (" + prop.getShortName() + - ") [" + key + "], "); + System.out.println( + "For UCDProperty " + + prop.name() + + " (" + + prop.getShortName() + + ") [" + + key + + "], "); System.out.println("\t" + fileA + " = " + xmlValA); System.out.println("\t" + fileB + " = " + xmlValB); } } - } - catch (Exception e) { + } catch (Exception e) { System.out.println("Exception thrown for " + String.format("0x%04X", key)); System.out.println(e.getMessage()); } @@ -172,8 +189,8 @@ private static boolean isKnownDifference(int codepoint, String xmlValA, String x if (knownDifferences.containsKey(codepoint)) { String knownValue1 = knownDifferences.get(codepoint)[0]; String knownValue2 = knownDifferences.get(codepoint)[1]; - return (knownValue1.equals(xmlValA) && knownValue2.equals(xmlValB)) || - (knownValue1.equals(xmlValB) && knownValue2.equals(xmlValA)); + return (knownValue1.equals(xmlValA) && knownValue2.equals(xmlValB)) + || (knownValue1.equals(xmlValB) && knownValue2.equals(xmlValA)); } return false; } diff --git a/unicodetools/src/main/java/org/unicode/xml/UCDDataResolver.java b/unicodetools/src/main/java/org/unicode/xml/UCDDataResolver.java index b2b979467..a30067bbb 100644 --- a/unicodetools/src/main/java/org/unicode/xml/UCDDataResolver.java +++ b/unicodetools/src/main/java/org/unicode/xml/UCDDataResolver.java @@ -84,7 +84,8 @@ public void buildSection(UcdSectionDetail.UcdSection ucdSection) throws SAXExcep String[] parts = line.getParts(); provisionalNamedSequences.put(parts[0], parts[1]); } - List psNames = new ArrayList<>(provisionalNamedSequences.keySet()); + List psNames = + new 
ArrayList<>(provisionalNamedSequences.keySet()); Collections.sort(psNames); for (String name : psNames) { AttributesImpl attributes = diff --git a/unicodetools/src/main/java/org/unicode/xml/UcdPropertyDetail.java b/unicodetools/src/main/java/org/unicode/xml/UcdPropertyDetail.java index 08a9edc53..a97ef5bab 100644 --- a/unicodetools/src/main/java/org/unicode/xml/UcdPropertyDetail.java +++ b/unicodetools/src/main/java/org/unicode/xml/UcdPropertyDetail.java @@ -1278,7 +1278,8 @@ public class UcdPropertyDetail { false, true); // public static UcdPropertyDetail kAlternateHanYu_Detail = new UcdPropertyDetail ( - // UcdProperty.kAlternateHanYu, VersionInfo.getInstance(2,0,0), VersionInfo.getInstance(3,1,1), 143, + // UcdProperty.kAlternateHanYu, VersionInfo.getInstance(2,0,0), + // VersionInfo.getInstance(3,1,1), 143, // false, true, false, true); public static UcdPropertyDetail kMandarin_Detail = new UcdPropertyDetail( @@ -1506,7 +1507,8 @@ public class UcdPropertyDetail { false, true); // public static UcdPropertyDetail kAlternateKangXi_Detail = new UcdPropertyDetail ( - // UcdProperty.kAlternateKangXi, VersionInfo.getInstance(2,0,0), VersionInfo.getInstance(4,0,1), 169, + // UcdProperty.kAlternateKangXi, VersionInfo.getInstance(2,0,0), + // VersionInfo.getInstance(4,0,1), 169, // false, true, false, true); public static UcdPropertyDetail kBigFive_Detail = new UcdPropertyDetail( @@ -1664,7 +1666,8 @@ public class UcdPropertyDetail { false, true); // public static UcdPropertyDetail kAlternateMorohashi_Detail = new UcdPropertyDetail ( - // UcdProperty.kAlternateMorohashi, VersionInfo.getInstance(2,0,0), VersionInfo.getInstance(4,0,1), 187, + // UcdProperty.kAlternateMorohashi, VersionInfo.getInstance(2,0,0), + // VersionInfo.getInstance(4,0,1), 187, // false, true, false, true); public static UcdPropertyDetail kPrimaryNumeric_Detail = new UcdPropertyDetail( diff --git a/unicodetools/src/main/java/org/unicode/xml/UcdSectionDetail.java 
b/unicodetools/src/main/java/org/unicode/xml/UcdSectionDetail.java index 070c24fc6..ceed693af 100644 --- a/unicodetools/src/main/java/org/unicode/xml/UcdSectionDetail.java +++ b/unicodetools/src/main/java/org/unicode/xml/UcdSectionDetail.java @@ -136,17 +136,18 @@ public boolean getParserWithMissing() { new UcdSectionDetail( UcdSection.NAMEDSEQUENCES, new UcdSectionComponent[] { - new UcdSectionComponent( - VersionInfo.getInstance(1, 1, 0), null, UcdProperty.Named_Sequences) + new UcdSectionComponent( + VersionInfo.getInstance(1, 1, 0), null, UcdProperty.Named_Sequences) }, 1); public static UcdSectionDetail ProvisionalNamedSequences_Detail = new UcdSectionDetail( UcdSection.PROVISIONALNAMEDSEQUENCES, new UcdSectionComponent[] { - new UcdSectionComponent( - VersionInfo.getInstance(5, 0, 0), VersionInfo.getInstance(13, 0, 0), - UcdProperty.Named_Sequences_Prov) + new UcdSectionComponent( + VersionInfo.getInstance(5, 0, 0), + VersionInfo.getInstance(13, 0, 0), + UcdProperty.Named_Sequences_Prov) }, 1); public static UcdSectionDetail NormalizationCorrections_Detail = diff --git a/unicodetools/src/main/java/org/unicode/xml/UcdXML.java b/unicodetools/src/main/java/org/unicode/xml/UcdXML.java index 4366ffc89..c71ac1082 100644 --- a/unicodetools/src/main/java/org/unicode/xml/UcdXML.java +++ b/unicodetools/src/main/java/org/unicode/xml/UcdXML.java @@ -60,15 +60,12 @@ public String toString() { public static void main(String[] args) throws Exception { VersionInfo ucdVersion = null; - UCDXMLOUTPUTRANGE[] ucdxmloutputranges = new UCDXMLOUTPUTRANGE[] { - UCDXMLOUTPUTRANGE.ALL, - UCDXMLOUTPUTRANGE.NOUNIHAN, - UCDXMLOUTPUTRANGE.UNIHAN - }; - UCDXMLOUTPUTTYPE[] ucdxmloutputtypes = new UCDXMLOUTPUTTYPE[] { - UCDXMLOUTPUTTYPE.FLAT, - UCDXMLOUTPUTTYPE.GROUPED - }; + UCDXMLOUTPUTRANGE[] ucdxmloutputranges = + new UCDXMLOUTPUTRANGE[] { + UCDXMLOUTPUTRANGE.ALL, UCDXMLOUTPUTRANGE.NOUNIHAN, UCDXMLOUTPUTRANGE.UNIHAN + }; + UCDXMLOUTPUTTYPE[] ucdxmloutputtypes = + new UCDXMLOUTPUTTYPE[] 
{UCDXMLOUTPUTTYPE.FLAT, UCDXMLOUTPUTTYPE.GROUPED}; File destinationFolder = null; UOption.parseArgs(args, options); @@ -96,9 +93,10 @@ public static void main(String[] args) throws Exception { } if (options[RANGE].doesOccur) { try { - ucdxmloutputranges = new UCDXMLOUTPUTRANGE[]{ - UCDXMLOUTPUTRANGE.valueOf( - options[RANGE].value.toUpperCase(Locale.ROOT)) + ucdxmloutputranges = + new UCDXMLOUTPUTRANGE[] { + UCDXMLOUTPUTRANGE.valueOf( + options[RANGE].value.toUpperCase(Locale.ROOT)) }; } catch (Exception e) { throw new IllegalArgumentException( @@ -109,10 +107,11 @@ public static void main(String[] args) throws Exception { } if (options[OUTPUT].doesOccur) { try { - ucdxmloutputtypes = new UCDXMLOUTPUTTYPE[] { - UCDXMLOUTPUTTYPE.valueOf( - options[OUTPUT].value.toUpperCase(Locale.ROOT)) - }; + ucdxmloutputtypes = + new UCDXMLOUTPUTTYPE[] { + UCDXMLOUTPUTTYPE.valueOf( + options[OUTPUT].value.toUpperCase(Locale.ROOT)) + }; } catch (Exception e) { throw new IllegalArgumentException( "Could not convert " @@ -148,10 +147,16 @@ public static void main(String[] args) throws Exception { if (ucdVersion != null && destinationFolder.exists()) { for (UCDXMLOUTPUTRANGE ucdxmloutputrange : ucdxmloutputranges) { - for (UCDXMLOUTPUTTYPE ucdxmloutputtype: ucdxmloutputtypes) { - System.out.println("Building the " + ucdxmloutputrange + " " + ucdxmloutputtype + - " UcdXML file for " + ucdVersion); - buildUcdXMLFile(ucdVersion, destinationFolder, ucdxmloutputrange, ucdxmloutputtype); + for (UCDXMLOUTPUTTYPE ucdxmloutputtype : ucdxmloutputtypes) { + System.out.println( + "Building the " + + ucdxmloutputrange + + " " + + ucdxmloutputtype + + " UcdXML file for " + + ucdVersion); + buildUcdXMLFile( + ucdVersion, destinationFolder, ucdxmloutputrange, ucdxmloutputtype); } } System.out.println("End"); @@ -364,7 +369,8 @@ private static int buildChars( Range currentRangeType = getRangeType(attributeResolver, codepoint); if (!range.isEmpty()) { if (!currentRangeType.equals(rangeType) - || 
attributeResolver.isDifferentRange(ucdVersion, codepoint, codepoint - 1)) { + || attributeResolver.isDifferentRange( + ucdVersion, codepoint, codepoint - 1)) { if (outputRange != UCDXMLOUTPUTRANGE.UNIHAN) { if (outputType == UCDXMLOUTPUTTYPE.GROUPED) { buildGroupedRange( @@ -470,7 +476,11 @@ private static void buildGroupedChar( String groupAttributeValue = groupAttrs.getValue(qName); if (!Objects.equals(orgCharAttributesValue, groupAttributeValue)) { charAttributes.addAttribute( - NAMESPACE, qName, qName, "CDATA", Objects.requireNonNullElse(orgCharAttributesValue, "")); + NAMESPACE, + qName, + qName, + "CDATA", + Objects.requireNonNullElse(orgCharAttributesValue, "")); } } buildChar(writer, attributeResolver, codepoint, charAttributes); @@ -542,7 +552,11 @@ private static void buildGroupedRange( String groupAttributeValue = groupAttrs.getValue(qName); if (!Objects.equals(orgCharAttributesValue, groupAttributeValue)) { rangeAttributes.addAttribute( - NAMESPACE, qName, qName, "CDATA", Objects.requireNonNullElse(orgCharAttributesValue, "")); + NAMESPACE, + qName, + qName, + "CDATA", + Objects.requireNonNullElse(orgCharAttributesValue, "")); } } writer.startElement(rangeType.tag, rangeAttributes); diff --git a/unicodetools/src/main/java/org/unicode/xml/XMLProperties.java b/unicodetools/src/main/java/org/unicode/xml/XMLProperties.java index b9421886d..396bddeb7 100644 --- a/unicodetools/src/main/java/org/unicode/xml/XMLProperties.java +++ b/unicodetools/src/main/java/org/unicode/xml/XMLProperties.java @@ -1,17 +1,16 @@ package org.unicode.xml; import com.ibm.icu.dev.util.UnicodeMap; -import org.unicode.cldr.util.XMLFileReader; -import org.unicode.props.IndexUnicodeProperties; -import org.unicode.props.UcdProperty; -import org.unicode.text.utility.Utility; -import org.xml.sax.*; - import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.util.*; import java.util.Map.Entry; +import org.unicode.cldr.util.XMLFileReader; +import 
org.unicode.props.IndexUnicodeProperties; +import org.unicode.props.UcdProperty; +import org.unicode.text.utility.Utility; +import org.xml.sax.*; public class XMLProperties { @@ -60,6 +59,7 @@ static class IntRange { Map> property2data = new EnumMap>(UcdProperty.class); + { for (final UcdProperty prop : UcdProperty.values()) { property2data.put(prop, new UnicodeMap()); @@ -67,6 +67,7 @@ static class IntRange { } Set leavesNotHandled = new LinkedHashSet(); + public XMLProperties(File ucdxmlFile) { readFile(ucdxmlFile); @@ -161,7 +162,8 @@ public void startDocument() throws SAXException {} public void startPrefixMapping(String arg0, String arg1) throws SAXException {} @Override - public void startElement(String namespaceURI, String localName, String qName, Attributes atts) { + public void startElement( + String namespaceURI, String localName, String qName, Attributes atts) { try { final XmlLeaf xmlLeaf = XmlLeaf.forString(qName); if (xmlLeaf == null) { @@ -245,7 +247,8 @@ public void startElement(String namespaceURI, String localName, String qName, At } break; case NAME_ALIAS: - final String alias = attributes.get("alias") + "(" + attributes.get("type") + ")"; + final String alias = + attributes.get("alias") + "(" + attributes.get("type") + ")"; appendProp(cp.start, UcdProperty.Name_Alias, alias); break; case STANDARDIZED_VARIANT: @@ -260,15 +263,22 @@ public void startElement(String namespaceURI, String localName, String qName, At break; } case NORMALIZATION_CORRECTION: - final String correction = "old: " + attributes.get("old") + - " new: " + attributes.get("new") + - " version: " + attributes.get("version"); + final String correction = + "old: " + + attributes.get("old") + + " new: " + + attributes.get("new") + + " version: " + + attributes.get("version"); cps = Utility.fromHex(attributes.get("cp")); appendProp(cps, UcdProperty.NC_Original, correction); break; case INSTEAD: - final String instead = "use: " + attributes.get("use") + - " because: " + 
attributes.get("because"); + final String instead = + "use: " + + attributes.get("use") + + " because: " + + attributes.get("because"); cps = attributes.get("of"); appendProp(cps, UcdProperty.Do_Not_Emit_Preferred, instead); break; @@ -387,6 +397,7 @@ public UnicodeMap getMap(UcdProperty prop) { public Set getLeavesNotHandled() { return leavesNotHandled; } + static String show(String ival) { if (ival == null) { return "null"; From d612e969a63126ae0eb3aa7afbafed3798bebe0d Mon Sep 17 00:00:00 2001 From: John Wilcock Date: Mon, 14 Oct 2024 13:58:59 -0700 Subject: [PATCH 10/14] Added support for the generation of UAX42 --- uax/uax42/Readme.md | 23 + uax/uax42/fragments/block/block.xml | 10 + uax/uax42/fragments/boolean/boolean.xml | 4 + .../fragments/cjk-radicals/cjk-radicals.xml | 10 + uax/uax42/fragments/datatypes/code points.xml | 9 + uax/uax42/fragments/datatypes/datatypes.xml | 5 + .../fragments/datatypes/jis-code-point.xml | 5 + .../fragments/description/description.xml | 6 + .../fragments/do-not-emit/do-not-emit.xml | 22 + uax/uax42/fragments/emoji-data/Emoji.xml | 20 + .../fragments/emoji-sources/emoji-sources.xml | 11 + .../named-sequences/named-sequences.xml | 15 + uax/uax42/fragments/namespace/namespace.xml | 5 + .../normalization-corrections.xml | 11 + uax/uax42/fragments/nushu/Nushu.xml | 8 + uax/uax42/fragments/properties/Bidi_C.xml | 5 + uax/uax42/fragments/properties/Bidi_M.xml | 5 + uax/uax42/fragments/properties/InCB.xml | 9 + uax/uax42/fragments/properties/InPC.xml | 21 + uax/uax42/fragments/properties/InSC.xml | 42 + uax/uax42/fragments/properties/JSN.xml | 5 + uax/uax42/fragments/properties/Join_C.xml | 5 + uax/uax42/fragments/properties/Name_Alias.xml | 10 + uax/uax42/fragments/properties/Unihan.xml | 347 ++ uax/uax42/fragments/properties/age.xml | 22 + uax/uax42/fragments/properties/bc.xml | 17 + uax/uax42/fragments/properties/blk.xml | 344 ++ uax/uax42/fragments/properties/bmg.xml | 5 + uax/uax42/fragments/properties/boundaries.xml | 58 + 
uax/uax42/fragments/properties/bpb.xml | 5 + uax/uax42/fragments/properties/bpt.xml | 5 + .../fragments/properties/case_folding.xml | 8 + .../fragments/properties/case_mapping.xml | 11 + uax/uax42/fragments/properties/case_other.xml | 32 + uax/uax42/fragments/properties/casing.xml | 14 + uax/uax42/fragments/properties/ccc.xml | 5 + uax/uax42/fragments/properties/cjkEACC.xml | 5 + .../fragments/properties/cjkIRG_TSource.xml | 6 + .../fragments/properties/composition.xml | 8 + .../fragments/properties/decomposition.xml | 11 + uax/uax42/fragments/properties/ea.xml | 5 + .../fragments/properties/function_graphic.xml | 68 + uax/uax42/fragments/properties/gc.xml | 12 + uax/uax42/fragments/properties/hst.xml | 5 + uax/uax42/fragments/properties/identifier.xml | 26 + uax/uax42/fragments/properties/ideographs.xml | 23 + uax/uax42/fragments/properties/isc.xml | 5 + uax/uax42/fragments/properties/joining.xml | 52 + uax/uax42/fragments/properties/lb.xml | 24 + .../fragments/properties/miscellaneous.xml | 11 + uax/uax42/fragments/properties/na.xml | 13 + uax/uax42/fragments/properties/na1.xml | 5 + uax/uax42/fragments/properties/numeric.xml | 8 + uax/uax42/fragments/properties/pattern.xml | 8 + uax/uax42/fragments/properties/quickcheck.xml | 31 + uax/uax42/fragments/properties/script.xml | 49 + .../properties/simple_case_mapping.xml | 11 + .../fragments/repertoire/Code points.xml | 23 + .../repertoire/Set of code points.xml | 8 + uax/uax42/fragments/repertoire/groups.xml | 8 + uax/uax42/fragments/repertoire/repertoire.xml | 6 + .../standardized-variants.xml | 10 + uax/uax42/fragments/start/start.xml | 6 + uax/uax42/fragments/tangut/Tangut.xml | 18 + uax/uax42/index.xml | 1353 +++++++ uax/uax42/index2html.xsl | 611 +++ uax/uax42/index2rnc.xsl | 45 + uax/uax42/output/index.html | 3480 +++++++++++++++++ uax/uax42/output/index.rnc | 1453 +++++++ uax/uax42/pom.xml | 72 + .../org/unicode/xml/AttributeResolver.java | 2 +- .../unicode/xml/GeneratePropertyValues.java | 1358 +++++++ 
.../org/unicode/props/IndexPropertyRegex.txt | 2 + 73 files changed, 9979 insertions(+), 1 deletion(-) create mode 100644 uax/uax42/Readme.md create mode 100644 uax/uax42/fragments/block/block.xml create mode 100644 uax/uax42/fragments/boolean/boolean.xml create mode 100644 uax/uax42/fragments/cjk-radicals/cjk-radicals.xml create mode 100644 uax/uax42/fragments/datatypes/code points.xml create mode 100644 uax/uax42/fragments/datatypes/datatypes.xml create mode 100644 uax/uax42/fragments/datatypes/jis-code-point.xml create mode 100644 uax/uax42/fragments/description/description.xml create mode 100644 uax/uax42/fragments/do-not-emit/do-not-emit.xml create mode 100644 uax/uax42/fragments/emoji-data/Emoji.xml create mode 100644 uax/uax42/fragments/emoji-sources/emoji-sources.xml create mode 100644 uax/uax42/fragments/named-sequences/named-sequences.xml create mode 100644 uax/uax42/fragments/namespace/namespace.xml create mode 100644 uax/uax42/fragments/normalization-corrections/normalization-corrections.xml create mode 100644 uax/uax42/fragments/nushu/Nushu.xml create mode 100644 uax/uax42/fragments/properties/Bidi_C.xml create mode 100644 uax/uax42/fragments/properties/Bidi_M.xml create mode 100644 uax/uax42/fragments/properties/InCB.xml create mode 100644 uax/uax42/fragments/properties/InPC.xml create mode 100644 uax/uax42/fragments/properties/InSC.xml create mode 100644 uax/uax42/fragments/properties/JSN.xml create mode 100644 uax/uax42/fragments/properties/Join_C.xml create mode 100644 uax/uax42/fragments/properties/Name_Alias.xml create mode 100644 uax/uax42/fragments/properties/Unihan.xml create mode 100644 uax/uax42/fragments/properties/age.xml create mode 100644 uax/uax42/fragments/properties/bc.xml create mode 100644 uax/uax42/fragments/properties/blk.xml create mode 100644 uax/uax42/fragments/properties/bmg.xml create mode 100644 uax/uax42/fragments/properties/boundaries.xml create mode 100644 uax/uax42/fragments/properties/bpb.xml create mode 100644 
uax/uax42/fragments/properties/bpt.xml create mode 100644 uax/uax42/fragments/properties/case_folding.xml create mode 100644 uax/uax42/fragments/properties/case_mapping.xml create mode 100644 uax/uax42/fragments/properties/case_other.xml create mode 100644 uax/uax42/fragments/properties/casing.xml create mode 100644 uax/uax42/fragments/properties/ccc.xml create mode 100644 uax/uax42/fragments/properties/cjkEACC.xml create mode 100644 uax/uax42/fragments/properties/cjkIRG_TSource.xml create mode 100644 uax/uax42/fragments/properties/composition.xml create mode 100644 uax/uax42/fragments/properties/decomposition.xml create mode 100644 uax/uax42/fragments/properties/ea.xml create mode 100644 uax/uax42/fragments/properties/function_graphic.xml create mode 100644 uax/uax42/fragments/properties/gc.xml create mode 100644 uax/uax42/fragments/properties/hst.xml create mode 100644 uax/uax42/fragments/properties/identifier.xml create mode 100644 uax/uax42/fragments/properties/ideographs.xml create mode 100644 uax/uax42/fragments/properties/isc.xml create mode 100644 uax/uax42/fragments/properties/joining.xml create mode 100644 uax/uax42/fragments/properties/lb.xml create mode 100644 uax/uax42/fragments/properties/miscellaneous.xml create mode 100644 uax/uax42/fragments/properties/na.xml create mode 100644 uax/uax42/fragments/properties/na1.xml create mode 100644 uax/uax42/fragments/properties/numeric.xml create mode 100644 uax/uax42/fragments/properties/pattern.xml create mode 100644 uax/uax42/fragments/properties/quickcheck.xml create mode 100644 uax/uax42/fragments/properties/script.xml create mode 100644 uax/uax42/fragments/properties/simple_case_mapping.xml create mode 100644 uax/uax42/fragments/repertoire/Code points.xml create mode 100644 uax/uax42/fragments/repertoire/Set of code points.xml create mode 100644 uax/uax42/fragments/repertoire/groups.xml create mode 100644 uax/uax42/fragments/repertoire/repertoire.xml create mode 100644 
uax/uax42/fragments/standardized-variants/standardized-variants.xml create mode 100644 uax/uax42/fragments/start/start.xml create mode 100644 uax/uax42/fragments/tangut/Tangut.xml create mode 100644 uax/uax42/index.xml create mode 100644 uax/uax42/index2html.xsl create mode 100644 uax/uax42/index2rnc.xsl create mode 100644 uax/uax42/output/index.html create mode 100644 uax/uax42/output/index.rnc create mode 100644 uax/uax42/pom.xml create mode 100644 unicodetools/src/main/java/org/unicode/xml/GeneratePropertyValues.java diff --git a/uax/uax42/Readme.md b/uax/uax42/Readme.md new file mode 100644 index 000000000..9b931569f --- /dev/null +++ b/uax/uax42/Readme.md @@ -0,0 +1,23 @@ +# Generating TR42 + +## Step 1 - Generate property value fragments + +- Run org.unicode.xml.GeneratePropertyValues to populate the UNICODETOOLS_REPO_DIR/uax/uax42/fragments/ folder. + +## Step 2 - Generate TR42 index.html and index.rnc + +- In UNICODETOOLS_REPO_DIR/uax/uax42/ run `mvn xml:transform` + + index.html and index.rnc will be generated in UNICODETOOLS_REPO_DIR/uax/uax42/output/ + +## Step 3 - Validate generated UAX XML files + +You'll need a [RELAX NG](https://relaxng.org/) schema validator. We'll use +[jing-trang](https://github.com/relaxng/jing-trang) in this example. + +1. Clone and build [jing-trang](https://github.com/relaxng/jing-trang) +2. Run the following: + ``` + java -jar C:\_git\jing-trang\build\jing.jar -c UNICODETOOLS_REPO_DIR\uax\uax42\output\index.rnc + ``` + diff --git a/uax/uax42/fragments/block/block.xml b/uax/uax42/fragments/block/block.xml new file mode 100644 index 000000000..1d9b2beb8 --- /dev/null +++ b/uax/uax42/fragments/block/block.xml @@ -0,0 +1,10 @@ + + + + ucd.content &= + element blocks { + element block { + attribute first-cp { single-code-point }, + attribute last-cp { single-code-point }, + attribute name { text } }+ }? 
+ \ No newline at end of file diff --git a/uax/uax42/fragments/boolean/boolean.xml b/uax/uax42/fragments/boolean/boolean.xml new file mode 100644 index 000000000..fae36d68d --- /dev/null +++ b/uax/uax42/fragments/boolean/boolean.xml @@ -0,0 +1,4 @@ + + + boolean = "Y" | "N" + \ No newline at end of file diff --git a/uax/uax42/fragments/cjk-radicals/cjk-radicals.xml b/uax/uax42/fragments/cjk-radicals/cjk-radicals.xml new file mode 100644 index 000000000..45c49ed2c --- /dev/null +++ b/uax/uax42/fragments/cjk-radicals/cjk-radicals.xml @@ -0,0 +1,10 @@ + + + + ucd.content &= + element cjk-radicals { + element cjk-radical { + attribute number { xsd:string {pattern="[0-9]{1,3}'{0,3}"}}, + attribute radical { single-code-point? }, + attribute ideograph { single-code-point } }+ }? + \ No newline at end of file diff --git a/uax/uax42/fragments/datatypes/code points.xml b/uax/uax42/fragments/datatypes/code points.xml new file mode 100644 index 000000000..c3cda88df --- /dev/null +++ b/uax/uax42/fragments/datatypes/code points.xml @@ -0,0 +1,9 @@ + + + + single-code-point = xsd:string { pattern = "(|[1-9A-F]|(10))[0-9A-F]{4}" } + + one-or-more-code-points = list { single-code-point + } + zero-or-more-code-points = list { single-code-point * } + two-code-points = list { single-code-point, single-code-point } + \ No newline at end of file diff --git a/uax/uax42/fragments/datatypes/datatypes.xml b/uax/uax42/fragments/datatypes/datatypes.xml new file mode 100644 index 000000000..c26367d97 --- /dev/null +++ b/uax/uax42/fragments/datatypes/datatypes.xml @@ -0,0 +1,5 @@ + + + + # default; datatypes xsd = "http://www.w3.org/2001/XMLSchema-datatypes" + \ No newline at end of file diff --git a/uax/uax42/fragments/datatypes/jis-code-point.xml b/uax/uax42/fragments/datatypes/jis-code-point.xml new file mode 100644 index 000000000..9a6820c7b --- /dev/null +++ b/uax/uax42/fragments/datatypes/jis-code-point.xml @@ -0,0 +1,5 @@ + + + + jis-code-point = xsd:string { pattern = "[0-9A-F]{4}" } + 
\ No newline at end of file diff --git a/uax/uax42/fragments/description/description.xml b/uax/uax42/fragments/description/description.xml new file mode 100644 index 000000000..97bb063e7 --- /dev/null +++ b/uax/uax42/fragments/description/description.xml @@ -0,0 +1,6 @@ + + + + ucd.content &= + element description { text }? + \ No newline at end of file diff --git a/uax/uax42/fragments/do-not-emit/do-not-emit.xml b/uax/uax42/fragments/do-not-emit/do-not-emit.xml new file mode 100644 index 000000000..5381491e7 --- /dev/null +++ b/uax/uax42/fragments/do-not-emit/do-not-emit.xml @@ -0,0 +1,22 @@ + + + ucd.content &= + element do-not-emit { + element instead { + attribute of { one-or-more-code-points }, + attribute use { one-or-more-code-points }, + attribute because { "Bengali_Khanda_Ta" + | "Deprecated" + | "Discouraged" + | "Dotless_Form" + | "Hamza_Form" + | "Indic_Atomic_Consonant" + | "Indic_Consonant_Conjunct" + | "Indic_Vowel_Letter" + | "Malayalam_Chillu" + | "Precomposed_Form" + | "Precomposed_Hieroglyph" + | "Preferred_Spelling" + | "Tamil_Shrii" + } }+ }? + \ No newline at end of file diff --git a/uax/uax42/fragments/emoji-data/Emoji.xml b/uax/uax42/fragments/emoji-data/Emoji.xml new file mode 100644 index 000000000..7c7873459 --- /dev/null +++ b/uax/uax42/fragments/emoji-data/Emoji.xml @@ -0,0 +1,20 @@ + + + code-point-attributes &= + attribute Emoji { boolean }? + + code-point-attributes &= + attribute EPres { boolean }? + + code-point-attributes &= + attribute EMod { boolean }? + + code-point-attributes &= + attribute EBase { boolean }? + + code-point-attributes &= + attribute EComp { boolean }? + + code-point-attributes &= + attribute ExtPict { boolean }? 
+ \ No newline at end of file diff --git a/uax/uax42/fragments/emoji-sources/emoji-sources.xml b/uax/uax42/fragments/emoji-sources/emoji-sources.xml new file mode 100644 index 000000000..96d122953 --- /dev/null +++ b/uax/uax42/fragments/emoji-sources/emoji-sources.xml @@ -0,0 +1,11 @@ + + + + ucd.content &= + element emoji-sources { + element emoji-source { + attribute unicode { one-or-more-code-points }, + attribute docomo { jis-code-point? }, + attribute kddi { jis-code-point? }, + attribute softbank { jis-code-point? } }+ }? + \ No newline at end of file diff --git a/uax/uax42/fragments/named-sequences/named-sequences.xml b/uax/uax42/fragments/named-sequences/named-sequences.xml new file mode 100644 index 000000000..2859ea29d --- /dev/null +++ b/uax/uax42/fragments/named-sequences/named-sequences.xml @@ -0,0 +1,15 @@ + + + + ucd.content &= + element named-sequences { + element named-sequence { + attribute cps { one-or-more-code-points }, + attribute name { text } }+ }? + + ucd.content &= + element provisional-named-sequences { + element named-sequence { + attribute cps { one-or-more-code-points }, + attribute name { text } }+ }? 
+ \ No newline at end of file diff --git a/uax/uax42/fragments/namespace/namespace.xml b/uax/uax42/fragments/namespace/namespace.xml new file mode 100644 index 000000000..e75306a26 --- /dev/null +++ b/uax/uax42/fragments/namespace/namespace.xml @@ -0,0 +1,5 @@ + + + + default namespace ucd = "http://www.unicode.org/ns/2003/ucd/1.0" + \ No newline at end of file diff --git a/uax/uax42/fragments/normalization-corrections/normalization-corrections.xml b/uax/uax42/fragments/normalization-corrections/normalization-corrections.xml new file mode 100644 index 000000000..7231a8c26 --- /dev/null +++ b/uax/uax42/fragments/normalization-corrections/normalization-corrections.xml @@ -0,0 +1,11 @@ + + + + ucd.content &= + element normalization-corrections { + element normalization-correction { + attribute cp { single-code-point }, + attribute old { one-or-more-code-points }, + attribute new { one-or-more-code-points }, + attribute version { text } }+ }? + \ No newline at end of file diff --git a/uax/uax42/fragments/nushu/Nushu.xml b/uax/uax42/fragments/nushu/Nushu.xml new file mode 100644 index 000000000..8919bba32 --- /dev/null +++ b/uax/uax42/fragments/nushu/Nushu.xml @@ -0,0 +1,8 @@ + + + code-point-attributes &= + attribute kSrc_NushuDuben { xsd:string { pattern="[0-9]+\.[0-9]+" } }? + + code-point-attributes &= + attribute kReading { xsd:string }? + \ No newline at end of file diff --git a/uax/uax42/fragments/properties/Bidi_C.xml b/uax/uax42/fragments/properties/Bidi_C.xml new file mode 100644 index 000000000..617113bf2 --- /dev/null +++ b/uax/uax42/fragments/properties/Bidi_C.xml @@ -0,0 +1,5 @@ + + + code-point-attributes &= + attribute Bidi_C { boolean }? + \ No newline at end of file diff --git a/uax/uax42/fragments/properties/Bidi_M.xml b/uax/uax42/fragments/properties/Bidi_M.xml new file mode 100644 index 000000000..c1380221b --- /dev/null +++ b/uax/uax42/fragments/properties/Bidi_M.xml @@ -0,0 +1,5 @@ + + + code-point-attributes &= + attribute Bidi_M { boolean }? 
+ \ No newline at end of file diff --git a/uax/uax42/fragments/properties/InCB.xml b/uax/uax42/fragments/properties/InCB.xml new file mode 100644 index 000000000..8340250dc --- /dev/null +++ b/uax/uax42/fragments/properties/InCB.xml @@ -0,0 +1,9 @@ + + + code-point-attributes &= + attribute InCB { "Consonant" + | "Extend" + | "Linker" + | "None" + }? + \ No newline at end of file diff --git a/uax/uax42/fragments/properties/InPC.xml b/uax/uax42/fragments/properties/InPC.xml new file mode 100644 index 000000000..a7de62387 --- /dev/null +++ b/uax/uax42/fragments/properties/InPC.xml @@ -0,0 +1,21 @@ + + + code-point-attributes &= + attribute InPC { "Bottom" + | "Bottom_And_Left" + | "Bottom_And_Right" + | "Left" + | "Left_And_Right" + | "NA" + | "Overstruck" + | "Right" + | "Top" + | "Top_And_Bottom" + | "Top_And_Bottom_And_Left" + | "Top_And_Bottom_And_Right" + | "Top_And_Left" + | "Top_And_Left_And_Right" + | "Top_And_Right" + | "Visual_Order_Left" + }? + \ No newline at end of file diff --git a/uax/uax42/fragments/properties/InSC.xml b/uax/uax42/fragments/properties/InSC.xml new file mode 100644 index 000000000..ddddc27a4 --- /dev/null +++ b/uax/uax42/fragments/properties/InSC.xml @@ -0,0 +1,42 @@ + + + code-point-attributes &= + attribute InSC { "Avagraha" + | "Bindu" + | "Brahmi_Joining_Number" + | "Cantillation_Mark" + | "Consonant" + | "Consonant_Dead" + | "Consonant_Final" + | "Consonant_Head_Letter" + | "Consonant_Initial_Postfixed" + | "Consonant_Killer" + | "Consonant_Medial" + | "Consonant_Placeholder" + | "Consonant_Preceding_Repha" + | "Consonant_Prefixed" + | "Consonant_Subjoined" + | "Consonant_Succeeding_Repha" + | "Consonant_With_Stacker" + | "Gemination_Mark" + | "Invisible_Stacker" + | "Joiner" + | "Modifying_Letter" + | "Non_Joiner" + | "Nukta" + | "Number" + | "Number_Joiner" + | "Other" + | "Pure_Killer" + | "Register_Shifter" + | "Reordering_Killer" + | "Syllable_Modifier" + | "Tone_Letter" + | "Tone_Mark" + | "Virama" + | "Visarga" + | "Vowel" 
+ | "Vowel_Dependent" + | "Vowel_Independent" + }? + \ No newline at end of file diff --git a/uax/uax42/fragments/properties/JSN.xml b/uax/uax42/fragments/properties/JSN.xml new file mode 100644 index 000000000..568f5e270 --- /dev/null +++ b/uax/uax42/fragments/properties/JSN.xml @@ -0,0 +1,5 @@ + + + code-point-attributes &= + attribute JSN { xsd:string { pattern="[A-Z]{0,3}" } }? + \ No newline at end of file diff --git a/uax/uax42/fragments/properties/Join_C.xml b/uax/uax42/fragments/properties/Join_C.xml new file mode 100644 index 000000000..4cbf1d0f0 --- /dev/null +++ b/uax/uax42/fragments/properties/Join_C.xml @@ -0,0 +1,5 @@ + + + code-point-attributes &= + attribute Join_C { boolean }? + \ No newline at end of file diff --git a/uax/uax42/fragments/properties/Name_Alias.xml b/uax/uax42/fragments/properties/Name_Alias.xml new file mode 100644 index 000000000..c2b53b2fe --- /dev/null +++ b/uax/uax42/fragments/properties/Name_Alias.xml @@ -0,0 +1,10 @@ + + + code-point-attributes &= + element name-alias { + attribute alias { xsd:string { pattern="[a-zA-Z0-9]+(( -|- |[\-_ ])[a-zA-Z0-9]+)*" } }?, + attribute type { "abbreviation" | "alternate" + | "control" | "correction" + | "figment" + }? } * + \ No newline at end of file diff --git a/uax/uax42/fragments/properties/Unihan.xml b/uax/uax42/fragments/properties/Unihan.xml new file mode 100644 index 000000000..ba4c042f8 --- /dev/null +++ b/uax/uax42/fragments/properties/Unihan.xml @@ -0,0 +1,347 @@ + + + code-point-attributes &= attribute kAccountingNumeric + { xsd:string { pattern="[0-9]+" } }? + + code-point-attributes &= attribute kAlternateTotalStrokes + { list { xsd:string { pattern="(\d+:[BHJKMPSUV]+)|-" }+ } }? + + code-point-attributes &= attribute kBigFive + { xsd:string { pattern="[0-9A-F]{4}'?" } }? + + code-point-attributes &= attribute kCangjie + { xsd:string { pattern="[A-Z]+" } }? + + code-point-attributes &= attribute kCantonese + { list { xsd:string { pattern="[a-z]{1,6}[1-6]" }+ } }? 
+ + code-point-attributes &= attribute kCCCII + { list { xsd:string { pattern="[0-9A-F]{6}" }+ } }? + + code-point-attributes &= attribute kCheungBauer + { list { xsd:string { pattern="[0-9]{3}/[0-9]{2};[A-Z]*;[a-z1-6\[\]/,]+" }+ } }? + + code-point-attributes &= attribute kCheungBauerIndex + { list { xsd:string { pattern="[0-9]{3}\.[01][0-9]" }+ } }? + + code-point-attributes &= attribute kCihaiT + { list { xsd:string { pattern="[1-9][0-9]{0,3}\.[0-9]{3}" }+ } }? + + code-point-attributes &= attribute kCNS1986 + { xsd:string { pattern="[12E]-[0-9A-F]{4}" } }? + + code-point-attributes &= attribute kCNS1992 + { xsd:string { pattern="[1-9]-[0-9A-F]{4}" } }? + + code-point-attributes &= attribute kCompatibilityVariant + { "" | xsd:string { pattern="U\+[23]?[0-9A-F]{4}" } }? + + code-point-attributes &= attribute kCowles + { list { xsd:string { pattern="[0-9]{1,4}(\.[0-9]{1,2})?" }+ } }? + + code-point-attributes &= attribute kDaeJaweon + { xsd:string { pattern="[0-9]{4}\.[0-9]{2}[01]" } }? + + code-point-attributes &= attribute kDefinition + { xsd:string { pattern='[^\t"]+' } }? + + code-point-attributes &= attribute kEACC + { xsd:string { pattern="[0-9A-F]{6}" } }? + + code-point-attributes &= attribute kFanqie + { list { xsd:string { pattern="[\x{3400}-\x{4DBF}\x{4E00}-\x{9FFF}\x{20000}-\x{2A6DF}]{2}" }+ } }? + + code-point-attributes &= attribute kFenn + { list { xsd:string { pattern="[0-9]+a?[A-KP*]" }+ } }? + + code-point-attributes &= attribute kFennIndex + { list { xsd:string { pattern="[0-9][0-9]{0,2}\.[01][0-9]" }+ } }? + + code-point-attributes &= attribute kFourCornerCode + { list { xsd:string { pattern="[0-9]{4}(\.[0-9])?" }+ } }? + + code-point-attributes &= attribute kGB0 + { xsd:string { pattern="[0-9]{4}" } }? + + code-point-attributes &= attribute kGB1 + { xsd:string { pattern="[0-9]{4}" } }? + + code-point-attributes &= attribute kGB3 + { xsd:string { pattern="[0-9]{4}" } }? 
+ + code-point-attributes &= attribute kGB5 + { xsd:string { pattern="[0-9]{4}" } }? + + code-point-attributes &= attribute kGB7 + { xsd:string { pattern="[0-9]{4}" } }? + + code-point-attributes &= attribute kGB8 + { xsd:string { pattern="[0-9]{4}" } }? + + code-point-attributes &= attribute kGradeLevel + { xsd:string { pattern="[1-6]" } }? + + code-point-attributes &= attribute kGSR + { list { xsd:string { pattern="[0-9]{4}[a-vx-z]'?" }+ } }? + + code-point-attributes &= attribute kHangul + { list { xsd:string { pattern="[\x{1100}-\x{1112}][\x{1161}-\x{1175}][\x{11A8}-\x{11C2}]?:[01ENX]{1,3}" }+ } }? + + code-point-attributes &= attribute kHanYu + { list { xsd:string { pattern="[1-8][0-9]{4}\.[0-3][0-9][0-3]" }+ } }? + + code-point-attributes &= attribute kHanyuPinlu + { list { xsd:string { pattern="[a-z\x{300}-\x{302}\x{304}\x{308}\x{30C}]+\([0-9]+\)" }+ } }? + + code-point-attributes &= attribute kHanyuPinyin + { list { xsd:string { pattern="(\d{5}\.\d{2}0,)*\d{5}\.\d{2}0:([a-z\x{300}-\x{302}\x{304}\x{308}\x{30C}]+,)*[a-z\x{300}-\x{302}\x{304}\x{308}\x{30C}]+" }+ } }? + + code-point-attributes &= attribute kHDZRadBreak + { xsd:string { pattern="[\x{2F00}-\x{2FD5}]\[U\+2F[0-9A-D][0-9A-F]\]:[1-8][0-9]{4}\.[0-3][0-9]0" } }? + + code-point-attributes &= attribute kHKGlyph + { list { xsd:string { pattern="[0-9]{4}" }+ } }? + + code-point-attributes &= attribute kIBMJapan + { list { xsd:string { pattern="F[ABC][0-9A-F]{2}" }+ } }? + + code-point-attributes &= attribute kIICore + { list { xsd:string { pattern="[ABC][GHJKMPT]{1,7}" }+ } }? + + code-point-attributes &= attribute kIRG_GSource + { "" | xsd:string { pattern="G[013578EKS]-[0-9A-F]{4}" } + | xsd:string { pattern="G4K(-\d{5})?" } + | xsd:string { pattern="G(DZ|GH|RM|WZ|XC|XH|ZH)-\d{4}\.\d{2}" } + | xsd:string { pattern="G(BK|CH|CY|HC)(-\d{4}\.\d{2})?" 
} + | xsd:string { pattern="GKX-\d{4}\.\d{2,3}" } + | xsd:string { pattern="G(HZ|HZR)-\d{5}\.\d{2}" } + | xsd:string { pattern="G(CE|FC|IDC23|OCD|XHZ)-\d{3}" } + | xsd:string { pattern="G(H|HF|LGYJ|PGLG|T)-\d{4}" } + | xsd:string { pattern="G(CYY|DM|JZ|KJ|XM|ZFY|ZJW|ZYS)-\d{5}" } + | xsd:string { pattern="G(FZ|IDC)-[0-9A-F]{4}" } + | xsd:string { pattern="GGFZ-\d{6}" } + | xsd:string { pattern="G(LK|Z)-\d{7}" } + | xsd:string { pattern="GU-[023][0-9A-F]{4}" } + | xsd:string { pattern="GZA-[123467]\d{5}" } + }? + + code-point-attributes &= attribute kIRG_HSource + { "" | xsd:string { pattern="H-[0-9A-F]{4}" } + | xsd:string { pattern="H(B[012])-[0-9A-F]{4}" } + | xsd:string { pattern="HD-[23]?[0-9A-F]{4}" } + | xsd:string { pattern="HU-[023][0-9A-F]{4}" } + }? + + code-point-attributes &= attribute kIRG_JSource + { "" | xsd:string { pattern="J[014]-[0-9A-F]{4}" } + | xsd:string { pattern="J3A?-[0-9A-F]{4}" } + | xsd:string { pattern="J13A?-[0-9A-F]{4}" } + | xsd:string { pattern="J14-[0-9A-F]{4}" } + | xsd:string { pattern="JA[34]?-[0-9A-F]{4}" } + | xsd:string { pattern="JARIB-[0-9A-F]{4}" } + | xsd:string { pattern="JH-(JT[ABC][0-9A-F]{3}S?|IB\d{4}|\d{6})" } + | xsd:string { pattern="JK-\d{5}" } + | xsd:string { pattern="JMJ-\d{6}" } + }? + + code-point-attributes &= attribute kIRG_KPSource + { "" | xsd:string { pattern="KP([01]-[0-9A-F]{4}|U-[023][0-9A-F]{4})" } }? + + code-point-attributes &= attribute kIRG_KSource + { "" | xsd:string { pattern="K[0-6]-[0-9A-F]{4}" } + | xsd:string { pattern="KC-\d{5}" } + | xsd:string { pattern="KU-[023][0-9A-F]{4}" } + }? + + code-point-attributes &= attribute kIRG_MSource + { "" | xsd:string { pattern="MA-[0-9A-F]{4}" } + | xsd:string { pattern="MB[12]-[0-9A-F]{4}" } + | xsd:string { pattern="MC-\d{5}" } + | xsd:string { pattern="MDH?-[23]?[0-9A-F]{4}" } + }? + + code-point-attributes &= attribute kIRG_SSource + { "" | xsd:string { pattern="SAT-\d{5}" } }? 
+ + code-point-attributes &= attribute kIRG_TSource + { "" | xsd:string { pattern="T([1-7A-F]|1[1-3])-[0-9A-F]{4}" } + | xsd:string { pattern="TU-[023][0-9A-F]{4}" } + }? + + code-point-attributes &= attribute kIRG_UKSource + { "" | xsd:string { pattern="UK-\d{5}" } }? + + code-point-attributes &= attribute kIRG_USource + { "" | xsd:string { pattern="UTC-\d{5}" } }? + + code-point-attributes &= attribute kIRG_VSource + { "" | xsd:string { pattern="V[0-4]-[0-9A-F]{4}" } + | xsd:string { pattern="VN-[023F][0-9A-F]{4}" } + }? + + code-point-attributes &= attribute kIRGDaeJaweon + { list { xsd:string { pattern="[0-9]{4}\.[0-9]{2}[01]" }+ } }? + + code-point-attributes &= attribute kIRGHanyuDaZidian + { list { xsd:string { pattern="[1-8][0-9]{4}\.[0-3][0-9][01]" }+ } }? + + code-point-attributes &= attribute kIRGKangXi + { list { xsd:string { pattern="[01][0-9]{3}\.[0-7][0-9][01]" }+ } }? + + code-point-attributes &= attribute kJa + { list { xsd:string { pattern="[0-9A-F]{4}S?" }+ } }? + + code-point-attributes &= attribute kJapanese + { list { xsd:string { pattern="[\x{3041}-\x{3096}\x{3099}\x{309A}\x{30A1}-\x{30FA}\x{30FC}]+" }+ } }? + + code-point-attributes &= attribute kJapaneseKun + { list { xsd:string { pattern="[A-Z]+" }+ } }? + + code-point-attributes &= attribute kJapaneseOn + { list { xsd:string { pattern="[A-Z]+" }+ } }? + + code-point-attributes &= attribute kJinmeiyoKanji + { list { xsd:string { pattern="(20[0-9]{2})(:U\+[23]?[0-9A-F]{4})?" }+ } }? + + code-point-attributes &= attribute kJis0 + { list { xsd:string { pattern="[0-9]{4}" }+ } }? + + code-point-attributes &= attribute kJis1 + { list { xsd:string { pattern="[0-9]{4}" }+ } }? + + code-point-attributes &= attribute kJIS0213 + { list { xsd:string { pattern="[12],[0-9]{2},[0-9]{1,2}" }+ } }? + + code-point-attributes &= attribute kJoyoKanji + { list { xsd:string { pattern="(20[0-9]{2})|(U\+[23]?[0-9A-F]{4})" }+ } }? 
+ + code-point-attributes &= attribute kKangXi + { list { xsd:string { pattern="[0-9]{4}\.[0-9]{2}[01]" }+ } }? + + code-point-attributes &= attribute kKarlgren + { list { xsd:string { pattern="[1-9][0-9]{0,3}[A*]?" }+ } }? + + code-point-attributes &= attribute kKorean + { list { xsd:string { pattern="[A-Z]+" }+ } }? + + code-point-attributes &= attribute kKoreanEducationHanja + { list { xsd:string { pattern="20[0-9]{2}" }+ } }? + + code-point-attributes &= attribute kKoreanName + { list { xsd:string { pattern="20[0-9]{2}" }+ } }? + + code-point-attributes &= attribute kLau + { list { xsd:string { pattern="[1-9][0-9]{0,3}" }+ } }? + + code-point-attributes &= attribute kMainlandTelegraph + { list { xsd:string { pattern="[0-9]{4}" }+ } }? + + code-point-attributes &= attribute kMandarin + { list { xsd:string { pattern="[a-z\x{300}-\x{302}\x{304}\x{308}\x{30C}]+" }+ } }? + + code-point-attributes &= attribute kMatthews + { list { xsd:string { pattern="[1-9][0-9]{0,3}(a|\.5)?" }+ } }? + + code-point-attributes &= attribute kMeyerWempe + { list { xsd:string { pattern="[1-9][0-9]{0,3}[a-t*]?" }+ } }? + + code-point-attributes &= attribute kMojiJoho + { list { xsd:string { pattern="MJ\d{6}(:(FE0[01]|E01[01][0-9A-F]))?" }+ } }? + + code-point-attributes &= attribute kMorohashi + { list { xsd:string { pattern="(\d{5}'{0,2}|H\d{3})(:(FE0[01]|E010[0-9A-F]))?" }+ } }? + + code-point-attributes &= attribute kNelson + { list { xsd:string { pattern="[0-9]{4}" }+ } }? + + code-point-attributes &= attribute kOtherNumeric + { list { xsd:string { pattern="[0-9]+" }+ } }? + + code-point-attributes &= attribute kPhonetic + { list { xsd:string { pattern="[1-9][0-9]{0,3}[A-D]?\*?" }+ } }? + + code-point-attributes &= attribute kPrimaryNumeric + { list { xsd:string { pattern="[0-9]+" }+ } }? + + code-point-attributes &= attribute kPseudoGB1 + { xsd:string { pattern="[0-9]{4}" } }? 
+ + code-point-attributes &= attribute kRSAdobe_Japan1_6 + { list { xsd:string { pattern="[CV]\+[0-9]{1,5}\+[1-9][0-9]{0,2}\.[1-9][0-9]?\.[0-9]{1,2}" }+ } }? + + code-point-attributes &= attribute kRSUnicode + { list { xsd:string { pattern="[1-9][0-9]{0,2}'{0,3}\.-?[0-9]{1,2}" }+ } }? + + code-point-attributes &= attribute kSBGY + { list { xsd:string { pattern="[0-9]{3}\.[0-7][0-9]" }+ } }? + + code-point-attributes &= attribute kSemanticVariant + { list { xsd:string { pattern="U\+[23]?[0-9A-F]{4}(<[ks][A-Za-z0-9_]+(:[TBZFJ]+)?(,[ks][A-Za-z0-9_]+(:[TBZFJ]+)?)*)?" }+ } }? + + code-point-attributes &= attribute kSimplifiedVariant + { list { xsd:string { pattern="U\+[23]?[0-9A-F]{4}" }+ } }? + + code-point-attributes &= attribute kSMSZD2003Index + { list { xsd:string { pattern="\d{1,3}\.\d{2}" }+ } }? + + code-point-attributes &= attribute kSMSZD2003Readings + { list { xsd:string { pattern="[a-z\x{300}\x{301}\x{302}\x{304}\x{308}\x{30C}]+(,[a-z\x{300}\x{301}\x{302}\x{304}\x{308}\x{30C}]+)*\x{7CB5}[a-z]+[1-6]([a-z]+[1-6])?(,[a-z]+[1-6]([a-z]+[1-6])?)*" }+ } }? + + code-point-attributes &= attribute kSpecializedSemanticVariant + { list { xsd:string { pattern="U\+[23]?[0-9A-F]{4}(<[ks][A-Za-z0-9_]+(:[TBZFJ]+)?(,[ks][A-Za-z0-9_]+(:[TBZFJ]+)?)*)?" }+ } }? + + code-point-attributes &= attribute kSpoofingVariant + { list { xsd:string { pattern="U\+[23]?[0-9A-F]{4}" }+ } }? + + code-point-attributes &= attribute kStrange + { list { ( xsd:string { pattern="[ACU]" } + | xsd:string { pattern="B:U\+31[0-2AB][0-9A-F]" } + | xsd:string { pattern="[FMOR](:U\+[23]?[0-9A-F]{4})?" } + | xsd:string { pattern="H:U\+31[3-8][0-9A-F]" } + | xsd:string { pattern="I(:U\+[23]?[0-9A-F]{4})*" } + | xsd:string { pattern="K(:U\+30[A-F][0-9A-F])+" } + | xsd:string { pattern="S:[4-9][0-9]" } + )+}}? + + code-point-attributes &= attribute kTaiwanTelegraph + { list { xsd:string { pattern="[0-9]{4}" }+ } }? 
+ + code-point-attributes &= attribute kTang + { list { xsd:string { pattern="\*?[A-Za-z()\x{E6}\x{251}\x{259}\x{25B}\x{300}\x{30C}]+" }+ } }? + + code-point-attributes &= attribute kTGH + { list { xsd:string { pattern="20[0-9]{2}:[1-9][0-9]{0,3}" }+ } }? + + code-point-attributes &= attribute kTGHZ2013 + { list { xsd:string { pattern="[0-9]{3}\.[0-9]{3}(,[0-9]{3}\.[0-9]{3})*:[a-z\x{300}-\x{302}\x{304}\x{308}\x{30C}]+" }+ } }? + + code-point-attributes &= attribute kTotalStrokes + { list { xsd:string { pattern="[1-9][0-9]{0,2}" }+ } }? + + code-point-attributes &= attribute kTraditionalVariant + { list { xsd:string { pattern="U\+[23]?[0-9A-F]{4}" }+ } }? + + code-point-attributes &= attribute kUnihanCore2020 + { xsd:string { pattern="[GHJKMPT]{1,7}" } }? + + code-point-attributes &= attribute kVietnamese + { list { xsd:string { pattern="[A-Za-z\x{110}\x{111}\x{300}-\x{303}\x{306}\x{309}\x{31B}\x{323}]+" }+ } }? + + code-point-attributes &= attribute kVietnameseNumeric + { list { xsd:string { pattern="\d+" }+ } }? + + code-point-attributes &= attribute kXerox + { list { xsd:string { pattern="[0-9]{3}:[0-9]{3}" }+ } }? + + code-point-attributes &= attribute kXHC1983 + { list { xsd:string { pattern="[0-9]{4}\.[0-9]{3}\*?(,[0-9]{4}\.[0-9]{3}\*?)*:[a-z\x{300}\x{301}\x{304}\x{308}\x{30C}]+" }+ } }? + + code-point-attributes &= attribute kZhuang + { list { xsd:string { pattern="[a-z]+\*?" }+ } }? + + code-point-attributes &= attribute kZhuangNumeric + { list { xsd:string { pattern="\d+" }+ } }? + + code-point-attributes &= attribute kZVariant + { list { xsd:string { pattern="U\+[23]?[0-9A-F]{4}(<[ks][A-Za-z0-9_]+(:[TBZ]+)?(,[ks][A-Za-z0-9_]+(:[TBZ]+)?)*)?" }+ } }? 
+ \ No newline at end of file diff --git a/uax/uax42/fragments/properties/age.xml b/uax/uax42/fragments/properties/age.xml new file mode 100644 index 000000000..c15963146 --- /dev/null +++ b/uax/uax42/fragments/properties/age.xml @@ -0,0 +1,22 @@ + + + code-point-attributes &= + attribute age { "1.1" + | "2.0" | "2.1" + | "3.0" | "3.1" | "3.2" + | "4.0" | "4.1" + | "5.0" | "5.1" | "5.2" + | "6.0" | "6.1" | "6.2" | "6.3" + | "7.0" + | "8.0" + | "9.0" + | "10.0" + | "11.0" + | "12.0" | "12.1" + | "13.0" + | "14.0" + | "15.0" | "15.1" + | "16.0" + | "unassigned" + }? + \ No newline at end of file diff --git a/uax/uax42/fragments/properties/bc.xml b/uax/uax42/fragments/properties/bc.xml new file mode 100644 index 000000000..d3e70a6ab --- /dev/null +++ b/uax/uax42/fragments/properties/bc.xml @@ -0,0 +1,17 @@ + + + code-point-attributes &= + attribute bc { "AL" | "AN" + | "B" | "BN" + | "CS" + | "EN" | "ES" | "ET" + | "FSI" + | "L" | "LRE" | "LRI" | "LRO" + | "NSM" + | "ON" + | "PDF" | "PDI" + | "R" | "RLE" | "RLI" | "RLO" + | "S" + | "WS" + }? 
+ \ No newline at end of file diff --git a/uax/uax42/fragments/properties/blk.xml b/uax/uax42/fragments/properties/blk.xml new file mode 100644 index 000000000..ecd721a63 --- /dev/null +++ b/uax/uax42/fragments/properties/blk.xml @@ -0,0 +1,344 @@ + + + code-point-attributes &= + attribute blk { "Adlam" + | "Aegean_Numbers" + | "Ahom" + | "Alchemical" + | "Alphabetic_PF" + | "Anatolian_Hieroglyphs" + | "Ancient_Greek_Music" + | "Ancient_Greek_Numbers" + | "Ancient_Symbols" + | "Arabic" + | "Arabic_Ext_A" + | "Arabic_Ext_B" + | "Arabic_Ext_C" + | "Arabic_Math" + | "Arabic_PF_A" + | "Arabic_PF_B" + | "Arabic_Sup" + | "Armenian" + | "Arrows" + | "ASCII" + | "Avestan" + | "Balinese" + | "Bamum" + | "Bamum_Sup" + | "Bassa_Vah" + | "Batak" + | "Bengali" + | "Bhaiksuki" + | "Block_Elements" + | "Bopomofo" + | "Bopomofo_Ext" + | "Box_Drawing" + | "Brahmi" + | "Braille" + | "Buginese" + | "Buhid" + | "Byzantine_Music" + | "Carian" + | "Caucasian_Albanian" + | "Chakma" + | "Cham" + | "Cherokee" + | "Cherokee_Sup" + | "Chess_Symbols" + | "Chorasmian" + | "CJK" + | "CJK_Compat" + | "CJK_Compat_Forms" + | "CJK_Compat_Ideographs" + | "CJK_Compat_Ideographs_Sup" + | "CJK_Ext_A" + | "CJK_Ext_B" + | "CJK_Ext_C" + | "CJK_Ext_D" + | "CJK_Ext_E" + | "CJK_Ext_F" + | "CJK_Ext_G" + | "CJK_Ext_H" + | "CJK_Ext_I" + | "CJK_Radicals_Sup" + | "CJK_Strokes" + | "CJK_Symbols" + | "Compat_Jamo" + | "Control_Pictures" + | "Coptic" + | "Coptic_Epact_Numbers" + | "Counting_Rod" + | "Cuneiform" + | "Cuneiform_Numbers" + | "Currency_Symbols" + | "Cypriot_Syllabary" + | "Cypro_Minoan" + | "Cyrillic" + | "Cyrillic_Ext_A" + | "Cyrillic_Ext_B" + | "Cyrillic_Ext_C" + | "Cyrillic_Ext_D" + | "Cyrillic_Sup" + | "Deseret" + | "Devanagari" + | "Devanagari_Ext" + | "Devanagari_Ext_A" + | "Diacriticals" + | "Diacriticals_Ext" + | "Diacriticals_For_Symbols" + | "Diacriticals_Sup" + | "Dingbats" + | "Dives_Akuru" + | "Dogra" + | "Domino" + | "Duployan" + | "Early_Dynastic_Cuneiform" + | 
"Egyptian_Hieroglyph_Format_Controls" + | "Egyptian_Hieroglyphs" + | "Egyptian_Hieroglyphs_Ext_A" + | "Elbasan" + | "Elymaic" + | "Emoticons" + | "Enclosed_Alphanum" + | "Enclosed_Alphanum_Sup" + | "Enclosed_CJK" + | "Enclosed_Ideographic_Sup" + | "Ethiopic" + | "Ethiopic_Ext" + | "Ethiopic_Ext_A" + | "Ethiopic_Ext_B" + | "Ethiopic_Sup" + | "Garay" + | "Geometric_Shapes" + | "Geometric_Shapes_Ext" + | "Georgian" + | "Georgian_Ext" + | "Georgian_Sup" + | "Glagolitic" + | "Glagolitic_Sup" + | "Gothic" + | "Grantha" + | "Greek" + | "Greek_Ext" + | "Gujarati" + | "Gunjala_Gondi" + | "Gurmukhi" + | "Gurung_Khema" + | "Half_And_Full_Forms" + | "Half_Marks" + | "Hangul" + | "Hanifi_Rohingya" + | "Hanunoo" + | "Hatran" + | "Hebrew" + | "High_PU_Surrogates" + | "High_Surrogates" + | "Hiragana" + | "IDC" + | "Ideographic_Symbols" + | "Imperial_Aramaic" + | "Indic_Number_Forms" + | "Indic_Siyaq_Numbers" + | "Inscriptional_Pahlavi" + | "Inscriptional_Parthian" + | "IPA_Ext" + | "Jamo" + | "Jamo_Ext_A" + | "Jamo_Ext_B" + | "Javanese" + | "Kaithi" + | "Kaktovik_Numerals" + | "Kana_Ext_A" + | "Kana_Ext_B" + | "Kana_Sup" + | "Kanbun" + | "Kangxi" + | "Kannada" + | "Katakana" + | "Katakana_Ext" + | "Kawi" + | "Kayah_Li" + | "Kharoshthi" + | "Khitan_Small_Script" + | "Khmer" + | "Khmer_Symbols" + | "Khojki" + | "Khudawadi" + | "Kirat_Rai" + | "Lao" + | "Latin_1_Sup" + | "Latin_Ext_A" + | "Latin_Ext_Additional" + | "Latin_Ext_B" + | "Latin_Ext_C" + | "Latin_Ext_D" + | "Latin_Ext_E" + | "Latin_Ext_F" + | "Latin_Ext_G" + | "Lepcha" + | "Letterlike_Symbols" + | "Limbu" + | "Linear_A" + | "Linear_B_Ideograms" + | "Linear_B_Syllabary" + | "Lisu" + | "Lisu_Sup" + | "Low_Surrogates" + | "Lycian" + | "Lydian" + | "Mahajani" + | "Mahjong" + | "Makasar" + | "Malayalam" + | "Mandaic" + | "Manichaean" + | "Marchen" + | "Masaram_Gondi" + | "Math_Alphanum" + | "Math_Operators" + | "Mayan_Numerals" + | "Medefaidrin" + | "Meetei_Mayek" + | "Meetei_Mayek_Ext" + | "Mende_Kikakui" + | 
"Meroitic_Cursive" + | "Meroitic_Hieroglyphs" + | "Miao" + | "Misc_Arrows" + | "Misc_Math_Symbols_A" + | "Misc_Math_Symbols_B" + | "Misc_Pictographs" + | "Misc_Symbols" + | "Misc_Technical" + | "Modi" + | "Modifier_Letters" + | "Modifier_Tone_Letters" + | "Mongolian" + | "Mongolian_Sup" + | "Mro" + | "Multani" + | "Music" + | "Myanmar" + | "Myanmar_Ext_A" + | "Myanmar_Ext_B" + | "Myanmar_Ext_C" + | "Nabataean" + | "Nag_Mundari" + | "Nandinagari" + | "NB" + | "New_Tai_Lue" + | "Newa" + | "NKo" + | "Number_Forms" + | "Nushu" + | "Nyiakeng_Puachue_Hmong" + | "OCR" + | "Ogham" + | "Ol_Chiki" + | "Ol_Onal" + | "Old_Hungarian" + | "Old_Italic" + | "Old_North_Arabian" + | "Old_Permic" + | "Old_Persian" + | "Old_Sogdian" + | "Old_South_Arabian" + | "Old_Turkic" + | "Old_Uyghur" + | "Oriya" + | "Ornamental_Dingbats" + | "Osage" + | "Osmanya" + | "Ottoman_Siyaq_Numbers" + | "Pahawh_Hmong" + | "Palmyrene" + | "Pau_Cin_Hau" + | "Phags_Pa" + | "Phaistos" + | "Phoenician" + | "Phonetic_Ext" + | "Phonetic_Ext_Sup" + | "Playing_Cards" + | "Psalter_Pahlavi" + | "PUA" + | "Punctuation" + | "Rejang" + | "Rumi" + | "Runic" + | "Samaritan" + | "Saurashtra" + | "Sharada" + | "Shavian" + | "Shorthand_Format_Controls" + | "Siddham" + | "Sinhala" + | "Sinhala_Archaic_Numbers" + | "Small_Forms" + | "Small_Kana_Ext" + | "Sogdian" + | "Sora_Sompeng" + | "Soyombo" + | "Specials" + | "Sundanese" + | "Sundanese_Sup" + | "Sunuwar" + | "Sup_Arrows_A" + | "Sup_Arrows_B" + | "Sup_Arrows_C" + | "Sup_Math_Operators" + | "Sup_PUA_A" + | "Sup_PUA_B" + | "Sup_Punctuation" + | "Sup_Symbols_And_Pictographs" + | "Super_And_Sub" + | "Sutton_SignWriting" + | "Syloti_Nagri" + | "Symbols_And_Pictographs_Ext_A" + | "Symbols_For_Legacy_Computing" + | "Symbols_For_Legacy_Computing_Sup" + | "Syriac" + | "Syriac_Sup" + | "Tagalog" + | "Tagbanwa" + | "Tags" + | "Tai_Le" + | "Tai_Tham" + | "Tai_Viet" + | "Tai_Xuan_Jing" + | "Takri" + | "Tamil" + | "Tamil_Sup" + | "Tangsa" + | "Tangut" + | "Tangut_Components" + | 
"Tangut_Sup" + | "Telugu" + | "Thaana" + | "Thai" + | "Tibetan" + | "Tifinagh" + | "Tirhuta" + | "Todhri" + | "Toto" + | "Transport_And_Map" + | "Tulu_Tigalari" + | "UCAS" + | "UCAS_Ext" + | "UCAS_Ext_A" + | "Ugaritic" + | "Vai" + | "Vedic_Ext" + | "Vertical_Forms" + | "Vithkuqi" + | "VS" + | "VS_Sup" + | "Wancho" + | "Warang_Citi" + | "Yezidi" + | "Yi_Radicals" + | "Yi_Syllables" + | "Yijing" + | "Zanabazar_Square" + | "Znamenny_Music" + }? + \ No newline at end of file diff --git a/uax/uax42/fragments/properties/bmg.xml b/uax/uax42/fragments/properties/bmg.xml new file mode 100644 index 000000000..d4431070d --- /dev/null +++ b/uax/uax42/fragments/properties/bmg.xml @@ -0,0 +1,5 @@ + + + code-point-attributes &= + attribute bmg { "" | single-code-point }? + \ No newline at end of file diff --git a/uax/uax42/fragments/properties/boundaries.xml b/uax/uax42/fragments/properties/boundaries.xml new file mode 100644 index 000000000..abe4ffe9a --- /dev/null +++ b/uax/uax42/fragments/properties/boundaries.xml @@ -0,0 +1,58 @@ + + + code-point-attributes &= + attribute Gr_Base { boolean }? + + code-point-attributes &= + attribute Gr_Ext { boolean }? + + code-point-attributes &= + attribute OGr_Ext { boolean }? + + code-point-attributes &= + attribute Gr_Link { boolean }? + + code-point-attributes &= + attribute GCB { "CN" | "CR" + | "EB" | "EBG" | "EM" | "EX" + | "GAZ" + | "L" | "LF" | "LV" | "LVT" + | "PP" + | "RI" + | "SM" + | "T" + | "V" + | "XX" + | "ZWJ" + }? + + code-point-attributes &= + attribute WB { "CR" + | "DQ" + | "EB" | "EBG" | "EM" | "EX" | "Extend" + | "FO" + | "GAZ" + | "HL" + | "KA" + | "LE" | "LF" + | "MB" | "ML" | "MN" + | "NL" | "NU" + | "RI" + | "SQ" + | "WSegSpace" + | "XX" + | "ZWJ" + }? + + code-point-attributes &= + attribute SB { "AT" + | "CL" | "CR" + | "EX" + | "FO" + | "LE" | "LF" | "LO" + | "NU" + | "SC" | "SE" | "SP" | "ST" + | "UP" + | "XX" + }? 
+ \ No newline at end of file diff --git a/uax/uax42/fragments/properties/bpb.xml b/uax/uax42/fragments/properties/bpb.xml new file mode 100644 index 000000000..3924ed3e9 --- /dev/null +++ b/uax/uax42/fragments/properties/bpb.xml @@ -0,0 +1,5 @@ + + + code-point-attributes &= + attribute bpb { "#" | single-code-point }? + \ No newline at end of file diff --git a/uax/uax42/fragments/properties/bpt.xml b/uax/uax42/fragments/properties/bpt.xml new file mode 100644 index 000000000..183c9bf3f --- /dev/null +++ b/uax/uax42/fragments/properties/bpt.xml @@ -0,0 +1,5 @@ + + + code-point-attributes &= + attribute bpt { "o" | "c" | "n" }? + \ No newline at end of file diff --git a/uax/uax42/fragments/properties/case_folding.xml b/uax/uax42/fragments/properties/case_folding.xml new file mode 100644 index 000000000..8708699be --- /dev/null +++ b/uax/uax42/fragments/properties/case_folding.xml @@ -0,0 +1,8 @@ + + + code-point-attributes &= + attribute scf { "#" | single-code-point }? + + code-point-attributes &= + attribute cf { "#" | one-or-more-code-points }? + \ No newline at end of file diff --git a/uax/uax42/fragments/properties/case_mapping.xml b/uax/uax42/fragments/properties/case_mapping.xml new file mode 100644 index 000000000..c1296b7b9 --- /dev/null +++ b/uax/uax42/fragments/properties/case_mapping.xml @@ -0,0 +1,11 @@ + + + code-point-attributes &= + attribute uc { "#" | one-or-more-code-points }? + + code-point-attributes &= + attribute lc { "#" | one-or-more-code-points }? + + code-point-attributes &= + attribute tc { "#" | one-or-more-code-points }? + \ No newline at end of file diff --git a/uax/uax42/fragments/properties/case_other.xml b/uax/uax42/fragments/properties/case_other.xml new file mode 100644 index 000000000..df4b97e64 --- /dev/null +++ b/uax/uax42/fragments/properties/case_other.xml @@ -0,0 +1,32 @@ + + + code-point-attributes &= + attribute CI { boolean }? + + code-point-attributes &= + attribute Cased { boolean }? 
+ + code-point-attributes &= + attribute CWCF { boolean }? + + code-point-attributes &= + attribute CWCM { boolean }? + + code-point-attributes &= + attribute CWL { boolean }? + + code-point-attributes &= + attribute CWKCF { boolean }? + + code-point-attributes &= + attribute CWT { boolean }? + + code-point-attributes &= + attribute CWU { boolean }? + + code-point-attributes &= + attribute NFKC_CF { "#" | zero-or-more-code-points }? + + code-point-attributes &= + attribute NFKC_SCF { "#" | zero-or-more-code-points }? + \ No newline at end of file diff --git a/uax/uax42/fragments/properties/casing.xml b/uax/uax42/fragments/properties/casing.xml new file mode 100644 index 000000000..503f05999 --- /dev/null +++ b/uax/uax42/fragments/properties/casing.xml @@ -0,0 +1,14 @@ + + + code-point-attributes &= + attribute Upper { boolean }? + + code-point-attributes &= + attribute Lower { boolean }? + + code-point-attributes &= + attribute OUpper { boolean }? + + code-point-attributes &= + attribute OLower { boolean }? + \ No newline at end of file diff --git a/uax/uax42/fragments/properties/ccc.xml b/uax/uax42/fragments/properties/ccc.xml new file mode 100644 index 000000000..8226509d7 --- /dev/null +++ b/uax/uax42/fragments/properties/ccc.xml @@ -0,0 +1,5 @@ + + + code-point-attributes &= + attribute ccc { xsd:integer { minInclusive="0" maxInclusive="254" } }? + \ No newline at end of file diff --git a/uax/uax42/fragments/properties/cjkEACC.xml b/uax/uax42/fragments/properties/cjkEACC.xml new file mode 100644 index 000000000..08222c4f0 --- /dev/null +++ b/uax/uax42/fragments/properties/cjkEACC.xml @@ -0,0 +1,5 @@ + + + code-point-attributes &= attribute cjkEACC + { xsd:string { pattern="[0-9A-F]{6}" } }? 
+ \ No newline at end of file diff --git a/uax/uax42/fragments/properties/cjkIRG_TSource.xml b/uax/uax42/fragments/properties/cjkIRG_TSource.xml new file mode 100644 index 000000000..49f9c3917 --- /dev/null +++ b/uax/uax42/fragments/properties/cjkIRG_TSource.xml @@ -0,0 +1,6 @@ + + + code-point-attributes &= attribute cjkIRG_TSource + { xsd:string { pattern="T([1-7A-F]|1[1-3])-[0-9A-F]{4} +| TU-[023][0-9A-F]{4}" } }? + \ No newline at end of file diff --git a/uax/uax42/fragments/properties/composition.xml b/uax/uax42/fragments/properties/composition.xml new file mode 100644 index 000000000..96ce4abcf --- /dev/null +++ b/uax/uax42/fragments/properties/composition.xml @@ -0,0 +1,8 @@ + + + code-point-attributes &= + attribute CE { boolean }? + + code-point-attributes &= + attribute Comp_Ex { boolean }? + \ No newline at end of file diff --git a/uax/uax42/fragments/properties/decomposition.xml b/uax/uax42/fragments/properties/decomposition.xml new file mode 100644 index 000000000..833a7d1e0 --- /dev/null +++ b/uax/uax42/fragments/properties/decomposition.xml @@ -0,0 +1,11 @@ + + + code-point-attributes &= + attribute dt { "can" | "com" | "enc" | "fin" | "font" | "fra" + | "init" | "iso" | "med" | "nar" | "nb" | "sml" + | "sqr" | "sub" | "sup" | "vert" | "wide" | "none" + }? + + code-point-attributes &= + attribute dm { "#" | zero-or-more-code-points }? + \ No newline at end of file diff --git a/uax/uax42/fragments/properties/ea.xml b/uax/uax42/fragments/properties/ea.xml new file mode 100644 index 000000000..d51bf2441 --- /dev/null +++ b/uax/uax42/fragments/properties/ea.xml @@ -0,0 +1,5 @@ + + + code-point-attributes &= + attribute ea { "A" | "F" | "H" | "N" | "Na" | "W" }? 
+ \ No newline at end of file diff --git a/uax/uax42/fragments/properties/function_graphic.xml b/uax/uax42/fragments/properties/function_graphic.xml new file mode 100644 index 000000000..7ce510adc --- /dev/null +++ b/uax/uax42/fragments/properties/function_graphic.xml @@ -0,0 +1,68 @@ + + + code-point-attributes &= + attribute Dash { boolean }? + + code-point-attributes &= + attribute Hyphen { boolean }? + + code-point-attributes &= + attribute QMark { boolean }? + + code-point-attributes &= + attribute Term { boolean }? + + code-point-attributes &= + attribute STerm { boolean }? + + code-point-attributes &= + attribute Dia { boolean }? + + code-point-attributes &= + attribute Ext { boolean }? + + code-point-attributes &= + attribute SD { boolean }? + + code-point-attributes &= + attribute Alpha { boolean }? + + code-point-attributes &= + attribute OAlpha { boolean }? + + code-point-attributes &= + attribute Math { boolean }? + + code-point-attributes &= + attribute OMath { boolean }? + + code-point-attributes &= + attribute Hex { boolean }? + + code-point-attributes &= + attribute AHex { boolean }? + + code-point-attributes &= + attribute DI { boolean }? + + code-point-attributes &= + attribute ODI { boolean }? + + code-point-attributes &= + attribute LOE { boolean }? + + code-point-attributes &= + attribute PCM { boolean }? + + code-point-attributes &= + attribute MCM { boolean }? + + code-point-attributes &= + attribute WSpace { boolean }? + + code-point-attributes &= + attribute vo { "R" | "Tr" | "Tu" | "U" }? + + code-point-attributes &= + attribute RI { boolean }? 
+ \ No newline at end of file diff --git a/uax/uax42/fragments/properties/gc.xml b/uax/uax42/fragments/properties/gc.xml new file mode 100644 index 000000000..36cd1f774 --- /dev/null +++ b/uax/uax42/fragments/properties/gc.xml @@ -0,0 +1,12 @@ + + + code-point-attributes &= + attribute gc { "Cc" | "Cf" | "Cn" | "Co" | "Cs" + | "Ll" | "Lm" | "Lo" | "Lt" | "Lu" + | "Mc" | "Me" | "Mn" + | "Nd" | "Nl" | "No" + | "Pc" | "Pd" | "Pe" | "Pf" | "Pi" | "Po" | "Ps" + | "Sc" | "Sk" | "Sm" | "So" + | "Zl" | "Zp" | "Zs" + }? + \ No newline at end of file diff --git a/uax/uax42/fragments/properties/hst.xml b/uax/uax42/fragments/properties/hst.xml new file mode 100644 index 000000000..385cd466a --- /dev/null +++ b/uax/uax42/fragments/properties/hst.xml @@ -0,0 +1,5 @@ + + + code-point-attributes &= + attribute hst { "L" | "LV" | "LVT" | "NA" | "T" | "V" }? + \ No newline at end of file diff --git a/uax/uax42/fragments/properties/identifier.xml b/uax/uax42/fragments/properties/identifier.xml new file mode 100644 index 000000000..0ab95a27f --- /dev/null +++ b/uax/uax42/fragments/properties/identifier.xml @@ -0,0 +1,26 @@ + + + code-point-attributes &= + attribute IDS { boolean }? + + code-point-attributes &= + attribute OIDS { boolean }? + + code-point-attributes &= + attribute XIDS { boolean }? + + code-point-attributes &= + attribute IDC { boolean }? + + code-point-attributes &= + attribute OIDC { boolean }? + + code-point-attributes &= + attribute XIDC { boolean }? + + code-point-attributes &= + attribute ID_Compat_Math_Start { boolean }? + + code-point-attributes &= + attribute ID_Compat_Math_Continue { boolean }? + \ No newline at end of file diff --git a/uax/uax42/fragments/properties/ideographs.xml b/uax/uax42/fragments/properties/ideographs.xml new file mode 100644 index 000000000..0c758e342 --- /dev/null +++ b/uax/uax42/fragments/properties/ideographs.xml @@ -0,0 +1,23 @@ + + + code-point-attributes &= + attribute Ideo { boolean }? 
+ + code-point-attributes &= + attribute UIdeo { boolean }? + + code-point-attributes &= + attribute EqUIdeo { single-code-point }? + + code-point-attributes &= + attribute IDSB { boolean }? + + code-point-attributes &= + attribute IDST { boolean }? + + code-point-attributes &= + attribute IDSU { boolean }? + + code-point-attributes &= + attribute Radical { boolean }? + \ No newline at end of file diff --git a/uax/uax42/fragments/properties/isc.xml b/uax/uax42/fragments/properties/isc.xml new file mode 100644 index 000000000..f19b59317 --- /dev/null +++ b/uax/uax42/fragments/properties/isc.xml @@ -0,0 +1,5 @@ + + + code-point-attributes &= + attribute isc { text }? + \ No newline at end of file diff --git a/uax/uax42/fragments/properties/joining.xml b/uax/uax42/fragments/properties/joining.xml new file mode 100644 index 000000000..ba6684a27 --- /dev/null +++ b/uax/uax42/fragments/properties/joining.xml @@ -0,0 +1,52 @@ + + + code-point-attributes &= + attribute jt { "C" | "D" | "L" | "R" | "T" | "U" }? 
+ + code-point-attributes &= + attribute jg { "African_Feh" | "African_Noon" | "African_Qaf" + | "Ain" | "Alaph" | "Alef" + | "Beh" | "Beth" | "Burushaski_Yeh_Barree" + | "Dal" | "Dalath_Rish" + | "E" + | "Farsi_Yeh" | "Fe" | "Feh" | "Final_Semkath" + | "Gaf" | "Gamal" + | "Hah" | "Hanifi_Rohingya_Kinna_Ya" + | "Hanifi_Rohingya_Pa" | "He" | "Heh" | "Heh_Goal" + | "Heth" + | "Kaf" | "Kaph" | "Khaph" | "Knotted_Heh" + | "Lam" | "Lamadh" + | "Malayalam_Bha" | "Malayalam_Ja" | "Malayalam_Lla" + | "Malayalam_Llla" | "Malayalam_Nga" + | "Malayalam_Nna" | "Malayalam_Nnna" + | "Malayalam_Nya" | "Malayalam_Ra" | "Malayalam_Ssa" + | "Malayalam_Tta" | "Manichaean_Aleph" + | "Manichaean_Ayin" | "Manichaean_Beth" + | "Manichaean_Daleth" | "Manichaean_Dhamedh" + | "Manichaean_Five" | "Manichaean_Gimel" + | "Manichaean_Heth" | "Manichaean_Hundred" + | "Manichaean_Kaph" | "Manichaean_Lamedh" + | "Manichaean_Mem" | "Manichaean_Nun" + | "Manichaean_One" | "Manichaean_Pe" + | "Manichaean_Qoph" | "Manichaean_Resh" + | "Manichaean_Sadhe" | "Manichaean_Samekh" + | "Manichaean_Taw" | "Manichaean_Ten" + | "Manichaean_Teth" | "Manichaean_Thamedh" + | "Manichaean_Twenty" | "Manichaean_Waw" + | "Manichaean_Yodh" | "Manichaean_Zayin" | "Meem" + | "Mim" + | "No_Joining_Group" | "Noon" | "Nun" | "Nya" + | "Pe" + | "Qaf" | "Qaph" + | "Reh" | "Reversed_Pe" | "Rohingya_Yeh" + | "Sad" | "Sadhe" | "Seen" | "Semkath" | "Shin" + | "Straight_Waw" | "Swash_Kaf" | "Syriac_Waw" + | "Tah" | "Taw" | "Teh_Marbuta" | "Teh_Marbuta_Goal" + | "Teth" | "Thin_Yeh" + | "Vertical_Tail" + | "Waw" + | "Yeh" | "Yeh_Barree" | "Yeh_With_Tail" | "Yudh" + | "Yudh_He" + | "Zain" | "Zhain" + }? 
+ \ No newline at end of file diff --git a/uax/uax42/fragments/properties/lb.xml b/uax/uax42/fragments/properties/lb.xml new file mode 100644 index 000000000..ee1f36cac --- /dev/null +++ b/uax/uax42/fragments/properties/lb.xml @@ -0,0 +1,24 @@ + + + code-point-attributes &= + attribute lb { "AI" | "AK" | "AL" | "AP" | "AS" + | "B2" | "BA" | "BB" | "BK" + | "CB" | "CJ" | "CL" | "CM" | "CP" | "CR" + | "EB" | "EM" | "EX" + | "GL" + | "H2" | "H3" | "HL" | "HY" + | "ID" | "IN" | "IS" + | "JL" | "JT" | "JV" + | "LF" + | "NL" | "NS" | "NU" + | "OP" + | "PO" | "PR" + | "QU" + | "RI" + | "SA" | "SG" | "SP" | "SY" + | "VF" | "VI" + | "WJ" + | "XX" + | "ZW" | "ZWJ" + }? + \ No newline at end of file diff --git a/uax/uax42/fragments/properties/miscellaneous.xml b/uax/uax42/fragments/properties/miscellaneous.xml new file mode 100644 index 000000000..5dafe8c22 --- /dev/null +++ b/uax/uax42/fragments/properties/miscellaneous.xml @@ -0,0 +1,11 @@ + + + code-point-attributes &= + attribute Dep { boolean }? + + code-point-attributes &= + attribute VS { boolean }? + + code-point-attributes &= + attribute NChar { boolean }? + \ No newline at end of file diff --git a/uax/uax42/fragments/properties/na.xml b/uax/uax42/fragments/properties/na.xml new file mode 100644 index 000000000..4c4644c31 --- /dev/null +++ b/uax/uax42/fragments/properties/na.xml @@ -0,0 +1,13 @@ + + + code-point-attributes &= + attribute na { "" | + "CJK UNIFIED IDEOGRAPH-#" | + "CJK COMPATIBILITY IDEOGRAPH-#" | + "EGYPTIAN HIEROGLYPH-#" | + "TANGUT IDEOGRAPH-#" | + "KHITAN SMALL SCRIPT CHARACTER-#" | + "NUSHU CHARACTER-#" | + xsd:string { pattern="[a-zA-Z0-9]+(( -|- |[\-_ ])[a-zA-Z0-9]+)*" } + }? 
+ \ No newline at end of file diff --git a/uax/uax42/fragments/properties/na1.xml b/uax/uax42/fragments/properties/na1.xml new file mode 100644 index 000000000..592de98c3 --- /dev/null +++ b/uax/uax42/fragments/properties/na1.xml @@ -0,0 +1,5 @@ + + + code-point-attributes &= + attribute na1 { "" | xsd:string { pattern="[a-zA-Z0-9]+([\-_ ][a-zA-Z0-9]+)*( \(.*\))?" } }? + \ No newline at end of file diff --git a/uax/uax42/fragments/properties/numeric.xml b/uax/uax42/fragments/properties/numeric.xml new file mode 100644 index 000000000..24230aee1 --- /dev/null +++ b/uax/uax42/fragments/properties/numeric.xml @@ -0,0 +1,8 @@ + + + code-point-attributes &= + attribute nt { "De" | "Di" | "Nu" | "None" }? + + code-point-attributes &= + attribute nv { "NaN" | xsd:string { pattern="-?[0-9]+(/[0-9]+)?" } }? + \ No newline at end of file diff --git a/uax/uax42/fragments/properties/pattern.xml b/uax/uax42/fragments/properties/pattern.xml new file mode 100644 index 000000000..baa00a73c --- /dev/null +++ b/uax/uax42/fragments/properties/pattern.xml @@ -0,0 +1,8 @@ + + + code-point-attributes &= + attribute Pat_Syn { boolean }? + + code-point-attributes &= + attribute Pat_WS { boolean }? + \ No newline at end of file diff --git a/uax/uax42/fragments/properties/quickcheck.xml b/uax/uax42/fragments/properties/quickcheck.xml new file mode 100644 index 000000000..224c2287e --- /dev/null +++ b/uax/uax42/fragments/properties/quickcheck.xml @@ -0,0 +1,31 @@ + + + code-point-attributes &= + attribute NFC_QC { "Y" | "N" | "M" }? + + code-point-attributes &= + attribute NFD_QC { "Y" | "N" }? + + code-point-attributes &= + attribute NFKC_QC { "Y" | "N" | "M" }? + + code-point-attributes &= + attribute NFKD_QC { "Y" | "N" }? + + + code-point-attributes &= + attribute XO_NFC { boolean }? + + code-point-attributes &= + attribute XO_NFD { boolean }? + + code-point-attributes &= + attribute XO_NFKC { boolean }? + + code-point-attributes &= + attribute XO_NFKD { boolean }? 
+ + + code-point-attributes &= + attribute FC_NFKC { "#" | one-or-more-code-points }? + \ No newline at end of file diff --git a/uax/uax42/fragments/properties/script.xml b/uax/uax42/fragments/properties/script.xml new file mode 100644 index 000000000..b22243aaf --- /dev/null +++ b/uax/uax42/fragments/properties/script.xml @@ -0,0 +1,49 @@ + + + script = "Adlm" | "Aghb" | "Ahom" | "Arab" | "Armi" | "Armn" + | "Avst" + | "Bali" | "Bamu" | "Bass" | "Batk" | "Beng" | "Bhks" + | "Bopo" | "Brah" | "Brai" | "Bugi" | "Buhd" + | "Cakm" | "Cans" | "Cari" | "Cham" | "Cher" | "Chrs" + | "Copt" | "Cpmn" | "Cprt" | "Cyrl" + | "Deva" | "Diak" | "Dogr" | "Dsrt" | "Dupl" + | "Egyp" | "Elba" | "Elym" | "Ethi" + | "Gara" | "Geor" | "Glag" | "Gong" | "Gonm" | "Goth" + | "Gran" | "Grek" | "Gujr" | "Gukh" | "Guru" + | "Hang" | "Hani" | "Hano" | "Hatr" | "Hebr" | "Hira" + | "Hluw" | "Hmng" | "Hmnp" | "Hrkt" | "Hung" + | "Ital" + | "Java" + | "Kali" | "Kana" | "Kawi" | "Khar" | "Khmr" | "Khoj" + | "Kits" | "Knda" | "Krai" | "Kthi" + | "Lana" | "Laoo" | "Latn" | "Lepc" | "Limb" | "Lina" + | "Linb" | "Lisu" | "Lyci" | "Lydi" + | "Mahj" | "Maka" | "Mand" | "Mani" | "Marc" | "Medf" + | "Mend" | "Merc" | "Mero" | "Mlym" | "Modi" | "Mong" + | "Mroo" | "Mtei" | "Mult" | "Mymr" + | "Nagm" | "Nand" | "Narb" | "Nbat" | "Newa" | "Nkoo" + | "Nshu" + | "Ogam" | "Olck" | "Onao" | "Orkh" | "Orya" | "Osge" + | "Osma" | "Ougr" + | "Palm" | "Pauc" | "Perm" | "Phag" | "Phli" | "Phlp" + | "Phnx" | "Plrd" | "Prti" + | "Rjng" | "Rohg" | "Runr" + | "Samr" | "Sarb" | "Saur" | "Sgnw" | "Shaw" | "Shrd" + | "Sidd" | "Sind" | "Sinh" | "Sogd" | "Sogo" | "Sora" + | "Soyo" | "Sund" | "Sunu" | "Sylo" | "Syrc" + | "Tagb" | "Takr" | "Tale" | "Talu" | "Taml" | "Tang" + | "Tavt" | "Telu" | "Tfng" | "Tglg" | "Thaa" | "Thai" + | "Tibt" | "Tirh" | "Tnsa" | "Todr" | "Toto" | "Tutg" + | "Ugar" + | "Vaii" | "Vith" + | "Wara" | "Wcho" + | "Xpeo" | "Xsux" + | "Yezi" | "Yiii" + | "Zanb" | "Zinh" | "Zyyy" | "Zzzz" + + 
code-point-attributes &= + attribute sc { script }? + + code-point-attributes &= + attribute scx { list { script + } }? + \ No newline at end of file diff --git a/uax/uax42/fragments/properties/simple_case_mapping.xml b/uax/uax42/fragments/properties/simple_case_mapping.xml new file mode 100644 index 000000000..e2acb669c --- /dev/null +++ b/uax/uax42/fragments/properties/simple_case_mapping.xml @@ -0,0 +1,11 @@ + + + code-point-attributes &= + attribute suc { "#" | single-code-point }? + + code-point-attributes &= + attribute slc { "#" | single-code-point }? + + code-point-attributes &= + attribute stc { "#" | single-code-point }? + \ No newline at end of file diff --git a/uax/uax42/fragments/repertoire/Code points.xml b/uax/uax42/fragments/repertoire/Code points.xml new file mode 100644 index 000000000..cdfd1ad88 --- /dev/null +++ b/uax/uax42/fragments/repertoire/Code points.xml @@ -0,0 +1,23 @@ + + + + code-point |= + element reserved { + set-of-code-points, + code-point-attributes } + + code-point |= + element noncharacter { + set-of-code-points, + code-point-attributes } + + code-point |= + element surrogate { + set-of-code-points, + code-point-attributes } + + code-point |= + element char { + set-of-code-points, + code-point-attributes } + \ No newline at end of file diff --git a/uax/uax42/fragments/repertoire/Set of code points.xml b/uax/uax42/fragments/repertoire/Set of code points.xml new file mode 100644 index 000000000..a6ff2d092 --- /dev/null +++ b/uax/uax42/fragments/repertoire/Set of code points.xml @@ -0,0 +1,8 @@ + + + + set-of-code-points = + attribute cp { single-code-point } + | ( attribute first-cp { single-code-point }, + attribute last-cp { single-code-point } ) + \ No newline at end of file diff --git a/uax/uax42/fragments/repertoire/groups.xml b/uax/uax42/fragments/repertoire/groups.xml new file mode 100644 index 000000000..11f3b0dd9 --- /dev/null +++ b/uax/uax42/fragments/repertoire/groups.xml @@ -0,0 +1,8 @@ + + + + group = + element group 
{ + code-point-attributes, + code-point* } + \ No newline at end of file diff --git a/uax/uax42/fragments/repertoire/repertoire.xml b/uax/uax42/fragments/repertoire/repertoire.xml new file mode 100644 index 000000000..0cfc86e40 --- /dev/null +++ b/uax/uax42/fragments/repertoire/repertoire.xml @@ -0,0 +1,6 @@ + + + + ucd.content &= + element repertoire { (code-point | group) + }? + \ No newline at end of file diff --git a/uax/uax42/fragments/standardized-variants/standardized-variants.xml b/uax/uax42/fragments/standardized-variants/standardized-variants.xml new file mode 100644 index 000000000..a415a1152 --- /dev/null +++ b/uax/uax42/fragments/standardized-variants/standardized-variants.xml @@ -0,0 +1,10 @@ + + + + ucd.content &= + element standardized-variants { + element standardized-variant { + attribute cps { two-code-points }, + attribute desc { text }, + attribute when { text } }+ }? + \ No newline at end of file diff --git a/uax/uax42/fragments/start/start.xml b/uax/uax42/fragments/start/start.xml new file mode 100644 index 000000000..ba0e2262f --- /dev/null +++ b/uax/uax42/fragments/start/start.xml @@ -0,0 +1,6 @@ + + + + start = + element ucd { ucd.content } + \ No newline at end of file diff --git a/uax/uax42/fragments/tangut/Tangut.xml b/uax/uax42/fragments/tangut/Tangut.xml new file mode 100644 index 000000000..21e52208a --- /dev/null +++ b/uax/uax42/fragments/tangut/Tangut.xml @@ -0,0 +1,18 @@ + + + code-point-attributes &= + attribute kRSTUnicode { xsd:string { pattern="[0-9]+\.[0-9]+" } }? 
+ + code-point-attributes &= + attribute kTGT_MergedSrc + { xsd:string {pattern="L2008-[0-9A-F]{4,5}(-[0-9]{4,5})?"} + | xsd:string {pattern="L2006-[0-9]{4}"} + | xsd:string {pattern="L1997-[0-9]{4}"} + | xsd:string {pattern="L1986-[0-9]{4}"} + | xsd:string {pattern="S1968-[0-9]{4}"} + | xsd:string {pattern="N1966-[0-9]{3}(-[0-9A-Z]{3,4})?"} + | xsd:string {pattern="H2004-[A-Z]-[0-9]{4}"} + | xsd:string {pattern="L2012-[0-9]{4}"} + | xsd:string {pattern="UTN42-[0-9]{3}"} + }? + \ No newline at end of file diff --git a/uax/uax42/index.xml b/uax/uax42/index.xml new file mode 100644 index 000000000..1ea2f0f65 --- /dev/null +++ b/uax/uax42/index.xml @@ -0,0 +1,1353 @@ + + +
+ + Unicode Character Database in XML + + + + + 2024 + + + + Wilcock + John + + + + + + New value for the age attribute: 16.0. + + New values for the blk attribute: Egyptian_Hieroglyphs_Ext_A, + Garay, Gurung_Khema, Kirat_Rai, Myanmar_Ext_C, + Ol_Onal, Sunuwar, Symbols_for_Legacy_Computing_Sup, + Todhri, Tulu_Tigalari. + + New values for the script attribute: Gara, Gukh, + Krai, Onao, Sunu, Todr, Tutg. + + New value for the jg attribute: Kashmiri_Yeh. + New value for the InSC attribute: Reordering_Killer. + + New attributes: MCM, kFanqie, kZhuang. + + Modified patterns for the cjk-radical/@number, kRSUnicode and + kIRG_GSource + attributes. + + Added the do-not-emit element. + + + + Revision 35 being a proposed update, only changes between revisions 34 and 36 are + noted here. + + + + New value for the age attribute: 15.1. + + New value for the blk attribute: CJK_Ext_I. + + New values for the lb attribute: AK, AP, + AS, VF, VI. + + Modified values for the number, radical attributes of the + cjk-radical + element. + + Changed single value into list for the nv code point attribute. + + New code point attributes: ID_Compat_Math_Continue, + ID_Compat_Math_Start, IDSU, NFKC_SCF, InCB. + + Modified patterns for the kBigFive, kIRG_GSource, + kMorohashi, kRSUnicode attributes. + + Changed single values into lists for the kMorohashi, kPrimaryNumeric + Unihan attributes. + + New Unihan attributes: kJapanese, kMojiJoho, + kSMSZD2003Index, kSMSZD2003Readings, kVietnameseNumeric, + kZhuangNumeric. + + + + Revision 33 being a proposed update, only changes between revisions 32 and 34 are + noted here. + + + + New value for the age attribute: 15.0. + + New values for the blk attribute: Arabic_Ext_C, CJK_Ext_H, + Cyrillic_Ext_D, Devanagari_Ext_A, Kaktovik_Numerals, Kawi, + Nag_Mundari. + + New values for the script attribute: Kawi, Nagm. + + New Unihan attribute: kAlternateTotalStrokes. 
+ + Modified patterns for the kIRG_GSource, kIRG_HSource, + kIRG_TSource, kSemanticVariant, kSpecializedSemanticVariant, + kZVariant + attributes. + + + + Revision 31 being a proposed update, only changes between revisions 30 and 32 are + noted here. + + + + New value for the age attribute: 14.0. + + New values for the blk attribute: Arabic_Ext_B, + Cypro_Minoan, Ethiopic_Ext_B, Kana_Ext_B, + Latin_Ext_F, Latin_Ext_G, Old_Uyghur, Tangsa, + Toto, UCAS_Ext_A, Vithkuqi, Znamenny_Music. + + New values for the script attribute: Cpmn, Ougr, + Tnsa, Toto, Vith. + + New values for the jg attribute: Thin_Yeh, Vertical_Tail. + + New Unihan attribute: kStrange. + + Modified patterns for the kIRG_GSource, kIRG_MSource, + kIRG_VSource, kPhonetic, kSpoofingVariant attributes. + + Removal of the kWubi attribute, which has never been present in + released versions of the UCD. + + + + Revision 29 being a proposed update, only changes between revisions 28 and 30 are + noted here. + + + + New value for the age attribute: 13.0. + + New values for the blk attribute: Chorasmian, CJK_Ext_G, + Dives_Akuru, Khitan_Small_Script, Lisu_Sup, + Symbols_For_Legacy_Computing, Tangut_Sup, Yezidi. + + New values for the script attribute: Chrs, Diak, + Kits, Yezi. + + New value for the InPC attribute: Top_And_Bottom_And_Left. + + New Unihan attributes kSpoofingVariant, kUnihanCore2020, + kIRG_SSource, kIRG_UKSource, kTGHZ2013. + + New Emoji attributes Emoji, EPres, EMod, + EBase, EComp, ExtPict. + + Modified patterns for the kIRG_GSource, kIRG_HSource, + kIRG_KPSource, kIRG_KSource, kIRG_TSource, kKangXi, + kSemanticVariant, kSimplifiedVariant, + kSpecializedSemanticVariant, kTraditionalVariant attributes. + + + + Revision 27 being a proposed update, only changes between revisions 26 and 28 are + noted here. + + + + New value for the age attribute: 12.1. + + + + + + New value for the age attribute: 12.0. + + New values for the script attribute: Elym, Hmnp, + Nand, Wcho. 
+ + New values for the blk attribute: + Egyptian_Hieroglyph_Format_Controls, Elymaic, Nandinagari, + Nyiakeng_Puachue_Hmong, Ottoman_Siyaq_Numbers, Small_Kana_Ext, + Symbols_And_Pictographs_Ext_A, Tamil_Sup, Wancho. + + Modified patterns for the kIRG_GSource, kIRG_KSource, + kIRG_TSource, kTaiwanTelegraph attributes. + + + + Revision 24 being a proposed update, only changes between revisions 23 and 25 are + noted here. + + + + New value for the age attribute: 11.0. + + New values for the blk attribute: Chess_Symbols, + Dogra, Georgian_Ext, Gunjala_Gondi, + Hanifi_Rohingya, Indic_Siyaq_Numbers, Makasar, + Mayan_Numerals, Medefaidrin, Old_Sogdian, Sogdian. + + New values for the script attribute: Dogr, Gong, + Maka, Medf, Rohg, Sogd, Sogo. + + New values for the jg attribute: Hanifi_Rohingya_Kinna_Ya, + Hanifi_Rohingya_Pa. + + New value for the wb attribute: WSegSpace. + + New values for the InSC attribute: Consonant_Initial_Postfixed. + + New attributes: EqUIdeo, kJinmeiyoKanji, kJoyoKanji, + kKoreanEducationHanja, kKoreanName, kTGH. + + Modified patterns for the kTGT_MergedSrc attribute. + + Modified patterns for the kIRG_GSource, kIRG_HSource and + kIRG_VSource + attributes. + + + + Revision 22 being a proposed update, only changes between revisions 21 and 23 are + noted here. + + + + New value for the age attribute: 10.0. + + New values for the blk attribute: CJK_Ext_F, Kana_Ext_A, + Masaram_Gondi, Nushu, Soyombo, Syriac_Sup, + Zanabazar_Square. + + New values for the sc attribute: Gonm, Nshu, + Soyo, Zanb. + + New values for the jg attribute: Malayalam_Nga, + Malayalam_Ja, Malayalam_Nya, Malayalam_Tta, Malayalam_Nna, + Malayalam_Nnna, Malayalam_Bha, Malayalam_Ra, + Malayalam_Lla, Malayalam_Llla, Malayalam_Ssa. + + New value for the InPC attribute: Bottom_And_Left. + + Modified patterns for the kIRG_GSource, kIRG_JSource, + kIRG_KSource + attributes. 
+ + New code point attributes: vo, + RI + + New code point attributes for Nushu data: kSrc_NushuDuben and + kReading. + + + + Revision 20 being a proposed update, only changes between revisions 19 and 21 are + noted here. + + + + New value for the age attribute: 9.0. + + New values for the sc attribute: Adlm, Bhks, + Marc, Newa, Osge, Tang. + + New values for the blk attribute: Adlam, Bhaiksuki, + Cyrillic_Ext_C, Glagolitic_Sup, Ideographic_Symbols, + Marchen, Mongolian_Sup, Newa, Osage, + Tangut, Tangut_Components. + + New values for the gcb attribute: EB, EBG, EM, + GAZ, ZWJ. + + New values for the wb attribute: EB, EBG, EM, + GAZ, ZWJ. + + New values for the lb attribute: EB, EM, ZWJ. + + New values for the jg attribute: African_Feh, + African_Noon, African_Qaf. + + New code point attributes: PCM, kRSTUnicode and + kTGT_MergedSrc. + + Modified patterns for the kRSUnicode, kRSKangXi, + kMandarin, kIRG_JSource, kIRG_USource and kFennIndex + attributes. + + + + Revision 18 being a proposed update, only changes between revisions 17 and 19 are + noted here. + + + + New value for the age attribute: 8.0. + + New values for the sc attribute: Ahom, Hatr, + Hluw, Hung, Mult, Sgnw. + + New values for the blk attribute: Ahom, + Anatolian_Hieroglyphs, Cherokee_Sup, CJK_Ext_E, + Early_Dynastic_Cuneiform, Hatran, Multani, Old_Hungarian, + Sup_Symbols_And_Pictographs, Sutton_SignWriting. + + New values for the InSC attribute: Consonant_Killer, + Consonant_Prefixed, Consonant_With_Stacker, Syllable_Modifier. + + New code point attributes: InPC, kJa. + + New patterns for the kIRG_GSource attribute: GFC-, GGFZ-. + + Switched the reference to ISO 19757 from :2003 and :2003 Amd1 to :2008. + + + Revision 16 being a proposed update, only changes between revisions 15 and 17 are + noted here. + + + + New value for the age attribute: 7.0. + + New values for the jg attribute. + + New values for the sc attribute. + + New values for the blk attribute. + + New values for the InSC attribute. 
+ + New values for the kIICore attribute. + + New values for the kIRG_GSource attribute. + + + + Revision 14 being a proposed update, only changes between revisions 13 and 15 are + noted here. + + + + New value for the age attribute: 6.3. + + New values DQ, HL, SQ for the WB attribute (for Unicode 6.3). + + New code point attributes bpt and bpb (for Unicode 6.3). + + New values for the bc attribute: LRI, RLI, FSI, + PDI + (for Unicode 6.3). + + Updated the patterns for kHanyuPinlu and kTotalStrokes (for + Unicode 6.3). + + Updated the patterns for kIRG_HSource and kIRG_HSource (for + Unicode 6.2). + + Clarified that the child elements of list-like elements are in no particular order. + + + Revision 12 being a proposed update, only changes between revisions 11 and 13 are + noted here. + + + + New value for the age attribute: 6.2. + + New value for the gcb, wb and lb attributes: + RI + (for Unicode 6.2). + + Updated the patterns for kIRG_GSource and kIRG_HSource (for + Unicode 6.2). + + + + Revision 10 being a proposed update, only changes between revisions 9 and 11 are + noted here. + + + + Clarified the default values. + Indicate that property values may change from one release to the next. + Introduced the blk attribute, for the Block property. + + Introduced the scx attribute, for the ScriptExtensions property. + + Introduced the name-alias element, for the Name_Alias property. + + New value for the age attribute: 6.1. + + New values for the script attribute: Cakm, Merc, + Mero, Plrd, Shrd, Sora, Takr. + + New values for the lb attribute: HL and CJ. + + New value for the jg attribute: Rohingya_Yeh. + + The value of the fc_nfkc attribute must now be either # or + one-or-more-code-points. + + For the nv attribute, the absence of a numeric value is now represented by + NaN + rather than by the empty string. + + The values of the ccc attribute are now restricted to 0..254, instead of 0..255.
+ + Updated the patterns for kSemanticVariant, + kSpecializedSemanticVariant, kIRG_USource, and kMandarin. + + + + Revision 8 being a proposed update, only changes between revisions 7 and 9 are noted + here. + + + + New value for the age attribute: 6.0. + + New value for the jg attribute: + Teh_Marbuta_Goal + + New values for the script attribute: Batk, Brah, + Mand. + + Updated the patterns for kIRG_GSource, kIRG_HSource, + kIRG_JSource, kIRG_KSource, kIRG_MSource, + kIRG_TSource, kIRG_VSource. + + Added the InSC and InMC elements. + + Added the emoji-sources element. + + + + Revision 6 being a proposed update, only changes between revisions 5 and 7 are noted + here. + + + + Changed the type of block/@first-cp, block/@last-cp and + normalization-corrections/@cp + from text to + single-code-point + + Changed the type of named-sequence/@cps, + provisional-named-sequences/@cps, normalization-correction/@old and + normalization-correction/@new + from text to one-or-more-code-points. + + Changed the type of standardized-variants/@cps from text to + two-code-points. + + New values for the jg attribute: Farsi_Yeh and Nya. + + New value for the age attribute: 5.2. + + New values for the sc attribute: Lana, Tavt, + Avst, Egyp, Samr, Lisu, Bamu, Java, + Mtei, Armi, Sarb, Prti, Phli, Orkh, + Kthi. + + New value for the lb attribute: CP. + + New value for the sc attribute: Zinh. + + New code point attributes CI, Cased, CWCF, + CWCM, CWL, CWKCF, CWT, CWU, + NFKC_CF. + + New attributes kHanyuPinyin and kIRG_MSource. + + New element + cjk-radicals + + Updated the patterns for kIRG_GSource, kIRG_JSource, + kIRG_KPSource, kIRG_KSource, kIRG_TSource, + kIRG_VSource, kHanyuPinlu, kMandarin, + kSemanticVariant, kSpecializedSemanticVariant, + kVietnamese, kZVariant. + + Point out that Relax NG schemas do not modify or augment the infoset, and that it is possible + to mechanically convert our schema to other schema languages.
+ + + + Revision 4 being a proposed update, only changes between revisions 3 and 5 are noted + here. + + + + First approved version, for Unicode 5.1.0. + For optional elements which act as collections, such as repertoire and + named-sequences, impose that there be at least one element in the collection. + + Remove the constraint that the value jg is limited when jt has + certain values; similarly for bmg / Bidi_M and for nv / + nt. + + Value NL added to the WB attribute (for Unicode 5.1). + + Value PP added to the GCB attribute (for Unicode 5.1). + + Corrected the Vai script value to Vaii. + + Removed the discussion of elements or attributes in different namespaces. + Removed the code-point element. + + + + + + Promoted to Draft UAX. + Changed the title from "An XML representation of the UCD" + Value 5.1 added to the age attribute (for Unicode 5.1). + + Value SM added to the gcb attribute (for Unicode 5.1). + + Values CR, Extend, LF, MB added to the + WB + attribute (for Unicode 5.1). + + Values CR, EX, LF, SC added to the SB + attribute (for Unicode 5.1). + + Value Burushaski_Yeh_Barree added to the jg attribute (for + Unicode 5.1). + + Value Alef_Maqsurah added to the jg attribute (for Unicode 2.x). + + Values Cari, Cham, Kali, Lepc, + Lyci, Lydi, Olck, Rjng, Saur, Sund and + Vai + added to the sc attribute (for Unicode 5.0). + + + jamo + attribute renamed to + JSN + + + sfc + attribute renamed to + scf + + Attribute kXHC1983 added (for Unicode 5.1.0). + + Pattern for attribute kIRG_USource extended (for Unicode 5.1.0). + + Element provisional-named-sequences added (for Unicode 5.0) + + + + + + First working draft. + + + + + + + This annex describes an XML representation of the Unicode Character Database. + + + + +
+ Introduction + In working on Unicode implementations, it is often useful to access the full content of the Unicode + Character Database (UCD). For example, in establishing mappings from characters to glyphs in fonts, it is + convenient to see the character scalar value, the character name, the character East Asian width, along with + the shape and metrics of the proposed glyph to map to; looking at all this data simultaneously helps in + evaluating the mapping. + + Directly accessing the data files that constitute the UCD is sometimes a daunting proposition. The data is + dispersed in a number of files of various formats, and there are just enough peculiarities (all justified by + the processing power available at the time the UCD representation was designed) to require a fairly intimate + knowledge of the data format itself, in addition to the meaning of the data. + + Many programming environments (for example, Java or ICU) do give access to the UCD. However, those + environments tend to lag behind releases of the standard, or support only some of the UCD content. + + Unibook is a wonderful tool to explore the UCD and in many cases is just the ticket; however, it is + difficult to use when the task at hand has not been built-in, or when non-UCD data is to be displayed as + well. + + This annex presents an alternative representation of the UCD, which is meant to overcome these + difficulties. We have chosen an XML representation, because parsing becomes a non-issue: there are a number + of XML parsers freely available, and using them is often fairly easy. In addition, there are freely + available tools that can perform powerful operations on XML data; for example, XPATH and XQUERY engines can + be thought of as a “grep” for XML data and XSLT engines can be thought of as + “awk” for XML data. + + It is important to note that we are interested in exploring the content of the UCD, rather than in using + the UCD data to process character streams. 
Thus, we are not concerned so much by the speed of processing or + the size of our representation. + + Our representation supports the creation of documents that represent only parts of the UCD, either by not + representing all the characters, or by not representing all the properties. This can be useful when only + some of the data is needed. + + This annex presents only the XML representation format of the UCD. The data itself is part of the Unicode + Character Database. + +
+ + + +
+ Overall schema + +
+ General principles + Our schema can be used to create and validate documents which are intended to represent properties of + Unicode code points, blocks, named sequences, normalization corrections, standardized variants, CJK + radicals and emoji sources. A document may represent the values actually assigned in a given version of + the UCD, or it may represent a draft version of the UCD, or a private agreement on Private Use + characters. The validity of an XML document with respect to the schema defined in this annex does not + assert anything about the correctness of the values. + + Valid documents may provide values for only some of the code points, or some of the Unicode + properties. Furthermore, they may also incorporate non-Unicode properties. + + Our schema is defined using English. However, a useful subset of the validity constraints can be + captured using a schema language, thereby simplifying the task of validating documents. We have chosen + Relax NG [ISO 19757], + in the compact syntax, as the schema language. It is important to stress that the schema which is + defined in English imposes more constraints on the documents than can be validated with the Relax NG + schema. + + An important characteristic of Relax NG is that its schemas do not modify or augment the infoset of + the documents. Therefore, it is possible to process our XML representation without using the schema. + Also, the schema is relatively straightforward and can be converted mechanically to other schema + languages. + + While our XML representation is not intended to be used during processing of characters and strings, + it is still a design principle for our schema to support the relatively efficient representation of the + UCD. This is achieved by an inheritance mechanism, similar to property inheritance in CSS or in XSL:FO + (see section 4.3 Group). + + Many invariants impose constraints on the values of the different properties for a given code point.
+ For example, if the value of the Numeric Type property is None, then the value of the + Numeric Value property should be the empty string; and if the value of the Other + Alphabetic property is true, then the value of the Alphabetic property should be + true. Those invariants are not captured in the schema. + +
+ + +
+ Namespace + The namespace for our elements is “http://www.unicode.org/ns/2003/ucd/1.0”. Our + attributes are in the empty namespace. + + + In all our examples, we assume that this namespace is the default one. + +
+ + +
+ Datatypes + We use the standard XML Schema datatypes: + + Characters are pervasive in the UCD, and will need to be represented. Representing characters directly + by themselves would seem the most obvious choice; for example, we could express that the decomposition + of U+00E8 is “&#x0065;&#x0300;”, that is, have exactly two characters in (the + infoset of) the XML document. However, the current XML specification limits the set of characters + that can be part of a document. Another problem is that the various tools (XML parser, XPATH engine, + etc.) may equate U+00E8 with U+0065 U+0300, thus making it difficult to figure out which of the two + sequences is contained in the database (which is sometimes important for our purposes). Therefore, we + chose instead to represent characters by their code points; we follow the usual convention of four to + six hexadecimal digits (uppercase) and code points in a sequence separated by space; for example, the + decomposition of U+00E8 will be represented by the nine characters “0065 0300” in the + infoset. + + +
+ + +
+ Root Element + The root element of valid documents is a ucd. + + +
+ + +
+ Common attributes + A large number of properties are boolean. We uniformly use the values Y and + N for those: + + +
+ + +
+ Ordering of elements + In elements that hold lists of child elements, such as repertoire, + group, or standardized-variants, the schema does not require that the + child elements be in any particular order. + +
+
+ + +
+ Description + The root element may have a description child element, which in turn contains any string, + which is meant to describe what the XML document purports to describe. + + It is recommended that if the document purports to represent the UCD of some Unicode version, the + description be selected in accord with the rules listed in [Versions]; and + conversely, that documents which do not purport to represent the UCD be described as such. + + +
+ + +
+ Repertoire + The repertoire child element of the ucd element describes the code points and + their properties. As we will see shortly, code points can be described individually or as part of a group: + + + + +
+ Sets of code points + It is often the case that successive code points have the same property values, for a given set of + properties. The most striking example is that of an unallocated plane, where all but the last two + code points are reserved and have the same property values. Another example is the URO (U+4E00 + .. U+9FA5) where all the code points have the same property values if we ignore their name and their + Unihan properties. + + + This observation suggests that it is profitable to represent sets of code points which share the + same properties, rather than individual code points. To make the representation of the sets simple, + we restrict them to be segments in the code point space, that is a set is defined by the first and + last code point it contains. Those are captured by the attributes first-cp and + last-cp. The attribute cp is a shorthand notation for the case where the set + has a single code point. + + In the repertoire, there must be at most one code-point + element for a given code point. + +
+ + +
+ Code point types + When thinking about Unicode code points, it is useful to split them into four types: + + + those assigned to abstract characters (PUA or not) + the noncharacters + the surrogate code points + the reserved code points + + This leads to four elements to describe sets of code points: + + +
+ + +
+ Group + While we already recognized the situation where a set of code points have exactly the same set of + property values, another common situation is that of code points which have almost all the same + property values. + + For example, the characters U+1740 BUHID LETTER A .. U+1753 BUHID VOWEL SIGN U all have the age + “3.2”, and all have the script “Buhd”. On the one hand, it is convenient + to support data files in which those properties are explicitly listed with every code point, as this + makes answering questions like “what is the age of U+1749?” easier, because that data + is expressed right there. On the other hand, this leads to rather large data files, and it also tends + to obscure the differences between similar characters. + + + Our representation accounts for this situation with the notion of groups. A + group element is simply a container of code points that also holds default values for + the properties. If a code point inside a group does not explicitly list a property but the + group lists it, then the code point inherits that property from its + group. For example, the fragment with explicit properties: + + + <char cp="1740" age="3.2" na="BUHID LETTER A" gc="Lo" sc="Buhd"/> + <char cp="1741" age="3.2" na="BUHID LETTER I" gc="Lo" sc="Buhd"/> + <char cp="1752" age="3.2" na="BUHID VOWEL SIGN I" gc="Mn" sc="Buhd"/> + <char cp="1820" age="3.0" na="MONGOLIAN LETTER A" gc="Lo" sc="Mong"/> + is equivalent to this fragment which uses a group: + + + <group age="3.2" gc="Lo" sc="Buhd"> + <char cp="1740" na="BUHID LETTER A"/> + <char cp="1741" na="BUHID LETTER I"/> + <char cp="1752" na="BUHID VOWEL SIGN I" gc="Mn"/> + <char cp="1820" age="3.0" na="MONGOLIAN LETTER A" sc="Mong"/> + </group> + The element for U+1740 does not have the age attribute, and it therefore inherits it + from its enclosing group element, that is “3.2”. On the other hand, + the element for U+1820 does have this attribute, so the value is “3.0”.
+ + As this example illustrates, the notion of group does not necessarily align with the + notion of Unicode block. It is entirely defined and limited to our representation. In particular, the + value of a property for a code point can always be determined from the XML document alone, assuming + that this property and this code point are expressed at all. Of course, one may create an XML + representation where the groups happen to coincide with the Unicode blocks. + + Groups cannot be nested. The motivation for this limitation is to make the life of consumers + easier: either a property is defined by the element for a code point, or it is defined by the + immediately enclosing group element. + + +
+ + +
+ Properties + Each property, except for the Special_Case_Condition and Name_Alias + properties, is represented by an attribute. In an XML data file, the absence of an attribute (possibly + only on some code-points) means that the document does not express the value + of the corresponding property. Conversely, the presence of an attribute is an expression of the + corresponding property value; the implied null value is represented by the empty string. + + The Name_Alias property is represented by zero or more name-alias child + elements. Unlike the situation for properties represented by attributes, it is not possible to determine + whether all the aliases have been represented in a data file by inspecting that data file. + + The name of an attribute is the abbreviated name of the property as given in the file + PropertyAliases.txt in the corresponding version of the UCD. For the Unihan + properties, the name is that given in the various versions of the Unihan database. + + For catalog and enumerated properties, the values are those listed in the file + PropertyValueAliases.txt in the corresponding version of the UCD; if there is an abbreviated + name, it is used, otherwise the long name is used. + + Note that the set of possible values for a property captured in this schema may change from one + version to the next. + + + +
+ Age property + The age attribute captures the version of Unicode in which a code point was + assigned to an abstract character, or made a surrogate or non-character. + + +
+ + +
+ Name properties + There are two name properties: the name given by the current version of the standard + (na), and possibly the name this character had in version 1.0 of the standard + (na1). + + + + The majority of the characters in Unicode have a name which is of the form CJK UNIFIED + IDEOGRAPH-<code point>. It also happens that character names cannot + contain the character U+0023 # NUMBER SIGN, so we adopted the following convention: if a + code point has the attribute na (either directly or by inheritance from an enclosing + group), then occurrences of the character # in the name are to be interpreted as the value of the + code point. For example: + + + <char cp="3400" na="CJK UNIFIED IDEOGRAPH-3400"/> + and + + <char cp="3400" na="CJK UNIFIED IDEOGRAPH-#"/> + are equivalent. The # can be in any position in the value of the na + attribute. The convention also applies just as well to a set of multiple code points: + + + <char cp="3400" na="CJK UNIFIED IDEOGRAPH-3400"/> + <char cp="3401" na="CJK UNIFIED IDEOGRAPH-3401"/> + is equivalent to + + <char cp="3400" na="CJK UNIFIED IDEOGRAPH-#"/> + <char cp="3401" na="CJK UNIFIED IDEOGRAPH-#"/> + which in turn is equivalent to: + + <char first-cp="3400" last-cp="3401" na="CJK UNIFIED IDEOGRAPH-#"/> +
+ + +
+ Name Alias properties + The Name_Alias property is represented by zero or more name-alias + child elements: + + +
+ + +
+ Block property + The Block property is represented by the blk attribute: + + +
+ + +
+ General Category + The general category is represented by the gc attribute. + + +
+ + +
+ Combining properties + The combining class is represented by the ccc attribute, which holds the decimal + representation of the combining class. + + Because the set of values that this property has taken across the various versions of the UCD + is rather large, our schema does not restrict the possible values to those actually used. + + +
+ + +
+ Bidirectionality properties + The bidirectional class is represented by the bc attribute. + + + The mirrored property is represented by the Bidi_M attribute, which takes a + boolean value. + + + The bmg attribute is the code point of a character whose glyph is typically + a mirrored image of the glyph for the current character. + + + Note that we do not express the “Best Fit” element recorded in BidiMirroring.txt. + For one thing, it is not meant to be machine readable. More importantly, the idea underlying the + mirrored glyph is delicate to use, since it makes assumptions about the design of the fonts, and + the best fit goes even farther. + + The Bidi_Control property is represented by the Bidi_C attribute. + + + The bidi paired bracket type and bidi paired bracket properties are represented by the + bpt and bpb attributes respectively. + + + +
+ + +
+ Decomposition properties + The decomposition type and decomposition mapping properties are represented by the dt + and dm attributes. + + Most characters have a decomposition mapping to themselves. This is very similar to the + situation we encountered with names, and we adopted a similar convention: if the value of a + decomposition mapping is the character itself, we use the attribute value # (U+0023 # + NUMBER SIGN) as a shorthand notation; this enables those attributes to be captured in groups. + + + The properties Composition_Exclusion and Full_Composition_Exclusion are + represented by the attributes CE and Comp_Ex: + + + The properties NFC_Quick_Check, NFD_Quick_Check, + NFKC_Quick_Check, NFKD_Quick_Check, Expands_On_NFC, + Expands_On_NFD, Expands_On_NFKC, Expands_On_NFKD, + FC_NFKC_Closure have corresponding attributes. + + +
+ + +
+ Numeric Properties + The numeric type is represented by the nt attribute. + + The numeric value is represented by the nv attribute, represented as a whole + number or a fraction. + + +
+ + +
+ Joining properties + The joining type of a character is represented by the jt attribute. + + The jg attribute is the joining group of the character. + + + The Join_Control property is represented by the Join_C attribute. + + +
+ + +
+ Linebreak properties + The Line_Break property is represented by the lb attribute. + + +
+ + +
+ East Asian Width property + The East Asian width property is represented by the ea attribute. + + +
+ + +
+ Case properties + The Uppercase, Lowercase, Other_Uppercase and + Other_Lowercase properties are represented by corresponding attributes. + + + Most characters have case mapping and case folding properties that simply map or fold to + themselves. This is very similar to the situation we encountered with names, and we adopted a + similar convention: if the value of a case mapping or case folding property is the character + itself, we use the attribute value # (U+0023 # NUMBER SIGN) as a shorthand notation; this + enables those attributes to be captured in groups. + + The simple case mappings are recorded in the suc, slc, stc + attributes. + + + The non-simple case mappings are recorded in the uc, lc and tc + attributes. + + + The Simple_Case_Folding and Case_Folding properties are recorded in the + scf and cf attributes respectively. + + + The Case_Ignorable, Cased, Changes_When_Casefolded, + Changes_When_Casemapped, Changes_When_Lowercased, + Changes_When_NFKC_Casefolded, Changes_When_Titlecased, + Changes_When_Uppercased, NFKC_Casefold, and + NFKC_Simple_Casefold properties are recorded in these attributes: + + + Note that the UCD records more information about case folding than is expressed in the + properties, specifically the entries in CaseFolding.txt with status T. + +
+ + +
+ Script properties + The script and script extension properties are represented by the sc and + scx attributes respectively. + + +
+ + +
+ ISO Comment properties + The ISO 10646 comment field is represented by the isc attribute. + + +
+ + +
+ Hangul properties + The property Hangul_Syllable_Type is represented by the hst attribute. + + + The property Jamo_Short_Name is represented by the JSN attribute: + + +
+ + +
+ Indic properties + The property Indic_Syllabic_Category is represented by the InSC + attribute. + + + The property Indic_Positional_Category is represented by the InPC + attribute: + + + The property Indic_Conjunct_Break is represented by the InCB attribute: + + +
+ + +
+ Identifier and Pattern and programming language properties + + The properties ID_Start, Other_ID_Start, XID_Start, + ID_Continue, Other_ID_Continue, XID_Continue, + ID_Compat_Math_Start, and ID_Compat_Math_Continue are represented by + corresponding attributes: + + + The properties Pattern_Syntax and Pattern_White_Space are represented + by corresponding attributes: + + +
+ + +
+ Properties related to function and graphic characteristics + The properties Dash, Hyphen, Quotation_Mark, + Terminal_Punctuation, Sentence_Terminal, Diacritic, + Extender, Soft_Dotted, Alphabetic, + Other_Alphabetic, Math, Other_Math, Hex_Digit, + ASCII_Hex_Digit, Default_Ignorable_Code_Point, + Other_Default_Ignorable_Code_Point, Logical_Order_Exception, + Prepended_Concatenation_Mark, Modifier_Combining_Mark, + White_Space, Vertical_Orientation, and Regional_Indicator + describe the function or graphic characteristic of a character, and each have a corresponding + attribute. + + +
+ + +
+ Properties related to boundaries + The properties Grapheme_Base, Grapheme_Extend, + Other_Grapheme_Extend, Grapheme_Link, + Grapheme_Cluster_Break, Word_Break, and Sentence_Break each + have a corresponding attribute: + + +
+ + +
+ Properties related to ideographs + The properties Ideographic, Unified_Ideograph, + Equivalent_Unified_Ideograph, IDS_Binary_Operator, + IDS_Trinary_Operator, IDS_Unary_Operator, and Radical have + corresponding attributes: + + +
+ + +
+ Miscellaneous properties + The properties Deprecated, Variation_Selector, and + Noncharacter_Code_Point have corresponding attributes: + + +
+ + +
+ Unihan properties + The Unihan properties (from the Unihan database) are represented as attributes. + + +
+ + +
+ Tangut data + The Tangut data are represented as attributes. The attribute kRSTUnicode + represents the radical stroke index. The attribute kTGT_MergedSrc indicates the + source reference for the character. + + +
+ + +
+ Nushu data + The Nushu data are represented as attributes. The attribute kSrc_NushuDuben + indicates the page number and order of the item from the NushuDuben reference source. Nushu common + reading is represented as kReading. + +
+ + +
+ Emoji properties + The properties Emoji, EPres, EMod, EBase, + EComp, and ExtPict have corresponding attributes: + + +
+
+
+ + +
+ Blocks + The blocks child of the ucd describes the blocks. It has one child + block element per block, with attributes to describe the extent and name of the block. + + +
+ + +
+ Named Sequences + The named-sequences child of the ucd describes the named sequences. It has one + child named-sequence element per named sequence, with attributes to describe the name and + sequence. + + Similarly, the provisional-named-sequences child of the ucd describes the + provisional named sequences. + + +
+ + +
+ Normalization Corrections + The normalization-corrections child of the ucd describes the normalization + corrections. It has one child normalization-correction element per correction, with + attributes to describe the code point affected, its old normalization, its new normalization and the + version of Unicode in which the correction was made. + + +
+ + +
+ Standardized Variants + The standardized-variants child of the ucd describes the standardized + variants. It has one child element standardized-variant per variant. The attributes on that + last element capture the variation sequence, the description of the desired appearance, and the shaping + environment under which the appearance is different. + + +
+ + +
+ CJK Radicals + The cjk-radicals child of the ucd describes the CJK radicals. It has one + child element cjk-radical per radical. The attributes on that last element capture the + radical number, the corresponding CJK radical character, and the corresponding CJK unified ideograph. + + +
+ + +
+ Emoji sources + The emoji-sources child of the ucd describes the emoji sources. + + + + +
+ + +
+ Do Not Emit + The do-not-emit child of the ucd describes the + character sequences that should not be emitted or generated in newly authored texts. + + + +
+ + +
+ The full schema + Our schema is just the accumulation of the pieces we have described so far: + + + + + + + + + + + + + + + + + + + + + An expanded version is linked from the top of this document. +
+ + +
+ Examples + Here is a fragment of the UCD for a few representative + characters (only some of the properties are represented): + + + + + + + + + + + + + + + + + + + + + + + + + + + +]]> + +
+ + + + Acknowledgments + Thanks to Markus Scherer and Mark Davis for their help developing this XML representation. Thanks to + the reviewers: Julie Allen, Ernest van den Boogaard, Daniel Bünzli, John Cowan, Asmus Freytag, + Felix Sasaki, Andrew West. Special thanks to Eric Muller and Laurențiu Iancu. + + +
diff --git a/uax/uax42/index2html.xsl b/uax/uax42/index2html.xsl new file mode 100644 index 000000000..f0a95fa95 --- /dev/null +++ b/uax/uax42/index2html.xsl @@ -0,0 +1,611 @@ + + + + + + + + + + + + + + + + + + + + + <xsl:choose> + <xsl:when test="articleinfo/unicode:tr/@class='uax'"> + <xsl:text>UAX</xsl:text> + </xsl:when> + <xsl:when test="articleinfo/unicode:tr/@class='uts'"> + <xsl:text>UTS</xsl:text> + </xsl:when> + <xsl:when test="articleinfo/unicode:tr/@class='utr'"> + <xsl:text>UTR</xsl:text> + </xsl:when> + </xsl:choose> + <xsl:text> #</xsl:text> + <xsl:value-of select="articleinfo/unicode:tr/@number"/> + <xsl:text>: </xsl:text> + <xsl:value-of select="title"/> + + + + + + + + + + + + +
+ + [Unicode] +  Technical Reports +
 
+
+

+ + + + + Unicode® Standard Annex + + + Unicode® Technical Standard + + + Unicode® Technical Report + + + # + +

+

+ + +
+ + +
+ + + + + + +
+ +

Modifications

+

This section indicates the changes introduced by each revision.

+ +
+ +
+ + + + + Working draft + + + Proposed Update + + + + + + + + + + + + + + + + https://www.unicode.org/reports/tr + + /tr + + - + + .html + + + + + + + + https://www.unicode.org/reports/tr + + /tr + + - + + .html + + + + https://www.unicode.org/reports/tr + + / + + + + https://www.unicode.org/reports/tr + + /tr + + - + + .rnc + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Version + Unicode + + + + +
+ + + Editor + + + Editors + + + + +
Date + + + + +
This Version + + + + +
Previous Version + + + n/a + + + + + + + + +
Latest Version + +
Latest Proposed Update + proposed.html +
Schema + + + + +
Revision + + + + + + +
+
+ + + + + + + + + +
+
+ + + ( + mailto: + ) + + + + + +

Summary

+ +
+ + +

+
+ + + + +

Status

+ + +

This document has been reviewed by Unicode members and other interested parties, and has been + approved for publication by the Unicode Consortium. This is a stable document and may be used as reference + material or cited as a normative reference by other specifications.

+
+ +

+ + This is a draft document which may be updated, replaced, or + superseded by other documents at any time. Publication does not imply endorsement by the Unicode + Consortium. This is not a stable document; it is inappropriate to cite this document as other than a + work in progress.

+
+
+ + +
+

A Unicode Standard Annex (UAX) forms an integral part of the Unicode Standard, but is + published online as a separate document. The Unicode Standard may require conformance to normative + content in a Unicode Standard Annex, if so specified in the Conformance chapter of that version of the + Unicode Standard. The version number of a UAX document corresponds to the version of the Unicode Standard + of which it forms a part.

+
+

Please submit corrigenda and other comments with the online reporting form [Feedback]. Related information that is useful in + understanding this annex is found in Unicode Standard Annex #41, “Common References for Unicode Standard + Annexes.” For the latest version of the Unicode Standard, see [Unicode]. For a list of current Unicode + Technical Reports, see [Reports]. For more information about + versions of the Unicode Standard, see [Versions]. For any + errata which may apply to this annex, see [Errata].

+
+ +
+

A Unicode Technical Standard (UTS) is an independent specification. Conformance to the Unicode + Standard does not imply conformance to any UTS.

+
+

Please submit corrigenda and other comments with the online reporting form [ + Feedback]. Related information that is useful in understanding this document is found in References. For the latest version of the Unicode Standard see [Unicode]. For a list of current Unicode Technical Reports see [Reports]. For more information about versions of the Unicode Standard, see + [Versions].

+
+ +
+

A Unicode Technical Report (UTR) contains informative material. Conformance to the Unicode + Standard does not imply conformance to any UTR. Other specifications, however, are free to make normative + references to a UTR.

+
+

Please submit corrigenda and other comments with the online reporting form [ + Feedback]. Related information that is useful in understanding this document is found in References. For the latest version of the Unicode Standard see [Unicode]. For a list of current Unicode Technical Reports see [Reports]. For more information about versions of the Unicode Standard, see + [Versions].

+
+
+
+ + + + +

Contents

+ +
+ + +
  • + + +
      + +
    +
    +
  • +
    + + + + + + +      + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    + + + +

    +
    + + +

    +
    + + +
    +
    + + + + + + + + + _blank + + + + + + + + + + + + + + + + + + + [ + + + + + + + + + + + + : + + + , ] + + + + +

    + [, + ] + + = + + + +

    +
    + + +

    + [] + + = + +

    +
    + + + + + + + + +
    +

    + Revision +

    + +
    +
    + + +
    +

    + +

    +
    +
    + + +
      + +
    +
    + + +
  • + +
  • +
    + + + + + + + + + + + + + + + + background-color: #ffff00; border-style:dotted; border-width:1px + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    diff --git a/uax/uax42/index2rnc.xsl b/uax/uax42/index2rnc.xsl new file mode 100644 index 000000000..b7a8dfa81 --- /dev/null +++ b/uax/uax42/index2rnc.xsl @@ -0,0 +1,45 @@ + + + + + + + + + + + + # Copyright © Unicode, Inc. + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/uax/uax42/output/index.html b/uax/uax42/output/index.html new file mode 100644 index 000000000..ccde1ac04 --- /dev/null +++ b/uax/uax42/output/index.html @@ -0,0 +1,3480 @@ + + + + + + + UAX #42: Unicode Character Database in XML + + + + + + + + + + + +
    + + [Unicode] +  Technical Reports +
     
    +
    +

    + Proposed Update Unicode® Standard Annex #42

    +

    Unicode Character Database in XML

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    VersionUnicode 16.0.0 +
    + Editor + + John Wilcock
    +
    Date + 2024-08-15 +
    This Version + + https://www.unicode.org/reports/tr42/tr42-36.html + +
    Previous Version + + https://www.unicode.org/reports/tr42/tr42-34.html + +
    Latest Version + https://www.unicode.org/reports/tr42/ +
    Latest Proposed Update + https://www.unicode.org/reports/tr42/proposed.html +
    Schema + + https://www.unicode.org/reports/tr42/tr42-36.rnc + +
    Revision + + 36 + +
    +

    Summary

    +

    + This annex describes an XML representation of the Unicode Character Database. +

    +

    + Status +

    +

    + This is a + draft + document which may be updated, replaced, or + superseded by other documents at any time. Publication does not imply endorsement by the Unicode + Consortium. This is not a stable document; it is inappropriate to cite this document as other than a + work in progress. +

    +
    +

    + + A Unicode Standard Annex (UAX) forms an integral part of the Unicode Standard, but is + published online as a separate document. The Unicode Standard may require conformance to normative + content in a Unicode Standard Annex, if so specified in the Conformance chapter of that version of the + Unicode Standard. The version number of a UAX document corresponds to the version of the Unicode Standard + of which it forms a part. +

    +
    +

    + Please submit corrigenda and other comments with the online reporting form [Feedback]. Related information that is useful in + understanding this annex is found in Unicode Standard Annex #41, “Common References for Unicode Standard + Annexes.” For the latest version of the Unicode Standard, see [Unicode]. For a list of current Unicode + Technical Reports, see [Reports]. For more information about + versions of the Unicode Standard, see [Versions]. For any + errata which may apply to this annex, see [Errata]. +

    +

    Contents

    + +
    +

    + 1 Introduction +

    +

    In working on Unicode implementations, it is often useful to access the full content of the Unicode + Character Database (UCD). For example, in establishing mappings from characters to glyphs in fonts, it is + convenient to see the character scalar value, the character name, the character East Asian width, along with + the shape and metrics of the proposed glyph to map to; looking at all this data simultaneously helps in + evaluating the mapping. +

    +

    Directly accessing the data files that constitute the UCD is sometimes a daunting proposition. The data is + dispersed in a number of files of various formats, and there are just enough peculiarities (all justified by + the processing power available at the time the UCD representation was designed) to require a fairly intimate + knowledge of the data format itself, in addition to the meaning of the data. +

    +

    Many programming environments (for example, Java or ICU) do give access to the UCD. However, those + environments tend to lag behind releases of the standard, or support only some of the UCD content. +

    +

    Unibook is a wonderful tool to explore the UCD and in many cases is just the ticket; however, it is + difficult to use when the task at hand has not been built-in, or when non-UCD data is to be displayed as + well. +

    +

    This annex presents an alternative representation of the UCD, which is meant to overcome these + difficulties. We have chosen an XML representation, because parsing becomes a non-issue: there are a number + of XML parsers freely available, and using them is often fairly easy. In addition, there are freely + available tools that can perform powerful operations on XML data; for example, XPATH and XQUERY engines can + be thought of as a “grep” for XML data and XSLT engines can be thought of as + “awk” for XML data. +

    +

    It is important to note that we are interested in exploring the content of the UCD, rather than in using + the UCD data to process character streams. Thus, we are not concerned so much by the speed of processing or + the size of our representation. +

    +

    Our representation supports the creation of documents that represent only parts of the UCD, either by not + representing all the characters, or by not representing all the properties. This can be useful when only + some of the data is needed. +

    +

    This annex presents only the XML representation format of the UCD. The data itself is part of the Unicode + Character Database. +

    +

    + 2 Overall schema +

    +

    + 2.1 General principles +

    +

    Our schema can be used to create and validate documents which are intended to represent properties of + Unicode code points, blocks, named sequences, normalization corrections, standardized variants, CJK + radicals and emoji sources. A document may represent the values actually assigned in a given version of + the UCD, or it may represent a draft version of the UCD, or a private agreement on Private Use + characters. The validity of an XML document with respect to the schema defined in this annex does not + assert anything about the correctness of the values. +

    +

    Valid documents may provide values for only some of the code points, or some of the Unicode + properties. Furthermore, they may also incorporate non-Unicode properties. +

    +

    Our schema is defined using English. However, a useful subset of the validity constraints can be + captured using a schema language, thereby simplifying the task of validating documents. We have chosen + Relax NG [ISO 19757], + in the compact syntax, as the schema language. It is important to stress that the schema which is + defined in English imposes more constraints on the documents than can be validated with the Relax NG + schema. +

    +

    An important characteristic of Relax NG is that its schemas do not modify or augment the infoset of + the documents. Therefore, it is possible to process our XML representation without using the schema. + Also, the schema is relatively straightforward and can be converted mechanically to other schema + languages. +

    +

    While our XML representation is not intended to be used during processing of characters and strings, + it is still a design principle for our schema to support the relatively efficient representation of the + UCD. This is achieved by an inheritance mechanism, similar to property inheritance in CSS or in XSL:FO + (see section 4.3 Group). +

    +

    Many invariants impose constraints on the values of the different properties for a given code point. + For example, if the value of the Numeric Type property is None, then the value of the + Numeric Value property should be the empty string; and if the value of the Other + Alphabetic property is true, then the value of the Alphabetic property should be + true. Those invariants are not captured in the schema. +

    +

    + 2.2 Namespace +

    +

    The namespace for our elements is “http://www.unicode.org/ns/2003/ucd/1.0”. Our + attributes are in the empty namespace. +

    +

    + + [namespace declaration, + 1] + + = + + default namespace ucd = "http://www.unicode.org/ns/2003/ucd/1.0" + +

    +

    In all our examples, we assume that this namespace is the default one. +

    +

    + 2.3 Datatypes +

    +

    We use the standard XML Schema datatypes:

    +

    + + [datatypes declaration, + 2] + + = + + # default; datatypes xsd = "http://www.w3.org/2001/XMLSchema-datatypes" + +

    +

    Characters are pervasive in the UCD, and will need to be represented. Representing characters directly + by themselves would seem the most obvious choice; for example, we could express that the decomposition + of U+00E8 is “&#x0065;&#x0300;”, that is, have exactly two characters in (the + infoset of) the XML document. However, the current XML specification limits the set of characters + that can be part of a document. Another problem is that the various tools (XML parser, XPATH engine, + etc.) may equate U+00E8 with U+0065 U+0300, thus making it difficult to figure out which of the two + sequences is contained in the database (which is sometimes important for our purposes). Therefore, we + chose instead to represent characters by their code points; we follow the usual convention of four to + six hexadecimal digits (uppercase) and code points in a sequence separated by space; for example, the + decomposition of U+00E8 will be represented by the nine characters “0065 0300” in the + infoset. +

    +

    + + [datatype for code points, + 3] + + = + + single-code-point = xsd:string { pattern = "(|[1-9A-F]|(10))[0-9A-F]{4}" } + + one-or-more-code-points = list { single-code-point + } + zero-or-more-code-points = list { single-code-point * } + two-code-points = list { single-code-point, single-code-point } + +

    +

    + 2.4 Root Element +

    +

    The root element of valid documents is a ucd. +

    +

    + + [schema start, + 4] + + = + + start = + element ucd { ucd.content } + +

    +

    + 2.5 Common attributes +

    +

    A large number of properties are boolean. We uniformly use the values Y and + N for those: +

    +

    + + [boolean, + 5] + + = + + boolean = "Y" | "N" + +

    +

    + 2.6 Ordering of elements +

    +

    In elements that hold lists of child elements, such as repertoire, + group, or standardized-variants, the schema does not require that the + child elements be in any particular order. +

    +

    + 3 Description +

    +

    The root element may have a description child element, which in turn contains any string, + which is meant to describe what the XML document purports to describe. +

    +

    It is recommended that if the document purports to represent the UCD of some Unicode version, the + description be selected in accord with the rules listed in [Versions]; and + conversely, that documents which do not purport to represent the UCD be described as such. +

    +

    + + [description, + 6] + + = + + ucd.content &= + element description { text }? + +

    +

    + 4 Repertoire +

    +

    The repertoire child element of the ucd element describes the code points and + their properties. As we will see shortly, code points can be described individually or as part of a group: +

    +

    + + [repertoire, + 7] + + = + + ucd.content &= + element repertoire { (code-point | group) + }? + +

    +

    + 4.1 Sets of code points +

    +

    It is often the case that successive code points have the same property values, for a given set of + properties. The most striking example is that of an unallocated plane, where all but the last two + code points are reserved and have the same property values. Another example is the URO (U+4E00 + .. U+9FA5) where all the code points have the same property values if we ignore their name and their + Unihan properties. +

    +

    + + [Set of code points, + 8] + + = + + set-of-code-points = + attribute cp { single-code-point } + | ( attribute first-cp { single-code-point }, + attribute last-cp { single-code-point } ) + +

    +

    This observation suggests that it is profitable to represent sets of code points which share the + same properties, rather than individual code points. To make the representation of the sets simple, + we restrict them to be segments in the code point space, that is, a set is defined by the first and + last code point it contains. Those are captured by the attributes first-cp and + last-cp. The attribute cp is a shorthand notation for the case where the set + has a single code point. +

    +

    In the repertoire, there must be at most one code-point + element for a given code point. +

    +

    + 4.2 Code point types +

    +

    When thinking about Unicode code points, it is useful to split them into four types: +

    + those assigned to abstract characters (PUA or not) + the noncharacters + the surrogate code points + the reserved code points +

    This leads to four elements to describe sets of code points: +

    +

    + + [Code points, + 9] + + = + + code-point |= + element reserved { + set-of-code-points, + code-point-attributes } + + code-point |= + element noncharacter { + set-of-code-points, + code-point-attributes } + + code-point |= + element surrogate { + set-of-code-points, + code-point-attributes } + + code-point |= + element char { + set-of-code-points, + code-point-attributes } + +

    +

    + 4.3 Group +

    +

    While we already recognized the situation where a set of code points have exactly the same set of + property values, another common situation is that of code points which have almost all the same + property values. +

    +

    For example, the characters U+1740 BUHID LETTER A .. U+1753 BUHID VOWEL SIGN U all have the age + “3.2”, and all have the script “Buhd”. On the one hand, it is convenient + to support data files in which those properties are explicitly listed with every code point, as this + makes answering questions like “what is the age of U+1749?” easier, because that data + is expressed right there. On the other hand, this leads to rather large data files, and it also tends + to obscure the differences between similar characters. +

    +

    Our representation accounts for this situation with the notion of groups. A + group element is simply a container of code points that also holds default values for + the properties. If a code point inside a group does not explicitly list a property but the + group lists it, then the code point inherits that property from its + group. For example, the fragment with explicit properties: +

    +
    +    <char cp="1740" age="3.2" na="BUHID LETTER A" gc="Lo" sc="Buhd"/>
    +    <char cp="1741" age="3.2" na="BUHID LETTER I" gc="Lo" sc="Buhd"/>
    +    <char cp="1752" age="3.2" na="BUHID VOWEL SIGN I" gc="Mn" sc="Buhd"/>
    +    <char cp="1820" age="3.0" na="MONGOLIAN LETTER A" gc="Lo" sc="Mong"/>
    +

    is equivalent to this fragment which uses a group: +

    +
    +    <group age="3.2" gc="Lo" sc="Buhd">
    +        <char cp="1740" na="BUHID LETTER A"/>
    +        <char cp="1741" na="BUHID LETTER I"/>
    +        <char cp="1752" na="BUHID VOWEL SIGN I" gc="Mn"/>
    +        <char cp="1820" age="3.0" na="MONGOLIAN LETTER A" sc="Mong"/>
    +    </group>
    +

    The element for U+1740 does not have the age attribute, and it therefore inherits it + from its enclosing group element, that is “3.2”. On the other hand, + the element for U+1820 does have this attribute, so the value is “3.0”. +

    +

    As this example illustrates, the notion of group does not necessarily align with the + notion of Unicode block. It is defined entirely by, and limited to, our representation. In particular, the + value of a property for a code point can always be determined from the XML document alone, assuming + that this property and this code point are expressed at all. Of course, one may create an XML + representation where the groups happen to coincide with the Unicode blocks. +

    +

    Groups cannot be nested. The motivation for this limitation is to make the life of consumers + easier: either a property is defined by the element for a code point, or it is defined by the + immediately enclosing group element. +

    +

    + + [groups, + 10] + + = + + group = + element group { + code-point-attributes, + code-point* } + +

    +

    + 4.4 Properties +

    +

    Each property, except for the Special_Case_Condition and Name_Alias + properties, is represented by an attribute. In an XML data file, the absence of an attribute (possibly + only on some code points) means that the document does not express the value + of the corresponding property. Conversely, the presence of an attribute is an expression of the + corresponding property value; the implied null value is represented by the empty string. +

    +

    The Name_Alias property is represented by zero or more name-alias child + elements. Unlike the situation for properties represented by attributes, it is not possible to determine + whether all the aliases have been represented in a data file by inspecting that data file. +

    +

    The name of an attribute is the abbreviated name of the property as given in the file + PropertyAliases.txt in the corresponding version of the UCD. For the Unihan + properties, the name is that given in the various versions of the Unihan database. +

    +

    For catalog and enumerated properties, the values are those listed in the file + PropertyValueAliases.txt in the corresponding version of the UCD; if there is an abbreviated + name, it is used, otherwise the long name is used. +

    +

    Note that the set of possible values for a property captured in this schema may change from one + version to the next. +

    +

    + 4.4.1 Age property +

    +

    The age attribute captures the version of Unicode in which a code point was + assigned to an abstract character, or made a surrogate or noncharacter. +

    +

    + + [age attribute, + 11] + + = + + code-point-attributes &= + attribute age { "1.1" + | "2.0" | "2.1" + | "3.0" | "3.1" | "3.2" + | "4.0" | "4.1" + | "5.0" | "5.1" | "5.2" + | "6.0" | "6.1" | "6.2" | "6.3" + | "7.0" + | "8.0" + | "9.0" + | "10.0" + | "11.0" + | "12.0" | "12.1" + | "13.0" + | "14.0" + | "15.0" | "15.1" + | "16.0" + | "unassigned" + }? + +

    +

    + 4.4.2 Name properties +

    +

    There are two name properties: the name given by the current version of the standard + (na), and possibly the name this character had in version 1.0 of the standard + (na1). +

    +

    + + [na attribute, + 12] + + = + + code-point-attributes &= + attribute na { "" | + "CJK UNIFIED IDEOGRAPH-#" | + "CJK COMPATIBILITY IDEOGRAPH-#" | + "EGYPTIAN HIEROGLYPH-#" | + "TANGUT IDEOGRAPH-#" | + "KHITAN SMALL SCRIPT CHARACTER-#" | + "NUSHU CHARACTER-#" | + xsd:string { pattern="[a-zA-Z0-9]+(( -|- |[\-_ ])[a-zA-Z0-9]+)*" } + }? + +

    +

    + + [na1 attribute, + 13] + + = + + code-point-attributes &= + attribute na1 { "" | xsd:string { pattern="[a-zA-Z0-9]+([\-_ ][a-zA-Z0-9]+)*( \(.*\))?" } }? + +

    +

    The majority of the characters in Unicode have a name which is of the form CJK UNIFIED + IDEOGRAPH-<code point>. It also happens that character names cannot + contain the character U+0023 # NUMBER SIGN, so we adopted the following convention: if a + code point has the attribute na (either directly or by inheritance from an enclosing + group), then occurrences of the character # in the name are to be interpreted as the value of the + code point. For example: +

    +
    +    <char cp="3400" na="CJK UNIFIED IDEOGRAPH-3400"/>
    +

    and

    +
    +    <char cp="3400" na="CJK UNIFIED IDEOGRAPH-#"/>
    +

    are equivalent. The # can be in any position in the value of the na + attribute. The convention also applies just as well to a set of multiple code points: +

    +
    +    <char cp="3400" na="CJK UNIFIED IDEOGRAPH-3400"/>
    +    <char cp="3401" na="CJK UNIFIED IDEOGRAPH-3401"/>
    +

    is equivalent to

    +
    +    <char cp="3400" na="CJK UNIFIED IDEOGRAPH-#"/>
    +    <char cp="3401" na="CJK UNIFIED IDEOGRAPH-#"/>
    +

    which in turn is equivalent to:

    +
    +    <char first-cp="3400" last-cp="3401" na="CJK UNIFIED IDEOGRAPH-#"/>
    +

    + 4.4.3 Name Alias properties +

    +

    The Name_Alias property is represented by zero or more name-alias + child elements: +

    +

    + + [name-alias element, + 14] + + = + + code-point-attributes &= + element name-alias { + attribute alias { xsd:string { pattern="[a-zA-Z0-9]+(( -|- |[\-_ ])[a-zA-Z0-9]+)*" } }?, + attribute type { "abbreviation" | "alternate" + | "control" | "correction" + | "figment" + }? } * + +

    +

    + 4.4.4 Block property +

    +

    The Block property is represented by the blk attribute: +

    +

    + + [blk attribute, + 15] + + = + + code-point-attributes &= + attribute blk { "Adlam" + | "Aegean_Numbers" + | "Ahom" + | "Alchemical" + | "Alphabetic_PF" + | "Anatolian_Hieroglyphs" + | "Ancient_Greek_Music" + | "Ancient_Greek_Numbers" + | "Ancient_Symbols" + | "Arabic" + | "Arabic_Ext_A" + | "Arabic_Ext_B" + | "Arabic_Ext_C" + | "Arabic_Math" + | "Arabic_PF_A" + | "Arabic_PF_B" + | "Arabic_Sup" + | "Armenian" + | "Arrows" + | "ASCII" + | "Avestan" + | "Balinese" + | "Bamum" + | "Bamum_Sup" + | "Bassa_Vah" + | "Batak" + | "Bengali" + | "Bhaiksuki" + | "Block_Elements" + | "Bopomofo" + | "Bopomofo_Ext" + | "Box_Drawing" + | "Brahmi" + | "Braille" + | "Buginese" + | "Buhid" + | "Byzantine_Music" + | "Carian" + | "Caucasian_Albanian" + | "Chakma" + | "Cham" + | "Cherokee" + | "Cherokee_Sup" + | "Chess_Symbols" + | "Chorasmian" + | "CJK" + | "CJK_Compat" + | "CJK_Compat_Forms" + | "CJK_Compat_Ideographs" + | "CJK_Compat_Ideographs_Sup" + | "CJK_Ext_A" + | "CJK_Ext_B" + | "CJK_Ext_C" + | "CJK_Ext_D" + | "CJK_Ext_E" + | "CJK_Ext_F" + | "CJK_Ext_G" + | "CJK_Ext_H" + | "CJK_Ext_I" + | "CJK_Radicals_Sup" + | "CJK_Strokes" + | "CJK_Symbols" + | "Compat_Jamo" + | "Control_Pictures" + | "Coptic" + | "Coptic_Epact_Numbers" + | "Counting_Rod" + | "Cuneiform" + | "Cuneiform_Numbers" + | "Currency_Symbols" + | "Cypriot_Syllabary" + | "Cypro_Minoan" + | "Cyrillic" + | "Cyrillic_Ext_A" + | "Cyrillic_Ext_B" + | "Cyrillic_Ext_C" + | "Cyrillic_Ext_D" + | "Cyrillic_Sup" + | "Deseret" + | "Devanagari" + | "Devanagari_Ext" + | "Devanagari_Ext_A" + | "Diacriticals" + | "Diacriticals_Ext" + | "Diacriticals_For_Symbols" + | "Diacriticals_Sup" + | "Dingbats" + | "Dives_Akuru" + | "Dogra" + | "Domino" + | "Duployan" + | "Early_Dynastic_Cuneiform" + | "Egyptian_Hieroglyph_Format_Controls" + | "Egyptian_Hieroglyphs" + | "Egyptian_Hieroglyphs_Ext_A" + | "Elbasan" + | "Elymaic" + | "Emoticons" + | "Enclosed_Alphanum" + | "Enclosed_Alphanum_Sup" + | "Enclosed_CJK" + | 
"Enclosed_Ideographic_Sup" + | "Ethiopic" + | "Ethiopic_Ext" + | "Ethiopic_Ext_A" + | "Ethiopic_Ext_B" + | "Ethiopic_Sup" + | "Garay" + | "Geometric_Shapes" + | "Geometric_Shapes_Ext" + | "Georgian" + | "Georgian_Ext" + | "Georgian_Sup" + | "Glagolitic" + | "Glagolitic_Sup" + | "Gothic" + | "Grantha" + | "Greek" + | "Greek_Ext" + | "Gujarati" + | "Gunjala_Gondi" + | "Gurmukhi" + | "Gurung_Khema" + | "Half_And_Full_Forms" + | "Half_Marks" + | "Hangul" + | "Hanifi_Rohingya" + | "Hanunoo" + | "Hatran" + | "Hebrew" + | "High_PU_Surrogates" + | "High_Surrogates" + | "Hiragana" + | "IDC" + | "Ideographic_Symbols" + | "Imperial_Aramaic" + | "Indic_Number_Forms" + | "Indic_Siyaq_Numbers" + | "Inscriptional_Pahlavi" + | "Inscriptional_Parthian" + | "IPA_Ext" + | "Jamo" + | "Jamo_Ext_A" + | "Jamo_Ext_B" + | "Javanese" + | "Kaithi" + | "Kaktovik_Numerals" + | "Kana_Ext_A" + | "Kana_Ext_B" + | "Kana_Sup" + | "Kanbun" + | "Kangxi" + | "Kannada" + | "Katakana" + | "Katakana_Ext" + | "Kawi" + | "Kayah_Li" + | "Kharoshthi" + | "Khitan_Small_Script" + | "Khmer" + | "Khmer_Symbols" + | "Khojki" + | "Khudawadi" + | "Kirat_Rai" + | "Lao" + | "Latin_1_Sup" + | "Latin_Ext_A" + | "Latin_Ext_Additional" + | "Latin_Ext_B" + | "Latin_Ext_C" + | "Latin_Ext_D" + | "Latin_Ext_E" + | "Latin_Ext_F" + | "Latin_Ext_G" + | "Lepcha" + | "Letterlike_Symbols" + | "Limbu" + | "Linear_A" + | "Linear_B_Ideograms" + | "Linear_B_Syllabary" + | "Lisu" + | "Lisu_Sup" + | "Low_Surrogates" + | "Lycian" + | "Lydian" + | "Mahajani" + | "Mahjong" + | "Makasar" + | "Malayalam" + | "Mandaic" + | "Manichaean" + | "Marchen" + | "Masaram_Gondi" + | "Math_Alphanum" + | "Math_Operators" + | "Mayan_Numerals" + | "Medefaidrin" + | "Meetei_Mayek" + | "Meetei_Mayek_Ext" + | "Mende_Kikakui" + | "Meroitic_Cursive" + | "Meroitic_Hieroglyphs" + | "Miao" + | "Misc_Arrows" + | "Misc_Math_Symbols_A" + | "Misc_Math_Symbols_B" + | "Misc_Pictographs" + | "Misc_Symbols" + | "Misc_Technical" + | "Modi" + | "Modifier_Letters" + | 
"Modifier_Tone_Letters" + | "Mongolian" + | "Mongolian_Sup" + | "Mro" + | "Multani" + | "Music" + | "Myanmar" + | "Myanmar_Ext_A" + | "Myanmar_Ext_B" + | "Myanmar_Ext_C" + | "Nabataean" + | "Nag_Mundari" + | "Nandinagari" + | "NB" + | "New_Tai_Lue" + | "Newa" + | "NKo" + | "Number_Forms" + | "Nushu" + | "Nyiakeng_Puachue_Hmong" + | "OCR" + | "Ogham" + | "Ol_Chiki" + | "Ol_Onal" + | "Old_Hungarian" + | "Old_Italic" + | "Old_North_Arabian" + | "Old_Permic" + | "Old_Persian" + | "Old_Sogdian" + | "Old_South_Arabian" + | "Old_Turkic" + | "Old_Uyghur" + | "Oriya" + | "Ornamental_Dingbats" + | "Osage" + | "Osmanya" + | "Ottoman_Siyaq_Numbers" + | "Pahawh_Hmong" + | "Palmyrene" + | "Pau_Cin_Hau" + | "Phags_Pa" + | "Phaistos" + | "Phoenician" + | "Phonetic_Ext" + | "Phonetic_Ext_Sup" + | "Playing_Cards" + | "Psalter_Pahlavi" + | "PUA" + | "Punctuation" + | "Rejang" + | "Rumi" + | "Runic" + | "Samaritan" + | "Saurashtra" + | "Sharada" + | "Shavian" + | "Shorthand_Format_Controls" + | "Siddham" + | "Sinhala" + | "Sinhala_Archaic_Numbers" + | "Small_Forms" + | "Small_Kana_Ext" + | "Sogdian" + | "Sora_Sompeng" + | "Soyombo" + | "Specials" + | "Sundanese" + | "Sundanese_Sup" + | "Sunuwar" + | "Sup_Arrows_A" + | "Sup_Arrows_B" + | "Sup_Arrows_C" + | "Sup_Math_Operators" + | "Sup_PUA_A" + | "Sup_PUA_B" + | "Sup_Punctuation" + | "Sup_Symbols_And_Pictographs" + | "Super_And_Sub" + | "Sutton_SignWriting" + | "Syloti_Nagri" + | "Symbols_And_Pictographs_Ext_A" + | "Symbols_For_Legacy_Computing" + | "Symbols_For_Legacy_Computing_Sup" + | "Syriac" + | "Syriac_Sup" + | "Tagalog" + | "Tagbanwa" + | "Tags" + | "Tai_Le" + | "Tai_Tham" + | "Tai_Viet" + | "Tai_Xuan_Jing" + | "Takri" + | "Tamil" + | "Tamil_Sup" + | "Tangsa" + | "Tangut" + | "Tangut_Components" + | "Tangut_Sup" + | "Telugu" + | "Thaana" + | "Thai" + | "Tibetan" + | "Tifinagh" + | "Tirhuta" + | "Todhri" + | "Toto" + | "Transport_And_Map" + | "Tulu_Tigalari" + | "UCAS" + | "UCAS_Ext" + | "UCAS_Ext_A" + | "Ugaritic" + | "Vai" + | 
"Vedic_Ext" + | "Vertical_Forms" + | "Vithkuqi" + | "VS" + | "VS_Sup" + | "Wancho" + | "Warang_Citi" + | "Yezidi" + | "Yi_Radicals" + | "Yi_Syllables" + | "Yijing" + | "Zanabazar_Square" + | "Znamenny_Music" + }? + +

    +

    + 4.4.5 General Category +

    +

    The general category is represented by the gc attribute. +

    +

    + + [gc attribute, + 16] + + = + + code-point-attributes &= + attribute gc { "Cc" | "Cf" | "Cn" | "Co" | "Cs" + | "Ll" | "Lm" | "Lo" | "Lt" | "Lu" + | "Mc" | "Me" | "Mn" + | "Nd" | "Nl" | "No" + | "Pc" | "Pd" | "Pe" | "Pf" | "Pi" | "Po" | "Ps" + | "Sc" | "Sk" | "Sm" | "So" + | "Zl" | "Zp" | "Zs" + }? + +

    +

    + 4.4.6 Combining properties +

    +

    The combining class is represented by the ccc attribute, which holds the decimal + representation of the combining class. +

    +

    Because the set of values that this property has taken across the various versions of the UCD + is rather large, our schema does not restrict the possible values to those actually used. +

    +

    + + [ccc attribute, + 17] + + = + + code-point-attributes &= + attribute ccc { xsd:integer { minInclusive="0" maxInclusive="254" } }? + +

    +

    + 4.4.7 Bidirectionality properties +

    +

    The bidirectional class is represented by the bc attribute. +

    +

    + + [bc attribute, + 18] + + = + + code-point-attributes &= + attribute bc { "AL" | "AN" + | "B" | "BN" + | "CS" + | "EN" | "ES" | "ET" + | "FSI" + | "L" | "LRE" | "LRI" | "LRO" + | "NSM" + | "ON" + | "PDF" | "PDI" + | "R" | "RLE" | "RLI" | "RLO" + | "S" + | "WS" + }? + +

    +

    The mirrored property is represented by the Bidi_M attribute, which takes a + boolean value. +

    +

    + + [Bidi_M attribute, + 19] + + = + + code-point-attributes &= + attribute Bidi_M { boolean }? + +

    +

    The bmg attribute is the code point of a character whose glyph is typically + a mirrored image of the glyph for the current character. +

    +

    + + [bmg attribute, + 20] + + = + + code-point-attributes &= + attribute bmg { "" | single-code-point }? + +

    +

    Note that we do not express the “Best Fit” element recorded in BidiMirroring.txt. + For one thing, it is not meant to be machine readable. More importantly, the idea underlying the + mirrored glyph is delicate to use, since it makes assumptions about the design of the fonts, and + the best fit goes even farther. +

    +

    The Bidi_Control property is represented by the Bidi_C attribute. +

    +

    + + [Bidi_C attribute, + 21] + + = + + code-point-attributes &= + attribute Bidi_C { boolean }? + +

    +

    The bidi paired bracket type and bidi paired bracket properties are represented by the + bpt and bpb attributes respectively. +

    +

    + + [bpt attribute, + 22] + + = + + code-point-attributes &= + attribute bpt { "o" | "c" | "n" }? + +

    +

    + + [bpb attribute, + 23] + + = + + code-point-attributes &= + attribute bpb { "#" | single-code-point }? + +

    +

    + 4.4.8 Decomposition properties +

    +

    The decomposition type and decomposition mapping properties are represented by the dt + and dm attributes. +

    +

    Most characters have a decomposition mapping to themselves. This is very similar to the + situation we encountered with names, and we adopted a similar convention: if the value of a + decomposition mapping is the character itself, we use the attribute value # (U+0023 # + NUMBER SIGN) as a shorthand notation; this enables those attributes to be captured in groups. +

    +

    + + [decomposition properties, + 24] + + = + + code-point-attributes &= + attribute dt { "can" | "com" | "enc" | "fin" | "font" | "fra" + | "init" | "iso" | "med" | "nar" | "nb" | "sml" + | "sqr" | "sub" | "sup" | "vert" | "wide" | "none" + }? + + code-point-attributes &= + attribute dm { "#" | zero-or-more-code-points }? + +

    +

    The properties Composition_Exclusion and Full_Composition_Exclusion are + represented by the attributes CE and Comp_Ex: +

    +

    + + [composition properties, + 25] + + = + + code-point-attributes &= + attribute CE { boolean }? + + code-point-attributes &= + attribute Comp_Ex { boolean }? + +

    +

    The properties NFC_Quick_Check, NFD_Quick_Check, + NFKC_Quick_Check, NFKD_Quick_Check, Expands_On_NFC, + Expands_On_NFD, Expands_On_NFKC, Expands_On_NFKD, and + FC_NFKC_Closure have corresponding attributes. +

    +

    + + [quick check properties, + 26] + + = + + code-point-attributes &= + attribute NFC_QC { "Y" | "N" | "M" }? + + code-point-attributes &= + attribute NFD_QC { "Y" | "N" }? + + code-point-attributes &= + attribute NFKC_QC { "Y" | "N" | "M" }? + + code-point-attributes &= + attribute NFKD_QC { "Y" | "N" }? + + + code-point-attributes &= + attribute XO_NFC { boolean }? + + code-point-attributes &= + attribute XO_NFD { boolean }? + + code-point-attributes &= + attribute XO_NFKC { boolean }? + + code-point-attributes &= + attribute XO_NFKD { boolean }? + + + code-point-attributes &= + attribute FC_NFKC { "#" | one-or-more-code-points }? + +

    +

    + 4.4.9 Numeric Properties +

    +

    The numeric type is represented by the nt attribute. +

    +

    The numeric value is represented by the nv attribute, represented as a whole + number or a fraction. +

    +

    + + [numeric properties, + 27] + + = + + code-point-attributes &= + attribute nt { "De" | "Di" | "Nu" | "None" }? + + code-point-attributes &= + attribute nv { "NaN" | xsd:string { pattern="-?[0-9]+(/[0-9]+)?" } }? + +

    +

    + 4.4.10 Joining properties +

    +

    The joining class of a character is represented by the jt attribute. +

    +

    The jg attribute is the joining group of the character. +

    +

    + + [joining properties, + 28] + + = + + code-point-attributes &= + attribute jt { "C" | "D" | "L" | "R" | "T" | "U" }? + + code-point-attributes &= + attribute jg { "African_Feh" | "African_Noon" | "African_Qaf" + | "Ain" | "Alaph" | "Alef" + | "Beh" | "Beth" | "Burushaski_Yeh_Barree" + | "Dal" | "Dalath_Rish" + | "E" + | "Farsi_Yeh" | "Fe" | "Feh" | "Final_Semkath" + | "Gaf" | "Gamal" + | "Hah" | "Hanifi_Rohingya_Kinna_Ya" + | "Hanifi_Rohingya_Pa" | "He" | "Heh" | "Heh_Goal" + | "Heth" + | "Kaf" | "Kaph" | "Khaph" | "Knotted_Heh" + | "Lam" | "Lamadh" + | "Malayalam_Bha" | "Malayalam_Ja" | "Malayalam_Lla" + | "Malayalam_Llla" | "Malayalam_Nga" + | "Malayalam_Nna" | "Malayalam_Nnna" + | "Malayalam_Nya" | "Malayalam_Ra" | "Malayalam_Ssa" + | "Malayalam_Tta" | "Manichaean_Aleph" + | "Manichaean_Ayin" | "Manichaean_Beth" + | "Manichaean_Daleth" | "Manichaean_Dhamedh" + | "Manichaean_Five" | "Manichaean_Gimel" + | "Manichaean_Heth" | "Manichaean_Hundred" + | "Manichaean_Kaph" | "Manichaean_Lamedh" + | "Manichaean_Mem" | "Manichaean_Nun" + | "Manichaean_One" | "Manichaean_Pe" + | "Manichaean_Qoph" | "Manichaean_Resh" + | "Manichaean_Sadhe" | "Manichaean_Samekh" + | "Manichaean_Taw" | "Manichaean_Ten" + | "Manichaean_Teth" | "Manichaean_Thamedh" + | "Manichaean_Twenty" | "Manichaean_Waw" + | "Manichaean_Yodh" | "Manichaean_Zayin" | "Meem" + | "Mim" + | "No_Joining_Group" | "Noon" | "Nun" | "Nya" + | "Pe" + | "Qaf" | "Qaph" + | "Reh" | "Reversed_Pe" | "Rohingya_Yeh" + | "Sad" | "Sadhe" | "Seen" | "Semkath" | "Shin" + | "Straight_Waw" | "Swash_Kaf" | "Syriac_Waw" + | "Tah" | "Taw" | "Teh_Marbuta" | "Teh_Marbuta_Goal" + | "Teth" | "Thin_Yeh" + | "Vertical_Tail" + | "Waw" + | "Yeh" | "Yeh_Barree" | "Yeh_With_Tail" | "Yudh" + | "Yudh_He" + | "Zain" | "Zhain" + }? + +

    +

    The Join_Control property is represented by the Join_C attribute. +

    +

    + + [joining properties, + 29] + + = + + code-point-attributes &= + attribute Join_C { boolean }? + +

    +

    + 4.4.11 Linebreak properties +

    +

    The Line_Break property is represented by the lb attribute. +

    +

    + + [lb attribute, + 30] + + = + + code-point-attributes &= + attribute lb { "AI" | "AK" | "AL" | "AP" | "AS" + | "B2" | "BA" | "BB" | "BK" + | "CB" | "CJ" | "CL" | "CM" | "CP" | "CR" + | "EB" | "EM" | "EX" + | "GL" + | "H2" | "H3" | "HL" | "HY" + | "ID" | "IN" | "IS" + | "JL" | "JT" | "JV" + | "LF" + | "NL" | "NS" | "NU" + | "OP" + | "PO" | "PR" + | "QU" + | "RI" + | "SA" | "SG" | "SP" | "SY" + | "VF" | "VI" + | "WJ" + | "XX" + | "ZW" | "ZWJ" + }? + +

    +

    + 4.4.12 East Asian Width property +

    +

    The East Asian width property is represented by the ea attribute. +

    +

    + + [ea attribute, + 31] + + = + + code-point-attributes &= + attribute ea { "A" | "F" | "H" | "N" | "Na" | "W" }? + +

    +

    + 4.4.13 Case properties +

    +

    The Uppercase, Lowercase, Other_Uppercase and + Other_Lowercase properties are represented by corresponding attributes. +

    +

    + + [casing properties, + 32] + + = + + code-point-attributes &= + attribute Upper { boolean }? + + code-point-attributes &= + attribute Lower { boolean }? + + code-point-attributes &= + attribute OUpper { boolean }? + + code-point-attributes &= + attribute OLower { boolean }? + +

    +

    Most characters have case mapping and case folding properties that simply map or fold to + themselves. This is very similar to the situation we encountered with names, and we adopted a + similar convention: if the value of a case mapping or case folding property is the character + itself, we use the attribute value # (U+0023 # NUMBER SIGN) as a shorthand notation; this + enables those attributes to be captured in groups. +

    +

    The simple case mappings are recorded in the suc, slc, stc + attributes. +

    +

    + + [casing properties, + 33] + + = + + code-point-attributes &= + attribute suc { "#" | single-code-point }? + + code-point-attributes &= + attribute slc { "#" | single-code-point }? + + code-point-attributes &= + attribute stc { "#" | single-code-point }? + +

    +

    The non-simple case mappings are recorded in the uc, lc and tc + attributes. +

    +

    + + [casing properties, + 34] + + = + + code-point-attributes &= + attribute uc { "#" | one-or-more-code-points }? + + code-point-attributes &= + attribute lc { "#" | one-or-more-code-points }? + + code-point-attributes &= + attribute tc { "#" | one-or-more-code-points }? + +

    +

    The Simple_Case_Folding and Case_Folding properties are recorded in the + scf and cf attributes respectively. +

    +

    + + [casing properties, + 35] + + = + + code-point-attributes &= + attribute scf { "#" | single-code-point }? + + code-point-attributes &= + attribute cf { "#" | one-or-more-code-points }? + +

    +

    The Case_Ignorable, Cased, Changes_When_Casefolded, + Changes_When_Casemapped, Changes_When_Lowercased, + Changes_When_NFKC_Casefolded, Changes_When_Titlecased, + Changes_When_Uppercased, NFKC_Casefold, and + NFKC_Simple_Casefold properties are recorded in these attributes: +

    +

    + + [casing properties, + 36] + + = + + code-point-attributes &= + attribute CI { boolean }? + + code-point-attributes &= + attribute Cased { boolean }? + + code-point-attributes &= + attribute CWCF { boolean }? + + code-point-attributes &= + attribute CWCM { boolean }? + + code-point-attributes &= + attribute CWL { boolean }? + + code-point-attributes &= + attribute CWKCF { boolean }? + + code-point-attributes &= + attribute CWT { boolean }? + + code-point-attributes &= + attribute CWU { boolean }? + + code-point-attributes &= + attribute NFKC_CF { "#" | zero-or-more-code-points }? + + code-point-attributes &= + attribute NFKC_SCF { "#" | zero-or-more-code-points }? + +

    +

    Note that the UCD records more information about case folding than is expressed in the + properties, specifically the entries in CaseFolding.txt with status T. +

    +

    + 4.4.14 Script properties +

    +

    The script and script extension properties are represented by the sc and + scx attributes respectively. +

    +

    + + [script properties, + 37] + + = + + script = "Adlm" | "Aghb" | "Ahom" | "Arab" | "Armi" | "Armn" + | "Avst" + | "Bali" | "Bamu" | "Bass" | "Batk" | "Beng" | "Bhks" + | "Bopo" | "Brah" | "Brai" | "Bugi" | "Buhd" + | "Cakm" | "Cans" | "Cari" | "Cham" | "Cher" | "Chrs" + | "Copt" | "Cpmn" | "Cprt" | "Cyrl" + | "Deva" | "Diak" | "Dogr" | "Dsrt" | "Dupl" + | "Egyp" | "Elba" | "Elym" | "Ethi" + | "Gara" | "Geor" | "Glag" | "Gong" | "Gonm" | "Goth" + | "Gran" | "Grek" | "Gujr" | "Gukh" | "Guru" + | "Hang" | "Hani" | "Hano" | "Hatr" | "Hebr" | "Hira" + | "Hluw" | "Hmng" | "Hmnp" | "Hrkt" | "Hung" + | "Ital" + | "Java" + | "Kali" | "Kana" | "Kawi" | "Khar" | "Khmr" | "Khoj" + | "Kits" | "Knda" | "Krai" | "Kthi" + | "Lana" | "Laoo" | "Latn" | "Lepc" | "Limb" | "Lina" + | "Linb" | "Lisu" | "Lyci" | "Lydi" + | "Mahj" | "Maka" | "Mand" | "Mani" | "Marc" | "Medf" + | "Mend" | "Merc" | "Mero" | "Mlym" | "Modi" | "Mong" + | "Mroo" | "Mtei" | "Mult" | "Mymr" + | "Nagm" | "Nand" | "Narb" | "Nbat" | "Newa" | "Nkoo" + | "Nshu" + | "Ogam" | "Olck" | "Onao" | "Orkh" | "Orya" | "Osge" + | "Osma" | "Ougr" + | "Palm" | "Pauc" | "Perm" | "Phag" | "Phli" | "Phlp" + | "Phnx" | "Plrd" | "Prti" + | "Rjng" | "Rohg" | "Runr" + | "Samr" | "Sarb" | "Saur" | "Sgnw" | "Shaw" | "Shrd" + | "Sidd" | "Sind" | "Sinh" | "Sogd" | "Sogo" | "Sora" + | "Soyo" | "Sund" | "Sunu" | "Sylo" | "Syrc" + | "Tagb" | "Takr" | "Tale" | "Talu" | "Taml" | "Tang" + | "Tavt" | "Telu" | "Tfng" | "Tglg" | "Thaa" | "Thai" + | "Tibt" | "Tirh" | "Tnsa" | "Todr" | "Toto" | "Tutg" + | "Ugar" + | "Vaii" | "Vith" + | "Wara" | "Wcho" + | "Xpeo" | "Xsux" + | "Yezi" | "Yiii" + | "Zanb" | "Zinh" | "Zyyy" | "Zzzz" + + code-point-attributes &= + attribute sc { script }? + + code-point-attributes &= + attribute scx { list { script + } }? + +

    +

    + 4.4.15 ISO Comment properties +

    +

    The ISO 10646 comment field is represented by the isc attribute. +

    +

    + + [isc attribute, + 38] + + = + + code-point-attributes &= + attribute isc { text }? + +

    +

    + 4.4.16 Hangul properties +

    +

    The property Hangul_Syllable_Type is represented by the hst attribute. +

    +

    + + [hst attribute, + 39] + + = + + code-point-attributes &= + attribute hst { "L" | "LV" | "LVT" | "NA" | "T" | "V" }? + +

    +

    The property Jamo_Short_Name is represented by the JSN attribute: +

    +

    + + [JSN attribute, + 40] + + = + + code-point-attributes &= + attribute JSN { xsd:string { pattern="[A-Z]{0,3}" } }? + +

    +

    + 4.4.17 Indic properties +

    +

    The property Indic_Syllabic_Category is represented by the InSC + attribute. +

    +

    + + [InSC attribute, + 41] + + = + + code-point-attributes &= + attribute InSC { "Avagraha" + | "Bindu" + | "Brahmi_Joining_Number" + | "Cantillation_Mark" + | "Consonant" + | "Consonant_Dead" + | "Consonant_Final" + | "Consonant_Head_Letter" + | "Consonant_Initial_Postfixed" + | "Consonant_Killer" + | "Consonant_Medial" + | "Consonant_Placeholder" + | "Consonant_Preceding_Repha" + | "Consonant_Prefixed" + | "Consonant_Subjoined" + | "Consonant_Succeeding_Repha" + | "Consonant_With_Stacker" + | "Gemination_Mark" + | "Invisible_Stacker" + | "Joiner" + | "Modifying_Letter" + | "Non_Joiner" + | "Nukta" + | "Number" + | "Number_Joiner" + | "Other" + | "Pure_Killer" + | "Register_Shifter" + | "Reordering_Killer" + | "Syllable_Modifier" + | "Tone_Letter" + | "Tone_Mark" + | "Virama" + | "Visarga" + | "Vowel" + | "Vowel_Dependent" + | "Vowel_Independent" + }? + +

    +

    The property Indic_Positional_Category is represented by the InPC + attribute: +

    +

    + + [InPC attribute, + 42] + + = + + code-point-attributes &= + attribute InPC { "Bottom" + | "Bottom_And_Left" + | "Bottom_And_Right" + | "Left" + | "Left_And_Right" + | "NA" + | "Overstruck" + | "Right" + | "Top" + | "Top_And_Bottom" + | "Top_And_Bottom_And_Left" + | "Top_And_Bottom_And_Right" + | "Top_And_Left" + | "Top_And_Left_And_Right" + | "Top_And_Right" + | "Visual_Order_Left" + }? + +

    +

    The property Indic_Conjunct_Break is represented by the InCB attribute: +

    +

    + + [InCB attribute, + 43] + + = + + code-point-attributes &= + attribute InCB { "Consonant" + | "Extend" + | "Linker" + | "None" + }? + +

    +

    + 4.4.18 Identifier and Pattern and programming language properties +

    +

    The properties ID_Start, Other_ID_Start, XID_Start, + ID_Continue, Other_ID_Continue, XID_Continue, + ID_Compat_Math_Start, and ID_Compat_Math_Continue are represented by + corresponding attributes: +

    +

    + + [identifier properties, + 44] + + = + + code-point-attributes &= + attribute IDS { boolean }? + + code-point-attributes &= + attribute OIDS { boolean }? + + code-point-attributes &= + attribute XIDS { boolean }? + + code-point-attributes &= + attribute IDC { boolean }? + + code-point-attributes &= + attribute OIDC { boolean }? + + code-point-attributes &= + attribute XIDC { boolean }? + + code-point-attributes &= + attribute ID_Compat_Math_Start { boolean }? + + code-point-attributes &= + attribute ID_Compat_Math_Continue { boolean }? + +

    +

    The properties Pattern_Syntax and Pattern_White_Space are represented + by corresponding attributes: +

    +

    + + [pattern properties, + 45] + + = + + code-point-attributes &= + attribute Pat_Syn { boolean }? + + code-point-attributes &= + attribute Pat_WS { boolean }? + +

    +

    + 4.4.19 Properties related to function and graphic characteristics +

    +

    The properties Dash, Hyphen, Quotation_Mark, + Terminal_Punctuation, Sentence_Terminal, Diacritic, + Extender, Soft_Dotted, Alphabetic, + Other_Alphabetic, Math, Other_Math, Hex_Digit, + ASCII_Hex_Digit, Default_Ignorable_Code_Point, + Other_Default_Ignorable_Code_Point, Logical_Order_Exception, + Prepended_Concatenation_Mark, Modifier_Combining_Mark, + White_Space, Vertical_Orientation, and Regional_Indicator + describe the function or graphic characteristic of a character, and each have a corresponding + attribute. +

    +

    + + [properties related to function and graphic characteristics, + 46] + + = + + code-point-attributes &= + attribute Dash { boolean }? + + code-point-attributes &= + attribute Hyphen { boolean }? + + code-point-attributes &= + attribute QMark { boolean }? + + code-point-attributes &= + attribute Term { boolean }? + + code-point-attributes &= + attribute STerm { boolean }? + + code-point-attributes &= + attribute Dia { boolean }? + + code-point-attributes &= + attribute Ext { boolean }? + + code-point-attributes &= + attribute SD { boolean }? + + code-point-attributes &= + attribute Alpha { boolean }? + + code-point-attributes &= + attribute OAlpha { boolean }? + + code-point-attributes &= + attribute Math { boolean }? + + code-point-attributes &= + attribute OMath { boolean }? + + code-point-attributes &= + attribute Hex { boolean }? + + code-point-attributes &= + attribute AHex { boolean }? + + code-point-attributes &= + attribute DI { boolean }? + + code-point-attributes &= + attribute ODI { boolean }? + + code-point-attributes &= + attribute LOE { boolean }? + + code-point-attributes &= + attribute PCM { boolean }? + + code-point-attributes &= + attribute MCM { boolean }? + + code-point-attributes &= + attribute WSpace { boolean }? + + code-point-attributes &= + attribute vo { "R" | "Tr" | "Tu" | "U" }? + + code-point-attributes &= + attribute RI { boolean }? + +

    +

    + 4.4.20 Properties related to boundaries +

    +

    The properties Grapheme_Base, Grapheme_Extend, + Other_Grapheme_Extend, Grapheme_Link, + Grapheme_Cluster_Break, Word_Break, and Sentence_Break each + have a corresponding attribute: +

    +

    + + [properties related to boundaries, + 47] + + = + + code-point-attributes &= + attribute Gr_Base { boolean }? + + code-point-attributes &= + attribute Gr_Ext { boolean }? + + code-point-attributes &= + attribute OGr_Ext { boolean }? + + code-point-attributes &= + attribute Gr_Link { boolean }? + + code-point-attributes &= + attribute GCB { "CN" | "CR" + | "EB" | "EBG" | "EM" | "EX" + | "GAZ" + | "L" | "LF" | "LV" | "LVT" + | "PP" + | "RI" + | "SM" + | "T" + | "V" + | "XX" + | "ZWJ" + }? + + code-point-attributes &= + attribute WB { "CR" + | "DQ" + | "EB" | "EBG" | "EM" | "EX" | "Extend" + | "FO" + | "GAZ" + | "HL" + | "KA" + | "LE" | "LF" + | "MB" | "ML" | "MN" + | "NL" | "NU" + | "RI" + | "SQ" + | "WSegSpace" + | "XX" + | "ZWJ" + }? + + code-point-attributes &= + attribute SB { "AT" + | "CL" | "CR" + | "EX" + | "FO" + | "LE" | "LF" | "LO" + | "NU" + | "SC" | "SE" | "SP" | "ST" + | "UP" + | "XX" + }? + +

    +

    + 4.4.21 Properties related to ideographs +

    +

    The properties Ideographic, Unified_Ideograph, + Equivalent_Unified_Ideograph, IDS_Binary_Operator, + IDS_Trinary_Operator, IDS_Unary_Operator, and Radical have + corresponding attributes: +

    +

    + + [properties related to ideographs, + 48] + + = + + code-point-attributes &= + attribute Ideo { boolean }? + + code-point-attributes &= + attribute UIdeo { boolean }? + + code-point-attributes &= + attribute EqUIdeo { single-code-point }? + + code-point-attributes &= + attribute IDSB { boolean }? + + code-point-attributes &= + attribute IDST { boolean }? + + code-point-attributes &= + attribute IDSU { boolean }? + + code-point-attributes &= + attribute Radical { boolean }? + +

    +

    + 4.4.22 Miscellaneous properties +

    +

    The properties Deprecated, Variation_Selector, and + Noncharacter_Code_Point have corresponding attributes: +

    +

    + + [miscellaneous properties, + 49] + + = + + code-point-attributes &= + attribute Dep { boolean }? + + code-point-attributes &= + attribute VS { boolean }? + + code-point-attributes &= + attribute NChar { boolean }? + +

    +

    + 4.4.23 Unihan properties +

    +

    The Unihan properties (from the Unihan database) are represented as attributes. +

    +

    + + [Unihan properties, + 50] + + = + + code-point-attributes &= attribute kAccountingNumeric + { xsd:string { pattern="[0-9]+" } }? + + code-point-attributes &= attribute kAlternateTotalStrokes + { list { xsd:string { pattern="(\d+:[BHJKMPSUV]+)|-" }+ } }? + + code-point-attributes &= attribute kBigFive + { xsd:string { pattern="[0-9A-F]{4}'?" } }? + + code-point-attributes &= attribute kCangjie + { xsd:string { pattern="[A-Z]+" } }? + + code-point-attributes &= attribute kCantonese + { list { xsd:string { pattern="[a-z]{1,6}[1-6]" }+ } }? + + code-point-attributes &= attribute kCCCII + { list { xsd:string { pattern="[0-9A-F]{6}" }+ } }? + + code-point-attributes &= attribute kCheungBauer + { list { xsd:string { pattern="[0-9]{3}/[0-9]{2};[A-Z]*;[a-z1-6\[\]/,]+" }+ } }? + + code-point-attributes &= attribute kCheungBauerIndex + { list { xsd:string { pattern="[0-9]{3}\.[01][0-9]" }+ } }? + + code-point-attributes &= attribute kCihaiT + { list { xsd:string { pattern="[1-9][0-9]{0,3}\.[0-9]{3}" }+ } }? + + code-point-attributes &= attribute kCNS1986 + { xsd:string { pattern="[12E]-[0-9A-F]{4}" } }? + + code-point-attributes &= attribute kCNS1992 + { xsd:string { pattern="[1-9]-[0-9A-F]{4}" } }? + + code-point-attributes &= attribute kCompatibilityVariant + { "" | xsd:string { pattern="U\+[23]?[0-9A-F]{4}" } }? + + code-point-attributes &= attribute kCowles + { list { xsd:string { pattern="[0-9]{1,4}(\.[0-9]{1,2})?" }+ } }? + + code-point-attributes &= attribute kDaeJaweon + { xsd:string { pattern="[0-9]{4}\.[0-9]{2}[01]" } }? + + code-point-attributes &= attribute kDefinition + { xsd:string { pattern='[^\t"]+' } }? + + code-point-attributes &= attribute kEACC + { xsd:string { pattern="[0-9A-F]{6}" } }? + + code-point-attributes &= attribute kFanqie + { list { xsd:string { pattern="[\x{3400}-\x{4DBF}\x{4E00}-\x{9FFF}\x{20000}-\x{2A6DF}]{2}" }+ } }? + + code-point-attributes &= attribute kFenn + { list { xsd:string { pattern="[0-9]+a?[A-KP*]" }+ } }? 
+ + code-point-attributes &= attribute kFennIndex + { list { xsd:string { pattern="[0-9][0-9]{0,2}\.[01][0-9]" }+ } }? + + code-point-attributes &= attribute kFourCornerCode + { list { xsd:string { pattern="[0-9]{4}(\.[0-9])?" }+ } }? + + code-point-attributes &= attribute kGB0 + { xsd:string { pattern="[0-9]{4}" } }? + + code-point-attributes &= attribute kGB1 + { xsd:string { pattern="[0-9]{4}" } }? + + code-point-attributes &= attribute kGB3 + { xsd:string { pattern="[0-9]{4}" } }? + + code-point-attributes &= attribute kGB5 + { xsd:string { pattern="[0-9]{4}" } }? + + code-point-attributes &= attribute kGB7 + { xsd:string { pattern="[0-9]{4}" } }? + + code-point-attributes &= attribute kGB8 + { xsd:string { pattern="[0-9]{4}" } }? + + code-point-attributes &= attribute kGradeLevel + { xsd:string { pattern="[1-6]" } }? + + code-point-attributes &= attribute kGSR + { list { xsd:string { pattern="[0-9]{4}[a-vx-z]'?" }+ } }? + + code-point-attributes &= attribute kHangul + { list { xsd:string { pattern="[\x{1100}-\x{1112}][\x{1161}-\x{1175}][\x{11A8}-\x{11C2}]?:[01ENX]{1,3}" }+ } }? + + code-point-attributes &= attribute kHanYu + { list { xsd:string { pattern="[1-8][0-9]{4}\.[0-3][0-9][0-3]" }+ } }? + + code-point-attributes &= attribute kHanyuPinlu + { list { xsd:string { pattern="[a-z\x{300}-\x{302}\x{304}\x{308}\x{30C}]+\([0-9]+\)" }+ } }? + + code-point-attributes &= attribute kHanyuPinyin + { list { xsd:string { pattern="(\d{5}\.\d{2}0,)*\d{5}\.\d{2}0:([a-z\x{300}-\x{302}\x{304}\x{308}\x{30C}]+,)*[a-z\x{300}-\x{302}\x{304}\x{308}\x{30C}]+" }+ } }? + + code-point-attributes &= attribute kHDZRadBreak + { xsd:string { pattern="[\x{2F00}-\x{2FD5}]\[U\+2F[0-9A-D][0-9A-F]\]:[1-8][0-9]{4}\.[0-3][0-9]0" } }? + + code-point-attributes &= attribute kHKGlyph + { list { xsd:string { pattern="[0-9]{4}" }+ } }? + + code-point-attributes &= attribute kIBMJapan + { list { xsd:string { pattern="F[ABC][0-9A-F]{2}" }+ } }? 
+ + code-point-attributes &= attribute kIICore + { list { xsd:string { pattern="[ABC][GHJKMPT]{1,7}" }+ } }? + + code-point-attributes &= attribute kIRG_GSource + { "" | xsd:string { pattern="G[013578EKS]-[0-9A-F]{4}" } + | xsd:string { pattern="G4K(-\d{5})?" } + | xsd:string { pattern="G(DZ|GH|RM|WZ|XC|XH|ZH)-\d{4}\.\d{2}" } + | xsd:string { pattern="G(BK|CH|CY|HC)(-\d{4}\.\d{2})?" } + | xsd:string { pattern="GKX-\d{4}\.\d{2,3}" } + | xsd:string { pattern="G(HZ|HZR)-\d{5}\.\d{2}" } + | xsd:string { pattern="G(CE|FC|IDC23|OCD|XHZ)-\d{3}" } + | xsd:string { pattern="G(H|HF|LGYJ|PGLG|T)-\d{4}" } + | xsd:string { pattern="G(CYY|DM|JZ|KJ|XM|ZFY|ZJW|ZYS)-\d{5}" } + | xsd:string { pattern="G(FZ|IDC)-[0-9A-F]{4}" } + | xsd:string { pattern="GGFZ-\d{6}" } + | xsd:string { pattern="G(LK|Z)-\d{7}" } + | xsd:string { pattern="GU-[023][0-9A-F]{4}" } + | xsd:string { pattern="GZA-[123467]\d{5}" } + }? + + code-point-attributes &= attribute kIRG_HSource + { "" | xsd:string { pattern="H-[0-9A-F]{4}" } + | xsd:string { pattern="H(B[012])-[0-9A-F]{4}" } + | xsd:string { pattern="HD-[23]?[0-9A-F]{4}" } + | xsd:string { pattern="HU-[023][0-9A-F]{4}" } + }? + + code-point-attributes &= attribute kIRG_JSource + { "" | xsd:string { pattern="J[014]-[0-9A-F]{4}" } + | xsd:string { pattern="J3A?-[0-9A-F]{4}" } + | xsd:string { pattern="J13A?-[0-9A-F]{4}" } + | xsd:string { pattern="J14-[0-9A-F]{4}" } + | xsd:string { pattern="JA[34]?-[0-9A-F]{4}" } + | xsd:string { pattern="JARIB-[0-9A-F]{4}" } + | xsd:string { pattern="JH-(JT[ABC][0-9A-F]{3}S?|IB\d{4}|\d{6})" } + | xsd:string { pattern="JK-\d{5}" } + | xsd:string { pattern="JMJ-\d{6}" } + }? + + code-point-attributes &= attribute kIRG_KPSource + { "" | xsd:string { pattern="KP([01]-[0-9A-F]{4}|U-[023][0-9A-F]{4})" } }? + + code-point-attributes &= attribute kIRG_KSource + { "" | xsd:string { pattern="K[0-6]-[0-9A-F]{4}" } + | xsd:string { pattern="KC-\d{5}" } + | xsd:string { pattern="KU-[023][0-9A-F]{4}" } + }? 
+ + code-point-attributes &= attribute kIRG_MSource + { "" | xsd:string { pattern="MA-[0-9A-F]{4}" } + | xsd:string { pattern="MB[12]-[0-9A-F]{4}" } + | xsd:string { pattern="MC-\d{5}" } + | xsd:string { pattern="MDH?-[23]?[0-9A-F]{4}" } + }? + + code-point-attributes &= attribute kIRG_SSource + { "" | xsd:string { pattern="SAT-\d{5}" } }? + + code-point-attributes &= attribute kIRG_TSource + { "" | xsd:string { pattern="T([1-7A-F]|1[1-3])-[0-9A-F]{4}" } + | xsd:string { pattern="TU-[023][0-9A-F]{4}" } + }? + + code-point-attributes &= attribute kIRG_UKSource + { "" | xsd:string { pattern="UK-\d{5}" } }? + + code-point-attributes &= attribute kIRG_USource + { "" | xsd:string { pattern="UTC-\d{5}" } }? + + code-point-attributes &= attribute kIRG_VSource + { "" | xsd:string { pattern="V[0-4]-[0-9A-F]{4}" } + | xsd:string { pattern="VN-[023F][0-9A-F]{4}" } + }? + + code-point-attributes &= attribute kIRGDaeJaweon + { list { xsd:string { pattern="[0-9]{4}\.[0-9]{2}[01]" }+ } }? + + code-point-attributes &= attribute kIRGHanyuDaZidian + { list { xsd:string { pattern="[1-8][0-9]{4}\.[0-3][0-9][01]" }+ } }? + + code-point-attributes &= attribute kIRGKangXi + { list { xsd:string { pattern="[01][0-9]{3}\.[0-7][0-9][01]" }+ } }? + + code-point-attributes &= attribute kJa + { list { xsd:string { pattern="[0-9A-F]{4}S?" }+ } }? + + code-point-attributes &= attribute kJapanese + { list { xsd:string { pattern="[\x{3041}-\x{3096}\x{3099}\x{309A}\x{30A1}-\x{30FA}\x{30FC}]+" }+ } }? + + code-point-attributes &= attribute kJapaneseKun + { list { xsd:string { pattern="[A-Z]+" }+ } }? + + code-point-attributes &= attribute kJapaneseOn + { list { xsd:string { pattern="[A-Z]+" }+ } }? + + code-point-attributes &= attribute kJinmeiyoKanji + { list { xsd:string { pattern="(20[0-9]{2})(:U\+[23]?[0-9A-F]{4})?" }+ } }? + + code-point-attributes &= attribute kJis0 + { list { xsd:string { pattern="[0-9]{4}" }+ } }? 
+ + code-point-attributes &= attribute kJis1 + { list { xsd:string { pattern="[0-9]{4}" }+ } }? + + code-point-attributes &= attribute kJIS0213 + { list { xsd:string { pattern="[12],[0-9]{2},[0-9]{1,2}" }+ } }? + + code-point-attributes &= attribute kJoyoKanji + { list { xsd:string { pattern="(20[0-9]{2})|(U\+[23]?[0-9A-F]{4})" }+ } }? + + code-point-attributes &= attribute kKangXi + { list { xsd:string { pattern="[0-9]{4}\.[0-9]{2}[01]" }+ } }? + + code-point-attributes &= attribute kKarlgren + { list { xsd:string { pattern="[1-9][0-9]{0,3}[A*]?" }+ } }? + + code-point-attributes &= attribute kKorean + { list { xsd:string { pattern="[A-Z]+" }+ } }? + + code-point-attributes &= attribute kKoreanEducationHanja + { list { xsd:string { pattern="20[0-9]{2}" }+ } }? + + code-point-attributes &= attribute kKoreanName + { list { xsd:string { pattern="20[0-9]{2}" }+ } }? + + code-point-attributes &= attribute kLau + { list { xsd:string { pattern="[1-9][0-9]{0,3}" }+ } }? + + code-point-attributes &= attribute kMainlandTelegraph + { list { xsd:string { pattern="[0-9]{4}" }+ } }? + + code-point-attributes &= attribute kMandarin + { list { xsd:string { pattern="[a-z\x{300}-\x{302}\x{304}\x{308}\x{30C}]+" }+ } }? + + code-point-attributes &= attribute kMatthews + { list { xsd:string { pattern="[1-9][0-9]{0,3}(a|\.5)?" }+ } }? + + code-point-attributes &= attribute kMeyerWempe + { list { xsd:string { pattern="[1-9][0-9]{0,3}[a-t*]?" }+ } }? + + code-point-attributes &= attribute kMojiJoho + { list { xsd:string { pattern="MJ\d{6}(:(FE0[01]|E01[01][0-9A-F]))?" }+ } }? + + code-point-attributes &= attribute kMorohashi + { list { xsd:string { pattern="(\d{5}'{0,2}|H\d{3})(:(FE0[01]|E010[0-9A-F]))?" }+ } }? + + code-point-attributes &= attribute kNelson + { list { xsd:string { pattern="[0-9]{4}" }+ } }? + + code-point-attributes &= attribute kOtherNumeric + { list { xsd:string { pattern="[0-9]+" }+ } }? 
+ + code-point-attributes &= attribute kPhonetic + { list { xsd:string { pattern="[1-9][0-9]{0,3}[A-D]?\*?" }+ } }? + + code-point-attributes &= attribute kPrimaryNumeric + { list { xsd:string { pattern="[0-9]+" }+ } }? + + code-point-attributes &= attribute kPseudoGB1 + { xsd:string { pattern="[0-9]{4}" } }? + + code-point-attributes &= attribute kRSAdobe_Japan1_6 + { list { xsd:string { pattern="[CV]\+[0-9]{1,5}\+[1-9][0-9]{0,2}\.[1-9][0-9]?\.[0-9]{1,2}" }+ } }? + + code-point-attributes &= attribute kRSUnicode + { list { xsd:string { pattern="[1-9][0-9]{0,2}'{0,3}\.-?[0-9]{1,2}" }+ } }? + + code-point-attributes &= attribute kSBGY + { list { xsd:string { pattern="[0-9]{3}\.[0-7][0-9]" }+ } }? + + code-point-attributes &= attribute kSemanticVariant + { list { xsd:string { pattern="U\+[23]?[0-9A-F]{4}(<[ks][A-Za-z0-9_]+(:[TBZFJ]+)?(,[ks][A-Za-z0-9_]+(:[TBZFJ]+)?)*)?" }+ } }? + + code-point-attributes &= attribute kSimplifiedVariant + { list { xsd:string { pattern="U\+[23]?[0-9A-F]{4}" }+ } }? + + code-point-attributes &= attribute kSMSZD2003Index + { list { xsd:string { pattern="\d{1,3}\.\d{2}" }+ } }? + + code-point-attributes &= attribute kSMSZD2003Readings + { list { xsd:string { pattern="[a-z\x{300}\x{301}\x{302}\x{304}\x{308}\x{30C}]+(,[a-z\x{300}\x{301}\x{302}\x{304}\x{308}\x{30C}]+)*\x{7CB5}[a-z]+[1-6]([a-z]+[1-6])?(,[a-z]+[1-6]([a-z]+[1-6])?)*" }+ } }? + + code-point-attributes &= attribute kSpecializedSemanticVariant + { list { xsd:string { pattern="U\+[23]?[0-9A-F]{4}(<[ks][A-Za-z0-9_]+(:[TBZFJ]+)?(,[ks][A-Za-z0-9_]+(:[TBZFJ]+)?)*)?" }+ } }? + + code-point-attributes &= attribute kSpoofingVariant + { list { xsd:string { pattern="U\+[23]?[0-9A-F]{4}" }+ } }? + + code-point-attributes &= attribute kStrange + { list { ( xsd:string { pattern="[ACU]" } + | xsd:string { pattern="B:U\+31[0-2AB][0-9A-F]" } + | xsd:string { pattern="[FMOR](:U\+[23]?[0-9A-F]{4})?" 
} + | xsd:string { pattern="H:U\+31[3-8][0-9A-F]" } + | xsd:string { pattern="I(:U\+[23]?[0-9A-F]{4})*" } + | xsd:string { pattern="K(:U\+30[A-F][0-9A-F])+" } + | xsd:string { pattern="S:[4-9][0-9]" } + )+}}? + + code-point-attributes &= attribute kTaiwanTelegraph + { list { xsd:string { pattern="[0-9]{4}" }+ } }? + + code-point-attributes &= attribute kTang + { list { xsd:string { pattern="\*?[A-Za-z()\x{E6}\x{251}\x{259}\x{25B}\x{300}\x{30C}]+" }+ } }? + + code-point-attributes &= attribute kTGH + { list { xsd:string { pattern="20[0-9]{2}:[1-9][0-9]{0,3}" }+ } }? + + code-point-attributes &= attribute kTGHZ2013 + { list { xsd:string { pattern="[0-9]{3}\.[0-9]{3}(,[0-9]{3}\.[0-9]{3})*:[a-z\x{300}-\x{302}\x{304}\x{308}\x{30C}]+" }+ } }? + + code-point-attributes &= attribute kTotalStrokes + { list { xsd:string { pattern="[1-9][0-9]{0,2}" }+ } }? + + code-point-attributes &= attribute kTraditionalVariant + { list { xsd:string { pattern="U\+[23]?[0-9A-F]{4}" }+ } }? + + code-point-attributes &= attribute kUnihanCore2020 + { xsd:string { pattern="[GHJKMPT]{1,7}" } }? + + code-point-attributes &= attribute kVietnamese + { list { xsd:string { pattern="[A-Za-z\x{110}\x{111}\x{300}-\x{303}\x{306}\x{309}\x{31B}\x{323}]+" }+ } }? + + code-point-attributes &= attribute kVietnameseNumeric + { list { xsd:string { pattern="\d+" }+ } }? + + code-point-attributes &= attribute kXerox + { list { xsd:string { pattern="[0-9]{3}:[0-9]{3}" }+ } }? + + code-point-attributes &= attribute kXHC1983 + { list { xsd:string { pattern="[0-9]{4}\.[0-9]{3}\*?(,[0-9]{4}\.[0-9]{3}\*?)*:[a-z\x{300}\x{301}\x{304}\x{308}\x{30C}]+" }+ } }? + + code-point-attributes &= attribute kZhuang + { list { xsd:string { pattern="[a-z]+\*?" }+ } }? + + code-point-attributes &= attribute kZhuangNumeric + { list { xsd:string { pattern="\d+" }+ } }? + + code-point-attributes &= attribute kZVariant + { list { xsd:string { pattern="U\+[23]?[0-9A-F]{4}(<[ks][A-Za-z0-9_]+(:[TBZ]+)?(,[ks][A-Za-z0-9_]+(:[TBZ]+)?)*)?" 
}+ } }? + +

    +

    + 4.4.24 Tangut data +

    +

    The Tangut data are represented as attributes. The attribute kRSTUnicode + represents the radical stroke index. The attribute kTGT_MergedSrc indicates the + source reference for the character. +

    +

    + + [Tangut data, + 51] + + = + + code-point-attributes &= + attribute kRSTUnicode { xsd:string { pattern="[0-9]+\.[0-9]+" } }? + + code-point-attributes &= + attribute kTGT_MergedSrc + { xsd:string {pattern="L2008-[0-9A-F]{4,5}(-[0-9]{4,5})?"} + | xsd:string {pattern="L2006-[0-9]{4}"} + | xsd:string {pattern="L1997-[0-9]{4}"} + | xsd:string {pattern="L1986-[0-9]{4}"} + | xsd:string {pattern="S1968-[0-9]{4}"} + | xsd:string {pattern="N1966-[0-9]{3}(-[0-9A-Z]{3,4})?"} + | xsd:string {pattern="H2004-[A-Z]-[0-9]{4}"} + | xsd:string {pattern="L2012-[0-9]{4}"} + | xsd:string {pattern="UTN42-[0-9]{3}"} + }? + +

    +

    + 4.4.25 Nushu data +

    +

    The Nushu data are represented as attributes. The attribute kSrc_NushuDuben + indicates the page number and order of the item from the NushuDuben reference source. The Nushu common + reading is represented by the kReading attribute.

    +

    + + [Nushu data, + 52] + + = + + code-point-attributes &= + attribute kSrc_NushuDuben { xsd:string { pattern="[0-9]+\.[0-9]+" } }? + + code-point-attributes &= + attribute kReading { xsd:string }? + +

    +

    + 4.4.26 Emoji properties +

    +

    The properties Emoji, EPres, EMod, EBase, + EComp, and ExtPict have corresponding attributes: +

    +

    + + [Emoji properties, + 53] + + = + + code-point-attributes &= + attribute Emoji { boolean }? + + code-point-attributes &= + attribute EPres { boolean }? + + code-point-attributes &= + attribute EMod { boolean }? + + code-point-attributes &= + attribute EBase { boolean }? + + code-point-attributes &= + attribute EComp { boolean }? + + code-point-attributes &= + attribute ExtPict { boolean }? + +

    +

    + 5 Blocks +

    +

    The blocks child of the ucd describes the blocks. It has one child + block element per block, with attributes to describe the extent and name of the block. +

    +

    + + [blocks, + 54] + + = + + ucd.content &= + element blocks { + element block { + attribute first-cp { single-code-point }, + attribute last-cp { single-code-point }, + attribute name { text } }+ }? + +

    +

    + 6 Named Sequences +

    +

    The named-sequences child of the ucd describes the named sequences. It has one + child named-sequence element per named sequence, with attributes to describe the name and + sequence. +

    +

    Similarly, the provisional-named-sequences child of the ucd describes the + provisional named sequences. +

    +

    + + [named sequences, + 55] + + = + + ucd.content &= + element named-sequences { + element named-sequence { + attribute cps { one-or-more-code-points }, + attribute name { text } }+ }? + + ucd.content &= + element provisional-named-sequences { + element named-sequence { + attribute cps { one-or-more-code-points }, + attribute name { text } }+ }? + +

    +

    + 7 Normalization Corrections +

    +

    The normalization-corrections child of the ucd describes the normalization + corrections. It has one child normalization-correction element per correction, with + attributes to describe the code point affected, its old normalization, its new normalization and the + version of Unicode in which the correction was made. +

    +

    + + [normalization corrections, + 56] + + = + + ucd.content &= + element normalization-corrections { + element normalization-correction { + attribute cp { single-code-point }, + attribute old { one-or-more-code-points }, + attribute new { one-or-more-code-points }, + attribute version { text } }+ }? + +

    +

    + 8 Standardized Variants +

    +

    The standardized-variants child of the ucd describes the standardized + variants. It has one child element standardized-variant per variant. The attributes on that + last element capture the variation sequence, the description of the desired appearance, and the shaping + environment under which the appearance is different. +

    +

    + + [standardized variants, + 57] + + = + + ucd.content &= + element standardized-variants { + element standardized-variant { + attribute cps { two-code-points }, + attribute desc { text }, + attribute when { text } }+ }? + +

    +

    + 9 CJK Radicals +

    +

    The cjk-radicals child of the ucd describes the CJK radicals. It has one + child element cjk-radical per radical. The attributes on that last element capture the + radical number, the corresponding CJK radical character, and the corresponding CJK unified ideograph. +

    +

    + + [cjk radicals, + 58] + + = + + ucd.content &= + element cjk-radicals { + element cjk-radical { + attribute number { xsd:string {pattern="[0-9]{1,3}'{0,3}"}}, + attribute radical { single-code-point? }, + attribute ideograph { single-code-point } }+ }? + +

    +

    + 10 Emoji sources +

    +

    The emoji-sources child of the ucd describes the emoji sources. +

    +

    + + [emoji sources, + 59] + + = + + ucd.content &= + element emoji-sources { + element emoji-source { + attribute unicode { one-or-more-code-points }, + attribute docomo { jis-code-point? }, + attribute kddi { jis-code-point? }, + attribute softbank { jis-code-point? } }+ }? + +

    +

    + + [datatype for code points, + 60] + + = + + jis-code-point = xsd:string { pattern = "[0-9A-F]{4}" } + +

    +

    + 11 Do Not Emit +

    +

    + The do-not-emit child of the ucd describes the + character sequences that should not be emitted or generated in newly authored texts. + +

    +

    + + [do-not-emit, + 61] + + = + + ucd.content &= + element do-not-emit { + element instead { + attribute of { one-or-more-code-points }, + attribute use { one-or-more-code-points }, + attribute because { "Bengali_Khanda_Ta" + | "Deprecated" + | "Discouraged" + | "Dotless_Form" + | "Hamza_Form" + | "Indic_Atomic_Consonant" + | "Indic_Consonant_Conjunct" + | "Indic_Vowel_Letter" + | "Malayalam_Chillu" + | "Precomposed_Form" + | "Precomposed_Hieroglyph" + | "Preferred_Spelling" + | "Tamil_Shrii" + } }+ }? + +

    +

    + 12 The full schema +

    +

    Our schema is just the accumulation of the pieces we have described so far: +

    +

    + + [UCD RelaxNG schema] + + = + + + [namespace declaration: 1] + + + [datatypes: 2, 3, 60] + + + [schema start: 4] + + + [boolean: 5] + + + [description: 6] + + + [repertoire: 7, 8, 9, 10] + + + [attributes: 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50] + + + [Tangut data: 51] + + + [Nushu data: 52] + + + [blocks: 54] + + + [named sequences: 55] + + + [normalization corrections: 56] + + + [standardized variants: 57] + + + [cjk radicals: 58] + + + [emoji sources: 59] + + + [Emoji properties: 53] + + + [do-not-emit: 61] + + +

    +

    An expanded version is linked from the top of this document.

    +

    + 13 Examples +

    +

    Here is a fragment of the UCD for a few representative + characters (only some of the properties are represented): +

    +
    +            
    +  <ucd xmlns="http://www.unicode.org/ns/2003/ucd/1.0">
    +    <repertoire>
    +      <char cp="001F" age="1.1" na="&lt;control&gt;" na1="UNIT SEPARATOR"
    +            gc="Cc" bc="S" lb="CM"/>
    +
    +      <char cp="0020" age="1.1" na="SPACE" gc="Zs" bc="WS" ea="Na" lb="SP"/>
    +
    +      <char cp="0026" age="1.1" na="AMPERSAND" gc="Po" bc="ON" ea="Na"/>
    +
    +      <char cp="0028" age="1.1" na="LEFT PARENTHESIS" na1="OPENING PARENTHESIS"
    +            gc="Ps" bc="ON" Bidi_M="y" bmg="0029" ea="Na" lb="OP"/>
    +
    +      <char cp="0041" age="1.1" na="LATIN CAPITAL LETTER A"
    +            gc="Lu" slc="0061" ea="Na" sc="Latn"/>
    +
    +      <char cp="AC00" age="2.0" na="HANGUL SYLLABLE GA" gc="Lo"
    +            dt="can" dm="1100 1161" ea="W" lb="ID" sc="Hang"/>
    +
    +      <char cp="20094" age="3.1" na="CJK UNIFIED IDEOGRAPH-20094"
    +            gc="Lo" ea="W" lb="ID" sc="Hani" kIRG_GSource="KX"
    +            kIRGHanyuDaZidian="10036.060" kIRG_TSource="5-214E"
    +           kRSUnicode="4.3" kIRGKangXi="0082.090"/>
    +
    +      <group age="3.2" gc="Lo" sc="Buhd">
    +        <char cp="1740" na="BUHID LETTER A"/>
    +        <char cp="1741" na="BUHID LETTER I"/>
    +        <char cp="1752" na="BUHID VOWEL SIGN I" gc="Mn"/>
    +        <char cp="1820" age="3.0" na="MONGOLIAN LETTER A" sc="Mong"/>
    +      </group>
    +    </repertoire>
    +  </ucd>
    +
    +
    +

    + Acknowledgments +

    +

    Thanks to Markus Scherer and Mark Davis for their help developing this XML representation. Thanks to + the reviewers: Julie Allen, Ernest van den Boogaard, Daniel Bünzli, John Cowan, Asmus Freytag, + Felix Sasaki, Andrew West. Special thanks to Eric Muller and Laurențiu Iancu. +

    +

    + Modifications +

    +

    This section indicates the changes introduced by each revision.

    +
    +

    + Revision 36 +

    +
      +
    • New value for the age attribute: 16.0. +
    • +
    • New values for the blk attribute: Egyptian_Hieroglyphs_Ext_A, + Garay, Gurung_Khema, Kirat_Rai, Myanmar_Ext_C, + Ol_Onal, Sunuwar, Symbols_For_Legacy_Computing_Sup, + Todhri, Tulu_Tigalari. +
    • +
    • New values for the script attribute: Gara, Gukh, + Krai, Onao, Sunu, Todr, Tutg. +
    • +
    • New value for the jg attribute: Kashmiri_Yeh.
    • +
    • New value for the InSC attribute: Reordering_Killer. +
    • +
    • New attributes: MCM, kFanqie, kZhuang. +
    • +
    • Modified patterns for the cjk-radical/@number, kRSUnicode and + kIRG_GSource + attributes. +
    • +
    • Added the do-not-emit element. +
    • +
    +
    +
    +

    Revision 35 being a proposed update, only changes between revisions 34 and 36 are + noted here. +

    +
    +
    +

    + Revision 34 +

    +
      +
    • New value for the age attribute: 15.1. +
    • +
    • New value for the blk attribute: CJK_Ext_I. +
    • +
    • New values for the lb attribute: AK, AP, + AS, VF, VI. +
    • +
    • Modified values for the number, radical attributes of the + cjk-radical + element. +
    • +
    • Changed single value into list for the nv code point attribute. +
    • +
    • New code point attributes: ID_Compat_Math_Continue, + ID_Compat_Math_Start, IDSU, NFKC_SCF, InCB. +
    • +
    • Modified patterns for the kBigFive, kIRG_GSource, + kMorohashi, kRSUnicode attributes. +
    • +
    • Changed single values into lists for the kMorohashi, kPrimaryNumeric + Unihan attributes. +
    • +
    • New Unihan attributes: kJapanese, kMojiJoho, + kSMSZD2003Index, kSMSZD2003Readings, kVietnameseNumeric, + kZhuangNumeric. +
    • +
    +
    +
    +

    Revision 33 being a proposed update, only changes between revisions 32 and 34 are + noted here. +

    +
    +
    +

    + Revision 32 +

    +
      +
    • New value for the age attribute: 15.0. +
    • +
    • New values for the blk attribute: Arabic_Ext_C, CJK_Ext_H, + Cyrillic_Ext_D, Devanagari_Ext_A, Kaktovik_Numerals, Kawi, + Nag_Mundari. +
    • +
    • New values for the script attribute: Kawi, Nagm. +
    • +
    • New Unihan attribute: kAlternateTotalStrokes. +
    • +
    • Modified patterns for the kIRG_GSource, kIRG_HSource, + kIRG_TSource, kSemanticVariant, kSpecializedSemanticVariant, + kZVariant + attributes. +
    • +
    +
    +
    +

    Revision 31 being a proposed update, only changes between revisions 30 and 32 are + noted here. +

    +
    +
    +

    + Revision 30 +

    +
      +
    • New value for the age attribute: 14.0. +
    • +
    • New values for the blk attribute: Arabic_Ext_B, + Cypro_Minoan, Ethiopic_Ext_B, Kana_Ext_B, + Latin_Ext_F, Latin_Ext_G, Old_Uyghur, Tangsa, + Toto, UCAS_Ext_A, Vithkuqi, Znamenny_Music. +
    • +
    • New values for the script attribute: Cpmn, Ougr, + Tnsa, Toto, Vith. +
    • +
    • New values for the jg attribute: Thin_Yeh, Vertical_Tail. +
    • +
    • New Unihan attribute: kStrange. +
    • +
    • Modified patterns for the kIRG_GSource, kIRG_MSource, + kIRG_VSource, kPhonetic, kSpoofingVariant attributes. +
    • +
    • Removal of the kWubi attribute, which has never been present in + released versions of the UCD. +
    • +
    +
    +
    +

    Revision 29 being a proposed update, only changes between revisions 28 and 30 are + noted here. +

    +
    +
    +

    + Revision 28 +

    +
      +
    • New value for the age attribute: 13.0. +
    • +
    • New values for the blk attribute: Chorasmian, CJK_Ext_G, + Dives_Akuru, Khitan_Small_Script, Lisu_Sup, + Symbols_For_Legacy_Computing, Tangut_Sup, Yezidi. +
    • +
    • New values for the script attribute: Chrs, Diak, + Kits, Yezi. +
    • +
    • New value for the InPC attribute: Top_And_Bottom_And_Left. +
    • +
    • New Unihan attributes kSpoofingVariant, kUnihanCore2020, + kIRG_SSource, kIRG_UKSource, kTGHZ2013. +
    • +
    • New Emoji attributes Emoji, EPres, EMod, + EBase, EComp, ExtPict. +
    • +
    • Modified patterns for the kIRG_GSource, kIRG_HSource, + kIRG_KPSource, kIRG_KSource, kIRG_TSource, kKangXi, + kSemanticVariant, kSimplifiedVariant, + kSpecializedSemanticVariant, kTraditionalVariant attributes. +
    • +
    +
    +
    +

    Revision 27 being a proposed update, only changes between revisions 26 and 28 are + noted here. +

    +
    +
    +

    + Revision 26 +

    +
      +
    • New value for the age attribute: 12.1. +
    • +
    +
    +
    +

    + Revision 25 +

    +
      +
    • New value for the age attribute: 12.0. +
    • +
    • New values for the script attribute: Elym, Hmnp, + Nand, Wcho. +
    • +
    • New values for the blk attribute: + Egyptian_Hieroglyph_Format_Controls, Elymaic, Nandinagari, + Nyiakeng_Puachue_Hmong, Ottoman_Siyaq_Numbers, Small_Kana_Ext, + Symbols_And_Pictographs_Ext_A, Tamil_Sup, Wancho. +
    • +
    • Modified patterns for the kIRG_GSource, kIRG_KSource, + kIRG_TSource, kTaiwanTelegraph attributes. +
    • +
    +
    +
    +

    Revision 24 being a proposed update, only changes between revisions 23 and 25 are + noted here. +

    +
    +
    +

    + Revision 23 +

    +
      +
    • New value for the age attribute: 11.0. +
    • +
    • New values for the blk attribute: Chess_Symbols, + Dogra, Georgian_Ext, Gunjala_Gondi, + Hanifi_Rohingya, Indic_Siyaq_Numbers, Makasar, + Mayan_Numerals, Medefaidrin, Old_Sogdian, Sogdian. +
    • +
    • New values for the script attribute: Dogr, Gong, + Maka, Medf, Rohg, Sogd, Sogo. +
    • +
    • New values for the jg attribute: Hanifi_Rohingya_Kinna_Ya, + Hanifi_Rohingya_Pa. +
    • +
    • New value for the wb attribute: WSegSpace. +
    • +
    • New value for the InSC attribute: Consonant_Initial_Postfixed. +
    • +
    • New attributes: EqUIdeo, kJinmeiyoKanji, kJoyoKanji, + kKoreanEducationHanja, kKoreanName, kTGH. +
    • +
    • Modified patterns for the kTGT_MergedSrc attribute. +
    • +
    • Modified patterns for the kIRG_GSource, kIRG_HSource and + kIRG_VSource + attributes. +
    • +
    +
    +
    +

    Revision 22 being a proposed update, only changes between revisions 21 and 23 are + noted here. +

    +
    +
    +

    + Revision 21 +

    +
      +
    • New value for the age attribute: 10.0. +
    • +
    • New values for the blk attribute: CJK_Ext_F, Kana_Ext_A, + Masaram_Gondi, Nushu, Soyombo, Syriac_Sup, + Zanabazar_Square. +
    • +
    • New values for the sc attribute: Gonm, Nshu, + Soyo, Zanb. +
    • +
    • New values for the jg attribute: Malayalam_Nga, + Malayalam_Ja, Malayalam_Nya, Malayalam_Tta, Malayalam_Nna, + Malayalam_Nnna, Malayalam_Bha, Malayalam_Ra, + Malayalam_Lla, Malayalam_Llla, Malayalam_Ssa. +
    • +
    • New value for the InPC attribute: Bottom_And_Left. +
    • +
    • Modified patterns for the kIRG_GSource, kIRG_JSource, + kIRG_KSource + attributes. +
    • +
    • New code point attributes: vo, + RI +
    • +
    • New code point attributes for Nushu data: kSrc_NushuDuben and + kReading. +
    • +
    +
    +
    +

    Revision 20 being a proposed update, only changes between revisions 19 and 21 are + noted here. +

    +
    +
    +

    + Revision 19 +

    +
      +
    • New value for the age attribute: 9.0. +
    • +
    • New values for the sc attribute: Adlm, Bhks, + Marc, Newa, Osge, Tang. +
    • +
    • New values for the blk attribute: Adlam, Bhaiksuki, + Cyrillic_Ext_C, Glagolitic_Sup, Ideographic_Symbols, + Marchen, Mongolian_Sup, Newa, Osage, + Tangut, Tangut_Components. +
    • +
    • New values for the gcb attribute: EB, EBG, EM, + GAZ, ZWJ. +
    • +
    • New values for the wb attribute: EB, EBG, EM, + GAZ, ZWJ. +
    • +
    • New values for the lb attribute: EB, EM, ZWJ. +
    • +
    • New values for the jg attribute: African_Feh, + African_Noon, African_Qaf. +
    • +
    • New code point attributes: PCM, kRSTUnicode and + kTGT_MergedSrc. +
    • +
    • Modified patterns for the kRSUnicode, kRSKangXi, + kMandarin, kIRG_JSource, kIRG_USource and kFennIndex + attributes. +
    • +
    +
    +
    +

    Revision 18 being a proposed update, only changes between revisions 17 and 19 are + noted here. +

    +
    +
    +

    + Revision 17 +

    +
      +
    • New value for the age attribute: 8.0. +
    • +
    • New values for the sc attribute: Ahom, Hatr, + Hluw, Hung, Mult, Sgnw. +
    • +
    • New values for the blk attribute: Ahom, + Anatolian_Hieroglyphs, Cherokee_Sup, CJK_Ext_E, + Early_Dynastic_Cuneiform, Hatran, Multani, Old_Hungarian, + Sup_Symbols_And_Pictographs, Sutton_SignWriting. +
    • +
    • New values for the InSC attribute: Consonant_Killer, + Consonant_Prefixed, Consonant_With_Stacker, Syllable_Modifier. +
    • +
    • New code point attributes: InPC, kJa. +
    • +
    • New patterns for the kIRG_GSource attribute: GFC-, GGFZ-. +
    • +
    • Switched the reference to ISO 19757 from :2003 and :2003 Amd1 to :2008.
    • +
    +
    +
    +

    Revision 16 being a proposed update, only changes between revisions 15 and 17 are + noted here. +

    +
    +
    +

    + Revision 15 +

    +
      +
    • New value for the age attribute: 7.0. +
    • +
    • New values for the jg attribute. +
    • +
    • New values for the sc attribute. +
    • +
    • New values for the blk attribute. +
    • +
    • New values for the InSC attribute. +
    • +
    • New values for the kIICore attribute. +
    • +
    • New values for the kIRG_GSource attribute. +
    • +
    +
    +
    +

    Revision 14 being a proposed update, only changes between revisions 13 and 15 are + noted here. +

    +
    +
    +

    + Revision 13 +

    +
      +
    • New value for the age attribute: 6.3. +
    • +
    • New values DQ, HL, SQ for the WB attribute (for Unicode 6.3). +
    • +
    • New code point attributes bpt and bpb (for Unicode 6.3). +
    • +
    • New values for the bc attribute: LRI, RLI, FSI, + PDI + (for Unicode 6.3). +
    • +
    • Updated the patterns for kHanyuPinlu and kTotalStrokes (for + Unicode 6.3). +
    • +
    • Updated the patterns for kIRG_HSource and kIRG_HSource (for + Unicode 6.2). +
    • +
    • Clarified that the child elements of list-like elements are in no particular order.
    • +
    +
    +
    +

    Revision 12 being a proposed update, only changes between revisions 11 and 13 are + noted here. +

    +
    +
    +

    + Revision 11 +

    +
      +
    • New value for the age attribute: 6.2. +
    • +
    • New value for the gcb, wb and lb attributes: + RI + (for Unicode 6.2). +
    • +
    • Updated the patterns for kIRG_GSource and kIRG_HSource (for + Unicode 6.2). +
    • +
    +
    +
    +

    Revision 10 being a proposed update, only changes between revisions 9 and 11 are + noted here. +

    +
    +
    +

    + Revision 9 +

    +
      +
    • Clarified the default values.
    • +
    • Indicated that property values may change from one release to the next.
    • +
    • Introduced the blk attribute, for the Block property. +
    • +
    • Introduced the scx attribute, for the ScriptExtensions property. +
    • +
    • Introduced the name-alias element, for the Name_Alias property. +
    • +
    • New value for the age attribute: 6.1. +
    • +
    • New values for the script attribute: Cakm, Merc, + Mero, Plrd, Shrd, Sora, Takr. +
    • +
    • New values for the lb attribute: HL and CJ. +
    • +
    • New value for the jg attribute: Rohingya_Yeh. +
    • +
    • The value of the fc_nfkc attribute must now be either # or + one-or-more-code-points. +
    • +
    • For the nv attribute, the absence of a numeric value is now represented by + NaN + rather than by the empty string. +
    • +
    • The values of the ccc attribute are now restricted to 0..254, instead of 0..255. +
    • +
    • Updated the patterns for kSemanticVariant, + kSpecializedSemanticVariant, kIRG_USource, and kMandarin. +
    • +
    +
    +
    +

    Revision 8 being a proposed update, only changes between revisions 7 and 9 are noted + here. +

    +
    +
    +

    + Revision 7 +

    +
      +
    • New value for the age attribute: 6.0. +
    • +
    • New value for the jg attribute: + Teh_Marbuta_Goal +
    • +
    • New values for the script attribute: Batk, Brah, + Mand. +
    • +
    • Updated the patterns for kIRG_GSource, kIRG_HSource, + kIRG_JSource, kIRG_KSource, kIRG_MSource, + kIRG_TSource, kIRG_VSource. +
    • +
    • Added the InSC and InMC attributes. +
    • +
    • Added the emoji-sources element. +
    • +
    +
    +
    +

    Revision 6 being a proposed update, only changes between revisions 5 and 7 are noted + here. +

    +
    +
    +

    + Revision 5 +

    +
      +
    • Changed the type of block/@first-cp, block/@last-cp and + normalization-corrections/@cp + from text to + single-code-point +
    • +
    • Changed the type of named-sequence/@cps, + provisional-named-sequences/@cps, normalization-correction/@old and + normalization-correction/@new + from text to one-or-more-code-points. +
    • +
    • Changed the type of standardized-variants/@cps from text to + two-code-points. +
    • +
    • New values for the jg attribute: Farsi_Yeh and Nya. +
    • +
    • New value for the age attribute: 5.2. +
    • +
    • New values for the sc attribute: Lana, Tavt, + Avst, Egyp, Samr, Lisu, Bamu, Java, + Mtei, Armi, Sarb, Prti, Phli, Orkh, + Kthi. +
    • +
    • New value for the lb attribute: CP. +
    • +
    • New value for the sc attribute: Zinh. +
    • +
    • New code point attributes CI, Cased, CWCF, + CWCM, CWL, CWKCF, CWT, CWU, + NFKC_CF. +
    • +
    • New attributes kHanyuPinyin and kIRG_MSource. +
    • +
    • New element + cjk-radicals +
    • +
    • Updated the patterns for kIRG_GSource, kIRG_JSource, + kIRG_KPSource, kIRG_KSource, kIRG_TSource, + kIRG_VSource, kHanyuPinlu, kMandarin, + kSemanticVariant, kSpecializedSemanticVariant, + kVietnamese, kZVariant. +
    • +
    • Pointed out that Relax NG schemas do not modify or augment the infoset, and that it is possible + to mechanically convert our schema to other schema languages. +
    • +
    +
    +
    +

    Revision 4 being a proposed update, only changes between revisions 3 and 5 are noted + here. +

    +
    +
    +

    + Revision 3 +

    +
      +
    • First approved version, for Unicode 5.1.0.
    • +
    • For optional elements which act as collections, such as repertoire and + named-sequences, impose that there be at least one element in the collection. +
    • +
    • Removed the constraint that the value of jg is limited when jt has + certain values; similarly for bmg / Bidi_M and for nv / + nt. +
    • +
    • Value NL added to the WB attribute (for Unicode 5.1). +
    • +
    • Value PP added to the GCB attribute (for Unicode 5.1). +
    • +
    • Corrected the Vai script value to Vaii. +
    • +
    • Removed the discussion of elements or attributes in different namespaces.
    • +
    • Removed the code-point element. +
    • +
    +
    +
    +

    + Revision 2 +

    +
      +
    • Promoted to Draft UAX.
    • +
    • Changed the title from "An XML representation of the UCD" to "Unicode Character Database in XML".
    • +
    • Value 5.1 added to the age attribute (for Unicode 5.1). +
    • +
    • Value SM added to the gcb attribute (for Unicode 5.1). +
    • +
    • Values CR, Extend, LF, MB added to the + WB + attribute (for Unicode 5.1). +
    • +
    • Values CR, EX, LF, SC added to the SB + attribute (for Unicode 5.1). +
    • +
    • Value Burushaski_Yeh_Barree added to the jg attribute (for + Unicode 5.1). +
    • +
    • Value Alef_Maqsurah added to the jg attribute (for Unicode 2.x). +
    • +
    • Values Cari, Cham, Kali, Lepc, + Lyci, Lydi, Olck, Rjng, Saur, Sund and + Vai + added to the sc attribute (for Unicode 5.0). +
    • +
    • + jamo + attribute renamed to + JSN +
    • +
    • + sfc + attribute renamed to + scf +
    • +
    • Attribute kXHC1983 added (for Unicode 5.1.0). +
    • +
    • Pattern for attribute kIRG_USource extended (for Unicode 5.1.0). +
    • +
    • Element provisional-named-sequences added (for Unicode 5.0) +
    • +
    +
    +
    +

    + Revision 1 +

    +
      +
    • First working draft.
    • +
    +
    +
    + + + +
    + + diff --git a/uax/uax42/output/index.rnc b/uax/uax42/output/index.rnc new file mode 100644 index 000000000..7cdf380f3 --- /dev/null +++ b/uax/uax42/output/index.rnc @@ -0,0 +1,1453 @@ + + # Copyright © 2024 Unicode, Inc. + + + + default namespace ucd = "http://www.unicode.org/ns/2003/ucd/1.0" + + + # default; datatypes xsd = "http://www.w3.org/2001/XMLSchema-datatypes" + + single-code-point = xsd:string { pattern = "(|[1-9A-F]|(10))[0-9A-F]{4}" } + + one-or-more-code-points = list { single-code-point + } + zero-or-more-code-points = list { single-code-point * } + two-code-points = list { single-code-point, single-code-point } + + jis-code-point = xsd:string { pattern = "[0-9A-F]{4}" } + + + start = + element ucd { ucd.content } + + + boolean = "Y" | "N" + + + ucd.content &= + element description { text }? + + + ucd.content &= + element repertoire { (code-point | group) + }? + + set-of-code-points = + attribute cp { single-code-point } + | ( attribute first-cp { single-code-point }, + attribute last-cp { single-code-point } ) + + code-point |= + element reserved { + set-of-code-points, + code-point-attributes } + + code-point |= + element noncharacter { + set-of-code-points, + code-point-attributes } + + code-point |= + element surrogate { + set-of-code-points, + code-point-attributes } + + code-point |= + element char { + set-of-code-points, + code-point-attributes } + + group = + element group { + code-point-attributes, + code-point* } + + + code-point-attributes &= + attribute age { "1.1" + | "2.0" | "2.1" + | "3.0" | "3.1" | "3.2" + | "4.0" | "4.1" + | "5.0" | "5.1" | "5.2" + | "6.0" | "6.1" | "6.2" | "6.3" + | "7.0" + | "8.0" + | "9.0" + | "10.0" + | "11.0" + | "12.0" | "12.1" + | "13.0" + | "14.0" + | "15.0" | "15.1" + | "16.0" + | "unassigned" + }? 
+ + code-point-attributes &= + attribute na { "" | + "CJK UNIFIED IDEOGRAPH-#" | + "CJK COMPATIBILITY IDEOGRAPH-#" | + "EGYPTIAN HIEROGLYPH-#" | + "TANGUT IDEOGRAPH-#" | + "KHITAN SMALL SCRIPT CHARACTER-#" | + "NUSHU CHARACTER-#" | + xsd:string { pattern="[a-zA-Z0-9]+(( -|- |[\-_ ])[a-zA-Z0-9]+)*" } + }? + + code-point-attributes &= + attribute na1 { "" | xsd:string { pattern="[a-zA-Z0-9]+([\-_ ][a-zA-Z0-9]+)*( \(.*\))?" } }? + + code-point-attributes &= + element name-alias { + attribute alias { xsd:string { pattern="[a-zA-Z0-9]+(( -|- |[\-_ ])[a-zA-Z0-9]+)*" } }?, + attribute type { "abbreviation" | "alternate" + | "control" | "correction" + | "figment" + }? } * + + code-point-attributes &= + attribute blk { "Adlam" + | "Aegean_Numbers" + | "Ahom" + | "Alchemical" + | "Alphabetic_PF" + | "Anatolian_Hieroglyphs" + | "Ancient_Greek_Music" + | "Ancient_Greek_Numbers" + | "Ancient_Symbols" + | "Arabic" + | "Arabic_Ext_A" + | "Arabic_Ext_B" + | "Arabic_Ext_C" + | "Arabic_Math" + | "Arabic_PF_A" + | "Arabic_PF_B" + | "Arabic_Sup" + | "Armenian" + | "Arrows" + | "ASCII" + | "Avestan" + | "Balinese" + | "Bamum" + | "Bamum_Sup" + | "Bassa_Vah" + | "Batak" + | "Bengali" + | "Bhaiksuki" + | "Block_Elements" + | "Bopomofo" + | "Bopomofo_Ext" + | "Box_Drawing" + | "Brahmi" + | "Braille" + | "Buginese" + | "Buhid" + | "Byzantine_Music" + | "Carian" + | "Caucasian_Albanian" + | "Chakma" + | "Cham" + | "Cherokee" + | "Cherokee_Sup" + | "Chess_Symbols" + | "Chorasmian" + | "CJK" + | "CJK_Compat" + | "CJK_Compat_Forms" + | "CJK_Compat_Ideographs" + | "CJK_Compat_Ideographs_Sup" + | "CJK_Ext_A" + | "CJK_Ext_B" + | "CJK_Ext_C" + | "CJK_Ext_D" + | "CJK_Ext_E" + | "CJK_Ext_F" + | "CJK_Ext_G" + | "CJK_Ext_H" + | "CJK_Ext_I" + | "CJK_Radicals_Sup" + | "CJK_Strokes" + | "CJK_Symbols" + | "Compat_Jamo" + | "Control_Pictures" + | "Coptic" + | "Coptic_Epact_Numbers" + | "Counting_Rod" + | "Cuneiform" + | "Cuneiform_Numbers" + | "Currency_Symbols" + | "Cypriot_Syllabary" + | "Cypro_Minoan" + 
| "Cyrillic" + | "Cyrillic_Ext_A" + | "Cyrillic_Ext_B" + | "Cyrillic_Ext_C" + | "Cyrillic_Ext_D" + | "Cyrillic_Sup" + | "Deseret" + | "Devanagari" + | "Devanagari_Ext" + | "Devanagari_Ext_A" + | "Diacriticals" + | "Diacriticals_Ext" + | "Diacriticals_For_Symbols" + | "Diacriticals_Sup" + | "Dingbats" + | "Dives_Akuru" + | "Dogra" + | "Domino" + | "Duployan" + | "Early_Dynastic_Cuneiform" + | "Egyptian_Hieroglyph_Format_Controls" + | "Egyptian_Hieroglyphs" + | "Egyptian_Hieroglyphs_Ext_A" + | "Elbasan" + | "Elymaic" + | "Emoticons" + | "Enclosed_Alphanum" + | "Enclosed_Alphanum_Sup" + | "Enclosed_CJK" + | "Enclosed_Ideographic_Sup" + | "Ethiopic" + | "Ethiopic_Ext" + | "Ethiopic_Ext_A" + | "Ethiopic_Ext_B" + | "Ethiopic_Sup" + | "Garay" + | "Geometric_Shapes" + | "Geometric_Shapes_Ext" + | "Georgian" + | "Georgian_Ext" + | "Georgian_Sup" + | "Glagolitic" + | "Glagolitic_Sup" + | "Gothic" + | "Grantha" + | "Greek" + | "Greek_Ext" + | "Gujarati" + | "Gunjala_Gondi" + | "Gurmukhi" + | "Gurung_Khema" + | "Half_And_Full_Forms" + | "Half_Marks" + | "Hangul" + | "Hanifi_Rohingya" + | "Hanunoo" + | "Hatran" + | "Hebrew" + | "High_PU_Surrogates" + | "High_Surrogates" + | "Hiragana" + | "IDC" + | "Ideographic_Symbols" + | "Imperial_Aramaic" + | "Indic_Number_Forms" + | "Indic_Siyaq_Numbers" + | "Inscriptional_Pahlavi" + | "Inscriptional_Parthian" + | "IPA_Ext" + | "Jamo" + | "Jamo_Ext_A" + | "Jamo_Ext_B" + | "Javanese" + | "Kaithi" + | "Kaktovik_Numerals" + | "Kana_Ext_A" + | "Kana_Ext_B" + | "Kana_Sup" + | "Kanbun" + | "Kangxi" + | "Kannada" + | "Katakana" + | "Katakana_Ext" + | "Kawi" + | "Kayah_Li" + | "Kharoshthi" + | "Khitan_Small_Script" + | "Khmer" + | "Khmer_Symbols" + | "Khojki" + | "Khudawadi" + | "Kirat_Rai" + | "Lao" + | "Latin_1_Sup" + | "Latin_Ext_A" + | "Latin_Ext_Additional" + | "Latin_Ext_B" + | "Latin_Ext_C" + | "Latin_Ext_D" + | "Latin_Ext_E" + | "Latin_Ext_F" + | "Latin_Ext_G" + | "Lepcha" + | "Letterlike_Symbols" + | "Limbu" + | "Linear_A" + | 
"Linear_B_Ideograms" + | "Linear_B_Syllabary" + | "Lisu" + | "Lisu_Sup" + | "Low_Surrogates" + | "Lycian" + | "Lydian" + | "Mahajani" + | "Mahjong" + | "Makasar" + | "Malayalam" + | "Mandaic" + | "Manichaean" + | "Marchen" + | "Masaram_Gondi" + | "Math_Alphanum" + | "Math_Operators" + | "Mayan_Numerals" + | "Medefaidrin" + | "Meetei_Mayek" + | "Meetei_Mayek_Ext" + | "Mende_Kikakui" + | "Meroitic_Cursive" + | "Meroitic_Hieroglyphs" + | "Miao" + | "Misc_Arrows" + | "Misc_Math_Symbols_A" + | "Misc_Math_Symbols_B" + | "Misc_Pictographs" + | "Misc_Symbols" + | "Misc_Technical" + | "Modi" + | "Modifier_Letters" + | "Modifier_Tone_Letters" + | "Mongolian" + | "Mongolian_Sup" + | "Mro" + | "Multani" + | "Music" + | "Myanmar" + | "Myanmar_Ext_A" + | "Myanmar_Ext_B" + | "Myanmar_Ext_C" + | "Nabataean" + | "Nag_Mundari" + | "Nandinagari" + | "NB" + | "New_Tai_Lue" + | "Newa" + | "NKo" + | "Number_Forms" + | "Nushu" + | "Nyiakeng_Puachue_Hmong" + | "OCR" + | "Ogham" + | "Ol_Chiki" + | "Ol_Onal" + | "Old_Hungarian" + | "Old_Italic" + | "Old_North_Arabian" + | "Old_Permic" + | "Old_Persian" + | "Old_Sogdian" + | "Old_South_Arabian" + | "Old_Turkic" + | "Old_Uyghur" + | "Oriya" + | "Ornamental_Dingbats" + | "Osage" + | "Osmanya" + | "Ottoman_Siyaq_Numbers" + | "Pahawh_Hmong" + | "Palmyrene" + | "Pau_Cin_Hau" + | "Phags_Pa" + | "Phaistos" + | "Phoenician" + | "Phonetic_Ext" + | "Phonetic_Ext_Sup" + | "Playing_Cards" + | "Psalter_Pahlavi" + | "PUA" + | "Punctuation" + | "Rejang" + | "Rumi" + | "Runic" + | "Samaritan" + | "Saurashtra" + | "Sharada" + | "Shavian" + | "Shorthand_Format_Controls" + | "Siddham" + | "Sinhala" + | "Sinhala_Archaic_Numbers" + | "Small_Forms" + | "Small_Kana_Ext" + | "Sogdian" + | "Sora_Sompeng" + | "Soyombo" + | "Specials" + | "Sundanese" + | "Sundanese_Sup" + | "Sunuwar" + | "Sup_Arrows_A" + | "Sup_Arrows_B" + | "Sup_Arrows_C" + | "Sup_Math_Operators" + | "Sup_PUA_A" + | "Sup_PUA_B" + | "Sup_Punctuation" + | "Sup_Symbols_And_Pictographs" + | 
"Super_And_Sub" + | "Sutton_SignWriting" + | "Syloti_Nagri" + | "Symbols_And_Pictographs_Ext_A" + | "Symbols_For_Legacy_Computing" + | "Symbols_For_Legacy_Computing_Sup" + | "Syriac" + | "Syriac_Sup" + | "Tagalog" + | "Tagbanwa" + | "Tags" + | "Tai_Le" + | "Tai_Tham" + | "Tai_Viet" + | "Tai_Xuan_Jing" + | "Takri" + | "Tamil" + | "Tamil_Sup" + | "Tangsa" + | "Tangut" + | "Tangut_Components" + | "Tangut_Sup" + | "Telugu" + | "Thaana" + | "Thai" + | "Tibetan" + | "Tifinagh" + | "Tirhuta" + | "Todhri" + | "Toto" + | "Transport_And_Map" + | "Tulu_Tigalari" + | "UCAS" + | "UCAS_Ext" + | "UCAS_Ext_A" + | "Ugaritic" + | "Vai" + | "Vedic_Ext" + | "Vertical_Forms" + | "Vithkuqi" + | "VS" + | "VS_Sup" + | "Wancho" + | "Warang_Citi" + | "Yezidi" + | "Yi_Radicals" + | "Yi_Syllables" + | "Yijing" + | "Zanabazar_Square" + | "Znamenny_Music" + }? + + code-point-attributes &= + attribute gc { "Cc" | "Cf" | "Cn" | "Co" | "Cs" + | "Ll" | "Lm" | "Lo" | "Lt" | "Lu" + | "Mc" | "Me" | "Mn" + | "Nd" | "Nl" | "No" + | "Pc" | "Pd" | "Pe" | "Pf" | "Pi" | "Po" | "Ps" + | "Sc" | "Sk" | "Sm" | "So" + | "Zl" | "Zp" | "Zs" + }? + + code-point-attributes &= + attribute ccc { xsd:integer { minInclusive="0" maxInclusive="254" } }? + + code-point-attributes &= + attribute bc { "AL" | "AN" + | "B" | "BN" + | "CS" + | "EN" | "ES" | "ET" + | "FSI" + | "L" | "LRE" | "LRI" | "LRO" + | "NSM" + | "ON" + | "PDF" | "PDI" + | "R" | "RLE" | "RLI" | "RLO" + | "S" + | "WS" + }? + + code-point-attributes &= + attribute Bidi_M { boolean }? + + code-point-attributes &= + attribute bmg { "" | single-code-point }? + + code-point-attributes &= + attribute Bidi_C { boolean }? + + code-point-attributes &= + attribute bpt { "o" | "c" | "n" }? + + code-point-attributes &= + attribute bpb { "#" | single-code-point }? + + code-point-attributes &= + attribute dt { "can" | "com" | "enc" | "fin" | "font" | "fra" + | "init" | "iso" | "med" | "nar" | "nb" | "sml" + | "sqr" | "sub" | "sup" | "vert" | "wide" | "none" + }? 
+ + code-point-attributes &= + attribute dm { "#" | zero-or-more-code-points }? + + code-point-attributes &= + attribute CE { boolean }? + + code-point-attributes &= + attribute Comp_Ex { boolean }? + + code-point-attributes &= + attribute NFC_QC { "Y" | "N" | "M" }? + + code-point-attributes &= + attribute NFD_QC { "Y" | "N" }? + + code-point-attributes &= + attribute NFKC_QC { "Y" | "N" | "M" }? + + code-point-attributes &= + attribute NFKD_QC { "Y" | "N" }? + + + code-point-attributes &= + attribute XO_NFC { boolean }? + + code-point-attributes &= + attribute XO_NFD { boolean }? + + code-point-attributes &= + attribute XO_NFKC { boolean }? + + code-point-attributes &= + attribute XO_NFKD { boolean }? + + + code-point-attributes &= + attribute FC_NFKC { "#" | one-or-more-code-points }? + + code-point-attributes &= + attribute nt { "De" | "Di" | "Nu" | "None" }? + + code-point-attributes &= + attribute nv { "NaN" | xsd:string { pattern="-?[0-9]+(/[0-9]+)?" } }? + + code-point-attributes &= + attribute jt { "C" | "D" | "L" | "R" | "T" | "U" }? 
+ + code-point-attributes &= + attribute jg { "African_Feh" | "African_Noon" | "African_Qaf" + | "Ain" | "Alaph" | "Alef" + | "Beh" | "Beth" | "Burushaski_Yeh_Barree" + | "Dal" | "Dalath_Rish" + | "E" + | "Farsi_Yeh" | "Fe" | "Feh" | "Final_Semkath" + | "Gaf" | "Gamal" + | "Hah" | "Hanifi_Rohingya_Kinna_Ya" + | "Hanifi_Rohingya_Pa" | "He" | "Heh" | "Heh_Goal" + | "Heth" + | "Kaf" | "Kaph" | "Khaph" | "Knotted_Heh" + | "Lam" | "Lamadh" + | "Malayalam_Bha" | "Malayalam_Ja" | "Malayalam_Lla" + | "Malayalam_Llla" | "Malayalam_Nga" + | "Malayalam_Nna" | "Malayalam_Nnna" + | "Malayalam_Nya" | "Malayalam_Ra" | "Malayalam_Ssa" + | "Malayalam_Tta" | "Manichaean_Aleph" + | "Manichaean_Ayin" | "Manichaean_Beth" + | "Manichaean_Daleth" | "Manichaean_Dhamedh" + | "Manichaean_Five" | "Manichaean_Gimel" + | "Manichaean_Heth" | "Manichaean_Hundred" + | "Manichaean_Kaph" | "Manichaean_Lamedh" + | "Manichaean_Mem" | "Manichaean_Nun" + | "Manichaean_One" | "Manichaean_Pe" + | "Manichaean_Qoph" | "Manichaean_Resh" + | "Manichaean_Sadhe" | "Manichaean_Samekh" + | "Manichaean_Taw" | "Manichaean_Ten" + | "Manichaean_Teth" | "Manichaean_Thamedh" + | "Manichaean_Twenty" | "Manichaean_Waw" + | "Manichaean_Yodh" | "Manichaean_Zayin" | "Meem" + | "Mim" + | "No_Joining_Group" | "Noon" | "Nun" | "Nya" + | "Pe" + | "Qaf" | "Qaph" + | "Reh" | "Reversed_Pe" | "Rohingya_Yeh" + | "Sad" | "Sadhe" | "Seen" | "Semkath" | "Shin" + | "Straight_Waw" | "Swash_Kaf" | "Syriac_Waw" + | "Tah" | "Taw" | "Teh_Marbuta" | "Teh_Marbuta_Goal" + | "Teth" | "Thin_Yeh" + | "Vertical_Tail" + | "Waw" + | "Yeh" | "Yeh_Barree" | "Yeh_With_Tail" | "Yudh" + | "Yudh_He" + | "Zain" | "Zhain" + }? + + code-point-attributes &= + attribute Join_C { boolean }? 
+ + code-point-attributes &= + attribute lb { "AI" | "AK" | "AL" | "AP" | "AS" + | "B2" | "BA" | "BB" | "BK" + | "CB" | "CJ" | "CL" | "CM" | "CP" | "CR" + | "EB" | "EM" | "EX" + | "GL" + | "H2" | "H3" | "HL" | "HY" + | "ID" | "IN" | "IS" + | "JL" | "JT" | "JV" + | "LF" + | "NL" | "NS" | "NU" + | "OP" + | "PO" | "PR" + | "QU" + | "RI" + | "SA" | "SG" | "SP" | "SY" + | "VF" | "VI" + | "WJ" + | "XX" + | "ZW" | "ZWJ" + }? + + code-point-attributes &= + attribute ea { "A" | "F" | "H" | "N" | "Na" | "W" }? + + code-point-attributes &= + attribute Upper { boolean }? + + code-point-attributes &= + attribute Lower { boolean }? + + code-point-attributes &= + attribute OUpper { boolean }? + + code-point-attributes &= + attribute OLower { boolean }? + + code-point-attributes &= + attribute suc { "#" | single-code-point }? + + code-point-attributes &= + attribute slc { "#" | single-code-point }? + + code-point-attributes &= + attribute stc { "#" | single-code-point }? + + code-point-attributes &= + attribute uc { "#" | one-or-more-code-points }? + + code-point-attributes &= + attribute lc { "#" | one-or-more-code-points }? + + code-point-attributes &= + attribute tc { "#" | one-or-more-code-points }? + + code-point-attributes &= + attribute scf { "#" | single-code-point }? + + code-point-attributes &= + attribute cf { "#" | one-or-more-code-points }? + + code-point-attributes &= + attribute CI { boolean }? + + code-point-attributes &= + attribute Cased { boolean }? + + code-point-attributes &= + attribute CWCF { boolean }? + + code-point-attributes &= + attribute CWCM { boolean }? + + code-point-attributes &= + attribute CWL { boolean }? + + code-point-attributes &= + attribute CWKCF { boolean }? + + code-point-attributes &= + attribute CWT { boolean }? + + code-point-attributes &= + attribute CWU { boolean }? + + code-point-attributes &= + attribute NFKC_CF { "#" | zero-or-more-code-points }? + + code-point-attributes &= + attribute NFKC_SCF { "#" | zero-or-more-code-points }? 
+ + script = "Adlm" | "Aghb" | "Ahom" | "Arab" | "Armi" | "Armn" + | "Avst" + | "Bali" | "Bamu" | "Bass" | "Batk" | "Beng" | "Bhks" + | "Bopo" | "Brah" | "Brai" | "Bugi" | "Buhd" + | "Cakm" | "Cans" | "Cari" | "Cham" | "Cher" | "Chrs" + | "Copt" | "Cpmn" | "Cprt" | "Cyrl" + | "Deva" | "Diak" | "Dogr" | "Dsrt" | "Dupl" + | "Egyp" | "Elba" | "Elym" | "Ethi" + | "Gara" | "Geor" | "Glag" | "Gong" | "Gonm" | "Goth" + | "Gran" | "Grek" | "Gujr" | "Gukh" | "Guru" + | "Hang" | "Hani" | "Hano" | "Hatr" | "Hebr" | "Hira" + | "Hluw" | "Hmng" | "Hmnp" | "Hrkt" | "Hung" + | "Ital" + | "Java" + | "Kali" | "Kana" | "Kawi" | "Khar" | "Khmr" | "Khoj" + | "Kits" | "Knda" | "Krai" | "Kthi" + | "Lana" | "Laoo" | "Latn" | "Lepc" | "Limb" | "Lina" + | "Linb" | "Lisu" | "Lyci" | "Lydi" + | "Mahj" | "Maka" | "Mand" | "Mani" | "Marc" | "Medf" + | "Mend" | "Merc" | "Mero" | "Mlym" | "Modi" | "Mong" + | "Mroo" | "Mtei" | "Mult" | "Mymr" + | "Nagm" | "Nand" | "Narb" | "Nbat" | "Newa" | "Nkoo" + | "Nshu" + | "Ogam" | "Olck" | "Onao" | "Orkh" | "Orya" | "Osge" + | "Osma" | "Ougr" + | "Palm" | "Pauc" | "Perm" | "Phag" | "Phli" | "Phlp" + | "Phnx" | "Plrd" | "Prti" + | "Rjng" | "Rohg" | "Runr" + | "Samr" | "Sarb" | "Saur" | "Sgnw" | "Shaw" | "Shrd" + | "Sidd" | "Sind" | "Sinh" | "Sogd" | "Sogo" | "Sora" + | "Soyo" | "Sund" | "Sunu" | "Sylo" | "Syrc" + | "Tagb" | "Takr" | "Tale" | "Talu" | "Taml" | "Tang" + | "Tavt" | "Telu" | "Tfng" | "Tglg" | "Thaa" | "Thai" + | "Tibt" | "Tirh" | "Tnsa" | "Todr" | "Toto" | "Tutg" + | "Ugar" + | "Vaii" | "Vith" + | "Wara" | "Wcho" + | "Xpeo" | "Xsux" + | "Yezi" | "Yiii" + | "Zanb" | "Zinh" | "Zyyy" | "Zzzz" + + code-point-attributes &= + attribute sc { script }? + + code-point-attributes &= + attribute scx { list { script + } }? + + code-point-attributes &= + attribute isc { text }? + + code-point-attributes &= + attribute hst { "L" | "LV" | "LVT" | "NA" | "T" | "V" }? + + code-point-attributes &= + attribute JSN { xsd:string { pattern="[A-Z]{0,3}" } }? 
+ + code-point-attributes &= + attribute InSC { "Avagraha" + | "Bindu" + | "Brahmi_Joining_Number" + | "Cantillation_Mark" + | "Consonant" + | "Consonant_Dead" + | "Consonant_Final" + | "Consonant_Head_Letter" + | "Consonant_Initial_Postfixed" + | "Consonant_Killer" + | "Consonant_Medial" + | "Consonant_Placeholder" + | "Consonant_Preceding_Repha" + | "Consonant_Prefixed" + | "Consonant_Subjoined" + | "Consonant_Succeeding_Repha" + | "Consonant_With_Stacker" + | "Gemination_Mark" + | "Invisible_Stacker" + | "Joiner" + | "Modifying_Letter" + | "Non_Joiner" + | "Nukta" + | "Number" + | "Number_Joiner" + | "Other" + | "Pure_Killer" + | "Register_Shifter" + | "Reordering_Killer" + | "Syllable_Modifier" + | "Tone_Letter" + | "Tone_Mark" + | "Virama" + | "Visarga" + | "Vowel" + | "Vowel_Dependent" + | "Vowel_Independent" + }? + + code-point-attributes &= + attribute InPC { "Bottom" + | "Bottom_And_Left" + | "Bottom_And_Right" + | "Left" + | "Left_And_Right" + | "NA" + | "Overstruck" + | "Right" + | "Top" + | "Top_And_Bottom" + | "Top_And_Bottom_And_Left" + | "Top_And_Bottom_And_Right" + | "Top_And_Left" + | "Top_And_Left_And_Right" + | "Top_And_Right" + | "Visual_Order_Left" + }? + + code-point-attributes &= + attribute InCB { "Consonant" + | "Extend" + | "Linker" + | "None" + }? + + code-point-attributes &= + attribute IDS { boolean }? + + code-point-attributes &= + attribute OIDS { boolean }? + + code-point-attributes &= + attribute XIDS { boolean }? + + code-point-attributes &= + attribute IDC { boolean }? + + code-point-attributes &= + attribute OIDC { boolean }? + + code-point-attributes &= + attribute XIDC { boolean }? + + code-point-attributes &= + attribute ID_Compat_Math_Start { boolean }? + + code-point-attributes &= + attribute ID_Compat_Math_Continue { boolean }? + + code-point-attributes &= + attribute Pat_Syn { boolean }? + + code-point-attributes &= + attribute Pat_WS { boolean }? + + code-point-attributes &= + attribute Dash { boolean }? 
+ + code-point-attributes &= + attribute Hyphen { boolean }? + + code-point-attributes &= + attribute QMark { boolean }? + + code-point-attributes &= + attribute Term { boolean }? + + code-point-attributes &= + attribute STerm { boolean }? + + code-point-attributes &= + attribute Dia { boolean }? + + code-point-attributes &= + attribute Ext { boolean }? + + code-point-attributes &= + attribute SD { boolean }? + + code-point-attributes &= + attribute Alpha { boolean }? + + code-point-attributes &= + attribute OAlpha { boolean }? + + code-point-attributes &= + attribute Math { boolean }? + + code-point-attributes &= + attribute OMath { boolean }? + + code-point-attributes &= + attribute Hex { boolean }? + + code-point-attributes &= + attribute AHex { boolean }? + + code-point-attributes &= + attribute DI { boolean }? + + code-point-attributes &= + attribute ODI { boolean }? + + code-point-attributes &= + attribute LOE { boolean }? + + code-point-attributes &= + attribute PCM { boolean }? + + code-point-attributes &= + attribute MCM { boolean }? + + code-point-attributes &= + attribute WSpace { boolean }? + + code-point-attributes &= + attribute vo { "R" | "Tr" | "Tu" | "U" }? + + code-point-attributes &= + attribute RI { boolean }? + + code-point-attributes &= + attribute Gr_Base { boolean }? + + code-point-attributes &= + attribute Gr_Ext { boolean }? + + code-point-attributes &= + attribute OGr_Ext { boolean }? + + code-point-attributes &= + attribute Gr_Link { boolean }? + + code-point-attributes &= + attribute GCB { "CN" | "CR" + | "EB" | "EBG" | "EM" | "EX" + | "GAZ" + | "L" | "LF" | "LV" | "LVT" + | "PP" + | "RI" + | "SM" + | "T" + | "V" + | "XX" + | "ZWJ" + }? + + code-point-attributes &= + attribute WB { "CR" + | "DQ" + | "EB" | "EBG" | "EM" | "EX" | "Extend" + | "FO" + | "GAZ" + | "HL" + | "KA" + | "LE" | "LF" + | "MB" | "ML" | "MN" + | "NL" | "NU" + | "RI" + | "SQ" + | "WSegSpace" + | "XX" + | "ZWJ" + }? 
+ + code-point-attributes &= + attribute SB { "AT" + | "CL" | "CR" + | "EX" + | "FO" + | "LE" | "LF" | "LO" + | "NU" + | "SC" | "SE" | "SP" | "ST" + | "UP" + | "XX" + }? + + code-point-attributes &= + attribute Ideo { boolean }? + + code-point-attributes &= + attribute UIdeo { boolean }? + + code-point-attributes &= + attribute EqUIdeo { single-code-point }? + + code-point-attributes &= + attribute IDSB { boolean }? + + code-point-attributes &= + attribute IDST { boolean }? + + code-point-attributes &= + attribute IDSU { boolean }? + + code-point-attributes &= + attribute Radical { boolean }? + + code-point-attributes &= + attribute Dep { boolean }? + + code-point-attributes &= + attribute VS { boolean }? + + code-point-attributes &= + attribute NChar { boolean }? + + code-point-attributes &= attribute kAccountingNumeric + { xsd:string { pattern="[0-9]+" } }? + + code-point-attributes &= attribute kAlternateTotalStrokes + { list { xsd:string { pattern="(\d+:[BHJKMPSUV]+)|-" }+ } }? + + code-point-attributes &= attribute kBigFive + { xsd:string { pattern="[0-9A-F]{4}'?" } }? + + code-point-attributes &= attribute kCangjie + { xsd:string { pattern="[A-Z]+" } }? + + code-point-attributes &= attribute kCantonese + { list { xsd:string { pattern="[a-z]{1,6}[1-6]" }+ } }? + + code-point-attributes &= attribute kCCCII + { list { xsd:string { pattern="[0-9A-F]{6}" }+ } }? + + code-point-attributes &= attribute kCheungBauer + { list { xsd:string { pattern="[0-9]{3}/[0-9]{2};[A-Z]*;[a-z1-6\[\]/,]+" }+ } }? + + code-point-attributes &= attribute kCheungBauerIndex + { list { xsd:string { pattern="[0-9]{3}\.[01][0-9]" }+ } }? + + code-point-attributes &= attribute kCihaiT + { list { xsd:string { pattern="[1-9][0-9]{0,3}\.[0-9]{3}" }+ } }? + + code-point-attributes &= attribute kCNS1986 + { xsd:string { pattern="[12E]-[0-9A-F]{4}" } }? + + code-point-attributes &= attribute kCNS1992 + { xsd:string { pattern="[1-9]-[0-9A-F]{4}" } }? 
+ + code-point-attributes &= attribute kCompatibilityVariant + { "" | xsd:string { pattern="U\+[23]?[0-9A-F]{4}" } }? + + code-point-attributes &= attribute kCowles + { list { xsd:string { pattern="[0-9]{1,4}(\.[0-9]{1,2})?" }+ } }? + + code-point-attributes &= attribute kDaeJaweon + { xsd:string { pattern="[0-9]{4}\.[0-9]{2}[01]" } }? + + code-point-attributes &= attribute kDefinition + { xsd:string { pattern='[^\t"]+' } }? + + code-point-attributes &= attribute kEACC + { xsd:string { pattern="[0-9A-F]{6}" } }? + + code-point-attributes &= attribute kFanqie + { list { xsd:string { pattern="[\x{3400}-\x{4DBF}\x{4E00}-\x{9FFF}\x{20000}-\x{2A6DF}]{2}" }+ } }? + + code-point-attributes &= attribute kFenn + { list { xsd:string { pattern="[0-9]+a?[A-KP*]" }+ } }? + + code-point-attributes &= attribute kFennIndex + { list { xsd:string { pattern="[0-9][0-9]{0,2}\.[01][0-9]" }+ } }? + + code-point-attributes &= attribute kFourCornerCode + { list { xsd:string { pattern="[0-9]{4}(\.[0-9])?" }+ } }? + + code-point-attributes &= attribute kGB0 + { xsd:string { pattern="[0-9]{4}" } }? + + code-point-attributes &= attribute kGB1 + { xsd:string { pattern="[0-9]{4}" } }? + + code-point-attributes &= attribute kGB3 + { xsd:string { pattern="[0-9]{4}" } }? + + code-point-attributes &= attribute kGB5 + { xsd:string { pattern="[0-9]{4}" } }? + + code-point-attributes &= attribute kGB7 + { xsd:string { pattern="[0-9]{4}" } }? + + code-point-attributes &= attribute kGB8 + { xsd:string { pattern="[0-9]{4}" } }? + + code-point-attributes &= attribute kGradeLevel + { xsd:string { pattern="[1-6]" } }? + + code-point-attributes &= attribute kGSR + { list { xsd:string { pattern="[0-9]{4}[a-vx-z]'?" }+ } }? + + code-point-attributes &= attribute kHangul + { list { xsd:string { pattern="[\x{1100}-\x{1112}][\x{1161}-\x{1175}][\x{11A8}-\x{11C2}]?:[01ENX]{1,3}" }+ } }? + + code-point-attributes &= attribute kHanYu + { list { xsd:string { pattern="[1-8][0-9]{4}\.[0-3][0-9][0-3]" }+ } }? 
+ + code-point-attributes &= attribute kHanyuPinlu + { list { xsd:string { pattern="[a-z\x{300}-\x{302}\x{304}\x{308}\x{30C}]+\([0-9]+\)" }+ } }? + + code-point-attributes &= attribute kHanyuPinyin + { list { xsd:string { pattern="(\d{5}\.\d{2}0,)*\d{5}\.\d{2}0:([a-z\x{300}-\x{302}\x{304}\x{308}\x{30C}]+,)*[a-z\x{300}-\x{302}\x{304}\x{308}\x{30C}]+" }+ } }? + + code-point-attributes &= attribute kHDZRadBreak + { xsd:string { pattern="[\x{2F00}-\x{2FD5}]\[U\+2F[0-9A-D][0-9A-F]\]:[1-8][0-9]{4}\.[0-3][0-9]0" } }? + + code-point-attributes &= attribute kHKGlyph + { list { xsd:string { pattern="[0-9]{4}" }+ } }? + + code-point-attributes &= attribute kIBMJapan + { list { xsd:string { pattern="F[ABC][0-9A-F]{2}" }+ } }? + + code-point-attributes &= attribute kIICore + { list { xsd:string { pattern="[ABC][GHJKMPT]{1,7}" }+ } }? + + code-point-attributes &= attribute kIRG_GSource + { "" | xsd:string { pattern="G[013578EKS]-[0-9A-F]{4}" } + | xsd:string { pattern="G4K(-\d{5})?" } + | xsd:string { pattern="G(DZ|GH|RM|WZ|XC|XH|ZH)-\d{4}\.\d{2}" } + | xsd:string { pattern="G(BK|CH|CY|HC)(-\d{4}\.\d{2})?" } + | xsd:string { pattern="GKX-\d{4}\.\d{2,3}" } + | xsd:string { pattern="G(HZ|HZR)-\d{5}\.\d{2}" } + | xsd:string { pattern="G(CE|FC|IDC23|OCD|XHZ)-\d{3}" } + | xsd:string { pattern="G(H|HF|LGYJ|PGLG|T)-\d{4}" } + | xsd:string { pattern="G(CYY|DM|JZ|KJ|XM|ZFY|ZJW|ZYS)-\d{5}" } + | xsd:string { pattern="G(FZ|IDC)-[0-9A-F]{4}" } + | xsd:string { pattern="GGFZ-\d{6}" } + | xsd:string { pattern="G(LK|Z)-\d{7}" } + | xsd:string { pattern="GU-[023][0-9A-F]{4}" } + | xsd:string { pattern="GZA-[123467]\d{5}" } + }? + + code-point-attributes &= attribute kIRG_HSource + { "" | xsd:string { pattern="H-[0-9A-F]{4}" } + | xsd:string { pattern="H(B[012])-[0-9A-F]{4}" } + | xsd:string { pattern="HD-[23]?[0-9A-F]{4}" } + | xsd:string { pattern="HU-[023][0-9A-F]{4}" } + }? 
+ + code-point-attributes &= attribute kIRG_JSource + { "" | xsd:string { pattern="J[014]-[0-9A-F]{4}" } + | xsd:string { pattern="J3A?-[0-9A-F]{4}" } + | xsd:string { pattern="J13A?-[0-9A-F]{4}" } + | xsd:string { pattern="J14-[0-9A-F]{4}" } + | xsd:string { pattern="JA[34]?-[0-9A-F]{4}" } + | xsd:string { pattern="JARIB-[0-9A-F]{4}" } + | xsd:string { pattern="JH-(JT[ABC][0-9A-F]{3}S?|IB\d{4}|\d{6})" } + | xsd:string { pattern="JK-\d{5}" } + | xsd:string { pattern="JMJ-\d{6}" } + }? + + code-point-attributes &= attribute kIRG_KPSource + { "" | xsd:string { pattern="KP([01]-[0-9A-F]{4}|U-[023][0-9A-F]{4})" } }? + + code-point-attributes &= attribute kIRG_KSource + { "" | xsd:string { pattern="K[0-6]-[0-9A-F]{4}" } + | xsd:string { pattern="KC-\d{5}" } + | xsd:string { pattern="KU-[023][0-9A-F]{4}" } + }? + + code-point-attributes &= attribute kIRG_MSource + { "" | xsd:string { pattern="MA-[0-9A-F]{4}" } + | xsd:string { pattern="MB[12]-[0-9A-F]{4}" } + | xsd:string { pattern="MC-\d{5}" } + | xsd:string { pattern="MDH?-[23]?[0-9A-F]{4}" } + }? + + code-point-attributes &= attribute kIRG_SSource + { "" | xsd:string { pattern="SAT-\d{5}" } }? + + code-point-attributes &= attribute kIRG_TSource + { "" | xsd:string { pattern="T([1-7A-F]|1[1-3])-[0-9A-F]{4}" } + | xsd:string { pattern="TU-[023][0-9A-F]{4}" } + }? + + code-point-attributes &= attribute kIRG_UKSource + { "" | xsd:string { pattern="UK-\d{5}" } }? + + code-point-attributes &= attribute kIRG_USource + { "" | xsd:string { pattern="UTC-\d{5}" } }? + + code-point-attributes &= attribute kIRG_VSource + { "" | xsd:string { pattern="V[0-4]-[0-9A-F]{4}" } + | xsd:string { pattern="VN-[023F][0-9A-F]{4}" } + }? + + code-point-attributes &= attribute kIRGDaeJaweon + { list { xsd:string { pattern="[0-9]{4}\.[0-9]{2}[01]" }+ } }? + + code-point-attributes &= attribute kIRGHanyuDaZidian + { list { xsd:string { pattern="[1-8][0-9]{4}\.[0-3][0-9][01]" }+ } }? 
+ + code-point-attributes &= attribute kIRGKangXi + { list { xsd:string { pattern="[01][0-9]{3}\.[0-7][0-9][01]" }+ } }? + + code-point-attributes &= attribute kJa + { list { xsd:string { pattern="[0-9A-F]{4}S?" }+ } }? + + code-point-attributes &= attribute kJapanese + { list { xsd:string { pattern="[\x{3041}-\x{3096}\x{3099}\x{309A}\x{30A1}-\x{30FA}\x{30FC}]+" }+ } }? + + code-point-attributes &= attribute kJapaneseKun + { list { xsd:string { pattern="[A-Z]+" }+ } }? + + code-point-attributes &= attribute kJapaneseOn + { list { xsd:string { pattern="[A-Z]+" }+ } }? + + code-point-attributes &= attribute kJinmeiyoKanji + { list { xsd:string { pattern="(20[0-9]{2})(:U\+[23]?[0-9A-F]{4})?" }+ } }? + + code-point-attributes &= attribute kJis0 + { list { xsd:string { pattern="[0-9]{4}" }+ } }? + + code-point-attributes &= attribute kJis1 + { list { xsd:string { pattern="[0-9]{4}" }+ } }? + + code-point-attributes &= attribute kJIS0213 + { list { xsd:string { pattern="[12],[0-9]{2},[0-9]{1,2}" }+ } }? + + code-point-attributes &= attribute kJoyoKanji + { list { xsd:string { pattern="(20[0-9]{2})|(U\+[23]?[0-9A-F]{4})" }+ } }? + + code-point-attributes &= attribute kKangXi + { list { xsd:string { pattern="[0-9]{4}\.[0-9]{2}[01]" }+ } }? + + code-point-attributes &= attribute kKarlgren + { list { xsd:string { pattern="[1-9][0-9]{0,3}[A*]?" }+ } }? + + code-point-attributes &= attribute kKorean + { list { xsd:string { pattern="[A-Z]+" }+ } }? + + code-point-attributes &= attribute kKoreanEducationHanja + { list { xsd:string { pattern="20[0-9]{2}" }+ } }? + + code-point-attributes &= attribute kKoreanName + { list { xsd:string { pattern="20[0-9]{2}" }+ } }? + + code-point-attributes &= attribute kLau + { list { xsd:string { pattern="[1-9][0-9]{0,3}" }+ } }? + + code-point-attributes &= attribute kMainlandTelegraph + { list { xsd:string { pattern="[0-9]{4}" }+ } }? 
+ + code-point-attributes &= attribute kMandarin + { list { xsd:string { pattern="[a-z\x{300}-\x{302}\x{304}\x{308}\x{30C}]+" }+ } }? + + code-point-attributes &= attribute kMatthews + { list { xsd:string { pattern="[1-9][0-9]{0,3}(a|\.5)?" }+ } }? + + code-point-attributes &= attribute kMeyerWempe + { list { xsd:string { pattern="[1-9][0-9]{0,3}[a-t*]?" }+ } }? + + code-point-attributes &= attribute kMojiJoho + { list { xsd:string { pattern="MJ\d{6}(:(FE0[01]|E01[01][0-9A-F]))?" }+ } }? + + code-point-attributes &= attribute kMorohashi + { list { xsd:string { pattern="(\d{5}'{0,2}|H\d{3})(:(FE0[01]|E010[0-9A-F]))?" }+ } }? + + code-point-attributes &= attribute kNelson + { list { xsd:string { pattern="[0-9]{4}" }+ } }? + + code-point-attributes &= attribute kOtherNumeric + { list { xsd:string { pattern="[0-9]+" }+ } }? + + code-point-attributes &= attribute kPhonetic + { list { xsd:string { pattern="[1-9][0-9]{0,3}[A-D]?\*?" }+ } }? + + code-point-attributes &= attribute kPrimaryNumeric + { list { xsd:string { pattern="[0-9]+" }+ } }? + + code-point-attributes &= attribute kPseudoGB1 + { xsd:string { pattern="[0-9]{4}" } }? + + code-point-attributes &= attribute kRSAdobe_Japan1_6 + { list { xsd:string { pattern="[CV]\+[0-9]{1,5}\+[1-9][0-9]{0,2}\.[1-9][0-9]?\.[0-9]{1,2}" }+ } }? + + code-point-attributes &= attribute kRSUnicode + { list { xsd:string { pattern="[1-9][0-9]{0,2}'{0,3}\.-?[0-9]{1,2}" }+ } }? + + code-point-attributes &= attribute kSBGY + { list { xsd:string { pattern="[0-9]{3}\.[0-7][0-9]" }+ } }? + + code-point-attributes &= attribute kSemanticVariant + { list { xsd:string { pattern="U\+[23]?[0-9A-F]{4}(<[ks][A-Za-z0-9_]+(:[TBZFJ]+)?(,[ks][A-Za-z0-9_]+(:[TBZFJ]+)?)*)?" }+ } }? + + code-point-attributes &= attribute kSimplifiedVariant + { list { xsd:string { pattern="U\+[23]?[0-9A-F]{4}" }+ } }? + + code-point-attributes &= attribute kSMSZD2003Index + { list { xsd:string { pattern="\d{1,3}\.\d{2}" }+ } }? 
+ + code-point-attributes &= attribute kSMSZD2003Readings + { list { xsd:string { pattern="[a-z\x{300}\x{301}\x{302}\x{304}\x{308}\x{30C}]+(,[a-z\x{300}\x{301}\x{302}\x{304}\x{308}\x{30C}]+)*\x{7CB5}[a-z]+[1-6]([a-z]+[1-6])?(,[a-z]+[1-6]([a-z]+[1-6])?)*" }+ } }? + + code-point-attributes &= attribute kSpecializedSemanticVariant + { list { xsd:string { pattern="U\+[23]?[0-9A-F]{4}(<[ks][A-Za-z0-9_]+(:[TBZFJ]+)?(,[ks][A-Za-z0-9_]+(:[TBZFJ]+)?)*)?" }+ } }? + + code-point-attributes &= attribute kSpoofingVariant + { list { xsd:string { pattern="U\+[23]?[0-9A-F]{4}" }+ } }? + + code-point-attributes &= attribute kStrange + { list { ( xsd:string { pattern="[ACU]" } + | xsd:string { pattern="B:U\+31[0-2AB][0-9A-F]" } + | xsd:string { pattern="[FMOR](:U\+[23]?[0-9A-F]{4})?" } + | xsd:string { pattern="H:U\+31[3-8][0-9A-F]" } + | xsd:string { pattern="I(:U\+[23]?[0-9A-F]{4})*" } + | xsd:string { pattern="K(:U\+30[A-F][0-9A-F])+" } + | xsd:string { pattern="S:[4-9][0-9]" } + )+}}? + + code-point-attributes &= attribute kTaiwanTelegraph + { list { xsd:string { pattern="[0-9]{4}" }+ } }? + + code-point-attributes &= attribute kTang + { list { xsd:string { pattern="\*?[A-Za-z()\x{E6}\x{251}\x{259}\x{25B}\x{300}\x{30C}]+" }+ } }? + + code-point-attributes &= attribute kTGH + { list { xsd:string { pattern="20[0-9]{2}:[1-9][0-9]{0,3}" }+ } }? + + code-point-attributes &= attribute kTGHZ2013 + { list { xsd:string { pattern="[0-9]{3}\.[0-9]{3}(,[0-9]{3}\.[0-9]{3})*:[a-z\x{300}-\x{302}\x{304}\x{308}\x{30C}]+" }+ } }? + + code-point-attributes &= attribute kTotalStrokes + { list { xsd:string { pattern="[1-9][0-9]{0,2}" }+ } }? + + code-point-attributes &= attribute kTraditionalVariant + { list { xsd:string { pattern="U\+[23]?[0-9A-F]{4}" }+ } }? + + code-point-attributes &= attribute kUnihanCore2020 + { xsd:string { pattern="[GHJKMPT]{1,7}" } }? 
+ + code-point-attributes &= attribute kVietnamese + { list { xsd:string { pattern="[A-Za-z\x{110}\x{111}\x{300}-\x{303}\x{306}\x{309}\x{31B}\x{323}]+" }+ } }? + + code-point-attributes &= attribute kVietnameseNumeric + { list { xsd:string { pattern="\d+" }+ } }? + + code-point-attributes &= attribute kXerox + { list { xsd:string { pattern="[0-9]{3}:[0-9]{3}" }+ } }? + + code-point-attributes &= attribute kXHC1983 + { list { xsd:string { pattern="[0-9]{4}\.[0-9]{3}\*?(,[0-9]{4}\.[0-9]{3}\*?)*:[a-z\x{300}\x{301}\x{304}\x{308}\x{30C}]+" }+ } }? + + code-point-attributes &= attribute kZhuang + { list { xsd:string { pattern="[a-z]+\*?" }+ } }? + + code-point-attributes &= attribute kZhuangNumeric + { list { xsd:string { pattern="\d+" }+ } }? + + code-point-attributes &= attribute kZVariant + { list { xsd:string { pattern="U\+[23]?[0-9A-F]{4}(<[ks][A-Za-z0-9_]+(:[TBZ]+)?(,[ks][A-Za-z0-9_]+(:[TBZ]+)?)*)?" }+ } }? + + + code-point-attributes &= + attribute kRSTUnicode { xsd:string { pattern="[0-9]+\.[0-9]+" } }? + + code-point-attributes &= + attribute kTGT_MergedSrc + { xsd:string {pattern="L2008-[0-9A-F]{4,5}(-[0-9]{4,5})?"} + | xsd:string {pattern="L2006-[0-9]{4}"} + | xsd:string {pattern="L1997-[0-9]{4}"} + | xsd:string {pattern="L1986-[0-9]{4}"} + | xsd:string {pattern="S1968-[0-9]{4}"} + | xsd:string {pattern="N1966-[0-9]{3}(-[0-9A-Z]{3,4})?"} + | xsd:string {pattern="H2004-[A-Z]-[0-9]{4}"} + | xsd:string {pattern="L2012-[0-9]{4}"} + | xsd:string {pattern="UTN42-[0-9]{3}"} + }? + + + code-point-attributes &= + attribute kSrc_NushuDuben { xsd:string { pattern="[0-9]+\.[0-9]+" } }? + + code-point-attributes &= + attribute kReading { xsd:string }? + + + ucd.content &= + element blocks { + element block { + attribute first-cp { single-code-point }, + attribute last-cp { single-code-point }, + attribute name { text } }+ }? 
+ + + ucd.content &= + element named-sequences { + element named-sequence { + attribute cps { one-or-more-code-points }, + attribute name { text } }+ }? + + ucd.content &= + element provisional-named-sequences { + element named-sequence { + attribute cps { one-or-more-code-points }, + attribute name { text } }+ }? + + + ucd.content &= + element normalization-corrections { + element normalization-correction { + attribute cp { single-code-point }, + attribute old { one-or-more-code-points }, + attribute new { one-or-more-code-points }, + attribute version { text } }+ }? + + + ucd.content &= + element standardized-variants { + element standardized-variant { + attribute cps { two-code-points }, + attribute desc { text }, + attribute when { text } }+ }? + + + ucd.content &= + element cjk-radicals { + element cjk-radical { + attribute number { xsd:string {pattern="[0-9]{1,3}'{0,3}"}}, + attribute radical { single-code-point? }, + attribute ideograph { single-code-point } }+ }? + + + ucd.content &= + element emoji-sources { + element emoji-source { + attribute unicode { one-or-more-code-points }, + attribute docomo { jis-code-point? }, + attribute kddi { jis-code-point? }, + attribute softbank { jis-code-point? } }+ }? + + + code-point-attributes &= + attribute Emoji { boolean }? + + code-point-attributes &= + attribute EPres { boolean }? + + code-point-attributes &= + attribute EMod { boolean }? + + code-point-attributes &= + attribute EBase { boolean }? + + code-point-attributes &= + attribute EComp { boolean }? + + code-point-attributes &= + attribute ExtPict { boolean }? 
+ + + ucd.content &= + element do-not-emit { + element instead { + attribute of { one-or-more-code-points }, + attribute use { one-or-more-code-points }, + attribute because { "Bengali_Khanda_Ta" + | "Deprecated" + | "Discouraged" + | "Dotless_Form" + | "Hamza_Form" + | "Indic_Atomic_Consonant" + | "Indic_Consonant_Conjunct" + | "Indic_Vowel_Letter" + | "Malayalam_Chillu" + | "Precomposed_Form" + | "Precomposed_Hieroglyph" + | "Preferred_Spelling" + | "Tamil_Shrii" + } }+ }? + diff --git a/uax/uax42/pom.xml b/uax/uax42/pom.xml new file mode 100644 index 000000000..c18d2f3d0 --- /dev/null +++ b/uax/uax42/pom.xml @@ -0,0 +1,72 @@ + + + + 4.0.0 + + uax42 + Unicode Standard Annex #42 + + + + org.unicode.unicodetools + unicodetools-parent + 1.0.0 + + + + + + org.codehaus.mojo + xml-maven-plugin + 1.1.0 + + + + transform + + + + + + + ${project.basedir} + true + + index.xml + + index2html.xsl + ${project.basedir}/output/ + + + .html + + + + + ${project.basedir} + true + + index.xml + + index2rnc.xsl + ${project.basedir}/output/ + + + .rnc + + + + + + + + net.sf.saxon + Saxon-HE + 12.4 + + + + + + + diff --git a/unicodetools/src/main/java/org/unicode/xml/AttributeResolver.java b/unicodetools/src/main/java/org/unicode/xml/AttributeResolver.java index 8fc02a328..87db09582 100644 --- a/unicodetools/src/main/java/org/unicode/xml/AttributeResolver.java +++ b/unicodetools/src/main/java/org/unicode/xml/AttributeResolver.java @@ -116,7 +116,7 @@ public AttributeResolver(IndexUnicodeProperties iup) { map_NameAlias = loadNameAliases(); } - private enum AliasType { + protected enum AliasType { ABBREVIATION("abbreviation"), ALTERNATE("alternate"), CONTROL("control"), diff --git a/unicodetools/src/main/java/org/unicode/xml/GeneratePropertyValues.java b/unicodetools/src/main/java/org/unicode/xml/GeneratePropertyValues.java new file mode 100644 index 000000000..ee2adb0e8 --- /dev/null +++ b/unicodetools/src/main/java/org/unicode/xml/GeneratePropertyValues.java @@ -0,0 +1,1358 @@ 
+package org.unicode.xml;
+
+import com.ibm.icu.dev.tool.UOption;
+import com.ibm.icu.util.VersionInfo;
+
+import java.io.*;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.nio.charset.StandardCharsets;
+import java.util.*;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.unicode.props.PropertyParsingInfo;
+import org.unicode.props.UcdPropertyValues.*;
+import org.unicode.props.UcdProperty;
+
+/**
+ * Command-line utility that writes schema fragment files for UCD properties into an output
+ * folder.
+ */
+public class GeneratePropertyValues {
+
+    /** Layout strategies available when a list of property values is formatted. */
+    private enum VALUESOUTPUTTYPE {
+        VALUE_PER_LINE,
+        ALPHABETICAL_GROUP,
+        NUMERICAL_GROUP,
+        MAX_LINE_LENGTH
+    }
+
+    /**
+     * Schema fragments, each carrying the name used for it.
+     *
+     * <p>Constants tagged "manual" correspond to fragment files that are maintained by hand;
+     * the untagged ones are generated by this utility.
+     */
+    private enum SCHEMA {
+        NAMESPACE("namespace"), // manual
+        DATATYPES("datatypes"), // manual
+        START("start"), // manual
+        BOOLEAN("boolean"),
+        DESCRIPTION("description"), // manual
+        REPERTOIRE("repertoire"), // manual
+        PROPERTIES("properties"),
+        TANGUT("tangut"),
+        NUSHU("nushu"),
+        EMOJI_DATA("emoji-data"),
+        BLOCK("block"), // manual
+        NAMED_SEQUENCES("named-sequences"), // manual
+        NORMALIZATION_CORRECTIONS("normalization-corrections"), // manual
+        STANDARDIZED_VARIANTS("standardized-variants"), // manual
+        CJK_RADICALS("cjk-radicals"), // manual
+        EMOJI_SOURCES("emoji-sources"), // manual
+        DO_NOT_EMIT("do-not-emit");
+
+        final String name;
+
+        SCHEMA(String fragmentName) {
+            name = fragmentName;
+        }
+
+        /** Returns the fragment name given to this constant. */
+        String getName() {
+            return name;
+        }
+    }
+
+    /** Holder pairing an "is a list" flag with a syntax string. */
+    private static final class TR38Details {
+        boolean isList;
+        String syntax;
+
+        public TR38Details(boolean listValued, String syntaxPattern) {
+            isList = listValued;
+            syntax = syntaxPattern;
+        }
+
+        /** Returns the list flag supplied at construction. */
+        public boolean isList() {
+            return this.isList;
+        }
+
+        /** Returns the syntax string supplied at construction. */
+        public String getSyntax() {
+            return this.syntax;
+        }
+    }
+
+    private static final int MAX_LINE_LENGTH = 70;
+    private static final String NEWLINE = System.lineSeparator();
+    // Same value as two consecutive System.lineSeparator() results.
+    private static final String DOUBLELINE = NEWLINE + NEWLINE;
+
private static final String TRIPLELINE = System.lineSeparator() + System.lineSeparator() + System.lineSeparator(); + private static File destinationFolder = null; + + private static HashMap syntaxTR38; + private static final String NAMESPACE = "http://unicode.org/ns/2001/ucdxml"; + private static final String TR38URL = "https://www.unicode.org/reports/tr38"; + private static final UOption[] options = { + UOption.HELP_H(), + UOption.create("ucdversion", 'v', UOption.REQUIRES_ARG), + UOption.create("outputfolder", 'f', UOption.REQUIRES_ARG) + }; + + private static final int HELP = 0, UCDVERSION = 1, OUTPUTFOLDER = 2; + + public static void main(String[] args) throws Exception { + + VersionInfo ucdVersion = null; + + UOption.parseArgs(args, options); + + if (options[HELP].doesOccur) { + System.out.println( + "GeneratePropertyValuesList --ucdversion {version number} --outputfolder {destination}"); + System.exit(0); + } + + try { + if (options[UCDVERSION].doesOccur) { + try { + ucdVersion = VersionInfo.getInstance(options[UCDVERSION].value); + } catch (Exception e) { + throw new IllegalArgumentException( + "Could not convert " + + options[UCDVERSION].value + + " to a valid UCD version"); + } + } else { + throw new IllegalArgumentException( + "Missing command line option: --ucdversion (or -v)"); + } + if (options[OUTPUTFOLDER].doesOccur) { + try { + destinationFolder = new File(options[OUTPUTFOLDER].value); + if (!destinationFolder.exists()) { + if (!destinationFolder.mkdir()) { + throw new IOException(); + } + } + } catch (Exception e) { + throw new IllegalArgumentException( + "Could not find or create " + options[OUTPUTFOLDER].value); + } + } else { + throw new IllegalArgumentException( + "Missing command line option: --outputfolder (or -f)"); + } + + } catch (Exception e) { + System.err.println(e.getMessage()); + System.exit(1); + } + + if (ucdVersion != null && destinationFolder.exists()) { + buildPropertyValues(ucdVersion); + System.out.println("End"); + 
System.exit(0); + } else { + System.err.println("Unexpected error when building UcdXML file."); + System.exit(1); + } + } + + private static void buildPropertyValues( + //It would be nice to be able to generate values by ucdVersion. Leaving this here for now... + VersionInfo ucdVersion) + throws IOException, URISyntaxException { + syntaxTR38 = parseTR38(); + + createPropertyFragment(SCHEMA.BOOLEAN, getFormattedValues(SCHEMA.BOOLEAN, VALUESOUTPUTTYPE.MAX_LINE_LENGTH)); + createPropertyFragment(UcdProperty.Age, SCHEMA.PROPERTIES, getFormattedAttribute(UcdProperty.Age, + VALUESOUTPUTTYPE.NUMERICAL_GROUP)); + createPropertyFragment(UcdProperty.Name, SCHEMA.PROPERTIES, getFormattedSyntax(UcdProperty.Name)); + createPropertyFragment(UcdProperty.Unicode_1_Name, SCHEMA.PROPERTIES, + getFormattedSyntax(UcdProperty.Unicode_1_Name)); + createPropertyFragment(UcdProperty.Name_Alias.getShortName() + ".xml", "name-alias element", SCHEMA.PROPERTIES, + getFormattedElement(UcdProperty.Name_Alias)); + createPropertyFragment(UcdProperty.Block, SCHEMA.PROPERTIES, + getFormattedAttribute(UcdProperty.Block, VALUESOUTPUTTYPE.VALUE_PER_LINE)); + createPropertyFragment(UcdProperty.General_Category, SCHEMA.PROPERTIES, + getFormattedAttribute(UcdProperty.General_Category, VALUESOUTPUTTYPE.ALPHABETICAL_GROUP)); + createPropertyFragment(UcdProperty.Canonical_Combining_Class, SCHEMA.PROPERTIES, + getFormattedAttribute(UcdProperty.Canonical_Combining_Class, VALUESOUTPUTTYPE.VALUE_PER_LINE)); + createPropertyFragment(UcdProperty.Bidi_Class, SCHEMA.PROPERTIES, + getFormattedAttribute(UcdProperty.Bidi_Class, VALUESOUTPUTTYPE.ALPHABETICAL_GROUP)); + createPropertyFragment(UcdProperty.Bidi_Mirrored, SCHEMA.PROPERTIES, + getFormattedBoolean(UcdProperty.Bidi_Mirrored)); + createPropertyFragment(UcdProperty.Bidi_Mirroring_Glyph, SCHEMA.PROPERTIES, + getFormattedSyntax(UcdProperty.Bidi_Mirroring_Glyph)); + createPropertyFragment(UcdProperty.Bidi_Control, SCHEMA.PROPERTIES, + 
getFormattedBoolean(UcdProperty.Bidi_Control)); + createPropertyFragment(UcdProperty.Bidi_Paired_Bracket_Type, SCHEMA.PROPERTIES, + getFormattedAttribute(UcdProperty.Bidi_Paired_Bracket_Type, VALUESOUTPUTTYPE.MAX_LINE_LENGTH)); + createPropertyFragment(UcdProperty.Bidi_Paired_Bracket, SCHEMA.PROPERTIES, + getFormattedSyntax(UcdProperty.Bidi_Paired_Bracket)); + createPropertyFragment("decomposition.xml", "decomposition properties", SCHEMA.PROPERTIES, + getFormattedDecompositionProperties()); + createPropertyFragment("composition.xml", "composition properties", SCHEMA.PROPERTIES, + getFormattedCompositionProperties()); + createPropertyFragment("quickcheck.xml", "quick check properties", SCHEMA.PROPERTIES, + getFormattedQuickCheckProperties()); + createPropertyFragment("numeric.xml", "numeric properties", SCHEMA.PROPERTIES, + getFormattedNumericProperties()); + createPropertyFragment("joining.xml", "joining properties", SCHEMA.PROPERTIES, + getFormattedJoiningProperties()); + createPropertyFragment(UcdProperty.Join_Control.getShortName() + ".xml", "joining properties", + SCHEMA.PROPERTIES, getFormattedBoolean(UcdProperty.Join_Control)); + createPropertyFragment(UcdProperty.Line_Break, SCHEMA.PROPERTIES, + getFormattedAttribute(UcdProperty.Line_Break, VALUESOUTPUTTYPE.ALPHABETICAL_GROUP)); + createPropertyFragment(UcdProperty.East_Asian_Width, SCHEMA.PROPERTIES, + getFormattedAttribute(UcdProperty.East_Asian_Width, VALUESOUTPUTTYPE.MAX_LINE_LENGTH)); + createPropertyFragment("casing.xml", "casing properties", SCHEMA.PROPERTIES, + getFormattedCasingProperties()); + createPropertyFragment("simple_case_mapping.xml", "casing properties", SCHEMA.PROPERTIES, + getFormattedSimpleCaseMappingProperties()); + createPropertyFragment("case_mapping.xml", "casing properties", SCHEMA.PROPERTIES, + getFormattedCaseMappingProperties()); + createPropertyFragment("case_folding.xml", "casing properties", SCHEMA.PROPERTIES, + getFormattedCaseFoldingProperties()); + 
createPropertyFragment("case_other.xml", "casing properties", SCHEMA.PROPERTIES, + getFormattedCaseOtherProperties()); + createPropertyFragment("script.xml", "script properties", SCHEMA.PROPERTIES, + getFormattedScriptProperties()); + createPropertyFragment(UcdProperty.ISO_Comment, SCHEMA.PROPERTIES, + getFormattedSyntax(UcdProperty.ISO_Comment)); + createPropertyFragment(UcdProperty.Hangul_Syllable_Type, SCHEMA.PROPERTIES, + getFormattedAttribute(UcdProperty.Hangul_Syllable_Type, VALUESOUTPUTTYPE.MAX_LINE_LENGTH)); + createPropertyFragment(UcdProperty.Jamo_Short_Name, SCHEMA.PROPERTIES, + getFormattedSyntax(UcdProperty.Jamo_Short_Name)); + createPropertyFragment(UcdProperty.Indic_Syllabic_Category, SCHEMA.PROPERTIES, + getFormattedAttribute(UcdProperty.Indic_Syllabic_Category, VALUESOUTPUTTYPE.VALUE_PER_LINE)); + createPropertyFragment(UcdProperty.Indic_Positional_Category, SCHEMA.PROPERTIES, + getFormattedAttribute(UcdProperty.Indic_Positional_Category, VALUESOUTPUTTYPE.VALUE_PER_LINE)); + createPropertyFragment(UcdProperty.Indic_Conjunct_Break, SCHEMA.PROPERTIES, + getFormattedAttribute(UcdProperty.Indic_Conjunct_Break, VALUESOUTPUTTYPE.VALUE_PER_LINE)); + createPropertyFragment("identifier.xml", "identifier properties", SCHEMA.PROPERTIES, + getFormattedIdentifierProperties()); + createPropertyFragment("pattern.xml", "pattern properties", SCHEMA.PROPERTIES, + getFormattedPatternProperties()); + createPropertyFragment("function_graphic.xml", "properties related to function and graphic characteristics", + SCHEMA.PROPERTIES, getFormattedFunctionGraphicProperties()); + createPropertyFragment("boundaries.xml", "properties related to boundaries", + SCHEMA.PROPERTIES, getFormattedBoundaryProperties()); + createPropertyFragment("ideographs.xml", "properties related to ideographs", + SCHEMA.PROPERTIES, getFormattedIdeographProperties()); + createPropertyFragment("miscellaneous.xml", "miscellaneous properties", + SCHEMA.PROPERTIES, getFormattedMiscellaneousProperties()); 
+ createPropertyFragment("Unihan.xml", "Unihan properties", + SCHEMA.PROPERTIES, getFormattedUnihanProperties()); + createPropertyFragment("Tangut.xml", "Tangut data", + SCHEMA.TANGUT, getFormattedTangutProperties()); + createPropertyFragment("Nushu.xml", "Nushu data", + SCHEMA.NUSHU, getFormattedNushuProperties()); + createPropertyFragment("Emoji.xml", "Emoji properties", + SCHEMA.EMOJI_DATA, getFormattedEmojiProperties()); + createPropertyFragment("do-not-emit.xml", "do-not-emit", + SCHEMA.DO_NOT_EMIT, getFormattedDoNotEmit(VALUESOUTPUTTYPE.VALUE_PER_LINE)); + } + + private static void createPropertyFragment(SCHEMA schema, String formattedFragment) throws IOException { + createPropertyFragment(schema.getName() + ".xml", schema.getName(), schema, formattedFragment); + } + + private static void createPropertyFragment(UcdProperty ucdProperty, SCHEMA schema, String formattedFragment) + throws IOException { + createPropertyFragment(ucdProperty.getShortName() + ".xml", ucdProperty.getShortName() + " attribute", + schema, formattedFragment); + } + + private static void createPropertyFragment(String filename, String title, SCHEMA schema, + String formattedFragment) throws IOException { + BufferedWriter writer = getFragmentWriter(schema.getName(), filename); + writer.write("" + NEWLINE + + "" + NEWLINE); + writer.write(formattedFragment); + writer.write(NEWLINE + ""); + writer.flush(); + writer.close(); + } + + private static BufferedWriter getFragmentWriter(String schema, String filename) + throws IOException { + File fragmentFolder = new File(destinationFolder + File.separator + schema + File.separator); + if (!fragmentFolder.exists()) { + if (!fragmentFolder.mkdir()) { + throw new IOException(); + } + } + File outputFile = new File(fragmentFolder, filename); + FileOutputStream fileOutputStream = new FileOutputStream(outputFile); + OutputStreamWriter outputStreamWriter = new OutputStreamWriter(fileOutputStream, StandardCharsets.UTF_8); + return new 
BufferedWriter(outputStreamWriter); + } + + private static String getFormattedAttribute(UcdProperty ucdProperty, VALUESOUTPUTTYPE valuesoutputtype) { + String attributeString = " attribute " + ucdProperty.getShortName() + " "; + List values; + StringBuilder stringBuilder = new StringBuilder(); + + switch (ucdProperty) { + case Age: + values = getAgeValues(); + break; + case Block: + values = getBlockValues(); + break; + case General_Category: + values = getGeneralCategoryValues(); + break; + case Canonical_Combining_Class: + values = getCanonicalCombiningClassValues(); + break; + case Bidi_Class: + values = getBidirectionalValues(); + break; + case Bidi_Paired_Bracket_Type: + values = getBidiPairedBracketTypeValues(); + break; + case Decomposition_Type: + values = getDecompositionTypeValues(); + break; + case NFC_Quick_Check: + values = getNFCQuickCheckValues(); + break; + case NFD_Quick_Check: + values = getNFDQuickCheckValues(); + break; + case NFKC_Quick_Check: + values = getNFKCQuickCheckValues(); + break; + case NFKD_Quick_Check: + values = getNFKDQuickCheckValues(); + break; + case Numeric_Type: + values = getNumericTypeValues(); + break; + case Joining_Type: + values = getJoiningTypeValues(); + break; + case Joining_Group: + values = getJoiningGroupValues(); + break; + case Line_Break: + values = getLineBreakValues(); + break; + case East_Asian_Width: + values = getEastAsianWidthValues(); + break; + case Hangul_Syllable_Type: + values = getHangulSyllableTypeValues(); + break; + case Indic_Syllabic_Category: + values = getIndicSyllabicCategoryValues(); + break; + case Indic_Positional_Category: + values = getIndicPositionalCategoryValues(); + break; + case Indic_Conjunct_Break: + values = getIndicConjunctBreakValues(); + break; + case Vertical_Orientation: + values = getVerticalOrientationValues(); + break; + case Grapheme_Cluster_Break: + values = getGraphemeClusterBreakValues(); + break; + case Word_Break: + values = getWordBreakValues(); + break; + case 
Sentence_Break: + values = getSentenceBreakValues(); + break; + case Do_Not_Emit_Type: + values = getDoNotEmitTypeValues(); + break; + + default: + throw new IllegalStateException(ucdProperty.getShortName() + " is not handled by " + + "getFormattedAttribute."); + } + String formattedValues = formatValues(attributeString.length(), values, valuesoutputtype); + stringBuilder.append(" code-point-attributes &=") + .append(NEWLINE) + .append(attributeString) + .append("{ "); + if (formattedValues.contains(NEWLINE)) { + stringBuilder.append(formattedValues).append(NEWLINE); + stringBuilder.append(String.format("%" + (attributeString.length() + "}?".length()) + "s", "}?")); + } else { + stringBuilder.append(formattedValues).append(" }?"); + } + return stringBuilder.toString(); + } + + private static String getFormattedSyntax(UcdProperty ucdProperty) { + final PropertyParsingInfo propInfo = PropertyParsingInfo.getPropertyInfo(ucdProperty); + if (propInfo.getRegex() == null) { + throw new NullPointerException("Could not find syntax for " + ucdProperty.getShortName()); + } + + String attributeString = ucdProperty.getShortName().startsWith("cjk") ? 
+ " attribute " + ucdProperty.getShortName().substring(2) + " " : + " attribute " + ucdProperty.getShortName() + " "; + String formattedAttributeString; + switch (ucdProperty) { + //{ text } + case ISO_Comment: + formattedAttributeString = attributeString + "{ text }?"; + break; + + //{ single-code-point } + case Equivalent_Unified_Ideograph: + formattedAttributeString = attributeString + "{ single-code-point }?"; + break; + + //{ "" | single-code-point } + case Bidi_Mirroring_Glyph: + formattedAttributeString = attributeString + "{ \"\" | single-code-point }?"; + break; + + //{ "#" | single-code-point } + case Bidi_Paired_Bracket: + case Simple_Uppercase_Mapping: + case Simple_Lowercase_Mapping: + case Simple_Titlecase_Mapping: + case Simple_Case_Folding: + formattedAttributeString = attributeString + "{ \"#\" | single-code-point }?"; + break; + + //{ "#" | zero-or-more-code-points } + case Decomposition_Mapping: + case NFKC_Casefold: + case NFKC_Simple_Casefold: + formattedAttributeString = attributeString + "{ \"#\" | zero-or-more-code-points }?"; + break; + + //{ "#" | one-or-more-code-points } + case FC_NFKC_Closure: + case Uppercase_Mapping: + case Lowercase_Mapping: + case Titlecase_Mapping: + case Case_Folding: + formattedAttributeString = attributeString + "{ \"#\" | one-or-more-code-points }?"; + break; + + //{ "NaN" | RegEx } + case Numeric_Value: + formattedAttributeString = + attributeString + "{ \"NaN\" | xsd:string { pattern=\"" + + cleanRegex(propInfo.getRegex().toString()) + "\" } }?"; + break; + + //Special cases + case Name: + formattedAttributeString = attributeString + "{ \"\" |" + NEWLINE + + " \"CJK UNIFIED IDEOGRAPH-#\" |" + NEWLINE + + " \"CJK COMPATIBILITY IDEOGRAPH-#\" |" + NEWLINE + + " \"EGYPTIAN HIEROGLYPH-#\" |" + NEWLINE + + " \"TANGUT IDEOGRAPH-#\" |" + NEWLINE + + " \"KHITAN SMALL SCRIPT CHARACTER-#\" |" + NEWLINE + + " \"NUSHU CHARACTER-#\" |" + NEWLINE + + " xsd:string { pattern=\"" + cleanRegex(propInfo.getRegex().toString()) + 
+ "\" }" + NEWLINE + + " }?"; + break; + case Unicode_1_Name: + formattedAttributeString = + attributeString + "{ \"\" | xsd:string { pattern=\"" + + cleanRegex(propInfo.getRegex().toString()) + "\" } }?"; + break; + case Script: + formattedAttributeString = attributeString + "{ script }?"; + break; + case Script_Extensions: + formattedAttributeString = attributeString + "{ list { script + } }?"; + break; + case kTGT_MergedSrc: + //Ideally, should be obtained from a TR. + String kTGT_MergedSrc = NEWLINE + + " { xsd:string {pattern=\"L2008-[0-9A-F]{4,5}(-[0-9]{4,5})?\"}" + NEWLINE + + " | xsd:string {pattern=\"L2006-[0-9]{4}\"}" + NEWLINE + + " | xsd:string {pattern=\"L1997-[0-9]{4}\"}" + NEWLINE + + " | xsd:string {pattern=\"L1986-[0-9]{4}\"}" + NEWLINE + + " | xsd:string {pattern=\"S1968-[0-9]{4}\"}" + NEWLINE + + " | xsd:string {pattern=\"N1966-[0-9]{3}(-[0-9A-Z]{3,4})?\"}" + NEWLINE + + " | xsd:string {pattern=\"H2004-[A-Z]-[0-9]{4}\"}" + NEWLINE + + " | xsd:string {pattern=\"L2012-[0-9]{4}\"}" + NEWLINE + + " | xsd:string {pattern=\"UTN42-[0-9]{3}\"}" + NEWLINE + + " }?"; + formattedAttributeString = attributeString + kTGT_MergedSrc; + break; + case kReading: + //Ideally, should be obtained from a TR. + String kReading = "{ xsd:string }?"; + formattedAttributeString = attributeString + kReading; + break; + + + default: + formattedAttributeString = attributeString + "{ xsd:string { pattern=\"" + + cleanRegex(propInfo.getRegex().toString()) + "\" } }?"; + } + return " code-point-attributes &=" + NEWLINE + formattedAttributeString; + } + + private static String getFormattedTR38Syntax(UcdProperty ucdProperty) { + //TODO: We should determine whether we still want to show empty values in the XML files. 
+ //TODO: See org.unicode.xml.UcdPropertyDetail.isCJKShowIfEmpty() + boolean isShowIfEmpty = false; + for (UcdPropertyDetail propDetail : UcdPropertyDetail.cjkValues()) { + if (propDetail.getUcdProperty().equals(ucdProperty)) { + isShowIfEmpty = propDetail.isCJKShowIfEmpty(); + } + } + + String attributeString = " attribute " + ucdProperty.getShortName().substring(2); + TR38Details tr38Details = syntaxTR38.get(ucdProperty.name()); + if (tr38Details == null) { + throw new NullPointerException("Could not locate details for " + ucdProperty.name() + + " in " + TR38URL); + } + String formattedSyntax = formatTR38Syntax(tr38Details, isShowIfEmpty); + + return " code-point-attributes &=" + attributeString + NEWLINE + formattedSyntax; + } + + private static String getFormattedElement(UcdProperty ucdProperty) { + //Currently scoped to UcdProperty.Name_Alias, but might need to handle different properties. + String nameAliasElement = "name-alias"; + List values = getNameAliasTypeValues(); + PropertyParsingInfo propInfo = PropertyParsingInfo.getPropertyInfo(ucdProperty); + + String elementString = " element " + nameAliasElement + " {" + NEWLINE; + String attributeAliasString = + " attribute alias { xsd:string { pattern=\"" + cleanRegex(propInfo.getRegex().toString()) + + "\" } }?," + NEWLINE; + String attributeTypeString = " attribute type "; + + String formattedValues = formatValues(attributeTypeString.length(), values, + VALUESOUTPUTTYPE.ALPHABETICAL_GROUP); + + return " code-point-attributes &=" + NEWLINE + elementString + attributeAliasString + + attributeTypeString + "{ " + formattedValues + NEWLINE + + String.format("%" + (attributeTypeString.length() + "}? } *".length()) + "s", + "}? 
} *"); + } + + private static String getFormattedBoolean(UcdProperty ucdProperty) { + String attributeString = " attribute " + ucdProperty.getShortName() + " "; + + return " code-point-attributes &=" + NEWLINE + attributeString + "{ boolean }?"; + } + + private static String getFormattedValues(SCHEMA schema, VALUESOUTPUTTYPE valuesoutputtype) { + List values = getBinaryValues(); + String formattedValues = formatValues(2, values, valuesoutputtype); + return " " + schema.getName() + " = " + formattedValues; + } + + private static String getFormattedPropertyValues(UcdProperty ucdProperty, VALUESOUTPUTTYPE valuesoutputtype) { + List values = getScriptValues(); + String formattedValues = formatValues(11, values, valuesoutputtype); + return " " + ucdProperty.name().toLowerCase() + " = " + formattedValues; + } + + private static String getFormattedDoNotEmit(VALUESOUTPUTTYPE valuesoutputtype) { + List values = getDoNotEmitTypeValues(); + String formattedValues = formatValues(26, values, valuesoutputtype); + return " ucd.content &=\n" + + " element do-not-emit {\n" + + " element instead {\n" + + " attribute of { one-or-more-code-points },\n" + + " attribute use { one-or-more-code-points },\n" + + " attribute because { " + formattedValues + NEWLINE + + " } }+ }?"; + } + + private static String formatTR38Syntax(TR38Details tr38Details, boolean isShowIfEmpty) { + //TODO: We should determine whether we still want to show empty values in the XML files. + //TODO: See org.unicode.xml.UcdPropertyDetail.isCJKShowIfEmpty() + boolean isList = tr38Details.isList(); + String syntax = cleanRegex(tr38Details.getSyntax()); + // This is a kludge as it depends on only having single OR double quotes in the syntax. If we have both, we'll + // need to do more investigation on what RELAXNG Compact supports. + String QUOTMARK = syntax.contains("\"") ? 
"'" : "\""; + + boolean hasNewlines = syntax.contains("\n"); + if (hasNewlines) { + int indent; + String firstLinePrefix; + String ending = isList ? " )+}}?" : " }?"; + if (isShowIfEmpty) { + indent = (isList ? 15 : 8); + firstLinePrefix = isList ? + " { \"\" | list { " : + " { \"\" | "; + } + else { + indent = (isList ? 12 : 4); + firstLinePrefix = isList ? + " { list { ( " : + " { "; + } + String padding = String.format("%" + indent + "s", ""); + StringBuilder formattedSyntaxBuilder = new StringBuilder(); + Pattern syntaxPattern = Pattern.compile("([^\r\n]+)"); + Matcher matcher = syntaxPattern.matcher(syntax); + while (matcher.find()) { + if (formattedSyntaxBuilder.length() == 0) { + //First line + formattedSyntaxBuilder + .append(firstLinePrefix) + .append("xsd:string { pattern=").append(QUOTMARK) + .append(matcher.group(1)) + .append(QUOTMARK).append(" }") + .append(NEWLINE); + } else { + //Everything else + formattedSyntaxBuilder + .append(padding) + .append(matcher.group(1).replaceAll("^[| ]*", " | xsd:string { pattern=" + QUOTMARK)) + .append(QUOTMARK).append(" }") + .append(NEWLINE); + + } + } + formattedSyntaxBuilder.append(ending); + return formattedSyntaxBuilder.toString(); + + } else { + if (isShowIfEmpty) { + if (isList) { + return " { \"\" | list { xsd:string { pattern=" + QUOTMARK + syntax + QUOTMARK + " }+ } }?"; + } + else { + return " { \"\" | xsd:string { pattern=" + QUOTMARK + syntax + QUOTMARK + " } }?"; + } + } + else { + if (isList) { + return " { list { xsd:string { pattern=" + QUOTMARK + syntax + QUOTMARK + " }+ } }?"; + } + else { + return " { xsd:string { pattern=" + QUOTMARK + syntax + QUOTMARK + " } }?"; + } + } + } + + } + + private static String formatValues(int indent, List values, VALUESOUTPUTTYPE valuesoutputtype) { + StringBuilder valueBlock = new StringBuilder(); + StringBuilder currentLine = new StringBuilder(); + String padding = String.format("%" + indent + "s", ""); + String groupPrefix = ""; + for (String value : values) { 
+ StringBuilder formattedValue = new StringBuilder(); + if (valueBlock.length() > 0 || currentLine.length() > 0) { + formattedValue.append("| "); + } + if (value.startsWith("xsd")) { + formattedValue.append(value); + } else{ + formattedValue.append("\"").append(value).append("\""); + } + + switch (valuesoutputtype) { + case NUMERICAL_GROUP: + case ALPHABETICAL_GROUP: + String valuePrefix = getValuePrefix(value, valuesoutputtype); + if (groupPrefix.isEmpty()) { + currentLine.append(formattedValue); + groupPrefix = valuePrefix; + } else if (valuePrefix.equals(groupPrefix)) { + int testLength = valueBlock.length() == 0 ? + padding.length() + currentLine.length() + " ".length() : + currentLine.length() + " ".length(); + if ((testLength + formattedValue.length()) > MAX_LINE_LENGTH) { + valueBlock.append(currentLine).append(NEWLINE); + currentLine.setLength(0); + currentLine.append(padding).append(formattedValue); + } else { + if (currentLine.length() > 0) { + currentLine.append(" "); + } + currentLine.append(formattedValue); + } + } else { + valueBlock.append(currentLine).append(NEWLINE); + currentLine.setLength(0); + currentLine.append(padding).append(formattedValue); + groupPrefix = valuePrefix; + } + break; + + case MAX_LINE_LENGTH: + int testLength = valueBlock.length() == 0 ? 
+ padding.length() + currentLine.length() + " ".length() : + currentLine.length() + " ".length(); + if ((testLength + formattedValue.length()) > MAX_LINE_LENGTH) { + valueBlock.append(currentLine).append(NEWLINE); + currentLine.setLength(0); + currentLine.append(padding).append(formattedValue); + } else { + if (currentLine.length() > 0) { + currentLine.append(" "); + } + currentLine.append(formattedValue); + } + break; + + case VALUE_PER_LINE: + default: + if (valueBlock.length() > 0) { + valueBlock.append(NEWLINE).append(padding).append("| "); + } + if (value.startsWith("xsd")) { + valueBlock.append(value); + } else{ + valueBlock.append("\"").append(value).append("\""); + } + } + } + valueBlock.append(currentLine); + return valueBlock.toString(); + } + + private static String getValuePrefix(String value, VALUESOUTPUTTYPE valuesoutputtype) { + if (valuesoutputtype == VALUESOUTPUTTYPE.ALPHABETICAL_GROUP) { + return value.substring(0, 1); + } + if (valuesoutputtype == VALUESOUTPUTTYPE.NUMERICAL_GROUP) { + if (value.contains(".")) { + return value.substring(0, value.indexOf(".")); + } else { + // String value in list of numbers. See Age_Values for an example. 
+ return value; + } + } else { + throw new IllegalArgumentException(); + } + } + + private static String cleanRegex(String regex) { + return regex.replaceAll("\\[-", "[\\\\-").replaceAll("\\\\/", "/").replaceAll("\\\\'", "'"); + } + + //********************* Combined properties ********************// + + private static String getFormattedDecompositionProperties() { + return getFormattedAttribute(UcdProperty.Decomposition_Type, VALUESOUTPUTTYPE.MAX_LINE_LENGTH) + DOUBLELINE + + getFormattedSyntax(UcdProperty.Decomposition_Mapping); + } + + private static String getFormattedCompositionProperties() { + return getFormattedBoolean(UcdProperty.Composition_Exclusion) + DOUBLELINE + + getFormattedBoolean(UcdProperty.Full_Composition_Exclusion); + } + + private static String getFormattedQuickCheckProperties() { + return getFormattedAttribute(UcdProperty.NFC_Quick_Check, VALUESOUTPUTTYPE.MAX_LINE_LENGTH) + DOUBLELINE + + getFormattedAttribute(UcdProperty.NFD_Quick_Check, VALUESOUTPUTTYPE.MAX_LINE_LENGTH) + DOUBLELINE + + getFormattedAttribute(UcdProperty.NFKC_Quick_Check, VALUESOUTPUTTYPE.MAX_LINE_LENGTH) + DOUBLELINE + + getFormattedAttribute(UcdProperty.NFKD_Quick_Check, VALUESOUTPUTTYPE.MAX_LINE_LENGTH) + TRIPLELINE + + getFormattedBoolean(UcdProperty.Expands_On_NFC) + DOUBLELINE + + getFormattedBoolean(UcdProperty.Expands_On_NFD) + DOUBLELINE + + getFormattedBoolean(UcdProperty.Expands_On_NFKC) + DOUBLELINE + + getFormattedBoolean(UcdProperty.Expands_On_NFKD) + TRIPLELINE + + getFormattedSyntax(UcdProperty.FC_NFKC_Closure); + } + + private static String getFormattedNumericProperties() { + return getFormattedAttribute(UcdProperty.Numeric_Type, VALUESOUTPUTTYPE.MAX_LINE_LENGTH) + DOUBLELINE + + getFormattedSyntax(UcdProperty.Numeric_Value); + } + + private static String getFormattedJoiningProperties() { + return getFormattedAttribute(UcdProperty.Joining_Type, VALUESOUTPUTTYPE.MAX_LINE_LENGTH) + DOUBLELINE + + getFormattedAttribute(UcdProperty.Joining_Group, 
VALUESOUTPUTTYPE.ALPHABETICAL_GROUP); + } + + private static String getFormattedCasingProperties() { + return getFormattedBoolean(UcdProperty.Uppercase) + DOUBLELINE + + getFormattedBoolean(UcdProperty.Lowercase) + DOUBLELINE + + getFormattedBoolean(UcdProperty.Other_Uppercase) + DOUBLELINE + + getFormattedBoolean(UcdProperty.Other_Lowercase); + } + + private static String getFormattedSimpleCaseMappingProperties() { + return getFormattedSyntax(UcdProperty.Simple_Uppercase_Mapping) + DOUBLELINE + + getFormattedSyntax(UcdProperty.Simple_Lowercase_Mapping) + DOUBLELINE + + getFormattedSyntax(UcdProperty.Simple_Titlecase_Mapping); + } + + private static String getFormattedCaseMappingProperties() { + return getFormattedSyntax(UcdProperty.Uppercase_Mapping) + DOUBLELINE + + getFormattedSyntax(UcdProperty.Lowercase_Mapping) + DOUBLELINE + + getFormattedSyntax(UcdProperty.Titlecase_Mapping); + } + + private static String getFormattedCaseFoldingProperties() { + return getFormattedSyntax(UcdProperty.Simple_Case_Folding) + DOUBLELINE + + getFormattedSyntax(UcdProperty.Case_Folding); + } + + private static String getFormattedCaseOtherProperties() { + return getFormattedBoolean(UcdProperty.Case_Ignorable) + DOUBLELINE + + getFormattedBoolean(UcdProperty.Cased) + DOUBLELINE + + getFormattedBoolean(UcdProperty.Changes_When_Casefolded) + DOUBLELINE + + getFormattedBoolean(UcdProperty.Changes_When_Casemapped) + DOUBLELINE + + getFormattedBoolean(UcdProperty.Changes_When_Lowercased) + DOUBLELINE + + getFormattedBoolean(UcdProperty.Changes_When_NFKC_Casefolded) + DOUBLELINE + + getFormattedBoolean(UcdProperty.Changes_When_Titlecased) + DOUBLELINE + + getFormattedBoolean(UcdProperty.Changes_When_Uppercased) + DOUBLELINE + + getFormattedSyntax(UcdProperty.NFKC_Casefold) + DOUBLELINE + + getFormattedSyntax(UcdProperty.NFKC_Simple_Casefold); + } + + private static String getFormattedScriptProperties() { + return getFormattedPropertyValues(UcdProperty.Script, 
VALUESOUTPUTTYPE.ALPHABETICAL_GROUP) + DOUBLELINE + + getFormattedSyntax(UcdProperty.Script) + DOUBLELINE + + getFormattedSyntax(UcdProperty.Script_Extensions); + } + + private static String getFormattedIdentifierProperties() { + return getFormattedBoolean(UcdProperty.ID_Start) + DOUBLELINE + + getFormattedBoolean(UcdProperty.Other_ID_Start) + DOUBLELINE + + getFormattedBoolean(UcdProperty.XID_Start) + DOUBLELINE + + getFormattedBoolean(UcdProperty.ID_Continue) + DOUBLELINE + + getFormattedBoolean(UcdProperty.Other_ID_Continue) + DOUBLELINE + + getFormattedBoolean(UcdProperty.XID_Continue) + DOUBLELINE + + getFormattedBoolean(UcdProperty.ID_Compat_Math_Start) + DOUBLELINE + + getFormattedBoolean(UcdProperty.ID_Compat_Math_Continue); + } + + private static String getFormattedPatternProperties() { + return getFormattedBoolean(UcdProperty.Pattern_Syntax) + DOUBLELINE + + getFormattedBoolean(UcdProperty.Pattern_White_Space); + } + + private static String getFormattedFunctionGraphicProperties() { + return getFormattedBoolean(UcdProperty.Dash) + DOUBLELINE + + getFormattedBoolean(UcdProperty.Hyphen) + DOUBLELINE + + getFormattedBoolean(UcdProperty.Quotation_Mark) + DOUBLELINE + + getFormattedBoolean(UcdProperty.Terminal_Punctuation) + DOUBLELINE + + getFormattedBoolean(UcdProperty.Sentence_Terminal) + DOUBLELINE + + getFormattedBoolean(UcdProperty.Diacritic) + DOUBLELINE + + getFormattedBoolean(UcdProperty.Extender) + DOUBLELINE + + getFormattedBoolean(UcdProperty.Soft_Dotted) + DOUBLELINE + + getFormattedBoolean(UcdProperty.Alphabetic) + DOUBLELINE + + getFormattedBoolean(UcdProperty.Other_Alphabetic) + DOUBLELINE + + getFormattedBoolean(UcdProperty.Math) + DOUBLELINE + + getFormattedBoolean(UcdProperty.Other_Math) + DOUBLELINE + + getFormattedBoolean(UcdProperty.Hex_Digit) + DOUBLELINE + + getFormattedBoolean(UcdProperty.ASCII_Hex_Digit) + DOUBLELINE + + getFormattedBoolean(UcdProperty.Default_Ignorable_Code_Point) + DOUBLELINE + + 
getFormattedBoolean(UcdProperty.Other_Default_Ignorable_Code_Point) + DOUBLELINE + + getFormattedBoolean(UcdProperty.Logical_Order_Exception) + DOUBLELINE + + getFormattedBoolean(UcdProperty.Prepended_Concatenation_Mark) + DOUBLELINE + + getFormattedBoolean(UcdProperty.Modifier_Combining_Mark) + DOUBLELINE + + getFormattedBoolean(UcdProperty.White_Space) + DOUBLELINE + + getFormattedAttribute(UcdProperty.Vertical_Orientation, VALUESOUTPUTTYPE.MAX_LINE_LENGTH) + DOUBLELINE + + getFormattedBoolean(UcdProperty.Regional_Indicator); + } + + private static String getFormattedBoundaryProperties() { + return getFormattedBoolean(UcdProperty.Grapheme_Base) + DOUBLELINE + + getFormattedBoolean(UcdProperty.Grapheme_Extend) + DOUBLELINE + + getFormattedBoolean(UcdProperty.Other_Grapheme_Extend) + DOUBLELINE + + getFormattedBoolean(UcdProperty.Grapheme_Link) + DOUBLELINE + + getFormattedAttribute(UcdProperty.Grapheme_Cluster_Break, + VALUESOUTPUTTYPE.ALPHABETICAL_GROUP) + DOUBLELINE + + getFormattedAttribute(UcdProperty.Word_Break, + VALUESOUTPUTTYPE.ALPHABETICAL_GROUP) + DOUBLELINE + + getFormattedAttribute(UcdProperty.Sentence_Break, + VALUESOUTPUTTYPE.ALPHABETICAL_GROUP); + } + + private static String getFormattedIdeographProperties() { + return getFormattedBoolean(UcdProperty.Ideographic) + DOUBLELINE + + getFormattedBoolean(UcdProperty.Unified_Ideograph) + DOUBLELINE + + getFormattedSyntax(UcdProperty.Equivalent_Unified_Ideograph) + DOUBLELINE + + getFormattedBoolean(UcdProperty.IDS_Binary_Operator) + DOUBLELINE + + getFormattedBoolean(UcdProperty.IDS_Trinary_Operator) + DOUBLELINE + + getFormattedBoolean(UcdProperty.IDS_Unary_Operator) + DOUBLELINE + + getFormattedBoolean(UcdProperty.Radical); + } + + private static String getFormattedMiscellaneousProperties() { + return getFormattedBoolean(UcdProperty.Deprecated) + DOUBLELINE + + getFormattedBoolean(UcdProperty.Variation_Selector) + DOUBLELINE + + getFormattedBoolean(UcdProperty.Noncharacter_Code_Point); + } + + private 
static String getFormattedUnihanProperties() { + return getFormattedTR38Syntax(UcdProperty.kAccountingNumeric) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kAlternateTotalStrokes) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kBigFive) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kCangjie) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kCantonese) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kCCCII) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kCheungBauer) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kCheungBauerIndex) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kCihaiT) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kCNS1986) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kCNS1992) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kCompatibilityVariant) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kCowles) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kDaeJaweon) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kDefinition) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kEACC) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kFanqie) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kFenn) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kFennIndex) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kFourCornerCode) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kGB0) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kGB1) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kGB3) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kGB5) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kGB7) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kGB8) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kGradeLevel) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kGSR) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kHangul) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kHanYu) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kHanyuPinlu) + 
DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kHanyuPinyin) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kHDZRadBreak) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kHKGlyph) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kIBMJapan) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kIICore) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kIRG_GSource) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kIRG_HSource) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kIRG_JSource) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kIRG_KPSource) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kIRG_KSource) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kIRG_MSource) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kIRG_SSource) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kIRG_TSource) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kIRG_UKSource) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kIRG_USource) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kIRG_VSource) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kIRGDaeJaweon) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kIRGHanyuDaZidian) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kIRGKangXi) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kJa) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kJapanese) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kJapaneseKun) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kJapaneseOn) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kJinmeiyoKanji) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kJis0) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kJis1) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kJIS0213) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kJoyoKanji) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kKangXi) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kKarlgren) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kKorean) + 
DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kKoreanEducationHanja) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kKoreanName) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kLau) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kMainlandTelegraph) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kMandarin) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kMatthews) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kMeyerWempe) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kMojiJoho) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kMorohashi) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kNelson) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kOtherNumeric) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kPhonetic) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kPrimaryNumeric) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kPseudoGB1) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kRSAdobe_Japan1_6) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kRSUnicode) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kSBGY) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kSemanticVariant) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kSimplifiedVariant) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kSMSZD2003Index) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kSMSZD2003Readings) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kSpecializedSemanticVariant) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kSpoofingVariant) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kStrange) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kTaiwanTelegraph) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kTang) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kTGH) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kTGHZ2013) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kTotalStrokes) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kTraditionalVariant) + 
DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kUnihanCore2020) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kVietnamese) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kVietnameseNumeric) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kXerox) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kXHC1983) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kZhuang) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kZhuangNumeric) + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kZVariant); + } + + private static String getFormattedTangutProperties() { + return getFormattedSyntax(UcdProperty.kRSTUnicode) + DOUBLELINE + + getFormattedSyntax(UcdProperty.kTGT_MergedSrc); + } + + private static String getFormattedNushuProperties() { + return getFormattedSyntax(UcdProperty.kSrc_NushuDuben) + DOUBLELINE + + getFormattedSyntax(UcdProperty.kReading); + } + + private static String getFormattedEmojiProperties() { + return getFormattedBoolean(UcdProperty.Emoji) + DOUBLELINE + + getFormattedBoolean(UcdProperty.Emoji_Presentation) + DOUBLELINE + + getFormattedBoolean(UcdProperty.Emoji_Modifier) + DOUBLELINE + + getFormattedBoolean(UcdProperty.Emoji_Modifier_Base) + DOUBLELINE + + getFormattedBoolean(UcdProperty.Emoji_Component) + DOUBLELINE + + getFormattedBoolean(UcdProperty.Extended_Pictographic); + } + + //********************* Attribute values ********************// + + private static List getBinaryValues() { + List values = new ArrayList<>(); + for (Binary binaryValues : Binary.values()) { + values.add(binaryValues.getShortName()); + } + //Binary should display as Y | N. 
+ values.sort(Collections.reverseOrder()); + return values; + } + + private static List getAgeValues() { + List values = new ArrayList<>(); + for (Age_Values ageValues : Age_Values.values()) { + String shortName = ageValues.getShortName(); + if (shortName.equals("NA")) { + values.add("unassigned"); + } else if (shortName.equals("13.1")) { + //https://github.com/unicode-org/unicodetools/issues/100 + } else { + values.add(shortName); + } + } + return values; + } + + private static List getNameAliasTypeValues() { + List values = new ArrayList<>(); + for (AttributeResolver.AliasType aliastypeValues : AttributeResolver.AliasType.values()) { + if (!aliastypeValues.equals(AttributeResolver.AliasType.NONE)) { + values.add(aliastypeValues.toString()); + } + } + return values; + } + + private static List getBlockValues() { + List values = new ArrayList<>(); + for (Block_Values blockValues : Block_Values.values()) { + values.add(blockValues.getShortName()); + } + return values; + } + + private static List getGeneralCategoryValues() { + List values = new ArrayList<>(); + for (General_Category_Values generalCategoryValues : General_Category_Values.values()) { + if (!generalCategoryValues.getShortName().toUpperCase().equals(generalCategoryValues.getShortName())) { + //Some of the General_Category_Values (LC, L, M, N, P, S, Z, C) stand for grouping of related + //General_Category values. They won't occur on any individual code point, so can be ignored. + values.add(generalCategoryValues.getShortName()); + } + } + return values; + } + + private static List getCanonicalCombiningClassValues() { + List values = new ArrayList<>(); + values.add ("xsd:integer { minInclusive=\"0\" maxInclusive=\"254\" }"); + //Because the set of values that this property has taken across the various versions of the UCD is rather + //large, our schema does not restrict the possible values to those actually used. 
+ //for (Canonical_Combining_Class_Values canonicalCombiningClassValues : + // Canonical_Combining_Class_Values.values()) { + // values.add(canonicalCombiningClassValues.getShortName()); + //} + return values; + } + + private static List getBidirectionalValues() { + List values = new ArrayList<>(); + for (Bidi_Class_Values bidiClassValues : Bidi_Class_Values.values()) { + values.add(bidiClassValues.getShortName()); + } + return values; + } + + private static List getBidiPairedBracketTypeValues() { + List values = new ArrayList<>(); + //Order should be Open/Close/None + values.add(Bidi_Paired_Bracket_Type_Values.Open.getShortName()); + values.add(Bidi_Paired_Bracket_Type_Values.Close.getShortName()); + values.add(Bidi_Paired_Bracket_Type_Values.None.getShortName()); + //Now let's check to see if there is anything else that we didn't expect + for (Bidi_Paired_Bracket_Type_Values bidiPairedBracketTypeValue : Bidi_Paired_Bracket_Type_Values.values()) { + if (!values.contains(bidiPairedBracketTypeValue.getShortName())) { + throw new IllegalArgumentException(); + } + } + return values; + } + + private static List getDecompositionTypeValues() { + List values = new ArrayList<>(); + for (Decomposition_Type_Values decompositionTypeValues : Decomposition_Type_Values.values()) { + //We want "none" to be last. 
+ if (decompositionTypeValues != Decomposition_Type_Values.None) { + values.add(decompositionTypeValues.getNames().getOtherNames().get(0)); + } + } + values.add(Decomposition_Type_Values.None.getNames().getOtherNames().get(0)); + return values; + } + + private static List getNFCQuickCheckValues() { + List values = new ArrayList<>(); + //Order should be Yes/No/Maybe + values.add(NFC_Quick_Check_Values.Yes.getShortName()); + values.add(NFC_Quick_Check_Values.No.getShortName()); + values.add(NFC_Quick_Check_Values.Maybe.getShortName()); + //Now let's check to see if there is anything else that we didn't expect + for (NFC_Quick_Check_Values nfcQuickCheckValues : NFC_Quick_Check_Values.values()) { + if (!values.contains(nfcQuickCheckValues.getShortName())) { + throw new IllegalArgumentException(); + } + } + return values; + } + + private static List getNFDQuickCheckValues() { + List values = new ArrayList<>(); + //Order should be Yes/No + values.add(NFD_Quick_Check_Values.Yes.getShortName()); + values.add(NFD_Quick_Check_Values.No.getShortName()); + //Now let's check to see if there is anything else that we didn't expect + for (NFD_Quick_Check_Values nfdQuickCheckValues : NFD_Quick_Check_Values.values()) { + if (!values.contains(nfdQuickCheckValues.getShortName())) { + throw new IllegalArgumentException(); + } + } + return values; + } + + private static List getNFKCQuickCheckValues() { + List values = new ArrayList<>(); + //Order should be Yes/No/Maybe + values.add(NFKC_Quick_Check_Values.Yes.getShortName()); + values.add(NFKC_Quick_Check_Values.No.getShortName()); + values.add(NFKC_Quick_Check_Values.Maybe.getShortName()); + //Now let's check to see if there is anything else that we didn't expect + for (NFKC_Quick_Check_Values nfkcQuickCheckValues : NFKC_Quick_Check_Values.values()) { + if (!values.contains(nfkcQuickCheckValues.getShortName())) { + throw new IllegalArgumentException(); + } + } + return values; + } + + private static List getNFKDQuickCheckValues() { + 
List values = new ArrayList<>(); + //Order should be Yes/No + values.add(NFKD_Quick_Check_Values.Yes.getShortName()); + values.add(NFKD_Quick_Check_Values.No.getShortName()); + //Now let's check to see if there is anything else that we didn't expect + for (NFKD_Quick_Check_Values nfkdQuickCheckValues : NFKD_Quick_Check_Values.values()) { + if (!values.contains(nfkdQuickCheckValues.getShortName())) { + throw new IllegalArgumentException(); + } + } + return values; + } + + private static List getNumericTypeValues() { + List values = new ArrayList<>(); + //Order should be Decimal/Digit/Numeric/None + values.add(Numeric_Type_Values.Decimal.getShortName()); + values.add(Numeric_Type_Values.Digit.getShortName()); + values.add(Numeric_Type_Values.Numeric.getShortName()); + values.add(Numeric_Type_Values.None.getShortName()); + //Now let's check to see if there is anything else that we didn't expect + for (Numeric_Type_Values numericTypeValues : Numeric_Type_Values.values()) { + if (!values.contains(numericTypeValues.getShortName())) { + throw new IllegalArgumentException(); + } + } + return values; + } + + private static List getJoiningTypeValues() { + List values = new ArrayList<>(); + for (Joining_Type_Values joiningTypeValues : Joining_Type_Values.values()) { + values.add(joiningTypeValues.getShortName()); + } + return values; + } + + private static List getJoiningGroupValues() { + List values = new ArrayList<>(); + for (Joining_Group_Values joiningGroupValues : Joining_Group_Values.values()) { + values.add(joiningGroupValues.getShortName()); + } + return values; + } + + private static List getLineBreakValues() { + List values = new ArrayList<>(); + for (Line_Break_Values lineBreakValues : Line_Break_Values.values()) { + values.add(lineBreakValues.getShortName()); + } + return values; + } + + private static List getEastAsianWidthValues() { + List values = new ArrayList<>(); + for (East_Asian_Width_Values eastAsianWidthValues : East_Asian_Width_Values.values()) { + 
values.add(eastAsianWidthValues.getShortName()); + } + return values; + } + + private static List getScriptValues() { + List excludedValues = Arrays.asList( + Script_Values.Han_with_Bopomofo, + Script_Values.Japanese, + Script_Values.Korean, + Script_Values.Math_Symbols, + Script_Values.Emoji_Symbols, + Script_Values.Other_Symbols, + Script_Values.Unwritten); + List values = new ArrayList<>(); + for (Script_Values scriptValue : Script_Values.values()) { + if (!excludedValues.contains(scriptValue)) { + values.add(scriptValue.getShortName()); + } + //Include the following if you want to add other names + //if (!scriptValue.getNames().getOtherNames().isEmpty()) { + // values.add(scriptValue.getNames().getOtherNames().get(0)); + //} + } + Collections.sort(values); + return values; + } + + private static List getHangulSyllableTypeValues() { + List values = new ArrayList<>(); + for (Hangul_Syllable_Type_Values hangulSyllableTypeValues : Hangul_Syllable_Type_Values.values()) { + values.add(hangulSyllableTypeValues.getShortName()); + } + return values; + } + + private static List getIndicSyllabicCategoryValues() { + List values = new ArrayList<>(); + for (Indic_Syllabic_Category_Values indicSyllabicCategoryValues : Indic_Syllabic_Category_Values.values()) { + values.add(indicSyllabicCategoryValues.getShortName()); + } + return values; + } + + private static List getIndicPositionalCategoryValues() { + List values = new ArrayList<>(); + for (Indic_Positional_Category_Values indicPositionalCategoryValues : + Indic_Positional_Category_Values.values()) { + values.add(indicPositionalCategoryValues.getShortName()); + } + return values; + } + + private static List getIndicConjunctBreakValues() { + List values = new ArrayList<>(); + for (Indic_Conjunct_Break_Values indicConjunctBreakValues : Indic_Conjunct_Break_Values.values()) { + values.add(indicConjunctBreakValues.getShortName()); + } + return values; + } + + private static List getVerticalOrientationValues() { + List values = 
new ArrayList<>(); + for (Vertical_Orientation_Values verticalOrientationValues : Vertical_Orientation_Values.values()) { + values.add(verticalOrientationValues.getShortName()); + } + return values; + } + + private static List getGraphemeClusterBreakValues() { + List values = new ArrayList<>(); + for (Grapheme_Cluster_Break_Values graphemeClusterBreakValues : Grapheme_Cluster_Break_Values.values()) { + values.add(graphemeClusterBreakValues.getShortName()); + } + return values; + } + + private static List getWordBreakValues() { + List values = new ArrayList<>(); + for (Word_Break_Values wordBreakValues : Word_Break_Values.values()) { + values.add(wordBreakValues.getShortName()); + } + return values; + } + + private static List getSentenceBreakValues() { + List values = new ArrayList<>(); + for (Sentence_Break_Values sentenceBreakValues : Sentence_Break_Values.values()) { + values.add(sentenceBreakValues.getShortName()); + } + return values; + } + + private static List getDoNotEmitTypeValues() { + List values = new ArrayList<>(); + for (Do_Not_Emit_Type_Values doNotEmitTypeValues : Do_Not_Emit_Type_Values.values()) { + values.add(doNotEmitTypeValues.getShortName()); + } + Collections.sort(values); + return values; + } + + //********************* Utility methods ********************// + + private static HashMap parseTR38() throws IOException, URISyntaxException { + HashMap syntaxTR38 = new HashMap<>(); + URI uri = new URI(TR38URL); + StringBuilder stringBuilder = new StringBuilder(); + try (InputStream is = uri.toURL().openStream()) { + int ptr = 0; + while ((ptr = is.read()) != -1) { + stringBuilder.append((char) ptr); + } + } + Pattern syntaxPattern = Pattern.compile( + ">Property.*?(.*?).*?>Delimiter.*?>(.*?).*?>Syntax.*?>(.*?)", + Pattern.DOTALL); + Matcher matcher = syntaxPattern.matcher(stringBuilder.toString()); + while (matcher.find()) { + String delimiter = matcher.group(2).trim(); + boolean isList = false; + switch(delimiter) { + case "N/A": + break; + case 
"space": + isList = true; + break; + default: + throw new IllegalArgumentException("Only \"space\" or \"N/A\" are supported values for Delimiter." + + " Found: " + delimiter); + } + TR38Details tr38Details = new TR38Details(isList, matcher.group(3).trim().replaceAll("
    ", "")); + syntaxTR38.put(matcher.group(1).trim(), tr38Details); + } + return syntaxTR38; + } + +} diff --git a/unicodetools/src/main/resources/org/unicode/props/IndexPropertyRegex.txt b/unicodetools/src/main/resources/org/unicode/props/IndexPropertyRegex.txt index f3856add4..ac6499419 100644 --- a/unicodetools/src/main/resources/org/unicode/props/IndexPropertyRegex.txt +++ b/unicodetools/src/main/resources/org/unicode/props/IndexPropertyRegex.txt @@ -44,9 +44,11 @@ $codePoint0 = ($codePoints)? # Main data Bidi_Mirroring_Glyph ; SINGLE_VALUED ; $codePoint +Bidi_Paired_Bracket ; SINGLE_VALUED ; $codePoint Simple_Lowercase_Mapping ; SINGLE_VALUED ; $codePoint Simple_Titlecase_Mapping ; SINGLE_VALUED ; $codePoint Simple_Uppercase_Mapping ; SINGLE_VALUED ; $codePoint +Equivalent_Unified_Ideograph; SINGLE_VALUED ; $codePoint NFKC_Casefold ; SINGLE_VALUED ; $codePoint0 NFKC_Simple_Casefold ; SINGLE_VALUED ; $codePoint0 From f552e63f7f925b478b45854e5d027d55a3de8cb1 Mon Sep 17 00:00:00 2001 From: John Wilcock Date: Mon, 14 Oct 2024 14:00:46 -0700 Subject: [PATCH 11/14] Added note about NFD --- uax/uax42/Readme.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/uax/uax42/Readme.md b/uax/uax42/Readme.md index 9b931569f..f2533a2a2 100644 --- a/uax/uax42/Readme.md +++ b/uax/uax42/Readme.md @@ -15,9 +15,10 @@ You'll need a [RELAX NG](https://relaxng.org/) schema validator. We'll use [jing-trang](https://github. com/relaxng/jing-trang) in this example. -1. Clone and build [jing-trang](https://github.com/relaxng/jing-trang) +1. Clone and build [jing-trang](https://github.com/relaxng/jing-trang) 2. Run the following: ``` java -jar C:\_git\jing-trang\build\jing.jar -c UNICODETOOLS_REPO_DIR\uax\uax42\output\index.rnc ``` + Note that the UAX xml file has to be saved as NFD as the Unihan syntax regular expressions are expecting NFD. 
From 242f22b31e20db16413bc65f2be2fd2e446f9b70 Mon Sep 17 00:00:00 2001 From: John Wilcock Date: Mon, 14 Oct 2024 14:05:32 -0700 Subject: [PATCH 12/14] Spotless code cleanup --- .../unicode/xml/GeneratePropertyValues.java | 1319 +++++++++++------ 1 file changed, 855 insertions(+), 464 deletions(-) diff --git a/unicodetools/src/main/java/org/unicode/xml/GeneratePropertyValues.java b/unicodetools/src/main/java/org/unicode/xml/GeneratePropertyValues.java index ee2adb0e8..83d4be093 100644 --- a/unicodetools/src/main/java/org/unicode/xml/GeneratePropertyValues.java +++ b/unicodetools/src/main/java/org/unicode/xml/GeneratePropertyValues.java @@ -2,7 +2,6 @@ import com.ibm.icu.dev.tool.UOption; import com.ibm.icu.util.VersionInfo; - import java.io.*; import java.net.URI; import java.net.URISyntaxException; @@ -10,10 +9,9 @@ import java.util.*; import java.util.regex.Matcher; import java.util.regex.Pattern; - import org.unicode.props.PropertyParsingInfo; -import org.unicode.props.UcdPropertyValues.*; import org.unicode.props.UcdProperty; +import org.unicode.props.UcdPropertyValues.*; public class GeneratePropertyValues { @@ -25,33 +23,34 @@ private enum VALUESOUTPUTTYPE { } private enum SCHEMA { - // Manual indicates a fragment file that is maintained manually rather than generated from this utility. - //Manual + // Manual indicates a fragment file that is maintained manually rather than generated from + // this utility. 
+ // Manual NAMESPACE("namespace"), - //Manual + // Manual DATATYPES("datatypes"), - //Manual + // Manual START("start"), BOOLEAN("boolean"), - //Manual + // Manual DESCRIPTION("description"), - //Manual + // Manual REPERTOIRE("repertoire"), PROPERTIES("properties"), TANGUT("tangut"), NUSHU("nushu"), EMOJI_DATA("emoji-data"), - //Manual + // Manual BLOCK("block"), - //Manual + // Manual NAMED_SEQUENCES("named-sequences"), - //Manual + // Manual NORMALIZATION_CORRECTIONS("normalization-corrections"), - //Manual + // Manual STANDARDIZED_VARIANTS("standardized-variants"), - //Manual + // Manual CJK_RADICALS("cjk-radicals"), - //Manual + // Manual EMOJI_SOURCES("emoji-sources"), DO_NOT_EMIT("do-not-emit"); @@ -87,16 +86,17 @@ public String getSyntax() { private static final int MAX_LINE_LENGTH = 70; private static final String NEWLINE = System.lineSeparator(); private static final String DOUBLELINE = System.lineSeparator() + System.lineSeparator(); - private static final String TRIPLELINE = System.lineSeparator() + System.lineSeparator() + System.lineSeparator(); + private static final String TRIPLELINE = + System.lineSeparator() + System.lineSeparator() + System.lineSeparator(); private static File destinationFolder = null; private static HashMap syntaxTR38; private static final String NAMESPACE = "http://unicode.org/ns/2001/ucdxml"; private static final String TR38URL = "https://www.unicode.org/reports/tr38"; private static final UOption[] options = { - UOption.HELP_H(), - UOption.create("ucdversion", 'v', UOption.REQUIRES_ARG), - UOption.create("outputfolder", 'f', UOption.REQUIRES_ARG) + UOption.HELP_H(), + UOption.create("ucdversion", 'v', UOption.REQUIRES_ARG), + UOption.create("outputfolder", 'f', UOption.REQUIRES_ARG) }; private static final int HELP = 0, UCDVERSION = 1, OUTPUTFOLDER = 2; @@ -160,117 +160,243 @@ public static void main(String[] args) throws Exception { } private static void buildPropertyValues( - //It would be nice to be able to generate values 
by ucdVersion. Leaving this here for now... - VersionInfo ucdVersion) - throws IOException, URISyntaxException { + // It would be nice to be able to generate values by ucdVersion. Leaving this here for + // now... + VersionInfo ucdVersion) throws IOException, URISyntaxException { syntaxTR38 = parseTR38(); - createPropertyFragment(SCHEMA.BOOLEAN, getFormattedValues(SCHEMA.BOOLEAN, VALUESOUTPUTTYPE.MAX_LINE_LENGTH)); - createPropertyFragment(UcdProperty.Age, SCHEMA.PROPERTIES, getFormattedAttribute(UcdProperty.Age, - VALUESOUTPUTTYPE.NUMERICAL_GROUP)); - createPropertyFragment(UcdProperty.Name, SCHEMA.PROPERTIES, getFormattedSyntax(UcdProperty.Name)); - createPropertyFragment(UcdProperty.Unicode_1_Name, SCHEMA.PROPERTIES, + createPropertyFragment( + SCHEMA.BOOLEAN, + getFormattedValues(SCHEMA.BOOLEAN, VALUESOUTPUTTYPE.MAX_LINE_LENGTH)); + createPropertyFragment( + UcdProperty.Age, + SCHEMA.PROPERTIES, + getFormattedAttribute(UcdProperty.Age, VALUESOUTPUTTYPE.NUMERICAL_GROUP)); + createPropertyFragment( + UcdProperty.Name, SCHEMA.PROPERTIES, getFormattedSyntax(UcdProperty.Name)); + createPropertyFragment( + UcdProperty.Unicode_1_Name, + SCHEMA.PROPERTIES, getFormattedSyntax(UcdProperty.Unicode_1_Name)); - createPropertyFragment(UcdProperty.Name_Alias.getShortName() + ".xml", "name-alias element", SCHEMA.PROPERTIES, + createPropertyFragment( + UcdProperty.Name_Alias.getShortName() + ".xml", + "name-alias element", + SCHEMA.PROPERTIES, getFormattedElement(UcdProperty.Name_Alias)); - createPropertyFragment(UcdProperty.Block, SCHEMA.PROPERTIES, + createPropertyFragment( + UcdProperty.Block, + SCHEMA.PROPERTIES, getFormattedAttribute(UcdProperty.Block, VALUESOUTPUTTYPE.VALUE_PER_LINE)); - createPropertyFragment(UcdProperty.General_Category, SCHEMA.PROPERTIES, - getFormattedAttribute(UcdProperty.General_Category, VALUESOUTPUTTYPE.ALPHABETICAL_GROUP)); - createPropertyFragment(UcdProperty.Canonical_Combining_Class, SCHEMA.PROPERTIES, - 
getFormattedAttribute(UcdProperty.Canonical_Combining_Class, VALUESOUTPUTTYPE.VALUE_PER_LINE)); - createPropertyFragment(UcdProperty.Bidi_Class, SCHEMA.PROPERTIES, + createPropertyFragment( + UcdProperty.General_Category, + SCHEMA.PROPERTIES, + getFormattedAttribute( + UcdProperty.General_Category, VALUESOUTPUTTYPE.ALPHABETICAL_GROUP)); + createPropertyFragment( + UcdProperty.Canonical_Combining_Class, + SCHEMA.PROPERTIES, + getFormattedAttribute( + UcdProperty.Canonical_Combining_Class, VALUESOUTPUTTYPE.VALUE_PER_LINE)); + createPropertyFragment( + UcdProperty.Bidi_Class, + SCHEMA.PROPERTIES, getFormattedAttribute(UcdProperty.Bidi_Class, VALUESOUTPUTTYPE.ALPHABETICAL_GROUP)); - createPropertyFragment(UcdProperty.Bidi_Mirrored, SCHEMA.PROPERTIES, + createPropertyFragment( + UcdProperty.Bidi_Mirrored, + SCHEMA.PROPERTIES, getFormattedBoolean(UcdProperty.Bidi_Mirrored)); - createPropertyFragment(UcdProperty.Bidi_Mirroring_Glyph, SCHEMA.PROPERTIES, + createPropertyFragment( + UcdProperty.Bidi_Mirroring_Glyph, + SCHEMA.PROPERTIES, getFormattedSyntax(UcdProperty.Bidi_Mirroring_Glyph)); - createPropertyFragment(UcdProperty.Bidi_Control, SCHEMA.PROPERTIES, + createPropertyFragment( + UcdProperty.Bidi_Control, + SCHEMA.PROPERTIES, getFormattedBoolean(UcdProperty.Bidi_Control)); - createPropertyFragment(UcdProperty.Bidi_Paired_Bracket_Type, SCHEMA.PROPERTIES, - getFormattedAttribute(UcdProperty.Bidi_Paired_Bracket_Type, VALUESOUTPUTTYPE.MAX_LINE_LENGTH)); - createPropertyFragment(UcdProperty.Bidi_Paired_Bracket, SCHEMA.PROPERTIES, + createPropertyFragment( + UcdProperty.Bidi_Paired_Bracket_Type, + SCHEMA.PROPERTIES, + getFormattedAttribute( + UcdProperty.Bidi_Paired_Bracket_Type, VALUESOUTPUTTYPE.MAX_LINE_LENGTH)); + createPropertyFragment( + UcdProperty.Bidi_Paired_Bracket, + SCHEMA.PROPERTIES, getFormattedSyntax(UcdProperty.Bidi_Paired_Bracket)); - createPropertyFragment("decomposition.xml", "decomposition properties", SCHEMA.PROPERTIES, + createPropertyFragment( + 
"decomposition.xml", + "decomposition properties", + SCHEMA.PROPERTIES, getFormattedDecompositionProperties()); - createPropertyFragment("composition.xml", "composition properties", SCHEMA.PROPERTIES, + createPropertyFragment( + "composition.xml", + "composition properties", + SCHEMA.PROPERTIES, getFormattedCompositionProperties()); - createPropertyFragment("quickcheck.xml", "quick check properties", SCHEMA.PROPERTIES, + createPropertyFragment( + "quickcheck.xml", + "quick check properties", + SCHEMA.PROPERTIES, getFormattedQuickCheckProperties()); - createPropertyFragment("numeric.xml", "numeric properties", SCHEMA.PROPERTIES, + createPropertyFragment( + "numeric.xml", + "numeric properties", + SCHEMA.PROPERTIES, getFormattedNumericProperties()); - createPropertyFragment("joining.xml", "joining properties", SCHEMA.PROPERTIES, + createPropertyFragment( + "joining.xml", + "joining properties", + SCHEMA.PROPERTIES, getFormattedJoiningProperties()); - createPropertyFragment(UcdProperty.Join_Control.getShortName() + ".xml", "joining properties", - SCHEMA.PROPERTIES, getFormattedBoolean(UcdProperty.Join_Control)); - createPropertyFragment(UcdProperty.Line_Break, SCHEMA.PROPERTIES, + createPropertyFragment( + UcdProperty.Join_Control.getShortName() + ".xml", + "joining properties", + SCHEMA.PROPERTIES, + getFormattedBoolean(UcdProperty.Join_Control)); + createPropertyFragment( + UcdProperty.Line_Break, + SCHEMA.PROPERTIES, getFormattedAttribute(UcdProperty.Line_Break, VALUESOUTPUTTYPE.ALPHABETICAL_GROUP)); - createPropertyFragment(UcdProperty.East_Asian_Width, SCHEMA.PROPERTIES, - getFormattedAttribute(UcdProperty.East_Asian_Width, VALUESOUTPUTTYPE.MAX_LINE_LENGTH)); - createPropertyFragment("casing.xml", "casing properties", SCHEMA.PROPERTIES, + createPropertyFragment( + UcdProperty.East_Asian_Width, + SCHEMA.PROPERTIES, + getFormattedAttribute( + UcdProperty.East_Asian_Width, VALUESOUTPUTTYPE.MAX_LINE_LENGTH)); + createPropertyFragment( + "casing.xml", + "casing 
properties", + SCHEMA.PROPERTIES, getFormattedCasingProperties()); - createPropertyFragment("simple_case_mapping.xml", "casing properties", SCHEMA.PROPERTIES, + createPropertyFragment( + "simple_case_mapping.xml", + "casing properties", + SCHEMA.PROPERTIES, getFormattedSimpleCaseMappingProperties()); - createPropertyFragment("case_mapping.xml", "casing properties", SCHEMA.PROPERTIES, + createPropertyFragment( + "case_mapping.xml", + "casing properties", + SCHEMA.PROPERTIES, getFormattedCaseMappingProperties()); - createPropertyFragment("case_folding.xml", "casing properties", SCHEMA.PROPERTIES, + createPropertyFragment( + "case_folding.xml", + "casing properties", + SCHEMA.PROPERTIES, getFormattedCaseFoldingProperties()); - createPropertyFragment("case_other.xml", "casing properties", SCHEMA.PROPERTIES, + createPropertyFragment( + "case_other.xml", + "casing properties", + SCHEMA.PROPERTIES, getFormattedCaseOtherProperties()); - createPropertyFragment("script.xml", "script properties", SCHEMA.PROPERTIES, + createPropertyFragment( + "script.xml", + "script properties", + SCHEMA.PROPERTIES, getFormattedScriptProperties()); - createPropertyFragment(UcdProperty.ISO_Comment, SCHEMA.PROPERTIES, + createPropertyFragment( + UcdProperty.ISO_Comment, + SCHEMA.PROPERTIES, getFormattedSyntax(UcdProperty.ISO_Comment)); - createPropertyFragment(UcdProperty.Hangul_Syllable_Type, SCHEMA.PROPERTIES, - getFormattedAttribute(UcdProperty.Hangul_Syllable_Type, VALUESOUTPUTTYPE.MAX_LINE_LENGTH)); - createPropertyFragment(UcdProperty.Jamo_Short_Name, SCHEMA.PROPERTIES, + createPropertyFragment( + UcdProperty.Hangul_Syllable_Type, + SCHEMA.PROPERTIES, + getFormattedAttribute( + UcdProperty.Hangul_Syllable_Type, VALUESOUTPUTTYPE.MAX_LINE_LENGTH)); + createPropertyFragment( + UcdProperty.Jamo_Short_Name, + SCHEMA.PROPERTIES, getFormattedSyntax(UcdProperty.Jamo_Short_Name)); - createPropertyFragment(UcdProperty.Indic_Syllabic_Category, SCHEMA.PROPERTIES, - 
getFormattedAttribute(UcdProperty.Indic_Syllabic_Category, VALUESOUTPUTTYPE.VALUE_PER_LINE)); - createPropertyFragment(UcdProperty.Indic_Positional_Category, SCHEMA.PROPERTIES, - getFormattedAttribute(UcdProperty.Indic_Positional_Category, VALUESOUTPUTTYPE.VALUE_PER_LINE)); - createPropertyFragment(UcdProperty.Indic_Conjunct_Break, SCHEMA.PROPERTIES, - getFormattedAttribute(UcdProperty.Indic_Conjunct_Break, VALUESOUTPUTTYPE.VALUE_PER_LINE)); - createPropertyFragment("identifier.xml", "identifier properties", SCHEMA.PROPERTIES, + createPropertyFragment( + UcdProperty.Indic_Syllabic_Category, + SCHEMA.PROPERTIES, + getFormattedAttribute( + UcdProperty.Indic_Syllabic_Category, VALUESOUTPUTTYPE.VALUE_PER_LINE)); + createPropertyFragment( + UcdProperty.Indic_Positional_Category, + SCHEMA.PROPERTIES, + getFormattedAttribute( + UcdProperty.Indic_Positional_Category, VALUESOUTPUTTYPE.VALUE_PER_LINE)); + createPropertyFragment( + UcdProperty.Indic_Conjunct_Break, + SCHEMA.PROPERTIES, + getFormattedAttribute( + UcdProperty.Indic_Conjunct_Break, VALUESOUTPUTTYPE.VALUE_PER_LINE)); + createPropertyFragment( + "identifier.xml", + "identifier properties", + SCHEMA.PROPERTIES, getFormattedIdentifierProperties()); - createPropertyFragment("pattern.xml", "pattern properties", SCHEMA.PROPERTIES, + createPropertyFragment( + "pattern.xml", + "pattern properties", + SCHEMA.PROPERTIES, getFormattedPatternProperties()); - createPropertyFragment("function_graphic.xml", "properties related to function and graphic characteristics", - SCHEMA.PROPERTIES, getFormattedFunctionGraphicProperties()); - createPropertyFragment("boundaries.xml", "properties related to boundaries", - SCHEMA.PROPERTIES, getFormattedBoundaryProperties()); - createPropertyFragment("ideographs.xml", "properties related to ideographs", - SCHEMA.PROPERTIES, getFormattedIdeographProperties()); - createPropertyFragment("miscellaneous.xml", "miscellaneous properties", - SCHEMA.PROPERTIES, getFormattedMiscellaneousProperties()); 
- createPropertyFragment("Unihan.xml", "Unihan properties", - SCHEMA.PROPERTIES, getFormattedUnihanProperties()); - createPropertyFragment("Tangut.xml", "Tangut data", - SCHEMA.TANGUT, getFormattedTangutProperties()); - createPropertyFragment("Nushu.xml", "Nushu data", - SCHEMA.NUSHU, getFormattedNushuProperties()); - createPropertyFragment("Emoji.xml", "Emoji properties", - SCHEMA.EMOJI_DATA, getFormattedEmojiProperties()); - createPropertyFragment("do-not-emit.xml", "do-not-emit", - SCHEMA.DO_NOT_EMIT, getFormattedDoNotEmit(VALUESOUTPUTTYPE.VALUE_PER_LINE)); - } - - private static void createPropertyFragment(SCHEMA schema, String formattedFragment) throws IOException { - createPropertyFragment(schema.getName() + ".xml", schema.getName(), schema, formattedFragment); - } - - private static void createPropertyFragment(UcdProperty ucdProperty, SCHEMA schema, String formattedFragment) + createPropertyFragment( + "function_graphic.xml", + "properties related to function and graphic characteristics", + SCHEMA.PROPERTIES, + getFormattedFunctionGraphicProperties()); + createPropertyFragment( + "boundaries.xml", + "properties related to boundaries", + SCHEMA.PROPERTIES, + getFormattedBoundaryProperties()); + createPropertyFragment( + "ideographs.xml", + "properties related to ideographs", + SCHEMA.PROPERTIES, + getFormattedIdeographProperties()); + createPropertyFragment( + "miscellaneous.xml", + "miscellaneous properties", + SCHEMA.PROPERTIES, + getFormattedMiscellaneousProperties()); + createPropertyFragment( + "Unihan.xml", + "Unihan properties", + SCHEMA.PROPERTIES, + getFormattedUnihanProperties()); + createPropertyFragment( + "Tangut.xml", "Tangut data", SCHEMA.TANGUT, getFormattedTangutProperties()); + createPropertyFragment( + "Nushu.xml", "Nushu data", SCHEMA.NUSHU, getFormattedNushuProperties()); + createPropertyFragment( + "Emoji.xml", "Emoji properties", SCHEMA.EMOJI_DATA, getFormattedEmojiProperties()); + createPropertyFragment( + "do-not-emit.xml", + 
"do-not-emit", + SCHEMA.DO_NOT_EMIT, + getFormattedDoNotEmit(VALUESOUTPUTTYPE.VALUE_PER_LINE)); + } + + private static void createPropertyFragment(SCHEMA schema, String formattedFragment) throws IOException { - createPropertyFragment(ucdProperty.getShortName() + ".xml", ucdProperty.getShortName() + " attribute", - schema, formattedFragment); + createPropertyFragment( + schema.getName() + ".xml", schema.getName(), schema, formattedFragment); } - private static void createPropertyFragment(String filename, String title, SCHEMA schema, - String formattedFragment) throws IOException { + private static void createPropertyFragment( + UcdProperty ucdProperty, SCHEMA schema, String formattedFragment) throws IOException { + createPropertyFragment( + ucdProperty.getShortName() + ".xml", + ucdProperty.getShortName() + " attribute", + schema, + formattedFragment); + } + + private static void createPropertyFragment( + String filename, String title, SCHEMA schema, String formattedFragment) + throws IOException { BufferedWriter writer = getFragmentWriter(schema.getName(), filename); - writer.write("" + NEWLINE + - "" + NEWLINE); + writer.write( + "" + + NEWLINE + + "" + + NEWLINE); writer.write(formattedFragment); writer.write(NEWLINE + ""); writer.flush(); @@ -279,7 +405,8 @@ private static void createPropertyFragment(String filename, String title, SCHEMA private static BufferedWriter getFragmentWriter(String schema, String filename) throws IOException { - File fragmentFolder = new File(destinationFolder + File.separator + schema + File.separator); + File fragmentFolder = + new File(destinationFolder + File.separator + schema + File.separator); if (!fragmentFolder.exists()) { if (!fragmentFolder.mkdir()) { throw new IOException(); @@ -287,11 +414,13 @@ private static BufferedWriter getFragmentWriter(String schema, String filename) } File outputFile = new File(fragmentFolder, filename); FileOutputStream fileOutputStream = new FileOutputStream(outputFile); - OutputStreamWriter 
outputStreamWriter = new OutputStreamWriter(fileOutputStream, StandardCharsets.UTF_8); + OutputStreamWriter outputStreamWriter = + new OutputStreamWriter(fileOutputStream, StandardCharsets.UTF_8); return new BufferedWriter(outputStreamWriter); } - private static String getFormattedAttribute(UcdProperty ucdProperty, VALUESOUTPUTTYPE valuesoutputtype) { + private static String getFormattedAttribute( + UcdProperty ucdProperty, VALUESOUTPUTTYPE valuesoutputtype) { String attributeString = " attribute " + ucdProperty.getShortName() + " "; List values; StringBuilder stringBuilder = new StringBuilder(); @@ -374,17 +503,21 @@ private static String getFormattedAttribute(UcdProperty ucdProperty, VALUESOUTPU break; default: - throw new IllegalStateException(ucdProperty.getShortName() + " is not handled by " + - "getFormattedAttribute."); + throw new IllegalStateException( + ucdProperty.getShortName() + + " is not handled by " + + "getFormattedAttribute."); } String formattedValues = formatValues(attributeString.length(), values, valuesoutputtype); - stringBuilder.append(" code-point-attributes &=") + stringBuilder + .append(" code-point-attributes &=") .append(NEWLINE) .append(attributeString) .append("{ "); if (formattedValues.contains(NEWLINE)) { stringBuilder.append(formattedValues).append(NEWLINE); - stringBuilder.append(String.format("%" + (attributeString.length() + "}?".length()) + "s", "}?")); + stringBuilder.append( + String.format("%" + (attributeString.length() + "}?".length()) + "s", "}?")); } else { stringBuilder.append(formattedValues).append(" }?"); } @@ -394,30 +527,32 @@ private static String getFormattedAttribute(UcdProperty ucdProperty, VALUESOUTPU private static String getFormattedSyntax(UcdProperty ucdProperty) { final PropertyParsingInfo propInfo = PropertyParsingInfo.getPropertyInfo(ucdProperty); if (propInfo.getRegex() == null) { - throw new NullPointerException("Could not find syntax for " + ucdProperty.getShortName()); + throw new 
NullPointerException( + "Could not find syntax for " + ucdProperty.getShortName()); } - String attributeString = ucdProperty.getShortName().startsWith("cjk") ? - " attribute " + ucdProperty.getShortName().substring(2) + " " : - " attribute " + ucdProperty.getShortName() + " "; + String attributeString = + ucdProperty.getShortName().startsWith("cjk") + ? " attribute " + ucdProperty.getShortName().substring(2) + " " + : " attribute " + ucdProperty.getShortName() + " "; String formattedAttributeString; switch (ucdProperty) { - //{ text } + // { text } case ISO_Comment: formattedAttributeString = attributeString + "{ text }?"; break; - //{ single-code-point } + // { single-code-point } case Equivalent_Unified_Ideograph: formattedAttributeString = attributeString + "{ single-code-point }?"; break; - //{ "" | single-code-point } + // { "" | single-code-point } case Bidi_Mirroring_Glyph: formattedAttributeString = attributeString + "{ \"\" | single-code-point }?"; break; - //{ "#" | single-code-point } + // { "#" | single-code-point } case Bidi_Paired_Bracket: case Simple_Uppercase_Mapping: case Simple_Lowercase_Mapping: @@ -426,14 +561,15 @@ private static String getFormattedSyntax(UcdProperty ucdProperty) { formattedAttributeString = attributeString + "{ \"#\" | single-code-point }?"; break; - //{ "#" | zero-or-more-code-points } + // { "#" | zero-or-more-code-points } case Decomposition_Mapping: case NFKC_Casefold: case NFKC_Simple_Casefold: - formattedAttributeString = attributeString + "{ \"#\" | zero-or-more-code-points }?"; + formattedAttributeString = + attributeString + "{ \"#\" | zero-or-more-code-points }?"; break; - //{ "#" | one-or-more-code-points } + // { "#" | one-or-more-code-points } case FC_NFKC_Closure: case Uppercase_Mapping: case Lowercase_Mapping: @@ -442,30 +578,45 @@ private static String getFormattedSyntax(UcdProperty ucdProperty) { formattedAttributeString = attributeString + "{ \"#\" | one-or-more-code-points }?"; break; - //{ "NaN" | RegEx } + 
// { "NaN" | RegEx } case Numeric_Value: formattedAttributeString = - attributeString + "{ \"NaN\" | xsd:string { pattern=\"" + - cleanRegex(propInfo.getRegex().toString()) + "\" } }?"; + attributeString + + "{ \"NaN\" | xsd:string { pattern=\"" + + cleanRegex(propInfo.getRegex().toString()) + + "\" } }?"; break; - //Special cases + // Special cases case Name: - formattedAttributeString = attributeString + "{ \"\" |" + NEWLINE + - " \"CJK UNIFIED IDEOGRAPH-#\" |" + NEWLINE + - " \"CJK COMPATIBILITY IDEOGRAPH-#\" |" + NEWLINE + - " \"EGYPTIAN HIEROGLYPH-#\" |" + NEWLINE + - " \"TANGUT IDEOGRAPH-#\" |" + NEWLINE + - " \"KHITAN SMALL SCRIPT CHARACTER-#\" |" + NEWLINE + - " \"NUSHU CHARACTER-#\" |" + NEWLINE + - " xsd:string { pattern=\"" + cleanRegex(propInfo.getRegex().toString()) - + "\" }" + NEWLINE + - " }?"; + formattedAttributeString = + attributeString + + "{ \"\" |" + + NEWLINE + + " \"CJK UNIFIED IDEOGRAPH-#\" |" + + NEWLINE + + " \"CJK COMPATIBILITY IDEOGRAPH-#\" |" + + NEWLINE + + " \"EGYPTIAN HIEROGLYPH-#\" |" + + NEWLINE + + " \"TANGUT IDEOGRAPH-#\" |" + + NEWLINE + + " \"KHITAN SMALL SCRIPT CHARACTER-#\" |" + + NEWLINE + + " \"NUSHU CHARACTER-#\" |" + + NEWLINE + + " xsd:string { pattern=\"" + + cleanRegex(propInfo.getRegex().toString()) + + "\" }" + + NEWLINE + + " }?"; break; case Unicode_1_Name: formattedAttributeString = - attributeString + "{ \"\" | xsd:string { pattern=\"" + - cleanRegex(propInfo.getRegex().toString()) + "\" } }?"; + attributeString + + "{ \"\" | xsd:string { pattern=\"" + + cleanRegex(propInfo.getRegex().toString()) + + "\" } }?"; break; case Script: formattedAttributeString = attributeString + "{ script }?"; @@ -474,37 +625,49 @@ private static String getFormattedSyntax(UcdProperty ucdProperty) { formattedAttributeString = attributeString + "{ list { script + } }?"; break; case kTGT_MergedSrc: - //Ideally, should be obtained from a TR. 
- String kTGT_MergedSrc = NEWLINE + - " { xsd:string {pattern=\"L2008-[0-9A-F]{4,5}(-[0-9]{4,5})?\"}" + NEWLINE + - " | xsd:string {pattern=\"L2006-[0-9]{4}\"}" + NEWLINE + - " | xsd:string {pattern=\"L1997-[0-9]{4}\"}" + NEWLINE + - " | xsd:string {pattern=\"L1986-[0-9]{4}\"}" + NEWLINE + - " | xsd:string {pattern=\"S1968-[0-9]{4}\"}" + NEWLINE + - " | xsd:string {pattern=\"N1966-[0-9]{3}(-[0-9A-Z]{3,4})?\"}" + NEWLINE + - " | xsd:string {pattern=\"H2004-[A-Z]-[0-9]{4}\"}" + NEWLINE + - " | xsd:string {pattern=\"L2012-[0-9]{4}\"}" + NEWLINE + - " | xsd:string {pattern=\"UTN42-[0-9]{3}\"}" + NEWLINE + - " }?"; + // Ideally, should be obtained from a TR. + String kTGT_MergedSrc = + NEWLINE + + " { xsd:string {pattern=\"L2008-[0-9A-F]{4,5}(-[0-9]{4,5})?\"}" + + NEWLINE + + " | xsd:string {pattern=\"L2006-[0-9]{4}\"}" + + NEWLINE + + " | xsd:string {pattern=\"L1997-[0-9]{4}\"}" + + NEWLINE + + " | xsd:string {pattern=\"L1986-[0-9]{4}\"}" + + NEWLINE + + " | xsd:string {pattern=\"S1968-[0-9]{4}\"}" + + NEWLINE + + " | xsd:string {pattern=\"N1966-[0-9]{3}(-[0-9A-Z]{3,4})?\"}" + + NEWLINE + + " | xsd:string {pattern=\"H2004-[A-Z]-[0-9]{4}\"}" + + NEWLINE + + " | xsd:string {pattern=\"L2012-[0-9]{4}\"}" + + NEWLINE + + " | xsd:string {pattern=\"UTN42-[0-9]{3}\"}" + + NEWLINE + + " }?"; formattedAttributeString = attributeString + kTGT_MergedSrc; break; case kReading: - //Ideally, should be obtained from a TR. + // Ideally, should be obtained from a TR. 
String kReading = "{ xsd:string }?"; formattedAttributeString = attributeString + kReading; break; - default: - formattedAttributeString = attributeString + "{ xsd:string { pattern=\"" + - cleanRegex(propInfo.getRegex().toString()) + "\" } }?"; + formattedAttributeString = + attributeString + + "{ xsd:string { pattern=\"" + + cleanRegex(propInfo.getRegex().toString()) + + "\" } }?"; } return " code-point-attributes &=" + NEWLINE + formattedAttributeString; } private static String getFormattedTR38Syntax(UcdProperty ucdProperty) { - //TODO: We should determine whether we still want to show empty values in the XML files. - //TODO: See org.unicode.xml.UcdPropertyDetail.isCJKShowIfEmpty() + // TODO: We should determine whether we still want to show empty values in the XML files. + // TODO: See org.unicode.xml.UcdPropertyDetail.isCJKShowIfEmpty() boolean isShowIfEmpty = false; for (UcdPropertyDetail propDetail : UcdPropertyDetail.cjkValues()) { if (propDetail.getUcdProperty().equals(ucdProperty)) { @@ -515,8 +678,8 @@ private static String getFormattedTR38Syntax(UcdProperty ucdProperty) { String attributeString = " attribute " + ucdProperty.getShortName().substring(2); TR38Details tr38Details = syntaxTR38.get(ucdProperty.name()); if (tr38Details == null) { - throw new NullPointerException("Could not locate details for " + ucdProperty.name() + - " in " + TR38URL); + throw new NullPointerException( + "Could not locate details for " + ucdProperty.name() + " in " + TR38URL); } String formattedSyntax = formatTR38Syntax(tr38Details, isShowIfEmpty); @@ -524,24 +687,34 @@ private static String getFormattedTR38Syntax(UcdProperty ucdProperty) { } private static String getFormattedElement(UcdProperty ucdProperty) { - //Currently scoped to UcdProperty.Name_Alias, but might need to handle different properties. + // Currently scoped to UcdProperty.Name_Alias, but might need to handle different + // properties. 
String nameAliasElement = "name-alias"; List values = getNameAliasTypeValues(); PropertyParsingInfo propInfo = PropertyParsingInfo.getPropertyInfo(ucdProperty); String elementString = " element " + nameAliasElement + " {" + NEWLINE; String attributeAliasString = - " attribute alias { xsd:string { pattern=\"" + cleanRegex(propInfo.getRegex().toString()) - + "\" } }?," + NEWLINE; + " attribute alias { xsd:string { pattern=\"" + + cleanRegex(propInfo.getRegex().toString()) + + "\" } }?," + + NEWLINE; String attributeTypeString = " attribute type "; - String formattedValues = formatValues(attributeTypeString.length(), values, - VALUESOUTPUTTYPE.ALPHABETICAL_GROUP); + String formattedValues = + formatValues( + attributeTypeString.length(), values, VALUESOUTPUTTYPE.ALPHABETICAL_GROUP); - return " code-point-attributes &=" + NEWLINE + elementString + attributeAliasString + - attributeTypeString + "{ " + formattedValues + NEWLINE + - String.format("%" + (attributeTypeString.length() + "}? } *".length()) + "s", - "}? } *"); + return " code-point-attributes &=" + + NEWLINE + + elementString + + attributeAliasString + + attributeTypeString + + "{ " + + formattedValues + + NEWLINE + + String.format( + "%" + (attributeTypeString.length() + "}? } *".length()) + "s", "}? 
} *"); } private static String getFormattedBoolean(UcdProperty ucdProperty) { @@ -556,7 +729,8 @@ private static String getFormattedValues(SCHEMA schema, VALUESOUTPUTTYPE valueso return " " + schema.getName() + " = " + formattedValues; } - private static String getFormattedPropertyValues(UcdProperty ucdProperty, VALUESOUTPUTTYPE valuesoutputtype) { + private static String getFormattedPropertyValues( + UcdProperty ucdProperty, VALUESOUTPUTTYPE valuesoutputtype) { List values = getScriptValues(); String formattedValues = formatValues(11, values, valuesoutputtype); return " " + ucdProperty.name().toLowerCase() + " = " + formattedValues; @@ -565,21 +739,24 @@ private static String getFormattedPropertyValues(UcdProperty ucdProperty, VALUES private static String getFormattedDoNotEmit(VALUESOUTPUTTYPE valuesoutputtype) { List values = getDoNotEmitTypeValues(); String formattedValues = formatValues(26, values, valuesoutputtype); - return " ucd.content &=\n" + - " element do-not-emit {\n" + - " element instead {\n" + - " attribute of { one-or-more-code-points },\n" + - " attribute use { one-or-more-code-points },\n" + - " attribute because { " + formattedValues + NEWLINE + - " } }+ }?"; + return " ucd.content &=\n" + + " element do-not-emit {\n" + + " element instead {\n" + + " attribute of { one-or-more-code-points },\n" + + " attribute use { one-or-more-code-points },\n" + + " attribute because { " + + formattedValues + + NEWLINE + + " } }+ }?"; } private static String formatTR38Syntax(TR38Details tr38Details, boolean isShowIfEmpty) { - //TODO: We should determine whether we still want to show empty values in the XML files. - //TODO: See org.unicode.xml.UcdPropertyDetail.isCJKShowIfEmpty() + // TODO: We should determine whether we still want to show empty values in the XML files. 
+ // TODO: See org.unicode.xml.UcdPropertyDetail.isCJKShowIfEmpty() boolean isList = tr38Details.isList(); String syntax = cleanRegex(tr38Details.getSyntax()); - // This is a kludge as it depends on only having single OR double quotes in the syntax. If we have both, we'll + // This is a kludge as it depends on only having single OR double quotes in the syntax. If + // we have both, we'll // need to do more investigation on what RELAXNG Compact supports. String QUOTMARK = syntax.contains("\"") ? "'" : "\""; @@ -590,15 +767,10 @@ private static String formatTR38Syntax(TR38Details tr38Details, boolean isShowIf String ending = isList ? " )+}}?" : " }?"; if (isShowIfEmpty) { indent = (isList ? 15 : 8); - firstLinePrefix = isList ? - " { \"\" | list { " : - " { \"\" | "; - } - else { + firstLinePrefix = isList ? " { \"\" | list { " : " { \"\" | "; + } else { indent = (isList ? 12 : 4); - firstLinePrefix = isList ? - " { list { ( " : - " { "; + firstLinePrefix = isList ? " { list { ( " : " { "; } String padding = String.format("%" + indent + "s", ""); StringBuilder formattedSyntaxBuilder = new StringBuilder(); @@ -606,21 +778,27 @@ private static String formatTR38Syntax(TR38Details tr38Details, boolean isShowIf Matcher matcher = syntaxPattern.matcher(syntax); while (matcher.find()) { if (formattedSyntaxBuilder.length() == 0) { - //First line + // First line formattedSyntaxBuilder .append(firstLinePrefix) - .append("xsd:string { pattern=").append(QUOTMARK) + .append("xsd:string { pattern=") + .append(QUOTMARK) .append(matcher.group(1)) - .append(QUOTMARK).append(" }") + .append(QUOTMARK) + .append(" }") .append(NEWLINE); } else { - //Everything else + // Everything else formattedSyntaxBuilder .append(padding) - .append(matcher.group(1).replaceAll("^[| ]*", " | xsd:string { pattern=" + QUOTMARK)) - .append(QUOTMARK).append(" }") + .append( + matcher.group(1) + .replaceAll( + "^[| ]*", + " | xsd:string { pattern=" + QUOTMARK)) + .append(QUOTMARK) + .append(" }") 
.append(NEWLINE); - } } formattedSyntaxBuilder.append(ending); @@ -629,25 +807,34 @@ private static String formatTR38Syntax(TR38Details tr38Details, boolean isShowIf } else { if (isShowIfEmpty) { if (isList) { - return " { \"\" | list { xsd:string { pattern=" + QUOTMARK + syntax + QUOTMARK + " }+ } }?"; - } - else { - return " { \"\" | xsd:string { pattern=" + QUOTMARK + syntax + QUOTMARK + " } }?"; + return " { \"\" | list { xsd:string { pattern=" + + QUOTMARK + + syntax + + QUOTMARK + + " }+ } }?"; + } else { + return " { \"\" | xsd:string { pattern=" + + QUOTMARK + + syntax + + QUOTMARK + + " } }?"; } - } - else { + } else { if (isList) { - return " { list { xsd:string { pattern=" + QUOTMARK + syntax + QUOTMARK + " }+ } }?"; - } - else { + return " { list { xsd:string { pattern=" + + QUOTMARK + + syntax + + QUOTMARK + + " }+ } }?"; + } else { return " { xsd:string { pattern=" + QUOTMARK + syntax + QUOTMARK + " } }?"; } } } - } - private static String formatValues(int indent, List values, VALUESOUTPUTTYPE valuesoutputtype) { + private static String formatValues( + int indent, List values, VALUESOUTPUTTYPE valuesoutputtype) { StringBuilder valueBlock = new StringBuilder(); StringBuilder currentLine = new StringBuilder(); String padding = String.format("%" + indent + "s", ""); @@ -659,7 +846,7 @@ private static String formatValues(int indent, List values, VALUESOUTPUT } if (value.startsWith("xsd")) { formattedValue.append(value); - } else{ + } else { formattedValue.append("\"").append(value).append("\""); } @@ -671,9 +858,10 @@ private static String formatValues(int indent, List values, VALUESOUTPUT currentLine.append(formattedValue); groupPrefix = valuePrefix; } else if (valuePrefix.equals(groupPrefix)) { - int testLength = valueBlock.length() == 0 ? - padding.length() + currentLine.length() + " ".length() : - currentLine.length() + " ".length(); + int testLength = + valueBlock.length() == 0 + ? 
padding.length() + currentLine.length() + " ".length() + : currentLine.length() + " ".length(); if ((testLength + formattedValue.length()) > MAX_LINE_LENGTH) { valueBlock.append(currentLine).append(NEWLINE); currentLine.setLength(0); @@ -693,9 +881,10 @@ private static String formatValues(int indent, List values, VALUESOUTPUT break; case MAX_LINE_LENGTH: - int testLength = valueBlock.length() == 0 ? - padding.length() + currentLine.length() + " ".length() : - currentLine.length() + " ".length(); + int testLength = + valueBlock.length() == 0 + ? padding.length() + currentLine.length() + " ".length() + : currentLine.length() + " ".length(); if ((testLength + formattedValue.length()) > MAX_LINE_LENGTH) { valueBlock.append(currentLine).append(NEWLINE); currentLine.setLength(0); @@ -715,7 +904,7 @@ private static String formatValues(int indent, List values, VALUESOUTPUT } if (value.startsWith("xsd")) { valueBlock.append(value); - } else{ + } else { valueBlock.append("\"").append(value).append("\""); } } @@ -744,283 +933,468 @@ private static String cleanRegex(String regex) { return regex.replaceAll("\\[-", "[\\\\-").replaceAll("\\\\/", "/").replaceAll("\\\\'", "'"); } - //********************* Combined properties ********************// + // ********************* Combined properties ********************// private static String getFormattedDecompositionProperties() { - return getFormattedAttribute(UcdProperty.Decomposition_Type, VALUESOUTPUTTYPE.MAX_LINE_LENGTH) + DOUBLELINE + - getFormattedSyntax(UcdProperty.Decomposition_Mapping); + return getFormattedAttribute( + UcdProperty.Decomposition_Type, VALUESOUTPUTTYPE.MAX_LINE_LENGTH) + + DOUBLELINE + + getFormattedSyntax(UcdProperty.Decomposition_Mapping); } private static String getFormattedCompositionProperties() { - return getFormattedBoolean(UcdProperty.Composition_Exclusion) + DOUBLELINE + - getFormattedBoolean(UcdProperty.Full_Composition_Exclusion); + return getFormattedBoolean(UcdProperty.Composition_Exclusion) + + 
DOUBLELINE + + getFormattedBoolean(UcdProperty.Full_Composition_Exclusion); } private static String getFormattedQuickCheckProperties() { - return getFormattedAttribute(UcdProperty.NFC_Quick_Check, VALUESOUTPUTTYPE.MAX_LINE_LENGTH) + DOUBLELINE + - getFormattedAttribute(UcdProperty.NFD_Quick_Check, VALUESOUTPUTTYPE.MAX_LINE_LENGTH) + DOUBLELINE + - getFormattedAttribute(UcdProperty.NFKC_Quick_Check, VALUESOUTPUTTYPE.MAX_LINE_LENGTH) + DOUBLELINE + - getFormattedAttribute(UcdProperty.NFKD_Quick_Check, VALUESOUTPUTTYPE.MAX_LINE_LENGTH) + TRIPLELINE + - getFormattedBoolean(UcdProperty.Expands_On_NFC) + DOUBLELINE + - getFormattedBoolean(UcdProperty.Expands_On_NFD) + DOUBLELINE + - getFormattedBoolean(UcdProperty.Expands_On_NFKC) + DOUBLELINE + - getFormattedBoolean(UcdProperty.Expands_On_NFKD) + TRIPLELINE + - getFormattedSyntax(UcdProperty.FC_NFKC_Closure); + return getFormattedAttribute(UcdProperty.NFC_Quick_Check, VALUESOUTPUTTYPE.MAX_LINE_LENGTH) + + DOUBLELINE + + getFormattedAttribute( + UcdProperty.NFD_Quick_Check, VALUESOUTPUTTYPE.MAX_LINE_LENGTH) + + DOUBLELINE + + getFormattedAttribute( + UcdProperty.NFKC_Quick_Check, VALUESOUTPUTTYPE.MAX_LINE_LENGTH) + + DOUBLELINE + + getFormattedAttribute( + UcdProperty.NFKD_Quick_Check, VALUESOUTPUTTYPE.MAX_LINE_LENGTH) + + TRIPLELINE + + getFormattedBoolean(UcdProperty.Expands_On_NFC) + + DOUBLELINE + + getFormattedBoolean(UcdProperty.Expands_On_NFD) + + DOUBLELINE + + getFormattedBoolean(UcdProperty.Expands_On_NFKC) + + DOUBLELINE + + getFormattedBoolean(UcdProperty.Expands_On_NFKD) + + TRIPLELINE + + getFormattedSyntax(UcdProperty.FC_NFKC_Closure); } private static String getFormattedNumericProperties() { - return getFormattedAttribute(UcdProperty.Numeric_Type, VALUESOUTPUTTYPE.MAX_LINE_LENGTH) + DOUBLELINE + - getFormattedSyntax(UcdProperty.Numeric_Value); + return getFormattedAttribute(UcdProperty.Numeric_Type, VALUESOUTPUTTYPE.MAX_LINE_LENGTH) + + DOUBLELINE + + getFormattedSyntax(UcdProperty.Numeric_Value); } 
private static String getFormattedJoiningProperties() { - return getFormattedAttribute(UcdProperty.Joining_Type, VALUESOUTPUTTYPE.MAX_LINE_LENGTH) + DOUBLELINE + - getFormattedAttribute(UcdProperty.Joining_Group, VALUESOUTPUTTYPE.ALPHABETICAL_GROUP); + return getFormattedAttribute(UcdProperty.Joining_Type, VALUESOUTPUTTYPE.MAX_LINE_LENGTH) + + DOUBLELINE + + getFormattedAttribute( + UcdProperty.Joining_Group, VALUESOUTPUTTYPE.ALPHABETICAL_GROUP); } private static String getFormattedCasingProperties() { - return getFormattedBoolean(UcdProperty.Uppercase) + DOUBLELINE + - getFormattedBoolean(UcdProperty.Lowercase) + DOUBLELINE + - getFormattedBoolean(UcdProperty.Other_Uppercase) + DOUBLELINE + - getFormattedBoolean(UcdProperty.Other_Lowercase); + return getFormattedBoolean(UcdProperty.Uppercase) + + DOUBLELINE + + getFormattedBoolean(UcdProperty.Lowercase) + + DOUBLELINE + + getFormattedBoolean(UcdProperty.Other_Uppercase) + + DOUBLELINE + + getFormattedBoolean(UcdProperty.Other_Lowercase); } private static String getFormattedSimpleCaseMappingProperties() { - return getFormattedSyntax(UcdProperty.Simple_Uppercase_Mapping) + DOUBLELINE + - getFormattedSyntax(UcdProperty.Simple_Lowercase_Mapping) + DOUBLELINE + - getFormattedSyntax(UcdProperty.Simple_Titlecase_Mapping); + return getFormattedSyntax(UcdProperty.Simple_Uppercase_Mapping) + + DOUBLELINE + + getFormattedSyntax(UcdProperty.Simple_Lowercase_Mapping) + + DOUBLELINE + + getFormattedSyntax(UcdProperty.Simple_Titlecase_Mapping); } private static String getFormattedCaseMappingProperties() { - return getFormattedSyntax(UcdProperty.Uppercase_Mapping) + DOUBLELINE + - getFormattedSyntax(UcdProperty.Lowercase_Mapping) + DOUBLELINE + - getFormattedSyntax(UcdProperty.Titlecase_Mapping); + return getFormattedSyntax(UcdProperty.Uppercase_Mapping) + + DOUBLELINE + + getFormattedSyntax(UcdProperty.Lowercase_Mapping) + + DOUBLELINE + + getFormattedSyntax(UcdProperty.Titlecase_Mapping); } private static String 
getFormattedCaseFoldingProperties() { - return getFormattedSyntax(UcdProperty.Simple_Case_Folding) + DOUBLELINE + - getFormattedSyntax(UcdProperty.Case_Folding); + return getFormattedSyntax(UcdProperty.Simple_Case_Folding) + + DOUBLELINE + + getFormattedSyntax(UcdProperty.Case_Folding); } private static String getFormattedCaseOtherProperties() { - return getFormattedBoolean(UcdProperty.Case_Ignorable) + DOUBLELINE + - getFormattedBoolean(UcdProperty.Cased) + DOUBLELINE + - getFormattedBoolean(UcdProperty.Changes_When_Casefolded) + DOUBLELINE + - getFormattedBoolean(UcdProperty.Changes_When_Casemapped) + DOUBLELINE + - getFormattedBoolean(UcdProperty.Changes_When_Lowercased) + DOUBLELINE + - getFormattedBoolean(UcdProperty.Changes_When_NFKC_Casefolded) + DOUBLELINE + - getFormattedBoolean(UcdProperty.Changes_When_Titlecased) + DOUBLELINE + - getFormattedBoolean(UcdProperty.Changes_When_Uppercased) + DOUBLELINE + - getFormattedSyntax(UcdProperty.NFKC_Casefold) + DOUBLELINE + - getFormattedSyntax(UcdProperty.NFKC_Simple_Casefold); + return getFormattedBoolean(UcdProperty.Case_Ignorable) + + DOUBLELINE + + getFormattedBoolean(UcdProperty.Cased) + + DOUBLELINE + + getFormattedBoolean(UcdProperty.Changes_When_Casefolded) + + DOUBLELINE + + getFormattedBoolean(UcdProperty.Changes_When_Casemapped) + + DOUBLELINE + + getFormattedBoolean(UcdProperty.Changes_When_Lowercased) + + DOUBLELINE + + getFormattedBoolean(UcdProperty.Changes_When_NFKC_Casefolded) + + DOUBLELINE + + getFormattedBoolean(UcdProperty.Changes_When_Titlecased) + + DOUBLELINE + + getFormattedBoolean(UcdProperty.Changes_When_Uppercased) + + DOUBLELINE + + getFormattedSyntax(UcdProperty.NFKC_Casefold) + + DOUBLELINE + + getFormattedSyntax(UcdProperty.NFKC_Simple_Casefold); } private static String getFormattedScriptProperties() { - return getFormattedPropertyValues(UcdProperty.Script, VALUESOUTPUTTYPE.ALPHABETICAL_GROUP) + DOUBLELINE + - getFormattedSyntax(UcdProperty.Script) + DOUBLELINE + - 
getFormattedSyntax(UcdProperty.Script_Extensions); + return getFormattedPropertyValues(UcdProperty.Script, VALUESOUTPUTTYPE.ALPHABETICAL_GROUP) + + DOUBLELINE + + getFormattedSyntax(UcdProperty.Script) + + DOUBLELINE + + getFormattedSyntax(UcdProperty.Script_Extensions); } private static String getFormattedIdentifierProperties() { - return getFormattedBoolean(UcdProperty.ID_Start) + DOUBLELINE + - getFormattedBoolean(UcdProperty.Other_ID_Start) + DOUBLELINE + - getFormattedBoolean(UcdProperty.XID_Start) + DOUBLELINE + - getFormattedBoolean(UcdProperty.ID_Continue) + DOUBLELINE + - getFormattedBoolean(UcdProperty.Other_ID_Continue) + DOUBLELINE + - getFormattedBoolean(UcdProperty.XID_Continue) + DOUBLELINE + - getFormattedBoolean(UcdProperty.ID_Compat_Math_Start) + DOUBLELINE + - getFormattedBoolean(UcdProperty.ID_Compat_Math_Continue); + return getFormattedBoolean(UcdProperty.ID_Start) + + DOUBLELINE + + getFormattedBoolean(UcdProperty.Other_ID_Start) + + DOUBLELINE + + getFormattedBoolean(UcdProperty.XID_Start) + + DOUBLELINE + + getFormattedBoolean(UcdProperty.ID_Continue) + + DOUBLELINE + + getFormattedBoolean(UcdProperty.Other_ID_Continue) + + DOUBLELINE + + getFormattedBoolean(UcdProperty.XID_Continue) + + DOUBLELINE + + getFormattedBoolean(UcdProperty.ID_Compat_Math_Start) + + DOUBLELINE + + getFormattedBoolean(UcdProperty.ID_Compat_Math_Continue); } private static String getFormattedPatternProperties() { - return getFormattedBoolean(UcdProperty.Pattern_Syntax) + DOUBLELINE + - getFormattedBoolean(UcdProperty.Pattern_White_Space); + return getFormattedBoolean(UcdProperty.Pattern_Syntax) + + DOUBLELINE + + getFormattedBoolean(UcdProperty.Pattern_White_Space); } private static String getFormattedFunctionGraphicProperties() { - return getFormattedBoolean(UcdProperty.Dash) + DOUBLELINE + - getFormattedBoolean(UcdProperty.Hyphen) + DOUBLELINE + - getFormattedBoolean(UcdProperty.Quotation_Mark) + DOUBLELINE + - getFormattedBoolean(UcdProperty.Terminal_Punctuation) 
+ DOUBLELINE + - getFormattedBoolean(UcdProperty.Sentence_Terminal) + DOUBLELINE + - getFormattedBoolean(UcdProperty.Diacritic) + DOUBLELINE + - getFormattedBoolean(UcdProperty.Extender) + DOUBLELINE + - getFormattedBoolean(UcdProperty.Soft_Dotted) + DOUBLELINE + - getFormattedBoolean(UcdProperty.Alphabetic) + DOUBLELINE + - getFormattedBoolean(UcdProperty.Other_Alphabetic) + DOUBLELINE + - getFormattedBoolean(UcdProperty.Math) + DOUBLELINE + - getFormattedBoolean(UcdProperty.Other_Math) + DOUBLELINE + - getFormattedBoolean(UcdProperty.Hex_Digit) + DOUBLELINE + - getFormattedBoolean(UcdProperty.ASCII_Hex_Digit) + DOUBLELINE + - getFormattedBoolean(UcdProperty.Default_Ignorable_Code_Point) + DOUBLELINE + - getFormattedBoolean(UcdProperty.Other_Default_Ignorable_Code_Point) + DOUBLELINE + - getFormattedBoolean(UcdProperty.Logical_Order_Exception) + DOUBLELINE + - getFormattedBoolean(UcdProperty.Prepended_Concatenation_Mark) + DOUBLELINE + - getFormattedBoolean(UcdProperty.Modifier_Combining_Mark) + DOUBLELINE + - getFormattedBoolean(UcdProperty.White_Space) + DOUBLELINE + - getFormattedAttribute(UcdProperty.Vertical_Orientation, VALUESOUTPUTTYPE.MAX_LINE_LENGTH) + DOUBLELINE + - getFormattedBoolean(UcdProperty.Regional_Indicator); + return getFormattedBoolean(UcdProperty.Dash) + + DOUBLELINE + + getFormattedBoolean(UcdProperty.Hyphen) + + DOUBLELINE + + getFormattedBoolean(UcdProperty.Quotation_Mark) + + DOUBLELINE + + getFormattedBoolean(UcdProperty.Terminal_Punctuation) + + DOUBLELINE + + getFormattedBoolean(UcdProperty.Sentence_Terminal) + + DOUBLELINE + + getFormattedBoolean(UcdProperty.Diacritic) + + DOUBLELINE + + getFormattedBoolean(UcdProperty.Extender) + + DOUBLELINE + + getFormattedBoolean(UcdProperty.Soft_Dotted) + + DOUBLELINE + + getFormattedBoolean(UcdProperty.Alphabetic) + + DOUBLELINE + + getFormattedBoolean(UcdProperty.Other_Alphabetic) + + DOUBLELINE + + getFormattedBoolean(UcdProperty.Math) + + DOUBLELINE + + 
getFormattedBoolean(UcdProperty.Other_Math) + + DOUBLELINE + + getFormattedBoolean(UcdProperty.Hex_Digit) + + DOUBLELINE + + getFormattedBoolean(UcdProperty.ASCII_Hex_Digit) + + DOUBLELINE + + getFormattedBoolean(UcdProperty.Default_Ignorable_Code_Point) + + DOUBLELINE + + getFormattedBoolean(UcdProperty.Other_Default_Ignorable_Code_Point) + + DOUBLELINE + + getFormattedBoolean(UcdProperty.Logical_Order_Exception) + + DOUBLELINE + + getFormattedBoolean(UcdProperty.Prepended_Concatenation_Mark) + + DOUBLELINE + + getFormattedBoolean(UcdProperty.Modifier_Combining_Mark) + + DOUBLELINE + + getFormattedBoolean(UcdProperty.White_Space) + + DOUBLELINE + + getFormattedAttribute( + UcdProperty.Vertical_Orientation, VALUESOUTPUTTYPE.MAX_LINE_LENGTH) + + DOUBLELINE + + getFormattedBoolean(UcdProperty.Regional_Indicator); } private static String getFormattedBoundaryProperties() { - return getFormattedBoolean(UcdProperty.Grapheme_Base) + DOUBLELINE + - getFormattedBoolean(UcdProperty.Grapheme_Extend) + DOUBLELINE + - getFormattedBoolean(UcdProperty.Other_Grapheme_Extend) + DOUBLELINE + - getFormattedBoolean(UcdProperty.Grapheme_Link) + DOUBLELINE + - getFormattedAttribute(UcdProperty.Grapheme_Cluster_Break, - VALUESOUTPUTTYPE.ALPHABETICAL_GROUP) + DOUBLELINE + - getFormattedAttribute(UcdProperty.Word_Break, - VALUESOUTPUTTYPE.ALPHABETICAL_GROUP) + DOUBLELINE + - getFormattedAttribute(UcdProperty.Sentence_Break, - VALUESOUTPUTTYPE.ALPHABETICAL_GROUP); + return getFormattedBoolean(UcdProperty.Grapheme_Base) + + DOUBLELINE + + getFormattedBoolean(UcdProperty.Grapheme_Extend) + + DOUBLELINE + + getFormattedBoolean(UcdProperty.Other_Grapheme_Extend) + + DOUBLELINE + + getFormattedBoolean(UcdProperty.Grapheme_Link) + + DOUBLELINE + + getFormattedAttribute( + UcdProperty.Grapheme_Cluster_Break, VALUESOUTPUTTYPE.ALPHABETICAL_GROUP) + + DOUBLELINE + + getFormattedAttribute(UcdProperty.Word_Break, VALUESOUTPUTTYPE.ALPHABETICAL_GROUP) + + DOUBLELINE + + getFormattedAttribute( + 
UcdProperty.Sentence_Break, VALUESOUTPUTTYPE.ALPHABETICAL_GROUP); } private static String getFormattedIdeographProperties() { - return getFormattedBoolean(UcdProperty.Ideographic) + DOUBLELINE + - getFormattedBoolean(UcdProperty.Unified_Ideograph) + DOUBLELINE + - getFormattedSyntax(UcdProperty.Equivalent_Unified_Ideograph) + DOUBLELINE + - getFormattedBoolean(UcdProperty.IDS_Binary_Operator) + DOUBLELINE + - getFormattedBoolean(UcdProperty.IDS_Trinary_Operator) + DOUBLELINE + - getFormattedBoolean(UcdProperty.IDS_Unary_Operator) + DOUBLELINE + - getFormattedBoolean(UcdProperty.Radical); + return getFormattedBoolean(UcdProperty.Ideographic) + + DOUBLELINE + + getFormattedBoolean(UcdProperty.Unified_Ideograph) + + DOUBLELINE + + getFormattedSyntax(UcdProperty.Equivalent_Unified_Ideograph) + + DOUBLELINE + + getFormattedBoolean(UcdProperty.IDS_Binary_Operator) + + DOUBLELINE + + getFormattedBoolean(UcdProperty.IDS_Trinary_Operator) + + DOUBLELINE + + getFormattedBoolean(UcdProperty.IDS_Unary_Operator) + + DOUBLELINE + + getFormattedBoolean(UcdProperty.Radical); } private static String getFormattedMiscellaneousProperties() { - return getFormattedBoolean(UcdProperty.Deprecated) + DOUBLELINE + - getFormattedBoolean(UcdProperty.Variation_Selector) + DOUBLELINE + - getFormattedBoolean(UcdProperty.Noncharacter_Code_Point); + return getFormattedBoolean(UcdProperty.Deprecated) + + DOUBLELINE + + getFormattedBoolean(UcdProperty.Variation_Selector) + + DOUBLELINE + + getFormattedBoolean(UcdProperty.Noncharacter_Code_Point); } private static String getFormattedUnihanProperties() { - return getFormattedTR38Syntax(UcdProperty.kAccountingNumeric) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kAlternateTotalStrokes) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kBigFive) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kCangjie) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kCantonese) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kCCCII) + DOUBLELINE + - 
getFormattedTR38Syntax(UcdProperty.kCheungBauer) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kCheungBauerIndex) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kCihaiT) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kCNS1986) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kCNS1992) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kCompatibilityVariant) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kCowles) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kDaeJaweon) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kDefinition) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kEACC) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kFanqie) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kFenn) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kFennIndex) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kFourCornerCode) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kGB0) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kGB1) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kGB3) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kGB5) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kGB7) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kGB8) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kGradeLevel) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kGSR) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kHangul) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kHanYu) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kHanyuPinlu) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kHanyuPinyin) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kHDZRadBreak) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kHKGlyph) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kIBMJapan) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kIICore) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kIRG_GSource) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kIRG_HSource) + 
DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kIRG_JSource) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kIRG_KPSource) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kIRG_KSource) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kIRG_MSource) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kIRG_SSource) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kIRG_TSource) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kIRG_UKSource) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kIRG_USource) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kIRG_VSource) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kIRGDaeJaweon) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kIRGHanyuDaZidian) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kIRGKangXi) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kJa) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kJapanese) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kJapaneseKun) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kJapaneseOn) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kJinmeiyoKanji) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kJis0) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kJis1) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kJIS0213) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kJoyoKanji) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kKangXi) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kKarlgren) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kKorean) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kKoreanEducationHanja) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kKoreanName) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kLau) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kMainlandTelegraph) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kMandarin) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kMatthews) + DOUBLELINE + - 
getFormattedTR38Syntax(UcdProperty.kMeyerWempe) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kMojiJoho) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kMorohashi) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kNelson) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kOtherNumeric) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kPhonetic) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kPrimaryNumeric) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kPseudoGB1) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kRSAdobe_Japan1_6) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kRSUnicode) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kSBGY) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kSemanticVariant) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kSimplifiedVariant) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kSMSZD2003Index) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kSMSZD2003Readings) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kSpecializedSemanticVariant) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kSpoofingVariant) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kStrange) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kTaiwanTelegraph) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kTang) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kTGH) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kTGHZ2013) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kTotalStrokes) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kTraditionalVariant) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kUnihanCore2020) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kVietnamese) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kVietnameseNumeric) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kXerox) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kXHC1983) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kZhuang) + DOUBLELINE + - 
getFormattedTR38Syntax(UcdProperty.kZhuangNumeric) + DOUBLELINE + - getFormattedTR38Syntax(UcdProperty.kZVariant); + return getFormattedTR38Syntax(UcdProperty.kAccountingNumeric) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kAlternateTotalStrokes) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kBigFive) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kCangjie) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kCantonese) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kCCCII) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kCheungBauer) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kCheungBauerIndex) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kCihaiT) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kCNS1986) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kCNS1992) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kCompatibilityVariant) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kCowles) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kDaeJaweon) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kDefinition) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kEACC) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kFanqie) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kFenn) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kFennIndex) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kFourCornerCode) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kGB0) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kGB1) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kGB3) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kGB5) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kGB7) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kGB8) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kGradeLevel) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kGSR) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kHangul) + + 
DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kHanYu) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kHanyuPinlu) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kHanyuPinyin) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kHDZRadBreak) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kHKGlyph) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kIBMJapan) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kIICore) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kIRG_GSource) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kIRG_HSource) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kIRG_JSource) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kIRG_KPSource) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kIRG_KSource) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kIRG_MSource) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kIRG_SSource) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kIRG_TSource) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kIRG_UKSource) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kIRG_USource) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kIRG_VSource) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kIRGDaeJaweon) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kIRGHanyuDaZidian) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kIRGKangXi) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kJa) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kJapanese) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kJapaneseKun) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kJapaneseOn) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kJinmeiyoKanji) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kJis0) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kJis1) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kJIS0213) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kJoyoKanji) + + 
DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kKangXi) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kKarlgren) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kKorean) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kKoreanEducationHanja) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kKoreanName) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kLau) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kMainlandTelegraph) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kMandarin) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kMatthews) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kMeyerWempe) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kMojiJoho) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kMorohashi) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kNelson) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kOtherNumeric) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kPhonetic) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kPrimaryNumeric) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kPseudoGB1) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kRSAdobe_Japan1_6) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kRSUnicode) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kSBGY) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kSemanticVariant) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kSimplifiedVariant) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kSMSZD2003Index) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kSMSZD2003Readings) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kSpecializedSemanticVariant) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kSpoofingVariant) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kStrange) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kTaiwanTelegraph) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kTang) + + DOUBLELINE + + 
getFormattedTR38Syntax(UcdProperty.kTGH) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kTGHZ2013) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kTotalStrokes) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kTraditionalVariant) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kUnihanCore2020) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kVietnamese) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kVietnameseNumeric) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kXerox) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kXHC1983) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kZhuang) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kZhuangNumeric) + + DOUBLELINE + + getFormattedTR38Syntax(UcdProperty.kZVariant); } private static String getFormattedTangutProperties() { - return getFormattedSyntax(UcdProperty.kRSTUnicode) + DOUBLELINE + - getFormattedSyntax(UcdProperty.kTGT_MergedSrc); + return getFormattedSyntax(UcdProperty.kRSTUnicode) + + DOUBLELINE + + getFormattedSyntax(UcdProperty.kTGT_MergedSrc); } private static String getFormattedNushuProperties() { - return getFormattedSyntax(UcdProperty.kSrc_NushuDuben) + DOUBLELINE + - getFormattedSyntax(UcdProperty.kReading); + return getFormattedSyntax(UcdProperty.kSrc_NushuDuben) + + DOUBLELINE + + getFormattedSyntax(UcdProperty.kReading); } private static String getFormattedEmojiProperties() { - return getFormattedBoolean(UcdProperty.Emoji) + DOUBLELINE + - getFormattedBoolean(UcdProperty.Emoji_Presentation) + DOUBLELINE + - getFormattedBoolean(UcdProperty.Emoji_Modifier) + DOUBLELINE + - getFormattedBoolean(UcdProperty.Emoji_Modifier_Base) + DOUBLELINE + - getFormattedBoolean(UcdProperty.Emoji_Component) + DOUBLELINE + - getFormattedBoolean(UcdProperty.Extended_Pictographic); + return getFormattedBoolean(UcdProperty.Emoji) + + DOUBLELINE + + getFormattedBoolean(UcdProperty.Emoji_Presentation) + + DOUBLELINE + + 
getFormattedBoolean(UcdProperty.Emoji_Modifier) + + DOUBLELINE + + getFormattedBoolean(UcdProperty.Emoji_Modifier_Base) + + DOUBLELINE + + getFormattedBoolean(UcdProperty.Emoji_Component) + + DOUBLELINE + + getFormattedBoolean(UcdProperty.Extended_Pictographic); } - //********************* Attribute values ********************// + // ********************* Attribute values ********************// private static List getBinaryValues() { List values = new ArrayList<>(); for (Binary binaryValues : Binary.values()) { values.add(binaryValues.getShortName()); } - //Binary should display as Y | N. + // Binary should display as Y | N. values.sort(Collections.reverseOrder()); return values; } @@ -1032,7 +1406,7 @@ private static List getAgeValues() { if (shortName.equals("NA")) { values.add("unassigned"); } else if (shortName.equals("13.1")) { - //https://github.com/unicode-org/unicodetools/issues/100 + // https://github.com/unicode-org/unicodetools/issues/100 } else { values.add(shortName); } @@ -1061,9 +1435,14 @@ private static List getBlockValues() { private static List getGeneralCategoryValues() { List values = new ArrayList<>(); for (General_Category_Values generalCategoryValues : General_Category_Values.values()) { - if (!generalCategoryValues.getShortName().toUpperCase().equals(generalCategoryValues.getShortName())) { - //Some of the General_Category_Values (LC, L, M, N, P, S, Z, C) stand for grouping of related - //General_Category values. They won't occur on any individual code point, so can be ignored. + if (!generalCategoryValues + .getShortName() + .toUpperCase() + .equals(generalCategoryValues.getShortName())) { + // Some of the General_Category_Values (LC, L, M, N, P, S, Z, C) stand for grouping + // of related + // General_Category values. They won't occur on any individual code point, so can be + // ignored. 
values.add(generalCategoryValues.getShortName()); } } @@ -1072,13 +1451,14 @@ private static List getGeneralCategoryValues() { private static List getCanonicalCombiningClassValues() { List values = new ArrayList<>(); - values.add ("xsd:integer { minInclusive=\"0\" maxInclusive=\"254\" }"); - //Because the set of values that this property has taken across the various versions of the UCD is rather - //large, our schema does not restrict the possible values to those actually used. - //for (Canonical_Combining_Class_Values canonicalCombiningClassValues : + values.add("xsd:integer { minInclusive=\"0\" maxInclusive=\"254\" }"); + // Because the set of values that this property has taken across the various versions of the + // UCD is rather + // large, our schema does not restrict the possible values to those actually used. + // for (Canonical_Combining_Class_Values canonicalCombiningClassValues : // Canonical_Combining_Class_Values.values()) { // values.add(canonicalCombiningClassValues.getShortName()); - //} + // } return values; } @@ -1092,12 +1472,13 @@ private static List getBidirectionalValues() { private static List getBidiPairedBracketTypeValues() { List values = new ArrayList<>(); - //Order should be Open/Close/None + // Order should be Open/Close/None values.add(Bidi_Paired_Bracket_Type_Values.Open.getShortName()); values.add(Bidi_Paired_Bracket_Type_Values.Close.getShortName()); values.add(Bidi_Paired_Bracket_Type_Values.None.getShortName()); - //Now let's check to see if there is anything else that we didn't expect - for (Bidi_Paired_Bracket_Type_Values bidiPairedBracketTypeValue : Bidi_Paired_Bracket_Type_Values.values()) { + // Now let's check to see if there is anything else that we didn't expect + for (Bidi_Paired_Bracket_Type_Values bidiPairedBracketTypeValue : + Bidi_Paired_Bracket_Type_Values.values()) { if (!values.contains(bidiPairedBracketTypeValue.getShortName())) { throw new IllegalArgumentException(); } @@ -1107,8 +1488,9 @@ private static List 
getBidiPairedBracketTypeValues() { private static List getDecompositionTypeValues() { List values = new ArrayList<>(); - for (Decomposition_Type_Values decompositionTypeValues : Decomposition_Type_Values.values()) { - //We want "none" to be last. + for (Decomposition_Type_Values decompositionTypeValues : + Decomposition_Type_Values.values()) { + // We want "none" to be last. if (decompositionTypeValues != Decomposition_Type_Values.None) { values.add(decompositionTypeValues.getNames().getOtherNames().get(0)); } @@ -1119,11 +1501,11 @@ private static List getDecompositionTypeValues() { private static List getNFCQuickCheckValues() { List values = new ArrayList<>(); - //Order should be Yes/No/Maybe + // Order should be Yes/No/Maybe values.add(NFC_Quick_Check_Values.Yes.getShortName()); values.add(NFC_Quick_Check_Values.No.getShortName()); values.add(NFC_Quick_Check_Values.Maybe.getShortName()); - //Now let's check to see if there is anything else that we didn't expect + // Now let's check to see if there is anything else that we didn't expect for (NFC_Quick_Check_Values nfcQuickCheckValues : NFC_Quick_Check_Values.values()) { if (!values.contains(nfcQuickCheckValues.getShortName())) { throw new IllegalArgumentException(); @@ -1134,10 +1516,10 @@ private static List getNFCQuickCheckValues() { private static List getNFDQuickCheckValues() { List values = new ArrayList<>(); - //Order should be Yes/No + // Order should be Yes/No values.add(NFD_Quick_Check_Values.Yes.getShortName()); values.add(NFD_Quick_Check_Values.No.getShortName()); - //Now let's check to see if there is anything else that we didn't expect + // Now let's check to see if there is anything else that we didn't expect for (NFD_Quick_Check_Values nfdQuickCheckValues : NFD_Quick_Check_Values.values()) { if (!values.contains(nfdQuickCheckValues.getShortName())) { throw new IllegalArgumentException(); @@ -1148,11 +1530,11 @@ private static List getNFDQuickCheckValues() { private static List 
getNFKCQuickCheckValues() { List values = new ArrayList<>(); - //Order should be Yes/No/Maybe + // Order should be Yes/No/Maybe values.add(NFKC_Quick_Check_Values.Yes.getShortName()); values.add(NFKC_Quick_Check_Values.No.getShortName()); values.add(NFKC_Quick_Check_Values.Maybe.getShortName()); - //Now let's check to see if there is anything else that we didn't expect + // Now let's check to see if there is anything else that we didn't expect for (NFKC_Quick_Check_Values nfkcQuickCheckValues : NFKC_Quick_Check_Values.values()) { if (!values.contains(nfkcQuickCheckValues.getShortName())) { throw new IllegalArgumentException(); @@ -1163,10 +1545,10 @@ private static List getNFKCQuickCheckValues() { private static List getNFKDQuickCheckValues() { List values = new ArrayList<>(); - //Order should be Yes/No + // Order should be Yes/No values.add(NFKD_Quick_Check_Values.Yes.getShortName()); values.add(NFKD_Quick_Check_Values.No.getShortName()); - //Now let's check to see if there is anything else that we didn't expect + // Now let's check to see if there is anything else that we didn't expect for (NFKD_Quick_Check_Values nfkdQuickCheckValues : NFKD_Quick_Check_Values.values()) { if (!values.contains(nfkdQuickCheckValues.getShortName())) { throw new IllegalArgumentException(); @@ -1177,12 +1559,12 @@ private static List getNFKDQuickCheckValues() { private static List getNumericTypeValues() { List values = new ArrayList<>(); - //Order should be Decimal/Digit/Numeric/None + // Order should be Decimal/Digit/Numeric/None values.add(Numeric_Type_Values.Decimal.getShortName()); values.add(Numeric_Type_Values.Digit.getShortName()); values.add(Numeric_Type_Values.Numeric.getShortName()); values.add(Numeric_Type_Values.None.getShortName()); - //Now let's check to see if there is anything else that we didn't expect + // Now let's check to see if there is anything else that we didn't expect for (Numeric_Type_Values numericTypeValues : Numeric_Type_Values.values()) { if 
(!values.contains(numericTypeValues.getShortName())) { throw new IllegalArgumentException(); @@ -1224,23 +1606,24 @@ private static List getEastAsianWidthValues() { } private static List getScriptValues() { - List excludedValues = Arrays.asList( - Script_Values.Han_with_Bopomofo, - Script_Values.Japanese, - Script_Values.Korean, - Script_Values.Math_Symbols, - Script_Values.Emoji_Symbols, - Script_Values.Other_Symbols, - Script_Values.Unwritten); + List excludedValues = + Arrays.asList( + Script_Values.Han_with_Bopomofo, + Script_Values.Japanese, + Script_Values.Korean, + Script_Values.Math_Symbols, + Script_Values.Emoji_Symbols, + Script_Values.Other_Symbols, + Script_Values.Unwritten); List values = new ArrayList<>(); for (Script_Values scriptValue : Script_Values.values()) { if (!excludedValues.contains(scriptValue)) { values.add(scriptValue.getShortName()); } - //Include the following if you want to add other names - //if (!scriptValue.getNames().getOtherNames().isEmpty()) { + // Include the following if you want to add other names + // if (!scriptValue.getNames().getOtherNames().isEmpty()) { // values.add(scriptValue.getNames().getOtherNames().get(0)); - //} + // } } Collections.sort(values); return values; @@ -1248,7 +1631,8 @@ private static List getScriptValues() { private static List getHangulSyllableTypeValues() { List values = new ArrayList<>(); - for (Hangul_Syllable_Type_Values hangulSyllableTypeValues : Hangul_Syllable_Type_Values.values()) { + for (Hangul_Syllable_Type_Values hangulSyllableTypeValues : + Hangul_Syllable_Type_Values.values()) { values.add(hangulSyllableTypeValues.getShortName()); } return values; @@ -1256,7 +1640,8 @@ private static List getHangulSyllableTypeValues() { private static List getIndicSyllabicCategoryValues() { List values = new ArrayList<>(); - for (Indic_Syllabic_Category_Values indicSyllabicCategoryValues : Indic_Syllabic_Category_Values.values()) { + for (Indic_Syllabic_Category_Values indicSyllabicCategoryValues : + 
Indic_Syllabic_Category_Values.values()) { values.add(indicSyllabicCategoryValues.getShortName()); } return values; @@ -1273,7 +1658,8 @@ private static List getIndicPositionalCategoryValues() { private static List getIndicConjunctBreakValues() { List values = new ArrayList<>(); - for (Indic_Conjunct_Break_Values indicConjunctBreakValues : Indic_Conjunct_Break_Values.values()) { + for (Indic_Conjunct_Break_Values indicConjunctBreakValues : + Indic_Conjunct_Break_Values.values()) { values.add(indicConjunctBreakValues.getShortName()); } return values; @@ -1281,7 +1667,8 @@ private static List getIndicConjunctBreakValues() { private static List getVerticalOrientationValues() { List values = new ArrayList<>(); - for (Vertical_Orientation_Values verticalOrientationValues : Vertical_Orientation_Values.values()) { + for (Vertical_Orientation_Values verticalOrientationValues : + Vertical_Orientation_Values.values()) { values.add(verticalOrientationValues.getShortName()); } return values; @@ -1289,7 +1676,8 @@ private static List getVerticalOrientationValues() { private static List getGraphemeClusterBreakValues() { List values = new ArrayList<>(); - for (Grapheme_Cluster_Break_Values graphemeClusterBreakValues : Grapheme_Cluster_Break_Values.values()) { + for (Grapheme_Cluster_Break_Values graphemeClusterBreakValues : + Grapheme_Cluster_Break_Values.values()) { values.add(graphemeClusterBreakValues.getShortName()); } return values; @@ -1320,7 +1708,7 @@ private static List getDoNotEmitTypeValues() { return values; } - //********************* Utility methods ********************// + // ********************* Utility methods ********************// private static HashMap parseTR38() throws IOException, URISyntaxException { HashMap syntaxTR38 = new HashMap<>(); @@ -1332,27 +1720,30 @@ private static HashMap parseTR38() throws IOException, URIS stringBuilder.append((char) ptr); } } - Pattern syntaxPattern = Pattern.compile( - 
">Property.*?(.*?).*?>Delimiter.*?>(.*?).*?>Syntax.*?>(.*?)", - Pattern.DOTALL); + Pattern syntaxPattern = + Pattern.compile( + ">Property.*?(.*?).*?>Delimiter.*?>(.*?).*?>Syntax.*?>(.*?)", + Pattern.DOTALL); Matcher matcher = syntaxPattern.matcher(stringBuilder.toString()); while (matcher.find()) { String delimiter = matcher.group(2).trim(); boolean isList = false; - switch(delimiter) { + switch (delimiter) { case "N/A": break; case "space": isList = true; break; default: - throw new IllegalArgumentException("Only \"space\" or \"N/A\" are supported values for Delimiter." + - " Found: " + delimiter); + throw new IllegalArgumentException( + "Only \"space\" or \"N/A\" are supported values for Delimiter." + + " Found: " + + delimiter); } - TR38Details tr38Details = new TR38Details(isList, matcher.group(3).trim().replaceAll("
    ", "")); + TR38Details tr38Details = + new TR38Details(isList, matcher.group(3).trim().replaceAll("
    ", "")); syntaxTR38.put(matcher.group(1).trim(), tr38Details); } return syntaxTR38; } - } From 6ee246756b052b36c44aba13b19ef5d918ebb54f Mon Sep 17 00:00:00 2001 From: John Wilcock Date: Tue, 12 Nov 2024 07:55:27 -0800 Subject: [PATCH 13/14] Implemented review comments from eggrobin --- .../src/main/java/org/unicode/props/UcdProperty.java | 8 ++++---- .../resources/org/unicode/props/ExtraPropertyAliases.txt | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/unicodetools/src/main/java/org/unicode/props/UcdProperty.java b/unicodetools/src/main/java/org/unicode/props/UcdProperty.java index 3d861a740..f3e5a4fc0 100644 --- a/unicodetools/src/main/java/org/unicode/props/UcdProperty.java +++ b/unicodetools/src/main/java/org/unicode/props/UcdProperty.java @@ -195,7 +195,7 @@ public enum UcdProperty { kRSKanWa(PropertyType.Miscellaneous, "cjkRSKanWa"), kRSKangXi(PropertyType.Miscellaneous, "cjkRSKangXi"), kRSKorean(PropertyType.Miscellaneous, "cjkRSKorean"), - kRSTUnicode(PropertyType.Miscellaneous, "cjkRSTUnicode"), + kRSTUnicode(PropertyType.Miscellaneous, "kRSTUnicode"), kRSUnicode( PropertyType.Miscellaneous, null, @@ -203,7 +203,7 @@ public enum UcdProperty { "cjkRSUnicode", "Unicode_Radical_Stroke", "URS"), - kReading(PropertyType.Miscellaneous, "cjkReading"), + kReading(PropertyType.Miscellaneous, "kReading"), kSBGY(PropertyType.Miscellaneous, null, ValueCardinality.Unordered, "cjkSBGY"), kSMSZD2003Index(PropertyType.Miscellaneous, "cjkSMSZD2003Index"), kSMSZD2003Readings(PropertyType.Miscellaneous, "cjkSMSZD2003Readings"), @@ -215,11 +215,11 @@ public enum UcdProperty { ValueCardinality.Unordered, "cjkSpecializedSemanticVariant"), kSpoofingVariant(PropertyType.Miscellaneous, "cjkSpoofingVariant"), - kSrc_NushuDuben(PropertyType.Miscellaneous, "cjkSrc_NushuDuben"), + kSrc_NushuDuben(PropertyType.Miscellaneous, "kSrc_NushuDuben"), kStrange(PropertyType.Miscellaneous, "cjkStrange"), kTGH(PropertyType.Miscellaneous, null, 
ValueCardinality.Unordered, "cjkTGH"), kTGHZ2013(PropertyType.Miscellaneous, "cjkTGHZ2013"), - kTGT_MergedSrc(PropertyType.Miscellaneous, "cjkTGT_MergedSrc"), + kTGT_MergedSrc(PropertyType.Miscellaneous, "kTGT_MergedSrc"), kTaiwanTelegraph(PropertyType.Miscellaneous, "cjkTaiwanTelegraph"), kTang(PropertyType.Miscellaneous, null, ValueCardinality.Unordered, "cjkTang"), kTotalStrokes(PropertyType.Miscellaneous, null, ValueCardinality.Ordered, "cjkTotalStrokes"), diff --git a/unicodetools/src/main/resources/org/unicode/props/ExtraPropertyAliases.txt b/unicodetools/src/main/resources/org/unicode/props/ExtraPropertyAliases.txt index 527abacb9..b2886ab98 100644 --- a/unicodetools/src/main/resources/org/unicode/props/ExtraPropertyAliases.txt +++ b/unicodetools/src/main/resources/org/unicode/props/ExtraPropertyAliases.txt @@ -151,10 +151,10 @@ cjkJoyoKanji ; kJoyoKanji cjkKoreanEducationHanja ; kKoreanEducationHanja cjkKoreanName ; kKoreanName cjkTGH ; kTGH -cjkRSTUnicode ; kRSTUnicode -cjkReading ; kReading -cjkSrc_NushuDuben ; kSrc_NushuDuben -cjkTGT_MergedSrc ; kTGT_MergedSrc +kRSTUnicode ; kRSTUnicode +kReading ; kReading +kSrc_NushuDuben ; kSrc_NushuDuben +kTGT_MergedSrc ; kTGT_MergedSrc ncCorrected ; NC_Corrected ncOriginal ; NC_Original ncVersion ; NC_Version From dbb5dd384447070f9d97d1a0d690ecac832e225c Mon Sep 17 00:00:00 2001 From: John Wilcock Date: Mon, 16 Dec 2024 15:41:28 -0800 Subject: [PATCH 14/14] Updates from Marcus's review comments --- uax/uax42/Readme.md => docs/ucdxml.md | 6 +- .../org/unicode/xml/AttributeResolver.java | 128 +----------------- .../unicode/xml/GeneratePropertyValues.java | 6 +- .../org/unicode/props/IndexPropertyRegex.txt | 30 ++-- .../org/unicode/uax42/fragments}/Bidi_C.xml | 0 .../org/unicode/uax42/fragments}/Bidi_M.xml | 0 .../org/unicode/uax42/fragments}/Emoji.xml | 0 .../org/unicode/uax42/fragments}/InCB.xml | 0 .../org/unicode/uax42/fragments}/InPC.xml | 0 .../org/unicode/uax42/fragments}/InSC.xml | 0 
.../org/unicode/uax42/fragments}/JSN.xml | 0 .../org/unicode/uax42/fragments}/Join_C.xml | 0 .../unicode/uax42/fragments}/Name_Alias.xml | 0 .../org/unicode/uax42/fragments}/Nushu.xml | 0 .../uax42/fragments/Set_of_code_points.xml | 0 .../org/unicode/uax42/fragments}/Tangut.xml | 0 .../org/unicode/uax42/fragments}/Unihan.xml | 0 .../org/unicode/uax42/fragments}/age.xml | 1 + .../org/unicode/uax42/fragments}/bc.xml | 0 .../org/unicode/uax42/fragments}/blk.xml | 0 .../org/unicode/uax42/fragments}/block.xml | 0 .../org/unicode/uax42/fragments}/bmg.xml | 0 .../org/unicode/uax42/fragments}/boolean.xml | 0 .../unicode/uax42/fragments}/boundaries.xml | 0 .../org/unicode/uax42/fragments}/bpb.xml | 0 .../org/unicode/uax42/fragments}/bpt.xml | 0 .../unicode/uax42/fragments}/case_folding.xml | 0 .../unicode/uax42/fragments}/case_mapping.xml | 0 .../unicode/uax42/fragments}/case_other.xml | 0 .../org/unicode/uax42/fragments}/casing.xml | 0 .../org/unicode/uax42/fragments}/ccc.xml | 0 .../unicode/uax42/fragments}/cjk-radicals.xml | 0 .../org/unicode/uax42/fragments}/cjkEACC.xml | 0 .../uax42/fragments}/cjkIRG_TSource.xml | 0 .../unicode/uax42/fragments}/composition.xml | 0 .../unicode/uax42/fragments}/datatypes.xml | 0 .../uax42/fragments/datatypes_code_points.xml | 0 .../uax42/fragments}/decomposition.xml | 0 .../unicode/uax42/fragments}/description.xml | 0 .../unicode/uax42/fragments}/do-not-emit.xml | 0 .../org/unicode/uax42/fragments}/ea.xml | 0 .../uax42/fragments}/emoji-sources.xml | 0 .../uax42/fragments}/function_graphic.xml | 0 .../org/unicode/uax42/fragments}/gc.xml | 0 .../org/unicode/uax42/fragments}/groups.xml | 0 .../org/unicode/uax42/fragments}/hst.xml | 0 .../unicode/uax42/fragments}/identifier.xml | 0 .../unicode/uax42/fragments}/ideographs.xml | 0 .../org/unicode/uax42/fragments}/isc.xml | 0 .../uax42/fragments}/jis-code-point.xml | 0 .../org/unicode/uax42/fragments}/joining.xml | 3 +- .../org/unicode/uax42/fragments}/lb.xml | 0 
.../uax42/fragments}/miscellaneous.xml | 0 .../org/unicode/uax42/fragments}/na.xml | 0 .../org/unicode/uax42/fragments}/na1.xml | 0 .../uax42/fragments}/named-sequences.xml | 0 .../unicode/uax42/fragments}/namespace.xml | 0 .../fragments}/normalization-corrections.xml | 0 .../org/unicode/uax42/fragments}/numeric.xml | 0 .../org/unicode/uax42/fragments}/pattern.xml | 0 .../unicode/uax42/fragments}/quickcheck.xml | 0 .../unicode/uax42/fragments}/repertoire.xml | 0 .../fragments/repertoire_Code_points.xml | 0 .../org/unicode/uax42/fragments}/script.xml | 0 .../uax42/fragments}/simple_case_mapping.xml | 0 .../fragments}/standardized-variants.xml | 0 .../org/unicode/uax42/fragments}/start.xml | 0 .../resources/org/unicode}/uax42/index.xml | 122 ++++++++--------- .../org/unicode}/uax42/index2html.xsl | 0 .../org/unicode}/uax42/index2rnc.xsl | 0 .../org/unicode}/uax42/output/index.html | 4 +- .../org/unicode}/uax42/output/index.rnc | 4 +- .../main/resources/org/unicode}/uax42/pom.xml | 4 +- 73 files changed, 95 insertions(+), 213 deletions(-) rename uax/uax42/Readme.md => docs/ucdxml.md (60%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/Bidi_C.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/Bidi_M.xml (100%) rename {uax/uax42/fragments/emoji-data => unicodetools/src/main/resources/org/unicode/uax42/fragments}/Emoji.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/InCB.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/InPC.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/InSC.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/JSN.xml (100%) rename {uax/uax42/fragments/properties => 
unicodetools/src/main/resources/org/unicode/uax42/fragments}/Join_C.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/Name_Alias.xml (100%) rename {uax/uax42/fragments/nushu => unicodetools/src/main/resources/org/unicode/uax42/fragments}/Nushu.xml (100%) rename uax/uax42/fragments/repertoire/Set of code points.xml => unicodetools/src/main/resources/org/unicode/uax42/fragments/Set_of_code_points.xml (100%) rename {uax/uax42/fragments/tangut => unicodetools/src/main/resources/org/unicode/uax42/fragments}/Tangut.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/Unihan.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/age.xml (96%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/bc.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/blk.xml (100%) rename {uax/uax42/fragments/block => unicodetools/src/main/resources/org/unicode/uax42/fragments}/block.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/bmg.xml (100%) rename {uax/uax42/fragments/boolean => unicodetools/src/main/resources/org/unicode/uax42/fragments}/boolean.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/boundaries.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/bpb.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/bpt.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/case_folding.xml (100%) rename {uax/uax42/fragments/properties => 
unicodetools/src/main/resources/org/unicode/uax42/fragments}/case_mapping.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/case_other.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/casing.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/ccc.xml (100%) rename {uax/uax42/fragments/cjk-radicals => unicodetools/src/main/resources/org/unicode/uax42/fragments}/cjk-radicals.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/cjkEACC.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/cjkIRG_TSource.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/composition.xml (100%) rename {uax/uax42/fragments/datatypes => unicodetools/src/main/resources/org/unicode/uax42/fragments}/datatypes.xml (100%) rename uax/uax42/fragments/datatypes/code points.xml => unicodetools/src/main/resources/org/unicode/uax42/fragments/datatypes_code_points.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/decomposition.xml (100%) rename {uax/uax42/fragments/description => unicodetools/src/main/resources/org/unicode/uax42/fragments}/description.xml (100%) rename {uax/uax42/fragments/do-not-emit => unicodetools/src/main/resources/org/unicode/uax42/fragments}/do-not-emit.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/ea.xml (100%) rename {uax/uax42/fragments/emoji-sources => unicodetools/src/main/resources/org/unicode/uax42/fragments}/emoji-sources.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/function_graphic.xml (100%) rename 
{uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/gc.xml (100%) rename {uax/uax42/fragments/repertoire => unicodetools/src/main/resources/org/unicode/uax42/fragments}/groups.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/hst.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/identifier.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/ideographs.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/isc.xml (100%) rename {uax/uax42/fragments/datatypes => unicodetools/src/main/resources/org/unicode/uax42/fragments}/jis-code-point.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/joining.xml (96%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/lb.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/miscellaneous.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/na.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/na1.xml (100%) rename {uax/uax42/fragments/named-sequences => unicodetools/src/main/resources/org/unicode/uax42/fragments}/named-sequences.xml (100%) rename {uax/uax42/fragments/namespace => unicodetools/src/main/resources/org/unicode/uax42/fragments}/namespace.xml (100%) rename {uax/uax42/fragments/normalization-corrections => unicodetools/src/main/resources/org/unicode/uax42/fragments}/normalization-corrections.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/numeric.xml (100%) rename 
{uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/pattern.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/quickcheck.xml (100%) rename {uax/uax42/fragments/repertoire => unicodetools/src/main/resources/org/unicode/uax42/fragments}/repertoire.xml (100%) rename uax/uax42/fragments/repertoire/Code points.xml => unicodetools/src/main/resources/org/unicode/uax42/fragments/repertoire_Code_points.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/script.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/simple_case_mapping.xml (100%) rename {uax/uax42/fragments/standardized-variants => unicodetools/src/main/resources/org/unicode/uax42/fragments}/standardized-variants.xml (100%) rename {uax/uax42/fragments/start => unicodetools/src/main/resources/org/unicode/uax42/fragments}/start.xml (100%) rename {uax => unicodetools/src/main/resources/org/unicode}/uax42/index.xml (94%) rename {uax => unicodetools/src/main/resources/org/unicode}/uax42/index2html.xsl (100%) rename {uax => unicodetools/src/main/resources/org/unicode}/uax42/index2rnc.xsl (100%) rename {uax => unicodetools/src/main/resources/org/unicode}/uax42/output/index.html (99%) rename {uax => unicodetools/src/main/resources/org/unicode}/uax42/output/index.rnc (99%) rename {uax => unicodetools/src/main/resources/org/unicode}/uax42/pom.xml (94%) diff --git a/uax/uax42/Readme.md b/docs/ucdxml.md similarity index 60% rename from uax/uax42/Readme.md rename to docs/ucdxml.md index f2533a2a2..207842db2 100644 --- a/uax/uax42/Readme.md +++ b/docs/ucdxml.md @@ -2,13 +2,11 @@ ## Step 1 - Generate property value fragments -- Run org.unicode.xml.GeneratePropertyValues to populate the UNICODETOOLS_REPO_DIR/uax/uax42/fragments/ folder. 
+- mvn compile exec:java '-Dexec.mainClass="org.unicode.xml.GeneratePropertyValues"' '-Dexec.args="--ucdversion 16.0.0 -f $(cd ./unicodetools/src/main/resources/org/unicode/uax42/fragments; pwd)"' -DCLDR_DIR=$(cd ../cldr ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd ../Generated ; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) ## Step 2 - Generate TR42 index.html and index.rnc -- In UNICODETOOLS_REPO_DIR/uax/uax42/ run `mvn xml:transform` - - index.html and index.rnc will be generated in UNICODETOOLS_REPO_DIR/uax/uax42/output/ +- mvn xml:transform -f $(cd ./unicodetools/src/main/resources/org/unicode/uax42/fragments; pwd) -Doutputdir=../Generated/uax42/ ## Step 3 - Validate generated UAX XML files diff --git a/unicodetools/src/main/java/org/unicode/xml/AttributeResolver.java b/unicodetools/src/main/java/org/unicode/xml/AttributeResolver.java index 87db09582..393bb3281 100644 --- a/unicodetools/src/main/java/org/unicode/xml/AttributeResolver.java +++ b/unicodetools/src/main/java/org/unicode/xml/AttributeResolver.java @@ -10,46 +10,11 @@ public class AttributeResolver { private final IndexUnicodeProperties indexUnicodeProperties; private final UnicodeMap map_age; - private final UnicodeMap map_bidi_class; - private final UnicodeMap - map_bidi_paired_bracket_type; private final UnicodeMap map_block; - private final UnicodeMap - map_canonical_combining_class; private final UnicodeMap map_decomposition_type; - private final UnicodeMap map_do_not_emit_type; - private final UnicodeMap map_east_asian_width; private final UnicodeMap map_general_category; - private final UnicodeMap - map_grapheme_cluster_break; - private final UnicodeMap - map_hangul_syllable_type; - private final UnicodeMap map_identifier_status; - private final UnicodeMap map_identifier_type; - private final UnicodeMap map_idn_2008; - private final UnicodeMap map_idn_status; - private final UnicodeMap - map_indic_conjunct_break; - private final UnicodeMap - map_indic_positional_category; - private final UnicodeMap - 
map_indic_syllabic_category; - private final UnicodeMap map_jamo_short_name; - private final UnicodeMap map_joining_group; - private final UnicodeMap map_joining_type; - private final UnicodeMap map_line_break; - private final UnicodeMap map_nfc_quick_check; - private final UnicodeMap map_nfd_quick_check; - private final UnicodeMap map_nfkc_quick_check; - private final UnicodeMap map_nfkd_quick_check; - private final UnicodeMap map_numeric_type; - private final UnicodeMap map_other_joining_type; private final UnicodeMap map_script; private final UnicodeMap map_script_extensions; - private final UnicodeMap map_sentence_break; - private final UnicodeMap - map_vertical_orientation; - private final UnicodeMap map_word_break; private final HashMap> map_NameAlias; // If there is a change in any of these properties between two adjacent characters, it will @@ -67,47 +32,12 @@ public class AttributeResolver { public AttributeResolver(IndexUnicodeProperties iup) { indexUnicodeProperties = iup; map_age = indexUnicodeProperties.loadEnum(UcdProperty.Age); - map_bidi_class = indexUnicodeProperties.loadEnum(UcdProperty.Bidi_Class); - map_bidi_paired_bracket_type = - indexUnicodeProperties.loadEnum(UcdProperty.Bidi_Paired_Bracket_Type); map_block = indexUnicodeProperties.loadEnum(UcdProperty.Block); - map_canonical_combining_class = - indexUnicodeProperties.loadEnum(UcdProperty.Canonical_Combining_Class); map_decomposition_type = indexUnicodeProperties.loadEnum(UcdProperty.Decomposition_Type); - map_do_not_emit_type = indexUnicodeProperties.loadEnum(UcdProperty.Do_Not_Emit_Type); - map_east_asian_width = indexUnicodeProperties.loadEnum(UcdProperty.East_Asian_Width); map_general_category = indexUnicodeProperties.loadEnum(UcdProperty.General_Category); - map_grapheme_cluster_break = - indexUnicodeProperties.loadEnum(UcdProperty.Grapheme_Cluster_Break); - map_hangul_syllable_type = - indexUnicodeProperties.loadEnum(UcdProperty.Hangul_Syllable_Type); - map_identifier_status = 
indexUnicodeProperties.loadEnum(UcdProperty.Identifier_Status); - map_identifier_type = indexUnicodeProperties.loadEnum(UcdProperty.Identifier_Type); - map_idn_2008 = indexUnicodeProperties.loadEnum(UcdProperty.Idn_2008); - map_idn_status = indexUnicodeProperties.loadEnum(UcdProperty.Idn_Status); - map_indic_conjunct_break = - indexUnicodeProperties.loadEnum(UcdProperty.Indic_Conjunct_Break); - map_indic_positional_category = - indexUnicodeProperties.loadEnum(UcdProperty.Indic_Positional_Category); - map_indic_syllabic_category = - indexUnicodeProperties.loadEnum(UcdProperty.Indic_Syllabic_Category); - map_jamo_short_name = indexUnicodeProperties.loadEnum(UcdProperty.Jamo_Short_Name); - map_joining_group = indexUnicodeProperties.loadEnum(UcdProperty.Joining_Group); - map_joining_type = indexUnicodeProperties.loadEnum(UcdProperty.Joining_Type); - map_line_break = indexUnicodeProperties.loadEnum(UcdProperty.Line_Break); - map_nfc_quick_check = indexUnicodeProperties.loadEnum(UcdProperty.NFC_Quick_Check); - map_nfd_quick_check = indexUnicodeProperties.loadEnum(UcdProperty.NFD_Quick_Check); - map_nfkc_quick_check = indexUnicodeProperties.loadEnum(UcdProperty.NFKC_Quick_Check); - map_nfkd_quick_check = indexUnicodeProperties.loadEnum(UcdProperty.NFKD_Quick_Check); - map_numeric_type = indexUnicodeProperties.loadEnum(UcdProperty.Numeric_Type); - map_other_joining_type = indexUnicodeProperties.loadEnum(UcdProperty.Other_Joining_Type); map_script = indexUnicodeProperties.loadEnum(UcdProperty.Script); map_script_extensions = indexUnicodeProperties.getProperty(UcdProperty.Script_Extensions).getUnicodeMap(); - map_sentence_break = indexUnicodeProperties.loadEnum(UcdProperty.Sentence_Break); - map_vertical_orientation = - indexUnicodeProperties.loadEnum(UcdProperty.Vertical_Orientation); - map_word_break = indexUnicodeProperties.loadEnum(UcdProperty.Word_Break); // UCD code is only set up to read a single Alias value from NameAliases.txt // Instead, we'll load the Alias and 
the Type data as part of the constructor. We'll keep in @@ -295,12 +225,6 @@ public String getAttributeValue(UcdProperty prop, int codepoint) { } case Enumerated: switch (prop) { - case Bidi_Class: - return map_bidi_class.get(codepoint).getShortName(); - case Bidi_Paired_Bracket_Type: - return map_bidi_paired_bracket_type.get(codepoint).getShortName(); - case Canonical_Combining_Class: - return map_canonical_combining_class.get(codepoint).getShortName(); case Decomposition_Type: // Returning lower case to maintain compatibility with older generated // files. @@ -308,56 +232,10 @@ public String getAttributeValue(UcdProperty prop, int codepoint) { .get(codepoint) .getShortName() .toLowerCase(Locale.ROOT); - case Do_Not_Emit_Type: - return map_do_not_emit_type.get(codepoint).getShortName(); - case East_Asian_Width: - return map_east_asian_width.get(codepoint).getShortName(); - case General_Category: - return map_general_category.get(codepoint).getShortName(); - case Grapheme_Cluster_Break: - return map_grapheme_cluster_break.get(codepoint).getShortName(); - case Hangul_Syllable_Type: - return map_hangul_syllable_type.get(codepoint).getShortName(); - case Identifier_Status: - return map_identifier_status.get(codepoint).getShortName(); - case Identifier_Type: - return map_identifier_type.get(codepoint).getShortName(); - case Idn_2008: - return map_idn_2008.get(codepoint).getShortName(); - case Idn_Status: - return map_idn_status.get(codepoint).getShortName(); - case Indic_Conjunct_Break: - return map_indic_conjunct_break.get(codepoint).getShortName(); - case Indic_Positional_Category: - return map_indic_positional_category.get(codepoint).getShortName(); - case Indic_Syllabic_Category: - return map_indic_syllabic_category.get(codepoint).getShortName(); - case Joining_Group: - return map_joining_group.get(codepoint).getShortName(); - case Joining_Type: - return map_joining_type.get(codepoint).getShortName(); - case Line_Break: - return 
map_line_break.get(codepoint).getShortName(); - case NFC_Quick_Check: - return map_nfc_quick_check.get(codepoint).getShortName(); - case NFD_Quick_Check: - return map_nfd_quick_check.get(codepoint).getShortName(); - case NFKC_Quick_Check: - return map_nfkc_quick_check.get(codepoint).getShortName(); - case NFKD_Quick_Check: - return map_nfkd_quick_check.get(codepoint).getShortName(); - case Numeric_Type: - return map_numeric_type.get(codepoint).getShortName(); - case Other_Joining_Type: - return map_other_joining_type.get(codepoint).getShortName(); - case Sentence_Break: - return map_sentence_break.get(codepoint).getShortName(); - case Vertical_Orientation: - return map_vertical_orientation.get(codepoint).getShortName(); - case Word_Break: - return map_word_break.get(codepoint).getShortName(); default: - throw new RuntimeException("Missing Enumerated case"); + final UnicodeProperty property = indexUnicodeProperties.getProperty(prop); + final List valueAliases = property.getValueAliases(property.getValue(codepoint)); + return valueAliases.get(0); } case Binary: { diff --git a/unicodetools/src/main/java/org/unicode/xml/GeneratePropertyValues.java b/unicodetools/src/main/java/org/unicode/xml/GeneratePropertyValues.java index 83d4be093..f8a0dfa27 100644 --- a/unicodetools/src/main/java/org/unicode/xml/GeneratePropertyValues.java +++ b/unicodetools/src/main/java/org/unicode/xml/GeneratePropertyValues.java @@ -385,7 +385,7 @@ private static void createPropertyFragment( private static void createPropertyFragment( String filename, String title, SCHEMA schema, String formattedFragment) throws IOException { - BufferedWriter writer = getFragmentWriter(schema.getName(), filename); + BufferedWriter writer = getFragmentWriter(filename); writer.write( "" + NEWLINE @@ -403,10 +403,10 @@ private static void createPropertyFragment( writer.close(); } - private static BufferedWriter getFragmentWriter(String schema, String filename) + private static BufferedWriter 
getFragmentWriter(String filename) throws IOException { File fragmentFolder = - new File(destinationFolder + File.separator + schema + File.separator); + new File(destinationFolder + File.separator); if (!fragmentFolder.exists()) { if (!fragmentFolder.mkdir()) { throw new IOException(); diff --git a/unicodetools/src/main/resources/org/unicode/props/IndexPropertyRegex.txt b/unicodetools/src/main/resources/org/unicode/props/IndexPropertyRegex.txt index ac6499419..70c52767f 100644 --- a/unicodetools/src/main/resources/org/unicode/props/IndexPropertyRegex.txt +++ b/unicodetools/src/main/resources/org/unicode/props/IndexPropertyRegex.txt @@ -197,23 +197,23 @@ NC_Version ; SINGLE_VALUED ; [0-9]\.[0-9]\.[0-9] # All not listed are SINGLE_VALUED ; null # ============================= -Script_Extensions ; MULTI_VALUED ; -Standardized_Variant ; MULTI_VALUED ; .* +Script_Extensions ; MULTI_VALUED ; +Standardized_Variant ; MULTI_VALUED ; .* -Idn_Status ; SINGLE_VALUED ; -Idn_Mapping ; SINGLE_VALUED ; $codePoints -Idn_2008 ; SINGLE_VALUED ; +Idn_Status ; SINGLE_VALUED ; +Idn_Mapping ; SINGLE_VALUED ; $codePoints +Idn_2008 ; SINGLE_VALUED ; -Identifier_Status ; SINGLE_VALUED ; -Identifier_Type ; MULTI_VALUED ; +Identifier_Status ; SINGLE_VALUED ; +Identifier_Type ; MULTI_VALUED ; -Confusable_SL ; SINGLE_VALUED ; $codePoints -Confusable_SA ; SINGLE_VALUED ; $codePoints -Confusable_ML ; SINGLE_VALUED ; $codePoints -Confusable_MA ; SINGLE_VALUED ; $codePoints +Confusable_SL ; SINGLE_VALUED ; $codePoints +Confusable_SA ; SINGLE_VALUED ; $codePoints +Confusable_ML ; SINGLE_VALUED ; $codePoints +Confusable_MA ; SINGLE_VALUED ; $codePoints -#Emoji ; SINGLE_VALUED ; -#Emoji_Presentation ; SINGLE_VALUED ; -#Emoji_Modifier ; SINGLE_VALUED ; -#Emoji_Modifier_Base ; SINGLE_VALUED ; +#Emoji ; SINGLE_VALUED ; +#Emoji_Presentation ; SINGLE_VALUED ; +#Emoji_Modifier ; SINGLE_VALUED ; +#Emoji_Modifier_Base ; SINGLE_VALUED ; diff --git a/uax/uax42/fragments/properties/Bidi_C.xml 
b/unicodetools/src/main/resources/org/unicode/uax42/fragments/Bidi_C.xml similarity index 100% rename from uax/uax42/fragments/properties/Bidi_C.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/Bidi_C.xml diff --git a/uax/uax42/fragments/properties/Bidi_M.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/Bidi_M.xml similarity index 100% rename from uax/uax42/fragments/properties/Bidi_M.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/Bidi_M.xml diff --git a/uax/uax42/fragments/emoji-data/Emoji.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/Emoji.xml similarity index 100% rename from uax/uax42/fragments/emoji-data/Emoji.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/Emoji.xml diff --git a/uax/uax42/fragments/properties/InCB.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/InCB.xml similarity index 100% rename from uax/uax42/fragments/properties/InCB.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/InCB.xml diff --git a/uax/uax42/fragments/properties/InPC.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/InPC.xml similarity index 100% rename from uax/uax42/fragments/properties/InPC.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/InPC.xml diff --git a/uax/uax42/fragments/properties/InSC.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/InSC.xml similarity index 100% rename from uax/uax42/fragments/properties/InSC.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/InSC.xml diff --git a/uax/uax42/fragments/properties/JSN.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/JSN.xml similarity index 100% rename from uax/uax42/fragments/properties/JSN.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/JSN.xml diff --git a/uax/uax42/fragments/properties/Join_C.xml 
b/unicodetools/src/main/resources/org/unicode/uax42/fragments/Join_C.xml similarity index 100% rename from uax/uax42/fragments/properties/Join_C.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/Join_C.xml diff --git a/uax/uax42/fragments/properties/Name_Alias.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/Name_Alias.xml similarity index 100% rename from uax/uax42/fragments/properties/Name_Alias.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/Name_Alias.xml diff --git a/uax/uax42/fragments/nushu/Nushu.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/Nushu.xml similarity index 100% rename from uax/uax42/fragments/nushu/Nushu.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/Nushu.xml diff --git a/uax/uax42/fragments/repertoire/Set of code points.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/Set_of_code_points.xml similarity index 100% rename from uax/uax42/fragments/repertoire/Set of code points.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/Set_of_code_points.xml diff --git a/uax/uax42/fragments/tangut/Tangut.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/Tangut.xml similarity index 100% rename from uax/uax42/fragments/tangut/Tangut.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/Tangut.xml diff --git a/uax/uax42/fragments/properties/Unihan.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/Unihan.xml similarity index 100% rename from uax/uax42/fragments/properties/Unihan.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/Unihan.xml diff --git a/uax/uax42/fragments/properties/age.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/age.xml similarity index 96% rename from uax/uax42/fragments/properties/age.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/age.xml index c15963146..8a1722f22 
100644 --- a/uax/uax42/fragments/properties/age.xml +++ b/unicodetools/src/main/resources/org/unicode/uax42/fragments/age.xml @@ -17,6 +17,7 @@ | "14.0" | "15.0" | "15.1" | "16.0" + | "17.0" | "unassigned" }?
    \ No newline at end of file diff --git a/uax/uax42/fragments/properties/bc.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/bc.xml similarity index 100% rename from uax/uax42/fragments/properties/bc.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/bc.xml diff --git a/uax/uax42/fragments/properties/blk.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/blk.xml similarity index 100% rename from uax/uax42/fragments/properties/blk.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/blk.xml diff --git a/uax/uax42/fragments/block/block.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/block.xml similarity index 100% rename from uax/uax42/fragments/block/block.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/block.xml diff --git a/uax/uax42/fragments/properties/bmg.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/bmg.xml similarity index 100% rename from uax/uax42/fragments/properties/bmg.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/bmg.xml diff --git a/uax/uax42/fragments/boolean/boolean.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/boolean.xml similarity index 100% rename from uax/uax42/fragments/boolean/boolean.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/boolean.xml diff --git a/uax/uax42/fragments/properties/boundaries.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/boundaries.xml similarity index 100% rename from uax/uax42/fragments/properties/boundaries.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/boundaries.xml diff --git a/uax/uax42/fragments/properties/bpb.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/bpb.xml similarity index 100% rename from uax/uax42/fragments/properties/bpb.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/bpb.xml diff --git 
a/uax/uax42/fragments/properties/bpt.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/bpt.xml similarity index 100% rename from uax/uax42/fragments/properties/bpt.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/bpt.xml diff --git a/uax/uax42/fragments/properties/case_folding.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/case_folding.xml similarity index 100% rename from uax/uax42/fragments/properties/case_folding.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/case_folding.xml diff --git a/uax/uax42/fragments/properties/case_mapping.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/case_mapping.xml similarity index 100% rename from uax/uax42/fragments/properties/case_mapping.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/case_mapping.xml diff --git a/uax/uax42/fragments/properties/case_other.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/case_other.xml similarity index 100% rename from uax/uax42/fragments/properties/case_other.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/case_other.xml diff --git a/uax/uax42/fragments/properties/casing.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/casing.xml similarity index 100% rename from uax/uax42/fragments/properties/casing.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/casing.xml diff --git a/uax/uax42/fragments/properties/ccc.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/ccc.xml similarity index 100% rename from uax/uax42/fragments/properties/ccc.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/ccc.xml diff --git a/uax/uax42/fragments/cjk-radicals/cjk-radicals.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/cjk-radicals.xml similarity index 100% rename from uax/uax42/fragments/cjk-radicals/cjk-radicals.xml rename to 
unicodetools/src/main/resources/org/unicode/uax42/fragments/cjk-radicals.xml diff --git a/uax/uax42/fragments/properties/cjkEACC.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/cjkEACC.xml similarity index 100% rename from uax/uax42/fragments/properties/cjkEACC.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/cjkEACC.xml diff --git a/uax/uax42/fragments/properties/cjkIRG_TSource.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/cjkIRG_TSource.xml similarity index 100% rename from uax/uax42/fragments/properties/cjkIRG_TSource.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/cjkIRG_TSource.xml diff --git a/uax/uax42/fragments/properties/composition.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/composition.xml similarity index 100% rename from uax/uax42/fragments/properties/composition.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/composition.xml diff --git a/uax/uax42/fragments/datatypes/datatypes.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/datatypes.xml similarity index 100% rename from uax/uax42/fragments/datatypes/datatypes.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/datatypes.xml diff --git a/uax/uax42/fragments/datatypes/code points.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/datatypes_code_points.xml similarity index 100% rename from uax/uax42/fragments/datatypes/code points.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/datatypes_code_points.xml diff --git a/uax/uax42/fragments/properties/decomposition.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/decomposition.xml similarity index 100% rename from uax/uax42/fragments/properties/decomposition.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/decomposition.xml diff --git a/uax/uax42/fragments/description/description.xml 
b/unicodetools/src/main/resources/org/unicode/uax42/fragments/description.xml similarity index 100% rename from uax/uax42/fragments/description/description.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/description.xml diff --git a/uax/uax42/fragments/do-not-emit/do-not-emit.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/do-not-emit.xml similarity index 100% rename from uax/uax42/fragments/do-not-emit/do-not-emit.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/do-not-emit.xml diff --git a/uax/uax42/fragments/properties/ea.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/ea.xml similarity index 100% rename from uax/uax42/fragments/properties/ea.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/ea.xml diff --git a/uax/uax42/fragments/emoji-sources/emoji-sources.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/emoji-sources.xml similarity index 100% rename from uax/uax42/fragments/emoji-sources/emoji-sources.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/emoji-sources.xml diff --git a/uax/uax42/fragments/properties/function_graphic.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/function_graphic.xml similarity index 100% rename from uax/uax42/fragments/properties/function_graphic.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/function_graphic.xml diff --git a/uax/uax42/fragments/properties/gc.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/gc.xml similarity index 100% rename from uax/uax42/fragments/properties/gc.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/gc.xml diff --git a/uax/uax42/fragments/repertoire/groups.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/groups.xml similarity index 100% rename from uax/uax42/fragments/repertoire/groups.xml rename to 
unicodetools/src/main/resources/org/unicode/uax42/fragments/groups.xml diff --git a/uax/uax42/fragments/properties/hst.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/hst.xml similarity index 100% rename from uax/uax42/fragments/properties/hst.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/hst.xml diff --git a/uax/uax42/fragments/properties/identifier.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/identifier.xml similarity index 100% rename from uax/uax42/fragments/properties/identifier.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/identifier.xml diff --git a/uax/uax42/fragments/properties/ideographs.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/ideographs.xml similarity index 100% rename from uax/uax42/fragments/properties/ideographs.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/ideographs.xml diff --git a/uax/uax42/fragments/properties/isc.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/isc.xml similarity index 100% rename from uax/uax42/fragments/properties/isc.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/isc.xml diff --git a/uax/uax42/fragments/datatypes/jis-code-point.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/jis-code-point.xml similarity index 100% rename from uax/uax42/fragments/datatypes/jis-code-point.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/jis-code-point.xml diff --git a/uax/uax42/fragments/properties/joining.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/joining.xml similarity index 96% rename from uax/uax42/fragments/properties/joining.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/joining.xml index ba6684a27..184fcca14 100644 --- a/uax/uax42/fragments/properties/joining.xml +++ b/unicodetools/src/main/resources/org/unicode/uax42/fragments/joining.xml @@ -14,7 +14,8 
@@ | "Hah" | "Hanifi_Rohingya_Kinna_Ya" | "Hanifi_Rohingya_Pa" | "He" | "Heh" | "Heh_Goal" | "Heth" - | "Kaf" | "Kaph" | "Khaph" | "Knotted_Heh" + | "Kaf" | "Kaph" | "Kashmiri_Yeh" | "Khaph" + | "Knotted_Heh" | "Lam" | "Lamadh" | "Malayalam_Bha" | "Malayalam_Ja" | "Malayalam_Lla" | "Malayalam_Llla" | "Malayalam_Nga" diff --git a/uax/uax42/fragments/properties/lb.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/lb.xml similarity index 100% rename from uax/uax42/fragments/properties/lb.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/lb.xml diff --git a/uax/uax42/fragments/properties/miscellaneous.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/miscellaneous.xml similarity index 100% rename from uax/uax42/fragments/properties/miscellaneous.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/miscellaneous.xml diff --git a/uax/uax42/fragments/properties/na.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/na.xml similarity index 100% rename from uax/uax42/fragments/properties/na.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/na.xml diff --git a/uax/uax42/fragments/properties/na1.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/na1.xml similarity index 100% rename from uax/uax42/fragments/properties/na1.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/na1.xml diff --git a/uax/uax42/fragments/named-sequences/named-sequences.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/named-sequences.xml similarity index 100% rename from uax/uax42/fragments/named-sequences/named-sequences.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/named-sequences.xml diff --git a/uax/uax42/fragments/namespace/namespace.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/namespace.xml similarity index 100% rename from uax/uax42/fragments/namespace/namespace.xml rename to 
unicodetools/src/main/resources/org/unicode/uax42/fragments/namespace.xml diff --git a/uax/uax42/fragments/normalization-corrections/normalization-corrections.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/normalization-corrections.xml similarity index 100% rename from uax/uax42/fragments/normalization-corrections/normalization-corrections.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/normalization-corrections.xml diff --git a/uax/uax42/fragments/properties/numeric.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/numeric.xml similarity index 100% rename from uax/uax42/fragments/properties/numeric.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/numeric.xml diff --git a/uax/uax42/fragments/properties/pattern.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/pattern.xml similarity index 100% rename from uax/uax42/fragments/properties/pattern.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/pattern.xml diff --git a/uax/uax42/fragments/properties/quickcheck.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/quickcheck.xml similarity index 100% rename from uax/uax42/fragments/properties/quickcheck.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/quickcheck.xml diff --git a/uax/uax42/fragments/repertoire/repertoire.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/repertoire.xml similarity index 100% rename from uax/uax42/fragments/repertoire/repertoire.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/repertoire.xml diff --git a/uax/uax42/fragments/repertoire/Code points.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/repertoire_Code_points.xml similarity index 100% rename from uax/uax42/fragments/repertoire/Code points.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/repertoire_Code_points.xml diff --git 
a/uax/uax42/fragments/properties/script.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/script.xml similarity index 100% rename from uax/uax42/fragments/properties/script.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/script.xml diff --git a/uax/uax42/fragments/properties/simple_case_mapping.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/simple_case_mapping.xml similarity index 100% rename from uax/uax42/fragments/properties/simple_case_mapping.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/simple_case_mapping.xml diff --git a/uax/uax42/fragments/standardized-variants/standardized-variants.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/standardized-variants.xml similarity index 100% rename from uax/uax42/fragments/standardized-variants/standardized-variants.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/standardized-variants.xml diff --git a/uax/uax42/fragments/start/start.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/start.xml similarity index 100% rename from uax/uax42/fragments/start/start.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/start.xml diff --git a/uax/uax42/index.xml b/unicodetools/src/main/resources/org/unicode/uax42/index.xml similarity index 94% rename from uax/uax42/index.xml rename to unicodetools/src/main/resources/org/unicode/uax42/index.xml index 1ea2f0f65..6b4733a2b 100644 --- a/uax/uax42/index.xml +++ b/unicodetools/src/main/resources/org/unicode/uax42/index.xml @@ -650,7 +650,7 @@ The namespace for our elements is “http://www.unicode.org/ns/2003/ucd/1.0”. Our attributes are in the empty namespace. - + In all our examples, we assume that this namespace is the default one. @@ -659,7 +659,7 @@
    Datatypes We use a standard XML Schema datatypes: - + Characters are pervasive in the UCD, and will need to be represented. Representing characters directly by themselves would seem the most obvious choice; for example, we could express that the decomposition of U+00E8 is “&#x0065;&#x0300;”, that is have exactly two characters in (the @@ -672,7 +672,7 @@ decomposition of U+00E8 will be represented by the nine characters “0065 0300” in the infoset. - +
    @@ -680,7 +680,7 @@ Root Element The root element of valid documents is a ucd. - + @@ -689,7 +689,7 @@ A large number of properties are boolean. We uniformly use the values Y and N for those: - + @@ -713,7 +713,7 @@ url='https://www.unicode.org/reports/tr41/tr41-34.html#Versions'>[Versions]; and conversely, that documents which do not purport to represent the UCD be described as such. - + @@ -722,7 +722,7 @@ The repertoire child element of the ucd element describes the code points and their properties. As we will see shortly, code points can be described individually or as part of a group: - +
    @@ -733,7 +733,7 @@ .. U+9FA5) where all the code points have the same property values if we ignore their name and their Unihan properties. - + This observation suggests that it is profitable to represent sets of code points which share the same properties, rather than individual code points. To make the representation of the sets simple, we restrict them to be segments in the code point space, that is a set is defined by the first and @@ -759,7 +759,7 @@ This leads to four elements to describe sets of code points: - +
    @@ -811,7 +811,7 @@ easier: either a property is defined by the element for a code point, or it is defined by the immediately enclosing group element. - + @@ -845,7 +845,7 @@ The age attribute captures the version of Unicode in which a code point was assigned to an abstract character, or made a surrogate or non-character. - + @@ -855,8 +855,8 @@ (na), and possibly the name this character had in version 1.0 of the standard (na1). - - + + The majority of the characters in Unicode have a name which is of the form CJK UNIFIED IDEOGRAPH-<code point>. It also happens that character names cannot contain the character U+0023 # NUMBER SIGN, so we adopted the following convention: if a @@ -890,7 +890,7 @@ The Name_Alias property is represented by zero or more name-alias child elements: - + @@ -898,7 +898,7 @@ Block property The Block property is represented by the blk attribute: - + @@ -906,7 +906,7 @@ General Category The general category is represented by the gc attribute. - + @@ -918,7 +918,7 @@ Because the set of values that this property has taken across the various versions of the UCD is rather large, our schema does not restrict the possible values to those actually used. - + @@ -926,15 +926,15 @@ Bidirectionality properties The bidirectional class is represented by the bc attribute. - + The mirrored property is represented by the Bidi_M attribute, which takes a boolean value. - + The bmg attribute is the code point of a character whose glyph is typically a mirrored image of the glyph for the current character. - + Note that we do not express the “Best Fit” element recorded in BidiMirroring.txt. For one thing, it is not meant to be machine readable. More importantly, the idea underlying the mirrored glyph is delicate to use, since it makes assumptions about the design of the fonts, and @@ -942,12 +942,12 @@ The Bidi_Control property is represented by the Bidi_C attribute. 
- + The bidi paired bracket type and bidi paired bracket properties are represented by the bpt and bpb attributes respectively. - - + + @@ -961,17 +961,17 @@ decomposition mapping is the character itself, we use the attribute value # (U+0023 # NUMBER SIGN) as a shorthand notation; this enables those attributes to be captured in groups. - + The properties Composition_Exclusion and Full_Composition_Exclusion are represented by the attributes CE and Comp_Ex: - + The properties NFC_Quick_Check, NFD_Quick_Check, NFKC_Quick_Check, NFKD_Quick_Check, Expands_On_NFC, Expands_On_NFD, Expands_On_NFKC, Expands_On_NKFD, FC_NFKC_Closure have corresponding attributes. - + @@ -982,7 +982,7 @@ The numeric value is represented by the nv attribute, represented as a whole number or a fraction. - + @@ -992,10 +992,10 @@ The jg attribute is the joining group of the character. - + The Join_Control property is represented by the Join_C attribute. - + @@ -1003,7 +1003,7 @@ Linebreak properties The Line_Break property is represented by the lb attribute. - + @@ -1011,7 +1011,7 @@ East Asian Width property The East Asian width property is represented by the ea attribute. - + @@ -1020,7 +1020,7 @@ The Uppercase, Lowercase, Other_Uppercase and Other_Lowercase properties are represented by corresponding attributes. - + Most characters have a case mapping and case folding properties that simply map or fold to themselves. This is very similar to the situation we encountered with names, and we adopted a similar convention: if the value of a case mapping or case folding property is the character @@ -1030,22 +1030,22 @@ The simple case mappings are recorded in the suc, slc, stc attributes. - + The non-simple casing are recorded in the uc, lc and tc attributes. - + The Simple_Case_Folding and Case_Folding properties are recorded in the scf and cf attributes respectively. 
- + The Case_Ignorable, Cased, Changes_When_Casefolded, Changes_When_Casemapped, Changes_When_Lowercased, Changes_When_NFKC_Casefolded, Changes_When_Titlecased, Changes_When_Uppercased, NFKC_Casefold, and NFKC_Simple_Casefold properties are recorded in these attributes: - + Note that the UCD records more information about case folding than is expressed in the properties, specifically the entries in CaseFolding.txt with status T. @@ -1057,7 +1057,7 @@ The script and script extension properties are represented by the sc and scx attributes respectively. - + @@ -1065,7 +1065,7 @@ ISO Comment properties The ISO 10646 comment field is represented by the isc attribute. - + @@ -1073,10 +1073,10 @@ Hangul properties The property Hangul_Syllable_Type is represented by the hst attribute. - + The property Jamo_Short_Name is represented by the JSN attribute: - + @@ -1085,14 +1085,14 @@ The property Indic_Syllabic_Category is represented by the InSC attribute. - + The property Indic_Positional_Category is represented by the InPC attribute: - + The property Indic_Conjunct_Break is represented by the InCB attribute: - + @@ -1104,11 +1104,11 @@ ID_Compat_Math_Start, and ID_Compat_Math_Continue are represented by corresponding attributes: - + The properties Pattern_Syntax and Pattern_White_Space are represented by corresponding attributes: - + @@ -1125,7 +1125,7 @@ describe the function or graphic characteristic of a character, and have each a corresponding attribute. - + @@ -1136,7 +1136,7 @@ Grapheme_Cluster_Break, Word_Break, and Sentence_Break each have a corresponding attribute: - + @@ -1147,7 +1147,7 @@ IDS_Trinary_Operator, IDS_Unary_Operator, and Radical have corresponding attributes: - + @@ -1156,7 +1156,7 @@ The properties Deprecated, Variation_Selector, and Noncharacter_Code_Point have corresponding attributes: - + @@ -1164,7 +1164,7 @@ Unihan properties The Unihan properties (from the Unihan database) are represented as attributes. 
- + @@ -1174,7 +1174,7 @@ represents the radical stroke index. The attribute kTGT_MergedSrc indicates the source reference for the character. - + @@ -1183,7 +1183,7 @@ The Nushu data are represented as attributes. The attribute kSrc_NushuDuben indicates the page number and order of the item from the NushuDuben reference source. Nushu common reading is represented as kReading. - + @@ -1192,7 +1192,7 @@ The properties Emoji, EPres, EMod, EBase, EComp, and ExtPict have corresponding attributes: - + @@ -1203,7 +1203,7 @@ The blocks child of the ucd describes the blocks. It has one child block element per block, with attributes to describe the extent and name of the block. - + @@ -1216,7 +1216,7 @@ Similarly, the provisional-named-sequences child of the ucd describes the provisional named sequences. - + @@ -1227,7 +1227,7 @@ attributes to describe the code point affected, its old normalization, its new normalization and the version of Unicode in which the correction was made. - + @@ -1238,7 +1238,7 @@ last element capture the variation sequence, the description of the desired appearance, and the shaping environment under which the appearance is different. - + @@ -1248,7 +1248,7 @@ child element cjk-radical per radical. The attributes on that last element capture the radical number, the corresponding CJK radical character, and the corresponding CJK unified ideograph. - + @@ -1256,9 +1256,9 @@ Emoji sources The emoji-sources child of the ucd describes the emoji sources. - + - + @@ -1268,7 +1268,7 @@ character sequences that should not be emitted or generated in newly authored texts. 
- + diff --git a/uax/uax42/index2html.xsl b/unicodetools/src/main/resources/org/unicode/uax42/index2html.xsl similarity index 100% rename from uax/uax42/index2html.xsl rename to unicodetools/src/main/resources/org/unicode/uax42/index2html.xsl diff --git a/uax/uax42/index2rnc.xsl b/unicodetools/src/main/resources/org/unicode/uax42/index2rnc.xsl similarity index 100% rename from uax/uax42/index2rnc.xsl rename to unicodetools/src/main/resources/org/unicode/uax42/index2rnc.xsl diff --git a/uax/uax42/output/index.html b/unicodetools/src/main/resources/org/unicode/uax42/output/index.html similarity index 99% rename from uax/uax42/output/index.html rename to unicodetools/src/main/resources/org/unicode/uax42/output/index.html index ccde1ac04..13bf8181d 100644 --- a/uax/uax42/output/index.html +++ b/unicodetools/src/main/resources/org/unicode/uax42/output/index.html @@ -646,6 +646,7 @@

    | "14.0" | "15.0" | "15.1" | "16.0" + | "17.0" | "unassigned" }? @@ -1368,7 +1369,8 @@

    | "Hah" | "Hanifi_Rohingya_Kinna_Ya" | "Hanifi_Rohingya_Pa" | "He" | "Heh" | "Heh_Goal" | "Heth" - | "Kaf" | "Kaph" | "Khaph" | "Knotted_Heh" + | "Kaf" | "Kaph" | "Kashmiri_Yeh" | "Khaph" + | "Knotted_Heh" | "Lam" | "Lamadh" | "Malayalam_Bha" | "Malayalam_Ja" | "Malayalam_Lla" | "Malayalam_Llla" | "Malayalam_Nga" diff --git a/uax/uax42/output/index.rnc b/unicodetools/src/main/resources/org/unicode/uax42/output/index.rnc similarity index 99% rename from uax/uax42/output/index.rnc rename to unicodetools/src/main/resources/org/unicode/uax42/output/index.rnc index 7cdf380f3..84d9b5875 100644 --- a/uax/uax42/output/index.rnc +++ b/unicodetools/src/main/resources/org/unicode/uax42/output/index.rnc @@ -79,6 +79,7 @@ | "14.0" | "15.0" | "15.1" | "16.0" + | "17.0" | "unassigned" }? @@ -553,7 +554,8 @@ | "Hah" | "Hanifi_Rohingya_Kinna_Ya" | "Hanifi_Rohingya_Pa" | "He" | "Heh" | "Heh_Goal" | "Heth" - | "Kaf" | "Kaph" | "Khaph" | "Knotted_Heh" + | "Kaf" | "Kaph" | "Kashmiri_Yeh" | "Khaph" + | "Knotted_Heh" | "Lam" | "Lamadh" | "Malayalam_Bha" | "Malayalam_Ja" | "Malayalam_Lla" | "Malayalam_Llla" | "Malayalam_Nga" diff --git a/uax/uax42/pom.xml b/unicodetools/src/main/resources/org/unicode/uax42/pom.xml similarity index 94% rename from uax/uax42/pom.xml rename to unicodetools/src/main/resources/org/unicode/uax42/pom.xml index c18d2f3d0..9ae81d56f 100644 --- a/uax/uax42/pom.xml +++ b/unicodetools/src/main/resources/org/unicode/uax42/pom.xml @@ -35,7 +35,7 @@ index.xml index2html.xsl - ${project.basedir}/output/ + ${outputdir} .html @@ -49,7 +49,7 @@ index.xml index2rnc.xsl - ${project.basedir}/output/ + ${outputdir} .rnc