diff --git a/.gitignore b/.gitignore index 44e8ce9b0..005b72fc0 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,8 @@ # Test data repo ignored. Please see instruction in tests-and-benchmarks.markdown /tests/text-test-data/ + +# Data for case functions +/scripts/CaseFolding.txt +/scripts/SpecialCasing.txt +/scripts/db-*.txt diff --git a/cabal.tests.project b/cabal.tests.project new file mode 100644 index 000000000..f6e611425 --- /dev/null +++ b/cabal.tests.project @@ -0,0 +1,5 @@ +-- this project doesn't have local 'text' package, +-- so tests build faster. + +packages: tests +tests: True diff --git a/scripts/CaseFolding.hs b/scripts/CaseFolding.hs index 11d180ca9..627482e26 100644 --- a/scripts/CaseFolding.hs +++ b/scripts/CaseFolding.hs @@ -12,6 +12,12 @@ module CaseFolding import Arsec +import Data.Char (ord) +import Data.Maybe (mapMaybe) +import qualified Data.List as L +import qualified Data.Set as Set +import qualified Data.Map as Map + data Fold = Fold { code :: Char , status :: Char @@ -33,14 +39,104 @@ entries = CF <$> many comment <*> many (entry <* many comment) parseCF :: FilePath -> IO (Either ParseError CaseFolding) parseCF name = parse entries name <$> readFile name -mapCF :: CaseFolding -> [String] -mapCF (CF _ ms) = typ ++ (map nice . filter p $ ms) ++ [last] +-- We generate mapping trying to call toLower +mapCF :: [Map.Map Char (Char,Char,Char)] -> CaseFolding -> [String] +mapCF dbs (CF _ ms) = concat + [ typ + , mapMaybe nice [ minBound .. maxBound ] + , [last] + ] where + -- characters for which base's toLower has different results + different :: Set.Set Char + different + = Map.keysSet + $ Map.filter g + $ L.foldl' (alignWith f) (Map.map ToChar dbh) dbt + where + dbh : dbt = map (Map.mapMaybeWithKey h) dbs + + -- Only valid case is when both lhs and rhs of alignment + -- have the same single character. + -- Everything else is considered as different. 
+ -- + f :: These Maps Char -> Maps + f (This _) = Differently + f (That _) = Differently + f (These Differently _) = Differently + f (These (ToChar l) l') + | l == l' = ToChar l + | otherwise = Differently + + -- We are only interested in Differently + -- for these we cannot trust toLower + g :: Maps -> Bool + g Differently = True + g (ToChar _) = False + + -- look for toLower data only + h c (_, l, _) | c == l = Nothing + | otherwise = Just l + + -- we are only interested in C and F cases + -- * C: common case folding + -- * F: full case folding + -- + -- Case Folding says + -- + -- Usage: + -- A. To do a simple case folding, use the mappings with status C + S. + -- B. To do a full case folding, use the mappings with status C + F. + -- + folds :: Map.Map Char Fold + folds = Map.fromList + $ map (\f -> (code f, f)) + $ filter (\f -> status f `elem` "CF") ms + + -- there are three cases: + -- + nice :: Char -> Maybe String + nice c + -- not mapping to toLower, and toLower is same for all GHCs + | s /= [toLower c], not isDifferent + = Just + $ "-- " ++ n ++ "\n" ++ + "foldMapping " ++ showC c ++ " s = Yield " ++ x ++ " (CC s " ++ y ++ " " ++ z ++ ")" + + -- when toLower cannot be trusted + | isDifferent + = Just + $ "-- " ++ n ++ "\n" ++ + "foldMapping " ++ showC c ++ " s = Yield " ++ x ++ " (CC s " ++ y ++ " " ++ z ++ ")" + + -- otherwise omit, to be handled by catch all toLower case. + | otherwise + = Nothing + where + s :: [Char] -- mapping + n :: String -- name + (n, s) = maybe (defName, [c]) (\f -> (name f, mapping f)) (Map.lookup c folds) + + isDifferent = Set.member c different + + [x,y,z] = (map showC . take 3) (s ++ repeat '\0') + + defName = "NOT FOLDED TO toLower " ++ showC c + typ = ["foldMapping :: forall s. 
Char -> s -> Step (CC s) Char" ,"{-# NOINLINE foldMapping #-}"] last = "foldMapping c s = Yield (toLower c) (CC s '\\0' '\\0')" - nice c = "-- " ++ name c ++ "\n" ++ - "foldMapping " ++ showC (code c) ++ " s = Yield " ++ x ++ " (CC s " ++ y ++ " " ++ z ++ ")" - where [x,y,z] = (map showC . take 3) (mapping c ++ repeat '\0') - p f = status f `elem` "CF" && - mapping f /= [toLower (code f)] + +-- auxiliary data type used to determine whether toLower is the same for a char +data Maps + = ToChar Char + | Differently + deriving Show + +alignWith :: Ord k => (These a b -> c) -> Map.Map k a -> Map.Map k b -> Map.Map k c +alignWith f = Map.mergeWithKey + (\_ x y -> Just $ f $ These x y) + (Map.map (f . This)) + (Map.map (f . That)) + +data These a b = This a | That b | These a b deriving Show diff --git a/scripts/CaseMapping.hs b/scripts/CaseMapping.hs index e6350ab87..d8e721bb3 100644 --- a/scripts/CaseMapping.hs +++ b/scripts/CaseMapping.hs @@ -5,11 +5,23 @@ import Arsec import CaseFolding import SpecialCasing +import qualified Data.Map as Map + +-- 1. download SpecialCasing.txt and CaseFolding.txt files from unicode.org +-- +-- 2. dump Char DB by running dump.sh +-- +-- 3. 
run from scripts/ directory with +-- +-- runghc-8.6.5 -package-env=- CaseMapping.hs +-- + main = do args <- getArgs let oname = case args of - [] -> "../Data/Text/Internal/Fusion/CaseMapping.hs" + [] -> "../src/Data/Text/Internal/Fusion/CaseMapping.hs" [o] -> o + dbs <- loadDBs psc <- parseSC "SpecialCasing.txt" pcf <- parseCF "CaseFolding.txt" scs <- case psc of @@ -34,5 +46,29 @@ main = do mapM_ (hPutStrLn h) (mapSC "upper" upper toUpper scs) mapM_ (hPutStrLn h) (mapSC "lower" lower toLower scs) mapM_ (hPutStrLn h) (mapSC "title" title toTitle scs) - mapM_ (hPutStrLn h) (mapCF cfs) + mapM_ (hPutStrLn h) (mapCF dbs cfs) hClose h + +loadDBs :: IO [Map.Map Char (Char,Char,Char)] +loadDBs = mapM loadDB + [ "7.0.4" + , "7.2.2" + , "7.4.2" + , "7.6.3" + , "7.8.4" + , "7.10.3" + + , "8.0.2" + , "8.2.2" + , "8.4.4" + , "8.6.5" + , "8.8.4" + , "8.10.2" + + -- , "9.0.1" + ] + where + loadDB v = fmap (f . read) (readFile ("db-" ++ v ++ ".txt")) + + f :: [(Char,Char,Char,Char)] -> Map.Map Char (Char,Char,Char) + f = Map.fromList . map (\(c,u,l,t) -> (c,(u,l,t))) diff --git a/scripts/Dump.hs b/scripts/Dump.hs new file mode 100644 index 000000000..a870e227a --- /dev/null +++ b/scripts/Dump.hs @@ -0,0 +1,15 @@ +-- This script is used to dump casing DB from GHCs base library + +import Data.Char + +main :: IO () +main = print + [ (c, u, l, t) + | c <- [ minBound .. 
maxBound ] + , let u = toUpper c + , let l = toLower c + , let t = toTitle c + + -- we dump only characters which have some transformations + , c /= u || c /= l || c /= t + ] diff --git a/scripts/dump.sh b/scripts/dump.sh new file mode 100644 index 000000000..4db5d145f --- /dev/null +++ b/scripts/dump.sh @@ -0,0 +1,23 @@ +#!/bin/sh + +set -ex + +dump() { + "runghc-$1" Dump.hs > "db-$1.txt" +} + +dump 7.0.4 +dump 7.2.2 +dump 7.4.2 +dump 7.6.3 +dump 7.8.4 +dump 7.10.3 + +dump 8.0.2 +dump 8.2.2 +dump 8.4.4 +dump 8.6.5 +dump 8.8.4 +dump 8.10.2 + +# dump 9.0.1 diff --git a/scripts/tests.sh b/scripts/tests.sh new file mode 100644 index 000000000..b3d6e87f9 --- /dev/null +++ b/scripts/tests.sh @@ -0,0 +1,30 @@ +#!/bin/sh + +set -ex + +runtest() { + HC=$1 + shift + + # EDIT last line to pass arguments + + cabal run text-tests:test:tests \ + --project-file=cabal.tests.project \ + --builddir="dist-newstyle/$HC" \ + --with-compiler="$HC" \ + -- "$@" +} + +runtest ghc-8.10.2 "$@" +runtest ghc-8.8.4 "$@" +runtest ghc-8.6.5 "$@" +runtest ghc-8.4.4 "$@" +runtest ghc-8.2.2 "$@" +runtest ghc-8.0.2 "$@" + +runtest ghc-7.10.3 "$@" +runtest ghc-7.8.4 "$@" +runtest ghc-7.6.3 "$@" +runtest ghc-7.4.2 "$@" +runtest ghc-7.2.2 "$@" +runtest ghc-7.0.4 "$@" diff --git a/src/Data/Text/Internal/Fusion/CaseMapping.hs b/src/Data/Text/Internal/Fusion/CaseMapping.hs index 0ea4f1a53..93944daed 100644 --- a/src/Data/Text/Internal/Fusion/CaseMapping.hs +++ b/src/Data/Text/Internal/Fusion/CaseMapping.hs @@ -1,10 +1,10 @@ {-# LANGUAGE Rank2Types #-} -- AUTOMATICALLY GENERATED - DO NOT EDIT -- Generated by scripts/CaseMapping.hs --- CaseFolding-9.0.0.txt --- Date: 2016-03-02, 18:54:54 GMT --- SpecialCasing-9.0.0.txt --- Date: 2016-03-02, 18:55:13 GMT +-- CaseFolding-13.0.0.txt +-- Date: 2019-09-08, 23:30:59 GMT +-- SpecialCasing-13.0.0.txt +-- Date: 2019-09-08, 23:31:24 GMT module Data.Text.Internal.Fusion.CaseMapping where import Data.Char @@ -337,6 +337,8 @@ foldMapping '\x017f' s = Yield '\x0073' (CC 
s '\x0000' '\x0000') foldMapping '\x01f0' s = Yield '\x006a' (CC s '\x030c' '\x0000') -- COMBINING GREEK YPOGEGRAMMENI foldMapping '\x0345' s = Yield '\x03b9' (CC s '\x0000' '\x0000') +-- GREEK CAPITAL LETTER YOT +foldMapping '\x037f' s = Yield '\x03f3' (CC s '\x0000' '\x0000') -- GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS foldMapping '\x0390' s = Yield '\x03b9' (CC s '\x0308' '\x0301') -- GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS @@ -357,8 +359,196 @@ foldMapping '\x03f0' s = Yield '\x03ba' (CC s '\x0000' '\x0000') foldMapping '\x03f1' s = Yield '\x03c1' (CC s '\x0000' '\x0000') -- GREEK LUNATE EPSILON SYMBOL foldMapping '\x03f5' s = Yield '\x03b5' (CC s '\x0000' '\x0000') +-- CYRILLIC CAPITAL LETTER PE WITH DESCENDER +foldMapping '\x0524' s = Yield '\x0525' (CC s '\x0000' '\x0000') +-- CYRILLIC CAPITAL LETTER SHHA WITH DESCENDER +foldMapping '\x0526' s = Yield '\x0527' (CC s '\x0000' '\x0000') +-- CYRILLIC CAPITAL LETTER EN WITH LEFT HOOK +foldMapping '\x0528' s = Yield '\x0529' (CC s '\x0000' '\x0000') +-- CYRILLIC CAPITAL LETTER DZZHE +foldMapping '\x052a' s = Yield '\x052b' (CC s '\x0000' '\x0000') +-- CYRILLIC CAPITAL LETTER DCHE +foldMapping '\x052c' s = Yield '\x052d' (CC s '\x0000' '\x0000') +-- CYRILLIC CAPITAL LETTER EL WITH DESCENDER +foldMapping '\x052e' s = Yield '\x052f' (CC s '\x0000' '\x0000') -- ARMENIAN SMALL LIGATURE ECH YIWN foldMapping '\x0587' s = Yield '\x0565' (CC s '\x0582' '\x0000') +-- GEORGIAN CAPITAL LETTER YN +foldMapping '\x10c7' s = Yield '\x2d27' (CC s '\x0000' '\x0000') +-- GEORGIAN CAPITAL LETTER AEN +foldMapping '\x10cd' s = Yield '\x2d2d' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13a0' +foldMapping '\x13a0' s = Yield '\x13a0' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13a1' +foldMapping '\x13a1' s = Yield '\x13a1' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13a2' +foldMapping '\x13a2' s = Yield '\x13a2' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13a3' +foldMapping 
'\x13a3' s = Yield '\x13a3' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13a4' +foldMapping '\x13a4' s = Yield '\x13a4' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13a5' +foldMapping '\x13a5' s = Yield '\x13a5' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13a6' +foldMapping '\x13a6' s = Yield '\x13a6' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13a7' +foldMapping '\x13a7' s = Yield '\x13a7' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13a8' +foldMapping '\x13a8' s = Yield '\x13a8' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13a9' +foldMapping '\x13a9' s = Yield '\x13a9' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13aa' +foldMapping '\x13aa' s = Yield '\x13aa' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13ab' +foldMapping '\x13ab' s = Yield '\x13ab' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13ac' +foldMapping '\x13ac' s = Yield '\x13ac' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13ad' +foldMapping '\x13ad' s = Yield '\x13ad' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13ae' +foldMapping '\x13ae' s = Yield '\x13ae' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13af' +foldMapping '\x13af' s = Yield '\x13af' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13b0' +foldMapping '\x13b0' s = Yield '\x13b0' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13b1' +foldMapping '\x13b1' s = Yield '\x13b1' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13b2' +foldMapping '\x13b2' s = Yield '\x13b2' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13b3' +foldMapping '\x13b3' s = Yield '\x13b3' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13b4' +foldMapping '\x13b4' s = Yield '\x13b4' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13b5' +foldMapping '\x13b5' s = Yield '\x13b5' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13b6' +foldMapping '\x13b6' s = Yield '\x13b6' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower 
'\x13b7' +foldMapping '\x13b7' s = Yield '\x13b7' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13b8' +foldMapping '\x13b8' s = Yield '\x13b8' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13b9' +foldMapping '\x13b9' s = Yield '\x13b9' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13ba' +foldMapping '\x13ba' s = Yield '\x13ba' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13bb' +foldMapping '\x13bb' s = Yield '\x13bb' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13bc' +foldMapping '\x13bc' s = Yield '\x13bc' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13bd' +foldMapping '\x13bd' s = Yield '\x13bd' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13be' +foldMapping '\x13be' s = Yield '\x13be' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13bf' +foldMapping '\x13bf' s = Yield '\x13bf' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13c0' +foldMapping '\x13c0' s = Yield '\x13c0' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13c1' +foldMapping '\x13c1' s = Yield '\x13c1' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13c2' +foldMapping '\x13c2' s = Yield '\x13c2' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13c3' +foldMapping '\x13c3' s = Yield '\x13c3' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13c4' +foldMapping '\x13c4' s = Yield '\x13c4' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13c5' +foldMapping '\x13c5' s = Yield '\x13c5' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13c6' +foldMapping '\x13c6' s = Yield '\x13c6' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13c7' +foldMapping '\x13c7' s = Yield '\x13c7' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13c8' +foldMapping '\x13c8' s = Yield '\x13c8' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13c9' +foldMapping '\x13c9' s = Yield '\x13c9' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13ca' +foldMapping '\x13ca' s = Yield '\x13ca' (CC s '\x0000' '\x0000') +-- 
NOT FOLDED TO toLower '\x13cb' +foldMapping '\x13cb' s = Yield '\x13cb' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13cc' +foldMapping '\x13cc' s = Yield '\x13cc' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13cd' +foldMapping '\x13cd' s = Yield '\x13cd' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13ce' +foldMapping '\x13ce' s = Yield '\x13ce' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13cf' +foldMapping '\x13cf' s = Yield '\x13cf' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13d0' +foldMapping '\x13d0' s = Yield '\x13d0' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13d1' +foldMapping '\x13d1' s = Yield '\x13d1' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13d2' +foldMapping '\x13d2' s = Yield '\x13d2' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13d3' +foldMapping '\x13d3' s = Yield '\x13d3' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13d4' +foldMapping '\x13d4' s = Yield '\x13d4' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13d5' +foldMapping '\x13d5' s = Yield '\x13d5' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13d6' +foldMapping '\x13d6' s = Yield '\x13d6' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13d7' +foldMapping '\x13d7' s = Yield '\x13d7' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13d8' +foldMapping '\x13d8' s = Yield '\x13d8' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13d9' +foldMapping '\x13d9' s = Yield '\x13d9' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13da' +foldMapping '\x13da' s = Yield '\x13da' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13db' +foldMapping '\x13db' s = Yield '\x13db' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13dc' +foldMapping '\x13dc' s = Yield '\x13dc' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13dd' +foldMapping '\x13dd' s = Yield '\x13dd' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13de' +foldMapping '\x13de' s = Yield '\x13de' (CC s 
'\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13df' +foldMapping '\x13df' s = Yield '\x13df' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13e0' +foldMapping '\x13e0' s = Yield '\x13e0' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13e1' +foldMapping '\x13e1' s = Yield '\x13e1' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13e2' +foldMapping '\x13e2' s = Yield '\x13e2' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13e3' +foldMapping '\x13e3' s = Yield '\x13e3' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13e4' +foldMapping '\x13e4' s = Yield '\x13e4' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13e5' +foldMapping '\x13e5' s = Yield '\x13e5' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13e6' +foldMapping '\x13e6' s = Yield '\x13e6' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13e7' +foldMapping '\x13e7' s = Yield '\x13e7' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13e8' +foldMapping '\x13e8' s = Yield '\x13e8' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13e9' +foldMapping '\x13e9' s = Yield '\x13e9' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13ea' +foldMapping '\x13ea' s = Yield '\x13ea' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13eb' +foldMapping '\x13eb' s = Yield '\x13eb' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13ec' +foldMapping '\x13ec' s = Yield '\x13ec' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13ed' +foldMapping '\x13ed' s = Yield '\x13ed' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13ee' +foldMapping '\x13ee' s = Yield '\x13ee' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13ef' +foldMapping '\x13ef' s = Yield '\x13ef' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13f0' +foldMapping '\x13f0' s = Yield '\x13f0' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13f1' +foldMapping '\x13f1' s = Yield '\x13f1' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13f2' +foldMapping '\x13f2' s = 
Yield '\x13f2' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13f3' +foldMapping '\x13f3' s = Yield '\x13f3' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13f4' +foldMapping '\x13f4' s = Yield '\x13f4' (CC s '\x0000' '\x0000') +-- NOT FOLDED TO toLower '\x13f5' +foldMapping '\x13f5' s = Yield '\x13f5' (CC s '\x0000' '\x0000') -- CHEROKEE SMALL LETTER YE foldMapping '\x13f8' s = Yield '\x13f0' (CC s '\x0000' '\x0000') -- CHEROKEE SMALL LETTER YI @@ -389,6 +579,98 @@ foldMapping '\x1c86' s = Yield '\x044a' (CC s '\x0000' '\x0000') foldMapping '\x1c87' s = Yield '\x0463' (CC s '\x0000' '\x0000') -- CYRILLIC SMALL LETTER UNBLENDED UK foldMapping '\x1c88' s = Yield '\xa64b' (CC s '\x0000' '\x0000') +-- GEORGIAN MTAVRULI CAPITAL LETTER AN +foldMapping '\x1c90' s = Yield '\x10d0' (CC s '\x0000' '\x0000') +-- GEORGIAN MTAVRULI CAPITAL LETTER BAN +foldMapping '\x1c91' s = Yield '\x10d1' (CC s '\x0000' '\x0000') +-- GEORGIAN MTAVRULI CAPITAL LETTER GAN +foldMapping '\x1c92' s = Yield '\x10d2' (CC s '\x0000' '\x0000') +-- GEORGIAN MTAVRULI CAPITAL LETTER DON +foldMapping '\x1c93' s = Yield '\x10d3' (CC s '\x0000' '\x0000') +-- GEORGIAN MTAVRULI CAPITAL LETTER EN +foldMapping '\x1c94' s = Yield '\x10d4' (CC s '\x0000' '\x0000') +-- GEORGIAN MTAVRULI CAPITAL LETTER VIN +foldMapping '\x1c95' s = Yield '\x10d5' (CC s '\x0000' '\x0000') +-- GEORGIAN MTAVRULI CAPITAL LETTER ZEN +foldMapping '\x1c96' s = Yield '\x10d6' (CC s '\x0000' '\x0000') +-- GEORGIAN MTAVRULI CAPITAL LETTER TAN +foldMapping '\x1c97' s = Yield '\x10d7' (CC s '\x0000' '\x0000') +-- GEORGIAN MTAVRULI CAPITAL LETTER IN +foldMapping '\x1c98' s = Yield '\x10d8' (CC s '\x0000' '\x0000') +-- GEORGIAN MTAVRULI CAPITAL LETTER KAN +foldMapping '\x1c99' s = Yield '\x10d9' (CC s '\x0000' '\x0000') +-- GEORGIAN MTAVRULI CAPITAL LETTER LAS +foldMapping '\x1c9a' s = Yield '\x10da' (CC s '\x0000' '\x0000') +-- GEORGIAN MTAVRULI CAPITAL LETTER MAN +foldMapping '\x1c9b' s = Yield '\x10db' (CC s '\x0000' '\x0000') 
+-- GEORGIAN MTAVRULI CAPITAL LETTER NAR +foldMapping '\x1c9c' s = Yield '\x10dc' (CC s '\x0000' '\x0000') +-- GEORGIAN MTAVRULI CAPITAL LETTER ON +foldMapping '\x1c9d' s = Yield '\x10dd' (CC s '\x0000' '\x0000') +-- GEORGIAN MTAVRULI CAPITAL LETTER PAR +foldMapping '\x1c9e' s = Yield '\x10de' (CC s '\x0000' '\x0000') +-- GEORGIAN MTAVRULI CAPITAL LETTER ZHAR +foldMapping '\x1c9f' s = Yield '\x10df' (CC s '\x0000' '\x0000') +-- GEORGIAN MTAVRULI CAPITAL LETTER RAE +foldMapping '\x1ca0' s = Yield '\x10e0' (CC s '\x0000' '\x0000') +-- GEORGIAN MTAVRULI CAPITAL LETTER SAN +foldMapping '\x1ca1' s = Yield '\x10e1' (CC s '\x0000' '\x0000') +-- GEORGIAN MTAVRULI CAPITAL LETTER TAR +foldMapping '\x1ca2' s = Yield '\x10e2' (CC s '\x0000' '\x0000') +-- GEORGIAN MTAVRULI CAPITAL LETTER UN +foldMapping '\x1ca3' s = Yield '\x10e3' (CC s '\x0000' '\x0000') +-- GEORGIAN MTAVRULI CAPITAL LETTER PHAR +foldMapping '\x1ca4' s = Yield '\x10e4' (CC s '\x0000' '\x0000') +-- GEORGIAN MTAVRULI CAPITAL LETTER KHAR +foldMapping '\x1ca5' s = Yield '\x10e5' (CC s '\x0000' '\x0000') +-- GEORGIAN MTAVRULI CAPITAL LETTER GHAN +foldMapping '\x1ca6' s = Yield '\x10e6' (CC s '\x0000' '\x0000') +-- GEORGIAN MTAVRULI CAPITAL LETTER QAR +foldMapping '\x1ca7' s = Yield '\x10e7' (CC s '\x0000' '\x0000') +-- GEORGIAN MTAVRULI CAPITAL LETTER SHIN +foldMapping '\x1ca8' s = Yield '\x10e8' (CC s '\x0000' '\x0000') +-- GEORGIAN MTAVRULI CAPITAL LETTER CHIN +foldMapping '\x1ca9' s = Yield '\x10e9' (CC s '\x0000' '\x0000') +-- GEORGIAN MTAVRULI CAPITAL LETTER CAN +foldMapping '\x1caa' s = Yield '\x10ea' (CC s '\x0000' '\x0000') +-- GEORGIAN MTAVRULI CAPITAL LETTER JIL +foldMapping '\x1cab' s = Yield '\x10eb' (CC s '\x0000' '\x0000') +-- GEORGIAN MTAVRULI CAPITAL LETTER CIL +foldMapping '\x1cac' s = Yield '\x10ec' (CC s '\x0000' '\x0000') +-- GEORGIAN MTAVRULI CAPITAL LETTER CHAR +foldMapping '\x1cad' s = Yield '\x10ed' (CC s '\x0000' '\x0000') +-- GEORGIAN MTAVRULI CAPITAL LETTER XAN +foldMapping '\x1cae' s = 
Yield '\x10ee' (CC s '\x0000' '\x0000') +-- GEORGIAN MTAVRULI CAPITAL LETTER JHAN +foldMapping '\x1caf' s = Yield '\x10ef' (CC s '\x0000' '\x0000') +-- GEORGIAN MTAVRULI CAPITAL LETTER HAE +foldMapping '\x1cb0' s = Yield '\x10f0' (CC s '\x0000' '\x0000') +-- GEORGIAN MTAVRULI CAPITAL LETTER HE +foldMapping '\x1cb1' s = Yield '\x10f1' (CC s '\x0000' '\x0000') +-- GEORGIAN MTAVRULI CAPITAL LETTER HIE +foldMapping '\x1cb2' s = Yield '\x10f2' (CC s '\x0000' '\x0000') +-- GEORGIAN MTAVRULI CAPITAL LETTER WE +foldMapping '\x1cb3' s = Yield '\x10f3' (CC s '\x0000' '\x0000') +-- GEORGIAN MTAVRULI CAPITAL LETTER HAR +foldMapping '\x1cb4' s = Yield '\x10f4' (CC s '\x0000' '\x0000') +-- GEORGIAN MTAVRULI CAPITAL LETTER HOE +foldMapping '\x1cb5' s = Yield '\x10f5' (CC s '\x0000' '\x0000') +-- GEORGIAN MTAVRULI CAPITAL LETTER FI +foldMapping '\x1cb6' s = Yield '\x10f6' (CC s '\x0000' '\x0000') +-- GEORGIAN MTAVRULI CAPITAL LETTER YN +foldMapping '\x1cb7' s = Yield '\x10f7' (CC s '\x0000' '\x0000') +-- GEORGIAN MTAVRULI CAPITAL LETTER ELIFI +foldMapping '\x1cb8' s = Yield '\x10f8' (CC s '\x0000' '\x0000') +-- GEORGIAN MTAVRULI CAPITAL LETTER TURNED GAN +foldMapping '\x1cb9' s = Yield '\x10f9' (CC s '\x0000' '\x0000') +-- GEORGIAN MTAVRULI CAPITAL LETTER AIN +foldMapping '\x1cba' s = Yield '\x10fa' (CC s '\x0000' '\x0000') +-- GEORGIAN MTAVRULI CAPITAL LETTER AEN +foldMapping '\x1cbd' s = Yield '\x10fd' (CC s '\x0000' '\x0000') +-- GEORGIAN MTAVRULI CAPITAL LETTER HARD SIGN +foldMapping '\x1cbe' s = Yield '\x10fe' (CC s '\x0000' '\x0000') +-- GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN +foldMapping '\x1cbf' s = Yield '\x10ff' (CC s '\x0000' '\x0000') -- LATIN SMALL LETTER H WITH LINE BELOW foldMapping '\x1e96' s = Yield '\x0068' (CC s '\x0331' '\x0000') -- LATIN SMALL LETTER T WITH DIAERESIS @@ -563,8 +845,64 @@ foldMapping '\x1ff6' s = Yield '\x03c9' (CC s '\x0342' '\x0000') foldMapping '\x1ff7' s = Yield '\x03c9' (CC s '\x0342' '\x03b9') -- GREEK CAPITAL LETTER OMEGA WITH 
PROSGEGRAMMENI foldMapping '\x1ffc' s = Yield '\x03c9' (CC s '\x03b9' '\x0000') +-- LATIN CAPITAL LETTER TURNED ALPHA +foldMapping '\x2c70' s = Yield '\x0252' (CC s '\x0000' '\x0000') +-- LATIN CAPITAL LETTER S WITH SWASH TAIL +foldMapping '\x2c7e' s = Yield '\x023f' (CC s '\x0000' '\x0000') +-- LATIN CAPITAL LETTER Z WITH SWASH TAIL +foldMapping '\x2c7f' s = Yield '\x0240' (CC s '\x0000' '\x0000') +-- COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI +foldMapping '\x2ceb' s = Yield '\x2cec' (CC s '\x0000' '\x0000') +-- COPTIC CAPITAL LETTER CRYPTOGRAMMIC GANGIA +foldMapping '\x2ced' s = Yield '\x2cee' (CC s '\x0000' '\x0000') +-- COPTIC CAPITAL LETTER BOHAIRIC KHEI +foldMapping '\x2cf2' s = Yield '\x2cf3' (CC s '\x0000' '\x0000') +-- CYRILLIC CAPITAL LETTER REVERSED TSE +foldMapping '\xa660' s = Yield '\xa661' (CC s '\x0000' '\x0000') +-- CYRILLIC CAPITAL LETTER DOUBLE O +foldMapping '\xa698' s = Yield '\xa699' (CC s '\x0000' '\x0000') +-- CYRILLIC CAPITAL LETTER CROSSED O +foldMapping '\xa69a' s = Yield '\xa69b' (CC s '\x0000' '\x0000') +-- LATIN CAPITAL LETTER TURNED H +foldMapping '\xa78d' s = Yield '\x0265' (CC s '\x0000' '\x0000') +-- LATIN CAPITAL LETTER N WITH DESCENDER +foldMapping '\xa790' s = Yield '\xa791' (CC s '\x0000' '\x0000') +-- LATIN CAPITAL LETTER C WITH BAR +foldMapping '\xa792' s = Yield '\xa793' (CC s '\x0000' '\x0000') +-- LATIN CAPITAL LETTER B WITH FLOURISH +foldMapping '\xa796' s = Yield '\xa797' (CC s '\x0000' '\x0000') +-- LATIN CAPITAL LETTER F WITH STROKE +foldMapping '\xa798' s = Yield '\xa799' (CC s '\x0000' '\x0000') +-- LATIN CAPITAL LETTER VOLAPUK AE +foldMapping '\xa79a' s = Yield '\xa79b' (CC s '\x0000' '\x0000') +-- LATIN CAPITAL LETTER VOLAPUK OE +foldMapping '\xa79c' s = Yield '\xa79d' (CC s '\x0000' '\x0000') +-- LATIN CAPITAL LETTER VOLAPUK UE +foldMapping '\xa79e' s = Yield '\xa79f' (CC s '\x0000' '\x0000') +-- LATIN CAPITAL LETTER G WITH OBLIQUE STROKE +foldMapping '\xa7a0' s = Yield '\xa7a1' (CC s '\x0000' '\x0000') +-- LATIN 
CAPITAL LETTER K WITH OBLIQUE STROKE +foldMapping '\xa7a2' s = Yield '\xa7a3' (CC s '\x0000' '\x0000') +-- LATIN CAPITAL LETTER N WITH OBLIQUE STROKE +foldMapping '\xa7a4' s = Yield '\xa7a5' (CC s '\x0000' '\x0000') +-- LATIN CAPITAL LETTER R WITH OBLIQUE STROKE +foldMapping '\xa7a6' s = Yield '\xa7a7' (CC s '\x0000' '\x0000') +-- LATIN CAPITAL LETTER S WITH OBLIQUE STROKE +foldMapping '\xa7a8' s = Yield '\xa7a9' (CC s '\x0000' '\x0000') +-- LATIN CAPITAL LETTER H WITH HOOK +foldMapping '\xa7aa' s = Yield '\x0266' (CC s '\x0000' '\x0000') +-- LATIN CAPITAL LETTER REVERSED OPEN E +foldMapping '\xa7ab' s = Yield '\x025c' (CC s '\x0000' '\x0000') +-- LATIN CAPITAL LETTER SCRIPT G +foldMapping '\xa7ac' s = Yield '\x0261' (CC s '\x0000' '\x0000') +-- LATIN CAPITAL LETTER L WITH BELT +foldMapping '\xa7ad' s = Yield '\x026c' (CC s '\x0000' '\x0000') -- LATIN CAPITAL LETTER SMALL CAPITAL I foldMapping '\xa7ae' s = Yield '\x026a' (CC s '\x0000' '\x0000') +-- LATIN CAPITAL LETTER TURNED K +foldMapping '\xa7b0' s = Yield '\x029e' (CC s '\x0000' '\x0000') +-- LATIN CAPITAL LETTER TURNED T +foldMapping '\xa7b1' s = Yield '\x0287' (CC s '\x0000' '\x0000') -- LATIN CAPITAL LETTER J WITH CROSSED-TAIL foldMapping '\xa7b2' s = Yield '\x029d' (CC s '\x0000' '\x0000') -- LATIN CAPITAL LETTER CHI @@ -573,6 +911,28 @@ foldMapping '\xa7b3' s = Yield '\xab53' (CC s '\x0000' '\x0000') foldMapping '\xa7b4' s = Yield '\xa7b5' (CC s '\x0000' '\x0000') -- LATIN CAPITAL LETTER OMEGA foldMapping '\xa7b6' s = Yield '\xa7b7' (CC s '\x0000' '\x0000') +-- LATIN CAPITAL LETTER U WITH STROKE +foldMapping '\xa7b8' s = Yield '\xa7b9' (CC s '\x0000' '\x0000') +-- LATIN CAPITAL LETTER GLOTTAL A +foldMapping '\xa7ba' s = Yield '\xa7bb' (CC s '\x0000' '\x0000') +-- LATIN CAPITAL LETTER GLOTTAL I +foldMapping '\xa7bc' s = Yield '\xa7bd' (CC s '\x0000' '\x0000') +-- LATIN CAPITAL LETTER GLOTTAL U +foldMapping '\xa7be' s = Yield '\xa7bf' (CC s '\x0000' '\x0000') +-- LATIN CAPITAL LETTER ANGLICANA W 
+foldMapping '\xa7c2' s = Yield '\xa7c3' (CC s '\x0000' '\x0000') +-- LATIN CAPITAL LETTER C WITH PALATAL HOOK +foldMapping '\xa7c4' s = Yield '\xa794' (CC s '\x0000' '\x0000') +-- LATIN CAPITAL LETTER S WITH HOOK +foldMapping '\xa7c5' s = Yield '\x0282' (CC s '\x0000' '\x0000') +-- LATIN CAPITAL LETTER Z WITH PALATAL HOOK +foldMapping '\xa7c6' s = Yield '\x1d8e' (CC s '\x0000' '\x0000') +-- LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY +foldMapping '\xa7c7' s = Yield '\xa7c8' (CC s '\x0000' '\x0000') +-- LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY +foldMapping '\xa7c9' s = Yield '\xa7ca' (CC s '\x0000' '\x0000') +-- LATIN CAPITAL LETTER REVERSED HALF H +foldMapping '\xa7f5' s = Yield '\xa7f6' (CC s '\x0000' '\x0000') -- CHEROKEE SMALL LETTER A foldMapping '\xab70' s = Yield '\x13a0' (CC s '\x0000' '\x0000') -- CHEROKEE SMALL LETTER E @@ -931,6 +1291,134 @@ foldMapping '\x10cb0' s = Yield '\x10cf0' (CC s '\x0000' '\x0000') foldMapping '\x10cb1' s = Yield '\x10cf1' (CC s '\x0000' '\x0000') -- OLD HUNGARIAN CAPITAL LETTER US foldMapping '\x10cb2' s = Yield '\x10cf2' (CC s '\x0000' '\x0000') +-- WARANG CITI CAPITAL LETTER NGAA +foldMapping '\x118a0' s = Yield '\x118c0' (CC s '\x0000' '\x0000') +-- WARANG CITI CAPITAL LETTER A +foldMapping '\x118a1' s = Yield '\x118c1' (CC s '\x0000' '\x0000') +-- WARANG CITI CAPITAL LETTER WI +foldMapping '\x118a2' s = Yield '\x118c2' (CC s '\x0000' '\x0000') +-- WARANG CITI CAPITAL LETTER YU +foldMapping '\x118a3' s = Yield '\x118c3' (CC s '\x0000' '\x0000') +-- WARANG CITI CAPITAL LETTER YA +foldMapping '\x118a4' s = Yield '\x118c4' (CC s '\x0000' '\x0000') +-- WARANG CITI CAPITAL LETTER YO +foldMapping '\x118a5' s = Yield '\x118c5' (CC s '\x0000' '\x0000') +-- WARANG CITI CAPITAL LETTER II +foldMapping '\x118a6' s = Yield '\x118c6' (CC s '\x0000' '\x0000') +-- WARANG CITI CAPITAL LETTER UU +foldMapping '\x118a7' s = Yield '\x118c7' (CC s '\x0000' '\x0000') +-- WARANG CITI CAPITAL LETTER E +foldMapping '\x118a8' s = Yield 
'\x118c8' (CC s '\x0000' '\x0000') +-- WARANG CITI CAPITAL LETTER O +foldMapping '\x118a9' s = Yield '\x118c9' (CC s '\x0000' '\x0000') +-- WARANG CITI CAPITAL LETTER ANG +foldMapping '\x118aa' s = Yield '\x118ca' (CC s '\x0000' '\x0000') +-- WARANG CITI CAPITAL LETTER GA +foldMapping '\x118ab' s = Yield '\x118cb' (CC s '\x0000' '\x0000') +-- WARANG CITI CAPITAL LETTER KO +foldMapping '\x118ac' s = Yield '\x118cc' (CC s '\x0000' '\x0000') +-- WARANG CITI CAPITAL LETTER ENY +foldMapping '\x118ad' s = Yield '\x118cd' (CC s '\x0000' '\x0000') +-- WARANG CITI CAPITAL LETTER YUJ +foldMapping '\x118ae' s = Yield '\x118ce' (CC s '\x0000' '\x0000') +-- WARANG CITI CAPITAL LETTER UC +foldMapping '\x118af' s = Yield '\x118cf' (CC s '\x0000' '\x0000') +-- WARANG CITI CAPITAL LETTER ENN +foldMapping '\x118b0' s = Yield '\x118d0' (CC s '\x0000' '\x0000') +-- WARANG CITI CAPITAL LETTER ODD +foldMapping '\x118b1' s = Yield '\x118d1' (CC s '\x0000' '\x0000') +-- WARANG CITI CAPITAL LETTER TTE +foldMapping '\x118b2' s = Yield '\x118d2' (CC s '\x0000' '\x0000') +-- WARANG CITI CAPITAL LETTER NUNG +foldMapping '\x118b3' s = Yield '\x118d3' (CC s '\x0000' '\x0000') +-- WARANG CITI CAPITAL LETTER DA +foldMapping '\x118b4' s = Yield '\x118d4' (CC s '\x0000' '\x0000') +-- WARANG CITI CAPITAL LETTER AT +foldMapping '\x118b5' s = Yield '\x118d5' (CC s '\x0000' '\x0000') +-- WARANG CITI CAPITAL LETTER AM +foldMapping '\x118b6' s = Yield '\x118d6' (CC s '\x0000' '\x0000') +-- WARANG CITI CAPITAL LETTER BU +foldMapping '\x118b7' s = Yield '\x118d7' (CC s '\x0000' '\x0000') +-- WARANG CITI CAPITAL LETTER PU +foldMapping '\x118b8' s = Yield '\x118d8' (CC s '\x0000' '\x0000') +-- WARANG CITI CAPITAL LETTER HIYO +foldMapping '\x118b9' s = Yield '\x118d9' (CC s '\x0000' '\x0000') +-- WARANG CITI CAPITAL LETTER HOLO +foldMapping '\x118ba' s = Yield '\x118da' (CC s '\x0000' '\x0000') +-- WARANG CITI CAPITAL LETTER HORR +foldMapping '\x118bb' s = Yield '\x118db' (CC s '\x0000' '\x0000') +-- WARANG 
CITI CAPITAL LETTER HAR +foldMapping '\x118bc' s = Yield '\x118dc' (CC s '\x0000' '\x0000') +-- WARANG CITI CAPITAL LETTER SSUU +foldMapping '\x118bd' s = Yield '\x118dd' (CC s '\x0000' '\x0000') +-- WARANG CITI CAPITAL LETTER SII +foldMapping '\x118be' s = Yield '\x118de' (CC s '\x0000' '\x0000') +-- WARANG CITI CAPITAL LETTER VIYO +foldMapping '\x118bf' s = Yield '\x118df' (CC s '\x0000' '\x0000') +-- MEDEFAIDRIN CAPITAL LETTER M +foldMapping '\x16e40' s = Yield '\x16e60' (CC s '\x0000' '\x0000') +-- MEDEFAIDRIN CAPITAL LETTER S +foldMapping '\x16e41' s = Yield '\x16e61' (CC s '\x0000' '\x0000') +-- MEDEFAIDRIN CAPITAL LETTER V +foldMapping '\x16e42' s = Yield '\x16e62' (CC s '\x0000' '\x0000') +-- MEDEFAIDRIN CAPITAL LETTER W +foldMapping '\x16e43' s = Yield '\x16e63' (CC s '\x0000' '\x0000') +-- MEDEFAIDRIN CAPITAL LETTER ATIU +foldMapping '\x16e44' s = Yield '\x16e64' (CC s '\x0000' '\x0000') +-- MEDEFAIDRIN CAPITAL LETTER Z +foldMapping '\x16e45' s = Yield '\x16e65' (CC s '\x0000' '\x0000') +-- MEDEFAIDRIN CAPITAL LETTER KP +foldMapping '\x16e46' s = Yield '\x16e66' (CC s '\x0000' '\x0000') +-- MEDEFAIDRIN CAPITAL LETTER P +foldMapping '\x16e47' s = Yield '\x16e67' (CC s '\x0000' '\x0000') +-- MEDEFAIDRIN CAPITAL LETTER T +foldMapping '\x16e48' s = Yield '\x16e68' (CC s '\x0000' '\x0000') +-- MEDEFAIDRIN CAPITAL LETTER G +foldMapping '\x16e49' s = Yield '\x16e69' (CC s '\x0000' '\x0000') +-- MEDEFAIDRIN CAPITAL LETTER F +foldMapping '\x16e4a' s = Yield '\x16e6a' (CC s '\x0000' '\x0000') +-- MEDEFAIDRIN CAPITAL LETTER I +foldMapping '\x16e4b' s = Yield '\x16e6b' (CC s '\x0000' '\x0000') +-- MEDEFAIDRIN CAPITAL LETTER K +foldMapping '\x16e4c' s = Yield '\x16e6c' (CC s '\x0000' '\x0000') +-- MEDEFAIDRIN CAPITAL LETTER A +foldMapping '\x16e4d' s = Yield '\x16e6d' (CC s '\x0000' '\x0000') +-- MEDEFAIDRIN CAPITAL LETTER J +foldMapping '\x16e4e' s = Yield '\x16e6e' (CC s '\x0000' '\x0000') +-- MEDEFAIDRIN CAPITAL LETTER E +foldMapping '\x16e4f' s = Yield '\x16e6f' 
(CC s '\x0000' '\x0000') +-- MEDEFAIDRIN CAPITAL LETTER B +foldMapping '\x16e50' s = Yield '\x16e70' (CC s '\x0000' '\x0000') +-- MEDEFAIDRIN CAPITAL LETTER C +foldMapping '\x16e51' s = Yield '\x16e71' (CC s '\x0000' '\x0000') +-- MEDEFAIDRIN CAPITAL LETTER U +foldMapping '\x16e52' s = Yield '\x16e72' (CC s '\x0000' '\x0000') +-- MEDEFAIDRIN CAPITAL LETTER YU +foldMapping '\x16e53' s = Yield '\x16e73' (CC s '\x0000' '\x0000') +-- MEDEFAIDRIN CAPITAL LETTER L +foldMapping '\x16e54' s = Yield '\x16e74' (CC s '\x0000' '\x0000') +-- MEDEFAIDRIN CAPITAL LETTER Q +foldMapping '\x16e55' s = Yield '\x16e75' (CC s '\x0000' '\x0000') +-- MEDEFAIDRIN CAPITAL LETTER HP +foldMapping '\x16e56' s = Yield '\x16e76' (CC s '\x0000' '\x0000') +-- MEDEFAIDRIN CAPITAL LETTER NY +foldMapping '\x16e57' s = Yield '\x16e77' (CC s '\x0000' '\x0000') +-- MEDEFAIDRIN CAPITAL LETTER X +foldMapping '\x16e58' s = Yield '\x16e78' (CC s '\x0000' '\x0000') +-- MEDEFAIDRIN CAPITAL LETTER D +foldMapping '\x16e59' s = Yield '\x16e79' (CC s '\x0000' '\x0000') +-- MEDEFAIDRIN CAPITAL LETTER OE +foldMapping '\x16e5a' s = Yield '\x16e7a' (CC s '\x0000' '\x0000') +-- MEDEFAIDRIN CAPITAL LETTER N +foldMapping '\x16e5b' s = Yield '\x16e7b' (CC s '\x0000' '\x0000') +-- MEDEFAIDRIN CAPITAL LETTER R +foldMapping '\x16e5c' s = Yield '\x16e7c' (CC s '\x0000' '\x0000') +-- MEDEFAIDRIN CAPITAL LETTER O +foldMapping '\x16e5d' s = Yield '\x16e7d' (CC s '\x0000' '\x0000') +-- MEDEFAIDRIN CAPITAL LETTER AI +foldMapping '\x16e5e' s = Yield '\x16e7e' (CC s '\x0000' '\x0000') +-- MEDEFAIDRIN CAPITAL LETTER Y +foldMapping '\x16e5f' s = Yield '\x16e7f' (CC s '\x0000' '\x0000') -- ADLAM CAPITAL LETTER ALIF foldMapping '\x1e900' s = Yield '\x1e922' (CC s '\x0000' '\x0000') -- ADLAM CAPITAL LETTER DAALI diff --git a/tests-and-benchmarks.markdown b/tests-and-benchmarks.markdown index 3e1e664c1..dd25eb6bf 100644 --- a/tests-and-benchmarks.markdown +++ b/tests-and-benchmarks.markdown @@ -1,6 +1,14 @@ Tests and benchmarks 
==================== +This file is most likely outdated. You can run the tests with: + + cabal run text-tests:test:tests --project-file=cabal.tests.project --with-compiler ghc-8.6.5 -- + +Or, to run them with all GHCs: + + sh scripts/tests.sh -- + Prerequisites ------------- diff --git a/tests/Tests/Properties.hs b/tests/Tests/Properties.hs index 7b9db610b..d8b6e66c3 100644 --- a/tests/Tests/Properties.hs +++ b/tests/Tests/Properties.hs @@ -371,11 +371,15 @@ sf_toCaseFold_length p xs = where s = S.streamList xs t_toCaseFold_length t = T.length (T.toCaseFold t) >= T.length t tl_toCaseFold_length t = TL.length (TL.toCaseFold t) >= TL.length t +t_toCaseFold_idempotent t = T.toCaseFold (T.toCaseFold t) == T.toCaseFold t +tl_toCaseFold_idempotent t = TL.toCaseFold (TL.toCaseFold t) == TL.toCaseFold t + t_toLower_length t = T.length (T.toLower t) >= T.length t t_toLower_lower t = p (T.toLower t) >= p t where p = T.length . T.filter isLower tl_toLower_lower t = p (TL.toLower t) >= p t where p = TL.length . TL.filter isLower + t_toUpper_length t = T.length (T.toUpper t) >= T.length t t_toUpper_upper t = p (T.toUpper t) >= p t where p = T.length . 
T.filter isUpper @@ -1118,6 +1122,8 @@ tests = testProperty "sf_toCaseFold_length" sf_toCaseFold_length, testProperty "t_toCaseFold_length" t_toCaseFold_length, testProperty "tl_toCaseFold_length" tl_toCaseFold_length, + testProperty "t_toCaseFold_idempotent" t_toCaseFold_idempotent, + testProperty "tl_toCaseFold_idempotent" tl_toCaseFold_idempotent, testProperty "t_toLower_length" t_toLower_length, testProperty "t_toLower_lower" t_toLower_lower, testProperty "tl_toLower_lower" tl_toLower_lower, diff --git a/tests/Tests/Regressions.hs b/tests/Tests/Regressions.hs index 8ff45b3cc..696098560 100644 --- a/tests/Tests/Regressions.hs +++ b/tests/Tests/Regressions.hs @@ -7,7 +7,9 @@ module Tests.Regressions ) where import Control.Exception (SomeException, handle) -import Data.Char (isLetter) +import Control.Monad (forM_) +import Data.Char (isLetter, ord) +import Numeric (showHex) import System.IO import Test.HUnit (assertBool, assertEqual, assertFailure) import qualified Data.ByteString as B @@ -95,6 +97,16 @@ t227 = (T.length $ T.filter isLetter $ T.take (-3) "Hello! How are you doing today?") 0 +-- Check that 'T.toCaseFold' is idempotent on every single-character text; the once-folded value is passed as the expected one. +t277 :: IO () +t277 = + forM_ [ minBound .. maxBound ] $ \c -> do + let t = T.singleton c + assertEqual + ("toCaseFold should be idempotent: " ++ show c ++ " 0x" ++ showHex (ord c) "") + (T.toCaseFold t) + (T.toCaseFold (T.toCaseFold t)) + tests :: F.Test tests = F.testGroup "Regressions" [ F.testCase "hGetContents_crash" hGetContents_crash @@ -105,4 +117,5 @@ tests = F.testGroup "Regressions" , F.testCase "t197" t197 , F.testCase "t221" t221 , F.testCase "t227" t227 + , F.testCase "t277" t277 ]