{-# LANGUAGE CPP #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE DeriveLift #-}
module Text.Collate.Collation
( Collation(..)
, VariableWeighting(..)
, CollationElement(..)
, unfoldCollation
, insertElements
, alterElements
, suppressContractions
, findLast
, findFirst
, matchLongestPrefix
, getCollationElements
, parseCollation
, parseCJKOverrides
, canonicalCombiningClass
)
where
import qualified Data.ByteString.Char8 as B
import qualified Data.IntSet as IntSet
import qualified Data.IntMap as M
import Data.Bits ( Bits((.|.), shiftR, (.&.)) )
import Data.ByteString.Lex.Integral (readHexadecimal)
import Data.List (foldl', permutations, sortOn)
import Text.Collate.CombiningClass (genCombiningClassMap)
import Data.Maybe
import Data.Foldable (minimumBy, maximumBy)
import Data.Word (Word16)
import Data.Binary (Binary(get, put))
import Language.Haskell.TH.Syntax (Lift(..))
import Instances.TH.Lift ()
import qualified Text.Collate.Trie as Trie
import Text.Printf
#if MIN_VERSION_base(4,11,0)
#else
import Data.Semigroup (Semigroup(..))
#endif
data VariableWeighting =
NonIgnorable
| Blanked
| Shifted
| ShiftTrimmed
deriving (Int -> VariableWeighting -> ShowS
[VariableWeighting] -> ShowS
VariableWeighting -> String
(Int -> VariableWeighting -> ShowS)
-> (VariableWeighting -> String)
-> ([VariableWeighting] -> ShowS)
-> Show VariableWeighting
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [VariableWeighting] -> ShowS
$cshowList :: [VariableWeighting] -> ShowS
show :: VariableWeighting -> String
$cshow :: VariableWeighting -> String
showsPrec :: Int -> VariableWeighting -> ShowS
$cshowsPrec :: Int -> VariableWeighting -> ShowS
Show, VariableWeighting -> VariableWeighting -> Bool
(VariableWeighting -> VariableWeighting -> Bool)
-> (VariableWeighting -> VariableWeighting -> Bool)
-> Eq VariableWeighting
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: VariableWeighting -> VariableWeighting -> Bool
$c/= :: VariableWeighting -> VariableWeighting -> Bool
== :: VariableWeighting -> VariableWeighting -> Bool
$c== :: VariableWeighting -> VariableWeighting -> Bool
Eq, Eq VariableWeighting
Eq VariableWeighting
-> (VariableWeighting -> VariableWeighting -> Ordering)
-> (VariableWeighting -> VariableWeighting -> Bool)
-> (VariableWeighting -> VariableWeighting -> Bool)
-> (VariableWeighting -> VariableWeighting -> Bool)
-> (VariableWeighting -> VariableWeighting -> Bool)
-> (VariableWeighting -> VariableWeighting -> VariableWeighting)
-> (VariableWeighting -> VariableWeighting -> VariableWeighting)
-> Ord VariableWeighting
VariableWeighting -> VariableWeighting -> Bool
VariableWeighting -> VariableWeighting -> Ordering
VariableWeighting -> VariableWeighting -> VariableWeighting
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: VariableWeighting -> VariableWeighting -> VariableWeighting
$cmin :: VariableWeighting -> VariableWeighting -> VariableWeighting
max :: VariableWeighting -> VariableWeighting -> VariableWeighting
$cmax :: VariableWeighting -> VariableWeighting -> VariableWeighting
>= :: VariableWeighting -> VariableWeighting -> Bool
$c>= :: VariableWeighting -> VariableWeighting -> Bool
> :: VariableWeighting -> VariableWeighting -> Bool
$c> :: VariableWeighting -> VariableWeighting -> Bool
<= :: VariableWeighting -> VariableWeighting -> Bool
$c<= :: VariableWeighting -> VariableWeighting -> Bool
< :: VariableWeighting -> VariableWeighting -> Bool
$c< :: VariableWeighting -> VariableWeighting -> Bool
compare :: VariableWeighting -> VariableWeighting -> Ordering
$ccompare :: VariableWeighting -> VariableWeighting -> Ordering
$cp1Ord :: Eq VariableWeighting
Ord)
data CollationElement =
CollationElement
{ CollationElement -> Bool
collationVariable :: !Bool
, CollationElement -> Word16
collationL1 :: {-# UNPACK #-} !Word16
, CollationElement -> Word16
collationL2 :: {-# UNPACK #-} !Word16
, CollationElement -> Word16
collationL3 :: {-# UNPACK #-} !Word16
, CollationElement -> Word16
collationL4 :: {-# UNPACK #-} !Word16
} deriving (CollationElement -> CollationElement -> Bool
(CollationElement -> CollationElement -> Bool)
-> (CollationElement -> CollationElement -> Bool)
-> Eq CollationElement
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: CollationElement -> CollationElement -> Bool
$c/= :: CollationElement -> CollationElement -> Bool
== :: CollationElement -> CollationElement -> Bool
$c== :: CollationElement -> CollationElement -> Bool
Eq, CollationElement -> Q Exp
CollationElement -> Q (TExp CollationElement)
(CollationElement -> Q Exp)
-> (CollationElement -> Q (TExp CollationElement))
-> Lift CollationElement
forall t. (t -> Q Exp) -> (t -> Q (TExp t)) -> Lift t
liftTyped :: CollationElement -> Q (TExp CollationElement)
$cliftTyped :: CollationElement -> Q (TExp CollationElement)
lift :: CollationElement -> Q Exp
$clift :: CollationElement -> Q Exp
Lift)
instance Ord CollationElement where
compare :: CollationElement -> CollationElement -> Ordering
compare (CollationElement Bool
_ Word16
p1 Word16
s1 Word16
t1 Word16
q1) (CollationElement Bool
_ Word16
p2 Word16
s2 Word16
t2 Word16
q2) =
Word16 -> Word16 -> Ordering
forall a. Ord a => a -> a -> Ordering
compare Word16
p1 Word16
p2 Ordering -> Ordering -> Ordering
forall a. Semigroup a => a -> a -> a
<> Word16 -> Word16 -> Ordering
forall a. Ord a => a -> a -> Ordering
compare Word16
s1 Word16
s2 Ordering -> Ordering -> Ordering
forall a. Semigroup a => a -> a -> a
<> Word16 -> Word16 -> Ordering
forall a. Ord a => a -> a -> Ordering
compare Word16
t1 Word16
t2 Ordering -> Ordering -> Ordering
forall a. Semigroup a => a -> a -> a
<> Word16 -> Word16 -> Ordering
forall a. Ord a => a -> a -> Ordering
compare Word16
q1 Word16
q2
instance Show CollationElement where
show :: CollationElement -> String
show (CollationElement Bool
v Word16
l1 Word16
l2 Word16
l3 Word16
l4) =
String -> String -> Word16 -> Word16 -> Word16 -> Word16 -> String
forall r. PrintfType r => String -> r
printf String
"CollationElement %s 0x%04X 0x%04X 0x%04X 0x%04X" (Bool -> String
forall a. Show a => a -> String
show Bool
v) Word16
l1 Word16
l2 Word16
l3 Word16
l4
instance Binary CollationElement where
put :: CollationElement -> Put
put (CollationElement Bool
v Word16
w Word16
x Word16
y Word16
z) = (Bool, Word16, Word16, Word16, Word16) -> Put
forall t. Binary t => t -> Put
put (Bool
v,Word16
w,Word16
x,Word16
y,Word16
z)
get :: Get CollationElement
get = do
(Bool
v,Word16
w,Word16
x,Word16
y,Word16
z) <- Get (Bool, Word16, Word16, Word16, Word16)
forall t. Binary t => Get t
get
CollationElement -> Get CollationElement
forall (m :: * -> *) a. Monad m => a -> m a
return (CollationElement -> Get CollationElement)
-> CollationElement -> Get CollationElement
forall a b. (a -> b) -> a -> b
$ Bool -> Word16 -> Word16 -> Word16 -> Word16 -> CollationElement
CollationElement Bool
v Word16
w Word16
x Word16
y Word16
z
newtype Collation = Collation { Collation -> Trie [CollationElement]
unCollation :: Trie.Trie [CollationElement] }
deriving (Int -> Collation -> ShowS
[Collation] -> ShowS
Collation -> String
(Int -> Collation -> ShowS)
-> (Collation -> String)
-> ([Collation] -> ShowS)
-> Show Collation
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Collation] -> ShowS
$cshowList :: [Collation] -> ShowS
show :: Collation -> String
$cshow :: Collation -> String
showsPrec :: Int -> Collation -> ShowS
$cshowsPrec :: Int -> Collation -> ShowS
Show, Collation -> Collation -> Bool
(Collation -> Collation -> Bool)
-> (Collation -> Collation -> Bool) -> Eq Collation
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Collation -> Collation -> Bool
$c/= :: Collation -> Collation -> Bool
== :: Collation -> Collation -> Bool
$c== :: Collation -> Collation -> Bool
Eq, Eq Collation
Eq Collation
-> (Collation -> Collation -> Ordering)
-> (Collation -> Collation -> Bool)
-> (Collation -> Collation -> Bool)
-> (Collation -> Collation -> Bool)
-> (Collation -> Collation -> Bool)
-> (Collation -> Collation -> Collation)
-> (Collation -> Collation -> Collation)
-> Ord Collation
Collation -> Collation -> Bool
Collation -> Collation -> Ordering
Collation -> Collation -> Collation
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: Collation -> Collation -> Collation
$cmin :: Collation -> Collation -> Collation
max :: Collation -> Collation -> Collation
$cmax :: Collation -> Collation -> Collation
>= :: Collation -> Collation -> Bool
$c>= :: Collation -> Collation -> Bool
> :: Collation -> Collation -> Bool
$c> :: Collation -> Collation -> Bool
<= :: Collation -> Collation -> Bool
$c<= :: Collation -> Collation -> Bool
< :: Collation -> Collation -> Bool
$c< :: Collation -> Collation -> Bool
compare :: Collation -> Collation -> Ordering
$ccompare :: Collation -> Collation -> Ordering
$cp1Ord :: Eq Collation
Ord, Collation -> Q Exp
Collation -> Q (TExp Collation)
(Collation -> Q Exp)
-> (Collation -> Q (TExp Collation)) -> Lift Collation
forall t. (t -> Q Exp) -> (t -> Q (TExp t)) -> Lift t
liftTyped :: Collation -> Q (TExp Collation)
$cliftTyped :: Collation -> Q (TExp Collation)
lift :: Collation -> Q Exp
$clift :: Collation -> Q Exp
Lift, b -> Collation -> Collation
NonEmpty Collation -> Collation
Collation -> Collation -> Collation
(Collation -> Collation -> Collation)
-> (NonEmpty Collation -> Collation)
-> (forall b. Integral b => b -> Collation -> Collation)
-> Semigroup Collation
forall b. Integral b => b -> Collation -> Collation
forall a.
(a -> a -> a)
-> (NonEmpty a -> a)
-> (forall b. Integral b => b -> a -> a)
-> Semigroup a
stimes :: b -> Collation -> Collation
$cstimes :: forall b. Integral b => b -> Collation -> Collation
sconcat :: NonEmpty Collation -> Collation
$csconcat :: NonEmpty Collation -> Collation
<> :: Collation -> Collation -> Collation
$c<> :: Collation -> Collation -> Collation
Semigroup, Semigroup Collation
Collation
Semigroup Collation
-> Collation
-> (Collation -> Collation -> Collation)
-> ([Collation] -> Collation)
-> Monoid Collation
[Collation] -> Collation
Collation -> Collation -> Collation
forall a.
Semigroup a -> a -> (a -> a -> a) -> ([a] -> a) -> Monoid a
mconcat :: [Collation] -> Collation
$cmconcat :: [Collation] -> Collation
mappend :: Collation -> Collation -> Collation
$cmappend :: Collation -> Collation -> Collation
mempty :: Collation
$cmempty :: Collation
$cp1Monoid :: Semigroup Collation
Monoid)
instance Binary Collation where
put :: Collation -> Put
put (Collation Trie [CollationElement]
m) = Trie [CollationElement] -> Put
forall t. Binary t => t -> Put
put Trie [CollationElement]
m
get :: Get Collation
get = Trie [CollationElement] -> Collation
Collation (Trie [CollationElement] -> Collation)
-> Get (Trie [CollationElement]) -> Get Collation
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Get (Trie [CollationElement])
forall t. Binary t => Get t
get
unfoldCollation :: Collation -> [([Int], [CollationElement])]
unfoldCollation :: Collation -> [([Int], [CollationElement])]
unfoldCollation (Collation Trie [CollationElement]
trie) = Trie [CollationElement] -> [([Int], [CollationElement])]
forall a. Trie a -> [([Int], a)]
Trie.unfoldTrie Trie [CollationElement]
trie
insertElements :: [Int] -> [CollationElement] -> Collation -> Collation
insertElements :: [Int] -> [CollationElement] -> Collation -> Collation
insertElements [Int]
codepoints [CollationElement]
els (Collation Trie [CollationElement]
trie) =
Trie [CollationElement] -> Collation
Collation (Trie [CollationElement] -> Collation)
-> Trie [CollationElement] -> Collation
forall a b. (a -> b) -> a -> b
$ [Int]
-> [CollationElement]
-> Trie [CollationElement]
-> Trie [CollationElement]
forall a. [Int] -> a -> Trie a -> Trie a
Trie.insert [Int]
codepoints [CollationElement]
els Trie [CollationElement]
trie
suppressContractions :: [Int] -> Collation -> Collation
suppressContractions :: [Int] -> Collation -> Collation
suppressContractions [Int]
cps Collation
coll =
([Int] -> Collation -> Collation)
-> Collation -> [[Int]] -> Collation
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr ((Maybe [CollationElement] -> Maybe [CollationElement])
-> [Int] -> Collation -> Collation
alterElements (Maybe [CollationElement]
-> Maybe [CollationElement] -> Maybe [CollationElement]
forall a b. a -> b -> a
const Maybe [CollationElement]
forall a. Maybe a
Nothing)) Collation
coll
[[Int]
is | is :: [Int]
is@(Int
i:Int
_:[Int]
_) <- [[Int]]
collationKeys, Int
i Int -> [Int] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Int]
cps]
where
collationKeys :: [[Int]]
collationKeys = (([Int], [CollationElement]) -> [Int])
-> [([Int], [CollationElement])] -> [[Int]]
forall a b. (a -> b) -> [a] -> [b]
map ([Int], [CollationElement]) -> [Int]
forall a b. (a, b) -> a
fst ([([Int], [CollationElement])] -> [[Int]])
-> [([Int], [CollationElement])] -> [[Int]]
forall a b. (a -> b) -> a -> b
$ Collation -> [([Int], [CollationElement])]
unfoldCollation Collation
coll
alterElements :: (Maybe [CollationElement] -> Maybe [CollationElement])
-> [Int] -> Collation -> Collation
alterElements :: (Maybe [CollationElement] -> Maybe [CollationElement])
-> [Int] -> Collation -> Collation
alterElements Maybe [CollationElement] -> Maybe [CollationElement]
f [Int]
codepoints (Collation Trie [CollationElement]
trie) =
Trie [CollationElement] -> Collation
Collation (Trie [CollationElement] -> Collation)
-> Trie [CollationElement] -> Collation
forall a b. (a -> b) -> a -> b
$ (Maybe [CollationElement] -> Maybe [CollationElement])
-> [Int] -> Trie [CollationElement] -> Trie [CollationElement]
forall a. (Maybe a -> Maybe a) -> [Int] -> Trie a -> Trie a
Trie.alter Maybe [CollationElement] -> Maybe [CollationElement]
f [Int]
codepoints Trie [CollationElement]
trie
matchLongestPrefix :: Collation
-> [Int]
-> Maybe ([CollationElement], [Int], Collation)
matchLongestPrefix :: Collation -> [Int] -> Maybe ([CollationElement], [Int], Collation)
matchLongestPrefix (Collation Trie [CollationElement]
trie) [Int]
codepoints =
case Trie [CollationElement]
-> [Int]
-> Maybe ([CollationElement], [Int], Trie [CollationElement])
forall a. Trie a -> [Int] -> Maybe (a, [Int], Trie a)
Trie.matchLongestPrefix Trie [CollationElement]
trie [Int]
codepoints of
Maybe ([CollationElement], [Int], Trie [CollationElement])
Nothing -> Maybe ([CollationElement], [Int], Collation)
forall a. Maybe a
Nothing
Just ([CollationElement]
els, [Int]
is, Trie [CollationElement]
trie') -> ([CollationElement], [Int], Collation)
-> Maybe ([CollationElement], [Int], Collation)
forall a. a -> Maybe a
Just ([CollationElement]
els, [Int]
is, Trie [CollationElement] -> Collation
Collation Trie [CollationElement]
trie')
findFirst :: ([CollationElement] -> Bool)
-> Collation
-> Maybe ([Int], [CollationElement])
findFirst :: ([CollationElement] -> Bool)
-> Collation -> Maybe ([Int], [CollationElement])
findFirst [CollationElement] -> Bool
f (Collation Trie [CollationElement]
trie) =
case (([Int], [CollationElement])
-> ([Int], [CollationElement]) -> Ordering)
-> [([Int], [CollationElement])] -> ([Int], [CollationElement])
forall (t :: * -> *) a.
Foldable t =>
(a -> a -> Ordering) -> t a -> a
minimumBy ([Int], [CollationElement])
-> ([Int], [CollationElement]) -> Ordering
forall a a.
(a, [CollationElement]) -> (a, [CollationElement]) -> Ordering
comp ([([Int], [CollationElement])] -> ([Int], [CollationElement]))
-> [([Int], [CollationElement])] -> ([Int], [CollationElement])
forall a b. (a -> b) -> a -> b
$ Trie [CollationElement] -> [([Int], [CollationElement])]
forall a. Trie a -> [([Int], a)]
Trie.unfoldTrie Trie [CollationElement]
trie of
([Int]
is,[CollationElement]
elts) | [CollationElement] -> Bool
f [CollationElement]
elts -> ([Int], [CollationElement]) -> Maybe ([Int], [CollationElement])
forall a. a -> Maybe a
Just ([Int]
is,[CollationElement]
elts)
([Int], [CollationElement])
_ -> Maybe ([Int], [CollationElement])
forall a. Maybe a
Nothing
where
comp :: (a, [CollationElement]) -> (a, [CollationElement]) -> Ordering
comp (a
_,[CollationElement]
x) (a
_,[CollationElement]
y) =
Either [CollationElement] [CollationElement]
-> Either [CollationElement] [CollationElement] -> Ordering
forall a. Ord a => a -> a -> Ordering
compare (if [CollationElement] -> Bool
f [CollationElement]
x then [CollationElement] -> Either [CollationElement] [CollationElement]
forall a b. a -> Either a b
Left [CollationElement]
x else [CollationElement] -> Either [CollationElement] [CollationElement]
forall a b. b -> Either a b
Right [CollationElement]
x)
(if [CollationElement] -> Bool
f [CollationElement]
y then [CollationElement] -> Either [CollationElement] [CollationElement]
forall a b. a -> Either a b
Left [CollationElement]
y else [CollationElement] -> Either [CollationElement] [CollationElement]
forall a b. b -> Either a b
Right [CollationElement]
y)
findLast :: ([CollationElement] -> Bool)
-> Collation
-> Maybe ([Int], [CollationElement])
findLast :: ([CollationElement] -> Bool)
-> Collation -> Maybe ([Int], [CollationElement])
findLast [CollationElement] -> Bool
f (Collation Trie [CollationElement]
trie) =
case (([Int], [CollationElement])
-> ([Int], [CollationElement]) -> Ordering)
-> [([Int], [CollationElement])] -> ([Int], [CollationElement])
forall (t :: * -> *) a.
Foldable t =>
(a -> a -> Ordering) -> t a -> a
maximumBy ([Int], [CollationElement])
-> ([Int], [CollationElement]) -> Ordering
forall a a.
(a, [CollationElement]) -> (a, [CollationElement]) -> Ordering
comp ([([Int], [CollationElement])] -> ([Int], [CollationElement]))
-> [([Int], [CollationElement])] -> ([Int], [CollationElement])
forall a b. (a -> b) -> a -> b
$ Trie [CollationElement] -> [([Int], [CollationElement])]
forall a. Trie a -> [([Int], a)]
Trie.unfoldTrie Trie [CollationElement]
trie of
([Int]
is,[CollationElement]
elts) | [CollationElement] -> Bool
f [CollationElement]
elts -> ([Int], [CollationElement]) -> Maybe ([Int], [CollationElement])
forall a. a -> Maybe a
Just ([Int]
is,[CollationElement]
elts)
([Int], [CollationElement])
_ -> Maybe ([Int], [CollationElement])
forall a. Maybe a
Nothing
where
comp :: (a, [CollationElement]) -> (a, [CollationElement]) -> Ordering
comp (a
_,[CollationElement]
x) (a
_,[CollationElement]
y) =
Either [CollationElement] [CollationElement]
-> Either [CollationElement] [CollationElement] -> Ordering
forall a. Ord a => a -> a -> Ordering
compare (if [CollationElement] -> Bool
f [CollationElement]
x then [CollationElement] -> Either [CollationElement] [CollationElement]
forall a b. b -> Either a b
Right [CollationElement]
x else [CollationElement] -> Either [CollationElement] [CollationElement]
forall a b. a -> Either a b
Left [CollationElement]
x)
(if [CollationElement] -> Bool
f [CollationElement]
y then [CollationElement] -> Either [CollationElement] [CollationElement]
forall a b. b -> Either a b
Right [CollationElement]
y else [CollationElement] -> Either [CollationElement] [CollationElement]
forall a b. a -> Either a b
Left [CollationElement]
y)
getCollationElements :: Collation -> [Int] -> [CollationElement]
getCollationElements :: Collation -> [Int] -> [CollationElement]
getCollationElements Collation
collation = [Int] -> [CollationElement]
go
where
matcher :: [Int] -> Maybe ([CollationElement], [Int], Collation)
matcher = Collation -> [Int] -> Maybe ([CollationElement], [Int], Collation)
matchLongestPrefix Collation
collation
go :: [Int] -> [CollationElement]
go [Int]
cs =
case [Int] -> Maybe ([CollationElement], [Int], Collation)
matcher [Int]
cs of
Maybe ([CollationElement], [Int], Collation)
Nothing ->
case [Int]
cs of
(Int
c:[Int]
rest) -> Int -> [CollationElement]
calculateImplicitWeight Int
c [CollationElement] -> [CollationElement] -> [CollationElement]
forall a. [a] -> [a] -> [a]
++ [Int] -> [CollationElement]
go [Int]
rest
[] -> []
Just ([CollationElement]
elts, [], Collation
_) -> [CollationElement]
elts
Just ([CollationElement]
elts, [Int]
is, Collation
subcollation)
| [Int] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [Int]
unblockedNonStarters -> [CollationElement]
elts [CollationElement] -> [CollationElement] -> [CollationElement]
forall a. [a] -> [a] -> [a]
++ [Int] -> [CollationElement]
go [Int]
is
| Bool
otherwise ->
case (([CollationElement], [Int], Collation) -> Int)
-> [([CollationElement], [Int], Collation)]
-> [([CollationElement], [Int], Collation)]
forall b a. Ord b => (a -> b) -> [a] -> [a]
sortOn ([CollationElement], [Int], Collation) -> Int
forall (t :: * -> *) a a c. Foldable t => (a, t a, c) -> Int
remainderLength [([CollationElement], [Int], Collation)]
matches of
(([CollationElement]
newelts, [Int]
rs, Collation
_):[([CollationElement], [Int], Collation)]
_)
-> [CollationElement]
newelts [CollationElement] -> [CollationElement] -> [CollationElement]
forall a. [a] -> [a] -> [a]
++ [Int] -> [CollationElement]
go ([Int]
rs [Int] -> [Int] -> [Int]
forall a. [a] -> [a] -> [a]
++ Int -> [Int] -> [Int]
forall a. Int -> [a] -> [a]
drop ([Int] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Int]
unblockedNonStarters) [Int]
is)
[] -> [CollationElement]
elts [CollationElement] -> [CollationElement] -> [CollationElement]
forall a. [a] -> [a] -> [a]
++ [Int] -> [CollationElement]
go [Int]
is
where
getUnblockedNonStarters :: Int -> [Int] -> [Int]
getUnblockedNonStarters Int
_ [] = []
getUnblockedNonStarters Int
n (Int
x:[Int]
xs)
= let ccc :: Int
ccc = Int -> Int
canonicalCombiningClass Int
x
in if Int
ccc Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
n
then Int
x Int -> [Int] -> [Int]
forall a. a -> [a] -> [a]
: Int -> [Int] -> [Int]
getUnblockedNonStarters Int
ccc [Int]
xs
else []
unblockedNonStarters :: [Int]
unblockedNonStarters = Int -> [Int] -> [Int]
getUnblockedNonStarters Int
0 [Int]
is
matches :: [([CollationElement], [Int], Collation)]
matches = ([Int] -> Maybe ([CollationElement], [Int], Collation))
-> [[Int]] -> [([CollationElement], [Int], Collation)]
forall a b. (a -> Maybe b) -> [a] -> [b]
mapMaybe (Collation -> [Int] -> Maybe ([CollationElement], [Int], Collation)
matchLongestPrefix Collation
subcollation)
(Int -> [[Int]] -> [[Int]]
forall a. Int -> [a] -> [a]
take Int
24 ([Int] -> [[Int]]
forall a. [a] -> [[a]]
permutations [Int]
unblockedNonStarters))
remainderLength :: (a, t a, c) -> Int
remainderLength (a
_,t a
ys,c
_) = t a -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length t a
ys
calculateImplicitWeight :: Int -> [CollationElement]
calculateImplicitWeight :: Int -> [CollationElement]
calculateImplicitWeight Int
cp =
[Bool -> Word16 -> Word16 -> Word16 -> Word16 -> CollationElement
CollationElement Bool
False (Int -> Word16
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
aaaa) Word16
0x0020 Word16
0x0002 Word16
0xFFFF,
Bool -> Word16 -> Word16 -> Word16 -> Word16 -> CollationElement
CollationElement Bool
False (Int -> Word16
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
bbbb) Word16
0 Word16
0 Word16
0xFFFF]
where
range :: Int -> Int -> IntSet
range Int
x Int
y = [Int] -> IntSet
IntSet.fromList [Int
x..Int
y]
singleton :: Int -> IntSet
singleton = Int -> IntSet
IntSet.singleton
union :: IntSet -> IntSet -> IntSet
union = IntSet -> IntSet -> IntSet
IntSet.union
unifiedIdeographs :: IntSet
unifiedIdeographs = Int -> Int -> IntSet
range Int
0x3400 Int
0x4DBF IntSet -> IntSet -> IntSet
`union`
Int -> Int -> IntSet
range Int
0x4E00 Int
0x9FFC IntSet -> IntSet -> IntSet
`union`
Int -> Int -> IntSet
range Int
0xFA0E Int
0xFA0F IntSet -> IntSet -> IntSet
`union`
Int -> IntSet
singleton Int
0xFA11 IntSet -> IntSet -> IntSet
`union`
Int -> Int -> IntSet
range Int
0xFA13 Int
0xFA14 IntSet -> IntSet -> IntSet
`union`
Int -> IntSet
singleton Int
0xFA1F IntSet -> IntSet -> IntSet
`union`
Int -> IntSet
singleton Int
0xFA21 IntSet -> IntSet -> IntSet
`union`
Int -> Int -> IntSet
range Int
0xFA23 Int
0xFA24 IntSet -> IntSet -> IntSet
`union`
Int -> Int -> IntSet
range Int
0xFA27 Int
0xFA29 IntSet -> IntSet -> IntSet
`union`
Int -> Int -> IntSet
range Int
0x20000 Int
0x2A6DD IntSet -> IntSet -> IntSet
`union`
Int -> Int -> IntSet
range Int
0x2A700 Int
0x2B734 IntSet -> IntSet -> IntSet
`union`
Int -> Int -> IntSet
range Int
0x2B740 Int
0x2B81D IntSet -> IntSet -> IntSet
`union`
Int -> Int -> IntSet
range Int
0x2B820 Int
0x2CEA1 IntSet -> IntSet -> IntSet
`union`
Int -> Int -> IntSet
range Int
0x2CEB0 Int
0x2EBE0 IntSet -> IntSet -> IntSet
`union`
Int -> Int -> IntSet
range Int
0x2CEB0 Int
0x2EBE0 IntSet -> IntSet -> IntSet
`union`
Int -> Int -> IntSet
range Int
0x30000 Int
0x3134A
cjkCompatibilityIdeographs :: IntSet
cjkCompatibilityIdeographs = Int -> Int -> IntSet
range Int
0xF900 Int
0xFAFF
cjkUnifiedIdeographs :: IntSet
cjkUnifiedIdeographs = Int -> Int -> IntSet
range Int
0x4E00 Int
0x9FFF
(Int
aaaa, Int
bbbb) =
case Int
cp of
Int
_ | Int
cp Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
0x17000 , Int
cp Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0x18AFF
-> (Int
0xFB00, (Int
cp Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
0x17000) Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. Int
0x8000)
| Int
cp Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
0x18D00 , Int
cp Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0x18D8F
-> (Int
0xFB00, (Int
cp Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
0x17000) Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. Int
0x8000)
| Int
cp Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
0x1B170 , Int
cp Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0x1B2FF
-> (Int
0xFB01, (Int
cp Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
0x1B170) Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. Int
0x8000)
| Int
cp Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
0x18B00 , Int
cp Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0x18CFF
-> (Int
0xFB02, (Int
cp Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
0x18B00) Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. Int
0x8000)
| Int
cp Int -> IntSet -> Bool
`IntSet.member` IntSet
unifiedIdeographs Bool -> Bool -> Bool
&&
(Int
cp Int -> IntSet -> Bool
`IntSet.member` IntSet
cjkUnifiedIdeographs Bool -> Bool -> Bool
||
Int
cp Int -> IntSet -> Bool
`IntSet.member` IntSet
cjkCompatibilityIdeographs)
-> (Int
0xFB40 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ (Int
cp Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftR` Int
15), (Int
cp Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
0x7FFF) Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. Int
0x8000)
| Int
cp Int -> IntSet -> Bool
`IntSet.member` IntSet
unifiedIdeographs
-> (Int
0xFB80 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ (Int
cp Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftR` Int
15), (Int
cp Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
0x7FFF) Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. Int
0x8000)
| Bool
otherwise
-> (Int
0xFBC0 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ (Int
cp Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftR` Int
15), (Int
cp Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
0x7FFFF) Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. Int
0x8000)
readCodepoints :: B.ByteString -> ([Int], B.ByteString)
readCodepoints :: ByteString -> ([Int], ByteString)
readCodepoints ByteString
b =
case ByteString -> Maybe (Int, ByteString)
forall a. Integral a => ByteString -> Maybe (a, ByteString)
readHexadecimal ByteString
b of
Maybe (Int, ByteString)
Nothing -> ([], ByteString
b)
Just (Int
codepoint, ByteString
rest) ->
let ([Int]
cps, ByteString
b') = ByteString -> ([Int], ByteString)
readCodepoints ((Char -> Bool) -> ByteString -> ByteString
B.dropWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
' ') ByteString
rest)
in (Int
codepointInt -> [Int] -> [Int]
forall a. a -> [a] -> [a]
:[Int]
cps, ByteString
b')
parseCollation :: B.ByteString -> Collation
parseCollation :: ByteString -> Collation
parseCollation = (Collation -> ByteString -> Collation)
-> Collation -> [ByteString] -> Collation
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' Collation -> ByteString -> Collation
processLine Collation
forall a. Monoid a => a
mempty ([ByteString] -> Collation)
-> (ByteString -> [ByteString]) -> ByteString -> Collation
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> [ByteString]
B.lines
where
processLine :: Collation -> ByteString -> Collation
processLine Collation
trie ByteString
b =
case ByteString -> ([Int], ByteString)
readCodepoints ByteString
b of
([],ByteString
_) -> Collation
trie
(Int
c:[Int]
cs, ByteString
rest) -> [Int] -> [CollationElement] -> Collation -> Collation
insertElements (Int
cInt -> [Int] -> [Int]
forall a. a -> [a] -> [a]
:[Int]
cs) (ByteString -> [CollationElement]
go ByteString
rest) Collation
trie
go :: ByteString -> [CollationElement]
go ByteString
b =
case (Char -> Bool) -> ByteString -> (ByteString, ByteString)
B.break (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
']') (Int -> ByteString -> ByteString
B.drop Int
1 (ByteString -> ByteString) -> ByteString -> ByteString
forall a b. (a -> b) -> a -> b
$ (Char -> Bool) -> ByteString -> ByteString
B.dropWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'[') ByteString
b) of
(ByteString
contents, ByteString
rest)
| ByteString -> Bool
B.null ByteString
rest -> []
| Bool
otherwise -> ByteString -> CollationElement
parseContents ByteString
contents CollationElement -> [CollationElement] -> [CollationElement]
forall a. a -> [a] -> [a]
: ByteString -> [CollationElement]
go ByteString
rest
parseContents :: ByteString -> CollationElement
parseContents ByteString
b =
let isVariable :: Bool
isVariable = Bool -> Bool
not (ByteString -> Bool
B.null ByteString
b) Bool -> Bool -> Bool
&& ByteString -> Char
B.head ByteString
b Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'*'
isIgnorable :: (a, a, a) -> Bool
isIgnorable (a
0,a
0,a
0) = Bool
True
isIgnorable (a, a, a)
_ = Bool
False
in case (ByteString -> Maybe (Word16, ByteString))
-> [ByteString] -> [Maybe (Word16, ByteString)]
forall a b. (a -> b) -> [a] -> [b]
map ByteString -> Maybe (Word16, ByteString)
forall a. Integral a => ByteString -> Maybe (a, ByteString)
readHexadecimal ([ByteString] -> [Maybe (Word16, ByteString)])
-> [ByteString] -> [Maybe (Word16, ByteString)]
forall a b. (a -> b) -> a -> b
$ (ByteString -> Bool) -> [ByteString] -> [ByteString]
forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not (Bool -> Bool) -> (ByteString -> Bool) -> ByteString -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Bool
B.null)
((Char -> Bool) -> ByteString -> [ByteString]
B.splitWith Char -> Bool
isSep ByteString
b) of
[Just (Word16
x,ByteString
_), Just (Word16
y,ByteString
_), Just (Word16
z,ByteString
_)]
-> Bool -> Word16 -> Word16 -> Word16 -> Word16 -> CollationElement
CollationElement Bool
isVariable Word16
x Word16
y Word16
z
(if Bool
isVariable Bool -> Bool -> Bool
|| (Word16, Word16, Word16) -> Bool
forall a a a.
(Eq a, Eq a, Eq a, Num a, Num a, Num a) =>
(a, a, a) -> Bool
isIgnorable (Word16
x,Word16
y,Word16
z)
then Word16
0
else Word16
0xFFFF)
[Maybe (Word16, ByteString)]
_ -> Bool -> Word16 -> Word16 -> Word16 -> Word16 -> CollationElement
CollationElement Bool
isVariable Word16
0 Word16
0 Word16
0 Word16
0
isSep :: Char -> Bool
isSep Char
'*' = Bool
True
isSep Char
'.' = Bool
True
isSep Char
_ = Bool
False
parseCJKOverrides :: B.ByteString -> [Int]
parseCJKOverrides :: ByteString -> [Int]
parseCJKOverrides = (ByteString -> Maybe Int) -> [ByteString] -> [Int]
forall a b. (a -> Maybe b) -> [a] -> [b]
mapMaybe ByteString -> Maybe Int
forall a. Integral a => ByteString -> Maybe a
chunkToCp ([ByteString] -> [Int])
-> (ByteString -> [ByteString]) -> ByteString -> [Int]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> [ByteString]
B.words
where
chunkToCp :: ByteString -> Maybe a
chunkToCp ByteString
b =
case ByteString -> Maybe (a, ByteString)
forall a. Integral a => ByteString -> Maybe (a, ByteString)
readHexadecimal ByteString
b of
Just (a
x,ByteString
rest)
| ByteString -> Bool
B.null ByteString
rest -> a -> Maybe a
forall a. a -> Maybe a
Just a
x
Maybe (a, ByteString)
_ -> Maybe a
forall a. Maybe a
Nothing
combiningClassMap :: M.IntMap Int
combiningClassMap :: IntMap Int
combiningClassMap = [(Int, Int)] -> IntMap Int
forall a. [(Int, a)] -> IntMap a
M.fromList ([(Int, Int)] -> IntMap Int) -> [(Int, Int)] -> IntMap Int
forall a b. (a -> b) -> a -> b
$!
$(genCombiningClassMap "data/DerivedCombiningClass.txt")
canonicalCombiningClass :: Int -> Int
canonicalCombiningClass :: Int -> Int
canonicalCombiningClass Int
cp = Int -> Maybe Int -> Int
forall a. a -> Maybe a -> a
fromMaybe Int
0 (Maybe Int -> Int) -> Maybe Int -> Int
forall a b. (a -> b) -> a -> b
$ Int -> IntMap Int -> Maybe Int
forall a. Int -> IntMap a -> Maybe a
M.lookup Int
cp IntMap Int
combiningClassMap