{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MultiWayIf #-}
{-# LANGUAGE NumericUnderscores #-}
{-# LANGUAGE OverloadedStrings #-}
module Data.Char.GeneralCategory.Database
( genCatLitTable
, mkDatabaseFromUnicodeData
, GenCatDatabase
, query
, checkDatabase
, checkDatabase'
)
where
import Control.Monad
import qualified Data.Array.Unboxed as A
import Data.Bifunctor
import Data.Binary
import Data.Bits
import qualified Data.ByteString.Lazy as BSL
import Data.Char
import Data.Coerce
import Data.Function
import Data.List
import qualified Data.Map.Strict as M
import qualified Data.Set as S
import qualified Data.Text as T
import Data.Text.Encoding (decodeUtf8)
import Numeric
newtype GenCatDatabase
= GenCatDatabase (A.UArray Int Word64)
deriving (Get GenCatDatabase
[GenCatDatabase] -> Put
GenCatDatabase -> Put
(GenCatDatabase -> Put)
-> Get GenCatDatabase
-> ([GenCatDatabase] -> Put)
-> Binary GenCatDatabase
forall t. (t -> Put) -> Get t -> ([t] -> Put) -> Binary t
putList :: [GenCatDatabase] -> Put
$cputList :: [GenCatDatabase] -> Put
get :: Get GenCatDatabase
$cget :: Get GenCatDatabase
put :: GenCatDatabase -> Put
$cput :: GenCatDatabase -> Put
Binary)
type Range =
Either
(Int, Int)
Int
genCatLitTable :: M.Map T.Text GeneralCategory
genCatLitTable :: Map Text GeneralCategory
genCatLitTable = [(Text, GeneralCategory)] -> Map Text GeneralCategory
forall k a. Ord k => [(k, a)] -> Map k a
M.fromList ([(Text, GeneralCategory)] -> Map Text GeneralCategory)
-> [(Text, GeneralCategory)] -> Map Text GeneralCategory
forall a b. (a -> b) -> a -> b
$ [Text] -> [GeneralCategory] -> [(Text, GeneralCategory)]
forall a b. [a] -> [b] -> [(a, b)]
zip (Text -> [Text]
T.words Text
abbrs) [GeneralCategory
forall a. Bounded a => a
minBound .. GeneralCategory
forall a. Bounded a => a
maxBound]
where
abbrs :: Text
abbrs =
Text
"Lu Ll Lt Lm Lo \
\Mn Mc Me \
\Nd Nl No \
\Pc Pd Ps Pe Pi Pf Po \
\Sm Sc Sk So \
\Zs Zl Zp \
\Cc Cf Cs Co Cn"
verifyUnicodeDataAndProcess :: BSL.ByteString -> Either String [(Range, GeneralCategory)]
verifyUnicodeDataAndProcess :: ByteString -> Either String [(Range, GeneralCategory)]
verifyUnicodeDataAndProcess ByteString
raw = do
let dieIf :: Bool -> a -> Either a ()
dieIf Bool
flag a
reason =
Bool -> Either a () -> Either a ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when Bool
flag (Either a () -> Either a ()) -> Either a () -> Either a ()
forall a b. (a -> b) -> a -> b
$
a -> Either a ()
forall a b. a -> Either a b
Left a
reason
rawLines :: [Text]
rawLines = Text -> [Text]
T.lines (Text -> [Text]) -> (ByteString -> Text) -> ByteString -> [Text]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Text
decodeUtf8 (ByteString -> Text)
-> (ByteString -> ByteString) -> ByteString -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> ByteString
BSL.toStrict (ByteString -> [Text]) -> ByteString -> [Text]
forall a b. (a -> b) -> a -> b
$ ByteString
raw
rows :: [(Int, Text, Text)]
rows = (Text -> (Int, Text, Text)) -> [Text] -> [(Int, Text, Text)]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Text -> (Int, Text, Text)
extract [Text]
rawLines
where
extract :: Text -> (Int, Text, Text)
extract Text
rawLine = (Int
code :: Int, Text
desc, Text
gc)
where
[(Int
code, String
"")] = ReadS Int
forall a. (Eq a, Num a) => ReadS a
readHex (Text -> String
T.unpack Text
rawCode)
Text
rawCode : Text
desc : Text
gc : [Text]
_ = Text -> Text -> [Text]
T.splitOn Text
";" Text
rawLine
groupped :: [(T.Text, Either (Int, Int) Int)]
groupped :: [(Text, Range)]
groupped = [(Int, Text, Text)] -> (Text, Range)
forall b b. [(b, b, Text)] -> (Text, Either (b, b) b)
norm ([(Int, Text, Text)] -> (Text, Range))
-> [[(Int, Text, Text)]] -> [(Text, Range)]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ((Int, Text, Text) -> (Int, Text, Text) -> Bool)
-> [(Int, Text, Text)] -> [[(Int, Text, Text)]]
forall a. (a -> a -> Bool) -> [a] -> [[a]]
groupBy (Int, Text, Text) -> (Int, Text, Text) -> Bool
forall a c a c. (a, Text, c) -> (a, Text, c) -> Bool
zCmp [(Int, Text, Text)]
rows
where
norm :: [(b, b, Text)] -> (Text, Either (b, b) b)
norm [(b
c, b
_, Text
gc)] = (Text
gc, b -> Either (b, b) b
forall a b. b -> Either a b
Right b
c)
norm [(b
c0, b
_, Text
gc0), (b
c1, b
_, Text
gc1)]
| Text
gc0 Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
gc1
Bool -> Bool -> Bool
&& Int -> Text -> Text
T.dropEnd (Text -> Int
T.length Text
"First>") Text
gc0
Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Int -> Text -> Text
T.dropEnd (Text -> Int
T.length Text
"Last>") Text
gc1 =
(Text
gc0, (b, b) -> Either (b, b) b
forall a b. a -> Either a b
Left (b
c0, b
c1))
norm [(b, b, Text)]
_ = String -> (Text, Either (b, b) b)
forall a. HasCallStack => String -> a
error String
"invalid"
zCmp :: (a, Text, c) -> (a, Text, c) -> Bool
zCmp (a
_, Text
desc0, c
_) (a
_, Text
desc1, c
_) =
Text
"First>" Text -> Text -> Bool
`T.isSuffixOf` Text
desc0
Bool -> Bool -> Bool
&& Text
"Last>" Text -> Text -> Bool
`T.isSuffixOf` Text
desc1
gpMinus :: Either (a, a) a -> Either (a, b) a -> a
gpMinus (Left (a
_a, a
b)) (Left (a
c, b
_d)) = a
b a -> a -> a
forall a. Num a => a -> a -> a
- a
c
gpMinus (Left (a
_a, a
b)) (Right a
c) = a
b a -> a -> a
forall a. Num a => a -> a -> a
- a
c
gpMinus (Right a
a) (Left (a
b, b
_c)) = a
a a -> a -> a
forall a. Num a => a -> a -> a
- a
b
gpMinus (Right a
a) (Right a
b) = a
a a -> a -> a
forall a. Num a => a -> a -> a
- a
b
isIncr :: Bool
isIncr = [Bool] -> Bool
forall (t :: * -> *). Foldable t => t Bool -> Bool
and ([Bool] -> Bool) -> [Bool] -> Bool
forall a b. (a -> b) -> a -> b
$ (Range -> Range -> Bool) -> [Range] -> [Range] -> [Bool]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith Range -> Range -> Bool
forall a a b.
(Ord a, Num a) =>
Either (a, a) a -> Either (a, b) a -> Bool
isStrictIncr [Range]
gs ([Range] -> [Range]
forall a. [a] -> [a]
tail [Range]
gs)
where
isStrictIncr :: Either (a, a) a -> Either (a, b) a -> Bool
isStrictIncr Either (a, a) a
l Either (a, b) a
r = Either (a, a) a -> Either (a, b) a -> a
forall a a b. Num a => Either (a, a) a -> Either (a, b) a -> a
gpMinus Either (a, a) a
l Either (a, b) a
r a -> a -> Bool
forall a. Ord a => a -> a -> Bool
< a
0
gs :: [Range]
gs = ((Text, Range) -> Range) -> [(Text, Range)] -> [Range]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Text, Range) -> Range
forall a b. (a, b) -> b
snd [(Text, Range)]
groupped
Bool -> String -> Either String ()
forall a. Bool -> a -> Either a ()
dieIf
(Bool -> Bool
not Bool
isIncr)
String
"Data rows are not strictly ascending."
let gcGroupped :: [(T.Text, [Either (Int, Int) Int])]
gcGroupped :: [(Text, [Range])]
gcGroupped =
(\[(Text, Range)]
ts -> ((Text, Range) -> Text
forall a b. (a, b) -> a
fst ((Text, Range) -> Text)
-> ([(Text, Range)] -> (Text, Range)) -> [(Text, Range)] -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [(Text, Range)] -> (Text, Range)
forall a. [a] -> a
head ([(Text, Range)] -> Text) -> [(Text, Range)] -> Text
forall a b. (a -> b) -> a -> b
$ [(Text, Range)]
ts, ((Text, Range) -> Range) -> [(Text, Range)] -> [Range]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Text, Range) -> Range
forall a b. (a, b) -> b
snd [(Text, Range)]
ts)) ([(Text, Range)] -> (Text, [Range]))
-> [[(Text, Range)]] -> [(Text, [Range])]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ((Text, Range) -> (Text, Range) -> Bool)
-> [(Text, Range)] -> [[(Text, Range)]]
forall a. (a -> a -> Bool) -> [a] -> [[a]]
groupBy (Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
(==) (Text -> Text -> Bool)
-> ((Text, Range) -> Text)
-> (Text, Range)
-> (Text, Range)
-> Bool
forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
`on` (Text, Range) -> Text
forall a b. (a, b) -> a
fst) [(Text, Range)]
groupped
merge :: [Either (b, b) b] -> [Either (b, b) b] -> [Either (b, b) b]
merge [Either (b, b) b]
acc [] = [Either (b, b) b] -> [Either (b, b) b]
forall a. [a] -> [a]
reverse [Either (b, b) b]
acc
merge [] (Either (b, b) b
x : [Either (b, b) b]
xs) = [Either (b, b) b] -> [Either (b, b) b] -> [Either (b, b) b]
merge [Either (b, b) b
x] [Either (b, b) b]
xs
merge (Either (b, b) b
u : [Either (b, b) b]
us) (Either (b, b) b
x : [Either (b, b) b]
xs) = case (Either (b, b) b
u, Either (b, b) b
x) of
(Left (b
a, b
b), Left (b
c, b
d)) ->
if b
b b -> b -> b
forall a. Num a => a -> a -> a
+ b
1 b -> b -> Bool
forall a. Eq a => a -> a -> Bool
== b
c then [Either (b, b) b] -> [Either (b, b) b] -> [Either (b, b) b]
merge ((b, b) -> Either (b, b) b
forall a b. a -> Either a b
Left (b
a, b
d) Either (b, b) b -> [Either (b, b) b] -> [Either (b, b) b]
forall a. a -> [a] -> [a]
: [Either (b, b) b]
us) [Either (b, b) b]
xs else [Either (b, b) b] -> [Either (b, b) b] -> [Either (b, b) b]
merge (Either (b, b) b
x Either (b, b) b -> [Either (b, b) b] -> [Either (b, b) b]
forall a. a -> [a] -> [a]
: Either (b, b) b
u Either (b, b) b -> [Either (b, b) b] -> [Either (b, b) b]
forall a. a -> [a] -> [a]
: [Either (b, b) b]
us) [Either (b, b) b]
xs
(Left (b
a, b
b), Right b
c) ->
if b
b b -> b -> b
forall a. Num a => a -> a -> a
+ b
1 b -> b -> Bool
forall a. Eq a => a -> a -> Bool
== b
c then [Either (b, b) b] -> [Either (b, b) b] -> [Either (b, b) b]
merge ((b, b) -> Either (b, b) b
forall a b. a -> Either a b
Left (b
a, b
c) Either (b, b) b -> [Either (b, b) b] -> [Either (b, b) b]
forall a. a -> [a] -> [a]
: [Either (b, b) b]
us) [Either (b, b) b]
xs else [Either (b, b) b] -> [Either (b, b) b] -> [Either (b, b) b]
merge (Either (b, b) b
x Either (b, b) b -> [Either (b, b) b] -> [Either (b, b) b]
forall a. a -> [a] -> [a]
: Either (b, b) b
u Either (b, b) b -> [Either (b, b) b] -> [Either (b, b) b]
forall a. a -> [a] -> [a]
: [Either (b, b) b]
us) [Either (b, b) b]
xs
(Right b
a, Left (b
b, b
c)) ->
if b
a b -> b -> b
forall a. Num a => a -> a -> a
+ b
1 b -> b -> Bool
forall a. Eq a => a -> a -> Bool
== b
b then [Either (b, b) b] -> [Either (b, b) b] -> [Either (b, b) b]
merge ((b, b) -> Either (b, b) b
forall a b. a -> Either a b
Left (b
a, b
c) Either (b, b) b -> [Either (b, b) b] -> [Either (b, b) b]
forall a. a -> [a] -> [a]
: [Either (b, b) b]
us) [Either (b, b) b]
xs else [Either (b, b) b] -> [Either (b, b) b] -> [Either (b, b) b]
merge (Either (b, b) b
x Either (b, b) b -> [Either (b, b) b] -> [Either (b, b) b]
forall a. a -> [a] -> [a]
: Either (b, b) b
u Either (b, b) b -> [Either (b, b) b] -> [Either (b, b) b]
forall a. a -> [a] -> [a]
: [Either (b, b) b]
us) [Either (b, b) b]
xs
(Right b
a, Right b
b) ->
if b
a b -> b -> b
forall a. Num a => a -> a -> a
+ b
1 b -> b -> Bool
forall a. Eq a => a -> a -> Bool
== b
b then [Either (b, b) b] -> [Either (b, b) b] -> [Either (b, b) b]
merge ((b, b) -> Either (b, b) b
forall a b. a -> Either a b
Left (b
a, b
b) Either (b, b) b -> [Either (b, b) b] -> [Either (b, b) b]
forall a. a -> [a] -> [a]
: [Either (b, b) b]
us) [Either (b, b) b]
xs else [Either (b, b) b] -> [Either (b, b) b] -> [Either (b, b) b]
merge (Either (b, b) b
x Either (b, b) b -> [Either (b, b) b] -> [Either (b, b) b]
forall a. a -> [a] -> [a]
: Either (b, b) b
u Either (b, b) b -> [Either (b, b) b] -> [Either (b, b) b]
forall a. a -> [a] -> [a]
: [Either (b, b) b]
us) [Either (b, b) b]
xs
gcGroupped' :: [(T.Text, [Either (Int, Int) Int])]
gcGroupped' :: [(Text, [Range])]
gcGroupped' = (((Text, [Range]) -> (Text, [Range]))
-> [(Text, [Range])] -> [(Text, [Range])]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (((Text, [Range]) -> (Text, [Range]))
-> [(Text, [Range])] -> [(Text, [Range])])
-> (([Range] -> [Range]) -> (Text, [Range]) -> (Text, [Range]))
-> ([Range] -> [Range])
-> [(Text, [Range])]
-> [(Text, [Range])]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ([Range] -> [Range]) -> (Text, [Range]) -> (Text, [Range])
forall (p :: * -> * -> *) b c a.
Bifunctor p =>
(b -> c) -> p a b -> p a c
second) ([Range] -> [Range] -> [Range]
forall b.
(Eq b, Num b) =>
[Either (b, b) b] -> [Either (b, b) b] -> [Either (b, b) b]
merge []) [(Text, [Range])]
gcGroupped
Bool -> String -> Either String ()
forall a. Bool -> a -> Either a ()
dieIf
(Text -> Set Text -> Bool
forall a. Ord a => a -> Set a -> Bool
S.member Text
"Cn" ([Text] -> Set Text
forall a. Ord a => [a] -> Set a
S.fromList ([Text] -> Set Text) -> [Text] -> Set Text
forall a b. (a -> b) -> a -> b
$ ((Text, [Range]) -> Text) -> [(Text, [Range])] -> [Text]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Text, [Range]) -> Text
forall a b. (a, b) -> a
fst [(Text, [Range])]
gcGroupped'))
String
"No character should be in 'Cn' category"
[(Range, GeneralCategory)]
-> Either String [(Range, GeneralCategory)]
forall (f :: * -> *) a. Applicative f => a -> f a
pure ([(Range, GeneralCategory)]
-> Either String [(Range, GeneralCategory)])
-> [(Range, GeneralCategory)]
-> Either String [(Range, GeneralCategory)]
forall a b. (a -> b) -> a -> b
$ ((Text, [Range]) -> [(Range, GeneralCategory)])
-> [(Text, [Range])] -> [(Range, GeneralCategory)]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap (\(Text
gc, [Range]
xs) -> [(Range
x, Map Text GeneralCategory
genCatLitTable Map Text GeneralCategory -> Text -> GeneralCategory
forall k a. Ord k => Map k a -> k -> a
M.! Text
gc) | Range
x <- [Range]
xs]) [(Text, [Range])]
gcGroupped'
mkDatabase' :: [(Range, GeneralCategory)] -> GenCatDatabase
mkDatabase' :: [(Range, GeneralCategory)] -> GenCatDatabase
mkDatabase' [(Range, GeneralCategory)]
gs = UArray Int Word64 -> GenCatDatabase
GenCatDatabase (UArray Int Word64 -> GenCatDatabase)
-> UArray Int Word64 -> GenCatDatabase
forall a b. (a -> b) -> a -> b
$ (Int, Int) -> [Word64] -> UArray Int Word64
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
(i, i) -> [e] -> a i e
A.listArray (Int
0, Int
l Int -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1) (((Range, GeneralCategory) -> Word64)
-> [(Range, GeneralCategory)] -> [Word64]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Range, GeneralCategory) -> Word64
forall a a. (Integral a, Enum a) => (Either (a, a) a, a) -> Word64
mkItem [(Range, GeneralCategory)]
gs)
where
l :: Int
l = [(Range, GeneralCategory)] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [(Range, GeneralCategory)]
gs
mkItem :: (Either (a, a) a, a) -> Word64
mkItem (Either (a, a) a
range, a
gc) =
(Word32, Word32, Word8) -> Word64
packTuple
( a -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral a
lo
, a -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral a
hi
, Int -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word8) -> Int -> Word8
forall a b. (a -> b) -> a -> b
$ a -> Int
forall a. Enum a => a -> Int
fromEnum a
gc
)
where
(a
lo, a
hi) = case Either (a, a) a
range of
Left (a, b) -> (a
a, a
b)
Right v -> (a
v, a
v)
mkDatabaseFromUnicodeData :: BSL.ByteString -> Either String GenCatDatabase
mkDatabaseFromUnicodeData :: ByteString -> Either String GenCatDatabase
mkDatabaseFromUnicodeData = ByteString -> Either String [(Range, GeneralCategory)]
verifyUnicodeDataAndProcess (ByteString -> Either String [(Range, GeneralCategory)])
-> ([(Range, GeneralCategory)] -> Either String GenCatDatabase)
-> ByteString
-> Either String GenCatDatabase
forall (m :: * -> *) a b c.
Monad m =>
(a -> m b) -> (b -> m c) -> a -> m c
>=> GenCatDatabase -> Either String GenCatDatabase
forall (f :: * -> *) a. Applicative f => a -> f a
pure (GenCatDatabase -> Either String GenCatDatabase)
-> ([(Range, GeneralCategory)] -> GenCatDatabase)
-> [(Range, GeneralCategory)]
-> Either String GenCatDatabase
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [(Range, GeneralCategory)] -> GenCatDatabase
mkDatabase'
packTuple :: (Word32, Word32, Word8) -> Word64
packTuple :: (Word32, Word32, Word8) -> Word64
packTuple (Word32
lo, Word32
high, Word8
gc) = Word32 -> Word64
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word32
lo Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
.|. Word64
high' Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
.|. Word64
gc'
where
high' :: Word64
high' = Word32 -> Word64
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word32
high Word64 -> Int -> Word64
forall a. Bits a => a -> Int -> a
`unsafeShiftL` Int
24
gc' :: Word64
gc' = Word8 -> Word64
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
gc Word64 -> Int -> Word64
forall a. Bits a => a -> Int -> a
`unsafeShiftL` Int
48
unpackTuple :: Word64 -> (Word32, Word32, Word8)
unpackTuple :: Word64 -> (Word32, Word32, Word8)
unpackTuple Word64
payload = (Word32
lo, Word32
high, Word8
gc)
where
lo, high :: Word32
lo :: Word32
lo = Word64 -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word64
0xFF_FFFF Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
.&. Word64
payload)
high :: Word32
high = Word64 -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word64
0xFF_FFFF Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
.&. (Word64
payload Word64 -> Int -> Word64
forall a. Bits a => a -> Int -> a
`unsafeShiftR` Int
24))
gc :: Word8
gc = Word64 -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word64
0xFF Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
.&. (Word64
payload Word64 -> Int -> Word64
forall a. Bits a => a -> Int -> a
`unsafeShiftR` Int
48))
query :: GenCatDatabase -> Char -> GeneralCategory
query :: GenCatDatabase -> Char -> GeneralCategory
query (GenCatDatabase UArray Int Word64
arr) Char
ch = Int -> GeneralCategory
forall a. Enum a => Int -> a
toEnum (Int -> GeneralCategory)
-> (Word8 -> Int) -> Word8 -> GeneralCategory
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word8 -> GeneralCategory) -> Word8 -> GeneralCategory
forall a b. (a -> b) -> a -> b
$ Int -> Int -> Word8
search Int
lo Int
hi
where
needle :: Word32
needle :: Word32
needle = Int -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word32) -> Int -> Word32
forall a b. (a -> b) -> a -> b
$ Char -> Int
ord Char
ch
(Int
lo, Int
hi) = UArray Int Word64 -> (Int, Int)
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> (i, i)
A.bounds UArray Int Word64
arr
search :: Int -> Int -> Word8
search Int
l Int
r =
if Int
l Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
r
then
let mid :: Int
mid = (Int
l Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
r) Int -> Int -> Int
forall a. Integral a => a -> a -> a
`quot` Int
2
(Word32
rangeL, Word32
rangeR, Word8
val) = Word64 -> (Word32, Word32, Word8)
unpackTuple (UArray Int Word64
arr UArray Int Word64 -> Int -> Word64
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> i -> e
A.! Int
mid)
in if
| Word32
needle Word32 -> Word32 -> Bool
forall a. Ord a => a -> a -> Bool
< Word32
rangeL -> Int -> Int -> Word8
search Int
l (Int
mid Int -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1)
| Word32
needle Word32 -> Word32 -> Bool
forall a. Ord a => a -> a -> Bool
> Word32
rangeR -> Int -> Int -> Word8
search (Int
mid Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) Int
r
| Word32
rangeL Word32 -> Word32 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word32
needle Bool -> Bool -> Bool
&& Word32
needle Word32 -> Word32 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word32
rangeR -> Word8
val
| Bool
otherwise -> String -> Word8
forall a. HasCallStack => String -> a
error String
"unreachable"
else Int -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word8) -> Int -> Word8
forall a b. (a -> b) -> a -> b
$ GeneralCategory -> Int
forall a. Enum a => a -> Int
fromEnum GeneralCategory
NotAssigned
checkDatabase :: A.UArray Int Word64 -> Either String GenCatDatabase
checkDatabase :: UArray Int Word64 -> Either String GenCatDatabase
checkDatabase UArray Int Word64
arr = do
let xs :: [Word64]
xs = UArray Int Word64 -> [Word64]
forall (a :: * -> * -> *) e i. (IArray a e, Ix i) => a i e -> [e]
A.elems UArray Int Word64
arr
[(Int, (Word32, Word32))]
ys <- [(Int, Word64)]
-> ((Int, Word64) -> Either String (Int, (Word32, Word32)))
-> Either String [(Int, (Word32, Word32))]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
t a -> (a -> m b) -> m (t b)
forM ([Int] -> [Word64] -> [(Int, Word64)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Int
0 :: Int ..] [Word64]
xs) (((Int, Word64) -> Either String (Int, (Word32, Word32)))
-> Either String [(Int, (Word32, Word32))])
-> ((Int, Word64) -> Either String (Int, (Word32, Word32)))
-> Either String [(Int, (Word32, Word32))]
forall a b. (a -> b) -> a -> b
$ \(Int
i, Word64
payload) -> do
let dieIf :: Bool -> String -> Either String ()
dieIf Bool
tt String
msg = Bool -> Either String () -> Either String ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when Bool
tt (Either String () -> Either String ())
-> Either String () -> Either String ()
forall a b. (a -> b) -> a -> b
$ String -> Either String ()
forall a b. a -> Either a b
Left (String -> Either String ()) -> String -> Either String ()
forall a b. (a -> b) -> a -> b
$ String
"failed at element " String -> String -> String
forall a. Semigroup a => a -> a -> a
<> Int -> String
forall a. Show a => a -> String
show Int
i String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
": " String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
msg
(Word32
lo, Word32
hi, Word8
val) = Word64 -> (Word32, Word32, Word8)
unpackTuple Word64
payload
Bool -> String -> Either String ()
dieIf (Bool -> Bool
not (Word32
0 Word32 -> Word32 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word32
lo Bool -> Bool -> Bool
&& Word32
lo Word32 -> Word32 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word32
0x10FFFF)) String
"low bound out of range"
Bool -> String -> Either String ()
dieIf (Bool -> Bool
not (Word32
0 Word32 -> Word32 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word32
hi Bool -> Bool -> Bool
&& Word32
hi Word32 -> Word32 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word32
0x10FFFF)) String
"high bound out of range"
Bool -> String -> Either String ()
dieIf (Word32
lo Word32 -> Word32 -> Bool
forall a. Ord a => a -> a -> Bool
> Word32
hi) String
"violates low <= high"
do
let gcLo :: Word8
gcLo = Int -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word8) -> Int -> Word8
forall a b. (a -> b) -> a -> b
$ GeneralCategory -> Int
forall a. Enum a => a -> Int
fromEnum (GeneralCategory
forall a. Bounded a => a
minBound :: GeneralCategory)
gcHi :: Word8
gcHi = Int -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word8) -> Int -> Word8
forall a b. (a -> b) -> a -> b
$ GeneralCategory -> Int
forall a. Enum a => a -> Int
fromEnum (GeneralCategory
forall a. Bounded a => a
maxBound :: GeneralCategory)
notAssn :: Word8
notAssn = Int -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word8) -> Int -> Word8
forall a b. (a -> b) -> a -> b
$ GeneralCategory -> Int
forall a. Enum a => a -> Int
fromEnum GeneralCategory
NotAssigned
Bool -> String -> Either String ()
dieIf (Bool -> Bool
not (Word8
gcLo Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
val Bool -> Bool -> Bool
&& Word8
val Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
gcHi)) String
"general category value out of range"
Bool -> String -> Either String ()
dieIf (Word8
val Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
notAssn) String
"value should not be NotAssigned"
(Int, (Word32, Word32)) -> Either String (Int, (Word32, Word32))
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Int
i, (Word32
lo, Word32
hi))
[((Int, (Word32, Word32)), (Int, (Word32, Word32)))]
-> (((Int, (Word32, Word32)), (Int, (Word32, Word32)))
-> Either String ())
-> Either String ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ ([(Int, (Word32, Word32))]
-> [(Int, (Word32, Word32))]
-> [((Int, (Word32, Word32)), (Int, (Word32, Word32)))]
forall a b. [a] -> [b] -> [(a, b)]
zip [(Int, (Word32, Word32))]
ys ([(Int, (Word32, Word32))] -> [(Int, (Word32, Word32))]
forall a. [a] -> [a]
tail [(Int, (Word32, Word32))]
ys)) ((((Int, (Word32, Word32)), (Int, (Word32, Word32)))
-> Either String ())
-> Either String ())
-> (((Int, (Word32, Word32)), (Int, (Word32, Word32)))
-> Either String ())
-> Either String ()
forall a b. (a -> b) -> a -> b
$ \((Int
i, (Word32
_, Word32
a)), (Int
j, (Word32
b, Word32
_))) -> do
Bool -> Either String () -> Either String ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Word32
a Word32 -> Word32 -> Bool
forall a. Ord a => a -> a -> Bool
>= Word32
b) (Either String () -> Either String ())
-> Either String () -> Either String ()
forall a b. (a -> b) -> a -> b
$
String -> Either String ()
forall a b. a -> Either a b
Left (String -> Either String ()) -> String -> Either String ()
forall a b. (a -> b) -> a -> b
$ String
"failed when comparing element pair " String -> String -> String
forall a. Semigroup a => a -> a -> a
<> (Int, Int) -> String
forall a. Show a => a -> String
show (Int
i, Int
j) String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
"not strictly ascending."
GenCatDatabase -> Either String GenCatDatabase
forall (f :: * -> *) a. Applicative f => a -> f a
pure (UArray Int Word64 -> GenCatDatabase
GenCatDatabase UArray Int Word64
arr)
checkDatabase' :: GenCatDatabase -> Either String GenCatDatabase
checkDatabase' :: GenCatDatabase -> Either String GenCatDatabase
checkDatabase' = (UArray Int Word64 -> Either String GenCatDatabase)
-> GenCatDatabase -> Either String GenCatDatabase
coerce UArray Int Word64 -> Either String GenCatDatabase
checkDatabase