{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MultiWayIf #-}
{-# LANGUAGE NumericUnderscores #-}
{-# LANGUAGE OverloadedStrings #-}

module Data.Char.GeneralCategory.Database
  ( genCatLitTable
  , mkDatabaseFromUnicodeData
  , GenCatDatabase
  , query
  , checkDatabase
  , checkDatabase'
  )
where

import Control.Monad
import qualified Data.Array.Unboxed as A
import Data.Bifunctor
import Data.Binary
import Data.Bits
import qualified Data.ByteString.Lazy as BSL
import Data.Char
import Data.Coerce
import Data.Function
import Data.List
import qualified Data.Map.Strict as M
import qualified Data.Set as S
import qualified Data.Text as T
import Data.Text.Encoding (decodeUtf8)
import Numeric

-- | General category database.
--   Conceptually this is a sorted array of ascending, non-overlapping inclusive codepoint ranges,
--   with a 'GeneralCategory' attached to each of them.
--   Note that 'NotAssigned' should not be present in this array.
--
--   Internally every element is packed into 'Word64', for the least significant bits:
--
--   * @0-23@ bits represent the low codepoint range (inclusive)
--   * @24-47@ bits represent the high codepoint range (inclusive)
--   * @47-63@ bits represent general category values consistent to 'GeneralCategory'\'s 'Enum' instance.
newtype GenCatDatabase
  = GenCatDatabase (A.UArray Int Word64)
  deriving (Get GenCatDatabase
[GenCatDatabase] -> Put
GenCatDatabase -> Put
(GenCatDatabase -> Put)
-> Get GenCatDatabase
-> ([GenCatDatabase] -> Put)
-> Binary GenCatDatabase
forall t. (t -> Put) -> Get t -> ([t] -> Put) -> Binary t
putList :: [GenCatDatabase] -> Put
$cputList :: [GenCatDatabase] -> Put
get :: Get GenCatDatabase
$cget :: Get GenCatDatabase
put :: GenCatDatabase -> Put
$cput :: GenCatDatabase -> Put
Binary)

type Range =
  Either
    (Int, Int) -- [l .. r] (both inclusive)
    Int

-- | General category abbreviations
genCatLitTable :: M.Map T.Text GeneralCategory
genCatLitTable :: Map Text GeneralCategory
genCatLitTable = [(Text, GeneralCategory)] -> Map Text GeneralCategory
forall k a. Ord k => [(k, a)] -> Map k a
M.fromList ([(Text, GeneralCategory)] -> Map Text GeneralCategory)
-> [(Text, GeneralCategory)] -> Map Text GeneralCategory
forall a b. (a -> b) -> a -> b
$ [Text] -> [GeneralCategory] -> [(Text, GeneralCategory)]
forall a b. [a] -> [b] -> [(a, b)]
zip (Text -> [Text]
T.words Text
abbrs) [GeneralCategory
forall a. Bounded a => a
minBound .. GeneralCategory
forall a. Bounded a => a
maxBound]
  where
    abbrs :: Text
abbrs =
      Text
"Lu Ll Lt Lm Lo \
      \Mn Mc Me \
      \Nd Nl No \
      \Pc Pd Ps Pe Pi Pf Po \
      \Sm Sc Sk So \
      \Zs Zl Zp \
      \Cc Cf Cs Co Cn"

verifyUnicodeDataAndProcess :: BSL.ByteString -> Either String [(Range, GeneralCategory)]
verifyUnicodeDataAndProcess :: ByteString -> Either String [(Range, GeneralCategory)]
verifyUnicodeDataAndProcess ByteString
raw = do
  let dieIf :: Bool -> a -> Either a ()
dieIf Bool
flag a
reason =
        Bool -> Either a () -> Either a ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when Bool
flag (Either a () -> Either a ()) -> Either a () -> Either a ()
forall a b. (a -> b) -> a -> b
$
          a -> Either a ()
forall a b. a -> Either a b
Left a
reason
      rawLines :: [Text]
rawLines = Text -> [Text]
T.lines (Text -> [Text]) -> (ByteString -> Text) -> ByteString -> [Text]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Text
decodeUtf8 (ByteString -> Text)
-> (ByteString -> ByteString) -> ByteString -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> ByteString
BSL.toStrict (ByteString -> [Text]) -> ByteString -> [Text]
forall a b. (a -> b) -> a -> b
$ ByteString
raw
      rows :: [(Int, Text, Text)]
rows = (Text -> (Int, Text, Text)) -> [Text] -> [(Int, Text, Text)]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Text -> (Int, Text, Text)
extract [Text]
rawLines
        where
          extract :: Text -> (Int, Text, Text)
extract Text
rawLine = (Int
code :: Int, Text
desc, Text
gc)
            where
              [(Int
code, String
"")] = ReadS Int
forall a. (Eq a, Num a) => ReadS a
readHex (Text -> String
T.unpack Text
rawCode)
              Text
rawCode : Text
desc : Text
gc : [Text]
_ = Text -> Text -> [Text]
T.splitOn Text
";" Text
rawLine
      groupped :: [(T.Text, Either (Int, Int) Int)]
      groupped :: [(Text, Range)]
groupped = [(Int, Text, Text)] -> (Text, Range)
forall b b. [(b, b, Text)] -> (Text, Either (b, b) b)
norm ([(Int, Text, Text)] -> (Text, Range))
-> [[(Int, Text, Text)]] -> [(Text, Range)]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ((Int, Text, Text) -> (Int, Text, Text) -> Bool)
-> [(Int, Text, Text)] -> [[(Int, Text, Text)]]
forall a. (a -> a -> Bool) -> [a] -> [[a]]
groupBy (Int, Text, Text) -> (Int, Text, Text) -> Bool
forall a c a c. (a, Text, c) -> (a, Text, c) -> Bool
zCmp [(Int, Text, Text)]
rows
        where
          norm :: [(b, b, Text)] -> (Text, Either (b, b) b)
norm [(b
c, b
_, Text
gc)] = (Text
gc, b -> Either (b, b) b
forall a b. b -> Either a b
Right b
c)
          norm [(b
c0, b
_, Text
gc0), (b
c1, b
_, Text
gc1)]
            | Text
gc0 Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
gc1
                Bool -> Bool -> Bool
&& Int -> Text -> Text
T.dropEnd (Text -> Int
T.length Text
"First>") Text
gc0
                Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Int -> Text -> Text
T.dropEnd (Text -> Int
T.length Text
"Last>") Text
gc1 =
              (Text
gc0, (b, b) -> Either (b, b) b
forall a b. a -> Either a b
Left (b
c0, b
c1))
          norm [(b, b, Text)]
_ = String -> (Text, Either (b, b) b)
forall a. HasCallStack => String -> a
error String
"invalid"
          zCmp :: (a, Text, c) -> (a, Text, c) -> Bool
zCmp (a
_, Text
desc0, c
_) (a
_, Text
desc1, c
_) =
            Text
"First>" Text -> Text -> Bool
`T.isSuffixOf` Text
desc0
              Bool -> Bool -> Bool
&& Text
"Last>" Text -> Text -> Bool
`T.isSuffixOf` Text
desc1
      gpMinus :: Either (a, a) a -> Either (a, b) a -> a
gpMinus (Left (a
_a, a
b)) (Left (a
c, b
_d)) = a
b a -> a -> a
forall a. Num a => a -> a -> a
- a
c
      gpMinus (Left (a
_a, a
b)) (Right a
c) = a
b a -> a -> a
forall a. Num a => a -> a -> a
- a
c
      gpMinus (Right a
a) (Left (a
b, b
_c)) = a
a a -> a -> a
forall a. Num a => a -> a -> a
- a
b
      gpMinus (Right a
a) (Right a
b) = a
a a -> a -> a
forall a. Num a => a -> a -> a
- a
b
      isIncr :: Bool
isIncr = [Bool] -> Bool
forall (t :: * -> *). Foldable t => t Bool -> Bool
and ([Bool] -> Bool) -> [Bool] -> Bool
forall a b. (a -> b) -> a -> b
$ (Range -> Range -> Bool) -> [Range] -> [Range] -> [Bool]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith Range -> Range -> Bool
forall a a b.
(Ord a, Num a) =>
Either (a, a) a -> Either (a, b) a -> Bool
isStrictIncr [Range]
gs ([Range] -> [Range]
forall a. [a] -> [a]
tail [Range]
gs)
        where
          isStrictIncr :: Either (a, a) a -> Either (a, b) a -> Bool
isStrictIncr Either (a, a) a
l Either (a, b) a
r = Either (a, a) a -> Either (a, b) a -> a
forall a a b. Num a => Either (a, a) a -> Either (a, b) a -> a
gpMinus Either (a, a) a
l Either (a, b) a
r a -> a -> Bool
forall a. Ord a => a -> a -> Bool
< a
0
          gs :: [Range]
gs = ((Text, Range) -> Range) -> [(Text, Range)] -> [Range]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Text, Range) -> Range
forall a b. (a, b) -> b
snd [(Text, Range)]
groupped
  Bool -> String -> Either String ()
forall a. Bool -> a -> Either a ()
dieIf
    (Bool -> Bool
not Bool
isIncr)
    String
"Data rows are not strictly ascending."
  let gcGroupped :: [(T.Text, [Either (Int, Int) Int])]
      gcGroupped :: [(Text, [Range])]
gcGroupped =
        (\[(Text, Range)]
ts -> ((Text, Range) -> Text
forall a b. (a, b) -> a
fst ((Text, Range) -> Text)
-> ([(Text, Range)] -> (Text, Range)) -> [(Text, Range)] -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [(Text, Range)] -> (Text, Range)
forall a. [a] -> a
head ([(Text, Range)] -> Text) -> [(Text, Range)] -> Text
forall a b. (a -> b) -> a -> b
$ [(Text, Range)]
ts, ((Text, Range) -> Range) -> [(Text, Range)] -> [Range]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Text, Range) -> Range
forall a b. (a, b) -> b
snd [(Text, Range)]
ts)) ([(Text, Range)] -> (Text, [Range]))
-> [[(Text, Range)]] -> [(Text, [Range])]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ((Text, Range) -> (Text, Range) -> Bool)
-> [(Text, Range)] -> [[(Text, Range)]]
forall a. (a -> a -> Bool) -> [a] -> [[a]]
groupBy (Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
(==) (Text -> Text -> Bool)
-> ((Text, Range) -> Text)
-> (Text, Range)
-> (Text, Range)
-> Bool
forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
`on` (Text, Range) -> Text
forall a b. (a, b) -> a
fst) [(Text, Range)]
groupped
      merge :: [Either (b, b) b] -> [Either (b, b) b] -> [Either (b, b) b]
merge [Either (b, b) b]
acc [] = [Either (b, b) b] -> [Either (b, b) b]
forall a. [a] -> [a]
reverse [Either (b, b) b]
acc
      merge [] (Either (b, b) b
x : [Either (b, b) b]
xs) = [Either (b, b) b] -> [Either (b, b) b] -> [Either (b, b) b]
merge [Either (b, b) b
x] [Either (b, b) b]
xs
      merge (Either (b, b) b
u : [Either (b, b) b]
us) (Either (b, b) b
x : [Either (b, b) b]
xs) = case (Either (b, b) b
u, Either (b, b) b
x) of
        (Left (b
a, b
b), Left (b
c, b
d)) ->
          if b
b b -> b -> b
forall a. Num a => a -> a -> a
+ b
1 b -> b -> Bool
forall a. Eq a => a -> a -> Bool
== b
c then [Either (b, b) b] -> [Either (b, b) b] -> [Either (b, b) b]
merge ((b, b) -> Either (b, b) b
forall a b. a -> Either a b
Left (b
a, b
d) Either (b, b) b -> [Either (b, b) b] -> [Either (b, b) b]
forall a. a -> [a] -> [a]
: [Either (b, b) b]
us) [Either (b, b) b]
xs else [Either (b, b) b] -> [Either (b, b) b] -> [Either (b, b) b]
merge (Either (b, b) b
x Either (b, b) b -> [Either (b, b) b] -> [Either (b, b) b]
forall a. a -> [a] -> [a]
: Either (b, b) b
u Either (b, b) b -> [Either (b, b) b] -> [Either (b, b) b]
forall a. a -> [a] -> [a]
: [Either (b, b) b]
us) [Either (b, b) b]
xs
        (Left (b
a, b
b), Right b
c) ->
          if b
b b -> b -> b
forall a. Num a => a -> a -> a
+ b
1 b -> b -> Bool
forall a. Eq a => a -> a -> Bool
== b
c then [Either (b, b) b] -> [Either (b, b) b] -> [Either (b, b) b]
merge ((b, b) -> Either (b, b) b
forall a b. a -> Either a b
Left (b
a, b
c) Either (b, b) b -> [Either (b, b) b] -> [Either (b, b) b]
forall a. a -> [a] -> [a]
: [Either (b, b) b]
us) [Either (b, b) b]
xs else [Either (b, b) b] -> [Either (b, b) b] -> [Either (b, b) b]
merge (Either (b, b) b
x Either (b, b) b -> [Either (b, b) b] -> [Either (b, b) b]
forall a. a -> [a] -> [a]
: Either (b, b) b
u Either (b, b) b -> [Either (b, b) b] -> [Either (b, b) b]
forall a. a -> [a] -> [a]
: [Either (b, b) b]
us) [Either (b, b) b]
xs
        (Right b
a, Left (b
b, b
c)) ->
          if b
a b -> b -> b
forall a. Num a => a -> a -> a
+ b
1 b -> b -> Bool
forall a. Eq a => a -> a -> Bool
== b
b then [Either (b, b) b] -> [Either (b, b) b] -> [Either (b, b) b]
merge ((b, b) -> Either (b, b) b
forall a b. a -> Either a b
Left (b
a, b
c) Either (b, b) b -> [Either (b, b) b] -> [Either (b, b) b]
forall a. a -> [a] -> [a]
: [Either (b, b) b]
us) [Either (b, b) b]
xs else [Either (b, b) b] -> [Either (b, b) b] -> [Either (b, b) b]
merge (Either (b, b) b
x Either (b, b) b -> [Either (b, b) b] -> [Either (b, b) b]
forall a. a -> [a] -> [a]
: Either (b, b) b
u Either (b, b) b -> [Either (b, b) b] -> [Either (b, b) b]
forall a. a -> [a] -> [a]
: [Either (b, b) b]
us) [Either (b, b) b]
xs
        (Right b
a, Right b
b) ->
          if b
a b -> b -> b
forall a. Num a => a -> a -> a
+ b
1 b -> b -> Bool
forall a. Eq a => a -> a -> Bool
== b
b then [Either (b, b) b] -> [Either (b, b) b] -> [Either (b, b) b]
merge ((b, b) -> Either (b, b) b
forall a b. a -> Either a b
Left (b
a, b
b) Either (b, b) b -> [Either (b, b) b] -> [Either (b, b) b]
forall a. a -> [a] -> [a]
: [Either (b, b) b]
us) [Either (b, b) b]
xs else [Either (b, b) b] -> [Either (b, b) b] -> [Either (b, b) b]
merge (Either (b, b) b
x Either (b, b) b -> [Either (b, b) b] -> [Either (b, b) b]
forall a. a -> [a] -> [a]
: Either (b, b) b
u Either (b, b) b -> [Either (b, b) b] -> [Either (b, b) b]
forall a. a -> [a] -> [a]
: [Either (b, b) b]
us) [Either (b, b) b]
xs
      gcGroupped' :: [(T.Text, [Either (Int, Int) Int])]
      gcGroupped' :: [(Text, [Range])]
gcGroupped' = (((Text, [Range]) -> (Text, [Range]))
-> [(Text, [Range])] -> [(Text, [Range])]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (((Text, [Range]) -> (Text, [Range]))
 -> [(Text, [Range])] -> [(Text, [Range])])
-> (([Range] -> [Range]) -> (Text, [Range]) -> (Text, [Range]))
-> ([Range] -> [Range])
-> [(Text, [Range])]
-> [(Text, [Range])]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ([Range] -> [Range]) -> (Text, [Range]) -> (Text, [Range])
forall (p :: * -> * -> *) b c a.
Bifunctor p =>
(b -> c) -> p a b -> p a c
second) ([Range] -> [Range] -> [Range]
forall b.
(Eq b, Num b) =>
[Either (b, b) b] -> [Either (b, b) b] -> [Either (b, b) b]
merge []) [(Text, [Range])]
gcGroupped
  Bool -> String -> Either String ()
forall a. Bool -> a -> Either a ()
dieIf
    (Text -> Set Text -> Bool
forall a. Ord a => a -> Set a -> Bool
S.member Text
"Cn" ([Text] -> Set Text
forall a. Ord a => [a] -> Set a
S.fromList ([Text] -> Set Text) -> [Text] -> Set Text
forall a b. (a -> b) -> a -> b
$ ((Text, [Range]) -> Text) -> [(Text, [Range])] -> [Text]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Text, [Range]) -> Text
forall a b. (a, b) -> a
fst [(Text, [Range])]
gcGroupped'))
    String
"No character should be in 'Cn' category"
  [(Range, GeneralCategory)]
-> Either String [(Range, GeneralCategory)]
forall (f :: * -> *) a. Applicative f => a -> f a
pure ([(Range, GeneralCategory)]
 -> Either String [(Range, GeneralCategory)])
-> [(Range, GeneralCategory)]
-> Either String [(Range, GeneralCategory)]
forall a b. (a -> b) -> a -> b
$ ((Text, [Range]) -> [(Range, GeneralCategory)])
-> [(Text, [Range])] -> [(Range, GeneralCategory)]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap (\(Text
gc, [Range]
xs) -> [(Range
x, Map Text GeneralCategory
genCatLitTable Map Text GeneralCategory -> Text -> GeneralCategory
forall k a. Ord k => Map k a -> k -> a
M.! Text
gc) | Range
x <- [Range]
xs]) [(Text, [Range])]
gcGroupped'

mkDatabase' :: [(Range, GeneralCategory)] -> GenCatDatabase
mkDatabase' :: [(Range, GeneralCategory)] -> GenCatDatabase
mkDatabase' [(Range, GeneralCategory)]
gs = UArray Int Word64 -> GenCatDatabase
GenCatDatabase (UArray Int Word64 -> GenCatDatabase)
-> UArray Int Word64 -> GenCatDatabase
forall a b. (a -> b) -> a -> b
$ (Int, Int) -> [Word64] -> UArray Int Word64
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
(i, i) -> [e] -> a i e
A.listArray (Int
0, Int
l Int -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1) (((Range, GeneralCategory) -> Word64)
-> [(Range, GeneralCategory)] -> [Word64]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Range, GeneralCategory) -> Word64
forall a a. (Integral a, Enum a) => (Either (a, a) a, a) -> Word64
mkItem [(Range, GeneralCategory)]
gs)
  where
    l :: Int
l = [(Range, GeneralCategory)] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [(Range, GeneralCategory)]
gs
    mkItem :: (Either (a, a) a, a) -> Word64
mkItem (Either (a, a) a
range, a
gc) =
      (Word32, Word32, Word8) -> Word64
packTuple
        ( a -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral a
lo
        , a -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral a
hi
        , Int -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word8) -> Int -> Word8
forall a b. (a -> b) -> a -> b
$ a -> Int
forall a. Enum a => a -> Int
fromEnum a
gc
        )
      where
        (a
lo, a
hi) = case Either (a, a) a
range of
          Left (a, b) -> (a
a, a
b)
          Right v -> (a
v, a
v)

-- | Parses a lazy 'BSL.ByteString' from a __UnicodeData.txt__.
--   For example, content of [UnicodeData.txt](https://www.unicode.org/Public/13.0.0/ucd/UnicodeData.txt)
mkDatabaseFromUnicodeData :: BSL.ByteString -> Either String GenCatDatabase
mkDatabaseFromUnicodeData :: ByteString -> Either String GenCatDatabase
mkDatabaseFromUnicodeData = ByteString -> Either String [(Range, GeneralCategory)]
verifyUnicodeDataAndProcess (ByteString -> Either String [(Range, GeneralCategory)])
-> ([(Range, GeneralCategory)] -> Either String GenCatDatabase)
-> ByteString
-> Either String GenCatDatabase
forall (m :: * -> *) a b c.
Monad m =>
(a -> m b) -> (b -> m c) -> a -> m c
>=> GenCatDatabase -> Either String GenCatDatabase
forall (f :: * -> *) a. Applicative f => a -> f a
pure (GenCatDatabase -> Either String GenCatDatabase)
-> ([(Range, GeneralCategory)] -> GenCatDatabase)
-> [(Range, GeneralCategory)]
-> Either String GenCatDatabase
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [(Range, GeneralCategory)] -> GenCatDatabase
mkDatabase'

{-
  low: 0~23
  high: 24~47
  gc: 48~
 -}
packTuple :: (Word32, Word32, Word8) -> Word64
packTuple :: (Word32, Word32, Word8) -> Word64
packTuple (Word32
lo, Word32
high, Word8
gc) = Word32 -> Word64
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word32
lo Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
.|. Word64
high' Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
.|. Word64
gc'
  where
    high' :: Word64
high' = Word32 -> Word64
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word32
high Word64 -> Int -> Word64
forall a. Bits a => a -> Int -> a
`unsafeShiftL` Int
24
    gc' :: Word64
gc' = Word8 -> Word64
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
gc Word64 -> Int -> Word64
forall a. Bits a => a -> Int -> a
`unsafeShiftL` Int
48

unpackTuple :: Word64 -> (Word32, Word32, Word8)
unpackTuple :: Word64 -> (Word32, Word32, Word8)
unpackTuple Word64
payload = (Word32
lo, Word32
high, Word8
gc)
  where
    lo, high :: Word32
    lo :: Word32
lo = Word64 -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word64
0xFF_FFFF Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
.&. Word64
payload)
    high :: Word32
high = Word64 -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word64
0xFF_FFFF Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
.&. (Word64
payload Word64 -> Int -> Word64
forall a. Bits a => a -> Int -> a
`unsafeShiftR` Int
24))
    gc :: Word8
gc = Word64 -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word64
0xFF Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
.&. (Word64
payload Word64 -> Int -> Word64
forall a. Bits a => a -> Int -> a
`unsafeShiftR` Int
48))

-- | Queries database. @query db@ should be a function equivalent to 'Data.Char.generalCategory',
--   but queries the argument database instead.
query :: GenCatDatabase -> Char -> GeneralCategory
query :: GenCatDatabase -> Char -> GeneralCategory
query (GenCatDatabase UArray Int Word64
arr) Char
ch = Int -> GeneralCategory
forall a. Enum a => Int -> a
toEnum (Int -> GeneralCategory)
-> (Word8 -> Int) -> Word8 -> GeneralCategory
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word8 -> GeneralCategory) -> Word8 -> GeneralCategory
forall a b. (a -> b) -> a -> b
$ Int -> Int -> Word8
search Int
lo Int
hi
  where
    needle :: Word32
    needle :: Word32
needle = Int -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word32) -> Int -> Word32
forall a b. (a -> b) -> a -> b
$ Char -> Int
ord Char
ch
    (Int
lo, Int
hi) = UArray Int Word64 -> (Int, Int)
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> (i, i)
A.bounds UArray Int Word64
arr
    search :: Int -> Int -> Word8
search Int
l Int
r =
      if Int
l Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
r
        then
          let mid :: Int
mid = (Int
l Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
r) Int -> Int -> Int
forall a. Integral a => a -> a -> a
`quot` Int
2
              (Word32
rangeL, Word32
rangeR, Word8
val) = Word64 -> (Word32, Word32, Word8)
unpackTuple (UArray Int Word64
arr UArray Int Word64 -> Int -> Word64
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> i -> e
A.! Int
mid)
           in if
                  | Word32
needle Word32 -> Word32 -> Bool
forall a. Ord a => a -> a -> Bool
< Word32
rangeL -> Int -> Int -> Word8
search Int
l (Int
mid Int -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1)
                  | Word32
needle Word32 -> Word32 -> Bool
forall a. Ord a => a -> a -> Bool
> Word32
rangeR -> Int -> Int -> Word8
search (Int
mid Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) Int
r
                  | Word32
rangeL Word32 -> Word32 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word32
needle Bool -> Bool -> Bool
&& Word32
needle Word32 -> Word32 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word32
rangeR -> Word8
val
                  | Bool
otherwise -> String -> Word8
forall a. HasCallStack => String -> a
error String
"unreachable"
        else Int -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word8) -> Int -> Word8
forall a b. (a -> b) -> a -> b
$ GeneralCategory -> Int
forall a. Enum a => a -> Int
fromEnum GeneralCategory
NotAssigned

-- | Verifies that all properties of a 'GenCatDatabase' holds,
--   and turns an 'A.UArray' into a database if all requirements are met.
checkDatabase :: A.UArray Int Word64 -> Either String GenCatDatabase
checkDatabase :: UArray Int Word64 -> Either String GenCatDatabase
checkDatabase UArray Int Word64
arr = do
  let xs :: [Word64]
xs = UArray Int Word64 -> [Word64]
forall (a :: * -> * -> *) e i. (IArray a e, Ix i) => a i e -> [e]
A.elems UArray Int Word64
arr
  [(Int, (Word32, Word32))]
ys <- [(Int, Word64)]
-> ((Int, Word64) -> Either String (Int, (Word32, Word32)))
-> Either String [(Int, (Word32, Word32))]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
t a -> (a -> m b) -> m (t b)
forM ([Int] -> [Word64] -> [(Int, Word64)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Int
0 :: Int ..] [Word64]
xs) (((Int, Word64) -> Either String (Int, (Word32, Word32)))
 -> Either String [(Int, (Word32, Word32))])
-> ((Int, Word64) -> Either String (Int, (Word32, Word32)))
-> Either String [(Int, (Word32, Word32))]
forall a b. (a -> b) -> a -> b
$ \(Int
i, Word64
payload) -> do
    let dieIf :: Bool -> String -> Either String ()
dieIf Bool
tt String
msg = Bool -> Either String () -> Either String ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when Bool
tt (Either String () -> Either String ())
-> Either String () -> Either String ()
forall a b. (a -> b) -> a -> b
$ String -> Either String ()
forall a b. a -> Either a b
Left (String -> Either String ()) -> String -> Either String ()
forall a b. (a -> b) -> a -> b
$ String
"failed at element " String -> String -> String
forall a. Semigroup a => a -> a -> a
<> Int -> String
forall a. Show a => a -> String
show Int
i String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
": " String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
msg
        (Word32
lo, Word32
hi, Word8
val) = Word64 -> (Word32, Word32, Word8)
unpackTuple Word64
payload
    Bool -> String -> Either String ()
dieIf (Bool -> Bool
not (Word32
0 Word32 -> Word32 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word32
lo Bool -> Bool -> Bool
&& Word32
lo Word32 -> Word32 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word32
0x10FFFF)) String
"low bound out of range"
    Bool -> String -> Either String ()
dieIf (Bool -> Bool
not (Word32
0 Word32 -> Word32 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word32
hi Bool -> Bool -> Bool
&& Word32
hi Word32 -> Word32 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word32
0x10FFFF)) String
"high bound out of range"
    Bool -> String -> Either String ()
dieIf (Word32
lo Word32 -> Word32 -> Bool
forall a. Ord a => a -> a -> Bool
> Word32
hi) String
"violates low <= high"
    do
      let gcLo :: Word8
gcLo = Int -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word8) -> Int -> Word8
forall a b. (a -> b) -> a -> b
$ GeneralCategory -> Int
forall a. Enum a => a -> Int
fromEnum (GeneralCategory
forall a. Bounded a => a
minBound :: GeneralCategory)
          gcHi :: Word8
gcHi = Int -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word8) -> Int -> Word8
forall a b. (a -> b) -> a -> b
$ GeneralCategory -> Int
forall a. Enum a => a -> Int
fromEnum (GeneralCategory
forall a. Bounded a => a
maxBound :: GeneralCategory)
          notAssn :: Word8
notAssn = Int -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word8) -> Int -> Word8
forall a b. (a -> b) -> a -> b
$ GeneralCategory -> Int
forall a. Enum a => a -> Int
fromEnum GeneralCategory
NotAssigned
      Bool -> String -> Either String ()
dieIf (Bool -> Bool
not (Word8
gcLo Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
val Bool -> Bool -> Bool
&& Word8
val Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
gcHi)) String
"general category value out of range"
      Bool -> String -> Either String ()
dieIf (Word8
val Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
notAssn) String
"value should not be NotAssigned"
    (Int, (Word32, Word32)) -> Either String (Int, (Word32, Word32))
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Int
i, (Word32
lo, Word32
hi))
  [((Int, (Word32, Word32)), (Int, (Word32, Word32)))]
-> (((Int, (Word32, Word32)), (Int, (Word32, Word32)))
    -> Either String ())
-> Either String ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ ([(Int, (Word32, Word32))]
-> [(Int, (Word32, Word32))]
-> [((Int, (Word32, Word32)), (Int, (Word32, Word32)))]
forall a b. [a] -> [b] -> [(a, b)]
zip [(Int, (Word32, Word32))]
ys ([(Int, (Word32, Word32))] -> [(Int, (Word32, Word32))]
forall a. [a] -> [a]
tail [(Int, (Word32, Word32))]
ys)) ((((Int, (Word32, Word32)), (Int, (Word32, Word32)))
  -> Either String ())
 -> Either String ())
-> (((Int, (Word32, Word32)), (Int, (Word32, Word32)))
    -> Either String ())
-> Either String ()
forall a b. (a -> b) -> a -> b
$ \((Int
i, (Word32
_, Word32
a)), (Int
j, (Word32
b, Word32
_))) -> do
    Bool -> Either String () -> Either String ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Word32
a Word32 -> Word32 -> Bool
forall a. Ord a => a -> a -> Bool
>= Word32
b) (Either String () -> Either String ())
-> Either String () -> Either String ()
forall a b. (a -> b) -> a -> b
$
      String -> Either String ()
forall a b. a -> Either a b
Left (String -> Either String ()) -> String -> Either String ()
forall a b. (a -> b) -> a -> b
$ String
"failed when comparing element pair " String -> String -> String
forall a. Semigroup a => a -> a -> a
<> (Int, Int) -> String
forall a. Show a => a -> String
show (Int
i, Int
j) String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
"not strictly ascending."
  GenCatDatabase -> Either String GenCatDatabase
forall (f :: * -> *) a. Applicative f => a -> f a
pure (UArray Int Word64 -> GenCatDatabase
GenCatDatabase UArray Int Word64
arr)

-- | Verifies that all properties of a 'GenCatDatabase' holds.
checkDatabase' :: GenCatDatabase -> Either String GenCatDatabase
checkDatabase' :: GenCatDatabase -> Either String GenCatDatabase
checkDatabase' = (UArray Int Word64 -> Either String GenCatDatabase)
-> GenCatDatabase -> Either String GenCatDatabase
coerce UArray Int Word64 -> Either String GenCatDatabase
checkDatabase