{-# language UnboxedTuples #-}

module FlatParse.Internal where

import FlatParse.Internal.UnboxedNumerics

import Data.Bits
import Data.Char
import Data.Foldable (foldl')
import Data.Map (Map)
import GHC.Exts
import GHC.ForeignPtr

import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as BC8
import qualified Data.ByteString.Internal as B
import qualified Data.Map.Strict as M

#if MIN_VERSION_base(4,15,0)
import GHC.Num.Integer (Integer(..))
#else
import GHC.Integer.GMP.Internals (Integer(..))
#endif

-- Compatibility
--------------------------------------------------------------------------------

shortInteger :: Int# -> Integer
#if MIN_VERSION_base(4,15,0)
shortInteger = IS
#else
shortInteger :: Int# -> Integer
shortInteger = Int# -> Integer
S#
#endif
{-# inline shortInteger #-}


-- Char predicates
--------------------------------------------------------------------------------

-- | @isDigit c = \'0\' <= c && c <= \'9\'@
isDigit :: Char -> Bool
isDigit :: Char -> Bool
isDigit Char
c = Char
'0' Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
c Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'9'
{-# inline isDigit #-}

-- | @isLatinLetter c = (\'A\' <= c && c <= \'Z\') || (\'a\' <= c && c <= \'z\')@
isLatinLetter :: Char -> Bool
isLatinLetter :: Char -> Bool
isLatinLetter Char
c = (Char
'A' Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
c Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'Z') Bool -> Bool -> Bool
|| (Char
'a' Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
c Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'z')
{-# inline isLatinLetter #-}

-- | @isGreekLetter c = (\'Α\' <= c && c <= \'Ω\') || (\'α\' <= c && c <= \'ω\')@
isGreekLetter :: Char -> Bool
isGreekLetter :: Char -> Bool
isGreekLetter Char
c = (Char
'Α' Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
c Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'Ω') Bool -> Bool -> Bool
|| (Char
'α' Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
c Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'ω')
{-# inline isGreekLetter #-}

-- Int(eger) reading
--------------------------------------------------------------------------------

mul10 :: Int# -> Int#
mul10 :: Int# -> Int#
mul10 Int#
n = Int# -> Int# -> Int#
uncheckedIShiftL# Int#
n Int#
3# Int# -> Int# -> Int#
+# Int# -> Int# -> Int#
uncheckedIShiftL# Int#
n Int#
1#
{-# inline mul10 #-}

readInt' :: Int# -> Addr# -> Addr# -> (# Int#, Addr# #)
readInt' :: Int# -> Addr# -> Addr# -> (# Int#, Addr# #)
readInt' Int#
acc Addr#
s Addr#
end = case Addr# -> Addr# -> Int#
eqAddr# Addr#
s Addr#
end of
  Int#
1# -> (# Int#
acc, Addr#
s #)
  Int#
_  -> case Addr# -> Int# -> Word8#
indexWord8OffAddr''# Addr#
s Int#
0# of
    Word8#
w | Int#
1# <- Word8# -> Word8# -> Int#
leWord8# (Word# -> Word8#
wordToWord8''# Word#
0x30##) Word8#
w, Int#
1# <- Word8# -> Word8# -> Int#
leWord8# Word8#
w (Word# -> Word8#
wordToWord8''# Word#
0x39##) ->
      Int# -> Addr# -> Addr# -> (# Int#, Addr# #)
readInt' (Int# -> Int#
mul10 Int#
acc Int# -> Int# -> Int#
+# (Word# -> Int#
word2Int# (Word8# -> Word#
word8ToWord''# Word8#
w) Int# -> Int# -> Int#
-# Int#
0x30#)) (Addr# -> Int# -> Addr#
plusAddr# Addr#
s Int#
1#) Addr#
end
    Word8#
_ -> (# Int#
acc, Addr#
s #)


-- | Read an `Int` from the input, as a non-empty digit sequence. The `Int` may
--   overflow in the result.
readInt :: Addr# -> Addr# -> (# (##) | (# Int#, Addr# #) #)
readInt :: Addr# -> Addr# -> (# (# #) | (# Int#, Addr# #) #)
readInt Addr#
eob Addr#
s = case Int# -> Addr# -> Addr# -> (# Int#, Addr# #)
readInt' Int#
0# Addr#
s Addr#
eob of
  (# Int#
n, Addr#
s' #) | Int#
1# <- Addr# -> Addr# -> Int#
eqAddr# Addr#
s Addr#
s' -> (# (##) | #)
              | Bool
otherwise          -> (# | (# Int#
n, Addr#
s' #) #)
{-# inline readInt #-}

-- | Read an `Integer` from the input, as a non-empty digit sequence.
readInteger :: ForeignPtrContents -> Addr# -> Addr# -> (# (##) | (# Integer, Addr# #) #)
readInteger :: ForeignPtrContents
-> Addr# -> Addr# -> (# (# #) | (# Integer, Addr# #) #)
readInteger ForeignPtrContents
fp Addr#
eob Addr#
s = case Int# -> Addr# -> Addr# -> (# Int#, Addr# #)
readInt' Int#
0# Addr#
s Addr#
eob of
  (# Int#
n, Addr#
s' #)
    | Int#
1# <- Addr# -> Addr# -> Int#
eqAddr# Addr#
s Addr#
s'            -> (# (##) | #)
    | Int#
1# <- Addr# -> Addr# -> Int#
minusAddr# Addr#
s' Addr#
s Int# -> Int# -> Int#
<=# Int#
18# -> (# | (# Int# -> Integer
shortInteger Int#
n, Addr#
s' #) #)
    | Bool
otherwise -> case ByteString -> Maybe (Integer, ByteString)
BC8.readInteger (ForeignPtr Word8 -> Int -> Int -> ByteString
B.PS (Addr# -> ForeignPtrContents -> ForeignPtr Word8
forall a. Addr# -> ForeignPtrContents -> ForeignPtr a
ForeignPtr Addr#
s ForeignPtrContents
fp) Int
0 (Int# -> Int
I# (Addr# -> Addr# -> Int#
minusAddr# Addr#
s' Addr#
s))) of
        Maybe (Integer, ByteString)
Nothing     -> (# (##) | #)
        Just (Integer
i, ByteString
_) -> (# | (# Integer
i, Addr#
s' #) #)
{-# inline readInteger #-}


-- Positions and spans
--------------------------------------------------------------------------------

-- | Byte offset counted backwards from the end of the buffer.
newtype Pos = Pos Int deriving (Pos -> Pos -> Bool
(Pos -> Pos -> Bool) -> (Pos -> Pos -> Bool) -> Eq Pos
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Pos -> Pos -> Bool
$c/= :: Pos -> Pos -> Bool
== :: Pos -> Pos -> Bool
$c== :: Pos -> Pos -> Bool
Eq, Int -> Pos -> ShowS
[Pos] -> ShowS
Pos -> String
(Int -> Pos -> ShowS)
-> (Pos -> String) -> ([Pos] -> ShowS) -> Show Pos
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Pos] -> ShowS
$cshowList :: [Pos] -> ShowS
show :: Pos -> String
$cshow :: Pos -> String
showsPrec :: Int -> Pos -> ShowS
$cshowsPrec :: Int -> Pos -> ShowS
Show)

-- | A pair of positions.
data Span = Span !Pos !Pos deriving (Span -> Span -> Bool
(Span -> Span -> Bool) -> (Span -> Span -> Bool) -> Eq Span
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Span -> Span -> Bool
$c/= :: Span -> Span -> Bool
== :: Span -> Span -> Bool
$c== :: Span -> Span -> Bool
Eq, Int -> Span -> ShowS
[Span] -> ShowS
Span -> String
(Int -> Span -> ShowS)
-> (Span -> String) -> ([Span] -> ShowS) -> Show Span
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Span] -> ShowS
$cshowList :: [Span] -> ShowS
show :: Span -> String
$cshow :: Span -> String
showsPrec :: Int -> Span -> ShowS
$cshowsPrec :: Int -> Span -> ShowS
Show)

instance Ord Pos where
  Pos Int
p <= :: Pos -> Pos -> Bool
<= Pos Int
p' = Int
p' Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
p
  Pos Int
p < :: Pos -> Pos -> Bool
<  Pos Int
p' = Int
p' Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<  Int
p
  Pos Int
p > :: Pos -> Pos -> Bool
>  Pos Int
p' = Int
p' Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>  Int
p
  Pos Int
p >= :: Pos -> Pos -> Bool
>= Pos Int
p' = Int
p' Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
p
  {-# inline (<=) #-}
  {-# inline (<) #-}
  {-# inline (>) #-}
  {-# inline (>=) #-}

addrToPos# :: Addr# -> Addr# -> Pos
addrToPos# :: Addr# -> Addr# -> Pos
addrToPos# Addr#
eob Addr#
s = Int -> Pos
Pos (Int# -> Int
I# (Addr# -> Addr# -> Int#
minusAddr# Addr#
eob Addr#
s))
{-# inline addrToPos# #-}

posToAddr# :: Addr# -> Pos -> Addr#
posToAddr# :: Addr# -> Pos -> Addr#
posToAddr# Addr#
eob (Pos (I# Int#
n)) = Int# -> Addr#
unsafeCoerce# (Addr# -> Addr# -> Int#
minusAddr# Addr#
eob (Int# -> Addr#
unsafeCoerce# Int#
n))
{-# inline posToAddr# #-}

-- | Slice into a `B.ByteString` using a `Span`. The result is invalid if the `Span`
--   is not a valid slice of the first argument.
unsafeSlice :: B.ByteString -> Span -> B.ByteString
unsafeSlice :: ByteString -> Span -> ByteString
unsafeSlice (B.PS (ForeignPtr Addr#
addr ForeignPtrContents
fp) (I# Int#
start) (I# Int#
len))
            (Span (Pos (I# Int#
o1)) (Pos (I# Int#
o2))) =
  let end :: Addr#
end = Addr#
addr Addr# -> Int# -> Addr#
`plusAddr#` Int#
start Addr# -> Int# -> Addr#
`plusAddr#` Int#
len
  in ForeignPtr Word8 -> Int -> Int -> ByteString
B.PS (Addr# -> ForeignPtrContents -> ForeignPtr Word8
forall a. Addr# -> ForeignPtrContents -> ForeignPtr a
ForeignPtr (Addr# -> Int# -> Addr#
plusAddr# Addr#
end (Int# -> Int#
negateInt# Int#
o1)) ForeignPtrContents
fp) (Int# -> Int
I# Int#
0#) (Int# -> Int
I# (Int#
o1 Int# -> Int# -> Int#
-# Int#
o2))
{-# inline unsafeSlice #-}

-- UTF conversions
--------------------------------------------------------------------------------

-- | Convert a `String` to an UTF-8-coded `B.ByteString`.
packUTF8 :: String -> B.ByteString
packUTF8 :: String -> ByteString
packUTF8 String
str = [Word8] -> ByteString
B.pack ([Word8] -> ByteString) -> [Word8] -> ByteString
forall a b. (a -> b) -> a -> b
$ do
  Char
c <- String
str
  Word
w <- Char -> [Word]
charToBytes Char
c
  Word8 -> [Word8]
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Word -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word
w)

charToBytes :: Char -> [Word]
charToBytes :: Char -> [Word]
charToBytes Char
c'
    | Int
c Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0x7f     = [Int -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
c]
    | Int
c Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0x7ff    = [Word
0xc0 Word -> Word -> Word
forall a. Bits a => a -> a -> a
.|. Word
y, Word
0x80 Word -> Word -> Word
forall a. Bits a => a -> a -> a
.|. Word
z]
    | Int
c Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0xffff   = [Word
0xe0 Word -> Word -> Word
forall a. Bits a => a -> a -> a
.|. Word
x, Word
0x80 Word -> Word -> Word
forall a. Bits a => a -> a -> a
.|. Word
y, Word
0x80 Word -> Word -> Word
forall a. Bits a => a -> a -> a
.|. Word
z]
    | Int
c Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0x10ffff = [Word
0xf0 Word -> Word -> Word
forall a. Bits a => a -> a -> a
.|. Word
w, Word
0x80 Word -> Word -> Word
forall a. Bits a => a -> a -> a
.|. Word
x, Word
0x80 Word -> Word -> Word
forall a. Bits a => a -> a -> a
.|. Word
y, Word
0x80 Word -> Word -> Word
forall a. Bits a => a -> a -> a
.|. Word
z]
    | Bool
otherwise = String -> [Word]
forall a. HasCallStack => String -> a
error String
"Not a valid Unicode code point"
  where
    c :: Int
c = Char -> Int
ord Char
c'
    z :: Word
z = Int -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int
c                 Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
0x3f)
    y :: Word
y = Int -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Int -> Int
forall a. Bits a => a -> Int -> a
unsafeShiftR Int
c Int
6  Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
0x3f)
    x :: Word
x = Int -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Int -> Int
forall a. Bits a => a -> Int -> a
unsafeShiftR Int
c Int
12 Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
0x3f)
    w :: Word
w = Int -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Int -> Int
forall a. Bits a => a -> Int -> a
unsafeShiftR Int
c Int
18 Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
0x7)

strToBytes :: String -> [Word]
strToBytes :: String -> [Word]
strToBytes = (Char -> [Word]) -> String -> [Word]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap Char -> [Word]
charToBytes
{-# inline strToBytes #-}

packBytes :: [Word] -> Word
packBytes :: [Word] -> Word
packBytes = (Word, Int) -> Word
forall a b. (a, b) -> a
fst ((Word, Int) -> Word) -> ([Word] -> (Word, Int)) -> [Word] -> Word
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((Word, Int) -> Word -> (Word, Int))
-> (Word, Int) -> [Word] -> (Word, Int)
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (Word, Int) -> Word -> (Word, Int)
forall a a.
(Bits a, Integral a, Num a) =>
(a, Int) -> a -> (a, Int)
go (Word
0, Int
0) where
  go :: (a, Int) -> a -> (a, Int)
go (a
acc, Int
shift) a
w | Int
shift Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
64 = String -> (a, Int)
forall a. HasCallStack => String -> a
error String
"packWords: too many bytes"
  go (a
acc, Int
shift) a
w = (a -> Int -> a
forall a. Bits a => a -> Int -> a
unsafeShiftL (a -> a
forall a b. (Integral a, Num b) => a -> b
fromIntegral a
w) Int
shift a -> a -> a
forall a. Bits a => a -> a -> a
.|. a
acc, Int
shiftInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
8)

splitBytes :: [Word] -> ([Word], [Word])
splitBytes :: [Word] -> ([Word], [Word])
splitBytes [Word]
ws = case Int -> Int -> (Int, Int)
forall a. Integral a => a -> a -> (a, a)
quotRem ([Word] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Word]
ws) Int
8 of
  (Int
0, Int
_) -> ([Word]
ws, [])
  (Int
_, Int
r) -> ([Word]
as, [Word] -> [Word]
chunk8s [Word]
bs) where
              ([Word]
as, [Word]
bs) = Int -> [Word] -> ([Word], [Word])
forall a. Int -> [a] -> ([a], [a])
splitAt Int
r [Word]
ws
              chunk8s :: [Word] -> [Word]
chunk8s [] = []
              chunk8s [Word]
ws = let ([Word]
as, [Word]
bs) = Int -> [Word] -> ([Word], [Word])
forall a. Int -> [a] -> ([a], [a])
splitAt Int
8 [Word]
ws in
                           [Word] -> Word
packBytes [Word]
as Word -> [Word] -> [Word]
forall k1. k1 -> [k1] -> [k1]
: [Word] -> [Word]
chunk8s [Word]
bs

derefChar8# :: Addr# -> Char#
derefChar8# :: Addr# -> Char#
derefChar8# Addr#
addr = Addr# -> Int# -> Char#
indexCharOffAddr# Addr#
addr Int#
0#
{-# inline derefChar8# #-}

-- Switch trie compilation
--------------------------------------------------------------------------------

data Trie a = Branch !a !(Map Word (Trie a))
  deriving Int -> Trie a -> ShowS
[Trie a] -> ShowS
Trie a -> String
(Int -> Trie a -> ShowS)
-> (Trie a -> String) -> ([Trie a] -> ShowS) -> Show (Trie a)
forall a. Show a => Int -> Trie a -> ShowS
forall a. Show a => [Trie a] -> ShowS
forall a. Show a => Trie a -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Trie a] -> ShowS
$cshowList :: forall a. Show a => [Trie a] -> ShowS
show :: Trie a -> String
$cshow :: forall a. Show a => Trie a -> String
showsPrec :: Int -> Trie a -> ShowS
$cshowsPrec :: forall a. Show a => Int -> Trie a -> ShowS
Show

type Rule = Maybe Int

nilTrie :: Trie Rule
nilTrie :: Trie Rule
nilTrie = Rule -> Map Word (Trie Rule) -> Trie Rule
forall a. a -> Map Word (Trie a) -> Trie a
Branch Rule
forall k1. Maybe k1
Nothing Map Word (Trie Rule)
forall a. Monoid a => a
mempty

updRule :: Int -> Maybe Int -> Maybe Int
updRule :: Int -> Rule -> Rule
updRule Int
rule = Int -> Rule
forall k1. k1 -> Maybe k1
Just (Int -> Rule) -> (Rule -> Int) -> Rule -> Rule
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> (Int -> Int) -> Rule -> Int
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Int
rule (Int -> Int -> Int
forall a. Ord a => a -> a -> a
min Int
rule)

insert :: Int -> [Word] -> Trie Rule -> Trie Rule
insert :: Int -> [Word] -> Trie Rule -> Trie Rule
insert Int
rule = [Word] -> Trie Rule -> Trie Rule
go where
  go :: [Word] -> Trie Rule -> Trie Rule
go [] (Branch Rule
rule' Map Word (Trie Rule)
ts) =
    Rule -> Map Word (Trie Rule) -> Trie Rule
forall a. a -> Map Word (Trie a) -> Trie a
Branch (Int -> Rule -> Rule
updRule Int
rule Rule
rule') Map Word (Trie Rule)
ts
  go (Word
c:[Word]
cs) (Branch Rule
rule' Map Word (Trie Rule)
ts) =
    Rule -> Map Word (Trie Rule) -> Trie Rule
forall a. a -> Map Word (Trie a) -> Trie a
Branch Rule
rule' ((Maybe (Trie Rule) -> Maybe (Trie Rule))
-> Word -> Map Word (Trie Rule) -> Map Word (Trie Rule)
forall k a.
Ord k =>
(Maybe a -> Maybe a) -> k -> Map k a -> Map k a
M.alter (Trie Rule -> Maybe (Trie Rule)
forall k1. k1 -> Maybe k1
Just (Trie Rule -> Maybe (Trie Rule))
-> (Maybe (Trie Rule) -> Trie Rule)
-> Maybe (Trie Rule)
-> Maybe (Trie Rule)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Trie Rule
-> (Trie Rule -> Trie Rule) -> Maybe (Trie Rule) -> Trie Rule
forall b a. b -> (a -> b) -> Maybe a -> b
maybe ([Word] -> Trie Rule -> Trie Rule
go [Word]
cs Trie Rule
nilTrie) ([Word] -> Trie Rule -> Trie Rule
go [Word]
cs)) Word
c Map Word (Trie Rule)
ts)

listToTrie :: [(Int, String)] -> Trie Rule
listToTrie :: [(Int, String)] -> Trie Rule
listToTrie = (Trie Rule -> (Int, String) -> Trie Rule)
-> Trie Rule -> [(Int, String)] -> Trie Rule
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (\Trie Rule
t (!Int
r, !String
s) -> Int -> [Word] -> Trie Rule -> Trie Rule
insert Int
r (Char -> [Word]
charToBytes (Char -> [Word]) -> String -> [Word]
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< String
s) Trie Rule
t) Trie Rule
nilTrie

-- | Decorate a trie with the minimum lengths of non-empty paths. This
--   is used later to place `ensureBytes#`.
mindepths :: Trie Rule -> Trie (Rule, Int)
mindepths :: Trie Rule -> Trie (Rule, Int)
mindepths (Branch Rule
rule Map Word (Trie Rule)
ts) =
  if Map Word (Trie Rule) -> Bool
forall k a. Map k a -> Bool
M.null Map Word (Trie Rule)
ts then
    (Rule, Int) -> Map Word (Trie (Rule, Int)) -> Trie (Rule, Int)
forall a. a -> Map Word (Trie a) -> Trie a
Branch (Rule
rule, Int
0) Map Word (Trie (Rule, Int))
forall a. Monoid a => a
mempty
  else
    let !ts' :: Map Word (Trie (Rule, Int))
ts' = (Trie Rule -> Trie (Rule, Int))
-> Map Word (Trie Rule) -> Map Word (Trie (Rule, Int))
forall a b k. (a -> b) -> Map k a -> Map k b
M.map Trie Rule -> Trie (Rule, Int)
mindepths Map Word (Trie Rule)
ts in
    (Rule, Int) -> Map Word (Trie (Rule, Int)) -> Trie (Rule, Int)
forall a. a -> Map Word (Trie a) -> Trie a
Branch (
      Rule
rule,
      Map Word Int -> Int
forall (t :: * -> *) a. (Foldable t, Ord a) => t a -> a
minimum ((Trie (Rule, Int) -> Int)
-> Map Word (Trie (Rule, Int)) -> Map Word Int
forall a b k. (a -> b) -> Map k a -> Map k b
M.map (\(Branch (Rule
rule,Int
d) Map Word (Trie (Rule, Int))
_) -> Int -> (Int -> Int) -> Rule -> Int
forall b a. b -> (a -> b) -> Maybe a -> b
maybe (Int
d Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) (\Int
_ -> Int
1) Rule
rule) Map Word (Trie (Rule, Int))
ts'))
      Map Word (Trie (Rule, Int))
ts'

data Trie' a
  = Branch' !a !(Map Word (Trie' a))
  | Path !a ![Word] !(Trie' a)
  deriving Int -> Trie' a -> ShowS
[Trie' a] -> ShowS
Trie' a -> String
(Int -> Trie' a -> ShowS)
-> (Trie' a -> String) -> ([Trie' a] -> ShowS) -> Show (Trie' a)
forall a. Show a => Int -> Trie' a -> ShowS
forall a. Show a => [Trie' a] -> ShowS
forall a. Show a => Trie' a -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Trie' a] -> ShowS
$cshowList :: forall a. Show a => [Trie' a] -> ShowS
show :: Trie' a -> String
$cshow :: forall a. Show a => Trie' a -> String
showsPrec :: Int -> Trie' a -> ShowS
$cshowsPrec :: forall a. Show a => Int -> Trie' a -> ShowS
Show

-- | Compress linear paths.
pathify :: Trie (Rule, Int) -> Trie' (Rule, Int)
pathify :: Trie (Rule, Int) -> Trie' (Rule, Int)
pathify (Branch (Rule, Int)
a Map Word (Trie (Rule, Int))
ts) = case Map Word (Trie (Rule, Int)) -> [(Word, Trie (Rule, Int))]
forall k a. Map k a -> [(k, a)]
M.toList Map Word (Trie (Rule, Int))
ts of
  [] -> (Rule, Int) -> Map Word (Trie' (Rule, Int)) -> Trie' (Rule, Int)
forall a. a -> Map Word (Trie' a) -> Trie' a
Branch' (Rule, Int)
a Map Word (Trie' (Rule, Int))
forall a. Monoid a => a
mempty
  [(Word
w, Trie (Rule, Int)
t)] -> case Trie (Rule, Int) -> Trie' (Rule, Int)
pathify Trie (Rule, Int)
t of
           Path (Rule
Nothing, Int
_) [Word]
ws Trie' (Rule, Int)
t -> (Rule, Int) -> [Word] -> Trie' (Rule, Int) -> Trie' (Rule, Int)
forall a. a -> [Word] -> Trie' a -> Trie' a
Path (Rule, Int)
a (Word
wWord -> [Word] -> [Word]
forall k1. k1 -> [k1] -> [k1]
:[Word]
ws) Trie' (Rule, Int)
t
           Trie' (Rule, Int)
t                      -> (Rule, Int) -> [Word] -> Trie' (Rule, Int) -> Trie' (Rule, Int)
forall a. a -> [Word] -> Trie' a -> Trie' a
Path (Rule, Int)
a [Word
w] Trie' (Rule, Int)
t
  [(Word, Trie (Rule, Int))]
_   -> (Rule, Int) -> Map Word (Trie' (Rule, Int)) -> Trie' (Rule, Int)
forall a. a -> Map Word (Trie' a) -> Trie' a
Branch' (Rule, Int)
a ((Trie (Rule, Int) -> Trie' (Rule, Int))
-> Map Word (Trie (Rule, Int)) -> Map Word (Trie' (Rule, Int))
forall a b k. (a -> b) -> Map k a -> Map k b
M.map Trie (Rule, Int) -> Trie' (Rule, Int)
pathify Map Word (Trie (Rule, Int))
ts)

-- | Compute where to fall back after we exhausted a branch. If the branch is
--   empty, that means we've succeded at reading and we jump to the rhs rule.
fallbacks :: Trie' (Rule, Int) -> Trie' (Rule, Int, Int)
fallbacks :: Trie' (Rule, Int) -> Trie' (Rule, Int, Int)
fallbacks = Rule -> Int -> Trie' (Rule, Int) -> Trie' (Rule, Int, Int)
go Rule
forall k1. Maybe k1
Nothing Int
0  where
  go :: Rule -> Int -> Trie' (Rule, Int) -> Trie' (Rule, Int, Int)
  go :: Rule -> Int -> Trie' (Rule, Int) -> Trie' (Rule, Int, Int)
go !Rule
rule !Int
n (Branch' (Rule
rule', Int
d) Map Word (Trie' (Rule, Int))
ts)
    | Map Word (Trie' (Rule, Int)) -> Bool
forall k a. Map k a -> Bool
M.null Map Word (Trie' (Rule, Int))
ts        = (Rule, Int, Int)
-> Map Word (Trie' (Rule, Int, Int)) -> Trie' (Rule, Int, Int)
forall a. a -> Map Word (Trie' a) -> Trie' a
Branch' (Rule
rule', Int
0, Int
d) Map Word (Trie' (Rule, Int, Int))
forall a. Monoid a => a
mempty
    | Rule
Nothing <- Rule
rule' = (Rule, Int, Int)
-> Map Word (Trie' (Rule, Int, Int)) -> Trie' (Rule, Int, Int)
forall a. a -> Map Word (Trie' a) -> Trie' a
Branch' (Rule
rule, Int
n, Int
d) (Rule -> Int -> Trie' (Rule, Int) -> Trie' (Rule, Int, Int)
go Rule
rule (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) (Trie' (Rule, Int) -> Trie' (Rule, Int, Int))
-> Map Word (Trie' (Rule, Int))
-> Map Word (Trie' (Rule, Int, Int))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Map Word (Trie' (Rule, Int))
ts)
    | Bool
otherwise        = (Rule, Int, Int)
-> Map Word (Trie' (Rule, Int, Int)) -> Trie' (Rule, Int, Int)
forall a. a -> Map Word (Trie' a) -> Trie' a
Branch' (Rule
rule', Int
0, Int
d) (Rule -> Int -> Trie' (Rule, Int) -> Trie' (Rule, Int, Int)
go Rule
rule' Int
1     (Trie' (Rule, Int) -> Trie' (Rule, Int, Int))
-> Map Word (Trie' (Rule, Int))
-> Map Word (Trie' (Rule, Int, Int))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Map Word (Trie' (Rule, Int))
ts)
  go Rule
rule Int
n (Path (Rule
rule', Int
d) [Word]
ws Trie' (Rule, Int)
t)
    | Rule
Nothing <- Rule
rule' = (Rule, Int, Int)
-> [Word] -> Trie' (Rule, Int, Int) -> Trie' (Rule, Int, Int)
forall a. a -> [Word] -> Trie' a -> Trie' a
Path (Rule
rule, Int
n, Int
d)  [Word]
ws (Rule -> Int -> Trie' (Rule, Int) -> Trie' (Rule, Int, Int)
go Rule
rule (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
+ [Word] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Word]
ws) Trie' (Rule, Int)
t)
    | Bool
otherwise        = (Rule, Int, Int)
-> [Word] -> Trie' (Rule, Int, Int) -> Trie' (Rule, Int, Int)
forall a. a -> [Word] -> Trie' a -> Trie' a
Path (Rule
rule', Int
0, Int
d) [Word]
ws (Rule -> Int -> Trie' (Rule, Int) -> Trie' (Rule, Int, Int)
go Rule
rule' ([Word] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Word]
ws) Trie' (Rule, Int)
t)

-- | Decorate with `ensureBytes#` invocations, represented as
--   `Maybe Int`.
ensureBytes :: Trie' (Rule, Int, Int) -> Trie' (Rule, Int, Maybe Int)
ensureBytes :: Trie' (Rule, Int, Int) -> Trie' (Rule, Int, Rule)
ensureBytes = Int -> Trie' (Rule, Int, Int) -> Trie' (Rule, Int, Rule)
go Int
0 where
  go :: Int -> Trie' (Rule, Int, Int) -> Trie' (Rule, Int, Maybe Int)
  go :: Int -> Trie' (Rule, Int, Int) -> Trie' (Rule, Int, Rule)
go !Int
res = \case
    Branch' (Rule
r, Int
n, Int
d) Map Word (Trie' (Rule, Int, Int))
ts
      | Map Word (Trie' (Rule, Int, Int)) -> Bool
forall k a. Map k a -> Bool
M.null Map Word (Trie' (Rule, Int, Int))
ts -> (Rule, Int, Rule)
-> Map Word (Trie' (Rule, Int, Rule)) -> Trie' (Rule, Int, Rule)
forall a. a -> Map Word (Trie' a) -> Trie' a
Branch' (Rule
r, Int
n, Rule
forall k1. Maybe k1
Nothing) Map Word (Trie' (Rule, Int, Rule))
forall a. Monoid a => a
mempty
      | Int
res Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
1   -> (Rule, Int, Rule)
-> Map Word (Trie' (Rule, Int, Rule)) -> Trie' (Rule, Int, Rule)
forall a. a -> Map Word (Trie' a) -> Trie' a
Branch' (Rule
r, Int
n, Int -> Rule
forall k1. k1 -> Maybe k1
Just Int
d ) (Int -> Trie' (Rule, Int, Int) -> Trie' (Rule, Int, Rule)
go (Int
d   Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1) (Trie' (Rule, Int, Int) -> Trie' (Rule, Int, Rule))
-> Map Word (Trie' (Rule, Int, Int))
-> Map Word (Trie' (Rule, Int, Rule))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Map Word (Trie' (Rule, Int, Int))
ts)
      | Bool
otherwise -> (Rule, Int, Rule)
-> Map Word (Trie' (Rule, Int, Rule)) -> Trie' (Rule, Int, Rule)
forall a. a -> Map Word (Trie' a) -> Trie' a
Branch' (Rule
r, Int
n, Rule
forall k1. Maybe k1
Nothing) (Int -> Trie' (Rule, Int, Int) -> Trie' (Rule, Int, Rule)
go (Int
res Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1) (Trie' (Rule, Int, Int) -> Trie' (Rule, Int, Rule))
-> Map Word (Trie' (Rule, Int, Int))
-> Map Word (Trie' (Rule, Int, Rule))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Map Word (Trie' (Rule, Int, Int))
ts)
    Path (Rule
r, Int
n, Int
d) [Word]
ws Trie' (Rule, Int, Int)
t -> case [Word] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Word]
ws of
      Int
l | Int
res Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
l   -> (Rule, Int, Rule)
-> [Word] -> Trie' (Rule, Int, Rule) -> Trie' (Rule, Int, Rule)
forall a. a -> [Word] -> Trie' a -> Trie' a
Path (Rule
r, Int
n, Int -> Rule
forall k1. k1 -> Maybe k1
Just (Int -> Rule) -> Int -> Rule
forall a b. (a -> b) -> a -> b
$! Int
d Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
res) [Word]
ws (Int -> Trie' (Rule, Int, Int) -> Trie' (Rule, Int, Rule)
go (Int
d Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
l)   Trie' (Rule, Int, Int)
t)
        | Bool
otherwise -> (Rule, Int, Rule)
-> [Word] -> Trie' (Rule, Int, Rule) -> Trie' (Rule, Int, Rule)
forall a. a -> [Word] -> Trie' a -> Trie' a
Path (Rule
r, Int
n, Rule
forall k1. Maybe k1
Nothing        ) [Word]
ws (Int -> Trie' (Rule, Int, Int) -> Trie' (Rule, Int, Rule)
go (Int
res Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
l) Trie' (Rule, Int, Int)
t)

compileTrie :: [(Int, String)] -> Trie' (Rule, Int, Maybe Int)
compileTrie :: [(Int, String)] -> Trie' (Rule, Int, Rule)
compileTrie = Trie' (Rule, Int, Int) -> Trie' (Rule, Int, Rule)
ensureBytes (Trie' (Rule, Int, Int) -> Trie' (Rule, Int, Rule))
-> ([(Int, String)] -> Trie' (Rule, Int, Int))
-> [(Int, String)]
-> Trie' (Rule, Int, Rule)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Trie' (Rule, Int) -> Trie' (Rule, Int, Int)
fallbacks (Trie' (Rule, Int) -> Trie' (Rule, Int, Int))
-> ([(Int, String)] -> Trie' (Rule, Int))
-> [(Int, String)]
-> Trie' (Rule, Int, Int)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Trie (Rule, Int) -> Trie' (Rule, Int)
pathify (Trie (Rule, Int) -> Trie' (Rule, Int))
-> ([(Int, String)] -> Trie (Rule, Int))
-> [(Int, String)]
-> Trie' (Rule, Int)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Trie Rule -> Trie (Rule, Int)
mindepths (Trie Rule -> Trie (Rule, Int))
-> ([(Int, String)] -> Trie Rule)
-> [(Int, String)]
-> Trie (Rule, Int)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [(Int, String)] -> Trie Rule
listToTrie