{-# LANGUAGE TemplateHaskellQuotes #-}
module Sasha.TTH (
    -- * SaTTH, staged Sasha the lexer.
    SaTTH,
    satth,
    -- * ERE specification
    ERE,
    empty,
    eps,
    char,
    charRange,
    utf8Char,
    anyChar,
    anyUtf8Char,
    appends,
    unions,
    intersections,
    star,
    plus,
    string,
    utf8String,
    complement,
    satisfy,
    digit,
) where

import Language.Haskell.TH (Code, CodeQ, Exp, Q)

import Control.Monad               (forM)
import Data.List                   (sortOn)
import Data.Map                    (Map)
import Data.Maybe                  (listToMaybe)
import Data.Ord                    (Down (..))
import Data.Word                   (Word8)
import Language.Haskell.TTH.LetRec (letrecE)

import qualified Data.ByteString     as BS
import qualified Data.Map.Strict     as Map
import qualified Language.Haskell.TH as TH

import Sasha.Internal.ERE
import Sasha.Internal.Word8Set (Word8Set)

import qualified Sasha.Internal.Word8Set as W8S

-- | Lexer grammar specification: tag codes and regular expressions.
type SaTTH tag = [(Code Q tag, ERE)]

-- | Generate a scanner code.
satth :: forall tag. SaTTH tag -> Code Q (BS.ByteString -> Maybe (tag, BS.ByteString, BS.ByteString))
satth :: forall tag.
SaTTH tag
-> Code Q (ByteString -> Maybe (tag, ByteString, ByteString))
satth SaTTH tag
grammar0 = forall (q :: * -> *) tag r a.
(Ord tag, Quote q, MonadFix q) =>
(tag -> String)
-> (forall (m :: * -> *).
    Monad m =>
    (tag -> m (Code q a)) -> tag -> m (Code q a))
-> (forall (m :: * -> *).
    Monad m =>
    (tag -> m (Code q a)) -> m (Code q r))
-> Code q r
letrecE
    (\SaTTH' tag
_ -> String
"state")
    forall (m :: * -> *).
Monad m =>
(SaTTH' tag -> m (Code Q (R tag)))
-> SaTTH' tag -> m (Code Q (R tag))
trans
    forall (m :: * -> *).
Monad m =>
(SaTTH' tag -> m (Code Q (R tag)))
-> m (Code Q (ByteString -> Maybe (tag, ByteString, ByteString)))
start
  where
    grammar0' :: SaTTH' tag
    grammar0' :: SaTTH' tag
grammar0' =
        [ forall tag. Int -> Code Q tag -> ERE -> S tag
S Int
i Code Q tag
t ERE
ere
        | (Int
i, (Code Q tag
t, ERE
ere)) <- forall a b. [a] -> [b] -> [(a, b)]
zip [Int
0..] SaTTH tag
grammar0
        ]

    start :: Monad m => (SaTTH' tag -> m (Code Q (R tag))) -> m (Code Q (BS.ByteString -> Maybe (tag, BS.ByteString, BS.ByteString)))
    start :: forall (m :: * -> *).
Monad m =>
(SaTTH' tag -> m (Code Q (R tag)))
-> m (Code Q (ByteString -> Maybe (tag, ByteString, ByteString)))
start SaTTH' tag -> m (Code Q (R tag))
rec = do
        Code Q (R tag)
startCode <- SaTTH' tag -> m (Code Q (R tag))
rec SaTTH' tag
grammar0'
        -- we assume that none of the tokens accepts an empty string,
        -- so we start without specifying last match.
        forall (m :: * -> *) a. Monad m => a -> m a
return [|| \input -> case $$startCode Nothing (0 :: Int) input of
            Nothing       -> Nothing
            Just (tag, i) -> case BS.splitAt i input of
                (pfx, sfx) -> Just (tag, pfx, sfx)
            ||]

    trans :: Monad m => (SaTTH' tag -> m (Code Q (R tag))) -> SaTTH' tag -> m (Code Q (R tag))
    trans :: forall (m :: * -> *).
Monad m =>
(SaTTH' tag -> m (Code Q (R tag)))
-> SaTTH' tag -> m (Code Q (R tag))
trans SaTTH' tag -> m (Code Q (R tag))
_rec SaTTH' tag
grammar
        | forall tag. SaTTH' tag -> Bool
emptySashaTTH SaTTH' tag
grammar
        = forall (m :: * -> *) a. Monad m => a -> m a
return [|| \ !acc _ _ -> acc ||]

    trans  SaTTH' tag -> m (Code Q (R tag))
rec SaTTH' tag
grammar = do
        -- if the input is not empty?
        let grammarM1 :: Map (SaTTH' tag) Word8Set
            grammarM1 :: Map (SaTTH' tag) Word8Set
grammarM1 = forall k a. Ord k => (a -> a -> a) -> [(k, a)] -> Map k a
Map.fromListWith Word8Set -> Word8Set -> Word8Set
W8S.union
                [ (forall tag. Word8 -> SaTTH' tag -> SaTTH' tag
derivativeSaTTH Word8
c SaTTH' tag
grammar, Word8 -> Word8Set
W8S.singleton Word8
c)
                | Word8
c <- [ forall a. Bounded a => a
minBound .. forall a. Bounded a => a
maxBound ]
                ]

            -- non-empty map
            grammarM :: [(Word8Set, SaTTH' tag, M tag)]
            grammarM :: [(Word8Set, SaTTH' tag, M tag)]
grammarM =
                [ (Word8Set
c, SaTTH' tag
grammar', forall tag. SaTTH' tag -> M tag
makeM SaTTH' tag
grammar')
                | (SaTTH' tag
grammar', Word8Set
c) <- forall k a. Map k a -> [(k, a)]
Map.toList Map (SaTTH' tag) Word8Set
grammarM1
                ]

        -- next states
        [(Word8Set, Next (Code Q (R tag)), M tag)]
nexts0 <- forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
t a -> (a -> m b) -> m (t b)
forM [(Word8Set, SaTTH' tag, M tag)]
grammarM forall a b. (a -> b) -> a -> b
$ \(Word8Set
ws, SaTTH' tag
grammar', M tag
modify) -> do
            if forall tag. SaTTH' tag -> Bool
emptySashaTTH SaTTH' tag
grammar' then forall (m :: * -> *) a. Monad m => a -> m a
return (Word8Set
ws, forall a. Next a
NextEmpty, M tag
modify)
            else if forall tag. SaTTH' tag -> Bool
epsSashaTTH SaTTH' tag
grammar' then forall (m :: * -> *) a. Monad m => a -> m a
return (Word8Set
ws, forall a. Next a
NextEps, M tag
modify)
            else do
                Code Q (R tag)
next <- SaTTH' tag -> m (Code Q (R tag))
rec SaTTH' tag
grammar'
                forall (m :: * -> *) a. Monad m => a -> m a
return (Word8Set
ws, forall a. a -> Next a
Next Code Q (R tag)
next, M tag
modify)

        -- sort next states
        let nexts :: [(Word8Set, Next (Code Q (R tag)), M tag)]
            nexts :: [(Word8Set, Next (Code Q (R tag)), M tag)]
nexts = forall b a. Ord b => (a -> b) -> [a] -> [a]
sortOn (\(Word8Set
ws, Next (Code Q (R tag))
_, M tag
_) -> Word8Set -> Meas
meas Word8Set
ws) [(Word8Set, Next (Code Q (R tag)), M tag)]
nexts0

        -- transition case
        let caseAnalysis
                :: Code Q (Maybe (tag, Int))
                -> Code Q Int
                -> Code Q Word8
                -> Code Q BS.ByteString
                -> Code Q (Maybe (tag, Int))
            caseAnalysis :: Code Q (Maybe (tag, Int))
-> Code Q Int
-> Code Q Word8
-> Code Q ByteString
-> Code Q (Maybe (tag, Int))
caseAnalysis Code Q (Maybe (tag, Int))
acc Code Q Int
pfx Code Q Word8
c Code Q ByteString
sfx = forall a r. Code Q a -> [(Code Q Bool, CodeQ r)] -> CodeQ r
caseTTH [|| () ||]
                [ (Code Q Word8 -> Word8Set -> Code Q Bool
W8S.memberCode Code Q Word8
c Word8Set
ws, Code Q (Maybe (tag, Int))
body)

                | (Word8Set
ws, Next (Code Q (R tag))
mnext, M tag
modify) <- [(Word8Set, Next (Code Q (R tag)), M tag)]
nexts
                , let body :: Code Q (Maybe (tag, Int))
body = case Next (Code Q (R tag))
mnext of
                        Next (Code Q (R tag))
NextEmpty -> Code Q (Maybe (tag, Int))
acc
                        Next (Code Q (R tag))
NextEps   -> M tag
modify Code Q (Maybe (tag, Int))
acc [|| $$pfx + 1 ||]
                        Next Code Q (R tag)
next -> [|| let !pfx' = $$pfx + 1 in $$next $$(modify acc [|| pfx' ||]) pfx' $$sfx ||]
                ]

        let debugWarns :: Q ()
            debugWarns :: Q ()
debugWarns = forall (m :: * -> *) a. Monad m => a -> m a
return ()

        forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ forall (m :: * -> *) a b. Monad m => m a -> Code m b -> Code m b
TH.bindCode_ Q ()
debugWarns [|| \ !acc !_pfx !input -> case BS.uncons input of
            Nothing        -> acc
            Just (c, _sfx) -> $$(caseAnalysis [|| acc ||] [|| _pfx ||] [|| c ||] [|| _sfx ||])
            ||]

-------------------------------------------------------------------------------
-- Sorting transitions
-------------------------------------------------------------------------------

data Meas
    = MeasLite Word8Set
    | MeasCont !(Down Int) !Word8Set
    | MeasSize !Int !Word8Set
  deriving (Meas -> Meas -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Meas -> Meas -> Bool
$c/= :: Meas -> Meas -> Bool
== :: Meas -> Meas -> Bool
$c== :: Meas -> Meas -> Bool
Eq, Eq Meas
Meas -> Meas -> Bool
Meas -> Meas -> Ordering
Meas -> Meas -> Meas
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: Meas -> Meas -> Meas
$cmin :: Meas -> Meas -> Meas
max :: Meas -> Meas -> Meas
$cmax :: Meas -> Meas -> Meas
>= :: Meas -> Meas -> Bool
$c>= :: Meas -> Meas -> Bool
> :: Meas -> Meas -> Bool
$c> :: Meas -> Meas -> Bool
<= :: Meas -> Meas -> Bool
$c<= :: Meas -> Meas -> Bool
< :: Meas -> Meas -> Bool
$c< :: Meas -> Meas -> Bool
compare :: Meas -> Meas -> Ordering
$ccompare :: Meas -> Meas -> Ordering
Ord)

meas :: Word8Set -> Meas
meas :: Word8Set -> Meas
meas Word8Set
ws
    | Word8Set -> Int
W8S.size Word8Set
ws forall a. Ord a => a -> a -> Bool
< Int
2      = Word8Set -> Meas
MeasLite Word8Set
ws
    | Word8Set -> Bool
W8S.isSingleRange Word8Set
ws = Down Int -> Word8Set -> Meas
MeasCont (forall a. a -> Down a
Down (Word8Set -> Int
W8S.size Word8Set
ws)) Word8Set
ws
    | Bool
otherwise            = Int -> Word8Set -> Meas
MeasSize (Word8Set -> Int
W8S.size Word8Set
ws) Word8Set
ws

-------------------------------------------------------------------------------
-- Aliases
-------------------------------------------------------------------------------

-- | Inner scanner function.
--
-- * previous match
-- * position
-- * input
--
type R tag = Maybe (tag, Int) -> Int -> BS.ByteString -> Maybe (tag, Int)

-- | Last accept modifier.
type M tag = Code Q (Maybe (tag, Int)) -> CodeQ Int -> CodeQ (Maybe (tag, Int))

makeM :: forall tag. SaTTH' tag -> M tag
makeM :: forall tag. SaTTH' tag -> M tag
makeM SaTTH' tag
grammar Code Q (Maybe (tag, Int))
acc Code Q Int
pfx = case Maybe (Code Q tag)
acc' of
    Maybe (Code Q tag)
Nothing  -> Code Q (Maybe (tag, Int))
acc
    Just Code Q tag
tag -> [|| Just ($$tag, $$pfx) ||]
  where
    acc' :: Maybe (Code Q tag)
    acc' :: Maybe (Code Q tag)
acc' = forall a. [a] -> Maybe a
listToMaybe
        [ Code Q tag
tag
        | S Int
_ Code Q tag
tag ERE
ere <- SaTTH' tag
grammar
        , ERE -> Bool
nullable ERE
ere
        ]

data Next a
    = NextEmpty
    | NextEps
    | Next a

-------------------------------------------------------------------------------
-- TTH extras
-------------------------------------------------------------------------------

caseTTH :: Code Q a -> [(Code Q Bool, CodeQ r)] -> Code Q r
caseTTH :: forall a r. Code Q a -> [(Code Q Bool, CodeQ r)] -> CodeQ r
caseTTH Code Q a
c [(Code Q Bool, CodeQ r)]
guards = forall a (m :: * -> *). Quote m => m Exp -> Code m a
TH.unsafeCodeCoerce forall a b. (a -> b) -> a -> b
$ forall (m :: * -> *). Quote m => m Exp -> [m Match] -> m Exp
TH.caseE (forall a (m :: * -> *). Quote m => Code m a -> m Exp
TH.unTypeCode Code Q a
c)
    [ forall (m :: * -> *).
Quote m =>
m Pat -> m Body -> [m Dec] -> m Match
TH.match forall (m :: * -> *). Quote m => m Pat
TH.wildP (forall (m :: * -> *). Quote m => [m (Guard, Exp)] -> m Body
TH.guardedB (forall r. [(Code Q Bool, Code Q r)] -> [Q (Guard, Exp)]
go [(Code Q Bool, CodeQ r)]
guards))  [] ]
  where
    go :: [(Code Q Bool, Code Q r)] -> [Q (TH.Guard, Exp)]
    go :: forall r. [(Code Q Bool, Code Q r)] -> [Q (Guard, Exp)]
go []          = []
    go [(Code Q Bool
_,Code Q r
b)]     = [forall (m :: * -> *). Quote m => m Exp -> m Exp -> m (Guard, Exp)
TH.normalGE [| otherwise |] (forall a (m :: * -> *). Quote m => Code m a -> m Exp
TH.unTypeCode Code Q r
b)]
    go ((Code Q Bool
g,Code Q r
b):[(Code Q Bool, Code Q r)]
gbs) = forall (m :: * -> *). Quote m => m Exp -> m Exp -> m (Guard, Exp)
TH.normalGE (forall a (m :: * -> *). Quote m => Code m a -> m Exp
TH.unTypeCode Code Q Bool
g) (forall a (m :: * -> *). Quote m => Code m a -> m Exp
TH.unTypeCode Code Q r
b) forall a. a -> [a] -> [a]
: forall r. [(Code Q Bool, Code Q r)] -> [Q (Guard, Exp)]
go [(Code Q Bool, Code Q r)]
gbs

-------------------------------------------------------------------------------
-- State
-------------------------------------------------------------------------------

-- | We give each tag an integer, so we can order them.
data S tag = S !Int !(Code Q tag) !ERE

instance Show (S tag) where
    show :: S tag -> String
show (S Int
i Code Q tag
_ ERE
ere) = forall a. Show a => a -> String
show (Int
i, ERE
ere)

instance Eq (S tag) where
    S Int
i Code Q tag
_ ERE
ere == :: S tag -> S tag -> Bool
== S Int
i' Code Q tag
_ ERE
ere' = (Int
i, ERE
ere) forall a. Eq a => a -> a -> Bool
== (Int
i', ERE
ere')

instance Ord (S tag) where
    compare :: S tag -> S tag -> Ordering
compare (S Int
i Code Q tag
_ ERE
ere) (S Int
i' Code Q tag
_ ERE
ere') = forall a. Ord a => a -> a -> Ordering
compare (Int
i, ERE
ere) (Int
i', ERE
ere')

type SaTTH' tag = [S tag]

-------------------------------------------------------------------------------
-- Derivative
-------------------------------------------------------------------------------

derivativeSaTTH :: Word8 -> SaTTH' tag -> SaTTH' tag
derivativeSaTTH :: forall tag. Word8 -> SaTTH' tag -> SaTTH' tag
derivativeSaTTH Word8
c SaTTH' tag
ts =
    [ forall tag. Int -> Code Q tag -> ERE -> S tag
S Int
i Code Q tag
code ERE
ere''
    | S Int
i Code Q tag
code ERE
ere <- SaTTH' tag
ts
    , let ere' :: ERE
ere' = Word8 -> ERE -> ERE
derivative Word8
c ERE
ere
    , let ere'' :: ERE
ere'' = ERE -> ERE
simplifyERE ERE
ere'
    , Bool -> Bool
not (ERE -> ERE -> Bool
equivalent ERE
empty ERE
ere'')
    ]

simplifyERE :: ERE -> ERE
simplifyERE :: ERE -> ERE
simplifyERE ERE
ere
    | ERE -> ERE -> Bool
equivalent ERE
ere ERE
eps = ERE
eps
    | Bool
otherwise          = ERE
ere

-- does it make sense to look further?
emptySashaTTH :: SaTTH' tag -> Bool
emptySashaTTH :: forall tag. SaTTH' tag -> Bool
emptySashaTTH = forall (t :: * -> *) a. Foldable t => t a -> Bool
null

epsSashaTTH :: SaTTH' tag -> Bool
epsSashaTTH :: forall tag. SaTTH' tag -> Bool
epsSashaTTH SaTTH' tag
grammar = forall (t :: * -> *). Foldable t => t Bool -> Bool
and [ ERE -> ERE -> Bool
equivalent ERE
ere ERE
eps | S Int
_ Code Q tag
_ ERE
ere <- SaTTH' tag
grammar ]