{-# LANGUAGE TemplateHaskellQuotes #-}
module Sasha.TTH (
    -- * SaTTH, staged Sasha the lexer.
    SaTTH,
    satth,
    -- * ERE specification
    ERE,
    empty,
    eps,
    char,
    charRange,
    charSet,
    utf8Char,
    anyChar,
    anyUtf8Char,
    appends,
    unions,
    intersections,
    star,
    plus,
    string,
    utf8String,
    complement,
    satisfy,
    digit,
) where

import Language.Haskell.TH (Code, CodeQ, Exp, Q)

import Control.Monad               (forM)
import Data.List                   (sortOn)
import Data.Map                    (Map)
import Data.Maybe                  (isJust, listToMaybe)
import Data.Ord                    (Down (..))
import Data.Word                   (Word8)
import Data.Word8Set               (Word8Set)
import Language.Haskell.TTH.LetRec (letrecE)

import qualified Data.ByteString     as BS
import qualified Data.Map.Strict     as Map
import qualified Data.Word8Set       as W8S
import qualified Language.Haskell.TH as TH

import Sasha.Internal.ERE
import Sasha.Internal.Word8Set (memberCode)

-- | Lexer grammar specification: regular expression and result builder function
-- which takes a prefix (the matching part) and a suffix (the rest of input).
type SaTTH r = [(ERE, Code Q BS.ByteString -> Code Q BS.ByteString -> Code Q r)]

-- | Scan for a single token.
satth
    :: forall r. Code Q r           -- ^ no match value
    -> SaTTH r                      -- ^ scanner rules definitions
    -> Code Q (BS.ByteString -> r)  -- ^ scanner code
satth :: forall r. Code Q r -> SaTTH r -> Code Q (ByteString -> r)
satth Code Q r
noMatch SaTTH r
rules = [|| \bs -> $$(satth' noMatch rules [|| bs ||]) bs ||]

-- | Generate a scanner code.
satth' :: forall r. Code Q r -> SaTTH r -> Code Q BS.ByteString -> Code Q (BS.ByteString -> r)
satth' :: forall r.
Code Q r
-> SaTTH r -> Code Q ByteString -> Code Q (ByteString -> r)
satth' Code Q r
noMatch SaTTH r
grammar0 Code Q ByteString
input0 = forall (q :: * -> *) tag r a.
(Ord tag, Quote q, MonadFix q) =>
(tag -> String)
-> (forall (m :: * -> *).
    Monad m =>
    (tag -> m (Code q a)) -> tag -> m (Code q a))
-> (forall (m :: * -> *).
    Monad m =>
    (tag -> m (Code q a)) -> m (Code q r))
-> Code q r
letrecE
    (\SaTTH' r
_ -> String
"state")
    forall (m :: * -> *).
Monad m =>
(SaTTH' r -> m (Code Q (R r))) -> SaTTH' r -> m (Code Q (R r))
trans
    forall (m :: * -> *).
Monad m =>
(SaTTH' r -> m (Code Q (R r))) -> m (Code Q (ByteString -> r))
start
  where
    grammar0' :: SaTTH' r
    grammar0' :: SaTTH' r
grammar0' =
        [ forall r.
Int
-> (Code Q ByteString -> Code Q ByteString -> Code Q r)
-> ERE
-> S r
S Int
i Code Q ByteString -> Code Q ByteString -> Code Q r
f ERE
ere
        | (Int
i, (ERE
ere, Code Q ByteString -> Code Q ByteString -> Code Q r
f)) <- forall a b. [a] -> [b] -> [(a, b)]
zip [Int
0..] SaTTH r
grammar0
        ]

    start :: Monad m => (SaTTH' r -> m (Code Q (R r))) -> m (Code Q (BS.ByteString -> r))
    start :: forall (m :: * -> *).
Monad m =>
(SaTTH' r -> m (Code Q (R r))) -> m (Code Q (ByteString -> r))
start SaTTH' r -> m (Code Q (R r))
rec = do
        Code Q (R r)
startCode <- SaTTH' r -> m (Code Q (R r))
rec SaTTH' r
grammar0'
        -- we assume that none of the tokens accepts an empty string,
        -- so we start without specifying last match.
        forall (m :: * -> *) a. Monad m => a -> m a
return [|| \input -> $$startCode $$noMatch (0 :: Int) input ||]

    trans :: Monad m => (SaTTH' r -> m (Code Q (R r))) -> SaTTH' r -> m (Code Q (R r))
    trans :: forall (m :: * -> *).
Monad m =>
(SaTTH' r -> m (Code Q (R r))) -> SaTTH' r -> m (Code Q (R r))
trans SaTTH' r -> m (Code Q (R r))
_rec SaTTH' r
grammar
        | forall tag. SaTTH' tag -> Bool
emptySashaTTH SaTTH' r
grammar
        = forall (m :: * -> *) a. Monad m => a -> m a
return [|| \ !acc _ _ -> acc ||]

    trans  SaTTH' r -> m (Code Q (R r))
rec SaTTH' r
grammar = do
        -- if the input is not empty?
        let grammarM1 :: Map (SaTTH' r) Word8Set
            grammarM1 :: Map (SaTTH' r) Word8Set
grammarM1 = forall k a. Ord k => (a -> a -> a) -> [(k, a)] -> Map k a
Map.fromListWith Word8Set -> Word8Set -> Word8Set
W8S.union
                [ (forall tag. Word8 -> SaTTH' tag -> SaTTH' tag
derivativeSaTTH Word8
c SaTTH' r
grammar, Word8 -> Word8Set
W8S.singleton Word8
c)
                | Word8
c <- [ forall a. Bounded a => a
minBound .. forall a. Bounded a => a
maxBound ]
                ]

            -- non-empty map
            grammarM :: [(Word8Set, SaTTH' r, M r)]
            grammarM :: [(Word8Set, SaTTH' r, M r)]
grammarM =
                [ (Word8Set
c, SaTTH' r
grammar', forall r. Code Q ByteString -> SaTTH' r -> M r
makeM Code Q ByteString
input0 SaTTH' r
grammar')
                | (SaTTH' r
grammar', Word8Set
c) <- forall k a. Map k a -> [(k, a)]
Map.toList Map (SaTTH' r) Word8Set
grammarM1
                ]

        -- next states
        [(Word8Set, Next (Code Q (R r)), M r)]
nexts0 <- forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
t a -> (a -> m b) -> m (t b)
forM [(Word8Set, SaTTH' r, M r)]
grammarM forall a b. (a -> b) -> a -> b
$ \(Word8Set
ws, SaTTH' r
grammar', M r
modify) -> do
            if forall tag. SaTTH' tag -> Bool
emptySashaTTH SaTTH' r
grammar' then forall (m :: * -> *) a. Monad m => a -> m a
return (Word8Set
ws, forall a. Next a
NextEmpty, M r
modify)
            else if forall tag. SaTTH' tag -> Bool
epsSashaTTH SaTTH' r
grammar' then forall (m :: * -> *) a. Monad m => a -> m a
return (Word8Set
ws, forall a. Next a
NextEps, M r
modify)
            else do
                Code Q (R r)
next <- SaTTH' r -> m (Code Q (R r))
rec SaTTH' r
grammar'
                forall (m :: * -> *) a. Monad m => a -> m a
return (Word8Set
ws, forall a. a -> Next a
Next Code Q (R r)
next, M r
modify)

        -- sort next states
        let nexts :: [(Word8Set, Next (Code Q (R r)), M r)]
            nexts :: [(Word8Set, Next (Code Q (R r)), M r)]
nexts = forall b a. Ord b => (a -> b) -> [a] -> [a]
sortOn (\(Word8Set
ws, Next (Code Q (R r))
_, M r
_) -> Word8Set -> Meas
meas Word8Set
ws) [(Word8Set, Next (Code Q (R r)), M r)]
nexts0

        -- transition case
        let caseAnalysis
                :: Code Q r
                -> Code Q Int
                -> Code Q Word8
                -> Code Q BS.ByteString
                -> Code Q r
            caseAnalysis :: Code Q r
-> Code Q Int -> Code Q Word8 -> Code Q ByteString -> Code Q r
caseAnalysis Code Q r
acc Code Q Int
pos Code Q Word8
c Code Q ByteString
input' = forall a r. Code Q a -> [(Code Q Bool, CodeQ r)] -> CodeQ r
caseTTH [|| () ||]
                [ (Code Q Word8 -> Word8Set -> Code Q Bool
memberCode Code Q Word8
c Word8Set
ws, Code Q r
body)

                | (Word8Set
ws, Next (Code Q (R r))
mnext, M r
modify) <- [(Word8Set, Next (Code Q (R r)), M r)]
nexts
                , let body :: Code Q r
body = case Next (Code Q (R r))
mnext of
                        Next (Code Q (R r))
NextEmpty -> Code Q r
acc
                        Next (Code Q (R r))
NextEps   -> M r
modify Code Q r
acc [|| $$pos + 1 ||]
                        Next Code Q (R r)
next -> [|| let !pos' = $$pos + 1 in $$next $$(modify acc [|| pos' ||]) pos' $$input' ||]
                ]

        let debugWarns :: Q ()
            debugWarns :: Q ()
debugWarns = forall (m :: * -> *) a. Monad m => a -> m a
return ()

        -- Note: acc should stay lazy
        forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ forall (m :: * -> *) a b. Monad m => m a -> Code m b -> Code m b
TH.bindCode_ Q ()
debugWarns [|| \ acc !_pos !input -> case BS.uncons input of
            Nothing           -> acc
            Just (c, _input') -> $$(caseAnalysis [|| acc ||] [|| _pos ||] [|| c ||] [|| _input' ||])
            ||]

-------------------------------------------------------------------------------
-- Sorting transitions
-------------------------------------------------------------------------------

data Meas
    = MeasLite Word8Set
    | MeasCont !(Down Int) !Word8Set
    | MeasSize !Int !Word8Set
  deriving (Meas -> Meas -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Meas -> Meas -> Bool
$c/= :: Meas -> Meas -> Bool
== :: Meas -> Meas -> Bool
$c== :: Meas -> Meas -> Bool
Eq, Eq Meas
Meas -> Meas -> Bool
Meas -> Meas -> Ordering
Meas -> Meas -> Meas
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: Meas -> Meas -> Meas
$cmin :: Meas -> Meas -> Meas
max :: Meas -> Meas -> Meas
$cmax :: Meas -> Meas -> Meas
>= :: Meas -> Meas -> Bool
$c>= :: Meas -> Meas -> Bool
> :: Meas -> Meas -> Bool
$c> :: Meas -> Meas -> Bool
<= :: Meas -> Meas -> Bool
$c<= :: Meas -> Meas -> Bool
< :: Meas -> Meas -> Bool
$c< :: Meas -> Meas -> Bool
compare :: Meas -> Meas -> Ordering
$ccompare :: Meas -> Meas -> Ordering
Ord)

meas :: Word8Set -> Meas
meas :: Word8Set -> Meas
meas Word8Set
ws
    | Word8Set -> Int
W8S.size Word8Set
ws forall a. Ord a => a -> a -> Bool
< Int
2         = Word8Set -> Meas
MeasLite Word8Set
ws
    | forall a. Maybe a -> Bool
isJust (Word8Set -> Maybe (Word8, Word8)
W8S.isRange Word8Set
ws) = Down Int -> Word8Set -> Meas
MeasCont (forall a. a -> Down a
Down (Word8Set -> Int
W8S.size Word8Set
ws)) Word8Set
ws
    | Bool
otherwise               = Int -> Word8Set -> Meas
MeasSize (Word8Set -> Int
W8S.size Word8Set
ws) Word8Set
ws

-------------------------------------------------------------------------------
-- Aliases
-------------------------------------------------------------------------------

-- | Inner scanner function.
--
-- * previous match
-- * position
-- * input
--
type R r = r -> Int -> BS.ByteString -> r

-- | Last accept modifier.
type M r = Code Q r -> CodeQ Int -> CodeQ r

makeM :: forall r. Code Q BS.ByteString -> SaTTH' r -> M r
makeM :: forall r. Code Q ByteString -> SaTTH' r -> M r
makeM Code Q ByteString
input0 SaTTH' r
grammar Code Q r
acc Code Q Int
pos = case Maybe (Code Q ByteString -> Code Q ByteString -> Code Q r)
acc' of
    Maybe (Code Q ByteString -> Code Q ByteString -> Code Q r)
Nothing -> Code Q r
acc
    Just Code Q ByteString -> Code Q ByteString -> Code Q r
f  -> [|| case BS.splitAt $$pos $$input0 of (_pfx, _sfx) -> $$(f [|| _pfx ||] [|| _sfx ||]) ||]
  where
    acc' :: Maybe (Code Q BS.ByteString -> Code Q BS.ByteString -> Code Q r)
    acc' :: Maybe (Code Q ByteString -> Code Q ByteString -> Code Q r)
acc' = forall a. [a] -> Maybe a
listToMaybe
        [ Code Q ByteString -> Code Q ByteString -> Code Q r
f
        | S Int
_ Code Q ByteString -> Code Q ByteString -> Code Q r
f ERE
ere <- SaTTH' r
grammar
        , ERE -> Bool
nullable ERE
ere
        ]

data Next a
    = NextEmpty
    | NextEps
    | Next a

-------------------------------------------------------------------------------
-- TTH extras
-------------------------------------------------------------------------------

caseTTH :: Code Q a -> [(Code Q Bool, CodeQ r)] -> Code Q r
caseTTH :: forall a r. Code Q a -> [(Code Q Bool, CodeQ r)] -> CodeQ r
caseTTH Code Q a
c [(Code Q Bool, CodeQ r)]
guards = forall a (m :: * -> *). Quote m => m Exp -> Code m a
TH.unsafeCodeCoerce forall a b. (a -> b) -> a -> b
$ forall (m :: * -> *). Quote m => m Exp -> [m Match] -> m Exp
TH.caseE (forall a (m :: * -> *). Quote m => Code m a -> m Exp
TH.unTypeCode Code Q a
c)
    [ forall (m :: * -> *).
Quote m =>
m Pat -> m Body -> [m Dec] -> m Match
TH.match forall (m :: * -> *). Quote m => m Pat
TH.wildP (forall (m :: * -> *). Quote m => [m (Guard, Exp)] -> m Body
TH.guardedB (forall r. [(Code Q Bool, Code Q r)] -> [Q (Guard, Exp)]
go [(Code Q Bool, CodeQ r)]
guards))  [] ]
  where
    go :: [(Code Q Bool, Code Q r)] -> [Q (TH.Guard, Exp)]
    go :: forall r. [(Code Q Bool, Code Q r)] -> [Q (Guard, Exp)]
go []          = []
    go [(Code Q Bool
_,Code Q r
b)]     = [forall (m :: * -> *). Quote m => m Exp -> m Exp -> m (Guard, Exp)
TH.normalGE [| otherwise |] (forall a (m :: * -> *). Quote m => Code m a -> m Exp
TH.unTypeCode Code Q r
b)]
    go ((Code Q Bool
g,Code Q r
b):[(Code Q Bool, Code Q r)]
gbs) = forall (m :: * -> *). Quote m => m Exp -> m Exp -> m (Guard, Exp)
TH.normalGE (forall a (m :: * -> *). Quote m => Code m a -> m Exp
TH.unTypeCode Code Q Bool
g) (forall a (m :: * -> *). Quote m => Code m a -> m Exp
TH.unTypeCode Code Q r
b) forall a. a -> [a] -> [a]
: forall r. [(Code Q Bool, Code Q r)] -> [Q (Guard, Exp)]
go [(Code Q Bool, Code Q r)]
gbs

-------------------------------------------------------------------------------
-- State
-------------------------------------------------------------------------------

-- | We give each tag an integer, so we can order them.
data S r = S !Int !(Code Q BS.ByteString -> Code Q BS.ByteString -> Code Q r) !ERE

instance Show (S tag) where
    show :: S tag -> String
show (S Int
i Code Q ByteString -> Code Q ByteString -> Code Q tag
_ ERE
ere) = forall a. Show a => a -> String
show (Int
i, ERE
ere)

instance Eq (S tag) where
    S Int
i Code Q ByteString -> Code Q ByteString -> Code Q tag
_ ERE
ere == :: S tag -> S tag -> Bool
== S Int
i' Code Q ByteString -> Code Q ByteString -> Code Q tag
_ ERE
ere' = (Int
i, ERE
ere) forall a. Eq a => a -> a -> Bool
== (Int
i', ERE
ere')

instance Ord (S tag) where
    compare :: S tag -> S tag -> Ordering
compare (S Int
i Code Q ByteString -> Code Q ByteString -> Code Q tag
_ ERE
ere) (S Int
i' Code Q ByteString -> Code Q ByteString -> Code Q tag
_ ERE
ere') = forall a. Ord a => a -> a -> Ordering
compare (Int
i, ERE
ere) (Int
i', ERE
ere')

type SaTTH' tag = [S tag]

-------------------------------------------------------------------------------
-- Derivative
-------------------------------------------------------------------------------

derivativeSaTTH :: Word8 -> SaTTH' tag -> SaTTH' tag
derivativeSaTTH :: forall tag. Word8 -> SaTTH' tag -> SaTTH' tag
derivativeSaTTH Word8
c SaTTH' tag
ts =
    [ forall r.
Int
-> (Code Q ByteString -> Code Q ByteString -> Code Q r)
-> ERE
-> S r
S Int
i Code Q ByteString -> Code Q ByteString -> Code Q tag
code ERE
ere''
    | S Int
i Code Q ByteString -> Code Q ByteString -> Code Q tag
code ERE
ere <- SaTTH' tag
ts
    , let ere' :: ERE
ere' = Word8 -> ERE -> ERE
derivative Word8
c ERE
ere
    , let ere'' :: ERE
ere'' = ERE -> ERE
simplifyERE ERE
ere'
    , Bool -> Bool
not (ERE -> ERE -> Bool
equivalent ERE
empty ERE
ere'')
    ]

simplifyERE :: ERE -> ERE
simplifyERE :: ERE -> ERE
simplifyERE ERE
ere
    | ERE -> ERE -> Bool
equivalent ERE
ere ERE
eps = ERE
eps
    | Bool
otherwise          = ERE
ere

-- does it make sense to look further?
emptySashaTTH :: SaTTH' tag -> Bool
emptySashaTTH :: forall tag. SaTTH' tag -> Bool
emptySashaTTH = forall (t :: * -> *) a. Foldable t => t a -> Bool
null

epsSashaTTH :: SaTTH' tag -> Bool
epsSashaTTH :: forall tag. SaTTH' tag -> Bool
epsSashaTTH SaTTH' tag
grammar = forall (t :: * -> *). Foldable t => t Bool -> Bool
and [ ERE -> ERE -> Bool
equivalent ERE
ere ERE
eps | S Int
_ Code Q ByteString -> Code Q ByteString -> Code Q tag
_ ERE
ere <- SaTTH' tag
grammar ]