{-# LANGUAGE TemplateHaskell, CPP #-}
module AlexToolsBin
(
initialInput, Input(..), inputFile
, Lexeme(..)
, SourcePos(..), startPos, beforeStartPos, prevPos
, SourceRange(..)
, prettySourcePos, prettySourceRange
, prettySourcePosLong, prettySourceRangeLong
, HasRange(..)
, (<->)
, moveSourcePos
, Action(..)
, lexeme
, matchLength
, matchRange
, matchBytes
, getLexerState
, setLexerState
, startInput
, endInput
, AlexInput
, alexInputPrevChar
, alexGetByte
, makeLexer
, LexerConfig(..)
, simpleLexer
, Word8
) where
import Control.DeepSeq
import Data.Word(Word8)
import Data.ByteString(ByteString)
import qualified Data.ByteString as BS
import Data.Text(Text)
import qualified Data.Text as Text
import Control.Monad(liftM,ap,replicateM)
import Language.Haskell.TH
#if !MIN_VERSION_base(4,8,0)
import Control.Applicative
#endif
data Lexeme t = Lexeme
{ lexemeBytes :: !ByteString
, lexemeToken :: !t
, lexemeRange :: !SourceRange
} deriving (Show, Eq)
instance NFData t => NFData (Lexeme t) where
rnf (Lexeme x y z) = rnf (x,y,z)
data SourcePos = SourcePos
{ sourceIndex :: !Int
, sourceFile :: !Text
} deriving (Show, Eq)
prettySourcePos :: SourcePos -> String
prettySourcePos x = show (sourceIndex x)
prettySourcePosLong :: SourcePos -> String
prettySourcePosLong x = Text.unpack (sourceFile x) ++ ":" ++ show (sourceIndex x)
instance NFData SourcePos where
rnf (SourcePos {}) = ()
moveSourcePos :: SourcePos -> SourcePos
moveSourcePos p = p { sourceIndex = sourceIndex p + 1 }
data SourceRange = SourceRange
{ sourceFrom :: !SourcePos
, sourceTo :: !SourcePos
} deriving (Show, Eq)
prettySourceRange :: SourceRange -> String
prettySourceRange x = prettySourcePos (sourceFrom x) ++ "--" ++
prettySourcePos (sourceTo x)
prettySourceRangeLong :: SourceRange -> String
prettySourceRangeLong x
| sourceFile pfrom == sourceFile pto =
Text.unpack (sourceFile pfrom) ++ ":" ++
prettySourcePos pfrom ++ "--" ++ prettySourcePos pto
| otherwise = prettySourcePosLong pfrom ++ "--" ++
prettySourcePosLong pto
where
pfrom = sourceFrom x
pto = sourceTo x
instance NFData SourceRange where
rnf (SourceRange {}) = ()
class HasRange t where
range :: t -> SourceRange
instance HasRange SourcePos where
range p = SourceRange { sourceFrom = p, sourceTo = p }
instance HasRange SourceRange where
range = id
instance HasRange (Lexeme t) where
range = lexemeRange
instance (HasRange a, HasRange b) => HasRange (Either a b) where
range (Left x) = range x
range (Right x) = range x
(<->) :: (HasRange a, HasRange b) => a -> b -> SourceRange
x <-> y = SourceRange { sourceFrom = sourceFrom (range x)
, sourceTo = sourceTo (range y)
}
newtype Action s a = A { runA :: Input -> Input -> Int -> s -> (s, a) }
instance Functor (Action s) where
fmap = liftM
instance Applicative (Action s) where
pure a = A (\_ _ _ s -> (s,a))
(<*>) = ap
instance Monad (Action s) where
return = pure
A m >>= f = A (\i1 i2 l s -> let (s1,a) = m i1 i2 l s
A m1 = f a
in m1 i1 i2 l s1)
startInput :: Action s Input
startInput = A (\i1 _ _ s -> (s,i1))
endInput :: Action s Input
endInput = A (\_ i2 _ s -> (s,i2))
matchLength :: Action s Int
matchLength = A (\_ _ l s -> (s,l))
getLexerState :: Action s s
getLexerState = A (\_ _ _ s -> (s,s))
setLexerState :: s -> Action s ()
setLexerState s = A (\_ _ _ _ -> (s,()))
matchRange :: Action s SourceRange
matchRange =
do i1 <- startInput
i2 <- endInput
return (inputPos i1 <-> inputPrev i2)
matchBytes :: Action s ByteString
matchBytes =
do i1 <- startInput
n <- matchLength
return (BS.take n (inputBytes i1))
lexeme :: t -> Action s [Lexeme t]
lexeme tok =
do r <- matchRange
txt <- matchBytes
let l = Lexeme { lexemeRange = r
, lexemeToken = tok
, lexemeBytes = txt
}
l `seq` return [ l ]
data Input = Input
{ inputPos :: {-# UNPACK #-} !SourcePos
, inputBytes :: {-# UNPACK #-} !ByteString
, inputPrev :: {-# UNPACK #-} !SourcePos
, inputPrevByte :: {-# UNPACK #-} !Word8
}
initialInput :: Text ->
ByteString -> Input
initialInput file str = Input
{ inputPos = startPos file
, inputPrev = beforeStartPos file
, inputPrevByte = 0
, inputBytes = str
}
startPos :: Text -> SourcePos
startPos file = SourcePos { sourceIndex = 0, sourceFile = file }
beforeStartPos :: Text -> SourcePos
beforeStartPos file = SourcePos { sourceIndex = -1, sourceFile = file }
prevPos :: SourcePos -> SourcePos
prevPos p
| sourceIndex p < 0 = p
| otherwise = p { sourceIndex = sourceIndex p - 1 }
inputFile :: Input -> Text
inputFile = sourceFile . inputPos
data LexerConfig s t = LexerConfig
{ lexerInitialState :: s
, lexerStateMode :: s -> Int
, lexerEOF :: s -> SourcePos -> [Lexeme t]
}
simpleLexer :: LexerConfig () t
simpleLexer = LexerConfig
{ lexerInitialState = ()
, lexerStateMode = \_ -> 0
, lexerEOF = \_ _ -> []
}
makeLexer :: ExpQ
makeLexer =
do let local = do n <- newName "x"
return (varP n, varE n)
([xP,yP,zP], [xE,yE,zE]) <- unzip <$> replicateM 3 local
let
alexEOF = conP (mkName "AlexEOF") [ ]
alexError = conP (mkName "AlexError") [ wildP ]
alexSkip = conP (mkName "AlexSkip") [ xP, wildP ]
alexToken = conP (mkName "AlexToken") [ xP, yP, zP ]
alexScanUser = varE (mkName "alexScanUser")
let p ~> e = match p (normalB e) []
body go mode inp cfg =
caseE [| $alexScanUser $mode $inp (lexerStateMode $cfg $mode) |]
[ alexEOF ~> [| lexerEOF $cfg $mode (inputPrev $inp) |]
, alexError ~>
[| error "internal error in lexer (AlexToolsBin.hs)" |]
, alexSkip ~> [| $go $mode $xE |]
, alexToken ~> [| case runA $zE $inp $xE $yE $mode of
(mode', ts) -> ts ++ $go mode' $xE |]
]
[e| \cfg -> let go mode inp = $(body [|go|] [|mode|] [|inp|] [|cfg|])
in go (lexerInitialState cfg) |]
type AlexInput = Input
{-# INLINE alexInputPrevChar #-}
alexInputPrevChar :: AlexInput -> Char
alexInputPrevChar = toEnum . fromEnum . inputPrevByte
{-# INLINE alexGetByte #-}
alexGetByte :: AlexInput -> Maybe (Word8,AlexInput)
alexGetByte inp =
do (b,bs) <- BS.uncons (inputBytes inp)
let inp1 = Input { inputPrev = inputPos inp
, inputPrevByte = b
, inputPos = moveSourcePos (inputPos inp)
, inputBytes = bs
}
inp1 `seq` pure (b,inp1)