module Language.CSPM.AlexWrapper
where
import Language.CSPM.Token
import Language.CSPM.TokenClasses
import Data.Char
import Data.Word (Word8)
import qualified Data.Bits
import Data.List
import Control.Applicative (Applicative(..))
import Control.Monad (liftM, ap)
type AlexInput = (AlexPosn,
Char,
[Byte],
String)
type Byte = Word8
data AlexState = AlexState {
alex_input :: AlexInput
,alex_scd :: !Int
,alex_cnt :: !Int
}
runAlex :: String -> Alex a -> Either LexError a
runAlex input (Alex f)
= case f initState of
Left msg -> Left msg
Right ( _, a ) -> Right a
where
initState
= AlexState {
alex_input = initAlexInput
,alex_scd = 0
,alex_cnt = 0
}
initAlexInput = (alexStartPos,'\n',[],input)
newtype Alex a = Alex { unAlex :: AlexState -> Either LexError (AlexState, a) }
instance Monad Alex where
m >>= k = Alex $ \s -> case unAlex m s of
Left msg -> Left msg
Right (s',a) -> unAlex (k a) s'
return a = Alex $ \s -> Right (s,a)
instance Functor Alex where fmap = liftM
instance Applicative Alex where
pure = return
(<*>) = ap
alexGetInput :: Alex AlexInput
alexGetInput
= Alex $ \s-> Right (s,alex_input s)
alexSetInput :: AlexInput -> Alex ()
alexSetInput input
= Alex $ \state -> case state {alex_input=input} of
s@(AlexState{}) -> Right (s, ())
alexError :: String -> Alex a
alexError message
= Alex $ \st -> let (pos,_,_,_) = alex_input st in
Left $ LexError {lexEPos = pos, lexEMsg = message }
alexGetStartCode :: Alex Int
alexGetStartCode = Alex $ \s@AlexState{alex_scd=sc} -> Right (s, sc)
alexSetStartCode :: Int -> Alex ()
alexSetStartCode sc = Alex $ \s -> Right (s{alex_scd=sc}, ())
alexCountToken :: Alex Int
alexCountToken
= Alex $ \s -> Right (s {alex_cnt = succ $ alex_cnt s}, alex_cnt s)
alexGetByte :: AlexInput -> Maybe (Byte,AlexInput)
alexGetByte (p,c,(b:bs),s) = Just (b,(p,c,bs,s))
alexGetByte (_,_,[],[]) = Nothing
alexGetByte (p,_,[],(c:s)) = let p' = alexMove p c
(b:bs) = utf8Encode c
in p' `seq` Just (b, (p', c, bs, s))
utf8Encode :: Char -> [Word8]
utf8Encode = map fromIntegral . go . ord
where
go oc
| oc <= 0x7f = [oc]
| oc <= 0x7ff = [ 0xc0 + (oc `Data.Bits.shiftR` 6)
, 0x80 + oc Data.Bits..&. 0x3f
]
| oc <= 0xffff = [ 0xe0 + (oc `Data.Bits.shiftR` 12)
, 0x80 + ((oc `Data.Bits.shiftR` 6) Data.Bits..&. 0x3f)
, 0x80 + oc Data.Bits..&. 0x3f
]
| otherwise = [ 0xf0 + (oc `Data.Bits.shiftR` 18)
, 0x80 + ((oc `Data.Bits.shiftR` 12) Data.Bits..&. 0x3f)
, 0x80 + ((oc `Data.Bits.shiftR` 6) Data.Bits..&. 0x3f)
, 0x80 + oc Data.Bits..&. 0x3f
]
type AlexAction result = AlexInput -> Int -> result
andBegin :: (t -> t1 -> Alex b) -> Int -> t -> t1 -> Alex b
andBegin action code input len = do alexSetStartCode code; action input len
mkL :: PrimToken -> AlexInput -> Int -> Alex Token
mkL c (pos, _, _, str) len = do
cnt <- alexCountToken
return $ Token {
tokenId = mkTokenId cnt
, tokenStart = pos
, tokenLen = len
, tokenClass = c
, tokenString = take len str
}
block_comment :: AlexInput -> Int -> Alex Token
block_comment (startPos, _ ,[], '\123':'-':input) 2 = do
case go 1 "-{" input of
Nothing -> Alex $ \_-> Left $ LexError {
lexEPos = startPos
,lexEMsg = "Unclosed Blockcomment"
}
Just (acc, rest) -> do
cnt <- alexCountToken
let
tokenId = mkTokenId cnt
tokenString = reverse acc
tokenLen = length tokenString
tokenStart = startPos
tokenClass = case (tokenString, acc) of
('\123':'-':'#':_, '\125':'-':'#':_) -> L_Pragma
('\123':'-':_ , '\125':'-':_ ) -> L_BComment
_ -> error "internal Error: cannot determine variant of block_comment"
alexSetInput (foldl' alexMove startPos tokenString, '\125', [],rest)
return $ Token {..}
where
go :: Int -> String -> String -> Maybe (String,String)
go 0 acc rest = Just (acc, rest)
go nested acc rest = case rest of
'-' : '\125' : r2 -> go (pred nested) ('\125': '-' : acc) r2
'\123' : '-' : r2 -> go (succ nested) ('-' : '\123': acc) r2
h:r2 -> go nested (h : acc) r2
[] -> Nothing
block_comment _ _ = error "internal Error : block_comment called with bad args"
lexError :: String -> Alex a
lexError s = do
(_p, _c,_, input) <- alexGetInput
let
pos = if not $ null input
then " at " ++ (reportChar $ head input)
else " at end of file"
alexError $ s ++ pos
where
reportChar c =
if isPrint c
then show c
else "charcode " ++ (show $ ord c)
alexEOF :: Alex Token
alexEOF = return (Token (mkTokenId 0) (AlexPn 0 0 0) 0 L_EOF "")
alexInputPrevChar :: AlexInput -> Char
alexInputPrevChar (_p, _c, _,_s) = error "alex-input-prev-char not supported ??!"