{-# LANGUAGE BangPatterns #-}
module Config.LexerUtils
(
AlexInput
, alexGetByte
, LexerMode(..)
, startString
, nestMode
, endMode
, token
, token_
, section
, number
, untermString
, eofAction
, errorAction
) where
import Data.Char (GeneralCategory(..), generalCategory, isAscii, isSpace, ord)
import Data.Text (Text)
import Data.Word (Word8)
import qualified Data.Text as Text
import Config.Tokens
import qualified Config.NumberParser
type AlexInput = Located Text
alexGetByte :: AlexInput -> Maybe (Word8,AlexInput)
alexGetByte (Located p cs)
= do (c,cs') <- Text.uncons cs
let !b = byteForChar c
!inp = Located (move p c) cs'
return (b, inp)
move :: Position -> Char -> Position
move (Position ix line column) c =
case c of
'\t' -> Position (ix + 1) line (((column + 7) `div` 8) * 8 + 1)
'\n' -> Position (ix + 1) (line + 1) 1
_ -> Position (ix + 1) line (column + 1)
eofAction :: Position -> LexerMode -> [Located Token]
eofAction eofPosn st =
case st of
InComment posn _ -> [Located posn (Error UntermComment)]
InCommentString posn _ -> [Located posn (Error UntermComment)]
InString posn _ -> [Located posn (Error UntermString)]
InNormal -> [Located (park eofPosn) EOF]
park :: Position -> Position
park pos
| posColumn pos == 1 = pos { posColumn = 0 }
| otherwise = pos { posColumn = 0, posLine = posLine pos + 1 }
errorAction :: AlexInput -> [Located Token]
errorAction inp = [fmap (Error . NoMatch . Text.head) inp]
data LexerMode
= InNormal
| InComment !Position !LexerMode
| InCommentString !Position !LexerMode
| InString !Position !Text
type Action =
Int ->
Located Text ->
LexerMode ->
(LexerMode, [Located Token])
token :: (Text -> Token) -> Action
token f len match st = (st, [fmap (f . Text.take len) match])
token_ :: Token -> Action
token_ = token . const
nestMode :: (Position -> LexerMode -> LexerMode) -> Action
nestMode f _ match st = (f (locPosition match) st, [])
startString :: Action
startString _ (Located posn text) _ = (InString posn text, [])
endMode :: Action
endMode len (Located endPosn _) mode =
case mode of
InNormal -> (InNormal, [])
InCommentString _ st -> (st, [])
InComment _ st -> (st, [])
InString startPosn input ->
let n = posIndex endPosn - posIndex startPosn + len
badEscape = BadEscape (Text.pack "out of range")
in case reads (Text.unpack (Text.take n input)) of
[(s,"")] -> (InNormal, [Located startPosn (String (Text.pack s))])
_ -> (InNormal, [Located startPosn (Error badEscape)])
untermString :: Action
untermString _ _ = \(InString posn _) ->
(InNormal, [Located posn (Error UntermString)])
number ::
Text ->
Token
number = Number . Config.NumberParser.number . Text.unpack . Text.toUpper
section :: Text -> Token
section = Section . Text.dropWhileEnd isSpace . Text.init
byteForChar :: Char -> Word8
byteForChar c
| c <= '\6' = non_graphic
| isAscii c = fromIntegral (ord c)
| otherwise = case generalCategory c of
LowercaseLetter -> lower
OtherLetter -> lower
UppercaseLetter -> upper
TitlecaseLetter -> upper
DecimalNumber -> digit
OtherNumber -> digit
ConnectorPunctuation -> symbol
DashPunctuation -> symbol
OtherPunctuation -> symbol
MathSymbol -> symbol
CurrencySymbol -> symbol
ModifierSymbol -> symbol
OtherSymbol -> symbol
Space -> space
ModifierLetter -> other
NonSpacingMark -> other
SpacingCombiningMark -> other
EnclosingMark -> other
LetterNumber -> other
OpenPunctuation -> other
ClosePunctuation -> other
InitialQuote -> other
FinalQuote -> other
_ -> non_graphic
where
non_graphic = 0
upper = 1
lower = 2
digit = 3
symbol = 4
space = 5
other = 6