{-# LANGUAGE DeriveFunctor #-} {-# LANGUAGE RecordWildCards #-} {-# LANGUAGE TemplateHaskell #-} {-# OPTIONS_HADDOCK show-extensions #-} -- | -- Module : Yi.Lexer.Alex -- License : GPL-2 -- Maintainer : yi-devel@googlegroups.com -- Stability : experimental -- Portability : portable -- -- Utilities to turn a lexer generated by Alex into a 'Scanner' that -- can be used by Yi. Most lexers will use the types defined here. -- Some things are exported for use by lexers themselves through the -- use of @Yi/Lexers/common.hsinc@. module Yi.Lexer.Alex ( module Yi.Lexer.Alex , (+~), (~-), Size(..), Stroke ) where import Lens.Micro.Platform (_1, view, makeLenses) import qualified Data.Bits import Data.Char (ord) import Data.Function (on) import Data.Ix import Data.List (foldl') import Data.Ord (comparing) import Data.Word (Word8) import Yi.Style (StyleName) import Yi.Syntax hiding (mkHighlighter) import Yi.Utils -- | Encode a Haskell String to a list of Word8 values, in UTF8 format. utf8Encode :: Char -> [Word8] utf8Encode = map fromIntegral . go . ord where go oc | oc <= 0x7f = [oc] | oc <= 0x7ff = [ 0xc0 + (oc `Data.Bits.shiftR` 6) , 0x80 + oc Data.Bits..&. 0x3f ] | oc <= 0xffff = [ 0xe0 + (oc `Data.Bits.shiftR` 12) , 0x80 + ((oc `Data.Bits.shiftR` 6) Data.Bits..&. 0x3f) , 0x80 + oc Data.Bits..&. 0x3f ] | otherwise = [ 0xf0 + (oc `Data.Bits.shiftR` 18) , 0x80 + ((oc `Data.Bits.shiftR` 12) Data.Bits..&. 0x3f) , 0x80 + ((oc `Data.Bits.shiftR` 6) Data.Bits..&. 0x3f) , 0x80 + oc Data.Bits..&. 0x3f ] type Byte = Word8 type IndexedStr = [(Point, Char)] type AlexInput = (Char, [Byte], IndexedStr) type Action hlState token = IndexedStr -> hlState -> (hlState, token) -- | Lexer state data AlexState lexerState = AlexState { stLexer :: lexerState, -- (user defined) lexer state lookedOffset :: !Point, -- Last offset looked at stPosn :: !Posn } deriving Show data Tok t = Tok { tokT :: t , tokLen :: Size , tokPosn :: Posn } deriving Functor instance Eq (Tok a) where (==) = (==) `on` tokPosn tokToSpan :: Tok t -> Span t tokToSpan (Tok t len posn) = Span (posnOfs posn) t (posnOfs posn +~ len) tokFromT :: t -> Tok t tokFromT t = Tok t 0 startPosn tokBegin :: Tok t -> Point tokBegin = posnOfs . tokPosn tokEnd :: Tok t -> Point tokEnd t = tokBegin t +~ tokLen t instance Show t => Show (Tok t) where show tok = show (tokPosn tok) ++ ": " ++ show (tokT tok) data Posn = Posn { posnOfs :: !Point , posnLine :: !Int , posnCol :: !Int } deriving (Eq, Ix) -- TODO: Verify that this is right. /Deniz instance Ord Posn where compare = comparing posnOfs instance Show Posn where show (Posn o l c) = "L" ++ show l ++ " " ++ "C" ++ show c ++ "@" ++ show o startPosn :: Posn startPosn = Posn 0 1 0 moveStr :: Posn -> IndexedStr -> Posn moveStr posn str = foldl' moveCh posn (fmap snd str) moveCh :: Posn -> Char -> Posn moveCh (Posn o l c) '\t' = Posn (o+1) l (((c+8) `div` 8)*8) moveCh (Posn o l _) '\n' = Posn (o+1) (l+1) 0 moveCh (Posn o l c) _ = Posn (o+1) l (c+1) alexGetChar :: AlexInput -> Maybe (Char, AlexInput) alexGetChar (_,_,[]) = Nothing alexGetChar (_,b,(_,c):rest) = Just (c, (c,b,rest)) alexGetByte :: AlexInput -> Maybe (Word8,AlexInput) alexGetByte (c, b:bs, s) = Just (b,(c,bs,s)) alexGetByte (_, [], []) = Nothing alexGetByte (_, [], c:s) = case utf8Encode (snd c) of (b:bs) -> Just (b, ((snd c), bs, s)) [] -> Nothing {-# ANN alexCollectChar "HLint: ignore Use String" #-} alexCollectChar :: AlexInput -> [Char] alexCollectChar (_, _, []) = [] alexCollectChar (_, b, (_, c):rest) = c : alexCollectChar (c, b, rest) alexInputPrevChar :: AlexInput -> Char alexInputPrevChar = view _1 -- | Return a constant token actionConst :: token -> Action lexState token actionConst token = \_str state -> (state, token) -- | Return a constant token, and modify the lexer state actionAndModify :: (lexState -> lexState) -> token -> Action lexState token actionAndModify modifierFct token = \_str state -> (modifierFct state, token) -- | Convert the parsed string into a token, -- and also modify the lexer state actionStringAndModify :: (s -> s) -> (String -> token) -> Action s token actionStringAndModify modF f = \istr s -> (modF s, f $ fmap snd istr) -- | Convert the parsed string into a token actionStringConst :: (String -> token) -> Action lexState token actionStringConst f = \indexedStr state -> (state, f $ fmap snd indexedStr) type ASI s = (AlexState s, AlexInput) -- | Function to (possibly) lex a single token and give us the -- remaining input. type TokenLexer l s t i = (l s, i) -> Maybe (t, (l s, i)) -- | Handy alias type CharScanner = Scanner Point Char -- | Generalises lexers. This allows us to easily use lexers which -- don't want to be cornered into the types we have predefined here -- and use in @common.hsinc@. data Lexer l s t i = Lexer { _step :: TokenLexer l s t i , _starting :: s -> Point -> Posn -> l s , _withChars :: Char -> [(Point, Char)] -> i , _looked :: l s -> Point , _statePosn :: l s -> Posn , _lexEmpty :: t , _startingState :: s } -- | Just like 'Lexer' but also knows how to turn its tokens into -- 'StyleName's. data StyleLexer l s t i = StyleLexer { _tokenToStyle :: t -> StyleName , _styleLexer :: Lexer l s (Tok t) i } -- | 'StyleLexer' over 'ASI'. type StyleLexerASI s t = StyleLexer AlexState s t AlexInput -- | Defines a 'Lexer' for 'ASI'. This exists to make using the new -- 'lexScanner' easier if you're using 'ASI' as all our lexers do -- today, 23-08-2014. commonLexer :: (ASI s -> Maybe (Tok t, ASI s)) -> s -> Lexer AlexState s (Tok t) AlexInput commonLexer l st0 = Lexer { _step = l , _starting = AlexState , _withChars = \c p -> (c, [], p) , _looked = lookedOffset , _statePosn = stPosn , _lexEmpty = error "Yi.Lexer.Alex.commonLexer: lexEmpty" , _startingState = st0 } -- | Combine a character scanner with a lexer to produce a token -- scanner. May be used together with 'mkHighlighter' to produce a -- 'Highlighter', or with 'linearSyntaxMode' to produce a 'Mode'. lexScanner :: Lexer l s t i -> CharScanner -> Scanner (l s) t lexScanner Lexer {..} src = Scanner { scanLooked = _looked , scanInit = _starting _startingState 0 startPosn , scanRun = \st -> case posnOfs $ _statePosn st of 0 -> unfoldLexer _step (st, _withChars '\n' $ scanRun src 0) ofs -> case scanRun src (ofs -1) of [] -> [] (_, ch) : rest -> unfoldLexer _step (st, _withChars ch rest) , scanEmpty = _lexEmpty } -- | unfold lexer into a function that returns a stream of (state, token) unfoldLexer :: ((state, input) -> Maybe (token, (state, input))) -> (state, input) -> [(state, token)] unfoldLexer f b = case f b of Nothing -> [] Just (t, b') -> (fst b, t) : unfoldLexer f b' -- * Lenses makeLensesWithSuffix "A" ''Posn makeLensesWithSuffix "A" ''Tok makeLenses ''Lexer makeLenses ''StyleLexer