-- | Apply the offside rule to a token stream to add braces. module DDC.Core.Lexer.Offside ( Lexeme (..) , applyOffside , addStarts) where import DDC.Core.Lexer.Tokens import DDC.Data.SourcePos --------------------------------------------------------------------------------------------------- -- | Holds a real token or start symbol which is used to apply the offside rule. data Lexeme n = LexemeToken (Located (Token n)) | LexemeStartLine Int -- | Signal that we're starting a block in this column. | LexemeStartBlock Int deriving (Eq, Show) -- | Parenthesis that we're currently inside. data Paren = ParenRound | ParenBrace deriving Show -- | What column number the current layout context started in. type Context = Int -- | Apply the offside rule to this token stream. -- -- It should have been processed with addStarts first to add the -- LexemeStartLine/LexemeStartLine tokens. -- -- Unlike the definition in the Haskell 98 report, we explicitly track -- which parenthesis we're inside. We use these to partly implement -- the layout rule that says we much check for entire parse errors to -- perform the offside rule. applyOffside :: (Eq n, Show n) => [Paren] -- ^ What parenthesis we're inside. -> [Context] -- ^ Current layout context. -> [Lexeme n] -- ^ Input lexemes. -> [Located (Token n)] -- Wait for the module header before we start applying the real offside rule. -- This allows us to write 'module Name with letrec' all on the same line. applyOffside ps [] (LexemeToken t : ts) | isKeyword t EModule || isKNToken t = t : applyOffside ps [] ts -- Enter into a top-level block in the module, and start applying the -- offside rule within it. -- The blocks are introduced by: -- 'exports' 'imports' 'letrec' 'where' -- 'import foreign MODE type' -- 'import foreign MODE capability' -- 'import foreign MODE value' applyOffside ps [] ls | LexemeToken t1 : (LexemeStartBlock n) : ls' <- ls , isKeyword t1 EExport || isKeyword t1 EImport || isKeyword t1 ELetRec || isKeyword t1 EWhere = t1 : newCBra ls' : applyOffside (ParenBrace : ps) [n] ls' -- (import | export) (type | value) { ... } | LexemeToken t1 : LexemeToken t2 : LexemeStartBlock n : ls' <- ls , isKeyword t1 EImport || isKeyword t1 EExport , isKeyword t2 EType || isKeyword t2 EValue = t1 : t2 : newCBra ls' : applyOffside (ParenBrace : ps) [n] ls' -- (import | export) foreign X (type | capability | value) { ... } | LexemeToken t1 : LexemeToken t2 : LexemeToken t3 : LexemeToken t4 : LexemeStartBlock n : ls' <- ls , isKeyword t1 EImport || isKeyword t1 EExport , isKeyword t2 EForeign , isKeyword t4 EType || isKeyword t4 ECapability || isKeyword t4 EValue = t1 : t2 : t3 : t4 : newCBra ls' : applyOffside (ParenBrace : ps) [n] ls' -- At top level without a context. -- Skip over everything until we get the 'with' in 'module Name with ...'' applyOffside ps [] (LexemeStartLine _ : ts) = applyOffside ps [] ts applyOffside ps [] (LexemeStartBlock _ : ts) = applyOffside ps [] ts -- line start applyOffside ps mm@(m : ms) (t@(LexemeStartLine n) : ts) -- add semicolon to get to the next statement in this block | m == n = newSemiColon ts : applyOffside ps mm ts -- end a block | n <= m = case ps of -- Closed a block that we're inside, ok. ParenBrace : ps' -> newCKet ts : applyOffside ps' ms (t : ts) -- We're supposed to close the block we're inside, but we're -- still inside an open '(' context. Just keep passing the -- tokens through, and let the parser give its error when -- it gets to it. ParenRound : _ -> applyOffside ps ms ts -- We always push an element of the layout context -- at the same time as a paren context, so this shouldn't happen. _ -> error $ "ddc-core: paren / layout context mismatch." -- indented continuation of this statement | otherwise = applyOffside ps mm ts -- block start applyOffside ps mm@(m : ms) (LexemeStartBlock n : ts) -- enter into a nested context | n > m = newCBra ts : applyOffside (ParenBrace : ps) (n : m : ms) ts -- new context starts less than the current one. -- This should never happen, -- provided addStarts works. | tNext : _ <- dropNewLinesLexeme ts = error $ "ddc-core: layout error on " ++ show tNext ++ "." -- new context cannot be less indented than outer one -- This should never happen, -- as there is no lexeme to start a new context at the end of the file. | [] <- dropNewLinesLexeme ts = error $ "ddc-core: tried to start new context at end of file." -- an empty block | otherwise = newCBra ts : newCKet ts : applyOffside ps mm (LexemeStartLine n : ts) -- push context for explicit open brace applyOffside ps ms (LexemeToken t@(Located _ (KA (KSymbol SBraceBra))) : ts) = t : applyOffside (ParenBrace : ps) (0 : ms) ts -- pop context from explicit close brace applyOffside ps mm (LexemeToken t@(Located _ (KA (KSymbol SBraceKet))) : ts) -- make sure that explict open braces match explicit close braces | 0 : ms <- mm , ParenBrace : ps' <- ps = t : applyOffside ps' ms ts -- nup | _tNext : _ <- dropNewLinesLexeme ts = [newOffsideClosingBrace ts] -- push context for explict open paren. applyOffside ps ms ( LexemeToken t@(Located _ (KA (KSymbol SRoundBra))) : ts) = t : applyOffside (ParenRound : ps) ms ts -- force close of block on close paren. -- This partially handles the crazy (Note 5) rule from the Haskell98 standard. applyOffside (ParenBrace : ps) (m : ms) (lt@(LexemeToken (Located _ (KA (KSymbol SRoundKet)))) : ts) | m /= 0 = newCKet ts : applyOffside ps ms (lt : ts) -- pop context for explicit close paren. applyOffside (ParenRound : ps) ms ( LexemeToken t@(Located _ (KA (KSymbol SRoundKet))) : ts) = t : applyOffside ps ms ts -- pass over tokens. applyOffside ps ms (LexemeToken t : ts) = t : applyOffside ps ms ts applyOffside _ [] [] = [] -- close off remaining contexts once we've reached the end of the stream. applyOffside ps (_ : ms) [] = newCKet [] : applyOffside ps ms [] isKeyword (Located _ tok) k = case tok of KA (KKeyword k') -> k == k' _ -> False -- addStarts -------------------------------------------------------------------------------------- -- | Add block and line start tokens to this stream. -- -- This is identical to the definition in the Haskell98 report, -- except that we also use multi-token starting strings like -- 'imports' 'foreign' 'type'. addStarts :: (Eq n, Show n) => [Located (Token n)] -> [Lexeme n] addStarts ts = case dropNewLines ts of -- If the first lexeme of a module is not '{' then start a new block. (t1 : tsRest) | not $ or $ map (isToken t1) [KA (KSymbol SBraceBra)] -> LexemeStartBlock (columnOfLocated t1) : addStarts' (t1 : tsRest) | otherwise -> addStarts' (t1 : tsRest) -- empty file [] -> [] addStarts' :: Eq n => [Located (Token n)] -> [Lexeme n] addStarts' ts -- Block started at end of input. | Just (ts1, ts2) <- splitBlockStart ts , [] <- dropNewLines ts2 = [LexemeToken t | t <- ts1] ++ [LexemeStartBlock 0] -- Standard block start. -- If there is not an open brace after a block start sequence then -- insert a new one. | Just (ts1, ts2) <- splitBlockStart ts , t2 : tsRest <- dropNewLines ts2 , not $ isToken t2 (KA (KSymbol SBraceBra)) = [LexemeToken t | t <- ts1] ++ [LexemeStartBlock (columnOfLocated t2)] ++ addStarts' (t2 : tsRest) -- check for start of list | t1 : ts' <- ts , isToken t1 (KA (KSymbol SBraceBra)) = LexemeToken t1 : addStarts' ts' -- check for end of list | t1 : ts' <- ts , isToken t1 (KA (KSymbol SBraceKet)) = LexemeToken t1 : addStarts' ts' -- check for start of new line | t1 : ts' <- ts , isToken t1 (KM KNewLine) , t2 : tsRest <- dropNewLines ts' , not $ isToken t2 (KA (KSymbol SBraceBra)) = LexemeStartLine (columnOfLocated t2) : addStarts' (t2 : tsRest) -- eat up trailine newlines | t1 : ts' <- ts , isToken t1 (KM KNewLine) = addStarts' ts' -- a regular token | t1 : ts' <- ts = LexemeToken t1 : addStarts' ts' -- end of input | otherwise = [] -- | Drop newline tokens at the front of this stream. dropNewLines :: Eq n => [Located (Token n)] -> [Located (Token n)] dropNewLines [] = [] dropNewLines (t1:ts) | isToken t1 (KM KNewLine) = dropNewLines ts | otherwise = t1 : ts -- | Drop newline tokens at the front of this stream. dropNewLinesLexeme :: Eq n => [Lexeme n] -> [Lexeme n] dropNewLinesLexeme ll = case ll of [] -> [] LexemeToken t1 : ts | isToken t1 (KM KNewLine) -> dropNewLinesLexeme ts l : ls -> l : dropNewLinesLexeme ls -- | Check if a token is one that starts a block of statements. splitBlockStart :: [Located (Token n)] -> Maybe ([Located (Token n)], [Located (Token n)]) splitBlockStart toks -- export type | t1@(Located _ (KA (KKeyword EExport))) : t2@(Located _ (KA (KKeyword EType))) : ts <- toks = Just ([t1, t2], ts) -- export value | t1@(Located _ (KA (KKeyword EExport))) : t2@(Located _ (KA (KKeyword EValue))) : ts <- toks = Just ([t1, t2], ts) -- export foreign X value | t1@(Located _ (KA (KKeyword EExport))) : t2@(Located _ (KA (KKeyword EForeign))) : t3 : t4@(Located _ (KA (KKeyword EValue))) : ts <- toks = Just ([t1, t2, t3, t4], ts) -- import type | t1@(Located _ (KA (KKeyword EImport))) : t2@(Located _ (KA (KKeyword EType))) : ts <- toks = Just ([t1, t2], ts) -- import value | t1@(Located _ (KA (KKeyword EImport))) : t2@(Located _ (KA (KKeyword EValue))) : ts <- toks = Just ([t1, t2], ts) -- import data | t1@(Located _ (KA (KKeyword EImport))) : t2@(Located _ (KA (KKeyword EData))) : ts <- toks = Just ([t1, t2], ts) -- import foreign X type | t1@(Located _ (KA (KKeyword EImport))) : t2@(Located _ (KA (KKeyword EForeign))) : t3 : t4@(Located _ (KA (KKeyword EType))) : ts <- toks = Just ([t1, t2, t3, t4], ts) -- import foreign X capability | t1@(Located _ (KA (KKeyword EImport))) : t2@(Located _ (KA (KKeyword EForeign))) : t3 : t4@(Located _ (KA (KKeyword ECapability))) : ts <- toks = Just ([t1, t2, t3, t4], ts) -- import foreign X value | t1@(Located _ (KA (KKeyword EImport))) : t2@(Located _ (KA (KKeyword EForeign))) : t3 : t4@(Located _ (KA (KKeyword EValue))) : ts <- toks = Just ([t1, t2, t3, t4], ts) | t1@(Located _ (KA (KKeyword EDo))) : ts <- toks = Just ([t1], ts) | t1@(Located _ (KA (KKeyword EOf))) : ts <- toks = Just ([t1], ts) | t1@(Located _ (KA (KKeyword ELetRec))) : ts <- toks = Just ([t1], ts) | t1@(Located _ (KA (KKeyword EWhere))) : ts <- toks = Just ([t1], ts) | t1@(Located _ (KA (KKeyword EExport))) : ts <- toks = Just ([t1], ts) | t1@(Located _ (KA (KKeyword EImport))) : ts <- toks = Just ([t1], ts) | t1@(Located _ (KA (KKeyword EMatch))) : ts <- toks = Just ([t1], ts) | otherwise = Nothing -- Utils ------------------------------------------------------------------------------------------ -- | Test whether this wrapper token matches. isToken :: Eq n => Located (Token n) -> Token n -> Bool isToken (Located _ tok) tok2 = tok == tok2 -- | Test whether this wrapper token matches. isKNToken :: Eq n => Located (Token n) -> Bool isKNToken (Located _ (KN _)) = True isKNToken _ = False -- | When generating new source tokens, take the position from the first -- non-newline token in this list newCBra :: [Lexeme n] -> Located (Token n) newCBra ts = case takeTok ts of Located sp _ -> Located sp (KA (KSymbol SBraceBra)) newCKet :: [Lexeme n] -> Located (Token n) newCKet ts = case takeTok ts of Located sp _ -> Located sp (KA (KSymbol SBraceKet)) newSemiColon :: [Lexeme n] -> Located (Token n) newSemiColon ts = case takeTok ts of Located sp _ -> Located sp (KA (KSymbol SSemiColon)) -- | This is injected by `applyOffside` when it finds an explit close -- brace in a position where it would close a synthetic one. newOffsideClosingBrace :: [Lexeme n] -> Located (Token n) newOffsideClosingBrace ts = case takeTok ts of Located sp _ -> Located sp (KM KOffsideClosingBrace) takeTok :: [Lexeme n] -> Located (Token n) takeTok [] = Located (SourcePos "" 0 0) (KErrorJunk "") takeTok (l : ls) = case l of LexemeToken (Located _ (KM KNewLine)) -> takeTok ls LexemeToken t -> t LexemeStartLine _ -> takeTok ls LexemeStartBlock _ -> takeTok ls