DDC/Core/Lexer/Offside.hs


-- | Apply the offside rule to a token stream to add braces.
module DDC.Core.Lexer.Offside
        ( Lexeme        (..)
        , applyOffside
        , addStarts)
where
import DDC.Core.Lexer.Tokens
import DDC.Data.SourcePos
import DDC.Data.Token


-- | Holds a real token or start symbol which is used to apply the offside rule.
data Lexeme n
        = LexemeToken           (Token (Tok n))
        | LexemeStartLine       Int

        -- | Signal that we're starting a block in this column.
        | LexemeStartBlock      Int
        deriving (Eq, Show)

type Context
        = Int

-- | Apply the offside rule to this token stream.
--   It should have been processed with addStarts first to add the
--   LexemeStartLine/LexemeStartLine tokens.
--
applyOffside 
        :: (Eq n, Show n) 
        => [Context] 
        -> [Lexeme n] 
        -> [Token (Tok n)]

-- Wait for the module header before we start applying the real offside rule. 
-- This allows us to write 'module Name with letrec' all on the same line.
applyOffside [] (LexemeToken t : ts) 
        |   isToken t (KA KModule)
         || isKNToken t
        = t : applyOffside [] ts

-- When we see the top-level letrec then enter into the outer-most context.
applyOffside [] (LexemeToken t1 : (LexemeStartBlock n) : ls)
        |   isToken t1 (KA KLetRec)
         || isToken t1 (KA KExports)
         || isToken t1 (KA KImports)
        = t1 : newCBra ls : applyOffside [n] ls 

-- At top level without a context.
-- Skip over everything until we get the 'with' in 'module Name with ...''
applyOffside [] (LexemeStartLine _  : ts)
        = applyOffside [] ts 

applyOffside [] (LexemeStartBlock _ : ts)
        = applyOffside [] ts


-- line start
applyOffside mm@(m : ms) (t@(LexemeStartLine n) : ts)
        -- add semicolon to get to the next statement in this block
        | m == n
        = newSemiColon ts : applyOffside mm ts

        -- end a block
        -- we keep the StartLine token in the recursion in case we're ending
        -- multiple blocks difference from Haskell98: add a semicolon as well
        | n < m 
        = newSemiColon ts : newCKet ts : applyOffside ms (t : ts)

        -- indented continuation of this statement
        | otherwise
        = applyOffside mm ts


-- block start
applyOffside mm@(m : ms) (LexemeStartBlock n : ts)
        -- enter into a nested context
        | n > m
        = newCBra ts : applyOffside (n : m : ms) ts 

        -- new context starts less than the current one.
        --   This should never happen, 
        --     provided addStarts works.
        | tNext : _    <- dropNewLinesLexeme ts
        = error $ "DDC.Core.Lexer.Tokens.Offside: layout error on " ++ show tNext ++ "."

        -- new context cannot be less indented than outer one
        --   This should never happen,
        --      as there is no lexeme to start a new context at the end of the file
        | []            <- dropNewLinesLexeme ts
        = error "DDC.Core.Lexer.Tokens.Offside: tried to start new context at end of file."

        -- an empty block
        | otherwise
        = newCBra ts : newCKet ts : applyOffside mm (LexemeStartLine n : ts)


-- pop contexts from explicit close braces
applyOffside mm (LexemeToken t@Token { tokenTok = KA KBraceKet } : ts) 

        -- make sure that explict open braces match explicit close braces
        | 0 : ms        <- mm
        = t : applyOffside ms ts

        -- nup
        | _tNext : _     <- dropNewLinesLexeme ts
        = [newOffsideClosingBrace ts]


-- push contexts for explicit open braces
applyOffside ms (LexemeToken t@Token { tokenTok = KA KBraceBra } : ts)
        = t : applyOffside (0 : ms) ts

applyOffside ms (LexemeToken t : ts) 
        = t : applyOffside ms ts

applyOffside [] []          = []

-- close off remaining contexts once we've reached the end of the stream.
applyOffside (_ : ms) []    = newCKet [] : applyOffside ms []


-- addStarts ------------------------------------------------------------------
-- | Add block and line start tokens to this stream.
--      This is lifted straight from the Haskell98 report.
addStarts :: Eq n => [Token (Tok n)] -> [Lexeme n]
addStarts ts
 = case dropNewLines ts of

        -- If the first lexeme of a module is not '{' then start a new block.
        (t1 : tsRest)
          |  not $ or $ map (isToken t1) [KA KBraceBra]
          -> LexemeStartBlock (tokenColumn t1) : addStarts' (t1 : tsRest)

          | otherwise
          -> addStarts' (t1 : tsRest)

        -- empty file
        []      -> []


addStarts'  :: Eq n => [Token (Tok n)] -> [Lexeme n]
addStarts' []           = []
addStarts' (t1 : ts) 

        -- We're starting a block
        | isBlockStart t1
        , []            <- dropNewLines ts
        = LexemeToken t1    : [LexemeStartBlock 0]

        | isBlockStart t1
        , t2 : tsRest   <- dropNewLines ts
        , not $ isToken t2 (KA KBraceBra)
        = LexemeToken t1    : LexemeStartBlock (tokenColumn t2)
                            : addStarts' (t2 : tsRest)

        -- check for start of list
        | isToken t1 (KA KBraceBra)
        = LexemeToken t1    : addStarts' ts

        -- check for end of list
        | isToken t1 (KA KBraceKet)
        = LexemeToken t1    : addStarts' ts

        -- check for start of new line
        | isToken t1 (KM KNewLine)
        , t2 : tsRest   <- dropNewLines ts
        , not $ isToken t2 (KA KBraceBra)
        = LexemeStartLine (tokenColumn t2) 
                : addStarts' (t2 : tsRest)

        -- eat up trailine newlines
        | isToken t1 (KM KNewLine)
        = addStarts' ts

        -- a regular token
        | otherwise
        = LexemeToken t1    : addStarts' ts


-- | Drop newline tokens at the front fo this stream.
dropNewLines :: Eq n => [Token (Tok n)] -> [Token (Tok n)]
dropNewLines []              = []
dropNewLines (t1:ts)
        | isToken t1 (KM KNewLine)
        = dropNewLines ts

        | otherwise
        = t1 : ts


-- | Drop newline tokens at the front fo this stream.
dropNewLinesLexeme :: Eq n => [Lexeme n] -> [Lexeme n]
dropNewLinesLexeme ll
 = case ll of
        []                      -> []
        LexemeToken t1 : ts
         |  isToken t1 (KM KNewLine)
         -> dropNewLinesLexeme ts

        l : ls
         -> l : dropNewLinesLexeme ls


-- | Check if a token is one that starts a block of statements.
isBlockStart :: Token (Tok n) -> Bool
isBlockStart Token { tokenTok = tok }
 = case tok of
        KA KDo          -> True
        KA KOf          -> True
        KA KLetRec      -> True
        KA KWhere       -> True
        KA KExports     -> True
        KA KImports     -> True
        _               -> False


-- Utils ----------------------------------------------------------------------
-- | Test whether this wrapper token matches.
isToken :: Eq n => Token (Tok n) -> Tok n -> Bool
isToken (Token tok _) tok2 = tok == tok2


-- | Test whether this wrapper token matches.
isKNToken :: Eq n => Token (Tok n) -> Bool
isKNToken (Token (KN _) _)      = True
isKNToken _                     = False


-- | When generating new source tokens, take the position from the first
--   non-newline token in this list
newCBra :: [Lexeme n] -> Token (Tok n)
newCBra ts
        = (takeTok ts) { tokenTok = KA KBraceBra }


newCKet :: [Lexeme n] -> Token (Tok n)
newCKet ts
        = (takeTok ts) { tokenTok = KA KBraceKet }


newSemiColon :: [Lexeme n] -> Token (Tok n)
newSemiColon ts 
        = (takeTok ts) { tokenTok = KA KSemiColon }


-- | This is injected by `applyOffside` when it finds an explit close
--   brace in a position where it would close a synthetic one.
newOffsideClosingBrace :: [Lexeme n] -> Token (Tok n)
newOffsideClosingBrace ts
        = (takeTok ts) { tokenTok = KM KOffsideClosingBrace }


takeTok :: [Lexeme n] -> Token (Tok n)
takeTok []      
 = Token (KJunk "") (SourcePos "" 0 0)

takeTok (l : ls)
 = case l of
        LexemeToken (Token { tokenTok = KM KNewLine })
         -> takeTok ls

        LexemeToken t           -> t
        LexemeStartLine  _      -> takeTok ls
        LexemeStartBlock _      -> takeTok ls