module Language.Haskell.Preprocessor.Lexer (
  scan, dscan, module T
) where

import Data.Char

import Language.Haskell.Preprocessor.Token as T
import Language.Haskell.Preprocessor.Loc
import qualified Language.Haskell.Preprocessor.SynSpec as SS

scan           :: SS.SynSpec -> String -> String -> [Token]
scan spec file  = pass3 spec . pass2 spec . pass1 spec file

dscan          :: String -> String -> [Token]
dscan           = scan SS.defaultSpec

lexerr         :: Loc -> String -> [Token]
lexerr loc msg  = [newToken { tag = Error, T.loc = loc, val = msg }]

pass1 :: SS.SynSpec -> String -> String -> [Token]
pass1 spec file str = bol (initial file) str where
  unboxed = SS.unboxed spec
  pragmas = SS.pragmas spec

  varinit c = isLower c || c == '_'
  coninit c = isUpper c
  opinit  c = c `elem` ":!$%&*+./<=>?@\\^|-~"

  varcont c = isAlphaNum c || c `elem` "'_"
  concont   = varcont
  opcont  c = opinit c || c == '#'

  rawnext tag loc inp val = loc' `seq` new : token loc' inp where
    new  = newToken { T.tag = tag, T.loc = loc, T.val = val }
    loc' = advance loc val

  next  tag loc inp val = rawnext tag loc inp val
  rnext tag loc inp val = next tag loc inp (reverse val)

  bol loc inp = case inp of
    '#':r -> let (acc, inp) = eatWhile (/= '\n') r "#"
                 val        = reverse acc in
             case fromDirective val of
               Nothing  -> next CPragma loc inp val
               Just loc ->
                 case inp of
                   '\n':r -> bol loc r
                   _      -> []
    c:inp | isSpace c -> bol (advance loc c) inp
    _ -> token loc inp

  token loc inp = case inp of
    '(':'#':r     | unboxed -> next Other loc r "(#"
    '#':')':r     | unboxed -> next Other loc r "#)"
    '{':'-':'#':r | pragmas -> next Other loc r "{-#"
    '#':'-':'}':r | pragmas -> next Other loc r "#-}"
    '{':'-':r -> lexComment loc r "-{" 1
    '(':r     -> next Other loc r "("
    ')':r     -> next Other loc r ")"
    '[':r     -> next Other loc r "["
    ']':r     -> next Other loc r "]"
    '{':r     -> next Other loc r "{"
    '}':r     -> next Other loc r "}"
    ';':r     -> next Other loc r ";"
    ',':r     -> next Other loc r ","
    '`':r     -> next Other loc r "`"
    '\'':r    -> lexChar loc r "'"
    '"':r     -> lexString loc r "\""
    '-':'-':r -> lexHyphen loc r "--"
    '\n':r -> bol (advance loc '\n') r
    c:r | isSpace c -> token (advance loc c) r
        | coninit c -> lexCon loc r [c]
        | varinit c -> lexVar loc r [c]
        | opinit c  -> lexOp loc r [c]
        | isDigit c -> lexNumber loc r [c]
    c:_    -> lexerr loc ("unexpected character " ++ show c)
    []     -> []

  lexChar   loc inp acc = case inp of
    '\'':'#':r
      | unboxed -> rnext CharLit loc r ('#':'\'':acc)
    '\'':r      -> rnext CharLit loc r ('\'':acc)
    '\\':'\'':r -> lexChar loc r ('\'':'\\':acc)
    '\\':'\\':r -> lexChar loc r ('\\':'\\':acc)
    c:r         -> lexChar loc r (c:acc)
    []          -> lexerr loc "eof in character literal"

  lexString loc inp acc = case inp of
    '"':'#':r
      | unboxed -> rnext StringLit loc r ('#':'"':acc)
    '"':r       -> rnext StringLit loc r ('"':acc)
    '\\':'"':r  -> lexString loc r ('"':'\\':acc)
    '\\':'\\':r -> lexString loc r ('\\':'\\':acc)
    c:r         -> lexString loc r (c:acc)
    []          -> lexerr loc "eof in string literal"

  lexNumber loc inp acc = case inp of
    c:r | isDigit c     -> lexNumber loc r (c:acc)
        | c == '.' && not (null r) && isDigit (head r)
                        -> lexFloat loc r (c:acc)
        | c `elem` "eE" -> lexMaybeExp IntLit loc inp acc
    '#':r | unboxed     -> rnext IntLit loc r ('#':acc)
    _                   -> rnext IntLit loc inp acc

  lexFloat loc inp acc = case eatWhile isDigit inp acc of
    (acc, r@(c:_)) | c `elem` "eE" -> lexMaybeExp FloatLit loc r acc
    (acc, '#':r) | unboxed         -> rnext FloatLit loc r ('#':acc)
    (acc, r)                       -> rnext FloatLit loc r acc

  lexMaybeExp tag loc inp acc = case inp of
    e:'-':d:r | isDigit d -> lexExp loc r (d:'-':e:acc)
    e:'+':d:r | isDigit d -> lexExp loc r (d:'+':e:acc)
    e:d:r     | isDigit d -> lexExp loc r (d:e:acc)
    _                     -> rnext tag loc inp acc

  lexExp loc inp acc = case eatWhile isDigit inp acc of
    (acc, '#':r) -> rnext FloatLit loc r ('#':acc)
    (acc, r)     -> rnext FloatLit loc r acc

  lexVar    loc inp acc = case inp of
    c:r | varcont c -> lexVar loc r (c:acc)
    '#':r | unboxed -> rnext Variable loc r ('#':acc)
    _               -> rnext Variable loc inp acc

  lexOp     loc inp acc = case inp of
    c:r | opcont c -> lexOp loc r (c:acc)
    _              -> rnext Operator loc inp acc

  lexCon    loc inp acc = case inp of
    c:r | concont c -> lexVar loc r (c:acc)
    '#':r | unboxed -> rnext Constructor loc r ('#':acc)
    '.':r -> case r of
               c:s | coninit c -> lexCon loc s (c:'.':acc)
                   | varinit c -> lexVar loc s (c:'.':acc)
                   | opcont c  -> lexOp loc s (c:'.':acc)
               _ -> rnext Constructor loc inp acc
    _ -> rnext Constructor loc inp acc

  lexHyphen loc inp acc = case inp of
    '-':r           -> lexHyphen loc r ('-':acc)
    c:r | opcont c  -> lexOp loc r (c:acc)
    _               -> case eatWhile (/= '\n') inp acc of
                         (acc, r) -> rnext Comment loc r acc

  lexComment :: Loc -> String -> String -> Int -> [Token]
  lexComment loc inp acc 0 = rnext Comment loc inp acc
  lexComment loc inp acc n = case inp of
    '-':'}':r -> lexComment loc r ('}':'-':acc) (n - 1)
    '{':'-':r -> lexComment loc r ('-':'{':acc) (n + 1)
    c:r       -> lexComment loc r (c:acc) n
    []        -> lexerr loc "eof in comment"

  eatWhile pred inp acc = case inp of
    c:r | pred c  -> eatWhile pred r (c:acc)
    _             -> (acc, inp)

pass2 :: SS.SynSpec -> [Token] -> [Token]
pass2 _ = loop [] where
  loop acc toks = case toks of
    t@Token { tag = Comment } : rest
       -> loop (t:acc) rest
    t : rest
       -> t { com = reverse acc } : loop [] rest
    [] -> case acc of
      []         -> []
      t:_ -> [newToken { tag = Other,
                         com = reverse acc,
                         loc = loc t `advance` '\n'} ]

pass3 :: SS.SynSpec -> [Token] -> [Token]
pass3 spec = loop [] where
  levelnest = SS.levelnest spec
  indents   = [ s | block <- SS.blocks spec, SS.I s <- block ]

  indent loc = Token VIndent [] loc ""
  semi   loc = Token VSemi   [] loc ""
  dedent loc = Token VDedent [] loc ""

  loop stk toks = case toks of
    []   -> []
    t:ts
      | val t `elem` indents ->
          case ts of
            []   -> t : indent _loc : dedent _loc : check _loc stk []
                    where _loc = loc t
            t':_
              | val t' == "{" ->
                  t : check (loc t') stk ts
              | otherwise     ->
                  t : indent (loc t') : begin (loc t') stk ts
      | otherwise  -> t : check (loc t) stk ts

  begin _loc stk toks = case stk of
      []        -> loop [n] toks
      m:_ -> case n `compare` m of
        GT      -> loop (n:stk) toks
        EQ | levelnest
                -> loop (n:stk) toks
        _       -> dedent _loc : check _loc stk toks
    where n = col _loc

  check _loc stk toks = case (stk, toks) of
    (ms,   [])   -> map (const (dedent _loc)) ms
    (m:ms, t:ts) ->
      case col (loc t) `compare` m of
        LT -> dedent (loc t) : check (loc t) ms (t:ts)
        EQ -> semi (loc t) : loop (m:ms) (t:ts)
        _  -> loop (m:ms) (t:ts)
    ([],   ts) -> loop [] ts