-- |
-- Module      :  $Header$
-- Copyright   :  (c) 2013-2015 Galois, Inc.
-- License     :  BSD3
-- Maintainer  :  cryptol@galois.com
-- Stability   :  provisional
-- Portability :  portable

{-# LANGUAGE PatternGuards #-}
module Cryptol.Parser.LexerUtils where

import Cryptol.Parser.Position
import Cryptol.Parser.Unlit(PreProc(None))
import Cryptol.Utils.PP
import Cryptol.Utils.Panic

import Data.Char(toLower)
import Data.List(foldl')
import Data.Word(Word8)
import Codec.Binary.UTF8.String(encodeChar)


data Config = Config
  { cfgSource      :: !FilePath     -- ^ File that we are working on
  , cfgLayout      :: !Layout       -- ^ Settings for layout processing
  , cfgPreProc     :: PreProc       -- ^ Preprocessor settings
  , cfgAutoInclude :: [FilePath]    -- ^ Implicit includes
  , cfgModuleScope :: Bool          -- ^ When we do layout processing
                                    -- should we add a vCurly (i.e., are
                                    -- we parsing a list of things).
  }

defaultConfig :: Config
defaultConfig  = Config
  { cfgSource      = ""
  , cfgLayout      = Layout
  , cfgPreProc     = None
  , cfgAutoInclude = []
  , cfgModuleScope = True
  }


type Action = Config -> Position -> String -> LexS
           -> (Maybe (Located Token), LexS)

data LexS   = Normal
            | InComment Position ![Position] [String]
            | InString Position String
            | InChar   Position String


startComment :: Action
startComment _ p txt s = (Nothing, InComment p stack chunks)
  where (stack,chunks) = case s of
                           Normal            -> ([], [txt])
                           InComment q qs cs -> (q : qs, txt : cs)
                           _                 -> panic "[Lexer] startComment" ["in a string"]

endComent :: Action
endComent cfg p txt s =
  case s of
    InComment f [] cs     -> (Just (mkToken f cs), Normal)
    InComment _ (q:qs) cs -> (Nothing, InComment q qs (txt : cs))
    _                     -> panic "[Lexer] endComment" ["outside commend"]
  where
  mkToken f cs =
    let r   = Range { from = f, to = moves p txt, source = cfgSource cfg }
        str = concat $ reverse $ txt : cs
    in Located { srcRange = r, thing = Token (White BlockComment) str }

addToComment :: Action
addToComment _ _ txt s = (Nothing, InComment p stack (txt : chunks))
  where
  (p, stack, chunks) =
     case s of
       InComment q qs cs -> (q,qs,cs)
       _                 -> panic "[Lexer] addToComment" ["outside comment"]

startString :: Action
startString _ p txt _ = (Nothing,InString p txt)

endString :: Action
endString cfg pe txt s = case s of
  InString ps str -> (Just (mkToken ps str), Normal)
  _               -> panic "[Lexer] endString" ["outside string"]
  where
  parseStr s1 = case reads s1 of
                  [(cs, "")] -> StrLit cs
                  _          -> Err InvalidString
  mkToken ps str = Located { srcRange = Range
                               { from   = ps
                               , to     = moves pe txt
                               , source = cfgSource cfg
                               }
                           , thing    = Token
                               { tokenType = parseStr (str ++ txt)
                               , tokenText = str ++ txt
                               }
                           }

addToString :: Action
addToString _ _ txt s = case s of
  InString p str -> (Nothing,InString p (str ++ txt))
  _              -> panic "[Lexer] addToString" ["outside string"]


startChar :: Action
startChar _ p txt _   = (Nothing,InChar p txt)

endChar :: Action
endChar cfg pe txt s =
  case s of
    InChar ps str -> (Just (mkToken ps str), Normal)
    _             -> panic "[Lexer] endString" ["outside character"]

  where
  parseChar s1 = case reads s1 of
                   [(cs, "")] -> ChrLit cs
                   _          -> Err InvalidChar

  mkToken ps str = Located { srcRange = Range
                               { from   = ps
                               , to     = moves pe txt
                               , source = cfgSource cfg
                               }
                           , thing    = Token
                               { tokenType = parseChar (str ++ txt)
                               , tokenText = str ++ txt
                               }
                           }



addToChar :: Action
addToChar _ _ txt s = case s of
  InChar p str -> (Nothing,InChar p (str ++ txt))
  _              -> panic "[Lexer] addToChar" ["outside character"]


mkIdent :: Action
mkIdent cfg p s z = (Just Located { srcRange = r, thing = Token t s }, z)
  where
  r = Range { from = p, to = moves p s, source = cfgSource cfg }
  t = Ident s

emit :: TokenT -> Action
emit t cfg p s z  = (Just Located { srcRange = r, thing = Token t s }, z)
  where r = Range { from = p, to = moves p s, source = cfgSource cfg }


emitS :: (String -> TokenT) -> Action
emitS t cfg p s z  = emit (t s) cfg p s z



--------------------------------------------------------------------------------
numToken :: Integer -> String -> TokenT
numToken rad ds = Num (toVal ds) (fromInteger rad) (length ds)
  where
  toVal = foldl' (\x c -> rad * x + toDig c) 0
  toDig = if rad == 16 then fromHexDigit else fromDecDigit

fromDecDigit   :: Char -> Integer
fromDecDigit x  = read [x]

fromHexDigit :: Char -> Integer
fromHexDigit x'
  | 'a' <= x && x <= 'f'  = fromIntegral (10 + fromEnum x - fromEnum 'a')
  | otherwise             = fromDecDigit x
  where x                 = toLower x'



-------------------------------------------------------------------------------

data AlexInput            = Inp { alexPos           :: !Position
                                , alexInputPrevChar :: !Char
                                , input             :: !String
                                , moreBytes         :: ![Word8]
                                } deriving Show

alexGetByte :: AlexInput -> Maybe (Word8, AlexInput)
alexGetByte i =
  case moreBytes i of
    b : bs -> Just (b, i { moreBytes = bs })
    [] ->
      case input i of
        c:cs -> alexGetByte Inp { alexPos = move (alexPos i) c
                                , alexInputPrevChar = c
                                , input             = cs
                                , moreBytes         = encodeChar c
                                }
        []   -> Nothing

data Layout = Layout | NoLayout


--------------------------------------------------------------------------------

-- | Drop white-space tokens from the input.
dropWhite :: [Located Token] -> [Located Token]
dropWhite = filter (notWhite . tokenType . thing)
  where notWhite (White _) = False
        notWhite _         = True


data Block = Virtual Int     -- ^ Virtual layout block
           | Explicit TokenT -- ^ An explicit layout block, expecting this ending
                             -- token.
             deriving (Show)

isExplicit :: Block -> Bool
isExplicit Explicit{} = True
isExplicit Virtual{}  = False

startsLayout :: TokenT -> Bool
startsLayout (KW KW_where)   = True
startsLayout (KW KW_private) = True
startsLayout _               = False

-- Add separators computed from layout
layout :: Config -> [Located Token] -> [Located Token]
layout cfg ts0 = loop implicitScope [] ts0
  where

  (_pos0,implicitScope) = case ts0 of
    t : _ -> (from (srcRange t), cfgModuleScope cfg && tokenType (thing t) /= KW KW_module)
    _     -> (start,False)


  loop :: Bool -> [Block] -> [Located Token] -> [Located Token]
  loop startBlock stack (t : ts)
    | startsLayout ty    = toks ++ loop True                             stack'  ts
    | Sym ParenL   <- ty = toks ++ loop False (Explicit (Sym ParenR)   : stack') ts
    | Sym CurlyL   <- ty = toks ++ loop False (Explicit (Sym CurlyR)   : stack') ts
    | Sym BracketL <- ty = toks ++ loop False (Explicit (Sym BracketR) : stack') ts
    | EOF          <- ty = toks
    | otherwise          = toks ++ loop False                            stack'  ts

    where
    ty  = tokenType (thing t)
    pos = srcRange t

    (toks,offStack) = offsides startToks t stack

    -- add any block start tokens, and push a level on the stack
    (startToks,stack')
      | startBlock && ty == EOF = ( [ virt cfg (to pos) VCurlyR
                                    , virt cfg (to pos) VCurlyL ]
                                  , offStack )
      | startBlock = ( [ virt cfg (to pos) VCurlyL ], Virtual (col (from pos)) : offStack )
      | otherwise  = ( [], offStack )

  loop _ _ [] = panic "[Lexer] layout" ["Missing EOF token"]


  offsides :: [Located Token] -> Located Token -> [Block] -> ([Located Token], [Block])
  offsides startToks t = go startToks
    where
    go virts stack = case stack of

      -- delimit or close a layout block
      Virtual c : rest
          -- commas only close to an explicit marker, so if there is none, the
          -- comma doesn't close anything
        | Sym Comma == ty     ->
                         if any isExplicit rest
                            then go   (virt cfg (to pos) VCurlyR : virts) rest
                            else done                              virts  stack

        | closingToken        -> go   (virt cfg (to pos) VCurlyR : virts) rest
        | col (from pos) == c -> done (virt cfg (to pos) VSemi   : virts) stack
        | col (from pos) <  c -> go   (virt cfg (to pos) VCurlyR : virts) rest

      -- close an explicit block
      Explicit close : rest | close     == ty -> done virts rest
                            | Sym Comma == ty -> done virts stack

      _ -> done virts stack

    ty  = tokenType (thing t)
    pos = srcRange t

    done ts s = (reverse (t:ts), s)

    closingToken = ty `elem` [ Sym ParenR, Sym BracketR, Sym CurlyR ]

virt :: Config -> Position -> TokenV -> Located Token
virt cfg pos x = Located { srcRange = Range
                             { from = pos
                             , to = pos
                             , source = cfgSource cfg
                             }
                         , thing = t }
  where t = Token (Virt x) $ case x of
                               VCurlyL -> "beginning of layout block"
                               VCurlyR -> "end of layout block"
                               VSemi   -> "layout block separator"

--------------------------------------------------------------------------------

data Token    = Token { tokenType :: TokenT, tokenText :: String }
                deriving Show

-- | Virtual tokens, inserted by layout processing.
data TokenV   = VCurlyL| VCurlyR | VSemi
                deriving (Eq,Show)

data TokenW   = BlockComment | LineComment | Space
                deriving (Eq,Show)

data TokenKW  = KW_Arith
              | KW_Bit
              | KW_Cmp
              | KW_False
              | KW_True
              | KW_else
              | KW_Eq
              | KW_error
              | KW_extern
              | KW_fin
              | KW_if
              | KW_private
              | KW_include
              | KW_inf
              | KW_join
              | KW_lg2
              | KW_lengthFromThen
              | KW_lengthFromThenTo
              | KW_max
              | KW_min
              | KW_module
              | KW_newtype
              | KW_pragma
              | KW_pmult
              | KW_pdiv
              | KW_pmod
              | KW_property
              | KW_random
              | KW_reverse
              | KW_split
              | KW_splitAt
              | KW_then
              | KW_transpose
              | KW_type
              | KW_where
              | KW_let
              | KW_x
              | KW_zero
              | KW_import
              | KW_as
              | KW_hiding
                deriving (Eq,Show)

data TokenOp  = Plus | Minus | Mul | Div | Exp | Mod
              | NotEqual | Equal | LessThan | GreaterThan | LEQ | GEQ
              | EqualFun | NotEqualFun
              | ShiftL | ShiftR | RotL | RotR
              | Conj | Disj | Xor
              | Complement
              | Bang | BangBang | At | AtAt | Hash
                deriving (Eq,Show)

data TokenSym = Bar
              | ArrL | ArrR | FatArrR
              | Lambda
              | EqDef
              | Comma
              | Semi
              | Dot
              | DotDot
              | DotDotDot
              | Colon
              | ColonColon
              | BackTick
              | ParenL   | ParenR
              | BracketL | BracketR
              | CurlyL   | CurlyR
              | TriL     | TriR
              | Underscore
                deriving (Eq,Show)

data TokenErr = UnterminatedComment
              | UnterminatedString
              | UnterminatedChar
              | InvalidString
              | InvalidChar
              | LexicalError
                deriving (Eq,Show)

data TokenT   = Num Integer Int Int   -- ^ value, base, number of digits
              | ChrLit  Char          -- ^ character literal
              | Ident String          -- ^ identifier
              | StrLit String         -- ^ string literal
              | KW    TokenKW         -- ^ keyword
              | Op    TokenOp         -- ^ operator
              | Sym   TokenSym        -- ^ symbol
              | Virt  TokenV          -- ^ virtual token (for layout)
              | White TokenW          -- ^ white space token
              | Err   TokenErr        -- ^ error token
              | EOF
                deriving (Eq,Show)

instance PP Token where
  ppPrec _ (Token _ s) = text s