{-# LANGUAGE ConstraintKinds   #-}
{-# LANGUAGE FlexibleContexts  #-}
{-# LANGUAGE NamedFieldPuns    #-}
{-# LANGUAGE OverloadedStrings #-}

module FastTags.LexerTypes where

import Codec.Binary.UTF8.String (encodeChar)
import Control.Applicative
import Control.Monad.State.Strict
import Data.Char
import Data.IntSet (IntSet)
import qualified Data.IntSet as IntSet
import Data.Maybe
import Data.Text (Text)
import qualified Data.Text as Text
import Data.Word (Word8)

import Control.Monad.EitherK

import FastTags.Token
import qualified FastTags.Util as Util

{-# INLINE advanceLine #-}
advanceLine :: Char -> Line -> Line
advanceLine '\n' = increaseLine
advanceLine _    = id

countInputSpace :: AlexInput -> Int -> Int
countInputSpace input len =
    countSpace $ Text.take len $ aiInput input
    where
    countSpace :: Text -> Int
    countSpace = Text.foldl' inc 0
        where
        inc acc ' '    = acc + 1
        inc acc '\t'   = acc + 8
        inc acc '\x01' = acc + 1
        inc acc _      = acc


data AlexInput = AlexInput {
    aiInput           :: Text
    , aiPrevChar      :: {-# UNPACK #-} !Char
    , aiBytes         :: [Word8]
    , aiLine          :: {-# UNPACK #-} !Line
    , aiTrackPrefixes :: Bool
    , aiPrefix        :: Text
    , aiAbsPos        :: {-# UNPACK #-} !Int
    } deriving (Show, Eq, Ord)

mkAlexInput :: Text -> Bool -> AlexInput
mkAlexInput s trackPrefixes = AlexInput
    { aiInput         = s'
    , aiPrevChar      = '\n'
    , aiBytes         = []
    , aiLine          = initLine
    , aiTrackPrefixes = trackPrefixes
    , aiPrefix        = Text.empty
    , aiAbsPos        = initAbsPos
    }
    where
    -- Line numbering starts from 0 because we're adding additional newline
    -- at the beginning to simplify processing. Thus, line numbers in the
    -- result are 1-based.
    initLine = Line 0
    -- Same reasoning applies to the initial absolute position.
    initAbsPos = -1

    s' = Text.cons '\n' $ Text.snoc (stripBOM s) '\n'
    stripBOM :: Text -> Text
    stripBOM xs =
        fromMaybe xs $
        Text.stripPrefix utf8BOM xs <|> Text.stripPrefix utf8BOM' xs
    -- utf8BOM = "\xEF\xBB\xBF"
    utf8BOM = "\xFFEF"
    utf8BOM' = "\xFEFF"

mkSrcPos :: FilePath -> AlexInput -> SrcPos
mkSrcPos filename (AlexInput {aiLine, aiPrefix}) =
    SrcPos { posFile = filename, posLine = aiLine, posPrefix = aiPrefix }

-- TODO: Not very efficient to snoc every character here, figure out something
-- better.
updatePrefix :: Char -> AlexInput -> AlexInput
updatePrefix c input@(AlexInput {aiTrackPrefixes, aiPrefix})
    | aiTrackPrefixes = input
        { aiPrefix = case c of
            '\n' -> Text.empty
            _    -> Text.snoc aiPrefix c
        }
    | otherwise       = input

data Context = CtxHaskell | CtxQuasiquoter
    deriving (Show, Eq, Ord)

data AlexState = AlexState {
    asInput                       :: AlexInput
 -- | Current Alex state the lexer is in. E.g. comments, string, TH quasiquoter
 -- or vanilla toplevel mode.
    , asCode                      :: {-# UNPACK #-} !Int
    , asCommentDepth              :: {-# UNPACK #-} !Int
    , asQuasiquoterDepth          :: {-# UNPACK #-} !Int
    , asIndentationSize           :: {-# UNPACK #-} !Int
    , asContextStack              :: [Context]
    , asPositionsOfQuasiQuoteEnds :: Maybe IntSet
    } deriving (Show, Eq, Ord)

mkAlexState :: AlexInput -> AlexState
mkAlexState input = AlexState
    { asInput                     = input
    , asCode                      = 0
    , asCommentDepth              = 0
    , asQuasiquoterDepth          = 0
    , asIndentationSize           = 0
    , asContextStack              = []
    , asPositionsOfQuasiQuoteEnds = Nothing
    }

{-# INLINE pushContext #-}
pushContext :: (MonadState AlexState m) => Context -> m ()
pushContext ctx = modify (\s -> s { asContextStack = ctx : asContextStack s })

{-# INLINE modifyCommentDepth #-}
modifyCommentDepth :: (MonadState AlexState m) => (Int -> Int) -> m Int
modifyCommentDepth f = do
    depth <- gets asCommentDepth
    let depth' = f depth
    modify $ \s -> s { asCommentDepth = depth' }
    return depth'

{-# INLINE modifyQuasiquoterDepth #-}
modifyQuasiquoterDepth :: (MonadState AlexState m) => (Int -> Int) -> m Int
modifyQuasiquoterDepth f = do
    depth <- gets asQuasiquoterDepth
    let depth' = f depth
    modify $ \s -> s { asQuasiquoterDepth = depth' }
    return depth'

retrieveToken :: AlexInput -> Int -> Text
retrieveToken (AlexInput {aiInput}) len = Text.take len aiInput

{-# INLINE addIndentationSize #-}
addIndentationSize :: (MonadState AlexState m) => Int -> m ()
addIndentationSize x =
    modify (\s -> s { asIndentationSize = x + asIndentationSize s })

data QQEndsState = QQEndsState
    { qqessPos      :: {-# UNPACK #-} !Int
    , qqessMap      :: !IntSet
    , qqessPrevChar :: {-# UNPACK #-} !Char
    }

calculateQuasiQuoteEnds :: Int -> Text -> IntSet
calculateQuasiQuoteEnds startPos =
    qqessMap . Text.foldl' combine (QQEndsState startPos IntSet.empty '\n')
    where
    combine :: QQEndsState -> Char -> QQEndsState
    combine QQEndsState{qqessPos, qqessMap, qqessPrevChar} c = QQEndsState
        { qqessPos      = qqessPos + 1
        , qqessMap      =
              case (qqessPrevChar, c) of
                  ('|', ']') -> IntSet.insert qqessPos qqessMap
                  (_,   '⟧') -> IntSet.insert qqessPos qqessMap
                  _          -> qqessMap
        , qqessPrevChar = c
        }

type AlexM = EitherKT String (State AlexState)

runAlexM :: Bool -> Text -> AlexM a -> Either String a
runAlexM trackPrefixes input action =
    evalState (runEitherKT action (return . Left) (return . Right)) s
    where
    s = mkAlexState $ mkAlexInput input trackPrefixes

{-# INLINE alexSetInput #-}
alexSetInput :: (MonadState AlexState m) => AlexInput -> m ()
alexSetInput input = modify $ \s -> s { asInput = input }

{-# INLINE alexSetStartCode #-}
alexSetStartCode :: (MonadState AlexState m) => Int -> m ()
alexSetStartCode code = modify $ \s -> s { asCode = code }

-- Alex interface
alexInputPrevChar :: AlexInput -> Char
alexInputPrevChar = aiPrevChar

alexGetByte :: AlexInput -> Maybe (Word8, AlexInput)
alexGetByte input@(AlexInput {aiInput, aiBytes, aiLine, aiAbsPos}) =
    case aiBytes of
        b:bs -> Just (b, input { aiBytes = bs })
        []   -> nextChar
    where
    nextChar = case Text.uncons aiInput of
        Nothing      -> Nothing
        Just (c, cs) -> encode (fromMaybe c $ fixChar c) cs
    encode c cs =
        case encodeChar c of
            b:bs -> Just (b, updatePrefix c input')
                where
                input' = input { aiInput    = cs
                               , aiBytes    = bs
                               , aiPrevChar = c
                               , aiLine     = advanceLine c aiLine
                               , aiAbsPos   = aiAbsPos + 1
                               }
            []   -> emptyUtfEncodingError
    emptyUtfEncodingError = error
        "alexGetByte: should not happen - utf8 encoding of a character is empty"

-- Translate unicode character into special symbol we teached Alex to recognize.
fixChar :: Char -> Maybe Char
-- These should not be translated since Alex knows about them
fixChar '→' = Nothing
fixChar '∷' = Nothing
fixChar '⇒' = Nothing
fixChar '∀' = Nothing
fixChar c
    | c <= '\x7f' = Nothing -- Plain ascii needs no fixing.
    | otherwise
    = case generalCategory c of
          UppercaseLetter       -> Just upper
          LowercaseLetter       -> Just lower
          TitlecaseLetter       -> Just upper
          ModifierLetter        -> Just suffix
          OtherLetter           -> Just lower
          DecimalNumber         -> Just digit
          OtherNumber           -> Just digit
          Space                 -> Just space
          other                 ->
              if Util.isSymbolCharacterCategory other
              then Just symbol
              else Nothing
    where
    space  = '\x01'
    upper  = '\x02'
    lower  = '\x03'
    symbol = '\x04'
    digit  = '\x05'
    suffix = '\x06'