{-| This module defines the things required by Alex and some other
    Alex related things.
-}
module Agda.Syntax.Parser.Alex
    ( -- * Alex requirements
      AlexInput(..)
    , lensLexInput
    , alexInputPrevChar
    , alexGetChar, alexGetByte
      -- * Lex actions
    , LexAction, LexPredicate
    , (.&&.), (.||.), not'
    , PreviousInput, CurrentInput, TokenLength
      -- * Monad operations
    , getLexInput, setLexInput
    )
    where

import Control.Monad.State
import Data.Char
import Data.Word

import Agda.Syntax.Position
import Agda.Syntax.Parser.Monad

import Agda.Utils.Lens
import Agda.Utils.Monad
import Agda.Utils.Tuple

-- | This is what the lexer manipulates.
data AlexInput = AlexInput
  { lexSrcFile  :: !SrcFile              -- ^ File.
  , lexPos      :: !PositionWithoutFile  -- ^ Current position.
  , lexInput    :: String                -- ^ Current input.
  , lexPrevChar :: !Char                 -- ^ Previously read character.
  }

-- | A lens for 'lexInput'.
lensLexInput :: Lens' String AlexInput
lensLexInput f r = f (lexInput r) <&> \ s -> r { lexInput = s }

-- | Get the previously lexed character. Same as 'lexPrevChar'. Alex needs this
--   to be defined to handle \"patterns with a left-context\".
alexInputPrevChar :: AlexInput -> Char
alexInputPrevChar = lexPrevChar

-- | Returns the next character, and updates the 'AlexInput' value.
--
-- This function is not suitable for use by Alex 2, because it can
-- return non-ASCII characters.
alexGetChar :: AlexInput -> Maybe (Char, AlexInput)
alexGetChar     (AlexInput { lexInput = []              }) = Nothing
alexGetChar inp@(AlexInput { lexInput = c:s, lexPos = p }) =
    Just (c, AlexInput
                 { lexSrcFile   = lexSrcFile inp
                 , lexInput     = s
                 , lexPos       = movePos p c
                 , lexPrevChar  = c
                 }
         )

-- | Returns the next byte, and updates the 'AlexInput' value.
--
-- A trick is used to handle the fact that there are more than 256
-- Unicode code points. The function translates characters to bytes in
-- the following way:
--
-- * Whitespace characters other than \'\\t\' and \'\\n\' are
--   translated to \' \'.
-- * Non-ASCII alphabetical characters are translated to \'z\'.
-- * Other non-ASCII printable characters are translated to \'+\'.
-- * Everything else is translated to \'\\1\'.
--
-- Note that it is important that there are no keywords containing
-- \'z\', \'+\', \' \' or \'\\1\'.
--
-- This function is used by Alex (version 3).

alexGetByte :: AlexInput -> Maybe (Word8, AlexInput)
alexGetByte ai =
  mapFst (fromIntegral . fromEnum . toASCII) <$> alexGetChar ai
  where
  toASCII c
    | isSpace c && c /= '\t' && c /= '\n' = ' '
    | isAscii c                           = c
    | isPrint c                           = if isAlpha c then 'z'
                                                         else '+'
    | otherwise                           = '\1'

{--------------------------------------------------------------------------
    Monad operations
 --------------------------------------------------------------------------}

getLexInput :: Parser AlexInput
getLexInput = getInp <$> get
    where
        getInp s = AlexInput
                    { lexSrcFile    = parseSrcFile s
                    , lexPos        = parsePos s
                    , lexInput      = parseInp s
                    , lexPrevChar   = parsePrevChar s
                    }

setLexInput :: AlexInput -> Parser ()
setLexInput inp = modify upd
    where
        upd s = s { parseSrcFile    = lexSrcFile inp
                  , parsePos        = lexPos inp
                  , parseInp        = lexInput inp
                  , parsePrevChar   = lexPrevChar inp
                  }

{--------------------------------------------------------------------------
    Lex actions
 --------------------------------------------------------------------------}

type PreviousInput  = AlexInput
type CurrentInput   = AlexInput
type TokenLength    = Int

-- | In the lexer, regular expressions are associated with lex actions who's
--   task it is to construct the tokens.
type LexAction r    = PreviousInput -> CurrentInput -> TokenLength -> Parser r

-- | Sometimes regular expressions aren't enough. Alex provides a way to do
--   arbitrary computations to see if the input matches. This is done with a
--   lex predicate.
type LexPredicate   = ([LexState], ParseFlags) -> PreviousInput -> TokenLength -> CurrentInput -> Bool

-- | Conjunction of 'LexPredicate's.
(.&&.) :: LexPredicate -> LexPredicate -> LexPredicate
p1 .&&. p2 = \x y z u -> p1 x y z u && p2 x y z u

-- | Disjunction of 'LexPredicate's.
(.||.) :: LexPredicate -> LexPredicate -> LexPredicate
p1 .||. p2 = \x y z u -> p1 x y z u || p2 x y z u

-- | Negation of 'LexPredicate's.
not' :: LexPredicate -> LexPredicate
not' p = \x y z u -> not (p x y z u)