module Text.Parser.Indentation.Implementation where

-- Implements common code for "Indentation Senstivie Parising: Landin Revisited"
--
-- Primary functions are:
--  - TODO
-- Primary driver functions are:
--  - TODO

-- TODO:
--   Grace style indentation stream
--   Haskell style indentation stream

--import Control.Monad

------------------------
-- Indentations
------------------------

-- We use indent 1 for the first column.  Not only is this consistent
-- with how Parsec counts columns, but it also allows 'Gt' to refer to
-- the first column by setting the indent to 0.
--data Indentation = Indentation# Int# deriving (Eq, Ord)
type Indentation = Int
data IndentationRel = Eq | Any | Const Indentation | Ge | Gt deriving (Show, Eq)

{-# INLINE infIndentation #-}
infIndentation :: Indentation
infIndentation = maxBound

{-
instance Num Indentation where

instance Show Indentation where
  show i@(Indentation# i') | i' == maxBound = "Infinity"
                           | otherwise = show (Int# i')
-}

------------------------
-- Indentable Stream
------------------------

-- We store state information about the current indentation in the
-- Stream.  Encoding the indentation state in the Stream is weird, but
-- the other two places where we could put it don't work.  The monad
-- isn't rolledback when backtracking happens (which we need the
-- indentation state to do), and the user state isn't available when
-- we do an 'uncons'.

{-# INLINE mkIndentationState #-}
mkIndentationState :: Indentation -> Indentation -> Bool -> IndentationRel -> IndentationState
mkIndentationState lo hi mode rel
  | lo == infIndentation = error "mkIndentationState: minimum indentation 'infIndentation' is out of bounds"
  | lo > hi = error "mkIndentationState: minimum indentation is greater than maximum indent"
  | otherwise = IndentationState lo hi mode rel

-- THEOREM: indent sets are all describable by upper and lower bounds (maxBound is infinity)
-- GLOBAL INVARIANT: minIndentation /= infIndentation
-- GLOBAL INVARIANT: minIndentation <= maxIndentation
-- GLOBAL INVARIENT: lo <= lo' where lo and lo' are minIndentation respectively before and after a monadic action
-- GLOBAL INVARIENT: hi' <= hi where hi and hi' are maxIndentation respectively before and after a monadic action

data IndentationState = IndentationState {
  minIndentation :: {-# UNPACK #-} !Indentation, -- inclusive, must not equal infIndentation
  maxIndentation :: {-# UNPACK #-} !Indentation, -- inclusive, infIndentation (i.e., maxBound) means infinity
  absMode :: !Bool, -- true if we are in 'absolute' mode
  tokenRel :: !IndentationRel
  } deriving (Show)
  -- Our representation of maxIndentation will get us in trouble if things
  -- overflow.  In future we may want to use a type representing
  -- Integer+Infinity However, this bug triggers *only* when there are
  -- a large number of nested 'Gt' indentations which shouldn't be all
  -- that common and 'local'

{-# INLINE indentationStateAbsMode #-}
indentationStateAbsMode :: IndentationState -> Bool
indentationStateAbsMode x = absMode x

{-# INLINE updateIndentation #-}
-- PRIVATE: Use assertIndentation instead
updateIndentation :: IndentationState -> Indentation -> (IndentationState -> a) -> (String -> a) -> a
updateIndentation (IndentationState lo hi mode rel) i ok err = updateIndentation' lo hi (if mode then Eq else rel) i ok' err' where
  ok' lo' hi' = ok (IndentationState lo' hi' False rel)
  err' = err

{-# INLINE updateIndentation' #-}
updateIndentation' :: Indentation -> Indentation -> IndentationRel -> Indentation -> (Indentation -> Indentation -> a) -> (String -> a) -> a
updateIndentation' lo hi rel i ok err =
  case rel of
    Any                          -> ok lo hi
    Const c | c  == i            -> ok lo hi
            | otherwise          -> err' $ "indentation "++show c
    Eq      | lo <= i && i <= hi -> ok i i
            | otherwise          -> err' $ "an indentation between "++show lo++" and "++show hi
    Gt      | lo <  i            -> ok lo (min (i-1) hi)
            | otherwise          -> err' $ "an indentation greater than "++show lo
    Ge      | lo <= i            -> ok lo (min i hi)
            | otherwise          -> err' $ "an indentation greater than or equal to "++show lo
  where err' place = err $ "Found a token at indentation "++show i++".  Expecting a token at "++place++"."

-- TODO: error when hi is maxIndentation

-- There is no way to query the current indentation because multiple
-- indentations are tried in parallel and later parsing may disqualify
-- one of these indentations.  However, we can assert that the current
-- indentation must have a particular relation, 'r', to a given
-- indentation, 'i'.  The call 'assertIndent r i' does this.  Calling
-- 'assertIndent r i' is equivalent to consuming a pseudo-token that has
-- been annotated with the relation 'r' at indentation 'i'.
--
-- Note that the absolute indentation mode may override 'r'.
{-
assertIndent :: (Monad m, Stream (IndentStream s) m t) => IndentRel -> Indent -> ParsecT (IndentStream s) u m ()
assertIndent r i = do
  IndentStream lo hi mode rel s <- getInput
  let ok s' = setInput (s' { absMode = mode }) -- Update input sets mode to False by default
      --ok lo' hi' = setInput (IndentStream lo' hi' mode rel s)
      err msg = unexpected $ "Indentation assertion '"++show r++" "++show i++"' failed.  "++msg
  updateIndent lo hi mode r i s ok err
  --updateIndent lo hi (if mode then Eq else r) i ok err
-}

{-
{-# INLINE askTokenMode #-}
askTokenMode :: (Monad m) => ParsecT (IndentStream s) u m IndentRel
askTokenMode = liftM tokenRel getInput
-}

------------------------
-- Token Modes
------------------------

-- Token modes determine how the current indentation must relate to
-- the indentation of a token.  Because several languages have special
-- rules for the first token of the production, we split the token
-- mode into two parts.  The first part is the mode for the first
-- token in a grammatical form while the second part is the mode for
-- the other tokens in a grammatical form.
--
-- Because of this split, while token modes generally follow a reader
-- monad pattern, there is one important exception.  Namely the
-- first-token mode may follow a state monad pattern.  (Thus we have
-- the divergent names for the first-token and other-token query
-- operators.)

-- THEOREM: tokenMode == tokenMode'
-- THEOREM: firstTokenMode' == firstTokenMode \/ firstTokenMode' == otherTokenMode

type LocalState a = (IndentationState -> IndentationState) -- pre
                  -> (IndentationState {-old-} -> IndentationState {-new-} -> IndentationState) -- post
                  -> a -> a

{-# INLINE localTokenMode #-}
localTokenMode :: (LocalState a)
               -> (IndentationRel -> IndentationRel)
               -> a -> a
localTokenMode localState f_rel = localState pre post where
  pre  i1    = i1 { tokenRel = f_rel (tokenRel i1) }
  post i1 i2 = i2 { tokenRel =        tokenRel i1  }

{-# INLINE absoluteIndentation #-}
absoluteIndentation :: LocalState a -> a -> a
absoluteIndentation localState = localState pre post where
  pre  i1    = i1 { absMode = True }
  post i1 i2 = i2 { absMode = absMode i1 && absMode i2 }

{-# INLINE ignoreAbsoluteIndentation #-}
ignoreAbsoluteIndentation :: LocalState a -> a -> a
ignoreAbsoluteIndentation localState = localState pre post where
  pre  i1    = i1 { absMode = False }
  post i1 i2 = i2 { absMode = absMode i1 }

{-# INLINE localAbsoluteIndentation #-}
localAbsoluteIndentation :: LocalState a -> a -> a
localAbsoluteIndentation localState = localState pre post where
  pre  i1    = i1 { absMode = True }
  post i1 i2 = i2 { absMode = absMode i1 }

--{-# INLINE askTokenMode #-}
--askTokenMode :: (Monad m) => ParsecT (IndentationStream s) u m IndentationRel
--askTokenMode = liftM tokenRel getInput
-- TODO: assertNotAbsMod/askAbsMode
-- when (absMode i2) (fail "absoluteIndentation: no tokens consumed") >>

------------------------
-- Local Indentations
------------------------

{-# INLINE localIndentation' #-}
-- PRIVATE: locally violates global invariants but used in a way that does not
localIndentation' :: LocalState a -> (Indentation -> Indentation) -> (Indentation -> Indentation) -> (Indentation -> Indentation -> Indentation) -> a -> a
localIndentation' localState f_lo f_hi f_hi' m = localState pre post m
  where pre (IndentationState lo hi mode rel) = IndentationState (f_lo lo) (f_hi hi) mode rel
        post (IndentationState lo hi _ _) i2 = i2 { minIndentation = lo, maxIndentation = f_hi' hi (maxIndentation i2) }
--        post (IndentationStream lo hi mode rel s) i2 = IndentationStream lo (f_hi' hi (maxIndentation i2)) mode rel s

-- 'localIndentation r p' specifies that the current indentation for 'p' must have relation 'r'
-- relative to the current indentation of the context in which 'localIndentation r p' is called.
{-# INLINE localIndentation #-}
-- NOTE: it is the responsibility of 'localState' to *not* use it's arguments if we are in absMode
localIndentation :: LocalState a -> IndentationRel -> a -> a
localIndentation _localState Eq m = m
localIndentation localState Any m = localIndentation' localState (const 0) (const infIndentation) (const) m
localIndentation localState (Const c) m
    | c == infIndentation = error "localIndentation: Const indentation 'infIndentation' is out of bounds"
    | otherwise = localIndentation' localState (const c) (const c) (const) m
localIndentation localState Ge m = localIndentation' localState (id) (const infIndentation) (flip const) m
localIndentation localState Gt m = localIndentation' localState (+1) (const infIndentation) (f) ({-TODO: checkOverflow >>-} m) where
  f hi hi' | hi' == infIndentation || hi < hi' = hi
           | hi' > 0 = hi' - 1 -- Safe only b/c hi' > 0
           | otherwise = error "localIndentation: assertion failed: hi' > 0"
{-
  checkOverflow = do
    IndentationStream { minIndentation = lo } <- getState
    when (lo == infIndentation) $ fail "localIndentation: Overflow in indentation lower bound."
-}

----------------
-- SourcePos

{-
mkSourcePosIndentStream s = SourcePosIndentStream s
newtype SourcePosIndentStream s = SourcePosIndentStream s
instance (Stream s m t) => Stream (SourcePosIndentStream s) m (Indent, t) where
  uncons (SourcePosIndentStream s) = do
    col <- liftM sourceColumn $ getPosition
    x <- uncons s
    case x of
      Nothing -> return Nothing
      Just x -> return (Just ((col, x), SourcePosIndentStream s))
-}

----------------
-- Unicode char
-- newtype UnicodeIndentStream

{-
----------------
-- Based on Char
mkCharIndentStream :: s -> CharIndentStream s
mkCharIndentStream s = CharIndentStream 1 s
data CharIndentStream s = CharIndentStream { charIndentStreamColumn :: !Indent,
                                             charIndentStreamStream :: s } deriving (Show)

instance (Stream s m Char) => Stream (CharIndentStream s) m (Indent, Char) where
  uncons (CharIndentStream i s) = do
    x <- uncons s
    case x of
      Nothing -> return Nothing
      Just (c, cs) -> return (Just ((i, c), CharIndentStream (f c) cs)) where
        f '\n' = 1
        f '\t' = i + 8 - ((i-1) `mod` 8)
        f _    = i + 1

charIndentStreamParser :: (Monad m) => ParsecT s u m t -> ParsecT (CharIndentStream s) u m (Indent, t)
charIndentStreamParser p = mkPT $ \state ->
  let go (Ok a state' e) = return (Ok (sourceColumn $ statePos state, a) (state' { stateInput = CharIndentStream (sourceColumn $ statePos state') (stateInput state') }) e)
      go (Error e) = return (Error e)
  in runParsecT p (state { stateInput = charIndentStreamStream (stateInput state) })
         >>= consumed (return . Consumed . go) (return . Empty . go)

----------------
-- TODO: parser based on first non-whitespace char

----------------
-- First token of line indents

----------------
-- Based on Indents

-- Note that if 'p' consumes input but is at the wrong indentation, then
-- 'indentStreamParser p' signals an error but does *not* consume input.
-- This allows Parsec primitives like 'string' to be properly backtracked.
indentStreamParser :: (Monad m) => ParsecT s u m (Indent, t) -> ParsecT (IndentStream s) u m t
indentStreamParser p = mkPT $ \state ->
  let IndentStream lo hi mode rel _ = stateInput state
      go f (Ok (i, a) state' e) = updateIndent lo hi (if mode then Eq else rel) i ok err where
        ok lo' hi' = return $ f $ return (Ok a (state' {stateInput = IndentStream lo' hi' False rel (stateInput state') }) e)
        err msg = return $ Empty $ return $ Error (Message ("Invalid indentation.  "++msg++show ((stateInput state) { tokenStream = ""})) `addErrorMessage` e)
      go f (Error e) = return $ f $ return (Error e)
  in runParsecT p (state { stateInput = tokenStream (stateInput state) }) >>= consumed (go Consumed) (go Empty)

-- lifting operator
-- token, tokens, tokenPrim, tokenPrimEx ???
-- whiteSpace
-- ByteString
-- ByteString.Lazy
-- Text

delimitedLayout :: Stream (IndentStream s) m t =>
  ParsecT (IndentStream s) u m open -> Bool ->
  ParsecT (IndentStream s) u m close -> Bool ->
  ParsecT (IndentStream s) u m a -> ParsecT (IndentStream s) u m a
delimitedLayout open openAny close closeAny body = between open' close' (localIndent (Const 0) body) where
  open'  | openAny = localIndent (Const 0) open
         | otherwise = open
  close' | closeAny = localIndent (Const 0) close
         | otherwise = close

indentedLayout :: Stream (IndentStream s) m t =>
  (Maybe (ParsecT (IndentStream s) u m sep)) ->
  ParsecT (IndentStream s) u m a -> ParsecT (IndentStream s) u m [a]
indentedLayout (Nothing ) clause = localIndent Gt $ many $ absoluteIndent $ clause
indentedLayout (Just sep) clause = liftM concat $ localIndent Gt $ many $ absoluteIndent $ sepBy1 clause sep

{-
layout p = delimitedLayout (symbol "{") False (symbol "}") True (semiSep p)
       <|> indentedLayout (Just semi) p

identifier pred = liftM fromString $ try $ identifier >>= \x -> guard (pred x) >> return x
operator pred = liftM fromString $ try $ operator >>= \x -> guard (pred x) >> return x

reserved name = (if name `elem` middleKeywords then localFirstTokenMode (const Ge) else id) $ reserved name

Numbers, Integers and Naturals are custom

dotSep
dotSep1

-}

{-
test :: String
test = foo where
          foo = "abc \
\def" ++ ""

test2 :: Int
test2 = foo where
          foo = let { x = 1;
 } in x


--- All code indented?
  foo = 3
  bar = 4
-}
-}