{-|
Module      : Text.Ogmarkup.Private.Parser
Copyright   : (c) Ogma Project, 2016
License     : MIT
Stability   : experimental

This module provides several parsers that can be used in order to
extract the 'Ast' of an Ogmarkup document.

Please consider that only 'document' should be used outside this
module.
-}

{-# LANGUAGE TypeFamilies #-}

module Text.Ogmarkup.Private.Parser where

import           Text.Megaparsec
import           Control.Monad.State
import           Data.String

import qualified Text.Ogmarkup.Private.Ast     as Ast

-- | Keep track of the currently opened formats.
data ParserState = ParserState { -- | Already parsing text with emphasis
                                 parseWithEmph        :: Bool
                                 -- | Already parsing text with strong
                                 --   emphasis
                               , parseWithStrongEmph  :: Bool
                                 -- | Already parsing a quote
                               , parseWithinQuote     :: Bool
                               }

-- | An ogmarkup parser processes 'Char' tokens and carries a 'ParserState'.
type OgmarkupParser a = StateT ParserState (Parsec Dec a)

-- | Update the 'ParserState' to guard against nested emphasis.
enterEmph :: Stream a
          => OgmarkupParser a ()
enterEmph = do st <- get
               if parseWithEmph st
                 then fail "guard against nested emphasis"
                 else do put st { parseWithEmph = True }
                         return ()

-- | Update the 'ParserState' to be able to parse input with emphasis
-- again.
leaveEmph :: Stream a
          => OgmarkupParser a ()
leaveEmph = do st <- get
               if parseWithEmph st
                 then do put st { parseWithEmph = False }
                         return ()
                 else fail "cannot leave emphasis when you did not enter"

-- | Update the 'ParserState' to guard against nested strong emphasis.
enterStrongEmph :: Stream a
                => OgmarkupParser a ()
enterStrongEmph = do st <- get
                     if parseWithStrongEmph st
                       then fail "guard against nested strong emphasis"
                       else do put st { parseWithStrongEmph = True }
                               return ()

-- | Update the 'ParserState' to be able to parse input with strong emphasis
-- again.
leaveStrongEmph :: Stream a
                => OgmarkupParser a ()
leaveStrongEmph = do st <- get
                     if parseWithStrongEmph st
                       then do put st { parseWithStrongEmph = False }
                               return ()
                       else fail "cannot leave strong emphasis when you did not enter"

-- | Update the 'ParserState' to guard against nested quoted inputs.
enterQuote :: Stream a
           => OgmarkupParser a ()
enterQuote = do st <- get
                if parseWithinQuote st
                  then fail "guard against nested quotes"
                  else do put st { parseWithinQuote = True }
                          return ()

-- | Update the 'ParserState' to be able to parse an input
-- surrounded by quotes again.
leaveQuote :: Stream a
           => OgmarkupParser a ()
leaveQuote = do st <- get
                if parseWithinQuote st
                  then do put st { parseWithinQuote = False }
                          return ()
                  else fail "cannot leave quote when you did not enter"

-- | A initial ParserState instance to be used at the begining of
-- a document parsing.
initParserState :: ParserState
initParserState = ParserState False False False

-- | A wrapper around the 'runParser' function of Megaparsec. It uses
-- 'initParserState' as an initial state.
parse :: (Stream a, Token a ~ Char)
      => OgmarkupParser a b
      -> String
      -> a
      -> Either (ParseError (Token a) Dec) b
parse ogma file = runParser (evalStateT ogma initParserState) file

-- | Try its best to parse an ogmarkup document. When it encounters an
--   error, it returns an Ast and the remaining input.
--
--   See 'Ast.Document'.
document :: (Stream a, Token a ~ Char, IsString b)
          => OgmarkupParser a (Ast.Document b)
document = doc []
  where doc :: (Stream a, Token a ~ Char, IsString b)
             => Ast.Document b
             -> OgmarkupParser a (Ast.Document b)
        doc ast = do space
                     sects <- many (try section)
                     let ast' = ast `mappend` sects
                     (eof >> return ast') <|> (recover ast' >>= doc)

        recover :: (Stream a, Token a ~ Char, IsString b)
                => Ast.Document b
                -> OgmarkupParser a (Ast.Document b)
        recover ast = do failure <- someTill anyChar (char '\n')
                         return $ ast `mappend` [Ast.Failing $ fromString failure]
-- | See 'Ast.Section'.
section :: (Stream a, Token a ~ Char, IsString b)
           => OgmarkupParser a (Ast.Section b)
section = aside <|> story

-- | See 'Ast.Aside'.
aside :: (Stream a, Token a ~ Char, IsString b)
         => OgmarkupParser a (Ast.Section b)
aside = do asideSeparator
           cls <- optional asideClass
           space
           ps <- some (paragraph <* space)
           asideSeparator
           manyTill space (skip (char '\n') <|> eof)
           space

           return $ Ast.Aside cls ps
  where
    asideClass :: (Stream a, Token a ~ Char, IsString b)
               => OgmarkupParser a b
    asideClass = do cls <- some letterChar
                    asideSeparator

                    return $ fromString cls

-- | See 'Ast.Story'.
story :: (Stream a, Token a ~ Char, IsString b)
      => OgmarkupParser a (Ast.Section b)
story = Ast.Story `fmap` some (paragraph <* space)

-- | See 'Ast.Paragraph'.
paragraph :: (Stream a, Token a ~ Char, IsString b)
          => OgmarkupParser a (Ast.Paragraph b)
paragraph = some component <* blank

-- | See 'Ast.Component'.
component :: (Stream a, Token a ~ Char, IsString b)
          => OgmarkupParser a (Ast.Component b)
component = try (dialogue <|> thought <|> teller) <|> illformed

-- | See 'Ast.IllFormed'.
illformed :: (Stream a, Token a ~ Char, IsString b)
          => OgmarkupParser a (Ast.Component b)
illformed = Ast.IllFormed `fmap` restOfParagraph

-- | Parse the rest of the current paragraph with no regards for the
-- ogmarkup syntax. This Parser is used when the document is ill-formed, to
-- find a new point of synchronization.
restOfParagraph :: (Stream a, Token a ~ Char, IsString b)
                => OgmarkupParser a b
restOfParagraph = do lookAhead anyChar
                     notFollowedBy endOfParagraph
                     str <- manyTill anyChar (lookAhead $ try endOfParagraph)
                     return $ fromString str

-- | See 'Ast.Teller'.
teller :: (Stream a, Token a ~ Char, IsString b)
       => OgmarkupParser a (Ast.Component b)
teller = Ast.Teller `fmap` some format

-- | See 'Ast.Dialogue'.
dialogue :: (Stream a, Token a ~ Char, IsString b)
         => OgmarkupParser a (Ast.Component b)
dialogue = talk '[' ']' Ast.Dialogue

-- | See 'Ast.Thought'.
thought :: (Stream a, Token a ~ Char, IsString b)
        => OgmarkupParser a (Ast.Component b)
thought = talk '<' '>' Ast.Thought

-- | @'talk' c c' constr@ wraps a reply surrounded by @c@ and @c'@ inside
--   @constr@ (either 'Ast.Dialogue' or 'Ast.Thought').
talk :: (Stream a, Token a ~ Char, IsString b)
     => Char -- ^ A character to mark the begining of a reply
     -> Char -- ^ A character to mark the end of a reply
     -> (Ast.Reply b -> Maybe b -> Ast.Component b) -- ^ Either 'Ast.Dialogue' or 'Ast.Thought' according to the situation
     -> OgmarkupParser a (Ast.Component b)
talk c c' constructor = do
  rep <- reply c c'
  auth <- optional characterName
  blank

  return $ constructor rep auth

-- | Parse the name of the character which speaks or thinks. According to
-- the ogmarkup syntax, it is surrounded by parentheses.
characterName :: (Stream a, Token a ~ Char, IsString b)
           => OgmarkupParser a b
characterName = do
  char '('
  notFollowedBy (char ')') <?> "Empty character names are not allowed"
  auth <- manyTill anyChar (char ')') <?> "Missing closing )"

  return $ fromString auth

-- | 'reply' parses a 'Ast.Reply'.
reply :: (Stream a, Token a ~ Char, IsString b)
      => Char
      -> Char
      -> OgmarkupParser a (Ast.Reply b)
reply c c' = do char c
                blank
                p1 <- some format
                x <- oneOf ['|', c']

                case x of '|' -> do blank
                                    ws <- some format
                                    char '|' <?> "Missing | to close the with say"
                                    blank
                                    p2 <- many format
                                    char c'

                                    return $ Ast.WithSay p1 ws p2
                          _ -> return $ Ast.Simple p1

-- | See 'Ast.Format'.
format :: (Stream a, Token a ~ Char, IsString b)
       => OgmarkupParser a (Ast.Format b)
format = choice [ raw
                , emph
                , strongEmph
                , quote
                ]

-- | See 'Ast.Raw'.
raw :: (Stream a, Token a ~ Char, IsString b)
    => OgmarkupParser a (Ast.Format b)
raw = Ast.Raw `fmap` some atom

-- | See 'Ast.Emph'.
emph :: (Stream a, Token a ~ Char, IsString b)
     => OgmarkupParser a (Ast.Format b)
emph = do char '*'
          blank
          enterEmph
          f <- format
          fs <- manyTill format (char '*' >> blank)
          leaveEmph
          return . Ast.Emph $ (f:fs)

-- | See 'Ast.StrongEmph'.
strongEmph :: (Stream a, Token a ~ Char, IsString b)
           => OgmarkupParser a (Ast.Format b)
strongEmph = do char '+'
                blank
                enterStrongEmph
                f <- format
                fs <- manyTill format (char '+' >> blank)
                leaveStrongEmph
                return . Ast.StrongEmph $ (f:fs)

-- | See 'Ast.Quote'.
quote :: (Stream a, Token a ~ Char, IsString b)
      => OgmarkupParser a (Ast.Format b)
quote = do openQuote
           enterQuote
           f <- format
           fs <- manyTill format closeQuote
           leaveQuote
           return . Ast.Quote $ (f:fs)

-- | See 'Ast.Atom'.
atom :: (Stream a, Token a ~ Char, IsString b)
     => OgmarkupParser a (Ast.Atom b)
atom = (mark <|> longword <|> word) <* blank

-- | See 'Ast.Word'. This parser does not consume the following spaces, so
--   the caller needs to take care of it.
word :: (Stream a, Token a ~ Char, IsString b)
     => OgmarkupParser a (Ast.Atom b)
word = do notFollowedBy endOfWord
          str <- manyTill anyChar (lookAhead $ try endOfWord)
          return $ Ast.Word (fromString str)
  where
    endOfWord :: (Stream a, Token a ~ Char)
              => OgmarkupParser a ()
    endOfWord = eof <|> (skip spaceChar) <|> (skip $ oneOf specChar) <|> (skip mark)
    specChar = "\"«»`+*[]<>|_\'’"


-- | Wrap a raw string surrounded by @`@ inside a 'Ast.Word'.
--
--   >>> parse longword "" "`test *ei*`"
--   Right (Ast.Word "test *ei*")
--
--   Therefore, @`@ can be used to insert normally reserved symbol
--   inside a generated document.
longword :: (Stream a, Token a ~ Char, IsString b)
         => OgmarkupParser a (Ast.Atom b)
longword = do char '`'
              notFollowedBy (char '`') <?> "empty raw string are not accepted"
              str <- manyTill anyChar (char '`')
              return $ Ast.Word (fromString str)

-- | See 'Ast.Punctuation'. Be aware that 'mark' does not parse the quotes
--   because they are processed 'quote'.
mark :: (Stream a, Token a ~ Char)
     => OgmarkupParser a (Ast.Atom b)
mark = Ast.Punctuation `fmap` (semicolon
        <|> colon
        <|> question
        <|> exclamation
        <|> try longDash
        <|> try dash
        <|> hyphen
        <|> comma
        <|> apostrophe
        <|> try suspensionPoints
        <|> point)
  where
    parseMark p m = p >> return m

    semicolon        = parseMark (char ';') Ast.Semicolon
    colon            = parseMark (char ':') Ast.Colon
    question         = parseMark (char '?') Ast.Question
    exclamation      = parseMark (char '!') Ast.Exclamation
    longDash         = parseMark (string "—" <|> string "---") Ast.LongDash
    dash             = parseMark (string "–" <|> string "--") Ast.Dash
    hyphen           = parseMark (char '-') Ast.Hyphen
    comma            = parseMark (char ',') Ast.Comma
    point            = parseMark (char '.') Ast.Point
    apostrophe       = parseMark (char '\'' <|> char '’') Ast.Apostrophe
    suspensionPoints = parseMark (string ".." >> many (char '.')) Ast.SuspensionPoints

-- | See 'Ast.OpenQuote'. This parser consumes the following blank (see 'blank')
--   and skip the result.
openQuote :: (Stream a, Token a ~ Char)
          => OgmarkupParser a ()
openQuote = do char '«' <|> char '"'
               blank

-- | See 'Ast.CloseQuote'. This parser consumes the following blank (see 'blank')
--   and skip the result.
closeQuote :: (Stream a, Token a ~ Char)
           => OgmarkupParser a ()
closeQuote = do char '»' <|> char '"'
                blank

-- | An aside section (see 'Ast.Aside') is a particular region
--   surrounded by two lines of underscores (at least three).
--   This parser consumes one such line.
asideSeparator :: (Stream a, Token a ~ Char)
               => OgmarkupParser a ()
asideSeparator = do string "__"
                    some (char '_')
                    return ()

-- | The end of a paragraph is the end of the document or two blank lines
-- or an aside separator, that is a line of underscores.
endOfParagraph :: (Stream a, Token a ~ Char)
               => OgmarkupParser a ()
endOfParagraph = try betweenTwoSections
                 <|> asideSeparator
                 <|> eof
  where
    betweenTwoSections :: (Stream a, Token a ~ Char)
                       => OgmarkupParser a ()
    betweenTwoSections = do count 2 $ manyTill spaceChar (eof <|> skip (char '\n'))
                            space


-- | This parser consumes all the white spaces until it finds either an aside
--   surrounding marker (see 'Ast.Aside'), the end of the document or
--   one blank line. The latter marks the end of the current paragraph.
blank :: (Stream a, Token a ~ Char)
      => OgmarkupParser a ()
blank = do skip $ optional (notFollowedBy endOfParagraph >> space)


-- | @skip p@ parses @p@ and skips the result.
skip :: (Stream a)
     => OgmarkupParser a b
     -> OgmarkupParser a ()
skip =  (>> return ())