{-| Module : Text.Ogmarkup.Private.Parser Copyright : (c) Ogma Project, 2016 License : MIT Stability : experimental This module provides several parsers that can be used in order to extract the 'Ast' of an Ogmarkup document. Please consider that only 'document' should be used outside this module. -} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE TypeFamilies #-} module Text.Ogmarkup.Private.Parser where import Control.Monad.State import Data.String import Data.Void (Void) import Text.Megaparsec import Text.Megaparsec.Char import qualified Text.Ogmarkup.Private.Ast as Ast -- | Keep track of the currently opened formats. data ParserState = ParserState { -- | Already parsing text with emphasis parseWithEmph :: Bool -- | Already parsing text with strong -- emphasis , parseWithStrongEmph :: Bool -- | Already parsing a quote , parseWithinQuote :: Bool } -- | An ogmarkup parser processes 'Char' tokens and carries a 'ParserState'. type OgmarkupParser a = StateT ParserState (Parsec Void a) -- | Update the 'ParserState' to guard against nested emphasis. enterEmph :: Stream a => OgmarkupParser a () enterEmph = do st <- get if parseWithEmph st then fail "guard against nested emphasis" else do put st { parseWithEmph = True } return () -- | Update the 'ParserState' to be able to parse input with emphasis -- again. leaveEmph :: Stream a => OgmarkupParser a () leaveEmph = do st <- get if parseWithEmph st then do put st { parseWithEmph = False } return () else fail "cannot leave emphasis when you did not enter" -- | Update the 'ParserState' to guard against nested strong emphasis. enterStrongEmph :: Stream a => OgmarkupParser a () enterStrongEmph = do st <- get if parseWithStrongEmph st then fail "guard against nested strong emphasis" else do put st { parseWithStrongEmph = True } return () -- | Update the 'ParserState' to be able to parse input with strong emphasis -- again. leaveStrongEmph :: Stream a => OgmarkupParser a () leaveStrongEmph = do st <- get if parseWithStrongEmph st then do put st { parseWithStrongEmph = False } return () else fail "cannot leave strong emphasis when you did not enter" -- | Update the 'ParserState' to guard against nested quoted inputs. enterQuote :: Stream a => OgmarkupParser a () enterQuote = do st <- get if parseWithinQuote st then fail "guard against nested quotes" else do put st { parseWithinQuote = True } return () -- | Update the 'ParserState' to be able to parse an input -- surrounded by quotes again. leaveQuote :: Stream a => OgmarkupParser a () leaveQuote = do st <- get if parseWithinQuote st then do put st { parseWithinQuote = False } return () else fail "cannot leave quote when you did not enter" -- | A initial ParserState instance to be used at the begining of -- a document parsing. initParserState :: ParserState initParserState = ParserState False False False -- | A wrapper around the 'runParser' function of Megaparsec. It uses -- 'initParserState' as an initial state. parse :: Stream a => OgmarkupParser a b -> String -> a -> Either (ParseError (Token a) Void) b parse ogma = runParser (evalStateT ogma initParserState) -- | Try its best to parse an ogmarkup document. When it encounters an -- error, it returns an Ast and the remaining input. -- -- See 'Ast.Document'. document :: (Stream a, Token a ~ Char, IsString (Tokens a), IsString b) => OgmarkupParser a (Ast.Document b) document = doc [] where doc :: (Stream a, Token a ~ Char, IsString (Tokens a), IsString b) => Ast.Document b -> OgmarkupParser a (Ast.Document b) doc ast = do space sects <- many (try section) let ast' = ast `mappend` sects (eof >> return ast') <|> (recover ast' >>= doc) recover :: (Stream a, Token a ~ Char, IsString (Tokens a), IsString b) => Ast.Document b -> OgmarkupParser a (Ast.Document b) recover ast = do failure <- someTill anyChar (char '\n') return $ ast `mappend` [Ast.Failing $ fromString failure] -- | See 'Ast.Section'. section :: (Stream a, Token a ~ Char, IsString b) => OgmarkupParser a (Ast.Section b) section = aside <|> story -- | See 'Ast.Aside'. aside :: (Stream a, Token a ~ Char, IsString b) => OgmarkupParser a (Ast.Section b) aside = do asideSeparator cls <- optional asideClass space ps <- some (paragraph <* space) asideSeparator manyTill space (skip (char '\n') <|> eof) space return $ Ast.Aside cls ps where asideClass :: (Stream a, Token a ~ Char, IsString b) => OgmarkupParser a b asideClass = do cls <- some letterChar asideSeparator return $ fromString cls -- | See 'Ast.Story'. story :: (Stream a, Token a ~ Char, IsString b) => OgmarkupParser a (Ast.Section b) story = Ast.Story `fmap` some (paragraph <* space) -- | See 'Ast.Paragraph'. paragraph :: (Stream a, Token a ~ Char, IsString b) => OgmarkupParser a (Ast.Paragraph b) paragraph = some component <* blank -- | See 'Ast.Component'. component :: (Stream a, Token a ~ Char, IsString b) => OgmarkupParser a (Ast.Component b) component = try (dialogue <|> thought <|> teller) <|> illformed -- | See 'Ast.IllFormed'. illformed :: (Stream a, Token a ~ Char, IsString b) => OgmarkupParser a (Ast.Component b) illformed = Ast.IllFormed `fmap` restOfParagraph -- | Parse the rest of the current paragraph with no regards for the -- ogmarkup syntax. This Parser is used when the document is ill-formed, to -- find a new point of synchronization. restOfParagraph :: (Stream a, Token a ~ Char, IsString b) => OgmarkupParser a b restOfParagraph = do lookAhead anyChar notFollowedBy endOfParagraph str <- manyTill anyChar (lookAhead $ try endOfParagraph) return $ fromString str -- | See 'Ast.Teller'. teller :: (Stream a, Token a ~ Char, IsString b) => OgmarkupParser a (Ast.Component b) teller = Ast.Teller `fmap` some format -- | See 'Ast.Dialogue'. dialogue :: (Stream a, Token a ~ Char, IsString b) => OgmarkupParser a (Ast.Component b) dialogue = talk '[' ']' Ast.Dialogue -- | See 'Ast.Thought'. thought :: (Stream a, Token a ~ Char, IsString b) => OgmarkupParser a (Ast.Component b) thought = talk '<' '>' Ast.Thought -- | @'talk' c c' constr@ wraps a reply surrounded by @c@ and @c'@ inside -- @constr@ (either 'Ast.Dialogue' or 'Ast.Thought'). talk :: (Stream a, Token a ~ Char, IsString b) => Char -- ^ A character to mark the begining of a reply -> Char -- ^ A character to mark the end of a reply -> (Ast.Reply b -> Maybe b -> Ast.Component b) -- ^ Either 'Ast.Dialogue' or 'Ast.Thought' according to the situation -> OgmarkupParser a (Ast.Component b) talk c c' constructor = do rep <- reply c c' auth <- optional characterName blank return $ constructor rep auth -- | Parse the name of the character which speaks or thinks. According to -- the ogmarkup syntax, it is surrounded by parentheses. characterName :: (Stream a, Token a ~ Char, IsString b) => OgmarkupParser a b characterName = do char '(' notFollowedBy (char ')') auth <- manyTill anyChar (char ')') return $ fromString auth -- | 'reply' parses a 'Ast.Reply'. reply :: (Stream a, Token a ~ Char, IsString b) => Char -> Char -> OgmarkupParser a (Ast.Reply b) reply c c' = do char c blank p1 <- some format x <- oneOf ['|', c'] case x of '|' -> do blank ws <- some format char '|' blank p2 <- many format char c' return $ Ast.WithSay p1 ws p2 _ -> return $ Ast.Simple p1 -- | See 'Ast.Format'. format :: (Stream a, Token a ~ Char, IsString b) => OgmarkupParser a (Ast.Format b) format = choice [ raw , emph , strongEmph , quote ] -- | See 'Ast.Raw'. raw :: (Stream a, Token a ~ Char, IsString b) => OgmarkupParser a (Ast.Format b) raw = Ast.Raw `fmap` some atom -- | See 'Ast.Emph'. emph :: (Stream a, Token a ~ Char, IsString b) => OgmarkupParser a (Ast.Format b) emph = do char '*' blank enterEmph f <- format fs <- manyTill format (char '*' >> blank) leaveEmph return . Ast.Emph $ (f:fs) -- | See 'Ast.StrongEmph'. strongEmph :: (Stream a, Token a ~ Char, IsString b) => OgmarkupParser a (Ast.Format b) strongEmph = do char '+' blank enterStrongEmph f <- format fs <- manyTill format (char '+' >> blank) leaveStrongEmph return . Ast.StrongEmph $ (f:fs) -- | See 'Ast.Quote'. quote :: (Stream a, Token a ~ Char, IsString b) => OgmarkupParser a (Ast.Format b) quote = do openQuote enterQuote f <- format fs <- manyTill format closeQuote leaveQuote return . Ast.Quote $ (f:fs) -- | See 'Ast.Atom'. atom :: (Stream a, Token a ~ Char, IsString b) => OgmarkupParser a (Ast.Atom b) atom = (word <|> mark <|> longword) <* blank {-# INLINE atom #-} -- | See 'Ast.Word'. This parser does not consume the following spaces, so -- the caller needs to take care of it. word :: (Stream a, Token a ~ Char, IsString b) => OgmarkupParser a (Ast.Atom b) word = do notFollowedBy endOfWord str <- manyTill anyChar (lookAhead $ try endOfWord) return $ Ast.Word (fromString str) where endOfWord :: (Stream a, Token a ~ Char) => OgmarkupParser a () endOfWord = eof <|> skip spaceChar <|> skip (satisfy specChar) specChar :: Char -> Bool specChar x = case x of '"' -> True '«' -> True '»' -> True '`' -> True '+' -> True '*' -> True '[' -> True ']' -> True '<' -> True '>' -> True '|' -> True '_' -> True '\'' -> True '’' -> True '.' -> True ',' -> True ';' -> True '-' -> True '–' -> True '—' -> True '!' -> True '?' -> True ':' -> True _ -> False {-# INLINE word #-} -- | Wrap a raw string surrounded by @`@ inside a 'Ast.Word'. -- -- >>> parse longword "" "`test *ei*`" -- Right (Ast.Word "test *ei*") -- -- Therefore, @`@ can be used to insert normally reserved symbol -- inside a generated document. longword :: (Stream a, Token a ~ Char, IsString b) => OgmarkupParser a (Ast.Atom b) longword = do char '`' notFollowedBy (char '`') str <- manyTill anyChar (char '`') return $ Ast.Word (fromString str) {-# INLINE longword #-} -- | See 'Ast.Punctuation'. Be aware that 'mark' does not parse the quotes -- because they are processed 'quote'. mark :: (Stream a, Token a ~ Char) => OgmarkupParser a (Ast.Atom b) mark = Ast.Punctuation `fmap` (semicolon <|> colon <|> question <|> exclamation <|> try longDash <|> try dash <|> hyphen <|> comma <|> apostrophe <|> try suspensionPoints <|> point) where parseMark p m = p >> return m semicolon = parseMark (char ';') Ast.Semicolon colon = parseMark (char ':') Ast.Colon question = parseMark (char '?') Ast.Question exclamation = parseMark (char '!') Ast.Exclamation longDash = parseMark (char '—' <|> (char '-' >> char '-' >> char '-')) Ast.LongDash dash = parseMark (char '–' <|> (char '-' >> char '-')) Ast.Dash hyphen = parseMark (char '-') Ast.Hyphen comma = parseMark (char ',') Ast.Comma point = parseMark (char '.') Ast.Point apostrophe = parseMark (char '\'' <|> char '’') Ast.Apostrophe suspensionPoints = parseMark (char '.' >> char '.' >> many (char '.')) Ast.SuspensionPoints {-# INLINE mark #-} -- | See 'Ast.OpenQuote'. This parser consumes the following blank (see 'blank') -- and skip the result. openQuote :: (Stream a, Token a ~ Char) => OgmarkupParser a () openQuote = do char '«' <|> char '"' blank -- | See 'Ast.CloseQuote'. This parser consumes the following blank (see 'blank') -- and skip the result. closeQuote :: (Stream a, Token a ~ Char) => OgmarkupParser a () closeQuote = do char '»' <|> char '"' blank -- | An aside section (see 'Ast.Aside') is a particular region -- surrounded by two lines of underscores (at least three). -- This parser consumes one such line. asideSeparator :: (Stream a, Token a ~ Char) => OgmarkupParser a () asideSeparator = do char '_' char '_' takeWhile1P Nothing (== '_') return () -- | The end of a paragraph is the end of the document or two blank lines -- or an aside separator, that is a line of underscores. endOfParagraph :: (Stream a, Token a ~ Char) => OgmarkupParser a () endOfParagraph = try betweenTwoSections <|> asideSeparator <|> eof where betweenTwoSections :: (Stream a, Token a ~ Char) => OgmarkupParser a () betweenTwoSections = do count 2 $ manyTill spaceChar (eof <|> skip (char '\n')) space {-# INLINE endOfParagraph #-} -- | This parser consumes all the white spaces until it finds either an aside -- surrounding marker (see 'Ast.Aside'), the end of the document or -- one blank line. The latter marks the end of the current paragraph. blank :: (Stream a, Token a ~ Char) => OgmarkupParser a () blank = skip $ optional (notFollowedBy endOfParagraph >> space) {-# INLINE blank #-} -- | @skip p@ parses @p@ and skips the result. skip :: (Stream a) => OgmarkupParser a b -> OgmarkupParser a () skip = (>> return ()) {-# INLINE skip #-}