-- | Parse our DSL module Text.Madlibs.Ana.Parse ( parseTok , parseTokF , parseInclusions , parseTreeF , parseTokM ) where import Text.Madlibs.Internal.Types import Text.Madlibs.Internal.Utils import Text.Madlibs.Ana.ParseUtils import Text.Madlibs.Cata.SemErr import qualified Data.Text as T import Text.Megaparsec import Text.Megaparsec.Text import qualified Text.Megaparsec.Lexer as L import Control.Monad import qualified Data.Map as M import Control.Monad.State import Data.Composition import Data.Maybe -- | Parse a lexeme, aka deal with whitespace nicely. lexeme :: Parser a -> Parser a lexeme = L.lexeme spaceConsumer -- | space consumer with awareness for comments spaceConsumer :: Parser () spaceConsumer = L.space (void . some $ spaceChar) (L.skipLineComment "#") (L.skipBlockComment "{#" "#}") -- | parse a symbol, i.e. string plus surrouding whitespace symbol :: String -> Parser String symbol = L.symbol spaceConsumer -- | Parse a number/probability float :: Parser Prob float = lexeme L.float <|> (fromIntegral <$> integer) "Number" -- | Parse an integer integer :: Parser Integer integer = lexeme (L.integer {--<|> parseNumber--}) "Integer" -- | Make sure definition blocks start un-indented nonIndented = L.nonIndented spaceConsumer -- | Make contents of definition blocks are indented. indentGuard = L.indentGuard spaceConsumer GT (unsafePos 4) -- | Parse between quotes quote :: Parser a -> Parser a quote = between .$ (char '"') -- | Parse a keyword keyword :: String -> Parser String keyword str = (char ':') >> (symbol str) "keyword" -- | Parse a var var :: Parser Int var = fromIntegral <$> do char '$' integer "variable" -- | Parse the `define` keyword. define :: Parser () define = void (nonIndented (keyword "define")) "define block" -- | Parse the `include` keyword. include :: Parser () include = void (nonIndented (keyword "include")) "include" -- | Parse the `:return` keyword. main :: Parser () main = void (nonIndented (keyword "return")) "return block" -- | Parse a template name (what follows a `:define` or `return` block) name :: Parser String name = lexeme (some (letterChar <|> oneOf ("-/" :: String))) "template name" -- TODO make this broader in terms of what it includes -- | Parse a modifier modifier :: Parser (T.Text -> T.Text) modifier = do char '.' str <- foldr (<|>) (pure "") $ map (try . string) ["to_upper", "to_lower", "reverse", "reverse_words", "oulipo"] pure (fromMaybe id (M.lookup str modifierList)) "modifier" -- | Parse template into a `PreTok` of referents and strings preStr :: [T.Text] -> Parser PreTok preStr ins = do { n <- name ; mod <- many modifier ; spaceConsumer ; pure $ Name (T.pack n) (foldr (.) id mod) } <|> do { v <- var ; mod <- many modifier ; spaceConsumer ; pure . PreTok . (foldr (.) id mod) $ ins `access` (v-1) -- ins !! (v - 1) } <|> do { s <- quote (many $ noneOf ("\n\"" :: String)) ; mod <- many modifier ; spaceConsumer ; pure . PreTok . (foldr (.) id mod) . T.pack $ s } "string or function name" -- | Parse a probability/corresponding template pair :: [T.Text] -> Parser (Prob, [PreTok]) pair ins = do indentGuard p <- float str <- some (preStr ins) pure (p, str) "Probability-text pair" -- | Parse an `include` inclusions :: Parser [String] inclusions = many . try $ do include str <- name string ".mad" pure (str ++ ".mad") -- | Parse a `define` block definition :: [T.Text] -> Parser (Key, [(Prob, [PreTok])]) definition ins = do define str <- name val <- fmap normalize . some $ pair ins pure (T.pack str, val) "define block" -- | Parse the `:return` block final :: [T.Text] -> Parser [(Prob, [PreTok])] final ins = do main val <- fmap normalize . some $ pair ins pure val -- | Parse the program in terms of `PreTok` and the `Key`s to link them. program :: [T.Text] -> Parser [(Key, [(Prob, [PreTok])])] program ins = sortKeys <$> (checkSemantics =<< do inclusions p <- many (try (definition ins) <|> ((,) "Return" <$> final ins)) lexeme eof pure p) -- | Parse text as a token + context (aka a reader monad with all the other functions) parseTokM :: [T.Text] -> Parser (Context RandTok) parseTokM ins = build <$> program ins -- | Parse text as token + context parseTreeM :: [T.Text] -> Parser (Context RandTok) parseTreeM ins = buildTree <$> program ins -- | Parse text as a list of functions parseTokF :: FilePath -> [(Key, RandTok)] -> [T.Text] -> T.Text -> Either (ParseError Char Dec) [(Key, RandTok)] parseTokF filename state ins f = (flip execState (filterTemplate state)) <$> runParser (parseTokM ins) filename f where filterTemplate = map (\(i,j) -> if i == "Return" then (strip filename, j) else (i,j)) -- TODO fix the extras -- | Parse text as a list of tokens, suitable for printing as a tree. parseTreeF :: FilePath -> [(Key, RandTok)] -> [T.Text] -> T.Text -> Either (ParseError Char Dec) [(Key, RandTok)] parseTreeF filename state ins f = (flip execState (filterTemplate state)) <$> runParser (parseTreeM ins) filename f where filterTemplate = map (\(i,j) -> if i == "Return" then (strip filename, j) else (i,j)) -- | Parse text given a context -- -- > import qualified Data.Text.IO as TIO -- > -- > getParsed = do -- > f <- TIO.readFile "template.mad" -- > parseTok "filename.mad" [] [] f parseTok :: FilePath -- ^ File name to use for parse errors -> [(Key, RandTok)] -- ^ Context, i.e. other random data paired with a key. -> [T.Text] -- ^ list of variables to substitute into the template -> T.Text -- ^ Actaul text to parse -> Either (ParseError Char Dec) RandTok -- ^ Result parseTok = (fmap takeTemplate) .*** parseTokF -- | Parse text as a token, suitable for printing as a tree.. parseTree :: FilePath -> [(Key, RandTok)] -> [T.Text] -> T.Text -> Either (ParseError Char Dec) RandTok parseTree = (fmap takeTemplate) .*** parseTreeF -- | Parse inclustions parseInclusions :: FilePath -> T.Text -> Either (ParseError Char Dec) [String] parseInclusions = runParser inclusions