-- | Markup parsing. module Text.HMarkup.Parse where import Text.HMarkup.Entities import Text.HMarkup.Types import Control.Monad import Data.Char import Network.URI import Numeric import Text.ParserCombinators.Parsec parseMarkup :: Monad m => String -> String -> m Doc parseMarkup source input = case parse pMarkup source input of Left err -> fail $ show err Right x -> return x pMarkup :: Parser Doc pMarkup = liftM Doc $ skipWhite >> many pBlock pBlock :: Parser Block pBlock = do b <- try pChunk <|> try pHeader <|> try pItemList <|> pPara skipWhite return b pChunk :: Parser Block pChunk = do string "{{{" sps x <- pIdent sps string ":" optional nl s <- manyTill anyCharToUnixNL (try (string "}}}")) white return $ Chunk x s anyCharToUnixNL :: Parser Char anyCharToUnixNL = (nl >> return '\n') <|> anyChar pHeader :: Parser Block pHeader = do n <- liftM length $ many1 (char '=') x <- pTextsTill (try (sps >> count n (char '=') >> endBlock)) return $ Header n x pItemList :: Parser Block pItemList = liftM ItemList (many1 pItem) pItem :: Parser [Text] pItem = char '-' >> sps >> pTextsTill (try endItem <|> try endBlock) where endItem = sps >> nl >> lookAhead (char '-') >> return () pPara :: Parser Block pPara = liftM (Para . concat) $ manyTill1 pText (try endBlock) endBlock :: Parser () endBlock = try (sps >> nl >> sps >> nl >> skipWhite) <|> (white >> eof) pText :: Parser [Text] pText = do s <- white t <- try pRef <|> try pEmph <|> try pTT <|> pWord return $ (if s then [WhiteSpace] else []) ++ [t] pTextsTill :: Parser a -> Parser [Text] pTextsTill end = liftM concat $ manyTill pText end pEmph :: Parser Text pEmph = char '*' >> liftM Emph (pTextsTill (char '*')) pTT :: Parser Text pTT = string "``" >> liftM TT (pTextsTill (string "``")) pRef :: Parser Text pRef = char '[' >> liftM2 Ref (white >> pURI) (pTextsTill (char ']')) pWord :: Parser Text pWord = liftM Word pToken pIdent :: Parser String pIdent = many1 (alphaNum <|> char '_') pURI :: Parser URI pURI = do t <- pToken case parseURIReference t of Nothing -> fail $ "Malformed URI: " ++ show t Just uri -> return uri -- -- * Tokens -- pToken :: Parser String pToken = liftM concat $ many1 pTokenPiece pTokenPiece :: Parser String pTokenPiece = liftM (:[]) (try pEsc) <|> try pEntity <|> liftM return (satisfy (\c -> not (isSpace c || isSpecial c))) isSpecial :: Char -> Bool isSpecial c = c `elem` "[]*{}`" pEsc :: Parser Char pEsc = char '\\' >> anyChar pEntity :: Parser String pEntity = do char '&' x <- liftM return numEntity <|> namedEntity char ';' return x numEntity :: Parser Char numEntity = char '#' >> liftM chr ((char 'x' >> pHexInt) <|> pDecInt) namedEntity :: Parser String namedEntity = many1 alphaNum >>= getEntity -- -- * Parsec utilities -- nl :: Parser () nl = (char '\n' >> optional (char '\r')) <|> (char '\r' >> optional (char '\n')) sp :: Parser () sp = oneOf [' ','\t'] >> return () sps :: Parser () sps = skipMany sp white :: Parser Bool white = liftM (not . null) $ many (sp <|> nl) skipWhite :: Parser () skipWhite = skipMany (sp <|> nl) manyTill1 :: GenParser tok st a -> GenParser tok st end -> GenParser tok st [a] manyTill1 p end = liftM2 (:) p (manyTill p end) followedBy :: Monad m => m a -> m b -> m a followedBy x y = x >>= \v -> y >> return v pDecInt :: Parser Int pDecInt = do ds <- many1 digit case readDec ds of [(x,"")] -> return x _ -> error $ "pDecInt: " ++ show ds pHexInt :: Parser Int pHexInt = do ds <- many1 hexDigit case readHex ds of [(x,"")] -> return x _ -> error $ "pHexInt: " ++ show ds inFirstColumn :: GenParser tok st a -> GenParser tok st a inFirstColumn p = do c <- liftM sourceColumn getPosition if c == 1 then p else pzero