module Text.HMarkup.Parse where
import Text.HMarkup.Entities
import Text.HMarkup.Types
import Control.Monad
import Data.Char
import Network.URI
import Numeric
import Text.ParserCombinators.Parsec
parseMarkup :: Monad m => String -> String -> m Doc
parseMarkup source input =
case parse pMarkup source input of
Left err -> fail $ show err
Right x -> return x
pMarkup :: Parser Doc
pMarkup = liftM Doc $ skipWhite >> many pBlock
pBlock :: Parser Block
pBlock = do b <- try pChunk <|> try pHeader <|> try pItemList <|> pPara
skipWhite
return b
pChunk :: Parser Block
pChunk = do string "{{{"
sps
x <- pIdent
sps
string ":"
optional nl
s <- manyTill anyCharToUnixNL (try (string "}}}"))
white
return $ Chunk x s
anyCharToUnixNL :: Parser Char
anyCharToUnixNL = (nl >> return '\n') <|> anyChar
pHeader :: Parser Block
pHeader = do n <- liftM length $ many1 (char '=')
x <- pTextsTill (try (sps >> count n (char '=') >> endBlock))
return $ Header n x
pItemList :: Parser Block
pItemList = liftM ItemList (many1 pItem)
pItem :: Parser [Text]
pItem = char '-' >> sps >> pTextsTill (try endItem <|> try endBlock)
where endItem = sps >> nl >> lookAhead (char '-') >> return ()
pPara :: Parser Block
pPara = liftM (Para . concat) $ manyTill1 pText (try endBlock)
endBlock :: Parser ()
endBlock = try (sps >> nl >> sps >> nl >> skipWhite) <|> (white >> eof)
pText :: Parser [Text]
pText = do s <- white
t <- try pRef <|> try pEmph <|> try pTT <|> pWord
return $ (if s then [WhiteSpace] else []) ++ [t]
pTextsTill :: Parser a -> Parser [Text]
pTextsTill end = liftM concat $ manyTill pText end
pEmph :: Parser Text
pEmph = char '*' >> liftM Emph (pTextsTill (char '*'))
pTT :: Parser Text
pTT = string "``" >> liftM TT (pTextsTill (string "``"))
pRef :: Parser Text
pRef = char '[' >> liftM2 Ref (white >> pURI) (pTextsTill (char ']'))
pWord :: Parser Text
pWord = liftM Word pToken
pIdent :: Parser String
pIdent = many1 (alphaNum <|> char '_')
pURI :: Parser URI
pURI = do t <- pToken
case parseURIReference t of
Nothing -> fail $ "Malformed URI: " ++ show t
Just uri -> return uri
pToken :: Parser String
pToken = liftM concat $ many1 pTokenPiece
pTokenPiece :: Parser String
pTokenPiece = liftM (:[]) (try pEsc)
<|> try pEntity
<|> liftM return (satisfy (\c -> not (isSpace c || isSpecial c)))
isSpecial :: Char -> Bool
isSpecial c = c `elem` "[]*{}`"
pEsc :: Parser Char
pEsc = char '\\' >> anyChar
pEntity :: Parser String
pEntity = do char '&'
x <- liftM return numEntity <|> namedEntity
char ';'
return x
numEntity :: Parser Char
numEntity = char '#' >> liftM chr ((char 'x' >> pHexInt) <|> pDecInt)
namedEntity :: Parser String
namedEntity = many1 alphaNum >>= getEntity
nl :: Parser ()
nl = (char '\n' >> optional (char '\r'))
<|> (char '\r' >> optional (char '\n'))
sp :: Parser ()
sp = oneOf [' ','\t'] >> return ()
sps :: Parser ()
sps = skipMany sp
white :: Parser Bool
white = liftM (not . null) $ many (sp <|> nl)
skipWhite :: Parser ()
skipWhite = skipMany (sp <|> nl)
manyTill1 :: GenParser tok st a -> GenParser tok st end -> GenParser tok st [a]
manyTill1 p end = liftM2 (:) p (manyTill p end)
followedBy :: Monad m => m a -> m b -> m a
followedBy x y = x >>= \v -> y >> return v
pDecInt :: Parser Int
pDecInt = do ds <- many1 digit
case readDec ds of
[(x,"")] -> return x
_ -> error $ "pDecInt: " ++ show ds
pHexInt :: Parser Int
pHexInt = do ds <- many1 hexDigit
case readHex ds of
[(x,"")] -> return x
_ -> error $ "pHexInt: " ++ show ds
inFirstColumn :: GenParser tok st a -> GenParser tok st a
inFirstColumn p = do c <- liftM sourceColumn getPosition
if c == 1 then p else pzero