-- | Markup parsing.
module Text.HMarkup.Parse where

import Text.HMarkup.Entities
import Text.HMarkup.Types

import Control.Monad
import Data.Char
import Network.URI
import Numeric
import Text.ParserCombinators.Parsec



parseMarkup :: Monad m => String -> String -> m Doc
parseMarkup source input = 
    case parse pMarkup source input of
      Left err -> fail $ show err
      Right x  -> return x

pMarkup :: Parser Doc
pMarkup = liftM Doc $ skipWhite >> many pBlock

pBlock :: Parser Block
pBlock = do b <- try pChunk <|> try pHeader <|> try pItemList <|> pPara
            skipWhite
            return b

pChunk :: Parser Block
pChunk = do string "{{{"
            sps
            x <- pIdent
            sps
            string ":"
            optional nl
            s <- manyTill anyCharToUnixNL (try (string "}}}"))
            white
            return $ Chunk x s

anyCharToUnixNL :: Parser Char
anyCharToUnixNL = (nl >> return '\n') <|> anyChar

pHeader :: Parser Block
pHeader = do n <- liftM length $ many1 (char '=')
             x <- pTextsTill (try (sps >> count n (char '=') >> endBlock))
             return $ Header n x

pItemList :: Parser Block
pItemList = liftM ItemList (many1 pItem)

pItem :: Parser [Text]
pItem = char '-' >> sps >> pTextsTill (try endItem <|> try endBlock)
  where endItem = sps >> nl >> lookAhead (char '-') >> return ()

pPara :: Parser Block
pPara = liftM (Para . concat) $ manyTill1 pText (try endBlock)

endBlock :: Parser ()
endBlock = try (sps >> nl >> sps >> nl >> skipWhite) <|> (white >> eof)

pText :: Parser [Text]
pText = do s <- white
           t <- try pRef <|> try pEmph <|> try pTT <|> pWord
           return $ (if s then [WhiteSpace] else []) ++ [t]

pTextsTill :: Parser a -> Parser [Text]
pTextsTill end = liftM concat $ manyTill pText end

pEmph :: Parser Text
pEmph = char '*' >> liftM Emph (pTextsTill (char '*'))

pTT :: Parser Text
pTT = string "``" >> liftM TT (pTextsTill (string "``"))

pRef :: Parser Text
pRef = char '[' >> liftM2 Ref (white >> pURI) (pTextsTill (char ']'))

pWord :: Parser Text
pWord = liftM Word pToken

pIdent :: Parser String
pIdent = many1 (alphaNum <|> char '_')

pURI :: Parser URI
pURI = do t <- pToken
          case parseURIReference t of
            Nothing -> fail $ "Malformed URI: " ++ show t
            Just uri -> return uri

--
-- * Tokens
--

pToken :: Parser String
pToken = liftM concat $ many1 pTokenPiece

pTokenPiece :: Parser String
pTokenPiece = liftM (:[]) (try pEsc)
              <|> try pEntity
              <|> liftM return (satisfy (\c -> not (isSpace c || isSpecial c)))

isSpecial :: Char -> Bool
isSpecial c = c `elem` "[]*{}`"

pEsc :: Parser Char
pEsc = char '\\' >> anyChar

pEntity :: Parser String
pEntity = do char '&'
             x <- liftM return numEntity <|> namedEntity
             char ';'
             return x

numEntity :: Parser Char
numEntity = char '#' >> liftM chr ((char 'x' >> pHexInt) <|> pDecInt)

namedEntity :: Parser String
namedEntity = many1 alphaNum >>= getEntity

--
-- * Parsec utilities
--

nl :: Parser ()
nl = (char '\n' >> optional (char '\r')) 
     <|> (char '\r' >> optional (char '\n'))

sp :: Parser ()
sp = oneOf [' ','\t'] >> return ()

sps :: Parser ()
sps = skipMany sp

white :: Parser Bool
white = liftM (not . null) $ many (sp <|> nl)

skipWhite :: Parser ()
skipWhite = skipMany (sp <|> nl)

manyTill1 :: GenParser tok st a -> GenParser tok st end -> GenParser tok st [a]
manyTill1 p end = liftM2 (:) p (manyTill p end)

followedBy :: Monad m => m a -> m b -> m a
followedBy x y = x >>= \v -> y >> return v


pDecInt :: Parser Int
pDecInt = do ds <- many1 digit
             case readDec ds of
               [(x,"")] -> return x
               _ -> error $ "pDecInt: " ++ show ds

pHexInt :: Parser Int
pHexInt = do ds <- many1 hexDigit
             case readHex ds of
               [(x,"")] -> return x
               _ -> error $ "pHexInt: " ++ show ds

inFirstColumn :: GenParser tok st a -> GenParser tok st a
inFirstColumn p = do c <- liftM sourceColumn getPosition
                     if c == 1 then p else pzero