module Text.HTML.Chunks.Parser
(findChunks,
Chunk(..),
Content(..),
isText,
isVariable
)
where
import Text.ParserCombinators.Parsec
import Data.Either
data Chunk = Chunk String [Content]
deriving (Show, Eq)
data Content = Text String
| Variable String
deriving (Show, Eq)
isText :: Content -> Bool
isText (Text _) = True
isText _ = False
isVariable :: Content -> Bool
isVariable (Variable _) = True
isVariable _ = False
data ParsingUnit = ChunkUnit Chunk
| Chr Char
| Var String
deriving (Show, Eq)
findChunks :: String -> Either ParseError [Chunk]
findChunks = runParser parseChunks () "Text.HTML.Chunks.Parser"
parseChunks :: Parser [Chunk]
parseChunks = do { name <- parseBegin
; bodyChunks <- try parseBody
; let (body, chunks) = buildBodyAndChunks bodyChunks [] []
; return ((Chunk name body) : (reverse chunks))
}
convertParseUnit :: ParsingUnit -> [Content] -> [Chunk] -> ([Content], [Chunk])
convertParseUnit (Chr c) ((Text t):contents) chunks
= (((Text (c:t)):contents), chunks)
convertParseUnit pu@(ChunkUnit _) contents chunks
= convertParseUnit' pu contents chunks
convertParseUnit pu ((Text t):contents) chunks
= convertParseUnit' pu ((Text (reverse t)):contents) chunks
convertParseUnit pu contents chunks = convertParseUnit' pu contents chunks
convertParseUnit' :: ParsingUnit -> [Content] -> [Chunk] -> ([Content], [Chunk])
convertParseUnit' (Chr c) contents chunks = (((Text [c]):contents), chunks)
convertParseUnit' (Var v) contents chunks = (((Variable v):contents), chunks)
convertParseUnit' (ChunkUnit cu) contents chunks = (contents, (cu:chunks))
buildBodyAndChunks :: [ParsingUnit] -> [Content] -> [Chunk] -> ([Content], [Chunk])
buildBodyAndChunks [] ((Text t):body) chunks = (reverse ((Text (reverse t)):body), chunks)
buildBodyAndChunks [] body chunks = (reverse body, chunks)
buildBodyAndChunks (pu:pus) body chunks = buildBodyAndChunks pus body' chunks'
where
(body', chunks') = convertParseUnit pu body chunks
parseBegin :: Parser String
parseBegin = do { string "<!--"
; many1 space
; string "BEGIN"
; many1 space
; name <- many1 letter
; many1 space
; string "-->"
; return name
}
parseEnd :: Parser ()
parseEnd = do { string "<!--"
; many1 space
; string "END"
; many1 space
; string "-->"
; return ()
}
parseVar :: Parser String
parseVar = do { string "<!--"
; many1 space
; string "##"
; name <- many1 (letter <|> char '_')
; string "##"
; many1 space
; string "-->"
; return name
}
<|> do { string "##"
; name <- many1 (letter <|> char '_')
; string "##"
; return name
}
parseBody :: Parser [ParsingUnit]
parseBody = do { try parseEnd
; return []
}
<|> do { var <- try parseVar
; body <- parseBody
; return ((Var var) : body)
}
<|> do { chunks <- try parseChunks
; body <- parseBody
; return ((map ChunkUnit chunks) ++ body)
}
<|> do { c <- anyChar
; body <- parseBody
; return ((Chr c) : body)
}