module ListT.HTMLParser
(
Parser,
Error,
ErrorDetails(..),
run,
eoi,
token,
openingTag,
closingTag,
text,
comment,
html,
many1,
manyTill,
skipTill,
total,
)
where
import BasePrelude hiding (uncons, cons)
import MTLPrelude hiding (Error, shift)
import Control.Monad.Trans.Either hiding (left, right)
import ListT (ListT)
import Data.Text (Text)
import qualified Data.Text.Lazy.Builder as Text (Builder)
import qualified Data.Text.Lazy.Builder as Text.Builder
import qualified ListT as L
import qualified HTMLTokenizer.Parser as HT
import qualified ListT.HTMLParser.Renderer as Renderer
newtype Parser m a =
Parser { unwrap :: EitherT Error (StateT (ListT m HT.Token, [HT.Token]) m) a }
deriving (Functor, Applicative, MonadError Error)
type Error =
Maybe ErrorDetails
data ErrorDetails =
ErrorDetails_Message Text |
ErrorDetails_UnexpectedToken |
ErrorDetails_EOI
deriving (Show, Eq)
instance Monad m => Monad (Parser m) where
return =
Parser . return
(>>=) a b =
Parser $ unwrap a >>= unwrap . b
fail a =
throwError $ Just $ ErrorDetails_Message $ fromString a
instance Monad m => Alternative (Parser m) where
empty =
Parser $ EitherT $ return $ Left Nothing
(<|>) a b =
Parser $ EitherT $ StateT $ \(incoming, backtrack) -> do
(aResult, (incoming', backtrack')) <- flip runStateT (incoming, []) $ runEitherT $ unwrap $ a
(result'', (incoming'', backtrack'')) <-
case aResult of
Left _ -> do
flip runStateT (foldl' (flip L.cons) incoming' backtrack', []) $ runEitherT $ unwrap $ b
Right aResult -> do
return (Right aResult, (incoming', backtrack'))
return (result'', (incoming'', backtrack'' <> backtrack))
instance Monad m => MonadPlus (Parser m) where
mzero = empty
mplus = (<|>)
run :: Monad m => Parser m a -> ListT m HT.Token -> m (Either Error a)
run p l =
flip evalStateT (l, []) $ runEitherT $ unwrap $ p
eoi :: Monad m => Parser m ()
eoi =
token $> () <|> pure ()
token :: Monad m => Parser m HT.Token
token =
Parser $ EitherT $ StateT $ \(incoming, backtrack) ->
liftM (maybe (Left (Just ErrorDetails_EOI), (incoming, backtrack))
(\(a, incoming') -> (Right a, (incoming', a : backtrack)))) $
L.uncons incoming
openingTag :: Monad m => Parser m HT.OpeningTag
openingTag =
token >>= \case
HT.Token_OpeningTag x -> return x
_ -> throwError (Just ErrorDetails_UnexpectedToken)
closingTag :: Monad m => Parser m HT.ClosingTag
closingTag =
token >>= \case
HT.Token_ClosingTag x -> return x
_ -> throwError (Just ErrorDetails_UnexpectedToken)
text :: Monad m => Parser m Text
text =
token >>= \case
HT.Token_Text x -> return x
_ -> throwError (Just ErrorDetails_UnexpectedToken)
comment :: Monad m => Parser m Text
comment =
token >>= \case
HT.Token_Comment x -> return x
_ -> throwError (Just ErrorDetails_UnexpectedToken)
many1 :: Monad m => Parser m a -> Parser m [a]
many1 a =
(:) <$> a <*> many a
manyTill :: Monad m => Parser m a -> Parser m b -> Parser m ([a], b)
manyTill a b =
fix $ \loop ->
([],) <$> b <|>
(\a (al, b) -> (a : al, b)) <$> a <*> loop
skipTill :: Monad m => Parser m a -> Parser m a
skipTill a =
fix $ \loop ->
a <|> (token *> loop)
total :: Monad m => Parser m a -> Parser m a
total a =
a <* eoi
html :: Monad m => Parser m Text.Builder
html =
enclosingTag <|> brokenOpenTag <|> text' <|> comment'
where
enclosingTag =
do
ot@(n, _, False) <- openingTag
theHTML <- mconcat <$> many html
ct <- closingTag
guard $ ct == n
return $ Renderer.openingTag ot <> theHTML <> Renderer.closingTag ct
brokenOpenTag =
Renderer.openingTag . repair <$> openingTag
where
repair (name, attrs, _) = (name, attrs, True)
text' =
Renderer.text <$> text
comment' =
Renderer.comment <$> comment