module Text.Megaparsec.TagSoup
( TagParser
, space
, whitespace
, lexeme
, satisfy
, anyTag
, anyTagOpen
, anyTagClose
, tagText
, tagOpen
, tagClose
) where
import Data.Char (isSpace)
import Data.List.NonEmpty (NonEmpty(..))
import qualified Data.List.NonEmpty as NE
import Data.Semigroup ((<>))
import qualified Data.Set as Set
import Text.HTML.TagSoup
import Text.StringLike
import Text.Megaparsec.Combinator
import Text.Megaparsec.Error
import Text.Megaparsec.Pos
import Text.Megaparsec.Prim
type TagParser str = Parsec Dec [Tag str]
instance (Show str) => ShowToken (Tag str) where
showTokens tags = unwords (NE.toList (NE.map show tags))
instance (Ord str) => Stream [Tag str] where
type Token [Tag str] = Tag str
uncons [] = Nothing
uncons (t:ts) = Just (t, ts)
updatePos = const updatePosTag
updatePosTag
:: Pos
-> SourcePos
-> Tag str
-> (SourcePos, SourcePos)
updatePosTag _ apos@(SourcePos n l c) _ = (apos, npos)
where
u = unsafePos 1
npos = SourcePos n l (c <> u)
space :: (StringLike str, MonadParsec e s m, Token s ~ Tag str) => m (Tag str)
space = satisfy (\tag -> case tag of
TagText x | all isSpace (toString x) -> True
_ -> False)
whitespace :: (StringLike str, MonadParsec e s m, Token s ~ Tag str) => m ()
whitespace = skipMany space
lexeme :: (StringLike str, MonadParsec e s m, Token s ~ Tag str) => m (Tag str) -> m (Tag str)
lexeme p = p <* whitespace
anyTag :: (StringLike str, MonadParsec e s m, Token s ~ Tag str) => m (Tag str)
anyTag = lexeme $ token Right Nothing
satisfy :: (StringLike str, MonadParsec e s m, Token s ~ Tag str) => (Tag str -> Bool) -> m (Tag str)
satisfy f = lexeme $ token testTag Nothing
where testTag x = if f x
then Right x
else Left (Set.singleton (Tokens (x:|[])), Set.empty, Set.empty)
anyTagOpen :: (StringLike str, MonadParsec e s m, Token s ~ Tag str) => m (Tag str)
anyTagOpen = satisfy isTagOpen <?> "any tag open"
anyTagClose :: (StringLike str, MonadParsec e s m, Token s ~ Tag str) => m (Tag str)
anyTagClose = satisfy isTagClose <?> "any tag close"
tagText :: (StringLike str, MonadParsec e s m, Token s ~ Tag str) => m (Tag str)
tagText = satisfy isTagText <?> "text"
tagOpen :: (StringLike str, MonadParsec e s m, Token s ~ Tag str) => str -> m (Tag str)
tagOpen s = satisfy (isTagOpenName s) <?> "tag open"
tagClose :: (StringLike str, MonadParsec e s m, Token s ~ Tag str) => str -> m (Tag str)
tagClose s = satisfy (isTagCloseName s) <?> "tag close"