module Data.Html.TagSoup(
Tag(..), Attribute, parseTags,
module Data.Html.Download,
(~==), (~/=),
isTagOpen, isTagClose, isTagText,
fromTagText, fromAttrib,
isTagOpenName, isTagCloseName,
sections, partitions
) where
import Data.Char
import Data.List
import Data.Maybe
import Data.Html.Download
type Attribute = (String,String)
data Tag = TagOpen String [Attribute]
| TagClose String
| TagText String
deriving (Show, Eq, Ord)
parseTags :: String -> [Tag]
parseTags [] = []
parseTags ('<':'/':xs) = TagClose tag : parseTags trail
where
(tag,rest) = span isAlphaNum xs
trail = drop 1 $ dropWhile (/= '>') rest
parseTags ('<':xs)
| "/>" `isPrefixOf` rest2 = res : TagClose tag : parseTags (drop 2 rest2)
| ">" `isPrefixOf` rest2 = res : parseTags (drop 1 rest2)
| otherwise = res : parseTags (drop 1 $ dropWhile (/= '>') rest2)
where
res = TagOpen tag attrs
(tag,rest) = span isAlphaNum xs
(attrs,rest2) = parseAttributes rest
parseTags (x:xs) = [TagText $ parseString pre | not $ null pre] ++ parseTags post
where (pre,post) = break (== '<') (x:xs)
parseAttributes :: String -> ([Attribute], String)
parseAttributes (x:xs) | isSpace x = parseAttributes xs
| not $ isAlpha x = ([], x:xs)
| otherwise = ((parseString lhs, parseString rhs):attrs, over)
where
(attrs,over) = parseAttributes (dropWhile isSpace other)
(lhs,rest) = span isAlphaNum (x:xs)
rest2 = dropWhile isSpace rest
(rhs,other) = if "=" `isPrefixOf` rest2 then parseValue (dropWhile isSpace $ tail rest2) else ("", rest2)
parseValue :: String -> (String, String)
parseValue ('\"':xs) = (a, drop 1 b)
where (a,b) = break (== '\"') xs
parseValue x = span isValid x
where isValid x = isAlphaNum x || x `elem` "_-"
escapes = [("gt",">")
,("lt","<")
,("amp","&")
,("quot","\"")
]
parseEscape :: String -> Maybe String
parseEscape ('#':xs) | all isDigit xs = Just [chr $ read xs]
parseEscape xs = lookup xs escapes
parseString :: String -> String
parseString ('&':xs) = case parseEscape a of
Nothing -> '&' : parseString xs
Just x -> x ++ parseString (drop 1 b)
where (a,b) = break (== ';') xs
parseString (x:xs) = x : parseString xs
parseString [] = []
isTagOpen :: Tag -> Bool
isTagOpen (TagOpen {}) = True; isTagOpen _ = False
isTagClose :: Tag -> Bool
isTagClose (TagClose {}) = True; isTagClose _ = False
isTagText :: Tag -> Bool
isTagText (TagText {}) = True; isTagText _ = False
fromTagText :: Tag -> String
fromTagText (TagText x) = x
fromAttrib :: String -> Tag -> String
fromAttrib att (TagOpen _ atts) = fromMaybe "" $ lookup att atts
isTagOpenName :: String -> Tag -> Bool
isTagOpenName name (TagOpen n _) = n == name
isTagOpenName _ _ = False
isTagCloseName :: String -> Tag -> Bool
isTagCloseName name (TagClose n) = n == name
isTagCloseName _ _ = False
(~==) :: Tag -> Tag -> Bool
(TagText y) ~== (TagText x) = null x || x == y
(TagClose y) ~== (TagClose x) = null x || x == y
(TagOpen y ys) ~== (TagOpen x xs) = (null x || x == y) && all f xs
where
f ("",val) = val `elem` map snd ys
f (name,"") = name `elem` map fst ys
f nameval = nameval `elem` ys
_ ~== _ = False
(~/=) :: Tag -> Tag -> Bool
(~/=) a b = not (a ~== b)
sections :: (a -> Bool) -> [a] -> [[a]]
sections f [] = []
sections f (x:xs) = [x:xs | f x] ++ sections f xs
partitions :: (a -> Bool) -> [a] -> [[a]]
partitions f xs = g $ dropWhile (not . f) xs
where
g [] = []
g (x:xs) = (x:a) : g b
where (a,b) = break f xs