{- |
Tag type for a flat (\"tag soup\") representation of an HTML document,
together with predicates, extractors and a few simple transformations
on single tags and lists of tags.
-}
module Text.HTML.TagSoup.HT.Tag where

import Data.Char (toLower, toUpper, )
import Data.Maybe (mapMaybe, )


-- * type definitions

{- |
An HTML attribute @id=\"name\"@ generates @(\"id\",\"name\")@
-}
type Attribute char = (String,[char])

{- |
An HTML element, a document is @[T]@.
There is no requirement for 'Open' and 'Close' to match.

The type parameter @char@ lets you choose between
@Char@ for interpreted HTML entity references and
@HTMLChar.T@ for uninterpreted HTML entities.
You will most often want plain @Char@,
since @HTMLChar.T@ is only necessary
if you want to know whether a non-ASCII character
was encoded as an HTML entity or as a non-ASCII Unicode character.
-}
data T char =
     Open String [Attribute char]
        -- ^ An open tag with 'Attribute's in their original order.
   | Close String
        -- ^ A closing tag
   | Text [char]
        -- ^ A text node, guaranteed not to be the empty string
   | Comment String
        -- ^ A comment
   | Special String String
        -- ^ A tag like @\<!DOCTYPE ...\>@
   | Processing String (Processing char)
        -- ^ A tag like @\<?xml ...?\>@
   | Warning String
        -- ^ Mark a syntax error in the input file
     deriving (Show, Eq, Ord)

{- |
Content of a 'Processing' tag:
either a list of attributes ('KnownProcessing')
or an uninterpreted string ('UnknownProcessing').
-}
data Processing char =
     KnownProcessing [Attribute char]
   | UnknownProcessing String
     deriving (Show, Eq, Ord)


-- * check for certain tag types

-- | Test if a 'T' is a 'Open'
isOpen :: T char -> Bool
isOpen (Open {}) = True
isOpen _         = False

-- | Extract name and attributes from within 'Open', otherwise 'Nothing'
maybeOpen :: T char -> Maybe (String, [Attribute char])
maybeOpen (Open name attrs) = Just (name, attrs)
maybeOpen _                 = Nothing


-- | Test if a 'T' is a 'Close'
isClose :: T char -> Bool
isClose (Close {}) = True
isClose _          = False

-- | Extract the tag name from within 'Close', otherwise 'Nothing'
maybeClose :: T char -> Maybe String
maybeClose (Close name) = Just name
maybeClose _            = Nothing


-- | Test if a 'T' is a 'Text'
isText :: T char -> Bool
isText (Text {}) = True
isText _         = False

-- | Extract the string from within 'Text', otherwise 'Nothing'
maybeText :: T char -> Maybe [char]
maybeText (Text str) = Just str
maybeText _          = Nothing
-- maybeText tag = do Text x <- Just tag; return x

-- | Extract all text content from tags (similar to Verbatim found in HaXml)
innerText :: [T char] -> [char]
innerText = concat . mapMaybe maybeText


-- | Test if a 'T' is a 'Comment'
isComment :: T char -> Bool
isComment (Comment {}) = True
isComment _            = False

-- | Extract the string from within 'Comment', otherwise 'Nothing'
maybeComment :: T char -> Maybe String
maybeComment (Comment str) = Just str
maybeComment _             = Nothing


-- | Test if a 'T' is a 'Special'
isSpecial :: T char -> Bool
isSpecial (Special {}) = True
isSpecial _            = False

-- | Extract name and content from within 'Special', otherwise 'Nothing'
maybeSpecial :: T char -> Maybe (String, String)
maybeSpecial (Special name content) = Just (name, content)
maybeSpecial _                      = Nothing


-- | Test if a 'T' is a 'Processing'
isProcessing :: T char -> Bool
isProcessing (Processing {}) = True
isProcessing _               = False

-- | Extract target and instruction from within 'Processing',
-- otherwise 'Nothing'
maybeProcessing :: T char -> Maybe (String, Processing char)
maybeProcessing (Processing target instr) = Just (target, instr)
maybeProcessing _                         = Nothing


-- | Test if a 'T' is a 'Warning'
isWarning :: T char -> Bool
isWarning (Warning {}) = True
isWarning _            = False

-- | Extract the message from within 'Warning', otherwise 'Nothing'
maybeWarning :: T char -> Maybe String
maybeWarning (Warning msg) = Just msg
maybeWarning _             = Nothing
-- maybeWarning tag = do Warning x <- Just tag; return x


-- * tag processing

-- | Apply 'canonicalize' to every tag of a document.
canonicalizeSoup :: [T char] -> [T char]
canonicalizeSoup = map canonicalize

{- |
Turns the names of 'Open' and 'Close' tags to lower case
and the name of a 'Special' tag (e.g. DOCTYPE) to upper case.
Attributes, the content of 'Special' tags
and all other kinds of tags are left unchanged.
-}
canonicalize :: T char -> T char
canonicalize t =
   case t of
      Open name attrs   -> Open (map toLower name) attrs
      Close name        -> Close (map toLower name)
      Special name info -> Special (map toUpper name) info
      _                 -> t

{- |
Replace CDATA sections by plain text.
Only a 'Special' tag whose name is exactly @\"[CDATA[\"@ is converted,
every other tag is returned unchanged.

NOTE(review): an empty CDATA section becomes @Text \"\"@,
which does not satisfy the non-emptiness guarantee documented for 'Text'.
Confirm whether callers rely on that guarantee.
-}
textFromCData :: T Char -> T Char
textFromCData t =
   case t of
      Special "[CDATA[" text -> Text text
      _                      -> t