module Text.HTML.TagSoup.HT.Tag where
import Data.Char (toLower, toUpper, )
import Data.Maybe (mapMaybe, )
-- * type definitions
{- |
An attribute as a pair of name and value:
The HTML attribute @id=\"name\"@ generates @(\"id\",\"name\")@.
The name is always a plain 'String',
whereas the value is a list of the character type @char@.
-}
type Attribute char = (String,[char])
{- |
An HTML element, a document is @[T]@.
There is no requirement for 'Open' and 'Close' to match.

The type parameter @char@ lets you choose between
@Char@ for interpreted HTML entity references and
@HTMLChar.T@ for uninterpreted HTML entity references.
You will most often want plain @Char@,
since @HTMLChar.T@ is only necessary if you want to know
whether a non-ASCII character was encoded as an HTML entity
or as a non-ASCII Unicode character.
-}
data T char =
     Open String [Attribute char]
       -- ^ An open tag with 'Attribute's in their original order.
   | Close String
       -- ^ A closing tag
   | Text [char]
       -- ^ A text node, guaranteed not to be the empty string
   | Comment String
       -- ^ A comment
   | Special String String
       -- ^ A tag like @\<!DOCTYPE ...\>@, given as name and remaining content
   | Processing String (Processing char)
       -- ^ A tag like @\<?xml ...?\>@, given as target name and content
   | Warning String
       -- ^ Mark a syntax error in the input file
   deriving (Show, Eq, Ord)
{- |
The content of a 'Processing' tag.

NOTE(review): which targets end up as 'KnownProcessing' is decided
outside of the code shown here (presumably by the parser) - confirm.
-}
data Processing char =
     KnownProcessing [Attribute char]
       -- ^ Content that is available as a list of 'Attribute's
   | UnknownProcessing String
       -- ^ Content that is kept as a plain 'String'
   deriving (Show, Eq, Ord)
-- * check for certain tag types
-- | Check whether a 'T' is an 'Open' tag.
isOpen :: T char -> Bool
isOpen (Open _ _) = True
isOpen _ = False
-- | Extract name and attributes from an 'Open' tag, otherwise 'Nothing'.
maybeOpen :: T char -> Maybe (String, [Attribute char])
maybeOpen (Open name attrs) = Just (name, attrs)
maybeOpen _ = Nothing
-- | Check whether a 'T' is a 'Close' tag.
isClose :: T char -> Bool
isClose (Close _) = True
isClose _ = False
-- | Extract the tag name from a 'Close' tag, otherwise 'Nothing'.
maybeClose :: T char -> Maybe String
maybeClose (Close name) = Just name
maybeClose _ = Nothing
-- | Check whether a 'T' is a 'Text' node.
isText :: T char -> Bool
isText (Text _) = True
isText _ = False
-- | Return the content of a 'Text' node, and 'Nothing' for any other tag.
maybeText :: T char -> Maybe [char]
maybeText (Text str) = Just str
maybeText _ = Nothing
-- Alternative formulation:
-- maybeText tag = do Text x <- Just tag; return x
{- |
Concatenate the contents of all 'Text' nodes of a tag list
in their original order; all other tags are ignored
(similar to Verbatim found in HaXml).
-}
innerText :: [T char] -> [char]
innerText tags = concat texts
  where
    -- contents of the 'Text' nodes only
    texts = mapMaybe maybeText tags
-- | Check whether a 'T' is a 'Comment'.
isComment :: T char -> Bool
isComment (Comment _) = True
isComment _ = False
-- | Extract the string from within a 'Comment', otherwise 'Nothing'.
maybeComment :: T char -> Maybe String
maybeComment (Comment str) = Just str
maybeComment _ = Nothing
-- | Check whether a 'T' is a 'Special' tag.
isSpecial :: T char -> Bool
isSpecial (Special _ _) = True
isSpecial _ = False
-- | Extract both string fields (name, content) of a 'Special' tag, otherwise 'Nothing'.
maybeSpecial :: T char -> Maybe (String, String)
maybeSpecial (Special name content) = Just (name, content)
maybeSpecial _ = Nothing
-- | Check whether a 'T' is a 'Processing' tag.
isProcessing :: T char -> Bool
isProcessing (Processing _ _) = True
isProcessing _ = False
-- | Extract both fields (target, content) of a 'Processing' tag, otherwise 'Nothing'.
maybeProcessing :: T char -> Maybe (String, Processing char)
maybeProcessing (Processing target instr) = Just (target, instr)
maybeProcessing _ = Nothing
-- | Check whether a 'T' is a 'Warning'.
isWarning :: T char -> Bool
isWarning (Warning _) = True
isWarning _ = False
-- | Extract the message from within a 'Warning', otherwise 'Nothing'.
maybeWarning :: T char -> Maybe String
maybeWarning (Warning msg) = Just msg
maybeWarning _ = Nothing
-- Alternative formulation:
-- maybeWarning tag = do Warning x <- Just tag; return x
-- * tag processing
-- | Apply 'canonicalize' to every tag of a tag list, keeping the order.
canonicalizeSoup :: [T char] -> [T char]
canonicalizeSoup soup = [canonicalize tag | tag <- soup]
{- |
Normalize the letter case of a single tag:
The names of 'Open' and 'Close' tags are converted to lower case,
the name of a 'Special' tag (e.g. DOCTYPE) is converted to upper case.
Attributes, the content of a 'Special' tag
and all other kinds of tags are returned unchanged.
-}
canonicalize :: T char -> T char
canonicalize (Open name attrs) = Open (map toLower name) attrs
canonicalize (Close name) = Close (map toLower name)
canonicalize (Special name info) = Special (map toUpper name) info
canonicalize other = other
{- |
Turn a CDATA section into a plain text node:
A 'Special' tag with the exact name @[CDATA[@ becomes a 'Text' node
with the same content, every other tag is returned unchanged.
Note that an empty CDATA section yields an empty 'Text' node.
-}
textFromCData :: T Char -> T Char
textFromCData (Special "[CDATA[" text) = Text text
textFromCData other = other