module Text.HTML.TagSoup.HT.Tag where

import Data.Char (toLower, toUpper, )
import Data.Maybe (mapMaybe, )


-- * type definitions

{- | An HTML attribute @id=\"name\"@ generates @(\"id\",\"name\")@ -}
type Attribute char = (String,[char])

{- |
An HTML element, a document is @[T]@.
There is no requirement for 'Open' and 'Close' to match.

The type parameter @char@ lets you choose between
@Char@ for interpreted HTML entity references and
@HTMLChar.T@ for uninterpreted HTML entity.
You will most oftenly want plain @Char@,
since @HTMLChar.T@ is only necessary if you want to know,
whether a non-ASCII character was encoded as HTML entity
or as non-ASCII Unicode character.
-}
data T char =
     Open String [Attribute char]
        -- ^ An open tag with 'Attribute's in their original order.
   | Close String
        -- ^ A closing tag
   | Text [char]
        -- ^ A text node, guaranteed not to be the empty string
   | Comment String
        -- ^ A comment
   | Special String String
        -- ^ A tag like @\<!DOCTYPE ...\>@
   | Processing String (Processing char)
        -- ^ A tag like @\<?xml ...\>@
   | Warning String
        -- ^ Mark a syntax error in the input file
     deriving (Show, Eq, Ord)

data Processing char =
     KnownProcessing [Attribute char]
   | UnknownProcessing String
     deriving (Show, Eq, Ord)


-- * check for certain tag types

-- | Test if a 'T' is a 'Open'
isOpen :: T char -> Bool
isOpen tag = case tag of (Open {}) -> True; _ -> False

maybeOpen :: T char -> Maybe (String, [Attribute char])
maybeOpen tag = case tag of Open name attrs -> Just (name, attrs); _ -> Nothing


-- | Test if a 'T' is a 'Close'
isClose :: T char -> Bool
isClose tag = case tag of (Close {}) -> True; _ -> False

maybeClose :: T char -> Maybe String
maybeClose tag = case tag of Close x -> Just x; _ -> Nothing


-- | Test if a 'T' is a 'Text'
isText :: T char -> Bool
isText tag = case tag of (Text {}) -> True; _ -> False

-- | Extract the string from within 'Text', otherwise 'Nothing'
maybeText :: T char -> Maybe [char]
maybeText tag = case tag of Text x -> Just x; _ -> Nothing
-- maybeText tag = do Text x <- Just tag; return x

-- | Extract all text content from tags (similar to Verbatim found in HaXml)
innerText :: [T char] -> [char]
innerText = concat . mapMaybe maybeText


isComment :: T char -> Bool
isComment tag = case tag of (Comment {}) -> True; _ -> False

maybeComment :: T char -> Maybe String
maybeComment tag = case tag of Comment x -> Just x; _ -> Nothing


isSpecial :: T char -> Bool
isSpecial tag = case tag of (Special {}) -> True; _ -> False

maybeSpecial :: T char -> Maybe (String, String)
maybeSpecial tag = case tag of Special name content -> Just (name, content); _ -> Nothing


isProcessing :: T char -> Bool
isProcessing tag = case tag of (Processing {}) -> True; _ -> False

maybeProcessing :: T char -> Maybe (String, Processing char)
maybeProcessing tag = case tag of Processing target instr -> Just (target, instr); _ -> Nothing


isWarning :: T char -> Bool
isWarning tag = case tag of (Warning {}) -> True; _ -> False

maybeWarning :: T char -> Maybe String
maybeWarning tag = case tag of Warning x -> Just x; _ -> Nothing
-- maybeWarning tag = do Warning x <- Just tag; return x



-- * tag processing

canonicalizeSoup :: [T char] -> [T char]
canonicalizeSoup =
   map canonicalize

{- |
Turns all tag names to lower case and
converts DOCTYPE to upper case.
-}
canonicalize :: T char -> T char
canonicalize t =
   case t of
      Open  name attrs  -> Open  (map toLower name) attrs
      Close name        -> Close (map toLower name)
      Special name info -> Special (map toUpper name) info
      _ -> t

{- |
Replace CDATA sections by plain text.
-}
textFromCData :: T Char -> T Char
textFromCData t =
   case t of
      Special "[CDATA[" text -> Text text
      _ -> t