{- |
We do not define a tag data type here,
since this is too much bound to the particular use
(e.g. list or tree structure).
However we define a tag name and several
-}
module Text.HTML.Basic.Tag (
   Tag.Name(..),
   Tag.doctype, Tag.doctypeName, Tag.doctypeString,
   Tag.cdata,   Tag.cdataName,   Tag.cdataString,
   isEmpty, isSloppy, isInnerOf, closes,
   maybeMetaHTTPHeader, maybeMetaEncoding, maybeMetaCharset,
   encodingFromContentType,
   ) where


import Text.XML.Basic.Tag (Name, )

import qualified Text.XML.Basic.Tag as Tag
import qualified Text.XML.Basic.Attribute as Attr
import qualified Text.XML.Basic.Name as Name

import qualified Data.Map as Map
import qualified Data.Set as Set

import qualified Data.List.Reverse.StrictElement as ListRev
import qualified Data.Char as Char
import           Data.Tuple.HT (mapFst, )
import           Control.Monad (guard, liftM2, )


{- |
Check whether a HTML tag is empty.
-}
isEmpty :: (Name.Tag name) =>
   Name name -> Bool
isEmpty :: forall name. Tag name => Name name -> Bool
isEmpty = forall a b c. (a -> b -> c) -> b -> a -> c
flip forall a. Ord a => a -> Set a -> Bool
Set.member forall name. Tag name => Set (Name name)
emptySet

{- |
Set of empty HTML tags.
-}
emptySet :: (Name.Tag name) =>
   Set.Set (Name name)
emptySet :: forall name. Tag name => Set (Name name)
emptySet =
   forall name. Tag name => [String] -> Set (Name name)
nameSet forall a b. (a -> b) -> a -> b
$
   String
"area" forall a. a -> [a] -> [a]
:
   String
"base" forall a. a -> [a] -> [a]
:
   String
"br" forall a. a -> [a] -> [a]
:
   String
"col" forall a. a -> [a] -> [a]
:
   String
"frame" forall a. a -> [a] -> [a]
:
   String
"hr" forall a. a -> [a] -> [a]
:
   String
"img" forall a. a -> [a] -> [a]
:
   String
"input" forall a. a -> [a] -> [a]
:
   String
"link" forall a. a -> [a] -> [a]
:
   String
"meta" forall a. a -> [a] -> [a]
:
   String
"param" forall a. a -> [a] -> [a]
:
   []


{- |
Some tags, namely those for text styles like FONT, B, I,
are used quite sloppily.
That is, they are not terminated or not terminated in the right order.
We close them implicitly, if another tag closes
and ignore non-matching closing tags.
-}
isSloppy ::  (Name.Tag name) =>
   Name name -> Bool
isSloppy :: forall name. Tag name => Name name -> Bool
isSloppy = forall a b c. (a -> b -> c) -> b -> a -> c
flip forall a. Ord a => a -> Set a -> Bool
Set.member forall name. Tag name => Set (Name name)
sloppySet

{-
Example page:
http://extremetracking.com/open;unique?login=crsucks
-}

sloppySet :: (Name.Tag name) =>
   Set.Set (Name name)
sloppySet :: forall name. Tag name => Set (Name name)
sloppySet =
   forall name. Tag name => [String] -> Set (Name name)
nameSet forall a b. (a -> b) -> a -> b
$
   String
"font" forall a. a -> [a] -> [a]
:
   String
"b" forall a. a -> [a] -> [a]
:
   String
"i" forall a. a -> [a] -> [a]
:
   String
"tt" forall a. a -> [a] -> [a]
:
   String
"u" forall a. a -> [a] -> [a]
:
   String
"strike" forall a. a -> [a] -> [a]
:
   String
"s" forall a. a -> [a] -> [a]
:
   String
"big" forall a. a -> [a] -> [a]
:
   String
"small" forall a. a -> [a] -> [a]
:
   []


isInnerOf :: (Name.Tag name) =>
   Name name -> Name name -> Bool
isInnerOf :: forall name. Tag name => Name name -> Name name -> Bool
isInnerOf Name name
outer Name name
inner =
   forall b a. b -> (a -> b) -> Maybe a -> b
maybe Bool
False (forall a. Ord a => a -> Set a -> Bool
Set.member Name name
inner) forall a b. (a -> b) -> a -> b
$
   forall k a. Ord k => k -> Map k a -> Maybe a
Map.lookup Name name
outer forall name. Tag name => Map (Name name) (Set (Name name))
innerMap


innerMap :: (Name.Tag name) =>
   Map.Map (Name name) (Set.Set (Name name))
innerMap :: forall name. Tag name => Map (Name name) (Set (Name name))
innerMap =
   forall name a. Tag name => [(String, a)] -> Map (Name name) a
nameMap forall a b. (a -> b) -> a -> b
$
   (String
"body",    forall name. Tag name => Set (Name name)
pSet) forall a. a -> [a] -> [a]
:
   (String
"caption", forall name. Tag name => Set (Name name)
pSet) forall a. a -> [a] -> [a]
:
   (String
"dd",      forall name. Tag name => Set (Name name)
pSet) forall a. a -> [a] -> [a]
:
   (String
"div",     forall name. Tag name => Set (Name name)
pSet) forall a. a -> [a] -> [a]
:
   (String
"dl",      forall name. Tag name => Set (Name name)
dtdSet) forall a. a -> [a] -> [a]
:
   (String
"dt",      forall name. Tag name => Set (Name name)
pSet) forall a. a -> [a] -> [a]
:
   (String
"li",      forall name. Tag name => Set (Name name)
pSet) forall a. a -> [a] -> [a]
:
   (String
"map",     forall name. Tag name => Set (Name name)
pSet) forall a. a -> [a] -> [a]
:
   (String
"object",  forall name. Tag name => Set (Name name)
pSet) forall a. a -> [a] -> [a]
:
   (String
"ol",      forall name. Tag name => Set (Name name)
liSet) forall a. a -> [a] -> [a]
:
   (String
"table",   forall name. Tag name => [String] -> Set (Name name)
nameSet [String
"th",String
"tr",String
"td",String
"thead",String
"tfoot",String
"tbody"]) forall a. a -> [a] -> [a]
:
   (String
"tbody",   forall name. Tag name => Set (Name name)
thdrSet) forall a. a -> [a] -> [a]
:
   (String
"td",      forall name. Tag name => Set (Name name)
pSet) forall a. a -> [a] -> [a]
:
   (String
"tfoot",   forall name. Tag name => Set (Name name)
thdrSet) forall a. a -> [a] -> [a]
:
   (String
"th",      forall name. Tag name => Set (Name name)
pSet) forall a. a -> [a] -> [a]
:
   (String
"thead",   forall name. Tag name => Set (Name name)
thdrSet) forall a. a -> [a] -> [a]
:
   (String
"tr",      forall name. Tag name => Set (Name name)
thdSet) forall a. a -> [a] -> [a]
:
   (String
"ul",      forall name. Tag name => Set (Name name)
liSet) forall a. a -> [a] -> [a]
:
   []


closes :: (Name.Tag name) =>
   Name name -> Name name -> Bool
closes :: forall name. Tag name => Name name -> Name name -> Bool
closes Name name
closing Name name
opening =
   (Bool -> Bool
not (forall name. C name => String -> name -> Bool
Name.match String
"option" Name name
closing) Bool -> Bool -> Bool
&& forall name. C name => String -> name -> Bool
Name.match String
"select" Name name
opening) Bool -> Bool -> Bool
||
   (forall name. C name => [String] -> name -> Bool
Name.matchAny [String
"option", String
"script", String
"style",String
"textarea",String
"title"] Name name
opening) Bool -> Bool -> Bool
||
   (forall b a. b -> (a -> b) -> Maybe a -> b
maybe Bool
False (forall a. Ord a => a -> Set a -> Bool
Set.member Name name
opening) forall a b. (a -> b) -> a -> b
$
    forall k a. Ord k => k -> Map k a -> Maybe a
Map.lookup Name name
closing forall name. Tag name => Map (Name name) (Set (Name name))
closesMap)


closesMap :: (Name.Tag name) =>
   Map.Map (Name name) (Set.Set (Name name))
closesMap :: forall name. Tag name => Map (Name name) (Set (Name name))
closesMap =
   forall name a. Tag name => [(String, a)] -> Map (Name name) a
nameMap forall a b. (a -> b) -> a -> b
$
   (String
"a"        , forall name. Tag name => String -> Set (Name name)
nameSingle String
"a") forall a. a -> [a] -> [a]
:
   (String
"li"       , forall name. Tag name => Set (Name name)
liSet) forall a. a -> [a] -> [a]
:
   (String
"th"       , forall name. Tag name => Set (Name name)
thdSet) forall a. a -> [a] -> [a]
:
   (String
"td"       , forall name. Tag name => Set (Name name)
thdSet) forall a. a -> [a] -> [a]
:
   (String
"tr"       , forall name. Tag name => Set (Name name)
thdrSet) forall a. a -> [a] -> [a]
:
   (String
"dt"       , forall name. Tag name => Set (Name name)
dtdSet) forall a. a -> [a] -> [a]
:
   (String
"dd"       , forall name. Tag name => Set (Name name)
dtdSet) forall a. a -> [a] -> [a]
:
   (String
"hr"       , forall name. Tag name => Set (Name name)
pSet) forall a. a -> [a] -> [a]
:
   (String
"colgroup" , forall name. Tag name => String -> Set (Name name)
nameSingle String
"colgroup") forall a. a -> [a] -> [a]
:
   (String
"form"     , forall name. Tag name => String -> Set (Name name)
nameSingle String
"form") forall a. a -> [a] -> [a]
:
   (String
"label"    , forall name. Tag name => String -> Set (Name name)
nameSingle String
"label") forall a. a -> [a] -> [a]
:
   (String
"map"      , forall name. Tag name => String -> Set (Name name)
nameSingle String
"map") forall a. a -> [a] -> [a]
:
   (String
"object"   , forall name. Tag name => String -> Set (Name name)
nameSingle String
"object") forall a. a -> [a] -> [a]
:
   (String
"thead"    , forall name. Tag name => [String] -> Set (Name name)
nameSet [String
"colgroup"]) forall a. a -> [a] -> [a]
:
   (String
"tfoot"    , forall name. Tag name => [String] -> Set (Name name)
nameSet [String
"thead", String
"colgroup"]) forall a. a -> [a] -> [a]
:
   (String
"tbody"    , forall name. Tag name => [String] -> Set (Name name)
nameSet [String
"tbody", String
"tfoot", String
"thead", String
"colgroup"]) forall a. a -> [a] -> [a]
:
   (String
"h1"       , forall name. Tag name => Set (Name name)
headingSet) forall a. a -> [a] -> [a]
:
   (String
"h2"       , forall name. Tag name => Set (Name name)
headingSet) forall a. a -> [a] -> [a]
:
   (String
"h3"       , forall name. Tag name => Set (Name name)
headingSet) forall a. a -> [a] -> [a]
:
   (String
"h4"       , forall name. Tag name => Set (Name name)
headingSet) forall a. a -> [a] -> [a]
:
   (String
"h5"       , forall name. Tag name => Set (Name name)
headingSet) forall a. a -> [a] -> [a]
:
   (String
"h6"       , forall name. Tag name => Set (Name name)
headingSet) forall a. a -> [a] -> [a]
:
   (String
"dl"       , forall name. Tag name => Set (Name name)
headingSet) forall a. a -> [a] -> [a]
:
   (String
"ol"       , forall name. Tag name => Set (Name name)
headingSet) forall a. a -> [a] -> [a]
:
   (String
"ul"       , forall name. Tag name => Set (Name name)
headingSet) forall a. a -> [a] -> [a]
:
   (String
"table"    , forall name. Tag name => Set (Name name)
headingSet) forall a. a -> [a] -> [a]
:
   (String
"div"      , forall name. Tag name => Set (Name name)
headingSet) forall a. a -> [a] -> [a]
:
   (String
"p"        , forall name. Tag name => Set (Name name)
headingSet) forall a. a -> [a] -> [a]
:
   []


nameMap :: (Name.Tag name) => [(String,a)] -> Map.Map (Name name) a
nameMap :: forall name a. Tag name => [(String, a)] -> Map (Name name) a
nameMap = forall k a. Ord k => [(k, a)] -> Map k a
Map.fromList forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a -> b) -> [a] -> [b]
map (forall a c b. (a -> c) -> (a, b) -> (c, b)
mapFst forall name. C name => String -> name
Name.fromString)

nameSet :: (Name.Tag name) => [String] -> Set.Set (Name name)
nameSet :: forall name. Tag name => [String] -> Set (Name name)
nameSet = forall a. Ord a => [a] -> Set a
Set.fromList forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a -> b) -> [a] -> [b]
map forall name. C name => String -> name
Name.fromString

nameSingle :: (Name.Tag name) => String -> Set.Set (Name name)
nameSingle :: forall name. Tag name => String -> Set (Name name)
nameSingle = forall a. a -> Set a
Set.singleton forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall name. C name => String -> name
Name.fromString

pSet, dtdSet, thdSet, thdrSet, liSet, headingSet ::
   (Name.Tag name) => Set.Set (Name name)
pSet :: forall name. Tag name => Set (Name name)
pSet       = forall name. Tag name => [String] -> Set (Name name)
nameSet [String
"p"]
dtdSet :: forall name. Tag name => Set (Name name)
dtdSet     = forall name. Tag name => [String] -> Set (Name name)
nameSet [String
"dt",String
"dd"]
thdSet :: forall name. Tag name => Set (Name name)
thdSet     = forall name. Tag name => [String] -> Set (Name name)
nameSet [String
"th",String
"td"]
thdrSet :: forall name. Tag name => Set (Name name)
thdrSet    = forall name. Tag name => [String] -> Set (Name name)
nameSet [String
"th",String
"td",String
"tr"]
liSet :: forall name. Tag name => Set (Name name)
liSet      = forall name. Tag name => [String] -> Set (Name name)
nameSet [String
"li"]
headingSet :: forall name. Tag name => Set (Name name)
headingSet = forall name. Tag name => [String] -> Set (Name name)
nameSet [String
"h1",String
"h2",String
"h3",String
"h4",String
"h5",String
"h6",String
"p" {- not "div" -}]




maybeMetaHTTPHeader ::
   (Name.Tag name, Name.Attribute name) =>
   Tag.Name name -> [Attr.T name string] -> Maybe (string, string)
maybeMetaHTTPHeader :: forall name string.
(Tag name, Attribute name) =>
Name name -> [T name string] -> Maybe (string, string)
maybeMetaHTTPHeader Name name
name [T name string]
attrs =
   do forall (f :: * -> *). Alternative f => Bool -> f ()
guard (forall name. C name => String -> name -> Bool
Name.match String
"meta" Name name
name)
      forall (m :: * -> *) a1 a2 r.
Monad m =>
(a1 -> a2 -> r) -> m a1 -> m a2 -> m r
liftM2 (,)
         (forall name string.
Attribute name =>
String -> [T name string] -> Maybe string
Attr.lookupLit String
"http-equiv" [T name string]
attrs)
         (forall name string.
Attribute name =>
String -> [T name string] -> Maybe string
Attr.lookupLit String
"content" [T name string]
attrs)


{- |
Extract charset from Content-Type declaration.

> encodingFromContentType "text/html; charset=UTF-8" == "utf-8"

The routine does not perform any syntax check.
-}
encodingFromContentType :: String -> String
encodingFromContentType :: String -> String
encodingFromContentType = forall a b. (a -> b) -> [a] -> [b]
map Char -> Char
Char.toLower forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. (a -> Bool) -> [a] -> [a]
ListRev.takeWhile (Char
'='forall a. Eq a => a -> a -> Bool
/=)

{- |
A simple routine that does not check for valid syntax
of the Content-Type specification.

In future we might use a distinct @Encoding@ type instead of plain String.
-}
maybeMetaEncoding ::
   (Name.Tag name, Name.Attribute name) =>
   Tag.Name name -> [Attr.T name String] -> Maybe String
maybeMetaEncoding :: forall name.
(Tag name, Attribute name) =>
Name name -> [T name String] -> Maybe String
maybeMetaEncoding Name name
name [T name String]
attrs =
   do (String
headerName, String
content) <- forall name string.
(Tag name, Attribute name) =>
Name name -> [T name string] -> Maybe (string, string)
maybeMetaHTTPHeader Name name
name [T name String]
attrs
      forall (f :: * -> *). Alternative f => Bool -> f ()
guard ((String
"content-type"forall a. Eq a => a -> a -> Bool
==) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a -> b) -> [a] -> [b]
map Char -> Char
Char.toLower forall a b. (a -> b) -> a -> b
$ String
headerName)
      forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ String -> String
encodingFromContentType String
content

maybeMetaCharset ::
   (Name.Tag name, Name.Attribute name) =>
   Tag.Name name -> [Attr.T name string] -> Maybe string
maybeMetaCharset :: forall name string.
(Tag name, Attribute name) =>
Name name -> [T name string] -> Maybe string
maybeMetaCharset Name name
name [T name string]
attrs =
   do forall (f :: * -> *). Alternative f => Bool -> f ()
guard (forall name. C name => String -> name -> Bool
Name.match String
"meta" Name name
name)
      forall name string.
Attribute name =>
String -> [T name string] -> Maybe string
Attr.lookupLit String
"charset" [T name string]
attrs