module Text.HTML.TagSoup.HT.Process where

import qualified Text.HTML.TagSoup.HT.Tag as Tag
import qualified Text.HTML.TagSoup.HT.Tag.Match as Match

import qualified Text.XML.Basic.Attribute as Attr
import qualified Text.XML.Basic.Name as Name

import qualified Data.Char as Char
import           Data.List.HT (viewL, takeWhileRev, )
import           Data.Tuple.HT (mapFst, )
import           Data.Maybe (fromMaybe, mapMaybe, )
import           Control.Monad (guard, liftM2, )


-- * analyse soup

{-
Rather the same as wraxml:HTML.Tree.findMetaEncoding
-}
findMetaEncoding ::
   (Name.Tag name, Name.Attribute name, Eq name) =>
   [Tag.T name String] -> Maybe String
findMetaEncoding =
   fmap (map Char.toLower . takeWhileRev ('='/=)) .
   lookup "content-type" .
   map (mapFst (map Char.toLower)) .
   getMetaHTTPHeaders

{- |
Extract META tags which contain HTTP-EQUIV attribute
and present these values like HTTP headers.
-}
getMetaHTTPHeaders ::
   (Name.Tag name, Name.Attribute name, Eq name) =>
   [Tag.T name string] -> [(string, string)]
getMetaHTTPHeaders =
   mapMaybe
      (\tag ->
         do (name, attrs) <- Tag.maybeOpen tag
            guard (Name.match "meta" name)
            liftM2 (,)
               (Attr.lookupLit "http-equiv" attrs)
               (Attr.lookupLit "content" attrs)) .
   getHeadTags

getHeadTags ::
   (Name.Tag name, Name.Attribute name, Eq name) =>
   [Tag.T name string] -> [Tag.T name string]
getHeadTags =
   takeWhile (not . Match.closeLit "head") .
   drop 1 .
   dropWhile (not . Match.openLit "head" (const True)) .
   takeWhile (not . Match.openLit "body" (const True))


-- * transform soup

{- |
Modify attributes and tags of certain parts.
For limitations, see 'parts'.
-}
partAttrs ::
   (Eq name) =>
   (Tag.Name name -> Bool) ->
   (([Attr.T name string], [Tag.T name string]) ->
    ([Attr.T name string], [Tag.T name string])) ->
   [Tag.T name string] -> [Tag.T name string]
partAttrs p f =
   concatMap
      (either
          (\((name,attrs),part) ->
              let (newAttrs, newPart) = f (attrs, part)
              in  Tag.Open name newAttrs : newPart ++ [Tag.Close name])
          id) .
   parts p

{- |
Extract parts from the tag soup
that are enclosed in corresponding open and close tags.
If a close tag is missing, the soup end is considered as end of the part.
However nested tags are not supported,
e.g. in @<a><a></a></a>@ the second @<a>@ is considered
to be enclosed in the first @<a>@ and the first @</a>@
and the second @</a>@ is ignored.
-}
parts ::
   (Eq name) =>
   (Tag.Name name -> Bool) ->
   [Tag.T name string] ->
   [Either
       ((Tag.Name name, [Attr.T name string]), [Tag.T name string])
       [Tag.T name string]]
parts p =
   let recourse ts =
          let (prefix0,suffix0) = break (Match.open p (const True)) ts
          in  Right prefix0 :
              fromMaybe []
                 (do (t, suffix1) <- viewL suffix0
                     (name, attrs) <- Tag.maybeOpen t
                     let (part,suffix2) = break (Match.close (name==)) suffix1
                     return $ Left ((name, attrs), part) : recourse (drop 1 suffix2))
   in  recourse