module Text.HTML.Tagchup.Process (
   Encoding, Encoded,
   evalDecodeAdaptive, decodeAdaptive, decodeTagAdaptive,
   getXMLEncoding,
   findMetaEncoding,
   getMetaHTTPHeaders,
   getHeadTags,
   partAttrs,
   parts,
   ) where

import qualified Text.HTML.Tagchup.Tag as Tag
import qualified Text.HTML.Tagchup.Tag.Match as Match

import qualified Text.XML.Basic.Attribute as Attr
import qualified Text.XML.Basic.Name as Name
import qualified Text.XML.Basic.Tag as TagX
import qualified Text.HTML.Basic.Tag as TagH
import qualified Text.HTML.Basic.Character as HTMLChar
import qualified Text.HTML.Basic.String as HTMLString

import Text.HTML.Basic.String (Encoded, )

import Control.Monad.Trans.State (State, put, get, evalState, )
import Data.Traversable (traverse, )

import           Data.List.HT (viewL, )
import           Data.Maybe (fromMaybe, mapMaybe, )
import           Control.Monad.HT ((<=<), )
import           Control.Monad (msum, mplus, )


-- * analyse soup


type Encoding = String



evalDecodeAdaptive ::
   State (Encoded -> String) a -> a
evalDecodeAdaptive =
   flip evalState id

{- |
Selects a decoder dynamically according
to xml-encoding and meta-http-equiv tags.
The @?xml@ tag should only appear at the beginning of a document,
but we respect it at every occurence.

> import qualified Text.XML.HXT.DOM.Unicode as Unicode

> evalDecodeAdaptive .
> decodeAdaptive
>    (maybe Unicode.latin1ToUnicode (fst.) .
>     Unicode.getDecodingFct)
-}
decodeAdaptive ::
   (Name.Attribute name, Name.Tag name) =>
   (Encoding -> Encoded -> String) ->
   [Tag.T name [HTMLChar.T]] ->
   State (Encoded -> String) [Tag.T name String]
decodeAdaptive getDecoder =
   traverse (decodeTagAdaptive getDecoder)

{- |
@decodeTagAdaptive decoderSelector tag@ generates a state monad,
with a decoder as state.
It decodes encoding specific byte sequences
using the current decoder
and XML references using a fixed table.
-}
decodeTagAdaptive ::
   (Name.Attribute name, Name.Tag name) =>
   (Encoding -> Encoded -> String) ->
   Tag.T name [HTMLChar.T] ->
   State (Encoded -> String) (Tag.T name String)
decodeTagAdaptive getDecoder tag0 =
   do decoder <- get
      let tag1 =
             -- this is less elegant than using maybeCData but lazier
             maybe
                (fmap (HTMLString.decode decoder) tag0)
                (\(name, s) ->
                   Tag.special name $
                      if TagH.cdataName == name
                        then decoder s
                        else s)
                (Tag.maybeSpecial tag0)
      maybe
         (return ())
         (put . getDecoder) $
         mplus
            (uncurry TagH.maybeMetaEncoding =<<
             Tag.maybeOpen tag1)
            (uncurry TagX.maybeXMLEncoding =<<
             Tag.maybeProcessing tag1)
      return tag1

{- |
Check whether the first tag is an @xml@ processing instruction tag
and return the value of its @encoding@ attribute.
-}
getXMLEncoding ::
   (Name.Tag name, Name.Attribute name) =>
   [Tag.T name String] -> Maybe String
getXMLEncoding tags =
   do (t,_) <- viewL tags
      uncurry TagX.maybeXMLEncoding =<< Tag.maybeProcessing t

{- |
Rather the same as @wraxml:HTML.Tree.findMetaEncoding@
-}
findMetaEncoding ::
   (Name.Tag name, Name.Attribute name) =>
   [Tag.T name String] -> Maybe String
findMetaEncoding =
   msum .
   map (uncurry TagH.maybeMetaEncoding <=< Tag.maybeOpen) .
   getHeadTags


{- |
Extract META tags which contain HTTP-EQUIV attribute
and present these values like HTTP headers.
-}
getMetaHTTPHeaders ::
   (Name.Tag name, Name.Attribute name) =>
   [Tag.T name string] -> [(string, string)]
getMetaHTTPHeaders =
   mapMaybe (uncurry TagH.maybeMetaHTTPHeader <=< Tag.maybeOpen) .
   getHeadTags


getHeadTags ::
   (Name.Tag name, Name.Attribute name) =>
   [Tag.T name string] -> [Tag.T name string]
getHeadTags =
   takeWhile (not . Match.closeLit "head") .
   drop 1 .
   dropWhile (not . Match.openNameLit "head") .
   takeWhile (not . Match.openNameLit "body")


-- * transform soup

{- |
Modify attributes and tags of certain parts.
For limitations, see 'parts'.
-}
partAttrs ::
   (Name.Tag name) =>
   (Tag.Name name -> Bool) ->
   (([Attr.T name string], [Tag.T name string]) ->
    ([Attr.T name string], [Tag.T name string])) ->
   [Tag.T name string] -> [Tag.T name string]
partAttrs p f =
   concatMap
      (either
          (\((name,attrs),part) ->
              let (newAttrs, newPart) = f (attrs, part)
              in  Tag.Open name newAttrs : newPart ++ [Tag.Close name])
          id) .
   parts p

{- |
Extract parts from the tag soup
that are enclosed in corresponding open and close tags.
If a close tag is missing, the soup end is considered as end of the part.
However nested tags are not supported,
e.g. in @<a><a></a></a>@ the second @<a>@ is considered
to be enclosed in the first @<a>@ and the first @</a>@
and the second @</a>@ is ignored.
-}
parts ::
   (Name.Tag name) =>
   (Tag.Name name -> Bool) ->
   [Tag.T name string] ->
   [Either
       ((Tag.Name name, [Attr.T name string]), [Tag.T name string])
       [Tag.T name string]]
parts p =
   let recourse ts =
          let (prefix0,suffix0) = break (Match.open p (const True)) ts
          in  Right prefix0 :
              fromMaybe []
                 (do (t, suffix1) <- viewL suffix0
                     (name, attrs) <- Tag.maybeOpen t
                     let (part,suffix2) = break (Match.close (name==)) suffix1
                     return $ Left ((name, attrs), part) : recourse (drop 1 suffix2))
   in  recourse