module Text.HTML.Tagchup.Process (
Encoding, Encoded,
evalDecodeAdaptive, decodeAdaptive, decodeTagAdaptive,
getXMLEncoding,
findMetaEncoding,
getMetaHTTPHeaders,
getHeadTags,
partAttrs,
parts,
) where
import qualified Text.HTML.Tagchup.Tag as Tag
import qualified Text.HTML.Tagchup.Tag.Match as Match
import qualified Text.XML.Basic.Attribute as Attr
import qualified Text.XML.Basic.Name as Name
import qualified Text.XML.Basic.Tag as TagX
import qualified Text.HTML.Basic.Tag as TagH
import qualified Text.HTML.Basic.Character as HTMLChar
import qualified Text.HTML.Basic.String as HTMLString
import Text.HTML.Basic.String (Encoded, )
import Control.Monad.Trans.State (State, put, get, evalState, )
import Control.Monad.HT ((<=<), )
import Control.Monad (msum, mplus, )
import Data.Traversable (traverse, )
import Data.List.HT (viewL, )
import Data.Maybe (fromMaybe, mapMaybe, )
type Encoding = String
evalDecodeAdaptive ::
State (Encoded -> String) a -> a
evalDecodeAdaptive =
flip evalState id
decodeAdaptive ::
(Name.Attribute name, Name.Tag name) =>
(Encoding -> Encoded -> String) ->
[Tag.T name [HTMLChar.T]] ->
State (Encoded -> String) [Tag.T name String]
decodeAdaptive getDecoder =
traverse (decodeTagAdaptive getDecoder)
decodeTagAdaptive ::
(Name.Attribute name, Name.Tag name) =>
(Encoding -> Encoded -> String) ->
Tag.T name [HTMLChar.T] ->
State (Encoded -> String) (Tag.T name String)
decodeTagAdaptive getDecoder tag0 =
do decoder <- get
let tag1 =
maybe
(fmap (HTMLString.decode decoder) tag0)
(\(name, s) ->
Tag.special name $
if TagH.cdataName == name
then decoder s
else s)
(Tag.maybeSpecial tag0)
maybe
(return ())
(put . getDecoder) $
mplus
(uncurry TagH.maybeMetaEncoding =<<
Tag.maybeOpen tag1)
(uncurry TagX.maybeXMLEncoding =<<
Tag.maybeProcessing tag1)
return tag1
getXMLEncoding ::
(Name.Tag name, Name.Attribute name) =>
[Tag.T name String] -> Maybe String
getXMLEncoding tags =
do (t,_) <- viewL tags
uncurry TagX.maybeXMLEncoding =<< Tag.maybeProcessing t
findMetaEncoding ::
(Name.Tag name, Name.Attribute name) =>
[Tag.T name String] -> Maybe String
findMetaEncoding =
msum .
map (uncurry TagH.maybeMetaEncoding <=< Tag.maybeOpen) .
getHeadTags
getMetaHTTPHeaders ::
(Name.Tag name, Name.Attribute name) =>
[Tag.T name string] -> [(string, string)]
getMetaHTTPHeaders =
mapMaybe (uncurry TagH.maybeMetaHTTPHeader <=< Tag.maybeOpen) .
getHeadTags
getHeadTags ::
(Name.Tag name, Name.Attribute name) =>
[Tag.T name string] -> [Tag.T name string]
getHeadTags =
takeWhile (not . Match.closeLit "head") .
drop 1 .
dropWhile (not . Match.openNameLit "head") .
takeWhile (not . Match.openNameLit "body")
partAttrs ::
(Name.Tag name) =>
(Tag.Name name -> Bool) ->
(([Attr.T name string], [Tag.T name string]) ->
([Attr.T name string], [Tag.T name string])) ->
[Tag.T name string] -> [Tag.T name string]
partAttrs p f =
concatMap
(either
(\((name,attrs),part) ->
let (newAttrs, newPart) = f (attrs, part)
in Tag.Open name newAttrs : newPart ++ [Tag.Close name])
id) .
parts p
parts ::
(Name.Tag name) =>
(Tag.Name name -> Bool) ->
[Tag.T name string] ->
[Either
((Tag.Name name, [Attr.T name string]), [Tag.T name string])
[Tag.T name string]]
parts p =
let recourse ts =
let (prefix0,suffix0) = break (Match.open p (const True)) ts
in Right prefix0 :
fromMaybe []
(do (t, suffix1) <- viewL suffix0
(name, attrs) <- Tag.maybeOpen t
let (part,suffix2) = break (Match.close (name==)) suffix1
return $ Left ((name, attrs), part) : recourse (drop 1 suffix2))
in recourse