module Text.HTML.Tagchup.Process (
Encoding, Encoded,
evalDecodeAdaptive, decodeAdaptive, decodeTagAdaptive,
getEmbeddedEncoding,
getXMLEncoding,
findMetaEncoding,
getMetaHTTPHeaders,
getHeadTags,
partAttrs,
parts,
takeBeforeMatchingClose,
takeUntilMatchingClose,
) where
import qualified Text.HTML.Tagchup.Tag as Tag
import qualified Text.HTML.Tagchup.Tag.Match as Match
import qualified Text.XML.Basic.ProcessingInstruction as PI
import qualified Text.XML.Basic.Attribute as Attr
import qualified Text.XML.Basic.Name as Name
import qualified Text.XML.Basic.Tag as TagX
import qualified Text.HTML.Basic.Tag as TagH
import qualified Text.HTML.Basic.Character as HTMLChar
import qualified Text.HTML.Basic.String as HTMLString
import Text.HTML.Basic.String (Encoded, )
import Control.Monad.Trans.State (State, put, get, evalState, )
import Control.Monad.HT ((<=<), )
import Control.Monad (msum, guard, )
import Control.Applicative ((<|>))
import qualified Data.NonEmpty as NonEmpty
import qualified Data.List.Match as ListMatch
import qualified Data.Foldable as Fold
import Data.Traversable (traverse, )
import Data.List.HT (viewL, takeUntil, switchR, )
import Data.Maybe.HT (toMaybe, )
import Data.Maybe (fromMaybe, mapMaybe, )
type Encoding = String
evalDecodeAdaptive ::
State (Encoded -> String) a -> a
evalDecodeAdaptive :: forall a. State (Encoded -> Encoded) a -> a
evalDecodeAdaptive =
forall a b c. (a -> b -> c) -> b -> a -> c
flip forall s a. State s a -> s -> a
evalState forall a. a -> a
id
decodeAdaptive ::
(Name.Attribute name, Name.Tag name) =>
(Encoding -> Encoded -> String) ->
[Tag.T name [HTMLChar.T]] ->
State (Encoded -> String) [Tag.T name String]
decodeAdaptive :: forall name.
(Attribute name, Tag name) =>
(Encoded -> Encoded -> Encoded)
-> [T name [T]] -> State (Encoded -> Encoded) [T name Encoded]
decodeAdaptive Encoded -> Encoded -> Encoded
getDecoder =
forall (t :: * -> *) (f :: * -> *) a b.
(Traversable t, Applicative f) =>
(a -> f b) -> t a -> f (t b)
traverse (forall name.
(Attribute name, Tag name) =>
(Encoded -> Encoded -> Encoded)
-> T name [T] -> State (Encoded -> Encoded) (T name Encoded)
decodeTagAdaptive Encoded -> Encoded -> Encoded
getDecoder)
decodeTagAdaptive ::
(Name.Attribute name, Name.Tag name) =>
(Encoding -> Encoded -> String) ->
Tag.T name [HTMLChar.T] ->
State (Encoded -> String) (Tag.T name String)
decodeTagAdaptive :: forall name.
(Attribute name, Tag name) =>
(Encoded -> Encoded -> Encoded)
-> T name [T] -> State (Encoded -> Encoded) (T name Encoded)
decodeTagAdaptive Encoded -> Encoded -> Encoded
getDecoder T name [T]
tag0 =
do Encoded -> Encoded
decoder <- forall (m :: * -> *) s. Monad m => StateT s m s
get
let tag1 :: T name Encoded
tag1 =
forall b a. b -> (a -> b) -> Maybe a -> b
maybe
(forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ((Encoded -> Encoded) -> [T] -> Encoded
HTMLString.decode Encoded -> Encoded
decoder) T name [T]
tag0)
(\(Name name
name, Encoded
s) ->
forall name string. Name name -> Encoded -> T name string
Tag.special Name name
name forall a b. (a -> b) -> a -> b
$
if forall name. Tag name => Name name
TagH.cdataName forall a. Eq a => a -> a -> Bool
== Name name
name
then Encoded -> Encoded
decoder Encoded
s
else Encoded
s)
(forall name string. T name string -> Maybe (Name name, Encoded)
Tag.maybeSpecial T name [T]
tag0)
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
(a -> m b) -> t a -> m ()
Fold.mapM_ (forall (m :: * -> *) s. Monad m => s -> StateT s m ()
put forall b c a. (b -> c) -> (a -> b) -> a -> c
. Encoded -> Encoded -> Encoded
getDecoder) forall a b. (a -> b) -> a -> b
$
(do (Name name, [T name Encoded])
openTag <- forall name string.
T name string -> Maybe (Name name, [T name string])
Tag.maybeOpen T name Encoded
tag1
forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry forall name.
(Tag name, Attribute name) =>
Name name -> [T name Encoded] -> Maybe Encoded
TagH.maybeMetaEncoding (Name name, [T name Encoded])
openTag forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|>
forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry forall name string.
(Tag name, Attribute name) =>
Name name -> [T name string] -> Maybe string
TagH.maybeMetaCharset (Name name, [T name Encoded])
openTag)
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|>
(forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry forall name string.
(Tag name, Attribute name) =>
Name name -> T name string -> Maybe string
TagX.maybeXMLEncoding forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< forall name string.
T name string -> Maybe (Name name, T name string)
Tag.maybeProcessing T name Encoded
tag1)
forall (m :: * -> *) a. Monad m => a -> m a
return T name Encoded
tag1
getEmbeddedEncoding ::
(Name.Attribute name, Name.Tag name) =>
[Tag.T name String] -> Maybe Encoding
getEmbeddedEncoding :: forall name.
(Attribute name, Tag name) =>
[T name Encoded] -> Maybe Encoded
getEmbeddedEncoding [T name Encoded]
leadingTags =
let xmlEncoding :: Maybe Encoded
xmlEncoding = do
(T name Encoded
t,[T name Encoded]
_) <- forall a. [a] -> Maybe (a, [a])
viewL [T name Encoded]
leadingTags
(Name name
name, PI.Known [T name Encoded]
attrs) <- forall name string.
T name string -> Maybe (Name name, T name string)
Tag.maybeProcessing T name Encoded
t
forall (f :: * -> *). Alternative f => Bool -> f ()
guard (forall name. Tag name => Name name
TagX.xmlName forall a. Eq a => a -> a -> Bool
== Name name
name)
forall name string.
Attribute name =>
Name name -> [T name string] -> Maybe string
Attr.lookup forall name. Attribute name => Name name
Attr.encodingName [T name Encoded]
attrs
in forall (t :: * -> *) (m :: * -> *) a.
(Foldable t, MonadPlus m) =>
t (m a) -> m a
msum forall a b. (a -> b) -> a -> b
$
Maybe Encoded
xmlEncoding forall a. a -> [a] -> [a]
:
forall a b. (a -> b) -> [a] -> [b]
map
(\(Name name, [T name Encoded])
tag ->
forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry forall name string.
(Tag name, Attribute name) =>
Name name -> [T name string] -> Maybe string
TagH.maybeMetaCharset (Name name, [T name Encoded])
tag forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|>
forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry forall name.
(Tag name, Attribute name) =>
Name name -> [T name Encoded] -> Maybe Encoded
TagH.maybeMetaEncoding (Name name, [T name Encoded])
tag)
(forall a b. (a -> Maybe b) -> [a] -> [b]
mapMaybe forall name string.
T name string -> Maybe (Name name, [T name string])
Tag.maybeOpen forall a b. (a -> b) -> a -> b
$ forall name string.
(Tag name, Attribute name) =>
[T name string] -> [T name string]
getHeadTags [T name Encoded]
leadingTags)
getXMLEncoding ::
(Name.Tag name, Name.Attribute name) =>
[Tag.T name String] -> Maybe String
getXMLEncoding :: forall name.
(Tag name, Attribute name) =>
[T name Encoded] -> Maybe Encoded
getXMLEncoding [T name Encoded]
tags =
do (T name Encoded
t,[T name Encoded]
_) <- forall a. [a] -> Maybe (a, [a])
viewL [T name Encoded]
tags
forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry forall name string.
(Tag name, Attribute name) =>
Name name -> T name string -> Maybe string
TagX.maybeXMLEncoding forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< forall name string.
T name string -> Maybe (Name name, T name string)
Tag.maybeProcessing T name Encoded
t
findMetaEncoding ::
(Name.Tag name, Name.Attribute name) =>
[Tag.T name String] -> Maybe String
findMetaEncoding :: forall name.
(Tag name, Attribute name) =>
[T name Encoded] -> Maybe Encoded
findMetaEncoding =
forall (t :: * -> *) (m :: * -> *) a.
(Foldable t, MonadPlus m) =>
t (m a) -> m a
msum forall b c a. (b -> c) -> (a -> b) -> a -> c
.
forall a b. (a -> b) -> [a] -> [b]
map (forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry forall name.
(Tag name, Attribute name) =>
Name name -> [T name Encoded] -> Maybe Encoded
TagH.maybeMetaEncoding forall (m :: * -> *) b c a.
Monad m =>
(b -> m c) -> (a -> m b) -> a -> m c
<=< forall name string.
T name string -> Maybe (Name name, [T name string])
Tag.maybeOpen) forall b c a. (b -> c) -> (a -> b) -> a -> c
.
forall name string.
(Tag name, Attribute name) =>
[T name string] -> [T name string]
getHeadTags
getMetaHTTPHeaders ::
(Name.Tag name, Name.Attribute name) =>
[Tag.T name string] -> [(string, string)]
=
forall a b. (a -> Maybe b) -> [a] -> [b]
mapMaybe (forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry forall name string.
(Tag name, Attribute name) =>
Name name -> [T name string] -> Maybe (string, string)
TagH.maybeMetaHTTPHeader forall (m :: * -> *) b c a.
Monad m =>
(b -> m c) -> (a -> m b) -> a -> m c
<=< forall name string.
T name string -> Maybe (Name name, [T name string])
Tag.maybeOpen) forall b c a. (b -> c) -> (a -> b) -> a -> c
.
forall name string.
(Tag name, Attribute name) =>
[T name string] -> [T name string]
getHeadTags
getHeadTags ::
(Name.Tag name, Name.Attribute name) =>
[Tag.T name string] -> [Tag.T name string]
getHeadTags :: forall name string.
(Tag name, Attribute name) =>
[T name string] -> [T name string]
getHeadTags =
forall a. (a -> Bool) -> [a] -> [a]
takeWhile (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall name string. Tag name => Encoded -> T name string -> Bool
Match.closeLit Encoded
"head") forall b c a. (b -> c) -> (a -> b) -> a -> c
.
forall a. Int -> [a] -> [a]
drop Int
1 forall b c a. (b -> c) -> (a -> b) -> a -> c
.
forall a. (a -> Bool) -> [a] -> [a]
dropWhile (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall name string. Tag name => Encoded -> T name string -> Bool
Match.openNameLit Encoded
"head") forall b c a. (b -> c) -> (a -> b) -> a -> c
.
forall a. (a -> Bool) -> [a] -> [a]
takeWhile (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall name string. Tag name => Encoded -> T name string -> Bool
Match.openNameLit Encoded
"body")
partAttrs ::
(Name.Tag name) =>
(Tag.Name name -> Bool) ->
(([Attr.T name string], [Tag.T name string]) ->
([Attr.T name string], [Tag.T name string])) ->
[Tag.T name string] -> [Tag.T name string]
partAttrs :: forall name string.
Tag name =>
(Name name -> Bool)
-> (([T name string], [T name string])
-> ([T name string], [T name string]))
-> [T name string]
-> [T name string]
partAttrs Name name -> Bool
p ([T name string], [T name string])
-> ([T name string], [T name string])
f =
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap
(forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either
(\((Name name
name,[T name string]
attrs),[T name string]
part) ->
let ([T name string]
newAttrs, [T name string]
newPart) = ([T name string], [T name string])
-> ([T name string], [T name string])
f ([T name string]
attrs, [T name string]
part)
in forall name string. Name name -> [T name string] -> T name string
Tag.Open Name name
name [T name string]
newAttrs forall a. a -> [a] -> [a]
: [T name string]
newPart forall a. [a] -> [a] -> [a]
++ [forall name string. Name name -> T name string
Tag.Close Name name
name])
forall a. a -> a
id) forall b c a. (b -> c) -> (a -> b) -> a -> c
.
forall name string.
Tag name =>
(Name name -> Bool)
-> [T name string]
-> [Either
((Name name, [T name string]), [T name string]) [T name string]]
parts Name name -> Bool
p
parts ::
(Name.Tag name) =>
(Tag.Name name -> Bool) ->
[Tag.T name string] ->
[Either
((Tag.Name name, [Attr.T name string]), [Tag.T name string])
[Tag.T name string]]
parts :: forall name string.
Tag name =>
(Name name -> Bool)
-> [T name string]
-> [Either
((Name name, [T name string]), [T name string]) [T name string]]
parts Name name -> Bool
p =
let recourse :: [T name string]
-> [Either
((Name name, [T name string]), [T name string]) [T name string]]
recourse [T name string]
ts =
let ([T name string]
prefix0,[T name string]
suffix0) = forall a. (a -> Bool) -> [a] -> ([a], [a])
break (forall name string.
(Name name -> Bool)
-> ([T name string] -> Bool) -> T name string -> Bool
Match.open Name name -> Bool
p (forall a b. a -> b -> a
const Bool
True)) [T name string]
ts
in forall a b. b -> Either a b
Right [T name string]
prefix0 forall a. a -> [a] -> [a]
:
forall a. a -> Maybe a -> a
fromMaybe []
(do (T name string
t, [T name string]
suffix1) <- forall a. [a] -> Maybe (a, [a])
viewL [T name string]
suffix0
(Name name
name, [T name string]
attrs) <- forall name string.
T name string -> Maybe (Name name, [T name string])
Tag.maybeOpen T name string
t
let ([T name string]
part,[T name string]
suffix2) = forall a. (a -> Bool) -> [a] -> ([a], [a])
break (forall name string. (Name name -> Bool) -> T name string -> Bool
Match.close (Name name
nameforall a. Eq a => a -> a -> Bool
==)) [T name string]
suffix1
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ forall a b. a -> Either a b
Left ((Name name
name, [T name string]
attrs), [T name string]
part) forall a. a -> [a] -> [a]
: [T name string]
-> [Either
((Name name, [T name string]), [T name string]) [T name string]]
recourse (forall a. Int -> [a] -> [a]
drop Int
1 [T name string]
suffix2))
in forall {string}.
[T name string]
-> [Either
((Name name, [T name string]), [T name string]) [T name string]]
recourse
nestDiff :: (Eq name) => TagH.Name name -> Tag.T name string -> Int
nestDiff :: forall name string. Eq name => Name name -> T name string -> Int
nestDiff Name name
name T name string
tag =
forall a. Enum a => a -> Int
fromEnum (forall name string.
(Name name -> Bool)
-> ([T name string] -> Bool) -> T name string -> Bool
Match.open (Name name
nameforall a. Eq a => a -> a -> Bool
==) (forall a b. a -> b -> a
const Bool
True) T name string
tag)
forall a. Num a => a -> a -> a
-
forall a. Enum a => a -> Int
fromEnum (forall name string. (Name name -> Bool) -> T name string -> Bool
Match.close (Name name
nameforall a. Eq a => a -> a -> Bool
==) T name string
tag)
countNesting :: (a -> Int) -> [a] -> [Int]
countNesting :: forall a. (a -> Int) -> [a] -> [Int]
countNesting a -> Int
p = forall (f :: * -> *) a. T f a -> f a
NonEmpty.tail forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (f :: * -> *) b a.
Traversable f =>
(b -> a -> b) -> b -> f a -> T f b
NonEmpty.scanl forall a. Num a => a -> a -> a
(+) Int
0 forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a -> b) -> [a] -> [b]
map a -> Int
p
takeBeforeMatch :: (a -> Int) -> [a] -> [a]
takeBeforeMatch :: forall a. (a -> Int) -> [a] -> [a]
takeBeforeMatch a -> Int
p [a]
xs =
forall a b c. (a -> b -> c) -> b -> a -> c
flip forall b a. [b] -> [a] -> [a]
ListMatch.take [a]
xs forall a b. (a -> b) -> a -> b
$ forall a. (a -> Bool) -> [a] -> [a]
takeWhile (forall a. Ord a => a -> a -> Bool
>Int
0) forall a b. (a -> b) -> a -> b
$ forall a. (a -> Int) -> [a] -> [Int]
countNesting a -> Int
p [a]
xs
takeBeforeMatchingClose ::
(Eq name) => TagH.Name name -> [Tag.T name string] -> [Tag.T name string]
takeBeforeMatchingClose :: forall name string.
Eq name =>
Name name -> [T name string] -> [T name string]
takeBeforeMatchingClose Name name
name = forall a. (a -> Int) -> [a] -> [a]
takeBeforeMatch forall a b. (a -> b) -> a -> b
$ forall name string. Eq name => Name name -> T name string -> Int
nestDiff Name name
name
takeUntilMatch :: (a -> Int) -> [a] -> Maybe [a]
takeUntilMatch :: forall a. (a -> Int) -> [a] -> Maybe [a]
takeUntilMatch a -> Int
p [a]
xs =
(\[Int]
ys -> forall b a. b -> ([a] -> a -> b) -> [a] -> b
switchR (forall a. a -> Maybe a
Just []) (\[Int]
_ Int
y -> forall a. Bool -> a -> Maybe a
toMaybe (Int
yforall a. Eq a => a -> a -> Bool
==Int
0) forall a b. (a -> b) -> a -> b
$ forall b a. [b] -> [a] -> [a]
ListMatch.take [Int]
ys [a]
xs) [Int]
ys) forall a b. (a -> b) -> a -> b
$
forall a. (a -> Bool) -> [a] -> [a]
takeUntil (forall a. Eq a => a -> a -> Bool
==Int
0) forall a b. (a -> b) -> a -> b
$ forall a. (a -> Int) -> [a] -> [Int]
countNesting a -> Int
p [a]
xs
takeUntilMatchingClose ::
(Eq name) =>
TagH.Name name -> [Tag.T name string] -> Maybe [Tag.T name string]
takeUntilMatchingClose :: forall name string.
Eq name =>
Name name -> [T name string] -> Maybe [T name string]
takeUntilMatchingClose Name name
name = forall a. (a -> Int) -> [a] -> Maybe [a]
takeUntilMatch forall a b. (a -> b) -> a -> b
$ forall name string. Eq name => Name name -> T name string -> Int
nestDiff Name name
name