tagchup-0.4: alternative package for processing of tag soupsSource codeContentsIndex
Text.HTML.Tagchup.Process
Synopsis
type Encoding = String
type Encoded = String
evalDecodeAdaptive :: State (Encoded -> String) a -> a
decodeAdaptive :: (Attribute name, Tag name) => (Encoding -> Encoded -> String) -> [T name [T]] -> State (Encoded -> String) [T name String]
decodeTagAdaptive :: (Attribute name, Tag name) => (Encoding -> Encoded -> String) -> T name [T] -> State (Encoded -> String) (T name String)
getXMLEncoding :: (Tag name, Attribute name) => [T name String] -> Maybe String
findMetaEncoding :: (Tag name, Attribute name) => [T name String] -> Maybe String
getMetaHTTPHeaders :: (Tag name, Attribute name) => [T name string] -> [(string, string)]
getHeadTags :: (Tag name, Attribute name) => [T name string] -> [T name string]
partAttrs :: Tag name => (Name name -> Bool) -> (([T name string], [T name string]) -> ([T name string], [T name string])) -> [T name string] -> [T name string]
parts :: Tag name => (Name name -> Bool) -> [T name string] -> [Either ((Name name, [T name string]), [T name string]) [T name string]]
Documentation
type Encoding = StringSource
type Encoded = StringSource
should be [Word8]
evalDecodeAdaptive :: State (Encoded -> String) a -> aSource
decodeAdaptive :: (Attribute name, Tag name) => (Encoding -> Encoded -> String) -> [T name [T]] -> State (Encoded -> String) [T name String]Source

Selects a decoder dynamically according to xml-encoding and meta-http-equiv tags. The ?xml tag should only appear at the beginning of a document, but we respect it at every occurence.

 import qualified Text.XML.HXT.DOM.Unicode as Unicode
 evalDecodeAdaptive .
 decodeAdaptive
    (maybe Unicode.latin1ToUnicode (fst.) .
     Unicode.getDecodingFct)
decodeTagAdaptive :: (Attribute name, Tag name) => (Encoding -> Encoded -> String) -> T name [T] -> State (Encoded -> String) (T name String)Source
decodeTagAdaptive decoderSelector tag generates a state monad, with a decoder as state. It decodes encoding specific byte sequences using the current decoder and XML references using a fixed table.
getXMLEncoding :: (Tag name, Attribute name) => [T name String] -> Maybe StringSource
Check whether the first tag is an xml processing instruction tag and return the value of its encoding attribute.
findMetaEncoding :: (Tag name, Attribute name) => [T name String] -> Maybe StringSource
Rather the same as wraxml:HTML.Tree.findMetaEncoding
getMetaHTTPHeaders :: (Tag name, Attribute name) => [T name string] -> [(string, string)]Source
Extract META tags which contain HTTP-EQUIV attribute and present these values like HTTP headers.
getHeadTags :: (Tag name, Attribute name) => [T name string] -> [T name string]Source
partAttrs :: Tag name => (Name name -> Bool) -> (([T name string], [T name string]) -> ([T name string], [T name string])) -> [T name string] -> [T name string]Source
Modify attributes and tags of certain parts. For limitations, see parts.
parts :: Tag name => (Name name -> Bool) -> [T name string] -> [Either ((Name name, [T name string]), [T name string]) [T name string]]Source
Extract parts from the tag soup that are enclosed in corresponding open and close tags. If a close tag is missing, the soup end is considered as end of the part. However nested tags are not supported, e.g. in a><a></a></a>@ the second @<a is considered to be enclosed in the first a>@ and the first @</a and the second /a is ignored.
Produced by Haddock version 2.6.0