{-# LANGUAGE OverloadedStrings     #-}
{-# LANGUAGE FlexibleInstances     #-}
{-# LANGUAGE MultiParamTypeClasses #-}

module Text.XmlHtml.Common where

import           Data.ByteString (ByteString)
import           Blaze.ByteString.Builder
import qualified Data.ByteString.Char8 as BS
import qualified Data.ByteString.Builder as B
import           Data.Char (isAscii, isLatin1)
import qualified Data.HashMap.Strict as M
import qualified Data.HashSet as S
import qualified Data.Map as Map
import           Data.Maybe

import           Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import qualified Data.Text.Encoding.Error as TE
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Encoding as TL

import           Text.XmlHtml.HTML.Meta (reversePredefinedRefs,
                                         explicitAttributes)


------------------------------------------------------------------------------
-- | Represents a document fragment, including the format, encoding, and
-- document type declaration as well as its content.
data Document = XmlDocument  {
                    Document -> Encoding
docEncoding :: !Encoding,
                    Document -> Maybe DocType
docType     :: !(Maybe DocType),
                    Document -> [Node]
docContent  :: ![Node]
                }
              | HtmlDocument {
                    docEncoding :: !Encoding,
                    docType     :: !(Maybe DocType),
                    docContent  :: ![Node]
                }
    deriving (Document -> Document -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Document -> Document -> Bool
$c/= :: Document -> Document -> Bool
== :: Document -> Document -> Bool
$c== :: Document -> Document -> Bool
Eq, Int -> Document -> ShowS
[Document] -> ShowS
Document -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Document] -> ShowS
$cshowList :: [Document] -> ShowS
show :: Document -> String
$cshow :: Document -> String
showsPrec :: Int -> Document -> ShowS
$cshowsPrec :: Int -> Document -> ShowS
Show)


------------------------------------------------------------------------------
-- | A node of a document structure.  A node can be text, a comment, or an
-- element.  XML processing instructions are intentionally omitted as a
-- simplification, and CDATA and plain text are both text nodes, since they
-- ought to be semantically interchangeable.
data Node = TextNode !Text
          | Comment  !Text
          | Element {
                Node -> Text
elementTag      :: !Text,
                Node -> [(Text, Text)]
elementAttrs    :: ![(Text, Text)],
                Node -> [Node]
elementChildren :: ![Node]
            }
    deriving (Node -> Node -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Node -> Node -> Bool
$c/= :: Node -> Node -> Bool
== :: Node -> Node -> Bool
$c== :: Node -> Node -> Bool
Eq, Int -> Node -> ShowS
[Node] -> ShowS
Node -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Node] -> ShowS
$cshowList :: [Node] -> ShowS
show :: Node -> String
$cshow :: Node -> String
showsPrec :: Int -> Node -> ShowS
$cshowsPrec :: Int -> Node -> ShowS
Show)


------------------------------------------------------------------------------
-- | Rendering options
data RenderOptions = RenderOptions {
      RenderOptions -> AttrSurround
roAttributeSurround :: AttrSurround
      -- ^ Single or double-quotes used around attribute values

    , RenderOptions -> AttrResolveInternalQuotes
roAttributeResolveInternal :: AttrResolveInternalQuotes
      -- ^ Quotes inside attribute values that conflict with the surround
      -- are escaped, or the outer quotes are changed to avoid conflicting
      -- with the internal ones

    , RenderOptions -> Maybe (HashMap Text (HashSet Text))
roExplicitEmptyAttrs :: Maybe (M.HashMap Text (S.HashSet Text))
      -- ^ Attributes in the whitelist with empty values are
      -- rendered as <div example="">
      -- 'Nothing' applies this rule to all attributes with empty values

    } deriving (RenderOptions -> RenderOptions -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: RenderOptions -> RenderOptions -> Bool
$c/= :: RenderOptions -> RenderOptions -> Bool
== :: RenderOptions -> RenderOptions -> Bool
$c== :: RenderOptions -> RenderOptions -> Bool
Eq, Int -> RenderOptions -> ShowS
[RenderOptions] -> ShowS
RenderOptions -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [RenderOptions] -> ShowS
$cshowList :: [RenderOptions] -> ShowS
show :: RenderOptions -> String
$cshow :: RenderOptions -> String
showsPrec :: Int -> RenderOptions -> ShowS
$cshowsPrec :: Int -> RenderOptions -> ShowS
Show)

data AttrSurround = SurroundDoubleQuote | SurroundSingleQuote
    deriving (AttrSurround -> AttrSurround -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: AttrSurround -> AttrSurround -> Bool
$c/= :: AttrSurround -> AttrSurround -> Bool
== :: AttrSurround -> AttrSurround -> Bool
$c== :: AttrSurround -> AttrSurround -> Bool
Eq, Eq AttrSurround
AttrSurround -> AttrSurround -> Bool
AttrSurround -> AttrSurround -> Ordering
AttrSurround -> AttrSurround -> AttrSurround
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: AttrSurround -> AttrSurround -> AttrSurround
$cmin :: AttrSurround -> AttrSurround -> AttrSurround
max :: AttrSurround -> AttrSurround -> AttrSurround
$cmax :: AttrSurround -> AttrSurround -> AttrSurround
>= :: AttrSurround -> AttrSurround -> Bool
$c>= :: AttrSurround -> AttrSurround -> Bool
> :: AttrSurround -> AttrSurround -> Bool
$c> :: AttrSurround -> AttrSurround -> Bool
<= :: AttrSurround -> AttrSurround -> Bool
$c<= :: AttrSurround -> AttrSurround -> Bool
< :: AttrSurround -> AttrSurround -> Bool
$c< :: AttrSurround -> AttrSurround -> Bool
compare :: AttrSurround -> AttrSurround -> Ordering
$ccompare :: AttrSurround -> AttrSurround -> Ordering
Ord, Int -> AttrSurround -> ShowS
[AttrSurround] -> ShowS
AttrSurround -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [AttrSurround] -> ShowS
$cshowList :: [AttrSurround] -> ShowS
show :: AttrSurround -> String
$cshow :: AttrSurround -> String
showsPrec :: Int -> AttrSurround -> ShowS
$cshowsPrec :: Int -> AttrSurround -> ShowS
Show)

data AttrResolveInternalQuotes = AttrResolveByEscape | AttrResolveAvoidEscape
    deriving (AttrResolveInternalQuotes -> AttrResolveInternalQuotes -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: AttrResolveInternalQuotes -> AttrResolveInternalQuotes -> Bool
$c/= :: AttrResolveInternalQuotes -> AttrResolveInternalQuotes -> Bool
== :: AttrResolveInternalQuotes -> AttrResolveInternalQuotes -> Bool
$c== :: AttrResolveInternalQuotes -> AttrResolveInternalQuotes -> Bool
Eq, Eq AttrResolveInternalQuotes
AttrResolveInternalQuotes -> AttrResolveInternalQuotes -> Bool
AttrResolveInternalQuotes -> AttrResolveInternalQuotes -> Ordering
AttrResolveInternalQuotes
-> AttrResolveInternalQuotes -> AttrResolveInternalQuotes
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: AttrResolveInternalQuotes
-> AttrResolveInternalQuotes -> AttrResolveInternalQuotes
$cmin :: AttrResolveInternalQuotes
-> AttrResolveInternalQuotes -> AttrResolveInternalQuotes
max :: AttrResolveInternalQuotes
-> AttrResolveInternalQuotes -> AttrResolveInternalQuotes
$cmax :: AttrResolveInternalQuotes
-> AttrResolveInternalQuotes -> AttrResolveInternalQuotes
>= :: AttrResolveInternalQuotes -> AttrResolveInternalQuotes -> Bool
$c>= :: AttrResolveInternalQuotes -> AttrResolveInternalQuotes -> Bool
> :: AttrResolveInternalQuotes -> AttrResolveInternalQuotes -> Bool
$c> :: AttrResolveInternalQuotes -> AttrResolveInternalQuotes -> Bool
<= :: AttrResolveInternalQuotes -> AttrResolveInternalQuotes -> Bool
$c<= :: AttrResolveInternalQuotes -> AttrResolveInternalQuotes -> Bool
< :: AttrResolveInternalQuotes -> AttrResolveInternalQuotes -> Bool
$c< :: AttrResolveInternalQuotes -> AttrResolveInternalQuotes -> Bool
compare :: AttrResolveInternalQuotes -> AttrResolveInternalQuotes -> Ordering
$ccompare :: AttrResolveInternalQuotes -> AttrResolveInternalQuotes -> Ordering
Ord, Int -> AttrResolveInternalQuotes -> ShowS
[AttrResolveInternalQuotes] -> ShowS
AttrResolveInternalQuotes -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [AttrResolveInternalQuotes] -> ShowS
$cshowList :: [AttrResolveInternalQuotes] -> ShowS
show :: AttrResolveInternalQuotes -> String
$cshow :: AttrResolveInternalQuotes -> String
showsPrec :: Int -> AttrResolveInternalQuotes -> ShowS
$cshowsPrec :: Int -> AttrResolveInternalQuotes -> ShowS
Show)

defaultRenderOptions :: RenderOptions
defaultRenderOptions :: RenderOptions
defaultRenderOptions = RenderOptions
    { roAttributeSurround :: AttrSurround
roAttributeSurround        = AttrSurround
SurroundSingleQuote
    , roAttributeResolveInternal :: AttrResolveInternalQuotes
roAttributeResolveInternal = AttrResolveInternalQuotes
AttrResolveAvoidEscape
    , roExplicitEmptyAttrs :: Maybe (HashMap Text (HashSet Text))
roExplicitEmptyAttrs       = forall a. a -> Maybe a
Just HashMap Text (HashSet Text)
explicitAttributes
    }

------------------------------------------------------------------------------
-- | Determines whether the node is text or not.
isTextNode :: Node -> Bool
isTextNode :: Node -> Bool
isTextNode (TextNode Text
_) = Bool
True
isTextNode Node
_            = Bool
False


------------------------------------------------------------------------------
-- | Determines whether the node is a comment or not.
isComment :: Node -> Bool
isComment :: Node -> Bool
isComment (Comment Text
_) = Bool
True
isComment Node
_           = Bool
False


------------------------------------------------------------------------------
-- | Determines whether the node is an element or not.
isElement :: Node -> Bool
isElement :: Node -> Bool
isElement (Element Text
_ [(Text, Text)]
_ [Node]
_) = Bool
True
isElement Node
_               = Bool
False


------------------------------------------------------------------------------
-- | Gives the tag name of an element, or 'Nothing' if the node isn't an
-- element.
tagName :: Node -> Maybe Text
tagName :: Node -> Maybe Text
tagName (Element Text
t [(Text, Text)]
_ [Node]
_) = forall a. a -> Maybe a
Just Text
t
tagName Node
_               = forall a. Maybe a
Nothing


------------------------------------------------------------------------------
-- | Retrieves the attribute with the given name.  If the 'Node' is not an
-- element, the result is always 'Nothing'
getAttribute :: Text -> Node -> Maybe Text
getAttribute :: Text -> Node -> Maybe Text
getAttribute Text
name (Element Text
_ [(Text, Text)]
attrs [Node]
_) = forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup Text
name [(Text, Text)]
attrs
getAttribute Text
_    Node
_                   = forall a. Maybe a
Nothing


------------------------------------------------------------------------------
-- | Checks if a given attribute exists in a 'Node'.
hasAttribute :: Text -> Node -> Bool
hasAttribute :: Text -> Node -> Bool
hasAttribute Text
name = forall a. Maybe a -> Bool
isJust forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Node -> Maybe Text
getAttribute Text
name


------------------------------------------------------------------------------
-- | Sets the attribute name to the given value.  If the 'Node' is not an
-- element, this is the identity.
setAttribute :: Text -> Text -> Node -> Node
setAttribute :: Text -> Text -> Node -> Node
setAttribute Text
name Text
val (Element Text
t [(Text, Text)]
a [Node]
c) = Text -> [(Text, Text)] -> [Node] -> Node
Element Text
t [(Text, Text)]
newAttrs [Node]
c
  where newAttrs :: [(Text, Text)]
newAttrs = (Text
name, Text
val) forall a. a -> [a] -> [a]
: forall a. (a -> Bool) -> [a] -> [a]
filter ((forall a. Eq a => a -> a -> Bool
/= Text
name) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a, b) -> a
fst) [(Text, Text)]
a
setAttribute Text
_    Text
_   Node
n                   = Node
n


------------------------------------------------------------------------------
-- | Gives the entire text content of a node, ignoring markup.
nodeText :: Node -> Text
nodeText :: Node -> Text
nodeText (TextNode Text
t)    = Text
t
nodeText (Comment Text
_)     = Text
""
nodeText (Element Text
_ [(Text, Text)]
_ [Node]
c) = [Text] -> Text
T.concat (forall a b. (a -> b) -> [a] -> [b]
map Node -> Text
nodeText [Node]
c)


------------------------------------------------------------------------------
-- | Gives the child nodes of the given node.  Only elements have child nodes.
childNodes :: Node -> [Node]
childNodes :: Node -> [Node]
childNodes (Element Text
_ [(Text, Text)]
_ [Node]
c) = [Node]
c
childNodes Node
_               = []


------------------------------------------------------------------------------
-- | Gives the child elements of the given node.
childElements :: Node -> [Node]
childElements :: Node -> [Node]
childElements = forall a. (a -> Bool) -> [a] -> [a]
filter Node -> Bool
isElement forall b c a. (b -> c) -> (a -> b) -> a -> c
. Node -> [Node]
childNodes


------------------------------------------------------------------------------
-- | Gives all of the child elements of the node with the given tag
-- name.
childElementsTag :: Text -> Node -> [Node]
childElementsTag :: Text -> Node -> [Node]
childElementsTag Text
tag = forall a. (a -> Bool) -> [a] -> [a]
filter ((forall a. Eq a => a -> a -> Bool
== forall a. a -> Maybe a
Just Text
tag) forall b c a. (b -> c) -> (a -> b) -> a -> c
. Node -> Maybe Text
tagName) forall b c a. (b -> c) -> (a -> b) -> a -> c
. Node -> [Node]
childNodes


------------------------------------------------------------------------------
-- | Gives the first child element of the node with the given tag name,
-- or 'Nothing' if there is no such child element.
childElementTag :: Text -> Node -> Maybe Node
childElementTag :: Text -> Node -> Maybe Node
childElementTag Text
tag = forall a. [a] -> Maybe a
listToMaybe forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Node -> [Node]
childElementsTag Text
tag


------------------------------------------------------------------------------
-- | Gives the descendants of the given node in the order that they begin in
-- the document.
descendantNodes :: Node -> [Node]
descendantNodes :: Node -> [Node]
descendantNodes = forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap (\Node
n -> Node
n forall a. a -> [a] -> [a]
: Node -> [Node]
descendantNodes Node
n) forall b c a. (b -> c) -> (a -> b) -> a -> c
. Node -> [Node]
childNodes

------------------------------------------------------------------------------
-- | Gives the descendant elements of the given node, in the order that their
-- start tags appear in the document.
descendantElements :: Node -> [Node]
descendantElements :: Node -> [Node]
descendantElements = forall a. (a -> Bool) -> [a] -> [a]
filter Node -> Bool
isElement forall b c a. (b -> c) -> (a -> b) -> a -> c
. Node -> [Node]
descendantNodes


------------------------------------------------------------------------------
-- | Gives the descendant elements with a given tag name.
descendantElementsTag :: Text -> Node -> [Node]
descendantElementsTag :: Text -> Node -> [Node]
descendantElementsTag Text
tag = forall a. (a -> Bool) -> [a] -> [a]
filter ((forall a. Eq a => a -> a -> Bool
== forall a. a -> Maybe a
Just Text
tag) forall b c a. (b -> c) -> (a -> b) -> a -> c
. Node -> Maybe Text
tagName) forall b c a. (b -> c) -> (a -> b) -> a -> c
. Node -> [Node]
descendantNodes


------------------------------------------------------------------------------
-- | Gives the first descendant element of the node with the given tag name,
-- or 'Nothing' if there is no such element.
descendantElementTag :: Text -> Node -> Maybe Node
descendantElementTag :: Text -> Node -> Maybe Node
descendantElementTag Text
tag = forall a. [a] -> Maybe a
listToMaybe forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Node -> [Node]
descendantElementsTag Text
tag


------------------------------------------------------------------------------
-- | A document type declaration.  Note that DTD internal subsets are
-- currently unimplemented.
data DocType = DocType !Text !ExternalID !InternalSubset
    deriving (DocType -> DocType -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: DocType -> DocType -> Bool
$c/= :: DocType -> DocType -> Bool
== :: DocType -> DocType -> Bool
$c== :: DocType -> DocType -> Bool
Eq, Int -> DocType -> ShowS
[DocType] -> ShowS
DocType -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [DocType] -> ShowS
$cshowList :: [DocType] -> ShowS
show :: DocType -> String
$cshow :: DocType -> String
showsPrec :: Int -> DocType -> ShowS
$cshowsPrec :: Int -> DocType -> ShowS
Show)


------------------------------------------------------------------------------
-- | An external ID, as in a document type declaration.  This can be a
-- SYSTEM identifier, or a PUBLIC identifier, or can be omitted.
data ExternalID = Public !Text !Text
                | System !Text
                | NoExternalID
    deriving (ExternalID -> ExternalID -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: ExternalID -> ExternalID -> Bool
$c/= :: ExternalID -> ExternalID -> Bool
== :: ExternalID -> ExternalID -> Bool
$c== :: ExternalID -> ExternalID -> Bool
Eq, Int -> ExternalID -> ShowS
[ExternalID] -> ShowS
ExternalID -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [ExternalID] -> ShowS
$cshowList :: [ExternalID] -> ShowS
show :: ExternalID -> String
$cshow :: ExternalID -> String
showsPrec :: Int -> ExternalID -> ShowS
$cshowsPrec :: Int -> ExternalID -> ShowS
Show)


------------------------------------------------------------------------------
-- | The internal subset is unparsed, but preserved in case it's actually
-- wanted.
data InternalSubset = InternalText !Text
                    | NoInternalSubset
    deriving (InternalSubset -> InternalSubset -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: InternalSubset -> InternalSubset -> Bool
$c/= :: InternalSubset -> InternalSubset -> Bool
== :: InternalSubset -> InternalSubset -> Bool
$c== :: InternalSubset -> InternalSubset -> Bool
Eq, Int -> InternalSubset -> ShowS
[InternalSubset] -> ShowS
InternalSubset -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [InternalSubset] -> ShowS
$cshowList :: [InternalSubset] -> ShowS
show :: InternalSubset -> String
$cshow :: InternalSubset -> String
showsPrec :: Int -> InternalSubset -> ShowS
$cshowsPrec :: Int -> InternalSubset -> ShowS
Show)


------------------------------------------------------------------------------
-- | The character encoding of a document.  Currently only the required
-- character encodings are implemented.
data Encoding = UTF8 | UTF16BE | UTF16LE | ISO_8859_1 deriving (Encoding -> Encoding -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Encoding -> Encoding -> Bool
$c/= :: Encoding -> Encoding -> Bool
== :: Encoding -> Encoding -> Bool
$c== :: Encoding -> Encoding -> Bool
Eq, Int -> Encoding -> ShowS
[Encoding] -> ShowS
Encoding -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Encoding] -> ShowS
$cshowList :: [Encoding] -> ShowS
show :: Encoding -> String
$cshow :: Encoding -> String
showsPrec :: Int -> Encoding -> ShowS
$cshowsPrec :: Int -> Encoding -> ShowS
Show)


------------------------------------------------------------------------------
-- | Retrieves the preferred name of a character encoding for embedding in
-- a document.
encodingName :: Encoding -> Text
encodingName :: Encoding -> Text
encodingName Encoding
UTF8       = Text
"UTF-8"
encodingName Encoding
UTF16BE    = Text
"UTF-16"
encodingName Encoding
UTF16LE    = Text
"UTF-16"
encodingName Encoding
ISO_8859_1 = Text
"ISO-8859-1"


------------------------------------------------------------------------------
-- | Gets the encoding function from 'Text' to 'ByteString' for an encoding.
encoder :: Encoding -> Text -> ByteString
encoder :: Encoding -> Text -> ByteString
encoder Encoding
UTF8       = Text -> ByteString
T.encodeUtf8
encoder Encoding
UTF16BE    = Text -> ByteString
T.encodeUtf16BE
encoder Encoding
UTF16LE    = Text -> ByteString
T.encodeUtf16LE
encoder Encoding
ISO_8859_1 = Text -> ByteString
encodeAscii


------------------------------------------------------------------------------
-- | Encodes UTF-8 Text into bytestring with only latin1 characters
-- UTF-8 characters found in the input and present in the
-- 'Text.XmlHtml.Meta.references' map are mapped to their escape sequences,
-- and any other UTF-8 characters are replaced with ascii "?"
encodeAscii :: Text -> ByteString
encodeAscii :: Text -> ByteString
encodeAscii Text
t = Text -> ByteString
T.encodeUtf8 forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Text] -> Text
T.concat forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a -> b) -> [a] -> [b]
map Text -> Text
toAsciiChunk forall a b. (a -> b) -> a -> b
$
                (Char -> Char -> Bool) -> Text -> [Text]
T.groupBy Char -> Char -> Bool
asciiSplits Text
t
  where

    -- Identify long strings of all-acceptable or all-unacceptable characters
    -- Acceptable strings are passed through
    -- Unacceptable strings are mapped to ASCII character by character
    toAsciiChunk :: Text -> Text
toAsciiChunk Text
sub =
      if (Char -> Bool) -> Text -> Bool
T.any Char -> Bool
isAscii Text
sub
      then Text
sub
      else [Text] -> Text
T.concat forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a -> b) -> [a] -> [b]
map Char -> Text
toAsciiChar forall a b. (a -> b) -> a -> b
$ Text -> String
T.unpack Text
sub
    asciiSplits :: Char -> Char -> Bool
asciiSplits Char
x Char
y = Char -> Bool
isAscii Char
x forall a. Eq a => a -> a -> Bool
== Char -> Bool
isAscii Char
y

    -- A character's mapping to ascii goes through html entity escaping
    -- if that character is in the references table
    -- Otherwise its unicode index is printed to decimal and "&#" is appended
    toAsciiChar :: Char -> Text
toAsciiChar Char
c = forall b a. b -> (a -> b) -> Maybe a -> b
maybe
        (Char -> Text
uniEscape Char
c) (\Text
esc -> [Text] -> Text
T.concat [Text
"&", Text
esc, Text
";"])
        (forall k a. Ord k => k -> Map k a -> Maybe a
Map.lookup (Char -> Text
T.singleton Char
c) Map Text Text
reversePredefinedRefs)

    uniEscape :: Char -> Text
uniEscape = Text -> Text -> Text
T.append Text
"&#" forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b c. (a -> b -> c) -> b -> a -> c
flip Text -> Char -> Text
T.snoc Char
';' forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Text
T.pack forall b c a. (b -> c) -> (a -> b) -> a -> c
.
                (forall a. Show a => a -> String
show :: Int -> String) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Enum a => a -> Int
fromEnum


------------------------------------------------------------------------------
-- | Gets the decoding function from 'ByteString' to 'Text' for an encoding.
decoder :: Encoding -> ByteString -> Text
decoder :: Encoding -> ByteString -> Text
decoder Encoding
UTF8       = OnDecodeError -> ByteString -> Text
T.decodeUtf8With    (forall b a. b -> OnError a b
TE.replace Char
'\xFFFF')
decoder Encoding
UTF16BE    = OnDecodeError -> ByteString -> Text
T.decodeUtf16BEWith (forall b a. b -> OnError a b
TE.replace Char
'\xFFFF')
decoder Encoding
UTF16LE    = OnDecodeError -> ByteString -> Text
T.decodeUtf16LEWith (forall b a. b -> OnError a b
TE.replace Char
'\xFFFF')
decoder Encoding
ISO_8859_1 = ByteString -> Text
T.decodeLatin1 forall b c a. (b -> c) -> (a -> b) -> a -> c
.
                     (Char -> Char) -> ByteString -> ByteString
BS.map (\Char
c -> if Char -> Bool
isLatin1 Char
c then Char
c else Char
'?')


------------------------------------------------------------------------------
isUTF16 :: Encoding -> Bool
isUTF16 :: Encoding -> Bool
isUTF16 Encoding
e = Encoding
e forall a. Eq a => a -> a -> Bool
== Encoding
UTF16BE Bool -> Bool -> Bool
|| Encoding
e forall a. Eq a => a -> a -> Bool
== Encoding
UTF16LE


------------------------------------------------------------------------------
fromText :: Encoding -> Text -> Builder
fromText :: Encoding -> Text -> Builder
fromText Encoding
e Text
t = ByteString -> Builder
fromByteString (Encoding -> Text -> ByteString
encoder Encoding
e Text
t)


bmap :: (Text -> Text) -> B.Builder -> B.Builder
bmap :: (Text -> Text) -> Builder -> Builder
bmap Text -> Text
f   = ByteString -> Builder
B.byteString
               forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> ByteString
T.encodeUtf8
               forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text
f
               forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text
TL.toStrict
               forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Text
TL.decodeUtf8
               forall b c a. (b -> c) -> (a -> b) -> a -> c
. Builder -> ByteString
B.toLazyByteString