-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/


-- | An implementation of the web Document Object Model, and its rendering.
--   
--   <a>willow</a> is the basis of a web browser suite, providing the
--   underlying types to represent various documents found on the internet.
--   It does <i>not</i> provide parsing algorithms for anything but the
--   simplest filetypes, instead expecting them to be outsourced to other
--   modules.
@package willow
@version 0.1.0.0


module Web.Willow.Common.Encoding.Character

-- | The Unicode character <tt>\xFFFD</tt>, safely (but unrecoverably)
--   representing an illegal, invalid, or otherwise unknown character.
replacementChar :: Char

-- | <b>Infra:</b> <tt><a>ASCII whitespace</a></tt>
--   
--   The ASCII characters defined as whitespace in the HTML standard.
--   Unlike Haskell's <a>isSpace</a> and anything following that example,
--   does <i>not</i> include <tt>\x0B</tt> (VT).
asciiWhitespace :: [Char]

-- | <b>Infra:</b> <tt><a>ASCII alpha</a></tt>
--   
--   Test whether the character is an alphabetic character in the ASCII
--   range (<tt>[A-Za-z]</tt>).
isAsciiAlpha :: Char -> Bool

-- | <b>Infra:</b> <tt><a>ASCII alphanumeric</a></tt>
--   
--   Test whether the character is either an alphabetic character or a
--   digit in the ASCII range (<tt>[A-Za-z0-9]</tt>).
isAsciiAlphaNum :: Char -> Bool

-- | <b>Infra:</b> <tt><a>ASCII whitespace</a></tt>
--   
--   Test whether the character fits the spec's definition of
--   <a>asciiWhitespace</a>.
isAsciiWhitespace :: Char -> Bool

-- | Convert an uppercase, alphabetic, ASCII character to its lowercase
--   form. This has the same semantics within the ASCII range as
--   <a>toLower</a>, but leaves any non-ASCII characters unchanged.
--   
--   <pre>
--   &gt;&gt;&gt; toAsciiLower 'A'
--   'a'
--   </pre>
--   
--   <pre>
--   &gt;&gt;&gt; toAsciiLower 'Á'
--   'Á'
--   </pre>
toAsciiLower :: Char -> Char

-- | Convert a lowercase, alphabetic, ASCII character to its uppercase
--   form. This has the same semantics within the ASCII range as
--   <a>toUpper</a>, but leaves any non-ASCII characters unchanged.
--   
--   <pre>
--   &gt;&gt;&gt; toAsciiUpper 'a'
--   'A'
--   </pre>
--   
--   <pre>
--   &gt;&gt;&gt; toAsciiUpper 'á'
--   'á'
--   </pre>
toAsciiUpper :: Char -> Char


-- | The existing parsing libraries are wonderful, but backtracking parsers
--   have a bad habit of being strict in their output; sure, you might be
--   able to operate over <a>Data.ByteString.Lazy</a>, but they all expect
--   to consume their entire input before handing you their result.
--   <a>Data.Attoparsec</a>'s continuations fully lean into that---even
--   though you don't have to provide all the input in one block, you can't
--   get a value before closing it out. <a>Text.Megaparsec</a> does provide
--   a reentrant form in <a>runParser'</a>, but it also comes with
--   comparatively heavyweight error and pretty-printing features.
--   
--   For complicated formats, those all can indeed be desirable. However,
--   the HTML algorithms have been optimized for minimal lookahead and
--   certainly no output revocation---once something is shipped out, it's
--   not going to be called back. Not taking advantage of that by using a
--   lazy output type means that parsing would always be subject to the
--   whims of slow or unreliable network connections. Moreover, the entire
--   complexity of the parsing algorithm is built around never reaching a
--   fatal failure condition, so error handling and especially recovery are
--   unnecessary overhead.
--   
--   And so, a custom parsing framework must be defined.
module Web.Willow.Common.Parser

-- | Unlike most monad transformers, a <a>Parser</a> is built around the
--   concept of success and failure, so its "default" form is better
--   structured over <a>Maybe</a> than over <a>Identity</a>.
type Parser stream = ParserT stream Maybe

-- | Set the constructed parser loose on a given input. Returns both the
--   resulting value and the remaining contents of the <a>Stream</a>.
runParser :: Parser stream out -> stream -> Maybe (out, stream)

-- | Encapsulation of an operation for transforming the head of a
--   <a>Stream</a> into some other value. Standard usage, with similar
--   behaviour to other <a>Text.Parsec</a>-derived parsers ("accept the
--   first which matches") may be obtained by instantiating <tt>gather</tt>
--   with <a>Maybe</a>, or non-deterministic parsing ("accept any of
--   these") through <tt>[]</tt>.
--   
--   Notably, this implementation is designed to allow laziness in both
--   input and output. For the best usage, therefore, consume as little
--   input at a time as possible (and so call <a>runParser</a> often).
--   
--   As part of this simplification, all <a>Text.Parsec</a>-style
--   integrated state (use <a>StateT</a>) and <a>Text.Megaparsec</a>-style
--   error pretty-printing (build your position tracking into the
--   <tt>stream</tt>, and/or wrap the output in <a>Either</a>) has been
--   stripped out.
newtype ParserT stream gather out
ParserT :: (stream -> gather (out, stream)) -> ParserT stream gather out
[runParserT] :: ParserT stream gather out -> stream -> gather (out, stream)

-- | Purely a convenience of the package rather than the module, the state
--   machines described by the HTML standard all involve some degree of
--   persistence, and so are built over a deeper monad stack. This could
--   easily be one of the most common transformers to add, anyway, no matter
--   what input is being parsed.
type StateParser state stream = StateT state (Parser stream)

-- | Generalize the transformation of an input <a>Stream</a> into a more
--   meaningful value. This class provides the basic building blocks from
--   which more expressive such parsers may be constructed.
--   
--   See also the description of <a>ParserT</a> for some of the design
--   decisions.
class (Alternative m, Monad m, Stream stream token, Monoid stream) => MonadParser m stream token | m -> stream

-- | Runs the argument parser on the current input, without consuming any
--   of it; these are identical semantics to saving and restoring the input
--   after running the computation, assuming the <a>MonadState</a> instance
--   runs over the input stream (see <a>ParserT</a>):
--   
--   <pre>
--   input &lt;- <a>get</a>
--   a &lt;- parser
--   <a>put</a> input
--   </pre>
--   
--   <pre>
--   a &lt;- <a>lookAhead</a> parser
--   </pre>
lookAhead :: MonadParser m stream token => m out -> m out

-- | Succeeds if and only if the argument parser fails (the input is not
--   consumed).
avoiding :: MonadParser m stream token => m out -> m ()

-- | Retrieve the next token in the stream, whatever it may be. Identical
--   to <tt><a>uncons</a></tt> in all but type.
next :: MonadParser m stream token => m token

-- | Retrieve the next several tokens in the stream. Identical to
--   <a>count</a> (with a safer index type) in the case that
--   <tt>gather</tt> is a list <tt>[token]</tt>.
--   
--   If fewer tokens are in the input stream than asked for, returns what
--   does remain in the input stream.
nextChunk :: MonadParser m stream token => Word -> m stream

-- | Prepend a token to the input stream to be processed next. Identical to
--   operating on the stream directly through <a>MonadState</a>, if that
--   instance also exists.
--   
--   <pre>
--   stream &lt;- <a>get</a>
--   <a>put</a> $ <a>cons</a> tok stream
--   </pre>
--   
--   <pre>
--   <a>push</a> tok
--   </pre>
push :: MonadParser m stream token => token -> m ()

-- | Concatenate the given sequence with the existing input, processing the
--   argument before the older <tt>stream</tt>.
pushChunk :: MonadParser m stream token => stream -> m ()

-- | Drop the remainder of the input, simulating an early end-of-stream.
--   Can be emulated through appropriate <a>MonadState</a> and
--   <a>Monoid</a> instances:
--   
--   <pre>
--   stream &lt;- <a>get</a>
--   <a>put</a> <a>mempty</a>
--   <a>return</a> stream
--   </pre>
--   
--   <pre>
--   <a>abridge</a>
--   </pre>
abridge :: MonadParser m stream token => m stream

-- | Succeeds if and only if the input is empty.
end :: MonadParser trans stream token => trans ()

-- | Succeeds if and only if the given value (typically the output of an
--   earlier parser) satisfies the predicate. No further input is consumed.
satisfying :: MonadParser trans stream token => (out -> Bool) -> out -> trans out

-- | Expect a specific token from the <a>Stream</a>, and fail if a
--   different token is found instead. Identical to running
--   <a>satisfying</a> with equality in the (by far most likely) case that
--   <tt>gather</tt> is a <a>Monad</a> in addition to an
--   <a>Alternative</a>:
--   
--   <pre>
--   tok &lt;- <a>next</a> <a>&gt;&gt;=</a> <a>satisfying</a> (<a>==</a> desired)
--   </pre>
--   
--   <pre>
--   tok &lt;- <a>token</a> desired
--   </pre>
token :: (MonadParser trans stream token, Eq token) => token -> trans token

-- | Expect a specific sequence of tokens from the <a>Stream</a>, and fail
--   if anything else is found instead, or if the <a>Stream</a> doesn't
--   have enough characters before its end. Identical to running
--   <a>satisfying</a> with equality over <a>nextChunk</a> in the case that
--   <tt>stream</tt> is an <a>Eq</a> (which all provided instances are) and
--   can easily provide a <a>length</a> (which they do, unless the sequence
--   to test against also needs to be lazy).
--   
--   <pre>
--   stream &lt;- <a>nextChunk</a> (<a>length</a> desired) <a>&gt;&gt;=</a> <a>satisfying</a> (<a>==</a> desired)
--   </pre>
--   
--   <pre>
--   stream &lt;- <a>chunk</a> desired
--   </pre>
chunk :: (MonadParser trans stream token, Eq stream) => stream -> trans stream

-- | A sequence of values which may be processed via a <a>MonadParser</a>.
--   This class is essentially just a unification of the various list-like
--   interfaces (<tt><a>uncons</a> == <a>head</a></tt>, etc.) as Haskell's
--   abstractions are slightly lacking in that area.
--   
--   <pre>
--   &gt;&gt;&gt; Just (tok, str) == uncons (cons tok str)
--   True
--   </pre>
class Monoid stream => Stream stream token | stream -> token

-- | Prepend a token to the stream for proximate processing, before
--   everything already in it.
cons :: Stream stream token => token -> stream -> stream

-- | As <a>cons</a>, but prepend multiple tokens at once.
consChunk :: Stream stream token => stream -> stream -> stream

-- | Retrieve the next token from the stream.
--   
--   This should only return <a>Nothing</a> if the stream is actually
--   empty---if the next value is not available yet due to slow IO or other
--   computation, <a>uncons</a> waits until it is.
uncons :: Stream stream token => stream -> Maybe (token, stream)

-- | Retrieve the next several tokens from the stream.
--   
--   If fewer tokens are in the input stream than asked for, the left side
--   of the return value is the (shorter than requested) entire input
--   stream and the right is <a>mempty</a>.
unconsChunk :: Stream stream token => Word -> stream -> (stream, stream)

-- | The number of tokens remaining in the stream.
chunkLen :: Stream stream token => stream -> Word
instance (GHC.Base.Alternative gather, GHC.Base.Monad gather, Web.Willow.Common.Parser.Stream stream token, GHC.Base.Monoid stream) => Web.Willow.Common.Parser.MonadParser (Web.Willow.Common.Parser.ParserT stream gather) stream token
instance (Web.Willow.Common.Parser.MonadParser trans stream token, GHC.Base.Monoid accum, GHC.Base.MonadPlus trans) => Web.Willow.Common.Parser.MonadParser (Control.Monad.Trans.Accum.AccumT accum trans) stream token
instance (Web.Willow.Common.Parser.MonadParser trans stream token, GHC.Base.Monoid except) => Web.Willow.Common.Parser.MonadParser (Control.Monad.Trans.Except.ExceptT except trans) stream token
instance Web.Willow.Common.Parser.MonadParser trans stream token => Web.Willow.Common.Parser.MonadParser (Control.Monad.Trans.Identity.IdentityT trans) stream token
instance Web.Willow.Common.Parser.MonadParser trans stream token => Web.Willow.Common.Parser.MonadParser (Control.Monad.Trans.Maybe.MaybeT trans) stream token
instance Web.Willow.Common.Parser.MonadParser trans stream token => Web.Willow.Common.Parser.MonadParser (Control.Monad.Trans.Reader.ReaderT reader trans) stream token
instance (Web.Willow.Common.Parser.MonadParser trans stream token, GHC.Base.MonadPlus trans) => Web.Willow.Common.Parser.MonadParser (Control.Monad.Trans.State.Lazy.StateT state trans) stream token
instance (Web.Willow.Common.Parser.MonadParser trans stream token, GHC.Base.MonadPlus trans) => Web.Willow.Common.Parser.MonadParser (Control.Monad.Trans.State.Strict.StateT state trans) stream token
instance (Web.Willow.Common.Parser.MonadParser trans stream token, GHC.Base.Monoid writer) => Web.Willow.Common.Parser.MonadParser (Control.Monad.Trans.Writer.Lazy.WriterT writer trans) stream token
instance (Web.Willow.Common.Parser.MonadParser trans stream token, GHC.Base.Monoid writer) => Web.Willow.Common.Parser.MonadParser (Control.Monad.Trans.Writer.Strict.WriterT writer trans) stream token
instance (Web.Willow.Common.Parser.MonadParser trans stream token, GHC.Base.Monoid writer, GHC.Base.MonadPlus trans) => Web.Willow.Common.Parser.MonadParser (Control.Monad.Trans.RWS.Strict.RWST reader writer state trans) stream token
instance (Web.Willow.Common.Parser.MonadParser trans stream token, GHC.Base.Monoid writer, GHC.Base.MonadPlus trans) => Web.Willow.Common.Parser.MonadParser (Control.Monad.Trans.RWS.Lazy.RWST reader writer state trans) stream token
instance Web.Willow.Common.Parser.Stream Data.ByteString.Lazy.Internal.ByteString GHC.Word.Word8
instance Web.Willow.Common.Parser.Stream Data.ByteString.Internal.ByteString GHC.Word.Word8
instance Web.Willow.Common.Parser.Stream Data.Text.Internal.Lazy.Text GHC.Types.Char
instance Web.Willow.Common.Parser.Stream Data.Text.Internal.Text GHC.Types.Char
instance Web.Willow.Common.Parser.Stream [token] token
instance GHC.Base.Functor gather => GHC.Base.Functor (Web.Willow.Common.Parser.ParserT stream gather)
instance GHC.Base.Monad gather => GHC.Base.Applicative (Web.Willow.Common.Parser.ParserT stream gather)
instance (GHC.Base.Alternative gather, GHC.Base.Monad gather) => GHC.Base.Alternative (Web.Willow.Common.Parser.ParserT stream gather)
instance (GHC.Base.Monad gather, GHC.Base.Semigroup out) => GHC.Base.Semigroup (Web.Willow.Common.Parser.ParserT stream gather out)
instance (GHC.Base.Monad gather, GHC.Base.Monoid out) => GHC.Base.Monoid (Web.Willow.Common.Parser.ParserT stream gather out)
instance GHC.Base.Monad gather => GHC.Base.Monad (Web.Willow.Common.Parser.ParserT stream gather)
instance (GHC.Base.Alternative gather, GHC.Base.Monad gather) => GHC.Base.MonadPlus (Web.Willow.Common.Parser.ParserT stream gather)
instance Control.Monad.Fail.MonadFail gather => Control.Monad.Fail.MonadFail (Web.Willow.Common.Parser.ParserT stream gather)
instance Control.Monad.Error.Class.MonadError err gather => Control.Monad.Error.Class.MonadError err (Web.Willow.Common.Parser.ParserT stream gather)
instance GHC.Base.Monad gather => Control.Monad.Fix.MonadFix (Web.Willow.Common.Parser.ParserT stream gather)
instance Control.Monad.Trans.Class.MonadTrans (Web.Willow.Common.Parser.ParserT stream)
instance GHC.Base.Monad gather => Control.Monad.Reader.Class.MonadReader stream (Web.Willow.Common.Parser.ParserT stream gather)
instance GHC.Base.Monad gather => Control.Monad.State.Class.MonadState stream (Web.Willow.Common.Parser.ParserT stream gather)
instance Control.Monad.IO.Class.MonadIO gather => Control.Monad.IO.Class.MonadIO (Web.Willow.Common.Parser.ParserT stream gather)
instance Control.Monad.Cont.Class.MonadCont gather => Control.Monad.Cont.Class.MonadCont (Web.Willow.Common.Parser.ParserT stream gather)


-- | <a>Alternative</a> instances can provide a form of pattern matching if
--   given a fail-on-false combinator (e.g. <a>when</a>), however the exact
--   behaviour isn't guaranteed; an underlying <a>Maybe</a> does provide a
--   greedy match, but <tt>[]</tt> will match later overlapping tests even
--   if they are intended to be masked; compare the masking to standard,
--   cascading pattern guards. This module provides a means of formalizing
--   that behaviour into a predictable form, no matter which
--   <a>Alternative</a> winds up being used.
module Web.Willow.Common.Parser.Switch

-- | The building blocks for predictable pattern matches over
--   <a>Alternative</a>. The constructors are distinguished along three
--   axes (see also the examples in the documentation for <a>switch</a>):
--   
--   <ul>
--   <li>"masking" vs. "non-masking": only the first "masking" case
--   fulfilled will be returned, while <i>every</i> "non-masking" one is
--   returned</li>
--   <li>"matching" vs. "catchall": whether the output is gated by a
--   predicate test or not</li>
--   <li>"piped" vs. "static": whether the output is passed the original
--   test token</li>
--   </ul>
data SwitchCase test m out

-- | Masking, matching, and piped
If :: (test -> Bool) -> (test -> m out) -> SwitchCase test m out

-- | Masking, matching, and static
If_ :: (test -> Bool) -> m out -> SwitchCase test m out

-- | Masking, catchall, and piped
Else :: (test -> m out) -> SwitchCase test m out

-- | Masking, catchall, and static
Else_ :: m out -> SwitchCase test m out

-- | Non-masking, matching, and piped
When :: (test -> Bool) -> (test -> m out) -> SwitchCase test m out

-- | Non-masking, matching, and static
When_ :: (test -> Bool) -> m out -> SwitchCase test m out

-- | Non-masking, catchall, and piped
Always :: (test -> m out) -> SwitchCase test m out

-- | Non-masking, catchall, and static
Always_ :: m out -> SwitchCase test m out

-- | Run a block of <a>SwitchCase</a>s, collapsing any masking cases so
--   that only the first matched test remains. This is strictly more
--   powerful than pattern matching, as it allows interspersing non-masking
--   tests alongside masking ones; for compatibility with refactoring to
--   single-return <a>Alternative</a> instances, however (i.e.
--   <a>Maybe</a>), it's best to order everything <i>as if</i> every case
--   could mask the ones after it. Note that the masking only affects the
--   output; the tests themselves may still be run, so expensive
--   computations are best put elsewhere.
--   
--   Only the first overlapping (maskable) case is selected:
--   
--   <pre>
--   &gt;&gt;&gt; uppercase = If_   isUpper  $ return "uppercase"
--   
--   &gt;&gt;&gt; one       = When_ (== '1') $ return "single '1'"
--   
--   &gt;&gt;&gt; alpha     = If_   isAlpha  $ return "ASCII letter"       -- Matches
--   
--   &gt;&gt;&gt; catchall  = Else_          $ return "none of the above"  -- Matches
--   
--   &gt;&gt;&gt; switch [uppercase, one, alpha, catchall] 'a' :: [String]
--   ["ASCII letter"]
--   </pre>
--   
--   Non-masking cases don't interact with the masking calculations:
--   
--   <pre>
--   &gt;&gt;&gt; uppercase = If_   isUpper  $ return "uppercase"
--   
--   &gt;&gt;&gt; one       = When_ (== '1') $ return "single '1'"         -- Matches
--   
--   &gt;&gt;&gt; alpha     = If_   isAlpha  $ return "ASCII letter"
--   
--   &gt;&gt;&gt; catchall  = Else_          $ return "none of the above"  -- Matches
--   
--   &gt;&gt;&gt; switch [uppercase, one, alpha, catchall] '1' :: [String]
--   ["single '1'", "none of the above"]
--   </pre>
--   
--   <a>Maybe</a> always takes the earliest successful test:
--   
--   <pre>
--   &gt;&gt;&gt; uppercase = If_   isUpper  $ return "uppercase"
--   
--   &gt;&gt;&gt; one       = When_ (== '1') $ return "single '1'"         -- Matches
--   
--   &gt;&gt;&gt; alpha     = If_   isAlpha  $ return "ASCII letter"
--   
--   &gt;&gt;&gt; catchall  = Else_          $ return "none of the above"  -- Matches
--   
--   &gt;&gt;&gt; switch [uppercase, one, alpha, catchall] '1' :: Maybe String
--   Just "single '1'"
--   </pre>
--   
--   <a>Always</a> and <a>Always_</a> function as a standard
--   <a>Alternative</a> computation:
--   
--   <pre>
--   &gt;&gt;&gt; switch [Always a, Always b, Always_ c] tok == a tok &lt;|&gt; b tok &lt;|&gt; c
--   True
--   </pre>
switch :: Alternative m => [SwitchCase test m out] -> test -> m out


module Web.Willow.Common.Parser.Util

-- | Test whether a given value falls within the range defined by the two
--   bounds, inclusive.
--   
--   <pre>
--   &gt;&gt;&gt; range 1 2 3
--   False
--   </pre>
--   
--   <pre>
--   &gt;&gt;&gt; range 1 3 2
--   True
--   </pre>
--   
--   <pre>
--   &gt;&gt;&gt; range 1 2 2
--   True
--   </pre>
range :: Ord a => a -> a -> a -> Bool

-- | Reduce a list of <a>Alternative</a>s, such that the first successful
--   instance will be run. If the list is empty, the resulting value will
--   always fail.
choice :: Alternative m => [m a] -> m a

-- | Scan through the stream, until the given parser succeeds (discarding
--   any tokens between the initial location and where the first success is
--   found). Fails if the parser does not succeed at any point in the
--   remainder of the stream.
findNext :: MonadParser parser stream token => parser out -> parser out


-- | The <b><a>Encoding</a></b> spec uses a conceptual model of an
--   "encoding" as being the function between Unicode values and bytes. As
--   this is a bit more complex than any content author wants to specify
--   for every document, HTML (and other interfaces) represent them as
--   semi-standardized but freeform text strings; the standard document
--   then collects the various strings authors have used across the web and
--   associates the most common as "labels" of those abstract encodings.
--   
--   To refer to them internally, however, it also promotes one of the
--   labels of each encoding as the canonical form; this library implements
--   that set (with modifications to fit Haskell identifiers) in
--   <a>Encoding</a>. The labels are described via a reversible many-to-one
--   mapping with those names, which, as the reverse is rarely used, lends
--   itself well to being adapted as a lookup table. This then is a
--   machine-readable formatting of that table.
module Web.Willow.Common.Encoding.Labels

-- | <b>Encoding:</b> <tt><a>get an encoding</a></tt>
--   
--   Given an encoding's case-insensitive label, try to retrieve an
--   appropriate <a>Encoding</a>. The set prescribed by the HTML
--   specification is smaller than that used by other registries for
--   security and interoperability reasons, and may not always return the
--   expected <a>Encoding</a> if an alternate one has been determined to be
--   more internet-compatible.
lookupEncoding :: Text -> Maybe Encoding
instance GHC.Read.Read Web.Willow.Common.Encoding.Labels.EncodingDesc
instance GHC.Show.Show Web.Willow.Common.Encoding.Labels.EncodingDesc
instance GHC.Classes.Eq Web.Willow.Common.Encoding.Labels.EncodingDesc
instance GHC.Read.Read Web.Willow.Common.Encoding.Labels.EncodingTable
instance GHC.Show.Show Web.Willow.Common.Encoding.Labels.EncodingTable
instance GHC.Classes.Eq Web.Willow.Common.Encoding.Labels.EncodingTable
instance Data.Aeson.Types.FromJSON.FromJSON Web.Willow.Common.Encoding.Labels.EncodingTable
instance Data.Aeson.Types.FromJSON.FromJSON Web.Willow.Common.Encoding.Labels.EncodingDesc


-- | This module and the internal branch it heads implement the
--   <b><a>Encoding</a></b> specification for translating text to and from
--   UTF-8 and a selection of less-favoured but grandfathered encoding
--   schemes. As the standard authors' primary goal has been security
--   followed closely by compatibility with existing web pages, the
--   algorithms described and the names associated with them do not
--   perfectly match the descriptions originally given by the various
--   original encoding specifications themselves.
module Web.Willow.Common.Encoding

-- | <b>Encoding:</b> <tt><a>encoding</a></tt>
--   
--   All character encoding schemes supported by the HTML standard, defined
--   as a bidirectional map between characters and binary sequences.
--   <a>Utf8</a> is strongly encouraged for new content (including all
--   encoding purposes), but the others are retained for compatibility with
--   existing pages.
--   
--   Note that none of these are complete functions, to one degree or
--   another, and that no guarantee is made that the mapping round-trips.
data Encoding

-- | The UTF-8 encoding for Unicode.
Utf8 :: Encoding

-- | The UTF-16 encoding for Unicode, in big endian order.
--   
--   No encoder is provided for this scheme.
Utf16be :: Encoding

-- | The UTF-16 encoding for Unicode, in little endian order.
--   
--   No encoder is provided for this scheme.
Utf16le :: Encoding

-- | <a>Big5</a>, primarily covering traditional Chinese characters.
Big5 :: Encoding

-- | EUC-JP, primarily covering Japanese as the union of <a>JIS-0208</a>
--   and <a>JIS-0212</a>.
EucJp :: Encoding

-- | <a>EUC-KR</a>, primarily covering Hangul.
EucKr :: Encoding

-- | The <a>GB18030-2005 extension</a> to GBK, with one tweak for web
--   compatibility, primarily covering both forms of Chinese characters.
--   
--   Note that this encoding also includes a large number of four-byte
--   sequences which aren't listed in the linked visualization.
Gb18030 :: Encoding

-- | GBK, primarily covering simplified Chinese characters.
--   
--   In practice, this is just <a>Gb18030</a> with a restricted set of
--   encodable characters; the decoder is identical.
Gbk :: Encoding

-- | DOS and OS/2 <a>code page</a> for Cyrillic characters.
Ibm866 :: Encoding

-- | A Japanese-focused implementation of the ISO 2022 meta-encoding,
--   including both <a>JIS-0208</a> and halfwidth katakana.
Iso2022Jp :: Encoding

-- | <a>Latin-2</a> (Central European).
Iso8859_2 :: Encoding

-- | <a>Latin-3</a> (South European and Esperanto)
Iso8859_3 :: Encoding

-- | <a>Latin-4</a> (North European).
Iso8859_4 :: Encoding

-- | <a>Latin/Cyrillic</a>.
Iso8859_5 :: Encoding

-- | <a>Latin/Arabic</a>.
Iso8859_6 :: Encoding

-- | <a>Latin/Greek</a> (modern monotonic).
Iso8859_7 :: Encoding

-- | <a>Latin/Hebrew</a> (visual order).
Iso8859_8 :: Encoding

-- | <a>Latin/Hebrew</a> (logical order).
Iso8859_8i :: Encoding

-- | <a>Latin-6</a> (Nordic).
Iso8859_10 :: Encoding

-- | <a>Latin-7</a> (Baltic Rim).
Iso8859_13 :: Encoding

-- | <a>Latin-8</a> (Celtic).
Iso8859_14 :: Encoding

-- | <a>Latin-9</a> (revision of ISO 8859-1 Latin-1, Western European).
Iso8859_15 :: Encoding

-- | <a>Latin-10</a> (South-Eastern European).
Iso8859_16 :: Encoding

-- | KOI-8 <a>specialized</a> for Russian Cyrillic.
Koi8R :: Encoding

-- | KOI-8 <a>specialized</a> for Ukrainian Cyrillic.
Koi8U :: Encoding

-- | <a>Mac OS Roman</a>.
Macintosh :: Encoding

-- | <a>Mac OS Cyrillic</a> (as of Mac OS 9.0)
MacintoshCyrillic :: Encoding

-- | The <a>Windows variant</a> (code page 932) of Shift JIS.
ShiftJis :: Encoding

-- | ISO 8859-11 <a>Latin/Thai</a> with Windows extensions in the C1
--   control character slots.
--   
--   Note that this encoding is always used instead of pure Latin/Thai.
Windows874 :: Encoding

-- | The Windows <a>extension and rearrangement</a> of ISO 8859-2 Latin-2.
Windows1250 :: Encoding

-- | <a>Windows Cyrillic</a>.
Windows1251 :: Encoding

-- | The Windows extension of ISO 8859-1 <a>Latin-1</a>, replacing most of
--   the C1 control characters with printable glyphs.
--   
--   Note that this encoding is always used instead of pure Latin-1.
Windows1252 :: Encoding

-- | <a>Windows Greek</a> (modern monotonic).
Windows1253 :: Encoding

-- | The Windows extension of ISO 8859-9 <a>Latin-5 (Turkish)</a>,
--   replacing most of the C1 control characters with printable glyphs.
--   
--   Note that this encoding is always used instead of pure Latin-5.
Windows1254 :: Encoding

-- | The Windows <a>extension and rearrangement</a> of ISO 8859-8
--   Latin/Hebrew.
Windows1255 :: Encoding

-- | <a>Windows Arabic</a>.
Windows1256 :: Encoding

-- | <a>Windows Baltic</a>.
Windows1257 :: Encoding

-- | <a>Windows Vietnamese</a>.
Windows1258 :: Encoding

-- | The input is reduced to a single <tt>\xFFFD</tt> replacement
--   character.
--   
--   No encoder is provided for this scheme.
Replacement :: Encoding

-- | Non-ASCII bytes (<tt>\x80</tt> through <tt>\xFF</tt>) are mapped to a
--   portion of the Unicode Private Use Area (<tt>\xF780</tt> through
--   <tt>\xF7FF</tt>).
UserDefined :: Encoding

-- | All the data which needs to be tracked for correct behaviour in
--   decoding a binary stream into readable text.
data DecoderState

-- | Retrieve the encoding scheme currently used by the decoder to decode
--   the binary document stream.
decoderEncoding :: DecoderState -> Encoding

-- | Any leftover bytes at the end of the binary stream, which require
--   further input to be processed in order to correctly map to a character
--   or error value.
decoderRemainder :: DecoderState -> ShortByteString

-- | <b>HTML:</b> <tt><a>change the encoding</a></tt>
--   
--   The data required to determine if a new encoding would produce an
--   identical output to what the current one has already done, and to
--   restart the parsing with the new one if the two are incompatible.
--   Values may be easily initialized via <a>emptyReparseData</a>.
data ReparseData

-- | All the data which needs to be tracked for correct behaviour in
--   encoding readable text into a binary stream.
data EncoderState

-- | The collection of data which, for any given encoding scheme, results
--   in behaviour according to the vanilla decoder before any bytes have
--   been read.
initialDecoderState :: Encoding -> DecoderState

-- | Instruct the decoder that the binary document stream is <i>known</i>
--   to be in the given encoding.
setEncodingCertain :: Encoding -> DecoderState -> DecoderState

-- | Store the given binary sequence as unparsable without further input,
--   to be prepended to the beginning of the stream on the next <a>decode</a>
--   or <a>decode'</a> call.
setRemainder :: ShortByteString -> DecoderState -> DecoderState

-- | The collection of data which, for any given encoding scheme, results
--   in behaviour according to the vanilla encoder before any characters
--   have been processed.
initialEncoderState :: Encoding -> EncoderState

-- | <b>Encoding:</b> <tt><a>run an encoding's decoder</a></tt> with error
--   mode <tt>fatal</tt>
--   
--   Given a character encoding scheme, transform a dependent
--   <a>ByteString</a> into portable <a>Char</a>s. If any byte sequences
--   are meaningless or illegal, they are returned verbatim for error
--   reporting; a <a>Left</a> should not be parsed further.
--   
--   See <a>decodeStep</a> to decode only a minimal section, or
--   <a>decode'</a> for simple error replacement. Call
--   <a>finalizeDecode</a> on the returned <a>DecoderState</a> if no
--   further bytes will be added to the document stream.
decode :: DecoderState -> ByteString -> ([Either ShortByteString String], DecoderState)

-- | <b>Encoding:</b> <tt><a>decode</a></tt>
--   
--   Given a character encoding scheme, transform a dependent
--   <a>ByteString</a> into a portable <a>Text</a>. If any byte sequences
--   are meaningless or illegal, they are replaced with the Unicode
--   replacement character <tt>\xFFFD</tt>.
--   
--   See <a>decodeStep'</a> to decode only a minimal section, or
--   <a>decode</a> if the original data should be retained for custom error
--   reporting. Call <a>finalizeDecode'</a> on the returned
--   <a>DecoderState</a> if no further bytes will be added to the document
--   stream.
decode' :: DecoderState -> ByteString -> (Text, DecoderState)

-- | <b>Encoding:</b> <tt><a>BOM sniff</a></tt>
--   
--   Checks for a "byte-order mark" signature character in various
--   encodings. If present, returns both the encoding found and the
--   remainder of the stream, otherwise returns the input unchanged.
byteOrderMark :: ByteString -> (Maybe Encoding, ByteString)

-- | Explicitly indicate that the input stream will not contain any further
--   bytes, and perform any finalization processing based on that.
--   
--   See <a>finalizeDecode'</a> for simple error replacement.
finalizeDecode :: DecoderState -> [Either ShortByteString String]

-- | Explicitly indicate that the input stream will not contain any further
--   bytes, and perform any finalization processing based on that.
--   
--   See <a>finalizeDecode</a> if the original data should be retained for
--   custom error reporting.
finalizeDecode' :: DecoderState -> Text

-- | Read a binary stream of UTF-8 encoded text. If the stream begins with
--   a UTF-8 byte-order mark, it's silently dropped (any other BOM is
--   ignored but remains in the output). Fails (returning a <a>Left</a>) if
--   the stream contains byte sequences which don't represent any
--   character, or which encode a surrogate character.
--   
--   See <a>decodeUtf8'</a> for simple error replacement, or
--   <a>decodeUtf8NoBom</a> if the BOM should always be retained.
decodeUtf8 :: ByteString -> ([Either ShortByteString String], DecoderState)

-- | <b>Encoding:</b> <tt><a>UTF-8 decode without BOM or fail</a></tt>
--   
--   Read a binary stream of UTF-8 encoded text. If the stream begins with
--   a byte-order mark, it is kept as the first character of the output.
--   Fails (returning a <a>Left</a>) if the stream contains byte sequences
--   which don't represent any character, or which encode a surrogate
--   character.
--   
--   See <a>decodeUtf8NoBom'</a> for simple error replacement, or
--   <a>decodeUtf8</a> if a redundant UTF-8 BOM should be dropped.
decodeUtf8NoBom :: ByteString -> ([Either ShortByteString String], DecoderState)

-- | <b>Encoding:</b> <tt><a>UTF-8 decode</a></tt>
--   
--   Read a binary stream of UTF-8 encoded text. If the stream begins with
--   a UTF-8 byte-order mark, it's silently dropped (any other BOM is
--   ignored but remains in the output). Any surrogate characters or
--   invalid byte sequences are replaced with the Unicode replacement
--   character <tt>\xFFFD</tt>.
--   
--   See <a>decodeUtf8</a> if the original data should be retained for
--   custom error reporting, or <a>decodeUtf8NoBom'</a> if the BOM should
--   always be retained.
decodeUtf8' :: ByteString -> (Text, DecoderState)

-- | <b>Encoding:</b> <tt><a>UTF-8 decode without BOM</a></tt>
--   
--   Read a binary stream of UTF-8 encoded text. If the stream begins with
--   a byte-order mark, it is kept as the first character of the output.
--   Any surrogate characters or invalid byte sequences are replaced with
--   the Unicode replacement character <tt>\xFFFD</tt>.
--   
--   See <a>decodeUtf8NoBom</a> if the original data should be retained for
--   custom error reporting, or <a>decodeUtf8'</a> if a redundant UTF-8 BOM
--   should be dropped.
decodeUtf8NoBom' :: ByteString -> (Text, DecoderState)

-- | <b>Encoding:</b> <tt><a>run an encoding's encoder</a></tt> with error
--   mode <tt>fatal</tt>
--   
--   Given a character encoding scheme, transform a portable <a>Text</a>
--   into a sequence of bytes representing those characters. If the
--   encoding scheme does not define a binary representation for a
--   character in the input, the original <a>Char</a> is returned unchanged
--   for custom error reporting.
--   
--   See <a>encodeStep</a> to encode only a minimal section, or
--   <a>encode'</a> for escaping with HTML-style character codes.
encode :: EncoderState -> Text -> ([Either Char ShortByteString], EncoderState)

-- | <b>Encoding:</b> <tt><a>encode</a></tt>
--   
--   Given a character encoding scheme, transform a portable <a>Text</a>
--   into a sequence of bytes representing those characters. If the
--   encoding scheme does not define a binary representation for a
--   character in the input, it is replaced with an HTML-style escape
--   (e.g. <tt>"&amp;#0000;"</tt>).
--   
--   See <a>encodeStep'</a> to encode only a minimal section, or
--   <a>encode</a> if the original data should be retained for custom error
--   reporting.
encode' :: EncoderState -> Text -> (ByteString, EncoderState)

-- | <b>Encoding:</b> <tt><a>UTF-8 encode</a></tt>
--   
--   Transform a portable <a>Text</a> into a sequence of bytes according to
--   the UTF-8 encoding scheme.
encodeUtf8 :: Text -> (ByteString, EncoderState)

-- | <b>Encoding:</b> <tt><a>run an encoding's decoder</a></tt> with error
--   mode <tt>fatal</tt>
--   
--   Read the smallest number of bytes from the head of the
--   <a>ByteString</a> which would leave the decoder in a re-enterable
--   state. If any byte sequences are meaningless or illegal, they are
--   returned verbatim for error reporting; a <a>Left</a> should not be
--   parsed further.
--   
--   See <a>decode</a> to decode the entire string at once, or
--   <a>decodeStep'</a> for simple error replacement.
decodeStep :: DecoderState -> ByteString -> (Maybe (Either ShortByteString String), DecoderState, ByteString)

-- | <b>Encoding:</b> <tt><a>run an encoding's encoder</a></tt> with error
--   mode <tt>fatal</tt>
--   
--   Read the smallest number of characters from the head of the
--   <a>Text</a> which would leave the encoder in a re-enterable state. If
--   the encoding scheme does not define a binary representation for a
--   character in the input, the original <a>Char</a> is returned unchanged
--   for custom error reporting.
--   
--   See <a>encode</a> to encode the entire string at once, or
--   <a>encodeStep'</a> for simple error replacement.
encodeStep :: EncoderState -> Text -> Maybe (Either Char ShortByteString, EncoderState, Text)

-- | <b>Encoding:</b> <tt><a>run an encoding's decoder</a></tt> with error
--   mode <tt>replacement</tt>
--   
--   Read the smallest number of bytes from the head of the
--   <a>ByteString</a> which would leave the decoder in a re-enterable
--   state. Any byte sequences which are meaningless or illegal are
--   replaced with the Unicode replacement character <tt>\xFFFD</tt>.
--   
--   See <a>decode'</a> to decode the entire string at once, or
--   <a>decodeStep</a> if the original data should be retained for custom
--   error reporting.
decodeStep' :: DecoderState -> ByteString -> (Maybe String, DecoderState, ByteString)

-- | <b>Encoding:</b> <tt><a>run an encoding's encoder</a></tt> with error
--   mode <tt>html</tt>
--   
--   Read the smallest number of characters from the head of the
--   <a>Text</a> which would leave the encoder in a re-enterable state. If
--   the encoding scheme does not define a binary representation for a
--   character in the input, it is replaced with an HTML-style escape
--   (e.g. <tt>"&amp;#0000;"</tt>).
--   
--   See <a>encode'</a> to encode the entire string at once, or
--   <a>encodeStep</a> if the original data should be retained for custom
--   error reporting.
encodeStep' :: EncoderState -> Text -> Maybe (ShortByteString, EncoderState, Text)

-- | The union of all state variables tracked by the bytes-to-<a>Char</a>
--   decoding algorithm of a single encoding scheme.
data InnerDecoderState

-- | The union of all state variables tracked by the <a>Char</a>-to-bytes
--   encoding algorithm of a single encoding scheme.
data InnerEncoderState
instance GHC.Read.Read Web.Willow.Common.Encoding.InnerEncoderState
instance GHC.Show.Show Web.Willow.Common.Encoding.InnerEncoderState
instance GHC.Classes.Eq Web.Willow.Common.Encoding.InnerEncoderState
instance GHC.Read.Read Web.Willow.Common.Encoding.InnerDecoderState
instance GHC.Show.Show Web.Willow.Common.Encoding.InnerDecoderState
instance GHC.Classes.Eq Web.Willow.Common.Encoding.InnerDecoderState


-- | In an ideal internet, every server would declare the binary encoding
--   with which it is transmitting a file (actually, the <i>true</i> ideal
--   would be for it to always be <a>Utf8</a>, but there are still a lot of
--   legacy documents out there). However, that's not always the case.
--   
--   A good fallback would be for every document to declare itself what
--   encoding it has been saved in. However, not every one does, and the
--   ones that do may still get it wrong (take, for instance, the case of a
--   server which <i>does</i> translate everything it sends to
--   <a>Utf8</a>).
--   
--   And so, the <a>HTML standard</a> describes an algorithm for guessing
--   the proper bytes-to-text translation to use in <a>decode</a>. While
--   this does therefore assume some HTML syntax and specific tags, none of
--   the semantics should cause an issue for other filetypes.
module Web.Willow.Common.Encoding.Sniffer

-- | <b>Encoding:</b> <tt><a>encoding</a></tt>
--   
--   All character encoding schemes supported by the HTML standard, defined
--   as a bidirectional map between characters and binary sequences.
--   <a>Utf8</a> is strongly encouraged for new content (including all
--   encoding purposes), but the others are retained for compatibility with
--   existing pages.
--   
--   Note that none of these are complete functions, to one degree or
--   another, and that no guarantee is made that the mapping round-trips.
data Encoding

-- | The UTF-8 encoding for Unicode.
Utf8 :: Encoding

-- | The UTF-16 encoding for Unicode, in big endian order.
--   
--   No encoder is provided for this scheme.
Utf16be :: Encoding

-- | The UTF-16 encoding for Unicode, in little endian order.
--   
--   No encoder is provided for this scheme.
Utf16le :: Encoding

-- | <a>Big5</a>, primarily covering traditional Chinese characters.
Big5 :: Encoding

-- | EUC-JP, primarily covering Japanese as the union of <a>JIS-0208</a>
--   and <a>JIS-0212</a>.
EucJp :: Encoding

-- | <a>EUC-KR</a>, primarily covering Hangul.
EucKr :: Encoding

-- | The <a>GB18030-2005 extension</a> to GBK, with one tweak for web
--   compatibility, primarily covering both forms of Chinese characters.
--   
--   Note that this encoding also includes a large number of four-byte
--   sequences which aren't listed in the linked visualization.
Gb18030 :: Encoding

-- | GBK, primarily covering simplified Chinese characters.
--   
--   In practice, this is just <a>Gb18030</a> with a restricted set of
--   encodable characters; the decoder is identical.
Gbk :: Encoding

-- | DOS and OS/2 <a>code page</a> for Cyrillic characters.
Ibm866 :: Encoding

-- | A Japanese-focused implementation of the ISO 2022 meta-encoding,
--   including both <a>JIS-0208</a> and halfwidth katakana.
Iso2022Jp :: Encoding

-- | <a>Latin-2</a> (Central European).
Iso8859_2 :: Encoding

-- | <a>Latin-3</a> (South European and Esperanto)
Iso8859_3 :: Encoding

-- | <a>Latin-4</a> (North European).
Iso8859_4 :: Encoding

-- | <a>Latin/Cyrillic</a>.
Iso8859_5 :: Encoding

-- | <a>Latin/Arabic</a>.
Iso8859_6 :: Encoding

-- | <a>Latin/Greek</a> (modern monotonic).
Iso8859_7 :: Encoding

-- | <a>Latin/Hebrew</a> (visual order).
Iso8859_8 :: Encoding

-- | <a>Latin/Hebrew</a> (logical order).
Iso8859_8i :: Encoding

-- | <a>Latin-6</a> (Nordic).
Iso8859_10 :: Encoding

-- | <a>Latin-7</a> (Baltic Rim).
Iso8859_13 :: Encoding

-- | <a>Latin-8</a> (Celtic).
Iso8859_14 :: Encoding

-- | <a>Latin-9</a> (revision of ISO 8859-1 Latin-1, Western European).
Iso8859_15 :: Encoding

-- | <a>Latin-10</a> (South-Eastern European).
Iso8859_16 :: Encoding

-- | KOI-8 <a>specialized</a> for Russian Cyrillic.
Koi8R :: Encoding

-- | KOI-8 <a>specialized</a> for Ukrainian Cyrillic.
Koi8U :: Encoding

-- | <a>Mac OS Roman</a>.
Macintosh :: Encoding

-- | <a>Mac OS Cyrillic</a> (as of Mac OS 9.0)
MacintoshCyrillic :: Encoding

-- | The <a>Windows variant</a> (code page 932) of Shift JIS.
ShiftJis :: Encoding

-- | ISO 8859-11 <a>Latin/Thai</a> with Windows extensions in the C1
--   control character slots.
--   
--   Note that this encoding is always used instead of pure Latin/Thai.
Windows874 :: Encoding

-- | The Windows <a>extension and rearrangement</a> of ISO 8859-2 Latin-2.
Windows1250 :: Encoding

-- | <a>Windows Cyrillic</a>.
Windows1251 :: Encoding

-- | The Windows extension of ISO 8859-1 <a>Latin-1</a>, replacing most of
--   the C1 control characters with printable glyphs.
--   
--   Note that this encoding is always used instead of pure Latin-1.
Windows1252 :: Encoding

-- | <a>Windows Greek</a> (modern monotonic).
Windows1253 :: Encoding

-- | The Windows extension of ISO 8859-9 <a>Latin-5 (Turkish)</a>,
--   replacing most of the C1 control characters with printable glyphs.
--   
--   Note that this encoding is always used instead of pure Latin-5.
Windows1254 :: Encoding

-- | The Windows <a>extension and rearrangement</a> of ISO 8859-8
--   Latin/Hebrew.
Windows1255 :: Encoding

-- | <a>Windows Arabic</a>.
Windows1256 :: Encoding

-- | <a>Windows Baltic</a>.
Windows1257 :: Encoding

-- | <a>Windows Vietnamese</a>.
Windows1258 :: Encoding

-- | The input is reduced to a single <tt>\xFFFD</tt> replacement
--   character.
--   
--   No encoder is provided for this scheme.
Replacement :: Encoding

-- | Non-ASCII bytes (<tt>\x80</tt> through <tt>\xFF</tt>) are mapped to a
--   portion of the Unicode Private Use Area (<tt>\xF780</tt> through
--   <tt>\xF7FF</tt>).
UserDefined :: Encoding

-- | <b>HTML:</b> <tt><a>confidence</a></tt>
--   
--   How likely the specified encoding is to be the actual stream encoding.
--   
--   The spec names a third confidence level <tt>irrelevant</tt>, to be
--   used when the stream doesn't depend on any particular encoding scheme
--   (i.e. it is composed directly of <a>Char</a>s rather than parsed from
--   a binary stream). This has not been included in the sum type, as it
--   makes little sense to have that as a parameter of the <i>decoding</i>
--   stage. Use <tt><a>Maybe</a> <a>DecoderState</a></tt> to represent it
--   instead.
data Confidence

-- | The binary stream is likely the named encoding, but more data may
--   prove it to be something else. In the latter case, the
--   <a>ReparseData</a> (if available) may be used to transition to the
--   proper encoding, or restart the stream if necessary.
Tentative :: Encoding -> ReparseData -> Confidence

-- | The binary stream is confirmed to be of the given encoding.
Certain :: Encoding -> Confidence

-- | <b>HTML:</b> <tt><a>change the encoding</a></tt>
--   
--   The data required to determine if a new encoding would produce an
--   identical output to what the current one has already done, and to
--   restart the parsing with the new one if the two are incompatible.
--   Values may be easily initialized via <a>emptyReparseData</a>.
data ReparseData
ReparseData :: HashMap ShortByteString Char -> ByteString -> ReparseData

-- | The input binary sequences and the resulting characters which are
--   already emitted to the output.
[parsedChars] :: ReparseData -> HashMap ShortByteString Char

-- | The complete binary sequence parsed thus far, in case it needs to be
--   re-processed under a new, incompatible encoding.
[streamStart] :: ReparseData -> ByteString

-- | The collection of data which would indicate nothing has yet been
--   parsed.
emptyReparseData :: ReparseData

-- | <b>HTML:</b> <tt><a>encoding sniffing algorithm</a></tt>
--   
--   Given a stream and related metadata, try to determine what encoding
--   may have been used to write it.
--   
--   Will resolve and/or wait for the number of bytes requested by
--   <a>prescanDepth</a> to be available in the stream (or, if it comes
--   sooner, the end of the stream), if they have not yet been produced.
sniff :: SnifferEnvironment -> ByteString -> Confidence

-- | Various datapoints which may indicate a document's binary encoding, to
--   be fed into the <a>sniff</a> algorithm. Values may be easily
--   instantiated as updates to <a>emptySnifferEnvironment</a>.
data SnifferEnvironment
SnifferEnvironment :: Maybe Encoding -> Maybe Encoding -> Word -> Maybe Encoding -> Maybe Encoding -> Maybe Encoding -> Maybe Encoding -> SnifferEnvironment

-- | The encoding the end user has specified should be used. Note that even
--   this can still be overridden by the presence of a byte-order mark at
--   the head of the stream.
[userOverride] :: SnifferEnvironment -> Maybe Encoding

-- | The encoding given by the transport layer (e.g. through an HTTP
--   <tt>Content-Type</tt> header).
[transportHeader] :: SnifferEnvironment -> Maybe Encoding

-- | The number of bytes which should be skimmed for <tt><a>meta</a></tt>
--   attributes specifying an encoding.
[prescanDepth] :: SnifferEnvironment -> Word

-- | The encoding used for the enclosing document (e.g., if this document
--   is loaded via an <tt>&lt;iframe&gt;</tt>).
[parentEncoding] :: SnifferEnvironment -> Maybe Encoding

-- | The encoding from the last time this page was loaded, other pages on
--   the site, or other cached data.
[cachedInfo] :: SnifferEnvironment -> Maybe Encoding

-- | The encoding the end user has specified as being their preferred
--   default, if no better encoding can be determined.
[userDefault] :: SnifferEnvironment -> Maybe Encoding

-- | The encoding recommended as a reasonable guess based on the current
--   language of the user's system.

-- | <i>Warning: The type of this argument will be changed in a future
--   release</i>
[localeEncoding] :: SnifferEnvironment -> Maybe Encoding

-- | A neutral set of parameters to pass to the <a>sniff</a> algorithm: no
--   accessory data, and a <a>prescanDepth</a> limit of 1024 bytes.
emptySnifferEnvironment :: SnifferEnvironment

-- | Guess what encoding may be in use by the binary stream, and generate a
--   collection of data based on that which results in the behaviour
--   described by the decoding algorithm at the start of the stream.
sniffDecoderState :: SnifferEnvironment -> ByteString -> DecoderState

-- | The encoding scheme currently in use by the parser, along with how
--   likely that scheme actually represents the binary stream.
decoderConfidence :: DecoderState -> Confidence

-- | Extract the underlying encoding scheme from the wrapping data.
confidenceEncoding :: Confidence -> Encoding

-- | <b>HTML:</b> <tt><a>algorithm for extracting a character encoding from
--   a meta element</a></tt>
--   
--   Find the first occurrence of an ASCII-encoded string <tt>charset</tt>
--   in the stream, and try to parse its attribute-style value into an
--   <a>Encoding</a>.
--   
--   Returns <a>Nothing</a> if the stream does not contain <tt>charset</tt>
--   followed by <tt>=</tt>, or if the value can not be successfully parsed
--   as an encoding label.
extractEncoding :: ByteString -> Maybe Encoding
instance GHC.Read.Read Web.Willow.Common.Encoding.Sniffer.SnifferEnvironment
instance GHC.Show.Show Web.Willow.Common.Encoding.Sniffer.SnifferEnvironment
instance GHC.Classes.Eq Web.Willow.Common.Encoding.Sniffer.SnifferEnvironment


-- | In lieu of a fully-featured <b><a>DOM</a></b> implementation ---and
--   even, for that matter, a styled tree--- this module provides
--   bare-bones data structures to temporarily contain the minimal data
--   currently returned by tree parsing. Eventually this will be padded out
--   into a fully-featured DOM implementation, but doing so now would be
--   creating much more work than necessary.
module Web.Willow.DOM

-- | <b>DOM:</b> <tt><a>tree</a></tt>
--   
--   The core concept underlying HTML and related languages: a nested
--   collection of data and metadata marked up according to several broad
--   categories. Values may be easily instantiated as updates to
--   <a>emptyTree</a>.
data Tree
Tree :: Node -> [Tree] -> Tree

-- | The atomic portion of the tree at the current location.
[node] :: Tree -> Node

-- | All parts of the tree nested below the current location.
[children] :: Tree -> [Tree]

-- | A sane default collection for easy record initialization; namely, a
--   <a>Document</a> without any <a>children</a>.
emptyTree :: Tree

-- | <b>DOM:</b> <tt><a>node</a></tt>
--   
--   The sum type of all different classes of behaviour a particular point
--   of data may fill.
data Node

-- | <b>DOM:</b> <tt><a>Text</a></tt>
--   
--   A simple character string to be rendered to the output or to be
--   processed further, according to which <a>Element</a>s enclose it.
Text :: Text -> Node

-- | <b>DOM:</b> <tt><a>Comment</a></tt>
--   
--   An author's aside, not intended to be shown to the end user.
Comment :: Text -> Node

-- | <b>DOM:</b> <tt><a>DocumentType</a></tt>
--   
--   Largely vestigial in HTML5, but used in previous versions and related
--   languages to specify the semantics of <a>Element</a>s used in the
--   document.
DocumentType :: DocumentTypeParams -> Node

-- | <b>DOM:</b> <tt><a>Element</a></tt>
--   
--   Markup instructions directing the behaviour or classifying a portion
--   of the document's content.
Element :: ElementParams -> Node

-- | <b>DOM:</b> <tt><a>Attr</a></tt>
--   
--   Metadata allowing finer customization and description of the heavier
--   <a>Element</a>s.
Attribute :: AttributeParams -> Node

-- | <b>DOM:</b> <tt><a>DocumentFragment</a></tt>
--   
--   Like <a>Document</a>, but requiring less precise structure in its
--   <a>children</a> and generally only containing a small slice of a
--   larger document.
DocumentFragment :: Node

-- | <b>DOM:</b> <tt><a>Document</a></tt>
--   
--   The root of a <a>Tree</a>, typically imposing a principled structure.
Document :: QuirksMode -> Node

-- | A simplified view of the <a>Node</a> constructors, for use in testing
--   via <a>nodeType</a>.
data NodeType

-- | <b>DOM:</b> <tt><a>ELEMENT_NODE</a></tt>
--   
--   <a>Element</a>
ElementNode :: NodeType

-- | <b>DOM:</b> <tt><a>ATTRIBUTE_NODE</a></tt>
--   
--   <a>Attribute</a>
AttributeNode :: NodeType

-- | <b>DOM:</b> <tt><a>TEXT_NODE</a></tt>
--   
--   <a>Text</a>
TextNode :: NodeType

-- | <b>DOM:</b> <tt><a>CDATA_SECTION_NODE</a></tt>
CDataSectionNode :: NodeType

-- | <b>DOM:</b> <tt><a>ENTITY_REFERENCE_NODE</a></tt>

-- | <i>Deprecated: historical</i>
EntityReferenceNode :: NodeType

-- | <b>DOM:</b> <tt><a>ENTITY_NODE</a></tt>

-- | <i>Deprecated: historical</i>
EntityNode :: NodeType

-- | <b>DOM:</b> <tt><a>PROCESSING_INSTRUCTION_NODE</a></tt>
ProcessingInstructionNode :: NodeType

-- | <b>DOM:</b> <tt><a>COMMENT_NODE</a></tt>
--   
--   <a>Comment</a>
CommentNode :: NodeType

-- | <b>DOM:</b> <tt><a>DOCUMENT_NODE</a></tt>
--   
--   <a>Document</a>
DocumentNode :: NodeType

-- | <b>DOM:</b> <tt><a>DOCUMENT_TYPE_NODE</a></tt>
--   
--   <a>DocumentType</a>
DocumentTypeNode :: NodeType

-- | <b>DOM:</b> <tt><a>DOCUMENT_FRAGMENT_NODE</a></tt>
--   
--   <a>DocumentFragment</a>
DocumentFragmentNode :: NodeType

-- | <b>DOM:</b> <tt><a>NOTATION_NODE</a></tt>

-- | <i>Deprecated: historical</i>
NotationNode :: NodeType

-- | <b>DOM:</b> <tt><a>nodeType</a></tt>
--   
--   Simplify the algebraic data type to a one-dimensional <a>Enum</a> to
--   allow equality testing rather than requiring pattern matching.
nodeType :: Node -> Maybe NodeType

-- | Through the long history of HTML browsers, many unique and/or buggy
--   behaviours have become enshrined due to the simple fact that website
--   authors used them. As the standards and the parse engines have
--   continued to develop, three separated degrees of emulation have
--   emerged for that backwards compatibility.
data QuirksMode

-- | <b>DOM:</b> <tt><a>no-quirks mode</a></tt>
--   
--   Fully compliant with the modern standard.
NoQuirks :: QuirksMode

-- | <b>DOM:</b> <tt><a>limited-quirks mode</a></tt>
--   
--   Largely compliant with the standard, except for a couple of height
--   calculations.
LimitedQuirks :: QuirksMode

-- | <b>DOM:</b> <tt><a>quirks mode</a></tt>
--   
--   Backwards compatibility with 1990s-era technology.
FullQuirks :: QuirksMode

-- | <b>DOM:</b> <tt><a>Element</a></tt>
--   
--   The collection of metadata identifying and describing a markup tag
--   used to associate text or other data with its broader role in the
--   document, or to indicate a preferred rendering. Values may be easily
--   instantiated as updates to <a>emptyElementParams</a>.
data ElementParams
ElementParams :: Maybe ElementPrefix -> ElementName -> Maybe Namespace -> AttributeMap -> ElementParams

-- | The variable fragment used to represent the <a>elementNamespace</a> in
--   the original source.
[elementPrefix] :: ElementParams -> Maybe ElementPrefix

-- | The key defining what role the markup tag is meant to represent, as
--   defined by the <a>elementNamespace</a>.
[elementName] :: ElementParams -> ElementName

-- | The scope defining the language by which the element participates
--   in the document.
[elementNamespace] :: ElementParams -> Maybe Namespace

-- | The points of metadata further describing rendering behaviour or
--   adding other information.
[elementAttributes] :: ElementParams -> AttributeMap

-- | A sane default collection for easy record initialization.
emptyElementParams :: ElementParams

-- | Type-level clarification for the name of a markup tag.
type ElementName = Text

-- | Type-level clarification for the short namespace reference classifying
--   a markup tag.
type ElementPrefix = Text

-- | <b>DOM:</b> <tt><a>NamedNodeMap</a></tt>
--   
--   Type-level clarification for the collection of key-value points of
--   supplemental metadata attached to an <a>Element</a>. Note that, while
--   an <a>Attribute</a>'s prefix is used to determine the associated
--   namespace (and needs to be tracked for round-trip serialization), it
--   doesn't factor into testing equality or in lookups.
type AttributeMap = HashMap (Maybe Namespace, AttributeName) (Maybe AttributePrefix, AttributeValue)

-- | Pack a list of key-value metadata pairs into a form better optimized
--   for random lookup.
fromAttrList :: [AttributeParams] -> AttributeMap

-- | Extract the key-value metadata pairs from an indexed collection into an
--   iterable form. The order of elements is unspecified.
toAttrList :: AttributeMap -> [AttributeParams]

-- | As <a>insert</a>, performing the required data reordering for the
--   less-comfortable internal type representation.
insertAttribute :: AttributeParams -> AttributeMap -> AttributeMap

-- | A simple key-value representation of an attribute on an HTML tag,
--   before any namespace processing.
type BasicAttribute = (AttributeName, AttributeValue)

-- | <b>DOM:</b> <tt><a>Attr</a></tt>
--   
--   A more complete representation of an attribute, including extensions
--   beyond the <a>BasicAttribute</a> to support more structured (XML-like)
--   markup languages. Values may be easily instantiated as updates to
--   <a>emptyAttributeParams</a>.
data AttributeParams
AttributeParams :: Maybe AttributePrefix -> AttributeName -> Maybe Namespace -> AttributeValue -> AttributeParams

-- | The variable fragment used to represent the <a>attrNamespace</a> in
--   the original source.
[attrPrefix] :: AttributeParams -> Maybe AttributePrefix

-- | The key defining what role the metadata value point at
--   <a>attrValue</a> is meant to represent, as defined by the
--   <a>attrNamespace</a>.
[attrName] :: AttributeParams -> AttributeName

-- | The scope defining the language by which the attribute participates in
--   the document.
[attrNamespace] :: AttributeParams -> Maybe Namespace

-- | A point of metadata further describing rendering behaviour or adding
--   other information.
[attrValue] :: AttributeParams -> AttributeValue

-- | A sane default collection for easy record initialization; namely,
--   <a>Nothing</a>s and <a>empty</a>s.
emptyAttributeParams :: AttributeParams

-- | Type-level clarification for the key of a supplemental point of
--   metadata.
type AttributeName = Text

-- | Type-level clarification for the value of a supplemental point of
--   metadata.
type AttributeValue = Text

-- | Type-level clarification for the short namespace reference classifying
--   a supplemental point of metadata.
type AttributePrefix = Text

-- | <b>DOM:</b> <tt><a>DocumentType</a></tt>
--   
--   The collection of metadata representing a document type declaration
--   describing the markup language used in a document; of vestigial use in
--   HTML, but important for related languages. Values may be easily
--   instantiated as updates to <a>emptyDocumentTypeParams</a>.
data DocumentTypeParams
DocumentTypeParams :: DoctypeName -> DoctypePublicId -> DoctypeSystemId -> DocumentTypeParams

-- | The root element of the document, which may also identify the primary
--   language used.
[documentTypeName] :: DocumentTypeParams -> DoctypeName

-- | A globally-unique reference to the definition of the language.
[documentTypePublicId] :: DocumentTypeParams -> DoctypePublicId

-- | A system-dependent (but perhaps easier to access) reference to the
--   definition of the language.
[documentTypeSystemId] :: DocumentTypeParams -> DoctypeSystemId

-- | A sane default collection for easy record initialization; namely,
--   <a>empty</a>s.
emptyDocumentTypeParams :: DocumentTypeParams

-- | Type-level clarification for the language used in the document or,
--   equivalently, the name of the root node.
type DoctypeName = Text

-- | Type-level clarification for a registered or otherwise globally-unique
--   reference to a description of the language used in the document.
type DoctypePublicId = Text

-- | Type-level clarification for a reference to the description of the
--   language used in the document, dependent on the state of the system
--   (and/or the internet).
type DoctypeSystemId = Text

-- | <b>XML-NAMES:</b> <tt><a>XML namespace</a></tt>
--   
--   An identifier (theoretically) pointing to a reference defining a
--   particular element or attribute ---though not necessarily in
--   machine-readable form--- and so providing a scope for differentiating
--   multiple elements with the same local name but different semantics.
type Namespace = Text

-- | <b>Infra:</b> <tt><a>HTML namespace</a></tt>
--   
--   The canonical scope value for elements and attributes defined by the
--   HTML standard when used in XML or XML-compatible documents.
htmlNamespace :: Namespace

-- | <b>Infra:</b> <tt><a>MathML namespace</a></tt>
--   
--   The canonical scope value for elements and attributes defined by the
--   MathML standard.
mathMLNamespace :: Namespace

-- | <b>Infra:</b> <tt><a>SVG namespace</a></tt>
--   
--   The canonical scope value for elements and attributes defined by the
--   SVG standard.
svgNamespace :: Namespace

-- | <b>Infra:</b> <tt><a>XLink namespace</a></tt>
--   
--   The canonical scope value for elements and attributes defined by the
--   XLink standard.
xlinkNamespace :: Namespace

-- | <b>Infra:</b> <tt><a>XML namespace</a></tt>
--   
--   The canonical scope value for elements and attributes defined by the
--   XML standard.
xmlNamespace :: Namespace

-- | <b>Infra:</b> <tt><a>XMLNS namespace</a></tt>
--   
--   The canonical scope value for elements and attributes defined by the
--   XMLNS standard.
xmlnsNamespace :: Namespace
instance GHC.Read.Read Web.Willow.DOM.QuirksMode
instance GHC.Show.Show Web.Willow.DOM.QuirksMode
instance GHC.Enum.Bounded Web.Willow.DOM.QuirksMode
instance GHC.Enum.Enum Web.Willow.DOM.QuirksMode
instance GHC.Classes.Ord Web.Willow.DOM.QuirksMode
instance GHC.Classes.Eq Web.Willow.DOM.QuirksMode
instance GHC.Read.Read Web.Willow.DOM.DocumentTypeParams
instance GHC.Show.Show Web.Willow.DOM.DocumentTypeParams
instance GHC.Classes.Eq Web.Willow.DOM.DocumentTypeParams
instance GHC.Read.Read Web.Willow.DOM.NodeType
instance GHC.Show.Show Web.Willow.DOM.NodeType
instance GHC.Enum.Bounded Web.Willow.DOM.NodeType
instance GHC.Classes.Ord Web.Willow.DOM.NodeType
instance GHC.Classes.Eq Web.Willow.DOM.NodeType
instance GHC.Read.Read Web.Willow.DOM.ElementParams
instance GHC.Show.Show Web.Willow.DOM.ElementParams
instance GHC.Classes.Eq Web.Willow.DOM.ElementParams
instance GHC.Read.Read Web.Willow.DOM.AttributeParams
instance GHC.Show.Show Web.Willow.DOM.AttributeParams
instance GHC.Classes.Eq Web.Willow.DOM.AttributeParams
instance GHC.Read.Read Web.Willow.DOM.Node
instance GHC.Show.Show Web.Willow.DOM.Node
instance GHC.Classes.Eq Web.Willow.DOM.Node
instance GHC.Read.Read Web.Willow.DOM.Tree
instance GHC.Show.Show Web.Willow.DOM.Tree
instance GHC.Classes.Eq Web.Willow.DOM.Tree
instance GHC.Enum.Enum Web.Willow.DOM.NodeType