module Text.HTML.Tagchup.Parser.Tag where

import Text.HTML.Tagchup.Parser.Combinator
   (allowFail, withDefault,
    voidChar, dropSpaces, getPos,
    many, many0toN, many1toN,
    many1Satisfy, readUntil,
    satisfy, voidString,
    emit, modifyEmission, )

import qualified Text.HTML.Tagchup.Parser.Combinator as Parser
import qualified Text.HTML.Tagchup.Parser.Status as Status
import qualified Text.HTML.Tagchup.Parser.Stream as Stream

import qualified Text.HTML.Tagchup.PositionTag as PosTag
import qualified Text.HTML.Tagchup.Tag         as Tag
import qualified Text.XML.Basic.Position    as Position
import qualified Text.HTML.Basic.Character    as HTMLChar
import qualified Text.XML.Basic.ProcessingInstruction as PI
import qualified Text.XML.Basic.Attribute   as Attr
import qualified Text.XML.Basic.Name        as Name
import qualified Text.XML.Basic.Tag         as TagName

import qualified Text.HTML.Tagchup.Character as Chr
import Text.HTML.Tagchup.Character (fromChar, )

import qualified Text.HTML.Basic.Entity as HTMLEntity

import qualified Control.Monad.Exception.Synchronous as Exc

import Control.Monad.Trans.Writer (runWriterT, )
import Control.Monad.Trans.State (StateT(..), )
import Control.Monad (mplus, msum, when, liftM, )

import Data.Monoid (Monoid, mempty, mconcat, )

import qualified Data.Map as Map

import Data.Tuple.HT (mapSnd, )
import Data.Char (isAlphaNum, chr, ord, )
import Data.Maybe (maybeToList, )

-- import qualified Numeric

type Warning = (Position.T, String)

type Parser     source a = Parser.Full     source Warning a
type ParserEmit source a = Parser.Emitting source Warning a


parsePosTagMergeWarnings ::
   (Stream.C source, StringType sink,
    Name.Attribute name, Name.Tag name) =>
   StateT (Status.T source) Maybe [PosTag.T name sink]
parsePosTagMergeWarnings :: forall source sink name.
(C source, StringType sink, Attribute name, Tag name) =>
StateT (T source) Maybe [T name sink]
parsePosTagMergeWarnings =
   forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
liftM (\((T name sink
ot,Maybe (T name sink)
ct),[(T, String)]
warns) ->
      T name sink
ot forall a. a -> [a] -> [a]
:
      forall a b. (a -> b) -> [a] -> [b]
map (\(T
pos,String
warn) -> forall name string. T -> T name string -> T name string
PosTag.cons T
pos forall a b. (a -> b) -> a -> b
$ forall name string. String -> T name string
Tag.Warning String
warn) [(T, String)]
warns forall a. [a] -> [a] -> [a]
++
      forall a. Maybe a -> [a]
maybeToList Maybe (T name sink)
ct) forall a b. (a -> b) -> a -> b
$
   forall w (m :: * -> *) a. WriterT w m a -> m (a, w)
runWriterT forall source sink name.
(C source, StringType sink, Attribute name, Tag name) =>
Parser source (T name sink, Maybe (T name sink))
parsePosTag

parsePosTag ::
   (Stream.C source, StringType sink,
    Name.Attribute name, Name.Tag name) =>
   Parser source (PosTag.T name sink, Maybe (PosTag.T name sink))
parsePosTag :: forall source sink name.
(C source, StringType sink, Attribute name, Tag name) =>
Parser source (T name sink, Maybe (T name sink))
parsePosTag = do
   let omitClose :: Monad m => m t -> m (t, Maybe t)
       omitClose :: forall (m :: * -> *) t. Monad m => m t -> m (t, Maybe t)
omitClose = forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
liftM (\t
t -> (t
t, forall a. Maybe a
Nothing))
   T
pos <- forall output (fail :: * -> *) input.
(Monoid output, Monad fail) =>
T input output fail T
getPos
   forall (m :: * -> *) a. MonadPlus m => m a -> m a -> m a
mplus
      (do forall output input.
(Monoid output, C input) =>
Char -> T input output Maybe ()
voidChar Char
'<'
          forall input output a.
T input output Identity a -> T input output Maybe a
allowFail forall a b. (a -> b) -> a -> b
$ forall input output a.
T input output Maybe a
-> T input output Identity a -> T input output Identity a
withDefault
             (forall (t :: * -> *) (m :: * -> *) a.
(Foldable t, MonadPlus m) =>
t (m a) -> m a
msum forall a b. (a -> b) -> a -> b
$
                forall (m :: * -> *) t. Monad m => m t -> m (t, Maybe t)
omitClose (forall source name sink.
(C source, Tag name) =>
T -> Parser source (T name sink)
parseSpecialTag T
pos) forall a. a -> [a] -> [a]
:
                forall (m :: * -> *) t. Monad m => m t -> m (t, Maybe t)
omitClose (forall source sink name.
(C source, StringType sink, Attribute name, Tag name) =>
T -> Parser source (T name sink)
parseProcessingTag T
pos) forall a. a -> [a] -> [a]
:
                forall (m :: * -> *) t. Monad m => m t -> m (t, Maybe t)
omitClose (forall source name sink.
(C source, Tag name) =>
T -> Parser source (T name sink)
parseCloseTag T
pos) forall a. a -> [a] -> [a]
:
                forall source sink name.
(C source, StringType sink, Attribute name, Tag name) =>
T -> Parser source (T name sink, Maybe (T name sink))
parseOpenTag T
pos forall a. a -> [a] -> [a]
:
                [])
             (do forall source. T -> String -> ParserEmit source ()
emitWarning T
pos String
"A '<', that is not part of a tag. Encode it as &lt; please."
                 forall (m :: * -> *) t. Monad m => m t -> m (t, Maybe t)
omitClose (forall name sink source.
T -> T name sink -> ParserEmit source (T name sink)
returnTag T
pos (forall name string. string -> T name string
Tag.Text forall a b. (a -> b) -> a -> b
$ forall sink. StringType sink => Char -> sink
stringFromChar Char
'<'))))
      (forall (m :: * -> *) t. Monad m => m t -> m (t, Maybe t)
omitClose (forall source sink name.
(C source, StringType sink) =>
T -> Parser source (T name sink)
parseText T
pos))


{- |
Parsing an open tag may also emit a close tag
if the tag is self-closing, e.g. @\<br\/\>@.

For formatting self-closing tags correctly
it would be better to emit tags in the order @open tag, close tag, warnings@.
However, if there are infinitely many warnings,
we don't know whether a self-closing slash comes
and thus whether there is a close tag or not.
This implies, that we cannot even emit the warnings.

Thus we choose the order @open tag, warnings, close tag@.
-}
parseOpenTag ::
   (Stream.C source, StringType sink,
    Name.Attribute name, Name.Tag name) =>
   Position.T ->
   Parser source
      (PosTag.T name sink, Maybe (PosTag.T name sink))
parseOpenTag :: forall source sink name.
(C source, StringType sink, Attribute name, Tag name) =>
T -> Parser source (T name sink, Maybe (T name sink))
parseOpenTag T
pos =
   do Name name
name <- forall source pname. (C source, C pname) => Parser source pname
parseName
      forall input output a.
T input output Identity a -> T input output Maybe a
allowFail forall a b. (a -> b) -> a -> b
$
         do forall output input.
(Monoid output, C input) =>
T input output Identity ()
dropSpaces
            T name sink
tag <- forall (m :: * -> *) a name sink.
Monad m =>
T -> (a -> T name sink) -> m a -> m (T name sink)
returningTag T
pos (forall name string. Name name -> [T name string] -> T name string
Tag.Open Name name
name) forall a b. (a -> b) -> a -> b
$
               forall (fail :: * -> *) output input a.
(Monad fail, Monoid output) =>
(output -> output)
-> T input output fail a -> T input output fail a
modifyEmission (Int -> [(T, String)] -> [(T, String)]
restrictWarnings Int
10) forall a b. (a -> b) -> a -> b
$ forall output input a.
Monoid output =>
T input output Maybe a -> T input output Identity [a]
many forall source sink name.
(C source, StringType sink, Attribute name) =>
Parser source (T name sink)
parseAttribute
            forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
liftM ((,) T name sink
tag) forall a b. (a -> b) -> a -> b
$ forall input output a.
T input output Maybe a
-> T input output Identity a -> T input output Identity a
withDefault
               (do T
closePos <- forall output (fail :: * -> *) input.
(Monoid output, Monad fail) =>
T input output fail T
getPos
                   forall output input.
(Monoid output, C input) =>
String -> T input output Maybe ()
voidString String
"/>"
                   forall input output a.
T input output Identity a -> T input output Maybe a
allowFail forall a b. (a -> b) -> a -> b
$ forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
liftM forall a. a -> Maybe a
Just forall a b. (a -> b) -> a -> b
$ forall name sink source.
T -> T name sink -> ParserEmit source (T name sink)
returnTag T
closePos (forall name string. Name name -> T name string
Tag.Close Name name
name))
               (do T
junkPos <- forall output (fail :: * -> *) input.
(Monoid output, Monad fail) =>
T input output fail T
getPos
                   forall source a.
C source =>
(String -> ParserEmit source a)
-> String -> String -> ParserEmit source a
readUntilTerm
                      (\ String
junk ->
                         forall source. Bool -> T -> String -> ParserEmit source ()
emitWarningWhen
                            (Bool -> Bool
not forall a b. (a -> b) -> a -> b
$ forall (t :: * -> *) a. Foldable t => t a -> Bool
null String
junk)
                            T
junkPos (String
"Junk in opening tag: \"" forall a. [a] -> [a] -> [a]
++ String
junk forall a. [a] -> [a] -> [a]
++ String
"\""))
                      (String
"Unterminated open tag \"" forall a. [a] -> [a] -> [a]
++ forall name. C name => name -> String
Name.toString Name name
name forall a. [a] -> [a] -> [a]
++ String
"\"") String
">"
                   forall (m :: * -> *) a. Monad m => a -> m a
return forall a. Maybe a
Nothing)

parseCloseTag ::
   (Stream.C source, Name.Tag name) =>
   Position.T -> Parser source (PosTag.T name sink)
parseCloseTag :: forall source name sink.
(C source, Tag name) =>
T -> Parser source (T name sink)
parseCloseTag T
pos =
   do forall output input.
(Monoid output, C input) =>
Char -> T input output Maybe ()
voidChar Char
'/'
      Name name
name <- forall source pname. (C source, C pname) => Parser source pname
parseName
      forall input output a.
T input output Identity a -> T input output Maybe a
allowFail forall a b. (a -> b) -> a -> b
$
         do T name sink
tag <- forall name sink source.
T -> T name sink -> ParserEmit source (T name sink)
returnTag T
pos (forall name string. Name name -> T name string
Tag.Close Name name
name)
            forall output input.
(Monoid output, C input) =>
T input output Identity ()
dropSpaces
            T
junkPos <- forall output (fail :: * -> *) input.
(Monoid output, Monad fail) =>
T input output fail T
getPos
            forall source a.
C source =>
(String -> ParserEmit source a)
-> String -> String -> ParserEmit source a
readUntilTerm
               (\ String
junk ->
                  forall source. Bool -> T -> String -> ParserEmit source ()
emitWarningWhen
                     (Bool -> Bool
not forall a b. (a -> b) -> a -> b
$ forall (t :: * -> *) a. Foldable t => t a -> Bool
null String
junk)
                     T
junkPos (String
"Junk in closing tag: \"" forall a. [a] -> [a] -> [a]
++ String
junk forall a. [a] -> [a] -> [a]
++String
"\""))
               (String
"Unterminated closing tag \"" forall a. [a] -> [a] -> [a]
++ forall name. C name => name -> String
Name.toString Name name
name forall a. [a] -> [a] -> [a]
++String
"\"") String
">"
            forall (m :: * -> *) a. Monad m => a -> m a
return T name sink
tag

parseSpecialTag ::
   (Stream.C source, Name.Tag name) =>
   Position.T -> Parser source (PosTag.T name sink)
parseSpecialTag :: forall source name sink.
(C source, Tag name) =>
T -> Parser source (T name sink)
parseSpecialTag T
pos =
   do forall output input.
(Monoid output, C input) =>
Char -> T input output Maybe ()
voidChar Char
'!'
      forall (t :: * -> *) (m :: * -> *) a.
(Foldable t, MonadPlus m) =>
t (m a) -> m a
msum forall a b. (a -> b) -> a -> b
$
       (do forall output input.
(Monoid output, C input) =>
String -> T input output Maybe ()
voidString String
"--"
           forall input output a.
T input output Identity a -> T input output Maybe a
allowFail forall a b. (a -> b) -> a -> b
$ forall source a.
C source =>
(String -> ParserEmit source a)
-> String -> String -> ParserEmit source a
readUntilTerm
              (\ String
cmt -> forall name sink source.
T -> T name sink -> ParserEmit source (T name sink)
returnTag T
pos (forall name string. String -> T name string
Tag.Comment String
cmt))
              String
"Unterminated comment" String
"-->") forall a. a -> [a] -> [a]
:
       (do forall output input.
(Monoid output, C input) =>
String -> T input output Maybe ()
voidString String
TagName.cdataString
           forall input output a.
T input output Identity a -> T input output Maybe a
allowFail forall a b. (a -> b) -> a -> b
$ forall source a.
C source =>
(String -> ParserEmit source a)
-> String -> String -> ParserEmit source a
readUntilTerm
              (\ String
cdata -> forall name sink source.
T -> T name sink -> ParserEmit source (T name sink)
returnTag T
pos (forall name string. Tag name => String -> T name string
Tag.cdata String
cdata))
              String
"Unterminated cdata" String
"]]>") forall a. a -> [a] -> [a]
:
       (do Name name
name <- forall source pname. (C source, C pname) => Parser source pname
parseName
           forall input output a.
T input output Identity a -> T input output Maybe a
allowFail forall a b. (a -> b) -> a -> b
$
              do forall output input.
(Monoid output, C input) =>
T input output Identity ()
dropSpaces
                 forall source a.
C source =>
(String -> ParserEmit source a)
-> String -> String -> ParserEmit source a
readUntilTerm
                    (\ String
info -> forall name sink source.
T -> T name sink -> ParserEmit source (T name sink)
returnTag T
pos (forall name string. Name name -> String -> T name string
Tag.Special Name name
name String
info))
                    (String
"Unterminated special tag \"" forall a. [a] -> [a] -> [a]
++ forall name. C name => name -> String
Name.toString Name name
name forall a. [a] -> [a] -> [a]
++ String
"\"") String
">") forall a. a -> [a] -> [a]
:
       []

parseProcessingTag ::
   (Stream.C source, StringType sink,
    Name.Attribute name, Name.Tag name) =>
   Position.T -> Parser source (PosTag.T name sink)
parseProcessingTag :: forall source sink name.
(C source, StringType sink, Attribute name, Tag name) =>
T -> Parser source (T name sink)
parseProcessingTag T
pos =
   do forall output input.
(Monoid output, C input) =>
Char -> T input output Maybe ()
voidChar Char
'?'
      Name name
name <- forall source pname. (C source, C pname) => Parser source pname
parseName
      forall input output a.
T input output Identity a -> T input output Maybe a
allowFail forall a b. (a -> b) -> a -> b
$
         do forall output input.
(Monoid output, C input) =>
T input output Identity ()
dropSpaces
            forall (m :: * -> *) a name sink.
Monad m =>
T -> (a -> T name sink) -> m a -> m (T name sink)
returningTag T
pos (forall name string. Name name -> T name string -> T name string
Tag.Processing Name name
name) forall a b. (a -> b) -> a -> b
$
               if forall name. C name => [String] -> name -> Bool
Name.matchAny [String
"xml", String
"xml-stylesheet"] Name name
name
                 then
                   do [T name sink]
attrs <- forall output input a.
Monoid output =>
T input output Maybe a -> T input output Identity [a]
many forall source sink name.
(C source, StringType sink, Attribute name) =>
Parser source (T name sink)
parseAttribute
                      T
junkPos <- forall output (fail :: * -> *) input.
(Monoid output, Monad fail) =>
T input output fail T
getPos
                      forall source a.
C source =>
(String -> ParserEmit source a)
-> String -> String -> ParserEmit source a
readUntilTerm
                         (\ String
junk ->
                            forall source. Bool -> T -> String -> ParserEmit source ()
emitWarningWhen (Bool -> Bool
not forall a b. (a -> b) -> a -> b
$ forall (t :: * -> *) a. Foldable t => t a -> Bool
null String
junk) T
junkPos
                               (String
"Junk in processing info tag: \"" forall a. [a] -> [a] -> [a]
++ String
junk forall a. [a] -> [a] -> [a]
++ String
"\""))
                         (String
"Unterminated processing info tag \"" forall a. [a] -> [a] -> [a]
++ forall name. C name => name -> String
Name.toString Name name
name forall a. [a] -> [a] -> [a]
++ String
"\"") String
"?>"
                      forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ forall name string. [T name string] -> T name string
PI.Known [T name sink]
attrs
                 else forall source a.
C source =>
(String -> ParserEmit source a)
-> String -> String -> ParserEmit source a
readUntilTerm (forall (m :: * -> *) a. Monad m => a -> m a
return forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall name string. String -> T name string
PI.Unknown)
                         String
"Unterminated processing instruction" String
"?>"

parseText ::
   (Stream.C source, StringType sink) =>
   Position.T -> Parser source (PosTag.T name sink)
parseText :: forall source sink name.
(C source, StringType sink) =>
T -> Parser source (T name sink)
parseText T
pos =
   forall (m :: * -> *) a name sink.
Monad m =>
T -> (a -> T name sink) -> m a -> m (T name sink)
returningTag T
pos forall name string. string -> T name string
Tag.Text (forall sink source.
(StringType sink, C source) =>
(Char -> Bool) -> Parser source sink
parseCharAsString (forall a b. a -> b -> a
const Bool
True))
--   returningTag pos Tag.Text (parseCharAsString ('<'/=))
--   returningTag pos Tag.Text (parseString1 ('<'/=))


parseAttribute ::
   (Stream.C source, StringType sink, Name.Attribute name) =>
   Parser source (Attr.T name sink)
parseAttribute :: forall source sink name.
(C source, StringType sink, Attribute name) =>
Parser source (T name sink)
parseAttribute =
   forall source pname. (C source, C pname) => Parser source pname
parseName forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \Name name
name -> forall input output a.
T input output Identity a -> T input output Maybe a
allowFail forall a b. (a -> b) -> a -> b
$
   do forall output input.
(Monoid output, C input) =>
T input output Identity ()
dropSpaces
      sink
value <-
         forall input output a.
T input output Maybe a
-> T input output Identity a -> T input output Identity a
withDefault
            (forall output input.
(Monoid output, C input) =>
String -> T input output Maybe ()
voidString String
"=" forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall input output a.
T input output Identity a -> T input output Maybe a
allowFail (forall output input.
(Monoid output, C input) =>
T input output Identity ()
dropSpaces forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall source sink.
(C source, StringType sink) =>
ParserEmit source sink
parseValue))
            (forall (m :: * -> *) a. Monad m => a -> m a
return forall a. Monoid a => a
mempty)
      forall output input.
(Monoid output, C input) =>
T input output Identity ()
dropSpaces
      forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ forall name string. Name name -> string -> T name string
Attr.Cons Name name
name sink
value

parseName ::
   (Stream.C source, Name.C pname) =>
   Parser source pname
parseName :: forall source pname. (C source, C pname) => Parser source pname
parseName =
   forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
liftM forall name. C name => String -> name
Name.fromString forall a b. (a -> b) -> a -> b
$
   forall output input.
(Monoid output, C input) =>
(Char -> Bool) -> T input output Maybe String
many1Satisfy (\Char
c -> Char -> Bool
isAlphaNum Char
c Bool -> Bool -> Bool
|| Char
c forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` String
"_-.:")

parseValue ::
   (Stream.C source, StringType sink) =>
   ParserEmit source sink
parseValue :: forall source sink.
(C source, StringType sink) =>
ParserEmit source sink
parseValue =
   (forall (t :: * -> *) (m :: * -> *) a.
(Foldable t, MonadPlus m) =>
t (m a) -> m a
msum forall a b. (a -> b) -> a -> b
$
      forall source sink.
(C source, StringType sink) =>
String -> Char -> Parser source sink
parseQuoted String
"Unterminated doubly quoted value string" Char
'"' forall a. a -> [a] -> [a]
:
      forall source sink.
(C source, StringType sink) =>
String -> Char -> Parser source sink
parseQuoted String
"Unterminated singly quoted value string" Char
'\'' forall a. a -> [a] -> [a]
:
      [])
   forall input output a.
T input output Maybe a
-> T input output Identity a -> T input output Identity a
`withDefault`
   forall sink source.
(StringType sink, C source) =>
ParserEmit source sink
parseUnquotedValueAsString

parseUnquotedValueChar ::
   (Stream.C source) =>
   ParserEmit source String
parseUnquotedValueChar :: forall source. C source => ParserEmit source String
parseUnquotedValueChar =
   let parseValueChar :: WriterT [(T, String)] (StateT (T source) Maybe) String
parseValueChar =
          do T
pos <- forall output (fail :: * -> *) input.
(Monoid output, Monad fail) =>
T input output fail T
getPos
             String
str <- forall source. C source => (Char -> Bool) -> Parser source String
parseUnicodeChar (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b c. (a -> b -> c) -> b -> a -> c
flip forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
elem String
" >\"\'")
             let wrong :: String
wrong = forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
isValidValueChar) String
str
             forall input output a.
T input output Identity a -> T input output Maybe a
allowFail forall a b. (a -> b) -> a -> b
$
                forall source. Bool -> T -> String -> ParserEmit source ()
emitWarningWhen (Bool -> Bool
not (forall (t :: * -> *) a. Foldable t => t a -> Bool
null String
wrong)) T
pos forall a b. (a -> b) -> a -> b
$
                String
"Illegal characters in unquoted value: " forall a. [a] -> [a] -> [a]
++ String
wrong
             forall (m :: * -> *) a. Monad m => a -> m a
return String
str
   in  forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
liftM forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat forall a b. (a -> b) -> a -> b
$ forall output input a.
Monoid output =>
T input output Maybe a -> T input output Identity [a]
many WriterT [(T, String)] (StateT (T source) Maybe) String
parseValueChar

parseUnquotedValueHTMLChar ::
   (Stream.C source) =>
   ParserEmit source [HTMLChar.T]
parseUnquotedValueHTMLChar :: forall source. C source => ParserEmit source [T]
parseUnquotedValueHTMLChar =
   let parseValueChar :: WriterT [(T, String)] (StateT (T source) Maybe) [T]
parseValueChar =
          do T
pos <- forall output (fail :: * -> *) input.
(Monoid output, Monad fail) =>
T input output fail T
getPos
             [T]
hc <- forall source. C source => (Char -> Bool) -> Parser source [T]
parseHTMLChar (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b c. (a -> b -> c) -> b -> a -> c
flip forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
elem String
" >\"\'")
             {- We do the check after each parseHTMLChar
                and not after (many parseValueChar)
                in order to correctly interleave warnings. -}
             forall input output a.
T input output Identity a -> T input output Maybe a
allowFail forall a b. (a -> b) -> a -> b
$ forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
(a -> m b) -> t a -> m ()
mapM_ (forall source. T -> T -> ParserEmit source ()
checkUnquotedChar T
pos) [T]
hc
             forall (m :: * -> *) a. Monad m => a -> m a
return [T]
hc
   in  forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
liftM forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat forall a b. (a -> b) -> a -> b
$ forall output input a.
Monoid output =>
T input output Maybe a -> T input output Identity [a]
many WriterT [(T, String)] (StateT (T source) Maybe) [T]
parseValueChar

checkUnquotedChar :: Position.T -> HTMLChar.T -> ParserEmit source ()
checkUnquotedChar :: forall source. T -> T -> ParserEmit source ()
checkUnquotedChar T
pos T
x =
   case T
x of
      HTMLChar.Unicode Char
c ->
         forall source. Bool -> T -> String -> ParserEmit source ()
emitWarningWhen (Bool -> Bool
not (Char -> Bool
isValidValueChar Char
c)) T
pos forall a b. (a -> b) -> a -> b
$
            String
"Illegal characters in unquoted value: '" forall a. [a] -> [a] -> [a]
++ Char
c forall a. a -> [a] -> [a]
: String
"'"
      T
_ -> forall (m :: * -> *) a. Monad m => a -> m a
return ()


isValidValueChar :: Char -> Bool
isValidValueChar :: Char -> Bool
isValidValueChar Char
c  =  Char -> Bool
isAlphaNum Char
c Bool -> Bool -> Bool
|| Char
c forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` String
"_-:."

parseQuoted ::
   (Stream.C source, StringType sink) =>
   String -> Char -> Parser source sink
parseQuoted :: forall source sink.
(C source, StringType sink) =>
String -> Char -> Parser source sink
parseQuoted String
termMsg Char
quote =
   forall output input.
(Monoid output, C input) =>
Char -> T input output Maybe ()
voidChar Char
quote forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>>
   (forall input output a.
T input output Identity a -> T input output Maybe a
allowFail forall a b. (a -> b) -> a -> b
$
    do sink
str <- forall source sink.
(C source, StringType sink) =>
(Char -> Bool) -> ParserEmit source sink
parseString (Char
quoteforall a. Eq a => a -> a -> Bool
/=)
       forall input output a.
T input output Maybe a
-> T input output Identity a -> T input output Identity a
withDefault
          (forall output input.
(Monoid output, C input) =>
Char -> T input output Maybe ()
voidChar Char
quote)
          (do T
termPos <- forall output (fail :: * -> *) input.
(Monoid output, Monad fail) =>
T input output fail T
getPos
              forall source. T -> String -> ParserEmit source ()
emitWarning T
termPos String
termMsg)
       forall (m :: * -> *) a. Monad m => a -> m a
return sink
str)

{-
Instead of using 'generateTag' we could also wrap the call to 'readUntilTerm'
in 'mfix' in order to emit a tag, where some information is read later.
-}
readUntilTerm ::
   (Stream.C source) =>
   (String -> ParserEmit source a) -> String -> String -> ParserEmit source a
readUntilTerm :: forall source a.
C source =>
(String -> ParserEmit source a)
-> String -> String -> ParserEmit source a
readUntilTerm String -> ParserEmit source a
generateTag String
termWarning String
termPat =
   do ~(Bool
termFound,String
str) <- forall output input.
(Monoid output, C input) =>
String -> T input output Identity (Bool, String)
readUntil String
termPat
      a
result <- String -> ParserEmit source a
generateTag String
str
      T
termPos <- forall output (fail :: * -> *) input.
(Monoid output, Monad fail) =>
T input output fail T
getPos
      forall source. Bool -> T -> String -> ParserEmit source ()
emitWarningWhen (Bool -> Bool
not Bool
termFound) T
termPos String
termWarning
      forall (m :: * -> *) a. Monad m => a -> m a
return a
result


class Chr.C char => CharType char where
   parseChar :: (Stream.C source) => (Char -> Bool) -> Parser source [char]
   parseUnquotedValue :: (Stream.C source) => ParserEmit source [char]

instance CharType Char where
   parseChar :: forall source. C source => (Char -> Bool) -> Parser source String
parseChar = forall source. C source => (Char -> Bool) -> Parser source String
parseUnicodeChar
   parseUnquotedValue :: forall source. C source => ParserEmit source String
parseUnquotedValue = forall source. C source => ParserEmit source String
parseUnquotedValueChar

instance CharType HTMLChar.T where
   parseChar :: forall source. C source => (Char -> Bool) -> Parser source [T]
parseChar = forall source. C source => (Char -> Bool) -> Parser source [T]
parseHTMLChar
   parseUnquotedValue :: forall source. C source => ParserEmit source [T]
parseUnquotedValue = forall source. C source => ParserEmit source [T]
parseUnquotedValueHTMLChar


class Monoid sink => StringType sink where
   stringFromChar :: Char -> sink
   parseCharAsString ::
      (Stream.C source) =>
      (Char -> Bool) -> Parser source sink
   parseUnquotedValueAsString ::
      (Stream.C source) =>
      ParserEmit source sink

instance CharType char => StringType [char] where
   stringFromChar :: Char -> [char]
stringFromChar Char
c = [forall char. C char => Char -> char
fromChar Char
c]
   parseCharAsString :: forall source. C source => (Char -> Bool) -> Parser source [char]
parseCharAsString = forall char source.
(CharType char, C source) =>
(Char -> Bool) -> Parser source [char]
parseChar
   parseUnquotedValueAsString :: forall source. C source => ParserEmit source [char]
parseUnquotedValueAsString = forall char source.
(CharType char, C source) =>
ParserEmit source [char]
parseUnquotedValue


parseString  ::
   (Stream.C source, StringType sink) =>
   (Char -> Bool) -> ParserEmit source sink
parseString :: forall source sink.
(C source, StringType sink) =>
(Char -> Bool) -> ParserEmit source sink
parseString  Char -> Bool
p = forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
liftM forall a. Monoid a => [a] -> a
mconcat forall a b. (a -> b) -> a -> b
$ forall output input a.
Monoid output =>
T input output Maybe a -> T input output Identity [a]
many  (forall sink source.
(StringType sink, C source) =>
(Char -> Bool) -> Parser source sink
parseCharAsString Char -> Bool
p)

{-
parseString1 ::
   (Stream.C source, StringType sink) =>
   (Char -> Bool) -> Parser     name source sink sink
parseString1 p = liftM mconcat $ many1 (parseCharAsString p)
-}



parseUnicodeChar ::
   (Stream.C source) =>
   (Char -> Bool) -> Parser source String
parseUnicodeChar :: forall source. C source => (Char -> Bool) -> Parser source String
parseUnicodeChar Char -> Bool
p =
   do T
pos <- forall output (fail :: * -> *) input.
(Monoid output, Monad fail) =>
T input output fail T
getPos
      [T]
x <- forall source. C source => (Char -> Bool) -> Parser source [T]
parseHTMLChar Char -> Bool
p
      forall input output a.
T input output Identity a -> T input output Maybe a
allowFail forall a b. (a -> b) -> a -> b
$ forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
liftM forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat forall a b. (a -> b) -> a -> b
$
         forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
mapM (forall source. T -> T -> ParserEmit source String
htmlCharToString T
pos) [T]
x

htmlCharToString ::
   Position.T -> HTMLChar.T -> ParserEmit source String
htmlCharToString :: forall source. T -> T -> ParserEmit source String
htmlCharToString T
pos T
x =
   let returnChar :: a -> m [a]
returnChar a
c = forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ a
cforall a. a -> [a] -> [a]
:[]
   in  case T
x of
          HTMLChar.Unicode Char
c -> forall {m :: * -> *} {a}. Monad m => a -> m [a]
returnChar Char
c
          HTMLChar.CharRef Int
num -> forall {m :: * -> *} {a}. Monad m => a -> m [a]
returnChar (Int -> Char
chr Int
num)
          HTMLChar.EntityRef String
name ->
             forall b a. b -> (a -> b) -> Maybe a -> b
maybe
                (let refName :: String
refName = Char
'&'forall a. a -> [a] -> [a]
:String
nameforall a. [a] -> [a] -> [a]
++String
";"
                 in  forall source. T -> String -> ParserEmit source ()
emitWarning T
pos (String
"Unknown HTML entity " forall a. [a] -> [a] -> [a]
++ String
refName) forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>>
                     forall (m :: * -> *) a. Monad m => a -> m a
return String
refName)
                forall {m :: * -> *} {a}. Monad m => a -> m [a]
returnChar
                (forall k a. Ord k => k -> Map k a -> Maybe a
Map.lookup String
name Map String Char
HTMLEntity.mapNameToChar)

{- |
Only well formed entity references are interpreted as single HTMLChars,
whereas ill-formed entity references are interpreted as sequence of unicode characters without special meaning.
E.g. "&amp ;" is considered as plain "&amp ;",
and only "&amp;" is considered an escaped ampersand.
It is a very common error in HTML documents to not escape an ampersand.
With the interpretation used here,
those ampersands are left as they are.

At most one warning can be emitted.
-}
parseHTMLChar ::
   (Stream.C source) =>
   (Char -> Bool) -> Parser source [HTMLChar.T]
parseHTMLChar :: forall source. C source => (Char -> Bool) -> Parser source [T]
parseHTMLChar Char -> Bool
p =
   do T
pos <- forall output (fail :: * -> *) input.
(Monoid output, Monad fail) =>
T input output fail T
getPos
      Char
c <- forall output input.
(Monoid output, C input) =>
(Char -> Bool) -> T input output Maybe Char
satisfy Char -> Bool
p
      forall input output a.
T input output Identity a -> T input output Maybe a
allowFail forall a b. (a -> b) -> a -> b
$
        if Char
cforall a. Eq a => a -> a -> Bool
==Char
'&'
          then
            forall input output a.
T input output Maybe a
-> T input output Identity a -> T input output Identity a
withDefault
              (do [T]
ent <-
                     forall (m :: * -> *) a. MonadPlus m => m a -> m a -> m a
mplus
                        (do forall output input.
(Monoid output, C input) =>
Char -> T input output Maybe ()
voidChar Char
'#'
                            String
digits <- forall input output a.
T input output Identity a -> T input output Maybe a
allowFail forall a b. (a -> b) -> a -> b
$ forall output input a.
Monoid output =>
Int -> T input output Maybe a -> T input output Identity [a]
many0toN Int
10 (forall output input.
(Monoid output, C input) =>
(Char -> Bool) -> T input output Maybe Char
satisfy Char -> Bool
isAlphaNum)
                               -- exclude ';', '"', '<'
                               -- include 'x'
                            forall e b a. (e -> b) -> (a -> b) -> Exceptional e a -> b
Exc.switch
                               (\String
e ->
                                  forall input output a.
T input output Identity a -> T input output Maybe a
allowFail (forall source. T -> String -> ParserEmit source ()
emitWarning T
pos (String
"Error in numeric entity: " forall a. [a] -> [a] -> [a]
++ String
e)) forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>>
                                  forall (m :: * -> *) a. Monad m => a -> m a
return (forall a b. (a -> b) -> [a] -> [b]
map Char -> T
HTMLChar.fromUnicode (Char
'&'forall a. a -> [a] -> [a]
:Char
'#'forall a. a -> [a] -> [a]
:String
digits)))
                               (forall (m :: * -> *) a. Monad m => a -> m a
return forall b c a. (b -> c) -> (a -> b) -> a -> c
. (forall a. a -> [a] -> [a]
:[]) forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> T
HTMLChar.CharRef forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Int
ord)
                               (String -> Exceptional String Char
HTMLEntity.numberToChar String
digits))
                        (forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
liftM ((forall a. a -> [a] -> [a]
:[]) forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> T
HTMLChar.EntityRef) forall a b. (a -> b) -> a -> b
$
                         forall output input a.
Monoid output =>
Int -> T input output Maybe a -> T input output Maybe [a]
many1toN Int
10 (forall output input.
(Monoid output, C input) =>
(Char -> Bool) -> T input output Maybe Char
satisfy Char -> Bool
isAlphaNum))
                  forall output input.
(Monoid output, C input) =>
Char -> T input output Maybe ()
voidChar Char
';'
                  forall (m :: * -> *) a. Monad m => a -> m a
return [T]
ent)
              (forall source. T -> String -> ParserEmit source ()
emitWarning T
pos String
"Non-terminated entity reference" forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>>
               forall (m :: * -> *) a. Monad m => a -> m a
return [Char -> T
HTMLChar.Unicode Char
'&'])
          else forall (m :: * -> *) a. Monad m => a -> m a
return [Char -> T
HTMLChar.Unicode Char
c]

{-
readHex :: (Num a) => String -> a
readHex str =
   case Numeric.readHex str of
      [(n,"")] -> n
      _ -> error "readHex: no parse"

{-
We cannot emit specific warnings,
because the sub-parsers simply fail
and then throw away the warnings.
-}
parseHTMLCharGenericWarning ::
   (Stream.C source) =>
   (Char -> Bool) -> Parser source [HTMLChar.T]
parseHTMLCharGenericWarning p =
   do pos <- getPos
      c <- satisfy p
      allowFail $
        if c=='&'
          then
            withDefault
              (do ent <-
                     mplus
                        (voidChar '#' >>
                         liftM HTMLChar.CharRef
                            (mplus
                               (voidChar 'x' >> liftM readHex (many1toN 8 (satisfy isHexDigit)))
                               (liftM read (many1toN 10 (satisfy isDigit)))))
                        (liftM HTMLChar.EntityRef $ many1toN 10 (satisfy isAlphaNum))
                  voidChar ';'
                  return [ent])
              (emitWarning pos "Ill formed entity" >>
               return [HTMLChar.Unicode '&'])
          else return [HTMLChar.Unicode c]
-}


restrictWarnings :: Int -> [Warning] -> [Warning]
restrictWarnings :: Int -> [(T, String)] -> [(T, String)]
restrictWarnings Int
n =
   forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry forall a. [a] -> [a] -> [a]
(++) forall b c a. (b -> c) -> (a -> b) -> a -> c
.
   forall b c a. (b -> c) -> (a, b) -> (a, c)
mapSnd
      (\[(T, String)]
rest ->
          case [(T, String)]
rest of
             (T
pos, String
_) : [(T, String)]
_ ->
                [(T
pos, String
"further warnings suppressed")]
             [(T, String)]
_ -> []) forall b c a. (b -> c) -> (a -> b) -> a -> c
.
   forall a. Int -> [a] -> ([a], [a])
splitAt Int
n


-- these functions have intentionally restricted types

emitWarningWhen :: Bool -> Position.T -> String -> ParserEmit source ()
emitWarningWhen :: forall source. Bool -> T -> String -> ParserEmit source ()
emitWarningWhen Bool
cond T
pos String
msg =
   forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when Bool
cond forall a b. (a -> b) -> a -> b
$ forall source. T -> String -> ParserEmit source ()
emitWarning T
pos String
msg

emitWarning :: Position.T -> String -> ParserEmit source ()
emitWarning :: forall source. T -> String -> ParserEmit source ()
emitWarning = forall a b c. ((a, b) -> c) -> a -> b -> c
curry forall (fail :: * -> *) w input.
Monad fail =>
w -> T input [w] fail ()
emit

returnTag ::
   Position.T ->
   Tag.T name sink ->
   ParserEmit source (PosTag.T name sink)
returnTag :: forall name sink source.
T -> T name sink -> ParserEmit source (T name sink)
returnTag T
p T name sink
t = forall (m :: * -> *) a. Monad m => a -> m a
return (forall name string. T -> T name string -> T name string
PosTag.cons T
p T name sink
t)

returningTag ::
   (Monad m) =>
   Position.T ->
   (a -> Tag.T name sink) ->
   m a ->
   m (PosTag.T name sink)
returningTag :: forall (m :: * -> *) a name sink.
Monad m =>
T -> (a -> T name sink) -> m a -> m (T name sink)
returningTag T
pos a -> T name sink
f =
   forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
liftM (forall name string. T -> T name string -> T name string
PosTag.cons T
pos forall b c a. (b -> c) -> (a -> b) -> a -> c
. a -> T name sink
f)