{-|
Parse a string into our custom tag soup data structure.

The parser works only on proper Unicode texts.
That is, you must have decoded it before,
e.g. using decoding functions from hxt or encoding package.
'Text.HTML.Tagchup.Process.findMetaEncoding'
can assist you retrieving the character set encoding
from meta information of the document at hand.
-}
module Text.HTML.Tagchup.Parser (
    CharType,
    runSoup, runSoupWithPositions, runSoupWithPositionsName,
    runTag, runInnerOfTag,
  ) where

import Text.HTML.Tagchup.Parser.Tag
   (CharType, StringType, parsePosTag, parsePosTagMergeWarnings, )

import Text.HTML.Tagchup.Parser.Combinator (manyS, )

import qualified Text.HTML.Tagchup.Parser.Combinator as Parser
import qualified Text.HTML.Tagchup.Parser.Stream as Stream

import qualified Text.HTML.Tagchup.PositionTag as PosTag
import qualified Text.HTML.Tagchup.Tag         as Tag
import qualified Text.XML.Basic.Name        as Name

import Control.Monad (liftM, )

import Data.Maybe (fromMaybe, )

-- import qualified Numeric


-- * run parser in several ways

{- |
Parse a single tag, throws an error if there is a syntax error.
This is useful for parsing a match pattern.
-}
runTag ::
   (Stream.C source, StringType sink, Show sink,
    Name.Attribute name, Name.Tag name, Show name) =>
   source -> Tag.T name sink
runTag :: forall source sink name.
(C source, StringType sink, Show sink, Attribute name, Tag name,
 Show name) =>
source -> T name sink
runTag source
str =
   let makeError :: [Char] -> a
makeError [Char]
msg =
          forall a. HasCallStack => [Char] -> a
error forall a b. (a -> b) -> a -> b
$ [Char]
"runTag: " forall a. [a] -> [a] -> [a]
++ [Char]
msg
       ((T name sink
openTag, Maybe (T name sink)
closeTag), [Warning]
warnings) =
          forall a. a -> Maybe a -> a
fromMaybe (forall {a}. [Char] -> a
makeError [Char]
"no parse at all") forall a b. (a -> b) -> a -> b
$
          forall (fail :: * -> *) input output a.
Monad fail =>
[Char] -> T input output fail a -> input -> fail (a, output)
Parser.run [Char]
"pattern string" forall source sink name.
(C source, StringType sink, Attribute name, Tag name) =>
Parser source (T name sink, Maybe (T name sink))
parsePosTag source
str
   in  if Bool -> Bool
not (forall (t :: * -> *) a. Foldable t => t a -> Bool
null [Warning]
warnings)
         then
           forall {a}. [Char] -> a
makeError
              ([[Char]] -> [Char]
unlines forall a b. (a -> b) -> a -> b
$ [Char]
"parsing results in" forall a. a -> [a] -> [a]
: forall a b. (a -> b) -> [a] -> [b]
map forall a. Show a => a -> [Char]
show [Warning]
warnings)
         else
           case Maybe (T name sink)
closeTag of
              Maybe (T name sink)
Nothing -> forall name string. T name string -> T name string
PosTag.tag_ T name sink
openTag
              Maybe (T name sink)
_ -> forall {a}. [Char] -> a
makeError [Char]
"self-closing tag not supported"

{- |
Parse the inner of a single tag.
That is, @runTag \"\<bla\>\"@ is the same as @runInnerOfTag \"bla\"@.
-}
runInnerOfTag ::
   (StringType sink, Show sink,
    Name.Attribute name, Name.Tag name, Show name) =>
   String -> Tag.T name sink
runInnerOfTag :: forall sink name.
(StringType sink, Show sink, Attribute name, Tag name,
 Show name) =>
[Char] -> T name sink
runInnerOfTag [Char]
str = forall source sink name.
(C source, StringType sink, Show sink, Attribute name, Tag name,
 Show name) =>
source -> T name sink
runTag forall a b. (a -> b) -> a -> b
$ [Char]
"<"forall a. [a] -> [a] -> [a]
++[Char]
strforall a. [a] -> [a] -> [a]
++[Char]
">"



runSoupWithPositionsName ::
   (Stream.C source, StringType sink,
    Name.Attribute name, Name.Tag name) =>
   FilePath -> source -> [PosTag.T name sink]
runSoupWithPositionsName :: forall source sink name.
(C source, StringType sink, Attribute name, Tag name) =>
[Char] -> source -> [T name sink]
runSoupWithPositionsName [Char]
fileName =
   forall string name.
Monoid string =>
[T name string] -> [T name string]
PosTag.concatTexts forall b c a. (b -> c) -> (a -> b) -> a -> c
.
   forall a. Identity a -> a
Parser.runIdentity forall b c a. (b -> c) -> (a -> b) -> a -> c
.
   forall (fail :: * -> *) input a.
Monad fail =>
[Char] -> StateT (T input) fail a -> input -> fail a
Parser.eval [Char]
fileName
      (forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
liftM forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat forall a b. (a -> b) -> a -> b
$ forall s a. StateT s Maybe a -> StateT s Identity [a]
manyS forall source sink name.
(C source, StringType sink, Attribute name, Tag name) =>
StateT (T source) Maybe [T name sink]
parsePosTagMergeWarnings)


-- | Parse an HTML document to a list of 'Tag.T'.
-- Automatically expands out escape characters.
runSoupWithPositions ::
   (Stream.C source, StringType sink,
    Name.Attribute name, Name.Tag name) =>
   source -> [PosTag.T name sink]
runSoupWithPositions :: forall source sink name.
(C source, StringType sink, Attribute name, Tag name) =>
source -> [T name sink]
runSoupWithPositions =
   forall source sink name.
(C source, StringType sink, Attribute name, Tag name) =>
[Char] -> source -> [T name sink]
runSoupWithPositionsName [Char]
"input"

-- | Like 'runSoupWithPositions' but hides source file positions.
runSoup ::
   (Stream.C source, StringType sink,
    Name.Attribute name, Name.Tag name) =>
   source -> [Tag.T name sink]
runSoup :: forall source sink name.
(C source, StringType sink, Attribute name, Tag name) =>
source -> [T name sink]
runSoup = forall a b. (a -> b) -> [a] -> [b]
map forall name string. T name string -> T name string
PosTag.tag_ forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall source sink name.
(C source, StringType sink, Attribute name, Tag name) =>
source -> [T name sink]
runSoupWithPositions