{-# LANGUAGE BangPatterns        #-}
{-# LANGUAGE BinaryLiterals      #-}
{-# LANGUAGE LambdaCase          #-}
{-# LANGUAGE NamedFieldPuns      #-}
{-# LANGUAGE OverloadedStrings   #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# options_ghc -Wno-unused-imports #-}
-- | SAX parser and API for XML.

module Xeno.SAX
  ( process
  , Process(..)
  , StringLike(..)
  , fold
  , validate
  , validateEx
  , dump
  , skipDoctype
  ) where

import           Control.Exception
import           Control.Monad.ST
import           Control.Monad.State.Strict
import           Control.Spork
import           Data.Bits
import           Data.ByteString (ByteString)
import qualified Data.ByteString as S
import qualified Data.ByteString.Char8 as S8
import qualified Data.ByteString.Unsafe as SU
import           Data.Char(isSpace)
import           Data.Functor.Identity
import           Data.Semigroup
import           Data.STRef
import           Data.Word
import           Xeno.Types


class StringLike str where
    s_index'       :: str -> Int -> Word8
    elemIndexFrom' :: Word8 -> str -> Int -> Maybe Int
    drop'          :: Int -> str -> str
    substring'     :: str -> Int -> Int -> ByteString
    toBS           :: str -> ByteString

instance StringLike ByteString where
    s_index' :: ByteString -> Int -> Word8
s_index'       = ByteString -> Int -> Word8
s_index
    {-# INLINE s_index' #-}
    elemIndexFrom' :: Word8 -> ByteString -> Int -> Maybe Int
elemIndexFrom' = Word8 -> ByteString -> Int -> Maybe Int
elemIndexFrom
    {-# INLINE elemIndexFrom' #-}
    drop' :: Int -> ByteString -> ByteString
drop'          = Int -> ByteString -> ByteString
S.drop
    {-# INLINE drop' #-}
    substring' :: ByteString -> Int -> Int -> ByteString
substring'     = ByteString -> Int -> Int -> ByteString
substring
    {-# INLINE substring' #-}
    toBS :: ByteString -> ByteString
toBS           = ByteString -> ByteString
forall a. a -> a
id
    {-# INLINE toBS #-}

instance StringLike ByteStringZeroTerminated where
    s_index' :: ByteStringZeroTerminated -> Int -> Word8
s_index' (BSZT ByteString
ps) Int
n = ByteString
ps ByteString -> Int -> Word8
`SU.unsafeIndex` Int
n
    {-# INLINE s_index' #-}
    elemIndexFrom' :: Word8 -> ByteStringZeroTerminated -> Int -> Maybe Int
elemIndexFrom' Word8
w (BSZT ByteString
bs) Int
i = Word8 -> ByteString -> Int -> Maybe Int
elemIndexFrom Word8
w ByteString
bs Int
i
    {-# INLINE elemIndexFrom' #-}
    drop' :: Int -> ByteStringZeroTerminated -> ByteStringZeroTerminated
drop' Int
i (BSZT ByteString
bs) = ByteString -> ByteStringZeroTerminated
BSZT (ByteString -> ByteStringZeroTerminated)
-> ByteString -> ByteStringZeroTerminated
forall a b. (a -> b) -> a -> b
$ Int -> ByteString -> ByteString
S.drop Int
i ByteString
bs
    {-# INLINE drop' #-}
    substring' :: ByteStringZeroTerminated -> Int -> Int -> ByteString
substring' (BSZT ByteString
bs) Int
s Int
t = ByteString -> Int -> Int -> ByteString
substring ByteString
bs Int
s Int
t
    {-# INLINE substring' #-}
    toBS :: ByteStringZeroTerminated -> ByteString
toBS (BSZT ByteString
bs) = ByteString
bs
    {-# INLINE toBS #-}

-- | Parameters to the 'process' function
data Process a =
  Process {
      Process a -> ByteString -> a
openF    :: !(ByteString ->               a) -- ^ Open tag.
    , Process a -> ByteString -> ByteString -> a
attrF    :: !(ByteString -> ByteString -> a) -- ^ Tag attribute.
    , Process a -> ByteString -> a
endOpenF :: !(ByteString ->               a) -- ^ End open tag.
    , Process a -> ByteString -> a
textF    :: !(ByteString ->               a) -- ^ Text.
    , Process a -> ByteString -> a
closeF   :: !(ByteString ->               a) -- ^ Close tag.
    , Process a -> ByteString -> a
cdataF   :: !(ByteString ->               a) -- ^ CDATA.
    }

--------------------------------------------------------------------------------
-- Helpful interfaces to the parser

-- | Parse the XML but return no result, process no events.
--
-- N.B.: Only the lexical correctness of the input string is checked, not its XML semantics (e.g. only if tags are well formed, not whether tags are properly closed)
--
-- > > :set -XOverloadedStrings
-- > > validate "<b>"
-- > True
--
-- > > validate "<b"
-- > False
validate :: (StringLike str) => str -> Bool
validate :: str -> Bool
validate str
s =
  case () -> Either XenoException ()
forall e a. Exception e => a -> Either e a
spork
         (Identity () -> ()
forall a. Identity a -> a
runIdentity
            (Process (Identity ()) -> str -> Identity ()
forall (m :: * -> *) str.
(Monad m, StringLike str) =>
Process (m ()) -> str -> m ()
process
               Process :: forall a.
(ByteString -> a)
-> (ByteString -> ByteString -> a)
-> (ByteString -> a)
-> (ByteString -> a)
-> (ByteString -> a)
-> (ByteString -> a)
-> Process a
Process {
                 openF :: ByteString -> Identity ()
openF    = \ByteString
_   -> () -> Identity ()
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()
               , attrF :: ByteString -> ByteString -> Identity ()
attrF    = \ByteString
_ ByteString
_ -> () -> Identity ()
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()
               , endOpenF :: ByteString -> Identity ()
endOpenF = \ByteString
_   -> () -> Identity ()
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()
               , textF :: ByteString -> Identity ()
textF    = \ByteString
_   -> () -> Identity ()
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()
               , closeF :: ByteString -> Identity ()
closeF   = \ByteString
_   -> () -> Identity ()
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()
               , cdataF :: ByteString -> Identity ()
cdataF   = \ByteString
_   -> () -> Identity ()
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()
               }
               str
s)) of
    Left (XenoException
_ :: XenoException) -> Bool
False
    Right ()
_ -> Bool
True
-- It must be inlined or specialised to ByteString/ByteStringZeroTerminated
{-# INLINE validate #-}
{-# SPECIALISE validate :: ByteString -> Bool #-}
{-# SPECIALISE validate :: ByteStringZeroTerminated -> Bool #-}


-- | Parse the XML and checks tags nesting.
--
validateEx :: (StringLike str) => str -> Bool
validateEx :: str -> Bool
validateEx str
s =
  case () -> Either XenoException ()
forall e a. Exception e => a -> Either e a
spork
         ((forall s. ST s ()) -> ()
forall a. (forall s. ST s a) -> a
runST ((forall s. ST s ()) -> ()) -> (forall s. ST s ()) -> ()
forall a b. (a -> b) -> a -> b
$ do
            STRef s [ByteString]
tags <- [ByteString] -> ST s (STRef s [ByteString])
forall a s. a -> ST s (STRef s a)
newSTRef []
            (Process (ST s ()) -> str -> ST s ()
forall (m :: * -> *) str.
(Monad m, StringLike str) =>
Process (m ()) -> str -> m ()
process
               Process :: forall a.
(ByteString -> a)
-> (ByteString -> ByteString -> a)
-> (ByteString -> a)
-> (ByteString -> a)
-> (ByteString -> a)
-> (ByteString -> a)
-> Process a
Process {
                 openF :: ByteString -> ST s ()
openF    = \ByteString
tag   -> STRef s [ByteString] -> ([ByteString] -> [ByteString]) -> ST s ()
forall s a. STRef s a -> (a -> a) -> ST s ()
modifySTRef' STRef s [ByteString]
tags (ByteString
tagByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
:)
               , attrF :: ByteString -> ByteString -> ST s ()
attrF    = \ByteString
_ ByteString
_ -> () -> ST s ()
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()
               , endOpenF :: ByteString -> ST s ()
endOpenF = \ByteString
_   -> () -> ST s ()
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()
               , textF :: ByteString -> ST s ()
textF    = \ByteString
_   -> () -> ST s ()
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()
               , closeF :: ByteString -> ST s ()
closeF   = \ByteString
tag  ->
                   STRef s [ByteString] -> ([ByteString] -> [ByteString]) -> ST s ()
forall s a. STRef s a -> (a -> a) -> ST s ()
modifySTRef' STRef s [ByteString]
tags (([ByteString] -> [ByteString]) -> ST s ())
-> ([ByteString] -> [ByteString]) -> ST s ()
forall a b. (a -> b) -> a -> b
$ \case
                      [] -> String -> [ByteString]
forall (m :: * -> *) a. MonadFail m => String -> m a
fail (String -> [ByteString]) -> String -> [ByteString]
forall a b. (a -> b) -> a -> b
$ String
"Unexpected close tag \"" String -> String -> String
forall a. [a] -> [a] -> [a]
++ ByteString -> String
forall a. Show a => a -> String
show ByteString
tag String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
"\""
                      (ByteString
expectedTag:[ByteString]
tags') ->
                          if ByteString
expectedTag ByteString -> ByteString -> Bool
forall a. Eq a => a -> a -> Bool
== ByteString
tag
                          then [ByteString]
tags'
                          else String -> [ByteString]
forall (m :: * -> *) a. MonadFail m => String -> m a
fail (String -> [ByteString]) -> String -> [ByteString]
forall a b. (a -> b) -> a -> b
$ String
"Unexpected close tag. Expected \"" String -> String -> String
forall a. [a] -> [a] -> [a]
++ ByteString -> String
forall a. Show a => a -> String
show ByteString
expectedTag String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
"\", but got \"" String -> String -> String
forall a. [a] -> [a] -> [a]
++ ByteString -> String
forall a. Show a => a -> String
show ByteString
tag String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
"\""
               , cdataF :: ByteString -> ST s ()
cdataF   = \ByteString
_   -> () -> ST s ()
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()
               }
               str
s)
            STRef s [ByteString] -> ST s [ByteString]
forall s a. STRef s a -> ST s a
readSTRef STRef s [ByteString]
tags ST s [ByteString] -> ([ByteString] -> ST s ()) -> ST s ()
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \case
                [] -> () -> ST s ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
                [ByteString]
tags' -> String -> ST s ()
forall (m :: * -> *) a. MonadFail m => String -> m a
fail (String -> ST s ()) -> String -> ST s ()
forall a b. (a -> b) -> a -> b
$ String
"Not all tags closed: " String -> String -> String
forall a. [a] -> [a] -> [a]
++ [ByteString] -> String
forall a. Show a => a -> String
show [ByteString]
tags'
         ) of
    Left (XenoException
_ :: XenoException) -> Bool
False
    Right ()
_ -> Bool
True
{-# INLINE validateEx #-}
{-# SPECIALISE validateEx :: ByteString -> Bool #-}
{-# SPECIALISE validateEx :: ByteStringZeroTerminated -> Bool #-}


-- | Parse the XML and pretty print it to stdout.
dump :: ByteString -> IO ()
dump :: ByteString -> IO ()
dump ByteString
str =
  StateT Int IO () -> Int -> IO ()
forall (m :: * -> *) s a. Monad m => StateT s m a -> s -> m a
evalStateT
    (Process (StateT Int IO ()) -> ByteString -> StateT Int IO ()
forall (m :: * -> *) str.
(Monad m, StringLike str) =>
Process (m ()) -> str -> m ()
process
       Process :: forall a.
(ByteString -> a)
-> (ByteString -> ByteString -> a)
-> (ByteString -> a)
-> (ByteString -> a)
-> (ByteString -> a)
-> (ByteString -> a)
-> Process a
Process {
         openF :: ByteString -> StateT Int IO ()
openF = \ByteString
name -> do
          Int
level <- StateT Int IO Int
forall s (m :: * -> *). MonadState s m => m s
get
          IO () -> StateT Int IO ()
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (ByteString -> IO ()
S8.putStr (Int -> Char -> ByteString
S8.replicate Int
level Char
' ' ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString
"<" ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString
name ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString
""))
       , attrF :: ByteString -> ByteString -> StateT Int IO ()
attrF = \ByteString
key ByteString
value -> IO () -> StateT Int IO ()
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (ByteString -> IO ()
S8.putStr (ByteString
" " ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString
key ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString
"=\"" ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString
value ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString
"\""))
       , endOpenF :: ByteString -> StateT Int IO ()
endOpenF = \ByteString
_ -> do
          Int
level <- StateT Int IO Int
forall s (m :: * -> *). MonadState s m => m s
get
          let !level' :: Int
level' = Int
level Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
2
          Int -> StateT Int IO ()
forall s (m :: * -> *). MonadState s m => s -> m ()
put Int
level'
          IO () -> StateT Int IO ()
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (ByteString -> IO ()
S8.putStrLn (ByteString
">"))
       , textF :: ByteString -> StateT Int IO ()
textF = \ByteString
text -> do
          Int
level <- StateT Int IO Int
forall s (m :: * -> *). MonadState s m => m s
get
          IO () -> StateT Int IO ()
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (ByteString -> IO ()
S8.putStrLn (Int -> Char -> ByteString
S8.replicate Int
level Char
' ' ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> String -> ByteString
S8.pack (ByteString -> String
forall a. Show a => a -> String
show ByteString
text)))
       , closeF :: ByteString -> StateT Int IO ()
closeF = \ByteString
name -> do
          Int
level <- StateT Int IO Int
forall s (m :: * -> *). MonadState s m => m s
get
          let !level' :: Int
level' = Int
level Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
2
          Int -> StateT Int IO ()
forall s (m :: * -> *). MonadState s m => s -> m ()
put Int
level'
          IO () -> StateT Int IO ()
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (ByteString -> IO ()
S8.putStrLn (Int -> Char -> ByteString
S8.replicate Int
level' Char
' ' ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString
"</" ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString
name ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString
">"))
       , cdataF :: ByteString -> StateT Int IO ()
cdataF = \ByteString
cdata -> do
          Int
level <- StateT Int IO Int
forall s (m :: * -> *). MonadState s m => m s
get
          IO () -> StateT Int IO ()
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (ByteString -> IO ()
S8.putStrLn (Int -> Char -> ByteString
S8.replicate Int
level Char
' ' ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString
"CDATA: " ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> String -> ByteString
S8.pack (ByteString -> String
forall a. Show a => a -> String
show ByteString
cdata)))
       }
       ByteString
str)
    (Int
0 :: Int)

-- | Fold over the XML input.
fold
  :: (s -> ByteString -> s) -- ^ Open tag.
  -> (s -> ByteString -> ByteString -> s) -- ^ Attribute key/value.
  -> (s -> ByteString -> s) -- ^ End of open tag.
  -> (s -> ByteString -> s) -- ^ Text.
  -> (s -> ByteString -> s) -- ^ Close tag.
  -> (s -> ByteString -> s) -- ^ CDATA.
  -> s
  -> ByteString
  -> Either XenoException s
fold :: (s -> ByteString -> s)
-> (s -> ByteString -> ByteString -> s)
-> (s -> ByteString -> s)
-> (s -> ByteString -> s)
-> (s -> ByteString -> s)
-> (s -> ByteString -> s)
-> s
-> ByteString
-> Either XenoException s
fold s -> ByteString -> s
openF s -> ByteString -> ByteString -> s
attrF s -> ByteString -> s
endOpenF s -> ByteString -> s
textF s -> ByteString -> s
closeF s -> ByteString -> s
cdataF s
s ByteString
str =
  s -> Either XenoException s
forall e a. Exception e => a -> Either e a
spork
    (State s () -> s -> s
forall s a. State s a -> s -> s
execState
       (Process (State s ()) -> ByteString -> State s ()
forall (m :: * -> *) str.
(Monad m, StringLike str) =>
Process (m ()) -> str -> m ()
process Process :: forall a.
(ByteString -> a)
-> (ByteString -> ByteString -> a)
-> (ByteString -> a)
-> (ByteString -> a)
-> (ByteString -> a)
-> (ByteString -> a)
-> Process a
Process {
            openF :: ByteString -> State s ()
openF    = \ByteString
name -> (s -> s) -> State s ()
forall s (m :: * -> *). MonadState s m => (s -> s) -> m ()
modify' (\s
s' -> s -> ByteString -> s
openF s
s' ByteString
name)
          , attrF :: ByteString -> ByteString -> State s ()
attrF    = \ByteString
key ByteString
value -> (s -> s) -> State s ()
forall s (m :: * -> *). MonadState s m => (s -> s) -> m ()
modify' (\s
s' -> s -> ByteString -> ByteString -> s
attrF s
s' ByteString
key ByteString
value)
          , endOpenF :: ByteString -> State s ()
endOpenF = \ByteString
name -> (s -> s) -> State s ()
forall s (m :: * -> *). MonadState s m => (s -> s) -> m ()
modify' (\s
s' -> s -> ByteString -> s
endOpenF s
s' ByteString
name)
          , textF :: ByteString -> State s ()
textF    = \ByteString
text -> (s -> s) -> State s ()
forall s (m :: * -> *). MonadState s m => (s -> s) -> m ()
modify' (\s
s' -> s -> ByteString -> s
textF s
s' ByteString
text)
          , closeF :: ByteString -> State s ()
closeF   = \ByteString
name -> (s -> s) -> State s ()
forall s (m :: * -> *). MonadState s m => (s -> s) -> m ()
modify' (\s
s' -> s -> ByteString -> s
closeF s
s' ByteString
name)
          , cdataF :: ByteString -> State s ()
cdataF   = \ByteString
cdata -> (s -> s) -> State s ()
forall s (m :: * -> *). MonadState s m => (s -> s) -> m ()
modify' (\s
s' -> s -> ByteString -> s
cdataF s
s' ByteString
cdata)
        } ByteString
str)
       s
s)

--------------------------------------------------------------------------------
-- Main parsing function

-- | Process events with callbacks in the XML input.
process
  :: (Monad m, StringLike str)
  => Process (m ())
  -> str
  -> m ()
process :: Process (m ()) -> str -> m ()
process !(Process {ByteString -> m ()
openF :: ByteString -> m ()
openF :: forall a. Process a -> ByteString -> a
openF, ByteString -> ByteString -> m ()
attrF :: ByteString -> ByteString -> m ()
attrF :: forall a. Process a -> ByteString -> ByteString -> a
attrF, ByteString -> m ()
endOpenF :: ByteString -> m ()
endOpenF :: forall a. Process a -> ByteString -> a
endOpenF, ByteString -> m ()
textF :: ByteString -> m ()
textF :: forall a. Process a -> ByteString -> a
textF, ByteString -> m ()
closeF :: ByteString -> m ()
closeF :: forall a. Process a -> ByteString -> a
closeF, ByteString -> m ()
cdataF :: ByteString -> m ()
cdataF :: forall a. Process a -> ByteString -> a
cdataF}) str
str = Int -> m ()
findLT Int
0
  where
    findLT :: Int -> m ()
findLT Int
index =
      case Word8 -> str -> Int -> Maybe Int
forall str. StringLike str => Word8 -> str -> Int -> Maybe Int
elemIndexFrom' Word8
openTagChar str
str Int
index of
        Maybe Int
Nothing -> Bool -> m () -> m ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless (ByteString -> Bool
S.null ByteString
text) (ByteString -> m ()
textF ByteString
text)
          where text :: ByteString
text = str -> ByteString
forall str. StringLike str => str -> ByteString
toBS (str -> ByteString) -> str -> ByteString
forall a b. (a -> b) -> a -> b
$ Int -> str -> str
forall str. StringLike str => Int -> str -> str
drop' Int
index str
str
        Just Int
fromLt -> do
          Bool -> m () -> m ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless (ByteString -> Bool
S.null ByteString
text) (ByteString -> m ()
textF ByteString
text)
          Int -> m ()
checkOpenComment (Int
fromLt Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
          where text :: ByteString
text = str -> Int -> Int -> ByteString
forall str. StringLike str => str -> Int -> Int -> ByteString
substring' str
str Int
index Int
fromLt
    -- Find open comment, CDATA or tag name.
    checkOpenComment :: Int -> m ()
checkOpenComment Int
index
      | str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
this Int
0 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
bangChar -- !
      , str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
this Int
1 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
commentChar -- -
      , str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
this Int
2 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
commentChar -- -
      =  Int -> m ()
findCommentEnd (Int
index Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
3)

      | str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
this Int
0 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
bangChar -- !
      , str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
this Int
1 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
openAngleBracketChar -- [
      , str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
this Int
2 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
67 -- C
      , str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
this Int
3 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
68 -- D
      , str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
this Int
4 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
65 -- A
      , str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
this Int
5 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
84 -- T
      , str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
this Int
6 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
65 -- A
      , str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
this Int
7 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
openAngleBracketChar -- [
      =  Int -> Int -> m ()
findCDataEnd (Int
index Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
8) (Int
index Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
8)

      | Bool
otherwise
      = Int -> m ()
findTagName Int
index
      where
        this :: str
this = Int -> str -> str
forall str. StringLike str => Int -> str -> str
drop' Int
index str
str
    findCommentEnd :: Int -> m ()
findCommentEnd Int
index =
      case Word8 -> str -> Int -> Maybe Int
forall str. StringLike str => Word8 -> str -> Int -> Maybe Int
elemIndexFrom' Word8
commentChar str
str Int
index of
        Maybe Int
Nothing -> XenoException -> m ()
forall a e. Exception e => e -> a
throw (XenoException -> m ()) -> XenoException -> m ()
forall a b. (a -> b) -> a -> b
$ Int -> ByteString -> XenoException
XenoParseError Int
index ByteString
"Couldn't find the closing comment dash."
        Just Int
fromDash ->
          if str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
this Int
0 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
commentChar Bool -> Bool -> Bool
&& str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
this Int
1 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
closeTagChar
            then Int -> m ()
findLT (Int
fromDash Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
2)
            else Int -> m ()
findCommentEnd (Int
fromDash Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
          where this :: str
this = Int -> str -> str
forall str. StringLike str => Int -> str -> str
drop' Int
index str
str
    findCDataEnd :: Int -> Int -> m ()
findCDataEnd Int
cdata_start Int
index =
      case Word8 -> str -> Int -> Maybe Int
forall str. StringLike str => Word8 -> str -> Int -> Maybe Int
elemIndexFrom' Word8
closeAngleBracketChar str
str Int
index of
        Maybe Int
Nothing -> XenoException -> m ()
forall a e. Exception e => e -> a
throw (XenoException -> m ()) -> XenoException -> m ()
forall a b. (a -> b) -> a -> b
$ Int -> ByteString -> XenoException
XenoParseError Int
index ByteString
"Couldn't find closing angle bracket for CDATA."
        Just Int
fromCloseAngleBracket ->
          if str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
str (Int
fromCloseAngleBracket Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
closeAngleBracketChar
             then do
               ByteString -> m ()
cdataF (str -> Int -> Int -> ByteString
forall str. StringLike str => str -> Int -> Int -> ByteString
substring' str
str Int
cdata_start Int
fromCloseAngleBracket)
               Int -> m ()
findLT (Int
fromCloseAngleBracket Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
3) -- Start after ]]>
             else
               -- We only found one ], that means that we need to keep searching.
               Int -> Int -> m ()
findCDataEnd Int
cdata_start (Int
fromCloseAngleBracket Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
    findTagName :: Int -> m ()
findTagName Int
index0
      | str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
str Int
index0 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
questionChar =
        case Word8 -> str -> Int -> Maybe Int
forall str. StringLike str => Word8 -> str -> Int -> Maybe Int
elemIndexFrom' Word8
closeTagChar str
str Int
spaceOrCloseTag of
          Maybe Int
Nothing -> XenoException -> m ()
forall a e. Exception e => e -> a
throw (XenoException -> m ()) -> XenoException -> m ()
forall a b. (a -> b) -> a -> b
$ Int -> ByteString -> XenoException
XenoParseError Int
index ByteString
"Couldn't find the end of the tag."
          Just Int
fromGt -> do
            Int -> m ()
findLT (Int
fromGt Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
      | str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
str Int
spaceOrCloseTag Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
closeTagChar = do
        let tagname :: ByteString
tagname = str -> Int -> Int -> ByteString
forall str. StringLike str => str -> Int -> Int -> ByteString
substring' str
str Int
index Int
spaceOrCloseTag
        if str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
str Int
index0 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
slashChar
          then ByteString -> m ()
closeF ByteString
tagname
          else do
            ByteString -> m ()
openF ByteString
tagname
            ByteString -> m ()
endOpenF ByteString
tagname
        Int -> m ()
findLT (Int
spaceOrCloseTag Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
      | Bool
otherwise = do
        let tagname :: ByteString
tagname = str -> Int -> Int -> ByteString
forall str. StringLike str => str -> Int -> Int -> ByteString
substring' str
str Int
index Int
spaceOrCloseTag
        ByteString -> m ()
openF ByteString
tagname
        Either Int Int
result <- Int -> m (Either Int Int)
findAttributes Int
spaceOrCloseTag
        ByteString -> m ()
endOpenF ByteString
tagname
        case Either Int Int
result of
          Right Int
closingTag -> Int -> m ()
findLT (Int
closingTag Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
          Left Int
closingPair -> do
            ByteString -> m ()
closeF ByteString
tagname
            Int -> m ()
findLT (Int
closingPair Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
2)
      where
        index :: Int
index =
          if str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
str Int
index0 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
slashChar
            then Int
index0 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1
            else Int
index0
        spaceOrCloseTag :: Int
spaceOrCloseTag = str -> Int -> Int
forall str. StringLike str => str -> Int -> Int
parseName str
str Int
index
    findAttributes :: Int -> m (Either Int Int)
findAttributes Int
index0
      -- case: />
      | str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
str Int
index Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
slashChar
      , str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
str (Int
index Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
closeTagChar
      = Either Int Int -> m (Either Int Int)
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Int -> Either Int Int
forall a b. a -> Either a b
Left Int
index)

      -- case: >
      | str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
str Int
index Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
closeTagChar
      = Either Int Int -> m (Either Int Int)
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Int -> Either Int Int
forall a b. b -> Either a b
Right Int
index)

      -- case: attr=' or attr="
      | str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
str Int
afterAttrName Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
equalChar
      , Word8
usedChar Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
quoteChar Bool -> Bool -> Bool
|| Word8
usedChar Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
doubleQuoteChar
      = case Word8 -> str -> Int -> Maybe Int
forall str. StringLike str => Word8 -> str -> Int -> Maybe Int
elemIndexFrom' Word8
usedChar str
str (Int
quoteIndex Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) of
          Maybe Int
Nothing ->
            XenoException -> m (Either Int Int)
forall a e. Exception e => e -> a
throw
              (Int -> ByteString -> XenoException
XenoParseError Int
index ByteString
"Couldn't find the matching quote character.")
          Just Int
endQuoteIndex -> do
            ByteString -> ByteString -> m ()
attrF
              (str -> Int -> Int -> ByteString
forall str. StringLike str => str -> Int -> Int -> ByteString
substring' str
str Int
index Int
afterAttrName)
              (str -> Int -> Int -> ByteString
forall str. StringLike str => str -> Int -> Int -> ByteString
substring'
                 str
str
                 (Int
quoteIndex Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
                 (Int
endQuoteIndex))
            Int -> m (Either Int Int)
findAttributes (Int
endQuoteIndex Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)

      -- case: attr= without following quote
      | str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
str Int
afterAttrName Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
equalChar
      = XenoException -> m (Either Int Int)
forall a e. Exception e => e -> a
throw (Int -> ByteString -> XenoException
XenoParseError Int
index(ByteString
"Expected ' or \", got: " ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> Word8 -> ByteString
S.singleton Word8
usedChar))

      | Bool
otherwise
      = XenoException -> m (Either Int Int)
forall a e. Exception e => e -> a
throw (Int -> ByteString -> XenoException
XenoParseError Int
index (ByteString
"Expected =, got: " ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> Word8 -> ByteString
S.singleton (str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
str Int
afterAttrName) ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString
" at character index: " ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> (String -> ByteString
S8.pack (String -> ByteString) -> (Int -> String) -> Int -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> String
forall a. Show a => a -> String
show) Int
afterAttrName))
      where
        index :: Int
index = str -> Int -> Int
forall str. StringLike str => str -> Int -> Int
skipSpaces str
str Int
index0
        afterAttrName :: Int
afterAttrName = str -> Int -> Int
forall str. StringLike str => str -> Int -> Int
parseName str
str Int
index
        quoteIndex :: Int
quoteIndex = Int
afterAttrName Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1
        usedChar :: Word8
usedChar = str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
str Int
quoteIndex

{-# INLINE process #-}
{-# SPECIALISE process :: Process (Identity ()) -> ByteString -> Identity ()
               #-}
{-# SPECIALISE process :: Process (State s ()) -> ByteString -> State s ()
               #-}
{-# SPECIALISE process :: Process (ST s ()) -> ByteString -> ST s ()
               #-}
{-# SPECIALISE process :: Process (IO ()) -> ByteString -> IO ()
               #-}
{-# SPECIALISE process :: Process (Identity ()) -> ByteStringZeroTerminated -> Identity ()
               #-}
{-# SPECIALISE process :: Process (State s ()) -> ByteStringZeroTerminated -> State s ()
               #-}
{-# SPECIALISE process :: Process (ST s ()) -> ByteStringZeroTerminated -> ST s ()
               #-}
{-# SPECIALISE process :: Process (IO ()) -> ByteStringZeroTerminated -> IO ()
               #-}

--------------------------------------------------------------------------------
-- ByteString utilities

-- | /O(1)/ 'ByteString' index (subscript) operator, starting from 0.
s_index :: ByteString -> Int -> Word8
s_index :: ByteString -> Int -> Word8
s_index ByteString
ps Int
n
    | Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
0            = XenoException -> Word8
forall a e. Exception e => e -> a
throw (Int -> ByteString -> XenoException
XenoStringIndexProblem Int
n ByteString
ps)
    | Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= ByteString -> Int
S.length ByteString
ps = XenoException -> Word8
forall a e. Exception e => e -> a
throw (Int -> ByteString -> XenoException
XenoStringIndexProblem Int
n ByteString
ps)
    | Bool
otherwise        = ByteString
ps ByteString -> Int -> Word8
`SU.unsafeIndex` Int
n
{-# INLINE s_index #-}

-- | A fast space skipping function.
skipSpaces :: (StringLike str) => str -> Int -> Int
skipSpaces :: str -> Int -> Int
skipSpaces str
str Int
i =
  if Word8 -> Bool
isSpaceChar (str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
str Int
i)
    then str -> Int -> Int
forall str. StringLike str => str -> Int -> Int
skipSpaces str
str (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
    else Int
i
{-# INLINE skipSpaces #-}

-- | Get a substring of a string.
substring :: ByteString -> Int -> Int -> ByteString
substring :: ByteString -> Int -> Int -> ByteString
substring ByteString
s Int
start Int
end = Int -> ByteString -> ByteString
S.take (Int
end Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
start) (Int -> ByteString -> ByteString
S.drop Int
start ByteString
s)
{-# INLINE substring #-}

-- | Basically @findIndex (not . isNameChar)@, but doesn't allocate.
parseName :: (StringLike str) => str -> Int -> Int
parseName :: str -> Int -> Int
parseName str
str Int
index =
  if Bool -> Bool
not (Word8 -> Bool
isNameChar1 (str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
str Int
index))
     then Int
index
     else str -> Int -> Int
forall str. StringLike str => str -> Int -> Int
parseName' str
str (Int
index Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
{-# INLINE parseName #-}

-- | Basically @findIndex (not . isNameChar)@, but doesn't allocate.
parseName' :: (StringLike str) => str -> Int -> Int
parseName' :: str -> Int -> Int
parseName' str
str Int
index =
  if Bool -> Bool
not (Word8 -> Bool
isNameChar (str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
str Int
index))
     then Int
index
     else str -> Int -> Int
forall str. StringLike str => str -> Int -> Int
parseName' str
str (Int
index Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
{-# INLINE parseName' #-}

-- | Get index of an element starting from offset.
elemIndexFrom :: Word8 -> ByteString -> Int -> Maybe Int
elemIndexFrom :: Word8 -> ByteString -> Int -> Maybe Int
elemIndexFrom Word8
c ByteString
str Int
offset = (Int -> Int) -> Maybe Int -> Maybe Int
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
offset) (Word8 -> ByteString -> Maybe Int
S.elemIndex Word8
c (Int -> ByteString -> ByteString
S.drop Int
offset ByteString
str))
-- Without the INLINE below, the whole function is twice as slow and
-- has linear allocation. See git commit with this comment for
-- results.
{-# INLINE elemIndexFrom #-}

--------------------------------------------------------------------------------
-- Character types

isSpaceChar :: Word8 -> Bool
isSpaceChar :: Word8 -> Bool
isSpaceChar = Int -> Int -> Bool
forall a. Bits a => a -> Int -> Bool
testBit (Int
0b100000000000000000010011000000000 :: Int) (Int -> Bool) -> (Word8 -> Int) -> Word8 -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral
--                       |                  |  ||  bits:
--                       |                  |  |+-- 9
--                       |                  |  +--- 10
--                       |                  +------ 13
--                       +------------------------- 32
{-# INLINE isSpaceChar #-}

-- | Is the character a valid first tag/attribute name constituent?
-- 'a'-'z', 'A'-'Z', '_', ':'
isNameChar1 :: Word8 -> Bool
isNameChar1 :: Word8 -> Bool
isNameChar1 Word8
c =
  (Word8
c Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
>= Word8
97 Bool -> Bool -> Bool
&& Word8
c Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
122) Bool -> Bool -> Bool
|| (Word8
c Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
>= Word8
65 Bool -> Bool -> Bool
&& Word8
c Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
90) Bool -> Bool -> Bool
|| Word8
c Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
95 Bool -> Bool -> Bool
|| Word8
c Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
58
{-# INLINE isNameChar1 #-}

-- isNameCharOriginal :: Word8 -> Bool
-- isNameCharOriginal c =
--   (c >= 97 && c <= 122) || (c >= 65 && c <= 90) || c == 95 || c == 58 ||
--   c == 45 || c == 46 || (c >= 48 && c <= 57)
-- {-# INLINE isNameCharOriginal #-}
--
-- TODO Strange, but highMaskIsNameChar, lowMaskIsNameChar don't calculate fast... FIX IT
-- highMaskIsNameChar, lowMaskIsNameChar :: Word64
-- (highMaskIsNameChar, lowMaskIsNameChar) =
--     foldl (\(hi,low) char -> (hi `setBit` (char - 64), low `setBit` char)) -- NB: `setBit` can process overflowed values (where char < 64; -- TODO fix it
--           (0::Word64, 0::Word64)
--           (map fromIntegral (filter isNameCharOriginal [0..128]))
-- {-# INLINE highMaskIsNameChar #-}
-- {-# INLINE lowMaskIsNameChar #-}

-- | Is the character a valid tag/attribute name constituent?
-- isNameChar1 + '-', '.', '0'-'9'
isNameChar :: Word8 -> Bool
isNameChar :: Word8 -> Bool
isNameChar Word8
char = (Word64
lowMaskIsNameChar Word64 -> Int -> Bool
forall a. Bits a => a -> Int -> Bool
`testBit` Int
char'low) Bool -> Bool -> Bool
|| (Word64
highMaskIsNameChar Word64 -> Int -> Bool
forall a. Bits a => a -> Int -> Bool
`testBit` Int
char'high)
   -- TODO 1) change code to use W# instead of Word64
   --      2) Document `ii - 64` -- there is underflow, but `testBit` can process this!
  where
    char'low :: Int
char'low  = Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
char
    char'high :: Int
char'high = Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word8
char Word8 -> Word8 -> Word8
forall a. Num a => a -> a -> a
- Word8
64)
    highMaskIsNameChar :: Word64
    highMaskIsNameChar :: Word64
highMaskIsNameChar = Word64
0b11111111111111111111111111010000111111111111111111111111110
    --                     ------------+------------- |    ------------+-------------
    --                                 |              |                |  bits:
    --                                 |              |                +-- 65-90
    --                                 |              +------------------- 95
    --                                 +---------------------------------- 97-122
    lowMaskIsNameChar :: Word64
    lowMaskIsNameChar :: Word64
lowMaskIsNameChar =  Word64
0b11111111111011000000000000000000000000000000000000000000000
    --                     -----+----- ||
    --                          |      ||  bits:
    --                          |      |+-- 45
    --                          |      +--- 46
    --                          +---------- 48-58
{-# INLINE isNameChar #-}

-- | Char for '\''.
quoteChar :: Word8
quoteChar :: Word8
quoteChar = Word8
39

-- | Char for '"'.
doubleQuoteChar :: Word8
doubleQuoteChar :: Word8
doubleQuoteChar = Word8
34

-- | Char for '='.
equalChar :: Word8
equalChar :: Word8
equalChar = Word8
61

-- | Char for '?'.
questionChar :: Word8
questionChar :: Word8
questionChar = Word8
63

-- | Char for '/'.
slashChar :: Word8
slashChar :: Word8
slashChar = Word8
47

-- | Exclaimation character !.
bangChar :: Word8
bangChar :: Word8
bangChar = Word8
33

-- | The dash character.
commentChar :: Word8
commentChar :: Word8
commentChar = Word8
45 -- '-'

-- | Open tag character.
openTagChar :: Word8
openTagChar :: Word8
openTagChar = Word8
60 -- '<'

-- | Close tag character.
closeTagChar :: Word8
closeTagChar :: Word8
closeTagChar = Word8
62 -- '>'

-- | Open angle bracket character.
openAngleBracketChar :: Word8
openAngleBracketChar :: Word8
openAngleBracketChar = Word8
91

-- | Close angle bracket character.
closeAngleBracketChar :: Word8
closeAngleBracketChar :: Word8
closeAngleBracketChar = Word8
93

-- | Skip initial DOCTYPE declaration
skipDoctype :: ByteString -> ByteString
skipDoctype :: ByteString -> ByteString
skipDoctype ByteString
arg =
    if ByteString
"<!DOCTYPE" ByteString -> ByteString -> Bool
`S8.isPrefixOf` ByteString
bs
      then let (ByteString
_, ByteString
rest)=ByteString
">" ByteString -> ByteString -> (ByteString, ByteString)
`S8.breakSubstring` ByteString
bs
           in ByteString -> ByteString
skipBlanks (ByteString -> ByteString) -> ByteString -> ByteString
forall a b. (a -> b) -> a -> b
$ Int -> ByteString -> ByteString
S8.drop Int
1 ByteString
rest
      else ByteString
bs
  where
    bs :: ByteString
bs = ByteString -> ByteString
skipBlanks ByteString
arg
    skipBlanks :: ByteString -> ByteString
skipBlanks = (Char -> Bool) -> ByteString -> ByteString
S8.dropWhile Char -> Bool
isSpace