{-# LANGUAGE BangPatterns               #-}
{-# LANGUAGE DeriveAnyClass             #-}
{-# LANGUAGE DeriveDataTypeable         #-}
{-# LANGUAGE DeriveGeneric              #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE CPP                        #-}
-- | DOM parser and API for XML.
--   Slightly slower DOM parsing,
--   but add missing close tags.
module Xeno.DOM.Robust
  ( parse
  , Node
  , Content(..)
  , name
  , attributes
  , contents
  , children
  ) where

import           Control.Monad.ST
import           Control.Spork
#if MIN_VERSION_bytestring(0,11,0)
import           Data.ByteString.Internal as BS (ByteString(..), plusForeignPtr)
#else
import           Data.ByteString.Internal(ByteString(..))
#endif
import           Data.STRef
import qualified Data.Vector.Unboxed         as UV
import qualified Data.Vector.Unboxed.Mutable as UMV
import           Data.Mutable(asURef, newRef, readRef, writeRef)
#if MIN_VERSION_bytestring(0,11,0)
import           Foreign.Ptr (minusPtr)
import           Foreign.ForeignPtr (ForeignPtr, withForeignPtr)
import           System.IO.Unsafe (unsafeDupablePerformIO)
#endif
import           Xeno.SAX
import           Xeno.Types
import           Xeno.DOM.Internal(Node(..), Content(..), name, attributes, contents, children)

-- | Parse a complete Nodes document.
parse :: ByteString -> Either XenoException Node
parse :: ByteString -> Either XenoException Node
parse ByteString
inp =
  case forall e a. Exception e => a -> Either e a
spork Vector Int
node of
    Left XenoException
e -> forall a b. a -> Either a b
Left XenoException
e
    Right Vector Int
r ->
      case Vector Int -> Maybe Node
findRootNode Vector Int
r of
        Just Node
n -> forall a b. b -> Either a b
Right Node
n
        Maybe Node
Nothing -> forall a b. a -> Either a b
Left XenoException
XenoExpectRootNode
  where
    findRootNode :: Vector Int -> Maybe Node
findRootNode Vector Int
r = Int -> Maybe Node
go Int
0
      where
        go :: Int -> Maybe Node
go Int
n = case Vector Int
r forall a. Unbox a => Vector a -> Int -> Maybe a
UV.!? Int
n of
          Just Int
0x0 -> forall a. a -> Maybe a
Just (ByteString -> Int -> Vector Int -> Node
Node ByteString
str Int
n Vector Int
r)
          -- skipping text assuming that it contains only white space
          -- characters
          Just Int
0x1 -> Int -> Maybe Node
go (Int
nforall a. Num a => a -> a -> a
+Int
3)
          Maybe Int
_ -> forall a. Maybe a
Nothing
#if MIN_VERSION_bytestring(0,11,0)
    BS ForeignPtr Word8
offset0 Int
_ = ByteString
str
#else
    PS _ offset0 _ = str
#endif
    str :: ByteString
str = ByteString -> ByteString
skipDoctype ByteString
inp
    node :: Vector Int
node =
      forall a. (forall s. ST s a) -> a
runST
        (do MVector s Int
nil <- forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
Int -> m (MVector (PrimState m) a)
UMV.new Int
1000
            STRef s (MVector s Int)
vecRef    <- forall a s. a -> ST s (STRef s a)
newSTRef MVector s Int
nil
            URef s Int
sizeRef   <- forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap forall s a. URef s a -> URef s a
asURef forall a b. (a -> b) -> a -> b
$ forall c (m :: * -> *).
(MutableRef c, PrimMonad m, PrimState m ~ MCState c) =>
RefElement c -> m c
newRef Int
0
            URef s Int
parentRef <- forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap forall s a. URef s a -> URef s a
asURef forall a b. (a -> b) -> a -> b
$ forall c (m :: * -> *).
(MutableRef c, PrimMonad m, PrimState m ~ MCState c) =>
RefElement c -> m c
newRef Int
0
            forall (m :: * -> *) str.
(Monad m, StringLike str) =>
Process (m ()) -> str -> m ()
process Process {
#if MIN_VERSION_bytestring(0,11,0)
                openF :: ByteString -> ST s ()
openF = \(BS ForeignPtr Word8
name_start Int
name_len) -> do
#else
                openF = \(PS _ name_start name_len) -> do
#endif
                 let tag :: Int
tag = Int
0x00
                     tag_end :: Int
tag_end = -Int
1
                 Int
index <- forall c (m :: * -> *).
(MutableRef c, PrimMonad m, PrimState m ~ MCState c) =>
c -> m (RefElement c)
readRef URef s Int
sizeRef
                 MVector s Int
v' <-
                   do MVector s Int
v <- forall s a. STRef s a -> ST s a
readSTRef STRef s (MVector s Int)
vecRef
                      if Int
index forall a. Num a => a -> a -> a
+ Int
5 forall a. Ord a => a -> a -> Bool
< forall a s. Unbox a => MVector s a -> Int
UMV.length MVector s Int
v
                        then forall (f :: * -> *) a. Applicative f => a -> f a
pure MVector s Int
v
                        else do
                          MVector s Int
v' <- forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> m (MVector (PrimState m) a)
UMV.grow MVector s Int
v (forall a s. Unbox a => MVector s a -> Int
UMV.length MVector s Int
v)
                          forall s a. STRef s a -> a -> ST s ()
writeSTRef STRef s (MVector s Int)
vecRef MVector s Int
v'
                          forall (m :: * -> *) a. Monad m => a -> m a
return MVector s Int
v'
                 Int
tag_parent <- forall c (m :: * -> *).
(MutableRef c, PrimMonad m, PrimState m ~ MCState c) =>
c -> m (RefElement c)
readRef URef s Int
parentRef
                 do forall c (m :: * -> *).
(MutableRef c, PrimMonad m, PrimState m ~ MCState c) =>
c -> RefElement c -> m ()
writeRef URef s Int
parentRef Int
index
                    forall c (m :: * -> *).
(MutableRef c, PrimMonad m, PrimState m ~ MCState c) =>
c -> RefElement c -> m ()
writeRef URef s Int
sizeRef (Int
index forall a. Num a => a -> a -> a
+ Int
5)
                    forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
UMV.write MVector s Int
v' Int
index Int
tag
                    forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
UMV.write MVector s Int
v' (Int
index forall a. Num a => a -> a -> a
+ Int
1) Int
tag_parent
                    forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
UMV.write MVector s Int
v' (Int
index forall a. Num a => a -> a -> a
+ Int
2) (forall a b. ForeignPtr a -> ForeignPtr b -> Int
distance ForeignPtr Word8
name_start ForeignPtr Word8
offset0)
                    forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
UMV.write MVector s Int
v' (Int
index forall a. Num a => a -> a -> a
+ Int
3) Int
name_len
                    forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
UMV.write MVector s Int
v' (Int
index forall a. Num a => a -> a -> a
+ Int
4) Int
tag_end
#if MIN_VERSION_bytestring(0,11,0)
              , attrF :: ByteString -> ByteString -> ST s ()
attrF = \(BS ForeignPtr Word8
key_start Int
key_len) (BS ForeignPtr Word8
value_start Int
value_len) -> do
#else
              , attrF = \(PS _ key_start key_len) (PS _ value_start value_len) -> do
#endif
                 Int
index <- forall c (m :: * -> *).
(MutableRef c, PrimMonad m, PrimState m ~ MCState c) =>
c -> m (RefElement c)
readRef URef s Int
sizeRef
                 MVector s Int
v' <-
                   do MVector s Int
v <- forall s a. STRef s a -> ST s a
readSTRef STRef s (MVector s Int)
vecRef
                      if Int
index forall a. Num a => a -> a -> a
+ Int
5 forall a. Ord a => a -> a -> Bool
< forall a s. Unbox a => MVector s a -> Int
UMV.length MVector s Int
v
                        then forall (f :: * -> *) a. Applicative f => a -> f a
pure MVector s Int
v
                        else do
                          MVector s Int
v' <- forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> m (MVector (PrimState m) a)
UMV.grow MVector s Int
v (forall a s. Unbox a => MVector s a -> Int
UMV.length MVector s Int
v)
                          forall s a. STRef s a -> a -> ST s ()
writeSTRef STRef s (MVector s Int)
vecRef MVector s Int
v'
                          forall (m :: * -> *) a. Monad m => a -> m a
return MVector s Int
v'
                 let tag :: Int
tag = Int
0x02
                 do forall c (m :: * -> *).
(MutableRef c, PrimMonad m, PrimState m ~ MCState c) =>
c -> RefElement c -> m ()
writeRef URef s Int
sizeRef (Int
index forall a. Num a => a -> a -> a
+ Int
5)
                 do forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
UMV.write MVector s Int
v' Int
index Int
tag
                    forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
UMV.write MVector s Int
v' (Int
index forall a. Num a => a -> a -> a
+ Int
1) (forall a b. ForeignPtr a -> ForeignPtr b -> Int
distance ForeignPtr Word8
key_start ForeignPtr Word8
offset0)
                    forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
UMV.write MVector s Int
v' (Int
index forall a. Num a => a -> a -> a
+ Int
2) Int
key_len
                    forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
UMV.write MVector s Int
v' (Int
index forall a. Num a => a -> a -> a
+ Int
3) (forall a b. ForeignPtr a -> ForeignPtr b -> Int
distance ForeignPtr Word8
value_start ForeignPtr Word8
offset0)
                    forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
UMV.write MVector s Int
v' (Int
index forall a. Num a => a -> a -> a
+ Int
4) Int
value_len
              , endOpenF :: ByteString -> ST s ()
endOpenF = \ByteString
_ -> forall (m :: * -> *) a. Monad m => a -> m a
return ()
#if MIN_VERSION_bytestring(0,11,0)
              , textF :: ByteString -> ST s ()
textF = \(BS ForeignPtr Word8
text_start Int
text_len) -> do
#else
              , textF = \(PS _ text_start text_len) -> do
#endif
                 let tag :: Int
tag = Int
0x01
                 Int
index <- forall c (m :: * -> *).
(MutableRef c, PrimMonad m, PrimState m ~ MCState c) =>
c -> m (RefElement c)
readRef URef s Int
sizeRef
                 MVector s Int
v' <-
                   do MVector s Int
v <- forall s a. STRef s a -> ST s a
readSTRef STRef s (MVector s Int)
vecRef
                      if Int
index forall a. Num a => a -> a -> a
+ Int
3 forall a. Ord a => a -> a -> Bool
< forall a s. Unbox a => MVector s a -> Int
UMV.length MVector s Int
v
                        then forall (f :: * -> *) a. Applicative f => a -> f a
pure MVector s Int
v
                        else do
                          MVector s Int
v' <- forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> m (MVector (PrimState m) a)
UMV.grow MVector s Int
v (forall a s. Unbox a => MVector s a -> Int
UMV.length MVector s Int
v)
                          forall s a. STRef s a -> a -> ST s ()
writeSTRef STRef s (MVector s Int)
vecRef MVector s Int
v'
                          forall (m :: * -> *) a. Monad m => a -> m a
return MVector s Int
v'
                 do forall c (m :: * -> *).
(MutableRef c, PrimMonad m, PrimState m ~ MCState c) =>
c -> RefElement c -> m ()
writeRef URef s Int
sizeRef (Int
index forall a. Num a => a -> a -> a
+ Int
3)
                 do forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
UMV.write MVector s Int
v' Int
index Int
tag
                    forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
UMV.write MVector s Int
v' (Int
index forall a. Num a => a -> a -> a
+ Int
1) (forall a b. ForeignPtr a -> ForeignPtr b -> Int
distance ForeignPtr Word8
text_start ForeignPtr Word8
offset0)
                    forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
UMV.write MVector s Int
v' (Int
index forall a. Num a => a -> a -> a
+ Int
2) Int
text_len
#if MIN_VERSION_bytestring(0,11,0)
              , closeF :: ByteString -> ST s ()
closeF = \closeTag :: ByteString
closeTag@(BS ForeignPtr Word8
_ Int
_) -> do
#else
              , closeF = \closeTag@(PS s _ _) -> do
#endif
                 MVector s Int
v <- forall s a. STRef s a -> ST s a
readSTRef STRef s (MVector s Int)
vecRef
                 -- Set the tag_end slot of the parent.
                 Int
index <- forall c (m :: * -> *).
(MutableRef c, PrimMonad m, PrimState m ~ MCState c) =>
c -> m (RefElement c)
readRef URef s Int
sizeRef
                 forall (m :: * -> *). Monad m => m Bool -> m ()
untilM forall a b. (a -> b) -> a -> b
$ do
                   Int
parent <- forall c (m :: * -> *).
(MutableRef c, PrimMonad m, PrimState m ~ MCState c) =>
c -> m (RefElement c)
readRef URef s Int
parentRef
                   Bool
correctTag <- if Int
parent forall a. Eq a => a -> a -> Bool
== Int
0
                                    then forall (m :: * -> *) a. Monad m => a -> m a
return Bool
True -- no more tags to close!!!
                                    else do
                                      Int
parent_name <- forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> m a
UMV.read MVector s Int
v (Int
parent forall a. Num a => a -> a -> a
+ Int
2)
                                      Int
parent_len  <- forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> m a
UMV.read MVector s Int
v (Int
parent forall a. Num a => a -> a -> a
+ Int
3)
#if MIN_VERSION_bytestring(0,11,0)
                                      let openTag :: ByteString
openTag  = ForeignPtr Word8 -> Int -> ByteString
BS (forall a b. ForeignPtr a -> Int -> ForeignPtr b
BS.plusForeignPtr ForeignPtr Word8
offset0 Int
parent_name) Int
parent_len
#else
                                      let openTag  = PS s (parent_name+offset0) parent_len
#endif
                                      forall (m :: * -> *) a. Monad m => a -> m a
return       forall a b. (a -> b) -> a -> b
$ ByteString
openTag forall a. Eq a => a -> a -> Bool
== ByteString
closeTag
                   forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
UMV.write                  MVector s Int
v (Int
parent forall a. Num a => a -> a -> a
+ Int
4) Int
index
                   -- Pop the stack and return to the parent element.
                   Int
previousParent <- forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> m a
UMV.read MVector s Int
v (Int
parent forall a. Num a => a -> a -> a
+ Int
1)
                   forall c (m :: * -> *).
(MutableRef c, PrimMonad m, PrimState m ~ MCState c) =>
c -> RefElement c -> m ()
writeRef URef s Int
parentRef Int
previousParent
                   forall (m :: * -> *) a. Monad m => a -> m a
return Bool
correctTag -- continue closing tags, until matching one is found
#if MIN_VERSION_bytestring(0,11,0)
              , cdataF :: ByteString -> ST s ()
cdataF = \(BS ForeignPtr Word8
cdata_start Int
cdata_len) -> do
#else
              , cdataF = \(PS _ cdata_start cdata_len) -> do
#endif
                 let tag :: Int
tag = Int
0x03
                 Int
index <- forall c (m :: * -> *).
(MutableRef c, PrimMonad m, PrimState m ~ MCState c) =>
c -> m (RefElement c)
readRef URef s Int
sizeRef
                 MVector s Int
v' <-
                   do MVector s Int
v <- forall s a. STRef s a -> ST s a
readSTRef STRef s (MVector s Int)
vecRef
                      if Int
index forall a. Num a => a -> a -> a
+ Int
3 forall a. Ord a => a -> a -> Bool
< forall a s. Unbox a => MVector s a -> Int
UMV.length MVector s Int
v
                        then forall (f :: * -> *) a. Applicative f => a -> f a
pure MVector s Int
v
                        else do
                          MVector s Int
v' <- forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> m (MVector (PrimState m) a)
UMV.grow MVector s Int
v (forall a s. Unbox a => MVector s a -> Int
UMV.length MVector s Int
v)
                          forall s a. STRef s a -> a -> ST s ()
writeSTRef STRef s (MVector s Int)
vecRef MVector s Int
v'
                          forall (m :: * -> *) a. Monad m => a -> m a
return MVector s Int
v'
                 do forall c (m :: * -> *).
(MutableRef c, PrimMonad m, PrimState m ~ MCState c) =>
c -> RefElement c -> m ()
writeRef URef s Int
sizeRef (Int
index forall a. Num a => a -> a -> a
+ Int
3)
                 do forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
UMV.write MVector s Int
v' Int
index Int
tag
                    forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
UMV.write MVector s Int
v' (Int
index forall a. Num a => a -> a -> a
+ Int
1) (forall a b. ForeignPtr a -> ForeignPtr b -> Int
distance ForeignPtr Word8
cdata_start ForeignPtr Word8
offset0)
                    forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
UMV.write MVector s Int
v' (Int
index forall a. Num a => a -> a -> a
+ Int
2) Int
cdata_len
              } ByteString
str
            MVector s Int
wet <- forall s a. STRef s a -> ST s a
readSTRef STRef s (MVector s Int)
vecRef
            Vector Int
arr <- forall a (m :: * -> *).
(Unbox a, PrimMonad m) =>
MVector (PrimState m) a -> m (Vector a)
UV.unsafeFreeze MVector s Int
wet
            Int
size <- forall c (m :: * -> *).
(MutableRef c, PrimMonad m, PrimState m ~ MCState c) =>
c -> m (RefElement c)
readRef URef s Int
sizeRef
            forall (m :: * -> *) a. Monad m => a -> m a
return (forall a. Unbox a => Int -> Int -> Vector a -> Vector a
UV.unsafeSlice Int
0 Int
size Vector Int
arr))

untilM :: Monad m => m Bool -> m ()
untilM :: forall (m :: * -> *). Monad m => m Bool -> m ()
untilM m Bool
loop = do
  Bool
cond <- m Bool
loop
  case Bool
cond of
    Bool
True  -> forall (m :: * -> *) a. Monad m => a -> m a
return ()
    Bool
False -> forall (m :: * -> *). Monad m => m Bool -> m ()
untilM m Bool
loop

#if MIN_VERSION_bytestring(0,11,0)
minusForeignPtr :: ForeignPtr a -> ForeignPtr b -> Int
minusForeignPtr :: forall a b. ForeignPtr a -> ForeignPtr b -> Int
minusForeignPtr ForeignPtr a
fpA ForeignPtr b
fpB = forall a. IO a -> a
unsafeDupablePerformIO forall a b. (a -> b) -> a -> b
$
  forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
withForeignPtr ForeignPtr a
fpA forall a b. (a -> b) -> a -> b
$ \Ptr a
ptrA -> forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
withForeignPtr ForeignPtr b
fpB forall a b. (a -> b) -> a -> b
$ \Ptr b
ptrB ->
    forall (f :: * -> *) a. Applicative f => a -> f a
pure (forall a b. Ptr a -> Ptr b -> Int
minusPtr Ptr a
ptrA Ptr b
ptrB)

distance :: ForeignPtr a -> ForeignPtr b -> Int
distance :: forall a b. ForeignPtr a -> ForeignPtr b -> Int
distance = forall a b. ForeignPtr a -> ForeignPtr b -> Int
minusForeignPtr
#else
distance :: Int -> Int -> Int
distance a b = a - b
#endif