{-# language BangPatterns #-}
{-# language LambdaCase #-}
{-# language MagicHash #-}
{-# language NamedFieldPuns #-}

module Xml
  ( Node(..)
  , Content(..)
  , Attribute(..)
  , decode
  ) where

import Data.Word (Word8)
import Data.Bytes (Bytes)
import Data.Text.Short (ShortText)
import Data.Primitive (SmallArray)
import Data.Bytes.Parser (Parser)
import GHC.Exts (Char(C#),Char#)
import Data.Chunks (Chunks)
import Data.Builder.ST (Builder)

import qualified Data.Chunks as Chunks
import qualified Data.Text.Short as TS
import qualified Data.Text.Short.Unsafe as TS
import qualified Data.Builder.ST as Builder
import qualified Data.Bytes as Bytes
import qualified Data.Bytes.Parser as Parser
import qualified Data.Bytes.Parser.Rebindable as R
import qualified Data.Bytes.Parser.Latin as Latin
import qualified Data.Bytes.Parser.Unsafe as Unsafe
import qualified Data.Bytes.Parser.Utf8 as Utf8

data Node
  = Text !ShortText
  | Element {-# UNPACK #-} !Content
  deriving (Int -> Node -> ShowS
[Node] -> ShowS
Node -> String
(Int -> Node -> ShowS)
-> (Node -> String) -> ([Node] -> ShowS) -> Show Node
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Node] -> ShowS
$cshowList :: [Node] -> ShowS
show :: Node -> String
$cshow :: Node -> String
showsPrec :: Int -> Node -> ShowS
$cshowsPrec :: Int -> Node -> ShowS
Show,Node -> Node -> Bool
(Node -> Node -> Bool) -> (Node -> Node -> Bool) -> Eq Node
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Node -> Node -> Bool
$c/= :: Node -> Node -> Bool
== :: Node -> Node -> Bool
$c== :: Node -> Node -> Bool
Eq)

data Content = Content
  { Content -> ShortText
tag :: !ShortText
  , Content -> SmallArray Attribute
attributes :: !(SmallArray Attribute)
  , Content -> SmallArray Node
children :: !(SmallArray Node)
  } deriving (Int -> Content -> ShowS
[Content] -> ShowS
Content -> String
(Int -> Content -> ShowS)
-> (Content -> String) -> ([Content] -> ShowS) -> Show Content
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Content] -> ShowS
$cshowList :: [Content] -> ShowS
show :: Content -> String
$cshow :: Content -> String
showsPrec :: Int -> Content -> ShowS
$cshowsPrec :: Int -> Content -> ShowS
Show,Content -> Content -> Bool
(Content -> Content -> Bool)
-> (Content -> Content -> Bool) -> Eq Content
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Content -> Content -> Bool
$c/= :: Content -> Content -> Bool
== :: Content -> Content -> Bool
$c== :: Content -> Content -> Bool
Eq)

data Attribute = Attribute
  { Attribute -> ShortText
name :: !ShortText
  , Attribute -> ShortText
value :: !ShortText
  } deriving (Int -> Attribute -> ShowS
[Attribute] -> ShowS
Attribute -> String
(Int -> Attribute -> ShowS)
-> (Attribute -> String)
-> ([Attribute] -> ShowS)
-> Show Attribute
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Attribute] -> ShowS
$cshowList :: [Attribute] -> ShowS
show :: Attribute -> String
$cshow :: Attribute -> String
showsPrec :: Int -> Attribute -> ShowS
$cshowsPrec :: Int -> Attribute -> ShowS
Show,Attribute -> Attribute -> Bool
(Attribute -> Attribute -> Bool)
-> (Attribute -> Attribute -> Bool) -> Eq Attribute
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Attribute -> Attribute -> Bool
$c/= :: Attribute -> Attribute -> Bool
== :: Attribute -> Attribute -> Bool
$c== :: Attribute -> Attribute -> Bool
Eq)

decode :: Bytes -> Maybe Node
decode :: Bytes -> Maybe Node
decode !Bytes
b = (forall s. Parser () s Node) -> Bytes -> Maybe Node
forall e a. (forall s. Parser e s a) -> Bytes -> Maybe a
Parser.parseBytesMaybe forall s. Parser () s Node
elementNodeParser Bytes
b

elementNodeParser :: Parser () s Node
elementNodeParser :: Parser () s Node
elementNodeParser = do
  () -> Char -> Parser () s ()
forall e s. e -> Char -> Parser e s ()
Latin.char () Char
'<'
  Bytes
btag <- (Word8 -> Bool) -> Parser () s Bytes
forall e s. (Word8 -> Bool) -> Parser e s Bytes
Parser.takeWhile (\Word8
w -> Bool -> Bool
not (Word8 -> Bool
isXmlSpace Word8
w) Bool -> Bool -> Bool
&& Word8
w Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
/= Word8
0x3E Bool -> Bool -> Bool
&& Word8
w Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
/= Word8
0x2F)
  case Bytes -> Int
Bytes.length Bytes
btag of
    Int
0 -> () -> Parser () s ()
forall e s a. e -> Parser e s a
Parser.fail ()
    Int
_ -> () -> Parser () s ()
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()
  ShortText
tag <- case ShortByteString -> Maybe ShortText
TS.fromShortByteString (Bytes -> ShortByteString
Bytes.toShortByteStringClone Bytes
btag) of
    Maybe ShortText
Nothing -> () -> Parser () s ShortText
forall e s a. e -> Parser e s a
Parser.fail ()
    Just ShortText
ttag -> ShortText -> Parser () s ShortText
forall (f :: * -> *) a. Applicative f => a -> f a
pure ShortText
ttag
  -- Note that parserAttributes consumes leading and trailing whitespace.
  Chunks Attribute
attrs <- Builder s Attribute -> Parser () s (Chunks Attribute)
forall s. Builder s Attribute -> Parser () s (Chunks Attribute)
parserAttributes (Builder s Attribute -> Parser () s (Chunks Attribute))
-> Parser () s (Builder s Attribute)
-> Parser () s (Chunks Attribute)
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< ST s (Builder s Attribute) -> Parser () s (Builder s Attribute)
forall s a e. ST s a -> Parser e s a
Parser.effect ST s (Builder s Attribute)
forall s a. ST s (Builder s a)
Builder.new
  let !attributes :: SmallArray Attribute
attributes = Chunks Attribute -> SmallArray Attribute
forall a. Chunks a -> SmallArray a
Chunks.concat Chunks Attribute
attrs
  () -> Parser () s Char
forall e s. e -> Parser e s Char
Latin.any () Parser () s Char -> (Char -> Parser () s Node) -> Parser () s Node
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \case
    Char
'>' -> do
      Chunks Node
nodes <- ShortText -> Parser () s (Chunks Node)
forall s. ShortText -> Parser () s (Chunks Node)
childrenParser ShortText
tag
      Node -> Parser () s Node
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Content -> Node
Element Content :: ShortText -> SmallArray Attribute -> SmallArray Node -> Content
Content{ShortText
tag :: ShortText
tag :: ShortText
tag,SmallArray Attribute
attributes :: SmallArray Attribute
attributes :: SmallArray Attribute
attributes,children :: SmallArray Node
children=Chunks Node -> SmallArray Node
forall a. Chunks a -> SmallArray a
Chunks.concat Chunks Node
nodes})
    Char
'/' -> do
      () -> Char -> Parser () s ()
forall e s. e -> Char -> Parser e s ()
Latin.char () Char
'>'
      Node -> Parser () s Node
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Content -> Node
Element Content :: ShortText -> SmallArray Attribute -> SmallArray Node -> Content
Content{ShortText
tag :: ShortText
tag :: ShortText
tag,SmallArray Attribute
attributes :: SmallArray Attribute
attributes :: SmallArray Attribute
attributes,children :: SmallArray Node
children=SmallArray Node
forall a. Monoid a => a
mempty})
    Char
_ -> () -> Parser () s Node
forall e s a. e -> Parser e s a
Parser.fail ()
      
textNodeParser :: Parser () s Node
textNodeParser :: Parser () s Node
textNodeParser = do
  Bytes
raw <- (Word8 -> Bool) -> Parser () s Bytes
forall e s. (Word8 -> Bool) -> Parser e s Bytes
Parser.takeWhile (\Word8
w -> Word8
w Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
/= Word8
0x3C)
  case (Word8 -> Bool) -> Bytes -> Bool
Bytes.any (\Word8
w -> Word8
w Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
> Word8
0x7F Bool -> Bool -> Bool
|| Word8
w Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
0x26) Bytes
raw of
    Bool
True -> () -> Parser () s Node
forall e s a. e -> Parser e s a
Parser.fail () -- TODO: escape or check UTF-8 encoding here instead
    Bool
False -> Node -> Parser () s Node
forall (f :: * -> *) a. Applicative f => a -> f a
pure (ShortText -> Node
Text (ShortByteString -> ShortText
TS.fromShortByteStringUnsafe (Bytes -> ShortByteString
Bytes.toShortByteStringClone Bytes
raw)))

-- This eats the closing tag as well.
childrenParser ::
     ShortText -- opening tag name, looking for a closing tag that matches
  -> Parser () s (Chunks Node)
childrenParser :: ShortText -> Parser () s (Chunks Node)
childrenParser !ShortText
tag = do
  Builder s Node
b0 <- ST s (Builder s Node) -> Parser () s (Builder s Node)
forall s a e. ST s a -> Parser e s a
Parser.effect ST s (Builder s Node)
forall s a. ST s (Builder s a)
Builder.new
  ShortText -> Builder s Node -> Parser () s (Chunks Node)
forall s. ShortText -> Builder s Node -> Parser () s (Chunks Node)
childrenParserLoop ShortText
tag Builder s Node
b0

childrenParserLoop ::
     ShortText -- opening tag name, looking for a closing tag that matches
  -> Builder s Node
  -> Parser () s (Chunks Node)
childrenParserLoop :: ShortText -> Builder s Node -> Parser () s (Chunks Node)
childrenParserLoop !ShortText
tag !Builder s Node
b0 = () -> Parser () s Char
forall e s. e -> Parser e s Char
Latin.any () Parser () s Char
-> (Char -> Parser () s (Chunks Node)) -> Parser () s (Chunks Node)
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \case
  Char
'<' -> () -> Parser () s Char
forall e s. e -> Parser e s Char
Latin.any () Parser () s Char
-> (Char -> Parser () s (Chunks Node)) -> Parser () s (Chunks Node)
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \case
    Char
'/' -> do
      () -> ShortText -> Parser () s ()
forall e s. e -> ShortText -> Parser e s ()
Utf8.shortText () ShortText
tag
      (Word8 -> Bool) -> Parser () s ()
forall e s. (Word8 -> Bool) -> Parser e s ()
Parser.skipWhile Word8 -> Bool
isXmlSpace
      () -> Char -> Parser () s ()
forall e s. e -> Char -> Parser e s ()
Latin.char () Char
'>'
      ST s (Chunks Node) -> Parser () s (Chunks Node)
forall s a e. ST s a -> Parser e s a
Parser.effect (Builder s Node -> ST s (Chunks Node)
forall s a. Builder s a -> ST s (Chunks a)
Builder.freeze Builder s Node
b0)
    Char
_ -> do
      Int -> Parser () s ()
forall e s. Int -> Parser e s ()
Unsafe.unconsume Int
2
      Node
node <- Parser () s Node
forall s. Parser () s Node
elementNodeParser
      Builder s Node
b1 <- ST s (Builder s Node) -> Parser () s (Builder s Node)
forall s a e. ST s a -> Parser e s a
Parser.effect (Node -> Builder s Node -> ST s (Builder s Node)
forall a s. a -> Builder s a -> ST s (Builder s a)
Builder.push Node
node Builder s Node
b0)
      ShortText -> Builder s Node -> Parser () s (Chunks Node)
forall s. ShortText -> Builder s Node -> Parser () s (Chunks Node)
childrenParserLoop ShortText
tag Builder s Node
b1
  Char
_ -> do
    Int -> Parser () s ()
forall e s. Int -> Parser e s ()
Unsafe.unconsume Int
1
    Node
node <- Parser () s Node
forall s. Parser () s Node
textNodeParser
    Builder s Node
b1 <- ST s (Builder s Node) -> Parser () s (Builder s Node)
forall s a e. ST s a -> Parser e s a
Parser.effect (Node -> Builder s Node -> ST s (Builder s Node)
forall a s. a -> Builder s a -> ST s (Builder s a)
Builder.push Node
node Builder s Node
b0)
    ShortText -> Builder s Node -> Parser () s (Chunks Node)
forall s. ShortText -> Builder s Node -> Parser () s (Chunks Node)
childrenParserLoop ShortText
tag Builder s Node
b1

isXmlSpace :: Word8 -> Bool
isXmlSpace :: Word8 -> Bool
isXmlSpace = \case
  Word8
0x20 -> Bool
True
  Word8
0x09 -> Bool
True
  Word8
0x0D -> Bool
True
  Word8
0x0A -> Bool
True
  Word8
_ -> Bool
False

parserAttributes :: Builder s Attribute -> Parser () s (Chunks Attribute)
parserAttributes :: Builder s Attribute -> Parser () s (Chunks Attribute)
parserAttributes !Builder s Attribute
b0 = do
  (Word8 -> Bool) -> Parser () s ()
forall e s. (Word8 -> Bool) -> Parser e s ()
Parser.skipWhile Word8 -> Bool
isXmlSpace
  Parser () s Bool
forall s. Parser () s Bool
peekIsNameStartChar Parser () s Bool
-> (Bool -> Parser () s (Chunks Attribute))
-> Parser () s (Chunks Attribute)
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \case
    Bool
True -> do
      Attribute
attr <- Parser () s Attribute
forall s. Parser () s Attribute
parserAttribute
      Builder s Attribute
b1 <- ST s (Builder s Attribute) -> Parser () s (Builder s Attribute)
forall s a e. ST s a -> Parser e s a
Parser.effect (Attribute -> Builder s Attribute -> ST s (Builder s Attribute)
forall a s. a -> Builder s a -> ST s (Builder s a)
Builder.push Attribute
attr Builder s Attribute
b0)
      Builder s Attribute -> Parser () s (Chunks Attribute)
forall s. Builder s Attribute -> Parser () s (Chunks Attribute)
parserAttributes Builder s Attribute
b1
    Bool
False -> do
      (Word8 -> Bool) -> Parser () s ()
forall e s. (Word8 -> Bool) -> Parser e s ()
Parser.skipWhile Word8 -> Bool
isXmlSpace
      ST s (Chunks Attribute) -> Parser () s (Chunks Attribute)
forall s a e. ST s a -> Parser e s a
Parser.effect (Builder s Attribute -> ST s (Chunks Attribute)
forall s a. Builder s a -> ST s (Chunks a)
Builder.freeze Builder s Attribute
b0)

-- From the spec, we have:
--   Attribute ::= Name Eq AttValue
--   Eq        ::= S? '=' S?
--   Name      ::= NameStartChar (NameChar)*
--
-- Precondition A: The first character is a NameStartChar. This parser
-- does not check this.
parserAttribute :: Parser () s Attribute
parserAttribute :: Parser () s Attribute
parserAttribute = do
  Bytes
bname <- (Word8 -> Bool) -> Parser () s Bytes
forall e s. (Word8 -> Bool) -> Parser e s Bytes
Parser.takeWhile (\Word8
w -> Bool -> Bool
not (Word8 -> Bool
isXmlSpace Word8
w) Bool -> Bool -> Bool
&& Word8
w Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
/= Word8
0x3D)
  -- We may assume that length of bname is at least one because of
  -- precondition A.
  !ShortText
name <- case ShortByteString -> Maybe ShortText
TS.fromShortByteString (Bytes -> ShortByteString
Bytes.toShortByteStringClone Bytes
bname) of
    Maybe ShortText
Nothing -> () -> Parser () s ShortText
forall e s a. e -> Parser e s a
Parser.fail ()
    Just ShortText
tname -> ShortText -> Parser () s ShortText
forall (f :: * -> *) a. Applicative f => a -> f a
pure ShortText
tname
  (Word8 -> Bool) -> Parser () s ()
forall e s. (Word8 -> Bool) -> Parser e s ()
Parser.skipWhile Word8 -> Bool
isXmlSpace
  () -> Char -> Parser () s ()
forall e s. e -> Char -> Parser e s ()
Latin.char () Char
'='
  (Word8 -> Bool) -> Parser () s ()
forall e s. (Word8 -> Bool) -> Parser e s ()
Parser.skipWhile Word8 -> Bool
isXmlSpace
  !ShortText
value <- Parser () s ShortText
forall s. Parser () s ShortText
parserAttributeValue
  Attribute -> Parser () s Attribute
forall (f :: * -> *) a. Applicative f => a -> f a
pure Attribute :: ShortText -> ShortText -> Attribute
Attribute{ShortText
name :: ShortText
name :: ShortText
name,ShortText
value :: ShortText
value :: ShortText
value}

-- TODO: This is woefully incomplete
parserAttributeValue :: Parser () s ShortText
parserAttributeValue :: Parser () s ShortText
parserAttributeValue = do
  () -> Parser () s Char
forall e s. e -> Parser e s Char
Latin.any () Parser () s Char
-> (Char -> Parser () s ShortText) -> Parser () s ShortText
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \case
    Char
'"' -> do
      Bytes
bval <- (Word8 -> Bool) -> Parser () s Bytes
forall e s. (Word8 -> Bool) -> Parser e s Bytes
Parser.takeWhile (\Word8
w -> Word8
w Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
/= Word8
0x22)
      () -> Char -> Parser () s ()
forall e s. e -> Char -> Parser e s ()
Latin.char () Char
'"'
      case ShortByteString -> Maybe ShortText
TS.fromShortByteString (Bytes -> ShortByteString
Bytes.toShortByteStringClone Bytes
bval) of
        Maybe ShortText
Nothing -> () -> Parser () s ShortText
forall e s a. e -> Parser e s a
Parser.fail ()
        Just ShortText
tval -> ShortText -> Parser () s ShortText
forall (f :: * -> *) a. Applicative f => a -> f a
pure ShortText
tval
    Char
'\'' -> do
      Bytes
bval <- (Word8 -> Bool) -> Parser () s Bytes
forall e s. (Word8 -> Bool) -> Parser e s Bytes
Parser.takeWhile (\Word8
w -> Word8
w Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
/= Word8
0x27)
      () -> Char -> Parser () s ()
forall e s. e -> Char -> Parser e s ()
Latin.char () Char
'\''
      case ShortByteString -> Maybe ShortText
TS.fromShortByteString (Bytes -> ShortByteString
Bytes.toShortByteStringClone Bytes
bval) of
        Maybe ShortText
Nothing -> () -> Parser () s ShortText
forall e s a. e -> Parser e s a
Parser.fail ()
        Just ShortText
tval -> ShortText -> Parser () s ShortText
forall (f :: * -> *) a. Applicative f => a -> f a
pure ShortText
tval
    Char
_ -> () -> Parser () s ShortText
forall e s a. e -> Parser e s a
Parser.fail ()
  
peekIsNameStartChar :: Parser () s Bool
peekIsNameStartChar :: Parser () s Bool
peekIsNameStartChar =
  Parser () s Int
forall e s. Parser e s Int
Unsafe.cursor Parser () s Int -> (Int -> Parser () s Bool) -> Parser () s Bool
forall e s a b.
Bind 'LiftedRep 'LiftedRep =>
Parser e s a -> (a -> Parser e s b) -> Parser e s b
R.>>= \Int
pos ->
  () -> Parser () s Char#
forall e s. e -> Parser e s Char#
Utf8.any# () Parser () s Char#
-> (Char# -> Parser () s Bool) -> Parser () s Bool
forall e s a b.
Bind 'LiftedRep 'LiftedRep =>
Parser e s a -> (a -> Parser e s b) -> Parser e s b
R.>>= \Char#
c -> 
  Int -> Parser () s ()
forall e s. Int -> Parser e s ()
Unsafe.jump Int
pos Parser () s () -> (() -> Parser () s Bool) -> Parser () s Bool
forall e s a b.
Bind 'LiftedRep 'LiftedRep =>
Parser e s a -> (a -> Parser e s b) -> Parser e s b
R.>>= \()
_ ->
  Bool -> Parser () s Bool
forall e s a. Pure 'LiftedRep => a -> Parser e s a
R.pure (Char# -> Bool
isNameStartChar Char#
c)
  
isNameStartChar :: Char# -> Bool
isNameStartChar :: Char# -> Bool
isNameStartChar Char#
c = case Char# -> Char
C# Char#
c of
  Char
':' -> Bool
True
  Char
'_' -> Bool
True
  Char
_ | Char# -> Char
C# Char#
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'A' Bool -> Bool -> Bool
&& Char# -> Char
C# Char#
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'Z' -> Bool
True
  Char
_ | Char# -> Char
C# Char#
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'a' Bool -> Bool -> Bool
&& Char# -> Char
C# Char#
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'z' -> Bool
True
  Char
_ -> Bool
False