{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE OverloadedStrings #-}
{- |
   Module      : Text.Pandoc.XML.Light
   Copyright   : Copyright (C) 2021 John MacFarlane
   License     : GNU GPL, version 2 or above

   Maintainer  : John MacFarlane <jgm@berkeley.edu>
   Stability   : alpha
   Portability : portable

xml-light, which we used in pandoc's the XML-based readers, has
some limitations: in particular, it produces nodes with String
instead of Text, and the parser falls over on processing instructions
(see #7091).

This module exports much of the API of xml-light, but using Text instead
of String. In addition, the xml-light parsers are replaced by xml-conduit's
well-tested parser.  (The xml-conduit types are mapped to types
isomorphic to xml-light's, to avoid the need for massive code modifications
elsewhere.)  Bridge functions to map xml-light types to this module's
types are also provided (since libraries like texmath still use xml-light).

Another advantage of the xml-conduit parser is that it gives us
detailed information on xml parse errors.

In the future we may want to move to using xml-conduit or another
xml library in the code base, but this change gives us
better performance and accuracy without much change in the
code that used xml-light.
-}
module Text.Pandoc.XML.Light
  ( module Text.Pandoc.XML.Light.Types
  , module Text.Pandoc.XML.Light.Proc
  , module Text.Pandoc.XML.Light.Output
    -- * Replacement for xml-light's Text.XML.Input
  , parseXMLElement
  , parseXMLContents
  ) where

import qualified Control.Exception as E
import qualified Text.XML as Conduit
import Text.XML.Unresolved (InvalidEventStream(..))
import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
import qualified Data.Map as M
import Data.Maybe (mapMaybe)
import Text.Pandoc.XML.Light.Types
import Text.Pandoc.XML.Light.Proc
import Text.Pandoc.XML.Light.Output

-- Drop in replacement for parseXMLDoc in xml-light.
parseXMLElement :: TL.Text -> Either T.Text Element
parseXMLElement :: Text -> Either Text Element
parseXMLElement Text
t =
  Element -> Element
elementToElement (Element -> Element)
-> (Document -> Element) -> Document -> Element
forall b c a. (b -> c) -> (a -> b) -> a -> c
.  Document -> Element
Conduit.documentRoot (Document -> Element)
-> Either Text Document -> Either Text Element
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$>
    (SomeException -> Either Text Document)
-> (Document -> Either Text Document)
-> Either SomeException Document
-> Either Text Document
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either (Text -> Either Text Document
forall a b. a -> Either a b
Left (Text -> Either Text Document)
-> (SomeException -> Text) -> SomeException -> Either Text Document
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Text
T.pack (String -> Text)
-> (SomeException -> String) -> SomeException -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. SomeException -> String
forall e. Exception e => e -> String
E.displayException) Document -> Either Text Document
forall a b. b -> Either a b
Right
    (ParseSettings -> Text -> Either SomeException Document
Conduit.parseText ParseSettings
forall a. Default a => a
Conduit.def{ psRetainNamespaces :: Bool
Conduit.psRetainNamespaces = Bool
True } Text
t)

parseXMLContents :: TL.Text -> Either T.Text [Content]
parseXMLContents :: Text -> Either Text [Content]
parseXMLContents Text
t =
  case ParseSettings -> Text -> Either SomeException Document
Conduit.parseText ParseSettings
forall a. Default a => a
Conduit.def{ psRetainNamespaces :: Bool
Conduit.psRetainNamespaces = Bool
True } Text
t of
    Left SomeException
e ->
      case SomeException -> Maybe InvalidEventStream
forall e. Exception e => SomeException -> Maybe e
E.fromException SomeException
e of
        Just (ContentAfterRoot EventPos
_) ->
          Element -> [Content]
elContent (Element -> [Content])
-> Either Text Element -> Either Text [Content]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Text -> Either Text Element
parseXMLElement (Text
"<wrapper>" Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
t Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
"</wrapper>")
        Maybe InvalidEventStream
_ -> Text -> Either Text [Content]
forall a b. a -> Either a b
Left (Text -> Either Text [Content])
-> (SomeException -> Text)
-> SomeException
-> Either Text [Content]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Text
T.pack (String -> Text)
-> (SomeException -> String) -> SomeException -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. SomeException -> String
forall e. Exception e => e -> String
E.displayException (SomeException -> Either Text [Content])
-> SomeException -> Either Text [Content]
forall a b. (a -> b) -> a -> b
$ SomeException
e
    Right Document
x -> [Content] -> Either Text [Content]
forall a b. b -> Either a b
Right [Element -> Content
Elem (Element -> Content)
-> (Document -> Element) -> Document -> Content
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Element -> Element
elementToElement (Element -> Element)
-> (Document -> Element) -> Document -> Element
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Document -> Element
Conduit.documentRoot (Document -> Content) -> Document -> Content
forall a b. (a -> b) -> a -> b
$ Document
x]

elementToElement :: Conduit.Element -> Element
elementToElement :: Element -> Element
elementToElement (Conduit.Element Name
name Map Name Text
attribMap [Node]
nodes) =
  QName -> [Attr] -> [Content] -> Maybe Line -> Element
Element (Name -> QName
nameToQname Name
name) [Attr]
attrs ((Node -> Maybe Content) -> [Node] -> [Content]
forall a b. (a -> Maybe b) -> [a] -> [b]
mapMaybe Node -> Maybe Content
nodeToContent [Node]
nodes) Maybe Line
forall a. Maybe a
Nothing
 where
  attrs :: [Attr]
attrs = ((Name, Text) -> Attr) -> [(Name, Text)] -> [Attr]
forall a b. (a -> b) -> [a] -> [b]
map (\(Name
n,Text
v) -> QName -> Text -> Attr
Attr (Name -> QName
nameToQname Name
n) Text
v) ([(Name, Text)] -> [Attr]) -> [(Name, Text)] -> [Attr]
forall a b. (a -> b) -> a -> b
$
              Map Name Text -> [(Name, Text)]
forall k a. Map k a -> [(k, a)]
M.toList Map Name Text
attribMap
  nameToQname :: Name -> QName
nameToQname (Conduit.Name Text
localName Maybe Text
mbns Maybe Text
mbpref) =
    case Maybe Text
mbpref of
      Maybe Text
Nothing ->
        case Text -> Text -> Maybe Text
T.stripPrefix Text
"xmlns:" Text
localName of
          Just Text
rest -> Text -> Maybe Text -> Maybe Text -> QName
QName Text
rest Maybe Text
mbns (Text -> Maybe Text
forall a. a -> Maybe a
Just Text
"xmlns")
          Maybe Text
Nothing   -> Text -> Maybe Text -> Maybe Text -> QName
QName Text
localName Maybe Text
mbns Maybe Text
mbpref
      Maybe Text
_ -> Text -> Maybe Text -> Maybe Text -> QName
QName Text
localName Maybe Text
mbns Maybe Text
mbpref

nodeToContent :: Conduit.Node -> Maybe Content
nodeToContent :: Node -> Maybe Content
nodeToContent (Conduit.NodeElement Element
el) =
  Content -> Maybe Content
forall a. a -> Maybe a
Just (Element -> Content
Elem (Element -> Element
elementToElement Element
el))
nodeToContent (Conduit.NodeContent Text
t) =
  Content -> Maybe Content
forall a. a -> Maybe a
Just (CData -> Content
Text (CDataKind -> Text -> Maybe Line -> CData
CData CDataKind
CDataText Text
t Maybe Line
forall a. Maybe a
Nothing))
nodeToContent Node
_ = Maybe Content
forall a. Maybe a
Nothing