{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE OverloadedStrings #-}
{- |
   Module      : Text.Pandoc.XML.Light
   Copyright   : Copyright (C) 2021-2022 John MacFarlane
   License     : GNU GPL, version 2 or above

   Maintainer  : John MacFarlane <jgm@berkeley.edu>
   Stability   : alpha
   Portability : portable

xml-light, which we used in pandoc's the XML-based readers, has
some limitations: in particular, it produces nodes with String
instead of Text, and the parser falls over on processing instructions
(see #7091).

This module exports much of the API of xml-light, but using Text instead
of String. In addition, the xml-light parsers are replaced by xml-conduit's
well-tested parser.  (The xml-conduit types are mapped to types
isomorphic to xml-light's, to avoid the need for massive code modifications
elsewhere.)  Bridge functions to map xml-light types to this module's
types are also provided (since libraries like texmath still use xml-light).

Another advantage of the xml-conduit parser is that it gives us
detailed information on xml parse errors.

In the future we may want to move to using xml-conduit or another
xml library in the code base, but this change gives us
better performance and accuracy without much change in the
code that used xml-light.
-}
module Text.Pandoc.XML.Light
  ( module Text.Pandoc.XML.Light.Types
  , module Text.Pandoc.XML.Light.Proc
  , module Text.Pandoc.XML.Light.Output
    -- * Replacement for xml-light's Text.XML.Input
  , parseXMLElement
  , parseXMLContents
    --  * Versions that allow passing in a custom entity table
  , parseXMLElementWithEntities
  , parseXMLContentsWithEntities
  ) where

import qualified Control.Exception as E
import qualified Text.XML as Conduit
import Text.XML.Unresolved (InvalidEventStream(..))
import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
import qualified Data.Map as M
import Data.Maybe (mapMaybe)
import Text.Pandoc.XML.Light.Types
import Text.Pandoc.XML.Light.Proc
import Text.Pandoc.XML.Light.Output
import qualified Data.XML.Types as XML

-- Drop in replacement for parseXMLDoc in xml-light.
parseXMLElement :: TL.Text -> Either T.Text Element
parseXMLElement :: Text -> Either Text Element
parseXMLElement = Map Text Text -> Text -> Either Text Element
parseXMLElementWithEntities forall a. Monoid a => a
mempty

-- Drop in replacement for parseXMLDoc in xml-light.
parseXMLElementWithEntities :: M.Map T.Text T.Text
                            -> TL.Text -> Either T.Text Element
parseXMLElementWithEntities :: Map Text Text -> Text -> Either Text Element
parseXMLElementWithEntities Map Text Text
entityMap Text
t =
  Element -> Element
elementToElement forall b c a. (b -> c) -> (a -> b) -> a -> c
.  Document -> Element
Conduit.documentRoot forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$>
    forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either (forall a b. a -> Either a b
Left forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Text
T.pack forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall e. Exception e => e -> String
E.displayException) forall a b. b -> Either a b
Right
    (ParseSettings -> Text -> Either SomeException Document
Conduit.parseText forall a. Default a => a
Conduit.def{ psRetainNamespaces :: Bool
Conduit.psRetainNamespaces = Bool
True
                                  , psDecodeEntities :: DecodeEntities
Conduit.psDecodeEntities = DecodeEntities
decodeEnts } Text
t)
 where
   decodeEnts :: DecodeEntities
decodeEnts Text
ref = case forall k a. Ord k => k -> Map k a -> Maybe a
M.lookup Text
ref Map Text Text
entityMap of
                      Maybe Text
Nothing -> DecodeEntities
XML.ContentEntity Text
ref
                      Just Text
t' -> DecodeEntities
XML.ContentText Text
t'

parseXMLContents :: TL.Text -> Either T.Text [Content]
parseXMLContents :: Text -> Either Text [Content]
parseXMLContents = Map Text Text -> Text -> Either Text [Content]
parseXMLContentsWithEntities forall a. Monoid a => a
mempty

parseXMLContentsWithEntities :: M.Map T.Text T.Text
                             -> TL.Text -> Either T.Text [Content]
parseXMLContentsWithEntities :: Map Text Text -> Text -> Either Text [Content]
parseXMLContentsWithEntities Map Text Text
entityMap Text
t =
  case ParseSettings -> Text -> Either SomeException Document
Conduit.parseText forall a. Default a => a
Conduit.def{ psRetainNamespaces :: Bool
Conduit.psRetainNamespaces = Bool
True
                                    , psDecodeEntities :: DecodeEntities
Conduit.psDecodeEntities = DecodeEntities
decodeEnts
                                    } Text
t of
    Left SomeException
e ->
      case forall e. Exception e => SomeException -> Maybe e
E.fromException SomeException
e of
        Just (ContentAfterRoot EventPos
_) ->
          Element -> [Content]
elContent forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Map Text Text -> Text -> Either Text Element
parseXMLElementWithEntities Map Text Text
entityMap
                          (Text
"<wrapper>" forall a. Semigroup a => a -> a -> a
<> Text
t forall a. Semigroup a => a -> a -> a
<> Text
"</wrapper>")
        Maybe InvalidEventStream
_ -> forall a b. a -> Either a b
Left forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Text
T.pack forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall e. Exception e => e -> String
E.displayException forall a b. (a -> b) -> a -> b
$ SomeException
e
    Right Document
x -> forall a b. b -> Either a b
Right [Element -> Content
Elem forall b c a. (b -> c) -> (a -> b) -> a -> c
. Element -> Element
elementToElement forall b c a. (b -> c) -> (a -> b) -> a -> c
. Document -> Element
Conduit.documentRoot forall a b. (a -> b) -> a -> b
$ Document
x]
 where
   decodeEnts :: DecodeEntities
decodeEnts Text
ref = case forall k a. Ord k => k -> Map k a -> Maybe a
M.lookup Text
ref Map Text Text
entityMap of
                      Maybe Text
Nothing -> DecodeEntities
XML.ContentEntity Text
ref
                      Just Text
t' -> DecodeEntities
XML.ContentText Text
t'

elementToElement :: Conduit.Element -> Element
elementToElement :: Element -> Element
elementToElement (Conduit.Element Name
name Map Name Text
attribMap [Node]
nodes) =
  QName -> [Attr] -> [Content] -> Maybe Line -> Element
Element (Name -> QName
nameToQname Name
name) [Attr]
attrs (forall a b. (a -> Maybe b) -> [a] -> [b]
mapMaybe Node -> Maybe Content
nodeToContent [Node]
nodes) forall a. Maybe a
Nothing
 where
  attrs :: [Attr]
attrs = forall a b. (a -> b) -> [a] -> [b]
map (\(Name
n,Text
v) -> QName -> Text -> Attr
Attr (Name -> QName
nameToQname Name
n) Text
v) forall a b. (a -> b) -> a -> b
$
              forall k a. Map k a -> [(k, a)]
M.toList Map Name Text
attribMap
  nameToQname :: Name -> QName
nameToQname (Conduit.Name Text
localName Maybe Text
mbns Maybe Text
mbpref) =
    case Maybe Text
mbpref of
      Maybe Text
Nothing ->
        case Text -> Text -> Maybe Text
T.stripPrefix Text
"xmlns:" Text
localName of
          Just Text
rest -> Text -> Maybe Text -> Maybe Text -> QName
QName Text
rest Maybe Text
mbns (forall a. a -> Maybe a
Just Text
"xmlns")
          Maybe Text
Nothing   -> Text -> Maybe Text -> Maybe Text -> QName
QName Text
localName Maybe Text
mbns Maybe Text
mbpref
      Maybe Text
_ -> Text -> Maybe Text -> Maybe Text -> QName
QName Text
localName Maybe Text
mbns Maybe Text
mbpref

nodeToContent :: Conduit.Node -> Maybe Content
nodeToContent :: Node -> Maybe Content
nodeToContent (Conduit.NodeElement Element
el) =
  forall a. a -> Maybe a
Just (Element -> Content
Elem (Element -> Element
elementToElement Element
el))
nodeToContent (Conduit.NodeContent Text
t) =
  forall a. a -> Maybe a
Just (CData -> Content
Text (CDataKind -> Text -> Maybe Line -> CData
CData CDataKind
CDataText Text
t forall a. Maybe a
Nothing))
nodeToContent Node
_ = forall a. Maybe a
Nothing