{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE PatternSynonyms #-}
{-# LANGUAGE StrictData #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE ViewPatterns #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Symantic.XML.Document
( module Symantic.XML.Document
, TS.Tree(..)
, TS.Trees
, TS.tree0
) where
import Control.Applicative (Alternative(..))
import Data.Bool
import Data.Char (Char)
import Data.Default.Class (Default(..))
import Data.Eq (Eq(..))
import Data.Foldable (Foldable(..), all)
import Data.Function (($), (.), id)
import Data.Functor (Functor(..), (<$>))
import Data.Hashable (Hashable(..))
import Data.Int (Int)
import Data.List.NonEmpty (NonEmpty(..))
import Data.Maybe (Maybe(..), fromMaybe)
import Data.Monoid (Monoid(..))
import Data.Ord (Ord(..))
import Data.Semigroup (Semigroup(..))
import Data.Sequence (Seq)
import Data.String (String, IsString(..))
import GHC.Generics (Generic)
import Prelude ((+), error)
import System.IO (FilePath)
import Text.Show (Show(..), showsPrec, showChar, showParen, showString)
import qualified Data.Char.Properties.XMLCharProps as XC
import qualified Data.HashMap.Strict as HM
import qualified Data.HashSet as HS
import qualified Data.List as List
import qualified Data.Sequence as Seq
import qualified Data.Text.Lazy as TL
import qualified Data.TreeSeq.Strict as TS
type XML src = TS.Tree (Sourced src Node)
type XMLs src = Seq (XML src)
union :: Semigroup (Sourced src EscapedText) => XMLs src -> XMLs src -> XMLs src
union x y =
case (Seq.viewr x, Seq.viewl y) of
(xs Seq.:> x0, y0 Seq.:< ys) ->
case (x0,y0) of
( Tree0 (Sourced sx (NodeText tx))
, Tree0 (Sourced sy (NodeText ty)) ) ->
xs `union`
Seq.singleton (Tree0 $ (NodeText <$>) $ Sourced sx tx <> Sourced sy ty) `union`
ys
_ -> x <> y
(Seq.EmptyR, _) -> y
(_, Seq.EmptyL) -> x
unions ::
Semigroup (Sourced src EscapedText) =>
Foldable f => f (XMLs src) -> XMLs src
unions = foldl' union mempty
pattern Tree0 :: a -> TS.Tree a
pattern Tree0 a <- TS.Tree a (null -> True)
where Tree0 a = TS.Tree a Seq.empty
data Node
= NodeElem QName
| NodeAttr QName
| NodePI PName TL.Text
| NodeText EscapedText
| NodeComment TL.Text
| NodeCDATA TL.Text
deriving (Eq, Ord, Show)
newtype EscapedText = EscapedText (Seq Escaped)
deriving (Eq, Ord, Show)
escapeText :: TL.Text -> EscapedText
escapeText s =
EscapedText $
case TL.span (`List.notElem` ("<>&'\""::String)) s of
(t, r) | TL.null t -> escape r
| otherwise -> EscapedPlain t Seq.<| escape r
where
escape t = case TL.uncons t of
Nothing -> mempty
Just (c, cs) -> escapeChar c Seq.<| et where EscapedText et = escapeText cs
escapeChar :: Char -> Escaped
escapeChar c =
case c of
'<' -> EscapedEntityRef entityRef_lt
'>' -> EscapedEntityRef entityRef_gt
'&' -> EscapedEntityRef entityRef_amp
'\'' -> EscapedEntityRef entityRef_apos
'"' -> EscapedEntityRef entityRef_quot
_ -> EscapedPlain $ TL.singleton c
unescapeText :: EscapedText -> TL.Text
unescapeText (EscapedText et) = (`foldMap` et) $ \case
EscapedPlain t -> t
EscapedEntityRef EntityRef{..} -> entityRef_value
EscapedCharRef (CharRef c) -> TL.singleton c
instance Semigroup EscapedText where
EscapedText x <> EscapedText y =
case (x,y) of
(xl Seq.:|> EscapedPlain xr, EscapedPlain yl Seq.:<|yr) ->
(EscapedText $ xl Seq.|> EscapedPlain (xr<>yl)) <> EscapedText yr
_ -> EscapedText $ x <> y
instance Monoid EscapedText where
mempty = EscapedText mempty
mappend = (<>)
data Escaped
= EscapedPlain TL.Text
| EscapedEntityRef EntityRef
| EscapedCharRef CharRef
deriving (Eq, Ord, Show)
data EntityRef = EntityRef
{ entityRef_name :: NCName
, entityRef_value :: TL.Text
} deriving (Eq, Ord, Show)
entityRef_lt, entityRef_gt, entityRef_amp, entityRef_quot, entityRef_apos :: EntityRef
entityRef_lt = EntityRef (NCName "lt") "<"
entityRef_gt = EntityRef (NCName "gt") ">"
entityRef_amp = EntityRef (NCName "amp") "&"
entityRef_quot = EntityRef (NCName "quot") "\""
entityRef_apos = EntityRef (NCName "apos") "'"
newtype CharRef = CharRef Char
deriving (Eq, Ord, Show)
newtype Name = Name { unName :: TL.Text }
deriving (Eq, Ord, Hashable)
instance Show Name where
showsPrec _p = showString . TL.unpack . unName
instance IsString Name where
fromString s
| c:cs <- s
, XC.isXmlNameStartChar c
&& all XC.isXmlNameChar cs
= Name (TL.pack s)
| otherwise = error $ "Invalid XML Name: "<>show s
newtype Namespace = Namespace { unNamespace :: TL.Text }
deriving (Eq, Ord, Show, Hashable)
instance IsString Namespace where
fromString s =
if all (\c -> XC.isXmlChar c && c `List.notElem` ("'\"<&"::String)) s
then Namespace (fromString s)
else error $ "Invalid XML Namespace: "<>show s
xmlns_xml, xmlns_xmlns, xmlns_empty :: Namespace
xmlns_xml = Namespace "http://www.w3.org/XML/1998/namespace"
xmlns_xmlns = Namespace "http://www.w3.org/2000/xmlns/"
xmlns_empty = Namespace ""
data Namespaces prefix = Namespaces
{ namespaces_prefixes :: (HM.HashMap Namespace prefix)
, namespaces_default :: Namespace
} deriving (Show)
instance Default (Namespaces NCName) where
def = Namespaces
{ namespaces_prefixes = HM.fromList
[ (xmlns_xml , "xml")
, (xmlns_xmlns, "xmlns")
]
, namespaces_default = ""
}
instance Default (Namespaces (Maybe NCName)) where
def = Namespaces
{ namespaces_prefixes = HM.fromList
[ (xmlns_xml , Just "xml")
, (xmlns_xmlns, Just "xmlns")
]
, namespaces_default = ""
}
instance Semigroup (Namespaces NCName) where
x <> y = Namespaces
{ namespaces_prefixes = namespaces_prefixes x <> namespaces_prefixes y
, namespaces_default = namespaces_default x
}
instance Semigroup (Namespaces (Maybe NCName)) where
x <> y = Namespaces
{ namespaces_prefixes = HM.unionWith (<|>) (namespaces_prefixes x) (namespaces_prefixes y)
, namespaces_default = namespaces_default x
}
instance Monoid (Namespaces NCName) where
mempty = def
mappend = (<>)
instance Monoid (Namespaces (Maybe NCName)) where
mempty = def
mappend = (<>)
prefixifyQName :: Namespaces NCName -> QName -> PName
prefixifyQName Namespaces{..} QName{..} =
PName
{ pNameSpace =
if qNameSpace == namespaces_default
then Nothing
else HM.lookup qNameSpace namespaces_prefixes
, pNameLocal = qNameLocal
}
newtype NCName = NCName { unNCName :: TL.Text }
deriving (Eq, Ord, Hashable)
instance Show NCName where
showsPrec _p = showString . TL.unpack . unNCName
instance IsString NCName where
fromString s =
fromMaybe (error $ "Invalid XML NCName: "<>show s) $
ncName (TL.pack s)
ncName :: TL.Text -> Maybe NCName
ncName t =
case TL.uncons t of
Just (c, cs)
| XC.isXmlNCNameStartChar c
, TL.all XC.isXmlNCNameChar cs
-> Just (NCName t)
_ -> Nothing
poolNCNames :: [NCName]
poolNCNames =
[ NCName $ TL.pack ("ns"<>show i)
| i <- [1 :: Int ..]
]
freshNCName :: HS.HashSet NCName -> NCName
freshNCName ps = List.head $ poolNCNames List.\\ HS.toList ps
freshifyNCName :: HS.HashSet NCName -> NCName -> NCName
freshifyNCName ns (NCName n) =
let ints = [1..] :: [Int] in
List.head
[ fresh
| suffix <- mempty : (show <$> ints)
, fresh <- [ NCName $ n <> TL.pack suffix]
, not $ fresh `HS.member` ns
]
data PName = PName
{ pNameSpace :: (Maybe NCName)
, pNameLocal :: NCName
} deriving (Eq, Ord, Generic)
instance Show PName where
showsPrec p PName{pNameSpace=Nothing, ..} =
showsPrec p pNameLocal
showsPrec _p PName{pNameSpace=Just p, ..} =
showsPrec 10 p .
showChar ':' .
showsPrec 10 pNameLocal
instance IsString PName where
fromString "" = PName Nothing ""
fromString s =
case List.break (== ':') s of
(_, "") -> PName Nothing $ fromString s
(p, local) -> PName (Just $ fromString p) (fromString $ List.drop 1 local)
instance Hashable PName
pName :: NCName -> PName
pName = PName Nothing
{-# INLINE pName #-}
data QName = QName
{ qNameSpace :: Namespace
, qNameLocal :: NCName
} deriving (Eq, Ord, Generic)
instance Show QName where
showsPrec _p QName{..} =
(if TL.null $ unNamespace qNameSpace then id
else showChar '{' . showString (TL.unpack $ unNamespace qNameSpace) . showChar '}'
) . showsPrec 10 qNameLocal
instance IsString QName where
fromString "" = QName "" ""
fromString full@('{':rest) =
case List.break (== '}') rest of
(_, "") -> error $ "Invalid XML Clark notation: "<>show full
(ns, local) -> QName (fromString ns) $ fromString $ List.drop 1 local
fromString local = QName "" $ fromString local
instance Hashable QName
qName :: NCName -> QName
qName = QName (Namespace "")
{-# INLINE qName #-}
data Sourced src a
= Sourced
{ source :: src
, unSourced :: a
} deriving (Eq, Ord, Functor)
instance (Show src, Show a) => Show (Sourced src a) where
showsPrec p Sourced{..} =
showParen (p > 10) $
showsPrec 11 unSourced .
showString " @" . showsPrec 10 source
instance Semigroup a => Semigroup (Sourced (FileSource Offset) a) where
(<>)
(Sourced rx@(FileRange xf xb xe :| xs) x)
(Sourced (FileRange yf yb ye :| _ys) y)
| xf == yf && xe == yb = Sourced (FileRange xf xb ye :| xs) $ x<>y
| otherwise = Sourced rx (x<>y)
class NoSource src where
noSource :: src
instance Default pos => NoSource (FileSource pos) where
noSource = noSource :| []
instance Default pos => NoSource (FileRange pos) where
noSource = FileRange "" def def
instance NoSource Offset where
noSource = Offset def
notSourced :: NoSource src => a -> Sourced src a
notSourced = Sourced noSource
type FileSource pos = NonEmpty (FileRange pos)
type FileSourced = Sourced (FileSource Offset)
data FileRange pos
= FileRange
{ fileRange_file :: FilePath
, fileRange_begin :: pos
, fileRange_end :: pos
} deriving (Eq, Ord)
instance Default (FileRange Offset) where
def = FileRange "" def def
instance Default (FileRange LineColumn) where
def = FileRange "" def def
instance Show (FileRange Offset) where
showsPrec _p FileRange{..} =
showString fileRange_file .
showChar '@' . showsPrec 10 fileRange_begin .
showChar '-' . showsPrec 10 fileRange_end
instance Show (FileRange LineColumn) where
showsPrec _p FileRange{..} =
showString fileRange_file .
showChar '#' . showsPrec 10 fileRange_begin .
showChar '-' . showsPrec 10 fileRange_end
newtype Offset = Offset Int
deriving (Eq, Ord)
instance Show Offset where
showsPrec p (Offset o) = showsPrec p o
instance Default Offset where
def = Offset 0
instance Semigroup Offset where
Offset x <> Offset y = Offset (x+y)
instance Monoid Offset where
mempty = def
mappend = (<>)
data LineColumn = LineColumn
{ lineNum :: {-# UNPACK #-} Offset
, colNum :: {-# UNPACK #-} Offset
} deriving (Eq, Ord)
instance Default LineColumn where
def = LineColumn def def
instance Show LineColumn where
showsPrec _p LineColumn{..} =
showsPrec 11 lineNum .
showChar ':' .
showsPrec 11 colNum
filePos1 :: LineColumn
filePos1 = def