{-# LANGUAGE DeriveFunctor #-} {-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE PatternSynonyms #-} {-# LANGUAGE StrictData #-} {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE ViewPatterns #-} {-# OPTIONS_GHC -fno-warn-orphans #-} module Symantic.XML.Document ( module Symantic.XML.Document , TS.Tree(..) , TS.Trees , TS.tree0 ) where import Control.Applicative (Alternative(..)) import Data.Bool import Data.Char (Char) import Data.Default.Class (Default(..)) import Data.Eq (Eq(..)) import Data.Foldable (Foldable(..), all) import Data.Function (($), (.), id) import Data.Functor (Functor(..), (<$>)) import Data.Hashable (Hashable(..)) import Data.Int (Int) import Data.List.NonEmpty (NonEmpty(..)) import Data.Maybe (Maybe(..), fromMaybe) import Data.Monoid (Monoid(..)) import Data.Ord (Ord(..)) import Data.Semigroup (Semigroup(..)) import Data.Sequence (Seq) import Data.String (String, IsString(..)) import GHC.Generics (Generic) import Prelude ((+), error) import System.IO (FilePath) import Text.Show (Show(..), showsPrec, showChar, showParen, showString) import qualified Data.Char.Properties.XMLCharProps as XC import qualified Data.HashMap.Strict as HM import qualified Data.HashSet as HS import qualified Data.List as List import qualified Data.Sequence as Seq import qualified Data.Text.Lazy as TL import qualified Data.TreeSeq.Strict as TS -- * Type 'XML' type XML src = TS.Tree (Sourced src Node) type XMLs src = Seq (XML src) -- | Unify two 'XMLs', merging border 'NodeText's if any. union :: Semigroup (Sourced src EscapedText) => XMLs src -> XMLs src -> XMLs src union x y = case (Seq.viewr x, Seq.viewl y) of (xs Seq.:> x0, y0 Seq.:< ys) -> case (x0,y0) of ( Tree0 (Sourced sx (NodeText tx)) , Tree0 (Sourced sy (NodeText ty)) ) -> xs `union` Seq.singleton (Tree0 $ (NodeText <$>) $ Sourced sx tx <> Sourced sy ty) `union` ys _ -> x <> y (Seq.EmptyR, _) -> y (_, Seq.EmptyL) -> x unions :: Semigroup (Sourced src EscapedText) => Foldable f => f (XMLs src) -> XMLs src unions = foldl' union mempty pattern Tree0 :: a -> TS.Tree a pattern Tree0 a <- TS.Tree a (null -> True) where Tree0 a = TS.Tree a Seq.empty -- ** Type 'Node' data Node = NodeElem QName -- ^ Node with some 'NodeAttr' and then other 'Node's as children. | NodeAttr QName -- ^ Node with a 'NodeText' child. | NodePI PName TL.Text -- ^ Leaf (except for @@ which has 'NodeAttr's. | NodeText EscapedText -- ^ Leaf. | NodeComment TL.Text -- ^ Leaf. | NodeCDATA TL.Text -- ^ Leaf. deriving (Eq, Ord, Show) -- ** Type 'EscapedText' newtype EscapedText = EscapedText (Seq Escaped) deriving (Eq, Ord, Show) escapeText :: TL.Text -> EscapedText escapeText s = EscapedText $ case TL.span (`List.notElem` ("<>&'\""::String)) s of (t, r) | TL.null t -> escape r | otherwise -> EscapedPlain t Seq.<| escape r where escape t = case TL.uncons t of Nothing -> mempty Just (c, cs) -> escapeChar c Seq.<| et where EscapedText et = escapeText cs escapeChar :: Char -> Escaped escapeChar c = case c of '<' -> EscapedEntityRef entityRef_lt '>' -> EscapedEntityRef entityRef_gt '&' -> EscapedEntityRef entityRef_amp '\'' -> EscapedEntityRef entityRef_apos '"' -> EscapedEntityRef entityRef_quot _ -> EscapedPlain $ TL.singleton c unescapeText :: EscapedText -> TL.Text unescapeText (EscapedText et) = (`foldMap` et) $ \case EscapedPlain t -> t EscapedEntityRef EntityRef{..} -> entityRef_value EscapedCharRef (CharRef c) -> TL.singleton c instance Semigroup EscapedText where EscapedText x <> EscapedText y = case (x,y) of (xl Seq.:|> EscapedPlain xr, EscapedPlain yl Seq.:<|yr) -> (EscapedText $ xl Seq.|> EscapedPlain (xr<>yl)) <> EscapedText yr _ -> EscapedText $ x <> y instance Monoid EscapedText where mempty = EscapedText mempty mappend = (<>) -- *** Type 'Escaped' -- | 'EscapedText' lexemes. data Escaped = EscapedPlain TL.Text | EscapedEntityRef EntityRef | EscapedCharRef CharRef deriving (Eq, Ord, Show) -- *** Type 'EntityRef' data EntityRef = EntityRef { entityRef_name :: NCName , entityRef_value :: TL.Text } deriving (Eq, Ord, Show) entityRef_lt, entityRef_gt, entityRef_amp, entityRef_quot, entityRef_apos :: EntityRef entityRef_lt = EntityRef (NCName "lt") "<" entityRef_gt = EntityRef (NCName "gt") ">" entityRef_amp = EntityRef (NCName "amp") "&" entityRef_quot = EntityRef (NCName "quot") "\"" entityRef_apos = EntityRef (NCName "apos") "'" -- *** Type 'CharRef' newtype CharRef = CharRef Char deriving (Eq, Ord, Show) -- ** Type 'Name' newtype Name = Name { unName :: TL.Text } deriving (Eq, Ord, Hashable) instance Show Name where showsPrec _p = showString . TL.unpack . unName instance IsString Name where fromString s | c:cs <- s , XC.isXmlNameStartChar c && all XC.isXmlNameChar cs = Name (TL.pack s) | otherwise = error $ "Invalid XML Name: "<>show s -- ** Type 'Namespace' newtype Namespace = Namespace { unNamespace :: TL.Text } deriving (Eq, Ord, Show, Hashable) instance IsString Namespace where fromString s = if all (\c -> XC.isXmlChar c && c `List.notElem` ("'\"<&"::String)) s then Namespace (fromString s) else error $ "Invalid XML Namespace: "<>show s xmlns_xml, xmlns_xmlns, xmlns_empty :: Namespace xmlns_xml = Namespace "http://www.w3.org/XML/1998/namespace" xmlns_xmlns = Namespace "http://www.w3.org/2000/xmlns/" xmlns_empty = Namespace "" -- * Type 'Namespaces' data Namespaces prefix = Namespaces { namespaces_prefixes :: (HM.HashMap Namespace prefix) , namespaces_default :: Namespace } deriving (Show) instance Default (Namespaces NCName) where def = Namespaces { namespaces_prefixes = HM.fromList [ (xmlns_xml , "xml") , (xmlns_xmlns, "xmlns") ] , namespaces_default = "" } instance Default (Namespaces (Maybe NCName)) where def = Namespaces { namespaces_prefixes = HM.fromList [ (xmlns_xml , Just "xml") , (xmlns_xmlns, Just "xmlns") ] , namespaces_default = "" } instance Semigroup (Namespaces NCName) where x <> y = Namespaces { namespaces_prefixes = namespaces_prefixes x <> namespaces_prefixes y , namespaces_default = namespaces_default x } instance Semigroup (Namespaces (Maybe NCName)) where x <> y = Namespaces { namespaces_prefixes = HM.unionWith (<|>) (namespaces_prefixes x) (namespaces_prefixes y) , namespaces_default = namespaces_default x } instance Monoid (Namespaces NCName) where mempty = def mappend = (<>) instance Monoid (Namespaces (Maybe NCName)) where mempty = def mappend = (<>) prefixifyQName :: Namespaces NCName -> QName -> PName prefixifyQName Namespaces{..} QName{..} = PName { pNameSpace = if qNameSpace == namespaces_default then Nothing else HM.lookup qNameSpace namespaces_prefixes , pNameLocal = qNameLocal } -- ** Type 'NCName' -- | Non-colonized name. newtype NCName = NCName { unNCName :: TL.Text } deriving (Eq, Ord, Hashable) instance Show NCName where showsPrec _p = showString . TL.unpack . unNCName instance IsString NCName where fromString s = fromMaybe (error $ "Invalid XML NCName: "<>show s) $ ncName (TL.pack s) ncName :: TL.Text -> Maybe NCName ncName t = case TL.uncons t of Just (c, cs) | XC.isXmlNCNameStartChar c , TL.all XC.isXmlNCNameChar cs -> Just (NCName t) _ -> Nothing poolNCNames :: [NCName] poolNCNames = [ NCName $ TL.pack ("ns"<>show i) | i <- [1 :: Int ..] ] freshNCName :: HS.HashSet NCName -> NCName freshNCName ps = List.head $ poolNCNames List.\\ HS.toList ps freshifyNCName :: HS.HashSet NCName -> NCName -> NCName freshifyNCName ns (NCName n) = let ints = [1..] :: [Int] in List.head [ fresh | suffix <- mempty : (show <$> ints) , fresh <- [ NCName $ n <> TL.pack suffix] , not $ fresh `HS.member` ns ] -- ** Type 'PName' -- | Prefixed name. data PName = PName { pNameSpace :: (Maybe NCName) -- ^ eg. Just "xml" , pNameLocal :: NCName -- ^ eg. "stylesheet" } deriving (Eq, Ord, Generic) instance Show PName where showsPrec p PName{pNameSpace=Nothing, ..} = showsPrec p pNameLocal showsPrec _p PName{pNameSpace=Just p, ..} = showsPrec 10 p . showChar ':' . showsPrec 10 pNameLocal instance IsString PName where fromString "" = PName Nothing "" -- NOTE: NCName's fromString will raise an error. fromString s = case List.break (== ':') s of (_, "") -> PName Nothing $ fromString s (p, local) -> PName (Just $ fromString p) (fromString $ List.drop 1 local) instance Hashable PName pName :: NCName -> PName pName = PName Nothing {-# INLINE pName #-} -- ** Type 'QName' -- | Qualified name. data QName = QName { qNameSpace :: Namespace -- ^ eg. "http://www.w3.org/1999/XSL/Transform" , qNameLocal :: NCName -- ^ eg. "stylesheet" } deriving (Eq, Ord, Generic) instance Show QName where showsPrec _p QName{..} = (if TL.null $ unNamespace qNameSpace then id else showChar '{' . showString (TL.unpack $ unNamespace qNameSpace) . showChar '}' ) . showsPrec 10 qNameLocal instance IsString QName where fromString "" = QName "" "" -- NOTE: NCName's fromString will raise an error. fromString full@('{':rest) = case List.break (== '}') rest of (_, "") -> error $ "Invalid XML Clark notation: "<>show full (ns, local) -> QName (fromString ns) $ fromString $ List.drop 1 local fromString local = QName "" $ fromString local instance Hashable QName qName :: NCName -> QName qName = QName (Namespace "") {-# INLINE qName #-} -- * Type 'Sourced' data Sourced src a = Sourced { source :: src , unSourced :: a } deriving (Eq, Ord, Functor) instance (Show src, Show a) => Show (Sourced src a) where showsPrec p Sourced{..} = showParen (p > 10) $ showsPrec 11 unSourced . showString " @" . showsPrec 10 source instance Semigroup a => Semigroup (Sourced (FileSource Offset) a) where (<>) (Sourced rx@(FileRange xf xb xe :| xs) x) (Sourced (FileRange yf yb ye :| _ys) y) | xf == yf && xe == yb = Sourced (FileRange xf xb ye :| xs) $ x<>y | otherwise = Sourced rx (x<>y) {- instance (FromPad a, Semigroup a) => Semigroup (Sourced (FileSource LineCol) a) where (<>) (Sourced rx@(FileRange xf xb xe :| xs) x) (Sourced (FileRange yf yb ye :| _ys) y) | xf == yf = Sourced (FileRange xf xb ye :| xs) $ x<>fromPad (LineColumn l c)<>y | otherwise = Sourced rx (x<>y) where l = lineNum yb - lineNum xe c = colNum yb - colNum (if l <= 0 then xe else xb) -- ** Class 'FromPad' class FromPad a where fromPad :: LineColumn -> a instance FromPad T.Text where fromPad LineColumn{..} = T.replicate lineNum "\n" <> T.replicate colNum " " instance FromPad TL.Text where fromPad LineColumn{..} = TL.replicate (fromIntegral lineNum) "\n" <> TL.replicate (fromIntegral colNum) " " instance FromPad EscapedText where fromPad = EscapedText . pure . fromPad instance FromPad Escaped where fromPad = EscapedPlain . fromPad -} -- ** Class 'NoSource' class NoSource src where noSource :: src instance Default pos => NoSource (FileSource pos) where noSource = noSource :| [] instance Default pos => NoSource (FileRange pos) where noSource = FileRange "" def def instance NoSource Offset where noSource = Offset def {- instance (FromPad a, Semigroup a, Monoid a) => Monoid (Sourced a) where mempty = sourced0 mempty mappend = (<>) -} notSourced :: NoSource src => a -> Sourced src a notSourced = Sourced noSource -- * Type 'FileSource' type FileSource pos = NonEmpty (FileRange pos) -- ** Type 'FileSourced' type FileSourced = Sourced (FileSource Offset) -- ** Type 'FileRange' data FileRange pos = FileRange { fileRange_file :: FilePath , fileRange_begin :: pos , fileRange_end :: pos } deriving (Eq, Ord) instance Default (FileRange Offset) where def = FileRange "" def def instance Default (FileRange LineColumn) where def = FileRange "" def def instance Show (FileRange Offset) where showsPrec _p FileRange{..} = showString fileRange_file . showChar '@' . showsPrec 10 fileRange_begin . showChar '-' . showsPrec 10 fileRange_end instance Show (FileRange LineColumn) where showsPrec _p FileRange{..} = showString fileRange_file . showChar '#' . showsPrec 10 fileRange_begin . showChar '-' . showsPrec 10 fileRange_end -- *** Type 'Offset' newtype Offset = Offset Int deriving (Eq, Ord) instance Show Offset where showsPrec p (Offset o) = showsPrec p o instance Default Offset where def = Offset 0 instance Semigroup Offset where Offset x <> Offset y = Offset (x+y) instance Monoid Offset where mempty = def mappend = (<>) -- *** Type 'LineColumn' -- | Absolute text file position. data LineColumn = LineColumn { lineNum :: {-# UNPACK #-} Offset , colNum :: {-# UNPACK #-} Offset } deriving (Eq, Ord) instance Default LineColumn where def = LineColumn def def instance Show LineColumn where showsPrec _p LineColumn{..} = showsPrec 11 lineNum . showChar ':' . showsPrec 11 colNum filePos1 :: LineColumn filePos1 = def