{- | This module defines an internal (generic) representation for XML documents including their DTDs. History: The original module was derived by hand from the XML specification, following the grammar precisely. Then we simplified the types, removing layers of indirection and redundancy, and generally making things easier to work with. Then we allowed PEReferences to be ubiquitous, by removing them from the types and resolving all PE references at parse-time. Finally, we added a per-document symbol table for GEReferences, and a whitespace-significance flag for plaintext. -} module Text.XML.HaXml.Types ( -- * A simple symbol table mapping strings (references) to values. SymTab -- ** Symbol table operations , emptyST , addST , lookupST -- * XML Types -- ** The top-level document container , Document(..) -- ** The main document content , Element(..) , ElemTag(..) , Content(..) , Attribute , AttValue(..) , info -- ** Administrative parts of the document , Prolog(..) , XMLDecl(..) , Misc(..) , ProcessingInstruction , SDDecl , VersionInfo , Comment , PITarget -- ** The DTD -- *** content model , DocTypeDecl(..) , MarkupDecl(..) , ExtSubset(..) , ExtSubsetDecl(..) , ElementDecl(..) , ContentSpec(..) , CP(..) , Modifier(..) , Mixed(..) -- *** attribute model , AttListDecl(..) , AttDef(..) , AttType(..) , TokenizedType(..) , EnumeratedType(..) , NotationType , Enumeration , DefaultDecl(..) , FIXED(..) -- *** conditional sections , ConditionalSect(..) , IncludeSect , IgnoreSect , Ignore(..) , IgnoreSectContents(..) -- ** References , Reference(..) , EntityRef , CharRef , PEReference -- ** Entities , EntityDecl(..) , GEDecl(..) , PEDecl(..) , EntityDef(..) , PEDef(..) , ExternalID(..) , NDataDecl(..) , TextDecl(..) , ExtParsedEnt(..) , ExtPE(..) , NotationDecl(..) , PublicID(..) , EncodingDecl(..) , EntityValue(..) , EV(..) , PubidLiteral(..) , SystemLiteral(..) -- ** Basic value types , Name , Names , NmToken , NmTokens , CharData , CDSect ) where {- A simple symbol table for storing macros whilst parsing. -} type SymTab a = [(String,a)] emptyST :: SymTab a emptyST = [] addST :: String -> a -> SymTab a -> SymTab a addST n v = ((n,v):) lookupST :: String -> SymTab a -> Maybe a lookupST = lookup {- XML types start here -} -- | The symbol table stored in a document holds all its general entity -- reference definitions. data Document i = Document Prolog (SymTab EntityDef) (Element i) [Misc] deriving Eq data Prolog = Prolog (Maybe XMLDecl) [Misc] (Maybe DocTypeDecl) [Misc] deriving Eq data XMLDecl = XMLDecl VersionInfo (Maybe EncodingDecl) (Maybe SDDecl) deriving Eq data Misc = Comment Comment | PI ProcessingInstruction deriving Eq type ProcessingInstruction = (PITarget,String) type SDDecl = Bool type VersionInfo = String type Comment = String type PITarget = String data DocTypeDecl = DTD Name (Maybe ExternalID) [MarkupDecl] deriving Eq data MarkupDecl = Element ElementDecl | AttList AttListDecl | Entity EntityDecl | Notation NotationDecl | MarkupMisc Misc deriving Eq data ExtSubset = ExtSubset (Maybe TextDecl) [ExtSubsetDecl] deriving Eq data ExtSubsetDecl = ExtMarkupDecl MarkupDecl | ExtConditionalSect ConditionalSect deriving Eq data Element i = Elem Name [Attribute] [Content i] deriving Eq -- intermediate for parsing data ElemTag = ElemTag Name [Attribute] type Attribute = (Name, AttValue) data Content i = CElem (Element i) i | CString Bool CharData i -- ^ bool is whether whitespace is significant | CRef Reference i | CMisc Misc i deriving Eq info :: Content t -> t info (CElem _ i) = i info (CString _ _ i) = i info (CRef _ i) = i info (CMisc _ i) = i instance Functor Document where fmap f (Document p st e ms) = Document p st (fmap f e) ms instance Functor Element where fmap f (Elem t as cs) = Elem t as (map (fmap f) cs) instance Functor Content where fmap f (CElem e i) = CElem (fmap f e) (f i) fmap f (CString b s i) = CString b s (f i) fmap f (CRef r i) = CRef r (f i) fmap f (CMisc m i) = CMisc m (f i) data ElementDecl = ElementDecl Name ContentSpec deriving Eq data ContentSpec = EMPTY | ANY | Mixed Mixed | ContentSpec CP deriving Eq -- FIXME: What is TagName here? Seems to be in disagreement with XML spec. data CP = TagName Name Modifier | Choice [CP] Modifier | Seq [CP] Modifier deriving Eq data Modifier = None -- ^ Just One | Query -- ^ Zero Or One | Star -- ^ Zero Or More | Plus -- ^ One Or More deriving Eq data Mixed = PCDATA | PCDATAplus [Name] deriving Eq data AttListDecl = AttListDecl Name [AttDef] deriving Eq data AttDef = AttDef Name AttType DefaultDecl deriving Eq data AttType = StringType | TokenizedType TokenizedType | EnumeratedType EnumeratedType deriving Eq data TokenizedType = ID | IDREF | IDREFS | ENTITY | ENTITIES | NMTOKEN | NMTOKENS deriving Eq data EnumeratedType = NotationType NotationType | Enumeration Enumeration deriving Eq type NotationType = [Name] -- nonempty list type Enumeration = [NmToken] -- nonempty list data DefaultDecl = REQUIRED | IMPLIED | DefaultTo AttValue (Maybe FIXED) deriving Eq data FIXED = FIXED deriving Eq data ConditionalSect = IncludeSect IncludeSect | IgnoreSect IgnoreSect deriving Eq type IncludeSect = [ExtSubsetDecl] type IgnoreSect = [IgnoreSectContents] data Ignore = Ignore deriving Eq data IgnoreSectContents = IgnoreSectContents Ignore [(IgnoreSectContents,Ignore)] deriving Eq data Reference = RefEntity EntityRef | RefChar CharRef deriving (Eq,Show) type EntityRef = Name type CharRef = Int type PEReference = Name data EntityDecl = EntityGEDecl GEDecl | EntityPEDecl PEDecl deriving Eq data GEDecl = GEDecl Name EntityDef deriving Eq data PEDecl = PEDecl Name PEDef deriving Eq data EntityDef = DefEntityValue EntityValue | DefExternalID ExternalID (Maybe NDataDecl) deriving Eq data PEDef = PEDefEntityValue EntityValue | PEDefExternalID ExternalID deriving (Eq,Show) data ExternalID = SYSTEM SystemLiteral | PUBLIC PubidLiteral SystemLiteral deriving (Eq,Show) newtype NDataDecl = NDATA Name deriving Eq data TextDecl = TextDecl (Maybe VersionInfo) EncodingDecl deriving Eq data ExtParsedEnt i = ExtParsedEnt (Maybe TextDecl) (Content i) deriving Eq data ExtPE = ExtPE (Maybe TextDecl) [ExtSubsetDecl] deriving Eq data NotationDecl = NOTATION Name (Either ExternalID PublicID) deriving Eq newtype PublicID = PUBLICID PubidLiteral deriving Eq newtype EncodingDecl = EncodingDecl String deriving Eq type Name = String -- non-empty string type Names = [Name] -- non-empty list type NmToken = String -- non-empty string type NmTokens = [NmToken] -- non-empty list data AttValue = AttValue [Either String Reference] deriving Eq instance Show AttValue where show (AttValue v) = concatMap decode v where decode (Left w) = w decode (Right (RefEntity ent)) = "&"++ent++";" decode (Right (RefChar cref)) = "&"++show cref++";" data EntityValue = EntityValue [EV] deriving (Eq,Show) data EV = EVString String -- -- | EVPERef PEReference | EVRef Reference deriving (Eq,Show) newtype PubidLiteral = PubidLiteral String deriving (Eq,Show) newtype SystemLiteral = SystemLiteral String deriving (Eq,Show) type CharData = String type CDSect = CharData instance Eq ElemTag where (ElemTag n _) == (ElemTag m _) = n==m