{- |
   This module defines an internal (generic) representation for XML
   documents including their DTDs.

   History:
   The original module was derived by hand from the XML specification,
   following the grammar precisely.  Then we simplified the types,
   removing layers of indirection and redundancy, and generally making
   things easier to work with.  Then we allowed PEReferences to be
   ubiquitous, by removing them from the types and resolving all
   PE references at parse-time.  Finally, we added a per-document
   symbol table for GEReferences, and a whitespace-significance flag
   for plaintext.
-}

module Text.XML.HaXml.Types
  (
  -- * A simple symbol table mapping strings (references) to values.
    SymTab
  -- ** Symbol table operations
  , emptyST
  , addST
  , lookupST

  -- * XML Types
  -- ** The top-level document container
  , Document(..)

  -- ** The main document content
  , Element(..)
  , ElemTag(..)
  , Content(..)
  , Attribute
  , AttValue(..)

  -- ** Administrative parts of the document
  , Prolog(..)
  , XMLDecl(..)
  , Misc(..)
  , ProcessingInstruction
  , SDDecl
  , VersionInfo
  , Comment
  , PITarget

  -- ** The DTD
  -- *** content model
  , DocTypeDecl(..)
  , MarkupDecl(..)
  , ExtSubset(..)
  , ExtSubsetDecl(..)
  , ElementDecl(..)
  , ContentSpec(..)
  , CP(..)
  , Modifier(..)
  , Mixed(..)

  -- *** attribute model
  , AttListDecl(..)
  , AttDef(..)
  , AttType(..)
  , TokenizedType(..)
  , EnumeratedType(..)
  , NotationType
  , Enumeration
  , DefaultDecl(..)
  , FIXED(..)

  -- *** conditional sections
  , ConditionalSect(..)
  , IncludeSect
  , IgnoreSect
  , Ignore(..)
  , IgnoreSectContents(..)

  -- ** References
  , Reference(..)
  , EntityRef
  , CharRef
  , PEReference

  -- ** Entities
  , EntityDecl(..)
  , GEDecl(..)
  , PEDecl(..)
  , EntityDef(..)
  , PEDef(..)
  , ExternalID(..)
  , NDataDecl(..)
  , TextDecl(..)
  , ExtParsedEnt(..)
  , ExtPE(..)
  , NotationDecl(..)
  , PublicID(..)
  , EncodingDecl(..)
  , EntityValue(..)
  , EV(..)
  , PubidLiteral(..)
  , SystemLiteral(..)

  -- ** Basic value types
  , Name
  , Names
  , NmToken
  , NmTokens
  , CharData
  , CDSect
  ) where



{- A simple symbol table for storing macros whilst parsing. -}

-- | A symbol table, represented as an association list that pairs each
--   reference name with a value of type @a@.
type SymTab a = [(String,a)]

-- | The symbol table that holds no entries at all.
emptyST :: SymTab a
emptyST  = []

-- | Record a binding of @key@ to @val@ by placing the pair at the front
--   of the table.  Existing entries, including any for the same key, are
--   left in place behind the new one.
addST :: String -> a -> SymTab a -> SymTab a
addST key val table = (key, val) : table

-- | Find the value bound to @key@.  The table is searched from the front
--   with the Prelude's 'lookup', so the first matching pair wins and
--   'Nothing' is returned when no pair matches.
lookupST :: String -> SymTab a -> Maybe a
lookupST key table = lookup key table



{- XML types start here -}

-- | A complete XML document: a prolog, a symbol table, the top-level
--   element, and a list of miscellaneous items placed after that element.
--   The symbol table stored in a document holds all its general entity
--   reference definitions.
data Document = Document Prolog (SymTab EntityDef) Element [Misc]

-- | The prolog of a document: an optional XML declaration and an optional
--   document type declaration, the latter with a list of miscellaneous
--   items on either side of it.
data Prolog   = Prolog (Maybe XMLDecl) [Misc] (Maybe DocTypeDecl) [Misc]

-- | The XML declaration: version information, then an optional encoding
--   declaration and an optional 'SDDecl' flag.
data XMLDecl  = XMLDecl VersionInfo (Maybe EncodingDecl) (Maybe SDDecl) 

-- | Miscellaneous items: comments and processing instructions.
data Misc     = Comment Comment           -- ^ a comment, carrying its text
              | PI ProcessingInstruction  -- ^ a processing instruction
              
-- | A processing instruction, held as a pair: its 'PITarget' first, then
--   a second plain string.
type ProcessingInstruction = (PITarget,String)

-- | The @SDDecl@ component of an 'XMLDecl', held as a boolean.  (In the
--   XML grammar, @SDDecl@ is the standalone document declaration.)
type SDDecl      = Bool 
-- | Version information, as used by 'XMLDecl' and 'TextDecl'; kept as a
--   plain string.
type VersionInfo = String 
-- | The text carried by the @Comment@ constructor of 'Misc'.
type Comment     = String 
-- | The first component of a 'ProcessingInstruction'.
type PITarget    = String 

-- | A document type declaration: a name, an optional external identifier,
--   and a list of markup declarations.
data DocTypeDecl = DTD Name (Maybe ExternalID) [MarkupDecl] 

-- | A single declaration inside a DTD.  Each constructor wraps one kind
--   of declaration.
data MarkupDecl  = Element  ElementDecl    -- ^ an element declaration
                 | AttList  AttListDecl    -- ^ an attribute-list declaration
                 | Entity   EntityDecl     -- ^ an entity declaration
                 | Notation NotationDecl   -- ^ a notation declaration
                 | MarkupMisc Misc         -- ^ a comment or processing instruction

-- | An external subset: an optional text declaration followed by a list
--   of declarations.
data ExtSubset     = ExtSubset (Maybe TextDecl) [ExtSubsetDecl] 

-- | One entry of an external subset.
data ExtSubsetDecl = ExtMarkupDecl MarkupDecl
                        -- ^ an ordinary markup declaration
                   | ExtConditionalSect ConditionalSect
                        -- ^ a conditional section

-- | An element: its name, its attributes, and its list of content items.
data Element   = Elem Name [Attribute] [Content]

data ElemTag   = ElemTag Name [Attribute]
    -- ^ An element's name and attributes without any content;
    --   intermediate for parsing.

-- | An attribute, as a pair of its name and its value.
type Attribute = (Name, AttValue)

-- | One item of an element's content.
data Content   = CElem Element
                        -- ^ a nested element
               | CString Bool CharData
                        -- ^ bool is whether whitespace is significant
               | CRef Reference
                        -- ^ an entity or character reference
               | CMisc Misc
                        -- ^ a comment or processing instruction

-- | An element declaration: the element's name and its content
--   specification.
data ElementDecl = ElementDecl Name ContentSpec

-- | The content specification of an element declaration.
data ContentSpec = EMPTY            -- ^ corresponds to @EMPTY@ in the XML grammar
                 | ANY              -- ^ corresponds to @ANY@ in the XML grammar
                 | Mixed Mixed      -- ^ a mixed-content model
                 | ContentSpec CP   -- ^ a model given by a content particle

-- | A content particle (named after the XML grammar's @cp@ production).
--   Every alternative carries a 'Modifier'.
data CP = TagName Name Modifier     -- ^ a single name
        | Choice [CP] Modifier      -- ^ a choice among the listed particles
        | Seq [CP] Modifier         -- ^ a sequence of the listed particles

-- | The occurrence modifier attached to a 'CP'.
data Modifier = None  -- ^ Just One
              | Query -- ^ Zero Or One
              | Star  -- ^ Zero Or More
              | Plus  -- ^ One Or More

-- | A mixed-content model.
data Mixed = PCDATA                 -- ^ @PCDATA@ on its own
           | PCDATAplus [Name]      -- ^ @PCDATA@ together with a list of names

-- | An attribute-list declaration: a name followed by a list of attribute
--   definitions.
data AttListDecl = AttListDecl Name [AttDef]

-- | One attribute definition: a name, a type, and a default declaration.
data AttDef      = AttDef Name AttType DefaultDecl 

-- | The declared type of an attribute: a string type, a tokenized type,
--   or an enumerated type.
data AttType     = StringType
                 | TokenizedType TokenizedType
                 | EnumeratedType EnumeratedType 

-- | The tokenized attribute types.  The constructors carry no data; their
--   names follow the corresponding keywords of the XML grammar.
data TokenizedType = ID
                   | IDREF
                   | IDREFS
                   | ENTITY
                   | ENTITIES
                   | NMTOKEN
                   | NMTOKENS 

-- | An enumerated attribute type: either a notation type or a plain
--   enumeration.
data EnumeratedType = NotationType NotationType
                    | Enumeration Enumeration 

-- | The names of a notation type.  Intended to be a nonempty list; the
--   type itself does not enforce this.
type NotationType   = [Name]

-- | The name tokens of an enumeration.  Intended to be a nonempty list;
--   the type itself does not enforce this.
type Enumeration    = [NmToken]

-- | The default declaration of an attribute.  'DefaultTo' carries a value
--   and an optional 'FIXED' marker.
data DefaultDecl    = REQUIRED
                    | IMPLIED
                    | DefaultTo AttValue (Maybe FIXED) 

-- | A marker with no payload, used as the optional second field of
--   'DefaultTo'.
data FIXED          = FIXED 

-- | A conditional section: either an include section or an ignore section.
data ConditionalSect = IncludeSect IncludeSect
                     | IgnoreSect IgnoreSect 

-- | The body of an include section: a list of external-subset
--   declarations.
type IncludeSect = [ExtSubsetDecl]

-- | The body of an ignore section: a list of 'IgnoreSectContents'.
type IgnoreSect  = [IgnoreSectContents]

-- | A placeholder with no payload.
--   NOTE(review): presumably stands for text that was skipped rather than
--   stored -- confirm against the parser.
data Ignore      = Ignore

-- | The contents of an ignore section: one 'Ignore', followed by a list of
--   pairs, each holding nested contents and a further 'Ignore'.
data IgnoreSectContents = IgnoreSectContents Ignore [(IgnoreSectContents,Ignore)] 

-- | A reference: either to an entity (by name) or to a character.
--   Equality is derived.
data Reference    = RefEntity EntityRef
                  | RefChar CharRef 
                  deriving Eq

-- | An entity reference, held as the entity's name.
type EntityRef    = Name 

-- | A character reference, held as an 'Int'.
--   NOTE(review): presumably the character's code number -- confirm
--   against the parser.
type CharRef      = Int

-- | A parameter-entity reference, held as a name.  No data type in this
--   module has a field of this type.
type PEReference  = Name 

-- | An entity declaration: either a general entity declaration or a
--   parameter entity declaration.
data EntityDecl   = EntityGEDecl GEDecl
                  | EntityPEDecl PEDecl 

-- | A general entity declaration: a name and its definition.
data GEDecl       = GEDecl Name EntityDef 

-- | A parameter entity declaration: a name and its definition.
data PEDecl       = PEDecl Name PEDef 

-- | The definition of a general entity: either a literal entity value, or
--   an external identifier with an optional 'NDataDecl'.
data EntityDef    = DefEntityValue EntityValue
                  | DefExternalID ExternalID (Maybe NDataDecl) 

-- | The definition of a parameter entity: either a literal entity value
--   or an external identifier.
data PEDef        = PEDefEntityValue EntityValue
                  | PEDefExternalID ExternalID 

-- | An external identifier: @SYSTEM@ with a system literal, or @PUBLIC@
--   with a public-identifier literal followed by a system literal.
data ExternalID   = SYSTEM SystemLiteral
                  | PUBLIC PubidLiteral SystemLiteral 

-- | An @NDATA@ declaration, wrapping a single name.
newtype NDataDecl = NDATA Name  

-- | A text declaration: optional version information followed by an
--   encoding declaration.
data TextDecl     = TextDecl (Maybe VersionInfo) EncodingDecl 

-- | An external parsed entity: an optional text declaration followed by a
--   single 'Content' value.
data ExtParsedEnt = ExtParsedEnt (Maybe TextDecl) Content 

-- | An external parameter entity: an optional text declaration followed
--   by a list of external-subset declarations.
data ExtPE        = ExtPE (Maybe TextDecl) [ExtSubsetDecl]

-- | A notation declaration: a name together with either an external
--   identifier ('Left') or a public identifier ('Right').
data NotationDecl    = NOTATION Name (Either ExternalID PublicID) 

-- | A public identifier, wrapping a 'PubidLiteral'.
newtype PublicID     = PUBLICID PubidLiteral 

-- | An encoding declaration, wrapping a plain string.
newtype EncodingDecl = EncodingDecl String 

-- | A name, kept as a plain string.  Intended to be non-empty; the type
--   itself does not enforce this.
type Name     = String

-- | A list of names.  Intended to be non-empty; the type itself does not
--   enforce this.
type Names    = [Name]

-- | A name token, kept as a plain string.  Intended to be non-empty; the
--   type itself does not enforce this.
type NmToken  = String

-- | A list of name tokens.  Intended to be non-empty; the type itself
--   does not enforce this.
type NmTokens = [NmToken]

-- | An attribute value: a list of pieces, each either a literal string
--   ('Left') or a reference ('Right').  Equality is derived.
data AttValue    = AttValue [Either String Reference]
                 deriving Eq

-- | An entity value: a list of 'EV' pieces.
data EntityValue = EntityValue [EV] 

-- | One piece of an entity value: a literal string or a reference.
data EV = EVString String
        -- Disabled alternative, kept for reference (the module header
        -- says PE references were removed from the types):
        --     EVPERef PEReference
        | EVRef Reference 

-- | A public-identifier literal, wrapping a plain string.
newtype PubidLiteral  = PubidLiteral String 

-- | A system literal, wrapping a plain string.
newtype SystemLiteral = SystemLiteral String 

-- | Character data, kept as a plain string.
type CharData         = String 

-- | A CDATA section, represented in the same way as 'CharData'.
type CDSect           = CharData

-- | Two tags are equal exactly when their names are equal; the attribute
--   lists take no part in the comparison.
instance Eq ElemTag where
    (==) (ElemTag lhs _) (ElemTag rhs _) = lhs == rhs