---------------------------------------------------------------------------- -- -- Module : HXML.DTD -- Copyright : (C) 2000-2002 Joe English. Freely redistributable. -- License : "MIT-style" -- -- Author : Joe English -- Stability : provisional -- Portability : portable -- -- CVS : $Id: DTD.hs,v 1.6 2002/10/12 01:58:57 joe Exp $ -- ---------------------------------------------------------------------------- -- -- | Data types for SGML and XML document type definitions. -- This is based on the SGML property set found in the DSSSL spec, -- section 9.6. -- -- History: -- [23 Jan 2000], taken from earlier work, [4 Jan 1997] -- module DTD where import XML import qualified AssocList as FM type GI = Name -- generic identifier (element type name) type DCN = Name -- data content notation name -- | Content expression, parameterized over type of primitive tokens data CE a = Prim a -- ^ Primitive content token | Rep (CE a) -- ^ Zero or more, '*' occurrence indicator | Opt (CE a) -- ^ Optional, '?' occurrence indicator | Plus (CE a) -- ^ One or more, '+' occurrence indicator | Seq [(CE a)] -- ^ Sequence, ',' connector | Or [(CE a)] -- ^ Alternation, '|' connector | And [(CE a)] -- ^ Permutation, '&' connector deriving Eq data PrimitiveToken = PCDATA -- ^ Parsed character data | ELEMENT GI -- ^ Element deriving Eq type ModelGroup = CE PrimitiveToken data CONTYPE = -- (element) content type DC_EMPTY -- ^ declared content (also: CDATA, RCDATA in SGML) | DC_ANY -- ^ "ANY" | DC_MODELGRP ModelGroup deriving Show data ELEMTYPE = ELEMTYPE { -- element type definition gi :: GI, -- ^ generic identifier contype :: CONTYPE, -- ^ content type omissibility:: (Bool,Bool), -- ^ omitstrt+omitend inclusions :: [GI], exclusions :: [GI] } deriving Show -- Missing: attdefs, srmap(nm); all from DTGABS data ATT_TYPE = -- (dcltype/decl value type) ATcdata | ATentity | ATentities | ATid | ATidref | ATidrefs | ATnmtoken -- %%% or name/number/nutoken | ATnmtokens -- %%% or names/numbers/nutokens | ATnotation [DCN] -- List of notation names | ATenumerated [Name] -- nmtkgrp / name token group deriving Show data ATT_DV = -- attribute default value (dflttype/default value type) ADVfixed String -- "#FIXED ..." | ADVrequired -- "#REQUIRED" | ADVimplied -- "#IMPLIED" | ADVdefault String -- "..." -- SGML only: | ADVcurrent -- "#CURRENT" | ADVconref -- "#CONREF" deriving Show data ATTDEF = ATTDEF { -- attribute definition att_name :: Name, att_type :: ATT_TYPE, att_dv :: ATT_DV } deriving Show type ATTSPEC = (Name,String) -- (attasgn/attribute assignment) -- -- Entities: -- type ExternalID = (Maybe PUBID, Maybe SYSID) type PUBID = String type SYSID = String data ENTTYPE = -- entity type ETtext -- SGML text entity | ETcdata | ETsdata | ETndata | ETsubdoc | ETpi -- processing instruction entity data EntityText = EN_INTERNAL String -- entity.text/replacement text | EN_EXTERNAL ExternalID -- entity.extid/external identifier deriving Show data Entity = Entity { ename :: Name, -- name etype :: ENTTYPE, -- enttype/entity type etext :: EntityText, -- see above edcn :: Maybe DCN, -- notname/notation name eatts :: [ATTSPEC] -- atts/attributes } type EntityMap = FM.FM Name EntityText predefinedEntities :: EntityMap predefinedEntities = foldr (uncurry FM.insert) FM.empty predefinedGEs where (==>) = \a b -> (a,EN_INTERNAL b) predefinedGEs = [ "lt" ==> "<", "amp" ==> "&", "gt" ==> ">", "apos" ==> "'", "quot" ==> "\"" ] -- -- Utility routine, used by scanner: -- expandInternalEntity :: EntityMap -> Name -> Maybe String expandInternalEntity entities name = case FM.lookupM entities name of Just (EN_INTERNAL text) -> Just text _ -> Nothing -- -- DTDS: -- data DTD = DTD { elements :: FM.FM Name ELEMTYPE, -- elemtps / element types attlists :: FM.FM Name [ATTDEF], -- elemtype.attdefs genents :: FM.FM Name EntityText, -- general entities parments :: FM.FM Name EntityText, -- parameter entities notations:: [DCN], -- nots/notations dtdname :: Name -- name (document type name) } deriving Show emptyDTD :: DTD emptyDTD = DTD { elements = FM.empty, attlists = FM.empty, genents = predefinedEntities, parments = FM.empty, dtdname = "", notations= [] } declareParameterEntity,declareGeneralEntity :: Name -> EntityText -> DTD -> DTD declareParameterEntity name entityText dtd = dtd { parments = FM.insertWith keepOld name entityText (parments dtd) } where keepOld old _new = old declareGeneralEntity name entityText dtd = dtd { genents = FM.insertWith keepOld name entityText (genents dtd) } where keepOld old _new = old -- %%% DEAL WITH DUPLICATE DEFINITIONS HERE: declareElements :: [GI] -> (Bool,Bool) -> CONTYPE -> ([GI],[GI]) -> DTD -> DTD declareElements elementNames omissibility contentDefinition (incl,excl) dtd = dtd { elements = foldl mkElement (elements dtd) elementNames } where mkElement fm gi = FM.insert gi el fm where el = ELEMTYPE { gi = gi, contype = contentDefinition, omissibility = omissibility, inclusions = incl, exclusions = excl } -- %%% DEAL WITH DUPLICATES: declareAttlist :: [GI] -> [ATTDEF] -> DTD -> DTD declareAttlist elementNames attdefs dtd = dtd { attlists = foldl addAttdefs (attlists dtd) elementNames } where addAttdefs fm gi = FM.insert gi attdefs fm declareNotation :: DCN -> ExternalID -> DTD -> DTD declareNotation dcn _unused dtd = dtd { notations = dcn : notations dtd } -- Need srmaps::Dict[SRASSOC]+usemaps::Dict{-GI-}srmap(nm)|elemtype.srmap(nm) -- notation: name, extid, attdefs instance Show PrimitiveToken where showsPrec _ PCDATA = showString "#PCDATA" showsPrec _ (ELEMENT gi) = showString gi instance (Show prim) => Show (CE prim) where showsPrec _ mg = pp mg where pp (Prim p) = shows p pp (Rep x) = shows x . showString "*" pp (Opt x) = shows x . showString "?" pp (Plus x) = shows x . showString "+" pp (Seq x) = showgroup ", " x pp (Or x) = showgroup " | " x pp (And x) = showgroup " & " x showgroup delim l = showString "(" . showl l . showString ")" where showl [x] = shows x showl (x:xs) = shows x . showString delim . showl xs showl [] = showString "-- ERROR: empty model group! --" -- EOF --