-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Arrow.GeneralEntitySubstitution Copyright : Copyright (C) 2005 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : experimental Portability: portable general entity substitution -} -- ------------------------------------------------------------ module Text.XML.HXT.Arrow.GeneralEntitySubstitution ( processGeneralEntities ) where import Control.Arrow -- arrow classes import Control.Arrow.ArrowList import Control.Arrow.ArrowIf import Control.Arrow.ArrowTree import Text.XML.HXT.DOM.Interface import Text.XML.HXT.Arrow.XmlArrow import Text.XML.HXT.Arrow.XmlState import Text.XML.HXT.Arrow.ParserInterface ( parseXmlEntityValueAsAttrValue , parseXmlEntityValueAsContent ) import Text.XML.HXT.Arrow.Edit ( transfCharRef ) import Text.XML.HXT.Arrow.DocumentInput ( getXmlEntityContents ) import qualified Data.Map as M ( Map , empty , lookup , insert ) -- ------------------------------------------------------------ data GEContext = ReferenceInContent | ReferenceInAttributeValue | ReferenceInEntityValue -- or OccursInAttributeValue -- not used during substitution but during validation -- or ReferenceInDTD -- not used: syntax check detects errors type GESubstArrow = GEContext -> RecList -> GEArrow XmlTree XmlTree type GEArrow b c = IOStateArrow GEEnv b c type RecList = [String] -- ------------------------------------------------------------ newtype GEEnv = GEEnv (M.Map String GESubstArrow) emptyGeEnv :: GEEnv emptyGeEnv = GEEnv M.empty lookupGeEnv :: String -> GEEnv -> Maybe GESubstArrow lookupGeEnv k (GEEnv env) = M.lookup k env addGeEntry :: String -> GESubstArrow -> GEEnv -> GEEnv addGeEntry k a (GEEnv env) = GEEnv $ M.insert k a env -- ------------------------------------------------------------ -- | -- substitution of general entities -- -- input: a complete document tree including root node processGeneralEntities :: IOStateArrow s XmlTree XmlTree processGeneralEntities = ( traceMsg 1 "processGeneralEntities: collect and substitute general entities" >>> withOtherUserState emptyGeEnv (processChildren (processGeneralEntity ReferenceInContent [])) >>> setDocumentStatusFromSystemState "in general entity processing" >>> traceTree >>> traceSource ) `when` documentStatusOk processGeneralEntity :: GESubstArrow processGeneralEntity context recl = choiceA [ isElem :-> ( processAttrl (processChildren substEntitiesInAttrValue) >>> processChildren (processGeneralEntity context recl) ) , isEntityRef :-> substEntityRef , isDTDDoctype :-> processChildren (processGeneralEntity context recl) , isDTDEntity :-> addEntityDecl , isDTDAttlist :-> substEntitiesInAttrDefaultValue , this :-> this ] where addEntityDecl :: GEArrow XmlTree XmlTree addEntityDecl = perform ( choiceA [ isIntern :-> addInternalEntity -- don't change sequence of cases , isExtern :-> addExternalEntity , isUnparsed :-> addUnparsedEntity ] ) where isIntern = none `when` hasDTDAttr k_system isExtern = none `when` hasDTDAttr k_ndata isUnparsed = this addInternalEntity :: GEArrow XmlTree b addInternalEntity = insertInternal $<< ( ( getDTDAttrValue a_name >>> traceValue 2 (("processGeneralEntity: general entity definition for " ++) . show) ) &&& xshow (getChildren >>> isText) ) where insertInternal entity contents = insertEntity (substInternal contents) entity >>> none addExternalEntity :: GEArrow XmlTree b addExternalEntity = insertExternal $<< ( ( getDTDAttrValue a_name >>> traceValue 2 (("processGeneralEntity: external entity definition for " ++) . show) ) &&& getDTDAttrValue a_url -- the absolute URL, not the relative in attr: k_system ) where insertExternal entity uri = insertEntity (substExternalParsed1Time uri) entity >>> none addUnparsedEntity :: GEArrow XmlTree b addUnparsedEntity = getDTDAttrValue a_name >>> traceValue 2 (("processGeneralEntity: unparsed entity definition for " ++) . show) >>> applyA (arr (insertEntity substUnparsed)) >>> none insertEntity :: (String -> GESubstArrow) -> String -> GEArrow b b insertEntity fct entity = ( getUserState >>> applyA (arr checkDefined) ) `guards` addEntity fct entity where checkDefined geEnv = maybe ok alreadyDefined . lookupGeEnv entity $ geEnv where ok = this alreadyDefined _ = issueWarn ("entity " ++ show entity ++ " already defined, repeated definition ignored") >>> none addEntity :: (String -> GESubstArrow) -> String -> GEArrow b b addEntity fct entity = changeUserState ins where ins _ geEnv = addGeEntry entity (fct entity) geEnv substEntitiesInAttrDefaultValue :: GEArrow XmlTree XmlTree substEntitiesInAttrDefaultValue = applyA ( xshow ( getDTDAttrValue a_default -- parse the default value >>> -- substitute entities mkText -- and convert value into a string >>> parseXmlEntityValueAsAttrValue "default value of attribute" >>> filterErrorMsg >>> substEntitiesInAttrValue ) >>> arr (setDTDAttrValue a_default) ) `when` hasDTDAttr a_default substEntitiesInAttrValue :: GEArrow XmlTree XmlTree substEntitiesInAttrValue = ( processGeneralEntity ReferenceInAttributeValue recl `when` isEntityRef ) >>> changeText normalizeWhiteSpace >>> transfCharRef where normalizeWhiteSpace = map ( \c -> if c `elem` "\n\t\r" then ' ' else c ) substEntityRef :: GEArrow XmlTree XmlTree substEntityRef = applyA ( ( ( getEntityRef -- get the entity name and the env >>> -- and compute the arrow to be applied traceValue 2 (("processGeneralEntity: entity reference for entity " ++) . show) >>> traceMsg 3 ("recursion list = " ++ show recl) ) &&& getUserState ) >>> arr2 substA ) where substA :: String -> GEEnv -> GEArrow XmlTree XmlTree substA entity geEnv = maybe entityNotFound entityFound . lookupGeEnv entity $ geEnv where errMsg msg = issueErr msg entityNotFound = errMsg ("general entity reference \"&" ++ entity ++ ";\" not processed, no definition found, (forward reference?)") entityFound fct | entity `elem` recl = errMsg ("general entity reference \"&" ++ entity ++ ";\" not processed, cyclic definition") | otherwise = fct context recl substExternalParsed1Time :: String -> String -> GESubstArrow substExternalParsed1Time uri entity cx rl = perform ( traceMsg 2 ("substExternalParsed1Time: read and parse external parsed entity " ++ show entity) >>> runInLocalURIContext ( root [sattr a_source uri] [] -- uri must be an absolute uri >>> -- abs uri is computed during parameter entity handling getXmlEntityContents >>> processExternalEntityContents ) >>> applyA ( arr $ \ s -> addEntity (substExternalParsed s) entity ) ) >>> processGeneralEntity cx rl where processExternalEntityContents :: IOStateArrow s XmlTree String processExternalEntityContents = ( ( ( documentStatusOk -- reading entity succeeded >>> -- with content stored in a text node (getChildren >>> isText) ) `guards` this ) `orElse` issueErr ("illegal value for external parsed entity " ++ show entity) ) >>> xshow (getChildren >>> isText) substExternalParsed :: String -> String -> GESubstArrow substExternalParsed s entity ReferenceInContent rl = includedIfValidating s rl entity substExternalParsed _ entity ReferenceInAttributeValue _ = forbidden entity "external parsed general" "in attribute value" substExternalParsed _ _ ReferenceInEntityValue _ = bypassed substInternal :: String -> String -> GESubstArrow substInternal s entity ReferenceInContent rl = included s rl entity substInternal s entity ReferenceInAttributeValue rl = includedInLiteral s rl entity substInternal _ _ ReferenceInEntityValue _ = bypassed substUnparsed :: String -> GESubstArrow substUnparsed entity ReferenceInContent _ = forbidden entity "unparsed" "content" substUnparsed entity ReferenceInAttributeValue _ = forbidden entity "unparsed" "attribute value" substUnparsed entity ReferenceInEntityValue _ = forbidden entity "unparsed" "entity value" -- XML 1.0 chapter 4.4.2 included :: String -> RecList -> String -> GEArrow XmlTree XmlTree included s rl entity = traceMsg 3 ("substituting general entity " ++ show entity ++ " with value " ++ show s) >>> txt s >>> parseXmlEntityValueAsContent ("substituting general entity " ++ show entity ++ " in contents") >>> filterErrorMsg >>> processGeneralEntity context (entity : rl) -- XML 1.0 chapter 4.4.3 includedIfValidating :: String -> RecList -> String -> GEArrow XmlTree XmlTree includedIfValidating = included -- XML 1.0 chapter 4.4.4 forbidden :: String -> String -> String -> GEArrow XmlTree XmlTree forbidden entity msg cx = issueErr ("reference of " ++ msg ++ show entity ++ " forbidden in " ++ cx) -- XML 1.0 chapter 4.4.5 includedInLiteral :: String -> RecList -> String -> GEArrow XmlTree XmlTree includedInLiteral s rl entity = txt s >>> parseXmlEntityValueAsAttrValue ("substituting general entity " ++ show entity ++ " in attribute value") >>> filterErrorMsg >>> processGeneralEntity context (entity : rl) -- XML 1.0 chapter 4.4.7 bypassed :: GEArrow XmlTree XmlTree bypassed = this -- ------------------------------------------------------------