{- Copyright (C) Stilo International plc, 2012 This file contains the implementation of concrete-relaxng-parser, a parser driven by a standard RELAX NG schema with concrete syntax extensions. The concrete-relaxng-parser project is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. The concrete-relaxng-parser project is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with concrete-relaxng-parser. If not, see . -} module Text.XML.HXT.RelaxNG.Parser where import Prelude hiding (any, null) import Control.Monad (liftM2) import Control.Exception (assert) import Data.Char (isSpace) import Data.Foldable (any) import Data.List (find, groupBy, inits, isInfixOf, isPrefixOf, minimumBy, sortBy, stripPrefix) import Data.Maybe (fromJust, isNothing, mapMaybe, maybe) import Data.Ord (comparing) import Data.Set (Set, empty, null, singleton, union) import Debug.Trace (trace) import Data.Char.Properties.XMLCharProps ( isXmlSpaceChar ) import Text.XML.HXT.DOM.Interface import Text.XML.HXT.DOM.ShowXml (xshow) import qualified Text.XML.HXT.DOM.XmlNode as XN import Text.XML.HXT.RelaxNG (contains, Uri, LocalName, ParamList, Prefix, Context, Datatype, NameClass(..)) import Text.XML.HXT.RelaxNG.DataTypeLibraries (datatypeAllows, datatypeEqual) data Pattern = Empty | NotAllowed XmlTrees | Text | Choice !Pattern !Pattern | Interleave Int !Pattern !Pattern | Group !Pattern !Pattern | OneOrMore !Pattern | List !Pattern | Data Datatype ParamList | DataExcept Datatype ParamList !Pattern | Value Datatype String Context | Attribute NameClass !Pattern | Element NameClass Pattern | Region Bool QName !Pattern | Result [ResultDelta] !Pattern deriving Show data ResultDelta = StartTag XNode | Add XmlTree | EndTag | Penalty | Inferred ResultDelta deriving (Eq, Show) type RecoveryLimits = (Int, [QName]) pretty :: String -> Int -> RecoveryLimits -> Pattern -> String pretty _ _ _ Empty = "empty" pretty _ _ _ (NotAllowed ts) = "notAllowed " ++ show ts pretty _ _ _ Text = "text" pretty indent 0 rl (Choice p1 p2) = pretty indent 0 rl p1 ++ "\n" ++ indent ++ "| " ++ pretty indent 0 rl p2 pretty indent prec rl p@Choice{} = "(" ++ pretty (' ':indent) 0 rl p ++ ")" pretty indent prec rl (Interleave balance p1 p2) | prec < 3 = "(" ++ pretty indent 2 rl p1 ++ " " ++ replicate (-balance) '<' ++ "&" ++ replicate balance '>' ++ " " ++ pretty indent 2 rl p2 ++ ")" pretty indent prec rl p@Interleave{} = "(" ++ pretty (' ':indent) 0 rl p ++ ")" pretty indent prec rl (Group p1 p2) | prec < 2 = pretty (' ':indent) 1 rl p1 ++ "\n" ++ indent ++ ", " ++ pretty (' ':indent) 1 rl p2 pretty indent prec rl p@Group{} = "(" ++ pretty (' ':indent) 0 rl p ++ ")" pretty indent prec rl (OneOrMore p) = "(" ++ pretty (' ':indent) 0 rl p ++ ")+" pretty indent prec rl (Element nc@(Name uri local) p) = "element " ++ show nc ++ maybe "" (\rl'-> " {\n " ++ indent ++ pretty (' ':indent) 0 rl' p ++ "}") (recoveryAllowed (mkNsName local uri) rl) pretty _ _ rl (Element nc p) = "element " ++ show nc pretty indent prec rl (Region inf qn p) = "region " ++ show qn ++ " {\n " ++ indent ++ pretty (' ':indent) 0 rl p ++ "}" pretty indent prec rl (Result r p) = "(result " ++ shows r ("\n " ++ indent ++ pretty (' ':indent) 0 rl p ++ ")") pretty _ _ rl p = show p noRecovery :: RecoveryLimits noRecovery = (0, []) omissibleNamespacePrefix :: String omissibleNamespacePrefix = "omissible+" terminalNamespace :: String terminalNamespace ="http://en.wikipedia.org/wiki/Terminal_symbol" syntacticNamespace :: String syntacticNamespace ="http://en.wikipedia.org/wiki/Nonterminal_symbol" noContext :: Context noContext = ("", []) recoveryAllowed :: QName -> RecoveryLimits -> Maybe RecoveryLimits recoveryAllowed _ (0, _) = Nothing recoveryAllowed qn (rl, stk) | elem qn stk = Nothing | omissibleNamespacePrefix `isPrefixOf` namespaceUri qn = Just (pred rl, qn:stk) | namespaceUri qn `elem` [terminalNamespace, syntacticNamespace] = Just (pred rl, stk) | otherwise = Nothing startSet :: RecoveryLimits -> Pattern -> Set String startSet rl (Group p1 p2) = (if nullable rl p1 then startSet rl p2 else empty) `union` startSet rl p1 startSet rl (Interleave balance p1 p2) = startSet rl p1 `union` startSet rl p2 startSet rl (Choice p1 p2) = startSet rl p1 `union` startSet rl p2 startSet rl (OneOrMore p) = startSet rl p startSet rl (Element (Name uri local) p) = maybe empty (flip startSet p) (recoveryAllowed (mkNsName local uri) rl) startSet rl (Element (NameClassChoice nc1 nc2) p) = startSet rl (Element nc1 p) `union` startSet rl (Element nc2 p) startSet rl Element{} = empty startSet _ (Attribute _ _) = empty startSet _ (List _) = empty startSet _ (Value _ s _) = singleton s startSet _ (Data _ _) = empty startSet _ (DataExcept _ _ _) = empty startSet _ NotAllowed{} = empty startSet _ Empty = empty startSet _ Text = empty startSet rl (Region _ _ p) = startSet rl p startSet rl (Result _ p) = startSet rl p nullable :: RecoveryLimits -> Pattern -> Bool nullable rl (Group p1 p2) = nullable rl p1 && nullable rl p2 nullable rl (Interleave balance p1 p2) = balance == 0 && nullable rl p1 && nullable rl p2 nullable rl (Choice p1 p2) = nullable rl p1 || nullable rl p2 nullable rl (OneOrMore p) = nullable rl p nullable rl (Element (Name uri local) p) = maybe False (\rl'-> nullable rl' p) (recoveryAllowed qn rl) where qn = mkNsName local uri nullable rl (Element (NameClassChoice n1 n2) p) = nullable rl (Element n1 p) || nullable rl (Element n2 p) nullable _ (Element _ _) = False nullable _ (Attribute _ _) = False nullable _ (List _) = False nullable _ (Value _ _ _) = False nullable _ (Data _ _) = False nullable _ (DataExcept _ _ _) = False nullable _ NotAllowed{} = False nullable _ Empty = True nullable _ Text = True nullable _ (Region False _ _) = False nullable rl (Region True _ p) = nullable rl p nullable rl (Result _ p) = nullable rl p inferred :: ResultDelta -> Bool inferred (Inferred _) = True inferred _ = False nullableResults:: RecoveryLimits -> Pattern -> [[ResultDelta]] nullableResults rl (Group p1 p2) = case nullableResults rl p2 of [] -> [] l -> assert (all (==[]) l) (nullableResults rl p1) nullableResults rl (Interleave 0 p1 p2) = liftM2 (++) (nullableResults rl p1) (nullableResults rl p2) nullableResults rl Interleave{} = [] nullableResults rl (Choice p1 p2) = nullableResults rl p1 ++ nullableResults rl p2 nullableResults rl (OneOrMore p) = nullableResults rl p nullableResults rl (Element (Name uri local) p) = maybe [] (\rl'-> map enclose $ nullableResults rl' p) (recoveryAllowed qn rl) where enclose r = Inferred (StartTag (XN.mkElementNode qn [])) : (r ++ [Inferred EndTag]) qn = mkNsName local uri nullableResults rl (Element (NameClassChoice n1 n2) p) = nullableResults rl (Element n1 p) ++ nullableResults rl (Element n2 p) nullableResults _ (Element _ _) = [] nullableResults _ (Attribute _ _) = [] nullableResults _ (List _) = [] nullableResults _ (Value _ _ _) = [] nullableResults _ (Data _ _) = [] nullableResults _ (DataExcept _ _ _) = [] nullableResults _ NotAllowed{} = [] nullableResults _ (Region False _ _) = [] nullableResults rl (Region True q p) = map (++ [Inferred EndTag]) (nullableResults rl p) nullableResults _ Empty = [[]] nullableResults _ Text = [[]] nullableResults rl (Result r p) = map (r ++) (nullableResults rl p) prepend :: [ResultDelta] -> ([ResultDelta], String) -> ([ResultDelta], String) prepend r1 (r2, l) = (r1 ++ r2, l) findSplit :: (String -> Bool) -> String -> (String, String) findSplit p s = splitAt (findTailIndex p 0 s) s findTailIndex :: (String -> Bool) -> Int -> String -> Int findTailIndex _ n [] = n findTailIndex pred n l@(h:t) = if pred l then n else findTailIndex pred (succ n) t findSuffix :: (String -> Bool) -> (String -> String) -> String -> (String, String) findSuffix pred f [] = (f [], []) findSuffix pred f l@(h:t) = if pred l then (f [], l) else findSuffix pred (f . (h:)) t upto :: Set String -> String -> (String, String) upto limits s = findSplit startsWithAnyLimit s where startsWithAnyLimit tail = any (\limit-> limit `isPrefixOf` tail || tail `isPrefixOf` limit) limits splits :: String -> [(String, String)] splits [] = [([], [])] splits l@(head : tail) = ([], l) : map (\(init, tail)-> (head:init, tail)) (splits tail) bestNullableResult :: RecoveryLimits -> Pattern -> Maybe [ResultDelta] bestNullableResult rl p = case nullableResults rl p of [] -> Nothing r -> Just (minimumBy (comparing (semanticLength 0)) r) semanticLength :: Int -> [ResultDelta] -> Int semanticLength s (StartTag node : tail) | XN.getNamespaceUri node == Just syntacticNamespace = semanticLength s tail - 1 semanticLength s (StartTag node : tail) | XN.getNamespaceUri node == Just terminalNamespace = semanticLength s (dropContent 0 tail) where dropContent 0 (EndTag : rest) = rest dropContent n (EndTag : rest) = dropContent (pred n) rest dropContent n (StartTag{} : rest) = dropContent (succ n) rest dropContent n (Inferred d : rest) = dropContent n (d:rest) dropContent n (_ : rest) = dropContent n rest semanticLength s (Inferred d : tail) = semanticLength s (d : tail) semanticLength s (Add t : tail) = semanticLength (s + maybe 1 length (XN.getText t)) tail semanticLength s (_ : tail) = semanticLength (succ s) tail semanticLength s [] = s hasResults :: Pattern -> Bool hasResults Result{} = True hasResults (Choice p1 p2) = hasResults p1 || hasResults p2 hasResults (Interleave _ p1 p2) = hasResults p1 || hasResults p2 hasResults (Region _ _ p) = hasResults p hasResults _ = False childDeriv :: RecoveryLimits -> Context -> Pattern -> XmlTree -> Pattern childDeriv rl cx p t | XN.isText t = let Just s = XN.getText t cont r s = if whitespace s then result (r ++ [Add $ XN.mkText s]) Empty else NotAllowed [XN.mkText s, t] in textDeriv rl cx False empty empty p s cont id | XN.isElem t = let p1 = startTagOpenDeriv rl p (XN.mkElementNode qn atts) p2 = attsDeriv cx p1 atts p3 = startTagCloseDeriv p2 p4 = childrenDeriv rl startContext p3 children qn = fromJust (XN.getElemName t) atts = fromJust (XN.getAttrl t) children = XN.getChildren t in endTagDeriv rl qn p4 textDeriv :: RecoveryLimits -> Context -> Bool -> Set String -> Set String -> Pattern -> String -> ([ResultDelta] -> String -> Pattern) -> (Pattern -> Pattern) -> Pattern textDeriv rl cx inf follow alt (Choice p1 p2) s c1 c2 = let newAlt p = alt `union` startSet rl p `union` if nullable rl p then follow else empty in choice (textDeriv rl cx inf follow (newAlt p2) p1 s c1 c2) (textDeriv rl cx inf follow (newAlt p1) p2 s c1 c2) textDeriv rl cx inf follow alt (Interleave 0 p1 p2) s c1 c2 = choice (textDeriv rl cx inf (follow `union` start2) (alt `union` start2) p1 s (cont p2) (c2 . flip (interleave 0) p2)) (textDeriv rl cx inf (follow `union` start1) (alt `union` start1) p2 s (cont p1) (c2 . interleave 0 p1)) where start1 = startSet rl p1 start2 = startSet rl p2 cont p r s' = textDeriv rl cx inf follow alt p s' (c1 . (r ++)) (c2 . result r) textDeriv rl cx inf follow alt (Interleave balance p1 p2) s c1 c2 | balance < 0 = textDeriv rl cx inf follow alt p1 s fail (c2 . flip (interleave balance) p2) | balance > 0 = textDeriv rl cx inf follow alt p2 s fail (c2 . interleave balance p1) where fail _ s = NotAllowed [XN.mkText s] textDeriv rl cx inf follow alt (Group p1 p2) s c1 c2 = textDeriv rl cx inf follow' alt p1 s (\r s'-> textDeriv rl cx inf follow (altAfter r) p2 s' (c1 . (r ++)) (c2 . result r)) (c2 . flip group p2) where follow1 = startSet rl p2 follow' = if nullable rl p2 then follow1 `union` follow else follow1 altAfter r = if blankResult r then alt else empty textDeriv rl cx inf follow alt (OneOrMore p) s c1 c2 = textDeriv rl cx inf follow' alt p s (\r s'-> case r of [] -> c1 r s' _ -> textDeriv rl cx inf follow' (altAfter r) zeroOrMore s' (c1 . (r ++)) (c2 . result r)) (c2 . flip group zeroOrMore) where follow' = follow `union` startSet rl p zeroOrMore = choice (OneOrMore p) Empty altAfter r = if blankResult r then alt else empty textDeriv rl cx inf follow alt Text s c1 c2 = case upto (alt `union` follow) s of ("", c:s') -> NotAllowed [XN.mkText s] (_, "") -> c2 $ result [(if inf then Inferred else id) (Add $ XN.mkText s)] Text (init, tail) -> c1 [(if inf then Inferred else id) (Add $ XN.mkText init)] tail textDeriv rl cx inf follow alt Empty "" c1 c2 = c2 Empty textDeriv rl cx inf follow alt Empty s c1 c2 = c1 [] s textDeriv rl cx1 inf follow alt (Value (uri, name) value cx2) s c1 c2 | isNothing (datatypeEqual uri name value cx2 s cx1) = c2 $ result [(if inf then Inferred else id) (Add (XN.mkText s))] Empty | otherwise = case datatypePrefix uri name value cx2 s cx1 of Just (p, s) -> c1 [(if inf then Inferred else id) (Add $ XN.mkText p)] s Nothing -> NotAllowed [XN.mkText s] textDeriv rl cx inf follow alt (Data (uri, name) params) s c1 c2 = case longestAllowedPrefix cx uri name params init of Just (allowed, rest) -> case rest ++ tail of "" -> c2 $ result [(if inf then Inferred else id) (Add (XN.mkText allowed))] Empty s' -> c1 [(if inf then Inferred else id) (Add (XN.mkText allowed))] s' Nothing -> NotAllowed [XN.mkText s] where (init, tail) = upto (alt `union` follow) s textDeriv rl cx inf follow alt (DataExcept (uri, name) params p) s c1 c2 | isNothing (datatypeAllows uri name params s cx) && not (nullable rl (textDeriv rl cx inf follow alt p s (\_ _-> NotAllowed [XN.mkText s]) id)) = c2 $ result [(if inf then Inferred else id) (Add (XN.mkText s))] Empty textDeriv rl cx inf follow alt (List p) s c1 c2 | nullable rl (listDeriv cx follow alt p (words s)) = c2 $ result [(if inf then Inferred else id) (Add (XN.mkText s))] Empty textDeriv rl cx inf follow alt (Element (Name uri local) p) s c1 c2 = let qn = mkNsName local uri in case recoveryAllowed qn rl of Just rl' -> textDeriv rl' cx inf follow alt (startTagCloseDeriv p) s (\r-> c1 ([Inferred $ StartTag $ XN.mkElementNode qn []] ++ r ++ [Inferred EndTag])) (c2 . result [Inferred $ StartTag $ XN.mkElementNode qn []] . region True qn) Nothing -> if whitespace s then c2 $ result [Add $ XN.mkText s] p else NotAllowed [XN.mkText s] textDeriv rl cx inf follow alt (Element (NameClassChoice n1 n2) p) s c1 c2 = choice (textDeriv rl cx inf follow alt (Element n1 p) s c1 c2) (textDeriv rl cx inf follow alt (Element n2 p) s c1 c2) textDeriv rl cx inf follow alt p@(Element _ _) s c1 c2 = if whitespace s then c2 $ result [Add $ XN.mkText s] p else NotAllowed [XN.mkText s] textDeriv rl cx inf follow alt (Region False qn p) s c1 c2 = textDeriv rl cx inf empty empty p s (\r-> region False qn . c1 r) (c2 . region False qn) textDeriv rl cx inf follow alt (Region True qn p) s c1 c2 = maybe (NotAllowed [XN.mkText s]) (\rl'-> textDeriv rl' cx inf follow alt p s (\r-> c1 (r ++ [Inferred EndTag])) (c2 . region True qn)) (recoveryAllowed qn rl) textDeriv rl cx inf follow alt (Result r p) s c1 c2 = textDeriv rl cx inf follow alt p s (\r'-> c1 (r ++ r')) (c2 . result r) textDeriv _ _ _ _ _ p@NotAllowed{} _ _ _ = p textDeriv _ _ _ _ _ _ s _ _ = NotAllowed [XN.mkText s] listDeriv :: Context -> Set String -> Set String -> Pattern -> [String] -> Pattern listDeriv _ _ _ p [] = p listDeriv cx follow alt p (h:t) = textDeriv noRecovery cx False follow alt p h (\_ _-> NotAllowed [XN.mkText h]) (flip group $ listDeriv cx follow alt p t) startRecovery :: RecoveryLimits -> QName -> Context -> Pattern -> Pattern startRecovery rl qn cx p = result [Inferred (StartTag (XN.mkElementNode qn []))] (startTagCloseDeriv p) region :: Bool -> QName -> Pattern -> Pattern region inferred qname p@NotAllowed{} = p region inferred qname (Result r p) = result r (region inferred qname p) region inferred qname p = Region inferred qname p result :: [ResultDelta] -> Pattern -> Pattern result _ p@NotAllowed{} = p result r1 (Result r2 p) = Result (r1 ++ r2) p result [] p = p result r p = Result r p addResult :: Pattern -> [ResultDelta] -> Pattern addResult p@NotAllowed{} _ = p addResult (Choice p1 p2) r = Choice (addResult p1 r) (addResult p2 r) addResult (Group p1 p2) r = group (addResult p1 r) p2 addResult p@(Interleave balance p1 p2) r = interleave balance (addResult p1 r) p2 addResult (Result r1 p) r2 = result r1 (addResult p r2) addResult (Region inferred qn p) r = Region inferred qn (addResult p r) addResult p r = Result r p choice :: Pattern -> Pattern -> Pattern choice p NotAllowed{} = p choice NotAllowed{} p = p choice Empty t@Text{} = t choice t@Text{} Empty = t choice p1 p2 = Choice p1 p2 group :: Pattern -> Pattern -> Pattern group _ p@NotAllowed{} = p group p@NotAllowed{} _ = p group p Empty = p group Empty p = p group _ (Result _ _) = error "Can't have Result on the RHS of Group." group (Result r p1) p2 = result r (group p1 p2) group Text Text = Text group p1 p2 = Group p1 p2 interleave :: Int -> Pattern -> Pattern -> Pattern interleave _ _ p@NotAllowed{} = p interleave _ p@NotAllowed{} _ = p interleave _ p Empty = p interleave _ Empty p = p interleave _ Text Text = Text interleave balance p1 p2 = Interleave balance p1 p2 startContext = ("", []) startTagOpenDeriv :: RecoveryLimits -> Pattern -> XNode -> Pattern startTagOpenDeriv rl (Choice p1 p2) node = choice (startTagOpenDeriv rl p1 node) (startTagOpenDeriv rl p2 node) startTagOpenDeriv rl (Element (NameClassChoice n1 n2) p) node = choice (startTagOpenDeriv rl (Element n1 p) node) (startTagOpenDeriv rl (Element n2 p) node) startTagOpenDeriv rl (Element nc@(Name uri local) p) node@(XTag qn _) = if contains nc qn then Region False qn (result [StartTag node] p) else let qn = mkNsName local uri in case recoveryAllowed qn rl of Just rl' -> region True qn (startTagOpenDeriv rl' (startRecovery rl' qn startContext p) node) Nothing -> NotAllowed [XN.mkLeaf node] startTagOpenDeriv rl (Element nc p) node@(XTag qn _) = if contains nc qn then Region False qn (result [StartTag node] p) else NotAllowed [XN.mkLeaf node] startTagOpenDeriv rl (Interleave 0 p1 p2) node = choice (let p = interleave (-1) (startTagOpenDeriv rl p1 node) p2 in if hasResults p2 then result [Penalty] p else p) (interleave 1 p1 (startTagOpenDeriv rl p2 node)) startTagOpenDeriv rl (Interleave balance p1 p2) node = if balance < 0 then interleave (balance-1) (startTagOpenDeriv rl p1 node) p2 else interleave (balance+1) p1 (startTagOpenDeriv rl p2 node) startTagOpenDeriv rl (OneOrMore p) node = group (startTagOpenDeriv rl p node) (choice (OneOrMore p) Empty) startTagOpenDeriv rl (Group p1 p2) node = let p = group (startTagOpenDeriv rl p1 node) p2 in maybe p (\r-> choice p (result r $ startTagOpenDeriv rl p2 node)) (bestNullableResult rl p1) startTagOpenDeriv rl (Region False qn p) node = region False qn $ startTagOpenDeriv rl p node startTagOpenDeriv rl (Region True qn p) node = maybe (NotAllowed [XN.mkLeaf node]) (\rl'-> region True qn $ startTagOpenDeriv rl' p node) (recoveryAllowed qn rl) startTagOpenDeriv rl (Result r p) node = result r (startTagOpenDeriv rl p node) startTagOpenDeriv _ p@NotAllowed{} _ = p startTagOpenDeriv _ _ node = NotAllowed [XN.mkLeaf node] attsDeriv :: Context -> Pattern -> XmlTrees -> Pattern attsDeriv _ p [] = p attsDeriv cx p (t : ts) | XN.isAttr t = attsDeriv cx (attDeriv cx p t) ts | otherwise = NotAllowed [t] attDeriv :: Context -> Pattern -> XmlTree -> Pattern attDeriv cx (Choice p1 p2) att = choice (attDeriv cx p1 att) (attDeriv cx p2 att) attDeriv cx (Group p1 p2) att = choice (group (attDeriv cx p1 att) p2) (group p1 (attDeriv cx p2 att)) attDeriv cx (Interleave balance p1 p2) att = choice (interleave balance (attDeriv cx p1 att) p2) (interleave balance p1 (attDeriv cx p2 att)) attDeriv cx (OneOrMore p) att = group (attDeriv cx p att) (choice (OneOrMore p) Empty) attDeriv cx (Attribute nc p) att | XN.isAttr att = if contains nc (fromJust $ XN.getAttrName att) && valueMatch cx p (xshow $ XN.getChildren att) then Empty else NotAllowed [att] attDeriv cx (Region inferred qn p) att = Region inferred qn (attDeriv cx p att) attDeriv cx (Result r p) att = result r (attDeriv cx p att) attDeriv _ _ t = NotAllowed [t] valueMatch :: Context -> Pattern -> String -> Bool valueMatch cx p s = (nullable noRecovery p && whitespace s) || nullable noRecovery (textDeriv noRecovery cx False empty empty p s (\_ _-> NotAllowed [XN.mkText s]) id) startTagCloseDeriv :: Pattern -> Pattern startTagCloseDeriv (Choice p1 p2) = choice (startTagCloseDeriv p1) (startTagCloseDeriv p2) startTagCloseDeriv (Group p1 p2) = group (startTagCloseDeriv p1) (startTagCloseDeriv p2) startTagCloseDeriv (Interleave balance p1 p2) = interleave balance (startTagCloseDeriv p1) (startTagCloseDeriv p2) startTagCloseDeriv (OneOrMore p) = oneOrMore (startTagCloseDeriv p) startTagCloseDeriv (Attribute nc _) = NotAllowed [XN.mkError' c_err ("Missing required attribute " ++ show nc)] startTagCloseDeriv (Region inferred qn p) = Region inferred qn (startTagCloseDeriv p) startTagCloseDeriv (Result r p) = result r (startTagCloseDeriv p) startTagCloseDeriv p = p oneOrMore :: Pattern -> Pattern oneOrMore p@NotAllowed{} = p oneOrMore Empty = Empty oneOrMore p@Text{} = p oneOrMore p@OneOrMore{} = p oneOrMore (Choice p Empty) = Choice (oneOrMore p) Empty oneOrMore (Choice Empty p) = Choice Empty (oneOrMore p) oneOrMore (Result r p) = result r (oneOrMore p) oneOrMore p = OneOrMore p childrenDeriv :: RecoveryLimits -> Context -> Pattern -> XmlTrees -> Pattern childrenDeriv _ _ p@NotAllowed{} _ = p childrenDeriv rl cx p [] = p childrenDeriv rl cx p [t] | XN.isText t = childDeriv rl cx p t childrenDeriv rl cx p children = stripChildrenDeriv rl cx p children stripChildrenDeriv :: RecoveryLimits -> Context -> Pattern -> XmlTrees -> Pattern stripChildrenDeriv _ _ p@NotAllowed{} _ = p stripChildrenDeriv _ _ p [] = p stripChildrenDeriv rl cx p (h:t) = stripChildrenDeriv rl cx (if strip h then (addResult p [Add h]) else childDeriv rl cx p h) t endTagDeriv :: RecoveryLimits -> QName -> Pattern -> Pattern endTagDeriv rl qn (Choice p1 p2) = choice (endTagDeriv rl qn p1) (endTagDeriv rl qn p2) endTagDeriv rl qn1 (Region False qn2 p) | qn1 == qn2 = maybe (NotAllowed [XN.mkError' c_err ("Cannot close element " ++ show qn1)]) (\r-> result (r ++ [EndTag]) Empty) (bestNullableResult rl p) endTagDeriv rl qn1 (Region inf qn2 p) = region inf qn2 (endTagDeriv rl qn1 p) endTagDeriv rl qn (Result r p) = result r (endTagDeriv rl qn p) endTagDeriv rl qn (Group p1 p2) = group (endTagDeriv rl qn p1) p2 endTagDeriv rl qn (Interleave balance p1 p2) | balance < 0 = interleave (balance+1) (endTagDeriv rl qn p1) p2 | balance == 0 = NotAllowed [XN.mkError' c_err ("Cannot close interleaved element " ++ show qn)] | balance > 0 = interleave (balance-1) p1 (endTagDeriv rl qn p2) endTagDeriv _ _ p@NotAllowed{} = p endTagDeriv _ qn _ = NotAllowed [XN.mkError' c_err ("Cannot find and close element " ++ show qn)] longestAllowedPrefix :: Context -> String -> String -> ParamList -> String -> Maybe (String, String) longestAllowedPrefix _ "" "string" [] s = Just (s, "") longestAllowedPrefix _ "http://www.w3.org/XML/2001/XMLSchema-datatypes" "string" [] s = Just (s, "") longestAllowedPrefix cx uri name params s = find (\(init, tail)-> isNothing (datatypeAllows uri name params init cx)) (reverse $ splits s) commonPrefixes :: Eq x => ([x] -> [x]) -> [x] -> [x] -> ([x], [x], [x]) commonPrefixes c [] l = (c [], [], l) commonPrefixes c l [] = (c [], l, []) commonPrefixes c l1@(h1:t1) l2@(h2:t2) | h1 == h2 = commonPrefixes (c . (h1:)) t1 t2 | otherwise = (c [], l1, l2) datatypePrefix :: String -> String -> String -> Context -> String -> Context -> Maybe (String, String) datatypePrefix "" "string" s1 _ s2 _ = fmap ((,) s1) (stripPrefix s1 s2) datatypePrefix "http://www.w3.org/XML/2001/XMLSchema-datatypes" "string" s1 _ s2 _ = fmap ((,) s1) (stripPrefix s1 s2) datatypePrefix "http://www.w3.org/2001/XMLSchema-datatypes" "string" s1 _ s2 _ = fmap ((,) s1) (stripPrefix s1 s2) datatypePrefix "" "token" s1 _ s2 _ = case span isSpace s2 of (spaces, s2') -> let s1' = dropWhile isSpace s1 in fmap ((,) (spaces ++ s1')) (stripPrefix s1' s2') datatypePrefix "http://www.w3.org/XML/2001/XMLSchema-datatypes" "token" s1 cx1 s2 cx2 = datatypePrefix "" "token" s1 cx1 s2 cx2 datatypePrefix "http://www.w3.org/2001/XMLSchema-datatypes" "token" s1 cx1 s2 cx2 = datatypePrefix "" "token" s1 cx1 s2 cx2 datatypePrefix uri name s1 cx1 s2 cx2 | s1 == s2 = Just (s1, "") | otherwise = {-# SCC "datatypePrefix.generic" #-} fmap (\prefix-> (prefix, fromJust $ stripPrefix prefix s2)) $ find (\prefix-> isNothing $ datatypeEqual uri name s1 cx1 prefix cx2) (inits s2) blankResult :: [ResultDelta] -> Bool blankResult = all blank where blank (Add t) = maybe False whitespace (XN.getText t) blank (Inferred n) = blank n blank _ = False strip :: XmlTree -> Bool strip = maybe False whitespace . XN.getText whitespace :: String -> Bool whitespace = all isXmlSpaceChar