{- Copyright (C) Stilo International plc, 2012 This file contains the main file of concrete-relaxng-parser, a parser driven by a standard RELAX NG schema with concrete syntax extensions. The concrete-relaxng-parser project is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. The concrete-relaxng-parser project is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with concrete-relaxng-parser. If not, see . -} {-# LANGUAGE DeriveDataTypeable #-} module Main where import Data.List (stripPrefix) import Data.Maybe (fromMaybe) import System.Console.CmdArgs.Implicit import Text.XML.HXT.Core hiding (trace) import Text.XML.HXT.DOM.Interface import Text.XML.HXT.DOM.ShowXml (xshow) import qualified Text.XML.HXT.DOM.TypeDefs (XNode) import qualified Text.XML.HXT.DOM.XmlNode as XN import Text.XML.HXT.Curl (withCurl) import Text.XML.HXT.TagSoup (withTagSoup) import Text.XML.HXT.RelaxNG (Uri, LocalName, ParamList, Prefix, Context, Datatype, NameClass(..)) import Text.XML.HXT.RelaxNG.CreatePattern (createPatternFromXmlTree) import Text.XML.HXT.RelaxNG.Validator (createSimpleForm) import qualified Text.XML.HXT.RelaxNG.Validation as Validator import qualified Text.XML.HXT.RelaxNG.DataTypes as Validator import Text.XML.HXT.RelaxNG.Parser data Args = Args {raw :: Bool, passthrough :: Bool, schema :: String, input :: String} deriving (Show, Data, Typeable) main :: IO () main = do arg <- cmdArgs Args{raw = def &= help "Output the raw syntax tree with the terminal nodes.", passthrough = def &= help "Reproduce the unparsed input.", schema = def &= typFile &= argPos 0, input = def &= typFile &= argPos 1} pattern <- loadPattern (schema arg) runX (pipeline (passthrough arg) (raw arg) pattern (input arg)) return () recoveryLimit = (20, []) pipeline :: Bool -> Bool -> Validator.Pattern -> String -> IOSArrow XmlTree XmlTree pipeline passthrough rawOutput pattern input = readDocument [withValidate no, withInputEncoding utf8] input >>> changeChildren (\l-> if null l then [XN.mkText ""] else l) >>> canonicalizeAllNodes >>> propagateAndValidateNamespaces >>> Validator.normalizeForRelaxValidation >>> (if passthrough then this else processChildren ( arrL (parse pattern) >>> (if rawOutput then this else stripSyntax ) >>> fromLA (cleanupNamespaces collectPrefixUriPairs) ) ) >>> writeDocument [withIndent no, withOutputEncoding utf8] "" stripSyntax :: IOSArrow XmlTree XmlTree stripSyntax = processTopDown ((getChildren >>> stripSyntax) `when` (isElem >>> hasNamespaceUri syntacticNamespace) >>> none `when` (isElem >>> hasNamespaceUri terminalNamespace) >>> changeElemName stripOmissiblePrefix) loadPattern :: String -> IO Validator.Pattern loadPattern schema = fmap head $ runX ( readDocument [withInputEncoding utf8] schema >>> canonicalizeAllNodes >>> propagateAndValidateNamespaces >>> createSimpleForm True False True >>> fromLA createPatternFromXmlTree ) parse :: Validator.Pattern -> XmlTree -> XmlTrees parse pattern input = case bestNullableResult recoveryLimit derivative of Just r -> fst $ reNest False r Nothing -> [XN.mkError' 1 ("The input cannot be parsed.\n" ++ "pattern: " ++ pretty "" 0 noRecovery (translate pattern) ++ "\n" ++ "input: " ++ show input ++ "\n" ++ "derivative: " ++ show derivative ++ "\n")] where derivative = childDeriv recoveryLimit startContext (translate pattern) input stripOmissiblePrefix :: QName -> QName stripOmissiblePrefix qn = maybe qn (\s-> setNamespaceUri' (newXName s) qn) (stripPrefix omissibleNamespacePrefix $ namespaceUri qn) translate :: Validator.Pattern -> Pattern translate Validator.Empty = Empty translate (Validator.NotAllowed _) = NotAllowed [] translate Validator.Text = Text translate (Validator.Choice p1 p2) = Choice (translate p1) (translate p2) translate (Validator.Interleave p1 p2) = Interleave 0 (translate p1) (translate p2) translate (Validator.Group p1 p2) = Group (translate p1) (translate p2) translate (Validator.OneOrMore p) = OneOrMore (translate p) translate (Validator.List p) = List (translate p) translate (Validator.Data d l) = Data d l translate (Validator.DataExcept d l p) = DataExcept d l (translate p) translate (Validator.Value d s c) = Value d s c translate (Validator.Attribute c p) = Attribute c (translate p) translate (Validator.Element c p) = Element c (translate p) reNest :: Bool -> [ResultDelta] -> (XmlTrees, [ResultDelta]) reNest _ [] = ([], []) reNest original (Add c : rest) = (c : nested, rest') where (nested, rest') = reNest original rest reNest original (StartTag t : rest) = (XN.mkElement name atts content : nested, rest'') where (content, rest') = reNest original rest (nested, rest'') = reNest original rest' name = fromMaybe (error ("No name in start tag " ++ show t) :: QName) (XN.getElemName t) atts = fromMaybe (error ("No attribute list in start tag " ++ show t) :: XmlTrees) (XN.getAttrl t) reNest _ (EndTag : rest) = ([], rest) reNest original (Penalty : rest) = reNest original rest reNest original (Inferred d : rest) = reNest original (if original then rest else d:rest)