{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE ViewPatterns #-} {-# OPTIONS_GHC -fno-warn-orphans #-} module Symantic.RNC.Validate where import Control.Applicative (Applicative(..), Alternative(..), optional) import Control.Monad (Monad(..)) import Data.Bool import Data.Either (Either(..)) import Data.Eq (Eq(..)) import Data.Foldable (Foldable(..), all) import Data.Function (($), const, id) import Data.Functor (Functor(..), (<$>)) import Data.Maybe (Maybe(..), maybe) import Data.Monoid (Monoid(..)) import Data.Ord (Ord(..)) import Data.Semigroup (Semigroup(..)) import Data.Tuple (snd) import Prelude (error) import Data.Sequence (Seq) import qualified Data.Char as Char import qualified Data.List.NonEmpty as NonEmpty import qualified Data.Sequence as Seq import qualified Data.Set as Set import qualified Data.Text.Lazy as TL import qualified Text.Megaparsec as P import Symantic.XML (XMLs) import qualified Symantic.XML as XML import qualified Symantic.RNC.Sym as RNC validateXML :: Ord e => P.Parsec e (XMLs src) a -> XMLs src -> Either (P.ParseErrorBundle (XMLs src) e) a validateXML p stateInput = snd $ P.runParser' p P.State { P.stateInput , P.stateOffset = 0 , P.statePosState = error "[BUG] validateXML: getSourcePos is not helpful here, please use annotated source locations" -- NOTE: reporting the node number is less helpful -- than the source text line and number where the node is; -- P.statePosState is only used by P.getSourcePos. } instance ( Ord err , Ord src , XML.NoSource src , P.Stream (Seq (XML.XML src)) , P.Token (Seq (XML.XML src)) ~ XML.Tree (XML.Sourced src XML.Node) ) => RNC.Sym_RNC (P.Parsec err (XMLs src)) where {- none = P.label "none" $ P.eof -} namespace _p _n = pure () element n p = do ts <- P.token check $ Set.singleton $ P.Tokens $ pure expected p_XMLs p ts where expected = XML.Tree (XML.notSourced $ XML.NodeElem n) mempty check (XML.Tree (XML.unSourced -> XML.NodeElem e) ts) | e == n = Just $ removePI $ removeXMLNS $ removeSpaces ts where removePI xs = (`Seq.filter` xs) $ \case XML.Tree (XML.unSourced -> XML.NodePI{}) _ts -> False _ -> True removeSpaces xs = if (`all` xs) $ \case XML.Tree (XML.unSourced -> XML.NodeText (XML.EscapedText et)) _ts -> all (\case XML.EscapedPlain t -> TL.all Char.isSpace t _ -> False) et _ -> True then (`Seq.filter` xs) $ \case XML.Tree (XML.unSourced -> XML.NodeText{}) _ts -> False _ -> True else xs removeXMLNS xs = let (attrs,rest) = (`Seq.spanl` xs) $ \case XML.Tree (XML.unSourced -> XML.NodeAttr{}) _ts -> True _ -> False in let attrs' = (`Seq.filter` attrs) $ \case XML.Tree (XML.unSourced -> XML.NodeAttr a) _ts -> case a of XML.QName "" "xmlns" -> False XML.QName ns _l -> ns /= XML.xmlns_xmlns _ -> True in attrs' <> rest check _t = Nothing attribute n p = do v <- P.token check $ Set.singleton $ P.Tokens $ pure expected p_XMLs p v where expected = XML.Tree0 (XML.notSourced $ XML.NodeAttr n) check (XML.Tree (XML.unSourced -> XML.NodeAttr k) v@(toList -> [XML.Tree0 (XML.unSourced -> XML.NodeText _v)])) | k == n = Just v check _t = Nothing any = P.label "any" $ P.token (const $ Just ()) Set.empty anyElem ns p = P.label "anyElem" $ do (n,ts) <- P.token check $ Set.singleton $ P.Tokens $ pure expected p_XMLs (p $ XML.qNameLocal n) ts where expected = XML.Tree (XML.notSourced $ XML.NodeElem $ XML.QName ns $ XML.NCName "*") mempty check (XML.Tree (XML.unSourced -> XML.NodeElem e) ts) | XML.qNameSpace e == ns = Just $ (e,ts) check _t = Nothing {- comment = do s <- P.getInput case Seq.viewl s of XML.Tree0 (XML.unSourced -> XML.NodeComment c) :< ts -> do P.setInput ts c <$ XML.setFilePosToNextNode t :< _ts -> P.failure (Just $ P.Tokens $ pure t) ex EmptyL -> P.failure Nothing ex where ex = Set.singleton $ P.Tokens $ pure expected expected = XML.Tree0 (XML.notSourced $ XML.NodeComment "") -} escapedText = do P.token check $ Set.singleton $ P.Tokens $ pure expected where expected = XML.Tree0 (XML.notSourced $ XML.NodeText $ XML.EscapedText mempty) check (XML.Tree0 (XML.unSourced -> XML.NodeText t)) = Just t check _t = Nothing optional = P.optional option = P.option choice = P.choice try = P.try fail = P.label "fail" $ P.failure Nothing mempty -- | @p_XMLs p xs@ returns a parser parsing @xs@ entirely with @p@, -- updating 'P.stateOffset' and re-raising any exception. p_XMLs :: Ord err => Ord src => P.Stream (Seq (XML.XML src)) => P.Parsec err (XMLs src) a -> XMLs src -> P.Parsec err (XMLs src) a p_XMLs p stateInput = do st <- P.getParserState let (st', res) = P.runParser' (p <* P.eof) st { P.stateInput = stateInput , P.stateOffset = P.stateOffset st } P.updateParserState (\s -> s{P.stateOffset = P.stateOffset st'}) case res of Right a -> return a Left (P.ParseErrorBundle errs _) -> case NonEmpty.head errs of P.TrivialError _o us es -> P.failure us es P.FancyError _o es -> P.fancyFailure es -- | Whether the given 'XML.Node' must be ignored by the RNC parser. isIgnoredNode :: XML.Node -> Bool isIgnoredNode = \case XML.NodeComment{} -> True XML.NodePI{} -> True XML.NodeCDATA{} -> True _ -> False instance ( Ord err , Ord src , P.Stream (Seq (XML.XML src)) ) => RNC.Sym_Permutation (P.ParsecT err (XMLs src) m) where runPermutation (Perm value parser) = optional parser >>= f where -- NOTE: copy Control.Applicative.Permutations.runPermutation -- to replace the commented empty below so that P.TrivialError -- has the unexpected token. f Nothing = maybe {-empty-}(P.token (const Nothing) Set.empty) pure value f (Just p) = RNC.runPermutation p toPermutation p = Perm Nothing $ pure <$> p toPermutationWithDefault v p = Perm (Just v) $ pure <$> p -- | Unprivatized 'Control.Applicative.Permutations.Permutation' to fix 'runPermutation'. -- so that the 'P.TrivialError' has an unexpected token -- which is an 'XML.Node' containing a 'XML.FileSource' useful when reporting errors. data Perm m a = Perm (Maybe a) (m (Perm m a)) type instance RNC.Permutation (P.ParsecT err (XMLs src) m) = Perm (P.ParsecT err (XMLs src) m) instance Functor m => Functor (Perm m) where fmap f (Perm v p) = Perm (f <$> v) (fmap f <$> p) instance Alternative m => Applicative (Perm m) where pure value = Perm (Just value) empty lhs@(Perm f v) <*> rhs@(Perm g w) = Perm (f <*> g) (lhsAlt <|> rhsAlt) where lhsAlt = (<*> rhs) <$> v rhsAlt = (lhs <*>) <$> w instance ( Ord err , Ord src , P.Stream (Seq (XML.XML src)) ) => RNC.Sym_Rule (P.ParsecT err (XMLs src) m) where -- rule n p = P.dbg s p {-(p P. s)-} where s = Text.unpack n rule _n = id arg _n = pure ()