{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE InstanceSigs #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE Rank2Types #-}
{-# LANGUAGE UndecidableInstances #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Symantic.XML.Read where
import Control.Applicative as Alternative (Applicative(..), Alternative(..), optional)
import Control.Monad (Monad(..))
import Control.Monad.Trans.Class (MonadTrans(..))
import Data.Bool
import Data.Either (Either(..))
import Data.Eq (Eq(..))
import Data.Foldable (Foldable(..), all)
import Data.Function (($), (.), const, id, flip)
import Data.Functor (Functor(..), (<$>))
import Data.List.NonEmpty (NonEmpty(..))
import Data.Maybe (Maybe(..), maybe, isNothing, maybeToList)
import Data.Monoid (Monoid(..))
import Data.Ord (Ord(..))
import Data.Proxy (Proxy(..))
import Data.Semigroup (Semigroup(..))
import Data.String (String, IsString(..))
import Data.Tuple (fst)
import Data.Void (Void)
import Numeric.Natural (Natural)
import Prelude ((+), Integer, undefined)
import System.IO (IO, FilePath)
import Text.Show (Show(..))
import qualified Data.Char as Char
import qualified Data.HashMap.Strict as HM
import qualified Data.List as List
import qualified Data.List.NonEmpty as NonEmpty
import qualified Data.Sequence as Seq
import qualified Data.Set as Set
import qualified Data.Text as Text
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Builder as TLB
import qualified Data.TreeSeq.Strict as TS
import qualified Text.Megaparsec as P
import qualified Text.Megaparsec.Char as P
import qualified Text.Megaparsec.Char.Lexer as P
import qualified Text.Megaparsec.Internal as P
import Symantic.Base
import Symantic.XML.Language
import Symantic.XML.RelaxNG.Language
import Symantic.XML.Tree
read ::
Read FileSourced Void (x->x) a ->
FilePath ->
IO (Either String a)
read rng path =
readTree path >>= \case
Left err -> return $ Left err
Right xml -> return $ runRead rng xml
runRead ::
Read FileSourced Void (x->x) a ->
FileSourcedTrees ->
Either String a
runRead rng xml =
case P.runParser (unRead rng) "" (mempty, xml) of
Left err -> Left $ foldMap parseErrorTextPretty $ P.bundleErrors err
Right a -> Right $ a id
type ReadStream src =
( HM.HashMap QName (src EscapedAttr)
, Trees src
)
take1_ ::
UnSource src =>
(Node (src EscapedAttr) -> Bool) ->
ReadStream src ->
Maybe ( P.Token (ReadStream src)
, ReadStream src )
take1_ isIgnoredNode s@(attrs, trees) =
go trees
where
go trs =
case Seq.viewl trs of
Seq.EmptyL
| null attrs -> Nothing
| otherwise -> Just (Left attrs, s)
t Seq.:< ts ->
case unSource (TS.unTree t) of
n | isIgnoredNode n -> go ts
| otherwise -> Just (Right t, (attrs, ts))
type ReadConstraints src =
( Ord (src (Node (src EscapedAttr)))
, Ord (src EscapedAttr)
, UnSource src
, NoSource src
, SourceOf src
, Show (Source src)
, Show (src String)
, Functor src
)
instance ReadConstraints src => P.Stream (ReadStream src) where
type Token (ReadStream src) = Either
(HM.HashMap QName (src EscapedAttr))
(Tree src)
type Tokens (ReadStream src) = ReadStream src
take1_ = take1_ isIgnoredNode
where
isIgnoredNode = \case
NodeComment{} -> True
NodePI{} -> True
_ -> False
showTokens _s toks =
orList $
mconcat $
toList $ showTree <$> toks
where
showSource :: src String -> String
showSource sa =
let src = sourceOf sa in
if nullSource @src src
then unSource sa
else unSource sa<>" at "<>show src
showTree = \case
Left as ->
(\(an, av) -> showSource $ ("(attribute "<>show an<>")") <$ av)
<$> List.sortOn fst (HM.toList as)
Right (TS.Tree nod ts) ->
pure $
showSource . (<$ nod) $
case unSource nod of
NodeElem n _as -> "(element "<>show n<>")"
NodeText{} ->
case Seq.viewl ts of
TS.Tree tn _ Seq.:< _
| NodeText lit <- unSource tn ->
"\""<>TL.unpack (unescapeText lit)<>"\""
_ -> "text"
NodeComment _c -> "comment"
NodePI n _t -> "(processing-instruction "<>show n<>")"
NodeCDATA _t -> "cdata"
takeN_ = undefined
tokensToChunk = undefined
chunkToTokens = undefined
chunkLength = undefined
takeWhile_ = undefined
reachOffset = undefined
reachOffsetNoLine = undefined
newtype Read src e f k
= Read
{ unRead :: P.Parsec e (ReadStream src) (f->k) }
instance
( Ord err
, ReadConstraints src
) => Emptyable (Read src err) where
empty = Read $ id <$ P.eof
instance
( Ord err
, ReadConstraints src
) => Unitable (Read src err) where
unit = Read $ return ($ ())
instance
( Ord err
, ReadConstraints src
) => Voidable (Read src err) where
void _a (Read x) = Read $
(\a2b2k b -> a2b2k (\_a -> b)) <$> x
instance
( Ord err
, ReadConstraints src
) => Constant (Read src err) where
constant a = Read $ return ($ a)
instance
( Ord err
, ReadConstraints src
) => Permutable (Read src err) where
type Permutation (Read src err) =
ReadPerm src err
permutable (ReadPerm ma p) = Read $ do
r <- Alternative.optional p
unRead $
case r of
Just perms -> permutable perms
Nothing ->
Read $ maybe
(P.token (const Nothing) Set.empty)
return ma
noPerm = ReadPerm Nothing Alternative.empty
perm (Read x) =
ReadPerm Nothing $ (<$> x) $ \a ->
ReadPerm (Just a) Alternative.empty
permWithDefault d (Read x) =
ReadPerm (Just ($ d)) $ (<$> x) $ \a ->
ReadPerm (Just a) Alternative.empty
instance
( Ord err
, ReadConstraints src
) => Composable (Read src err) where
Read x <.> Read y = Read $
x >>= \a2b -> (. a2b) <$> y
instance
( Ord err
, ReadConstraints src
) => Tupable (Read src err) where
Read x <:> Read y = Read $
consCont (,) <$> x <*> y
instance
( Ord err
, ReadConstraints src
) => Eitherable (Read src err) where
Read x <+> Read y = Read $
mapCont Left <$> P.try x <|>
mapCont Right <$> y
instance
( Ord err
, ReadConstraints src
) => Optionable (Read src err) where
option (Read x) = Read $
P.try x <|> return id
optional (Read x) = Read $
mapCont Just <$> P.try x <|>
return ($ Nothing)
instance
( Ord err
, ReadConstraints src
) => Repeatable (Read src err) where
many0 (Read x) = Read $ concatCont <$> many x
many1 (Read x) = Read $ concatCont <$> some x
instance
( Ord err
, ReadConstraints src
) => Dimapable (Read src err) where
dimap a2b _b2a (Read r) =
Read $ (\k b2k -> k (b2k . a2b)) <$> r
instance
( Ord err
, ReadConstraints src
) => Dicurryable (Read src err) where
dicurry (_::proxy args) constr _destr (Read x) = Read $ do
f <- x
return $ \r2k ->
f (mapresultN @args r2k constr)
instance
( Ord err
, ReadConstraints src
, Textable (Read src err)
) => XML (Read src err) where
namespace _nm _ns = Read (return id)
element n p = Read $ do
s <- P.token check $ Set.singleton $
P.Tokens $ pure expected
unRead $ readNested p s
where
expected = Right $ TS.tree0 $ noSource $ NodeElem n mempty
check = \case
Right (TS.Tree nod ts)
| NodeElem e as <- unSource nod
, e == n
-> Just (removeXMLNS as, removeSpaces ts)
_ -> Nothing
attribute n p = Read $ do
v <- pTokenAttr n $ Set.singleton $
P.Tokens $ pure expected
unRead $ readNested p
(mempty, pure (TS.tree0 (NodeText . EscapedText . unEscapedAttr <$> v)))
where
expected = Left $ HM.singleton n $ noSource ""
literal lit = Read $ do
P.token check $ Set.singleton $ P.Tokens $ pure expected
where
expected = Right $
TS.Tree (noSource $ NodeText "")
(pure $ TS.tree0 (noSource $ NodeText $ escapeText lit))
check = \case
Right (Tree0 nod)
| NodeText t <- unSource nod
, unescapeText t == lit
-> Just id
_ -> Nothing
pi n = Read $ do
v <- pTokenPI n $ Set.singleton $
P.Tokens $ pure expected
return ($ v)
where
expected = Right $ TS.tree0 $ noSource $ NodePI n mempty
cdata = Read $
P.token check $ Set.singleton $
P.Tokens $ pure expected
where
expected = Right $ TS.tree0 $ noSource $ NodeCDATA mempty
check = \case
Right (Tree0 nod)
| NodeCDATA v <- unSource nod
-> Just ($ v)
_ -> Nothing
comment = Read $
P.token check $ Set.singleton $
P.Tokens $ pure expected
where
expected = Right $ TS.tree0 $ noSource $ NodeComment mempty
check = \case
Right (Tree0 nod)
| NodeComment v <- unSource nod
-> Just ($ v)
_ -> Nothing
instance Ord err => Textable (Read FileSourced err) where
type TextConstraint (Read FileSourced err) a =
DecodeText a
text :: forall a k repr.
repr ~ Read FileSourced err =>
TextConstraint repr a => repr (a->k) k
text = Read $ do
Sourced (FileSource (src :| _)) txt <-
P.token check $ Set.singleton $ P.Tokens $ pure expected
case P.runParser @Void (decodeText @a <* P.eof) "" (unescapeText txt) of
Right a -> return ($ a)
Left errs -> P.fancyFailure $ Set.singleton $ P.ErrorFail $
(`foldMap` P.bundleErrors errs) $ \err ->
fileRange_path src <> ":" <>
show (fileRange_begin src <> Offset (P.errorOffset err)) <> "\n" <>
P.parseErrorTextPretty err
where
expected = Right $ TS.tree0 $ noSource $ NodeText $ EscapedText mempty
check = \case
Right (Tree0 nod)
| NodeText t <- unSource nod
-> Just (t <$ nod)
_ -> Nothing
instance
( Ord err
, ReadConstraints src
, Textable (Read src err)
, Definable (Read src err)
) => RelaxNG (Read src err) where
elementMatch nc p = Read $ do
(n,s) <- P.token check $ Set.singleton $
P.Tokens $ pure expected
((\a2k n2a -> a2k (n2a n)) <$>) $
unRead (readNested p s)
where
expected = Right $ TS.tree0 $ noSource $
NodeElem (qName (NCName (TLB.toLazyText
(textify (mempty::Namespaces NCName,(infixN0,SideL),nc)))))
mempty
check = \case
Right (TS.Tree nod ts)
| NodeElem n as <- unSource nod
, matchNameClass nc n
-> Just (n, (removeXMLNS as, removeSpaces ts))
_ -> Nothing
attributeMatch nc p = Read $ do
(an,av) <- pTokenAttrNameClass nc $ Set.singleton $
P.Tokens $ pure expected
((\a2k n2a -> a2k (n2a an)) <$>) $
unRead $ readNested p
(mempty, pure (TS.tree0 (NodeText . EscapedText . unEscapedAttr <$> av)))
where
expected = Left $ HM.singleton (qName (NCName n)) $ noSource ""
where n = TLB.toLazyText $ textify (mempty::Namespaces NCName,(infixN0,SideL),nc)
instance Ord err => Definable (Read FileSourced err) where
define n = Read . P.label n . unRead
data ReadPerm (src :: * -> *) err a k
= ReadPerm
{ readPerm_result :: !(Maybe (a->k))
, readPerm_parser :: P.Parsec err (ReadStream src) (ReadPerm src err a k)
}
instance
(Ord err, ReadConstraints src) =>
Dimapable (ReadPerm src err) where
dimap a2b b2a (ReadPerm a ma) =
ReadPerm (merge <$> a)
(dimap a2b b2a `fmap` ma)
where merge = \a2k2k b2k -> a2k2k $ b2k . a2b
instance
(Ord err, ReadConstraints src) =>
Dicurryable (ReadPerm src err) where
dicurry ::
forall args r k proxy.
CurryN args =>
proxy args ->
(args-..->r) ->
(r->Tuples args) ->
ReadPerm src err (args-..->k) k ->
ReadPerm src err (r->k) k
dicurry px constr destr (ReadPerm a ma) =
ReadPerm (merge <$> a)
(dicurry px constr destr `fmap` ma)
where
merge args2k2k = \r2k ->
args2k2k $ mapresultN @args r2k constr
instance
(Ord err, ReadConstraints src) =>
Composable (ReadPerm src err) where
lhs@(ReadPerm da pa) <.> rhs@(ReadPerm db pb) =
ReadPerm a $
lhsAlt <|> rhsAlt
where
lhsAlt = (<.> rhs) <$> pa
rhsAlt = (lhs <.>) <$> pb
a = flip (.) <$> da <*> db
instance
(Ord err, ReadConstraints src) =>
Tupable (ReadPerm src err) where
lhs@(ReadPerm da pa) <:> rhs@(ReadPerm db pb) =
ReadPerm a (lhsAlt <|> rhsAlt)
where
lhsAlt = (<:> rhs) <$> pa
rhsAlt = (lhs <:>) <$> pb
a = consCont (,) <$> da <*> db
instance Definable (ReadPerm src err) where
define _n = id
concatCont :: [(a->k)->k] -> ([a]->k)->k
concatCont = List.foldr (consCont (:)) ($ [])
{-# INLINE concatCont #-}
consCont :: (a->b->c) -> ((a->k)->k) -> ((b->k)->k) -> (c->k)->k
consCont ab2c a2k2k b2k2k = \c2k -> a2k2k $ \a -> b2k2k $ \b -> c2k (ab2c a b)
{-# INLINE consCont #-}
mapCont :: (a->b) -> ((a->k)->k) -> ((b->k)->k)
mapCont a2b a2k2k = \b2k -> a2k2k (b2k . a2b)
{-# INLINE mapCont #-}
pTokenAttr ::
forall e m src.
ReadConstraints src =>
QName ->
Set.Set (P.ErrorItem (P.Token (ReadStream src))) ->
P.ParsecT e (ReadStream src) m (src EscapedAttr)
pTokenAttr an ps = P.ParsecT $ \st@(P.State s@(attrs,trees) o pst de) cok _ _ eerr ->
case HM.lookup an attrs of
Just av -> cok av (P.State (HM.delete an attrs, trees) (o+1) pst de) mempty
Nothing -> eerr (P.TrivialError o us ps) st
where
us = case P.take1_ s of
Nothing -> pure P.EndOfInput
Just (t,_ts) -> (Just . P.Tokens . pure) t
{-# INLINE pTokenAttr #-}
pTokenAttrNameClass ::
forall e m src.
ReadConstraints src =>
NameClass ->
Set.Set (P.ErrorItem (P.Token (ReadStream src))) ->
P.ParsecT e (ReadStream src) m (QName, src EscapedAttr)
pTokenAttrNameClass nc ps = P.ParsecT $ \st@(P.State s@(attrs,trees) o pst de) cok _ _ eerr ->
case HM.toList attrs of
a@(an,_av):_ | matchNameClass nc an -> cok a (P.State (HM.delete an attrs, trees) (o+1) pst de) mempty
_ -> eerr (P.TrivialError o us ps) st
where
us = case P.take1_ s of
Nothing -> pure P.EndOfInput
Just (t,_ts) -> (Just . P.Tokens . pure) t
{-# INLINE pTokenAttrNameClass #-}
pTokenPI ::
forall e m src.
UnSource src =>
PName ->
Set.Set (P.ErrorItem (P.Token (ReadStream src))) ->
P.ParsecT e (ReadStream src) m TL.Text
pTokenPI n ps = P.ParsecT $ \st@(P.State s o pst de) cok _ _ eerr ->
case take1 s of
Nothing -> eerr (P.TrivialError o us ps) st
where us = pure P.EndOfInput
Just (c, cs)
| Right (TS.Tree nod _) <- c
, NodePI pn pv <- unSource nod
, pn == n -> cok pv (P.State cs (o+1) pst de) mempty
| otherwise -> eerr (P.TrivialError o us ps) st
where
us = case take1 s of
Nothing -> pure P.EndOfInput
Just (t,_ts) -> (Just . P.Tokens . pure) t
where
take1 = take1_ isIgnoredNode
where
isIgnoredNode = \case
NodeComment{} -> True
_ -> False
removeXMLNS ::
HM.HashMap QName (src EscapedAttr) ->
HM.HashMap QName (src EscapedAttr)
removeXMLNS =
HM.filterWithKey $ \an _av ->
case an of
QName "" "xmlns" -> False
QName ns _l -> ns /= xmlns_xmlns
removeSpaces :: UnSource src => Trees src -> Trees src
removeSpaces xs =
if (`all` xs) $ \case
TS.Tree nod _ts
| NodeText (EscapedText et) <- unSource nod ->
all (\case
EscapedPlain t -> TL.all Char.isSpace t
_ -> False) et
_ -> True
then (`Seq.filter` xs) $ \case
TS.Tree nod _ts
| NodeText EscapedText{} <- unSource nod -> False
_ -> True
else xs
readNested ::
Ord err =>
ReadConstraints src =>
Read src err f a ->
ReadStream src ->
Read src err f a
readNested (Read p) stateInput = Read $ do
st <- P.getParserState
(st', res) <- lift $ P.runParserT' (p <* P.eof) st
{ P.stateInput
, P.stateOffset = P.stateOffset st
}
P.updateParserState (\s -> s{P.stateOffset = P.stateOffset st'})
case res of
Right a -> return a
Left (P.ParseErrorBundle errs _) ->
case NonEmpty.head errs of
P.TrivialError _o us es -> P.failure us es
P.FancyError _o es -> P.fancyFailure es
class DecodeText a where
decodeText :: P.Parsec Void TL.Text a
instance DecodeText String where
decodeText = TL.unpack . fst <$>
P.match (P.skipMany P.anySingle)
instance DecodeText Text.Text where
decodeText = TL.toStrict . fst <$>
P.match (P.skipMany P.anySingle)
instance DecodeText TL.Text where
decodeText = fst <$>
P.match (P.skipMany P.anySingle)
instance DecodeText Bool where
decodeText =
False <$ (P.string "false" <|> P.string "0") <|>
True <$ (P.string "true" <|> P.string "1")
instance DecodeText Integer where
decodeText = P.signed (return ()) P.decimal
instance DecodeText Natural where
decodeText = P.optional (P.char '+') *> P.decimal
parseErrorTextPretty ::
P.ShowErrorComponent err =>
P.ParseError (ReadStream FileSourced) err -> String
parseErrorTextPretty (P.TrivialError o us ps) =
if isNothing us && Set.null ps
then "unknown parse error\n"
else
(case us of
Just P.Tokens{} -> ""
_ ->
"node #"<>show o<>"\n"
) <>
messageItemsPretty "unexpected "
(showErrorItem px <$> maybeToList us) <>
messageItemsPretty "expecting "
(showErrorItem px <$> Set.toAscList ps)
where px = Proxy :: Proxy s
parseErrorTextPretty err = P.parseErrorTextPretty err
messageItemsPretty :: String -> [String] -> String
messageItemsPretty prefix ts
| null ts = ""
| otherwise = prefix <> orList ts <> "\n"
orList :: IsString s => Monoid s => [s] -> s
orList [] = mempty
orList [x] = x
orList [x,y] = x <> " or " <> y
orList xs = mconcat (List.intersperse ", " (List.init xs)) <> ", or " <> List.last xs
showErrorItem ::
(s ~ ReadStream (Sourced (FileSource Offset))) =>
Proxy s -> P.ErrorItem (P.Token s) -> String
showErrorItem px = \case
P.Tokens ts -> P.showTokens px ts
P.Label label -> NonEmpty.toList label
P.EndOfInput -> "end-of-node"