-- | DOM-based parsing and rendering. -- -- This module uses the datatypes from "Data.XML.Types", with the @String@-based -- function being the “default” parsing one and the @Text.Lazy@-based one being -- the “default” rendering one, @writeFile@ and @readFile@ being convenience -- functions. It uses more or less the same naming conventions as module Text.SXML ( -- * Parsing readFile -- ** String ,parseString -- ** Text ,parseText ,parseLazyText -- * Rendering ,writeFile -- ** String ,renderString -- ** Text ,renderText ,renderLazyText ) where import Data.Monoid import Data.Maybe import Data.List import Text.SXML.Internal import Text.ParserCombinators.Poly.State import Prelude hiding (readFile, writeFile) import Control.Applicative hiding ((<|>),many) import qualified System.IO as IO import qualified Data.XML.Types as XT import qualified Data.Text.Lazy as T import qualified Data.Text.Lazy.IO as TIO import qualified Data.Text.Lazy.Builder as TB import qualified Data.Text as TS import qualified Data.Map as M type SXMLParser a = Parser [M.Map TS.Text (TS.Text, Maybe TS.Text)] Token a -- A parser accepts a stream of Token and uses a stack of maps from -- Text [the SXML prefix] to (Text, Maybe Text) [the URI and, if present, the original prefix] -- as state to store the currently active namespaces readFile :: FilePath -> IO (Either String XT.Document) readFile file = do sxml <- IO.readFile file return $ parseString sxml parseText :: TS.Text -> Either String XT.Document parseText = parseString . TS.unpack parseLazyText :: T.Text -> Either String XT.Document parseLazyText = parseString . T.unpack parseString :: String -> Either String XT.Document parseString txt = f3 $ runParser getSXML [M.empty] $ lexer txt where f3 (x,_,_) = x writeFile :: FilePath -> XT.Document -> IO () writeFile file doc = TIO.writeFile file (renderLazyText doc) renderText :: XT.Document -> TS.Text renderText = T.toStrict . renderLazyText renderString :: XT.Document -> String renderString = T.unpack . renderLazyText renderLazyText :: XT.Document -> T.Text renderLazyText = TB.toLazyText . builderDoc where tagend = TB.singleton ')' tagstart = TB.singleton '(' space = TB.singleton ' ' empty = TB.fromString "" builderString txt = let toSchemeStringBuilder txt = fold_mod ((<>) . (<> TB.fromString "\\\\")) splitDoubleQuotes (TS.split (=='\\') txt) splitDoubleQuotes txt = fold_mod ((<>) . (<> TB.fromString "\\\"")) splitCarriageReturns (TS.split (=='"') txt) splitCarriageReturns txt = fold_mod ((<>) . (<> TB.fromString "\\r")) splitLineFeeds (TS.split (=='\r') txt) splitLineFeeds txt = fold_mod ((<>) . (<> TB.fromString "\\n")) splitFormFeeds (TS.split (=='\n') txt) splitFormFeeds txt = fold_mod ((<>) . (<> TB.fromString "\\f")) splitBackspaces (TS.split (=='\f') txt) splitBackspaces txt = fold_mod ((<>) . (<> TB.fromString "\\b")) splitTabulations (TS.split (=='\b') txt) splitTabulations txt = fold_mod ((<>) . (<> TB.fromString "\\t")) splitVerticalTab (TS.split (=='\t') txt) splitVerticalTab txt = fold_mod ((<>) . (<> TB.fromString "\\v")) TB.fromText (TS.split (=='\v') txt) fold_mod f t l = foldr1 f (map t l) stringsep = TB.singleton '"' in stringsep <> toSchemeStringBuilder txt <> stringsep nameToBuilder n = maybe empty ((<> TB.singleton ':') . TB.fromText) (XT.nameNamespace n) <> (TB.fromText . XT.nameLocalName $ n) builderDoc doc = TB.fromString "(*TOP*" <> (builderPrologue . sortBy prologueSorter $ ((XT.prologueBefore . XT.documentPrologue $ doc) ++ (XT.prologueAfter . XT.documentPrologue $ doc) ++ XT.documentEpilogue doc)) <> (builderElem . XT.documentRoot $ doc) <> tagend builderElem el = tagstart <> (nameToBuilder . XT.elementName $ el) <> space <> (builderAttrList . XT.elementAttributes $ el) <> (builderChildren . XT.elementNodes $ el) <> tagend builderContent (XT.ContentText t) = builderString t builderContent (XT.ContentEntity e) = TB.fromString "(*ENTITY* " <> TB.fromText e <> TB.fromString " \"\"" <> tagend builderInstruction (XT.Instruction tgt dat) = TB.fromString "(*PI* " <> TB.fromText tgt <> space <> builderString dat <> tagend builderComment ct = TB.fromString "(*COMMENT* " <> builderString ct <> tagend builderAttrList [] = empty builderAttrList al = let helperAttr [] = tagend helperAttr ((n,cs):ats) = tagstart <> nameToBuilder n <> helperAttrVal cs <> helperAttr ats helperAttrVal = foldr ((<>) . (space <>) . builderContent) tagend in TB.fromString "(@ " <> helperAttr al builderChildren [] = empty builderChildren (c:cs) = let helperChild (XT.NodeComment cmt) = builderComment cmt helperChild (XT.NodeInstruction instr) = builderInstruction instr helperChild (XT.NodeContent cnt) = builderContent cnt helperChild (XT.NodeElement elt) = builderElem elt in space <> helperChild c <> builderChildren cs builderPrologue [] = space builderPrologue (e:es) = let helperPrologue (XT.MiscInstruction instr) = builderInstruction instr helperPrologue (XT.MiscComment cmt) = builderComment cmt in space <> helperPrologue e <> builderPrologue es prologueSorter (XT.MiscInstruction _) (XT.MiscComment _) = GT prologueSorter (XT.MiscComment _) (XT.MiscInstruction _) = LT prologueSorter _ _ = EQ getSXML :: SXMLParser XT.Document getSXML = do lparen root pr <- prologue rEl <- element rparen return $ XT.Document pr rEl [] prologue :: SXMLParser XT.Prologue prologue = do optional annotations pis <- many prologueProcInstr comments <- many prologueComment return $ XT.Prologue (pis ++ comments) Nothing [] prologueComment = do c <- commentHelper return $ XT.MiscComment c comment = do c <- commentHelper return $ XT.NodeComment c commentHelper = parens (cmt >> strTok) prologueProcInstr = do inst <- prologueHelper return $ XT.MiscInstruction inst procInstr = do inst <- prologueHelper return $ XT.NodeInstruction inst prologueHelper = parens $ do pinst target <- nodeTok optional annotations instr <- strTok return $ XT.Instruction target instr annotations = do lparen atl namespaces many annotation rparen return () annotation = do lparen name <- nodeTok target <- nodeTok <|> strTok rparen return () namespaces = do lparen nsl many namespace rparen return () namespace = do lparen nid <- nodeTok uri <- strTok oid <- optional nodeTok rparen stUpdate (\(cns:stns) -> (M.insert nid (uri, oid) cns : stns)) return () helperName :: TS.Text -> SXMLParser XT.Name helperName qname = let (prefv, uqnamev) = TS.breakOnEnd sep qname (pref, uqname) = if TS.null prefv then (TS.empty, uqnamev) else (TS.init prefv, uqnamev) in do nss <- stGet if TS.null pref then return $ XT.Name uqname Nothing Nothing else let lval = lookupS pref nss in case lval of Nothing -> return $ XT.Name uqname (Just pref) Nothing Just (uri, _) -> return $ XT.Name uqname (Just uri) (Just pref) element :: SXMLParser XT.Element element = do lparen eName <- nodeTok stUpdate (M.empty:) attrs <- optional annotAttributes let attrs' = fromMaybe [] attrs children <- many childOfElement rparen ename <- helperName eName stUpdate tail return $ XT.Element ename attrs' children childOfElement = oneOf [elemText, comment, procInstr, entity, elementChild] elementChild = do el <- element return $ XT.NodeElement el attribute = do lparen name <- nodeTok val <- optional strTok optional annotations rparen atname <- helperName name case val of Nothing -> return (atname, [XT.ContentText $ XT.nameLocalName atname]) Just v -> return (atname, [XT.ContentText v]) annotAttributes = do lparen atl attrs <- many attribute optional annotations rparen return attrs entity = do lparen ent pid <- strTok sid <- strTok rparen return $ XT.NodeContent $ XT.ContentEntity pid elemText = do val <- strTok return $ XT.NodeContent $ XT.ContentText val root = satisfy' cdr "DocRoot" where cdr tok = case tok of DocRoot -> True _ -> False cmt = satisfy' cct "Comment" where cct tok = case tok of Comment -> True _ -> False ent = satisfy' cet "Entity" where cet tok = case tok of Entity -> True _ -> False nsl = satisfy' cns "NamespacesList" where cns tok = case tok of NamespacesList -> True _ -> False atl = satisfy' cal "AttributesList" where cal tok = case tok of AttributesList -> True _ -> False pinst = satisfy' cpi "PI" where cpi tok = case tok of PI -> True _ -> False lparen = satisfy' opp "OpenParen" where opp tok = case tok of OpenParen -> True _ -> False rparen = satisfy' clp "CloseParen" where clp tok = case tok of CloseParen -> True _ -> False strTok = (\(Str s) -> s) <$> satisfy' cs "Str" where cs tok = case tok of Str _ -> True _ -> False nodeTok = (\(NodeName s) -> s) <$> satisfy' cn "NodeName" where cn tok = case tok of NodeName _ -> True _ -> False satisfy' p attendu = do x <- next if p x then return x else case x of LexError s -> failBad s _ -> fail ("Error: " ++ attendu ++ " expected, was " ++ show x) parens :: SXMLParser a -> SXMLParser a parens = bracket lparen rparen --lookupS :: TS.Text -> [M.Map TS.Text (TS.Text, Maybe TS.Text)] -> Maybe (TS.Text, Maybe TS.Text) lookupS _ [] = Nothing lookupS idx (m:ms) = case M.lookup idx m of Nothing -> lookupS idx ms Just uri -> Just uri sep = TS.singleton ':'