{-# LANGUAGE OverloadedStrings #-} -------------------------------------------------------------------------------- -- See end of this file for licence information. -------------------------------------------------------------------------------- -- | -- Module : N3 -- Copyright : (c) 2003, Graham Klyne, 2009 Vasili I Galchin, -- 2011, 2012 Douglas Burke -- License : GPL V2 -- -- Maintainer : Douglas Burke -- Stability : experimental -- Portability : OverloadedStrings -- -- This Module implements a Notation 3 formatter -- for an 'RDFGraph' value. -- -- REFERENCES: -- -- - \"Notation3 (N3): A readable RDF syntax\", -- W3C Team Submission 14 January 2008, -- -- -- - Tim Berners-Lee's design issues series notes and description, -- -- -- - Notation 3 Primer by Sean Palmer, -- -- -- TODO: -- -- * Initial prefix list to include nested formulae; -- then don't need to update prefix list for these. -- -- * correct output of strings containing unsupported escape -- characters (such as @\\q@) -- -- * more flexible terminator generation for formatted formulae -- (for inline blank nodes.) -- -------------------------------------------------------------------------------- {- TODO: The code used to determine whether a blank node can be written using the "[]" short form could probably take advantage of the GraphPartition module. -} module Swish.RDF.Formatter.N3 ( NodeGenLookupMap , formatGraphAsText , formatGraphAsLazyText , formatGraphAsBuilder , formatGraphIndent , formatGraphDiag ) where import Swish.RDF.Formatter.Internal (NodeGenLookupMap, SubjTree, PredTree , LabelContext(..) , NodeGenState(..), emptyNgs , findMaxBnode , getCollection , processArcs , findPrefix) import Swish.Namespace (ScopedName, getScopeLocal, getScopeURI) import Swish.QName (getLName) import Swish.RDF.Graph ( RDFGraph, RDFLabel(..), NamespaceMap, emptyNamespaceMap, FormulaMap, emptyFormulaMap, setNamespaces, getNamespaces, getFormulae, emptyRDFGraph , quote , quoteT , resRdfFirst, resRdfRest ) import Swish.RDF.Vocabulary ( fromLangTag, rdfType, rdfNil, owlSameAs, logImplies , xsdBoolean, xsdDecimal, xsdInteger, xsdDouble ) import Control.Monad (liftM, when, void) import Control.Monad.State (State, modify, get, put, runState) import Data.Char (isDigit) import Data.List (partition, intersperse) import Data.Monoid (Monoid(..)) import Data.Word (Word32) -- it strikes me that using Lazy Text here is likely to be -- wrong; however I have done no profiling to back this -- assumption up! import qualified Data.Map as M import qualified Data.Text as T import qualified Data.Text.Lazy as L import qualified Data.Text.Lazy.Builder as B -- temporary conversion quoteB :: Bool -> String -> B.Builder quoteB f v = B.fromString $ quote f v ---------------------------------------------------------------------- -- Graph formatting state monad ---------------------------------------------------------------------- -- -- The graph to be formatted is carried as part of the formatting -- state, so that decisions about what needs to be formatted can -- themselves be based upon and reflected in the state (e.g. if a -- decision is made to include a blank node inline, it can be removed -- from the graph state that remains to be formatted). data N3FormatterState = N3FS { indent :: B.Builder , lineBreak :: Bool , graph :: RDFGraph , subjs :: SubjTree RDFLabel , props :: PredTree RDFLabel -- for last subject selected , objs :: [RDFLabel] -- for last property selected , formAvail :: FormulaMap RDFLabel , formQueue :: [(RDFLabel,RDFGraph)] , nodeGenSt :: NodeGenState , bNodesCheck :: [RDFLabel] -- these bNodes are not to be converted to '[..]' format , traceBuf :: [String] } type Formatter a = State N3FormatterState a emptyN3FS :: NodeGenState -> N3FormatterState emptyN3FS ngs = N3FS { indent = "\n" , lineBreak = False , graph = emptyRDFGraph , subjs = [] , props = [] , objs = [] , formAvail = emptyFormulaMap , formQueue = [] , nodeGenSt = ngs , bNodesCheck = [] , traceBuf = [] } getIndent :: Formatter B.Builder getIndent = indent `liftM` get setIndent :: B.Builder -> Formatter () setIndent ind = modify $ \st -> st { indent = ind } getLineBreak :: Formatter Bool getLineBreak = lineBreak `liftM` get setLineBreak :: Bool -> Formatter () setLineBreak brk = modify $ \st -> st { lineBreak = brk } getNgs :: Formatter NodeGenState getNgs = nodeGenSt `liftM` get setNgs :: NodeGenState -> Formatter () setNgs ngs = modify $ \st -> st { nodeGenSt = ngs } getPrefixes :: Formatter NamespaceMap getPrefixes = prefixes `liftM` getNgs getSubjs :: Formatter (SubjTree RDFLabel) getSubjs = subjs `liftM` get setSubjs :: SubjTree RDFLabel -> Formatter () setSubjs sl = modify $ \st -> st { subjs = sl } getProps :: Formatter (PredTree RDFLabel) getProps = props `liftM` get setProps :: PredTree RDFLabel -> Formatter () setProps ps = modify $ \st -> st { props = ps } {- getObjs :: Formatter ([RDFLabel]) getObjs = objs `liftM` get setObjs :: [RDFLabel] -> Formatter () setObjs os = do st <- get put $ st { objs = os } -} getBnodesCheck :: Formatter [RDFLabel] getBnodesCheck = bNodesCheck `liftM` get {- addTrace :: String -> Formatter () addTrace tr = do st <- get put $ st { traceBuf = tr : traceBuf st } -} queueFormula :: RDFLabel -> Formatter () queueFormula fn = do st <- get let fa = formAvail st _newState fv = st { formAvail = M.delete fn fa, formQueue = (fn,fv) : formQueue st } case M.lookup fn fa of Nothing -> return () Just v -> void $ put $ _newState v {- Return the graph associated with the label and delete it from the store, if there is an association, otherwise return Nothing. -} extractFormula :: RDFLabel -> Formatter (Maybe RDFGraph) extractFormula fn = do st <- get let (rval, nform) = M.updateLookupWithKey (\_ _ -> Nothing) fn $ formAvail st put $ st { formAvail = nform } return rval {- moreFormulae :: Formatter Bool moreFormulae = do st <- get return $ not $ null (formQueue st) nextFormula :: Formatter (RDFLabel,RDFGraph) nextFormula = do st <- get let (nf : fq) = formQueue st put $ st { formQueue = fq } return nf -} {- TODO: Should we change the preds/objs entries as well? -} extractList :: LabelContext -> RDFLabel -> Formatter (Maybe [RDFLabel]) extractList lctxt ln = do osubjs <- getSubjs oprops <- getProps let mlst = getCollection osubjs' ln -- we only want to send in rdf:first/rdf:rest here fprops = filter ((`elem` [resRdfFirst, resRdfRest]) . fst) oprops osubjs' = case lctxt of SubjContext -> (ln, fprops) : osubjs _ -> osubjs -- tr = "extractList " ++ show ln ++ " (" ++ show lctxt ++ ")\n -> osubjs= " ++ show osubjs ++ "\n -> opreds= " ++ show oprops ++ "\n -> mlst= " ++ show mlst ++ "\n" -- addTrace tr case mlst of -- sl is guaranteed to be free of (ln,fprops) here if lctxt is SubjContext Just (sl,ls,_) -> do setSubjs sl when (lctxt == SubjContext) $ setProps $ filter ((`notElem` [resRdfFirst, resRdfRest]) . fst) oprops return (Just ls) Nothing -> return Nothing ---------------------------------------------------------------------- -- Define a top-level formatter function: ---------------------------------------------------------------------- -- | Convert the graph to text. formatGraphAsText :: RDFGraph -> T.Text formatGraphAsText = L.toStrict . formatGraphAsLazyText -- | Convert the graph to text. formatGraphAsLazyText :: RDFGraph -> L.Text formatGraphAsLazyText = B.toLazyText . formatGraphAsBuilder -- | Convert the graph to a Builder. formatGraphAsBuilder :: RDFGraph -> B.Builder formatGraphAsBuilder = formatGraphIndent "\n" True -- | Convert the graph to a builder using the given indentation text. formatGraphIndent :: B.Builder -- ^ indentation text -> Bool -- ^ are prefixes to be generated? -> RDFGraph -- ^ graph -> B.Builder formatGraphIndent indnt flag gr = let (res, _, _, _) = formatGraphDiag indnt flag gr in res -- | Format graph and return additional information formatGraphDiag :: B.Builder -- ^ indentation -> Bool -- ^ are prefixes to be generated? -> RDFGraph -> (B.Builder, NodeGenLookupMap, Word32, [String]) formatGraphDiag indnt flag gr = let fg = formatGraph indnt " .\n" False flag gr ngs = emptyNgs { prefixes = M.empty, nodeGen = findMaxBnode gr } (out, fgs) = runState fg (emptyN3FS ngs) ogs = nodeGenSt fgs in (out, nodeMap ogs, nodeGen ogs, traceBuf fgs) ---------------------------------------------------------------------- -- Formatting as a monad-based computation ---------------------------------------------------------------------- formatGraph :: B.Builder -- indentation string -> B.Builder -- text to be placed after final statement -> Bool -- True if a line break is to be inserted at the start -> Bool -- True if prefix strings are to be generated -> RDFGraph -- graph to convert -> Formatter B.Builder formatGraph ind end dobreak dopref gr = do setIndent ind setLineBreak dobreak setGraph gr fp <- if dopref then formatPrefixes (getNamespaces gr) else return mempty more <- moreSubjects if more then do fr <- formatSubjects return $ mconcat [fp, fr, end] else return fp formatPrefixes :: NamespaceMap -> Formatter B.Builder formatPrefixes pmap = do let mls = map pref $ M.assocs pmap ls <- sequence mls return $ mconcat ls where pref (Just p,u) = nextLine $ mconcat ["@prefix ", B.fromText p, ": <", quoteB True (show u), "> ."] pref (_,u) = nextLine $ mconcat ["@prefix : <", quoteB True (show u), "> ."] {- NOTE: I expect there to be confusion below where I need to convert from Text to Builder -} formatSubjects :: Formatter B.Builder formatSubjects = do sb <- nextSubject sbstr <- formatLabel SubjContext sb flagP <- moreProperties if flagP then do prstr <- formatProperties sb sbstr flagS <- moreSubjects if flagS then do fr <- formatSubjects return $ mconcat [prstr, " .", fr] else return prstr else do txt <- nextLine sbstr flagS <- moreSubjects if flagS then do fr <- formatSubjects return $ mconcat [txt, " .", fr] else return txt {- TODO: now we are throwing a Builder around it is awkward to get the length of the text to calculate the indentation So a) change the indentation scheme b) pass around text instead of builder mkIndent :: L.Text -> L.Text mkIndent inVal = L.replicate (L.length inVal) " " -} hackIndent :: B.Builder hackIndent = " " formatProperties :: RDFLabel -> B.Builder -> Formatter B.Builder formatProperties sb sbstr = do pr <- nextProperty sb prstr <- formatLabel PredContext pr obstr <- formatObjects sb pr $ mconcat [sbstr, " ", prstr] more <- moreProperties let sbindent = hackIndent -- mkIndent sbstr if more then do fr <- formatProperties sb sbindent nl <- nextLine $ obstr `mappend` " ;" return $ nl `mappend` fr else nextLine obstr formatObjects :: RDFLabel -> RDFLabel -> B.Builder -> Formatter B.Builder formatObjects sb pr prstr = do ob <- nextObject sb pr obstr <- formatLabel ObjContext ob more <- moreObjects if more then do let prindent = hackIndent -- mkIndent prstr fr <- formatObjects sb pr prindent nl <- nextLine $ mconcat [prstr, " ", obstr, ","] return $ nl `mappend` fr else return $ mconcat [prstr, " ", obstr] insertFormula :: RDFGraph -> Formatter B.Builder insertFormula gr = do ngs0 <- getNgs ind <- getIndent let grm = formatGraph (ind `mappend` " ") "" True False (setNamespaces emptyNamespaceMap gr) (f3str, fgs') = runState grm (emptyN3FS ngs0) setNgs (nodeGenSt fgs') f4str <- nextLine " } " return $ mconcat [" { ",f3str, f4str] {- Add a list inline. We are given the labels that constitute the list, in order, so just need to display them surrounded by (). -} insertList :: [RDFLabel] -> Formatter B.Builder insertList [] = return "()" -- not convinced this can happen insertList xs = do ls <- mapM (formatLabel ObjContext) xs return $ mconcat ("( " : intersperse " " ls) `mappend` " )" {- Add a blank node inline. -} insertBnode :: LabelContext -> RDFLabel -> Formatter B.Builder insertBnode SubjContext lbl = do flag <- moreProperties txt <- if flag then (`mappend` "\n") `liftM` formatProperties lbl "" else return "" -- TODO: handle indentation? return $ mconcat ["[", txt, "]"] insertBnode _ lbl = do ost <- get let osubjs = subjs ost oprops = props ost oobjs = objs ost (bsubj, rsubjs) = partition ((== lbl) . fst) osubjs rprops = case bsubj of [(_,rs)] -> rs _ -> [] -- we essentially want to create a new subgraph -- for this node but it's not as simple as that since -- we could have something like -- :a :b [ :foo [ :bar "xx" ] ] -- so we still need to carry around the whole graph -- nst = ost { subjs = rsubjs, props = rprops, objs = [] } put nst flag <- moreProperties txt <- if flag then (`mappend` "\n") `liftM` formatProperties lbl "" else return "" -- TODO: how do we restore the original set up? -- I can't believe the following is sufficient -- nst' <- get let slist = map fst $ subjs nst' nsubjs = filter (\(l,_) -> l `elem` slist) osubjs put $ nst' { subjs = nsubjs, props = oprops, objs = oobjs } -- TODO: handle indentation? return $ mconcat ["[", txt, "]"] ---------------------------------------------------------------------- -- Formatting helpers ---------------------------------------------------------------------- newState :: RDFGraph -> N3FormatterState -> N3FormatterState newState gr st = let ngs0 = nodeGenSt st pre' = prefixes ngs0 `M.union` getNamespaces gr ngs' = ngs0 { prefixes = pre' } (arcSubjs, bNodes) = processArcs gr in st { graph = gr , subjs = arcSubjs , props = [] , objs = [] , formAvail = getFormulae gr , nodeGenSt = ngs' , bNodesCheck = bNodes } setGraph :: RDFGraph -> Formatter () setGraph = modify . newState hasMore :: (N3FormatterState -> [b]) -> Formatter Bool hasMore lens = (not . null . lens) `liftM` get moreSubjects :: Formatter Bool moreSubjects = hasMore subjs moreProperties :: Formatter Bool moreProperties = hasMore props moreObjects :: Formatter Bool moreObjects = hasMore objs nextSubject :: Formatter RDFLabel nextSubject = do st <- get let sb:sbs = subjs st nst = st { subjs = sbs , props = snd sb , objs = [] } put nst return $ fst sb nextProperty :: RDFLabel -> Formatter RDFLabel nextProperty _ = do st <- get let pr:prs = props st nst = st { props = prs , objs = snd pr } put nst return $ fst pr nextObject :: RDFLabel -> RDFLabel -> Formatter RDFLabel nextObject _ _ = do st <- get let ob:obs = objs st nst = st { objs = obs } put nst return ob nextLine :: B.Builder -> Formatter B.Builder nextLine str = do ind <- getIndent brk <- getLineBreak if brk then return $ ind `mappend` str else do -- After first line, always insert line break setLineBreak True return str -- Format a label -- Most labels are simply displayed as provided, but there are a -- number of wrinkles to take care of here: -- (a) blank nodes automatically allocated on input, with node -- identifiers of the form of a digit string nnn. These are -- not syntactically valid, and are reassigned node identifiers -- of the form _nnn, where nnn is chosen so that is does not -- clash with any other identifier in the graph. -- (b) URI nodes: if possible, replace URI with qname, -- else display as -- (c) formula nodes (containing graphs). -- (d) use the "special-case" formats for integer/float/double -- literals. -- -- [[[TODO:]]] -- (d) generate multi-line literals when appropriate -- -- This is being updated to produce inline formula, lists and -- blank nodes. The code is not efficient. -- specialTable :: [(ScopedName, String)] specialTable = [ (rdfType, "a") , (owlSameAs, "=") , (logImplies, "=>") , (rdfNil, "()") ] formatLabel :: LabelContext -> RDFLabel -> Formatter B.Builder {- formatLabel lab@(Blank (_:_)) = do name <- formatNodeId lab queueFormula lab return name -} {- The "[..]" conversion is done last, after "()" and "{}" checks. -} formatLabel lctxt lab@(Blank (_:_)) = do mlst <- extractList lctxt lab case mlst of Just lst -> insertList lst Nothing -> do mfml <- extractFormula lab case mfml of Just fml -> insertFormula fml Nothing -> do nb1 <- getBnodesCheck if lctxt /= PredContext && lab `notElem` nb1 then insertBnode lctxt lab else formatNodeId lab formatLabel _ lab@(Res sn) = case lookup sn specialTable of Just txt -> return $ quoteB True txt -- TODO: do we need to quote? Nothing -> do pr <- getPrefixes let nsuri = getScopeURI sn local = getLName $ getScopeLocal sn prefix = findPrefix nsuri pr name = case prefix of Just (Just p) -> B.fromText $ quoteT True $ mconcat [p, ":", local] -- TODO: what are quoting rules for QNames _ -> mconcat ["<", quoteB True (show nsuri ++ T.unpack local), ">"] {- name = case prefix of Just p -> quoteB True (p ++ ":" ++ local) -- TODO: what are quoting rules for QNames _ -> mconcat ["<", quoteB True (nsuri++local), ">"] -} queueFormula lab return name -- The canonical notation for xsd:double in XSD, with an upper-case E, -- does not match the syntax used in N3, so we need to convert here. -- Rather than converting back to a Double and then displaying that -- we just convert E to e for now. -- formatLabel _ (TypedLit lit dtype) | dtype == xsdDouble = return $ B.fromText $ T.toLower lit | dtype `elem` [xsdBoolean, xsdDecimal, xsdInteger] = return $ B.fromText lit | otherwise = return $ quoteText lit `mappend` "^^" `mappend` showScopedName dtype formatLabel _ (LangLit lit lcode) = return $ quoteText lit `mappend` "@" `mappend` B.fromText (fromLangTag lcode) formatLabel _ (Lit lit) = return $ quoteText lit formatLabel _ lab = return $ B.fromString $ show lab {- We have to decide whether to use " or """ to quote the string. There is also no need to restrict the string to the ASCII character set; this could be an option but we can also leave Unicode as is (or at least convert to UTF-8). If we use """ to surround the string then we protect the last character if it is a " (assuming it isn't protected). -} quoteText :: T.Text -> B.Builder quoteText txt = let st = T.unpack txt -- TODO: fix qst = quoteB (n==1) st n = if '\n' `elem` st || '"' `elem` st then 3 else 1 qch = B.fromString (replicate n '"') in mconcat [qch, qst, qch] formatNodeId :: RDFLabel -> Formatter B.Builder formatNodeId lab@(Blank (lnc:_)) = if isDigit lnc then mapBlankNode lab else return $ B.fromString $ show lab formatNodeId other = error $ "formatNodeId not expecting a " ++ show other -- to shut up -Wall mapBlankNode :: RDFLabel -> Formatter B.Builder mapBlankNode lab = do ngs <- getNgs let cmap = nodeMap ngs cval = nodeGen ngs nv <- case M.findWithDefault 0 lab cmap of 0 -> do let nval = succ cval nmap = M.insert lab nval cmap setNgs $ ngs { nodeGen = nval, nodeMap = nmap } return nval n -> return n -- TODO: is this what we want? return $ "_:swish" `mappend` B.fromString (show nv) -- TODO: need to be a bit more clever with this than we did in NTriples -- not sure the following counts as clever enough ... -- showScopedName :: ScopedName -> B.Builder {- showScopedName (ScopedName n l) = let uri = nsURI n ++ l in quote uri -} showScopedName = quoteB True . show -------------------------------------------------------------------------------- -- -- Copyright (c) 2003, Graham Klyne, 2009 Vasili I Galchin, -- 2011, 2012 Douglas Burke -- All rights reserved. -- -- This file is part of Swish. -- -- Swish is free software; you can redistribute it and/or modify -- it under the terms of the GNU General Public License as published by -- the Free Software Foundation; either version 2 of the License, or -- (at your option) any later version. -- -- Swish is distributed in the hope that it will be useful, -- but WITHOUT ANY WARRANTY; without even the implied warranty of -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- GNU General Public License for more details. -- -- You should have received a copy of the GNU General Public License -- along with Swish; if not, write to: -- The Free Software Foundation, Inc., -- 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -- --------------------------------------------------------------------------------