{-# LANGUAGE OverloadedStrings #-}
--------------------------------------------------------------------------------
-- See end of this file for licence information.
--------------------------------------------------------------------------------
-- |
-- Module : N3
-- Copyright : (c) 2003, Graham Klyne, 2009 Vasili I Galchin,
-- 2011, 2012 Douglas Burke
-- License : GPL V2
--
-- Maintainer : Douglas Burke
-- Stability : experimental
-- Portability : OverloadedStrings
--
-- This Module implements a Notation 3 formatter
-- for an 'RDFGraph' value.
--
-- REFERENCES:
--
-- - \"Notation3 (N3): A readable RDF syntax\",
-- W3C Team Submission 14 January 2008,
--
--
-- - Tim Berners-Lee's design issues series notes and description,
--
--
-- - Notation 3 Primer by Sean Palmer,
--
--
-- TODO:
--
-- * Initial prefix list to include nested formulae;
-- then don't need to update prefix list for these.
--
-- * correct output of strings containing unsupported escape
-- characters (such as @\\q@)
--
-- * more flexible terminator generation for formatted formulae
-- (for inline blank nodes.)
--
--------------------------------------------------------------------------------
{-
TODO:
The code used to determine whether a blank node can be written
using the "[]" short form could probably take advantage of the
GraphPartition module.
-}
module Swish.RDF.Formatter.N3
( NodeGenLookupMap
, formatGraphAsText
, formatGraphAsLazyText
, formatGraphAsBuilder
, formatGraphIndent
, formatGraphDiag
)
where
import Swish.RDF.Formatter.Internal (NodeGenLookupMap, SubjTree, PredTree
, LabelContext(..)
, NodeGenState(..), emptyNgs
, findMaxBnode
, getCollection
, processArcs
, findPrefix)
import Swish.Namespace (ScopedName, getScopeLocal, getScopeURI)
import Swish.QName (getLName)
import Swish.RDF.Graph (
RDFGraph, RDFLabel(..),
NamespaceMap,
emptyNamespaceMap,
FormulaMap, emptyFormulaMap,
setNamespaces, getNamespaces,
getFormulae,
emptyRDFGraph
, quote
, quoteT
, resRdfFirst, resRdfRest
)
import Swish.RDF.Vocabulary (
fromLangTag,
rdfType,
rdfNil,
owlSameAs, logImplies
, xsdBoolean, xsdDecimal, xsdInteger, xsdDouble
)
import Control.Monad (liftM, when, void)
import Control.Monad.State (State, modify, get, put, runState)
import Data.Char (isDigit)
import Data.List (partition, intersperse)
import Data.Monoid (Monoid(..))
import Data.Word (Word32)
-- it strikes me that using Lazy Text here is likely to be
-- wrong; however I have done no profiling to back this
-- assumption up!
import qualified Data.Map as M
import qualified Data.Text as T
import qualified Data.Text.Lazy as L
import qualified Data.Text.Lazy.Builder as B
-- temporary conversion
quoteB :: Bool -> String -> B.Builder
quoteB f v = B.fromString $ quote f v
----------------------------------------------------------------------
-- Graph formatting state monad
----------------------------------------------------------------------
--
-- The graph to be formatted is carried as part of the formatting
-- state, so that decisions about what needs to be formatted can
-- themselves be based upon and reflected in the state (e.g. if a
-- decision is made to include a blank node inline, it can be removed
-- from the graph state that remains to be formatted).
data N3FormatterState = N3FS
{ indent :: B.Builder
, lineBreak :: Bool
, graph :: RDFGraph
, subjs :: SubjTree RDFLabel
, props :: PredTree RDFLabel -- for last subject selected
, objs :: [RDFLabel] -- for last property selected
, formAvail :: FormulaMap RDFLabel
, formQueue :: [(RDFLabel,RDFGraph)]
, nodeGenSt :: NodeGenState
, bNodesCheck :: [RDFLabel] -- these bNodes are not to be converted to '[..]' format
, traceBuf :: [String]
}
type Formatter a = State N3FormatterState a
emptyN3FS :: NodeGenState -> N3FormatterState
emptyN3FS ngs = N3FS
{ indent = "\n"
, lineBreak = False
, graph = emptyRDFGraph
, subjs = []
, props = []
, objs = []
, formAvail = emptyFormulaMap
, formQueue = []
, nodeGenSt = ngs
, bNodesCheck = []
, traceBuf = []
}
getIndent :: Formatter B.Builder
getIndent = indent `liftM` get
setIndent :: B.Builder -> Formatter ()
setIndent ind = modify $ \st -> st { indent = ind }
getLineBreak :: Formatter Bool
getLineBreak = lineBreak `liftM` get
setLineBreak :: Bool -> Formatter ()
setLineBreak brk = modify $ \st -> st { lineBreak = brk }
getNgs :: Formatter NodeGenState
getNgs = nodeGenSt `liftM` get
setNgs :: NodeGenState -> Formatter ()
setNgs ngs = modify $ \st -> st { nodeGenSt = ngs }
getPrefixes :: Formatter NamespaceMap
getPrefixes = prefixes `liftM` getNgs
getSubjs :: Formatter (SubjTree RDFLabel)
getSubjs = subjs `liftM` get
setSubjs :: SubjTree RDFLabel -> Formatter ()
setSubjs sl = modify $ \st -> st { subjs = sl }
getProps :: Formatter (PredTree RDFLabel)
getProps = props `liftM` get
setProps :: PredTree RDFLabel -> Formatter ()
setProps ps = modify $ \st -> st { props = ps }
{-
getObjs :: Formatter ([RDFLabel])
getObjs = objs `liftM` get
setObjs :: [RDFLabel] -> Formatter ()
setObjs os = do
st <- get
put $ st { objs = os }
-}
getBnodesCheck :: Formatter [RDFLabel]
getBnodesCheck = bNodesCheck `liftM` get
{-
addTrace :: String -> Formatter ()
addTrace tr = do
st <- get
put $ st { traceBuf = tr : traceBuf st }
-}
queueFormula :: RDFLabel -> Formatter ()
queueFormula fn = do
st <- get
let fa = formAvail st
_newState fv = st {
formAvail = M.delete fn fa,
formQueue = (fn,fv) : formQueue st
}
case M.lookup fn fa of
Nothing -> return ()
Just v -> void $ put $ _newState v
{-
Return the graph associated with the label and delete it
from the store, if there is an association, otherwise
return Nothing.
-}
extractFormula :: RDFLabel -> Formatter (Maybe RDFGraph)
extractFormula fn = do
st <- get
let (rval, nform) = M.updateLookupWithKey (\_ _ -> Nothing) fn $ formAvail st
put $ st { formAvail = nform }
return rval
{-
moreFormulae :: Formatter Bool
moreFormulae = do
st <- get
return $ not $ null (formQueue st)
nextFormula :: Formatter (RDFLabel,RDFGraph)
nextFormula = do
st <- get
let (nf : fq) = formQueue st
put $ st { formQueue = fq }
return nf
-}
{-
TODO:
Should we change the preds/objs entries as well?
-}
extractList :: LabelContext -> RDFLabel -> Formatter (Maybe [RDFLabel])
extractList lctxt ln = do
osubjs <- getSubjs
oprops <- getProps
let mlst = getCollection osubjs' ln
-- we only want to send in rdf:first/rdf:rest here
fprops = filter ((`elem` [resRdfFirst, resRdfRest]) . fst) oprops
osubjs' =
case lctxt of
SubjContext -> (ln, fprops) : osubjs
_ -> osubjs
-- tr = "extractList " ++ show ln ++ " (" ++ show lctxt ++ ")\n -> osubjs= " ++ show osubjs ++ "\n -> opreds= " ++ show oprops ++ "\n -> mlst= " ++ show mlst ++ "\n"
-- addTrace tr
case mlst of
-- sl is guaranteed to be free of (ln,fprops) here if lctxt is SubjContext
Just (sl,ls,_) -> do
setSubjs sl
when (lctxt == SubjContext) $ setProps $ filter ((`notElem` [resRdfFirst, resRdfRest]) . fst) oprops
return (Just ls)
Nothing -> return Nothing
----------------------------------------------------------------------
-- Define a top-level formatter function:
----------------------------------------------------------------------
-- | Convert the graph to text.
formatGraphAsText :: RDFGraph -> T.Text
formatGraphAsText = L.toStrict . formatGraphAsLazyText
-- | Convert the graph to text.
formatGraphAsLazyText :: RDFGraph -> L.Text
formatGraphAsLazyText = B.toLazyText . formatGraphAsBuilder
-- | Convert the graph to a Builder.
formatGraphAsBuilder :: RDFGraph -> B.Builder
formatGraphAsBuilder = formatGraphIndent "\n" True
-- | Convert the graph to a builder using the given indentation text.
formatGraphIndent ::
B.Builder -- ^ indentation text
-> Bool -- ^ are prefixes to be generated?
-> RDFGraph -- ^ graph
-> B.Builder
formatGraphIndent indnt flag gr =
let (res, _, _, _) = formatGraphDiag indnt flag gr
in res
-- | Format graph and return additional information
formatGraphDiag ::
B.Builder -- ^ indentation
-> Bool -- ^ are prefixes to be generated?
-> RDFGraph
-> (B.Builder, NodeGenLookupMap, Word32, [String])
formatGraphDiag indnt flag gr =
let fg = formatGraph indnt " .\n" False flag gr
ngs = emptyNgs {
prefixes = M.empty,
nodeGen = findMaxBnode gr
}
(out, fgs) = runState fg (emptyN3FS ngs)
ogs = nodeGenSt fgs
in (out, nodeMap ogs, nodeGen ogs, traceBuf fgs)
----------------------------------------------------------------------
-- Formatting as a monad-based computation
----------------------------------------------------------------------
formatGraph ::
B.Builder -- indentation string
-> B.Builder -- text to be placed after final statement
-> Bool -- True if a line break is to be inserted at the start
-> Bool -- True if prefix strings are to be generated
-> RDFGraph -- graph to convert
-> Formatter B.Builder
formatGraph ind end dobreak dopref gr = do
setIndent ind
setLineBreak dobreak
setGraph gr
fp <- if dopref
then formatPrefixes (getNamespaces gr)
else return mempty
more <- moreSubjects
if more
then do
fr <- formatSubjects
return $ mconcat [fp, fr, end]
else return fp
formatPrefixes :: NamespaceMap -> Formatter B.Builder
formatPrefixes pmap = do
let mls = map pref $ M.assocs pmap
ls <- sequence mls
return $ mconcat ls
where
pref (Just p,u) = nextLine $ mconcat ["@prefix ", B.fromText p, ": <", quoteB True (show u), "> ."]
pref (_,u) = nextLine $ mconcat ["@prefix : <", quoteB True (show u), "> ."]
{-
NOTE:
I expect there to be confusion below where I need to
convert from Text to Builder
-}
formatSubjects :: Formatter B.Builder
formatSubjects = do
sb <- nextSubject
sbstr <- formatLabel SubjContext sb
flagP <- moreProperties
if flagP
then do
prstr <- formatProperties sb sbstr
flagS <- moreSubjects
if flagS
then do
fr <- formatSubjects
return $ mconcat [prstr, " .", fr]
else return prstr
else do
txt <- nextLine sbstr
flagS <- moreSubjects
if flagS
then do
fr <- formatSubjects
return $ mconcat [txt, " .", fr]
else return txt
{-
TODO: now we are throwing a Builder around it is awkward to
get the length of the text to calculate the indentation
So
a) change the indentation scheme
b) pass around text instead of builder
mkIndent :: L.Text -> L.Text
mkIndent inVal = L.replicate (L.length inVal) " "
-}
hackIndent :: B.Builder
hackIndent = " "
formatProperties :: RDFLabel -> B.Builder -> Formatter B.Builder
formatProperties sb sbstr = do
pr <- nextProperty sb
prstr <- formatLabel PredContext pr
obstr <- formatObjects sb pr $ mconcat [sbstr, " ", prstr]
more <- moreProperties
let sbindent = hackIndent -- mkIndent sbstr
if more
then do
fr <- formatProperties sb sbindent
nl <- nextLine $ obstr `mappend` " ;"
return $ nl `mappend` fr
else nextLine obstr
formatObjects :: RDFLabel -> RDFLabel -> B.Builder -> Formatter B.Builder
formatObjects sb pr prstr = do
ob <- nextObject sb pr
obstr <- formatLabel ObjContext ob
more <- moreObjects
if more
then do
let prindent = hackIndent -- mkIndent prstr
fr <- formatObjects sb pr prindent
nl <- nextLine $ mconcat [prstr, " ", obstr, ","]
return $ nl `mappend` fr
else return $ mconcat [prstr, " ", obstr]
insertFormula :: RDFGraph -> Formatter B.Builder
insertFormula gr = do
ngs0 <- getNgs
ind <- getIndent
let grm = formatGraph (ind `mappend` " ") "" True False
(setNamespaces emptyNamespaceMap gr)
(f3str, fgs') = runState grm (emptyN3FS ngs0)
setNgs (nodeGenSt fgs')
f4str <- nextLine " } "
return $ mconcat [" { ",f3str, f4str]
{-
Add a list inline. We are given the labels that constitute
the list, in order, so just need to display them surrounded
by ().
-}
insertList :: [RDFLabel] -> Formatter B.Builder
insertList [] = return "()" -- not convinced this can happen
insertList xs = do
ls <- mapM (formatLabel ObjContext) xs
return $ mconcat ("( " : intersperse " " ls) `mappend` " )"
{-
Add a blank node inline.
-}
insertBnode :: LabelContext -> RDFLabel -> Formatter B.Builder
insertBnode SubjContext lbl = do
flag <- moreProperties
txt <- if flag
then (`mappend` "\n") `liftM` formatProperties lbl ""
else return ""
-- TODO: handle indentation?
return $ mconcat ["[", txt, "]"]
insertBnode _ lbl = do
ost <- get
let osubjs = subjs ost
oprops = props ost
oobjs = objs ost
(bsubj, rsubjs) = partition ((== lbl) . fst) osubjs
rprops = case bsubj of
[(_,rs)] -> rs
_ -> []
-- we essentially want to create a new subgraph
-- for this node but it's not as simple as that since
-- we could have something like
-- :a :b [ :foo [ :bar "xx" ] ]
-- so we still need to carry around the whole graph
--
nst = ost { subjs = rsubjs,
props = rprops,
objs = []
}
put nst
flag <- moreProperties
txt <- if flag
then (`mappend` "\n") `liftM` formatProperties lbl ""
else return ""
-- TODO: how do we restore the original set up?
-- I can't believe the following is sufficient
--
nst' <- get
let slist = map fst $ subjs nst'
nsubjs = filter (\(l,_) -> l `elem` slist) osubjs
put $ nst' { subjs = nsubjs,
props = oprops,
objs = oobjs
}
-- TODO: handle indentation?
return $ mconcat ["[", txt, "]"]
----------------------------------------------------------------------
-- Formatting helpers
----------------------------------------------------------------------
newState :: RDFGraph -> N3FormatterState -> N3FormatterState
newState gr st =
let ngs0 = nodeGenSt st
pre' = prefixes ngs0 `M.union` getNamespaces gr
ngs' = ngs0 { prefixes = pre' }
(arcSubjs, bNodes) = processArcs gr
in st { graph = gr
, subjs = arcSubjs
, props = []
, objs = []
, formAvail = getFormulae gr
, nodeGenSt = ngs'
, bNodesCheck = bNodes
}
setGraph :: RDFGraph -> Formatter ()
setGraph = modify . newState
hasMore :: (N3FormatterState -> [b]) -> Formatter Bool
hasMore lens = (not . null . lens) `liftM` get
moreSubjects :: Formatter Bool
moreSubjects = hasMore subjs
moreProperties :: Formatter Bool
moreProperties = hasMore props
moreObjects :: Formatter Bool
moreObjects = hasMore objs
nextSubject :: Formatter RDFLabel
nextSubject = do
st <- get
let sb:sbs = subjs st
nst = st { subjs = sbs
, props = snd sb
, objs = []
}
put nst
return $ fst sb
nextProperty :: RDFLabel -> Formatter RDFLabel
nextProperty _ = do
st <- get
let pr:prs = props st
nst = st { props = prs
, objs = snd pr
}
put nst
return $ fst pr
nextObject :: RDFLabel -> RDFLabel -> Formatter RDFLabel
nextObject _ _ = do
st <- get
let ob:obs = objs st
nst = st { objs = obs }
put nst
return ob
nextLine :: B.Builder -> Formatter B.Builder
nextLine str = do
ind <- getIndent
brk <- getLineBreak
if brk
then return $ ind `mappend` str
else do
-- After first line, always insert line break
setLineBreak True
return str
-- Format a label
-- Most labels are simply displayed as provided, but there are a
-- number of wrinkles to take care of here:
-- (a) blank nodes automatically allocated on input, with node
-- identifiers of the form of a digit string nnn. These are
-- not syntactically valid, and are reassigned node identifiers
-- of the form _nnn, where nnn is chosen so that is does not
-- clash with any other identifier in the graph.
-- (b) URI nodes: if possible, replace URI with qname,
-- else display as
-- (c) formula nodes (containing graphs).
-- (d) use the "special-case" formats for integer/float/double
-- literals.
--
-- [[[TODO:]]]
-- (d) generate multi-line literals when appropriate
--
-- This is being updated to produce inline formula, lists and
-- blank nodes. The code is not efficient.
--
specialTable :: [(ScopedName, String)]
specialTable =
[ (rdfType, "a")
, (owlSameAs, "=")
, (logImplies, "=>")
, (rdfNil, "()")
]
formatLabel :: LabelContext -> RDFLabel -> Formatter B.Builder
{-
formatLabel lab@(Blank (_:_)) = do
name <- formatNodeId lab
queueFormula lab
return name
-}
{-
The "[..]" conversion is done last, after "()" and "{}" checks.
-}
formatLabel lctxt lab@(Blank (_:_)) = do
mlst <- extractList lctxt lab
case mlst of
Just lst -> insertList lst
Nothing -> do
mfml <- extractFormula lab
case mfml of
Just fml -> insertFormula fml
Nothing -> do
nb1 <- getBnodesCheck
if lctxt /= PredContext && lab `notElem` nb1
then insertBnode lctxt lab
else formatNodeId lab
formatLabel _ lab@(Res sn) =
case lookup sn specialTable of
Just txt -> return $ quoteB True txt -- TODO: do we need to quote?
Nothing -> do
pr <- getPrefixes
let nsuri = getScopeURI sn
local = getLName $ getScopeLocal sn
prefix = findPrefix nsuri pr
name = case prefix of
Just (Just p) -> B.fromText $ quoteT True $ mconcat [p, ":", local] -- TODO: what are quoting rules for QNames
_ -> mconcat ["<", quoteB True (show nsuri ++ T.unpack local), ">"]
{-
name = case prefix of
Just p -> quoteB True (p ++ ":" ++ local) -- TODO: what are quoting rules for QNames
_ -> mconcat ["<", quoteB True (nsuri++local), ">"]
-}
queueFormula lab
return name
-- The canonical notation for xsd:double in XSD, with an upper-case E,
-- does not match the syntax used in N3, so we need to convert here.
-- Rather than converting back to a Double and then displaying that
-- we just convert E to e for now.
--
formatLabel _ (TypedLit lit dtype)
| dtype == xsdDouble = return $ B.fromText $ T.toLower lit
| dtype `elem` [xsdBoolean, xsdDecimal, xsdInteger] = return $ B.fromText lit
| otherwise = return $ quoteText lit `mappend` "^^" `mappend` showScopedName dtype
formatLabel _ (LangLit lit lcode) =
return $ quoteText lit `mappend` "@" `mappend` B.fromText (fromLangTag lcode)
formatLabel _ (Lit lit) = return $ quoteText lit
formatLabel _ lab = return $ B.fromString $ show lab
{-
We have to decide whether to use " or """ to quote
the string.
There is also no need to restrict the string to the
ASCII character set; this could be an option but we
can also leave Unicode as is (or at least convert to UTF-8).
If we use """ to surround the string then we protect the
last character if it is a " (assuming it isn't protected).
-}
quoteText :: T.Text -> B.Builder
quoteText txt =
let st = T.unpack txt -- TODO: fix
qst = quoteB (n==1) st
n = if '\n' `elem` st || '"' `elem` st then 3 else 1
qch = B.fromString (replicate n '"')
in mconcat [qch, qst, qch]
formatNodeId :: RDFLabel -> Formatter B.Builder
formatNodeId lab@(Blank (lnc:_)) =
if isDigit lnc then mapBlankNode lab else return $ B.fromString $ show lab
formatNodeId other = error $ "formatNodeId not expecting a " ++ show other -- to shut up -Wall
mapBlankNode :: RDFLabel -> Formatter B.Builder
mapBlankNode lab = do
ngs <- getNgs
let cmap = nodeMap ngs
cval = nodeGen ngs
nv <- case M.findWithDefault 0 lab cmap of
0 -> do
let nval = succ cval
nmap = M.insert lab nval cmap
setNgs $ ngs { nodeGen = nval, nodeMap = nmap }
return nval
n -> return n
-- TODO: is this what we want?
return $ "_:swish" `mappend` B.fromString (show nv)
-- TODO: need to be a bit more clever with this than we did in NTriples
-- not sure the following counts as clever enough ...
--
showScopedName :: ScopedName -> B.Builder
{-
showScopedName (ScopedName n l) =
let uri = nsURI n ++ l
in quote uri
-}
showScopedName = quoteB True . show
--------------------------------------------------------------------------------
--
-- Copyright (c) 2003, Graham Klyne, 2009 Vasili I Galchin,
-- 2011, 2012 Douglas Burke
-- All rights reserved.
--
-- This file is part of Swish.
--
-- Swish is free software; you can redistribute it and/or modify
-- it under the terms of the GNU General Public License as published by
-- the Free Software Foundation; either version 2 of the License, or
-- (at your option) any later version.
--
-- Swish is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- GNU General Public License for more details.
--
-- You should have received a copy of the GNU General Public License
-- along with Swish; if not, write to:
-- The Free Software Foundation, Inc.,
-- 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--
--------------------------------------------------------------------------------