--------------------------------------------------------------------------------
-- See end of this file for licence information.
--------------------------------------------------------------------------------
-- |
-- Module : N3Formatter
-- Copyright : (c) 2003, Graham Klyne, 2009 Vasili I Galchin, 2011 Douglas Burke
-- License : GPL V2
--
-- Maintainer : Douglas Burke
-- Stability : experimental
-- Portability : H98
--
-- This Module implements a Notation 3 formatter (see [1], [2] and [3]),
-- for an RDFGraph value.
--
-- REFERENCES:
--
-- (1)
-- Notation3 (N3): A readable RDF syntax,
-- W3C Team Submission 14 January 2008
--
-- (2)
-- Tim Berners-Lee's design issues series notes and description
--
-- (2)
-- Notation 3 Primer by Sean Palmer
--
-- TODO:
--
-- * Initial prefix list to include nested formulae;
-- then don't need to update prefix list for these.
--
-- * correct output of strings containing unsupported escape
-- characters (such as @\\q@)
--
-- * more flexible terminator generation for formatted formulae
-- (for inline blank nodes.)
--
--------------------------------------------------------------------------------
{-
TODO:
The code used to determine whether a blank node can be written
using the "[]" short form could probably take advantage of the
GraphPartition module.
-}
module Swish.RDF.N3Formatter
( NodeGenLookupMap
, formatGraphAsStringNl
, formatGraphAsString
, formatGraphAsShowS
, formatGraphIndent
, formatGraphDiag
)
where
import Swish.RDF.RDFGraph (
RDFGraph, RDFLabel(..),
NamespaceMap, RevNamespaceMap,
emptyNamespaceMap,
FormulaMap, emptyFormulaMap,
getArcs, labels,
setNamespaces, getNamespaces,
getFormulae,
emptyRDFGraph,
res_rdf_first, res_rdf_rest, res_rdf_nil
)
import Swish.RDF.Vocabulary (
isLang, langTag,
rdf_type,
rdf_nil,
owl_sameAs, log_implies
, xsd_boolean, xsd_decimal, xsd_integer, xsd_double
)
import Swish.RDF.GraphClass
( Arc(..) )
import Swish.Utils.LookupMap
( LookupEntryClass(..)
, LookupMap, emptyLookupMap, reverseLookupMap
, listLookupMap
, mapFind, mapFindMaybe, mapAdd, mapDelete, mapMerge
)
import Swish.Utils.Namespace
( ScopedName(..), getScopeURI )
import Data.Char (ord, isDigit, toLower)
import Data.List (foldl', delete, groupBy, partition, sort)
import Text.Printf (printf)
import Control.Monad (liftM, when)
import Control.Monad.State (State, get, put, runState)
----------------------------------------------------------------------
-- Ouptut string concatenation
----------------------------------------------------------------------
--
-- Function puts uses the shows mechanism to avoid the cost of
-- quadratic string concatenation times. (Use function composition to
-- concatenate strings thus reprersented.)
puts :: String -> ShowS
puts = showString
----------------------------------------------------------------------
-- Graph formatting state monad
----------------------------------------------------------------------
--
-- The graph to be formatted is carried as part of the formatting
-- state, so that decisions about what needs to be formatted can
-- themselves be based upon and reflected in the state (e.g. if a
-- decision is made to include a blank node inline, it can be removed
-- from the graph state that remains to be formatted).
type SubjTree lb = [(lb,PredTree lb)]
type PredTree lb = [(lb,[lb])]
data N3FormatterState = N3FS
{ indent :: String
, lineBreak :: Bool
, graph :: RDFGraph
, subjs :: SubjTree RDFLabel
, props :: PredTree RDFLabel -- for last subject selected
, objs :: [RDFLabel] -- for last property selected
, formAvail :: FormulaMap RDFLabel
, formQueue :: [(RDFLabel,RDFGraph)]
, nodeGenSt :: NodeGenState
, bNodesCheck :: [RDFLabel] -- these bNodes are not to be converted to '[..]' format
, traceBuf :: [String]
}
type Formatter a = State N3FormatterState a
emptyN3FS :: NodeGenState -> N3FormatterState
emptyN3FS ngs = N3FS
{ indent = "\n"
, lineBreak = False
, graph = emptyRDFGraph
, subjs = []
, props = []
, objs = []
, formAvail = emptyFormulaMap
, formQueue = []
, nodeGenSt = ngs
, bNodesCheck = []
, traceBuf = []
}
-- | Node name generation state information that carries through
-- and is updated by nested formulae
type NodeGenLookupMap = LookupMap (RDFLabel,Int)
data NodeGenState = Ngs
{ prefixes :: NamespaceMap
, nodeMap :: NodeGenLookupMap
, nodeGen :: Int
}
emptyNgs :: NodeGenState
emptyNgs = Ngs
{ prefixes = emptyLookupMap
, nodeMap = emptyLookupMap
, nodeGen = 0
}
-- simple context for label creation
-- (may be a temporary solution to the problem
-- of label creation)
--
data LabelContext = SubjContext | PredContext | ObjContext
deriving (Eq, Show)
getIndent :: Formatter String
getIndent = indent `liftM` get
setIndent :: String -> Formatter ()
setIndent ind = do
st <- get
put $ st { indent = ind }
getLineBreak :: Formatter Bool
getLineBreak = lineBreak `liftM` get
setLineBreak :: Bool -> Formatter ()
setLineBreak brk = do
st <- get
put $ st {lineBreak = brk}
getNgs :: Formatter NodeGenState
getNgs = nodeGenSt `liftM` get
setNgs :: NodeGenState -> Formatter ()
setNgs ngs = do
st <- get
put $ st { nodeGenSt = ngs }
getPrefixes :: Formatter NamespaceMap
getPrefixes = prefixes `liftM` getNgs
getSubjs :: Formatter (SubjTree RDFLabel)
getSubjs = subjs `liftM` get
setSubjs :: SubjTree RDFLabel -> Formatter ()
setSubjs sl = do
st <- get
put $ st { subjs = sl }
getProps :: Formatter (PredTree RDFLabel)
getProps = props `liftM` get
setProps :: PredTree RDFLabel -> Formatter ()
setProps ps = do
st <- get
put $ st { props = ps }
{-
getObjs :: Formatter ([RDFLabel])
getObjs = objs `liftM` get
setObjs :: [RDFLabel] -> Formatter ()
setObjs os = do
st <- get
put $ st { objs = os }
-}
getBnodesCheck :: Formatter [RDFLabel]
getBnodesCheck = bNodesCheck `liftM` get
{-
addTrace :: String -> Formatter ()
addTrace tr = do
st <- get
put $ st { traceBuf = tr : traceBuf st }
-}
queueFormula :: RDFLabel -> Formatter ()
queueFormula fn = do
st <- get
let fa = formAvail st
newState fv = st {
formAvail = mapDelete fa fn,
formQueue = (fn,fv) : formQueue st
}
case mapFindMaybe fn fa of
Nothing -> return ()
Just v -> put (newState v) >> return ()
{-
Return the graph associated with the label and delete it
from the store, if there is an association, otherwise
return Nothing.
-}
extractFormula :: RDFLabel -> Formatter (Maybe RDFGraph)
extractFormula fn = do
st <- get
let fa = formAvail st
newState = st { formAvail=mapDelete fa fn }
case mapFindMaybe fn fa of
Nothing -> return Nothing
Just fv -> put newState >> return (Just fv)
{-
moreFormulae :: Formatter Bool
moreFormulae = do
st <- get
return $ not $ null (formQueue st)
nextFormula :: Formatter (RDFLabel,RDFGraph)
nextFormula = do
st <- get
let (nf : fq) = formQueue st
put $ st { formQueue = fq }
return nf
-}
-- list has a length of 1
len1 :: [a] -> Bool
len1 (_:[]) = True
len1 _ = False
{-|
Given a set of statements and a label, return the details of the
RDF collection referred to by label, or Nothing.
For label to be considered as representing a collection we require the
following conditions to hold (this is only to support the
serialisation using the '(..)' syntax and does not make any statement
about semantics of the statements with regard to RDF Collections):
- there must be one rdf_first and one rdf_rest statement
- there must be no other predicates for the label
-}
getCollection ::
SubjTree RDFLabel -- ^ statements organized by subject
-> RDFLabel -- ^ does this label represent a list?
-> Maybe (SubjTree RDFLabel, [RDFLabel], [RDFLabel])
-- ^ the statements with the elements removed; the
-- content elements of the collection (the objects of the rdf:first
-- predicate) and the nodes that represent the spine of the
-- collection (in reverse order, unlike the actual contents which are in
-- order).
getCollection subjList lbl = go subjList lbl ([],[])
where
go sl l (cs,ss) | l == res_rdf_nil = Just (sl, reverse cs, ss)
| otherwise = do
(pList1, sl') <- removeItem sl l
(pFirst, pList2) <- removeItem pList1 res_rdf_first
(pNext, pList3) <- removeItem pList2 res_rdf_rest
-- QUS: could I include these checks implicitly in the pattern matches above?
-- ie instrad of (pFirst, pos1) <- ..
-- have ([content], pos1) <- ...
-- ?
if and [len1 pFirst, len1 pNext, null pList3]
then go sl' (head pNext) (head pFirst : cs, l : ss)
else Nothing
{-
TODO:
Should we change the preds/objs entries as well?
-}
extractList :: LabelContext -> RDFLabel -> Formatter (Maybe [RDFLabel])
extractList lctxt ln = do
osubjs <- getSubjs
oprops <- getProps
let mlst = getCollection osubjs' ln
-- we only want to send in rdf:first/rdf:rest here
fprops = filter ((`elem` [res_rdf_first, res_rdf_rest]) . fst) oprops
osubjs' =
case lctxt of
SubjContext -> (ln, fprops) : osubjs
_ -> osubjs
-- tr = "extractList " ++ show ln ++ " (" ++ show lctxt ++ ")\n -> osubjs= " ++ show osubjs ++ "\n -> opreds= " ++ show oprops ++ "\n -> mlst= " ++ show mlst ++ "\n"
-- addTrace tr
case mlst of
-- sl is guaranteed to be free of (ln,fprops) here if lctxt is SubjContext
Just (sl,ls,_) -> do
setSubjs sl
when (lctxt == SubjContext) $ setProps $ filter ((`notElem` [res_rdf_first, res_rdf_rest]) . fst) oprops
return (Just ls)
Nothing -> return Nothing
{-
-- for safety I am assuming no ordering of the subject tree
-- but really should be using one of the container types
--
deleteItems :: (Eq a) => [(a,b)] -> [a] -> [(a,b)]
deleteItems [] _ = []
deleteItems os [] = os
deleteItems os (x:xs) =
deleteItems (deleteItem os x) xs
deleteItem :: (Eq a) => [(a,b)] -> a -> [(a,b)]
deleteItem os x =
case removeItem os x of
Just (_, rest) -> rest
Nothing -> os
-}
{-|
Removes the first occurrence of the item from the
association list, returning it's contents and the rest
of the list, if it exists.
-}
removeItem :: (Eq a) => [(a,b)] -> a -> Maybe (b, [(a,b)])
removeItem os x =
let (as, bs) = break (\a -> fst a == x) os
in case bs of
((_,b):bbs) -> Just (b, as ++ bbs)
[] -> Nothing
----------------------------------------------------------------------
-- Define a top-level formatter function:
-- accepts a graph and returns a string
----------------------------------------------------------------------
formatGraphAsStringNl :: RDFGraph -> String
formatGraphAsStringNl gr = formatGraphAsShowS gr "\n"
formatGraphAsString :: RDFGraph -> String
formatGraphAsString gr = formatGraphAsShowS gr ""
formatGraphAsShowS :: RDFGraph -> ShowS
formatGraphAsShowS = formatGraphIndent "\n" True
{- old code:
where
(out,_,_,_) = formatGraphDiag gr
-}
formatGraphIndent :: String -> Bool -> RDFGraph -> ShowS
{- working version
formatGraphIndent ind dopref gr = out
where
(_,out) = formatGraphDiag1 ind dopref emptyLookupMap gr
-}
formatGraphIndent ind dopref = fst . formatGraphDiag1 ind dopref emptyLookupMap
{-
formatGraphIndent ind dopref gr = out
where
(out',fgs) = formatGraphDiag1 ind dopref emptyLookupMap gr
tbuff = traceBuf fgs
-- tr = if null tbuff then "" else "\nDEBUG:\n" ++ concat (reverse tbuff)
tr = ""
out = out' . (++ tr)
-}
-- | Format graph and return additional information
formatGraphDiag ::
RDFGraph -> (ShowS,NodeGenLookupMap,Int,[String])
formatGraphDiag gr = (out,nodeMap ngs,nodeGen ngs,traceBuf fgs)
where
(out,fgs) = formatGraphDiag1 "\n" True emptyLookupMap gr
ngs = nodeGenSt fgs
-- Internal function starts with supplied prefix table and indent string,
-- and returns final state and formatted string.
-- This is provided for diagnostic access to the final state
formatGraphDiag1 :: String -> Bool -> NamespaceMap -> RDFGraph -> (ShowS,N3FormatterState)
formatGraphDiag1 ind dopref pref gr =
let fg = formatGraph ind " ." False dopref gr
ngs = emptyNgs {
prefixes=pref,
nodeGen=findMaxBnode gr
}
in runState fg (emptyN3FS ngs)
----------------------------------------------------------------------
-- Formatting as a monad-based computation
----------------------------------------------------------------------
-- ind is indentation string
-- end is ending string to be placed after final statement
-- dobreak is True if a line break is to be inserted at the start
-- dopref is True if prefix strings are to be generated
--
formatGraph :: String -> String -> Bool -> Bool -> RDFGraph -> Formatter ShowS
formatGraph ind end dobreak dopref gr = do
setIndent ind
setLineBreak dobreak
setGraph gr
fp <- if dopref
then formatPrefixes (getNamespaces gr)
else return $ puts ""
more <- moreSubjects
if more
then do
fr <- formatSubjects
return $ fp . fr . puts end
else return fp
formatPrefixes :: NamespaceMap -> Formatter ShowS
formatPrefixes pmap = do
let mls = map (pref . keyVal) (listLookupMap pmap)
ls <- sequence mls
return $ puts $ concat ls
where
pref (p,u) = nextLine $ "@prefix "++p++": <"++ quote True u ++"> ."
formatSubjects :: Formatter ShowS
formatSubjects = do
sb <- nextSubject
sbstr <- formatLabel SubjContext sb
flagP <- moreProperties
if flagP
then do
prstr <- formatProperties sb sbstr
-- fmstr <- formatFormulae ""
flagS <- moreSubjects
if flagS
then do
fr <- formatSubjects
return $ puts (prstr ++ " .") . fr
-- return $ puts (prstr ++ fmstr ++ " .") . fr
else return $ puts prstr
-- else return $ puts $ prstr ++ fmstr
else do
txt <- nextLine sbstr
flagS <- moreSubjects
if flagS
then do
fr <- formatSubjects
return $ puts (txt ++ " .") . fr
else return $ puts txt
formatProperties :: RDFLabel -> String -> Formatter String
formatProperties sb sbstr = do
pr <- nextProperty sb
prstr <- formatLabel PredContext pr
obstr <- formatObjects sb pr (sbstr++" "++prstr)
more <- moreProperties
let sbindent = replicate (length sbstr) ' '
if more
then do
fr <- formatProperties sb sbindent
nl <- nextLine $ obstr ++ " ;"
return $ nl ++ fr
else nextLine obstr
formatObjects :: RDFLabel -> RDFLabel -> String -> Formatter String
formatObjects sb pr prstr = do
ob <- nextObject sb pr
obstr <- formatLabel ObjContext ob
more <- moreObjects
if more
then do
let prindent = replicate (length prstr) ' '
fr <- formatObjects sb pr prindent
nl <- nextLine $ prstr ++ " " ++ obstr ++ ","
return $ nl ++ fr
else return $ prstr ++ " " ++ obstr
{-
formatFormulae :: String -> Formatter String
formatFormulae fp = do
more <- moreFormulae
if more
then do
fnlgr <- nextFormula
fnstr <- formatFormula fnlgr
formatFormulae $ fp ++ " ." ++ fnstr
else return fp
TODO: need to remove the use of :-. It's not clear to me whether
we are guaranteed that fn is only used once in the graph - ie
if it is safe to inline this formula at the label location.
formatFormula :: (RDFLabel,RDFGraph) -> Formatter String
formatFormula (fn,gr) = do
fnstr <- formatLabel SubjContext fn
f1str <- nextLine $ fnstr ++ " :-"
f2str <- nextLine " {"
ngs0 <- getNgs
ind <- getIndent
let grm = formatGraph (ind++" ") "" True False
(setNamespaces emptyNamespaceMap gr)
(f3str, fgs') = runState grm (emptyN3FS ngs0)
setNgs (nodeGenSt fgs')
f4str <- nextLine " }"
return $ f1str ++ f2str ++ f3str f4str
-}
--- DJB's version of formatFormula when it can be inserted inline
insertFormula :: RDFGraph -> Formatter String
insertFormula gr = do
ngs0 <- getNgs
ind <- getIndent
let grm = formatGraph (ind++" ") "" True False
(setNamespaces emptyNamespaceMap gr)
(f3str, fgs') = runState grm (emptyN3FS ngs0)
setNgs (nodeGenSt fgs')
f4str <- nextLine " } "
return $ " { " ++ f3str f4str
{-
Add a list inline. We are given the labels that constitute
the list, in order, so just need to display them surrounded
by ().
-}
insertList :: [RDFLabel] -> Formatter String
insertList [] = return "()" -- not convinced this can happen
insertList xs = do
ls <- mapM (formatLabel ObjContext) xs
return $ "( " ++ unwords ls ++ " )"
{-
Add a blank node inline.
-}
insertBnode :: LabelContext -> RDFLabel -> Formatter String
insertBnode SubjContext lbl = do
flag <- moreProperties
txt <- if flag
then liftM (++"\n") $ formatProperties lbl ""
else return ""
-- TODO: handle indentation?
return $ "[" ++ txt ++ "]"
insertBnode _ lbl = do
ost <- get
let osubjs = subjs ost
oprops = props ost
oobjs = objs ost
(bsubj, rsubjs) = partition ((== lbl) . fst) osubjs
rprops = case bsubj of
[(_,rs)] -> rs
_ -> []
-- we essentially want to create a new subgraph
-- for this node but it's not as simple as that since
-- we could have something like
-- :a :b [ :foo [ :bar "xx" ] ]
-- so we still need to carry around the whole graph
--
nst = ost { subjs = rsubjs,
props = rprops,
objs = []
}
put nst
flag <- moreProperties
txt <- if flag
then liftM (++"\n") $ formatProperties lbl ""
else return ""
-- TODO: how do we restore the original set up?
-- I can't believe the following is sufficient
--
nst' <- get
let slist = map fst $ subjs nst'
nsubjs = filter (\(l,_) -> l `elem` slist) osubjs
put $ nst' { subjs = nsubjs,
props = oprops,
objs = oobjs
}
-- TODO: handle indentation?
return $ "[" ++ txt ++ "]"
----------------------------------------------------------------------
-- Formatting helpers
----------------------------------------------------------------------
setGraph :: RDFGraph -> Formatter ()
setGraph gr = do
st <- get
let ngs0 = nodeGenSt st
pre' = mapMerge (prefixes ngs0) (getNamespaces gr)
ngs' = ngs0 { prefixes = pre' }
arcs = sortArcs $ getArcs gr
nst = st { graph = gr
, subjs = arcTree arcs
, props = []
, objs = []
, formAvail = getFormulae gr
, nodeGenSt = ngs'
, bNodesCheck = countBnodes arcs
}
put nst
moreSubjects :: Formatter Bool
moreSubjects = (not . null . subjs) `liftM` get
nextSubject :: Formatter RDFLabel
nextSubject = do
st <- get
let sb:sbs = subjs st
nst = st { subjs = sbs
, props = snd sb
, objs = []
}
put nst
return $ fst sb
moreProperties :: Formatter Bool
moreProperties = (not . null . props) `liftM` get
nextProperty :: RDFLabel -> Formatter RDFLabel
nextProperty _ = do
st <- get
let pr:prs = props st
nst = st { props = prs
, objs = snd pr
}
put nst
return $ fst pr
moreObjects :: Formatter Bool
moreObjects = (not . null . objs) `liftM` get
nextObject :: RDFLabel -> RDFLabel -> Formatter RDFLabel
nextObject _ _ = do
st <- get
let ob:obs = objs st
nst = st { objs = obs }
put nst
return ob
nextLine :: String -> Formatter String
nextLine str = do
ind <- getIndent
brk <- getLineBreak
if brk
then return $ ind++str
else do
-- After first line, always insert line break
setLineBreak True
return str
-- Format a label
-- Most labels are simply displayed as provided, but there are a
-- number of wrinkles to take care of here:
-- (a) blank nodes automatically allocated on input, with node
-- identifiers of the form of a digit string nnn. These are
-- not syntactically valid, and are reassigned node identifiers
-- of the form _nnn, where nnn is chosen so that is does not
-- clash with any other identifier in the graph.
-- (b) URI nodes: if possible, replace URI with qname,
-- else display as
-- (c) formula nodes (containing graphs).
-- (d) use the "special-case" formats for integer/float/double
-- literals.
--
-- [[[TODO:]]]
-- (d) generate multi-line literals when appropriate
--
-- This is being updated to produce inline formula, lists and
-- blank nodes. The code is not efficient.
--
specialTable :: [(ScopedName, String)]
specialTable =
[ (rdf_type, "a")
, (owl_sameAs, "=")
, (log_implies, "=>")
, (rdf_nil, "()")
]
formatLabel :: LabelContext -> RDFLabel -> Formatter String
{-
formatLabel lab@(Blank (_:_)) = do
name <- formatNodeId lab
queueFormula lab
return name
-}
{-
The "[..]" conversion is done last, after "()" and "{}" checks.
-}
formatLabel lctxt lab@(Blank (_:_)) = do
mlst <- extractList lctxt lab
case mlst of
Just lst -> insertList lst
Nothing -> do
mfml <- extractFormula lab
case mfml of
Just fml -> insertFormula fml
Nothing -> do
nb1 <- getBnodesCheck
if lctxt /= PredContext && lab `notElem` nb1
then insertBnode lctxt lab
else formatNodeId lab
formatLabel _ lab@(Res sn) =
case lookup sn specialTable of
Just txt -> return $ quote True txt -- TODO: do we need to quote?
Nothing -> do
pr <- getPrefixes
let nsuri = getScopeURI sn
local = snLocal sn
premap = reverseLookupMap pr :: RevNamespaceMap
prefix = mapFindMaybe nsuri premap
name = case prefix of
Just p -> quote True (p ++ ":" ++ local) -- TODO: what are quoting rules for QNames
_ -> "<"++ quote True (nsuri++local) ++">"
queueFormula lab
return name
-- The canonical notation for xsd:double in XSD, with an upper-case E,
-- does not match the syntax used in N3, so we need to convert here.
-- Rather than converting back to a Double and then displaying that
-- we just convert E to e for now.
--
formatLabel _ (Lit lit (Just dtype))
| dtype == xsd_double = return $ map toLower lit
| dtype `elem` [xsd_boolean, xsd_decimal, xsd_integer] = return lit
| otherwise = return $ quoteStr lit ++ formatAnnotation dtype
formatLabel _ (Lit lit Nothing) = return $ quoteStr lit
formatLabel _ lab = return $ show lab
-- the annotation for a literal (ie type or language)
formatAnnotation :: ScopedName -> String
formatAnnotation a | isLang a = '@' : langTag a
| otherwise = '^':'^': showScopedName a
{-
Swish.Utils.MiscHelpers contains a quote routine
which we expand upon here to match the N3 syntax.
We have to decide whether to use " or """ to quote
the string.
There is also no need to restrict the string to the
ASCII character set; this could be an option but we
can also leave Unicode as is (or at least convert to UTF-8).
If we use """ to surround the string then we protect the
last character if it is a " (assuming it isn't protected).
-}
quoteStr :: String -> String
quoteStr st =
let qst = quote (n==1) st
n = if '\n' `elem` st || '"' `elem` st then 3 else 1
qch = replicate n '"'
in qch ++ qst ++ qch
-- The boolean flag is True if the string is being displayed
-- with single quotes, which should mean that there are
-- no newline or quote characters in the string.
--
-- TODO: when flag == False need to worry about n > 2 quotes
-- in a row.
--
quote :: Bool -> String -> String
quote _ [] = ""
quote False s@(c:'"':[]) | c == '\\' = s -- handle triple-quoted strings ending in "
| otherwise = [c, '\\', '"']
-- quote True ('"': st) = '\\':'"': quote True st -- this should not happen
-- quote True ('\n':st) = '\\':'n': quote True st -- this should not happen
quote True ('\t':st) = '\\':'t': quote True st
quote False ('"': st) = '"': quote False st
quote False ('\n':st) = '\n': quote False st
quote False ('\t':st) = '\t': quote False st
quote f ('\r':st) = '\\':'r': quote f st
quote f ('\\':st) = '\\':'\\': quote f st -- not sure about this
quote f (c:st) =
let nc = ord c
rst = quote f st
-- lazy way to convert to a string
hstr = printf "%08X" nc
ustr = hstr ++ rst
in if nc > 0xffff
then '\\':'U': ustr
else if nc > 0x7e || nc < 0x20
then '\\':'u': drop 4 ustr
else c : rst
formatNodeId :: RDFLabel -> Formatter String
formatNodeId lab@(Blank (lnc:_)) =
if isDigit lnc then mapBlankNode lab else return $ show lab
formatNodeId other = error $ "formatNodeId not expecting a " ++ show other -- to shut up -Wall
mapBlankNode :: RDFLabel -> Formatter String
mapBlankNode lab = do
ngs <- getNgs
let cmap = nodeMap ngs
cval = nodeGen ngs
nv <- case mapFind 0 lab cmap of
0 -> do
let nval = succ cval
nmap = mapAdd cmap (lab, nval)
setNgs $ ngs { nodeGen = nval, nodeMap = nmap }
return nval
n -> return n
-- TODO: is this what we want?
return $ "_:swish" ++ show nv
-- TODO: need to be a bit more clever with this than we did in NTriples
-- not sure the following counts as clever enough ...
--
showScopedName :: ScopedName -> String
{-
showScopedName (ScopedName n l) =
let uri = nsURI n ++ l
in quote uri
-}
showScopedName = quote True . show
----------------------------------------------------------------------
-- Graph-related helper functions
----------------------------------------------------------------------
newtype SortedArcs lb = SA [Arc lb]
sortArcs :: (Ord lb) => [Arc lb] -> SortedArcs lb
sortArcs = SA . sort
-- Rearrange a list of arcs into a tree of pairs which group together
-- all statements for a single subject, and similarly for multiple
-- objects of a common predicate.
--
arcTree :: (Eq lb) => SortedArcs lb -> SubjTree lb
arcTree (SA as) = commonFstEq (commonFstEq id) $ map spopair as
where
spopair (Arc s p o) = (s,(p,o))
{-
arcTree as = map spopair $ sort as
where
spopair (Arc s p o) = (s,[(p,[o])])
-}
-- Rearrange a list of pairs so that multiple occurrences of the first
-- are commoned up, and the supplied function is applied to each sublist
-- with common first elements to obtain the corresponding second value
commonFstEq :: (Eq a) => ( [b] -> c ) -> [(a,b)] -> [(a,c)]
commonFstEq f ps =
[ (fst $ head sps,f $ map snd sps) | sps <- groupBy fstEq ps ]
where
fstEq (f1,_) (f2,_) = f1 == f2
{-
-- Diagnostic code for checking arcTree logic:
testArcTree = (arcTree testArcTree1) == testArcTree2
testArcTree1 =
[Arc "s1" "p11" "o111", Arc "s1" "p11" "o112"
,Arc "s1" "p12" "o121", Arc "s1" "p12" "o122"
,Arc "s2" "p21" "o211", Arc "s2" "p21" "o212"
,Arc "s2" "p22" "o221", Arc "s2" "p22" "o222"
]
testArcTree2 =
[("s1",[("p11",["o111","o112"]),("p12",["o121","o122"])])
,("s2",[("p21",["o211","o212"]),("p22",["o221","o222"])])
]
-}
findMaxBnode :: RDFGraph -> Int
findMaxBnode = maximum . map getAutoBnodeIndex . labels
getAutoBnodeIndex :: RDFLabel -> Int
getAutoBnodeIndex (Blank ('_':lns)) = res where
-- cf. prelude definition of read s ...
res = case [x | (x,t) <- reads lns, ("","") <- lex t] of
[x] -> x
_ -> 0
getAutoBnodeIndex _ = 0
{-
Find all blank nodes that occur
- any number of times as a subject
- 0 or 1 times as an object
Such nodes can be output using the "[..]" syntax. To make it simpler
to check we actually store those nodes that can not be expanded.
Note that we do not try and expand any bNode that is used in
a predicate position.
Should probably be using the SubjTree RDFLabel structure but this
is easier for now.
-}
countBnodes :: SortedArcs RDFLabel -> [RDFLabel]
countBnodes (SA as) = snd (foldl' ctr ([],[]) as)
where
-- first element of tuple are those blank nodes only seen once,
-- second element those blank nodes seen multiple times
--
inc b@(b1s,bms) l@(Blank _) | l `elem` bms = b
| l `elem` b1s = (delete l b1s, l:bms)
| otherwise = (l:b1s, bms)
inc b _ = b
-- if the bNode appears as a predicate we instantly add it to the
-- list of nodes not to expand, even if only used once
incP b@(b1s,bms) l@(Blank _) | l `elem` bms = b
| l `elem` b1s = (delete l b1s, l:bms)
| otherwise = (b1s, l:bms)
incP b _ = b
ctr orig (Arc _ p o) = inc (incP orig p) o
--------------------------------------------------------------------------------
--
-- Copyright (c) 2003, Graham Klyne, 2009 Vasili I Galchin, 2011 Douglas Burke
-- All rights reserved.
--
-- This file is part of Swish.
--
-- Swish is free software; you can redistribute it and/or modify
-- it under the terms of the GNU General Public License as published by
-- the Free Software Foundation; either version 2 of the License, or
-- (at your option) any later version.
--
-- Swish is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- GNU General Public License for more details.
--
-- You should have received a copy of the GNU General Public License
-- along with Swish; if not, write to:
-- The Free Software Foundation, Inc.,
-- 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--
--------------------------------------------------------------------------------