{-# LANGUAGE CPP #-} {-# LANGUAGE OverloadedStrings #-} -------------------------------------------------------------------------------- -- See end of this file for licence information. -------------------------------------------------------------------------------- -- | -- Module : Internal -- Copyright : (c) 2003, Graham Klyne, 2009 Vasili I Galchin, -- 2011, 2012 Douglas Burke -- License : GPL V2 -- -- Maintainer : Douglas Burke -- Stability : experimental -- Portability : CPP, OverloadedStrings -- -- Utility routines. -- -------------------------------------------------------------------------------- module Swish.RDF.Formatter.Internal ( NodeGenLookupMap , SLens(..) , SubjTree , PredTree , LabelContext(..) , NodeGenState(..) , changeState , hasMore , emptyNgs , getBNodeLabel , findMaxBnode , splitOnLabel , getCollection , processArcs , findPrefix -- N3-like formatting , quoteB , quoteText , showScopedName , formatScopedName , formatPrefixLines , formatPlainLit , formatLangLit , formatTypedLit , insertList , nextLine_ , mapBlankNode_ , formatPrefixes_ , formatGraph_ , formatSubjects_ , formatProperties_ , formatObjects_ , insertBnode_ , extractList_ ) where import Swish.GraphClass (Arc(..), ArcSet) import Swish.Namespace (ScopedName, getScopeLocal, getScopeURI) import Swish.QName (getLName) import Swish.RDF.Graph (RDFGraph, RDFLabel(..), NamespaceMap) import Swish.RDF.Graph (labels, getArcs , getNamespaces , resRdfFirst, resRdfRest, resRdfNil , quote , quoteT ) import Swish.RDF.Vocabulary (LanguageTag, fromLangTag, xsdBoolean, xsdDecimal, xsdInteger, xsdDouble) import Control.Monad (liftM) import Control.Monad.State (State, get, gets, modify, put) import Data.List (delete, foldl', groupBy, intersperse, partition) import Data.Monoid (Monoid(..), mconcat) import Data.Word import Network.URI (URI) import Network.URI.Ord () import qualified Data.Map as M import qualified Data.Set as S import qualified Data.Text as T import qualified Data.Text.Lazy.Builder as B #if defined(__GLASGOW_HASKELL__) && (__GLASGOW_HASKELL__ >= 701) import Data.Tuple (swap) #else swap :: (a,b) -> (b,a) swap (a,b) = (b,a) #endif findPrefix :: URI -> M.Map a URI -> Maybe a findPrefix u = M.lookup u . M.fromList . map swap . M.assocs {- Playing around with ideas to reduce the amount of duplicated code without (for instance) deciding on one of the many lens packages available. It does not seem worth further re-factoring until we have another formatter using a turtle-like syntax (e.g. TriG http://www4.wiwiss.fu-berlin.de/bizer/trig/). -} data SLens a b = SLens (a -> b) (a -> b -> a) -- | Extract the setter. slens :: SLens a b -> a -> b -> a slens (SLens _ s) = s -- | Extract the getter. glens :: SLens a b -> a -> b glens (SLens g _) = g -- | Node name generation state information that carries through -- and is updated by nested formulae. type NodeGenLookupMap = M.Map RDFLabel Word32 {- TODO: look at using Swish.Graphpartition instead. -} type SubjTree lb = [(lb,PredTree lb)] type PredTree lb = [(lb,[lb])] -- | The context for label creation. -- data LabelContext = SubjContext | PredContext | ObjContext deriving (Eq, Show) -- | A generator for BNode labels. data NodeGenState = Ngs { nodeMap :: NodeGenLookupMap , nodeGen :: Word32 } -- | Create an empty node generator. emptyNgs :: NodeGenState emptyNgs = Ngs M.empty 0 {-| Get the label text for the blank node, creating a new one if it has not been seen before. The label text is currently _:swish where number is 1 or higher. This format may be changed in the future. -} getBNodeLabel :: RDFLabel -> NodeGenState -> (B.Builder, Maybe NodeGenState) getBNodeLabel lab ngs = let cmap = nodeMap ngs cval = nodeGen ngs (lnum, mngs) = case M.findWithDefault 0 lab cmap of 0 -> let nval = succ cval nmap = M.insert lab nval cmap in (nval, Just (ngs { nodeGen = nval, nodeMap = nmap })) n -> (n, Nothing) in ("_:swish" `mappend` B.fromString (show lnum), mngs) {-| Process the state, returning a value extracted from it after updating the state. -} changeState :: (a -> (b, a)) -> State a b changeState f = do st <- get let (rval, nst) = f st put nst return rval {-| Apply the function to the state and return True if the result is not empty. -} hasMore :: (a -> [b]) -> State a Bool hasMore lens = (not . null . lens) `liftM` get {-| Removes the first occurrence of the item from the association list, returning it's contents and the rest of the list, if it exists. -} removeItem :: (Eq a) => [(a,b)] -> a -> Maybe (b, [(a,b)]) removeItem os x = let (as, bs) = break (\a -> fst a == x) os in case bs of ((_,b):bbs) -> Just (b, as ++ bbs) [] -> Nothing {-| Given a set of statements and a label, return the details of the RDF collection referred to by label, or Nothing. For label to be considered as representing a collection we require the following conditions to hold (this is only to support the serialisation using the '(..)' syntax and does not make any statement about semantics of the statements with regard to RDF Collections): - there must be one rdf_first and one rdfRest statement - there must be no other predicates for the label -} getCollection :: SubjTree RDFLabel -- ^ statements organized by subject -> RDFLabel -- ^ does this label represent a list? -> Maybe (SubjTree RDFLabel, [RDFLabel], [RDFLabel]) -- ^ the statements with the elements removed; the -- content elements of the collection (the objects of the rdf:first -- predicate) and the nodes that represent the spine of the -- collection (in reverse order, unlike the actual contents which are in -- order). getCollection subjList lbl = go subjList lbl ([],[]) where go sl l (cs,ss) | l == resRdfNil = Just (sl, reverse cs, ss) | otherwise = do (pList1, sl') <- removeItem sl l ([pFirst], pList2) <- removeItem pList1 resRdfFirst ([pNext], []) <- removeItem pList2 resRdfRest go sl' pNext (pFirst : cs, l : ss) ---------------------------------------------------------------------- -- Graph-related helper functions ---------------------------------------------------------------------- processArcs :: RDFGraph -> (SubjTree RDFLabel, [RDFLabel]) processArcs gr = let arcs = sortArcs $ getArcs gr in (arcTree arcs, countBnodes arcs) newtype SortedArcs lb = SA [Arc lb] sortArcs :: (Ord lb) => ArcSet lb -> SortedArcs lb sortArcs = SA . S.toAscList -- Rearrange a list of arcs into a tree of pairs which group together -- all statements for a single subject, and similarly for multiple -- objects of a common predicate. -- arcTree :: (Eq lb) => SortedArcs lb -> SubjTree lb arcTree (SA as) = commonFstEq (commonFstEq id) $ map spopair as where spopair (Arc s p o) = (s,(p,o)) {- arcTree as = map spopair $ sort as where spopair (Arc s p o) = (s,[(p,[o])]) -} -- Rearrange a list of pairs so that multiple occurrences of the first -- are commoned up, and the supplied function is applied to each sublist -- with common first elements to obtain the corresponding second value commonFstEq :: (Eq a) => ( [b] -> c ) -> [(a,b)] -> [(a,c)] commonFstEq f ps = [ (fst $ head sps,f $ map snd sps) | sps <- groupBy fstEq ps ] where fstEq (f1,_) (f2,_) = f1 == f2 {- -- Diagnostic code for checking arcTree logic: testArcTree = (arcTree testArcTree1) == testArcTree2 testArcTree1 = [Arc "s1" "p11" "o111", Arc "s1" "p11" "o112" ,Arc "s1" "p12" "o121", Arc "s1" "p12" "o122" ,Arc "s2" "p21" "o211", Arc "s2" "p21" "o212" ,Arc "s2" "p22" "o221", Arc "s2" "p22" "o222" ] testArcTree2 = [("s1",[("p11",["o111","o112"]),("p12",["o121","o122"])]) ,("s2",[("p21",["o211","o212"]),("p22",["o221","o222"])]) ] -} findMaxBnode :: RDFGraph -> Word32 findMaxBnode = S.findMax . S.map getAutoBnodeIndex . labels getAutoBnodeIndex :: RDFLabel -> Word32 getAutoBnodeIndex (Blank ('_':lns)) = res where -- cf. prelude definition of read s ... res = case [x | (x,t) <- reads lns, ("","") <- lex t] of [x] -> x _ -> 0 getAutoBnodeIndex _ = 0 splitOnLabel :: (Eq a) => a -> SubjTree a -> (SubjTree a, PredTree a) splitOnLabel lbl osubjs = let (bsubj, rsubjs) = partition ((== lbl) . fst) osubjs rprops = case bsubj of [(_, rs)] -> rs _ -> [] in (rsubjs, rprops) {- Find all blank nodes that occur - any number of times as a subject - 0 or 1 times as an object Such nodes can be output using the "[..]" syntax. To make it simpler to check we actually store those nodes that can not be expanded. Note that we do not try and expand any bNode that is used in a predicate position. Should probably be using the SubjTree RDFLabel structure but this is easier for now. -} countBnodes :: SortedArcs RDFLabel -> [RDFLabel] countBnodes (SA as) = snd (foldl' ctr ([],[]) as) where -- first element of tuple are those blank nodes only seen once, -- second element those blank nodes seen multiple times -- inc b@(b1s,bms) l@(Blank _) | l `elem` bms = b | l `elem` b1s = (delete l b1s, l:bms) | otherwise = (l:b1s, bms) inc b _ = b -- if the bNode appears as a predicate we instantly add it to the -- list of nodes not to expand, even if only used once incP b@(b1s,bms) l@(Blank _) | l `elem` bms = b | l `elem` b1s = (delete l b1s, l:bms) | otherwise = (b1s, l:bms) incP b _ = b ctr orig (Arc _ p o) = inc (incP orig p) o -- N3-like output -- temporary conversion quoteB :: Bool -> String -> B.Builder quoteB f v = B.fromString $ quote f v {-| Convert text into a format for display in Turtle. The idea is to use one double quote unless three are needed, and to handle adding necessary @\\@ characters, or conversion for Unicode characters. -} quoteText :: T.Text -> B.Builder quoteText txt = let st = T.unpack txt -- TODO: fix qst = quoteB (n==1) st n = if '\n' `elem` st || '"' `elem` st then 3 else 1 qch = B.fromString (replicate n '"') in mconcat [qch, qst, qch] -- TODO: need to be a bit more clever with this than we did in NTriples -- not sure the following counts as clever enough ... -- showScopedName :: ScopedName -> B.Builder showScopedName = quoteB True . show formatScopedName :: ScopedName -> M.Map (Maybe T.Text) URI -> B.Builder formatScopedName sn prmap = let nsuri = getScopeURI sn local = getLName $ getScopeLocal sn in case findPrefix nsuri prmap of Just (Just p) -> B.fromText $ quoteT True $ mconcat [p, ":", local] _ -> mconcat [ "<" , quoteB True (show nsuri ++ T.unpack local) , ">" ] formatPlainLit :: T.Text -> B.Builder formatPlainLit = quoteText formatLangLit :: T.Text -> LanguageTag -> B.Builder formatLangLit lit lcode = mconcat [quoteText lit, "@", B.fromText (fromLangTag lcode)] -- The canonical notation for xsd:double in XSD, with an upper-case E, -- does not match the syntax used in N3, so we need to convert here. -- Rather than converting back to a Double and then displaying that -- we just convert E to e for now. -- formatTypedLit :: T.Text -> ScopedName -> B.Builder formatTypedLit lit dtype | dtype == xsdDouble = B.fromText $ T.toLower lit | dtype `elem` [xsdBoolean, xsdDecimal, xsdInteger] = B.fromText lit | otherwise = mconcat [quoteText lit, "^^", showScopedName dtype] {- Add a list inline. We are given the labels that constitute the list, in order, so just need to display them surrounded by (). -} insertList :: (RDFLabel -> State a B.Builder) -> [RDFLabel] -> State a B.Builder insertList _ [] = return "()" -- QUS: can this happen in a valid graph? insertList f xs = do ls <- mapM f xs return $ mconcat ("( " : intersperse " " ls) `mappend` " )" nextLine_ :: (a -> B.Builder) -- ^ indentation -> SLens a Bool -- ^ line break lens -> B.Builder -> State a B.Builder nextLine_ indent _lineBreak str = do ind <- gets indent brk <- gets $ glens _lineBreak if brk then return $ ind `mappend` str else do -- After first line, always insert line break modify $ \st -> slens _lineBreak st True return str mapBlankNode_ :: SLens a NodeGenState -> RDFLabel -> State a B.Builder mapBlankNode_ _nodeGen lab = do ngs <- gets $ glens _nodeGen let (lval, mngs) = getBNodeLabel lab ngs case mngs of Just ngs' -> modify $ \st -> slens _nodeGen st ngs' _ -> return () return lval formatPrefixLines :: NamespaceMap -> [B.Builder] formatPrefixLines = map pref . M.assocs where pref (Just p,u) = mconcat ["@prefix ", B.fromText p, ": <", quoteB True (show u), "> ."] pref (_,u) = mconcat ["@prefix : <", quoteB True (show u), "> ."] formatPrefixes_ :: (B.Builder -> State a B.Builder) -- ^ Create a new line -> NamespaceMap -> State a B.Builder formatPrefixes_ nextLine pmap = mconcat `liftM` mapM nextLine (formatPrefixLines pmap) formatGraph_ :: (B.Builder -> State a ()) -- set indent -> (Bool -> State a ()) -- set line-break flag -> (RDFGraph -> a -> a) -- create a new state from the graph -> (NamespaceMap -> State a B.Builder) -- format prefixes -> (a -> SubjTree RDFLabel) -- get the subjects -> State a B.Builder -- format the subjects -> B.Builder -- indentation string -> B.Builder -- text to be placed after final statement -> Bool -- True if a line break is to be inserted at the start -> Bool -- True if prefix strings are to be generated -> RDFGraph -- graph to convert -> State a B.Builder formatGraph_ setIndent setLineBreak newState formatPrefixes subjs formatSubjects ind end dobreak dopref gr = do setIndent ind setLineBreak dobreak modify (newState gr) fp <- if dopref then formatPrefixes (getNamespaces gr) else return mempty more <- hasMore subjs if more then do fr <- formatSubjects return $ mconcat [fp, fr, end] else return fp formatSubjects_ :: State a RDFLabel -- ^ next subject -> (LabelContext -> RDFLabel -> State a B.Builder) -- ^ convert label into text -> (a -> PredTree RDFLabel) -- ^ extract properties -> (RDFLabel -> B.Builder -> State a B.Builder) -- ^ format properties -> (a -> SubjTree RDFLabel) -- ^ extract subjects -> (B.Builder -> State a B.Builder) -- ^ next line -> State a B.Builder formatSubjects_ nextSubject formatLabel props formatProperties subjs nextLine = do sb <- nextSubject sbstr <- formatLabel SubjContext sb flagP <- hasMore props if flagP then do prstr <- formatProperties sb sbstr flagS <- hasMore subjs if flagS then do fr <- formatSubjects_ nextSubject formatLabel props formatProperties subjs nextLine return $ mconcat [prstr, " .", fr] else return prstr else do txt <- nextLine sbstr flagS <- hasMore subjs if flagS then do fr <- formatSubjects_ nextSubject formatLabel props formatProperties subjs nextLine return $ mconcat [txt, " .", fr] else return txt {- TODO: now we are throwing a Builder around it is awkward to get the length of the text to calculate the indentation So a) change the indentation scheme b) pass around text instead of builder mkIndent :: L.Text -> L.Text mkIndent inVal = L.replicate (L.length inVal) " " -} hackIndent :: B.Builder hackIndent = " " formatProperties_ :: (RDFLabel -> State a RDFLabel) -- ^ next property for the given subject -> (LabelContext -> RDFLabel -> State a B.Builder) -- ^ convert label into text -> (RDFLabel -> RDFLabel -> B.Builder -> State a B.Builder) -- ^ format objects -> (a -> PredTree RDFLabel) -- ^ extract properties -> (B.Builder -> State a B.Builder) -- ^ next line -> RDFLabel -- ^ property being processed -> B.Builder -- ^ current output -> State a B.Builder formatProperties_ nextProperty formatLabel formatObjects props nextLine sb sbstr = do pr <- nextProperty sb prstr <- formatLabel PredContext pr obstr <- formatObjects sb pr $ mconcat [sbstr, " ", prstr] more <- hasMore props let sbindent = hackIndent -- mkIndent sbstr if more then do fr <- formatProperties_ nextProperty formatLabel formatObjects props nextLine sb sbindent nl <- nextLine $ obstr `mappend` " ;" return $ nl `mappend` fr else nextLine obstr formatObjects_ :: (RDFLabel -> RDFLabel -> State a RDFLabel) -- ^ get the next object for the (subject,property) pair -> (LabelContext -> RDFLabel -> State a B.Builder) -- ^ format a label -> (a -> [RDFLabel]) -- ^ extract objects -> (B.Builder -> State a B.Builder) -- ^ insert a new line -> RDFLabel -- ^ subject -> RDFLabel -- ^ property -> B.Builder -- ^ current text -> State a B.Builder formatObjects_ nextObject formatLabel objs nextLine sb pr prstr = do ob <- nextObject sb pr obstr <- formatLabel ObjContext ob more <- hasMore objs if more then do let prindent = hackIndent -- mkIndent prstr fr <- formatObjects_ nextObject formatLabel objs nextLine sb pr prindent nl <- nextLine $ mconcat [prstr, " ", obstr, ","] return $ nl `mappend` fr else return $ mconcat [prstr, " ", obstr] {- Processing a Bnode when not a subject. -} insertBnode_ :: (a -> SubjTree RDFLabel) -- ^ extract subjects -> (a -> PredTree RDFLabel) -- ^ extract properties -> (a -> [RDFLabel]) -- ^ extract objects -> (a -> SubjTree RDFLabel -> PredTree RDFLabel -> [RDFLabel] -> a) -- ^ update state to new settings -> (RDFLabel -> B.Builder -> State a B.Builder) -- ^ format properties -> RDFLabel -> State a B.Builder insertBnode_ subjs props objs updateState formatProperties lbl = do ost <- get let osubjs = subjs ost (rsubjs, rprops) = splitOnLabel lbl osubjs put $ updateState ost rsubjs rprops [] flag <- hasMore props txt <- if flag then (`mappend` "\n") `liftM` formatProperties lbl "" else return "" -- restore the original data (where appropriate) nst <- get let slist = map fst $ subjs nst nsubjs = filter (\(l,_) -> l `elem` slist) osubjs put $ updateState nst nsubjs (props ost) (objs ost) -- TODO: handle indentation? return $ mconcat ["[", txt, "]"] maybeExtractList :: SubjTree RDFLabel -> PredTree RDFLabel -> LabelContext -> RDFLabel -> Maybe ([RDFLabel], SubjTree RDFLabel, PredTree RDFLabel) maybeExtractList osubjs oprops lctxt ln = let mlst = getCollection osubjs' ln -- we only want to send in rdf:first/rdf:rest here fprops = filter ((`elem` [resRdfFirst, resRdfRest]) . fst) oprops osubjs' = case lctxt of SubjContext -> (ln, fprops) : osubjs _ -> osubjs in case mlst of Just (sl, ls, _) -> let oprops' = if lctxt == SubjContext then filter ((`notElem` [resRdfFirst, resRdfRest]) . fst) oprops else oprops in Just (ls, sl, oprops') _ -> Nothing extractList_ :: (a -> SubjTree RDFLabel) -- ^ extract subjects -> (a -> PredTree RDFLabel) -- ^ extract properties -> (SubjTree RDFLabel -> State a ()) -- ^ set subjects -> (PredTree RDFLabel -> State a ()) -- ^ set properties -> LabelContext -> RDFLabel -> State a (Maybe [RDFLabel]) extractList_ subjs props setSubjs setProps lctxt ln = do osubjs <- gets subjs oprops <- gets props case maybeExtractList osubjs oprops lctxt ln of Just (ls, osubjs', oprops') -> do setSubjs osubjs' setProps oprops' return (Just ls) _ -> return Nothing -------------------------------------------------------------------------------- -- -- Copyright (c) 2003, Graham Klyne, 2009 Vasili I Galchin, -- 2011, 2012 Douglas Burke -- All rights reserved. -- -- This file is part of Swish. -- -- Swish is free software; you can redistribute it and/or modify -- it under the terms of the GNU General Public License as published by -- the Free Software Foundation; either version 2 of the License, or -- (at your option) any later version. -- -- Swish is distributed in the hope that it will be useful, -- but WITHOUT ANY WARRANTY; without even the implied warranty of -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- GNU General Public License for more details. -- -- You should have received a copy of the GNU General Public License -- along with Swish; if not, write to: -- The Free Software Foundation, Inc., -- 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -- --------------------------------------------------------------------------------