{-# LANGUAGE OverloadedStrings #-} -------------------------------------------------------------------------------- -- See end of this file for licence information. -------------------------------------------------------------------------------- -- | -- Module : NTriples -- Copyright : (c) 2003, Graham Klyne, 2009 Vasili I Galchin, -- 2011, 2012 Douglas Burke -- License : GPL V2 -- -- Maintainer : Douglas Burke -- Stability : experimental -- Portability : OverloadedStrings -- -- This Module implements a NTriples formatter for an 'RDFGraph'. -- -- REFERENCES: -- -- - \"RDF Test Cases\", -- W3C Recommendation 10 February 2004, -- -- -------------------------------------------------------------------------------- module Swish.RDF.Formatter.NTriples ( formatGraphAsText , formatGraphAsLazyText , formatGraphAsBuilder ) where import Swish.RDF.Formatter.Internal (NodeGenLookupMap) import Swish.GraphClass (Arc(..)) import Swish.Namespace (ScopedName, getQName) import Swish.RDF.Graph (RDFGraph, RDFLabel(..)) import Swish.RDF.Graph (getArcs) import Swish.RDF.Vocabulary (fromLangTag) import Control.Monad.State import Control.Applicative ((<$>)) import Data.Char (ord, intToDigit, toUpper) import Data.Monoid import Data.Word (Word32) -- it strikes me that using Lazy Text here is likely to be -- wrong; however I have done no profiling to back this -- assumption up! import qualified Data.Map as M import qualified Data.Set as S import qualified Data.Text as T import qualified Data.Text.Lazy as L import qualified Data.Text.Lazy.Builder as B ---------------------------------------------------------------------- -- Graph formatting state monad ---------------------------------------------------------------------- -- -- This is a lot simpler than other formatters. data NTFormatterState = NTFS { ntfsNodeMap :: NodeGenLookupMap, ntfsNodeGen :: Word32 } deriving Show emptyNTFS :: NTFormatterState emptyNTFS = NTFS { ntfsNodeMap = M.empty, ntfsNodeGen = 0 } type Formatter a = State NTFormatterState a -- | Convert a RDF graph to NTriples format. formatGraphAsText :: RDFGraph -> T.Text formatGraphAsText = L.toStrict . formatGraphAsLazyText -- | Convert a RDF graph to NTriples format. formatGraphAsLazyText :: RDFGraph -> L.Text formatGraphAsLazyText = B.toLazyText . formatGraphAsBuilder -- | Convert a RDF graph to NTriples format. formatGraphAsBuilder :: RDFGraph -> B.Builder formatGraphAsBuilder gr = fst $ runState (formatGraph gr) emptyNTFS ---------------------------------------------------------------------- -- Formatting as a monad-based computation ---------------------------------------------------------------------- formatGraph :: RDFGraph -> Formatter B.Builder formatGraph gr = mconcat <$> mapM formatArc (S.toList (getArcs gr)) -- TODO: this reverses the contents but may be faster? -- that is if I've got the order right in the mappend call -- formatGraphBuilder gr = foldl' (\a b -> b `mappend` (formatArcBuilder a)) B.empty (getArcs gr) space, nl :: B.Builder space = B.singleton ' ' nl = " .\n" formatArc :: Arc RDFLabel -> Formatter B.Builder formatArc (Arc s p o) = do sl <- formatLabel s pl <- formatLabel p ol <- formatLabel o return $ mconcat [sl, space, pl, space, ol, nl] -- return $ sl `mappend` $ space `mappend` $ pl `mappend` $ space `mappend` $ ol `mappend` nl {- If we have a blank node then can - use the label it contains - generate a new one on output For now we create new labels whatever the input was since this simplifies things, but it may be changed. formatLabel :: RDFLabel -> Formatter String formatLabel lab@(Blank (lnc:_)) = if isDigit lnc then mapBlankNode lab else return $ show lab formatLabel lab = return $ show lab -} squote, at, carets :: B.Builder squote = "\"" at = "@" carets = "^^" formatLabel :: RDFLabel -> Formatter B.Builder formatLabel lab@(Blank _) = mapBlankNode lab formatLabel (Res sn) = return $ showScopedName sn formatLabel (Lit lit) = return $ quoteText lit formatLabel (LangLit lit lang) = return $ mconcat [quoteText lit, at, B.fromText (fromLangTag lang)] formatLabel (TypedLit lit dt) = return $ mconcat [quoteText lit, carets, showScopedName dt] -- do not expect to get the following, but include -- just in case rather than failing formatLabel lab = return $ B.fromString $ show lab mapBlankNode :: RDFLabel -> Formatter B.Builder mapBlankNode lab = do st <- get let cmap = ntfsNodeMap st cval = ntfsNodeGen st nv <- case M.findWithDefault 0 lab cmap of 0 -> do let nval = succ cval nmap = M.insert lab nval cmap put $ st { ntfsNodeMap = nmap, ntfsNodeGen = nval } return nval n -> return n return $ "_:swish" `mappend` B.fromString (show nv) -- TODO: can we use Network.URI to protect the URI? showScopedName :: ScopedName -> B.Builder {- showScopedName (ScopedName n l) = let uri = T.pack (show (nsURI n)) `mappend` l in mconcat ["<", B.fromText (quote uri), ">"] -} -- showScopedName s = mconcat ["<", B.fromText (quote (T.pack (show (getQName s)))), ">"] showScopedName s = B.fromText (quote (T.pack (show (getQName s)))) -- looks like qname already adds the <> around this quoteText :: T.Text -> B.Builder quoteText st = mconcat [squote, B.fromText (quote st), squote] {- QUS: should we be operating on Text like this? -} quote :: T.Text -> T.Text quote = T.concatMap quoteT quoteT :: Char -> T.Text quoteT '\\' = "\\\\" quoteT '"' = "\\\"" quoteT '\n' = "\\n" quoteT '\t' = "\\t" quoteT '\r' = "\\r" quoteT c = let nc = ord c in if nc > 0xffff then T.pack ('\\':'U': numToHex 8 nc) else if nc > 0x7e || nc < 0x20 then T.pack ('\\':'u': numToHex 4 nc) else T.singleton c -- we assume c > 0, n >= 0 and that the input value fits -- into the requested number of digits numToHex :: Int -> Int -> String numToHex c = go [] where go s 0 = replicate (c - length s) '0' ++ s go s n = let (m,x) = divMod n 16 in go (iToD x:s) m -- Data.Char.intToDigit uses lower-case Hex iToD x | x < 10 = intToDigit x | otherwise = toUpper $ intToDigit x -------------------------------------------------------------------------------- -- -- Copyright (c) 2003, Graham Klyne, 2009 Vasili I Galchin, -- 2011, 2012 Douglas Burke -- All rights reserved. -- -- This file is part of Swish. -- -- Swish is free software; you can redistribute it and/or modify -- it under the terms of the GNU General Public License as published by -- the Free Software Foundation; either version 2 of the License, or -- (at your option) any later version. -- -- Swish is distributed in the hope that it will be useful, -- but WITHOUT ANY WARRANTY; without even the implied warranty of -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- GNU General Public License for more details. -- -- You should have received a copy of the GNU General Public License -- along with Swish; if not, write to: -- The Free Software Foundation, Inc., -- 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -- --------------------------------------------------------------------------------