{-# LANGUAGE OverloadedStrings #-} -------------------------------------------------------------------------------- -- See end of this file for licence information. -------------------------------------------------------------------------------- -- | -- Module : N3Formatter -- Copyright : (c) 2003, Graham Klyne, 2009 Vasili I Galchin, 2011 Douglas Burke -- License : GPL V2 -- -- Maintainer : Douglas Burke -- Stability : experimental -- Portability : OverloadedStrings -- -- This Module implements a NTriples formatter (see [1]) -- for an RDFGraph value. -- -- -- REFERENCES: -- -- 1 -- RDF Test Cases -- W3C Recommendation 10 February 2004 -- -------------------------------------------------------------------------------- module Swish.RDF.NTFormatter ( NodeGenLookupMap , formatGraphAsText , formatGraphAsLazyText , formatGraphAsBuilder ) where import Swish.RDF.RDFGraph ( RDFGraph, RDFLabel(..) , getArcs ) import Swish.RDF.GraphClass ( Arc(..) ) import Swish.Utils.Namespace (ScopedName, getQName) import Swish.RDF.Vocabulary (isLang, langTag) import Swish.Utils.LookupMap ( LookupMap, emptyLookupMap , mapFind, mapAdd ) import Data.Char (ord, intToDigit, toUpper) import Control.Monad.State import Control.Applicative ((<$>)) import Data.Monoid -- it strikes me that using Lazy Text here is likely to be -- wrong; however I have done no profiling to back this -- assumption up! import qualified Data.Text as T import qualified Data.Text.Lazy as L import qualified Data.Text.Lazy.Builder as B ---------------------------------------------------------------------- -- Graph formatting state monad ---------------------------------------------------------------------- -- -- This is a lot simpler than other formatters. -- | Node name generation state information that carries through -- and is updated by nested formulae type NodeGenLookupMap = LookupMap (RDFLabel,Int) data NTFormatterState = NTFS { ntfsNodeMap :: NodeGenLookupMap, ntfsNodeGen :: Int } deriving Show emptyNTFS :: NTFormatterState emptyNTFS = NTFS { ntfsNodeMap = emptyLookupMap, ntfsNodeGen = 0 } type Formatter a = State NTFormatterState a -- | Convert a RDF graph to NTriples format. formatGraphAsText :: RDFGraph -> T.Text formatGraphAsText = L.toStrict . formatGraphAsLazyText -- | Convert a RDF graph to NTriples format. formatGraphAsLazyText :: RDFGraph -> L.Text formatGraphAsLazyText = B.toLazyText . formatGraphAsBuilder -- | Convert a RDF graph to NTriples format. formatGraphAsBuilder :: RDFGraph -> B.Builder formatGraphAsBuilder gr = fst $ runState (formatGraph gr) emptyNTFS ---------------------------------------------------------------------- -- Formatting as a monad-based computation ---------------------------------------------------------------------- formatGraph :: RDFGraph -> Formatter B.Builder formatGraph gr = mconcat <$> mapM formatArc (getArcs gr) -- TODO: this reverses the contents but may be faster? -- that is if I've got the order right in the mappend call -- formatGraphBuilder gr = foldl' (\a b -> b `mappend` (formatArcBuilder a)) B.empty (getArcs gr) space, nl :: B.Builder space = B.singleton ' ' nl = " .\n" formatArc :: Arc RDFLabel -> Formatter B.Builder formatArc (Arc s p o) = do sl <- formatLabel s pl <- formatLabel p ol <- formatLabel o return $ mconcat [sl, space, pl, space, ol, nl] -- return $ sl `mappend` $ space `mappend` $ pl `mappend` $ space `mappend` $ ol `mappend` nl {- If we have a blank node then can - use the label it contains - generate a new one on output For now we create new labels whatever the input was since this simplifies things, but it may be changed. formatLabel :: RDFLabel -> Formatter String formatLabel lab@(Blank (lnc:_)) = if isDigit lnc then mapBlankNode lab else return $ show lab formatLabel lab = return $ show lab -} squote, at, carets :: B.Builder squote = "\"" at = "@" carets = "^^" formatLabel :: RDFLabel -> Formatter B.Builder formatLabel lab@(Blank _) = mapBlankNode lab formatLabel (Res sn) = return $ showScopedName sn formatLabel (Lit lit Nothing) = return $ quoteText lit formatLabel (Lit lit (Just nam)) | isLang nam = return $ mconcat [quoteText lit, at, B.fromText (langTag nam)] | otherwise = return $ mconcat [quoteText lit, carets, showScopedName nam] -- do not expect to get the following, but include -- just in case rather than failing formatLabel lab = return $ B.fromString $ show lab mapBlankNode :: RDFLabel -> Formatter B.Builder mapBlankNode lab = do st <- get let cmap = ntfsNodeMap st cval = ntfsNodeGen st nv <- case mapFind 0 lab cmap of 0 -> do let nval = succ cval nmap = mapAdd cmap (lab, nval) put $ st { ntfsNodeMap = nmap, ntfsNodeGen = nval } return nval n -> return n return $ "_:swish" `mappend` B.fromString (show nv) -- TODO: can we use Network.URI to protect the URI? showScopedName :: ScopedName -> B.Builder {- showScopedName (ScopedName n l) = let uri = T.pack (show (nsURI n)) `mappend` l in mconcat ["<", B.fromText (quote uri), ">"] -} -- showScopedName s = mconcat ["<", B.fromText (quote (T.pack (show (getQName s)))), ">"] showScopedName s = B.fromText (quote (T.pack (show (getQName s)))) -- looks like qname already adds the <> around this {- Swish.Utils.MiscHelpers contains a quote routine which we expand upon here to match the NT syntax. -} quoteText :: T.Text -> B.Builder quoteText st = mconcat [squote, B.fromText (quote st), squote] {- QUS: should we be operating on Text like this? -} quote :: T.Text -> T.Text quote = T.concatMap quoteT quoteT :: Char -> T.Text quoteT '\\' = "\\\\" quoteT '"' = "\\\"" quoteT '\n' = "\\n" quoteT '\t' = "\\t" quoteT '\r' = "\\r" quoteT c = let nc = ord c in if nc > 0xffff then T.pack ('\\':'U': numToHex 8 nc) else if nc > 0x7e || nc < 0x20 then T.pack ('\\':'u': numToHex 4 nc) else T.singleton c -- we assume c > 0, n >= 0 and that the input value fits -- into the requested number of digits numToHex :: Int -> Int -> String numToHex c v = go [] v where go s 0 = replicate (c - length s) '0' ++ s go s n = let (m,x) = divMod n 16 in go (iToD x:s) m -- Data.Char.intToDigit uses lower-case Hex iToD x | x < 10 = intToDigit x | otherwise = toUpper $ intToDigit x -------------------------------------------------------------------------------- -- -- Copyright (c) 2003, Graham Klyne, 2009 Vasili I Galchin, 2011 Douglas Burke -- All rights reserved. -- -- This file is part of Swish. -- -- Swish is free software; you can redistribute it and/or modify -- it under the terms of the GNU General Public License as published by -- the Free Software Foundation; either version 2 of the License, or -- (at your option) any later version. -- -- Swish is distributed in the hope that it will be useful, -- but WITHOUT ANY WARRANTY; without even the implied warranty of -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- GNU General Public License for more details. -- -- You should have received a copy of the GNU General Public License -- along with Swish; if not, write to: -- The Free Software Foundation, Inc., -- 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -- --------------------------------------------------------------------------------