module Data.RDF.Types ( -- * RDF triples, nodes and literals LValue(PlainL,PlainLL,TypedL), Node(UNode,BNode,BNodeGen,LNode), Subject, Predicate, Object, Triple(Triple), Triples, View(view), -- * Constructor functions plainL,plainLL,typedL, unode,bnode,lnode,triple, -- * Node query function isUNode,isLNode,isBNode, -- * RDF Type RDF(baseUrl,prefixMappings,addPrefixMappings,empty,mkRdf,triplesOf,select,query), -- * Parsing RDF RdfParser(parseString,parseFile,parseURL), -- * Serializing RDF RdfSerializer(hWriteRdf,writeRdf,hWriteH,writeH,hWriteTs,hWriteT,writeT, writeTs,hWriteN, writeN), -- * Namespaces and Prefixes Namespace(PrefixedNS,PlainNS), PrefixMappings(PrefixMappings),PrefixMapping(PrefixMapping), -- * Supporting types BaseUrl(BaseUrl), NodeSelector, ParseFailure(ParseFailure), ) where import Prelude hiding (pred) import qualified Data.Text as T import System.IO import Text.Printf import Data.Map(Map) import qualified Data.List as List import qualified Data.Map as Map ------------------- -- LValue and constructor functions -- |The actual value of an RDF literal, represented as the 'LValue' -- parameter of an 'LNode'. data LValue = -- Constructors are not exported, because we need to have more -- control over the format of the literal text that we store. -- |A plain (untyped) literal value in an unspecified language. PlainL !T.Text -- |A plain (untyped) literal value with a language specifier. | PlainLL !T.Text !T.Text -- |A typed literal value consisting of the literal value and -- the URI of the datatype of the value, respectively. | TypedL !T.Text !T.Text -- |Return a PlainL LValue for the given string value. {-# INLINE plainL #-} plainL :: T.Text -> LValue plainL = PlainL -- |Return a PlainLL LValue for the given string value and language, -- respectively. {-# INLINE plainLL #-} plainLL :: T.Text -> T.Text -> LValue plainLL = PlainLL -- |Return a TypedL LValue for the given string value and datatype URI, -- respectively. {-# INLINE typedL #-} typedL :: T.Text -> T.Text -> LValue typedL val dtype = TypedL (canonicalize dtype val) dtype ------------------- -- Node and constructor functions -- |An RDF node, which may be either a URIRef node ('UNode'), a blank -- node ('BNode'), or a literal node ('LNode'). data Node = -- |An RDF URI reference. See -- for more -- information. UNode !T.Text -- |An RDF blank node. See -- for more -- information. | BNode !T.Text -- |An RDF blank node with an auto-generated identifier, as used in -- Turtle. | BNodeGen !Int -- |An RDF literal. See -- for more -- information. | LNode !LValue -- |An alias for 'Node', defined for convenience and readability purposes. type Subject = Node -- |An alias for 'Node', defined for convenience and readability purposes. type Predicate = Node -- |An alias for 'Node', defined for convenience and readability purposes. type Object = Node -- |Return a URIRef node for the given bytetring URI. {-# INLINE unode #-} unode :: T.Text -> Node unode = UNode -- |Return a blank node using the given string identifier. {-# INLINE bnode #-} bnode :: T.Text -> Node bnode = BNode -- |Return a literal node using the given LValue. {-# INLINE lnode #-} lnode :: LValue -> Node lnode = LNode ------------------- -- Triple and constructor functions -- |An RDF triple is a statement consisting of a subject, predicate, -- and object, respectively. -- -- See for -- more information. data Triple = Triple !Node !Node !Node -- |A list of triples. This is defined for convenience and readability. type Triples = [Triple] -- |A smart constructor function for 'Triple' that verifies the node arguments -- are of the correct type and creates the new 'Triple' if so or calls 'error'. -- /subj/ must be a 'UNode' or 'BNode', and /pred/ must be a 'UNode'. triple :: Subject -> Predicate -> Object -> Triple triple subj pred obj | isLNode subj = error $ "subject must be UNode or BNode: " ++ show subj | isLNode pred = error $ "predicate must be UNode, not LNode: " ++ show pred | isBNode pred = error $ "predicate must be UNode, not BNode: " ++ show pred | otherwise = Triple subj pred obj -- |Answer if given node is a URI Ref node. {-# INLINE isUNode #-} isUNode :: Node -> Bool isUNode (UNode _) = True isUNode _ = False -- |Answer if given node is a blank node. {-# INLINE isBNode #-} isBNode :: Node -> Bool isBNode (BNode _) = True isBNode (BNodeGen _) = True isBNode _ = False -- |Answer if given node is a literal node. {-# INLINE isLNode #-} isLNode :: Node -> Bool isLNode (LNode _) = True isLNode _ = False -- |A type class for ADTs that expose views to clients. class View a b where view :: a -> b -- |An RDF value is a set of (unique) RDF triples, together with the -- operations defined upon them. -- -- For information about the efficiency of the functions, see the -- documentation for the particular RDF instance. -- -- For more information about the concept of an RDF graph, see -- the following: . class RDF rdf where -- |Return the base URL of this RDF, if any. baseUrl :: rdf -> Maybe BaseUrl -- |Return the prefix mappings defined for this RDF, if any. prefixMappings :: rdf -> PrefixMappings -- |Return an RDF with the specified prefix mappings merged with -- the existing mappings. If the Bool arg is True, then a new mapping -- for an existing prefix will replace the old mapping; otherwise, -- the new mapping is ignored. addPrefixMappings :: rdf -> PrefixMappings -> Bool -> rdf -- |Return an empty RDF. empty :: rdf -- |Return a RDF containing all the given triples. Handling of duplicates -- in the input depend on the particular RDF implementation. mkRdf :: Triples -> Maybe BaseUrl -> PrefixMappings -> rdf -- |Return all triples in the RDF, as a list. triplesOf :: rdf -> Triples -- |Select the triples in the RDF that match the given selectors. -- -- The three NodeSelector parameters are optional functions that match -- the respective subject, predicate, and object of a triple. The triples -- returned are those in the given graph for which the first selector -- returns true when called on the subject, the second selector returns -- true when called on the predicate, and the third selector returns true -- when called on the ojbect. A 'Nothing' parameter is equivalent to a -- function that always returns true for the appropriate node; but -- implementations may be able to much more efficiently answer a select -- that involves a 'Nothing' parameter rather than an @(id True)@ parameter. -- -- The following call illustrates the use of select, and would result in -- the selection of all and only the triples that have a blank node -- as subject and a literal node as object: -- -- > select gr (Just isBNode) Nothing (Just isLNode) -- -- Note: this function may be very slow; see the documentation for the -- particular RDF implementation for more information. select :: rdf -> NodeSelector -> NodeSelector -> NodeSelector -> Triples -- |Return the triples in the RDF that match the given pattern, where -- the pattern (3 Maybe Node parameters) is interpreted as a triple pattern. -- -- The @Maybe Node@ params are interpreted as the subject, predicate, and -- object of a triple, respectively. @Just n@ is true iff the triple has -- a node equal to @n@ in the appropriate location; @Nothing@ is always -- true, regardless of the node in the appropriate location. -- -- For example, @ query rdf (Just n1) Nothing (Just n2) @ would return all -- and only the triples that have @n1@ as subject and @n2@ as object, -- regardless of the predicate of the triple. query :: rdf -> Maybe Node -> Maybe Node -> Maybe Node -> Triples -- |An RdfParser is a parser that knows how to parse 1 format of RDF and -- can parse an RDF document of that type from a string, a file, or a URL. -- Required configuration options will vary from instance to instance. class RdfParser p where -- |Parse RDF from the given text, yielding a failure with error message or -- the resultant RDF. parseString :: forall rdf. (RDF rdf) => p -> T.Text -> Either ParseFailure rdf -- |Parse RDF from the local file with the given path, yielding a failure with error -- message or the resultant RDF in the IO monad. parseFile :: forall rdf. (RDF rdf) => p -> String -> IO (Either ParseFailure rdf) -- |Parse RDF from the remote file with the given HTTP URL (https is not supported), -- yielding a failure with error message or the resultant graph in the IO monad. parseURL :: forall rdf. (RDF rdf) => p -> String -> IO (Either ParseFailure rdf) -- |An RdfSerializer is a serializer of RDF to some particular output format, such as -- NTriples or Turtle. class RdfSerializer s where -- |Write the RDF to a file handle using whatever configuration is specified by -- the first argument. hWriteRdf :: forall rdf. (RDF rdf) => s -> Handle -> rdf -> IO () -- |Write the RDF to stdout; equivalent to @'hWriteRdf' stdout@. writeRdf :: forall rdf. (RDF rdf) => s -> rdf -> IO () -- |Write to the file handle whatever header information is required based on -- the output format. For example, if serializing to Turtle, this method would -- write the necessary \@prefix declarations and possibly a \@baseUrl declaration, -- whereas for NTriples, there is no header section at all, so this would be a no-op. hWriteH :: forall rdf. (RDF rdf) => s -> Handle -> rdf -> IO () -- |Write header information to stdout; equivalent to @'hWriteRdf' stdout@. writeH :: forall rdf. (RDF rdf) => s -> rdf -> IO () -- |Write some triples to a file handle using whatever configuration is specified -- by the first argument. -- -- WARNING: if the serialization format has header-level information -- that should be output (e.g., \@prefix declarations for Turtle), then you should -- use 'hWriteG' instead of this method unless you're sure this is safe to use, since -- otherwise the resultant document will be missing the header information and -- will not be valid. hWriteTs :: s -> Handle -> Triples -> IO () -- |Write some triples to stdout; equivalent to @'hWriteTs' stdout@. writeTs :: s -> Triples -> IO () -- |Write a single triple to the file handle using whatever configuration is -- specified by the first argument. The same WARNING applies as to 'hWriteTs'. hWriteT :: s -> Handle -> Triple -> IO () -- |Write a single triple to stdout; equivalent to @'hWriteT' stdout@. writeT :: s -> Triple -> IO () -- |Write a single node to the file handle using whatever configuration is -- specified by the first argument. The same WARNING applies as to 'hWriteTs'. hWriteN :: s -> Handle -> Node -> IO () -- |Write a single node to sdout; equivalent to @'hWriteN' stdout@. writeN :: s -> Node -> IO () -- |The base URL of an RDF. newtype BaseUrl = BaseUrl T.Text deriving (Eq, Ord, Show) -- |A 'NodeSelector' is either a function that returns 'True' -- or 'False' for a node, or Nothing, which indicates that all -- nodes would return 'True'. -- -- The selector is said to select, or match, the nodes for -- which it returns 'True'. -- -- When used in conjunction with the 'select' method of 'Graph', three -- node selectors are used to match a triple. type NodeSelector = Maybe (Node -> Bool) -- |Represents a failure in parsing an N-Triples document, including -- an error message with information about the cause for the failure. newtype ParseFailure = ParseFailure String deriving (Eq, Show) -- |A node is equal to another node if they are both the same type -- of node and if the field values are equal. instance Eq Node where (UNode bs1) == (UNode bs2) = bs1 == bs2 (BNode bs1) == (BNode bs2) = bs1 == bs2 (BNodeGen i1) == (BNodeGen i2) = i1 == i2 (LNode l1) == (LNode l2) = l1 == l2 _ == _ = False -- |Node ordering is defined first by type, with Unode < BNode < BNodeGen -- < LNode PlainL < LNode PlainLL < LNode TypedL, and secondly by -- the natural ordering of the node value. -- -- E.g., a '(UNode _)' is LT any other type of node, and a -- '(LNode (TypedL _ _))' is GT any other type of node, and the ordering -- of '(BNodeGen 44)' and '(BNodeGen 3)' is that of the values, or -- 'compare 44 3', GT. instance Ord Node where compare = compareNode compareNode :: Node -> Node -> Ordering compareNode (UNode bs1) (UNode bs2) = compare bs1 bs2 compareNode (UNode _) _ = LT compareNode (BNode bs1) (BNode bs2) = compare bs1 bs2 compareNode (BNode _) (UNode _) = GT compareNode (BNode _) _ = LT compareNode (BNodeGen i1) (BNodeGen i2) = compare i1 i2 compareNode (BNodeGen _) (LNode _) = LT compareNode (BNodeGen _) _ = GT compareNode (LNode (PlainL bs1)) (LNode (PlainL bs2)) = compare bs1 bs2 compareNode (LNode (PlainL _)) (LNode _) = LT compareNode (LNode (PlainLL bs1 bs1')) (LNode (PlainLL bs2 bs2')) = case compare bs1' bs2' of EQ -> compare bs1 bs2 LT -> LT GT -> GT compareNode (LNode (PlainLL _ _)) (LNode (PlainL _)) = GT compareNode (LNode (PlainLL _ _)) (LNode _) = LT compareNode (LNode (TypedL bsType1 bs1)) (LNode (TypedL bsType2 bs2)) = case compare bs1 bs2 of EQ -> compare bsType1 bsType2 LT -> LT GT -> GT compareNode (LNode (TypedL _ _)) (LNode _) = GT compareNode (LNode _) _ = GT -- |Two triples are equal iff their respective subjects, predicates, and objects -- are equal. instance Eq Triple where (Triple s1 p1 o1) == (Triple s2 p2 o2) = s1 == s2 && p1 == p2 && o1 == o2 -- |The ordering of triples is based on that of the subject, predicate, and object -- of the triple, in that order. instance Ord Triple where (Triple s1 p1 o1) `compare` (Triple s2 p2 o2) = case compareNode s1 s2 of EQ -> case compareNode p1 p2 of EQ -> compareNode o1 o2 LT -> LT GT -> GT GT -> GT LT -> LT -- |Two 'LValue' values are equal iff they are of the same type and all fields are -- equal. instance Eq LValue where (PlainL bs1) == (PlainL bs2) = bs1 == bs2 (PlainLL bs1 bs1') == (PlainLL bs2 bs2') = bs1' == bs2' && bs1 == bs2 (TypedL bsType1 bs1) == (TypedL bsType2 bs2) = bsType1 == bsType2 && bs1 == bs2 _ == _ = False -- |Ordering of 'LValue' values is as follows: (PlainL _) < (PlainLL _ _) -- < (TypedL _ _), and values of the same type are ordered by field values, -- with '(PlainLL literalValue language)' being ordered by language first and -- literal value second, and '(TypedL literalValue datatypeUri)' being ordered -- by datatype first and literal value second. instance Ord LValue where compare = compareLValue {-# INLINE compareLValue #-} compareLValue :: LValue -> LValue -> Ordering compareLValue (PlainL bs1) (PlainL bs2) = compare bs1 bs2 compareLValue (PlainL _) _ = LT compareLValue _ (PlainL _) = GT compareLValue (PlainLL bs1 bs1') (PlainLL bs2 bs2') = case compare bs1' bs2' of EQ -> compare bs1 bs2 GT -> GT LT -> LT compareLValue (PlainLL _ _) _ = LT compareLValue _ (PlainLL _ _) = GT compareLValue (TypedL l1 t1) (TypedL l2 t2) = case compare t1 t2 of EQ -> compare l1 l2 GT -> GT LT -> LT -- String representations of the various data types; generally NTriples-like. instance Show Triple where show (Triple s p o) = printf "Triple(%s,%s,%s)" (show s) (show p) (show o) instance Show Node where show (UNode uri) = "UNode(" ++ show uri ++ ")" show (BNode i) = "BNode(" ++ show i ++ ")" show (BNodeGen genId) = "BNodeGen(" ++ show genId ++ ")" show (LNode lvalue) = "LNode(" ++ show lvalue ++ ")" instance Show LValue where show (PlainL lit) = "PlainL(" ++ T.unpack lit ++ ")" show (PlainLL lit lang) = "PlainLL(" ++ T.unpack lit ++ ", " ++ T.unpack lang ++ ")" show (TypedL lit dtype) = "TypedL(" ++ T.unpack lit ++ "," ++ show dtype ++ ")" ------------------------ -- Prefix mappings -- |Represents a namespace as either a prefix and uri, respectively, -- or just a uri. data Namespace = PrefixedNS T.Text T.Text -- prefix and ns uri | PlainNS T.Text -- ns uri alone instance Eq Namespace where (PrefixedNS _ u1) == (PrefixedNS _ u2) = u1 == u2 (PlainNS u1) == (PlainNS u2) = u1 == u2 (PrefixedNS _ u1) == (PlainNS u2) = u1 == u2 (PlainNS u1) == (PrefixedNS _ u2) = u1 == u2 instance Show Namespace where show (PlainNS uri) = T.unpack uri show (PrefixedNS prefix uri) = printf "(PrefixNS %s %s)" (T.unpack prefix) (T.unpack uri) -- |An alias for a map from prefix to namespace URI. newtype PrefixMappings = PrefixMappings (Map T.Text T.Text) deriving (Eq, Ord) instance Show PrefixMappings where -- This is really inefficient, but it's not used much so not what -- worth optimizing yet. show (PrefixMappings pmap) = printf "PrefixMappings [%s]" mappingsStr where showPM = show . PrefixMapping mappingsStr = List.intercalate ", " (map showPM (Map.toList pmap)) -- |A mapping of a prefix to the URI for that prefix. newtype PrefixMapping = PrefixMapping (T.Text, T.Text) deriving (Eq, Ord) instance Show PrefixMapping where show (PrefixMapping (prefix, uri)) = printf "PrefixMapping (%s, %s)" (show prefix) (show uri) ----------------- -- Internal canonicalize functions, don't export -- |Canonicalize the given 'T.Text' value using the 'T.Text' -- as the datatype URI. {-# NOINLINE canonicalize #-} canonicalize :: T.Text -> T.Text -> T.Text canonicalize typeTxt litValue = case Map.lookup typeTxt canonicalizerTable of Nothing -> litValue Just fn -> fn litValue -- A table of mappings from a 'T.Text' URI -- to a function that canonicalizes a T.Text -- assumed to be of that type. {-# NOINLINE canonicalizerTable #-} canonicalizerTable :: Map T.Text (T.Text -> T.Text) canonicalizerTable = Map.fromList [(integerUri, _integerStr), (doubleUri, _doubleStr), (decimalUri, _decimalStr)] where integerUri = "http://www.w3.org/2001/XMLSchema#integer" decimalUri = "http://www.w3.org/2001/XMLSchema#decimal" doubleUri = "http://www.w3.org/2001/XMLSchema#double" _integerStr, _decimalStr, _doubleStr :: T.Text -> T.Text _integerStr = T.dropWhile (== '0') -- exponent: [eE] ('-' | '+')? [0-9]+ -- ('-' | '+') ? ( [0-9]+ '.' [0-9]* exponent | '.' ([0-9])+ exponent | ([0-9])+ exponent ) _doubleStr s = T.pack $ show (read $ T.unpack s :: Double) -- ('-' | '+')? ( [0-9]+ '.' [0-9]* | '.' ([0-9])+ | ([0-9])+ ) _decimalStr s = -- haskell double parser doesn't handle '1.'.., case T.last s of -- so we add a zero if that's the case and then parse '.' -> f (s `T.snoc` '0') _ -> f s where f s' = T.pack $ show (read $ T.unpack s' :: Double)