-------------------------------------------------------------------------------- -- See end of this file for licence information. -------------------------------------------------------------------------------- -- | -- Module : N3Parser -- Copyright : (c) 2003, Graham Klyne, 2009 Vasili I Galchin, 2011 Douglas Burke -- License : GPL V2 -- -- Maintainer : Douglas Burke -- Stability : experimental -- Portability : H98 -- -- This Module implements a Notation 3 parser (see [1], [2], [3]), returning a -- new 'RDFGraph' consisting of triples and namespace information parsed from -- the supplied N3 input string, or an error indication. -- -- Uses the Parsec monadic parser library. -- -- REFERENCES: -- -- 1 -- Notation3 (N3): A readable RDF syntax, -- W3C Team Submission 14 January 2008 -- -- 2 -- Tim Berners-Lee's design issues series notes and description -- -- 3 -- Notation 3 Primer by Sean Palmer -- -- NOTES: -- -- UTF-8 handling is not really tested. -- -- Several items seem to be allowed (from looking at N3 test suites and files -- 'in the wild') that are not given supported by the N3 grammar [1]. We try -- to support these, including -- -- - \"@:@\" and \"@base:@\" as valid QNames (ie a blank local component) -- -- - @true@ and @false@ as well as @\@true@ and @\@false@ -- -- - use of lower-case characters for @\\u@ and @\\U@ escape codes -- -- No performance testing has been applied. -- -- Not all N3 grammar elements are supported, including: -- -- - @\@forSome@ (we read it in but ignore the arguments) -- -- - @\@forAll@ (this causes a parse error) -- -- - formulae are lightly tested -- -- - string support is incomplete (e.g. unrecognized escape characters -- such as @\\q@ are probably handled incorrectly) -- -------------------------------------------------------------------------------- module Swish.RDF.N3Parser ( ParseResult , parseN3 , parseN3fromString , parseAnyfromString , parseTextFromString, parseAltFromString , parseNameFromString, parsePrefixFromString , parseAbsURIrefFromString, parseLexURIrefFromString, parseURIref2FromString -- * Exports for parsers that embed Notation3 in a bigger syntax , N3Parser, N3State(..), SpecialMap , whiteSpace, symbol, lexeme, eof, identStart, identLetter -- , getPrefix -- a combination of the old defaultPrefix and namedPrefix productions , n3symbol -- replacement for uriRef2 -- TODO: check this is semantically correct , quickVariable -- was varid , lexUriRef , document, subgraph , newBlankNode ) where import Swish.RDF.RDFGraph ( RDFGraph, RDFLabel(..) , NamespaceMap , LookupFormula(..) , addArc , setFormula , setNamespaces , emptyRDFGraph ) import Swish.RDF.GraphClass ( arc ) import Swish.Utils.LookupMap ( LookupMap(..) , mapFind, mapFindMaybe, mapReplaceOrAdd ) import Swish.Utils.Namespace ( Namespace(..) , ScopedName(..) , getScopePrefix , getScopedNameURI , makeScopedName, makeUriScopedName , makeQNameScopedName , nullScopedName ) import Swish.Utils.QName (QName, getQNameURI) import Swish.RDF.Vocabulary ( langName , rdf_type , rdf_first, rdf_rest, rdf_nil , owl_sameAs, log_implies , xsd_boolean, xsd_integer, xsd_decimal, xsd_double ) import Swish.RDF.RDFParser ( SpecialMap , mapPrefix , prefixTable, specialTable , ParseResult, RDFParser , n3Style, n3Lexer, ignore , annotateParsecError , mkTypedLit ) import Control.Applicative import Control.Monad (forM_, foldM) import Network.URI (URI, relativeTo, parseURI, parseURIReference, uriToString) import Data.Maybe (fromMaybe, fromJust) import Text.ParserCombinators.Parsec hiding (many, optional, (<|>)) import qualified Text.ParserCombinators.Parsec as PC import qualified Text.ParserCombinators.Parsec.Token as P import Data.Char (isSpace, chr) ---------------------------------------------------------------------- -- Set up token parsers ---------------------------------------------------------------------- lexer :: P.TokenParser N3State lexer = n3Lexer whiteSpace :: N3Parser () whiteSpace = P.whiteSpace lexer symbol :: String -> N3Parser String symbol = P.symbol lexer lexeme :: N3Parser a -> N3Parser a lexeme = P.lexeme lexer identStart , identLetter :: CharParser st Char identStart = P.identStart n3Style identLetter = P.identLetter n3Style ---------------------------------------------------------------------- -- Define parser state and helper functions ---------------------------------------------------------------------- -- | N3 parser state data N3State = N3State { graphState :: RDFGraph -- Graph under construction , thisNode :: RDFLabel -- current context node (aka 'this') , prefixUris :: NamespaceMap -- namespace prefix mapping table , syntaxUris :: SpecialMap -- special name mapping table , nodeGen :: Int -- blank node id generator , keywordsList :: [String] -- contents of the @keywords statement , allowLocalNames :: Bool -- True if @keywords used so that bare names are QNames in default namespace } -- | Functions to update N3State vector (use with Parsec updateState) setPrefix :: String -> String -> N3State -> N3State setPrefix pre uri st = st { prefixUris=p' } where p' = mapReplaceOrAdd (Namespace pre uri) (prefixUris st) -- | Set name for special syntax element setSName :: String -> ScopedName -> N3State -> N3State setSName nam snam st = st { syntaxUris=s' } where s' = mapReplaceOrAdd (nam,snam) (syntaxUris st) setSUri :: String -> String -> N3State -> N3State setSUri nam suri = setSName nam (makeScopedName "" suri "") -- | Set the list of tokens that can be used without needing the leading -- \@ symbol. setKeywordsList :: [String] -> N3State -> N3State setKeywordsList ks st = st { keywordsList = ks, allowLocalNames = True } -- Functions to access state: -- | Get name for special syntax element, default null getSName :: N3State -> String -> ScopedName getSName st nam = mapFind nullScopedName nam (syntaxUris st) getSUri :: N3State -> String -> String getSUri st nam = getScopedNameURI $ getSName st nam -- Map prefix to namespace getPrefixNs :: N3State -> String -> Namespace getPrefixNs st pre = Namespace pre (mapPrefix (prefixUris st) pre) -- Map ScopedName using prefix table -- (Ignore URI in supplied ScopedName) getPrefixScopedName :: N3State -> ScopedName -> ScopedName getPrefixScopedName st snam = ScopedName (getPrefixNs st pre) loc where pre = getScopePrefix snam loc = snLocal snam getKeywordsList :: N3State -> [String] getKeywordsList = keywordsList getAllowLocalNames :: N3State -> Bool getAllowLocalNames = allowLocalNames -- Return function to update graph in N3 parser state, -- using the supplied function of a graph -- (use returned function with Parsec updateState) updateGraph :: ( RDFGraph -> RDFGraph ) -> ( N3State -> N3State ) updateGraph f s = s { graphState = f (graphState s) } ---------------------------------------------------------------------- -- Define top-level parser function: -- accepts a string and returns a graph or error ---------------------------------------------------------------------- type N3Parser a = RDFParser N3State a -- | Parse a string as N3 (with no real base URI). -- -- See 'parseN3' if you need to provide a base URI. -- parseN3fromString :: String -- ^ input in N3 format. -> ParseResult parseN3fromString = parseAnyfromString document Nothing -- | Parse a string with an optional base URI. -- -- See also 'parseN3fromString'. -- parseN3 :: String -- ^ input in N3 format. -> Maybe QName -- ^ optional base URI -> ParseResult parseN3 = flip (parseAnyfromString document) {- -- useful for testing test :: String -> RDFGraph test = either error id . parseAnyfromString document Nothing -} -- | Function to supply initial context and parse supplied term. -- -- We augment the Parsec error with the context. -- parseAnyfromString :: N3Parser a -- ^ parser to apply -> Maybe QName -- ^ base URI of the input, or @Nothing@ to use default base value -> String -- ^ input to be parsed -> Either String a parseAnyfromString parser mbase input = let pmap = LookupMap prefixTable muri = fmap makeQNameScopedName mbase smap = LookupMap $ specialTable muri pstate = N3State { graphState = emptyRDFGraph , thisNode = NoNode , prefixUris = pmap , syntaxUris = smap , nodeGen = 0 , keywordsList = ["a", "is", "of", "true", "false"] -- not 100% sure about true/false here , allowLocalNames = False } puri = case mbase of Just base -> fmap showURI $ appendUris (getQNameURI base) "#" _ -> Right "#" -- this is getting a bit ugly in case puri of Left emsg -> Left $ "Invalid base: " ++ emsg Right p -> case runParser parser (setPrefix "" p pstate) "" input of Right res -> Right res Left err -> Left $ annotateParsecError 1 (lines input) err newBlankNode :: N3Parser RDFLabel newBlankNode = do s <- getState let n = succ (nodeGen s) setState $ s { nodeGen = n } return $ Blank (show n) -- Test functions for selected element parsing parseTextFromString :: String -> String -> Either String String parseTextFromString s = parseAnyfromString (string s) Nothing parseAltFromString :: String -> String -> String -> Either String String parseAltFromString s1 s2 = parseAnyfromString ( string s1 <|> string s2 ) Nothing parseNameFromString :: String -> Either String String parseNameFromString = parseAnyfromString n3Name Nothing parsePrefixFromString :: String -> Either String Namespace parsePrefixFromString = parseAnyfromString p Nothing where p = do pref <- n3Name st <- getState return (getPrefixNs st pref) -- map prefix to namespace parseAbsURIrefFromString :: String -> Either String String parseAbsURIrefFromString = parseAnyfromString (fmap showURI explicitURI) Nothing -- parseAnyfromString absUriRef Nothing parseLexURIrefFromString :: String -> Either String String parseLexURIrefFromString = parseAnyfromString lexUriRef Nothing parseURIref2FromString :: String -> Either String ScopedName parseURIref2FromString = parseAnyfromString n3symbol Nothing -- parseAnyfromString uriRef2 Nothing ---------------------------------------------------------------------- -- Syntax productions ---------------------------------------------------------------------- {- TODO: - this parser is a *lot* slower than the original one -} -- helper routines comma, semiColon , fullStop :: N3Parser () comma = ignore $ symbol "," semiColon = ignore $ symbol ";" fullStop = ignore $ symbol "." -- a specialization of bracket/between br :: String -> String -> N3Parser a -> N3Parser a br lsym rsym = between (symbol lsym) (symbol rsym) -- The @ character is optional if the keyword is in the -- keyword list -- atSign :: String -> N3Parser () atSign s = do st <- getState let p = ignore $ char '@' if s `elem` getKeywordsList st then PC.optional p else p atWord :: String -> N3Parser String atWord s = do atSign s -- TODO: does it really make sense to add the not-followed-by-a-colon rule here? -- apply to both cases even though should only really be necessary -- when the at sign is not given -- lexeme $ string s *> notFollowedBy (char ':') return s showURI :: URI -> String showURI u = uriToString id u "" -- TODO: look at using URIs throughout getScopedNameURI' :: URI -> String getScopedNameURI' = showURI -- getScopedNameURI' = getScopedNameURI . makeUriScopedName . showURI operatorLabel :: ScopedName -> N3Parser RDFLabel {- operatorLabel snam = do s <- getState return $ Res $ getPrefixScopedName s snam -} operatorLabel snam = (Res . flip getPrefixScopedName snam) <$> getState -- Add statement to graph in N3 parser state addStatement :: RDFLabel -> RDFLabel -> RDFLabel -> N3Parser () addStatement s p o = updateState (updateGraph (addArc (arc s p o) )) addStatementRev :: RDFLabel -> RDFLabel -> RDFLabel -> N3Parser () addStatementRev o p s = addStatement s p o {- A number of productions require a name, which starts with [A-Z_a-z#x00c0-#x00d6#x00d8-#x00f6#x00f8-#x02ff#x0370-#x037d#x037f-#x1fff#x200c-#x200d#x2070-#x218f#x2c00-#x2fef#x3001-#xd7ff#xf900-#xfdcf#xfdf0-#xfffd#x00010000-#x000effff] and then has [\-0-9A-Z_a-z#x00b7#x00c0-#x00d6#x00d8-#x00f6#x00f8-#x037d#x037f-#x1fff#x200c-#x200d#x203f-#x2040#x2070-#x218f#x2c00-#x2fef#x3001-#xd7ff#xf900-#xfdcf#xfdf0-#xfffd#x00010000-#x000effff]* we encode this as the n3Name production -} initChar , bodyChar :: String initChar = ['A'..'Z'] ++ "_" ++ ['a'..'z'] ++ map chr ([0x00c0..0x00d6] ++ [0x00d8..0x00f6] ++ [0x00f8..0x02ff] ++ [0x0370..0x037d] ++ [0x037f..0x1fff] ++ [0x200c..0x200d] ++ [0x2070..0x218f] ++ [0x2c00..0x2fef] ++ [0x3001..0xd7ff] ++ [0xf900..0xfdcf] ++ [0xfdf0..0xfffd] ++ [0x00010000..0x000effff]) bodyChar = '-' : ['0'..'9'] ++ ['A'..'Z'] ++ "_" ++ ['a'..'z'] ++ map chr (0x00b7 : [0x00c0..0x00d6] ++ [0x00d8..0x00f6] ++ [0x00f8..0x037d] ++ [0x037f..0x1fff] ++ [0x200c..0x200d] ++ [0x203f..0x2040] ++ [0x2070..0x218f] ++ [0x2c00..0x2fef] ++ [0x3001..0xd7ff] ++ [0xf900..0xfdcf] ++ [0xfdf0..0xfffd] ++ [0x00010000..0x000effff]) n3Name :: N3Parser String n3Name = (:) <$> n3Init <*> n3Body where n3Init = oneOf initChar "Initial character of a name" n3Body = many (oneOf bodyChar) "Body of the name" {- quickvariable ::= \?[A-Z_a-z#x00c0-#x00d6#x00d8-#x00f6#x00f8-#x02ff#x0370-#x037d#x037f-#x1fff#x200c-#x200d#x2070-#x218f#x2c00-#x2fef#x3001-#xd7ff#xf900-#xfdcf#xfdf0-#xfffd#x00010000-#x000effff][\-0-9A-Z_a-z#x00b7#x00c0-#x00d6#x00d8-#x00f6#x00f8-#x037d#x037f-#x1fff#x200c-#x200d#x203f-#x2040#x2070-#x218f#x2c00-#x2fef#x3001-#xd7ff#xf900-#xfdcf#xfdf0-#xfffd#x00010000-#x000effff]* -} -- TODO: is mapping to Var correct? quickVariable :: N3Parser RDFLabel quickVariable = char '?' *> (Var <$> n3Name) "quickvariable" {- string ::= ("""[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*""")|("[^"\\]*(?:\\.[^"\\]*)*") or string ::= tripleQuoted | singleQUoted -} n3string :: N3Parser String n3string = tripleQuoted <|> singleQuoted "string" {- singleQuoted ::= "[^"\\]*(?:\\.[^"\\]*)*" asciiChars :: String asciiChars = map chr [0x20..0x7e] asciiCharsN3 :: String asciiCharsN3 = filter (`notElem` "\\\"") asciiChars -} -- the grammer has only upper-case A-F but some lower case values -- seen in the wild, so support them -- ntHexDigit :: N3Parser Char ntHexDigit = oneOf $ ['0'..'9'] ++ ['A'..'F'] ++ ['a'..'f'] hex4 :: N3Parser Char hex4 = do digs <- count 4 ntHexDigit let dstr = "0x" ++ digs dchar = read dstr :: Int return $ chr dchar hex8 :: N3Parser Char hex8 = do digs <- count 8 ntHexDigit let dstr = "0x" ++ digs dchar = read dstr :: Int if dchar <= 0x10FFFF then return $ chr dchar else unexpected "\\UHHHHHHHH format is limited to a maximum of \\U0010FFFF" {- This is very similar to NTriples accept that also allow the escaping of ' even though it is not required. The Python rules allow \N{name}, where name is the Unicode name. It's not clear whether we need to support this too, so for now we do not. -} protectedChar :: N3Parser Char protectedChar = (char 't' *> return '\t') <|> (char 'n' *> return '\n') <|> (char 'r' *> return '\r') <|> (char '"' *> return '"') <|> (char '\'' *> return '\'') <|> (char '\\' *> return '\\') <|> (char 'u' *> hex4) <|> (char 'U' *> hex8) -- Accept an escape character or any character as long as it isn't -- a new-line or quote. Unrecognized escape sequences should therefore -- be left alone by this. -- n3Character :: N3Parser Char n3Character = (char '\\' *> (protectedChar <|> return '\\')) <|> noneOf "\"\n" {- <|> (oneOf asciiCharsN3 "ASCII character") -- TODO: bodyChar and asciiCharsN3 overlap <|> (oneOf bodyChar "Unicode character") -} sQuot :: N3Parser Char sQuot = char '"' singleQuoted :: N3Parser String singleQuoted = between sQuot sQuot $ many n3Character {- tripleQUoted ::= """[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*""" -} tripleQuoted :: N3Parser String tripleQuoted = tQuot *> manyTill (n3Character <|> sQuot <|> char '\n') tQuot where tQuot = try (count 3 sQuot) getDefaultPrefix :: N3Parser Namespace getDefaultPrefix = do s <- getState return (getPrefixNs s "") addBase :: URI -> N3Parser () addBase = updateState . setSUri "base" . getScopedNameURI' addPrefix :: Maybe String -> URI -> N3Parser () addPrefix p = updateState . setPrefix (fromMaybe "" p) . getScopedNameURI' {-| Update the set of keywords that can be given without an \@ sign. -} updateKeywordsList :: [String] -> N3Parser () updateKeywordsList = updateState . setKeywordsList {- document ::= | statements_optional EOF -} document :: N3Parser RDFGraph document = mkGr <$> (whiteSpace *> statementsOptional *> eof *> getState) where mkGr s = setNamespaces (prefixUris s) (graphState s) {- statements_optional ::= | statement "." statements_optional | void -} statementsOptional :: N3Parser () statementsOptional = ignore $ endBy (lexeme statement) fullStop {- statement ::= | declaration | existential | simpleStatement | universal -} statement :: N3Parser () statement = declaration <|> existential <|> universal <|> simpleStatement -- having an error here leads to less informative errors in general, it seems -- "statement (existential or universal quantification or a simple statement)" {- declaration ::= | "@base" explicituri | "@keywords" barename_csl | "@prefix" prefix explicituri -} -- TODO: do we need the try statements here? atWord would need to have a try on '@' -- (if applicable) which should mean being able to get rid of try -- declaration :: N3Parser () declaration = (try (atWord "base") >> explicitURI >>= addBase) <|> (try (atWord "keywords") >> bareNameCsl >>= updateKeywordsList) <|> (try (atWord "prefix") *> getPrefix) "declaration" getPrefix :: N3Parser () getPrefix = do p <- lexeme prefix u <- explicitURI addPrefix p u {- explicituri ::= <[^>]*> Note: white space is to be ignored within <> -} explicitURI :: N3Parser URI explicitURI = do let lb = char '<' rb = char '>' -- TODO: do the whitespace definitions match? ustr <- between lb (rb "end of URI '>'") $ many (satisfy (/= '>')) let uclean = filter (not . isSpace) ustr s <- getState let base = getSUri s "base" case appendUris base uclean of Right uri -> return uri Left emsg -> fail emsg appendUris :: String -> String -> Either String URI appendUris base uri = case parseURI uri of Just absuri -> Right absuri _ -> case parseURIReference uri of Just reluri -> let baseuri = fromJust $ parseURI base in case relativeTo reluri baseuri of Just resuri -> Right resuri _ -> Left $ "Unable to append <" ++ uri ++ "> to base=<" ++ base ++ ">" _ -> Left $ "Invalid URI: <" ++ uri ++ ">" -- production from the old parser lexUriRef :: N3Parser String lexUriRef = fmap showURI $ lexeme explicitURI {- barename ::= [A-Z_a-z#x00c0-#x00d6#x00d8-#x00f6#x00f8-#x02ff#x0370-#x037d#x037f-#x1fff#x200c-#x200d#x2070-#x218f#x2c00-#x2fef#x3001-#xd7ff#xf900-#xfdcf#xfdf0-#xfffd#x00010000-#x000effff][\-0-9A-Z_a-z#x00b7#x00c0-#x00d6#x00d8-#x00f6#x00f8-#x037d#x037f-#x1fff#x200c-#x200d#x203f-#x2040#x2070-#x218f#x2c00-#x2fef#x3001-#xd7ff#xf900-#xfdcf#xfdf0-#xfffd#x00010000-#x000effff]* barename_csl ::= | barename barename_csl_tail | void barename_csl_tail ::= | "," barename barename_csl_tail | void -} bareNameCsl :: N3Parser [String] bareNameCsl = sepBy (lexeme bareName) comma bareName :: N3Parser String bareName = n3Name "barename" {- prefix ::= ([A-Z_a-z#x00c0-#x00d6#x00d8-#x00f6#x00f8-#x02ff#x0370-#x037d#x037f-#x1fff#x200c-#x200d#x2070-#x218f#x2c00-#x2fef#x3001-#xd7ff#xf900-#xfdcf#xfdf0-#xfffd#x00010000-#x000effff][\-0-9A-Z_a-z#x00b7#x00c0-#x00d6#x00d8-#x00f6#x00f8-#x037d#x037f-#x1fff#x200c-#x200d#x203f-#x2040#x2070-#x218f#x2c00-#x2fef#x3001-#xd7ff#xf900-#xfdcf#xfdf0-#xfffd#x00010000-#x000effff]*)?: -} prefix :: N3Parser (Maybe String) prefix = optional (lexeme n3Name) <* char ':' "prefix name" {- symbol ::= | explicituri | qname symbol_csl ::= | symbol symbol_csl_tail | void symbol_csl_tail ::= | "," symbol symbol_csl_tail | void -} n3symbol :: N3Parser ScopedName n3symbol = ((makeUriScopedName . showURI) <$> explicitURI) <|> qname "symbol" symbolCsl :: N3Parser [ScopedName] symbolCsl = sepBy (lexeme n3symbol) comma {- qname ::= (([A-Z_a-z#x00c0-#x00d6#x00d8-#x00f6#x00f8-#x02ff#x0370-#x037d#x037f-#x1fff#x200c-#x200d#x2070-#x218f#x2c00-#x2fef#x3001-#xd7ff#xf900-#xfdcf#xfdf0-#xfffd#x00010000-#x000effff][\-0-9A-Z_a-z#x00b7#x00c0-#x00d6#x00d8-#x00f6#x00f8-#x037d#x037f-#x1fff#x200c-#x200d#x203f-#x2040#x2070-#x218f#x2c00-#x2fef#x3001-#xd7ff#xf900-#xfdcf#xfdf0-#xfffd#x00010000-#x000effff]*)?:)?[A-Z_a-z#x00c0-#x00d6#x00d8-#x00f6#x00f8-#x02ff#x0370-#x037d#x037f-#x1fff#x200c-#x200d#x2070-#x218f#x2c00-#x2fef#x3001-#xd7ff#xf900-#xfdcf#xfdf0-#xfffd#x00010000-#x000effff][\-0-9A-Z_a-z#x00b7#x00c0-#x00d6#x00d8-#x00f6#x00f8-#x037d#x037f-#x1fff#x200c-#x200d#x203f-#x2040#x2070-#x218f#x2c00-#x2fef#x3001-#xd7ff#xf900-#xfdcf#xfdf0-#xfffd#x00010000-#x000effff]* Turtle appears to support ':' as a valid qname, which is not supported by the above production. Let's support and see what happens. This support may be removed since, if we allow white space between : and prefix or local name then statements like : a : b : c . are not parseable. TODO: Note that, for now, we explicitly handle blank nodes (of the form _:name) direcly in pathItem'. This is not a good idea since qname' is used elsewhere and so shouldn't we do the same thing there too? -} qname :: N3Parser ScopedName qname = (char ':' *> toSN getDefaultPrefix) <|> (n3Name >>= fullOrLocalQName) "QName" where toSN p = ScopedName <$> p <*> (n3Name <|> return "") fullOrLocalQName :: String -> N3Parser ScopedName fullOrLocalQName name = (char ':' *> fullQName name) <|> localQName name fullQName :: String -> N3Parser ScopedName fullQName name = do pre <- findPrefix name lname <- n3Name <|> return "" return $ ScopedName pre lname findPrefix :: String -> N3Parser Namespace findPrefix pre = do st <- getState case mapFindMaybe pre (prefixUris st) of Just uri -> return $ Namespace pre uri Nothing -> unexpected $ "Prefix '" ++ pre ++ ":' not bound." localQName :: String -> N3Parser ScopedName localQName name = do st <- getState if getAllowLocalNames st then do pre <- getDefaultPrefix return $ ScopedName pre name else fail "Invalid 'bare' word" -- TODO: not ideal error message; can we handle this case differently? {- existential ::= | "@forSome" symbol_csl For now we just read in the symbols and ignore them, since we do not mark blank nodes as existentially quantified (we assume this is the case). TODO: fix this? -} existential :: N3Parser () existential = try (atWord "forSome") *> symbolCsl >> return () {- simpleStatement ::= | subject propertylist -} simpleStatement :: N3Parser () simpleStatement = subject >>= propertyListWith {- subject ::= | expression -} subject :: N3Parser RDFLabel subject = lexeme expression {- expression ::= | pathitem pathtail pathtail ::= | "!" expression | "^" expression | void -} expression :: N3Parser RDFLabel expression = do i <- pathItem let backwardExpr = char '!' *> return addStatementRev forwardExpr = char '^' *> return addStatement mpt <- optional ( (,) <$> lexeme (forwardExpr <|> backwardExpr) <*> lexeme expression ) case mpt of Nothing -> return i Just (addFunc, pt) -> do bNode <- newBlankNode addFunc bNode pt i return bNode {- pathitem ::= | "(" pathlist ")" | "[" propertylist "]" | "{" formulacontent "}" | boolean | literal | numericliteral | quickvariable | symbol pathlist ::= | expression pathlist | void Need to think about how to handle formulae, since need to know the context of the call to know where to add them. TOOD: may include direct support for blank nodes here, namely convert _:stringval -> Blank stringval since although this should be done by symbol the types don't seem to easily match up (at first blush anyway) -} pathItem :: N3Parser RDFLabel pathItem = br "(" ")" pathList <|> br "[" "]" propertyListBNode <|> br "{" "}" formulaContent <|> try boolean <|> literal <|> numericLiteral <|> quickVariable <|> Blank <$> (string "_:" *> n3Name) -- TODO a hack that needs fixing <|> Res <$> n3symbol "pathitem" {- we create a blank node for the list and return it, whilst adding the list contents to the graph -} pathList :: N3Parser RDFLabel pathList = do cts <- many (lexeme expression) "pathlist" eNode <- operatorLabel rdf_nil case cts of [] -> return eNode (c:cs) -> do sNode <- newBlankNode first <- operatorLabel rdf_first addStatement sNode first c lNode <- foldM addElem sNode cs rest <- operatorLabel rdf_rest addStatement lNode rest eNode return sNode where addElem prevNode curElem = do bNode <- newBlankNode first <- operatorLabel rdf_first rest <- operatorLabel rdf_rest addStatement prevNode rest bNode addStatement bNode first curElem return bNode {- formulacontent ::= | statementlist statementlist ::= | statement statementtail | void statementtail ::= | "." statementlist | void -} {- We create a subgraph and assign it to a blank node, returning the blank node. At present it is a combination of the subgraph and formula productions from the origial parser. TODO: is it correct? -} formulaContent :: N3Parser RDFLabel formulaContent = do bNode <- newBlankNode pstate <- getState let fstate = pstate { graphState = emptyRDFGraph, thisNode = bNode } setState fstate statementList fstate' <- getState let nstate = pstate { nodeGen = nodeGen fstate' } setState nstate updateState $ updateGraph $ setFormula (Formula bNode (graphState fstate')) return bNode subgraph :: RDFLabel -> N3Parser RDFGraph subgraph this = do pstate <- getState let fstate = pstate { graphState = emptyRDFGraph, thisNode = this } setState fstate -- switch new state into parser statementsOptional -- parse statements of formula fstate' <- getState let nstate = pstate { nodeGen = nodeGen fstate' } setState nstate -- swap back state, with updated nodeGen return (graphState fstate') statementList :: N3Parser () statementList = ignore $ sepEndBy (lexeme statement) fullStop {- boolean ::= | "@false" | "@true" -} boolean :: N3Parser RDFLabel boolean = mkTypedLit xsd_boolean <$> (try (atWord "false") <|> atWord "true") {- dtlang ::= | "@" langcode | "^^" symbol | void literal ::= | string dtlang langcode ::= [a-z]+(-[a-z0-9]+)* -} literal :: N3Parser RDFLabel literal = Lit <$> n3string <*> optionMaybe dtlang dtlang :: N3Parser ScopedName dtlang = (char '@' *> langcode "langcode") <|> (try (string "^^") *> n3symbol) "dtlang" langcode :: N3Parser ScopedName langcode = do h <- many1 (oneOf ['a'..'z']) "start of langcode (a to z)" mt <- optionMaybe ( (:) <$> char '-' <*> many1 (oneOf (['a'..'z'] ++ ['0'..'9']))) "a to z or 0 to 9 (langcode after the hyphen)" return $ langName $ h ++ fromMaybe "" mt {- decimal ::= [-+]?[0-9]+(\.[0-9]+)? double ::= [-+]?[0-9]+(\.[0-9]+)?([eE][-+]?[0-9]+) integer ::= [-+]?[0-9]+ numericliteral ::= | decimal | double | integer -} numericLiteral :: N3Parser RDFLabel numericLiteral = try (mkTypedLit xsd_double <$> n3double) <|> try (mkTypedLit xsd_decimal <$> n3decimal) <|> mkTypedLit xsd_integer <$> n3integer "numericliteral" n3sign :: N3Parser Char n3sign = char '+' <|> char '-' n3integer :: N3Parser String n3integer = do ms <- optionMaybe n3sign ds <- many1 digit case ms of Just s -> return $ s : ds _ -> return ds n3decimal :: N3Parser String n3decimal = (++) <$> n3integer <*> ( (:) <$> char '.' <*> many1 digit ) n3double :: N3Parser String n3double = (++) <$> n3decimal <*> ( (:) <$> oneOf "eE" <*> n3integer ) {- propertylist ::= | verb object objecttail propertylisttail | void propertylisttail ::= | ";" propertylist | void -} propertyListBNode :: N3Parser RDFLabel propertyListBNode = do plist <- sepEndBy ((,) <$> lexeme verb <*> objectList) semiColon bNode <- newBlankNode let addList (vrb,items) = mapM_ (addItem vrb) items addItem (True,vrb) obj = addStatement bNode vrb obj addItem (_,vrb) subj = addStatement subj vrb bNode forM_ plist addList return bNode propertyListWith :: RDFLabel -> N3Parser () propertyListWith subj = ignore $ sepEndBy (lexeme verb >>= objectListWith subj) semiColon {- object ::= | expression objecttail ::= | "," object objecttail | void We change the production rule from objecttail to objectlist for lists of objects (may change back) -} object :: N3Parser RDFLabel object = lexeme expression objectList :: N3Parser [RDFLabel] objectList = sepBy1 object comma objectWith :: RDFLabel -> (Bool, RDFLabel) -> N3Parser () objectWith subj (flag,vrb) = object >>= addFunc subj vrb where addFunc = if flag then addStatement else addStatementRev objectListWith :: RDFLabel -> (Bool, RDFLabel) -> N3Parser () objectListWith subj vrb = ignore $ sepBy1 (objectWith subj vrb) comma {- objectList1 :: N3Parser [RDFLabel] objectList1 = sepBy1 object comma -} {- verb ::= | "<=" | "=" | "=>" | "@a" | "@has" expression | "@is" expression "@of" | expression -} verb :: N3Parser (Bool, RDFLabel) verb = -- we check reverse first so that <= is tried before looking for a URI via expression rule (,) False <$> verbReverse <|> (,) True <$> verbForward "verb" -- those verbs for which subject is on the right and object on the left verbReverse :: N3Parser RDFLabel verbReverse = try (string "<=") *> operatorLabel log_implies <|> between (try (atWord "is")) (atWord "of") (lexeme expression) -- those verbs with subject on the left and object on the right verbForward :: N3Parser RDFLabel verbForward = (try (string "=>") *> operatorLabel log_implies) <|> (string "=" *> operatorLabel owl_sameAs) <|> (try (atWord "a") *> operatorLabel rdf_type) <|> (atWord "has" *> lexeme expression) <|> lexeme expression {- universal ::= | "@forAll" symbol_csl TODO: what needs to be done to support universal quantification -} universal :: N3Parser () universal = try (atWord "forAll") *> unexpected "universal (@forAll) currently unsupported." -- will be something like: *> symbolCsl {- -- OLD -- -- helper routines isymbol :: String -> N3Parser () isymbol s = symbol s >> return () -- document = directive* statement-list document :: N3Parser RDFGraph document = do whiteSpace _ <- many directive statements eof s <- getState return $ setNamespaces (prefixUris s) (graphState s) -- directive = "@prefix" prefix ":" uriRef2 "." // Namespace declaration -- | "@prefix" ":" uriRef2 "." // Default namespace -- | "@equivalence" uriRef2 "." // Alternative to daml:equivalent -- | "@listfirst" uriRef2 "." // Alternative to n3:first -- | "@listrest" uriRef2 "." // Alternative to n3:rest -- | "@listnull" uriRef2 "." // Alternative to n3:null -- | "@plus" uriRef2 "." // Alternative to operator:plus -- | "@minus" uriRef2 "." // Alternative to operator:minus -- | "@slash" uriRef2 "." // Alternative to operator:slash -- | "@star" uriRef2 "." // Alternative to operator:star -- | "@base" uriRef2 "." // Base URI for relative URIs. directive :: N3Parser () directive = (try (symbol "@prefix") >> (defaultPrefix <|> namedPrefix)) <|> (string "@" >> syntaxUri) "directive" defaultPrefix :: N3Parser () defaultPrefix = do u <- br ":" "." uriRef2 updateState $ setPrefix "" (getScopedNameURI u) namedPrefix :: N3Parser () namedPrefix = do n <- name u <- br ":" "." uriRef2 updateState $ setPrefix n (getScopedNameURI u) syntaxUri :: N3Parser () syntaxUri = do s <- uriName u <- uriRef2 isymbol "." updateState $ setSUri s (getScopedNameURI u) uriName :: N3Parser String uriName = try (symbol "equivalence") <|> try (symbol "listfirst") <|> try (symbol "listrest") <|> try (symbol "listnull") <|> try (symbol "plus") <|> try (symbol "minus") <|> try (symbol "slash") <|> try (symbol "star") <|> try (symbol "base") "special URI directive" -- statements = [ statement ( "." statement )* ] -- -- statement = subject property-list -- -- properties = [ property ( ";" property )* ] -- -- New statements are added to the user state graph statements :: N3Parser () statements = sepEndBy1 statement (symbol ".") >> return () statement :: N3Parser () -- statement = subject >>= optional . properties -- when using Parsec's optional statement = subject >>= optional . properties >> return () -- not sure this is exactly the same as with Parsec properties :: RDFLabel -> N3Parser () properties subj = sepBy1 (property subj) (symbol ";") >> return () -- property = verb object-list -- | ":-" anon-node // Creates anon-node aongside the current node -- verb = ">-" prop "->" // has 'prop' of -- | "<-" prop "<-" // is 'prop' of -- | operator // has operator:'operator' of (???) -- | prop // has 'prop' of -- shorthand -- | "has" prop "of" // has 'prop' of -- | "is" prop "of" // is 'prop' of -- | "a" // has rdf:type of -- | "=" // has daml:equivalent of -- -- subj is the subject node for these properties. -- -- New statements are added to the graph in the parser's user state. property :: RDFLabel -> N3Parser () property subj = (verb >>= uncurry (objects subj)) <|> (isymbol ":-" >> anonNode subj >> return ()) verb :: N3Parser (RDFLabel,Bool) verb = (prop >>= \p -> return (p, False)) <|> (operator >>= \p -> return (p, False)) <|> (br ">-" "->" prop >>= \p -> return (p, False)) <|> (br "<-" "<-" prop >>= \p -> return (p, True)) <|> (br "has" "of" prop >>= \p -> return (p, False)) <|> (br "is" "of" prop >>= \p -> return (p, True)) <|> (symbol "a" >> operatorLabel rdf_type >>= \lab -> return (lab, False)) <|> (symbol "=" >> operatorLabel owl_sameAs >>= \lab -> return (lab, False)) "property" -- objects = object -- | object "," object-list -- -- subj is the subject node for the new statements, -- prop is the property node for the new statements. -- swap is true if the subject/object values in the resulting statement -- are to be swapped (for "is of", etc.) -- -- New statements are added to the graph in the parser's user state objects :: RDFLabel -> RDFLabel -> Bool -> N3Parser () objects subj ppty swap = sepBy1 (object subj ppty swap) (symbol ",") >> return () -- anonNode = "[" property-list "]" // Something with given properties -- | "{" statement-list "}" // List of statements as resource -- | "(" node-list ")" // Construct list with -- // rdf:first, rdf:rest, rdf:nil -- -- subj is the subject node with which the new anonymous node is equated, -- -- The anonymous node value is returned by this parser (which is often the same -- as the supplied subject node, but not always). -- -- New statements are added to the graph in the parser's user state -- (in the case of a formula, a new graph and parser are created, and -- the graph arcs are added to this new graph). anonNode :: RDFLabel -> N3Parser RDFLabel anonNode subj = (br "[" "]" (properties subj) >> return subj) <|> br "{" "}" (formula subj) <|> br "(" ")" (nodeList subj) "anon node (\"[\", \"(\" or \"{\")" -- This method allows a statement list to be parsed as a subgraph -- whose value is associated with the supplied node of the current -- graph. formula :: RDFLabel -> N3Parser RDFLabel formula subj = do subgr <- subgraph subj updateState $ updateGraph $ setFormula (Formula subj subgr) return subj subgraph :: RDFLabel -> N3Parser RDFGraph subgraph this = do pstate <- getState let fstate = pstate { graphState = emptyRDFGraph, thisNode = this } setState fstate -- switch new state into parser statements -- parse statements of formula fstate' <- getState let nstate = pstate { nodeGen = nodeGen fstate' } setState nstate -- swap back state, with updated nodeGen return (graphState fstate') -- prop = uri-ref2 -- | varid -- -- Returns URI value as a Node prop :: N3Parser RDFLabel prop = nodeid <|> varid <|> uriNode -- operator = "+" // >- operator:plus -> -- | "-" // >- operator:minus -> -- | "/" // >- operator:slash -> -- | "*" // >- operator:star-> -- -- If matched, the operator is returned as a node value. operator :: N3Parser RDFLabel operator = (symbol "+" >> operatorLabel operator_plus) <|> (symbol "-" >> operatorLabel operator_minus) <|> (symbol "*" >> operatorLabel operator_star) <|> (symbol "/" >> operatorLabel operator_slash) "" -- subject = node subject :: N3Parser RDFLabel subject = node -- object = litNode -- -- This production adds a new triple to the graph state, -- using the supplied subject and property values. -- If swap is True, the subject and object positions are -- swapped. object :: RDFLabel -> RDFLabel -> Bool -> N3Parser () object subj ppty True = do o <- litNode addStatement o ppty subj object subj ppty _ = do o <- litNode addStatement subj ppty o -- nodeList = litNode* -- -- subj is the node from which the list is linked. -- -- Returns the supplied head of list or Nil node allocated. -- -- Link first element of link to list head, scan rest of list, -- and return the list head; otherwise return a node rdf_null. -- -- This slightly convoluted pattern is to deal with two different -- occurrences of a list node: -- :- ( l1, l2, ... ) -- Here, (the supplied subj) is the listhead. -- prop ( l1, l2, ... ) -- Here, the a new blank is supplied as subj to be the listhead. -- In either case, if the list is non-empty, the supplied subj -- is returned. But if the list is empty, a rdf_null node is returned. -- In the second case, the invoking production must use the returned -- value. nodeList :: RDFLabel -> N3Parser RDFLabel nodeList subj = (do val <- litNode first <- operatorLabel rdf_first addStatement subj first val nodeList1 subj return subj) <|> operatorLabel rdf_nil "Node or ')'" nodeList1 :: RDFLabel -> N3Parser () nodeList1 prev = (do val <- litNode lnk <- newBlankNode first <- operatorLabel rdf_first rest <- operatorLabel rdf_rest addStatement lnk first val addStatement prev rest lnk nodeList1 lnk) <|> (do nil <- operatorLabel rdf_nil rest <- operatorLabel rdf_rest addStatement prev rest nil) "Node or ')'" -- lit-node = node -- | str-node [ "@" lang ] [ "^^" uriRef2 ] -- str-node = '"' constant-value '"' -- | '"""' constant value '"""' // Including single or double occurences of -- // quotes and/or newlines -- -- Returns a new node value. litNode :: N3Parser RDFLabel litNode = node <|> liftM2 Lit strNode litTypeOrLang "URI, blank node or literal" strNode :: N3Parser String strNode = tripleQuoteString <|> singleQuoteString litTypeOrLang :: N3Parser (Maybe ScopedName) litTypeOrLang = langTag <|> typeUri <|> return Nothing "'@tag' (language tag) or '^^name' (datatype URI)" langTag :: N3Parser (Maybe ScopedName) langTag = fmap (Just . langName) (string "@" >> name) "'@tag' (language tag)" typeUri :: N3Parser (Maybe ScopedName) typeUri = fmap Just (string "^^" >> uriRef2) "'^^name' (datatype URI)" -- node = nodeid -- | varid -- | uri-ref2 -- | anon-node -- -- nodeid = "_:" name -- -- varid = "?" name -- -- Returns a new node value. node :: N3Parser RDFLabel node = nodeid <|> varid <|> uriNode <|> (newBlankNode >>= anonNode) "URI or blank node" -- Identified blank node in input -- -- Note that automatically generated blank node identifiers start with -- a digit, where input node identifiers start with a letter, so there -- can be no clash. Care is needed when serializing a graph to ensure -- that future clashes are avoided. nodeid :: N3Parser RDFLabel -- nodeid = lexeme nodeid1 nodeid = fmap Blank (string "_:" >> name) -- variable identifier varid :: N3Parser RDFLabel varid = fmap Var (string "?" >> name) -- uriNode = qname -- | "<" URI-reference ">" -- | "this" uriNode :: N3Parser RDFLabel uriNode = fmap Res uriRef2 <|> fmap thisNode (string "this" >> getState) "URI node" -- uriRef2 = qname -- | ":" local-name -- | "<" URI-reference ">" -- qname = prefix ":" local-name -- -- prefix = name // Namespace prefix -- -- local-name = name // Local name (namespace qualified) -- -- name = alpha alphanumeric* -- -- alpha = "a"-"z" -- | "A"-"Z" -- | "_" -- -- alphanumeric = alpha -- | "0"-"9" -- -- URI-reference = (conforming to syntax in RFC2396) -- -- uriRef2 returns a ScopedName. uriRef2 :: N3Parser ScopedName uriRef2 = lexeme (try uriRef2a) "URI or QName" uriRef2a :: N3Parser ScopedName uriRef2a = liftM2 ScopedName prefix (colon >> localname) <|> (colon >> liftM2 ScopedName defaultprefix localname) <|> fmap makeUriScopedName absUriRef "URI or QName" prefix :: N3Parser Namespace prefix = do pref <- prefixname st <- getState return (getPrefixNs st pref) -- map prefix to namespace defaultprefix :: N3Parser Namespace defaultprefix = do st <- getState return (getPrefixNs st "") name :: N3Parser String name = lexeme $ name1 identStart prefixname :: N3Parser String prefixname = name1 identStart localname :: N3Parser String localname = lexeme $ name1 identLetter -- 'name1' is a name without following whitespace -- initChar is a parser for the first character name1 :: N3Parser Char -> N3Parser String name1 initChar = liftM2 (:) initChar (many identLetter) "identifier" ---------------------------------------------------------------------- -- Lexical support ---------------------------------------------------------------------- -- -- The following code adapted from ParsecToken, -- modified to handle different escape conventions and triple-quoted strings -- \c -- \uhhhh -- \Uhhhhhhhh -- -- Regular single-quoted string -- cannot be split over line breaks singleQuoteString :: N3Parser String singleQuoteString = lexeme ( between (char '"') (char '"' "end of string (\")") anyStringChars "literal string" ) anyStringChars :: CharParser st String anyStringChars = fmap (foldr (maybe id (:)) "") (many stringChar) -- Triple-quoted string -- may include line breaks, '"' or '""'. tripleQuoteString :: N3Parser String tripleQuoteString = lexeme (fmap (foldr (++) "") $ between (try $ string "\"\"\"") (string "\"\"\"" "end of string (\"\"\")") (many tripleQuoteSubstring)) "triple-quoted literal string" -- Match non-quote substring or one or two quote characters tripleQuoteSubstring :: N3Parser String tripleQuoteSubstring = tripleQuoteSubstring1 <|> try sqTripleQuoteSubstring1 <|> try dqTripleQuoteSubstring1 dqTripleQuoteSubstring1 :: N3Parser String dqTripleQuoteSubstring1 = fmap ("\"\""++) $ string "\"\"" >> tripleQuoteSubstring1 sqTripleQuoteSubstring1 :: N3Parser String sqTripleQuoteSubstring1 = fmap ('"':) $ char '"' >> tripleQuoteSubstring1 -- match at least one non-quote character in a triple-quoted string tripleQuoteSubstring1 :: N3Parser String tripleQuoteSubstring1 = fmap (foldr (maybe id (:)) "") $ many1 tripleQuoteStringChar tripleQuoteStringChar :: CharParser st (Maybe Char) tripleQuoteStringChar = stringChar <|> (string "\n" >> return (Just '\n')) stringChar :: CharParser st (Maybe Char) stringChar = fmap Just stringLetter <|> stringEscape "string character" stringLetter :: CharParser st Char stringLetter = satisfy (\c -> (c /= '"') && (c /= '\\') && (c >= '\032')) stringEscape :: CharParser st (Maybe Char) stringEscape = fmap Just $ char '\\' >> escapeCode -- escape codes escapeCode :: CharParser st Char escapeCode = charEsc <|> charUCS2 <|> charUCS4 "escape code" -- \c charEsc :: CharParser st Char charEsc = choice (map parseEsc escMap) where parseEsc (c,code) = fmap (const code) (char c) escMap = zip "nrt\\\"\'" "\n\r\t\\\"\'" -- \uhhhh charUCS2 :: CharParser st Char charUCS2 = fmap chr $ char 'u' >> numberFW 16 hexDigit 4 0 -- \Uhhhhhhhh charUCS4 :: CharParser st Char charUCS4 = fmap chr $ char 'U' >> numberFW 16 hexDigit 8 0 -- parse fixed-width number: numberFW :: Int -> CharParser st Char -> Int -> Int -> CharParser st Int numberFW _ _ 0 val = return val numberFW base baseDigit width val = do d <- baseDigit numberFW base baseDigit (width-1) (val*base + digitToInt d) ---------------------------------------------------------------------- -- Parse a URI reference from the input -- The result returned has absolute form; relative URIs are resolved -- relative to the current base prefix (set using "@base"). -- -- lexeme version lexUriRef :: N3Parser String lexUriRef = lexeme absUriRef -- from Swish.Utils.ProcessURI absoluteUriPart :: String -- ^ URI base -> String -- ^ URI reference -> String absoluteUriPart base rel = showURI $ fromJust $ relativeTo (fromJust (parseURIReference rel)) (fromJust (parseURI base)) absUriRef :: N3Parser String absUriRef = do u <- between (char '<') (char '>' "end of URI '>'") anyUriChars if isURI u then return u else if isURIReference u then do s <- getState return $ absoluteUriPart (getSUri s "base") u else fail ("Invalid URI: <"++u++">") anyUriChars :: N3Parser String anyUriChars = many uriChar uriChar :: N3Parser Char uriChar = alphaNum <|> oneOf "[];?:@&=+$,-_.!~*'()%//#" "URI character" -} -------------------------------------------------------------------------------- -- -- Copyright (c) 2003, Graham Klyne, 2009 Vasili I Galchin, 2011 Douglas Burke -- All rights reserved. -- -- This file is part of Swish. -- -- Swish is free software; you can redistribute it and/or modify -- it under the terms of the GNU General Public License as published by -- the Free Software Foundation; either version 2 of the License, or -- (at your option) any later version. -- -- Swish is distributed in the hope that it will be useful, -- but WITHOUT ANY WARRANTY; without even the implied warranty of -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- GNU General Public License for more details. -- -- You should have received a copy of the GNU General Public License -- along with Swish; if not, write to: -- The Free Software Foundation, Inc., -- 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -- --------------------------------------------------------------------------------