{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE DoAndIfThenElse #-} -- |An 'RdfParser' implementation for the Turtle format -- . module Text.RDF.RDF4H.TurtleParser ( TurtleParser(TurtleParser) , TurtleParserCustom(TurtleParserCustom) ) where import Prelude hiding (readFile) import Data.Attoparsec.Text (parse,IResult(..)) import Data.Char (toLower, toUpper, isDigit, isHexDigit) import qualified Data.Map as Map import Data.Map (Map) import Data.Maybe import Data.RDF.Types import Data.RDF.IRI import Data.RDF.Namespace import Text.RDF.RDF4H.ParserUtils import Text.RDF.RDF4H.NTriplesParser import Text.Parsec (runParser, ParseError) import qualified Data.Text as T import Data.Sequence (Seq, (|>)) import qualified Data.Foldable as F import Control.Monad import Text.Parser.Char import Text.Parser.Combinators import Text.Parser.LookAhead import Control.Applicative import Control.Monad.State.Class import Control.Monad.State.Strict -- |An 'RdfParser' implementation for parsing RDF in the -- Turtle format. It is an implementation of W3C Turtle grammar rules at -- http://www.w3.org/TR/turtle/#sec-grammar-grammar . -- It takes optional arguments representing the base URL to use -- for resolving relative URLs in the document (may be overridden in the document -- itself using the \@base directive), and the URL to use for the document itself -- for resolving references to <> in the document. -- To use this parser, pass a 'TurtleParser' value as the first argument to any of -- the 'parseString', 'parseFile', or 'parseURL' methods of the 'RdfParser' type -- class. data TurtleParser = TurtleParser (Maybe BaseUrl) (Maybe T.Text) data TurtleParserCustom = TurtleParserCustom (Maybe BaseUrl) (Maybe T.Text) Parser -- |'TurtleParser' is an instance of 'RdfParser' using a parsec based parser. instance RdfParser TurtleParser where parseString (TurtleParser bUrl dUrl) = parseStringParsec bUrl dUrl parseFile (TurtleParser bUrl dUrl) = parseFileParsec bUrl dUrl parseURL (TurtleParser bUrl dUrl) = parseURLParsec bUrl dUrl -- |'TurtleParser' is an instance of 'RdfParser' using either a -- parsec or an attoparsec based parser. instance RdfParser TurtleParserCustom where parseString (TurtleParserCustom bUrl dUrl Parsec) = parseStringParsec bUrl dUrl parseString (TurtleParserCustom bUrl dUrl Attoparsec) = parseStringAttoparsec bUrl dUrl parseFile (TurtleParserCustom bUrl dUrl Parsec) = parseFileParsec bUrl dUrl parseFile (TurtleParserCustom bUrl dUrl Attoparsec) = parseFileAttoparsec bUrl dUrl parseURL (TurtleParserCustom bUrl dUrl Parsec) = parseURLParsec bUrl dUrl parseURL (TurtleParserCustom bUrl dUrl Attoparsec) = parseURLAttoparsec bUrl dUrl type ParseState = ( Maybe BaseUrl -- the current BaseUrl, may be Nothing initially, but not after it is once set , Maybe T.Text -- the docUrl, which never changes and is used to resolve <> in the document. , Integer -- the id counter, containing the value of the next id to be used , PrefixMappings -- the mappings from prefix to URI that are encountered while parsing , Maybe Subject -- current subject node, if we have parsed a subject but not finished the triple , Maybe Predicate -- current predicate node, if we have parsed a predicate but not finished the triple , Seq Triple -- the triples encountered while parsing; always added to on the right side , Map String Integer ) -- map blank node names to generated id. -- grammar rule: [1] turtleDoc t_turtleDoc :: (MonadState ParseState m, CharParsing m, LookAheadParsing m) => m (Seq Triple, PrefixMappings) t_turtleDoc = many t_statement *> (eof "eof") *> gets (\(_, _, _, pms, _, _, ts,_) -> (ts, pms)) -- grammar rule: [2] statement -- [2] statement ::= directive | triples '.' t_statement :: (MonadState ParseState m, CharParsing m, LookAheadParsing m) => m () t_statement = directive <|> triples <|> void (some t_ws "blankline-whitespace") where directive = void (try t_directive *> (many t_ws "directive-whitespace2")) triples = void (try t_triples *> (many t_ws "triple-whitespace1") *> (char '.' "end-of-triple-period") *> (many t_ws "triple-whitespace2")) -- grammar rule: [6] triples -- subject predicateObjectList | blankNodePropertyList predicateObjectList? t_triples :: (MonadState ParseState m, CharParsing m, LookAheadParsing m) => m () t_triples = try subjectWithPOL <|> blankNodePropertyListWithPOL where subjectWithPOL = t_subject *> many t_ws *> t_predicateObjectList *> resetSubjectPredicate blankNodePropertyListWithPOL = t_blankNodePropertyList >>= \bn -> many t_ws *> setSubjectPredicate (Just bn) Nothing *> optional t_predicateObjectList *> resetSubjectPredicate -- [14] blankNodePropertyList ::= '[' predicateObjectList ']' t_blankNodePropertyList :: (MonadState ParseState m, CharParsing m, LookAheadParsing m) => m Node t_blankNodePropertyList = withConstantSubjectPredicate $ between (char '[') (char ']') $ do bn <- nextBlankNode setSubjectPredicate (Just bn) Nothing void (many t_ws *> t_predicateObjectList *> many t_ws) return bn -- grammar rule: [3] directive t_directive :: (CharParsing m, MonadState ParseState m) => m () t_directive = t_prefixID <|> t_base <|> t_sparql_prefix <|> t_sparql_base -- grammar rule: [135s] iri ::= IRIREF | PrefixedName t_iri :: (MonadState ParseState m, CharParsing m, LookAheadParsing m) => m T.Text t_iri = try t_iriref <|> t_prefixedName -- grammar rule: [136s] PrefixedName ::= PNAME_LN | PNAME_NS t_prefixedName :: (MonadState ParseState m, CharParsing m, LookAheadParsing m) => m T.Text t_prefixedName = try t_pname_ln <|> t_pname_ns -- grammar rule: [4] prefixID ::= '@prefix' PNAME_NS IRIREF '.' t_prefixID :: (CharParsing m, MonadState ParseState m) => m () t_prefixID = do void (try (string "@prefix" "@prefix-directive")) void (some t_ws "whitespace-after-@prefix") pre <- option mempty (try t_pn_prefix) <* char ':' void (some t_ws "whitespace-after-@prefix-colon") iriFrag <- t_iriref void (many t_ws "prefixID-whitespace") void (char '.' "end-of-prefixID-period") (bUrl, dUrl, _, PrefixMappings pms, _, _, _, _) <- get iri <- tryIriResolution bUrl dUrl iriFrag updatePMs $ Just (PrefixMappings $ Map.insert pre iri pms) -- grammar rule: [6s] sparqlPrefix ::= "PREFIX" PNAME_NS IRIREF t_sparql_prefix :: (CharParsing m, MonadState ParseState m) => m () t_sparql_prefix = do void (try (caseInsensitiveString "PREFIX" "@prefix-directive")) void (some t_ws "whitespace-after-PREFIX") pre <- option mempty (try t_pn_prefix) <* char ':' void (some t_ws "whitespace-after-PREFIX-colon") iriFrag <- t_iriref (bUrl, dUrl, _, PrefixMappings pms, _, _, _, _) <- get iri <- tryIriResolution bUrl dUrl iriFrag updatePMs $ Just (PrefixMappings $ Map.insert pre iri pms) -- grammar rule: [5] base ::= '@base' IRIREF '.' t_base :: (CharParsing m, MonadState ParseState m) => m () t_base = do void (try (string "@base" "@base-directive")) void (some t_ws "whitespace-after-@base") iriFrag <- t_iriref void (many t_ws "base-whitespace") void (char '.') "end-of-base-period" bUrl <- currBaseUrl dUrl <- currDocUrl newBaseIri <- BaseUrl <$> tryIriResolution bUrl dUrl iriFrag updateBaseUrl (Just $ Just newBaseIri) -- grammar rule: [5s] sparqlBase ::= "BASE" IRIREF t_sparql_base :: (CharParsing m, MonadState ParseState m) => m () t_sparql_base = do void (try (caseInsensitiveString "BASE" "@sparql-base-directive")) void (some t_ws "whitespace-after-BASE") iriFrag <- t_iriref bUrl <- currBaseUrl dUrl <- currDocUrl newBaseIri <- BaseUrl <$> tryIriResolution bUrl dUrl iriFrag updateBaseUrl (Just $ Just newBaseIri) t_verb :: (MonadState ParseState m, CharParsing m, LookAheadParsing m) => m () t_verb = try t_predicate <|> (char 'a' *> pure rdfTypeNode) >>= setPredicate -- grammar rule: [11] predicate ::= iri t_predicate :: (MonadState ParseState m, CharParsing m, LookAheadParsing m) => m Node t_predicate = UNode <$> (t_iri "resource") -- grammar rules: [139s] PNAME_NS ::= PN_PREFIX? ':' t_pname_ns :: (CharParsing m, MonadState ParseState m) => m T.Text t_pname_ns = do pre <- option mempty (try t_pn_prefix) <* char ':' (_, _, _, pms, _, _, _, _) <- get case resolveQName pre pms of Just n -> pure n Nothing -> unexpected ("Cannot resolve QName prefix: " ++ T.unpack pre) -- grammar rules: [168s] PN_LOCAL -- [168s] PN_LOCAL ::= (PN_CHARS_U | ':' | [0-9] | PLX) ((PN_CHARS | '.' | ':' | PLX)* (PN_CHARS | ':' | PLX))? t_pn_local :: (MonadState ParseState m, CharParsing m, LookAheadParsing m) => m T.Text t_pn_local = do x <- t_pn_chars_u_str <|> string ":" <|> satisfy_str <|> t_plx xs <- option "" $ try $ do let recsve = (t_pn_chars_str <|> string ":" <|> t_plx) <|> (t_pn_chars_str <|> string ":" <|> t_plx <|> try (string "." <* lookAhead (try recsve))) <|> (t_pn_chars_str <|> string ":" <|> t_plx <|> try (string "." *> notFollowedBy t_ws *> pure ".")) concat <$> many recsve pure (T.pack (x ++ xs)) where satisfy_str = pure <$> satisfy isDigit t_pn_chars_str = pure <$> t_pn_chars t_pn_chars_u_str = pure <$> t_pn_chars_u -- PERCENT | PN_LOCAL_ESC -- grammar rules: [169s] PLX t_plx :: (CharParsing m, Monad m) => m String t_plx = t_percent <|> t_pn_local_esc_str where t_pn_local_esc_str = pure <$> t_pn_local_esc -- '%' HEX HEX -- grammar rules: [170s] PERCENT t_percent :: (CharParsing m, Monad m) => m String t_percent = sequence [char '%', t_hex, t_hex] -- grammar rules: [172s] PN_LOCAL_ESC t_pn_local_esc :: CharParsing m => m Char t_pn_local_esc = char '\\' *> oneOf "_~.-!$&'()*+,;=/?#@%" -- grammar rules: [140s] PNAME_LN ::= PNAME_NS PN_LOCAL t_pname_ln :: (MonadState ParseState m, CharParsing m, LookAheadParsing m) => m T.Text t_pname_ln = T.append <$> t_pname_ns <*> t_pn_local -- grammar rule: [10] subject -- [10] subject ::= iri | BlankNode | collection t_subject :: (MonadState ParseState m, CharParsing m, LookAheadParsing m) => m () t_subject = iri <|> t_blankNode <|> t_collection >>= setSubject where iri = unode <$> (try t_iri "subject resource") -- [137s] BlankNode ::= BLANK_NODE_LABEL | ANON t_blankNode :: (CharParsing m, MonadState ParseState m) => m Node t_blankNode = do genID <- try t_blank_node_label <|> (t_anon *> pure mempty) mp <- currGenIdLookup maybe (newBN genID) getExistingBN (Map.lookup genID mp) where newBN genID = do i <- nextIdCounter when (genID /= mempty) (addGenIdLookup genID i) return $ BNodeGen (fromIntegral i) getExistingBN = return . BNodeGen . fromIntegral -- TODO replicate the recursion technique from [168s] for ((..)* something)? -- [141s] BLANK_NODE_LABEL ::= '_:' (PN_CHARS_U | [0-9]) ((PN_CHARS | '.')* PN_CHARS)? t_blank_node_label :: (CharParsing m, MonadState ParseState m) => m String t_blank_node_label = do void (string "_:") firstChar <- t_pn_chars_u <|> satisfy isDigit try $ (firstChar:) <$> otherChars where otherChars = option "" $ do xs <- many (t_pn_chars <|> char '.') if null xs then pure xs else if last xs == '.' then unexpected "'.' at the end of a blank node label" else pure xs -- [162s] ANON ::= '[' WS* ']' t_anon :: CharParsing m => m () t_anon = void (between (char '[') (char ']') (many t_ws)) -- [7] predicateObjectList ::= verb objectList (';' (verb objectList)?)* t_predicateObjectList :: (MonadState ParseState m, CharParsing m, LookAheadParsing m) => m () t_predicateObjectList = void $ sepEndBy1 (try verbObjectList) (try separator) where verbObjectList = t_verb *> some t_ws *> t_objectList separator = some (many t_ws *> char ';' *> many t_ws) -- grammar rule: [8] objectlist -- [8] objectList ::= object (',' object)* t_objectList :: (MonadState ParseState m, CharParsing m, LookAheadParsing m) => m () t_objectList = do (t_object "object") >>= addTripleForObject void $ many (try (many t_ws *> char ',' *> many t_ws *> t_object >>= addTripleForObject)) -- grammar rule: [12] object -- [12] object ::= iri | BlankNode | collection | blankNodePropertyList | literal t_object :: (MonadState ParseState m, CharParsing m, LookAheadParsing m) => m Node t_object = try (UNode <$> t_iri) <|> try t_blankNode <|> try t_collection <|> try t_blankNodePropertyList <|> t_literal -- grammar rule: [15] collection -- [15] collection ::= '(' object* ')' t_collection :: (MonadState ParseState m, CharParsing m, LookAheadParsing m) => m Node t_collection = withConstantSubjectPredicate $ between (char '(') (char ')') $ do void (many t_ws) root <- try empty_list <|> non_empty_list void (many t_ws) return root where empty_list = lookAhead (char ')') *> return rdfNilNode non_empty_list = do ns <- sepEndBy1 element (some t_ws) addTripleForObject rdfNilNode return (head ns) element = do o <- t_object bn <- nextBlankNode s <- getSubject when (isJust s) (addTripleForObject bn) setSubjectPredicate (Just bn) (Just rdfFirstNode) addTripleForObject o setPredicate rdfRestNode return bn getSubject = get >>= \(_, _, _, _, s, _, _, _) -> pure s rdfTypeNode, rdfNilNode, rdfFirstNode, rdfRestNode :: Node rdfTypeNode = UNode $ mkUri rdf "type" rdfNilNode = UNode $ mkUri rdf "nil" rdfFirstNode = UNode $ mkUri rdf "first" rdfRestNode = UNode $ mkUri rdf "rest" xsdIntUri, xsdDoubleUri, xsdDecimalUri, xsdBooleanUri :: T.Text xsdIntUri = mkUri xsd "integer" xsdDoubleUri = mkUri xsd "double" xsdDecimalUri = mkUri xsd "decimal" xsdBooleanUri = mkUri xsd "boolean" t_literal :: (MonadState ParseState m, CharParsing m, LookAheadParsing m) => m Node t_literal = LNode <$> try t_rdf_literal <|> (`mkLNode` xsdDoubleUri) <$> try t_double <|> (`mkLNode` xsdDecimalUri) <$> try t_decimal <|> (`mkLNode` xsdIntUri) <$> try t_integer <|> (`mkLNode` xsdBooleanUri) <$> t_boolean where mkLNode :: T.Text -> T.Text -> Node mkLNode bsType bs' = LNode (typedL bsType bs') -- [128s] RDFLiteral -- String (LANGTAG | '^^' iri)? t_rdf_literal :: (MonadState ParseState m, CharParsing m, LookAheadParsing m) => m LValue t_rdf_literal = do str <- t_string option (plainL str) (langTag str <|> typeIRI str) where langTag str = plainLL str <$> try t_langtag typeIRI str = typedL str <$> try (count 2 (char '^') *> t_iri) -- [17] String -- STRING_LITERAL_QUOTE | STRING_LITERAL_SINGLE_QUOTE | STRING_LITERAL_LONG_SINGLE_QUOTE | STRING_LITERAL_LONG_QUOTE t_string :: (CharParsing m, Monad m) => m T.Text t_string = try t_string_literal_long_double_quote <|> try t_string_literal_long_single_quote <|> try t_string_literal_double_quote <|> t_string_literal_single_quote -- [22] STRING_LITERAL_QUOTE -- '"' ([^#x22#x5C#xA#xD] | ECHAR | UCHAR)* '"' t_string_literal_double_quote :: (CharParsing m, Monad m) => m T.Text t_string_literal_double_quote = nt_string_literal_quote -- [23] STRING_LITERAL_SINGLE_QUOTE -- "'" ([^#x27#x5C#xA#xD] | ECHAR | UCHAR)* "'" t_string_literal_single_quote :: (CharParsing m, Monad m) => m T.Text t_string_literal_single_quote = string_literal_quote '\'' -- [24] STRING_LITERAL_LONG_SINGLE_QUOTE -- "'''" (("'" | "''")? ([^'\] | ECHAR | UCHAR))* "'''" t_string_literal_long_single_quote :: (CharParsing m, Monad m) => m T.Text t_string_literal_long_single_quote = between (string "'''") (string "'''") $ do ss <- many $ try $ do s1 <- T.pack <$> option "" (try (string "''") <|> string "'") s2 <- T.singleton <$> (noneOf ['\'','\\'] <|> t_echar <|> t_uchar) pure (s1 `T.append` s2) pure (T.concat ss) -- [25] STRING_LITERAL_LONG_QUOTE -- '"""' (('"' | '""')? ([^"\] | ECHAR | UCHAR))* '"""' t_string_literal_long_double_quote :: (CharParsing m, Monad m) => m T.Text t_string_literal_long_double_quote = between (string "\"\"\"") (string "\"\"\"") $ do ss <- many $ try $ do s1 <- T.pack <$> option "" (try (string "\"\"") <|> string "\"") s2 <- T.singleton <$> (noneOf ['"','\\'] <|> t_echar <|> t_uchar) pure (s1 `T.append` s2) pure (T.concat ss) -- [144s] LANGTAG t_langtag :: (CharParsing m, Monad m) => m T.Text t_langtag = nt_langtag -- [159s] ECHAR t_echar :: (CharParsing m, Monad m) => m Char t_echar = nt_echar -- [26] UCHAR t_uchar :: (CharParsing m, Monad m) => m Char t_uchar = nt_uchar -- [19] INTEGER ::= [+-]? [0-9]+ t_integer :: (CharParsing m, Monad m) => m T.Text t_integer = try $ do sign <- sign_parser "+-" ds <- some (satisfy isDigit "digit") pure $! ( T.pack sign `T.append` T.pack ds) -- grammar rule: [21] DOUBLE -- [21] DOUBLE ::= [+-]? ([0-9]+ '.' [0-9]* EXPONENT | '.' [0-9]+ EXPONENT | [0-9]+ EXPONENT) t_double :: (CharParsing m, Monad m) => m T.Text t_double = do sign <- sign_parser "+-" rest <- try (do { ds <- (some (satisfy isDigit) "digit") <* char '.'; ds' <- many (satisfy isDigit) "digit"; e <- t_exponent "exponent"; pure ( T.pack ds `T.snoc` '.' `T.append` T.pack ds' `T.append` e) }) <|> try (do { ds <- char '.' *> some (satisfy isDigit) "digit"; e <- t_exponent "exponent"; pure ('.' `T.cons` T.pack ds `T.append` e) }) <|> (do { ds <- some (satisfy isDigit) "digit"; e <- t_exponent "exponent"; pure ( T.pack ds `T.append` e) }) pure $! T.pack sign `T.append` rest sign_parser :: CharParsing m => m String sign_parser = option "" (pure <$> oneOf "-+") -- [20] DECIMAL ::= [+-]? [0-9]* '.' [0-9]+ t_decimal :: (CharParsing m, Monad m) => m T.Text t_decimal = try $ do sign <- sign_parser dig1 <- many (satisfy isDigit) <* char '.' dig2 <- some (satisfy isDigit) pure (T.pack sign `T.append` T.pack dig1 `T.append` T.pack "." `T.append` T.pack dig2) -- [154s] EXPONENT ::= [eE] [+-]? [0-9]+ t_exponent :: (CharParsing m, Monad m) => m T.Text t_exponent = do e <- oneOf "eE" s <- option "" (pure <$> oneOf "-+") ds <- some digit pure $! (e `T.cons` ( T.pack s `T.append` T.pack ds)) -- [133s] BooleanLiteral ::= 'true' | 'false' t_boolean :: CharParsing m => m T.Text t_boolean = T.pack <$> try (string "true" <|> string "false") t_comment :: CharParsing m => m () t_comment = void (char '#' *> many (noneOf "\n\r")) --[TODO] t_comment = nt_comment -- [161s] WS ::= #x20 | #x9 | #xD | #xA t_ws :: CharParsing m => m () t_ws = (void (try (oneOf "\t\n\r "))) <|> try t_comment "whitespace-or-comment" -- [167s] PN_PREFIX ::= PN_CHARS_BASE ((PN_CHARS | '.')* PN_CHARS)? t_pn_prefix :: (CharParsing m, MonadState ParseState m) => m T.Text t_pn_prefix = do i <- try t_pn_chars_base r <- option "" (many (try t_pn_chars <|> char '.')) -- TODO: ensure t_pn_chars is last char pure (T.pack (i:r)) -- [18] IRIREF ::= '<' ([^#x00-#x20<>"{}|^`\] | UCHAR)* '>' t_iriref :: (CharParsing m, MonadState ParseState m) => m T.Text t_iriref = between (char '<') (char '>') $ do iriFrag <- iriFragment bUrl <- currBaseUrl dUrl <- currDocUrl tryIriResolution bUrl dUrl iriFrag -- [163s] PN_CHARS_BASE t_pn_chars_base :: CharParsing m => m Char t_pn_chars_base = nt_pn_chars_base -- [164s] PN_CHARS_U ::= PN_CHARS_BASE | '_' t_pn_chars_u :: CharParsing m => m Char t_pn_chars_u = t_pn_chars_base <|> char '_' -- [166s] PN_CHARS ::= PN_CHARS_U | '-' | [0-9] | #x00B7 | [#x0300-#x036F] | [#x203F-#x2040] t_pn_chars :: CharParsing m => m Char t_pn_chars = t_pn_chars_u <|> char '-' <|> char '\x00B7' <|> satisfy f where f = flip in_range [('0', '9'), ('\x0300', '\x036F'), ('\x203F', '\x2040')] -- grammar rules: [171s] HEX t_hex :: CharParsing m => m Char t_hex = satisfy isHexDigit "hexadecimal digit" {-# INLINE in_range #-} in_range :: Char -> [(Char, Char)] -> Bool in_range c = any (\(c1, c2) -> c >= c1 && c <= c2) currGenIdLookup :: MonadState ParseState m => m (Map String Integer) currGenIdLookup = gets $ \(_, _, _, _, _, _, _,genMap) -> genMap addGenIdLookup :: MonadState ParseState m => String -> Integer -> m () addGenIdLookup genId counter = modify $ \(bUrl, dUrl, i, pms, s, p, ts, genMap) -> (bUrl, dUrl, i, pms, s, p, ts, Map.insert genId counter genMap) currBaseUrl :: MonadState ParseState m => m (Maybe BaseUrl) currBaseUrl = gets $ \(bUrl, _, _, _, _, _, _,_) -> bUrl currDocUrl :: MonadState ParseState m => m (Maybe T.Text) currDocUrl = gets $ \(_, dUrl, _, _, _, _, _,_) -> dUrl updateBaseUrl :: MonadState ParseState m => Maybe (Maybe BaseUrl) -> m () updateBaseUrl val = _modifyState val no no no no no -- combines get_current and increment into a single function nextIdCounter :: MonadState ParseState m => m Integer nextIdCounter = get >>= \(bUrl, dUrl, i, pms, s, p, ts, genMap) -> put (bUrl, dUrl, i+1, pms, s, p, ts, genMap) *> pure i nextBlankNode :: MonadState ParseState m => m Node nextBlankNode = BNodeGen . fromIntegral <$> nextIdCounter updatePMs :: MonadState ParseState m => Maybe PrefixMappings -> m () updatePMs val = _modifyState no no val no no no -- Alias for Nothing for use with _modifyState calls, which can get very long with -- many Nothing values. no :: Maybe a no = Nothing withConstantSubjectPredicate :: MonadState ParseState m => m a -> m a withConstantSubjectPredicate a = do (_, _, _, _, s, p, _, _) <- get a' <- a (bUrl, dUrl, n, pms, _, _, ts, genMap) <- get put (bUrl, dUrl, n, pms, s, p, ts, genMap) return a' -- Update the subject and predicate values of the ParseState setSubjectPredicate :: MonadState ParseState m => Maybe Subject -> Maybe Predicate -> m () setSubjectPredicate s p = modify $ \(bUrl, dUrl, n, pms, _, _, ts, genMap) -> (bUrl, dUrl, n, pms, s, p, ts, genMap) setSubject :: MonadState ParseState m => Subject -> m () setSubject s = modify $ \(bUrl, dUrl, n, pms, _, p, ts, genMap) -> (bUrl, dUrl, n, pms, Just s, p, ts, genMap) setPredicate :: MonadState ParseState m => Predicate -> m () setPredicate p = modify $ \(bUrl, dUrl, n, pms, s, _, ts, genMap) -> (bUrl, dUrl, n, pms, s, Just p, ts, genMap) -- Update the subject and predicate values of the ParseState to Nothing. resetSubjectPredicate :: MonadState ParseState m => m () resetSubjectPredicate = setSubjectPredicate Nothing Nothing -- Modifies the current parser state by updating any state values among the parameters -- that have non-Nothing values. _modifyState :: MonadState ParseState m => Maybe (Maybe BaseUrl) -> Maybe (Integer -> Integer) -> Maybe PrefixMappings -> Maybe (Maybe Subject) -> Maybe (Maybe Predicate) -> Maybe (Seq Triple) -> m () _modifyState mb_bUrl mb_n mb_pms mb_subj mb_pred mb_trps = do (_bUrl, _dUrl, _n, _pms, _s, _p, _ts, genMap) <- get put ( fromMaybe _bUrl mb_bUrl , _dUrl , maybe _n (const _n) mb_n , fromMaybe _pms mb_pms , maybe _s (const _s) mb_subj , maybe _p (const _p) mb_pred , fromMaybe _ts mb_trps, genMap ) addTripleForObject :: (CharParsing m, MonadState ParseState m) => Object -> m () addTripleForObject obj = do (bUrl, dUrl, i, pms, s, p, ts, genMap) <- get t <- getTriple s p put (bUrl, dUrl, i, pms, s, p, ts |> t, genMap) where getTriple Nothing _ = unexpected $ "No Subject with which to create triple for: " ++ show obj getTriple _ Nothing = unexpected $ "No Predicate with which to create triple for: " ++ show obj getTriple (Just s') (Just p') = pure $ Triple s' p' obj --------------------------------- -- parsec based parsers -- |Parse the document at the given location URL as a Turtle document, using an optional @BaseUrl@ -- as the base URI, and using the given document URL as the URI of the Turtle document itself. -- -- The @BaseUrl@ is used as the base URI within the document for resolving any relative URI references. -- It may be changed within the document using the @\@base@ directive. At any given point, the current -- base URI is the most recent @\@base@ directive, or if none, the @BaseUrl@ given to @parseURL@, or -- if none given, the document URL given to @parseURL@. For example, if the @BaseUrl@ were -- @http:\/\/example.org\/@ and a relative URI of @\@ were encountered (with no preceding @\@base@ -- directive), then the relative URI would expand to @http:\/\/example.org\/b@. -- -- The document URL is for the purpose of resolving references to 'this document' within the document, -- and may be different than the actual location URL from which the document is retrieved. Any reference -- to @\<>@ within the document is expanded to the value given here. Additionally, if no @BaseUrl@ is -- given and no @\@base@ directive has appeared before a relative URI occurs, this value is used as the -- base URI against which the relative URI is resolved. -- -- Returns either a @ParseFailure@ or a new RDF containing the parsed triples. parseURLParsec :: (Rdf a) => Maybe BaseUrl -- ^ The optional base URI of the document. -> Maybe T.Text -- ^ The document URI (i.e., the URI of the document itself); if Nothing, use location URI. -> String -- ^ The location URI from which to retrieve the Turtle document. -> IO (Either ParseFailure (RDF a)) -- ^ The parse result, which is either a @ParseFailure@ or the RDF -- corresponding to the Turtle document. parseURLParsec bUrl docUrl = _parseURL (parseStringParsec bUrl docUrl) -- |Parse the given file as a Turtle document. The arguments and return type have the same semantics -- as 'parseURL', except that the last @String@ argument corresponds to a filesystem location rather -- than a location URI. -- -- Note: it does not relies on OS specificities (encoding, newline convention). -- -- Returns either a @ParseFailure@ or a new RDF containing the parsed triples. parseFileParsec :: (Rdf a) => Maybe BaseUrl -> Maybe T.Text -> String -> IO (Either ParseFailure (RDF a)) parseFileParsec bUrl docUrl fpath = readFile fpath >>= \c -> pure $ handleResult bUrl (runParser (evalStateT t_turtleDoc (initialState bUrl docUrl)) () (maybe "" T.unpack docUrl) c) -- |Parse the given string as a Turtle document. The arguments and return type have the same semantics -- as , except that the last @String@ argument corresponds to the Turtle document itself as -- a string rather than a location URI. parseStringParsec :: (Rdf a) => Maybe BaseUrl -> Maybe T.Text -> T.Text -> Either ParseFailure (RDF a) parseStringParsec bUrl docUrl ttlStr = handleResult bUrl (runParser (evalStateT t_turtleDoc (initialState bUrl docUrl)) () "" ttlStr) --------------------------------- -- attoparsec based parsers parseStringAttoparsec :: (Rdf a) => Maybe BaseUrl -> Maybe T.Text -> T.Text -> Either ParseFailure (RDF a) parseStringAttoparsec bUrl docUrl t = handleResult' $ parse (evalStateT t_turtleDoc (initialState bUrl docUrl)) t where handleResult' res = case res of Fail _ _ err -> -- error err Left $ ParseFailure $ "Parse failure: \n" ++ show err Partial f -> handleResult' (f mempty) Done _ (ts,pms) -> Right $! mkRdf (F.toList ts) bUrl pms parseFileAttoparsec :: (Rdf a) => Maybe BaseUrl -> Maybe T.Text -> String -> IO (Either ParseFailure (RDF a)) parseFileAttoparsec bUrl docUrl path = parseStringAttoparsec bUrl docUrl <$> readFile path parseURLAttoparsec :: (Rdf a) => Maybe BaseUrl -- ^ The optional base URI of the document. -> Maybe T.Text -- ^ The document URI (i.e., the URI of the document itself); if Nothing, use location URI. -> String -- ^ The location URI from which to retrieve the Turtle document. -> IO (Either ParseFailure (RDF a)) -- ^ The parse result, which is either a @ParseFailure@ or the RDF -- corresponding to the Turtle document. parseURLAttoparsec bUrl docUrl = _parseURL (parseStringAttoparsec bUrl docUrl) --------------------------------- initialState :: Maybe BaseUrl -> Maybe T.Text -> ParseState initialState bUrl docUrl = (bUrl, docUrl, 1, PrefixMappings mempty, Nothing, Nothing, mempty, mempty) handleResult :: Rdf a => Maybe BaseUrl -> Either ParseError (Seq Triple, PrefixMappings) -> Either ParseFailure (RDF a) handleResult bUrl result = case result of (Left err) -> Left (ParseFailure $ "Parse failure: \n" ++ show err) (Right (ts, pms)) -> Right $! mkRdf (F.toList ts) bUrl pms -------------- -- auxiliary parsing functions -- Match the lowercase or uppercase form of 'c' caseInsensitiveChar :: CharParsing m => Char -> m Char caseInsensitiveChar c = char (toLower c) <|> char (toUpper c) -- Match the string 's', accepting either lowercase or uppercase form of each character caseInsensitiveString :: (CharParsing m, Monad m) => String -> m String caseInsensitiveString s = try (mapM caseInsensitiveChar s) "\"" ++ s ++ "\"" tryIriResolution :: (CharParsing m, Monad m) => Maybe BaseUrl -> Maybe T.Text -> T.Text -> m T.Text tryIriResolution mbUrl mdUrl iriFrag = tryIriResolution' mbUrl mdUrl where tryIriResolution' (Just (BaseUrl bIri)) _ = either err pure (resolveIRI bIri iriFrag) tryIriResolution' _ (Just dIri) = either err pure (resolveIRI dIri iriFrag) tryIriResolution' _ _ = either err pure (resolveIRI mempty iriFrag) err m = unexpected $ "Cannot resolve IRI: " ++ m ++ " " ++ show (mbUrl, mdUrl, iriFrag)