-- |An 'RdfParser' implementation for the Turtle format -- . module Text.RDF.RDF4H.TurtleParser( TurtleParser(TurtleParser) ) where import Data.Char (isLetter,isAlphaNum,toLower,toUpper) import qualified Data.Map as Map import Data.Map (Map) import Data.Maybe import Data.RDF.Types import Data.RDF.Namespace import Text.RDF.RDF4H.ParserUtils import Text.Parsec import Text.Parsec.Text import qualified Data.Text as T import qualified Data.Text.IO as TIO import Data.Sequence(Seq, (|>)) import qualified Data.Sequence as Seq import qualified Data.Foldable as F import Data.Char (isDigit) import Control.Monad -- |An 'RdfParser' implementation for parsing RDF in the -- Turtle format. It is an implementation of W3C Turtle grammar rules at -- http://www.w3.org/TR/turtle/#sec-grammar-grammar . -- It takes optional arguments representing the base URL to use -- for resolving relative URLs in the document (may be overridden in the document -- itself using the \@base directive), and the URL to use for the document itself -- for resolving references to <> in the document. -- To use this parser, pass a 'TurtleParser' value as the first argument to any of -- the 'parseString', 'parseFile', or 'parseURL' methods of the 'RdfParser' type -- class. data TurtleParser = TurtleParser (Maybe BaseUrl) (Maybe T.Text) -- |'TurtleParser' is an instance of 'RdfParser'. instance RdfParser TurtleParser where parseString (TurtleParser bUrl dUrl) = parseString' bUrl dUrl parseFile (TurtleParser bUrl dUrl) = parseFile' bUrl dUrl parseURL (TurtleParser bUrl dUrl) = parseURL' bUrl dUrl type ParseState = (Maybe BaseUrl, -- the current BaseUrl, may be Nothing initially, but not after it is once set Maybe T.Text, -- the docUrl, which never changes and is used to resolve <> in the document. Int, -- the id counter, containing the value of the next id to be used PrefixMappings, -- the mappings from prefix to URI that are encountered while parsing [Subject], -- stack of current subject nodes, if we have parsed a subject but not finished the triple [Predicate], -- stack of current predicate nodes, if we've parsed a predicate but not finished the triple [Bool], -- a stack of values to indicate that we're processing a (possibly nested) collection; top True indicates just started (on first element) [Bool], -- when in a collection, is it a subject collection or not Bool, -- when in a blank node property list, is it a subject collection or not Seq Triple, -- the triples encountered while parsing; always added to on the right side Map String Int) -- grammar rule: [1] turtleDoc t_turtleDoc :: GenParser ParseState (Seq Triple, PrefixMappings) t_turtleDoc = many t_statement >> (eof "eof") >> getState >>= \(_, _, _, pms, _, _, _, _, _, ts,_) -> return (ts, pms) -- grammar rule: [2] statement t_statement :: GenParser ParseState () t_statement = d <|> t <|> void (many1 t_ws "blankline-whitespace") where d = void (try t_directive >> (many t_ws "directive-whitespace2")) t = void (t_triples >> (many t_ws "triple-whitespace1") >> (char '.' "end-of-triple-period") >> (many t_ws "triple-whitespace2")) -- grammar rule: [6] triples -- subject predicateObjectList | blankNodePropertyList predicateObjectList? t_triples :: GenParser ParseState () t_triples = do try (t_subject >> many t_ws >> t_predicateObjectList >> resetSubjectPredicate) <|> (setSubjBlankNodePropList >> t_blankNodePropertyList >> many t_ws >> optional (t_predicateObjectList) >> resetSubjectPredicate >> setNotSubjBlankNodePropList) -- [14] blankNodePropertyList ::= '[' predicateObjectList ']' t_blankNodePropertyList :: GenParser ParseState () t_blankNodePropertyList = between (char '[') (char ']') $ do subjPropList <- isSubjPropList blankNode <- liftM BNodeGen nextIdCounter unless subjPropList $ addTripleForObject blankNode pushSubj blankNode (many t_ws >> t_predicateObjectList >> void (many t_ws)) unless subjPropList $ void popSubj -- grammar rule: [3] directive t_directive :: GenParser ParseState () t_directive = t_prefixID <|> t_base <|> t_sparql_prefix <|> t_sparql_base -- grammar rule: [135s] iri -- IRIREF | PrefixedName t_iri :: GenParser ParseState T.Text t_iri = try t_iriref <|> t_prefixedName -- grammar rule: [136s] PrefixedName t_prefixedName :: GenParser ParseState T.Text t_prefixedName = do t <- try t_pname_ln <|> try t_pname_ns return t -- grammar rule: [4] prefixID t_prefixID :: GenParser ParseState () t_prefixID = do void (try (string "@prefix" "@prefix-directive")) pre <- (many1 t_ws "whitespace-after-@prefix") >> option T.empty t_pn_prefix void (char ':' >> (many1 t_ws "whitespace-after-@prefix-colon")) uriFrag <- t_iriref void (many t_ws "prefixID-whitespace") void (char '.' "end-of-prefixID-period") (bUrl, dUrl, _, PrefixMappings pms, _, _, _, _, _, _, _) <- getState updatePMs $ Just (PrefixMappings $ Map.insert pre (absolutizeUrl bUrl dUrl uriFrag) pms) return () -- grammar rule: [6s] sparqlPrefix t_sparql_prefix :: GenParser ParseState () t_sparql_prefix = do void (try (caseInsensitiveString "PREFIX" "@prefix-directive")) pre <- (many1 t_ws "whitespace-after-@prefix") >> option T.empty t_pn_prefix void (char ':' >> (many1 t_ws "whitespace-after-@prefix-colon")) uriFrag <- t_iriref (bUrl, dUrl, _, PrefixMappings pms, _, _, _, _, _, _, _) <- getState updatePMs $ Just (PrefixMappings $ Map.insert pre (absolutizeUrl bUrl dUrl uriFrag) pms) return () -- grammar rule: [5] base t_base :: GenParser ParseState () t_base = do void (try (string "@base" "@base-directive")) void (many1 t_ws "whitespace-after-@base") urlFrag <- t_iriref void (many t_ws "base-whitespace") (void (char '.') "end-of-base-period") bUrl <- currBaseUrl dUrl <- currDocUrl updateBaseUrl (Just $ Just $ newBaseUrl bUrl (absolutizeUrl bUrl dUrl urlFrag)) -- grammar rule: [5s] sparqlBase t_sparql_base :: GenParser ParseState () t_sparql_base = do void (try (caseInsensitiveString "BASE" "@sparql-base-directive")) void (many1 t_ws "whitespace-after-BASE") urlFrag <- t_iriref bUrl <- currBaseUrl dUrl <- currDocUrl updateBaseUrl (Just $ Just $ newBaseUrl bUrl (absolutizeUrl bUrl dUrl urlFrag)) t_verb :: GenParser ParseState () -- [9] verb ::= predicate | 'a' t_verb = (try t_predicate <|> (char 'a' >> return rdfTypeNode)) >>= pushPred -- grammar rule: [11] predicate t_predicate :: GenParser ParseState Node t_predicate = liftM UNode (t_iri "resource") -- grammar rules: [139s] PNAME_NS t_pname_ns :: GenParser ParseState T.Text t_pname_ns =do pre <- option T.empty (try t_pn_prefix) void (char ':') (bUrl, _, _, pms, _, _, _, _, _, _, _) <- getState case resolveQName bUrl pre pms of Just n -> return n Nothing -> unexpected ("Cannot resolve QName prefix: " ++ T.unpack pre) -- grammar rules: [168s] PN_LOCAL -- [168s] PN_LOCAL ::= (PN_CHARS_U | ':' | [0-9] | PLX) ((PN_CHARS | '.' | ':' | PLX)* (PN_CHARS | ':' | PLX))? t_pn_local :: GenParser ParseState T.Text t_pn_local = do x <- t_pn_chars_u_str <|> string ":" <|> satisfy_str <|> t_plx xs <- option "" $ try $ do let recsve = (t_pn_chars_str <|> string ":" <|> t_plx) <|> (t_pn_chars_str <|> string ":" <|> t_plx <|> try (string "." <* lookAhead (try recsve))) <|> (t_pn_chars_str <|> string ":" <|> t_plx <|> try (string "." >> notFollowedBy t_ws >> return ".")) concat <$> many recsve return (T.pack (x ++ xs)) where satisfy_str = satisfy (flip in_range [('0', '9')]) >>= \c -> return [c] t_pn_chars_str = t_pn_chars >>= \c -> return [c] t_pn_chars_u_str = t_pn_chars_u >>= \c -> return [c] -- PERCENT | PN_LOCAL_ESC -- grammar rules: [169s] PLX t_plx :: GenParser ParseState String t_plx = t_percent <|> t_pn_local_esc_str where t_pn_local_esc_str = do c <- t_pn_local_esc return ([c]) -- '%' HEX HEX -- grammar rules: [170s] PERCENT t_percent :: GenParser ParseState String t_percent = do void (char '%') h1 <- t_hex h2 <- t_hex return (['%',h1,h2]) -- grammar rules: [172s] PN_LOCAL_ESC t_pn_local_esc :: GenParser ParseState Char t_pn_local_esc = char '\\' >> oneOf "_~.-!$&'()*+,;=/?#@%" -- grammar rules: [140s] PNAME_LN t_pname_ln :: GenParser ParseState T.Text t_pname_ln = do pre <- t_pname_ns name <- t_pn_local return (pre `T.append` name) -- grammar rule: [10] subject -- [10] subject ::= iri | BlankNode | collection t_subject :: GenParser ParseState () t_subject = iri <|> (t_blankNode >>= pushSubj) <|> (liftM BNodeGen nextIdCounter >>= pushSubj >> pushPred rdfFirstNode >> pushSubjColl >> t_collection) where iri = liftM unode (try t_iri "subject resource") >>= \s -> pushSubj s -- [137s] BlankNode ::= BLANK_NODE_LABEL | ANON t_blankNode :: GenParser ParseState Node t_blankNode = do genID <- (try t_blank_node_label <|> (t_anon >> return "")) mp <- currGenIdLookup node <- case Map.lookup genID mp of Nothing -> do i <- nextIdCounter let node = BNodeGen i addGenIdLookup genID i return node Just i -> return $ BNodeGen i return node -- TODO replicate the recursion technique from [168s] for ((..)* something)? -- [141s] BLANK_NODE_LABEL ::= '_:' (PN_CHARS_U | [0-9]) ((PN_CHARS | '.')* PN_CHARS)? t_blank_node_label :: GenParser ParseState [Char] t_blank_node_label = do void (string "_:") firstChar <- (t_pn_chars_u <|> oneOf ['0'..'9']) -- optional $ try $ do try $ do ss <- option "" $ do xs <- many (t_pn_chars <|> char '.') if null xs then return xs else if last xs == '.' then unexpected "'.' at the end of a blank node label" else return xs return (firstChar : ss) -- [162s] ANON ::= '[' WS* ']' t_anon :: GenParser ParseState () t_anon = between (char '[') (char ']') (void (many t_ws)) -- [7] predicateObjectList ::= verb objectList (';' (verb objectList)?)* t_predicateObjectList :: GenParser ParseState () t_predicateObjectList = do void (sepEndBy1 (optional (try (do { t_verb ; many1 t_ws ; t_objectList ; void popPred}))) (try (many t_ws >> char ';' >> many t_ws))) -- grammar rule: [8] objectlist -- [8] objectList ::= object (',' object)* t_objectList :: GenParser ParseState () t_objectList = -- t_object actually adds the triples void ((t_object "object") >> many (try (many t_ws >> char ',' >> many t_ws >> t_object))) -- grammar rule: [12] object -- [12] object ::= iri | BlankNode | collection | blankNodePropertyList | literal t_object :: GenParser ParseState () t_object = do inColl <- isInColl inSubjColl <- isInSubjColl onFirstItem <- onCollFirstItem let processObject = (liftM UNode t_iri >>= addTripleForObject) <|> (try t_blankNode >>= addTripleForObject) <|> (try t_collection >> pushObjColl) <|> (try t_blankNodePropertyList) <|> (t_literal >>= addTripleForObject) case (inColl,inSubjColl,onFirstItem) of (False,_,_) -> processObject (True,False,True) -> liftM BNodeGen nextIdCounter >>= \bSubj -> addTripleForObject bSubj >> pushSubj bSubj >> pushPred rdfFirstNode >> processObject >> collFirstItemProcessed -- (True,True,True) -> processObject >> collFirstItemProcessed (True,True,True) -> processObject >> collFirstItemProcessed >> popColl (True,_,False) -> liftM BNodeGen nextIdCounter >>= \bSubj -> pushPred rdfRestNode >> addTripleForObject bSubj >> popPred >> popSubj >> pushSubj bSubj >> processObject -- collection: '(' ws* itemList? ws* ')' -- itemList: object (ws+ object)* -- grammar rule: [15] collection -- 15] collection ::= '(' object* ')' t_collection :: GenParser ParseState () t_collection = between (char '(') (char ')') $ do beginColl (try empty_list <|> non_empty_list) void finishColl -- popColl where non_empty_list = do many1 (many t_ws >> t_object >> many t_ws) _inSubjColl <- isInSubjColl popPred pushPred rdfRestNode addTripleForObject rdfNilNode -- popPred -- if inSubjColl then trace "is sub" popColl else trace "not sub" $ void popSubj -- if inSubjColl then return () else trace "not sub" $ void popSubj empty_list = do lookAhead (try (many t_ws >> char ')')) addTripleForObject rdfNilNode rdfTypeNode, rdfNilNode, rdfFirstNode, rdfRestNode :: Node rdfTypeNode = UNode $ mkUri rdf "type" rdfNilNode = UNode $ mkUri rdf "nil" rdfFirstNode = UNode $ mkUri rdf "first" rdfRestNode = UNode $ mkUri rdf "rest" xsdIntUri, xsdDoubleUri, xsdDecimalUri, xsdBooleanUri :: T.Text xsdIntUri = mkUri xsd "integer" xsdDoubleUri = mkUri xsd "double" xsdDecimalUri = mkUri xsd "decimal" xsdBooleanUri = mkUri xsd "boolean" t_literal :: GenParser ParseState Node t_literal = (try t_rdf_literal >>= \l -> return (LNode l)) <|> liftM (`mkLNode` xsdDoubleUri) (try t_double) <|> liftM (`mkLNode` xsdDecimalUri) (try t_decimal) <|> liftM (`mkLNode` xsdIntUri) (try t_integer) <|> liftM (`mkLNode` xsdBooleanUri) t_boolean where mkLNode :: T.Text -> T.Text -> Node mkLNode bsType bs' = LNode (typedL bsType bs') -- [128s] RDFLiteral -- String (LANGTAG | '^^' iri)? t_rdf_literal :: GenParser ParseState LValue t_rdf_literal = do str' <- t_string let str = escapeRDFSyntax str' option (plainL str) $ do (try (t_langtag >>= \lang -> return (plainLL str lang)) <|> ((count 2 (char '^') >> t_iri >>= \iri -> return (typedL str iri)))) -- [17] String -- STRING_LITERAL_QUOTE | STRING_LITERAL_SINGLE_QUOTE | STRING_LITERAL_LONG_SINGLE_QUOTE | STRING_LITERAL_LONG_QUOTE t_string :: GenParser ParseState T.Text t_string = try t_string_literal_long_quote <|> try t_string_literal_long_single_quote <|> try t_string_literal_quote <|> t_string_literal_single_quote -- [22] STRING_LITERAL_QUOTE -- '"' ([^#x22#x5C#xA#xD] | ECHAR | UCHAR)* '"' t_string_literal_quote :: GenParser ParseState T.Text t_string_literal_quote = between (char '"') (char '"') $ do T.concat <$> many (T.singleton <$> noneOf ['\x22','\x5C','\xA','\xD'] <|> t_echar <|> t_uchar) -- [23] STRING_LITERAL_SINGLE_QUOTE -- "'" ([^#x27#x5C#xA#xD] | ECHAR | UCHAR)* "'" t_string_literal_single_quote :: GenParser ParseState T.Text t_string_literal_single_quote = between (char '\'') (char '\'') $ do T.concat <$> many (T.singleton <$> noneOf ['\x27','\x5C','\xA','\xD'] <|> t_echar <|> t_uchar) -- [24] STRING_LITERAL_LONG_SINGLE_QUOTE -- "'''" (("'" | "''")? ([^'\] | ECHAR | UCHAR))* "'''" t_string_literal_long_single_quote :: GenParser ParseState T.Text t_string_literal_long_single_quote = between ((string "'''")) ((string "'''")) $ do ss <- many $ try $ do s1 <- T.pack <$> option "" (try (string "''") <|> string "'") s2 <- T.singleton <$> noneOf ['\'','\\'] <|> t_echar <|> t_uchar return (s1 `T.append` s2) return (T.concat ss) -- [25] STRING_LITERAL_LONG_QUOTE -- '"""' (('"' | '""')? ([^"\] | ECHAR | UCHAR))* '"""' t_string_literal_long_quote :: GenParser ParseState T.Text t_string_literal_long_quote = between (string "\"\"\"") (string "\"\"\"") $ do ss <- many $ try $ do s1 <- T.pack <$> option "" (try (string "\"\"") <|> string "\"") s2 <- (T.singleton <$> noneOf ['"','\\']) <|> t_echar <|> t_uchar return (s1 `T.append` s2) return (T.concat ss) -- [144s] LANGTAG -- '@' [a-zA-Z]+ ('-' [a-zA-Z0-9]+)* t_langtag :: GenParser ParseState T.Text t_langtag = do void (char '@') ss <- many1 (satisfy (\ c -> isLetter c)) rest <- concat <$> many (char '-' >> many1 (satisfy (\ c -> isAlphaNum c)) >>= \lang_str -> return ('-':lang_str)) return (T.pack (ss ++ rest)) -- [159s] ECHAR -- '\' [tbnrf"'\] t_echar :: GenParser ParseState T.Text t_echar = try $ do void (char '\\') c2 <- oneOf ['t','b','n','r','f','"','\'','\\'] return $ case c2 of 't' -> T.singleton '\t' 'b' -> T.singleton '\b' 'n' -> T.singleton '\n' 'r' -> T.singleton '\r' 'f' -> T.singleton '\f' '"' -> T.singleton '\"' '\'' -> T.singleton '\'' '\\' -> T.singleton '\\' _ -> error "nt_echar: impossible error." -- [26] UCHAR -- '\u' HEX HEX HEX HEX | '\U' HEX HEX HEX HEX HEX HEX HEX HEX t_uchar :: GenParser ParseState T.Text t_uchar = (try (string "\\u" >> count 4 hexDigit) >>= \cs -> return $ T.pack ('\\':'u':cs)) <|> (char '\\' >> char 'U' >> count 8 hexDigit >>= \cs -> return $ T.pack ('\\':'U':cs)) -- [19] INTEGER ::= [+-]? [0-9]+ t_integer :: GenParser ParseState T.Text t_integer = try $ do sign <- sign_parser "+-" ds <- many1 (oneOf ['0'..'9'] "digit") return $! ( T.pack sign `T.append` T.pack ds) -- grammar rule: [21] DOUBLE -- [21] DOUBLE ::= [+-]? ([0-9]+ '.' [0-9]* EXPONENT | '.' [0-9]+ EXPONENT | [0-9]+ EXPONENT) t_double :: GenParser ParseState T.Text t_double = do sign <- sign_parser "+-" rest <- try (do { ds <- many1 (oneOf ['0'..'9']) "digit"; void (char '.'); ds' <- many (oneOf ['0'..'9']) "digit"; e <- t_exponent "exponent"; return ( T.pack ds `T.snoc` '.' `T.append` T.pack ds' `T.append` e) }) <|> try (do { void (char '.'); ds <- many1 (oneOf ['0'..'9']) "digit"; e <- t_exponent "exponent"; return ('.' `T.cons` T.pack ds `T.append` e) }) <|> (do { ds <- many1 (oneOf ['0'..'9']) "digit"; e <- t_exponent "exponent"; return ( T.pack ds `T.append` e) }) return $! T.pack sign `T.append` rest sign_parser :: GenParser ParseState String sign_parser = option "" (oneOf "-+" >>= (\c -> return [c])) -- [20] DECIMAL ::= [+-]? [0-9]* '.' [0-9]+ t_decimal :: GenParser ParseState T.Text t_decimal = try $ do sign <- sign_parser dig1 <- many (oneOf ['0'..'9']) void (char '.') dig2 <- many1 (oneOf ['0'..'9']) return (T.pack sign `T.append` T.pack dig1 `T.append` T.pack "." `T.append` T.pack dig2) t_exponent :: GenParser ParseState T.Text t_exponent = do e <- oneOf "eE" s <- option "" (oneOf "-+" >>= \c -> return [c]) ds <- many1 digit; return $! (e `T.cons` ( T.pack s `T.append` T.pack ds)) t_boolean :: GenParser ParseState T.Text t_boolean = try (liftM T.pack (string "true") <|> liftM T.pack (string "false")) t_comment :: GenParser ParseState () t_comment = void (char '#' >> many (satisfy (\ c -> c /= '\n' && c /= '\r'))) -- [161s] WS ::= #x20 | #x9 | #xD | #xA t_ws :: GenParser ParseState () t_ws = (void (try (char '\t' <|> char '\n' <|> char '\r' <|> char ' ')) <|> try t_comment) "whitespace-or-comment" -- grammar rule: [167s] PN_PREFIX t_pn_prefix :: GenParser ParseState T.Text t_pn_prefix = do i <- try t_pn_chars_base r <- option "" (many (try t_pn_chars <|> char '.')) -- TODO: ensure t_pn_chars is last char return (T.pack (i:r)) -- [18] IRIREF t_iriref :: GenParser ParseState T.Text t_iriref = between (char '<') (char '>') $ do iri <- T.concat <$> many ( T.singleton <$> noneOf (['\x00'..'\x20'] ++ ['<','>','"','{','}','|','^','`','\\']) <|> t_uchar ) bUrl <- currBaseUrl dUrl <- currDocUrl let iri' = escapeRDFSyntax iri validateURI (absolutizeUrl bUrl dUrl iri') t_pn_chars :: GenParser ParseState Char t_pn_chars = t_pn_chars_u <|> char '-' <|> char '\x00B7' <|> satisfy f where f = flip in_range [('0', '9'), ('\x0300', '\x036F'), ('\x203F', '\x2040')] -- grammar rule: [163s] PN_CHARS_BASE t_pn_chars_base :: GenParser ParseState Char t_pn_chars_base = try $ satisfy $ flip in_range blocks where blocks = [('A', 'Z'), ('a', 'z'), ('\x00C0', '\x00D6'), ('\x00D8', '\x00F6'), ('\x00F8', '\x02FF'), ('\x0370', '\x037D'), ('\x037F', '\x1FFF'), ('\x200C', '\x200D'), ('\x2070', '\x218F'), ('\x2C00', '\x2FEF'), ('\x3001', '\xD7FF'), ('\xF900', '\xFDCF'), ('\xFDF0', '\xFFFD'), ('\x10000', '\xEFFFF')] -- grammar rule: [164s] PN_CHARS_U t_pn_chars_u :: GenParser ParseState Char t_pn_chars_u = t_pn_chars_base <|> char '_' -- grammar rules: [171s] HEX t_hex :: GenParser ParseState Char t_hex = satisfy (\c -> isDigit c || (c >= 'A' && c <= 'F')) "hexadecimal digit" {-# INLINE in_range #-} in_range :: Char -> [(Char, Char)] -> Bool in_range c = any (\(c1, c2) -> c >= c1 && c <= c2) newBaseUrl :: Maybe BaseUrl -> T.Text -> BaseUrl newBaseUrl Nothing url = BaseUrl url newBaseUrl (Just (BaseUrl bUrl)) url = BaseUrl $! mkAbsoluteUrl bUrl url currGenIdLookup :: GenParser ParseState (Map String Int) currGenIdLookup = getState >>= \(_, _, _, _, _, _, _, _, _, _,genMap) -> return genMap addGenIdLookup :: String -> Int -> GenParser ParseState () addGenIdLookup genId counter = getState >>= \(bUrl, dUrl, i, pms, ss, ps, cs, subjC, subjBNodeList, ts, genMap) -> setState (bUrl, dUrl, i, pms, ss, ps, cs, subjC, subjBNodeList, ts, Map.insert genId counter genMap) currBaseUrl :: GenParser ParseState (Maybe BaseUrl) currBaseUrl = getState >>= \(bUrl, _, _, _, _, _, _, _, _, _,_) -> return bUrl currDocUrl :: GenParser ParseState (Maybe T.Text) currDocUrl = getState >>= \(_, dUrl, _, _, _, _, _, _, _, _,_) -> return dUrl pushSubj :: Subject -> GenParser ParseState () pushSubj s = getState >>= \(bUrl, dUrl, i, pms, ss, ps, cs, subjC, subjBNodeList, ts, genMap) -> setState (bUrl, dUrl, i, pms, s:ss, ps, cs, subjC, subjBNodeList, ts, genMap) popSubj :: GenParser ParseState Subject popSubj = getState >>= \(bUrl, dUrl, i, pms, ss, ps, cs, subjC, subjBNodeList, ts, genMap) -> setState (bUrl, dUrl, i, pms, tail ss, ps, cs, subjC, subjBNodeList, ts, genMap) >> when (null ss) (error "Cannot pop subject off empty stack.") >> return (head ss) pushPred :: Predicate -> GenParser ParseState () pushPred p = getState >>= \(bUrl, dUrl, i, pms, ss, ps, cs, subjC, subjBNodeList, ts, genMap) -> setState (bUrl, dUrl, i, pms, ss, p:ps, cs, subjC, subjBNodeList, ts, genMap) popPred :: GenParser ParseState Predicate popPred = getState >>= \(bUrl, dUrl, i, pms, ss, ps, cs, subjC, subjBNodeList, ts, genMap) -> setState (bUrl, dUrl, i, pms, ss, tail ps, cs, subjC, subjBNodeList, ts, genMap) >> when (null ps) (error "Cannot pop predicate off empty stack.") >> return (head ps) isInColl :: GenParser ParseState Bool isInColl = getState >>= \(_, _, _, _, _, _, cs, _, _, _, _) -> return . not . null $ cs isInSubjColl :: GenParser ParseState Bool isInSubjColl = getState >>= \(_, _, _, _, _, _, _, xs, _, _, _) -> do if null xs then return False else return (head xs) {- isInObjColl :: GenParser ParseState Bool isInObjColl = getState >>= \(_, _, _, _, _, _, _, xs, _, _) -> do when (null xs) $ error "null in isInObjColl" return (not (head xs)) -} pushSubjColl :: GenParser ParseState () pushSubjColl = getState >>= \(bUrl, dUrl, i, pms, s, p, cs, subjC, subjBNodeList, ts, genMap) -> setState (bUrl, dUrl, i, pms, s, p, cs, True:subjC, subjBNodeList, ts, genMap) popColl :: GenParser ParseState () popColl = getState >>= \(bUrl, dUrl, i, pms, s, p, cs, subjC, subjBNodeList, ts, genMap) -> do when (null subjC) $ error "null in popColl" setState (bUrl, dUrl, i, pms, s, p, cs, tail subjC, subjBNodeList, ts, genMap) pushObjColl :: GenParser ParseState () pushObjColl = getState >>= \(bUrl, dUrl, i, pms, s, p, cs, subjC, subjBNodeList, ts,genMap) -> setState (bUrl, dUrl, i, pms, s, p, cs, False:subjC, subjBNodeList, ts,genMap) isSubjPropList :: GenParser ParseState Bool isSubjPropList = getState >>= \(_, _, _, _, _, _, _, _, subjBNodeList, _,_) -> do return subjBNodeList {- isObjPropList :: GenParser ParseState Bool isObjPropList = getState >>= \(_, _, _, _, _, _, _, _, subjBNodeList, _) -> do return subjBNodeList -} setSubjBlankNodePropList :: GenParser ParseState () setSubjBlankNodePropList = getState >>= \(bUrl, dUrl, i, pms, s, p, cs, subjC, _, ts,genMap) -> setState (bUrl, dUrl, i, pms, s, p, cs, subjC, True, ts,genMap) setNotSubjBlankNodePropList :: GenParser ParseState () setNotSubjBlankNodePropList = getState >>= \(bUrl, dUrl, i, pms, s, p, cs, subjC, _, ts,genMap) -> setState (bUrl, dUrl, i, pms, s, p, cs, subjC, True, ts,genMap) -- setObjBlankNodePropList :: GenParser ParseState () -- setObjBlankNodePropList = getState >>= \(bUrl, dUrl, i, pms, s, p, cs, subjC, _, ts) -> -- setState (bUrl, dUrl, i, pms, s, p, cs, subjC, False, ts) -- popBlankNodePropList :: GenParser ParseState () -- popBlankNodePropList = getState >>= \(bUrl, dUrl, i, pms, s, p, cs, subjC, _:subjBNodeList, ts) -> -- when (null subjBNodeList) $ "no subj/obj flag to pop when exiting collection" -- setState (bUrl, dUrl, i, pms, s, p, cs, subjC, subjBNodeList, ts) updateBaseUrl :: Maybe (Maybe BaseUrl) -> GenParser ParseState () updateBaseUrl val = _modifyState val no no no no no -- combines get_current and increment into a single function nextIdCounter :: GenParser ParseState Int nextIdCounter = getState >>= \(bUrl, dUrl, i, pms, s, p, cs, subjC, subjBNodeList, ts,genMap) -> setState (bUrl, dUrl, i+1, pms, s, p, cs, subjC, subjBNodeList, ts,genMap) >> return i updatePMs :: Maybe PrefixMappings -> GenParser ParseState () updatePMs val = _modifyState no no val no no no -- Register that we have begun processing a collection beginColl :: GenParser ParseState () beginColl = getState >>= \(bUrl, dUrl, i, pms, s, p, cs, subjC, subjBNodeList, ts,genMap) -> setState (bUrl, dUrl, i, pms, s, p, True:cs, subjC, subjBNodeList, ts,genMap) onCollFirstItem :: GenParser ParseState Bool onCollFirstItem = getState >>= \(_, _, _, _, _, _, cs, _, _, _,_) -> return (not (null cs) && head cs) collFirstItemProcessed :: GenParser ParseState () collFirstItemProcessed = getState >>= \(bUrl, dUrl, i, pms, s, p, _:cs, subjC, subjBNodeList, ts,genMap) -> setState (bUrl, dUrl, i, pms, s, p, False:cs, subjC, subjBNodeList, ts,genMap) -- Register that a collection is finished being processed; the bool value -- in the monad is *not* the value that was popped from the stack, but whether -- we are still processing a parent collection or have finished processing -- all collections and are no longer in a collection at all. finishColl :: GenParser ParseState Bool finishColl = getState >>= \(bUrl, dUrl, i, pms, s, p, cs, subjC, subjBNodeList, ts,genMap) -> let cs' = drop 1 cs in setState (bUrl, dUrl, i, pms, s, p, cs', subjC, subjBNodeList, ts,genMap) >> return (not $ null cs') -- Alias for Nothing for use with _modifyState calls, which can get very long with -- many Nothing values. no :: Maybe a no = Nothing -- Update the subject and predicate values of the ParseState to Nothing. resetSubjectPredicate :: GenParser ParseState () resetSubjectPredicate = getState >>= \(bUrl, dUrl, n, pms, _, _, cs, subjC, subjBNodeList, ts,genMap) -> setState (bUrl, dUrl, n, pms, [], [], cs, subjC, subjBNodeList, ts,genMap) -- Modifies the current parser state by updating any state values among the parameters -- that have non-Nothing values. _modifyState :: Maybe (Maybe BaseUrl) -> Maybe (Int -> Int) -> Maybe PrefixMappings -> Maybe Subject -> Maybe Predicate -> Maybe (Seq Triple) -> GenParser ParseState () _modifyState mb_bUrl mb_n mb_pms mb_subj mb_pred mb_trps = do (_bUrl, _dUrl, _n, _pms, _s, _p, _cs, _subjC, _subjBNodeList, _ts,genMap) <- getState setState (fromMaybe _bUrl mb_bUrl, _dUrl, maybe _n (const _n) mb_n, fromMaybe _pms mb_pms, maybe _s (: _s) mb_subj, maybe _p (: _p) mb_pred, _cs, _subjC, _subjBNodeList, fromMaybe _ts mb_trps,genMap) addTripleForObject :: Object -> GenParser ParseState () addTripleForObject obj = do (bUrl, dUrl, i, pms, ss, ps, cs, subjC, subjBNodeList, ts,genMap) <- getState when (null ss) $ unexpected $ "No Subject with which to create triple for: " ++ show obj when (null ps) $ unexpected $ "No Predicate with which to create triple for: " ++ show obj setState (bUrl, dUrl, i, pms, ss, ps, cs, subjC, subjBNodeList, ts |> Triple (head ss) (head ps) obj,genMap) -- |Parse the document at the given location URL as a Turtle document, using an optional @BaseUrl@ -- as the base URI, and using the given document URL as the URI of the Turtle document itself. -- -- The @BaseUrl@ is used as the base URI within the document for resolving any relative URI references. -- It may be changed within the document using the @\@base@ directive. At any given point, the current -- base URI is the most recent @\@base@ directive, or if none, the @BaseUrl@ given to @parseURL@, or -- if none given, the document URL given to @parseURL@. For example, if the @BaseUrl@ were -- @http:\/\/example.org\/@ and a relative URI of @\@ were encountered (with no preceding @\@base@ -- directive), then the relative URI would expand to @http:\/\/example.org\/b@. -- -- The document URL is for the purpose of resolving references to 'this document' within the document, -- and may be different than the actual location URL from which the document is retrieved. Any reference -- to @\<>@ within the document is expanded to the value given here. Additionally, if no @BaseUrl@ is -- given and no @\@base@ directive has appeared before a relative URI occurs, this value is used as the -- base URI against which the relative URI is resolved. -- -- Returns either a @ParseFailure@ or a new RDF containing the parsed triples. parseURL' :: (Rdf a) => Maybe BaseUrl -- ^ The optional base URI of the document. -> Maybe T.Text -- ^ The document URI (i.e., the URI of the document itself); if Nothing, use location URI. -> String -- ^ The location URI from which to retrieve the Turtle document. -> IO (Either ParseFailure (RDF a)) -- ^ The parse result, which is either a @ParseFailure@ or the RDF -- corresponding to the Turtle document. parseURL' bUrl docUrl = _parseURL (parseString' bUrl docUrl) -- |Parse the given file as a Turtle document. The arguments and return type have the same semantics -- as 'parseURL', except that the last @String@ argument corresponds to a filesystem location rather -- than a location URI. -- -- Returns either a @ParseFailure@ or a new RDF containing the parsed triples. parseFile' :: (Rdf a) => Maybe BaseUrl -> Maybe T.Text -> String -> IO (Either ParseFailure (RDF a)) parseFile' bUrl docUrl fpath = do TIO.readFile fpath >>= \bs' -> return $ handleResult bUrl (runParser t_turtleDoc initialState (maybe "" T.unpack docUrl) bs') where initialState = (bUrl, docUrl, 1, PrefixMappings Map.empty, [], [], [], [], False, Seq.empty,Map.empty) -- |Parse the given string as a Turtle document. The arguments and return type have the same semantics -- as , except that the last @String@ argument corresponds to the Turtle document itself as -- a string rather than a location URI. parseString' :: (Rdf a) => Maybe BaseUrl -> Maybe T.Text -> T.Text -> Either ParseFailure (RDF a) parseString' bUrl docUrl ttlStr = handleResult bUrl (runParser t_turtleDoc initialState "" ttlStr) where initialState = (bUrl, docUrl, 1, PrefixMappings Map.empty, [], [], [], [], False, Seq.empty,Map.empty) handleResult :: Rdf a => Maybe BaseUrl -> Either ParseError (Seq Triple, PrefixMappings) -> Either ParseFailure (RDF a) handleResult bUrl result = case result of (Left err) -> Left (ParseFailure $ show err) (Right (ts, pms)) -> Right $! mkRdf (F.toList ts) bUrl pms validateUNode :: T.Text -> GenParser ParseState Node validateUNode t = case unodeValidate t of Nothing -> unexpected ("Invalid URI in Turtle parser URI validation: " ++ show t) Just u@(UNode{}) -> return u Just node -> unexpected ("Unexpected node in Turtle parser URI validation: " ++ show node) validateURI :: T.Text -> GenParser ParseState T.Text validateURI t = do UNode uri <- validateUNode t return uri -------------- -- auxiliary parsing functions -- Match the lowercase or uppercase form of 'c' caseInsensitiveChar :: Char -> GenParser ParseState Char caseInsensitiveChar c = char (toLower c) <|> char (toUpper c) -- Match the string 's', accepting either lowercase or uppercase form of each character caseInsensitiveString :: String -> GenParser ParseState String caseInsensitiveString s = try (mapM caseInsensitiveChar s) "\"" ++ s ++ "\""