{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE DoAndIfThenElse #-}
module Text.RDF.RDF4H.TurtleParser(
TurtleParser(TurtleParser),
TurtleParserCustom(TurtleParserCustom)
)
where
import Data.Attoparsec.ByteString (parse,IResult(..))
import Data.Char (isLetter,isAlphaNum,toLower,toUpper,isDigit,isHexDigit)
import qualified Data.Map as Map
import Data.Map (Map)
import Data.Maybe
import Data.RDF.Types
import Data.RDF.Namespace
import Text.RDF.RDF4H.ParserUtils
import Text.Parsec (runParser,ParseError)
import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import qualified Data.Text.IO as TIO
import Data.Sequence(Seq, (|>))
import qualified Data.Sequence as Seq
import qualified Data.Foldable as F
import Control.Monad
import Text.Parser.Char
import Text.Parser.Combinators
import Text.Parser.LookAhead
import Control.Applicative
import Control.Monad.State.Class
import Control.Monad.State.Strict
data TurtleParser = TurtleParser (Maybe BaseUrl) (Maybe T.Text)
data TurtleParserCustom = TurtleParserCustom (Maybe BaseUrl) (Maybe T.Text) Parser
instance RdfParser TurtleParser where
parseString (TurtleParser bUrl dUrl) = parseStringParsec bUrl dUrl
parseFile (TurtleParser bUrl dUrl) = parseFileParsec bUrl dUrl
parseURL (TurtleParser bUrl dUrl) = parseURLParsec bUrl dUrl
instance RdfParser TurtleParserCustom where
parseString (TurtleParserCustom bUrl dUrl Parsec) = parseStringParsec bUrl dUrl
parseString (TurtleParserCustom bUrl dUrl Attoparsec) = parseStringAttoparsec bUrl dUrl
parseFile (TurtleParserCustom bUrl dUrl Parsec) = parseFileParsec bUrl dUrl
parseFile (TurtleParserCustom bUrl dUrl Attoparsec) = parseFileAttoparsec bUrl dUrl
parseURL (TurtleParserCustom bUrl dUrl Parsec) = parseURLParsec bUrl dUrl
parseURL (TurtleParserCustom bUrl dUrl Attoparsec) = parseURLAttoparsec bUrl dUrl
type ParseState =
(Maybe BaseUrl,
Maybe T.Text,
Int,
PrefixMappings,
[Subject],
[Predicate],
[Bool],
[Bool],
Bool,
Seq Triple,
Map String Int)
t_turtleDoc :: (MonadState ParseState m,CharParsing m, LookAheadParsing m) => m (Seq Triple, PrefixMappings)
t_turtleDoc =
many t_statement *> (eof <?> "eof") *> gets (\(_, _, _, pms, _, _, _, _, _, ts,_) -> (ts, pms))
t_statement :: (MonadState ParseState m,CharParsing m, LookAheadParsing m) => m ()
t_statement = d <|> t <|> void (some t_ws <?> "blankline-whitespace")
where
d = void
(try t_directive
*> (many t_ws <?> "directive-whitespace2"))
t = void
(t_triples
*> (many t_ws <?> "triple-whitespace1")
*> (char '.' <?> "end-of-triple-period")
*> (many t_ws <?> "triple-whitespace2"))
t_triples :: (MonadState ParseState m,CharParsing m, LookAheadParsing m) => m ()
t_triples =
try (t_subject *> many t_ws *> t_predicateObjectList *> resetSubjectPredicate)
<|> (setSubjBlankNodePropList
*> t_blankNodePropertyList
*> many t_ws
*> optional t_predicateObjectList
*> resetSubjectPredicate
*> setNotSubjBlankNodePropList)
t_blankNodePropertyList :: (MonadState ParseState m,CharParsing m, LookAheadParsing m) => m ()
t_blankNodePropertyList = between (char '[') (char ']') $ do
subjPropList <- isSubjPropList
blankNode <- BNodeGen <$> nextIdCounter
unless subjPropList $ addTripleForObject blankNode
pushSubj blankNode
many t_ws *> t_predicateObjectList *> void (many t_ws)
unless subjPropList $ void popSubj
t_directive :: (CharParsing m, MonadState ParseState m) => m ()
t_directive = t_prefixID <|> t_base <|> t_sparql_prefix <|> t_sparql_base
t_iri :: (MonadState ParseState m,CharParsing m, LookAheadParsing m) => m T.Text
t_iri = try t_iriref <|> t_prefixedName
t_prefixedName :: (MonadState ParseState m,CharParsing m, LookAheadParsing m) => m T.Text
t_prefixedName = try t_pname_ln <|> try t_pname_ns
t_prefixID :: (CharParsing m, MonadState ParseState m) => m ()
t_prefixID =
do void (try (string "@prefix" <?> "@prefix-directive"))
pre <- (some t_ws <?> "whitespace-after-@prefix") *> option T.empty t_pn_prefix
void (char ':' *> (some t_ws <?> "whitespace-after-@prefix-colon"))
uriFrag <- t_iriref
void (many t_ws <?> "prefixID-whitespace")
void (char '.' <?> "end-of-prefixID-period")
(bUrl, dUrl, _, PrefixMappings pms, _, _, _, _, _, _, _) <- get
updatePMs $ Just (PrefixMappings $ Map.insert pre (absolutizeUrl bUrl dUrl uriFrag) pms)
pure ()
t_sparql_prefix :: (CharParsing m, MonadState ParseState m) => m ()
t_sparql_prefix =
do void (try (caseInsensitiveString "PREFIX" <?> "@prefix-directive"))
pre <- (some t_ws <?> "whitespace-after-@prefix") *> option T.empty t_pn_prefix
void (char ':' *> (some t_ws <?> "whitespace-after-@prefix-colon"))
uriFrag <- t_iriref
(bUrl, dUrl, _, PrefixMappings pms, _, _, _, _, _, _, _) <- get
updatePMs $ Just (PrefixMappings $ Map.insert pre (absolutizeUrl bUrl dUrl uriFrag) pms)
pure ()
t_base :: (CharParsing m, MonadState ParseState m) => m ()
t_base =
do void (try (string "@base" <?> "@base-directive"))
void (some t_ws <?> "whitespace-after-@base")
urlFrag <- t_iriref
void (many t_ws <?> "base-whitespace")
void (char '.') <?> "end-of-base-period"
bUrl <- currBaseUrl
dUrl <- currDocUrl
updateBaseUrl (Just $ Just $ newBaseUrl bUrl (absolutizeUrl bUrl dUrl urlFrag))
t_sparql_base :: (CharParsing m, MonadState ParseState m) => m ()
t_sparql_base =
do void (try (caseInsensitiveString "BASE" <?> "@sparql-base-directive"))
void (some t_ws <?> "whitespace-after-BASE")
urlFrag <- t_iriref
bUrl <- currBaseUrl
dUrl <- currDocUrl
updateBaseUrl (Just $ Just $ newBaseUrl bUrl (absolutizeUrl bUrl dUrl urlFrag))
t_verb :: (MonadState ParseState m,CharParsing m, LookAheadParsing m) => m ()
t_verb = (try t_predicate <|> (char 'a' *> pure rdfTypeNode)) >>= pushPred
t_predicate :: (MonadState ParseState m,CharParsing m, LookAheadParsing m) => m Node
t_predicate = UNode <$> (t_iri <?> "resource")
t_pname_ns :: (CharParsing m, MonadState ParseState m) => m T.Text
t_pname_ns =do
pre <- option T.empty (try t_pn_prefix) <* char ':'
(bUrl, _, _, pms, _, _, _, _, _, _, _) <- get
case resolveQName bUrl pre pms of
Just n -> pure n
Nothing -> unexpected ("Cannot resolve QName prefix: " ++ T.unpack pre)
t_pn_local :: (MonadState ParseState m,CharParsing m, LookAheadParsing m) => m T.Text
t_pn_local = do
x <- t_pn_chars_u_str <|> string ":" <|> satisfy_str <|> t_plx
xs <- option "" $ try $ do
let recsve = (t_pn_chars_str <|> string ":" <|> t_plx) <|>
(t_pn_chars_str <|> string ":" <|> t_plx <|> try (string "." <* lookAhead (try recsve))) <|>
(t_pn_chars_str <|> string ":" <|> t_plx <|> try (string "." *> notFollowedBy t_ws *> pure "."))
concat <$> many recsve
pure (T.pack (x ++ xs))
where
satisfy_str = pure <$> satisfy isDigit
t_pn_chars_str = pure <$> t_pn_chars
t_pn_chars_u_str = pure <$> t_pn_chars_u
t_plx :: (CharParsing m, Monad m) => m String
t_plx = t_percent <|> t_pn_local_esc_str
where
t_pn_local_esc_str = pure <$> t_pn_local_esc
t_percent :: (CharParsing m, Monad m) => m String
t_percent = sequence [char '%',t_hex,t_hex]
t_pn_local_esc :: CharParsing m => m Char
t_pn_local_esc = char '\\' *> oneOf "_~.-!$&'()*+,;=/?#@%"
t_pname_ln :: (MonadState ParseState m,CharParsing m, LookAheadParsing m) => m T.Text
t_pname_ln = T.append <$> t_pname_ns <*> t_pn_local
t_subject :: (MonadState ParseState m,CharParsing m, LookAheadParsing m) => m ()
t_subject =
iri <|>
(t_blankNode >>= pushSubj) <|>
(BNodeGen <$> nextIdCounter >>= \x -> pushSubj x
*> pushPred rdfFirstNode
*> pushSubjColl
*> t_collection)
where
iri = unode <$> (try t_iri <?> "subject resource") >>= pushSubj
t_blankNode :: (CharParsing m, MonadState ParseState m) => m Node
t_blankNode = do
genID <- try t_blank_node_label <|> (t_anon *> pure "")
mp <- currGenIdLookup
case Map.lookup genID mp of
Nothing -> do
i <- nextIdCounter
let node = BNodeGen i
addGenIdLookup genID i
pure node
Just i ->
pure $ BNodeGen i
t_blank_node_label :: (CharParsing m, MonadState ParseState m) => m String
t_blank_node_label = do
void (string "_:")
firstChar <- t_pn_chars_u <|> satisfy isDigit
try $ do
ss <- option "" $ do
xs <- many (t_pn_chars <|> char '.')
if null xs
then pure xs
else if last xs == '.'
then unexpected "'.' at the end of a blank node label"
else pure xs
pure (firstChar : ss)
t_anon :: CharParsing m => m ()
t_anon = between (char '[') (char ']') (skipMany t_ws)
t_predicateObjectList :: (MonadState ParseState m,CharParsing m, LookAheadParsing m) => m ()
t_predicateObjectList =
void (sepEndBy1
(optional (try (do { t_verb
; some t_ws
; t_objectList
; popPred})))
(try (many t_ws *> char ';' *> many t_ws)))
t_objectList :: (MonadState ParseState m,CharParsing m, LookAheadParsing m) => m ()
t_objectList =
() <$ ((t_object <?> "object")
*> many (try (many t_ws *> char ',' *> many t_ws *> t_object)))
t_object :: (MonadState ParseState m,CharParsing m, LookAheadParsing m) => m ()
t_object = do
inColl <- isInColl
inSubjColl <- isInSubjColl
onFirstItem <- onCollFirstItem
let processObject =
(UNode <$> t_iri >>= addTripleForObject) <|>
(try t_blankNode >>= addTripleForObject) <|>
(try t_collection *> pushObjColl) <|>
try t_blankNodePropertyList <|>
(t_literal >>= addTripleForObject)
case (inColl,inSubjColl,onFirstItem) of
(False,_,_) -> processObject
(True,False,True) -> BNodeGen <$> nextIdCounter >>= \bSubj -> addTripleForObject bSubj
*> pushSubj bSubj *> pushPred rdfFirstNode *> processObject *> collFirstItemProcessed
(True,True,True) -> processObject *> collFirstItemProcessed *> popColl
(True,_,False) -> BNodeGen <$> nextIdCounter >>= \bSubj -> pushPred rdfRestNode *>
addTripleForObject bSubj *> popPred *> popSubj *>
pushSubj bSubj *> processObject
t_collection :: (MonadState ParseState m,CharParsing m, LookAheadParsing m) => m ()
t_collection =
between (char '(') (char ')') $ do
beginColl
try empty_list <|> non_empty_list
void (many t_ws)
void finishColl
where
non_empty_list = do
some (many t_ws *> t_object *> many t_ws)
_inSubjColl <- isInSubjColl
popPred
pushPred rdfRestNode
addTripleForObject rdfNilNode
void popSubj
empty_list = do
lookAhead (try (many t_ws *> char ')'))
addTripleForObject rdfNilNode
rdfTypeNode, rdfNilNode, rdfFirstNode, rdfRestNode :: Node
rdfTypeNode = UNode $ mkUri rdf "type"
rdfNilNode = UNode $ mkUri rdf "nil"
rdfFirstNode = UNode $ mkUri rdf "first"
rdfRestNode = UNode $ mkUri rdf "rest"
xsdIntUri, xsdDoubleUri, xsdDecimalUri, xsdBooleanUri :: T.Text
xsdIntUri = mkUri xsd "integer"
xsdDoubleUri = mkUri xsd "double"
xsdDecimalUri = mkUri xsd "decimal"
xsdBooleanUri = mkUri xsd "boolean"
t_literal :: (MonadState ParseState m,CharParsing m, LookAheadParsing m) => m Node
t_literal =
LNode <$> try t_rdf_literal <|>
(`mkLNode` xsdDoubleUri) <$> try t_double <|>
(`mkLNode` xsdDecimalUri) <$> try t_decimal <|>
(`mkLNode` xsdIntUri) <$> try t_integer <|>
(`mkLNode` xsdBooleanUri) <$> t_boolean
where
mkLNode :: T.Text -> T.Text -> Node
mkLNode bsType bs' = LNode (typedL bsType bs')
t_rdf_literal :: (MonadState ParseState m,CharParsing m, LookAheadParsing m) => m LValue
t_rdf_literal = do
str' <- t_string
let str = escapeRDFSyntax str'
option (plainL str) $
try (t_langtag >>= \lang -> pure (plainLL str lang)) <|>
(count 2 (char '^') *> t_iri >>= \iri -> pure (typedL str iri))
t_string :: (CharParsing m, Monad m) => m T.Text
t_string = try t_string_literal_long_quote <|>
try t_string_literal_long_single_quote <|>
try t_string_literal_quote <|>
t_string_literal_single_quote
t_string_literal_quote :: (CharParsing m, Monad m) => m T.Text
t_string_literal_quote =
between (char '"') (char '"') $
T.concat <$> many (T.singleton <$> noneOf ['\x22','\x5C','\xA','\xD'] <|>
t_echar <|>
t_uchar)
t_string_literal_single_quote :: (CharParsing m, Monad m) => m T.Text
t_string_literal_single_quote =
between (char '\'') (char '\'') $
T.concat <$>
many (T.singleton <$> noneOf ['\x27','\x5C','\xA','\xD'] <|>
t_echar <|>
t_uchar)
t_string_literal_long_single_quote :: (CharParsing m, Monad m) => m T.Text
t_string_literal_long_single_quote =
between (string "'''") (string "'''") $ do
ss <- many $ try $ do
s1 <- T.pack <$> option "" (try (string "''") <|> string "'")
s2 <- T.singleton <$> noneOf ['\'','\\'] <|> t_echar <|> t_uchar
pure (s1 `T.append` s2)
pure (T.concat ss)
t_string_literal_long_quote :: (CharParsing m, Monad m) => m T.Text
t_string_literal_long_quote =
between (string "\"\"\"") (string "\"\"\"") $ do
ss <- many $ try $ do
s1 <- T.pack <$> option "" (try (string "\"\"") <|> string "\"")
s2 <- (T.singleton <$> noneOf ['"','\\']) <|> t_echar <|> t_uchar
pure (s1 `T.append` s2)
pure (T.concat ss)
t_langtag :: (CharParsing m, Monad m) => m T.Text
t_langtag = do
ss <- char '@' *> some (satisfy isLetter)
rest <- concat <$> many (char '-' *> some (satisfy isAlphaNum) >>= \lang_str -> pure ('-':lang_str))
pure (T.pack (ss ++ rest))
t_echar :: (CharParsing m, Monad m) => m T.Text
t_echar = try $ do
c2 <- char '\\' *> oneOf ['t','b','n','r','f','"','\'','\\']
case c2 of
't' -> pure $ T.singleton '\t'
'b' -> pure $ T.singleton '\b'
'n' -> pure $ T.singleton '\n'
'r' -> pure $ T.singleton '\r'
'f' -> pure $ T.singleton '\f'
'"' -> pure $ T.singleton '\"'
'\'' -> pure $ T.singleton '\''
'\\' -> pure $ T.singleton '\\'
_ -> fail "nt_echar: impossible error."
t_uchar :: (CharParsing m, Monad m) => m T.Text
t_uchar =
(try (string "\\u" *> count 4 hexDigit) >>= \cs -> pure $ T.pack ('\\':'u':cs)) <|>
(char '\\' *> char 'U' *> count 8 hexDigit >>= \cs -> pure $ T.pack ('\\':'U':cs))
t_integer :: (CharParsing m, Monad m) => m T.Text
t_integer = try $
do sign <- sign_parser <?> "+-"
ds <- some (satisfy isDigit <?> "digit")
pure $! ( T.pack sign `T.append` T.pack ds)
t_double :: (CharParsing m, Monad m) => m T.Text
t_double =
do sign <- sign_parser <?> "+-"
rest <- try (do { ds <- (some (satisfy isDigit) <?> "digit") <* char '.';
ds' <- many (satisfy isDigit) <?> "digit";
e <- t_exponent <?> "exponent";
pure ( T.pack ds `T.snoc` '.' `T.append` T.pack ds' `T.append` e) }) <|>
try (do { ds <- char '.' *> some (satisfy isDigit) <?> "digit";
e <- t_exponent <?> "exponent";
pure ('.' `T.cons` T.pack ds `T.append` e) }) <|>
(do { ds <- some (satisfy isDigit) <?> "digit";
e <- t_exponent <?> "exponent";
pure ( T.pack ds `T.append` e) })
pure $! T.pack sign `T.append` rest
sign_parser :: CharParsing m => m String
sign_parser = option "" (pure <$> oneOf "-+")
t_decimal :: (CharParsing m, Monad m) => m T.Text
t_decimal = try $ do
sign <- sign_parser
dig1 <- many (satisfy isDigit) <* char '.'
dig2 <- some (satisfy isDigit)
pure (T.pack sign `T.append` T.pack dig1 `T.append` T.pack "." `T.append` T.pack dig2)
t_exponent :: (CharParsing m, Monad m) => m T.Text
t_exponent = do e <- oneOf "eE"
s <- option "" (pure <$> oneOf "-+")
ds <- some digit
pure $! (e `T.cons` ( T.pack s `T.append` T.pack ds))
t_boolean :: CharParsing m => m T.Text
t_boolean =
T.pack <$> try (string "true" <|> string "false")
t_comment :: CharParsing m => m ()
t_comment =
void (char '#' *> many (noneOf "\n\r"))
t_ws :: CharParsing m => m ()
t_ws =
(void (try (oneOf "\t\n\r "))) <|> try t_comment
<?> "whitespace-or-comment"
t_pn_prefix :: (CharParsing m, MonadState ParseState m) => m T.Text
t_pn_prefix = do
i <- try t_pn_chars_base
r <- option "" (many (try t_pn_chars <|> char '.'))
pure (T.pack (i:r))
t_iriref :: (CharParsing m, MonadState ParseState m) => m T.Text
t_iriref =
between (char '<') (char '>') $ do
iri <- T.concat <$> many ( T.singleton <$> noneOf (['\x00'..'\x20'] ++ ['<','>','"','{','}','|','^','`','\\']) <|>
t_uchar )
bUrl <- currBaseUrl
dUrl <- currDocUrl
let iri' = escapeRDFSyntax iri
validateURI (absolutizeUrl bUrl dUrl iri')
t_pn_chars :: CharParsing m => m Char
t_pn_chars = t_pn_chars_u <|> char '-' <|> char '\x00B7' <|> satisfy f
where
f = flip in_range [('0', '9'), ('\x0300', '\x036F'), ('\x203F', '\x2040')]
t_pn_chars_base :: CharParsing m => m Char
t_pn_chars_base = try $ satisfy $ flip in_range blocks
where
blocks = [('A', 'Z'), ('a', 'z'), ('\x00C0', '\x00D6'),
('\x00D8', '\x00F6'), ('\x00F8', '\x02FF'),
('\x0370', '\x037D'), ('\x037F', '\x1FFF'),
('\x200C', '\x200D'), ('\x2070', '\x218F'),
('\x2C00', '\x2FEF'), ('\x3001', '\xD7FF'),
('\xF900', '\xFDCF'), ('\xFDF0', '\xFFFD'),
('\x10000', '\xEFFFF')]
t_pn_chars_u :: CharParsing m => m Char
t_pn_chars_u = t_pn_chars_base <|> char '_'
t_hex :: CharParsing m => m Char
t_hex = satisfy isHexDigit <?> "hexadecimal digit"
{-# INLINE in_range #-}
in_range :: Char -> [(Char, Char)] -> Bool
in_range c = any (\(c1, c2) -> c >= c1 && c <= c2)
newBaseUrl :: Maybe BaseUrl -> T.Text -> BaseUrl
newBaseUrl Nothing url = BaseUrl url
newBaseUrl (Just (BaseUrl bUrl)) url = BaseUrl $! mkAbsoluteUrl bUrl url
currGenIdLookup :: MonadState ParseState m => m (Map String Int)
currGenIdLookup = gets $ \(_, _, _, _, _, _, _, _, _, _,genMap) -> genMap
addGenIdLookup :: MonadState ParseState m => String -> Int -> m ()
addGenIdLookup genId counter =
modify $ \(bUrl, dUrl, i, pms, ss, ps, cs, subjC, subjBNodeList, ts, genMap) ->
(bUrl, dUrl, i, pms, ss, ps, cs, subjC, subjBNodeList, ts, Map.insert genId counter genMap)
currBaseUrl :: MonadState ParseState m => m (Maybe BaseUrl)
currBaseUrl = gets $ \(bUrl, _, _, _, _, _, _, _, _, _,_) -> bUrl
currDocUrl :: MonadState ParseState m => m (Maybe T.Text)
currDocUrl = gets $ \(_, dUrl, _, _, _, _, _, _, _, _,_) -> dUrl
pushSubj :: MonadState ParseState m => Subject -> m ()
pushSubj s = modify $ \(bUrl, dUrl, i, pms, ss, ps, cs, subjC, subjBNodeList, ts, genMap) ->
(bUrl, dUrl, i, pms, s:ss, ps, cs, subjC, subjBNodeList, ts, genMap)
popSubj :: (CharParsing m, MonadState ParseState m) => m Subject
popSubj = get >>= \(bUrl, dUrl, i, pms, ss, ps, cs, subjC, subjBNodeList, ts, genMap) ->
put (bUrl, dUrl, i, pms, tail ss, ps, cs, subjC, subjBNodeList, ts, genMap)
*> when (null ss) (fail "Cannot pop subject off empty stack.")
*> pure (head ss)
pushPred :: MonadState ParseState m => Predicate -> m ()
pushPred p = modify $ \(bUrl, dUrl, i, pms, ss, ps, cs, subjC, subjBNodeList, ts, genMap) ->
(bUrl, dUrl, i, pms, ss, p:ps, cs, subjC, subjBNodeList, ts, genMap)
popPred :: MonadState ParseState m => m Predicate
popPred = get >>= \(bUrl, dUrl, i, pms, ss, ps, cs, subjC, subjBNodeList, ts, genMap) ->
put (bUrl, dUrl, i, pms, ss, tail ps, cs, subjC, subjBNodeList, ts, genMap)
*> when (null ps) (fail "Cannot pop predicate off empty stack.")
*> pure (head ps)
isInColl :: MonadState ParseState m => m Bool
isInColl = gets $ \(_, _, _, _, _, _, cs, _, _, _, _) -> not . null $ cs
isInSubjColl :: MonadState ParseState m => m Bool
isInSubjColl = gets $ \(_, _, _, _, _, _, _, xs, _, _, _) ->
if null xs then False else (head xs)
pushSubjColl :: MonadState ParseState m => m ()
pushSubjColl = modify $ \(bUrl, dUrl, i, pms, s, p, cs, subjC, subjBNodeList, ts, genMap) ->
(bUrl, dUrl, i, pms, s, p, cs, True:subjC, subjBNodeList, ts, genMap)
popColl :: (CharParsing m, MonadState ParseState m) => m ()
popColl = get >>= \(bUrl, dUrl, i, pms, s, p, cs, subjC, subjBNodeList, ts, genMap) -> do
when (null subjC) $ fail "null in popColl"
put (bUrl, dUrl, i, pms, s, p, cs, tail subjC, subjBNodeList, ts, genMap)
pushObjColl :: MonadState ParseState m => m ()
pushObjColl = modify $ \(bUrl, dUrl, i, pms, s, p, cs, subjC, subjBNodeList, ts,genMap) ->
(bUrl, dUrl, i, pms, s, p, cs, False:subjC, subjBNodeList, ts,genMap)
isSubjPropList :: MonadState ParseState m => m Bool
isSubjPropList = gets $ \(_, _, _, _, _, _, _, _, subjBNodeList, _,_) -> subjBNodeList
setSubjBlankNodePropList :: MonadState ParseState m => m ()
setSubjBlankNodePropList =
modify $ \(bUrl, dUrl, i, pms, s, p, cs, subjC, _, ts,genMap) ->
(bUrl, dUrl, i, pms, s, p, cs, subjC, True, ts,genMap)
setNotSubjBlankNodePropList :: MonadState ParseState m => m ()
setNotSubjBlankNodePropList =
modify $ \(bUrl, dUrl, i, pms, s, p, cs, subjC, _, ts,genMap) ->
(bUrl, dUrl, i, pms, s, p, cs, subjC, True, ts,genMap)
updateBaseUrl :: MonadState ParseState m => Maybe (Maybe BaseUrl) -> m ()
updateBaseUrl val = _modifyState val no no no no no
nextIdCounter :: MonadState ParseState m => m Int
nextIdCounter = get >>= \(bUrl, dUrl, i, pms, s, p, cs, subjC, subjBNodeList, ts,genMap) ->
put (bUrl, dUrl, i+1, pms, s, p, cs, subjC, subjBNodeList, ts,genMap) *> pure i
updatePMs :: MonadState ParseState m => Maybe PrefixMappings -> m ()
updatePMs val = _modifyState no no val no no no
beginColl :: MonadState ParseState m => m ()
beginColl = modify $ \(bUrl, dUrl, i, pms, s, p, cs, subjC, subjBNodeList, ts,genMap) ->
(bUrl, dUrl, i, pms, s, p, True:cs, subjC, subjBNodeList, ts,genMap)
onCollFirstItem :: MonadState ParseState m => m Bool
onCollFirstItem = gets $ \(_, _, _, _, _, _, cs, _, _, _,_) -> (not (null cs) && head cs)
collFirstItemProcessed :: MonadState ParseState m => m ()
collFirstItemProcessed =
modify $ \(bUrl, dUrl, i, pms, s, p, _:cs, subjC, subjBNodeList, ts,genMap) ->
(bUrl, dUrl, i, pms, s, p, False:cs, subjC, subjBNodeList, ts,genMap)
finishColl :: MonadState ParseState m => m Bool
finishColl = get >>= \(bUrl, dUrl, i, pms, s, p, cs, subjC, subjBNodeList, ts,genMap) ->
let cs' = drop 1 cs
in put (bUrl, dUrl, i, pms, s, p, cs', subjC, subjBNodeList, ts,genMap) *> pure (not $ null cs')
no :: Maybe a
no = Nothing
resetSubjectPredicate :: MonadState ParseState m => m ()
resetSubjectPredicate =
modify $ \(bUrl, dUrl, n, pms, _, _, cs, subjC, subjBNodeList, ts,genMap) ->
(bUrl, dUrl, n, pms, [], [], cs, subjC, subjBNodeList, ts,genMap)
_modifyState :: MonadState ParseState m =>
Maybe (Maybe BaseUrl) -> Maybe (Int -> Int) -> Maybe PrefixMappings ->
Maybe Subject -> Maybe Predicate -> Maybe (Seq Triple) ->
m ()
_modifyState mb_bUrl mb_n mb_pms mb_subj mb_pred mb_trps =
do (_bUrl, _dUrl, _n, _pms, _s, _p, _cs, _subjC, _subjBNodeList, _ts,genMap) <- get
put (fromMaybe _bUrl mb_bUrl,
_dUrl,
maybe _n (const _n) mb_n,
fromMaybe _pms mb_pms,
maybe _s (: _s) mb_subj,
maybe _p (: _p) mb_pred,
_cs,
_subjC,
_subjBNodeList,
fromMaybe _ts mb_trps,genMap)
addTripleForObject :: (CharParsing m, MonadState ParseState m) => Object -> m ()
addTripleForObject obj =
do (bUrl, dUrl, i, pms, ss, ps, cs, subjC, subjBNodeList, ts,genMap) <- get
when (null ss) $
unexpected $ "No Subject with which to create triple for: " ++ show obj
when (null ps) $
unexpected $ "No Predicate with which to create triple for: " ++ show obj
put (bUrl, dUrl, i, pms, ss, ps, cs, subjC, subjBNodeList, ts |> Triple (head ss) (head ps) obj,genMap)
parseURLParsec :: (Rdf a) =>
Maybe BaseUrl
-> Maybe T.Text
-> String
-> IO (Either ParseFailure (RDF a))
parseURLParsec bUrl docUrl = _parseURL (parseStringParsec bUrl docUrl)
parseFileParsec :: (Rdf a) => Maybe BaseUrl -> Maybe T.Text -> String -> IO (Either ParseFailure (RDF a))
parseFileParsec bUrl docUrl fpath =
TIO.readFile fpath >>= \bs' -> pure $ handleResult bUrl (runParser (evalStateT t_turtleDoc (initialState bUrl docUrl)) () (maybe "" T.unpack docUrl) bs')
parseStringParsec :: (Rdf a) => Maybe BaseUrl -> Maybe T.Text -> T.Text -> Either ParseFailure (RDF a)
parseStringParsec bUrl docUrl ttlStr = handleResult bUrl (runParser (evalStateT t_turtleDoc (initialState bUrl docUrl)) () "" ttlStr)
parseStringAttoparsec :: (Rdf a) => Maybe BaseUrl -> Maybe T.Text -> T.Text -> Either ParseFailure (RDF a)
parseStringAttoparsec bUrl docUrl bs = handleResult' $ parse (evalStateT t_turtleDoc (initialState bUrl docUrl)) (T.encodeUtf8 bs)
where
handleResult' res = case res of
Fail _ _ err ->
Left $ ParseFailure $ "Parse failure: \n" ++ show err
Partial f -> handleResult' (f (T.encodeUtf8 T.empty))
Done _ (ts,pms) -> Right $! mkRdf (F.toList ts) bUrl pms
parseFileAttoparsec :: (Rdf a) => Maybe BaseUrl -> Maybe T.Text -> String -> IO (Either ParseFailure (RDF a))
parseFileAttoparsec bUrl docUrl path = parseStringAttoparsec bUrl docUrl <$> TIO.readFile path
parseURLAttoparsec :: (Rdf a) =>
Maybe BaseUrl
-> Maybe T.Text
-> String
-> IO (Either ParseFailure (RDF a))
parseURLAttoparsec bUrl docUrl = _parseURL (parseStringAttoparsec bUrl docUrl)
initialState :: Maybe BaseUrl -> Maybe T.Text -> ParseState
initialState bUrl docUrl = (bUrl, docUrl, 1, PrefixMappings Map.empty, [], [], [], [], False, Seq.empty,Map.empty)
handleResult :: Rdf a => Maybe BaseUrl -> Either ParseError (Seq Triple, PrefixMappings) -> Either ParseFailure (RDF a)
handleResult bUrl result =
case result of
(Left err) -> Left (ParseFailure $ show err)
(Right (ts, pms)) -> Right $! mkRdf (F.toList ts) bUrl pms
validateUNode :: CharParsing m => T.Text -> m Node
validateUNode t =
case unodeValidate t of
Nothing -> unexpected ("Invalid URI in Turtle parser URI validation: " ++ show t)
Just u@UNode{} -> pure u
Just node -> unexpected ("Unexpected node in Turtle parser URI validation: " ++ show node)
validateURI :: (CharParsing m, Monad m) => T.Text -> m T.Text
validateURI t = do
UNode uri <- validateUNode t
pure uri
caseInsensitiveChar :: CharParsing m => Char -> m Char
caseInsensitiveChar c = char (toLower c) <|> char (toUpper c)
caseInsensitiveString :: (CharParsing m, Monad m) => String -> m String
caseInsensitiveString s = try (mapM caseInsensitiveChar s) <?> "\"" ++ s ++ "\""