module Text.RDF.RDF4H.NTriplesParser(
NTriplesParser(NTriplesParser), ParseFailure
)
where
import Data.RDF
import Text.RDF.RDF4H.ParserUtils
import Data.Char(isLetter, isDigit, isLower)
import qualified Data.Map as Map
import Text.Parsec
import Text.Parsec.ByteString.Lazy
import Data.ByteString.Lazy.Char8(ByteString)
import qualified Data.ByteString.Lazy.Char8 as B
import Control.Monad (liftM,void)
data NTriplesParser = NTriplesParser
instance RdfParser NTriplesParser where
parseString _ = parseString'
parseFile _ = parseFile'
parseURL _ = parseURL'
nt_ntripleDoc :: GenParser ByteString () [Maybe Triple]
nt_ntripleDoc = manyTill nt_line eof
nt_line :: GenParser ByteString () (Maybe Triple)
nt_line =
skipMany nt_space >>
(nt_comment <|> nt_triple <|> nt_empty) >>=
\res -> nt_eoln >> return res
nt_comment :: GenParser ByteString () (Maybe Triple)
nt_comment = char '#' >> skipMany nt_character >> return Nothing
nt_triple :: GenParser ByteString () (Maybe Triple)
nt_triple =
do
subj <- nt_subject
skipMany1 nt_space
pred <- nt_predicate
skipMany1 nt_space
obj <- nt_object
skipMany nt_space
char '.'
many nt_space
return $ Just (Triple subj pred obj)
nt_literal :: GenParser ByteString () LValue
nt_literal =
do lit_str <- between_chars '"' '"' inner_literal
liftM (plainLL lit_str) (char '@' >> nt_language) <|>
liftM (typedL lit_str . mkFastString) (count 2 (char '^') >> nt_uriref) <|>
return (plainL lit_str)
where inner_literal = liftM B.concat (manyTill inner_string (lookAhead $ char '"'))
nt_language :: GenParser ByteString () ByteString
nt_language =
do str <- liftM B.pack (many (satisfy (\ c -> c == '-' || isLower c)))
if B.null str || B.last str == '-' || B.head str == '-'
then fail ("Invalid language string: '" ++ B.unpack str ++ "'")
else return str
nt_empty :: GenParser ByteString () (Maybe Triple)
nt_empty = skipMany nt_space >> return Nothing
nt_subject :: GenParser ByteString () Node
nt_subject =
liftM unode nt_uriref <|>
liftM bnode nt_nodeID
nt_predicate :: GenParser ByteString () Node
nt_predicate = liftM unode nt_uriref
nt_object :: GenParser ByteString () Node
nt_object =
liftM unode nt_uriref <|>
liftM bnode nt_nodeID <|>
liftM LNode nt_literal
nt_uriref :: GenParser ByteString () ByteString
nt_uriref = between_chars '<' '>' (liftM B.pack (many (satisfy ( /= '>'))))
nt_nodeID :: GenParser ByteString () ByteString
nt_nodeID = char '_' >> char ':' >> nt_name >>= \n ->
return ('_' `B.cons'` (':' `B.cons'` n))
nt_name :: GenParser ByteString () ByteString
nt_name =
do init <- letter
rest <- many (satisfy isLetterOrDigit)
return $ B.pack (init:rest)
isLetterOrDigit :: Char -> Bool
isLetterOrDigit c = isLetter c || isDigit c
nt_character :: GenParser ByteString () Char
nt_character = satisfy is_nonquote_char
is_character :: Char -> Bool
is_character c = c >= '\x0020' && c <= '\x007E'
is_nonquote_char :: Char -> Bool
is_nonquote_char c = is_character c && c/= '"'
nt_eoln :: GenParser ByteString () ()
nt_eoln = eof <|> void (nt_cr >> nt_lf) <|> void nt_lf
nt_space :: GenParser ByteString () Char
nt_space = char ' ' <|> nt_tab
nt_cr :: GenParser ByteString () Char
nt_cr = char '\r'
nt_lf :: GenParser ByteString () Char
nt_lf = char '\n'
nt_tab :: GenParser ByteString () Char
nt_tab = char '\t'
inner_string :: GenParser ByteString () ByteString
inner_string =
try (char '\\' >>
((char 't' >> return b_tab) <|>
(char 'r' >> return b_ret) <|>
(char 'n' >> return b_nl) <|>
(char '\\' >> return b_slash) <|>
(char '"' >> return b_quote) <|>
(char 'u' >> count 4 hexDigit >>= \cs -> return $ B.pack ('\\':'u':cs)) <|>
(char 'U' >> count 8 hexDigit >>= \cs -> return $ B.pack ('\\':'U':cs))))
<|> liftM B.pack
(many (satisfy (\ c -> is_nonquote_char c && c /= '\\')))
b_tab = B.singleton '\t'
b_ret = B.singleton '\r'
b_nl = B.singleton '\n'
b_slash = B.singleton '\\'
b_quote = B.singleton '"'
between_chars :: Char -> Char -> GenParser ByteString () ByteString -> GenParser ByteString () ByteString
between_chars start end parser = char start >> parser >>= \res -> char end >> return res
parseString' :: forall rdf. (RDF rdf) => ByteString -> Either ParseFailure rdf
parseString' bs = handleParse mkRdf (runParser nt_ntripleDoc () "" bs)
parseURL' :: forall rdf. (RDF rdf) => String -> IO (Either ParseFailure rdf)
parseURL' = _parseURL parseString'
parseFile' :: forall rdf. (RDF rdf) => String -> IO (Either ParseFailure rdf)
parseFile' path = liftM (handleParse mkRdf . runParser nt_ntripleDoc () path)
(B.readFile path)
handleParse :: forall rdf. (RDF rdf) => (Triples -> Maybe BaseUrl -> PrefixMappings -> rdf) ->
Either ParseError [Maybe Triple] ->
Either ParseFailure rdf
handleParse _mkRdf result
| otherwise =
case result of
Left err -> Left $ ParseFailure $ "Parse failure: \n" ++ show err
Right ts -> Right $ _mkRdf (conv ts) Nothing (PrefixMappings Map.empty)
where
conv [] = []
conv (Nothing:ts) = conv ts
conv (Just t:ts) = t : conv ts
_test :: GenParser ByteString () a -> String -> IO a
_test p str =
case result of
(Left err) -> putStr "ParseError: '" >> putStr (show err) >> putStr "\n" >> error ""
(Right a) -> return a
where result = runParser p () "" (B.pack str)