module Text.RDF.RDF4H.NTriplesParser(
NTriplesParser(NTriplesParser), ParseFailure
) where
import Prelude hiding (init,pred)
import Data.RDF.Types
import Text.RDF.RDF4H.ParserUtils
import Data.Char(isLetter, isDigit, isLower)
import qualified Data.Map as Map
import Text.Parsec
import Text.Parsec.Text
import qualified Data.Text as T
import qualified Data.Text.IO as TIO
import Control.Monad (liftM,void)
data NTriplesParser = NTriplesParser
instance RdfParser NTriplesParser where
parseString _ = parseString'
parseFile _ = parseFile'
parseURL _ = parseURL'
nt_ntripleDoc :: GenParser () [Maybe Triple]
nt_ntripleDoc = manyTill nt_line eof
nt_line :: GenParser () (Maybe Triple)
nt_line =
skipMany nt_space >>
(nt_comment <|> nt_triple <|> nt_empty) >>=
\res -> nt_eoln >> return res
nt_comment :: GenParser () (Maybe Triple)
nt_comment = char '#' >> skipMany nt_character >> return Nothing
nt_triple :: GenParser () (Maybe Triple)
nt_triple =
do
subj <- nt_subject
skipMany1 nt_space
pred <- nt_predicate
skipMany1 nt_space
obj <- nt_object
skipMany nt_space
char '.'
many nt_space
return $ Just (Triple subj pred obj)
nt_literal :: GenParser () LValue
nt_literal =
do lit_str <- between_chars '"' '"' inner_literal
liftM (plainLL lit_str) (char '@' >> nt_language) <|>
liftM (typedL lit_str) (count 2 (char '^') >> nt_uriref) <|>
return (plainL lit_str)
where inner_literal = liftM T.concat (manyTill inner_string (lookAhead $ char '"'))
nt_language :: GenParser () T.Text
nt_language =
do str <- liftM T.pack (many (satisfy (\ c -> c == '-' || isLower c)))
if T.null str || T.last str == '-' || T.head str == '-'
then fail ("Invalid language string: '" ++ T.unpack str ++ "'")
else return str
nt_empty :: GenParser () (Maybe Triple)
nt_empty = skipMany nt_space >> return Nothing
nt_subject :: GenParser () Node
nt_subject =
liftM unode nt_uriref <|>
liftM bnode nt_nodeID
nt_predicate :: GenParser () Node
nt_predicate = liftM unode nt_uriref
nt_object :: GenParser () Node
nt_object =
liftM unode nt_uriref <|>
liftM bnode nt_nodeID <|>
liftM LNode nt_literal
nt_uriref :: GenParser () T.Text
nt_uriref = between_chars '<' '>' (liftM T.pack (many (satisfy ( /= '>'))))
nt_nodeID :: GenParser () T.Text
nt_nodeID = char '_' >> char ':' >> nt_name >>= \n ->
return ('_' `T.cons` (':' `T.cons` n))
nt_name :: GenParser () T.Text
nt_name =
do init <- letter
rest <- many (satisfy isLetterOrDigit)
return $ T.pack (init:rest)
isLetterOrDigit :: Char -> Bool
isLetterOrDigit c = isLetter c || isDigit c
nt_character :: GenParser () Char
nt_character = satisfy is_nonquote_char
is_character :: Char -> Bool
is_character c = c >= '\x0020' && c <= '\x007E'
is_nonquote_char :: Char -> Bool
is_nonquote_char c = is_character c && c/= '"'
nt_eoln :: GenParser () ()
nt_eoln = eof <|> void (nt_cr >> nt_lf) <|> void nt_lf
nt_space :: GenParser () Char
nt_space = char ' ' <|> nt_tab
nt_cr :: GenParser () Char
nt_cr = char '\r'
nt_lf :: GenParser () Char
nt_lf = char '\n'
nt_tab :: GenParser () Char
nt_tab = char '\t'
inner_string :: GenParser () T.Text
inner_string =
try (char '\\' >>
((char 't' >> return b_tab) <|>
(char 'r' >> return b_ret) <|>
(char 'n' >> return b_nl) <|>
(char '\\' >> return b_slash) <|>
(char '"' >> return b_quote) <|>
(char 'u' >> count 4 hexDigit >>= \cs -> return $ T.pack ('\\':'u':cs)) <|>
(char 'U' >> count 8 hexDigit >>= \cs -> return $ T.pack ('\\':'U':cs))))
<|> liftM T.pack
(many (satisfy (\ c -> is_nonquote_char c && c /= '\\')))
b_tab, b_ret, b_nl, b_slash, b_quote :: T.Text
b_tab = T.singleton '\t'
b_ret = T.singleton '\r'
b_nl = T.singleton '\n'
b_slash = T.singleton '\\'
b_quote = T.singleton '"'
between_chars :: Char -> Char -> GenParser () T.Text -> GenParser () T.Text
between_chars start end parser = char start >> parser >>= \res -> char end >> return res
parseString' :: forall rdf. (RDF rdf) => T.Text -> Either ParseFailure rdf
parseString' bs = handleParse mkRdf (runParser nt_ntripleDoc () "" bs)
parseURL' :: forall rdf. (RDF rdf) => String -> IO (Either ParseFailure rdf)
parseURL' = _parseURL parseString'
parseFile' :: forall rdf. (RDF rdf) => String -> IO (Either ParseFailure rdf)
parseFile' path = liftM (handleParse mkRdf . runParser nt_ntripleDoc () path)
(TIO.readFile path)
handleParse :: forall rdf. (RDF rdf) => (Triples -> Maybe BaseUrl -> PrefixMappings -> rdf) ->
Either ParseError [Maybe Triple] ->
Either ParseFailure rdf
handleParse _mkRdf result
| otherwise =
case result of
Left err -> Left $ ParseFailure $ "Parse failure: \n" ++ show err
Right ts -> Right $ _mkRdf (conv ts) Nothing (PrefixMappings Map.empty)
where
conv [] = []
conv (Nothing:ts) = conv ts
conv (Just t:ts) = t : conv ts