{-# LANGUAGE OverloadedStrings #-} -------------------------------------------------------------------------------- -- See end of this file for licence information. -------------------------------------------------------------------------------- -- | -- Module : RDFParser -- Copyright : (c) 2003, Graham Klyne, 2009 Vasili I Galchin, 2011 Douglas Burke -- License : GPL V2 -- -- Maintainer : Douglas Burke -- Stability : experimental -- Portability : OverloadedStrings -- -- Support for the RDF Parsing modules. -- -------------------------------------------------------------------------------- module Swish.RDF.RDFParser ( SpecialMap -- , mapPrefix -- tables , prefixTable, specialTable -- parser , runParserWithError , ParseResult , ignore , char , ichar , string , stringT , symbol , isymbol , lexeme , notFollowedBy , whiteSpace , skipMany , skipMany1 , endBy , sepEndBy , sepEndBy1 , manyTill , noneOf , eoln , fullStop , mkTypedLit , hex4 , hex8 , appendURIs ) where import Swish.RDF.RDFGraph (RDFGraph, RDFLabel(..)) import Swish.RDF.Vocabulary ( namespaceRDF , namespaceRDFS , namespaceRDFD , namespaceOWL , namespaceLOG , rdfType , rdfFirst, rdfRest, rdfNil , owlSameAs, logImplies , defaultBase ) import Swish.Utils.LookupMap (LookupMap(..)) import Swish.Utils.Namespace (Namespace, makeNamespace, ScopedName) import qualified Data.Text as T import qualified Data.Text.Lazy as L import qualified Data.Text.Read as R import Text.ParserCombinators.Poly.StateText import Network.URI (URI(..), relativeTo, parseURIReference) import Data.Char (isSpace, isHexDigit, chr) import Data.Maybe (fromMaybe, fromJust) -- Code -- | Append the two URIs. Should probably be moved -- out of RDFParser. It is also just a thin wrapper around -- `Network.URI.relativeTo`. appendURIs :: URI -- ^ The base URI -> URI -- ^ The URI to append (it can be an absolute URI). -> Either String URI appendURIs base uri = case uriScheme uri of "" -> case uri `relativeTo` base of Just out -> Right out _ -> Left $ "Unable to append <" ++ show uri ++ "> to base=<" ++ show base ++ ">" _ -> Right uri -- | Type for special name lookup table type SpecialMap = LookupMap (String,ScopedName) {- -- | Lookup prefix in table and return the matching URI. -- -- If the prefix is unknown then we currently error -- out (used to return 'prefix:' or ':' but now using -- URIs I am changing this behavior). This may well be -- backed out. mapPrefix :: NamespaceMap -> Maybe String -> URI mapPrefix pmap pfix = case mapFindMaybe pfix pmap of Just uri -> uri Nothing -> error $ "Unable to find prefix: " ++ show pfix -- fromMaybe "" pfix ++ ":" -} {- mapPrefix ps p@(Just pre) = mapFind (pre++":") p ps mapPrefix ps _ = mapFind ":" Nothing ps -} -- | Define default table of namespaces prefixTable :: [Namespace] prefixTable = [ namespaceRDF , namespaceRDFS , namespaceRDFD -- datatypes , namespaceOWL , namespaceLOG , makeNamespace Nothing $ fromJust (parseURIReference "#") -- is this correct? ] {-| Define default special-URI table. The optional argument defines the initial base URI. -} specialTable :: Maybe ScopedName -> [(String,ScopedName)] specialTable mbase = [ ("a", rdfType ), ("equals", owlSameAs ), ("implies", logImplies ), ("listfirst", rdfFirst ), ("listrest", rdfRest ), ("listnull", rdfNil ), ("base", fromMaybe defaultBase mbase ) ] -- Parser routines, heavily based on Parsec combinators -- | Run the parser and return the successful parse or an error -- message which consists of the standard Polyparse error plus -- a fragment of the unparsed input to provide context. -- runParserWithError :: Parser a b -- ^ parser (carrying state) to apply -> a -- ^ starting state for the parser -> L.Text -- ^ input to be parsed -> Either String b runParserWithError parser state0 input = let (result, _, unparsed) = runParser parser state0 input -- TODO: work out how best to report error context; for now just take the -- next 40 characters and assume there is enough context. econtext = if L.null unparsed then "\n(at end of the text)\n" else "\nRemaining input:\n" ++ case L.compareLength unparsed 40 of GT -> L.unpack (L.take 40 unparsed) ++ "..." _ -> L.unpack unparsed in case result of Left emsg -> Left $ emsg ++ econtext _ -> result type ParseResult = Either String RDFGraph ignore :: (Applicative f) => f a -> f () ignore f = f *> pure () char :: Char -> Parser s Char char c = satisfy (==c) ichar :: Char -> Parser s () ichar = ignore . char -- TODO: is there a better way to do this? string :: String -> Parser s String string = mapM char stringT :: T.Text -> Parser s T.Text stringT s = string (T.unpack s) >> return s skipMany :: Parser s a -> Parser s () skipMany = ignore . many skipMany1 :: Parser s a -> Parser s () skipMany1 = ignore . many1 endBy :: Parser s a -> Parser s b -> Parser s [a] endBy p sep = many (p <* sep) sepEndBy :: Parser s a -> Parser s b -> Parser s [a] sepEndBy p sep = sepEndBy1 p sep <|> pure [] -- is the separator optional? sepEndBy1 :: Parser s a -> Parser s b -> Parser s [a] sepEndBy1 p sep = do x <- p (sep *> ((x:) <$> sepEndBy p sep)) <|> return [x] manyTill :: Parser s a -> Parser s b -> Parser s [a] manyTill p end = go where go = (end *> return []) <|> ((:) <$> p <*> go) noneOf :: String -> Parser s Char noneOf istr = satisfy (`notElem` istr) fullStop :: Parser s () fullStop = ichar '.' eoln :: Parser s () -- eoln = ignore (newline <|> (lineFeed *> optional newline)) -- eoln = ignore (try (string "\r\n") <|> string "\r" <|> string "\n") eoln = ignore (oneOf [string "\r\n", string "\r", string "\n"]) notFollowedBy :: (Char -> Bool) -> Parser s () notFollowedBy p = do c <- next if p c then fail $ "Unexpected character: " ++ show [c] else reparse $ L.singleton c symbol :: String -> Parser s String symbol = lexeme . string isymbol :: String -> Parser s () isymbol = ignore . symbol lexeme :: Parser s a -> Parser s a lexeme p = p <* whiteSpace whiteSpace :: Parser s () whiteSpace = skipMany (simpleSpace <|> oneLineComment) simpleSpace :: Parser s () simpleSpace = ignore $ many1Satisfy isSpace oneLineComment :: Parser s () oneLineComment = ichar '#' *> manySatisfy (/= '\n') *> pure () {- Not sure we can get this with polyparse -- | Annotate a Parsec error with the local context - i.e. the actual text -- that caused the error and preceeding/succeeding lines (if available) -- annotateParsecError :: Int -- ^ the number of extra lines to include in the context (<=0 is ignored) -> [String] -- ^ text being parsed -> ParseError -- ^ the parse error -> String -- ^ Parsec error with additional context annotateParsecError extraLines ls err = -- the following is based on the show instance of ParseError let ePos = errorPos err lNum = sourceLine ePos cNum = sourceColumn ePos -- it is possible to be at the end of the input so need -- to check; should produce better output than this in this -- case nLines = length ls ln1 = lNum - 1 eln = max 0 extraLines lNums = [max 0 (ln1 - eln) .. min (nLines-1) (ln1 + eln)] beforeLines = map (ls !!) $ filter (< ln1) lNums afterLines = map (ls !!) $ filter (> ln1) lNums -- in testing was able to get a line number after the text so catch this -- case; is it still necessary? errorLine = if ln1 >= nLines then "" else ls !! ln1 arrowLine = replicate (cNum-1) ' ' ++ "^" finalLine = "(line " ++ show lNum ++ ", column " ++ show cNum ++ " indicated by the '^' sign above):" eHdr = "" : beforeLines ++ errorLine : arrowLine : afterLines ++ [finalLine] eMsg = showErrorMessages "or" "unknown parse error" "expecting" "unexpected" "end of input" (errorMessages err) in unlines eHdr ++ eMsg -} -- | Create a typed literal. mkTypedLit :: ScopedName -- ^ the type -> T.Text -- ^ the value -> RDFLabel mkTypedLit u v = Lit v (Just u) {- Handle hex encoding; the spec for N3 and NTriples suggest that only upper-case A..F are valid but you can find lower-case values out there so support these too. -} hexDigit :: Parser a Char -- hexDigit = satisfy (`elem` ['0'..'9'] ++ ['A'..'F']) hexDigit = satisfy isHexDigit hex4 :: Parser a Char hex4 = do digs <- exactly 4 hexDigit let mhex = R.hexadecimal (T.pack digs) case mhex of Left emsg -> failBad $ "Internal error: unable to parse hex4: " ++ emsg Right (v, "") -> return $ chr v Right (_, vs) -> failBad $ "Internal error: hex4 remainder = " ++ T.unpack vs hex8 :: Parser a Char hex8 = do digs <- exactly 8 hexDigit let mhex = R.hexadecimal (T.pack digs) case mhex of Left emsg -> failBad $ "Internal error: unable to parse hex8: " ++ emsg Right (v, "") -> if v <= 0x10FFFF then return $ chr v else failBad "\\UHHHHHHHH format is limited to a maximum of \\U0010FFFF" Right (_, vs) -> failBad $ "Internal error: hex8 remainder = " ++ T.unpack vs -------------------------------------------------------------------------------- -- -- Copyright (c) 2003, Graham Klyne, 2009 Vasili I Galchin, 2011 Douglas Burke -- All rights reserved. -- -- This file is part of Swish. -- -- Swish is free software; you can redistribute it and/or modify -- it under the terms of the GNU General Public License as published by -- the Free Software Foundation; either version 2 of the License, or -- (at your option) any later version. -- -- Swish is distributed in the hope that it will be useful, -- but WITHOUT ANY WARRANTY; without even the implied warranty of -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- GNU General Public License for more details. -- -- You should have received a copy of the GNU General Public License -- along with Swish; if not, write to: -- The Free Software Foundation, Inc., -- 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -- --------------------------------------------------------------------------------