{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE TupleSections #-} module Text.RDF.RDF4H.XmlParser.Identifiers ( -- rdf:ID validation checkRdfId -- Qualified names , resolveQName, resolveQName' , parseQName ) where import Data.Functor ((<$)) import Control.Applicative (liftA2, Alternative(..)) import Data.Text (Text) import qualified Data.Text as T import qualified Data.Map as Map import Data.Attoparsec.Text (Parser, ()) import qualified Data.Attoparsec.Text as P import Data.Semigroup ((<>)) import Data.RDF.Namespace -------------------------------------------------------------------------------- -- rdf:ID -- |Validate the value of @rdf:ID@. -- -- See: https://www.w3.org/TR/rdf-syntax-grammar/#rdf-id checkRdfId :: Text -- ^ Value of a @rdf:ID@ attribute to validate. -> Either String Text checkRdfId t = t <$ parseId t parseId :: Text -> Either String Text parseId = P.parseOnly $ pNCName <* (P.endOfInput "Unexpected characters at the end") -------------------------------------------------------------------------------- -- Qualified names -- |Parse and resolve a qualified name. -- -- See: https://www.w3.org/TR/xml-names/#ns-qualnames resolveQName :: PrefixMappings -- ^ Namespace mapping to resolve q qualified name. -> Text -- ^ Raw qualified name to process. -> Either String Text resolveQName pm qn = parseQName qn >>= resolveQName' pm -- |Resolve a qualified name. resolveQName' :: PrefixMappings -- ^ Namespace mapping to resolve q qualified name. -> (Maybe Text, Text) -- ^ (namespace, local name) -> Either String Text resolveQName' (PrefixMappings pm) (Nothing, name) = case Map.lookup mempty pm of Nothing -> Left $ mconcat ["Cannot resolve QName \"", T.unpack name, "\": no default namespace defined."] Just iri -> Right $ iri <> name resolveQName' (PrefixMappings pm) (Just prefix, name) = case Map.lookup prefix pm of Nothing -> Left $ mconcat ["Cannot resolve QName: prefix \"", T.unpack prefix, "\" not defined"] Just iri -> Right $ iri <> name -- |Parse a qualified name. -- -- See: https://www.w3.org/TR/xml-names/#ns-qualnames parseQName :: Text -> Either String (Maybe Text, Text) parseQName = P.parseOnly $ pQName <* (P.endOfInput "Unexpected characters at the end of a QName") -- https://www.w3.org/TR/xml-names/#ns-qualnames -- https://www.w3.org/TR/xml-names/#NT-QName pQName :: Parser (Maybe Text, Text) pQName = pPrefixedName <|> pUnprefixedNamed where pUnprefixedNamed = (empty,) <$> pLocalPart -- https://www.w3.org/TR/xml-names/#NT-PrefixedName pPrefixedName :: Parser (Maybe Text, Text) pPrefixedName = do prefix <- pLocalPart <* P.char ':' localPart <- pLocalPart pure (Just prefix, localPart) -- https://www.w3.org/TR/xml-names/#NT-LocalPart pLocalPart :: Parser Text pLocalPart = pNCName -- http://www.w3.org/TR/REC-xml-names/#NT-NCName pNCName :: Parser Text pNCName = liftA2 T.cons pNameStartChar pNameRest where pNameStartChar = P.satisfy isValidFirstCharId pNameRest = P.takeWhile isValidRestCharId isValidFirstCharId c = ('A' <= c && c <= 'Z') || c == '_' || ('a' <= c && c <= 'z') || ('\xC0' <= c && c <= '\xD6') || ('\xD8' <= c && c <= '\xF6') || ('\xF8' <= c && c <= '\x2FF') || ('\x370' <= c && c <= '\x37D') || ('\x37F' <= c && c <= '\x1FFF') || ('\x200C' <= c && c <= '\x200D') || ('\x2070' <= c && c <= '\x218F') || ('\x2C00' <= c && c <= '\x2FEF') || ('\x3001' <= c && c <= '\xD7FF') || ('\xF900' <= c && c <= '\xFDCF') || ('\xFDF0' <= c && c <= '\xFFFD') || ('\x10000' <= c && c <= '\xEFFFF') isValidRestCharId c = isValidFirstCharId c || c == '-' || c == '.' || ('0' <= c && c <= '9') || ('\x0300' <= c && c <= '\x036F') || ('\x203F' <= c && c <= '\x2040')