{-# LANGUAGE CPP #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TupleSections     #-}

module Text.RDF.RDF4H.XmlParser.Identifiers
  ( -- rdf:ID validation
    checkRdfId
    -- Qualified names
  , resolveQName, resolveQName'
  , parseQName
  ) where


#if !MIN_VERSION_base(4,13,0)
import           Data.Functor ((<$))
#else
#endif
import           Control.Applicative (liftA2, Alternative(..))
import           Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Map as Map
import           Data.Attoparsec.Text (Parser, (<?>))
import qualified Data.Attoparsec.Text as P
#if MIN_VERSION_base(4,9,0)
#if !MIN_VERSION_base(4,11,0)
import Data.Semigroup ((<>))
#else
#endif
#else
#endif
import           Data.RDF.Namespace

--------------------------------------------------------------------------------
-- rdf:ID

-- |Validate the value of @rdf:ID@.
--
-- See: https://www.w3.org/TR/rdf-syntax-grammar/#rdf-id
checkRdfId
  :: Text
  -- ^ Value of a @rdf:ID@ attribute to validate.
  -> Either String Text
checkRdfId :: Text -> Either String Text
checkRdfId Text
t = Text
t Text -> Either String Text -> Either String Text
forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ Text -> Either String Text
parseId Text
t

parseId :: Text -> Either String Text
parseId :: Text -> Either String Text
parseId = Parser Text -> Text -> Either String Text
forall a. Parser a -> Text -> Either String a
P.parseOnly (Parser Text -> Text -> Either String Text)
-> Parser Text -> Text -> Either String Text
forall a b. (a -> b) -> a -> b
$ Parser Text
pNCName Parser Text -> Parser Text () -> Parser Text
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* (Parser Text ()
forall t. Chunk t => Parser t ()
P.endOfInput Parser Text () -> String -> Parser Text ()
forall i a. Parser i a -> String -> Parser i a
<?> String
"Unexpected characters at the end")

--------------------------------------------------------------------------------
-- Qualified names

-- |Parse and resolve a qualified name.
--
-- See: https://www.w3.org/TR/xml-names/#ns-qualnames
resolveQName
  :: PrefixMappings
  -- ^ Namespace mapping to resolve q qualified name.
  -> Text
  -- ^ Raw qualified name to process.
  -> Either String Text
resolveQName :: PrefixMappings -> Text -> Either String Text
resolveQName PrefixMappings
pm Text
qn = Text -> Either String (Maybe Text, Text)
parseQName Text
qn Either String (Maybe Text, Text)
-> ((Maybe Text, Text) -> Either String Text) -> Either String Text
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= PrefixMappings -> (Maybe Text, Text) -> Either String Text
resolveQName' PrefixMappings
pm

-- |Resolve a qualified name.
resolveQName'
  :: PrefixMappings
  -- ^ Namespace mapping to resolve q qualified name.
  -> (Maybe Text, Text)
  -- ^ (namespace, local name)
  -> Either String Text
resolveQName' :: PrefixMappings -> (Maybe Text, Text) -> Either String Text
resolveQName' (PrefixMappings Map Text Text
pm) (Maybe Text
Nothing, Text
name) =
  case Text -> Map Text Text -> Maybe Text
forall k a. Ord k => k -> Map k a -> Maybe a
Map.lookup Text
forall a. Monoid a => a
mempty Map Text Text
pm of
    Maybe Text
Nothing  -> String -> Either String Text
forall a b. a -> Either a b
Left (String -> Either String Text) -> String -> Either String Text
forall a b. (a -> b) -> a -> b
$ [String] -> String
forall a. Monoid a => [a] -> a
mconcat [String
"Cannot resolve QName \"", Text -> String
T.unpack Text
name, String
"\": no default namespace defined."]
    Just Text
iri -> Text -> Either String Text
forall a b. b -> Either a b
Right (Text -> Either String Text) -> Text -> Either String Text
forall a b. (a -> b) -> a -> b
$ Text
iri Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
name
resolveQName' (PrefixMappings Map Text Text
pm) (Just Text
prefix, Text
name) =
  case Text -> Map Text Text -> Maybe Text
forall k a. Ord k => k -> Map k a -> Maybe a
Map.lookup Text
prefix Map Text Text
pm of
    Maybe Text
Nothing  -> String -> Either String Text
forall a b. a -> Either a b
Left (String -> Either String Text) -> String -> Either String Text
forall a b. (a -> b) -> a -> b
$ [String] -> String
forall a. Monoid a => [a] -> a
mconcat [String
"Cannot resolve QName: prefix \"", Text -> String
T.unpack Text
prefix, String
"\" not defined"]
    Just Text
iri -> Text -> Either String Text
forall a b. b -> Either a b
Right (Text -> Either String Text) -> Text -> Either String Text
forall a b. (a -> b) -> a -> b
$ Text
iri Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
name

-- |Parse a qualified name.
--
-- See: https://www.w3.org/TR/xml-names/#ns-qualnames
parseQName :: Text -> Either String (Maybe Text, Text)
parseQName :: Text -> Either String (Maybe Text, Text)
parseQName = Parser (Maybe Text, Text)
-> Text -> Either String (Maybe Text, Text)
forall a. Parser a -> Text -> Either String a
P.parseOnly (Parser (Maybe Text, Text)
 -> Text -> Either String (Maybe Text, Text))
-> Parser (Maybe Text, Text)
-> Text
-> Either String (Maybe Text, Text)
forall a b. (a -> b) -> a -> b
$ Parser (Maybe Text, Text)
pQName Parser (Maybe Text, Text)
-> Parser Text () -> Parser (Maybe Text, Text)
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* (Parser Text ()
forall t. Chunk t => Parser t ()
P.endOfInput Parser Text () -> String -> Parser Text ()
forall i a. Parser i a -> String -> Parser i a
<?> String
"Unexpected characters at the end of a QName")

-- https://www.w3.org/TR/xml-names/#ns-qualnames
-- https://www.w3.org/TR/xml-names/#NT-QName
pQName :: Parser (Maybe Text, Text)
pQName :: Parser (Maybe Text, Text)
pQName = Parser (Maybe Text, Text)
pPrefixedName Parser (Maybe Text, Text)
-> Parser (Maybe Text, Text) -> Parser (Maybe Text, Text)
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser (Maybe Text, Text)
forall a. Parser Text (Maybe a, Text)
pUnprefixedNamed
  where pUnprefixedNamed :: Parser Text (Maybe a, Text)
pUnprefixedNamed = (Maybe a
forall (f :: * -> *) a. Alternative f => f a
empty,) (Text -> (Maybe a, Text))
-> Parser Text -> Parser Text (Maybe a, Text)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Text
pLocalPart

-- https://www.w3.org/TR/xml-names/#NT-PrefixedName
pPrefixedName :: Parser (Maybe Text, Text)
pPrefixedName :: Parser (Maybe Text, Text)
pPrefixedName = do
  Text
prefix <- Parser Text
pLocalPart Parser Text -> Parser Text Char -> Parser Text
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Char -> Parser Text Char
P.char Char
':'
  Text
localPart <- Parser Text
pLocalPart
  (Maybe Text, Text) -> Parser (Maybe Text, Text)
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Text -> Maybe Text
forall a. a -> Maybe a
Just Text
prefix, Text
localPart)

-- https://www.w3.org/TR/xml-names/#NT-LocalPart
pLocalPart :: Parser Text
pLocalPart :: Parser Text
pLocalPart = Parser Text
pNCName

-- http://www.w3.org/TR/REC-xml-names/#NT-NCName
pNCName :: Parser Text
pNCName :: Parser Text
pNCName = (Char -> Text -> Text)
-> Parser Text Char -> Parser Text -> Parser Text
forall (f :: * -> *) a b c.
Applicative f =>
(a -> b -> c) -> f a -> f b -> f c
liftA2 Char -> Text -> Text
T.cons Parser Text Char
pNameStartChar Parser Text
pNameRest
  where
    pNameStartChar :: Parser Text Char
pNameStartChar = (Char -> Bool) -> Parser Text Char
P.satisfy Char -> Bool
isValidFirstCharId
    pNameRest :: Parser Text
pNameRest = (Char -> Bool) -> Parser Text
P.takeWhile Char -> Bool
isValidRestCharId
    isValidFirstCharId :: Char -> Bool
isValidFirstCharId Char
c
      =  (Char
'A' Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
c Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'Z') Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'_' Bool -> Bool -> Bool
|| (Char
'a' Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
c Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'z')
      Bool -> Bool -> Bool
|| (Char
'\xC0' Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
c Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\xD6') Bool -> Bool -> Bool
|| (Char
'\xD8' Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
c Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\xF6')
      Bool -> Bool -> Bool
|| (Char
'\xF8' Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
c Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x2FF') Bool -> Bool -> Bool
|| (Char
'\x370' Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
c Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x37D')
      Bool -> Bool -> Bool
|| (Char
'\x37F' Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
c Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x1FFF') Bool -> Bool -> Bool
|| (Char
'\x200C' Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
c Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x200D')
      Bool -> Bool -> Bool
|| (Char
'\x2070' Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
c Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x218F') Bool -> Bool -> Bool
|| (Char
'\x2C00' Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
c Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x2FEF')
      Bool -> Bool -> Bool
|| (Char
'\x3001' Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
c Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\xD7FF') Bool -> Bool -> Bool
|| (Char
'\xF900' Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
c Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\xFDCF')
      Bool -> Bool -> Bool
|| (Char
'\xFDF0' Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
c Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\xFFFD') Bool -> Bool -> Bool
|| (Char
'\x10000' Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
c Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\xEFFFF')
    isValidRestCharId :: Char -> Bool
isValidRestCharId Char
c = Char -> Bool
isValidFirstCharId Char
c
      Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'-' Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'.' Bool -> Bool -> Bool
|| (Char
'0' Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
c Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'9')
      Bool -> Bool -> Bool
|| (Char
'\x0300' Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
c Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x036F') Bool -> Bool -> Bool
|| (Char
'\x203F' Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
c Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x2040')