module Data.RDF.BlankNode
  ( mkBNode,
  )
where

import Control.Applicative
import Control.Monad
import Data.Attoparsec.Text (Parser, (<?>))
import qualified Data.Attoparsec.Text as P
import Data.Char (isAsciiLower, isAsciiUpper, isDigit)
import Data.Text (Text)
import Text.Parser.Char
import Text.Parser.Combinators (option, try, unexpected)

-- Note: the NTriples parser combinators and Turtle parser combinators
-- should be shared, there's some duplication. This module could be
-- the place for those functions to be moved to, to reduce the size of
-- the NTriplesParser and TurtleParser modules.

-- mkBNode :: Text -> Either String String
-- mkBNode t = IRI . serializeIRI <$> parseIRI t

mkBNode :: Text -> Maybe String
mkBNode :: Text -> Maybe String
mkBNode Text
t =
  case Text -> Either String String
parseBNodeLabel Text
t of
    Left String
_e -> Maybe String
forall a. Maybe a
Nothing
    Right String
bString -> String -> Maybe String
forall a. a -> Maybe a
Just String
bString

parseBNodeLabel :: Text -> Either String String
parseBNodeLabel :: Text -> Either String String
parseBNodeLabel = Parser String -> Text -> Either String String
forall a. Parser a -> Text -> Either String a
P.parseOnly (Parser String -> Text -> Either String String)
-> Parser String -> Text -> Either String String
forall a b. (a -> b) -> a -> b
$ Parser String
t_blank_node_label Parser String -> Parser Text () -> Parser String
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* (Parser Text ()
forall t. Chunk t => Parser t ()
P.endOfInput Parser Text () -> String -> Parser Text ()
forall i a. Parser i a -> String -> Parser i a
<?> String
"Unexpected characters at the end")

-- taken from TurtleParser (TurtleParser and NTriplesParser could
--possibly share this blank node label parser combinator?)
--
-- TODO replicate the recursion technique from [168s] for ((..)* something)?
-- [141s] BLANK_NODE_LABEL ::= '_:' (PN_CHARS_U | [0-9]) ((PN_CHARS | '.')* PN_CHARS)?
-- t_blank_node_label :: (CharParsing m, Monad m) => m String
t_blank_node_label :: Parser String
t_blank_node_label :: Parser String
t_blank_node_label = do
  Parser String -> Parser Text ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (String -> Parser String
forall (m :: * -> *). CharParsing m => String -> m String
string String
"_:")
  Char
firstChar <- Parser Text Char
forall (m :: * -> *). CharParsing m => m Char
t_pn_chars_u Parser Text Char -> Parser Text Char -> Parser Text Char
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Char -> Bool) -> Parser Text Char
forall (m :: * -> *). CharParsing m => (Char -> Bool) -> m Char
satisfy Char -> Bool
isDigit
  Parser String -> Parser String
forall (m :: * -> *) a. Parsing m => m a -> m a
try (Parser String -> Parser String) -> Parser String -> Parser String
forall a b. (a -> b) -> a -> b
$ (Char
firstChar Char -> String -> String
forall a. a -> [a] -> [a]
:) (String -> String) -> Parser String -> Parser String
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser String
otherChars
  where
    otherChars :: Parser String
otherChars = String -> Parser String -> Parser String
forall (m :: * -> *) a. Alternative m => a -> m a -> m a
option String
"" (Parser String -> Parser String) -> Parser String -> Parser String
forall a b. (a -> b) -> a -> b
$ do
      String
xs <- Parser Text Char -> Parser String
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many (Parser Text Char
forall (m :: * -> *). CharParsing m => m Char
t_pn_chars Parser Text Char -> Parser Text Char -> Parser Text Char
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Char -> Parser Text Char
forall (m :: * -> *). CharParsing m => Char -> m Char
char Char
'.')
      if String -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null String
xs
        then String -> Parser String
forall (f :: * -> *) a. Applicative f => a -> f a
pure String
xs
        else
          if String -> Char
forall a. [a] -> a
last String
xs Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'.'
            then String -> Parser String
forall (m :: * -> *) a. Parsing m => String -> m a
unexpected String
"'.' at the end of a blank node label"
            else String -> Parser String
forall (f :: * -> *) a. Applicative f => a -> f a
pure String
xs

-- [163s] PN_CHARS_BASE
t_pn_chars_base :: CharParsing m => m Char
t_pn_chars_base :: m Char
t_pn_chars_base = m Char
forall (m :: * -> *). CharParsing m => m Char
nt_pn_chars_base

-- [164s] PN_CHARS_U ::= PN_CHARS_BASE | '_'
t_pn_chars_u :: CharParsing m => m Char
t_pn_chars_u :: m Char
t_pn_chars_u = m Char
forall (m :: * -> *). CharParsing m => m Char
t_pn_chars_base m Char -> m Char -> m Char
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Char -> m Char
forall (m :: * -> *). CharParsing m => Char -> m Char
char Char
'_'

-- [166s] PN_CHARS ::= PN_CHARS_U | '-' | [0-9] | #x00B7 | [#x0300-#x036F] | [#x203F-#x2040]
t_pn_chars :: CharParsing m => m Char
t_pn_chars :: m Char
t_pn_chars = m Char
forall (m :: * -> *). CharParsing m => m Char
t_pn_chars_u m Char -> m Char -> m Char
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Char -> m Char
forall (m :: * -> *). CharParsing m => Char -> m Char
char Char
'-' m Char -> m Char -> m Char
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Char -> m Char
forall (m :: * -> *). CharParsing m => Char -> m Char
char Char
'\x00B7' m Char -> m Char -> m Char
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Char -> Bool) -> m Char
forall (m :: * -> *). CharParsing m => (Char -> Bool) -> m Char
satisfy Char -> Bool
f
  where
    f :: Char -> Bool
f = (Char -> [(Char, Char)] -> Bool) -> [(Char, Char)] -> Char -> Bool
forall a b c. (a -> b -> c) -> b -> a -> c
flip Char -> [(Char, Char)] -> Bool
in_range [(Char
'0', Char
'9'), (Char
'\x0300', Char
'\x036F'), (Char
'\x203F', Char
'\x2040')]

-- [157s] PN_CHARS_BASE ::= [A-Z] | [a-z] | [#x00C0-#x00D6] | [#x00D8-#x00F6] | [#x00F8-#x02FF] | [#x0370-#x037D] | [#x037F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
nt_pn_chars_base :: CharParsing m => m Char
nt_pn_chars_base :: m Char
nt_pn_chars_base = m Char -> m Char
forall (m :: * -> *) a. Parsing m => m a -> m a
try (m Char -> m Char) -> m Char -> m Char
forall a b. (a -> b) -> a -> b
$ (Char -> Bool) -> m Char
forall (m :: * -> *). CharParsing m => (Char -> Bool) -> m Char
satisfy Char -> Bool
isBaseChar
  where
    isBaseChar :: Char -> Bool
isBaseChar Char
c =
      (Char -> Bool
isAsciiUpper Char
c)
        Bool -> Bool -> Bool
|| (Char -> Bool
isAsciiLower Char
c)
        Bool -> Bool -> Bool
|| (Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x00C0' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x00D6')
        Bool -> Bool -> Bool
|| (Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x00D8' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x00F6')
        Bool -> Bool -> Bool
|| (Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x00F8' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x02FF')
        Bool -> Bool -> Bool
|| (Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x0370' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x037D')
        Bool -> Bool -> Bool
|| (Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x037F' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x1FFF')
        Bool -> Bool -> Bool
|| (Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x200C' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x200D')
        Bool -> Bool -> Bool
|| (Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x2070' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x218F')
        Bool -> Bool -> Bool
|| (Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x2C00' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x2FEF')
        Bool -> Bool -> Bool
|| (Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x3001' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\xD7FF')
        Bool -> Bool -> Bool
|| (Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\xF900' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\xFDCF')
        Bool -> Bool -> Bool
|| (Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\xFDF0' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\xFFFD')
        Bool -> Bool -> Bool
|| (Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x10000' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\xEFFFF')

{-# INLINE in_range #-}
in_range :: Char -> [(Char, Char)] -> Bool
in_range :: Char -> [(Char, Char)] -> Bool
in_range Char
c = ((Char, Char) -> Bool) -> [(Char, Char)] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
any (\(Char
c1, Char
c2) -> Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
c1 Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
c2)