module Data.RDF.BlankNode
  ( mkBNode,
  )
where

import Control.Applicative
import Control.Monad
import Data.Attoparsec.Text (Parser, (<?>))
import qualified Data.Attoparsec.Text as P
import Data.Char (isAsciiLower, isAsciiUpper, isDigit)
import Data.Text (Text)
import Text.Parser.Char
import Text.Parser.Combinators (option, try, unexpected)

-- Note: the NTriples parser combinators and Turtle parser combinators
-- should be shared, there's some duplication. This module could be
-- the place for those functions to be moved to, to reduce the size of
-- the NTriplesParser and TurtleParser modules.

-- mkBNode :: Text -> Either String String
-- mkBNode t = IRI . serializeIRI <$> parseIRI t

mkBNode :: Text -> Maybe String
mkBNode t =
  case parseBNodeLabel t of
    Left _e -> Nothing
    Right bString -> Just bString

parseBNodeLabel :: Text -> Either String String
parseBNodeLabel = P.parseOnly $ t_blank_node_label <* (P.endOfInput <?> "Unexpected characters at the end")

-- taken from TurtleParser (TurtleParser and NTriplesParser could
--possibly share this blank node label parser combinator?)
--
-- TODO replicate the recursion technique from [168s] for ((..)* something)?
-- [141s] BLANK_NODE_LABEL ::= '_:' (PN_CHARS_U | [0-9]) ((PN_CHARS | '.')* PN_CHARS)?
-- t_blank_node_label :: (CharParsing m, Monad m) => m String
t_blank_node_label :: Parser String
t_blank_node_label = do
  void (string "_:")
  firstChar <- t_pn_chars_u <|> satisfy isDigit
  try $ (firstChar :) <$> otherChars
  where
    otherChars = option "" $ do
      xs <- many (t_pn_chars <|> char '.')
      if null xs
        then pure xs
        else
          if last xs == '.'
            then unexpected "'.' at the end of a blank node label"
            else pure xs

-- [163s] PN_CHARS_BASE
t_pn_chars_base :: CharParsing m => m Char
t_pn_chars_base = nt_pn_chars_base

-- [164s] PN_CHARS_U ::= PN_CHARS_BASE | '_'
t_pn_chars_u :: CharParsing m => m Char
t_pn_chars_u = t_pn_chars_base <|> char '_'

-- [166s] PN_CHARS ::= PN_CHARS_U | '-' | [0-9] | #x00B7 | [#x0300-#x036F] | [#x203F-#x2040]
t_pn_chars :: CharParsing m => m Char
t_pn_chars = t_pn_chars_u <|> char '-' <|> char '\x00B7' <|> satisfy f
  where
    f = flip in_range [('0', '9'), ('\x0300', '\x036F'), ('\x203F', '\x2040')]

-- [157s] PN_CHARS_BASE ::= [A-Z] | [a-z] | [#x00C0-#x00D6] | [#x00D8-#x00F6] | [#x00F8-#x02FF] | [#x0370-#x037D] | [#x037F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
nt_pn_chars_base :: CharParsing m => m Char
nt_pn_chars_base = try $ satisfy isBaseChar
  where
    isBaseChar c =
      (isAsciiUpper c)
        || (isAsciiLower c)
        || (c >= '\x00C0' && c <= '\x00D6')
        || (c >= '\x00D8' && c <= '\x00F6')
        || (c >= '\x00F8' && c <= '\x02FF')
        || (c >= '\x0370' && c <= '\x037D')
        || (c >= '\x037F' && c <= '\x1FFF')
        || (c >= '\x200C' && c <= '\x200D')
        || (c >= '\x2070' && c <= '\x218F')
        || (c >= '\x2C00' && c <= '\x2FEF')
        || (c >= '\x3001' && c <= '\xD7FF')
        || (c >= '\xF900' && c <= '\xFDCF')
        || (c >= '\xFDF0' && c <= '\xFFFD')
        || (c >= '\x10000' && c <= '\xEFFFF')

{-# INLINE in_range #-}
in_range :: Char -> [(Char, Char)] -> Bool
in_range c = any (\(c1, c2) -> c >= c1 && c <= c2)