{-# OPTIONS_GHC -fno-warn-orphans #-}
-- | Symantics for terminal grammars.
module Language.Symantic.Grammar.Terminal where

import Data.Semigroup (Semigroup(..))
import Data.String (IsString(..))
import qualified Data.Bool as Bool
import qualified Data.Char as Char
import qualified Data.List as List
import qualified Data.Text as Text
import qualified Data.Text.Lazy as TL

import Language.Symantic.Grammar.Fixity
import Language.Symantic.Grammar.EBNF

-- * Type 'Terminal'
-- | Terminal grammar.
newtype Terminal g a
 =      Terminal { unTerminal :: g a }
 deriving (Functor, Gram_Char, Gram_String)
deriving instance Gram_Rule g => Gram_Rule (Terminal g)

-- ** Class 'Gram_Char'
-- | Symantics for terminal grammars.
class Gram_Rule g => Gram_Char g where
	any    :: g Char
	but    :: Terminal g Char -> Terminal g Char -> Terminal g Char
	eoi    :: g ()
	eol    :: g Char
	space  :: g Char
	char   :: Char -> g Char
	unicat :: Unicat -> g Char
	range  :: (Char, Char) -> g Char
	eol   = rule "NewLine" $ char '\n'
	space = rule "Space" $ char ' '
deriving instance Gram_Char RuleEBNF
instance Gram_Char EBNF where
	any  = ebnf_const "_"
	Terminal (EBNF f) `but` Terminal (EBNF g) =
		Terminal $ EBNF $ \bo po -> parenInfix po op $
			f bo (op, SideL) <> " - " <> g bo (op, SideR)
		where op = infixL 6
	eoi   = ebnf_const "eoi"
	eol   = ebnf_const "↵"
	space = ebnf_const "␣"
	char  = ebnf_const . escape
		where
		escape c | Char.isPrint c && c /= '"' = Text.concat $ ["\"", Text.singleton c, "\""]
		escape c = Text.concat ["U+", Text.pack $ show $ Char.ord c]
	unicat = ebnf_const . Text.pack . show
	range (l, h) = ebnf_const $ Text.concat
	 [ runEBNF $ char l
	 , "…"
	 , runEBNF $ char h
	 ]

-- *** Type 'Unicat'
-- | Unicode category.
data Unicat
 = Unicat_Letter
 | Unicat_Mark
 | Unicat_Number
 | Unicat_Punctuation
 | Unicat_Symbol
 | Unicat Char.GeneralCategory
 deriving (Eq, Show)

unicode_categories :: Unicat -> [Char.GeneralCategory]
unicode_categories c =
	case c of
	 Unicat_Letter ->
		 [ Char.UppercaseLetter
		 , Char.LowercaseLetter
		 , Char.TitlecaseLetter
		 , Char.ModifierLetter
		 , Char.OtherLetter
		 ]
	 Unicat_Mark ->
		 [ Char.NonSpacingMark
		 , Char.SpacingCombiningMark
		 , Char.EnclosingMark
		 ]
	 Unicat_Number ->
		 [ Char.DecimalNumber
		 , Char.LetterNumber
		 , Char.OtherNumber
		 ]
	 Unicat_Punctuation ->
		 [ Char.ConnectorPunctuation
		 , Char.DashPunctuation
		 , Char.OpenPunctuation
		 , Char.ClosePunctuation
		 , Char.OtherPunctuation
		 ]
	 Unicat_Symbol ->
		 [ Char.MathSymbol
		 , Char.CurrencySymbol
		 , Char.ModifierSymbol
		 , Char.OtherSymbol
		 ]
	 Unicat cat -> [cat]

-- ** Class 'Gram_String'
class Functor g => Gram_String g where
	string   :: String -> g String
	{-
	string = foldr (\c -> (<*>) ((:) <$> char c)) (pure "")
	string [] = pure []
	string (c:cs) = (:) <$> char c <*> string cs
	-}
	text     :: Text.Text -> g Text.Text
	textLazy :: TL.Text -> g TL.Text
	text t     = Text.pack <$> string (Text.unpack t)
	textLazy t = TL.pack   <$> string (TL.unpack t)
deriving instance Gram_String RuleEBNF
instance Gram_String EBNF where
	string s =
		case List.break (\c -> Bool.not (Char.isPrint c) || c == '"') s of
		 (ps, "")   -> raw ps
		 ("", [c])  -> "" <$ char c
		 (ps, [c])  -> "" <$ raw ps <* char c
		 ("", c:rs) -> "" <$ char c <* string rs
		 (ps, c:rs) -> "" <$ raw ps <* char c <* string rs
		where
		raw cs = ebnf_const $ Text.concat $ ["\"", Text.pack cs, "\""]
instance IsString (EBNF String) where
	fromString = string