{-# OPTIONS_GHC -fno-warn-orphans #-} -- | Symantics for terminal grammars. module Language.Symantic.Grammar.Terminal where import Control.Monad import Data.Semigroup (Semigroup(..)) import Data.String (IsString(..)) import Prelude hiding (any) import qualified Data.Bool as Bool import qualified Data.Char as Char import qualified Data.List as List import qualified Data.Text as Text import Language.Symantic.Grammar.Fixity import Language.Symantic.Grammar.EBNF -- * Type 'Terminal' -- | Terminal grammar. newtype Terminal g a = Terminal { unTerminal :: g a } deriving (Functor, Gram_Terminal) deriving instance Gram_Rule g => Gram_Rule (Terminal g) -- ** Class 'Gram_Terminal' -- | Symantics for terminal grammars. class Gram_Terminal g where any :: g Char but :: Terminal g Char -> Terminal g Char -> Terminal g Char eoi :: g () char :: Char -> g Char string :: String -> g String unicat :: Unicat -> g Char range :: (Char, Char) -> g Char -- string = foldr (\c -> (<*>) ((:) <$> char c)) (pure "") -- string [] = pure [] -- string (c:cs) = (:) <$> char c <*> string cs deriving instance Gram_Terminal RuleEBNF instance Gram_Terminal EBNF where any = ebnf_const "_" Terminal (EBNF f) `but` Terminal (EBNF g) = Terminal $ EBNF $ \bo po -> parenInfix po op $ f bo (op, SideL) <> " - " <> g bo (op, SideR) where op = infixL 6 eoi = ebnf_const "eoi" char = ebnf_const . escape where escape c | Char.isPrint c && c /= '"' = Text.concat $ ["\"", Text.singleton c, "\""] escape c = Text.concat ["U+", Text.pack $ show $ Char.ord c] string s = case List.break (\c -> Bool.not (Char.isPrint c) || c == '"') s of (ps, "") -> raw ps ("", [c]) -> "" <$ char c (ps, [c]) -> "" <$ raw ps <* char c ("", c:rs) -> "" <$ char c <* string rs (ps, c:rs) -> "" <$ raw ps <* char c <* string rs where raw cs = ebnf_const $ Text.concat $ ["\"", Text.pack cs, "\""] unicat = ebnf_const . Text.pack . show range (l, h) = ebnf_const $ Text.concat [ runEBNF $ char l , "…" , runEBNF $ char h ] instance IsString (EBNF String) where fromString = string -- *** Type 'Unicat' -- | Unicode category. data Unicat = Unicat_Letter | Unicat_Mark | Unicat_Number | Unicat_Punctuation | Unicat_Symbol | Unicat Char.GeneralCategory deriving (Eq, Show) unicode_categories :: Unicat -> [Char.GeneralCategory] unicode_categories c = case c of Unicat_Letter -> [ Char.UppercaseLetter , Char.LowercaseLetter , Char.TitlecaseLetter , Char.ModifierLetter , Char.OtherLetter ] Unicat_Mark -> [ Char.NonSpacingMark , Char.SpacingCombiningMark , Char.EnclosingMark ] Unicat_Number -> [ Char.DecimalNumber , Char.LetterNumber , Char.OtherNumber ] Unicat_Punctuation -> [ Char.ConnectorPunctuation , Char.DashPunctuation , Char.OpenPunctuation , Char.ClosePunctuation , Char.OtherPunctuation ] Unicat_Symbol -> [ Char.MathSymbol , Char.CurrencySymbol , Char.ModifierSymbol , Char.OtherSymbol ] Unicat cat -> [cat]