{-# OPTIONS_GHC -fno-warn-orphans #-} -- | Symantics for terminal grammars. module Language.Symantic.Grammar.Terminal where import Control.Applicative (Applicative(..)) import Data.Bool import Data.Char (Char) import Data.Eq (Eq(..)) import Data.Function (($), (.)) import Data.Functor (Functor(..), (<$>)) import Data.Semigroup (Semigroup(..)) import Data.String (IsString(..), String) import Text.Show (Show(..)) import qualified Data.Bool as Bool import qualified Data.Char as Char import qualified Data.List as List import qualified Data.Text as Text import qualified Data.Text.Lazy as TL import Language.Symantic.Grammar.Fixity import Language.Symantic.Grammar.EBNF -- * Type 'Terminal' -- | Terminal grammar. newtype Terminal g a = Terminal { unTerminal :: g a } deriving (Functor, Gram_Char, Gram_String) deriving instance Gram_Rule g => Gram_Rule (Terminal g) -- ** Class 'Gram_Char' -- | Symantics for terminal grammars. class Gram_Rule g => Gram_Char g where any :: g Char but :: Terminal g Char -> Terminal g Char -> Terminal g Char eoi :: g () eol :: g Char space :: g Char char :: Char -> g Char unicat :: Unicat -> g Char range :: (Char, Char) -> g Char eol = rule "NewLine" $ char '\n' space = rule "Space" $ char ' ' deriving instance Gram_Char RuleEBNF instance Gram_Char EBNF where any = ebnf_const "_" Terminal (EBNF f) `but` Terminal (EBNF g) = Terminal $ EBNF $ \bo po -> pairIfNeeded pairParen po op $ f bo (op, SideL) <> " - " <> g bo (op, SideR) where op = infixL 6 eoi = ebnf_const "eoi" eol = ebnf_const "↵" space = ebnf_const "␣" char = ebnf_const . escape where escape c | Char.isPrint c && c /= '"' = Text.concat $ ["\"", Text.singleton c, "\""] escape c = Text.concat ["U+", Text.pack $ show $ Char.ord c] unicat = ebnf_const . Text.pack . show range (l, h) = ebnf_const $ Text.concat [ runEBNF $ char l , "…" , runEBNF $ char h ] -- *** Type 'Unicat' -- | Unicode category. data Unicat = Unicat_Letter | Unicat_Mark | Unicat_Number | Unicat_Punctuation | Unicat_Symbol | Unicat Char.GeneralCategory deriving (Eq, Show) unicode_categories :: Unicat -> [Char.GeneralCategory] unicode_categories c = case c of Unicat_Letter -> [ Char.UppercaseLetter , Char.LowercaseLetter , Char.TitlecaseLetter , Char.ModifierLetter , Char.OtherLetter ] Unicat_Mark -> [ Char.NonSpacingMark , Char.SpacingCombiningMark , Char.EnclosingMark ] Unicat_Number -> [ Char.DecimalNumber , Char.LetterNumber , Char.OtherNumber ] Unicat_Punctuation -> [ Char.ConnectorPunctuation , Char.DashPunctuation , Char.OpenPunctuation , Char.ClosePunctuation , Char.OtherPunctuation ] Unicat_Symbol -> [ Char.MathSymbol , Char.CurrencySymbol , Char.ModifierSymbol , Char.OtherSymbol ] Unicat cat -> [cat] -- ** Class 'Gram_String' class Functor g => Gram_String g where string :: String -> g String {- string = foldr (\c -> (<*>) ((:) <$> char c)) (pure "") string [] = pure [] string (c:cs) = (:) <$> char c <*> string cs -} text :: Text.Text -> g Text.Text textLazy :: TL.Text -> g TL.Text text t = Text.pack <$> string (Text.unpack t) textLazy t = TL.pack <$> string (TL.unpack t) deriving instance Gram_String RuleEBNF instance Gram_String EBNF where string s = case List.break (\c -> Bool.not (Char.isPrint c) || c == '"') s of (ps, "") -> raw ps ("", [c]) -> "" <$ char c (ps, [c]) -> "" <$ raw ps <* char c ("", c:rs) -> "" <$ char c <* string rs (ps, c:rs) -> "" <$ raw ps <* char c <* string rs where raw cs = ebnf_const $ Text.concat $ ["\"", Text.pack cs, "\""] instance IsString (EBNF String) where fromString = string