{-# LANGUAGE OverloadedLists #-} module Nirum.Constructs.Identifier ( Identifier , fromString , fromText , identifierRule , normalize , reservedKeywords , show , toCode , tokens , toCamelCaseText , toNormalizedString , toNormalizedText , toString , toSnakeCaseText , toText , toPascalCaseText , toLispCaseText , (==) ) where import Data.Char (toLower, toUpper) import Data.Maybe (fromMaybe) import Data.String (IsString (fromString)) import Data.Void import qualified Data.Text as T import qualified Data.Set as S import qualified Text.Megaparsec as P import Text.Megaparsec.Char (oneOf, satisfy) import Nirum.Constructs (Construct (toCode)) {- | Case-insensitive identifier. It also doesn't distinguish hyphens from underscores. It has more restrict rules than the most of programming languages: * It can't start with digits or hyphens/underscores. * Hyphens/underscores can't continuously appear more than once. * Only roman alphabets, Arabic numerals, hyphens and underscores are allowed. These rules are for portability between various programming languages. For example, @BOOK_CATEGORY@ and @Book-Category@ are all normalized to @book-category@, and it can be translated to: [snake_case] @book_category@ [camelCase] @bookCategory@ [PascalCase] @BookCategory@ [lisp-case] @book-category@ -} newtype Identifier = Identifier T.Text deriving (Show) reservedKeywords :: S.Set Identifier reservedKeywords = [ "enum" , "record" , "service" , "throws" , "type" , "unboxed" , "union" , "default" , "as" ] identifierRule :: P.Parsec Void T.Text Identifier identifierRule = do firstChar <- satisfy isAlpha restChars <- P.many $ satisfy isAlnum restWords <- P.many $ do sep <- oneOf ("-_" :: String) chars <- P.some $ satisfy isAlnum return $ T.pack $ sep : chars return $ Identifier $ T.concat $ T.pack (firstChar : restChars) : restWords where isAlpha :: Char -> Bool isAlpha c = 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' isDigit :: Char -> Bool isDigit c = '0' <= c && c <= '9' isAlnum :: Char -> Bool isAlnum c = isAlpha c || isDigit c -- | Constructs an 'Identifier' value from the given identifier string. -- It could return 'Nothing' if the given identifier is invalid. fromText :: T.Text -> Maybe Identifier fromText text = case P.parse rule "" text of Right ident -> Just ident Left _ -> Nothing where rule :: P.Parsec Void T.Text Identifier rule = do identifier' <- identifierRule _ <- P.eof return identifier' normalize :: Identifier -> Identifier normalize (Identifier i) = Identifier $ T.map (\ c -> if c == '_' then '-' else toLower c) i toText :: Identifier -> T.Text toText (Identifier text) = text toNormalizedText :: Identifier -> T.Text toNormalizedText = toText . normalize tokens :: Identifier -> [T.Text] tokens ident = T.split (== '-') $ toText $ normalize ident instance Eq Identifier where a@(Identifier _) == b@(Identifier _) = toNormalizedText a == toNormalizedText b instance Ord Identifier where compare a@(Identifier _) b@(Identifier _) = compare (toNormalizedText a) (toNormalizedText b) instance Construct Identifier where toCode ident | ident `S.member` reservedKeywords = '`' `T.cons` text `T.snoc` '`' | otherwise = text where text = toText ident instance IsString Identifier where fromString string = fromMaybe (error $ "invalid identifier: " ++ string) $ fromText (T.pack string) toString :: Identifier -> String toString = T.unpack . toText toNormalizedString :: Identifier -> String toNormalizedString = T.unpack . toNormalizedText toPascalCaseText :: Identifier -> T.Text toPascalCaseText identifier = T.concat $ fmap makeFirstUpper (tokens identifier) where makeFirstUpper :: T.Text -> T.Text makeFirstUpper t = toUpper (T.head t) `T.cons` T.tail t toCamelCaseText :: Identifier -> T.Text toCamelCaseText identifier = toLower (T.head pascalCased) `T.cons` T.tail pascalCased where pascalCased :: T.Text pascalCased = toPascalCaseText identifier toSnakeCaseText :: Identifier -> T.Text toSnakeCaseText identifier = T.intercalate "_" $ tokens identifier toLispCaseText :: Identifier -> T.Text toLispCaseText = toNormalizedText