{- This file is part of language-kort. - - Written in 2015 by fr33domlover . - - ♡ Copying is an act of love. Please copy, reuse and share. - - The author(s) have dedicated all copyright and related and neighboring - rights to this software to the public domain worldwide. This software is - distributed without any warranty. - - You should have received a copy of the CC0 Public Domain Dedication along - with this software. If not, see - . -} module Language.Kort.Parser ( parseString , parseText , parseFile , toSmaoinValue , toSmaoinModel ) where import Control.Exception (assert) import qualified Data.ByteString.Base64.Lazy as B64 import qualified Data.ByteString.Lazy as BL import qualified Data.ByteString.Lazy.Char8 as BLC import Data.Char import Data.Either (rights) import Data.Maybe (catMaybes) import Data.Position (Advance, commonAdvance) import qualified Data.Smaoin as S import qualified Data.Smaoin.Vocabulary.Smaoin as V.Smaoin import Data.Ratio ((%)) import qualified Data.Text.Lazy as T import Language.Kort.Types import Language.Kort.UidGen import Text.Razom.Char import Text.Razom.Lexer import Text.Razom.Number import Text.Razom.Uid import Text.Razom.Value import Text.Razom.Types import Text.Regex.Applicative value :: Regex Element value = Value <$> literal <* string "::" <*> resource resource :: Regex Resource resource = Uid <$> uid <|> f <$> generator where f s = if null s then UGenerator else LGenerator s element :: Regex Element element = Resource <$> resource <|> value statement :: Regex Statement statement = Statement <$> resource <* sep <*> resource <*> elements where sep = some $ sym '\t' elements = some $ sep *> element comment :: Regex Comment comment = string "--" *> many (sym '-') *> many cspace *> many cchar where cspace = psym $ \ c -> isSpace c && c /= '\n' cchar = psym $ \ c -> isGraphical c || c == '\t' line :: Regex Line line = (Left <$> comment <|> Right <$> statement) <* sym '\n' adv :: Advance Char adv = commonAdvance 8 False True False False -- | Parse Kort source text from a 'String'. parseString :: String -> ParseResult parseString = tokenizeString adv line -- | Parse Kort source text from 'T.Text'. parseText :: T.Text -> ParseResult parseText = tokenizeText adv line -- | Parse Kort source text read from a file. parseFile :: FilePath -> IO ParseResult parseFile = tokenizeFile adv line -- | Matches a boolean Smaoin literal body and returns the value. boolean :: Regex Bool boolean = True <$ string "[x]" <|> False <$ string "[_]" -- | Matches a Smaoin hex number literal body and returns the value. hex :: Regex Integer hex = string "0x" *> hexadecimal -- | Matches a Smaoin real number literal body and returns the value. real :: Regex S.RealNum real = signed $ (\ b e -> b * S.realnum 1 e) <$> realbase <*> realexp where realbase = f <$> some digit <*> optional (sym '.' *> some digit) f ns Nothing = fromDigits ns [] f ns (Just fs) = fromDigits ns fs realexp = g <$> optional (sym '$' *> signed decimal) g Nothing = 0 g (Just n) = n -- | Matches a Smaoin rational number literal body and returns 'Just' the -- value. If the matched denominator is 0, it returns 'Nothing'. ratio :: Regex (Maybe S.Rational) ratio = f <$> signed decimal <*> (sym '/' *> decimal) where f _ 0 = Nothing f n d = Just $ n % d -- | Matches a Smaoin character literal body and returns the value. character :: Regex Char character = chardirect <|> sym '\\' *> (charesc <|> numesc) where chardirect = psym $ \ c -> isGraphical c && c `notElem` "{}\\" charesc = psym (`elem` "{}\\") numesc = sym 'x' *> (chr <$> hexadecimal) -- | Matches a Smaoin string literal body and returns the value. stringl :: Regex String stringl = catMaybes <$> many (chardirect <|> sym '\\' *> (charesc <|> numesc)) where chardirect = Just <$> psym (\ c -> isGraphical c && c `notElem` "{}\\") charesc = Just <$> psym (`elem` "{}\\") <|> Nothing <$ sym '&' numesc = sym 'x' *> (Just . chr <$> hexadecimal) -- | Matches a Smaoin data base64 encoded literal body, and returns the decoded -- value. chunk :: Regex BL.ByteString chunk = (\ c p -> f $ c ++ p) <$> many chunkchar <*> padding where chunkchar = psym $ \ c -> isAsciiUpper c || isAsciiLower c || isDigit c || c == '+' || c == '/' padc = sym '=' padding = snd <$> withMatched (optional (padc *> optional padc)) -- The regex is supposed to ensure we got a valid base64 string, so -- decoding must succeed. If it ever doesn't, there's an implementation -- error here. Therefore, use an assertion to signal a failure. f s = case B64.decode $ BLC.pack s of Right b -> b Left e -> assert False BL.empty -- | Parse a Kort literal into a typed value. Return an error message if the -- literal body format doesn't match the type. toSmaoinValue :: String -> S.Resource -> Either String S.Value toSmaoinValue s r | r == V.Smaoin._Boolean = case match boolean s of Just b -> Right $ S.Boolean b Nothing -> Left "Invalid boolean literal" | r == V.Smaoin._Number = case (match hex s, match real s, match ratio s) of (Just n, _, _) -> Right $ S.Number $ S.RealNumber $ fromInteger n (Nothing, Just n, _) -> Right $ S.Number $ S.RealNumber n (Nothing, Nothing, Just x) -> case x of Just n -> Right $ S.Number $ S.RatioNumber n Nothing -> Left "Rational number literal with denominator 0" (Nothing, Nothing, Nothing) -> Left "Number literal doesn't match any of the formats" | r == V.Smaoin._Character = case match character s of Just c -> Right $ S.Character c Nothing -> Left "Invalid character literal" | r == V.Smaoin._String = case match stringl s of Just s' -> Right $ S.String $ T.pack s' Nothing -> Left "Invalid string literal" | r == V.Smaoin._Data = case match chunk s of Just d -> Right $ S.Chunk d Nothing -> Left "Invalid data literal" | otherwise = Right $ S.Generic (T.pack s) r data Problem = NotTwo | HasGen | NotRes | InvVal -- | Convert from the Kort document model to a list of Smaoin statements. -- -- This function doesn't handle Uid generation or other shortcuts. It just -- collects statements invalid for Smaoin and statements with generators into -- separate lists. If you want Uids to be generated, use the functions in -- "Language.Kort.UidGen". -- -- 5 lists are returned: -- -- (1) Kort statements which don't have exactly 2 elements -- (2) Kort statements with 2 elements, but containing generators -- (3) Kort statements with 2 elements, but the 1st is a value, not a resource -- (4) Kort statement with 2 elements, but invalid object value -- (5) Successfully converted Smaoin statements -- -- Kort comments are ignored and dropped. toSmaoinModel :: Document -> ([Statement], [Statement], [Statement], [Statement], [S.Statement]) toSmaoinModel doc = g (rights doc) ([], [], [], [], []) where isVal (Resource _) = False isVal (Value _ _) = True f (Statement (Uid i) (Uid p) [Resource (Uid s), e]) = case e of Resource (Uid o) -> Right $ mk $ S.ResourceE $ S.res o Resource _ -> Left HasGen Value l (Uid t) -> case toSmaoinValue l (S.res t) of Right v -> Right $ mk $ S.ValueE v Left _ -> Left InvVal Value l _ -> Left HasGen where mk = S.Statement (S.res i) (S.res s) (S.res p) f s@(Statement i r e) | length e /= 2 = Left NotTwo | stmtHasGens s = Left HasGen | isVal $ head e = Left NotRes | otherwise = error "Implementation error" g [] t = t g (s:ss) (nottwo, hasgen, notres, invval, l) = case f s of Right stmt -> g ss (nottwo, hasgen, notres, invval, stmt:l) Left NotTwo -> g ss (s:nottwo, hasgen, notres, invval, l) Left HasGen -> g ss (nottwo, s:hasgen, notres, invval, l) Left NotRes -> g ss (nottwo, hasgen, s:notres, invval, l) Left InvVal -> g ss (nottwo, hasgen, notres, s:invval, l)