module Language.Kort.Parser
( parseString
, parseText
, parseFile
, toSmaoinValue
, toSmaoinModel
)
where
import Control.Exception (assert)
import qualified Data.ByteString.Base64.Lazy as B64
import qualified Data.ByteString.Lazy as BL
import qualified Data.ByteString.Lazy.Char8 as BLC
import Data.Char
import Data.Either (rights)
import Data.Maybe (catMaybes)
import Data.Position (Advance, commonAdvance)
import qualified Data.Smaoin as S
import qualified Data.Smaoin.Vocabulary.Smaoin as V.Smaoin
import Data.Ratio ((%))
import qualified Data.Text.Lazy as T
import Language.Kort.Types
import Language.Kort.UidGen
import Text.Razom.Char
import Text.Razom.Lexer
import Text.Razom.Number
import Text.Razom.Uid
import Text.Razom.Value
import Text.Razom.Types
import Text.Regex.Applicative
value :: Regex Element
value = Value <$> literal <* string "::" <*> resource
resource :: Regex Resource
resource = Uid <$> uid <|> f <$> generator
where
f s = if null s then UGenerator else LGenerator s
element :: Regex Element
element = Resource <$> resource <|> value
statement :: Regex Statement
statement = Statement <$> resource <* sep <*> resource <*> elements
where
sep = some $ sym '\t'
elements = some $ sep *> element
comment :: Regex Comment
comment = string "--" *> many (sym '-') *> many cspace *> many cchar
where
cspace = psym $ \ c -> isSpace c && c /= '\n'
cchar = psym $ \ c -> isGraphical c || c == '\t'
line :: Regex Line
line = (Left <$> comment <|> Right <$> statement) <* sym '\n'
adv :: Advance Char
adv = commonAdvance 8 False True False False
parseString :: String -> ParseResult
parseString = tokenizeString adv line
parseText :: T.Text -> ParseResult
parseText = tokenizeText adv line
parseFile :: FilePath -> IO ParseResult
parseFile = tokenizeFile adv line
boolean :: Regex Bool
boolean = True <$ string "[x]" <|> False <$ string "[_]"
hex :: Regex Integer
hex = string "0x" *> hexadecimal
real :: Regex S.RealNum
real = signed $ (\ b e -> b * S.realnum 1 e) <$> realbase <*> realexp
where
realbase = f <$> some digit <*> optional (sym '.' *> some digit)
f ns Nothing = fromDigits ns []
f ns (Just fs) = fromDigits ns fs
realexp = g <$> optional (sym '$' *> signed decimal)
g Nothing = 0
g (Just n) = n
ratio :: Regex (Maybe S.Rational)
ratio = f <$> signed decimal <*> (sym '/' *> decimal)
where
f _ 0 = Nothing
f n d = Just $ n % d
character :: Regex Char
character = chardirect <|> sym '\\' *> (charesc <|> numesc)
where
chardirect = psym $ \ c -> isGraphical c && c `notElem` "{}\\"
charesc = psym (`elem` "{}\\")
numesc = sym 'x' *> (chr <$> hexadecimal)
stringl :: Regex String
stringl = catMaybes <$> many (chardirect <|> sym '\\' *> (charesc <|> numesc))
where
chardirect = Just <$> psym (\ c -> isGraphical c && c `notElem` "{}\\")
charesc = Just <$> psym (`elem` "{}\\") <|> Nothing <$ sym '&'
numesc = sym 'x' *> (Just . chr <$> hexadecimal)
chunk :: Regex BL.ByteString
chunk = (\ c p -> f $ c ++ p) <$> many chunkchar <*> padding
where
chunkchar = psym $ \ c -> isAsciiUpper c || isAsciiLower c || isDigit c
|| c == '+' || c == '/'
padc = sym '='
padding = snd <$> withMatched (optional (padc *> optional padc))
f s =
case B64.decode $ BLC.pack s of
Right b -> b
Left e -> assert False BL.empty
toSmaoinValue :: String -> S.Resource -> Either String S.Value
toSmaoinValue s r
| r == V.Smaoin._Boolean =
case match boolean s of
Just b -> Right $ S.Boolean b
Nothing -> Left "Invalid boolean literal"
| r == V.Smaoin._Number =
case (match hex s, match real s, match ratio s) of
(Just n, _, _) ->
Right $ S.Number $ S.RealNumber $ fromInteger n
(Nothing, Just n, _) ->
Right $ S.Number $ S.RealNumber n
(Nothing, Nothing, Just x) ->
case x of
Just n ->
Right $ S.Number $ S.RatioNumber n
Nothing ->
Left "Rational number literal with denominator 0"
(Nothing, Nothing, Nothing) ->
Left "Number literal doesn't match any of the formats"
| r == V.Smaoin._Character =
case match character s of
Just c -> Right $ S.Character c
Nothing -> Left "Invalid character literal"
| r == V.Smaoin._String =
case match stringl s of
Just s' -> Right $ S.String $ T.pack s'
Nothing -> Left "Invalid string literal"
| r == V.Smaoin._Data =
case match chunk s of
Just d -> Right $ S.Chunk d
Nothing -> Left "Invalid data literal"
| otherwise = Right $ S.Generic (T.pack s) r
data Problem = NotTwo | HasGen | NotRes | InvVal
toSmaoinModel
:: Document
-> ([Statement], [Statement], [Statement], [Statement], [S.Statement])
toSmaoinModel doc = g (rights doc) ([], [], [], [], [])
where
isVal (Resource _) = False
isVal (Value _ _) = True
f (Statement (Uid i) (Uid p) [Resource (Uid s), e]) =
case e of
Resource (Uid o) -> Right $ mk $ S.ResourceE $ S.res o
Resource _ -> Left HasGen
Value l (Uid t) ->
case toSmaoinValue l (S.res t) of
Right v -> Right $ mk $ S.ValueE v
Left _ -> Left InvVal
Value l _ -> Left HasGen
where
mk = S.Statement (S.res i) (S.res s) (S.res p)
f s@(Statement i r e)
| length e /= 2 = Left NotTwo
| stmtHasGens s = Left HasGen
| isVal $ head e = Left NotRes
| otherwise = error "Implementation error"
g [] t = t
g (s:ss) (nottwo, hasgen, notres, invval, l) =
case f s of
Right stmt -> g ss (nottwo, hasgen, notres, invval, stmt:l)
Left NotTwo -> g ss (s:nottwo, hasgen, notres, invval, l)
Left HasGen -> g ss (nottwo, s:hasgen, notres, invval, l)
Left NotRes -> g ss (nottwo, hasgen, s:notres, invval, l)
Left InvVal -> g ss (nottwo, hasgen, notres, s:invval, l)