{- This file is part of language-kort.
 -
 - Written in 2015 by fr33domlover <fr33domlover@rel4tion.org>.
 -
 - ♡ Copying is an act of love. Please copy, reuse and share.
 -
 - The author(s) have dedicated all copyright and related and neighboring
 - rights to this software to the public domain worldwide. This software is
 - distributed without any warranty.
 -
 - You should have received a copy of the CC0 Public Domain Dedication along
 - with this software. If not, see
 - <http://creativecommons.org/publicdomain/zero/1.0/>.
 -}

module Language.Kort.Parser
    ( parseString
    , parseText
    , parseFile
    , toSmaoinValue
    , toSmaoinModel
    )
where

import Control.Exception (assert)
import qualified Data.ByteString.Base64.Lazy as B64
import qualified Data.ByteString.Lazy as BL
import qualified Data.ByteString.Lazy.Char8 as BLC
import Data.Char
import Data.Either (rights)
import Data.Maybe (catMaybes)
import Data.Position (Advance, commonAdvance)
import qualified Data.Smaoin as S
import qualified Data.Smaoin.Vocabulary.Smaoin as V.Smaoin
import Data.Ratio ((%))
import qualified Data.Text.Lazy as T
import Language.Kort.Types
import Language.Kort.UidGen
import Text.Razom.Char
import Text.Razom.Lexer
import Text.Razom.Number
import Text.Razom.Uid
import Text.Razom.Value
import Text.Razom.Types
import Text.Regex.Applicative

value :: Regex Element
value = Value <$> literal <* string "::" <*> resource

resource :: Regex Resource
resource = Uid <$> uid <|> f <$> generator
    where
    f s = if null s then UGenerator else LGenerator s

element :: Regex Element
element = Resource <$> resource <|> value

statement :: Regex Statement
statement = Statement <$> resource <* sep <*> resource <*> elements
    where
    sep      = some $ sym '\t'
    elements = some $ sep *> element

comment :: Regex Comment
comment = string "--" *> many (sym '-') *> many cspace *> many cchar
    where
    cspace = psym $ \ c -> isSpace c && c /= '\n'
    cchar  = psym $ \ c -> isGraphical c || c == '\t'

line :: Regex Line
line = (Left <$> comment <|> Right <$> statement) <* sym '\n'

adv :: Advance Char
adv = commonAdvance 8 False True False False

-- | Parse Kort source text from a 'String'.
parseString :: String -> ParseResult
parseString = tokenizeString adv line

-- | Parse Kort source text from 'T.Text'.
parseText :: T.Text -> ParseResult
parseText = tokenizeText adv line

-- | Parse Kort source text read from a file.
parseFile :: FilePath -> IO ParseResult
parseFile = tokenizeFile adv line

-- | Matches a boolean Smaoin literal body and returns the value.
boolean :: Regex Bool
boolean = True <$ string "[x]" <|> False <$ string "[_]"

-- | Matches a Smaoin hex number literal body and returns the value.
hex :: Regex Integer
hex = string "0x" *> hexadecimal

-- | Matches a Smaoin real number literal body and returns the value.
real :: Regex S.RealNum
real = signed $ (\ b e -> b * S.realnum 1 e) <$> realbase <*> realexp
    where
    realbase = f <$> some digit <*> optional (sym '.' *> some digit)
    f ns Nothing   = fromDigits ns []
    f ns (Just fs) = fromDigits ns fs
    realexp = g <$> optional (sym '$' *> signed decimal)
    g Nothing  = 0
    g (Just n) = n

-- | Matches a Smaoin rational number literal body and returns 'Just' the
-- value. If the matched denominator is 0, it returns 'Nothing'.
ratio :: Regex (Maybe S.Rational)
ratio = f <$> signed decimal <*> (sym '/' *> decimal)
    where
    f _ 0 = Nothing
    f n d = Just $ n % d

-- | Matches a Smaoin character literal body and returns the value.
character :: Regex Char
character = chardirect <|> sym '\\' *> (charesc <|> numesc)
    where
    chardirect = psym $ \ c -> isGraphical c && c `notElem` "{}\\"
    charesc = psym (`elem` "{}\\")
    numesc = sym 'x' *> (chr <$> hexadecimal)

-- | Matches a Smaoin string literal body and returns the value.
stringl :: Regex String
stringl = catMaybes <$> many (chardirect <|> sym '\\' *> (charesc <|> numesc))
    where
    chardirect = Just <$> psym (\ c -> isGraphical c && c `notElem` "{}\\")
    charesc = Just <$> psym (`elem` "{}\\") <|> Nothing <$ sym '&'
    numesc = sym 'x' *> (Just . chr <$> hexadecimal)

-- | Matches a Smaoin data base64 encoded literal body, and returns the decoded
-- value.
chunk :: Regex BL.ByteString
chunk = (\ c p -> f $ c ++ p) <$> many chunkchar <*> padding
    where
    chunkchar = psym $ \ c -> isAsciiUpper c || isAsciiLower c || isDigit c
                           || c == '+' || c == '/'
    padc = sym '='
    padding = snd <$> withMatched (optional (padc *> optional padc))
    -- The regex is supposed to ensure we got a valid base64 string, so
    -- decoding must succeed. If it ever doesn't, there's an implementation
    -- error here. Therefore, use an assertion to signal a failure.
    f s =
        case B64.decode $ BLC.pack s of
            Right b -> b
            Left e  -> assert False BL.empty

-- | Parse a Kort literal into a typed value. Return an error message if the
-- literal body format doesn't match the type.
toSmaoinValue :: String -> S.Resource -> Either String S.Value
toSmaoinValue s r
    | r == V.Smaoin._Boolean   =
        case match boolean s of
            Just b  -> Right $ S.Boolean b
            Nothing -> Left "Invalid boolean literal"
    | r == V.Smaoin._Number    =
        case (match hex s, match real s, match ratio s) of
            (Just n, _, _)              ->
                Right $ S.Number $ S.RealNumber $ fromInteger n
            (Nothing, Just n, _)        ->
                Right $ S.Number $ S.RealNumber n
            (Nothing, Nothing, Just x)  ->
                case x of
                    Just n  ->
                        Right $ S.Number $ S.RatioNumber n
                    Nothing ->
                        Left "Rational number literal with denominator 0"
            (Nothing, Nothing, Nothing) ->
                Left "Number literal doesn't match any of the formats"
    | r == V.Smaoin._Character =
        case match character s of
            Just c  -> Right $ S.Character c
            Nothing -> Left "Invalid character literal"
    | r == V.Smaoin._String    =
        case match stringl s of
            Just s' -> Right $ S.String $ T.pack s'
            Nothing -> Left "Invalid string literal"
    | r == V.Smaoin._Data      =
        case match chunk s of
            Just d  -> Right $ S.Chunk d
            Nothing -> Left "Invalid data literal"
    | otherwise                = Right $ S.Generic (T.pack s) r

data Problem = NotTwo | HasGen | NotRes | InvVal

-- | Convert from the Kort document model to a list of Smaoin statements.
--
-- This function doesn't handle Uid generation or other shortcuts. It just
-- collects statements invalid for Smaoin and statements with generators into
-- separate lists. If you want Uids to be generated, use the functions in
-- "Language.Kort.UidGen".
--
-- 5 lists are returned:
--
-- (1) Kort statements which don't have exactly 2 elements
-- (2) Kort statements with 2 elements, but containing generators
-- (3) Kort statements with 2 elements, but the 1st is a value, not a resource
-- (4) Kort statement with 2 elements, but invalid object value
-- (5) Successfully converted Smaoin statements
--
-- Kort comments are ignored and dropped.
toSmaoinModel
    :: Document
    -> ([Statement], [Statement], [Statement], [Statement], [S.Statement])
toSmaoinModel doc = g (rights doc) ([], [], [], [], [])
    where
    isVal (Resource _) = False
    isVal (Value _ _)  = True
    f (Statement (Uid i) (Uid p) [Resource (Uid s), e]) =
        case e of
            Resource (Uid o) -> Right $ mk $ S.ResourceE $ S.res o
            Resource _       -> Left HasGen
            Value l (Uid t)  ->
                case toSmaoinValue l (S.res t) of
                    Right v     -> Right $ mk $ S.ValueE v
                    Left _      -> Left InvVal
            Value l _        -> Left HasGen
        where
        mk = S.Statement (S.res i) (S.res s) (S.res p)
    f s@(Statement i r e)
        | length e /= 2  = Left NotTwo
        | stmtHasGens s  = Left HasGen
        | isVal $ head e = Left NotRes
        | otherwise      = error "Implementation error"
    g [] t = t
    g (s:ss) (nottwo, hasgen, notres, invval, l) =
        case f s of
            Right stmt  -> g ss (nottwo, hasgen, notres, invval, stmt:l)
            Left NotTwo -> g ss (s:nottwo, hasgen, notres, invval, l)
            Left HasGen -> g ss (nottwo, s:hasgen, notres, invval, l)
            Left NotRes -> g ss (nottwo, hasgen, s:notres, invval, l)
            Left InvVal -> g ss (nottwo, hasgen, notres, s:invval, l)