{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE OverloadedStrings #-}
-- | Parsing Dhall expressions.
module Dhall.Parser.Expression where

import Control.Applicative (Alternative(..), optional)
import Data.ByteArray.Encoding (Base(..))
import Data.Functor (void)
import Data.Semigroup (Semigroup(..))
import Data.Text (Text)
import Dhall.Core
import Prelude hiding (const, pi)
import Text.Parser.Combinators (choice, try, (<?>))

import qualified Crypto.Hash
import qualified Data.ByteArray.Encoding
import qualified Data.ByteString
import qualified Data.Char
import qualified Data.HashMap.Strict.InsOrd
import qualified Data.Sequence
import qualified Data.Text
import qualified Data.Text.Encoding
import qualified Text.Megaparsec
import qualified Text.Parser.Char

import Dhall.Parser.Combinators
import Dhall.Parser.Token

noted :: Parser (Expr Src a) -> Parser (Expr Src a)
noted parser = do
    before      <- Text.Megaparsec.getPosition
    (tokens, e) <- Text.Megaparsec.match parser
    after       <- Text.Megaparsec.getPosition
    let src₀ = Src before after tokens
    case e of
        Note src₁ _ | src₀ == src₁ -> return e
        _                          -> return (Note src₀ e)

expression :: Parser a -> Parser (Expr Src a)
expression embedded =
    (   noted
        ( choice
            [ alternative0
            , alternative1
            , alternative2
            , alternative3
            , alternative4
            ]
        )
    <|> alternative5
    ) <?> "expression"
  where
    alternative0 = do
        _lambda
        _openParens
        a <- label
        _colon
        b <- expression embedded
        _closeParens
        _arrow
        c <- expression embedded
        return (Lam a b c)

    alternative1 = do
        _if
        a <- expression embedded
        _then
        b <- expression embedded
        _else
        c <- expression embedded
        return (BoolIf a b c)

    alternative2 = do
        _let
        a <- label
        b <- optional (do
            _colon
            expression embedded )
        _equal
        c <- expression embedded
        _in
        d <- expression embedded
        return (Let a b c d)

    alternative3 = do
        _forall
        _openParens
        a <- label
        _colon
        b <- expression embedded
        _closeParens
        _arrow
        c <- expression embedded
        return (Pi a b c)

    alternative4 = do
        a <- try (do a <- operatorExpression embedded; _arrow; return a)
        b <- expression embedded
        return (Pi "_" a b)

    alternative5 = annotatedExpression embedded

annotatedExpression :: Parser a -> Parser (Expr Src a)
annotatedExpression embedded =
    noted
        ( choice
            [ alternative0
            , try alternative1
            , alternative2
            ]
        )
  where
    alternative0 = do
        _merge
        a <- importExpression embedded
        b <- importExpression embedded
        c <- optional (do
            _colon
            applicationExpression embedded )
        return (Merge a b c)

    alternative1 = (do
        _openBracket
        (emptyCollection embedded <|> nonEmptyOptional embedded) )
        <?> "list literal"

    alternative2 = do
        a <- operatorExpression embedded
        b <- optional (do _colon; expression embedded)
        case b of
            Nothing -> return a
            Just c  -> return (Annot a c)

emptyCollection :: Parser a -> Parser (Expr Src a)
emptyCollection embedded = do
    _closeBracket
    _colon
    a <- alternative0 <|> alternative1
    b <- importExpression embedded
    return (a b)
  where
    alternative0 = do
        _List
        return (\a -> ListLit (Just a) empty)

    alternative1 = do
        _Optional
        return (\a -> OptionalLit a empty)

nonEmptyOptional :: Parser a -> Parser (Expr Src a)
nonEmptyOptional embedded = do
    a <- expression embedded
    _closeBracket
    _colon
    _Optional
    b <- importExpression embedded
    return (OptionalLit b (pure a))

operatorExpression :: Parser a -> Parser (Expr Src a)
operatorExpression = importAltExpression

makeOperatorExpression
    :: (Parser a -> Parser (Expr Src a))
    -> Parser ()
    -> (Expr Src a -> Expr Src a -> Expr Src a)
    -> Parser a
    -> Parser (Expr Src a)
makeOperatorExpression subExpression operatorParser operator embedded =
    noted (do
        a <- subExpression embedded
        b <- many (do operatorParser; subExpression embedded)
        return (foldr1 operator (a:b)) )

importAltExpression :: Parser a -> Parser (Expr Src a)
importAltExpression =
    makeOperatorExpression orExpression _importAlt ImportAlt

orExpression :: Parser a -> Parser (Expr Src a)
orExpression =
    makeOperatorExpression plusExpression _or BoolOr

plusExpression :: Parser a -> Parser (Expr Src a)
plusExpression =
    makeOperatorExpression textAppendExpression _plus NaturalPlus

textAppendExpression :: Parser a -> Parser (Expr Src a)
textAppendExpression =
    makeOperatorExpression listAppendExpression _textAppend TextAppend

listAppendExpression :: Parser a -> Parser (Expr Src a)
listAppendExpression =
    makeOperatorExpression andExpression _listAppend ListAppend

andExpression :: Parser a -> Parser (Expr Src a)
andExpression =
    makeOperatorExpression combineExpression _and BoolAnd

combineExpression :: Parser a -> Parser (Expr Src a)
combineExpression =
    makeOperatorExpression preferExpression _combine Combine

preferExpression :: Parser a -> Parser (Expr Src a)
preferExpression =
    makeOperatorExpression combineTypesExpression _prefer Prefer

combineTypesExpression :: Parser a -> Parser (Expr Src a)
combineTypesExpression =
    makeOperatorExpression timesExpression _combineTypes CombineTypes

timesExpression :: Parser a -> Parser (Expr Src a)
timesExpression =
    makeOperatorExpression equalExpression _times NaturalTimes

equalExpression :: Parser a -> Parser (Expr Src a)
equalExpression =
    makeOperatorExpression notEqualExpression _doubleEqual BoolEQ

notEqualExpression :: Parser a -> Parser (Expr Src a)
notEqualExpression =
    makeOperatorExpression applicationExpression _notEqual BoolNE

applicationExpression :: Parser a -> Parser (Expr Src a)
applicationExpression embedded = do
    f <- (do _constructors; return Constructors) <|> return id
    a <- noted (importExpression embedded)
    b <- many (noted (importExpression embedded))
    return (foldl app (f a) b)
  where
    app nL@(Note (Src before _ bytesL) _) nR@(Note (Src _ after bytesR) _) =
        Note (Src before after (bytesL <> bytesR)) (App nL nR)
    app nL nR =
        App nL nR

importExpression :: Parser a -> Parser (Expr Src a)
importExpression embedded = noted (choice [ alternative0, alternative1 ])
  where
    alternative0 = do
        a <- embedded
        return (Embed a)

    alternative1 = selectorExpression embedded

selectorExpression :: Parser a -> Parser (Expr Src a)
selectorExpression embedded = noted (do
    a <- primitiveExpression embedded

    let left  x  e = Field   e x
    let right xs e = Project e xs
    b <- many (try (do _dot; fmap left label <|> fmap right labels))
    return (foldl (\e k -> k e) a b) )

primitiveExpression :: Parser a -> Parser (Expr Src a)
primitiveExpression embedded =
    noted
        ( choice
            [ alternative00
            , alternative01
            , alternative02
            , alternative03
            , alternative04
            , alternative05
            , alternative06
            , alternative37

            , choice
                [ alternative08
                , alternative09
                , alternative10
                , alternative11
                , alternative12
                , alternative13
                , alternative14
                , alternative15
                , alternativeIntegerToDouble
                , alternative16
                , alternative17
                , alternative18
                , alternative19
                , alternative20
                , alternative21
                , alternative22
                , alternative23
                , alternative24
                , alternative25
                , alternative26
                , alternative27
                , alternative28
                , alternative29
                , alternative30
                , alternative31
                , alternative32
                , alternative33
                , alternative34
                , alternative35
                , alternative36
                ] <?> "built-in expression"
            ]
        )
    <|> alternative38
  where
    alternative00 = do
        a <- try doubleLiteral
        return (DoubleLit a)

    alternative01 = do
        a <- try naturalLiteral
        return (NaturalLit a)

    alternative02 = do
        a <- try integerLiteral
        return (IntegerLit a)

    alternative03 = textLiteral embedded

    alternative04 = (do
        _openBrace
        a <- recordTypeOrLiteral embedded
        _closeBrace
        return a ) <?> "record type or literal"

    alternative05 = (do
        _openAngle
        a <- unionTypeOrLiteral embedded
        _closeAngle
        return a ) <?> "union type or literal"

    alternative06 = nonEmptyListLiteral embedded

    alternative08 = do
        _NaturalFold
        return NaturalFold

    alternative09 = do
        _NaturalBuild
        return NaturalBuild

    alternative10 = do
        _NaturalIsZero
        return NaturalIsZero

    alternative11 = do
        _NaturalEven
        return NaturalEven

    alternative12 = do
        _NaturalOdd
        return NaturalOdd

    alternative13 = do
        _NaturalToInteger
        return NaturalToInteger

    alternative14 = do
        _NaturalShow
        return NaturalShow

    alternative15 = do
        _IntegerShow
        return IntegerShow

    alternativeIntegerToDouble = do
        _IntegerToDouble
        return IntegerToDouble

    alternative16 = do
        _DoubleShow
        return DoubleShow

    alternative17 = do
        _ListBuild
        return ListBuild

    alternative18 = do
        _ListFold
        return ListFold

    alternative19 = do
        _ListLength
        return ListLength

    alternative20 = do
        _ListHead
        return ListHead

    alternative21 = do
        _ListLast
        return ListLast

    alternative22 = do
        _ListIndexed
        return ListIndexed

    alternative23 = do
        _ListReverse
        return ListReverse

    alternative24 = do
        _OptionalFold
        return OptionalFold

    alternative25 = do
        _OptionalBuild
        return OptionalBuild

    alternative26 = do
        _Bool
        return Bool

    alternative27 = do
        _Optional
        return Optional

    alternative28 = do
        _Natural
        return Natural

    alternative29 = do
        _Integer
        return Integer

    alternative30 = do
        _Double
        return Double

    alternative31 = do
        _Text
        return Text

    alternative32 = do
        _List
        return List

    alternative33 = do
        _True
        return (BoolLit True)

    alternative34 = do
        _False
        return (BoolLit False)

    alternative35 = do
        _Type
        return (Const Type)

    alternative36 = do
        _Kind
        return (Const Kind)

    alternative37 = do
        a <- identifier
        return (Var a)

    alternative38 = do
        _openParens
        a <- expression embedded
        _closeParens
        return a


doubleQuotedChunk :: Parser a -> Parser (Chunks Src a)
doubleQuotedChunk embedded =
    choice
        [ interpolation
        , unescapedCharacter
        , escapedCharacter
        ]
  where
    interpolation = do
        _ <- Text.Parser.Char.text "${"
        e <- completeExpression embedded
        _ <- Text.Parser.Char.char '}'
        return (Chunks [(mempty, e)] mempty)

    unescapedCharacter = do
        c <- Text.Parser.Char.satisfy predicate
        return (Chunks [] (Data.Text.singleton c))
      where
        predicate c =
                ('\x20' <= c && c <= '\x21'    )
            ||  ('\x23' <= c && c <= '\x5B'    )
            ||  ('\x5D' <= c && c <= '\x10FFFF')

    escapedCharacter = do
        _ <- Text.Parser.Char.char '\\'
        c <- choice
            [ quotationMark
            , dollarSign
            , backSlash
            , forwardSlash
            , backSpace
            , formFeed
            , lineFeed
            , carriageReturn
            , tab
            , unicode
            ]
        return (Chunks [] (Data.Text.singleton c))
      where
        quotationMark = Text.Parser.Char.char '"'

        dollarSign = Text.Parser.Char.char '$'

        backSlash = Text.Parser.Char.char '\\'

        forwardSlash = Text.Parser.Char.char '/'

        backSpace = do _ <- Text.Parser.Char.char 'b'; return '\b'

        formFeed = do _ <- Text.Parser.Char.char 'f'; return '\f'

        lineFeed = do _ <- Text.Parser.Char.char 'n'; return '\n'

        carriageReturn = do _ <- Text.Parser.Char.char 'r'; return '\r'

        tab = do _ <- Text.Parser.Char.char 't'; return '\t'

        unicode = do
            _  <- Text.Parser.Char.char 'u';
            n0 <- hexNumber
            n1 <- hexNumber
            n2 <- hexNumber
            n3 <- hexNumber
            let n = ((n0 * 16 + n1) * 16 + n2) * 16 + n3
            return (Data.Char.chr n)

doubleQuotedLiteral :: Parser a -> Parser (Chunks Src a)
doubleQuotedLiteral embedded = do
    _      <- Text.Parser.Char.char '"'
    chunks <- many (doubleQuotedChunk embedded)
    _      <- Text.Parser.Char.char '"'
    return (mconcat chunks)

singleQuoteContinue :: Parser a -> Parser (Chunks Src a)
singleQuoteContinue embedded =
    choice
        [ escapeSingleQuotes
        , interpolation
        , escapeInterpolation
        , endLiteral
        , unescapedCharacter
        , tab
        , endOfLine
        ]
  where
        escapeSingleQuotes = do
            _ <- "'''" :: Parser Text
            b <- singleQuoteContinue embedded
            return ("''" <> b)

        interpolation = do
            _ <- Text.Parser.Char.text "${"
            a <- completeExpression embedded
            _ <- Text.Parser.Char.char '}'
            b <- singleQuoteContinue embedded
            return (Chunks [(mempty, a)] mempty <> b)

        escapeInterpolation = do
            _ <- Text.Parser.Char.text "''${"
            b <- singleQuoteContinue embedded
            return ("${" <> b)

        endLiteral = do
            _ <- Text.Parser.Char.text "''"
            return mempty

        unescapedCharacter = do
            a <- satisfy predicate
            b <- singleQuoteContinue embedded
            return (Chunks [] a <> b)
          where
            predicate c = '\x20' <= c && c <= '\x10FFFF'

        endOfLine = do
            a <- "\n" <|> "\r\n"
            b <- singleQuoteContinue embedded
            return (Chunks [] a <> b)

        tab = do
            _ <- Text.Parser.Char.char '\t'
            b <- singleQuoteContinue embedded
            return ("\t" <> b)

singleQuoteLiteral :: Parser a -> Parser (Chunks Src a)
singleQuoteLiteral embedded = do
    _ <- Text.Parser.Char.text "''"

    -- This is technically not in the grammar, but it's still equivalent to the
    -- original grammar and an easy way to discard the first character if it's
    -- a newline
    _ <- optional endOfLine

    a <- singleQuoteContinue embedded

    return (dedent a)
  where
    endOfLine =
            void (Text.Parser.Char.char '\n'  )
        <|> void (Text.Parser.Char.text "\r\n")

textLiteral :: Parser a -> Parser (Expr Src a)
textLiteral embedded = (do
    literal <- doubleQuotedLiteral embedded <|> singleQuoteLiteral embedded
    whitespace
    return (TextLit literal) ) <?> "text literal"

recordTypeOrLiteral :: Parser a -> Parser (Expr Src a)
recordTypeOrLiteral embedded =
    choice
        [ alternative0
        , alternative1
        , alternative2
        ]
  where
    alternative0 = do
        _equal
        return (RecordLit Data.HashMap.Strict.InsOrd.empty)

    alternative1 = nonEmptyRecordTypeOrLiteral embedded

    alternative2 = return (Record Data.HashMap.Strict.InsOrd.empty)

nonEmptyRecordTypeOrLiteral :: Parser a -> Parser (Expr Src a)
nonEmptyRecordTypeOrLiteral embedded = do
    a <- label

    let nonEmptyRecordType = do
            _colon
            b <- expression embedded
            e <- many (do
                _comma
                c <- label
                _colon
                d <- expression embedded
                return (c, d) )
            m <- toMap ((a, b) : e)
            return (Record m)

    let nonEmptyRecordLiteral = do
            _equal
            b <- expression embedded
            e <- many (do
                _comma
                c <- label
                _equal
                d <- expression embedded
                return (c, d) )
            m <- toMap ((a, b) : e)
            return (RecordLit m)

    nonEmptyRecordType <|> nonEmptyRecordLiteral

unionTypeOrLiteral :: Parser a -> Parser (Expr Src a)
unionTypeOrLiteral embedded =
        nonEmptyUnionTypeOrLiteral embedded
    <|> return (Union Data.HashMap.Strict.InsOrd.empty)

nonEmptyUnionTypeOrLiteral :: Parser a -> Parser (Expr Src a)
nonEmptyUnionTypeOrLiteral embedded = do
    (f, kvs) <- loop
    m <- toMap kvs
    return (f m)
  where
    loop = do
        a <- label

        let alternative0 = do
                _equal
                b <- expression embedded
                kvs <- many (do
                    _bar
                    c <- label
                    _colon
                    d <- expression embedded
                    return (c, d) )
                return (UnionLit a b, kvs)

        let alternative1 = do
                _colon
                b <- expression embedded

                let alternative2 = do
                        _bar
                        (f, kvs) <- loop
                        return (f, (a, b):kvs)

                let alternative3 = return (Union, [(a, b)])

                alternative2 <|> alternative3

        alternative0 <|> alternative1

nonEmptyListLiteral :: Parser a -> Parser (Expr Src a)
nonEmptyListLiteral embedded = (do
    _openBracket
    a <- expression embedded
    b <- many (do _comma; expression embedded)
    _closeBracket
    return (ListLit Nothing (Data.Sequence.fromList (a:b))) ) <?> "list literal"

completeExpression :: Parser a -> Parser (Expr Src a)
completeExpression embedded = do
    whitespace
    expression embedded

env :: Parser ImportType
env = do
    _ <- Text.Parser.Char.text "env:"
    a <- (alternative0 <|> alternative1)
    whitespace
    return (Env a)
  where
    alternative0 = bashEnvironmentVariable

    alternative1 = do
        _ <- Text.Parser.Char.char '"'
        a <- posixEnvironmentVariable
        _ <- Text.Parser.Char.char '"'
        return a

localRaw :: Parser ImportType
localRaw =
    choice
        [ parentPath
        , herePath
        , homePath
        , try absolutePath
        ]
  where
    parentPath = do
        _    <- ".." :: Parser Text
        File (Directory segments) final <- file_

        return (Local Here (File (Directory (segments ++ [".."])) final))

    herePath = do
        _    <- "." :: Parser Text
        file <- file_

        return (Local Here file)

    homePath = do
        _    <- "~" :: Parser Text
        file <- file_

        return (Local Home file)

    absolutePath = do
        file <- file_

        return (Local Absolute file)

local :: Parser ImportType
local = do
    a <- localRaw
    whitespace
    return a

http :: Parser ImportType
http = do
    (prefix, path, suffix) <- httpRaw
    whitespace
    headers <- optional (do
        _using
        (importHashed_ <|> (_openParens *> importHashed_ <* _closeParens)) )
    return (URL prefix path suffix headers)

missing :: Parser ImportType
missing = do
  _missing
  return Missing

importType_ :: Parser ImportType
importType_ = choice [ local, http, env, missing ]

importHashed_ :: Parser ImportHashed
importHashed_ = do
    importType <- importType_
    hash       <- optional importHash_
    return (ImportHashed {..})
  where
    importHash_ = do
        _ <- Text.Parser.Char.text "sha256:"
        text <- count 64 (satisfy hexdig <?> "hex digit")
        whitespace
        let strictBytes16 = Data.Text.Encoding.encodeUtf8 text
        strictBytes <- case Data.ByteArray.Encoding.convertFromBase Base16 strictBytes16 of
            Left  string      -> fail string
            Right strictBytes -> return (strictBytes :: Data.ByteString.ByteString)
        case Crypto.Hash.digestFromByteString strictBytes of
          Nothing -> fail "Invalid sha256 hash"
          Just h  -> pure h

import_ :: Parser Import
import_ = (do
    importHashed <- importHashed_
    importMode   <- alternative <|> pure Code
    return (Import {..}) ) <?> "import"
  where
    alternative = do
        _as
        _Text
        return RawText

-- | Similar to `Dhall.Core.renderChunks` except that this doesn't bother to
-- render interpolated expressions to avoid a `Buildable a` constraint.  The
-- interpolated contents are not necessary for computing how much to dedent a
-- multi-line string
--
-- This also doesn't include the surrounding quotes since they would interfere
-- with the whitespace detection
renderChunks :: Chunks s a -> Text
renderChunks (Chunks a b) = foldMap renderChunk a <> b
  where
    renderChunk :: (Text, Expr s a) -> Text
    renderChunk (c, _) = c <> "${x}"

dedent :: Chunks Src a -> Chunks Src a
dedent chunks0 = process chunks0
  where
    text0 = renderChunks chunks0

    lines0 = Data.Text.lines text0

    isEmpty = Data.Text.all Data.Char.isSpace

    nonEmptyLines = filter (not . isEmpty) lines0

    indentLength line =
        Data.Text.length (Data.Text.takeWhile Data.Char.isSpace line)

    shortestIndent = case nonEmptyLines of
        [] -> 0
        _  -> minimum (map indentLength nonEmptyLines)

    -- The purpose of this complicated `trimBegin`/`trimContinue` is to ensure
    -- that we strip leading whitespace without stripping whitespace after
    -- variable interpolation

    -- This is the trim function we use up until the first variable
    -- interpolation, dedenting all lines
    trimBegin =
          Data.Text.intercalate "\n"
        . map (Data.Text.drop shortestIndent)
        . Data.Text.splitOn "\n"

    -- This is the trim function we use after each variable interpolation
    -- where we indent each line except the first line (since it's not a true
    -- beginning of a line)
    trimContinue text = Data.Text.intercalate "\n" lines_
      where
        lines_ = case Data.Text.splitOn "\n" text of
            []   -> []
            l:ls -> l:map (Data.Text.drop shortestIndent) ls

    -- This is the loop that drives whether or not to use `trimBegin` or
    -- `trimContinue`.  We call this function with `trimBegin`, but after the
    -- first interpolation we switch permanently to `trimContinue`
    process (Chunks ((x0, y0):xys) z) =
        Chunks ((trimBegin x0, y0):xys') (trimContinue z)
      where
        xys' = [ (trimContinue x, y) | (x, y) <- xys ]
    process (Chunks [] z) =
        Chunks [] (trimBegin z)