{-# LANGUAGE DeriveDataTypeable         #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE OverloadedStrings          #-}

-- | This module contains Dhall's parsing logic

module Dhall.Parser (
    -- * Utilities
      exprFromText

    -- * Parsers
    , expr

    -- * Types
    , Src(..)
    , ParseError(..)
    , Parser(..)
    ) where

import Control.Applicative (Alternative(..), optional)
import Control.Exception (Exception)
import Control.Monad (MonadPlus)
import Data.ByteString (ByteString)
import Data.Map (Map)
import Data.Monoid ((<>))
import Data.Sequence (ViewL(..))
import Data.Text.Buildable (Buildable(..))
import Data.Text.Lazy (Text)
import Data.Text.Lazy.Builder (Builder)
import Data.Typeable (Typeable)
import Data.Vector (Vector)
import Dhall.Core (Const(..), Expr(..), Path(..), Var(..))
import Filesystem.Path (FilePath)
import Prelude hiding (FilePath, const, pi)
import Text.PrettyPrint.ANSI.Leijen (Doc)
import Text.Parser.Combinators (choice, try, (<?>))
import Text.Parser.Expression (Assoc(..), Operator(..))
import Text.Parser.Token (IdentifierStyle(..), TokenParsing(..))
import Text.Parser.Token.Highlight (Highlight(..))
import Text.Parser.Token.Style (CommentStyle(..))
import Text.Trifecta
    (CharParsing, DeltaParsing, MarkParsing, Parsing, Result(..))
import Text.Trifecta.Delta (Delta)

import qualified Data.Char
import qualified Data.HashSet
import qualified Data.Map
import qualified Data.ByteString
import qualified Data.ByteString.Lazy
import qualified Data.List
import qualified Data.Sequence
import qualified Data.Text
import qualified Data.Text.Lazy
import qualified Data.Text.Lazy.Builder
import qualified Data.Text.Lazy.Encoding
import qualified Data.Vector
import qualified Dhall.Core
import qualified Filesystem.Path.CurrentOS
import qualified Text.Parser.Char
import qualified Text.Parser.Combinators
import qualified Text.Parser.Expression
import qualified Text.Parser.Token
import qualified Text.Parser.Token.Style
import qualified Text.PrettyPrint.ANSI.Leijen
import qualified Text.Trifecta
import qualified Text.Trifecta.Combinators
import qualified Text.Trifecta.Delta

-- | Source code extract
data Src = Src Delta Delta ByteString deriving (Show)

instance Buildable Src where
    build (Src begin _ bytes) =
            build text <> "\n"
        <>  "\n"
        <>  build (show (Text.PrettyPrint.ANSI.Leijen.pretty begin))
        <>  "\n"
      where
        bytes' = Data.ByteString.Lazy.fromStrict bytes

        text = Data.Text.Lazy.strip (Data.Text.Lazy.Encoding.decodeUtf8 bytes')

{-| A `Parser` that is almost identical to
    @"Text.Trifecta".`Text.Trifecta.Parser`@ except treating Haskell-style
    comments as whitespace
-}
newtype Parser a = Parser { unParser :: Text.Trifecta.Parser a }
    deriving
    (   Functor
    ,   Applicative
    ,   Monad
    ,   Alternative
    ,   MonadPlus
    ,   Parsing
    ,   CharParsing
    ,   DeltaParsing
    ,   MarkParsing Delta
    )

instance TokenParsing Parser where
    someSpace =
        Text.Parser.Token.Style.buildSomeSpaceParser 
            (Parser someSpace)
            Text.Parser.Token.Style.haskellCommentStyle

    nesting (Parser m) = Parser (nesting m)

    semi = Parser semi

    highlight h (Parser m) = Parser (highlight h m)

    token parser = do
        r <- parser
        Text.Parser.Token.whiteSpace
        return r

identifierStyle :: IdentifierStyle Parser
identifierStyle = IdentifierStyle
    { _styleName     = "dhall"
    , _styleStart    =
        Text.Parser.Char.oneOf (['A'..'Z'] ++ ['a'..'z'] ++ "_")
    , _styleLetter   =
        Text.Parser.Char.oneOf (['A'..'Z'] ++ ['a'..'z'] ++ ['0'..'9'] ++ "_/")
    , _styleReserved = Data.HashSet.fromList
        [ "let"
        , "in"
        , "Type"
        , "Kind"
        , "forall"
        , "Bool"
        , "True"
        , "False"
        , "merge"
        , "if"
        , "then"
        , "else"
        , "Natural"
        , "Natural/fold"
        , "Natural/build"
        , "Natural/isZero"
        , "Natural/even"
        , "Natural/odd"
        , "Integer"
        , "Double"
        , "Text"
        , "List"
        , "List/build"
        , "List/fold"
        , "List/length"
        , "List/head"
        , "List/last"
        , "List/indexed"
        , "List/reverse"
        , "Optional"
        , "Optional/fold"
        ]
    , _styleHighlight         = Identifier
    , _styleReservedHighlight = ReservedIdentifier
    }

noted :: Parser (Expr Src a) -> Parser (Expr Src a)
noted parser = do
    before        <- Text.Trifecta.position
    (expr, bytes) <- Text.Trifecta.slicedWith (,) parser
    after         <- Text.Trifecta.position
    return (Note (Src before after bytes) expr)

toMap :: [(Text, a)] -> Parser (Map Text a)
toMap kvs = do
    let adapt (k, v) = (k, pure v)
    let m = Data.Map.fromListWith (<|>) (fmap adapt kvs)
    let action k vs = case Data.Sequence.viewl vs of
            EmptyL  -> empty
            v :< vs' ->
                if null vs'
                then pure v
                else
                    Text.Parser.Combinators.unexpected
                        ("duplicate field: " ++ Data.Text.Lazy.unpack k)
    Data.Map.traverseWithKey action m

reserve :: String -> Parser ()
reserve string = do
    _ <- Text.Parser.Token.reserve identifierStyle string
    return ()

symbol :: String -> Parser ()
symbol string = do
    _ <- Text.Parser.Token.symbol string
    return ()

stringLiteral :: Parser Builder
stringLiteral = Text.Parser.Token.stringLiteral <|> doubleSingleQuoteString

doubleSingleQuoteString :: Parser Builder
doubleSingleQuoteString = do
    builder <- Text.Parser.Token.token p0
    return (process builder)
  where
    process =
          Data.Text.Lazy.Builder.fromLazyText
        . Data.Text.Lazy.unlines
        . trim
        . Data.Text.Lazy.lines
        . Data.Text.Lazy.Builder.toLazyText

    trim lines_ = map (Data.Text.Lazy.drop shortestIndent) lines_
      where
        isEmpty = Data.Text.Lazy.all Data.Char.isSpace

        nonEmptyLines = filter (not . isEmpty) lines_

        indentLength line =
            Data.Text.Lazy.length
                (Data.Text.Lazy.takeWhile Data.Char.isSpace line)

        shortestIndent = case nonEmptyLines of
            [] -> 0
            _  -> minimum (map indentLength nonEmptyLines)

    p0 = do
        Text.Parser.Char.string "''"
        p1

    p1 = p2 <|> p3 <|> p4 <|> p5

    p2 = do
        Text.Parser.Char.text "'''"
        s1 <- p1
        return ("''" <> s1)

    p3 = do
        Text.Parser.Char.text "''"
        return ""

    p4 = do
        s0 <- Text.Parser.Char.text "'"
        s1 <- p1
        return (Data.Text.Lazy.Builder.fromText s0 <> s1)

    p5 = do
        s0 <- some (Text.Trifecta.satisfy (/= '\''))
        s1 <- p1
        return (Data.Text.Lazy.Builder.fromString s0 <> s1)

lambda :: Parser ()
lambda = symbol "\\" <|> symbol "λ"

pi :: Parser ()
pi = reserve "forall" <|> reserve "∀"

arrow :: Parser ()
arrow = symbol "->" <|> symbol "→"

combine :: Parser ()
combine = symbol "/\\" <|> symbol "∧"

label :: Parser Text
label = Text.Parser.Token.ident identifierStyle <?> "label"

-- | Parser for a top-level Dhall expression
expr :: Parser (Expr Src Path)
expr = exprA

exprA :: Parser (Expr Src Path)
exprA = do
    a <- exprB

    let exprA0 = do
            symbol ":"
            b <- exprA
            return (Annot a b)

    let exprA1 = pure a

    exprA0 <|> exprA1

exprB :: Parser (Expr Src Path)
exprB = choice
    [   noted      exprB0
    ,   noted      exprB1
    ,   noted      exprB3
    ,   noted      exprB5
    ,   noted      exprB6
    ,   noted      exprB7
    ,   noted (try exprB2)
    ,              exprB8
    ]
  where
    exprB0 = do
        lambda
        symbol "("
        a <- label
        symbol ":"
        b <- exprA
        symbol ")"
        arrow
        c <- exprB
        return (Lam a b c)

    exprB1 = do
        reserve "if"
        a <- exprA
        reserve "then"
        b <- exprB
        reserve "else"
        c <- exprC
        return (BoolIf a b c)

    exprB2 = do
        a <- exprC
        arrow
        b <- exprB
        return (Pi "_" a b)

    exprB3 = do
        pi
        symbol "("
        a <- label
        symbol ":"
        b <- exprA
        symbol ")"
        arrow
        c <- exprB
        return (Pi a b c)

    exprB5 = do
        reserve "let"
        a <- label
        b <- optional (do
            symbol ":"
            exprA )
        symbol "="
        c <- exprA
        reserve "in"
        d <- exprB
        return (Let a b c d)

    exprB6 = do
        symbol "["
        a <- elems
        symbol "]"
        symbol ":"
        b <- listLike
        c <- exprE
        return (b c (Data.Vector.fromList a))

    exprB7 = do
        reserve "merge"
        a <- exprE
        b <- exprE
        symbol ":"
        c <- exprD
        return (Merge a b c)

    exprB8 = exprC

listLike :: Parser (Expr Src Path -> Vector (Expr Src Path) -> Expr Src Path)
listLike =
    (   listLike0
    <|> listLike1
    )
  where
    listLike0 = do
        reserve "List"
        return ListLit

    listLike1 = do
        reserve "Optional"
        return OptionalLit

exprC :: Parser (Expr Src Path)
exprC = exprC0
  where
    chain pA pOp op pB = noted (do
        a <- pA
        try (do pOp <?> "operator"; b <- pB; return (op a b)) <|> pure a )

    exprC0 = chain exprC1 (symbol "||") BoolOr       exprC0
    exprC2 = chain exprC3 (symbol "++") TextAppend   exprC2
    exprC1 = chain exprC2 (symbol "+" ) NaturalPlus  exprC1
    exprC3 = chain exprC4 (symbol "&&") BoolAnd      exprC3
    exprC4 = chain exprC5  combine      Combine      exprC4
    exprC5 = chain exprC6 (symbol "*" ) NaturalTimes exprC5
    exprC6 = chain exprC7 (symbol "==") BoolEQ       exprC6
    exprC7 = chain exprD  (symbol "!=") BoolNE       exprC7

-- We can't use left-recursion to define `exprD` otherwise the parser will
-- loop infinitely. However, I'd still like to use left-recursion in the
-- definition because left recursion greatly simplifies the use of `noted`.  The
-- work-around is to parse in two phases:
--
-- * First, parse to count how many arguments the function is applied to
-- * Second, restart the parse using left recursion bounded by the number of
--   arguments
exprD :: Parser (Expr Src Path)
exprD = do
    es <- some (noted (try exprE))
    let app nL@(Note (Src before _ bytesL) eL) nR@(Note (Src _ after bytesR) eR) =
            Note (Src before after (bytesL <> bytesR)) (App nL nR)
        app _ _ = Dhall.Core.internalError
            ("Dhall.Parser.exprD: foldl1 app (" <> Data.Text.pack (show es) <> ")")
    return (Data.List.foldl1 app es)

exprE :: Parser (Expr Src Path)
exprE = noted (do
    a <- exprF
    b <- many (try (do
        symbol "."
        label ))
    return (Data.List.foldl Field a b) )

exprF :: Parser (Expr Src Path)
exprF = choice
    [   noted (try exprF26)
    ,   noted (try exprF25)
    ,   noted      exprF24
    ,   noted      exprF27
    ,   noted (try exprF28)
    ,   noted      exprF29
    ,   noted (try exprF30)
    ,   noted      exprF31
    ,   noted      exprF32
    ,   (choice
            [   noted      exprF03
            ,   noted      exprF04
            ,   noted      exprF05
            ,   noted      exprF06
            ,   noted      exprF07
            ,   noted      exprF12
            ,   noted      exprF13
            ,   noted      exprF14
            ,   noted      exprF15
            ,   noted      exprF16
            ,   noted      exprF17
            ,   noted      exprF18
            ,   noted      exprF20
            ,   noted      exprF21
            ,   noted      exprF19
            ,   noted      exprF02
            ,   noted      exprF08
            ,   noted      exprF09
            ,   noted      exprF10
            ,   noted      exprF11
            ,   noted      exprF22
            ,   noted      exprF23
            ,   noted      exprF01
            ]
        ) <?> "built-in value"
    ,   noted      exprF00
    ,              exprF33
    ]
  where
    exprF00 = do
        a <- var
        return (Var a)

    exprF01 = do
        a <- const
        return (Const a)

    exprF02 = do
        reserve "Natural"
        return Natural

    exprF03 = do
        reserve "Natural/fold"
        return NaturalFold

    exprF04 = do
        reserve "Natural/build"
        return NaturalBuild

    exprF05 = do
        reserve "Natural/isZero"
        return NaturalIsZero

    exprF06 = do
        reserve "Natural/even"
        return NaturalEven

    exprF07 = do
        reserve "Natural/odd"
        return NaturalOdd

    exprF08 = do
        reserve "Integer"
        return Integer

    exprF09 = do
        reserve "Double"
        return Double

    exprF10 = do
        reserve "Text"
        return Text

    exprF11 = do
        reserve "List"
        return List

    exprF12 = do
        reserve "List/build"
        return ListBuild

    exprF13 = do
        reserve "List/fold"
        return ListFold

    exprF14 = do
        reserve "List/length"
        return ListLength

    exprF15 = do
        reserve "List/head"
        return ListHead

    exprF16 = do
        reserve "List/last"
        return ListLast

    exprF17 = do
        reserve "List/indexed"
        return ListIndexed

    exprF18 = do
        reserve "List/reverse"
        return ListReverse

    exprF19 = do
        reserve "Optional"
        return Optional

    exprF20 = do
        reserve "Optional/fold"
        return OptionalFold

    exprF21 = do
        reserve "Bool"
        return Bool

    exprF22 = do
        reserve "True"
        return (BoolLit True)

    exprF23 = do
        reserve "False"
        return (BoolLit False)

    exprF24 = do
        a <- Text.Parser.Token.integer
        return (IntegerLit a)

    exprF25 = (do
        Text.Parser.Char.char '+'
        a <- Text.Parser.Token.natural
        return (NaturalLit (fromIntegral a)) ) <?> "natural"

    exprF26 = do
        sign <-  fmap (\_ -> negate) (Text.Parser.Char.char '-')
             <|> fmap (\_ -> id    ) (Text.Parser.Char.char '+')
             <|> pure id
        a <- Text.Parser.Token.double
        return (DoubleLit (sign a))

    exprF27 = do
        a <- stringLiteral
        return (TextLit a)

    exprF28 = record <?> "record type"

    exprF29 = recordLit <?> "record literal"

    exprF30 = union <?> "union type"

    exprF31 = unionLit <?> "union literal"

    exprF32 = do
        a <- import_ <?> "import"
        return (Embed a)

    exprF33 = do
        symbol "("
        a <- exprA
        symbol ")"
        return a

const :: Parser Const
const = const0
    <|> const1
  where
    const0 = do
        reserve "Type"
        return Type

    const1 = do
        reserve "Kind"
        return Kind

var :: Parser Var
var = do
    a <- label
    m <- optional (do
        symbol "@"
        Text.Parser.Token.natural )
    let b = case m of
            Just b  -> b
            Nothing -> 0
    return (V a b)

elems :: Parser [Expr Src Path]
elems = Text.Parser.Combinators.sepBy exprA (symbol ",")

recordLit :: Parser (Expr Src Path)
recordLit =
        recordLit0
    <|> recordLit1
  where
    recordLit0 = do
        symbol "{=}"
        return (RecordLit (Data.Map.fromList []))

    recordLit1 = do
        symbol "{"
        a <- fieldValues
        b <- toMap a
        symbol "}"
        return (RecordLit b)

fieldValues :: Parser [(Text, Expr Src Path)]
fieldValues =
    Text.Parser.Combinators.sepBy1 fieldValue (symbol ",")

fieldValue :: Parser (Text, Expr Src Path)
fieldValue = do
    a <- label
    symbol "="
    b <- exprA
    return (a, b)

record :: Parser (Expr Src Path)
record = do
    symbol "{"
    a <- fieldTypes
    b <- toMap a
    symbol "}"
    return (Record b)

fieldTypes :: Parser [(Text, Expr Src Path)]
fieldTypes =
    Text.Parser.Combinators.sepBy fieldType (symbol ",")

fieldType :: Parser (Text, Expr Src Path)
fieldType = do
    a <- label
    symbol ":"
    b <- exprA
    return (a, b)

union :: Parser (Expr Src Path)
union = do
    symbol "<"
    a <- alternativeTypes
    b <- toMap a
    symbol ">"
    return (Union b)

alternativeTypes :: Parser [(Text, Expr Src Path)]
alternativeTypes =
    Text.Parser.Combinators.sepBy alternativeType (symbol "|")

alternativeType :: Parser (Text, Expr Src Path)
alternativeType = do
    a <- label
    symbol ":"
    b <- exprA
    return (a, b)

unionLit :: Parser (Expr Src Path)
unionLit =
        try unionLit0
    <|>     unionLit1
  where
    unionLit0 = do
        symbol "<"
        a <- label
        symbol "="
        b <- exprA
        symbol ">"
        return (UnionLit a b Data.Map.empty)

    unionLit1 = do
        symbol "<"
        a <- label
        symbol "="
        b <- exprA
        symbol "|"
        c <- alternativeTypes
        d <- toMap c
        symbol ">"
        return (UnionLit a b d)

import_ :: Parser Path
import_ = do
    a <- import0 <|> import1
    Text.Parser.Token.whiteSpace
    return a
  where
    import0 = do
        a <- file
        return (File a)

    import1 = do
        a <- url
        return (URL a)

file :: Parser FilePath
file =  try (token file0)
    <|>      token file1
    <|>      token file2
  where
    file0 = do
        a <- Text.Parser.Char.string "/"
        b <- many (Text.Parser.Char.satisfy (not . Data.Char.isSpace))
        case b of
            '\\':_ -> empty -- So that "/\" parses as the operator and not a path
            _      -> return ()
        return (Filesystem.Path.CurrentOS.decodeString (a <> b))

    file1 = do
        a <- Text.Parser.Char.string "./"
        b <- many (Text.Parser.Char.satisfy (not . Data.Char.isSpace))
        return (Filesystem.Path.CurrentOS.decodeString (a <> b))

    file2 = do
        a <- Text.Parser.Char.string "../"
        b <- many (Text.Parser.Char.satisfy (not . Data.Char.isSpace))
        return (Filesystem.Path.CurrentOS.decodeString (a <> b))

url :: Parser Text
url =   try url0
    <|> url1
  where
    url0 = do
        a <- Text.Parser.Char.string "https://"
        b <- many (Text.Parser.Char.satisfy (not . Data.Char.isSpace))
        return (Data.Text.Lazy.pack (a <> b))

    url1 = do
        a <- Text.Parser.Char.string "http://"
        b <- many (Text.Parser.Char.satisfy (not . Data.Char.isSpace))
        return (Data.Text.Lazy.pack (a <> b))

-- | A parsing error
newtype ParseError = ParseError Doc deriving (Typeable)

instance Show ParseError where
    show (ParseError doc) = show doc

instance Exception ParseError

-- | Parse an expression from `Text` containing a Dhall program
exprFromText :: Delta -> Text -> Either ParseError (Expr Src Path)
exprFromText delta text = case result of
    Success r       -> Right r
    Failure errInfo -> Left (ParseError (Text.Trifecta._errDoc errInfo))
  where
    string = Data.Text.Lazy.unpack text

    parser = unParser (do
        Text.Parser.Token.whiteSpace
        r <- exprA
        Text.Parser.Combinators.eof
        return r )

    result = Text.Trifecta.parseString parser delta string