{-# LANGUAGE NamedFieldPuns #-} {-# LANGUAGE RecordWildCards #-} {-# LANGUAGE OverloadedLists #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} -- | Parsing Dhall expressions. module Dhall.Parser.Expression where import Control.Applicative (Alternative(..), optional) import Data.ByteArray.Encoding (Base(..)) import Data.Functor (void) import Data.Semigroup (Semigroup(..)) import Data.Text (Text) import Dhall.Core import Prelude hiding (const, pi) import Text.Parser.Combinators (choice, try, ()) import qualified Crypto.Hash import qualified Data.ByteArray.Encoding import qualified Data.ByteString import qualified Data.Char import qualified Data.List.NonEmpty import qualified Data.Sequence import qualified Data.Text import qualified Data.Text.Encoding import qualified Text.Megaparsec import qualified Text.Parser.Char import Dhall.Parser.Combinators import Dhall.Parser.Token noted :: Parser (Expr Src a) -> Parser (Expr Src a) noted parser = do before <- Text.Megaparsec.getSourcePos (tokens, e) <- Text.Megaparsec.match parser after <- Text.Megaparsec.getSourcePos let src₀ = Src before after tokens case e of Note src₁ _ | laxSrcEq src₀ src₁ -> return e _ -> return (Note src₀ e) shallowDenote :: Expr s a -> Expr s a shallowDenote (Note _ e) = shallowDenote e shallowDenote e = e completeExpression :: Parser a -> Parser (Expr Src a) completeExpression embedded = completeExpression_ where completeExpression_ = do whitespace expression expression = noted ( choice [ alternative0 , alternative1 , alternative2 , alternative3 , alternative4 ] ) "expression" where alternative0 = do _lambda _openParens a <- label _colon b <- expression _closeParens _arrow c <- expression return (Lam a b c) alternative1 = do _if a <- expression _then b <- expression _else c <- expression return (BoolIf a b c) alternative2 = do let binding = do _let c <- label d <- optional (do _colon expression ) _equal e <- expression return (Binding c d e) as <- Data.List.NonEmpty.some1 binding _in b <- expression return (Let as b) alternative3 = do _forall _openParens a <- label _colon b <- expression _closeParens _arrow c <- expression return (Pi a b c) alternative4 = do a <- operatorExpression let alternative4A = do _arrow b <- expression return (Pi "_" a b) let alternative4B = do _colon b <- expression case (shallowDenote a, shallowDenote b) of (ListLit _ xs, App f c) -> case shallowDenote f of List -> return (ListLit (Just c) xs) Optional -> case xs of [x] -> return (OptionalLit c (Just x)) [] -> return (OptionalLit c Nothing) _ -> return (Annot a b) _ -> return (Annot a b) (Merge c d _, e) -> return (Merge c d (Just e)) _ -> return (Annot a b) alternative4A <|> alternative4B <|> pure a operatorExpression = precedence0Expression makeOperatorExpression subExpression operatorParser = noted (do a <- subExpression b <- Text.Megaparsec.many $ do op <- operatorParser r <- subExpression return (\l -> l `op` r) return (foldl (\x f -> f x) a b) ) precedence0Operator = ImportAlt <$ _importAlt <|> BoolOr <$ _or <|> TextAppend <$ _textAppend <|> NaturalPlus <$ _plus <|> ListAppend <$ _listAppend precedence1Operator = BoolAnd <$ _and <|> Combine <$ _combine precedence2Operator = CombineTypes <$ _combineTypes <|> Prefer <$ _prefer <|> NaturalTimes <$ _times <|> BoolNE <$ _notEqual precedence3Operator = BoolEQ <$ _doubleEqual precedence0Expression = makeOperatorExpression precedence1Expression precedence0Operator precedence1Expression = makeOperatorExpression precedence2Expression precedence1Operator precedence2Expression = makeOperatorExpression precedence3Expression precedence2Operator precedence3Expression = makeOperatorExpression applicationExpression precedence3Operator applicationExpression = do f <- (do _constructors; return Constructors) <|> (do _Some; return Some) <|> return id a <- noted importExpression b <- Text.Megaparsec.many (noted importExpression) return (foldl app (f a) b) where app nL@(Note (Src before _ bytesL) _) nR@(Note (Src _ after bytesR) _) = Note (Src before after (bytesL <> bytesR)) (App nL nR) app nL nR = App nL nR importExpression = noted (choice [ alternative0, alternative1 ]) where alternative0 = do a <- embedded return (Embed a) alternative1 = selectorExpression selectorExpression = noted (do a <- primitiveExpression let left x e = Field e x let right xs e = Project e xs b <- Text.Megaparsec.many (try (do _dot; fmap left label <|> fmap right labels)) return (foldl (\e k -> k e) a b) ) primitiveExpression = noted ( choice [ alternative00 , alternative01 , alternative02 , alternative03 , alternative04 , alternative05 , alternative06 , alternative07 , alternative37 , alternative09 , builtin "built-in expression" ] ) <|> alternative38 where alternative00 = do n <- Text.Megaparsec.getOffset a <- try doubleLiteral b <- if isInfinite a then Text.Megaparsec.setOffset n *> fail "double out of bounds" else return a return (DoubleLit b) alternative01 = do a <- try naturalLiteral return (NaturalLit a) alternative02 = do a <- try integerLiteral return (IntegerLit a) alternative03 = textLiteral alternative04 = (do _openBrace a <- recordTypeOrLiteral _closeBrace return a ) "record type or literal" alternative05 = (do _openAngle a <- unionTypeOrLiteral _closeAngle return a ) "union type or literal" alternative06 = listLiteral alternative07 = do _merge a <- importExpression b <- importExpression return (Merge a b Nothing) alternative09 = do a <- try doubleInfinity return (DoubleLit a) builtin = do let predicate c = c == 'N' || c == 'I' || c == 'D' || c == 'L' || c == 'O' || c == 'B' || c == 'S' || c == 'T' || c == 'F' || c == 'K' let nan = (0.0/0.0) c <- Text.Megaparsec.lookAhead (Text.Megaparsec.satisfy predicate) case c of 'N' -> choice [ NaturalFold <$ _NaturalFold , NaturalBuild <$ _NaturalBuild , NaturalIsZero <$ _NaturalIsZero , NaturalEven <$ _NaturalEven , NaturalOdd <$ _NaturalOdd , NaturalToInteger <$ _NaturalToInteger , NaturalToInteger <$ _NaturalToInteger , NaturalShow <$ _NaturalShow , Natural <$ _Natural , None <$ _None , DoubleLit nan <$ _NaN ] 'I' -> choice [ IntegerShow <$ _IntegerShow , IntegerToDouble <$ _IntegerToDouble , Integer <$ _Integer ] 'D' -> choice [ DoubleShow <$ _DoubleShow , Double <$ _Double ] 'L' -> choice [ ListBuild <$ _ListBuild , ListFold <$ _ListFold , ListLength <$ _ListLength , ListHead <$ _ListHead , ListLast <$ _ListLast , ListIndexed <$ _ListIndexed , ListReverse <$ _ListReverse , List <$ _List ] 'O' -> choice [ OptionalFold <$ _OptionalFold , OptionalBuild <$ _OptionalBuild , Optional <$ _Optional ] 'B' -> Bool <$ _Bool 'S' -> Const Sort <$ _Sort 'T' -> choice [ Text <$ _Text , BoolLit True <$ _True , Const Type <$ _Type ] 'F' -> BoolLit False <$ _False 'K' -> Const Kind <$ _Kind _ -> empty alternative37 = do a <- identifier return (Var a) alternative38 = do _openParens a <- expression _closeParens return a doubleQuotedChunk = choice [ interpolation , unescapedCharacterFast , unescapedCharacterSlow , escapedCharacter ] where interpolation = do _ <- Text.Parser.Char.text "${" e <- completeExpression_ _ <- Text.Parser.Char.char '}' return (Chunks [(mempty, e)] mempty) unescapedCharacterFast = do t <- Text.Megaparsec.takeWhile1P Nothing predicate return (Chunks [] t) where predicate c = ( ('\x20' <= c && c <= '\x21' ) || ('\x23' <= c && c <= '\x5B' ) || ('\x5D' <= c && c <= '\x10FFFF') ) && c /= '$' unescapedCharacterSlow = do _ <- Text.Megaparsec.single '$' return (Chunks [] "$") escapedCharacter = do _ <- Text.Parser.Char.char '\\' c <- choice [ quotationMark , dollarSign , backSlash , forwardSlash , backSpace , formFeed , lineFeed , carriageReturn , tab , unicode ] return (Chunks [] (Data.Text.singleton c)) where quotationMark = Text.Parser.Char.char '"' dollarSign = Text.Parser.Char.char '$' backSlash = Text.Parser.Char.char '\\' forwardSlash = Text.Parser.Char.char '/' backSpace = do _ <- Text.Parser.Char.char 'b'; return '\b' formFeed = do _ <- Text.Parser.Char.char 'f'; return '\f' lineFeed = do _ <- Text.Parser.Char.char 'n'; return '\n' carriageReturn = do _ <- Text.Parser.Char.char 'r'; return '\r' tab = do _ <- Text.Parser.Char.char 't'; return '\t' unicode = do _ <- Text.Parser.Char.char 'u'; n0 <- hexNumber n1 <- hexNumber n2 <- hexNumber n3 <- hexNumber let n = ((n0 * 16 + n1) * 16 + n2) * 16 + n3 return (Data.Char.chr n) doubleQuotedLiteral = do _ <- Text.Parser.Char.char '"' chunks <- Text.Megaparsec.many doubleQuotedChunk _ <- Text.Parser.Char.char '"' return (mconcat chunks) singleQuoteContinue = choice [ escapeSingleQuotes , interpolation , escapeInterpolation , endLiteral , unescapedCharacterFast , unescapedCharacterSlow , tab , endOfLine ] where escapeSingleQuotes = do _ <- "'''" :: Parser Text b <- singleQuoteContinue return ("''" <> b) interpolation = do _ <- Text.Parser.Char.text "${" a <- completeExpression_ _ <- Text.Parser.Char.char '}' b <- singleQuoteContinue return (Chunks [(mempty, a)] mempty <> b) escapeInterpolation = do _ <- Text.Parser.Char.text "''${" b <- singleQuoteContinue return ("${" <> b) endLiteral = do _ <- Text.Parser.Char.text "''" return mempty unescapedCharacterFast = do a <- Text.Megaparsec.takeWhile1P Nothing predicate b <- singleQuoteContinue return (Chunks [] a <> b) where predicate c = ('\x20' <= c && c <= '\x10FFFF') && c /= '$' && c /= '\'' unescapedCharacterSlow = do a <- satisfy predicate b <- singleQuoteContinue return (Chunks [] a <> b) where predicate c = c == '$' || c == '\'' endOfLine = do a <- "\n" <|> "\r\n" b <- singleQuoteContinue return (Chunks [] a <> b) tab = do _ <- Text.Parser.Char.char '\t' b <- singleQuoteContinue return ("\t" <> b) singleQuoteLiteral = do _ <- Text.Parser.Char.text "''" -- This is technically not in the grammar, but it's still equivalent to the -- original grammar and an easy way to discard the first character if it's -- a newline _ <- optional endOfLine a <- singleQuoteContinue return (dedent a) where endOfLine = void (Text.Parser.Char.char '\n' ) <|> void (Text.Parser.Char.text "\r\n") textLiteral = (do literal <- doubleQuotedLiteral <|> singleQuoteLiteral whitespace return (TextLit literal) ) "text literal" recordTypeOrLiteral = choice [ alternative0 , alternative1 , alternative2 ] where alternative0 = do _equal return (RecordLit mempty) alternative1 = nonEmptyRecordTypeOrLiteral alternative2 = return (Record mempty) nonEmptyRecordTypeOrLiteral = do a <- label let nonEmptyRecordType = do _colon b <- expression e <- Text.Megaparsec.many (do _comma c <- label _colon d <- expression return (c, d) ) m <- toMap ((a, b) : e) return (Record m) let nonEmptyRecordLiteral = do _equal b <- expression e <- Text.Megaparsec.many (do _comma c <- label _equal d <- expression return (c, d) ) m <- toMap ((a, b) : e) return (RecordLit m) nonEmptyRecordType <|> nonEmptyRecordLiteral unionTypeOrLiteral = nonEmptyUnionTypeOrLiteral <|> return (Union mempty) nonEmptyUnionTypeOrLiteral = do (f, kvs) <- loop m <- toMap kvs return (f m) where loop = do a <- label let alternative0 = do _equal b <- expression kvs <- Text.Megaparsec.many (do _bar c <- label _colon d <- expression return (c, d) ) return (UnionLit a b, kvs) let alternative1 = do _colon b <- expression let alternative2 = do _bar (f, kvs) <- loop return (f, (a, b):kvs) let alternative3 = return (Union, [(a, b)]) alternative2 <|> alternative3 alternative0 <|> alternative1 listLiteral = (do _openBracket a <- Text.Megaparsec.sepBy expression _comma _closeBracket return (ListLit Nothing (Data.Sequence.fromList a)) ) "list literal" env :: Parser ImportType env = do _ <- Text.Parser.Char.text "env:" a <- (alternative0 <|> alternative1) whitespace return (Env a) where alternative0 = bashEnvironmentVariable alternative1 = do _ <- Text.Parser.Char.char '"' a <- posixEnvironmentVariable _ <- Text.Parser.Char.char '"' return a localRaw :: Parser ImportType localRaw = choice [ parentPath , herePath , homePath , try absolutePath ] where parentPath = do _ <- ".." :: Parser Text File (Directory segments) final <- file_ return (Local Here (File (Directory (segments ++ [".."])) final)) herePath = do _ <- "." :: Parser Text file <- file_ return (Local Here file) homePath = do _ <- "~" :: Parser Text file <- file_ return (Local Home file) absolutePath = do file <- file_ return (Local Absolute file) local :: Parser ImportType local = do a <- localRaw whitespace return a http :: Parser ImportType http = do url <- httpRaw whitespace headers <- optional (do _using (importHashed_ <|> (_openParens *> importHashed_ <* _closeParens)) ) return (Remote (url { headers })) missing :: Parser ImportType missing = do _missing return Missing importType_ :: Parser ImportType importType_ = do let predicate c = c == '~' || c == '.' || c == '/' || c == 'h' || c == 'e' || c == 'm' _ <- Text.Megaparsec.lookAhead (Text.Megaparsec.satisfy predicate) choice [ local, http, env, missing ] importHashed_ :: Parser ImportHashed importHashed_ = do importType <- importType_ hash <- optional importHash_ return (ImportHashed {..}) where importHash_ = do _ <- Text.Parser.Char.text "sha256:" text <- count 64 (satisfy hexdig "hex digit") whitespace let strictBytes16 = Data.Text.Encoding.encodeUtf8 text strictBytes <- case Data.ByteArray.Encoding.convertFromBase Base16 strictBytes16 of Left string -> fail string Right strictBytes -> return (strictBytes :: Data.ByteString.ByteString) case Crypto.Hash.digestFromByteString strictBytes of Nothing -> fail "Invalid sha256 hash" Just h -> pure h import_ :: Parser Import import_ = (do importHashed <- importHashed_ importMode <- alternative <|> pure Code return (Import {..}) ) "import" where alternative = do _as _Text return RawText -- | Similar to `Dhall.Core.renderChunks` except that this doesn't bother to -- render interpolated expressions to avoid a `Buildable a` constraint. The -- interpolated contents are not necessary for computing how much to dedent a -- multi-line string -- -- This also doesn't include the surrounding quotes since they would interfere -- with the whitespace detection renderChunks :: Chunks s a -> Text renderChunks (Chunks a b) = foldMap renderChunk a <> b where renderChunk :: (Text, Expr s a) -> Text renderChunk (c, _) = c <> "${x}" dedent :: Chunks Src a -> Chunks Src a dedent chunks0 = process chunks0 where text0 = renderChunks chunks0 lines0 = Data.Text.lines text0 isEmpty = Data.Text.all Data.Char.isSpace nonEmptyLines = filter (not . isEmpty) lines0 indentLength line = Data.Text.length (Data.Text.takeWhile Data.Char.isSpace line) shortestIndent = case nonEmptyLines of [] -> 0 _ -> minimum (map indentLength nonEmptyLines) -- The purpose of this complicated `trimBegin`/`trimContinue` is to ensure -- that we strip leading whitespace without stripping whitespace after -- variable interpolation -- This is the trim function we use up until the first variable -- interpolation, dedenting all lines trimBegin = Data.Text.intercalate "\n" . map (Data.Text.drop shortestIndent) . Data.Text.splitOn "\n" -- This is the trim function we use after each variable interpolation -- where we indent each line except the first line (since it's not a true -- beginning of a line) trimContinue text = Data.Text.intercalate "\n" lines_ where lines_ = case Data.Text.splitOn "\n" text of [] -> [] l:ls -> l:map (Data.Text.drop shortestIndent) ls -- This is the loop that drives whether or not to use `trimBegin` or -- `trimContinue`. We call this function with `trimBegin`, but after the -- first interpolation we switch permanently to `trimContinue` process (Chunks ((x0, y0):xys) z) = Chunks ((trimBegin x0, y0):xys') (trimContinue z) where xys' = [ (trimContinue x, y) | (x, y) <- xys ] process (Chunks [] z) = Chunks [] (trimBegin z)