{-# LANGUAGE CPP #-} {-# LANGUAGE NamedFieldPuns #-} {-# LANGUAGE RecordWildCards #-} {-# LANGUAGE OverloadedLists #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} -- | Parsing Dhall expressions. module Dhall.Parser.Expression where import Control.Applicative (Alternative(..), optional) import Data.ByteArray.Encoding (Base(..)) import Data.Functor (void) import Data.List.NonEmpty (NonEmpty(..)) import Data.Semigroup (Semigroup(..)) import Data.Text (Text) import Dhall.Core import Dhall.Src (Src(..)) import Prelude hiding (const, pi) import Text.Parser.Combinators (choice, try, ()) import qualified Control.Monad import qualified Data.ByteArray.Encoding import qualified Data.ByteString import qualified Data.Char as Char import qualified Data.Foldable import qualified Data.List import qualified Data.List.NonEmpty import qualified Data.Sequence import qualified Data.Text import qualified Data.Text.Encoding import qualified Dhall.Crypto import qualified Text.Megaparsec #if !MIN_VERSION_megaparsec(7, 0, 0) import qualified Text.Megaparsec.Char as Text.Megaparsec #endif import qualified Text.Parser.Char import Dhall.Parser.Combinators import Dhall.Parser.Token -- | Get the current source position getSourcePos :: Text.Megaparsec.MonadParsec e s m => m Text.Megaparsec.SourcePos getSourcePos = #if MIN_VERSION_megaparsec(7, 0, 0) Text.Megaparsec.getSourcePos #else Text.Megaparsec.getPosition #endif {-# INLINE getSourcePos #-} -- | Get the current source offset (in tokens) getOffset :: Text.Megaparsec.MonadParsec e s m => m Int #if MIN_VERSION_megaparsec(7, 0, 0) getOffset = Text.Megaparsec.stateOffset <$> Text.Megaparsec.getParserState #else getOffset = Text.Megaparsec.stateTokensProcessed <$> Text.Megaparsec.getParserState #endif {-# INLINE getOffset #-} -- | Set the current source offset setOffset :: Text.Megaparsec.MonadParsec e s m => Int -> m () #if MIN_VERSION_megaparsec(7, 0, 0) setOffset o = Text.Megaparsec.updateParserState $ \(Text.Megaparsec.State s _ pst) -> Text.Megaparsec.State s o pst #else setOffset o = Text.Megaparsec.updateParserState $ \(Text.Megaparsec.State s p _ stw) -> Text.Megaparsec.State s p o stw #endif {-# INLINE setOffset #-} {-| Wrap a `Parser` to still match the same text but return only the `Src` span -} src :: Parser a -> Parser Src src parser = do before <- getSourcePos (tokens, _) <- Text.Megaparsec.match parser after <- getSourcePos return (Src before after tokens) {-| Wrap a `Parser` to still match the same text, but to wrap the resulting `Expr` in a `Note` constructor containing the `Src` span -} noted :: Parser (Expr Src a) -> Parser (Expr Src a) noted parser = do before <- getSourcePos (tokens, e) <- Text.Megaparsec.match parser after <- getSourcePos let src₀ = Src before after tokens case e of Note src₁ _ | laxSrcEq src₀ src₁ -> return e _ -> return (Note src₀ e) {-| Parse a complete expression (with leading and trailing whitespace) This corresponds to the @complete-expression@ rule from the official grammar -} completeExpression :: Parser a -> Parser (Expr Src a) completeExpression embedded = completeExpression_ where Parsers {..} = parsers embedded {-| Parse an \"import expression\" This is not the same thing as @`fmap` `Embed`@. This parses any expression of the same or higher precedence as an import expression (such as a selector expression). For example, this parses @(1)@ This corresponds to the @import-expression@ rule from the official grammar -} importExpression :: Parser a -> Parser (Expr Src a) importExpression embedded = importExpression_ where Parsers {..} = parsers embedded {-| For efficiency (and simplicity) we only expose two parsers from the result of the `parsers` function, since these are the only parsers needed outside of this module -} data Parsers a = Parsers { completeExpression_ :: Parser (Expr Src a) , importExpression_ :: Parser (Expr Src a) } -- | Given a parser for imports, parsers :: Parser a -> Parsers a parsers embedded = Parsers {..} where completeExpression_ = do whitespace expression expression = noted ( choice [ alternative0 , alternative1 , alternative2 , alternative3 , alternative4 , alternative5 ] ) "expression" where alternative0 = do _lambda _openParens a <- label _colon b <- expression _closeParens _arrow c <- expression return (Lam a b c) alternative1 = do _if a <- expression _then b <- expression _else c <- expression return (BoolIf a b c) alternative2 = do let binding = do _letOnly src0 <- src nonemptyWhitespace c <- labelOnly src1 <- src whitespace d <- optional (do _colonOnly src2 <- src nonemptyWhitespace e <- expression return (Just src2, e) ) _equalOnly src3 <- src whitespace f <- expression return (Binding (Just src0) c (Just src1) d (Just src3) f) as <- Data.List.NonEmpty.some1 binding _in b <- expression -- 'Note's in let-in-let: -- -- Subsequent @let@s that are not separated by an @in@ only get a -- single surrounding 'Note'. For example: -- -- let x = a -- let y = b -- in let z = c -- in x -- -- is parsed as -- -- (Note … -- (Let x … -- (Let y … -- (Note … -- (Let z … return (Dhall.Core.wrapInLets as b) alternative3 = do _forall _openParens a <- label _colon b <- expression _closeParens _arrow c <- expression return (Pi a b c) alternative4 = do _assert _colon a <- expression return (Assert a) alternative5 = do a <- operatorExpression let alternative4A = do _arrow b <- expression return (Pi "_" a b) let alternative4B = do _colon b <- expression case shallowDenote a of ListLit _ [] -> return (ListLit (Just b) []) Merge c d _ -> return (Merge c d (Just b)) ToMap c _ -> return (ToMap c (Just b)) _ -> return (Annot a b) alternative4A <|> alternative4B <|> pure a operatorExpression = precedence0Expression makeOperatorExpression subExpression operatorParser = noted (do a <- subExpression b <- Text.Megaparsec.many $ do op <- operatorParser r <- subExpression return (\l -> l `op` r) return (foldl (\x f -> f x) a b) ) precedence0Operator = ImportAlt <$ _importAlt <|> BoolOr <$ _or <|> TextAppend <$ _textAppend <|> NaturalPlus <$ _plus <|> ListAppend <$ _listAppend precedence1Operator = BoolAnd <$ _and <|> Combine <$ _combine precedence2Operator = CombineTypes <$ _combineTypes <|> Prefer <$ _prefer <|> NaturalTimes <$ _times <|> BoolEQ <$ _doubleEqual precedence3Operator = BoolNE <$ _notEqual <|> Equivalent <$ _equivalent precedence0Expression = makeOperatorExpression precedence1Expression precedence0Operator precedence1Expression = makeOperatorExpression precedence2Expression precedence1Operator precedence2Expression = makeOperatorExpression precedence3Expression precedence2Operator precedence3Expression = makeOperatorExpression applicationExpression precedence3Operator applicationExpression = do f <- (do _Some; return Some) <|> return id a <- noted importExpression_ b <- Text.Megaparsec.many (noted importExpression_) return (foldl app (f a) b) where app nL@(Note (Src before _ bytesL) _) nR@(Note (Src _ after bytesR) _) = Note (Src before after (bytesL <> bytesR)) (App nL nR) app nL nR = App nL nR importExpression_ = noted (choice [ alternative0, alternative1 ]) where alternative0 = do a <- embedded return (Embed a) alternative1 = completionExpression completionExpression = noted (do a <- selectorExpression mb <- optional (do _doubleColon selectorExpression ) case mb of Nothing -> return a Just b -> return (RecordCompletion a b) ) selectorExpression = noted (do a <- primitiveExpression let recordType = _openParens *> expression <* _closeParens let field x e = Field e x let projectBySet xs e = Project e (Left xs) let projectByExpression xs e = Project e (Right xs) let alternatives = fmap field anyLabel <|> fmap projectBySet labels <|> fmap projectByExpression recordType b <- Text.Megaparsec.many (try (do _dot; alternatives)) return (foldl (\e k -> k e) a b) ) primitiveExpression = noted ( choice [ alternative00 , alternative01 , alternative02 , alternative03 , alternative04 , alternative05 , alternative06 , alternative07 , alternative08 , alternative37 , alternative09 , builtin "built-in expression" ] ) <|> alternative38 where alternative00 = do n <- getOffset a <- try doubleLiteral b <- if isInfinite a then setOffset n *> fail "double out of bounds" else return a return (DoubleLit (DhallDouble b)) alternative01 = do a <- try naturalLiteral return (NaturalLit a) alternative02 = do a <- try integerLiteral return (IntegerLit a) alternative03 = textLiteral alternative04 = (do _openBrace _ <- optional _comma a <- recordTypeOrLiteral _closeBrace return a ) "record type or literal" alternative05 = unionType alternative06 = listLiteral alternative07 = do _merge a <- importExpression_ b <- importExpression_ "second argument to ❰merge❱" return (Merge a b Nothing) alternative08 = do _toMap a <- importExpression_ return (ToMap a Nothing) alternative09 = do a <- try doubleInfinity return (DoubleLit (DhallDouble a)) builtin = do let predicate c = c == 'N' || c == 'I' || c == 'D' || c == 'L' || c == 'O' || c == 'B' || c == 'S' || c == 'T' || c == 'F' || c == 'K' let nan = DhallDouble (0.0/0.0) c <- Text.Megaparsec.lookAhead (Text.Megaparsec.satisfy predicate) case c of 'N' -> choice [ NaturalFold <$ _NaturalFold , NaturalBuild <$ _NaturalBuild , NaturalIsZero <$ _NaturalIsZero , NaturalEven <$ _NaturalEven , NaturalOdd <$ _NaturalOdd , NaturalSubtract <$ _NaturalSubtract , NaturalToInteger <$ _NaturalToInteger , NaturalShow <$ _NaturalShow , Natural <$ _Natural , None <$ _None , DoubleLit nan <$ _NaN ] 'I' -> choice [ IntegerShow <$ _IntegerShow , IntegerToDouble <$ _IntegerToDouble , Integer <$ _Integer ] 'D' -> choice [ DoubleShow <$ _DoubleShow , Double <$ _Double ] 'L' -> choice [ ListBuild <$ _ListBuild , ListFold <$ _ListFold , ListLength <$ _ListLength , ListHead <$ _ListHead , ListLast <$ _ListLast , ListIndexed <$ _ListIndexed , ListReverse <$ _ListReverse , List <$ _List ] 'O' -> choice [ OptionalFold <$ _OptionalFold , OptionalBuild <$ _OptionalBuild , Optional <$ _Optional ] 'B' -> Bool <$ _Bool 'S' -> Const Sort <$ _Sort 'T' -> choice [ TextShow <$ _TextShow , Text <$ _Text , BoolLit True <$ _True , Const Type <$ _Type ] 'F' -> BoolLit False <$ _False 'K' -> Const Kind <$ _Kind _ -> empty alternative37 = do a <- identifier return (Var a) alternative38 = do _openParens a <- expression _closeParens return a doubleQuotedChunk = choice [ interpolation , unescapedCharacterFast , unescapedCharacterSlow , escapedCharacter ] where interpolation = do _ <- Text.Parser.Char.text "${" e <- completeExpression_ _ <- Text.Parser.Char.char '}' return (Chunks [(mempty, e)] mempty) unescapedCharacterFast = do t <- Text.Megaparsec.takeWhile1P Nothing predicate return (Chunks [] t) where predicate c = ( ('\x20' <= c && c <= '\x21' ) || ('\x23' <= c && c <= '\x5B' ) || ('\x5D' <= c && c <= '\x10FFFF') ) && c /= '$' unescapedCharacterSlow = do _ <- Text.Parser.Char.char '$' return (Chunks [] "$") escapedCharacter = do _ <- Text.Parser.Char.char '\\' c <- choice [ quotationMark , dollarSign , backSlash , forwardSlash , backSpace , formFeed , lineFeed , carriageReturn , tab , unicode ] return (Chunks [] (Data.Text.singleton c)) where quotationMark = Text.Parser.Char.char '"' dollarSign = Text.Parser.Char.char '$' backSlash = Text.Parser.Char.char '\\' forwardSlash = Text.Parser.Char.char '/' backSpace = do _ <- Text.Parser.Char.char 'b'; return '\b' formFeed = do _ <- Text.Parser.Char.char 'f'; return '\f' lineFeed = do _ <- Text.Parser.Char.char 'n'; return '\n' carriageReturn = do _ <- Text.Parser.Char.char 'r'; return '\r' tab = do _ <- Text.Parser.Char.char 't'; return '\t' unicode = do _ <- Text.Parser.Char.char 'u'; let toNumber = Data.List.foldl' (\x y -> x * 16 + y) 0 let fourCharacterEscapeSequence = fmap toNumber (Control.Monad.replicateM 4 hexNumber) let bracedEscapeSequence = do _ <- Text.Parser.Char.char '{' ns <- some hexNumber let number = toNumber ns Control.Monad.guard (number <= 0x10FFFF && validCodepoint (Char.chr number)) <|> fail "Invalid Unicode code point" _ <- Text.Parser.Char.char '}' return (toNumber ns) n <- bracedEscapeSequence <|> fourCharacterEscapeSequence return (Char.chr n) doubleQuotedLiteral = do _ <- Text.Parser.Char.char '"' chunks <- Text.Megaparsec.many doubleQuotedChunk _ <- Text.Parser.Char.char '"' return (mconcat chunks) singleQuoteContinue = choice [ escapeSingleQuotes , interpolation , escapeInterpolation , endLiteral , unescapedCharacterFast , unescapedCharacterSlow , tab , endOfLine ] where escapeSingleQuotes = do _ <- "'''" :: Parser Text b <- singleQuoteContinue return ("''" <> b) interpolation = do _ <- Text.Parser.Char.text "${" a <- completeExpression_ _ <- Text.Parser.Char.char '}' b <- singleQuoteContinue return (Chunks [(mempty, a)] mempty <> b) escapeInterpolation = do _ <- Text.Parser.Char.text "''${" b <- singleQuoteContinue return ("${" <> b) endLiteral = do _ <- Text.Parser.Char.text "''" return mempty unescapedCharacterFast = do a <- Text.Megaparsec.takeWhile1P Nothing predicate b <- singleQuoteContinue return (Chunks [] a <> b) where predicate c = ('\x20' <= c && c <= '\x10FFFF') && c /= '$' && c /= '\'' unescapedCharacterSlow = do a <- satisfy predicate b <- singleQuoteContinue return (Chunks [] a <> b) where predicate c = c == '$' || c == '\'' endOfLine = do a <- "\n" <|> "\r\n" b <- singleQuoteContinue return (Chunks [] a <> b) tab = do _ <- Text.Parser.Char.char '\t' b <- singleQuoteContinue return ("\t" <> b) singleQuoteLiteral = do _ <- Text.Parser.Char.text "''" _ <- endOfLine a <- singleQuoteContinue return (toDoubleQuoted a) where endOfLine = void (Text.Parser.Char.char '\n' ) <|> void (Text.Parser.Char.text "\r\n") textLiteral = (do literal <- doubleQuotedLiteral <|> singleQuoteLiteral whitespace return (TextLit literal) ) "text literal" recordTypeOrLiteral = choice [ alternative0 , alternative1 , alternative2 ] where alternative0 = do _equal return (RecordLit mempty) alternative1 = nonEmptyRecordTypeOrLiteral alternative2 = return (Record mempty) nonEmptyRecordTypeOrLiteral = do a <- anyLabel let nonEmptyRecordType = do _colon b <- expression e <- Text.Megaparsec.many (do _comma c <- anyLabel _colon d <- expression return (c, d) ) m <- toMap ((a, b) : e) return (Record m) let nonEmptyRecordLiteral = do _equal b <- expression e <- Text.Megaparsec.many (do _comma c <- anyLabel _equal d <- expression return (c, d) ) m <- toMap ((a, b) : e) return (RecordLit m) nonEmptyRecordType <|> nonEmptyRecordLiteral unionType = (do _openAngle _ <- optional _bar let unionTypeEntry = do a <- anyLabel b <- optional (do _colon; expression) return (a, b) kvs <- Text.Megaparsec.sepBy unionTypeEntry _bar m <- toMap kvs _closeAngle return (Union m) ) "union type" listLiteral = (do _openBracket _ <- optional _comma a <- Text.Megaparsec.sepBy expression _comma _closeBracket return (ListLit Nothing (Data.Sequence.fromList a)) ) "list literal" {-| Parse an environment variable import This corresponds to the @env@ rule from the official grammar -} env :: Parser ImportType env = do _ <- Text.Parser.Char.text "env:" a <- (alternative0 <|> alternative1) whitespace return (Env a) where alternative0 = bashEnvironmentVariable alternative1 = do _ <- Text.Parser.Char.char '"' a <- posixEnvironmentVariable _ <- Text.Parser.Char.char '"' return a -- | Parse a local import without trailing whitespace localOnly :: Parser ImportType localOnly = choice [ parentPath , herePath , homePath , try absolutePath ] where parentPath = do _ <- ".." :: Parser Text file <- file_ FileComponent return (Local Parent file) herePath = do _ <- "." :: Parser Text file <- file_ FileComponent return (Local Here file) homePath = do _ <- "~" :: Parser Text file <- file_ FileComponent return (Local Home file) absolutePath = do file <- file_ FileComponent return (Local Absolute file) {-| Parse a local import This corresponds to the @local@ rule from the official grammar -} local :: Parser ImportType local = do a <- localOnly whitespace return a {-| Parse an HTTP(S) import This corresponds to the @http@ rule from the official grammar -} http :: Parser ImportType http = do url <- httpRaw whitespace headers <- optional (do _using importExpression import_ ) return (Remote (url { headers })) {-| Parse a `Missing` import This corresponds to the @missing@ rule from the official grammar -} missing :: Parser ImportType missing = do _missing return Missing {-| Parse an `ImportType` This corresponds to the @import-type@ rule from the official grammar -} importType_ :: Parser ImportType importType_ = do let predicate c = c == '~' || c == '.' || c == '/' || c == 'h' || c == 'e' || c == 'm' _ <- Text.Megaparsec.lookAhead (Text.Megaparsec.satisfy predicate) choice [ local, http, env, missing ] {-| Parse a `Dhall.Crypto.SHA256Digest` This corresponds to the @hash@ rule from the official grammar -} importHash_ :: Parser Dhall.Crypto.SHA256Digest importHash_ = do _ <- Text.Parser.Char.text "sha256:" text <- count 64 (satisfy hexdig "hex digit") whitespace let strictBytes16 = Data.Text.Encoding.encodeUtf8 text strictBytes <- case Data.ByteArray.Encoding.convertFromBase Base16 strictBytes16 of Left string -> fail string Right strictBytes -> return (strictBytes :: Data.ByteString.ByteString) case Dhall.Crypto.sha256DigestFromByteString strictBytes of Nothing -> fail "Invalid sha256 hash" Just h -> pure h {-| Parse an `ImportHashed` This corresponds to the @import-hashed@ rule from the official grammar -} importHashed_ :: Parser ImportHashed importHashed_ = do importType <- importType_ hash <- optional importHash_ return (ImportHashed {..}) {-| Parse an `Import` This corresponds to the @import@ rule from the official grammar -} import_ :: Parser Import import_ = (do importHashed <- importHashed_ importMode <- alternative <|> pure Code return (Import {..}) ) "import" where alternative = do _as (_Text >> pure RawText) <|> (_Location >> pure Location) -- | Same as @Data.Text.splitOn@, except always returning a `NonEmpty` result splitOn :: Text -> Text -> NonEmpty Text splitOn needle haystack = case Data.Text.splitOn needle haystack of [] -> "" :| [] t : ts -> t :| ts -- | Split `Chunks` by lines linesLiteral :: Chunks s a -> NonEmpty (Chunks s a) linesLiteral (Chunks [] suffix) = fmap (Chunks []) (splitOn "\n" suffix) linesLiteral (Chunks ((prefix, interpolation) : pairs₀) suffix₀) = foldr Data.List.NonEmpty.cons (Chunks ((lastLine, interpolation) : pairs₁) suffix₁ :| chunks) (fmap (Chunks []) initLines) where splitLines = splitOn "\n" prefix initLines = Data.List.NonEmpty.init splitLines lastLine = Data.List.NonEmpty.last splitLines Chunks pairs₁ suffix₁ :| chunks = linesLiteral (Chunks pairs₀ suffix₀) -- | Flatten several `Chunks` back into a single `Chunks` by inserting newlines unlinesLiteral :: NonEmpty (Chunks s a) -> Chunks s a unlinesLiteral chunks = Data.Foldable.fold (Data.List.NonEmpty.intersperse "\n" chunks) -- | Returns `True` if the `Chunks` represents a blank line emptyLine :: Chunks s a -> Bool emptyLine (Chunks [] "" ) = True emptyLine (Chunks [] "\r") = True -- So that `\r\n` is treated as a blank line emptyLine _ = False -- | Return the leading whitespace for a `Chunks` literal leadingSpaces :: Chunks s a -> Text leadingSpaces chunks = Data.Text.takeWhile isSpace firstText where isSpace c = c == '\x20' || c == '\x09' firstText = case chunks of Chunks [] suffix -> suffix Chunks ((prefix, _) : _ ) _ -> prefix -- | Drop the first @n@ characters for a `Chunks` literal dropLiteral :: Int -> Chunks s a -> Chunks s a dropLiteral n (Chunks [] suffix) = Chunks [] (Data.Text.drop n suffix) dropLiteral n (Chunks ((prefix, interpolation) : rest) suffix) = Chunks ((Data.Text.drop n prefix, interpolation) : rest) suffix {-| Convert a single-quoted `Chunks` literal to the equivalent double-quoted `Chunks` literal -} toDoubleQuoted :: Chunks Src a -> Chunks Src a toDoubleQuoted literal = unlinesLiteral (fmap (dropLiteral indent) literals) where literals = linesLiteral literal sharedPrefix ab ac = case Data.Text.commonPrefixes ab ac of Just (a, _b, _c) -> a Nothing -> "" -- The standard specifies to filter out blank lines for all lines *except* -- for the last line filteredLines = newInit <> pure oldLast where oldInit = Data.List.NonEmpty.init literals oldLast = Data.List.NonEmpty.last literals newInit = filter (not . emptyLine) oldInit longestSharedPrefix = case filteredLines of l : ls -> Data.Foldable.foldl' sharedPrefix (leadingSpaces l) (fmap leadingSpaces ls) [] -> "" indent = Data.Text.length longestSharedPrefix