{ {-# OPTIONS_GHC -fno-warn-name-shadowing #-} {-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE DeriveAnyClass #-} {-# LANGUAGE StandaloneDeriving #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE FlexibleContexts #-} module Language.Dhall.Lexer.Mod ( lexDhall , alexMonadScan , runAlex , Alex (..) , AlexPosn (..) , Ann (..) , Token , AlexState (..) ) where import Control.Arrow ((&&&)) import Control.DeepSeq (NFData) import Control.Monad import Data.Bool (bool) import qualified Data.ByteString.Lazy as BSL import qualified Data.ByteString.Lazy.Char8 as ASCII import Data.Scientific import GHC.Generics (Generic) import GHC.Natural (Natural) import Language.Dhall.Lexer.Types } %wrapper "monad-bytestring" $digit = 0-9 @index = $digit+ @natural = \+ $digit+ @integer = (\- | "") $digit+ @double = (\- | "") $digit+ \. $digit+ -- TODO // for lambdas? $special = [\{\}\,\=\:\[\]\<\>\|\(\)\.\+\*\#] $lowercase = [a-z] $uppercase = [A-Z] $letter = [$lowercase $uppercase] @identifier_body = $letter | $digit | _ @type = $uppercase @identifier_body* @quoted = \` @type \` @identifier = ($lowercase | _) @identifier_body* @url_loc = "http://" | "https://" @file_loc = "./" | "../" | "/" $url_contents = [\:\/\-\.\_ $letter $digit] @url = @url_loc $url_contents+ @file = @file_loc $url_contents+ $string_char = $printable # [\"\\\$] $esc_char = [\"\$\\] -- Deficiency: what if $ is followed by "? @string_in = (\\ $esc_char | $string_char | \$ [^\{\"])* tokens :- <0,splice> $white+ ; <0,splice> "--".* ; "{-" { \_ _ -> nested_comment } -- keywords <0,splice> if { tok (\p _ -> alex p $ Keyword KwIf) } <0,splice> then { tok (\p _ -> alex p $ Keyword KwThen) } <0,splice> else { tok (\p _ -> alex p $ Keyword KwElse) } <0,splice> let { tok (\p _ -> alex p $ Keyword KwLet) } <0,splice> in { tok (\p _ -> alex p $ Keyword KwIn) } <0,splice> forall { tok (\p _ -> alex p $ Keyword KwForall) } <0,splice> "∀" { tok (\p _ -> alex p $ Keyword KwForall) } <0,splice> constructors { tok (\p _ -> alex p $ Keyword KwConstructors) } <0,splice> merge { tok (\p _ -> alex p $ Keyword KwMerge) } <0,splice> Text { tok (\p _ -> alex p $ Keyword KwText) } <0,splice> Double { tok (\p _ -> alex p $ Keyword KwDouble) } <0,splice> Integer { tok (\p _ -> alex p $ Keyword KwInteger) } <0,splice> List { tok (\p _ -> alex p $ Keyword KwList) } <0,splice> Natural { tok (\p _ -> alex p $ Keyword KwNatural) } <0,splice> Bool { tok (\p _ -> alex p $ Keyword KwBool) } <0,splice> Optional { tok (\p _ -> alex p $ Keyword KwOptional) } <0,splice> "Nat/fold" { tok (\p _ -> alex p $ Keyword KwNatFold) } <0,splice> "Integer/show" { tok (\p _ -> alex p $ Keyword KwIntegerShow) } <0,splice> Kind { tok (\p _ -> alex p $ Keyword KwKind) } <0,splice> Type { tok (\p _ -> alex p $ Keyword KwType) } -- builtin specials <0,splice> "//" { tok (\p _ -> alex p $ Operator CombineTok) } <0,splice> "⫽" { tok (\p _ -> alex p $ Operator CombineTok) } <0,splice> "/"\\ { tok (\p _ -> alex p $ Operator PreferTok) } <0,splice> "∧" { tok (\p _ -> alex p $ Operator PreferTok) } <0,splice> "→" { tok (\p _ -> alex p $ Operator ArrowTok) } <0,splice> "->" { tok (\p _ -> alex p $ Operator ArrowTok) } <0,splice> "λ" { tok (\p _ -> alex p $ Operator LambdaTok) } <0,splice> \\ { tok (\p _ -> alex p $ Operator LambdaTok) } <0,splice> \&\& { tok (\p _ -> alex p $ Operator AndTok) } <0,splice> \|\| { tok (\p _ -> alex p $ Operator OrTok) } <0,splice> \=\= { tok (\p _ -> alex p $ Operator EqTok) } <0,splice> \!\= { tok (\p _ -> alex p $ Operator NeqTok) } <0,splice> \+\+ { tok (\p _ -> alex p $ Operator AppendTok) } -- Path literals <0,splice> @url { tok (\p s -> alex p $ EmbedURL s) } <0,splice> @file { tok (\p s -> alex p $ EmbedFile s) } -- Various special characters <0> $special { tok (\p s -> alex p $ Special s) } -- Numeric literals <0,splice> @double { tok (\p s -> Ann p <$> fmap DoubleTok (readDouble s)) } <0,splice> @natural { tok (\p s -> Ann p <$> fmap NatLit (readNatural s)) } <0,splice> \@ @index { tok (\p s -> Ann p <$> fmap AtToken (readInteger (BSL.tail s))) } <0,splice> @integer { tok (\p s -> Ann p <$> fmap IntLit (readInteger s)) } -- Boolean literals <0,splice> True { tok (\p _ -> alex p $ BoolTok True) } <0,splice> False { tok (\p _ -> alex p $ BoolTok False) } -- Identifiers <0,splice> @quoted { tok (\p s -> alex p $ QuotedId s) } <0,splice> @identifier { tok (\p s -> alex p $ Identifier s) } <0,splice> @type { tok (\p s -> alex p $ TypeId s) } -- Strings & string splices <0,splice> \" { begin string } @string_in \$ / \" { tok (\p s -> alex p $ StringChunk s) } @string_in { tok (\p s -> alex p $ StringChunk s) } \$\{ { tok (\p _ -> alex p $ BeginSplice) `andBegin` splice } \} { tok (\p _ -> alex p $ EndSplice) `andBegin` string } $special # \} { tok (\p s -> alex p $ Special s) } \" { begin 0 } { -- Taken from example by Simon Marlow. nested_comment :: Alex Token nested_comment = go 1 =<< alexGetInput where go :: Int -> AlexInput -> Alex Token go 0 input = alexSetInput input >> alexMonadScan go n input = case alexGetByte input of Nothing -> err input Just (c, input') -> case Data.Char.chr (fromIntegral c) of '-' -> case alexGetByte input' of Nothing -> err input' Just (125,input_) -> go (n-1) input_ Just (_,input_) -> go n input_ '{' -> case alexGetByte input' of Nothing -> err input' Just (c',input_) -> go (addLevel c' $ n) input_ _ -> go n input' addLevel c' = bool id (+1) (c'==45) err (pos,_,_,_) = let (AlexPn _ line col) = pos in alexError ("Error in nested comment at line " ++ show line ++ ", column " ++ show col) readNatural :: BSL.ByteString -> Alex Natural readNatural = go <=< readInteger . BSL.tail where go x | x < 0 = alexError "Internal lexer error" go x = pure (fromIntegral x) readDouble :: BSL.ByteString -> Alex Scientific readDouble str = pure $ read (ASCII.unpack str) readInteger :: BSL.ByteString -> Alex Integer readInteger str = case ASCII.readInteger str of Just (i, "") -> pure i _ -> alexError "Not a valid integer" get_pos :: Alex AlexPosn get_pos = Alex (Right . (id &&& alex_pos)) alex :: AlexPosn -> a -> Alex (Ann AlexPosn a) alex = (pure .) . Ann tok f (p,_,s,_) len = f p (BSL.take len s) deriving instance Generic AlexPosn deriving instance NFData AlexPosn -- | Data type for values with annotations. data Ann a b = Ann { loc :: a , inner :: b } deriving (Eq, Show, Generic, NFData) -- | A token with location information. type Token = Ann AlexPosn TokenType alexEOF :: Alex Token alexEOF = Ann <$> get_pos <*> pure End lexDhall :: BSL.ByteString -> Either String [Token] lexDhall str = runAlex str loop loop :: Alex [Token] loop = do tok' <- alexMonadScan if inner tok' == End then pure mempty else (tok' :) <$> loop }