{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE Strict #-}

-- | Definition of the tokens used in the lexer.
--
-- Also defines other useful building blocks for constructing tokens.
module Language.Futhark.Parser.Lexer.Tokens
  ( Token (..),
    Lexeme,
    fromRoman,
    symbol,
    mkQualId,
    tokenPosM,
    tokenM,
    tokenC,
    keyword,
    tokenS,
    indexing,
    suffZero,
    tryRead,
    readIntegral,
    readHexRealLit,
  )
where

import qualified Data.ByteString.Lazy as BS
import Data.Char (digitToInt, ord)
import Data.Either
import Data.List (find, foldl')
import Data.Loc (Loc (..), Pos (..))
import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import qualified Data.Text.Read as T
import Language.Futhark.Core
  ( Int16,
    Int32,
    Int64,
    Int8,
    Name,
    Word16,
    Word32,
    Word64,
    Word8,
  )
import Language.Futhark.Parser.Lexer.Wrapper
import Language.Futhark.Prop (leadingOperator)
import Language.Futhark.Syntax (BinOp, nameFromText, nameToText)
import Numeric.Half
import Prelude hiding (exponent)

-- | A lexical token.  It does not itself contain position
-- information, so in practice the parser will consume tokens tagged
-- with a source position.
data Token
  = ID Name
  | INDEXING Name
  | QUALINDEXING [Name] Name
  | QUALPAREN [Name] Name
  | SYMBOL BinOp [Name] Name
  | CONSTRUCTOR Name
  | PROJ_INTFIELD Name
  | INTLIT Integer
  | STRINGLIT T.Text
  | I8LIT Int8
  | I16LIT Int16
  | I32LIT Int32
  | I64LIT Int64
  | U8LIT Word8
  | U16LIT Word16
  | U32LIT Word32
  | U64LIT Word64
  | FLOATLIT Double
  | F16LIT Half
  | F32LIT Float
  | F64LIT Double
  | CHARLIT Char
  | COLON
  | COLON_GT
  | BACKSLASH
  | APOSTROPHE
  | APOSTROPHE_THEN_HAT
  | APOSTROPHE_THEN_TILDE
  | BACKTICK
  | HASH_LBRACKET
  | DOT
  | TWO_DOTS
  | TWO_DOTS_LT
  | TWO_DOTS_GT
  | THREE_DOTS
  | LPAR
  | RPAR
  | RPAR_THEN_LBRACKET
  | LBRACKET
  | RBRACKET
  | LCURLY
  | RCURLY
  | COMMA
  | UNDERSCORE
  | RIGHT_ARROW
  | QUESTION_MARK
  | EQU
  | ASTERISK
  | NEGATE
  | BANG
  | DOLLAR
  | LTH
  | HAT
  | TILDE
  | PIPE
  | IF
  | THEN
  | ELSE
  | DEF
  | LET
  | LOOP
  | IN
  | FOR
  | DO
  | WITH
  | ASSERT
  | TRUE
  | FALSE
  | WHILE
  | INCLUDE
  | IMPORT
  | ENTRY
  | TYPE
  | MODULE
  | VAL
  | OPEN
  | LOCAL
  | MATCH
  | CASE
  | DOC String
  | EOF
  | HOLE
  deriving (Int -> Token -> ShowS
[Token] -> ShowS
Token -> String
(Int -> Token -> ShowS)
-> (Token -> String) -> ([Token] -> ShowS) -> Show Token
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Token] -> ShowS
$cshowList :: [Token] -> ShowS
show :: Token -> String
$cshow :: Token -> String
showsPrec :: Int -> Token -> ShowS
$cshowsPrec :: Int -> Token -> ShowS
Show, Token -> Token -> Bool
(Token -> Token -> Bool) -> (Token -> Token -> Bool) -> Eq Token
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Token -> Token -> Bool
$c/= :: Token -> Token -> Bool
== :: Token -> Token -> Bool
$c== :: Token -> Token -> Bool
Eq, Eq Token
Eq Token
-> (Token -> Token -> Ordering)
-> (Token -> Token -> Bool)
-> (Token -> Token -> Bool)
-> (Token -> Token -> Bool)
-> (Token -> Token -> Bool)
-> (Token -> Token -> Token)
-> (Token -> Token -> Token)
-> Ord Token
Token -> Token -> Bool
Token -> Token -> Ordering
Token -> Token -> Token
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: Token -> Token -> Token
$cmin :: Token -> Token -> Token
max :: Token -> Token -> Token
$cmax :: Token -> Token -> Token
>= :: Token -> Token -> Bool
$c>= :: Token -> Token -> Bool
> :: Token -> Token -> Bool
$c> :: Token -> Token -> Bool
<= :: Token -> Token -> Bool
$c<= :: Token -> Token -> Bool
< :: Token -> Token -> Bool
$c< :: Token -> Token -> Bool
compare :: Token -> Token -> Ordering
$ccompare :: Token -> Token -> Ordering
$cp1Ord :: Eq Token
Ord)

keyword :: T.Text -> Token
keyword :: Text -> Token
keyword Text
s =
  case Text
s of
    Text
"true" -> Token
TRUE
    Text
"false" -> Token
FALSE
    Text
"if" -> Token
IF
    Text
"then" -> Token
THEN
    Text
"else" -> Token
ELSE
    Text
"def" -> Token
DEF
    Text
"let" -> Token
LET
    Text
"loop" -> Token
LOOP
    Text
"in" -> Token
IN
    Text
"val" -> Token
VAL
    Text
"for" -> Token
FOR
    Text
"do" -> Token
DO
    Text
"with" -> Token
WITH
    Text
"local" -> Token
LOCAL
    Text
"open" -> Token
OPEN
    Text
"include" -> Token
INCLUDE
    Text
"import" -> Token
IMPORT
    Text
"type" -> Token
TYPE
    Text
"entry" -> Token
ENTRY
    Text
"module" -> Token
MODULE
    Text
"while" -> Token
WHILE
    Text
"assert" -> Token
ASSERT
    Text
"match" -> Token
MATCH
    Text
"case" -> Token
CASE
    Text
_ -> Name -> Token
ID (Name -> Token) -> Name -> Token
forall a b. (a -> b) -> a -> b
$ Text -> Name
nameFromText Text
s

indexing :: (Loc, T.Text) -> Alex Name
indexing :: (Loc, Text) -> Alex Name
indexing (Loc
loc, Text
s) = case Text -> Token
keyword Text
s of
  ID Name
v -> Name -> Alex Name
forall (f :: * -> *) a. Applicative f => a -> f a
pure Name
v
  Token
_ -> Loc -> String -> Alex Name
forall a. Loc -> String -> Alex a
alexError Loc
loc (String -> Alex Name) -> String -> Alex Name
forall a b. (a -> b) -> a -> b
$ String
"Cannot index keyword '" String -> ShowS
forall a. [a] -> [a] -> [a]
++ Text -> String
T.unpack Text
s String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
"'."

mkQualId :: T.Text -> Alex ([Name], Name)
mkQualId :: Text -> Alex ([Name], Name)
mkQualId Text
s = case [Text] -> [Text]
forall a. [a] -> [a]
reverse ([Text] -> [Text]) -> [Text] -> [Text]
forall a b. (a -> b) -> a -> b
$ Text -> Text -> [Text]
T.splitOn Text
"." Text
s of
  [] -> String -> Alex ([Name], Name)
forall a. HasCallStack => String -> a
error String
"mkQualId: no components"
  Text
k : [Text]
qs -> ([Name], Name) -> Alex ([Name], Name)
forall (f :: * -> *) a. Applicative f => a -> f a
pure ((Text -> Name) -> [Text] -> [Name]
forall a b. (a -> b) -> [a] -> [b]
map Text -> Name
nameFromText ([Text] -> [Text]
forall a. [a] -> [a]
reverse [Text]
qs), Text -> Name
nameFromText Text
k)

-- | Suffix a zero if the last character is dot.
suffZero :: T.Text -> T.Text
suffZero :: Text -> Text
suffZero Text
s = if Text -> Char
T.last Text
s Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'.' then Text
s Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
"0" else Text
s

tryRead :: Read a => String -> T.Text -> Alex a
tryRead :: String -> Text -> Alex a
tryRead String
desc Text
s = case ReadS a
forall a. Read a => ReadS a
reads String
s' of
  [(a
x, String
"")] -> a -> Alex a
forall (f :: * -> *) a. Applicative f => a -> f a
pure a
x
  [(a, String)]
_ -> String -> Alex a
forall a. HasCallStack => String -> a
error (String -> Alex a) -> String -> Alex a
forall a b. (a -> b) -> a -> b
$ String
"Invalid " String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
desc String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
" literal: `" String -> ShowS
forall a. [a] -> [a] -> [a]
++ Text -> String
T.unpack Text
s String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
"'."
  where
    s' :: String
s' = Text -> String
T.unpack Text
s

readIntegral :: Integral a => T.Text -> a
readIntegral :: Text -> a
readIntegral Text
s
  | Text
"0x" Text -> Text -> Bool
`T.isPrefixOf` Text
s Bool -> Bool -> Bool
|| Text
"0X" Text -> Text -> Bool
`T.isPrefixOf` Text
s = a -> Text -> a
forall a. Num a => a -> Text -> a
parseBase a
16 (Int -> Text -> Text
T.drop Int
2 Text
s)
  | Text
"0b" Text -> Text -> Bool
`T.isPrefixOf` Text
s Bool -> Bool -> Bool
|| Text
"0B" Text -> Text -> Bool
`T.isPrefixOf` Text
s = a -> Text -> a
forall a. Num a => a -> Text -> a
parseBase a
2 (Int -> Text -> Text
T.drop Int
2 Text
s)
  | Text
"0r" Text -> Text -> Bool
`T.isPrefixOf` Text
s Bool -> Bool -> Bool
|| Text
"0R" Text -> Text -> Bool
`T.isPrefixOf` Text
s = Text -> a
forall a. Integral a => Text -> a
fromRoman (Int -> Text -> Text
T.drop Int
2 Text
s)
  | Bool
otherwise = a -> Text -> a
forall a. Num a => a -> Text -> a
parseBase a
10 Text
s
  where
    parseBase :: a -> Text -> a
parseBase a
base = (a -> Char -> a) -> a -> Text -> a
forall a. (a -> Char -> a) -> a -> Text -> a
T.foldl (\a
acc Char
c -> a
acc a -> a -> a
forall a. Num a => a -> a -> a
* a
base a -> a -> a
forall a. Num a => a -> a -> a
+ Int -> a
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Char -> Int
digitToInt Char
c)) a
0

tokenC :: a -> (Pos, Char, BS.ByteString, Int64) -> Int64 -> Alex (Lexeme a)
tokenC :: a -> (Pos, Char, ByteString, Int64) -> Int64 -> Alex (Lexeme a)
tokenC a
v = (Text -> a)
-> (Pos, Char, ByteString, Int64) -> Int64 -> Alex (Lexeme a)
forall a.
(Text -> a)
-> (Pos, Char, ByteString, Int64) -> Int64 -> Alex (Lexeme a)
tokenS ((Text -> a)
 -> (Pos, Char, ByteString, Int64) -> Int64 -> Alex (Lexeme a))
-> (Text -> a)
-> (Pos, Char, ByteString, Int64)
-> Int64
-> Alex (Lexeme a)
forall a b. (a -> b) -> a -> b
$ a -> Text -> a
forall a b. a -> b -> a
const a
v

tokenS :: (T.Text -> a) -> (Pos, Char, BS.ByteString, Int64) -> Int64 -> Alex (Lexeme a)
tokenS :: (Text -> a)
-> (Pos, Char, ByteString, Int64) -> Int64 -> Alex (Lexeme a)
tokenS Text -> a
f = (Text -> Alex a)
-> (Pos, Char, ByteString, Int64) -> Int64 -> Alex (Lexeme a)
forall a.
(Text -> Alex a)
-> (Pos, Char, ByteString, Int64) -> Int64 -> Alex (Lexeme a)
tokenM ((Text -> Alex a)
 -> (Pos, Char, ByteString, Int64) -> Int64 -> Alex (Lexeme a))
-> (Text -> Alex a)
-> (Pos, Char, ByteString, Int64)
-> Int64
-> Alex (Lexeme a)
forall a b. (a -> b) -> a -> b
$ a -> Alex a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (a -> Alex a) -> (Text -> a) -> Text -> Alex a
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> a
f

type Lexeme a = (Pos, Pos, a)

tokenM ::
  (T.Text -> Alex a) ->
  (Pos, Char, BS.ByteString, Int64) ->
  Int64 ->
  Alex (Lexeme a)
tokenM :: (Text -> Alex a)
-> (Pos, Char, ByteString, Int64) -> Int64 -> Alex (Lexeme a)
tokenM Text -> Alex a
f = ((Loc, Text) -> Alex a)
-> (Pos, Char, ByteString, Int64) -> Int64 -> Alex (Lexeme a)
forall a.
((Loc, Text) -> Alex a)
-> (Pos, Char, ByteString, Int64) -> Int64 -> Alex (Lexeme a)
tokenPosM (Text -> Alex a
f (Text -> Alex a) -> ((Loc, Text) -> Text) -> (Loc, Text) -> Alex a
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Loc, Text) -> Text
forall a b. (a, b) -> b
snd)

tokenPosM ::
  ((Loc, T.Text) -> Alex a) ->
  (Pos, Char, BS.ByteString, Int64) ->
  Int64 ->
  Alex (Lexeme a)
tokenPosM :: ((Loc, Text) -> Alex a)
-> (Pos, Char, ByteString, Int64) -> Int64 -> Alex (Lexeme a)
tokenPosM (Loc, Text) -> Alex a
f (Pos
pos, Char
_, ByteString
s, Int64
_) Int64
len = do
  a
x <- (Loc, Text) -> Alex a
f (Pos -> Pos -> Loc
Loc Pos
pos Pos
pos', ByteString -> Text
T.decodeUtf8 (ByteString -> Text) -> ByteString -> Text
forall a b. (a -> b) -> a -> b
$ ByteString -> ByteString
BS.toStrict ByteString
s')
  Lexeme a -> Alex (Lexeme a)
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Pos
pos, Pos
pos', a
x)
  where
    pos' :: Pos
pos' = Pos -> ByteString -> Pos
advance Pos
pos ByteString
s'
    s' :: ByteString
s' = Int64 -> ByteString -> ByteString
BS.take Int64
len ByteString
s

advance :: Pos -> BS.ByteString -> Pos
advance :: Pos -> ByteString -> Pos
advance Pos
orig_pos = (Pos -> Word8 -> Pos) -> Pos -> [Word8] -> Pos
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' Pos -> Word8 -> Pos
advance' Pos
orig_pos ([Word8] -> Pos) -> (ByteString -> [Word8]) -> ByteString -> Pos
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Word8] -> [Word8]
forall a. [a] -> [a]
init ([Word8] -> [Word8])
-> (ByteString -> [Word8]) -> ByteString -> [Word8]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> [Word8]
BS.unpack
  where
    advance' :: Pos -> Word8 -> Pos
advance' (Pos String
f !Int
line !Int
col !Int
addr) Word8
c
      | Word8
c Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
nl = String -> Int -> Int -> Int -> Pos
Pos String
f (Int
line Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) Int
1 (Int
addr Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
      | Bool
otherwise = String -> Int -> Int -> Int -> Pos
Pos String
f Int
line (Int
col Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) (Int
addr Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
    nl :: Word8
nl = Int -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word8) -> Int -> Word8
forall a b. (a -> b) -> a -> b
$ Char -> Int
ord Char
'\n'

symbol :: [Name] -> Name -> Token
symbol :: [Name] -> Name -> Token
symbol [] Name
q
  | Name -> Text
nameToText Name
q Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"*" = Token
ASTERISK
  | Name -> Text
nameToText Name
q Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"-" = Token
NEGATE
  | Name -> Text
nameToText Name
q Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"<" = Token
LTH
  | Name -> Text
nameToText Name
q Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"^" = Token
HAT
  | Name -> Text
nameToText Name
q Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"|" = Token
PIPE
  | Bool
otherwise = BinOp -> [Name] -> Name -> Token
SYMBOL (Name -> BinOp
leadingOperator Name
q) [] Name
q
symbol [Name]
qs Name
q = BinOp -> [Name] -> Name -> Token
SYMBOL (Name -> BinOp
leadingOperator Name
q) [Name]
qs Name
q

romanNumerals :: Integral a => [(T.Text, a)]
romanNumerals :: [(Text, a)]
romanNumerals =
  [(Text, a)] -> [(Text, a)]
forall a. [a] -> [a]
reverse
    [ (Text
"I", a
1),
      (Text
"IV", a
4),
      (Text
"V", a
5),
      (Text
"IX", a
9),
      (Text
"X", a
10),
      (Text
"XL", a
40),
      (Text
"L", a
50),
      (Text
"XC", a
90),
      (Text
"C", a
100),
      (Text
"CD", a
400),
      (Text
"D", a
500),
      (Text
"CM", a
900),
      (Text
"M", a
1000)
    ]

fromRoman :: Integral a => T.Text -> a
fromRoman :: Text -> a
fromRoman Text
s =
  case ((Text, a) -> Bool) -> [(Text, a)] -> Maybe (Text, a)
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Maybe a
find ((Text -> Text -> Bool
`T.isPrefixOf` Text
s) (Text -> Bool) -> ((Text, a) -> Text) -> (Text, a) -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Text, a) -> Text
forall a b. (a, b) -> a
fst) [(Text, a)]
forall a. Integral a => [(Text, a)]
romanNumerals of
    Maybe (Text, a)
Nothing -> a
0
    Just (Text
d, a
n) -> a
n a -> a -> a
forall a. Num a => a -> a -> a
+ Text -> a
forall a. Integral a => Text -> a
fromRoman (Int -> Text -> Text
T.drop (Text -> Int
T.length Text
d) Text
s)

readHexRealLit :: RealFloat a => T.Text -> Alex a
readHexRealLit :: Text -> Alex a
readHexRealLit Text
s =
  let num :: Text
num = Int -> Text -> Text
T.drop Int
2 Text
s
   in -- extract number into integer, fractional and (optional) exponent
      let comps :: [Text]
comps = (Char -> Bool) -> Text -> [Text]
T.split (Char -> String -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Char
'.', Char
'p', Char
'P']) Text
num
       in case [Text]
comps of
            [Text
i, Text
f, Text
p] ->
              let runTextReader :: (a -> Either a (Integer, b)) -> a -> c
runTextReader a -> Either a (Integer, b)
r = Integer -> c
forall a. Num a => Integer -> a
fromInteger (Integer -> c) -> (a -> Integer) -> a -> c
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Integer, b) -> Integer
forall a b. (a, b) -> a
fst ((Integer, b) -> Integer) -> (a -> (Integer, b)) -> a -> Integer
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Integer, b) -> Either a (Integer, b) -> (Integer, b)
forall b a. b -> Either a b -> b
fromRight (String -> (Integer, b)
forall a. HasCallStack => String -> a
error String
"internal error") (Either a (Integer, b) -> (Integer, b))
-> (a -> Either a (Integer, b)) -> a -> (Integer, b)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. a -> Either a (Integer, b)
r
                  intPart :: a
intPart = (Text -> Either String (Integer, Text)) -> Text -> a
forall c a a b. Num c => (a -> Either a (Integer, b)) -> a -> c
runTextReader Text -> Either String (Integer, Text)
forall a. Integral a => Reader a
T.hexadecimal Text
i
                  fracPart :: a
fracPart = (Text -> Either String (Integer, Text)) -> Text -> a
forall c a a b. Num c => (a -> Either a (Integer, b)) -> a -> c
runTextReader Text -> Either String (Integer, Text)
forall a. Integral a => Reader a
T.hexadecimal Text
f
                  exponent :: a
exponent = (Text -> Either String (Integer, Text)) -> Text -> a
forall c a a b. Num c => (a -> Either a (Integer, b)) -> a -> c
runTextReader ((Text -> Either String (Integer, Text))
-> Text -> Either String (Integer, Text)
forall a. Num a => Reader a -> Reader a
T.signed Text -> Either String (Integer, Text)
forall a. Integral a => Reader a
T.decimal) Text
p

                  fracLen :: a
fracLen = Int -> a
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> a) -> Int -> a
forall a b. (a -> b) -> a -> b
$ Text -> Int
T.length Text
f
                  fracVal :: a
fracVal = a
fracPart a -> a -> a
forall a. Fractional a => a -> a -> a
/ (a
16.0 a -> a -> a
forall a. Floating a => a -> a -> a
** a
fracLen)
                  totalVal :: a
totalVal = (a
intPart a -> a -> a
forall a. Num a => a -> a -> a
+ a
fracVal) a -> a -> a
forall a. Num a => a -> a -> a
* (a
2.0 a -> a -> a
forall a. Floating a => a -> a -> a
** a
exponent)
               in a -> Alex a
forall (f :: * -> *) a. Applicative f => a -> f a
pure a
totalVal
            [Text]
_ -> String -> Alex a
forall a. HasCallStack => String -> a
error String
"bad hex real literal"