{-# LANGUAGE BangPatterns, CPP, OverloadedStrings, ViewPatterns #-}
-- | Tokenization breaks a 'String' into pieces of whitespace,
-- constants, symbols, and identifiers.
module Hpp.Tokens (Token(..), detok, isImportant, notImportant, importants,
                   trimUnimportant, detokenize, tokenize, newLine,
                   skipLiteral) where
import Control.Arrow (first, second)
import Data.Char (isAlphaNum, isDigit, isSpace, isOctDigit, isHexDigit, digitToInt)
import Data.Foldable (foldl')
#if __GLASGOW_HASKELL__ < 808
import Data.Monoid ((<>))
#endif
import Data.String (IsString, fromString)
import Hpp.StringSig

-- | Tokenization is 'words' except the white space is tagged rather
-- than discarded.
data Token s = Important s
             -- ^ Identifiers, symbols, and constants
             | Other s
             -- ^ White space, etc.
               deriving (Token s -> Token s -> Bool
(Token s -> Token s -> Bool)
-> (Token s -> Token s -> Bool) -> Eq (Token s)
forall s. Eq s => Token s -> Token s -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Token s -> Token s -> Bool
$c/= :: forall s. Eq s => Token s -> Token s -> Bool
== :: Token s -> Token s -> Bool
$c== :: forall s. Eq s => Token s -> Token s -> Bool
Eq,Eq (Token s)
Eq (Token s)
-> (Token s -> Token s -> Ordering)
-> (Token s -> Token s -> Bool)
-> (Token s -> Token s -> Bool)
-> (Token s -> Token s -> Bool)
-> (Token s -> Token s -> Bool)
-> (Token s -> Token s -> Token s)
-> (Token s -> Token s -> Token s)
-> Ord (Token s)
Token s -> Token s -> Bool
Token s -> Token s -> Ordering
Token s -> Token s -> Token s
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
forall s. Ord s => Eq (Token s)
forall s. Ord s => Token s -> Token s -> Bool
forall s. Ord s => Token s -> Token s -> Ordering
forall s. Ord s => Token s -> Token s -> Token s
min :: Token s -> Token s -> Token s
$cmin :: forall s. Ord s => Token s -> Token s -> Token s
max :: Token s -> Token s -> Token s
$cmax :: forall s. Ord s => Token s -> Token s -> Token s
>= :: Token s -> Token s -> Bool
$c>= :: forall s. Ord s => Token s -> Token s -> Bool
> :: Token s -> Token s -> Bool
$c> :: forall s. Ord s => Token s -> Token s -> Bool
<= :: Token s -> Token s -> Bool
$c<= :: forall s. Ord s => Token s -> Token s -> Bool
< :: Token s -> Token s -> Bool
$c< :: forall s. Ord s => Token s -> Token s -> Bool
compare :: Token s -> Token s -> Ordering
$ccompare :: forall s. Ord s => Token s -> Token s -> Ordering
$cp1Ord :: forall s. Ord s => Eq (Token s)
Ord,Int -> Token s -> ShowS
[Token s] -> ShowS
Token s -> String
(Int -> Token s -> ShowS)
-> (Token s -> String) -> ([Token s] -> ShowS) -> Show (Token s)
forall s. Show s => Int -> Token s -> ShowS
forall s. Show s => [Token s] -> ShowS
forall s. Show s => Token s -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Token s] -> ShowS
$cshowList :: forall s. Show s => [Token s] -> ShowS
show :: Token s -> String
$cshow :: forall s. Show s => Token s -> String
showsPrec :: Int -> Token s -> ShowS
$cshowsPrec :: forall s. Show s => Int -> Token s -> ShowS
Show)

instance Functor Token where
  fmap :: (a -> b) -> Token a -> Token b
fmap a -> b
f (Important a
s) = b -> Token b
forall s. s -> Token s
Important (a -> b
f a
s)
  fmap a -> b
f (Other a
s) = b -> Token b
forall s. s -> Token s
Other (a -> b
f a
s)
  {-# INLINE fmap #-}

-- | Extract the contents of a 'Token'.
detok :: Token s -> s
detok :: Token s -> s
detok (Important s
s) = s
s
detok (Other s
s) = s
s
{-# INLINE detok #-}

-- | 'True' if the given 'Token' is 'Important'; 'False' otherwise.
isImportant :: Token s -> Bool
isImportant :: Token s -> Bool
isImportant (Important s
_) = Bool
True
isImportant Token s
_ = Bool
False

-- | 'True' if the given 'Token' is /not/ 'Important'; 'False'
-- otherwise.
notImportant :: Token s -> Bool
notImportant :: Token s -> Bool
notImportant (Other s
_) = Bool
True
notImportant Token s
_ = Bool
False

-- | Return the contents of only 'Important' (non-space) tokens.
importants :: [Token s] -> [s]
importants :: [Token s] -> [s]
importants = (Token s -> s) -> [Token s] -> [s]
forall a b. (a -> b) -> [a] -> [b]
map Token s -> s
forall s. Token s -> s
detok ([Token s] -> [s]) -> ([Token s] -> [Token s]) -> [Token s] -> [s]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Token s -> Bool) -> [Token s] -> [Token s]
forall a. (a -> Bool) -> [a] -> [a]
filter Token s -> Bool
forall s. Token s -> Bool
isImportant

-- | Trim 'Other' 'Token's from both ends of a list of 'Token's.
trimUnimportant :: [Token s] -> [Token s]
trimUnimportant :: [Token s] -> [Token s]
trimUnimportant = ([Token s] -> [Token s]) -> [Token s] -> [Token s]
forall s. ([Token s] -> [Token s]) -> [Token s] -> [Token s]
aux [Token s] -> [Token s]
forall a. a -> a
id ([Token s] -> [Token s])
-> ([Token s] -> [Token s]) -> [Token s] -> [Token s]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Token s -> Bool) -> [Token s] -> [Token s]
forall a. (a -> Bool) -> [a] -> [a]
dropWhile (Bool -> Bool
not (Bool -> Bool) -> (Token s -> Bool) -> Token s -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Token s -> Bool
forall s. Token s -> Bool
isImportant)
  where aux :: ([Token s] -> [Token s]) -> [Token s] -> [Token s]
aux [Token s] -> [Token s]
_ [] = []
        aux [Token s] -> [Token s]
acc (t :: Token s
t@(Important s
_) : [Token s]
ts) = [Token s] -> [Token s]
acc (Token s
t Token s -> [Token s] -> [Token s]
forall a. a -> [a] -> [a]
: ([Token s] -> [Token s]) -> [Token s] -> [Token s]
aux [Token s] -> [Token s]
forall a. a -> a
id [Token s]
ts)
        aux [Token s] -> [Token s]
acc (t :: Token s
t@(Other s
_) : [Token s]
ts) = ([Token s] -> [Token s]) -> [Token s] -> [Token s]
aux ([Token s] -> [Token s]
acc ([Token s] -> [Token s])
-> ([Token s] -> [Token s]) -> [Token s] -> [Token s]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Token s
tToken s -> [Token s] -> [Token s]
forall a. a -> [a] -> [a]
:)) [Token s]
ts

-- | Is a 'Token' a newline character?
newLine :: (Eq s, IsString s) => Token s -> Bool
newLine :: Token s -> Bool
newLine (Other s
s) = s
s s -> s -> Bool
forall a. Eq a => a -> a -> Bool
== s
"\n"
newLine Token s
_ = Bool
False

maybeImp :: Stringy s => s -> [Token s]
maybeImp :: s -> [Token s]
maybeImp s
s = if s -> Bool
forall s. Stringy s => s -> Bool
isEmpty s
s then [] else [s -> Token s
forall s. s -> Token s
Important s
s]

digitsFromBase :: Stringy s => Int -> s -> s
digitsFromBase :: Int -> s -> s
digitsFromBase Int
base = String -> s
forall a. IsString a => String -> a
fromString (String -> s) -> (s -> String) -> s -> s
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> String
forall a. Show a => a -> String
show (Int -> String) -> (s -> Int) -> s -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Int -> Int -> Int) -> Int -> [Int] -> Int
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' Int -> Int -> Int
aux Int
0 ([Int] -> Int) -> (s -> [Int]) -> s -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Int) -> String -> [Int]
forall a b. (a -> b) -> [a] -> [b]
map Char -> Int
digitToInt (String -> [Int]) -> (s -> String) -> s -> [Int]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. s -> String
forall s. Stringy s => s -> String
toChars
  where aux :: Int -> Int -> Int
aux Int
acc Int
d = Int
base Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
acc Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
d

escapeChar :: Stringy s => Char -> Maybe s
escapeChar :: Char -> Maybe s
escapeChar = (String -> s) -> Maybe String -> Maybe s
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap String -> s
forall a. IsString a => String -> a
fromString (Maybe String -> Maybe s)
-> (Char -> Maybe String) -> Char -> Maybe s
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> [(Char, String)] -> Maybe String)
-> [(Char, String)] -> Char -> Maybe String
forall a b c. (a -> b -> c) -> b -> a -> c
flip Char -> [(Char, String)] -> Maybe String
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup [(Char, String)]
lut
  where lut :: [(Char, String)]
lut = ((Char, Int) -> (Char, String))
-> [(Char, Int)] -> [(Char, String)]
forall a b. (a -> b) -> [a] -> [b]
map ((Int -> String) -> (Char, Int) -> (Char, String)
forall (a :: * -> * -> *) b c d.
Arrow a =>
a b c -> a (d, b) (d, c)
second (Int -> String
forall a. Show a => a -> String
show :: Int -> String))
                  [ (Char
'a', Int
0x07), (Char
'b', Int
0x08), (Char
'f', Int
0x0C), (Char
'n', Int
0x0A)
                  , (Char
'r', Int
0x0D), (Char
't', Int
0x09), (Char
'v', Int
0x0B), (Char
'\\', Int
0x5C)
                  , (Char
'\'', Int
0x27), (Char
'"', Int
0x22), (Char
'?', Int
0x3F) ]

data TokChar = TokSpace Char | TokQuote | TokDQuote

-- | Break a 'String' into space and non-whitespace runs.
tokWords :: Stringy s => s -> [Token s]
tokWords :: s -> [Token s]
tokWords s
s =
  case (Char -> Maybe TokChar) -> s -> Maybe (TokChar, s, s)
forall s t. Stringy s => (Char -> Maybe t) -> s -> Maybe (t, s, s)
sbreak Char -> Maybe TokChar
aux s
s of
     -- No word breaks
     Maybe (TokChar, s, s)
Nothing -> [s -> Token s
forall s. s -> Token s
Important s
s]

     -- Word delimited by space
     Just (TokSpace Char
c, s
pre, s
pos) ->
       case (Char -> Maybe Char) -> s -> Maybe (Char, s, s)
forall s t. Stringy s => (Char -> Maybe t) -> s -> Maybe (t, s, s)
sbreak ((Char -> Bool) -> Char -> Maybe Char
forall a. (a -> Bool) -> a -> Maybe a
predicateJust (Bool -> Bool
not (Bool -> Bool) -> (Char -> Bool) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
isSpace)) s
pos of
         Maybe (Char, s, s)
Nothing -> s -> [Token s]
forall s. Stringy s => s -> [Token s]
maybeImp s
pre [Token s] -> [Token s] -> [Token s]
forall a. [a] -> [a] -> [a]
++ [s -> Token s
forall s. s -> Token s
Other (Char -> s -> s
forall s. Stringy s => Char -> s -> s
cons Char
c s
pos)]
         Just (Char
c', s
spaces, s
pos') ->
           s -> [Token s]
forall s. Stringy s => s -> [Token s]
maybeImp s
pre [Token s] -> [Token s] -> [Token s]
forall a. [a] -> [a] -> [a]
++
           s -> Token s
forall s. s -> Token s
Other (Char -> s -> s
forall s. Stringy s => Char -> s -> s
cons Char
c s
spaces) Token s -> [Token s] -> [Token s]
forall a. a -> [a] -> [a]
: s -> [Token s]
forall s. Stringy s => s -> [Token s]
tokWords (Char -> s -> s
forall s. Stringy s => Char -> s -> s
cons Char
c' s
pos')

     -- Possible character literal
     Just (TokChar
TokQuote, s
pre, s
pos) ->
       let pre' :: s
pre' = s -> Char -> s
forall s. Stringy s => s -> Char -> s
snoc s
pre Char
'\''
       in case s
pos of
            Char
'\\' :. s
cs ->
              case (Char -> Maybe ()) -> s -> Maybe ((), s, s)
forall s t. Stringy s => (Char -> Maybe t) -> s -> Maybe (t, s, s)
sbreak (Bool -> Maybe ()
boolJust (Bool -> Maybe ()) -> (Char -> Bool) -> Char -> Maybe ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\'')) s
cs of
                Maybe ((), s, s)
Nothing -> [s -> Token s
forall s. s -> Token s
Important (s
pre' s -> s -> s
forall a. Semigroup a => a -> a -> a
<> s
pos)]

                Just (()
_,s
esc,s
pos')
                  | s -> Bool
forall s. Stringy s => s -> Bool
isEmpty s
esc ->
                    case (Char -> Maybe ()) -> s -> Maybe ((), s, s)
forall s t. Stringy s => (Char -> Maybe t) -> s -> Maybe (t, s, s)
sbreak (Bool -> Maybe ()
boolJust (Bool -> Maybe ()) -> (Char -> Bool) -> Char -> Maybe ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\'')) s
pos' of
                      Just (()
_,s
esc', s
pos'')
                        | s -> Bool
forall s. Stringy s => s -> Bool
isEmpty s
esc' ->
                          s -> Token s
forall s. s -> Token s
Important s
pre Token s -> [Token s] -> [Token s]
forall a. a -> [a] -> [a]
: s -> Token s
forall s. s -> Token s
Important (s
"'\\\''") Token s -> [Token s] -> [Token s]
forall a. a -> [a] -> [a]
: s -> [Token s]
forall s. Stringy s => s -> [Token s]
tokWords s
pos''
                          -- Important (fromJust $ escapeChar '\'') : tokWords pos''
                      Maybe ((), s, s)
_ -> [s -> Token s
forall s. s -> Token s
Important (s
pre' s -> s -> s
forall a. Semigroup a => a -> a -> a
<> s
pos)]
                  | Bool
otherwise ->
                  let esc' :: Token s
esc' = if (Char -> Bool) -> s -> Bool
forall s. Stringy s => (Char -> Bool) -> s -> Bool
sall Char -> Bool
isOctDigit s
esc
                             then s -> Token s
forall s. s -> Token s
Important (Int -> s -> s
forall s. Stringy s => Int -> s -> s
digitsFromBase Int
8 s
esc)
                             else case s
esc of
                                    Char
'x' :. s
hs
                                      | (Char -> Bool) -> s -> Bool
forall s. Stringy s => (Char -> Bool) -> s -> Bool
sall Char -> Bool
isHexDigit s
hs ->
                                      s -> Token s
forall s. s -> Token s
Important (Int -> s -> s
forall s. Stringy s => Int -> s -> s
digitsFromBase Int
16 s
hs)
                                    (Char -> Maybe s
forall s. Stringy s => Char -> Maybe s
escapeChar -> Just s
e) :. s
Nil -> s -> Token s
forall s. s -> Token s
Important s
e
                                    s
_ -> s -> Token s
forall s. s -> Token s
Important (s
"'\\" s -> s -> s
forall a. Semigroup a => a -> a -> a
<> s -> Char -> s
forall s. Stringy s => s -> Char -> s
snoc s
esc Char
'\'')
                  in  s -> [Token s]
forall s. Stringy s => s -> [Token s]
maybeImp s
pre [Token s] -> [Token s] -> [Token s]
forall a. [a] -> [a] -> [a]
++ Token s
esc' Token s -> [Token s] -> [Token s]
forall a. a -> [a] -> [a]
: s -> [Token s]
forall s. Stringy s => s -> [Token s]
tokWords s
pos'
            Char
c:.(Char
'\'':.s
cs) -> s -> [Token s]
forall s. Stringy s => s -> [Token s]
maybeImp s
pre
                                [Token s] -> [Token s] -> [Token s]
forall a. [a] -> [a] -> [a]
++ s -> Token s
forall s. s -> Token s
Important (String -> s
forall a. IsString a => String -> a
fromString [Char
'\'', Char
c, Char
'\''])
                                Token s -> [Token s] -> [Token s]
forall a. a -> [a] -> [a]
: s -> [Token s]
forall s. Stringy s => s -> [Token s]
tokWords s
cs
            Char
_:.s
_ -> let oops :: s
oops = s -> Char -> s
forall s. Stringy s => s -> Char -> s
snoc s
pre Char
'\''
                    in case s -> [Token s]
forall s. Stringy s => s -> [Token s]
tokWords s
pos of
                         (Important s
t:[Token s]
ts) -> s -> Token s
forall s. s -> Token s
Important (s
oopss -> s -> s
forall a. Semigroup a => a -> a -> a
<>s
t) Token s -> [Token s] -> [Token s]
forall a. a -> [a] -> [a]
: [Token s]
ts
                         [Token s]
ts -> s -> Token s
forall s. s -> Token s
Important s
oops Token s -> [Token s] -> [Token s]
forall a. a -> [a] -> [a]
: [Token s]
ts
            s
_ -> [s -> Token s
forall s. s -> Token s
Important (s -> Char -> s
forall s. Stringy s => s -> Char -> s
snoc s
pre Char
'\'')]

     -- String literal
     Just (TokChar
TokDQuote, s
pre, s
pos) ->
       let (s
lit,s
pos') = s -> (s, s)
forall s. Stringy s => s -> (s, s)
skipLiteral s
pos
       in (if s -> Bool
forall s. Stringy s => s -> Bool
isEmpty s
pre then [] else [s -> Token s
forall s. s -> Token s
Important s
pre])
          [Token s] -> [Token s] -> [Token s]
forall a. [a] -> [a] -> [a]
++ s -> Token s
forall s. s -> Token s
Important (Char -> s -> s
forall s. Stringy s => Char -> s -> s
cons Char
'"' s
lit) Token s -> [Token s] -> [Token s]
forall a. a -> [a] -> [a]
: s -> [Token s]
forall s. Stringy s => s -> [Token s]
tokWords s
pos'
  where aux :: Char -> Maybe TokChar
aux Char
c | Char -> Bool
isSpace Char
c = TokChar -> Maybe TokChar
forall a. a -> Maybe a
Just (Char -> TokChar
TokSpace Char
c)
              | Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\'' = TokChar -> Maybe TokChar
forall a. a -> Maybe a
Just TokChar
TokQuote
              | Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'"' = TokChar -> Maybe TokChar
forall a. a -> Maybe a
Just TokChar
TokDQuote
              | Bool
otherwise = Maybe TokChar
forall a. Maybe a
Nothing
        {-# INLINE aux #-}
{-# INLINABLE tokWords #-}

data LitStringChar = DBackSlash | EscapedDQuote | DQuote

-- | Skip over a string or character literal returning the literal and
-- the remaining the input.
skipLiteral :: Stringy s => s -> (s,s)
skipLiteral :: s -> (s, s)
skipLiteral s
s =
  case [(s, LitStringChar)] -> s -> Maybe (LitStringChar, s, s)
forall s t. Stringy s => [(s, t)] -> s -> Maybe (t, s, s)
breakOn [(s
"\\\\", LitStringChar
DBackSlash), (s
"\\\"", LitStringChar
EscapedDQuote), (s
"\"", LitStringChar
DQuote)] s
s of
    Maybe (LitStringChar, s, s)
Nothing -> (s
s, s
forall a. Monoid a => a
mempty) -- Unmatched double quote?!
    Just (LitStringChar
DBackSlash, s
pre, s
pos) -> (s -> s) -> (s, s) -> (s, s)
forall (a :: * -> * -> *) b c d.
Arrow a =>
a b c -> a (b, d) (c, d)
first ((s
pre s -> s -> s
forall a. Semigroup a => a -> a -> a
<> s
"\\\\") s -> s -> s
forall a. Semigroup a => a -> a -> a
<>) (s -> (s, s)
forall s. Stringy s => s -> (s, s)
skipLiteral s
pos)
    Just (LitStringChar
EscapedDQuote, s
pre, s
pos) -> (s -> s) -> (s, s) -> (s, s)
forall (a :: * -> * -> *) b c d.
Arrow a =>
a b c -> a (b, d) (c, d)
first ((s
pre s -> s -> s
forall a. Semigroup a => a -> a -> a
<> s
"\\\"") s -> s -> s
forall a. Semigroup a => a -> a -> a
<>) (s -> (s, s)
forall s. Stringy s => s -> (s, s)
skipLiteral s
pos)
    Just (LitStringChar
DQuote, s
pre, s
pos) -> (s -> Char -> s
forall s. Stringy s => s -> Char -> s
snoc s
pre Char
'"', s
pos)
{-# INLINABLE skipLiteral #-}

-- | @splits isDelimiter str@ tokenizes @str@ using @isDelimiter@ as a
-- delimiter predicate. Leading whitespace is also stripped from
-- tokens.
splits :: Stringy s => (Char -> Bool) -> s -> [s]
splits :: (Char -> Bool) -> s -> [s]
splits Char -> Bool
isDelim = (s -> Bool) -> [s] -> [s]
forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not (Bool -> Bool) -> (s -> Bool) -> s -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. s -> Bool
forall s. Stringy s => s -> Bool
isEmpty) ([s] -> [s]) -> (s -> [s]) -> s -> [s]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. s -> [s]
forall t. Stringy t => t -> [t]
go (s -> [s]) -> (s -> s) -> s -> [s]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> s -> s
forall s. Stringy s => (Char -> Bool) -> s -> s
sdropWhile Char -> Bool
isSpace
  where go :: t -> [t]
go t
s = case (Char -> Maybe Char) -> t -> Maybe (Char, t, t)
forall s t. Stringy s => (Char -> Maybe t) -> s -> Maybe (t, s, s)
sbreak (\Char
c -> if Char -> Bool
isDelim Char
c then Char -> Maybe Char
forall a. a -> Maybe a
Just Char
c else Maybe Char
forall a. Maybe a
Nothing) t
s of
                  Maybe (Char, t, t)
Nothing -> [t
s]
                  Just (Char
d, t
pre, t
pos) ->
                    t
pre t -> [t] -> [t]
forall a. a -> [a] -> [a]
: String -> t
forall a. IsString a => String -> a
fromString [Char
d] t -> [t] -> [t]
forall a. a -> [a] -> [a]
: t -> [t]
go ((Char -> Bool) -> t -> t
forall s. Stringy s => (Char -> Bool) -> s -> s
sdropWhile Char -> Bool
isSpace t
pos)
{-# INLINE splits #-}

-- | Predicate on space characters based on something approximating
-- valid identifier syntax. This is used to break apart non-space
-- characters.
validIdentifierChar :: Char -> Bool
validIdentifierChar :: Char -> Bool
validIdentifierChar Char
c = Char -> Bool
isAlphaNum Char
c Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'_' Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\''

-- | Something like @12E+FOO@ is a single pre-processor token, so
-- @FOO@ should not be macro expanded.
fixExponents :: Stringy s => [Token s] -> [Token s]
fixExponents :: [Token s] -> [Token s]
fixExponents [] = []
fixExponents (t1' :: Token s
t1'@(Important s
t1) : ts :: [Token s]
ts@(Important s
t2 : Important s
t3 : [Token s]
ts')) =
  case (,,,) ((Char, s)
 -> (s, Char)
 -> (Char, s)
 -> (Char, s)
 -> ((Char, s), (s, Char), (Char, s), (Char, s)))
-> Maybe (Char, s)
-> Maybe
     ((s, Char)
      -> (Char, s)
      -> (Char, s)
      -> ((Char, s), (s, Char), (Char, s), (Char, s)))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> s -> Maybe (Char, s)
forall s. Stringy s => s -> Maybe (Char, s)
uncons s
t1 Maybe
  ((s, Char)
   -> (Char, s)
   -> (Char, s)
   -> ((Char, s), (s, Char), (Char, s), (Char, s)))
-> Maybe (s, Char)
-> Maybe
     ((Char, s)
      -> (Char, s) -> ((Char, s), (s, Char), (Char, s), (Char, s)))
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> s -> Maybe (s, Char)
forall s. Stringy s => s -> Maybe (s, Char)
unsnoc s
t1 Maybe
  ((Char, s)
   -> (Char, s) -> ((Char, s), (s, Char), (Char, s), (Char, s)))
-> Maybe (Char, s)
-> Maybe
     ((Char, s) -> ((Char, s), (s, Char), (Char, s), (Char, s)))
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> s -> Maybe (Char, s)
forall s. Stringy s => s -> Maybe (Char, s)
uncons s
t2 Maybe ((Char, s) -> ((Char, s), (s, Char), (Char, s), (Char, s)))
-> Maybe (Char, s)
-> Maybe ((Char, s), (s, Char), (Char, s), (Char, s))
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> s -> Maybe (Char, s)
forall s. Stringy s => s -> Maybe (Char, s)
uncons s
t3 of
    Just !(!(!Char
d1,s
_), !(s
_,!Char
e), !(!Char
c,!s
cs), !(!Char
d2,s
_))
      | Char -> String -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
elem Char
c (String
"-+" :: [Char]) Bool -> Bool -> Bool
&&
        s -> Bool
forall s. Stringy s => s -> Bool
isEmpty s
cs Bool -> Bool -> Bool
&& Char -> Bool
isDigit Char
d1 Bool -> Bool -> Bool
&& Char -> Bool
isAlphaNum Char
d2 Bool -> Bool -> Bool
&&
        Char -> String -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
elem Char
e (String
"eE" :: [Char]) -> let t :: s
t = s
t1 s -> s -> s
forall a. Semigroup a => a -> a -> a
<> s
t2 s -> s -> s
forall a. Semigroup a => a -> a -> a
<> s
t3
                                   in s
t s -> [Token s] -> [Token s]
`seq` s -> Token s
forall s. s -> Token s
Important s
t Token s -> [Token s] -> [Token s]
forall a. a -> [a] -> [a]
: [Token s] -> [Token s]
forall s. Stringy s => [Token s] -> [Token s]
fixExponents [Token s]
ts'
    Maybe ((Char, s), (s, Char), (Char, s), (Char, s))
_ -> Token s
t1' Token s -> [Token s] -> [Token s]
forall a. a -> [a] -> [a]
: [Token s] -> [Token s]
forall s. Stringy s => [Token s] -> [Token s]
fixExponents [Token s]
ts
fixExponents (Token s
t:[Token s]
ts) = Token s
t Token s -> [Token s] -> [Token s]
forall a. a -> [a] -> [a]
: [Token s] -> [Token s]
forall s. Stringy s => [Token s] -> [Token s]
fixExponents [Token s]
ts
{-# INLINABLE fixExponents #-}

-- | Break an input 'String' into a sequence of 'Tokens'. Warning:
-- This may not exactly correspond to your target language's
-- definition of a valid identifier!
tokenize :: Stringy s => s -> [Token s]
tokenize :: s -> [Token s]
tokenize = [Token s] -> [Token s]
forall s. Stringy s => [Token s] -> [Token s]
fixExponents ([Token s] -> [Token s]) -> (s -> [Token s]) -> s -> [Token s]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Token s -> [Token s]) -> [Token s] -> [Token s]
forall (t :: * -> *) m a.
(Foldable t, Monoid m) =>
(a -> m) -> t a -> m
foldMap Token s -> [Token s]
forall s. Stringy s => Token s -> [Token s]
seps ([Token s] -> [Token s]) -> (s -> [Token s]) -> s -> [Token s]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. s -> [Token s]
forall s. Stringy s => s -> [Token s]
tokWords
  where seps :: Token s -> [Token s]
seps t :: Token s
t@(Other s
_) = [Token s
t]
        seps t :: Token s
t@(Important s
s) =
          case s -> Maybe (Char, s)
forall s. Stringy s => s -> Maybe (Char, s)
uncons s
s of
            Maybe (Char, s)
Nothing -> []
            Just (Char
c,s
_)
              | Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'"' -> [Token s
t]
              | Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\'' -> [Token s
t]
              | Bool
otherwise -> (s -> Token s) -> [s] -> [Token s]
forall a b. (a -> b) -> [a] -> [b]
map s -> Token s
forall s. s -> Token s
Important ((Char -> Bool) -> s -> [s]
forall s. Stringy s => (Char -> Bool) -> s -> [s]
splits (Bool -> Bool
not (Bool -> Bool) -> (Char -> Bool) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
validIdentifierChar) s
s)
{-# INLINABLE tokenize #-}

-- | Collapse a sequence of 'Tokens' back into a 'String'. @detokenize
-- . tokenize == id@.
detokenize :: Monoid s => [Token s] -> s
detokenize :: [Token s] -> s
detokenize = (Token s -> s) -> [Token s] -> s
forall (t :: * -> *) m a.
(Foldable t, Monoid m) =>
(a -> m) -> t a -> m
foldMap Token s -> s
forall s. Token s -> s
detok
{-# INLINE detokenize #-}