module Hpp.Tokens where
import Data.Char (isAlphaNum, isDigit, isSpace)
data Token = Important String
| Other String
deriving (Eq,Ord,Show)
detok :: Token -> String
detok (Important s) = s
detok (Other s) = s
isImportant :: Token -> Bool
isImportant (Important _) = True
isImportant _ = False
importants :: [Token] -> [String]
importants = map detok . filter isImportant
tokWords :: String -> [Token]
tokWords [] = []
tokWords (c:cs)
| isSpace c = let (spaces,rst) = break (not . isSpace) cs
in Other (c : spaces) : tokWords rst
| c == '\'' && isCharLit = goCharLit
| c == '"' = flip skipLiteral cs $ \str rst ->
Important (str []) : tokWords rst
| otherwise = let (chars,rst) = break (not . validIdentifierChar) cs
in Important (c:chars) : tokWords rst
where (isCharLit, goCharLit) =
case cs of
(c':'\'':cs') -> (True, Important ['\'',c','\''] : tokWords cs')
_ -> (False, [])
skipLiteral :: ((String -> String) -> String -> r) -> String -> r
skipLiteral k = go ('"':)
where go acc ('\\':'\\':cs) = go (acc . ("\\\\"++)) cs
go acc ('\\':'"':cs) = go (acc . ("\\\""++)) cs
go acc ('"':cs) = k (acc . ('"':)) cs
go acc (c:cs) = go (acc . (c :)) cs
go acc [] = k acc []
splits :: (Char -> Bool) -> String -> [String]
splits isDelim = filter (not . null) . go . dropWhile isSpace
where go s = case break isDelim s of
(h,[]) -> [dropWhile isSpace h]
(h,d:t) -> dropWhile isSpace h : [d] : go t
validIdentifierChar :: Char -> Bool
validIdentifierChar c = isAlphaNum c || c == '_' || c == '\''
fixExponents :: [Token] -> [Token]
fixExponents [] = []
fixExponents (Important (t1@(d1:_)):Important [c]:Important (d2:t2):ts)
| elem c "-+" && isDigit d1 && elem (last t1) "eE" && isAlphaNum d2 =
Important (t1++c:d2:t2) : fixExponents ts
fixExponents (t:ts) = t : fixExponents ts
tokenize :: String -> [Token]
tokenize = fixExponents . concatMap seps . tokWords
where seps t@(Other _) = [t]
seps t@(Important ('"':_)) = [t]
seps t@(Important ('\'':_)) = [t]
seps (Important s) = map Important $
splits (not . validIdentifierChar) s
detokenize :: [Token] -> String
detokenize = concatMap detok