module Text.Read.Lex
  
  ( Lexeme(..)  
  
  , lex         
  , hsLex       
  , lexChar     
  , readIntP    
  , readOctP    
  , readDecP    
  , readHexP    
  )
 where
import Text.ParserCombinators.ReadP
#ifdef __GLASGOW_HASKELL__
import GHC.Base
import GHC.Num( Num(..), Integer )
import GHC.Show( Show(..) )
#ifndef __HADDOCK__
import  GHC.Unicode ( isSpace, isAlpha, isAlphaNum )
#endif
import GHC.Real( Ratio(..), Integral, Rational, (%), fromIntegral, 
                 toInteger, (^), (^^), infinity, notANumber )
import GHC.List
import GHC.Enum( maxBound )
#else
import Prelude hiding ( lex )
import Data.Char( chr, ord, isSpace, isAlpha, isAlphaNum )
import Data.Ratio( Ratio, (%) )
#endif
#ifdef __HUGS__
import Hugs.Prelude( Ratio(..) )
#endif
import Data.Maybe
import Control.Monad
data Lexeme
  = Char   Char         
  | String String       
  | Punc   String       
  | Ident  String       
  | Symbol String       
  | Int Integer         
  | Rat Rational        
  | EOF
 deriving (Eq, Show)
lex :: ReadP Lexeme
lex = skipSpaces >> lexToken
hsLex :: ReadP String
hsLex = do skipSpaces 
           (s,_) <- gather lexToken
           return s
lexToken :: ReadP Lexeme
lexToken = lexEOF     +++
           lexLitChar +++ 
           lexString  +++ 
           lexPunc    +++ 
           lexSymbol  +++ 
           lexId      +++ 
           lexNumber
lexEOF :: ReadP Lexeme
lexEOF = do s <- look
            guard (null s)
            return EOF
lexPunc :: ReadP Lexeme
lexPunc =
  do c <- satisfy isPuncChar
     return (Punc [c])
 where
  isPuncChar c = c `elem` ",;()[]{}`"
lexSymbol :: ReadP Lexeme
lexSymbol =
  do s <- munch1 isSymbolChar
     if s `elem` reserved_ops then 
        return (Punc s)         
      else
        return (Symbol s)
 where
  isSymbolChar c = c `elem` "!@#$%&*+./<=>?\\^|:-~"
  reserved_ops   = ["..", "::", "=", "\\", "|", "<-", "->", "@", "~", "=>"]
lexId :: ReadP Lexeme
lexId = lex_nan <++ lex_id
  where
        
        
    lex_nan = (string "NaN"      >> return (Rat notANumber)) +++
              (string "Infinity" >> return (Rat infinity))
  
    lex_id = do c <- satisfy isIdsChar
                s <- munch isIdfChar
                return (Ident (c:s))
          
    isIdsChar c = isAlpha c || c == '_'
    isIdfChar c = isAlphaNum c || c `elem` "_'"
#ifndef __GLASGOW_HASKELL__
infinity, notANumber :: Rational
infinity   = 1 :% 0
notANumber = 0 :% 0
#endif
lexLitChar :: ReadP Lexeme
lexLitChar =
  do char '\''
     (c,esc) <- lexCharE
     guard (esc || c /= '\'')   
     char '\''
     return (Char c)
lexChar :: ReadP Char
lexChar = do { (c,_) <- lexCharE; return c }
lexCharE :: ReadP (Char, Bool)  
lexCharE =
  do c1 <- get
     if c1 == '\\'
       then do c2 <- lexEsc; return (c2, True)
       else do return (c1, False)
 where 
  lexEsc =
    lexEscChar
      +++ lexNumeric
        +++ lexCntrlChar
          +++ lexAscii
  
  lexEscChar =
    do c <- get
       case c of
         'a'  -> return '\a'
         'b'  -> return '\b'
         'f'  -> return '\f'
         'n'  -> return '\n'
         'r'  -> return '\r'
         't'  -> return '\t'
         'v'  -> return '\v'
         '\\' -> return '\\'
         '\"' -> return '\"'
         '\'' -> return '\''
         _    -> pfail
  
  lexNumeric =
    do base <- lexBaseChar <++ return 10
       n    <- lexInteger base
       guard (n <= toInteger (ord maxBound))
       return (chr (fromInteger n))
  lexCntrlChar =
    do char '^'
       c <- get
       case c of
         '@'  -> return '\^@'
         'A'  -> return '\^A'
         'B'  -> return '\^B'
         'C'  -> return '\^C'
         'D'  -> return '\^D'
         'E'  -> return '\^E'
         'F'  -> return '\^F'
         'G'  -> return '\^G'
         'H'  -> return '\^H'
         'I'  -> return '\^I'
         'J'  -> return '\^J'
         'K'  -> return '\^K'
         'L'  -> return '\^L'
         'M'  -> return '\^M'
         'N'  -> return '\^N'
         'O'  -> return '\^O'
         'P'  -> return '\^P'
         'Q'  -> return '\^Q'
         'R'  -> return '\^R'
         'S'  -> return '\^S'
         'T'  -> return '\^T'
         'U'  -> return '\^U'
         'V'  -> return '\^V'
         'W'  -> return '\^W'
         'X'  -> return '\^X'
         'Y'  -> return '\^Y'
         'Z'  -> return '\^Z'
         '['  -> return '\^['
         '\\' -> return '\^\'
         ']'  -> return '\^]'
         '^'  -> return '\^^'
         '_'  -> return '\^_'
         _    -> pfail
  lexAscii =
    do choice
         [ (string "SOH" >> return '\SOH') <++
           (string "SO"  >> return '\SO') 
                
                
         , string "NUL" >> return '\NUL'
         , string "STX" >> return '\STX'
         , string "ETX" >> return '\ETX'
         , string "EOT" >> return '\EOT'
         , string "ENQ" >> return '\ENQ'
         , string "ACK" >> return '\ACK'
         , string "BEL" >> return '\BEL'
         , string "BS"  >> return '\BS'
         , string "HT"  >> return '\HT'
         , string "LF"  >> return '\LF'
         , string "VT"  >> return '\VT'
         , string "FF"  >> return '\FF'
         , string "CR"  >> return '\CR'
         , string "SI"  >> return '\SI'
         , string "DLE" >> return '\DLE'
         , string "DC1" >> return '\DC1'
         , string "DC2" >> return '\DC2'
         , string "DC3" >> return '\DC3'
         , string "DC4" >> return '\DC4'
         , string "NAK" >> return '\NAK'
         , string "SYN" >> return '\SYN'
         , string "ETB" >> return '\ETB'
         , string "CAN" >> return '\CAN'
         , string "EM"  >> return '\EM'
         , string "SUB" >> return '\SUB'
         , string "ESC" >> return '\ESC'
         , string "FS"  >> return '\FS'
         , string "GS"  >> return '\GS'
         , string "RS"  >> return '\RS'
         , string "US"  >> return '\US'
         , string "SP"  >> return '\SP'
         , string "DEL" >> return '\DEL'
         ]
lexString :: ReadP Lexeme
lexString =
  do char '"'
     body id
 where
  body f =
    do (c,esc) <- lexStrItem
       if c /= '"' || esc
         then body (f.(c:))
         else let s = f "" in
              return (String s)
  lexStrItem = (lexEmpty >> lexStrItem)
               +++ lexCharE
  
  lexEmpty =
    do char '\\'
       c <- get
       case c of
         '&'           -> do return ()
         _ | isSpace c -> do skipSpaces; char '\\'; return ()
         _             -> do pfail
type Base   = Int
type Digits = [Int]
lexNumber :: ReadP Lexeme
lexNumber 
  = lexHexOct  <++      
                        
    lexDecNumber        
                
lexHexOct :: ReadP Lexeme
lexHexOct
  = do  char '0'
        base <- lexBaseChar
        digits <- lexDigits base
        return (Int (val (fromIntegral base) 0 digits))
lexBaseChar :: ReadP Int
lexBaseChar = do { c <- get;
                   case c of
                        'o' -> return 8
                        'O' -> return 8
                        'x' -> return 16
                        'X' -> return 16
                        _   -> pfail } 
lexDecNumber :: ReadP Lexeme
lexDecNumber =
  do xs    <- lexDigits 10
     mFrac <- lexFrac <++ return Nothing
     mExp  <- lexExp  <++ return Nothing
     return (value xs mFrac mExp)
 where
  value xs mFrac mExp = valueFracExp (val 10 0 xs) mFrac mExp
  
  valueFracExp :: Integer -> Maybe Digits -> Maybe Integer 
               -> Lexeme
  valueFracExp a Nothing Nothing        
    = Int a                                             
  valueFracExp a Nothing (Just exp)
    | exp >= 0  = Int (a * (10 ^ exp))                  
    | otherwise = Rat (valExp (fromInteger a) exp)      
  valueFracExp a (Just fs) mExp 
     = case mExp of
         Nothing  -> Rat rat                            
         Just exp -> Rat (valExp rat exp)               
     where
        rat :: Rational
        rat = fromInteger a + frac 10 0 1 fs
  valExp :: Rational -> Integer -> Rational
  valExp rat exp = rat * (10 ^^ exp)
lexFrac :: ReadP (Maybe Digits)
lexFrac = do char '.'
             fraction <- lexDigits 10
             return (Just fraction)
lexExp :: ReadP (Maybe Integer)
lexExp = do char 'e' +++ char 'E'
            exp <- signedExp +++ lexInteger 10
            return (Just exp)
 where
   signedExp 
     = do c <- char '-' +++ char '+'
          n <- lexInteger 10
          return (if c == '-' then n else n)
lexDigits :: Int -> ReadP Digits
lexDigits base =
  do s  <- look
     xs <- scan s id
     guard (not (null xs))
     return xs
 where
  scan (c:cs) f = case valDig base c of
                    Just n  -> do get; scan cs (f.(n:))
                    Nothing -> do return (f [])
  scan []     f = do return (f [])
lexInteger :: Base -> ReadP Integer
lexInteger base =
  do xs <- lexDigits base
     return (val (fromIntegral base) 0 xs)
val :: Num a => a -> a -> Digits -> a
val _    y []     = y
val base y (x:xs) = y' `seq` val base y' xs
 where
  y' = y * base + fromIntegral x
frac :: Integral a => a -> a -> a -> Digits -> Ratio a
frac _    a b []     = a % b
frac base a b (x:xs) = a' `seq` b' `seq` frac base a' b' xs
 where
  a' = a * base + fromIntegral x
  b' = b * base
valDig :: Num a => a -> Char -> Maybe Int
valDig 8 c
  | '0' <= c && c <= '7' = Just (ord c  ord '0')
  | otherwise            = Nothing
valDig 10 c = valDecDig c
valDig 16 c
  | '0' <= c && c <= '9' = Just (ord c  ord '0')
  | 'a' <= c && c <= 'f' = Just (ord c  ord 'a' + 10)
  | 'A' <= c && c <= 'F' = Just (ord c  ord 'A' + 10)
  | otherwise            = Nothing
valDig _ _ = error "valDig: Bad base"
valDecDig :: Char -> Maybe Int
valDecDig c
  | '0' <= c && c <= '9' = Just (ord c  ord '0')
  | otherwise            = Nothing
readIntP :: Num a => a -> (Char -> Bool) -> (Char -> Int) -> ReadP a
readIntP base isDigit valDigit =
  do s <- munch1 isDigit
     return (val base 0 (map valDigit s))
readIntP' :: Num a => a -> ReadP a
readIntP' base = readIntP base isDigit valDigit
 where
  isDigit  c = maybe False (const True) (valDig base c)
  valDigit c = maybe 0     id           (valDig base c)
readOctP, readDecP, readHexP :: Num a => ReadP a
readOctP = readIntP' 8
readDecP = readIntP' 10
readHexP = readIntP' 16