module Language.Grammars.Murder.Machine (scanHandle,scanFile,scan,getRational,getBaseNumber) where import Data.Char import Data.List import Data.Maybe import System.IO import UU.Scanner.Position import qualified Data.Set as Set import Language.Grammars.Murder.Utils import Language.Grammars.Murder.ScanUtils import Language.Grammars.Murder.Token scanHandle :: ScanOpts -> FilePath -> Handle -> IO [Token] scanHandle opts fn fh = do { txt <- hGetContents fh ; return (scan opts (initPos fn) txt) } scanFile :: ScanOpts -> FilePath -> IO [Token] scanFile opts fn = do txt <- readFile fn return (scan opts (initPos fn) txt) scan :: ScanOpts -> Pos -> String -> [Token] scan opts pos input = if scoLitmode opts then scanLitText pos input else doScan pos input where -- locatein :: Ord a => [a] -> a -> Bool -- locatein es = isJust . btLocateIn compare (tab2tree (sort es)) iskw = (`Set.member` scoKeywordsTxt opts) -- locatein (scoKeywordsTxt opts) isop = (`Set.member` scoKeywordsOps opts) -- locatein (scoKeywordsOps opts) isSymbol = (`Set.member` scoSpecChars opts) -- locatein (scoSpecChars opts) isOpsym = (`Set.member` scoOpChars opts) -- locatein (scoOpChars opts) isPairSym= (`Set.member` scoSpecPairs opts) -- locatein (scoSpecPairs opts) isIdStart c = isLower c || c == '_' isIdChar c = isAlphaNum c || c == '\'' || c == '_' isQIdChar c = isIdChar c || c == '.' allowQual = scoAllowQualified opts scanIdent isId p s = (name,advc (length name) p,rest) where (name,rest) = span isId s scanDollarIdent :: String -> (String,Int,String) scanDollarIdent [] = ("",0,[]) scanDollarIdent ('$':c:s) | not (isSpace c) = let (str,w,s') = scanDollarIdent s in (c:str,w+2,s') scanDollarIdent cs@(c:s) | isSpace c || isSymbol c || isOpsym c = ("",0,cs) scanDollarIdent (c:s) = let (str,w,s') = scanDollarIdent s in (c:str,w+1,s') scanQualified :: String -> (String,String) scanQualified s = qual "" s where split isX s = span (\c -> isX c && c /= '.') s validQuald c = isId c || isOpsym c isId c = isIdStart c || isUpper c qual q s = case s of (c:s') | isUpper c -- possibly a module qualifier -> case split isIdChar s' of (s'',('.':srest@(c':_))) | validQuald c' -- something validly qualifiable follows -> qual (q ++ [c] ++ s'' ++ ".") srest _ -> dflt (c:_) | isOpsym c || isIdChar c -- not a qualifier, an operator or lowercase identifier -> dflt where dflt = (q,s) scanLitText p ('\\':'b':'e':'g':'i':'n':'{':'c':'o':'d':'e':'}':s) | posIs1stColumn p = doScan (advc 12 p) s scanLitText p (c:s) = scanLitText (adv p c) s scanLitText p [] = [] doScan p [] = [] doScan p (c:s) | isSpace c = let (sp,next) = span isSpace s in doScan (foldl adv p (c:sp)) next doScan p ('-':'-':s) = doScan p (dropWhile (/= '\n') s) doScan p ('{':'-':s) = lexNest doScan (advc 2 p) s doScan p ('"':ss) = let (s,p',rest) = scanString (advc 1 p) ss in if null rest || head rest /= '"' then errToken "Unterminated string literal" p : doScan p' rest else valueToken TkString s p : doScan (advc 1 p') (tail rest) doScan p ('$':ss) | scoDollarIdent opts = tok : doScan (advc (w+1) p) ss' where (ident,w,ss') = scanDollarIdent ss tok = if null ident then errToken "Zero length $identifier" p else valueToken TkVarid ident p doScan p ('\\':'e':'n':'d':'{':'c':'o':'d':'e':'}':s) | scoLitmode opts && posIs1stColumn p = scanLitText (advc 10 p) s -- this is experimental, for now, not foolproof, only to be used for the Prelude doScan p ('\'':'\'':ss) = let (s,w,r) = scanDQuoteIdent ss in if null r then errToken "Unterminated double quote ident" p : doScan (advc (w+1) p) r else valueToken TkConid s p : doScan (advc (w+4) p) r doScan p ('\'':ss) = let (mc,cwidth,rest) = scanChar ss in case mc of Nothing -> errToken "Error in character literal" p : doScan (advc cwidth p) rest Just c -> if null rest || head rest /= '\'' then errToken "Unterminated character literal" p : doScan (advc (cwidth+1) p) rest else valueToken TkChar [c] p : doScan (advc (cwidth+2) p) (tail rest) doScan p cs@(c:c2:s) | isPairSym sym = reserved sym p : doScan(advc 2 p) s where sym = [c,c2] doScan p cs@(c:s) | isSymbol c = reserved [c] p : doScan (advc 1 p) s | isIdStart c || isUpper c = let (qualPrefix,qualTail) = scanQualified cs in if null qualPrefix || not allowQual then let (name', p', s') = scanIdent isIdChar (advc 1 p) s name = c:name' tok = if iskw name then reserved name p else valueToken (varKind name) name p in tok : doScan p' s' else case doScan (advc (length qualPrefix) p) qualTail of (tok@(ValToken tp val _):toks) -> ValToken (tokTpQual tp) (qualPrefix ++ val) p : toks ts -> ts | isOpsym c = let (name, s') = span isOpsym cs tok n p (c:_) | length suf' == 2 && isPairSym suf' = (fst (tok pre p []) ++ [reserved suf' (advc (length pre) p)],1) where (pre,suf) = splitAt (length n - 1) n suf' = suf ++ [c] tok n p s | isop n = ([reserved n p],0) | length suf == 2 && isPairSym suf = (fst (tok pre p []) ++ [reserved suf (advc (length pre) p)],0) | c==':' = ([valueToken TkConOp n p],0) | otherwise = ([valueToken TkOp n p],0) where (pre,suf) = splitAt (length n - 2) n (toks,drops) = tok name p s' in toks ++ doScan (advc drops $ foldl adv p name) (drop drops s') | isDigit c = let (tktype,number,width,s') = getNumber cs in valueToken tktype number p : doScan (advc width p) s' | isDigit c = let (tktype,(number,mantissa,exp),w,cs') = getRational' cs m = maybe "" (\mant -> "." ++ mant) e = maybe "" (\(sign,exp) -> "E" ++ maybe "" id sign ++ exp) in valueToken tktype (number ++ m mantissa ++ e exp) p : doScan (advc w p) cs' | otherwise = errToken ("Unexpected character " ++ show c) p : doScan (adv p c) s varKind :: String -> EnumValToken varKind ('_':s) = varKind s varKind (c :s) | isUpper c = TkConid | otherwise = TkVarid varKind [] = TkVarid lexNest :: (Pos -> String -> [Token]) -> Pos -> String -> [Token] lexNest cont pos inp = lexNest' cont pos inp where lexNest' c p ('-':'}':s) = c (advc 2 p) s lexNest' c p ('{':'-':s) = lexNest' (lexNest' c) (advc 2 p) s lexNest' c p (x:s) = lexNest' c (adv p x) s lexNest' _ _ [] = [ errToken "Unterminated nested comment" pos] scanString :: Pos -> String -> (String,Pos,String) scanString p [] = ("",p,[]) scanString p ('\\':'&':xs) = scanString (advc 2 p) xs scanString p ('\'':xs) = let (str,p',r) = scanString (advc 1 p) xs in ('\'': str,p',r) scanString p ('\\':x:xs) | isSpace x = let (white,rest) = span isSpace xs in case rest of ('\\':rest') -> scanString (advc 1 $ foldl adv (advc 2 p) white) rest' _ -> ("",advc 2 p,xs) scanString p xs = let (ch,cw,cr) = getchar xs (str,p',r) = scanString (advc cw p) cr in maybe ("",p,xs) (\c -> (c:str,p',r)) ch scanChar :: [Char] -> (Maybe Char,Int,[Char]) scanChar ('"' :xs) = (Just '"',1,xs) scanChar xs = getchar xs getchar :: [Char] -> (Maybe Char,Int,[Char]) getchar [] = (Nothing,0,[]) getchar s@('\n':_ ) = (Nothing,0,s ) getchar s@('\t':_ ) = (Nothing,0,s) getchar s@('\'':_ ) = (Nothing,0,s) getchar s@('\"':_ ) = (Nothing,0,s) getchar ('\\':xs) = let (c,l,r) = getEscChar xs in (c,l+1,r) getchar (x:xs) = (Just x,1,xs) scanDQuoteIdent :: String -> (String,Int,String) scanDQuoteIdent [] = ("",0,[]) scanDQuoteIdent ('\'':'\'':xs) = ("",0,xs) scanDQuoteIdent (x:xs) = let (s,w,r) = scanDQuoteIdent xs -- should check similar to getchar in (x:s,w+1,r) getEscChar :: [Char] -> (Maybe Char,Int,[Char]) getEscChar [] = (Nothing,0,[]) getEscChar s@('x':xs) = let (tp,n,len,rest) = getNumber ('0' : s) in (Just $ chr $ fromInteger $ getBaseNumber 16 n, len-1, rest) getEscChar s@('o':xs) = let (tp,n,len,rest) = getNumber ('0' : s) in (Just $ chr $ fromInteger $ getBaseNumber 8 n, len-1, rest) getEscChar s@('^':x:xs) = case x `lookup` cntrCntrs of Just c -> (Just c,2,xs) _ -> (Nothing,0,s) where cntrCntrs = [ ('@','\^@'), ('[','\^['), ('\\','\^\'), (']','\^]'), ('^','\^^'), ('_','\^_') ] ++ zip ['A' .. 'Z'] ['\^A' .. '\^Z'] getEscChar s@(x:xs) | isDigit x = let (tp,n,len,rest) = getNumber s val = case tp of TkInteger8 -> getBaseNumber 8 n TkInteger16 -> getBaseNumber 16 n TkInteger10 -> getBaseNumber 10 n in if val >= 0 && val <= 255 then (Just (chr $ fromInteger val),len, rest) else (Nothing,1,rest) | otherwise = case x `lookup` cntrChars of Just c -> (Just c,1,xs) Nothing -> case filter (flip isPrefixOf s . fst) cntrStrs of [] -> (Nothing,0,s) ((m,mr):_) -> (Just mr,ml,drop ml s) where ml = length m where cntrChars = [('a','\a'),('b','\b'),('f','\f'),('n','\n'),('r','\r'),('t','\t') ,('v','\v'),('\\','\\'),('\"','\"'),('\'','\'')] cntrStrs = [ ("NUL",'\NUL'), ("SOH",'\SOH'), ("STX",'\STX'), ("ETX",'\ETX') , ("EOT",'\EOT'), ("ENQ",'\ENQ'), ("ACK",'\ACK'), ("BEL",'\BEL') , ("BS" ,'\BS' ), ("HT" ,'\HT' ), ("LF" ,'\LF' ), ("VT" ,'\VT' ) , ("FF" ,'\FF' ), ("CR" ,'\CR' ), ("SO" ,'\SO' ), ("SI" ,'\SI' ) , ("DLE",'\DLE'), ("DC1",'\DC1'), ("DC2",'\DC2'), ("DC3",'\DC3') , ("DC4",'\DC4'), ("NAK",'\NAK'), ("SYN",'\SYN'), ("ETB",'\ETB') , ("CAN",'\CAN'), ("EM" ,'\EM' ), ("SUB",'\SUB'), ("ESC",'\ESC') , ("FS" ,'\FS' ), ("GS" ,'\GS' ), ("RS" ,'\RS' ), ("US" ,'\US' ) , ("SP" ,'\SP' ), ("DEL",'\DEL') ] getBaseNumber :: Integer -> [Char] -> Integer getBaseNumber base n = foldl (\r x -> toInteger (value x) + base * r) 0 n getNumber :: [Char] -> (EnumValToken,[Char],Int,[Char]) getNumber cs@(c:s) | c /= '0' = num10 | null s = const0 | hs == 'x' || hs == 'X' = num16 | hs == 'o' || hs == 'O' = num8 | otherwise = num10 where (hs:ts) = s const0 = (TkInteger10, "0",1,s) num10 = let (n,r) = span isDigit cs in (TkInteger10,n,length n,r) num16 = readNum isHexaDigit ts TkInteger16 num8 = readNum isOctalDigit ts TkInteger8 readNum p ts tk = let nrs@(n,rs) = span p ts in if null n then const0 else (tk , n, 2+length n,rs) getRational' :: String -> (EnumValToken,(String,Maybe String,Maybe (Maybe String,String)),Int,String) getRational' s = case s2 of ('.':s3@(c:_)) | isDigit c && tktype == TkInteger10 && tktype2 == TkInteger10 -> case scanExp s4 of Just (sign,number3,width3,s5) -> (TkFraction,(number,Just number2,Just (sign,number3)),width + width2 + width3 + 1,s5) _ -> (TkFraction,(number,Just number2,Nothing),width + width2 + 1,s4) where (tktype2,number2,width2,s4) = getNumber s3 _ -> case scanExp s2 of Just (sign,number3,width3,s5) -> (TkFraction,(number,Nothing,Just (sign,number3)),width + width3,s5) _ -> (tktype,(number,Nothing,Nothing),width,s2) where (tktype,number,width,s2) = getNumber s scanExp s = case s of (c:s5) | c == 'e' || c == 'E' -> case s5 of (csign:s6) | csign == '+' || csign == '-' -> case s6 of (c:_) | isDigit c && tktype3 == TkInteger10 -> Just (Just [csign],number3,width3+2,s7) where (tktype3,number3,width3,s7) = getNumber s6 _ -> Nothing | isDigit csign && tktype3 == TkInteger10 -> Just (Nothing,number3,width3+1,s7) where (tktype3,number3,width3,s7) = getNumber s5 _ -> Nothing _ -> Nothing getRational :: String -> (String,Maybe String,Maybe (Maybe String,String)) getRational s = n where (_,n,_,_) = getRational' s isHexaDigit :: Char -> Bool isHexaDigit d = isDigit d || (d >= 'A' && d <= 'F') || (d >= 'a' && d <= 'f') isOctalDigit :: Char -> Bool isOctalDigit d = d >= '0' && d <= '7' value :: Char -> Int value c | isDigit c = ord c - ord '0' | isUpper c = ord c - ord 'A' + 10 | isLower c = ord c - ord 'a' + 10