-- Compiler Toolkit: identifiers -- -- Author : Manuel M. T. Chakravarty -- Created: 14 February 95 -- -- Version $Revision: 1.2 $ from $Date: 2004/11/13 17:26:50 $ -- -- Copyright (c) [1995..1999] Manuel M. T. Chakravarty -- -- This file is free software; you can redistribute it and/or modify -- it under the terms of the GNU General Public License as published by -- the Free Software Foundation; either version 2 of the License, or -- (at your option) any later version. -- -- This file is distributed in the hope that it will be useful, -- but WITHOUT ANY WARRANTY; without even the implied warranty of -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- GNU General Public License for more details. -- --- DESCRIPTION --------------------------------------------------------------- -- -- This module provides an abstract notion of identifiers. -- --- DOCU ---------------------------------------------------------------------- -- -- language: Haskell 98 -- -- * We speed up the equality test between identifiers by assigning an -- identification number to each of them, and providing a special equality -- that compares the lexemes only if the identification numbers are equal. -- -- * The ordering relation on identifiers is also oriented at the -- identification number and, hence, does *not* follow the alphanumerical -- ordering of the lexemes of the identifiers. Instead, it provides a fast -- ordering when identifiers are used as keys in a `Map'. -- -- * The ambiguousness resolving number of an identifier is `-1' when no such -- number is present (so, such identifiers are distinguished from -- identifiers that share the front part of the lexeme while having an -- ambiguousness resolving number). -- -- The ambiguousness resolving number of primitive identifiers (`pid' in the -- grammar contained in the KCode definition) is `-2' (this gives primitive -- identifiers a distinct name space). -- -- * Attributes may be associated to identifiers, except with `OnlyPos' -- identifiers, which have a position as their only attribute (they do not -- carry an attribute identifier, which can be used to index attribute -- tables). -- -- * Internal identifiers that are forming a completely unique name space are -- supported. But note, they do not have a proper lexeme, i.e., they are not -- suited for code generation. -- --- TODO ---------------------------------------------------------------------- -- -- * Hashing is not 8bit clean. -- module Idents (Ident, noARNum, isLegalIdent, lexemeToIdent, internalIdent, onlyPosIdent, cloneIdent, identToLexeme, isIdentSimple, isIdentPrim, stripIdentARNum, getIdentARNum, newIdentARNum, getIdentAttrs, dumpIdent) where import Data.Char import Position (Position, Pos(posOf), nopos) import UNames (Name) import Errors (interr) import Attributes (Attrs, newAttrsOnlyPos, newAttrs, Attributed(attrsOf), posOfAttrsOf) import Binary (Binary(..), putSharedString, getSharedString) -- simple identifier representation (EXPORTED) -- -- identifiers without an ambiguousness resolving number get `noARNum' as -- number -- data Ident = Ident String -- lexeme !Int -- ambiguousness resolving number !Int -- id. number to speed up equality check !Attrs -- attributes of this ident. incl. position -- the definition of the equality allows identifiers to be equal that are -- defined at different source text positions, and aims at speeding up the -- equality test, by comparing the lexemes only if the two numbers are equal -- instance Eq Ident where (Ident s k id _) == (Ident s' k' id' _) = (k == k') && (id == id') && (s == s') -- this does *not* follow the alphanumerical ordering of the lexemes -- instance Ord Ident where (Ident s k id _) < (Ident s' k' id' _) = (k < k') || ((k == k') && (id < id')) || ((k == k') && (id == id') && (s < s')) id1 <= id2 = (id1 < id2) || (id1 == id2) -- for displaying identifiers -- instance Show Ident where showsPrec _ ide = showString ("`" ++ identToLexeme ide ++ "'") -- identifiers are attributed -- instance Attributed Ident where attrsOf (Ident _ _ _ at) = at -- identifiers have a canonical position -- instance Pos Ident where posOf = posOfAttrsOf -- to speed up the equality test we compute some hash-like value for each -- identifiers lexeme and store it in the identifiers representation -- hash function from the dragon book pp437; assumes 7 bit characters and needs -- the (nearly) full range of values guaranteed for `Int' by the Haskell -- language definition; can handle 8 bit characters provided we have 29 bit -- for the `Int's without sign -- quad :: String -> Int quad (c1:c2:c3:c4:s) = ((ord c4 * bits21 + ord c3 * bits14 + ord c2 * bits7 + ord c1) `mod` bits28) + (quad s `mod` bits28) quad (c1:c2:c3:[] ) = ord c3 * bits14 + ord c2 * bits7 + ord c1 quad (c1:c2:[] ) = ord c2 * bits7 + ord c1 quad (c1:[] ) = ord c1 quad ([] ) = 0 bits7 = 2^7 bits14 = 2^14 bits21 = 2^21 bits28 = 2^28 -- used as a substitute for the ambiguousness resolving number if it is not -- present (EXPORTED) -- noARNum :: Int noARNum = -1 -- used as the ambiguousness resolving number for primitive identifiers -- primARNum :: Int primARNum = -2 -- used as the ambiguousness resolving number for internal identifiers -- internARNum :: Int internARNum = -3 -- checks whether the given lexeme is a legal identifier (EXPORTED) -- isLegalIdent :: String -> Bool isLegalIdent [] = False isLegalIdent (c:cs) = if c == '`' then isQualIdent cs else (isAlpha c || c == '_') && isIdent (c:cs) where isIdent = checkTail . (dropWhile isAlphaNumOrUS) checkTail [] = True checkTail ("##") = True checkTail ('#':cs') = all isDigit cs' checkTail _ = False isAlphaNumOrUS c = isAlphaNum c || (c == '_') isAlphaNum c = isAlpha c || isNum c isAlpha c = c `elem` ['a'..'z'] ++ ['A'..'Z'] isNum c = c `elem` ['0'..'9'] isQualIdent cs = let cs' = skip cs in (not . null) cs' && (checkTail . tail) cs' skip [] = [] skip ('\'':cs) = '\'':cs skip ('\\':cs) = case cs of ('\'':cs') -> skip cs' ('\\':cs') -> skip cs' _ -> skip cs skip (c :cs) = skip cs -- given the lexeme of an identifier, yield the abstract identifier (EXPORTED) -- -- * the only attribute of the resulting identifier is its source text -- position; as provided in the first argument of this function -- -- * only minimal error checking, e.g., the characters of the identifier are -- not checked for being alphanumerical only; the correct lexis of the -- identifier should be ensured by the caller, e.g., the scanner or -- `isLegalIdent' -- -- * for reasons of simplicity the complete lexeme is hashed (with `quad') -- lexemeToIdent :: Position -> String -> Name -> Ident lexemeToIdent pos l name = Ident s k (quad s) (newAttrs pos name) where (s, k) = parseIdent pos l -- generate an internal identifier (has no position and cannot be asccociated -- with attributes) (EXPORTED) -- internalIdent :: String -> Ident internalIdent s = Ident s internARNum (quad s) (newAttrsOnlyPos nopos) -- generate a `only pos' identifier (may not be used to index attribute -- tables, but has a position value) (EXPORTED) -- onlyPosIdent :: Position -> String -> Ident onlyPosIdent pos l = Ident s k (quad s) (newAttrsOnlyPos pos) where (s, k) = parseIdent pos l -- Extract the name and ambiguousness resolving number from a lexeme. -- parseIdent :: Position -> String -> (String, Int) parseIdent pos l = if (null l) then interr $ "Idents: lexemeToIdent: Empty lexeme! " ++ show pos else if (head l == '\'') then parseQuoted (tail l) else parseNorm l where -- parse lexeme without quotes -- parseNorm [] = ([], noARNum) parseNorm ("##") = ([], primARNum) parseNorm ('#':cs) = ([], ((read . check) cs)::Int) parseNorm (c :cs) = let (cs', k) = parseNorm cs in (c:cs', k) check [] = interr "Idents: lexemeToIdent: Missing\ \ number!" check ('-':cs) = interr "Idents: lexemeToIdent: Illegal\ \ negative number!" check s = s -- parse lexeme with quotes -- parseQuoted [] = interr endInQuotes parseQuoted ('\\':cs) = parseSpecial cs parseQuoted ('\'':cs) = let (rmd, k) = parseNorm cs in if (null rmd) then ([], k) else interr afterQuotes parseQuoted (c :cs) = let (cs', k) = parseQuoted cs in (c:cs', k) endInQuotes = "Idents: lexemeToIdent: Unexpected end of\ \ lexeme (in quotes)!" afterQuotes = "Idents: lexemeToIdent: Superfluous\ \ characters after quotes!" endInSpecial = "Idents: lexemeToIdent: Unexpected end of\ \ lexeme (in escape sequence)!" illegalSpecial = "Idents: lexemeToIdent: Illegal escape\ \ sequence!" -- parse single escaped character, then continue with -- `parseQuoted' -- parseSpecial [] = interr endInSpecial parseSpecial (c1:c2:c3:cs) | isDigit c1 && isDigit c2 && isDigit c3 = let (cs', k) = parseQuoted cs ord0 = ord '0' d1 = ord c1 - ord0 d2 = ord c2 - ord0 d3 = ord c3 - ord0 in (chr (100*d1 + 10*d2 + d3) :cs', k) parseSpecial (c:cs) | c == '\\' = ('\\':cs', k) | c == '\"' = ('\"':cs', k) | c == '\'' = ('\'':cs', k) | c == 'n' = ('\n':cs', k) | c == 't' = ('\t':cs', k) | c == 'r' = ('\r':cs', k) where (cs', k) = parseQuoted cs parseSpecial _ = interr illegalSpecial -- create an identifier identical to the given one, but with its own set of -- attributes (EXPORTED) -- cloneIdent :: Ident -> Name -> Ident cloneIdent (Ident s k idnum at) name = Ident s k idnum (newAttrs (posOf at) name) -- given an abstract identifier, yield its lexeme (EXPORTED) -- identToLexeme :: Ident -> String identToLexeme (Ident s k _ _) = s ++ suffix where suffix = if (k == noARNum) then "" else if (k == primARNum) then "##" else if (k == internARNum) then "" else "#" ++ show k -- test if the given identifier is simple, i.e., has no ambiguousness -- resolving number and is not a primitive identifier (EXPORTED) -- isIdentSimple :: Ident -> Bool isIdentSimple (Ident _ k _ _) = k == noARNum -- test if the given identifier is a primitive identifier (EXPORTED) -- isIdentPrim :: Ident -> Bool isIdentPrim (Ident _ k _ _) = k == primARNum -- remove ambiguousness resolving of an identifier (EXPORTED) -- -- NOTE: The new identifier will not be equal (==) to the old one! -- stripIdentARNum :: Ident -> Ident stripIdentARNum (Ident s k id at) | k == primARNum || k == internARNum = interr "Idents: stripIdentARNum: \ \Not allowed!" | otherwise = Ident s noARNum id at -- get the ambiguousness resolving of an identifier (EXPORTED) -- getIdentARNum :: Ident -> Int getIdentARNum (Ident s k id at) | k == primARNum || k == internARNum = interr "Idents: getIdentARNum: \ \Not allowed!" | otherwise = k -- enter a new ambiguousness resolving into the identifier (EXPORTED) -- -- NOTE: The new identifier will not be equal (==) to the old one! -- newIdentARNum :: Ident -> Int -> Ident newIdentARNum (Ident s k id at) k' | k' < 0 = interr "Idents: newIdentARNum: \ \Negative number!" | k == primARNum || k == internARNum = interr "Idents: newIdentARNum: \ \Not allowed!" | otherwise = Ident s k' id at -- get the attribute identifier associated with the given identifier (EXPORTED) -- getIdentAttrs :: Ident -> Attrs getIdentAttrs (Ident _ _ _ as) = as -- dump the lexeme and its positions into a string for debugging purposes -- (EXPORTED) -- dumpIdent :: Ident -> String dumpIdent ide = identToLexeme ide ++ " at " ++ show (posOf ide) {-! for Ident derive : GhcBinary !-} {-* Generated by DrIFT : Look, but Don't Touch. *-} instance Binary Ident where put_ bh (Ident aa ab ac ad) = do putSharedString bh aa -- put_ bh aa put_ bh ab put_ bh ac put_ bh ad get bh = do aa <- getSharedString bh -- aa <- get bh ab <- get bh ac <- get bh ad <- get bh return (Ident aa ab ac ad)