{-# LANGUAGE CPP, DeriveDataTypeable #-}
-----------------------------------------------------------------------------
-- |
-- Module      : Language.Python.Common.Token
-- Copyright   : (c) 2009 Bernie Pope
-- License     : BSD-style
-- Maintainer  : bjpop@csse.unimelb.edu.au
-- Stability   : experimental
-- Portability : ghc
--
-- Lexical tokens for the Python lexer. Contains the superset of tokens from
-- version 2 and version 3 of Python (they are mostly the same).
-----------------------------------------------------------------------------

module Language.Python.Common.Token (
   -- * The tokens
   Token (..),
   -- * String conversion
   debugTokenString,
   tokenString,
   -- * Classification
   hasLiteral,
   TokenClass (..),
   classifyToken
   ) where

import Language.Python.Common.Pretty
import Language.Python.Common.SrcLocation (SrcSpan (..), SrcLocation (..), Span(getSpan))
import Data.Data

-- | Lexical tokens.
--
-- Every constructor carries the source span it was read from in
-- 'token_span'. Constructors for comments, identifiers and literals also
-- carry the matched source text in 'token_literal'; numeric literals
-- additionally carry a value in 'token_integer' or 'token_double'.
--
-- The order of the constructors is significant for the derived 'Ord' and
-- 'Data' instances, so new constructors should not be inserted casually.
data Token

   -- Whitespace

   -- | Indentation: increase.
   = IndentToken { token_span :: !SrcSpan }
   -- | Indentation: decrease.
   | DedentToken { token_span :: !SrcSpan }
   -- | Newline.
   | NewlineToken { token_span :: !SrcSpan }
   -- | Line join (backslash at end of line).
   | LineJoinToken { token_span :: !SrcSpan }

   -- Comment

   -- | Single line comment.
   | CommentToken { token_span :: !SrcSpan, token_literal :: !String }

   -- Identifiers

   -- | Identifier.
   | IdentifierToken { token_span :: !SrcSpan, token_literal :: !String }

   -- Literals

   -- | Literal: string.
   | StringToken { token_span :: !SrcSpan, token_literal :: !String }
   -- | Literal: byte string.
   | ByteStringToken { token_span :: !SrcSpan, token_literal :: !String }
   -- | Literal: unicode string, version 2 only.
   | UnicodeStringToken { token_span :: !SrcSpan, token_literal :: !String }
   -- | Literal: integer.
   | IntegerToken { token_span :: !SrcSpan, token_literal :: !String, token_integer :: !Integer }
   -- | Literal: long integer. /Version 2 only/.
   | LongIntegerToken { token_span :: !SrcSpan, token_literal :: !String, token_integer :: !Integer }
   -- | Literal: floating point.
   | FloatToken { token_span :: !SrcSpan, token_literal :: !String, token_double :: !Double }
   -- | Literal: imaginary number.
   | ImaginaryToken { token_span :: !SrcSpan, token_literal :: !String, token_double :: !Double }

   -- Keywords

   -- | Keyword: \'def\'.
   | DefToken { token_span :: !SrcSpan }
   -- | Keyword: \'while\'.
   | WhileToken { token_span :: !SrcSpan }
   -- | Keyword: \'if\'.
   | IfToken { token_span :: !SrcSpan }
   -- | Keyword: \'True\'.
   | TrueToken { token_span :: !SrcSpan }
   -- | Keyword: \'False\'.
   | FalseToken { token_span :: !SrcSpan }
   -- | Keyword: \'return\'.
   | ReturnToken { token_span :: !SrcSpan }
   -- | Keyword: \'try\'.
   | TryToken { token_span :: !SrcSpan }
   -- | Keyword: \'except\'.
   | ExceptToken { token_span :: !SrcSpan }
   -- | Keyword: \'raise\'.
   | RaiseToken { token_span :: !SrcSpan }
   -- | Keyword: \'in\'.
   | InToken { token_span :: !SrcSpan }
   -- | Keyword: \'is\'.
   | IsToken { token_span :: !SrcSpan }
   -- | Keyword: \'lambda\'.
   | LambdaToken { token_span :: !SrcSpan }
   -- | Keyword: \'class\'.
   | ClassToken { token_span :: !SrcSpan }
   -- | Keyword: \'finally\'.
   | FinallyToken { token_span :: !SrcSpan }
   -- | Keyword: \'None\'.
   | NoneToken { token_span :: !SrcSpan }
   -- | Keyword: \'for\'.
   | ForToken { token_span :: !SrcSpan }
   -- | Keyword: \'from\'.
   | FromToken { token_span :: !SrcSpan }
   -- | Keyword: \'global\'.
   | GlobalToken { token_span :: !SrcSpan }
   -- | Keyword: \'with\'.
   | WithToken { token_span :: !SrcSpan }
   -- | Keyword: \'as\'.
   | AsToken { token_span :: !SrcSpan }
   -- | Keyword: \'elif\'.
   | ElifToken { token_span :: !SrcSpan }
   -- | Keyword: \'yield\'.
   | YieldToken { token_span :: !SrcSpan }
   -- | Keyword: \'assert\'.
   | AssertToken { token_span :: !SrcSpan }
   -- | Keyword: \'import\'.
   | ImportToken { token_span :: !SrcSpan }
   -- | Keyword: \'pass\'.
   | PassToken { token_span :: !SrcSpan }
   -- | Keyword: \'break\'.
   | BreakToken { token_span :: !SrcSpan }
   -- | Keyword: \'continue\'.
   | ContinueToken { token_span :: !SrcSpan }
   -- | Keyword: \'del\'.
   | DeleteToken { token_span :: !SrcSpan }
   -- | Keyword: \'else\'.
   | ElseToken { token_span :: !SrcSpan }
   -- | Keyword: \'not\'.
   | NotToken { token_span :: !SrcSpan }
   -- | Keyword: boolean conjunction \'and\'.
   | AndToken { token_span :: !SrcSpan }
   -- | Keyword: boolean disjunction \'or\'.
   | OrToken { token_span :: !SrcSpan }

   -- Version 3.x only:

   -- | Keyword: \'nonlocal\' (Python 3.x only)
   | NonLocalToken { token_span :: !SrcSpan }

   -- Version 2.x only:

   -- | Keyword: \'print\'. (Python 2.x only)
   | PrintToken { token_span :: !SrcSpan }
   -- | Keyword: \'exec\'. (Python 2.x only)
   | ExecToken { token_span :: !SrcSpan }

   -- Delimiters

   -- | Delimiter: at sign \'\@\'.
   | AtToken { token_span :: !SrcSpan }
   -- | Delimiter: left round bracket \'(\'.
   | LeftRoundBracketToken { token_span :: !SrcSpan }
   -- | Delimiter: right round bracket \')\'.
   | RightRoundBracketToken { token_span :: !SrcSpan }
   -- | Delimiter: left square bracket \'[\'.
   | LeftSquareBracketToken { token_span :: !SrcSpan }
   -- | Delimiter: right square bracket \']\'.
   | RightSquareBracketToken { token_span :: !SrcSpan }
   -- | Delimiter: left curly bracket \'{\'.
   | LeftBraceToken { token_span :: !SrcSpan }
   -- | Delimiter: right curly bracket \'}\'.
   | RightBraceToken { token_span :: !SrcSpan }
   -- | Delimiter: dot (full stop) \'.\'.
   | DotToken { token_span :: !SrcSpan }
   -- | Delimiter: comma \',\'.
   | CommaToken { token_span :: !SrcSpan }
   -- | Delimiter: semicolon \';\'.
   | SemiColonToken { token_span :: !SrcSpan }
   -- | Delimiter: colon \':\'.
   | ColonToken { token_span :: !SrcSpan }
   -- | Delimiter: ellipsis (three dots) \'...\'.
   | EllipsisToken { token_span :: !SrcSpan }
   -- | Delimiter: right facing arrow \'->\'.
   | RightArrowToken { token_span :: !SrcSpan }
   -- | Delimiter: assignment \'=\'.
   | AssignToken { token_span :: !SrcSpan }
   -- | Delimiter: plus assignment \'+=\'.
   | PlusAssignToken { token_span :: !SrcSpan }
   -- | Delimiter: minus assignment \'-=\'.
   | MinusAssignToken { token_span :: !SrcSpan }
   -- | Delimiter: multiply assignment \'*=\'
   | MultAssignToken { token_span :: !SrcSpan }
   -- | Delimiter: divide assignment \'/=\'.
   | DivAssignToken { token_span :: !SrcSpan }
   -- | Delimiter: modulus assignment \'%=\'.
   | ModAssignToken { token_span :: !SrcSpan }
   -- | Delimiter: power assignment \'**=\'.
   | PowAssignToken { token_span :: !SrcSpan }
   -- | Delimiter: binary-and assignment \'&=\'.
   | BinAndAssignToken { token_span :: !SrcSpan }
   -- | Delimiter: binary-or assignment \'|=\'.
   | BinOrAssignToken { token_span :: !SrcSpan }
   -- | Delimiter: binary-xor assignment \'^=\'.
   | BinXorAssignToken { token_span :: !SrcSpan }
   -- | Delimiter: binary-left-shift assignment \'<<=\'.
   | LeftShiftAssignToken { token_span :: !SrcSpan }
   -- | Delimiter: binary-right-shift assignment \'>>=\'.
   | RightShiftAssignToken { token_span :: !SrcSpan }
   -- | Delimiter: floor-divide assignment \'//=\'.
   | FloorDivAssignToken { token_span :: !SrcSpan }
   -- | Delimiter: back quote character \'`\'.
   | BackQuoteToken { token_span :: !SrcSpan }

   -- Operators

   -- | Operator: plus \'+\'.
   | PlusToken { token_span :: !SrcSpan }
   -- | Operator: minus: \'-\'.
   | MinusToken { token_span :: !SrcSpan }
   -- | Operator: multiply \'*\'.
   | MultToken { token_span :: !SrcSpan }
   -- | Operator: divide \'/\'.
   | DivToken { token_span :: !SrcSpan }
   -- | Operator: greater-than \'>\'.
   | GreaterThanToken { token_span :: !SrcSpan }
   -- | Operator: less-than \'<\'.
   | LessThanToken { token_span :: !SrcSpan }
   -- | Operator: equals \'==\'.
   | EqualityToken { token_span :: !SrcSpan }
   -- | Operator: greater-than-or-equals \'>=\'.
   | GreaterThanEqualsToken { token_span :: !SrcSpan }
   -- | Operator: less-than-or-equals \'<=\'.
   | LessThanEqualsToken { token_span :: !SrcSpan }
   -- | Operator: exponential \'**\'.
   | ExponentToken { token_span :: !SrcSpan }
   -- | Operator: binary-or \'|\'.
   | BinaryOrToken { token_span :: !SrcSpan }
   -- | Operator: binary-xor \'^\'.
   | XorToken { token_span :: !SrcSpan }
   -- | Operator: binary-and \'&\'.
   | BinaryAndToken { token_span :: !SrcSpan }
   -- | Operator: binary-shift-left \'<<\'.
   | ShiftLeftToken { token_span :: !SrcSpan }
   -- | Operator: binary-shift-right \'>>\'.
   | ShiftRightToken { token_span :: !SrcSpan }
   -- | Operator: modulus \'%\'.
   | ModuloToken { token_span :: !SrcSpan }
   -- | Operator: floor-divide \'//\'.
   | FloorDivToken { token_span :: !SrcSpan }
   -- | Operator: tilde \'~\'.
   | TildeToken { token_span :: !SrcSpan }
   -- | Operator: not-equals \'!=\'.
   | NotEqualsToken { token_span :: !SrcSpan }
   -- | Operator: not-equals \'<>\'. Version 2 only.
   | NotEqualsV2Token { token_span :: !SrcSpan }

   -- Special cases

   -- | End of file
   | EOFToken { token_span :: !SrcSpan }
   deriving (Eq,Ord,Show,Typeable,Data)

-- | The span of a token is the span stored in its 'token_span' field.
instance Span Token where
   getSpan tok = token_span tok

-- | Render a token as a detailed, human-readable string. Mainly intended for debugging.
--
-- The result consists of the constructor name, followed by the pretty-printed
-- source span, followed by the literal source text for those tokens which
-- carry one (see 'hasLiteral').
debugTokenString :: Token -> String
debugTokenString tok = render (constructorDoc <+> spanDoc <+> literalDoc)
   where
      -- Name of the data constructor, obtained through the 'Data' instance.
      constructorDoc = text (show (toConstr tok))
      spanDoc = pretty (token_span tok)
      -- 'token_literal' is a partial field, so only touch it when it exists.
      literalDoc
         | hasLiteral tok = text (token_literal tok)
         | otherwise = empty

-- | Test if a token contains its literal source text.
hasLiteral :: Token -> Bool
-- Exactly the constructors declared with a 'token_literal' field.
hasLiteral CommentToken{} = True
hasLiteral IdentifierToken{} = True
hasLiteral StringToken{} = True
hasLiteral ByteStringToken{} = True
hasLiteral UnicodeStringToken{} = True
hasLiteral IntegerToken{} = True
hasLiteral LongIntegerToken{} = True
hasLiteral FloatToken{} = True
hasLiteral ImaginaryToken{} = True
hasLiteral _ = False

-- | Classification of tokens
data TokenClass
   = Comment      -- ^ Comment tokens.
   | Number       -- ^ Integer, long integer, floating point and imaginary literals.
   | Identifier   -- ^ Identifier tokens.
   | Punctuation  -- ^ Comma, semicolon, colon, right arrow and back quote.
   | Bracket      -- ^ Round, square and curly brackets.
   | Layout       -- ^ Indent, dedent, newline, line join and end of file.
   | Keyword      -- ^ Keywords (the at sign and the ellipsis are also placed here).
   | String       -- ^ String, byte string and unicode string literals.
   | Operator     -- ^ Operators (the dot is also placed here).
   | Assignment   -- ^ Plain and augmented assignment delimiters.
   deriving (Show, Eq, Ord)

-- | Map a token to its coarse 'TokenClass'.
--
-- Every constructor of 'Token' is listed explicitly (there is no catch-all
-- equation), so adding a constructor without classifying it is reported by
-- the compiler's incomplete-pattern warning.
classifyToken :: Token -> TokenClass
-- Layout
classifyToken IndentToken{} = Layout
classifyToken DedentToken{} = Layout
classifyToken NewlineToken{} = Layout
classifyToken LineJoinToken{} = Layout
classifyToken EOFToken{} = Layout -- maybe a spurious classification.
-- Comments and identifiers
classifyToken CommentToken{} = Comment
classifyToken IdentifierToken{} = Identifier
-- String literals
classifyToken StringToken{} = String
classifyToken ByteStringToken{} = String
classifyToken UnicodeStringToken{} = String
-- Numeric literals
classifyToken IntegerToken{} = Number
classifyToken LongIntegerToken{} = Number
classifyToken FloatToken{} = Number
classifyToken ImaginaryToken{} = Number
-- Keywords
classifyToken DefToken{} = Keyword
classifyToken WhileToken{} = Keyword
classifyToken IfToken{} = Keyword
classifyToken TrueToken{} = Keyword
classifyToken FalseToken{} = Keyword
classifyToken ReturnToken{} = Keyword
classifyToken TryToken{} = Keyword
classifyToken ExceptToken{} = Keyword
classifyToken RaiseToken{} = Keyword
classifyToken InToken{} = Keyword
classifyToken IsToken{} = Keyword
classifyToken LambdaToken{} = Keyword
classifyToken ClassToken{} = Keyword
classifyToken FinallyToken{} = Keyword
classifyToken NoneToken{} = Keyword
classifyToken ForToken{} = Keyword
classifyToken FromToken{} = Keyword
classifyToken GlobalToken{} = Keyword
classifyToken WithToken{} = Keyword
classifyToken AsToken{} = Keyword
classifyToken ElifToken{} = Keyword
classifyToken YieldToken{} = Keyword
classifyToken AssertToken{} = Keyword
classifyToken ImportToken{} = Keyword
classifyToken PassToken{} = Keyword
classifyToken BreakToken{} = Keyword
classifyToken ContinueToken{} = Keyword
classifyToken DeleteToken{} = Keyword
classifyToken ElseToken{} = Keyword
classifyToken NotToken{} = Keyword
classifyToken AndToken{} = Keyword
classifyToken OrToken{} = Keyword
classifyToken NonLocalToken{} = Keyword
classifyToken PrintToken{} = Keyword
classifyToken ExecToken{} = Keyword
classifyToken AtToken{} = Keyword
classifyToken EllipsisToken{} = Keyword -- What kind of thing is an ellipsis?
-- Brackets
classifyToken LeftRoundBracketToken{} = Bracket
classifyToken RightRoundBracketToken{} = Bracket
classifyToken LeftSquareBracketToken{} = Bracket
classifyToken RightSquareBracketToken{} = Bracket
classifyToken LeftBraceToken{} = Bracket
classifyToken RightBraceToken{} = Bracket
-- Punctuation
classifyToken CommaToken{} = Punctuation
classifyToken SemiColonToken{} = Punctuation
classifyToken ColonToken{} = Punctuation
classifyToken RightArrowToken{} = Punctuation
classifyToken BackQuoteToken{} = Punctuation
-- Assignment
classifyToken AssignToken{} = Assignment
classifyToken PlusAssignToken{} = Assignment
classifyToken MinusAssignToken{} = Assignment
classifyToken MultAssignToken{} = Assignment
classifyToken DivAssignToken{} = Assignment
classifyToken ModAssignToken{} = Assignment
classifyToken PowAssignToken{} = Assignment
classifyToken BinAndAssignToken{} = Assignment
classifyToken BinOrAssignToken{} = Assignment
classifyToken BinXorAssignToken{} = Assignment
classifyToken LeftShiftAssignToken{} = Assignment
classifyToken RightShiftAssignToken{} = Assignment
classifyToken FloorDivAssignToken{} = Assignment
-- Operators
classifyToken DotToken{} = Operator
classifyToken PlusToken{} = Operator
classifyToken MinusToken{} = Operator
classifyToken MultToken{} = Operator
classifyToken DivToken{} = Operator
classifyToken GreaterThanToken{} = Operator
classifyToken LessThanToken{} = Operator
classifyToken EqualityToken{} = Operator
classifyToken GreaterThanEqualsToken{} = Operator
classifyToken LessThanEqualsToken{} = Operator
classifyToken ExponentToken{} = Operator
classifyToken BinaryOrToken{} = Operator
classifyToken XorToken{} = Operator
classifyToken BinaryAndToken{} = Operator
classifyToken ShiftLeftToken{} = Operator
classifyToken ShiftRightToken{} = Operator
classifyToken ModuloToken{} = Operator
classifyToken FloorDivToken{} = Operator
classifyToken TildeToken{} = Operator
classifyToken NotEqualsToken{} = Operator
classifyToken NotEqualsV2Token{} = Operator

-- | Produce a string from a token which is suitable for printing as Python concrete syntax.
-- /Invisible/ tokens yield an empty string.
tokenString :: Token -> String
tokenString token =
   case token of
      -- Layout tokens have no visible concrete syntax of their own.
      IndentToken {} -> ""
      DedentToken {} -> ""
      NewlineToken {} -> ""
      -- Tokens which carry their source text are printed verbatim.
      CommentToken {} -> token_literal token
      IdentifierToken {} -> token_literal token
      StringToken {} -> token_literal token
      ByteStringToken {} -> token_literal token
      UnicodeStringToken {} -> token_literal token
      IntegerToken {} -> token_literal token
      LongIntegerToken {} -> token_literal token
      FloatToken {} -> token_literal token
      ImaginaryToken {} -> token_literal token
      -- Keywords
      DefToken {} -> "def"
      WhileToken {} -> "while"
      IfToken {} -> "if"
      TrueToken {} -> "True"
      FalseToken {} -> "False"
      ReturnToken {} -> "return"
      TryToken {} -> "try"
      ExceptToken {} -> "except"
      RaiseToken {} -> "raise"
      InToken {} -> "in"
      IsToken {} -> "is"
      LambdaToken {} -> "lambda"
      ClassToken {} -> "class"
      FinallyToken {} -> "finally"
      NoneToken {} -> "None"
      ForToken {} -> "for"
      FromToken {} -> "from"
      GlobalToken {} -> "global"
      WithToken {} -> "with"
      AsToken {} -> "as"
      ElifToken {} -> "elif"
      YieldToken {} -> "yield"
      AssertToken {} -> "assert"
      ImportToken {} -> "import"
      PassToken {} -> "pass"
      BreakToken {} -> "break"
      ContinueToken {} -> "continue"
      -- The Python keyword is spelled "del"; "delete" is not Python syntax.
      DeleteToken {} -> "del"
      ElseToken {} -> "else"
      NotToken {} -> "not"
      AndToken {} -> "and"
      OrToken {} -> "or"
      NonLocalToken {} -> "nonlocal"
      PrintToken {} -> "print"
      ExecToken {} -> "exec"
      -- Delimiters
      -- The at sign is written "@" in source code (the word "at" would read
      -- back as an identifier).
      AtToken {} -> "@"
      LeftRoundBracketToken {} -> "("
      RightRoundBracketToken {} -> ")"
      LeftSquareBracketToken {} -> "["
      RightSquareBracketToken {} -> "]"
      LeftBraceToken {} -> "{"
      RightBraceToken {} -> "}"
      DotToken {} -> "."
      CommaToken {} -> ","
      SemiColonToken {} -> ";"
      ColonToken {} -> ":"
      EllipsisToken {} -> "..."
      RightArrowToken {} -> "->"
      AssignToken {} -> "="
      PlusAssignToken {} -> "+="
      MinusAssignToken {} -> "-="
      MultAssignToken {} -> "*="
      DivAssignToken {} -> "/="
      ModAssignToken {} -> "%="
      PowAssignToken {} -> "**="
      BinAndAssignToken {} -> "&="
      BinOrAssignToken {} -> "|="
      BinXorAssignToken {} -> "^="
      LeftShiftAssignToken {} -> "<<="
      RightShiftAssignToken {} -> ">>="
      FloorDivAssignToken {} -> "//="
      BackQuoteToken {} -> "`"
      -- Operators
      PlusToken {} -> "+"
      MinusToken {} -> "-"
      MultToken {} -> "*"
      DivToken {} -> "/"
      GreaterThanToken {} -> ">"
      LessThanToken {} -> "<"
      EqualityToken {} -> "=="
      GreaterThanEqualsToken {} -> ">="
      LessThanEqualsToken {} -> "<="
      ExponentToken {} -> "**"
      BinaryOrToken {} -> "|"
      XorToken {} -> "^"
      BinaryAndToken {} -> "&"
      ShiftLeftToken {} -> "<<"
      ShiftRightToken {} -> ">>"
      ModuloToken {} -> "%"
      FloorDivToken {} -> "//"
      TildeToken {} -> "~"
      NotEqualsToken {} -> "!="
      NotEqualsV2Token {} -> "<>"
      -- A line join is a single backslash in the source.
      LineJoinToken {} -> "\\"
      EOFToken {} -> ""