{-# Language MultiParamTypeClasses #-} module Language.Haskell.TokenUtils.Types ( Entry(..) , TokenCache(..) , TreeId(..) , mainTid , ForestSpan , ForestPos , ForestLine(..) , RowOffset , ColOffset , Row , Col , SimpPos , SimpSpan , Layout(..) , EndOffset(..) , Outputable(..) , TokenLayout , LayoutTree , ghcLineToForestLine , forestLineToGhcLine , forestLenChangedMask , IsToken(..) , notWhiteSpace , isWhiteSpaceOrIgnored , isIgnored , isIgnoredNonComment , isWhereOrLet , showFriendlyToks , HasLoc(..) , Allocatable(..) ) where -- import Control.Exception import Data.Bits import Data.Tree import qualified Text.PrettyPrint as P import qualified Data.Map as Map -- --------------------------------------------------------------------- -- | An entry in the data structure for a particular srcspan. data Entry a = -- |Entry has -- * the source span contained in this Node -- * how the sub-elements nest -- * the tokens for the SrcSpan if subtree is empty Entry !ForestSpan !Layout ![a] -- |Deleted has -- * the source span has been deleted -- * prior gap in lines -- * the gap between this span end and the -- start of the next in the fringe of the -- tree. | Deleted !ForestSpan !RowOffset !SimpPos deriving (Show) type RowOffset = Int type ColOffset = Int type Row = Int type Col = Int type SimpPos = (Int,Int) -- Line, column type SimpSpan = (SimpPos,SimpPos) data Layout = Above EndOffset (Row,Col) (Row,Col) EndOffset -- ^ Initial offset from token before the -- stacked list of items, the (r,c) of the first -- non-comment token, the (r,c) of the end of the last non-comment -- token in the stacked list to be able to calculate the -- (RowOffset,ColOffset) between the last token and the -- start of the next item. | NoChange deriving (Show,Eq) data EndOffset = None | SameLine ColOffset | FromAlignCol (RowOffset, ColOffset) deriving (Show,Eq) -- --------------------------------------------------------------------- data ForestLine = ForestLine { flSpanLengthChanged :: !Bool -- ^The length of the -- span may have -- changed due to -- updated tokens. , flTreeSelector :: !Int , flInsertVersion :: !Int , flLine :: !Int } -- deriving (Eq) instance Eq ForestLine where -- TODO: make this undefined, and patch all broken code to use the -- specific fun here directly instead. (ForestLine _ s1 v1 l1) == (ForestLine _ s2 v2 l2) = s1 == s2 && v1 == v2 && l1 == l2 instance Show ForestLine where show s = "(ForestLine " ++ (show $ flSpanLengthChanged s) ++ " " ++ (show $ flTreeSelector s) ++ " " ++ (show $ flInsertVersion s) ++ " " ++ (show $ flLine s) ++ ")" -- instance Outputable ForestLine where -- ppr fl = text (show fl) instance Ord ForestLine where -- Use line as the primary comparison, but break any ties with the -- version -- Tree is ignored, as it is only a marker on the topmost element -- Ignore sizeChanged flag, it will only be relevant in the -- invariant check compare (ForestLine _sc1 _ v1 l1) (ForestLine _sc2 _ v2 l2) = if (l1 == l2) then compare v1 v2 else compare l1 l2 -- --------------------------------------------------------------------- type ForestPos = (ForestLine,Int) -- |Match a SrcSpan, using a ForestLine as the marker type ForestSpan = (ForestPos,ForestPos) -- --------------------------------------------------------------------- data TreeId = TId !Int deriving (Eq,Ord,Show) -- |Identifies the tree carrying the main tokens, not any work in -- progress or deleted ones mainTid :: TreeId mainTid = TId 0 data TokenCache a = TK { tkCache :: !(Map.Map TreeId (Tree (Entry a))) , tkLastTreeId :: !TreeId } -- --------------------------------------------------------------------- class Allocatable b a where -- |Construct a `LayoutTree` from a Haskell AST and a stream of tokens. allocTokens :: (IsToken a) => b -> [a] -> LayoutTree a -- |The IsToken class captures the different token type in use. For -- GHC it represents the type returned by `GHC.getRichTokenStream`, -- namely [(GHC.Located GHC.Token, String)] -- For haskell-src-exts this is the reult of `lexTokenStream`, namely `[HSE.Loc HSE.Token]` class (Show a,HasLoc a) => IsToken a where -- |tokenLen returns the length of the string representation of the -- token, not just the difference in the location, as the string may -- have changed without the position being updated, e.g. in a -- renaming tokenLen :: a -> Int isComment :: a -> Bool -- |Zero-length tokens, as appear in GHC as markers isEmpty :: a -> Bool mkZeroToken :: a isDo :: a -> Bool isElse :: a -> Bool isIn :: a -> Bool isLet :: a -> Bool isOf :: a -> Bool isThen :: a -> Bool isWhere :: a -> Bool tokenToString :: a -> String -- TODO: may be able to get rid of next due to former showTokenStream :: [a] -> String -- |Create a stream of tokens from source, with first token start at -- given location lexStringToTokens :: SimpSpan -> String -> [a] -- |Mark a token so that it can be use to trigger layout checking -- later when the toks are retrieved markToken :: a -> a isMarked :: a -> Bool -- derived functions isWhiteSpace :: (IsToken a) => a -> Bool isWhiteSpace tok = isComment tok || isEmpty tok notWhiteSpace :: (IsToken a) => a -> Bool notWhiteSpace tok = not (isWhiteSpace tok) isWhiteSpaceOrIgnored :: (IsToken a) => a -> Bool isWhiteSpaceOrIgnored tok = isWhiteSpace tok || isIgnored tok -- Tokens that are ignored when allocating tokens to a SrcSpan isIgnored :: (IsToken a) => a -> Bool isIgnored tok = isThen tok || isElse tok || isIn tok || isDo tok -- | Tokens that are ignored when determining the first non-comment -- token in a span isIgnoredNonComment :: (IsToken a) => a -> Bool isIgnoredNonComment tok = isThen tok || isElse tok || isWhiteSpace tok isWhereOrLet :: (IsToken a) => a -> Bool isWhereOrLet t = isWhere t || isLet t showFriendlyToks :: IsToken a => [a] -> String showFriendlyToks toks = reverse $ dropWhile (=='\n') $ reverse $ dropWhile (=='\n') $ showTokenStream toks class HasLoc a where getLoc :: a -> SimpPos getLocEnd :: a -> SimpPos getSpan :: a -> SimpSpan getSpan a = (getLoc a,getLocEnd a) putSpan :: a -> SimpSpan -> a {- data Located e = L Span e deriving Show data Span = Span (Row,Col) (Row,Col) deriving (Show,Eq,Ord) nullSpan :: Span nullSpan = Span (0,0) (0,0) -} data TokenLayout a = TL (Tree (Entry a)) type LayoutTree a = Tree (Entry a) -- --------------------------------------------------------------------- -- A data type for the line entries in a SrcSpan. This has the -- following properties -- -- 1. It can be converted to and from the underlying Int in the -- original SrcSpan -- 2. It allows the insertion of an arbitrary line as the start of a -- new SrcSpan -- 3. It has an ordering relation, which honours the inserts which -- were made. -- 4. It can keep track of tokens that have been removed from the main -- AST, which can be edited outside of it and then inserted again -- -- This is achieved by adding two fields to the SrcSpan, one to -- indicate which AST fragment it is in, and the other to indicate its -- insert relationship, encoded as 0 for the original, 1 for the -- first, 2 for the second and so on. -- -- This field is converted to and from the original line by being -- multiplied by a very large number and added to the original. -- -- The guaranteed max value in Haskell for an Int is 2^29 - 1. -- This evaluates to 536,870,911,or 536.8 million. -- -- However, as pointed out on #haskell, the GHC compiler (which this -- implemtation explicitly targets) provides the full 32 bits (at -- least, can be 64), so we have -- maxBound :: Int = 2,147,483,647 -- -- Schema:max pos value is 0x7fffffff (31 bits) -- 1 bit for LenChanged -- 5 bits for tree : 32 values -- 5 bits for version : 32 values -- 20 bits for line number: 1048576 values forestLineMask,forestVersionMask,forestTreeMask,forestLenChangedMask :: Int forestLineMask = 0xfffff -- bottom 20 bits forestVersionMask = 0x1f00000 -- next 5 bits forestTreeMask = 0x3e000000 -- next 5 bits forestLenChangedMask = 0x40000000 -- top (non-sign) bit forestVersionShift :: Int forestVersionShift = 20 forestTreeShift :: Int forestTreeShift = 25 -- | Extract an encoded ForestLine from a GHC line ghcLineToForestLine :: Int -> ForestLine ghcLineToForestLine l = ForestLine ch tr v l' where l' = l .&. forestLineMask v = shiftR (l .&. forestVersionMask) forestVersionShift tr = shiftR (l .&. forestTreeMask) forestTreeShift ch = (l .&. forestLenChangedMask) /= 0 -- TODO: check that the components are in range forestLineToGhcLine :: ForestLine -> Int forestLineToGhcLine fl = (if (flSpanLengthChanged fl) then forestLenChangedMask else 0) + (shiftL (flTreeSelector fl) forestTreeShift) + (shiftL (flInsertVersion fl) forestVersionShift) + (flLine fl) -- --------------------------------------------------------------------- class Outputable a where ppr :: a -> P.Doc -- ---------------------------------------------------------------------