Language/Haskell/HBB/Internal/SrcSpan.hs

{-# LANGUAGE DeriveDataTypeable #-}
{-# OPTIONS -Wall #-}
module Language.Haskell.HBB.Internal.SrcSpan where

import System.FilePath (normalise,makeRelative)
import Data.Generics
import FastString (unpackFS,fsLit)
import Data.Char (isSpace)
import SrcLoc

-- | This is just the combination of a line number and a column number.
data BufLoc = BufLoc Int Int -- ^ BufLoc line column
    deriving (Eq,Data,Typeable)

-- | BufLocs are shown by separating the line and the column number by a colon.
instance Show BufLoc where
    show (BufLoc l1 c1) = (show l1) ++ ':':(show c1)

instance Ord BufLoc where
    compare (BufLoc l1 _ ) (BufLoc l2 _ ) | l1 /= l2 = l1 `compare` l2
    compare (BufLoc _  c1) (BufLoc _  c2)            = c1 `compare` c2

-- | A BufSpan is simply defined by two times a BufLoc.
data BufSpan = BufSpan BufLoc BufLoc -- ^ BufSpan startLoc endLoc
    deriving (Eq,Typeable,Data)

instance Show BufSpan where
    show (BufSpan l1 l2) = (show l1) ++ " - " ++ (show l2)

-- | This is a file/text portion buffered as list of lines.
--
-- Line buffers are used to avoid repeated IO operations and to describe
-- line-oriented content (for example at assembling the TTree).
type LineBuf = [String]

-- | Returns the start location of a BufSpan
spanStart :: BufSpan -> BufLoc
spanStart (BufSpan s _) = s
-- | Returns the end location of a BufSpan
spanEnd   :: BufSpan -> BufLoc
spanEnd   (BufSpan _ e) = e

-- | Deconstructs a RealSrcSpan to the types more often used in libhbb.
unpackRealSrcSpan :: RealSrcSpan -> (FilePath,BufSpan)
unpackRealSrcSpan r = (unpackFS $ srcSpanFile r,toBufSpan r)

packRealSrcSpan :: (FilePath,BufSpan) -> RealSrcSpan
packRealSrcSpan (f,(BufSpan (BufLoc l1s l1e) (BufLoc l2s l2e))) =
    let newFilename = fsLit f
    in  mkRealSrcSpan
            (mkRealSrcLoc newFilename l1s l1e)
            (mkRealSrcLoc newFilename l2s l2e)

normalisePath 
    :: FilePath -- ^ The current working dir (to make pathes relative)
    -> FilePath -- ^ The path that should be adapted
    -> FilePath
normalisePath c p = makeRelative c $ normalise p

-- | This function converts a location in the form libhbb uses it into a
-- string. The string has the format that is used by 'occurrences-of' and
-- 'locate' to report locations to the client.
showSpan 
    :: Maybe FilePath     -- ^ The current working dir (to force relative pathes)
    -> (FilePath,BufSpan) -- ^ The location to convert to a string
    -> String
showSpan cwd (f,(BufSpan (BufLoc sli sco) (BufLoc eli eco))) =
    let filePart = case cwd of Nothing -> f
                               Just c  -> normalisePath c f
    in ('"':filePart ++ "\"") ++ (' ':(show sli)) ++ (' ':(show sco)) ++
                                 (' ':(show eli)) ++ (' ':(show eco))

-- | This is an auxiliary function that splits a string at all newlines.
--
-- Note that lines from Prelude cannot be used here. The reason is following
-- example:
--
-- @
--   lines       \"|  ->\\n\" = [\"|  ->\"]
--   str2LineBuf \"|  ->\\n\" = [\"|  ->\",\"\"]
-- @
str2LineBuf :: String -> LineBuf
str2LineBuf x = str2LineBufAcc x [] 
    where
        str2LineBufAcc :: String -> LineBuf -> LineBuf
        str2LineBufAcc []       acc     = reverse $ map reverse acc
        str2LineBufAcc ('\n':s) []      = str2LineBufAcc s ["",""] -- This is a special case!
        str2LineBufAcc ('\n':s) acc     = str2LineBufAcc s ("":acc)
        str2LineBufAcc (c   :s) []      = str2LineBufAcc s [(c:"")]
        str2LineBufAcc (c   :s) (a:acc) = str2LineBufAcc s ((c:a):acc)

-- | Converts a line buffer to a string.
-- 
-- Note that 'unlines' doesn't work here because it doesn't treat the last line
-- correctly.
lineBuf2Str :: LineBuf -> String
lineBuf2Str [] = ""
lineBuf2Str xs = (unlines (init xs)) ++ (last xs)

-- | This alias can be used to have a meaningful name for indentations.
type Indentation = Int

-- | For a line buffer this function returns the number of spaces charachters
-- of the line with the smallest indentation.
getIndentation :: LineBuf -> Indentation
getIndentation buf = minimum $ map (countInd 0) buf
    where
        -- FIXME Currently we are unable to count tabs with more than one
        -- space...
        countInd :: Indentation -> String -> Indentation
        countInd _   []                = 0 -- No non-space character at all on this line...!!
        countInd acc (c:s) | isSpace c = countInd (acc+1) s -- TODO tab is always 1 space currently
        countInd acc _                 = acc

-- | Compares two non-overlapping RealSrcSpan elements by their starting
-- location.
compareByStartLoc :: RealSrcSpan -> RealSrcSpan -> Ordering
compareByStartLoc r1 r2 = compare s1 s2
    where s1 = realSrcSpanStart r1
          s2 = realSrcSpanStart r2

-- | Converts a RealSrcLoc into a BufLoc effectively throwing away the
-- filename.
toBufLoc :: RealSrcLoc -> BufLoc
toBufLoc x =
    let line = srcLocLine x
        col  = srcLocCol  x
    in BufLoc line col

-- | Converts a RealSrcSpan into a BufSpan effectively throwing away the
-- filename.
toBufSpan :: RealSrcSpan -> BufSpan
toBufSpan x = BufSpan startLoc endLoc
    where startLoc = toBufLoc $ realSrcSpanStart x
          endLoc   = toBufLoc $ realSrcSpanEnd   x

-- | Creates a BufSpan where the first and the last BufLoc is the same.
--
-- The first parameter is the line and the second one is the column.
pointBufSpan :: Int -> Int -> BufSpan
pointBufSpan line column = BufSpan loc loc
    where loc = BufLoc line column

-- | This function splits the passed lines (of a file-cache) at the position
-- passed as second parameter.
--
-- Note that the line- and column-counts start with 1 (this is GHC behaviour).
-- The split contains the character pointed to by the BufLoc in the right part
-- of the tuple.
--
-- This means that (in case of line=1 and column=1) following applies:
--
-- @
-- splitAtBufLoc \"hello world\" loc == ([\"\"],[\"hello world\"])
-- @
splitAtBufLoc :: LineBuf -> BufLoc -> (LineBuf,LineBuf)
splitAtBufLoc []  _              = ([],[])
splitAtBufLoc lns (BufLoc ln co) =
    let leftPart  = 
            let firstPart = (take (max ln 0) lns)
            in (init firstPart) ++ [take (max (co-1) 0) (last firstPart)]
    in case (drop (max (ln-1) 0) lns) of 
        (r:rs) -> let rightPart = (drop (max (co-1) 0) r):rs in (leftPart,rightPart)
        _      -> (leftPart,[])

-- | This function splits a number of input lines in a way so that the area
-- located to by the passed source span is isolated.
--
-- The three areas in the return tuple are:
--
--  - Initial  lines (they come first)
--
--  - Subject  lines (they are between the locations)
--
--  - Trailing lines (they come after the last location)
--
-- The last line of initLines and the first line of subjLines must be joined to
-- reproduce the output. The same applies to subjLines and traiLines...
splitBufferedLinesAtBufSpan :: LineBuf -> BufSpan -> (LineBuf,LineBuf,LineBuf)
splitBufferedLinesAtBufSpan lns (BufSpan l1 l2) =
    let (rest,traiLines)      = splitAtBufLoc lns  l2
        (initLines,subjLines) = splitAtBufLoc rest l1
    in  (initLines,subjLines,traiLines)
    
-- | This function returns true if the first RealSrcSpan points to a region that
-- is located before the one pointed to by the second RealSrcSpan.
--
-- The two spans must be disjoint otherwise the results are undefined (can be
-- checked with the function 'disjoint')!
leq :: RealSrcSpan -> RealSrcSpan -> Bool
s1 `leq` s2 =
    let endS1   = realSrcSpanEnd   s1
        startS2 = realSrcSpanStart s2
    in  endS1 <= startS2

-- | This function returns true when the two passed RealSrcSpans do not
-- overlap.
--
-- This means that the end of the first RealSrcSpan is smaller or equal to the
-- start of the second RealSrcSpan and vice versa.
disjoint :: RealSrcSpan -> RealSrcSpan -> Bool
disjoint s1 s2 =
    let startS1 = realSrcSpanStart s1
        endS1   = realSrcSpanEnd   s1
        startS2 = realSrcSpanStart s2
        endS2   = realSrcSpanEnd   s2
    in endS1 <= startS2 || endS2 <= startS1

-- | This function is the opposite of splitAtBufLoc.
--
-- It can rejoin a split concerning that the last and the first line in the
-- frist respective second element of the split tuple must be joined by string
-- concatenation. This function has been designed to run with linear time
-- complexity.
joinSplit :: ([String],[String]) -> [String]
joinSplit (lines1,[]) = lines1
joinSplit ([],lines2) = lines2
joinSplit t           = joinSplitAcc [] t
    where
        joinSplitAcc :: [String] -> ([String],[String]) -> [String]
        joinSplitAcc acc ([],[])              = reverse acc
        joinSplitAcc acc ([],(l:lines2))      = joinSplitAcc (l:acc)        ([],lines2)
        joinSplitAcc acc ((x:y@(_:_)),lines2) = joinSplitAcc (x:acc)        (y ,lines2)
        joinSplitAcc acc ([x],(l:lines2))     = joinSplitAcc ((x ++ l):acc) ([],lines2)
        joinSplitAcc acc ([x],[])             = joinSplitAcc ( x      :acc) ([],[])

-- | This function combines two times joinSplit to be able to join lines that
-- have been split by a SrcSpan.
reassembleSplit :: ([String],[String],[String]) -> [String]
reassembleSplit (initLines,subjLines,traiLines) =
    joinSplit (initLines,(joinSplit (subjLines,traiLines)))