{-# LANGUAGE CPP, MultiParamTypeClasses, FlexibleInstances #-} -- Consider splitting off as a separate package -- Copyright (c) 2008 Gustav Munkby -- Copyright (c) 2008 Jean-Philippe Bernardy -- | This module defines a Rope representation. -- While the representation are ByteStrings stored in a finger tree, the indices -- are actually in number of characters. -- This is currently based on utf8-string, but a couple of other packages might be -- better: text, compact-string. -- At the moment none of them has a lazy -- implementation, which forces us to always export plain Strings. -- (Utf8-string does not have a proper newtype) module Data.Rope ( Rope, -- * Conversions to Rope fromString, -- * Conversions from Rope toString, toReverseString, -- * List-like functions null, empty, take, drop, append, splitAt, splitAtLine, length, reverse, countNewLines, -- * IO readFile, writeFile, -- * Low level functions splitAtChunkBefore ) where import Prelude hiding (null, head, tail, length, take, drop, splitAt, head, tail, foldl, reverse, readFile, writeFile) import qualified Data.List as L import qualified Data.ByteString.UTF8 as B import qualified Data.ByteString as B (append, concat, elemIndices) import qualified Data.ByteString as Byte import Data.ByteString (ByteString) import qualified Data.ByteString.Lazy as LB (toChunks, fromChunks, null, readFile) import qualified Data.ByteString.Lazy.UTF8 as LB import qualified Data.FingerTree as T import Data.FingerTree hiding (null, empty, reverse) import Data.Binary import Data.Char (ord) import Data.Monoid import Data.Foldable (toList) import Data.Int #ifdef CAUTIOUS_WRITES import System.IO.Cautious (writeFileL) #else import qualified Data.ByteString.Lazy as LB (writeFile) #endif defaultChunkSize :: Int defaultChunkSize = 128 -- in chars! (chunkSize requires this to be <= 256) -- The FingerTree does not store measurements for single chunks, which -- means that the length of chunks often have to be recomputed. mkChunk :: ByteString -> Chunk mkChunk s = Chunk (fromIntegral $ B.length s) s data Chunk = Chunk { chunkSize :: !Word8, fromChunk :: !ByteString } deriving (Eq, Show) data Size = Indices {charIndex :: !Int, lineIndex :: Int} -- lineIndex is lazy because we do not often want the line count. deriving Show instance Monoid Size where mempty = Indices 0 0 mappend (Indices c1 l1) (Indices c2 l2) = Indices (c1+c2) (l1+l2) newtype Rope = Rope { fromRope :: FingerTree Size Chunk } deriving (Eq, Show) (-|) :: Chunk -> FingerTree Size Chunk -> FingerTree Size Chunk b -| t | chunkSize b == 0 = t | otherwise = b <| t (|-) :: FingerTree Size Chunk -> Chunk -> FingerTree Size Chunk t |- b | chunkSize b == 0 = t | otherwise = t |> b -- Newlines are preserved by UTF8 encoding and decoding newline :: Word8 newline = fromIntegral (ord '\n') instance Measured Size Chunk where measure (Chunk l s) = Indices (fromIntegral l) -- note that this is the length in characters, not bytes. (Byte.count newline s) toLazyByteString :: Rope -> LB.ByteString toLazyByteString = LB.fromChunks . fmap fromChunk . toList . fromRope reverse :: Rope -> Rope reverse = Rope . fmap' (mkChunk . B.fromString . L.reverse . B.toString . fromChunk) . T.reverse . fromRope toReverseString :: Rope -> String toReverseString = L.concat . map (L.reverse . B.toString . fromChunk) . toList . T.reverse . fromRope toString :: Rope -> String toString = LB.toString . toLazyByteString fromLazyByteString :: LB.ByteString -> Rope fromLazyByteString = Rope . toTree where toTree b | LB.null b = T.empty toTree b = let (h,t) = LB.splitAt (fromIntegral defaultChunkSize) b in (mkChunk $ B.concat $ LB.toChunks $ h) <| toTree t fromString :: String -> Rope fromString = Rope . toTree where toTree [] = T.empty toTree b = let (h,t) = L.splitAt defaultChunkSize b in (mkChunk $ B.fromString h) <| toTree t null :: Rope -> Bool null (Rope a) = T.null a empty :: Rope empty = Rope T.empty -- | Get the length of the string. (This information cached, so O(1) amortized runtime.) length :: Rope -> Int length = charIndex . measure . fromRope -- | Count the number of newlines in the strings. (This information cached, so O(1) amortized runtime.) countNewLines :: Rope -> Int countNewLines = lineIndex . measure . fromRope -- | Append two strings by merging the two finger trees. append :: Rope -> Rope -> Rope append (Rope a) (Rope b) = Rope $ case T.viewr a of EmptyR -> b l :> (Chunk len x) -> case T.viewl b of EmptyL -> a (Chunk len' x') :< r -> if (fromIntegral len) + (fromIntegral len') < defaultChunkSize then l >< singleton (Chunk (len + len') (x `B.append` x')) >< r else a >< b take, drop :: Int -> Rope -> Rope take n = fst . splitAt n drop n = snd . splitAt n -- | Split the string at the specified position. splitAt :: Int -> Rope -> (Rope, Rope) splitAt n (Rope t) = case T.viewl c of (Chunk len x) :< r | n' /= 0 -> let (lx, rx) = B.splitAt n' x in (Rope $ l |> (Chunk (fromIntegral n') lx), Rope $ (Chunk (len - fromIntegral n') rx) -| r) _ -> (Rope l, Rope c) where (l, c) = T.split ((> n) . charIndex) t n' = n - charIndex (measure l) -- | Split the rope on a chunk, so that the desired -- position lies within the first chunk of the second rope. splitAtChunkBefore :: Int -> Rope -> (Rope, Rope) splitAtChunkBefore n (Rope t) = let (l, c) = T.split ((> n) . charIndex) t in (Rope l, Rope c) -- | Split before the specified line. Lines are indexed from 0. splitAtLine :: Int -> Rope -> (Rope, Rope) splitAtLine n | n <= 0 = \r -> (empty, r) | otherwise = splitAtLine' (n-1) -- | Split after the specified line. Lines are indexed from 0. splitAtLine' :: Int -> Rope -> (Rope, Rope) splitAtLine' n (Rope t) = case T.viewl c of ch@(Chunk _ x) :< r -> let (lx, rx) = cutExcess excess x excess = lineIndex (measure l) + lineIndex (measure ch) - n - 1 in (Rope $ l |- mkChunk lx, Rope $ mkChunk rx -| r) _ -> (Rope l, Rope c) where (l, c) = T.split ((n <) . lineIndex) t cutExcess :: Int -> ByteString -> (ByteString, ByteString) cutExcess i s = let idx = gt i $ L.reverse $ Byte.elemIndices newline s in Byte.splitAt (idx+1) s -- take one extra byte to that the newline is found on the left. where gt _ [] = Byte.length s gt 0 (x:_ ) = x gt n (_:xs) = gt (n-1) xs instance Binary Rope where put = put . toString get = fromString `fmap` get writeFile :: FilePath -> Rope -> IO () #ifdef CAUTIOUS_WRITES writeFile f r = writeFileL f $ toLazyByteString r #else writeFile f r = LB.writeFile f $ toLazyByteString r #endif readFile :: FilePath -> IO Rope readFile f = fromLazyByteString `fmap` LB.readFile f