{-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE MultiParamTypeClasses #-} {-# OPTIONS_HADDOCK show-extensions #-} -- | -- Module : Yi.OldRope -- License : GPL-2 -- Maintainer : yi-devel@googlegroups.com -- Stability : experimental -- Portability : portable -- -- This module defines a Rope representation. -- -- While the representation are ByteStrings stored in a finger tree, -- the indices are actually in number of characters. -- -- This is currently based on utf8-string, but a couple of other -- packages might be better: text, compact-string. -- -- At the moment none of them has a lazy implementation, which forces -- us to always export plain Strings. (Utf8-string does not have a -- proper newtype) -- -- __Important__: The reason this module exists is to allow -- benchmarking and behaviour checks against a new implementation. As -- of today (10th September 2014), Yi imports this module. Notably, -- this module will be going away and Yi will start using "Yi.Rope" -- instead in the near future. module Yi.OldRope ( Rope, -- * Conversions to Rope fromString, -- * Conversions from Rope toString, toReverseString, -- * List-like functions null, empty, take, drop, length, reverse, countNewLines, split, splitAt, splitAtLine, append, concat, -- * IO readFile, writeFile, -- * Low level functions splitAtChunkBefore ) where import Data.Binary import Data.ByteString (ByteString) import qualified Data.ByteString as B (append, concat) import qualified Data.ByteString as Byte import qualified Data.ByteString.Lazy as LB (toChunks, fromChunks, null, readFile, split) import qualified Data.ByteString.Lazy.UTF8 as LB import qualified Data.ByteString.UTF8 as B import Data.Char (ord) import qualified Data.FingerTree as T import Data.FingerTree hiding (null, empty, reverse, split) import qualified Data.List as L import Data.Monoid import Data.String (IsString(..)) import Prelude hiding (null, head, tail, length, take, drop, splitAt, head, tail, foldl, reverse, readFile, writeFile, concat) import System.IO.Cautious (writeFileL) defaultChunkSize :: Int defaultChunkSize = 128 -- in chars! (chunkSize requires this to be <= 256) -- The FingerTree does not store measurements for single chunks, which -- means that the length of chunks often have to be recomputed. mkChunk :: ByteString -> Chunk mkChunk s = Chunk (fromIntegral $ B.length s) s data Chunk = Chunk { chunkSize :: {-# UNPACK #-} !Word8 , fromChunk :: {-# UNPACK #-} !ByteString } deriving (Eq, Show) data Size = Indices { charIndex :: {-# UNPACK #-} !Int , lineIndex :: {-# UNPACK #-} !Int -- ^ lineIndex is lazy because we do not often -- want the line count. However, we need this to -- avoid stack overflows on large files! } deriving Show instance Monoid Size where mempty = Indices 0 0 mappend (Indices c1 l1) (Indices c2 l2) = Indices (c1+c2) (l1+l2) newtype Rope = Rope { fromRope :: FingerTree Size Chunk } deriving (Eq, Show) (-|) :: Chunk -> FingerTree Size Chunk -> FingerTree Size Chunk b -| t | chunkSize b == 0 = t | otherwise = b <| t (|-) :: FingerTree Size Chunk -> Chunk -> FingerTree Size Chunk t |- b | chunkSize b == 0 = t | otherwise = t |> b -- Newlines are preserved by UTF8 encoding and decoding newline :: Word8 newline = fromIntegral (ord '\n') instance Measured Size Chunk where measure (Chunk l s) = Indices (fromIntegral l) -- note that this is the length in -- characters, not bytes. (Byte.count newline s) -- | The 'Foldable' instance of 'FingerTree' only defines 'foldMap', -- so the 'foldr' needed for 'toList' is inefficient, and can cause -- stack overflows. So, we roll our own (somewhat inefficient) version -- of 'toList' to avoid this. toList :: Measured v a => FingerTree v a -> [a] toList t = case viewl t of c :< cs -> c : toList cs EmptyL -> [] toLazyByteString :: Rope -> LB.ByteString toLazyByteString = LB.fromChunks . fmap fromChunk . toList . fromRope reverse :: Rope -> Rope reverse = Rope . fmap' (mkChunk . B.fromString . L.reverse . B.toString . fromChunk) . T.reverse . fromRope toReverseString :: Rope -> String toReverseString = concatMap (L.reverse . B.toString . fromChunk) . toList . T.reverse . fromRope toString :: Rope -> String toString = LB.toString . toLazyByteString fromLazyByteString :: LB.ByteString -> Rope fromLazyByteString = Rope . toTree T.empty where toTree acc b | LB.null b = acc | otherwise = let (h,t) = LB.splitAt (fromIntegral defaultChunkSize) b chunk = mkChunk $ B.concat $ LB.toChunks h in acc `seq` chunk `seq` toTree (acc |> chunk) t instance IsString Rope where fromString = Rope . toTree T.empty where toTree acc [] = acc toTree acc b = let (h,t) = L.splitAt defaultChunkSize b chunk = mkChunk $ B.fromString h in acc `seq` chunk `seq` toTree (acc |> chunk) t null :: Rope -> Bool null (Rope a) = T.null a empty :: Rope empty = Rope T.empty -- | Get the length of the string. (This information cached, so O(1) -- amortized runtime.) length :: Rope -> Int length = charIndex . measure . fromRope -- | Count the number of newlines in the strings. (This information -- cached, so O(1) amortized runtime.) countNewLines :: Rope -> Int countNewLines = lineIndex . measure . fromRope -- | Append two strings by merging the two finger trees. append :: Rope -> Rope -> Rope append (Rope a) (Rope b) = Rope $ case T.viewr a of EmptyR -> b l :> Chunk len x -> case T.viewl b of EmptyL -> a Chunk len' x' :< r -> if fromIntegral len + fromIntegral len' < defaultChunkSize then l >< singleton (Chunk (len + len') (x `B.append` x')) >< r else a >< b concat :: [Rope] -> Rope concat = L.foldl' append empty take, drop :: Int -> Rope -> Rope take n = fst . splitAt n drop n = snd . splitAt n -- | Split the string at the specified position. splitAt :: Int -> Rope -> (Rope, Rope) splitAt n (Rope t) = case T.viewl c of Chunk len x :< r | n' /= 0 -> let (lx, rx) = B.splitAt n' x in (Rope $ l |> Chunk (fromIntegral n') lx, Rope $ Chunk (len - fromIntegral n') rx -| r) _ -> (Rope l, Rope c) where (l, c) = T.split ((> n) . charIndex) t n' = n - charIndex (measure l) -- | Split the rope on a chunk, so that the desired -- position lies within the first chunk of the second rope. splitAtChunkBefore :: Int -> Rope -> (Rope, Rope) splitAtChunkBefore n (Rope t) = let (l, c) = T.split ((> n) . charIndex) t in (Rope l, Rope c) -- | Split before the specified line. Lines are indexed from 0. splitAtLine :: Int -> Rope -> (Rope, Rope) splitAtLine n | n <= 0 = \r -> (empty, r) | otherwise = splitAtLine' (n-1) -- | Split after the specified line. Lines are indexed from 0. splitAtLine' :: Int -> Rope -> (Rope, Rope) splitAtLine' n (Rope t) = case T.viewl c of ch@(Chunk _ x) :< r -> let (lx, rx) = cutExcess excess x excess = lineIndex (measure l) + lineIndex (measure ch) - n - 1 in (Rope $ l |- mkChunk lx, Rope $ mkChunk rx -| r) _ -> (Rope l, Rope c) where (l, c) = T.split ((n <) . lineIndex) t split :: Word8 -> Rope -> [Rope] split c = map fromLazyByteString . LB.split c . toLazyByteString cutExcess :: Int -> ByteString -> (ByteString, ByteString) cutExcess i s = let idx = gt i $ L.reverse $ Byte.elemIndices newline s in Byte.splitAt (idx+1) s -- take one extra byte to that the newline -- is found on the left. where gt _ [] = Byte.length s gt 0 (x:_ ) = x gt n (_:xs) = gt (n-1) xs instance Binary Rope where put = put . toString get = fromString `fmap` get writeFile :: FilePath -> Rope -> IO () writeFile f = writeFileL f . toLazyByteString readFile :: FilePath -> IO Rope readFile f = fromLazyByteString `fmap` LB.readFile f