\begin{code} {-# OPTIONS -fglasgow-exts -XRecordWildCards -XNamedFieldPuns -XNoImplicitPrelude -cpp #-} -- | Implementation of the ideas in -- . -- Inspired also by "Data.Map" and the OCaml version of ropes. module Data.Rope( -- * The 'Rope' type Rope, -- * Introducing and eliminating 'Rope's empty, singleton, pack, unpack, fromByteString, toByteString, -- * Basic interface cons, snoc, append, head, uncons, last, tail, init, null, length, -- * Transforming 'Rope's map, reverse, intercalate, -- * Concatenations insert, -- * Reducing 'Rope's foldl, foldl', foldr, -- * Breaking 'Rope's take, drop, splitAt#, splitAt, breakByte, breaks, lines, -- * Indexing 'Rope's index, elemIndex, elemIndex', elemIndices, -- * Input and Output readFile, hGet, hGetLine, hGetContents, hPut, hPutStrLn, hPutStr, putStrLn, putStr, ) where import Data.Rope.Internals import Foreign.Storable import Foreign.ForeignPtr import qualified Foreign.Concurrent as FC import Foreign.Marshal.Alloc import Foreign.Marshal.Array import Foreign.Marshal.Utils import Foreign.Ptr import Foreign.C.Types import qualified Data.ByteString.Internal as B import Data.String import Data.Word import Control.Exception (finally) #if __GLASGOW_HASKELL__ >=608 import GHC.IO (IO(IO),unsafePerformIO,unsafeInterleaveIO) import GHC.Base (realWorld#) import GHC.IO.Handle.Types --import GHC.IO.Handle.Internals import GHC.IO.Device as IODevice #else import System.IO.Unsafe #endif import Data.Maybe import GHC.Conc (par) import System.Posix (openFd,closeFd,defaultFileFlags,getFileStatus, fileSize,OpenMode(..)) import System.IO (hGetBuf,hPutBuf,hSeek,hTell,hFileSize, hPutChar, stdout, hClose) import qualified Prelude(length,map,splitAt) import Prelude hiding (length,null,head,tail,last,init,map,reverse, foldl,foldr, take,drop,splitAt,lines, putStrLn,putStr,readFile) import Data.Bits hiding(rotateL,rotateR) \end{code} Attention, on ne garantit pas necessairement que les deux sous-arbres sont non-vides. Il faudrait regler ce probleme dans balance. i0 est le premier indice ou on a le droit d'ecrire. File sert a ne pas recrire tout un fichier. On l'enleve des qu'on modifie la @Rope@, par contre. Il faut recrire la fin du fichier quand meme, dans ce cas. \begin{code} data Rope= Concat { sizeC:: !Int, length_:: !Int, l::Rope, r::Rope } | String { contents:: !(ForeignPtr Word8), i0:: !(ForeignPtr Int), offset:: !Int, length_:: !Int } | File { handle::Handle, position:: !Int, length_:: !Int, rope::Rope } -- deriving (Show) instance IsString Rope where fromString s=pack $ Prelude.map (fromIntegral.fromEnum) s instance Show Rope where show s="\""++(Prelude.map (toEnum.fromIntegral) $ unpack s)++"\"" {- instance Show Rope where show (s@String{})=show ((Prelude.map (toEnum.fromIntegral) $ unpack s)::String) show (Concat{l,r})="Concat { l="++(show l)++"\nr="++(show r) -} instance Eq Rope where (==) String{contents=ca,offset=oa,length_=la} String{contents=cb, offset=ob, length_=lb} = la==lb && (inlinePerformIO $ (withForeignPtr ca $ \pa-> withForeignPtr cb $ \pb->do x<-strncmp (pa`plusPtr`oa) (pb`plusPtr`ob) (fromIntegral la) return $ x == 0)) (==) a b= (length a == length b) && (case (a,b) of (Concat{l,r}, _) -> let (# x,y #)=splitAt# (length l) b in x==l && y==r (_,Concat{l,r})-> let (# x,y #)=splitAt# (length l) a in x==l && y==r _->undefined) -- never happens instance Ord Rope where compare (String{contents=ca,offset=oa,length_=la}) (String{contents=cb, offset=ob, length_=lb}) = inlinePerformIO $ (withForeignPtr ca $ \pa->withForeignPtr cb $ \pb->do x<-strncmp (pa`plusPtr`oa) (pb`plusPtr`ob) (fromIntegral $ min la lb) if x==0 then return $ compare la lb else return $ compare x 0) compare a_ b_= let (a,b)=if length a_ let (# x,y #)=splitAt# (length l) a in case compare x l of EQ->compare y r ord->ord (Concat{l,r},_)-> let (# x,y #)=splitAt# (length l) b in case compare l x of EQ->compare r y ord->ord _->undefined --never happens -- | leafLen est la taille standard des blocs de feuilles. leafSize::Int leafSize=0x10000 \end{code} Interface basique. Rien \`a dire. Pour cons et snoc, il ne servirait a rien de rajouter un bout "non utilise" au debut de la chaine : vu qu'on ecrit toutes les chaines depuis l'offset 0, si on prend une sous-chaine, c'est que le debut est deja utilise dans une autre sous-chaine. \begin{code} -- | O(log n). Appends the specified byte at the beginning of the 'Rope'. cons::Word8->Rope->Rope cons w x=append (singleton w) x {-# INLINE cons #-} -- | O(log n). Appends the specified byte at the end of the 'Rope'. snoc::Rope->Word8->Rope snoc x w=append x (singleton w) {-# INLINE snoc #-} -- | O(log n) First element of the 'Rope'. Raises an error if the argument is empty. head::Rope->Word8 head (String{contents,offset,length_}) |length_>0 = inlinePerformIO $ withForeignPtr contents $ \c->peekByteOff c offset |otherwise = error "head" head (Concat{l})=head l head (File{rope})=head rope {- head (File {handle,position})= unsafePerformIO $ alloca $ \c->do hSeek handle AbsoluteSeek $ fromIntegral position x<-hGetBuf handle c 1 if x<=0 then error "head" else peek c -} -- | O(log n). Returns the first element of the 'Rope', and the 'Rope' of the -- remaining elements. uncons::Rope->Maybe (Word8, Rope) uncons (s@String{contents,offset,length_}) | length_<=0 = Nothing | otherwise= let u=inlinePerformIO $ withForeignPtr contents $ \c->peekByteOff c offset in Just (u,s { contents,offset=offset+1,length_=length_-1 }) uncons (c@Concat{l,r,length_})= case uncons l of Just (x,y)->Just (x,balance $ c { l=y, length_=length_-1 }) Nothing->uncons r uncons (File{rope})=uncons rope {- uncons (f@File{handle,position,length_})= unsafePerformIO $ alloca $ \c->do hSeek handle AbsoluteSeek $ fromIntegral position x<-hGetBuf handle c length_ if x<=0 then return Nothing else do cc<-peek c return $ Just $ (cc, f { position=position+1,length_=length_-1 }) -} -- | O(log n). Last element of a 'Rope'. last::Rope->Word8 last (String{contents,offset,length_}) | length_<=0 = error "last" | otherwise = inlinePerformIO $ withForeignPtr contents $ \c->peekByteOff c $ offset+length_-1 last (Concat{l,r}) |size r<=0 = last l |otherwise = last r last File{rope}=last rope {- last (File{handle,length_,position})= unsafePerformIO $ alloca $ \c->do hSeek handle AbsoluteSeek $ fromIntegral $ position+length_-1 x<-hGetBuf handle c 1 if x<=0 then error "last" else do cc<-peek c return cc -} -- | O(log n) The elements after the head. An error is raised if the 'Rope' is empty. tail::Rope->Rope tail (s@String{offset,length_}) | length_<=0 = error "tail" | otherwise = s { offset=offset+1,length_=length_-1 } tail (c@Concat{l,r}) | length l<=0 = tail r | otherwise = balance $ c { l=tail l } tail File{rope}=tail rope {- tail (f@File{length_,position}) | length_>1 = f { length_=length_-1,position=position+1 } | length_==1 = empty | otherwise = error "tail" -} -- | O(log n) The elements in the 'Rope' except the last one. init::Rope->Rope init (s@String{length_}) | length_<=0 = error "init" | otherwise = s { length_=length_-1 } init (s@Concat{r})=balance $ s { r=init r } init File{rope}=init rope {- init (f@File{length_}) | length_>1 = f { length_=length_-1 } | length_==1 = empty | otherwise = error "init" -} -- | O(1) Tests whether a 'Rope' is empty. null::Rope->Bool null x=length x==0 -- | O(1) Length of a 'Rope'. length::Rope->Int length (String{length_})=length_ length (Concat{length_})=length_ length (File{rope})=length rope size::Rope->Int size (Concat {sizeC})=sizeC size (File {rope})=size rope size _=1 \end{code} Transformations \begin{code} -- | O(n). @'map' f r@ applies @f@ on each element of @r@ and returns the -- concatenation of the result. map::(Word8->Word8)->Rope->Rope map f (String{contents,offset,length_})= unsafePerformIO $ withForeignPtr contents $ \c->do cont<-mallocForeignPtrBytes leafSize withForeignPtr cont $ \p->copyMap p (c`plusPtr`offset) length_ i0<-mallocForeignPtr withForeignPtr i0 $ \i->poke i length_ return $ String { contents=cont,offset=0,length_,i0 } where copyMap _ _ 0=return () copyMap p p' len=do peek p' >>= ((poke p).f) copyMap (p`plusPtr`1) (p'`plusPtr`1) $ len-1 map f (Concat{l,r})=append (map f l) (map f r) map f File{rope}=map f rope -- | O(n) efficient way to reverse a 'Rope'. reverse::Rope->Rope reverse (String{contents,offset,length_})= unsafePerformIO $ withForeignPtr contents $ \c->do cont<-mallocForeignPtrBytes leafSize withForeignPtr cont $ \p-> copyRev (p`plusPtr`(length_-1)) (c`plusPtr`offset) length_ i0<-mallocForeignPtr withForeignPtr i0 $ \i->poke i length_ return $ String { contents=cont,offset=0,length_,i0 } where copyRev::Ptr Word8->Ptr Word8->Int->IO() copyRev _ _ 0=return () copyRev p p' len=do peek p' >>= poke p copyRev (p`plusPtr`(-1)) (p'`plusPtr`1) $ len-1 reverse (Concat{l,r})=append (reverse r) (reverse l) reverse File{rope}=reverse rope -- | O(n) intercalate an element between each element of the list of 'Rope's -- and concatenates the result. intercalate::Rope->[Rope]->Rope intercalate rope list= intercalate_ list empty where intercalate_ [] x=x intercalate_ [h] x=append x h intercalate_ (h:s) x=intercalate_ s (append (append h rope) x) \end{code} Il n'est pas clair qu'on ne puisse pas faire beaucoup mieux que ce pack. Par exemple, vu qu'on calcule la taille de la liste, on pourrait sortir un arbre directement \'equilibr\'e. Vu qu'on ne fait jamais pack avec des 'ByteString's (sauf ghc quand il compile des IsString), \c ca ne vaut sans doute pas le co\^ut. Par contre, append est utilise presque partout. Il est crucial que son implementation soit extremement efficace. \begin{code} -- | O(1) The empty 'Rope' empty::Rope empty=unsafePerformIO $ do contents<-mallocForeignPtrArray leafSize i0<-mallocForeignPtr withForeignPtr i0 $ \i->poke i (-1) return $ String { contents,i0,offset=0,length_=0 } -- | O(1) Convert a 'Word8' into a 'Rope' singleton::Word8->Rope singleton c=unsafePerformIO $ do contents<-mallocForeignPtrArray leafSize withForeignPtr contents $ \con->poke con c i0<-mallocForeignPtr withForeignPtr i0 $ \i->poke i 1 return $ String { contents,i0,offset=0,length_=1 } -- | O(n) Convert a list of 'Word8' into a 'Rope' pack::[Word8]->Rope pack s= if Prelude.length s<=leafSize then unsafePerformIO $ do contents<-mallocForeignPtrArray leafSize i0<-mallocForeignPtr withForeignPtr i0 $ \i->poke i $ Prelude.length s withForeignPtr contents $ \c->fillPtr c s return $ String { contents, i0, offset=0, length_=Prelude.length s } else let (a,b)=Prelude.splitAt leafSize s in append (pack a) (pack b) where fillPtr::Storable a=>Ptr a->[a]->IO () fillPtr _ []=return () fillPtr ptr (h:t)=do poke ptr h fillPtr (plusPtr ptr 1) t -- | O(n) Inverse conversion unpack::Rope->[Word8] unpack (Concat{l,r})=unpack l++unpack r unpack (String{contents,offset,length_})= unsafePerformIO $ mapM (\i->withForeignPtr contents $ \c->peek (plusPtr c i)::IO Word8) [offset..(offset+length_-1)] unpack File{rope}=unpack rope {- unpack (File{handle,position,length_})= unsafePerformIO $ alloca $ \c->do hSeek handle AbsoluteSeek $ fromIntegral position l<-hGetBuf handle (c::Ptr Word8) length_ mapM (peek.(plusPtr c)) [0..(l-1)] -} -- | O(log n) Concatenates two 'Rope's append::Rope->Rope->Rope append File{rope=a} File{rope=b}=append a b append File{rope} a=append rope a append a File{rope}=append a rope append sa@(String {}) sb@(String{}) |length sa + offset sa + length sb <= leafSize = unsafePerformIO $ do i0_<-withForeignPtr (i0 sa) $ peek let l=length sa if i0_poke i $ offset sa+l+length sb withForeignPtr (contents sa) $ \ca->withForeignPtr (contents sb) $ \cb->copyArray (ca`plusPtr`(l+offset sa)::Ptr Word8) (cb`plusPtr`(offset sb)) $ length sb return $ sa { length_=length sa+length sb } else return $ Concat { sizeC=3, length_=length sa+length sb, l=sa, r=sb } |otherwise= Concat { sizeC=3, length_=length sa+length sb, l=sa, r=sb } append x@String{} y@Concat{sizeC,l}=balance $ y { sizeC=1+sizeC, length_=length x+length y, l=append x l } append x y = balance $ Concat { sizeC=size x+size y, length_=length x+length y, l=x, r=y } -- | O(log n) @'insert' a i b@ inserts 'Rope' @a@ in 'Rope' @b@ after the @i@th element of @b@. insert::Rope->Int->Rope->Rope insert a 0 b=append a b insert a i (b@String{})= balance $ Concat { sizeC=size a+size b, length_=length a+length b, l=String { contents=contents b, i0=i0 b, offset=offset b, length_=min i $ length b }, r=append a $ String { contents=contents b, i0=i0 b, offset=i+offset b, length_=max 0 $ (length b)-i } } insert a i (b@Concat{}) |i>=(length $ r b) = balance $ b { r=insert a (i-(length $ r b)) $ r b } |otherwise = balance $ b { l=insert a i $ l b} insert a i File{rope}=insert a i rope \end{code} R\'e\'equilibrage des arbres. @delta@ et @ratio@ sont des param\`etres exp\'erimentaux. Peut-\^etre faut-il profiler un peu. \begin{code} delta,ratio::Int delta=5 ratio=2 balance,rotateL,rotateR,singleL,singleR,doubleL,doubleR::Rope->Rope balance x=case x of String{}->x Concat{l,r} | length l==0 -> r | length r==0 -> l | size r>=delta*size l -> rotateL x | size l>=delta*size r -> rotateR x | otherwise->x _->x rotateL x=case x of String{}->x Concat{r}->case r of String{}->x Concat{l=rl,r=rr} |size rl singleL x |otherwise -> doubleL x _->x rotateR x=case x of String{}->x Concat{l}->case l of String{}->x Concat{l=ll,r=lr} |size lr singleR x |otherwise -> doubleR x _->x singleL (a@String{})=a singleL (b@Concat{})= case r b of d@Concat{}-> let b'=append (l b) (l d) {- Concat { length_=(length $ l b)+(length $ l d), sizeC=(size $ l b)+(size $ l d)+1, l=l b, r=l d } -} in Concat { length_=length b, sizeC=size b, r=r d, l=b' } _->b singleR (a@String{})=a singleR (d@Concat{})= case l d of b@Concat{}-> let d'=append (r b) (r d) {- Concat { length_=(length $ r b)+(length $ r d), sizeC=(size $ r b)+(size $ r d)+1, l=r b, r=r d } -} in Concat { length_=length d, sizeC=size d, r=d', l=l b } _->d doubleL (a@Concat{})=singleL $ a { l=singleR $ l a } doubleL a=a doubleR (a@Concat{})=singleR $ a { r=singleL $ r a } doubleR a=a \end{code} Unsafe Stuff \begin{code} foreign import ccall unsafe "string.h memchr" memchr::Ptr Word8->Word8->CSize->IO (Ptr Word8) foreign import ccall unsafe "string.h strncmp" strncmp::Ptr Word8->Ptr Word8->CSize->IO CInt {-# INLINE inlinePerformIO #-} inlinePerformIO::IO a->a #ifdef __GLASGOW_HASKELL__ inlinePerformIO (IO m)=case m realWorld# of (# _, r #)->r #else inlinePerformIO=unsafePerformIO #endif \end{code} Indexing Ropes \begin{code} -- | O(log n) returns the 'Word8' at given index in the 'Rope' index::Rope->Int->Char index c i |i>=length c = error $ "index trop grand : "++(show i)++", longueur = "++(show $ length c) |otherwise=index_ c i where index_ (String{contents,offset}) i_= inlinePerformIO $ withForeignPtr contents $ \con->do x<-peek $ plusPtr con (offset+i_) :: IO Word8 return $ toEnum $ fromIntegral x index_ (Concat{r,l}) i_ |i_>=length l = index_ r (i_-length l) |otherwise = index_ l i_ {- index_ (File{handle,position}) i_= unsafePerformIO $ alloca $ \cc->do hSeek handle AbsoluteSeek $ fromIntegral $ position+i_ hGetBuf handle cc 1 peek cc -} -- | O(n) returns the index of the first element equal to the query element. This implementation -- uses memchr at leaves, and explores the rope in parallel (with 'GHC.Conc.par'). elemIndex::Word8->Rope->Maybe Int elemIndex w rope= elemIndex_ 0 rope where elemIndex_ i (String{contents,offset,length_})= inlinePerformIO $ withForeignPtr contents $ \con->do ptr<-memchr (con`plusPtr`offset) w (fromIntegral length_) return $ if ptr==nullPtr then Nothing else Just $ i+offset+(ptr`minusPtr`con) elemIndex_ i (Concat{l,r})= let sl=elemIndex_ i l sr=elemIndex_ (i+length l) r in (sl`par`sr)`seq`(if isNothing sl then sr else sl) elemIndex_ i (File{handle,position,length_})= unsafePerformIO $ allocaBytes length_ $ \c->do hSeek handle AbsoluteSeek $ fromIntegral position l<-hGetBuf handle c length_ x<-loop c 0 l return $ x>>=(return.(+i)) loop ptr i l | i>=l = return Nothing | otherwise = do x<-peek ptr if x==w then return $ Just i else loop (ptr`plusPtr`1) (i+1) l -- | O(n) Same as 'elemIndex', but explores the 'Rope' sequentially. Useful for -- 'Rope's loaded lazily with 'readFile'. elemIndex'::Word8->Rope->Maybe Int elemIndex' w rope= elemIndex_ 0 rope where elemIndex_ i (String{contents,offset,length_})= inlinePerformIO $ withForeignPtr contents $ \con->do ptr<-memchr (con`plusPtr`offset) w (fromIntegral length_) return $ if ptr==nullPtr then Nothing else Just $ i+offset+(ptr`minusPtr`con) elemIndex_ i (Concat{l,r})= case elemIndex_ i l of Nothing->elemIndex_ (i+length l) r a->a elemIndex_ i (File{handle,position,length_})= unsafePerformIO $ allocaBytes length_ $ \c->do hSeek handle AbsoluteSeek $ fromIntegral position l<-hGetBuf handle c length_ x<-loop c 0 l return $ x>>=(return.(+i)) loop ptr i l | i>=l = return Nothing | otherwise = do x<-peek ptr if x==w then return $ Just i else loop (ptr`plusPtr`1) (i+1) l -- | O(n) returns the list of all positions where the queried elements occurs in the 'Rope'. -- This implementation uses memchr. elemIndices::Word8->Rope->[Int] elemIndices w rope= elemIndices_ 0 rope [] where -- loop est une action IO, avec le foreignPtr deja enleve. -- Autant dire que ca va tres vite. loop::Ptr Word8->CInt->CSize->[Int]->IO [Int] loop _ _ 0 l=return l loop ptr i len l=do ptr'<-memchr ptr w len if ptr'==nullPtr then return [] else do let off=fromIntegral $ ptr'`minusPtr`ptr x<-loop (ptr'`plusPtr`1) (i+off+1) (len-fromIntegral off-1) l return $ (fromIntegral $ i+off):x elemIndices_ i (String{contents,offset,length_}) list= inlinePerformIO $ withForeignPtr contents $ \con-> loop (con`plusPtr`offset) i (fromIntegral length_) list elemIndices_ i (Concat{l,r}) list= let sl=elemIndices_ i l sr sr=elemIndices_ (i+(fromIntegral $ length l)) r list in sl elemIndices_ i (File{handle,position,length_}) list= unsafePerformIO $ allocaBytes length_ $ \c->do hSeek handle AbsoluteSeek $ fromIntegral position l<-hGetBuf handle c length_ loop c i (fromIntegral l) list \end{code} Reducing \begin{code} -- | O(n). fold over a 'Rope'. -- This implementation is not tail-recursive but never pushes more than -- O(log n) calls on the stack. foldl::(a->Word8->a)->a->Rope->a foldl f a (String {contents,offset,length_})= unsafePerformIO $ withForeignPtr contents $ \c->foldlBuf f a (c`plusPtr`offset) length_ foldl f a (Concat{l,r})=foldl f (foldl f a l) r foldl f a (File{handle,position,length_})= unsafePerformIO $ allocaArray length_ $ \arr->do hSeek handle AbsoluteSeek $ fromIntegral position l<-hGetBuf handle arr length_ foldlBuf f a arr l foldlBuf::(a->Word8->a)->a->Ptr Word8->Int->IO a foldlBuf _ a _ 0=return a foldlBuf f a p l=do c<-peek p foldlBuf f (f a c) (p`plusPtr`1) (l-1) -- | O(n). like 'foldl' but strict. foldl'::(a->Word8->a)->a->Rope->a foldl' f a (String {contents,offset,length_})= unsafePerformIO $ withForeignPtr contents $ \c->foldlBuf' f a (c`plusPtr`offset) length_ foldl' f a (Concat{l,r})= let b=foldl' f a l in b`seq`(foldl' f b r) foldl' f a (File{handle,position,length_})= unsafePerformIO $ allocaArray length_ $ \arr->do hSeek handle AbsoluteSeek $ fromIntegral position l<-hGetBuf handle arr length_ foldlBuf' f a arr l foldlBuf'::(a->Word8->a)->a->Ptr Word8->Int->IO a foldlBuf' _ a _ 0=return a foldlBuf' f a p l=do c<-peek p let b=f a c b`seq`foldlBuf' f b (p`plusPtr`1) (l-1) -- | O(n). Right fold. Again not tail-recursive but never uses more than -- O(log n) on the stack. foldr::(Word8->a->a)->a->Rope->a foldr f a (String{contents,offset,length_})= unsafePerformIO $ withForeignPtr contents $ \p->foldrBuf f a (p`plusPtr`offset) length_ foldr f a (Concat{l,r})=foldr f (foldr f a r) l foldr f a (File{handle,position,length_})= unsafePerformIO $ allocaArray length_ $ \arr->do hSeek handle AbsoluteSeek $ fromIntegral position l<-hGetBuf handle arr length_ foldrBuf f a arr l foldrBuf::(Word8->a->a)->a->Ptr Word8->Int->IO a foldrBuf _ a _ 0=return a foldrBuf f a p l=do c<-peek $ p`plusPtr`(l-1) foldrBuf f (f c a) p (l-1) \end{code} Breaking Ropes \begin{code} take::Int->Rope->Rope take i (s@String{length_})=s { length_=min length_ i } take i (c@Concat{l,r}) | i==length l = l | iRope->Rope drop i (s@String{offset,length_})=s { offset=offset+(min i length_) } drop i (c@Concat{l,r}) | i==length l = r | iRope->(# Rope,Rope #) splitAt# i (s@String{offset,length_}) | i>=length_ = (# s,empty #) | otherwise = (# s { length_=i }, s { offset=offset+i,length_=length_-i} #) splitAt# i (c@Concat{length_=l0,l,r}) | i>=l0 = (# c,empty #) | i>=length_ l = let (# u,v #)=splitAt# (i-length_ l) r in (# c { length_=i, r=u }, v #) | otherwise = let (# u,v #)=splitAt# i l in (# u, c { length_=i-length_ u, l=v } #) -- | O(log n). @'splitAt' n xs@ is equivalent to (take n xs, drop n xs), but a little faster. splitAt::Int->Rope->(Rope,Rope) splitAt i r= let (# u,v #)=splitAt# i r in (u,v) -- | O(n). @'breakByte' c r@ breaks 'Rope' @r@ before the first occurence of @c@. breakByte::Word8->Rope->(Rope,Rope) breakByte w rope_= let (_,b,c)=breakByte_ rope_ in (b,c) where breakByte_::Rope->(Bool,Rope,Rope) breakByte_ x@(String{contents,offset,length_})= inlinePerformIO $ withForeignPtr contents $ \con->do ptr<-memchr (con`plusPtr`offset) w (fromIntegral length_) return $ if ptr==nullPtr then (False, x, empty) else (True, x { length_=length_-(ptr`minusPtr`con)-1 }, x { offset=offset+(ptr`minusPtr`con) }) breakByte_ (Concat{l,r})= case breakByte_ l of (True,x,y)->(True,x,append y r) (False,x,_)-> case breakByte_ r of (True,x',y')->(True,append x x',y') u@(False,_,_)->u breakByte_ (File{rope})=breakByte_ rope -- | O(n). @'breaks' w r@ breaks 'Rope' @r@ between each occurence of @w@ (non-inclusive). -- This function is not tail-recursive, uses @memchr@ and constructs the list in parallel -- using @'par'@. breaks::Word8->Rope->[Rope] breaks w rope_= let (a,_)=lines_ True rope_ in a where lines_::Bool->Rope->([Rope],Maybe Rope) lines_ isLast (str@String{contents,offset,length_})= if length_<=0 then ([],Nothing) else let off=inlinePerformIO $ withForeignPtr contents $ \con->do let pcon=con`plusPtr`offset ptr<-memchr pcon w (fromIntegral length_) if ptr==nullPtr then return Nothing else return $ Just (ptr`minusPtr`pcon) in case off of Nothing-> -- traceShow ("str",isLast, str) $ if isLast then if length str>0 then ([str],Nothing) else ([],Nothing) else ([], Just str) Just diff-> let line=str { length_=diff } (a,b)=lines_ isLast (str{ offset=offset+diff+1, length_=length_-diff-1}) in -- traceShow ("line",line:a,b) $ (line:a,b) lines_ isLast (Concat{l,r})= let (ll,x)=lines_ False l (lr,y)=lines_ isLast r in (ll`par`lr)`seq` (case lr of []->case (x,y) of (Just xx, Just yy)->(ll,Just $ append xx yy) (Just _, Nothing)->(ll, x) (Nothing, _)->(ll, y) h:s->case x of { Nothing->(ll++lr,y); Just xx->(ll++((append xx h):s), y) } ) -- | O(n). Satisfies @lines r == breaks 0x0a r@. lines::Rope->[Rope] lines r=breaks 0x0a r \end{code} Input and Output \begin{code} -- | Writes the contents of the 'Rope' on the specified 'Handle'. hPut::Handle->Rope->IO() hPut h (String{contents,offset,length_})= withForeignPtr contents $ \c->hPutBuf h (c`plusPtr`offset) length_ hPut h (Concat{l,r})=do hPut h l hPut h r hPut h (File{handle,position,length_})=do hSeek handle AbsoluteSeek $ fromIntegral position allocaBytes length_ $ \c->do l<-hGetBuf handle c length_ hPutBuf h c l -- | synonym for 'hPut'. hPutStr::Handle->Rope->IO() hPutStr=hPut -- | like 'hPut', but with a newline character at the end of the output hPutStrLn::Handle->Rope->IO() hPutStrLn handle r=do hPut handle r hPutChar handle '\n' -- | Writes the contents of the 'Rope' on the standard output. putStr::Rope->IO() putStr=hPutStr stdout -- | like 'putStr' but with a newline character at the end of the output putStrLn::Rope->IO() putStrLn=hPutStrLn stdout -- | fromPtr encapsule un pointeur, un offset et une longueur dans un @Rope@.. fromPtr::ForeignPtr Word8->Int->Int->IO Rope fromPtr fp offset s | s<=leafSize = withForeignPtr fp $ \p->do contents<-mallocForeignPtrBytes leafSize withForeignPtr contents $ \c->copyArray c (p`plusPtr`offset) s i0<-mallocForeignPtr withForeignPtr i0 $ \i->poke i s return $ String { contents,i0,offset=0,length_=s } | otherwise = do let sl=s`shiftR`1 sr=sl+s.&.1 sz=ceiling $ ((fromIntegral s)/(fromIntegral leafSize) :: Double) l<-unsafeInterleaveIO $ fromPtr fp offset sl r<-unsafeInterleaveIO $ fromPtr fp (offset+sl) sr return $ Concat { l,r,length_=s, sizeC=2*sz-1 } -- | O(n) Conversion from a strict 'ByteString' fromByteString::B.ByteString->Rope fromByteString b= let (fp,off,len)=B.toForeignPtr b in unsafePerformIO $ fromPtr fp off len -- | O(n) Conversion to a strict 'ByteString' toByteString::Rope->B.ByteString toByteString (String{contents,offset,length_})= B.fromForeignPtr contents offset length_ toByteString (c@Concat{length_=len})= B.unsafeCreate len $ \ptr->fill ptr c where fill p (Concat{l,r})=do fill p l fill (p`plusPtr`(length_ l)) r fill p (String{contents,offset,length_})= withForeignPtr contents $ \cc->copyBytes p (cc`plusPtr`offset) length_ -- | Lazy file reading, using @mmap@. readFile::FilePath->IO Rope readFile file=do stat<-getFileStatus file let s=fromIntegral $! fileSize stat s`seq`return () fd<-openFd file ReadOnly Nothing $ defaultFileFlags p<-c_mmap nullPtr s c_PROT_READ c_MAP_SHARED (fromIntegral fd) 0 fp<-FC.newForeignPtr p $ do c_munmap p s closeFd fd fromPtr fp 0 (fromIntegral s) -- | Strict hGet. The whole rope is constructed. hGet::Handle->Int->IO Rope hGet handle length_=do position<-hTell handle buildRope handle position length_ buildRope::Handle->Integer->Int->IO Rope buildRope handle position len | len<=leafSize = do contents<-mallocForeignPtrBytes leafSize length_<-withForeignPtr contents $ \c->do hSeek handle AbsoluteSeek position hGetBuf handle c len i0<-mallocForeignPtr withForeignPtr i0 $ \i->poke i len return $ String {contents,length_,offset=0,i0 } | len>leafSize = let a=len`shiftR`1 b=a+(len.&.1) in do aa<-buildRope handle position a bb<-buildRope handle (position+fromIntegral a) b return $ Concat { length_=len,l=aa,r=bb,sizeC=size aa+size bb } -- | Returns the next line in the input 'Handle'. If you need to iterate 'hGetLine', -- it may be more efficient to first @mmap@ the file using 'readFile', or even load -- it with then iterate -- @'breakByte' 0x0a@ : 'hGetLine' allocates a buffer to read the file -- and may waste most of this space if the lines are shorter than the standard buffer -- size of this module. hGetLine::Handle->IO Rope hGetLine h= hGetLine' empty where hGetLine' x=do contents<-mallocForeignPtrBytes leafSize withForeignPtr contents $ \con->do l<-hGetBuf h con leafSize let readBytes=min l leafSize if readBytes<=0 then return x else do ptr<-memchr con 0x0a (fromIntegral readBytes) if ptr==nullPtr then do i0<-mallocForeignPtr withForeignPtr i0 $ \i->poke i readBytes let y=String { contents,i0,offset=0,length_=readBytes } hGetLine' (x`append`y) else do i0<-mallocForeignPtr let lineLen=ptr`minusPtr`con y=String { contents,i0,offset=0,length_=lineLen } withForeignPtr i0 $ \i->poke i lineLen hSeek h RelativeSeek (fromIntegral $ lineLen-readBytes+1) return $ x`append`y -- | Reads the contents of a file handle strictly, then closes it. hGetContents::Handle->IO Rope hGetContents h=finally (hGetContents' empty) (hClose h) where hGetContents' r=do contents<-mallocForeignPtrBytes leafSize withForeignPtr contents $ \l->do x<-hGetBuf h l leafSize i0<-mallocForeignPtr withForeignPtr i0 $ \i->poke i x let y=String { contents,i0,offset=0,length_=x } if x==0 then return r else if x