----------------------------------------------------------------------------- -- | -- Module : Data.PackedString -- Copyright : (c) The University of Glasgow 2001 -- License : BSD-style (see the file libraries/base/LICENSE) -- -- Maintainer : libraries@haskell.org -- Stability : experimental -- Portability : portable -- -- This API is deprecated. You might be able to use "Data.ByteString" -- or "Data.ByteString.Char8", provided you don't need full Unicode support. -- The long term aim is to provide a Unicode layer on "Data.ByteString", -- and then to provide a replacement for this "Data.PackedString" API based on -- that. -- ----------------------------------------------------------------------------- -- Original GHC implementation by Bryan O\'Sullivan, -- rewritten to use UArray by Simon Marlow. module Data.PackedString {-# DEPRECATED "use Data.ByteString, Data.ByteString.Char8, or plain String." #-} ( -- * The @PackedString@ type PackedString, -- abstract, instances: Eq, Ord, Show, Typeable -- * Converting to and from @PackedString@s packString, -- :: String -> PackedString unpackPS, -- :: PackedString -> String #ifndef __NHC__ -- * I\/O with @PackedString@s hPutPS, -- :: Handle -> PackedString -> IO () hGetPS, -- :: Handle -> Int -> IO PackedString #endif -- * List-like manipulation functions nilPS, -- :: PackedString consPS, -- :: Char -> PackedString -> PackedString headPS, -- :: PackedString -> Char tailPS, -- :: PackedString -> PackedString nullPS, -- :: PackedString -> Bool appendPS, -- :: PackedString -> PackedString -> PackedString lengthPS, -- :: PackedString -> Int indexPS, -- :: PackedString -> Int -> Char mapPS, -- :: (Char -> Char) -> PackedString -> PackedString filterPS, -- :: (Char -> Bool) -> PackedString -> PackedString reversePS, -- :: PackedString -> PackedString concatPS, -- :: [PackedString] -> PackedString elemPS, -- :: Char -> PackedString -> Bool substrPS, -- :: PackedString -> Int -> Int -> PackedString takePS, -- :: Int -> PackedString -> PackedString dropPS, -- :: Int -> PackedString -> PackedString splitAtPS, -- :: Int -> PackedString -> (PackedString, PackedString) foldlPS, -- :: (a -> Char -> a) -> a -> PackedString -> a foldrPS, -- :: (Char -> a -> a) -> a -> PackedString -> a takeWhilePS, -- :: (Char -> Bool) -> PackedString -> PackedString dropWhilePS, -- :: (Char -> Bool) -> PackedString -> PackedString spanPS, -- :: (Char -> Bool) -> PackedString -> (PackedString, PackedString) breakPS, -- :: (Char -> Bool) -> PackedString -> (PackedString, PackedString) linesPS, -- :: PackedString -> [PackedString] unlinesPS, -- :: [PackedString] -> PackedString wordsPS, -- :: PackedString -> [PackedString] unwordsPS, -- :: [PackedString] -> PackedString splitPS, -- :: Char -> PackedString -> [PackedString] splitWithPS, -- :: (Char -> Bool) -> PackedString -> [PackedString] joinPS, -- :: PackedString -> [PackedString] -> PackedString ) where import Prelude #ifndef __NHC__ import Data.Array.Unboxed import Data.Array.IO import Data.Typeable import Data.Char #ifdef __GLASGOW_HASKELL__ import Data.Generics #endif import System.IO -- ----------------------------------------------------------------------------- -- PackedString type declaration -- | A space-efficient representation of a 'String', which supports various -- efficient operations. A 'PackedString' contains full Unicode 'Char's. newtype PackedString = PS (UArray Int Char) -- ToDo: we could support "slices", i.e. include offset and length fields into -- the string, so that operations like take/drop could be O(1). Perhaps making -- a slice should be conditional on the ratio of the slice/string size to -- limit memory leaks. instance Eq PackedString where (PS x) == (PS y) = x == y instance Ord PackedString where compare (PS x) (PS y) = compare x y --instance Read PackedString: ToDo instance Show PackedString where showsPrec p ps r = showsPrec p (unpackPS ps) r #include "Typeable.h" INSTANCE_TYPEABLE0(PackedString,packedStringTc,"PackedString") -- ----------------------------------------------------------------------------- -- Constructor functions -- | The 'nilPS' value is the empty string. nilPS :: PackedString nilPS = PS (array (0,-1) []) -- | The 'consPS' function prepends the given character to the -- given string. consPS :: Char -> PackedString -> PackedString consPS c cs = packString (c : (unpackPS cs)) -- ToDo:better -- | Convert a 'String' into a 'PackedString' packString :: String -> PackedString packString str = packNChars (length str) str -- | The 'packNChars' function creates a 'PackedString' out of the -- first @len@ elements of the given 'String'. packNChars :: Int -> [Char] -> PackedString packNChars len str = PS (listArray (0,len-1) str) -- ----------------------------------------------------------------------------- -- Destructor functions (taking PackedStrings apart) -- | Convert a 'PackedString' into a 'String' unpackPS :: PackedString -> String unpackPS (PS ps) = elems ps -- ----------------------------------------------------------------------------- -- List-mimicking functions for PackedStrings -- | The 'lengthPS' function returns the length of the input list. Analogous to 'length'. lengthPS :: PackedString -> Int lengthPS (PS ps) = rangeSize (bounds ps) -- | The 'indexPS' function returns the character in the string at the given position. indexPS :: PackedString -> Int -> Char indexPS (PS ps) i = ps ! i -- | The 'headPS' function returns the first element of a 'PackedString' or throws an -- error if the string is empty. headPS :: PackedString -> Char headPS ps | nullPS ps = error "Data.PackedString.headPS: head []" | otherwise = indexPS ps 0 -- | The 'tailPS' function returns the tail of a 'PackedString' or throws an error -- if the string is empty. tailPS :: PackedString -> PackedString tailPS ps | len <= 0 = error "Data.PackedString.tailPS: tail []" | len == 1 = nilPS | otherwise = substrPS ps 1 (len - 1) where len = lengthPS ps -- | The 'nullPS' function returns True iff the argument is null. nullPS :: PackedString -> Bool nullPS (PS ps) = rangeSize (bounds ps) == 0 -- | The 'appendPS' function appends the second string onto the first. appendPS :: PackedString -> PackedString -> PackedString appendPS xs ys | nullPS xs = ys | nullPS ys = xs | otherwise = concatPS [xs,ys] -- | The 'mapPS' function applies a function to each character in the string. mapPS :: (Char -> Char) -> PackedString -> PackedString mapPS f (PS ps) = PS (amap f ps) -- | The 'filterPS' function filters out the appropriate substring. filterPS :: (Char -> Bool) -> PackedString -> PackedString {-or String?-} filterPS pred ps = packString (filter pred (unpackPS ps)) -- | The 'foldlPS' function behaves like 'foldl' on 'PackedString's. foldlPS :: (a -> Char -> a) -> a -> PackedString -> a foldlPS f b ps = foldl f b (unpackPS ps) -- | The 'foldrPS' function behaves like 'foldr' on 'PackedString's. foldrPS :: (Char -> a -> a) -> a -> PackedString -> a foldrPS f v ps = foldr f v (unpackPS ps) -- | The 'takePS' function takes the first @n@ characters of a 'PackedString'. takePS :: Int -> PackedString -> PackedString takePS n ps = substrPS ps 0 (n-1) -- | The 'dropPS' function drops the first @n@ characters of a 'PackedString'. dropPS :: Int -> PackedString -> PackedString dropPS n ps = substrPS ps n (lengthPS ps - 1) -- | The 'splitWithPS' function splits a 'PackedString' at a given index. splitAtPS :: Int -> PackedString -> (PackedString, PackedString) splitAtPS n ps = (takePS n ps, dropPS n ps) -- | The 'takeWhilePS' function is analogous to the 'takeWhile' function. takeWhilePS :: (Char -> Bool) -> PackedString -> PackedString takeWhilePS pred ps = packString (takeWhile pred (unpackPS ps)) -- | The 'dropWhilePS' function is analogous to the 'dropWhile' function. dropWhilePS :: (Char -> Bool) -> PackedString -> PackedString dropWhilePS pred ps = packString (dropWhile pred (unpackPS ps)) -- | The 'elemPS' function returns True iff the given element is in the string. elemPS :: Char -> PackedString -> Bool elemPS c ps = c `elem` unpackPS ps -- | The 'spanPS' function returns a pair containing the result of -- running both 'takeWhilePS' and 'dropWhilePS'. spanPS :: (Char -> Bool) -> PackedString -> (PackedString, PackedString) spanPS p ps = (takeWhilePS p ps, dropWhilePS p ps) -- | The 'breakPS' function breaks a string at the first position which -- satisfies the predicate. breakPS :: (Char -> Bool) -> PackedString -> (PackedString, PackedString) breakPS p ps = spanPS (not . p) ps -- | The 'linesPS' function splits the input on line-breaks. linesPS :: PackedString -> [PackedString] linesPS ps = splitPS '\n' ps -- | The 'unlinesPS' function concatenates the input list after -- interspersing newlines. unlinesPS :: [PackedString] -> PackedString unlinesPS = joinPS (packString "\n") -- | The 'wordsPS' function is analogous to the 'words' function. wordsPS :: PackedString -> [PackedString] wordsPS ps = filter (not.nullPS) (splitWithPS isSpace ps) -- | The 'unwordsPS' function is analogous to the 'unwords' function. unwordsPS :: [PackedString] -> PackedString unwordsPS = joinPS (packString " ") -- | The 'reversePS' function reverses the string. reversePS :: PackedString -> PackedString reversePS ps = packString (reverse (unpackPS ps)) -- | The 'concatPS' function concatenates a list of 'PackedString's. concatPS :: [PackedString] -> PackedString concatPS pss = packString (concat (map unpackPS pss)) ------------------------------------------------------------ -- | The 'joinPS' function takes a 'PackedString' and a list of 'PackedString's -- and concatenates the list after interspersing the first argument between -- each element of the list. joinPS :: PackedString -> [PackedString] -> PackedString joinPS filler pss = concatPS (splice pss) where splice [] = [] splice [x] = [x] splice (x:y:xs) = x:filler:splice (y:xs) -- ToDo: the obvious generalisation {- Some properties that hold: * splitPS x ls = ls' where False = any (map (x `elemPS`) ls') * joinPS (packString [x]) (splitPS x ls) = ls -} -- | The 'splitPS' function splits the input string on each occurrence of the given 'Char'. splitPS :: Char -> PackedString -> [PackedString] splitPS c = splitWithPS (== c) -- | The 'splitWithPS' function takes a character predicate and splits the input string -- at each character which satisfies the predicate. splitWithPS :: (Char -> Bool) -> PackedString -> [PackedString] splitWithPS pred (PS ps) = splitify 0 where len = lengthPS (PS ps) splitify n | n >= len = [] | otherwise = let break_pt = first_pos_that_satisfies pred ps len n in if break_pt == n then -- immediate match, empty substring nilPS : splitify (break_pt + 1) else substrPS (PS ps) n (break_pt - 1) -- leave out the matching character : splitify (break_pt + 1) first_pos_that_satisfies pred ps len n = case [ m | m <- [n..len-1], pred (ps ! m) ] of [] -> len (m:_) -> m -- ----------------------------------------------------------------------------- -- Local utility functions -- The definition of @_substrPS@ is essentially: -- @take (end - begin + 1) (drop begin str)@. -- | The 'substrPS' function takes a 'PackedString' and two indices -- and returns the substring of the input string between (and including) -- these indices. substrPS :: PackedString -> Int -> Int -> PackedString substrPS (PS ps) begin end = packString [ ps ! i | i <- [begin..end] ] -- ----------------------------------------------------------------------------- -- hPutPS -- | Outputs a 'PackedString' to the specified 'Handle'. -- -- NOTE: the representation of the 'PackedString' in the file is assumed to -- be in the ISO-8859-1 encoding. In other words, only the least significant -- byte is taken from each character in the 'PackedString'. hPutPS :: Handle -> PackedString -> IO () hPutPS h (PS ps) = do let l = lengthPS (PS ps) arr <- newArray_ (0, l-1) sequence_ [ writeArray arr i (fromIntegral (ord (ps ! i))) | i <- [0..l-1] ] hPutArray h arr l -- ----------------------------------------------------------------------------- -- hGetPS -- | Read a 'PackedString' directly from the specified 'Handle'. -- This is far more efficient than reading the characters into a 'String' -- and then using 'packString'. -- -- NOTE: as with 'hPutPS', the string representation in the file is -- assumed to be ISO-8859-1. hGetPS :: Handle -> Int -> IO PackedString hGetPS h i = do arr <- newArray_ (0, i-1) l <- hGetArray h arr i chars <- mapM (\i -> readArray arr i >>= return.chr.fromIntegral) [0..l-1] return (packNChars l chars) #else /* __NHC__ */ --import Prelude hiding (append, break, concat, cons, drop, dropWhile, -- filter, foldl, foldr, head, length, lines, map, -- nil, null, reverse, span, splitAt, subst, tail, -- take, takeWhile, unlines, unwords, words) -- also hiding: Ix(..), Functor(..) import qualified NHC.PackedString import NHC.PackedString (PackedString,packString,unpackPS) import List (intersperse) nilPS :: PackedString consPS :: Char -> PackedString -> PackedString headPS :: PackedString -> Char tailPS :: PackedString -> PackedString nullPS :: PackedString -> Bool appendPS :: PackedString -> PackedString -> PackedString lengthPS :: PackedString -> Int indexPS :: PackedString -> Int -> Char mapPS :: (Char -> Char) -> PackedString -> PackedString filterPS :: (Char -> Bool) -> PackedString -> PackedString reversePS :: PackedString -> PackedString concatPS :: [PackedString] -> PackedString elemPS :: Char -> PackedString -> Bool substrPS :: PackedString -> Int -> Int -> PackedString takePS :: Int -> PackedString -> PackedString dropPS :: Int -> PackedString -> PackedString splitAtPS :: Int -> PackedString -> (PackedString, PackedString) foldlPS :: (a -> Char -> a) -> a -> PackedString -> a foldrPS :: (Char -> a -> a) -> a -> PackedString -> a takeWhilePS :: (Char -> Bool) -> PackedString -> PackedString dropWhilePS :: (Char -> Bool) -> PackedString -> PackedString spanPS :: (Char -> Bool) -> PackedString -> (PackedString, PackedString) breakPS :: (Char -> Bool) -> PackedString -> (PackedString, PackedString) linesPS :: PackedString -> [PackedString] unlinesPS :: [PackedString] -> PackedString wordsPS :: PackedString -> [PackedString] unwordsPS :: [PackedString] -> PackedString splitPS :: Char -> PackedString -> [PackedString] splitWithPS :: (Char -> Bool) -> PackedString -> [PackedString] joinPS :: PackedString -> [PackedString] -> PackedString nilPS = NHC.PackedString.nil consPS = NHC.PackedString.cons headPS = NHC.PackedString.head tailPS = NHC.PackedString.tail nullPS = NHC.PackedString.null appendPS = NHC.PackedString.append lengthPS = NHC.PackedString.length indexPS p i = (unpackPS p) !! i mapPS = NHC.PackedString.map filterPS = NHC.PackedString.filter reversePS = NHC.PackedString.reverse concatPS = NHC.PackedString.concat elemPS c p = c `elem` unpackPS p substrPS = NHC.PackedString.substr takePS = NHC.PackedString.take dropPS = NHC.PackedString.drop splitAtPS = NHC.PackedString.splitAt foldlPS = NHC.PackedString.foldl foldrPS = NHC.PackedString.foldr takeWhilePS = NHC.PackedString.takeWhile dropWhilePS = NHC.PackedString.dropWhile spanPS = NHC.PackedString.span breakPS = NHC.PackedString.break linesPS = NHC.PackedString.lines unlinesPS = NHC.PackedString.unlines wordsPS = NHC.PackedString.words unwordsPS = NHC.PackedString.unwords splitPS c = splitWithPS (==c) splitWithPS p = map packString . split' p [] . unpackPS where split' :: (Char->Bool) -> String -> String -> [String] split' pred [] [] = [] split' pred acc [] = [reverse acc] split' pred acc (x:xs) | pred x = reverse acc: split' pred [] xs | otherwise = split' pred (x:acc) xs joinPS sep = concatPS . intersperse sep #endif #ifdef __GLASGOW_HASKELL__ instance Data PackedString where gunfold k z c = error "gunfold" toConstr (PS _) = con_PS dataTypeOf _ = ty_PackedString con_PS = mkConstr ty_PackedString "PS" [] Prefix ty_PackedString = mkDataType "Data.PackedString.PackedString" [con_PS] #endif