{-# LANGUAGE DeriveDataTypeable #-} -- ------------------------------------------------------------ {- | Module : Yuuko.Data.Atom Copyright : Copyright (C) 2008 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe\@fh-wedel.de) Stability : experimental Portability: non-portable Unique Atoms generated from Strings and managed as flyweights Yuuko.Data.Atom can be used for caching and storage optimisation of frequently used strings. An @Atom@ is constructed from a @String@. For two equal strings the identical atom is returned. This module can be used for optimizing memory usage when working with strings or names. Many applications use data types like @Map String SomeAttribute@ where a rather fixed set of keys is used. Especially XML applications often work with a limited set of element and attribute names. For these applications it becomes more memory efficient when working with types like @Map Atom SomeAttribute@ and convert the keys into atoms before operating on such a map. Internally this module manages a map of atoms. The atoms are internally represented by @ByteString@s. When creating a new atom from a string, the string is first converted into an UTF8 @Word8@ sequence, which is packed into a @ByteString@. This @ByteString@ is looked up in the table of atoms. If it is already there, the value in the map is used as atom, else the new @ByteString@ is inserted into the map. Of course the implementation of this name cache uses @unsavePerformIO@ and @MVar@s for managing this kind of global state. The following laws hold for atoms > > s == t => newAtom s == newAtom t > s `compare` t => newAtom s `compare` newAtom t > show . newAtom == id Equality test for @Atom@s runs in /O(1)/, it is just a pointer comarison. The @Ord@ comparisons have the same runtime like the @ByteString@ comparisons. Internally there is an UTF8 comparison, but UTF8 encoding preserves the total order. Warning: The internal cache never shrinks during execution. So using it in a undisciplined way can lead to memory leaks. -} ----------------------------------------------------------------------------- module Yuuko.Data.Atom ( -- * Atom objects Atom, -- instance (Eq, Ord, Read, Show) newAtom, -- :: String -> Atom share -- :: String -> String ) where import Control.Concurrent.MVar import Control.DeepSeq import Data.ByteString.Internal ( toForeignPtr, c2w, w2c ) import Data.ByteString ( ByteString, pack, unpack ) import qualified Data.Map as M import Data.Typeable import System.IO.Unsafe ( unsafePerformIO ) import Yuuko.Text.XML.HXT.DOM.Unicode ( unicodeToUtf8 ) import Yuuko.Text.XML.HXT.DOM.UTF8Decoding ( decodeUtf8 ) -- ------------------------------------------------------------ type Atoms = M.Map ByteString ByteString newtype Atom = A { bs :: ByteString } deriving (Typeable) -- ------------------------------------------------------------ -- | the internal cache for the strings theAtoms :: MVar Atoms theAtoms = unsafePerformIO (newMVar M.empty) {-# NOINLINE theAtoms #-} -- | insert a bytestring into the atom cache insertAtom :: ByteString -> Atoms -> (Atoms, Atom) insertAtom s m = maybe (M.insert s s m, A s) (\ s' -> (m, A s')) . M.lookup s $ m -- | creation of an @Atom@ from a @String@ newAtom :: String -> Atom newAtom = unsafePerformIO . newAtom' {-# NOINLINE newAtom #-} -- | The internal operation running in the IO monad newAtom' :: String -> IO Atom newAtom' s = do m <- takeMVar theAtoms let (m', a) = insertAtom (pack. map c2w . unicodeToUtf8 $ s) m putMVar theAtoms m' return a -- | Insert a @String@ into the atom cache and convert the atom back into a @String@. -- -- locically @share == id@ holds, but internally equal strings share the same memory. share :: String -> String share = show . newAtom instance Eq Atom where a1 == a2 = fp1 == fp2 where (fp1, _, _) = toForeignPtr . bs $ a1 (fp2, _, _) = toForeignPtr . bs $ a2 instance Ord Atom where compare a1 a2 | a1 == a2 = EQ | otherwise = compare (bs a1) (bs a2) instance Read Atom where readsPrec p str = [ (newAtom x, y) | (x, y) <- readsPrec p str ] instance Show Atom where show = fst . decodeUtf8 . map w2c . unpack . bs -- show = show . toForeignPtr . bs -- for debug only instance NFData Atom where -----------------------------------------------------------------------------