module Data.CompactMap.Index where
import Foreign hiding (rotateL,rotateR)
import Foreign.Storable
import Control.Monad
import Data.Maybe
import System.IO.Unsafe
import Data.Array.IO
import Data.Array.Unboxed
import Data.Binary
import qualified Data.ByteString as Strict
import qualified Data.ByteString.Unsafe as Strict
import qualified Data.ByteString.Internal as Strict
import qualified Data.ByteString.Lazy as Lazy
import Data.CompactMap.Buffer
import Data.CompactMap.Types
import Data.CompactMap.Fetch
import GHC.Exts (addr2Int#, Ptr(..), Int(..))
import Prelude hiding (Either(..))
type Tag = Int
peekKeyCursorData :: Ptr KeyCursor -> IO (Ptr DataCursor)
peekKeyCursorData ptr
= peek (castPtr ptr)
peekKeyCursorKey :: Ptr KeyCursor -> IO Strict.ByteString
peekKeyCursorKey ptr = do len <- peek (ptr `plusPtr` ptrSize)
Strict.unsafePackCStringLen (ptr `plusPtr` (ptrSize+intSize), len)
pokeKeyCursorData :: Ptr KeyCursor -> Ptr DataCursor -> IO ()
pokeKeyCursorData ptr dataPtr
= poke (castPtr ptr) dataPtr
newKeyCursor :: Buffer -> Lazy.ByteString -> IO (Ptr KeyCursor)
newKeyCursor buffer keyE
= withBytes buffer (intSize*2 + keyLen) $ \keyPtr ->
do poke (castPtr keyPtr) nullPtr
putByteString (keyPtr `plusPtr` intSize) keyE keyLen
return keyPtr
where keyLen = fromIntegral $ Lazy.length keyE
newBinaryKeyCursor :: (Binary a) => Buffer -> a -> IO (Ptr KeyCursor)
newBinaryKeyCursor !buffer !key
= newKeyCursor buffer (encode key)
pushNewDataCursor :: Ptr KeyCursor -> Ptr DataCursor -> IO ()
pushNewDataCursor keyCursor dataCursor
= do oldData <- peekKeyCursorData keyCursor
pokeDataCursorNext dataCursor oldData
pokeKeyCursorData keyCursor dataCursor
peekDataCursorNext :: Ptr DataCursor -> IO (Ptr DataCursor)
peekDataCursorNext ptr = peek (castPtr ptr)
peekDataCursorTag :: Ptr DataCursor -> IO Int
peekDataCursorTag ptr = peek (ptr `plusPtr` ptrSize)
peekDataCursorData :: Ptr DataCursor -> IO (Maybe Strict.ByteString)
peekDataCursorData ptr
= do isJ <- peek (ptr `plusPtr` (ptrSize+intSize))
case isJ == (1::Word8) of
False -> do return Nothing
True -> do len <- peek (ptr `plusPtr` (ptrSize+intSize+1))
bs <- Strict.unsafePackCStringLen (ptr `plusPtr` (intSize+intSize+1+intSize),len)
return (Just bs)
pokeDataCursorNext :: Ptr DataCursor -> Ptr DataCursor -> IO ()
pokeDataCursorNext ptr next
= poke (castPtr ptr) next
newDataCursor :: Buffer -> Tag -> Maybe Lazy.ByteString -> IO (Ptr DataCursor)
newDataCursor !buffer !tag !mbString
= do let !bsLen = fromIntegral $ maybe 0 Lazy.length mbString
!ext = if isJust mbString then intSize else 0
withBytes buffer (ptrSize+intSize+1+bsLen+ext) $ \ !ptr ->
do poke (castPtr ptr) (nullPtr :: Ptr DataCursor)
poke (ptr `plusPtr` ptrSize) tag
case mbString of
Nothing -> do poke (ptr `plusPtr` (ptrSize+intSize)) (0::Word8)
Just bs -> do poke (ptr `plusPtr` (ptrSize+intSize)) (1::Word8)
putByteString (ptr `plusPtr` (ptrSize+intSize+1)) bs bsLen
return ptr
intToPtr i = nullPtr `plusPtr` i
ptrToInt (Ptr addr#) = I# (addr2Int# addr#)
extractTop = extractField 0
extractSize = fmap ptrToInt . extractField 1
extractElemIdx = fmap castPtr . extractField 2
extractLeft = extractField 3
extractRight = extractField 4
putTop ptr val = if ptr == nullPtr then return () else putField 0 ptr val
putSize p s = putField 1 p (intToPtr s)
putLeft = putField 3
putRight :: Ptr IndexItem -> Ptr IndexItem -> IO ()
putRight = putField 4
data Direction = Left | Right | Stop
walkTree start move
= let loop n = do keyCursor <- extractElemIdx n
dir <- move keyCursor
case dir of
Left -> extractLeft n >>= \left ->
if left == nullPtr
then return (Left, n) else loop left
Right -> extractRight n >>= \right ->
if right == nullPtr
then return (Right, n) else loop right
Stop -> return (Stop, n)
in loop start
lookupNearest :: (Ord a, Binary a) => Ptr IndexItem
-> a -> IO (Direction, Ptr IndexItem)
lookupNearest start e
= walkTree start $ \keyCursor ->
do idxElem <- getElement (keyCursor `plusPtr` intSize)
case compare e idxElem of
LT -> return Left
GT -> return Right
EQ -> return Stop
lookupLargest :: Ptr IndexItem
-> IO (Direction, Ptr IndexItem)
lookupLargest start
= walkTree start $ \_ -> return Right
putByteString :: Ptr () -> Lazy.ByteString -> Int -> IO ()
putByteString dst lbs len
= do poke (castPtr dst) len
let loop !ptr [] = return ()
loop !ptr (chunk:cs) = do Strict.unsafeUseAsCString chunk $ \cstr ->
copyArray ptr cstr (Strict.length chunk)
loop (ptr `plusPtr` Strict.length chunk) cs
loop (dst `plusPtr` intSize) (Lazy.toChunks lbs)
intSize :: Int
intSize = sizeOf (undefined::Int)
ptrSize :: Int
ptrSize = sizeOf (undefined::Ptr ())
insert :: (Ord k, Binary k, Binary a) => Index -> k -> Tag -> Maybe a -> IO [(Tag,Maybe Strict.ByteString)]
insert idx key tag mbVal
= insertBS idx key tag (fmap encode mbVal)
insertBS :: (Ord k, Binary k) => Index -> k -> Tag -> Maybe Lazy.ByteString -> IO [(Tag,Maybe Strict.ByteString)]
insertBS idx key tag mbVal
= insertWithBS idx key tag (\_ -> return mbVal)
insertWithBS :: (Ord k, Binary k) => Index -> k -> Tag -> (Ptr DataCursor -> IO (Maybe Lazy.ByteString)) -> IO [(Tag,Maybe Strict.ByteString)]
insertWithBS (Index orig buffer) key tag genVal
= do keyCursor <- insertKey (Index orig buffer) key
oldData <- peekKeyCursorData keyCursor
dataPtr <- newDataCursor buffer tag =<< genVal oldData
pushNewDataCursor keyCursor dataPtr
if oldData == nullPtr
then return []
else fetchAllElts oldData
insertKey :: (Ord k, Binary k) => Index -> k -> IO (Ptr KeyCursor)
insertKey (Index orig buffer) key
= insertPrim (lookupNearest orig key) orig buffer (newBinaryKeyCursor buffer key)
insertLargestKey :: (Binary k) => Index -> k -> IO (Ptr KeyCursor)
insertLargestKey (Index orig buffer) key
= insertPrim (lookupLargest orig) orig buffer (newBinaryKeyCursor buffer key)
insertLargestKeyCursor :: Index -> Ptr KeyCursor -> IO ()
insertLargestKeyCursor (Index orig buffer) keyCursor
= do insertPrim (lookupLargest orig) orig buffer (return keyCursor)
return ()
lookupKey :: (Ord k, Binary k) => Index -> k -> IO (Maybe (Ptr KeyCursor))
lookupKey (Index orig buffer) key
= do (dir,pos) <- lookupNearest orig key
case dir of
Stop -> fmap Just $ extractElemIdx pos
_ -> return Nothing
lookupList :: (Ord k, Binary k) => Index -> k -> IO [(Tag,Maybe Strict.ByteString)]
lookupList idx key
= do mbKey <- lookupKey idx key
case mbKey of
Nothing -> return []
Just key -> fetchAllElts =<< peekKeyCursorData key
fetchAllElts :: Ptr DataCursor -> IO [(Tag,Maybe Strict.ByteString)]
fetchAllElts ptr | ptr == nullPtr = return []
fetchAllElts ptr
= unsafeInterleaveIO $
do next <- peekDataCursorNext ptr
tag <- peekDataCursorTag ptr
mbData <- peekDataCursorData ptr
liftM ((tag,mbData):) (fetchAllElts next)
indexItemSize :: Int
indexItemSize = sizeOf (undefined :: IndexItem)
insertPrim :: (IO (Direction,Ptr IndexItem)) -> Ptr IndexItem -> Buffer -> IO (Ptr KeyCursor) -> IO (Ptr KeyCursor)
insertPrim getPos !orig !buffer genIdx
= do size <- getSize orig
if size==0
then do eIdx <- genIdx
poke orig (IndexItem nullPtr (intToPtr 1) eIdx nullPtr nullPtr)
return eIdx
else do (dir,pos) <- getPos
case dir of
Right -> withBytes buffer indexItemSize $ \ptr ->
do eIdx <- genIdx
poke ptr (IndexItem pos (intToPtr 1) eIdx nullPtr nullPtr)
putRight pos ptr
balanceTree pos
return eIdx
Left -> withBytes buffer indexItemSize $ \ptr ->
do eIdx <- genIdx
poke ptr (IndexItem pos (intToPtr 1) eIdx nullPtr nullPtr)
putLeft pos ptr
balanceTree pos
return eIdx
Stop -> extractElemIdx pos
listKeyPointers :: Index -> IO (UArray Int (Ptr KeyCursor))
listKeyPointers (Index orig buffer)
= do size <- getSize orig
a <- newArray_ (0,size1) :: IO (IOUArray Int (Ptr KeyCursor))
let loop n ptr | ptr == nullPtr = return ()
loop n ptr = do left <- extractLeft ptr
right <- extractRight ptr
leftSize <- getSize left
key <- extractElemIdx ptr
writeArray a (leftSize+n) key
loop (n) left
loop (leftSize+1+n) right
unless (size==0) $ loop (0::Int) orig
unsafeFreeze a
getKeyFromPointer :: Ptr KeyCursor -> IO Strict.ByteString
getKeyFromPointer ptr
= peekKeyCursorKey ptr
getDataFromPointer :: Ptr KeyCursor -> IO [(Tag, Maybe Strict.ByteString)]
getDataFromPointer ptr
= do dataPtr <- peekKeyCursorData ptr
fetchAllElts dataPtr
newIndex = do buffer <- newBuffer 512
withBytes buffer indexItemSize $ \ptr -> ptr `seq`
do poke ptr (IndexItem nullPtr (intToPtr 0) nullPtr nullPtr nullPtr)
return $ Index ptr buffer
touchIndex (Index _ buffer) = touchBuffer buffer
balanceTree !pos | pos==nullPtr = return ()
balanceTree !pos
= do balance pos
!top <- extractTop pos
balanceTree top
getSize pos | pos == nullPtr = return $! 0
getSize pos
= extractSize pos
balance !pos
= do
!left <- extractLeft pos
!right <- extractRight pos
!sizeL <- getSize left
!sizeR <- getSize right
putSize pos (sizeL+sizeR+1)
case () of
() | sizeL + sizeR <= 1 -> return ()
| sizeR >= delta*sizeL -> rotateL pos left right
| sizeL >= delta*sizeR -> rotateR pos left right
| otherwise -> return ()
rotateL pos left right
= do !sizeLY <- getSize =<< extractLeft right
!sizeRY <- getSize =<< extractRight right
if sizeLY < ratio * sizeRY then singleL pos
else doubleL pos
rotateR pos left right
= do !sizeLY <- getSize =<< extractLeft left
!sizeRY <- getSize =<< extractRight left
if sizeRY < ratio * sizeLY then singleR pos
else doubleR pos
singleL pos
= do IndexItem kTop kSize kElemIdx p1 k2 <- peek pos
IndexItem k2Top k2Size k2ElemIdx p2 p3 <- peek k2
!p2Size <- getSize p2
let p1Size = ptrToInt kSizeptrToInt k2Size1
poke pos (IndexItem kTop kSize k2ElemIdx k2 p3)
poke k2 (IndexItem k2Top (intToPtr $ p2Size+p1Size+1) kElemIdx p1 p2)
putTop p3 pos
putTop p1 k2
singleR pos
= do IndexItem kTop kSize kElemIdx k2 p3 <- peek pos
IndexItem k2Top k2Size k2ElemIdx p1 p2 <- peek k2
!p2Size <- getSize p2
let p3Size = ptrToInt kSizeptrToInt k2Size1
poke pos (IndexItem kTop kSize k2ElemIdx p1 k2)
poke k2 (IndexItem k2Top (intToPtr $ p2Size+p3Size+1) kElemIdx p2 p3)
putTop p1 pos
putTop p3 k2
doubleL pos
= do IndexItem kTop kSize kElemIdx p1 k2 <- peek pos
IndexItem k2Top k2Size k2ElemIdx k3 p4 <- peek k2
IndexItem k3Top k3Size k3ElemIdx p2 p3 <- peek k3
!p2Size <- getSize p2
!p3Size <- getSize p3
let p1Size = ptrToInt kSize ptrToInt k2Size 1
p4Size = ptrToInt k2Size ptrToInt k3Size 1
poke pos (IndexItem kTop kSize k3ElemIdx k3 k2)
poke k2 (IndexItem k2Top (intToPtr $ p3Size+p4Size+1) k2ElemIdx p3 p4)
poke k3 (IndexItem k2Top (intToPtr $ p1Size+p2Size+1) kElemIdx p1 p2)
putTop p1 k3
putTop k3 pos
putTop p3 k2
doubleR pos
= do IndexItem kTop kSize kElemIdx k2 p4 <- peek pos
IndexItem k2Top k2Size k2ElemIdx p1 k3 <- peek k2
IndexItem k3Top k3Size k3ElemIdx p2 p3 <- peek k3
!p2Size <- getSize p2
!p3Size <- getSize p3
let p1Size = ptrToInt k2Size ptrToInt k3Size 1
p4Size = ptrToInt kSize ptrToInt k2Size 1
poke pos (IndexItem kTop kSize k3ElemIdx k2 k3)
poke k2 (IndexItem pos (intToPtr $ p1Size+p2Size+1) k2ElemIdx p1 p2)
poke k3 (IndexItem pos (intToPtr $ p3Size+p4Size+1) kElemIdx p3 p4)
putTop k3 pos
putTop p2 k2
putTop p4 k3
delta,ratio :: Int
delta = 5
ratio = 2