{-# OPTIONS_GHC -Wall -fwarn-tabs #-} ------------------------------------------------------------ -- ~ 2019.02.25 -- | -- Module : Data.Trie.ByteStringInternal -- Copyright : Copyright (c) 2008--2019 wren gayle romano -- License : BSD3 -- Maintainer : wren@community.haskell.org -- Stability : experimental -- Portability : GHC-only -- -- Helper functions on 'ByteString's for "Data.Trie.Internal". ------------------------------------------------------------ module Data.Trie.ByteStringInternal ( ByteString, ByteStringElem , breakMaximalPrefix ) where import qualified Data.ByteString as S import Data.ByteString.Internal (ByteString(PS)) import Data.Word import Foreign.ForeignPtr (ForeignPtr, withForeignPtr) import Foreign.Ptr (Ptr, plusPtr) import Foreign.Storable (Storable(..)) -- This module name is since @__GLASGOW_HASKELL__ >= 611@. import GHC.IO (unsafeDupablePerformIO) ------------------------------------------------------------ -- | Associated type of 'ByteString' type ByteStringElem = Word8 ------------------------------------------------------------ -- | Returns the longest shared prefix and the two remaining suffixes -- for a pair of strings. -- -- > s == (\(pre,s',z') -> pre `append` s') (breakMaximalPrefix s z) -- > z == (\(pre,s',z') -> pre `append` z') (breakMaximalPrefix s z) breakMaximalPrefix :: ByteString -> ByteString -> (ByteString, ByteString, ByteString) breakMaximalPrefix str1@(PS s1 off1 len1) str2@(PS s2 off2 len2) | len1 == 0 = (S.empty, S.empty, str2) | len2 == 0 = (S.empty, str1, S.empty) | otherwise = unsafeDupablePerformIO $ withForeignPtr s1 $ \p1 -> withForeignPtr s2 $ \p2 -> do i <- indexOfDifference (p1 `ptrElemOff` off1) (p2 `ptrElemOff` off2) (min len1 len2) let pre = if off1 + len1 < off2 + len2 -- share the smaller one then newPS s1 off1 i else newPS s2 off2 i let s1' = newPS s1 (off1 + i) (len1 - i) let s2' = newPS s2 (off2 + i) (len2 - i) return $! (,,) !$ pre !$ s1' !$ s2' -- | Get the 'sizeOf' the type, without requiring @-XScopedTypeVariables@ -- nor making a spurious call to 'unsafePerformIO' or similar. sizeOfPtr :: Storable a => Ptr a -> Int sizeOfPtr = sizeOf . (undefined :: Ptr a -> a) {-# INLINE sizeOfPtr #-} -- | C-style pointer addition, without the excessively liberal type -- of 'plusPtr'. ptrElemOff :: Storable a => Ptr a -> Int -> Ptr a ptrElemOff p i = p `plusPtr` (i * sizeOfPtr p) {-# INLINE ptrElemOff #-} -- | Smart-constructor to share 'S.empty' as appropriate. newPS :: ForeignPtr ByteStringElem -> Int -> Int -> ByteString newPS s o l = if l <= 0 then S.empty else PS s o l {-# INLINE newPS #-} -- | fix associativity bug (!$) :: (a -> b) -> a -> b (!$) = ($!) {-# INLINE (!$) #-} ------------------------------------------------------------ -- This naive algorithm doesn't depend on architecture details. We -- could speed things up (in theory) by checking a natural word at -- a time and then falling back to checking each byte once the -- mismatched word is found. But in practice that doesn't seem to -- actually speed things up. -- -- | Calculates the first index where values differ. indexOfDifference :: Ptr ByteStringElem -> Ptr ByteStringElem -> Int -> IO Int indexOfDifference p1 p2 limit = goByte 0 where goByte n = if n >= limit then return limit else do c1 <- peekElemOff p1 n c2 <- peekElemOff p2 n if c1 == c2 then goByte $! n+1 else return n ------------------------------------------------------------ ------------------------------------------------------- fin.