module Data.ByteString.Search.Internal.Utils ( kmpBorders
, automaton
, ldrop
, ltake
, lsplit
, release
, keep
, strictify
) where
import qualified Data.ByteString as S
import qualified Data.ByteString.Lazy as L
import Data.ByteString.Unsafe (unsafeIndex)
import Data.Array.Base (unsafeRead, unsafeWrite, unsafeAt)
import Data.Array.ST
import Data.Array.Unboxed
import Control.Monad (when)
import Data.Bits
import Data.Word (Word8)
automaton :: S.ByteString -> UArray Int Int
automaton !pat = runSTUArray (do
let !patLen = S.length pat
patAt !i = fromIntegral (unsafeIndex pat i)
!bord = kmpBorders pat
aut <- newArray (0, (patLen + 1)*256 1) 0
unsafeWrite aut (patAt 0) 1
let loop !state = do
let !base = state `shiftL` 8
inner j
| j < 0 = if state == patLen
then return aut
else loop (state+1)
| otherwise = do
let !i = base + patAt j
s <- unsafeRead aut i
when (s == 0) (unsafeWrite aut i (j+1))
inner (unsafeAt bord j)
if state == patLen
then inner (unsafeAt bord state)
else inner state
loop 1)
kmpBorders :: S.ByteString -> UArray Int Int
kmpBorders pat = runSTUArray (do
let !patLen = S.length pat
patAt :: Int -> Word8
patAt i = unsafeIndex pat i
ar <- newArray_ (0, patLen)
unsafeWrite ar 0 (1)
let dec w j
| j < 0 || w == patAt j = return $! j+1
| otherwise = unsafeRead ar j >>= dec w
bordLoop !i !j
| patLen < i = return ar
| otherwise = do
let !w = patAt (i1)
j' <- dec w j
if i < patLen && patAt j' == patAt i
then unsafeRead ar j' >>= unsafeWrite ar i
else unsafeWrite ar i j'
bordLoop (i+1) j'
bordLoop 1 (1))
strictify :: L.ByteString -> S.ByteString
strictify = S.concat . L.toChunks
ldrop :: Int -> [S.ByteString] -> [S.ByteString]
ldrop _ [] = []
ldrop k (!h : t)
| k < l = S.drop k h : t
| otherwise = ldrop (k l) t
where
!l = S.length h
ltake :: Int -> [S.ByteString] -> [S.ByteString]
ltake _ [] = []
ltake !k (!h : t)
| l < k = h : ltake (k l) t
| otherwise = [S.take k h]
where
!l = S.length h
lsplit :: Int -> [S.ByteString] -> ([S.ByteString], [S.ByteString])
lsplit _ [] = ([],[])
lsplit !k (!h : t)
= case compare k l of
LT -> ([S.take k h], S.drop k h : t)
EQ -> ([h], t)
GT -> let (u, v) = lsplit (k l) t in (h : u, v)
where
!l = S.length h
release :: Int -> [S.ByteString] -> [S.ByteString]
release !deep _
| deep <= 0 = []
release !deep (!x:xs) = let !rest = release (deepS.length x) xs in x : rest
release _ [] = error "stringsearch.release could not find enough past!"
keep :: Int -> [S.ByteString] -> ([S.ByteString],[S.ByteString])
keep !deep xs
| deep < 1 = ([],xs)
keep deep (!x:xs) = let (!p,d) = keep (deep S.length x) xs in (x:p,d)
keep _ [] = error "Forgot too much"