-------------------------------------------------------------------------------- -- | -- Module : Algorithms.StringSearch.KMP -- Copyright : (C) Frank Staals -- License : see the LICENSE file -- Maintainer : Frank Staals -- -- Implementation of Knuth-Morris-Pratt String-searching -- algorithm. The exposition is based on that of Goodrich and -- Tamassia in "Data Structures and Algorithms in Java 2nd Edition". -- -------------------------------------------------------------------------------- module Algorithms.StringSearch.KMP( isSubStringOf , kmpMatch , buildFailureFunction ) where import Control.Monad.ST import qualified Data.Vector as V import Data.Vector.Generic ((!)) import qualified Data.Vector.Unboxed as UV import qualified Data.Vector.Unboxed.Mutable as UMV import qualified VectorBuilder.Builder as Builder import qualified VectorBuilder.Vector as Builder -------------------------------------------------------------------------------- -- | Constructs the failure function. -- -- running time: \(O(m)\). buildFailureFunction :: forall a. Eq a => V.Vector a -> UV.Vector Int buildFailureFunction p = UV.create $ do f <- UMV.new m go f 1 0 where m = V.length p go :: UMV.MVector s Int -> Int -> Int -> ST s (UMV.MVector s Int) go f i j | i == m = pure f | p ! j == p ! i = UMV.write f i (j+1) >> go f (i+1) (j+1) | j > 0 = UMV.read f (j-1) >>= go f i | otherwise = UMV.write f i 0 >> go f (i+1) 0 -- | Test if the first argument, the pattern p, occurs as a consecutive subsequence in t. -- -- running time: \(O(n+m)\), where p has length \(m\) and t has length \(n\). isSubStringOf :: (Eq a, Foldable p, Foldable t) => p a -> t a -> Maybe Int p `isSubStringOf` t = kmpMatch (Builder.build . Builder.foldable $ p) (Builder.build . Builder.foldable $ t) -- | Test if the first argument, the pattern p, occurs as a consecutive subsequence in t. -- -- running time: \(O(n+m)\), where p has length \(m\) and t has length \(n\). kmpMatch :: Eq a => V.Vector a -> V.Vector a -> Maybe Int kmpMatch p t | m == 0 = Just 0 | otherwise = kmp 0 0 where m = V.length p n = V.length t f = buildFailureFunction p kmp i j | i == n = Nothing | p ! j == t ! i = if j == m - 1 then Just (i - m + 1) else kmp (i+1) (j+1) | j > 0 = kmp i (f ! (j - 1)) | otherwise = kmp (i+1) 0 -- j == 0