module Data.AhoCorasick
( ACMachine
, State(..)
, Match(..)
, construct
, constructWithValues
, root
, run
, step
, renderGraph
) where
import Control.Applicative
import Control.Monad
import Data.Hashable (Hashable)
import Data.HashMap.Strict (HashMap)
import qualified Data.HashMap.Strict as Map
import Data.List
import Data.Maybe
import Data.Vector (Vector)
import qualified Data.Vector as V
import qualified Data.Vector.Mutable as MV
import GHC.Generics (Generic)
data ACMachine a v = ACMachine (Goto a) Failure (Output v)
type Goto a = Vector (HashMap a State)
type Failure = Vector State
type Output v = Vector [(Int, v)]
type GotoMap a = HashMap State (HashMap a State)
newtype State = State Int
deriving (Eq, Generic)
instance Hashable State
data Match v = Match
{ matchPos :: Int
, matchValue :: v
} deriving (Show)
construct :: (Eq a, Hashable a) => [[a]] -> ACMachine a [a]
construct ps = constructWithValues $ zip ps ps
constructWithValues :: (Eq a, Hashable a) => [([a], v)] -> ACMachine a v
constructWithValues pvs = ACMachine g f o
where
(m, gotoMap) = buildGoto ps
n = m + 1
g = toGotoArray n gotoMap
f = buildFailure g
o = buildOutput pvs g f
ps = map fst pvs
toGotoArray :: Int -> GotoMap a -> Goto a
toGotoArray n m = V.generate n (fromMaybe Map.empty . flip Map.lookup m . State)
root :: State
root = State 0
run :: (Eq a, Hashable a) => ACMachine a v -> [a] -> [Match v]
run acm = go root . zip [1..]
where
go _ [] = []
go s ((i, x):ixs) = map toMatch vs ++ go s' ixs
where
toMatch (l, v) = Match { matchPos = i l + 1, matchValue = v }
(s', vs) = step acm x s
step :: (Eq a, Hashable a) => ACMachine a v -> a -> State -> (State, [(Int, v)])
step (ACMachine g f o) x s = (s', output s')
where
s' = head $ mapMaybe (flip goto x) $ iterate failure s
goto (State 0) x' = Map.lookup x' (g V.! 0) <|> Just root
goto (State i) x' = Map.lookup x' (g V.! i)
failure (State i) = f V.! i
output (State i) = o V.! i
buildOutput :: (Eq a, Hashable a) => [([a], v)] -> Goto a -> Failure -> Output v
buildOutput pvs g f = V.create $ do
o <- MV.replicate (V.length g) []
forM_ (map (fromJust . toKV) pvs) $ \(State i, vs) ->
MV.write o i vs
forM_ (tail $ toBFList g) $ \(State i) -> do
let ts = Map.toList (g V.! i)
forM_ ts $ \(_, s'@(State j)) -> do
let (State k) = failure s'
vs <- MV.read o j
vsf <- MV.read o k
let vs' = vsf `seq` vs ++ vsf
MV.write o j vs'
return o
where
failure (State i) = f V.! i
toKV (p, v) = do
s <- finalState g root p
return (s, [(length p, v)])
finalState :: (Eq a, Hashable a) => Goto a -> State -> [a] -> Maybe State
finalState g = foldM (\(State i) x -> Map.lookup x (g V.! i))
buildGoto :: (Eq a, Hashable a) => [[a]] -> (Int, GotoMap a)
buildGoto = foldl' (flip extend) (0, Map.empty)
extend :: (Eq a, Hashable a) => [a] -> (Int, GotoMap a) -> (Int, GotoMap a)
extend = go root
where
go _ [] nm = nm
go s (x:xs) nm@(n, m) = case Map.lookup x sm of
Nothing -> go s' xs (n', m')
where
s' = State n'
n' = n + 1
sm' = Map.insert x s' sm
m' = Map.insert s sm' m
Just s' -> go s' xs nm
where
sm = fromMaybe Map.empty $ Map.lookup s m
buildFailure :: (Eq a, Hashable a) => Goto a -> Failure
buildFailure g = V.create $ do
f <- MV.new (V.length g)
MV.write f 0 (error "Referencing the failure transition from the root node.")
forM_ (toBFList g) $ \s@(State i) -> do
let ts = Map.toList (g V.! i)
forM_ ts $ \(x, State j) -> do
s' <- failureState f s x
MV.write f j s'
return f
where
failureState _ (State 0) _ = return root
failureState f s x = go =<< failure s
where
go s' = case goto s' x of
Nothing -> go =<< failure s'
Just s'' -> return s''
failure (State i) = MV.read f i
goto (State 0) x = Map.lookup x (g V.! 0) <|> Just root
goto (State i) x = Map.lookup x (g V.! i)
toBFList :: Goto a -> [State]
toBFList g = ss0
where
ss0 = root : go 1 ss0
go 0 _ = []
go n (State i : ss) = children ++ go (n 1 + Map.size sm) ss
where
sm = g V.! i
children = Map.elems sm
go _ _ = error "toBFList: invalid state"
renderGraph :: ACMachine Char [Char] -> String
renderGraph (ACMachine g f o) =
graph "digraph" $ statements [
attr "graph" [("rankdir", "LR")]
, statements $ map state (toBFList g)
, statements $ map stateWithOutput $ filter (not . null . snd) $ zip (map State [0..]) (V.toList o)
, statements $ map (\s@(State i) -> statements $ map (uncurry $ transEdge s) $ Map.toList $ g V.! i) (toBFList g)
, statements $ map (\s@(State i) -> failEdge s $ f V.! i) (tail $ toBFList g)
]
where
statements = intercalate " "
graph typ body = typ ++ " { " ++ body ++ " }"
attr typ attrList = typ ++ " " ++ "[" ++ intercalate "," (map kvStr attrList) ++ "];"
node nid attrList = nid ++ " " ++ "[" ++ intercalate "," (map kvStr attrList) ++ "];"
kvStr (k, v) = k ++ "=" ++ v
state s@(State 0) = node (stateID s) [("shape", "doublecircle")]
state s = node (stateID s) [("shape", "circle")]
stateWithOutput (s, xs) = node (stateID s) [("label", "<" ++ tableHTML (stateID s) ("{" ++ intercalate "," (map snd xs) ++ "}") ++ ">"), ("shape", "none")]
tableHTML row1 row2 = "<table cellborder=\"0\"><tr><td>" ++ row1 ++ "</td></tr><tr><td>" ++ row2 ++ "</td></tr></table>"
stateID (State 0) = "Root"
stateID (State n) = 'S' : show n
transEdge s x s' = stateID s ++ " -> " ++ stateID s' ++ " [label=\"" ++ [x] ++ "\"];"
failEdge s s' = stateID s ++ " -> " ++ stateID s' ++ " [style=dashed, constraint=false];"