{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Data.Text.AhoCorasick.Automaton
( AcMachine (..)
, CaseSensitivity (..)
, CodeUnitIndex (..)
, Match (..)
, Next (..)
, build
, debugBuildDot
, runLower
, runText
) where
import Prelude hiding (length)
import Control.DeepSeq (NFData)
import Data.Bits (shiftL, shiftR, (.&.), (.|.))
import Data.Foldable (foldl')
import Data.IntMap.Strict (IntMap)
import Data.Text.Internal (Text (..))
import Data.Word (Word64)
import GHC.Generics (Generic)
import qualified Data.IntMap.Strict as IntMap
import qualified Data.List as List
import qualified Data.Vector as Vector
import Data.Text.CaseSensitivity (CaseSensitivity (..))
import Data.Text.Utf16 (CodeUnit, CodeUnitIndex (..), indexTextArray, lowerCodeUnit)
import Data.TypedByteArray (Prim, TypedByteArray)
import qualified Data.TypedByteArray as TBA
type State = Int
type Transition = Word64
data Match v = Match
{ Match v -> CodeUnitIndex
matchPos :: {-# UNPACK #-} !CodeUnitIndex
, Match v -> v
matchValue :: v
} deriving (Int -> Match v -> ShowS
[Match v] -> ShowS
Match v -> String
(Int -> Match v -> ShowS)
-> (Match v -> String) -> ([Match v] -> ShowS) -> Show (Match v)
forall v. Show v => Int -> Match v -> ShowS
forall v. Show v => [Match v] -> ShowS
forall v. Show v => Match v -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Match v] -> ShowS
$cshowList :: forall v. Show v => [Match v] -> ShowS
show :: Match v -> String
$cshow :: forall v. Show v => Match v -> String
showsPrec :: Int -> Match v -> ShowS
$cshowsPrec :: forall v. Show v => Int -> Match v -> ShowS
Show, Match v -> Match v -> Bool
(Match v -> Match v -> Bool)
-> (Match v -> Match v -> Bool) -> Eq (Match v)
forall v. Eq v => Match v -> Match v -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Match v -> Match v -> Bool
$c/= :: forall v. Eq v => Match v -> Match v -> Bool
== :: Match v -> Match v -> Bool
$c== :: forall v. Eq v => Match v -> Match v -> Bool
Eq)
data AcMachine v = AcMachine
{ AcMachine v -> Vector [v]
machineValues :: !(Vector.Vector [v])
, AcMachine v -> TypedByteArray Transition
machineTransitions :: !(TypedByteArray Transition)
, AcMachine v -> TypedByteArray Int
machineOffsets :: !(TypedByteArray Int)
, AcMachine v -> TypedByteArray Transition
machineRootAsciiTransitions :: !(TypedByteArray Transition)
} deriving ((forall x. AcMachine v -> Rep (AcMachine v) x)
-> (forall x. Rep (AcMachine v) x -> AcMachine v)
-> Generic (AcMachine v)
forall x. Rep (AcMachine v) x -> AcMachine v
forall x. AcMachine v -> Rep (AcMachine v) x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
forall v x. Rep (AcMachine v) x -> AcMachine v
forall v x. AcMachine v -> Rep (AcMachine v) x
$cto :: forall v x. Rep (AcMachine v) x -> AcMachine v
$cfrom :: forall v x. AcMachine v -> Rep (AcMachine v) x
Generic)
instance NFData v => NFData (AcMachine v)
wildcard :: Integral a => a
wildcard :: a
wildcard = a
0x10000
transitionCodeUnit :: Transition -> CodeUnit
transitionCodeUnit :: Transition -> CodeUnit
transitionCodeUnit Transition
t = Transition -> CodeUnit
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Transition
t Transition -> Transition -> Transition
forall a. Bits a => a -> a -> a
.&. Transition
0xffff)
transitionState :: Transition -> State
transitionState :: Transition -> Int
transitionState Transition
t = Transition -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Transition
t Transition -> Int -> Transition
forall a. Bits a => a -> Int -> a
`shiftR` Int
32)
transitionIsWildcard :: Transition -> Bool
transitionIsWildcard :: Transition -> Bool
transitionIsWildcard Transition
t = (Transition
t Transition -> Transition -> Transition
forall a. Bits a => a -> a -> a
.&. Transition
forall a. Integral a => a
wildcard) Transition -> Transition -> Bool
forall a. Eq a => a -> a -> Bool
== Transition
forall a. Integral a => a
wildcard
newTransition :: CodeUnit -> State -> Transition
newTransition :: CodeUnit -> Int -> Transition
newTransition CodeUnit
input Int
state =
let
input64 :: Transition
input64 = CodeUnit -> Transition
forall a b. (Integral a, Num b) => a -> b
fromIntegral CodeUnit
input :: Word64
state64 :: Transition
state64 = Int -> Transition
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
state :: Word64
in
(Transition
state64 Transition -> Int -> Transition
forall a. Bits a => a -> Int -> a
`shiftL` Int
32) Transition -> Transition -> Transition
forall a. Bits a => a -> a -> a
.|. Transition
input64
newWildcardTransition :: State -> Transition
newWildcardTransition :: Int -> Transition
newWildcardTransition Int
state =
let
state64 :: Transition
state64 = Int -> Transition
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
state :: Word64
in
(Transition
state64 Transition -> Int -> Transition
forall a. Bits a => a -> Int -> a
`shiftL` Int
32) Transition -> Transition -> Transition
forall a. Bits a => a -> a -> a
.|. Transition
forall a. Integral a => a
wildcard
packTransitions :: [[Transition]] -> (TypedByteArray Transition, TypedByteArray Int)
packTransitions :: [[Transition]] -> (TypedByteArray Transition, TypedByteArray Int)
packTransitions [[Transition]]
transitions =
let
packed :: TypedByteArray Transition
packed = [Transition] -> TypedByteArray Transition
forall a. Prim a => [a] -> TypedByteArray a
TBA.fromList ([Transition] -> TypedByteArray Transition)
-> [Transition] -> TypedByteArray Transition
forall a b. (a -> b) -> a -> b
$ [[Transition]] -> [Transition]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat [[Transition]]
transitions
offsets :: TypedByteArray Int
offsets = [Int] -> TypedByteArray Int
forall a. Prim a => [a] -> TypedByteArray a
TBA.fromList ([Int] -> TypedByteArray Int) -> [Int] -> TypedByteArray Int
forall a b. (a -> b) -> a -> b
$ (Int -> Int -> Int) -> Int -> [Int] -> [Int]
forall b a. (b -> a -> b) -> b -> [a] -> [b]
scanl Int -> Int -> Int
forall a. Num a => a -> a -> a
(+) Int
0 ([Int] -> [Int]) -> [Int] -> [Int]
forall a b. (a -> b) -> a -> b
$ ([Transition] -> Int) -> [[Transition]] -> [Int]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap [Transition] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
List.length [[Transition]]
transitions
in
(TypedByteArray Transition
packed, TypedByteArray Int
offsets)
build :: [([CodeUnit], v)] -> AcMachine v
build :: [([CodeUnit], v)] -> AcMachine v
build [([CodeUnit], v)]
needlesWithValues =
let
(Int
numStates, TransitionMap
transitionMap, ValuesMap v
initialValueMap) = [([CodeUnit], v)] -> (Int, TransitionMap, ValuesMap v)
forall v. [([CodeUnit], v)] -> (Int, TransitionMap, ValuesMap v)
buildTransitionMap [([CodeUnit], v)]
needlesWithValues
fallbackMap :: FallbackMap
fallbackMap = TransitionMap -> FallbackMap
buildFallbackMap TransitionMap
transitionMap
valueMap :: ValuesMap v
valueMap = TransitionMap -> FallbackMap -> ValuesMap v -> ValuesMap v
forall v.
TransitionMap -> FallbackMap -> ValuesMap v -> ValuesMap v
buildValueMap TransitionMap
transitionMap FallbackMap
fallbackMap ValuesMap v
initialValueMap
prependTransition :: [Transition] -> a -> Int -> [Transition]
prependTransition [Transition]
ts a
input Int
state = CodeUnit -> Int -> Transition
newTransition (a -> CodeUnit
forall a b. (Integral a, Num b) => a -> b
fromIntegral a
input) Int
state Transition -> [Transition] -> [Transition]
forall a. a -> [a] -> [a]
: [Transition]
ts
makeTransitions :: Int -> FallbackMap -> [Transition]
makeTransitions Int
fallback FallbackMap
ts = ([Transition] -> Int -> Int -> [Transition])
-> [Transition] -> FallbackMap -> [Transition]
forall a b. (a -> Int -> b -> a) -> a -> IntMap b -> a
IntMap.foldlWithKey' [Transition] -> Int -> Int -> [Transition]
forall a. Integral a => [Transition] -> a -> Int -> [Transition]
prependTransition [Int -> Transition
newWildcardTransition Int
fallback] FallbackMap
ts
transitionsList :: [[Transition]]
transitionsList = (Int -> FallbackMap -> [Transition])
-> [Int] -> [FallbackMap] -> [[Transition]]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith Int -> FallbackMap -> [Transition]
makeTransitions (FallbackMap -> [Int]
forall a. IntMap a -> [a]
IntMap.elems FallbackMap
fallbackMap) (TransitionMap -> [FallbackMap]
forall a. IntMap a -> [a]
IntMap.elems TransitionMap
transitionMap)
(TypedByteArray Transition
transitions, TypedByteArray Int
offsets) = [[Transition]] -> (TypedByteArray Transition, TypedByteArray Int)
packTransitions [[Transition]]
transitionsList
rootTransitions :: TypedByteArray Transition
rootTransitions = FallbackMap -> TypedByteArray Transition
buildAsciiTransitionLookupTable (FallbackMap -> TypedByteArray Transition)
-> FallbackMap -> TypedByteArray Transition
forall a b. (a -> b) -> a -> b
$ TransitionMap
transitionMap TransitionMap -> Int -> FallbackMap
forall a. IntMap a -> Int -> a
IntMap.! Int
0
values :: Vector [v]
values = Int -> (Int -> [v]) -> Vector [v]
forall a. Int -> (Int -> a) -> Vector a
Vector.generate Int
numStates (ValuesMap v
valueMap ValuesMap v -> Int -> [v]
forall a. IntMap a -> Int -> a
IntMap.!)
in
Vector [v]
-> TypedByteArray Transition
-> TypedByteArray Int
-> TypedByteArray Transition
-> AcMachine v
forall v.
Vector [v]
-> TypedByteArray Transition
-> TypedByteArray Int
-> TypedByteArray Transition
-> AcMachine v
AcMachine Vector [v]
values TypedByteArray Transition
transitions TypedByteArray Int
offsets TypedByteArray Transition
rootTransitions
debugBuildDot :: [[CodeUnit]] -> String
debugBuildDot :: [[CodeUnit]] -> String
debugBuildDot [[CodeUnit]]
needles =
let
(Int
_numStates, TransitionMap
transitionMap, ValuesMap Int
initialValueMap) =
[([CodeUnit], Int)] -> (Int, TransitionMap, ValuesMap Int)
forall v. [([CodeUnit], v)] -> (Int, TransitionMap, ValuesMap v)
buildTransitionMap ([([CodeUnit], Int)] -> (Int, TransitionMap, ValuesMap Int))
-> [([CodeUnit], Int)] -> (Int, TransitionMap, ValuesMap Int)
forall a b. (a -> b) -> a -> b
$ [[CodeUnit]] -> [Int] -> [([CodeUnit], Int)]
forall a b. [a] -> [b] -> [(a, b)]
zip [[CodeUnit]]
needles ([Int
0..] :: [Int])
fallbackMap :: FallbackMap
fallbackMap = TransitionMap -> FallbackMap
buildFallbackMap TransitionMap
transitionMap
valueMap :: ValuesMap Int
valueMap = TransitionMap -> FallbackMap -> ValuesMap Int -> ValuesMap Int
forall v.
TransitionMap -> FallbackMap -> ValuesMap v -> ValuesMap v
buildValueMap TransitionMap
transitionMap FallbackMap
fallbackMap ValuesMap Int
initialValueMap
dotEdge :: String -> a -> a -> String
dotEdge String
extra a
state a
nextState =
String
" " String -> ShowS
forall a. [a] -> [a] -> [a]
++ (a -> String
forall a. Show a => a -> String
show a
state) String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
" -> " String -> ShowS
forall a. [a] -> [a] -> [a]
++ (a -> String
forall a. Show a => a -> String
show a
nextState) String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
" [" String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
extra String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
"];"
dotFallbackEdge :: [String] -> State -> State -> [String]
dotFallbackEdge :: [String] -> Int -> Int -> [String]
dotFallbackEdge [String]
edges Int
state Int
nextState =
(String -> Int -> Int -> String
forall a a. (Show a, Show a) => String -> a -> a -> String
dotEdge String
"style = dashed" Int
state Int
nextState) String -> [String] -> [String]
forall a. a -> [a] -> [a]
: [String]
edges
dotTransitionEdge :: State -> [String] -> Int -> State -> [String]
dotTransitionEdge :: Int -> [String] -> Int -> Int -> [String]
dotTransitionEdge Int
state [String]
edges Int
input Int
nextState =
(String -> Int -> Int -> String
forall a a. (Show a, Show a) => String -> a -> a -> String
dotEdge (String
"label = \"" String -> ShowS
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show Int
input String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
"\"") Int
state Int
nextState) String -> [String] -> [String]
forall a. a -> [a] -> [a]
: [String]
edges
prependTransitionEdges :: [String] -> Int -> [String]
prependTransitionEdges [String]
edges Int
state =
([String] -> Int -> Int -> [String])
-> [String] -> FallbackMap -> [String]
forall a b. (a -> Int -> b -> a) -> a -> IntMap b -> a
IntMap.foldlWithKey' (Int -> [String] -> Int -> Int -> [String]
dotTransitionEdge Int
state) [String]
edges (TransitionMap
transitionMap TransitionMap -> Int -> FallbackMap
forall a. IntMap a -> Int -> a
IntMap.! Int
state)
dotMatchState :: [String] -> State -> [Int] -> [String]
dotMatchState :: [String] -> Int -> [Int] -> [String]
dotMatchState [String]
edges Int
_ [] = [String]
edges
dotMatchState [String]
edges Int
state [Int]
_ = (String
" " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show Int
state String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
" [shape = doublecircle];") String -> [String] -> [String]
forall a. a -> [a] -> [a]
: [String]
edges
dot0 :: [String]
dot0 = ([String] -> Int -> [String])
-> [String] -> TransitionMap -> [String]
forall a. (a -> Int -> a) -> a -> TransitionMap -> a
foldBreadthFirst [String] -> Int -> [String]
prependTransitionEdges [] TransitionMap
transitionMap
dot1 :: [String]
dot1 = ([String] -> Int -> Int -> [String])
-> [String] -> FallbackMap -> [String]
forall a b. (a -> Int -> b -> a) -> a -> IntMap b -> a
IntMap.foldlWithKey' [String] -> Int -> Int -> [String]
dotFallbackEdge [String]
dot0 FallbackMap
fallbackMap
dot2 :: [String]
dot2 = ([String] -> Int -> [Int] -> [String])
-> [String] -> ValuesMap Int -> [String]
forall a b. (a -> Int -> b -> a) -> a -> IntMap b -> a
IntMap.foldlWithKey' [String] -> Int -> [Int] -> [String]
dotMatchState [String]
dot1 ValuesMap Int
valueMap
in
[String] -> String
unlines ([String] -> String) -> [String] -> String
forall a b. (a -> b) -> a -> b
$ [String
"digraph {", String
" rankdir = \"LR\";"] [String] -> [String] -> [String]
forall a. [a] -> [a] -> [a]
++ ([String] -> [String]
forall a. [a] -> [a]
reverse [String]
dot2) [String] -> [String] -> [String]
forall a. [a] -> [a] -> [a]
++ [String
"}"]
type TransitionMap = IntMap (IntMap State)
type FallbackMap = IntMap State
type ValuesMap v = IntMap [v]
buildTransitionMap :: forall v. [([CodeUnit], v)] -> (Int, TransitionMap, ValuesMap v)
buildTransitionMap :: [([CodeUnit], v)] -> (Int, TransitionMap, ValuesMap v)
buildTransitionMap =
let
go :: State
-> (Int, TransitionMap, ValuesMap v)
-> ([CodeUnit], v)
-> (Int, TransitionMap, ValuesMap v)
go :: Int
-> (Int, TransitionMap, ValuesMap v)
-> ([CodeUnit], v)
-> (Int, TransitionMap, ValuesMap v)
go !Int
state (!Int
numStates, TransitionMap
transitions, ValuesMap v
values) ([], v
v) =
(Int
numStates, TransitionMap
transitions, ([v] -> [v] -> [v]) -> Int -> [v] -> ValuesMap v -> ValuesMap v
forall a. (a -> a -> a) -> Int -> a -> IntMap a -> IntMap a
IntMap.insertWith [v] -> [v] -> [v]
forall a. [a] -> [a] -> [a]
(++) Int
state [v
v] ValuesMap v
values)
go !Int
state (!Int
numStates, TransitionMap
transitions, ValuesMap v
values) (!CodeUnit
input : [CodeUnit]
needleTail, v
vs) =
let
transitionsFromState :: FallbackMap
transitionsFromState = TransitionMap
transitions TransitionMap -> Int -> FallbackMap
forall a. IntMap a -> Int -> a
IntMap.! Int
state
in
case Int -> FallbackMap -> Maybe Int
forall a. Int -> IntMap a -> Maybe a
IntMap.lookup (CodeUnit -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral CodeUnit
input) FallbackMap
transitionsFromState of
Just Int
nextState ->
Int
-> (Int, TransitionMap, ValuesMap v)
-> ([CodeUnit], v)
-> (Int, TransitionMap, ValuesMap v)
go Int
nextState (Int
numStates, TransitionMap
transitions, ValuesMap v
values) ([CodeUnit]
needleTail, v
vs)
Maybe Int
Nothing ->
let
nextState :: Int
nextState = Int
numStates
transitionsFromState' :: FallbackMap
transitionsFromState' = Int -> Int -> FallbackMap -> FallbackMap
forall a. Int -> a -> IntMap a -> IntMap a
IntMap.insert (CodeUnit -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral CodeUnit
input) Int
nextState FallbackMap
transitionsFromState
transitions' :: TransitionMap
transitions'
= Int -> FallbackMap -> TransitionMap -> TransitionMap
forall a. Int -> a -> IntMap a -> IntMap a
IntMap.insert Int
state FallbackMap
transitionsFromState'
(TransitionMap -> TransitionMap) -> TransitionMap -> TransitionMap
forall a b. (a -> b) -> a -> b
$ Int -> FallbackMap -> TransitionMap -> TransitionMap
forall a. Int -> a -> IntMap a -> IntMap a
IntMap.insert Int
nextState FallbackMap
forall a. IntMap a
IntMap.empty
(TransitionMap -> TransitionMap) -> TransitionMap -> TransitionMap
forall a b. (a -> b) -> a -> b
$ TransitionMap
transitions
in
Int
-> (Int, TransitionMap, ValuesMap v)
-> ([CodeUnit], v)
-> (Int, TransitionMap, ValuesMap v)
go Int
nextState (Int
numStates Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1, TransitionMap
transitions', ValuesMap v
values) ([CodeUnit]
needleTail, v
vs)
stateInitial :: Int
stateInitial = Int
0
initialTransitions :: IntMap (IntMap a)
initialTransitions = Int -> IntMap a -> IntMap (IntMap a)
forall a. Int -> a -> IntMap a
IntMap.singleton Int
stateInitial IntMap a
forall a. IntMap a
IntMap.empty
initialValues :: IntMap a
initialValues = IntMap a
forall a. IntMap a
IntMap.empty
insertNeedle :: (Int, TransitionMap, ValuesMap v)
-> ([CodeUnit], v) -> (Int, TransitionMap, ValuesMap v)
insertNeedle = Int
-> (Int, TransitionMap, ValuesMap v)
-> ([CodeUnit], v)
-> (Int, TransitionMap, ValuesMap v)
go Int
stateInitial
in
((Int, TransitionMap, ValuesMap v)
-> ([CodeUnit], v) -> (Int, TransitionMap, ValuesMap v))
-> (Int, TransitionMap, ValuesMap v)
-> [([CodeUnit], v)]
-> (Int, TransitionMap, ValuesMap v)
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (Int, TransitionMap, ValuesMap v)
-> ([CodeUnit], v) -> (Int, TransitionMap, ValuesMap v)
insertNeedle (Int
1, TransitionMap
forall a. IntMap (IntMap a)
initialTransitions, ValuesMap v
forall a. IntMap a
initialValues)
asciiCount :: Integral a => a
asciiCount :: a
asciiCount = a
128
buildAsciiTransitionLookupTable :: IntMap State -> TypedByteArray Transition
buildAsciiTransitionLookupTable :: FallbackMap -> TypedByteArray Transition
buildAsciiTransitionLookupTable FallbackMap
transitions = Int -> (Int -> Transition) -> TypedByteArray Transition
forall a. Prim a => Int -> (Int -> a) -> TypedByteArray a
TBA.generate Int
forall a. Integral a => a
asciiCount ((Int -> Transition) -> TypedByteArray Transition)
-> (Int -> Transition) -> TypedByteArray Transition
forall a b. (a -> b) -> a -> b
$ \Int
i ->
case Int -> FallbackMap -> Maybe Int
forall a. Int -> IntMap a -> Maybe a
IntMap.lookup Int
i FallbackMap
transitions of
Just Int
state -> CodeUnit -> Int -> Transition
newTransition (Int -> CodeUnit
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
i) Int
state
Maybe Int
Nothing -> Int -> Transition
newWildcardTransition Int
0
foldBreadthFirst :: (a -> State -> a) -> a -> TransitionMap -> a
foldBreadthFirst :: (a -> Int -> a) -> a -> TransitionMap -> a
foldBreadthFirst a -> Int -> a
f a
seed TransitionMap
transitions = [Int] -> [Int] -> a -> a
go [Int
0] [] a
seed
where
go :: [Int] -> [Int] -> a -> a
go [] [] !a
acc = a
acc
go [] [Int]
revBacklog !a
acc = [Int] -> [Int] -> a -> a
go ([Int] -> [Int]
forall a. [a] -> [a]
reverse [Int]
revBacklog) [] a
acc
go (Int
state : [Int]
backlog) [Int]
revBacklog !a
acc =
let
extra :: [Int]
extra = FallbackMap -> [Int]
forall a. IntMap a -> [a]
IntMap.elems (FallbackMap -> [Int]) -> FallbackMap -> [Int]
forall a b. (a -> b) -> a -> b
$ TransitionMap
transitions TransitionMap -> Int -> FallbackMap
forall a. IntMap a -> Int -> a
IntMap.! Int
state
in
[Int] -> [Int] -> a -> a
go [Int]
backlog ([Int]
extra [Int] -> [Int] -> [Int]
forall a. [a] -> [a] -> [a]
++ [Int]
revBacklog) (a -> Int -> a
f a
acc Int
state)
buildFallbackMap :: TransitionMap -> FallbackMap
buildFallbackMap :: TransitionMap -> FallbackMap
buildFallbackMap TransitionMap
transitions =
let
getFallback :: FallbackMap -> State -> Int -> State
getFallback :: FallbackMap -> Int -> Int -> Int
getFallback FallbackMap
_ Int
0 Int
_ = Int
0
getFallback FallbackMap
fallbacks !Int
state !Int
input =
let
fallback :: Int
fallback = FallbackMap
fallbacks FallbackMap -> Int -> Int
forall a. IntMap a -> Int -> a
IntMap.! Int
state
transitionsFromFallback :: FallbackMap
transitionsFromFallback = TransitionMap
transitions TransitionMap -> Int -> FallbackMap
forall a. IntMap a -> Int -> a
IntMap.! Int
fallback
in
case Int -> FallbackMap -> Maybe Int
forall a. Int -> IntMap a -> Maybe a
IntMap.lookup Int
input FallbackMap
transitionsFromFallback of
Just Int
st -> Int
st
Maybe Int
Nothing -> FallbackMap -> Int -> Int -> Int
getFallback FallbackMap
fallbacks Int
fallback Int
input
insertFallback :: State -> FallbackMap -> Int -> State -> FallbackMap
insertFallback :: Int -> FallbackMap -> Int -> Int -> FallbackMap
insertFallback !Int
state FallbackMap
fallbacks !Int
input !Int
nextState =
Int -> Int -> FallbackMap -> FallbackMap
forall a. Int -> a -> IntMap a -> IntMap a
IntMap.insert Int
nextState (FallbackMap -> Int -> Int -> Int
getFallback FallbackMap
fallbacks Int
state Int
input) FallbackMap
fallbacks
insertFallbacks :: FallbackMap -> State -> FallbackMap
insertFallbacks :: FallbackMap -> Int -> FallbackMap
insertFallbacks FallbackMap
fallbacks !Int
state =
(FallbackMap -> Int -> Int -> FallbackMap)
-> FallbackMap -> FallbackMap -> FallbackMap
forall a b. (a -> Int -> b -> a) -> a -> IntMap b -> a
IntMap.foldlWithKey' (Int -> FallbackMap -> Int -> Int -> FallbackMap
insertFallback Int
state) FallbackMap
fallbacks (TransitionMap
transitions TransitionMap -> Int -> FallbackMap
forall a. IntMap a -> Int -> a
IntMap.! Int
state)
in
(FallbackMap -> Int -> FallbackMap)
-> FallbackMap -> TransitionMap -> FallbackMap
forall a. (a -> Int -> a) -> a -> TransitionMap -> a
foldBreadthFirst FallbackMap -> Int -> FallbackMap
insertFallbacks (Int -> Int -> FallbackMap
forall a. Int -> a -> IntMap a
IntMap.singleton Int
0 Int
0) TransitionMap
transitions
buildValueMap :: forall v. TransitionMap -> FallbackMap -> ValuesMap v -> ValuesMap v
buildValueMap :: TransitionMap -> FallbackMap -> ValuesMap v -> ValuesMap v
buildValueMap TransitionMap
transitions FallbackMap
fallbacks ValuesMap v
valuesInitial =
let
insertValues :: ValuesMap v -> State -> ValuesMap v
insertValues :: ValuesMap v -> Int -> ValuesMap v
insertValues ValuesMap v
values !Int
state =
let
fallbackValues :: [v]
fallbackValues = ValuesMap v
values ValuesMap v -> Int -> [v]
forall a. IntMap a -> Int -> a
IntMap.! (FallbackMap
fallbacks FallbackMap -> Int -> Int
forall a. IntMap a -> Int -> a
IntMap.! Int
state)
valuesForState :: [v]
valuesForState = case Int -> ValuesMap v -> Maybe [v]
forall a. Int -> IntMap a -> Maybe a
IntMap.lookup Int
state ValuesMap v
valuesInitial of
Just [v]
vs -> [v]
vs [v] -> [v] -> [v]
forall a. [a] -> [a] -> [a]
++ [v]
fallbackValues
Maybe [v]
Nothing -> [v]
fallbackValues
in
Int -> [v] -> ValuesMap v -> ValuesMap v
forall a. Int -> a -> IntMap a -> IntMap a
IntMap.insert Int
state [v]
valuesForState ValuesMap v
values
in
(ValuesMap v -> Int -> ValuesMap v)
-> ValuesMap v -> TransitionMap -> ValuesMap v
forall a. (a -> Int -> a) -> a -> TransitionMap -> a
foldBreadthFirst ValuesMap v -> Int -> ValuesMap v
insertValues (Int -> [v] -> ValuesMap v
forall a. Int -> a -> IntMap a
IntMap.singleton Int
0 []) TransitionMap
transitions
at :: forall a. Vector.Vector a -> Int -> a
at :: Vector a -> Int -> a
at = Vector a -> Int -> a
forall a. Vector a -> Int -> a
Vector.unsafeIndex
{-# INLINE uAt #-}
uAt :: Prim a => TypedByteArray a -> Int -> a
uAt :: TypedByteArray a -> Int -> a
uAt = TypedByteArray a -> Int -> a
forall a. Prim a => TypedByteArray a -> Int -> a
TBA.unsafeIndex
data Next a
= Done !a
| Step !a
{-# INLINE runWithCase #-}
runWithCase
:: forall a v
. CaseSensitivity
-> a
-> (a -> Match v -> Next a)
-> AcMachine v
-> Text
-> a
runWithCase :: CaseSensitivity
-> a -> (a -> Match v -> Next a) -> AcMachine v -> Text -> a
runWithCase CaseSensitivity
caseSensitivity a
seed a -> Match v -> Next a
f AcMachine v
machine Text
text =
let
Text Array
u16data !Int
initialOffset !Int
initialRemaining = Text
text
!values :: Vector [v]
values = AcMachine v -> Vector [v]
forall v. AcMachine v -> Vector [v]
machineValues AcMachine v
machine
!transitions :: TypedByteArray Transition
transitions = AcMachine v -> TypedByteArray Transition
forall v. AcMachine v -> TypedByteArray Transition
machineTransitions AcMachine v
machine
!offsets :: TypedByteArray Int
offsets = AcMachine v -> TypedByteArray Int
forall v. AcMachine v -> TypedByteArray Int
machineOffsets AcMachine v
machine
!rootAsciiTransitions :: TypedByteArray Transition
rootAsciiTransitions = AcMachine v -> TypedByteArray Transition
forall v. AcMachine v -> TypedByteArray Transition
machineRootAsciiTransitions AcMachine v
machine
!stateInitial :: Int
stateInitial = Int
0
{-# NOINLINE consumeInput #-}
consumeInput :: Int -> Int -> a -> State -> a
consumeInput :: Int -> Int -> a -> Int -> a
consumeInput !Int
offset !Int
remaining !a
acc !Int
state =
let
inputCodeUnit :: CodeUnit
inputCodeUnit = CodeUnit -> CodeUnit
forall a b. (Integral a, Num b) => a -> b
fromIntegral (CodeUnit -> CodeUnit) -> CodeUnit -> CodeUnit
forall a b. (a -> b) -> a -> b
$ Array -> Int -> CodeUnit
indexTextArray Array
u16data Int
offset
casedCodeUnit :: CodeUnit
casedCodeUnit = case CaseSensitivity
caseSensitivity of
CaseSensitivity
IgnoreCase -> CodeUnit -> CodeUnit
lowerCodeUnit CodeUnit
inputCodeUnit
CaseSensitivity
CaseSensitive -> CodeUnit
inputCodeUnit
in
case Int
remaining of
Int
0 -> a
acc
Int
_ -> Int -> Int -> a -> Int -> CodeUnit -> a
followEdge (Int
offset Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) (Int
remaining Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1) a
acc Int
state CodeUnit
casedCodeUnit
{-# INLINE followEdge #-}
followEdge :: Int -> Int -> a -> State -> CodeUnit -> a
followEdge :: Int -> Int -> a -> Int -> CodeUnit -> a
followEdge !Int
offset !Int
remaining !a
acc !Int
state !CodeUnit
input =
let
!tssOffset :: Int
tssOffset = TypedByteArray Int
offsets TypedByteArray Int -> Int -> Int
forall a. Prim a => TypedByteArray a -> Int -> a
`uAt` Int
state
in
if Int
state Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
stateInitial Bool -> Bool -> Bool
&& CodeUnit
input CodeUnit -> CodeUnit -> Bool
forall a. Ord a => a -> a -> Bool
< CodeUnit
forall a. Integral a => a
asciiCount
then Int -> Int -> a -> CodeUnit -> a
lookupRootAsciiTransition Int
offset Int
remaining a
acc CodeUnit
input
else Int -> Int -> a -> Int -> CodeUnit -> Int -> a
lookupTransition Int
offset Int
remaining a
acc Int
state CodeUnit
input Int
tssOffset
{-# NOINLINE collectMatches #-}
collectMatches :: Int -> Int -> a -> State -> a
collectMatches :: Int -> Int -> a -> Int -> a
collectMatches !Int
offset !Int
remaining !a
acc !Int
state =
let
matchedValues :: [v]
matchedValues = Vector [v]
values Vector [v] -> Int -> [v]
forall a. Vector a -> Int -> a
`at` Int
state
handleMatch :: a -> [v] -> a
handleMatch !a
acc' [v]
vs = case [v]
vs of
[] -> Int -> Int -> a -> Int -> a
consumeInput Int
offset Int
remaining a
acc' Int
state
v
v:[v]
more -> case a -> Match v -> Next a
f a
acc' (CodeUnitIndex -> v -> Match v
forall v. CodeUnitIndex -> v -> Match v
Match (Int -> CodeUnitIndex
CodeUnitIndex (Int -> CodeUnitIndex) -> Int -> CodeUnitIndex
forall a b. (a -> b) -> a -> b
$ Int
offset Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
initialOffset) v
v) of
Step a
newAcc -> a -> [v] -> a
handleMatch a
newAcc [v]
more
Done a
finalAcc -> a
finalAcc
in
a -> [v] -> a
handleMatch a
acc [v]
matchedValues
{-# INLINE lookupRootAsciiTransition #-}
lookupRootAsciiTransition :: Int -> Int -> a -> CodeUnit -> a
lookupRootAsciiTransition :: Int -> Int -> a -> CodeUnit -> a
lookupRootAsciiTransition !Int
offset !Int
remaining !a
acc !CodeUnit
input =
case TypedByteArray Transition
rootAsciiTransitions TypedByteArray Transition -> Int -> Transition
forall a. Prim a => TypedByteArray a -> Int -> a
`uAt` CodeUnit -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral CodeUnit
input of
Transition
t | Transition -> Bool
transitionIsWildcard Transition
t -> Int -> Int -> a -> Int -> a
consumeInput Int
offset Int
remaining a
acc Int
stateInitial
| Bool
otherwise -> Int -> Int -> a -> Int -> a
collectMatches Int
offset Int
remaining a
acc (Transition -> Int
transitionState Transition
t)
{-# INLINE lookupTransition #-}
lookupTransition :: Int -> Int -> a -> State -> CodeUnit -> Int -> a
lookupTransition :: Int -> Int -> a -> Int -> CodeUnit -> Int -> a
lookupTransition !Int
offset !Int
remaining !a
acc !Int
state !CodeUnit
input !Int
i =
case TypedByteArray Transition
transitions TypedByteArray Transition -> Int -> Transition
forall a. Prim a => TypedByteArray a -> Int -> a
`uAt` Int
i of
Transition
t | Transition -> Bool
transitionIsWildcard Transition
t ->
if Int
state Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
stateInitial
then Int -> Int -> a -> Int -> a
consumeInput Int
offset Int
remaining a
acc Int
state
else Int -> Int -> a -> Int -> CodeUnit -> a
followEdge Int
offset Int
remaining a
acc (Transition -> Int
transitionState Transition
t) CodeUnit
input
Transition
t | Transition -> CodeUnit
transitionCodeUnit Transition
t CodeUnit -> CodeUnit -> Bool
forall a. Eq a => a -> a -> Bool
== CodeUnit
input ->
Int -> Int -> a -> Int -> a
collectMatches Int
offset Int
remaining a
acc (Transition -> Int
transitionState Transition
t)
Transition
_ -> Int -> Int -> a -> Int -> CodeUnit -> Int -> a
lookupTransition Int
offset Int
remaining a
acc Int
state CodeUnit
input (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
in
Int -> Int -> a -> Int -> a
consumeInput Int
initialOffset Int
initialRemaining a
seed Int
stateInitial
{-# INLINE runText #-}
runText :: forall a v. a -> (a -> Match v -> Next a) -> AcMachine v -> Text -> a
runText :: a -> (a -> Match v -> Next a) -> AcMachine v -> Text -> a
runText = CaseSensitivity
-> a -> (a -> Match v -> Next a) -> AcMachine v -> Text -> a
forall a v.
CaseSensitivity
-> a -> (a -> Match v -> Next a) -> AcMachine v -> Text -> a
runWithCase CaseSensitivity
CaseSensitive
{-# INLINE runLower #-}
runLower :: forall a v. a -> (a -> Match v -> Next a) -> AcMachine v -> Text -> a
runLower :: a -> (a -> Match v -> Next a) -> AcMachine v -> Text -> a
runLower = CaseSensitivity
-> a -> (a -> Match v -> Next a) -> AcMachine v -> Text -> a
forall a v.
CaseSensitivity
-> a -> (a -> Match v -> Next a) -> AcMachine v -> Text -> a
runWithCase CaseSensitivity
IgnoreCase