{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Data.Text.AhoCorasick.Automaton
( AcMachine (..)
, CaseSensitivity (..)
, CodeUnitIndex (..)
, Match (..)
, Next (..)
, build
, debugBuildDot
, runLower
, runText
, runWithCase
, needleCasings
) where
import Control.DeepSeq (NFData)
import Data.Bits (Bits (shiftL, shiftR, (.&.), (.|.)))
import Data.Char (chr)
import Data.Foldable (foldl')
import Data.IntMap.Strict (IntMap)
import Data.Word (Word32, Word64)
import GHC.Generics (Generic)
import qualified Data.Char as Char
import qualified Data.IntMap.Strict as IntMap
import qualified Data.List as List
import qualified Data.Vector as Vector
import Data.Text.CaseSensitivity (CaseSensitivity (..))
import Data.Text.Utf8 (CodePoint, CodeUnitIndex (CodeUnitIndex), Text (..))
import Data.TypedByteArray (Prim, TypedByteArray)
import qualified Data.Text as Text
import qualified Data.Text.Utf8 as Utf8
import qualified Data.TypedByteArray as TBA
type State = Int
type Transition = Word64
type Offset = Word32
data Match v = Match
{ forall v. Match v -> CodeUnitIndex
matchPos :: {-# UNPACK #-} !CodeUnitIndex
, forall v. Match v -> v
matchValue :: v
}
data AcMachine v = AcMachine
{ forall v. AcMachine v -> Vector [v]
machineValues :: !(Vector.Vector [v])
, forall v. AcMachine v -> TypedByteArray Transition
machineTransitions :: !(TypedByteArray Transition)
, forall v. AcMachine v -> TypedByteArray Offset
machineOffsets :: !(TypedByteArray Offset)
, forall v. AcMachine v -> TypedByteArray Transition
machineRootAsciiTransitions :: !(TypedByteArray Transition)
} deriving ((forall x. AcMachine v -> Rep (AcMachine v) x)
-> (forall x. Rep (AcMachine v) x -> AcMachine v)
-> Generic (AcMachine v)
forall x. Rep (AcMachine v) x -> AcMachine v
forall x. AcMachine v -> Rep (AcMachine v) x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
forall v x. Rep (AcMachine v) x -> AcMachine v
forall v x. AcMachine v -> Rep (AcMachine v) x
$cfrom :: forall v x. AcMachine v -> Rep (AcMachine v) x
from :: forall x. AcMachine v -> Rep (AcMachine v) x
$cto :: forall v x. Rep (AcMachine v) x -> AcMachine v
to :: forall x. Rep (AcMachine v) x -> AcMachine v
Generic, (forall a b. (a -> b) -> AcMachine a -> AcMachine b)
-> (forall a b. a -> AcMachine b -> AcMachine a)
-> Functor AcMachine
forall a b. a -> AcMachine b -> AcMachine a
forall a b. (a -> b) -> AcMachine a -> AcMachine b
forall (f :: * -> *).
(forall a b. (a -> b) -> f a -> f b)
-> (forall a b. a -> f b -> f a) -> Functor f
$cfmap :: forall a b. (a -> b) -> AcMachine a -> AcMachine b
fmap :: forall a b. (a -> b) -> AcMachine a -> AcMachine b
$c<$ :: forall a b. a -> AcMachine b -> AcMachine a
<$ :: forall a b. a -> AcMachine b -> AcMachine a
Functor)
instance NFData v => NFData (AcMachine v)
wildcard :: Integral a => a
wildcard :: forall a. Integral a => a
wildcard = a
0x200000
transitionCodeUnit :: Transition -> CodePoint
transitionCodeUnit :: Transition -> CodePoint
transitionCodeUnit Transition
t = State -> CodePoint
Char.chr (State -> CodePoint) -> State -> CodePoint
forall a b. (a -> b) -> a -> b
$ Transition -> State
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Transition
t Transition -> Transition -> Transition
forall a. Bits a => a -> a -> a
.&. Transition
0x1fffff)
transitionState :: Transition -> State
transitionState :: Transition -> State
transitionState Transition
t = Transition -> State
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Transition
t Transition -> State -> Transition
forall a. Bits a => a -> State -> a
`shiftR` State
32)
transitionIsWildcard :: Transition -> Bool
transitionIsWildcard :: Transition -> Bool
transitionIsWildcard Transition
t = (Transition
t Transition -> Transition -> Transition
forall a. Bits a => a -> a -> a
.&. Transition
forall a. Integral a => a
wildcard) Transition -> Transition -> Bool
forall a. Eq a => a -> a -> Bool
== Transition
forall a. Integral a => a
wildcard
newTransition :: CodePoint -> State -> Transition
newTransition :: CodePoint -> State -> Transition
newTransition CodePoint
input State
state =
let
input64 :: Transition
input64 = State -> Transition
forall a b. (Integral a, Num b) => a -> b
fromIntegral (State -> Transition) -> State -> Transition
forall a b. (a -> b) -> a -> b
$ CodePoint -> State
Char.ord CodePoint
input :: Word64
state64 :: Transition
state64 = State -> Transition
forall a b. (Integral a, Num b) => a -> b
fromIntegral State
state :: Word64
in
(Transition
state64 Transition -> State -> Transition
forall a. Bits a => a -> State -> a
`shiftL` State
32) Transition -> Transition -> Transition
forall a. Bits a => a -> a -> a
.|. Transition
input64
newWildcardTransition :: State -> Transition
newWildcardTransition :: State -> Transition
newWildcardTransition State
state =
let
state64 :: Transition
state64 = State -> Transition
forall a b. (Integral a, Num b) => a -> b
fromIntegral State
state :: Word64
in
(Transition
state64 Transition -> State -> Transition
forall a. Bits a => a -> State -> a
`shiftL` State
32) Transition -> Transition -> Transition
forall a. Bits a => a -> a -> a
.|. Transition
forall a. Integral a => a
wildcard
packTransitions :: [[Transition]] -> (TypedByteArray Transition, TypedByteArray Offset)
packTransitions :: [[Transition]]
-> (TypedByteArray Transition, TypedByteArray Offset)
packTransitions [[Transition]]
transitions =
let
packed :: TypedByteArray Transition
packed = [Transition] -> TypedByteArray Transition
forall a. Prim a => [a] -> TypedByteArray a
TBA.fromList ([Transition] -> TypedByteArray Transition)
-> [Transition] -> TypedByteArray Transition
forall a b. (a -> b) -> a -> b
$ [[Transition]] -> [Transition]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat [[Transition]]
transitions
offsets :: TypedByteArray Offset
offsets = [Offset] -> TypedByteArray Offset
forall a. Prim a => [a] -> TypedByteArray a
TBA.fromList ([Offset] -> TypedByteArray Offset)
-> [Offset] -> TypedByteArray Offset
forall a b. (a -> b) -> a -> b
$ (State -> Offset) -> [State] -> [Offset]
forall a b. (a -> b) -> [a] -> [b]
map State -> Offset
forall a b. (Integral a, Num b) => a -> b
fromIntegral ([State] -> [Offset]) -> [State] -> [Offset]
forall a b. (a -> b) -> a -> b
$ (State -> State -> State) -> State -> [State] -> [State]
forall b a. (b -> a -> b) -> b -> [a] -> [b]
scanl State -> State -> State
forall a. Num a => a -> a -> a
(+) State
0 ([State] -> [State]) -> [State] -> [State]
forall a b. (a -> b) -> a -> b
$ ([Transition] -> State) -> [[Transition]] -> [State]
forall a b. (a -> b) -> [a] -> [b]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap [Transition] -> State
forall a. [a] -> State
forall (t :: * -> *) a. Foldable t => t a -> State
List.length [[Transition]]
transitions
in
(TypedByteArray Transition
packed, TypedByteArray Offset
offsets)
build :: [(Text, v)] -> AcMachine v
build :: forall v. [(Text, v)] -> AcMachine v
build [(Text, v)]
needlesWithValues =
let
(State
numStates, TransitionMap
transitionMap, ValuesMap v
initialValueMap) = [(Text, v)] -> (State, TransitionMap, ValuesMap v)
forall v. [(Text, v)] -> (State, TransitionMap, ValuesMap v)
buildTransitionMap [(Text, v)]
needlesWithValues
fallbackMap :: FallbackMap
fallbackMap = TransitionMap -> FallbackMap
buildFallbackMap TransitionMap
transitionMap
valueMap :: ValuesMap v
valueMap = TransitionMap -> FallbackMap -> ValuesMap v -> ValuesMap v
forall v.
TransitionMap -> FallbackMap -> ValuesMap v -> ValuesMap v
buildValueMap TransitionMap
transitionMap FallbackMap
fallbackMap ValuesMap v
initialValueMap
prependTransition :: [Transition] -> State -> State -> [Transition]
prependTransition [Transition]
ts State
input State
state = CodePoint -> State -> Transition
newTransition (State -> CodePoint
Char.chr State
input) State
state Transition -> [Transition] -> [Transition]
forall a. a -> [a] -> [a]
: [Transition]
ts
makeTransitions :: State -> FallbackMap -> [Transition]
makeTransitions State
fallback FallbackMap
ts = ([Transition] -> State -> State -> [Transition])
-> [Transition] -> FallbackMap -> [Transition]
forall a b. (a -> State -> b -> a) -> a -> IntMap b -> a
IntMap.foldlWithKey' [Transition] -> State -> State -> [Transition]
prependTransition [State -> Transition
newWildcardTransition State
fallback] FallbackMap
ts
transitionsList :: [[Transition]]
transitionsList = (State -> FallbackMap -> [Transition])
-> [State] -> [FallbackMap] -> [[Transition]]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith State -> FallbackMap -> [Transition]
makeTransitions (FallbackMap -> [State]
forall a. IntMap a -> [a]
IntMap.elems FallbackMap
fallbackMap) (TransitionMap -> [FallbackMap]
forall a. IntMap a -> [a]
IntMap.elems TransitionMap
transitionMap)
(TypedByteArray Transition
transitions, TypedByteArray Offset
offsets) = [[Transition]]
-> (TypedByteArray Transition, TypedByteArray Offset)
packTransitions [[Transition]]
transitionsList
rootTransitions :: TypedByteArray Transition
rootTransitions = FallbackMap -> TypedByteArray Transition
buildAsciiTransitionLookupTable (FallbackMap -> TypedByteArray Transition)
-> FallbackMap -> TypedByteArray Transition
forall a b. (a -> b) -> a -> b
$ TransitionMap
transitionMap TransitionMap -> State -> FallbackMap
forall a. IntMap a -> State -> a
IntMap.! State
0
values :: Vector [v]
values = State -> (State -> [v]) -> Vector [v]
forall a. State -> (State -> a) -> Vector a
Vector.generate State
numStates (ValuesMap v
valueMap ValuesMap v -> State -> [v]
forall a. IntMap a -> State -> a
IntMap.!)
in
Vector [v]
-> TypedByteArray Transition
-> TypedByteArray Offset
-> TypedByteArray Transition
-> AcMachine v
forall v.
Vector [v]
-> TypedByteArray Transition
-> TypedByteArray Offset
-> TypedByteArray Transition
-> AcMachine v
AcMachine Vector [v]
values TypedByteArray Transition
transitions TypedByteArray Offset
offsets TypedByteArray Transition
rootTransitions
debugBuildDot :: [Text] -> String
debugBuildDot :: [Text] -> String
debugBuildDot [Text]
needles =
let
(State
_numStates, TransitionMap
transitionMap, ValuesMap State
initialValueMap) =
[(Text, State)] -> (State, TransitionMap, ValuesMap State)
forall v. [(Text, v)] -> (State, TransitionMap, ValuesMap v)
buildTransitionMap ([(Text, State)] -> (State, TransitionMap, ValuesMap State))
-> [(Text, State)] -> (State, TransitionMap, ValuesMap State)
forall a b. (a -> b) -> a -> b
$ [Text] -> [State] -> [(Text, State)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Text]
needles ([State
0..] :: [Int])
fallbackMap :: FallbackMap
fallbackMap = TransitionMap -> FallbackMap
buildFallbackMap TransitionMap
transitionMap
valueMap :: ValuesMap State
valueMap = TransitionMap -> FallbackMap -> ValuesMap State -> ValuesMap State
forall v.
TransitionMap -> FallbackMap -> ValuesMap v -> ValuesMap v
buildValueMap TransitionMap
transitionMap FallbackMap
fallbackMap ValuesMap State
initialValueMap
dotEdge :: String -> a -> a -> String
dotEdge String
extra a
state a
nextState =
String
" " String -> String -> String
forall a. [a] -> [a] -> [a]
++ a -> String
forall a. Show a => a -> String
show a
state String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
" -> " String -> String -> String
forall a. [a] -> [a] -> [a]
++ a -> String
forall a. Show a => a -> String
show a
nextState String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
" [" String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
extra String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
"];"
dotFallbackEdge :: [String] -> State -> State -> [String]
dotFallbackEdge :: [String] -> State -> State -> [String]
dotFallbackEdge [String]
edges State
state State
nextState =
String -> State -> State -> String
forall {a} {a}. (Show a, Show a) => String -> a -> a -> String
dotEdge String
"style = dashed" State
state State
nextState String -> [String] -> [String]
forall a. a -> [a] -> [a]
: [String]
edges
dotTransitionEdge :: State -> [String] -> Int -> State -> [String]
dotTransitionEdge :: State -> [String] -> State -> State -> [String]
dotTransitionEdge State
state [String]
edges State
input State
nextState =
String -> State -> State -> String
forall {a} {a}. (Show a, Show a) => String -> a -> a -> String
dotEdge (String
"label = \"" String -> String -> String
forall a. [a] -> [a] -> [a]
++ State -> String
showInput State
input String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
"\"") State
state State
nextState String -> [String] -> [String]
forall a. a -> [a] -> [a]
: [String]
edges
showInput :: State -> String
showInput State
input = [State -> CodePoint
chr State
input]
prependTransitionEdges :: [String] -> State -> [String]
prependTransitionEdges [String]
edges State
state =
([String] -> State -> State -> [String])
-> [String] -> FallbackMap -> [String]
forall a b. (a -> State -> b -> a) -> a -> IntMap b -> a
IntMap.foldlWithKey' (State -> [String] -> State -> State -> [String]
dotTransitionEdge State
state) [String]
edges (TransitionMap
transitionMap TransitionMap -> State -> FallbackMap
forall a. IntMap a -> State -> a
IntMap.! State
state)
dotMatchState :: [String] -> State -> [Int] -> [String]
dotMatchState :: [String] -> State -> [State] -> [String]
dotMatchState [String]
edges State
_ [] = [String]
edges
dotMatchState [String]
edges State
state [State]
_ = (String
" " String -> String -> String
forall a. [a] -> [a] -> [a]
++ State -> String
forall a. Show a => a -> String
show State
state String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
" [shape = doublecircle];") String -> [String] -> [String]
forall a. a -> [a] -> [a]
: [String]
edges
dot0 :: [String]
dot0 = ([String] -> State -> [String])
-> [String] -> TransitionMap -> [String]
forall a. (a -> State -> a) -> a -> TransitionMap -> a
foldBreadthFirst [String] -> State -> [String]
prependTransitionEdges [] TransitionMap
transitionMap
dot1 :: [String]
dot1 = ([String] -> State -> State -> [String])
-> [String] -> FallbackMap -> [String]
forall a b. (a -> State -> b -> a) -> a -> IntMap b -> a
IntMap.foldlWithKey' [String] -> State -> State -> [String]
dotFallbackEdge [String]
dot0 FallbackMap
fallbackMap
dot2 :: [String]
dot2 = ([String] -> State -> [State] -> [String])
-> [String] -> ValuesMap State -> [String]
forall a b. (a -> State -> b -> a) -> a -> IntMap b -> a
IntMap.foldlWithKey' [String] -> State -> [State] -> [String]
dotMatchState [String]
dot1 ValuesMap State
valueMap
in
[String] -> String
unlines ([String] -> String) -> [String] -> String
forall a b. (a -> b) -> a -> b
$ [String
"digraph {", String
" rankdir = \"LR\";"] [String] -> [String] -> [String]
forall a. [a] -> [a] -> [a]
++ [String] -> [String]
forall a. [a] -> [a]
reverse [String]
dot2 [String] -> [String] -> [String]
forall a. [a] -> [a] -> [a]
++ [String
"}"]
type TransitionMap = IntMap (IntMap State)
type FallbackMap = IntMap State
type ValuesMap v = IntMap [v]
buildTransitionMap :: forall v. [(Text, v)] -> (Int, TransitionMap, ValuesMap v)
buildTransitionMap :: forall v. [(Text, v)] -> (State, TransitionMap, ValuesMap v)
buildTransitionMap =
let
insertNeedle :: (Int, TransitionMap, ValuesMap v) -> (Text, v) -> (Int, TransitionMap, ValuesMap v)
insertNeedle :: (State, TransitionMap, ValuesMap v)
-> (Text, v) -> (State, TransitionMap, ValuesMap v)
insertNeedle !(State, TransitionMap, ValuesMap v)
acc (!Text
needle, !v
value) = State
-> CodeUnitIndex
-> (State, TransitionMap, ValuesMap v)
-> (State, TransitionMap, ValuesMap v)
go State
stateInitial CodeUnitIndex
0 (State, TransitionMap, ValuesMap v)
acc
where
!needleLen :: CodeUnitIndex
needleLen = Text -> CodeUnitIndex
Utf8.lengthUtf8 Text
needle
go :: State
-> CodeUnitIndex
-> (State, TransitionMap, ValuesMap v)
-> (State, TransitionMap, ValuesMap v)
go !State
state !CodeUnitIndex
index (!State
numStates, !TransitionMap
transitions, !ValuesMap v
values)
| CodeUnitIndex
index CodeUnitIndex -> CodeUnitIndex -> Bool
forall a. Ord a => a -> a -> Bool
>= CodeUnitIndex
needleLen = (State
numStates, TransitionMap
transitions, ([v] -> [v] -> [v]) -> State -> [v] -> ValuesMap v -> ValuesMap v
forall a. (a -> a -> a) -> State -> a -> IntMap a -> IntMap a
IntMap.insertWith [v] -> [v] -> [v]
forall a. [a] -> [a] -> [a]
(++) State
state [v
value] ValuesMap v
values)
go !State
state !CodeUnitIndex
index (!State
numStates, !TransitionMap
transitions, !ValuesMap v
values) =
let
!transitionsFromState :: FallbackMap
transitionsFromState = TransitionMap
transitions TransitionMap -> State -> FallbackMap
forall a. IntMap a -> State -> a
IntMap.! State
state
(!CodeUnitIndex
codeUnits, !CodePoint
input) = Text -> CodeUnitIndex -> (CodeUnitIndex, CodePoint)
Utf8.unsafeIndexCodePoint Text
needle CodeUnitIndex
index
in
case State -> FallbackMap -> Maybe State
forall a. State -> IntMap a -> Maybe a
IntMap.lookup (CodePoint -> State
Char.ord CodePoint
input) FallbackMap
transitionsFromState of
Just !State
nextState ->
State
-> CodeUnitIndex
-> (State, TransitionMap, ValuesMap v)
-> (State, TransitionMap, ValuesMap v)
go State
nextState (CodeUnitIndex
index CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
+ CodeUnitIndex
codeUnits) (State
numStates, TransitionMap
transitions, ValuesMap v
values)
Maybe State
Nothing ->
let
!nextState :: State
nextState = State
numStates
!transitionsFromState' :: FallbackMap
transitionsFromState' = State -> State -> FallbackMap -> FallbackMap
forall a. State -> a -> IntMap a -> IntMap a
IntMap.insert (CodePoint -> State
Char.ord CodePoint
input) State
nextState FallbackMap
transitionsFromState
!transitions' :: TransitionMap
transitions'
= State -> FallbackMap -> TransitionMap -> TransitionMap
forall a. State -> a -> IntMap a -> IntMap a
IntMap.insert State
state FallbackMap
transitionsFromState'
(TransitionMap -> TransitionMap) -> TransitionMap -> TransitionMap
forall a b. (a -> b) -> a -> b
$ State -> FallbackMap -> TransitionMap -> TransitionMap
forall a. State -> a -> IntMap a -> IntMap a
IntMap.insert State
nextState FallbackMap
forall a. IntMap a
IntMap.empty TransitionMap
transitions
in
State
-> CodeUnitIndex
-> (State, TransitionMap, ValuesMap v)
-> (State, TransitionMap, ValuesMap v)
go State
nextState (CodeUnitIndex
index CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
+ CodeUnitIndex
codeUnits) (State
numStates State -> State -> State
forall a. Num a => a -> a -> a
+ State
1, TransitionMap
transitions', ValuesMap v
values)
stateInitial :: State
stateInitial = State
0
initialTransitions :: IntMap (IntMap a)
initialTransitions = State -> IntMap a -> IntMap (IntMap a)
forall a. State -> a -> IntMap a
IntMap.singleton State
stateInitial IntMap a
forall a. IntMap a
IntMap.empty
initialValues :: IntMap a
initialValues = IntMap a
forall a. IntMap a
IntMap.empty
in
((State, TransitionMap, ValuesMap v)
-> (Text, v) -> (State, TransitionMap, ValuesMap v))
-> (State, TransitionMap, ValuesMap v)
-> [(Text, v)]
-> (State, TransitionMap, ValuesMap v)
forall b a. (b -> a -> b) -> b -> [a] -> b
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (State, TransitionMap, ValuesMap v)
-> (Text, v) -> (State, TransitionMap, ValuesMap v)
insertNeedle (State
1, TransitionMap
forall {a}. IntMap (IntMap a)
initialTransitions, ValuesMap v
forall a. IntMap a
initialValues)
asciiCount :: Integral a => a
asciiCount :: forall a. Integral a => a
asciiCount = a
128
{-# NOINLINE buildAsciiTransitionLookupTable #-}
buildAsciiTransitionLookupTable :: IntMap State -> TypedByteArray Transition
buildAsciiTransitionLookupTable :: FallbackMap -> TypedByteArray Transition
buildAsciiTransitionLookupTable FallbackMap
transitions = State -> (State -> Transition) -> TypedByteArray Transition
forall a. Prim a => State -> (State -> a) -> TypedByteArray a
TBA.generate State
forall a. Integral a => a
asciiCount ((State -> Transition) -> TypedByteArray Transition)
-> (State -> Transition) -> TypedByteArray Transition
forall a b. (a -> b) -> a -> b
$ \State
i ->
case State -> FallbackMap -> Maybe State
forall a. State -> IntMap a -> Maybe a
IntMap.lookup State
i FallbackMap
transitions of
Just State
state -> CodePoint -> State -> Transition
newTransition (State -> CodePoint
Char.chr State
i) State
state
Maybe State
Nothing -> State -> Transition
newWildcardTransition State
0
foldBreadthFirst :: (a -> State -> a) -> a -> TransitionMap -> a
foldBreadthFirst :: forall a. (a -> State -> a) -> a -> TransitionMap -> a
foldBreadthFirst a -> State -> a
f a
seed TransitionMap
transitions = [State] -> [State] -> a -> a
go [State
0] [] a
seed
where
go :: [State] -> [State] -> a -> a
go [] [] !a
acc = a
acc
go [] [State]
revBacklog !a
acc = [State] -> [State] -> a -> a
go ([State] -> [State]
forall a. [a] -> [a]
reverse [State]
revBacklog) [] a
acc
go (State
state : [State]
backlog) [State]
revBacklog !a
acc =
let
extra :: [State]
extra = FallbackMap -> [State]
forall a. IntMap a -> [a]
IntMap.elems (FallbackMap -> [State]) -> FallbackMap -> [State]
forall a b. (a -> b) -> a -> b
$ TransitionMap
transitions TransitionMap -> State -> FallbackMap
forall a. IntMap a -> State -> a
IntMap.! State
state
in
[State] -> [State] -> a -> a
go [State]
backlog ([State]
extra [State] -> [State] -> [State]
forall a. [a] -> [a] -> [a]
++ [State]
revBacklog) (a -> State -> a
f a
acc State
state)
buildFallbackMap :: TransitionMap -> FallbackMap
buildFallbackMap :: TransitionMap -> FallbackMap
buildFallbackMap TransitionMap
transitions =
let
getFallback :: FallbackMap -> State -> Int -> State
getFallback :: FallbackMap -> State -> State -> State
getFallback FallbackMap
_ State
0 State
_ = State
0
getFallback FallbackMap
fallbacks !State
state !State
input =
let
fallback :: State
fallback = FallbackMap
fallbacks FallbackMap -> State -> State
forall a. IntMap a -> State -> a
IntMap.! State
state
transitionsFromFallback :: FallbackMap
transitionsFromFallback = TransitionMap
transitions TransitionMap -> State -> FallbackMap
forall a. IntMap a -> State -> a
IntMap.! State
fallback
in
case State -> FallbackMap -> Maybe State
forall a. State -> IntMap a -> Maybe a
IntMap.lookup State
input FallbackMap
transitionsFromFallback of
Just State
st -> State
st
Maybe State
Nothing -> FallbackMap -> State -> State -> State
getFallback FallbackMap
fallbacks State
fallback State
input
insertFallback :: State -> FallbackMap -> Int -> State -> FallbackMap
insertFallback :: State -> FallbackMap -> State -> State -> FallbackMap
insertFallback !State
state FallbackMap
fallbacks !State
input !State
nextState =
State -> State -> FallbackMap -> FallbackMap
forall a. State -> a -> IntMap a -> IntMap a
IntMap.insert State
nextState (FallbackMap -> State -> State -> State
getFallback FallbackMap
fallbacks State
state State
input) FallbackMap
fallbacks
insertFallbacks :: FallbackMap -> State -> FallbackMap
insertFallbacks :: FallbackMap -> State -> FallbackMap
insertFallbacks FallbackMap
fallbacks !State
state =
(FallbackMap -> State -> State -> FallbackMap)
-> FallbackMap -> FallbackMap -> FallbackMap
forall a b. (a -> State -> b -> a) -> a -> IntMap b -> a
IntMap.foldlWithKey' (State -> FallbackMap -> State -> State -> FallbackMap
insertFallback State
state) FallbackMap
fallbacks (TransitionMap
transitions TransitionMap -> State -> FallbackMap
forall a. IntMap a -> State -> a
IntMap.! State
state)
in
(FallbackMap -> State -> FallbackMap)
-> FallbackMap -> TransitionMap -> FallbackMap
forall a. (a -> State -> a) -> a -> TransitionMap -> a
foldBreadthFirst FallbackMap -> State -> FallbackMap
insertFallbacks (State -> State -> FallbackMap
forall a. State -> a -> IntMap a
IntMap.singleton State
0 State
0) TransitionMap
transitions
buildValueMap :: forall v. TransitionMap -> FallbackMap -> ValuesMap v -> ValuesMap v
buildValueMap :: forall v.
TransitionMap -> FallbackMap -> ValuesMap v -> ValuesMap v
buildValueMap TransitionMap
transitions FallbackMap
fallbacks ValuesMap v
valuesInitial =
let
insertValues :: ValuesMap v -> State -> ValuesMap v
insertValues :: ValuesMap v -> State -> ValuesMap v
insertValues ValuesMap v
values !State
state =
let
fallbackValues :: [v]
fallbackValues = ValuesMap v
values ValuesMap v -> State -> [v]
forall a. IntMap a -> State -> a
IntMap.! (FallbackMap
fallbacks FallbackMap -> State -> State
forall a. IntMap a -> State -> a
IntMap.! State
state)
valuesForState :: [v]
valuesForState = case State -> ValuesMap v -> Maybe [v]
forall a. State -> IntMap a -> Maybe a
IntMap.lookup State
state ValuesMap v
valuesInitial of
Just [v]
vs -> [v]
vs [v] -> [v] -> [v]
forall a. [a] -> [a] -> [a]
++ [v]
fallbackValues
Maybe [v]
Nothing -> [v]
fallbackValues
in
State -> [v] -> ValuesMap v -> ValuesMap v
forall a. State -> a -> IntMap a -> IntMap a
IntMap.insert State
state [v]
valuesForState ValuesMap v
values
in
(ValuesMap v -> State -> ValuesMap v)
-> ValuesMap v -> TransitionMap -> ValuesMap v
forall a. (a -> State -> a) -> a -> TransitionMap -> a
foldBreadthFirst ValuesMap v -> State -> ValuesMap v
insertValues (State -> [v] -> ValuesMap v
forall a. State -> a -> IntMap a
IntMap.singleton State
0 []) TransitionMap
transitions
{-# INLINE at #-}
at :: forall a. Vector.Vector a -> Int -> a
at :: forall a. Vector a -> State -> a
at = Vector a -> State -> a
forall a. Vector a -> State -> a
Vector.unsafeIndex
{-# INLINE uAt #-}
uAt :: Prim a => TypedByteArray a -> Int -> a
uAt :: forall a. Prim a => TypedByteArray a -> State -> a
uAt = TypedByteArray a -> State -> a
forall a. Prim a => TypedByteArray a -> State -> a
TBA.unsafeIndex
data Next a = Done !a | Step !a
{-# INLINE runWithCase #-}
runWithCase :: forall a v. CaseSensitivity -> a -> (a -> Match v -> Next a) -> AcMachine v -> Text -> a
runWithCase :: forall a v.
CaseSensitivity
-> a -> (a -> Match v -> Next a) -> AcMachine v -> Text -> a
runWithCase !CaseSensitivity
caseSensitivity !a
seed !a -> Match v -> Next a
f !AcMachine v
machine !Text
text =
CodeUnitIndex -> a -> State -> a
consumeInput CodeUnitIndex
initialOffset a
seed State
initialState
where
initialState :: State
initialState = State
0
Text !Array
u8data !State
off !State
len = Text
text
AcMachine !Vector [v]
values !TypedByteArray Transition
transitions !TypedByteArray Offset
offsets !TypedByteArray Transition
rootAsciiTransitions = AcMachine v
machine
!initialOffset :: CodeUnitIndex
initialOffset = State -> CodeUnitIndex
CodeUnitIndex State
off
!limit :: CodeUnitIndex
limit = State -> CodeUnitIndex
CodeUnitIndex (State -> CodeUnitIndex) -> State -> CodeUnitIndex
forall a b. (a -> b) -> a -> b
$ State
off State -> State -> State
forall a. Num a => a -> a -> a
+ State
len
{-# NOINLINE consumeInput #-}
consumeInput :: CodeUnitIndex -> a -> State -> a
consumeInput :: CodeUnitIndex -> a -> State -> a
consumeInput !CodeUnitIndex
offset !a
acc !State
_state
| CodeUnitIndex
offset CodeUnitIndex -> CodeUnitIndex -> Bool
forall a. Ord a => a -> a -> Bool
>= CodeUnitIndex
limit = a
acc
consumeInput !CodeUnitIndex
offset !a
acc !State
state =
CodeUnitIndex -> a -> CodePoint -> State -> a
followCodePoint (CodeUnitIndex
offset CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
+ CodeUnitIndex
codeUnits) a
acc CodePoint
possiblyLoweredCp State
state
where
(!CodeUnitIndex
codeUnits, !CodePoint
cp) = Array -> CodeUnitIndex -> (CodeUnitIndex, CodePoint)
Utf8.unsafeIndexCodePoint' Array
u8data CodeUnitIndex
offset
!possiblyLoweredCp :: CodePoint
possiblyLoweredCp = case CaseSensitivity
caseSensitivity of
CaseSensitivity
CaseSensitive -> CodePoint
cp
CaseSensitivity
IgnoreCase -> CodePoint -> CodePoint
Utf8.lowerCodePoint CodePoint
cp
{-# INLINE followCodePoint #-}
followCodePoint :: CodeUnitIndex -> a -> CodePoint -> State -> a
followCodePoint :: CodeUnitIndex -> a -> CodePoint -> State -> a
followCodePoint !CodeUnitIndex
offset !a
acc !CodePoint
cp !State
state
| State
state State -> State -> Bool
forall a. Eq a => a -> a -> Bool
== State
initialState Bool -> Bool -> Bool
&& CodePoint -> State
Char.ord CodePoint
cp State -> State -> Bool
forall a. Ord a => a -> a -> Bool
< State
forall a. Integral a => a
asciiCount = CodeUnitIndex -> a -> CodePoint -> a
lookupRootAsciiTransition CodeUnitIndex
offset a
acc CodePoint
cp
| Bool
otherwise = CodeUnitIndex -> a -> CodePoint -> State -> Offset -> a
lookupTransition CodeUnitIndex
offset a
acc CodePoint
cp State
state (Offset -> a) -> Offset -> a
forall a b. (a -> b) -> a -> b
$ TypedByteArray Offset
offsets TypedByteArray Offset -> State -> Offset
forall a. Prim a => TypedByteArray a -> State -> a
`uAt` State
state
{-# NOINLINE lookupTransition #-}
lookupTransition :: CodeUnitIndex -> a -> CodePoint -> State -> Offset -> a
lookupTransition :: CodeUnitIndex -> a -> CodePoint -> State -> Offset -> a
lookupTransition !CodeUnitIndex
offset !a
acc !CodePoint
cp !State
state !Offset
i
| Transition -> Bool
transitionIsWildcard Transition
t =
if State
state State -> State -> Bool
forall a. Eq a => a -> a -> Bool
== State
initialState
then CodeUnitIndex -> a -> State -> a
consumeInput CodeUnitIndex
offset a
acc State
state
else CodeUnitIndex -> a -> CodePoint -> State -> a
followCodePoint CodeUnitIndex
offset a
acc CodePoint
cp (Transition -> State
transitionState Transition
t)
| Transition -> CodePoint
transitionCodeUnit Transition
t CodePoint -> CodePoint -> Bool
forall a. Eq a => a -> a -> Bool
== CodePoint
cp =
CodeUnitIndex -> a -> State -> a
collectMatches CodeUnitIndex
offset a
acc (Transition -> State
transitionState Transition
t)
| Bool
otherwise =
CodeUnitIndex -> a -> CodePoint -> State -> Offset -> a
lookupTransition CodeUnitIndex
offset a
acc CodePoint
cp State
state (Offset -> a) -> Offset -> a
forall a b. (a -> b) -> a -> b
$ Offset
i Offset -> Offset -> Offset
forall a. Num a => a -> a -> a
+ Offset
1
where
!t :: Transition
t = TypedByteArray Transition
transitions TypedByteArray Transition -> State -> Transition
forall a. Prim a => TypedByteArray a -> State -> a
`uAt` Offset -> State
forall a b. (Integral a, Num b) => a -> b
fromIntegral Offset
i
{-# INLINE lookupRootAsciiTransition #-}
lookupRootAsciiTransition :: CodeUnitIndex -> a -> CodePoint -> a
lookupRootAsciiTransition !CodeUnitIndex
offset !a
acc !CodePoint
cp
| Transition -> Bool
transitionIsWildcard Transition
t = CodeUnitIndex -> a -> State -> a
consumeInput CodeUnitIndex
offset a
acc State
initialState
| Bool
otherwise = CodeUnitIndex -> a -> State -> a
collectMatches CodeUnitIndex
offset a
acc (State -> a) -> State -> a
forall a b. (a -> b) -> a -> b
$ Transition -> State
transitionState Transition
t
where !t :: Transition
t = TypedByteArray Transition
rootAsciiTransitions TypedByteArray Transition -> State -> Transition
forall a. Prim a => TypedByteArray a -> State -> a
`uAt` CodePoint -> State
Char.ord CodePoint
cp
{-# NOINLINE collectMatches #-}
collectMatches :: CodeUnitIndex -> a -> State -> a
collectMatches !CodeUnitIndex
offset !a
acc !State
state =
let
matchedValues :: [v]
matchedValues = Vector [v]
values Vector [v] -> State -> [v]
forall a. Vector a -> State -> a
`at` State
state
handleMatch :: a -> [v] -> a
handleMatch !a
acc' [v]
vs = case [v]
vs of
[] -> CodeUnitIndex -> a -> State -> a
consumeInput CodeUnitIndex
offset a
acc' State
state
v
v:[v]
more -> case a -> Match v -> Next a
f a
acc' (CodeUnitIndex -> v -> Match v
forall v. CodeUnitIndex -> v -> Match v
Match (CodeUnitIndex
offset CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
- CodeUnitIndex
initialOffset) v
v) of
Step a
newAcc -> a -> [v] -> a
handleMatch a
newAcc [v]
more
Done a
finalAcc -> a
finalAcc
in
a -> [v] -> a
handleMatch a
acc [v]
matchedValues
{-# INLINE runText #-}
runText :: forall a v. a -> (a -> Match v -> Next a) -> AcMachine v -> Text -> a
runText :: forall a v.
a -> (a -> Match v -> Next a) -> AcMachine v -> Text -> a
runText = CaseSensitivity
-> a -> (a -> Match v -> Next a) -> AcMachine v -> Text -> a
forall a v.
CaseSensitivity
-> a -> (a -> Match v -> Next a) -> AcMachine v -> Text -> a
runWithCase CaseSensitivity
CaseSensitive
{-# INLINE runLower #-}
runLower :: forall a v. a -> (a -> Match v -> Next a) -> AcMachine v -> Text -> a
runLower :: forall a v.
a -> (a -> Match v -> Next a) -> AcMachine v -> Text -> a
runLower = CaseSensitivity
-> a -> (a -> Match v -> Next a) -> AcMachine v -> Text -> a
forall a v.
CaseSensitivity
-> a -> (a -> Match v -> Next a) -> AcMachine v -> Text -> a
runWithCase CaseSensitivity
IgnoreCase
needleCasings :: Text -> [Text]
needleCasings :: Text -> [Text]
needleCasings = (String -> Text) -> [String] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map String -> Text
Text.pack ([String] -> [Text]) -> (Text -> [String]) -> Text -> [Text]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> [String]
loop (String -> [String]) -> (Text -> String) -> Text -> [String]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> String
Text.unpack
where
loop :: String -> [String]
loop String
"" = [String
""]
loop (CodePoint
c:String
cs) = (:) (CodePoint -> String -> String) -> String -> [String -> String]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> CodePoint -> String
Utf8.unlowerCodePoint CodePoint
c [String -> String] -> [String] -> [String]
forall a b. [a -> b] -> [a] -> [b]
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> String -> [String]
loop String
cs