{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Data.Text.AhoCorasick.Automaton
( AcMachine (..)
, CaseSensitivity (..)
, CodeUnitIndex (..)
, Match (..)
, Next (..)
, build
, debugBuildDot
, runLower
, runText
, runWithCase
) where
import Control.DeepSeq (NFData)
import Data.Bits (Bits (shiftL, shiftR, (.&.), (.|.)))
import Data.Char (chr)
import Data.Foldable (foldl')
import Data.IntMap.Strict (IntMap)
import Data.Word (Word32, Word64)
import GHC.Generics (Generic)
import qualified Data.Char as Char
import qualified Data.IntMap.Strict as IntMap
import qualified Data.List as List
import qualified Data.Vector as Vector
import Data.Text.CaseSensitivity (CaseSensitivity (..))
import Data.Text.Utf8 (CodePoint, CodeUnitIndex (CodeUnitIndex), Text (..))
import Data.TypedByteArray (Prim, TypedByteArray)
import qualified Data.Text.Utf8 as Utf8
import qualified Data.TypedByteArray as TBA
type State = Int
type Transition = Word64
type Offset = Word32
data Match v = Match
{ Match v -> CodeUnitIndex
matchPos :: {-# UNPACK #-} !CodeUnitIndex
, Match v -> v
matchValue :: v
}
data AcMachine v = AcMachine
{ AcMachine v -> Vector [v]
machineValues :: !(Vector.Vector [v])
, AcMachine v -> TypedByteArray Transition
machineTransitions :: !(TypedByteArray Transition)
, AcMachine v -> TypedByteArray Offset
machineOffsets :: !(TypedByteArray Offset)
, AcMachine v -> TypedByteArray Transition
machineRootAsciiTransitions :: !(TypedByteArray Transition)
} deriving ((forall x. AcMachine v -> Rep (AcMachine v) x)
-> (forall x. Rep (AcMachine v) x -> AcMachine v)
-> Generic (AcMachine v)
forall x. Rep (AcMachine v) x -> AcMachine v
forall x. AcMachine v -> Rep (AcMachine v) x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
forall v x. Rep (AcMachine v) x -> AcMachine v
forall v x. AcMachine v -> Rep (AcMachine v) x
$cto :: forall v x. Rep (AcMachine v) x -> AcMachine v
$cfrom :: forall v x. AcMachine v -> Rep (AcMachine v) x
Generic)
instance NFData v => NFData (AcMachine v)
wildcard :: Integral a => a
wildcard :: a
wildcard = a
0x200000
transitionCodeUnit :: Transition -> CodePoint
transitionCodeUnit :: Transition -> CodePoint
transitionCodeUnit Transition
t = Int -> CodePoint
Char.chr (Int -> CodePoint) -> Int -> CodePoint
forall a b. (a -> b) -> a -> b
$ Transition -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Transition
t Transition -> Transition -> Transition
forall a. Bits a => a -> a -> a
.&. Transition
0x1fffff)
transitionState :: Transition -> State
transitionState :: Transition -> Int
transitionState Transition
t = Transition -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Transition
t Transition -> Int -> Transition
forall a. Bits a => a -> Int -> a
`shiftR` Int
32)
transitionIsWildcard :: Transition -> Bool
transitionIsWildcard :: Transition -> Bool
transitionIsWildcard Transition
t = (Transition
t Transition -> Transition -> Transition
forall a. Bits a => a -> a -> a
.&. Transition
forall a. Integral a => a
wildcard) Transition -> Transition -> Bool
forall a. Eq a => a -> a -> Bool
== Transition
forall a. Integral a => a
wildcard
newTransition :: CodePoint -> State -> Transition
newTransition :: CodePoint -> Int -> Transition
newTransition CodePoint
input Int
state =
let
input64 :: Transition
input64 = Int -> Transition
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Transition) -> Int -> Transition
forall a b. (a -> b) -> a -> b
$ CodePoint -> Int
Char.ord CodePoint
input :: Word64
state64 :: Transition
state64 = Int -> Transition
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
state :: Word64
in
(Transition
state64 Transition -> Int -> Transition
forall a. Bits a => a -> Int -> a
`shiftL` Int
32) Transition -> Transition -> Transition
forall a. Bits a => a -> a -> a
.|. Transition
input64
newWildcardTransition :: State -> Transition
newWildcardTransition :: Int -> Transition
newWildcardTransition Int
state =
let
state64 :: Transition
state64 = Int -> Transition
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
state :: Word64
in
(Transition
state64 Transition -> Int -> Transition
forall a. Bits a => a -> Int -> a
`shiftL` Int
32) Transition -> Transition -> Transition
forall a. Bits a => a -> a -> a
.|. Transition
forall a. Integral a => a
wildcard
packTransitions :: [[Transition]] -> (TypedByteArray Transition, TypedByteArray Offset)
packTransitions :: [[Transition]]
-> (TypedByteArray Transition, TypedByteArray Offset)
packTransitions [[Transition]]
transitions =
let
packed :: TypedByteArray Transition
packed = [Transition] -> TypedByteArray Transition
forall a. Prim a => [a] -> TypedByteArray a
TBA.fromList ([Transition] -> TypedByteArray Transition)
-> [Transition] -> TypedByteArray Transition
forall a b. (a -> b) -> a -> b
$ [[Transition]] -> [Transition]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat [[Transition]]
transitions
offsets :: TypedByteArray Offset
offsets = [Offset] -> TypedByteArray Offset
forall a. Prim a => [a] -> TypedByteArray a
TBA.fromList ([Offset] -> TypedByteArray Offset)
-> [Offset] -> TypedByteArray Offset
forall a b. (a -> b) -> a -> b
$ (Int -> Offset) -> [Int] -> [Offset]
forall a b. (a -> b) -> [a] -> [b]
map Int -> Offset
forall a b. (Integral a, Num b) => a -> b
fromIntegral ([Int] -> [Offset]) -> [Int] -> [Offset]
forall a b. (a -> b) -> a -> b
$ (Int -> Int -> Int) -> Int -> [Int] -> [Int]
forall b a. (b -> a -> b) -> b -> [a] -> [b]
scanl Int -> Int -> Int
forall a. Num a => a -> a -> a
(+) Int
0 ([Int] -> [Int]) -> [Int] -> [Int]
forall a b. (a -> b) -> a -> b
$ ([Transition] -> Int) -> [[Transition]] -> [Int]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap [Transition] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
List.length [[Transition]]
transitions
in
(TypedByteArray Transition
packed, TypedByteArray Offset
offsets)
build :: [(Text, v)] -> AcMachine v
build :: [(Text, v)] -> AcMachine v
build [(Text, v)]
needlesWithValues =
let
(Int
numStates, TransitionMap
transitionMap, ValuesMap v
initialValueMap) = [(Text, v)] -> (Int, TransitionMap, ValuesMap v)
forall v. [(Text, v)] -> (Int, TransitionMap, ValuesMap v)
buildTransitionMap [(Text, v)]
needlesWithValues
fallbackMap :: FallbackMap
fallbackMap = TransitionMap -> FallbackMap
buildFallbackMap TransitionMap
transitionMap
valueMap :: ValuesMap v
valueMap = TransitionMap -> FallbackMap -> ValuesMap v -> ValuesMap v
forall v.
TransitionMap -> FallbackMap -> ValuesMap v -> ValuesMap v
buildValueMap TransitionMap
transitionMap FallbackMap
fallbackMap ValuesMap v
initialValueMap
prependTransition :: [Transition] -> Int -> Int -> [Transition]
prependTransition [Transition]
ts Int
input Int
state = CodePoint -> Int -> Transition
newTransition (Int -> CodePoint
Char.chr Int
input) Int
state Transition -> [Transition] -> [Transition]
forall a. a -> [a] -> [a]
: [Transition]
ts
makeTransitions :: Int -> FallbackMap -> [Transition]
makeTransitions Int
fallback FallbackMap
ts = ([Transition] -> Int -> Int -> [Transition])
-> [Transition] -> FallbackMap -> [Transition]
forall a b. (a -> Int -> b -> a) -> a -> IntMap b -> a
IntMap.foldlWithKey' [Transition] -> Int -> Int -> [Transition]
prependTransition [Int -> Transition
newWildcardTransition Int
fallback] FallbackMap
ts
transitionsList :: [[Transition]]
transitionsList = (Int -> FallbackMap -> [Transition])
-> [Int] -> [FallbackMap] -> [[Transition]]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith Int -> FallbackMap -> [Transition]
makeTransitions (FallbackMap -> [Int]
forall a. IntMap a -> [a]
IntMap.elems FallbackMap
fallbackMap) (TransitionMap -> [FallbackMap]
forall a. IntMap a -> [a]
IntMap.elems TransitionMap
transitionMap)
(TypedByteArray Transition
transitions, TypedByteArray Offset
offsets) = [[Transition]]
-> (TypedByteArray Transition, TypedByteArray Offset)
packTransitions [[Transition]]
transitionsList
rootTransitions :: TypedByteArray Transition
rootTransitions = FallbackMap -> TypedByteArray Transition
buildAsciiTransitionLookupTable (FallbackMap -> TypedByteArray Transition)
-> FallbackMap -> TypedByteArray Transition
forall a b. (a -> b) -> a -> b
$ TransitionMap
transitionMap TransitionMap -> Int -> FallbackMap
forall a. IntMap a -> Int -> a
IntMap.! Int
0
values :: Vector [v]
values = Int -> (Int -> [v]) -> Vector [v]
forall a. Int -> (Int -> a) -> Vector a
Vector.generate Int
numStates (ValuesMap v
valueMap ValuesMap v -> Int -> [v]
forall a. IntMap a -> Int -> a
IntMap.!)
in
Vector [v]
-> TypedByteArray Transition
-> TypedByteArray Offset
-> TypedByteArray Transition
-> AcMachine v
forall v.
Vector [v]
-> TypedByteArray Transition
-> TypedByteArray Offset
-> TypedByteArray Transition
-> AcMachine v
AcMachine Vector [v]
values TypedByteArray Transition
transitions TypedByteArray Offset
offsets TypedByteArray Transition
rootTransitions
debugBuildDot :: [Text] -> String
debugBuildDot :: [Text] -> String
debugBuildDot [Text]
needles =
let
(Int
_numStates, TransitionMap
transitionMap, ValuesMap Int
initialValueMap) =
[(Text, Int)] -> (Int, TransitionMap, ValuesMap Int)
forall v. [(Text, v)] -> (Int, TransitionMap, ValuesMap v)
buildTransitionMap ([(Text, Int)] -> (Int, TransitionMap, ValuesMap Int))
-> [(Text, Int)] -> (Int, TransitionMap, ValuesMap Int)
forall a b. (a -> b) -> a -> b
$ [Text] -> [Int] -> [(Text, Int)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Text]
needles ([Int
0..] :: [Int])
fallbackMap :: FallbackMap
fallbackMap = TransitionMap -> FallbackMap
buildFallbackMap TransitionMap
transitionMap
valueMap :: ValuesMap Int
valueMap = TransitionMap -> FallbackMap -> ValuesMap Int -> ValuesMap Int
forall v.
TransitionMap -> FallbackMap -> ValuesMap v -> ValuesMap v
buildValueMap TransitionMap
transitionMap FallbackMap
fallbackMap ValuesMap Int
initialValueMap
dotEdge :: String -> a -> a -> String
dotEdge String
extra a
state a
nextState =
String
" " String -> String -> String
forall a. [a] -> [a] -> [a]
++ a -> String
forall a. Show a => a -> String
show a
state String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
" -> " String -> String -> String
forall a. [a] -> [a] -> [a]
++ a -> String
forall a. Show a => a -> String
show a
nextState String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
" [" String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
extra String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
"];"
dotFallbackEdge :: [String] -> State -> State -> [String]
dotFallbackEdge :: [String] -> Int -> Int -> [String]
dotFallbackEdge [String]
edges Int
state Int
nextState =
String -> Int -> Int -> String
forall a a. (Show a, Show a) => String -> a -> a -> String
dotEdge String
"style = dashed" Int
state Int
nextState String -> [String] -> [String]
forall a. a -> [a] -> [a]
: [String]
edges
dotTransitionEdge :: State -> [String] -> Int -> State -> [String]
dotTransitionEdge :: Int -> [String] -> Int -> Int -> [String]
dotTransitionEdge Int
state [String]
edges Int
input Int
nextState =
String -> Int -> Int -> String
forall a a. (Show a, Show a) => String -> a -> a -> String
dotEdge (String
"label = \"" String -> String -> String
forall a. [a] -> [a] -> [a]
++ Int -> String
showInput Int
input String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
"\"") Int
state Int
nextState String -> [String] -> [String]
forall a. a -> [a] -> [a]
: [String]
edges
showInput :: Int -> String
showInput Int
input = [Int -> CodePoint
chr Int
input]
prependTransitionEdges :: [String] -> Int -> [String]
prependTransitionEdges [String]
edges Int
state =
([String] -> Int -> Int -> [String])
-> [String] -> FallbackMap -> [String]
forall a b. (a -> Int -> b -> a) -> a -> IntMap b -> a
IntMap.foldlWithKey' (Int -> [String] -> Int -> Int -> [String]
dotTransitionEdge Int
state) [String]
edges (TransitionMap
transitionMap TransitionMap -> Int -> FallbackMap
forall a. IntMap a -> Int -> a
IntMap.! Int
state)
dotMatchState :: [String] -> State -> [Int] -> [String]
dotMatchState :: [String] -> Int -> [Int] -> [String]
dotMatchState [String]
edges Int
_ [] = [String]
edges
dotMatchState [String]
edges Int
state [Int]
_ = (String
" " String -> String -> String
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show Int
state String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
" [shape = doublecircle];") String -> [String] -> [String]
forall a. a -> [a] -> [a]
: [String]
edges
dot0 :: [String]
dot0 = ([String] -> Int -> [String])
-> [String] -> TransitionMap -> [String]
forall a. (a -> Int -> a) -> a -> TransitionMap -> a
foldBreadthFirst [String] -> Int -> [String]
prependTransitionEdges [] TransitionMap
transitionMap
dot1 :: [String]
dot1 = ([String] -> Int -> Int -> [String])
-> [String] -> FallbackMap -> [String]
forall a b. (a -> Int -> b -> a) -> a -> IntMap b -> a
IntMap.foldlWithKey' [String] -> Int -> Int -> [String]
dotFallbackEdge [String]
dot0 FallbackMap
fallbackMap
dot2 :: [String]
dot2 = ([String] -> Int -> [Int] -> [String])
-> [String] -> ValuesMap Int -> [String]
forall a b. (a -> Int -> b -> a) -> a -> IntMap b -> a
IntMap.foldlWithKey' [String] -> Int -> [Int] -> [String]
dotMatchState [String]
dot1 ValuesMap Int
valueMap
in
[String] -> String
unlines ([String] -> String) -> [String] -> String
forall a b. (a -> b) -> a -> b
$ [String
"digraph {", String
" rankdir = \"LR\";"] [String] -> [String] -> [String]
forall a. [a] -> [a] -> [a]
++ [String] -> [String]
forall a. [a] -> [a]
reverse [String]
dot2 [String] -> [String] -> [String]
forall a. [a] -> [a] -> [a]
++ [String
"}"]
type TransitionMap = IntMap (IntMap State)
type FallbackMap = IntMap State
type ValuesMap v = IntMap [v]
buildTransitionMap :: forall v. [(Text, v)] -> (Int, TransitionMap, ValuesMap v)
buildTransitionMap :: [(Text, v)] -> (Int, TransitionMap, ValuesMap v)
buildTransitionMap =
let
insertNeedle :: (Int, TransitionMap, ValuesMap v) -> (Text, v) -> (Int, TransitionMap, ValuesMap v)
insertNeedle :: (Int, TransitionMap, ValuesMap v)
-> (Text, v) -> (Int, TransitionMap, ValuesMap v)
insertNeedle !(Int, TransitionMap, ValuesMap v)
acc (!Text
needle, !v
value) = Int
-> CodeUnitIndex
-> (Int, TransitionMap, ValuesMap v)
-> (Int, TransitionMap, ValuesMap v)
go Int
stateInitial CodeUnitIndex
0 (Int, TransitionMap, ValuesMap v)
acc
where
!needleLen :: CodeUnitIndex
needleLen = Text -> CodeUnitIndex
Utf8.lengthUtf8 Text
needle
go :: Int
-> CodeUnitIndex
-> (Int, TransitionMap, ValuesMap v)
-> (Int, TransitionMap, ValuesMap v)
go !Int
state !CodeUnitIndex
index (!Int
numStates, !TransitionMap
transitions, !ValuesMap v
values)
| CodeUnitIndex
index CodeUnitIndex -> CodeUnitIndex -> Bool
forall a. Ord a => a -> a -> Bool
>= CodeUnitIndex
needleLen = (Int
numStates, TransitionMap
transitions, ([v] -> [v] -> [v]) -> Int -> [v] -> ValuesMap v -> ValuesMap v
forall a. (a -> a -> a) -> Int -> a -> IntMap a -> IntMap a
IntMap.insertWith [v] -> [v] -> [v]
forall a. [a] -> [a] -> [a]
(++) Int
state [v
value] ValuesMap v
values)
go !Int
state !CodeUnitIndex
index (!Int
numStates, !TransitionMap
transitions, !ValuesMap v
values) =
let
!transitionsFromState :: FallbackMap
transitionsFromState = TransitionMap
transitions TransitionMap -> Int -> FallbackMap
forall a. IntMap a -> Int -> a
IntMap.! Int
state
(!CodeUnitIndex
codeUnits, !CodePoint
input) = Text -> CodeUnitIndex -> (CodeUnitIndex, CodePoint)
Utf8.unsafeIndexCodePoint Text
needle CodeUnitIndex
index
in
case Int -> FallbackMap -> Maybe Int
forall a. Int -> IntMap a -> Maybe a
IntMap.lookup (CodePoint -> Int
Char.ord CodePoint
input) FallbackMap
transitionsFromState of
Just !Int
nextState ->
Int
-> CodeUnitIndex
-> (Int, TransitionMap, ValuesMap v)
-> (Int, TransitionMap, ValuesMap v)
go Int
nextState (CodeUnitIndex
index CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
+ CodeUnitIndex
codeUnits) (Int
numStates, TransitionMap
transitions, ValuesMap v
values)
Maybe Int
Nothing ->
let
!nextState :: Int
nextState = Int
numStates
!transitionsFromState' :: FallbackMap
transitionsFromState' = Int -> Int -> FallbackMap -> FallbackMap
forall a. Int -> a -> IntMap a -> IntMap a
IntMap.insert (CodePoint -> Int
Char.ord CodePoint
input) Int
nextState FallbackMap
transitionsFromState
!transitions' :: TransitionMap
transitions'
= Int -> FallbackMap -> TransitionMap -> TransitionMap
forall a. Int -> a -> IntMap a -> IntMap a
IntMap.insert Int
state FallbackMap
transitionsFromState'
(TransitionMap -> TransitionMap) -> TransitionMap -> TransitionMap
forall a b. (a -> b) -> a -> b
$ Int -> FallbackMap -> TransitionMap -> TransitionMap
forall a. Int -> a -> IntMap a -> IntMap a
IntMap.insert Int
nextState FallbackMap
forall a. IntMap a
IntMap.empty TransitionMap
transitions
in
Int
-> CodeUnitIndex
-> (Int, TransitionMap, ValuesMap v)
-> (Int, TransitionMap, ValuesMap v)
go Int
nextState (CodeUnitIndex
index CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
+ CodeUnitIndex
codeUnits) (Int
numStates Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1, TransitionMap
transitions', ValuesMap v
values)
stateInitial :: Int
stateInitial = Int
0
initialTransitions :: IntMap (IntMap a)
initialTransitions = Int -> IntMap a -> IntMap (IntMap a)
forall a. Int -> a -> IntMap a
IntMap.singleton Int
stateInitial IntMap a
forall a. IntMap a
IntMap.empty
initialValues :: IntMap a
initialValues = IntMap a
forall a. IntMap a
IntMap.empty
in
((Int, TransitionMap, ValuesMap v)
-> (Text, v) -> (Int, TransitionMap, ValuesMap v))
-> (Int, TransitionMap, ValuesMap v)
-> [(Text, v)]
-> (Int, TransitionMap, ValuesMap v)
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (Int, TransitionMap, ValuesMap v)
-> (Text, v) -> (Int, TransitionMap, ValuesMap v)
insertNeedle (Int
1, TransitionMap
forall a. IntMap (IntMap a)
initialTransitions, ValuesMap v
forall a. IntMap a
initialValues)
asciiCount :: Integral a => a
asciiCount :: a
asciiCount = a
128
{-# NOINLINE buildAsciiTransitionLookupTable #-}
buildAsciiTransitionLookupTable :: IntMap State -> TypedByteArray Transition
buildAsciiTransitionLookupTable :: FallbackMap -> TypedByteArray Transition
buildAsciiTransitionLookupTable FallbackMap
transitions = Int -> (Int -> Transition) -> TypedByteArray Transition
forall a. Prim a => Int -> (Int -> a) -> TypedByteArray a
TBA.generate Int
forall a. Integral a => a
asciiCount ((Int -> Transition) -> TypedByteArray Transition)
-> (Int -> Transition) -> TypedByteArray Transition
forall a b. (a -> b) -> a -> b
$ \Int
i ->
case Int -> FallbackMap -> Maybe Int
forall a. Int -> IntMap a -> Maybe a
IntMap.lookup Int
i FallbackMap
transitions of
Just Int
state -> CodePoint -> Int -> Transition
newTransition (Int -> CodePoint
Char.chr Int
i) Int
state
Maybe Int
Nothing -> Int -> Transition
newWildcardTransition Int
0
foldBreadthFirst :: (a -> State -> a) -> a -> TransitionMap -> a
foldBreadthFirst :: (a -> Int -> a) -> a -> TransitionMap -> a
foldBreadthFirst a -> Int -> a
f a
seed TransitionMap
transitions = [Int] -> [Int] -> a -> a
go [Int
0] [] a
seed
where
go :: [Int] -> [Int] -> a -> a
go [] [] !a
acc = a
acc
go [] [Int]
revBacklog !a
acc = [Int] -> [Int] -> a -> a
go ([Int] -> [Int]
forall a. [a] -> [a]
reverse [Int]
revBacklog) [] a
acc
go (Int
state : [Int]
backlog) [Int]
revBacklog !a
acc =
let
extra :: [Int]
extra = FallbackMap -> [Int]
forall a. IntMap a -> [a]
IntMap.elems (FallbackMap -> [Int]) -> FallbackMap -> [Int]
forall a b. (a -> b) -> a -> b
$ TransitionMap
transitions TransitionMap -> Int -> FallbackMap
forall a. IntMap a -> Int -> a
IntMap.! Int
state
in
[Int] -> [Int] -> a -> a
go [Int]
backlog ([Int]
extra [Int] -> [Int] -> [Int]
forall a. [a] -> [a] -> [a]
++ [Int]
revBacklog) (a -> Int -> a
f a
acc Int
state)
buildFallbackMap :: TransitionMap -> FallbackMap
buildFallbackMap :: TransitionMap -> FallbackMap
buildFallbackMap TransitionMap
transitions =
let
getFallback :: FallbackMap -> State -> Int -> State
getFallback :: FallbackMap -> Int -> Int -> Int
getFallback FallbackMap
_ Int
0 Int
_ = Int
0
getFallback FallbackMap
fallbacks !Int
state !Int
input =
let
fallback :: Int
fallback = FallbackMap
fallbacks FallbackMap -> Int -> Int
forall a. IntMap a -> Int -> a
IntMap.! Int
state
transitionsFromFallback :: FallbackMap
transitionsFromFallback = TransitionMap
transitions TransitionMap -> Int -> FallbackMap
forall a. IntMap a -> Int -> a
IntMap.! Int
fallback
in
case Int -> FallbackMap -> Maybe Int
forall a. Int -> IntMap a -> Maybe a
IntMap.lookup Int
input FallbackMap
transitionsFromFallback of
Just Int
st -> Int
st
Maybe Int
Nothing -> FallbackMap -> Int -> Int -> Int
getFallback FallbackMap
fallbacks Int
fallback Int
input
insertFallback :: State -> FallbackMap -> Int -> State -> FallbackMap
insertFallback :: Int -> FallbackMap -> Int -> Int -> FallbackMap
insertFallback !Int
state FallbackMap
fallbacks !Int
input !Int
nextState =
Int -> Int -> FallbackMap -> FallbackMap
forall a. Int -> a -> IntMap a -> IntMap a
IntMap.insert Int
nextState (FallbackMap -> Int -> Int -> Int
getFallback FallbackMap
fallbacks Int
state Int
input) FallbackMap
fallbacks
insertFallbacks :: FallbackMap -> State -> FallbackMap
insertFallbacks :: FallbackMap -> Int -> FallbackMap
insertFallbacks FallbackMap
fallbacks !Int
state =
(FallbackMap -> Int -> Int -> FallbackMap)
-> FallbackMap -> FallbackMap -> FallbackMap
forall a b. (a -> Int -> b -> a) -> a -> IntMap b -> a
IntMap.foldlWithKey' (Int -> FallbackMap -> Int -> Int -> FallbackMap
insertFallback Int
state) FallbackMap
fallbacks (TransitionMap
transitions TransitionMap -> Int -> FallbackMap
forall a. IntMap a -> Int -> a
IntMap.! Int
state)
in
(FallbackMap -> Int -> FallbackMap)
-> FallbackMap -> TransitionMap -> FallbackMap
forall a. (a -> Int -> a) -> a -> TransitionMap -> a
foldBreadthFirst FallbackMap -> Int -> FallbackMap
insertFallbacks (Int -> Int -> FallbackMap
forall a. Int -> a -> IntMap a
IntMap.singleton Int
0 Int
0) TransitionMap
transitions
buildValueMap :: forall v. TransitionMap -> FallbackMap -> ValuesMap v -> ValuesMap v
buildValueMap :: TransitionMap -> FallbackMap -> ValuesMap v -> ValuesMap v
buildValueMap TransitionMap
transitions FallbackMap
fallbacks ValuesMap v
valuesInitial =
let
insertValues :: ValuesMap v -> State -> ValuesMap v
insertValues :: ValuesMap v -> Int -> ValuesMap v
insertValues ValuesMap v
values !Int
state =
let
fallbackValues :: [v]
fallbackValues = ValuesMap v
values ValuesMap v -> Int -> [v]
forall a. IntMap a -> Int -> a
IntMap.! (FallbackMap
fallbacks FallbackMap -> Int -> Int
forall a. IntMap a -> Int -> a
IntMap.! Int
state)
valuesForState :: [v]
valuesForState = case Int -> ValuesMap v -> Maybe [v]
forall a. Int -> IntMap a -> Maybe a
IntMap.lookup Int
state ValuesMap v
valuesInitial of
Just [v]
vs -> [v]
vs [v] -> [v] -> [v]
forall a. [a] -> [a] -> [a]
++ [v]
fallbackValues
Maybe [v]
Nothing -> [v]
fallbackValues
in
Int -> [v] -> ValuesMap v -> ValuesMap v
forall a. Int -> a -> IntMap a -> IntMap a
IntMap.insert Int
state [v]
valuesForState ValuesMap v
values
in
(ValuesMap v -> Int -> ValuesMap v)
-> ValuesMap v -> TransitionMap -> ValuesMap v
forall a. (a -> Int -> a) -> a -> TransitionMap -> a
foldBreadthFirst ValuesMap v -> Int -> ValuesMap v
insertValues (Int -> [v] -> ValuesMap v
forall a. Int -> a -> IntMap a
IntMap.singleton Int
0 []) TransitionMap
transitions
{-# INLINE at #-}
at :: forall a. Vector.Vector a -> Int -> a
at :: Vector a -> Int -> a
at = Vector a -> Int -> a
forall a. Vector a -> Int -> a
Vector.unsafeIndex
{-# INLINE uAt #-}
uAt :: Prim a => TypedByteArray a -> Int -> a
uAt :: TypedByteArray a -> Int -> a
uAt = TypedByteArray a -> Int -> a
forall a. Prim a => TypedByteArray a -> Int -> a
TBA.unsafeIndex
data Next a = Done !a | Step !a
{-# INLINE runWithCase #-}
runWithCase :: forall a v. CaseSensitivity -> a -> (a -> Match v -> Next a) -> AcMachine v -> Text -> a
runWithCase :: CaseSensitivity
-> a -> (a -> Match v -> Next a) -> AcMachine v -> Text -> a
runWithCase !CaseSensitivity
caseSensitivity !a
seed !a -> Match v -> Next a
f !AcMachine v
machine !Text
text =
CodeUnitIndex -> a -> Int -> a
consumeInput CodeUnitIndex
initialOffset a
seed Int
initialState
where
initialState :: Int
initialState = Int
0
Text !Array
u8data !Int
off !Int
len = Text
text
AcMachine !Vector [v]
values !TypedByteArray Transition
transitions !TypedByteArray Offset
offsets !TypedByteArray Transition
rootAsciiTransitions = AcMachine v
machine
!initialOffset :: CodeUnitIndex
initialOffset = Int -> CodeUnitIndex
CodeUnitIndex Int
off
!limit :: CodeUnitIndex
limit = Int -> CodeUnitIndex
CodeUnitIndex (Int -> CodeUnitIndex) -> Int -> CodeUnitIndex
forall a b. (a -> b) -> a -> b
$ Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
len
{-# NOINLINE consumeInput #-}
consumeInput :: CodeUnitIndex -> a -> State -> a
consumeInput :: CodeUnitIndex -> a -> Int -> a
consumeInput !CodeUnitIndex
offset !a
acc !Int
_state
| CodeUnitIndex
offset CodeUnitIndex -> CodeUnitIndex -> Bool
forall a. Ord a => a -> a -> Bool
>= CodeUnitIndex
limit = a
acc
consumeInput !CodeUnitIndex
offset !a
acc !Int
state =
CodeUnitIndex -> a -> CodePoint -> Int -> a
followCodePoint (CodeUnitIndex
offset CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
+ CodeUnitIndex
codeUnits) a
acc CodePoint
possiblyLoweredCp Int
state
where
(!CodeUnitIndex
codeUnits, !CodePoint
cp) = Array -> CodeUnitIndex -> (CodeUnitIndex, CodePoint)
Utf8.unsafeIndexCodePoint' Array
u8data CodeUnitIndex
offset
!possiblyLoweredCp :: CodePoint
possiblyLoweredCp = case CaseSensitivity
caseSensitivity of
CaseSensitivity
CaseSensitive -> CodePoint
cp
CaseSensitivity
IgnoreCase -> CodePoint -> CodePoint
Utf8.lowerCodePoint CodePoint
cp
{-# INLINE followCodePoint #-}
followCodePoint :: CodeUnitIndex -> a -> CodePoint -> State -> a
followCodePoint :: CodeUnitIndex -> a -> CodePoint -> Int -> a
followCodePoint !CodeUnitIndex
offset !a
acc !CodePoint
cp !Int
state
| Int
state Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
initialState Bool -> Bool -> Bool
&& CodePoint -> Int
Char.ord CodePoint
cp Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
forall a. Integral a => a
asciiCount = CodeUnitIndex -> a -> CodePoint -> a
lookupRootAsciiTransition CodeUnitIndex
offset a
acc CodePoint
cp
| Bool
otherwise = CodeUnitIndex -> a -> CodePoint -> Int -> Offset -> a
lookupTransition CodeUnitIndex
offset a
acc CodePoint
cp Int
state (Offset -> a) -> Offset -> a
forall a b. (a -> b) -> a -> b
$ TypedByteArray Offset
offsets TypedByteArray Offset -> Int -> Offset
forall a. Prim a => TypedByteArray a -> Int -> a
`uAt` Int
state
{-# NOINLINE lookupTransition #-}
lookupTransition :: CodeUnitIndex -> a -> CodePoint -> State -> Offset -> a
lookupTransition :: CodeUnitIndex -> a -> CodePoint -> Int -> Offset -> a
lookupTransition !CodeUnitIndex
offset !a
acc !CodePoint
cp !Int
state !Offset
i
| Transition -> Bool
transitionIsWildcard Transition
t =
if Int
state Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
initialState
then CodeUnitIndex -> a -> Int -> a
consumeInput CodeUnitIndex
offset a
acc Int
state
else CodeUnitIndex -> a -> CodePoint -> Int -> a
followCodePoint CodeUnitIndex
offset a
acc CodePoint
cp (Transition -> Int
transitionState Transition
t)
| Transition -> CodePoint
transitionCodeUnit Transition
t CodePoint -> CodePoint -> Bool
forall a. Eq a => a -> a -> Bool
== CodePoint
cp =
CodeUnitIndex -> a -> Int -> a
collectMatches CodeUnitIndex
offset a
acc (Transition -> Int
transitionState Transition
t)
| Bool
otherwise =
CodeUnitIndex -> a -> CodePoint -> Int -> Offset -> a
lookupTransition CodeUnitIndex
offset a
acc CodePoint
cp Int
state (Offset -> a) -> Offset -> a
forall a b. (a -> b) -> a -> b
$ Offset
i Offset -> Offset -> Offset
forall a. Num a => a -> a -> a
+ Offset
1
where
!t :: Transition
t = TypedByteArray Transition
transitions TypedByteArray Transition -> Int -> Transition
forall a. Prim a => TypedByteArray a -> Int -> a
`uAt` Offset -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Offset
i
{-# INLINE lookupRootAsciiTransition #-}
lookupRootAsciiTransition :: CodeUnitIndex -> a -> CodePoint -> a
lookupRootAsciiTransition !CodeUnitIndex
offset !a
acc !CodePoint
cp
| Transition -> Bool
transitionIsWildcard Transition
t = CodeUnitIndex -> a -> Int -> a
consumeInput CodeUnitIndex
offset a
acc Int
initialState
| Bool
otherwise = CodeUnitIndex -> a -> Int -> a
collectMatches CodeUnitIndex
offset a
acc (Int -> a) -> Int -> a
forall a b. (a -> b) -> a -> b
$ Transition -> Int
transitionState Transition
t
where !t :: Transition
t = TypedByteArray Transition
rootAsciiTransitions TypedByteArray Transition -> Int -> Transition
forall a. Prim a => TypedByteArray a -> Int -> a
`uAt` CodePoint -> Int
Char.ord CodePoint
cp
{-# NOINLINE collectMatches #-}
collectMatches :: CodeUnitIndex -> a -> Int -> a
collectMatches !CodeUnitIndex
offset !a
acc !Int
state =
let
matchedValues :: [v]
matchedValues = Vector [v]
values Vector [v] -> Int -> [v]
forall a. Vector a -> Int -> a
`at` Int
state
handleMatch :: a -> [v] -> a
handleMatch !a
acc' [v]
vs = case [v]
vs of
[] -> CodeUnitIndex -> a -> Int -> a
consumeInput CodeUnitIndex
offset a
acc' Int
state
v
v:[v]
more -> case a -> Match v -> Next a
f a
acc' (CodeUnitIndex -> v -> Match v
forall v. CodeUnitIndex -> v -> Match v
Match (CodeUnitIndex
offset CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
- CodeUnitIndex
initialOffset) v
v) of
Step a
newAcc -> a -> [v] -> a
handleMatch a
newAcc [v]
more
Done a
finalAcc -> a
finalAcc
in
a -> [v] -> a
handleMatch a
acc [v]
matchedValues
{-# INLINE runText #-}
runText :: forall a v. a -> (a -> Match v -> Next a) -> AcMachine v -> Text -> a
runText :: a -> (a -> Match v -> Next a) -> AcMachine v -> Text -> a
runText = CaseSensitivity
-> a -> (a -> Match v -> Next a) -> AcMachine v -> Text -> a
forall a v.
CaseSensitivity
-> a -> (a -> Match v -> Next a) -> AcMachine v -> Text -> a
runWithCase CaseSensitivity
CaseSensitive
{-# INLINE runLower #-}
runLower :: forall a v. a -> (a -> Match v -> Next a) -> AcMachine v -> Text -> a
runLower :: a -> (a -> Match v -> Next a) -> AcMachine v -> Text -> a
runLower = CaseSensitivity
-> a -> (a -> Match v -> Next a) -> AcMachine v -> Text -> a
forall a v.
CaseSensitivity
-> a -> (a -> Match v -> Next a) -> AcMachine v -> Text -> a
runWithCase CaseSensitivity
IgnoreCase