{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Data.Text.AhoCorasick.Automaton
( AcMachine (..)
, CaseSensitivity (..)
, CodeUnitIndex (..)
, Match (..)
, Next (..)
, build
, debugBuildDot
, runLower
, runText
, runWithCase
, needleCasings
) where
import Control.DeepSeq (NFData)
import Data.Bits (Bits (shiftL, shiftR, (.&.), (.|.)))
import Data.Char (chr)
import Data.Foldable (foldl')
import Data.IntMap.Strict (IntMap)
import Data.Word (Word32, Word64)
import GHC.Generics (Generic)
import qualified Data.Char as Char
import qualified Data.IntMap.Strict as IntMap
import qualified Data.List as List
import qualified Data.Vector as Vector
import Data.Text.CaseSensitivity (CaseSensitivity (..))
import Data.Text.Utf8 (CodePoint, CodeUnitIndex (CodeUnitIndex), Text (..))
import Data.TypedByteArray (Prim, TypedByteArray)
import qualified Data.Text as Text
import qualified Data.Text.Utf8 as Utf8
import qualified Data.TypedByteArray as TBA
type State = Int
type Transition = Word64
type Offset = Word32
data Match v = Match
{ forall v. Match v -> CodeUnitIndex
matchPos :: {-# UNPACK #-} !CodeUnitIndex
, forall v. Match v -> v
matchValue :: v
}
data AcMachine v = AcMachine
{ forall v. AcMachine v -> Vector [v]
machineValues :: !(Vector.Vector [v])
, forall v. AcMachine v -> TypedByteArray Transition
machineTransitions :: !(TypedByteArray Transition)
, forall v. AcMachine v -> TypedByteArray Offset
machineOffsets :: !(TypedByteArray Offset)
, forall v. AcMachine v -> TypedByteArray Transition
machineRootAsciiTransitions :: !(TypedByteArray Transition)
} deriving (forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
forall v x. Rep (AcMachine v) x -> AcMachine v
forall v x. AcMachine v -> Rep (AcMachine v) x
$cto :: forall v x. Rep (AcMachine v) x -> AcMachine v
$cfrom :: forall v x. AcMachine v -> Rep (AcMachine v) x
Generic, forall a b. a -> AcMachine b -> AcMachine a
forall a b. (a -> b) -> AcMachine a -> AcMachine b
forall (f :: * -> *).
(forall a b. (a -> b) -> f a -> f b)
-> (forall a b. a -> f b -> f a) -> Functor f
<$ :: forall a b. a -> AcMachine b -> AcMachine a
$c<$ :: forall a b. a -> AcMachine b -> AcMachine a
fmap :: forall a b. (a -> b) -> AcMachine a -> AcMachine b
$cfmap :: forall a b. (a -> b) -> AcMachine a -> AcMachine b
Functor)
instance NFData v => NFData (AcMachine v)
wildcard :: Integral a => a
wildcard :: forall a. Integral a => a
wildcard = a
0x200000
transitionCodeUnit :: Transition -> CodePoint
transitionCodeUnit :: Transition -> CodePoint
transitionCodeUnit Transition
t = State -> CodePoint
Char.chr forall a b. (a -> b) -> a -> b
$ forall a b. (Integral a, Num b) => a -> b
fromIntegral (Transition
t forall a. Bits a => a -> a -> a
.&. Transition
0x1fffff)
transitionState :: Transition -> State
transitionState :: Transition -> State
transitionState Transition
t = forall a b. (Integral a, Num b) => a -> b
fromIntegral (Transition
t forall a. Bits a => a -> State -> a
`shiftR` State
32)
transitionIsWildcard :: Transition -> Bool
transitionIsWildcard :: Transition -> Bool
transitionIsWildcard Transition
t = (Transition
t forall a. Bits a => a -> a -> a
.&. forall a. Integral a => a
wildcard) forall a. Eq a => a -> a -> Bool
== forall a. Integral a => a
wildcard
newTransition :: CodePoint -> State -> Transition
newTransition :: CodePoint -> State -> Transition
newTransition CodePoint
input State
state =
let
input64 :: Transition
input64 = forall a b. (Integral a, Num b) => a -> b
fromIntegral forall a b. (a -> b) -> a -> b
$ CodePoint -> State
Char.ord CodePoint
input :: Word64
state64 :: Transition
state64 = forall a b. (Integral a, Num b) => a -> b
fromIntegral State
state :: Word64
in
(Transition
state64 forall a. Bits a => a -> State -> a
`shiftL` State
32) forall a. Bits a => a -> a -> a
.|. Transition
input64
newWildcardTransition :: State -> Transition
newWildcardTransition :: State -> Transition
newWildcardTransition State
state =
let
state64 :: Transition
state64 = forall a b. (Integral a, Num b) => a -> b
fromIntegral State
state :: Word64
in
(Transition
state64 forall a. Bits a => a -> State -> a
`shiftL` State
32) forall a. Bits a => a -> a -> a
.|. forall a. Integral a => a
wildcard
packTransitions :: [[Transition]] -> (TypedByteArray Transition, TypedByteArray Offset)
packTransitions :: [[Transition]]
-> (TypedByteArray Transition, TypedByteArray Offset)
packTransitions [[Transition]]
transitions =
let
packed :: TypedByteArray Transition
packed = forall a. Prim a => [a] -> TypedByteArray a
TBA.fromList forall a b. (a -> b) -> a -> b
$ forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat [[Transition]]
transitions
offsets :: TypedByteArray Offset
offsets = forall a. Prim a => [a] -> TypedByteArray a
TBA.fromList forall a b. (a -> b) -> a -> b
$ forall a b. (a -> b) -> [a] -> [b]
map forall a b. (Integral a, Num b) => a -> b
fromIntegral forall a b. (a -> b) -> a -> b
$ forall b a. (b -> a -> b) -> b -> [a] -> [b]
scanl forall a. Num a => a -> a -> a
(+) State
0 forall a b. (a -> b) -> a -> b
$ forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap forall (t :: * -> *) a. Foldable t => t a -> State
List.length [[Transition]]
transitions
in
(TypedByteArray Transition
packed, TypedByteArray Offset
offsets)
build :: [(Text, v)] -> AcMachine v
build :: forall v. [(Text, v)] -> AcMachine v
build [(Text, v)]
needlesWithValues =
let
(State
numStates, TransitionMap
transitionMap, ValuesMap v
initialValueMap) = forall v. [(Text, v)] -> (State, TransitionMap, ValuesMap v)
buildTransitionMap [(Text, v)]
needlesWithValues
fallbackMap :: FallbackMap
fallbackMap = TransitionMap -> FallbackMap
buildFallbackMap TransitionMap
transitionMap
valueMap :: ValuesMap v
valueMap = forall v.
TransitionMap -> FallbackMap -> ValuesMap v -> ValuesMap v
buildValueMap TransitionMap
transitionMap FallbackMap
fallbackMap ValuesMap v
initialValueMap
prependTransition :: [Transition] -> State -> State -> [Transition]
prependTransition [Transition]
ts State
input State
state = CodePoint -> State -> Transition
newTransition (State -> CodePoint
Char.chr State
input) State
state forall a. a -> [a] -> [a]
: [Transition]
ts
makeTransitions :: State -> FallbackMap -> [Transition]
makeTransitions State
fallback FallbackMap
ts = forall a b. (a -> State -> b -> a) -> a -> IntMap b -> a
IntMap.foldlWithKey' [Transition] -> State -> State -> [Transition]
prependTransition [State -> Transition
newWildcardTransition State
fallback] FallbackMap
ts
transitionsList :: [[Transition]]
transitionsList = forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith State -> FallbackMap -> [Transition]
makeTransitions (forall a. IntMap a -> [a]
IntMap.elems FallbackMap
fallbackMap) (forall a. IntMap a -> [a]
IntMap.elems TransitionMap
transitionMap)
(TypedByteArray Transition
transitions, TypedByteArray Offset
offsets) = [[Transition]]
-> (TypedByteArray Transition, TypedByteArray Offset)
packTransitions [[Transition]]
transitionsList
rootTransitions :: TypedByteArray Transition
rootTransitions = FallbackMap -> TypedByteArray Transition
buildAsciiTransitionLookupTable forall a b. (a -> b) -> a -> b
$ TransitionMap
transitionMap forall a. IntMap a -> State -> a
IntMap.! State
0
values :: Vector [v]
values = forall a. State -> (State -> a) -> Vector a
Vector.generate State
numStates (ValuesMap v
valueMap forall a. IntMap a -> State -> a
IntMap.!)
in
forall v.
Vector [v]
-> TypedByteArray Transition
-> TypedByteArray Offset
-> TypedByteArray Transition
-> AcMachine v
AcMachine Vector [v]
values TypedByteArray Transition
transitions TypedByteArray Offset
offsets TypedByteArray Transition
rootTransitions
debugBuildDot :: [Text] -> String
debugBuildDot :: [Text] -> String
debugBuildDot [Text]
needles =
let
(State
_numStates, TransitionMap
transitionMap, ValuesMap State
initialValueMap) =
forall v. [(Text, v)] -> (State, TransitionMap, ValuesMap v)
buildTransitionMap forall a b. (a -> b) -> a -> b
$ forall a b. [a] -> [b] -> [(a, b)]
zip [Text]
needles ([State
0..] :: [Int])
fallbackMap :: FallbackMap
fallbackMap = TransitionMap -> FallbackMap
buildFallbackMap TransitionMap
transitionMap
valueMap :: ValuesMap State
valueMap = forall v.
TransitionMap -> FallbackMap -> ValuesMap v -> ValuesMap v
buildValueMap TransitionMap
transitionMap FallbackMap
fallbackMap ValuesMap State
initialValueMap
dotEdge :: String -> a -> a -> String
dotEdge String
extra a
state a
nextState =
String
" " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> String
show a
state forall a. [a] -> [a] -> [a]
++ String
" -> " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> String
show a
nextState forall a. [a] -> [a] -> [a]
++ String
" [" forall a. [a] -> [a] -> [a]
++ String
extra forall a. [a] -> [a] -> [a]
++ String
"];"
dotFallbackEdge :: [String] -> State -> State -> [String]
dotFallbackEdge :: [String] -> State -> State -> [String]
dotFallbackEdge [String]
edges State
state State
nextState =
forall {a} {a}. (Show a, Show a) => String -> a -> a -> String
dotEdge String
"style = dashed" State
state State
nextState forall a. a -> [a] -> [a]
: [String]
edges
dotTransitionEdge :: State -> [String] -> Int -> State -> [String]
dotTransitionEdge :: State -> [String] -> State -> State -> [String]
dotTransitionEdge State
state [String]
edges State
input State
nextState =
forall {a} {a}. (Show a, Show a) => String -> a -> a -> String
dotEdge (String
"label = \"" forall a. [a] -> [a] -> [a]
++ State -> String
showInput State
input forall a. [a] -> [a] -> [a]
++ String
"\"") State
state State
nextState forall a. a -> [a] -> [a]
: [String]
edges
showInput :: State -> String
showInput State
input = [State -> CodePoint
chr State
input]
prependTransitionEdges :: [String] -> State -> [String]
prependTransitionEdges [String]
edges State
state =
forall a b. (a -> State -> b -> a) -> a -> IntMap b -> a
IntMap.foldlWithKey' (State -> [String] -> State -> State -> [String]
dotTransitionEdge State
state) [String]
edges (TransitionMap
transitionMap forall a. IntMap a -> State -> a
IntMap.! State
state)
dotMatchState :: [String] -> State -> [Int] -> [String]
dotMatchState :: [String] -> State -> [State] -> [String]
dotMatchState [String]
edges State
_ [] = [String]
edges
dotMatchState [String]
edges State
state [State]
_ = (String
" " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> String
show State
state forall a. [a] -> [a] -> [a]
++ String
" [shape = doublecircle];") forall a. a -> [a] -> [a]
: [String]
edges
dot0 :: [String]
dot0 = forall a. (a -> State -> a) -> a -> TransitionMap -> a
foldBreadthFirst [String] -> State -> [String]
prependTransitionEdges [] TransitionMap
transitionMap
dot1 :: [String]
dot1 = forall a b. (a -> State -> b -> a) -> a -> IntMap b -> a
IntMap.foldlWithKey' [String] -> State -> State -> [String]
dotFallbackEdge [String]
dot0 FallbackMap
fallbackMap
dot2 :: [String]
dot2 = forall a b. (a -> State -> b -> a) -> a -> IntMap b -> a
IntMap.foldlWithKey' [String] -> State -> [State] -> [String]
dotMatchState [String]
dot1 ValuesMap State
valueMap
in
[String] -> String
unlines forall a b. (a -> b) -> a -> b
$ [String
"digraph {", String
" rankdir = \"LR\";"] forall a. [a] -> [a] -> [a]
++ forall a. [a] -> [a]
reverse [String]
dot2 forall a. [a] -> [a] -> [a]
++ [String
"}"]
type TransitionMap = IntMap (IntMap State)
type FallbackMap = IntMap State
type ValuesMap v = IntMap [v]
buildTransitionMap :: forall v. [(Text, v)] -> (Int, TransitionMap, ValuesMap v)
buildTransitionMap :: forall v. [(Text, v)] -> (State, TransitionMap, ValuesMap v)
buildTransitionMap =
let
insertNeedle :: (Int, TransitionMap, ValuesMap v) -> (Text, v) -> (Int, TransitionMap, ValuesMap v)
insertNeedle :: (State, TransitionMap, ValuesMap v)
-> (Text, v) -> (State, TransitionMap, ValuesMap v)
insertNeedle !(State, TransitionMap, ValuesMap v)
acc (!Text
needle, !v
value) = State
-> CodeUnitIndex
-> (State, TransitionMap, ValuesMap v)
-> (State, TransitionMap, ValuesMap v)
go State
stateInitial CodeUnitIndex
0 (State, TransitionMap, ValuesMap v)
acc
where
!needleLen :: CodeUnitIndex
needleLen = Text -> CodeUnitIndex
Utf8.lengthUtf8 Text
needle
go :: State
-> CodeUnitIndex
-> (State, TransitionMap, ValuesMap v)
-> (State, TransitionMap, ValuesMap v)
go !State
state !CodeUnitIndex
index (!State
numStates, !TransitionMap
transitions, !ValuesMap v
values)
| CodeUnitIndex
index forall a. Ord a => a -> a -> Bool
>= CodeUnitIndex
needleLen = (State
numStates, TransitionMap
transitions, forall a. (a -> a -> a) -> State -> a -> IntMap a -> IntMap a
IntMap.insertWith forall a. [a] -> [a] -> [a]
(++) State
state [v
value] ValuesMap v
values)
go !State
state !CodeUnitIndex
index (!State
numStates, !TransitionMap
transitions, !ValuesMap v
values) =
let
!transitionsFromState :: FallbackMap
transitionsFromState = TransitionMap
transitions forall a. IntMap a -> State -> a
IntMap.! State
state
(!CodeUnitIndex
codeUnits, !CodePoint
input) = Text -> CodeUnitIndex -> (CodeUnitIndex, CodePoint)
Utf8.unsafeIndexCodePoint Text
needle CodeUnitIndex
index
in
case forall a. State -> IntMap a -> Maybe a
IntMap.lookup (CodePoint -> State
Char.ord CodePoint
input) FallbackMap
transitionsFromState of
Just !State
nextState ->
State
-> CodeUnitIndex
-> (State, TransitionMap, ValuesMap v)
-> (State, TransitionMap, ValuesMap v)
go State
nextState (CodeUnitIndex
index forall a. Num a => a -> a -> a
+ CodeUnitIndex
codeUnits) (State
numStates, TransitionMap
transitions, ValuesMap v
values)
Maybe State
Nothing ->
let
!nextState :: State
nextState = State
numStates
!transitionsFromState' :: FallbackMap
transitionsFromState' = forall a. State -> a -> IntMap a -> IntMap a
IntMap.insert (CodePoint -> State
Char.ord CodePoint
input) State
nextState FallbackMap
transitionsFromState
!transitions' :: TransitionMap
transitions'
= forall a. State -> a -> IntMap a -> IntMap a
IntMap.insert State
state FallbackMap
transitionsFromState'
forall a b. (a -> b) -> a -> b
$ forall a. State -> a -> IntMap a -> IntMap a
IntMap.insert State
nextState forall a. IntMap a
IntMap.empty TransitionMap
transitions
in
State
-> CodeUnitIndex
-> (State, TransitionMap, ValuesMap v)
-> (State, TransitionMap, ValuesMap v)
go State
nextState (CodeUnitIndex
index forall a. Num a => a -> a -> a
+ CodeUnitIndex
codeUnits) (State
numStates forall a. Num a => a -> a -> a
+ State
1, TransitionMap
transitions', ValuesMap v
values)
stateInitial :: State
stateInitial = State
0
initialTransitions :: IntMap (IntMap a)
initialTransitions = forall a. State -> a -> IntMap a
IntMap.singleton State
stateInitial forall a. IntMap a
IntMap.empty
initialValues :: IntMap a
initialValues = forall a. IntMap a
IntMap.empty
in
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (State, TransitionMap, ValuesMap v)
-> (Text, v) -> (State, TransitionMap, ValuesMap v)
insertNeedle (State
1, forall {a}. IntMap (IntMap a)
initialTransitions, forall a. IntMap a
initialValues)
asciiCount :: Integral a => a
asciiCount :: forall a. Integral a => a
asciiCount = a
128
{-# NOINLINE buildAsciiTransitionLookupTable #-}
buildAsciiTransitionLookupTable :: IntMap State -> TypedByteArray Transition
buildAsciiTransitionLookupTable :: FallbackMap -> TypedByteArray Transition
buildAsciiTransitionLookupTable FallbackMap
transitions = forall a. Prim a => State -> (State -> a) -> TypedByteArray a
TBA.generate forall a. Integral a => a
asciiCount forall a b. (a -> b) -> a -> b
$ \State
i ->
case forall a. State -> IntMap a -> Maybe a
IntMap.lookup State
i FallbackMap
transitions of
Just State
state -> CodePoint -> State -> Transition
newTransition (State -> CodePoint
Char.chr State
i) State
state
Maybe State
Nothing -> State -> Transition
newWildcardTransition State
0
foldBreadthFirst :: (a -> State -> a) -> a -> TransitionMap -> a
foldBreadthFirst :: forall a. (a -> State -> a) -> a -> TransitionMap -> a
foldBreadthFirst a -> State -> a
f a
seed TransitionMap
transitions = [State] -> [State] -> a -> a
go [State
0] [] a
seed
where
go :: [State] -> [State] -> a -> a
go [] [] !a
acc = a
acc
go [] [State]
revBacklog !a
acc = [State] -> [State] -> a -> a
go (forall a. [a] -> [a]
reverse [State]
revBacklog) [] a
acc
go (State
state : [State]
backlog) [State]
revBacklog !a
acc =
let
extra :: [State]
extra = forall a. IntMap a -> [a]
IntMap.elems forall a b. (a -> b) -> a -> b
$ TransitionMap
transitions forall a. IntMap a -> State -> a
IntMap.! State
state
in
[State] -> [State] -> a -> a
go [State]
backlog ([State]
extra forall a. [a] -> [a] -> [a]
++ [State]
revBacklog) (a -> State -> a
f a
acc State
state)
buildFallbackMap :: TransitionMap -> FallbackMap
buildFallbackMap :: TransitionMap -> FallbackMap
buildFallbackMap TransitionMap
transitions =
let
getFallback :: FallbackMap -> State -> Int -> State
getFallback :: FallbackMap -> State -> State -> State
getFallback FallbackMap
_ State
0 State
_ = State
0
getFallback FallbackMap
fallbacks !State
state !State
input =
let
fallback :: State
fallback = FallbackMap
fallbacks forall a. IntMap a -> State -> a
IntMap.! State
state
transitionsFromFallback :: FallbackMap
transitionsFromFallback = TransitionMap
transitions forall a. IntMap a -> State -> a
IntMap.! State
fallback
in
case forall a. State -> IntMap a -> Maybe a
IntMap.lookup State
input FallbackMap
transitionsFromFallback of
Just State
st -> State
st
Maybe State
Nothing -> FallbackMap -> State -> State -> State
getFallback FallbackMap
fallbacks State
fallback State
input
insertFallback :: State -> FallbackMap -> Int -> State -> FallbackMap
insertFallback :: State -> FallbackMap -> State -> State -> FallbackMap
insertFallback !State
state FallbackMap
fallbacks !State
input !State
nextState =
forall a. State -> a -> IntMap a -> IntMap a
IntMap.insert State
nextState (FallbackMap -> State -> State -> State
getFallback FallbackMap
fallbacks State
state State
input) FallbackMap
fallbacks
insertFallbacks :: FallbackMap -> State -> FallbackMap
insertFallbacks :: FallbackMap -> State -> FallbackMap
insertFallbacks FallbackMap
fallbacks !State
state =
forall a b. (a -> State -> b -> a) -> a -> IntMap b -> a
IntMap.foldlWithKey' (State -> FallbackMap -> State -> State -> FallbackMap
insertFallback State
state) FallbackMap
fallbacks (TransitionMap
transitions forall a. IntMap a -> State -> a
IntMap.! State
state)
in
forall a. (a -> State -> a) -> a -> TransitionMap -> a
foldBreadthFirst FallbackMap -> State -> FallbackMap
insertFallbacks (forall a. State -> a -> IntMap a
IntMap.singleton State
0 State
0) TransitionMap
transitions
buildValueMap :: forall v. TransitionMap -> FallbackMap -> ValuesMap v -> ValuesMap v
buildValueMap :: forall v.
TransitionMap -> FallbackMap -> ValuesMap v -> ValuesMap v
buildValueMap TransitionMap
transitions FallbackMap
fallbacks ValuesMap v
valuesInitial =
let
insertValues :: ValuesMap v -> State -> ValuesMap v
insertValues :: ValuesMap v -> State -> ValuesMap v
insertValues ValuesMap v
values !State
state =
let
fallbackValues :: [v]
fallbackValues = ValuesMap v
values forall a. IntMap a -> State -> a
IntMap.! (FallbackMap
fallbacks forall a. IntMap a -> State -> a
IntMap.! State
state)
valuesForState :: [v]
valuesForState = case forall a. State -> IntMap a -> Maybe a
IntMap.lookup State
state ValuesMap v
valuesInitial of
Just [v]
vs -> [v]
vs forall a. [a] -> [a] -> [a]
++ [v]
fallbackValues
Maybe [v]
Nothing -> [v]
fallbackValues
in
forall a. State -> a -> IntMap a -> IntMap a
IntMap.insert State
state [v]
valuesForState ValuesMap v
values
in
forall a. (a -> State -> a) -> a -> TransitionMap -> a
foldBreadthFirst ValuesMap v -> State -> ValuesMap v
insertValues (forall a. State -> a -> IntMap a
IntMap.singleton State
0 []) TransitionMap
transitions
{-# INLINE at #-}
at :: forall a. Vector.Vector a -> Int -> a
at :: forall a. Vector a -> State -> a
at = forall a. Vector a -> State -> a
Vector.unsafeIndex
{-# INLINE uAt #-}
uAt :: Prim a => TypedByteArray a -> Int -> a
uAt :: forall a. Prim a => TypedByteArray a -> State -> a
uAt = forall a. Prim a => TypedByteArray a -> State -> a
TBA.unsafeIndex
data Next a = Done !a | Step !a
{-# INLINE runWithCase #-}
runWithCase :: forall a v. CaseSensitivity -> a -> (a -> Match v -> Next a) -> AcMachine v -> Text -> a
runWithCase :: forall a v.
CaseSensitivity
-> a -> (a -> Match v -> Next a) -> AcMachine v -> Text -> a
runWithCase !CaseSensitivity
caseSensitivity !a
seed !a -> Match v -> Next a
f !AcMachine v
machine !Text
text =
CodeUnitIndex -> a -> State -> a
consumeInput CodeUnitIndex
initialOffset a
seed State
initialState
where
initialState :: State
initialState = State
0
Text !Array
u8data !State
off !State
len = Text
text
AcMachine !Vector [v]
values !TypedByteArray Transition
transitions !TypedByteArray Offset
offsets !TypedByteArray Transition
rootAsciiTransitions = AcMachine v
machine
!initialOffset :: CodeUnitIndex
initialOffset = State -> CodeUnitIndex
CodeUnitIndex State
off
!limit :: CodeUnitIndex
limit = State -> CodeUnitIndex
CodeUnitIndex forall a b. (a -> b) -> a -> b
$ State
off forall a. Num a => a -> a -> a
+ State
len
{-# NOINLINE consumeInput #-}
consumeInput :: CodeUnitIndex -> a -> State -> a
consumeInput :: CodeUnitIndex -> a -> State -> a
consumeInput !CodeUnitIndex
offset !a
acc !State
_state
| CodeUnitIndex
offset forall a. Ord a => a -> a -> Bool
>= CodeUnitIndex
limit = a
acc
consumeInput !CodeUnitIndex
offset !a
acc !State
state =
CodeUnitIndex -> a -> CodePoint -> State -> a
followCodePoint (CodeUnitIndex
offset forall a. Num a => a -> a -> a
+ CodeUnitIndex
codeUnits) a
acc CodePoint
possiblyLoweredCp State
state
where
(!CodeUnitIndex
codeUnits, !CodePoint
cp) = Array -> CodeUnitIndex -> (CodeUnitIndex, CodePoint)
Utf8.unsafeIndexCodePoint' Array
u8data CodeUnitIndex
offset
!possiblyLoweredCp :: CodePoint
possiblyLoweredCp = case CaseSensitivity
caseSensitivity of
CaseSensitivity
CaseSensitive -> CodePoint
cp
CaseSensitivity
IgnoreCase -> CodePoint -> CodePoint
Utf8.lowerCodePoint CodePoint
cp
{-# INLINE followCodePoint #-}
followCodePoint :: CodeUnitIndex -> a -> CodePoint -> State -> a
followCodePoint :: CodeUnitIndex -> a -> CodePoint -> State -> a
followCodePoint !CodeUnitIndex
offset !a
acc !CodePoint
cp !State
state
| State
state forall a. Eq a => a -> a -> Bool
== State
initialState Bool -> Bool -> Bool
&& CodePoint -> State
Char.ord CodePoint
cp forall a. Ord a => a -> a -> Bool
< forall a. Integral a => a
asciiCount = CodeUnitIndex -> a -> CodePoint -> a
lookupRootAsciiTransition CodeUnitIndex
offset a
acc CodePoint
cp
| Bool
otherwise = CodeUnitIndex -> a -> CodePoint -> State -> Offset -> a
lookupTransition CodeUnitIndex
offset a
acc CodePoint
cp State
state forall a b. (a -> b) -> a -> b
$ TypedByteArray Offset
offsets forall a. Prim a => TypedByteArray a -> State -> a
`uAt` State
state
{-# NOINLINE lookupTransition #-}
lookupTransition :: CodeUnitIndex -> a -> CodePoint -> State -> Offset -> a
lookupTransition :: CodeUnitIndex -> a -> CodePoint -> State -> Offset -> a
lookupTransition !CodeUnitIndex
offset !a
acc !CodePoint
cp !State
state !Offset
i
| Transition -> Bool
transitionIsWildcard Transition
t =
if State
state forall a. Eq a => a -> a -> Bool
== State
initialState
then CodeUnitIndex -> a -> State -> a
consumeInput CodeUnitIndex
offset a
acc State
state
else CodeUnitIndex -> a -> CodePoint -> State -> a
followCodePoint CodeUnitIndex
offset a
acc CodePoint
cp (Transition -> State
transitionState Transition
t)
| Transition -> CodePoint
transitionCodeUnit Transition
t forall a. Eq a => a -> a -> Bool
== CodePoint
cp =
CodeUnitIndex -> a -> State -> a
collectMatches CodeUnitIndex
offset a
acc (Transition -> State
transitionState Transition
t)
| Bool
otherwise =
CodeUnitIndex -> a -> CodePoint -> State -> Offset -> a
lookupTransition CodeUnitIndex
offset a
acc CodePoint
cp State
state forall a b. (a -> b) -> a -> b
$ Offset
i forall a. Num a => a -> a -> a
+ Offset
1
where
!t :: Transition
t = TypedByteArray Transition
transitions forall a. Prim a => TypedByteArray a -> State -> a
`uAt` forall a b. (Integral a, Num b) => a -> b
fromIntegral Offset
i
{-# INLINE lookupRootAsciiTransition #-}
lookupRootAsciiTransition :: CodeUnitIndex -> a -> CodePoint -> a
lookupRootAsciiTransition !CodeUnitIndex
offset !a
acc !CodePoint
cp
| Transition -> Bool
transitionIsWildcard Transition
t = CodeUnitIndex -> a -> State -> a
consumeInput CodeUnitIndex
offset a
acc State
initialState
| Bool
otherwise = CodeUnitIndex -> a -> State -> a
collectMatches CodeUnitIndex
offset a
acc forall a b. (a -> b) -> a -> b
$ Transition -> State
transitionState Transition
t
where !t :: Transition
t = TypedByteArray Transition
rootAsciiTransitions forall a. Prim a => TypedByteArray a -> State -> a
`uAt` CodePoint -> State
Char.ord CodePoint
cp
{-# NOINLINE collectMatches #-}
collectMatches :: CodeUnitIndex -> a -> State -> a
collectMatches !CodeUnitIndex
offset !a
acc !State
state =
let
matchedValues :: [v]
matchedValues = Vector [v]
values forall a. Vector a -> State -> a
`at` State
state
handleMatch :: a -> [v] -> a
handleMatch !a
acc' [v]
vs = case [v]
vs of
[] -> CodeUnitIndex -> a -> State -> a
consumeInput CodeUnitIndex
offset a
acc' State
state
v
v:[v]
more -> case a -> Match v -> Next a
f a
acc' (forall v. CodeUnitIndex -> v -> Match v
Match (CodeUnitIndex
offset forall a. Num a => a -> a -> a
- CodeUnitIndex
initialOffset) v
v) of
Step a
newAcc -> a -> [v] -> a
handleMatch a
newAcc [v]
more
Done a
finalAcc -> a
finalAcc
in
a -> [v] -> a
handleMatch a
acc [v]
matchedValues
{-# INLINE runText #-}
runText :: forall a v. a -> (a -> Match v -> Next a) -> AcMachine v -> Text -> a
runText :: forall a v.
a -> (a -> Match v -> Next a) -> AcMachine v -> Text -> a
runText = forall a v.
CaseSensitivity
-> a -> (a -> Match v -> Next a) -> AcMachine v -> Text -> a
runWithCase CaseSensitivity
CaseSensitive
{-# INLINE runLower #-}
runLower :: forall a v. a -> (a -> Match v -> Next a) -> AcMachine v -> Text -> a
runLower :: forall a v.
a -> (a -> Match v -> Next a) -> AcMachine v -> Text -> a
runLower = forall a v.
CaseSensitivity
-> a -> (a -> Match v -> Next a) -> AcMachine v -> Text -> a
runWithCase CaseSensitivity
IgnoreCase
needleCasings :: Text -> [Text]
needleCasings :: Text -> [Text]
needleCasings = forall a b. (a -> b) -> [a] -> [b]
map String -> Text
Text.pack forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> [String]
loop forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> String
Text.unpack
where
loop :: String -> [String]
loop String
"" = [String
""]
loop (CodePoint
c:String
cs) = (:) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> CodePoint -> String
Utf8.unlowerCodePoint CodePoint
c forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> String -> [String]
loop String
cs