{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE MultiWayIf #-}
module Data.Text.Metrics
(
levenshtein,
levenshteinNorm,
damerauLevenshtein,
damerauLevenshteinNorm,
overlap,
jaccard,
hamming,
jaro,
jaroWinkler,
)
where
import Control.Monad
import Control.Monad.ST
import Data.Map.Strict (Map)
import qualified Data.Map.Strict as M
import Data.Ratio
import Data.Text
import qualified Data.Text as T
import qualified Data.Text.Internal as T
import qualified Data.Text.Unsafe as TU
import qualified Data.Vector.Unboxed.Mutable as VUM
import GHC.Exts (inline)
levenshtein :: Text -> Text -> Int
levenshtein :: Text -> Text -> Int
levenshtein Text
a Text
b = (Int, Int) -> Int
forall a b. (a, b) -> a
fst (Text -> Text -> (Int, Int)
levenshtein_ Text
a Text
b)
levenshteinNorm :: Text -> Text -> Ratio Int
levenshteinNorm :: Text -> Text -> Ratio Int
levenshteinNorm = (Text -> Text -> (Int, Int)) -> Text -> Text -> Ratio Int
norm Text -> Text -> (Int, Int)
levenshtein_
levenshtein_ :: Text -> Text -> (Int, Int)
levenshtein_ :: Text -> Text -> (Int, Int)
levenshtein_ Text
a Text
b
| Text -> Bool
T.null Text
a = (Int
lenb, Int
lenm)
| Text -> Bool
T.null Text
b = (Int
lena, Int
lenm)
| Bool
otherwise = (forall s. ST s (Int, Int)) -> (Int, Int)
forall a. (forall s. ST s a) -> a
runST ((forall s. ST s (Int, Int)) -> (Int, Int))
-> (forall s. ST s (Int, Int)) -> (Int, Int)
forall a b. (a -> b) -> a -> b
$ do
let v_len :: Int
v_len = Int
lenb Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1
MVector s Int
v <- Int -> ST s (MVector (PrimState (ST s)) Int)
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
Int -> m (MVector (PrimState m) a)
VUM.unsafeNew (Int
v_len Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
2)
let gov :: Int -> ST s ()
gov !Int
i =
Bool -> ST s () -> ST s ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
v_len) (ST s () -> ST s ()) -> ST s () -> ST s ()
forall a b. (a -> b) -> a -> b
$ do
MVector (PrimState (ST s)) Int -> Int -> Int -> ST s ()
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
VUM.unsafeWrite MVector s Int
MVector (PrimState (ST s)) Int
v Int
i Int
i
Int -> ST s ()
gov (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
goi :: Int -> Int -> Int -> Int -> ST s ()
goi !Int
i !Int
na !Int
v0 !Int
v1 = do
let !(TU.Iter Char
ai Int
da) = Text -> Int -> Iter
TU.iter Text
a Int
na
goj :: Int -> Int -> ST s ()
goj !Int
j !Int
nb =
Bool -> ST s () -> ST s ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Int
j Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
lenb) (ST s () -> ST s ()) -> ST s () -> ST s ()
forall a b. (a -> b) -> a -> b
$ do
let !(TU.Iter Char
bj Int
db) = Text -> Int -> Iter
TU.iter Text
b Int
nb
cost :: Int
cost = if Char
ai Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
bj then Int
0 else Int
1
Int
x <- (Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) (Int -> Int) -> ST s Int -> ST s Int
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> MVector (PrimState (ST s)) Int -> Int -> ST s Int
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> m a
VUM.unsafeRead MVector s Int
MVector (PrimState (ST s)) Int
v (Int
v1 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
j)
Int
y <- (Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) (Int -> Int) -> ST s Int -> ST s Int
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> MVector (PrimState (ST s)) Int -> Int -> ST s Int
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> m a
VUM.unsafeRead MVector s Int
MVector (PrimState (ST s)) Int
v (Int
v0 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
j Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
Int
z <- (Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
cost) (Int -> Int) -> ST s Int -> ST s Int
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> MVector (PrimState (ST s)) Int -> Int -> ST s Int
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> m a
VUM.unsafeRead MVector s Int
MVector (PrimState (ST s)) Int
v (Int
v0 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
j)
MVector (PrimState (ST s)) Int -> Int -> Int -> ST s ()
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
VUM.unsafeWrite MVector s Int
MVector (PrimState (ST s)) Int
v (Int
v1 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
j Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) (Int -> Int -> Int
forall a. Ord a => a -> a -> a
min Int
x (Int -> Int -> Int
forall a. Ord a => a -> a -> a
min Int
y Int
z))
Int -> Int -> ST s ()
goj (Int
j Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) (Int
nb Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
db)
Bool -> ST s () -> ST s ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
lena) (ST s () -> ST s ()) -> ST s () -> ST s ()
forall a b. (a -> b) -> a -> b
$ do
MVector (PrimState (ST s)) Int -> Int -> Int -> ST s ()
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
VUM.unsafeWrite MVector s Int
MVector (PrimState (ST s)) Int
v Int
v1 (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
Int -> Int -> ST s ()
goj Int
0 Int
0
Int -> Int -> Int -> Int -> ST s ()
goi (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) (Int
na Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
da) Int
v1 Int
v0
Int -> ST s ()
gov Int
0
Int -> Int -> Int -> Int -> ST s ()
goi Int
0 Int
0 Int
0 Int
v_len
Int
ld <- MVector (PrimState (ST s)) Int -> Int -> ST s Int
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> m a
VUM.unsafeRead MVector s Int
MVector (PrimState (ST s)) Int
v (Int
lenb Int -> Int -> Int
forall a. Num a => a -> a -> a
+ if Int -> Bool
forall a. Integral a => a -> Bool
even Int
lena then Int
0 else Int
v_len)
(Int, Int) -> ST s (Int, Int)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
ld, Int
lenm)
where
lena :: Int
lena = Text -> Int
T.length Text
a
lenb :: Int
lenb = Text -> Int
T.length Text
b
lenm :: Int
lenm = Int -> Int -> Int
forall a. Ord a => a -> a -> a
max Int
lena Int
lenb
{-# INLINE levenshtein_ #-}
damerauLevenshtein :: Text -> Text -> Int
damerauLevenshtein :: Text -> Text -> Int
damerauLevenshtein Text
a Text
b = (Int, Int) -> Int
forall a b. (a, b) -> a
fst (Text -> Text -> (Int, Int)
damerauLevenshtein_ Text
a Text
b)
damerauLevenshteinNorm :: Text -> Text -> Ratio Int
damerauLevenshteinNorm :: Text -> Text -> Ratio Int
damerauLevenshteinNorm = (Text -> Text -> (Int, Int)) -> Text -> Text -> Ratio Int
norm Text -> Text -> (Int, Int)
damerauLevenshtein_
damerauLevenshtein_ :: Text -> Text -> (Int, Int)
damerauLevenshtein_ :: Text -> Text -> (Int, Int)
damerauLevenshtein_ Text
a Text
b
| Text -> Bool
T.null Text
a = (Int
lenb, Int
lenm)
| Text -> Bool
T.null Text
b = (Int
lena, Int
lenm)
| Bool
otherwise = (forall s. ST s (Int, Int)) -> (Int, Int)
forall a. (forall s. ST s a) -> a
runST ((forall s. ST s (Int, Int)) -> (Int, Int))
-> (forall s. ST s (Int, Int)) -> (Int, Int)
forall a b. (a -> b) -> a -> b
$ do
let v_len :: Int
v_len = Int
lenb Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1
MVector s Int
v <- Int -> ST s (MVector (PrimState (ST s)) Int)
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
Int -> m (MVector (PrimState m) a)
VUM.unsafeNew (Int
v_len Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
3)
let gov :: Int -> ST s ()
gov !Int
i =
Bool -> ST s () -> ST s ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
v_len) (ST s () -> ST s ()) -> ST s () -> ST s ()
forall a b. (a -> b) -> a -> b
$ do
MVector (PrimState (ST s)) Int -> Int -> Int -> ST s ()
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
VUM.unsafeWrite MVector s Int
MVector (PrimState (ST s)) Int
v Int
i Int
i
Int -> ST s ()
gov (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
goi :: Int -> Int -> Char -> Int -> Int -> Int -> ST s ()
goi !Int
i !Int
na !Char
ai_1 !Int
v0 !Int
v1 !Int
v2 = do
let !(TU.Iter Char
ai Int
da) = Text -> Int -> Iter
TU.iter Text
a Int
na
goj :: Int -> Int -> Char -> ST s ()
goj !Int
j !Int
nb !Char
bj_1 =
Bool -> ST s () -> ST s ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Int
j Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
lenb) (ST s () -> ST s ()) -> ST s () -> ST s ()
forall a b. (a -> b) -> a -> b
$ do
let !(TU.Iter Char
bj Int
db) = Text -> Int -> Iter
TU.iter Text
b Int
nb
cost :: Int
cost = if Char
ai Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
bj then Int
0 else Int
1
Int
x <- (Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) (Int -> Int) -> ST s Int -> ST s Int
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> MVector (PrimState (ST s)) Int -> Int -> ST s Int
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> m a
VUM.unsafeRead MVector s Int
MVector (PrimState (ST s)) Int
v (Int
v1 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
j)
Int
y <- (Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) (Int -> Int) -> ST s Int -> ST s Int
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> MVector (PrimState (ST s)) Int -> Int -> ST s Int
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> m a
VUM.unsafeRead MVector s Int
MVector (PrimState (ST s)) Int
v (Int
v0 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
j Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
Int
z <- (Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
cost) (Int -> Int) -> ST s Int -> ST s Int
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> MVector (PrimState (ST s)) Int -> Int -> ST s Int
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> m a
VUM.unsafeRead MVector s Int
MVector (PrimState (ST s)) Int
v (Int
v0 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
j)
let g :: Int
g = Int -> Int -> Int
forall a. Ord a => a -> a -> a
min Int
x (Int -> Int -> Int
forall a. Ord a => a -> a -> a
min Int
y Int
z)
Int
val <- (Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
cost) (Int -> Int) -> ST s Int -> ST s Int
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> MVector (PrimState (ST s)) Int -> Int -> ST s Int
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> m a
VUM.unsafeRead MVector s Int
MVector (PrimState (ST s)) Int
v (Int
v2 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
j Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)
MVector (PrimState (ST s)) Int -> Int -> Int -> ST s ()
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
VUM.unsafeWrite MVector s Int
MVector (PrimState (ST s)) Int
v (Int
v1 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
j Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) (Int -> ST s ()) -> Int -> ST s ()
forall a b. (a -> b) -> a -> b
$
if Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
0 Bool -> Bool -> Bool
&& Int
j Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
0 Bool -> Bool -> Bool
&& Char
ai Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
bj_1 Bool -> Bool -> Bool
&& Char
ai_1 Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
bj Bool -> Bool -> Bool
&& Int
val Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
g
then Int
val
else Int
g
Int -> Int -> Char -> ST s ()
goj (Int
j Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) (Int
nb Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
db) Char
bj
Bool -> ST s () -> ST s ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
lena) (ST s () -> ST s ()) -> ST s () -> ST s ()
forall a b. (a -> b) -> a -> b
$ do
MVector (PrimState (ST s)) Int -> Int -> Int -> ST s ()
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
VUM.unsafeWrite MVector s Int
MVector (PrimState (ST s)) Int
v Int
v1 (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
Int -> Int -> Char -> ST s ()
goj Int
0 Int
0 Char
'a'
Int -> Int -> Char -> Int -> Int -> Int -> ST s ()
goi (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) (Int
na Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
da) Char
ai Int
v1 Int
v2 Int
v0
Int -> ST s ()
gov Int
0
Int -> Int -> Char -> Int -> Int -> Int -> ST s ()
goi Int
0 Int
0 Char
'a' Int
0 Int
v_len (Int
v_len Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
2)
Int
ld <- MVector (PrimState (ST s)) Int -> Int -> ST s Int
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> m a
VUM.unsafeRead MVector s Int
MVector (PrimState (ST s)) Int
v (Int
lenb Int -> Int -> Int
forall a. Num a => a -> a -> a
+ (Int
lena Int -> Int -> Int
forall a. Integral a => a -> a -> a
`mod` Int
3) Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
v_len)
(Int, Int) -> ST s (Int, Int)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
ld, Int
lenm)
where
lena :: Int
lena = Text -> Int
T.length Text
a
lenb :: Int
lenb = Text -> Int
T.length Text
b
lenm :: Int
lenm = Int -> Int -> Int
forall a. Ord a => a -> a -> a
max Int
lena Int
lenb
{-# INLINE damerauLevenshtein_ #-}
overlap :: Text -> Text -> Ratio Int
overlap :: Text -> Text -> Ratio Int
overlap Text
a Text
b =
if Int
d Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0
then Int
1 Int -> Int -> Ratio Int
forall a. Integral a => a -> a -> Ratio a
% Int
1
else Map Char Int -> Map Char Int -> Int
intersectionSize (Text -> Map Char Int
mkTextMap Text
a) (Text -> Map Char Int
mkTextMap Text
b) Int -> Int -> Ratio Int
forall a. Integral a => a -> a -> Ratio a
% Int
d
where
d :: Int
d = Int -> Int -> Int
forall a. Ord a => a -> a -> a
min (Text -> Int
T.length Text
a) (Text -> Int
T.length Text
b)
jaccard :: Text -> Text -> Ratio Int
jaccard :: Text -> Text -> Ratio Int
jaccard Text
a Text
b =
if Int
d Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0
then Int
1 Int -> Int -> Ratio Int
forall a. Integral a => a -> a -> Ratio a
% Int
1
else Map Char Int -> Map Char Int -> Int
intersectionSize Map Char Int
ma Map Char Int
mb Int -> Int -> Ratio Int
forall a. Integral a => a -> a -> Ratio a
% Int
d
where
ma :: Map Char Int
ma = Text -> Map Char Int
mkTextMap Text
a
mb :: Map Char Int
mb = Text -> Map Char Int
mkTextMap Text
b
d :: Int
d = Map Char Int -> Map Char Int -> Int
unionSize Map Char Int
ma Map Char Int
mb
mkTextMap :: Text -> Map Char Int
mkTextMap :: Text -> Map Char Int
mkTextMap = (Map Char Int -> Char -> Map Char Int)
-> Map Char Int -> Text -> Map Char Int
forall a. (a -> Char -> a) -> a -> Text -> a
T.foldl' Map Char Int -> Char -> Map Char Int
forall k a. (Ord k, Num a) => Map k a -> k -> Map k a
f Map Char Int
forall k a. Map k a
M.empty
where
f :: Map k a -> k -> Map k a
f Map k a
m k
ch = (a -> a -> a) -> k -> a -> Map k a -> Map k a
forall k a. Ord k => (a -> a -> a) -> k -> a -> Map k a -> Map k a
M.insertWith a -> a -> a
forall a. Num a => a -> a -> a
(+) k
ch a
1 Map k a
m
{-# INLINE mkTextMap #-}
intersectionSize :: Map Char Int -> Map Char Int -> Int
intersectionSize :: Map Char Int -> Map Char Int -> Int
intersectionSize Map Char Int
a Map Char Int
b = (Int -> Int -> Int) -> Int -> Map Char Int -> Int
forall a b k. (a -> b -> a) -> a -> Map k b -> a
M.foldl' Int -> Int -> Int
forall a. Num a => a -> a -> a
(+) Int
0 ((Int -> Int -> Int) -> Map Char Int -> Map Char Int -> Map Char Int
forall k a b c.
Ord k =>
(a -> b -> c) -> Map k a -> Map k b -> Map k c
M.intersectionWith Int -> Int -> Int
forall a. Ord a => a -> a -> a
min Map Char Int
a Map Char Int
b)
{-# INLINE intersectionSize #-}
unionSize :: Map Char Int -> Map Char Int -> Int
unionSize :: Map Char Int -> Map Char Int -> Int
unionSize Map Char Int
a Map Char Int
b = (Int -> Int -> Int) -> Int -> Map Char Int -> Int
forall a b k. (a -> b -> a) -> a -> Map k b -> a
M.foldl' Int -> Int -> Int
forall a. Num a => a -> a -> a
(+) Int
0 ((Int -> Int -> Int) -> Map Char Int -> Map Char Int -> Map Char Int
forall k a. Ord k => (a -> a -> a) -> Map k a -> Map k a -> Map k a
M.unionWith Int -> Int -> Int
forall a. Ord a => a -> a -> a
max Map Char Int
a Map Char Int
b)
{-# INLINE unionSize #-}
hamming :: Text -> Text -> Maybe Int
hamming :: Text -> Text -> Maybe Int
hamming a :: Text
a@(T.Text Array
_ Int
_ Int
len) Text
b =
if Text -> Int
T.length Text
a Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Text -> Int
T.length Text
b
then Int -> Maybe Int
forall a. a -> Maybe a
Just (Int -> Int -> Int -> Int
forall t. Num t => Int -> Int -> t -> t
go Int
0 Int
0 Int
0)
else Maybe Int
forall a. Maybe a
Nothing
where
go :: Int -> Int -> t -> t
go !Int
na !Int
nb !t
r =
let !(TU.Iter Char
cha Int
da) = Text -> Int -> Iter
TU.iter Text
a Int
na
!(TU.Iter Char
chb Int
db) = Text -> Int -> Iter
TU.iter Text
b Int
nb
in if
| Int
na Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
len -> t
r
| Char
cha Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
chb -> Int -> Int -> t -> t
go (Int
na Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
da) (Int
nb Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
db) (t
r t -> t -> t
forall a. Num a => a -> a -> a
+ t
1)
| Bool
otherwise -> Int -> Int -> t -> t
go (Int
na Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
da) (Int
nb Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
db) t
r
jaro :: Text -> Text -> Ratio Int
jaro :: Text -> Text -> Ratio Int
jaro Text
a Text
b =
if Text -> Bool
T.null Text
a Bool -> Bool -> Bool
|| Text -> Bool
T.null Text
b
then Int
0 Int -> Int -> Ratio Int
forall a. Integral a => a -> a -> Ratio a
% Int
1
else (forall s. ST s (Ratio Int)) -> Ratio Int
forall a. (forall s. ST s a) -> a
runST ((forall s. ST s (Ratio Int)) -> Ratio Int)
-> (forall s. ST s (Ratio Int)) -> Ratio Int
forall a b. (a -> b) -> a -> b
$ do
let lena :: Int
lena = Text -> Int
T.length Text
a
lenb :: Int
lenb = Text -> Int
T.length Text
b
d :: Int
d =
if Int
lena Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
2 Bool -> Bool -> Bool
&& Int
lenb Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
2
then Int -> Int -> Int
forall a. Ord a => a -> a -> a
max Int
lena Int
lenb Int -> Int -> Int
forall a. Integral a => a -> a -> a
`quot` Int
2 Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1
else Int
0
MVector s Int
v <- Int -> Int -> ST s (MVector (PrimState (ST s)) Int)
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
Int -> a -> m (MVector (PrimState m) a)
VUM.replicate Int
lenb (Int
0 :: Int)
MVector s Int
r <- Int -> Int -> ST s (MVector (PrimState (ST s)) Int)
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
Int -> a -> m (MVector (PrimState m) a)
VUM.replicate Int
3 (Int
0 :: Int)
let goi :: Int -> Int -> Int -> ST s ()
goi !Int
i !Int
na !Int
fromb = do
let !(TU.Iter Char
ai Int
da) = Text -> Int -> Iter
TU.iter Text
a Int
na
(Int
from, Int
fromb') =
if Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
d
then (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
d, Int
fromb Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Text -> Int -> Int
TU.iter_ Text
b Int
fromb)
else (Int
0, Int
0)
to :: Int
to = Int -> Int -> Int
forall a. Ord a => a -> a -> a
min (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
d Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) Int
lenb
goj :: Int -> Int -> ST s ()
goj !Int
j !Int
nb =
Bool -> ST s () -> ST s ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Int
j Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
to) (ST s () -> ST s ()) -> ST s () -> ST s ()
forall a b. (a -> b) -> a -> b
$ do
let !(TU.Iter Char
bj Int
db) = Text -> Int -> Iter
TU.iter Text
b Int
nb
Bool
used <- (Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
1) (Int -> Bool) -> ST s Int -> ST s Bool
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> MVector (PrimState (ST s)) Int -> Int -> ST s Int
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> m a
VUM.unsafeRead MVector s Int
MVector (PrimState (ST s)) Int
v Int
j
if Bool -> Bool
not Bool
used Bool -> Bool -> Bool
&& Char
ai Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
bj
then do
Int
tj <- MVector (PrimState (ST s)) Int -> Int -> ST s Int
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> m a
VUM.unsafeRead MVector s Int
MVector (PrimState (ST s)) Int
r Int
0
if Int
j Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
tj
then MVector (PrimState (ST s)) Int -> (Int -> Int) -> Int -> ST s ()
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> (a -> a) -> Int -> m ()
VUM.unsafeModify MVector s Int
MVector (PrimState (ST s)) Int
r (Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) Int
2
else MVector (PrimState (ST s)) Int -> Int -> Int -> ST s ()
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
VUM.unsafeWrite MVector s Int
MVector (PrimState (ST s)) Int
r Int
0 Int
j
MVector (PrimState (ST s)) Int -> Int -> Int -> ST s ()
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
VUM.unsafeWrite MVector s Int
MVector (PrimState (ST s)) Int
v Int
j Int
1
MVector (PrimState (ST s)) Int -> (Int -> Int) -> Int -> ST s ()
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> (a -> a) -> Int -> m ()
VUM.unsafeModify MVector s Int
MVector (PrimState (ST s)) Int
r (Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) Int
1
else Int -> Int -> ST s ()
goj (Int
j Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) (Int
nb Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
db)
Bool -> ST s () -> ST s ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
lena) (ST s () -> ST s ()) -> ST s () -> ST s ()
forall a b. (a -> b) -> a -> b
$ do
Int -> Int -> ST s ()
goj Int
from Int
fromb
Int -> Int -> Int -> ST s ()
goi (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) (Int
na Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
da) Int
fromb'
Int -> Int -> Int -> ST s ()
goi Int
0 Int
0 Int
0
Int
m <- MVector (PrimState (ST s)) Int -> Int -> ST s Int
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> m a
VUM.unsafeRead MVector s Int
MVector (PrimState (ST s)) Int
r Int
1
Int
t <- MVector (PrimState (ST s)) Int -> Int -> ST s Int
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> m a
VUM.unsafeRead MVector s Int
MVector (PrimState (ST s)) Int
r Int
2
Ratio Int -> ST s (Ratio Int)
forall (m :: * -> *) a. Monad m => a -> m a
return (Ratio Int -> ST s (Ratio Int)) -> Ratio Int -> ST s (Ratio Int)
forall a b. (a -> b) -> a -> b
$
if Int
m Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0
then Int
0 Int -> Int -> Ratio Int
forall a. Integral a => a -> a -> Ratio a
% Int
1
else
( (Int
m Int -> Int -> Ratio Int
forall a. Integral a => a -> a -> Ratio a
% Int
lena)
Ratio Int -> Ratio Int -> Ratio Int
forall a. Num a => a -> a -> a
+ (Int
m Int -> Int -> Ratio Int
forall a. Integral a => a -> a -> Ratio a
% Int
lenb)
Ratio Int -> Ratio Int -> Ratio Int
forall a. Num a => a -> a -> a
+ ((Int
m Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
t) Int -> Int -> Ratio Int
forall a. Integral a => a -> a -> Ratio a
% Int
m)
)
Ratio Int -> Ratio Int -> Ratio Int
forall a. Fractional a => a -> a -> a
/ Ratio Int
3
jaroWinkler :: Text -> Text -> Ratio Int
jaroWinkler :: Text -> Text -> Ratio Int
jaroWinkler Text
a Text
b = Ratio Int
dj Ratio Int -> Ratio Int -> Ratio Int
forall a. Num a => a -> a -> a
+ (Int
1 Int -> Int -> Ratio Int
forall a. Integral a => a -> a -> Ratio a
% Int
10) Ratio Int -> Ratio Int -> Ratio Int
forall a. Num a => a -> a -> a
* Ratio Int
l Ratio Int -> Ratio Int -> Ratio Int
forall a. Num a => a -> a -> a
* (Ratio Int
1 Ratio Int -> Ratio Int -> Ratio Int
forall a. Num a => a -> a -> a
- Ratio Int
dj)
where
dj :: Ratio Int
dj = Ratio Int -> Ratio Int
forall a. a -> a
inline (Text -> Text -> Ratio Int
jaro Text
a Text
b)
l :: Ratio Int
l = Int -> Ratio Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Int -> Int
forall a. Ord a => a -> a -> a
min Int
4 (Text -> Text -> Int
commonPrefix Text
a Text
b))
commonPrefix :: Text -> Text -> Int
commonPrefix :: Text -> Text -> Int
commonPrefix Text
a Text
b = case Text -> Text -> Maybe (Text, Text, Text)
T.commonPrefixes Text
a Text
b of
Maybe (Text, Text, Text)
Nothing -> Int
0
Just (Text
pref, Text
_, Text
_) -> Text -> Int
T.length Text
pref
{-# INLINE commonPrefix #-}
norm :: (Text -> Text -> (Int, Int)) -> Text -> Text -> Ratio Int
norm :: (Text -> Text -> (Int, Int)) -> Text -> Text -> Ratio Int
norm Text -> Text -> (Int, Int)
f Text
a Text
b =
let (Int
r, Int
l) = Text -> Text -> (Int, Int)
f Text
a Text
b
in if Int
r Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0
then Int
1 Int -> Int -> Ratio Int
forall a. Integral a => a -> a -> Ratio a
% Int
1
else Int
1 Int -> Int -> Ratio Int
forall a. Integral a => a -> a -> Ratio a
% Int
1 Ratio Int -> Ratio Int -> Ratio Int
forall a. Num a => a -> a -> a
- Int
r Int -> Int -> Ratio Int
forall a. Integral a => a -> a -> Ratio a
% Int
l
{-# INLINE norm #-}