{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE RecordWildCards #-}
module Data.FuzzySet.Internal
( FuzzySet(..)
, FuzzySetItem(..)
, GramInfo(..)
, FuzzyMatch
, grams
, gramVector
, matches
, getMatches
, add_
, addMany_
, normalized
, norm
, distance
) where
import Control.Monad.State (MonadState, get, modify)
import Data.Bifunctor (second)
import Data.Char (isAlphaNum, isSpace)
import Data.Foldable (traverse_)
import Data.Function ((&))
import Data.FuzzySet.Utils (enclosedIn, substr, (<$$>))
import Data.HashMap.Strict (HashMap, elems, foldrWithKey, insert, insertWith, lookup, lookupDefault)
import qualified Data.HashMap.Strict as HashMap
import Data.List (sortBy)
import Data.Maybe (fromMaybe)
import Data.Ord (Down(..), comparing)
import Data.Text (Text)
import qualified Data.Text as Text
import Data.Text.Metrics (levenshteinNorm)
import Data.Vector (Vector, (!?))
import qualified Data.Vector as Vector
import Prelude hiding (lookup)
data FuzzySetItem = FuzzySetItem
{ FuzzySetItem -> Double
vectorMagnitude :: !Double
, FuzzySetItem -> Text
normalizedEntry :: !Text
} deriving (FuzzySetItem -> FuzzySetItem -> Bool
(FuzzySetItem -> FuzzySetItem -> Bool)
-> (FuzzySetItem -> FuzzySetItem -> Bool) -> Eq FuzzySetItem
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: FuzzySetItem -> FuzzySetItem -> Bool
== :: FuzzySetItem -> FuzzySetItem -> Bool
$c/= :: FuzzySetItem -> FuzzySetItem -> Bool
/= :: FuzzySetItem -> FuzzySetItem -> Bool
Eq, Int -> FuzzySetItem -> ShowS
[FuzzySetItem] -> ShowS
FuzzySetItem -> String
(Int -> FuzzySetItem -> ShowS)
-> (FuzzySetItem -> String)
-> ([FuzzySetItem] -> ShowS)
-> Show FuzzySetItem
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> FuzzySetItem -> ShowS
showsPrec :: Int -> FuzzySetItem -> ShowS
$cshow :: FuzzySetItem -> String
show :: FuzzySetItem -> String
$cshowList :: [FuzzySetItem] -> ShowS
showList :: [FuzzySetItem] -> ShowS
Show)
data GramInfo = GramInfo
{ GramInfo -> Int
itemIndex :: !Int
, GramInfo -> Int
gramCount :: !Int
} deriving (GramInfo -> GramInfo -> Bool
(GramInfo -> GramInfo -> Bool)
-> (GramInfo -> GramInfo -> Bool) -> Eq GramInfo
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: GramInfo -> GramInfo -> Bool
== :: GramInfo -> GramInfo -> Bool
$c/= :: GramInfo -> GramInfo -> Bool
/= :: GramInfo -> GramInfo -> Bool
Eq, Int -> GramInfo -> ShowS
[GramInfo] -> ShowS
GramInfo -> String
(Int -> GramInfo -> ShowS)
-> (GramInfo -> String) -> ([GramInfo] -> ShowS) -> Show GramInfo
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> GramInfo -> ShowS
showsPrec :: Int -> GramInfo -> ShowS
$cshow :: GramInfo -> String
show :: GramInfo -> String
$cshowList :: [GramInfo] -> ShowS
showList :: [GramInfo] -> ShowS
Show)
data FuzzySet = FuzzySet
{ FuzzySet -> HashMap Text Text
exactSet :: !(HashMap Text Text)
, FuzzySet -> HashMap Text [GramInfo]
matchDict :: !(HashMap Text [GramInfo])
, FuzzySet -> HashMap Int (Vector FuzzySetItem)
items :: !(HashMap Int (Vector FuzzySetItem))
, FuzzySet -> Int
gramSizeLower :: !Int
, FuzzySet -> Int
gramSizeUpper :: !Int
, FuzzySet -> Bool
useLevenshtein :: !Bool
} deriving (FuzzySet -> FuzzySet -> Bool
(FuzzySet -> FuzzySet -> Bool)
-> (FuzzySet -> FuzzySet -> Bool) -> Eq FuzzySet
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: FuzzySet -> FuzzySet -> Bool
== :: FuzzySet -> FuzzySet -> Bool
$c/= :: FuzzySet -> FuzzySet -> Bool
/= :: FuzzySet -> FuzzySet -> Bool
Eq, Int -> FuzzySet -> ShowS
[FuzzySet] -> ShowS
FuzzySet -> String
(Int -> FuzzySet -> ShowS)
-> (FuzzySet -> String) -> ([FuzzySet] -> ShowS) -> Show FuzzySet
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> FuzzySet -> ShowS
showsPrec :: Int -> FuzzySet -> ShowS
$cshow :: FuzzySet -> String
show :: FuzzySet -> String
$cshowList :: [FuzzySet] -> ShowS
showList :: [FuzzySet] -> ShowS
Show)
type FuzzyMatch = (Double, Text)
matches :: FuzzySet -> HashMap Text Int -> HashMap Int Int
matches :: FuzzySet -> HashMap Text Int -> HashMap Int Int
matches FuzzySet{Bool
Int
HashMap Int (Vector FuzzySetItem)
HashMap Text [GramInfo]
HashMap Text Text
exactSet :: FuzzySet -> HashMap Text Text
matchDict :: FuzzySet -> HashMap Text [GramInfo]
items :: FuzzySet -> HashMap Int (Vector FuzzySetItem)
gramSizeLower :: FuzzySet -> Int
gramSizeUpper :: FuzzySet -> Int
useLevenshtein :: FuzzySet -> Bool
exactSet :: HashMap Text Text
matchDict :: HashMap Text [GramInfo]
items :: HashMap Int (Vector FuzzySetItem)
gramSizeLower :: Int
gramSizeUpper :: Int
useLevenshtein :: Bool
..} = (Text -> Int -> HashMap Int Int -> HashMap Int Int)
-> HashMap Int Int -> HashMap Text Int -> HashMap Int Int
forall k v a. (k -> v -> a -> a) -> a -> HashMap k v -> a
foldrWithKey Text -> Int -> HashMap Int Int -> HashMap Int Int
go HashMap Int Int
forall a. Monoid a => a
mempty
where
go :: Text -> Int -> HashMap Int Int -> HashMap Int Int
go Text
gram Int
count HashMap Int Int
hashMap =
Text -> HashMap Text [GramInfo] -> Maybe [GramInfo]
forall k v. (Eq k, Hashable k) => k -> HashMap k v -> Maybe v
HashMap.lookup Text
gram HashMap Text [GramInfo]
matchDict
Maybe [GramInfo]
-> (Maybe [GramInfo] -> HashMap Int Int) -> HashMap Int Int
forall a b. a -> (a -> b) -> b
& HashMap Int Int
-> ([GramInfo] -> HashMap Int Int)
-> Maybe [GramInfo]
-> HashMap Int Int
forall b a. b -> (a -> b) -> Maybe a -> b
maybe HashMap Int Int
hashMap ((GramInfo -> HashMap Int Int -> HashMap Int Int)
-> HashMap Int Int -> [GramInfo] -> HashMap Int Int
forall a b. (a -> b -> b) -> b -> [a] -> b
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr (Int -> GramInfo -> HashMap Int Int -> HashMap Int Int
insert_ Int
count) HashMap Int Int
hashMap)
insert_ :: Int -> GramInfo -> HashMap Int Int -> HashMap Int Int
insert_ Int
count GramInfo{Int
itemIndex :: GramInfo -> Int
gramCount :: GramInfo -> Int
itemIndex :: Int
gramCount :: Int
..} =
(Int -> Int -> Int)
-> Int -> Int -> HashMap Int Int -> HashMap Int Int
forall k v.
(Eq k, Hashable k) =>
(v -> v -> v) -> k -> v -> HashMap k v -> HashMap k v
insertWith Int -> Int -> Int
forall a. Num a => a -> a -> a
(+) Int
itemIndex (Int
gramCount Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
count)
getMatches :: FuzzySet -> Text -> Double -> Int -> [FuzzyMatch]
getMatches :: FuzzySet -> Text -> Double -> Int -> [FuzzyMatch]
getMatches FuzzySet{Bool
Int
HashMap Int (Vector FuzzySetItem)
HashMap Text [GramInfo]
HashMap Text Text
exactSet :: FuzzySet -> HashMap Text Text
matchDict :: FuzzySet -> HashMap Text [GramInfo]
items :: FuzzySet -> HashMap Int (Vector FuzzySetItem)
gramSizeLower :: FuzzySet -> Int
gramSizeUpper :: FuzzySet -> Int
useLevenshtein :: FuzzySet -> Bool
exactSet :: HashMap Text Text
matchDict :: HashMap Text [GramInfo]
items :: HashMap Int (Vector FuzzySetItem)
gramSizeLower :: Int
gramSizeUpper :: Int
useLevenshtein :: Bool
..} Text
str Double
minScore Int
gramSize =
[FuzzyMatch]
results
[FuzzyMatch] -> ([FuzzyMatch] -> [FuzzyMatch]) -> [FuzzyMatch]
forall a b. a -> (a -> b) -> b
& (FuzzyMatch -> Bool) -> [FuzzyMatch] -> [FuzzyMatch]
forall a. (a -> Bool) -> [a] -> [a]
filter ((Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
>= Double
minScore) (Double -> Bool) -> (FuzzyMatch -> Double) -> FuzzyMatch -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. FuzzyMatch -> Double
forall a b. (a, b) -> a
fst)
[FuzzyMatch] -> ([FuzzyMatch] -> [FuzzyMatch]) -> [FuzzyMatch]
forall a b. a -> (a -> b) -> b
& (FuzzyMatch -> FuzzyMatch) -> [FuzzyMatch] -> [FuzzyMatch]
forall a b. (a -> b) -> [a] -> [b]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ((Text -> Text) -> FuzzyMatch -> FuzzyMatch
forall b c a. (b -> c) -> (a, b) -> (a, c)
forall (p :: * -> * -> *) b c a.
Bifunctor p =>
(b -> c) -> p a b -> p a c
second ((Text -> HashMap Text Text -> Text)
-> HashMap Text Text -> Text -> Text
forall a b c. (a -> b -> c) -> b -> a -> c
flip (Text -> Text -> HashMap Text Text -> Text
forall k v. (Eq k, Hashable k) => v -> k -> HashMap k v -> v
lookupDefault Text
forall a. Monoid a => a
mempty) HashMap Text Text
exactSet))
where
results :: [FuzzyMatch]
results =
let sorted :: [FuzzyMatch]
sorted =
FuzzySet -> HashMap Text Int -> HashMap Int Int
matches FuzzySet{Bool
Int
HashMap Int (Vector FuzzySetItem)
HashMap Text [GramInfo]
HashMap Text Text
exactSet :: HashMap Text Text
matchDict :: HashMap Text [GramInfo]
items :: HashMap Int (Vector FuzzySetItem)
gramSizeLower :: Int
gramSizeUpper :: Int
useLevenshtein :: Bool
exactSet :: HashMap Text Text
matchDict :: HashMap Text [GramInfo]
items :: HashMap Int (Vector FuzzySetItem)
gramSizeLower :: Int
gramSizeUpper :: Int
useLevenshtein :: Bool
..} HashMap Text Int
queryVector
HashMap Int Int
-> (HashMap Int Int -> [FuzzyMatch]) -> [FuzzyMatch]
forall a b. a -> (a -> b) -> b
& (Int -> Int -> [FuzzyMatch] -> [FuzzyMatch])
-> [FuzzyMatch] -> HashMap Int Int -> [FuzzyMatch]
forall k v a. (k -> v -> a -> a) -> a -> HashMap k v -> a
foldrWithKey Int -> Int -> [FuzzyMatch] -> [FuzzyMatch]
forall {a}. Integral a => Int -> a -> [FuzzyMatch] -> [FuzzyMatch]
go []
[FuzzyMatch] -> ([FuzzyMatch] -> [FuzzyMatch]) -> [FuzzyMatch]
forall a b. a -> (a -> b) -> b
& (FuzzyMatch -> FuzzyMatch -> Ordering)
-> [FuzzyMatch] -> [FuzzyMatch]
forall a. (a -> a -> Ordering) -> [a] -> [a]
sortBy ((FuzzyMatch -> Down Double) -> FuzzyMatch -> FuzzyMatch -> Ordering
forall a b. Ord a => (b -> a) -> b -> b -> Ordering
comparing (Double -> Down Double
forall a. a -> Down a
Down (Double -> Down Double)
-> (FuzzyMatch -> Double) -> FuzzyMatch -> Down Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
. FuzzyMatch -> Double
forall a b. (a, b) -> a
fst))
in if Bool
useLevenshtein
then
[FuzzyMatch]
sorted
[FuzzyMatch] -> ([FuzzyMatch] -> [FuzzyMatch]) -> [FuzzyMatch]
forall a b. a -> (a -> b) -> b
& Int -> [FuzzyMatch] -> [FuzzyMatch]
forall a. Int -> [a] -> [a]
take Int
50
[FuzzyMatch] -> ([FuzzyMatch] -> [FuzzyMatch]) -> [FuzzyMatch]
forall a b. a -> (a -> b) -> b
& (FuzzyMatch -> FuzzyMatch) -> [FuzzyMatch] -> [FuzzyMatch]
forall a b. (a -> b) -> [a] -> [b]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (\(Double
_, Text
entry) -> (Text -> Text -> Double
distance Text
str Text
entry, Text
entry))
[FuzzyMatch] -> ([FuzzyMatch] -> [FuzzyMatch]) -> [FuzzyMatch]
forall a b. a -> (a -> b) -> b
& (FuzzyMatch -> FuzzyMatch -> Ordering)
-> [FuzzyMatch] -> [FuzzyMatch]
forall a. (a -> a -> Ordering) -> [a] -> [a]
sortBy ((FuzzyMatch -> Down Double) -> FuzzyMatch -> FuzzyMatch -> Ordering
forall a b. Ord a => (b -> a) -> b -> b -> Ordering
comparing (Double -> Down Double
forall a. a -> Down a
Down (Double -> Down Double)
-> (FuzzyMatch -> Double) -> FuzzyMatch -> Down Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
. FuzzyMatch -> Double
forall a b. (a, b) -> a
fst))
else [FuzzyMatch]
sorted
queryMagnitude :: Double
queryMagnitude = [Int] -> Double
norm (HashMap Text Int -> [Int]
forall k v. HashMap k v -> [v]
elems HashMap Text Int
queryVector)
queryVector :: HashMap Text Int
queryVector = Text -> Int -> HashMap Text Int
gramVector Text
str Int
gramSize
itemsVector :: Vector FuzzySetItem
itemsVector = Vector FuzzySetItem
-> Maybe (Vector FuzzySetItem) -> Vector FuzzySetItem
forall a. a -> Maybe a -> a
fromMaybe Vector FuzzySetItem
forall a. Monoid a => a
mempty (Int
gramSize Int
-> HashMap Int (Vector FuzzySetItem) -> Maybe (Vector FuzzySetItem)
forall k v. (Eq k, Hashable k) => k -> HashMap k v -> Maybe v
`lookup` HashMap Int (Vector FuzzySetItem)
items)
go :: Int -> a -> [FuzzyMatch] -> [FuzzyMatch]
go Int
index a
score [FuzzyMatch]
list =
case Vector FuzzySetItem
itemsVector Vector FuzzySetItem -> Int -> Maybe FuzzySetItem
forall a. Vector a -> Int -> Maybe a
!? Int
index of
Maybe FuzzySetItem
Nothing ->
[FuzzyMatch]
list
Just FuzzySetItem{Double
Text
vectorMagnitude :: FuzzySetItem -> Double
normalizedEntry :: FuzzySetItem -> Text
vectorMagnitude :: Double
normalizedEntry :: Text
..} ->
( a -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral a
score Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ (Double
queryMagnitude Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
vectorMagnitude)
, Text
normalizedEntry
) FuzzyMatch -> [FuzzyMatch] -> [FuzzyMatch]
forall a. a -> [a] -> [a]
: [FuzzyMatch]
list
add_ :: (MonadState FuzzySet m) => Text -> m Bool
add_ :: forall (m :: * -> *). MonadState FuzzySet m => Text -> m Bool
add_ Text
str = do
FuzzySet{Bool
Int
HashMap Int (Vector FuzzySetItem)
HashMap Text [GramInfo]
HashMap Text Text
exactSet :: FuzzySet -> HashMap Text Text
matchDict :: FuzzySet -> HashMap Text [GramInfo]
items :: FuzzySet -> HashMap Int (Vector FuzzySetItem)
gramSizeLower :: FuzzySet -> Int
gramSizeUpper :: FuzzySet -> Int
useLevenshtein :: FuzzySet -> Bool
exactSet :: HashMap Text Text
matchDict :: HashMap Text [GramInfo]
items :: HashMap Int (Vector FuzzySetItem)
gramSizeLower :: Int
gramSizeUpper :: Int
useLevenshtein :: Bool
..} <- m FuzzySet
forall s (m :: * -> *). MonadState s m => m s
get
if Text
key Text -> HashMap Text Text -> Bool
forall a. Eq a => a -> HashMap Text a -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` HashMap Text Text
exactSet
then
Bool -> m Bool
forall a. a -> m a
forall (f :: * -> *) a. Applicative f => a -> f a
pure Bool
False
else do
(Int -> m ()) -> [Int] -> m ()
forall (t :: * -> *) (f :: * -> *) a b.
(Foldable t, Applicative f) =>
(a -> f b) -> t a -> f ()
traverse_ ((FuzzySet -> FuzzySet) -> m ()
forall s (m :: * -> *). MonadState s m => (s -> s) -> m ()
modify ((FuzzySet -> FuzzySet) -> m ())
-> (Int -> FuzzySet -> FuzzySet) -> Int -> m ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> FuzzySet -> FuzzySet
updateDict) [Int
gramSizeLower .. Int
gramSizeUpper]
(FuzzySet -> FuzzySet) -> m ()
forall s (m :: * -> *). MonadState s m => (s -> s) -> m ()
modify (Text -> Text -> FuzzySet -> FuzzySet
updateExactSet Text
key Text
str)
Bool -> m Bool
forall a. a -> m a
forall (f :: * -> *) a. Applicative f => a -> f a
pure Bool
True
where
key :: Text
key = Text -> Text
Text.toLower Text
str
updateDict :: Int -> FuzzySet -> FuzzySet
updateDict Int
size_ FuzzySet{Bool
Int
HashMap Int (Vector FuzzySetItem)
HashMap Text [GramInfo]
HashMap Text Text
exactSet :: FuzzySet -> HashMap Text Text
matchDict :: FuzzySet -> HashMap Text [GramInfo]
items :: FuzzySet -> HashMap Int (Vector FuzzySetItem)
gramSizeLower :: FuzzySet -> Int
gramSizeUpper :: FuzzySet -> Int
useLevenshtein :: FuzzySet -> Bool
exactSet :: HashMap Text Text
matchDict :: HashMap Text [GramInfo]
items :: HashMap Int (Vector FuzzySetItem)
gramSizeLower :: Int
gramSizeUpper :: Int
useLevenshtein :: Bool
..} =
let
itemVector :: Vector FuzzySetItem
itemVector =
HashMap Int (Vector FuzzySetItem)
items
HashMap Int (Vector FuzzySetItem)
-> (HashMap Int (Vector FuzzySetItem)
-> Maybe (Vector FuzzySetItem))
-> Maybe (Vector FuzzySetItem)
forall a b. a -> (a -> b) -> b
& Int
-> HashMap Int (Vector FuzzySetItem) -> Maybe (Vector FuzzySetItem)
forall k v. (Eq k, Hashable k) => k -> HashMap k v -> Maybe v
HashMap.lookup Int
size_
Maybe (Vector FuzzySetItem)
-> (Maybe (Vector FuzzySetItem) -> Vector FuzzySetItem)
-> Vector FuzzySetItem
forall a b. a -> (a -> b) -> b
& Vector FuzzySetItem
-> Maybe (Vector FuzzySetItem) -> Vector FuzzySetItem
forall a. a -> Maybe a -> a
fromMaybe Vector FuzzySetItem
forall a. Vector a
Vector.empty
grams_ :: HashMap Text Int
grams_ =
Text -> Int -> HashMap Text Int
gramVector Text
key Int
size_
insertInfo :: k -> Int -> HashMap k [GramInfo] -> HashMap k [GramInfo]
insertInfo k
gram Int
count =
let info :: GramInfo
info = Int -> Int -> GramInfo
GramInfo (Vector FuzzySetItem -> Int
forall a. Vector a -> Int
Vector.length Vector FuzzySetItem
itemVector) Int
count
in ([GramInfo] -> [GramInfo] -> [GramInfo])
-> k -> [GramInfo] -> HashMap k [GramInfo] -> HashMap k [GramInfo]
forall k v.
(Eq k, Hashable k) =>
(v -> v -> v) -> k -> v -> HashMap k v -> HashMap k v
HashMap.insertWith [GramInfo] -> [GramInfo] -> [GramInfo]
forall a. Semigroup a => a -> a -> a
(<>) k
gram [GramInfo
info]
item :: FuzzySetItem
item =
Double -> Text -> FuzzySetItem
FuzzySetItem (HashMap Text Int -> [Int]
forall k v. HashMap k v -> [v]
elems HashMap Text Int
grams_ [Int] -> ([Int] -> Double) -> Double
forall a b. a -> (a -> b) -> b
& [Int] -> Double
norm) Text
key
in
FuzzySet
{ items :: HashMap Int (Vector FuzzySetItem)
items = Int
-> Vector FuzzySetItem
-> HashMap Int (Vector FuzzySetItem)
-> HashMap Int (Vector FuzzySetItem)
forall k v.
(Eq k, Hashable k) =>
k -> v -> HashMap k v -> HashMap k v
insert Int
size_ (Vector FuzzySetItem
itemVector Vector FuzzySetItem -> FuzzySetItem -> Vector FuzzySetItem
forall a. Vector a -> a -> Vector a
`Vector.snoc` FuzzySetItem
item) HashMap Int (Vector FuzzySetItem)
items
, matchDict :: HashMap Text [GramInfo]
matchDict = (Text -> Int -> HashMap Text [GramInfo] -> HashMap Text [GramInfo])
-> HashMap Text [GramInfo]
-> HashMap Text Int
-> HashMap Text [GramInfo]
forall k v a. (k -> v -> a -> a) -> a -> HashMap k v -> a
foldrWithKey Text -> Int -> HashMap Text [GramInfo] -> HashMap Text [GramInfo]
forall {k}.
Hashable k =>
k -> Int -> HashMap k [GramInfo] -> HashMap k [GramInfo]
insertInfo HashMap Text [GramInfo]
matchDict HashMap Text Int
grams_
, Bool
Int
HashMap Text Text
exactSet :: HashMap Text Text
gramSizeLower :: Int
gramSizeUpper :: Int
useLevenshtein :: Bool
exactSet :: HashMap Text Text
gramSizeLower :: Int
gramSizeUpper :: Int
useLevenshtein :: Bool
..
}
addMany_ :: (MonadState FuzzySet m) => [Text] -> m [Text]
addMany_ :: forall (m :: * -> *). MonadState FuzzySet m => [Text] -> m [Text]
addMany_ = [[Text]] -> [Text]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat ([[Text]] -> [Text])
-> ([Text] -> m [[Text]]) -> [Text] -> m [Text]
forall (f :: * -> *) (g :: * -> *) a b.
(Functor f, Functor g) =>
(a -> b) -> f (g a) -> f (g b)
<$$> (Text -> m [Text]) -> [Text] -> m [[Text]]
forall (t :: * -> *) (f :: * -> *) a b.
(Traversable t, Applicative f) =>
(a -> f b) -> t a -> f (t b)
forall (f :: * -> *) a b.
Applicative f =>
(a -> f b) -> [a] -> f [b]
traverse Text -> m [Text]
forall {m :: * -> *}. MonadState FuzzySet m => Text -> m [Text]
addOne
where
addOne :: Text -> m [Text]
addOne Text
str = do
Bool
p <- Text -> m Bool
forall (m :: * -> *). MonadState FuzzySet m => Text -> m Bool
add_ Text
str
[Text] -> m [Text]
forall a. a -> m a
forall (f :: * -> *) a. Applicative f => a -> f a
pure [Text
str | Bool
p]
gramVector :: Text -> Int -> HashMap Text Int
gramVector :: Text -> Int -> HashMap Text Int
gramVector = (Text -> HashMap Text Int -> HashMap Text Int)
-> HashMap Text Int -> [Text] -> HashMap Text Int
forall a b. (a -> b -> b) -> b -> [a] -> b
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr Text -> HashMap Text Int -> HashMap Text Int
forall {k} {v}.
(Hashable k, Num v) =>
k -> HashMap k v -> HashMap k v
insert_ HashMap Text Int
forall k v. HashMap k v
HashMap.empty ([Text] -> HashMap Text Int)
-> (Text -> Int -> [Text]) -> Text -> Int -> HashMap Text Int
forall (f :: * -> *) (g :: * -> *) a b.
(Functor f, Functor g) =>
(a -> b) -> f (g a) -> f (g b)
<$$> Text -> Int -> [Text]
grams
where
insert_ :: k -> HashMap k v -> HashMap k v
insert_ k
key = (v -> v -> v) -> k -> v -> HashMap k v -> HashMap k v
forall k v.
(Eq k, Hashable k) =>
(v -> v -> v) -> k -> v -> HashMap k v -> HashMap k v
HashMap.insertWith v -> v -> v
forall a. Num a => a -> a -> a
(+) k
key v
1
grams :: Text -> Int -> [Text]
grams :: Text -> Int -> [Text]
grams Text
input Int
size_
| Int
size_ Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
2 = String -> [Text]
forall a. HasCallStack => String -> a
error String
"gram size_ must be at least 2"
| Bool
otherwise = (Int -> Text -> Text) -> Text -> Int -> Text
forall a b c. (a -> b -> c) -> b -> a -> c
flip (Int -> Int -> Text -> Text
substr Int
size_) Text
normalizedInput (Int -> Text) -> [Int] -> [Text]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Int]
offsets
where
normalizedInput :: Text
normalizedInput = Text -> Text
normalized Text
input Text -> Char -> Text
`enclosedIn` Char
'-'
offsets :: [Int]
offsets = [Int
0 .. Text -> Int
Text.length Text
normalizedInput Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
size_]
normalized :: Text -> Text
normalized :: Text -> Text
normalized = (Char -> Bool) -> Text -> Text
Text.filter Char -> Bool
word (Text -> Text) -> (Text -> Text) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text
Text.toLower
where
word :: Char -> Bool
word Char
char
| Char -> Bool
isAlphaNum Char
char = Bool
True
| Char -> Bool
isSpace Char
char = Bool
True
| Char
char Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
',' = Bool
True
| Bool
otherwise = Bool
False
norm :: [Int] -> Double
norm :: [Int] -> Double
norm = Double -> Double
forall a. Floating a => a -> a
sqrt (Double -> Double) -> ([Int] -> Double) -> [Int] -> Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Double) -> ([Int] -> Int) -> [Int] -> Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Int] -> Int
forall a. Num a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum ([Int] -> Int) -> ([Int] -> [Int]) -> [Int] -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Int -> Int) -> [Int] -> [Int]
forall a b. (a -> b) -> [a] -> [b]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Int -> Int -> Int
forall a b. (Num a, Integral b) => a -> b -> a
^ (Int
2 :: Int))
distance :: Text -> Text -> Double
distance :: Text -> Text -> Double
distance = Ratio Int -> Double
forall a b. (Real a, Fractional b) => a -> b
realToFrac (Ratio Int -> Double)
-> (Text -> Text -> Ratio Int) -> Text -> Text -> Double
forall (f :: * -> *) (g :: * -> *) a b.
(Functor f, Functor g) =>
(a -> b) -> f (g a) -> f (g b)
<$$> Text -> Text -> Ratio Int
levenshteinNorm
updateExactSet :: Text -> Text -> FuzzySet -> FuzzySet
updateExactSet :: Text -> Text -> FuzzySet -> FuzzySet
updateExactSet Text
key Text
str FuzzySet{Bool
Int
HashMap Int (Vector FuzzySetItem)
HashMap Text [GramInfo]
HashMap Text Text
exactSet :: FuzzySet -> HashMap Text Text
matchDict :: FuzzySet -> HashMap Text [GramInfo]
items :: FuzzySet -> HashMap Int (Vector FuzzySetItem)
gramSizeLower :: FuzzySet -> Int
gramSizeUpper :: FuzzySet -> Int
useLevenshtein :: FuzzySet -> Bool
exactSet :: HashMap Text Text
matchDict :: HashMap Text [GramInfo]
items :: HashMap Int (Vector FuzzySetItem)
gramSizeLower :: Int
gramSizeUpper :: Int
useLevenshtein :: Bool
..} =
FuzzySet
{ exactSet :: HashMap Text Text
exactSet = Text -> Text -> HashMap Text Text -> HashMap Text Text
forall k v.
(Eq k, Hashable k) =>
k -> v -> HashMap k v -> HashMap k v
insert Text
key Text
str HashMap Text Text
exactSet
, Bool
Int
HashMap Int (Vector FuzzySetItem)
HashMap Text [GramInfo]
matchDict :: HashMap Text [GramInfo]
items :: HashMap Int (Vector FuzzySetItem)
gramSizeLower :: Int
gramSizeUpper :: Int
useLevenshtein :: Bool
matchDict :: HashMap Text [GramInfo]
items :: HashMap Int (Vector FuzzySetItem)
gramSizeLower :: Int
gramSizeUpper :: Int
useLevenshtein :: Bool
..
}