-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/
-- | Fuzzy set data structure for approximate string matching
--
-- Please see the README on GitHub at
-- https://github.com/laserpants/fuzzyset-haskell#readme
@package fuzzyset
@version 0.3.2
module Data.FuzzySet.Utils
(<$$>) :: (Functor f, Functor g) => (a -> b) -> f (g a) -> f (g b)
infixr 8 <$$>
(<$$$>) :: (Functor f, Functor g, Functor h) => (a -> b) -> f (g (h a)) -> f (g (h b))
infixr 8 <$$$>
safeHead :: [a] -> Maybe a
enclosedIn :: Text -> Char -> Text
substr :: Int -> Int -> Text -> Text
module Data.FuzzySet.Internal
-- | Main fuzzy string set data type.
data FuzzySet
FuzzySet :: !HashMap Text Text -> !HashMap Text [GramInfo] -> !HashMap Int (Vector FuzzySetItem) -> !Int -> !Int -> !Bool -> FuzzySet
[exactSet] :: FuzzySet -> !HashMap Text Text
[matchDict] :: FuzzySet -> !HashMap Text [GramInfo]
[items] :: FuzzySet -> !HashMap Int (Vector FuzzySetItem)
-- | Lower bound on gram sizes to use (inclusive)
[gramSizeLower] :: FuzzySet -> !Int
-- | Upper bound on gram sizes to use (inclusive)
[gramSizeUpper] :: FuzzySet -> !Int
-- | Whether or not to use the Levenshtein distance to determine the score
[useLevenshtein] :: FuzzySet -> !Bool
data FuzzySetItem
FuzzySetItem :: !Double -> !Text -> FuzzySetItem
[vectorMagnitude] :: FuzzySetItem -> !Double
[normalizedEntry] :: FuzzySetItem -> !Text
data GramInfo
GramInfo :: !Int -> !Int -> GramInfo
[itemIndex] :: GramInfo -> !Int
[gramCount] :: GramInfo -> !Int
-- | An individual result when looking up a string in the set, consisting
-- of
--
--
-- - a similarity score in the range <math>, and
-- - the matching string.
--
type FuzzyMatch = (Double, Text)
-- | Break apart the input string into a list of n-grams. The string
-- is first normalized and enclosed in hyphens. We then take all
-- substrings of length n, letting the offset range from
-- <math>, where s is the length of the normalized input.
--
-- Example: The string "Destroido Corp." is first
-- normalized to "destroido corp", and then enclosed in hyphens,
-- so that it becomes "-destroido corp-". The trigrams generated
-- from this normalized string are:
--
--
-- [ "-de"
-- , "des"
-- , "est"
-- , "str"
-- , "tro"
-- , "roi"
-- , "oid"
-- , "ido"
-- , "do "
-- , "o c"
-- , " co"
-- , "cor"
-- , "orp"
-- , "rp-"
-- ]
--
grams :: Text -> Int -> [Text]
-- | Generate a list of n-grams (character substrings) from the
-- normalized input and then translate this into a dictionary with the
-- n-grams as keys mapping to the number of occurences of the
-- substring in the list.
--
--
-- >>> gramVector "xxxx" 2
-- fromList [("-x",1), ("xx",3), ("x-",1)]
--
--
-- The substring "xx" appears three times in the normalized
-- string:
--
--
-- >>> grams "xxxx" 2
-- ["-x","xx","xx","xx","x-"]
--
--
--
-- >>> Data.HashMap.Strict.lookup "nts" (gramVector "intrent'srestaurantsomeoftrent'saunt'santswantsamtorentsomepants" 3)
-- Just 8
--
gramVector :: Text -> Int -> HashMap Text Int
matches :: FuzzySet -> HashMap Text Int -> HashMap Int Int
getMatches :: FuzzySet -> Text -> Double -> Int -> [FuzzyMatch]
add_ :: MonadState FuzzySet m => Text -> m Bool
addMany_ :: MonadState FuzzySet m => [Text] -> m [Text]
-- | Normalize the input by
--
--
-- - removing non-word characters, except for spaces and commas;
-- and
-- - converting alphabetic characters to lowercase.
--
normalized :: Text -> Text
-- | Return the euclidean norm, or magnitude, of the input list
-- interpreted as a vector.
--
-- That is,
--
-- <math>
--
-- for the input
--
-- <math>
--
-- where <math> is the element at position i in the input
-- list.
norm :: [Int] -> Double
-- | Return the normalized Levenshtein distance between the two strings.
--
-- See https://en.wikipedia.org/wiki/Levenshtein_distance.
distance :: Text -> Text -> Double
instance GHC.Show.Show Data.FuzzySet.Internal.FuzzySetItem
instance GHC.Classes.Eq Data.FuzzySet.Internal.FuzzySetItem
instance GHC.Show.Show Data.FuzzySet.Internal.GramInfo
instance GHC.Classes.Eq Data.FuzzySet.Internal.GramInfo
instance GHC.Show.Show Data.FuzzySet.Internal.FuzzySet
instance GHC.Classes.Eq Data.FuzzySet.Internal.FuzzySet
module Data.FuzzySet.Simple
-- | Main fuzzy string set data type.
data FuzzySet
-- | An individual result when looking up a string in the set, consisting
-- of
--
--
-- - a similarity score in the range <math>, and
-- - the matching string.
--
type FuzzyMatch = (Double, Text)
-- | Initialize an empty FuzzySet.
emptySet :: Int -> Int -> Bool -> FuzzySet
-- | An empty FuzzySet with the following defaults:
--
--
-- - Gram size lower: 2
-- - Gram size upper: 3
-- - Use Levenshtein distance: True
--
defaultSet :: FuzzySet
-- | Create a new set from a list of entries, using the default settings.
fromList :: [Text] -> FuzzySet
-- | Add a string to the set, unless it is already present. A pair is
-- returned consisting of a boolean which denotes whether or not anything
-- was inserted, and the updated set.
addToSet :: Text -> FuzzySet -> (Bool, FuzzySet)
-- | Add a string to the set, or do nothing if a key that matches the
-- string already exists.
add :: Text -> FuzzySet -> FuzzySet
-- | Add a list of strings to the set, all at once.
--
-- Unless you need to know the subset of values that were actually
-- inserted, use addMany instead.
addManyToSet :: [Text] -> FuzzySet -> ([Text], FuzzySet)
-- | Add a list of strings to the set, all at once.
--
-- This function is identical to addManyToSet, except that it only
-- returns the set itself. If you need to know what values were inserted,
-- then use the latter instead.
addMany :: [Text] -> FuzzySet -> FuzzySet
-- | Infix operator to add entries to a FuzzySet, defined as
-- flip add.
(>+<) :: FuzzySet -> Text -> FuzzySet
infixl 4 >+<
-- | Try to match a string against the entries in the set, and return a
-- list of all results with a score greater than or equal to the
-- specified minimum score (i.e., the first argument). The results are
-- ordered by similarity, with the closest match first.
findMin :: Double -> Text -> FuzzySet -> [FuzzyMatch]
-- | Try to match the given string against the entries in the set using the
-- specified minimum score and return the closest match, if one is found.
findOneMin :: Double -> Text -> FuzzySet -> Maybe FuzzyMatch
-- | Try to match the given string against the entries in the set using the
-- specified minimum score and return the string that most closely
-- matches the input, if a match is found.
closestMatchMin :: Double -> Text -> FuzzySet -> Maybe Text
-- | Try to match the given string against the entries in the set, using a
-- minimum score of 0.33. Return a list of results ordered by similarity
-- score, with the closest match first. Use findMin if you need to
-- specify a custom threshold value.
find :: Text -> FuzzySet -> [FuzzyMatch]
-- | Try to match the given string against the entries in the set, and
-- return the closest match, if one is found. A minimum score of 0.33 is
-- used. To specify a custom threshold value, instead use
-- findOneMin.
findOne :: Text -> FuzzySet -> Maybe FuzzyMatch
-- | Try to match the given string against the entries in the set, and
-- return the string that most closely matches the input, if a match is
-- found. A minimum score of 0.33 is used. To specify a custom threshold
-- value, instead use closestMatchMin.
closestMatch :: Text -> FuzzySet -> Maybe Text
-- | Return the elements of the set. No particular order is guaranteed.
--
--
-- >>> values (fromList ["bass", "craze", "space", "lace", "daze", "haze", "ace", "maze"])
-- ["space","daze","bass","maze","ace","craze","lace","haze"]
--
values :: FuzzySet -> [Text]
-- | Return the number of entries in the set.
--
--
-- >>> size (defaultSet >+< "map" >+< "cap")
-- 2
--
-- >>> size (defaultSet >+< "bork" >+< "bork" >+< "bork")
-- 1
--
size :: FuzzySet -> Int
-- | Return a boolean indicating whether the set is empty.
--
--
-- >>> isEmpty (fromList [])
-- True
--
-- >>> isEmpty $ fromList ["Aramis", "Porthos", "Athos"]
-- False
--
isEmpty :: FuzzySet -> Bool
module Data.FuzzySet.Monad
-- | Add a string to the set. A boolean is returned which is True
-- if the string was inserted, or False if it already existed in
-- the set.
add :: MonadFuzzySearch m => Text -> m Bool
-- | Add a string to the set, or do nothing if a key that matches the
-- string already exists.
--
-- This function is identical to add, except that the latter
-- returns a boolean to indicate whether any new value was added.
add_ :: MonadFuzzySearch m => Text -> m ()
findMin :: MonadFuzzySearch m => Double -> Text -> m [FuzzyMatch]
-- | Return the elements of the set. No particular order is guaranteed.
values :: MonadFuzzySearch m => m [Text]
-- | Add a list of strings to the set, all at once.
--
-- Unless you need to know the subset of values that were actually
-- inserted, use addMany_ instead.
addMany :: MonadFuzzySearch m => [Text] -> m [Text]
-- | Add a list of strings to the set, all at once.
--
-- This function is identical to addMany, except that the latter
-- returns a list of all values that were inserted.
addMany_ :: MonadFuzzySearch m => [Text] -> m ()
-- | Try to match the given string against the entries in the set, using a
-- minimum score of 0.33. Return a list of results ordered by similarity
-- score, with the closest match first. Use findMin if you need to
-- specify a custom threshold value.
find :: MonadFuzzySearch m => Text -> m [FuzzyMatch]
-- | Try to match the given string against the entries in the set, and
-- return the closest match, if one is found. A minimum score of 0.33 is
-- used. To specify a custom threshold value, instead use
-- findOneMin.
findOne :: MonadFuzzySearch m => Text -> m (Maybe FuzzyMatch)
-- | Try to match the given string against the entries in the set using the
-- specified minimum score and return the closest match, if one is found.
findOneMin :: MonadFuzzySearch m => Double -> Text -> m (Maybe FuzzyMatch)
-- | Try to match the given string against the entries in the set, and
-- return the string that most closely matches the input, if a match is
-- found. A minimum score of 0.33 is used. To specify a custom threshold
-- value, instead use closestMatchMin.
closestMatch :: MonadFuzzySearch m => Text -> m (Maybe Text)
-- | Try to match the given string against the entries in the set using the
-- specified minimum score and return the string that most closely
-- matches the input, if a match is found.
closestMatchMin :: MonadFuzzySearch m => Double -> Text -> m (Maybe Text)
-- | Return the number of entries in the set.
size :: MonadFuzzySearch m => m Int
-- | Return a boolean indicating whether the set is empty.
isEmpty :: MonadFuzzySearch m => m Bool
-- | FuzzySearch monad transformer
newtype FuzzySearchT m a
FuzzySearchT :: StateT FuzzySet m a -> FuzzySearchT m a
[getFuzzySearchT] :: FuzzySearchT m a -> StateT FuzzySet m a
-- | FuzzySearch monad
type FuzzySearch = FuzzySearchT Identity
-- | Evaluate a FuzzySearchT computation with the given options.
runFuzzySearchT :: Monad m => FuzzySearchT m a -> Int -> Int -> Bool -> m a
-- | Evaluate a FuzzySearchT computation with the following
-- defaults:
--
--
-- - Gram size lower: 2
-- - Gram size upper: 3
-- - Use Levenshtein distance: True
--
runDefaultFuzzySearchT :: Monad m => FuzzySearchT m a -> m a
-- | Evaluate a FuzzySearch computation with the given options.
runFuzzySearch :: FuzzySearch a -> Int -> Int -> Bool -> a
-- | Evaluate a FuzzySearch computation with the following defaults:
--
--
-- - Gram size lower: 2
-- - Gram size upper: 3
-- - Use Levenshtein distance: True
--
runDefaultFuzzySearch :: FuzzySearch a -> a
class (MonadState FuzzySet m) => MonadFuzzySearch m
instance Control.Monad.Fix.MonadFix m => Control.Monad.Fix.MonadFix (Data.FuzzySet.Monad.FuzzySearchT m)
instance Control.Monad.IO.Class.MonadIO m => Control.Monad.IO.Class.MonadIO (Data.FuzzySet.Monad.FuzzySearchT m)
instance GHC.Base.Monad m => Control.Monad.State.Class.MonadState Data.FuzzySet.Internal.FuzzySet (Data.FuzzySet.Monad.FuzzySearchT m)
instance GHC.Base.Monad m => GHC.Base.Monad (Data.FuzzySet.Monad.FuzzySearchT m)
instance GHC.Base.Monad m => GHC.Base.Applicative (Data.FuzzySet.Monad.FuzzySearchT m)
instance GHC.Base.Functor m => GHC.Base.Functor (Data.FuzzySet.Monad.FuzzySearchT m)
instance GHC.Base.Monad m => Data.FuzzySet.Monad.MonadFuzzySearch (Data.FuzzySet.Monad.FuzzySearchT m)
instance Data.FuzzySet.Monad.MonadFuzzySearch m => Data.FuzzySet.Monad.MonadFuzzySearch (Control.Monad.Trans.State.Lazy.StateT Data.FuzzySet.Internal.FuzzySet m)
instance Data.FuzzySet.Monad.MonadFuzzySearch m => Data.FuzzySet.Monad.MonadFuzzySearch (Control.Monad.Trans.Except.ExceptT e m)
instance Data.FuzzySet.Monad.MonadFuzzySearch m => Data.FuzzySet.Monad.MonadFuzzySearch (Control.Monad.Trans.Reader.ReaderT r m)
instance (Data.FuzzySet.Monad.MonadFuzzySearch m, GHC.Base.Monoid w) => Data.FuzzySet.Monad.MonadFuzzySearch (Control.Monad.Trans.Writer.Lazy.WriterT w m)
instance Data.FuzzySet.Monad.MonadFuzzySearch m => Data.FuzzySet.Monad.MonadFuzzySearch (Control.Monad.Trans.Maybe.MaybeT m)
instance Data.FuzzySet.Monad.MonadFuzzySearch m => Data.FuzzySet.Monad.MonadFuzzySearch (Control.Monad.Trans.Cont.ContT r m)
instance (Data.FuzzySet.Monad.MonadFuzzySearch m, Control.Monad.State.Class.MonadState Data.FuzzySet.Internal.FuzzySet (Control.Monad.Trans.Select.SelectT s m)) => Data.FuzzySet.Monad.MonadFuzzySearch (Control.Monad.Trans.Select.SelectT s m)
instance Control.Monad.Trans.Class.MonadTrans Data.FuzzySet.Monad.FuzzySearchT
module Data.FuzzySet
-- | FuzzySearch monad
type FuzzySearch = FuzzySearchT Identity
class (MonadState FuzzySet m) => MonadFuzzySearch m
-- | Evaluate a FuzzySearch computation with the given options.
runFuzzySearch :: FuzzySearch a -> Int -> Int -> Bool -> a
-- | Evaluate a FuzzySearch computation with the following defaults:
--
--
-- - Gram size lower: 2
-- - Gram size upper: 3
-- - Use Levenshtein distance: True
--
runDefaultFuzzySearch :: FuzzySearch a -> a
-- | FuzzySearch monad transformer
data FuzzySearchT m a
-- | Evaluate a FuzzySearchT computation with the given options.
runFuzzySearchT :: Monad m => FuzzySearchT m a -> Int -> Int -> Bool -> m a
-- | Evaluate a FuzzySearchT computation with the following
-- defaults:
--
--
-- - Gram size lower: 2
-- - Gram size upper: 3
-- - Use Levenshtein distance: True
--
runDefaultFuzzySearchT :: Monad m => FuzzySearchT m a -> m a
-- | Add a string to the set. A boolean is returned which is True
-- if the string was inserted, or False if it already existed in
-- the set.
add :: MonadFuzzySearch m => Text -> m Bool
-- | Add a string to the set, or do nothing if a key that matches the
-- string already exists.
--
-- This function is identical to add, except that the latter
-- returns a boolean to indicate whether any new value was added.
add_ :: MonadFuzzySearch m => Text -> m ()
-- | Add a list of strings to the set, all at once.
--
-- Unless you need to know the subset of values that were actually
-- inserted, use addMany_ instead.
addMany :: MonadFuzzySearch m => [Text] -> m [Text]
-- | Add a list of strings to the set, all at once.
--
-- This function is identical to addMany, except that the latter
-- returns a list of all values that were inserted.
addMany_ :: MonadFuzzySearch m => [Text] -> m ()
-- | Try to match the given string against the entries in the set, using a
-- minimum score of 0.33. Return a list of results ordered by similarity
-- score, with the closest match first. Use findMin if you need to
-- specify a custom threshold value.
find :: MonadFuzzySearch m => Text -> m [FuzzyMatch]
findMin :: MonadFuzzySearch m => Double -> Text -> m [FuzzyMatch]
-- | Try to match the given string against the entries in the set, and
-- return the closest match, if one is found. A minimum score of 0.33 is
-- used. To specify a custom threshold value, instead use
-- findOneMin.
findOne :: MonadFuzzySearch m => Text -> m (Maybe FuzzyMatch)
-- | Try to match the given string against the entries in the set using the
-- specified minimum score and return the closest match, if one is found.
findOneMin :: MonadFuzzySearch m => Double -> Text -> m (Maybe FuzzyMatch)
-- | Try to match the given string against the entries in the set using the
-- specified minimum score and return the string that most closely
-- matches the input, if a match is found.
closestMatchMin :: MonadFuzzySearch m => Double -> Text -> m (Maybe Text)
-- | Try to match the given string against the entries in the set, and
-- return the string that most closely matches the input, if a match is
-- found. A minimum score of 0.33 is used. To specify a custom threshold
-- value, instead use closestMatchMin.
closestMatch :: MonadFuzzySearch m => Text -> m (Maybe Text)
-- | Return the elements of the set. No particular order is guaranteed.
values :: MonadFuzzySearch m => m [Text]
-- | Return the number of entries in the set.
size :: MonadFuzzySearch m => m Int
-- | Return a boolean indicating whether the set is empty.
isEmpty :: MonadFuzzySearch m => m Bool