-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | Fuzzy set data structure for approximate string matching -- -- Please see the README on GitHub at -- https://github.com/laserpants/fuzzyset-haskell#readme @package fuzzyset @version 0.3.2 module Data.FuzzySet.Utils (<$$>) :: (Functor f, Functor g) => (a -> b) -> f (g a) -> f (g b) infixr 8 <$$> (<$$$>) :: (Functor f, Functor g, Functor h) => (a -> b) -> f (g (h a)) -> f (g (h b)) infixr 8 <$$$> safeHead :: [a] -> Maybe a enclosedIn :: Text -> Char -> Text substr :: Int -> Int -> Text -> Text module Data.FuzzySet.Internal -- | Main fuzzy string set data type. data FuzzySet FuzzySet :: !HashMap Text Text -> !HashMap Text [GramInfo] -> !HashMap Int (Vector FuzzySetItem) -> !Int -> !Int -> !Bool -> FuzzySet [exactSet] :: FuzzySet -> !HashMap Text Text [matchDict] :: FuzzySet -> !HashMap Text [GramInfo] [items] :: FuzzySet -> !HashMap Int (Vector FuzzySetItem) -- | Lower bound on gram sizes to use (inclusive) [gramSizeLower] :: FuzzySet -> !Int -- | Upper bound on gram sizes to use (inclusive) [gramSizeUpper] :: FuzzySet -> !Int -- | Whether or not to use the Levenshtein distance to determine the score [useLevenshtein] :: FuzzySet -> !Bool data FuzzySetItem FuzzySetItem :: !Double -> !Text -> FuzzySetItem [vectorMagnitude] :: FuzzySetItem -> !Double [normalizedEntry] :: FuzzySetItem -> !Text data GramInfo GramInfo :: !Int -> !Int -> GramInfo [itemIndex] :: GramInfo -> !Int [gramCount] :: GramInfo -> !Int -- | An individual result when looking up a string in the set, consisting -- of -- -- type FuzzyMatch = (Double, Text) -- | Break apart the input string into a list of n-grams. The string -- is first normalized and enclosed in hyphens. We then take all -- substrings of length n, letting the offset range from -- <math>, where s is the length of the normalized input. -- -- Example: The string "Destroido Corp." is first -- normalized to "destroido corp", and then enclosed in hyphens, -- so that it becomes "-destroido corp-". The trigrams generated -- from this normalized string are: -- --
--   [ "-de"
--   , "des"
--   , "est"
--   , "str"
--   , "tro"
--   , "roi"
--   , "oid"
--   , "ido"
--   , "do "
--   , "o c"
--   , " co"
--   , "cor"
--   , "orp"
--   , "rp-"
--   ]
--   
grams :: Text -> Int -> [Text] -- | Generate a list of n-grams (character substrings) from the -- normalized input and then translate this into a dictionary with the -- n-grams as keys mapping to the number of occurences of the -- substring in the list. -- --
--   >>> gramVector "xxxx" 2
--   fromList [("-x",1), ("xx",3), ("x-",1)]
--   
-- -- The substring "xx" appears three times in the normalized -- string: -- --
--   >>> grams "xxxx" 2
--   ["-x","xx","xx","xx","x-"]
--   
-- --
--   >>> Data.HashMap.Strict.lookup "nts" (gramVector "intrent'srestaurantsomeoftrent'saunt'santswantsamtorentsomepants" 3)
--   Just 8
--   
gramVector :: Text -> Int -> HashMap Text Int matches :: FuzzySet -> HashMap Text Int -> HashMap Int Int getMatches :: FuzzySet -> Text -> Double -> Int -> [FuzzyMatch] add_ :: MonadState FuzzySet m => Text -> m Bool addMany_ :: MonadState FuzzySet m => [Text] -> m [Text] -- | Normalize the input by -- -- normalized :: Text -> Text -- | Return the euclidean norm, or magnitude, of the input list -- interpreted as a vector. -- -- That is, -- -- <math> -- -- for the input -- -- <math> -- -- where <math> is the element at position i in the input -- list. norm :: [Int] -> Double -- | Return the normalized Levenshtein distance between the two strings. -- -- See https://en.wikipedia.org/wiki/Levenshtein_distance. distance :: Text -> Text -> Double instance GHC.Show.Show Data.FuzzySet.Internal.FuzzySetItem instance GHC.Classes.Eq Data.FuzzySet.Internal.FuzzySetItem instance GHC.Show.Show Data.FuzzySet.Internal.GramInfo instance GHC.Classes.Eq Data.FuzzySet.Internal.GramInfo instance GHC.Show.Show Data.FuzzySet.Internal.FuzzySet instance GHC.Classes.Eq Data.FuzzySet.Internal.FuzzySet module Data.FuzzySet.Simple -- | Main fuzzy string set data type. data FuzzySet -- | An individual result when looking up a string in the set, consisting -- of -- -- type FuzzyMatch = (Double, Text) -- | Initialize an empty FuzzySet. emptySet :: Int -> Int -> Bool -> FuzzySet -- | An empty FuzzySet with the following defaults: -- -- defaultSet :: FuzzySet -- | Create a new set from a list of entries, using the default settings. fromList :: [Text] -> FuzzySet -- | Add a string to the set, unless it is already present. A pair is -- returned consisting of a boolean which denotes whether or not anything -- was inserted, and the updated set. addToSet :: Text -> FuzzySet -> (Bool, FuzzySet) -- | Add a string to the set, or do nothing if a key that matches the -- string already exists. add :: Text -> FuzzySet -> FuzzySet -- | Add a list of strings to the set, all at once. -- -- Unless you need to know the subset of values that were actually -- inserted, use addMany instead. addManyToSet :: [Text] -> FuzzySet -> ([Text], FuzzySet) -- | Add a list of strings to the set, all at once. -- -- This function is identical to addManyToSet, except that it only -- returns the set itself. If you need to know what values were inserted, -- then use the latter instead. addMany :: [Text] -> FuzzySet -> FuzzySet -- | Infix operator to add entries to a FuzzySet, defined as -- flip add. (>+<) :: FuzzySet -> Text -> FuzzySet infixl 4 >+< -- | Try to match a string against the entries in the set, and return a -- list of all results with a score greater than or equal to the -- specified minimum score (i.e., the first argument). The results are -- ordered by similarity, with the closest match first. findMin :: Double -> Text -> FuzzySet -> [FuzzyMatch] -- | Try to match the given string against the entries in the set using the -- specified minimum score and return the closest match, if one is found. findOneMin :: Double -> Text -> FuzzySet -> Maybe FuzzyMatch -- | Try to match the given string against the entries in the set using the -- specified minimum score and return the string that most closely -- matches the input, if a match is found. closestMatchMin :: Double -> Text -> FuzzySet -> Maybe Text -- | Try to match the given string against the entries in the set, using a -- minimum score of 0.33. Return a list of results ordered by similarity -- score, with the closest match first. Use findMin if you need to -- specify a custom threshold value. find :: Text -> FuzzySet -> [FuzzyMatch] -- | Try to match the given string against the entries in the set, and -- return the closest match, if one is found. A minimum score of 0.33 is -- used. To specify a custom threshold value, instead use -- findOneMin. findOne :: Text -> FuzzySet -> Maybe FuzzyMatch -- | Try to match the given string against the entries in the set, and -- return the string that most closely matches the input, if a match is -- found. A minimum score of 0.33 is used. To specify a custom threshold -- value, instead use closestMatchMin. closestMatch :: Text -> FuzzySet -> Maybe Text -- | Return the elements of the set. No particular order is guaranteed. -- --
--   >>> values (fromList ["bass", "craze", "space", "lace", "daze", "haze", "ace", "maze"])
--   ["space","daze","bass","maze","ace","craze","lace","haze"]
--   
values :: FuzzySet -> [Text] -- | Return the number of entries in the set. -- --
--   >>> size (defaultSet >+< "map" >+< "cap")
--   2
--   
--   >>> size (defaultSet >+< "bork" >+< "bork" >+< "bork")
--   1
--   
size :: FuzzySet -> Int -- | Return a boolean indicating whether the set is empty. -- --
--   >>> isEmpty (fromList [])
--   True
--   
--   >>> isEmpty $ fromList ["Aramis", "Porthos", "Athos"]
--   False
--   
isEmpty :: FuzzySet -> Bool module Data.FuzzySet.Monad -- | Add a string to the set. A boolean is returned which is True -- if the string was inserted, or False if it already existed in -- the set. add :: MonadFuzzySearch m => Text -> m Bool -- | Add a string to the set, or do nothing if a key that matches the -- string already exists. -- -- This function is identical to add, except that the latter -- returns a boolean to indicate whether any new value was added. add_ :: MonadFuzzySearch m => Text -> m () findMin :: MonadFuzzySearch m => Double -> Text -> m [FuzzyMatch] -- | Return the elements of the set. No particular order is guaranteed. values :: MonadFuzzySearch m => m [Text] -- | Add a list of strings to the set, all at once. -- -- Unless you need to know the subset of values that were actually -- inserted, use addMany_ instead. addMany :: MonadFuzzySearch m => [Text] -> m [Text] -- | Add a list of strings to the set, all at once. -- -- This function is identical to addMany, except that the latter -- returns a list of all values that were inserted. addMany_ :: MonadFuzzySearch m => [Text] -> m () -- | Try to match the given string against the entries in the set, using a -- minimum score of 0.33. Return a list of results ordered by similarity -- score, with the closest match first. Use findMin if you need to -- specify a custom threshold value. find :: MonadFuzzySearch m => Text -> m [FuzzyMatch] -- | Try to match the given string against the entries in the set, and -- return the closest match, if one is found. A minimum score of 0.33 is -- used. To specify a custom threshold value, instead use -- findOneMin. findOne :: MonadFuzzySearch m => Text -> m (Maybe FuzzyMatch) -- | Try to match the given string against the entries in the set using the -- specified minimum score and return the closest match, if one is found. findOneMin :: MonadFuzzySearch m => Double -> Text -> m (Maybe FuzzyMatch) -- | Try to match the given string against the entries in the set, and -- return the string that most closely matches the input, if a match is -- found. A minimum score of 0.33 is used. To specify a custom threshold -- value, instead use closestMatchMin. closestMatch :: MonadFuzzySearch m => Text -> m (Maybe Text) -- | Try to match the given string against the entries in the set using the -- specified minimum score and return the string that most closely -- matches the input, if a match is found. closestMatchMin :: MonadFuzzySearch m => Double -> Text -> m (Maybe Text) -- | Return the number of entries in the set. size :: MonadFuzzySearch m => m Int -- | Return a boolean indicating whether the set is empty. isEmpty :: MonadFuzzySearch m => m Bool -- | FuzzySearch monad transformer newtype FuzzySearchT m a FuzzySearchT :: StateT FuzzySet m a -> FuzzySearchT m a [getFuzzySearchT] :: FuzzySearchT m a -> StateT FuzzySet m a -- | FuzzySearch monad type FuzzySearch = FuzzySearchT Identity -- | Evaluate a FuzzySearchT computation with the given options. runFuzzySearchT :: Monad m => FuzzySearchT m a -> Int -> Int -> Bool -> m a -- | Evaluate a FuzzySearchT computation with the following -- defaults: -- -- runDefaultFuzzySearchT :: Monad m => FuzzySearchT m a -> m a -- | Evaluate a FuzzySearch computation with the given options. runFuzzySearch :: FuzzySearch a -> Int -> Int -> Bool -> a -- | Evaluate a FuzzySearch computation with the following defaults: -- -- runDefaultFuzzySearch :: FuzzySearch a -> a class (MonadState FuzzySet m) => MonadFuzzySearch m instance Control.Monad.Fix.MonadFix m => Control.Monad.Fix.MonadFix (Data.FuzzySet.Monad.FuzzySearchT m) instance Control.Monad.IO.Class.MonadIO m => Control.Monad.IO.Class.MonadIO (Data.FuzzySet.Monad.FuzzySearchT m) instance GHC.Base.Monad m => Control.Monad.State.Class.MonadState Data.FuzzySet.Internal.FuzzySet (Data.FuzzySet.Monad.FuzzySearchT m) instance GHC.Base.Monad m => GHC.Base.Monad (Data.FuzzySet.Monad.FuzzySearchT m) instance GHC.Base.Monad m => GHC.Base.Applicative (Data.FuzzySet.Monad.FuzzySearchT m) instance GHC.Base.Functor m => GHC.Base.Functor (Data.FuzzySet.Monad.FuzzySearchT m) instance GHC.Base.Monad m => Data.FuzzySet.Monad.MonadFuzzySearch (Data.FuzzySet.Monad.FuzzySearchT m) instance Data.FuzzySet.Monad.MonadFuzzySearch m => Data.FuzzySet.Monad.MonadFuzzySearch (Control.Monad.Trans.State.Lazy.StateT Data.FuzzySet.Internal.FuzzySet m) instance Data.FuzzySet.Monad.MonadFuzzySearch m => Data.FuzzySet.Monad.MonadFuzzySearch (Control.Monad.Trans.Except.ExceptT e m) instance Data.FuzzySet.Monad.MonadFuzzySearch m => Data.FuzzySet.Monad.MonadFuzzySearch (Control.Monad.Trans.Reader.ReaderT r m) instance (Data.FuzzySet.Monad.MonadFuzzySearch m, GHC.Base.Monoid w) => Data.FuzzySet.Monad.MonadFuzzySearch (Control.Monad.Trans.Writer.Lazy.WriterT w m) instance Data.FuzzySet.Monad.MonadFuzzySearch m => Data.FuzzySet.Monad.MonadFuzzySearch (Control.Monad.Trans.Maybe.MaybeT m) instance Data.FuzzySet.Monad.MonadFuzzySearch m => Data.FuzzySet.Monad.MonadFuzzySearch (Control.Monad.Trans.Cont.ContT r m) instance (Data.FuzzySet.Monad.MonadFuzzySearch m, Control.Monad.State.Class.MonadState Data.FuzzySet.Internal.FuzzySet (Control.Monad.Trans.Select.SelectT s m)) => Data.FuzzySet.Monad.MonadFuzzySearch (Control.Monad.Trans.Select.SelectT s m) instance Control.Monad.Trans.Class.MonadTrans Data.FuzzySet.Monad.FuzzySearchT module Data.FuzzySet -- | FuzzySearch monad type FuzzySearch = FuzzySearchT Identity class (MonadState FuzzySet m) => MonadFuzzySearch m -- | Evaluate a FuzzySearch computation with the given options. runFuzzySearch :: FuzzySearch a -> Int -> Int -> Bool -> a -- | Evaluate a FuzzySearch computation with the following defaults: -- -- runDefaultFuzzySearch :: FuzzySearch a -> a -- | FuzzySearch monad transformer data FuzzySearchT m a -- | Evaluate a FuzzySearchT computation with the given options. runFuzzySearchT :: Monad m => FuzzySearchT m a -> Int -> Int -> Bool -> m a -- | Evaluate a FuzzySearchT computation with the following -- defaults: -- -- runDefaultFuzzySearchT :: Monad m => FuzzySearchT m a -> m a -- | Add a string to the set. A boolean is returned which is True -- if the string was inserted, or False if it already existed in -- the set. add :: MonadFuzzySearch m => Text -> m Bool -- | Add a string to the set, or do nothing if a key that matches the -- string already exists. -- -- This function is identical to add, except that the latter -- returns a boolean to indicate whether any new value was added. add_ :: MonadFuzzySearch m => Text -> m () -- | Add a list of strings to the set, all at once. -- -- Unless you need to know the subset of values that were actually -- inserted, use addMany_ instead. addMany :: MonadFuzzySearch m => [Text] -> m [Text] -- | Add a list of strings to the set, all at once. -- -- This function is identical to addMany, except that the latter -- returns a list of all values that were inserted. addMany_ :: MonadFuzzySearch m => [Text] -> m () -- | Try to match the given string against the entries in the set, using a -- minimum score of 0.33. Return a list of results ordered by similarity -- score, with the closest match first. Use findMin if you need to -- specify a custom threshold value. find :: MonadFuzzySearch m => Text -> m [FuzzyMatch] findMin :: MonadFuzzySearch m => Double -> Text -> m [FuzzyMatch] -- | Try to match the given string against the entries in the set, and -- return the closest match, if one is found. A minimum score of 0.33 is -- used. To specify a custom threshold value, instead use -- findOneMin. findOne :: MonadFuzzySearch m => Text -> m (Maybe FuzzyMatch) -- | Try to match the given string against the entries in the set using the -- specified minimum score and return the closest match, if one is found. findOneMin :: MonadFuzzySearch m => Double -> Text -> m (Maybe FuzzyMatch) -- | Try to match the given string against the entries in the set using the -- specified minimum score and return the string that most closely -- matches the input, if a match is found. closestMatchMin :: MonadFuzzySearch m => Double -> Text -> m (Maybe Text) -- | Try to match the given string against the entries in the set, and -- return the string that most closely matches the input, if a match is -- found. A minimum score of 0.33 is used. To specify a custom threshold -- value, instead use closestMatchMin. closestMatch :: MonadFuzzySearch m => Text -> m (Maybe Text) -- | Return the elements of the set. No particular order is guaranteed. values :: MonadFuzzySearch m => m [Text] -- | Return the number of entries in the set. size :: MonadFuzzySearch m => m Int -- | Return a boolean indicating whether the set is empty. isEmpty :: MonadFuzzySearch m => m Bool