-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | Fuzzy set for approximate string matching -- -- This library is based on the Python and JavaScript libraries with -- similar names. @package fuzzyset @version 0.2.2 module Data.FuzzySet.Types data FuzzySetItem FuzzySetItem :: !Double -> !Text -> FuzzySetItem [vectorMagnitude] :: FuzzySetItem -> !Double [normalizedEntry] :: FuzzySetItem -> !Text data GramInfo GramInfo :: !Int -> !Int -> GramInfo [itemIndex] :: GramInfo -> !Int [gramCount] :: GramInfo -> !Int -- | Main fuzzy string set data type. Use emptySet, -- defaultSet, or fromList to create sets. data FuzzySet FuzzySet :: !HashMap Text Text -> !HashMap Text [GramInfo] -> !HashMap Int (Vector FuzzySetItem) -> !Int -> !Int -> !Bool -> FuzzySet [exactSet] :: FuzzySet -> !HashMap Text Text [matchDict] :: FuzzySet -> !HashMap Text [GramInfo] [items] :: FuzzySet -> !HashMap Int (Vector FuzzySetItem) [gramSizeLower] :: FuzzySet -> !Int [gramSizeUpper] :: FuzzySet -> !Int [useLevenshtein] :: FuzzySet -> !Bool instance GHC.Show.Show Data.FuzzySet.Types.FuzzySetItem instance GHC.Classes.Eq Data.FuzzySet.Types.FuzzySetItem instance GHC.Show.Show Data.FuzzySet.Types.GramInfo instance GHC.Classes.Eq Data.FuzzySet.Types.GramInfo instance GHC.Show.Show Data.FuzzySet.Types.FuzzySet instance GHC.Classes.Eq Data.FuzzySet.Types.FuzzySet module Data.FuzzySet.Util -- | Normalize the input, e.g. by lowercasing it and dropping punctuation: -- "Destroido Corp." becomes "destroido corp". normalized :: Text -> Text -- | Return n characters starting from offset m in the input -- string. substr :: Int -> Int -> Text -> Text -- | Insert a character at the beginning and end of the given string. enclosedIn :: Text -> Char -> Text -- | Returns the euclidean norm, or magnitude, of the input list -- interpreted as a vector. -- -- That is, -- -- \(\sqrt{\sum_{i=0}^n a_i^2}\) -- -- for the input -- -- \(\langle a_0, a_1, \dots, a_n \rangle\) -- -- where \(a_i\) is the element at position i in the input -- list. 
norm :: (Integral a, Floating b) => [a] -> b -- | Return the normalized Levenshtein distance between the two strings. -- See https://en.wikipedia.org/wiki/Levenshtein_distance. distance :: Text -> Text -> Double module Data.FuzzySet.Internal -- | Alternative syntax for the reverse function application operator -- (&), known also as the pipe operator. (|>) :: a -> (a -> b) -> b infixl 1 |> -- | Dot products used to compute the cosine similarity, which is the -- similarity score assigned to entries that match the search string in -- the fuzzy set. matches :: FuzzySet -> HashMap Text Int -> HashMap Int Int -- | This function performs the actual task of querying a set for matches, -- supported by the other functions in this module. See -- Implementation for an explanation. getMatches :: FuzzySet -> Text -> Double -> Int -> [(Double, Text)] -- | Generate a list of n-grams (character substrings) from the -- normalized input and then translate this into a dictionary with the -- n-grams as keys mapping to the number of occurrences of the -- substring in the list. -- --
--   >>> gramVector "xxxx" 2
--   fromList [("-x",1), ("xx",3), ("x-",1)]
--   
-- -- The substring "xx" appears three times in the normalized -- string: -- --
--   >>> grams "xxxx" 2
--   ["-x","xx","xx","xx","x-"]
--   
-- --
--   >>> Data.HashMap.Strict.lookup "nts" (gramVector "intrent'srestaurantsomeoftrent'saunt'santswantsamtorentsomepants" 3)
--   Just 8
--   
gramVector :: Text -> Int -> HashMap Text Int -- | Break apart the input string into a list of n-grams. The string -- is first normalized and enclosed in hyphens. We then take all -- substrings of length n, letting the offset range from -- \(0\) to \(s + 2 - n\), where s is the length of the normalized input. -- -- Example: The string "Destroido Corp." is first -- normalized to "destroido corp", and then enclosed in hyphens, -- so that it becomes "-destroido corp-". The trigrams generated -- from this normalized string are: -- --
--   [ "-de"
--   , "des"
--   , "est"
--   , "str"
--   , "tro"
--   , "roi"
--   , "oid"
--   , "ido"
--   , "do "
--   , "o c"
--   , " co"
--   , "cor"
--   , "orp"
--   , "rp-"
--   ]
--   
grams :: Text -> Int -> [Text] -- | A fuzzy string set data structure for approximate string matching. -- This implementation is based on the Python and JavaScript libraries -- with similar names; fuzzyset.js, and the original -- fuzzyset Python library. module Data.FuzzySet -- | Main fuzzy string set data type. Use emptySet, -- defaultSet, or fromList to create sets. data FuzzySet -- | Initialize an empty FuzzySet. emptySet :: Int -> Int -> Bool -> FuzzySet -- | An empty FuzzySet with the library's default settings for the -- options otherwise passed to emptySet (the two gram sizes and the -- Levenshtein flag). defaultSet :: FuzzySet -- | Create a set from a list of entries, using the default settings. -- --
--   fromList = addMany defaultSet
--   
fromList :: [Text] -> FuzzySet -- | Add an entry to the set, or do nothing if a key that matches the -- string already exists in the set. add :: FuzzySet -> Text -> FuzzySet -- | Add an entry, unless it is already present in the set. A pair is -- returned with the new set and a boolean which denotes whether or not -- anything was inserted. addToSet :: FuzzySet -> Text -> (FuzzySet, Bool) -- | Add a list of entries to the set, in one go. -- --
--   addMany = foldr (flip add)
--   
addMany :: FuzzySet -> [Text] -> FuzzySet -- | Try to match the given string against the entries in the set, using a -- minimum score of 0.33. Return a list of results ordered by similarity -- score, with the closest match first. Use getWithMinScore to -- specify a different threshold value. get :: FuzzySet -> Text -> [(Double, Text)] -- | Try to match a string against the entries in the set, and return a -- list of all results with a score greater than or equal to the -- specified minimum score (i.e., the first argument). The results are -- ordered by similarity score, with the closest match first. getWithMinScore :: Double -> FuzzySet -> Text -> [(Double, Text)] -- | Try to match the given string against the entries in the set, and -- return the closest match, if one is found. A minimum score of 0.33 is -- used. To specify a different threshold value, instead use -- getOneWithMinScore. getOne :: FuzzySet -> Text -> Maybe Text -- | Try to match the given string against the entries in the set using the -- specified minimum score and return the closest match, if one is found. getOneWithMinScore :: Double -> FuzzySet -> Text -> Maybe Text -- | Return the number of entries in the set. -- --
--   >>> size (defaultSet `add` "map" `add` "cap")
--   2
--   
--   >>> size (defaultSet `add` "bork" `add` "bork" `add` "bork")
--   1
--   
size :: FuzzySet -> Int -- | Return a boolean indicating whether the set is empty. -- --
--   >>> isEmpty (fromList [])
--   True
--   
--   >>> isEmpty $ fromList ["Aramis", "Porthos", "Athos"]
--   False
--   
isEmpty :: FuzzySet -> Bool -- | Return the elements of the set. No particular order is guaranteed. -- --
--   >>> values (fromList ["bass", "craze", "space", "lace", "daze", "haze", "ace", "maze"])
--   ["space","daze","bass","maze","ace","craze","lace","haze"]
--   
values :: FuzzySet -> [Text] instance Data.Default.Class.Default Data.FuzzySet.Types.FuzzySet