{-# LANGUAGE CPP, UnicodeSyntax #-}
module Data.FuzzySet.Util
( distance
, enclosedIn
, normalized
, norm
, substr
, ε
, (<$$>)
#if !MIN_VERSION_base_unicode_symbols(0,2,3)
, (−)
#endif
, (×)
) where
import Data.Char ( isAlphaNum, isSpace )
import Data.HashMap.Strict ( HashMap, empty )
import Data.Text ( Text, cons, snoc )
import Data.Text.Metrics
import Prelude.Unicode
import qualified Data.Text as Text
-- | Normalize a string for comparison: convert it to lower case and then
--   discard every character that is not alphanumeric, whitespace, or a comma.
normalized ∷ Text → Text
normalized input = Text.filter keep (Text.toLower input)
  where
    -- The comma is the only punctuation character that is retained.
    keep ch = isAlphaNum ch ∨ isSpace ch ∨ ch ≡ ','
-- | @substr n m str@ drops the first @m@ characters of @str@ and keeps at
--   most @n@ characters of what remains.
substr ∷ Int   -- ^ Maximum number of characters to keep
       → Int   -- ^ Number of leading characters to skip
       → Text  -- ^ Input text
       → Text  -- ^ The selected slice of the input
substr len offset = Text.take len ∘ Text.drop offset
{-# INLINE substr #-}
-- | Surround a string with a delimiter: the given character is attached both
--   in front of the text and after it.
enclosedIn ∷ Text → Char → Text
enclosedIn str ch = cons ch (snoc str ch)
{-# INLINE enclosedIn #-}
-- | Euclidean norm of a list of integral values: the square root of the sum
--   of the squares of the elements. The empty list has norm 0.
--
--   Every element is widened to 'Integer' before it is squared and summed, so
--   fixed-width element types such as 'Int' cannot wrap around and produce a
--   wrong (or NaN) result. For inputs that do not overflow the element type
--   the result is the same as squaring and summing in the element type.
norm ∷ (Integral a, Floating b) ⇒ [a] → b
norm xs = sqrt (fromInteger (sum (fmap square xs)))
  where
    -- Square in arbitrary precision; the conversion happens before the
    -- multiplication so the product itself cannot overflow.
    square x = k * k
      where
        k = toInteger x
-- | Normalized Levenshtein score of two strings: the result of
--   'levenshteinNorm' converted to a 'Double'.
--
--   NOTE(review): according to the text-metrics documentation the score is 1
--   for identical strings and 0 for strings with no similarity, so despite the
--   name, larger values mean a closer match - confirm against the pinned
--   text-metrics version.
distance ∷ Text → Text → Double
distance s t = realToFrac (levenshteinNorm s t)
-- | Map a function through two nested functor layers; equivalent to applying
--   @fmap@ twice, as in @fmap (fmap f)@.
(<$$>) ∷ (Functor f, Functor g) ⇒ (a → b) → g (f a) → g (f b)
-- Written as a lambda on purpose: the left-hand side keeps zero syntactic
-- arguments, so the INLINE pragma also applies to unsaturated uses.
(<$$>) = \fn → fmap (fmap fn)
{-# INLINE (<$$>) #-}
-- | The empty 'HashMap'; a short Unicode alias for 'empty' from
--   "Data.HashMap.Strict".
ε ∷ HashMap k v
ε = empty
{-# INLINE ε #-}
#if !MIN_VERSION_base_unicode_symbols(0,2,3)
-- | Unicode minus sign as an alias for the ordinary subtraction operator.
--   Only compiled when the installed base-unicode-symbols package is older
--   than version 0.2.3.
--
--   NOTE(review): presumably newer releases of that package export this
--   operator from "Prelude.Unicode" themselves, which is why the fallback is
--   guarded - confirm. No fixity declaration for it is visible in this module,
--   so the fallback has the default fixity (infixl 9) and binds tighter than
--   the ASCII subtraction operator (infixl 6); check that this agrees with the
--   library-provided operator and parenthesize mixed expressions.
(−) ∷ Num α ⇒ α → α → α
(−) = (-)
{-# INLINE (−) #-}
#endif
-- | Unicode multiplication sign as an alias for the ordinary multiplication
--   operator.
--
--   NOTE(review): no fixity declaration for this operator is visible in this
--   module, so it has the default fixity (infixl 9) rather than the infixl 7
--   of the ASCII multiplication operator; parenthesize when mixing it with
--   other operators.
(×) ∷ Num α ⇒ α → α → α
(×) = (*)
{-# INLINE (×) #-}