{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE ScopedTypeVariables #-} -- | An implementation of 'Language.Distance.Search' based on a BK-tree: -- . It performs reasonably, and it -- scales decently as the query distance increases. Moreover the data -- structure can work on any instance of 'EditDistance', or in fact any metric -- space (although no interface for that purpose is defined): -- . -- -- However, for very short distances (less than 3), -- 'Language.Distance.Search.TST' is faster. module Language.Distance.Search.BK ( -- * Data type BKTree -- * Operations , empty , insert , query , levenshtein , damerauLevenshtein ) where import Data.IntMap (IntMap) import qualified Data.IntMap as IntMap import Data.ListLike (ListLike) import Language.Distance (EditDistance (..), Levenshtein, DamerauLevenshtein) import Language.Distance.Internal data BKTree full algo = EmptyBK | BKTree !full !(IntMap (BKTree full algo)) narrow :: Int -> Int -> IntMap a -> IntMap a narrow n m im | n == m = IntMap.fromList (maybe [] (\v -> [(n, v)]) (IntMap.lookup n im)) narrow n m im | otherwise = insMaybe m res pr where (_, pl, res0) = IntMap.splitLookup n im (res, pr, _) = IntMap.splitLookup m (insMaybe n res0 pl) insMaybe k im' = maybe im' (\v -> IntMap.insert k v im') empty :: BKTree full algo empty = EmptyBK insert :: forall full sym algo. (Eq sym, EditDistance sym algo, ListLike full sym) => full -> BKTree full algo -> BKTree full algo insert str EmptyBK = BKTree str IntMap.empty insert str bk@(BKTree str' bks) | dist == 0 = bk | otherwise = BKTree str' $ flip (IntMap.insert dist) bks $ maybe (insert str EmptyBK) (insert str) (IntMap.lookup dist bks) where dist = getDistance (distance str str' :: Distance algo) query :: forall full sym algo. (ListLike full sym, EditDistance sym algo) => Int -> full -> BKTree full algo -> [(full, Distance algo)] query _ _ EmptyBK = [] query maxd str (BKTree str' bks) = match ++ concatMap (query maxd str) children where dist = distance str str' :: Distance algo intDist = getDistance dist match = if (intDist <= maxd) then [(str', dist)] else [] children = IntMap.elems $ narrow (max (intDist - maxd) 0) (intDist + maxd) bks levenshtein :: (ListLike full sym, EditDistance sym Levenshtein) => Int -> full -> BKTree full Levenshtein -> [(full, Distance Levenshtein)] levenshtein = query damerauLevenshtein :: (ListLike full sym, EditDistance sym DamerauLevenshtein) => Int -> full -> BKTree full DamerauLevenshtein -> [(full, Distance DamerauLevenshtein)] damerauLevenshtein = query