{-# LANGUAGE PatternGuards #-}
-- | Computing the edit distances between strings
module Text.EditDistance (
Costs(..), EditCosts(..), defaultEditCosts,
levenshteinDistance, restrictedDamerauLevenshteinDistance
) where
import Text.EditDistance.EditCosts
import qualified Text.EditDistance.Bits as Bits
import qualified Text.EditDistance.SquareSTUArray as SquareSTUArray
-- | Find the Levenshtein edit distance between two strings. That is to say, the number of deletion,
-- insertion and substitution operations that are required to make the two strings equal. Note that
-- this algorithm therefore does not make use of the 'transpositionCost' field of the costs. See also:
-- .
levenshteinDistance :: EditCosts -> String -> String -> Int
levenshteinDistance costs str1 str2
| isDefaultEditCosts costs
, (str1_len <= 64) == (str2_len <= 64) -- The Integer implementation of the Bits algorithm is quite inefficient, but scales better than the
= Bits.levenshteinDistanceWithLengths str1_len str2_len str1 str2 -- STUArrays if both string lengths > 64. The Word64 implementation is always better, if it is applicable
| otherwise
= SquareSTUArray.levenshteinDistanceWithLengths costs str1_len str2_len str1 str2 -- SquareSTUArray usually beat making more use of the heap with STUArray
where
str1_len = length str1
str2_len = length str2
-- | Find the "restricted" Damerau-Levenshtein edit distance between two strings. This algorithm calculates the cost of
-- the so-called optimal string alignment, which does not always equal the appropriate edit distance. The cost of the optimal
-- string alignment is the number of edit operations needed to make the input strings equal under the condition that no substring
-- is edited more than once. See also: .
restrictedDamerauLevenshteinDistance :: EditCosts -> String -> String -> Int
restrictedDamerauLevenshteinDistance costs str1 str2
| isDefaultEditCosts costs
, (str1_len <= 64) == (str2_len <= 64) -- The Integer implementation of the Bits algorithm is quite inefficient, but scales better than the
= Bits.restrictedDamerauLevenshteinDistanceWithLengths str1_len str2_len str1 str2 -- STUArrays if both string lengths > 64. The Word64 implementation is always better, if it is applicable
| otherwise
= SquareSTUArray.restrictedDamerauLevenshteinDistanceWithLengths costs str1_len str2_len str1 str2 -- SquareSTUArray usually beat making more use of the heap with STUArray
where
str1_len = length str1
str2_len = length str2