-- Copyright © 2010 Greg Weber and Bart Massey
-- [This program is licensed under the "3-clause ('new') BSD License"]
-- Please see the file COPYING in this distribution for license information.

-- | Implementation-level interface for spelling suggestion.
module Text.SpellingSuggest.LowLevel (
  SpellingWordFilter, SpellingWordCoder,
  nearbyWordFilter, anyWordFilter,
  editDistance,
  soundex, phonix, trivialPhoneticCode,
  tryWord
) where

import Data.List
import Data.Ord
import Text.EditDistance
import Text.PhoneticCode.Phonix
import Text.PhoneticCode.Soundex

-- | Return 'True' if the 'editDistance' from the target word to the
-- given word is small enough (at most 10).
nearbyWordFilter :: String -> String -> Bool
nearbyWordFilter target = (<= 10) . editDistance target

-- | Always returns 'True'.
anyWordFilter :: String -> String -> Bool
anyWordFilter = const (const True)

-- | The weighted edit distance between a pair of strings,
-- with weights for insertion, deletion, transposition and
-- substitution chosen to try to mimic spelling errors.
editDistance :: String -> String -> Int
editDistance s t =
  restrictedDamerauLevenshteinDistance ec s t
  where
    ec = EditCosts
           { insertionCosts     = ConstantCost 2
           , deletionCosts      = ConstantCost 2
           , transpositionCosts = ConstantCost 1
           , substitutionCosts  = ConstantCost 3
           }

-- | Map any given word to a constant "phonetic code".
-- In other words, suppress phonetic coding.
trivialPhoneticCode :: String -> String
trivialPhoneticCode = const ""

-- | A prefilter for candidate words. 'tryWord' applies it to the
-- target word first and then to each candidate; candidates for
-- which it returns 'False' are discarded.
type SpellingWordFilter = String -> String -> Bool

-- | A phonetic coding function. 'tryWord' keeps only those
-- candidates whose code is equal to the code of the target word.
type SpellingWordCoder = String -> String

-- | Core algorithm for spelling suggestion. Takes a
-- prefiltering function, a phonetic coding function, a
-- target word, and a list of candidate words. Returns the
-- candidates that pass the prefilter and share the target's
-- phonetic code, ordered by increasing 'editDistance' from the
-- target. Candidates at equal distance keep their input order.
-- No limit is applied to the number of suggestions returned.
tryWord :: SpellingWordFilter   -- ^ prefilter, applied as @prefilter target candidate@
        -> SpellingWordCoder    -- ^ phonetic coding function
        -> String               -- ^ target word
        -> [String]             -- ^ candidate words
        -> [String]
tryWord prefilter pcode word =
  map snd .
    sortBy (comparing fst) .
    map withDistance .
    filter ((== targetCode) . pcode) .
    filter (prefilter word)
  where
    -- The target's phonetic code does not depend on the candidate,
    -- so compute it once rather than inside the filter predicate.
    targetCode = pcode word
    -- Pair each surviving candidate with its distance so that the
    -- edit distance is computed at most once per candidate instead
    -- of once per comparison made by the sort. 'sortBy' is stable,
    -- so the resulting order matches sorting on the distance directly.
    withDistance w = (editDistance word w, w)