module Synt.Similar (sim, ngramRange, ngram, ngrams) where
import Synt.Parser
import Data.List
import Data.List.Split
import Data.Ix
import Data.Maybe
import Text.RegexPR
import Synt.Similar.Jaccard
import Synt.Similar.Tanimoto
-- | Parse a string as an 'Int'.
--
-- Delegates to 'read', so text that is not a valid 'Int' literal raises a
-- runtime error rather than returning a failure value.
int :: String -> Int
int = read
-- | Convert a two-element list of numeric strings into a pair of 'Int's.
--
-- Each element is parsed with 'int'. A list of any other length is rejected
-- with a descriptive error instead of an anonymous pattern-match failure.
tupint :: [String] -> (Int, Int)
tupint [x, y] = (int x, int y)
tupint parts =
  error $ "Synt.Similar.tupint: expected exactly two bounds, got " ++ show parts
-- | Collapse every inner list of strings into one concatenated string.
--
-- The outer list keeps its length and order; an empty inner list becomes @""@.
join :: [[String]] -> [String]
join = map concat
-- | Turn an n-gram range specification into inclusive @(low, high)@ bounds.
--
-- The specification @n@ is interpreted as follows, first match wins:
--
-- * empty string: @(1, 1)@
-- * @"all"@: @(1, length l)@, i.e. up to the length of the supplied list
-- * contains @".."@: the text is split on @".."@ and the pieces are parsed
--   by 'tupint'
-- * anything else: parsed as a single number used for both bounds
--
-- The list @l@ is only inspected for its length, and only in the @"all"@ case.
ngramRange :: String -> [a] -> (Int, Int)
ngramRange n l
  | null n = (1, 1)
  | n == "all" = (1, length l)
  | ".." `isInfixOf` n = tupint (splitOn ".." n)
  | otherwise = (int n, int n)
-- | Build the sliding-window n-grams of size @n@ over a token list.
--
-- Equations are tried in order, so the following edge cases hold:
--
-- * a single-element list yields no grams at all, for every @n@
--   (this equation comes before the @n == 1@ case);
-- * @n == 1@ wraps each token in its own singleton gram;
-- * otherwise a window of @n@ tokens is emitted and the window advances by
--   one token for as long as at least @n@ tokens remain after the current
--   head;
-- * a list shorter than @n@ (including the empty list) yields exactly one
--   gram holding whatever tokens are available.
ngram :: Int -> [String] -> [[String]]
ngram _ [_] = []
ngram 1 list = map (: []) list
ngram n list@(_ : rest)
  -- @length rest@ is @length list - 1@: keep sliding only while the
  -- remaining tail can still supply a full window.
  | length rest >= n = take n list : ngram n rest
ngram n list = [take n list]
-- | Collect the joined n-grams of @list@ for every size in the inclusive
-- range @r@.
--
-- Sizes are visited in the order produced by 'range'; for each size the grams
-- from 'ngram' are flattened to single strings with 'join' and appended to the
-- result.
ngrams :: (Int, Int) -> [String] -> [String]
ngrams r list = [gram | size <- range r, gram <- join (ngram size list)]
-- | Score the similarity of two token lists.
--
-- The n-gram range is resolved from the specification @n@ against @cmp@ only
-- (so an @"all"@ range is bounded by the length of @cmp@), and the same range
-- is then applied to both token lists. @algo@ selects 'tanimoto' when it is
-- exactly @"tanimoto"@; every other value falls back to 'jaccard'.
sim' :: [String] -> [String] -> String -> String -> Float
sim' cmp to algo n
  | algo == "tanimoto" = tanimoto cmpGrams toGrams
  | otherwise = jaccard cmpGrams toGrams
  where
    bounds = ngramRange n cmp
    cmpGrams = ngrams bounds cmp
    toGrams = ngrams bounds to
-- | Compare two strings and return their similarity score.
--
-- Both inputs are passed through @tokenize@ (presumably provided by
-- "Synt.Parser" -- confirm) and the resulting token lists are handed to
-- @sim'@ together with the algorithm name and the n-gram range specification.
sim :: String -> String -> String -> String -> Float
sim from to algo n = sim' fromTokens toTokens algo n
  where
    fromTokens = tokenize from
    toTokens = tokenize to