{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE OverloadedLists #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE ViewPatterns #-}

{-|
Module      : FuzzyFind
Description : Provides fuzzy matching on text
Copyright   : Unison Computing, 2021
License     : MIT
Maintainer  : runar.bjarnason@unison.cloud
Stability   : experimental

A package that provides an API for fuzzy text search in Haskell, using a
modified version of the Smith-Waterman algorithm. The search is intended to
behave similarly to the excellent fzf tool by Junegunn Choi.
-}
module Text.FuzzyFind where

import Control.Monad (join)
import Data.Massiv.Array
  ( Array,
    (!),
    Ix2(..),
    (...),
    forM,
    forM_
  )
import qualified Data.Massiv.Array as A
import qualified Data.Massiv.Array.Unsafe as A
import qualified Data.Massiv.Array.Mutable as M
import Data.Char (isAlphaNum, isLower, isUpper, toLower)
import Data.Foldable (maximumBy, toList, foldl')
import Data.Function (on)
import Data.Maybe (fromMaybe)
import GHC.Generics (Generic)
import Data.Text (Text)
import qualified Data.Text as Text
import Control.Monad.ST (runST)
import Data.Sequence
  ( Seq (..),
    ViewL (..),
    ViewR (..),
    viewl,
    viewr,
    (<|)
  )
import qualified Data.Sequence as Seq


-- | @bestMatch query string@ will return 'Nothing' if @query@ is not a
-- subsequence of @string@. Otherwise, it will return the "best" way to line up
-- the characters in @query@ with the characters in @string@. Lower-case
-- characters in the @query@ are assumed to be case-insensitive, and upper-case
-- characters are assumed to be case-sensitive.
--
-- For example:
--
-- @
-- > bestMatch "ff" \"FuzzyFind\"
-- Just (Alignment {score = 25, result = Result {[Match \"F\", Gap "uzzy", Match \"F\", Gap "ind"]}})
-- @
--
-- The score indicates how "good" the match is. Better matches have higher
-- scores. There's no maximum score (except for the upper limit of the 'Int'
-- datatype), but the lowest score is @0@.
--
-- A substring from the query will generate a 'Match', and any characters from
-- the input that don't result in a 'Match' will generate a 'Gap'.
-- Concatenating all the 'Match' and 'Gap' results should yield the original
-- input string.
--
-- Note that the matched characters in the input always occur in the same order
-- as they do in the query pattern.
--
-- The algorithm prefers (and will generate higher scores for) the following
-- kinds of matches:
--
--   1. Contiguous characters from the query string. For example, @bestMatch "pp"@
-- will find the last two ps in "pickled pepper".
--   2. Characters at the beginnings of words. For example, @bestMatch "pp"@
-- will find the first two Ps in \"Peter Piper\".
--   3. Characters at CamelCase humps. For example, @bestMatch "bm" \"BatMan\"@
-- will score higher than @bestMatch "bm" \"Batman\".@
--   4. The algorithm strongly prefers the first character of the query pattern
-- to be at the beginning of a word or CamelHump. For example,
-- @bestMatch "mn" \"Bat Man\"@ will score higher than @bestMatch "atn" \"Batman\"@.
--
-- All else being equal, matches that occur later in the input string are preferred.
bestMatch :: String -- ^ The query pattern.
          -> String -- ^ The input string.
          -> Maybe Alignment
bestMatch :: [Char] -> [Char] -> Maybe Alignment
bestMatch = Int
-> Int
-> Int
-> Int
-> Int
-> Int
-> Int
-> Int
-> [Char]
-> [Char]
-> Maybe Alignment
bestMatch' Int
defaultMatchScore
                       Int
defaultMismatchScore
                       Int
defaultGapPenalty
                       Int
defaultBoundaryBonus
                       Int
defaultCamelCaseBonus
                       Int
defaultFirstCharBonusMultiplier
                       Int
defaultConsecutiveBonus
                       Int
defaultLaterBonusMultiplier

-- | Finds input strings that match all the given input patterns. For each input
-- that matches, it returns one 'Alignment'. The output is not sorted.
-- ascending.
--
-- For example:
--
-- @
-- > import Data.Foldable
-- > traverse_ (putStrLn . ("\\n" ++) . highlight) $ fuzzyFind ["dad", "mac", "dam"] ["red macadamia", "Madam Card"]
--
-- Madam Card
-- * *** ** *
--
-- red macadamia
--   * *******
-- @
fuzzyFind
  :: [String] -- ^ The query patterns.
  -> [String] -- ^ The input strings.
  -> [Alignment]
fuzzyFind :: [[Char]] -> [[Char]] -> [Alignment]
fuzzyFind = (forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap forall a b. (a, b) -> a
fst forall b c a. (b -> c) -> (a -> b) -> a -> c
.) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. (a -> [Char]) -> [[Char]] -> [a] -> [(Alignment, a)]
fuzzyFindOn forall a. a -> a
id

-- | A version of 'fuzzyFind' that searches on the given text field of the data.
fuzzyFindOn :: (a -> String) -> [String] -> [a] -> [(Alignment, a)]
fuzzyFindOn :: forall a. (a -> [Char]) -> [[Char]] -> [a] -> [(Alignment, a)]
fuzzyFindOn a -> [Char]
f [[Char]]
query [a]
d =
  [a]
d
    forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= (\a
s ->
          forall (t :: * -> *) a. Foldable t => t a -> [a]
toList
            forall a b. (a -> b) -> a -> b
$   (, a
s)
            forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (\Maybe Alignment
a [Char]
q -> forall a. Semigroup a => a -> a -> a
(<>) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Maybe Alignment
a forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> [Char] -> [Char] -> Maybe Alignment
bestMatch [Char]
q (a -> [Char]
f a
s))
                       (forall a. a -> Maybe a
Just forall a. Monoid a => a
mempty)
                       [[Char]]
query
        )

instance Semigroup Alignment where
  Alignment Int
n Result
r <> :: Alignment -> Alignment -> Alignment
<> Alignment Int
m Result
s = Int -> Result -> Alignment
Alignment (Int
n forall a. Num a => a -> a -> a
+ Int
m) (Result -> Result -> Result
mergeResults Result
r Result
s)

instance Monoid Alignment where
  mempty :: Alignment
mempty = Int -> Result -> Alignment
Alignment Int
0 forall a. Monoid a => a
mempty

type Score = Int

-- | An 'Alignment' is a 'Score' together with a 'Result'. Better results have
-- higher scores.
data Alignment
  = Alignment { Alignment -> Int
score :: !Score, Alignment -> Result
result :: !Result }
  deriving (Alignment -> Alignment -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Alignment -> Alignment -> Bool
$c/= :: Alignment -> Alignment -> Bool
== :: Alignment -> Alignment -> Bool
$c== :: Alignment -> Alignment -> Bool
Eq, Eq Alignment
Alignment -> Alignment -> Bool
Alignment -> Alignment -> Ordering
Alignment -> Alignment -> Alignment
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: Alignment -> Alignment -> Alignment
$cmin :: Alignment -> Alignment -> Alignment
max :: Alignment -> Alignment -> Alignment
$cmax :: Alignment -> Alignment -> Alignment
>= :: Alignment -> Alignment -> Bool
$c>= :: Alignment -> Alignment -> Bool
> :: Alignment -> Alignment -> Bool
$c> :: Alignment -> Alignment -> Bool
<= :: Alignment -> Alignment -> Bool
$c<= :: Alignment -> Alignment -> Bool
< :: Alignment -> Alignment -> Bool
$c< :: Alignment -> Alignment -> Bool
compare :: Alignment -> Alignment -> Ordering
$ccompare :: Alignment -> Alignment -> Ordering
Ord, Int -> Alignment -> ShowS
[Alignment] -> ShowS
Alignment -> [Char]
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
showList :: [Alignment] -> ShowS
$cshowList :: [Alignment] -> ShowS
show :: Alignment -> [Char]
$cshow :: Alignment -> [Char]
showsPrec :: Int -> Alignment -> ShowS
$cshowsPrec :: Int -> Alignment -> ShowS
Show, forall x. Rep Alignment x -> Alignment
forall x. Alignment -> Rep Alignment x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep Alignment x -> Alignment
$cfrom :: forall x. Alignment -> Rep Alignment x
Generic)

-- | The base score given to a matching character
defaultMatchScore :: Int
defaultMatchScore :: Int
defaultMatchScore = Int
16

-- | The base score given to a mismatched character
defaultMismatchScore :: Int
defaultMismatchScore :: Int
defaultMismatchScore = Int
0

-- | Bonus points given to characters matching at the beginning of words
defaultBoundaryBonus :: Int
defaultBoundaryBonus :: Int
defaultBoundaryBonus = Int
defaultMatchScore forall a. Integral a => a -> a -> a
`div` Int
2

-- | Bonus points given to characters matching a hump of a CamelCase word.
-- We subtract a point from the word boundary score, since a word boundary will
-- incur a gap penalty.
defaultCamelCaseBonus :: Int
defaultCamelCaseBonus :: Int
defaultCamelCaseBonus = Int
defaultBoundaryBonus forall a. Num a => a -> a -> a
- Int
1

-- | Double any bonus points for matching the first pattern of the character.
-- This way we strongly prefer starting the match at the beginning of a word.
defaultFirstCharBonusMultiplier :: Int
defaultFirstCharBonusMultiplier :: Int
defaultFirstCharBonusMultiplier = Int
2

-- | We prefer consecutive runs of matched characters in the pattern, so we
-- impose a penalty for any gaps, which is added to the size of the gap.
defaultGapPenalty :: Int
defaultGapPenalty :: Int
defaultGapPenalty = Int
3

-- | We give a bonus to consecutive matching characters.
-- A number about the same as the boundary bonus will prefer
-- runs of consecutive characters vs finding acronyms.
defaultConsecutiveBonus :: Int
defaultConsecutiveBonus :: Int
defaultConsecutiveBonus = Int
11

-- | We give a bonus for matches that occur later in the input string.
-- If this is e.g. 100, we give one bonus point for each percentage of the
-- length of the input that the match occurs at (100 points for the last
-- character, 50 for characters in the middle, and so on).
defaultLaterBonusMultiplier :: Int
defaultLaterBonusMultiplier :: Int
defaultLaterBonusMultiplier = Int
5

segmentToString :: ResultSegment -> String
segmentToString :: ResultSegment -> [Char]
segmentToString (Gap   [Char]
xs) = [Char]
xs
segmentToString (Match [Char]
xs) = [Char]
xs

-- | Renders an 'Alignment' as a pair of lines with "*" on the lower line
-- indicating the location of pattern matches.
highlight :: Alignment -> String
highlight :: Alignment -> [Char]
highlight (Alignment Int
s (Result Seq ResultSegment
segments)) =
  forall (t :: * -> *) m a.
(Foldable t, Monoid m) =>
(a -> m) -> t a -> m
foldMap ResultSegment -> [Char]
segmentToString Seq ResultSegment
segments forall a. Semigroup a => a -> a -> a
<> [Char]
"\n" forall a. Semigroup a => a -> a -> a
<> forall (t :: * -> *) m a.
(Foldable t, Monoid m) =>
(a -> m) -> t a -> m
foldMap ResultSegment -> [Char]
showGaps Seq ResultSegment
segments
 where
  showGaps :: ResultSegment -> [Char]
showGaps (Gap   [Char]
xs) = forall a. Int -> a -> [a]
replicate (forall (t :: * -> *) a. Foldable t => t a -> Int
length [Char]
xs) Char
' '
  showGaps (Match [Char]
xs) = forall a. Int -> a -> [a]
replicate (forall (t :: * -> *) a. Foldable t => t a -> Int
length [Char]
xs) Char
'*'

highlight' :: Alignment -> Text
highlight' :: Alignment -> Text
highlight' = [Char] -> Text
Text.pack forall b c a. (b -> c) -> (a -> b) -> a -> c
. Alignment -> [Char]
highlight

-- | A highly configurable version of 'bestMatch'.
bestMatch'
  :: Int -- ^ Base score for a matching character. See 'defaultMatchScore'.
  -> Int -- ^ Base score for a mismatched character. See 'defaultMismatchScore'.
  -> Int -- ^ Additional penalty for a gap. See 'defaultGapPenalty'.
  -> Int -- ^ Bonus score for a match at the beginning of a word. See 'defaultBoundaryBonus'.
  -> Int -- ^ Bonus score for a match on a CamelCase hump. See 'defaultCamelCaseBonus'.
  -> Int -- ^ Bonus multiplier for matching the first character of the pattern.
         --   See 'defaultFirstCharBonusMultiplier'.
  -> Int -- ^ Bonus score for each consecutive character matched.
         --   See 'defaultFirstCharBonusMultiplier'.
  -> Int -- ^ Bonus multiplier for matching later characters of the input.
         --   E.g. if this is 10, the bonus for matching the last character of
         --   the input is 10, and 5 for characters close to the middle, and so on.
  -> String -- ^ The query pattern.
  -> String -- ^ The input string.
  -> Maybe Alignment
bestMatch' :: Int
-> Int
-> Int
-> Int
-> Int
-> Int
-> Int
-> Int
-> [Char]
-> [Char]
-> Maybe Alignment
bestMatch' Int
matchScore Int
mismatchScore Int
gapPenalty Int
boundaryBonus Int
camelCaseBonus Int
firstCharBonusMultiplier Int
consecutiveBonus Int
laterBonusMultiplier [Char]
query [Char]
str
  = Int -> Result -> Alignment
Alignment (Int -> Int -> Int
totalScore Int
m Int
nx) forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Seq ResultSegment -> Result
Result forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. [a] -> Seq a
Seq.fromList) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Maybe [ResultSegment]
traceback
 where
  totalScore :: Int -> Int -> Int
totalScore Int
i Int
j =
    if Int
i forall a. Ord a => a -> a -> Bool
> Int
m then Int
0 else (forall ix r e.
(HasCallStack, Index ix, Manifest r e) =>
Array r ix e -> ix -> e
A.index' Array U Ix2 Int
hs (Int
i Int -> Int -> Ix2
:. Int
j)) forall a. Num a => a -> a -> a
+ (forall ix r e.
(HasCallStack, Index ix, Manifest r e) =>
Array r ix e -> ix -> e
A.index' Array U Ix2 Int
bonuses (Int
i Int -> Int -> Ix2
:. Int
j))
  -- table = unlines
  --   [ unwords
  --     $ (if y > 0 then show $ str' ! y else "   ")
  --     : [ show (totalScore x y) | x <- [0 .. m] ]
  --   | y <- [0 .. n]
  --   ]
  similarity :: Char -> Char -> Int
similarity Char
a Char
b =
    if Char
a forall a. Eq a => a -> a -> Bool
== Char
b Bool -> Bool -> Bool
|| Char
a forall a. Eq a => a -> a -> Bool
== Char -> Char
toLower Char
b then Int
matchScore else Int
mismatchScore
  traceback :: Maybe [ResultSegment]
  traceback :: Maybe [ResultSegment]
traceback = forall {t}.
(Eq t, Num t) =>
[ResultSegment]
-> [Char] -> t -> Int -> Int -> Maybe [ResultSegment]
go (if Int
nx forall a. Ord a => a -> a -> Bool
< Int
n then [[Char] -> ResultSegment
Gap forall a b. (a -> b) -> a -> b
$ forall a. Int -> [a] -> [a]
drop Int
nx [Char]
str] else []) [] (-Integer
1) Int
m Int
nx
  go :: [ResultSegment]
-> [Char] -> t -> Int -> Int -> Maybe [ResultSegment]
go [ResultSegment]
r [Char]
m t
currOp Int
0 Int
j =
    (if Int
j forall a. Ord a => a -> a -> Bool
> Int
0 then ([Char] -> ResultSegment
Gap (forall a. Int -> [a] -> [a]
take Int
j [Char]
str) forall a. a -> [a] -> [a]
:) else forall a. a -> a
id) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> case [Char]
m of
      [] -> forall a. a -> Maybe a
Just [ResultSegment]
r
      [Char]
_  -> case t
currOp of
        t
1  -> forall a. a -> Maybe a
Just ([Char] -> ResultSegment
Match [Char]
m forall a. a -> [a] -> [a]
: [ResultSegment]
r)
        t
0  -> forall a. a -> Maybe a
Just ([Char] -> ResultSegment
Gap [Char]
m forall a. a -> [a] -> [a]
: [ResultSegment]
r)
        -1 -> forall a. Maybe a
Nothing
  go [ResultSegment]
_ [Char]
_ t
_ Int
_ Int
0 = forall a. Maybe a
Nothing
  go [ResultSegment]
r [Char]
m t
currOp Int
i Int
j =
    if Char -> Char -> Int
similarity (forall ix r e.
(HasCallStack, Index ix, Manifest r e) =>
Array r ix e -> ix -> e
A.index' Array U Int Char
query' (Int
i forall a. Num a => a -> a -> a
- Int
1)) (forall ix r e.
(HasCallStack, Index ix, Manifest r e) =>
Array r ix e -> ix -> e
A.index' Array U Int Char
str' (Int
j forall a. Num a => a -> a -> a
- Int
1)) forall a. Ord a => a -> a -> Bool
> Int
0
      then case t
currOp of
        t
0 -> [ResultSegment]
-> [Char] -> t -> Int -> Int -> Maybe [ResultSegment]
go ([Char] -> ResultSegment
Gap [Char]
m forall a. a -> [a] -> [a]
: [ResultSegment]
r) [forall ix r e.
(HasCallStack, Index ix, Manifest r e) =>
Array r ix e -> ix -> e
A.index' Array U Int Char
str' (Int
j forall a. Num a => a -> a -> a
- Int
1)] t
1 (Int
i forall a. Num a => a -> a -> a
- Int
1) (Int
j forall a. Num a => a -> a -> a
- Int
1)
        t
_ -> [ResultSegment]
-> [Char] -> t -> Int -> Int -> Maybe [ResultSegment]
go [ResultSegment]
r (forall ix r e.
(HasCallStack, Index ix, Manifest r e) =>
Array r ix e -> ix -> e
A.index' Array U Int Char
str' (Int
j forall a. Num a => a -> a -> a
- Int
1) forall a. a -> [a] -> [a]
: [Char]
m) t
1 (Int
i forall a. Num a => a -> a -> a
- Int
1) (Int
j forall a. Num a => a -> a -> a
- Int
1)
      else case t
currOp of
        t
1 -> [ResultSegment]
-> [Char] -> t -> Int -> Int -> Maybe [ResultSegment]
go ([Char] -> ResultSegment
Match [Char]
m forall a. a -> [a] -> [a]
: [ResultSegment]
r) [forall ix r e.
(HasCallStack, Index ix, Manifest r e) =>
Array r ix e -> ix -> e
A.index' Array U Int Char
str' (Int
j forall a. Num a => a -> a -> a
- Int
1)] t
0 Int
i (Int
j forall a. Num a => a -> a -> a
- Int
1)
        t
_ -> [ResultSegment]
-> [Char] -> t -> Int -> Int -> Maybe [ResultSegment]
go [ResultSegment]
r (forall ix r e.
(HasCallStack, Index ix, Manifest r e) =>
Array r ix e -> ix -> e
A.index' Array U Int Char
str' (Int
j forall a. Num a => a -> a -> a
- Int
1) forall a. a -> [a] -> [a]
: [Char]
m) t
0 Int
i (Int
j forall a. Num a => a -> a -> a
- Int
1)
  nx :: Int
nx = Int -> Int -> Int -> Int -> Int -> Int
localMax Int
m Int
n Int
1 Int
0 Int
0
  localMax :: Int -> Int -> Int -> Int -> Int -> Int
localMax Int
m Int
n Int
j Int
r Int
s = if Int
j forall a. Ord a => a -> a -> Bool
> Int
n
    then Int
r
    else
      let s' :: Int
s' = Int -> Int -> Int
totalScore Int
m Int
j
      in  Int -> Int -> Int -> Int -> Int -> Int
localMax Int
m Int
n (Int
j forall a. Num a => a -> a -> a
+ Int
1) (if Int
s' forall a. Ord a => a -> a -> Bool
> Int
s then Int
j else Int
r) Int
s'
  query' :: Array U Int Char
query' = forall r e. Manifest r e => Comp -> [e] -> Vector r e
A.fromList Comp
A.Seq [Char]
query :: Array A.U A.Ix1 Char
  str' :: Array U Int Char
str'   = forall r e. Manifest r e => Comp -> [e] -> Vector r e
A.fromList Comp
A.Seq [Char]
str :: Array A.U A.Ix1 Char
  m :: Int
m      = forall ix. Sz ix -> ix
A.unSz forall a b. (a -> b) -> a -> b
$ forall r ix e. Size r => Array r ix e -> Sz ix
A.size Array U Int Char
query'
  n :: Int
n      = forall ix. Sz ix -> ix
A.unSz forall a b. (a -> b) -> a -> b
$ forall r ix e. Size r => Array r ix e -> Sz ix
A.size Array U Int Char
str'
  hs :: Array A.U Ix2 Int
  hs :: Array U Ix2 Int
hs = forall r ix e a.
(Manifest r e, Index ix) =>
Sz ix -> (forall s. MArray s r ix e -> ST s a) -> Array r ix e
M.createArrayST_ (forall ix. Index ix => ix -> Sz ix
A.Sz (Int
m forall a. Num a => a -> a -> a
+ Int
1 Int -> Int -> Ix2
:. Int
n forall a. Num a => a -> a -> a
+ Int
1)) forall a b. (a -> b) -> a -> b
$ \MArray s U Ix2 Int
marr -> do
    forall r a ix (m :: * -> *) b.
(Source r a, Index ix, Monad m) =>
Array r ix a -> (a -> m b) -> m ()
A.forM_ ((Int
0 Int -> Int -> Ix2
:. Int
0) forall ix. Index ix => ix -> ix -> Array D ix ix
... (Int
m Int -> Int -> Ix2
:. Int
n)) forall a b. (a -> b) -> a -> b
$ \(Int
i :. Int
j) -> if (Int
i forall a. Eq a => a -> a -> Bool
== Int
0 Bool -> Bool -> Bool
|| Int
j forall a. Eq a => a -> a -> Bool
== Int
0)
      then forall r e ix (m :: * -> *).
(Manifest r e, Index ix, PrimMonad m, MonadThrow m) =>
MArray (PrimState m) r ix e -> ix -> e -> m ()
M.writeM MArray s U Ix2 Int
marr (Int
i Int -> Int -> Ix2
:. Int
j) Int
0
      else do
        Int
scoreMatch <- do
          Int
hprev <- forall r e ix (m :: * -> *).
(Manifest r e, Index ix, PrimMonad m, MonadThrow m) =>
MArray (PrimState m) r ix e -> ix -> m e
M.readM MArray s U Ix2 Int
marr ((Int
i forall a. Num a => a -> a -> a
- Int
1) Int -> Int -> Ix2
:. (Int
j forall a. Num a => a -> a -> a
- Int
1))
          forall (f :: * -> *) a. Applicative f => a -> f a
pure
            forall a b. (a -> b) -> a -> b
$ Int
hprev
            forall a. Num a => a -> a -> a
+ Char -> Char -> Int
similarity (forall ix r e.
(HasCallStack, Index ix, Manifest r e) =>
Array r ix e -> ix -> e
A.index' Array U Int Char
query' (Int
i forall a. Num a => a -> a -> a
- Int
1)) (forall ix r e.
(HasCallStack, Index ix, Manifest r e) =>
Array r ix e -> ix -> e
A.index' Array U Int Char
str' (Int
j forall a. Num a => a -> a -> a
- Int
1))
            forall a. Num a => a -> a -> a
+ forall ix r e.
(HasCallStack, Index ix, Manifest r e) =>
Array r ix e -> ix -> e
A.index' Array U Ix2 Int
bonuses (Int
i Int -> Int -> Ix2
:. Int
j)
        Int
scoreGap <- do
          (Array U Int Int
arr :: Array A.U A.Ix1 Int) <- forall r ix b r' a (m :: * -> *).
(Source r' a, Manifest r b, Index ix, Monad m) =>
Array r' ix a -> (a -> m b) -> m (Array r ix b)
forM (Int
1 forall ix. Index ix => ix -> ix -> Array D ix ix
... Int
j) forall a b. (a -> b) -> a -> b
$ \Int
l ->
            (\Int
x -> Int
x forall a. Num a => a -> a -> a
- (Int
l forall a. Num a => a -> a -> a
+ Int
gapPenalty)) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall r e ix (m :: * -> *).
(Manifest r e, Index ix, PrimMonad m, MonadThrow m) =>
MArray (PrimState m) r ix e -> ix -> m e
M.readM MArray s U Ix2 Int
marr (Int
i Int -> Int -> Ix2
:. (Int
j forall a. Num a => a -> a -> a
- Int
l))
          forall (f :: * -> *) a. Applicative f => a -> f a
pure forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. a -> Maybe a -> a
fromMaybe Int
0 forall a b. (a -> b) -> a -> b
$ forall (m :: * -> *) r ix e.
(MonadThrow m, Shape r ix, Source r e, Ord e) =>
Array r ix e -> m e
A.maximumM Array U Int Int
arr
        forall r e ix (m :: * -> *).
(Manifest r e, Index ix, PrimMonad m, MonadThrow m) =>
MArray (PrimState m) r ix e -> ix -> e -> m ()
M.writeM MArray s U Ix2 Int
marr (Int
i Int -> Int -> Ix2
:. Int
j) (Int
scoreMatch forall a. Ord a => a -> a -> a
`max` Int
scoreGap forall a. Ord a => a -> a -> a
`max` Int
0)
  bonuses :: Array U Ix2 Int
bonuses = forall r ix e.
Load r ix e =>
Comp -> Sz ix -> (ix -> e) -> Array r ix e
A.makeArray Comp
A.Seq (forall ix. Index ix => ix -> Sz ix
A.Sz (Int
m forall a. Num a => a -> a -> a
+ Int
1 Int -> Int -> Ix2
:. Int
n forall a. Num a => a -> a -> a
+ Int
1)) Ix2 -> Int
f :: Array A.U Ix2 Int
    where f :: Ix2 -> Int
f (Int
i :. Int
j) = Int -> Int -> Int
bonus Int
i Int
j
  bonus :: Int -> Int -> Int
  bonus :: Int -> Int -> Int
bonus Int
0 Int
j = Int
0
  bonus Int
i Int
0 = Int
0
  bonus Int
i Int
j =
    if Char -> Char -> Int
similarity (forall ix r e.
(HasCallStack, Index ix, Manifest r e) =>
Array r ix e -> ix -> e
A.index' Array U Int Char
query' (Int
i forall a. Num a => a -> a -> a
- Int
1)) (forall ix r e.
(HasCallStack, Index ix, Manifest r e) =>
Array r ix e -> ix -> e
A.index' Array U Int Char
str' (Int
j forall a. Num a => a -> a -> a
- Int
1)) forall a. Ord a => a -> a -> Bool
> Int
0
      then
        Int
multiplier
          forall a. Num a => a -> a -> a
* ( Int
boundary
            forall a. Num a => a -> a -> a
+ Int
camel
            forall a. Num a => a -> a -> a
+ Int
consecutive
            forall a. Num a => a -> a -> a
+ (if Int
n forall a. Ord a => a -> a -> Bool
> Int
0 then (Int
j forall a. Num a => a -> a -> a
* Int
laterBonusMultiplier) forall a. Integral a => a -> a -> a
`div` Int
n else Int
0)
            )
      else Int
0
   where
    boundary :: Int
boundary =
      if Int
j forall a. Ord a => a -> a -> Bool
< Int
2 Bool -> Bool -> Bool
|| Char -> Bool
isAlphaNum (forall ix r e.
(HasCallStack, Index ix, Manifest r e) =>
Array r ix e -> ix -> e
A.index' Array U Int Char
str' (Int
j forall a. Num a => a -> a -> a
- Int
1)) Bool -> Bool -> Bool
&& Bool -> Bool
not
           (Char -> Bool
isAlphaNum (forall ix r e.
(HasCallStack, Index ix, Manifest r e) =>
Array r ix e -> ix -> e
A.index' Array U Int Char
str' (Int
j forall a. Num a => a -> a -> a
- Int
2)))
        then Int
boundaryBonus
        else Int
0
    camel :: Int
camel =
      if Int
j forall a. Ord a => a -> a -> Bool
> Int
1 Bool -> Bool -> Bool
&& Char -> Bool
isLower (forall ix r e.
(HasCallStack, Index ix, Manifest r e) =>
Array r ix e -> ix -> e
A.index' Array U Int Char
str' (Int
j forall a. Num a => a -> a -> a
- Int
2)) Bool -> Bool -> Bool
&& Char -> Bool
isUpper
         (forall ix r e.
(HasCallStack, Index ix, Manifest r e) =>
Array r ix e -> ix -> e
A.index' Array U Int Char
str' (Int
j forall a. Num a => a -> a -> a
- Int
1))
      then
        Int
camelCaseBonus
      else
        Int
0
    multiplier :: Int
multiplier = if Int
i forall a. Eq a => a -> a -> Bool
== Int
1 then Int
firstCharBonusMultiplier else Int
1
    consecutive :: Int
consecutive =
      let
        similar :: Bool
similar =
          Int
i
            forall a. Ord a => a -> a -> Bool
>  Int
0
            Bool -> Bool -> Bool
&& Int
j
            forall a. Ord a => a -> a -> Bool
>  Int
0
            Bool -> Bool -> Bool
&& Char -> Char -> Int
similarity (forall ix r e.
(HasCallStack, Index ix, Manifest r e) =>
Array r ix e -> ix -> e
A.index' Array U Int Char
query' (Int
i forall a. Num a => a -> a -> a
- Int
1)) (forall ix r e.
(HasCallStack, Index ix, Manifest r e) =>
Array r ix e -> ix -> e
A.index' Array U Int Char
str' (Int
j forall a. Num a => a -> a -> a
- Int
1))
            forall a. Ord a => a -> a -> Bool
>  Int
0
        afterMatch :: Bool
afterMatch =
          Int
i
            forall a. Ord a => a -> a -> Bool
>  Int
1
            Bool -> Bool -> Bool
&& Int
j
            forall a. Ord a => a -> a -> Bool
>  Int
1
            Bool -> Bool -> Bool
&& Char -> Char -> Int
similarity (forall ix r e.
(HasCallStack, Index ix, Manifest r e) =>
Array r ix e -> ix -> e
A.index' Array U Int Char
query' (Int
i forall a. Num a => a -> a -> a
- Int
2)) (forall ix r e.
(HasCallStack, Index ix, Manifest r e) =>
Array r ix e -> ix -> e
A.index' Array U Int Char
str' (Int
j forall a. Num a => a -> a -> a
- Int
2))
            forall a. Ord a => a -> a -> Bool
>  Int
0
        beforeMatch :: Bool
beforeMatch =
          Int
i forall a. Ord a => a -> a -> Bool
< Int
m Bool -> Bool -> Bool
&& Int
j forall a. Ord a => a -> a -> Bool
< Int
n Bool -> Bool -> Bool
&& Char -> Char -> Int
similarity (forall ix r e.
(HasCallStack, Index ix, Manifest r e) =>
Array r ix e -> ix -> e
A.index' Array U Int Char
query' Int
i) (forall ix r e.
(HasCallStack, Index ix, Manifest r e) =>
Array r ix e -> ix -> e
A.index' Array U Int Char
str' Int
j) forall a. Ord a => a -> a -> Bool
> Int
0
      in
        if Bool
similar Bool -> Bool -> Bool
&& (Bool
afterMatch Bool -> Bool -> Bool
|| Bool
beforeMatch) then Int
consecutiveBonus else Int
0

-- gaps :: String -> Seq ResultSegment
-- gaps s = [Gap s]

data ResultSegment = Gap !String | Match !String
  deriving (ResultSegment -> ResultSegment -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: ResultSegment -> ResultSegment -> Bool
$c/= :: ResultSegment -> ResultSegment -> Bool
== :: ResultSegment -> ResultSegment -> Bool
$c== :: ResultSegment -> ResultSegment -> Bool
Eq, Eq ResultSegment
ResultSegment -> ResultSegment -> Bool
ResultSegment -> ResultSegment -> Ordering
ResultSegment -> ResultSegment -> ResultSegment
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: ResultSegment -> ResultSegment -> ResultSegment
$cmin :: ResultSegment -> ResultSegment -> ResultSegment
max :: ResultSegment -> ResultSegment -> ResultSegment
$cmax :: ResultSegment -> ResultSegment -> ResultSegment
>= :: ResultSegment -> ResultSegment -> Bool
$c>= :: ResultSegment -> ResultSegment -> Bool
> :: ResultSegment -> ResultSegment -> Bool
$c> :: ResultSegment -> ResultSegment -> Bool
<= :: ResultSegment -> ResultSegment -> Bool
$c<= :: ResultSegment -> ResultSegment -> Bool
< :: ResultSegment -> ResultSegment -> Bool
$c< :: ResultSegment -> ResultSegment -> Bool
compare :: ResultSegment -> ResultSegment -> Ordering
$ccompare :: ResultSegment -> ResultSegment -> Ordering
Ord, Int -> ResultSegment -> ShowS
[ResultSegment] -> ShowS
ResultSegment -> [Char]
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
showList :: [ResultSegment] -> ShowS
$cshowList :: [ResultSegment] -> ShowS
show :: ResultSegment -> [Char]
$cshow :: ResultSegment -> [Char]
showsPrec :: Int -> ResultSegment -> ShowS
$cshowsPrec :: Int -> ResultSegment -> ShowS
Show, forall x. Rep ResultSegment x -> ResultSegment
forall x. ResultSegment -> Rep ResultSegment x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep ResultSegment x -> ResultSegment
$cfrom :: forall x. ResultSegment -> Rep ResultSegment x
Generic)

-- | Concatenating all the 'ResultSegment's should yield the original input string.
newtype Result = Result { Result -> Seq ResultSegment
segments :: Seq ResultSegment }
  deriving (Result -> Result -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Result -> Result -> Bool
$c/= :: Result -> Result -> Bool
== :: Result -> Result -> Bool
$c== :: Result -> Result -> Bool
Eq, Eq Result
Result -> Result -> Bool
Result -> Result -> Ordering
Result -> Result -> Result
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: Result -> Result -> Result
$cmin :: Result -> Result -> Result
max :: Result -> Result -> Result
$cmax :: Result -> Result -> Result
>= :: Result -> Result -> Bool
$c>= :: Result -> Result -> Bool
> :: Result -> Result -> Bool
$c> :: Result -> Result -> Bool
<= :: Result -> Result -> Bool
$c<= :: Result -> Result -> Bool
< :: Result -> Result -> Bool
$c< :: Result -> Result -> Bool
compare :: Result -> Result -> Ordering
$ccompare :: Result -> Result -> Ordering
Ord, Int -> Result -> ShowS
[Result] -> ShowS
Result -> [Char]
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
showList :: [Result] -> ShowS
$cshowList :: [Result] -> ShowS
show :: Result -> [Char]
$cshow :: Result -> [Char]
showsPrec :: Int -> Result -> ShowS
$cshowsPrec :: Int -> Result -> ShowS
Show, forall x. Rep Result x -> Result
forall x. Result -> Rep Result x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep Result x -> Result
$cfrom :: forall x. Result -> Rep Result x
Generic)

instance Monoid Result where
  mempty :: Result
mempty = Seq ResultSegment -> Result
Result []

instance Semigroup Result where
  Result Seq ResultSegment
Empty <> :: Result -> Result -> Result
<> Result
as = Result
as
  Result
as <> Result Seq ResultSegment
Empty = Result
as
  Result (forall a. Seq a -> ViewR a
viewr -> Seq ResultSegment
h :> Gap []) <> Result
as = Seq ResultSegment -> Result
Result Seq ResultSegment
h forall a. Semigroup a => a -> a -> a
<> Result
as
  Result
as <> Result (forall a. Seq a -> ViewL a
viewl -> Gap [] :< Seq ResultSegment
t) = Result
as forall a. Semigroup a => a -> a -> a
<> Seq ResultSegment -> Result
Result Seq ResultSegment
t
  Result (forall a. Seq a -> ViewR a
viewr -> Seq ResultSegment
h :> Match []) <> Result
as = Seq ResultSegment -> Result
Result Seq ResultSegment
h forall a. Semigroup a => a -> a -> a
<> Result
as
  Result
as <> Result (forall a. Seq a -> ViewL a
viewl -> Match [] :< Seq ResultSegment
t) = Result
as forall a. Semigroup a => a -> a -> a
<> Seq ResultSegment -> Result
Result Seq ResultSegment
t
  Result (forall a. Seq a -> ViewR a
viewr -> Seq ResultSegment
i :> Gap [Char]
l) <> Result (forall a. Seq a -> ViewL a
viewl -> Gap [Char]
h :< Seq ResultSegment
t) =
    Seq ResultSegment -> Result
Result (Seq ResultSegment
i forall a. Semigroup a => a -> a -> a
<> [[Char] -> ResultSegment
Gap ([Char]
l forall a. Semigroup a => a -> a -> a
<> [Char]
h)] forall a. Semigroup a => a -> a -> a
<> Seq ResultSegment
t)
  Result (forall a. Seq a -> ViewR a
viewr -> Seq ResultSegment
i :> Match [Char]
l) <> Result (forall a. Seq a -> ViewL a
viewl -> Match [Char]
h :< Seq ResultSegment
t) =
    Seq ResultSegment -> Result
Result (Seq ResultSegment
i forall a. Semigroup a => a -> a -> a
<> [[Char] -> ResultSegment
Match ([Char]
l forall a. Semigroup a => a -> a -> a
<> [Char]
h)] forall a. Semigroup a => a -> a -> a
<> Seq ResultSegment
t)
  Result Seq ResultSegment
a <> Result Seq ResultSegment
b = Seq ResultSegment -> Result
Result (Seq ResultSegment
a forall a. Semigroup a => a -> a -> a
<> Seq ResultSegment
b)

mergeResults :: Result -> Result -> Result
mergeResults :: Result -> Result -> Result
mergeResults Result
as Result
bs = Result -> Result -> Result
merge Result
as Result
bs
 where
  drop' :: Int -> Result -> Result
  drop' :: Int -> Result -> Result
drop' Int
n Result
m | Int
n forall a. Ord a => a -> a -> Bool
< Int
1 = Result
m
  drop' Int
n (Result (forall a. Seq a -> ViewL a
viewl -> Gap [Char]
g :< Seq ResultSegment
t)) =
    Seq ResultSegment -> Result
Result [[Char] -> ResultSegment
Gap (forall a. Int -> [a] -> [a]
drop Int
n [Char]
g)] forall a. Semigroup a => a -> a -> a
<> Int -> Result -> Result
drop' (Int
n forall a. Num a => a -> a -> a
- forall (t :: * -> *) a. Foldable t => t a -> Int
length [Char]
g) (Seq ResultSegment -> Result
Result Seq ResultSegment
t)
  drop' Int
n (Result (forall a. Seq a -> ViewL a
viewl -> Match [Char]
g :< Seq ResultSegment
t)) =
    Seq ResultSegment -> Result
Result [[Char] -> ResultSegment
Match (forall a. Int -> [a] -> [a]
drop Int
n [Char]
g)] forall a. Semigroup a => a -> a -> a
<> Int -> Result -> Result
drop' (Int
n forall a. Num a => a -> a -> a
- forall (t :: * -> *) a. Foldable t => t a -> Int
length [Char]
g) (Seq ResultSegment -> Result
Result Seq ResultSegment
t)
  merge :: Result -> Result -> Result
  merge :: Result -> Result -> Result
merge (Result Seq ResultSegment
Seq.Empty) Result
ys                 = Result
ys
  merge Result
xs                 (Result Seq ResultSegment
Seq.Empty) = Result
xs
  merge (Result Seq ResultSegment
xs)        (Result Seq ResultSegment
ys       ) = case (forall a. Seq a -> ViewL a
viewl Seq ResultSegment
xs, forall a. Seq a -> ViewL a
viewl Seq ResultSegment
ys) of
    (Gap [Char]
g :< Seq ResultSegment
t, Gap [Char]
g' :< Seq ResultSegment
t')
      | forall (t :: * -> *) a. Foldable t => t a -> Int
length [Char]
g forall a. Ord a => a -> a -> Bool
<= forall (t :: * -> *) a. Foldable t => t a -> Int
length [Char]
g' -> Seq ResultSegment -> Result
Result [[Char] -> ResultSegment
Gap [Char]
g]
      forall a. Semigroup a => a -> a -> a
<> Result -> Result -> Result
merge (Seq ResultSegment -> Result
Result Seq ResultSegment
t) (Int -> Result -> Result
drop' (forall (t :: * -> *) a. Foldable t => t a -> Int
length [Char]
g) (Seq ResultSegment -> Result
Result Seq ResultSegment
ys))
      | Bool
otherwise -> Seq ResultSegment -> Result
Result [[Char] -> ResultSegment
Gap [Char]
g']
      forall a. Semigroup a => a -> a -> a
<> Result -> Result -> Result
merge (Int -> Result -> Result
drop' (forall (t :: * -> *) a. Foldable t => t a -> Int
length [Char]
g') (Seq ResultSegment -> Result
Result Seq ResultSegment
xs)) (Seq ResultSegment -> Result
Result Seq ResultSegment
t')
    (Match [Char]
m :< Seq ResultSegment
t, Match [Char]
m' :< Seq ResultSegment
t')
      | forall (t :: * -> *) a. Foldable t => t a -> Int
length [Char]
m forall a. Ord a => a -> a -> Bool
>= forall (t :: * -> *) a. Foldable t => t a -> Int
length [Char]
m' -> Seq ResultSegment -> Result
Result [[Char] -> ResultSegment
Match [Char]
m]
      forall a. Semigroup a => a -> a -> a
<> Result -> Result -> Result
merge (Seq ResultSegment -> Result
Result Seq ResultSegment
t) (Int -> Result -> Result
drop' (forall (t :: * -> *) a. Foldable t => t a -> Int
length [Char]
m) (Seq ResultSegment -> Result
Result Seq ResultSegment
ys))
      | Bool
otherwise -> Seq ResultSegment -> Result
Result [[Char] -> ResultSegment
Match [Char]
m']
      forall a. Semigroup a => a -> a -> a
<> Result -> Result -> Result
merge (Int -> Result -> Result
drop' (forall (t :: * -> *) a. Foldable t => t a -> Int
length [Char]
m') (Seq ResultSegment -> Result
Result Seq ResultSegment
xs)) (Seq ResultSegment -> Result
Result Seq ResultSegment
t')
    (Gap [Char]
g :< Seq ResultSegment
t, Match [Char]
m' :< Seq ResultSegment
t') ->
      Seq ResultSegment -> Result
Result [[Char] -> ResultSegment
Match [Char]
m'] forall a. Semigroup a => a -> a -> a
<> Result -> Result -> Result
merge (Int -> Result -> Result
drop' (forall (t :: * -> *) a. Foldable t => t a -> Int
length [Char]
m') (Seq ResultSegment -> Result
Result Seq ResultSegment
xs)) (Seq ResultSegment -> Result
Result Seq ResultSegment
t')
    (Match [Char]
m :< Seq ResultSegment
t, Gap [Char]
g' :< Seq ResultSegment
t') ->
      Seq ResultSegment -> Result
Result [[Char] -> ResultSegment
Match [Char]
m] forall a. Semigroup a => a -> a -> a
<> Result -> Result -> Result
merge (Seq ResultSegment -> Result
Result Seq ResultSegment
t) (Int -> Result -> Result
drop' (forall (t :: * -> *) a. Foldable t => t a -> Int
length [Char]
m) (Seq ResultSegment -> Result
Result Seq ResultSegment
ys))