{-# LANGUAGE OverloadedLists #-}

-- |
--  This module provides a 'showDistance' function showing the differences between 2 pieces of text
--   using the Levenshtein distance. That distance is defined as the minimum number of edits: insertions, deletions, substitutions
--   to go from one text to another.
--
--   Several options are available to customize this processing:
--     - split size: texts are broken into new lines first. Then if the texts are too large there are split into smaller pieces
--       in order to compute their difference. This is done in order to reduce the size of the edit matrix which is used to compute all the edit costs
--       the default is 200
--
--     - separators: opening and closing pieces of text (brackets by default) used to highlight a difference
--
--     - shorten size: there is the possibly to display mostly the differences with a bit of context around if the input text is too large.
--       The text gets elided around separators if it gets greater than the shorten size (the default is 20)
--
--     - shorten text: the text to use when eliding characters in the original text (the default is "...")
--
--     - display edit operations: edit operations, insert/delete/substitute/keep can be annotated if necessary
--
-- Here are some examples:
--
-- @
-- import Data.Text.Edits
--
-- -- "between the e and the n the letter i was added"
-- showDistance "kitten" "kittein" === "kitte[+i]n"
--
-- -- "at the end of the text 3 letters have been deleted"
-- showDistance "kitten" "kit" === "kit[-t-e-n]"
--
-- -- "between the t and the n 2 letters have been modified"
-- showDistance "kitten" "kitsin" === "kit[~t/s~e/i]"
-- @
module Data.Text.Edits
  ( SplitSize (..),
    ShortenOptions (..),
    Separators (..),
    DisplayOptions (..),
    EditOperation (..),
    Color (..),
    colorAs,
    showDistance,
    showDistanceColored,
    showDistanceWith,
    levenshteinOperations,
    defaultDisplayOptions,
    defaultDisplayEditOperations,
    coloredDisplayEditOperation,
    defaultSplitSize,
    parensSeparators,
    bracketsSeparators,
    makeCharSeparators,
  )
where

import Data.Text qualified as T
import Data.Text.Color
import Data.Text.Costs
import Data.Text.Difference
import Data.Text.EditMatrix
import Data.Text.EditOperation
import Data.Text.Shorten
import Data.Vector qualified as V
import Protolude

-- | Size to use when splitting a large piece of text
newtype SplitSize = SplitSize Int deriving (SplitSize -> SplitSize -> Bool
(SplitSize -> SplitSize -> Bool)
-> (SplitSize -> SplitSize -> Bool) -> Eq SplitSize
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: SplitSize -> SplitSize -> Bool
$c/= :: SplitSize -> SplitSize -> Bool
== :: SplitSize -> SplitSize -> Bool
$c== :: SplitSize -> SplitSize -> Bool
Eq, Int -> SplitSize -> ShowS
[SplitSize] -> ShowS
SplitSize -> [Char]
(Int -> SplitSize -> ShowS)
-> (SplitSize -> [Char])
-> ([SplitSize] -> ShowS)
-> Show SplitSize
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
showList :: [SplitSize] -> ShowS
$cshowList :: [SplitSize] -> ShowS
show :: SplitSize -> [Char]
$cshow :: SplitSize -> [Char]
showsPrec :: Int -> SplitSize -> ShowS
$cshowsPrec :: Int -> SplitSize -> ShowS
Show)

-- | Default split size
defaultSplitSize :: SplitSize
defaultSplitSize :: SplitSize
defaultSplitSize = Int -> SplitSize
SplitSize Int
200

-- | Show the distance between 2 pieces of text
showDistance :: Text -> Text -> Text
showDistance :: Text -> Text -> Text
showDistance = SplitSize -> DisplayOptions -> Text -> Text -> Text
showDistanceWith SplitSize
defaultSplitSize DisplayOptions
defaultDisplayOptions

-- | Show the distance between 2 pieces of text with colors instead of symbols
showDistanceColored :: Text -> Text -> Text
showDistanceColored :: Text -> Text -> Text
showDistanceColored = SplitSize -> DisplayOptions -> Text -> Text -> Text
showDistanceWith SplitSize
defaultSplitSize DisplayOptions
defaultDisplayOptions {_displayEditOperation :: EditOperation Char -> Text
_displayEditOperation = EditOperation Char -> Text
coloredDisplayEditOperation}

-- | Show the distance between 2 pieces of text and specify splitting / display options
showDistanceWith :: SplitSize -> DisplayOptions -> Text -> Text -> Text
showDistanceWith :: SplitSize -> DisplayOptions -> Text -> Text -> Text
showDistanceWith SplitSize
splitSize DisplayOptions
displayOptions Text
ts1 Text
ts2 =
  SplitSize
-> Text -> Text -> Text -> (Text -> (Text, Text) -> Text) -> Text
forall a.
SplitSize -> Text -> Text -> a -> (a -> (Text, Text) -> a) -> a
foldSplitTexts SplitSize
splitSize Text
ts1 Text
ts2 [] ((Text -> (Text, Text) -> Text) -> Text)
-> (Text -> (Text, Text) -> Text) -> Text
forall a b. (a -> b) -> a -> b
$ \Text
ts (Text
line1, Text
line2) -> do
    let operations :: [EditOperation Char]
operations = Text -> Text -> [EditOperation Char]
levenshteinOperations (Text -> Text
forall a b. ConvertText a b => a -> b
toS Text
line1) (Text -> Text
forall a b. ConvertText a b => a -> b
toS Text
line2)
    Text
ts Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> DisplayOptions -> [EditOperation Char] -> Text
displayDiffs DisplayOptions
displayOptions [EditOperation Char]
operations

-- | Return the list of operations necessary to go from one piece of text to another
--   using the Levenshtein distance
levenshteinOperations :: Text -> Text -> [EditOperation Char]
levenshteinOperations :: Text -> Text -> [EditOperation Char]
levenshteinOperations Text
t1 Text
t2 = do
  let matrix :: Matrix Cost
matrix = Costs Char -> [Char] -> [Char] -> Matrix Cost
forall a. Costs a -> [a] -> [a] -> Matrix Cost
createEditMatrix Costs Char
textLevenshteinCosts (Text -> [Char]
forall a b. ConvertText a b => a -> b
toS Text
t1) (Text -> [Char]
forall a b. ConvertText a b => a -> b
toS Text
t2)
  Vector (EditOperation Char) -> [EditOperation Char]
forall (t :: * -> *) a. Foldable t => t a -> [a]
toList (Vector (EditOperation Char) -> [EditOperation Char])
-> Vector (EditOperation Char) -> [EditOperation Char]
forall a b. (a -> b) -> a -> b
$ Vector Char
-> Vector Char -> Matrix Cost -> Vector (EditOperation Char)
forall a.
Vector a -> Vector a -> Matrix Cost -> Vector (EditOperation a)
makeEditOperations ([Char] -> Vector Char
forall a. [a] -> Vector a
V.fromList ([Char] -> Vector Char) -> (Text -> [Char]) -> Text -> Vector Char
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> [Char]
forall a b. ConvertText a b => a -> b
toS (Text -> Vector Char) -> Text -> Vector Char
forall a b. (a -> b) -> a -> b
$ Text
t1) ([Char] -> Vector Char
forall a. [a] -> Vector a
V.fromList ([Char] -> Vector Char) -> (Text -> [Char]) -> Text -> Vector Char
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> [Char]
forall a b. ConvertText a b => a -> b
toS (Text -> Vector Char) -> Text -> Vector Char
forall a b. (a -> b) -> a -> b
$ Text
t2) Matrix Cost
matrix

-- | Split texts and apply the difference on each part
foldSplitTexts :: SplitSize -> Text -> Text -> a -> (a -> (Text, Text) -> a) -> a
foldSplitTexts :: forall a.
SplitSize -> Text -> Text -> a -> (a -> (Text, Text) -> a) -> a
foldSplitTexts SplitSize
splitSize Text
t1 Text
t2 a
initial a -> (Text, Text) -> a
f = do
  let ([Text]
s1, [Text]
s2) = (SplitSize -> Text -> [Text]
split SplitSize
splitSize Text
t1, SplitSize -> Text -> [Text]
split SplitSize
splitSize Text
t2)
  (a -> (Text, Text) -> a) -> a -> [(Text, Text)] -> a
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' a -> (Text, Text) -> a
f a
initial ([Text] -> [Text] -> [(Text, Text)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Text]
s1 [Text]
s2)

-- | Split a text on newlines then split each line on a maximum split size
--   We then perform the edit distance algorithm on smaller sizes of text in order to control memory and CPU
split :: SplitSize -> Text -> [Text]
split :: SplitSize -> Text -> [Text]
split SplitSize
splitSize = (Text -> [Text]) -> [Text] -> [Text]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap (SplitSize -> Text -> [Text]
splitToSize SplitSize
splitSize) ([Text] -> [Text]) -> (Text -> [Text]) -> Text -> [Text]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text -> [Text]
T.splitOn Text
"\n"

-- | Split a text on a maximum split size
splitToSize :: SplitSize -> Text -> [Text]
splitToSize :: SplitSize -> Text -> [Text]
splitToSize ss :: SplitSize
ss@(SplitSize Int
n) Text
t =
  if Text -> Int
T.length Text
t Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
n
    then [Item [Text]
Text
t]
    else Int -> Text -> Text
T.take Int
n Text
t Text -> [Text] -> [Text]
forall a. a -> [a] -> [a]
: SplitSize -> Text -> [Text]
splitToSize SplitSize
ss (Int -> Text -> Text
T.drop Int
n Text
t)