module Darcs.Patch.TokenReplace
    ( tryTokReplace
    , forceTokReplace
    , annotateReplace
    , breakToTokens
    , defaultToks
    ) where

import Darcs.Prelude

import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as BC

import Darcs.Patch.RegChars ( regChars )

-- | @breakOutToken tokChars input@ splits the @input@ 'ByteString' into
-- @'Just' (before, token, after)@, where @token@ is the first non-empty
-- substring consisting only of 'Char's in @tokChars@, or 'Nothing' if no token
-- was found. The 'Char's in @tokChars@ should not have code points larger than
-- 255 (0xff).
breakOutToken :: String -> BC.ByteString
              -> Maybe (BC.ByteString, BC.ByteString, BC.ByteString)
breakOutToken :: String -> ByteString -> Maybe (ByteString, ByteString, ByteString)
breakOutToken String
tokChars ByteString
input
  | Bool -> Bool
not (ByteString -> Bool
B.null ByteString
tok) = (ByteString, ByteString, ByteString)
-> Maybe (ByteString, ByteString, ByteString)
forall a. a -> Maybe a
Just (ByteString
before, ByteString
tok, ByteString
remaining)
  | Bool
otherwise = Maybe (ByteString, ByteString, ByteString)
forall a. Maybe a
Nothing
  where
    isTokChar :: Char -> Bool
isTokChar = String -> Char -> Bool
regChars String
tokChars
    (ByteString
before, ByteString
tokAndRest) = (Char -> Bool) -> ByteString -> (ByteString, ByteString)
BC.break Char -> Bool
isTokChar ByteString
input
    (ByteString
tok, ByteString
remaining) = (Char -> Bool) -> ByteString -> (ByteString, ByteString)
BC.break (Bool -> Bool
not (Bool -> Bool) -> (Char -> Bool) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
isTokChar) ByteString
tokAndRest

-- | @tryTokReplace tokChars old new input@ tries to find the token @old@ and
-- replace it with the token @new@ everywhere in the @input@, returning 'Just'
-- the modified @input@, unless the token @new@ is already in the @input@ in
-- which case 'Nothing' is returned. A token is a sequence of bytes that match
-- the class defined by @tokChars@. This function is supposed to work
-- efficiently with large @input@s i.e. whole files.
tryTokReplace :: String -> B.ByteString -> B.ByteString
              -> B.ByteString -> Maybe B.ByteString
tryTokReplace :: String
-> ByteString -> ByteString -> ByteString -> Maybe ByteString
tryTokReplace String
tokChars ByteString
old ByteString
new
  | ByteString -> Bool
B.null ByteString
old = String -> ByteString -> Maybe ByteString
forall a. HasCallStack => String -> a
error String
"tryTokInternal called with empty old token"
  | (Char -> Bool) -> ByteString -> Bool
BC.any (Bool -> Bool
not (Bool -> Bool) -> (Char -> Bool) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
isTokChar) ByteString
old = String -> ByteString -> Maybe ByteString
forall a. HasCallStack => String -> a
error String
"tryTokInternal called with old non-token"
  | (Char -> Bool) -> ByteString -> Bool
BC.any (Bool -> Bool
not (Bool -> Bool) -> (Char -> Bool) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
isTokChar) ByteString
new = String -> ByteString -> Maybe ByteString
forall a. HasCallStack => String -> a
error String
"tryTokInternal called with new non-token"
  | Bool
otherwise = ([ByteString] -> ByteString)
-> Maybe [ByteString] -> Maybe ByteString
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap [ByteString] -> ByteString
B.concat (Maybe [ByteString] -> Maybe ByteString)
-> (ByteString -> Maybe [ByteString])
-> ByteString
-> Maybe ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> ByteString -> Maybe [ByteString]
loop Int
0
    where
      isTokChar :: Char -> Bool
isTokChar = String -> Char -> Bool
regChars String
tokChars
      loop :: Int -> ByteString -> Maybe [ByteString]
loop !Int
from ByteString
input =
        case (Char -> Bool) -> ByteString -> Maybe Int
BC.findIndex Char -> Bool
isTokChar (Int -> ByteString -> ByteString
B.drop Int
from ByteString
input) of
          Maybe Int
Nothing -> [ByteString] -> Maybe [ByteString]
forall a. a -> Maybe a
Just [ByteString
input]
          Just Int
start ->
            case (Char -> Bool) -> ByteString -> (ByteString, ByteString)
BC.span Char -> Bool
isTokChar (Int -> ByteString -> ByteString
B.drop (Int
from Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
start) ByteString
input) of
              (ByteString
tok, ByteString
rest)
                | ByteString
tok ByteString -> ByteString -> Bool
forall a. Eq a => a -> a -> Bool
== ByteString
old ->
                    (Int -> ByteString -> ByteString
B.take (Int
from Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
start) ByteString
input ByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
:)([ByteString] -> [ByteString])
-> ([ByteString] -> [ByteString]) -> [ByteString] -> [ByteString]
forall b c a. (b -> c) -> (a -> b) -> a -> c
.(ByteString
new ByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
:) ([ByteString] -> [ByteString])
-> Maybe [ByteString] -> Maybe [ByteString]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Int -> ByteString -> Maybe [ByteString]
loop Int
0 ByteString
rest
                | ByteString
tok ByteString -> ByteString -> Bool
forall a. Eq a => a -> a -> Bool
== ByteString
new -> Maybe [ByteString]
forall a. Maybe a
Nothing
                | Bool
otherwise ->
                    Int -> ByteString -> Maybe [ByteString]
loop (Int
from Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
start Int -> Int -> Int
forall a. Num a => a -> a -> a
+ ByteString -> Int
B.length ByteString
tok) ByteString
input

-- | @forceTokReplace tokChars old new input@ replaces all occurrences of
-- the @old@ token with the @new@ one, throughout the @input@.
forceTokReplace :: String -> B.ByteString -> B.ByteString
                -> B.ByteString -> B.ByteString
forceTokReplace :: String -> ByteString -> ByteString -> ByteString -> ByteString
forceTokReplace String
tokChars ByteString
old ByteString
new
  | ByteString -> Bool
B.null ByteString
old = String -> ByteString -> ByteString
forall a. HasCallStack => String -> a
error String
"tryTokInternal called with empty old token"
  | (Char -> Bool) -> ByteString -> Bool
BC.any (Bool -> Bool
not (Bool -> Bool) -> (Char -> Bool) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
isTokChar) ByteString
old = String -> ByteString -> ByteString
forall a. HasCallStack => String -> a
error String
"tryTokInternal called with old non-token"
  | (Char -> Bool) -> ByteString -> Bool
BC.any (Bool -> Bool
not (Bool -> Bool) -> (Char -> Bool) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
isTokChar) ByteString
new = String -> ByteString -> ByteString
forall a. HasCallStack => String -> a
error String
"tryTokInternal called with new non-token"
  | Bool
otherwise = [ByteString] -> ByteString
B.concat ([ByteString] -> ByteString)
-> (ByteString -> [ByteString]) -> ByteString -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> ByteString -> [ByteString]
loop Int
0
    where
      isTokChar :: Char -> Bool
isTokChar = String -> Char -> Bool
regChars String
tokChars
      len :: Int
len = ByteString -> Int
B.length ByteString
old
      loop :: Int -> ByteString -> [ByteString]
loop !Int
from ByteString
input =
        case ByteString -> ByteString -> (ByteString, ByteString)
B.breakSubstring ByteString
old (Int -> ByteString -> ByteString
B.drop Int
from ByteString
input) of
          (ByteString
before, ByteString
match)
            | ByteString -> Bool
B.null ByteString
match -> [ByteString
input] -- not found
            | ByteString -> Bool
B.null ByteString
before Bool -> Bool -> Bool
|| Bool -> Bool
not (Char -> Bool
isTokChar (ByteString -> Char
BC.last ByteString
before))
            , ByteString -> Int
B.length ByteString
match Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
len Bool -> Bool -> Bool
|| Bool -> Bool
not (Char -> Bool
isTokChar (ByteString -> Int -> Char
BC.index ByteString
match Int
len)) ->
                -- found and is token
                Int -> ByteString -> ByteString
B.take (Int
from Int -> Int -> Int
forall a. Num a => a -> a -> a
+ ByteString -> Int
B.length ByteString
before) ByteString
input ByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
: ByteString
new ByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
:
                  Int -> ByteString -> [ByteString]
loop Int
0 (Int -> ByteString -> ByteString
B.drop Int
len ByteString
match)
            | Bool
otherwise ->
                -- found but not a token
                Int -> ByteString -> [ByteString]
loop (Int
from Int -> Int -> Int
forall a. Num a => a -> a -> a
+ ByteString -> Int
B.length ByteString
before Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
len) ByteString
input

-- | Check if a token replace operation touches the given line.
annotateReplace :: String -> B.ByteString -> B.ByteString -> B.ByteString -> Bool
annotateReplace :: String -> ByteString -> ByteString -> ByteString -> Bool
annotateReplace String
tokChars ByteString
old ByteString
new ByteString
input =
  case String -> ByteString -> Maybe (ByteString, ByteString, ByteString)
breakOutToken String
tokChars ByteString
input of
    Just (ByteString
_, ByteString
tok, ByteString
remaining) ->
      (ByteString
tok ByteString -> ByteString -> Bool
forall a. Eq a => a -> a -> Bool
== ByteString
old Bool -> Bool -> Bool
|| String -> ByteString -> ByteString -> ByteString -> Bool
annotateReplace String
tokChars ByteString
old ByteString
new ByteString
remaining)
    Maybe (ByteString, ByteString, ByteString)
Nothing -> Bool
False

-- | Break a 'Bytestring' into tokens, according to 'defaultToks',
-- discarding non-tokens.
breakToTokens :: BC.ByteString -> [BC.ByteString]
breakToTokens :: ByteString -> [ByteString]
breakToTokens ByteString
input =
  case String -> ByteString -> Maybe (ByteString, ByteString, ByteString)
breakOutToken String
defaultToks ByteString
input of
    Maybe (ByteString, ByteString, ByteString)
Nothing -> []
    Just (ByteString
_, ByteString
tok, ByteString
remaining) -> ByteString
tok ByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
: ByteString -> [ByteString]
breakToTokens ByteString
remaining

defaultToks :: String
defaultToks :: String
defaultToks = String
"A-Za-z_0-9"