module Darcs.Patch.TokenReplace ( tryTokReplace , forceTokReplace , annotateReplace , breakToTokens , defaultToks ) where import Prelude () import Darcs.Prelude import qualified Data.ByteString as B import qualified Data.ByteString.Char8 as BC import Darcs.Patch.RegChars ( regChars ) -- | @breakOutToken tokChars input@ splits the @input@ 'ByteString' into -- @'Just' (before, token, after)@, where @token@ is the first non-empty -- substring consisting only of 'Char's in @tokChars@, or 'Nothing' if no token -- was found. The 'Char's in @tokChars@ should not have code points larger than -- 255 (0xff). breakOutToken :: String -> BC.ByteString -> Maybe (BC.ByteString, BC.ByteString, BC.ByteString) breakOutToken tokChars input | not (B.null tok) = Just (before, tok, remaining) | otherwise = Nothing where isTokChar = regChars tokChars (before, tokAndRest) = BC.break isTokChar input (tok, remaining) = BC.break (not . isTokChar) tokAndRest -- | @tryTokReplace tokChars old new input@ tries to find the token @old@ and -- replace it with the token @new@ everywhere in the @input@, returning 'Just' -- the modified @input@, unless the token @new@ is already in the @input@ in -- which case 'Nothing' is returned. A token is a sequence of bytes that match -- the class defined by @tokChars@. This function is supposed to work -- efficiently with large @input@s i.e. whole files. tryTokReplace :: String -> B.ByteString -> B.ByteString -> B.ByteString -> Maybe B.ByteString tryTokReplace tokChars old new | B.null old = bug "tryTokInternal called with empty old token" | BC.any (not . isTokChar) old = bug "tryTokInternal called with old non-token" | BC.any (not . isTokChar) new = bug "tryTokInternal called with new non-token" | otherwise = fmap B.concat . loop 0 where isTokChar = regChars tokChars loop !from input = case BC.findIndex isTokChar (B.drop from input) of Nothing -> Just [input] Just start -> case BC.span isTokChar (B.drop (from + start) input) of (tok, rest) | tok == old -> (B.take (from + start) input :).(new :) <$> loop 0 rest | tok == new -> Nothing | otherwise -> loop (from + start + B.length tok) input -- | @forceTokReplace tokChars old new input@ replaces all occurrences of -- the @old@ token with the @new@ one, throughout the @input@. forceTokReplace :: String -> B.ByteString -> B.ByteString -> B.ByteString -> B.ByteString forceTokReplace tokChars old new | B.null old = bug "tryTokInternal called with empty old token" | BC.any (not . isTokChar) old = bug "tryTokInternal called with old non-token" | BC.any (not . isTokChar) new = bug "tryTokInternal called with new non-token" | otherwise = B.concat . loop 0 where isTokChar = regChars tokChars len = B.length old loop !from input = case B.breakSubstring old (B.drop from input) of (before, match) | B.null match -> [input] -- not found | B.null before || not (isTokChar (BC.last before)) , B.length match == len || not (isTokChar (BC.index match len)) -> -- found and is token B.take (from + B.length before) input : new : loop 0 (B.drop len match) | otherwise -> -- found but not a token loop (from + B.length before + len) input -- | Check if a token replace operation touches the given line. annotateReplace :: String -> B.ByteString -> B.ByteString -> B.ByteString -> Bool annotateReplace tokChars old new input = case breakOutToken tokChars input of Just (_, tok, remaining) -> (tok == old || annotateReplace tokChars old new remaining) Nothing -> False -- | Break a 'Bytestring' into tokens, according to 'defaultToks', -- discarding non-tokens. breakToTokens :: BC.ByteString -> [BC.ByteString] breakToTokens input = case breakOutToken defaultToks input of Nothing -> [] Just (_, tok, remaining) -> tok : breakToTokens remaining defaultToks :: String defaultToks = "A-Za-z_0-9"