{-# LANGUAGE DeriveDataTypeable #-} {-# OPTIONS_HADDOCK show-extensions #-} -- | -- Module : Yi.Buffer.TextUnit -- License : GPL-2 -- Maintainer : yi-devel@googlegroups.com -- Stability : experimental -- Portability : portable -- -- Working with blocks (units) of text. -- module Yi.Buffer.TextUnit ( TextUnit(..) , outsideUnit , leftBoundaryUnit , unitWord , unitViWord , unitViWORD , unitViWordAnyBnd , unitViWORDAnyBnd , unitViWordOnLine , unitViWORDOnLine , unitDelimited , unitSentence, unitEmacsParagraph, unitParagraph , isAnySep, unitSep, unitSepThisLine, isWordChar , moveB, maybeMoveB , transformB, transposeB , regionOfB, regionOfNonEmptyB, regionOfPartB , regionWithTwoMovesB , regionOfPartNonEmptyB, regionOfPartNonEmptyAtB , readPrevUnitB, readUnitB , untilB, doUntilB_, untilB_, whileB, doIfCharB , atBoundaryB , numberOfB , deleteB, genMaybeMoveB , genMoveB, BoundarySide(..), genAtBoundaryB , checkPeekB , halfUnit , deleteUnitB ) where import Control.Applicative (Applicative ((<*>)), (<$>)) import Control.Monad (void, when, (<=<)) import Data.Char (GeneralCategory (LineSeparator, ParagraphSeparator, Space), generalCategory, isAlphaNum, isSeparator, isSpace) import Data.Typeable (Typeable) import Yi.Buffer.Basic (Direction (..), Point (Point), mayReverse, reverseDir) import Yi.Buffer.Misc import Yi.Buffer.Region import Yi.Rope (YiString) import qualified Yi.Rope as R (head, reverse, tail, toString) -- | Designate a given "unit" of text. data TextUnit = Character -- ^ a single character | Line -- ^ a line of text (between newlines) | VLine -- ^ a "vertical" line of text (area of text between two characters at the same column number) | Document -- ^ the whole document | GenUnit {genEnclosingUnit :: TextUnit, genUnitBoundary :: Direction -> BufferM Bool} -- there could be more text units, like Page, Searched, etc. it's probably a good -- idea to use GenUnit though. deriving Typeable -- | Turns a unit into its "negative" by inverting the boundaries. For example, -- @outsideUnit unitViWord@ will be the unit of spaces between words. For units -- without boundaries ('Character', 'Document', ...), this is the identity -- function. outsideUnit :: TextUnit -> TextUnit outsideUnit (GenUnit enclosing boundary) = GenUnit enclosing (boundary . reverseDir) outsideUnit x = x -- for a lack of better definition -- | Common boundary checking function: run the condition on @len@ -- characters in specified direction shifted by specified offset. genBoundary :: Int -- ^ Offset from current position -> Int -- ^ Look-ahead -> (YiString -> Bool) -- ^ predicate -> Direction -- ^ Direction to look in -> BufferM Bool genBoundary ofs len condition dir = condition <$> peekB where peekB = do Point p' <- pointB let pt@(Point p) = Point (p' + mayNegate ofs) case dir of Forward -> betweenB pt (Point $ max 0 p + len) Backward -> R.reverse <$> betweenB (Point $ p - len) pt mayNegate = case dir of Forward -> id Backward -> negate -- | a word as in use in Emacs (fundamental mode) unitWord :: TextUnit unitWord = GenUnit Document $ \direction -> checkPeekB (-1) [isWordChar, not . isWordChar] direction -- | delimited on the left and right by given characters, boolean -- argument tells if whether those are included. unitDelimited :: Char -> Char -> Bool -> TextUnit unitDelimited left right included = GenUnit Document $ \direction -> case (included,direction) of (False, Backward) -> do isCursorOnLeftChar <- (== left) <$> readB when isCursorOnLeftChar rightB checkPeekB 0 [(== left)] Backward (False, Forward) -> do isCursorOnRightChar <- (== right) <$> readB isTextUnitBlank <- checkPeekB 0 [(== left)] Backward if isTextUnitBlank && isCursorOnRightChar then leftB >> return True else return isCursorOnRightChar (True, Backward) -> checkPeekB 0 [(== left)] Forward (True, Forward) -> rightB >> checkPeekB 0 [(== right)] Backward isWordChar :: Char -> Bool isWordChar x = isAlphaNum x || x == '_' isNl :: Char -> Bool isNl = (== '\n') -- | Tells if a char can end a sentence ('.', '!', '?'). isEndOfSentence :: Char -> Bool isEndOfSentence = (`elem` ".!?") -- | Verifies that the string matches all the predicates, pairwise. If -- the string is "too small", then return 'False'. Note the length of -- predicates has to be finite. checks :: [Char -> Bool] -> YiString -> Bool checks ps' t' = go ps' (R.toString t') where go [] _ = True go _ [] = False go (p:ps) (x:xs) = p x && go ps xs checkPeekB :: Int -> [Char -> Bool] -> Direction -> BufferM Bool checkPeekB offset conds = genBoundary offset (length conds) (checks conds) -- | Helper that takes first two characters of YiString. Faster than -- take 2 and string conversion. firstTwo :: YiString -> Maybe (Char, Char) firstTwo t = case R.head t of Nothing -> Nothing Just c -> case R.tail t >>= R.head of Nothing -> Nothing Just c' -> Just (c, c') atViWordBoundary :: (Char -> Int) -> Direction -> BufferM Bool atViWordBoundary charType = genBoundary (-1) 2 $ \cs -> case firstTwo cs of Just (c1, c2) -> isNl c1 && isNl c2 -- stop at empty lines || not (isSpace c1) && (charType c1 /= charType c2) Nothing -> True atAnyViWordBoundary :: (Char -> Int) -> Direction -> BufferM Bool atAnyViWordBoundary charType = genBoundary (-1) 2 $ \cs -> case firstTwo cs of Just (c1, c2) -> isNl c1 || isNl c2 || charType c1 /= charType c2 Nothing -> True atViWordBoundaryOnLine :: (Char -> Int) -> Direction -> BufferM Bool atViWordBoundaryOnLine charType = genBoundary (-1) 2 $ \cs -> case firstTwo cs of Just (c1, c2)-> isNl c1 || isNl c2 || not (isSpace c1) && charType c1 /= charType c2 Nothing -> True unitViWord :: TextUnit unitViWord = GenUnit Document $ atViWordBoundary viWordCharType unitViWORD :: TextUnit unitViWORD = GenUnit Document $ atViWordBoundary viWORDCharType unitViWordAnyBnd :: TextUnit unitViWordAnyBnd = GenUnit Document $ atAnyViWordBoundary viWordCharType unitViWORDAnyBnd :: TextUnit unitViWORDAnyBnd = GenUnit Document $ atAnyViWordBoundary viWORDCharType unitViWordOnLine :: TextUnit unitViWordOnLine = GenUnit Document $ atViWordBoundaryOnLine viWordCharType unitViWORDOnLine :: TextUnit unitViWORDOnLine = GenUnit Document $ atViWordBoundaryOnLine viWORDCharType viWordCharType :: Char -> Int viWordCharType c | isSpace c = 1 | isWordChar c = 2 | otherwise = 3 viWORDCharType :: Char -> Int viWORDCharType c | isSpace c = 1 | otherwise = 2 -- | Separator characters (space, tab, unicode separators). Most of -- the units above attempt to identify "words" with various -- punctuation and symbols included or excluded. This set of units is -- a simple inverse: it is true for "whitespace" or "separators" and -- false for anything that is not (letters, numbers, symbols, -- punctuation, whatever). isAnySep :: Char -> Bool isAnySep c = isSeparator c || isSpace c || generalCategory c `elem` seps where seps = [ Space, LineSeparator, ParagraphSeparator ] atSepBoundary :: Direction -> BufferM Bool atSepBoundary = genBoundary (-1) 2 $ \cs -> case firstTwo cs of Just (c1, c2) -> isNl c1 || isNl c2 || isAnySep c1 /= isAnySep c2 Nothing -> True -- | unitSep is true for any kind of whitespace/separator unitSep :: TextUnit unitSep = GenUnit Document atSepBoundary -- | unitSepThisLine is true for any kind of whitespace/separator on this line only unitSepThisLine :: TextUnit unitSepThisLine = GenUnit Line atSepBoundary -- | Is the point at a @Unit@ boundary in the specified @Direction@? atBoundary :: TextUnit -> Direction -> BufferM Bool atBoundary Document Backward = (== 0) <$> pointB atBoundary Document Forward = (>=) <$> pointB <*> sizeB atBoundary Character _ = return True atBoundary VLine _ = return True -- a fallacy; this needs a little refactoring. atBoundary Line direction = checkPeekB 0 [isNl] direction atBoundary (GenUnit _ atBound) dir = atBound dir enclosingUnit :: TextUnit -> TextUnit enclosingUnit (GenUnit enclosing _) = enclosing enclosingUnit _ = Document atBoundaryB :: TextUnit -> Direction -> BufferM Bool atBoundaryB Document d = atBoundary Document d atBoundaryB u d = (||) <$> atBoundary u d <*> atBoundaryB (enclosingUnit u) d -- | Paragraph to implement emacs-like forward-paragraph/backward-paragraph unitEmacsParagraph :: TextUnit unitEmacsParagraph = GenUnit Document $ checkPeekB (-2) [not . isNl, isNl, isNl] -- | Paragraph that begins and ends in the paragraph, not the empty lines surrounding it. unitParagraph :: TextUnit unitParagraph = GenUnit Document $ checkPeekB (-1) [not . isNl, isNl, isNl] unitSentence :: TextUnit unitSentence = GenUnit unitEmacsParagraph $ \dir -> checkPeekB (if dir == Forward then -1 else 0) (mayReverse dir [isEndOfSentence, isSpace]) dir -- | Unit that have its left and right boundaries at the left boundary of the argument unit. leftBoundaryUnit :: TextUnit -> TextUnit leftBoundaryUnit u = GenUnit Document (\_dir -> atBoundaryB u Backward) -- | @genAtBoundaryB u d s@ returns whether the point is at a given boundary @(d,s)@ . -- Boundary @(d,s)@ , taking Word as example, means: -- Word -- ^^ ^^ -- 12 34 -- 1: (Backward,OutsideBound) -- 2: (Backward,InsideBound) -- 3: (Forward,InsideBound) -- 4: (Forward,OutsideBound) -- -- rules: -- genAtBoundaryB u Backward InsideBound = atBoundaryB u Backward -- genAtBoundaryB u Forward OutsideBound = atBoundaryB u Forward genAtBoundaryB :: TextUnit -> Direction -> BoundarySide -> BufferM Bool genAtBoundaryB u d s = withOffset (off u d s) $ atBoundaryB u d where withOffset 0 f = f withOffset ofs f = savingPointB (((ofs +) <$> pointB) >>= moveTo >> f) off _ Backward InsideBound = 0 off _ Backward OutsideBound = 1 off _ Forward InsideBound = 1 off _ Forward OutsideBound = 0 numberOfB :: TextUnit -> TextUnit -> BufferM Int numberOfB unit containingUnit = savingPointB $ do maybeMoveB containingUnit Backward start <- pointB moveB containingUnit Forward end <- pointB moveTo start length <$> untilB ((>= end) <$> pointB) (moveB unit Forward) whileB :: BufferM Bool -> BufferM a -> BufferM [a] whileB cond = untilB (not <$> cond) -- | Repeat an action until the condition is fulfilled or the cursor -- stops moving. The Action may be performed zero times. untilB :: BufferM Bool -> BufferM a -> BufferM [a] untilB cond f = do stop <- cond if stop then return [] else doUntilB cond f -- | Repeat an action until the condition is fulfilled or the cursor -- stops moving. The Action is performed at least once. doUntilB :: BufferM Bool -> BufferM a -> BufferM [a] doUntilB cond f = loop where loop = do p <- pointB x <- f p' <- pointB stop <- cond (x:) <$> if p /= p' && not stop then loop else return [] doUntilB_ :: BufferM Bool -> BufferM a -> BufferM () doUntilB_ cond f = void (doUntilB cond f) -- maybe do an optimized version? untilB_ :: BufferM Bool -> BufferM a -> BufferM () untilB_ cond f = void (untilB cond f) -- maybe do an optimized version? -- | Do an action if the current buffer character passes the predicate doIfCharB :: (Char -> Bool) -> BufferM a -> BufferM () doIfCharB p o = readB >>= \c -> when (p c) $ void o -- | Boundary side data BoundarySide = InsideBound | OutsideBound deriving Eq -- | Generic move operation -- Warning: moving To the (OutsideBound, Backward) bound of Document is impossible (offset -1!) -- @genMoveB u b d@: move in direction d until encountering boundary b or unit u. See 'genAtBoundaryB' for boundary explanation. genMoveB :: TextUnit -> (Direction, BoundarySide) -> Direction -> BufferM () genMoveB Document (Forward,InsideBound) Forward = moveTo =<< subtract 1 <$> sizeB genMoveB Document _ Forward = moveTo =<< sizeB genMoveB Document _ Backward = moveTo 0 -- impossible to go outside beginning of doc. genMoveB Character _ Forward = rightB genMoveB Character _ Backward = leftB genMoveB VLine _ Forward = do ofs <- lineMoveRel 1 when (ofs < 1) (maybeMoveB Line Forward) genMoveB VLine _ Backward = lineUp genMoveB unit (boundDir, boundSide) moveDir = doUntilB_ (genAtBoundaryB unit boundDir boundSide) (moveB Character moveDir) -- | Generic maybe move operation. -- As genMoveB, but don't move if we are at boundary already. genMaybeMoveB :: TextUnit -> (Direction, BoundarySide) -> Direction -> BufferM () -- optimized case for Document genMaybeMoveB Document boundSpec moveDir = genMoveB Document boundSpec moveDir -- optimized case for start/end of Line genMaybeMoveB Line (Backward, InsideBound) Backward = moveTo =<< solPointB =<< pointB genMaybeMoveB Line (Forward, OutsideBound) Forward = moveTo =<< eolPointB =<< pointB genMaybeMoveB unit (boundDir, boundSide) moveDir = untilB_ (genAtBoundaryB unit boundDir boundSide) (moveB Character moveDir) -- | Move to the next unit boundary moveB :: TextUnit -> Direction -> BufferM () moveB u d = genMoveB u (d, case d of Forward -> OutsideBound; Backward -> InsideBound) d -- | As 'moveB', unless the point is at a unit boundary -- So for example here moveToEol = maybeMoveB Line Forward; -- in that it will move to the end of current line and nowhere if we -- are already at the end of the current line. Similarly for moveToSol. maybeMoveB :: TextUnit -> Direction -> BufferM () maybeMoveB u d = genMaybeMoveB u (d, case d of Forward -> OutsideBound; Backward -> InsideBound) d transposeB :: TextUnit -> Direction -> BufferM () transposeB unit direction = do moveB unit (reverseDir direction) w0 <- pointB moveB unit direction w0' <- pointB moveB unit direction w1' <- pointB moveB unit (reverseDir direction) w1 <- pointB swapRegionsB (mkRegion w0 w0') (mkRegion w1 w1') moveTo w1' -- | Transforms the region given by 'TextUnit' in the 'Direction' with -- user-supplied function. transformB :: (YiString -> YiString) -> TextUnit -> Direction -> BufferM () transformB f unit direction = do p <- pointB moveB unit direction q <- pointB let r = mkRegion p q replaceRegionB r =<< f <$> readRegionB r -- | Delete between point and next unit boundary, return the deleted region. deleteB :: TextUnit -> Direction -> BufferM () deleteB unit dir = deleteRegionB =<< regionOfPartNonEmptyB unit dir regionWithTwoMovesB :: BufferM a -> BufferM b -> BufferM Region regionWithTwoMovesB move1 move2 = savingPointB $ mkRegion <$> (move1 >> pointB) <*> (move2 >> pointB) -- | Region of the whole textunit where the current point is. regionOfB :: TextUnit -> BufferM Region regionOfB unit = regionWithTwoMovesB (maybeMoveB unit Backward) (maybeMoveB unit Forward) -- An alternate definition would be the following, but it can return two units if the current point is between them. -- eg. "word1 ^ word2" would return both words. -- regionOfB unit = mkRegion -- <$> pointAfter (maybeMoveB unit Backward) -- <*> destinationOfMoveB (maybeMoveB unit Forward) -- | Non empty region of the whole textunit where the current point is. regionOfNonEmptyB :: TextUnit -> BufferM Region regionOfNonEmptyB unit = savingPointB $ mkRegion <$> (maybeMoveB unit Backward >> pointB) <*> (moveB unit Forward >> pointB) -- | Region between the point and the next boundary. -- The region is empty if the point is at the boundary. regionOfPartB :: TextUnit -> Direction -> BufferM Region regionOfPartB unit dir = mkRegion <$> pointB <*> destinationOfMoveB (maybeMoveB unit dir) -- | Non empty region between the point and the next boundary, -- In fact the region can be empty if we are at the end of file. regionOfPartNonEmptyB :: TextUnit -> Direction -> BufferM Region regionOfPartNonEmptyB unit dir = mkRegion <$> pointB <*> destinationOfMoveB (moveB unit dir) -- | Non empty region at given point and the next boundary, regionOfPartNonEmptyAtB :: TextUnit -> Direction -> Point -> BufferM Region regionOfPartNonEmptyAtB unit dir p = do oldP <- pointB moveTo p r <- regionOfPartNonEmptyB unit dir moveTo oldP return r readPrevUnitB :: TextUnit -> BufferM YiString readPrevUnitB unit = readRegionB =<< regionOfPartNonEmptyB unit Backward readUnitB :: TextUnit -> BufferM YiString readUnitB = readRegionB <=< regionOfB halfUnit :: Direction -> TextUnit -> TextUnit halfUnit dir (GenUnit enclosing boundary) = GenUnit enclosing (\d -> if d == dir then boundary d else return False) halfUnit _dir tu = tu deleteUnitB :: TextUnit -> Direction -> BufferM () deleteUnitB unit dir = deleteRegionB =<< regionOfPartNonEmptyB unit dir