{-# LANGUAGE DeriveDataTypeable #-} module Yi.Buffer.TextUnit ( TextUnit(..) , outsideUnit , leftBoundaryUnit , unitWord , unitViWord , unitViWORD , unitViWordAnyBnd , unitViWORDAnyBnd , unitViWordOnLine , unitViWORDOnLine , unitDelimited , unitSentence, unitEmacsParagraph, unitParagraph , isAnySep, unitSep, unitSepThisLine, isWordChar , moveB, maybeMoveB , transformB, transposeB , regionOfB, regionOfNonEmptyB, regionOfPartB , regionWithTwoMovesB , regionOfPartNonEmptyB, regionOfPartNonEmptyAtB , readPrevUnitB, readUnitB , untilB, doUntilB_, untilB_, whileB, doIfCharB , atBoundaryB , numberOfB , deleteB, genMaybeMoveB , genMoveB, BoundarySide(..), genAtBoundaryB , checkPeekB , halfUnit , deleteUnitB ) where import Prelude (length, subtract) import Yi.Prelude import Data.Char import Yi.Buffer.Basic import Yi.Buffer.Misc import Yi.Buffer.Region -- | Designate a given "unit" of text. data TextUnit = Character -- ^ a single character | Line -- ^ a line of text (between newlines) | VLine -- ^ a "vertical" line of text (area of text between two characters at the same column number) | Document -- ^ the whole document | GenUnit {genEnclosingUnit :: TextUnit, genUnitBoundary :: Direction -> BufferM Bool} -- there could be more text units, like Page, Searched, etc. it's probably a good -- idea to use GenUnit though. deriving Typeable -- | Turns a unit into its "negative" by inverting the boundaries. For example, -- @outsideUnit unitViWord@ will be the unit of spaces between words. For units -- without boundaries ('Character', 'Document', ...), this is the identity -- function. outsideUnit :: TextUnit -> TextUnit outsideUnit (GenUnit enclosing boundary) = GenUnit enclosing (boundary . reverseDir) outsideUnit x = x -- for a lack of better definition -- | Common boundary checking function: run the condition on @siz@ characters in specified direction -- shifted by specified offset. genBoundary :: Int -> (String -> Bool) -> Direction -> BufferM Bool genBoundary ofs condition dir = condition <$> peekB where -- | read some characters in the specified direction peekB = savingPointB $ do moveN $ mayNegate $ ofs fmap snd <$> (indexedStreamB dir =<< pointB) mayNegate = case dir of Forward -> id Backward -> negate -- | a word as in use in Emacs (fundamental mode) unitWord :: TextUnit unitWord = GenUnit Document $ \direction -> checkPeekB (-1) [isWordChar, not . isWordChar] direction -- ^ delimited on the left and right by given characters, boolean argument tells if whether those are included. unitDelimited :: Char -> Char -> Bool -> TextUnit unitDelimited left right included = GenUnit Document $ \direction -> case (included,direction) of (False, Backward) -> checkPeekB 0 [(== left)] Backward (False, Forward) -> (== right) <$> readB (True, Backward) -> checkPeekB (-1) [(== left)] Backward (True, Forward) -> checkPeekB 0 [(== right)] Backward isWordChar :: Char -> Bool isWordChar x = isAlphaNum x || x == '_' isNl :: Char -> Bool isNl = (== '\n') -- | Tells if a char can end a sentence ('.', '!', '?'). isEndOfSentence :: Char -> Bool isEndOfSentence = (`elem` ".!?") -- | Verifies that the list matches all the predicates, pairwise. -- If the list is "too small", then return 'False'. checks :: [a -> Bool] -> [a] -> Bool checks [] _ = True checks _ [] = False checks (p:ps) (x:xs) = p x && checks ps xs checkPeekB :: Int -> [Char -> Bool] -> Direction -> BufferM Bool checkPeekB offset conds = genBoundary offset (checks conds) atViWordBoundary :: (Char -> Int) -> Direction -> BufferM Bool atViWordBoundary charType = genBoundary (-1) $ \cs -> case cs of (c1:c2:_) -> isNl c1 && isNl c2 -- stop at empty lines || not (isSpace c1) && (charType c1 /= charType c2) _ -> True atAnyViWordBoundary :: (Char -> Int) -> Direction -> BufferM Bool atAnyViWordBoundary charType = genBoundary (-1) $ \cs -> case cs of (c1:c2:_) -> isNl c1 || isNl c2 || charType c1 /= charType c2 _ -> True atViWordBoundaryOnLine :: (Char -> Int) -> Direction -> BufferM Bool atViWordBoundaryOnLine charType = genBoundary (-1) $ \cs -> case cs of (c1:c2:_) -> isNl c1 || isNl c2 || not (isSpace c1) && charType c1 /= charType c2 _ -> True unitViWord :: TextUnit unitViWord = GenUnit Document $ atViWordBoundary viWordCharType unitViWORD :: TextUnit unitViWORD = GenUnit Document $ atViWordBoundary viWORDCharType unitViWordAnyBnd :: TextUnit unitViWordAnyBnd = GenUnit Document $ atAnyViWordBoundary viWordCharType unitViWORDAnyBnd :: TextUnit unitViWORDAnyBnd = GenUnit Document $ atAnyViWordBoundary viWORDCharType unitViWordOnLine :: TextUnit unitViWordOnLine = GenUnit Document $ atViWordBoundaryOnLine viWordCharType unitViWORDOnLine :: TextUnit unitViWORDOnLine = GenUnit Document $ atViWordBoundaryOnLine viWORDCharType viWordCharType :: Char -> Int viWordCharType c | isSpace c = 1 | isWordChar c = 2 | otherwise = 3 viWORDCharType :: Char -> Int viWORDCharType c | isSpace c = 1 | otherwise = 2 -- | Separator characters (space, tab, unicode separators). Most of the units -- above attempt to identify "words" with various punctuation and symbols included -- or excluded. This set of units is a simple inverse: it is true for "whitespace" -- or "separators" and false for anything that is not (letters, numbers, symbols, -- punctuation, whatever). isAnySep :: Char -> Bool isAnySep c = isSeparator c || isSpace c || generalCategory c `elem` [ Space, LineSeparator, ParagraphSeparator ] atSepBoundary :: Direction -> BufferM Bool atSepBoundary = genBoundary (-1) $ \cs -> case cs of (c1:c2:_) -> isNl c1 || isNl c2 || isAnySep c1 /= isAnySep c2 _ -> True -- | unitSep is true for any kind of whitespace/separator unitSep :: TextUnit unitSep = GenUnit Document atSepBoundary -- | unitSepThisLine is true for any kind of whitespace/separator on this line only unitSepThisLine :: TextUnit unitSepThisLine = GenUnit Line atSepBoundary -- | Is the point at a @Unit@ boundary in the specified @Direction@? atBoundary :: TextUnit -> Direction -> BufferM Bool atBoundary Document Backward = (== 0) <$> pointB atBoundary Document Forward = (>=) <$> pointB <*> sizeB atBoundary Character _ = return True atBoundary VLine _ = return True -- a fallacy; this needs a little refactoring. atBoundary Line direction = checkPeekB 0 [isNl] direction atBoundary (GenUnit _ atBound) dir = atBound dir enclosingUnit :: TextUnit -> TextUnit enclosingUnit (GenUnit enclosing _) = enclosing enclosingUnit _ = Document atBoundaryB :: TextUnit -> Direction -> BufferM Bool atBoundaryB Document d = atBoundary Document d atBoundaryB u d = (||) <$> atBoundary u d <*> atBoundaryB (enclosingUnit u) d -- | Paragraph to implement emacs-like forward-paragraph/backward-paragraph unitEmacsParagraph :: TextUnit unitEmacsParagraph = GenUnit Document $ checkPeekB (-2) [not . isNl, isNl, isNl] -- | Paragraph that begins and ends in the paragraph, not the empty lines surrounding it. unitParagraph :: TextUnit unitParagraph = GenUnit Document $ checkPeekB (-1) [not . isNl, isNl, isNl] unitSentence :: TextUnit unitSentence = GenUnit unitEmacsParagraph $ \dir -> checkPeekB (if dir == Forward then -1 else 0) (mayReverse dir [isEndOfSentence, isSpace]) dir -- | Unit that have its left and right boundaries at the left boundary of the argument unit. leftBoundaryUnit :: TextUnit -> TextUnit leftBoundaryUnit u = GenUnit Document $ (\_dir -> atBoundaryB u Backward) -- | @genAtBoundaryB u d s@ returns whether the point is at a given boundary @(d,s)@ . -- Boundary @(d,s)@ , taking Word as example, means: -- Word -- ^^ ^^ -- 12 34 -- 1: (Backward,OutsideBound) -- 2: (Backward,InsideBound) -- 3: (Forward,InsideBound) -- 4: (Forward,OutsideBound) -- -- rules: -- genAtBoundaryB u Backward InsideBound = atBoundaryB u Backward -- genAtBoundaryB u Forward OutsideBound = atBoundaryB u Forward genAtBoundaryB :: TextUnit -> Direction -> BoundarySide -> BufferM Bool genAtBoundaryB u d s = withOffset (off u d s) $ atBoundaryB u d where withOffset 0 f = f withOffset ofs f = savingPointB (((ofs +) <$> pointB) >>= moveTo >> f) off _ Backward InsideBound = 0 off _ Backward OutsideBound = 1 off _ Forward InsideBound = 1 off _ Forward OutsideBound = 0 numberOfB :: TextUnit -> TextUnit -> BufferM Int numberOfB unit containingUnit = savingPointB $ do maybeMoveB containingUnit Backward start <- pointB moveB containingUnit Forward end <- pointB moveTo start length <$> untilB ((>= end) <$> pointB) (moveB unit Forward) whileB :: BufferM Bool -> BufferM a -> BufferM [a] whileB cond = untilB (not <$> cond) -- | Repeat an action until the condition is fulfilled or the cursor stops moving. -- The Action may be performed zero times. untilB :: BufferM Bool -> BufferM a -> BufferM [a] untilB cond f = do stop <- cond if stop then return [] else doUntilB cond f -- | Repeat an action until the condition is fulfilled or the cursor stops moving. -- The Action is performed at least once. doUntilB :: BufferM Bool -> BufferM a -> BufferM [a] doUntilB cond f = loop where loop = do p <- pointB x <- f p' <- pointB stop <- cond (x:) <$> if p /= p' && not stop then loop else return [] doUntilB_ :: BufferM Bool -> BufferM a -> BufferM () doUntilB_ cond f = doUntilB cond f >> return () -- maybe do an optimized version? untilB_ :: BufferM Bool -> BufferM a -> BufferM () untilB_ cond f = untilB cond f >> return () -- maybe do an optimized version? -- | Do an action if the current buffer character passes the predicate doIfCharB :: (Char -> Bool) -> BufferM a -> BufferM () doIfCharB p o = readB >>= \c -> if p c then o >> return () else return () -- | Boundary side data BoundarySide = InsideBound | OutsideBound deriving Eq -- | Generic move operation -- Warning: moving To the (OutsideBound, Backward) bound of Document is impossible (offset -1!) -- @genMoveB u b d@: move in direction d until encountering boundary b or unit u. See 'genAtBoundaryB' for boundary explanation. genMoveB :: TextUnit -> (Direction, BoundarySide) -> Direction -> BufferM () genMoveB Document (Forward,InsideBound) Forward = moveTo =<< subtract 1 <$> sizeB genMoveB Document _ Forward = moveTo =<< sizeB genMoveB Document _ Backward = moveTo 0 -- impossible to go outside beginning of doc. genMoveB Character _ Forward = rightB genMoveB Character _ Backward = leftB genMoveB VLine _ Forward = do ofs <- lineMoveRel 1 when (ofs < 1) (maybeMoveB Line Forward) genMoveB VLine _ Backward = lineUp genMoveB unit (boundDir, boundSide) moveDir = doUntilB_ (genAtBoundaryB unit boundDir boundSide) (moveB Character moveDir) -- | Generic maybe move operation. -- As genMoveB, but don't move if we are at boundary already. genMaybeMoveB :: TextUnit -> (Direction, BoundarySide) -> Direction -> BufferM () genMaybeMoveB Document boundSpec moveDir = genMoveB Document boundSpec moveDir -- optimized case for Document genMaybeMoveB Line (Backward, InsideBound) Backward = moveTo =<< solPointB -- optimized case for begin of Line genMaybeMoveB unit (boundDir, boundSide) moveDir = untilB_ (genAtBoundaryB unit boundDir boundSide) (moveB Character moveDir) -- | Move to the next unit boundary moveB :: TextUnit -> Direction -> BufferM () moveB u d = genMoveB u (d, case d of Forward -> OutsideBound; Backward -> InsideBound) d -- | As 'moveB', unless the point is at a unit boundary -- So for example here moveToEol = maybeMoveB Line Forward; -- in that it will move to the end of current line and nowhere if we -- are already at the end of the current line. Similarly for moveToSol. maybeMoveB :: TextUnit -> Direction -> BufferM () maybeMoveB u d = genMaybeMoveB u (d, case d of Forward -> OutsideBound; Backward -> InsideBound) d transposeB :: TextUnit -> Direction -> BufferM () transposeB unit direction = do moveB unit (reverseDir direction) w0 <- pointB moveB unit direction w0' <- pointB moveB unit direction w1' <- pointB moveB unit (reverseDir direction) w1 <- pointB swapRegionsB (mkRegion w0 w0') (mkRegion w1 w1') moveTo w1' transformB :: (String -> String) -> TextUnit -> Direction -> BufferM () transformB f unit direction = do p <- pointB moveB unit direction q <- pointB let r = mkRegion p q replaceRegionB r =<< f <$> readRegionB r -- | Delete between point and next unit boundary, return the deleted region. deleteB :: TextUnit -> Direction -> BufferM () deleteB unit dir = deleteRegionB =<< regionOfPartNonEmptyB unit dir regionWithTwoMovesB :: BufferM a -> BufferM b -> BufferM Region regionWithTwoMovesB move1 move2 = savingPointB $ mkRegion <$> (move1 >> pointB) <*> (move2 >> pointB) -- | Region of the whole textunit where the current point is. regionOfB :: TextUnit -> BufferM Region regionOfB unit = regionWithTwoMovesB (maybeMoveB unit Backward) (maybeMoveB unit Forward) -- An alternate definition would be the following, but it can return two units if the current point is between them. -- eg. "word1 ^ word2" would return both words. -- regionOfB unit = mkRegion -- <$> pointAfter (maybeMoveB unit Backward) -- <*> destinationOfMoveB (maybeMoveB unit Forward) -- | Non empty region of the whole textunit where the current point is. regionOfNonEmptyB :: TextUnit -> BufferM Region regionOfNonEmptyB unit = savingPointB $ mkRegion <$> (maybeMoveB unit Backward >> pointB) <*> (moveB unit Forward >> pointB) -- | Region between the point and the next boundary. -- The region is empty if the point is at the boundary. regionOfPartB :: TextUnit -> Direction -> BufferM Region regionOfPartB unit dir = mkRegion <$> pointB <*> destinationOfMoveB (maybeMoveB unit dir) -- | Non empty region between the point and the next boundary, -- In fact the region can be empty if we are at the end of file. regionOfPartNonEmptyB :: TextUnit -> Direction -> BufferM Region regionOfPartNonEmptyB unit dir = mkRegion <$> pointB <*> destinationOfMoveB (moveB unit dir) -- | Non empty region at given point and the next boundary, regionOfPartNonEmptyAtB :: TextUnit -> Direction -> Point -> BufferM Region regionOfPartNonEmptyAtB unit dir p = do oldP <- pointB moveTo p r <- regionOfPartNonEmptyB unit dir moveTo oldP return r readPrevUnitB :: TextUnit -> BufferM String readPrevUnitB unit = readRegionB =<< regionOfPartNonEmptyB unit Backward readUnitB :: TextUnit -> BufferM String readUnitB = readRegionB <=< regionOfB halfUnit :: Direction -> TextUnit -> TextUnit halfUnit dir (GenUnit enclosing boundary) = GenUnit enclosing (\d -> if d == dir then boundary d else return False) halfUnit _dir tu = tu deleteUnitB :: TextUnit -> Direction -> BufferM () deleteUnitB unit dir = deleteRegionB =<< regionOfPartNonEmptyB unit dir