{-# LANGUAGE OverloadedStrings #-} {-# OPTIONS_HADDOCK hide #-} -- This is an Internal module, hidden from Haddock module Core.Text.Breaking ( breakWords, breakLines, breakPieces, intoPieces, intoChunks, isNewline, ) where import Core.Text.Rope import Data.Char (isSpace) import Data.List (uncons) import qualified Data.Text.Short as S (ShortText, break, empty, null, uncons) -- | -- Split a passage of text into a list of words. A line is broken wherever -- there is one or more whitespace characters, as defined by "Data.Char"'s -- 'Data.Char.isSpace'. -- -- Examples: -- -- @ -- λ> __breakWords \"This is a test\"__ -- [\"This\",\"is\",\"a\",\"test\"] -- λ> __breakWords (\"St\" <> \"op and \" <> \"go left\")__ -- [\"Stop\",\"and\",\"go\",\"left\"] -- λ> __breakWords emptyRope__ -- [] -- @ breakWords :: Rope -> [Rope] breakWords = filter (not . nullRope) . breakPieces isSpace -- | -- Split a paragraph of text into a list of its individual lines. The -- paragraph will be broken wherever there is a @'\n'@ character. -- -- Blank lines will be preserved. Note that as a special case you do /not/ get -- a blank entry at the end of the a list of newline terminated strings. -- -- @ -- λ> __breakLines \"Hello\\n\\nWorld\\n\"__ -- [\"Hello\",\"\",\"World\"] -- @ breakLines :: Rope -> [Rope] breakLines text = let result = breakPieces isNewline text n = length result - 1 (fore, aft) = splitAt n result in case result of [] -> [] [p] -> [p] _ -> if aft == [""] then fore else result -- | -- Predicate testing whether a character is a newline. After -- 'Data.Char.isSpace' et al in "Data.Char". isNewline :: Char -> Bool isNewline c = c == '\n' {-# INLINEABLE isNewline #-} -- | -- Break a Rope into pieces whereever the given predicate function returns -- @True@. If found, that character will not be included on either side. Empty -- runs, however, *will* be preserved. breakPieces :: (Char -> Bool) -> Rope -> [Rope] breakPieces predicate text = let x = unRope text (final, result) = foldr (intoPieces predicate) (Nothing, []) x in case final of Nothing -> result Just piece -> intoRope piece : result {- Was the previous piece a match, or are we in the middle of a run of characters? If we were, then join the previous run to the current piece before processing into chunks. -} -- now for right fold intoPieces :: (Char -> Bool) -> S.ShortText -> (Maybe S.ShortText, [Rope]) -> (Maybe S.ShortText, [Rope]) intoPieces predicate piece (stream, list) = let piece' = case stream of Nothing -> piece Just previous -> piece <> previous -- more rope, less text? pieces = intoChunks predicate piece' in case uncons pieces of Nothing -> (Nothing, list) Just (text, remainder) -> (Just (fromRope text), remainder ++ list) -- -- λ> S.break isSpace "a d" -- ("a"," d") -- -- λ> S.break isSpace " and" -- (""," and") -- -- λ> S.break isSpace "and " -- ("and"," ") -- -- λ> S.break isSpace "" -- ("","") -- -- λ> S.break isSpace " " -- (""," ") -- {- This was more easily expressed as let remainder' = S.drop 1 remainder in if remainder == " " for the case when we were breaking on spaces. But generalized to a predicate we have to strip off the leading character and test that its the only character; this is cheaper than S.length etc. -} intoChunks :: (Char -> Bool) -> S.ShortText -> [Rope] intoChunks _ piece | S.null piece = [] intoChunks predicate piece = let (chunk, remainder) = S.break predicate piece -- Handle the special case that a trailing " " (generalized to predicate) -- is the only character left. (trailing, remainder') = case S.uncons remainder of Nothing -> (False, S.empty) Just (c, remaining) -> if S.null remaining then (predicate c, S.empty) else (False, remaining) in if trailing then intoRope chunk : emptyRope : [] else intoRope chunk : intoChunks predicate remainder'