{-# LANGUAGE OverloadedStrings #-} {-# OPTIONS_HADDOCK hide #-} -- This is an Internal module, hidden from Haddock module Core.Text.Breaking ( breakWords , breakLines , breakPieces , intoPieces , intoChunks ) where import Data.Char (isSpace) import Data.Foldable (foldr) import Data.List (uncons) import qualified Data.Text.Short as S (ShortText, null, break, uncons,empty) import Core.Text.Rope {-| Split a passage of text into a list of words. A line is broken wherever there is one or more whitespace characters, as defined by "Data.Char"'s 'Data.Char.isSpace'. Examples: @ λ> __breakWords \"This is a test\"__ [\"This\",\"is\",\"a\",\"test\"] λ> __breakWords (\"St\" <> \"op and \" <> \"go left\")__ [\"Stop\",\"and\",\"go\",\"left\"] λ> __breakWords emptyRope__ [] @ -} breakWords :: Rope -> [Rope] breakWords = filter (not . nullRope) . breakPieces isSpace {-| Split a paragraph of text into a list of its individual lines. The paragraph will be broken wherever there is a @'\n'@ character. Blank lines will be preserved. Note that as a special case you do /not/ get a blank entry at the end of the a list of newline terminated strings. @ λ> __breakLines \"Hello\\n\\nWorld\\n\"__ [\"Hello\",\"\",\"World\"] @ -} breakLines :: Rope -> [Rope] breakLines text = let result = breakPieces isNewline text n = length result - 1 (fore,aft) = splitAt n result in case result of [] -> [] [p] -> [p] _ -> if aft == [""] then fore else result isNewline :: Char -> Bool isNewline c = c == '\n' {-| Break a Rope into pieces whereever the given predicate function returns @True@. If found, that character will not be included on either side. Empty runs, however, *will* be preserved. -} breakPieces :: (Char -> Bool) -> Rope -> [Rope] breakPieces predicate text = let x = unRope text (final,result) = foldr (intoPieces predicate) (Nothing,[]) x in case final of Nothing -> result Just piece -> intoRope piece : result {- Was the previous piece a match, or are we in the middle of a run of characters? If we were, then join the previous run to the current piece before processing into chunks. -} -- now for right fold intoPieces :: (Char -> Bool) -> S.ShortText -> (Maybe S.ShortText,[Rope]) -> (Maybe S.ShortText,[Rope]) intoPieces predicate piece (stream,list) = let piece' = case stream of Nothing -> piece Just previous -> piece <> previous -- more rope, less text? pieces = intoChunks predicate piece' in case uncons pieces of Nothing -> (Nothing,list) Just (text,remainder) -> (Just (fromRope text),remainder ++ list) -- -- λ> S.break isSpace "a d" -- ("a"," d") -- -- λ> S.break isSpace " and" -- (""," and") -- -- λ> S.break isSpace "and " -- ("and"," ") -- -- λ> S.break isSpace "" -- ("","") -- -- λ> S.break isSpace " " -- (""," ") -- {- This was more easily expressed as let remainder' = S.drop 1 remainder in if remainder == " " for the case when we were breaking on spaces. But generalized to a predicate we have to strip off the leading character and test that its the only character; this is cheaper than S.length etc. -} intoChunks :: (Char -> Bool) -> S.ShortText -> [Rope] intoChunks _ piece | S.null piece = [] intoChunks predicate piece = let (chunk,remainder) = S.break predicate piece -- Handle the special case that a trailing " " (generalized to predicate) -- is the only character left. (trailing,remainder') = case S.uncons remainder of Nothing -> (False,S.empty) Just (c,remaining) -> if S.null remaining then (predicate c,S.empty) else (False,remaining) in if trailing then intoRope chunk : emptyRope : [] else intoRope chunk : intoChunks predicate remainder'