{-# LANGUAGE OverloadedStrings #-}
{-# OPTIONS_HADDOCK hide #-}

-- This is an Internal module, hidden from Haddock
module Core.Text.Breaking (
    breakRope,
    breakWords,
    breakLines,
    breakPieces,
    intoPieces,
    intoChunks,
    isNewline,
) where

import Core.Text.Rope
import Data.Char (isSpace)
import Data.List (uncons)
import qualified Data.Text.Short as S (ShortText, break, empty, null, uncons)

{- |
Split a passage of text into a list of words. A line is broken wherever there
is one or more whitespace characters, as defined by "Data.Char"'s
'Data.Char.isSpace'.

Examples:

@
λ> __breakWords \"This is a test\"__
[\"This\",\"is\",\"a\",\"test\"]
λ> __breakWords (\"St\" <> \"op and \" <> \"go left\")__
[\"Stop\",\"and\",\"go\",\"left\"]
λ> __breakWords emptyRope__
[]
@
-}
breakWords :: Rope -> [Rope]
breakWords :: Rope -> [Rope]
breakWords = (Rope -> Bool) -> [Rope] -> [Rope]
forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not (Bool -> Bool) -> (Rope -> Bool) -> Rope -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Rope -> Bool
nullRope) ([Rope] -> [Rope]) -> (Rope -> [Rope]) -> Rope -> [Rope]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> Rope -> [Rope]
breakPieces Char -> Bool
isSpace

{- |
Split a paragraph of text into a list of its individual lines. The paragraph
will be broken wherever there is a @'\n'@ character.

Blank lines will be preserved. Note that as a special case you do /not/ get a
blank entry at the end of the a list of newline terminated strings.

@
λ> __breakLines \"Hello\\n\\nWorld\\n\"__
[\"Hello\",\"\",\"World\"]
@
-}
breakLines :: Rope -> [Rope]
breakLines :: Rope -> [Rope]
breakLines Rope
text =
    let result :: [Rope]
result = (Char -> Bool) -> Rope -> [Rope]
breakPieces Char -> Bool
isNewline Rope
text
        n :: Int
n = [Rope] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Rope]
result Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1
        ([Rope]
fore, [Rope]
aft) = Int -> [Rope] -> ([Rope], [Rope])
forall a. Int -> [a] -> ([a], [a])
splitAt Int
n [Rope]
result
     in case [Rope]
result of
            [] -> []
            [Rope
p] -> [Rope
p]
            [Rope]
_ ->
                if [Rope]
aft [Rope] -> [Rope] -> Bool
forall a. Eq a => a -> a -> Bool
== [Rope
""]
                    then [Rope]
fore
                    else [Rope]
result

{- |
Predicate testing whether a character is a newline. After
'Data.Char.isSpace' et al in "Data.Char".
-}
isNewline :: Char -> Bool
isNewline :: Char -> Bool
isNewline Char
c = Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\n'
{-# INLINEABLE isNewline #-}

{- |
Break a Rope into pieces whereever the given predicate function returns
@True@. If found, that character will not be included on either side. Empty
runs, however, *will* be preserved.
-}
breakPieces :: (Char -> Bool) -> Rope -> [Rope]
breakPieces :: (Char -> Bool) -> Rope -> [Rope]
breakPieces Char -> Bool
predicate Rope
text =
    let x :: FingerTree Width ShortText
x = Rope -> FingerTree Width ShortText
unRope Rope
text
        (Maybe ShortText
final, [Rope]
result) = (ShortText
 -> (Maybe ShortText, [Rope]) -> (Maybe ShortText, [Rope]))
-> (Maybe ShortText, [Rope])
-> FingerTree Width ShortText
-> (Maybe ShortText, [Rope])
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr ((Char -> Bool)
-> ShortText
-> (Maybe ShortText, [Rope])
-> (Maybe ShortText, [Rope])
intoPieces Char -> Bool
predicate) (Maybe ShortText
forall a. Maybe a
Nothing, []) FingerTree Width ShortText
x
     in case Maybe ShortText
final of
            Maybe ShortText
Nothing -> [Rope]
result
            Just ShortText
piece -> ShortText -> Rope
forall α. Textual α => α -> Rope
intoRope ShortText
piece Rope -> [Rope] -> [Rope]
forall a. a -> [a] -> [a]
: [Rope]
result

{-
Was the previous piece a match, or are we in the middle of a run of
characters? If we were, then join the previous run to the current piece
before processing into chunks.
-}
-- now for right fold
intoPieces :: (Char -> Bool) -> S.ShortText -> (Maybe S.ShortText, [Rope]) -> (Maybe S.ShortText, [Rope])
intoPieces :: (Char -> Bool)
-> ShortText
-> (Maybe ShortText, [Rope])
-> (Maybe ShortText, [Rope])
intoPieces Char -> Bool
predicate ShortText
piece (Maybe ShortText
stream, [Rope]
list) =
    let piece' :: ShortText
piece' = case Maybe ShortText
stream of
            Maybe ShortText
Nothing -> ShortText
piece
            Just ShortText
previous -> ShortText
piece ShortText -> ShortText -> ShortText
forall a. Semigroup a => a -> a -> a
<> ShortText
previous -- more rope, less text?
        pieces :: [Rope]
pieces = (Char -> Bool) -> ShortText -> [Rope]
intoChunks Char -> Bool
predicate ShortText
piece'
     in case [Rope] -> Maybe (Rope, [Rope])
forall a. [a] -> Maybe (a, [a])
uncons [Rope]
pieces of
            Maybe (Rope, [Rope])
Nothing -> (Maybe ShortText
forall a. Maybe a
Nothing, [Rope]
list)
            Just (Rope
text, [Rope]
remainder) -> (ShortText -> Maybe ShortText
forall a. a -> Maybe a
Just (Rope -> ShortText
forall α. Textual α => Rope -> α
fromRope Rope
text), [Rope]
remainder [Rope] -> [Rope] -> [Rope]
forall a. [a] -> [a] -> [a]
++ [Rope]
list)

--
-- λ> S.break isSpace "a d"
-- ("a"," d")
--
-- λ> S.break isSpace " and"
-- (""," and")
--
-- λ> S.break isSpace "and "
-- ("and"," ")
--
-- λ> S.break isSpace ""
-- ("","")
--
-- λ> S.break isSpace " "
-- (""," ")
--

{-
This was more easily expressed as

  let
    remainder' = S.drop 1 remainder
  in
    if remainder == " "

for the case when we were breaking on spaces. But generalized to a predicate
we have to strip off the leading character and test that its the only character;
this is cheaper than S.length etc.
-}
intoChunks :: (Char -> Bool) -> S.ShortText -> [Rope]
intoChunks :: (Char -> Bool) -> ShortText -> [Rope]
intoChunks Char -> Bool
_ ShortText
piece | ShortText -> Bool
S.null ShortText
piece = []
intoChunks Char -> Bool
predicate ShortText
piece =
    let (ShortText
chunk, ShortText
remainder) = (Char -> Bool) -> ShortText -> (ShortText, ShortText)
S.break Char -> Bool
predicate ShortText
piece

        -- Handle the special case that a trailing " " (generalized to predicate)
        -- is the only character left.
        (Bool
trailing, ShortText
remainder') = case ShortText -> Maybe (Char, ShortText)
S.uncons ShortText
remainder of
            Maybe (Char, ShortText)
Nothing -> (Bool
False, ShortText
S.empty)
            Just (Char
c, ShortText
remaining) ->
                if ShortText -> Bool
S.null ShortText
remaining
                    then (Char -> Bool
predicate Char
c, ShortText
S.empty)
                    else (Bool
False, ShortText
remaining)
     in if Bool
trailing
            then ShortText -> Rope
forall α. Textual α => α -> Rope
intoRope ShortText
chunk Rope -> [Rope] -> [Rope]
forall a. a -> [a] -> [a]
: Rope
emptyRope Rope -> [Rope] -> [Rope]
forall a. a -> [a] -> [a]
: []
            else ShortText -> Rope
forall α. Textual α => α -> Rope
intoRope ShortText
chunk Rope -> [Rope] -> [Rope]
forall a. a -> [a] -> [a]
: (Char -> Bool) -> ShortText -> [Rope]
intoChunks Char -> Bool
predicate ShortText
remainder'

{-
The utilities breakPieces and its helpers above were written long before this
code. The special purpose functions above might have been written more easily
if this below had been written first, but they _are_ special cases and they're
done, so {shrug} if someone wants to unify these go right head, otherwise this
can stand as almost but not-quite repetition.
-}

{- |
Given a piece of 'Rope' and a predicate, break the text into two pieces at the first
site where that predicate returns 'True'.

@since 0.3.7
-}
breakRope :: (Char -> Bool) -> Rope -> (Rope, Rope)
breakRope :: (Char -> Bool) -> Rope -> (Rope, Rope)
breakRope Char -> Bool
predicate Rope
text =
    let possibleIndex :: Maybe Int
possibleIndex = (Char -> Bool) -> Rope -> Maybe Int
findIndexRope Char -> Bool
predicate Rope
text
     in case Maybe Int
possibleIndex of
            Maybe Int
Nothing -> (Rope
text, Rope
emptyRope)
            Just Int
i -> Int -> Rope -> (Rope, Rope)
splitRope Int
i Rope
text