{-# LANGUAGE OverloadedStrings #-}
{-# OPTIONS_HADDOCK hide #-}

-- This is an Internal module, hidden from Haddock
module Core.Text.Breaking
  ( breakWords,
    breakLines,
    breakPieces,
    intoPieces,
    intoChunks,
    isNewline,
  )
where

import Core.Text.Rope
import Data.Char (isSpace)
import Data.List (uncons)
import qualified Data.Text.Short as S (ShortText, break, empty, null, uncons)

-- |
-- Split a passage of text into a list of words. A line is broken wherever
-- there is one or more whitespace characters, as defined by "Data.Char"'s
-- 'Data.Char.isSpace'.
--
-- Examples:
--
-- @
-- λ> __breakWords \"This is a test\"__
-- [\"This\",\"is\",\"a\",\"test\"]
-- λ> __breakWords (\"St\" <> \"op and \" <> \"go left\")__
-- [\"Stop\",\"and\",\"go\",\"left\"]
-- λ> __breakWords emptyRope__
-- []
-- @
breakWords :: Rope -> [Rope]
breakWords :: Rope -> [Rope]
breakWords = (Rope -> Bool) -> [Rope] -> [Rope]
forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not (Bool -> Bool) -> (Rope -> Bool) -> Rope -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Rope -> Bool
nullRope) ([Rope] -> [Rope]) -> (Rope -> [Rope]) -> Rope -> [Rope]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> Rope -> [Rope]
breakPieces Char -> Bool
isSpace

-- |
-- Split a paragraph of text into a list of its individual lines. The
-- paragraph will be broken wherever there is a @'\n'@ character.
--
-- Blank lines will be preserved. Note that as a special case you do /not/ get
-- a blank entry at the end of the a list of newline terminated strings.
--
-- @
-- λ> __breakLines \"Hello\\n\\nWorld\\n\"__
-- [\"Hello\",\"\",\"World\"]
-- @
breakLines :: Rope -> [Rope]
breakLines :: Rope -> [Rope]
breakLines Rope
text =
  let result :: [Rope]
result = (Char -> Bool) -> Rope -> [Rope]
breakPieces Char -> Bool
isNewline Rope
text
      n :: Int
n = [Rope] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Rope]
result Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1
      ([Rope]
fore, [Rope]
aft) = Int -> [Rope] -> ([Rope], [Rope])
forall a. Int -> [a] -> ([a], [a])
splitAt Int
n [Rope]
result
   in case [Rope]
result of
        [] -> []
        [Rope
p] -> [Rope
p]
        [Rope]
_ ->
          if [Rope]
aft [Rope] -> [Rope] -> Bool
forall a. Eq a => a -> a -> Bool
== [Rope
""]
            then [Rope]
fore
            else [Rope]
result

-- |
-- Predicate testing whether a character is a newline. After
-- 'Data.Char.isSpace' et al in "Data.Char".
isNewline :: Char -> Bool
isNewline :: Char -> Bool
isNewline Char
c = Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\n'
{-# INLINEABLE isNewline #-}

-- |
-- Break a Rope into pieces whereever the given predicate function returns
-- @True@. If found, that character will not be included on either side. Empty
-- runs, however, *will* be preserved.
breakPieces :: (Char -> Bool) -> Rope -> [Rope]
breakPieces :: (Char -> Bool) -> Rope -> [Rope]
breakPieces Char -> Bool
predicate Rope
text =
  let x :: FingerTree Width ShortText
x = Rope -> FingerTree Width ShortText
unRope Rope
text
      (Maybe ShortText
final, [Rope]
result) = (ShortText
 -> (Maybe ShortText, [Rope]) -> (Maybe ShortText, [Rope]))
-> (Maybe ShortText, [Rope])
-> FingerTree Width ShortText
-> (Maybe ShortText, [Rope])
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr ((Char -> Bool)
-> ShortText
-> (Maybe ShortText, [Rope])
-> (Maybe ShortText, [Rope])
intoPieces Char -> Bool
predicate) (Maybe ShortText
forall a. Maybe a
Nothing, []) FingerTree Width ShortText
x
   in case Maybe ShortText
final of
        Maybe ShortText
Nothing -> [Rope]
result
        Just ShortText
piece -> ShortText -> Rope
forall α. Textual α => α -> Rope
intoRope ShortText
piece Rope -> [Rope] -> [Rope]
forall a. a -> [a] -> [a]
: [Rope]
result

{-
Was the previous piece a match, or are we in the middle of a run of
characters? If we were, then join the previous run to the current piece
before processing into chunks.
-}
-- now for right fold
intoPieces :: (Char -> Bool) -> S.ShortText -> (Maybe S.ShortText, [Rope]) -> (Maybe S.ShortText, [Rope])
intoPieces :: (Char -> Bool)
-> ShortText
-> (Maybe ShortText, [Rope])
-> (Maybe ShortText, [Rope])
intoPieces Char -> Bool
predicate ShortText
piece (Maybe ShortText
stream, [Rope]
list) =
  let piece' :: ShortText
piece' = case Maybe ShortText
stream of
        Maybe ShortText
Nothing -> ShortText
piece
        Just ShortText
previous -> ShortText
piece ShortText -> ShortText -> ShortText
forall a. Semigroup a => a -> a -> a
<> ShortText
previous -- more rope, less text?
      pieces :: [Rope]
pieces = (Char -> Bool) -> ShortText -> [Rope]
intoChunks Char -> Bool
predicate ShortText
piece'
   in case [Rope] -> Maybe (Rope, [Rope])
forall a. [a] -> Maybe (a, [a])
uncons [Rope]
pieces of
        Maybe (Rope, [Rope])
Nothing -> (Maybe ShortText
forall a. Maybe a
Nothing, [Rope]
list)
        Just (Rope
text, [Rope]
remainder) -> (ShortText -> Maybe ShortText
forall a. a -> Maybe a
Just (Rope -> ShortText
forall α. Textual α => Rope -> α
fromRope Rope
text), [Rope]
remainder [Rope] -> [Rope] -> [Rope]
forall a. [a] -> [a] -> [a]
++ [Rope]
list)

--
-- λ> S.break isSpace "a d"
-- ("a"," d")
--
-- λ> S.break isSpace " and"
-- (""," and")
--
-- λ> S.break isSpace "and "
-- ("and"," ")
--
-- λ> S.break isSpace ""
-- ("","")
--
-- λ> S.break isSpace " "
-- (""," ")
--

{-
This was more easily expressed as

  let
    remainder' = S.drop 1 remainder
  in
    if remainder == " "

for the case when we were breaking on spaces. But generalized to a predicate
we have to strip off the leading character and test that its the only character;
this is cheaper than S.length etc.
-}
intoChunks :: (Char -> Bool) -> S.ShortText -> [Rope]
intoChunks :: (Char -> Bool) -> ShortText -> [Rope]
intoChunks Char -> Bool
_ ShortText
piece | ShortText -> Bool
S.null ShortText
piece = []
intoChunks Char -> Bool
predicate ShortText
piece =
  let (ShortText
chunk, ShortText
remainder) = (Char -> Bool) -> ShortText -> (ShortText, ShortText)
S.break Char -> Bool
predicate ShortText
piece

      -- Handle the special case that a trailing " " (generalized to predicate)
      -- is the only character left.
      (Bool
trailing, ShortText
remainder') = case ShortText -> Maybe (Char, ShortText)
S.uncons ShortText
remainder of
        Maybe (Char, ShortText)
Nothing -> (Bool
False, ShortText
S.empty)
        Just (Char
c, ShortText
remaining) ->
          if ShortText -> Bool
S.null ShortText
remaining
            then (Char -> Bool
predicate Char
c, ShortText
S.empty)
            else (Bool
False, ShortText
remaining)
   in if Bool
trailing
        then ShortText -> Rope
forall α. Textual α => α -> Rope
intoRope ShortText
chunk Rope -> [Rope] -> [Rope]
forall a. a -> [a] -> [a]
: Rope
emptyRope Rope -> [Rope] -> [Rope]
forall a. a -> [a] -> [a]
: []
        else ShortText -> Rope
forall α. Textual α => α -> Rope
intoRope ShortText
chunk Rope -> [Rope] -> [Rope]
forall a. a -> [a] -> [a]
: (Char -> Bool) -> ShortText -> [Rope]
intoChunks Char -> Bool
predicate ShortText
remainder'