{-# LANGUAGE DerivingVia #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE Trustworthy #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE NoImplicitPrelude #-}

-- |
-- Module: Text.Ascii
-- Copyright: (C) 2021 Koz Ross
-- License: Apache 2.0
-- Maintainer: Koz Ross <koz.ross@retro-freedom.nz>
-- Stability: stable
-- Portability: GHC only
--
-- An implementation of ASCII strings.
--
-- This module is designed for qualified importing:
--
-- > import qualified Text.Ascii as Ascii
--
-- /See also:/ [Wikipedia entry for ASCII](https://en.wikipedia.org/wiki/ASCII)
module Text.Ascii
  ( -- * Type
    AsciiText,

    -- * Creation
    empty,
    singleton,
    ascii,

    -- * Basic interface
    cons,
    snoc,
    uncons,
    unsnoc,
    length,

    -- * Transformations
    map,
    intercalate,
    intersperse,
    transpose,
    reverse,
    replace,

    -- ** Justification
    justifyLeft,
    justifyRight,
    center,

    -- * Folds
    foldl,
    foldl',
    foldr,
    foldr',

    -- ** Special folds
    concat,
    concatMap,

    -- * Construction

    -- ** Scans
    scanl,
    scanr,

    -- ** Accumulating maps
    mapAccumL,
    mapAccumR,

    -- ** Generation and unfolding
    replicate,
    unfoldr,
    unfoldrN,

    -- * Substrings

    -- ** Breaking strings
    take,
    takeEnd,
    drop,
    dropEnd,
    takeWhile,
    takeWhileEnd,
    dropWhile,
    dropWhileEnd,
    dropAround,
    strip,
    stripStart,
    stripEnd,
    splitAt,
    breakOn,
    breakOnEnd,
    break,
    span,
    group,
    groupBy,
    inits,
    tails,

    -- ** Breaking into many substrings
    splitOn,
    split,
    chunksOf,

    -- ** Breaking into lines and words
    lines,
    unlines,
    words,
    unwords,

    -- * View patterns
    stripPrefix,
    stripSuffix,
    stripInfix,
    commonPrefixes,

    -- * Searching
    filter,
    breakOnAll,
    find,
    partition,

    -- * Indexing
    index,
    findIndex,
    count,

    -- * Zipping
    zip,
    zipWith,

    -- * Conversions
    fromText,
    eitherFromText,
    fromByteString,
    eitherFromByteString,
    toText,
    toByteString,

    -- * Optics
    textWise,
    byteStringWise,
    packedChars,
    chars,
    packedBytes,
    bytes,
  )
where

import Control.Category ((.))
import Data.Bifunctor (first)
import Data.Bool (Bool (False, True), otherwise, (&&))
import Data.ByteString (ByteString)
import qualified Data.ByteString as BS
import qualified Data.ByteString.Optics as BSO
import Data.Char (isAscii)
import Data.Coerce (coerce)
import Data.Foldable (Foldable (foldMap))
import qualified Data.Foldable as F
import Data.Int (Int64)
import qualified Data.List as L
import Data.Maybe (Maybe (Just, Nothing))
import Data.Text (Text)
import qualified Data.Text as T
import Data.Text.Encoding (decodeUtf8, encodeUtf8)
import Data.Word (Word8)
import Optics.Coerce (coerceA, coerceB, coerceS, coerceT)
import Optics.Getter (Getter, view)
import Optics.Iso (Iso')
import Optics.IxFold (IxFold)
import Optics.IxTraversal (IxTraversal')
import Optics.Optic (castOptic)
import Optics.Prism (Prism', prism')
import Optics.Review (Review, review)
import Text.Ascii.Internal (AsciiChar (AsciiChar), AsciiText (AsciiText))
import Text.Ascii.QQ (ascii, char)
import Prelude
  ( Int,
    not,
    pure,
    ($),
    (+),
    (-),
    (/=),
    (<),
    (<$>),
    (<=),
    (<>),
    (==),
    (>),
    (>=),
    (||),
  )
import qualified Prelude as P

-- Note on pragmata
--
-- This is cribbed directly from bytestring, as I figure they know what they're
-- doing way better than we do. When we add our own functionality, this probably
-- needs to be considered more carefully. - Koz

-- Creation

-- $setup
-- >>> :set -XNoImplicitPrelude
-- >>> :set -XQuasiQuotes
-- >>> :set -XOverloadedStrings
-- >>> import Text.Ascii
-- >>> import Text.Ascii.Char (char, upcase, AsciiCase (Lower), caseOf)
-- >>> import Prelude ((.), ($), (<>), (==), (<), (/=), (-), max, even)
-- >>> import qualified Prelude as Prelude
-- >>> import Data.Maybe (Maybe (Just), fromMaybe)
-- >>> import qualified Data.ByteString as BS
-- >>> import Optics.AffineFold (preview)
-- >>> import Optics.Review (review)
-- >>> import Optics.Getter (view)
-- >>> import Optics.IxTraversal (elementOf)
-- >>> import Optics.IxSetter (iover)
-- >>> import Data.Bool (bool)
-- >>> import Optics.IxFold (itoListOf)

-- | The empty text.
--
-- >>> empty
-- ""
--
-- /Complexity:/ \(\Theta(1)\)
--
-- @since 1.0.0
empty :: AsciiText
empty :: AsciiText
empty = ByteString -> AsciiText
coerce ByteString
BS.empty

-- | A text consisting of a single ASCII character.
--
-- >>> singleton [char| 'w' |]
-- "w"
--
-- /Complexity:/ \(\Theta(1)\)
--
-- @since 1.0.0
{-# INLINE [1] singleton #-}
singleton :: AsciiChar -> AsciiText
singleton :: AsciiChar -> AsciiText
singleton = (Word8 -> ByteString) -> AsciiChar -> AsciiText
coerce Word8 -> ByteString
BS.singleton

-- Basic interface

-- | Adds a character to the front of a text. This requires copying, which gives
-- its complexity.
--
-- >>> cons [char| 'n' |] [ascii| "eko" |]
-- "neko"
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.0
{-# INLINE cons #-}
cons :: AsciiChar -> AsciiText -> AsciiText
cons :: AsciiChar -> AsciiText -> AsciiText
cons = (Word8 -> ByteString -> ByteString)
-> AsciiChar -> AsciiText -> AsciiText
coerce Word8 -> ByteString -> ByteString
BS.cons

-- | Adds a character to the back of a text. This requires copying, which gives
-- its complexity.
--
-- >>> snoc [ascii| "nek" |] [char| 'o' |]
-- "neko"
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.0
{-# INLINE snoc #-}
snoc :: AsciiText -> AsciiChar -> AsciiText
snoc :: AsciiText -> AsciiChar -> AsciiText
snoc = (ByteString -> Word8 -> ByteString)
-> AsciiText -> AsciiChar -> AsciiText
coerce ByteString -> Word8 -> ByteString
BS.snoc

-- | If the argument is non-empty, gives 'Just' the first character and the
-- rest, and 'Nothing' otherwise.
--
-- >>> uncons empty
-- Nothing
-- >>> uncons . singleton $ [char| 'w' |]
-- Just ('0x77',"")
-- >>> uncons [ascii| "nekomimi" |]
-- Just ('0x6e',"ekomimi")
--
-- /Complexity:/ \(\Theta(1)\)
--
-- @since 1.0.0
{-# INLINE uncons #-}
uncons :: AsciiText -> Maybe (AsciiChar, AsciiText)
uncons :: AsciiText -> Maybe (AsciiChar, AsciiText)
uncons = (ByteString -> Maybe (Word8, ByteString))
-> AsciiText -> Maybe (AsciiChar, AsciiText)
coerce ByteString -> Maybe (Word8, ByteString)
BS.uncons

-- | If the argument is non-empty, gives 'Just' the initial segment and the last
-- character, and 'Nothing' otherwise.
--
-- >>> unsnoc empty
-- Nothing
-- >>> unsnoc . singleton $ [char| 'w' |]
-- Just ("",'0x77')
-- >>> unsnoc [ascii| "catboy" |]
-- Just ("catbo",'0x79')
--
-- /Complexity:/ \(\Theta(1)\)
--
-- @since 1.0.0
{-# INLINE unsnoc #-}
unsnoc :: AsciiText -> Maybe (AsciiText, AsciiChar)
unsnoc :: AsciiText -> Maybe (AsciiText, AsciiChar)
unsnoc = (ByteString -> Maybe (ByteString, Word8))
-> AsciiText -> Maybe (AsciiText, AsciiChar)
coerce ByteString -> Maybe (ByteString, Word8)
BS.unsnoc

-- | The number of characters (and, since this is ASCII, bytes) in the text.
--
-- >>> length . singleton $ [char| 'w' |]
-- 1
-- >>> length [ascii| "nyan nyan" |]
-- 9
--
-- /Complexity:/ \(\Theta(1)\)
--
-- @since 1.0.0
{-# INLINE length #-}
length :: AsciiText -> Int
length :: AsciiText -> Int
length = (ByteString -> Int) -> AsciiText -> Int
coerce ByteString -> Int
BS.length

-- Transformations

-- | Copy, and apply the function to each element of, the text.
--
-- >>> map (\c -> fromMaybe c . upcase $ c) [ascii| "nyan!" |]
-- "NYAN!"
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.0
{-# INLINE map #-}
map :: (AsciiChar -> AsciiChar) -> AsciiText -> AsciiText
map :: (AsciiChar -> AsciiChar) -> AsciiText -> AsciiText
map = ((Word8 -> Word8) -> ByteString -> ByteString)
-> (AsciiChar -> AsciiChar) -> AsciiText -> AsciiText
coerce (Word8 -> Word8) -> ByteString -> ByteString
BS.map

-- | Takes a text and a list of texts, and concatenates the list after
-- interspersing the first argument between each element of the list.
--
-- >>> intercalate [ascii| " ~ " |] []
-- ""
-- >>> intercalate [ascii| " ~ " |] [[ascii| "nyan" |]]
-- "nyan"
-- >>> intercalate [ascii| " ~ " |] . Prelude.replicate 3 $ [ascii| "nyan" |]
-- "nyan ~ nyan ~ nyan"
-- >>> intercalate empty . Prelude.replicate 3 $ [ascii| "nyan" |]
-- "nyannyannyan"
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.0
{-# INLINE [1] intercalate #-}
intercalate :: AsciiText -> [AsciiText] -> AsciiText
intercalate :: AsciiText -> [AsciiText] -> AsciiText
intercalate = (ByteString -> [ByteString] -> ByteString)
-> AsciiText -> [AsciiText] -> AsciiText
coerce ByteString -> [ByteString] -> ByteString
BS.intercalate

-- | Takes a character, and places it between the characters of a text.
--
-- >>> intersperse [char| '~' |] empty
-- ""
-- >>> intersperse [char| '~' |] . singleton $ [char| 'w' |]
-- "w"
-- >>> intersperse [char| '~' |] [ascii| "nyan" |]
-- "n~y~a~n"
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.0
intersperse :: AsciiChar -> AsciiText -> AsciiText
intersperse :: AsciiChar -> AsciiText -> AsciiText
intersperse = (Word8 -> ByteString -> ByteString)
-> AsciiChar -> AsciiText -> AsciiText
coerce Word8 -> ByteString -> ByteString
BS.intersperse

-- | Transpose the rows and columns of the argument. This uses
-- 'Data.List.transpose' internally, and thus, isn't very efficient.
--
-- >>> transpose []
-- []
-- >>> transpose [[ascii| "w" |]]
-- ["w"]
-- >>> transpose [[ascii| "nyan" |]]
-- ["n","y","a","n"]
-- >>> transpose . Prelude.replicate 3 $ [ascii| "nyan" |]
-- ["nnn","yyy","aaa","nnn"]
-- >>> transpose [[ascii| "cat" |], [ascii| "boy" |], [ascii| "nyan" |]]
-- ["cbn","aoy","tya","n"]
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.0
transpose :: [AsciiText] -> [AsciiText]
transpose :: [AsciiText] -> [AsciiText]
transpose = ([ByteString] -> [ByteString]) -> [AsciiText] -> [AsciiText]
coerce [ByteString] -> [ByteString]
BS.transpose

-- | Reverse the text.
--
-- >>> reverse empty
-- ""
-- >>> reverse . singleton $ [char| 'w' |]
-- "w"
-- >>> reverse [ascii| "catboy goes nyan" |]
-- "nayn seog yobtac"
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.0
reverse :: AsciiText -> AsciiText
reverse :: AsciiText -> AsciiText
reverse = (ByteString -> ByteString) -> AsciiText -> AsciiText
coerce ByteString -> ByteString
BS.reverse

-- | @replace needle replacement haystack@, given a @needle@ of length \(n\) and
-- a haystack of length \(h\), replaces each non-overlapping occurrence of
-- @needle@ in @haystack@ with @replacement@. If the @needle@ is empty, no
-- replacement will be performed. Equivalent to @'intercalate' replacement '.'
-- 'splitOn' needle '$' haystack@.
--
-- >>> replace empty [ascii| "NYAN~" |] [ascii| "catboy goes nyan nyan" |]
-- "catboy goes nyan nyan"
-- >>> replace [ascii| "nyan" |] [ascii| "NYAN~" |] empty
-- ""
-- >>> replace [ascii| "nyan" |] [ascii| "NYAN~" |] [ascii| "catboy goes nyan nyan" |]
-- "catboy goes NYAN~ NYAN~"
-- >>> replace [ascii| "nyan" |] [ascii| "NYAN~" |] [ascii| "nyanyan" |]
-- "NYAN~yan"
--
-- = On complexity
--
-- This function is based on a variant of the
-- [NSN](https://www-igm.univ-mlv.fr/~lecroq/string/node13.html) algorithm,
-- except it does not detect overlapping needles. Its average-case analysis is
-- based on the assumption that:
--
-- * All ASCII symbols are equally likely to occur in both the needle and the
-- haystack; and
-- * The needle has length at least two; and
-- * Both the needle and the haystack contain at least four unique symbols.
--
-- We fall back to 'split' for singleton needles, and there is no work to be
-- done on empty needles, which means the second assumption always holds.
--
-- Worst-case behaviour becomes more likely the more your input satisfies the
-- following conditions:
--
-- * The needle and/or haystack use few unique symbols (less than four is the
-- worst); or
-- * The haystack contains many instances of the second symbol of the needle
-- which don't lead to full matches.
--
-- The analysis below also doesn't factor in the cost of performing the
-- replacement, as this is (among other things) proportional to the number of
-- matches of the needle (and thus is hard to quantify).
--
-- /Complexity:/ \(\Theta(h)\) average case, \(\Theta(h \cdot n\)\) worst-case.
--
-- /See also:/ Note that all the below are references for the original
-- algorithm, which includes searching for overlapping needles. Thus, our
-- implementation will perform better than the analysis suggests.
--
-- * [Description and pseudocode](https://www-igm.univ-mlv.fr/~lecroq/string/node13.html)
-- * ["Algorithms on Strings"](https://www.cambridge.org/core/books/algorithms-on-strings/19049704C876795D95D8882C73257C70) by Crochemore, Hancart and Lecroq. PDF available [here](https://www.researchgate.net/publication/220693689_Algorithms_on_Strings).
--
-- @since 1.0.1
replace ::
  -- | @needle@ to search for
  AsciiText ->
  -- | @replacement@ to replace @needle@ with
  AsciiText ->
  -- | @haystack@ in which to search
  AsciiText ->
  AsciiText
replace :: AsciiText -> AsciiText -> AsciiText -> AsciiText
replace AsciiText
needle AsciiText
replacement AsciiText
haystack
  | AsciiText -> Int
length AsciiText
needle Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 Bool -> Bool -> Bool
|| AsciiText -> Int
length AsciiText
haystack Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 = AsciiText
haystack
  | AsciiText -> Int
length AsciiText
needle Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> AsciiText -> Int
length AsciiText
haystack = AsciiText
haystack
  | Bool
otherwise = AsciiText -> [AsciiText] -> AsciiText
intercalate AsciiText
replacement ([AsciiText] -> AsciiText)
-> (AsciiText -> [AsciiText]) -> AsciiText -> AsciiText
forall {k} (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. AsciiText -> AsciiText -> [AsciiText]
splitOn AsciiText
needle (AsciiText -> AsciiText) -> AsciiText -> AsciiText
forall a b. (a -> b) -> a -> b
$ AsciiText
haystack

-- | @justifyLeft n c t@ produces a result of length \(\max \{ {\tt n }, {\tt length} \; {\tt t} \}\),
-- consisting of a copy of @t@ followed by (zero or more) copies
-- of @c@.
--
-- >>> justifyLeft (-100) [char| '~' |] [ascii| "nyan" |]
-- "nyan"
-- >>> justifyLeft 4 [char| '~' |] [ascii| "nyan" |]
-- "nyan"
-- >>> justifyLeft 10 [char| '~' |] [ascii| "nyan" |]
-- "nyan~~~~~~"
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.1
justifyLeft :: Int -> AsciiChar -> AsciiText -> AsciiText
justifyLeft :: Int -> AsciiChar -> AsciiText -> AsciiText
justifyLeft Int
n AsciiChar
c AsciiText
t = AsciiText
t AsciiText -> AsciiText -> AsciiText
forall a. Semigroup a => a -> a -> a
<> Int -> AsciiText -> AsciiText
replicate (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
- AsciiText -> Int
length AsciiText
t) (AsciiChar -> AsciiText
singleton AsciiChar
c)

-- | @justifyRight n c t@ produces a result of length \(\max \{ {\tt n }, {\tt length} \; {\tt t} \}\),
-- consisting of (zero or more) copies of @c@ followed by a copy of @t@.
--
-- >>> justifyRight (-100) [char| '~' |] [ascii| "nyan" |]
-- "nyan"
-- >>> justifyRight 4 [char| '~' |] [ascii| "nyan" |]
-- "nyan"
-- >>> justifyRight 10 [char| '~' |] [ascii| "nyan" |]
-- "~~~~~~nyan"
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.1
justifyRight :: Int -> AsciiChar -> AsciiText -> AsciiText
justifyRight :: Int -> AsciiChar -> AsciiText -> AsciiText
justifyRight Int
n AsciiChar
c AsciiText
t = Int -> AsciiText -> AsciiText
replicate (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
- AsciiText -> Int
length AsciiText
t) (AsciiChar -> AsciiText
singleton AsciiChar
c) AsciiText -> AsciiText -> AsciiText
forall a. Semigroup a => a -> a -> a
<> AsciiText
t

-- | @center n c t@ produces a result of length \({\tt k } = \max \{ {\tt n }, {\tt length} \; {\tt t} \}\),
-- consisting of:
--
-- * \(\lceil \frac{{\tt k} - {\tt length} \; {\tt t}}{2} \rceil\) copies of @c@;
-- followed by
-- * A copy of @t@; followed by
-- * Zero or more copies of @c@
--
-- This means that the centering is \'left-biased\'. This mimicks the behaviour
-- of the function of the same name in the [text
-- package](http://hackage.haskell.org/package/text-1.2.4.1/docs/Data-Text.html#v:center),
-- although that function's documenation does not describe this behaviour.
--
-- >>> center (-100) [char| '~' |] [ascii| "nyan" |]
-- "nyan"
-- >>> center 4 [char| '~' |] [ascii| "nyan" |]
-- "nyan"
-- >>> center 5 [char| '~' |] [ascii| "nyan" |]
-- "~nyan"
-- >>> center 6 [char| '~' |] [ascii| "nyan" |]
-- "~nyan~"
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.1
center :: Int -> AsciiChar -> AsciiText -> AsciiText
center :: Int -> AsciiChar -> AsciiText -> AsciiText
center Int
n AsciiChar
c AsciiText
t
  | Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= AsciiText -> Int
length AsciiText
t = AsciiText
t
  | Int -> Bool
forall a. Integral a => a -> Bool
P.even (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
- AsciiText -> Int
length AsciiText
t) = AsciiText
copied AsciiText -> AsciiText -> AsciiText
forall a. Semigroup a => a -> a -> a
<> AsciiText
t AsciiText -> AsciiText -> AsciiText
forall a. Semigroup a => a -> a -> a
<> AsciiText
copied
  | Bool
otherwise = AsciiText
copied AsciiText -> AsciiText -> AsciiText
forall a. Semigroup a => a -> a -> a
<> AsciiChar -> AsciiText
singleton AsciiChar
c AsciiText -> AsciiText -> AsciiText
forall a. Semigroup a => a -> a -> a
<> AsciiText
t AsciiText -> AsciiText -> AsciiText
forall a. Semigroup a => a -> a -> a
<> AsciiText
copied
  where
    copied :: AsciiText
    copied :: AsciiText
copied = Int -> AsciiText -> AsciiText
replicate ((Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
- AsciiText -> Int
length AsciiText
t) Int -> Int -> Int
forall a. Integral a => a -> a -> a
`P.div` Int
2) (AsciiChar -> AsciiText
singleton AsciiChar
c)

-- Folds

-- | Left-associative fold of a text.
--
-- >>> foldl (\acc c -> [ascii| "f(" |] <> acc <> singleton c <> [ascii| ")" |]) [ascii| "a" |] [ascii| "catboy" |]
-- "f(f(f(f(f(f(ac)a)t)b)o)y)"
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.0
{-# INLINE foldl #-}
foldl :: (a -> AsciiChar -> a) -> a -> AsciiText -> a
foldl :: forall a. (a -> AsciiChar -> a) -> a -> AsciiText -> a
foldl a -> AsciiChar -> a
f a
x (AsciiText ByteString
bs) = (a -> Word8 -> a) -> a -> ByteString -> a
forall a. (a -> Word8 -> a) -> a -> ByteString -> a
BS.foldl ((a -> AsciiChar -> a) -> a -> Word8 -> a
coerce a -> AsciiChar -> a
f) a
x ByteString
bs

-- | Left-associative fold of a text, strict in the accumulator.
--
-- >>> foldl' (\acc c -> [ascii| "f(" |] <> acc <> singleton c <> [ascii| ")" |]) [ascii| "a" |] [ascii| "catboy" |]
-- "f(f(f(f(f(f(ac)a)t)b)o)y)"
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.0
{-# INLINE foldl' #-}
foldl' :: (a -> AsciiChar -> a) -> a -> AsciiText -> a
foldl' :: forall a. (a -> AsciiChar -> a) -> a -> AsciiText -> a
foldl' a -> AsciiChar -> a
f a
x (AsciiText ByteString
bs) = (a -> Word8 -> a) -> a -> ByteString -> a
forall a. (a -> Word8 -> a) -> a -> ByteString -> a
BS.foldl' ((a -> AsciiChar -> a) -> a -> Word8 -> a
coerce a -> AsciiChar -> a
f) a
x ByteString
bs

-- | Right-associative fold of a text.
--
-- >>> foldr (\c acc -> [ascii| "f(" |] <> acc <> singleton c <> [ascii| ")" |]) [ascii| "a" |] [ascii| "catboy" |]
-- "f(f(f(f(f(f(ay)o)b)t)a)c)"
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.0
{-# INLINE foldr #-}
foldr :: (AsciiChar -> a -> a) -> a -> AsciiText -> a
foldr :: forall a. (AsciiChar -> a -> a) -> a -> AsciiText -> a
foldr AsciiChar -> a -> a
f a
x (AsciiText ByteString
bs) = (Word8 -> a -> a) -> a -> ByteString -> a
forall a. (Word8 -> a -> a) -> a -> ByteString -> a
BS.foldr ((AsciiChar -> a -> a) -> Word8 -> a -> a
coerce AsciiChar -> a -> a
f) a
x ByteString
bs

-- | Right-associative fold of a text, strict in the accumulator.
--
-- >>> foldr' (\c acc -> [ascii| "f(" |] <> acc <> singleton c <> [ascii| ")" |]) [ascii| "a" |] [ascii| "catboy" |]
-- "f(f(f(f(f(f(ay)o)b)t)a)c)"
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.0
{-# INLINE foldr' #-}
foldr' :: (AsciiChar -> a -> a) -> a -> AsciiText -> a
foldr' :: forall a. (AsciiChar -> a -> a) -> a -> AsciiText -> a
foldr' AsciiChar -> a -> a
f a
x (AsciiText ByteString
bs) = (Word8 -> a -> a) -> a -> ByteString -> a
forall a. (Word8 -> a -> a) -> a -> ByteString -> a
BS.foldr' ((AsciiChar -> a -> a) -> Word8 -> a -> a
coerce AsciiChar -> a -> a
f) a
x ByteString
bs

-- Special folds

-- | Concatenate a list of texts.
--
-- >>> concat []
-- ""
-- >>> concat [[ascii| "catboy" |]]
-- "catboy"
-- >>> concat . Prelude.replicate 4 $ [ascii| "nyan" |]
-- "nyannyannyannyan"
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.0
concat :: [AsciiText] -> AsciiText
concat :: [AsciiText] -> AsciiText
concat = ([ByteString] -> ByteString) -> [AsciiText] -> AsciiText
coerce [ByteString] -> ByteString
BS.concat

-- | Map a text-producing function over a text, then concatenate the results.
--
-- >>> concatMap singleton empty
-- ""
-- >>> concatMap singleton [ascii| "nyan" |]
-- "nyan"
-- >>> concatMap (\c -> singleton c <> singleton c) [ascii| "nekomimi" |]
-- "nneekkoommiimmii"
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.0
concatMap :: (AsciiChar -> AsciiText) -> AsciiText -> AsciiText
concatMap :: (AsciiChar -> AsciiText) -> AsciiText -> AsciiText
concatMap = ((Word8 -> ByteString) -> ByteString -> ByteString)
-> (AsciiChar -> AsciiText) -> AsciiText -> AsciiText
coerce (Word8 -> ByteString) -> ByteString -> ByteString
BS.concatMap

-- | 'scanl' is similar to 'foldl', but returns a list of successive values from
-- the left.
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.0
{-# INLINE scanl #-}
scanl ::
  -- | accumulator -> element -> new accumulator
  (AsciiChar -> AsciiChar -> AsciiChar) ->
  -- | Starting accumulator value
  AsciiChar ->
  -- | Input of length \(n\)
  AsciiText ->
  -- | Output of length \(n + 1\)
  AsciiText
scanl :: (AsciiChar -> AsciiChar -> AsciiChar)
-> AsciiChar -> AsciiText -> AsciiText
scanl = ((Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString)
-> (AsciiChar -> AsciiChar -> AsciiChar)
-> AsciiChar
-> AsciiText
-> AsciiText
coerce (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
BS.scanl

-- | 'scanr' is similar to 'foldr', but returns a list of successive values from
-- the right.
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.0
{-# INLINE scanr #-}
scanr ::
  -- | element -> accumulator -> new accumulator
  (AsciiChar -> AsciiChar -> AsciiChar) ->
  -- | Starting accumulator value
  AsciiChar ->
  -- | Input of length \(n\)
  AsciiText ->
  -- | Output of length \(n + 1\)
  AsciiText
scanr :: (AsciiChar -> AsciiChar -> AsciiChar)
-> AsciiChar -> AsciiText -> AsciiText
scanr = ((Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString)
-> (AsciiChar -> AsciiChar -> AsciiChar)
-> AsciiChar
-> AsciiText
-> AsciiText
coerce (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
BS.scanr

-- Accumulating maps

-- | Like a combination of 'map' and 'foldl''. Applies a function to each
-- element of an 'AsciiText', passing an accumulating parameter from left to
-- right, and returns a final 'AsciiText' along with the accumulating
-- parameter's final value.
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.0
{-# INLINE mapAccumL #-}
mapAccumL :: (a -> AsciiChar -> (a, AsciiChar)) -> a -> AsciiText -> (a, AsciiText)
mapAccumL :: forall a.
(a -> AsciiChar -> (a, AsciiChar))
-> a -> AsciiText -> (a, AsciiText)
mapAccumL a -> AsciiChar -> (a, AsciiChar)
f a
x (AsciiText ByteString
bs) = ByteString -> AsciiText
AsciiText (ByteString -> AsciiText) -> (a, ByteString) -> (a, AsciiText)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (a -> Word8 -> (a, Word8)) -> a -> ByteString -> (a, ByteString)
forall acc.
(acc -> Word8 -> (acc, Word8))
-> acc -> ByteString -> (acc, ByteString)
BS.mapAccumL ((a -> AsciiChar -> (a, AsciiChar)) -> a -> Word8 -> (a, Word8)
coerce a -> AsciiChar -> (a, AsciiChar)
f) a
x ByteString
bs

-- | Like a combination of 'map' and 'foldr'. Applies a function to each element
-- of an 'AsciiText', passing an accumulating parameter from right to left, and
-- returns a final 'AsciiText' along with the accumulating parameter's final
-- value.
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.0
{-# INLINE mapAccumR #-}
mapAccumR :: (a -> AsciiChar -> (a, AsciiChar)) -> a -> AsciiText -> (a, AsciiText)
mapAccumR :: forall a.
(a -> AsciiChar -> (a, AsciiChar))
-> a -> AsciiText -> (a, AsciiText)
mapAccumR a -> AsciiChar -> (a, AsciiChar)
f a
x (AsciiText ByteString
bs) = ByteString -> AsciiText
AsciiText (ByteString -> AsciiText) -> (a, ByteString) -> (a, AsciiText)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (a -> Word8 -> (a, Word8)) -> a -> ByteString -> (a, ByteString)
forall acc.
(acc -> Word8 -> (acc, Word8))
-> acc -> ByteString -> (acc, ByteString)
BS.mapAccumL ((a -> AsciiChar -> (a, AsciiChar)) -> a -> Word8 -> (a, Word8)
coerce a -> AsciiChar -> (a, AsciiChar)
f) a
x ByteString
bs

-- Generation and unfolding

-- | @replicate n t@ consists of @t@ repeated \(\max \{ 0, {\tt n } \}\) times.
--
-- >>> replicate (-100) [ascii| "nyan" |]
-- ""
-- >>> replicate 0 [ascii| "nyan" |]
-- ""
-- >>> replicate 3 [ascii| "nyan" |]
-- "nyannyannyan"
--
-- /Complexity:/ \(\Theta(n \cdot m)\)
--
-- @since 1.0.1
replicate :: Int -> AsciiText -> AsciiText
replicate :: Int -> AsciiText -> AsciiText
replicate Int
n AsciiText
t
  | Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0 = AsciiText
empty
  | Bool
otherwise = [AsciiText] -> AsciiText
concat ([AsciiText] -> AsciiText)
-> (AsciiText -> [AsciiText]) -> AsciiText -> AsciiText
forall {k} (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Int -> AsciiText -> [AsciiText]
forall a. Int -> a -> [a]
P.replicate Int
n (AsciiText -> AsciiText) -> AsciiText -> AsciiText
forall a b. (a -> b) -> a -> b
$ AsciiText
t

-- | Similar to 'Data.List.unfoldr'. The function parameter takes a seed value,
-- and produces either 'Nothing' (indicating that we're done) or 'Just' an
-- 'AsciiChar' and a new seed value. 'unfoldr' then, given a starting seed, will
-- repeatedly call the function parameter on successive seed values, returning
-- the resulting 'AsciiText', based on the 'AsciiChar's produced, in the same
-- order.
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.0
{-# INLINE unfoldr #-}
unfoldr :: (a -> Maybe (AsciiChar, a)) -> a -> AsciiText
unfoldr :: forall a. (a -> Maybe (AsciiChar, a)) -> a -> AsciiText
unfoldr a -> Maybe (AsciiChar, a)
f = ByteString -> AsciiText
AsciiText (ByteString -> AsciiText) -> (a -> ByteString) -> a -> AsciiText
forall {k} (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. (a -> Maybe (Word8, a)) -> a -> ByteString
forall a. (a -> Maybe (Word8, a)) -> a -> ByteString
BS.unfoldr ((a -> Maybe (AsciiChar, a)) -> a -> Maybe (Word8, a)
coerce a -> Maybe (AsciiChar, a)
f)

-- | Similar to 'unfoldr', but also takes a maximum length parameter. The second
-- element of the result tuple will be 'Nothing' if we finished with the
-- function argument returning 'Nothing', and 'Just' the final seed value if we
-- reached the maximum length before that happened.
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.0
{-# INLINE unfoldrN #-}
unfoldrN :: Int -> (a -> Maybe (AsciiChar, a)) -> a -> (AsciiText, Maybe a)
unfoldrN :: forall a.
Int -> (a -> Maybe (AsciiChar, a)) -> a -> (AsciiText, Maybe a)
unfoldrN Int
n a -> Maybe (AsciiChar, a)
f = (ByteString -> AsciiText)
-> (ByteString, Maybe a) -> (AsciiText, Maybe a)
forall (p :: * -> * -> *) a b c.
Bifunctor p =>
(a -> b) -> p a c -> p b c
first ByteString -> AsciiText
AsciiText ((ByteString, Maybe a) -> (AsciiText, Maybe a))
-> (a -> (ByteString, Maybe a)) -> a -> (AsciiText, Maybe a)
forall {k} (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Int -> (a -> Maybe (Word8, a)) -> a -> (ByteString, Maybe a)
forall a.
Int -> (a -> Maybe (Word8, a)) -> a -> (ByteString, Maybe a)
BS.unfoldrN Int
n ((a -> Maybe (AsciiChar, a)) -> a -> Maybe (Word8, a)
coerce a -> Maybe (AsciiChar, a)
f)

-- | @take n t@ returns the prefix of @t@ with length
-- \(\min \{ \max \{ 0, {\tt n}\}, {\tt length} \; {\tt t} \}\).
--
-- >>> take (-100) [ascii| "catboy" |]
-- ""
-- >>> take 0 [ascii| "catboy" |]
-- ""
-- >>> take 4 [ascii| "catboy" |]
-- "catb"
-- >>> take 1000 [ascii| "catboy" |]
-- "catboy"
--
-- /Complexity:/ \(\Theta(1)\)
--
-- @since 1.0.0
{-# INLINE take #-}
take :: Int -> AsciiText -> AsciiText
take :: Int -> AsciiText -> AsciiText
take = (Int -> ByteString -> ByteString) -> Int -> AsciiText -> AsciiText
coerce Int -> ByteString -> ByteString
BS.take

-- | @takeEnd n t@ returns the suffix of @t@ with length
-- \(\min \{ \max \{0, {\tt n} \}, {\tt length} \; {\tt t} \}\).
--
-- >>> takeEnd (-100) [ascii| "catboy" |]
-- ""
-- >>> takeEnd 0 [ascii| "catboy" |]
-- ""
-- >>> takeEnd 4 [ascii| "catboy" |]
-- "tboy"
-- >>> takeEnd 1000 [ascii| "catboy" |]
-- "catboy"
--
-- /Complexity:/ \(\Theta(1)\)
--
-- @since 1.0.1
takeEnd :: Int -> AsciiText -> AsciiText
takeEnd :: Int -> AsciiText -> AsciiText
takeEnd Int
n AsciiText
t = Int -> AsciiText -> AsciiText
drop (AsciiText -> Int
length AsciiText
t Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
n) AsciiText
t

-- | @drop n t@ returns the suffix of @t@ with length
-- \(\max \{ 0, \min \{ {\tt length} \; {\tt t}, {\tt length} \; {\tt t} - {\tt n} \} \}\).
--
-- >>> drop (-100) [ascii| "catboy" |]
-- "catboy"
-- >>> drop 0 [ascii| "catboy" |]
-- "catboy"
-- >>> drop 4 [ascii| "catboy" |]
-- "oy"
-- >>> drop 1000 [ascii| "catboy" |]
-- ""
--
-- /Complexity:/ \(\Theta(1)\)
--
-- @since 1.0.0
{-# INLINE drop #-}
drop :: Int -> AsciiText -> AsciiText
drop :: Int -> AsciiText -> AsciiText
drop = (Int -> ByteString -> ByteString) -> Int -> AsciiText -> AsciiText
coerce Int -> ByteString -> ByteString
BS.drop

-- | @dropEnd n t@ returns the prefix of @t@ with length
-- \(\max \{ 0, \min \{ {\tt length} \; {\tt t}, {\tt length} \; {\tt t} - {\tt n} \} \}\).
--
-- >>> dropEnd (-100) [ascii| "catboy" |]
-- "catboy"
-- >>> dropEnd 0 [ascii| "catboy" |]
-- "catboy"
-- >>> dropEnd 4 [ascii| "catboy" |]
-- "ca"
-- >>> dropEnd 1000 [ascii| "catboy" |]
-- ""
--
-- /Complexity:/ \(\Theta(1)\)
--
-- @since 1.0.1
dropEnd :: Int -> AsciiText -> AsciiText
dropEnd :: Int -> AsciiText -> AsciiText
dropEnd Int
n AsciiText
t = Int -> AsciiText -> AsciiText
take (AsciiText -> Int
length AsciiText
t Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
n) AsciiText
t

-- | @takeWhile p t@ returns the longest prefix of @t@ of characters that
-- satisfy @p@.
--
-- >>> takeWhile ((Just Lower ==) . caseOf) empty
-- ""
-- >>> takeWhile ((Just Lower ==) . caseOf) [ascii| "catboy goes nyan" |]
-- "catboy"
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.0
{-# INLINE [1] takeWhile #-}
takeWhile :: (AsciiChar -> Bool) -> AsciiText -> AsciiText
takeWhile :: (AsciiChar -> Bool) -> AsciiText -> AsciiText
takeWhile AsciiChar -> Bool
f (AsciiText ByteString
at) = ByteString -> AsciiText
AsciiText (ByteString -> AsciiText)
-> (ByteString -> ByteString) -> ByteString -> AsciiText
forall {k} (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. (Word8 -> Bool) -> ByteString -> ByteString
BS.takeWhile ((AsciiChar -> Bool) -> Word8 -> Bool
coerce AsciiChar -> Bool
f) (ByteString -> AsciiText) -> ByteString -> AsciiText
forall a b. (a -> b) -> a -> b
$ ByteString
at

-- | @takeWhileEnd p t@ returns the longest suffix of @t@ of characters that
-- satisfy @p@. Equivalent to @'reverse' . 'takeWhile' p . 'reverse'@.
--
-- >>> takeWhileEnd ((Just Lower ==) . caseOf) empty
-- ""
-- >>> takeWhileEnd ((Just Lower ==) . caseOf) [ascii| "catboy goes nyan" |]
-- "nyan"
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.0
{-# INLINE takeWhileEnd #-}
takeWhileEnd :: (AsciiChar -> Bool) -> AsciiText -> AsciiText
takeWhileEnd :: (AsciiChar -> Bool) -> AsciiText -> AsciiText
takeWhileEnd AsciiChar -> Bool
f = ByteString -> AsciiText
AsciiText (ByteString -> AsciiText)
-> (AsciiText -> ByteString) -> AsciiText -> AsciiText
forall {k} (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. (Word8 -> Bool) -> ByteString -> ByteString
BS.takeWhileEnd ((AsciiChar -> Bool) -> Word8 -> Bool
coerce AsciiChar -> Bool
f) (ByteString -> ByteString)
-> (AsciiText -> ByteString) -> AsciiText -> ByteString
forall {k} (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. AsciiText -> ByteString
coerce

-- | @dropWhile p t@ returns the suffix remaining after @'takeWhile' p t@.
--
-- >>> dropWhile ((Just Lower ==) . caseOf) empty
-- ""
-- >>> dropWhile ((Just Lower ==) . caseOf) [ascii| "catboy goes nyan" |]
-- " goes nyan"
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.0
{-# INLINE [1] dropWhile #-}
dropWhile :: (AsciiChar -> Bool) -> AsciiText -> AsciiText
dropWhile :: (AsciiChar -> Bool) -> AsciiText -> AsciiText
dropWhile AsciiChar -> Bool
f (AsciiText ByteString
at) = ByteString -> AsciiText
AsciiText (ByteString -> AsciiText)
-> (ByteString -> ByteString) -> ByteString -> AsciiText
forall {k} (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. (Word8 -> Bool) -> ByteString -> ByteString
BS.dropWhile ((AsciiChar -> Bool) -> Word8 -> Bool
coerce AsciiChar -> Bool
f) (ByteString -> AsciiText) -> ByteString -> AsciiText
forall a b. (a -> b) -> a -> b
$ ByteString
at

-- | @dropWhileEnd p t@ returns the prefix remaining after @'takeWhileEnd' p t@.
-- Equivalent to @'reverse' . 'dropWhile' p . 'reverse'@.
--
-- >>> dropWhileEnd ((Just Lower ==) . caseOf) empty
-- ""
-- >>> dropWhileEnd ((Just Lower ==) . caseOf) [ascii| "catboy goes nyan" |]
-- "catboy goes "
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.0
{-# INLINE dropWhileEnd #-}
dropWhileEnd :: (AsciiChar -> Bool) -> AsciiText -> AsciiText
dropWhileEnd :: (AsciiChar -> Bool) -> AsciiText -> AsciiText
dropWhileEnd AsciiChar -> Bool
f = ByteString -> AsciiText
AsciiText (ByteString -> AsciiText)
-> (AsciiText -> ByteString) -> AsciiText -> AsciiText
forall {k} (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. (Word8 -> Bool) -> ByteString -> ByteString
BS.dropWhileEnd ((AsciiChar -> Bool) -> Word8 -> Bool
coerce AsciiChar -> Bool
f) (ByteString -> ByteString)
-> (AsciiText -> ByteString) -> AsciiText -> ByteString
forall {k} (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. AsciiText -> ByteString
coerce

-- | @dropAround p@ is equivalent to @'dropWhile' p '.' 'dropWhileEnd' p@.
--
-- >>> dropAround ((Just Lower ==) . caseOf) empty
-- ""
-- >>> dropAround ((Just Lower ==) . caseOf) [ascii| "catboy goes nyan" |]
-- " goes "
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.1
dropAround :: (AsciiChar -> Bool) -> AsciiText -> AsciiText
dropAround :: (AsciiChar -> Bool) -> AsciiText -> AsciiText
dropAround AsciiChar -> Bool
p = (AsciiChar -> Bool) -> AsciiText -> AsciiText
dropWhile AsciiChar -> Bool
p (AsciiText -> AsciiText)
-> (AsciiText -> AsciiText) -> AsciiText -> AsciiText
forall {k} (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. (AsciiChar -> Bool) -> AsciiText -> AsciiText
dropWhileEnd AsciiChar -> Bool
p

-- | Remove the longest prefix /and/ suffix of the input comprised entirely of
-- whitespace characters. We define a \'whitespace character\' as any of the
-- following:
--
-- * TAB (0x09)
-- * LF (0x0a)
-- * VT (0x0b)
-- * FF (0x0c)
-- * CR (0x0d)
-- * Space (0x20)
--
-- >>> strip empty
-- ""
-- >>> strip [ascii| "catboy goes nyan" |]
-- "catboy goes nyan"
-- >>> strip [ascii| "\n\n    \tcatboy goes nyan" |]
-- "catboy goes nyan"
-- >>> strip [ascii| "catboy goes nyan   \t\t\n" |]
-- "catboy goes nyan"
-- >>> strip [ascii| "\n\n    \tcatboy goes nyan   \t\t\n" |]
-- "catboy goes nyan"
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.1
strip :: AsciiText -> AsciiText
strip :: AsciiText -> AsciiText
strip = (AsciiChar -> Bool) -> AsciiText -> AsciiText
dropAround AsciiChar -> Bool
isSpace

-- | Remove the longest prefix of the input comprised entirely of whitespace
-- characters. We define a \'whitespace character\' as any of the following:
--
-- * TAB (0x09)
-- * LF (0x0a)
-- * VT (0x0b)
-- * FF (0x0c)
-- * CR (0x0d)
-- * Space (0x20)
--
-- >>> stripStart empty
-- ""
-- >>> stripStart [ascii| "catboy goes nyan" |]
-- "catboy goes nyan"
-- >>> stripStart [ascii| "\n\n    \tcatboy goes nyan" |]
-- "catboy goes nyan"
-- >>> stripStart [ascii| "catboy goes nyan   \t\t\n" |]
-- "catboy goes nyan   \t\t\n"
-- >>> stripStart [ascii| "\n\n    \tcatboy goes nyan   \t\t\n" |]
-- "catboy goes nyan   \t\t\n"
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.1
stripStart :: AsciiText -> AsciiText
stripStart :: AsciiText -> AsciiText
stripStart = (AsciiChar -> Bool) -> AsciiText -> AsciiText
dropWhile AsciiChar -> Bool
isSpace

-- | Remove the longest suffix of the input comprised entirely of whitespace
-- characters. We define a \'whitespace character\' as any of the following:
--
-- * TAB (0x09)
-- * LF (0x0a)
-- * VT (0x0b)
-- * FF (0x0c)
-- * CR (0x0d)
-- * Space (0x20)
--
-- >>> stripEnd empty
-- ""
-- >>> stripEnd [ascii| "catboy goes nyan" |]
-- "catboy goes nyan"
-- >>> stripEnd [ascii| "\n\n    \tcatboy goes nyan" |]
-- "\n\n    \tcatboy goes nyan"
-- >>> stripEnd [ascii| "catboy goes nyan   \t\t\n" |]
-- "catboy goes nyan"
-- >>> stripEnd [ascii| "\n\n    \tcatboy goes nyan   \t\t\n" |]
-- "\n\n    \tcatboy goes nyan"
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.1
stripEnd :: AsciiText -> AsciiText
stripEnd :: AsciiText -> AsciiText
stripEnd = (AsciiChar -> Bool) -> AsciiText -> AsciiText
dropWhileEnd AsciiChar -> Bool
isSpace

-- | @splitAt n t@ is equivalent to @('take' n t, 'drop' n t)@.
--
-- >>> splitAt (-3) [ascii| "catboy" |]
-- ("","catboy")
-- >>> splitAt 0 [ascii| "catboy" |]
-- ("","catboy")
-- >>> splitAt 3 [ascii| "catboy" |]
-- ("cat","boy")
-- >>> splitAt 1000 [ascii| "catboy" |]
-- ("catboy","")
--
-- /Complexity:/ \(\Theta(1)\)
--
-- @since 1.0.0
{-# INLINE splitAt #-}
splitAt :: Int -> AsciiText -> (AsciiText, AsciiText)
splitAt :: Int -> AsciiText -> (AsciiText, AsciiText)
splitAt = (Int -> ByteString -> (ByteString, ByteString))
-> Int -> AsciiText -> (AsciiText, AsciiText)
coerce Int -> ByteString -> (ByteString, ByteString)
BS.splitAt

-- | @breakOn needle haystack@, given a @needle@ of length \(n\) and a
-- @haystack@ of length \(h\), attempts to find the first instance of @needle@
-- in @haystack@. If successful, return a tuple consisting of:
--
-- * The prefix of @haystack@ before the match; and
-- * The rest of @haystack@, starting with the match.
--
-- If the needle is empty, this returns @('empty', haystack)@. If no match can
-- be found, this instead returns @(haystack, 'empty')@.
--
-- If you need to repeatedly split on the same needle, consider 'breakOnAll', as
-- this will be more efficient due to only having to run the matching algorithm
-- once.
--
-- >>> breakOn empty [ascii| "catboy goes nyan" |]
-- ("","catboy goes nyan")
-- >>> breakOn [ascii| "nyan" |] empty
-- ("","")
-- >>> breakOn [ascii| "goes" |] [ascii| "catboy goes nyan" |]
-- ("catboy ","goes nyan")
-- >>> breakOn [ascii| "catboy" |] [ascii| "nyan nyan nyan" |]
-- ("nyan nyan nyan","")
--
-- = On complexity
--
-- This function is based on a variant of the
-- [NSN](https://www-igm.univ-mlv.fr/~lecroq/string/node13.html) algorithm,
-- except it does not detect overlapping needles. Its average-case analysis is
-- based on the assumption that:
--
-- * All ASCII symbols are equally likely to occur in both the needle and the
-- haystack; and
-- * The needle has length at least two; and
-- * Both the needle and the haystack contain at least four unique symbols.
--
-- We fall back to 'split' for singleton needles, and there is no work to be
-- done on empty needles, which means the second assumption always holds.
--
-- Worst-case behaviour becomes more likely the more your input satisfies the
-- following conditions:
--
-- * The needle and/or haystack use few unique symbols (less than four is the
-- worst); or
-- * The haystack contains many instances of the second symbol of the needle
-- which don't lead to full matches.
--
-- /Complexity:/ \(\Theta(h)\) average case, \(\Theta(h \cdot n\)\) worst-case.
--
-- /See also:/ Note that all the below are references for the original
-- algorithm, which includes searching for overlapping needles. Thus, our
-- implementation will perform better than the analysis suggests.
--
-- * [Description and pseudocode](https://www-igm.univ-mlv.fr/~lecroq/string/node13.html)
-- * ["Algorithms on Strings"](https://www.cambridge.org/core/books/algorithms-on-strings/19049704C876795D95D8882C73257C70) by Crochemore, Hancart and Lecroq. PDF available [here](https://www.researchgate.net/publication/220693689_Algorithms_on_Strings).
--
-- @since 1.0.1
breakOn :: AsciiText -> AsciiText -> (AsciiText, AsciiText)
breakOn :: AsciiText -> AsciiText -> (AsciiText, AsciiText)
breakOn needle :: AsciiText
needle@(AsciiText ByteString
n) haystack :: AsciiText
haystack@(AsciiText ByteString
h)
  | AsciiText -> Int
length AsciiText
needle Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 = (AsciiText
empty, AsciiText
haystack)
  | Bool
otherwise = case ByteString -> ByteString -> [Int]
indices ByteString
n ByteString
h of
    [] -> (AsciiText
haystack, AsciiText
empty)
    Int
ix : [Int]
_ -> Int -> AsciiText -> (AsciiText, AsciiText)
splitAt Int
ix AsciiText
haystack

-- | @breakOnEnd needle haystack@, given a @needle@ of length \(n\) and a
-- @haystack@ of length \(h\), attempts to find the last instance of @needle@ in
-- @haystack@. If successful, return a tuple consisting of:
--
-- * The prefix of @haystack@ up to, and including, the match; and
-- * The rest of @haystack@.
--
-- If the needle is empty, this returns @(haystack, 'empty')@. If no match can
-- be found, this instead returns @('empty', haystack)@.
--
-- This function is similar to 'breakOn'. If you need to repeatedly split on the
-- same needle, consider 'breakOnAll', as this will be more efficient due to
-- only having to run the matching algorithm once.
--
-- >>> breakOnEnd empty [ascii| "catboy goes nyan" |]
-- ("catboy goes nyan","")
-- >>> breakOnEnd [ascii| "nyan" |] empty
-- ("","")
-- >>> breakOnEnd [ascii| "goes" |] [ascii| "catboy goes nyan" |]
-- ("catboy goes"," nyan")
-- >>> breakOnEnd [ascii| "catboy" |] [ascii| "nyan nyan nyan" |]
-- ("","nyan nyan nyan")
--
-- = On complexity
--
-- This function is based on a variant of the
-- [NSN](https://www-igm.univ-mlv.fr/~lecroq/string/node13.html) algorithm,
-- except it does not detect overlapping needles. Its average-case analysis is
-- based on the assumption that:
--
-- * All ASCII symbols are equally likely to occur in both the needle and the
-- haystack; and
-- * The needle has length at least two; and
-- * Both the needle and the haystack contain at least four unique symbols.
--
-- We fall back to 'split' for singleton needles, and there is no work to be
-- done on empty needles, which means the second assumption always holds.
--
-- Worst-case behaviour becomes more likely the more your input satisfies the
-- following conditions:
--
-- * The needle and/or haystack use few unique symbols (less than four is the
-- worst); or
-- * The haystack contains many instances of the second symbol of the needle
-- which don't lead to full matches.
--
-- /Complexity:/ \(\Theta(h)\) average case, \(\Theta(h \cdot n\)\) worst-case.
--
-- /See also:/ Note that all the below are references for the original
-- algorithm, which includes searching for overlapping needles. Thus, our
-- implementation will perform better than the analysis suggests.
--
-- * [Description and pseudocode](https://www-igm.univ-mlv.fr/~lecroq/string/node13.html)
-- * ["Algorithms on Strings"](https://www.cambridge.org/core/books/algorithms-on-strings/19049704C876795D95D8882C73257C70) by Crochemore, Hancart and Lecroq. PDF available [here](https://www.researchgate.net/publication/220693689_Algorithms_on_Strings).
--
-- @since 1.0.1
breakOnEnd :: AsciiText -> AsciiText -> (AsciiText, AsciiText)
breakOnEnd :: AsciiText -> AsciiText -> (AsciiText, AsciiText)
breakOnEnd needle :: AsciiText
needle@(AsciiText ByteString
n) haystack :: AsciiText
haystack@(AsciiText ByteString
h)
  | AsciiText -> Int
length AsciiText
needle Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 = (AsciiText
haystack, AsciiText
empty)
  | Bool
otherwise = case [Int] -> Maybe Int
go ([Int] -> Maybe Int)
-> (ByteString -> [Int]) -> ByteString -> Maybe Int
forall {k} (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. ByteString -> ByteString -> [Int]
indices ByteString
n (ByteString -> Maybe Int) -> ByteString -> Maybe Int
forall a b. (a -> b) -> a -> b
$ ByteString
h of
    Maybe Int
Nothing -> (AsciiText
empty, AsciiText
haystack)
    Just Int
ix -> Int -> AsciiText -> (AsciiText, AsciiText)
splitAt (Int
ix Int -> Int -> Int
forall a. Num a => a -> a -> a
+ AsciiText -> Int
length AsciiText
needle) AsciiText
haystack
  where
    go :: [Int] -> Maybe Int
    go :: [Int] -> Maybe Int
go = \case
      [] -> Maybe Int
forall a. Maybe a
Nothing
      [Int
i] -> Int -> Maybe Int
forall a. a -> Maybe a
Just Int
i
      (Int
_ : [Int]
is) -> [Int] -> Maybe Int
go [Int]
is

-- | @break p t@ is equivalent to @('takeWhile' ('not' p) t, 'dropWhile' ('not'
-- p) t)@.
--
-- >>> break ([char| ' ' |] ==) [ascii| "catboy goes nyan" |]
-- ("catboy"," goes nyan")
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.0
break :: (AsciiChar -> Bool) -> AsciiText -> (AsciiText, AsciiText)
break :: (AsciiChar -> Bool) -> AsciiText -> (AsciiText, AsciiText)
break = ((Word8 -> Bool) -> ByteString -> (ByteString, ByteString))
-> (AsciiChar -> Bool) -> AsciiText -> (AsciiText, AsciiText)
coerce (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
BS.break

-- | @span p t@ is equivalent to @('takeWhile' p t, 'dropWhile' p t)@.
--
-- >>> span ([char| 'c' |] ==) [ascii| "catboy goes nyan" |]
-- ("c","atboy goes nyan")
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.0
{-# INLINE [1] span #-}
span :: (AsciiChar -> Bool) -> AsciiText -> (AsciiText, AsciiText)
span :: (AsciiChar -> Bool) -> AsciiText -> (AsciiText, AsciiText)
span = ((Word8 -> Bool) -> ByteString -> (ByteString, ByteString))
-> (AsciiChar -> Bool) -> AsciiText -> (AsciiText, AsciiText)
coerce (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
BS.span

-- | Separate a text into a list of texts such that:
--
-- * Their concatenation is equal to the original argument; and
-- * Equal adjacent characters in the original argument are in the same text in
-- the result.
--
-- This is a specialized form of 'groupBy', and is about 40% faster than
-- @'groupBy' '=='@.
--
-- >>> group empty
-- []
-- >>> group . singleton $ [char| 'w' |]
-- ["w"]
-- >>> group [ascii| "nyan" |]
-- ["n","y","a","n"]
-- >>> group [ascii| "nyaaaan" |]
-- ["n","y","aaaa","n"]
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.0
group :: AsciiText -> [AsciiText]
group :: AsciiText -> [AsciiText]
group = (ByteString -> [ByteString]) -> AsciiText -> [AsciiText]
coerce ByteString -> [ByteString]
BS.group

-- | Separate a text into a list of texts such that:
--
-- * Their concatenation is equal to the original argument; and
-- * Adjacent characters for which the function argument returns @True@ are in
-- the same text in the result.
--
-- 'group' is a special case for the function argument '=='; it is also about
-- 40% faster.
--
-- >>> groupBy (<) empty
-- []
-- >>> groupBy (<) . singleton $ [char| 'w' |]
-- ["w"]
-- >>> groupBy (<) [ascii| "catboy goes nyan" |]
-- ["c","atboy"," goes"," nyan"]
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.0
groupBy :: (AsciiChar -> AsciiChar -> Bool) -> AsciiText -> [AsciiText]
groupBy :: (AsciiChar -> AsciiChar -> Bool) -> AsciiText -> [AsciiText]
groupBy = ((Word8 -> Word8 -> Bool) -> ByteString -> [ByteString])
-> (AsciiChar -> AsciiChar -> Bool) -> AsciiText -> [AsciiText]
coerce (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
BS.groupBy

-- | All prefixes of the argument, from shortest to longest.
--
-- >>> inits empty
-- [""]
-- >>> inits . singleton $ [char| 'w' |]
-- ["","w"]
-- >>> inits [ascii| "nyan" |]
-- ["","n","ny","nya","nyan"]
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.0
inits :: AsciiText -> [AsciiText]
inits :: AsciiText -> [AsciiText]
inits = (ByteString -> [ByteString]) -> AsciiText -> [AsciiText]
coerce ByteString -> [ByteString]
BS.inits

-- | All suffixes of the argument, from shortest to longest.
--
-- >>> tails empty
-- [""]
-- >>> tails . singleton $ [char| 'w' |]
-- ["w",""]
-- >>> tails [ascii| "nyan" |]
-- ["nyan","yan","an","n",""]
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.0
tails :: AsciiText -> [AsciiText]
tails :: AsciiText -> [AsciiText]
tails = (ByteString -> [ByteString]) -> AsciiText -> [AsciiText]
coerce ByteString -> [ByteString]
BS.tails

-- Breaking into many substrings

-- | @splitOn needle haystack@, given a @needle@ of length \(n\) and a haystack
-- of length \(h\), breaks @haystack@ into pieces, separated by @needle@. Any
-- occurrences of @needle@ in @haystack@ are consumed.
--
-- >>> splitOn empty [ascii| "catboy goes nyan and goes nyan" |]
-- ["catboy goes nyan and goes nyan"]
-- >>> splitOn [ascii| "nyan" |] empty
-- [""]
-- >>> splitOn [ascii| "nyan" |] [ascii| "catboy goes nyan and goes nyan" |]
-- ["catboy goes "," and goes ",""]
-- >>> splitOn [ascii| "nyan" |] [ascii| "nyan" |]
-- ["",""]
-- >>> splitOn [ascii| "nyan" |] [ascii| "catboy" |]
-- ["catboy"]
--
-- = On complexity
--
-- This function is based on a variant of the
-- [NSN](https://www-igm.univ-mlv.fr/~lecroq/string/node13.html) algorithm,
-- except it does not detect overlapping needles. Its average-case analysis is
-- based on the assumption that:
--
-- * All ASCII symbols are equally likely to occur in both the needle and the
-- haystack; and
-- * The needle has length at least two; and
-- * Both the needle and the haystack contain at least four unique symbols.
--
-- We fall back to 'split' for singleton needles, and there is no work to be
-- done on empty needles, which means the second assumption always holds.
--
-- Worst-case behaviour becomes more likely the more your input satisfies the
-- following conditions:
--
-- * The needle and/or haystack use few unique symbols (less than four is the
-- worst); or
-- * The haystack contains many instances of the second symbol of the needle
-- which don't lead to full matches.
--
-- /Complexity:/ \(\Theta(h)\) average case, \(\Theta(h \cdot n\)\) worst-case.
--
-- /See also:/ Note that all the below are references for the original
-- algorithm, which includes searching for overlapping needles. Thus, our
-- implementation will perform better than the analysis suggests.
--
-- * [Description and pseudocode](https://www-igm.univ-mlv.fr/~lecroq/string/node13.html)
-- * ["Algorithms on Strings"](https://www.cambridge.org/core/books/algorithms-on-strings/19049704C876795D95D8882C73257C70) by Crochemore, Hancart and Lecroq. PDF available [here](https://www.researchgate.net/publication/220693689_Algorithms_on_Strings).
--
-- @since 1.0.1
splitOn :: AsciiText -> AsciiText -> [AsciiText]
splitOn :: AsciiText -> AsciiText -> [AsciiText]
splitOn needle :: AsciiText
needle@(AsciiText ByteString
n) haystack :: AsciiText
haystack@(AsciiText ByteString
h)
  | Int
needleLen Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 = [AsciiText
haystack]
  | AsciiText -> Int
length AsciiText
haystack Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 = [AsciiText
empty]
  | Int
needleLen Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
1 = (AsciiChar -> Bool) -> AsciiText -> [AsciiText]
split (AsciiChar -> AsciiChar -> Bool
forall a. Eq a => a -> a -> Bool
== (Word8 -> AsciiChar
AsciiChar (Word8 -> AsciiChar)
-> (ByteString -> Word8) -> ByteString -> AsciiChar
forall {k} (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. ByteString -> Word8
BS.head (ByteString -> AsciiChar) -> ByteString -> AsciiChar
forall a b. (a -> b) -> a -> b
$ ByteString
n)) AsciiText
haystack
  | Bool
otherwise = Int -> [Int] -> [AsciiText]
go Int
0 (ByteString -> ByteString -> [Int]
indices ByteString
n ByteString
h)
  where
    needleLen :: Int
    needleLen :: Int
needleLen = AsciiText -> Int
length AsciiText
needle
    go :: Int -> [Int] -> [AsciiText]
    go :: Int -> [Int] -> [AsciiText]
go Int
pos = \case
      [] -> [Int -> AsciiText -> AsciiText
drop Int
pos AsciiText
haystack]
      (Int
ix : [Int]
ixes) ->
        let chunkLen :: Int
chunkLen = Int
ix Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
pos
            segment :: AsciiText
segment = Int -> AsciiText -> AsciiText
take Int
chunkLen (AsciiText -> AsciiText)
-> (AsciiText -> AsciiText) -> AsciiText -> AsciiText
forall {k} (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Int -> AsciiText -> AsciiText
drop Int
pos (AsciiText -> AsciiText) -> AsciiText -> AsciiText
forall a b. (a -> b) -> a -> b
$ AsciiText
haystack
         in AsciiText
segment AsciiText -> [AsciiText] -> [AsciiText]
forall a. a -> [a] -> [a]
: Int -> [Int] -> [AsciiText]
go (Int
pos Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
chunkLen Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
needleLen) [Int]
ixes

-- | @split p t@ separates @t@ into components delimited by separators, for
-- which @p@ returns @True@. The results do not contain the separators.
--
-- \(n\) adjacent separators result in \(n - 1\) empty components in the result.
--
-- >>> split ([char| '~' |] ==) empty
-- []
-- >>> split ([char| '~' |] ==) . singleton $ [char| '~' |]
-- ["",""]
-- >>> split ([char| '~' |] ==) [ascii| "nyan" |]
-- ["nyan"]
-- >>> split ([char| '~' |] ==) [ascii| "~nyan" |]
-- ["","nyan"]
-- >>> split ([char| '~' |] ==) [ascii| "nyan~" |]
-- ["nyan",""]
-- >>> split ([char| '~' |] ==) [ascii| "nyan~nyan"|]
-- ["nyan","nyan"]
-- >>> split ([char| '~' |] ==) [ascii| "nyan~~nyan" |]
-- ["nyan","","nyan"]
-- >>> split ([char| '~' |] ==) [ascii| "nyan~~~nyan" |]
-- ["nyan","","","nyan"]
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.0
{-# INLINE split #-}
split :: (AsciiChar -> Bool) -> AsciiText -> [AsciiText]
split :: (AsciiChar -> Bool) -> AsciiText -> [AsciiText]
split = ((Word8 -> Bool) -> ByteString -> [ByteString])
-> (AsciiChar -> Bool) -> AsciiText -> [AsciiText]
coerce (Word8 -> Bool) -> ByteString -> [ByteString]
BS.splitWith

-- | Splits a text into chunks of the specified length. Equivalent to repeatedly
-- 'take'ing the specified length until exhaustion. The last item in the result
-- may thus be shorter than requested.
--
-- For any @n <= 0@ and any @t@, @chunksOf n t@ yields the empty list. This is
-- identical to the behaviour of the function of the same name in the [text
-- package](http://hackage.haskell.org/package/text-1.2.4.1/docs/Data-Text.html#v:chunksOf),
-- although it doesn't document this fact.
--
-- >>> chunksOf (-100) [ascii| "I am a catboy" |]
-- []
-- >>> chunksOf (-100) empty
-- []
-- >>> chunksOf 0 [ascii| "I am a catboy" |]
-- []
-- >>> chunksOf 0 empty
-- []
-- >>> chunksOf 1 [ascii| "I am a catboy" |]
-- ["I"," ","a","m"," ","a"," ","c","a","t","b","o","y"]
-- >>> chunksOf 1 empty
-- []
-- >>> chunksOf 2 [ascii| "I am a catboy" |]
-- ["I ","am"," a"," c","at","bo","y"]
-- >>> chunksOf 300 [ascii| "I am a catboy" |]
-- ["I am a catboy"]
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.1
chunksOf :: Int -> AsciiText -> [AsciiText]
chunksOf :: Int -> AsciiText -> [AsciiText]
chunksOf Int
n AsciiText
t
  | Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0 = []
  | AsciiText
t AsciiText -> AsciiText -> Bool
forall a. Eq a => a -> a -> Bool
== AsciiText
empty = []
  | Bool
otherwise = case Int -> AsciiText -> (AsciiText, AsciiText)
splitAt Int
n AsciiText
t of
    (AsciiText
h, AsciiText
t') -> AsciiText
h AsciiText -> [AsciiText] -> [AsciiText]
forall a. a -> [a] -> [a]
: Int -> AsciiText -> [AsciiText]
chunksOf Int
n AsciiText
t'

-- Breaking into lines and words

-- | Identical to the functions of the same name in the [text
-- package](http://hackage.haskell.org/package/text-1.2.4.1/docs/Data-Text.html#v:lines),
-- and [the
-- Prelude](https://hackage.haskell.org/package/base-4.14.1.0/docs/Prelude.html#v:lines).
-- Specifically, separates the argument into pieces, with LF characters (0x0a) as
-- separators. A single trailing LF is ignored. None of the final results
-- contain LF.
--
-- We chose to follow the same semantics for this function as the text package
-- and the Prelude. This has some consequences,
-- which the documentation of both the text package and the Prelude does not
-- properly explain. We list them here - bear these in mind when using this
-- function, as well as 'unlines':
--
-- * No platform-specific concept of a \'newline\' is ever used by this
-- function. Separation is done on LF, and /only/ LF, regardless of platform.
-- The documentation in both the text package and the Prelude confusingly refers
-- to \'newline characters\', which is a category error. We thus specify that LF
-- is the character being split on, rather than mentioning \'newlines\' in any
-- way, shape or form.
-- * @'unlines' '.' 'lines'@ is /not/ the same as @'Prelude.id'@. This is
-- misleadingly described in the Prelude, which claims that (its version of)
-- @unlines@ is \'an inverse operation\' to (its version of) @lines@. For a
-- precise explanation of why this is the case, please see the documentation for
-- 'unlines'.
-- * @'lines'@ is not the same as @'split' (['char'| \'\n\' |] '==')@. See the
-- doctests below for a demonstration of how they differ.
--
-- >>> lines empty
-- []
-- >>> split ([char| '\n' |] ==) empty
-- []
-- >>> lines [ascii| "catboy goes nyan" |]
-- ["catboy goes nyan"]
-- >>> split ([char| '\n' |] ==) [ascii| "catboy goes nyan" |]
-- ["catboy goes nyan"]
-- >>> lines [ascii| "catboy goes nyan\n" |]
-- ["catboy goes nyan"]
-- >>> split ([char| '\n' |] ==) [ascii| "catboy goes nyan\n" |]
-- ["catboy goes nyan",""]
-- >>> lines [ascii| "\ncatboy\n\n\ngoes\n\nnyan\n\n" |]
-- ["","catboy","","","goes","","nyan",""]
-- >>> split ([char| '\n' |] ==) [ascii| "\ncatboy\n\n\ngoes\n\nnyan\n\n" |]
-- ["","catboy","","","goes","","nyan","",""]
-- >>> lines [ascii| "\r\ncatboy\r\ngoes\r\nnyan\r\n" |]
-- ["\r","catboy\r","goes\r","nyan\r"]
-- >>> split ([char| '\n' |] ==) [ascii| "\r\ncatboy\r\ngoes\r\nnyan\r\n" |]
-- ["\r","catboy\r","goes\r","nyan\r",""]
--
-- /Complexity:/ \(\Theta(n)\)
--
-- /See also:/ [Wikipedia on newlines](https://en.wikipedia.org/wiki/Newline)
--
-- @since 1.0.1
lines :: AsciiText -> [AsciiText]
lines :: AsciiText -> [AsciiText]
lines (AsciiText ByteString
bs) = [ByteString] -> [AsciiText]
coerce ([ByteString] -> [AsciiText])
-> (ByteString -> [ByteString]) -> ByteString -> [AsciiText]
forall {k} (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. ByteString -> [ByteString]
go (ByteString -> [AsciiText]) -> ByteString -> [AsciiText]
forall a b. (a -> b) -> a -> b
$ ByteString
bs
  where
    go :: ByteString -> [ByteString]
    go :: ByteString -> [ByteString]
go ByteString
rest = case ByteString -> Maybe (Word8, ByteString)
BS.uncons ByteString
rest of
      Maybe (Word8, ByteString)
Nothing -> []
      Just (Word8, ByteString)
_ -> case (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
BS.break (Word8
0x0a Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
==) ByteString
rest of
        (ByteString
h, ByteString
t) ->
          ByteString
h ByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
: case ByteString -> Maybe (Word8, ByteString)
BS.uncons ByteString
t of
            Maybe (Word8, ByteString)
Nothing -> []
            Just (Word8
_, ByteString
t') -> ByteString -> [ByteString]
go ByteString
t'

-- | Identical to the functions of the same name in the [text
-- package](http://hackage.haskell.org/package/text-1.2.4.1/docs/Data-Text.html#v:words)
-- and [the
-- Prelude](https://hackage.haskell.org/package/base-4.14.1.0/docs/Prelude.html#v:words).
-- Specifically, separates the argument into pieces, with (non-empty sequences
-- of) word separator characters as separators. A \'word separator character\'
-- is any of the following:
--
-- * TAB (0x09)
-- * LF (0x0a)
-- * VT (0x0b)
-- * FF (0x0c)
-- * CR (0x0d)
-- * Space (0x20)
--
-- None of the final results contain any word separator characters. Any sequence
-- of leading, or trailing, word separator characters will be ignored.
--
-- We chose to follow the same semantics for this function as the text package
-- and the Prelude. This has the consequence that @'unwords' '.' 'words'@ is
-- /not/ the same as 'Prelude.id', although the documentation for the Prelude
-- confusingly describes (its version of) @unwords@ as an \'inverse operation\'
-- to (its version of) @words@. See the documentation for 'unwords' for an
-- explanation of why this is the case.
--
-- >>> words empty
-- []
-- >>> words [ascii| "catboy" |]
-- ["catboy"]
-- >>> words [ascii| "  \r\r\r\rcatboy   \n\rgoes\t\t\t\t\tnyan\n  " |]
-- ["catboy","goes","nyan"]
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.1
words :: AsciiText -> [AsciiText]
words :: AsciiText -> [AsciiText]
words (AsciiText ByteString
bs) = [ByteString] -> [AsciiText]
coerce ([ByteString] -> [AsciiText])
-> (ByteString -> [ByteString]) -> ByteString -> [AsciiText]
forall {k} (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. ByteString -> [ByteString]
go (ByteString -> [AsciiText]) -> ByteString -> [AsciiText]
forall a b. (a -> b) -> a -> b
$ ByteString
bs
  where
    go :: ByteString -> [ByteString]
    go :: ByteString -> [ByteString]
go ByteString
rest =
      let rest' :: ByteString
rest' = (Word8 -> Bool) -> ByteString -> ByteString
BS.dropWhile Word8 -> Bool
isSep ByteString
rest
       in case ByteString -> Int
BS.length ByteString
rest' of
            Int
0 -> []
            Int
_ -> case (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
BS.break Word8 -> Bool
isSep ByteString
rest' of
              (ByteString
h, ByteString
t) -> ByteString
h ByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
: ByteString -> [ByteString]
go ByteString
t
    isSep :: Word8 -> Bool
    isSep :: Word8 -> Bool
isSep Word8
w8
      | Word8
w8 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
32 = Bool
True
      | Word8
9 Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
w8 Bool -> Bool -> Bool
&& Word8
w8 Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
13 = Bool
True
      | Bool
otherwise = Bool
False

-- | Identical to the functions of the same name in the [text
-- package](http://hackage.haskell.org/package/text-1.2.4.1/docs/Data-Text.html#v:unlines)
-- and [the
-- Prelude](https://hackage.haskell.org/package/base-4.14.1.0/docs/Prelude.html#v:unlines).
-- Specifically, appends an LF character to each of the texts, then concatenates. Equivalent
-- to @'foldMap' (`'snoc'` [char| '\n' |])@.
--
-- We chose to follow the same semantics for this function as the text package
-- and the Prelude. This has some consequences, which the documentation of both
-- the text package and the Prelude does not properly explain. We list them here
-- - bear these in mind when using this function, as well as 'lines':
--
-- * No platform-specific concept of a \'newline\' is ever used by this
-- function. The documentation in both the text package and the Prelude
-- confusing refer to appending a \'terminating newline\', which is only a
-- correct statement on platforms where a newline is LF. We thus specify that we
-- append LF, rather than mentioning \'newlines\' in any way, shape or form.
-- * @'unlines' '.' 'lines'@ is /not/ the same as @'Prelude.id'@. This is
-- misleadingly described in the Prelude, which claims that (its version of)
-- @unlines@ is \'an inverse operation\' to (its version of) @lines@. See the
-- doctests below for a demonstration of this.
--
-- >>> unlines []
-- ""
-- >>> unlines [[ascii| "nyan" |]]
-- "nyan\n"
-- >>> unlines . Prelude.replicate 3 $ [ascii| "nyan" |]
-- "nyan\nnyan\nnyan\n"
-- >>> unlines . lines $ [ascii| "catboy goes nyan" |]
-- "catboy goes nyan\n"
--
-- /Complexity:/ \(\Theta(n)\)
--
-- /See also:/ [Wikipedia on newlines](https://en.wikipedia.org/wiki/Newline)
--
-- @since 1.0.1
unlines :: (Foldable f) => f AsciiText -> AsciiText
unlines :: forall (f :: * -> *). Foldable f => f AsciiText -> AsciiText
unlines = (AsciiText -> AsciiText) -> f AsciiText -> AsciiText
forall (t :: * -> *) m a.
(Foldable t, Monoid m) =>
(a -> m) -> t a -> m
foldMap (AsciiText -> AsciiChar -> AsciiText
`snoc` [char| '\n' |])

-- | Identical to the functions of the same name in the [text
-- package](http://hackage.haskell.org/package/text-1.2.4.1/docs/Data-Text.html#v:unwords)
-- and [the
-- Prelude](https://hackage.haskell.org/package/base-4.14.1.0/docs/Prelude.html#v:unwords).
-- Specifically, links together adjacent texts with a Space character. Equivalent to
-- @'intercalate' [ascii| " " |]@.
--
-- We chose to follow the same semantics for this function as the text package
-- and the Prelude. This has the consequence that @'unwords' '.' 'words'@ is
-- /not/ the same as 'Prelude.id', although the documentation for the Prelude
-- confusingly describes (its version of) @unwords@ as an \'inverse operation\'
-- to (its version of) @words@. See the doctests below for a demonstration of
-- this.
--
-- >>> unwords []
-- ""
-- >>> unwords [[ascii| "nyan" |]]
-- "nyan"
-- >>> unwords . Prelude.replicate 3 $ [ascii| "nyan" |]
-- "nyan nyan nyan"
-- >>> unwords . words $ [ascii| "nyan\nnyan\nnyan" |]
-- "nyan nyan nyan"
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.1
unwords :: [AsciiText] -> AsciiText
unwords :: [AsciiText] -> AsciiText
unwords = AsciiText -> [AsciiText] -> AsciiText
intercalate [ascii| " " |]

-- View patterns

-- | Return 'Just' the suffix of the second text if it has the first text as
-- a prefix, 'Nothing' otherwise.
--
-- >>> stripPrefix [ascii| "catboy" |] empty
-- Nothing
-- >>> stripPrefix empty [ascii| "catboy" |]
-- Just "catboy"
-- >>> stripPrefix [ascii| "nyan" |] [ascii| "nyan" |]
-- Just ""
-- >>> stripPrefix [ascii| "nyan" |] [ascii| "catboy" |]
-- Nothing
-- >>> stripPrefix [ascii| "catboy" |] [ascii| "catboy goes nyan" |]
-- Just " goes nyan"
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.0
stripPrefix :: AsciiText -> AsciiText -> Maybe AsciiText
stripPrefix :: AsciiText -> AsciiText -> Maybe AsciiText
stripPrefix = (ByteString -> ByteString -> Maybe ByteString)
-> AsciiText -> AsciiText -> Maybe AsciiText
coerce ByteString -> ByteString -> Maybe ByteString
BS.stripPrefix

-- | Return 'Just' the prefix of the second text if it has the first text as
-- a suffix, 'Nothing' otherwise.
--
-- >>> stripSuffix [ascii| "catboy" |] empty
-- Nothing
-- >>> stripSuffix empty [ascii| "catboy" |]
-- Just "catboy"
-- >>> stripSuffix [ascii| "nyan" |] [ascii| "nyan" |]
-- Just ""
-- >>> stripSuffix [ascii| "nyan" |] [ascii| "catboy" |]
-- Nothing
-- >>> stripSuffix [ascii| "nyan" |] [ascii| "catboy goes nyan" |]
-- Just "catboy goes "
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.0
stripSuffix :: AsciiText -> AsciiText -> Maybe AsciiText
stripSuffix :: AsciiText -> AsciiText -> Maybe AsciiText
stripSuffix = (ByteString -> ByteString -> Maybe ByteString)
-> AsciiText -> AsciiText -> Maybe AsciiText
coerce ByteString -> ByteString -> Maybe ByteString
BS.stripSuffix

-- | @stripInfix needle haystack@, given a needle of length \(n\) and a haystack
-- of length \(h\), attempts to find the first instance of @needle@ in
-- @haystack@. If successful, it returns 'Just' the pair consisting of:
--
-- * All the text in @haystack@ before the first instance of @needle@; and
-- * All the text in @haystack@ after, but not including, the first instance of
-- @needle@.
--
-- If there is no instance of @needle@ in @haystack@, this returns 'Nothing'.
--
-- >>> stripInfix [ascii| "catboy" |] empty
-- Nothing
-- >>> stripInfix empty [ascii| "nyan catboy nyan nyan" |]
-- Nothing
-- >>> stripInfix [ascii| "catboy" |] [ascii| "catboy" |]
-- Just ("","")
-- >>> stripInfix [ascii| "catboy" |] [ascii| "nyan catboy" |]
-- Just ("nyan ","")
-- >>> stripInfix [ascii| "catboy" |] [ascii| "catboy nyan" |]
-- Just (""," nyan")
-- >>> stripInfix [ascii| "catboy" |] [ascii| "nyan catboy nyan nyan" |]
-- Just ("nyan "," nyan nyan")
-- >>> stripInfix [ascii| "nyan" |] [ascii| "nyanyanyan" |]
-- Just ("","yanyan")
--
-- = On complexity
--
-- This function is based on a variant of the
-- [NSN](https://www-igm.univ-mlv.fr/~lecroq/string/node13.html) algorithm,
-- except it does not detect overlapping needles. Its average-case analysis is
-- based on the assumption that:
--
-- * All ASCII symbols are equally likely to occur in both the needle and the
-- haystack; and
-- * The needle has length at least two; and
-- * Both the needle and the haystack contain at least four unique symbols.
--
-- We fall back to 'split' for singleton needles, and there is no work to be
-- done on empty needles, which means the second assumption always holds.
--
-- Worst-case behaviour becomes more likely the more your input satisfies the
-- following conditions:
--
-- * The needle and/or haystack use few unique symbols (less than four is the
-- worst); or
-- * The haystack contains many instances of the second symbol of the needle
-- which don't lead to full matches.
--
-- /Complexity:/ \(\Theta(h)\) average case, \(\Theta(h \cdot n\)\) worst-case.
--
-- /See also:/ Note that all the below are references for the original
-- algorithm, which includes searching for overlapping needles. Thus, our
-- implementation will perform better than the analysis suggests.
--
-- * [Description and pseudocode](https://www-igm.univ-mlv.fr/~lecroq/string/node13.html)
-- * ["Algorithms on Strings"](https://www.cambridge.org/core/books/algorithms-on-strings/19049704C876795D95D8882C73257C70) by Crochemore, Hancart and Lecroq. PDF available [here](https://www.researchgate.net/publication/220693689_Algorithms_on_Strings).
--
-- @since 1.0.1
stripInfix :: AsciiText -> AsciiText -> Maybe (AsciiText, AsciiText)
stripInfix :: AsciiText -> AsciiText -> Maybe (AsciiText, AsciiText)
stripInfix needle :: AsciiText
needle@(AsciiText ByteString
n) haystack :: AsciiText
haystack@(AsciiText ByteString
h)
  | Int -> Int -> Int
forall a. Ord a => a -> a -> a
P.min (AsciiText -> Int
length AsciiText
needle) (AsciiText -> Int
length AsciiText
haystack) Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 = Maybe (AsciiText, AsciiText)
forall a. Maybe a
Nothing
  | Bool
otherwise = case ByteString -> ByteString -> [Int]
indices ByteString
n ByteString
h of
    [] -> Maybe (AsciiText, AsciiText)
forall a. Maybe a
Nothing
    (Int
ix : [Int]
_) -> (AsciiText, AsciiText) -> Maybe (AsciiText, AsciiText)
forall a. a -> Maybe a
Just (Int -> AsciiText -> AsciiText
take Int
ix AsciiText
haystack, Int -> AsciiText -> AsciiText
drop (Int
ix Int -> Int -> Int
forall a. Num a => a -> a -> a
+ AsciiText -> Int
length AsciiText
needle) AsciiText
haystack)

-- | Find the longest non-empty common prefix of the arguments and return it,
-- along with the remaining suffixes of both arguments. If the arguments lack a
-- common, non-empty prefix, returns 'Nothing'.
--
-- >>> commonPrefixes empty [ascii| "catboy" |]
-- Nothing
-- >>> commonPrefixes [ascii| "catboy" |] empty
-- Nothing
-- >>> commonPrefixes [ascii| "catboy" |] [ascii| "nyan" |]
-- Nothing
-- >>> commonPrefixes [ascii| "catboy" |] [ascii| "catboy" |]
-- Just ("catboy","","")
-- >>> commonPrefixes [ascii| "nyan" |] [ascii| "nyan nyan" |]
-- Just ("nyan",""," nyan")
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.1
commonPrefixes :: AsciiText -> AsciiText -> Maybe (AsciiText, AsciiText, AsciiText)
commonPrefixes :: AsciiText -> AsciiText -> Maybe (AsciiText, AsciiText, AsciiText)
commonPrefixes (AsciiText ByteString
t1) (AsciiText ByteString
t2) =
  Int -> (AsciiText, AsciiText, AsciiText)
go2 (Int -> (AsciiText, AsciiText, AsciiText))
-> Maybe Int -> Maybe (AsciiText, AsciiText, AsciiText)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Maybe Int -> Int -> Maybe Int) -> Maybe Int -> [Int] -> Maybe Int
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
F.foldl' Maybe Int -> Int -> Maybe Int
go Maybe Int
forall a. Maybe a
Nothing [Int
0 .. Int -> Int -> Int
forall a. Ord a => a -> a -> a
P.min (ByteString -> Int
BS.length ByteString
t1) (ByteString -> Int
BS.length ByteString
t2) Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1]
  where
    go :: Maybe Int -> Int -> Maybe Int
    go :: Maybe Int -> Int -> Maybe Int
go Maybe Int
acc Int
i
      | ByteString -> Int -> Word8
BS.index ByteString
t1 Int
i Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== ByteString -> Int -> Word8
BS.index ByteString
t2 Int
i = Int -> Maybe Int
forall a. a -> Maybe a
Just Int
i
      | Bool
otherwise = Maybe Int
acc
    go2 :: Int -> (AsciiText, AsciiText, AsciiText)
    go2 :: Int -> (AsciiText, AsciiText, AsciiText)
go2 Int
i = case Int -> ByteString -> (ByteString, ByteString)
BS.splitAt (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) ByteString
t1 of
      (ByteString
h, ByteString
t) -> (ByteString, ByteString, ByteString)
-> (AsciiText, AsciiText, AsciiText)
coerce (ByteString
h, ByteString
t, Int -> ByteString -> ByteString
BS.drop (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) ByteString
t2)

-- Searching

-- | Return the text comprised of all the characters that satisfy the function
-- argument (that is, for which it returns 'True'), in the same order as in the
-- original.
--
-- >>> filter ([char| 'n' |] ==) empty
-- ""
-- >>> filter ([char| 'n' |] ==) [ascii| "catboy" |]
-- ""
-- >>> filter ([char| 'n' |] ==) [ascii| "nyan" |]
-- "nn"
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.0
{-# INLINE filter #-}
filter :: (AsciiChar -> Bool) -> AsciiText -> AsciiText
filter :: (AsciiChar -> Bool) -> AsciiText -> AsciiText
filter = ((Word8 -> Bool) -> ByteString -> ByteString)
-> (AsciiChar -> Bool) -> AsciiText -> AsciiText
coerce (Word8 -> Bool) -> ByteString -> ByteString
BS.filter

-- | @breakOnAll needle haystack@, given a @needle@ of length \(n\) and a
-- @haystack@ of length \(h\), finds all non-overlapping instances of @needle@
-- in @haystack@. Each result consists of the following elements:
--
-- * The prefix prior to the match; and
-- * The match, followed by the rest of the string.
--
-- If given an empty needle, the result is a singleton list containing a pair of
-- the entire haystack and the empty text. If given an empty haystack, the
-- result is an empty list.
--
-- >>> breakOnAll empty [ascii| "nyan nyan nyan" |]
-- [("nyan nyan nyan","")]
-- >>> breakOnAll [ascii| "nyan" |] empty
-- []
-- >>> breakOnAll [ascii| "nyan" |] [ascii| "nyan" |]
-- [("","nyan")]
-- >>> breakOnAll [ascii| "nyan" |] [ascii| "nyan nyan nyan" |]
-- [("","nyan nyan nyan"),("nyan ","nyan nyan"),("nyan nyan ","nyan")]
-- >>> breakOnAll [ascii| "nyan" |] [ascii| "nyanyanyan" |]
-- [("","nyanyanyan"),("nyanya","nyan")]
--
-- = On complexity
--
-- This function is based on a variant of the
-- [NSN](https://www-igm.univ-mlv.fr/~lecroq/string/node13.html) algorithm,
-- except it does not detect overlapping needles. Its average-case analysis is
-- based on the assumption that:
--
-- * All ASCII symbols are equally likely to occur in both the needle and the
-- haystack; and
-- * The needle has length at least two; and
-- * Both the needle and the haystack contain at least four unique symbols.
--
-- We fall back to 'split' for singleton needles, and there is no work to be
-- done on empty needles, which means the second assumption always holds.
--
-- Worst-case behaviour becomes more likely the more your input satisfies the
-- following conditions:
--
-- * The needle and/or haystack use few unique symbols (less than four is the
-- worst); or
-- * The haystack contains many instances of the second symbol of the needle
-- which don't lead to full matches.
--
-- /Complexity:/ \(\Theta(h)\) average case, \(\Theta(h \cdot n\)\) worst-case.
--
-- /See also:/ Note that all the below are references for the original
-- algorithm, which includes searching for overlapping needles. Thus, our
-- implementation will perform better than the analysis suggests.
--
-- * [Description and pseudocode](https://www-igm.univ-mlv.fr/~lecroq/string/node13.html)
-- * ["Algorithms on Strings"](https://www.cambridge.org/core/books/algorithms-on-strings/19049704C876795D95D8882C73257C70) by Crochemore, Hancart and Lecroq. PDF available [here](https://www.researchgate.net/publication/220693689_Algorithms_on_Strings).
--
-- @since 1.0.1
breakOnAll :: AsciiText -> AsciiText -> [(AsciiText, AsciiText)]
breakOnAll :: AsciiText -> AsciiText -> [(AsciiText, AsciiText)]
breakOnAll needle :: AsciiText
needle@(AsciiText ByteString
n) haystack :: AsciiText
haystack@(AsciiText ByteString
h)
  | AsciiText -> Int
length AsciiText
needle Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 = [(AsciiText
haystack, AsciiText
empty)]
  | AsciiText -> Int
length AsciiText
haystack Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 = []
  | Bool
otherwise = (Int -> AsciiText -> (AsciiText, AsciiText)
`splitAt` AsciiText
haystack) (Int -> (AsciiText, AsciiText))
-> [Int] -> [(AsciiText, AsciiText)]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ByteString -> ByteString -> [Int]
indices ByteString
n ByteString
h

-- | Returns 'Just' the first character in the text satisfying the predicate,
-- 'Nothing' otherwise.
--
-- >>> find ([char| 'n' |] ==) empty
-- Nothing
-- >>> find ([char| 'n' |] ==) [ascii| "catboy" |]
-- Nothing
-- >>> find ([char| 'n' |] ==) [ascii| "nyan" |]
-- Just '0x6e'
-- >>> find ([char| 'n' |] /=) [ascii| "nyan" |]
-- Just '0x79'
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.0
{-# INLINE find #-}
find :: (AsciiChar -> Bool) -> AsciiText -> Maybe AsciiChar
find :: (AsciiChar -> Bool) -> AsciiText -> Maybe AsciiChar
find = ((Word8 -> Bool) -> ByteString -> Maybe Word8)
-> (AsciiChar -> Bool) -> AsciiText -> Maybe AsciiChar
coerce (Word8 -> Bool) -> ByteString -> Maybe Word8
BS.find

-- | @partition p t@ is equivalent to @('filter' p t, 'filter' ('not' p) t)@.
--
-- >>> partition ([char| 'n' |] ==) empty
-- ("","")
-- >>> partition ([char| 'n' |] ==) . singleton $ [char| 'n' |]
-- ("n","")
-- >>> partition ([char| 'n' |] ==) . singleton $ [char| 'w' |]
-- ("","w")
-- >>> partition ([char| 'n' |] ==) [ascii| "nyan!" |]
-- ("nn","ya!")
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.0
partition :: (AsciiChar -> Bool) -> AsciiText -> (AsciiText, AsciiText)
partition :: (AsciiChar -> Bool) -> AsciiText -> (AsciiText, AsciiText)
partition = ((Word8 -> Bool) -> ByteString -> (ByteString, ByteString))
-> (AsciiChar -> Bool) -> AsciiText -> (AsciiText, AsciiText)
coerce (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
BS.partition

-- Indexing

-- | Retrieve the ASCII character at the given position in the text. Indexes
-- begin from 0. If the index provided is invalid (that is, less than 0, equal
-- to the length of the text, or greater), return 'Nothing'; otherwise, return
-- 'Just' the character at that position.
--
-- >>> index [ascii| "nyan nyan nyan" |] (-100)
-- Nothing
-- >>> index [ascii| "nyan nyan nyan" |] 0
-- Just '0x6e'
-- >>> index [ascii| "nyan nyan nyan" |] 5
-- Just '0x6e'
-- >>> index [ascii| "nyan nyan nyan" |] 2000
-- Nothing
--
-- /Complexity:/ \(\Theta(1)\)
--
-- @since 1.0.1
index :: AsciiText -> Int -> Maybe AsciiChar
index :: AsciiText -> Int -> Maybe AsciiChar
index AsciiText
at Int
i
  | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
0 Bool -> Bool -> Bool
|| Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= AsciiText -> Int
length AsciiText
at = Maybe AsciiChar
forall a. Maybe a
Nothing
  | Bool
otherwise = AsciiChar -> Maybe AsciiChar
forall a. a -> Maybe a
Just (AsciiChar -> Maybe AsciiChar)
-> (Int -> AsciiChar) -> Int -> Maybe AsciiChar
forall {k} (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. (ByteString -> Int -> Word8) -> AsciiText -> Int -> AsciiChar
coerce ByteString -> Int -> Word8
BS.index AsciiText
at (Int -> Maybe AsciiChar) -> Int -> Maybe AsciiChar
forall a b. (a -> b) -> a -> b
$ Int
i

-- | Returns 'Just' the first index in the text such that the character at that
-- index satisfies the predicate, 'Nothing' otherwise.
--
-- >>> findIndex ([char| 'n' |] ==) empty
-- Nothing
-- >>> findIndex ([char| 'n' |] ==) . singleton $ [char| 'n' |]
-- Just 0
-- >>> findIndex ([char| 'n' |] ==) . singleton $ [char| 'w' |]
-- Nothing
-- >>> findIndex ([char| 'n' |] ==) [ascii| "nyan" |]
-- Just 0
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.0
{-# INLINE [1] findIndex #-}
findIndex :: (AsciiChar -> Bool) -> AsciiText -> Maybe Int
findIndex :: (AsciiChar -> Bool) -> AsciiText -> Maybe Int
findIndex = ((Word8 -> Bool) -> ByteString -> Maybe Int)
-> (AsciiChar -> Bool) -> AsciiText -> Maybe Int
coerce (Word8 -> Bool) -> ByteString -> Maybe Int
BS.findIndex

-- | @count needle haystack@, given a @needle@ of length \(n\) and a haystack of
-- length \(h\), counts the number of non-overlapping occurrences of @needle@ in
-- @haystack@. If @needle@ is empty, the count will be 0.
--
-- >>> count empty [ascii| "nyan nyan nyan" |]
-- 0
-- >>> count [ascii| "nyan" |] empty
-- 0
-- >>> count [ascii| "nyan" |] [ascii| "nyan" |]
-- 1
-- >>> count [ascii| "nyan" |] [ascii| "nyan nyan nyan" |]
-- 3
-- >>> count [ascii| "nyan" |] [ascii| "nyanyanyan" |]
-- 2
--
-- = On complexity
--
-- This function is based on a variant of the
-- [NSN](https://www-igm.univ-mlv.fr/~lecroq/string/node13.html) algorithm,
-- except it does not detect overlapping needles. Its average-case analysis is
-- based on the assumption that:
--
-- * All ASCII symbols are equally likely to occur in both the needle and the
-- haystack; and
-- * The needle has length at least two; and
-- * Both the needle and the haystack contain at least four unique symbols.
--
-- We fall back to 'split' for singleton needles, and there is no work to be
-- done on empty needles, which means the second assumption always holds.
--
-- Worst-case behaviour becomes more likely the more your input satisfies the
-- following conditions:
--
-- * The needle and/or haystack use few unique symbols (less than four is the
-- worst); or
-- * The haystack contains many instances of the second symbol of the needle
-- which don't lead to full matches.
--
-- /Complexity:/ \(\Theta(h)\) average case, \(\Theta(h \cdot n\)\) worst-case.
--
-- /See also:/ Note that all the below are references for the original
-- algorithm, which includes searching for overlapping needles. Thus, our
-- implementation will perform better than the analysis suggests.
--
-- * [Description and pseudocode](https://www-igm.univ-mlv.fr/~lecroq/string/node13.html)
-- * ["Algorithms on Strings"](https://www.cambridge.org/core/books/algorithms-on-strings/19049704C876795D95D8882C73257C70) by Crochemore, Hancart and Lecroq. PDF available [here](https://www.researchgate.net/publication/220693689_Algorithms_on_Strings).
--
-- @since 1.0.1
count :: AsciiText -> AsciiText -> Int
count :: AsciiText -> AsciiText -> Int
count needle :: AsciiText
needle@(AsciiText ByteString
n) haystack :: AsciiText
haystack@(AsciiText ByteString
h)
  | Int -> Int -> Int
forall a. Ord a => a -> a -> a
P.min (AsciiText -> Int
length AsciiText
needle) (AsciiText -> Int
length AsciiText
haystack) Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 = Int
0
  | AsciiText -> Int
length AsciiText
needle Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
1 = Word8 -> ByteString -> Int
BS.count (ByteString -> Word8
BS.head ByteString
n) ByteString
h
  | Bool
otherwise = [Int] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
P.length ([Int] -> Int) -> (ByteString -> [Int]) -> ByteString -> Int
forall {k} (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. ByteString -> ByteString -> [Int]
indices ByteString
n (ByteString -> Int) -> ByteString -> Int
forall a b. (a -> b) -> a -> b
$ ByteString
h

-- Zipping

-- | \'Pair off\' characters in both texts at corresponding indices. The result
-- will be limited to the shorter of the two arguments.
--
-- >>> zip empty [ascii| "catboy" |]
-- []
-- >>> zip [ascii| "catboy" |] empty
-- []
-- >>> zip [ascii| "catboy" |] [ascii| "nyan" |]
-- [('0x63','0x6e'),('0x61','0x79'),('0x74','0x61'),('0x62','0x6e')]
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.0
zip :: AsciiText -> AsciiText -> [(AsciiChar, AsciiChar)]
zip :: AsciiText -> AsciiText -> [(AsciiChar, AsciiChar)]
zip = (ByteString -> ByteString -> [(Word8, Word8)])
-> AsciiText -> AsciiText -> [(AsciiChar, AsciiChar)]
coerce ByteString -> ByteString -> [(Word8, Word8)]
BS.zip

-- | Combine two texts together in lockstep to produce a new text, using the
-- provided function to combine ASCII characters at each step. The length of the
-- result will be the minimum of the lengths of the two text arguments.
--
-- >>> zipWith max [ascii| "I am a catboy" |] empty
-- ""
-- >>> zipWith max empty [ascii| "I am a catboy" |]
-- ""
-- >>> zipWith max [ascii| "I am a catboy" |] [ascii| "Nyan nyan nyan nyan nyan" |]
-- "Nyan nycntnyy"
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.1
zipWith ::
  (AsciiChar -> AsciiChar -> AsciiChar) -> AsciiText -> AsciiText -> AsciiText
zipWith :: (AsciiChar -> AsciiChar -> AsciiChar)
-> AsciiText -> AsciiText -> AsciiText
zipWith AsciiChar -> AsciiChar -> AsciiChar
f AsciiText
t1 AsciiText
t2 = ((AsciiText, AsciiText)
 -> Maybe (AsciiChar, (AsciiText, AsciiText)))
-> (AsciiText, AsciiText) -> AsciiText
forall a. (a -> Maybe (AsciiChar, a)) -> a -> AsciiText
unfoldr (AsciiText, AsciiText) -> Maybe (AsciiChar, (AsciiText, AsciiText))
go (AsciiText
t1, AsciiText
t2)
  where
    go :: (AsciiText, AsciiText) -> Maybe (AsciiChar, (AsciiText, AsciiText))
    go :: (AsciiText, AsciiText) -> Maybe (AsciiChar, (AsciiText, AsciiText))
go (AsciiText
acc1, AsciiText
acc2) = do
      (AsciiChar
h1, AsciiText
t1') <- AsciiText -> Maybe (AsciiChar, AsciiText)
uncons AsciiText
acc1
      (AsciiChar
h2, AsciiText
t2') <- AsciiText -> Maybe (AsciiChar, AsciiText)
uncons AsciiText
acc2
      (AsciiChar, (AsciiText, AsciiText))
-> Maybe (AsciiChar, (AsciiText, AsciiText))
forall (f :: * -> *) a. Applicative f => a -> f a
pure (AsciiChar -> AsciiChar -> AsciiChar
f AsciiChar
h1 AsciiChar
h2, (AsciiText
t1', AsciiText
t2'))

-- Conversions

-- | Try and convert a 'Text' into an 'AsciiText'. Gives 'Nothing' if the 'Text'
-- contains any symbols which lack an ASCII equivalent.
--
-- >>> fromText "catboy"
-- Just "catboy"
-- >>> fromText "😺😺😺😺😺"
-- Nothing
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.0
fromText :: Text -> Maybe AsciiText
fromText :: Text -> Maybe AsciiText
fromText Text
t = case (Char -> Bool) -> Text -> Maybe Char
T.find (Bool -> Bool
not (Bool -> Bool) -> (Char -> Bool) -> Char -> Bool
forall {k} (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Char -> Bool
isAscii) Text
t of
  Maybe Char
Nothing -> AsciiText -> Maybe AsciiText
forall (f :: * -> *) a. Applicative f => a -> f a
pure (AsciiText -> Maybe AsciiText)
-> (Text -> AsciiText) -> Text -> Maybe AsciiText
forall {k} (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. ByteString -> AsciiText
AsciiText (ByteString -> AsciiText)
-> (Text -> ByteString) -> Text -> AsciiText
forall {k} (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Text -> ByteString
encodeUtf8 (Text -> Maybe AsciiText) -> Text -> Maybe AsciiText
forall a b. (a -> b) -> a -> b
$ Text
t
  Just Char
_ -> Maybe AsciiText
forall a. Maybe a
Nothing

-- | Try and convert a 'Text' into an 'AsciiText'. Gives @'Prelude.Left' c@ if the 'Text'
-- contains a 'Prelude.Char' @c@ that lacks an ASCII representation.
--
-- >>> eitherFromText "catboy"
-- Right "catboy"
-- >>> eitherFromText "😺😺😺😺😺"
-- Left '\128570'
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.1
eitherFromText :: Text -> P.Either P.Char AsciiText
eitherFromText :: Text -> Either Char AsciiText
eitherFromText Text
t = case (Char -> Bool) -> Text -> Maybe Char
T.find (Bool -> Bool
not (Bool -> Bool) -> (Char -> Bool) -> Char -> Bool
forall {k} (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Char -> Bool
isAscii) Text
t of
  Maybe Char
Nothing -> AsciiText -> Either Char AsciiText
forall (f :: * -> *) a. Applicative f => a -> f a
pure (AsciiText -> Either Char AsciiText)
-> (Text -> AsciiText) -> Text -> Either Char AsciiText
forall {k} (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. ByteString -> AsciiText
AsciiText (ByteString -> AsciiText)
-> (Text -> ByteString) -> Text -> AsciiText
forall {k} (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Text -> ByteString
encodeUtf8 (Text -> Either Char AsciiText) -> Text -> Either Char AsciiText
forall a b. (a -> b) -> a -> b
$ Text
t
  Just Char
c -> Char -> Either Char AsciiText
forall a b. a -> Either a b
P.Left Char
c

-- | Try and convert a 'ByteString' into an 'AsciiText'. Gives 'Nothing' if the
-- 'ByteString' contains any bytes outside the ASCII range (that is, from 0 to
-- 127 inclusive).
--
-- >>> fromByteString "catboy"
-- Just "catboy"
-- >>> fromByteString . BS.pack $ [128]
-- Nothing
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.0
fromByteString :: ByteString -> Maybe AsciiText
fromByteString :: ByteString -> Maybe AsciiText
fromByteString ByteString
bs = case (Word8 -> Bool) -> ByteString -> Maybe Word8
BS.find (Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
> Word8
127) ByteString
bs of
  Maybe Word8
Nothing -> AsciiText -> Maybe AsciiText
forall (f :: * -> *) a. Applicative f => a -> f a
pure (AsciiText -> Maybe AsciiText)
-> (ByteString -> AsciiText) -> ByteString -> Maybe AsciiText
forall {k} (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. ByteString -> AsciiText
AsciiText (ByteString -> Maybe AsciiText) -> ByteString -> Maybe AsciiText
forall a b. (a -> b) -> a -> b
$ ByteString
bs
  Just Word8
_ -> Maybe AsciiText
forall a. Maybe a
Nothing

-- | Try and convert a 'ByteString' into an 'AsciiText'. Gives @'Prelude.Left' w8@ if
-- the 'ByteString' contains a byte @w8@ that is outside the ASCII range (that
-- is, from 0 to 127 inclusive).
--
-- >>> eitherFromByteString "catboy"
-- Right "catboy"
-- >>> eitherFromByteString . BS.pack $ [128]
-- Left 128
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.1
eitherFromByteString :: ByteString -> P.Either Word8 AsciiText
eitherFromByteString :: ByteString -> Either Word8 AsciiText
eitherFromByteString ByteString
bs = case (Word8 -> Bool) -> ByteString -> Maybe Word8
BS.find (Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
> Word8
127) ByteString
bs of
  Maybe Word8
Nothing -> AsciiText -> Either Word8 AsciiText
forall (f :: * -> *) a. Applicative f => a -> f a
pure (AsciiText -> Either Word8 AsciiText)
-> (ByteString -> AsciiText)
-> ByteString
-> Either Word8 AsciiText
forall {k} (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. ByteString -> AsciiText
AsciiText (ByteString -> Either Word8 AsciiText)
-> ByteString -> Either Word8 AsciiText
forall a b. (a -> b) -> a -> b
$ ByteString
bs
  Just Word8
w8 -> Word8 -> Either Word8 AsciiText
forall a b. a -> Either a b
P.Left Word8
w8

-- | Convert an 'AsciiText' into a 'Text' (by copying).
--
-- >>> toText empty
-- ""
-- >>> toText . singleton $ [char| 'w' |]
-- "w"
-- >>> toText [ascii| "nyan" |]
-- "nyan"
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.0
toText :: AsciiText -> Text
toText :: AsciiText -> Text
toText (AsciiText ByteString
bs) = ByteString -> Text
decodeUtf8 ByteString
bs

-- | Reinterpret an 'AsciiText' as a 'ByteString' (without copying).
--
-- >>> toByteString empty
-- ""
-- >>> toByteString . singleton $ [char| 'w' |]
-- "w"
-- >>> toByteString [ascii| "nyan" |]
-- "nyan"
--
-- /Complexity:/ \(\Theta(1)\)
--
-- @since 1.0.0
toByteString :: AsciiText -> ByteString
toByteString :: AsciiText -> ByteString
toByteString = AsciiText -> ByteString
coerce

-- Optics

-- | A convenient demonstration of the relationship between 'toText' and
-- 'fromText'.
--
-- >>> preview textWise "catboy goes nyan"
-- Just "catboy goes nyan"
-- >>> preview textWise "😺😺😺😺😺"
-- Nothing
-- >>> review textWise [ascii| "catboys are amazing" |]
-- "catboys are amazing"
--
-- @since 1.0.0
textWise :: Prism' Text AsciiText
textWise :: Prism' Text AsciiText
textWise = (AsciiText -> Text)
-> (Text -> Maybe AsciiText) -> Prism' Text AsciiText
forall b s a. (b -> s) -> (s -> Maybe a) -> Prism s s a b
prism' AsciiText -> Text
toText Text -> Maybe AsciiText
fromText

-- | A convenient demonstration of the relationship between 'toByteString' and
-- 'fromByteString'.
--
-- >>> preview byteStringWise "catboy goes nyan"
-- Just "catboy goes nyan"
-- >>> preview byteStringWise . BS.pack $ [0xff, 0xff]
-- Nothing
-- >>> review byteStringWise [ascii| "I love catboys" |]
-- "I love catboys"
--
-- @since 1.0.0
byteStringWise :: Prism' ByteString AsciiText
byteStringWise :: Prism' ByteString AsciiText
byteStringWise = (AsciiText -> ByteString)
-> (ByteString -> Maybe AsciiText) -> Prism' ByteString AsciiText
forall b s a. (b -> s) -> (s -> Maybe a) -> Prism s s a b
prism' AsciiText -> ByteString
toByteString ByteString -> Maybe AsciiText
fromByteString

-- | Pack (or unpack) a list of ASCII characters into a text.
--
-- >>> view packedChars [[char| 'n' |], [char| 'y' |], [char| 'a' |], [char| 'n' |]]
-- "nyan"
-- >>> review packedChars [ascii| "nyan" |]
-- ['0x6e','0x79','0x61','0x6e']
--
-- @since 1.0.1
packedChars :: Iso' [AsciiChar] AsciiText
packedChars :: Iso' [AsciiChar] AsciiText
packedChars =
  Optic An_Iso NoIx [Word8] [AsciiChar] AsciiText AsciiText
-> Iso' [AsciiChar] AsciiText
forall s s' k (is :: IxList) t a b.
Coercible s s' =>
Optic k is s t a b -> Optic k is s' t a b
coerceS (Optic An_Iso NoIx [Word8] [AsciiChar] AsciiText AsciiText
 -> Iso' [AsciiChar] AsciiText)
-> (Iso' [Word8] ByteString
    -> Optic An_Iso NoIx [Word8] [AsciiChar] AsciiText AsciiText)
-> Iso' [Word8] ByteString
-> Iso' [AsciiChar] AsciiText
forall {k} (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Optic An_Iso NoIx [Word8] [Word8] AsciiText AsciiText
-> Optic An_Iso NoIx [Word8] [AsciiChar] AsciiText AsciiText
forall t t' k (is :: IxList) s a b.
Coercible t t' =>
Optic k is s t a b -> Optic k is s t' a b
coerceT (Optic An_Iso NoIx [Word8] [Word8] AsciiText AsciiText
 -> Optic An_Iso NoIx [Word8] [AsciiChar] AsciiText AsciiText)
-> (Iso' [Word8] ByteString
    -> Optic An_Iso NoIx [Word8] [Word8] AsciiText AsciiText)
-> Iso' [Word8] ByteString
-> Optic An_Iso NoIx [Word8] [AsciiChar] AsciiText AsciiText
forall {k} (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Optic An_Iso NoIx [Word8] [Word8] ByteString AsciiText
-> Optic An_Iso NoIx [Word8] [Word8] AsciiText AsciiText
forall a a' k (is :: IxList) s t b.
Coercible a a' =>
Optic k is s t a b -> Optic k is s t a' b
coerceA (Optic An_Iso NoIx [Word8] [Word8] ByteString AsciiText
 -> Optic An_Iso NoIx [Word8] [Word8] AsciiText AsciiText)
-> (Iso' [Word8] ByteString
    -> Optic An_Iso NoIx [Word8] [Word8] ByteString AsciiText)
-> Iso' [Word8] ByteString
-> Optic An_Iso NoIx [Word8] [Word8] AsciiText AsciiText
forall {k} (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Iso' [Word8] ByteString
-> Optic An_Iso NoIx [Word8] [Word8] ByteString AsciiText
forall b b' k (is :: IxList) s t a.
Coercible b b' =>
Optic k is s t a b -> Optic k is s t a b'
coerceB (Iso' [Word8] ByteString -> Iso' [AsciiChar] AsciiText)
-> Iso' [Word8] ByteString -> Iso' [AsciiChar] AsciiText
forall a b. (a -> b) -> a -> b
$ forall t. IsByteString t => Iso' [Word8] t
BSO.packedBytes @ByteString

-- | Traverse the individual ASCII characters in a text.
--
-- >>> preview (elementOf chars 0) [ascii| "I am a catboy" |]
-- Just '0x49'
-- >>> preview (elementOf chars 100) [ascii| "I am a catboy" |]
-- Nothing
-- >>> iover chars (\i x -> bool x [char| 'w' |] . even $ i) [ascii| "I am a catboy" |]
-- "w wmwawcwtwow"
--
-- @since 1.0.1
chars :: IxTraversal' Int64 AsciiText AsciiChar
chars :: IxTraversal' Int64 AsciiText AsciiChar
chars = Optic
  A_Traversal (WithIx Int64) ByteString AsciiText AsciiChar AsciiChar
-> IxTraversal' Int64 AsciiText AsciiChar
forall s s' k (is :: IxList) t a b.
Coercible s s' =>
Optic k is s t a b -> Optic k is s' t a b
coerceS (Optic
   A_Traversal (WithIx Int64) ByteString AsciiText AsciiChar AsciiChar
 -> IxTraversal' Int64 AsciiText AsciiChar)
-> (IxTraversal' Int64 ByteString Word8
    -> Optic
         A_Traversal
         (WithIx Int64)
         ByteString
         AsciiText
         AsciiChar
         AsciiChar)
-> IxTraversal' Int64 ByteString Word8
-> IxTraversal' Int64 AsciiText AsciiChar
forall {k} (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Optic
  A_Traversal
  (WithIx Int64)
  ByteString
  ByteString
  AsciiChar
  AsciiChar
-> Optic
     A_Traversal (WithIx Int64) ByteString AsciiText AsciiChar AsciiChar
forall t t' k (is :: IxList) s a b.
Coercible t t' =>
Optic k is s t a b -> Optic k is s t' a b
coerceT (Optic
   A_Traversal
   (WithIx Int64)
   ByteString
   ByteString
   AsciiChar
   AsciiChar
 -> Optic
      A_Traversal
      (WithIx Int64)
      ByteString
      AsciiText
      AsciiChar
      AsciiChar)
-> (IxTraversal' Int64 ByteString Word8
    -> Optic
         A_Traversal
         (WithIx Int64)
         ByteString
         ByteString
         AsciiChar
         AsciiChar)
-> IxTraversal' Int64 ByteString Word8
-> Optic
     A_Traversal (WithIx Int64) ByteString AsciiText AsciiChar AsciiChar
forall {k} (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Optic
  A_Traversal (WithIx Int64) ByteString ByteString Word8 AsciiChar
-> Optic
     A_Traversal
     (WithIx Int64)
     ByteString
     ByteString
     AsciiChar
     AsciiChar
forall a a' k (is :: IxList) s t b.
Coercible a a' =>
Optic k is s t a b -> Optic k is s t a' b
coerceA (Optic
   A_Traversal (WithIx Int64) ByteString ByteString Word8 AsciiChar
 -> Optic
      A_Traversal
      (WithIx Int64)
      ByteString
      ByteString
      AsciiChar
      AsciiChar)
-> (IxTraversal' Int64 ByteString Word8
    -> Optic
         A_Traversal (WithIx Int64) ByteString ByteString Word8 AsciiChar)
-> IxTraversal' Int64 ByteString Word8
-> Optic
     A_Traversal
     (WithIx Int64)
     ByteString
     ByteString
     AsciiChar
     AsciiChar
forall {k} (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. IxTraversal' Int64 ByteString Word8
-> Optic
     A_Traversal (WithIx Int64) ByteString ByteString Word8 AsciiChar
forall b b' k (is :: IxList) s t a.
Coercible b b' =>
Optic k is s t a b -> Optic k is s t a b'
coerceB (IxTraversal' Int64 ByteString Word8
 -> IxTraversal' Int64 AsciiText AsciiChar)
-> IxTraversal' Int64 ByteString Word8
-> IxTraversal' Int64 AsciiText AsciiChar
forall a b. (a -> b) -> a -> b
$ forall t. IsByteString t => IxTraversal' Int64 t Word8
BSO.bytes @ByteString

-- | Pack (or unpack) a list of bytes into a text. This isn't as capable as
-- 'packedChars', as that would allow construction of invalid texts.
--
-- >>> preview packedBytes [0x6e, 0x79, 0x61, 0x6e]
-- Just "nyan"
-- >>> preview packedBytes [0xff, 0xfe]
-- Nothing
-- >>> review packedBytes [ascii| "nyan" |]
-- [110,121,97,110]
--
-- @since 1.0.1
packedBytes :: Prism' [Word8] AsciiText
packedBytes :: Prism' [Word8] AsciiText
packedBytes = (AsciiText -> [Word8])
-> ([Word8] -> Maybe AsciiText) -> Prism' [Word8] AsciiText
forall b s a. (b -> s) -> (s -> Maybe a) -> Prism s s a b
prism' (Optic' A_Review NoIx [Word8] AsciiText -> AsciiText -> [Word8]
forall k (is :: IxList) t b.
Is k A_Review =>
Optic' k is t b -> b -> t
review Optic' A_Review NoIx [Word8] AsciiText
go) (([Word8] -> AsciiText) -> Maybe [Word8] -> Maybe AsciiText
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
P.fmap (Optic' A_Getter NoIx [Word8] AsciiText -> [Word8] -> AsciiText
forall k (is :: IxList) s a.
Is k A_Getter =>
Optic' k is s a -> s -> a
view Optic' A_Getter NoIx [Word8] AsciiText
go2) (Maybe [Word8] -> Maybe AsciiText)
-> ([Word8] -> Maybe [Word8]) -> [Word8] -> Maybe AsciiText
forall {k} (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. (Word8 -> Maybe Word8) -> [Word8] -> Maybe [Word8]
forall (t :: * -> *) (f :: * -> *) a b.
(Traversable t, Applicative f) =>
(a -> f b) -> t a -> f (t b)
P.traverse Word8 -> Maybe Word8
asciify)
  where
    go :: Review [Word8] AsciiText
    go :: Optic' A_Review NoIx [Word8] AsciiText
go = Optic An_Iso NoIx [Word8] [Word8] AsciiText AsciiText
-> Optic' A_Review NoIx [Word8] AsciiText
forall destKind srcKind (is :: IxList) s t a b.
Is srcKind destKind =>
Optic srcKind is s t a b -> Optic destKind is s t a b
castOptic (Optic An_Iso NoIx [Word8] [Word8] AsciiText AsciiText
 -> Optic' A_Review NoIx [Word8] AsciiText)
-> (Iso' [Word8] ByteString
    -> Optic An_Iso NoIx [Word8] [Word8] AsciiText AsciiText)
-> Iso' [Word8] ByteString
-> Optic' A_Review NoIx [Word8] AsciiText
forall {k} (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Optic An_Iso NoIx [Word8] [Word8] ByteString AsciiText
-> Optic An_Iso NoIx [Word8] [Word8] AsciiText AsciiText
forall a a' k (is :: IxList) s t b.
Coercible a a' =>
Optic k is s t a b -> Optic k is s t a' b
coerceA (Optic An_Iso NoIx [Word8] [Word8] ByteString AsciiText
 -> Optic An_Iso NoIx [Word8] [Word8] AsciiText AsciiText)
-> (Iso' [Word8] ByteString
    -> Optic An_Iso NoIx [Word8] [Word8] ByteString AsciiText)
-> Iso' [Word8] ByteString
-> Optic An_Iso NoIx [Word8] [Word8] AsciiText AsciiText
forall {k} (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Iso' [Word8] ByteString
-> Optic An_Iso NoIx [Word8] [Word8] ByteString AsciiText
forall b b' k (is :: IxList) s t a.
Coercible b b' =>
Optic k is s t a b -> Optic k is s t a b'
coerceB (Iso' [Word8] ByteString -> Optic' A_Review NoIx [Word8] AsciiText)
-> Iso' [Word8] ByteString
-> Optic' A_Review NoIx [Word8] AsciiText
forall a b. (a -> b) -> a -> b
$ forall t. IsByteString t => Iso' [Word8] t
BSO.packedBytes @ByteString
    go2 :: Getter [Word8] AsciiText
    go2 :: Optic' A_Getter NoIx [Word8] AsciiText
go2 = Optic An_Iso NoIx [Word8] [Word8] AsciiText AsciiText
-> Optic' A_Getter NoIx [Word8] AsciiText
forall destKind srcKind (is :: IxList) s t a b.
Is srcKind destKind =>
Optic srcKind is s t a b -> Optic destKind is s t a b
castOptic (Optic An_Iso NoIx [Word8] [Word8] AsciiText AsciiText
 -> Optic' A_Getter NoIx [Word8] AsciiText)
-> (Iso' [Word8] ByteString
    -> Optic An_Iso NoIx [Word8] [Word8] AsciiText AsciiText)
-> Iso' [Word8] ByteString
-> Optic' A_Getter NoIx [Word8] AsciiText
forall {k} (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Optic An_Iso NoIx [Word8] [Word8] ByteString AsciiText
-> Optic An_Iso NoIx [Word8] [Word8] AsciiText AsciiText
forall a a' k (is :: IxList) s t b.
Coercible a a' =>
Optic k is s t a b -> Optic k is s t a' b
coerceA (Optic An_Iso NoIx [Word8] [Word8] ByteString AsciiText
 -> Optic An_Iso NoIx [Word8] [Word8] AsciiText AsciiText)
-> (Iso' [Word8] ByteString
    -> Optic An_Iso NoIx [Word8] [Word8] ByteString AsciiText)
-> Iso' [Word8] ByteString
-> Optic An_Iso NoIx [Word8] [Word8] AsciiText AsciiText
forall {k} (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Iso' [Word8] ByteString
-> Optic An_Iso NoIx [Word8] [Word8] ByteString AsciiText
forall b b' k (is :: IxList) s t a.
Coercible b b' =>
Optic k is s t a b -> Optic k is s t a b'
coerceB (Iso' [Word8] ByteString -> Optic' A_Getter NoIx [Word8] AsciiText)
-> Iso' [Word8] ByteString
-> Optic' A_Getter NoIx [Word8] AsciiText
forall a b. (a -> b) -> a -> b
$ forall t. IsByteString t => Iso' [Word8] t
BSO.packedBytes @ByteString

-- | Access the individual bytes in a text. This isn't as capable as 'chars', as
-- that would allow modifications of the bytes in ways that aren't valid as
-- ASCII.
--
-- >>> itoListOf bytes [ascii| "I am a catboy" |]
-- [(0,73),(1,32),(2,97),(3,109),(4,32),(5,97),(6,32),(7,99),(8,97),(9,116),(10,98),(11,111),(12,121)]
--
-- @since 1.0.1
bytes :: IxFold Int64 AsciiText Word8
bytes :: IxFold Int64 AsciiText Word8
bytes = Optic A_Traversal (WithIx Int64) AsciiText AsciiText Word8 Word8
-> IxFold Int64 AsciiText Word8
forall destKind srcKind (is :: IxList) s t a b.
Is srcKind destKind =>
Optic srcKind is s t a b -> Optic destKind is s t a b
castOptic (Optic A_Traversal (WithIx Int64) AsciiText AsciiText Word8 Word8
 -> IxFold Int64 AsciiText Word8)
-> (IxTraversal' Int64 ByteString Word8
    -> Optic
         A_Traversal (WithIx Int64) AsciiText AsciiText Word8 Word8)
-> IxTraversal' Int64 ByteString Word8
-> IxFold Int64 AsciiText Word8
forall {k} (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Optic A_Traversal (WithIx Int64) ByteString AsciiText Word8 Word8
-> Optic A_Traversal (WithIx Int64) AsciiText AsciiText Word8 Word8
forall s s' k (is :: IxList) t a b.
Coercible s s' =>
Optic k is s t a b -> Optic k is s' t a b
coerceS (Optic A_Traversal (WithIx Int64) ByteString AsciiText Word8 Word8
 -> Optic
      A_Traversal (WithIx Int64) AsciiText AsciiText Word8 Word8)
-> (IxTraversal' Int64 ByteString Word8
    -> Optic
         A_Traversal (WithIx Int64) ByteString AsciiText Word8 Word8)
-> IxTraversal' Int64 ByteString Word8
-> Optic A_Traversal (WithIx Int64) AsciiText AsciiText Word8 Word8
forall {k} (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. IxTraversal' Int64 ByteString Word8
-> Optic
     A_Traversal (WithIx Int64) ByteString AsciiText Word8 Word8
forall t t' k (is :: IxList) s a b.
Coercible t t' =>
Optic k is s t a b -> Optic k is s t' a b
coerceT (IxTraversal' Int64 ByteString Word8
 -> IxFold Int64 AsciiText Word8)
-> IxTraversal' Int64 ByteString Word8
-> IxFold Int64 AsciiText Word8
forall a b. (a -> b) -> a -> b
$ forall t. IsByteString t => IxTraversal' Int64 t Word8
BSO.bytes @ByteString

-- Helpers

isSpace :: AsciiChar -> Bool
isSpace :: AsciiChar -> Bool
isSpace (AsciiChar Word8
w8)
  | Word8
w8 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
32 = Bool
True
  | Word8
9 Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
w8 Bool -> Bool -> Bool
&& Word8
w8 Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
13 = Bool
True
  | Bool
otherwise = Bool
False

asciify :: Word8 -> Maybe Word8
asciify :: Word8 -> Maybe Word8
asciify Word8
w8
  | Word8
w8 Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
127 = Word8 -> Maybe Word8
forall a. a -> Maybe a
Just Word8
w8
  | Bool
otherwise = Maybe Word8
forall a. Maybe a
Nothing

indices :: ByteString -> ByteString -> [Int]
indices :: ByteString -> ByteString -> [Int]
indices ByteString
needle ByteString
haystack
  | Int -> Int -> Int
forall a. Ord a => a -> a -> a
P.min Int
needleLen Int
haystackLen Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 = []
  | Int
needleLen Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
1 = Word8 -> ByteString -> [Int]
BS.elemIndices (ByteString -> Word8
BS.head ByteString
needle) ByteString
haystack
  | Bool
otherwise = (Int -> Maybe (Int, Int)) -> Int -> [Int]
forall b a. (b -> Maybe (a, b)) -> b -> [a]
L.unfoldr Int -> Maybe (Int, Int)
go Int
0
  where
    go :: Int -> Maybe (Int, Int)
    go :: Int -> Maybe (Int, Int)
go Int
j
      | Int
j Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> (Int
haystackLen Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
needleLen) = Maybe (Int, Int)
forall a. Maybe a
Nothing
      | ByteString -> Int -> Word8
BS.index ByteString
needle Int
1 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
/= ByteString -> Int -> Word8
BS.index ByteString
haystack (Int
j Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) = Int -> Maybe (Int, Int)
go (Int
j Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
kay)
      | Bool
otherwise = do
        let fragment :: ByteString
fragment = Int -> ByteString -> ByteString
BS.take Int
needleLen (ByteString -> ByteString)
-> (ByteString -> ByteString) -> ByteString -> ByteString
forall {k} (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Int -> ByteString -> ByteString
BS.drop Int
j (ByteString -> ByteString) -> ByteString -> ByteString
forall a b. (a -> b) -> a -> b
$ ByteString
haystack
        if ByteString
fragment ByteString -> ByteString -> Bool
forall a. Eq a => a -> a -> Bool
== ByteString
needle
          then (Int, Int) -> Maybe (Int, Int)
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Int
j, Int
j Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
needleLen)
          else Int -> Maybe (Int, Int)
go (Int
j Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
ell)
    kay :: Int
    kay :: Int
kay
      | ByteString -> Word8
BS.head ByteString
needle Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== ByteString -> Int -> Word8
BS.index ByteString
needle Int
1 = Int
2
      | Bool
otherwise = Int
1
    ell :: Int
    ell :: Int
ell
      | ByteString -> Word8
BS.head ByteString
needle Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== ByteString -> Int -> Word8
BS.index ByteString
needle Int
1 = Int
1
      | Bool
otherwise = Int
2
    needleLen :: Int
    needleLen :: Int
needleLen = ByteString -> Int
BS.length ByteString
needle
    haystackLen :: Int
    haystackLen :: Int
haystackLen = ByteString -> Int
BS.length ByteString
haystack