Copyright	(C) 2021 Koz Ross
License	Apache 2.0
Maintainer	Koz Ross <koz.ross@retro-freedom.nz>
Stability	stable
Portability	GHC only
Safe Haskell	Trustworthy
Language	Haskell2010

Text.Ascii

Contents

Type
Creation
Basic interface
Transformations
- Justification
Folds
- Special folds
Construction
Substrings
View patterns
Searching
Indexing
Zipping
Conversions
Optics

Description

An implementation of ASCII strings.

This module is designed for qualified importing:

import qualified Text.Ascii as Ascii

See also: Wikipedia entry for ASCII

Synopsis

data AsciiText
empty :: AsciiText
singleton :: AsciiChar -> AsciiText
ascii :: QuasiQuoter
cons :: AsciiChar -> AsciiText -> AsciiText
snoc :: AsciiText -> AsciiChar -> AsciiText
uncons :: AsciiText -> Maybe (AsciiChar, AsciiText)
unsnoc :: AsciiText -> Maybe (AsciiText, AsciiChar)
length :: AsciiText -> Int
map :: (AsciiChar -> AsciiChar) -> AsciiText -> AsciiText
intercalate :: AsciiText -> [AsciiText] -> AsciiText
intersperse :: AsciiChar -> AsciiText -> AsciiText
transpose :: [AsciiText] -> [AsciiText]
reverse :: AsciiText -> AsciiText
replace :: AsciiText -> AsciiText -> AsciiText -> AsciiText
justifyLeft :: Int -> AsciiChar -> AsciiText -> AsciiText
justifyRight :: Int -> AsciiChar -> AsciiText -> AsciiText
center :: Int -> AsciiChar -> AsciiText -> AsciiText
foldl :: (a -> AsciiChar -> a) -> a -> AsciiText -> a
foldl' :: (a -> AsciiChar -> a) -> a -> AsciiText -> a
foldr :: (AsciiChar -> a -> a) -> a -> AsciiText -> a
foldr' :: (AsciiChar -> a -> a) -> a -> AsciiText -> a
concat :: [AsciiText] -> AsciiText
concatMap :: (AsciiChar -> AsciiText) -> AsciiText -> AsciiText
scanl :: (AsciiChar -> AsciiChar -> AsciiChar) -> AsciiChar -> AsciiText -> AsciiText
scanr :: (AsciiChar -> AsciiChar -> AsciiChar) -> AsciiChar -> AsciiText -> AsciiText
mapAccumL :: (a -> AsciiChar -> (a, AsciiChar)) -> a -> AsciiText -> (a, AsciiText)
mapAccumR :: (a -> AsciiChar -> (a, AsciiChar)) -> a -> AsciiText -> (a, AsciiText)
replicate :: Int -> AsciiText -> AsciiText
unfoldr :: (a -> Maybe (AsciiChar, a)) -> a -> AsciiText
unfoldrN :: Int -> (a -> Maybe (AsciiChar, a)) -> a -> (AsciiText, Maybe a)
take :: Int -> AsciiText -> AsciiText
takeEnd :: Int -> AsciiText -> AsciiText
drop :: Int -> AsciiText -> AsciiText
dropEnd :: Int -> AsciiText -> AsciiText
takeWhile :: (AsciiChar -> Bool) -> AsciiText -> AsciiText
takeWhileEnd :: (AsciiChar -> Bool) -> AsciiText -> AsciiText
dropWhile :: (AsciiChar -> Bool) -> AsciiText -> AsciiText
dropWhileEnd :: (AsciiChar -> Bool) -> AsciiText -> AsciiText
dropAround :: (AsciiChar -> Bool) -> AsciiText -> AsciiText
strip :: AsciiText -> AsciiText
stripStart :: AsciiText -> AsciiText
stripEnd :: AsciiText -> AsciiText
splitAt :: Int -> AsciiText -> (AsciiText, AsciiText)
breakOn :: AsciiText -> AsciiText -> (AsciiText, AsciiText)
breakOnEnd :: AsciiText -> AsciiText -> (AsciiText, AsciiText)
break :: (AsciiChar -> Bool) -> AsciiText -> (AsciiText, AsciiText)
span :: (AsciiChar -> Bool) -> AsciiText -> (AsciiText, AsciiText)
group :: AsciiText -> [AsciiText]
groupBy :: (AsciiChar -> AsciiChar -> Bool) -> AsciiText -> [AsciiText]
inits :: AsciiText -> [AsciiText]
tails :: AsciiText -> [AsciiText]
splitOn :: AsciiText -> AsciiText -> [AsciiText]
split :: (AsciiChar -> Bool) -> AsciiText -> [AsciiText]
chunksOf :: Int -> AsciiText -> [AsciiText]
lines :: AsciiText -> [AsciiText]
unlines :: Foldable f => f AsciiText -> AsciiText
words :: AsciiText -> [AsciiText]
unwords :: [AsciiText] -> AsciiText
stripPrefix :: AsciiText -> AsciiText -> Maybe AsciiText
stripSuffix :: AsciiText -> AsciiText -> Maybe AsciiText
stripInfix :: AsciiText -> AsciiText -> Maybe (AsciiText, AsciiText)
commonPrefixes :: AsciiText -> AsciiText -> Maybe (AsciiText, AsciiText, AsciiText)
filter :: (AsciiChar -> Bool) -> AsciiText -> AsciiText
breakOnAll :: AsciiText -> AsciiText -> [(AsciiText, AsciiText)]
find :: (AsciiChar -> Bool) -> AsciiText -> Maybe AsciiChar
partition :: (AsciiChar -> Bool) -> AsciiText -> (AsciiText, AsciiText)
index :: AsciiText -> Int -> Maybe AsciiChar
findIndex :: (AsciiChar -> Bool) -> AsciiText -> Maybe Int
count :: AsciiText -> AsciiText -> Int
zip :: AsciiText -> AsciiText -> [(AsciiChar, AsciiChar)]
zipWith :: (AsciiChar -> AsciiChar -> AsciiChar) -> AsciiText -> AsciiText -> AsciiText
fromText :: Text -> Maybe AsciiText
fromByteString :: ByteString -> Maybe AsciiText
toText :: AsciiText -> Text
toByteString :: AsciiText -> ByteString
textWise :: Prism' Text AsciiText
byteStringWise :: Prism' ByteString AsciiText
packedChars :: Iso' [AsciiChar] AsciiText
chars :: IxTraversal' Int64 AsciiText AsciiChar
packedBytes :: Prism' [Word8] AsciiText
bytes :: IxFold Int64 AsciiText Word8

Type

data AsciiText Source #

A string of ASCII characters, represented as a packed byte array.

Since: 1.0.0

Instances

IsList AsciiText Source #	Since: 1.0.0
Instance details Defined in Text.Ascii.Internal Associated Types type Item AsciiText :: Type # Methods fromList :: [Item AsciiText] -> AsciiText # fromListN :: Int -> [Item AsciiText] -> AsciiText # toList :: AsciiText -> [Item AsciiText] #
Eq AsciiText Source #	Since: 1.0.0
Instance details Defined in Text.Ascii.Internal Methods (==) :: AsciiText -> AsciiText -> Bool # (/=) :: AsciiText -> AsciiText -> Bool #
Ord AsciiText Source #	Since: 1.0.0
Instance details Defined in Text.Ascii.Internal Methods compare :: AsciiText -> AsciiText -> Ordering # (<) :: AsciiText -> AsciiText -> Bool # (<=) :: AsciiText -> AsciiText -> Bool # (>) :: AsciiText -> AsciiText -> Bool # (>=) :: AsciiText -> AsciiText -> Bool # max :: AsciiText -> AsciiText -> AsciiText # min :: AsciiText -> AsciiText -> AsciiText #
Show AsciiText Source #	Since: 1.0.0
Instance details Defined in Text.Ascii.Internal Methods showsPrec :: Int -> AsciiText -> ShowS # show :: AsciiText -> String # showList :: [AsciiText] -> ShowS #
Semigroup AsciiText Source #	Since: 1.0.0
Instance details Defined in Text.Ascii.Internal Methods (<>) :: AsciiText -> AsciiText -> AsciiText # sconcat :: NonEmpty AsciiText -> AsciiText # stimes :: Integral b => b -> AsciiText -> AsciiText #
Monoid AsciiText Source #	Since: 1.0.0
Instance details Defined in Text.Ascii.Internal Methods mempty :: AsciiText # mappend :: AsciiText -> AsciiText -> AsciiText # mconcat :: [AsciiText] -> AsciiText #
FoldCase AsciiText Source #	Since: 1.0.1
Instance details Defined in Text.Ascii.Internal Methods foldCase :: AsciiText -> AsciiText # foldCaseList :: [AsciiText] -> [AsciiText]
NFData AsciiText Source #	Since: 1.0.0
Instance details Defined in Text.Ascii.Internal Methods rnf :: AsciiText -> () #
Stream AsciiText Source #	Since: 1.0.1
Instance details Defined in Text.Ascii.Internal Associated Types type Token AsciiText :: Type # type Tokens AsciiText :: Type # Methods tokenToChunk :: Proxy AsciiText -> Token AsciiText -> Tokens AsciiText # tokensToChunk :: Proxy AsciiText -> [Token AsciiText] -> Tokens AsciiText # chunkToTokens :: Proxy AsciiText -> Tokens AsciiText -> [Token AsciiText] # chunkLength :: Proxy AsciiText -> Tokens AsciiText -> Int # chunkEmpty :: Proxy AsciiText -> Tokens AsciiText -> Bool # take1_ :: AsciiText -> Maybe (Token AsciiText, AsciiText) # takeN_ :: Int -> AsciiText -> Maybe (Tokens AsciiText, AsciiText) # takeWhile_ :: (Token AsciiText -> Bool) -> AsciiText -> (Tokens AsciiText, AsciiText) #
VisualStream AsciiText Source #	Since: 1.0.1
Instance details Defined in Text.Ascii.Internal Methods showTokens :: Proxy AsciiText -> NonEmpty (Token AsciiText) -> String # tokensLength :: Proxy AsciiText -> NonEmpty (Token AsciiText) -> Int #
TraversableStream AsciiText Source #	Since: 1.0.1
Instance details Defined in Text.Ascii.Internal Methods reachOffset :: Int -> PosState AsciiText -> (Maybe String, PosState AsciiText) # reachOffsetNoLine :: Int -> PosState AsciiText -> PosState AsciiText #
Ixed AsciiText Source #	Since: 1.0.1
Instance details Defined in Text.Ascii.Internal Associated Types type IxKind AsciiText :: OpticKind # Methods ix :: Index AsciiText -> Optic' (IxKind AsciiText) NoIx AsciiText (IxValue AsciiText) #
Read (Unsafe AsciiText) Source #	Since: 1.0.1
Instance details Defined in Text.Ascii.Unsafe Methods readsPrec :: Int -> ReadS (Unsafe AsciiText) # readList :: ReadS [Unsafe AsciiText] # readPrec :: ReadPrec (Unsafe AsciiText) # readListPrec :: ReadPrec [Unsafe AsciiText] #
type Item AsciiText Source #
Instance details Defined in Text.Ascii.Internal type Item AsciiText = AsciiChar
type Tokens AsciiText Source #
Instance details Defined in Text.Ascii.Internal type Tokens AsciiText = AsciiText
type Token AsciiText Source #
Instance details Defined in Text.Ascii.Internal type Token AsciiText = AsciiChar
type Index AsciiText Source #	Since: 1.0.1
Instance details Defined in Text.Ascii.Internal type Index AsciiText = Int
type IxValue AsciiText Source #	Since: 1.0.1
Instance details Defined in Text.Ascii.Internal type IxValue AsciiText = AsciiChar
type IxKind AsciiText Source #
Instance details Defined in Text.Ascii.Internal type IxKind AsciiText = An_AffineTraversal

Creation

empty :: AsciiText Source #

The empty text.

>>> empty
""

Complexity: $\Theta(1)$

Since: 1.0.0

singleton :: AsciiChar -> AsciiText Source #

A text consisting of a single ASCII character.

>>> singleton [char| 'w' |]
"w"

Complexity: $\Theta(1)$

Since: 1.0.0

ascii :: QuasiQuoter Source #

Allows constructing ASCII strings from literals, whose correctness is checked by the compiler.

Currently accepts literal syntax similar to the Haskell parser, with escape sequences preceded by '\'. In particular, this includes the double quote (see the example below).

>>> [ascii| "\"Nyan!\", said the catboy." |]
"\"Nyan!\", said the catboy."

Since: 1.0.0

Basic interface

cons :: AsciiChar -> AsciiText -> AsciiText Source #

Adds a character to the front of a text. This requires copying, which gives its complexity.

>>> cons [char| 'n' |] [ascii| "eko" |]
"neko"

Complexity: $\Theta(n)$

Since: 1.0.0

snoc :: AsciiText -> AsciiChar -> AsciiText Source #

Adds a character to the back of a text. This requires copying, which gives its complexity.

>>> snoc [ascii| "nek" |] [char| 'o' |]
"neko"

Complexity: $\Theta(n)$

Since: 1.0.0

uncons :: AsciiText -> Maybe (AsciiChar, AsciiText) Source #

If the argument is non-empty, gives Just the first character and the rest, and Nothing otherwise.

>>> uncons empty
Nothing
>>> uncons . singleton $ [char| 'w' |]
Just ('0x77',"")
>>> uncons [ascii| "nekomimi" |]
Just ('0x6e',"ekomimi")

Complexity: $\Theta(1)$

Since: 1.0.0

unsnoc :: AsciiText -> Maybe (AsciiText, AsciiChar) Source #

If the argument is non-empty, gives Just the initial segment and the last character, and Nothing otherwise.

>>> unsnoc empty
Nothing
>>> unsnoc . singleton $ [char| 'w' |]
Just ("",'0x77')
>>> unsnoc [ascii| "catboy" |]
Just ("catbo",'0x79')

Complexity: $\Theta(1)$

Since: 1.0.0

length :: AsciiText -> Int Source #

The number of characters (and, since this is ASCII, bytes) in the text.

>>> length . singleton $ [char| 'w' |]
1
>>> length [ascii| "nyan nyan" |]
9

Complexity: $\Theta(1)$

Since: 1.0.0

Transformations

map :: (AsciiChar -> AsciiChar) -> AsciiText -> AsciiText Source #

Copy, and apply the function to each element of, the text.

>>> map (\c -> fromMaybe c . upcase $ c) [ascii| "nyan!" |]
"NYAN!"

Complexity: $\Theta(n)$

Since: 1.0.0

intercalate :: AsciiText -> [AsciiText] -> AsciiText Source #

Takes a text and a list of texts, and concatenates the list after interspersing the first argument between each element of the list.

>>> intercalate [ascii| " ~ " |] []
""
>>> intercalate [ascii| " ~ " |] [[ascii| "nyan" |]]
"nyan"
>>> intercalate [ascii| " ~ " |] . Prelude.replicate 3 $ [ascii| "nyan" |]
"nyan ~ nyan ~ nyan"
>>> intercalate empty . Prelude.replicate 3 $ [ascii| "nyan" |]
"nyannyannyan"

Complexity: $\Theta(n)$

Since: 1.0.0

intersperse :: AsciiChar -> AsciiText -> AsciiText Source #

Takes a character, and places it between the characters of a text.

>>> intersperse [char| '~' |] empty
""
>>> intersperse [char| '~' |] . singleton $ [char| 'w' |]
"w"
>>> intersperse [char| '~' |] [ascii| "nyan" |]
"n~y~a~n"

Complexity: $\Theta(n)$

Since: 1.0.0

transpose :: [AsciiText] -> [AsciiText] Source #

Transpose the rows and columns of the argument. This uses transpose internally, and thus, isn't very efficient.

>>> transpose []
[]
>>> transpose [[ascii| "w" |]]
["w"]
>>> transpose [[ascii| "nyan" |]]
["n","y","a","n"]
>>> transpose . Prelude.replicate 3 $ [ascii| "nyan" |]
["nnn","yyy","aaa","nnn"]
>>> transpose [[ascii| "cat" |], [ascii| "boy" |], [ascii| "nyan" |]]
["cbn","aoy","tya","n"]

Complexity: $\Theta(n)$

Since: 1.0.0

reverse :: AsciiText -> AsciiText Source #

Reverse the text.

>>> reverse empty
""
>>> reverse . singleton $ [char| 'w' |]
"w"
>>> reverse [ascii| "catboy goes nyan" |]
"nayn seog yobtac"

Complexity: $\Theta(n)$

Since: 1.0.0

replace Source #

Arguments

:: AsciiText	`needle` to search for
-> AsciiText	`replacement` to replace `needle` with
-> AsciiText	`haystack` in which to search
-> AsciiText

replace needle replacement haystack, given a needle of length $n$ and a haystack of length $h$, replaces each non-overlapping occurrence of needle in haystack with replacement. If the needle is empty, no replacement will be performed. Equivalent to intercalate replacement . splitOn needle $ haystack.

>>> replace empty [ascii| "NYAN~" |] [ascii| "catboy goes nyan nyan" |]
"catboy goes nyan nyan"
>>> replace [ascii| "nyan" |] [ascii| "NYAN~" |] empty
""
>>> replace [ascii| "nyan" |] [ascii| "NYAN~" |] [ascii| "catboy goes nyan nyan" |]
"catboy goes NYAN~ NYAN~"
>>> replace [ascii| "nyan" |] [ascii| "NYAN~" |] [ascii| "nyanyan" |]
"NYAN~yan"

On complexity

This function is based on a variant of the NSN algorithm, except it does not detect overlapping needles. Its average-case analysis is based on the assumption that:

All ASCII symbols are equally likely to occur in both the needle and the haystack; and
The needle has length at least two; and
Both the needle and the haystack contain at least four unique symbols.

We fall back to split for singleton needles, and there is no work to be done on empty needles, which means the second assumption always holds.

Worst-case behaviour becomes more likely the more your input satisfies the following conditions:

The needle and/or haystack use few unique symbols (less than four is the worst); or
The haystack contains many instances of the second symbol of the needle which don't lead to full matches.

The analysis below also doesn't factor in the cost of performing the replacement, as this is (among other things) proportional to the number of matches of the needle (and thus is hard to quantify).

Complexity: $\Theta(h)$ average case, $\Theta(h \cdot n$) worst-case.

See also: Note that all the below are references for the original algorithm, which includes searching for overlapping needles. Thus, our implementation will perform better than the analysis suggests.

Description and pseudocode
"Algorithms on Strings" by Crochemore, Hancart and Lecroq. PDF available here.

Since: 1.0.1

Justification

justifyLeft :: Int -> AsciiChar -> AsciiText -> AsciiText Source #

justifyLeft n c t produces a result of length $\max \{ {\tt n }, {\tt length} \; {\tt t} \}$, consisting of a copy of t followed by (zero or more) copies of c.

>>> justifyLeft (-100) [char| '~' |] [ascii| "nyan" |]
"nyan"
>>> justifyLeft 4 [char| '~' |] [ascii| "nyan" |]
"nyan"
>>> justifyLeft 10 [char| '~' |] [ascii| "nyan" |]
"nyan~~~~~~"

Complexity: $\Theta(n)$

Since: 1.0.1

justifyRight :: Int -> AsciiChar -> AsciiText -> AsciiText Source #

justifyRight n c t produces a result of length $\max \{ {\tt n }, {\tt length} \; {\tt t} \}$, consisting of (zero or more) copies of c followed by a copy of t.

>>> justifyRight (-100) [char| '~' |] [ascii| "nyan" |]
"nyan"
>>> justifyRight 4 [char| '~' |] [ascii| "nyan" |]
"nyan"
>>> justifyRight 10 [char| '~' |] [ascii| "nyan" |]
"~~~~~~nyan"

Complexity: $\Theta(n)$

Since: 1.0.1

center :: Int -> AsciiChar -> AsciiText -> AsciiText Source #

center n c t produces a result of length ${\tt k } = \max \{ {\tt n }, {\tt length} \; {\tt t} \}$, consisting of:

$\lceil \frac{{\tt k} - {\tt length} \; {\tt t}}{2} \rceil$ copies of c; followed by
A copy of t; followed by
Zero or more copies of c

This means that the centering is 'left-biased'. This mimicks the behaviour of the function of the same name in the text package, although that function's documenation does not describe this behaviour.

>>> center (-100) [char| '~' |] [ascii| "nyan" |]
"nyan"
>>> center 4 [char| '~' |] [ascii| "nyan" |]
"nyan"
>>> center 5 [char| '~' |] [ascii| "nyan" |]
"~nyan"
>>> center 6 [char| '~' |] [ascii| "nyan" |]
"~nyan~"

Complexity: $\Theta(n)$

Since: 1.0.1

Folds

foldl :: (a -> AsciiChar -> a) -> a -> AsciiText -> a Source #

Left-associative fold of a text.

>>> foldl (\acc c -> [ascii| "f(" |] <> acc <> singleton c <> [ascii| ")" |]) [ascii| "a" |] [ascii| "catboy" |]
"f(f(f(f(f(f(ac)a)t)b)o)y)"

Complexity: $\Theta(n)$

Since: 1.0.0

foldl' :: (a -> AsciiChar -> a) -> a -> AsciiText -> a Source #

Left-associative fold of a text, strict in the accumulator.

>>> foldl' (\acc c -> [ascii| "f(" |] <> acc <> singleton c <> [ascii| ")" |]) [ascii| "a" |] [ascii| "catboy" |]
"f(f(f(f(f(f(ac)a)t)b)o)y)"

Complexity: $\Theta(n)$

Since: 1.0.0

foldr :: (AsciiChar -> a -> a) -> a -> AsciiText -> a Source #

Right-associative fold of a text.

>>> foldr (\c acc -> [ascii| "f(" |] <> acc <> singleton c <> [ascii| ")" |]) [ascii| "a" |] [ascii| "catboy" |]
"f(f(f(f(f(f(ay)o)b)t)a)c)"

Complexity: $\Theta(n)$

Since: 1.0.0

foldr' :: (AsciiChar -> a -> a) -> a -> AsciiText -> a Source #

Right-associative fold of a text, strict in the accumulator.

>>> foldr' (\c acc -> [ascii| "f(" |] <> acc <> singleton c <> [ascii| ")" |]) [ascii| "a" |] [ascii| "catboy" |]
"f(f(f(f(f(f(ay)o)b)t)a)c)"

Complexity: $\Theta(n)$

Since: 1.0.0

Special folds

concat :: [AsciiText] -> AsciiText Source #

Concatenate a list of texts.

>>> concat []
""
>>> concat [[ascii| "catboy" |]]
"catboy"
>>> concat . Prelude.replicate 4 $ [ascii| "nyan" |]
"nyannyannyannyan"

Complexity: $\Theta(n)$

Since: 1.0.0

concatMap :: (AsciiChar -> AsciiText) -> AsciiText -> AsciiText Source #

Map a text-producing function over a text, then concatenate the results.

>>> concatMap singleton empty
""
>>> concatMap singleton [ascii| "nyan" |]
"nyan"
>>> concatMap (\c -> singleton c <> singleton c) [ascii| "nekomimi" |]
"nneekkoommiimmii"

Complexity: $\Theta(n)$

Since: 1.0.0

Construction

Scans

scanl Source #

Arguments

:: (AsciiChar -> AsciiChar -> AsciiChar)	accumulator -> element -> new accumulator
-> AsciiChar	Starting accumulator value
-> AsciiText	Input of length $n$
-> AsciiText	Output of length $n + 1$

scanl is similar to foldl, but returns a list of successive values from the left.

Complexity: $\Theta(n)$

Since: 1.0.0

scanr Source #

Arguments

:: (AsciiChar -> AsciiChar -> AsciiChar)	element -> accumulator -> new accumulator
-> AsciiChar	Starting accumulator value
-> AsciiText	Input of length $n$
-> AsciiText	Output of length $n + 1$

scanr is similar to foldr, but returns a list of successive values from the right.

Complexity: $\Theta(n)$

Since: 1.0.0

Accumulating maps

mapAccumL :: (a -> AsciiChar -> (a, AsciiChar)) -> a -> AsciiText -> (a, AsciiText) Source #

Like a combination of map and foldl'. Applies a function to each element of an AsciiText, passing an accumulating parameter from left to right, and returns a final AsciiText along with the accumulating parameter's final value.

Complexity: $\Theta(n)$

Since: 1.0.0

mapAccumR :: (a -> AsciiChar -> (a, AsciiChar)) -> a -> AsciiText -> (a, AsciiText) Source #

Like a combination of map and foldr. Applies a function to each element of an AsciiText, passing an accumulating parameter from right to left, and returns a final AsciiText along with the accumulating parameter's final value.

Complexity: $\Theta(n)$

Since: 1.0.0

Generation and unfolding

replicate :: Int -> AsciiText -> AsciiText Source #

replicate n t consists of t repeated $\max \{ 0, {\tt n } \}$ times.

>>> replicate (-100) [ascii| "nyan" |]
""
>>> replicate 0 [ascii| "nyan" |]
""
>>> replicate 3 [ascii| "nyan" |]
"nyannyannyan"

Complexity: $\Theta(n \cdot m)$

Since: 1.0.1

unfoldr :: (a -> Maybe (AsciiChar, a)) -> a -> AsciiText Source #

Similar to unfoldr. The function parameter takes a seed value, and produces either Nothing (indicating that we're done) or Just an AsciiChar and a new seed value. unfoldr then, given a starting seed, will repeatedly call the function parameter on successive seed values, returning the resulting AsciiText, based on the AsciiChars produced, in the same order.

Complexity: $\Theta(n)$

Since: 1.0.0

unfoldrN :: Int -> (a -> Maybe (AsciiChar, a)) -> a -> (AsciiText, Maybe a) Source #

Similar to unfoldr, but also takes a maximum length parameter. The second element of the result tuple will be Nothing if we finished with the function argument returning Nothing, and Just the final seed value if we reached the maximum length before that happened.

Complexity: $\Theta(n)$

Since: 1.0.0

Substrings

Breaking strings

take :: Int -> AsciiText -> AsciiText Source #

take n t returns the prefix of t with length $\min \{ \max \{ 0, {\tt n}\}, {\tt length} \; {\tt t} \}$.

>>> take (-100) [ascii| "catboy" |]
""
>>> take 0 [ascii| "catboy" |]
""
>>> take 4 [ascii| "catboy" |]
"catb"
>>> take 1000 [ascii| "catboy" |]
"catboy"

Complexity: $\Theta(1)$

Since: 1.0.0

takeEnd :: Int -> AsciiText -> AsciiText Source #

takeEnd n t returns the suffix of t with length $\min \{ \max \{0, {\tt n} \}, {\tt length} \; {\tt t} \}$.

>>> takeEnd (-100) [ascii| "catboy" |]
""
>>> takeEnd 0 [ascii| "catboy" |]
""
>>> takeEnd 4 [ascii| "catboy" |]
"tboy"
>>> takeEnd 1000 [ascii| "catboy" |]
"catboy"

/Complexity:] $\Theta(1)$

Since: 1.0.1

drop :: Int -> AsciiText -> AsciiText Source #

drop n t returns the suffix of t with length $\max \{ 0, \min \{ {\tt length} \; {\tt t}, {\tt length} \; {\tt t} - {\tt n} \} \}$.

>>> drop (-100) [ascii| "catboy" |]
"catboy"
>>> drop 0 [ascii| "catboy" |]
"catboy"
>>> drop 4 [ascii| "catboy" |]
"oy"
>>> drop 1000 [ascii| "catboy" |]
""

Complexity: $\Theta(1)$

Since: 1.0.0

dropEnd :: Int -> AsciiText -> AsciiText Source #

dropEnd n t returns the prefix of t with length $\max \{ 0, \min \{ {\tt length} \; {\tt t}, {\tt length} \; {\tt t} - {\tt n} \} \}$.

>>> dropEnd (-100) [ascii| "catboy" |]
"catboy"
>>> dropEnd 0 [ascii| "catboy" |]
"catboy"
>>> dropEnd 4 [ascii| "catboy" |]
"ca"
>>> dropEnd 1000 [ascii| "catboy" |]
""

Complexity: $\Theta(1)$

Since: 1.0.1

takeWhile :: (AsciiChar -> Bool) -> AsciiText -> AsciiText Source #

takeWhile p t returns the longest prefix of t of characters that satisfy p.

>>> takeWhile ((Just Lower ==) . caseOf) empty
""
>>> takeWhile ((Just Lower ==) . caseOf) [ascii| "catboy goes nyan" |]
"catboy"

Complexity: $\Theta(n)$

Since: 1.0.0

takeWhileEnd :: (AsciiChar -> Bool) -> AsciiText -> AsciiText Source #

takeWhileEnd p t returns the longest suffix of t of characters that satisfy p. Equivalent to reverse . takeWhile p . reverse.

>>> takeWhileEnd ((Just Lower ==) . caseOf) empty
""
>>> takeWhileEnd ((Just Lower ==) . caseOf) [ascii| "catboy goes nyan" |]
"nyan"

Complexity: $\Theta(n)$

Since: 1.0.0

dropWhile :: (AsciiChar -> Bool) -> AsciiText -> AsciiText Source #

dropWhile p t returns the suffix remaining after takeWhile p t.

>>> dropWhile ((Just Lower ==) . caseOf) empty
""
>>> dropWhile ((Just Lower ==) . caseOf) [ascii| "catboy goes nyan" |]
" goes nyan"

Complexity: $\Theta(n)$

Since: 1.0.0

dropWhileEnd :: (AsciiChar -> Bool) -> AsciiText -> AsciiText Source #

dropWhileEnd p t returns the prefix remaining after takeWhileEnd p t. Equivalent to reverse . dropWhile p . reverse.

>>> dropWhileEnd ((Just Lower ==) . caseOf) empty
""
>>> dropWhileEnd ((Just Lower ==) . caseOf) [ascii| "catboy goes nyan" |]
"catboy goes "

Complexity: $\Theta(n)$

Since: 1.0.0

dropAround :: (AsciiChar -> Bool) -> AsciiText -> AsciiText Source #

dropAround p is equivalent to dropWhile p . dropWhileEnd p.

>>> dropAround ((Just Lower ==) . caseOf) empty
""
>>> dropAround ((Just Lower ==) . caseOf) [ascii| "catboy goes nyan" |]
" goes "

Complexity: $\Theta(n)$

Since: 1.0.1

strip :: AsciiText -> AsciiText Source #

Remove the longest prefix and suffix of the input comprised entirely of whitespace characters. We define a 'whitespace character' as any of the following:

TAB (0x09)
LF (0x0a)
VT (0x0b)
FF (0x0c)
CR (0x0d)
Space (0x20)

>>> strip empty
""
>>> strip [ascii| "catboy goes nyan" |]
"catboy goes nyan"
>>> strip [ascii| "\n\n    \tcatboy goes nyan" |]
"catboy goes nyan"
>>> strip [ascii| "catboy goes nyan   \t\t\n" |]
"catboy goes nyan"
>>> strip [ascii| "\n\n    \tcatboy goes nyan   \t\t\n" |]
"catboy goes nyan"

Complexity: $\Theta(n)$

Since: 1.0.1

stripStart :: AsciiText -> AsciiText Source #

Remove the longest prefix of the input comprised entirely of whitespace characters. We define a 'whitespace character' as any of the following:

TAB (0x09)
LF (0x0a)
VT (0x0b)
FF (0x0c)
CR (0x0d)
Space (0x20)

>>> stripStart empty
""
>>> stripStart [ascii| "catboy goes nyan" |]
"catboy goes nyan"
>>> stripStart [ascii| "\n\n    \tcatboy goes nyan" |]
"catboy goes nyan"
>>> stripStart [ascii| "catboy goes nyan   \t\t\n" |]
"catboy goes nyan   \t\t\n"
>>> stripStart [ascii| "\n\n    \tcatboy goes nyan   \t\t\n" |]
"catboy goes nyan   \t\t\n"

Complexity: $\Theta(n)$

Since: 1.0.1

stripEnd :: AsciiText -> AsciiText Source #

Remove the longest suffix of the input comprised entirely of whitespace characters. We define a 'whitespace character' as any of the following:

TAB (0x09)
LF (0x0a)
VT (0x0b)
FF (0x0c)
CR (0x0d)
Space (0x20)

>>> stripEnd empty
""
>>> stripEnd [ascii| "catboy goes nyan" |]
"catboy goes nyan"
>>> stripEnd [ascii| "\n\n    \tcatboy goes nyan" |]
"\n\n    \tcatboy goes nyan"
>>> stripEnd [ascii| "catboy goes nyan   \t\t\n" |]
"catboy goes nyan"
>>> stripEnd [ascii| "\n\n    \tcatboy goes nyan   \t\t\n" |]
"\n\n    \tcatboy goes nyan"

Complexity: $\Theta(n)$

Since: 1.0.1

splitAt :: Int -> AsciiText -> (AsciiText, AsciiText) Source #

splitAt n t is equivalent to (take n t, drop n t).

>>> splitAt (-3) [ascii| "catboy" |]
("","catboy")
>>> splitAt 0 [ascii| "catboy" |]
("","catboy")
>>> splitAt 3 [ascii| "catboy" |]
("cat","boy")
>>> splitAt 1000 [ascii| "catboy" |]
("catboy","")

Complexity: $\Theta(1)$

Since: 1.0.0

breakOn :: AsciiText -> AsciiText -> (AsciiText, AsciiText) Source #

breakOn needle haystack, given a needle of length $n$ and a haystack of length $h$, attempts to find the first instance of needle in haystack. If successful, return a tuple consisting of:

The prefix of haystack before the match; and
The rest of haystack, starting with the match.

If the needle is empty, this returns (empty, haystack). If no match can be found, this instead returns (haystack, empty).

If you need to repeatedly split on the same needle, consider breakOnAll, as this will be more efficient due to only having to run the matching algorithm once.

>>> breakOn empty [ascii| "catboy goes nyan" |]
("","catboy goes nyan")
>>> breakOn [ascii| "nyan" |] empty
("","")
>>> breakOn [ascii| "goes" |] [ascii| "catboy goes nyan" |]
("catboy ","goes nyan")
>>> breakOn [ascii| "catboy" |] [ascii| "nyan nyan nyan" |]
("nyan nyan nyan","")

On complexity

This function is based on a variant of the NSN algorithm, except it does not detect overlapping needles. Its average-case analysis is based on the assumption that:

All ASCII symbols are equally likely to occur in both the needle and the haystack; and
The needle has length at least two; and
Both the needle and the haystack contain at least four unique symbols.

We fall back to split for singleton needles, and there is no work to be done on empty needles, which means the second assumption always holds.

Worst-case behaviour becomes more likely the more your input satisfies the following conditions:

The needle and/or haystack use few unique symbols (less than four is the worst); or
The haystack contains many instances of the second symbol of the needle which don't lead to full matches.

Complexity: $\Theta(h)$ average case, $\Theta(h \cdot n$) worst-case.

See also: Note that all the below are references for the original algorithm, which includes searching for overlapping needles. Thus, our implementation will perform better than the analysis suggests.

Description and pseudocode
"Algorithms on Strings" by Crochemore, Hancart and Lecroq. PDF available here.

Since: 1.0.1

breakOnEnd :: AsciiText -> AsciiText -> (AsciiText, AsciiText) Source #

breakOnEnd needle haystack, given a needle of length $n$ and a haystack of length $h$, attempts to find the last instance of needle in haystack. If successful, return a tuple consisting of:

The prefix of haystack up to, and including, the match; and
The rest of haystack.

If the needle is empty, this returns (haystack, empty). If no match can be found, this instead returns (empty, haystack).

This function is similar to breakOn. If you need to repeatedly split on the same needle, consider breakOnAll, as this will be more efficient due to only having to run the matching algorithm once.

>>> breakOnEnd empty [ascii| "catboy goes nyan" |]
("catboy goes nyan","")
>>> breakOnEnd [ascii| "nyan" |] empty
("","")
>>> breakOnEnd [ascii| "goes" |] [ascii| "catboy goes nyan" |]
("catboy goes"," nyan")
>>> breakOnEnd [ascii| "catboy" |] [ascii| "nyan nyan nyan" |]
("","nyan nyan nyan")

On complexity

This function is based on a variant of the NSN algorithm, except it does not detect overlapping needles. Its average-case analysis is based on the assumption that:

All ASCII symbols are equally likely to occur in both the needle and the haystack; and
The needle has length at least two; and
Both the needle and the haystack contain at least four unique symbols.

We fall back to split for singleton needles, and there is no work to be done on empty needles, which means the second assumption always holds.

Worst-case behaviour becomes more likely the more your input satisfies the following conditions:

The needle and/or haystack use few unique symbols (less than four is the worst); or
The haystack contains many instances of the second symbol of the needle which don't lead to full matches.

Complexity: $\Theta(h)$ average case, $\Theta(h \cdot n$) worst-case.

See also: Note that all the below are references for the original algorithm, which includes searching for overlapping needles. Thus, our implementation will perform better than the analysis suggests.

Description and pseudocode
"Algorithms on Strings" by Crochemore, Hancart and Lecroq. PDF available here.

Since: 1.0.1

break :: (AsciiChar -> Bool) -> AsciiText -> (AsciiText, AsciiText) Source #

break p t is equivalent to (takeWhile (not p) t, dropWhile (not p) t).

>>> break ([char| ' ' |] ==) [ascii| "catboy goes nyan" |]
("catboy"," goes nyan")

Complexity: $\Theta(n)$

Since: 1.0.0

span :: (AsciiChar -> Bool) -> AsciiText -> (AsciiText, AsciiText) Source #

span p t is equivalent to (takeWhile p t, dropWhile p t).

>>> span ([char| 'c' |] ==) [ascii| "catboy goes nyan" |]
("c","atboy goes nyan")

Complexity: $\Theta(n)$

Since: 1.0.0

group :: AsciiText -> [AsciiText] Source #

Separate a text into a list of texts such that:

Their concatenation is equal to the original argument; and
Equal adjacent characters in the original argument are in the same text in the result.

This is a specialized form of groupBy, and is about 40% faster than groupBy ==.

>>> group empty
[]
>>> group . singleton $ [char| 'w' |]
["w"]
>>> group [ascii| "nyan" |]
["n","y","a","n"]
>>> group [ascii| "nyaaaan" |]
["n","y","aaaa","n"]

Complexity: $\Theta(n)$

Since: 1.0.0

groupBy :: (AsciiChar -> AsciiChar -> Bool) -> AsciiText -> [AsciiText] Source #

Separate a text into a list of texts such that:

Their concatenation is equal to the original argument; and
Adjacent characters for which the function argument returns True are in the same text in the result.

group is a special case for the function argument ==; it is also about 40% faster.

>>> groupBy (<) empty
[]
>>> groupBy (<) . singleton $ [char| 'w' |]
["w"]
>>> groupBy (<) [ascii| "catboy goes nyan" |]
["c","atboy"," goes"," nyan"]

Complexity: $\Theta(n)$

Since: 1.0.0

inits :: AsciiText -> [AsciiText] Source #

All prefixes of the argument, from shortest to longest.

>>> inits empty
[""]
>>> inits . singleton $ [char| 'w' |]
["","w"]
>>> inits [ascii| "nyan" |]
["","n","ny","nya","nyan"]

Complexity: $\Theta(n)$

Since: 1.0.0

tails :: AsciiText -> [AsciiText] Source #

All suffixes of the argument, from shortest to longest.

>>> tails empty
[""]
>>> tails . singleton $ [char| 'w' |]
["w",""]
>>> tails [ascii| "nyan" |]
["nyan","yan","an","n",""]

Complexity: $\Theta(n)$

Since: 1.0.0

Breaking into many substrings

splitOn :: AsciiText -> AsciiText -> [AsciiText] Source #

splitOn needle haystack, given a needle of length $n$ and a haystack of length $h$, breaks haystack into pieces, separated by needle. Any occurrences of needle in haystack are consumed.

>>> splitOn empty [ascii| "catboy goes nyan and goes nyan" |]
["catboy goes nyan and goes nyan"]
>>> splitOn [ascii| "nyan" |] empty
[""]
>>> splitOn [ascii| "nyan" |] [ascii| "catboy goes nyan and goes nyan" |]
["catboy goes "," and goes ",""]
>>> splitOn [ascii| "nyan" |] [ascii| "nyan" |]
["",""]
>>> splitOn [ascii| "nyan" |] [ascii| "catboy" |]
["catboy"]

On complexity

This function is based on a variant of the NSN algorithm, except it does not detect overlapping needles. Its average-case analysis is based on the assumption that:

All ASCII symbols are equally likely to occur in both the needle and the haystack; and
The needle has length at least two; and
Both the needle and the haystack contain at least four unique symbols.

We fall back to split for singleton needles, and there is no work to be done on empty needles, which means the second assumption always holds.

Worst-case behaviour becomes more likely the more your input satisfies the following conditions:

The needle and/or haystack use few unique symbols (less than four is the worst); or
The haystack contains many instances of the second symbol of the needle which don't lead to full matches.

Complexity: $\Theta(h)$ average case, $\Theta(h \cdot n$) worst-case.

See also: Note that all the below are references for the original algorithm, which includes searching for overlapping needles. Thus, our implementation will perform better than the analysis suggests.

Description and pseudocode
"Algorithms on Strings" by Crochemore, Hancart and Lecroq. PDF available here.

Since: 1.0.1

split :: (AsciiChar -> Bool) -> AsciiText -> [AsciiText] Source #

split p t separates t into components delimited by separators, for which p returns True. The results do not contain the separators.

$n$ adjacent separators result in $n - 1$ empty components in the result.

>>> split ([char| '~' |] ==) empty
[]
>>> split ([char| '~' |] ==) . singleton $ [char| '~' |]
["",""]
>>> split ([char| '~' |] ==) [ascii| "nyan" |]
["nyan"]
>>> split ([char| '~' |] ==) [ascii| "~nyan" |]
["","nyan"]
>>> split ([char| '~' |] ==) [ascii| "nyan~" |]
["nyan",""]
>>> split ([char| '~' |] ==) [ascii| "nyan~nyan"|]
["nyan","nyan"]
>>> split ([char| '~' |] ==) [ascii| "nyan~~nyan" |]
["nyan","","nyan"]
>>> split ([char| '~' |] ==) [ascii| "nyan~~~nyan" |]
["nyan","","","nyan"]

Complexity: $\Theta(n)$

Since: 1.0.0

chunksOf :: Int -> AsciiText -> [AsciiText] Source #

Splits a text into chunks of the specified length. Equivalent to repeatedly takeing the specified length until exhaustion. The last item in the result may thus be shorter than requested.

For any n <= 0 and any t, chunksOf n t yields the empty list. This is identical to the behaviour of the function of the same name in the text package, although it doesn't document this fact.

>>> chunksOf (-100) [ascii| "I am a catboy" |]
[]
>>> chunksOf (-100) empty
[]
>>> chunksOf 0 [ascii| "I am a catboy" |]
[]
>>> chunksOf 0 empty
[]
>>> chunksOf 1 [ascii| "I am a catboy" |]
["I"," ","a","m"," ","a"," ","c","a","t","b","o","y"]
>>> chunksOf 1 empty
[]
>>> chunksOf 2 [ascii| "I am a catboy" |]
["I ","am"," a"," c","at","bo","y"]
>>> chunksOf 300 [ascii| "I am a catboy" |]
["I am a catboy"]

Complexity: $\Theta(n)$

Since: 1.0.1

Breaking into lines and words

lines :: AsciiText -> [AsciiText] Source #

Identical to the functions of the same name in the text package, and the Prelude. Specifically, separates the argument into pieces, with LF characters (0x0a) as separators. A single trailing LF is ignored. None of the final results contain LF.

We chose to follow the same semantics for this function as the text package and the Prelude. This has some consequences, which the documentation of both the text package and the Prelude does not properly explain. We list them here - bear these in mind when using this function, as well as unlines:

No platform-specific concept of a 'newline' is ever used by this function. Separation is done on LF, and only LF, regardless of platform. The documentation in both the text package and the Prelude confusingly refers to 'newline characters', which is a category error. We thus specify that LF is the character being split on, rather than mentioning 'newlines' in any way, shape or form.
unlines . lines is not the same as id. This is misleadingly described in the Prelude, which claims that (its version of) unlines is 'an inverse operation' to (its version of) lines. For a precise explanation of why this is the case, please see the documentation for unlines.
lines is not the same as split ([char| 'n' |] ==). See the doctests below for a demonstration of how they differ.

>>> lines empty
[]
>>> split ([char| '\n' |] ==) empty
[]
>>> lines [ascii| "catboy goes nyan" |]
["catboy goes nyan"]
>>> split ([char| '\n' |] ==) [ascii| "catboy goes nyan" |]
["catboy goes nyan"]
>>> lines [ascii| "catboy goes nyan\n" |]
["catboy goes nyan"]
>>> split ([char| '\n' |] ==) [ascii| "catboy goes nyan\n" |]
["catboy goes nyan",""]
>>> lines [ascii| "\ncatboy\n\n\ngoes\n\nnyan\n\n" |]
["","catboy","","","goes","","nyan",""]
>>> split ([char| '\n' |] ==) [ascii| "\ncatboy\n\n\ngoes\n\nnyan\n\n" |]
["","catboy","","","goes","","nyan","",""]
>>> lines [ascii| "\r\ncatboy\r\ngoes\r\nnyan\r\n" |]
["\r","catboy\r","goes\r","nyan\r"]
>>> split ([char| '\n' |] ==) [ascii| "\r\ncatboy\r\ngoes\r\nnyan\r\n" |]
["\r","catboy\r","goes\r","nyan\r",""]

Complexity: $\Theta(n)$

See also: Wikipedia on newlines

Since: 1.0.1

unlines :: Foldable f => f AsciiText -> AsciiText Source #

Identical to the functions of the same name in the text package and the Prelude. Specifically, appends an LF character to each of the texts, then concatenates. Equivalent to foldMap (`snoc' [char| '\n' |]).

We chose to follow the same semantics for this function as the text package and the Prelude. This has some consequences, which the documentation of both the text package and the Prelude does not properly explain. We list them here - bear these in mind when using this function, as well as lines:

No platform-specific concept of a 'newline' is ever used by this function. The documentation in both the text package and the Prelude confusing refer to appending a 'terminating newline', which is only a correct statement on platforms where a newline is LF. We thus specify that we append LF, rather than mentioning 'newlines' in any way, shape or form.
unlines . lines is not the same as id. This is misleadingly described in the Prelude, which claims that (its version of) unlines is 'an inverse operation' to (its version of) lines. See the doctests below for a demonstration of this.

>>> unlines []
""
>>> unlines [[ascii| "nyan" |]]
"nyan\n"
>>> unlines . Prelude.replicate 3 $ [ascii| "nyan" |]
"nyan\nnyan\nnyan\n"
>>> unlines . lines $ [ascii| "catboy goes nyan" |]
"catboy goes nyan\n"

Complexity: $\Theta(n)$

See also: Wikipedia on newlines

Since: 1.0.1

words :: AsciiText -> [AsciiText] Source #

Identical to the functions of the same name in the text package and the Prelude. Specifically, separates the argument into pieces, with (non-empty sequences of) word separator characters as separators. A 'word separator character' is any of the following:

TAB (0x09)
LF (0x0a)
VT (0x0b)
FF (0x0c)
CR (0x0d)
Space (0x20)

None of the final results contain any word separator characters. Any sequence of leading, or trailing, word separator characters will be ignored.

We chose to follow the same semantics for this function as the text package and the Prelude. This has the consequence that unwords . words is not the same as id, although the documentation for the Prelude confusingly describes (its version of) unwords as an 'inverse operation' to (its version of) words. See the documentation for unwords for an explanation of why this is the case.

>>> words empty
[]
>>> words [ascii| "catboy" |]
["catboy"]
>>> words [ascii| "  \r\r\r\rcatboy   \n\rgoes\t\t\t\t\tnyan\n  " |]
["catboy","goes","nyan"]

Complexity: $\Theta(n)$

Since: 1.0.1

unwords :: [AsciiText] -> AsciiText Source #

Identical to the functions of the same name in the text package and the Prelude. Specifically, links together adjacent texts with a Space character. Equivalent to intercalate [ascii| " " |].

We chose to follow the same semantics for this function as the text package and the Prelude. This has the consequence that unwords . words is not the same as id, although the documentation for the Prelude confusingly describes (its version of) unwords as an 'inverse operation' to (its version of) words. See the doctests below for a demonstration of this.

>>> unwords []
""
>>> unwords [[ascii| "nyan" |]]
"nyan"
>>> unwords . Prelude.replicate 3 $ [ascii| "nyan" |]
"nyan nyan nyan"
>>> unwords . words $ [ascii| "nyan\nnyan\nnyan" |]
"nyan nyan nyan"

Complexity: $\Theta(n)$

Since: 1.0.1

View patterns

stripPrefix :: AsciiText -> AsciiText -> Maybe AsciiText Source #

Return Just the suffix of the second text if it has the first text as a prefix, Nothing otherwise.

>>> stripPrefix [ascii| "catboy" |] empty
Nothing
>>> stripPrefix empty [ascii| "catboy" |]
Just "catboy"
>>> stripPrefix [ascii| "nyan" |] [ascii| "nyan" |]
Just ""
>>> stripPrefix [ascii| "nyan" |] [ascii| "catboy" |]
Nothing
>>> stripPrefix [ascii| "catboy" |] [ascii| "catboy goes nyan" |]
Just " goes nyan"

Complexity: $\Theta(n)$

Since: 1.0.0

stripSuffix :: AsciiText -> AsciiText -> Maybe AsciiText Source #

Return Just the prefix of the second text if it has the first text as a suffix, Nothing otherwise.

>>> stripSuffix [ascii| "catboy" |] empty
Nothing
>>> stripSuffix empty [ascii| "catboy" |]
Just "catboy"
>>> stripSuffix [ascii| "nyan" |] [ascii| "nyan" |]
Just ""
>>> stripSuffix [ascii| "nyan" |] [ascii| "catboy" |]
Nothing
>>> stripSuffix [ascii| "nyan" |] [ascii| "catboy goes nyan" |]
Just "catboy goes "

Complexity: $\Theta(n)$

Since: 1.0.0

stripInfix :: AsciiText -> AsciiText -> Maybe (AsciiText, AsciiText) Source #

stripInfix needle haystack, given a needle of length $n$ and a haystack of length $h$, attempts to find the first instance of needle in haystack. If successful, it returns Just the pair consisting of:

All the text in haystack before the first instance of needle; and
All the text in haystack after, but not including, the first instance of needle.

If there is no instance of needle in haystack, this returns Nothing.

>>> stripInfix [ascii| "catboy" |] empty
Nothing
>>> stripInfix empty [ascii| "nyan catboy nyan nyan" |]
Nothing
>>> stripInfix [ascii| "catboy" |] [ascii| "catboy" |]
Just ("","")
>>> stripInfix [ascii| "catboy" |] [ascii| "nyan catboy" |]
Just ("nyan ","")
>>> stripInfix [ascii| "catboy" |] [ascii| "catboy nyan" |]
Just (""," nyan")
>>> stripInfix [ascii| "catboy" |] [ascii| "nyan catboy nyan nyan" |]
Just ("nyan "," nyan nyan")
>>> stripInfix [ascii| "nyan" |] [ascii| "nyanyanyan" |]
Just ("","yanyan")

On complexity

This function is based on a variant of the NSN algorithm, except it does not detect overlapping needles. Its average-case analysis is based on the assumption that:

All ASCII symbols are equally likely to occur in both the needle and the haystack; and
The needle has length at least two; and
Both the needle and the haystack contain at least four unique symbols.

We fall back to split for singleton needles, and there is no work to be done on empty needles, which means the second assumption always holds.

Worst-case behaviour becomes more likely the more your input satisfies the following conditions:

The needle and/or haystack use few unique symbols (less than four is the worst); or
The haystack contains many instances of the second symbol of the needle which don't lead to full matches.

Complexity: $\Theta(h)$ average case, $\Theta(h \cdot n$) worst-case.

See also: Note that all the below are references for the original algorithm, which includes searching for overlapping needles. Thus, our implementation will perform better than the analysis suggests.

Description and pseudocode
"Algorithms on Strings" by Crochemore, Hancart and Lecroq. PDF available here.

Since: 1.0.1

commonPrefixes :: AsciiText -> AsciiText -> Maybe (AsciiText, AsciiText, AsciiText) Source #

Find the longest non-empty common prefix of the arguments and return it, along with the remaining suffixes of both arguments. If the arguments lack a common, non-empty prefix, returns Nothing.

>>> commonPrefixes empty [ascii| "catboy" |]
Nothing
>>> commonPrefixes [ascii| "catboy" |] empty
Nothing
>>> commonPrefixes [ascii| "catboy" |] [ascii| "nyan" |]
Nothing
>>> commonPrefixes [ascii| "catboy" |] [ascii| "catboy" |]
Just ("catboy","","")
>>> commonPrefixes [ascii| "nyan" |] [ascii| "nyan nyan" |]
Just ("nyan",""," nyan")

Complexity: $\Theta(n)$

Since: 1.0.1

Searching

filter :: (AsciiChar -> Bool) -> AsciiText -> AsciiText Source #

Return the text comprised of all the characters that satisfy the function argument (that is, for which it returns True), in the same order as in the original.

>>> filter ([char| 'n' |] ==) empty
""
>>> filter ([char| 'n' |] ==) [ascii| "catboy" |]
""
>>> filter ([char| 'n' |] ==) [ascii| "nyan" |]
"nn"

Complexity: $\Theta(n)$

Since: 1.0.0

breakOnAll :: AsciiText -> AsciiText -> [(AsciiText, AsciiText)] Source #

breakOnAll needle haystack, given a needle of length $n$ and a haystack of length $h$, finds all non-overlapping instances of needle in haystack. Each result consists of the following elements:

The prefix prior to the match; and
The match, followed by the rest of the string.

If given an empty needle, the result is a singleton list containing a pair of the entire haystack and the empty text. If given an empty haystack, the result is an empty list.

>>> breakOnAll empty [ascii| "nyan nyan nyan" |]
[("nyan nyan nyan","")]
>>> breakOnAll [ascii| "nyan" |] empty
[]
>>> breakOnAll [ascii| "nyan" |] [ascii| "nyan" |]
[("","nyan")]
>>> breakOnAll [ascii| "nyan" |] [ascii| "nyan nyan nyan" |]
[("","nyan nyan nyan"),("nyan ","nyan nyan"),("nyan nyan ","nyan")]
>>> breakOnAll [ascii| "nyan" |] [ascii| "nyanyanyan" |]
[("","nyanyanyan"),("nyanya","nyan")]

On complexity

This function is based on a variant of the NSN algorithm, except it does not detect overlapping needles. Its average-case analysis is based on the assumption that:

All ASCII symbols are equally likely to occur in both the needle and the haystack; and
The needle has length at least two; and
Both the needle and the haystack contain at least four unique symbols.

We fall back to split for singleton needles, and there is no work to be done on empty needles, which means the second assumption always holds.

Worst-case behaviour becomes more likely the more your input satisfies the following conditions:

The needle and/or haystack use few unique symbols (less than four is the worst); or
The haystack contains many instances of the second symbol of the needle which don't lead to full matches.

Complexity: $\Theta(h)$ average case, $\Theta(h \cdot n$) worst-case.

See also: Note that all the below are references for the original algorithm, which includes searching for overlapping needles. Thus, our implementation will perform better than the analysis suggests.

Description and pseudocode
"Algorithms on Strings" by Crochemore, Hancart and Lecroq. PDF available here.

Since: 1.0.1

find :: (AsciiChar -> Bool) -> AsciiText -> Maybe AsciiChar Source #

Returns Just the first character in the text satisfying the predicate, Nothing otherwise.

>>> find ([char| 'n' |] ==) empty
Nothing
>>> find ([char| 'n' |] ==) [ascii| "catboy" |]
Nothing
>>> find ([char| 'n' |] ==) [ascii| "nyan" |]
Just '0x6e'
>>> find ([char| 'n' |] /=) [ascii| "nyan" |]
Just '0x79'

Complexity: $\Theta(n)$

Since: 1.0.0

partition :: (AsciiChar -> Bool) -> AsciiText -> (AsciiText, AsciiText) Source #

partition p t is equivalent to (filter p t, filter (not p) t).

>>> partition ([char| 'n' |] ==) empty
("","")
>>> partition ([char| 'n' |] ==) . singleton $ [char| 'n' |]
("n","")
>>> partition ([char| 'n' |] ==) . singleton $ [char| 'w' |]
("","w")
>>> partition ([char| 'n' |] ==) [ascii| "nyan!" |]
("nn","ya!")

Complexity: $\Theta(n)$

Since: 1.0.0

Indexing

index :: AsciiText -> Int -> Maybe AsciiChar Source #

Retrieve the ASCII character at the given position in the text. Indexes begin from 0. If the index provided is invalid (that is, less than 0, equal to the length of the text, or greater), return Nothing; otherwise, return Just the character at that position.

>>> index [ascii| "nyan nyan nyan" |] (-100)
Nothing
>>> index [ascii| "nyan nyan nyan" |] 0
Just '0x6e'
>>> index [ascii| "nyan nyan nyan" |] 5
Just '0x6e'
>>> index [ascii| "nyan nyan nyan" |] 2000
Nothing

Complexity: $\Theta(1)$

Since: 1.0.1

findIndex :: (AsciiChar -> Bool) -> AsciiText -> Maybe Int Source #

Returns Just the first index in the text such that the character at that index satisfies the predicate, Nothing otherwise.

>>> findIndex ([char| 'n' |] ==) empty
Nothing
>>> findIndex ([char| 'n' |] ==) . singleton $ [char| 'n' |]
Just 0
>>> findIndex ([char| 'n' |] ==) . singleton $ [char| 'w' |]
Nothing
>>> findIndex ([char| 'n' |] ==) [ascii| "nyan" |]
Just 0

Complexity: $\Theta(n)$

Since: 1.0.0

count :: AsciiText -> AsciiText -> Int Source #

count needle haystack, given a needle of length $n$ and a haystack of length $h$, counts the number of non-overlapping occurrences of needle in haystack. If needle is empty, the count will be 0.

>>> count empty [ascii| "nyan nyan nyan" |]
0
>>> count [ascii| "nyan" |] empty
0
>>> count [ascii| "nyan" |] [ascii| "nyan" |]
1
>>> count [ascii| "nyan" |] [ascii| "nyan nyan nyan" |]
3
>>> count [ascii| "nyan" |] [ascii| "nyanyanyan" |]
2

On complexity

This function is based on a variant of the NSN algorithm, except it does not detect overlapping needles. Its average-case analysis is based on the assumption that:

All ASCII symbols are equally likely to occur in both the needle and the haystack; and
The needle has length at least two; and
Both the needle and the haystack contain at least four unique symbols.

We fall back to split for singleton needles, and there is no work to be done on empty needles, which means the second assumption always holds.

Worst-case behaviour becomes more likely the more your input satisfies the following conditions:

The needle and/or haystack use few unique symbols (less than four is the worst); or
The haystack contains many instances of the second symbol of the needle which don't lead to full matches.

Complexity: $\Theta(h)$ average case, $\Theta(h \cdot n$) worst-case.

See also: Note that all the below are references for the original algorithm, which includes searching for overlapping needles. Thus, our implementation will perform better than the analysis suggests.

Description and pseudocode
"Algorithms on Strings" by Crochemore, Hancart and Lecroq. PDF available here.

Since: 1.0.1

Zipping

zip :: AsciiText -> AsciiText -> [(AsciiChar, AsciiChar)] Source #

'Pair off' characters in both texts at corresponding indices. The result will be limited to the shorter of the two arguments.

>>> zip empty [ascii| "catboy" |]
[]
>>> zip [ascii| "catboy" |] empty
[]
>>> zip [ascii| "catboy" |] [ascii| "nyan" |]
[('0x63','0x6e'),('0x61','0x79'),('0x74','0x61'),('0x62','0x6e')]

Complexity: $\Theta(n)$

Since: 1.0.0

zipWith :: (AsciiChar -> AsciiChar -> AsciiChar) -> AsciiText -> AsciiText -> AsciiText Source #

Combine two texts together in lockstep to produce a new text, using the provided function to combine ASCII characters at each step. The length of the result will be the minimum of the lengths of the two text arguments.

>>> zipWith max [ascii| "I am a catboy" |] empty
""
>>> zipWith max empty [ascii| "I am a catboy" |]
""
>>> zipWith max [ascii| "I am a catboy" |] [ascii| "Nyan nyan nyan nyan nyan" |]
"Nyan nycntnyy"

Complexity: $\Theta(n)$

Since: 1.0.1

Conversions

fromText :: Text -> Maybe AsciiText Source #

Try and convert a Text into an AsciiText. Gives Nothing if the Text contains any symbols which lack an ASCII equivalent.

>>> fromText "catboy"
Just "catboy"
>>> fromText "😺😺😺😺😺"
Nothing

Complexity: $\Theta(n)$

Since: 1.0.0

fromByteString :: ByteString -> Maybe AsciiText Source #

Try and convert a ByteString into an AsciiText. Gives Nothing if the ByteString contains any bytes outside the ASCII range (that is, from 0 to 127 inclusive).

>>> fromByteString "catboy"
Just "catboy"
>>> fromByteString . BS.pack $ [128]
Nothing

Complexity: $\Theta(n)$

Since: 1.0.0

toText :: AsciiText -> Text Source #

Convert an AsciiText into a Text (by copying).

>>> toText empty
""
>>> toText . singleton $ [char| 'w' |]
"w"
>>> toText [ascii| "nyan" |]
"nyan"

Complexity: $\Theta(n)$

Since: 1.0.0

toByteString :: AsciiText -> ByteString Source #

Reinterpret an AsciiText as a ByteString (without copying).

>>> toByteString empty
""
>>> toByteString . singleton $ [char| 'w' |]
"w"
>>> toByteString [ascii| "nyan" |]
"nyan"

Complexity: $\Theta(1)$

Since: 1.0.0

Optics

textWise :: Prism' Text AsciiText Source #

A convenient demonstration of the relationship between toText and fromText.

>>> preview textWise "catboy goes nyan"
Just "catboy goes nyan"
>>> preview textWise "😺😺😺😺😺"
Nothing
>>> review textWise [ascii| "catboys are amazing" |]
"catboys are amazing"

Since: 1.0.0

byteStringWise :: Prism' ByteString AsciiText Source #

A convenient demonstration of the relationship between toByteString and fromByteString.

>>> preview byteStringWise "catboy goes nyan"
Just "catboy goes nyan"
>>> preview byteStringWise . BS.pack $ [0xff, 0xff]
Nothing
>>> review byteStringWise [ascii| "I love catboys" |]
"I love catboys"

Since: 1.0.0

packedChars :: Iso' [AsciiChar] AsciiText Source #

Pack (or unpack) a list of ASCII characters into a text.

>>> view packedChars [[char| 'n' |], [char| 'y' |], [char| 'a' |], [char| 'n' |]]
"nyan"
>>> review packedChars [ascii| "nyan" |]
['0x6e','0x79','0x61','0x6e']

Since: 1.0.1

chars :: IxTraversal' Int64 AsciiText AsciiChar Source #

Traverse the individual ASCII characters in a text.

>>> preview (elementOf chars 0) [ascii| "I am a catboy" |]
Just '0x49'
>>> preview (elementOf chars 100) [ascii| "I am a catboy" |]
Nothing
>>> iover chars (\i x -> bool x [char| 'w' |] . even $ i) [ascii| "I am a catboy" |]
"w wmwawcwtwow"

Since: 1.0.1

packedBytes :: Prism' [Word8] AsciiText Source #

Pack (or unpack) a list of bytes into a text. This isn't as capable as packedChars, as that would allow construction of invalid texts.

>>> preview packedBytes [0x6e, 0x79, 0x61, 0x6e]
Just "nyan"
>>> preview packedBytes [0xff, 0xfe]
Nothing
>>> review packedBytes [ascii| "nyan" |]
[110,121,97,110]

Since: 1.0.1

bytes :: IxFold Int64 AsciiText Word8 Source #

Access the individual bytes in a text. This isn't as capable as chars, as that would allow modifications of the bytes in ways that aren't valid as ASCII.

>>> itoListOf bytes [ascii| "I am a catboy" |]
[(0,73),(1,32),(2,97),(3,109),(4,32),(5,97),(6,32),(7,99),(8,97),(9,116),(10,98),(11,111),(12,121)]

Since: 1.0.1