text-ascii-1.0.1: ASCII string and character processing.

Copyright(C) 2021 Koz Ross
LicenseApache 2.0
MaintainerKoz Ross <koz.ross@retro-freedom.nz>
Stabilitystable
PortabilityGHC only
Safe HaskellTrustworthy
LanguageHaskell2010

Text.Ascii

Contents

Description

An implementation of ASCII strings.

This module is designed for qualified importing:

import qualified Text.Ascii as Ascii

See also: Wikipedia entry for ASCII

Synopsis

Type

data AsciiText Source #

A string of ASCII characters, represented as a packed byte array.

Since: 1.0.0

Instances
IsList AsciiText Source #

Since: 1.0.0

Instance details

Defined in Text.Ascii.Internal

Associated Types

type Item AsciiText :: Type #

Eq AsciiText Source #

Since: 1.0.0

Instance details

Defined in Text.Ascii.Internal

Ord AsciiText Source #

Since: 1.0.0

Instance details

Defined in Text.Ascii.Internal

Show AsciiText Source #

Since: 1.0.0

Instance details

Defined in Text.Ascii.Internal

Semigroup AsciiText Source #

Since: 1.0.0

Instance details

Defined in Text.Ascii.Internal

Monoid AsciiText Source #

Since: 1.0.0

Instance details

Defined in Text.Ascii.Internal

FoldCase AsciiText Source #

Since: 1.0.1

Instance details

Defined in Text.Ascii.Internal

NFData AsciiText Source #

Since: 1.0.0

Instance details

Defined in Text.Ascii.Internal

Methods

rnf :: AsciiText -> () #

Stream AsciiText Source #

Since: 1.0.1

Instance details

Defined in Text.Ascii.Internal

Associated Types

type Token AsciiText :: Type #

type Tokens AsciiText :: Type #

VisualStream AsciiText Source #

Since: 1.0.1

Instance details

Defined in Text.Ascii.Internal

TraversableStream AsciiText Source #

Since: 1.0.1

Instance details

Defined in Text.Ascii.Internal

Ixed AsciiText Source #

Since: 1.0.1

Instance details

Defined in Text.Ascii.Internal

Associated Types

type IxKind AsciiText :: OpticKind #

Read (Unsafe AsciiText) Source #

Since: 1.0.1

Instance details

Defined in Text.Ascii.Unsafe

type Item AsciiText Source # 
Instance details

Defined in Text.Ascii.Internal

type Tokens AsciiText Source # 
Instance details

Defined in Text.Ascii.Internal

type Token AsciiText Source # 
Instance details

Defined in Text.Ascii.Internal

type Index AsciiText Source #

Since: 1.0.1

Instance details

Defined in Text.Ascii.Internal

type IxValue AsciiText Source #

Since: 1.0.1

Instance details

Defined in Text.Ascii.Internal

type IxKind AsciiText Source # 
Instance details

Defined in Text.Ascii.Internal

Creation

empty :: AsciiText Source #

The empty text.

>>> empty
""

Complexity: \(\Theta(1)\)

Since: 1.0.0

singleton :: AsciiChar -> AsciiText Source #

A text consisting of a single ASCII character.

>>> singleton [char| 'w' |]
"w"

Complexity: \(\Theta(1)\)

Since: 1.0.0

ascii :: QuasiQuoter Source #

Allows constructing ASCII strings from literals, whose correctness is checked by the compiler.

Currently accepts literal syntax similar to the Haskell parser, with escape sequences preceded by '\'. In particular, this includes the double quote (see the example below).

>>> [ascii| "\"Nyan!\", said the catboy." |]
"\"Nyan!\", said the catboy."

Since: 1.0.0

Basic interface

cons :: AsciiChar -> AsciiText -> AsciiText Source #

Adds a character to the front of a text. This requires copying, which gives its complexity.

>>> cons [char| 'n' |] [ascii| "eko" |]
"neko"

Complexity: \(\Theta(n)\)

Since: 1.0.0

snoc :: AsciiText -> AsciiChar -> AsciiText Source #

Adds a character to the back of a text. This requires copying, which gives its complexity.

>>> snoc [ascii| "nek" |] [char| 'o' |]
"neko"

Complexity: \(\Theta(n)\)

Since: 1.0.0

uncons :: AsciiText -> Maybe (AsciiChar, AsciiText) Source #

If the argument is non-empty, gives Just the first character and the rest, and Nothing otherwise.

>>> uncons empty
Nothing
>>> uncons . singleton $ [char| 'w' |]
Just ('0x77',"")
>>> uncons [ascii| "nekomimi" |]
Just ('0x6e',"ekomimi")

Complexity: \(\Theta(1)\)

Since: 1.0.0

unsnoc :: AsciiText -> Maybe (AsciiText, AsciiChar) Source #

If the argument is non-empty, gives Just the initial segment and the last character, and Nothing otherwise.

>>> unsnoc empty
Nothing
>>> unsnoc . singleton $ [char| 'w' |]
Just ("",'0x77')
>>> unsnoc [ascii| "catboy" |]
Just ("catbo",'0x79')

Complexity: \(\Theta(1)\)

Since: 1.0.0

length :: AsciiText -> Int Source #

The number of characters (and, since this is ASCII, bytes) in the text.

>>> length . singleton $ [char| 'w' |]
1
>>> length [ascii| "nyan nyan" |]
9

Complexity: \(\Theta(1)\)

Since: 1.0.0

Transformations

map :: (AsciiChar -> AsciiChar) -> AsciiText -> AsciiText Source #

Copy, and apply the function to each element of, the text.

>>> map (\c -> fromMaybe c . upcase $ c) [ascii| "nyan!" |]
"NYAN!"

Complexity: \(\Theta(n)\)

Since: 1.0.0

intercalate :: AsciiText -> [AsciiText] -> AsciiText Source #

Takes a text and a list of texts, and concatenates the list after interspersing the first argument between each element of the list.

>>> intercalate [ascii| " ~ " |] []
""
>>> intercalate [ascii| " ~ " |] [[ascii| "nyan" |]]
"nyan"
>>> intercalate [ascii| " ~ " |] . Prelude.replicate 3 $ [ascii| "nyan" |]
"nyan ~ nyan ~ nyan"
>>> intercalate empty . Prelude.replicate 3 $ [ascii| "nyan" |]
"nyannyannyan"

Complexity: \(\Theta(n)\)

Since: 1.0.0

intersperse :: AsciiChar -> AsciiText -> AsciiText Source #

Takes a character, and places it between the characters of a text.

>>> intersperse [char| '~' |] empty
""
>>> intersperse [char| '~' |] . singleton $ [char| 'w' |]
"w"
>>> intersperse [char| '~' |] [ascii| "nyan" |]
"n~y~a~n"

Complexity: \(\Theta(n)\)

Since: 1.0.0

transpose :: [AsciiText] -> [AsciiText] Source #

Transpose the rows and columns of the argument. This uses transpose internally, and thus, isn't very efficient.

>>> transpose []
[]
>>> transpose [[ascii| "w" |]]
["w"]
>>> transpose [[ascii| "nyan" |]]
["n","y","a","n"]
>>> transpose . Prelude.replicate 3 $ [ascii| "nyan" |]
["nnn","yyy","aaa","nnn"]
>>> transpose [[ascii| "cat" |], [ascii| "boy" |], [ascii| "nyan" |]]
["cbn","aoy","tya","n"]

Complexity: \(\Theta(n)\)

Since: 1.0.0

reverse :: AsciiText -> AsciiText Source #

Reverse the text.

>>> reverse empty
""
>>> reverse . singleton $ [char| 'w' |]
"w"
>>> reverse [ascii| "catboy goes nyan" |]
"nayn seog yobtac"

Complexity: \(\Theta(n)\)

Since: 1.0.0

replace Source #

Arguments

:: AsciiText

needle to search for

-> AsciiText

replacement to replace needle with

-> AsciiText

haystack in which to search

-> AsciiText 

replace needle replacement haystack, given a needle of length \(n\) and a haystack of length \(h\), replaces each non-overlapping occurrence of needle in haystack with replacement. If the needle is empty, no replacement will be performed. Equivalent to intercalate replacement . splitOn needle $ haystack.

>>> replace empty [ascii| "NYAN~" |] [ascii| "catboy goes nyan nyan" |]
"catboy goes nyan nyan"
>>> replace [ascii| "nyan" |] [ascii| "NYAN~" |] empty
""
>>> replace [ascii| "nyan" |] [ascii| "NYAN~" |] [ascii| "catboy goes nyan nyan" |]
"catboy goes NYAN~ NYAN~"
>>> replace [ascii| "nyan" |] [ascii| "NYAN~" |] [ascii| "nyanyan" |]
"NYAN~yan"

On complexity

This function is based on a variant of the NSN algorithm, except it does not detect overlapping needles. Its average-case analysis is based on the assumption that:

  • All ASCII symbols are equally likely to occur in both the needle and the haystack; and
  • The needle has length at least two; and
  • Both the needle and the haystack contain at least four unique symbols.

We fall back to split for singleton needles, and there is no work to be done on empty needles, which means the second assumption always holds.

Worst-case behaviour becomes more likely the more your input satisfies the following conditions:

  • The needle and/or haystack use few unique symbols (less than four is the worst); or
  • The haystack contains many instances of the second symbol of the needle which don't lead to full matches.

The analysis below also doesn't factor in the cost of performing the replacement, as this is (among other things) proportional to the number of matches of the needle (and thus is hard to quantify).

Complexity: \(\Theta(h)\) average case, \(\Theta(h \cdot n\)) worst-case.

See also: Note that all the below are references for the original algorithm, which includes searching for overlapping needles. Thus, our implementation will perform better than the analysis suggests.

Since: 1.0.1

Justification

justifyLeft :: Int -> AsciiChar -> AsciiText -> AsciiText Source #

justifyLeft n c t produces a result of length \(\max \{ {\tt n }, {\tt length} \; {\tt t} \}\), consisting of a copy of t followed by (zero or more) copies of c.

>>> justifyLeft (-100) [char| '~' |] [ascii| "nyan" |]
"nyan"
>>> justifyLeft 4 [char| '~' |] [ascii| "nyan" |]
"nyan"
>>> justifyLeft 10 [char| '~' |] [ascii| "nyan" |]
"nyan~~~~~~"

Complexity: \(\Theta(n)\)

Since: 1.0.1

justifyRight :: Int -> AsciiChar -> AsciiText -> AsciiText Source #

justifyRight n c t produces a result of length \(\max \{ {\tt n }, {\tt length} \; {\tt t} \}\), consisting of (zero or more) copies of c followed by a copy of t.

>>> justifyRight (-100) [char| '~' |] [ascii| "nyan" |]
"nyan"
>>> justifyRight 4 [char| '~' |] [ascii| "nyan" |]
"nyan"
>>> justifyRight 10 [char| '~' |] [ascii| "nyan" |]
"~~~~~~nyan"

Complexity: \(\Theta(n)\)

Since: 1.0.1

center :: Int -> AsciiChar -> AsciiText -> AsciiText Source #

center n c t produces a result of length \({\tt k } = \max \{ {\tt n }, {\tt length} \; {\tt t} \}\), consisting of:

  • \(\lceil \frac{{\tt k} - {\tt length} \; {\tt t}}{2} \rceil\) copies of c; followed by
  • A copy of t; followed by
  • Zero or more copies of c

This means that the centering is 'left-biased'. This mimicks the behaviour of the function of the same name in the text package, although that function's documenation does not describe this behaviour.

>>> center (-100) [char| '~' |] [ascii| "nyan" |]
"nyan"
>>> center 4 [char| '~' |] [ascii| "nyan" |]
"nyan"
>>> center 5 [char| '~' |] [ascii| "nyan" |]
"~nyan"
>>> center 6 [char| '~' |] [ascii| "nyan" |]
"~nyan~"

Complexity: \(\Theta(n)\)

Since: 1.0.1

Folds

foldl :: (a -> AsciiChar -> a) -> a -> AsciiText -> a Source #

Left-associative fold of a text.

>>> foldl (\acc c -> [ascii| "f(" |] <> acc <> singleton c <> [ascii| ")" |]) [ascii| "a" |] [ascii| "catboy" |]
"f(f(f(f(f(f(ac)a)t)b)o)y)"

Complexity: \(\Theta(n)\)

Since: 1.0.0

foldl' :: (a -> AsciiChar -> a) -> a -> AsciiText -> a Source #

Left-associative fold of a text, strict in the accumulator.

>>> foldl' (\acc c -> [ascii| "f(" |] <> acc <> singleton c <> [ascii| ")" |]) [ascii| "a" |] [ascii| "catboy" |]
"f(f(f(f(f(f(ac)a)t)b)o)y)"

Complexity: \(\Theta(n)\)

Since: 1.0.0

foldr :: (AsciiChar -> a -> a) -> a -> AsciiText -> a Source #

Right-associative fold of a text.

>>> foldr (\c acc -> [ascii| "f(" |] <> acc <> singleton c <> [ascii| ")" |]) [ascii| "a" |] [ascii| "catboy" |]
"f(f(f(f(f(f(ay)o)b)t)a)c)"

Complexity: \(\Theta(n)\)

Since: 1.0.0

foldr' :: (AsciiChar -> a -> a) -> a -> AsciiText -> a Source #

Right-associative fold of a text, strict in the accumulator.

>>> foldr' (\c acc -> [ascii| "f(" |] <> acc <> singleton c <> [ascii| ")" |]) [ascii| "a" |] [ascii| "catboy" |]
"f(f(f(f(f(f(ay)o)b)t)a)c)"

Complexity: \(\Theta(n)\)

Since: 1.0.0

Special folds

concat :: [AsciiText] -> AsciiText Source #

Concatenate a list of texts.

>>> concat []
""
>>> concat [[ascii| "catboy" |]]
"catboy"
>>> concat . Prelude.replicate 4 $ [ascii| "nyan" |]
"nyannyannyannyan"

Complexity: \(\Theta(n)\)

Since: 1.0.0

concatMap :: (AsciiChar -> AsciiText) -> AsciiText -> AsciiText Source #

Map a text-producing function over a text, then concatenate the results.

>>> concatMap singleton empty
""
>>> concatMap singleton [ascii| "nyan" |]
"nyan"
>>> concatMap (\c -> singleton c <> singleton c) [ascii| "nekomimi" |]
"nneekkoommiimmii"

Complexity: \(\Theta(n)\)

Since: 1.0.0

Construction

Scans

scanl Source #

Arguments

:: (AsciiChar -> AsciiChar -> AsciiChar)

accumulator -> element -> new accumulator

-> AsciiChar

Starting accumulator value

-> AsciiText

Input of length \(n\)

-> AsciiText

Output of length \(n + 1\)

scanl is similar to foldl, but returns a list of successive values from the left.

Complexity: \(\Theta(n)\)

Since: 1.0.0

scanr Source #

Arguments

:: (AsciiChar -> AsciiChar -> AsciiChar)

element -> accumulator -> new accumulator

-> AsciiChar

Starting accumulator value

-> AsciiText

Input of length \(n\)

-> AsciiText

Output of length \(n + 1\)

scanr is similar to foldr, but returns a list of successive values from the right.

Complexity: \(\Theta(n)\)

Since: 1.0.0

Accumulating maps

mapAccumL :: (a -> AsciiChar -> (a, AsciiChar)) -> a -> AsciiText -> (a, AsciiText) Source #

Like a combination of map and foldl'. Applies a function to each element of an AsciiText, passing an accumulating parameter from left to right, and returns a final AsciiText along with the accumulating parameter's final value.

Complexity: \(\Theta(n)\)

Since: 1.0.0

mapAccumR :: (a -> AsciiChar -> (a, AsciiChar)) -> a -> AsciiText -> (a, AsciiText) Source #

Like a combination of map and foldr. Applies a function to each element of an AsciiText, passing an accumulating parameter from right to left, and returns a final AsciiText along with the accumulating parameter's final value.

Complexity: \(\Theta(n)\)

Since: 1.0.0

Generation and unfolding

replicate :: Int -> AsciiText -> AsciiText Source #

replicate n t consists of t repeated \(\max \{ 0, {\tt n } \}\) times.

>>> replicate (-100) [ascii| "nyan" |]
""
>>> replicate 0 [ascii| "nyan" |]
""
>>> replicate 3 [ascii| "nyan" |]
"nyannyannyan"

Complexity: \(\Theta(n \cdot m)\)

Since: 1.0.1

unfoldr :: (a -> Maybe (AsciiChar, a)) -> a -> AsciiText Source #

Similar to unfoldr. The function parameter takes a seed value, and produces either Nothing (indicating that we're done) or Just an AsciiChar and a new seed value. unfoldr then, given a starting seed, will repeatedly call the function parameter on successive seed values, returning the resulting AsciiText, based on the AsciiChars produced, in the same order.

Complexity: \(\Theta(n)\)

Since: 1.0.0

unfoldrN :: Int -> (a -> Maybe (AsciiChar, a)) -> a -> (AsciiText, Maybe a) Source #

Similar to unfoldr, but also takes a maximum length parameter. The second element of the result tuple will be Nothing if we finished with the function argument returning Nothing, and Just the final seed value if we reached the maximum length before that happened.

Complexity: \(\Theta(n)\)

Since: 1.0.0

Substrings

Breaking strings

take :: Int -> AsciiText -> AsciiText Source #

take n t returns the prefix of t with length \(\min \{ \max \{ 0, {\tt n}\}, {\tt length} \; {\tt t} \}\).

>>> take (-100) [ascii| "catboy" |]
""
>>> take 0 [ascii| "catboy" |]
""
>>> take 4 [ascii| "catboy" |]
"catb"
>>> take 1000 [ascii| "catboy" |]
"catboy"

Complexity: \(\Theta(1)\)

Since: 1.0.0

takeEnd :: Int -> AsciiText -> AsciiText Source #

takeEnd n t returns the suffix of t with length \(\min \{ \max \{0, {\tt n} \}, {\tt length} \; {\tt t} \}\).

>>> takeEnd (-100) [ascii| "catboy" |]
""
>>> takeEnd 0 [ascii| "catboy" |]
""
>>> takeEnd 4 [ascii| "catboy" |]
"tboy"
>>> takeEnd 1000 [ascii| "catboy" |]
"catboy"

/Complexity:] \(\Theta(1)\)

Since: 1.0.1

drop :: Int -> AsciiText -> AsciiText Source #

drop n t returns the suffix of t with length \(\max \{ 0, \min \{ {\tt length} \; {\tt t}, {\tt length} \; {\tt t} - {\tt n} \} \}\).

>>> drop (-100) [ascii| "catboy" |]
"catboy"
>>> drop 0 [ascii| "catboy" |]
"catboy"
>>> drop 4 [ascii| "catboy" |]
"oy"
>>> drop 1000 [ascii| "catboy" |]
""

Complexity: \(\Theta(1)\)

Since: 1.0.0

dropEnd :: Int -> AsciiText -> AsciiText Source #

dropEnd n t returns the prefix of t with length \(\max \{ 0, \min \{ {\tt length} \; {\tt t}, {\tt length} \; {\tt t} - {\tt n} \} \}\).

>>> dropEnd (-100) [ascii| "catboy" |]
"catboy"
>>> dropEnd 0 [ascii| "catboy" |]
"catboy"
>>> dropEnd 4 [ascii| "catboy" |]
"ca"
>>> dropEnd 1000 [ascii| "catboy" |]
""

Complexity: \(\Theta(1)\)

Since: 1.0.1

takeWhile :: (AsciiChar -> Bool) -> AsciiText -> AsciiText Source #

takeWhile p t returns the longest prefix of t of characters that satisfy p.

>>> takeWhile ((Just Lower ==) . caseOf) empty
""
>>> takeWhile ((Just Lower ==) . caseOf) [ascii| "catboy goes nyan" |]
"catboy"

Complexity: \(\Theta(n)\)

Since: 1.0.0

takeWhileEnd :: (AsciiChar -> Bool) -> AsciiText -> AsciiText Source #

takeWhileEnd p t returns the longest suffix of t of characters that satisfy p. Equivalent to reverse . takeWhile p . reverse.

>>> takeWhileEnd ((Just Lower ==) . caseOf) empty
""
>>> takeWhileEnd ((Just Lower ==) . caseOf) [ascii| "catboy goes nyan" |]
"nyan"

Complexity: \(\Theta(n)\)

Since: 1.0.0

dropWhile :: (AsciiChar -> Bool) -> AsciiText -> AsciiText Source #

dropWhile p t returns the suffix remaining after takeWhile p t.

>>> dropWhile ((Just Lower ==) . caseOf) empty
""
>>> dropWhile ((Just Lower ==) . caseOf) [ascii| "catboy goes nyan" |]
" goes nyan"

Complexity: \(\Theta(n)\)

Since: 1.0.0

dropWhileEnd :: (AsciiChar -> Bool) -> AsciiText -> AsciiText Source #

dropWhileEnd p t returns the prefix remaining after takeWhileEnd p t. Equivalent to reverse . dropWhile p . reverse.

>>> dropWhileEnd ((Just Lower ==) . caseOf) empty
""
>>> dropWhileEnd ((Just Lower ==) . caseOf) [ascii| "catboy goes nyan" |]
"catboy goes "

Complexity: \(\Theta(n)\)

Since: 1.0.0

dropAround :: (AsciiChar -> Bool) -> AsciiText -> AsciiText Source #

dropAround p is equivalent to dropWhile p . dropWhileEnd p.

>>> dropAround ((Just Lower ==) . caseOf) empty
""
>>> dropAround ((Just Lower ==) . caseOf) [ascii| "catboy goes nyan" |]
" goes "

Complexity: \(\Theta(n)\)

Since: 1.0.1

strip :: AsciiText -> AsciiText Source #

Remove the longest prefix and suffix of the input comprised entirely of whitespace characters. We define a 'whitespace character' as any of the following:

  • TAB (0x09)
  • LF (0x0a)
  • VT (0x0b)
  • FF (0x0c)
  • CR (0x0d)
  • Space (0x20)
>>> strip empty
""
>>> strip [ascii| "catboy goes nyan" |]
"catboy goes nyan"
>>> strip [ascii| "\n\n    \tcatboy goes nyan" |]
"catboy goes nyan"
>>> strip [ascii| "catboy goes nyan   \t\t\n" |]
"catboy goes nyan"
>>> strip [ascii| "\n\n    \tcatboy goes nyan   \t\t\n" |]
"catboy goes nyan"

Complexity: \(\Theta(n)\)

Since: 1.0.1

stripStart :: AsciiText -> AsciiText Source #

Remove the longest prefix of the input comprised entirely of whitespace characters. We define a 'whitespace character' as any of the following:

  • TAB (0x09)
  • LF (0x0a)
  • VT (0x0b)
  • FF (0x0c)
  • CR (0x0d)
  • Space (0x20)
>>> stripStart empty
""
>>> stripStart [ascii| "catboy goes nyan" |]
"catboy goes nyan"
>>> stripStart [ascii| "\n\n    \tcatboy goes nyan" |]
"catboy goes nyan"
>>> stripStart [ascii| "catboy goes nyan   \t\t\n" |]
"catboy goes nyan   \t\t\n"
>>> stripStart [ascii| "\n\n    \tcatboy goes nyan   \t\t\n" |]
"catboy goes nyan   \t\t\n"

Complexity: \(\Theta(n)\)

Since: 1.0.1

stripEnd :: AsciiText -> AsciiText Source #

Remove the longest suffix of the input comprised entirely of whitespace characters. We define a 'whitespace character' as any of the following:

  • TAB (0x09)
  • LF (0x0a)
  • VT (0x0b)
  • FF (0x0c)
  • CR (0x0d)
  • Space (0x20)
>>> stripEnd empty
""
>>> stripEnd [ascii| "catboy goes nyan" |]
"catboy goes nyan"
>>> stripEnd [ascii| "\n\n    \tcatboy goes nyan" |]
"\n\n    \tcatboy goes nyan"
>>> stripEnd [ascii| "catboy goes nyan   \t\t\n" |]
"catboy goes nyan"
>>> stripEnd [ascii| "\n\n    \tcatboy goes nyan   \t\t\n" |]
"\n\n    \tcatboy goes nyan"

Complexity: \(\Theta(n)\)

Since: 1.0.1

splitAt :: Int -> AsciiText -> (AsciiText, AsciiText) Source #

splitAt n t is equivalent to (take n t, drop n t).

>>> splitAt (-3) [ascii| "catboy" |]
("","catboy")
>>> splitAt 0 [ascii| "catboy" |]
("","catboy")
>>> splitAt 3 [ascii| "catboy" |]
("cat","boy")
>>> splitAt 1000 [ascii| "catboy" |]
("catboy","")

Complexity: \(\Theta(1)\)

Since: 1.0.0

breakOn :: AsciiText -> AsciiText -> (AsciiText, AsciiText) Source #

breakOn needle haystack, given a needle of length \(n\) and a haystack of length \(h\), attempts to find the first instance of needle in haystack. If successful, return a tuple consisting of:

  • The prefix of haystack before the match; and
  • The rest of haystack, starting with the match.

If the needle is empty, this returns (empty, haystack). If no match can be found, this instead returns (haystack, empty).

If you need to repeatedly split on the same needle, consider breakOnAll, as this will be more efficient due to only having to run the matching algorithm once.

>>> breakOn empty [ascii| "catboy goes nyan" |]
("","catboy goes nyan")
>>> breakOn [ascii| "nyan" |] empty
("","")
>>> breakOn [ascii| "goes" |] [ascii| "catboy goes nyan" |]
("catboy ","goes nyan")
>>> breakOn [ascii| "catboy" |] [ascii| "nyan nyan nyan" |]
("nyan nyan nyan","")

On complexity

This function is based on a variant of the NSN algorithm, except it does not detect overlapping needles. Its average-case analysis is based on the assumption that:

  • All ASCII symbols are equally likely to occur in both the needle and the haystack; and
  • The needle has length at least two; and
  • Both the needle and the haystack contain at least four unique symbols.

We fall back to split for singleton needles, and there is no work to be done on empty needles, which means the second assumption always holds.

Worst-case behaviour becomes more likely the more your input satisfies the following conditions:

  • The needle and/or haystack use few unique symbols (less than four is the worst); or
  • The haystack contains many instances of the second symbol of the needle which don't lead to full matches.

Complexity: \(\Theta(h)\) average case, \(\Theta(h \cdot n\)) worst-case.

See also: Note that all the below are references for the original algorithm, which includes searching for overlapping needles. Thus, our implementation will perform better than the analysis suggests.

Since: 1.0.1

breakOnEnd :: AsciiText -> AsciiText -> (AsciiText, AsciiText) Source #

breakOnEnd needle haystack, given a needle of length \(n\) and a haystack of length \(h\), attempts to find the last instance of needle in haystack. If successful, return a tuple consisting of:

  • The prefix of haystack up to, and including, the match; and
  • The rest of haystack.

If the needle is empty, this returns (haystack, empty). If no match can be found, this instead returns (empty, haystack).

This function is similar to breakOn. If you need to repeatedly split on the same needle, consider breakOnAll, as this will be more efficient due to only having to run the matching algorithm once.

>>> breakOnEnd empty [ascii| "catboy goes nyan" |]
("catboy goes nyan","")
>>> breakOnEnd [ascii| "nyan" |] empty
("","")
>>> breakOnEnd [ascii| "goes" |] [ascii| "catboy goes nyan" |]
("catboy goes"," nyan")
>>> breakOnEnd [ascii| "catboy" |] [ascii| "nyan nyan nyan" |]
("","nyan nyan nyan")

On complexity

This function is based on a variant of the NSN algorithm, except it does not detect overlapping needles. Its average-case analysis is based on the assumption that:

  • All ASCII symbols are equally likely to occur in both the needle and the haystack; and
  • The needle has length at least two; and
  • Both the needle and the haystack contain at least four unique symbols.

We fall back to split for singleton needles, and there is no work to be done on empty needles, which means the second assumption always holds.

Worst-case behaviour becomes more likely the more your input satisfies the following conditions:

  • The needle and/or haystack use few unique symbols (less than four is the worst); or
  • The haystack contains many instances of the second symbol of the needle which don't lead to full matches.

Complexity: \(\Theta(h)\) average case, \(\Theta(h \cdot n\)) worst-case.

See also: Note that all the below are references for the original algorithm, which includes searching for overlapping needles. Thus, our implementation will perform better than the analysis suggests.

Since: 1.0.1

break :: (AsciiChar -> Bool) -> AsciiText -> (AsciiText, AsciiText) Source #

break p t is equivalent to (takeWhile (not p) t, dropWhile (not p) t).

>>> break ([char| ' ' |] ==) [ascii| "catboy goes nyan" |]
("catboy"," goes nyan")

Complexity: \(\Theta(n)\)

Since: 1.0.0

span :: (AsciiChar -> Bool) -> AsciiText -> (AsciiText, AsciiText) Source #

span p t is equivalent to (takeWhile p t, dropWhile p t).

>>> span ([char| 'c' |] ==) [ascii| "catboy goes nyan" |]
("c","atboy goes nyan")

Complexity: \(\Theta(n)\)

Since: 1.0.0

group :: AsciiText -> [AsciiText] Source #

Separate a text into a list of texts such that:

  • Their concatenation is equal to the original argument; and
  • Equal adjacent characters in the original argument are in the same text in the result.

This is a specialized form of groupBy, and is about 40% faster than groupBy ==.

>>> group empty
[]
>>> group . singleton $ [char| 'w' |]
["w"]
>>> group [ascii| "nyan" |]
["n","y","a","n"]
>>> group [ascii| "nyaaaan" |]
["n","y","aaaa","n"]

Complexity: \(\Theta(n)\)

Since: 1.0.0

groupBy :: (AsciiChar -> AsciiChar -> Bool) -> AsciiText -> [AsciiText] Source #

Separate a text into a list of texts such that:

  • Their concatenation is equal to the original argument; and
  • Adjacent characters for which the function argument returns True are in the same text in the result.

group is a special case for the function argument ==; it is also about 40% faster.

>>> groupBy (<) empty
[]
>>> groupBy (<) . singleton $ [char| 'w' |]
["w"]
>>> groupBy (<) [ascii| "catboy goes nyan" |]
["c","atboy"," goes"," nyan"]

Complexity: \(\Theta(n)\)

Since: 1.0.0

inits :: AsciiText -> [AsciiText] Source #

All prefixes of the argument, from shortest to longest.

>>> inits empty
[""]
>>> inits . singleton $ [char| 'w' |]
["","w"]
>>> inits [ascii| "nyan" |]
["","n","ny","nya","nyan"]

Complexity: \(\Theta(n)\)

Since: 1.0.0

tails :: AsciiText -> [AsciiText] Source #

All suffixes of the argument, from shortest to longest.

>>> tails empty
[""]
>>> tails . singleton $ [char| 'w' |]
["w",""]
>>> tails [ascii| "nyan" |]
["nyan","yan","an","n",""]

Complexity: \(\Theta(n)\)

Since: 1.0.0

Breaking into many substrings

splitOn :: AsciiText -> AsciiText -> [AsciiText] Source #

splitOn needle haystack, given a needle of length \(n\) and a haystack of length \(h\), breaks haystack into pieces, separated by needle. Any occurrences of needle in haystack are consumed.

>>> splitOn empty [ascii| "catboy goes nyan and goes nyan" |]
["catboy goes nyan and goes nyan"]
>>> splitOn [ascii| "nyan" |] empty
[""]
>>> splitOn [ascii| "nyan" |] [ascii| "catboy goes nyan and goes nyan" |]
["catboy goes "," and goes ",""]
>>> splitOn [ascii| "nyan" |] [ascii| "nyan" |]
["",""]
>>> splitOn [ascii| "nyan" |] [ascii| "catboy" |]
["catboy"]

On complexity

This function is based on a variant of the NSN algorithm, except it does not detect overlapping needles. Its average-case analysis is based on the assumption that:

  • All ASCII symbols are equally likely to occur in both the needle and the haystack; and
  • The needle has length at least two; and
  • Both the needle and the haystack contain at least four unique symbols.

We fall back to split for singleton needles, and there is no work to be done on empty needles, which means the second assumption always holds.

Worst-case behaviour becomes more likely the more your input satisfies the following conditions:

  • The needle and/or haystack use few unique symbols (less than four is the worst); or
  • The haystack contains many instances of the second symbol of the needle which don't lead to full matches.

Complexity: \(\Theta(h)\) average case, \(\Theta(h \cdot n\)) worst-case.

See also: Note that all the below are references for the original algorithm, which includes searching for overlapping needles. Thus, our implementation will perform better than the analysis suggests.

Since: 1.0.1

split :: (AsciiChar -> Bool) -> AsciiText -> [AsciiText] Source #

split p t separates t into components delimited by separators, for which p returns True. The results do not contain the separators.

\(n\) adjacent separators result in \(n - 1\) empty components in the result.

>>> split ([char| '~' |] ==) empty
[]
>>> split ([char| '~' |] ==) . singleton $ [char| '~' |]
["",""]
>>> split ([char| '~' |] ==) [ascii| "nyan" |]
["nyan"]
>>> split ([char| '~' |] ==) [ascii| "~nyan" |]
["","nyan"]
>>> split ([char| '~' |] ==) [ascii| "nyan~" |]
["nyan",""]
>>> split ([char| '~' |] ==) [ascii| "nyan~nyan"|]
["nyan","nyan"]
>>> split ([char| '~' |] ==) [ascii| "nyan~~nyan" |]
["nyan","","nyan"]
>>> split ([char| '~' |] ==) [ascii| "nyan~~~nyan" |]
["nyan","","","nyan"]

Complexity: \(\Theta(n)\)

Since: 1.0.0

chunksOf :: Int -> AsciiText -> [AsciiText] Source #

Splits a text into chunks of the specified length. Equivalent to repeatedly takeing the specified length until exhaustion. The last item in the result may thus be shorter than requested.

For any n <= 0 and any t, chunksOf n t yields the empty list. This is identical to the behaviour of the function of the same name in the text package, although it doesn't document this fact.

>>> chunksOf (-100) [ascii| "I am a catboy" |]
[]
>>> chunksOf (-100) empty
[]
>>> chunksOf 0 [ascii| "I am a catboy" |]
[]
>>> chunksOf 0 empty
[]
>>> chunksOf 1 [ascii| "I am a catboy" |]
["I"," ","a","m"," ","a"," ","c","a","t","b","o","y"]
>>> chunksOf 1 empty
[]
>>> chunksOf 2 [ascii| "I am a catboy" |]
["I ","am"," a"," c","at","bo","y"]
>>> chunksOf 300 [ascii| "I am a catboy" |]
["I am a catboy"]

Complexity: \(\Theta(n)\)

Since: 1.0.1

Breaking into lines and words

lines :: AsciiText -> [AsciiText] Source #

Identical to the functions of the same name in the text package, and the Prelude. Specifically, separates the argument into pieces, with LF characters (0x0a) as separators. A single trailing LF is ignored. None of the final results contain LF.

We chose to follow the same semantics for this function as the text package and the Prelude. This has some consequences, which the documentation of both the text package and the Prelude does not properly explain. We list them here - bear these in mind when using this function, as well as unlines:

  • No platform-specific concept of a 'newline' is ever used by this function. Separation is done on LF, and only LF, regardless of platform. The documentation in both the text package and the Prelude confusingly refers to 'newline characters', which is a category error. We thus specify that LF is the character being split on, rather than mentioning 'newlines' in any way, shape or form.
  • unlines . lines is not the same as id. This is misleadingly described in the Prelude, which claims that (its version of) unlines is 'an inverse operation' to (its version of) lines. For a precise explanation of why this is the case, please see the documentation for unlines.
  • lines is not the same as split ([char| 'n' |] ==). See the doctests below for a demonstration of how they differ.
>>> lines empty
[]
>>> split ([char| '\n' |] ==) empty
[]
>>> lines [ascii| "catboy goes nyan" |]
["catboy goes nyan"]
>>> split ([char| '\n' |] ==) [ascii| "catboy goes nyan" |]
["catboy goes nyan"]
>>> lines [ascii| "catboy goes nyan\n" |]
["catboy goes nyan"]
>>> split ([char| '\n' |] ==) [ascii| "catboy goes nyan\n" |]
["catboy goes nyan",""]
>>> lines [ascii| "\ncatboy\n\n\ngoes\n\nnyan\n\n" |]
["","catboy","","","goes","","nyan",""]
>>> split ([char| '\n' |] ==) [ascii| "\ncatboy\n\n\ngoes\n\nnyan\n\n" |]
["","catboy","","","goes","","nyan","",""]
>>> lines [ascii| "\r\ncatboy\r\ngoes\r\nnyan\r\n" |]
["\r","catboy\r","goes\r","nyan\r"]
>>> split ([char| '\n' |] ==) [ascii| "\r\ncatboy\r\ngoes\r\nnyan\r\n" |]
["\r","catboy\r","goes\r","nyan\r",""]

Complexity: \(\Theta(n)\)

See also: Wikipedia on newlines

Since: 1.0.1

unlines :: Foldable f => f AsciiText -> AsciiText Source #

Identical to the functions of the same name in the text package and the Prelude. Specifically, appends an LF character to each of the texts, then concatenates. Equivalent to foldMap (`snoc' [char| '\n' |]).

We chose to follow the same semantics for this function as the text package and the Prelude. This has some consequences, which the documentation of both the text package and the Prelude does not properly explain. We list them here - bear these in mind when using this function, as well as lines:

  • No platform-specific concept of a 'newline' is ever used by this function. The documentation in both the text package and the Prelude confusing refer to appending a 'terminating newline', which is only a correct statement on platforms where a newline is LF. We thus specify that we append LF, rather than mentioning 'newlines' in any way, shape or form.
  • unlines . lines is not the same as id. This is misleadingly described in the Prelude, which claims that (its version of) unlines is 'an inverse operation' to (its version of) lines. See the doctests below for a demonstration of this.
>>> unlines []
""
>>> unlines [[ascii| "nyan" |]]
"nyan\n"
>>> unlines . Prelude.replicate 3 $ [ascii| "nyan" |]
"nyan\nnyan\nnyan\n"
>>> unlines . lines $ [ascii| "catboy goes nyan" |]
"catboy goes nyan\n"

Complexity: \(\Theta(n)\)

See also: Wikipedia on newlines

Since: 1.0.1

words :: AsciiText -> [AsciiText] Source #

Identical to the functions of the same name in the text package and the Prelude. Specifically, separates the argument into pieces, with (non-empty sequences of) word separator characters as separators. A 'word separator character' is any of the following:

  • TAB (0x09)
  • LF (0x0a)
  • VT (0x0b)
  • FF (0x0c)
  • CR (0x0d)
  • Space (0x20)

None of the final results contain any word separator characters. Any sequence of leading, or trailing, word separator characters will be ignored.

We chose to follow the same semantics for this function as the text package and the Prelude. This has the consequence that unwords . words is not the same as id, although the documentation for the Prelude confusingly describes (its version of) unwords as an 'inverse operation' to (its version of) words. See the documentation for unwords for an explanation of why this is the case.

>>> words empty
[]
>>> words [ascii| "catboy" |]
["catboy"]
>>> words [ascii| "  \r\r\r\rcatboy   \n\rgoes\t\t\t\t\tnyan\n  " |]
["catboy","goes","nyan"]

Complexity: \(\Theta(n)\)

Since: 1.0.1

unwords :: [AsciiText] -> AsciiText Source #

Identical to the functions of the same name in the text package and the Prelude. Specifically, links together adjacent texts with a Space character. Equivalent to intercalate [ascii| " " |].

We chose to follow the same semantics for this function as the text package and the Prelude. This has the consequence that unwords . words is not the same as id, although the documentation for the Prelude confusingly describes (its version of) unwords as an 'inverse operation' to (its version of) words. See the doctests below for a demonstration of this.

>>> unwords []
""
>>> unwords [[ascii| "nyan" |]]
"nyan"
>>> unwords . Prelude.replicate 3 $ [ascii| "nyan" |]
"nyan nyan nyan"
>>> unwords . words $ [ascii| "nyan\nnyan\nnyan" |]
"nyan nyan nyan"

Complexity: \(\Theta(n)\)

Since: 1.0.1

View patterns

stripPrefix :: AsciiText -> AsciiText -> Maybe AsciiText Source #

Return Just the suffix of the second text if it has the first text as a prefix, Nothing otherwise.

>>> stripPrefix [ascii| "catboy" |] empty
Nothing
>>> stripPrefix empty [ascii| "catboy" |]
Just "catboy"
>>> stripPrefix [ascii| "nyan" |] [ascii| "nyan" |]
Just ""
>>> stripPrefix [ascii| "nyan" |] [ascii| "catboy" |]
Nothing
>>> stripPrefix [ascii| "catboy" |] [ascii| "catboy goes nyan" |]
Just " goes nyan"

Complexity: \(\Theta(n)\)

Since: 1.0.0

stripSuffix :: AsciiText -> AsciiText -> Maybe AsciiText Source #

Return Just the prefix of the second text if it has the first text as a suffix, Nothing otherwise.

>>> stripSuffix [ascii| "catboy" |] empty
Nothing
>>> stripSuffix empty [ascii| "catboy" |]
Just "catboy"
>>> stripSuffix [ascii| "nyan" |] [ascii| "nyan" |]
Just ""
>>> stripSuffix [ascii| "nyan" |] [ascii| "catboy" |]
Nothing
>>> stripSuffix [ascii| "nyan" |] [ascii| "catboy goes nyan" |]
Just "catboy goes "

Complexity: \(\Theta(n)\)

Since: 1.0.0

stripInfix :: AsciiText -> AsciiText -> Maybe (AsciiText, AsciiText) Source #

stripInfix needle haystack, given a needle of length \(n\) and a haystack of length \(h\), attempts to find the first instance of needle in haystack. If successful, it returns Just the pair consisting of:

  • All the text in haystack before the first instance of needle; and
  • All the text in haystack after, but not including, the first instance of needle.

If there is no instance of needle in haystack, this returns Nothing.

>>> stripInfix [ascii| "catboy" |] empty
Nothing
>>> stripInfix empty [ascii| "nyan catboy nyan nyan" |]
Nothing
>>> stripInfix [ascii| "catboy" |] [ascii| "catboy" |]
Just ("","")
>>> stripInfix [ascii| "catboy" |] [ascii| "nyan catboy" |]
Just ("nyan ","")
>>> stripInfix [ascii| "catboy" |] [ascii| "catboy nyan" |]
Just (""," nyan")
>>> stripInfix [ascii| "catboy" |] [ascii| "nyan catboy nyan nyan" |]
Just ("nyan "," nyan nyan")
>>> stripInfix [ascii| "nyan" |] [ascii| "nyanyanyan" |]
Just ("","yanyan")

On complexity

This function is based on a variant of the NSN algorithm, except it does not detect overlapping needles. Its average-case analysis is based on the assumption that:

  • All ASCII symbols are equally likely to occur in both the needle and the haystack; and
  • The needle has length at least two; and
  • Both the needle and the haystack contain at least four unique symbols.

We fall back to split for singleton needles, and there is no work to be done on empty needles, which means the second assumption always holds.

Worst-case behaviour becomes more likely the more your input satisfies the following conditions:

  • The needle and/or haystack use few unique symbols (less than four is the worst); or
  • The haystack contains many instances of the second symbol of the needle which don't lead to full matches.

Complexity: \(\Theta(h)\) average case, \(\Theta(h \cdot n\)) worst-case.

See also: Note that all the below are references for the original algorithm, which includes searching for overlapping needles. Thus, our implementation will perform better than the analysis suggests.

Since: 1.0.1

commonPrefixes :: AsciiText -> AsciiText -> Maybe (AsciiText, AsciiText, AsciiText) Source #

Find the longest non-empty common prefix of the arguments and return it, along with the remaining suffixes of both arguments. If the arguments lack a common, non-empty prefix, returns Nothing.

>>> commonPrefixes empty [ascii| "catboy" |]
Nothing
>>> commonPrefixes [ascii| "catboy" |] empty
Nothing
>>> commonPrefixes [ascii| "catboy" |] [ascii| "nyan" |]
Nothing
>>> commonPrefixes [ascii| "catboy" |] [ascii| "catboy" |]
Just ("catboy","","")
>>> commonPrefixes [ascii| "nyan" |] [ascii| "nyan nyan" |]
Just ("nyan",""," nyan")

Complexity: \(\Theta(n)\)

Since: 1.0.1

Searching

filter :: (AsciiChar -> Bool) -> AsciiText -> AsciiText Source #

Return the text comprised of all the characters that satisfy the function argument (that is, for which it returns True), in the same order as in the original.

>>> filter ([char| 'n' |] ==) empty
""
>>> filter ([char| 'n' |] ==) [ascii| "catboy" |]
""
>>> filter ([char| 'n' |] ==) [ascii| "nyan" |]
"nn"

Complexity: \(\Theta(n)\)

Since: 1.0.0

breakOnAll :: AsciiText -> AsciiText -> [(AsciiText, AsciiText)] Source #

breakOnAll needle haystack, given a needle of length \(n\) and a haystack of length \(h\), finds all non-overlapping instances of needle in haystack. Each result consists of the following elements:

  • The prefix prior to the match; and
  • The match, followed by the rest of the string.

If given an empty needle, the result is a singleton list containing a pair of the entire haystack and the empty text. If given an empty haystack, the result is an empty list.

>>> breakOnAll empty [ascii| "nyan nyan nyan" |]
[("nyan nyan nyan","")]
>>> breakOnAll [ascii| "nyan" |] empty
[]
>>> breakOnAll [ascii| "nyan" |] [ascii| "nyan" |]
[("","nyan")]
>>> breakOnAll [ascii| "nyan" |] [ascii| "nyan nyan nyan" |]
[("","nyan nyan nyan"),("nyan ","nyan nyan"),("nyan nyan ","nyan")]
>>> breakOnAll [ascii| "nyan" |] [ascii| "nyanyanyan" |]
[("","nyanyanyan"),("nyanya","nyan")]

On complexity

This function is based on a variant of the NSN algorithm, except it does not detect overlapping needles. Its average-case analysis is based on the assumption that:

  • All ASCII symbols are equally likely to occur in both the needle and the haystack; and
  • The needle has length at least two; and
  • Both the needle and the haystack contain at least four unique symbols.

We fall back to split for singleton needles, and there is no work to be done on empty needles, which means the second assumption always holds.

Worst-case behaviour becomes more likely the more your input satisfies the following conditions:

  • The needle and/or haystack use few unique symbols (less than four is the worst); or
  • The haystack contains many instances of the second symbol of the needle which don't lead to full matches.

Complexity: \(\Theta(h)\) average case, \(\Theta(h \cdot n\)) worst-case.

See also: Note that all the below are references for the original algorithm, which includes searching for overlapping needles. Thus, our implementation will perform better than the analysis suggests.

Since: 1.0.1

find :: (AsciiChar -> Bool) -> AsciiText -> Maybe AsciiChar Source #

Returns Just the first character in the text satisfying the predicate, Nothing otherwise.

>>> find ([char| 'n' |] ==) empty
Nothing
>>> find ([char| 'n' |] ==) [ascii| "catboy" |]
Nothing
>>> find ([char| 'n' |] ==) [ascii| "nyan" |]
Just '0x6e'
>>> find ([char| 'n' |] /=) [ascii| "nyan" |]
Just '0x79'

Complexity: \(\Theta(n)\)

Since: 1.0.0

partition :: (AsciiChar -> Bool) -> AsciiText -> (AsciiText, AsciiText) Source #

partition p t is equivalent to (filter p t, filter (not p) t).

>>> partition ([char| 'n' |] ==) empty
("","")
>>> partition ([char| 'n' |] ==) . singleton $ [char| 'n' |]
("n","")
>>> partition ([char| 'n' |] ==) . singleton $ [char| 'w' |]
("","w")
>>> partition ([char| 'n' |] ==) [ascii| "nyan!" |]
("nn","ya!")

Complexity: \(\Theta(n)\)

Since: 1.0.0

Indexing

index :: AsciiText -> Int -> Maybe AsciiChar Source #

Retrieve the ASCII character at the given position in the text. Indexes begin from 0. If the index provided is invalid (that is, less than 0, equal to the length of the text, or greater), return Nothing; otherwise, return Just the character at that position.

>>> index [ascii| "nyan nyan nyan" |] (-100)
Nothing
>>> index [ascii| "nyan nyan nyan" |] 0
Just '0x6e'
>>> index [ascii| "nyan nyan nyan" |] 5
Just '0x6e'
>>> index [ascii| "nyan nyan nyan" |] 2000
Nothing

Complexity: \(\Theta(1)\)

Since: 1.0.1

findIndex :: (AsciiChar -> Bool) -> AsciiText -> Maybe Int Source #

Returns Just the first index in the text such that the character at that index satisfies the predicate, Nothing otherwise.

>>> findIndex ([char| 'n' |] ==) empty
Nothing
>>> findIndex ([char| 'n' |] ==) . singleton $ [char| 'n' |]
Just 0
>>> findIndex ([char| 'n' |] ==) . singleton $ [char| 'w' |]
Nothing
>>> findIndex ([char| 'n' |] ==) [ascii| "nyan" |]
Just 0

Complexity: \(\Theta(n)\)

Since: 1.0.0

count :: AsciiText -> AsciiText -> Int Source #

count needle haystack, given a needle of length \(n\) and a haystack of length \(h\), counts the number of non-overlapping occurrences of needle in haystack. If needle is empty, the count will be 0.

>>> count empty [ascii| "nyan nyan nyan" |]
0
>>> count [ascii| "nyan" |] empty
0
>>> count [ascii| "nyan" |] [ascii| "nyan" |]
1
>>> count [ascii| "nyan" |] [ascii| "nyan nyan nyan" |]
3
>>> count [ascii| "nyan" |] [ascii| "nyanyanyan" |]
2

On complexity

This function is based on a variant of the NSN algorithm, except it does not detect overlapping needles. Its average-case analysis is based on the assumption that:

  • All ASCII symbols are equally likely to occur in both the needle and the haystack; and
  • The needle has length at least two; and
  • Both the needle and the haystack contain at least four unique symbols.

We fall back to split for singleton needles, and there is no work to be done on empty needles, which means the second assumption always holds.

Worst-case behaviour becomes more likely the more your input satisfies the following conditions:

  • The needle and/or haystack use few unique symbols (less than four is the worst); or
  • The haystack contains many instances of the second symbol of the needle which don't lead to full matches.

Complexity: \(\Theta(h)\) average case, \(\Theta(h \cdot n\)) worst-case.

See also: Note that all the below are references for the original algorithm, which includes searching for overlapping needles. Thus, our implementation will perform better than the analysis suggests.

Since: 1.0.1

Zipping

zip :: AsciiText -> AsciiText -> [(AsciiChar, AsciiChar)] Source #

'Pair off' characters in both texts at corresponding indices. The result will be limited to the shorter of the two arguments.

>>> zip empty [ascii| "catboy" |]
[]
>>> zip [ascii| "catboy" |] empty
[]
>>> zip [ascii| "catboy" |] [ascii| "nyan" |]
[('0x63','0x6e'),('0x61','0x79'),('0x74','0x61'),('0x62','0x6e')]

Complexity: \(\Theta(n)\)

Since: 1.0.0

zipWith :: (AsciiChar -> AsciiChar -> AsciiChar) -> AsciiText -> AsciiText -> AsciiText Source #

Combine two texts together in lockstep to produce a new text, using the provided function to combine ASCII characters at each step. The length of the result will be the minimum of the lengths of the two text arguments.

>>> zipWith max [ascii| "I am a catboy" |] empty
""
>>> zipWith max empty [ascii| "I am a catboy" |]
""
>>> zipWith max [ascii| "I am a catboy" |] [ascii| "Nyan nyan nyan nyan nyan" |]
"Nyan nycntnyy"

Complexity: \(\Theta(n)\)

Since: 1.0.1

Conversions

fromText :: Text -> Maybe AsciiText Source #

Try and convert a Text into an AsciiText. Gives Nothing if the Text contains any symbols which lack an ASCII equivalent.

>>> fromText "catboy"
Just "catboy"
>>> fromText "😺😺😺😺😺"
Nothing

Complexity: \(\Theta(n)\)

Since: 1.0.0

fromByteString :: ByteString -> Maybe AsciiText Source #

Try and convert a ByteString into an AsciiText. Gives Nothing if the ByteString contains any bytes outside the ASCII range (that is, from 0 to 127 inclusive).

>>> fromByteString "catboy"
Just "catboy"
>>> fromByteString . BS.pack $ [128]
Nothing

Complexity: \(\Theta(n)\)

Since: 1.0.0

toText :: AsciiText -> Text Source #

Convert an AsciiText into a Text (by copying).

>>> toText empty
""
>>> toText . singleton $ [char| 'w' |]
"w"
>>> toText [ascii| "nyan" |]
"nyan"

Complexity: \(\Theta(n)\)

Since: 1.0.0

toByteString :: AsciiText -> ByteString Source #

Reinterpret an AsciiText as a ByteString (without copying).

>>> toByteString empty
""
>>> toByteString . singleton $ [char| 'w' |]
"w"
>>> toByteString [ascii| "nyan" |]
"nyan"

Complexity: \(\Theta(1)\)

Since: 1.0.0

Optics

textWise :: Prism' Text AsciiText Source #

A convenient demonstration of the relationship between toText and fromText.

>>> preview textWise "catboy goes nyan"
Just "catboy goes nyan"
>>> preview textWise "😺😺😺😺😺"
Nothing
>>> review textWise [ascii| "catboys are amazing" |]
"catboys are amazing"

Since: 1.0.0

byteStringWise :: Prism' ByteString AsciiText Source #

A convenient demonstration of the relationship between toByteString and fromByteString.

>>> preview byteStringWise "catboy goes nyan"
Just "catboy goes nyan"
>>> preview byteStringWise . BS.pack $ [0xff, 0xff]
Nothing
>>> review byteStringWise [ascii| "I love catboys" |]
"I love catboys"

Since: 1.0.0

packedChars :: Iso' [AsciiChar] AsciiText Source #

Pack (or unpack) a list of ASCII characters into a text.

>>> view packedChars [[char| 'n' |], [char| 'y' |], [char| 'a' |], [char| 'n' |]]
"nyan"
>>> review packedChars [ascii| "nyan" |]
['0x6e','0x79','0x61','0x6e']

Since: 1.0.1

chars :: IxTraversal' Int64 AsciiText AsciiChar Source #

Traverse the individual ASCII characters in a text.

>>> preview (elementOf chars 0) [ascii| "I am a catboy" |]
Just '0x49'
>>> preview (elementOf chars 100) [ascii| "I am a catboy" |]
Nothing
>>> iover chars (\i x -> bool x [char| 'w' |] . even $ i) [ascii| "I am a catboy" |]
"w wmwawcwtwow"

Since: 1.0.1

packedBytes :: Prism' [Word8] AsciiText Source #

Pack (or unpack) a list of bytes into a text. This isn't as capable as packedChars, as that would allow construction of invalid texts.

>>> preview packedBytes [0x6e, 0x79, 0x61, 0x6e]
Just "nyan"
>>> preview packedBytes [0xff, 0xfe]
Nothing
>>> review packedBytes [ascii| "nyan" |]
[110,121,97,110]

Since: 1.0.1

bytes :: IxFold Int64 AsciiText Word8 Source #

Access the individual bytes in a text. This isn't as capable as chars, as that would allow modifications of the bytes in ways that aren't valid as ASCII.

>>> itoListOf bytes [ascii| "I am a catboy" |]
[(0,73),(1,32),(2,97),(3,109),(4,32),(5,97),(6,32),(7,99),(8,97),(9,116),(10,98),(11,111),(12,121)]

Since: 1.0.1