License	BSD-style
Maintainer	Vincent Hanquez <vincent@snarc.org>
Stability	experimental
Portability	portable
Safe Haskell	None
Language	Haskell2010

Basement.String

Contents

Binary conversion
Legacy utility

Description

A String type backed by a UTF8 encoded byte array and all the necessary functions to manipulate the string.

You can think of String as a specialization of a byte array that have element of type Char.

The String data must contain UTF8 valid data.

Synopsis

Documentation

newtype String Source #

Opaque packed array of characters in the UTF8 encoding

Constructors

String (UArray Word8)

Instances

IsList String Source #
Associated Types type Item String :: * # Methods fromList :: [Item String] -> String # fromListN :: Int -> [Item String] -> String # toList :: String -> [Item String] #
Eq String Source #
Methods (==) :: String -> String -> Bool # (/=) :: String -> String -> Bool #
Data String Source #
Methods gfoldl :: (forall d b. Data d => c (d -> b) -> d -> c b) -> (forall g. g -> c g) -> String -> c String # gunfold :: (forall b r. Data b => c (b -> r) -> c r) -> (forall r. r -> c r) -> Constr -> c String # toConstr :: String -> Constr # dataTypeOf :: String -> DataType # dataCast1 :: Typeable (* -> ) t => (forall d. Data d => c (t d)) -> Maybe (c String) # dataCast2 :: Typeable ( -> * -> *) t => (forall d e. (Data d, Data e) => c (t d e)) -> Maybe (c String) # gmapT :: (forall b. Data b => b -> b) -> String -> String # gmapQl :: (r -> r' -> r) -> r -> (forall d. Data d => d -> r') -> String -> r # gmapQr :: (r' -> r -> r) -> r -> (forall d. Data d => d -> r') -> String -> r # gmapQ :: (forall d. Data d => d -> u) -> String -> [u] # gmapQi :: Int -> (forall d. Data d => d -> u) -> String -> u # gmapM :: Monad m => (forall d. Data d => d -> m d) -> String -> m String # gmapMp :: MonadPlus m => (forall d. Data d => d -> m d) -> String -> m String # gmapMo :: MonadPlus m => (forall d. Data d => d -> m d) -> String -> m String #
Ord String Source #
Methods compare :: String -> String -> Ordering # (<) :: String -> String -> Bool # (<=) :: String -> String -> Bool # (>) :: String -> String -> Bool # (>=) :: String -> String -> Bool # max :: String -> String -> String # min :: String -> String -> String #
Show String Source #
Methods showsPrec :: Int -> String -> ShowS # show :: String -> String # showList :: [String] -> ShowS #
IsString String Source #
Methods fromString :: String -> String #
Monoid String Source #
Methods mempty :: String # mappend :: String -> String -> String # mconcat :: [String] -> String #
NormalForm String Source #
Methods toNormalForm :: String -> () Source #
type Item String Source #
type Item String = Char

newtype MutableString st Source #

Mutable String Buffer.

Use as an *append* buffer, as UTF8 variable encoding doesn't really allow to change previously written character without potentially shifting bytes.

Constructors

MutableString (MUArray Word8 st)

create :: PrimMonad prim => CountOf Word8 -> (MutableString (PrimState prim) -> prim (Offset Word8)) -> prim String Source #

Unsafely create a string of up to sz bytes.

The callback f needs to return the number of bytes filled in the underlaying bytes buffer. No check is made on the callback return values, and if it's not contained without the bounds, bad things will happen.

replicate :: CountOf Char -> Char -> String Source #

Replicate a character c n times to create a string of length n

length :: String -> CountOf Char Source #

Length of a String using CountOf

this size is available in o(n)

Binary conversion

data Encoding Source #

Various String Encoding that can be use to convert to and from bytes

Constructors

ASCII7
UTF8
UTF16
UTF32
ISO_8859_1

Instances

Bounded Encoding Source #
Methods minBound :: Encoding # maxBound :: Encoding #
Enum Encoding Source #
Methods succ :: Encoding -> Encoding # pred :: Encoding -> Encoding # toEnum :: Int -> Encoding # fromEnum :: Encoding -> Int # enumFrom :: Encoding -> [Encoding] # enumFromThen :: Encoding -> Encoding -> [Encoding] # enumFromTo :: Encoding -> Encoding -> [Encoding] # enumFromThenTo :: Encoding -> Encoding -> Encoding -> [Encoding] #
Eq Encoding Source #
Methods (==) :: Encoding -> Encoding -> Bool # (/=) :: Encoding -> Encoding -> Bool #
Data Encoding Source #
Methods gfoldl :: (forall d b. Data d => c (d -> b) -> d -> c b) -> (forall g. g -> c g) -> Encoding -> c Encoding # gunfold :: (forall b r. Data b => c (b -> r) -> c r) -> (forall r. r -> c r) -> Constr -> c Encoding # toConstr :: Encoding -> Constr # dataTypeOf :: Encoding -> DataType # dataCast1 :: Typeable (* -> ) t => (forall d. Data d => c (t d)) -> Maybe (c Encoding) # dataCast2 :: Typeable ( -> * -> *) t => (forall d e. (Data d, Data e) => c (t d e)) -> Maybe (c Encoding) # gmapT :: (forall b. Data b => b -> b) -> Encoding -> Encoding # gmapQl :: (r -> r' -> r) -> r -> (forall d. Data d => d -> r') -> Encoding -> r # gmapQr :: (r' -> r -> r) -> r -> (forall d. Data d => d -> r') -> Encoding -> r # gmapQ :: (forall d. Data d => d -> u) -> Encoding -> [u] # gmapQi :: Int -> (forall d. Data d => d -> u) -> Encoding -> u # gmapM :: Monad m => (forall d. Data d => d -> m d) -> Encoding -> m Encoding # gmapMp :: MonadPlus m => (forall d. Data d => d -> m d) -> Encoding -> m Encoding # gmapMo :: MonadPlus m => (forall d. Data d => d -> m d) -> Encoding -> m Encoding #
Ord Encoding Source #
Methods compare :: Encoding -> Encoding -> Ordering # (<) :: Encoding -> Encoding -> Bool # (<=) :: Encoding -> Encoding -> Bool # (>) :: Encoding -> Encoding -> Bool # (>=) :: Encoding -> Encoding -> Bool # max :: Encoding -> Encoding -> Encoding # min :: Encoding -> Encoding -> Encoding #
Show Encoding Source #
Methods showsPrec :: Int -> Encoding -> ShowS # show :: Encoding -> String # showList :: [Encoding] -> ShowS #

fromBytes :: Encoding -> UArray Word8 -> (String, Maybe ValidationFailure, UArray Word8) Source #

Convert a ByteArray to a string assuming a specific encoding.

It returns a 3-tuple of:

The string that has been succesfully converted without any error
An optional validation error
The remaining buffer that hasn't been processed (either as a result of an error, or because the encoded sequence is not fully available)

Considering a stream of data that is fetched chunk by chunk, it's valid to assume that some sequence might fall in a chunk boundary. When converting chunks, if the error is Nothing and the remaining buffer is not empty, then this buffer need to be prepended to the next chunk

fromChunkBytes :: [UArray Word8] -> [String] Source #

Decode a stream of binary chunks containing UTF8 encoding in a list of valid String

Chunk not necessarily contains a valid string, as a UTF8 sequence could be split over 2 chunks.

fromBytesUnsafe :: UArray Word8 -> String Source #

Convert a Byte Array representing UTF8 data directly to a string without checking for UTF8 validity

If the input contains invalid sequences, it will trigger runtime async errors when processing data.

In doubt, use fromBytes

fromBytesLenient :: UArray Word8 -> (String, UArray Word8) Source #

Convert a UTF8 array of bytes to a String.

If there's any error in the stream, it will automatically insert replacement bytes to replace invalid sequences.

In the case of sequence that fall in the middle of 2 chunks, the remaining buffer is supposed to be preprended to the next chunk, and resume the parsing.

toBytes :: Encoding -> String -> UArray Word8 Source #

Convert a String to a bytearray in a specific encoding

if the encoding is UTF8, the underlying buffer is returned without extra allocation or any processing

In any other encoding, some allocation and processing are done to convert.

mutableValidate :: PrimMonad prim => MUArray Word8 (PrimState prim) -> Offset Word8 -> CountOf Word8 -> prim (Offset Word8, Maybe ValidationFailure) Source #

Similar to validate but works on a MutableByteArray

copy :: String -> String Source #

Copy the String

The slice of memory is copied to a new slice, making the new string independent from the original string..

data ValidationFailure Source #

Possible failure related to validating bytes of UTF8 sequences.

Constructors

InvalidHeader
InvalidContinuation
MissingByte
BuildingFailure

Instances

Eq ValidationFailure Source #
Methods (==) :: ValidationFailure -> ValidationFailure -> Bool # (/=) :: ValidationFailure -> ValidationFailure -> Bool #
Show ValidationFailure Source #
Methods showsPrec :: Int -> ValidationFailure -> ShowS # show :: ValidationFailure -> String # showList :: [ValidationFailure] -> ShowS #
Exception ValidationFailure Source #
Methods toException :: ValidationFailure -> SomeException # fromException :: SomeException -> Maybe ValidationFailure # displayException :: ValidationFailure -> String #

index :: String -> Offset Char -> Maybe Char Source #

Return the nth character in a String

Compared to an array, the string need to be scanned from the beginning since the UTF8 encoding is variable.

null :: String -> Bool Source #

Check if a String is null

drop :: CountOf Char -> String -> String Source #

Create a string with the remaining Chars after dropping @n Chars from the beginning

take :: CountOf Char -> String -> String Source #

Create a string composed of a number @n of Chars (Unicode code points).

if the input @s contains less characters than required, then the input string is returned.

splitAt :: CountOf Char -> String -> (String, String) Source #

Split a string at the Offset specified (in Char) returning both the leading part and the remaining part.

revDrop :: CountOf Char -> String -> String Source #

Similar to drop but from the end

revTake :: CountOf Char -> String -> String Source #

Similar to take but from the end

revSplitAt :: CountOf Char -> String -> (String, String) Source #

Similar to splitAt but from the end

splitOn :: (Char -> Bool) -> String -> [String] Source #

Split on the input string using the predicate as separator

e.g.

splitOn (== ',') ","          == ["",""]
splitOn (== ',') ",abc,"      == ["","abc",""]
splitOn (== ':') "abc"        == ["abc"]
splitOn (== ':') "abc::def"   == ["abc","","def"]
splitOn (== ':') "::abc::def" == ["","","abc","","def"]

sub :: String -> Offset8 -> Offset8 -> String Source #

Internal call to make a substring given offset in bytes.

This is unsafe considering that one can create a substring starting and/or ending on the middle of a UTF8 sequence.

elem :: Char -> String -> Bool Source #

Return whereas the string contains a specific character or not

indices :: String -> String -> [Offset8] Source #

intersperse :: Char -> String -> String Source #

Intersperse the character sep between each character in the string

intersperse ' ' "Hello Foundation"

"H e l l o F o u n d a t i o n"

span :: (Char -> Bool) -> String -> (String, String) Source #

Apply a predicate to the string to return the longest prefix that satisfy the predicate and the remaining

break :: (Char -> Bool) -> String -> (String, String) Source #

Break a string into 2 strings at the location where the predicate return True

breakElem :: Char -> String -> (String, String) Source #

Break a string into 2 strings at the first occurence of the character

breakLine :: String -> Either Bool (String, String) Source #

Same as break but cut on a line feed with an optional carriage return.

This is the same operation as 'breakElem LF' dropping the last character of the string if it's a CR.

Also for efficiency reason (streaming), it returns if the last character was a CR character.

dropWhile :: (Char -> Bool) -> String -> String Source #

Drop character from the beginning while the predicate is true

singleton :: Char -> String Source #

Create a single element String

charMap :: (Char -> Char) -> String -> String Source #

Monomorphically map the character in a string and return the transformed one

snoc :: String -> Char -> String Source #

Append a Char to the end of the String and return this new String

cons :: Char -> String -> String Source #

Prepend a Char to the beginning of the String and return this new String

unsnoc :: String -> Maybe (String, Char) Source #

Extract the String stripped of the last character and the last character if not empty

If empty, Nothing is returned

uncons :: String -> Maybe (Char, String) Source #

Extract the First character of a string, and the String stripped of the first character.

If empty, Nothing is returned

find :: (Char -> Bool) -> String -> Maybe Char Source #

Look for a predicate in the String and return the matched character, if any.

findIndex :: (Char -> Bool) -> String -> Maybe (Offset Char) Source #

Return the index in unit of Char of the first occurence of the predicate returning True

If not found, Nothing is returned

sortBy :: (Char -> Char -> Ordering) -> String -> String Source #

Sort the character in a String using a specific sort function

TODO: optimise not going through a list

filter :: (Char -> Bool) -> String -> String Source #

Filter characters of a string using the predicate

reverse :: String -> String Source #

Reverse a string

replace :: String -> String -> String -> String Source #

Replace all the occurrencies of needle with replacement in the haystack string.

builderAppend :: PrimMonad state => Char -> Builder String MutableString Word8 state err () Source #

Append a character to a String builder

builderBuild :: PrimMonad m => Int -> Builder String MutableString Word8 m err () -> m (Either err String) Source #

Create a new String builder using chunks of sizeChunksI

builderBuild_ :: PrimMonad m => Int -> Builder String MutableString Word8 m () () -> m String Source #

readInteger :: String -> Maybe Integer Source #

readIntegral :: (HasNegation i, IntegralUpsize Word8 i, Additive i, Multiplicative i, IsIntegral i) => String -> Maybe i Source #

Read an Integer from a String

Consume an optional minus sign and many digits until end of string.

readNatural :: String -> Maybe Natural Source #

Read a Natural from a String

Consume many digits until end of string.

readDouble :: String -> Maybe Double Source #

Try to read a Double

readRational :: String -> Maybe Rational Source #

Try to read a floating number as a Rational

Note that for safety reason, only exponent between -10000 and 10000 is allowed as otherwise DoS/OOM is very likely. if you don't want this behavior, switching to a scientific type (not provided yet) that represent the exponent separately is the advised solution.

readFloatingExact :: String -> ReadFloatingCallback a -> Maybe a Source #

Read an Floating like number of the form:

-: numbers [ . numbers ] [ ( e | E ) [ - ] number ]

Call a function with:

A boolean representing if the number is negative
The digits part represented as a single natural number (123.456 is represented as 123456)
The number of digits in the fractional part (e.g. 123.456 => 3)
The exponent if any

The code is structured as a simple state machine that:

Optionally Consume a - sign
Consume number for the integral part
Optionally
Consume .
Consume remaining digits if not already end of string
Optionally Consume a e or E follow by an optional - and a number

upper :: String -> String Source #

Convert a String to the upper-case equivalent. Does not properly support multicharacter Unicode conversions.

lower :: String -> String Source #

Convert a String to the upper-case equivalent. Does not properly support multicharacter Unicode conversions.

isPrefixOf :: String -> String -> Bool Source #

Check whether the first string is a prefix of the second string.

isSuffixOf :: String -> String -> Bool Source #

Check whether the first string is a suffix of the second string.

isInfixOf :: String -> String -> Bool Source #

Check whether the first string is contains within the second string.

TODO: implemented the naive way and thus terribly inefficient, reimplement properly

stripPrefix :: String -> String -> Maybe String Source #

Try to strip a prefix from the start of a String.

If the prefix is not starting the string, then Nothing is returned, otherwise the striped string is returned

stripSuffix :: String -> String -> Maybe String Source #

Try to strip a suffix from the end of a String.

If the suffix is not ending the string, then Nothing is returned, otherwise the striped string is returned

all :: (Char -> Bool) -> String -> Bool Source #

any :: (Char -> Bool) -> String -> Bool Source #

Legacy utility

lines :: String -> [String] Source #

Split lines in a string using newline as separation.

Note that carriage return preceding a newline are also strip for maximum compatibility between Windows and Unix system.

words :: String -> [String] Source #

Split words in a string using spaces as separation

words "Hello Foundation"

Hello, Foundation

toBase64 :: String -> String Source #

Transform string src to base64 binary representation.

toBase64URL :: Bool -> String -> String Source #

Transform string src to URL-safe base64 binary representation. The result will be either padded or unpadded, depending on the boolean padded argument.

toBase64OpenBSD :: String -> String Source #

Transform string src to OpenBSD base64 binary representation.