rio-0.1.2.0: A standard library for Haskell

Safe Haskell: Safe
Language: Haskell2010

RIO.Char

Contents

Description

Unicode Char. Import as:

import qualified RIO.Char as C

Synopsis

Documentation

data Char :: * #

The character type Char is an enumeration whose values represent Unicode (or equivalently ISO/IEC 10646) characters (see http://www.unicode.org/ for details). This set extends the ISO 8859-1 (Latin-1) character set (the first 256 characters), which is itself an extension of the ASCII character set (the first 128 characters). A character literal in Haskell has type Char.

To convert a Char to or from the corresponding Int value defined by Unicode, use toEnum and fromEnum from the Enum class respectively (or equivalently chr and ord).

Instances

Bounded Char

Since: 2.1

Enum Char

Since: 2.1

Methods

succ :: Char -> Char #

pred :: Char -> Char #

toEnum :: Int -> Char #

fromEnum :: Char -> Int #

enumFrom :: Char -> [Char] #

enumFromThen :: Char -> Char -> [Char] #

enumFromTo :: Char -> Char -> [Char] #

enumFromThenTo :: Char -> Char -> Char -> [Char] #

Eq Char 

Methods

(==) :: Char -> Char -> Bool #

(/=) :: Char -> Char -> Bool #

Data Char

Since: 4.0.0.0

Methods

gfoldl :: (forall d b. Data d => c (d -> b) -> d -> c b) -> (forall g. g -> c g) -> Char -> c Char #

gunfold :: (forall b r. Data b => c (b -> r) -> c r) -> (forall r. r -> c r) -> Constr -> c Char #

toConstr :: Char -> Constr #

dataTypeOf :: Char -> DataType #

dataCast1 :: Typeable (* -> *) t => (forall d. Data d => c (t d)) -> Maybe (c Char) #

dataCast2 :: Typeable (* -> * -> *) t => (forall d e. (Data d, Data e) => c (t d e)) -> Maybe (c Char) #

gmapT :: (forall b. Data b => b -> b) -> Char -> Char #

gmapQl :: (r -> r' -> r) -> r -> (forall d. Data d => d -> r') -> Char -> r #

gmapQr :: (r' -> r -> r) -> r -> (forall d. Data d => d -> r') -> Char -> r #

gmapQ :: (forall d. Data d => d -> u) -> Char -> [u] #

gmapQi :: Int -> (forall d. Data d => d -> u) -> Char -> u #

gmapM :: Monad m => (forall d. Data d => d -> m d) -> Char -> m Char #

gmapMp :: MonadPlus m => (forall d. Data d => d -> m d) -> Char -> m Char #

gmapMo :: MonadPlus m => (forall d. Data d => d -> m d) -> Char -> m Char #

Ord Char 

Methods

compare :: Char -> Char -> Ordering #

(<) :: Char -> Char -> Bool #

(<=) :: Char -> Char -> Bool #

(>) :: Char -> Char -> Bool #

(>=) :: Char -> Char -> Bool #

max :: Char -> Char -> Char #

min :: Char -> Char -> Char #

Read Char

Since: 2.1

Show Char

Since: 2.1

Methods

showsPrec :: Int -> Char -> ShowS #

show :: Char -> String #

showList :: [Char] -> ShowS #

Ix Char

Since: 2.1

Methods

range :: (Char, Char) -> [Char] #

index :: (Char, Char) -> Char -> Int #

unsafeIndex :: (Char, Char) -> Char -> Int

inRange :: (Char, Char) -> Char -> Bool #

rangeSize :: (Char, Char) -> Int #

unsafeRangeSize :: (Char, Char) -> Int

Lift Char 

Methods

lift :: Char -> Q Exp #

Storable Char

Since: 2.1

Methods

sizeOf :: Char -> Int #

alignment :: Char -> Int #

peekElemOff :: Ptr Char -> Int -> IO Char #

pokeElemOff :: Ptr Char -> Int -> Char -> IO () #

peekByteOff :: Ptr b -> Int -> IO Char #

pokeByteOff :: Ptr b -> Int -> Char -> IO () #

peek :: Ptr Char -> IO Char #

poke :: Ptr Char -> Char -> IO () #

NFData Char 

Methods

rnf :: Char -> () #

Hashable Char 

Methods

hashWithSalt :: Int -> Char -> Int #

hash :: Char -> Int #

Prim Char 
ErrorList Char 

Methods

listMsg :: String -> [Char] #

Unbox Char 
Display Char Source #

Since: 0.1.0.0

Vector Vector Char 
MVector MVector Char 
Generic1 k (URec k Char) 

Associated Types

type Rep1 (URec k Char) (f :: URec k Char -> *) :: k -> * #

Methods

from1 :: f a -> Rep1 (URec k Char) f a #

to1 :: Rep1 (URec k Char) f a -> f a #

IsString (Seq Char) 

Methods

fromString :: String -> Seq Char #

Functor (URec * Char) 

Methods

fmap :: (a -> b) -> URec * Char a -> URec * Char b #

(<$) :: a -> URec * Char b -> URec * Char a #

Foldable (URec * Char) 

Methods

fold :: Monoid m => URec * Char m -> m #

foldMap :: Monoid m => (a -> m) -> URec * Char a -> m #

foldr :: (a -> b -> b) -> b -> URec * Char a -> b #

foldr' :: (a -> b -> b) -> b -> URec * Char a -> b #

foldl :: (b -> a -> b) -> b -> URec * Char a -> b #

foldl' :: (b -> a -> b) -> b -> URec * Char a -> b #

foldr1 :: (a -> a -> a) -> URec * Char a -> a #

foldl1 :: (a -> a -> a) -> URec * Char a -> a #

toList :: URec * Char a -> [a] #

null :: URec * Char a -> Bool #

length :: URec * Char a -> Int #

elem :: Eq a => a -> URec * Char a -> Bool #

maximum :: Ord a => URec * Char a -> a #

minimum :: Ord a => URec * Char a -> a #

sum :: Num a => URec * Char a -> a #

product :: Num a => URec * Char a -> a #

Traversable (URec * Char) 

Methods

traverse :: Applicative f => (a -> f b) -> URec * Char a -> f (URec * Char b) #

sequenceA :: Applicative f => URec * Char (f a) -> f (URec * Char a) #

mapM :: Monad m => (a -> m b) -> URec * Char a -> m (URec * Char b) #

sequence :: Monad m => URec * Char (m a) -> m (URec * Char a) #

Eq (URec k Char p) 

Methods

(==) :: URec k Char p -> URec k Char p -> Bool #

(/=) :: URec k Char p -> URec k Char p -> Bool #

Ord (URec k Char p) 

Methods

compare :: URec k Char p -> URec k Char p -> Ordering #

(<) :: URec k Char p -> URec k Char p -> Bool #

(<=) :: URec k Char p -> URec k Char p -> Bool #

(>) :: URec k Char p -> URec k Char p -> Bool #

(>=) :: URec k Char p -> URec k Char p -> Bool #

max :: URec k Char p -> URec k Char p -> URec k Char p #

min :: URec k Char p -> URec k Char p -> URec k Char p #

Show (URec k Char p) 

Methods

showsPrec :: Int -> URec k Char p -> ShowS #

show :: URec k Char p -> String #

showList :: [URec k Char p] -> ShowS #

Generic (URec k Char p) 

Associated Types

type Rep (URec k Char p) :: * -> * #

Methods

from :: URec k Char p -> Rep (URec k Char p) x #

to :: Rep (URec k Char p) x -> URec k Char p #

data Vector Char 
data URec k Char

Used for marking occurrences of Char#

Since: 4.9.0.0

data URec k Char = UChar {}
data MVector s Char 
type Rep1 k (URec k Char) 
type Rep1 k (URec k Char) = D1 k (MetaData "URec" "GHC.Generics" "base" False) (C1 k (MetaCons "UChar" PrefixI True) (S1 k (MetaSel (Just Symbol "uChar#") NoSourceUnpackedness NoSourceStrictness DecidedLazy) (UChar k)))
type Rep (URec k Char p) 
type Rep (URec k Char p) = D1 * (MetaData "URec" "GHC.Generics" "base" False) (C1 * (MetaCons "UChar" PrefixI True) (S1 * (MetaSel (Just Symbol "uChar#") NoSourceUnpackedness NoSourceStrictness DecidedLazy) (UChar *)))

Character classification

Unicode characters are divided into letters, numbers, marks, punctuation, symbols, separators (including spaces) and others (including control characters).

isControl :: Char -> Bool #

Selects control characters, which are the non-printing characters of the Latin-1 subset of Unicode.

isSpace :: Char -> Bool #

Returns True for any Unicode space character, and the control characters \t, \n, \r, \f, \v.

isLower :: Char -> Bool #

Selects lower-case alphabetic Unicode characters (letters).

isUpper :: Char -> Bool #

Selects upper-case or title-case alphabetic Unicode characters (letters). Title case is used by a small number of letter ligatures like the single-character form of Lj.

isAlpha :: Char -> Bool #

Selects alphabetic Unicode characters (lower-case, upper-case and title-case letters, plus letters of caseless scripts and modifier letters). This function is equivalent to isLetter.

isAlphaNum :: Char -> Bool #

Selects alphabetic or numeric digit Unicode characters.

Note that numeric digits outside the ASCII range are selected by this function but not by isDigit. Such digits may be part of identifiers but are not used by the printer and reader to represent numbers.

isPrint :: Char -> Bool #

Selects printable Unicode characters (letters, numbers, marks, punctuation, symbols and spaces).

isDigit :: Char -> Bool #

Selects ASCII digits, i.e. '0'..'9'.

isOctDigit :: Char -> Bool #

Selects ASCII octal digits, i.e. '0'..'7'.

isHexDigit :: Char -> Bool #

Selects ASCII hexadecimal digits, i.e. '0'..'9', 'a'..'f', 'A'..'F'.

isLetter :: Char -> Bool #

Selects alphabetic Unicode characters (lower-case, upper-case and title-case letters, plus letters of caseless scripts and modifier letters). This function is equivalent to isAlpha.

This function returns True if its argument has one of the following GeneralCategory values, or False otherwise: UppercaseLetter, LowercaseLetter, TitlecaseLetter, ModifierLetter, OtherLetter.

These classes are defined in the Unicode Character Database, part of the Unicode standard. The same document defines what is and is not a "Letter".

Examples

Basic usage:

>>> isLetter 'a'
True
>>> isLetter 'A'
True
>>> isLetter '0'
False
>>> isLetter '%'
False
>>> isLetter '♥'
False
>>> isLetter '\31'
False

Ensure that isLetter and isAlpha are equivalent.

>>> let chars = [(chr 0)..]
>>> let letters = map isLetter chars
>>> let alphas = map isAlpha chars
>>> letters == alphas
True

isMark :: Char -> Bool #

Selects Unicode mark characters, for example accents and the like, which combine with preceding characters.

This function returns True if its argument has one of the following GeneralCategory values, or False otherwise: NonSpacingMark, SpacingCombiningMark, EnclosingMark.

These classes are defined in the Unicode Character Database, part of the Unicode standard. The same document defines what is and is not a "Mark".

Examples

Basic usage:

>>> isMark 'a'
False
>>> isMark '0'
False

Combining marks such as accent characters usually need to follow another character before they become printable:

>>> map isMark "ò"
[False,True]

Puns are not necessarily supported:

>>> isMark '✓'
False

isNumber :: Char -> Bool #

Selects Unicode numeric characters, including digits from various scripts, Roman numerals, et cetera.

This function returns True if its argument has one of the following GeneralCategory values, or False otherwise: DecimalNumber, LetterNumber, OtherNumber.

These classes are defined in the Unicode Character Database, part of the Unicode standard. The same document defines what is and is not a "Number".

Examples

Basic usage:

>>> isNumber 'a'
False
>>> isNumber '%'
False
>>> isNumber '3'
True

ASCII '0' through '9' are all numbers:

>>> and $ map isNumber ['0'..'9']
True

Unicode Roman numerals are "numbers" as well:

>>> isNumber 'Ⅸ'
True

isPunctuation :: Char -> Bool #

Selects Unicode punctuation characters, including various kinds of connectors, brackets and quotes.

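This function returns True if its argument has one of the following GeneralCategory values, or False otherwise: ConnectorPunctuation, DashPunctuation, OpenPunctuation, ClosePunctuation, InitialQuote, FinalQuote, OtherPunctuation.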

These classes are defined in the Unicode Character Database, part of the Unicode standard. The same document defines what is and is not a "Punctuation".

Examples

Basic usage:

>>> isPunctuation 'a'
False
>>> isPunctuation '7'
False
>>> isPunctuation '♥'
False
>>> isPunctuation '"'
True
>>> isPunctuation '?'
True
>>> isPunctuation '—'
True

isSymbol :: Char -> Bool #

Selects Unicode symbol characters, including mathematical and currency symbols.

This function returns True if its argument has one of the following GeneralCategory values, or False otherwise: MathSymbol, CurrencySymbol, ModifierSymbol, OtherSymbol.

These classes are defined in the Unicode Character Database, part of the Unicode standard. The same document defines what is and is not a "Symbol".

Examples

Basic usage:

>>> isSymbol 'a'
False
>>> isSymbol '6'
False
>>> isSymbol '='
True

The definition of "math symbol" may be a little counter-intuitive depending on one's background:

>>> isSymbol '+'
True
>>> isSymbol '-'
False

isSeparator :: Char -> Bool #

Selects Unicode space and separator characters.

This function returns True if its argument has one of the following GeneralCategory values, or False otherwise: Space, LineSeparator, ParagraphSeparator.

These classes are defined in the Unicode Character Database, part of the Unicode standard. The same document defines what is and is not a "Separator".

Examples

Basic usage:

>>> isSeparator 'a'
False
>>> isSeparator '6'
False
>>> isSeparator ' '
True

Warning: newlines and tab characters are not considered separators.

>>> isSeparator '\n'
False
>>> isSeparator '\t'
False

But some more exotic characters are (like HTML's &nbsp;):

>>> isSeparator '\160'
True

Subranges

isAscii :: Char -> Bool #

Selects the first 128 characters of the Unicode character set, corresponding to the ASCII character set.

isLatin1 :: Char -> Bool #

Selects the first 256 characters of the Unicode character set, corresponding to the ISO 8859-1 (Latin-1) character set.

isAsciiUpper :: Char -> Bool #

Selects ASCII upper-case letters, i.e. characters satisfying both isAscii and isUpper.

isAsciiLower :: Char -> Bool #

Selects ASCII lower-case letters, i.e. characters satisfying both isAscii and isLower.

Unicode general categories

data GeneralCategory :: * #

Unicode General Categories (column 2 of the UnicodeData table) in the order they are listed in the Unicode standard (the Unicode Character Database, in particular).

Examples

Basic usage:

>>> :t OtherLetter
OtherLetter :: GeneralCategory

Eq instance:

>>> UppercaseLetter == UppercaseLetter
True
>>> UppercaseLetter == LowercaseLetter
False

Ord instance:

>>> NonSpacingMark <= MathSymbol
True

Enum instance:

>>> enumFromTo ModifierLetter SpacingCombiningMark
[ModifierLetter,OtherLetter,NonSpacingMark,SpacingCombiningMark]

Read instance:

>>> read "DashPunctuation" :: GeneralCategory
DashPunctuation
>>> read "17" :: GeneralCategory
*** Exception: Prelude.read: no parse

Show instance:

>>> show EnclosingMark
"EnclosingMark"

Bounded instance:

>>> minBound :: GeneralCategory
UppercaseLetter
>>> maxBound :: GeneralCategory
NotAssigned

Ix instance:

>>> import Data.Ix ( index )
>>> index (OtherLetter,Control) FinalQuote
12
>>> index (OtherLetter,Control) Format
*** Exception: Error in array index

Constructors

UppercaseLetter

Lu: Letter, Uppercase

LowercaseLetter

Ll: Letter, Lowercase

TitlecaseLetter

Lt: Letter, Titlecase

ModifierLetter

Lm: Letter, Modifier

OtherLetter

Lo: Letter, Other

NonSpacingMark

Mn: Mark, Non-Spacing

SpacingCombiningMark

Mc: Mark, Spacing Combining

EnclosingMark

Me: Mark, Enclosing

DecimalNumber

Nd: Number, Decimal

LetterNumber

Nl: Number, Letter

OtherNumber

No: Number, Other

ConnectorPunctuation

Pc: Punctuation, Connector

DashPunctuation

Pd: Punctuation, Dash

OpenPunctuation

Ps: Punctuation, Open

ClosePunctuation

Pe: Punctuation, Close

InitialQuote

Pi: Punctuation, Initial quote

FinalQuote

Pf: Punctuation, Final quote

OtherPunctuation

Po: Punctuation, Other

MathSymbol

Sm: Symbol, Math

CurrencySymbol

Sc: Symbol, Currency

ModifierSymbol

Sk: Symbol, Modifier

OtherSymbol

So: Symbol, Other

Space

Zs: Separator, Space

LineSeparator

Zl: Separator, Line

ParagraphSeparator

Zp: Separator, Paragraph

Control

Cc: Other, Control

Format

Cf: Other, Format

Surrogate

Cs: Other, Surrogate

PrivateUse

Co: Other, Private Use

NotAssigned

Cn: Other, Not Assigned

Instances

Bounded GeneralCategory 
Enum GeneralCategory 
Eq GeneralCategory 
Ord GeneralCategory 
Read GeneralCategory 
Show GeneralCategory 
Ix GeneralCategory 

generalCategory :: Char -> GeneralCategory #

The Unicode general category of the character. This relies on the Enum instance of GeneralCategory, which must remain in the same order as the categories are presented in the Unicode standard.

Examples

Basic usage:

>>> generalCategory 'a'
LowercaseLetter
>>> generalCategory 'A'
UppercaseLetter
>>> generalCategory '0'
DecimalNumber
>>> generalCategory '%'
OtherPunctuation
>>> generalCategory '♥'
OtherSymbol
>>> generalCategory '\31'
Control
>>> generalCategory ' '
Space

Case conversion

toUpper :: Char -> Char #

Convert a letter to the corresponding upper-case letter, if any. Any other character is returned unchanged.

toLower :: Char -> Char #

Convert a letter to the corresponding lower-case letter, if any. Any other character is returned unchanged.

toTitle :: Char -> Char #

Convert a letter to the corresponding title-case or upper-case letter, if any. (Title case differs from upper case only for a small number of ligature letters.) Any other character is returned unchanged.

Numeric representations

ord :: Char -> Int #

The fromEnum method restricted to the type Char.

String representations

showLitChar :: Char -> ShowS #

Convert a character to a string using only printable characters, using Haskell source-language escape conventions. For example:

showLitChar '\n' s  =  "\\n" ++ s

lexLitChar :: ReadS String #

Read a string representation of a character, using Haskell source-language escape conventions. For example:

lexLitChar  "\\nHello"  =  [("\\n", "Hello")]

readLitChar :: ReadS Char #

Read a string representation of a character, using Haskell source-language escape conventions, and convert it to the character that it encodes. For example:

readLitChar "\\nHello"  =  [('\n', "Hello")]