zenacy-unicode-1.0.2: Unicode utilities for Haskell
Safe Haskell: Safe-Inferred
Language: Haskell2010

Zenacy.Unicode

Description

Tools to check and prepare data so that it can be parsed as valid Unicode.

The following example converts data of uncertain encoding to Text.

-- | Decode bytes of uncertain encoding to 'Text'.
--
-- The byte order mark, when present, selects the decoder; input without a
-- BOM is assumed to be UTF-8. The UTF-8 branches rely on 'unicodeCleanUTF8'
-- to remove bad characters before decoding.
--
-- Assumes @T@ is "Data.Text.Encoding" and that 'lenientDecode' is imported
-- from "Data.Text.Encoding.Error".
textDecode :: ByteString -> Text
textDecode b =
  case bomStrip b of
    (Nothing, s)           -> T.decodeUtf8 $ unicodeCleanUTF8 s -- Assume UTF8
    (Just BOM_UTF8, s)     -> T.decodeUtf8 $ unicodeCleanUTF8 s
    -- The plain UTF-16/UTF-32 decoders throw on malformed input, and nothing
    -- sanitises these payloads first, so use the lenient variants, which
    -- substitute U+FFFD for each invalid sequence instead of raising.
    (Just BOM_UTF16_BE, s) -> T.decodeUtf16BEWith lenientDecode s
    (Just BOM_UTF16_LE, s) -> T.decodeUtf16LEWith lenientDecode s
    (Just BOM_UTF32_BE, s) -> T.decodeUtf32BEWith lenientDecode s
    (Just BOM_UTF32_LE, s) -> T.decodeUtf32LEWith lenientDecode s
Synopsis

Documentation

data BOM Source #

Defines the Unicode byte order mark.

Instances

Instances details
Show BOM Source # 
Instance details

Defined in Zenacy.Unicode

Methods

showsPrec :: Int -> BOM -> ShowS #

show :: BOM -> String #

showList :: [BOM] -> ShowS #

Eq BOM Source # 
Instance details

Defined in Zenacy.Unicode

Methods

(==) :: BOM -> BOM -> Bool #

(/=) :: BOM -> BOM -> Bool #

Ord BOM Source # 
Instance details

Defined in Zenacy.Unicode

Methods

compare :: BOM -> BOM -> Ordering #

(<) :: BOM -> BOM -> Bool #

(<=) :: BOM -> BOM -> Bool #

(>) :: BOM -> BOM -> Bool #

(>=) :: BOM -> BOM -> Bool #

max :: BOM -> BOM -> BOM #

min :: BOM -> BOM -> BOM #

bomStrings :: [(BOM, ByteString)] Source #

Defines the byte order mark signatures.

bomStrip :: ByteString -> (Maybe BOM, ByteString) Source #

Remove the BOM from the start of a string.

unicodeCleanUTF8 :: ByteString -> ByteString Source #

Removes bad characters and nulls from a UTF8 byte string.