-- |
-- Module      : Data.Text.Normalize
-- Copyright   : (c) 2016 Harendra Kumar
--
-- License     : BSD-style
-- Maintainer  : harendra.kumar@gmail.com
-- Stability   : experimental
-- Portability : GHC
--
-- Unicode normalization for @Text@ data type.
--
module Data.Text.Normalize
    (
    -- * Normalization Modes
      NormalizationMode(..)
    -- * Normalization API
    , normalize
    ) where

import           Data.Text              (Text)
import           Data.Text.Foreign      (fromPtr, useAsPtr)
import           Data.Unicode.Types     (NormalizationMode (..))
import           Data.Unicode.UTF8Proc
import           Foreign.Ptr            (castPtr)
import           System.IO.Unsafe       (unsafePerformIO)

-- | Normalize a 'Text' value to the requested Unicode normalization form.
--
-- The mode only decides which normalize operation is built with
-- 'mkNormalizeOp':
--
-- * 'NFD'  uses @Decomposed@ with no extra options
-- * 'NFKD' uses @Decomposed@ with @Compat@
-- * 'NFC'  uses @Composed@ with no extra options
-- * 'NFKC' uses @Composed@ with @Compat@
--
-- Every mode runs through 'transformText' with the same transform modes,
-- @StableMode@ and @UTF16Mode@.
normalize :: NormalizationMode -> Text -> Text
normalize mode txt =
    transformText [StableMode, UTF16Mode] [Normalize normOp] txt
  where
    -- The mode-specific part: which composition form and which options.
    normOp = case mode of
        NFD  -> mkNormalizeOp Decomposed []
        NFKD -> mkNormalizeOp Decomposed [Compat]
        NFC  -> mkNormalizeOp Composed   []
        NFKC -> mkNormalizeOp Composed   [Compat]

-- | Apply the given transform operations to a 'Text' value.
--
-- The text's buffer is handed to 'transform' as a (pointer, length) pair,
-- and a new 'Text' is built with 'fromPtr' from the pair that 'transform'
-- returns.
--
-- The unit count supplied by 'useAsPtr' is doubled before the call, and the
-- length coming back is halved before 'fromPtr'. This is presumably a
-- conversion between 16-bit code units and bytes, i.e. it assumes a UTF-16
-- backed 'Text' -- TODO confirm against the @text@ version in use.
--
-- NOTE(review): 'unsafePerformIO' is used on the assumption that
-- 'transform' is observably pure (its result depends only on its
-- arguments) -- confirm.
--
-- NOTE(review): nothing here releases the buffer returned by 'transform'.
-- Confirm who owns that memory and whether it must be freed once 'fromPtr'
-- has produced the result.
transformText :: [TransformMode] -> [TransformOp] -> Text -> Text
transformText modes ops txt =
    unsafePerformIO $ useAsPtr txt $ \srcBuf unitCount -> do
        (dstBuf, dstBytes) <-
            transform modes ops (castPtr srcBuf, fromIntegral unitCount * 2)
        -- XXX test whether the length is not truncated
        fromPtr (castPtr dstBuf) (fromIntegral dstBytes `div` 2)