-- | -- module: Data.Text.Encoding.Locale -- copyright: © kudah 2013 -- license: BSD3 -- -- maintainer: kudahkukarek@gmail.com -- stability: experimental -- portability: GHC-only -- -- This module provides functions to encode and decode 'Data.Text.Lazy.Text' -- to/from 'Data.ByteString.Lazy.ByteString' using 'System.IO.TextEncoding' -- -- For performance, Text\'s native decode\* functions are used if the conditions -- are right (LF NewlineMode and UTF encoding). {-# LANGUAGE CPP #-} #ifdef TRUSTWORTHY {-# LANGUAGE Trustworthy #-} #endif module Data.Text.Lazy.Encoding.Locale (decodeLocale ,encodeLocale ,decodeLocale' ,encodeLocale' ,decodeFromHandle ,encodeFromHandle ) where import Import (getLocale' ,whenJust' ,hGetEncAndNlMode') import qualified Data.ByteString.Lazy as L import qualified Data.Text.Lazy as TL import qualified Data.Text.Lazy.IO as TLIO import qualified Data.Text.Lazy.Encoding as TLE import System.IO import Data.ByteString.Handle import qualified Data.List as L import Data.Maybe handleDecoder :: Maybe TextEncoding -> Maybe NewlineMode -> L.ByteString -> IO TL.Text handleDecoder menc mnlmode = \bs -> do h <- readHandle False bs whenJust' menc $ hSetEncoding h whenJust' mnlmode $ hSetNewlineMode h TLIO.hGetContents h handleEncoder :: Maybe TextEncoding -> Maybe NewlineMode -> TL.Text -> IO L.ByteString handleEncoder menc mnlmode = \t -> do (res, ()) <- writeHandle False $ \h -> do whenJust' menc $ hSetEncoding h whenJust' mnlmode $ hSetNewlineMode h TLIO.hPutStr h t return res chooseDecoder :: Maybe TextEncoding -> Maybe NewlineMode -> L.ByteString -> IO TL.Text chooseDecoder menc mnlmode = \bs -> do if inputNL nlmode == LF -- decode* functions won't convert line ends then do enc <- maybe getLocale' return menc case L.stripPrefix "UTF-" (show enc) of Just s -> case s of "8" -> return (TLE.decodeUtf8 bs) ('1':'6':x:'E':_) | 'L' == x -> return (TLE.decodeUtf16LE bs) | 'B' == x -> return (TLE.decodeUtf16BE bs) ('3':'2':x:'E':_) | 'L' == x -> return (TLE.decodeUtf32LE bs) | 'B' == x -> return (TLE.decodeUtf32BE bs) _ -> fallback bs _ -> fallback bs else fallback bs where nlmode = fromMaybe nativeNewlineMode mnlmode fallback = handleDecoder menc mnlmode chooseEncoder :: Maybe TextEncoding -> Maybe NewlineMode -> TL.Text -> IO L.ByteString chooseEncoder menc mnlmode = \bs -> if outputNL nlmode == LF -- encode* functions won't convert line ends then do enc <- maybe getLocale' return menc case L.stripPrefix "UTF-" (show enc) of Just s -> case s of "8" -> return (TLE.encodeUtf8 bs) ('1':'6':x:'E':_) | 'L' == x -> return (TLE.encodeUtf16LE bs) | 'B' == x -> return (TLE.encodeUtf16BE bs) ('3':'2':x:'E':_) | 'L' == x -> return (TLE.encodeUtf32LE bs) | 'B' == x -> return (TLE.encodeUtf32BE bs) _ -> fallback bs _ -> fallback bs else fallback bs where nlmode = fromMaybe nativeNewlineMode mnlmode fallback = handleEncoder menc mnlmode -- | Decode 'L.ByteString' to 'TL.Text' using current locale decodeLocale :: L.ByteString -> IO TL.Text decodeLocale = chooseDecoder Nothing Nothing -- | Encode 'TL.Text' to 'L.ByteString' using current locale encodeLocale :: TL.Text -> IO L.ByteString encodeLocale = chooseEncoder Nothing Nothing -- | Decode 'L.ByteString' to 'TL.Text' using supplied 'TextEncoding' and 'NewlineMode' decodeLocale' :: TextEncoding -> NewlineMode -> L.ByteString -> IO TL.Text decodeLocale' enc nlmode = chooseDecoder (Just enc) (Just nlmode) -- | Encode 'TL.Text' to 'L.ByteString' using supplied 'TextEncoding' and 'NewlineMode' encodeLocale' :: TextEncoding -> NewlineMode -> TL.Text -> IO L.ByteString encodeLocale' enc nlmode = chooseEncoder (Just enc) (Just nlmode) -- | Decode 'L.ByteString' to 'TL.Text' using 'Handle's 'TextEncoding' and 'NewlineMode' decodeFromHandle :: Handle -> L.ByteString -> IO TL.Text decodeFromHandle h bs = do (enc, nlmode) <- hGetEncAndNlMode' h decodeLocale' enc nlmode bs -- | Encode 'TL.Text' to 'L.ByteString' using 'Handle's 'TextEncoding' and 'NewlineMode' encodeFromHandle :: Handle -> TL.Text -> IO L.ByteString encodeFromHandle h t = do (enc, nlmode) <- hGetEncAndNlMode' h encodeLocale' enc nlmode t