{-# LANGUAGE BangPatterns, CPP, RecordWildCards, ScopedTypeVariables #-} -- | -- Module : Data.Text.IO -- Copyright : (c) 2009, 2010 Bryan O'Sullivan, -- (c) 2009 Simon Marlow -- License : BSD-style -- Maintainer : bos@serpentine.com -- Stability : experimental -- Portability : GHC -- -- Efficient locale-sensitive support for text I\/O. -- -- Skip past the synopsis for some important notes on performance and -- portability across different versions of GHC. module Data.Text.IO ( -- * Performance -- $performance -- * Locale support -- $locale -- * File-at-a-time operations readFile , writeFile , appendFile -- * Operations on handles , hGetContents , hGetLine , hPutStr , hPutStrLn -- * Special cases for standard input and output , interact , getContents , getLine , putStr , putStrLn ) where import Data.Text (Text) import Prelude hiding (appendFile, catch, getContents, getLine, interact, putStr, putStrLn, readFile, writeFile) import System.IO (Handle, IOMode(..), hPutChar, openFile, stdin, stdout, withFile) #if __GLASGOW_HASKELL__ <= 610 import qualified Data.ByteString.Char8 as B import Data.Text.Encoding (decodeUtf8, encodeUtf8) #else import Control.Exception (catch, throwIO) import Control.Monad (liftM2, when) import Data.IORef (readIORef, writeIORef) import qualified Data.Text as T import Data.Text.Fusion (stream) import Data.Text.Fusion.Internal (Step(..), Stream(..)) import Data.Text.IO.Internal (hGetLineWith, readChunk) import GHC.IO.Buffer (Buffer(..), BufferState(..), CharBufElem, CharBuffer, RawCharBuffer, emptyBuffer, isEmptyBuffer, newCharBuffer, writeCharBuf) import GHC.IO.Exception (IOException(ioe_type), IOErrorType(InappropriateType)) import GHC.IO.Handle.Internals (augmentIOError, hClose_help, wantReadableHandle, wantWritableHandle) import GHC.IO.Handle.Text (commitBuffer') import GHC.IO.Handle.Types (BufferList(..), BufferMode(..), Handle__(..), HandleType(..), Newline(..)) import System.IO (hGetBuffering, hFileSize, hSetBuffering, hTell) import System.IO.Error (isEOFError) #endif -- $performance -- #performance# -- -- The functions in this module obey the runtime system's locale, -- character set encoding, and line ending conversion settings. -- -- If you know in advance that you will be working with data that has -- a specific encoding (e.g. UTF-8), and your application is highly -- performance sensitive, you may find that it is faster to perform -- I\/O with bytestrings and to encode and decode yourself than to use -- the functions in this module. -- -- Whether this will hold depends on the version of GHC you are using, -- the platform you are working on, the data you are working with, and -- the encodings you are using, so be sure to test for yourself. -- | The 'readFile' function reads a file and returns the contents of -- the file as a string. The entire file is read strictly, as with -- 'getContents'. readFile :: FilePath -> IO Text readFile name = openFile name ReadMode >>= hGetContents -- | Write a string to a file. The file is truncated to zero length -- before writing begins. writeFile :: FilePath -> Text -> IO () writeFile p = withFile p WriteMode . flip hPutStr -- | Write a string the end of a file. appendFile :: FilePath -> Text -> IO () appendFile p = withFile p AppendMode . flip hPutStr -- | Read the remaining contents of a 'Handle' as a string. The -- 'Handle' is closed once the contents have been read, or if an -- exception is thrown. -- -- Internally, this function reads a chunk at a time from the -- lower-level buffering abstraction, and concatenates the chunks into -- a single string once the entire file has been read. -- -- As a result, it requires approximately twice as much memory as its -- result to construct its result. For files more than a half of -- available RAM in size, this may result in memory exhaustion. hGetContents :: Handle -> IO Text #if __GLASGOW_HASKELL__ <= 610 hGetContents = fmap decodeUtf8 . B.hGetContents #else hGetContents h = do chooseGoodBuffering h wantReadableHandle "hGetContents" h readAll where readAll hh@Handle__{..} = do let catchError e | isEOFError e = do buf <- readIORef haCharBuffer return $ if isEmptyBuffer buf then T.empty else T.singleton '\r' | otherwise = throwIO (augmentIOError e "hGetContents" h) readChunks = do buf <- readIORef haCharBuffer t <- readChunk hh buf `catch` catchError if T.null t then return [t] else (t:) `fmap` readChunks ts <- readChunks (hh', _) <- hClose_help hh return (hh'{haType=ClosedHandle}, T.concat ts) -- | Use a more efficient buffer size if we're reading in -- block-buffered mode with the default buffer size. When we can -- determine the size of the handle we're reading, set the buffer size -- to that, so that we can read the entire file in one chunk. -- Otherwise, use a buffer size of at least 16KB. chooseGoodBuffering :: Handle -> IO () chooseGoodBuffering h = do bufMode <- hGetBuffering h case bufMode of BlockBuffering Nothing -> do d <- catch (liftM2 (-) (hFileSize h) (hTell h)) $ \(e::IOException) -> if ioe_type e == InappropriateType then return 16384 -- faster than the 2KB default else throwIO e when (d > 0) . hSetBuffering h . BlockBuffering . Just . fromIntegral $ d _ -> return () #endif -- | Read a single line from a handle. hGetLine :: Handle -> IO Text #if __GLASGOW_HASKELL__ <= 610 hGetLine = fmap decodeUtf8 . B.hGetLine #else hGetLine = hGetLineWith T.concat #endif -- | Write a string to a handle. hPutStr :: Handle -> Text -> IO () #if __GLASGOW_HASKELL__ <= 610 hPutStr h = B.hPutStr h . encodeUtf8 #else -- This function is lifted almost verbatim from GHC.IO.Handle.Text. hPutStr h t = do (buffer_mode, nl) <- wantWritableHandle "hPutStr" h $ \h_ -> do bmode <- getSpareBuffer h_ return (bmode, haOutputNL h_) let str = stream t case buffer_mode of (NoBuffering, _) -> hPutChars h str (LineBuffering, buf) -> writeLines h nl buf str (BlockBuffering _, buf) | nl == CRLF -> writeBlocksCRLF h buf str | otherwise -> writeBlocksRaw h buf str hPutChars :: Handle -> Stream Char -> IO () hPutChars h (Stream next0 s0 _len) = loop s0 where loop !s = case next0 s of Done -> return () Skip s' -> loop s' Yield x s' -> hPutChar h x >> loop s' -- The following functions are largely lifted from GHC.IO.Handle.Text, -- but adapted to a coinductive stream of data instead of an inductive -- list. -- -- We have several variations of more or less the same code for -- performance reasons. Splitting the original buffered write -- function into line- and block-oriented versions gave us a 2.1x -- performance improvement. Lifting out the raw/cooked newline -- handling gave a few more percent on top. writeLines :: Handle -> Newline -> Buffer CharBufElem -> Stream Char -> IO () writeLines h nl buf0 (Stream next0 s0 _len) = outer s0 buf0 where outer s1 Buffer{bufRaw=raw, bufSize=len} = inner s1 (0::Int) where inner !s !n = case next0 s of Done -> commit n False{-no flush-} True{-release-} >> return () Skip s' -> inner s' n Yield x s' | n + 1 >= len -> commit n True{-needs flush-} False >>= outer s | x == '\n' -> do n' <- if nl == CRLF then do n1 <- writeCharBuf raw n '\r' writeCharBuf raw n1 '\n' else writeCharBuf raw n x commit n' True{-needs flush-} False >>= outer s' | otherwise -> writeCharBuf raw n x >>= inner s' commit = commitBuffer h raw len writeBlocksCRLF :: Handle -> Buffer CharBufElem -> Stream Char -> IO () writeBlocksCRLF h buf0 (Stream next0 s0 _len) = outer s0 buf0 where outer s1 Buffer{bufRaw=raw, bufSize=len} = inner s1 (0::Int) where inner !s !n = case next0 s of Done -> commit n False{-no flush-} True{-release-} >> return () Skip s' -> inner s' n Yield x s' | n + 1 >= len -> commit n True{-needs flush-} False >>= outer s | x == '\n' -> do n1 <- writeCharBuf raw n '\r' writeCharBuf raw n1 '\n' >>= inner s' | otherwise -> writeCharBuf raw n x >>= inner s' commit = commitBuffer h raw len writeBlocksRaw :: Handle -> Buffer CharBufElem -> Stream Char -> IO () writeBlocksRaw h buf0 (Stream next0 s0 _len) = outer s0 buf0 where outer s1 Buffer{bufRaw=raw, bufSize=len} = inner s1 (0::Int) where inner !s !n = case next0 s of Done -> commit n False{-no flush-} True{-release-} >> return () Skip s' -> inner s' n Yield x s' | n + 1 >= len -> commit n True{-needs flush-} False >>= outer s | otherwise -> writeCharBuf raw n x >>= inner s' commit = commitBuffer h raw len -- This function is completely lifted from GHC.IO.Handle.Text. getSpareBuffer :: Handle__ -> IO (BufferMode, CharBuffer) getSpareBuffer Handle__{haCharBuffer=ref, haBuffers=spare_ref, haBufferMode=mode} = do case mode of NoBuffering -> return (mode, error "no buffer!") _ -> do bufs <- readIORef spare_ref buf <- readIORef ref case bufs of BufferListCons b rest -> do writeIORef spare_ref rest return ( mode, emptyBuffer b (bufSize buf) WriteBuffer) BufferListNil -> do new_buf <- newCharBuffer (bufSize buf) WriteBuffer return (mode, new_buf) -- This function is completely lifted from GHC.IO.Handle.Text. commitBuffer :: Handle -> RawCharBuffer -> Int -> Int -> Bool -> Bool -> IO CharBuffer commitBuffer hdl !raw !sz !count flush release = wantWritableHandle "commitAndReleaseBuffer" hdl $ commitBuffer' raw sz count flush release {-# INLINE commitBuffer #-} #endif -- | Write a string to a handle, followed by a newline. hPutStrLn :: Handle -> Text -> IO () hPutStrLn h t = hPutStr h t >> hPutChar h '\n' -- | The 'interact' function takes a function of type @Text -> Text@ -- as its argument. The entire input from the standard input device is -- passed to this function as its argument, and the resulting string -- is output on the standard output device. interact :: (Text -> Text) -> IO () interact f = putStr . f =<< getContents -- | Read all user input on 'stdin' as a single string. getContents :: IO Text getContents = hGetContents stdin -- | Read a single line of user input from 'stdin'. getLine :: IO Text getLine = hGetLine stdin -- | Write a string to 'stdout'. putStr :: Text -> IO () putStr = hPutStr stdout -- | Write a string to 'stdout', followed by a newline. putStrLn :: Text -> IO () putStrLn = hPutStrLn stdout -- $locale -- -- /Note/: The behaviour of functions in this module depends on the -- version of GHC you are using. -- -- Beginning with GHC 6.12, text I\/O is performed using the system or -- handle's current locale and line ending conventions. -- -- Under GHC 6.10 and earlier, the system I\/O libraries do not -- support locale-sensitive I\/O or line ending conversion. On these -- versions of GHC, functions in this library all use UTF-8. What -- does this mean in practice? -- -- * All data that is read will be decoded as UTF-8. -- -- * Before data is written, it is first encoded as UTF-8. -- -- * On both reading and writing, the platform's native newline -- conversion is performed. -- -- If you must use a non-UTF-8 locale on an older version of GHC, you -- will have to perform the transcoding yourself, e.g. as follows: -- -- > import qualified Data.ByteString as B -- > import Data.Text (Text) -- > import Data.Text.Encoding (encodeUtf16) -- > -- > putStr_Utf16LE :: Text -> IO () -- > putStr_Utf16LE t = B.putStr (encodeUtf16LE t)