-- | Text IO using the UTF8 character encoding.

module Agda.Utils.IO.UTF8
  ( readTextFile
  , Agda.Utils.IO.UTF8.writeFile
  , writeTextToFile
  ) where

import Control.Exception (bracketOnError)
import Data.Text.Lazy (Text)
import qualified Data.Text.Lazy as T
import qualified Data.Text.Lazy.IO as T
import qualified System.IO as IO

-- | Converts many character sequences which may be interpreted as
-- line or paragraph separators into '\n'.
--
-- Note that '\r\n' is assumed to have already been converted to '\n'
-- ('readTextFile' arranges this through the handle's newline mode).

convertLineEndings :: Text -> Text
convertLineEndings = T.map convert
  where
  -- ASCII:
  convert '\x000D' = '\n'  -- CR  (Carriage return)
  convert '\x000C' = '\n'  -- FF  (Form feed)
  -- Unicode:
  convert '\x0085' = '\n'  -- NEXT LINE
  convert '\x2028' = '\n'  -- LINE SEPARATOR
  convert '\x2029' = '\n'  -- PARAGRAPH SEPARATOR
  -- Not a line ending (or '\x000A'):
  convert c        = c

-- | Reads a UTF8-encoded text file and converts many character
-- sequences which may be interpreted as line or paragraph separators
-- into '\n'.
--
-- The handle is opened in read mode, told to translate CRLF to LF on
-- input, and switched to UTF8 before its contents are requested with
-- 'T.hGetContents'.
--
-- If anything fails between opening the file and handing the handle
-- over to 'T.hGetContents', the handle is closed before the exception
-- is propagated, so a failed read does not leak a file descriptor.
-- On success the handle is deliberately /not/ closed here:
-- 'T.hGetContents' reads lazily and is left in charge of the handle.

readTextFile :: FilePath -> IO Text
readTextFile file =
  convertLineEndings <$>
    bracketOnError (IO.openFile file IO.ReadMode) IO.hClose readAll
  where
  -- Configures the freshly opened handle and starts reading from it.
  readAll h = do
    IO.hSetNewlineMode h $
      IO.NewlineMode { IO.inputNL = IO.CRLF, IO.outputNL = IO.LF }
    IO.hSetEncoding h IO.utf8
    T.hGetContents h

-- | Opens the given file in write mode, switches the handle to UTF8
-- and runs the supplied output action on it. 'IO.withFile' closes the
-- handle afterwards, also when the action throws.

withUtf8Output :: FilePath -> (IO.Handle -> IO ()) -> IO ()
withUtf8Output file write =
  IO.withFile file IO.WriteMode $ \h -> do
    IO.hSetEncoding h IO.utf8
    write h

-- | Writes a UTF8-encoded text file. The native convention for line
-- endings is used.

writeFile :: FilePath -> String -> IO ()
writeFile file s = withUtf8Output file $ \h -> IO.hPutStr h s

-- | Writes a UTF8-encoded text file. The native convention for line
-- endings is used.

writeTextToFile :: FilePath -> Text -> IO ()
writeTextToFile file s = withUtf8Output file $ \h -> T.hPutStr h s