-- | Text IO using the UTF8 character encoding.

module Agda.Utils.IO.UTF8
  ( readTextFile
  , Agda.Utils.IO.UTF8.writeFile
  , writeTextToFile
  ) where

import Data.Text.Lazy (Text)
import qualified Data.Text.Lazy as T
import qualified Data.Text.Lazy.IO as T
import qualified System.IO as IO

-- | Converts many character sequences which may be interpreted as
-- line or paragraph separators into '\n'.
--
-- Note that '\r\n' is assumed to have already been converted to '\n'.

convertLineEndings :: Text -> Text
convertLineEndings :: Text -> Text
convertLineEndings = (Char -> Char) -> Text -> Text
T.map Char -> Char
convert
  where
  -- ASCII:
  convert :: Char -> Char
convert Char
'\x000D' = Char
'\n'  -- CR  (Carriage return)
  convert Char
'\x000C' = Char
'\n'  -- FF  (Form feed)
  -- Unicode:
  convert Char
'\x0085' = Char
'\n'  -- NEXT LINE
  convert Char
'\x2028' = Char
'\n'  -- LINE SEPARATOR
  convert Char
'\x2029' = Char
'\n'  -- PARAGRAPH SEPARATOR
  -- Not a line ending (or '\x000A'):
  convert Char
c        = Char
c

-- | Reads a UTF8-encoded text file and converts many character
-- sequences which may be interpreted as line or paragraph separators
-- into '\n'.

readTextFile :: FilePath -> IO Text
readTextFile :: FilePath -> IO Text
readTextFile FilePath
file = Text -> Text
convertLineEndings (Text -> Text) -> IO Text -> IO Text
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> do
  Handle
h <- FilePath -> IOMode -> IO Handle
IO.openFile FilePath
file IOMode
IO.ReadMode
  Handle -> NewlineMode -> IO ()
IO.hSetNewlineMode Handle
h (NewlineMode -> IO ()) -> NewlineMode -> IO ()
forall a b. (a -> b) -> a -> b
$
    NewlineMode :: Newline -> Newline -> NewlineMode
IO.NewlineMode { inputNL :: Newline
IO.inputNL = Newline
IO.CRLF, outputNL :: Newline
IO.outputNL = Newline
IO.LF }
  Handle -> TextEncoding -> IO ()
IO.hSetEncoding Handle
h TextEncoding
IO.utf8
  Handle -> IO Text
T.hGetContents Handle
h

-- | Writes a UTF8-encoded text file. The native convention for line
-- endings is used.

writeFile :: FilePath -> String -> IO ()
writeFile :: FilePath -> FilePath -> IO ()
writeFile FilePath
file FilePath
s = FilePath -> IOMode -> (Handle -> IO ()) -> IO ()
forall r. FilePath -> IOMode -> (Handle -> IO r) -> IO r
IO.withFile FilePath
file IOMode
IO.WriteMode ((Handle -> IO ()) -> IO ()) -> (Handle -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$ \Handle
h -> do
  Handle -> TextEncoding -> IO ()
IO.hSetEncoding Handle
h TextEncoding
IO.utf8
  Handle -> FilePath -> IO ()
IO.hPutStr Handle
h FilePath
s

-- | Writes a UTF8-encoded text file. The native convention for line
-- endings is used.

writeTextToFile :: FilePath -> Text -> IO ()
writeTextToFile :: FilePath -> Text -> IO ()
writeTextToFile FilePath
file Text
s = FilePath -> IOMode -> (Handle -> IO ()) -> IO ()
forall r. FilePath -> IOMode -> (Handle -> IO r) -> IO r
IO.withFile FilePath
file IOMode
IO.WriteMode ((Handle -> IO ()) -> IO ()) -> (Handle -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$ \Handle
h -> do
  Handle -> TextEncoding -> IO ()
IO.hSetEncoding Handle
h TextEncoding
IO.utf8
  Handle -> Text -> IO ()
T.hPutStr Handle
h Text
s