{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.UTF8 Copyright : Copyright (C) 2010-2022 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane Stability : alpha Portability : portable UTF-8 aware string IO functions that will work with GHC 6.10, 6.12, or 7. -} module Text.Pandoc.UTF8 ( readFile , getContents , writeFileWith , writeFile , putStrWith , putStr , putStrLnWith , putStrLn , hPutStrWith , hPutStr , hPutStrLnWith , hPutStrLn , hGetContents , toString , toText , fromString , fromText , toStringLazy , fromTextLazy , toTextLazy , fromStringLazy , encodePath , decodeArg ) where import qualified Data.ByteString.Char8 as B import qualified Data.ByteString.Lazy.Char8 as BL import Data.Text (Text) import qualified Data.Text as T import qualified Data.Text.IO as TIO import qualified Data.Text.Encoding as T import qualified Data.Text.Lazy as TL import qualified Data.Text.Lazy.Encoding as TL import Prelude hiding (getContents, putStr, putStrLn, readFile, writeFile) import System.IO hiding (getContents, hGetContents, hPutStr, hPutStrLn, putStr, putStrLn, readFile, writeFile) readFile :: FilePath -> IO Text readFile f = do h <- openFile (encodePath f) ReadMode hGetContents h getContents :: IO Text getContents = hGetContents stdin writeFileWith :: Newline -> FilePath -> Text -> IO () writeFileWith eol f s = withFile (encodePath f) WriteMode $ \h -> hPutStrWith eol h s writeFile :: FilePath -> Text -> IO () writeFile = writeFileWith nativeNewline putStrWith :: Newline -> Text -> IO () putStrWith eol s = hPutStrWith eol stdout s putStr :: Text -> IO () putStr = putStrWith nativeNewline putStrLnWith :: Newline -> Text -> IO () putStrLnWith eol s = hPutStrLnWith eol stdout s putStrLn :: Text -> IO () putStrLn = putStrLnWith nativeNewline hPutStrWith :: Newline -> Handle -> Text -> IO () hPutStrWith eol h s = hSetNewlineMode h (NewlineMode eol eol) >> hSetEncoding h utf8 >> TIO.hPutStr h s hPutStr :: Handle -> Text -> IO () hPutStr = hPutStrWith nativeNewline hPutStrLnWith :: Newline -> Handle -> Text -> IO () hPutStrLnWith eol h s = hSetNewlineMode h (NewlineMode eol eol) >> hSetEncoding h utf8 >> TIO.hPutStrLn h s hPutStrLn :: Handle -> Text -> IO () hPutStrLn = hPutStrLnWith nativeNewline hGetContents :: Handle -> IO Text hGetContents = fmap toText . B.hGetContents -- | Convert UTF8-encoded ByteString to Text, also -- removing '\\r' characters. toText :: B.ByteString -> Text toText = T.decodeUtf8 . filterCRs . dropBOM where dropBOM bs = if "\xEF\xBB\xBF" `B.isPrefixOf` bs then B.drop 3 bs else bs filterCRs = B.filter (/='\r') -- | Convert UTF8-encoded ByteString to String, also -- removing '\\r' characters. toString :: B.ByteString -> String toString = T.unpack . toText -- | Convert UTF8-encoded ByteString to Text, also -- removing '\\r' characters. toTextLazy :: BL.ByteString -> TL.Text toTextLazy = TL.decodeUtf8 . filterCRs . dropBOM where dropBOM bs = if "\xEF\xBB\xBF" `BL.isPrefixOf` bs then BL.drop 3 bs else bs filterCRs = BL.filter (/='\r') -- | Convert UTF8-encoded ByteString to String, also -- removing '\\r' characters. toStringLazy :: BL.ByteString -> String toStringLazy = TL.unpack . toTextLazy fromText :: Text -> B.ByteString fromText = T.encodeUtf8 fromTextLazy :: TL.Text -> BL.ByteString fromTextLazy = TL.encodeUtf8 fromString :: String -> B.ByteString fromString = fromText . T.pack fromStringLazy :: String -> BL.ByteString fromStringLazy = fromTextLazy . TL.pack encodePath :: FilePath -> FilePath encodePath = id decodeArg :: String -> String decodeArg = id