{-# LANGUAGE CPP #-}
-- | There are old index files that have funky characters like 'ø'
-- that are not properly UTF8 encoded.  As far as I can tell, these
-- files are otherwise plain ascii, so just naivelyinsert the
-- character into the output stream.
module Debian.UTF8
    ( decode
    , readFile
    ) where

#if !MIN_VERSION_base(4,8,0)
import Control.Applicative ((<$>))
#endif
import qualified Data.ByteString.Char8 as B (concat)
import qualified Data.ByteString.Lazy.Char8 as L (ByteString, readFile, toChunks)
import Data.Char (chr)
import Data.Text as T
import Data.Text.Encoding (decodeUtf8With)
import Data.Word (Word8)
import Prelude hiding (readFile)

decode :: L.ByteString -> T.Text
decode :: ByteString -> Text
decode ByteString
b = OnDecodeError -> ByteString -> Text
decodeUtf8With OnDecodeError
e ([ByteString] -> ByteString
B.concat (ByteString -> [ByteString]
L.toChunks ByteString
b))
    where
      e :: String -> Maybe Word8 -> Maybe Char
      e :: OnDecodeError
e String
_description Maybe Word8
w = (Word8 -> Char) -> Maybe Word8 -> Maybe Char
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Int -> Char
chr (Int -> Char) -> (Word8 -> Int) -> Word8 -> Char
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral) Maybe Word8
w

readFile :: FilePath -> IO T.Text
readFile :: String -> IO Text
readFile String
path = ByteString -> Text
decode (ByteString -> Text) -> IO ByteString -> IO Text
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> String -> IO ByteString
L.readFile String
path