{-# LANGUAGE CPP #-}
{-# LANGUAGE DeriveFunctor #-}

{-|
  Module: Data.Text.Conversions

  This module provides a set of typeclasses for safely converting between
  textual data. The built-in 'String' type, as well as strict 'T.Text' and lazy
  'TL.Text', are safely convertible between one another. The 'B.ByteString' type
  is frequently treated in much the same manner, but this is unsafe for two
  reasons:

  * Since 'B.ByteString' encodes binary data, it does not specify a particular
    encoding, so assuming a particular encoding like UTF-8 would be incorrect.

  * Furthermore, decoding binary data into text given a particular encoding can
    fail. Most systems simply use 'T.decodeUtf8' and similar functions, which
    will dangerously throw exceptions when given invalid data.

  This module addresses both problems by providing a 'DecodeText' typeclass for
  decoding binary data in a way that can fail and by providing a 'UTF8' wrapper
  type for selecting the desired encoding.

  Most of the time, you will not need to create your own instances or use the
  underlying functions that make the conversion machinery tick. Instead, just
  use the 'convertText' method to convert between two textual datatypes or the
  'decodeConvertText' method to perform a conversion that can fail.

  Examples:

  >>> convertText ("hello" :: String) :: Text
  "hello"
  >>> decodeConvertText (UTF8 ("hello" :: ByteString)) :: Maybe Text
  Just "hello"
  >>> decodeConvertText (UTF8 ("\xc3\x28" :: ByteString)) :: Maybe Text
  Nothing
-}
module Data.Text.Conversions (
  -- * Conversion typeclasses and functions
    FromText(..)
  , ToText(..)
  , DecodeText(..)
  , convertText
  , decodeConvertText
  -- * Encoding newtypes
  , UTF8(..)
  , Base16(..)
  , Base64(..)
  ) where

import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Encoding as TL

import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as BL

import qualified Data.ByteString.Base16 as Base16
import qualified Data.ByteString.Base16.Lazy as Base16L
import qualified Data.ByteString.Base64 as Base64
import qualified Data.ByteString.Base64.Lazy as Base64L

{-|
  Simple wrapper type that is used to select a desired encoding when encoding or
  decoding text from binary data, such as 'B.ByteString's. The conversion is not
  partial; it will result in 'Nothing' when a 'B.ByteString' is provided with
  data that is not valid in UTF-8.

  >>> convertText ("hello" :: Text) :: UTF8 ByteString
  UTF8 "hello"
  >>> decodeConvertText (UTF8 ("hello" :: ByteString)) :: Maybe Text
  Just "hello"
  >>> decodeConvertText (UTF8 ("invalid \xc3\x28" :: ByteString)) :: Maybe Text
  Nothing
-}
newtype UTF8 a = UTF8 { UTF8 a -> a
unUTF8 :: a }
  deriving (UTF8 a -> UTF8 a -> Bool
(UTF8 a -> UTF8 a -> Bool)
-> (UTF8 a -> UTF8 a -> Bool) -> Eq (UTF8 a)
forall a. Eq a => UTF8 a -> UTF8 a -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: UTF8 a -> UTF8 a -> Bool
$c/= :: forall a. Eq a => UTF8 a -> UTF8 a -> Bool
== :: UTF8 a -> UTF8 a -> Bool
$c== :: forall a. Eq a => UTF8 a -> UTF8 a -> Bool
Eq, Int -> UTF8 a -> ShowS
[UTF8 a] -> ShowS
UTF8 a -> String
(Int -> UTF8 a -> ShowS)
-> (UTF8 a -> String) -> ([UTF8 a] -> ShowS) -> Show (UTF8 a)
forall a. Show a => Int -> UTF8 a -> ShowS
forall a. Show a => [UTF8 a] -> ShowS
forall a. Show a => UTF8 a -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [UTF8 a] -> ShowS
$cshowList :: forall a. Show a => [UTF8 a] -> ShowS
show :: UTF8 a -> String
$cshow :: forall a. Show a => UTF8 a -> String
showsPrec :: Int -> UTF8 a -> ShowS
$cshowsPrec :: forall a. Show a => Int -> UTF8 a -> ShowS
Show, a -> UTF8 b -> UTF8 a
(a -> b) -> UTF8 a -> UTF8 b
(forall a b. (a -> b) -> UTF8 a -> UTF8 b)
-> (forall a b. a -> UTF8 b -> UTF8 a) -> Functor UTF8
forall a b. a -> UTF8 b -> UTF8 a
forall a b. (a -> b) -> UTF8 a -> UTF8 b
forall (f :: * -> *).
(forall a b. (a -> b) -> f a -> f b)
-> (forall a b. a -> f b -> f a) -> Functor f
<$ :: a -> UTF8 b -> UTF8 a
$c<$ :: forall a b. a -> UTF8 b -> UTF8 a
fmap :: (a -> b) -> UTF8 a -> UTF8 b
$cfmap :: forall a b. (a -> b) -> UTF8 a -> UTF8 b
Functor)

{-|
  Wrapper type used to select a base 16 encoding when encoding or decoding
  binary data. Safe because base 16 encoding will always produce ASCII output.
-}
newtype Base16 a = Base16 { Base16 a -> a
unBase16 :: a }
  deriving (Base16 a -> Base16 a -> Bool
(Base16 a -> Base16 a -> Bool)
-> (Base16 a -> Base16 a -> Bool) -> Eq (Base16 a)
forall a. Eq a => Base16 a -> Base16 a -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Base16 a -> Base16 a -> Bool
$c/= :: forall a. Eq a => Base16 a -> Base16 a -> Bool
== :: Base16 a -> Base16 a -> Bool
$c== :: forall a. Eq a => Base16 a -> Base16 a -> Bool
Eq, Int -> Base16 a -> ShowS
[Base16 a] -> ShowS
Base16 a -> String
(Int -> Base16 a -> ShowS)
-> (Base16 a -> String) -> ([Base16 a] -> ShowS) -> Show (Base16 a)
forall a. Show a => Int -> Base16 a -> ShowS
forall a. Show a => [Base16 a] -> ShowS
forall a. Show a => Base16 a -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Base16 a] -> ShowS
$cshowList :: forall a. Show a => [Base16 a] -> ShowS
show :: Base16 a -> String
$cshow :: forall a. Show a => Base16 a -> String
showsPrec :: Int -> Base16 a -> ShowS
$cshowsPrec :: forall a. Show a => Int -> Base16 a -> ShowS
Show, a -> Base16 b -> Base16 a
(a -> b) -> Base16 a -> Base16 b
(forall a b. (a -> b) -> Base16 a -> Base16 b)
-> (forall a b. a -> Base16 b -> Base16 a) -> Functor Base16
forall a b. a -> Base16 b -> Base16 a
forall a b. (a -> b) -> Base16 a -> Base16 b
forall (f :: * -> *).
(forall a b. (a -> b) -> f a -> f b)
-> (forall a b. a -> f b -> f a) -> Functor f
<$ :: a -> Base16 b -> Base16 a
$c<$ :: forall a b. a -> Base16 b -> Base16 a
fmap :: (a -> b) -> Base16 a -> Base16 b
$cfmap :: forall a b. (a -> b) -> Base16 a -> Base16 b
Functor)

{-|
  Wrapper type used to select a base 64 encoding when encoding or decoding
  binary data. Safe because base 64 encoding will always produce ASCII output.
-}
newtype Base64 a = Base64 { Base64 a -> a
unBase64 :: a }
  deriving (Base64 a -> Base64 a -> Bool
(Base64 a -> Base64 a -> Bool)
-> (Base64 a -> Base64 a -> Bool) -> Eq (Base64 a)
forall a. Eq a => Base64 a -> Base64 a -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Base64 a -> Base64 a -> Bool
$c/= :: forall a. Eq a => Base64 a -> Base64 a -> Bool
== :: Base64 a -> Base64 a -> Bool
$c== :: forall a. Eq a => Base64 a -> Base64 a -> Bool
Eq, Int -> Base64 a -> ShowS
[Base64 a] -> ShowS
Base64 a -> String
(Int -> Base64 a -> ShowS)
-> (Base64 a -> String) -> ([Base64 a] -> ShowS) -> Show (Base64 a)
forall a. Show a => Int -> Base64 a -> ShowS
forall a. Show a => [Base64 a] -> ShowS
forall a. Show a => Base64 a -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Base64 a] -> ShowS
$cshowList :: forall a. Show a => [Base64 a] -> ShowS
show :: Base64 a -> String
$cshow :: forall a. Show a => Base64 a -> String
showsPrec :: Int -> Base64 a -> ShowS
$cshowsPrec :: forall a. Show a => Int -> Base64 a -> ShowS
Show, a -> Base64 b -> Base64 a
(a -> b) -> Base64 a -> Base64 b
(forall a b. (a -> b) -> Base64 a -> Base64 b)
-> (forall a b. a -> Base64 b -> Base64 a) -> Functor Base64
forall a b. a -> Base64 b -> Base64 a
forall a b. (a -> b) -> Base64 a -> Base64 b
forall (f :: * -> *).
(forall a b. (a -> b) -> f a -> f b)
-> (forall a b. a -> f b -> f a) -> Functor f
<$ :: a -> Base64 b -> Base64 a
$c<$ :: forall a b. a -> Base64 b -> Base64 a
fmap :: (a -> b) -> Base64 a -> Base64 b
$cfmap :: forall a b. (a -> b) -> Base64 a -> Base64 b
Functor)

{-|
  A simple typeclass that handles converting arbitrary datatypes to 'T.Text'
  when the operation cannot fail. If you have a type that satisfies that
  requirement, implement this typeclass, but if the operation can fail, use
  'DecodeText' instead.
-}
class ToText a where
  toText :: a -> T.Text

{-|
  A simple typeclass that handles converting 'T.Text' to arbitrary datatypes. If
  you have a type that can be produced from text, implement this typeclass.
  However, you probably do not want to call 'fromText' directly; call
  'convertText', instead.
-}
class FromText a where
  fromText :: T.Text -> a

{-|
  A simple typeclass that handles converting arbitrary datatypes to
  'T.Text' when the operation can fail. If you have a type that satisfies that
  requirement, implement this typeclass, but if the operation cannot fail, use
  'ToText' instead.
-}
class Functor f => DecodeText f a where
  decodeText :: a -> f T.Text

{-|
  A function that provides a way to /safely/ convert between arbitrary textual
  datatypes where the conversion to text cannot fail.

  >>> convertText ("hello" :: String) :: Text
  "hello"
-}
convertText :: (ToText a, FromText b) => a -> b
convertText :: a -> b
convertText = Text -> b
forall a. FromText a => Text -> a
fromText (Text -> b) -> (a -> Text) -> a -> b
forall b c a. (b -> c) -> (a -> b) -> a -> c
. a -> Text
forall a. ToText a => a -> Text
toText


{-|
  A function that provides a way to /safely/ convert between arbitrary textual
  datatypes where the conversion to text can fail, such as decoding binary data
  to text. Since binary data can represent text in many different potential
  encodings, it is necessary to use a newtype that picks the particular
  encoding, like 'UTF8':

  >>> decodeConvertText (UTF8 ("hello" :: ByteString)) :: Maybe Text
  Just "hello"
-}
decodeConvertText :: (DecodeText f a, FromText b) => a -> f b
decodeConvertText :: a -> f b
decodeConvertText = (Text -> b) -> f Text -> f b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Text -> b
forall a. FromText a => Text -> a
fromText (f Text -> f b) -> (a -> f Text) -> a -> f b
forall b c a. (b -> c) -> (a -> b) -> a -> c
. a -> f Text
forall (f :: * -> *) a. DecodeText f a => a -> f Text
decodeText

hush :: Either a b -> Maybe b
hush :: Either a b -> Maybe b
hush (Left  a
_) = Maybe b
forall a. Maybe a
Nothing
hush (Right b
x) = b -> Maybe b
forall a. a -> Maybe a
Just b
x

instance ToText   T.Text  where toText :: Text -> Text
toText   = Text -> Text
forall a. a -> a
id
instance FromText T.Text  where fromText :: Text -> Text
fromText = Text -> Text
forall a. a -> a
id
instance ToText   String  where toText :: String -> Text
toText   = String -> Text
T.pack
instance FromText String  where fromText :: Text -> String
fromText = Text -> String
T.unpack
instance ToText   TL.Text where toText :: Text -> Text
toText   = Text -> Text
TL.toStrict
instance FromText TL.Text where fromText :: Text -> Text
fromText = Text -> Text
TL.fromStrict

instance DecodeText Maybe (UTF8 B.ByteString)  where decodeText :: UTF8 ByteString -> Maybe Text
decodeText = Either UnicodeException Text -> Maybe Text
forall a b. Either a b -> Maybe b
hush (Either UnicodeException Text -> Maybe Text)
-> (UTF8 ByteString -> Either UnicodeException Text)
-> UTF8 ByteString
-> Maybe Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Either UnicodeException Text
T.decodeUtf8' (ByteString -> Either UnicodeException Text)
-> (UTF8 ByteString -> ByteString)
-> UTF8 ByteString
-> Either UnicodeException Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. UTF8 ByteString -> ByteString
forall a. UTF8 a -> a
unUTF8
instance FromText         (UTF8 B.ByteString)  where fromText :: Text -> UTF8 ByteString
fromText   = ByteString -> UTF8 ByteString
forall a. a -> UTF8 a
UTF8 (ByteString -> UTF8 ByteString)
-> (Text -> ByteString) -> Text -> UTF8 ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> ByteString
T.encodeUtf8
instance DecodeText Maybe (UTF8 BL.ByteString) where decodeText :: UTF8 ByteString -> Maybe Text
decodeText = Either UnicodeException Text -> Maybe Text
forall a b. Either a b -> Maybe b
hush (Either UnicodeException Text -> Maybe Text)
-> (UTF8 ByteString -> Either UnicodeException Text)
-> UTF8 ByteString
-> Maybe Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Text -> Text)
-> Either UnicodeException Text -> Either UnicodeException Text
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Text -> Text
TL.toStrict (Either UnicodeException Text -> Either UnicodeException Text)
-> (UTF8 ByteString -> Either UnicodeException Text)
-> UTF8 ByteString
-> Either UnicodeException Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Either UnicodeException Text
TL.decodeUtf8' (ByteString -> Either UnicodeException Text)
-> (UTF8 ByteString -> ByteString)
-> UTF8 ByteString
-> Either UnicodeException Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. UTF8 ByteString -> ByteString
forall a. UTF8 a -> a
unUTF8
instance FromText         (UTF8 BL.ByteString) where fromText :: Text -> UTF8 ByteString
fromText   = ByteString -> UTF8 ByteString
forall a. a -> UTF8 a
UTF8 (ByteString -> UTF8 ByteString)
-> (Text -> ByteString) -> Text -> UTF8 ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> ByteString
TL.encodeUtf8 (Text -> ByteString) -> (Text -> Text) -> Text -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text
TL.fromStrict

instance ToText (Base16 B.ByteString) where
  toText :: Base16 ByteString -> Text
toText = ByteString -> Text
T.decodeUtf8 (ByteString -> Text)
-> (Base16 ByteString -> ByteString) -> Base16 ByteString -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> ByteString
Base16.encode (ByteString -> ByteString)
-> (Base16 ByteString -> ByteString)
-> Base16 ByteString
-> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Base16 ByteString -> ByteString
forall a. Base16 a -> a
unBase16
instance FromText (Maybe (Base16 B.ByteString)) where
#if MIN_VERSION_base16_bytestring(1,0,0)
  fromText :: Text -> Maybe (Base16 ByteString)
fromText Text
txt = case ByteString -> Either String ByteString
Base16.decode (Text -> ByteString
T.encodeUtf8 Text
txt) of
    Right ByteString
bs -> Base16 ByteString -> Maybe (Base16 ByteString)
forall a. a -> Maybe a
Just (Base16 ByteString -> Maybe (Base16 ByteString))
-> Base16 ByteString -> Maybe (Base16 ByteString)
forall a b. (a -> b) -> a -> b
$ ByteString -> Base16 ByteString
forall a. a -> Base16 a
Base16 ByteString
bs
    Left String
_   -> Maybe (Base16 ByteString)
forall a. Maybe a
Nothing
#else
  fromText txt = case Base16.decode (T.encodeUtf8 txt) of
    (bs, "") -> Just $ Base16 bs
    (_,  _)  -> Nothing
#endif

instance ToText (Base64 B.ByteString) where
  toText :: Base64 ByteString -> Text
toText = ByteString -> Text
T.decodeUtf8 (ByteString -> Text)
-> (Base64 ByteString -> ByteString) -> Base64 ByteString -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> ByteString
Base64.encode (ByteString -> ByteString)
-> (Base64 ByteString -> ByteString)
-> Base64 ByteString
-> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Base64 ByteString -> ByteString
forall a. Base64 a -> a
unBase64
instance FromText (Maybe (Base64 B.ByteString)) where
  fromText :: Text -> Maybe (Base64 ByteString)
fromText = (ByteString -> Base64 ByteString)
-> Maybe ByteString -> Maybe (Base64 ByteString)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ByteString -> Base64 ByteString
forall a. a -> Base64 a
Base64 (Maybe ByteString -> Maybe (Base64 ByteString))
-> (Text -> Maybe ByteString) -> Text -> Maybe (Base64 ByteString)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Either String ByteString -> Maybe ByteString
forall a b. Either a b -> Maybe b
hush (Either String ByteString -> Maybe ByteString)
-> (Text -> Either String ByteString) -> Text -> Maybe ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Either String ByteString
Base64.decode (ByteString -> Either String ByteString)
-> (Text -> ByteString) -> Text -> Either String ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> ByteString
T.encodeUtf8

instance ToText (Base16 BL.ByteString) where
  toText :: Base16 ByteString -> Text
toText = Text -> Text
TL.toStrict (Text -> Text)
-> (Base16 ByteString -> Text) -> Base16 ByteString -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Text
TL.decodeUtf8 (ByteString -> Text)
-> (Base16 ByteString -> ByteString) -> Base16 ByteString -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> ByteString
Base16L.encode (ByteString -> ByteString)
-> (Base16 ByteString -> ByteString)
-> Base16 ByteString
-> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Base16 ByteString -> ByteString
forall a. Base16 a -> a
unBase16
instance FromText (Maybe (Base16 BL.ByteString)) where
#if MIN_VERSION_base16_bytestring(1,0,0)
  fromText :: Text -> Maybe (Base16 ByteString)
fromText Text
txt = case ByteString -> Either String ByteString
Base16L.decode (Text -> ByteString
TL.encodeUtf8 (Text -> ByteString) -> Text -> ByteString
forall a b. (a -> b) -> a -> b
$ Text -> Text
TL.fromStrict Text
txt) of
    Right ByteString
bs -> Base16 ByteString -> Maybe (Base16 ByteString)
forall a. a -> Maybe a
Just (Base16 ByteString -> Maybe (Base16 ByteString))
-> Base16 ByteString -> Maybe (Base16 ByteString)
forall a b. (a -> b) -> a -> b
$ ByteString -> Base16 ByteString
forall a. a -> Base16 a
Base16 ByteString
bs
    Left String
_   -> Maybe (Base16 ByteString)
forall a. Maybe a
Nothing
#else
  fromText txt = case Base16L.decode (TL.encodeUtf8 $ TL.fromStrict txt) of
    (bs, "") -> Just $ Base16 bs
    (_,  _)  -> Nothing
#endif

instance ToText (Base64 BL.ByteString) where
  toText :: Base64 ByteString -> Text
toText = Text -> Text
TL.toStrict (Text -> Text)
-> (Base64 ByteString -> Text) -> Base64 ByteString -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Text
TL.decodeUtf8 (ByteString -> Text)
-> (Base64 ByteString -> ByteString) -> Base64 ByteString -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> ByteString
Base64L.encode (ByteString -> ByteString)
-> (Base64 ByteString -> ByteString)
-> Base64 ByteString
-> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Base64 ByteString -> ByteString
forall a. Base64 a -> a
unBase64
instance FromText (Maybe (Base64 BL.ByteString)) where
  fromText :: Text -> Maybe (Base64 ByteString)
fromText = (ByteString -> Base64 ByteString)
-> Maybe ByteString -> Maybe (Base64 ByteString)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ByteString -> Base64 ByteString
forall a. a -> Base64 a
Base64 (Maybe ByteString -> Maybe (Base64 ByteString))
-> (Text -> Maybe ByteString) -> Text -> Maybe (Base64 ByteString)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Either String ByteString -> Maybe ByteString
forall a b. Either a b -> Maybe b
hush (Either String ByteString -> Maybe ByteString)
-> (Text -> Either String ByteString) -> Text -> Maybe ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Either String ByteString
Base64L.decode (ByteString -> Either String ByteString)
-> (Text -> ByteString) -> Text -> Either String ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> ByteString
TL.encodeUtf8 (Text -> ByteString) -> (Text -> Text) -> Text -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text
TL.fromStrict