---------------------------------------------------------------------
--
-- Module      :  Strings
-- Copyright   :
--
-- | a module to convert between character string encodings
--

-- would require systematic checks what are permited characters
-- (especially for input to urlEncoding)

-- the latin encoding is produced by show ...
-- t2u is nearly invertible...

-- strings remain here, to be used when constructing the wrappers for
-- functions used from other packages (with String interfaces)
----------------------------------------------------------------------
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE OverloadedStrings     #-}
{-# LANGUAGE ScopedTypeVariables   #-}
{-# LANGUAGE StandaloneDeriving
    , DeriveGeneric
    , DeriveAnyClass
    , TypeSynonymInstances
      #-}
{-# OPTIONS_GHC -fno-warn-missing-methods #-}
{-# OPTIONS_GHC -fno-warn-missing-signatures #-}
{-# OPTIONS_GHC -w #-}

module Uniform.Strings.Conversion (
    ByteString, LazyByteString
    , s2b, b2s, b2t,   t2b, t2u,  s2u
    , s2t, t2s
    , t2tl, tl2t
    -- uses UTF8 as encoding in ByteString
    -- urlencode is always represented the same as the input
    , Text (..), BSUTF (..), URL (..)
    -- , URLform , b2uf, b2urlf, urlf2b,

    , b2bu, bu2b, bu2s, bu2t, t2bu, s2bu
    , u2b, u2t,  u2s, b2u
    , b2bl, bl2b -- lazy bytestring
    , bl2t, t2bl
    , bb2t, bb2s  -- conversion with error if not UTF8
    , s2latin, t2latin, latin2t, latin2s -- conversion to the latin1 encoding
    , BSlat (..), s2lat, lat2s, t2lat, lat2t
    , s3lat, t3lat, s3latin, t3latin
    , s2url, url2s,  unURL, t22latin
    , convertLatin, findNonLatinChars, findNonLatinCharsT
    , filterLatin
    , module Safe
    )   where
-- 
import           Safe (fromJustNote)
import GHC.Generics (Generic)
import Uniform.Zero (Zeros(zero) )

import Control.Monad (join)

import           Data.Text            (Text)
import qualified Data.Text            as T
import Data.Char (ord)
import Data.List (nub)
import           Data.ByteString      (ByteString)
import qualified Data.ByteString      as ByteString
import qualified Data.ByteString.Lazy as Lazy 
import Data.ByteString.Char8 (pack, unpack)

import           Data.Text.Encoding   (decodeUtf8, decodeUtf8', encodeUtf8)

import qualified Network.URI          as URI
-- import qualified Snap.Core            as SN

import qualified Data.Text.Lazy as LText  -- (toStrict, fromStrict)

bl2t :: LazyByteString ->Text
-- ^ conversion from LazyByteString to text (only if guarantee that only utf8 values)
bl2t :: LazyByteString -> Text
bl2t =    BSUTF -> Text
bu2t (BSUTF -> Text)
-> (LazyByteString -> BSUTF) -> LazyByteString -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> BSUTF
BSUTF (ByteString -> BSUTF)
-> (LazyByteString -> ByteString) -> LazyByteString -> BSUTF
forall b c a. (b -> c) -> (a -> b) -> a -> c
. LazyByteString -> ByteString
bl2b

t2bl :: Text -> LazyByteString
t2bl :: Text -> LazyByteString
t2bl =   ByteString -> LazyByteString
b2bl (ByteString -> LazyByteString)
-> (Text -> ByteString) -> Text -> LazyByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> ByteString
t2b


s2t :: String -> Text
-- ^ String to Text (invertable)
s2t :: String -> Text
s2t = String -> Text
T.pack

t2s :: Text -> String
-- ^ String to Text (invertable)
t2s :: Text -> String
t2s = Text -> String
T.unpack

tl2t :: LText.Text -> Text
tl2t :: Text -> Text
tl2t = Text -> Text
LText.toStrict

t2tl :: Text -> LText.Text
t2tl :: Text -> Text
t2tl = Text -> Text
LText.fromStrict

type LazyByteString = Lazy.ByteString

instance Zeros ByteString where zero :: ByteString
zero = Text -> ByteString
t2b Text
""
instance Zeros LazyByteString where zero :: LazyByteString
zero = ByteString -> LazyByteString
b2bl ByteString
forall z. Zeros z => z
zero

newtype BSUTF = BSUTF ByteString
    deriving (Int -> BSUTF -> ShowS
[BSUTF] -> ShowS
BSUTF -> String
(Int -> BSUTF -> ShowS)
-> (BSUTF -> String) -> ([BSUTF] -> ShowS) -> Show BSUTF
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [BSUTF] -> ShowS
$cshowList :: [BSUTF] -> ShowS
show :: BSUTF -> String
$cshow :: BSUTF -> String
showsPrec :: Int -> BSUTF -> ShowS
$cshowsPrec :: Int -> BSUTF -> ShowS
Show, ReadPrec [BSUTF]
ReadPrec BSUTF
Int -> ReadS BSUTF
ReadS [BSUTF]
(Int -> ReadS BSUTF)
-> ReadS [BSUTF]
-> ReadPrec BSUTF
-> ReadPrec [BSUTF]
-> Read BSUTF
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [BSUTF]
$creadListPrec :: ReadPrec [BSUTF]
readPrec :: ReadPrec BSUTF
$creadPrec :: ReadPrec BSUTF
readList :: ReadS [BSUTF]
$creadList :: ReadS [BSUTF]
readsPrec :: Int -> ReadS BSUTF
$creadsPrec :: Int -> ReadS BSUTF
Read, BSUTF -> BSUTF -> Bool
(BSUTF -> BSUTF -> Bool) -> (BSUTF -> BSUTF -> Bool) -> Eq BSUTF
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: BSUTF -> BSUTF -> Bool
$c/= :: BSUTF -> BSUTF -> Bool
== :: BSUTF -> BSUTF -> Bool
$c== :: BSUTF -> BSUTF -> Bool
Eq, Eq BSUTF
Eq BSUTF
-> (BSUTF -> BSUTF -> Ordering)
-> (BSUTF -> BSUTF -> Bool)
-> (BSUTF -> BSUTF -> Bool)
-> (BSUTF -> BSUTF -> Bool)
-> (BSUTF -> BSUTF -> Bool)
-> (BSUTF -> BSUTF -> BSUTF)
-> (BSUTF -> BSUTF -> BSUTF)
-> Ord BSUTF
BSUTF -> BSUTF -> Bool
BSUTF -> BSUTF -> Ordering
BSUTF -> BSUTF -> BSUTF
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: BSUTF -> BSUTF -> BSUTF
$cmin :: BSUTF -> BSUTF -> BSUTF
max :: BSUTF -> BSUTF -> BSUTF
$cmax :: BSUTF -> BSUTF -> BSUTF
>= :: BSUTF -> BSUTF -> Bool
$c>= :: BSUTF -> BSUTF -> Bool
> :: BSUTF -> BSUTF -> Bool
$c> :: BSUTF -> BSUTF -> Bool
<= :: BSUTF -> BSUTF -> Bool
$c<= :: BSUTF -> BSUTF -> Bool
< :: BSUTF -> BSUTF -> Bool
$c< :: BSUTF -> BSUTF -> Bool
compare :: BSUTF -> BSUTF -> Ordering
$ccompare :: BSUTF -> BSUTF -> Ordering
$cp1Ord :: Eq BSUTF
Ord, (forall x. BSUTF -> Rep BSUTF x)
-> (forall x. Rep BSUTF x -> BSUTF) -> Generic BSUTF
forall x. Rep BSUTF x -> BSUTF
forall x. BSUTF -> Rep BSUTF x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep BSUTF x -> BSUTF
$cfrom :: forall x. BSUTF -> Rep BSUTF x
Generic, BSUTF
Eq BSUTF => BSUTF -> Bool
BSUTF
-> (Eq BSUTF => BSUTF -> Bool)
-> (Eq BSUTF => BSUTF -> Bool)
-> Zeros BSUTF
forall z.
z -> (Eq z => z -> Bool) -> (Eq z => z -> Bool) -> Zeros z
notZero :: BSUTF -> Bool
$cnotZero :: Eq BSUTF => BSUTF -> Bool
isZero :: BSUTF -> Bool
$cisZero :: Eq BSUTF => BSUTF -> Bool
zero :: BSUTF
$czero :: BSUTF
Zeros, Addr#
Addr# -> BSUTF -> BSUTF -> BSUTF
NonEmpty BSUTF -> BSUTF
BSUTF -> BSUTF -> BSUTF
(BSUTF -> BSUTF -> BSUTF)
-> (NonEmpty BSUTF -> BSUTF)
-> (forall b. Integral b => b -> BSUTF -> BSUTF)
-> Semigroup BSUTF
forall b. Integral b => b -> BSUTF -> BSUTF
forall a.
(a -> a -> a)
-> (NonEmpty a -> a)
-> (forall b. Integral b => b -> a -> a)
-> Semigroup a
forall a. Addr# -> a
noMethodBindingError :: forall a. Addr# -> a
stimes :: b -> BSUTF -> BSUTF
$cstimes :: forall b. Integral b => b -> BSUTF -> BSUTF
sconcat :: NonEmpty BSUTF -> BSUTF
$csconcat :: NonEmpty BSUTF -> BSUTF
$c<> :: BSUTF -> BSUTF -> BSUTF
Semigroup, Semigroup BSUTF
Addr#
BSUTF
Semigroup BSUTF
-> BSUTF
-> (BSUTF -> BSUTF -> BSUTF)
-> ([BSUTF] -> BSUTF)
-> Monoid BSUTF
Addr# -> BSUTF
[BSUTF] -> BSUTF
BSUTF -> BSUTF -> BSUTF
forall a.
Semigroup a -> a -> (a -> a -> a) -> ([a] -> a) -> Monoid a
forall a. Addr# -> a
noMethodBindingError :: forall a. Addr# -> a
mconcat :: [BSUTF] -> BSUTF
$cmconcat :: [BSUTF] -> BSUTF
mappend :: BSUTF -> BSUTF -> BSUTF
$cmappend :: BSUTF -> BSUTF -> BSUTF
$cmempty :: BSUTF
$cp1Monoid :: Semigroup BSUTF
Monoid)

unBSUTF :: BSUTF -> ByteString
unBSUTF :: BSUTF -> ByteString
unBSUTF (BSUTF ByteString
a) = ByteString
a


t2bu :: Text ->  BSUTF
-- ^ Text to Bytestring (invertable)
t2bu :: Text -> BSUTF
t2bu = ByteString -> BSUTF
BSUTF (ByteString -> BSUTF) -> (Text -> ByteString) -> Text -> BSUTF
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> ByteString
encodeUtf8

bu2t ::  BSUTF -> Text
-- ^ ByteString to Text --  inverse (not an arbitrary input)
bu2t :: BSUTF -> Text
bu2t = ByteString -> Text
decodeUtf8 (ByteString -> Text) -> (BSUTF -> ByteString) -> BSUTF -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. BSUTF -> ByteString
unBSUTF


-- conversion ByteString BSUTF
b2bu :: ByteString -> Maybe BSUTF
b2bu :: ByteString -> Maybe BSUTF
b2bu ByteString
a = if ByteString -> Bool
testByteStringUtf8 ByteString
a then BSUTF -> Maybe BSUTF
forall a. a -> Maybe a
Just (ByteString -> BSUTF
BSUTF ByteString
a) else Maybe BSUTF
forall a. Maybe a
Nothing

bu2b :: BSUTF -> ByteString
bu2b :: BSUTF -> ByteString
bu2b = BSUTF -> ByteString
unBSUTF

bu2s :: BSUTF -> String
bu2s :: BSUTF -> String
bu2s = Text -> String
t2s (Text -> String) -> (BSUTF -> Text) -> BSUTF -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. BSUTF -> Text
bu2t

testByteStringUtf8 :: ByteString -> Bool
-- ^ test whether a byte string is valid utf8 encoded
-- used for avoiding problems with the quickcheck conversions
testByteStringUtf8 :: ByteString -> Bool
testByteStringUtf8 ByteString
b =
    case ByteString -> Either UnicodeException Text
decodeUtf8' ByteString
b of
                -- :: ByteString -> Either UnicodeException Text
                    Left UnicodeException
s  -> Bool
False
                    Right Text
t -> Bool
True

t2b :: Text -> ByteString
t2b :: Text -> ByteString
t2b = BSUTF -> ByteString
bu2b (BSUTF -> ByteString) -> (Text -> BSUTF) -> Text -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> BSUTF
t2bu

b2t :: ByteString -> Maybe Text
b2t :: ByteString -> Maybe Text
b2t = (BSUTF -> Text) -> Maybe BSUTF -> Maybe Text
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap BSUTF -> Text
bu2t (Maybe BSUTF -> Maybe Text)
-> (ByteString -> Maybe BSUTF) -> ByteString -> Maybe Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Maybe BSUTF
b2bu

bb2s :: ByteString -> String
-- converts and stops with error when not UTF8
bb2s :: ByteString -> String
bb2s ByteString
s = String -> Maybe String -> String
forall a. Partial => String -> Maybe a -> a
fromJustNote (String
"bb2s - bytestring to string conversion: " String -> ShowS
forall a. [a] -> [a] -> [a]
++ ByteString -> String
forall a. Show a => a -> String
show ByteString
s
        String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
" was not a utf8") (Maybe String -> String)
-> (ByteString -> Maybe String) -> ByteString -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Maybe String
b2s (ByteString -> String) -> ByteString -> String
forall a b. (a -> b) -> a -> b
$ ByteString
s

bb2t :: ByteString -> Text
-- converts and stopw with error when not UTF8
bb2t :: ByteString -> Text
bb2t ByteString
s = String -> Maybe Text -> Text
forall a. Partial => String -> Maybe a -> a
fromJustNote (String
"bb2s - bytestring to text conversion: " String -> ShowS
forall a. [a] -> [a] -> [a]
++ ByteString -> String
forall a. Show a => a -> String
show ByteString
s
        String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
" was not a utf8") (Maybe Text -> Text)
-> (ByteString -> Maybe Text) -> ByteString -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Maybe Text
b2t (ByteString -> Text) -> ByteString -> Text
forall a b. (a -> b) -> a -> b
$ ByteString
s
-- bytestring -- string (just a composition of t2s . b2t and reverse)
s2bu :: String ->  BSUTF
-- ^ String to Bytestring (invertable)
s2bu :: String -> BSUTF
s2bu = ByteString -> BSUTF
BSUTF (ByteString -> BSUTF) -> (String -> ByteString) -> String -> BSUTF
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> ByteString
encodeUtf8 (Text -> ByteString) -> (String -> Text) -> String -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Text
s2t

--bu2s ::  BSUTF -> String
---- ^ ByteString to String -- not inverse (not any arbitrary input)
--bu2s = t2s . decodeUtf8 . unBSUTF

s2b :: String -> ByteString
s2b :: String -> ByteString
s2b = Text -> ByteString
t2b (Text -> ByteString) -> (String -> Text) -> String -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Text
s2t

b2bl :: ByteString -> Lazy.ByteString
b2bl :: ByteString -> LazyByteString
b2bl = ByteString -> LazyByteString
Lazy.fromStrict

bl2b ::  Lazy.ByteString -> ByteString
bl2b :: LazyByteString -> ByteString
bl2b = LazyByteString -> ByteString
Lazy.toStrict

b2s :: ByteString -> Maybe String
b2s :: ByteString -> Maybe String
b2s = (Text -> String) -> Maybe Text -> Maybe String
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Text -> String
t2s (Maybe Text -> Maybe String)
-> (ByteString -> Maybe Text) -> ByteString -> Maybe String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Maybe Text
b2t


newtype URL = URL String deriving (Int -> URL -> ShowS
[URL] -> ShowS
URL -> String
(Int -> URL -> ShowS)
-> (URL -> String) -> ([URL] -> ShowS) -> Show URL
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [URL] -> ShowS
$cshowList :: [URL] -> ShowS
show :: URL -> String
$cshow :: URL -> String
showsPrec :: Int -> URL -> ShowS
$cshowsPrec :: Int -> URL -> ShowS
Show, URL -> URL -> Bool
(URL -> URL -> Bool) -> (URL -> URL -> Bool) -> Eq URL
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: URL -> URL -> Bool
$c/= :: URL -> URL -> Bool
== :: URL -> URL -> Bool
$c== :: URL -> URL -> Bool
Eq)
instance Zeros URL where zero :: URL
zero = String -> URL
URL String
forall z. Zeros z => z
zero

unURL :: URL -> String
unURL :: URL -> String
unURL (URL String
t) = String
t


s2url :: String -> URL
-- ^ convert string to url   (uses code from Network.HTTP, which converts space into %20)
s2url :: String -> URL
s2url =   String -> URL
URL (String -> URL) -> ShowS -> String -> URL
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> ShowS
URI.escapeURIString Char -> Bool
URI.isUnescapedInURIComponent
--s2url =   URL . HTTP.urlEncode

url2s :: URL -> String
-- ^ convert url to string   (uses code from Network.HTTP, which converts space into %20)
url2s :: URL -> String
url2s  =   ShowS
URI.unEscapeString ShowS -> (URL -> String) -> URL -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. URL -> String
unURL

testUrlEncodingURI :: String -> Bool
testUrlEncodingURI :: String -> Bool
testUrlEncodingURI String
a = String
a String -> String -> Bool
forall a. Eq a => a -> a -> Bool
== (URL -> String
unURL (URL -> String) -> (String -> URL) -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> URL
s2url (String -> URL) -> ShowS -> String -> URL
forall b c a. (b -> c) -> (a -> b) -> a -> c
. URL -> String
url2s (URL -> String) -> (String -> URL) -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> URL
URL ShowS -> ShowS
forall a b. (a -> b) -> a -> b
$ String
a)

url2u :: URL -> String
url2u :: URL -> String
url2u = URL -> String
unURL
u2url :: String -> Maybe URL
u2url :: String -> Maybe URL
u2url String
a = if String -> Bool
testUrlEncodingURI String
a then URL -> Maybe URL
forall a. a -> Maybe a
Just (String -> URL
URL String
a) else Maybe URL
forall a. Maybe a
Nothing


s2u :: String -> String
-- ^ convert string to url   (uses code from Network.HTTP, which converts space into %20)
s2u :: ShowS
s2u = URL -> String
url2u (URL -> String) -> (String -> URL) -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> URL
s2url

u2s :: String -> Maybe String     --not inverse
-- ^ convert url to string   (uses code from Network.HTTP, which converts space into %20)
u2s :: String -> Maybe String
u2s  =   (URL -> String) -> Maybe URL -> Maybe String
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap URL -> String
url2s (Maybe URL -> Maybe String)
-> (String -> Maybe URL) -> String -> Maybe String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Maybe URL
u2url


-- case for encoding of form content (with + for space)
-- removed for 9.2.1 
-- newtype URLform = URLform ByteString deriving (Show, Eq)
-- unURLform :: URLform -> ByteString
-- unURLform (URLform t) = t


-- b2urlf :: ByteString -> URLform
-- -- ^ convert string to url   (uses code from SNAP, which converts space into +)
-- b2urlf =   URLform . SN.urlEncode

-- urlf2b :: URLform -> ByteString
-- -- ^ convert url to string   (uses code from SNAP, which converts space into +)
-- urlf2b = fromJustNote "urlf2b nothing" . SN.urlDecode . unURLform


-- testUrlEncodingSNAP :: ByteString -> Bool
-- testUrlEncodingSNAP a =  maybe False ((a ==). SN.urlEncode) . SN.urlDecode $ a
 
-- urlf2u :: URLform -> ByteString
-- urlf2u = unURLform
-- u2urlf :: ByteString -> Maybe URLform
-- u2urlf a = if testUrlEncodingSNAP a then Just (URLform a) else Nothing
-- -- this test allows control in url encoded strings ...


-- b2uf :: ByteString -> ByteString
-- -- ^ convert ByteString to url   (uses code from SNAP which converts space into +)
-- b2uf = urlf2u . b2urlf

-- uf2b :: ByteString -> Maybe ByteString     --not inverse
-- -- ^ convert url to ByteString   (uses code from SNAP, which converts space into +)
-- uf2b  =   fmap urlf2b . u2urlf


t2u :: Text -> Text
t2u :: Text -> Text
t2u = String -> Text
s2t (String -> Text) -> (Text -> String) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShowS
s2u ShowS -> (Text -> String) -> Text -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> String
t2s
u2t :: Text -> Maybe Text
u2t :: Text -> Maybe Text
u2t = (String -> Text) -> Maybe String -> Maybe Text
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap String -> Text
s2t (Maybe String -> Maybe Text)
-> (Text -> Maybe String) -> Text -> Maybe Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Maybe String
u2s (String -> Maybe String)
-> (Text -> String) -> Text -> Maybe String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> String
t2s

b2u :: ByteString -> Maybe ByteString
b2u :: ByteString -> Maybe ByteString
b2u = (String -> ByteString) -> Maybe String -> Maybe ByteString
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (String -> ByteString
s2b (String -> ByteString) -> ShowS -> String -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShowS
s2u) (Maybe String -> Maybe ByteString)
-> (ByteString -> Maybe String) -> ByteString -> Maybe ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Maybe String
b2s
u2b :: ByteString -> Maybe ByteString
u2b :: ByteString -> Maybe ByteString
u2b = (String -> ByteString) -> Maybe String -> Maybe ByteString
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap String -> ByteString
s2b (Maybe String -> Maybe ByteString)
-> (ByteString -> Maybe String) -> ByteString -> Maybe ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Maybe (Maybe String) -> Maybe String
forall (m :: * -> *) a. Monad m => m (m a) -> m a
join  (Maybe (Maybe String) -> Maybe String)
-> (ByteString -> Maybe (Maybe String))
-> ByteString
-> Maybe String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (String -> Maybe String) -> Maybe String -> Maybe (Maybe String)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap String -> Maybe String
u2s (Maybe String -> Maybe (Maybe String))
-> (ByteString -> Maybe String)
-> ByteString
-> Maybe (Maybe String)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Maybe String
b2s


-- | bytestring with latin1 encoded characters
newtype BSlat = BSlat ByteString deriving (Int -> BSlat -> ShowS
[BSlat] -> ShowS
BSlat -> String
(Int -> BSlat -> ShowS)
-> (BSlat -> String) -> ([BSlat] -> ShowS) -> Show BSlat
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [BSlat] -> ShowS
$cshowList :: [BSlat] -> ShowS
show :: BSlat -> String
$cshow :: BSlat -> String
showsPrec :: Int -> BSlat -> ShowS
$cshowsPrec :: Int -> BSlat -> ShowS
Show, BSlat -> BSlat -> Bool
(BSlat -> BSlat -> Bool) -> (BSlat -> BSlat -> Bool) -> Eq BSlat
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: BSlat -> BSlat -> Bool
$c/= :: BSlat -> BSlat -> Bool
== :: BSlat -> BSlat -> Bool
$c== :: BSlat -> BSlat -> Bool
Eq)
unBSlat :: BSlat -> ByteString
unBSlat :: BSlat -> ByteString
unBSlat (BSlat ByteString
a) = ByteString
a


lat2s :: BSlat -> String
-- ^ bytestring with latin encoding to string
lat2s :: BSlat -> String
lat2s = ByteString -> String
latin2s (ByteString -> String) -> (BSlat -> ByteString) -> BSlat -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. BSlat -> ByteString
unBSlat

s2lat :: String -> Maybe BSlat   -- is this always possible ?
-- ^ string encoded as ByteString with latin encoding, if possible
s2lat :: String -> Maybe BSlat
s2lat =  (ByteString -> BSlat) -> Maybe ByteString -> Maybe BSlat
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ByteString -> BSlat
BSlat (Maybe ByteString -> Maybe BSlat)
-> (String -> Maybe ByteString) -> String -> Maybe BSlat
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Maybe ByteString
s22latin

s3lat :: String ->  BSlat   -- is this always possible ?
-- ^ string converted to represenatable as latin and then encoded
-- lossy!
s3lat :: String -> BSlat
s3lat   =  ByteString -> BSlat
BSlat (ByteString -> BSlat) -> (String -> ByteString) -> String -> BSlat
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> ByteString
s3latin

lat2t :: BSlat -> Text
-- ^ Text encoded as ByteString with latin encoding, if possible
lat2t :: BSlat -> Text
lat2t = ByteString -> Text
latin2t (ByteString -> Text) -> (BSlat -> ByteString) -> BSlat -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. BSlat -> ByteString
unBSlat

t2lat :: Text -> Maybe BSlat   -- is this always possible
-- ^ Text encoded as ByteString with latin encoding, if possible
t2lat :: Text -> Maybe BSlat
t2lat = (ByteString -> BSlat) -> Maybe ByteString -> Maybe BSlat
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ByteString -> BSlat
BSlat (Maybe ByteString -> Maybe BSlat)
-> (Text -> Maybe ByteString) -> Text -> Maybe BSlat
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Maybe ByteString
t22latin

t3lat :: Text -> BSlat   -- is this always possible
-- ^ Text converted to represenatable as latin and then encoded
-- lossy!
t3lat :: Text -> BSlat
t3lat =  ByteString -> BSlat
BSlat (ByteString -> BSlat) -> (Text -> ByteString) -> Text -> BSlat
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> ByteString
t3latin

latin2s :: ByteString -> String
    --    | works always, but produces unexpected results if bytestring is not latin encoded
latin2s :: ByteString -> String
latin2s = ByteString -> String
Data.ByteString.Char8.unpack
--
s2latin :: String ->  ByteString
        --  | works always, but produces unexpected results if bytestring is not latin encoded
s2latin :: String -> ByteString
s2latin =  String -> ByteString
Data.ByteString.Char8.pack

s22latin :: String -> Maybe ByteString
s22latin :: String -> Maybe ByteString
s22latin String
s = if (Char -> Bool) -> String -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all  ((Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<Int
256) (Int -> Bool) -> (Char -> Int) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Int
ord) String
s  then  ByteString -> Maybe ByteString
forall a. a -> Maybe a
Just (ByteString -> Maybe ByteString)
-> (String -> ByteString) -> String -> Maybe ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
.  String -> ByteString
s2latin  (String -> Maybe ByteString) -> String -> Maybe ByteString
forall a b. (a -> b) -> a -> b
$ String
s else Maybe ByteString
forall a. Maybe a
Nothing   -- Data.ByteString.Char8.pack . T.unpack

s3latin :: String ->  ByteString
s3latin :: String -> ByteString
s3latin =   String -> ByteString
s2latin  (String -> ByteString) -> ShowS -> String -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShowS
convertLatin

filterLatin :: String -> String
filterLatin :: ShowS
filterLatin = (Char -> Bool) -> ShowS
forall a. (a -> Bool) -> [a] -> [a]
filter ((Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<Int
256)(Int -> Bool) -> (Char -> Int) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
.Char -> Int
ord )

convertLatin :: String -> String
-- ^ convert a string to contain only characters in latin1
convertLatin :: ShowS
convertLatin = (Char -> Char) -> ShowS
forall a b. (a -> b) -> [a] -> [b]
map Char -> Char
conv2latinChar

conv2latinChar :: Char -> Char
-- ^ convert character not in the latin1 encoding (intelligently treating quotes and double quotes)
-- possibly other cases later added
conv2latinChar :: Char -> Char
conv2latinChar Char
c = if Char -> Int
ord Char
c Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
256 then Char
c else
    case Char
c of
        Char
'\x201C' -> Char
'"'
        Char
'\x201D' -> Char
'"'
        Char
'\x201E' -> Char
'"'
        Char
'\8212' -> Char
'-'    -- em dash
        Char
'\8222' -> Char
'"'    -- lower quote
        Char
'\8216' -> Char
'\''    --  left single quote
        Char
'\8217' -> Char
'\''    -- right single quote
        Char
'\8218' -> Char
'\''    --  quote
        Char
'\8221' -> Char
'"'    -- unclear why 8221 but is quote
--        '\x2018' -> '\''   -- same as 8216
--        '\x2019' -> '\''  -- same as 8217

        Char
_ -> Char
c -- '\SUB'    -- could be another char ? \SUB

findNonLatinChars :: String -> String
-- ^ the result is a string of all the characters not in the latin1 encoding
-- possibly apply conv2latinChar first
findNonLatinChars :: ShowS
findNonLatinChars = ShowS
forall a. Eq a => [a] -> [a]
nub ShowS -> ShowS -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> ShowS
forall a. (a -> Bool) -> [a] -> [a]
filter ((Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>Int
256)(Int -> Bool) -> (Char -> Int) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
.Char -> Int
ord )
--            (\c -> conv2latinChar c == '\SUB')

findNonLatinCharsT :: Text -> Text
-- ^ the result is a string of all the characters not in the latin1 encoding
findNonLatinCharsT :: Text -> Text
findNonLatinCharsT = String -> Text
s2t (String -> Text) -> (Text -> String) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShowS
findNonLatinChars ShowS -> (Text -> String) -> Text -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> String
t2s



latin2t :: ByteString -> Text
latin2t :: ByteString -> Text
latin2t = String -> Text
s2t (String -> Text) -> (ByteString -> String) -> ByteString -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> String
latin2s  -- T.pack .  Data.ByteString.Char8.unpack

t2latin :: Text ->  ByteString
-- text to bytestring -  works always, but produces unexpected results if bytestring is not latin encoded
t2latin :: Text -> ByteString
t2latin   = String -> ByteString
s2latin (String -> ByteString) -> (Text -> String) -> Text -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> String
t2s  -- Data.ByteString.Char8.pack . T.unpack

t22latin :: Text -> Maybe ByteString
-- ^ converts text to bytestring, if meaningful
t22latin :: Text -> Maybe ByteString
t22latin   = String -> Maybe ByteString
s22latin (String -> Maybe ByteString)
-> (Text -> String) -> Text -> Maybe ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> String
t2s    -- Data.ByteString.Char8.pack . T.unpack
--t22latin t = if all  ((<256) . ord) (t2s t) then  Just .  s2latin . t2s $ t else Nothing   -- Data.ByteString.Char8.pack . T.unpack

t3latin :: Text ->  ByteString
-- text to bytestring - meaningful, but converted -- lossy!
t3latin :: Text -> ByteString
t3latin   = String -> ByteString
s3latin (String -> ByteString) -> (Text -> String) -> Text -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> String
t2s  -- Data.ByteString.Char8.pack . T.unpack
--

putIOwords :: [Text] -> IO ()
putIOwords :: [Text] -> IO ()
putIOwords = String -> IO ()
putStrLn (String -> IO ()) -> ([Text] -> String) -> [Text] -> IO ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [String] -> String
unlines ([String] -> String) -> ([Text] -> [String]) -> [Text] -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Text -> String) -> [Text] -> [String]
forall a b. (a -> b) -> [a] -> [b]
map Text -> String
t2s

-- chars :: [GHC.Word.Word8]
chars :: [Word8]
chars = [Word8
198, Word8
216, Word8
197, Word8
206, Word8
219,Word8
140,Word8
252,Word8
202, Word8
419, Word8
420, Word8
1937 ]
difficultBString :: ByteString
difficultBString = [Word8] -> ByteString
ByteString.pack [Word8]
chars
difficultTString :: String
difficultTString = String
"\198\216\197\206\219\140\252\202\419\420\1937"