---------------------------------------------------------------------
--
-- Module      :  Strings
-- Copyright   :
--
-- | a module to convert between character string encodings
--

-- would require systematic checks what are permited characters
-- (especially for input to urlEncoding)

-- the latin encoding is produced by show ...
-- t2u is nearly invertible...

-- strings remain here, to be used when constructing the wrappers for
-- functions used from other packages (with String interfaces)
----------------------------------------------------------------------
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE OverloadedStrings     #-}
{-# LANGUAGE ScopedTypeVariables   #-}
{-# LANGUAGE StandaloneDeriving
    , DeriveGeneric
    , DeriveAnyClass
    , TypeSynonymInstances
      #-}
{-# OPTIONS_GHC -fno-warn-missing-methods #-}
{-# OPTIONS_GHC -fno-warn-missing-signatures #-}
{-# OPTIONS_GHC -w #-}

module Uniform.Strings.Conversion (
    ByteString, LazyByteString
    , s2b, b2s, b2t,   t2b, t2u,  s2u
    , s2t, t2s
    , t2tl, tl2t
    -- uses UTF8 as encoding in ByteString
    -- urlencode is always represented the same as the input
    , Text (..), BSUTF (..), URL (..)
    , URLform , b2uf, b2urlf, urlf2b

    , b2bu, bu2b, bu2s, bu2t, t2bu, s2bu
    , u2b, u2t,  u2s, b2u
    , b2bl, bl2b -- lazy bytestring
    , bl2t, t2bl
    , bb2t, bb2s  -- conversion with error if not UTF8
    , s2latin, t2latin, latin2t, latin2s -- conversion to the latin1 encoding
    , BSlat (..), s2lat, lat2s, t2lat, lat2t
    , s3lat, t3lat, s3latin, t3latin
    , s2url, url2s,  unURL, t22latin
    , convertLatin, findNonLatinChars, findNonLatinCharsT
    , filterLatin
    , module Safe
    )   where
-- 
import           Safe (fromJustNote)
import GHC.Generics (Generic)
import Uniform.Zero (Zeros(zero) )

import Control.Monad (join)

import           Data.Text            (Text)
import qualified Data.Text            as T
import Data.Char (ord)
import Data.List (nub)
import           Data.ByteString      (ByteString)
import qualified Data.ByteString      as ByteString
import qualified Data.ByteString.Lazy as Lazy 
import Data.ByteString.Char8 (pack, unpack)

import           Data.Text.Encoding   (decodeUtf8, decodeUtf8', encodeUtf8)

import qualified Network.URI          as URI
import qualified Snap.Core            as SN

import qualified Data.Text.Lazy as LText  -- (toStrict, fromStrict)

bl2t :: LazyByteString ->Text
-- ^ conversion from LazyByteString to text (only if guarantee that only utf8 values)
bl2t :: LazyByteString -> Text
bl2t =    BSUTF -> Text
bu2t forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> BSUTF
BSUTF forall b c a. (b -> c) -> (a -> b) -> a -> c
. LazyByteString -> ByteString
bl2b

t2bl :: Text -> LazyByteString
t2bl :: Text -> LazyByteString
t2bl =   ByteString -> LazyByteString
b2bl forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> ByteString
t2b


s2t :: String -> Text
-- ^ String to Text (invertable)
s2t :: String -> Text
s2t = String -> Text
T.pack

t2s :: Text -> String
-- ^ String to Text (invertable)
t2s :: Text -> String
t2s = Text -> String
T.unpack

tl2t :: LText.Text -> Text
tl2t :: Text -> Text
tl2t = Text -> Text
LText.toStrict

t2tl :: Text -> LText.Text
t2tl :: Text -> Text
t2tl = Text -> Text
LText.fromStrict

type LazyByteString = Lazy.ByteString

instance Zeros ByteString where zero :: ByteString
zero = Text -> ByteString
t2b Text
""
instance Zeros LazyByteString where zero :: LazyByteString
zero = ByteString -> LazyByteString
b2bl forall z. Zeros z => z
zero

newtype BSUTF = BSUTF ByteString
    deriving (Int -> BSUTF -> ShowS
[BSUTF] -> ShowS
BSUTF -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [BSUTF] -> ShowS
$cshowList :: [BSUTF] -> ShowS
show :: BSUTF -> String
$cshow :: BSUTF -> String
showsPrec :: Int -> BSUTF -> ShowS
$cshowsPrec :: Int -> BSUTF -> ShowS
Show, ReadPrec [BSUTF]
ReadPrec BSUTF
Int -> ReadS BSUTF
ReadS [BSUTF]
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [BSUTF]
$creadListPrec :: ReadPrec [BSUTF]
readPrec :: ReadPrec BSUTF
$creadPrec :: ReadPrec BSUTF
readList :: ReadS [BSUTF]
$creadList :: ReadS [BSUTF]
readsPrec :: Int -> ReadS BSUTF
$creadsPrec :: Int -> ReadS BSUTF
Read, BSUTF -> BSUTF -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: BSUTF -> BSUTF -> Bool
$c/= :: BSUTF -> BSUTF -> Bool
== :: BSUTF -> BSUTF -> Bool
$c== :: BSUTF -> BSUTF -> Bool
Eq, Eq BSUTF
BSUTF -> BSUTF -> Bool
BSUTF -> BSUTF -> Ordering
BSUTF -> BSUTF -> BSUTF
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: BSUTF -> BSUTF -> BSUTF
$cmin :: BSUTF -> BSUTF -> BSUTF
max :: BSUTF -> BSUTF -> BSUTF
$cmax :: BSUTF -> BSUTF -> BSUTF
>= :: BSUTF -> BSUTF -> Bool
$c>= :: BSUTF -> BSUTF -> Bool
> :: BSUTF -> BSUTF -> Bool
$c> :: BSUTF -> BSUTF -> Bool
<= :: BSUTF -> BSUTF -> Bool
$c<= :: BSUTF -> BSUTF -> Bool
< :: BSUTF -> BSUTF -> Bool
$c< :: BSUTF -> BSUTF -> Bool
compare :: BSUTF -> BSUTF -> Ordering
$ccompare :: BSUTF -> BSUTF -> Ordering
Ord, forall x. Rep BSUTF x -> BSUTF
forall x. BSUTF -> Rep BSUTF x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep BSUTF x -> BSUTF
$cfrom :: forall x. BSUTF -> Rep BSUTF x
Generic, BSUTF
Eq BSUTF => BSUTF -> Bool
forall z.
z -> (Eq z => z -> Bool) -> (Eq z => z -> Bool) -> Zeros z
notZero :: Eq BSUTF => BSUTF -> Bool
$cnotZero :: Eq BSUTF => BSUTF -> Bool
isZero :: Eq BSUTF => BSUTF -> Bool
$cisZero :: Eq BSUTF => BSUTF -> Bool
zero :: BSUTF
$czero :: BSUTF
Zeros, Addr#
NonEmpty BSUTF -> BSUTF
BSUTF -> BSUTF -> BSUTF
forall b. Integral b => b -> BSUTF -> BSUTF
forall a.
(a -> a -> a)
-> (NonEmpty a -> a)
-> (forall b. Integral b => b -> a -> a)
-> Semigroup a
forall a. Addr# -> a
noMethodBindingError :: forall a. Addr# -> a
stimes :: forall b. Integral b => b -> BSUTF -> BSUTF
$cstimes :: forall b. Integral b => b -> BSUTF -> BSUTF
sconcat :: NonEmpty BSUTF -> BSUTF
$csconcat :: NonEmpty BSUTF -> BSUTF
$c<> :: BSUTF -> BSUTF -> BSUTF
Semigroup, Semigroup BSUTF
Addr#
BSUTF
[BSUTF] -> BSUTF
BSUTF -> BSUTF -> BSUTF
forall a.
Semigroup a -> a -> (a -> a -> a) -> ([a] -> a) -> Monoid a
forall a. Addr# -> a
noMethodBindingError :: forall a. Addr# -> a
mconcat :: [BSUTF] -> BSUTF
$cmconcat :: [BSUTF] -> BSUTF
mappend :: BSUTF -> BSUTF -> BSUTF
$cmappend :: BSUTF -> BSUTF -> BSUTF
$cmempty :: BSUTF
Monoid)

unBSUTF :: BSUTF -> ByteString
unBSUTF :: BSUTF -> ByteString
unBSUTF (BSUTF ByteString
a) = ByteString
a


t2bu :: Text ->  BSUTF
-- ^ Text to Bytestring (invertable)
t2bu :: Text -> BSUTF
t2bu = ByteString -> BSUTF
BSUTF forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> ByteString
encodeUtf8

bu2t ::  BSUTF -> Text
-- ^ ByteString to Text --  inverse (not an arbitrary input)
bu2t :: BSUTF -> Text
bu2t = ByteString -> Text
decodeUtf8 forall b c a. (b -> c) -> (a -> b) -> a -> c
. BSUTF -> ByteString
unBSUTF


-- conversion ByteString BSUTF
b2bu :: ByteString -> Maybe BSUTF
b2bu :: ByteString -> Maybe BSUTF
b2bu ByteString
a = if ByteString -> Bool
testByteStringUtf8 ByteString
a then forall a. a -> Maybe a
Just (ByteString -> BSUTF
BSUTF ByteString
a) else forall a. Maybe a
Nothing

bu2b :: BSUTF -> ByteString
bu2b :: BSUTF -> ByteString
bu2b = BSUTF -> ByteString
unBSUTF

bu2s :: BSUTF -> String
bu2s :: BSUTF -> String
bu2s = Text -> String
t2s forall b c a. (b -> c) -> (a -> b) -> a -> c
. BSUTF -> Text
bu2t

testByteStringUtf8 :: ByteString -> Bool
-- ^ test whether a byte string is valid utf8 encoded
-- used for avoiding problems with the quickcheck conversions
testByteStringUtf8 :: ByteString -> Bool
testByteStringUtf8 ByteString
b =
    case ByteString -> Either UnicodeException Text
decodeUtf8' ByteString
b of
                -- :: ByteString -> Either UnicodeException Text
                    Left UnicodeException
s  -> Bool
False
                    Right Text
t -> Bool
True

t2b :: Text -> ByteString
t2b :: Text -> ByteString
t2b = BSUTF -> ByteString
bu2b forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> BSUTF
t2bu

b2t :: ByteString -> Maybe Text
b2t :: ByteString -> Maybe Text
b2t = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap BSUTF -> Text
bu2t forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Maybe BSUTF
b2bu

bb2s :: ByteString -> String
-- converts and stops with error when not UTF8
bb2s :: ByteString -> String
bb2s ByteString
s = forall a. Partial => String -> Maybe a -> a
fromJustNote (String
"bb2s - bytestring to string conversion: " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> String
show ByteString
s
        forall a. [a] -> [a] -> [a]
++ String
" was not a utf8") forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Maybe String
b2s forall a b. (a -> b) -> a -> b
$ ByteString
s

bb2t :: ByteString -> Text
-- converts and stopw with error when not UTF8
bb2t :: ByteString -> Text
bb2t ByteString
s = forall a. Partial => String -> Maybe a -> a
fromJustNote (String
"bb2s - bytestring to text conversion: " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> String
show ByteString
s
        forall a. [a] -> [a] -> [a]
++ String
" was not a utf8") forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Maybe Text
b2t forall a b. (a -> b) -> a -> b
$ ByteString
s
-- bytestring -- string (just a composition of t2s . b2t and reverse)
s2bu :: String ->  BSUTF
-- ^ String to Bytestring (invertable)
s2bu :: String -> BSUTF
s2bu = ByteString -> BSUTF
BSUTF forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> ByteString
encodeUtf8 forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Text
s2t

--bu2s ::  BSUTF -> String
---- ^ ByteString to String -- not inverse (not any arbitrary input)
--bu2s = t2s . decodeUtf8 . unBSUTF

s2b :: String -> ByteString
s2b :: String -> ByteString
s2b = Text -> ByteString
t2b forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Text
s2t

b2bl :: ByteString -> Lazy.ByteString
b2bl :: ByteString -> LazyByteString
b2bl = ByteString -> LazyByteString
Lazy.fromStrict

bl2b ::  Lazy.ByteString -> ByteString
bl2b :: LazyByteString -> ByteString
bl2b = LazyByteString -> ByteString
Lazy.toStrict

b2s :: ByteString -> Maybe String
b2s :: ByteString -> Maybe String
b2s = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Text -> String
t2s forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Maybe Text
b2t


newtype URL = URL String deriving (Int -> URL -> ShowS
[URL] -> ShowS
URL -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [URL] -> ShowS
$cshowList :: [URL] -> ShowS
show :: URL -> String
$cshow :: URL -> String
showsPrec :: Int -> URL -> ShowS
$cshowsPrec :: Int -> URL -> ShowS
Show, URL -> URL -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: URL -> URL -> Bool
$c/= :: URL -> URL -> Bool
== :: URL -> URL -> Bool
$c== :: URL -> URL -> Bool
Eq)
instance Zeros URL where zero :: URL
zero = String -> URL
URL forall z. Zeros z => z
zero

unURL :: URL -> String
unURL :: URL -> String
unURL (URL String
t) = String
t


s2url :: String -> URL
-- ^ convert string to url   (uses code from Network.HTTP, which converts space into %20)
s2url :: String -> URL
s2url =   String -> URL
URL forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> ShowS
URI.escapeURIString Char -> Bool
URI.isUnescapedInURIComponent
--s2url =   URL . HTTP.urlEncode

url2s :: URL -> String
-- ^ convert url to string   (uses code from Network.HTTP, which converts space into %20)
url2s :: URL -> String
url2s  =   ShowS
URI.unEscapeString forall b c a. (b -> c) -> (a -> b) -> a -> c
. URL -> String
unURL

testUrlEncodingURI :: String -> Bool
testUrlEncodingURI :: String -> Bool
testUrlEncodingURI String
a = String
a forall a. Eq a => a -> a -> Bool
== (URL -> String
unURL forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> URL
s2url forall b c a. (b -> c) -> (a -> b) -> a -> c
. URL -> String
url2s forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> URL
URL forall a b. (a -> b) -> a -> b
$ String
a)

url2u :: URL -> String
url2u :: URL -> String
url2u = URL -> String
unURL
u2url :: String -> Maybe URL
u2url :: String -> Maybe URL
u2url String
a = if String -> Bool
testUrlEncodingURI String
a then forall a. a -> Maybe a
Just (String -> URL
URL String
a) else forall a. Maybe a
Nothing


s2u :: String -> String
-- ^ convert string to url   (uses code from Network.HTTP, which converts space into %20)
s2u :: ShowS
s2u = URL -> String
url2u forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> URL
s2url

u2s :: String -> Maybe String     --not inverse
-- ^ convert url to string   (uses code from Network.HTTP, which converts space into %20)
u2s :: String -> Maybe String
u2s  =   forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap URL -> String
url2s forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Maybe URL
u2url


-- case for encoding of form content (with + for space)
-- to remove for 9.2.1 

newtype URLform = URLform ByteString deriving (Int -> URLform -> ShowS
[URLform] -> ShowS
URLform -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [URLform] -> ShowS
$cshowList :: [URLform] -> ShowS
show :: URLform -> String
$cshow :: URLform -> String
showsPrec :: Int -> URLform -> ShowS
$cshowsPrec :: Int -> URLform -> ShowS
Show, URLform -> URLform -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: URLform -> URLform -> Bool
$c/= :: URLform -> URLform -> Bool
== :: URLform -> URLform -> Bool
$c== :: URLform -> URLform -> Bool
Eq)
unURLform :: URLform -> ByteString
unURLform :: URLform -> ByteString
unURLform (URLform ByteString
t) = ByteString
t


b2urlf :: ByteString -> URLform
-- ^ convert string to url   (uses code from SNAP, which converts space into +)
b2urlf :: ByteString -> URLform
b2urlf =   ByteString -> URLform
URLform forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> ByteString
SN.urlEncode

urlf2b :: URLform -> ByteString
-- ^ convert url to string   (uses code from SNAP, which converts space into +)
urlf2b :: URLform -> ByteString
urlf2b = forall a. Partial => String -> Maybe a -> a
fromJustNote String
"urlf2b nothing" forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Maybe ByteString
SN.urlDecode forall b c a. (b -> c) -> (a -> b) -> a -> c
. URLform -> ByteString
unURLform


testUrlEncodingSNAP :: ByteString -> Bool
testUrlEncodingSNAP :: ByteString -> Bool
testUrlEncodingSNAP ByteString
a =  forall b a. b -> (a -> b) -> Maybe a -> b
maybe Bool
False ((ByteString
a forall a. Eq a => a -> a -> Bool
==)forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> ByteString
SN.urlEncode) forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Maybe ByteString
SN.urlDecode forall a b. (a -> b) -> a -> b
$ ByteString
a
 
urlf2u :: URLform -> ByteString
urlf2u :: URLform -> ByteString
urlf2u = URLform -> ByteString
unURLform
u2urlf :: ByteString -> Maybe URLform
u2urlf :: ByteString -> Maybe URLform
u2urlf ByteString
a = if ByteString -> Bool
testUrlEncodingSNAP ByteString
a then forall a. a -> Maybe a
Just (ByteString -> URLform
URLform ByteString
a) else forall a. Maybe a
Nothing
-- this test allows control in url encoded strings ...


b2uf :: ByteString -> ByteString
-- ^ convert ByteString to url   (uses code from SNAP which converts space into +)
b2uf :: ByteString -> ByteString
b2uf = URLform -> ByteString
urlf2u forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> URLform
b2urlf

uf2b :: ByteString -> Maybe ByteString     --not inverse
-- ^ convert url to ByteString   (uses code from SNAP, which converts space into +)
uf2b :: ByteString -> Maybe ByteString
uf2b  =   forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap URLform -> ByteString
urlf2b forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Maybe URLform
u2urlf


t2u :: Text -> Text
t2u :: Text -> Text
t2u = String -> Text
s2t forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShowS
s2u forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> String
t2s
u2t :: Text -> Maybe Text
u2t :: Text -> Maybe Text
u2t = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap String -> Text
s2t forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Maybe String
u2s forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> String
t2s

b2u :: ByteString -> Maybe ByteString
b2u :: ByteString -> Maybe ByteString
b2u = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (String -> ByteString
s2b forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShowS
s2u) forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Maybe String
b2s
u2b :: ByteString -> Maybe ByteString
u2b :: ByteString -> Maybe ByteString
u2b = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap String -> ByteString
s2b forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (m :: * -> *) a. Monad m => m (m a) -> m a
join  forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap String -> Maybe String
u2s forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Maybe String
b2s


-- | bytestring with latin1 encoded characters
newtype BSlat = BSlat ByteString deriving (Int -> BSlat -> ShowS
[BSlat] -> ShowS
BSlat -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [BSlat] -> ShowS
$cshowList :: [BSlat] -> ShowS
show :: BSlat -> String
$cshow :: BSlat -> String
showsPrec :: Int -> BSlat -> ShowS
$cshowsPrec :: Int -> BSlat -> ShowS
Show, BSlat -> BSlat -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: BSlat -> BSlat -> Bool
$c/= :: BSlat -> BSlat -> Bool
== :: BSlat -> BSlat -> Bool
$c== :: BSlat -> BSlat -> Bool
Eq)
unBSlat :: BSlat -> ByteString
unBSlat :: BSlat -> ByteString
unBSlat (BSlat ByteString
a) = ByteString
a


lat2s :: BSlat -> String
-- ^ bytestring with latin encoding to string
lat2s :: BSlat -> String
lat2s = ByteString -> String
latin2s forall b c a. (b -> c) -> (a -> b) -> a -> c
. BSlat -> ByteString
unBSlat

s2lat :: String -> Maybe BSlat   -- is this always possible ?
-- ^ string encoded as ByteString with latin encoding, if possible
s2lat :: String -> Maybe BSlat
s2lat =  forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ByteString -> BSlat
BSlat forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Maybe ByteString
s22latin

s3lat :: String ->  BSlat   -- is this always possible ?
-- ^ string converted to represenatable as latin and then encoded
-- lossy!
s3lat :: String -> BSlat
s3lat   =  ByteString -> BSlat
BSlat forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> ByteString
s3latin

lat2t :: BSlat -> Text
-- ^ Text encoded as ByteString with latin encoding, if possible
lat2t :: BSlat -> Text
lat2t = ByteString -> Text
latin2t forall b c a. (b -> c) -> (a -> b) -> a -> c
. BSlat -> ByteString
unBSlat

t2lat :: Text -> Maybe BSlat   -- is this always possible
-- ^ Text encoded as ByteString with latin encoding, if possible
t2lat :: Text -> Maybe BSlat
t2lat = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ByteString -> BSlat
BSlat forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Maybe ByteString
t22latin

t3lat :: Text -> BSlat   -- is this always possible
-- ^ Text converted to represenatable as latin and then encoded
-- lossy!
t3lat :: Text -> BSlat
t3lat =  ByteString -> BSlat
BSlat forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> ByteString
t3latin

latin2s :: ByteString -> String
    --    | works always, but produces unexpected results if bytestring is not latin encoded
latin2s :: ByteString -> String
latin2s = ByteString -> String
Data.ByteString.Char8.unpack
--
s2latin :: String ->  ByteString
        --  | works always, but produces unexpected results if bytestring is not latin encoded
s2latin :: String -> ByteString
s2latin =  String -> ByteString
Data.ByteString.Char8.pack

s22latin :: String -> Maybe ByteString
s22latin :: String -> Maybe ByteString
s22latin String
s = if forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all  ((forall a. Ord a => a -> a -> Bool
<Int
256) forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Int
ord) String
s  then  forall a. a -> Maybe a
Just forall b c a. (b -> c) -> (a -> b) -> a -> c
.  String -> ByteString
s2latin  forall a b. (a -> b) -> a -> b
$ String
s else forall a. Maybe a
Nothing   -- Data.ByteString.Char8.pack . T.unpack

s3latin :: String ->  ByteString
s3latin :: String -> ByteString
s3latin =   String -> ByteString
s2latin  forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShowS
convertLatin

filterLatin :: String -> String
filterLatin :: ShowS
filterLatin = forall a. (a -> Bool) -> [a] -> [a]
filter ((forall a. Ord a => a -> a -> Bool
<Int
256)forall b c a. (b -> c) -> (a -> b) -> a -> c
.Char -> Int
ord )

convertLatin :: String -> String
-- ^ convert a string to contain only characters in latin1
convertLatin :: ShowS
convertLatin = forall a b. (a -> b) -> [a] -> [b]
map Char -> Char
conv2latinChar

conv2latinChar :: Char -> Char
-- ^ convert character not in the latin1 encoding (intelligently treating quotes and double quotes)
-- possibly other cases later added
conv2latinChar :: Char -> Char
conv2latinChar Char
c = if Char -> Int
ord Char
c forall a. Ord a => a -> a -> Bool
< Int
256 then Char
c else
    case Char
c of
        Char
'\x201C' -> Char
'"'
        Char
'\x201D' -> Char
'"'
        Char
'\x201E' -> Char
'"'
        Char
'\8212' -> Char
'-'    -- em dash
        Char
'\8222' -> Char
'"'    -- lower quote
        Char
'\8216' -> Char
'\''    --  left single quote
        Char
'\8217' -> Char
'\''    -- right single quote
        Char
'\8218' -> Char
'\''    --  quote
        Char
'\8221' -> Char
'"'    -- unclear why 8221 but is quote
--        '\x2018' -> '\''   -- same as 8216
--        '\x2019' -> '\''  -- same as 8217

        Char
_ -> Char
c -- '\SUB'    -- could be another char ? \SUB

findNonLatinChars :: String -> String
-- ^ the result is a string of all the characters not in the latin1 encoding
-- possibly apply conv2latinChar first
findNonLatinChars :: ShowS
findNonLatinChars = forall a. Eq a => [a] -> [a]
nub forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. (a -> Bool) -> [a] -> [a]
filter ((forall a. Ord a => a -> a -> Bool
>Int
256)forall b c a. (b -> c) -> (a -> b) -> a -> c
.Char -> Int
ord )
--            (\c -> conv2latinChar c == '\SUB')

findNonLatinCharsT :: Text -> Text
-- ^ the result is a string of all the characters not in the latin1 encoding
findNonLatinCharsT :: Text -> Text
findNonLatinCharsT = String -> Text
s2t forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShowS
findNonLatinChars forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> String
t2s



latin2t :: ByteString -> Text
latin2t :: ByteString -> Text
latin2t = String -> Text
s2t forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> String
latin2s  -- T.pack .  Data.ByteString.Char8.unpack

t2latin :: Text ->  ByteString
-- text to bytestring -  works always, but produces unexpected results if bytestring is not latin encoded
t2latin :: Text -> ByteString
t2latin   = String -> ByteString
s2latin forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> String
t2s  -- Data.ByteString.Char8.pack . T.unpack

t22latin :: Text -> Maybe ByteString
-- ^ converts text to bytestring, if meaningful
t22latin :: Text -> Maybe ByteString
t22latin   = String -> Maybe ByteString
s22latin forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> String
t2s    -- Data.ByteString.Char8.pack . T.unpack
--t22latin t = if all  ((<256) . ord) (t2s t) then  Just .  s2latin . t2s $ t else Nothing   -- Data.ByteString.Char8.pack . T.unpack

t3latin :: Text ->  ByteString
-- text to bytestring - meaningful, but converted -- lossy!
t3latin :: Text -> ByteString
t3latin   = String -> ByteString
s3latin forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> String
t2s  -- Data.ByteString.Char8.pack . T.unpack
--

putIOwords :: [Text] -> IO ()
putIOwords :: [Text] -> IO ()
putIOwords = String -> IO ()
putStrLn forall b c a. (b -> c) -> (a -> b) -> a -> c
. [String] -> String
unlines forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a -> b) -> [a] -> [b]
map Text -> String
t2s

-- chars :: [GHC.Word.Word8]
chars :: [Word8]
chars = [Word8
198, Word8
216, Word8
197, Word8
206, Word8
219,Word8
140,Word8
252,Word8
202, Word8
419, Word8
420, Word8
1937 ]
difficultBString :: ByteString
difficultBString = [Word8] -> ByteString
ByteString.pack [Word8]
chars
difficultTString :: String
difficultTString = String
"\198\216\197\206\219\140\252\202\419\420\1937"