-----------------------------------------------------------------------------
-- |
-- Module      :  Network.HTTP.Utils
-- Copyright   :  See LICENSE file
-- License     :  BSD
--
-- Maintainer  :  Ganesh Sittampalam <ganesh@earth.li>
-- Stability   :  experimental
-- Portability :  non-portable (not tested)
--
-- Set of utility functions and definitions used by package modules.
--
module Network.HTTP.Utils
       ( trim     -- :: String -> String
       , trimL    -- :: String -> String
       , trimR    -- :: String -> String
       
       , crlf     -- :: String
       , lf       -- :: String
       , sp       -- :: String

       , split    -- :: Eq a => a -> [a] -> Maybe ([a],[a])
       , splitBy  -- :: Eq a => a -> [a] -> [[a]]
       
       , readsOne -- :: Read a => (a -> b) -> b -> String -> b

       , dropWhileTail -- :: (a -> Bool) -> [a] -> [a]
       , chopAtDelim   -- :: Eq a => a -> [a] -> ([a],[a])
       
       , toUTF8BS
       , fromUTF8BS
       ) where
       
import Data.Bits
import Data.Char
import Data.List ( elemIndex )
import Data.Maybe ( fromMaybe )
import Data.Word ( Word8 )

import qualified Data.ByteString as BS

-- | @crlf@ is our beloved two-char line terminator.
crlf :: String
crlf :: String
crlf = String
"\r\n"

-- | @lf@ is a tolerated line terminator, per RFC 2616 section 19.3.
lf :: String
lf :: String
lf = String
"\n"

-- | @sp@ lets you save typing one character.
sp :: String
sp :: String
sp   = String
" "

-- | @split delim ls@ splits a list into two parts, the @delim@ occurring
-- at the head of the second list.  If @delim@ isn't in @ls@, @Nothing@ is
-- returned.
split :: Eq a => a -> [a] -> Maybe ([a],[a])
split :: a -> [a] -> Maybe ([a], [a])
split a
delim [a]
list = case a
delim a -> [a] -> Maybe Int
forall a. Eq a => a -> [a] -> Maybe Int
`elemIndex` [a]
list of
    Maybe Int
Nothing -> Maybe ([a], [a])
forall a. Maybe a
Nothing
    Just Int
x  -> ([a], [a]) -> Maybe ([a], [a])
forall a. a -> Maybe a
Just (([a], [a]) -> Maybe ([a], [a])) -> ([a], [a]) -> Maybe ([a], [a])
forall a b. (a -> b) -> a -> b
$ Int -> [a] -> ([a], [a])
forall a. Int -> [a] -> ([a], [a])
splitAt Int
x [a]
list

-- | @trim str@ removes leading and trailing whitespace from @str@.
trim :: String -> String
trim :: String -> String
trim String
xs = String -> String
trimR (String -> String
trimL String
xs)
   
-- | @trimL str@ removes leading whitespace (as defined by 'Data.Char.isSpace')
-- from @str@.
trimL :: String -> String
trimL :: String -> String
trimL String
xs = (Char -> Bool) -> String -> String
forall a. (a -> Bool) -> [a] -> [a]
dropWhile Char -> Bool
isSpace String
xs

-- | @trimL str@ removes trailing whitespace (as defined by 'Data.Char.isSpace')
-- from @str@.
trimR :: String -> String
trimR :: String -> String
trimR String
str = String -> Maybe String -> String
forall a. a -> Maybe a -> a
fromMaybe String
"" (Maybe String -> String) -> Maybe String -> String
forall a b. (a -> b) -> a -> b
$ (Char -> Maybe String -> Maybe String)
-> Maybe String -> String -> Maybe String
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr Char -> Maybe String -> Maybe String
trimIt Maybe String
forall a. Maybe a
Nothing String
str
 where
  trimIt :: Char -> Maybe String -> Maybe String
trimIt Char
x (Just String
xs) = String -> Maybe String
forall a. a -> Maybe a
Just (Char
xChar -> String -> String
forall a. a -> [a] -> [a]
:String
xs)
  trimIt Char
x Maybe String
Nothing   
   | Char -> Bool
isSpace Char
x = Maybe String
forall a. Maybe a
Nothing
   | Bool
otherwise = String -> Maybe String
forall a. a -> Maybe a
Just [Char
x]

-- | @splitMany delim ls@ removes the delimiter @delim@ from @ls@.
splitBy :: Eq a => a -> [a] -> [[a]]
splitBy :: a -> [a] -> [[a]]
splitBy a
_ [] = []
splitBy a
c [a]
xs = 
    case (a -> Bool) -> [a] -> ([a], [a])
forall a. (a -> Bool) -> [a] -> ([a], [a])
break (a -> a -> Bool
forall a. Eq a => a -> a -> Bool
==a
c) [a]
xs of
      ([a]
_,[]) -> [[a]
xs]
      ([a]
as,a
_:[a]
bs) -> [a]
as [a] -> [[a]] -> [[a]]
forall a. a -> [a] -> [a]
: a -> [a] -> [[a]]
forall a. Eq a => a -> [a] -> [[a]]
splitBy a
c [a]
bs

-- | @readsOne f def str@ tries to 'read' @str@, taking
-- the first result and passing it to @f@. If the 'read'
-- doesn't succeed, return @def@.
readsOne :: Read a => (a -> b) -> b -> String -> b
readsOne :: (a -> b) -> b -> String -> b
readsOne a -> b
f b
n String
str = 
 case ReadS a
forall a. Read a => ReadS a
reads String
str of
   ((a
v,String
_):[(a, String)]
_) -> a -> b
f a
v
   [(a, String)]
_ -> b
n


-- | @dropWhileTail p ls@ chops off trailing elements from @ls@
-- until @p@ returns @False@.
dropWhileTail :: (a -> Bool) -> [a] -> [a]
dropWhileTail :: (a -> Bool) -> [a] -> [a]
dropWhileTail a -> Bool
f [a]
ls =
 case (a -> Maybe [a] -> Maybe [a]) -> Maybe [a] -> [a] -> Maybe [a]
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr a -> Maybe [a] -> Maybe [a]
chop Maybe [a]
forall a. Maybe a
Nothing [a]
ls of { Just [a]
xs -> [a]
xs; Maybe [a]
Nothing -> [] }
  where
    chop :: a -> Maybe [a] -> Maybe [a]
chop a
x (Just [a]
xs) = [a] -> Maybe [a]
forall a. a -> Maybe a
Just (a
xa -> [a] -> [a]
forall a. a -> [a] -> [a]
:[a]
xs)
    chop a
x Maybe [a]
_
     | a -> Bool
f a
x       = Maybe [a]
forall a. Maybe a
Nothing
     | Bool
otherwise = [a] -> Maybe [a]
forall a. a -> Maybe a
Just [a
x]

-- | @chopAtDelim elt ls@ breaks up @ls@ into two at first occurrence
-- of @elt@; @elt@ is elided too. If @elt@ does not occur, the second
-- list is empty and the first is equal to @ls@.
chopAtDelim :: Eq a => a -> [a] -> ([a],[a])
chopAtDelim :: a -> [a] -> ([a], [a])
chopAtDelim a
elt [a]
xs =
  case (a -> Bool) -> [a] -> ([a], [a])
forall a. (a -> Bool) -> [a] -> ([a], [a])
break (a -> a -> Bool
forall a. Eq a => a -> a -> Bool
==a
elt) [a]
xs of
    ([a]
_,[])    -> ([a]
xs,[])
    ([a]
as,a
_:[a]
bs) -> ([a]
as,[a]
bs)

toUTF8BS :: String -> BS.ByteString
toUTF8BS :: String -> ByteString
toUTF8BS = [Word8] -> ByteString
BS.pack ([Word8] -> ByteString)
-> (String -> [Word8]) -> String -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> [Word8]
encodeStringUtf8

fromUTF8BS :: BS.ByteString -> String
fromUTF8BS :: ByteString -> String
fromUTF8BS = [Word8] -> String
decodeStringUtf8 ([Word8] -> String)
-> (ByteString -> [Word8]) -> ByteString -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> [Word8]
BS.unpack

-- | Encode 'String' to a list of UTF8-encoded octets
--
-- Code-points in the @U+D800@-@U+DFFF@ range will be encoded
-- as the replacement character (i.e. @U+FFFD@).
--
-- The code is extracted from Cabal library, written originally
-- Herbert Valerio Riedel under BSD-3-Clause license
encodeStringUtf8 :: String -> [Word8]
encodeStringUtf8 :: String -> [Word8]
encodeStringUtf8 []        = []
encodeStringUtf8 (Char
c:String
cs)
  | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x07F' = Word8
w8
                 Word8 -> [Word8] -> [Word8]
forall a. a -> [a] -> [a]
: String -> [Word8]
encodeStringUtf8 String
cs
  | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x7FF' = (Word8
0xC0 Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.|.  Int -> Word8
w8ShiftR  Int
6          )
                 Word8 -> [Word8] -> [Word8]
forall a. a -> [a] -> [a]
: (Word8
0x80 Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.|. (Word8
w8          Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0x3F))
                 Word8 -> [Word8] -> [Word8]
forall a. a -> [a] -> [a]
: String -> [Word8]
encodeStringUtf8 String
cs
  | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\xD7FF'= (Word8
0xE0 Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.|.  Int -> Word8
w8ShiftR Int
12          )
                 Word8 -> [Word8] -> [Word8]
forall a. a -> [a] -> [a]
: (Word8
0x80 Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.|. (Int -> Word8
w8ShiftR  Int
6 Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0x3F))
                 Word8 -> [Word8] -> [Word8]
forall a. a -> [a] -> [a]
: (Word8
0x80 Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.|. (Word8
w8          Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0x3F))
                 Word8 -> [Word8] -> [Word8]
forall a. a -> [a] -> [a]
: String -> [Word8]
encodeStringUtf8 String
cs
  | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\xDFFF'= Word8
0xEF Word8 -> [Word8] -> [Word8]
forall a. a -> [a] -> [a]
: Word8
0xBF Word8 -> [Word8] -> [Word8]
forall a. a -> [a] -> [a]
: Word8
0xBD -- U+FFFD
                 Word8 -> [Word8] -> [Word8]
forall a. a -> [a] -> [a]
: String -> [Word8]
encodeStringUtf8 String
cs
  | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\xFFFF'= (Word8
0xE0 Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.|.  Int -> Word8
w8ShiftR Int
12          )
                 Word8 -> [Word8] -> [Word8]
forall a. a -> [a] -> [a]
: (Word8
0x80 Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.|. (Int -> Word8
w8ShiftR  Int
6 Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0x3F))
                 Word8 -> [Word8] -> [Word8]
forall a. a -> [a] -> [a]
: (Word8
0x80 Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.|. (Word8
w8          Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0x3F))
                 Word8 -> [Word8] -> [Word8]
forall a. a -> [a] -> [a]
: String -> [Word8]
encodeStringUtf8 String
cs
  | Bool
otherwise    = (Word8
0xf0 Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.|.  Int -> Word8
w8ShiftR Int
18          )
                 Word8 -> [Word8] -> [Word8]
forall a. a -> [a] -> [a]
: (Word8
0x80 Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.|. (Int -> Word8
w8ShiftR Int
12 Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0x3F))
                 Word8 -> [Word8] -> [Word8]
forall a. a -> [a] -> [a]
: (Word8
0x80 Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.|. (Int -> Word8
w8ShiftR  Int
6 Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0x3F))
                 Word8 -> [Word8] -> [Word8]
forall a. a -> [a] -> [a]
: (Word8
0x80 Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.|. (Word8
w8          Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0x3F))
                 Word8 -> [Word8] -> [Word8]
forall a. a -> [a] -> [a]
: String -> [Word8]
encodeStringUtf8 String
cs
  where
    w8 :: Word8
w8 = Int -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Char -> Int
ord Char
c) :: Word8
    w8ShiftR :: Int -> Word8
    w8ShiftR :: Int -> Word8
w8ShiftR = Int -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word8) -> (Int -> Int) -> Int -> Word8
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Int -> Int
forall a. Bits a => a -> Int -> a
shiftR (Char -> Int
ord Char
c)

-- | Decode 'String' from UTF8-encoded octets.
--
-- Invalid data in the UTF8 stream (this includes code-points @U+D800@
-- through @U+DFFF@) will be decoded as the replacement character (@U+FFFD@).
--
-- See also 'encodeStringUtf8'
decodeStringUtf8 :: [Word8] -> String
decodeStringUtf8 :: [Word8] -> String
decodeStringUtf8 = [Word8] -> String
go
  where
    go :: [Word8] -> String
    go :: [Word8] -> String
go []       = []
    go (Word8
c : [Word8]
cs)
      | Word8
c Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
0x7F = Int -> Char
chr (Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
c) Char -> String -> String
forall a. a -> [a] -> [a]
: [Word8] -> String
go [Word8]
cs
      | Word8
c Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
0xBF = Char
replacementChar Char -> String -> String
forall a. a -> [a] -> [a]
: [Word8] -> String
go [Word8]
cs
      | Word8
c Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
0xDF = Word8 -> [Word8] -> String
twoBytes Word8
c [Word8]
cs
      | Word8
c Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
0xEF = Int -> Int -> [Word8] -> Int -> String
moreBytes Int
3 Int
0x800     [Word8]
cs (Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word8 -> Int) -> Word8 -> Int
forall a b. (a -> b) -> a -> b
$ Word8
c Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0xF)
      | Word8
c Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
0xF7 = Int -> Int -> [Word8] -> Int -> String
moreBytes Int
4 Int
0x10000   [Word8]
cs (Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word8 -> Int) -> Word8 -> Int
forall a b. (a -> b) -> a -> b
$ Word8
c Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0x7)
      | Word8
c Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
0xFB = Int -> Int -> [Word8] -> Int -> String
moreBytes Int
5 Int
0x200000  [Word8]
cs (Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word8 -> Int) -> Word8 -> Int
forall a b. (a -> b) -> a -> b
$ Word8
c Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0x3)
      | Word8
c Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
0xFD = Int -> Int -> [Word8] -> Int -> String
moreBytes Int
6 Int
0x4000000 [Word8]
cs (Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word8 -> Int) -> Word8 -> Int
forall a b. (a -> b) -> a -> b
$ Word8
c Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0x1)
      | Bool
otherwise   = Char
replacementChar Char -> String -> String
forall a. a -> [a] -> [a]
: [Word8] -> String
go [Word8]
cs

    twoBytes :: Word8 -> [Word8] -> String
    twoBytes :: Word8 -> [Word8] -> String
twoBytes Word8
c0 (Word8
c1:[Word8]
cs')
      | Word8
c1 Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0xC0 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
0x80
      = let d :: Int
d = (Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word8
c0 Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0x1F) Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftL` Int
6)
             Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word8
c1 Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0x3F)
         in if Int
d Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
0x80
               then  Int -> Char
chr Int
d                Char -> String -> String
forall a. a -> [a] -> [a]
: [Word8] -> String
go [Word8]
cs'
               else  Char
replacementChar      Char -> String -> String
forall a. a -> [a] -> [a]
: [Word8] -> String
go [Word8]
cs'
    twoBytes Word8
_ [Word8]
cs' = Char
replacementChar      Char -> String -> String
forall a. a -> [a] -> [a]
: [Word8] -> String
go [Word8]
cs'

    moreBytes :: Int -> Int -> [Word8] -> Int -> [Char]
    moreBytes :: Int -> Int -> [Word8] -> Int -> String
moreBytes Int
1 Int
overlong [Word8]
cs' Int
acc
      | Int
overlong Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
acc Bool -> Bool -> Bool
&& Int
acc Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0x10FFFF Bool -> Bool -> Bool
&& (Int
acc Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
0xD800 Bool -> Bool -> Bool
|| Int
0xDFFF Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
acc)
      = Int -> Char
chr Int
acc Char -> String -> String
forall a. a -> [a] -> [a]
: [Word8] -> String
go [Word8]
cs'

      | Bool
otherwise
      = Char
replacementChar Char -> String -> String
forall a. a -> [a] -> [a]
: [Word8] -> String
go [Word8]
cs'

    moreBytes Int
byteCount Int
overlong (Word8
cn:[Word8]
cs') Int
acc
      | Word8
cn Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0xC0 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
0x80
      = Int -> Int -> [Word8] -> Int -> String
moreBytes (Int
byteCountInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1) Int
overlong [Word8]
cs'
          ((Int
acc Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftL` Int
6) Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
cn Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
0x3F)

    moreBytes Int
_ Int
_ [Word8]
cs' Int
_
      = Char
replacementChar Char -> String -> String
forall a. a -> [a] -> [a]
: [Word8] -> String
go [Word8]
cs'

    replacementChar :: Char
replacementChar = Char
'\xfffd'