-- |
-- Module      :  Foreign.C.Test.UTF8
-- Copyright   :  (c) 2004 Oleg Kiselyov, Alistair Bayley
-- License     :  BSD-style
-- Maintainer  :  alistair@abayley.org
-- Stability   :  experimental
-- Portability :  non-portable
-- 
-- UTF8 decoder and encoder tests.


module Foreign.C.Test.UTF8 where

import Prelude hiding (catch)
import Control.Exception.Extensible
import Data.Char
import Foreign.C.String
import Foreign.C.UTF8
import Foreign.Marshal.Array
import Foreign.Ptr
import Test.MiniUnit
import qualified Test.QuickCheck as QC
import Data.Word (Word8)


runTest :: a -> [String] -> IO ()
runTest _ args = runTestTT "UTF8" mkTestList >> return ()

--mkTestList :: Test
--mkTestList = TestList (map TestCase testlist)
mkTestList = testlist

testlist :: [IO ()]
testlist = 
  [ testUTF8RoundTrip
  , testUTF8StringLenRoundTrip
  , quickCheckUTF8RoundTrip
  , testFromUTF8Failure5Bytes
  , testFromUTF8Failure6Bytes
  -- , testLargeFromUTF8
  -- , testLargeFromUTF8Len
  ]


instance QC.Arbitrary Char where
  arbitrary = QC.choose (chr 1, chr 0x10FFFF)
  coarbitrary = undefined

prop_roundtrip s = s == fromUTF8 (toUTF8 s)
quickCheckUTF8RoundTrip = QC.test prop_roundtrip

testUTF8RoundTrip = do
  utf8RoundTrip "1ByteLow"   0x000001 [0x01]
  utf8RoundTrip "1ByteHigh"  0x00007F [0x7F]
  utf8RoundTrip "2BytesLow"  0x000080 [0xC2, 0x80]
  utf8RoundTrip "2BytesHigh" 0x0007FF [0xDF, 0xBF]
  utf8RoundTrip "3BytesLow"  0x000800 [0xE0, 0xA0, 0x80]
  utf8RoundTrip "3BytesHigh" 0x00FFFF [0xEF, 0xBF, 0xBF]
  utf8RoundTrip "4BytesLow"  0x010000 [0xF0, 0x90, 0x80, 0x80]
  -- chr 0x10FFFF is the largest code-point Haskell currently allows
  utf8RoundTrip "4BytesHigh" 0x10FFFF [0xF4, 0x8F, 0xBF, 0xBF]

utf8RoundTrip :: String -> Int -> [Word8] -> IO ()
utf8RoundTrip msg unicode utf8 = do
  assertEqual ("testToUTF8-" ++ msg) utf8 (toUTF8 [chr unicode])
  assertEqual ("testFromUTF8-" ++ msg) [chr unicode] (fromUTF8 utf8)
  withUTF8String [chr unicode] $ \cstr -> do
    w8 <- peekArray0 0 (castPtr cstr)
    assertEqual ("testToUTF8Ptr-" ++ msg) utf8 w8
  withCString (map (chr . fromIntegral) utf8) $ \cstr -> do
    s <- peekUTF8String cstr
    assertEqual ("testFromUTF8Ptr-" ++ msg) [chr unicode] s

testUTF8StringLenRoundTrip = do
  utf8StringLenRoundTrip "1ByteLow"   [0x000001, 0x000001] [0x01, 0x01]
  utf8StringLenRoundTrip "2BytesLow"  [0x000080, 0x00007F] [0xC2, 0x80, 0x7F]
  utf8StringLenRoundTrip "3BytesLow"  [0x000800, 0x000001] [0xE0, 0xA0, 0x80, 0x01]
  utf8StringLenRoundTrip "4BytesLow"  [0x010000, 0x00007F] [0xF0, 0x90, 0x80, 0x80, 0x7F]
  utf8StringLenRoundTrip "4BytesHigh" [0x10FFFF, 0x00007F] [0xF4, 0x8F, 0xBF, 0xBF, 0x01]

utf8StringLenRoundTrip :: String -> [Int] -> [Word8] -> IO ()
utf8StringLenRoundTrip msg codepoints utf8 = do
  let unicode = map chr codepoints
  let expect = take (length unicode - 1) unicode
  -- convert our unicode code-points (Haskell String)
  -- into a UTF8 CStringLen.
  -- Pass this back to peekUTF8StringLen to convert back into Haskell String.
  -- Chop off the last char - make sure it's only one byte.
  withUTF8StringLen unicode $ \(cstr, clen) -> do
    assertEqual ("testUTF8StringLenRoundTrip-" ++ msg) (length utf8) clen
    s <- peekUTF8StringLen (cstr, clen - 1)
    assertEqual ("testUTF8StringLenRoundTrip-" ++ msg) expect s



--testUTF8StringLenRoundTrip = do

testFromUTF8Failure5Bytes = do
  let utf8 = [0xF8, 0x80, 0x80, 0x80, 0x80]
  let handler :: ErrorCall -> IO ()
      handler (ErrorCall msg) = assertEqual "testFromUTF8Failure5Bytes" "fromUTF8: illegal UTF-8 character 248" msg
  catch (do
    assertEqual "testFromUTF8Failure5Bytes" [chr 0x10FFFF] (fromUTF8 utf8)
    assertBool "testFromUTF8Failure5Bytes: no error raised" False
    ) handler

testFromUTF8Failure6Bytes = do
  let utf8 = [0xFD, 0xBF, 0xBF, 0xBF, 0xBF, 0xBF]
  let handler :: ErrorCall -> IO ()
      handler (ErrorCall msg) = assertEqual "testFromUTF8Failure6Bytes" "fromUTF8: illegal UTF-8 character 253" msg
  catch (do
    assertEqual "testFromUTF8Failure6Bytes" [chr 0x10FFFF] (fromUTF8 utf8)
    assertBool "testFromUTF8Failure6Bytes: no error raised" False
    ) handler
{-
testLargeFromUTF8 = do
  let sz = 1000000
  withCString (take sz (repeat 'a')) $ \cstr -> do
    s <- peekUTF8String cstr
    assertEqual "testLargeFromUTF8" sz (length s)
    s <- peekUTF8StringB cstr
    assertEqual "testLargeFromUTF8B" sz (length s)
    s <- peekUTF8StringLen (cstr, sz)
    assertEqual "testLargeFromUTF8Len" sz (length s)
    s <- peekUTF8StringLenB (cstr, sz)
    assertEqual "testLargeFromUTF8LenB" sz (length s)
-}

{-
testLargeFromUTF8Len = do
  let sz = 1000000
  withCStringLen (take sz (repeat 'a')) $ \(cstr, clen) -> do
    putStrLn "testLargeFromUTF8Len: call peekUTF8StringLen"
    s <- peekUTF8StringLen (cstr, clen)
    putStrLn "testLargeFromUTF8Len: assert length"
    assertEqual "testLargeFromUTF8Len" sz (length s)
-}