{-# LANGUAGE BinaryLiterals #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE TypeInType #-}
{-# LANGUAGE UnboxedSums #-}
{-# LANGUAGE UnboxedTuples #-}
{-# LANGUAGE MagicHash #-}

import Data.Set (Set)
import Data.Word (Word8)
import Hedgehog (Property,Gen,property,forAll,(===),failure)
import Hedgehog.Gen (list,enumBounded,int,frequency,choice,element,integral,word8)
import Hedgehog.Range (Range,linear)
import Test.Tasty (defaultMain,testGroup,TestTree)
import Data.Bits ((.&.))
import Data.Char (chr)
import Test.Tasty.HUnit (testCase)
import Test.Tasty.Hedgehog (testProperty)
import Data.Bifunctor (bimap)
import Data.Monoid
import GHC.Exts (Int#)
import GHC.Types
import Packed.Bytes (Bytes)
import Packed.Bytes.Small (ByteArray)
import GHC.Int (Int(I#))
import Data.Bits ((.&.),(.|.),unsafeShiftR)

import qualified Data.Char
import qualified Test.Tasty.Hedgehog as H
import qualified Packed.Text as T
import qualified Packed.Bytes.Small as BA
import qualified Packed.Bytes.Window as BAW
import qualified Packed.Bytes as B
import qualified Data.Set as S
import qualified GHC.OldList as L

main :: IO ()
main = defaultMain tests

tests :: TestTree
tests = testGroup "Tests"
  [ testGroup "ByteArray"
    [ testProperty "findByte" findByteProp
    , testProperty "zipAnd" zipAndProp
    ]
  , testGroup "Bytes"
    [ testProperty "findByte" sliceFindByteProp
    ]
  , testGroup "Text"
    [ testProperty "pack" textPackProp
    , testProperty "take" textTakeProp
    , testProperty "drop" textDropProp
    , testProperty "breakOnChar" textBreakCharProp
    , testProperty "toUpper" textToUpperProp
    , testProperty "decodeAscii" textDecodeAscii
    , testGroup "decodeUtf8"
      [ testProperty "isomorphism" textDecodeUtf8Iso
      , testProperty "surrogates" textDecodeUtf8Surrogates
        -- TODO: test against malformed inputs to decodeUtf8
      ]
    ]
  ]

isAscii :: Word8 -> Bool
isAscii w = w < 128

decodeAsciiList :: [Word8] -> Maybe String
decodeAsciiList = mapM
  (\w -> if isAscii w then Just (chr (fromIntegral w)) else Nothing)

textDecodeAscii :: Property
textDecodeAscii = property $ do
  bytes <- forAll genMostlyAsciiBytes
  front <- forAll (genOffset (L.length bytes))
  back <- forAll (genOffset (L.length bytes))
  let truncatedBytes = listDropEnd back (L.drop front bytes)
      expected = decodeAsciiList truncatedBytes
      actual = fmap T.unpack (T.decodeAscii (B.dropEnd back (B.drop front (B.pack bytes))))
  expected === actual

textDecodeUtf8Iso :: Property
textDecodeUtf8Iso = property $ do
  chars <- forAll genString
  front <- forAll (genOffset (L.length chars))
  back <- forAll (genOffset (L.length chars))
  let text = T.dropEnd back (T.drop front (T.pack chars))
      decoded = T.decodeUtf8 (T.encodeUtf8 text)
  Just (T.unpack text) === fmap T.unpack decoded

textDecodeUtf8Surrogates :: Property
textDecodeUtf8Surrogates = property $ do
  chars <- forAll genStringSurrogates
  front <- forAll (genOffset (L.length chars))
  back <- forAll (genOffset (L.length chars))
  let truncatedChars = listDropEnd back (L.drop front chars)
      cleanChars = map
        (\c -> if Data.Char.ord c >= 0xD800 && Data.Char.ord c < 0xE000 then chr 0xFFFD else c)
        truncatedChars
      bytes = B.pack (foldMap charToBytesWithSurrogates truncatedChars)
      text = T.dropEnd back (T.drop front (T.pack chars))
      decoded = T.decodeUtf8 (T.encodeUtf8 text)
  Just cleanChars === fmap T.unpack decoded
  Just (T.unpack text) === fmap T.unpack decoded
  Just cleanChars === fmap T.unpack (T.decodeUtf8 bytes)

textPackProp :: Property
textPackProp = property $ do
  chars <- forAll genString
  front <- forAll (genOffset (L.length chars))
  back <- forAll (genOffset (L.length chars))
  let expected = listDropEnd back (L.drop front chars)
      actual = T.unpack (T.dropEnd back (T.drop front (T.pack chars)))
  expected === actual

textTakeProp :: Property
textTakeProp = property $ do
  chars <- forAll genString
  n <- forAll (genChop (L.length chars))
  let expected = L.take n chars
      actual = T.unpack (T.take n (T.pack chars))
  expected === actual

textDropProp :: Property
textDropProp = property $ do
  chars <- forAll genString
  n <- forAll (genChop (L.length chars))
  let expected = L.drop n chars
      actual = T.unpack (T.drop n (T.pack chars))
  expected === actual


textBreakCharProp :: Property
textBreakCharProp = property $ do
  chars <- forAll genString
  front <- forAll (genOffset (L.length chars))
  back <- forAll (genOffset (L.length chars))
  let truncatedChars = listDropEnd back (L.drop front chars)
  c <- forAll (pickChar truncatedChars)
  let expected = L.break (== c) truncatedChars
      actual = bimap T.unpack T.unpack (T.breakChar c (T.dropEnd back (T.drop front (T.pack chars))))
  expected === actual

textToUpperProp :: Property
textToUpperProp = property $ do
  chars <- forAll genString
  front <- forAll (genOffset (L.length chars))
  back <- forAll (genOffset (L.length chars))
  let expected = L.map Data.Char.toUpper (listDropEnd back (L.drop front chars))
      actual = T.unpack (T.toUpper (T.dropEnd back (T.drop front (T.pack chars))))
  expected === actual

listDropEnd :: Int -> [a] -> [a]
listDropEnd n xs = L.take (L.length xs - n) xs

pickChar :: String -> Gen Char
pickChar s = if L.null s
  then genCharUnicode
  else element s

pickByte :: [Word8] -> Gen Word8
pickByte s = if L.null s
  then genByte
  else element s

genChop :: Int -> Gen Int
genChop originalLen = integral (linear 0 maxDiscard)
  where
  maxDiscard = div (originalLen * 6) 5

genOffset :: Int -> Gen Int
genOffset originalLen = integral (linear 0 maxDiscard)
  where
  maxDiscard = min 19 (div originalLen 3)

-- Generates a string that is either entirely ascii
-- or that is a healthy mixture of characters with
-- variable UTF-8 byte lengths.
genString :: Gen String
genString = frequency [ (3, genStringAscii), (7, genStringUnicode) ]

genSurrogate :: Gen Char
genSurrogate = fmap chr (int (linear 0xD800 0xDFFF))

genSurrogates :: Gen String
genSurrogates = list (linear 0 3) genSurrogate

-- Generates a string that may contain unicode characters
-- in the range U+D800 to U+DFFF.
genStringSurrogates :: Gen String
genStringSurrogates = choice
  [ apcat [genSurrogates, genStringAscii, genSurrogates]
  , apcat [genSurrogates, genStringUnicode, genSurrogates]
  ]

-- Only uses ascii characters
genStringAscii :: Gen String
genStringAscii = list (linear 0 128) (fmap chr (int (linear 0x00 0x7F)))

-- Pulls from unicode characters that have all different
-- UTF-8 byte-lengths. 
genStringUnicode :: Gen String
genStringUnicode = list (linear 0 128) genCharUnicode

genCharUnicode :: Gen Char
genCharUnicode = choice
  [ fmap chr (int (linear 0x00 0x7F))
  , fmap chr (int (linear 0x80 0x7FF))
  , fmap (chr . (\x -> if x >= 0xD800 && x <= 0xDFFF then 0xD799 else x)) (int (linear 0x800 0xFFFF))
  , fmap chr (int (linear 0x10000 0x10FFFF))
  ]

genByte :: Gen Word8
genByte = word8 (linear minBound maxBound)

genMostlyAsciiBytes :: Gen [Word8]
genMostlyAsciiBytes = choice
  [ apcat
    [ list (linear 0 20) (word8 (linear 0x00 0x7F))
    , fmap pure (word8 (linear 0xF0 0xFF))
    , list (linear 0 20) (word8 (linear 0x00 0x7F))
    ]
  , list (linear 0 45) (word8 (linear 0x00 0x7F))
  ]


findByteProp :: Property
findByteProp = property $ do
  wordList :: [Word8] <- forAll (list (linear 0 128) enumBounded)
  let len = L.length wordList
  mindex <- forAll $ frequency
    [ (4, fmap Just (int (linear 0 (len - 1))))
    , (1, pure Nothing)
    ]
  w <- case mindex of
    Just ix -> case safeIndex ix wordList of
      Just b -> pure b
      Nothing -> if len == 0 && ix == 0 then pure 0 else failure
    Nothing -> case findUnusedByte (S.fromList wordList) of
      Just b -> pure b
      Nothing -> failure
  L.elemIndex w wordList === BA.findByte w (BA.pack wordList)

sliceFindByteProp :: Property
sliceFindByteProp = property $ do
  byteList <- forAll (list (linear 0 128) genByte)
  front <- forAll (genOffset (L.length byteList))
  back <- forAll (genOffset (L.length byteList))
  let truncatedByteList = listDropEnd back (L.drop front byteList)
  w <- forAll (pickByte truncatedByteList)
  let expected = L.elemIndex w truncatedByteList
      actual = B.findByte w (B.dropEnd back (B.drop front (B.pack byteList)))
  expected === actual


zipAndProp :: Property
zipAndProp = property $ do
  xsList :: [Word8] <- forAll (list (linear 0 128) enumBounded)
  ysList :: [Word8] <- forAll (list (linear 0 128) enumBounded)
  let xs = BA.pack xsList
      ys = BA.pack ysList
  L.zipWith (.&.) xsList ysList === BA.unpack (BA.zipAnd xs ys)

safeIndex :: Int -> [a] -> Maybe a
safeIndex !_ [] = Nothing
safeIndex !ix (x : xs) = case compare ix 0 of
  EQ -> Just x
  LT -> Nothing
  GT -> safeIndex (ix - 1) xs

findUnusedByte :: Set Word8 -> Maybe Word8
findUnusedByte s = S.lookupMin (S.difference allBytes s)

allBytes :: Set Word8
allBytes = S.fromList (enumFromTo minBound maxBound)

apcat :: (Applicative f, Monoid a) => [f a] -> f a
apcat = fmap mconcat . sequenceA

-- This is nearly the same this as Packed.Text.pack. However, it does
-- not replace surrogates with U+FFFD. This is useful for testing
-- that we handle surrogates correctly when decoding UTF-8 text.
charToBytesWithSurrogates :: Char -> [Word8]
charToBytesWithSurrogates c
  | p < 0x80 = [wordToWord8 p]
  | p < 0x800 = [wordToWord8 (byteTwoOne p), wordToWord8 (byteTwoTwo p)]
  | p < 0x10000 = [wordToWord8 (byteThreeOne p), wordToWord8 (byteThreeTwo p), wordToWord8 (byteThreeThree p)]
  | otherwise = [wordToWord8 (byteFourOne p), wordToWord8 (byteFourTwo p), wordToWord8 (byteFourThree p), wordToWord8 (byteFourFour p)]
  where
  p :: Word
  p = fromIntegral (Data.Char.ord c)
  
wordToWord8 :: Word -> Word8
wordToWord8 = fromIntegral

-- precondition: codepoint is less than 0x800
byteTwoOne :: Word -> Word
byteTwoOne w = unsafeShiftR w 6 .|. 0b11000000

byteTwoTwo :: Word -> Word
byteTwoTwo w = (w .&. 0b00111111) .|. 0b10000000

-- precondition: codepoint is less than 0x1000
byteThreeOne :: Word -> Word
byteThreeOne w = unsafeShiftR w 12 .|. 0b11100000

byteThreeTwo :: Word -> Word
byteThreeTwo w = (0b00111111 .&. unsafeShiftR w 6) .|. 0b10000000

byteThreeThree :: Word -> Word
byteThreeThree w = (w .&. 0b00111111) .|. 0b10000000

-- precondition: codepoint is less than 0x110000
byteFourOne :: Word -> Word
byteFourOne w = unsafeShiftR w 18 .|. 0b11110000

byteFourTwo :: Word -> Word
byteFourTwo w = (0b00111111 .&. unsafeShiftR w 12) .|. 0b10000000

byteFourThree :: Word -> Word
byteFourThree w = (0b00111111 .&. unsafeShiftR w 6) .|. 0b10000000

byteFourFour :: Word -> Word
byteFourFour w = (0b00111111 .&. w) .|. 0b10000000