{-| Description: Copyright: (c) 2020 Samuel May License: MPL-2.0 Maintainer: ag.eitilt@gmail.com Stability: experimental Portability: portable -} module Test.Willow.Property.Encoding.Utf8 ( tests , genUtf8 ) where import qualified Data.ByteString as BS import qualified Hedgehog as H import qualified Hedgehog.Gen as H.G import qualified Hedgehog.Range as H.R import Hedgehog ( (===) ) import Web.Willow.Common.Encoding import Test.Willow.Property.Common tests :: H.Group tests = packGroup "Web.Willow.Common.Encoding.Utf8" [ utf8encodeRoundtrip , utf8decodeRoundtrip -- Single byte tests handled in "unit" due to limited number , decodeEndOfStream , decodeTwoByteInvalid , decodeThreeByteInvalid , decodeFourByteInvalid , decodeOneByteLookup , decodeTwoByteLookup , decodeThreeByteLookup , decodeFourByteLookup ] genUtf8 :: H.Gen BS.ByteString genUtf8 = H.G.utf8 (H.R.linear 0 64) (H.G.choice [H.G.latin1, H.G.unicode]) utf8encodeRoundtrip :: Test utf8encodeRoundtrip = packTest "encode roundtrip" $ do txt <- H.forAll genText txt === fst (decodeUtf8' . fst $ encodeUtf8 txt) utf8decodeRoundtrip :: Test utf8decodeRoundtrip = packTest "decode roundtrip" $ do bs <- H.forAll genUtf8 bs === fst (encodeUtf8 . fst $ decodeUtf8' bs) decodeEndOfStream :: Test decodeEndOfStream = packTest "incomplete character" $ do char <- H.forAll $ H.G.choice [ fmap pure . H.G.word8 $ H.R.linear 0xC2 0xDF , do b1 <- H.G.word8 $ H.R.linear 0xE0 0xEF let (l2, h2) = case b1 of 0xE0 -> (0xA0, 0xBF) 0xED -> (0x80, 0x9F) _ -> (0x80, 0xBF) b2 <- H.G.word8 $ H.R.linear l2 h2 return [b1, b2] , do b1 <- H.G.word8 $ H.R.linear 0xF0 0xF4 let (l2, h2) = case b1 of 0xF0 -> (0x90, 0xBF) 0xF4 -> (0x80, 0x8F) _ -> (0x80, 0xBF) b2 <- H.G.word8 $ H.R.linear l2 h2 b3 <- H.G.word8 $ H.R.linear 0x80 0xBF return [b1, b2, b3] ] checkEndOfStream Utf8 char decodeTwoByteInvalid :: Test decodeTwoByteInvalid = packTest "invalid second byte" $ do char <- H.forAll $ H.G.choice [ do b1 <- H.G.word8 $ H.R.linear 0xC2 0xF4 b2 <- H.G.choice $ map H.G.word8 [ H.R.linear 0x00 0x7F , H.R.linear 0xC0 0xFF ] return [b1, b2] , do b2 <- H.G.word8 $ H.R.linear 0x80 0x9F return [0xE0, b2] , do b2 <- H.G.word8 $ H.R.linear 0xA0 0xBF return [0xED, b2] , do b2 <- H.G.word8 $ H.R.linear 0x80 0x8F return [0xF0, b2] , do b2 <- H.G.word8 $ H.R.linear 0x90 0xBF return [0xF4, b2] ] checkInvalidInit 1 Utf8 char decodeThreeByteInvalid :: Test decodeThreeByteInvalid = packTest "invalid third byte" $ do b1 <- H.forAll . H.G.word8 $ H.R.linear 0xE0 0xF4 let (l2, h2) = case b1 of 0xE0 -> (0xA0, 0xBF) 0xED -> (0x80, 0x9F) 0xF0 -> (0x90, 0xBF) 0xF4 -> (0x80, 0x8F) _ -> (0x80, 0xBF) b2 <- H.forAll . H.G.word8 $ H.R.linear l2 h2 b3 <- H.forAll . H.G.choice $ map H.G.word8 [ H.R.linear 0x00 0x7F , H.R.linear 0xC0 0xFF ] checkInvalidInit 1 Utf8 [b1, b2, b3] decodeFourByteInvalid :: Test decodeFourByteInvalid = packTest "invalid fourth byte" $ do b1 <- H.forAll . H.G.word8 $ H.R.linear 0xF0 0xF4 let (l2, h2) = case b1 of 0xF0 -> (0x90, 0xBF) 0xF4 -> (0x80, 0x8F) _ -> (0x80, 0xBF) b2 <- H.forAll . H.G.word8 $ H.R.linear l2 h2 b3 <- H.forAll . H.G.word8 $ H.R.linear 0x80 0xBF b4 <- H.forAll . H.G.choice $ map H.G.word8 [ H.R.linear 0x00 0x7F , H.R.linear 0xC0 0xFF ] checkInvalidInit 1 Utf8 [b1, b2, b3, b4] decodeOneByteLookup :: Test decodeOneByteLookup = packTest "one-byte sequence" $ do b1 <- H.forAll $ H.G.choice [ H.G.word8 $ H.R.linear 0x00 0xC1 , H.G.word8 $ H.R.linear 0xF5 0xFF ] checkTrailing Utf8 [b1] decodeTwoByteLookup :: Test decodeTwoByteLookup = packTest "two-byte sequence" $ do b1 <- H.forAll . H.G.word8 $ H.R.linear 0xC2 0xDF b2 <- H.forAll . H.G.word8 $ H.R.linear 0x80 0xBF checkTrailingInit 1 Utf8 [b1, b2] decodeThreeByteLookup :: Test decodeThreeByteLookup = packTest "three-byte sequence" $ do b1 <- H.forAll . H.G.word8 $ H.R.linear 0xE0 0xEF let (l2, h2) = case b1 of 0xE0 -> (0xA0, 0xBF) 0xED -> (0x80, 0x9F) _ -> (0x80, 0xBF) b2 <- H.forAll . H.G.word8 $ H.R.linear l2 h2 b3 <- H.forAll . H.G.word8 $ H.R.linear 0x80 0xBF checkTrailingInit 1 Utf8 [b1, b2, b3] decodeFourByteLookup :: Test decodeFourByteLookup = packTest "four-byte sequence" $ do b1 <- H.forAll . H.G.word8 $ H.R.linear 0xF0 0xF4 let (l2, h2) = case b1 of 0xF0 -> (0x90, 0xBF) 0xF4 -> (0x80, 0x8F) _ -> (0x80, 0xBF) b2 <- H.forAll . H.G.word8 $ H.R.linear l2 h2 b3 <- H.forAll . H.G.word8 $ H.R.linear 0x80 0xBF b4 <- H.forAll . H.G.word8 $ H.R.linear 0x80 0xBF checkTrailingInit 1 Utf8 [b1, b2, b3, b4]