{-| Description: Copyright: (c) 2020 Samuel May License: MPL-2.0 Maintainer: ag.eitilt@gmail.com Stability: experimental Portability: portable -} module Test.Willow.Property.Encoding ( tests ) where import qualified Data.ByteString as BS import qualified Data.Text.Encoding as T import qualified Hedgehog as H import qualified Hedgehog.Gen as H.G import qualified Hedgehog.Range as H.R import Hedgehog ( (===) ) import Web.Willow.Common.Encoding import Test.Willow.Property.Common import qualified Test.Willow.Property.Encoding.Big5 as Big5 import qualified Test.Willow.Property.Encoding.EucJp as EucJp import qualified Test.Willow.Property.Encoding.EucKr as EucKr import qualified Test.Willow.Property.Encoding.GB as GB import qualified Test.Willow.Property.Encoding.ShiftJis as ShiftJis import qualified Test.Willow.Property.Encoding.Utf8 as Utf8 import qualified Test.Willow.Property.Encoding.Utf16 as Utf16 tests :: [H.Group] tests = [ generalTests , Big5.tests , EucJp.tests , EucKr.tests , GB.tests , ShiftJis.tests , Utf8.tests , Utf16.tests ] generalTests :: H.Group generalTests = packGroup "Web.Willow.Common.Encoding" [ decodeIndexConstant , bomDecodeOverride ] decodeIndexConstant :: Test decodeIndexConstant = packTest "consistent memoized decoder lookups" $ do (enc, bs) <- H.forAll $ H.G.choice [ big5Bytes , gbBytes , eucJpBytes , eucKrBytes ] let state = initialDecoderState enc case decode state . BS.pack $ bs ++ bs of ([], state') -> finalizeDecode state' === [] ([c1, c2], _) -> c1 === c2 ([c1, c2, c3, c4], _) | elem enc [Big5, EucKr] -> [c1, c2] === [c3, c4] (cs, _) -> H.footnote ("Wrong string length: " ++ show cs) where big5Bytes = do b1 <- H.G.word8 $ H.R.linear 0x81 0xFE b2 <- H.G.choice [ H.G.word8 $ H.R.linear 0x40 0x7E , H.G.word8 $ H.R.linear 0xA1 0xFE ] return (Big5, [b1, b2]) gbBytes = do enc <- H.G.element [Gbk, Gb18030] b1 <- H.G.word8 $ H.R.linear 0x81 0xFE b2 <- H.G.choice [ H.G.word8 $ H.R.linear 0x40 0x7E , H.G.word8 $ H.R.linear 0x80 0xFE ] return (enc, [b1, b2]) eucJpBytes = do b1 <- H.G.word8 $ H.R.linear 0xA1 0xFE b2 <- H.G.word8 $ H.R.linear 0xA1 0xFE bs <- H.G.element [ [b1, b2] , [0x8F, b1, b2] ] return (EucJp, bs) eucKrBytes = do b1 <- H.G.word8 $ H.R.linear 0x81 0xFE b2 <- H.G.word8 $ H.R.linear 0x41 0xFE return (EucKr, [b1, b2]) {- nonFatalDecode :: Test nonFatalDecode = packTest "non-fatal decoding consumes the entire stream" $ do enc <- H.forAll genEncoding bs <- H.forAll genUtf8 H.assert . Y.isJust $ decode' enc bs -} bomDecodeOverride :: Test bomDecodeOverride = packTest "byte-order mark override" $ do enc <- H.forAll genEncoding enc' <- H.forAll $ H.G.element [Utf8, Utf16be, Utf16le] txt <- H.forAll genText let (bom, ref) = case enc' of Utf16be -> (BS.pack [0xFE, 0xFF], T.encodeUtf16BE) Utf16le -> (BS.pack [0xFF, 0xFE], T.encodeUtf16LE) _ -> (BS.pack [0xEF, 0xBB, 0xBF], T.encodeUtf8) txt === fst (decode' (initialDecoderState enc) $ bom <> ref txt) genEncoding :: H.Gen Encoding genEncoding = H.G.choice [ H.G.constant Big5 , H.G.enum Gb18030 Ibm866 , H.G.enum Iso8859_2 MacintoshCyrillic , H.G.enum Windows874 Windows1258 ]