{-# LANGUAGE FlexibleInstances #-} -- | -- Module : Test.QuickCheck.Unicode -- Copyright : (c) 2014 Bryan O'Sullivan -- License : BSD-style -- Maintainer : bos@serpentine.com -- Stability : stable -- Portability : portable -- -- QuickCheck Generator and shrink functions for testing software that -- uses Unicode data. -- -- The default 'Arbitrary' instance for the 'Char' type intentionally -- generates only ASCII values. This can lead to a false sense of -- security in cases where Unicode compliance is required, as -- encodings that span multiple bytes or code units will simply not be -- exercised at all. -- -- This module deliberately avoids using the @text@ and @bytestring@ -- packages to avoid pulling in extra dependencies. module Test.QuickCheck.Unicode ( -- * Newtype wrapper for convenience Unicode(fromUnicode) -- * Generators , char , string -- ** Helpers , list -- ** Basic generators , planes , ascii , plane0 , plane1 , plane2 , plane14 -- * Predicates , reserved -- * Shrinking functions , shrinkChar ) where import Control.Applicative ((<$>)) import Data.Bits ((.&.)) import Data.Char (chr, ord) import Test.QuickCheck hiding ((.&.)) -- | A wrapper for 'Char' and 'String', for which the 'Arbitrary' -- instance generates full-Unicode characters. newtype Unicode a = Unicode { fromUnicode :: a } deriving (Eq, Ord, Show, Read) instance Arbitrary (Unicode Char) where arbitrary = Unicode <$> char shrink = map Unicode . shrinkChar . fromUnicode instance Arbitrary (Unicode [Char]) where arbitrary = Unicode <$> string shrink = map Unicode . shrinkList shrinkChar . fromUnicode -- | Generate a Unicode code point. This has a much larger range than -- the default 'Arbitrary' instance for 'Char'. char :: Gen Char char = chr `fmap` excluding reserved (frequency planes) -- | Generate a list of Unicode code points. string :: Gen String string = list char -- | Generate a list of values. list :: Gen a -> Gen [a] list gen = sized $ \n -> do k <- choose (0,n) vectorOf k gen -- | Shrink a Unicode code point. shrinkChar :: Char -> [Char] shrinkChar = map chr . filter (not . reserved) . shrinkIntegral . ord excluding :: (a -> Bool) -> Gen a -> Gen a excluding bad gen = loop where loop = do x <- gen if bad x then loop else return x -- | Indicate whether a code point is reserved. reserved :: Int -> Bool reserved = anyOf [(<0), (>0x10FFFF), lowSurrogate, highSurrogate, nonCharacter] where anyOf fs xs = or (map ($ xs) fs) lowSurrogate c = c >= 0xDC00 && c <= 0xDFFF highSurrogate c = c >= 0xD800 && c <= 0xDBFF nonCharacter c = masked == 0xFFFE || masked == 0xFFFF where masked = c .&. 0xFFFF -- | A weighted list of generators that favours ASCII characters, -- followed by planes 0 and 1. planes :: [(Int, Gen Int)] planes = [(60, ascii), (14, plane0), (14, plane1), (6, plane2), (6, plane14)] -- ASCII. ascii :: Gen Int ascii = choose (0,0x7F) -- | Basic Multilingual Plane. plane0 :: Gen Int plane0 = choose (0xF0, 0xFFFF) -- | Supplementary Multilingual Plane. plane1 :: Gen Int plane1 = oneof [ choose (0x10000, 0x10FFF) , choose (0x11000, 0x11FFF) , choose (0x12000, 0x12FFF) , choose (0x13000, 0x13FFF) , choose (0x1D000, 0x1DFFF) , choose (0x1F000, 0x1FFFF) ] -- | Supplementary Ideographic Plane. plane2 :: Gen Int plane2 = oneof [ choose (0x20000, 0x20FFF) , choose (0x21000, 0x21FFF) , choose (0x22000, 0x22FFF) , choose (0x23000, 0x23FFF) , choose (0x24000, 0x24FFF) , choose (0x25000, 0x25FFF) , choose (0x26000, 0x26FFF) , choose (0x27000, 0x27FFF) , choose (0x28000, 0x28FFF) , choose (0x29000, 0x29FFF) , choose (0x2A000, 0x2AFFF) , choose (0x2B000, 0x2BFFF) , choose (0x2F000, 0x2FFFF) ] -- | Supplementary Special-Purpose Plane. plane14 :: Gen Int plane14 = choose (0xE0000, 0xE0FFF)