-- "src/Dao/Random.hs" generates objects from integers that can be used
-- to test the parsers/pretty-printer, and the binary encoder/decoder.
--
-- Copyright (C) 2008-2014 Ramin Honary.
-- This file is part of the Dao System.
--
-- The Dao System is free software: you can redistribute it and/or
-- modify it under the terms of the GNU General Public License as
-- published by the Free Software Foundation, either version 3 of the
-- License, or (at your option) any later version.
--
-- The Dao System is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- GNU General Public License for more details.
--
-- You should have received a copy of the GNU General Public License
-- along with this program (see the file called "LICENSE"). If not, see
-- .
-- | Hints for making good random object generators:
-- Avoid using scramble more than once per constructor. If you have a data type constructed with:
-- > return DataType <*> 'scrambO' <*> 'scrambO'
-- then just do this instead
-- > 'scramble' $ return DataType <*> 'randO' <*> 'randO'
--
-- Do not bother preceding generators for newtype data types with 'recurse' or 'countNode', it isn't
-- really necessary.
--
-- Recursive data types containing lists with 'randList' should probably generate small lists for
-- 'randO' instances, and generate larger lists for 'defaultO' instances.
--
-- When instantiating 'randO', use only 'randO' to fill-in all the objects contained in the object.
-- When instantiating 'defaultO', use only 'defaultO' to fill-in all the objects contained in the
-- object.
--
-- When instantiating 'defaultO', it is OK to use 'defaultO' to fill-in other non-recursive data
-- types, but /BE VERY CAREFUL/ not to call 'defaultO' for another non-recursive type.
module Dao.Random where
import Dao.String
import qualified Dao.Tree as T
import Control.Exception
import Control.Applicative
import Control.Monad.Trans.Class
import Control.Monad.IO.Class
import Control.Monad.State
import Data.Monoid
import Data.Char
import Data.Word
import Data.Ratio
import Data.Time
import Data.Array.IArray
import qualified Data.ByteString.Char8 as B
import System.Random
import System.IO
----------------------------------------------------------------------------------------------------
-- | A simple, stateful monad for generating arbitrary data types based on pseudo-random numbers
-- without lifting the @IO@ or @ST@ monads, i.e. it can be evaluated in a pure way.
newtype RandT m a = RandT { runRandT :: StateT RandOState m a }
deriving (Functor, Applicative, Monad, MonadPlus, Alternative)
instance MonadTrans RandT where { lift = RandT . lift }
instance MonadIO m => MonadIO (RandT m) where { liftIO = RandT . liftIO }
type RandO a = RandT IO a
data RandOState
= RandOState
{ integerState :: Integer -- ^ the predictable random generator
, stdGenState :: StdGen -- ^ the unpredictable random generator
, nodeCounter :: Int -- ^ counts how many nodes have been created for this particular run
, depthLimit :: Int -- ^ sets the limit of the recursion depth
, currentDepth :: Int -- ^ counts the current recursion depth for this particular run
, deepestSoFar :: Int -- ^ keeps track of how deep the deepest recursion has gone
, traceLevel :: Int
-- ^ when performing a trace, keeps track of how many trace recursions there have been
}
-- | Initializes the 'RandOState' with two integer values: a maximium recursion depth value (limits
-- the number of times you can recursively call 'limSubRandO') and the seed value passed to
-- 'System.Random.mkStdGen'.
initRandOState :: Int -> Int -> RandOState
initRandOState subdepthlim seed =
RandOState
{ integerState = fromIntegral seed
, stdGenState = mkStdGen seed
, nodeCounter = 0
, depthLimit = subdepthlim
, currentDepth = 0
, deepestSoFar = 0
, traceLevel = 0
}
-- | Increment the internal node counter of the random generator state. This is good for measuring
-- the "weight" of randomly generated objects. /NOTE:/ do not use this if you also start your
-- 'randO' instance with 'recurse', because 'recurse' also calls this function.
countNode_ :: RandO Int
countNode_ = do
i' <- RandT $ gets nodeCounter
let i = i' + 1
RandT $ modify (\st -> st{nodeCounter=i})
return i
-- | Algorithmically identical 'countNode_' but its function type is such that it can be written like so:
-- > 'countNode' $ do ...
-- whereas the 'countNode' function must be written like so:
-- > do { 'countNode_'; ... }
-- or
-- > countNode_ >> ...
-- /NOTE:/ do not use this if you also start your 'randO' instance with 'recurse', because 'recurse'
-- also calls this function.
countNode :: RandO a -> RandO a
countNode fn = countNode_ >> fn
newtype RandChoice o = RandChoice{ getChoiceArray :: Maybe (Array Int (RandO o)) }
instance Functor RandChoice where
fmap f (RandChoice arr) = RandChoice $ fmap (fmap (fmap f)) arr
instance Monoid (RandChoice o) where
mempty = RandChoice Nothing
mappend (RandChoice a) (RandChoice b) = RandChoice $ msum
[ do (loA, hiA) <- fmap bounds a
(loB, hiB) <- fmap bounds b
listA <- fmap elems a
listB <- fmap elems b
return $ listArray (min loA loB, max hiA hiB) (listA++listB)
, a, b
]
-- | Similar to monadic bind, allows you to create a new 'RandChoice' by using the value produced by
-- another 'RandChoice'.
bindRandChoice :: RandChoice o -> (o -> RandChoice p) -> RandChoice p
bindRandChoice (RandChoice arr) f = RandChoice $ fmap (fmap (\o -> o >>= runRandChoiceOf . f)) arr
-- | Instantiate your data types into this class if you can generate arbitrary objects from random
-- numbers using the 'RandO' monad. Minimal complete definition is one of either 'randO' or
-- 'randChoice', and one of either 'defaultO' or 'defaultChoice'.
class HasRandGen o where
-- | This is the function used to generate a random object of a data type that only has one
-- constructor. You must define either this or 'randChoice' or both. The 'randChoice' will be
-- defined as @('randChoiceList' ['randO'])@.
randO :: RandO o
randO = runRandChoice
-- | This is the function used to generate a random object of a data type that has multiple
-- constructors. Use 'randChoiceList' to build a list of 'RandO' data types, each item producing
-- an object with a different constructor. You must define either this or 'randO' or both. The
-- 'randO' will be defined using @('runChoiceList' 'randChoice')@ by default.
randChoice :: RandChoice o
randChoice = randChoiceList [randO]
-- | The 'randO' and/or 'randChoice' functions can be defined without any restrictions, they can
-- even be called recursively. But since we are working with randomness, recursion may produce
-- arbitrarily large objects which may consume all memory and crash the program. The 'recurse'
-- function can be used to count the depth of the recursive data type constructed and to limit the
-- depth, and when the limit is reached a non-recursive default value should be constructed
-- instead. This is the function that should produce the non-recursive data.
--
-- Keep in mind that it is impossible to enforce whether or not the data generated by any function
-- will be recursive or not unless the data type itself is inherently not recursive. So it is the
-- programmers responsiblity to understand how to use this function. This function must terminate,
-- it is your responsibility to see that it does, it is your responsability to make sure you never
-- call a recursive function within this function.
--
-- For data types that are inherenly not recursive, for example types instantiating
-- 'Prelude.Enum', this function may safely be defined by calling 'randO'. For recursive data
-- types, if the data type instantiates 'Data.Monoid.Monoid', consider returning
-- 'Data.Monoid.mempty'.
--
-- Either this function or 'defaultChoice' must be defined.
defaultO :: RandO o
defaultO = runDefaultChoice
-- | This function is to 'defaultO' what 'randChoice' is to 'randO': it lets you construct a
-- random object from a list of choices, but like 'defaultO' every choice provided here must NOT
-- be a recursive function.
defaultChoice :: RandChoice o
defaultChoice = randChoiceList [defaultO]
runRandChoiceOf :: RandChoice o -> RandO o
runRandChoiceOf (RandChoice{ getChoiceArray=arr }) = case arr of
Nothing -> fail "null RandChoice"
Just arr -> let (lo, hi) = bounds arr in join $ (arr!) . (lo+) <$> nextInt (hi-lo)
runRandChoice :: HasRandGen o => RandO o
runRandChoice = runRandChoiceOf randChoice
runDefaultChoice :: HasRandGen o => RandO o
runDefaultChoice = runRandChoiceOf defaultChoice
randChoiceList :: forall o . [RandO o] -> RandChoice o
randChoiceList items = RandChoice{ getChoiceArray = guard (not $ null items) >> (Just arr) } where
len = length items
arr :: Array Int (RandO o)
arr = listArray (0, len-1) items
instance HasRandGen () where { randO = return (); defaultO = randO; }
instance HasRandGen Int where { randO = randInt; defaultO = randO; }
instance HasRandGen Integer where { randO = fmap fromIntegral randInt; defaultO = randO; }
instance HasRandGen Char where { randO = fmap chr randInt; defaultO = randO }
instance HasRandGen Word64 where { randO = fromIntegral <$> (randO::RandO Int); defaultO = randO; }
instance HasRandGen Rational where
randO = return (%) <*> defaultO <*> ((+1) <$> defaultO)
defaultO = randO
instance HasRandGen Double where { randO = fromRational <$> randO; defaultO = randO; }
instance HasRandGen UTCTime where
randO = do
day <- fmap (ModifiedJulianDay . unsign . flip mod 73000) randInt
sec <- fmap (fromRational . toRational . flip mod 86400) randInt
return (UTCTime{utctDay=day, utctDayTime=sec})
defaultO = randO
instance HasRandGen NominalDiffTime where
randO = randInteger (fromRational 0) $ \i -> do
div <- randInt
fmap (fromRational . (% fromIntegral div) . longFromInts) (replicateM (mod i 2 + 1) randInt)
defaultO = randO
instance HasRandGen Name where { randO = fmap (fromUStr . randUStr) randInt; defaultO = randO; }
instance HasRandGen UStr where
randO = fmap (ustr . unwords . fmap (uchars . toUStr)) (randList 0 9 :: RandO [Name])
defaultO = randO
instance HasRandGen Bool where { randO = fmap (0/=) (nextInt 2); defaultO = randO; }
instance HasRandGen a => HasRandGen (Maybe a) where
randO = randO >>= \n -> if n then return Nothing else fmap Just randO
defaultO = return Nothing
instance (Ord p, HasRandGen p, HasRandGen o) => HasRandGen (T.Tree p o) where
defaultO = return T.Void
randO = recurse $ do
branchCount <- nextInt 4
cuts <- fmap (map (+1) . randToBase 6) randInt
fmap (T.fromList . concat) $ replicateM (branchCount+1) $ do
wx <- replicateM 6 randO
forM cuts $ \cut -> do
obj <- randO
return (take cut wx, obj)
-- | Construct a value from an 'Prelude.Int'. Actually, you have a 50/50 chance of drawing a zero,
-- but this is because zeros are used often for you data type.
randInteger :: a -> (Int -> RandO a) -> RandO a
randInteger zero mkOther = do
i <- randInt
let (x, r) = divMod i 2
if r==0 then return zero else mkOther x
-- | Generate a random object given a maximum recursion limit, a seed value, and a 'RandO' generator
-- function. The weight (meaning the number of calls to 'countNode', 'countNode_', or 'recurse') of
-- the generated item is also returned.
genRandWeightedWith :: RandO a -> Int -> Int -> IO (a, Int)
genRandWeightedWith (RandT gen) subdepthlim seed =
fmap (fmap nodeCounter) $ runStateT gen (initRandOState subdepthlim seed)
-- | This function you probably will care most about. does the work of evaluating the
-- 'Control.Monad.State.evalState' function with a 'RandOState' defined by the same two parameters
-- you would pass to 'initRandOState'. In other words, arbitrary random values for any data type @a@
-- that instantates 'HasRandGen' can be generated using two integer values passed to this function.
genRandWeighted :: HasRandGen a => Int -> Int -> IO (a, Int)
genRandWeighted subdepthlim seed = genRandWeightedWith randO subdepthlim seed
-- | Like 'genRandWeightedWith' but the weight value is ignored, only being evaluated to the random
-- object,
genRandWith :: RandO a -> Int -> Int -> IO a
genRandWith gen subdepthlim seed = fmap fst $ genRandWeightedWith gen subdepthlim seed
-- | Like 'genRandWeightedWith' but the weight value is ignored, only being evaluated to the random
-- object.
genRand :: HasRandGen a => Int -> Int -> IO a
genRand subdepthlim seed = genRandWith randO subdepthlim seed
randTrace :: MonadIO m => String -> RandT m a -> RandT m a
randTrace msg rand = do
trlev <- RandT $ gets traceLevel
nc <- RandT $ gets nodeCounter
sd <- RandT $ gets currentDepth
let prin msg = liftIO $ do
hPutStrLn stderr (replicate trlev ' ' ++ msg)
hFlush stderr >>= evaluate
() <- prin $ "begin "++msg++" c="++show nc++" d="++show sd
RandT $ modify $ \st -> st{traceLevel=trlev+1}
a <- rand >>= liftIO . evaluate
RandT $ modify $ \st -> st{traceLevel=trlev}
() <- prin $ "end "++msg
return a
-- | Take another integer from the seed value. Provide a maximum value, the pseudo-random integer
-- returned will be the seed value modulo this maximum value (so passing 0 will result in a
-- divide-by-zero exception, passing 1 will always return 0). The seed value is then updated with
-- the result of this division. For example, if the seed value is 10023, and you pass 10 to this
-- function, the result returned will be 3, and the new seed value will be 1002.
-- Using numbers generated from this seed value is very useful for generating objects that are
-- somewhat predictable, but the contents of which are otherwise unpredictable. For example, if you
-- want to generate random functions but always with the names "a", "b", or "c", like so:
-- > a(...), b(...), c(...)
-- where the arguments to these functions can be arbitrary, then have your function generator
-- generate the names of these functions using 'nextInt' like so:
-- > 'Prelude.fmap' ('Prelude.flip' 'Prelude.lookup ('Prelude.zip' "abc" [0,1,2]))'nextInt' 3
-- then the arguments to these functions can be generated using 'randInt'. The names of the
-- functions will be predictable for your seed values: any seed value divisible by 3 will generate a
-- function named "a", but the arguments will be arbitrary because they were generated by 'randInt'.
nextInt :: Int -> RandO Int
nextInt maxval = if abs maxval==1 || maxval==0 then return 0 else do
st <- RandT $ get
let (i, rem) = divMod (integerState st) (fromIntegral (abs maxval))
RandT $ put $ st{integerState=i}
return (fromIntegral rem)
-- | Generate a random integer from the pseudo-random number generator.
randInt :: RandO Int
randInt = RandT $
state (\st -> let (i, gen) = next (stdGenState st) in (i, st{stdGenState=gen}))
-- | Mark a recursion point, also increments the 'nodeCounter'. The recusion depth limit set when
-- evaluating a 'randO' computation will not be exceeded. When the number of 'recurse' functions
-- called without returning has reached this limit and this function is evaluated again, the given
-- 'RandO' generator will not be evaluated, the default value will be returned.
recurse :: HasRandGen a => RandO a -> RandO a
recurse fn = do
st <- RandT get
if currentDepth st > depthLimit st
then defaultO
else do
countNode_
i <- (+1) <$> (RandT $ gets currentDepth)
RandT $ modify (\st -> st{currentDepth = i, deepestSoFar = max (deepestSoFar st) i})
a <- fn
RandT $ modify (\st -> st{currentDepth = currentDepth st - 1})
return a
-- | The 'nextInt' function lets you derive objects from a non-random seed value internal to the
-- state of the 'RandT' monad. This is useful for random objects that have multiple constructors,
-- and you want to generate one of every constructor by simply initializing the random seed with
-- incrementing integers.
--
-- However every instantiation of 'randChoice' makes use of this seed value. Consequently if your
-- data type is composed entirely of objects which all instantiate 'randChoice', every node of the
-- object will be generated by a non-random number. In some cases this is desirable, it allows you
-- to generate every possible object with a large enough sequence of random numbers.
--
-- However when you wish to generate a very random, varied set of random objects, this
-- predictability is not desirable. To get around this, you can use the 'scramble' function. This
-- will use the current seed value to initialize a new child random generator with a child random
-- seed, and the provided 'RandT' function will be evaluated with in this child environment. After
-- evaluation is complete, the parent seed is restored. Since the child random seed is derived from the
-- parent seed, you are still guaranteed to always generate the same object from the same seed value,
-- but the child object generated will be much more varied and less predictable.
--
-- So instead of generating a child node of your object with ordinary 'randO', use 'scrambO' (which
-- is equivalent to @('scramble' 'randO')@ and this will make your objects more varied, even for
-- predictable input.
scramble :: RandO a -> RandO a
scramble fn = do
newGen <- randInt
oldst <- RandT get
RandT (put $ oldst{ stdGenState=mkStdGen newGen })
let wrap x = toInteger (fromIntegral x :: Word)
x <- wrap <$> randInt
y <- wrap <$> randInt
RandT (modify $ \st -> st{ integerState = x*(1 + (toInteger (maxBound::Word))) + y })
a <- fn
RandT (modify $ \st -> st{ integerState=integerState oldst, stdGenState=stdGenState oldst })
return a
-- | This function is defined simply as @('scramble' 'randO')@, but I expect it to be used often
-- enough it warrants it's own function name.
scrambO :: HasRandGen a => RandO a
scrambO = scramble randO
-- | The number of unique values a 'Prelude.Int' can be, which is @('Prelude.maxBound'+1)*2@.
intBase :: Integer
intBase = (fromIntegral (maxBound::Int) + 1) * 2
-- | Take an ordinary 'Prelude.Int' and make it unsigned by checking if it is a negative value, and
-- if it is, returning the maximum unsigned value plus the negative value, otherwise returning the
-- positive value unchanged. For example, -1 will return @2*('Prelude.maxBound'+1)-1@ and @+1@ will
-- return @1@.
unsign :: Int -> Integer
unsign i = if i<0 then intBase + fromIntegral i else fromIntegral i
-- | Creates a string of digits from 0 to the given @base@ value by converting a random unsigned
-- integer to the list of digits that represents the random integer in that @base@. For example, if
-- you want a list of digits from 0 to 4 to be produced from a number 54, pass 4 as the base, then
-- the number 54. Each digit of the base-4 number representation of 54 will be returned as a
-- separate integer: @[2,1,3]@ (from lowest to highest place value, where 123 in base 10 would
-- return the list @[3,2,1]@).
randToBase :: Int -> Int -> [Int]
randToBase base i = loop (unsign i) where
loop i = if i==0 then [] else let (i' , sym) = divMod i b in fromIntegral sym : loop i'
b = fromIntegral base
-- | When generating 'Prelude.Integers' from 'Int's, treat a list of 'Int's as a list of symbols in
-- a base M number, where M is the @('Prelude.maxBound'::'Prelude.Int')@ multiplied by two to allow
-- for every negative number to also be considered a unique symbol.
longFromInts :: [Int] -> Integer
longFromInts = foldl (\a b -> a*intBase + unsign b) 0
randEnum :: (Bounded x, Enum x) => x -> x -> RandO x
randEnum lo hi = fmap toEnum (nextInt (abs (fromEnum lo - fromEnum hi)))
----------------------------------------------------------------------------------------------------
randUStr :: Int -> UStr
randUStr = ustr . B.unpack . getRandomWord
randMultiName :: RandO [UStr]
randMultiName = do
i0 <- randInt
let (i1, len) = divMod i0 4
fmap ((randUStr i1 :) . map randUStr) (replicateM len randInt)
-- | When you want to use 'randList' or 'randListOf', you must provide a maximum bound for the
-- number of values generated for the list. Lets say you want a maximum bound of 20 items for your
-- data types. It sounds reasonable, but if your data type is recursive, and your recursion depth
-- limit 'depthLimit' is set to 4, your data type has a chance of creating 20^4 or 160000 nodes! You
-- may want to call 'randListOf' or 'randList' with a diminishing upper bound, a bound which gets
-- lower and lower as the recursion depth increases.
--
-- That is the purpose of this function. You provide an initial integer value (like 24) and this
-- value will be logarithmically scaled based on the 'currentDepth' value. The scaling equation is:
-- > \x -> 'Prelude.floor' (x / 2^'depthLimit')
-- So if you provide a value of 24 to this function, the value returned will be:
-- > 'Prelude.floor' (24 / 2^'depthLimit')
-- And if the 'depthLimit' is 4 then @'Prelude.floor' (2 / 2^4) == 'Prelude.floor' (2/16) == 1@. So
-- for passing a value of 24 means the maximum number of nodes will be @24*12*6*3*1 == 5184@ nodes,
-- which is large, but considerably smaller than 160000 nodes.
depthLimitedInt :: Int -> RandO Int
depthLimitedInt x = getCurrentDepth >>= \d -> return (div x (2^d))
getCurrentDepth :: Monad m => RandT m Int
getCurrentDepth = RandT $ gets currentDepth
randListOf :: Int -> Int -> RandO a -> RandO [a]
randListOf minlen maxlen rando = do
-- half of all lists will be null, unless the 'minlen' parameter is greater than 0
minlen <- return (min minlen maxlen)
maxlen <- return (max minlen maxlen)
empt <- if minlen==0 then nextInt 2 else return 0
if empt==1
then return []
else do
ln <- nextInt (maxlen-minlen)
replicateM (minlen+ln) rando
randList :: HasRandGen a => Int -> Int -> RandO [a]
randList lo hi = randListOf lo hi randO
defaultList :: HasRandGen a => Int -> Int -> RandO [a]
defaultList lo hi = randListOf lo hi defaultO
randRational :: Int -> RandO Rational
randRational i0 = do
let (i1, len1) = divMod i0 4
(_ , len2) = divMod i1 4
a <- fmap longFromInts (replicateM (len1+1) randInt)
b <- fmap longFromInts (replicateM (len2+1) randInt)
return (a%b)
getRandomWord :: Int -> B.ByteString
getRandomWord i = randomWords ! (mod i (rangeSize (bounds randomWords) - 1))
randomWords :: Array Int B.ByteString
randomWords = listArray (0, length list - 1) (map B.pack list) where
list = words $ unwords $
[ "a academia accomplished added also an analysis and application applications apply are arent slim"
, "argument arguments as at avoids be because been behavior between book both by calculus plus were"
, "calling can change changes code commercial computability computation computer concepts earth was"
, "constructs contrast conversely declarative definition depending depends describing metal key fee"
, "designed developed development difference different domains domain easier effects fire water add"
, "elaborations elements eliminating emphasize entscheidungsproblem eschewing star best least being"
, "especially evaluation example executing expression facilitate financial formal greatest open etc"
, "functional has have hope how however imperative industrial input investigate is home close where"
, "it key lack lambda language languages largely like make many math mathematical may from flow she"
, "motivations much mutable notion numeric of on one ones only organizations output paradigm pit he"
, "specific pioneering practice predict produce program programming prominent purely rather trust I"
, "recursion referential result roots same science side so software some specifically state move me"
, "statistics style subject such supported symbolic system than that the they child this super mesh"
, "transparency treats twice understand use used value values variety viewed which wide will bill X"
, "dates times database structured listing setting dictionary returning throwing catching law factor"
, "option procedure alpha beta electron proton neutron shift hard soft bean beam fix drug undo minus"
, "field magic latice jump assemble area volume interesting slice sector region cylinder sphere plan"
, "inside without trying patterned rules"
]