Use distribution function with rejection sampling.

This commit is contained in:
Mark Morschhäuser 2014-03-10 14:13:46 +01:00
parent 424513eb90
commit 800f7e6373

View file

@ -1,7 +1,10 @@
#include "rng_sfmt.h" #include "rng_sfmt.h"
#include <QDateTime> #include <QDateTime>
#include <stdlib.h> #include <algorithm>
#include <iostream>
// This is from gcc sources, namely from fixincludes/inclhack.def
// On C++11 systems, <cstdint> could be included instead.
#define UINT64_MAX (~(uint64_t)0)
RNG_SFMT::RNG_SFMT(QObject *parent) RNG_SFMT::RNG_SFMT(QObject *parent)
: RNG_Abstract(parent) : RNG_Abstract(parent)
@ -10,13 +13,72 @@ RNG_SFMT::RNG_SFMT(QObject *parent)
sfmt_init_gen_rand(&sfmt, QDateTime::currentDateTime().toTime_t()); sfmt_init_gen_rand(&sfmt, QDateTime::currentDateTime().toTime_t());
} }
/**
* Much thought went into this, please read this comment before you modify the code.
* Let SFMT() be an alias for sfmt_genrand_uint64() aka SFMT's rand() function.
*
* SMFT() returns a uniformly distributed pseudorandom number from 0 to UINT64_MAX.
* As SFMT() operates on a limited integer range, it is a _discrete_ function.
*
* We want a random number from a given interval [min, max] though, so we need to
* implement the (discrete) cumulative distribution function SFMT(min, max), which
* returns a random number X from [min, max].
*
* This CDF is by formal definition:
* SFMT(X; min, max) = (floor(X) - min + 1) / (max - min + 1)
*
* To get out the random variable, solve for X:
* floor(X) = SFMT(X; min, max) * (max - min + 1) + min - 1
* So this is, what getNumber(min, max) should look like.
* Problem: SFMT(X; min, max) * (max - min + 1) could produce an integer overflow,
* so it is not safe.
*
* One solution is to divide the universe into buckets of equal size depending on the
* range [min, max] and assign X to the bucket that contains the number generated
* by SFMT(). This equals to modulo computation and is not satisfying:
* If the buckets don't divide the universe equally, because the bucket size is not
* a divisor of 2, there will be a range in the universe that is biased because one
* bucket is too small thus will be chosen less equally!
*
* This is solved by rejection sampling:
* As SFMT() is assumed to be unbiased, we are allowed to ignore those random numbers
* from SFMT() that would force us to have an unequal bucket and generate new random
* numbers until one number fits into one of the other buckets.
* This can be compared to an ideal six sided die that is rolled until only sides
* 1-5 show up, while 6 represents something that you don't want. So you basically roll
* a five sided die.
*/
unsigned int RNG_SFMT::getNumber(unsigned int min, unsigned int max) unsigned int RNG_SFMT::getNumber(unsigned int min, unsigned int max)
{ {
// To make the random number generation thread safe, a mutex is created around the generation. // This all makes no sense if min > max.
// So in debug mode Q_ASSERT will print a warning...
Q_ASSERT(min <= max);
// ... and at runtime min and max will be swapped; this should never happen.
if(min > max)
std::swap(min, max);
// First compute the diameter (aka size, length) of the [min, max] interval
const unsigned int diameter = max - min + 1;
// Compute how many buckets (each in size of the diameter) will fit into the
// universe.
// If the division has a remainder, the result is floored automatically.
const uint64_t buckets = UINT64_MAX / diameter;
// Compute the last valid random number. All numbers beyond have to be ignored.
// If there was no remainder in the previous step, limit is equal to UINT64_MAX.
const uint64_t limit = diameter * buckets;
uint64_t rand;
// To make the random number generation thread-safe, a mutex is created around
// the generation. Outside of the loop of course, to avoid lock/unlock overhead.
mutex.lock(); mutex.lock();
uint64_t r = sfmt_genrand_uint64(&sfmt); do {
rand = sfmt_genrand_uint64(&sfmt);
} while (rand >= limit);
mutex.unlock(); mutex.unlock();
// return a random number from the interval [min, max] // Now determine the bucket containing the SFMT() random number and after adding
return (unsigned int) (r % (max - min + 1) + min); // the lower bound, a random number from [min, max] can be returned.
return (unsigned int) (rand / buckets + min);
} }