From a15eb6f29fcfba8657ee2d782afb1e72343a4610 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mark=20Morschh=C3=A4user?= Date: Sat, 8 Mar 2014 17:08:40 +0100 Subject: [PATCH] Updated SFMT RNG code, removed Qt RNG --- common/CMakeLists.txt | 2 - common/rng_qt.cpp | 16 - common/rng_qt.h | 13 - common/rng_sfmt.cpp | 15 +- common/rng_sfmt.h | 14 + common/sfmt/LICENSE.txt | 13 +- common/sfmt/SFMT-common.h | 164 ++++++++++ common/sfmt/SFMT-params.h | 81 ++--- common/sfmt/SFMT-params19937.h | 66 ++-- common/sfmt/SFMT.c | 533 +++++++++++---------------------- common/sfmt/SFMT.h | 293 +++++++++++++----- 11 files changed, 653 insertions(+), 557 deletions(-) delete mode 100644 common/rng_qt.cpp delete mode 100644 common/rng_qt.h create mode 100644 common/sfmt/SFMT-common.h diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt index 3a8031b1..17d3fa46 100644 --- a/common/CMakeLists.txt +++ b/common/CMakeLists.txt @@ -5,7 +5,6 @@ SET(common_SOURCES decklist.cpp get_pb_extension.cpp rng_abstract.cpp - rng_qt.cpp rng_sfmt.cpp server.cpp server_abstractuserinterface.cpp @@ -25,7 +24,6 @@ SET(common_SOURCES SET(common_HEADERS decklist.h rng_abstract.h - rng_qt.h rng_sfmt.h server.h server_arrowtarget.h diff --git a/common/rng_qt.cpp b/common/rng_qt.cpp deleted file mode 100644 index 6704cdb8..00000000 --- a/common/rng_qt.cpp +++ /dev/null @@ -1,16 +0,0 @@ -#include "rng_qt.h" -#include -#include - -RNG_Qt::RNG_Qt(QObject *parent) - : RNG_Abstract(parent) -{ - int seed = QDateTime::currentDateTime().toTime_t(); - qsrand(seed); -} - -unsigned int RNG_Qt::getNumber(unsigned int min, unsigned int max) -{ - int r = qrand(); - return min + (unsigned int) (((double) (max + 1 - min)) * r / (RAND_MAX + 1.0)); -} diff --git a/common/rng_qt.h b/common/rng_qt.h deleted file mode 100644 index e30719ef..00000000 --- a/common/rng_qt.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef RNG_QT_H -#define RNG_QT_H - -#include "rng_abstract.h" - -class RNG_Qt : public RNG_Abstract { - Q_OBJECT -public: - RNG_Qt(QObject *parent = 0); - unsigned int getNumber(unsigned int min, unsigned int max); -}; - -#endif diff --git a/common/rng_sfmt.cpp b/common/rng_sfmt.cpp index 9dff7448..438d8f59 100644 --- a/common/rng_sfmt.cpp +++ b/common/rng_sfmt.cpp @@ -1,5 +1,4 @@ #include "rng_sfmt.h" -#include "sfmt/SFMT.h" #include #include #include @@ -7,19 +6,17 @@ RNG_SFMT::RNG_SFMT(QObject *parent) : RNG_Abstract(parent) { - std::cerr << "Using SFMT random number generator." << std::endl; - - int seed = QDateTime::currentDateTime().toTime_t(); - init_gen_rand(seed); - for (int i = 0; i < 100000; ++i) - gen_rand64(); + // initialize the random number generator with a 32bit integer seed (timestamp) + sfmt_init_gen_rand(&sfmt, QDateTime::currentDateTime().toTime_t()); } unsigned int RNG_SFMT::getNumber(unsigned int min, unsigned int max) { + // To make the random number generation thread safe, a mutex is created around the generation. mutex.lock(); - uint64_t r = gen_rand64(); + uint64_t r = sfmt_genrand_uint64(&sfmt); mutex.unlock(); - return min + (unsigned int) (((double) (max + 1 - min)) * r / (18446744073709551616.0 + 1.0)); + // return a random number from the interval [min, max] + return (unsigned int) (r % (max - min + 1)); } diff --git a/common/rng_sfmt.h b/common/rng_sfmt.h index 4d131b38..80ff9f2f 100644 --- a/common/rng_sfmt.h +++ b/common/rng_sfmt.h @@ -1,13 +1,27 @@ #ifndef RNG_SFMT_H #define RNG_SFMT_H +#include "sfmt/SFMT.h" #include "rng_abstract.h" #include +/** + * This class represents the random number generator. + * It uses the SIMD-oriented Fast Mersenne Twister code v1.4.1 from + * http://www.math.sci.hiroshima-u.ac.jp/~%20m-mat/MT/SFMT/index.html + * To use this RNG, the class needs a sfmt_t structure for the RNG's internal state. + * It has to be initialized by sfmt_init_gen_rand() which is done in the constructor. + * The function sfmt_genrand_uint64() can then be used to create a 64 bit unsigned int + * pseudorandom number. This is done in getNumber(). + * For more information see the author's website and look at the documentation and + * examples that are part of the official downloads. + */ + class RNG_SFMT : public RNG_Abstract { Q_OBJECT private: QMutex mutex; + sfmt_t sfmt; public: RNG_SFMT(QObject *parent = 0); unsigned int getNumber(unsigned int min, unsigned int max); diff --git a/common/sfmt/LICENSE.txt b/common/sfmt/LICENSE.txt index decb2991..6f9c4a6d 100644 --- a/common/sfmt/LICENSE.txt +++ b/common/sfmt/LICENSE.txt @@ -1,5 +1,8 @@ Copyright (c) 2006,2007 Mutsuo Saito, Makoto Matsumoto and Hiroshima -University. All rights reserved. +University. +Copyright (c) 2012 Mutsuo Saito, Makoto Matsumoto, Hiroshima University +and The University of Tokyo. +All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -11,10 +14,10 @@ met: copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - * Neither the name of the Hiroshima University nor the names of - its contributors may be used to endorse or promote products - derived from this software without specific prior written - permission. + * Neither the names of Hiroshima University, The University of + Tokyo nor the names of its contributors may be used to endorse + or promote products derived from this software without specific + prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT diff --git a/common/sfmt/SFMT-common.h b/common/sfmt/SFMT-common.h new file mode 100644 index 00000000..c7d8aa9f --- /dev/null +++ b/common/sfmt/SFMT-common.h @@ -0,0 +1,164 @@ +#pragma once +/** + * @file SFMT-common.h + * + * @brief SIMD oriented Fast Mersenne Twister(SFMT) pseudorandom + * number generator with jump function. This file includes common functions + * used in random number generation and jump. + * + * @author Mutsuo Saito (Hiroshima University) + * @author Makoto Matsumoto (The University of Tokyo) + * + * Copyright (C) 2006, 2007 Mutsuo Saito, Makoto Matsumoto and Hiroshima + * University. + * Copyright (C) 2012 Mutsuo Saito, Makoto Matsumoto, Hiroshima + * University and The University of Tokyo. + * All rights reserved. + * + * The 3-clause BSD License is applied to this software, see + * LICENSE.txt + */ +#ifndef SFMT_COMMON_H +#define SFMT_COMMON_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include "SFMT.h" + +inline static void do_recursion(w128_t * r, w128_t * a, w128_t * b, + w128_t * c, w128_t * d); + +inline static void rshift128(w128_t *out, w128_t const *in, int shift); +inline static void lshift128(w128_t *out, w128_t const *in, int shift); + +/** + * This function simulates SIMD 128-bit right shift by the standard C. + * The 128-bit integer given in in is shifted by (shift * 8) bits. + * This function simulates the LITTLE ENDIAN SIMD. + * @param out the output of this function + * @param in the 128-bit data to be shifted + * @param shift the shift value + */ +#ifdef ONLY64 +inline static void rshift128(w128_t *out, w128_t const *in, int shift) { + uint64_t th, tl, oh, ol; + + th = ((uint64_t)in->u[2] << 32) | ((uint64_t)in->u[3]); + tl = ((uint64_t)in->u[0] << 32) | ((uint64_t)in->u[1]); + + oh = th >> (shift * 8); + ol = tl >> (shift * 8); + ol |= th << (64 - shift * 8); + out->u[0] = (uint32_t)(ol >> 32); + out->u[1] = (uint32_t)ol; + out->u[2] = (uint32_t)(oh >> 32); + out->u[3] = (uint32_t)oh; +} +#else +inline static void rshift128(w128_t *out, w128_t const *in, int shift) +{ + uint64_t th, tl, oh, ol; + + th = ((uint64_t)in->u[3] << 32) | ((uint64_t)in->u[2]); + tl = ((uint64_t)in->u[1] << 32) | ((uint64_t)in->u[0]); + + oh = th >> (shift * 8); + ol = tl >> (shift * 8); + ol |= th << (64 - shift * 8); + out->u[1] = (uint32_t)(ol >> 32); + out->u[0] = (uint32_t)ol; + out->u[3] = (uint32_t)(oh >> 32); + out->u[2] = (uint32_t)oh; +} +#endif +/** + * This function simulates SIMD 128-bit left shift by the standard C. + * The 128-bit integer given in in is shifted by (shift * 8) bits. + * This function simulates the LITTLE ENDIAN SIMD. + * @param out the output of this function + * @param in the 128-bit data to be shifted + * @param shift the shift value + */ +#ifdef ONLY64 +inline static void lshift128(w128_t *out, w128_t const *in, int shift) { + uint64_t th, tl, oh, ol; + + th = ((uint64_t)in->u[2] << 32) | ((uint64_t)in->u[3]); + tl = ((uint64_t)in->u[0] << 32) | ((uint64_t)in->u[1]); + + oh = th << (shift * 8); + ol = tl << (shift * 8); + oh |= tl >> (64 - shift * 8); + out->u[0] = (uint32_t)(ol >> 32); + out->u[1] = (uint32_t)ol; + out->u[2] = (uint32_t)(oh >> 32); + out->u[3] = (uint32_t)oh; +} +#else +inline static void lshift128(w128_t *out, w128_t const *in, int shift) +{ + uint64_t th, tl, oh, ol; + + th = ((uint64_t)in->u[3] << 32) | ((uint64_t)in->u[2]); + tl = ((uint64_t)in->u[1] << 32) | ((uint64_t)in->u[0]); + + oh = th << (shift * 8); + ol = tl << (shift * 8); + oh |= tl >> (64 - shift * 8); + out->u[1] = (uint32_t)(ol >> 32); + out->u[0] = (uint32_t)ol; + out->u[3] = (uint32_t)(oh >> 32); + out->u[2] = (uint32_t)oh; +} +#endif +/** + * This function represents the recursion formula. + * @param r output + * @param a a 128-bit part of the internal state array + * @param b a 128-bit part of the internal state array + * @param c a 128-bit part of the internal state array + * @param d a 128-bit part of the internal state array + */ +#ifdef ONLY64 +inline static void do_recursion(w128_t *r, w128_t *a, w128_t *b, w128_t *c, + w128_t *d) { + w128_t x; + w128_t y; + + lshift128(&x, a, SFMT_SL2); + rshift128(&y, c, SFMT_SR2); + r->u[0] = a->u[0] ^ x.u[0] ^ ((b->u[0] >> SFMT_SR1) & SFMT_MSK2) ^ y.u[0] + ^ (d->u[0] << SFMT_SL1); + r->u[1] = a->u[1] ^ x.u[1] ^ ((b->u[1] >> SFMT_SR1) & SFMT_MSK1) ^ y.u[1] + ^ (d->u[1] << SFMT_SL1); + r->u[2] = a->u[2] ^ x.u[2] ^ ((b->u[2] >> SFMT_SR1) & SFMT_MSK4) ^ y.u[2] + ^ (d->u[2] << SFMT_SL1); + r->u[3] = a->u[3] ^ x.u[3] ^ ((b->u[3] >> SFMT_SR1) & SFMT_MSK3) ^ y.u[3] + ^ (d->u[3] << SFMT_SL1); +} +#else +inline static void do_recursion(w128_t *r, w128_t *a, w128_t *b, + w128_t *c, w128_t *d) +{ + w128_t x; + w128_t y; + + lshift128(&x, a, SFMT_SL2); + rshift128(&y, c, SFMT_SR2); + r->u[0] = a->u[0] ^ x.u[0] ^ ((b->u[0] >> SFMT_SR1) & SFMT_MSK1) + ^ y.u[0] ^ (d->u[0] << SFMT_SL1); + r->u[1] = a->u[1] ^ x.u[1] ^ ((b->u[1] >> SFMT_SR1) & SFMT_MSK2) + ^ y.u[1] ^ (d->u[1] << SFMT_SL1); + r->u[2] = a->u[2] ^ x.u[2] ^ ((b->u[2] >> SFMT_SR1) & SFMT_MSK3) + ^ y.u[2] ^ (d->u[2] << SFMT_SL1); + r->u[3] = a->u[3] ^ x.u[3] ^ ((b->u[3] >> SFMT_SR1) & SFMT_MSK4) + ^ y.u[3] ^ (d->u[3] << SFMT_SL1); +} +#endif +#endif + +#if defined(__cplusplus) +} +#endif diff --git a/common/sfmt/SFMT-params.h b/common/sfmt/SFMT-params.h index 661bbf26..372e6f11 100644 --- a/common/sfmt/SFMT-params.h +++ b/common/sfmt/SFMT-params.h @@ -1,95 +1,96 @@ +#pragma once #ifndef SFMT_PARAMS_H #define SFMT_PARAMS_H -#if !defined(MEXP) -#ifdef __GNUC__ - #warning "MEXP is not defined. I assume MEXP is 19937." +#if !defined(SFMT_MEXP) +#if defined(__GNUC__) && !defined(__ICC) + #warning "SFMT_MEXP is not defined. I assume MEXP is 19937." #endif - #define MEXP 19937 + #define SFMT_MEXP 19937 #endif /*----------------- BASIC DEFINITIONS -----------------*/ -/** Mersenne Exponent. The period of the sequence +/** Mersenne Exponent. The period of the sequence * is a multiple of 2^MEXP-1. - * #define MEXP 19937 */ + * #define SFMT_MEXP 19937 */ /** SFMT generator has an internal state array of 128-bit integers, * and N is its size. */ -#define N (MEXP / 128 + 1) +#define SFMT_N (SFMT_MEXP / 128 + 1) /** N32 is the size of internal state array when regarded as an array * of 32-bit integers.*/ -#define N32 (N * 4) +#define SFMT_N32 (SFMT_N * 4) /** N64 is the size of internal state array when regarded as an array * of 64-bit integers.*/ -#define N64 (N * 2) +#define SFMT_N64 (SFMT_N * 2) /*---------------------- the parameters of SFMT following definitions are in paramsXXXX.h file. ----------------------*/ /** the pick up position of the array. -#define POS1 122 +#define SFMT_POS1 122 */ /** the parameter of shift left as four 32-bit registers. -#define SL1 18 +#define SFMT_SL1 18 */ -/** the parameter of shift left as one 128-bit register. - * The 128-bit integer is shifted by (SL2 * 8) bits. -#define SL2 1 +/** the parameter of shift left as one 128-bit register. + * The 128-bit integer is shifted by (SFMT_SL2 * 8) bits. +#define SFMT_SL2 1 */ /** the parameter of shift right as four 32-bit registers. -#define SR1 11 +#define SFMT_SR1 11 */ -/** the parameter of shift right as one 128-bit register. - * The 128-bit integer is shifted by (SL2 * 8) bits. -#define SR2 1 +/** the parameter of shift right as one 128-bit register. + * The 128-bit integer is shifted by (SFMT_SL2 * 8) bits. +#define SFMT_SR21 1 */ /** A bitmask, used in the recursion. These parameters are introduced * to break symmetry of SIMD. -#define MSK1 0xdfffffefU -#define MSK2 0xddfecb7fU -#define MSK3 0xbffaffffU -#define MSK4 0xbffffff6U +#define SFMT_MSK1 0xdfffffefU +#define SFMT_MSK2 0xddfecb7fU +#define SFMT_MSK3 0xbffaffffU +#define SFMT_MSK4 0xbffffff6U */ /** These definitions are part of a 128-bit period certification vector. -#define PARITY1 0x00000001U -#define PARITY2 0x00000000U -#define PARITY3 0x00000000U -#define PARITY4 0xc98e126aU +#define SFMT_PARITY1 0x00000001U +#define SFMT_PARITY2 0x00000000U +#define SFMT_PARITY3 0x00000000U +#define SFMT_PARITY4 0xc98e126aU */ -#if MEXP == 607 +#if SFMT_MEXP == 607 #include "SFMT-params607.h" -#elif MEXP == 1279 +#elif SFMT_MEXP == 1279 #include "SFMT-params1279.h" -#elif MEXP == 2281 +#elif SFMT_MEXP == 2281 #include "SFMT-params2281.h" -#elif MEXP == 4253 +#elif SFMT_MEXP == 4253 #include "SFMT-params4253.h" -#elif MEXP == 11213 +#elif SFMT_MEXP == 11213 #include "SFMT-params11213.h" -#elif MEXP == 19937 +#elif SFMT_MEXP == 19937 #include "SFMT-params19937.h" -#elif MEXP == 44497 +#elif SFMT_MEXP == 44497 #include "SFMT-params44497.h" -#elif MEXP == 86243 +#elif SFMT_MEXP == 86243 #include "SFMT-params86243.h" -#elif MEXP == 132049 +#elif SFMT_MEXP == 132049 #include "SFMT-params132049.h" -#elif MEXP == 216091 +#elif SFMT_MEXP == 216091 #include "SFMT-params216091.h" #else -#ifdef __GNUC__ - #error "MEXP is not valid." - #undef MEXP +#if defined(__GNUC__) && !defined(__ICC) + #error "SFMT_MEXP is not valid." + #undef SFMT_MEXP #else - #undef MEXP + #undef SFMT_MEXP #endif #endif diff --git a/common/sfmt/SFMT-params19937.h b/common/sfmt/SFMT-params19937.h index 04708cdf..fc49fa14 100644 --- a/common/sfmt/SFMT-params19937.h +++ b/common/sfmt/SFMT-params19937.h @@ -1,46 +1,50 @@ +#pragma once #ifndef SFMT_PARAMS19937_H #define SFMT_PARAMS19937_H -#define POS1 122 -#define SL1 18 -#define SL2 1 -#define SR1 11 -#define SR2 1 -#define MSK1 0xdfffffefU -#define MSK2 0xddfecb7fU -#define MSK3 0xbffaffffU -#define MSK4 0xbffffff6U -#define PARITY1 0x00000001U -#define PARITY2 0x00000000U -#define PARITY3 0x00000000U -#define PARITY4 0x13c9e684U +#define SFMT_POS1 122 +#define SFMT_SL1 18 +#define SFMT_SL2 1 +#define SFMT_SR1 11 +#define SFMT_SR2 1 +#define SFMT_MSK1 0xdfffffefU +#define SFMT_MSK2 0xddfecb7fU +#define SFMT_MSK3 0xbffaffffU +#define SFMT_MSK4 0xbffffff6U +#define SFMT_PARITY1 0x00000001U +#define SFMT_PARITY2 0x00000000U +#define SFMT_PARITY3 0x00000000U +#define SFMT_PARITY4 0x13c9e684U /* PARAMETERS FOR ALTIVEC */ #if defined(__APPLE__) /* For OSX */ - #define ALTI_SL1 (vector unsigned int)(SL1, SL1, SL1, SL1) - #define ALTI_SR1 (vector unsigned int)(SR1, SR1, SR1, SR1) - #define ALTI_MSK (vector unsigned int)(MSK1, MSK2, MSK3, MSK4) - #define ALTI_MSK64 \ - (vector unsigned int)(MSK2, MSK1, MSK4, MSK3) - #define ALTI_SL2_PERM \ + #define SFMT_ALTI_SL1 \ + (vector unsigned int)(SFMT_SL1, SFMT_SL1, SFMT_SL1, SFMT_SL1) + #define SFMT_ALTI_SR1 \ + (vector unsigned int)(SFMT_SR1, SFMT_SR1, SFMT_SR1, SFMT_SR1) + #define SFMT_ALTI_MSK \ + (vector unsigned int)(SFMT_MSK1, SFMT_MSK2, SFMT_MSK3, SFMT_MSK4) + #define SFMT_ALTI_MSK64 \ + (vector unsigned int)(SFMT_MSK2, SFMT_MSK1, SFMT_MSK4, SFMT_MSK3) + #define SFMT_ALTI_SL2_PERM \ (vector unsigned char)(1,2,3,23,5,6,7,0,9,10,11,4,13,14,15,8) - #define ALTI_SL2_PERM64 \ + #define SFMT_ALTI_SL2_PERM64 \ (vector unsigned char)(1,2,3,4,5,6,7,31,9,10,11,12,13,14,15,0) - #define ALTI_SR2_PERM \ + #define SFMT_ALTI_SR2_PERM \ (vector unsigned char)(7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14) - #define ALTI_SR2_PERM64 \ + #define SFMT_ALTI_SR2_PERM64 \ (vector unsigned char)(15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14) #else /* For OTHER OSs(Linux?) */ - #define ALTI_SL1 {SL1, SL1, SL1, SL1} - #define ALTI_SR1 {SR1, SR1, SR1, SR1} - #define ALTI_MSK {MSK1, MSK2, MSK3, MSK4} - #define ALTI_MSK64 {MSK2, MSK1, MSK4, MSK3} - #define ALTI_SL2_PERM {1,2,3,23,5,6,7,0,9,10,11,4,13,14,15,8} - #define ALTI_SL2_PERM64 {1,2,3,4,5,6,7,31,9,10,11,12,13,14,15,0} - #define ALTI_SR2_PERM {7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14} - #define ALTI_SR2_PERM64 {15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14} + #define SFMT_ALTI_SL1 {SFMT_SL1, SFMT_SL1, SFMT_SL1, SFMT_SL1} + #define SFMT_ALTI_SR1 {SFMT_SR1, SFMT_SR1, SFMT_SR1, SFMT_SR1} + #define SFMT_ALTI_MSK {SFMT_MSK1, SFMT_MSK2, SFMT_MSK3, SFMT_MSK4} + #define SFMT_ALTI_MSK64 {SFMT_MSK2, SFMT_MSK1, SFMT_MSK4, SFMT_MSK3} + #define SFMT_ALTI_SL2_PERM {1,2,3,23,5,6,7,0,9,10,11,4,13,14,15,8} + #define SFMT_ALTI_SL2_PERM64 {1,2,3,4,5,6,7,31,9,10,11,12,13,14,15,0} + #define SFMT_ALTI_SR2_PERM {7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14} + #define SFMT_ALTI_SR2_PERM64 {15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14} #endif /* For OSX */ -#define IDSTR "SFMT-19937:122-18-1-11-1:dfffffef-ddfecb7f-bffaffff-bffffff6" +#define SFMT_IDSTR "SFMT-19937:122-18-1-11-1:dfffffef-ddfecb7f-bffaffff-bffffff6" #endif /* SFMT_PARAMS19937_H */ diff --git a/common/sfmt/SFMT.c b/common/sfmt/SFMT.c index d36465d9..2652df7d 100644 --- a/common/sfmt/SFMT.c +++ b/common/sfmt/SFMT.c @@ -1,19 +1,31 @@ -/** +/** * @file SFMT.c * @brief SIMD oriented Fast Mersenne Twister(SFMT) * * @author Mutsuo Saito (Hiroshima University) * @author Makoto Matsumoto (Hiroshima University) * - * Copyright (C) 2006,2007 Mutsuo Saito, Makoto Matsumoto and Hiroshima - * University. All rights reserved. + * Copyright (C) 2006, 2007 Mutsuo Saito, Makoto Matsumoto and Hiroshima + * University. + * Copyright (C) 2012 Mutsuo Saito, Makoto Matsumoto, Hiroshima + * University and The University of Tokyo. + * Copyright (C) 2013 Mutsuo Saito, Makoto Matsumoto and Hiroshima + * University. + * All rights reserved. * - * The new BSD License is applied to this software, see LICENSE.txt + * The 3-clause BSD License is applied to this software, see + * LICENSE.txt */ + +#if defined(__cplusplus) +extern "C" { +#endif + #include #include #include "SFMT.h" #include "SFMT-params.h" +#include "SFMT-common.h" #if defined(__BIG_ENDIAN__) && !defined(__amd64) && !defined(BIG_ENDIAN64) #define BIG_ENDIAN64 1 @@ -27,74 +39,20 @@ #endif #undef ONLY64 #endif -/*------------------------------------------------------ - 128-bit SIMD data type for Altivec, SSE2 or standard C - ------------------------------------------------------*/ -#if defined(HAVE_ALTIVEC) - #if !defined(__APPLE__) - #include - #endif -/** 128-bit data structure */ -union W128_T { - vector unsigned int s; - uint32_t u[4]; -}; -/** 128-bit data type */ -typedef union W128_T w128_t; - -#elif defined(HAVE_SSE2) - #include - -/** 128-bit data structure */ -union W128_T { - __m128i si; - uint32_t u[4]; -}; -/** 128-bit data type */ -typedef union W128_T w128_t; - -#else - -/** 128-bit data structure */ -struct W128_T { - uint32_t u[4]; -}; -/** 128-bit data type */ -typedef struct W128_T w128_t; - -#endif - -/*-------------------------------------- - FILE GLOBAL VARIABLES - internal state, index counter and flag - --------------------------------------*/ -/** the 128-bit internal state array */ -static w128_t sfmt[N]; -/** the 32bit integer pointer to the 128-bit internal state array */ -static uint32_t *psfmt32 = &sfmt[0].u[0]; -#if !defined(BIG_ENDIAN64) || defined(ONLY64) -/** the 64bit integer pointer to the 128-bit internal state array */ -static uint64_t *psfmt64 = (uint64_t *)&sfmt[0].u[0]; -#endif -/** index counter to the 32-bit internal state array */ -static int idx; -/** a flag: it is 0 if and only if the internal state is not yet - * initialized. */ -static int initialized = 0; -/** a parity check vector which certificate the period of 2^{MEXP} */ -static uint32_t parity[4] = {PARITY1, PARITY2, PARITY3, PARITY4}; +/** + * parameters used by sse2. + */ +static const w128_t sse2_param_mask = {{SFMT_MSK1, SFMT_MSK2, + SFMT_MSK3, SFMT_MSK4}}; /*---------------- STATIC FUNCTIONS ----------------*/ inline static int idxof(int i); -inline static void rshift128(w128_t *out, w128_t const *in, int shift); -inline static void lshift128(w128_t *out, w128_t const *in, int shift); -inline static void gen_rand_all(void); -inline static void gen_rand_array(w128_t *array, int size); +inline static void gen_rand_array(sfmt_t * sfmt, w128_t *array, int size); inline static uint32_t func1(uint32_t x); inline static uint32_t func2(uint32_t x); -static void period_certification(void); +static void period_certification(sfmt_t * sfmt); #if defined(BIG_ENDIAN64) && !defined(ONLY64) inline static void swap(w128_t *array, int size); #endif @@ -102,11 +60,15 @@ inline static void swap(w128_t *array, int size); #if defined(HAVE_ALTIVEC) #include "SFMT-alti.h" #elif defined(HAVE_SSE2) - #include "SFMT-sse2.h" + #if defined(_MSC_VER) + #include "SFMT-sse2-msc.h" + #else + #include "SFMT-sse2.h" + #endif #endif /** - * This function simulate a 64-bit index of LITTLE ENDIAN + * This function simulate a 64-bit index of LITTLE ENDIAN * in BIG ENDIAN machine. */ #ifdef ONLY64 @@ -118,190 +80,48 @@ inline static int idxof(int i) { return i; } #endif -/** - * This function simulates SIMD 128-bit right shift by the standard C. - * The 128-bit integer given in in is shifted by (shift * 8) bits. - * This function simulates the LITTLE ENDIAN SIMD. - * @param out the output of this function - * @param in the 128-bit data to be shifted - * @param shift the shift value - */ -#ifdef ONLY64 -inline static void rshift128(w128_t *out, w128_t const *in, int shift) { - uint64_t th, tl, oh, ol; - - th = ((uint64_t)in->u[2] << 32) | ((uint64_t)in->u[3]); - tl = ((uint64_t)in->u[0] << 32) | ((uint64_t)in->u[1]); - - oh = th >> (shift * 8); - ol = tl >> (shift * 8); - ol |= th << (64 - shift * 8); - out->u[0] = (uint32_t)(ol >> 32); - out->u[1] = (uint32_t)ol; - out->u[2] = (uint32_t)(oh >> 32); - out->u[3] = (uint32_t)oh; -} -#else -inline static void rshift128(w128_t *out, w128_t const *in, int shift) { - uint64_t th, tl, oh, ol; - - th = ((uint64_t)in->u[3] << 32) | ((uint64_t)in->u[2]); - tl = ((uint64_t)in->u[1] << 32) | ((uint64_t)in->u[0]); - - oh = th >> (shift * 8); - ol = tl >> (shift * 8); - ol |= th << (64 - shift * 8); - out->u[1] = (uint32_t)(ol >> 32); - out->u[0] = (uint32_t)ol; - out->u[3] = (uint32_t)(oh >> 32); - out->u[2] = (uint32_t)oh; -} -#endif -/** - * This function simulates SIMD 128-bit left shift by the standard C. - * The 128-bit integer given in in is shifted by (shift * 8) bits. - * This function simulates the LITTLE ENDIAN SIMD. - * @param out the output of this function - * @param in the 128-bit data to be shifted - * @param shift the shift value - */ -#ifdef ONLY64 -inline static void lshift128(w128_t *out, w128_t const *in, int shift) { - uint64_t th, tl, oh, ol; - - th = ((uint64_t)in->u[2] << 32) | ((uint64_t)in->u[3]); - tl = ((uint64_t)in->u[0] << 32) | ((uint64_t)in->u[1]); - - oh = th << (shift * 8); - ol = tl << (shift * 8); - oh |= tl >> (64 - shift * 8); - out->u[0] = (uint32_t)(ol >> 32); - out->u[1] = (uint32_t)ol; - out->u[2] = (uint32_t)(oh >> 32); - out->u[3] = (uint32_t)oh; -} -#else -inline static void lshift128(w128_t *out, w128_t const *in, int shift) { - uint64_t th, tl, oh, ol; - - th = ((uint64_t)in->u[3] << 32) | ((uint64_t)in->u[2]); - tl = ((uint64_t)in->u[1] << 32) | ((uint64_t)in->u[0]); - - oh = th << (shift * 8); - ol = tl << (shift * 8); - oh |= tl >> (64 - shift * 8); - out->u[1] = (uint32_t)(ol >> 32); - out->u[0] = (uint32_t)ol; - out->u[3] = (uint32_t)(oh >> 32); - out->u[2] = (uint32_t)oh; -} -#endif - -/** - * This function represents the recursion formula. - * @param r output - * @param a a 128-bit part of the internal state array - * @param b a 128-bit part of the internal state array - * @param c a 128-bit part of the internal state array - * @param d a 128-bit part of the internal state array - */ -#if (!defined(HAVE_ALTIVEC)) && (!defined(HAVE_SSE2)) -#ifdef ONLY64 -inline static void do_recursion(w128_t *r, w128_t *a, w128_t *b, w128_t *c, - w128_t *d) { - w128_t x; - w128_t y; - - lshift128(&x, a, SL2); - rshift128(&y, c, SR2); - r->u[0] = a->u[0] ^ x.u[0] ^ ((b->u[0] >> SR1) & MSK2) ^ y.u[0] - ^ (d->u[0] << SL1); - r->u[1] = a->u[1] ^ x.u[1] ^ ((b->u[1] >> SR1) & MSK1) ^ y.u[1] - ^ (d->u[1] << SL1); - r->u[2] = a->u[2] ^ x.u[2] ^ ((b->u[2] >> SR1) & MSK4) ^ y.u[2] - ^ (d->u[2] << SL1); - r->u[3] = a->u[3] ^ x.u[3] ^ ((b->u[3] >> SR1) & MSK3) ^ y.u[3] - ^ (d->u[3] << SL1); -} -#else -inline static void do_recursion(w128_t *r, w128_t *a, w128_t *b, w128_t *c, - w128_t *d) { - w128_t x; - w128_t y; - - lshift128(&x, a, SL2); - rshift128(&y, c, SR2); - r->u[0] = a->u[0] ^ x.u[0] ^ ((b->u[0] >> SR1) & MSK1) ^ y.u[0] - ^ (d->u[0] << SL1); - r->u[1] = a->u[1] ^ x.u[1] ^ ((b->u[1] >> SR1) & MSK2) ^ y.u[1] - ^ (d->u[1] << SL1); - r->u[2] = a->u[2] ^ x.u[2] ^ ((b->u[2] >> SR1) & MSK3) ^ y.u[2] - ^ (d->u[2] << SL1); - r->u[3] = a->u[3] ^ x.u[3] ^ ((b->u[3] >> SR1) & MSK4) ^ y.u[3] - ^ (d->u[3] << SL1); -} -#endif -#endif #if (!defined(HAVE_ALTIVEC)) && (!defined(HAVE_SSE2)) -/** - * This function fills the internal state array with pseudorandom - * integers. - */ -inline static void gen_rand_all(void) { - int i; - w128_t *r1, *r2; - - r1 = &sfmt[N - 2]; - r2 = &sfmt[N - 1]; - for (i = 0; i < N - POS1; i++) { - do_recursion(&sfmt[i], &sfmt[i], &sfmt[i + POS1], r1, r2); - r1 = r2; - r2 = &sfmt[i]; - } - for (; i < N; i++) { - do_recursion(&sfmt[i], &sfmt[i], &sfmt[i + POS1 - N], r1, r2); - r1 = r2; - r2 = &sfmt[i]; - } -} - /** * This function fills the user-specified array with pseudorandom * integers. * - * @param array an 128-bit array to be filled by pseudorandom numbers. + * @param sfmt SFMT internal state + * @param array an 128-bit array to be filled by pseudorandom numbers. * @param size number of 128-bit pseudorandom numbers to be generated. */ -inline static void gen_rand_array(w128_t *array, int size) { +inline static void gen_rand_array(sfmt_t * sfmt, w128_t *array, int size) { int i, j; w128_t *r1, *r2; - r1 = &sfmt[N - 2]; - r2 = &sfmt[N - 1]; - for (i = 0; i < N - POS1; i++) { - do_recursion(&array[i], &sfmt[i], &sfmt[i + POS1], r1, r2); - r1 = r2; - r2 = &array[i]; + r1 = &sfmt->state[SFMT_N - 2]; + r2 = &sfmt->state[SFMT_N - 1]; + for (i = 0; i < SFMT_N - SFMT_POS1; i++) { + do_recursion(&array[i], &sfmt->state[i], &sfmt->state[i + SFMT_POS1], r1, r2); + r1 = r2; + r2 = &array[i]; } - for (; i < N; i++) { - do_recursion(&array[i], &sfmt[i], &array[i + POS1 - N], r1, r2); - r1 = r2; - r2 = &array[i]; + for (; i < SFMT_N; i++) { + do_recursion(&array[i], &sfmt->state[i], + &array[i + SFMT_POS1 - SFMT_N], r1, r2); + r1 = r2; + r2 = &array[i]; } - for (; i < size - N; i++) { - do_recursion(&array[i], &array[i - N], &array[i + POS1 - N], r1, r2); - r1 = r2; - r2 = &array[i]; + for (; i < size - SFMT_N; i++) { + do_recursion(&array[i], &array[i - SFMT_N], + &array[i + SFMT_POS1 - SFMT_N], r1, r2); + r1 = r2; + r2 = &array[i]; } - for (j = 0; j < 2 * N - size; j++) { - sfmt[j] = array[j + size - N]; + for (j = 0; j < 2 * SFMT_N - size; j++) { + sfmt->state[j] = array[j + size - SFMT_N]; } for (; i < size; i++, j++) { - do_recursion(&array[i], &array[i - N], &array[i + POS1 - N], r1, r2); - r1 = r2; - r2 = &array[i]; - sfmt[j] = array[i]; + do_recursion(&array[i], &array[i - SFMT_N], + &array[i + SFMT_POS1 - SFMT_N], r1, r2); + r1 = r2; + r2 = &array[i]; + sfmt->state[j] = array[i]; } } #endif @@ -312,12 +132,12 @@ inline static void swap(w128_t *array, int size) { uint32_t x, y; for (i = 0; i < size; i++) { - x = array[i].u[0]; - y = array[i].u[2]; - array[i].u[0] = array[i].u[1]; - array[i].u[2] = array[i].u[3]; - array[i].u[1] = x; - array[i].u[3] = y; + x = array[i].u[0]; + y = array[i].u[2]; + array[i].u[0] = array[i].u[1]; + array[i].u[2] = array[i].u[3]; + array[i].u[1] = x; + array[i].u[3] = y; } } #endif @@ -343,114 +163,101 @@ static uint32_t func2(uint32_t x) { /** * This function certificate the period of 2^{MEXP} + * @param sfmt SFMT internal state */ -static void period_certification(void) { +static void period_certification(sfmt_t * sfmt) { int inner = 0; int i, j; uint32_t work; + uint32_t *psfmt32 = &sfmt->state[0].u[0]; + const uint32_t parity[4] = {SFMT_PARITY1, SFMT_PARITY2, + SFMT_PARITY3, SFMT_PARITY4}; for (i = 0; i < 4; i++) - inner ^= psfmt32[idxof(i)] & parity[i]; + inner ^= psfmt32[idxof(i)] & parity[i]; for (i = 16; i > 0; i >>= 1) - inner ^= inner >> i; + inner ^= inner >> i; inner &= 1; /* check OK */ if (inner == 1) { - return; + return; } /* check NG, and modification */ for (i = 0; i < 4; i++) { - work = 1; - for (j = 0; j < 32; j++) { - if ((work & parity[i]) != 0) { - psfmt32[idxof(i)] ^= work; - return; - } - work = work << 1; - } + work = 1; + for (j = 0; j < 32; j++) { + if ((work & parity[i]) != 0) { + psfmt32[idxof(i)] ^= work; + return; + } + work = work << 1; + } } } /*---------------- PUBLIC FUNCTIONS ----------------*/ +#define UNUSED_VARIABLE(x) (void)(x) /** * This function returns the identification string. * The string shows the word size, the Mersenne exponent, * and all parameters of this generator. + * @param sfmt SFMT internal state */ -const char *get_idstring(void) { - return IDSTR; +const char *sfmt_get_idstring(sfmt_t * sfmt) { + UNUSED_VARIABLE(sfmt); + return SFMT_IDSTR; } /** * This function returns the minimum size of array used for \b * fill_array32() function. + * @param sfmt SFMT internal state * @return minimum size of array used for fill_array32() function. */ -int get_min_array_size32(void) { - return N32; +int sfmt_get_min_array_size32(sfmt_t * sfmt) { + UNUSED_VARIABLE(sfmt); + return SFMT_N32; } /** * This function returns the minimum size of array used for \b * fill_array64() function. + * @param sfmt SFMT internal state * @return minimum size of array used for fill_array64() function. */ -int get_min_array_size64(void) { - return N64; +int sfmt_get_min_array_size64(sfmt_t * sfmt) { + UNUSED_VARIABLE(sfmt); + return SFMT_N64; } -#ifndef ONLY64 +#if !defined(HAVE_SSE2) && !defined(HAVE_ALTIVEC) /** - * This function generates and returns 32-bit pseudorandom number. - * init_gen_rand or init_by_array must be called before this function. - * @return 32-bit pseudorandom number + * This function fills the internal state array with pseudorandom + * integers. + * @param sfmt SFMT internal state */ -uint32_t gen_rand32(void) { - uint32_t r; +void sfmt_gen_rand_all(sfmt_t * sfmt) { + int i; + w128_t *r1, *r2; - assert(initialized); - if (idx >= N32) { - gen_rand_all(); - idx = 0; + r1 = &sfmt->state[SFMT_N - 2]; + r2 = &sfmt->state[SFMT_N - 1]; + for (i = 0; i < SFMT_N - SFMT_POS1; i++) { + do_recursion(&sfmt->state[i], &sfmt->state[i], + &sfmt->state[i + SFMT_POS1], r1, r2); + r1 = r2; + r2 = &sfmt->state[i]; + } + for (; i < SFMT_N; i++) { + do_recursion(&sfmt->state[i], &sfmt->state[i], + &sfmt->state[i + SFMT_POS1 - SFMT_N], r1, r2); + r1 = r2; + r2 = &sfmt->state[i]; } - r = psfmt32[idx++]; - return r; } #endif -/** - * This function generates and returns 64-bit pseudorandom number. - * init_gen_rand or init_by_array must be called before this function. - * The function gen_rand64 should not be called after gen_rand32, - * unless an initialization is again executed. - * @return 64-bit pseudorandom number - */ -uint64_t gen_rand64(void) { -#if defined(BIG_ENDIAN64) && !defined(ONLY64) - uint32_t r1, r2; -#else - uint64_t r; -#endif - - assert(initialized); - assert(idx % 2 == 0); - - if (idx >= N32) { - gen_rand_all(); - idx = 0; - } -#if defined(BIG_ENDIAN64) && !defined(ONLY64) - r1 = psfmt32[idx]; - r2 = psfmt32[idx + 1]; - idx += 2; - return ((uint64_t)r2 << 32) | r1; -#else - r = psfmt64[idx / 2]; - idx += 2; - return r; -#endif -} #ifndef ONLY64 /** @@ -464,6 +271,7 @@ uint64_t gen_rand64(void) { * before the first call of this function. This function can not be * used after calling gen_rand function, without initialization. * + * @param sfmt SFMT internal state * @param array an array where pseudorandom 32-bit integers are filled * by this function. The pointer to the array must be \b "aligned" * (namely, must be a multiple of 16) in the SIMD version, since it @@ -478,14 +286,13 @@ uint64_t gen_rand64(void) { * memory. Mac OSX doesn't have these functions, but \b malloc of OSX * returns the pointer to the aligned memory block. */ -void fill_array32(uint32_t *array, int size) { - assert(initialized); - assert(idx == N32); +void sfmt_fill_array32(sfmt_t * sfmt, uint32_t *array, int size) { + assert(sfmt->idx == SFMT_N32); assert(size % 4 == 0); - assert(size >= N32); + assert(size >= SFMT_N32); - gen_rand_array((w128_t *)array, size / 4); - idx = N32; + gen_rand_array(sfmt, (w128_t *)array, size / 4); + sfmt->idx = SFMT_N32; } #endif @@ -496,6 +303,7 @@ void fill_array32(uint32_t *array, int size) { * multiple of two. The generation by this function is much faster * than the following gen_rand function. * + * @param sfmt SFMT internal state * For initialization, init_gen_rand or init_by_array must be called * before the first call of this function. This function can not be * used after calling gen_rand function, without initialization. @@ -514,14 +322,13 @@ void fill_array32(uint32_t *array, int size) { * memory. Mac OSX doesn't have these functions, but \b malloc of OSX * returns the pointer to the aligned memory block. */ -void fill_array64(uint64_t *array, int size) { - assert(initialized); - assert(idx == N32); +void sfmt_fill_array64(sfmt_t * sfmt, uint64_t *array, int size) { + assert(sfmt->idx == SFMT_N32); assert(size % 2 == 0); - assert(size >= N64); + assert(size >= SFMT_N64); - gen_rand_array((w128_t *)array, size / 2); - idx = N32; + gen_rand_array(sfmt, (w128_t *)array, size / 2); + sfmt->idx = SFMT_N32; #if defined(BIG_ENDIAN64) && !defined(ONLY64) swap((w128_t *)array, size /2); @@ -532,54 +339,58 @@ void fill_array64(uint64_t *array, int size) { * This function initializes the internal state array with a 32-bit * integer seed. * + * @param sfmt SFMT internal state * @param seed a 32-bit integer used as the seed. */ -void init_gen_rand(uint32_t seed) { +void sfmt_init_gen_rand(sfmt_t * sfmt, uint32_t seed) { int i; + uint32_t *psfmt32 = &sfmt->state[0].u[0]; + psfmt32[idxof(0)] = seed; - for (i = 1; i < N32; i++) { - psfmt32[idxof(i)] = 1812433253UL * (psfmt32[idxof(i - 1)] - ^ (psfmt32[idxof(i - 1)] >> 30)) - + i; + for (i = 1; i < SFMT_N32; i++) { + psfmt32[idxof(i)] = 1812433253UL * (psfmt32[idxof(i - 1)] + ^ (psfmt32[idxof(i - 1)] >> 30)) + + i; } - idx = N32; - period_certification(); - initialized = 1; + sfmt->idx = SFMT_N32; + period_certification(sfmt); } /** * This function initializes the internal state array, * with an array of 32-bit integers used as the seeds + * @param sfmt SFMT internal state * @param init_key the array of 32-bit integers, used as a seed. * @param key_length the length of init_key. */ -void init_by_array(uint32_t *init_key, int key_length) { +void sfmt_init_by_array(sfmt_t * sfmt, uint32_t *init_key, int key_length) { int i, j, count; uint32_t r; int lag; int mid; - int size = N * 4; + int size = SFMT_N * 4; + uint32_t *psfmt32 = &sfmt->state[0].u[0]; if (size >= 623) { - lag = 11; + lag = 11; } else if (size >= 68) { - lag = 7; + lag = 7; } else if (size >= 39) { - lag = 5; + lag = 5; } else { - lag = 3; + lag = 3; } mid = (size - lag) / 2; - memset(sfmt, 0x8b, sizeof(sfmt)); - if (key_length + 1 > N32) { - count = key_length + 1; + memset(sfmt, 0x8b, sizeof(sfmt_t)); + if (key_length + 1 > SFMT_N32) { + count = key_length + 1; } else { - count = N32; + count = SFMT_N32; } - r = func1(psfmt32[idxof(0)] ^ psfmt32[idxof(mid)] - ^ psfmt32[idxof(N32 - 1)]); + r = func1(psfmt32[idxof(0)] ^ psfmt32[idxof(mid)] + ^ psfmt32[idxof(SFMT_N32 - 1)]); psfmt32[idxof(mid)] += r; r += key_length; psfmt32[idxof(mid + lag)] += r; @@ -587,34 +398,36 @@ void init_by_array(uint32_t *init_key, int key_length) { count--; for (i = 1, j = 0; (j < count) && (j < key_length); j++) { - r = func1(psfmt32[idxof(i)] ^ psfmt32[idxof((i + mid) % N32)] - ^ psfmt32[idxof((i + N32 - 1) % N32)]); - psfmt32[idxof((i + mid) % N32)] += r; - r += init_key[j] + i; - psfmt32[idxof((i + mid + lag) % N32)] += r; - psfmt32[idxof(i)] = r; - i = (i + 1) % N32; + r = func1(psfmt32[idxof(i)] ^ psfmt32[idxof((i + mid) % SFMT_N32)] + ^ psfmt32[idxof((i + SFMT_N32 - 1) % SFMT_N32)]); + psfmt32[idxof((i + mid) % SFMT_N32)] += r; + r += init_key[j] + i; + psfmt32[idxof((i + mid + lag) % SFMT_N32)] += r; + psfmt32[idxof(i)] = r; + i = (i + 1) % SFMT_N32; } for (; j < count; j++) { - r = func1(psfmt32[idxof(i)] ^ psfmt32[idxof((i + mid) % N32)] - ^ psfmt32[idxof((i + N32 - 1) % N32)]); - psfmt32[idxof((i + mid) % N32)] += r; - r += i; - psfmt32[idxof((i + mid + lag) % N32)] += r; - psfmt32[idxof(i)] = r; - i = (i + 1) % N32; + r = func1(psfmt32[idxof(i)] ^ psfmt32[idxof((i + mid) % SFMT_N32)] + ^ psfmt32[idxof((i + SFMT_N32 - 1) % SFMT_N32)]); + psfmt32[idxof((i + mid) % SFMT_N32)] += r; + r += i; + psfmt32[idxof((i + mid + lag) % SFMT_N32)] += r; + psfmt32[idxof(i)] = r; + i = (i + 1) % SFMT_N32; } - for (j = 0; j < N32; j++) { - r = func2(psfmt32[idxof(i)] + psfmt32[idxof((i + mid) % N32)] - + psfmt32[idxof((i + N32 - 1) % N32)]); - psfmt32[idxof((i + mid) % N32)] ^= r; - r -= i; - psfmt32[idxof((i + mid + lag) % N32)] ^= r; - psfmt32[idxof(i)] = r; - i = (i + 1) % N32; + for (j = 0; j < SFMT_N32; j++) { + r = func2(psfmt32[idxof(i)] + psfmt32[idxof((i + mid) % SFMT_N32)] + + psfmt32[idxof((i + SFMT_N32 - 1) % SFMT_N32)]); + psfmt32[idxof((i + mid) % SFMT_N32)] ^= r; + r -= i; + psfmt32[idxof((i + mid + lag) % SFMT_N32)] ^= r; + psfmt32[idxof(i)] = r; + i = (i + 1) % SFMT_N32; } - idx = N32; - period_certification(); - initialized = 1; + sfmt->idx = SFMT_N32; + period_certification(sfmt); } +#if defined(__cplusplus) +} +#endif diff --git a/common/sfmt/SFMT.h b/common/sfmt/SFMT.h index f0aba198..dca308a0 100644 --- a/common/sfmt/SFMT.h +++ b/common/sfmt/SFMT.h @@ -1,24 +1,28 @@ -/** - * @file SFMT.h +#pragma once +/** + * @file SFMT.h * * @brief SIMD oriented Fast Mersenne Twister(SFMT) pseudorandom - * number generator + * number generator using C structure. * * @author Mutsuo Saito (Hiroshima University) - * @author Makoto Matsumoto (Hiroshima University) + * @author Makoto Matsumoto (The University of Tokyo) * * Copyright (C) 2006, 2007 Mutsuo Saito, Makoto Matsumoto and Hiroshima - * University. All rights reserved. + * University. + * Copyright (C) 2012 Mutsuo Saito, Makoto Matsumoto, Hiroshima + * University and The University of Tokyo. + * All rights reserved. * - * The new BSD License is applied to this software. - * see LICENSE.txt + * The 3-clause BSD License is applied to this software, see + * LICENSE.txt * * @note We assume that your system has inttypes.h. If your system * doesn't have inttypes.h, you have to typedef uint32_t and uint64_t, * and you have to define PRIu64 and PRIx64 in this file as follows: * @verbatim typedef unsigned int uint32_t - typedef unsigned long long uint64_t + typedef unsigned long long uint64_t #define PRIu64 "llu" #define PRIx64 "llx" @endverbatim @@ -28,14 +32,14 @@ * unsigned int and 64-bit unsigned int in hexadecimal format. */ -#ifndef SFMT_H -#define SFMT_H - -#ifdef __cplusplus +#ifndef SFMTST_H +#define SFMTST_H +#if defined(__cplusplus) extern "C" { #endif #include +#include #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) #include @@ -60,105 +64,232 @@ extern "C" { #endif #endif -#if defined(__GNUC__) -#define ALWAYSINLINE __attribute__((always_inline)) -#else -#define ALWAYSINLINE -#endif +#include "SFMT-params.h" -#if defined(_MSC_VER) - #if _MSC_VER >= 1200 - #define PRE_ALWAYS __forceinline - #else - #define PRE_ALWAYS inline +/*------------------------------------------ + 128-bit SIMD like data type for standard C + ------------------------------------------*/ +#if defined(HAVE_ALTIVEC) + #if !defined(__APPLE__) + #include #endif +/** 128-bit data structure */ +union W128_T { + vector unsigned int s; + uint32_t u[4]; + uint64_t u64[2]; +}; +#elif defined(HAVE_SSE2) + #include + +/** 128-bit data structure */ +union W128_T { + uint32_t u[4]; + uint64_t u64[2]; + __m128i si; +}; #else - #define PRE_ALWAYS inline +/** 128-bit data structure */ +union W128_T { + uint32_t u[4]; + uint64_t u64[2]; +}; #endif -uint32_t gen_rand32(void); -uint64_t gen_rand64(void); -void fill_array32(uint32_t *array, int size); -void fill_array64(uint64_t *array, int size); -void init_gen_rand(uint32_t seed); -void init_by_array(uint32_t *init_key, int key_length); -const char *get_idstring(void); -int get_min_array_size32(void); -int get_min_array_size64(void); +/** 128-bit data type */ +typedef union W128_T w128_t; -/* These real versions are due to Isaku Wada */ -/** generates a random number on [0,1]-real-interval */ -inline static double to_real1(uint32_t v) -{ - return v * (1.0/4294967295.0); - /* divided by 2^32-1 */ +/** + * SFMT internal state + */ +struct SFMT_T { + /** the 128-bit internal state array */ + w128_t state[SFMT_N]; + /** index counter to the 32-bit internal state array */ + int idx; +}; + +typedef struct SFMT_T sfmt_t; + +void sfmt_fill_array32(sfmt_t * sfmt, uint32_t * array, int size); +void sfmt_fill_array64(sfmt_t * sfmt, uint64_t * array, int size); +void sfmt_init_gen_rand(sfmt_t * sfmt, uint32_t seed); +void sfmt_init_by_array(sfmt_t * sfmt, uint32_t * init_key, int key_length); +const char * sfmt_get_idstring(sfmt_t * sfmt); +int sfmt_get_min_array_size32(sfmt_t * sfmt); +int sfmt_get_min_array_size64(sfmt_t * sfmt); +void sfmt_gen_rand_all(sfmt_t * sfmt); + +#ifndef ONLY64 +/** + * This function generates and returns 32-bit pseudorandom number. + * init_gen_rand or init_by_array must be called before this function. + * @param sfmt SFMT internal state + * @return 32-bit pseudorandom number + */ +inline static uint32_t sfmt_genrand_uint32(sfmt_t * sfmt) { + uint32_t r; + uint32_t * psfmt32 = &sfmt->state[0].u[0]; + + if (sfmt->idx >= SFMT_N32) { + sfmt_gen_rand_all(sfmt); + sfmt->idx = 0; + } + r = psfmt32[sfmt->idx++]; + return r; +} +#endif +/** + * This function generates and returns 64-bit pseudorandom number. + * init_gen_rand or init_by_array must be called before this function. + * The function gen_rand64 should not be called after gen_rand32, + * unless an initialization is again executed. + * @param sfmt SFMT internal state + * @return 64-bit pseudorandom number + */ +inline static uint64_t sfmt_genrand_uint64(sfmt_t * sfmt) { +#if defined(BIG_ENDIAN64) && !defined(ONLY64) + uint32_t * psfmt32 = &sfmt->state[0].u[0]; + uint32_t r1, r2; +#else + uint64_t r; +#endif + uint64_t * psfmt64 = &sfmt->state[0].u64[0]; + assert(sfmt->idx % 2 == 0); + + if (sfmt->idx >= SFMT_N32) { + sfmt_gen_rand_all(sfmt); + sfmt->idx = 0; + } +#if defined(BIG_ENDIAN64) && !defined(ONLY64) + r1 = psfmt32[sfmt->idx]; + r2 = psfmt32[sfmt->idx + 1]; + sfmt->idx += 2; + return ((uint64_t)r2 << 32) | r1; +#else + r = psfmt64[sfmt->idx / 2]; + sfmt->idx += 2; + return r; +#endif } -/** generates a random number on [0,1]-real-interval */ -inline static double genrand_real1(void) +/* ================================================= + The following real versions are due to Isaku Wada + ================================================= */ +/** + * converts an unsigned 32-bit number to a double on [0,1]-real-interval. + * @param v 32-bit unsigned integer + * @return double on [0,1]-real-interval + */ +inline static double sfmt_to_real1(uint32_t v) { - return to_real1(gen_rand32()); + return v * (1.0/4294967295.0); + /* divided by 2^32-1 */ } -/** generates a random number on [0,1)-real-interval */ -inline static double to_real2(uint32_t v) +/** + * generates a random number on [0,1]-real-interval + * @param sfmt SFMT internal state + * @return double on [0,1]-real-interval + */ +inline static double sfmt_genrand_real1(sfmt_t * sfmt) { - return v * (1.0/4294967296.0); + return sfmt_to_real1(sfmt_genrand_uint32(sfmt)); +} + +/** + * converts an unsigned 32-bit integer to a double on [0,1)-real-interval. + * @param v 32-bit unsigned integer + * @return double on [0,1)-real-interval + */ +inline static double sfmt_to_real2(uint32_t v) +{ + return v * (1.0/4294967296.0); /* divided by 2^32 */ } -/** generates a random number on [0,1)-real-interval */ -inline static double genrand_real2(void) +/** + * generates a random number on [0,1)-real-interval + * @param sfmt SFMT internal state + * @return double on [0,1)-real-interval + */ +inline static double sfmt_genrand_real2(sfmt_t * sfmt) { - return to_real2(gen_rand32()); + return sfmt_to_real2(sfmt_genrand_uint32(sfmt)); } -/** generates a random number on (0,1)-real-interval */ -inline static double to_real3(uint32_t v) +/** + * converts an unsigned 32-bit integer to a double on (0,1)-real-interval. + * @param v 32-bit unsigned integer + * @return double on (0,1)-real-interval + */ +inline static double sfmt_to_real3(uint32_t v) { - return (((double)v) + 0.5)*(1.0/4294967296.0); + return (((double)v) + 0.5)*(1.0/4294967296.0); /* divided by 2^32 */ } -/** generates a random number on (0,1)-real-interval */ -inline static double genrand_real3(void) +/** + * generates a random number on (0,1)-real-interval + * @param sfmt SFMT internal state + * @return double on (0,1)-real-interval + */ +inline static double sfmt_genrand_real3(sfmt_t * sfmt) { - return to_real3(gen_rand32()); -} -/** These real versions are due to Isaku Wada */ - -/** generates a random number on [0,1) with 53-bit resolution*/ -inline static double to_res53(uint64_t v) -{ - return v * (1.0/18446744073709551616.0L); + return sfmt_to_real3(sfmt_genrand_uint32(sfmt)); } -/** generates a random number on [0,1) with 53-bit resolution from two - * 32 bit integers */ -inline static double to_res53_mix(uint32_t x, uint32_t y) -{ - return to_res53(x | ((uint64_t)y << 32)); -} - -/** generates a random number on [0,1) with 53-bit resolution +/** + * converts an unsigned 32-bit integer to double on [0,1) + * with 53-bit resolution. + * @param v 32-bit unsigned integer + * @return double on [0,1)-real-interval with 53-bit resolution. */ -inline static double genrand_res53(void) -{ - return to_res53(gen_rand64()); -} +inline static double sfmt_to_res53(uint64_t v) +{ + return v * (1.0/18446744073709551616.0); +} -/** generates a random number on [0,1) with 53-bit resolution - using 32bit integer. +/** + * generates a random number on [0,1) with 53-bit resolution + * @param sfmt SFMT internal state + * @return double on [0,1) with 53-bit resolution */ -inline static double genrand_res53_mix(void) -{ +inline static double sfmt_genrand_res53(sfmt_t * sfmt) +{ + return sfmt_to_res53(sfmt_genrand_uint64(sfmt)); +} + + +/* ================================================= + The following function are added by Saito. + ================================================= */ +/** + * generates a random number on [0,1) with 53-bit resolution from two + * 32 bit integers + */ +inline static double sfmt_to_res53_mix(uint32_t x, uint32_t y) +{ + return sfmt_to_res53(x | ((uint64_t)y << 32)); +} + +/** + * generates a random number on [0,1) with 53-bit resolution + * using two 32bit integers. + * @param sfmt SFMT internal state + * @return double on [0,1) with 53-bit resolution + */ +inline static double sfmt_genrand_res53_mix(sfmt_t * sfmt) +{ uint32_t x, y; - x = gen_rand32(); - y = gen_rand32(); - return to_res53_mix(x, y); -} -#ifdef __cplusplus + x = sfmt_genrand_uint32(sfmt); + y = sfmt_genrand_uint32(sfmt); + return sfmt_to_res53_mix(x, y); +} + +#if defined(__cplusplus) } #endif + #endif