From a15eb6f29fcfba8657ee2d782afb1e72343a4610 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mark=20Morschh=C3=A4user?= <mark@tvk.rwth-aachen.de>
Date: Sat, 8 Mar 2014 17:08:40 +0100
Subject: [PATCH] Updated SFMT RNG code, removed Qt RNG

---
 common/CMakeLists.txt          |   2 -
 common/rng_qt.cpp              |  16 -
 common/rng_qt.h                |  13 -
 common/rng_sfmt.cpp            |  15 +-
 common/rng_sfmt.h              |  14 +
 common/sfmt/LICENSE.txt        |  13 +-
 common/sfmt/SFMT-common.h      | 164 ++++++++++
 common/sfmt/SFMT-params.h      |  81 ++---
 common/sfmt/SFMT-params19937.h |  66 ++--
 common/sfmt/SFMT.c             | 533 +++++++++++----------------------
 common/sfmt/SFMT.h             | 293 +++++++++++++-----
 11 files changed, 653 insertions(+), 557 deletions(-)
 delete mode 100644 common/rng_qt.cpp
 delete mode 100644 common/rng_qt.h
 create mode 100644 common/sfmt/SFMT-common.h
diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt
index 3a8031b1..17d3fa46 100644
--- a/common/CMakeLists.txt
+++ b/common/CMakeLists.txt
@@ -5,7 +5,6 @@ SET(common_SOURCES
 	decklist.cpp
 	get_pb_extension.cpp
 	rng_abstract.cpp
-	rng_qt.cpp
 	rng_sfmt.cpp
 	server.cpp
 	server_abstractuserinterface.cpp
@@ -25,7 +24,6 @@ SET(common_SOURCES
 SET(common_HEADERS
 	decklist.h
 	rng_abstract.h
-	rng_qt.h
 	rng_sfmt.h
 	server.h
 	server_arrowtarget.h
diff --git a/common/rng_qt.cpp b/common/rng_qt.cpp
deleted file mode 100644
index 6704cdb8..00000000
--- a/common/rng_qt.cpp
+++ /dev/null
@@ -1,16 +0,0 @@
-#include "rng_qt.h"
-#include <QDateTime>
-#include <stdlib.h>
-
-RNG_Qt::RNG_Qt(QObject *parent)
-	: RNG_Abstract(parent)
-{
-	int seed = QDateTime::currentDateTime().toTime_t();
-	qsrand(seed);
-}
-
-unsigned int RNG_Qt::getNumber(unsigned int min, unsigned int max)
-{
-	int r = qrand();
-	return min + (unsigned int) (((double) (max + 1 - min)) * r / (RAND_MAX + 1.0));
-}
diff --git a/common/rng_qt.h b/common/rng_qt.h
deleted file mode 100644
index e30719ef..00000000
--- a/common/rng_qt.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef RNG_QT_H
-#define RNG_QT_H
-
-#include "rng_abstract.h"
-
-class RNG_Qt : public RNG_Abstract {
-	Q_OBJECT
-public:
-	RNG_Qt(QObject *parent = 0);
-	unsigned int getNumber(unsigned int min, unsigned int max);
-};
-
-#endif
diff --git a/common/rng_sfmt.cpp b/common/rng_sfmt.cpp
index 9dff7448..438d8f59 100644
--- a/common/rng_sfmt.cpp
+++ b/common/rng_sfmt.cpp
@@ -1,5 +1,4 @@
 #include "rng_sfmt.h"
-#include "sfmt/SFMT.h"
 #include <QDateTime>
 #include <stdlib.h>
 #include <iostream>
@@ -7,19 +6,17 @@
 RNG_SFMT::RNG_SFMT(QObject *parent)
 	: RNG_Abstract(parent)
 {
-	std::cerr << "Using SFMT random number generator." << std::endl;
-	
-	int seed = QDateTime::currentDateTime().toTime_t();
-	init_gen_rand(seed);
-	for (int i = 0; i < 100000; ++i)
-		gen_rand64();
+	// initialize the random number generator with a 32bit integer seed (timestamp)
+	sfmt_init_gen_rand(&sfmt, QDateTime::currentDateTime().toTime_t());
 }
 
 unsigned int RNG_SFMT::getNumber(unsigned int min, unsigned int max)
 {
+	// To make the random number generation thread safe, a mutex is created around the generation.
 	mutex.lock();
-	uint64_t r = gen_rand64();
+	uint64_t r = sfmt_genrand_uint64(&sfmt);
 	mutex.unlock();
 	
-	return min + (unsigned int) (((double) (max + 1 - min)) * r / (18446744073709551616.0 + 1.0));
+	// return a random number from the interval [min, max]
+	return (unsigned int) (r % (max - min + 1));
 }
diff --git a/common/rng_sfmt.h b/common/rng_sfmt.h
index 4d131b38..80ff9f2f 100644
--- a/common/rng_sfmt.h
+++ b/common/rng_sfmt.h
@@ -1,13 +1,27 @@
 #ifndef RNG_SFMT_H
 #define RNG_SFMT_H
 
+#include "sfmt/SFMT.h"
 #include "rng_abstract.h"
 #include <QMutex>
 
+/**
+ * This class represents the random number generator.
+ * It uses the SIMD-oriented Fast Mersenne Twister code v1.4.1 from
+ * http://www.math.sci.hiroshima-u.ac.jp/~%20m-mat/MT/SFMT/index.html
+ * To use this RNG, the class needs a sfmt_t structure for the RNG's internal state.
+ * It has to be initialized by sfmt_init_gen_rand() which is done in the constructor.
+ * The function sfmt_genrand_uint64() can then be used to create a 64 bit unsigned int 
+ * pseudorandom number. This is done in getNumber().
+ * For more information see the author's website and look at the documentation and 
+ * examples that are part of the official downloads.
+ */
+
 class RNG_SFMT : public RNG_Abstract {
 	Q_OBJECT
 private:
 	QMutex mutex;
+	sfmt_t sfmt;
 public:
 	RNG_SFMT(QObject *parent = 0);
 	unsigned int getNumber(unsigned int min, unsigned int max);
diff --git a/common/sfmt/LICENSE.txt b/common/sfmt/LICENSE.txt
index decb2991..6f9c4a6d 100644
--- a/common/sfmt/LICENSE.txt
+++ b/common/sfmt/LICENSE.txt
@@ -1,5 +1,8 @@
 Copyright (c) 2006,2007 Mutsuo Saito, Makoto Matsumoto and Hiroshima
-University. All rights reserved.
+University.
+Copyright (c) 2012 Mutsuo Saito, Makoto Matsumoto, Hiroshima University
+and The University of Tokyo.
+All rights reserved.
 
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are
@@ -11,10 +14,10 @@ met:
       copyright notice, this list of conditions and the following
       disclaimer in the documentation and/or other materials provided
       with the distribution.
-    * Neither the name of the Hiroshima University nor the names of
-      its contributors may be used to endorse or promote products
-      derived from this software without specific prior written
-      permission.
+    * Neither the names of Hiroshima University, The University of
+      Tokyo nor the names of its contributors may be used to endorse
+      or promote products derived from this software without specific
+      prior written permission.
 
 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
diff --git a/common/sfmt/SFMT-common.h b/common/sfmt/SFMT-common.h
new file mode 100644
index 00000000..c7d8aa9f
--- /dev/null
+++ b/common/sfmt/SFMT-common.h
@@ -0,0 +1,164 @@
+#pragma once
+/**
+ * @file SFMT-common.h
+ *
+ * @brief SIMD oriented Fast Mersenne Twister(SFMT) pseudorandom
+ * number generator with jump function. This file includes common functions
+ * used in random number generation and jump.
+ *
+ * @author Mutsuo Saito (Hiroshima University)
+ * @author Makoto Matsumoto (The University of Tokyo)
+ *
+ * Copyright (C) 2006, 2007 Mutsuo Saito, Makoto Matsumoto and Hiroshima
+ * University.
+ * Copyright (C) 2012 Mutsuo Saito, Makoto Matsumoto, Hiroshima
+ * University and The University of Tokyo.
+ * All rights reserved.
+ *
+ * The 3-clause BSD License is applied to this software, see
+ * LICENSE.txt
+ */
+#ifndef SFMT_COMMON_H
+#define SFMT_COMMON_H
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#include "SFMT.h"
+
+inline static void do_recursion(w128_t * r, w128_t * a, w128_t * b,
+				w128_t * c, w128_t * d);
+
+inline static void rshift128(w128_t *out,  w128_t const *in, int shift);
+inline static void lshift128(w128_t *out,  w128_t const *in, int shift);
+
+/**
+ * This function simulates SIMD 128-bit right shift by the standard C.
+ * The 128-bit integer given in in is shifted by (shift * 8) bits.
+ * This function simulates the LITTLE ENDIAN SIMD.
+ * @param out the output of this function
+ * @param in the 128-bit data to be shifted
+ * @param shift the shift value
+ */
+#ifdef ONLY64
+inline static void rshift128(w128_t *out, w128_t const *in, int shift) {
+    uint64_t th, tl, oh, ol;
+
+    th = ((uint64_t)in->u[2] << 32) | ((uint64_t)in->u[3]);
+    tl = ((uint64_t)in->u[0] << 32) | ((uint64_t)in->u[1]);
+
+    oh = th >> (shift * 8);
+    ol = tl >> (shift * 8);
+    ol |= th << (64 - shift * 8);
+    out->u[0] = (uint32_t)(ol >> 32);
+    out->u[1] = (uint32_t)ol;
+    out->u[2] = (uint32_t)(oh >> 32);
+    out->u[3] = (uint32_t)oh;
+}
+#else
+inline static void rshift128(w128_t *out, w128_t const *in, int shift)
+{
+    uint64_t th, tl, oh, ol;
+
+    th = ((uint64_t)in->u[3] << 32) | ((uint64_t)in->u[2]);
+    tl = ((uint64_t)in->u[1] << 32) | ((uint64_t)in->u[0]);
+
+    oh = th >> (shift * 8);
+    ol = tl >> (shift * 8);
+    ol |= th << (64 - shift * 8);
+    out->u[1] = (uint32_t)(ol >> 32);
+    out->u[0] = (uint32_t)ol;
+    out->u[3] = (uint32_t)(oh >> 32);
+    out->u[2] = (uint32_t)oh;
+}
+#endif
+/**
+ * This function simulates SIMD 128-bit left shift by the standard C.
+ * The 128-bit integer given in in is shifted by (shift * 8) bits.
+ * This function simulates the LITTLE ENDIAN SIMD.
+ * @param out the output of this function
+ * @param in the 128-bit data to be shifted
+ * @param shift the shift value
+ */
+#ifdef ONLY64
+inline static void lshift128(w128_t *out, w128_t const *in, int shift) {
+    uint64_t th, tl, oh, ol;
+
+    th = ((uint64_t)in->u[2] << 32) | ((uint64_t)in->u[3]);
+    tl = ((uint64_t)in->u[0] << 32) | ((uint64_t)in->u[1]);
+
+    oh = th << (shift * 8);
+    ol = tl << (shift * 8);
+    oh |= tl >> (64 - shift * 8);
+    out->u[0] = (uint32_t)(ol >> 32);
+    out->u[1] = (uint32_t)ol;
+    out->u[2] = (uint32_t)(oh >> 32);
+    out->u[3] = (uint32_t)oh;
+}
+#else
+inline static void lshift128(w128_t *out, w128_t const *in, int shift)
+{
+    uint64_t th, tl, oh, ol;
+
+    th = ((uint64_t)in->u[3] << 32) | ((uint64_t)in->u[2]);
+    tl = ((uint64_t)in->u[1] << 32) | ((uint64_t)in->u[0]);
+
+    oh = th << (shift * 8);
+    ol = tl << (shift * 8);
+    oh |= tl >> (64 - shift * 8);
+    out->u[1] = (uint32_t)(ol >> 32);
+    out->u[0] = (uint32_t)ol;
+    out->u[3] = (uint32_t)(oh >> 32);
+    out->u[2] = (uint32_t)oh;
+}
+#endif
+/**
+ * This function represents the recursion formula.
+ * @param r output
+ * @param a a 128-bit part of the internal state array
+ * @param b a 128-bit part of the internal state array
+ * @param c a 128-bit part of the internal state array
+ * @param d a 128-bit part of the internal state array
+ */
+#ifdef ONLY64
+inline static void do_recursion(w128_t *r, w128_t *a, w128_t *b, w128_t *c,
+				w128_t *d) {
+    w128_t x;
+    w128_t y;
+
+    lshift128(&x, a, SFMT_SL2);
+    rshift128(&y, c, SFMT_SR2);
+    r->u[0] = a->u[0] ^ x.u[0] ^ ((b->u[0] >> SFMT_SR1) & SFMT_MSK2) ^ y.u[0]
+	^ (d->u[0] << SFMT_SL1);
+    r->u[1] = a->u[1] ^ x.u[1] ^ ((b->u[1] >> SFMT_SR1) & SFMT_MSK1) ^ y.u[1]
+	^ (d->u[1] << SFMT_SL1);
+    r->u[2] = a->u[2] ^ x.u[2] ^ ((b->u[2] >> SFMT_SR1) & SFMT_MSK4) ^ y.u[2]
+	^ (d->u[2] << SFMT_SL1);
+    r->u[3] = a->u[3] ^ x.u[3] ^ ((b->u[3] >> SFMT_SR1) & SFMT_MSK3) ^ y.u[3]
+	^ (d->u[3] << SFMT_SL1);
+}
+#else
+inline static void do_recursion(w128_t *r, w128_t *a, w128_t *b,
+				w128_t *c, w128_t *d)
+{
+    w128_t x;
+    w128_t y;
+
+    lshift128(&x, a, SFMT_SL2);
+    rshift128(&y, c, SFMT_SR2);
+    r->u[0] = a->u[0] ^ x.u[0] ^ ((b->u[0] >> SFMT_SR1) & SFMT_MSK1)
+	^ y.u[0] ^ (d->u[0] << SFMT_SL1);
+    r->u[1] = a->u[1] ^ x.u[1] ^ ((b->u[1] >> SFMT_SR1) & SFMT_MSK2)
+	^ y.u[1] ^ (d->u[1] << SFMT_SL1);
+    r->u[2] = a->u[2] ^ x.u[2] ^ ((b->u[2] >> SFMT_SR1) & SFMT_MSK3)
+	^ y.u[2] ^ (d->u[2] << SFMT_SL1);
+    r->u[3] = a->u[3] ^ x.u[3] ^ ((b->u[3] >> SFMT_SR1) & SFMT_MSK4)
+	^ y.u[3] ^ (d->u[3] << SFMT_SL1);
+}
+#endif
+#endif
+
+#if defined(__cplusplus)
+}
+#endif
diff --git a/common/sfmt/SFMT-params.h b/common/sfmt/SFMT-params.h
index 661bbf26..372e6f11 100644
--- a/common/sfmt/SFMT-params.h
+++ b/common/sfmt/SFMT-params.h
@@ -1,95 +1,96 @@
+#pragma once
 #ifndef SFMT_PARAMS_H
 #define SFMT_PARAMS_H
 
-#if !defined(MEXP)
-#ifdef __GNUC__
-  #warning "MEXP is not defined. I assume MEXP is 19937."
+#if !defined(SFMT_MEXP)
+#if defined(__GNUC__) && !defined(__ICC)
+  #warning "SFMT_MEXP is not defined. I assume MEXP is 19937."
 #endif
-  #define MEXP 19937
+  #define SFMT_MEXP 19937
 #endif
 /*-----------------
   BASIC DEFINITIONS
   -----------------*/
-/** Mersenne Exponent. The period of the sequence 
+/** Mersenne Exponent. The period of the sequence
  *  is a multiple of 2^MEXP-1.
- * #define MEXP 19937 */
+ * #define SFMT_MEXP 19937 */
 /** SFMT generator has an internal state array of 128-bit integers,
  * and N is its size. */
-#define N (MEXP / 128 + 1)
+#define SFMT_N (SFMT_MEXP / 128 + 1)
 /** N32 is the size of internal state array when regarded as an array
  * of 32-bit integers.*/
-#define N32 (N * 4)
+#define SFMT_N32 (SFMT_N * 4)
 /** N64 is the size of internal state array when regarded as an array
  * of 64-bit integers.*/
-#define N64 (N * 2)
+#define SFMT_N64 (SFMT_N * 2)
 
 /*----------------------
   the parameters of SFMT
   following definitions are in paramsXXXX.h file.
   ----------------------*/
 /** the pick up position of the array.
-#define POS1 122 
+#define SFMT_POS1 122
 */
 
 /** the parameter of shift left as four 32-bit registers.
-#define SL1 18
+#define SFMT_SL1 18
  */
 
-/** the parameter of shift left as one 128-bit register. 
- * The 128-bit integer is shifted by (SL2 * 8) bits. 
-#define SL2 1 
+/** the parameter of shift left as one 128-bit register.
+ * The 128-bit integer is shifted by (SFMT_SL2 * 8) bits.
+#define SFMT_SL2 1
 */
 
 /** the parameter of shift right as four 32-bit registers.
-#define SR1 11
+#define SFMT_SR1 11
 */
 
-/** the parameter of shift right as one 128-bit register. 
- * The 128-bit integer is shifted by (SL2 * 8) bits. 
-#define SR2 1 
+/** the parameter of shift right as one 128-bit register.
+ * The 128-bit integer is shifted by (SFMT_SL2 * 8) bits.
+#define SFMT_SR21 1
 */
 
 /** A bitmask, used in the recursion.  These parameters are introduced
  * to break symmetry of SIMD.
-#define MSK1 0xdfffffefU
-#define MSK2 0xddfecb7fU
-#define MSK3 0xbffaffffU
-#define MSK4 0xbffffff6U 
+#define SFMT_MSK1 0xdfffffefU
+#define SFMT_MSK2 0xddfecb7fU
+#define SFMT_MSK3 0xbffaffffU
+#define SFMT_MSK4 0xbffffff6U
 */
 
 /** These definitions are part of a 128-bit period certification vector.
-#define PARITY1	0x00000001U
-#define PARITY2	0x00000000U
-#define PARITY3	0x00000000U
-#define PARITY4	0xc98e126aU
+#define SFMT_PARITY1	0x00000001U
+#define SFMT_PARITY2	0x00000000U
+#define SFMT_PARITY3	0x00000000U
+#define SFMT_PARITY4	0xc98e126aU
 */
 
-#if MEXP == 607
+#if SFMT_MEXP == 607
   #include "SFMT-params607.h"
-#elif MEXP == 1279
+#elif SFMT_MEXP == 1279
   #include "SFMT-params1279.h"
-#elif MEXP == 2281
+#elif SFMT_MEXP == 2281
   #include "SFMT-params2281.h"
-#elif MEXP == 4253
+#elif SFMT_MEXP == 4253
   #include "SFMT-params4253.h"
-#elif MEXP == 11213
+#elif SFMT_MEXP == 11213
   #include "SFMT-params11213.h"
-#elif MEXP == 19937
+#elif SFMT_MEXP == 19937
   #include "SFMT-params19937.h"
-#elif MEXP == 44497
+#elif SFMT_MEXP == 44497
   #include "SFMT-params44497.h"
-#elif MEXP == 86243
+#elif SFMT_MEXP == 86243
   #include "SFMT-params86243.h"
-#elif MEXP == 132049
+#elif SFMT_MEXP == 132049
   #include "SFMT-params132049.h"
-#elif MEXP == 216091
+#elif SFMT_MEXP == 216091
   #include "SFMT-params216091.h"
 #else
-#ifdef __GNUC__
-  #error "MEXP is not valid."
-  #undef MEXP
+#if defined(__GNUC__) && !defined(__ICC)
+  #error "SFMT_MEXP is not valid."
+  #undef SFMT_MEXP
 #else
-  #undef MEXP
+  #undef SFMT_MEXP
 #endif
 
 #endif
diff --git a/common/sfmt/SFMT-params19937.h b/common/sfmt/SFMT-params19937.h
index 04708cdf..fc49fa14 100644
--- a/common/sfmt/SFMT-params19937.h
+++ b/common/sfmt/SFMT-params19937.h
@@ -1,46 +1,50 @@
+#pragma once
 #ifndef SFMT_PARAMS19937_H
 #define SFMT_PARAMS19937_H
 
-#define POS1	122
-#define SL1	18
-#define SL2	1
-#define SR1	11
-#define SR2	1
-#define MSK1	0xdfffffefU
-#define MSK2	0xddfecb7fU
-#define MSK3	0xbffaffffU
-#define MSK4	0xbffffff6U
-#define PARITY1	0x00000001U
-#define PARITY2	0x00000000U
-#define PARITY3	0x00000000U
-#define PARITY4	0x13c9e684U
+#define SFMT_POS1	122
+#define SFMT_SL1	18
+#define SFMT_SL2	1
+#define SFMT_SR1	11
+#define SFMT_SR2	1
+#define SFMT_MSK1	0xdfffffefU
+#define SFMT_MSK2	0xddfecb7fU
+#define SFMT_MSK3	0xbffaffffU
+#define SFMT_MSK4	0xbffffff6U
+#define SFMT_PARITY1	0x00000001U
+#define SFMT_PARITY2	0x00000000U
+#define SFMT_PARITY3	0x00000000U
+#define SFMT_PARITY4	0x13c9e684U
 
 
 /* PARAMETERS FOR ALTIVEC */
 #if defined(__APPLE__)	/* For OSX */
-    #define ALTI_SL1	(vector unsigned int)(SL1, SL1, SL1, SL1)
-    #define ALTI_SR1	(vector unsigned int)(SR1, SR1, SR1, SR1)
-    #define ALTI_MSK	(vector unsigned int)(MSK1, MSK2, MSK3, MSK4)
-    #define ALTI_MSK64 \
-	(vector unsigned int)(MSK2, MSK1, MSK4, MSK3)
-    #define ALTI_SL2_PERM \
+    #define SFMT_ALTI_SL1 \
+	(vector unsigned int)(SFMT_SL1, SFMT_SL1, SFMT_SL1, SFMT_SL1)
+    #define SFMT_ALTI_SR1 \
+	(vector unsigned int)(SFMT_SR1, SFMT_SR1, SFMT_SR1, SFMT_SR1)
+    #define SFMT_ALTI_MSK \
+	(vector unsigned int)(SFMT_MSK1, SFMT_MSK2, SFMT_MSK3, SFMT_MSK4)
+    #define SFMT_ALTI_MSK64 \
+	(vector unsigned int)(SFMT_MSK2, SFMT_MSK1, SFMT_MSK4, SFMT_MSK3)
+    #define SFMT_ALTI_SL2_PERM \
 	(vector unsigned char)(1,2,3,23,5,6,7,0,9,10,11,4,13,14,15,8)
-    #define ALTI_SL2_PERM64 \
+    #define SFMT_ALTI_SL2_PERM64 \
 	(vector unsigned char)(1,2,3,4,5,6,7,31,9,10,11,12,13,14,15,0)
-    #define ALTI_SR2_PERM \
+    #define SFMT_ALTI_SR2_PERM \
 	(vector unsigned char)(7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14)
-    #define ALTI_SR2_PERM64 \
+    #define SFMT_ALTI_SR2_PERM64 \
 	(vector unsigned char)(15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14)
 #else	/* For OTHER OSs(Linux?) */
-    #define ALTI_SL1	{SL1, SL1, SL1, SL1}
-    #define ALTI_SR1	{SR1, SR1, SR1, SR1}
-    #define ALTI_MSK	{MSK1, MSK2, MSK3, MSK4}
-    #define ALTI_MSK64	{MSK2, MSK1, MSK4, MSK3}
-    #define ALTI_SL2_PERM	{1,2,3,23,5,6,7,0,9,10,11,4,13,14,15,8}
-    #define ALTI_SL2_PERM64	{1,2,3,4,5,6,7,31,9,10,11,12,13,14,15,0}
-    #define ALTI_SR2_PERM	{7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14}
-    #define ALTI_SR2_PERM64	{15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14}
+    #define SFMT_ALTI_SL1	{SFMT_SL1, SFMT_SL1, SFMT_SL1, SFMT_SL1}
+    #define SFMT_ALTI_SR1	{SFMT_SR1, SFMT_SR1, SFMT_SR1, SFMT_SR1}
+    #define SFMT_ALTI_MSK	{SFMT_MSK1, SFMT_MSK2, SFMT_MSK3, SFMT_MSK4}
+    #define SFMT_ALTI_MSK64	{SFMT_MSK2, SFMT_MSK1, SFMT_MSK4, SFMT_MSK3}
+    #define SFMT_ALTI_SL2_PERM	{1,2,3,23,5,6,7,0,9,10,11,4,13,14,15,8}
+    #define SFMT_ALTI_SL2_PERM64 {1,2,3,4,5,6,7,31,9,10,11,12,13,14,15,0}
+    #define SFMT_ALTI_SR2_PERM	{7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14}
+    #define SFMT_ALTI_SR2_PERM64 {15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14}
 #endif	/* For OSX */
-#define IDSTR	"SFMT-19937:122-18-1-11-1:dfffffef-ddfecb7f-bffaffff-bffffff6"
+#define SFMT_IDSTR	"SFMT-19937:122-18-1-11-1:dfffffef-ddfecb7f-bffaffff-bffffff6"
 
 #endif /* SFMT_PARAMS19937_H */
diff --git a/common/sfmt/SFMT.c b/common/sfmt/SFMT.c
index d36465d9..2652df7d 100644
--- a/common/sfmt/SFMT.c
+++ b/common/sfmt/SFMT.c
@@ -1,19 +1,31 @@
-/** 
+/**
  * @file  SFMT.c
  * @brief SIMD oriented Fast Mersenne Twister(SFMT)
  *
  * @author Mutsuo Saito (Hiroshima University)
  * @author Makoto Matsumoto (Hiroshima University)
  *
- * Copyright (C) 2006,2007 Mutsuo Saito, Makoto Matsumoto and Hiroshima
- * University. All rights reserved.
+ * Copyright (C) 2006, 2007 Mutsuo Saito, Makoto Matsumoto and Hiroshima
+ * University.
+ * Copyright (C) 2012 Mutsuo Saito, Makoto Matsumoto, Hiroshima
+ * University and The University of Tokyo.
+ * Copyright (C) 2013 Mutsuo Saito, Makoto Matsumoto and Hiroshima
+ * University.
+ * All rights reserved.
  *
- * The new BSD License is applied to this software, see LICENSE.txt
+ * The 3-clause BSD License is applied to this software, see
+ * LICENSE.txt
  */
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
 #include <string.h>
 #include <assert.h>
 #include "SFMT.h"
 #include "SFMT-params.h"
+#include "SFMT-common.h"
 
 #if defined(__BIG_ENDIAN__) && !defined(__amd64) && !defined(BIG_ENDIAN64)
 #define BIG_ENDIAN64 1
@@ -27,74 +39,20 @@
   #endif
 #undef ONLY64
 #endif
-/*------------------------------------------------------
-  128-bit SIMD data type for Altivec, SSE2 or standard C
-  ------------------------------------------------------*/
-#if defined(HAVE_ALTIVEC)
-  #if !defined(__APPLE__)
-    #include <altivec.h>
-  #endif
-/** 128-bit data structure */
-union W128_T {
-    vector unsigned int s;
-    uint32_t u[4];
-};
-/** 128-bit data type */
-typedef union W128_T w128_t;
-
-#elif defined(HAVE_SSE2)
-  #include <emmintrin.h>
-
-/** 128-bit data structure */
-union W128_T {
-    __m128i si;
-    uint32_t u[4];
-};
-/** 128-bit data type */
-typedef union W128_T w128_t;
-
-#else
-
-/** 128-bit data structure */
-struct W128_T {
-    uint32_t u[4];
-};
-/** 128-bit data type */
-typedef struct W128_T w128_t;
-
-#endif
-
-/*--------------------------------------
-  FILE GLOBAL VARIABLES
-  internal state, index counter and flag 
-  --------------------------------------*/
-/** the 128-bit internal state array */
-static w128_t sfmt[N];
-/** the 32bit integer pointer to the 128-bit internal state array */
-static uint32_t *psfmt32 = &sfmt[0].u[0];
-#if !defined(BIG_ENDIAN64) || defined(ONLY64)
-/** the 64bit integer pointer to the 128-bit internal state array */
-static uint64_t *psfmt64 = (uint64_t *)&sfmt[0].u[0];
-#endif
-/** index counter to the 32-bit internal state array */
-static int idx;
-/** a flag: it is 0 if and only if the internal state is not yet
- * initialized. */
-static int initialized = 0;
-/** a parity check vector which certificate the period of 2^{MEXP} */
-static uint32_t parity[4] = {PARITY1, PARITY2, PARITY3, PARITY4};
 
+/**
+ * parameters used by sse2.
+ */
+static const w128_t sse2_param_mask = {{SFMT_MSK1, SFMT_MSK2,
+                                        SFMT_MSK3, SFMT_MSK4}};
 /*----------------
   STATIC FUNCTIONS
   ----------------*/
 inline static int idxof(int i);
-inline static void rshift128(w128_t *out,  w128_t const *in, int shift);
-inline static void lshift128(w128_t *out,  w128_t const *in, int shift);
-inline static void gen_rand_all(void);
-inline static void gen_rand_array(w128_t *array, int size);
+inline static void gen_rand_array(sfmt_t * sfmt, w128_t *array, int size);
 inline static uint32_t func1(uint32_t x);
 inline static uint32_t func2(uint32_t x);
-static void period_certification(void);
+static void period_certification(sfmt_t * sfmt);
 #if defined(BIG_ENDIAN64) && !defined(ONLY64)
 inline static void swap(w128_t *array, int size);
 #endif
@@ -102,11 +60,15 @@ inline static void swap(w128_t *array, int size);
 #if defined(HAVE_ALTIVEC)
   #include "SFMT-alti.h"
 #elif defined(HAVE_SSE2)
-  #include "SFMT-sse2.h"
+  #if defined(_MSC_VER)
+    #include "SFMT-sse2-msc.h"
+  #else
+    #include "SFMT-sse2.h"
+  #endif
 #endif
 
 /**
- * This function simulate a 64-bit index of LITTLE ENDIAN 
+ * This function simulate a 64-bit index of LITTLE ENDIAN
  * in BIG ENDIAN machine.
  */
 #ifdef ONLY64
@@ -118,190 +80,48 @@ inline static int idxof(int i) {
     return i;
 }
 #endif
-/**
- * This function simulates SIMD 128-bit right shift by the standard C.
- * The 128-bit integer given in in is shifted by (shift * 8) bits.
- * This function simulates the LITTLE ENDIAN SIMD.
- * @param out the output of this function
- * @param in the 128-bit data to be shifted
- * @param shift the shift value
- */
-#ifdef ONLY64
-inline static void rshift128(w128_t *out, w128_t const *in, int shift) {
-    uint64_t th, tl, oh, ol;
-
-    th = ((uint64_t)in->u[2] << 32) | ((uint64_t)in->u[3]);
-    tl = ((uint64_t)in->u[0] << 32) | ((uint64_t)in->u[1]);
-
-    oh = th >> (shift * 8);
-    ol = tl >> (shift * 8);
-    ol |= th << (64 - shift * 8);
-    out->u[0] = (uint32_t)(ol >> 32);
-    out->u[1] = (uint32_t)ol;
-    out->u[2] = (uint32_t)(oh >> 32);
-    out->u[3] = (uint32_t)oh;
-}
-#else
-inline static void rshift128(w128_t *out, w128_t const *in, int shift) {
-    uint64_t th, tl, oh, ol;
-
-    th = ((uint64_t)in->u[3] << 32) | ((uint64_t)in->u[2]);
-    tl = ((uint64_t)in->u[1] << 32) | ((uint64_t)in->u[0]);
-
-    oh = th >> (shift * 8);
-    ol = tl >> (shift * 8);
-    ol |= th << (64 - shift * 8);
-    out->u[1] = (uint32_t)(ol >> 32);
-    out->u[0] = (uint32_t)ol;
-    out->u[3] = (uint32_t)(oh >> 32);
-    out->u[2] = (uint32_t)oh;
-}
-#endif
-/**
- * This function simulates SIMD 128-bit left shift by the standard C.
- * The 128-bit integer given in in is shifted by (shift * 8) bits.
- * This function simulates the LITTLE ENDIAN SIMD.
- * @param out the output of this function
- * @param in the 128-bit data to be shifted
- * @param shift the shift value
- */
-#ifdef ONLY64
-inline static void lshift128(w128_t *out, w128_t const *in, int shift) {
-    uint64_t th, tl, oh, ol;
-
-    th = ((uint64_t)in->u[2] << 32) | ((uint64_t)in->u[3]);
-    tl = ((uint64_t)in->u[0] << 32) | ((uint64_t)in->u[1]);
-
-    oh = th << (shift * 8);
-    ol = tl << (shift * 8);
-    oh |= tl >> (64 - shift * 8);
-    out->u[0] = (uint32_t)(ol >> 32);
-    out->u[1] = (uint32_t)ol;
-    out->u[2] = (uint32_t)(oh >> 32);
-    out->u[3] = (uint32_t)oh;
-}
-#else
-inline static void lshift128(w128_t *out, w128_t const *in, int shift) {
-    uint64_t th, tl, oh, ol;
-
-    th = ((uint64_t)in->u[3] << 32) | ((uint64_t)in->u[2]);
-    tl = ((uint64_t)in->u[1] << 32) | ((uint64_t)in->u[0]);
-
-    oh = th << (shift * 8);
-    ol = tl << (shift * 8);
-    oh |= tl >> (64 - shift * 8);
-    out->u[1] = (uint32_t)(ol >> 32);
-    out->u[0] = (uint32_t)ol;
-    out->u[3] = (uint32_t)(oh >> 32);
-    out->u[2] = (uint32_t)oh;
-}
-#endif
-
-/**
- * This function represents the recursion formula.
- * @param r output
- * @param a a 128-bit part of the internal state array
- * @param b a 128-bit part of the internal state array
- * @param c a 128-bit part of the internal state array
- * @param d a 128-bit part of the internal state array
- */
-#if (!defined(HAVE_ALTIVEC)) && (!defined(HAVE_SSE2))
-#ifdef ONLY64
-inline static void do_recursion(w128_t *r, w128_t *a, w128_t *b, w128_t *c,
-				w128_t *d) {
-    w128_t x;
-    w128_t y;
-
-    lshift128(&x, a, SL2);
-    rshift128(&y, c, SR2);
-    r->u[0] = a->u[0] ^ x.u[0] ^ ((b->u[0] >> SR1) & MSK2) ^ y.u[0] 
-	^ (d->u[0] << SL1);
-    r->u[1] = a->u[1] ^ x.u[1] ^ ((b->u[1] >> SR1) & MSK1) ^ y.u[1] 
-	^ (d->u[1] << SL1);
-    r->u[2] = a->u[2] ^ x.u[2] ^ ((b->u[2] >> SR1) & MSK4) ^ y.u[2] 
-	^ (d->u[2] << SL1);
-    r->u[3] = a->u[3] ^ x.u[3] ^ ((b->u[3] >> SR1) & MSK3) ^ y.u[3] 
-	^ (d->u[3] << SL1);
-}
-#else
-inline static void do_recursion(w128_t *r, w128_t *a, w128_t *b, w128_t *c,
-				w128_t *d) {
-    w128_t x;
-    w128_t y;
-
-    lshift128(&x, a, SL2);
-    rshift128(&y, c, SR2);
-    r->u[0] = a->u[0] ^ x.u[0] ^ ((b->u[0] >> SR1) & MSK1) ^ y.u[0] 
-	^ (d->u[0] << SL1);
-    r->u[1] = a->u[1] ^ x.u[1] ^ ((b->u[1] >> SR1) & MSK2) ^ y.u[1] 
-	^ (d->u[1] << SL1);
-    r->u[2] = a->u[2] ^ x.u[2] ^ ((b->u[2] >> SR1) & MSK3) ^ y.u[2] 
-	^ (d->u[2] << SL1);
-    r->u[3] = a->u[3] ^ x.u[3] ^ ((b->u[3] >> SR1) & MSK4) ^ y.u[3] 
-	^ (d->u[3] << SL1);
-}
-#endif
-#endif
 
 #if (!defined(HAVE_ALTIVEC)) && (!defined(HAVE_SSE2))
-/**
- * This function fills the internal state array with pseudorandom
- * integers.
- */
-inline static void gen_rand_all(void) {
-    int i;
-    w128_t *r1, *r2;
-
-    r1 = &sfmt[N - 2];
-    r2 = &sfmt[N - 1];
-    for (i = 0; i < N - POS1; i++) {
-	do_recursion(&sfmt[i], &sfmt[i], &sfmt[i + POS1], r1, r2);
-	r1 = r2;
-	r2 = &sfmt[i];
-    }
-    for (; i < N; i++) {
-	do_recursion(&sfmt[i], &sfmt[i], &sfmt[i + POS1 - N], r1, r2);
-	r1 = r2;
-	r2 = &sfmt[i];
-    }
-}
-
 /**
  * This function fills the user-specified array with pseudorandom
  * integers.
  *
- * @param array an 128-bit array to be filled by pseudorandom numbers.  
+ * @param sfmt SFMT internal state
+ * @param array an 128-bit array to be filled by pseudorandom numbers.
  * @param size number of 128-bit pseudorandom numbers to be generated.
  */
-inline static void gen_rand_array(w128_t *array, int size) {
+inline static void gen_rand_array(sfmt_t * sfmt, w128_t *array, int size) {
     int i, j;
     w128_t *r1, *r2;
 
-    r1 = &sfmt[N - 2];
-    r2 = &sfmt[N - 1];
-    for (i = 0; i < N - POS1; i++) {
-	do_recursion(&array[i], &sfmt[i], &sfmt[i + POS1], r1, r2);
-	r1 = r2;
-	r2 = &array[i];
+    r1 = &sfmt->state[SFMT_N - 2];
+    r2 = &sfmt->state[SFMT_N - 1];
+    for (i = 0; i < SFMT_N - SFMT_POS1; i++) {
+        do_recursion(&array[i], &sfmt->state[i], &sfmt->state[i + SFMT_POS1], r1, r2);
+        r1 = r2;
+        r2 = &array[i];
     }
-    for (; i < N; i++) {
-	do_recursion(&array[i], &sfmt[i], &array[i + POS1 - N], r1, r2);
-	r1 = r2;
-	r2 = &array[i];
+    for (; i < SFMT_N; i++) {
+        do_recursion(&array[i], &sfmt->state[i],
+                     &array[i + SFMT_POS1 - SFMT_N], r1, r2);
+        r1 = r2;
+        r2 = &array[i];
     }
-    for (; i < size - N; i++) {
-	do_recursion(&array[i], &array[i - N], &array[i + POS1 - N], r1, r2);
-	r1 = r2;
-	r2 = &array[i];
+    for (; i < size - SFMT_N; i++) {
+        do_recursion(&array[i], &array[i - SFMT_N],
+                     &array[i + SFMT_POS1 - SFMT_N], r1, r2);
+        r1 = r2;
+        r2 = &array[i];
     }
-    for (j = 0; j < 2 * N - size; j++) {
-	sfmt[j] = array[j + size - N];
+    for (j = 0; j < 2 * SFMT_N - size; j++) {
+        sfmt->state[j] = array[j + size - SFMT_N];
     }
     for (; i < size; i++, j++) {
-	do_recursion(&array[i], &array[i - N], &array[i + POS1 - N], r1, r2);
-	r1 = r2;
-	r2 = &array[i];
-	sfmt[j] = array[i];
+        do_recursion(&array[i], &array[i - SFMT_N],
+                     &array[i + SFMT_POS1 - SFMT_N], r1, r2);
+        r1 = r2;
+        r2 = &array[i];
+        sfmt->state[j] = array[i];
     }
 }
 #endif
@@ -312,12 +132,12 @@ inline static void swap(w128_t *array, int size) {
     uint32_t x, y;
 
     for (i = 0; i < size; i++) {
-	x = array[i].u[0];
-	y = array[i].u[2];
-	array[i].u[0] = array[i].u[1];
-	array[i].u[2] = array[i].u[3];
-	array[i].u[1] = x;
-	array[i].u[3] = y;
+        x = array[i].u[0];
+        y = array[i].u[2];
+        array[i].u[0] = array[i].u[1];
+        array[i].u[2] = array[i].u[3];
+        array[i].u[1] = x;
+        array[i].u[3] = y;
     }
 }
 #endif
@@ -343,114 +163,101 @@ static uint32_t func2(uint32_t x) {
 
 /**
  * This function certificate the period of 2^{MEXP}
+ * @param sfmt SFMT internal state
  */
-static void period_certification(void) {
+static void period_certification(sfmt_t * sfmt) {
     int inner = 0;
     int i, j;
     uint32_t work;
+    uint32_t *psfmt32 = &sfmt->state[0].u[0];
+    const uint32_t parity[4] = {SFMT_PARITY1, SFMT_PARITY2,
+                                SFMT_PARITY3, SFMT_PARITY4};
 
     for (i = 0; i < 4; i++)
-	inner ^= psfmt32[idxof(i)] & parity[i];
+        inner ^= psfmt32[idxof(i)] & parity[i];
     for (i = 16; i > 0; i >>= 1)
-	inner ^= inner >> i;
+        inner ^= inner >> i;
     inner &= 1;
     /* check OK */
     if (inner == 1) {
-	return;
+        return;
     }
     /* check NG, and modification */
     for (i = 0; i < 4; i++) {
-	work = 1;
-	for (j = 0; j < 32; j++) {
-	    if ((work & parity[i]) != 0) {
-		psfmt32[idxof(i)] ^= work;
-		return;
-	    }
-	    work = work << 1;
-	}
+        work = 1;
+        for (j = 0; j < 32; j++) {
+            if ((work & parity[i]) != 0) {
+                psfmt32[idxof(i)] ^= work;
+                return;
+            }
+            work = work << 1;
+        }
     }
 }
 
 /*----------------
   PUBLIC FUNCTIONS
   ----------------*/
+#define UNUSED_VARIABLE(x) (void)(x)
 /**
  * This function returns the identification string.
  * The string shows the word size, the Mersenne exponent,
  * and all parameters of this generator.
+ * @param sfmt SFMT internal state
  */
-const char *get_idstring(void) {
-    return IDSTR;
+const char *sfmt_get_idstring(sfmt_t * sfmt) {
+    UNUSED_VARIABLE(sfmt);
+    return SFMT_IDSTR;
 }
 
 /**
  * This function returns the minimum size of array used for \b
  * fill_array32() function.
+ * @param sfmt SFMT internal state
  * @return minimum size of array used for fill_array32() function.
  */
-int get_min_array_size32(void) {
-    return N32;
+int sfmt_get_min_array_size32(sfmt_t * sfmt) {
+    UNUSED_VARIABLE(sfmt);
+    return SFMT_N32;
 }
 
 /**
  * This function returns the minimum size of array used for \b
  * fill_array64() function.
+ * @param sfmt SFMT internal state
  * @return minimum size of array used for fill_array64() function.
  */
-int get_min_array_size64(void) {
-    return N64;
+int sfmt_get_min_array_size64(sfmt_t * sfmt) {
+    UNUSED_VARIABLE(sfmt);
+    return SFMT_N64;
 }
 
-#ifndef ONLY64
+#if !defined(HAVE_SSE2) && !defined(HAVE_ALTIVEC)
 /**
- * This function generates and returns 32-bit pseudorandom number.
- * init_gen_rand or init_by_array must be called before this function.
- * @return 32-bit pseudorandom number
+ * This function fills the internal state array with pseudorandom
+ * integers.
+ * @param sfmt SFMT internal state
  */
-uint32_t gen_rand32(void) {
-    uint32_t r;
+void sfmt_gen_rand_all(sfmt_t * sfmt) {
+    int i;
+    w128_t *r1, *r2;
 
-    assert(initialized);
-    if (idx >= N32) {
-	gen_rand_all();
-	idx = 0;
+    r1 = &sfmt->state[SFMT_N - 2];
+    r2 = &sfmt->state[SFMT_N - 1];
+    for (i = 0; i < SFMT_N - SFMT_POS1; i++) {
+        do_recursion(&sfmt->state[i], &sfmt->state[i],
+                     &sfmt->state[i + SFMT_POS1], r1, r2);
+        r1 = r2;
+        r2 = &sfmt->state[i];
+    }
+    for (; i < SFMT_N; i++) {
+        do_recursion(&sfmt->state[i], &sfmt->state[i],
+                     &sfmt->state[i + SFMT_POS1 - SFMT_N], r1, r2);
+        r1 = r2;
+        r2 = &sfmt->state[i];
     }
-    r = psfmt32[idx++];
-    return r;
 }
 #endif
-/**
- * This function generates and returns 64-bit pseudorandom number.
- * init_gen_rand or init_by_array must be called before this function.
- * The function gen_rand64 should not be called after gen_rand32,
- * unless an initialization is again executed. 
- * @return 64-bit pseudorandom number
- */
-uint64_t gen_rand64(void) {
-#if defined(BIG_ENDIAN64) && !defined(ONLY64)
-    uint32_t r1, r2;
-#else
-    uint64_t r;
-#endif
-
-    assert(initialized);
-    assert(idx % 2 == 0);
-
-    if (idx >= N32) {
-	gen_rand_all();
-	idx = 0;
-    }
-#if defined(BIG_ENDIAN64) && !defined(ONLY64)
-    r1 = psfmt32[idx];
-    r2 = psfmt32[idx + 1];
-    idx += 2;
-    return ((uint64_t)r2 << 32) | r1;
-#else
-    r = psfmt64[idx / 2];
-    idx += 2;
-    return r;
-#endif
-}
 
 #ifndef ONLY64
 /**
@@ -464,6 +271,7 @@ uint64_t gen_rand64(void) {
  * before the first call of this function. This function can not be
  * used after calling gen_rand function, without initialization.
  *
+ * @param sfmt SFMT internal state
  * @param array an array where pseudorandom 32-bit integers are filled
  * by this function.  The pointer to the array must be \b "aligned"
  * (namely, must be a multiple of 16) in the SIMD version, since it
@@ -478,14 +286,13 @@ uint64_t gen_rand64(void) {
  * memory. Mac OSX doesn't have these functions, but \b malloc of OSX
  * returns the pointer to the aligned memory block.
  */
-void fill_array32(uint32_t *array, int size) {
-    assert(initialized);
-    assert(idx == N32);
+void sfmt_fill_array32(sfmt_t * sfmt, uint32_t *array, int size) {
+    assert(sfmt->idx == SFMT_N32);
     assert(size % 4 == 0);
-    assert(size >= N32);
+    assert(size >= SFMT_N32);
 
-    gen_rand_array((w128_t *)array, size / 4);
-    idx = N32;
+    gen_rand_array(sfmt, (w128_t *)array, size / 4);
+    sfmt->idx = SFMT_N32;
 }
 #endif
 
@@ -496,6 +303,7 @@ void fill_array32(uint32_t *array, int size) {
  * multiple of two.  The generation by this function is much faster
  * than the following gen_rand function.
  *
+ * @param sfmt SFMT internal state
  * For initialization, init_gen_rand or init_by_array must be called
  * before the first call of this function. This function can not be
  * used after calling gen_rand function, without initialization.
@@ -514,14 +322,13 @@ void fill_array32(uint32_t *array, int size) {
  * memory. Mac OSX doesn't have these functions, but \b malloc of OSX
  * returns the pointer to the aligned memory block.
  */
-void fill_array64(uint64_t *array, int size) {
-    assert(initialized);
-    assert(idx == N32);
+void sfmt_fill_array64(sfmt_t * sfmt, uint64_t *array, int size) {
+    assert(sfmt->idx == SFMT_N32);
     assert(size % 2 == 0);
-    assert(size >= N64);
+    assert(size >= SFMT_N64);
 
-    gen_rand_array((w128_t *)array, size / 2);
-    idx = N32;
+    gen_rand_array(sfmt, (w128_t *)array, size / 2);
+    sfmt->idx = SFMT_N32;
 
 #if defined(BIG_ENDIAN64) && !defined(ONLY64)
     swap((w128_t *)array, size /2);
@@ -532,54 +339,58 @@ void fill_array64(uint64_t *array, int size) {
  * This function initializes the internal state array with a 32-bit
  * integer seed.
  *
+ * @param sfmt SFMT internal state
  * @param seed a 32-bit integer used as the seed.
  */
-void init_gen_rand(uint32_t seed) {
+void sfmt_init_gen_rand(sfmt_t * sfmt, uint32_t seed) {
     int i;
 
+    uint32_t *psfmt32 = &sfmt->state[0].u[0];
+
     psfmt32[idxof(0)] = seed;
-    for (i = 1; i < N32; i++) {
-	psfmt32[idxof(i)] = 1812433253UL * (psfmt32[idxof(i - 1)] 
-					    ^ (psfmt32[idxof(i - 1)] >> 30))
-	    + i;
+    for (i = 1; i < SFMT_N32; i++) {
+        psfmt32[idxof(i)] = 1812433253UL * (psfmt32[idxof(i - 1)]
+                                            ^ (psfmt32[idxof(i - 1)] >> 30))
+            + i;
     }
-    idx = N32;
-    period_certification();
-    initialized = 1;
+    sfmt->idx = SFMT_N32;
+    period_certification(sfmt);
 }
 
 /**
  * This function initializes the internal state array,
  * with an array of 32-bit integers used as the seeds
+ * @param sfmt SFMT internal state
  * @param init_key the array of 32-bit integers, used as a seed.
  * @param key_length the length of init_key.
  */
-void init_by_array(uint32_t *init_key, int key_length) {
+void sfmt_init_by_array(sfmt_t * sfmt, uint32_t *init_key, int key_length) {
     int i, j, count;
     uint32_t r;
     int lag;
     int mid;
-    int size = N * 4;
+    int size = SFMT_N * 4;
+    uint32_t *psfmt32 = &sfmt->state[0].u[0];
 
     if (size >= 623) {
-	lag = 11;
+        lag = 11;
     } else if (size >= 68) {
-	lag = 7;
+        lag = 7;
     } else if (size >= 39) {
-	lag = 5;
+        lag = 5;
     } else {
-	lag = 3;
+        lag = 3;
     }
     mid = (size - lag) / 2;
 
-    memset(sfmt, 0x8b, sizeof(sfmt));
-    if (key_length + 1 > N32) {
-	count = key_length + 1;
+    memset(sfmt, 0x8b, sizeof(sfmt_t));
+    if (key_length + 1 > SFMT_N32) {
+        count = key_length + 1;
     } else {
-	count = N32;
+        count = SFMT_N32;
     }
-    r = func1(psfmt32[idxof(0)] ^ psfmt32[idxof(mid)] 
-	      ^ psfmt32[idxof(N32 - 1)]);
+    r = func1(psfmt32[idxof(0)] ^ psfmt32[idxof(mid)]
+              ^ psfmt32[idxof(SFMT_N32 - 1)]);
     psfmt32[idxof(mid)] += r;
     r += key_length;
     psfmt32[idxof(mid + lag)] += r;
@@ -587,34 +398,36 @@ void init_by_array(uint32_t *init_key, int key_length) {
 
     count--;
     for (i = 1, j = 0; (j < count) && (j < key_length); j++) {
-	r = func1(psfmt32[idxof(i)] ^ psfmt32[idxof((i + mid) % N32)] 
-		  ^ psfmt32[idxof((i + N32 - 1) % N32)]);
-	psfmt32[idxof((i + mid) % N32)] += r;
-	r += init_key[j] + i;
-	psfmt32[idxof((i + mid + lag) % N32)] += r;
-	psfmt32[idxof(i)] = r;
-	i = (i + 1) % N32;
+        r = func1(psfmt32[idxof(i)] ^ psfmt32[idxof((i + mid) % SFMT_N32)]
+                  ^ psfmt32[idxof((i + SFMT_N32 - 1) % SFMT_N32)]);
+        psfmt32[idxof((i + mid) % SFMT_N32)] += r;
+        r += init_key[j] + i;
+        psfmt32[idxof((i + mid + lag) % SFMT_N32)] += r;
+        psfmt32[idxof(i)] = r;
+        i = (i + 1) % SFMT_N32;
     }
     for (; j < count; j++) {
-	r = func1(psfmt32[idxof(i)] ^ psfmt32[idxof((i + mid) % N32)] 
-		  ^ psfmt32[idxof((i + N32 - 1) % N32)]);
-	psfmt32[idxof((i + mid) % N32)] += r;
-	r += i;
-	psfmt32[idxof((i + mid + lag) % N32)] += r;
-	psfmt32[idxof(i)] = r;
-	i = (i + 1) % N32;
+        r = func1(psfmt32[idxof(i)] ^ psfmt32[idxof((i + mid) % SFMT_N32)]
+                  ^ psfmt32[idxof((i + SFMT_N32 - 1) % SFMT_N32)]);
+        psfmt32[idxof((i + mid) % SFMT_N32)] += r;
+        r += i;
+        psfmt32[idxof((i + mid + lag) % SFMT_N32)] += r;
+        psfmt32[idxof(i)] = r;
+        i = (i + 1) % SFMT_N32;
     }
-    for (j = 0; j < N32; j++) {
-	r = func2(psfmt32[idxof(i)] + psfmt32[idxof((i + mid) % N32)] 
-		  + psfmt32[idxof((i + N32 - 1) % N32)]);
-	psfmt32[idxof((i + mid) % N32)] ^= r;
-	r -= i;
-	psfmt32[idxof((i + mid + lag) % N32)] ^= r;
-	psfmt32[idxof(i)] = r;
-	i = (i + 1) % N32;
+    for (j = 0; j < SFMT_N32; j++) {
+        r = func2(psfmt32[idxof(i)] + psfmt32[idxof((i + mid) % SFMT_N32)]
+                  + psfmt32[idxof((i + SFMT_N32 - 1) % SFMT_N32)]);
+        psfmt32[idxof((i + mid) % SFMT_N32)] ^= r;
+        r -= i;
+        psfmt32[idxof((i + mid + lag) % SFMT_N32)] ^= r;
+        psfmt32[idxof(i)] = r;
+        i = (i + 1) % SFMT_N32;
     }
 
-    idx = N32;
-    period_certification();
-    initialized = 1;
+    sfmt->idx = SFMT_N32;
+    period_certification(sfmt);
 }
+#if defined(__cplusplus)
+}
+#endif
diff --git a/common/sfmt/SFMT.h b/common/sfmt/SFMT.h
index f0aba198..dca308a0 100644
--- a/common/sfmt/SFMT.h
+++ b/common/sfmt/SFMT.h
@@ -1,24 +1,28 @@
-/** 
- * @file SFMT.h 
+#pragma once
+/**
+ * @file SFMT.h
  *
  * @brief SIMD oriented Fast Mersenne Twister(SFMT) pseudorandom
- * number generator
+ * number generator using C structure.
  *
  * @author Mutsuo Saito (Hiroshima University)
- * @author Makoto Matsumoto (Hiroshima University)
+ * @author Makoto Matsumoto (The University of Tokyo)
  *
  * Copyright (C) 2006, 2007 Mutsuo Saito, Makoto Matsumoto and Hiroshima
- * University. All rights reserved.
+ * University.
+ * Copyright (C) 2012 Mutsuo Saito, Makoto Matsumoto, Hiroshima
+ * University and The University of Tokyo.
+ * All rights reserved.
  *
- * The new BSD License is applied to this software.
- * see LICENSE.txt
+ * The 3-clause BSD License is applied to this software, see
+ * LICENSE.txt
  *
  * @note We assume that your system has inttypes.h.  If your system
  * doesn't have inttypes.h, you have to typedef uint32_t and uint64_t,
  * and you have to define PRIu64 and PRIx64 in this file as follows:
  * @verbatim
  typedef unsigned int uint32_t
- typedef unsigned long long uint64_t  
+ typedef unsigned long long uint64_t
  #define PRIu64 "llu"
  #define PRIx64 "llx"
 @endverbatim
@@ -28,14 +32,14 @@
  * unsigned int and 64-bit unsigned int in hexadecimal format.
  */
 
-#ifndef SFMT_H
-#define SFMT_H
-
-#ifdef __cplusplus
+#ifndef SFMTST_H
+#define SFMTST_H
+#if defined(__cplusplus)
 extern "C" {
 #endif
 
 #include <stdio.h>
+#include <assert.h>
 
 #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
   #include <inttypes.h>
@@ -60,105 +64,232 @@ extern "C" {
   #endif
 #endif
 
-#if defined(__GNUC__)
-#define ALWAYSINLINE __attribute__((always_inline))
-#else
-#define ALWAYSINLINE
-#endif
+#include "SFMT-params.h"
 
-#if defined(_MSC_VER)
-  #if _MSC_VER >= 1200
-    #define PRE_ALWAYS __forceinline
-  #else
-    #define PRE_ALWAYS inline
+/*------------------------------------------
+  128-bit SIMD like data type for standard C
+  ------------------------------------------*/
+#if defined(HAVE_ALTIVEC)
+  #if !defined(__APPLE__)
+    #include <altivec.h>
   #endif
+/** 128-bit data structure */
+union W128_T {
+    vector unsigned int s;
+    uint32_t u[4];
+    uint64_t u64[2];
+};
+#elif defined(HAVE_SSE2)
+  #include <emmintrin.h>
+
+/** 128-bit data structure */
+union W128_T {
+    uint32_t u[4];
+    uint64_t u64[2];
+    __m128i si;
+};
 #else
-  #define PRE_ALWAYS inline
+/** 128-bit data structure */
+union W128_T {
+    uint32_t u[4];
+    uint64_t u64[2];
+};
 #endif
 
-uint32_t gen_rand32(void);
-uint64_t gen_rand64(void);
-void fill_array32(uint32_t *array, int size);
-void fill_array64(uint64_t *array, int size);
-void init_gen_rand(uint32_t seed);
-void init_by_array(uint32_t *init_key, int key_length);
-const char *get_idstring(void);
-int get_min_array_size32(void);
-int get_min_array_size64(void);
+/** 128-bit data type */
+typedef union W128_T w128_t;
 
-/* These real versions are due to Isaku Wada */
-/** generates a random number on [0,1]-real-interval */
-inline static double to_real1(uint32_t v)
-{
-    return v * (1.0/4294967295.0); 
-    /* divided by 2^32-1 */ 
+/**
+ * SFMT internal state
+ */
+struct SFMT_T {
+    /** the 128-bit internal state array */
+    w128_t state[SFMT_N];
+    /** index counter to the 32-bit internal state array */
+    int idx;
+};
+
+typedef struct SFMT_T sfmt_t;
+
+void sfmt_fill_array32(sfmt_t * sfmt, uint32_t * array, int size);
+void sfmt_fill_array64(sfmt_t * sfmt, uint64_t * array, int size);
+void sfmt_init_gen_rand(sfmt_t * sfmt, uint32_t seed);
+void sfmt_init_by_array(sfmt_t * sfmt, uint32_t * init_key, int key_length);
+const char * sfmt_get_idstring(sfmt_t * sfmt);
+int sfmt_get_min_array_size32(sfmt_t * sfmt);
+int sfmt_get_min_array_size64(sfmt_t * sfmt);
+void sfmt_gen_rand_all(sfmt_t * sfmt);
+
+#ifndef ONLY64
+/**
+ * This function generates and returns 32-bit pseudorandom number.
+ * init_gen_rand or init_by_array must be called before this function.
+ * @param sfmt SFMT internal state
+ * @return 32-bit pseudorandom number
+ */
+inline static uint32_t sfmt_genrand_uint32(sfmt_t * sfmt) {
+    uint32_t r;
+    uint32_t * psfmt32 = &sfmt->state[0].u[0];
+
+    if (sfmt->idx >= SFMT_N32) {
+        sfmt_gen_rand_all(sfmt);
+        sfmt->idx = 0;
+    }
+    r = psfmt32[sfmt->idx++];
+    return r;
+}
+#endif
+/**
+ * This function generates and returns 64-bit pseudorandom number.
+ * init_gen_rand or init_by_array must be called before this function.
+ * The function gen_rand64 should not be called after gen_rand32,
+ * unless an initialization is again executed.
+ * @param sfmt SFMT internal state
+ * @return 64-bit pseudorandom number
+ */
+inline static uint64_t sfmt_genrand_uint64(sfmt_t * sfmt) {
+#if defined(BIG_ENDIAN64) && !defined(ONLY64)
+    uint32_t * psfmt32 = &sfmt->state[0].u[0];
+    uint32_t r1, r2;
+#else
+    uint64_t r;
+#endif
+    uint64_t * psfmt64 = &sfmt->state[0].u64[0];
+    assert(sfmt->idx % 2 == 0);
+
+    if (sfmt->idx >= SFMT_N32) {
+        sfmt_gen_rand_all(sfmt);
+        sfmt->idx = 0;
+    }
+#if defined(BIG_ENDIAN64) && !defined(ONLY64)
+    r1 = psfmt32[sfmt->idx];
+    r2 = psfmt32[sfmt->idx + 1];
+    sfmt->idx += 2;
+    return ((uint64_t)r2 << 32) | r1;
+#else
+    r = psfmt64[sfmt->idx / 2];
+    sfmt->idx += 2;
+    return r;
+#endif
 }
 
-/** generates a random number on [0,1]-real-interval */
-inline static double genrand_real1(void)
+/* =================================================
+   The following real versions are due to Isaku Wada
+   ================================================= */
+/**
+ * converts an unsigned 32-bit number to a double on [0,1]-real-interval.
+ * @param v 32-bit unsigned integer
+ * @return double on [0,1]-real-interval
+ */
+inline static double sfmt_to_real1(uint32_t v)
 {
-    return to_real1(gen_rand32());
+    return v * (1.0/4294967295.0);
+    /* divided by 2^32-1 */
 }
 
-/** generates a random number on [0,1)-real-interval */
-inline static double to_real2(uint32_t v)
+/**
+ * generates a random number on [0,1]-real-interval
+ * @param sfmt SFMT internal state
+ * @return double on [0,1]-real-interval
+ */
+inline static double sfmt_genrand_real1(sfmt_t * sfmt)
 {
-    return v * (1.0/4294967296.0); 
+    return sfmt_to_real1(sfmt_genrand_uint32(sfmt));
+}
+
+/**
+ * converts an unsigned 32-bit integer to a double on [0,1)-real-interval.
+ * @param v 32-bit unsigned integer
+ * @return double on [0,1)-real-interval
+ */
+inline static double sfmt_to_real2(uint32_t v)
+{
+    return v * (1.0/4294967296.0);
     /* divided by 2^32 */
 }
 
-/** generates a random number on [0,1)-real-interval */
-inline static double genrand_real2(void)
+/**
+ * generates a random number on [0,1)-real-interval
+ * @param sfmt SFMT internal state
+ * @return double on [0,1)-real-interval
+ */
+inline static double sfmt_genrand_real2(sfmt_t * sfmt)
 {
-    return to_real2(gen_rand32());
+    return sfmt_to_real2(sfmt_genrand_uint32(sfmt));
 }
 
-/** generates a random number on (0,1)-real-interval */
-inline static double to_real3(uint32_t v)
+/**
+ * converts an unsigned 32-bit integer to a double on (0,1)-real-interval.
+ * @param v 32-bit unsigned integer
+ * @return double on (0,1)-real-interval
+ */
+inline static double sfmt_to_real3(uint32_t v)
 {
-    return (((double)v) + 0.5)*(1.0/4294967296.0); 
+    return (((double)v) + 0.5)*(1.0/4294967296.0);
     /* divided by 2^32 */
 }
 
-/** generates a random number on (0,1)-real-interval */
-inline static double genrand_real3(void)
+/**
+ * generates a random number on (0,1)-real-interval
+ * @param sfmt SFMT internal state
+ * @return double on (0,1)-real-interval
+ */
+inline static double sfmt_genrand_real3(sfmt_t * sfmt)
 {
-    return to_real3(gen_rand32());
-}
-/** These real versions are due to Isaku Wada */
-
-/** generates a random number on [0,1) with 53-bit resolution*/
-inline static double to_res53(uint64_t v) 
-{ 
-    return v * (1.0/18446744073709551616.0L);
+    return sfmt_to_real3(sfmt_genrand_uint32(sfmt));
 }
 
-/** generates a random number on [0,1) with 53-bit resolution from two
- * 32 bit integers */
-inline static double to_res53_mix(uint32_t x, uint32_t y) 
-{ 
-    return to_res53(x | ((uint64_t)y << 32));
-}
-
-/** generates a random number on [0,1) with 53-bit resolution
+/**
+ * converts an unsigned 32-bit integer to double on [0,1)
+ * with 53-bit resolution.
+ * @param v 32-bit unsigned integer
+ * @return double on [0,1)-real-interval with 53-bit resolution.
  */
-inline static double genrand_res53(void) 
-{ 
-    return to_res53(gen_rand64());
-} 
+inline static double sfmt_to_res53(uint64_t v)
+{
+    return v * (1.0/18446744073709551616.0);
+}
 
-/** generates a random number on [0,1) with 53-bit resolution
-    using 32bit integer.
+/**
+ * generates a random number on [0,1) with 53-bit resolution
+ * @param sfmt SFMT internal state
+ * @return double on [0,1) with 53-bit resolution
  */
-inline static double genrand_res53_mix(void) 
-{ 
+inline static double sfmt_genrand_res53(sfmt_t * sfmt)
+{
+    return sfmt_to_res53(sfmt_genrand_uint64(sfmt));
+}
+
+
+/* =================================================
+   The following function are added by Saito.
+   ================================================= */
+/**
+ * generates a random number on [0,1) with 53-bit resolution from two
+ * 32 bit integers
+ */
+inline static double sfmt_to_res53_mix(uint32_t x, uint32_t y)
+{
+    return sfmt_to_res53(x | ((uint64_t)y << 32));
+}
+
+/**
+ * generates a random number on [0,1) with 53-bit resolution
+ * using two 32bit integers.
+ * @param sfmt SFMT internal state
+ * @return double on [0,1) with 53-bit resolution
+ */
+inline static double sfmt_genrand_res53_mix(sfmt_t * sfmt)
+{
     uint32_t x, y;
 
-    x = gen_rand32();
-    y = gen_rand32();
-    return to_res53_mix(x, y);
-} 
-#ifdef __cplusplus
+    x = sfmt_genrand_uint32(sfmt);
+    y = sfmt_genrand_uint32(sfmt);
+    return sfmt_to_res53_mix(x, y);
+}
+
+#if defined(__cplusplus)
 }
 #endif
+
 #endif