diff --git a/common/sfmt/SFMT-common.h b/common/sfmt/SFMT-common.h index c7d8aa9f..a5a9b050 100644 --- a/common/sfmt/SFMT-common.h +++ b/common/sfmt/SFMT-common.h @@ -28,7 +28,7 @@ extern "C" { #include "SFMT.h" inline static void do_recursion(w128_t * r, w128_t * a, w128_t * b, - w128_t * c, w128_t * d); + w128_t * c, w128_t * d); inline static void rshift128(w128_t *out, w128_t const *in, int shift); inline static void lshift128(w128_t *out, w128_t const *in, int shift); @@ -123,24 +123,24 @@ inline static void lshift128(w128_t *out, w128_t const *in, int shift) */ #ifdef ONLY64 inline static void do_recursion(w128_t *r, w128_t *a, w128_t *b, w128_t *c, - w128_t *d) { + w128_t *d) { w128_t x; w128_t y; lshift128(&x, a, SFMT_SL2); rshift128(&y, c, SFMT_SR2); r->u[0] = a->u[0] ^ x.u[0] ^ ((b->u[0] >> SFMT_SR1) & SFMT_MSK2) ^ y.u[0] - ^ (d->u[0] << SFMT_SL1); + ^ (d->u[0] << SFMT_SL1); r->u[1] = a->u[1] ^ x.u[1] ^ ((b->u[1] >> SFMT_SR1) & SFMT_MSK1) ^ y.u[1] - ^ (d->u[1] << SFMT_SL1); + ^ (d->u[1] << SFMT_SL1); r->u[2] = a->u[2] ^ x.u[2] ^ ((b->u[2] >> SFMT_SR1) & SFMT_MSK4) ^ y.u[2] - ^ (d->u[2] << SFMT_SL1); + ^ (d->u[2] << SFMT_SL1); r->u[3] = a->u[3] ^ x.u[3] ^ ((b->u[3] >> SFMT_SR1) & SFMT_MSK3) ^ y.u[3] - ^ (d->u[3] << SFMT_SL1); + ^ (d->u[3] << SFMT_SL1); } #else inline static void do_recursion(w128_t *r, w128_t *a, w128_t *b, - w128_t *c, w128_t *d) + w128_t *c, w128_t *d) { w128_t x; w128_t y; @@ -148,17 +148,17 @@ inline static void do_recursion(w128_t *r, w128_t *a, w128_t *b, lshift128(&x, a, SFMT_SL2); rshift128(&y, c, SFMT_SR2); r->u[0] = a->u[0] ^ x.u[0] ^ ((b->u[0] >> SFMT_SR1) & SFMT_MSK1) - ^ y.u[0] ^ (d->u[0] << SFMT_SL1); + ^ y.u[0] ^ (d->u[0] << SFMT_SL1); r->u[1] = a->u[1] ^ x.u[1] ^ ((b->u[1] >> SFMT_SR1) & SFMT_MSK2) - ^ y.u[1] ^ (d->u[1] << SFMT_SL1); + ^ y.u[1] ^ (d->u[1] << SFMT_SL1); r->u[2] = a->u[2] ^ x.u[2] ^ ((b->u[2] >> SFMT_SR1) & SFMT_MSK3) - ^ y.u[2] ^ (d->u[2] << SFMT_SL1); + ^ y.u[2] ^ (d->u[2] << SFMT_SL1); r->u[3] = a->u[3] ^ 
x.u[3] ^ ((b->u[3] >> SFMT_SR1) & SFMT_MSK4) - ^ y.u[3] ^ (d->u[3] << SFMT_SL1); + ^ y.u[3] ^ (d->u[3] << SFMT_SL1); } #endif -#endif - #if defined(__cplusplus) } #endif + +#endif // SFMT_COMMON_H diff --git a/common/sfmt/SFMT-params.h b/common/sfmt/SFMT-params.h index 372e6f11..2fe663ab 100644 --- a/common/sfmt/SFMT-params.h +++ b/common/sfmt/SFMT-params.h @@ -46,8 +46,8 @@ */ /** the parameter of shift right as one 128-bit register. - * The 128-bit integer is shifted by (SFMT_SL2 * 8) bits. -#define SFMT_SR21 1 + * The 128-bit integer is shifted by (SFMT_SR2 * 8) bits. +#define SFMT_SR2 1 */ /** A bitmask, used in the recursion. These parameters are introduced @@ -59,10 +59,10 @@ */ /** These definitions are part of a 128-bit period certification vector. -#define SFMT_PARITY1 0x00000001U -#define SFMT_PARITY2 0x00000000U -#define SFMT_PARITY3 0x00000000U -#define SFMT_PARITY4 0xc98e126aU +#define SFMT_PARITY1 0x00000001U +#define SFMT_PARITY2 0x00000000U +#define SFMT_PARITY3 0x00000000U +#define SFMT_PARITY4 0xc98e126aU */ #if SFMT_MEXP == 607 diff --git a/common/sfmt/SFMT.c b/common/sfmt/SFMT.c index 2652df7d..b4ac9308 100644 --- a/common/sfmt/SFMT.c +++ b/common/sfmt/SFMT.c @@ -40,11 +40,6 @@ extern "C" { #undef ONLY64 #endif -/** - * parameters used by sse2. - */ -static const w128_t sse2_param_mask = {{SFMT_MSK1, SFMT_MSK2, - SFMT_MSK3, SFMT_MSK4}}; /*---------------- STATIC FUNCTIONS ----------------*/ @@ -60,11 +55,18 @@ inline static void swap(w128_t *array, int size); #if defined(HAVE_ALTIVEC) #include "SFMT-alti.h" #elif defined(HAVE_SSE2) +/** + * parameters used by sse2. 
+ */ + static const w128_t sse2_param_mask = {{SFMT_MSK1, SFMT_MSK2, + SFMT_MSK3, SFMT_MSK4}}; #if defined(_MSC_VER) #include "SFMT-sse2-msc.h" #else #include "SFMT-sse2.h" #endif +#elif defined(HAVE_NEON) + #include "SFMT-neon.h" #endif /** @@ -81,7 +83,7 @@ inline static int idxof(int i) { } #endif -#if (!defined(HAVE_ALTIVEC)) && (!defined(HAVE_SSE2)) +#if (!defined(HAVE_ALTIVEC)) && (!defined(HAVE_SSE2)) && (!defined(HAVE_NEON)) /** * This function fills the user-specified array with pseudorandom * integers. @@ -166,17 +168,19 @@ static uint32_t func2(uint32_t x) { * @param sfmt SFMT internal state */ static void period_certification(sfmt_t * sfmt) { - int inner = 0; + uint32_t inner = 0; int i, j; uint32_t work; uint32_t *psfmt32 = &sfmt->state[0].u[0]; const uint32_t parity[4] = {SFMT_PARITY1, SFMT_PARITY2, SFMT_PARITY3, SFMT_PARITY4}; - for (i = 0; i < 4; i++) + for (i = 0; i < 4; i++) { inner ^= psfmt32[idxof(i)] & parity[i]; - for (i = 16; i > 0; i >>= 1) + } + for (i = 16; i > 0; i >>= 1) { inner ^= inner >> i; + } inner &= 1; /* check OK */ if (inner == 1) { @@ -232,7 +236,7 @@ int sfmt_get_min_array_size64(sfmt_t * sfmt) { return SFMT_N64; } -#if !defined(HAVE_SSE2) && !defined(HAVE_ALTIVEC) +#if !defined(HAVE_SSE2) && !defined(HAVE_ALTIVEC) && !defined(HAVE_NEON) /** * This function fills the internal state array with pseudorandom * integers. diff --git a/common/sfmt/SFMT.h b/common/sfmt/SFMT.h index dca308a0..79e012d6 100644 --- a/common/sfmt/SFMT.h +++ b/common/sfmt/SFMT.h @@ -79,6 +79,15 @@ union W128_T { uint32_t u[4]; uint64_t u64[2]; }; +#elif defined(HAVE_NEON) + #include <arm_neon.h> + +/** 128-bit data structure */ +union W128_T { + uint32_t u[4]; + uint64_t u64[2]; + uint32x4_t si; +}; #elif defined(HAVE_SSE2) #include <emmintrin.h> @@ -247,7 +256,7 @@ inline static double sfmt_genrand_real3(sfmt_t * sfmt) */ inline static double sfmt_to_res53(uint64_t v) { - return v * (1.0/18446744073709551616.0); + return (v >> 11) * (1.0/9007199254740992.0); } /**