diff --git a/RandomWELL512a.h b/RandomWELL512a.h index 7a9e491..89c5996 100644 --- a/RandomWELL512a.h +++ b/RandomWELL512a.h @@ -34,75 +34,75 @@ class RandomWELL512a { public: - RandomWELL512a(int seed); - RandomWELL512a(unsigned *seed); - - unsigned GetUnsigned(); + RandomWELL512a(int seed); + RandomWELL512a(unsigned *seed); - double GetDouble(); + unsigned GetUnsigned(); + + double GetDouble(); private: - unsigned state[16]; - unsigned index; + unsigned state[16]; + unsigned index; }; inline RandomWELL512a::RandomWELL512a(int seed) - : index(0) + : index(0) { - srand(seed); - for (int i = 0; i < 16; ++i) - state[i] = rand(); + srand(seed); + for (int i = 0; i < 16; ++i) + state[i] = rand(); } inline RandomWELL512a::RandomWELL512a(unsigned *seed) - : index(0) + : index(0) { - memcpy(state, seed, 16 * sizeof(unsigned)); + memcpy(state, seed, 16 * sizeof(unsigned)); } inline unsigned RandomWELL512a::GetUnsigned() { - #define MUTATE_LEFT(value, shift) value ^ (value << shift) - #define MUTATE_RIGHT(value, shift) value ^ (value >> shift) - #define MUTATE_LEFT_MIX(value, shift, mix) value ^ ((value << shift) & mix) + #define MUTATE_LEFT(value, shift) value ^ (value << shift) + #define MUTATE_RIGHT(value, shift) value ^ (value >> shift) + #define MUTATE_LEFT_MIX(value, shift, mix) value ^ ((value << shift) & mix) - unsigned index_9 = (index + 9) & 15; - unsigned index_13 = (index + 13) & 15; - unsigned index_15 = (index + 15) & 15; + unsigned index_9 = (index + 9) & 15; + unsigned index_13 = (index + 13) & 15; + unsigned index_15 = (index + 15) & 15; - unsigned state_index = state[index]; - unsigned state_index_9 = state[index_9]; - unsigned state_index_13 = state[index_13]; - unsigned state_index_15 = state[index_15]; + unsigned state_index = state[index]; + unsigned state_index_9 = state[index_9]; + unsigned state_index_13 = state[index_13]; + unsigned state_index_15 = state[index_15]; - unsigned z1 = MUTATE_LEFT(state_index, 16); - z1 ^= MUTATE_LEFT(state_index_13, 15); + unsigned z1 = MUTATE_LEFT(state_index, 16); + z1 ^= MUTATE_LEFT(state_index_13, 15); - unsigned z2 = MUTATE_RIGHT(state_index_9, 11); + unsigned z2 = MUTATE_RIGHT(state_index_9, 11); - unsigned result0 = z1 ^ z2; - state[index] = result0; + unsigned result0 = z1 ^ z2; + state[index] = result0; - unsigned result1 = MUTATE_LEFT(state_index_15, 2); - result1 ^= MUTATE_LEFT(z1, 18);; - result1 ^= z2 << 28; + unsigned result1 = MUTATE_LEFT(state_index_15, 2); + result1 ^= MUTATE_LEFT(z1, 18);; + result1 ^= z2 << 28; - result1 ^= MUTATE_LEFT_MIX(result0, 5, 0xda442d24U); + result1 ^= MUTATE_LEFT_MIX(result0, 5, 0xda442d24U); - index = index_15; - state[index] = result1; - return result1; + index = index_15; + state[index] = result1; + return result1; - #undef MUTATE_LEFT - #undef MUTATE_RIGHT - #undef MUTATE_LEFT_MIX + #undef MUTATE_LEFT + #undef MUTATE_RIGHT + #undef MUTATE_LEFT_MIX } inline double RandomWELL512a::GetDouble() { - const double kToFloat = 2.32830643653869628906e-10; - return GetUnsigned() * kToFloat; + const double kToFloat = 2.32830643653869628906e-10; + return GetUnsigned() * kToFloat; } #endif // RANDOM_WELL512A_H diff --git a/RandomWELL512a_SSE2.h b/RandomWELL512a_SSE2.h index 24d2bf3..fa483dc 100644 --- a/RandomWELL512a_SSE2.h +++ b/RandomWELL512a_SSE2.h @@ -34,92 +34,92 @@ class RandomWELL512a_SSE2 { public: - RandomWELL512a_SSE2(int seed); - RandomWELL512a_SSE2(unsigned *seed); + RandomWELL512a_SSE2(int seed); + RandomWELL512a_SSE2(unsigned *seed); - unsigned GetUnsigned(); - double GetDouble(); + unsigned GetUnsigned(); + double GetDouble(); - void GetUnsigned4(unsigned *result4); - void GetDouble4(double *result4); + void GetUnsigned4(unsigned *result4); + void GetDouble4(double *result4); private: - __m128i xmm_state[16]; - unsigned index; + __m128i xmm_state[16]; + unsigned index; - // Helper to allow us to return one number per call. - unsigned result[4]; - unsigned resultIndex; + // Helper to allow us to return one number per call. + unsigned result[4]; + unsigned resultIndex; }; inline RandomWELL512a_SSE2::RandomWELL512a_SSE2(int seed) - : index(0) - , resultIndex(4) + : index(0) + , resultIndex(4) { - srand(seed); - for (int i = 0; i < 16; ++i) - xmm_state[i] = _mm_set_epi32(rand(), rand(), rand(), rand()); + srand(seed); + for (int i = 0; i < 16; ++i) + xmm_state[i] = _mm_set_epi32(rand(), rand(), rand(), rand()); } inline RandomWELL512a_SSE2::RandomWELL512a_SSE2(unsigned *seed) - : index(0) - , resultIndex(4) + : index(0) + , resultIndex(4) { - for (int i = 0; i < 16; ++i) - xmm_state[i] = _mm_set_epi32(seed[i + 48], seed[i + 32], seed[i + 16], seed[i]); + for (int i = 0; i < 16; ++i) + xmm_state[i] = _mm_set_epi32(seed[i + 48], seed[i + 32], seed[i + 16], seed[i]); } inline unsigned RandomWELL512a_SSE2::GetUnsigned() { - if (resultIndex >= 4) - { - GetUnsigned4(result); - resultIndex = 0; - } - return result[resultIndex++]; + if (resultIndex >= 4) + { + GetUnsigned4(result); + resultIndex = 0; + } + return result[resultIndex++]; } inline double RandomWELL512a_SSE2::GetDouble() { - const double kToFloat = 2.32830643653869628906e-10; - return GetUnsigned() * kToFloat; + const double kToFloat = 2.32830643653869628906e-10; + return GetUnsigned() * kToFloat; } inline void RandomWELL512a_SSE2::GetUnsigned4(unsigned *result4) { - unsigned index_15 = (index + 15) & 15; - __m128i state_index = xmm_state[index]; - __m128i state_index_9 = xmm_state[(index + 9) & 15]; - __m128i state_index_13 = xmm_state[(index + 13) & 15]; - __m128i state_index_15 = xmm_state[index_15]; - const __m128i kMix = _mm_set1_epi32(0xda442d24); + unsigned index_15 = (index + 15) & 15; + __m128i state_index = xmm_state[index]; + __m128i state_index_9 = xmm_state[(index + 9) & 15]; + __m128i state_index_13 = xmm_state[(index + 13) & 15]; + __m128i state_index_15 = xmm_state[index_15]; + const __m128i kMix = _mm_set1_epi32(0xda442d24); - __m128i left = _mm_xor_si128(state_index, _mm_slli_epi32(state_index, 16)); - __m128i right = _mm_xor_si128(state_index_13, _mm_slli_epi32(state_index_13, 15)); - __m128i z1 = _mm_xor_si128(left, right); - __m128i z2 = _mm_xor_si128(state_index_9, _mm_srli_epi32(state_index_9, 11)); - __m128i result0 = _mm_xor_si128(z1, z2); - xmm_state[index] = result0; + __m128i left = _mm_xor_si128(state_index, _mm_slli_epi32(state_index, 16)); + __m128i right = _mm_xor_si128(state_index_13, _mm_slli_epi32(state_index_13, 15)); + __m128i z1 = _mm_xor_si128(left, right); + __m128i z2 = _mm_xor_si128(state_index_9, _mm_srli_epi32(state_index_9, 11)); + __m128i result0 = _mm_xor_si128(z1, z2); + xmm_state[index] = result0; - __m128i result1 = _mm_xor_si128(state_index_15, _mm_slli_epi32(state_index_15, 2)); - result1 = _mm_xor_si128(result1, _mm_xor_si128(z1, _mm_slli_epi32(z1, 18))); - result1 = _mm_xor_si128(result1, _mm_slli_epi32(z2, 28)); - result1 = _mm_xor_si128(result1, _mm_xor_si128(result0, _mm_and_si128(_mm_slli_epi32(result0, 5), kMix))); - index = index_15; - xmm_state[index] = result1; + __m128i result1 = _mm_xor_si128(state_index_15, _mm_slli_epi32(state_index_15, 2)); + result1 = _mm_xor_si128(result1, _mm_xor_si128(z1, _mm_slli_epi32(z1, 18))); + result1 = _mm_xor_si128(result1, _mm_slli_epi32(z2, 28)); + result1 = _mm_xor_si128(result1, _mm_xor_si128(result0, _mm_and_si128(_mm_slli_epi32(result0, 5), kMix))); + index = index_15; + xmm_state[index] = result1; - _mm_storeu_si128((__m128i *)result4, result1); + _mm_storeu_si128((__m128i *)result4, result1); } inline void RandomWELL512a_SSE2::GetDouble4(double *result4) { - unsigned unsignedResult[4]; - GetUnsigned4(unsignedResult); + unsigned unsignedResult[4]; + GetUnsigned4(unsignedResult); - const double kToFloat = 2.32830643653869628906e-10; - for (unsigned loop = 0; loop < 4; ++loop) - result4[loop] = unsignedResult[loop] * kToFloat; + const double kToFloat = 2.32830643653869628906e-10; + for (unsigned loop = 0; loop < 4; ++loop) + result4[loop] = unsignedResult[loop] * kToFloat; } #endif // RANDOM_WELL512A_SSE2_H diff --git a/Timer.h b/Timer.h index 41722bc..924978d 100644 --- a/Timer.h +++ b/Timer.h @@ -34,22 +34,22 @@ class Timer { public: - Timer() - { - startTime = clock(); - } + Timer() + { + startTime = clock(); + } - void Report(const char *msg = NULL) - { - clock_t totalTime = clock() - startTime; - float seconds = (float)totalTime / CLOCKS_PER_SEC; - if (msg) - std::cout << msg << " "; - std::cout << "took " << seconds << " seconds." << std::endl; - } + void Report(const char *msg = NULL) + { + clock_t totalTime = clock() - startTime; + float seconds = (float)totalTime / CLOCKS_PER_SEC; + if (msg) + std::cout << msg << " "; + std::cout << "took " << seconds << " seconds." << std::endl; + } private: - clock_t startTime; + clock_t startTime; }; #endif // TIMER_H diff --git a/main.cpp b/main.cpp index bfd9a32..7d16b07 100644 --- a/main.cpp +++ b/main.cpp @@ -33,104 +33,104 @@ void Benchmark() { - const unsigned kSeed = 123; - const unsigned kNumLoops = 5; - const unsigned kNumIterations = 40000000; - double *result0 = new double [kNumIterations]; - double *result1 = new double [kNumIterations]; - double *result2 = new double [kNumIterations]; - double *result3 = new double [kNumIterations]; + const unsigned kSeed = 123; + const unsigned kNumLoops = 5; + const unsigned kNumIterations = 40000000; + double *result0 = new double [kNumIterations]; + double *result1 = new double [kNumIterations]; + double *result2 = new double [kNumIterations]; + double *result3 = new double [kNumIterations]; - srand(kSeed); - unsigned seed[16]; - for (unsigned i = 0; i < 16; ++i) - seed[i] = rand(); + srand(kSeed); + unsigned seed[16]; + for (unsigned i = 0; i < 16; ++i) + seed[i] = rand(); - for (unsigned loop = 0; loop < kNumLoops; ++loop) - { - // RAND implementation. - srand(kSeed); - Timer timer0; - for (unsigned i = 0; i < kNumIterations; ++i) - result0[0] = rand() / (double)RAND_MAX; - timer0.Report("Rand(): "); + for (unsigned loop = 0; loop < kNumLoops; ++loop) + { + // RAND implementation. + srand(kSeed); + Timer timer0; + for (unsigned i = 0; i < kNumIterations; ++i) + result0[0] = rand() / (double)RAND_MAX; + timer0.Report("Rand(): "); - // WELL512 C++ implementation. - RandomWELL512a random(kSeed); - Timer timer1; - for (unsigned i = 0; i < kNumIterations; ++i) - result1[i] = random.GetDouble(); - timer1.Report("WELL512 C++: "); + // WELL512 C++ implementation. + RandomWELL512a random(kSeed); + Timer timer1; + for (unsigned i = 0; i < kNumIterations; ++i) + result1[i] = random.GetDouble(); + timer1.Report("WELL512 C++: "); - // WELL512 SSE2 implementation. - RandomWELL512a_SSE2 randomSSE2(kSeed); - Timer timer2; - for (unsigned i = 0; i < kNumIterations; i += 4) - randomSSE2.GetDouble4(result2 + i); - timer2.Report("WELL512 SSE2: "); + // WELL512 SSE2 implementation. + RandomWELL512a_SSE2 randomSSE2(kSeed); + Timer timer2; + for (unsigned i = 0; i < kNumIterations; i += 4) + randomSSE2.GetDouble4(result2 + i); + timer2.Report("WELL512 SSE2: "); - // WELL512 C implementation. - InitWELLRNG512a(seed); - Timer timer3; - for (unsigned i = 0; i < kNumIterations; ++i) - result3[i] = WELLRNG512a(); - timer3.Report("WELL512 C: "); + // WELL512 C implementation. + InitWELLRNG512a(seed); + Timer timer3; + for (unsigned i = 0; i < kNumIterations; ++i) + result3[i] = WELLRNG512a(); + timer3.Report("WELL512 C: "); - std::cout << "---" << std::endl; - } + std::cout << "---" << std::endl; + } - delete [] result0; - delete [] result1; - delete [] result2; - delete [] result3; + delete [] result0; + delete [] result1; + delete [] result2; + delete [] result3; } // Verify that the SIMD implementation returns the same values as the original // algorithm would. void Test() { - const int kSeed = 123; - const unsigned kNumIterations = 4 * 1024; + const int kSeed = 123; + const unsigned kNumIterations = 4 * 1024; - double *result0 = new double [kNumIterations]; - double *result1 = new double [kNumIterations]; + double *result0 = new double [kNumIterations]; + double *result1 = new double [kNumIterations]; - srand(kSeed); - unsigned seed[4 * 16]; - for (unsigned i = 0; i < 4 * 16; ++i) - seed[i] = rand(); - RandomWELL512a randomWell0(seed + 0 * 16); - RandomWELL512a randomWell1(seed + 1 * 16); - RandomWELL512a randomWell2(seed + 2 * 16); - RandomWELL512a randomWell3(seed + 3 * 16); + srand(kSeed); + unsigned seed[4 * 16]; + for (unsigned i = 0; i < 4 * 16; ++i) + seed[i] = rand(); + RandomWELL512a randomWell0(seed + 0 * 16); + RandomWELL512a randomWell1(seed + 1 * 16); + RandomWELL512a randomWell2(seed + 2 * 16); + RandomWELL512a randomWell3(seed + 3 * 16); - RandomWELL512a_SSE2 randomWellSSE2(seed); + RandomWELL512a_SSE2 randomWellSSE2(seed); - for (unsigned i = 0; i < kNumIterations; i += 4) - { - result0[i + 0] = randomWell0.GetDouble(); - result0[i + 1] = randomWell1.GetDouble(); - result0[i + 2] = randomWell2.GetDouble(); - result0[i + 3] = randomWell3.GetDouble(); + for (unsigned i = 0; i < kNumIterations; i += 4) + { + result0[i + 0] = randomWell0.GetDouble(); + result0[i + 1] = randomWell1.GetDouble(); + result0[i + 2] = randomWell2.GetDouble(); + result0[i + 3] = randomWell3.GetDouble(); - result1[i + 0] = randomWellSSE2.GetDouble(); - result1[i + 1] = randomWellSSE2.GetDouble(); - result1[i + 2] = randomWellSSE2.GetDouble(); - result1[i + 3] = randomWellSSE2.GetDouble(); - } + result1[i + 0] = randomWellSSE2.GetDouble(); + result1[i + 1] = randomWellSSE2.GetDouble(); + result1[i + 2] = randomWellSSE2.GetDouble(); + result1[i + 3] = randomWellSSE2.GetDouble(); + } - if (memcmp(result0, result1, kNumIterations * sizeof(double))) - std::cout << "ERROR: C++ vs SSE2: The results don't match!" << std::endl; - else - std::cout << "C++ vs SSE2: Results match" << std::endl; + if (memcmp(result0, result1, kNumIterations * sizeof(double))) + std::cout << "ERROR: C++ vs SSE2: The results don't match!" << std::endl; + else + std::cout << "C++ vs SSE2: Results match" << std::endl; - delete [] result0; - delete [] result1; + delete [] result0; + delete [] result1; } int main(int argc, char **argv) { - Benchmark(); - Test(); - return 0; + Benchmark(); + Test(); + return 0; }