From 0a03261329addc6ea028f51b2ac064fbc5c26a00 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Branimir=20Karad=C5=BEi=C4=87?=
Date: Sun, 26 Feb 2017 18:30:16 -0800
Subject: [PATCH] Added SIMD rsqrt bench.

---
Notes (review; this free-text area precedes the first diff header):

  What the patch does
    * Adds tests/simd_bench.cpp, which defines simd_bench(). It fills
      1024*1024 simd128_t values via randUnitSphere() (4th float lane set
      to 1.0f) and then times four reciprocal-square-root variants,
      10 iterations each, writing results to a second buffer carved out of
      the same aligned allocation.
    * main() in tests/handle_bench.cpp declares simd_bench() with a local
      extern and calls it right before returning.
    * flushCache() copies 1 << 26 bytes between two function-static heap
      buffers before every timed run. The buffers are never freed and the
      source buffer is never initialised. Presumably the copy is there to
      push the benchmark data out of the CPU caches -- confirm intent.
    * Each printed number is the sum of raw bx::getHPCounter() deltas over
      the 10 iterations; this patch does not convert it to a time unit.
    * simd_rsqrt_bench() processes _numVertices/4 groups of four vectors,
      so a count that is not a multiple of 4 leaves the tail untouched
      (the only caller passes 1024*1024).
    * The result of BX_ALIGNED_ALLOC is used without a null check.

  Repairs made to this copy of the patch
    * Line structure of the mail header, diffstat and hunks was rebuilt;
      the text had been collapsed onto three physical lines. Indentation
      inside the payload is tabs chosen by the reviewer, because the
      original whitespace was lost.
    * "template" had no parameter list. Restored as
      template<SimdRsqrtFn simdRsqrtFn>, inferred from the SimdRsqrtFn
      typedef and the simdRsqrtFn(...) calls in the function body.
    * The four simd_rsqrt_bench(...) calls had no explicit template
      argument. Restored from the printf label of each section:
      bx::simd_rsqrt_est, bx::simd_rsqrt_nr, bx::simd_rsqrt_carmack,
      bx::simd_rsqrt. TODO: confirm these names against bx/simd_t.h.

  Still damaged -- fix before running git am / git apply
    * Every "#include" line lost its <header> name (angle-bracketed text
      was stripped). They are reproduced bare because the names cannot be
      recovered from this text.
    * The From: header may have lost an <address> for the same reason.
    * Runs of spaces were collapsed, so alignment padding inside the
      printf format strings may be missing; the strings are reproduced
      exactly as they appear here.

 tests/handle_bench.cpp |   9 +++-
 tests/simd_bench.cpp   | 115 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 123 insertions(+), 1 deletion(-)
 create mode 100644 tests/simd_bench.cpp

diff --git a/tests/handle_bench.cpp b/tests/handle_bench.cpp
index 6169bc4..e8d8bee 100644
--- a/tests/handle_bench.cpp
+++ b/tests/handle_bench.cpp
@@ -1,4 +1,8 @@
-#include
+/*
+ * Copyright 2010-2017 Branimir Karadzic. All rights reserved.
+ * License: https://github.com/bkaradzic/bx#license-bsd-2-clause
+ */
+
 #include
 #include
 #include
@@ -99,5 +103,8 @@ int main()
 		printf("HandleHashMap: %15f\n", double(elapsed) );
 	}
 
+	extern void simd_bench();
+	simd_bench();
+
 	return EXIT_SUCCESS;
 }
diff --git a/tests/simd_bench.cpp b/tests/simd_bench.cpp
new file mode 100644
index 0000000..205fef2
--- /dev/null
+++ b/tests/simd_bench.cpp
@@ -0,0 +1,115 @@
+/*
+ * Copyright 2010-2017 Branimir Karadzic. All rights reserved.
+ * License: https://github.com/bkaradzic/bx#license-bsd-2-clause
+ */
+
+#include
+#include
+#include
+#include
+
+#include
+
+static void flushCache()
+{
+	static uint32_t length = 1 << 26;
+	static uint8_t* input = new uint8_t[length];
+	static uint8_t* output = new uint8_t[length];
+	bx::memCopy(output, input, length);
+}
+
+typedef bx::simd128_t (*SimdRsqrtFn)(bx::simd128_t _a);
+
+template<SimdRsqrtFn simdRsqrtFn>
+void simd_rsqrt_bench(bx::simd128_t* _dst, bx::simd128_t* _src, uint32_t _numVertices)
+{
+	for (uint32_t ii = 0, num = _numVertices/4; ii < num; ++ii)
+	{
+		bx::simd128_t* ptr = &_src[ii*4];
+		bx::simd128_t tmp0 = bx::simd_ld(ptr + 0);
+		bx::simd128_t tmp1 = bx::simd_ld(ptr + 1);
+		bx::simd128_t tmp2 = bx::simd_ld(ptr + 2);
+		bx::simd128_t tmp3 = bx::simd_ld(ptr + 3);
+		bx::simd128_t rsqrt0 = simdRsqrtFn(tmp0);
+		bx::simd128_t rsqrt1 = simdRsqrtFn(tmp1);
+		bx::simd128_t rsqrt2 = simdRsqrtFn(tmp2);
+		bx::simd128_t rsqrt3 = simdRsqrtFn(tmp3);
+
+		ptr = &_dst[ii*4];
+		bx::simd_st(ptr + 0, rsqrt0);
+		bx::simd_st(ptr + 1, rsqrt1);
+		bx::simd_st(ptr + 2, rsqrt2);
+		bx::simd_st(ptr + 3, rsqrt3);
+	}
+}
+
+void simd_bench()
+{
+	bx::CrtAllocator allocator;
+	bx::RngMwc rng;
+
+	const uint32_t numVertices = 1024*1024;
+
+	uint8_t* data = (uint8_t*)BX_ALIGNED_ALLOC(&allocator, 2*numVertices*sizeof(bx::simd128_t), 16);
+	bx::simd128_t* src = (bx::simd128_t*)data;
+	bx::simd128_t* dst = &src[numVertices];
+
+	for (uint32_t ii = 0; ii < numVertices; ++ii)
+	{
+		float* ptr = (float*)&src[ii];
+		randUnitSphere(ptr, &rng);
+		ptr[3] = 1.0f;
+	}
+
+	const uint32_t numIterations = 10;
+
+	{
+		int64_t elapsed = 0;
+		for (uint32_t test = 0; test < numIterations; ++test)
+		{
+			flushCache();
+			elapsed += -bx::getHPCounter();
+			simd_rsqrt_bench<bx::simd_rsqrt_est>(dst, src, numVertices);
+			elapsed += bx::getHPCounter();
+		}
+		printf(" simd_rsqrt_est: %15f\n", double(elapsed) );
+	}
+
+	{
+		int64_t elapsed = 0;
+		for (uint32_t test = 0; test < numIterations; ++test)
+		{
+			flushCache();
+			elapsed += -bx::getHPCounter();
+			simd_rsqrt_bench<bx::simd_rsqrt_nr>(dst, src, numVertices);
+			elapsed += bx::getHPCounter();
+		}
+		printf(" simd_rsqrt_nr: %15f\n", double(elapsed) );
+	}
+
+	{
+		int64_t elapsed = 0;
+		for (uint32_t test = 0; test < numIterations; ++test)
+		{
+			flushCache();
+			elapsed += -bx::getHPCounter();
+			simd_rsqrt_bench<bx::simd_rsqrt_carmack>(dst, src, numVertices);
+			elapsed += bx::getHPCounter();
+		}
+		printf("simd_rsqrt_carmack: %15f\n", double(elapsed) );
+	}
+
+	{
+		int64_t elapsed = 0;
+		for (uint32_t test = 0; test < numIterations; ++test)
+		{
+			flushCache();
+			elapsed += -bx::getHPCounter();
+			simd_rsqrt_bench<bx::simd_rsqrt>(dst, src, numVertices);
+			elapsed += bx::getHPCounter();
+		}
+		printf(" simd_rsqrt: %15f\n", double(elapsed) );
+	}
+
+	BX_ALIGNED_FREE(&allocator, data, 16);
+}