From d652f283b2eb0eca7c0a4ec7c48f57827658d541 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Branimir=20Karad=C5=BEi=C4=87?= Date: Sun, 28 Jan 2018 19:03:22 -0800 Subject: [PATCH] Implemented SIMD sqrt. --- include/bx/inline/math.inl | 23 +++++++++- include/bx/inline/simd128_ref.inl | 2 + include/bx/simd_t.h | 19 ++++++--- tests/handle_bench.cpp | 3 ++ tests/math_bench.cpp | 71 +++++++++++++++++++++++++++++++ 5 files changed, 111 insertions(+), 7 deletions(-) create mode 100644 tests/math_bench.cpp diff --git a/include/bx/inline/math.inl b/include/bx/inline/math.inl index 1664e85..a35cee8 100644 --- a/include/bx/inline/math.inl +++ b/include/bx/inline/math.inl @@ -9,6 +9,8 @@ # error "Must be included from bx/math.h!" #endif // BX_MATH_H_HEADER_GUARD +#include /* NOTE(review): the include target is missing in this copy of the patch; the code added below uses simd128_t, simd_splat, simd_sqrt, simd_stx and BX_CONFIG_SUPPORTS_SIMD, so it is presumably the bx SIMD header, confirm against upstream. */ + namespace bx { inline float toRad(float _deg) @@ -177,7 +179,7 @@ namespace bx return log(_a) * kInvLogNat2; } - inline float sqrt(float _a) + inline float sqrtRef(float _a) /* Renamed from sqrt by this patch; the sqrt added further down falls back to this function when BX_CONFIG_SUPPORTS_SIMD is zero. */ { if (_a < kNearZero) { @@ -187,6 +189,25 @@ namespace bx return 1.0f/rsqrt(_a); } + inline float sqrtSimd(float _a) /* Square root of _a through the SIMD API: splats _a into a simd128_t, applies simd_sqrt to it, then stores the outcome into a local float via simd_stx and returns that float. */ + { + const simd128_t aa = simd_splat(_a); + const simd128_t sqrta = simd_sqrt(aa); + float result; + simd_stx(&result, sqrta); + + return result; + } + + inline float sqrt(float _a) /* Compile-time dispatch: forwards to sqrtSimd when BX_CONFIG_SUPPORTS_SIMD is non-zero and to sqrtRef otherwise. */ + { +#if BX_CONFIG_SUPPORTS_SIMD + return sqrtSimd(_a); +#else + return sqrtRef(_a); +#endif // BX_CONFIG_SUPPORTS_SIMD + } + inline float rsqrt(float _a) { return pow(_a, -0.5f); diff --git a/include/bx/inline/simd128_ref.inl b/include/bx/inline/simd128_ref.inl index 0fa28d4..121be13 100644 --- a/include/bx/inline/simd128_ref.inl +++ b/include/bx/inline/simd128_ref.inl @@ -7,6 +7,8 @@ # error "Must be included from bx/simd_t.h!" 
#endif // BX_SIMD_T_H_HEADER_GUARD +#include /* NOTE(review): the include target is missing in this copy of the patch; confirm the header name against upstream. */ + namespace bx { #define ELEMx 0 diff --git a/include/bx/simd_t.h b/include/bx/simd_t.h index 2757194..33c3f48 100644 --- a/include/bx/simd_t.h +++ b/include/bx/simd_t.h @@ -7,7 +7,6 @@ #define BX_SIMD_T_H_HEADER_GUARD #include "bx.h" -#include "math.h" #define BX_SIMD_FORCE_INLINE BX_FORCE_INLINE #define BX_SIMD_INLINE inline @@ -17,6 +16,8 @@ #define BX_SIMD_NEON 0 #define BX_SIMD_SSE 0 +#define BX_CONFIG_SUPPORTS_SIMD 0 /* Default value; redefined to 1 further down when any of BX_SIMD_LANGEXT, BX_SIMD_NEON, BX_SIMD_SSE or BX_SIMD_AVX is non-zero. */ + #if defined(__AVX__) || defined(__AVX2__) # include # undef BX_SIMD_AVX @@ -484,6 +485,15 @@ BX_SIMD128_IMPLEMENT_TEST(xyzw); # include "inline/simd128_sse.inl" #endif // BX_SIMD_SSE +#if ( BX_SIMD_LANGEXT \ + || BX_SIMD_NEON \ + || BX_SIMD_SSE \ + || BX_SIMD_AVX \ + ) +# undef BX_CONFIG_SUPPORTS_SIMD +# define BX_CONFIG_SUPPORTS_SIMD 1 /* At least one of the backend flags tested above is non-zero. */ +#endif // BX_SIMD_* + namespace bx { union simd128_ref_t @@ -497,16 +507,13 @@ namespace bx # define BX_SIMD_WARN_REFERENCE_IMPL 0 #endif // BX_SIMD_WARN_REFERENCE_IMPL -#if !( BX_SIMD_LANGEXT \ - || BX_SIMD_NEON \ - || BX_SIMD_SSE \ - ) +#if !BX_CONFIG_SUPPORTS_SIMD /* No backend flag set: simd128_t is aliased to simd128_ref_t below. NOTE(review): unlike the condition this replaces, BX_CONFIG_SUPPORTS_SIMD also counts BX_SIMD_AVX; confirm simd128_t is still defined elsewhere when AVX is the only flag set. */ # if BX_SIMD_WARN_REFERENCE_IMPL # pragma message("*** Using SIMD128 reference implementation! ***") # endif // BX_SIMD_WARN_REFERENCE_IMPL typedef simd128_ref_t simd128_t; -#endif // +#endif // !BX_CONFIG_SUPPORTS_SIMD struct simd256_ref_t { diff --git a/tests/handle_bench.cpp b/tests/handle_bench.cpp index 6a8ecb8..4effb59 100644 --- a/tests/handle_bench.cpp +++ b/tests/handle_bench.cpp @@ -106,5 +106,8 @@ int main() extern void simd_bench(); simd_bench(); + extern void math_bench(); /* Declared locally; the definition is the math_bench function in tests/math_bench.cpp, which this patch creates. */ + math_bench(); + return bx::kExitSuccess; } diff --git a/tests/math_bench.cpp b/tests/math_bench.cpp new file mode 100644 index 0000000..2526400 --- /dev/null +++ b/tests/math_bench.cpp @@ -0,0 +1,71 @@ +/* + * Copyright 2010-2018 Branimir Karadzic. All rights reserved. 
+ * License: https://github.com/bkaradzic/bx#license-bsd-2-clause + */ + +#include <bx/math.h> /* NOTE(review): the four header names were missing in this copy of the patch and are reconstructed from the symbols used below (bx::sqrt, bx::getHPCounter, bx::getStdOut, ::sqrtf); confirm against upstream. */ +#include <bx/timer.h> +#include <bx/file.h> + +#include <math.h> + +void math_bench() /* Benchmark entry point: runs four square-root variants over the same input sweep and, for each, prints the bx::getHPCounter delta and the accumulated sum to the writer returned by bx::getStdOut. */ +{ + bx::WriterI* writer = bx::getStdOut(); + bx::writePrintf(writer, "Math bench\n\n"); + + float result = 0.0f; + float max = 1389.0f; + + { /* Baseline: global-namespace ::sqrtf. */ + int64_t elapsed = -bx::getHPCounter(); /* Start from the negated counter so that adding the end reading leaves the delta. */ + + result = 0.0f; + for (float xx = 0.0f; xx < max; xx += 0.1f) + { + result += ::sqrtf(xx); + } + + elapsed += bx::getHPCounter(); + bx::writePrintf(writer, " ::sqrtf: %15f, %f\n", double(elapsed), result); + } + + { /* Reference path: bx::sqrtRef. */ + int64_t elapsed = -bx::getHPCounter(); + + result = 0.0f; + for (float xx = 0.0f; xx < max; xx += 0.1f) + { + result += bx::sqrtRef(xx); + } + + elapsed += bx::getHPCounter(); + bx::writePrintf(writer, " bx::sqrtRef: %15f, %f\n", double(elapsed), result); + } + + { /* SIMD path: bx::sqrtSimd. */ + int64_t elapsed = -bx::getHPCounter(); + + result = 0.0f; + for (float xx = 0.0f; xx < max; xx += 0.1f) + { + result += bx::sqrtSimd(xx); /* Must be sqrtSimd so that this row measures what its label says rather than repeating the sqrtRef row. */ + } + + elapsed += bx::getHPCounter(); + bx::writePrintf(writer, "bx::sqrtSimd: %15f, %f\n", double(elapsed), result); + } + + { /* Public entry point: bx::sqrt. */ + int64_t elapsed = -bx::getHPCounter(); + + result = 0.0f; + for (float xx = 0.0f; xx < max; xx += 0.1f) + { + result += bx::sqrt(xx); + } + + elapsed += bx::getHPCounter(); + bx::writePrintf(writer, " bx::sqrt: %15f, %f\n", double(elapsed), result); + } +}