SIMD: AVX WIP.

This commit is contained in:
Branimir Karadžić
2016-08-14 21:18:18 -07:00
parent 68f9ee72a7
commit 76d42a9e03
5 changed files with 280 additions and 48 deletions

View File

@@ -1,9 +1,45 @@
/*
* Copyright 2010-2016 Branimir Karadzic. All rights reserved.
* License: https://github.com/bkaradzic/bx#license-bsd-2-clause
*/
#ifndef BX_SIMD256_AVX_H_HEADER_GUARD
#define BX_SIMD256_AVX_H_HEADER_GUARD
#endif // BX_SIMD256_AVX_H_HEADER_GUARD
/*
* Copyright 2010-2016 Branimir Karadzic. All rights reserved.
* License: https://github.com/bkaradzic/bx#license-bsd-2-clause
*/
#ifndef BX_SIMD256_AVX_H_HEADER_GUARD
#define BX_SIMD256_AVX_H_HEADER_GUARD
#include "simd_ni.inl"
namespace bx
{
template<>
BX_SIMD_FORCE_INLINE simd256_avx_t simd_ld(const void* _ptr)
{
return _mm256_load_ps(reinterpret_cast<const float*>(_ptr) );
}
template<>
BX_SIMD_FORCE_INLINE void simd_st(void* _ptr, simd256_avx_t _a)
{
_mm256_store_ps(reinterpret_cast<float*>(_ptr), _a);
}
template<>
BX_SIMD_FORCE_INLINE simd256_avx_t simd_ld(float _x, float _y, float _z, float _w, float _A, float _B, float _C, float _D)
{
return _mm256_set_ps(_D, _C, _B, _A, _w, _z, _y, _x);
}
template<>
BX_SIMD_FORCE_INLINE simd256_avx_t simd_ild(uint32_t _x, uint32_t _y, uint32_t _z, uint32_t _w, uint32_t _A, uint32_t _B, uint32_t _C, uint32_t _D)
{
const __m256i set = _mm256_set_epi32(_D, _C, _B, _A, _w, _z, _y, _x);
const simd256_avx_t result = _mm256_castsi256_ps(set);
return result;
}
typedef simd256_avx_t simd256_t;
} // namespace bx
#endif // BX_SIMD256_AVX_H_HEADER_GUARD

View File

@@ -1,9 +1,51 @@
/*
* Copyright 2010-2016 Branimir Karadzic. All rights reserved.
* License: https://github.com/bkaradzic/bx#license-bsd-2-clause
*/
#ifndef BX_SIMD256_REF_H_HEADER_GUARD
#define BX_SIMD256_REF_H_HEADER_GUARD
#endif // BX_SIMD256_REF_H_HEADER_GUARD
/*
* Copyright 2010-2016 Branimir Karadzic. All rights reserved.
* License: https://github.com/bkaradzic/bx#license-bsd-2-clause
*/
#ifndef BX_SIMD256_REF_H_HEADER_GUARD
#define BX_SIMD256_REF_H_HEADER_GUARD
#include "simd_ni.inl"
namespace bx
{
template<>
BX_SIMD_FORCE_INLINE simd256_ref_t simd_ld(const void* _ptr)
{
const simd128_t* ptr = reinterpret_cast<const simd128_t*>(_ptr);
simd256_ref_t result;
result.simd128[0] = simd_ld<simd128_t>(&ptr[0]);
result.simd128[1] = simd_ld<simd128_t>(&ptr[1]);
return result;
}
template<>
BX_SIMD_FORCE_INLINE void simd_st(void* _ptr, simd256_ref_t _a)
{
simd128_t* result = reinterpret_cast<simd128_t*>(_ptr);
simd_st<simd128_t>(&result[0], _a.simd128[0]);
simd_st<simd128_t>(&result[1], _a.simd128[1]);
}
template<>
BX_SIMD_FORCE_INLINE simd256_ref_t simd_ld(float _x, float _y, float _z, float _w, float _A, float _B, float _C, float _D)
{
simd256_ref_t result;
result.simd128[0] = simd_ld<simd128_t>(_x, _y, _z, _w);
result.simd128[1] = simd_ld<simd128_t>(_A, _B, _C, _D);
return result;
}
template<>
BX_SIMD_FORCE_INLINE simd256_ref_t simd_ild(uint32_t _x, uint32_t _y, uint32_t _z, uint32_t _w, uint32_t _A, uint32_t _B, uint32_t _C, uint32_t _D)
{
simd256_ref_t result;
result.simd128[0] = simd_ild<simd128_t>(_x, _y, _z, _w);
result.simd128[1] = simd_ild<simd128_t>(_A, _B, _C, _D);
return result;
}
} // namespace bx
#endif // BX_SIMD256_REF_H_HEADER_GUARD

View File

@@ -135,9 +135,15 @@ BX_SIMD128_IMPLEMENT_TEST(xyzw);
template<typename Ty>
BX_SIMD_FORCE_INLINE Ty simd_ld(float _x, float _y, float _z, float _w);
template<typename Ty>
BX_SIMD_FORCE_INLINE Ty simd_ld(float _x, float _y, float _z, float _w, float _A, float _B, float _C, float _D);
template<typename Ty>
BX_SIMD_FORCE_INLINE Ty simd_ild(uint32_t _x, uint32_t _y, uint32_t _z, uint32_t _w);
template<typename Ty>
BX_SIMD_FORCE_INLINE Ty simd_ild(uint32_t _x, uint32_t _y, uint32_t _z, uint32_t _w, uint32_t _A, uint32_t _B, uint32_t _C, uint32_t _D);
template<typename Ty>
BX_SIMD_FORCE_INLINE Ty simd_splat(const void* _ptr);
@@ -352,14 +358,6 @@ BX_SIMD128_IMPLEMENT_TEST(xyzw);
typedef __m128 simd128_sse_t;
#endif // BX_SIMD_SSE
union simd128_ref_t
{
float fxyzw[4];
int32_t ixyzw[4];
uint32_t uxyzw[4];
};
} // namespace bx
#if BX_SIMD_AVX
@@ -378,27 +376,50 @@ BX_SIMD128_IMPLEMENT_TEST(xyzw);
# include "simd128_sse.inl"
#endif // BX_SIMD_SSE
#include "simd128_ref.inl"
#include "simd256_ref.inl"
namespace bx
{
#if !( BX_SIMD_AVX \
|| BX_SIMD_LANGEXT \
union simd128_ref_t
{
float fxyzw[4];
int32_t ixyzw[4];
uint32_t uxyzw[4];
};
#ifndef BX_SIMD_WARN_REFERENCE_IMPL
# define BX_SIMD_WARN_REFERENCE_IMPL 0
#endif // BX_SIMD_WARN_REFERENCE_IMPL
#if !( BX_SIMD_LANGEXT \
|| BX_SIMD_NEON \
|| BX_SIMD_SSE \
)
# ifndef BX_SIMD_WARN_REFERENCE_IMPL
# define BX_SIMD_WARN_REFERENCE_IMPL 0
# endif // BX_SIMD_WARN_REFERENCE_IMPL
# if BX_SIMD_WARN_REFERENCE_IMPL
# pragma message("************************************\nUsing SIMD reference implementation!\n************************************")
# pragma message("*** Using SIMD128 reference implementation! ***")
# endif // BX_SIMD_WARN_REFERENCE_IMPL
typedef simd128_ref_t simd128_t;
#endif //
union simd256_ref_t
{
simd128_t simd128[2];
};
#if !BX_SIMD_AVX
# if BX_SIMD_WARN_REFERENCE_IMPL
# pragma message("*** Using SIMD256 reference implementation! ***")
# endif // BX_SIMD_WARN_REFERENCE_IMPL
typedef simd256_ref_t simd256_t;
#endif // !BX_SIMD_AVX
} // namespace bx
#include "simd128_ref.inl"
#include "simd256_ref.inl"
namespace bx
{
BX_SIMD_FORCE_INLINE simd128_t simd_zero()
{
return simd_zero<simd128_t>();

View File

@@ -100,6 +100,11 @@ function toolchain(_buildDir, _libDir)
description = "Use 32-bit compiler instead 64-bit.",
}
newoption {
trigger = "with-avx",
description = "Use AVX extension.",
}
-- Avoid error when invoking genie --help.
if (_ACTION == nil) then return false end
@@ -460,6 +465,10 @@ function toolchain(_buildDir, _libDir)
flags { "StaticRuntime" }
end
if _OPTIONS["with-avx"] then
flags { "EnableAVX" }
end
flags {
"NoPCH",
"NativeWChar",

View File

@@ -12,11 +12,12 @@ using namespace bx;
union simd_cast
{
bx::simd128_t f4;
float f[4];
uint32_t ui[4];
int32_t i[4];
char c[16];
bx::simd256_t simd256;
bx::simd128_t simd128;
float f[8];
uint32_t ui[8];
int32_t i[8];
char c[32];
};
void simd_check_bool(const char* _str, bool _a, bool _0)
@@ -30,9 +31,16 @@ void simd_check_bool(const char* _str, bool _a, bool _0)
CHECK_EQUAL(_a, _0);
}
void simd_check_int32(const char* _str, bx::simd128_t _a, int32_t _0, int32_t _1, int32_t _2, int32_t _3)
void simd_check_int32(
const char* _str
, bx::simd128_t _a
, int32_t _0
, int32_t _1
, int32_t _2
, int32_t _3
)
{
simd_cast c; c.f4 = _a;
simd_cast c; c.simd128 = _a;
DBG("%s (%d, %d, %d, %d) == (%d, %d, %d, %d)"
, _str
, c.i[0], c.i[1], c.i[2], c.i[3]
@@ -45,9 +53,46 @@ void simd_check_int32(const char* _str, bx::simd128_t _a, int32_t _0, int32_t _1
CHECK_EQUAL(c.i[3], _3);
}
void simd_check_uint32(const char* _str, bx::simd128_t _a, uint32_t _0, uint32_t _1, uint32_t _2, uint32_t _3)
void simd_check_int32(
const char* _str
, bx::simd256_t _a
, int32_t _0
, int32_t _1
, int32_t _2
, int32_t _3
, int32_t _4
, int32_t _5
, int32_t _6
, int32_t _7
)
{
simd_cast c; c.f4 = _a;
simd_cast c; c.simd256 = _a;
DBG("%s (%d, %d, %d, %d, %d, %d, %d, %d) == (%d, %d, %d, %d, %d, %d, %d, %d)"
, _str
, c.i[0], c.i[1], c.i[2], c.i[3], c.i[4], c.i[5], c.i[6], c.i[7]
, _0, _1, _2, _3, _4, _5, _6, _7
);
CHECK_EQUAL(c.i[0], _0);
CHECK_EQUAL(c.i[1], _1);
CHECK_EQUAL(c.i[2], _2);
CHECK_EQUAL(c.i[3], _3);
CHECK_EQUAL(c.i[4], _4);
CHECK_EQUAL(c.i[5], _5);
CHECK_EQUAL(c.i[6], _6);
CHECK_EQUAL(c.i[7], _7);
}
void simd_check_uint32(
const char* _str
, bx::simd128_t _a
, uint32_t _0
, uint32_t _1
, uint32_t _2
, uint32_t _3
)
{
simd_cast c; c.simd128 = _a;
DBG("%s (0x%08x, 0x%08x, 0x%08x, 0x%08x) == (0x%08x, 0x%08x, 0x%08x, 0x%08x)"
, _str
@@ -61,9 +106,47 @@ void simd_check_uint32(const char* _str, bx::simd128_t _a, uint32_t _0, uint32_t
CHECK_EQUAL(c.ui[3], _3);
}
void simd_check_float(const char* _str, bx::simd128_t _a, float _0, float _1, float _2, float _3)
void simd_check_uint32(
const char* _str
, bx::simd256_t _a
, uint32_t _0
, uint32_t _1
, uint32_t _2
, uint32_t _3
, uint32_t _4
, uint32_t _5
, uint32_t _6
, uint32_t _7
)
{
simd_cast c; c.f4 = _a;
simd_cast c; c.simd256 = _a;
DBG("%s (0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x) == (0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x)"
, _str
, c.ui[0], c.ui[1], c.ui[2], c.ui[3], c.ui[4], c.ui[5], c.ui[6], c.ui[7]
, _0, _1, _2, _3, _4, _5, _6, _7
);
CHECK_EQUAL(c.ui[0], _0);
CHECK_EQUAL(c.ui[1], _1);
CHECK_EQUAL(c.ui[2], _2);
CHECK_EQUAL(c.ui[3], _3);
CHECK_EQUAL(c.ui[4], _4);
CHECK_EQUAL(c.ui[5], _5);
CHECK_EQUAL(c.ui[6], _6);
CHECK_EQUAL(c.ui[7], _7);
}
void simd_check_float(
const char* _str
, bx::simd128_t _a
, float _0
, float _1
, float _2
, float _3
)
{
simd_cast c; c.simd128 = _a;
DBG("%s (%f, %f, %f, %f) == (%f, %f, %f, %f)"
, _str
@@ -77,9 +160,40 @@ void simd_check_float(const char* _str, bx::simd128_t _a, float _0, float _1, fl
CHECK(bx::fequal(c.f[3], _3, 0.0001f) );
}
void simd_check_float(
const char* _str
, bx::simd256_t _a
, float _0
, float _1
, float _2
, float _3
, float _4
, float _5
, float _6
, float _7
)
{
simd_cast c; c.simd256 = _a;
DBG("%s (%f, %f, %f, %f, %f, %f, %f, %f) == (%f, %f, %f, %f, %f, %f, %f, %f)"
, _str
, c.f[0], c.f[1], c.f[2], c.f[3], c.f[4], c.f[5], c.f[6], c.f[7]
, _0, _1, _2, _3, _4, _5, _6, _7
);
CHECK(bx::fequal(c.f[0], _0, 0.0001f) );
CHECK(bx::fequal(c.f[1], _1, 0.0001f) );
CHECK(bx::fequal(c.f[2], _2, 0.0001f) );
CHECK(bx::fequal(c.f[3], _3, 0.0001f) );
CHECK(bx::fequal(c.f[4], _4, 0.0001f) );
CHECK(bx::fequal(c.f[5], _5, 0.0001f) );
CHECK(bx::fequal(c.f[6], _6, 0.0001f) );
CHECK(bx::fequal(c.f[7], _7, 0.0001f) );
}
void simd_check_string(const char* _str, bx::simd128_t _a)
{
simd_cast c; c.f4 = _a;
simd_cast c; c.simd128 = _a;
const char test[5] = { c.c[0], c.c[4], c.c[8], c.c[12], '\0' };
DBG("%s %s", _str, test);
@@ -200,11 +314,21 @@ TEST(simd_load)
, 0.0f, 1.0f, 2.0f, 3.0f
);
simd_check_float("ld"
, simd_ld<simd256_t>(0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f)
, 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f
);
simd_check_int32("ild"
, simd_ild(uint32_t(-1), 0, 1, 2)
, uint32_t(-1), 0, 1, 2
);
simd_check_int32("ild"
, simd_ild<simd256_t>(uint32_t(-1), 0, 1, 2, 3, 4, 5, 6)
, uint32_t(-1), 0, 1, 2, 3, 4, 5, 6
);
simd_check_int32("ild"
, simd_ild(uint32_t(-1), uint32_t(-2), uint32_t(-3), uint32_t(-4) )
, uint32_t(-1), uint32_t(-2), uint32_t(-3), uint32_t(-4)