From 6d911800f2855cd24525a473df06ab633d330848 Mon Sep 17 00:00:00 2001 From: bkaradzic Date: Mon, 26 Nov 2012 23:40:51 -0800 Subject: [PATCH] Fixed SSE2 compile time detection for MSVC. --- include/bx/bx.h | 1 - include/bx/cpu.h | 8 -------- include/bx/float4_neon.h | 15 +++++++++++++++ include/bx/float4_ref.h | 5 +++++ include/bx/float4_sse.h | 9 +++++---- include/bx/float4_t.h | 4 ++++ include/bx/macros.h | 3 +++ 7 files changed, 32 insertions(+), 13 deletions(-) diff --git a/include/bx/bx.h b/include/bx/bx.h index f3481e0..a5808df 100644 --- a/include/bx/bx.h +++ b/include/bx/bx.h @@ -20,4 +20,3 @@ using namespace bx; #endif // BX_NAMESPACE #endif // __BX_H__ - diff --git a/include/bx/cpu.h b/include/bx/cpu.h index 76b310f..90d3204 100644 --- a/include/bx/cpu.h +++ b/include/bx/cpu.h @@ -31,14 +31,6 @@ extern "C" void _ReadWriteBarrier(); namespace bx { -#if BX_COMPILER_MSVC -# define BX_CACHE_LINE_ALIGN_MARKER() __declspec(align(BX_CACHE_LINE_SIZE) ) struct {} -#else -# define BX_CACHE_LINE_ALIGN_MARKER() struct {} __attribute__( (__aligned__(BX_CACHE_LINE_SIZE) ) ) -#endif // BX_COMPILER_ - -#define BX_CACHE_LINE_ALIGN(_def) BX_CACHE_LINE_ALIGN_MARKER(); _def; BX_CACHE_LINE_ALIGN_MARKER() - inline void readBarrier() { #if BX_COMPILER_MSVC diff --git a/include/bx/float4_neon.h b/include/bx/float4_neon.h index 2331309..d22e76a 100644 --- a/include/bx/float4_neon.h +++ b/include/bx/float4_neon.h @@ -104,6 +104,21 @@ namespace bx return _a.fxyzw[3]; } +// BX_FLOAT4_INLINE float4_t float4_ld(const void* _ptr) +// { +// return _mm_load_ps(reinterpret_cast<const float*>(_ptr) ); +// } + +// BX_FLOAT4_INLINE void float4_st(void* _ptr, float4_t _a) +// { +// _mm_store_ps(reinterpret_cast<float*>(_ptr), _a); +// } + +// BX_FLOAT4_INLINE void float4_stream(void* _ptr, float4_t _a) +// { +// _mm_stream_ps(reinterpret_cast<float*>(_ptr), _a); +// } + BX_FLOAT4_INLINE float4_t float4_ld(float _x, float _y, float _z, float _w) { const float32_t val[4] = {_x, _y, _z, _w}; diff --git 
a/include/bx/float4_ref.h b/include/bx/float4_ref.h index 4359119..e9dde1a 100644 --- a/include/bx/float4_ref.h +++ b/include/bx/float4_ref.h @@ -190,6 +190,11 @@ IMPLEMENT_TEST(xyzw , 0xf); *reinterpret_cast<float4_t*>(_ptr) = _a; } + BX_FLOAT4_INLINE void float4_stream(void* _ptr, float4_t _a) + { + *reinterpret_cast<float4_t*>(_ptr) = _a; + } + BX_FLOAT4_INLINE float4_t float4_ld(float _x, float _y, float _z, float _w) { float4_t result; diff --git a/include/bx/float4_sse.h b/include/bx/float4_sse.h index 93636f3..9c173c8 100644 --- a/include/bx/float4_sse.h +++ b/include/bx/float4_sse.h @@ -6,10 +6,6 @@ #ifndef __BX_FLOAT4_SSE_H__ #define __BX_FLOAT4_SSE_H__ -#if !defined(__SSE2__) -# error "float4_t requires at least SSE2" -#endif // !defined(__SSE2__) - #include <stdint.h> #include <emmintrin.h> // __m128i @@ -149,6 +145,11 @@ IMPLEMENT_TEST(xyzw , 0xf); _mm_store_ps(reinterpret_cast<float*>(_ptr), _a); } + BX_FLOAT4_INLINE void float4_stream(void* _ptr, float4_t _a) + { + _mm_stream_ps(reinterpret_cast<float*>(_ptr), _a); + } + BX_FLOAT4_INLINE float4_t float4_ld(float _x, float _y, float _z, float _w) { return _mm_set_ps(_w, _z, _y, _x); diff --git a/include/bx/float4_t.h b/include/bx/float4_t.h index 5ce22b7..384af27 100644 --- a/include/bx/float4_t.h +++ b/include/bx/float4_t.h @@ -11,6 +11,10 @@ #define BX_FLOAT4_INLINE BX_FORCE_INLINE +#if BX_COMPILER_MSVC && (BX_ARCH_64BIT || _M_IX86_FP >= 2) +# define __SSE2__ +#endif // BX_COMPILER_ + #if defined(__SSE2__) # include "float4_sse.h" #elif 0 // __ARM_NEON__ diff --git a/include/bx/macros.h b/include/bx/macros.h index 825fac9..8ceb3dd 100644 --- a/include/bx/macros.h +++ b/include/bx/macros.h @@ -57,6 +57,9 @@ # error "Unknown BX_COMPILER_?" #endif +#define BX_CACHE_LINE_ALIGN_MARKER() BX_ALIGN_STRUCT(BX_CACHE_LINE_SIZE, struct) {} +#define BX_CACHE_LINE_ALIGN(_def) BX_CACHE_LINE_ALIGN_MARKER(); _def; BX_CACHE_LINE_ALIGN_MARKER() + #define BX_ALIGN_STRUCT_16(_struct) BX_ALIGN_STRUCT(16, _struct) #define BX_ALIGN_STRUCT_256(_struct) BX_ALIGN_STRUCT(256, _struct)