diff --git a/include/bx/inline/math.inl b/include/bx/inline/math.inl
index 710ff0b..1664e85 100644
--- a/include/bx/inline/math.inl
+++ b/include/bx/inline/math.inl
@@ -127,6 +127,41 @@ namespace bx
 		return cos(_a - kPiHalf);
 	}
 
+	inline float sinh(float _a)
+	{
+		return 0.5f*(exp(_a) - exp(-_a) );
+	}
+
+	inline float asin(float _a)
+	{
+		return kPiHalf - acos(_a);
+	}
+
+	inline float cosh(float _a)
+	{
+		return 0.5f*(exp(_a) + exp(-_a) );
+	}
+
+	inline float tan(float _a)
+	{
+		return sin(_a) / cos(_a);
+	}
+
+	inline float tanh(float _a)
+	{
+		const float tmp0   = exp(2.0f*_a);
+		const float tmp1   = tmp0 - 1.0f;
+		const float tmp2   = tmp0 + 1.0f;
+		const float result = tmp1 / tmp2;
+
+		return result;
+	}
+
+	inline float atan(float _a)
+	{
+		return atan2(_a, 1.0f);
+	}
+
 	inline float pow(float _a, float _b)
 	{
 		return exp(_b * log(_a) );
@@ -142,9 +177,19 @@ namespace bx
 		return log(_a) * kInvLogNat2;
 	}
 
+	inline float sqrt(float _a)
+	{
+		if (_a < kNearZero)
+		{
+			return 0.0f;
+		}
+
+		return 1.0f/rsqrt(_a);
+	}
+
 	inline float rsqrt(float _a)
 	{
-		return 1.0f/sqrt(_a);
+		return pow(_a, -0.5f);
 	}
 
 	inline float trunc(float _a)
@@ -157,6 +202,11 @@ namespace bx
 		return _a - trunc(_a);
 	}
 
+	inline float mad(float _a, float _b, float _c)
+	{
+		return _a * _b + _c;
+	}
+
 	inline float mod(float _a, float _b)
 	{
 		return _a - _b * floor(_a / _b);
diff --git a/include/bx/inline/simd128_langext.inl b/include/bx/inline/simd128_langext.inl
index 4aa051f..0c4479e 100644
--- a/include/bx/inline/simd128_langext.inl
+++ b/include/bx/inline/simd128_langext.inl
@@ -318,10 +318,10 @@ BX_SIMD128_IMPLEMENT_TEST(xyzw , 0xf);
 	BX_SIMD_FORCE_INLINE simd128_langext_t simd_sqrt(simd128_langext_t _a)
 	{
 		simd128_langext_t result;
-		result.vf[0] = sqrtf(_a.vf[0]);
-		result.vf[1] = sqrtf(_a.vf[1]);
-		result.vf[2] = sqrtf(_a.vf[2]);
-		result.vf[3] = sqrtf(_a.vf[3]);
+		result.vf[0] = sqrt(_a.vf[0]);
+		result.vf[1] = sqrt(_a.vf[1]);
+		result.vf[2] = sqrt(_a.vf[2]);
+		result.vf[3] = sqrt(_a.vf[3]);
 		return result;
 	}
 
@@ -329,10 +329,10 @@ BX_SIMD128_IMPLEMENT_TEST(xyzw , 0xf);
 	BX_SIMD_FORCE_INLINE simd128_langext_t simd_rsqrt_est(simd128_langext_t _a)
 	{
 		simd128_langext_t result;
-		result.vf[0] = 1.0f / sqrtf(_a.vf[0]);
-		result.vf[1] = 1.0f / sqrtf(_a.vf[1]);
-		result.vf[2] = 1.0f / sqrtf(_a.vf[2]);
-		result.vf[3] = 1.0f / sqrtf(_a.vf[3]);
+		result.vf[0] = 1.0f / sqrt(_a.vf[0]);
+		result.vf[1] = 1.0f / sqrt(_a.vf[1]);
+		result.vf[2] = 1.0f / sqrt(_a.vf[2]);
+		result.vf[3] = 1.0f / sqrt(_a.vf[3]);
 		return result;
 	}
 
diff --git a/include/bx/inline/uint32_t.inl b/include/bx/inline/uint32_t.inl
index 4f97f65..8b85bbe 100644
--- a/include/bx/inline/uint32_t.inl
+++ b/include/bx/inline/uint32_t.inl
@@ -119,11 +119,21 @@ namespace bx
 		return _a + _b;
 	}
 
+	inline uint32_t uint32_iadd(uint32_t _a, uint32_t _b)
+	{
+		return int32_t(_a) + int32_t(_b);
+	}
+
 	inline uint32_t uint32_sub(uint32_t _a, uint32_t _b)
 	{
 		return _a - _b;
 	}
 
+	inline uint32_t uint32_isub(uint32_t _a, uint32_t _b)
+	{
+		return int32_t(_a) - int32_t(_b);
+	}
+
 	inline uint32_t uint32_mul(uint32_t _a, uint32_t _b)
 	{
 		return _a * _b;
diff --git a/include/bx/math.h b/include/bx/math.h
index 834fbe9..b4707d5 100644
--- a/include/bx/math.h
+++ b/include/bx/math.h
@@ -17,9 +17,15 @@ namespace bx
 	extern const float kPi2;
 	extern const float kInvPi;
 	extern const float kPiHalf;
+	extern const float kPiQuarter;
 	extern const float kSqrt2;
+	extern const float kLogNat10;
 	extern const float kInvLogNat2;
-	extern const float kHuge;
+	extern const float kLogNat2Hi;
+	extern const float kLogNat2Lo;
+	extern const float kE;
+	extern const float kNearZero;
+	extern const float kInfinity;
 
 	///
 	typedef float (*LerpFn)(float _a, float _b, float _t);
@@ -65,22 +71,22 @@ namespace bx
 	///
 	uint32_t floatFlip(uint32_t _value);
 
-	///
+	/// Returns true if _f is a number that is NaN.
 	bool isNan(float _f);
 
-	///
+	/// Returns true if _f is a number that is NaN.
 	bool isNan(double _f);
 
-	///
+	/// Returns true if _f is not infinite and is not a NaN.
 	bool isFinite(float _f);
 
-	///
+	/// Returns true if _f is not infinite and is not a NaN.
 	bool isFinite(double _f);
 
-	///
+	/// Returns true if _f is infinite and is not a NaN.
 	bool isInfinite(float _f);
 
-	///
+	/// Returns true if _f is infinite and is not a NaN.
 	bool isInfinite(double _f);
 
 	///
@@ -98,49 +104,64 @@ namespace bx
 	///
 	float sign(float _a);
 
-	///
+	/// Returns the absolute of _a.
 	float abs(float _a);
 
-	///
+	/// Returns the square of _a.
 	float square(float _a);
 
-	///
+	/// Returns the cosine of the argument _a.
 	float sin(float _a);
 
-	///
+	/// Returns hyperbolic sine of the argument _a.
+	float sinh(float _a);
+
+	/// Returns radian angle between -pi/2 and +pi/2 whose sine is _a.
 	float asin(float _a);
 
-	///
+	/// Returns the cosine of the argument _a.
 	float cos(float _a);
 
-	///
-	float tan(float _a);
+	/// Returns hyperbolic cosine of the argument _a.
+	float cosh(float _a);
 
-	///
+	/// Returns radian angle between 0 and pi whose cosine is _a.
 	float acos(float _a);
 
-	///
+	/// Returns the circular tangent of the radian argument _a.
+	float tan(float _a);
+
+	/// Returns hyperbolic tangent of the argument _a.
+	float tanh(float _a);
+
+	/// Returns radian angle between -pi/2 and +pi/2 whose tangent is _a.
+	float atan(float _a);
+
+	/// Retruns the inverse tangent of _y/_x.
 	float atan2(float _y, float _x);
 
-	///
+	/// Computes _a raised to the _b power.
 	float pow(float _a, float _b);
 
 	///
+	float ldexp(float _a, int32_t _b);
+
+	/// Returns e (2.71828...) raised to the _a power.
 	float exp(float _a);
 
-	///
+	/// Returns 2 raised to the _a power.
 	float exp2(float _a);
 
-	///
+	/// Returns the base e (2.71828...) logarithm of _a.
 	float log(float _a);
 
-	///
+	/// Returns the base 2 logarithm of _a.
 	float log2(float _a);
 
-	///
+	/// Returns the square root of _a.
 	float sqrt(float _a);
 
-	///
+	/// Returns reciprocal square root of _a.
 	float rsqrt(float _a);
 
 	///
@@ -149,6 +170,9 @@ namespace bx
 	///
 	float fract(float _a);
 
+	/// Returns result of multipla and add (_a * _b + _c).
+	float mad(float _a, float _b, float _c);
+
 	///
 	float mod(float _a, float _b);
 
diff --git a/include/bx/simd_t.h b/include/bx/simd_t.h
index 8676a93..2757194 100644
--- a/include/bx/simd_t.h
+++ b/include/bx/simd_t.h
@@ -39,7 +39,6 @@
 	&& !BX_PLATFORM_EMSCRIPTEN \
 	&& !BX_PLATFORM_IOS \
 	&&  BX_CLANG_HAS_EXTENSION(attribute_ext_vector_type)
-#	include <math.h>
 #	undef  BX_SIMD_LANGEXT
 #	define BX_SIMD_LANGEXT 1
 #endif //
diff --git a/src/math.cpp b/src/math.cpp
index 7fe5a79..53f36cc 100644
--- a/src/math.cpp
+++ b/src/math.cpp
@@ -5,8 +5,7 @@
 
 #include "bx_p.h"
 #include <bx/math.h>
-
-#include <math.h>
+#include <bx/uint32_t.h>
 
 namespace bx
 {
@@ -14,18 +13,17 @@ namespace bx
 	const float kPi2        = 6.2831853071795864769252867665590f;
 	const float kInvPi      = 1.0f/kPi;
 	const float kPiHalf     = 1.5707963267948966192313216916398f;
+	const float kPiQuarter  = 0.7853981633974483096156608458199f;
 	const float kSqrt2      = 1.4142135623730950488016887242097f;
+	const float kLogNat10   = 2.3025850929940456840179914546844f;
 	const float kInvLogNat2 = 1.4426950408889634073599246810019f;
-#if BX_COMPILER_MSVC
-	const float kHuge = float(HUGE_VAL);
-#else
-	const float kHuge = HUGE_VALF;
-#endif // BX_COMPILER_MSVC
-
-	float asin(float _a)
-	{
-		return ::asinf(_a);
-	}
+	const float kLogNat2Hi  = 0.6931471805599453094172321214582f;
+	const float kLogNat2Lo  = 1.90821492927058770002e-10f;
+	const float kE          = 2.7182818284590452353602874713527f;
+	const float kNearZero   = 1.0f/float(1 << 28);
+	const float kFloatMin   = 1.175494e-38f;
+	const float kFloatMax   = 3.402823e+38f;
+	const float kInfinity   = bitsToFloat(UINT32_C(0x7f800000) );
 
 	namespace
 	{
@@ -72,14 +70,13 @@ namespace bx
 			c10 = kSinC10;
 		}
 
-		const float xsq = square(xx);
-		float result;
-		result = xsq * c10    + c8;
-		result = xsq * result + c6;
-		result = xsq * result + c4;
-		result = xsq * result + c2;
-		result = xsq * result + 1.0f;
-		result *= c0;
+		const float xsq    = square(xx);
+		const float tmp0   = mad(c10,  xsq, c8 );
+		const float tmp1   = mad(tmp0, xsq, c6 );
+		const float tmp2   = mad(tmp1, xsq, c4 );
+		const float tmp3   = mad(tmp2, xsq, c2 );
+		const float tmp4   = mad(tmp3, xsq, 1.0);
+		const float result = tmp4 * c0;
 
 		return bits == 1 || bits == 2
 			? -result
@@ -87,38 +84,168 @@ namespace bx
 			;
 	}
 
-	float tan(float _a)
+	namespace
 	{
-#if 0
-		return sin(_a) / cos(_a);
-#else
-		return ::tanf(_a);
-#endif
-	}
+		static const float kAcosC0 =  1.5707288f;
+		static const float kAcosC1 = -0.2121144f;
+		static const float kAcosC2 =  0.0742610f;
+		static const float kAcosC3 = -0.0187293f;
+
+	} // namespace
 
 	float acos(float _a)
 	{
-		return ::acosf(_a);
+		const float absa   = abs(_a);
+		const float tmp0   = mad(kAcosC3, absa, kAcosC2);
+		const float tmp1   = mad(tmp0,    absa, kAcosC1);
+		const float tmp2   = mad(tmp1,    absa, kAcosC0);
+		const float tmp3   = tmp2 * sqrt(1.0 - absa);
+		const float negate = float(_a < 0.0f);
+		const float tmp4   = tmp3 - 2.0f*negate*tmp3;
+		const float result = negate*kPi + tmp4;
+
+		return result;
 	}
 
+	namespace
+	{
+		static const float kAtan2C0 = -0.013480470f;
+		static const float kAtan2C1 =  0.057477314f;
+		static const float kAtan2C2 = -0.121239071f;
+		static const float kAtan2C3 =  0.195635925f;
+		static const float kAtan2C4 = -0.332994597f;
+		static const float kAtan2C5 =  0.999995630f;
+
+	} // namespace
+
 	float atan2(float _y, float _x)
 	{
-		return ::atan2f(_y, _x);
+		const float ax     = abs(_x);
+		const float ay     = abs(_y);
+		const float maxaxy = max(ax, ay);
+		const float minaxy = min(ax, ay);
+		const float mxy    = minaxy / maxaxy;
+		const float mxysq  = square(mxy);
+		const float tmp0   = mad(kAtan2C0, mxysq, kAtan2C1);
+		const float tmp1   = mad(tmp0,     mxysq, kAtan2C2);
+		const float tmp2   = mad(tmp1,     mxysq, kAtan2C3);
+		const float tmp3   = mad(tmp2,     mxysq, kAtan2C4);
+		const float tmp4   = mad(tmp3,     mxysq, kAtan2C5);
+		const float tmp5   = tmp4 * mxy;
+		const float tmp6   = ay > ax   ? kPiHalf - tmp5 : tmp5;
+		const float tmp7   = _x < 0.0f ? kPi     - tmp6 : tmp6;
+		const float result = sign(_y)*tmp7;
+
+		return result;
 	}
 
+	float ldexp(float _a, int32_t _b)
+	{
+		const uint32_t ftob     = floatToBits(_a);
+		const uint32_t masked   = uint32_and(ftob, UINT32_C(0xff800000) );
+		const uint32_t expsign0 = uint32_sra(masked, 23);
+		const uint32_t tmp      = uint32_iadd(expsign0, _b);
+		const uint32_t expsign1 = uint32_sll(tmp, 23);
+		const uint32_t mantissa = uint32_and(ftob, UINT32_C(0x007fffff) );
+		const uint32_t bits     = uint32_or(mantissa, expsign1);
+		const float    result   = bitsToFloat(bits);
+
+		return result;
+	}
+
+	float frexp(float _a, int32_t* _exp)
+	{
+		const uint32_t ftob     = floatToBits(_a);
+		const uint32_t masked0  = uint32_and(ftob, UINT32_C(0x7f800000) );
+		const uint32_t exp0     = uint32_srl(masked0, 23);
+		const uint32_t masked1  = uint32_and(ftob,   UINT32_C(0x807fffff) );
+		const uint32_t bits     = uint32_or(masked1, UINT32_C(0x3f000000) );
+		const float    result   = bitsToFloat(bits);
+
+		*_exp = int32_t(exp0 - 0x7e);
+
+		return result;
+	}
+
+	namespace
+	{
+		static const float kExpC0  =  1.66666666666666019037e-01f;
+		static const float kExpC1  = -2.77777777770155933842e-03f;
+		static const float kExpC2  =  6.61375632143793436117e-05f;
+		static const float kExpC3  = -1.65339022054652515390e-06f;
+		static const float kExpC4  =  4.13813679705723846039e-08f;
+		static const float kExpMax =  7.09782712893383973096e+02f;
+
+	} // namespace
+
 	float exp(float _a)
 	{
-		return ::expf(_a);
+		if (abs(_a) <= kNearZero)
+		{
+			return _a + 1.0f;
+		}
+
+		const float kk     = round(_a*kInvLogNat2);
+		const float hi     = _a - kk*kLogNat2Hi;
+		const float lo     =      kk*kLogNat2Lo;
+		const float hml    = hi - lo;
+		const float hmlsq  = square(hml);
+		const float tmp0   = mad(kExpC4, hmlsq, kExpC3);
+		const float tmp1   = mad(tmp0,   hmlsq, kExpC2);
+		const float tmp2   = mad(tmp1,   hmlsq, kExpC1);
+		const float tmp3   = mad(tmp2,   hmlsq, kExpC0);
+		const float tmp4   = hml - hmlsq * tmp3;
+		const float tmp5   = hml*tmp4/(2.0f-tmp4);
+		const float tmp6   = 1.0f - ( (lo - tmp5) - hi);
+		const float result = ldexp(tmp6, int32_t(kk) );
+
+		return result;
 	}
 
+	namespace
+	{
+		static const float kLogC0 = 6.666666666666735130e-01f;
+		static const float kLogC1 = 3.999999999940941908e-01f;
+		static const float kLogC2 = 2.857142874366239149e-01f;
+		static const float kLogC3 = 2.222219843214978396e-01f;
+		static const float kLogC4 = 1.818357216161805012e-01f;
+		static const float kLogC5 = 1.531383769920937332e-01f;
+		static const float kLogC6 = 1.479819860511658591e-01f;
+
+	} // namespace
+
 	float log(float _a)
 	{
-		return ::logf(_a);
-	}
+		int32_t exp;
+		float ff = frexp(_a, &exp);
+		if (ff < kSqrt2*0.5f)
+		{
+			ff *= 2.0f;
+			--exp;
+		}
 
-	float sqrt(float _a)
-	{
-		return ::sqrtf(_a);
+		ff -= 1.0f;
+		const float kk     = float(exp);
+		const float hi     = kk*kLogNat2Hi;
+		const float lo     = kk*kLogNat2Lo;
+		const float ss     = ff / (2.0f + ff);
+		const float s2     = square(ss);
+		const float s4     = square(s2);
+
+		const float tmp0   = mad(kLogC6, s4, kLogC4);
+		const float tmp1   = mad(tmp0,   s4, kLogC2);
+		const float tmp2   = mad(tmp1,   s4, kLogC0);
+		const float t1     = s2*tmp2;
+
+		const float tmp3   = mad(kLogC5, s4, kLogC3);
+		const float tmp4   = mad(tmp3,   s4, kLogC1);
+		const float t2     = s4*tmp4;
+
+		const float t12    = t1 + t2;
+		const float hfsq   = 0.5*square(ff);
+		const float result = hi - ( (hfsq - (ss*(hfsq+t12) + lo) ) - ff);
+
+		return result;
 	}
 
 	float floor(float _a)
@@ -126,7 +253,7 @@ namespace bx
 		if (_a < 0.0f)
 		{
 			const float fr = fract(-_a);
-			float result = -_a - fr;
+			const float result = -_a - fr;
 
 			return -(0.0f != fr
 				? result + 1.0f
diff --git a/tests/math_test.cpp b/tests/math_test.cpp
index 13a75b4..2114674 100644
--- a/tests/math_test.cpp
+++ b/tests/math_test.cpp
@@ -5,8 +5,9 @@
 
 #include "test.h"
 #include <bx/math.h>
+#include <bx/file.h>
 
-#include <cmath>
+#include <math.h>
 
 #if !BX_COMPILER_MSVC || BX_COMPILER_MSVC >= 1800
 TEST_CASE("isFinite, isInfinite, isNan", "")
@@ -34,6 +35,8 @@ TEST_CASE("log2", "")
 
 TEST_CASE("libm", "")
 {
+	bx::WriterI* writer = bx::getNullOut();
+
 	REQUIRE(1389.0f == bx::abs(-1389.0f) );
 	REQUIRE(1389.0f == bx::abs( 1389.0f) );
 	REQUIRE(   0.0f == bx::abs(-0.0f) );
@@ -52,24 +55,88 @@ TEST_CASE("libm", "")
 	REQUIRE(bx::equal( 0.89f, bx::fract( 13.89f), 0.000001f) );
 	REQUIRE(bx::equal(-0.89f, bx::fract(-13.89f), 0.000001f) );
 
-	for (float xx = -100.0f; xx < 100.0f; xx += 0.1f)
+	for (int32_t yy = -10; yy < 10; ++yy)
 	{
-		REQUIRE(bx::equal(bx::pow(1.389f, xx), ::pow(1.389f, xx), 0.00001f) );
+		for (float xx = -100.0f; xx < 100.0f; xx += 0.1f)
+		{
+			bx::writePrintf(writer, "ldexp(%f, %d) == %f (expected: %f)\n", xx, yy, bx::ldexp(xx, yy), ::ldexpf(xx, yy) );
+			REQUIRE(bx::equal(bx::ldexp(xx, yy), ::ldexpf(xx, yy), 0.00001f) );
+		}
+	}
+
+	for (float xx = -80.0f; xx < 80.0f; xx += 0.1f)
+	{
+		bx::writePrintf(writer, "exp(%f) == %f (expected: %f)\n", xx, bx::exp(xx), ::expf(xx) );
+		REQUIRE(bx::equal(bx::exp(xx), ::expf(xx), 0.00001f) );
+	}
+
+	for (float xx = 0.0f; xx < 100.0f; xx += 0.1f)
+	{
+		bx::writePrintf(writer, "sqrt(%f) == %f (expected: %f)\n", xx, bx::sqrt(xx), ::sqrtf(xx) );
+		REQUIRE(bx::equal(bx::sqrt(xx), ::sqrtf(xx), 0.00001f) );
 	}
 
 	for (float xx = -100.0f; xx < 100.0f; xx += 0.1f)
 	{
-		REQUIRE(bx::equal(bx::sin(xx), ::sin(xx), 0.00001f) );
+		bx::writePrintf(writer, "pow(1.389f, %f) == %f (expected: %f)\n", xx, bx::pow(1.389f, xx), ::powf(1.389f, xx) );
+		REQUIRE(bx::equal(bx::pow(1.389f, xx), ::powf(1.389f, xx), 0.00001f) );
+	}
+
+	for (float xx = -1.0f; xx < 1.0f; xx += 0.001f)
+	{
+		bx::writePrintf(writer, "asin(%f) == %f (expected: %f)\n", xx, bx::asin(xx), ::asinf(xx) );
+		REQUIRE(bx::equal(bx::asin(xx), ::asinf(xx), 0.0001f) );
 	}
 
 	for (float xx = -100.0f; xx < 100.0f; xx += 0.1f)
 	{
-		REQUIRE(bx::equal(bx::cos(xx), ::cos(xx), 0.00001f) );
+		bx::writePrintf(writer, "sin(%f) == %f (expected: %f)\n", xx, bx::sin(xx), ::sinf(xx) );
+		REQUIRE(bx::equal(bx::sin(xx), ::sinf(xx), 0.00001f) );
+	}
+
+	for (float xx = -1.0f; xx < 1.0f; xx += 0.1f)
+	{
+		bx::writePrintf(writer, "sinh(%f) == %f (expected: %f)\n", xx, bx::sinh(xx), ::sinhf(xx) );
+		REQUIRE(bx::equal(bx::sinh(xx), ::sinhf(xx), 0.00001f) );
+	}
+
+	for (float xx = -1.0f; xx < 1.0f; xx += 0.001f)
+	{
+		bx::writePrintf(writer, "acos(%f) == %f (expected: %f\n)", xx, bx::acos(xx), ::acosf(xx) );
+		REQUIRE(bx::equal(bx::acos(xx), ::acosf(xx), 0.0001f) );
 	}
 
 	for (float xx = -100.0f; xx < 100.0f; xx += 0.1f)
 	{
-		REQUIRE(bx::equal(bx::tan(xx), ::tan(xx), 0.00001f) );
+		bx::writePrintf(writer, "cos(%f) == %f (expected: %f)\n", xx, bx::cos(xx), ::cosf(xx) );
+		REQUIRE(bx::equal(bx::cos(xx), ::cosf(xx), 0.00001f) );
+	}
+
+	for (float xx = -100.0f; xx < 100.0f; xx += 0.1f)
+	{
+		bx::writePrintf(writer, "tan(%f) == %f (expected: %f)\n", xx, bx::tan(xx), ::tanf(xx) );
+		REQUIRE(bx::equal(bx::tan(xx), ::tanf(xx), 0.001f) );
+	}
+
+	for (float xx = -1.0f; xx < 1.0f; xx += 0.1f)
+	{
+		bx::writePrintf(writer, "tanh(%f) == %f (expected: %f\n", xx, bx::tanh(xx), ::tanhf(xx) );
+		REQUIRE(bx::equal(bx::tanh(xx), ::tanhf(xx), 0.00001f) );
+	}
+
+	for (float xx = -100.0f; xx < 100.0f; xx += 0.1f)
+	{
+		bx::writePrintf(writer, "atan(%f) == %f (expected: %f)\n", xx, bx::atan(xx), ::atanf(xx) );
+		REQUIRE(bx::equal(bx::atan(xx), ::atanf(xx), 0.00001f) );
+	}
+
+	for (float yy = -100.0f; yy < 100.0f; yy += 0.1f)
+	{
+		for (float xx = -100.0f; xx < 100.0f; xx += 0.1f)
+		{
+			bx::writePrintf(writer, "atan2(%f, %f) == %f (expected: %f)\n", yy, xx, bx::atan2(yy, xx), ::atan2f(yy, xx) );
+			REQUIRE(bx::equal(bx::atan2(yy, xx), ::atan2f(yy, xx), 0.00001f) );
+		}
 	}
 }