#[cfg(feature = "num-traits")]
use num_traits::Float;
#[inline]
pub(crate) fn scalar_sin_cos(x: f32) -> (f32, f32) {
x.sin_cos()
}
#[inline]
pub fn scalar_acos(value: f32) -> f32 {
let nonnegative = value >= 0.0;
let x = value.abs();
let mut omx = 1.0 - x;
if omx < 0.0 {
omx = 0.0;
}
let root = omx.sqrt();
#[allow(clippy::approx_constant)]
let mut result =
((((((-0.001_262_491_1 * x + 0.006_670_09) * x - 0.017_088_126) * x + 0.030_891_88) * x
- 0.050_174_303)
* x
+ 0.088_978_99)
* x
- 0.214_598_8)
* x
+ 1.570_796_3;
result *= root;
if nonnegative {
result
} else {
core::f32::consts::PI - result
}
}
#[cfg(vec4_sse2)]
#[allow(clippy::excessive_precision)]
pub(crate) mod sse2 {
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
use crate::f32::cast::UnionCast;
#[cfg(target_arch = "x86")]
use core::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;
macro_rules! _ps_const_ty {
($name:ident, $field:ident, $x:expr) => {
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
const $name: UnionCast = UnionCast {
$field: [$x, $x, $x, $x],
};
};
($name:ident, $field:ident, $x:expr, $y:expr, $z:expr, $w:expr) => {
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
const $name: UnionCast = UnionCast {
$field: [$x, $y, $z, $w],
};
};
}
_ps_const_ty!(PS_INV_SIGN_MASK, u32x4, !0x8000_0000);
_ps_const_ty!(PS_SIGN_MASK, u32x4, 0x8000_0000);
_ps_const_ty!(PS_NO_FRACTION, f32x4, 8388608.0);
_ps_const_ty!(PS_NEGATIVE_ZERO, u32x4, 0x80000000);
_ps_const_ty!(PS_PI, f32x4, core::f32::consts::PI);
_ps_const_ty!(PS_HALF_PI, f32x4, core::f32::consts::FRAC_PI_2);
_ps_const_ty!(
PS_SIN_COEFFICIENTS0,
f32x4,
-0.16666667,
0.0083333310,
-0.00019840874,
2.7525562e-06
);
_ps_const_ty!(
PS_SIN_COEFFICIENTS1,
f32x4,
-2.3889859e-08,
-0.16665852,
0.0083139502,
-0.00018524670
);
_ps_const_ty!(PS_ONE, f32x4, 1.0);
_ps_const_ty!(PS_TWO_PI, f32x4, core::f32::consts::PI * 2.0);
_ps_const_ty!(PS_RECIPROCAL_TWO_PI, f32x4, 0.159154943);
#[cfg(target_feature = "fma")]
macro_rules! m128_mul_add {
($a:expr, $b:expr, $c:expr) => {
_mm_fmadd_ps($a, $b, $c)
};
}
#[cfg(not(target_feature = "fma"))]
macro_rules! m128_mul_add {
($a:expr, $b:expr, $c:expr) => {
_mm_add_ps(_mm_mul_ps($a, $b), $c)
};
}
#[cfg(target_feature = "fma")]
macro_rules! m128_neg_mul_sub {
($a:expr, $b:expr, $c:expr) => {
_mm_fnmadd_ps($a, $b, $c)
};
}
#[cfg(not(target_feature = "fma"))]
macro_rules! m128_neg_mul_sub {
($a:expr, $b:expr, $c:expr) => {
_mm_sub_ps($c, _mm_mul_ps($a, $b))
};
}
#[inline]
pub(crate) unsafe fn m128_round(v: __m128) -> __m128 {
let sign = _mm_and_ps(v, PS_SIGN_MASK.m128);
let s_magic = _mm_or_ps(PS_NO_FRACTION.m128, sign);
let r1 = _mm_add_ps(v, s_magic);
let r1 = _mm_sub_ps(r1, s_magic);
let r2 = _mm_and_ps(v, PS_INV_SIGN_MASK.m128);
let mask = _mm_cmple_ps(r2, PS_NO_FRACTION.m128);
let r2 = _mm_andnot_ps(mask, v);
let r1 = _mm_and_ps(r1, mask);
_mm_xor_ps(r1, r2)
}
#[inline]
pub(crate) unsafe fn m128_floor(v: __m128) -> __m128 {
let test = _mm_and_si128(_mm_castps_si128(v), PS_INV_SIGN_MASK.m128i);
let test = _mm_cmplt_epi32(test, PS_NO_FRACTION.m128i);
let vint = _mm_cvttps_epi32(v);
let result = _mm_cvtepi32_ps(vint);
let larger = _mm_cmpgt_ps(result, v);
let larger = _mm_cvtepi32_ps(_mm_castps_si128(larger));
let result = _mm_add_ps(result, larger);
let result = _mm_and_ps(result, _mm_castsi128_ps(test));
let test = _mm_andnot_si128(test, _mm_castps_si128(v));
_mm_or_ps(result, _mm_castsi128_ps(test))
}
#[inline]
pub(crate) unsafe fn m128_ceil(v: __m128) -> __m128 {
let test = _mm_and_si128(_mm_castps_si128(v), PS_INV_SIGN_MASK.m128i);
let test = _mm_cmplt_epi32(test, PS_NO_FRACTION.m128i);
let vint = _mm_cvttps_epi32(v);
let result = _mm_cvtepi32_ps(vint);
let smaller = _mm_cmplt_ps(result, v);
let smaller = _mm_cvtepi32_ps(_mm_castps_si128(smaller));
let result = _mm_sub_ps(result, smaller);
let result = _mm_and_ps(result, _mm_castsi128_ps(test));
let test = _mm_andnot_si128(test, _mm_castps_si128(v));
_mm_or_ps(result, _mm_castsi128_ps(test))
}
#[inline]
pub(crate) unsafe fn m128_mod_angles(angles: __m128) -> __m128 {
let v = _mm_mul_ps(angles, PS_RECIPROCAL_TWO_PI.m128);
let v = m128_round(v);
m128_neg_mul_sub!(PS_TWO_PI.m128, v, angles)
}
#[inline]
pub(crate) unsafe fn m128_sin(v: __m128) -> __m128 {
let mut x = m128_mod_angles(v);
let sign = _mm_and_ps(x, PS_NEGATIVE_ZERO.m128);
let c = _mm_or_ps(PS_PI.m128, sign);
let absx = _mm_andnot_ps(sign, x);
let rflx = _mm_sub_ps(c, x);
let comp = _mm_cmple_ps(absx, PS_HALF_PI.m128);
let select0 = _mm_and_ps(comp, x);
let select1 = _mm_andnot_ps(comp, rflx);
x = _mm_or_ps(select0, select1);
let x2 = _mm_mul_ps(x, x);
const SC1: __m128 = unsafe { PS_SIN_COEFFICIENTS1.m128 };
let v_constants_b = _mm_shuffle_ps(SC1, SC1, 0b00_00_00_00);
const SC0: __m128 = unsafe { PS_SIN_COEFFICIENTS0.m128 };
let mut v_constants = _mm_shuffle_ps(SC0, SC0, 0b11_11_11_11);
let mut result = m128_mul_add!(v_constants_b, x2, v_constants);
v_constants = _mm_shuffle_ps(SC0, SC0, 0b10_10_10_10);
result = m128_mul_add!(result, x2, v_constants);
v_constants = _mm_shuffle_ps(SC0, SC0, 0b01_01_01_01);
result = m128_mul_add!(result, x2, v_constants);
v_constants = _mm_shuffle_ps(SC0, SC0, 0b00_00_00_00);
result = m128_mul_add!(result, x2, v_constants);
result = m128_mul_add!(result, x2, PS_ONE.m128);
result = _mm_mul_ps(result, x);
result
}
}
#[cfg(test)]
macro_rules! assert_approx_eq {
($a:expr, $b:expr) => {{
assert_approx_eq!($a, $b, core::f32::EPSILON);
}};
($a:expr, $b:expr, $eps:expr) => {{
let (a, b) = (&$a, &$b);
let eps = $eps;
assert!(
(a - b).abs() <= eps,
"assertion failed: `(left !== right)` \
(left: `{:?}`, right: `{:?}`, expect diff: `{:?}`, real diff: `{:?}`)",
*a,
*b,
eps,
(a - b).abs()
);
}};
}
#[cfg(test)]
macro_rules! assert_relative_eq {
($a:expr, $b:expr) => {{
assert_relative_eq!($a, $b, core::f32::EPSILON);
}};
($a:expr, $b:expr, $eps:expr) => {{
let (a, b) = (&$a, &$b);
let eps = $eps;
let diff = (a - b).abs();
let largest = a.abs().max(b.abs());
assert!(
diff <= largest * eps,
"assertion failed: `(left !== right)` \
(left: `{:?}`, right: `{:?}`, expect diff: `{:?}`, real diff: `{:?}`)",
*a,
*b,
largest * eps,
diff
);
}};
}
#[test]
fn test_scalar_acos() {
fn test_scalar_acos_angle(a: f32) {
assert_relative_eq!(scalar_acos(a), a.acos(), 1e-6);
}
const MAX_TESTS: u32 = 1024 / 2;
const SIGN: u32 = 0x80_00_00_00;
const PTVE_ONE: u32 = 0x3f_80_00_00; const NGVE_ONE: u32 = SIGN | PTVE_ONE;
const STEP_SIZE: usize = (PTVE_ONE / MAX_TESTS) as usize;
for f in (SIGN..=NGVE_ONE)
.step_by(STEP_SIZE)
.map(|i| f32::from_bits(i))
{
test_scalar_acos_angle(f);
}
for f in (0..=PTVE_ONE).step_by(STEP_SIZE).map(|i| f32::from_bits(i)) {
test_scalar_acos_angle(f);
}
assert_approx_eq!(scalar_acos(2.0), 0.0);
assert_approx_eq!(scalar_acos(-2.0), core::f32::consts::PI);
}
#[test]
fn test_scalar_sin_cos() {
fn test_scalar_sin_cos_angle(a: f32) {
let (s1, c1) = scalar_sin_cos(a);
let (s2, c2) = a.sin_cos();
assert_approx_eq!(s1, s2);
assert_approx_eq!(c1, c2);
}
const MAX_TESTS: u32 = 1024 / 2;
const SIGN: u32 = 0x80_00_00_00;
let ptve_pi = core::f32::consts::PI.to_bits();
let ngve_pi = SIGN | ptve_pi;
let step_pi = (ptve_pi / MAX_TESTS) as usize;
for f in (SIGN..=ngve_pi).step_by(step_pi).map(|i| f32::from_bits(i)) {
test_scalar_sin_cos_angle(f);
}
for f in (0..=ptve_pi).step_by(step_pi).map(|i| f32::from_bits(i)) {
test_scalar_sin_cos_angle(f);
}
let ptve_inf = core::f32::INFINITY.to_bits();
let ngve_inf = core::f32::NEG_INFINITY.to_bits();
let step_inf = (ptve_inf / MAX_TESTS) as usize;
for f in (SIGN..ngve_inf)
.step_by(step_inf)
.map(|i| f32::from_bits(i))
{
test_scalar_sin_cos_angle(f);
}
for f in (0..ptve_inf).step_by(step_inf).map(|i| f32::from_bits(i)) {
test_scalar_sin_cos_angle(f);
}
let (s, c) = scalar_sin_cos(core::f32::INFINITY);
assert!(s.is_nan());
assert!(c.is_nan());
let (s, c) = scalar_sin_cos(core::f32::NEG_INFINITY);
assert!(s.is_nan());
assert!(c.is_nan());
}
#[test]
#[cfg(vec4_sse2)]
fn test_sse2_m128_sin() {
use crate::Vec4;
use core::f32::consts::PI;
fn test_sse2_m128_sin_angle(a: f32) {
let v = Vec4::splat(a);
let v = unsafe { Vec4(sse2::m128_sin(v.0)) };
let a_sin = a.sin();
assert_approx_eq!(v.x, a_sin, 1e-6);
assert_approx_eq!(v.z, a_sin, 1e-6);
assert_approx_eq!(v.y, a_sin, 1e-6);
assert_approx_eq!(v.w, a_sin, 1e-6);
}
let mut a = -PI;
let end = PI;
let step = PI / 8192.0;
while a <= end {
test_sse2_m128_sin_angle(a);
a += step;
}
}