// glam 0.20.3 - Docs.rs

#[cfg(target_arch = "x86")]
use core::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;

use super::float::*;
use crate::core::{
    storage::XYZ,
    traits::{quaternion::Quaternion, scalar::*, vector::*},
};

/// SSE2 implementation of the quaternion operations on a raw `__m128`.
///
/// Lanes are laid out as `[x, y, z, w]` (lane 3 is the scalar part, as used by
/// the `0b11_11_11_11` splats below).
///
/// All `unsafe` blocks in this impl only call SSE/SSE2 register intrinsics on
/// `__m128` values; no pointers are dereferenced.
/// NOTE(review): this presumes the module is only compiled when SSE2 is
/// available on the target - confirm against the crate's `cfg` gating.
impl Quaternion<f32> for __m128 {
    type SIMDVector3 = __m128;

    /// Returns the conjugate: the sign of the `x`, `y` and `z` lanes is
    /// flipped while the `w` lane is left untouched.
    #[inline(always)]
    fn conjugate(self) -> Self {
        // Only the sign bit is set in the first three lanes, so XOR negates
        // x, y, z and leaves w as is.
        const SIGN: __m128 = const_f32x4!([-0.0, -0.0, -0.0, 0.0]);
        // SAFETY: register-only SSE intrinsic, see the impl-level note.
        unsafe { _mm_xor_ps(self, SIGN) }
    }

    /// Normalized linear interpolation from `self` to `end` by factor `s`.
    ///
    /// Both inputs are checked for unit length with `glam_assert!`. If the dot
    /// product of the inputs is negative, `end` is negated first so that the
    /// interpolation takes the shorter path. The result is re-normalized.
    #[inline]
    fn lerp(self, end: Self, s: f32) -> Self {
        glam_assert!(FloatVector4::is_normalized(self));
        glam_assert!(FloatVector4::is_normalized(end));

        // SAFETY: register-only SSE intrinsics, see the impl-level note.
        unsafe {
            const NEG_ZERO: __m128 = const_f32x4!([-0.0; 4]);
            let start = self;
            let dot = Vector4::dot_into_vec(start, end);
            // Calculate the bias, if the dot product is positive or zero, there is no bias
            // but if it is negative, we want to flip the 'end' rotation XYZW components
            let bias = _mm_and_ps(dot, NEG_ZERO);
            // start + (±end - start) * s
            let interpolated = _mm_add_ps(
                _mm_mul_ps(_mm_sub_ps(_mm_xor_ps(end, bias), start), _mm_set_ps1(s)),
                start,
            );
            FloatVector4::normalize(interpolated)
        }
    }

    /// Spherical linear interpolation from `self` to `end` by factor `s`.
    ///
    /// Both inputs are checked for unit length with `glam_assert!`.
    ///
    /// `q` and `-q` represent the same rotation, but interpolating towards one
    /// or the other follows a different arc. When the dot product of the
    /// inputs is negative, `end` is negated so that the shorter arc is always
    /// used. This matches what `lerp` does and keeps `sin(theta)` away from
    /// zero in the division below (without it, `end` close to `-self` gives
    /// `theta` close to pi and a division by a value close to zero).
    ///
    /// When the inputs are nearly parallel (`dot > 0.9995`) the computation
    /// falls back to `lerp`.
    #[inline]
    fn slerp(self, end: Self, s: f32) -> Self {
        // http://number-none.com/product/Understanding%20Slerp,%20Then%20Not%20Using%20It/
        glam_assert!(FloatVector4::is_normalized(self));
        glam_assert!(FloatVector4::is_normalized(end));

        const DOT_THRESHOLD: f32 = 0.9995;
        const NEG_ZERO: __m128 = const_f32x4!([-0.0; 4]);

        let mut dot: f32 = Vector4::dot(self, end);
        let mut end = end;
        if dot < 0.0 {
            // Take the short way around: interpolate towards `-end` instead.
            // SAFETY: register-only SSE intrinsic, see the impl-level note.
            end = unsafe { _mm_xor_ps(end, NEG_ZERO) };
            dot = -dot;
        }

        if dot > DOT_THRESHOLD {
            // assumes lerp returns a normalized quaternion
            self.lerp(end, s)
        } else {
            // assumes scalar_acos clamps the input to [-1.0, 1.0]
            let theta = dot.acos_approx();

            // Lane factors for theta: (1 - s), s and 1; the fourth lane is unused.
            let x = 1.0 - s;
            let y = s;
            let z = 1.0;

            // SAFETY: register-only SSE intrinsics, see the impl-level note.
            unsafe {
                // `_mm_set_ps` takes its arguments from the highest lane down,
                // so lane 0 = x, lane 1 = y, lane 2 = z, lane 3 = 0.
                let tmp = _mm_mul_ps(_mm_set_ps1(theta), _mm_set_ps(0.0, z, y, x));
                // presumably a lane-wise sine - see `super::float`
                let tmp = m128_sin(tmp);

                // Splat sin((1 - s) * theta), sin(s * theta) and sin(theta).
                let scale1 = _mm_shuffle_ps(tmp, tmp, 0b00_00_00_00);
                let scale2 = _mm_shuffle_ps(tmp, tmp, 0b01_01_01_01);
                let theta_sin = _mm_shuffle_ps(tmp, tmp, 0b10_10_10_10);

                self.mul(scale1).add(end.mul(scale2)).div(theta_sin)
            }
        }
    }

    /// Quaternion (Hamilton) product `self * other`.
    ///
    /// Both inputs are checked for unit length with `glam_assert!`.
    #[inline]
    fn mul_quaternion(self, other: Self) -> Self {
        glam_assert!(FloatVector4::is_normalized(self));
        glam_assert!(FloatVector4::is_normalized(other));
        // SAFETY: register-only SSE intrinsics, see the impl-level note.
        unsafe {
            // Based on https://github.com/nfrechette/rtm `rtm::quat_mul`
            let lhs = self;
            let rhs = other;

            // Per-lane signs applied to each permuted partial product.
            const CONTROL_WZYX: __m128 = const_f32x4!([1.0, -1.0, 1.0, -1.0]);
            const CONTROL_ZWXY: __m128 = const_f32x4!([1.0, 1.0, -1.0, -1.0]);
            const CONTROL_YXWZ: __m128 = const_f32x4!([-1.0, 1.0, 1.0, -1.0]);

            // Splat each component of `lhs` across all four lanes.
            let lhs_xxxx = _mm_shuffle_ps(lhs, lhs, 0b00_00_00_00);
            let lhs_yyyy = _mm_shuffle_ps(lhs, lhs, 0b01_01_01_01);
            let lhs_zzzz = _mm_shuffle_ps(lhs, lhs, 0b10_10_10_10);
            let lhs_wwww = _mm_shuffle_ps(lhs, lhs, 0b11_11_11_11);

            // lhs.w * (rhs.x, rhs.y, rhs.z, rhs.w)
            let term_w = _mm_mul_ps(lhs_wwww, rhs);
            let rhs_wzyx = _mm_shuffle_ps(rhs, rhs, 0b00_01_10_11);

            // lhs.x * (rhs.w, -rhs.z, rhs.y, -rhs.x)
            let term_x_unsigned = _mm_mul_ps(lhs_xxxx, rhs_wzyx);
            let rhs_zwxy = _mm_shuffle_ps(rhs_wzyx, rhs_wzyx, 0b10_11_00_01);
            let term_x = _mm_mul_ps(term_x_unsigned, CONTROL_WZYX);

            // lhs.y * (rhs.z, rhs.w, -rhs.x, -rhs.y)
            let term_y_unsigned = _mm_mul_ps(lhs_yyyy, rhs_zwxy);
            let rhs_yxwz = _mm_shuffle_ps(rhs_zwxy, rhs_zwxy, 0b00_01_10_11);
            let term_y = _mm_mul_ps(term_y_unsigned, CONTROL_ZWXY);

            // lhs.z * (-rhs.y, rhs.x, rhs.w, -rhs.z)
            let term_z_unsigned = _mm_mul_ps(lhs_zzzz, rhs_yxwz);
            let term_z = _mm_mul_ps(term_z_unsigned, CONTROL_YXWZ);

            // Pairwise sums: (w + x) + (y + z).
            let sum_wx = _mm_add_ps(term_w, term_x);
            let sum_yz = _mm_add_ps(term_y, term_z);
            _mm_add_ps(sum_wx, sum_yz)
        }
    }

    /// Rotates the 3D vector `other` by this quaternion.
    ///
    /// Converts `other` to a `__m128`, delegates to
    /// `mul_float4_as_vector3` and converts the result back.
    #[inline]
    fn mul_vector3(self, other: XYZ<f32>) -> XYZ<f32> {
        self.mul_float4_as_vector3(other.into()).into()
    }

    /// Rotates the vector held in `other` by this quaternion.
    ///
    /// `self` is checked for unit length with `glam_assert!`. With `b` the
    /// quaternion and `w` its scalar lane, the result is
    /// `other * (w*w - dot3(b, b)) + b * (2 * dot3(other, b)) + cross(b, other) * (2 * w)`.
    /// NOTE(review): `b` still carries `w` in lane 3; this relies on the
    /// `Vector3` dot/cross helpers ignoring that lane - confirm.
    #[inline]
    fn mul_float4_as_vector3(self, other: __m128) -> __m128 {
        glam_assert!(FloatVector4::is_normalized(self));
        // SAFETY: register-only SSE intrinsics, see the impl-level note.
        unsafe {
            const TWO: __m128 = const_f32x4!([2.0; 4]);
            let w = _mm_shuffle_ps(self, self, 0b11_11_11_11);
            let b = self;
            let b2 = Vector3::dot_into_vec(b, b);
            other
                .mul(w.mul(w).sub(b2))
                .add(b.mul(Vector3::dot_into_vec(other, b).mul(TWO)))
                .add(b.cross(other).mul(w.mul(TWO)))
        }
    }
}