[go: up one dir, main page]

glam 0.30.3

A simple and fast 3D math library for games and graphics
Documentation
{% macro impl_mat4_inverse() %}
    // Template body for the 4x4 matrix inverse, written with x86 SSE intrinsics
    // using the cofactor (adjugate / determinant) method:
    //   1. `fac0`..`fac5`: 2x2 sub-determinants built from the y, z and w axes.
    //   2. `inv0`..`inv3`: the unscaled result columns, combining those
    //      sub-determinants with x/y axis components and alternating signs.
    //   3. The determinant is the dot product of `x_axis` with the first lane
    //      of each `inv` column; every column is then scaled by its reciprocal.
    //
    // Lane conventions used in the comments below (lane 0 is the lowest lane):
    //   `_mm_shuffle_ps(a, b, imm)` takes lanes 0-1 from `a` and lanes 2-3 from
    //   `b`; each 2-bit field of `imm`, starting at the low bits, picks the
    //   source component for the corresponding output lane.
    //
    // NOTE(review): the enclosing function and the justification for `unsafe`
    // are outside this macro — presumably it is only expanded for targets where
    // these intrinsics are available; confirm against the caller.
    unsafe {
        // Based on https://github.com/g-truc/glm `glm_mat4_inverse`
        //
        // Every `facN` block has the same shape. For a component pair (i, j)
        // the resulting lanes are:
        //   [ z.i*w.j - w.i*z.j,
        //     z.i*w.j - w.i*z.j,
        //     y.i*w.j - w.i*y.j,
        //     y.i*z.j - z.i*y.j ]
        // where `i` is the component selected for `swp00`/`swp0b` and `j` is
        // the component selected for `swp0a`/`swp03`.

        // fac0: (i, j) = (z, w)
        let fac0 = {
            let swp0a = _mm_shuffle_ps(self.w_axis.0, self.z_axis.0, 0b11_11_11_11);
            let swp0b = _mm_shuffle_ps(self.w_axis.0, self.z_axis.0, 0b10_10_10_10);

            let swp00 = _mm_shuffle_ps(self.z_axis.0, self.y_axis.0, 0b10_10_10_10);
            // 0b10_00_00_00 yields [src[0], src[0], src[0], src[2]].
            let swp01 = _mm_shuffle_ps(swp0a, swp0a, 0b10_00_00_00);
            let swp02 = _mm_shuffle_ps(swp0b, swp0b, 0b10_00_00_00);
            let swp03 = _mm_shuffle_ps(self.z_axis.0, self.y_axis.0, 0b11_11_11_11);

            let mul00 = _mm_mul_ps(swp00, swp01);
            let mul01 = _mm_mul_ps(swp02, swp03);
            _mm_sub_ps(mul00, mul01)
        };
        // fac1: (i, j) = (y, w)
        let fac1 = {
            let swp0a = _mm_shuffle_ps(self.w_axis.0, self.z_axis.0, 0b11_11_11_11);
            let swp0b = _mm_shuffle_ps(self.w_axis.0, self.z_axis.0, 0b01_01_01_01);

            let swp00 = _mm_shuffle_ps(self.z_axis.0, self.y_axis.0, 0b01_01_01_01);
            let swp01 = _mm_shuffle_ps(swp0a, swp0a, 0b10_00_00_00);
            let swp02 = _mm_shuffle_ps(swp0b, swp0b, 0b10_00_00_00);
            let swp03 = _mm_shuffle_ps(self.z_axis.0, self.y_axis.0, 0b11_11_11_11);

            let mul00 = _mm_mul_ps(swp00, swp01);
            let mul01 = _mm_mul_ps(swp02, swp03);
            _mm_sub_ps(mul00, mul01)
        };
        // fac2: (i, j) = (y, z)
        let fac2 = {
            let swp0a = _mm_shuffle_ps(self.w_axis.0, self.z_axis.0, 0b10_10_10_10);
            let swp0b = _mm_shuffle_ps(self.w_axis.0, self.z_axis.0, 0b01_01_01_01);

            let swp00 = _mm_shuffle_ps(self.z_axis.0, self.y_axis.0, 0b01_01_01_01);
            let swp01 = _mm_shuffle_ps(swp0a, swp0a, 0b10_00_00_00);
            let swp02 = _mm_shuffle_ps(swp0b, swp0b, 0b10_00_00_00);
            let swp03 = _mm_shuffle_ps(self.z_axis.0, self.y_axis.0, 0b10_10_10_10);

            let mul00 = _mm_mul_ps(swp00, swp01);
            let mul01 = _mm_mul_ps(swp02, swp03);
            _mm_sub_ps(mul00, mul01)
        };
        // fac3: (i, j) = (x, w)
        let fac3 = {
            let swp0a = _mm_shuffle_ps(self.w_axis.0, self.z_axis.0, 0b11_11_11_11);
            let swp0b = _mm_shuffle_ps(self.w_axis.0, self.z_axis.0, 0b00_00_00_00);

            let swp00 = _mm_shuffle_ps(self.z_axis.0, self.y_axis.0, 0b00_00_00_00);
            let swp01 = _mm_shuffle_ps(swp0a, swp0a, 0b10_00_00_00);
            let swp02 = _mm_shuffle_ps(swp0b, swp0b, 0b10_00_00_00);
            let swp03 = _mm_shuffle_ps(self.z_axis.0, self.y_axis.0, 0b11_11_11_11);

            let mul00 = _mm_mul_ps(swp00, swp01);
            let mul01 = _mm_mul_ps(swp02, swp03);
            _mm_sub_ps(mul00, mul01)
        };
        // fac4: (i, j) = (x, z)
        let fac4 = {
            let swp0a = _mm_shuffle_ps(self.w_axis.0, self.z_axis.0, 0b10_10_10_10);
            let swp0b = _mm_shuffle_ps(self.w_axis.0, self.z_axis.0, 0b00_00_00_00);

            let swp00 = _mm_shuffle_ps(self.z_axis.0, self.y_axis.0, 0b00_00_00_00);
            let swp01 = _mm_shuffle_ps(swp0a, swp0a, 0b10_00_00_00);
            let swp02 = _mm_shuffle_ps(swp0b, swp0b, 0b10_00_00_00);
            let swp03 = _mm_shuffle_ps(self.z_axis.0, self.y_axis.0, 0b10_10_10_10);

            let mul00 = _mm_mul_ps(swp00, swp01);
            let mul01 = _mm_mul_ps(swp02, swp03);
            _mm_sub_ps(mul00, mul01)
        };
        // fac5: (i, j) = (x, y)
        let fac5 = {
            let swp0a = _mm_shuffle_ps(self.w_axis.0, self.z_axis.0, 0b01_01_01_01);
            let swp0b = _mm_shuffle_ps(self.w_axis.0, self.z_axis.0, 0b00_00_00_00);

            let swp00 = _mm_shuffle_ps(self.z_axis.0, self.y_axis.0, 0b00_00_00_00);
            let swp01 = _mm_shuffle_ps(swp0a, swp0a, 0b10_00_00_00);
            let swp02 = _mm_shuffle_ps(swp0b, swp0b, 0b10_00_00_00);
            let swp03 = _mm_shuffle_ps(self.z_axis.0, self.y_axis.0, 0b01_01_01_01);

            let mul00 = _mm_mul_ps(swp00, swp01);
            let mul01 = _mm_mul_ps(swp02, swp03);
            _mm_sub_ps(mul00, mul01)
        };
        // Alternating cofactor signs. `_mm_set_ps` lists its arguments from the
        // highest lane down, so in lane order (0..3):
        //   sign_a = [-1, 1, -1, 1], sign_b = [1, -1, 1, -1].
        let sign_a = _mm_set_ps(1.0, -1.0, 1.0, -1.0);
        let sign_b = _mm_set_ps(-1.0, 1.0, -1.0, 1.0);

        // `vecN` holds component N of the y and x axes laid out as
        // [y.N, x.N, x.N, x.N]: the first shuffle gives [y.N, y.N, x.N, x.N] and
        // 0b10_10_10_00 then selects lanes [0, 2, 2, 2].
        let temp0 = _mm_shuffle_ps(self.y_axis.0, self.x_axis.0, 0b00_00_00_00);
        let vec0 = _mm_shuffle_ps(temp0, temp0, 0b10_10_10_00);

        let temp1 = _mm_shuffle_ps(self.y_axis.0, self.x_axis.0, 0b01_01_01_01);
        let vec1 = _mm_shuffle_ps(temp1, temp1, 0b10_10_10_00);

        let temp2 = _mm_shuffle_ps(self.y_axis.0, self.x_axis.0, 0b10_10_10_10);
        let vec2 = _mm_shuffle_ps(temp2, temp2, 0b10_10_10_00);

        let temp3 = _mm_shuffle_ps(self.y_axis.0, self.x_axis.0, 0b11_11_11_11);
        let vec3 = _mm_shuffle_ps(temp3, temp3, 0b10_10_10_00);

        // Each `invN` is a signed three-term cofactor expansion and becomes
        // (before scaling) one axis of the result.

        // inv0 = sign_b * (vec1 * fac0 - vec2 * fac1 + vec3 * fac2)
        let mul00 = _mm_mul_ps(vec1, fac0);
        let mul01 = _mm_mul_ps(vec2, fac1);
        let mul02 = _mm_mul_ps(vec3, fac2);
        let sub00 = _mm_sub_ps(mul00, mul01);
        let add00 = _mm_add_ps(sub00, mul02);
        let inv0 = _mm_mul_ps(sign_b, add00);

        // inv1 = sign_a * (vec0 * fac0 - vec2 * fac3 + vec3 * fac4)
        let mul03 = _mm_mul_ps(vec0, fac0);
        let mul04 = _mm_mul_ps(vec2, fac3);
        let mul05 = _mm_mul_ps(vec3, fac4);
        let sub01 = _mm_sub_ps(mul03, mul04);
        let add01 = _mm_add_ps(sub01, mul05);
        let inv1 = _mm_mul_ps(sign_a, add01);

        // inv2 = sign_b * (vec0 * fac1 - vec1 * fac3 + vec3 * fac5)
        let mul06 = _mm_mul_ps(vec0, fac1);
        let mul07 = _mm_mul_ps(vec1, fac3);
        let mul08 = _mm_mul_ps(vec3, fac5);
        let sub02 = _mm_sub_ps(mul06, mul07);
        let add02 = _mm_add_ps(sub02, mul08);
        let inv2 = _mm_mul_ps(sign_b, add02);

        // inv3 = sign_a * (vec0 * fac2 - vec1 * fac4 + vec2 * fac5)
        let mul09 = _mm_mul_ps(vec0, fac2);
        let mul10 = _mm_mul_ps(vec1, fac4);
        let mul11 = _mm_mul_ps(vec2, fac5);
        let sub03 = _mm_sub_ps(mul09, mul10);
        let add03 = _mm_add_ps(sub03, mul11);
        let inv3 = _mm_mul_ps(sign_a, add03);

        // Gather lane 0 of each unscaled column:
        //   row0 = [inv0[0], inv0[0], inv1[0], inv1[0]]
        //   row1 = [inv2[0], inv2[0], inv3[0], inv3[0]]
        //   row2 = [inv0[0], inv1[0], inv2[0], inv3[0]]
        let row0 = _mm_shuffle_ps(inv0, inv1, 0b00_00_00_00);
        let row1 = _mm_shuffle_ps(inv2, inv3, 0b00_00_00_00);
        let row2 = _mm_shuffle_ps(row0, row1, 0b10_00_10_00);

        // Determinant via expansion along `x_axis`.
        // NOTE(review): `dot4` is defined elsewhere — presumably a 4-lane dot
        // product returning a scalar float (it is compared with 0.0 and has
        // `.recip()` called on it below); confirm.
        let dot0 = dot4(self.x_axis.0, row2);
        // A zero determinant means the matrix is not invertible.
        // NOTE(review): `glam_assert!` is defined elsewhere — presumably only
        // active under an opt-in assertion feature; when it does not fire, a
        // zero determinant flows into `recip()` below. Verify.
        glam_assert!(dot0 != 0.0);

        // Broadcast 1/determinant so each column is scaled with one multiply.
        let rcp0 = _mm_set1_ps(dot0.recip());

        Self {
            x_axis: Vec4(_mm_mul_ps(inv0, rcp0)),
            y_axis: Vec4(_mm_mul_ps(inv1, rcp0)),
            z_axis: Vec4(_mm_mul_ps(inv2, rcp0)),
            w_axis: Vec4(_mm_mul_ps(inv3, rcp0)),
        }
    }
{% endmacro impl_mat4_inverse %}