1use super::*;
2
pick! {
  if #[cfg(target_feature="avx")] {
    /// Eight `f32` lanes held in a single 256-bit AVX register.
    #[derive(Default, Clone, Copy, PartialEq)]
    #[repr(C, align(32))]
    pub struct f32x8 { avx: m256 }
  } else {
    /// Eight `f32` lanes stored as two `f32x4` halves on targets without
    /// AVX; same size (32 bytes) and alignment as the AVX variant.
    #[derive(Default, Clone, Copy, PartialEq)]
    #[repr(C, align(32))]
    pub struct f32x8 { a : f32x4, b : f32x4 }
  }
}
14
/// Declares a `pub const` `f32x8` with all eight lanes set to `$f`.
/// Lower-case names are allowed because the math constants below mirror
/// the (mixed-case) names used in the scalar approximation sources.
macro_rules! const_f32_as_f32x8 {
  ($i:ident, $f:expr) => {
    #[allow(non_upper_case_globals)]
    pub const $i: f32x8 = f32x8::new([$f; 8]);
  };
}
21
/// Splatted vector versions of the common `f32` math constants
/// (mostly mirroring `core::f32::consts`).
impl f32x8 {
  const_f32_as_f32x8!(ONE, 1.0);
  const_f32_as_f32x8!(HALF, 0.5);
  const_f32_as_f32x8!(ZERO, 0.0);
  const_f32_as_f32x8!(E, core::f32::consts::E);
  const_f32_as_f32x8!(FRAC_1_PI, core::f32::consts::FRAC_1_PI);
  const_f32_as_f32x8!(FRAC_2_PI, core::f32::consts::FRAC_2_PI);
  const_f32_as_f32x8!(FRAC_2_SQRT_PI, core::f32::consts::FRAC_2_SQRT_PI);
  const_f32_as_f32x8!(FRAC_1_SQRT_2, core::f32::consts::FRAC_1_SQRT_2);
  const_f32_as_f32x8!(FRAC_PI_2, core::f32::consts::FRAC_PI_2);
  const_f32_as_f32x8!(FRAC_PI_3, core::f32::consts::FRAC_PI_3);
  const_f32_as_f32x8!(FRAC_PI_4, core::f32::consts::FRAC_PI_4);
  const_f32_as_f32x8!(FRAC_PI_6, core::f32::consts::FRAC_PI_6);
  const_f32_as_f32x8!(FRAC_PI_8, core::f32::consts::FRAC_PI_8);
  const_f32_as_f32x8!(LN_2, core::f32::consts::LN_2);
  const_f32_as_f32x8!(LN_10, core::f32::consts::LN_10);
  const_f32_as_f32x8!(LOG2_E, core::f32::consts::LOG2_E);
  const_f32_as_f32x8!(LOG10_E, core::f32::consts::LOG10_E);
  const_f32_as_f32x8!(LOG10_2, core::f32::consts::LOG10_2);
  const_f32_as_f32x8!(LOG2_10, core::f32::consts::LOG2_10);
  const_f32_as_f32x8!(PI, core::f32::consts::PI);
  const_f32_as_f32x8!(SQRT_2, core::f32::consts::SQRT_2);
  const_f32_as_f32x8!(TAU, core::f32::consts::TAU);
}
46
// SAFETY: in both cfg layouts, f32x8 is `#[repr(C)]` and consists solely of
// plain f32 data (no padding given the 32-byte size/alignment), so all-zero
// bytes are valid and any bit pattern is a valid value.
unsafe impl Zeroable for f32x8 {}
unsafe impl Pod for f32x8 {}
49
// Marks f32x8 as an aligned view over `f32` elements for slice-alignment
// helpers elsewhere in the crate.
impl AlignTo for f32x8 {
  type Elem = f32;
}
53
impl Add for f32x8 {
  type Output = Self;
  /// Lane-wise addition.
  #[inline]
  fn add(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: add_m256(self.avx, rhs.avx) }
      } else {
        // No AVX: add each 128-bit half independently.
        Self {
          a : self.a.add(rhs.a),
          b : self.b.add(rhs.b),
        }
      }
    }
  }
}
70
impl Sub for f32x8 {
  type Output = Self;
  /// Lane-wise subtraction.
  #[inline]
  fn sub(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: sub_m256(self.avx, rhs.avx) }
      } else {
        // No AVX: subtract each 128-bit half independently.
        Self {
          a : self.a.sub(rhs.a),
          b : self.b.sub(rhs.b),
        }
      }
    }
  }
}
87
impl Mul for f32x8 {
  type Output = Self;
  /// Lane-wise multiplication.
  #[inline]
  fn mul(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: mul_m256(self.avx, rhs.avx) }
      } else {
        // No AVX: multiply each 128-bit half independently.
        Self {
          a : self.a.mul(rhs.a),
          b : self.b.mul(rhs.b),
        }
      }
    }
  }
}
104
impl Div for f32x8 {
  type Output = Self;
  /// Lane-wise division.
  #[inline]
  fn div(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: div_m256(self.avx, rhs.avx) }
      } else {
        // No AVX: divide each 128-bit half independently.
        Self {
          a : self.a.div(rhs.a),
          b : self.b.div(rhs.b),
        }
      }
    }
  }
}
121
122impl Add<f32> for f32x8 {
123 type Output = Self;
124 #[inline]
125 fn add(self, rhs: f32) -> Self::Output {
126 self.add(Self::splat(rhs))
127 }
128}
129
130impl Sub<f32> for f32x8 {
131 type Output = Self;
132 #[inline]
133 fn sub(self, rhs: f32) -> Self::Output {
134 self.sub(Self::splat(rhs))
135 }
136}
137
138impl Mul<f32> for f32x8 {
139 type Output = Self;
140 #[inline]
141 fn mul(self, rhs: f32) -> Self::Output {
142 self.mul(Self::splat(rhs))
143 }
144}
145
146impl Div<f32> for f32x8 {
147 type Output = Self;
148 #[inline]
149 fn div(self, rhs: f32) -> Self::Output {
150 self.div(Self::splat(rhs))
151 }
152}
153
154impl Add<f32x8> for f32 {
155 type Output = f32x8;
156 #[inline]
157 fn add(self, rhs: f32x8) -> Self::Output {
158 f32x8::splat(self).add(rhs)
159 }
160}
161
162impl Sub<f32x8> for f32 {
163 type Output = f32x8;
164 #[inline]
165 fn sub(self, rhs: f32x8) -> Self::Output {
166 f32x8::splat(self).sub(rhs)
167 }
168}
169
170impl Mul<f32x8> for f32 {
171 type Output = f32x8;
172 #[inline]
173 fn mul(self, rhs: f32x8) -> Self::Output {
174 f32x8::splat(self).mul(rhs)
175 }
176}
177
178impl Div<f32x8> for f32 {
179 type Output = f32x8;
180 #[inline]
181 fn div(self, rhs: f32x8) -> Self::Output {
182 f32x8::splat(self).div(rhs)
183 }
184}
185
impl BitAnd for f32x8 {
  type Output = Self;
  /// Bitwise AND of the raw lane bits (useful for mask operations).
  #[inline]
  fn bitand(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: bitand_m256(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.bitand(rhs.a),
          b : self.b.bitand(rhs.b),
        }
      }
    }
  }
}
202
impl BitOr for f32x8 {
  type Output = Self;
  /// Bitwise OR of the raw lane bits (useful for mask operations).
  #[inline]
  fn bitor(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: bitor_m256(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.bitor(rhs.a),
          b : self.b.bitor(rhs.b),
        }
      }
    }
  }
}
219
impl BitXor for f32x8 {
  type Output = Self;
  /// Bitwise XOR of the raw lane bits (useful for sign-flip tricks).
  #[inline]
  fn bitxor(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: bitxor_m256(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.bitxor(rhs.a),
          b : self.b.bitxor(rhs.b),
        }
      }
    }
  }
}
236
impl CmpEq for f32x8 {
  type Output = Self;
  /// Lane-wise `==`, producing an all-ones / all-zeros mask per lane.
  /// Uses the *ordered* predicate, so a lane involving NaN compares false.
  #[inline]
  fn simd_eq(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: cmp_op_mask_m256::<{cmp_op!(EqualOrdered)}>(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.simd_eq(rhs.a),
          b : self.b.simd_eq(rhs.b),
        }
      }
    }
  }
}
253
impl CmpGe for f32x8 {
  type Output = Self;
  /// Lane-wise `>=`, producing an all-ones / all-zeros mask per lane.
  /// Uses the *ordered* predicate, so a lane involving NaN compares false.
  #[inline]
  fn simd_ge(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: cmp_op_mask_m256::<{cmp_op!(GreaterEqualOrdered)}>(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.simd_ge(rhs.a),
          b : self.b.simd_ge(rhs.b),
        }
      }
    }
  }
}
270
impl CmpGt for f32x8 {
  type Output = Self;
  /// Lane-wise `>`, producing an all-ones / all-zeros mask per lane.
  /// Uses the *ordered* predicate, so a lane involving NaN compares false.
  #[inline]
  fn simd_gt(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: cmp_op_mask_m256::<{cmp_op!(GreaterThanOrdered)}>(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.simd_gt(rhs.a),
          b : self.b.simd_gt(rhs.b),
        }
      }
    }
  }
}
287
impl CmpNe for f32x8 {
  type Output = Self;
  /// Lane-wise `!=`, producing an all-ones / all-zeros mask per lane.
  /// NOTE: uses the *ordered* not-equal predicate, so a lane involving NaN
  /// compares false here as well (NaN != NaN yields 0).
  #[inline]
  fn simd_ne(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: cmp_op_mask_m256::<{cmp_op!(NotEqualOrdered)}>(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.simd_ne(rhs.a),
          b : self.b.simd_ne(rhs.b),
        }
      }
    }
  }
}
304
impl CmpLe for f32x8 {
  type Output = Self;
  /// Lane-wise `<=`, producing an all-ones / all-zeros mask per lane.
  /// Uses the *ordered* predicate, so a lane involving NaN compares false.
  #[inline]
  fn simd_le(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: cmp_op_mask_m256::<{cmp_op!(LessEqualOrdered)}>(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.simd_le(rhs.a),
          b : self.b.simd_le(rhs.b),
        }
      }
    }
  }
}
321
impl CmpLt for f32x8 {
  type Output = Self;
  /// Lane-wise `<`, producing an all-ones / all-zeros mask per lane.
  /// Uses the *ordered* predicate, so a lane involving NaN compares false.
  #[inline]
  fn simd_lt(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: cmp_op_mask_m256::<{cmp_op!(LessThanOrdered)}>(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.simd_lt(rhs.a),
          b : self.b.simd_lt(rhs.b),
        }
      }
    }
  }
}
338
impl f32x8 {
  /// Builds a vector directly from an array of 8 lanes.
  #[inline]
  #[must_use]
  pub const fn new(array: [f32; 8]) -> Self {
    // SAFETY: both cfg layouts of f32x8 are #[repr(C, align(32))] and are
    // exactly 32 bytes of f32 data, so this transmute is a plain bit copy.
    unsafe { core::mem::transmute(array) }
  }
  /// Lane-wise select: for lanes where `self` (a comparison-style mask)
  /// is set, take the lane from `t`, otherwise from `f`.
  #[inline]
  #[must_use]
  pub fn blend(self, t: Self, f: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: blend_varying_m256(f.avx, t.avx, self.avx) }
      } else {
        Self {
          a : self.a.blend(t.a, f.a),
          b : self.b.blend(t.b, f.b),
        }
      }
    }
  }
  /// Lane-wise absolute value.
  #[inline]
  #[must_use]
  pub fn abs(self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        // Clear each lane's sign bit: i32::MAX is 0x7FFF_FFFF.
        let non_sign_bits = f32x8::from(f32::from_bits(i32::MAX as u32));
        self & non_sign_bits
      } else {
        Self {
          a : self.a.abs(),
          b : self.b.abs(),
        }
      }
    }
  }
  /// Lane-wise round toward negative infinity.
  #[inline]
  #[must_use]
  pub fn floor(self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: floor_m256(self.avx) }
      } else {
        Self {
          a : self.a.floor(),
          b : self.b.floor(),
        }
      }
    }
  }
  /// Lane-wise round toward positive infinity.
  #[inline]
  #[must_use]
  pub fn ceil(self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: ceil_m256(self.avx) }
      } else {
        Self {
          a : self.a.ceil(),
          b : self.b.ceil(),
        }
      }
    }
  }

  /// Lane-wise maximum without NaN correction: the result for a NaN lane is
  /// whatever the underlying max operation produces (fast path).
  #[inline]
  #[must_use]
  pub fn fast_max(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: max_m256(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.fast_max(rhs.a),
          b : self.b.fast_max(rhs.b),
        }
      }
    }
  }

  /// Lane-wise maximum. Where `rhs` is NaN, the `self` lane is kept
  /// (the `is_nan` blend below), matching `f32::max`-style NaN avoidance.
  #[inline]
  #[must_use]
  pub fn max(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        rhs.is_nan().blend(self, Self { avx: max_m256(self.avx, rhs.avx) })
      } else {
        Self {
          a : self.a.max(rhs.a),
          b : self.b.max(rhs.b),
        }
      }

    }
  }

  /// Lane-wise minimum without NaN correction: the result for a NaN lane is
  /// whatever the underlying min operation produces (fast path).
  #[inline]
  #[must_use]
  pub fn fast_min(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: min_m256(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.fast_min(rhs.a),
          b : self.b.fast_min(rhs.b),
        }
      }
    }
  }

  /// Lane-wise minimum. Where `rhs` is NaN, the `self` lane is kept
  /// (the `is_nan` blend below), matching `f32::min`-style NaN avoidance.
  #[inline]
  #[must_use]
  pub fn min(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        rhs.is_nan().blend(self, Self { avx: min_m256(self.avx, rhs.avx) })
      } else {
        Self {
          a : self.a.min(rhs.a),
          b : self.b.min(rhs.b),
        }
      }
    }
  }
  /// Mask of lanes that are NaN, via an unordered self-comparison
  /// (only NaN is unordered with itself).
  #[inline]
  #[must_use]
  pub fn is_nan(self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: cmp_op_mask_m256::<{cmp_op!(Unordered)}>(self.avx, self.avx) }
      } else {
        Self {
          a : self.a.is_nan(),
          b : self.b.is_nan(),
        }
      }
    }
  }
  /// Mask of lanes that are finite (neither infinity nor NaN).
  /// Shifting left by one drops the sign bit so only the 8 exponent bits
  /// are compared against the all-ones pattern.
  #[inline]
  #[must_use]
  pub fn is_finite(self) -> Self {
    let shifted_exp_mask = u32x8::from(0xFF000000);
    let u: u32x8 = cast(self);
    let shift_u = u << 1_u64;
    let out = !(shift_u & shifted_exp_mask).simd_eq(shifted_exp_mask);
    cast(out)
  }
  /// Mask of lanes that are exactly ±infinity: after dropping the sign bit,
  /// the remaining bits must be the all-ones exponent with a zero mantissa.
  #[inline]
  #[must_use]
  pub fn is_inf(self) -> Self {
    let shifted_inf = u32x8::from(0xFF000000);
    let u: u32x8 = cast(self);
    let shift_u = u << 1_u64;
    let out = (shift_u).simd_eq(shifted_inf);
    cast(out)
  }

  /// Lane-wise round to the nearest integer-valued float.
  #[inline]
  #[must_use]
  pub fn round(self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: round_m256::<{round_op!(Nearest)}>(self.avx) }
      } else {
        Self {
          a : self.a.round(),
          b : self.b.round(),
        }
      }
    }
  }

  /// Round each lane to the nearest `i32` using the raw hardware conversion.
  /// Lanes that are NaN or out of `i32` range produce the hardware's
  /// sentinel value — use `round_int` if you need those corrected.
  #[inline]
  #[must_use]
  pub fn fast_round_int(self) -> i32x8 {
    pick! {
      if #[cfg(target_feature="avx")] {
        cast(convert_to_i32_m256i_from_m256(self.avx))
      } else {
        cast([
          self.a.fast_round_int(),
          self.b.fast_round_int()])
      }
    }
  }

  /// Round each lane to the nearest `i32`, with NaN lanes forced to 0 and
  /// lanes >= 2^31 adjusted toward `i32::MAX`.
  #[inline]
  #[must_use]
  pub fn round_int(self) -> i32x8 {
    pick! {
      if #[cfg(target_feature="avx")] {
        // NaN fails self-equality, so `non_nan` zeroes NaN lanes first.
        let non_nan_mask = self.simd_eq(self);
        let non_nan = self & non_nan_mask;
        // For lanes >= 2^31 the conversion saturates; XOR with the
        // all-ones compare mask flips that result toward i32::MAX.
        let flip_to_max: i32x8 = cast(self.simd_ge(Self::splat(2147483648.0)));
        let cast: i32x8 = cast(convert_to_i32_m256i_from_m256(non_nan.avx));
        flip_to_max ^ cast
      } else {
        cast([
          self.a.round_int(),
          self.b.round_int(),
        ])
      }
    }
  }

  /// Truncate each lane to `i32` using the raw hardware conversion.
  /// Lanes that are NaN or out of `i32` range produce the hardware's
  /// sentinel value — use `trunc_int` if you need those corrected.
  #[inline]
  #[must_use]
  pub fn fast_trunc_int(self) -> i32x8 {
    pick! {
      if #[cfg(all(target_feature="avx"))] {
        cast(convert_truncate_to_i32_m256i_from_m256(self.avx))
      } else {
        cast([
          self.a.fast_trunc_int(),
          self.b.fast_trunc_int(),
        ])
      }
    }
  }

  /// Truncate each lane to `i32`, with NaN lanes forced to 0 and lanes
  /// >= 2^31 adjusted toward `i32::MAX` (same scheme as `round_int`).
  #[inline]
  #[must_use]
  pub fn trunc_int(self) -> i32x8 {
    pick! {
      if #[cfg(target_feature="avx")] {
        let non_nan_mask = self.simd_eq(self);
        let non_nan = self & non_nan_mask;
        let flip_to_max: i32x8 = cast(self.simd_ge(Self::splat(2147483648.0)));
        let cast: i32x8 = cast(convert_truncate_to_i32_m256i_from_m256(non_nan.avx));
        flip_to_max ^ cast
      } else {
        cast([
          self.a.trunc_int(),
          self.b.trunc_int(),
        ])
      }
    }
  }
  /// Lane-wise `(self * m) + a`, fused into one rounding step when FMA is
  /// available; otherwise computed as separate multiply and add.
  #[inline]
  #[must_use]
  pub fn mul_add(self, m: Self, a: Self) -> Self {
    pick! {
      if #[cfg(all(target_feature="avx",target_feature="fma"))] {
        Self { avx: fused_mul_add_m256(self.avx, m.avx, a.avx) }
      } else if #[cfg(target_feature="avx")] {
        (self * m) + a
      } else {
        Self {
          a : self.a.mul_add(m.a, a.a),
          b : self.b.mul_add(m.b, a.b),
        }
      }
    }
  }

  /// Lane-wise `(self * m) - s`, fused when FMA is available.
  #[inline]
  #[must_use]
  pub fn mul_sub(self, m: Self, s: Self) -> Self {
    pick! {
      if #[cfg(all(target_feature="avx",target_feature="fma"))] {
        Self { avx: fused_mul_sub_m256(self.avx, m.avx, s.avx) }
      } else if #[cfg(target_feature="avx")] {
        (self * m) - s
      } else {
        Self {
          a : self.a.mul_sub(m.a, s.a),
          b : self.b.mul_sub(m.b, s.b),
        }
      }
    }
  }

  /// Lane-wise `a - (self * m)`, fused when FMA is available.
  #[inline]
  #[must_use]
  pub fn mul_neg_add(self, m: Self, a: Self) -> Self {
    pick! {
      if #[cfg(all(target_feature="avx",target_feature="fma"))] {
        Self { avx: fused_mul_neg_add_m256(self.avx, m.avx, a.avx) }
      } else if #[cfg(target_feature="avx")] {
        a - (self * m)
      } else {
        Self {
          a : self.a.mul_neg_add(m.a, a.a),
          b : self.b.mul_neg_add(m.b, a.b),
        }
      }
    }
  }

  /// Lane-wise `-(self * m) - s`, fused when FMA is available.
  #[inline]
  #[must_use]
  pub fn mul_neg_sub(self, m: Self, s: Self) -> Self {
    pick! {
      if #[cfg(all(target_feature="avx",target_feature="fma"))] {
        Self { avx: fused_mul_neg_sub_m256(self.avx, m.avx, s.avx) }
      } else if #[cfg(target_feature="avx")] {
        -(self * m) - s
      } else {
        Self {
          a : self.a.mul_neg_sub(m.a, s.a),
          b : self.b.mul_neg_sub(m.b, s.b),
        }
      }
    }
  }

  /// Flip the sign of each lane of `self` where the corresponding lane of
  /// `signs` is negative (XOR with the masked-out sign bits of `signs`).
  #[inline]
  #[must_use]
  pub fn flip_signs(self, signs: Self) -> Self {
    self ^ (signs & Self::from(-0.0))
  }

  /// Lane-wise copysign: magnitude from `self`, sign bit from `sign`.
  #[inline]
  #[must_use]
  pub fn copysign(self, sign: Self) -> Self {
    // u32::MAX >> 1 keeps all bits except the sign bit.
    let magnitude_mask = Self::from(f32::from_bits(u32::MAX >> 1));
    (self & magnitude_mask) | (sign & Self::from(-0.0))
  }

  /// Computes `(asin(self), acos(self))` together using a shared degree-4
  /// polynomial approximation of asin on [0, 0.5]; for |x| >= 0.5 the
  /// identity based on `sqrt((1 - |x|)/2)` reduces the argument.
  #[inline]
  pub fn asin_acos(self) -> (Self, Self) {
    // Polynomial coefficients for the asin approximation (Cephes-style
    // single-precision constants — see the scalar asinf references).
    const_f32_as_f32x8!(P4asinf, 4.2163199048E-2);
    const_f32_as_f32x8!(P3asinf, 2.4181311049E-2);
    const_f32_as_f32x8!(P2asinf, 4.5470025998E-2);
    const_f32_as_f32x8!(P1asinf, 7.4953002686E-2);
    const_f32_as_f32x8!(P0asinf, 1.6666752422E-1);

    let xa = self.abs();
    // Lanes with |x| >= 0.5 use the reduced argument path.
    let big = xa.simd_ge(f32x8::splat(0.5));

    let x1 = f32x8::splat(0.5) * (f32x8::ONE - xa);
    let x2 = xa * xa;
    let x3 = big.blend(x1, x2);

    let xb = x1.sqrt();

    let x4 = big.blend(xb, xa);

    // z ≈ asin(x4) for the chosen argument.
    let z = polynomial_4!(x3, P0asinf, P1asinf, P2asinf, P3asinf, P4asinf);
    let z = z.mul_add(x3 * x4, x4);

    let z1 = z + z;

    // acos: pi - 2z for big negative lanes, 2z for big positive lanes,
    // pi/2 - asin otherwise.
    let z3 = self.simd_lt(f32x8::ZERO).blend(f32x8::PI - z1, z1);
    let z4 = f32x8::FRAC_PI_2 - z.flip_signs(self);
    let acos = big.blend(z3, z4);

    // asin: pi/2 - 2z for big lanes, z otherwise, then restore the sign.
    let z3 = f32x8::FRAC_PI_2 - z1;
    let asin = big.blend(z3, z);
    let asin = asin.flip_signs(self);

    (asin, acos)
  }

  /// Lane-wise arcsine; same approximation scheme as `asin_acos`.
  #[inline]
  #[must_use]
  pub fn asin(self) -> Self {
    const_f32_as_f32x8!(P4asinf, 4.2163199048E-2);
    const_f32_as_f32x8!(P3asinf, 2.4181311049E-2);
    const_f32_as_f32x8!(P2asinf, 4.5470025998E-2);
    const_f32_as_f32x8!(P1asinf, 7.4953002686E-2);
    const_f32_as_f32x8!(P0asinf, 1.6666752422E-1);

    let xa = self.abs();
    let big = xa.simd_ge(f32x8::splat(0.5));

    let x1 = f32x8::splat(0.5) * (f32x8::ONE - xa);
    let x2 = xa * xa;
    let x3 = big.blend(x1, x2);

    let xb = x1.sqrt();

    let x4 = big.blend(xb, xa);

    let z = polynomial_4!(x3, P0asinf, P1asinf, P2asinf, P3asinf, P4asinf);
    let z = z.mul_add(x3 * x4, x4);

    let z1 = z + z;

    let z3 = f32x8::FRAC_PI_2 - z1;
    let asin = big.blend(z3, z);
    let asin = asin.flip_signs(self);

    asin
  }

  /// Lane-wise arccosine; same approximation scheme as `asin_acos`.
  #[inline]
  #[must_use]
  pub fn acos(self) -> Self {
    const_f32_as_f32x8!(P4asinf, 4.2163199048E-2);
    const_f32_as_f32x8!(P3asinf, 2.4181311049E-2);
    const_f32_as_f32x8!(P2asinf, 4.5470025998E-2);
    const_f32_as_f32x8!(P1asinf, 7.4953002686E-2);
    const_f32_as_f32x8!(P0asinf, 1.6666752422E-1);

    let xa = self.abs();
    let big = xa.simd_ge(f32x8::splat(0.5));

    let x1 = f32x8::splat(0.5) * (f32x8::ONE - xa);
    let x2 = xa * xa;
    let x3 = big.blend(x1, x2);

    let xb = x1.sqrt();

    let x4 = big.blend(xb, xa);

    let z = polynomial_4!(x3, P0asinf, P1asinf, P2asinf, P3asinf, P4asinf);
    let z = z.mul_add(x3 * x4, x4);

    let z1 = z + z;

    let z3 = self.simd_lt(f32x8::ZERO).blend(f32x8::PI - z1, z1);
    let z4 = f32x8::FRAC_PI_2 - z.flip_signs(self);
    let acos = big.blend(z3, z4);

    acos
  }

  /// Lane-wise arctangent via a degree-3 polynomial in `z*z` after reducing
  /// the argument into a small interval around zero.
  #[inline]
  pub fn atan(self) -> Self {
    // Polynomial coefficients for the atan approximation.
    const_f32_as_f32x8!(P3atanf, 8.05374449538E-2);
    const_f32_as_f32x8!(P2atanf, -1.38776856032E-1);
    const_f32_as_f32x8!(P1atanf, 1.99777106478E-1);
    const_f32_as_f32x8!(P0atanf, -3.33329491539E-1);

    let t = self.abs();

    // Classify |x| against tan(pi/8) = sqrt(2)-1 and tan(3pi/8) = sqrt(2)+1
    // to pick the reduction: none, subtract pi/4, or subtract pi/2.
    let notsmal = t.simd_ge(Self::SQRT_2 - Self::ONE);
    let notbig = t.simd_le(Self::SQRT_2 + Self::ONE);

    let mut s = notbig.blend(Self::FRAC_PI_4, Self::FRAC_PI_2);
    s = notsmal & s;

    // Reduced argument z = (a / b) for the selected interval.
    let mut a = notbig & t;
    a = notsmal.blend(a - Self::ONE, a);
    let mut b = notbig & Self::ONE;
    b = notsmal.blend(b + t, b);
    let z = a / b;

    let zz = z * z;

    let mut re = polynomial_3!(zz, P0atanf, P1atanf, P2atanf, P3atanf);
    re = re.mul_add(zz * z, z) + s;

    // Restore the sign of the original input.
    re = (self.sign_bit()).blend(-re, re);

    re
  }

  /// Lane-wise `atan2(self, x)` (y = self): quadrant-correct arctangent,
  /// using the same polynomial core as `atan`.
  #[inline]
  pub fn atan2(self, x: Self) -> Self {
    const_f32_as_f32x8!(P3atanf, 8.05374449538E-2);
    const_f32_as_f32x8!(P2atanf, -1.38776856032E-1);
    const_f32_as_f32x8!(P1atanf, 1.99777106478E-1);
    const_f32_as_f32x8!(P0atanf, -3.33329491539E-1);

    let y = self;

    // Work with |x|, |y| and remember if they had to be swapped so the
    // ratio stays <= 1.
    let x1 = x.abs();
    let y1 = y.abs();
    let swapxy = y1.simd_gt(x1);
    let mut x2 = swapxy.blend(y1, x1);
    let mut y2 = swapxy.blend(x1, y1);

    // inf/inf would be NaN; masking both lanes to a finite value gives the
    // conventional atan2(±inf, ±inf) results instead.
    let both_infinite = x.is_inf() & y.is_inf();
    if both_infinite.any() {
      let minus_one = -Self::ONE;
      x2 = both_infinite.blend(x2 & minus_one, x2);
      y2 = both_infinite.blend(y2 & minus_one, y2);
    }

    let t = y2 / x2;

    // Reduce t >= sqrt(2)-1 by the tan half-angle style identity.
    let notsmal = t.simd_ge(Self::SQRT_2 - Self::ONE);

    let a = notsmal.blend(t - Self::ONE, t);
    let b = notsmal.blend(t + Self::ONE, Self::ONE);
    let s = notsmal & Self::FRAC_PI_4;
    let z = a / b;

    let zz = z * z;

    let mut re = polynomial_3!(zz, P0atanf, P1atanf, P2atanf, P3atanf);
    re = re.mul_add(zz * z, z) + s;

    // Undo the swap, then fold in the quadrant from the signs of x and y.
    re = swapxy.blend(Self::FRAC_PI_2 - re, re);
    re = ((x | y).simd_eq(Self::ZERO)).blend(Self::ZERO, re);
    re = (x.sign_bit()).blend(Self::PI - re, re);

    re = (y.sign_bit()).blend(-re, re);

    re
  }

  /// Computes `(sin(self), cos(self))` together. The angle is reduced by
  /// multiples of pi/2 using split-constant (extended precision) steps, and
  /// small sin/cos polynomials are evaluated on the remainder; quadrant
  /// bookkeeping is done on the integer quotient `q`.
  #[inline]
  #[must_use]
  pub fn sin_cos(self) -> (Self, Self) {
    // Extended-precision pieces of pi/2 for accurate argument reduction.
    const_f32_as_f32x8!(DP1F, 0.78515625_f32 * 2.0);
    const_f32_as_f32x8!(DP2F, 2.4187564849853515625E-4_f32 * 2.0);
    const_f32_as_f32x8!(DP3F, 3.77489497744594108E-8_f32 * 2.0);

    // Polynomial coefficients for sin on the reduced interval.
    const_f32_as_f32x8!(P0sinf, -1.6666654611E-1);
    const_f32_as_f32x8!(P1sinf, 8.3321608736E-3);
    const_f32_as_f32x8!(P2sinf, -1.9515295891E-4);

    // Polynomial coefficients for cos on the reduced interval.
    const_f32_as_f32x8!(P0cosf, 4.166664568298827E-2);
    const_f32_as_f32x8!(P1cosf, -1.388731625493765E-3);
    const_f32_as_f32x8!(P2cosf, 2.443315711809948E-5);

    const_f32_as_f32x8!(TWO_OVER_PI, 2.0 / core::f32::consts::PI);

    let xa = self.abs();

    // q = round(|x| * 2/pi) picks the quadrant.
    let y = (xa * TWO_OVER_PI).round();
    let q: i32x8 = y.round_int();

    // Subtract q * pi/2 in three progressively smaller steps.
    let x = y.mul_neg_add(DP3F, y.mul_neg_add(DP2F, y.mul_neg_add(DP1F, xa)));

    let x2 = x * x;
    let mut s = polynomial_2!(x2, P0sinf, P1sinf, P2sinf) * (x * x2) + x;
    let mut c = polynomial_2!(x2, P0cosf, P1cosf, P2cosf) * (x2 * x2)
      + f32x8::from(0.5).mul_neg_add(x2, f32x8::from(1.0));

    // Odd quadrants swap sin and cos.
    let swap = !(q & i32x8::from(1)).simd_eq(i32x8::from(0));

    // Huge arguments lose all reduction accuracy; pin them to (0, 1).
    let mut overflow: f32x8 = cast(q.simd_gt(i32x8::from(0x2000000)));
    overflow &= xa.is_finite();
    s = overflow.blend(f32x8::from(0.0), s);
    c = overflow.blend(f32x8::from(1.0), c);

    // Sign of sin depends on the quadrant and the input's sign.
    let mut sin1 = cast::<_, f32x8>(swap).blend(c, s);
    let sign_sin: i32x8 = (q << 30) ^ cast::<_, i32x8>(self);
    sin1 = sin1.flip_signs(cast(sign_sin));

    // Sign of cos depends on the quadrant only.
    let mut cos1 = cast::<_, f32x8>(swap).blend(s, c);
    let sign_cos: i32x8 = ((q + i32x8::from(1)) & i32x8::from(2)) << 30;
    cos1 ^= cast::<_, f32x8>(sign_cos);

    (sin1, cos1)
  }
  /// Lane-wise sine (see `sin_cos`).
  #[inline]
  #[must_use]
  pub fn sin(self) -> Self {
    let (s, _) = self.sin_cos();
    s
  }
  /// Lane-wise cosine (see `sin_cos`).
  #[inline]
  #[must_use]
  pub fn cos(self) -> Self {
    let (_, c) = self.sin_cos();
    c
  }
  /// Lane-wise tangent, computed as sin/cos from one reduction.
  #[inline]
  #[must_use]
  pub fn tan(self) -> Self {
    let (s, c) = self.sin_cos();
    s / c
  }
  /// Converts each lane from radians to degrees.
  #[inline]
  #[must_use]
  pub fn to_degrees(self) -> Self {
    const_f32_as_f32x8!(RAD_TO_DEG_RATIO, 180.0_f32 / core::f32::consts::PI);
    self * RAD_TO_DEG_RATIO
  }
  /// Converts each lane from degrees to radians.
  #[inline]
  #[must_use]
  pub fn to_radians(self) -> Self {
    const_f32_as_f32x8!(DEG_TO_RAD_RATIO, core::f32::consts::PI / 180.0_f32);
    self * DEG_TO_RAD_RATIO
  }
  /// Lane-wise approximate reciprocal. On AVX this maps to the hardware
  /// reciprocal estimate (reduced precision), not an exact 1.0/x divide.
  #[inline]
  #[must_use]
  pub fn recip(self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: reciprocal_m256(self.avx) }
      } else {
        Self {
          a : self.a.recip(),
          b : self.b.recip(),
        }
      }
    }
  }
  /// Lane-wise approximate reciprocal square root. On AVX this maps to the
  /// hardware estimate (reduced precision).
  #[inline]
  #[must_use]
  pub fn recip_sqrt(self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: reciprocal_sqrt_m256(self.avx) }
      } else {
        Self {
          a : self.a.recip_sqrt(),
          b : self.b.recip_sqrt(),
        }
      }
    }
  }
  /// Lane-wise square root.
  #[inline]
  #[must_use]
  pub fn sqrt(self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: sqrt_m256(self.avx) }
      } else {
        Self {
          a : self.a.sqrt(),
          b : self.b.sqrt(),
        }
      }
    }
  }
  /// Packs the sign bit of each lane into the low 8 bits of a `u32`
  /// (lane 0 -> bit 0).
  #[inline]
  #[must_use]
  pub fn to_bitmask(self) -> u32 {
    pick! {
      if #[cfg(target_feature="avx")] {
        move_mask_m256(self.avx) as u32
      } else {
        // Combine the two 4-lane half masks: `b` supplies bits 4..=7.
        (self.b.to_bitmask() << 4) | self.a.to_bitmask()
      }
    }
  }
  /// True if any lane's mask bit (sign bit) is set.
  #[inline]
  #[must_use]
  pub fn any(self) -> bool {
    pick! {
      if #[cfg(target_feature="avx")] {
        move_mask_m256(self.avx) != 0
      } else {
        self.a.any() || self.b.any()
      }
    }
  }
  /// True if every lane's mask bit (sign bit) is set.
  #[inline]
  #[must_use]
  pub fn all(self) -> bool {
    pick! {
      if #[cfg(target_feature="avx")] {
        move_mask_m256(self.avx) == 0b11111111
      } else {
        self.a.all() && self.b.all()
      }
    }
  }
  /// True if no lane's mask bit is set.
  #[inline]
  #[must_use]
  pub fn none(self) -> bool {
    !self.any()
  }

  /// Computes 2^n for integer-valued lanes by constructing the float's
  /// exponent field directly: adding (bias + 2^23) places n in the exponent
  /// bits, then the shift positions it. Used by `exp` for scaling.
  #[inline]
  fn vm_pow2n(self) -> Self {
    const_f32_as_f32x8!(pow2_23, 8388608.0);
    const_f32_as_f32x8!(bias, 127.0);
    let a = self + (bias + pow2_23);
    let c = cast::<_, i32x8>(a) << 23;
    cast::<_, f32x8>(c)
  }

  /// Lane-wise e^x: reduce by powers of 2 via x = r*ln(2) + remainder,
  /// evaluate a Taylor-style polynomial on the remainder, then scale by 2^r.
  /// Out-of-range and non-finite lanes produce 0.
  #[inline]
  #[must_use]
  pub fn exp(self) -> Self {
    // Taylor coefficients 1/2! .. 1/7!.
    const_f32_as_f32x8!(P0, 1.0 / 2.0);
    const_f32_as_f32x8!(P1, 1.0 / 6.0);
    const_f32_as_f32x8!(P2, 1. / 24.);
    const_f32_as_f32x8!(P3, 1. / 120.);
    const_f32_as_f32x8!(P4, 1. / 720.);
    const_f32_as_f32x8!(P5, 1. / 5040.);
    // ln(2) split into hi/lo parts for accurate reduction.
    const_f32_as_f32x8!(LN2D_HI, 0.693359375);
    const_f32_as_f32x8!(LN2D_LO, -2.12194440e-4);
    let max_x = f32x8::from(87.3);
    let r = (self * Self::LOG2_E).round();
    let x = r.mul_neg_add(LN2D_HI, self);
    let x = r.mul_neg_add(LN2D_LO, x);
    let z = polynomial_5!(x, P0, P1, P2, P3, P4, P5);
    let x2 = x * x;
    let z = z.mul_add(x2, x);
    let n2 = Self::vm_pow2n(r);
    let z = (z + Self::ONE) * n2;
    let in_range = self.abs().simd_lt(max_x);
    let in_range = in_range & self.is_finite();
    in_range.blend(z, Self::ZERO)
  }

  /// Extracts each lane's unbiased binary exponent as a float, using the
  /// same exponent-field construction trick as `vm_pow2n` in reverse.
  #[inline]
  fn exponent(self) -> f32x8 {
    const_f32_as_f32x8!(pow2_23, 8388608.0);
    const_f32_as_f32x8!(bias, 127.0);
    let a = cast::<_, u32x8>(self);
    let b = a >> 23;
    let c = b | cast::<_, u32x8>(pow2_23);
    let d = cast::<_, f32x8>(c);
    let e = d - (pow2_23 + bias);
    e
  }

  /// Keeps each lane's mantissa but forces the exponent to that of 0.5
  /// (0x3F000000), mapping the value into [0.5, 1.0). Used by `ln`/`pow`.
  #[inline]
  fn fraction_2(self) -> Self {
    let t1 = cast::<_, u32x8>(self);
    let t2 = cast::<_, u32x8>(
      (t1 & u32x8::from(0x007FFFFF)) | u32x8::from(0x3F000000),
    );
    cast::<_, f32x8>(t2)
  }
  /// Mask (as float lanes) of values whose exponent field is all zero,
  /// i.e. zero or subnormal.
  #[inline]
  fn is_zero_or_subnormal(self) -> Self {
    let t = cast::<_, i32x8>(self);
    let t = t & i32x8::splat(0x7F800000);
    i32x8::round_float(t.simd_eq(i32x8::splat(0)))
  }
  /// Positive infinity in every lane (exponent all ones, mantissa zero).
  #[inline]
  fn infinity() -> Self {
    cast::<_, f32x8>(i32x8::splat(0x7F800000))
  }
  /// The quiet-NaN payload used to signal a domain error in `ln`.
  #[inline]
  fn nan_log() -> Self {
    cast::<_, f32x8>(i32x8::splat(0x7FC00000 | 0x101 & 0x003FFFFF))
  }
  /// The quiet-NaN payload used to signal a domain error in `pow`.
  #[inline]
  fn nan_pow() -> Self {
    cast::<_, f32x8>(i32x8::splat(0x7FC00000 | 0x101 & 0x003FFFFF))
  }
  /// Mask of lanes whose sign bit is set (works for -0.0 and negative NaN
  /// too, since it inspects the raw bit). The arithmetic shift smears the
  /// sign bit across the lane; the final compare turns it into a mask.
  #[inline]
  pub fn sign_bit(self) -> Self {
    let t1 = cast::<_, i32x8>(self);
    let t2 = t1 >> 31;
    !cast::<_, f32x8>(t2).simd_eq(f32x8::ZERO)
  }

  /// Horizontal sum of all 8 lanes. The AVX path folds 256 -> 128 -> 64 ->
  /// 32 bits with successive shuffles and adds.
  #[inline]
  #[must_use]
  pub fn reduce_add(self) -> f32 {
    pick! {
      if #[cfg(target_feature="avx")]{
        let hi_quad = extract_m128_from_m256::<1>(self.avx);
        let lo_quad = cast_to_m128_from_m256(self.avx);
        let sum_quad = add_m128(lo_quad,hi_quad);
        let lo_dual = sum_quad;
        let hi_dual = move_high_low_m128(sum_quad,sum_quad);
        let sum_dual = add_m128(lo_dual,hi_dual);
        let lo = sum_dual;
        let hi = shuffle_abi_f32_all_m128::<0b_01>(sum_dual, sum_dual);
        let sum = add_m128_s(lo, hi);
        get_f32_from_m128_s(sum)
      } else {
        self.a.reduce_add() + self.b.reduce_add()
      }
    }
  }

  /// Lane-wise natural logarithm: split into mantissa in [0.5, 1) and
  /// exponent, evaluate a degree-8 polynomial on (mantissa - 1), then add
  /// exponent * ln(2) using split hi/lo constants. Non-finite and
  /// below-normal inputs are patched up at the end.
  #[inline]
  #[must_use]
  pub fn ln(self) -> Self {
    const_f32_as_f32x8!(HALF, 0.5);
    const_f32_as_f32x8!(P0, 3.3333331174E-1);
    const_f32_as_f32x8!(P1, -2.4999993993E-1);
    const_f32_as_f32x8!(P2, 2.0000714765E-1);
    const_f32_as_f32x8!(P3, -1.6668057665E-1);
    const_f32_as_f32x8!(P4, 1.4249322787E-1);
    const_f32_as_f32x8!(P5, -1.2420140846E-1);
    const_f32_as_f32x8!(P6, 1.1676998740E-1);
    const_f32_as_f32x8!(P7, -1.1514610310E-1);
    const_f32_as_f32x8!(P8, 7.0376836292E-2);
    const_f32_as_f32x8!(LN2F_HI, 0.693359375);
    const_f32_as_f32x8!(LN2F_LO, -2.12194440e-4);
    const_f32_as_f32x8!(VM_SMALLEST_NORMAL, 1.17549435E-38);

    let x1 = self;
    let x = Self::fraction_2(x1);
    let e = Self::exponent(x1);
    // Keep the mantissa in [sqrt(2)/2, sqrt(2)] so (x - 1) stays small.
    let mask = x.simd_gt(Self::SQRT_2 * HALF);
    let x = (!mask).blend(x + x, x);
    let fe = mask.blend(e + Self::ONE, e);
    let x = x - Self::ONE;
    let res = polynomial_8!(x, P0, P1, P2, P3, P4, P5, P6, P7, P8);
    let x2 = x * x;
    let res = x2 * x * res;
    let res = fe.mul_add(LN2F_LO, res);
    let res = res + x2.mul_neg_add(HALF, x);
    let res = fe.mul_add(LN2F_HI, res);
    let overflow = !self.is_finite();
    let underflow = x1.simd_lt(VM_SMALLEST_NORMAL);
    let mask = overflow | underflow;
    if !mask.any() {
      res
    } else {
      // Special cases: subnormal/zero -> NaN or +inf, non-finite passthrough.
      let is_zero = self.is_zero_or_subnormal();
      let res = underflow.blend(Self::nan_log(), res);
      let res = is_zero.blend(Self::infinity(), res);
      let res = overflow.blend(self, res);
      res
    }
  }

  /// Lane-wise base-2 logarithm, via `ln(x) * log2(e)`.
  #[inline]
  #[must_use]
  pub fn log2(self) -> Self {
    Self::ln(self) * Self::LOG2_E
  }
  /// Lane-wise base-10 logarithm, via `ln(x) * log10(e)`.
  #[inline]
  #[must_use]
  pub fn log10(self) -> Self {
    Self::ln(self) * Self::LOG10_E
  }

  /// Lane-wise `self^y` computed as exp2(y * log2(self)) with extended
  /// precision bookkeeping: the log and exp reductions track their rounding
  /// errors (`lg_err`, `x2_err`, split ln(2) constants) so the composite
  /// stays accurate. Handles the IEEE special cases for zero/negative
  /// bases, integer exponents, overflow, underflow, and NaN propagation.
  #[inline]
  #[must_use]
  pub fn pow_f32x8(self, y: Self) -> Self {
    // Split ln(2) for accurate reduction.
    const_f32_as_f32x8!(ln2f_hi, 0.693359375);
    const_f32_as_f32x8!(ln2f_lo, -2.12194440e-4);
    // Log polynomial coefficients (same set as `ln`).
    const_f32_as_f32x8!(P0logf, 3.3333331174E-1);
    const_f32_as_f32x8!(P1logf, -2.4999993993E-1);
    const_f32_as_f32x8!(P2logf, 2.0000714765E-1);
    const_f32_as_f32x8!(P3logf, -1.6668057665E-1);
    const_f32_as_f32x8!(P4logf, 1.4249322787E-1);
    const_f32_as_f32x8!(P5logf, -1.2420140846E-1);
    const_f32_as_f32x8!(P6logf, 1.1676998740E-1);
    const_f32_as_f32x8!(P7logf, -1.1514610310E-1);
    const_f32_as_f32x8!(P8logf, 7.0376836292E-2);

    // Exp Taylor coefficients 1/2! .. 1/7!.
    const_f32_as_f32x8!(p2expf, 1.0 / 2.0);
    const_f32_as_f32x8!(p3expf, 1.0 / 6.0);
    const_f32_as_f32x8!(p4expf, 1.0 / 24.0);
    const_f32_as_f32x8!(p5expf, 1.0 / 120.0);
    const_f32_as_f32x8!(p6expf, 1.0 / 720.0);
    const_f32_as_f32x8!(p7expf, 1.0 / 5040.0);

    // --- log part: lg ≈ ln(|self| mantissa), ef = exponent. ---
    let x1 = self.abs();
    let x = x1.fraction_2();
    let mask = x.simd_gt(f32x8::SQRT_2 * f32x8::HALF);
    let x = (!mask).blend(x + x, x);

    let x = x - f32x8::ONE;
    let x2 = x * x;
    let lg1 = polynomial_8!(
      x, P0logf, P1logf, P2logf, P3logf, P4logf, P5logf, P6logf, P7logf, P8logf
    );
    let lg1 = lg1 * x2 * x;

    let ef = x1.exponent();
    let ef = mask.blend(ef + f32x8::ONE, ef);
    // e1 carries the integer part of ef*y; yr is its rounding remainder.
    let e1 = (ef * y).round();
    let yr = ef.mul_sub(y, e1);

    let lg = f32x8::HALF.mul_neg_add(x2, x) + lg1;
    // Error terms of the log evaluation, carried into the exp step.
    let x2_err = (f32x8::HALF * x).mul_sub(x, f32x8::HALF * x2);
    let lg_err = f32x8::HALF.mul_add(x2, lg - x) - lg1;

    // --- combine: v = y*lg with error compensation, e2 integer part. ---
    let e2 = (lg * y * f32x8::LOG2_E).round();
    let v = lg.mul_sub(y, e2 * ln2f_hi);
    let v = e2.mul_neg_add(ln2f_lo, v);
    let v = v - (lg_err + x2_err).mul_sub(y, yr * f32x8::LN_2);

    // --- exp part on the residual v; e3 is its integer reduction. ---
    let x = v;
    let e3 = (x * f32x8::LOG2_E).round();
    let x = e3.mul_neg_add(f32x8::LN_2, x);
    let x2 = x * x;
    let z = x2.mul_add(
      polynomial_5!(x, p2expf, p3expf, p4expf, p5expf, p6expf, p7expf),
      x + f32x8::ONE,
    );

    // Total power-of-two scale, and the would-be biased exponent `ej`.
    let ee = e1 + e2 + e3;
    let ei = cast::<_, i32x8>(ee.round_int());
    let ej = cast::<_, i32x8>(ei + (cast::<_, i32x8>(z) >> 23));

    let overflow = cast::<_, f32x8>(ej.simd_gt(i32x8::splat(0x0FF)))
      | (ee.simd_gt(f32x8::splat(300.0)));
    let underflow = cast::<_, f32x8>(ej.simd_lt(i32x8::splat(0x000)))
      | (ee.simd_lt(f32x8::splat(-300.0)));

    // Apply the scale by adding ei directly to the exponent field.
    let z = cast::<_, f32x8>(cast::<_, i32x8>(z) + (ei << 23));
    let z = underflow.blend(f32x8::ZERO, z);
    let z = overflow.blend(Self::infinity(), z);

    // 0^y: +inf for y < 0, 1 for y == 0, 0 for y > 0.
    let x_zero = self.is_zero_or_subnormal();
    let z = x_zero.blend(
      y.simd_lt(f32x8::ZERO).blend(
        Self::infinity(),
        y.simd_eq(f32x8::ZERO).blend(f32x8::ONE, f32x8::ZERO),
      ),
      z,
    );

    // Negative base: defined only for integer y (sign from y's parity),
    // NaN otherwise.
    let x_sign = self.sign_bit();
    let z = if x_sign.any() {
      let yi = y.simd_eq(y.round());

      // Odd integer y contributes a negative sign (low bit into sign bit).
      let y_odd = cast::<_, i32x8>(y.round_int() << 31).round_float();

      let z1 =
        yi.blend(z | y_odd, self.simd_eq(Self::ZERO).blend(z, Self::nan_pow()));

      x_sign.blend(z1, z)
    } else {
      z
    };

    // Fast exit when nothing was non-finite; otherwise propagate NaN.
    let x_finite = self.is_finite();
    let y_finite = y.is_finite();
    let e_finite = ee.is_finite();
    if (x_finite & y_finite & (e_finite | x_zero)).all() {
      return z;
    }

    (self.is_nan() | y.is_nan()).blend(self + y, z)
  }
  /// Lane-wise `self^y` for a scalar exponent (splat + `pow_f32x8`).
  #[inline]
  pub fn powf(self, y: f32) -> Self {
    Self::pow_f32x8(self, f32x8::splat(y))
  }

  /// Transposes `data` viewed as a row-major 8x8 matrix: output row i holds
  /// lane i of every input vector.
  #[must_use]
  #[inline]
  pub fn transpose(data: [f32x8; 8]) -> [f32x8; 8] {
    pick! {
      if #[cfg(target_feature="avx")] {
        // Stage 1: interleave adjacent row pairs.
        let a0 = unpack_lo_m256(data[0].avx, data[1].avx);
        let a1 = unpack_hi_m256(data[0].avx, data[1].avx);
        let a2 = unpack_lo_m256(data[2].avx, data[3].avx);
        let a3 = unpack_hi_m256(data[2].avx, data[3].avx);
        let a4 = unpack_lo_m256(data[4].avx, data[5].avx);
        let a5 = unpack_hi_m256(data[4].avx, data[5].avx);
        let a6 = unpack_lo_m256(data[6].avx, data[7].avx);
        let a7 = unpack_hi_m256(data[6].avx, data[7].avx);

        // Classic _MM_SHUFFLE(z, y, x, w) selector encoding.
        pub const fn mm_shuffle(z: i32, y: i32, x: i32, w: i32) -> i32 {
          (z << 6) | (y << 4) | (x << 2) | w
        }

        const SHUFF_LO : i32 = mm_shuffle(1,0,1,0);
        const SHUFF_HI : i32 = mm_shuffle(3,2,3,2);

        // Stage 2: shuffle 64-bit groups within each 128-bit half.
        let b0 = shuffle_m256::<SHUFF_LO>(a0,a2);
        let b1 = shuffle_m256::<SHUFF_HI>(a0,a2);
        let b2 = shuffle_m256::<SHUFF_LO>(a1,a3);
        let b3 = shuffle_m256::<SHUFF_HI>(a1,a3);
        let b4 = shuffle_m256::<SHUFF_LO>(a4,a6);
        let b5 = shuffle_m256::<SHUFF_HI>(a4,a6);
        let b6 = shuffle_m256::<SHUFF_LO>(a5,a7);
        let b7 = shuffle_m256::<SHUFF_HI>(a5,a7);

        // Stage 3: recombine the 128-bit halves across the two groups.
        [
          f32x8 { avx: permute2z_m256::<0x20>(b0, b4) },
          f32x8 { avx: permute2z_m256::<0x20>(b1, b5) },
          f32x8 { avx: permute2z_m256::<0x20>(b2, b6) },
          f32x8 { avx: permute2z_m256::<0x20>(b3, b7) },
          f32x8 { avx: permute2z_m256::<0x31>(b0, b4) },
          f32x8 { avx: permute2z_m256::<0x31>(b1, b5) },
          f32x8 { avx: permute2z_m256::<0x31>(b2, b6) },
          f32x8 { avx: permute2z_m256::<0x31>(b3, b7) }
        ]
      } else {
        // Scalar fallback: gather lane `index` from every row.
        #[inline(always)]
        fn transpose_column(data: &[f32x8; 8], index: usize) -> f32x8 {
          f32x8::new([
            data[0].as_array()[index],
            data[1].as_array()[index],
            data[2].as_array()[index],
            data[3].as_array()[index],
            data[4].as_array()[index],
            data[5].as_array()[index],
            data[6].as_array()[index],
            data[7].as_array()[index],
          ])
        }

        [
          transpose_column(&data, 0),
          transpose_column(&data, 1),
          transpose_column(&data, 2),
          transpose_column(&data, 3),
          transpose_column(&data, 4),
          transpose_column(&data, 5),
          transpose_column(&data, 6),
          transpose_column(&data, 7),
        ]
      }
    }
  }

  /// Copies the lanes out as a plain array.
  #[inline]
  pub fn to_array(self) -> [f32; 8] {
    cast(self)
  }

  /// Borrows the lanes as an array (free reinterpretation).
  #[inline]
  pub fn as_array(&self) -> &[f32; 8] {
    cast_ref(self)
  }

  /// Mutably borrows the lanes as an array (free reinterpretation).
  #[inline]
  pub fn as_mut_array(&mut self) -> &mut [f32; 8] {
    cast_mut(self)
  }

  /// Converts each `i32` lane to `f32` (hardware conversion with AVX2,
  /// per-lane `as` casts otherwise).
  #[inline]
  pub fn from_i32x8(v: i32x8) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx: convert_to_m256_from_i32_m256i(v.avx2) }
      } else {
        Self::new([
          v.as_array()[0] as f32,
          v.as_array()[1] as f32,
          v.as_array()[2] as f32,
          v.as_array()[3] as f32,
          v.as_array()[4] as f32,
          v.as_array()[5] as f32,
          v.as_array()[6] as f32,
          v.as_array()[7] as f32,
        ])
      }
    }
  }
}
1537
1538impl Not for f32x8 {
1539 type Output = Self;
1540 #[inline]
1541 fn not(self) -> Self {
1542 pick! {
1543 if #[cfg(target_feature="avx")] {
1544 Self { avx: self.avx.not() }
1545 } else {
1546 Self {
1547 a : self.a.not(),
1548 b : self.b.not(),
1549 }
1550 }
1551 }
1552 }
1553}