use super::*;
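
// Eight lanes of `i32`. With AVX2 this is a single 256-bit register;
// otherwise it is modeled as two 128-bit `i32x4` halves.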
pick! {
  if #[cfg(target_feature="avx2")] {
    #[derive(Default, Clone, Copy, PartialEq, Eq)]
    #[repr(C, align(32))]
    pub struct i32x8 { pub(crate) avx2: m256i }
  } else {
    #[derive(Default, Clone, Copy, PartialEq, Eq)]
    #[repr(C, align(32))]
    pub struct i32x8 { pub(crate) a: i32x4, pub(crate) b: i32x4 }
  }
}
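
// Shared integer constants (the usual `ZERO`/`ONE`/`MAX`/`MIN` set) for this
// type.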
int_uint_consts!(i32, 8, i32x8, 256);

unsafe impl Zeroable for i32x8 {}
unsafe impl Pod for i32x8 {}

impl AlignTo for i32x8 {
  type Elem = i32;
}

impl Add for i32x8 {
  type Output = Self;
  #[inline]
  fn add(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: add_i32_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a: self.a.add(rhs.a),
          b: self.b.add(rhs.b),
        }
      }
    }
  }
}

impl Sub for i32x8 {
  type Output = Self;
  #[inline]
  fn sub(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: sub_i32_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a: self.a.sub(rhs.a),
          b: self.b.sub(rhs.b),
        }
      }
    }
  }
}
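
// Lane-wise multiply keeping only the low 32 bits of each 64-bit product
// (`vpmulld` on AVX2), i.e. wrapping multiplication.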
impl Mul for i32x8 {
  type Output = Self;
  #[inline]
  fn mul(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: mul_i32_keep_low_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a: self.a.mul(rhs.a),
          b: self.b.mul(rhs.b),
        }
      }
    }
  }
}
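
// A scalar `i32` operand (on either side) is splatted across all lanes and
// the vector op is reused.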
impl Add<i32> for i32x8 {
  type Output = Self;
  #[inline]
  fn add(self, rhs: i32) -> Self::Output {
    self.add(Self::splat(rhs))
  }
}

impl Sub<i32> for i32x8 {
  type Output = Self;
  #[inline]
  fn sub(self, rhs: i32) -> Self::Output {
    self.sub(Self::splat(rhs))
  }
}

impl Mul<i32> for i32x8 {
  type Output = Self;
  #[inline]
  fn mul(self, rhs: i32) -> Self::Output {
    self.mul(Self::splat(rhs))
  }
}

impl Add<i32x8> for i32 {
  type Output = i32x8;
  #[inline]
  fn add(self, rhs: i32x8) -> Self::Output {
    i32x8::splat(self) + rhs
  }
}

impl Sub<i32x8> for i32 {
  type Output = i32x8;
  #[inline]
  fn sub(self, rhs: i32x8) -> Self::Output {
    i32x8::splat(self) - rhs
  }
}

impl Mul<i32x8> for i32 {
  type Output = i32x8;
  #[inline]
  fn mul(self, rhs: i32x8) -> Self::Output {
    i32x8::splat(self) * rhs
  }
}

impl BitAnd for i32x8 {
  type Output = Self;
  #[inline]
  fn bitand(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: bitand_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a: self.a.bitand(rhs.a),
          b: self.b.bitand(rhs.b),
        }
      }
    }
  }
}

impl BitOr for i32x8 {
  type Output = Self;
  #[inline]
  fn bitor(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: bitor_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a: self.a.bitor(rhs.a),
          b: self.b.bitor(rhs.b),
        }
      }
    }
  }
}

impl BitXor for i32x8 {
  type Output = Self;
  #[inline]
  fn bitxor(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: bitxor_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a: self.a.bitxor(rhs.a),
          b: self.b.bitxor(rhs.b),
        }
      }
    }
  }
}
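
// Uniform shifts: every lane is shifted by the same scalar amount. The AVX2
// path places the count in the low 64 bits of an `m128i`, as the
// `shl_all`/`shr_all` intrinsics expect.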
macro_rules! impl_shl_t_for_i32x8 {
  ($($shift_type:ty),+ $(,)?) => {
    $(impl Shl<$shift_type> for i32x8 {
      type Output = Self;
      #[inline]
      fn shl(self, rhs: $shift_type) -> Self::Output {
        pick! {
          if #[cfg(target_feature="avx2")] {
            let shift = cast([rhs as u64, 0]);
            Self { avx2: shl_all_u32_m256i(self.avx2, shift) }
          } else {
            Self {
              a: self.a.shl(rhs),
              b: self.b.shl(rhs),
            }
          }
        }
      }
    })+
  };
}
impl_shl_t_for_i32x8!(i8, u8, i16, u16, i32, u32, i64, u64, i128, u128);

macro_rules! impl_shr_t_for_i32x8 {
  ($($shift_type:ty),+ $(,)?) => {
    $(impl Shr<$shift_type> for i32x8 {
      type Output = Self;
      #[inline]
      fn shr(self, rhs: $shift_type) -> Self::Output {
        pick! {
          if #[cfg(target_feature="avx2")] {
            let shift = cast([rhs as u64, 0]);
            Self { avx2: shr_all_i32_m256i(self.avx2, shift) }
          } else {
            Self {
              a: self.a.shr(rhs),
              b: self.b.shr(rhs),
            }
          }
        }
      }
    })+
  };
}

impl_shr_t_for_i32x8!(i8, u8, i16, u16, i32, u32, i64, u64, i128, u128);
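
/// Lane-wise arithmetic shift-right: each lane is shifted by the
/// corresponding lane of `rhs`. Shift amounts are masked to the `0..=31`
/// range on the AVX2 path; the fallback delegates to `i32x4`'s per-lane
/// shift.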
impl Shr<i32x8> for i32x8 {
  type Output = Self;

  #[inline]
  fn shr(self, rhs: i32x8) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        let shift_by = bitand_m256i(rhs.avx2, set_splat_i32_m256i(31));
        Self { avx2: shr_each_i32_m256i(self.avx2, shift_by) }
      } else {
        Self {
          a: self.a.shr(rhs.a),
          b: self.b.shr(rhs.b),
        }
      }
    }
  }
}
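
/// Lane-wise shift-left: each lane is shifted by the corresponding lane of
/// `rhs`, with zeros shifted in. Shift amounts are masked to the `0..=31`
/// range on the AVX2 path.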
impl Shl<i32x8> for i32x8 {
  type Output = Self;

  #[inline]
  fn shl(self, rhs: i32x8) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        let shift_by = bitand_m256i(rhs.avx2, set_splat_i32_m256i(31));
        Self { avx2: shl_each_u32_m256i(self.avx2, shift_by) }
      } else {
        Self {
          a: self.a.shl(rhs.a),
          b: self.b.shl(rhs.b),
        }
      }
    }
  }
}

impl CmpEq for i32x8 {
  type Output = Self;
  #[inline]
  fn simd_eq(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: cmp_eq_mask_i32_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a: self.a.simd_eq(rhs.a),
          b: self.b.simd_eq(rhs.b),
        }
      }
    }
  }
}

impl CmpGt for i32x8 {
  type Output = Self;
  #[inline]
  fn simd_gt(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: cmp_gt_mask_i32_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a: self.a.simd_gt(rhs.a),
          b: self.b.simd_gt(rhs.b),
        }
      }
    }
  }
}

impl CmpLt for i32x8 {
  type Output = Self;
  #[inline]
  fn simd_lt(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: cmp_gt_mask_i32_m256i(rhs.avx2, self.avx2) }
      } else {
        Self {
          a: self.a.simd_lt(rhs.a),
          b: self.b.simd_lt(rhs.b),
        }
      }
    }
  }
}

impl From<i16x8> for i32x8 {
  #[inline]
  fn from(value: i16x8) -> Self {
    i32x8::from_i16x8(value)
  }
}

impl i32x8 {
  #[inline]
  #[must_use]
  pub const fn new(array: [i32; 8]) -> Self {
    unsafe { core::mem::transmute(array) }
  }
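
  // Sign-extends eight `i16` lanes to `i32`. The SSE2 fallback duplicates
  // each lane into the high half of a 32-bit slot via unpack, then an
  // arithmetic shift-right by 16 restores the sign.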
  #[inline]
  #[must_use]
  pub fn from_i16x8(v: i16x8) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        i32x8 { avx2: convert_to_i32_m256i_from_i16_m128i(v.sse) }
      } else if #[cfg(target_feature="sse2")] {
        i32x8 {
          a: i32x4 { sse: shr_imm_i32_m128i::<16>(unpack_low_i16_m128i(v.sse, v.sse)) },
          b: i32x4 { sse: shr_imm_i32_m128i::<16>(unpack_high_i16_m128i(v.sse, v.sse)) },
        }
      } else {
        i32x8::new([
          i32::from(v.as_array()[0]),
          i32::from(v.as_array()[1]),
          i32::from(v.as_array()[2]),
          i32::from(v.as_array()[3]),
          i32::from(v.as_array()[4]),
          i32::from(v.as_array()[5]),
          i32::from(v.as_array()[6]),
          i32::from(v.as_array()[7]),
        ])
      }
    }
  }
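
  // Zero-extends eight `u16` lanes to `i32`: the same unpack trick as
  // `from_i16x8`, but a logical shift-right leaves the high bits zero.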
  #[inline]
  #[must_use]
  pub fn from_u16x8(v: u16x8) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        i32x8 { avx2: convert_to_i32_m256i_from_u16_m128i(v.sse) }
      } else if #[cfg(target_feature="sse2")] {
        i32x8 {
          a: i32x4 { sse: shr_imm_u32_m128i::<16>(unpack_low_i16_m128i(v.sse, v.sse)) },
          b: i32x4 { sse: shr_imm_u32_m128i::<16>(unpack_high_i16_m128i(v.sse, v.sse)) },
        }
      } else {
        i32x8::new([
          i32::from(v.as_array()[0]),
          i32::from(v.as_array()[1]),
          i32::from(v.as_array()[2]),
          i32::from(v.as_array()[3]),
          i32::from(v.as_array()[4]),
          i32::from(v.as_array()[5]),
          i32::from(v.as_array()[6]),
          i32::from(v.as_array()[7]),
        ])
      }
    }
  }
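
  // Lane-wise select: where a lane of `self` is all ones, take the lane from
  // `t`, otherwise from `f`. Intended for the all-ones/all-zeros masks the
  // comparison ops produce; the AVX2 byte-granular blend relies on that.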
  #[inline]
  #[must_use]
  pub fn blend(self, t: Self, f: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: blend_varying_i8_m256i(f.avx2, t.avx2, self.avx2) }
      } else {
        Self {
          a: self.a.blend(t.a, f.a),
          b: self.b.blend(t.b, f.b),
        }
      }
    }
  }
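
  // Horizontal reductions fold the two 128-bit halves together first, then
  // reduce the resulting `i32x4`.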
  #[inline]
  #[must_use]
  pub fn reduce_add(self) -> i32 {
    let arr: [i32x4; 2] = cast(self);
    (arr[0] + arr[1]).reduce_add()
  }

  #[inline]
  #[must_use]
  pub fn reduce_max(self) -> i32 {
    let arr: [i32x4; 2] = cast(self);
    arr[0].max(arr[1]).reduce_max()
  }

  #[inline]
  #[must_use]
  pub fn reduce_min(self) -> i32 {
    let arr: [i32x4; 2] = cast(self);
    arr[0].min(arr[1]).reduce_min()
  }
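
  // Note: the AVX2 `vpabsd` leaves `i32::MIN` unchanged, since it has no
  // positive `i32` counterpart; use `unsigned_abs` when the full range
  // matters.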
  #[inline]
  #[must_use]
  pub fn abs(self) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: abs_i32_m256i(self.avx2) }
      } else {
        Self {
          a: self.a.abs(),
          b: self.b.abs(),
        }
      }
    }
  }
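
  // Same as `abs`, but reinterpreting the result as `u32` means `i32::MIN`
  // comes out as `2_147_483_648` instead of wrapping.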
  #[inline]
  #[must_use]
  pub fn unsigned_abs(self) -> u32x8 {
    pick! {
      if #[cfg(target_feature="avx2")] {
        u32x8 { avx2: abs_i32_m256i(self.avx2) }
      } else {
        u32x8 {
          a: self.a.unsigned_abs(),
          b: self.b.unsigned_abs(),
        }
      }
    }
  }

  #[inline]
  #[must_use]
  pub fn max(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: max_i32_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a: self.a.max(rhs.a),
          b: self.b.max(rhs.b),
        }
      }
    }
  }

  #[inline]
  #[must_use]
  pub fn min(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: min_i32_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a: self.a.min(rhs.a),
          b: self.b.min(rhs.b),
        }
      }
    }
  }
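
  // Converts each lane to `f32` (`vcvtdq2ps` on AVX2). Values of magnitude
  // above 2^24 may round to the nearest representable `f32`.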
  #[inline]
  #[must_use]
  pub fn round_float(self) -> f32x8 {
    pick! {
      if #[cfg(target_feature="avx2")] {
        cast(convert_to_m256_from_i32_m256i(self.avx2))
      } else {
        cast([
          self.a.round_float(),
          self.b.round_float(),
        ])
      }
    }
  }
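
  /// Collects the sign bit of each lane into the low 8 bits of a `u32`,
  /// with lane 0 at bit 0. A minimal usage sketch, assuming the crate's
  /// usual `splat`/compare behavior:
  /// ```
  /// # use wide::*;
  /// let v = i32x8::new([1, 2, 3, 4, 5, 6, 7, 8]);
  /// let mask = v.simd_gt(i32x8::splat(4));
  /// assert_eq!(mask.to_bitmask(), 0b1111_0000);
  /// ```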
  #[inline]
  #[must_use]
  pub fn to_bitmask(self) -> u32 {
    pick! {
      if #[cfg(target_feature="avx2")] {
        move_mask_m256(cast(self.avx2)) as u32
      } else {
        self.a.to_bitmask() | (self.b.to_bitmask() << 4)
      }
    }
  }
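
  // `any`/`all`/`none` treat `self` as a lane mask and test the lanes' sign
  // bits.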
  #[inline]
  #[must_use]
  pub fn any(self) -> bool {
    pick! {
      if #[cfg(target_feature="avx2")] {
        move_mask_m256(cast(self.avx2)) != 0
      } else {
        (self.a | self.b).any()
      }
    }
  }

  #[inline]
  #[must_use]
  pub fn all(self) -> bool {
    pick! {
      if #[cfg(target_feature="avx2")] {
        move_mask_m256(cast(self.avx2)) == 0b11111111
      } else {
        (self.a & self.b).all()
      }
    }
  }

  #[inline]
  #[must_use]
  pub fn none(self) -> bool {
    !self.any()
  }
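
  // 8x8 matrix transpose. The AVX2 path is the classic three-stage network:
  // unpack 32-bit pairs, shuffle 64-bit pairs, then permute the 128-bit
  // halves; the fallback gathers columns lane by lane.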
  #[must_use]
  #[inline]
  pub fn transpose(data: [i32x8; 8]) -> [i32x8; 8] {
    pick! {
      if #[cfg(target_feature="avx2")] {
        let a0 = unpack_low_i32_m256i(data[0].avx2, data[1].avx2);
        let a1 = unpack_high_i32_m256i(data[0].avx2, data[1].avx2);
        let a2 = unpack_low_i32_m256i(data[2].avx2, data[3].avx2);
        let a3 = unpack_high_i32_m256i(data[2].avx2, data[3].avx2);
        let a4 = unpack_low_i32_m256i(data[4].avx2, data[5].avx2);
        let a5 = unpack_high_i32_m256i(data[4].avx2, data[5].avx2);
        let a6 = unpack_low_i32_m256i(data[6].avx2, data[7].avx2);
        let a7 = unpack_high_i32_m256i(data[6].avx2, data[7].avx2);
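
        // Builds a `_MM_SHUFFLE(z, y, x, w)`-style immediate for the
        // `shuffle_m256` calls below.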
        pub const fn mm_shuffle(z: i32, y: i32, x: i32, w: i32) -> i32 {
          (z << 6) | (y << 4) | (x << 2) | w
        }

        const SHUFF_LO: i32 = mm_shuffle(1, 0, 1, 0);
        const SHUFF_HI: i32 = mm_shuffle(3, 2, 3, 2);

        let b0 = cast::<m256, m256i>(shuffle_m256::<SHUFF_LO>(cast(a0), cast(a2)));
        let b1 = cast::<m256, m256i>(shuffle_m256::<SHUFF_HI>(cast(a0), cast(a2)));
        let b2 = cast::<m256, m256i>(shuffle_m256::<SHUFF_LO>(cast(a1), cast(a3)));
        let b3 = cast::<m256, m256i>(shuffle_m256::<SHUFF_HI>(cast(a1), cast(a3)));
        let b4 = cast::<m256, m256i>(shuffle_m256::<SHUFF_LO>(cast(a4), cast(a6)));
        let b5 = cast::<m256, m256i>(shuffle_m256::<SHUFF_HI>(cast(a4), cast(a6)));
        let b6 = cast::<m256, m256i>(shuffle_m256::<SHUFF_LO>(cast(a5), cast(a7)));
        let b7 = cast::<m256, m256i>(shuffle_m256::<SHUFF_HI>(cast(a5), cast(a7)));

        [
          i32x8 { avx2: permute2z_m256i::<0x20>(b0, b4) },
          i32x8 { avx2: permute2z_m256i::<0x20>(b1, b5) },
          i32x8 { avx2: permute2z_m256i::<0x20>(b2, b6) },
          i32x8 { avx2: permute2z_m256i::<0x20>(b3, b7) },
          i32x8 { avx2: permute2z_m256i::<0x31>(b0, b4) },
          i32x8 { avx2: permute2z_m256i::<0x31>(b1, b5) },
          i32x8 { avx2: permute2z_m256i::<0x31>(b2, b6) },
          i32x8 { avx2: permute2z_m256i::<0x31>(b3, b7) },
        ]
      } else {
        #[inline(always)]
        fn transpose_column(data: &[i32x8; 8], index: usize) -> i32x8 {
          i32x8::new([
            data[0].as_array()[index],
            data[1].as_array()[index],
            data[2].as_array()[index],
            data[3].as_array()[index],
            data[4].as_array()[index],
            data[5].as_array()[index],
            data[6].as_array()[index],
            data[7].as_array()[index],
          ])
        }

        [
          transpose_column(&data, 0),
          transpose_column(&data, 1),
          transpose_column(&data, 2),
          transpose_column(&data, 3),
          transpose_column(&data, 4),
          transpose_column(&data, 5),
          transpose_column(&data, 6),
          transpose_column(&data, 7),
        ]
      }
    }
  }

  #[inline]
  pub fn to_array(self) -> [i32; 8] {
    cast(self)
  }

  #[inline]
  pub fn as_array(&self) -> &[i32; 8] {
    cast_ref(self)
  }

  #[inline]
  pub fn as_mut_array(&mut self) -> &mut [i32; 8] {
    cast_mut(self)
  }
}

impl Not for i32x8 {
  type Output = Self;
  #[inline]
  fn not(self) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: self.avx2.not() }
      } else {
        Self {
          a: self.a.not(),
          b: self.b.not(),
        }
      }
    }
  }
}