use super::*;

pick! {
  if #[cfg(target_feature="avx2")] {
    #[derive(Default, Clone, Copy, PartialEq, Eq)]
    #[repr(C, align(32))]
    pub struct u32x8 { pub(crate) avx2: m256i }
  } else {
    #[derive(Default, Clone, Copy, PartialEq, Eq)]
    #[repr(C, align(32))]
    pub struct u32x8 { pub(crate) a : u32x4, pub(crate) b : u32x4 }
  }
}

int_uint_consts!(u32, 8, u32x8, 256);

unsafe impl Zeroable for u32x8 {}
unsafe impl Pod for u32x8 {}

impl AlignTo for u32x8 {
  type Elem = u32;
}

impl Add for u32x8 {
  type Output = Self;
  #[inline]
  fn add(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: add_i32_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.add(rhs.a),
          b : self.b.add(rhs.b),
        }
      }
    }
  }
}

impl Sub for u32x8 {
  type Output = Self;
  #[inline]
  fn sub(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: sub_i32_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.sub(rhs.a),
          b : self.b.sub(rhs.b),
        }
      }
    }
  }
}

impl Add<u32> for u32x8 {
  type Output = Self;
  #[inline]
  fn add(self, rhs: u32) -> Self::Output {
    self + Self::splat(rhs)
  }
}

impl Sub<u32> for u32x8 {
  type Output = Self;
  #[inline]
  fn sub(self, rhs: u32) -> Self::Output {
    self - Self::splat(rhs)
  }
}

impl Mul<u32> for u32x8 {
  type Output = Self;
  #[inline]
  fn mul(self, rhs: u32) -> Self::Output {
    self * Self::splat(rhs)
  }
}

impl Mul for u32x8 {
  type Output = Self;
  #[inline]
  fn mul(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: mul_i32_keep_low_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.mul(rhs.a),
          b : self.b.mul(rhs.b),
        }
      }
    }
  }
}

impl BitAnd for u32x8 {
  type Output = Self;
  #[inline]
  fn bitand(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: bitand_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.bitand(rhs.a),
          b : self.b.bitand(rhs.b),
        }
      }
    }
  }
}

impl BitOr for u32x8 {
  type Output = Self;
  #[inline]
  fn bitor(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: bitor_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.bitor(rhs.a),
          b : self.b.bitor(rhs.b),
        }
      }
    }
  }
}

impl BitXor for u32x8 {
  type Output = Self;
  #[inline]
  fn bitxor(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: bitxor_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.bitxor(rhs.a),
          b : self.b.bitxor(rhs.b),
        }
      }
    }
  }
}

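/// Zero-extends each `u16` lane of `v` into the corresponding `u32` lane.
/// The SSE2 path interleaves the vector with itself (so each 32-bit lane
/// holds two copies of the source value) and then logically shifts each
/// 32-bit lane right by 16, leaving the value zero-extended.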
impl From<u16x8> for u32x8 {
  #[inline]
  fn from(v: u16x8) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: convert_to_i32_m256i_from_u16_m128i(v.sse) }
      } else if #[cfg(target_feature="sse2")] {
        Self {
          a: u32x4 { sse: shr_imm_u32_m128i::<16>(unpack_low_i16_m128i(v.sse, v.sse)) },
          b: u32x4 { sse: shr_imm_u32_m128i::<16>(unpack_high_i16_m128i(v.sse, v.sse)) },
        }
      } else {
        u32x8::new([
          u32::from(v.as_array()[0]),
          u32::from(v.as_array()[1]),
          u32::from(v.as_array()[2]),
          u32::from(v.as_array()[3]),
          u32::from(v.as_array()[4]),
          u32::from(v.as_array()[5]),
          u32::from(v.as_array()[6]),
          u32::from(v.as_array()[7]),
        ])
      }
    }
  }
}

macro_rules! impl_shl_t_for_u32x8 {
  ($($shift_type:ty),+ $(,)?) => {
    $(impl Shl<$shift_type> for u32x8 {
      type Output = Self;
      #[inline]
      fn shl(self, rhs: $shift_type) -> Self::Output {
        pick! {
          if #[cfg(target_feature="avx2")] {
            let shift = cast([rhs as u64, 0]);
            Self { avx2: shl_all_u32_m256i(self.avx2, shift) }
          } else {
            Self {
              a : self.a.shl(rhs),
              b : self.b.shl(rhs),
            }
          }
        }
      }
    })+
  };
}
impl_shl_t_for_u32x8!(i8, u8, i16, u16, i32, u32, i64, u64, i128, u128);

macro_rules! impl_shr_t_for_u32x8 {
  ($($shift_type:ty),+ $(,)?) => {
    $(impl Shr<$shift_type> for u32x8 {
      type Output = Self;
      #[inline]
      fn shr(self, rhs: $shift_type) -> Self::Output {
        pick! {
          if #[cfg(target_feature="avx2")] {
            let shift = cast([rhs as u64, 0]);
            Self { avx2: shr_all_u32_m256i(self.avx2, shift) }
          } else {
            Self {
              a : self.a.shr(rhs),
              b : self.b.shr(rhs),
            }
          }
        }
      }
    })+
  };
}

impl_shr_t_for_u32x8!(i8, u8, i16, u16, i32, u32, i64, u64, i128, u128);

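/// Lanewise shift-right: each lane of `self` is shifted by the shift amount
/// in the corresponding lane of `rhs`, with the amount masked to the low
/// five bits so it stays within the 32-bit lane width.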
impl Shr<u32x8> for u32x8 {
  type Output = Self;

  #[inline]
  fn shr(self, rhs: u32x8) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        let shift_by = bitand_m256i(rhs.avx2, set_splat_i32_m256i(31));
        Self { avx2: shr_each_u32_m256i(self.avx2, shift_by) }
      } else {
        Self {
          a : self.a.shr(rhs.a),
          b : self.b.shr(rhs.b),
        }
      }
    }
  }
}

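/// Lanewise shift-left: each lane of `self` is shifted by the shift amount
/// in the corresponding lane of `rhs`, with the amount masked to the low
/// five bits so it stays within the 32-bit lane width.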
impl Shl<u32x8> for u32x8 {
  type Output = Self;

  #[inline]
  fn shl(self, rhs: u32x8) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        let shift_by = bitand_m256i(rhs.avx2, set_splat_i32_m256i(31));
        Self { avx2: shl_each_u32_m256i(self.avx2, shift_by) }
      } else {
        Self {
          a : self.a.shl(rhs.a),
          b : self.b.shl(rhs.b),
        }
      }
    }
  }
}

impl CmpEq for u32x8 {
  type Output = Self;
  #[inline]
  fn simd_eq(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: cmp_eq_mask_i32_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.simd_eq(rhs.a),
          b : self.b.simd_eq(rhs.b),
        }
      }
    }
  }
}

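/// Lanewise unsigned greater-than, returning an all-ones mask in each lane
/// where `self > rhs`. AVX2 only provides a signed 32-bit compare, so both
/// operands are biased by flipping the sign bit first; a signed compare of
/// the biased values is equivalent to an unsigned compare of the originals.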
impl CmpGt for u32x8 {
  type Output = Self;
  #[inline]
  fn simd_gt(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        let highbit = u32x8::splat(1 << 31);
        Self { avx2: cmp_gt_mask_i32_m256i((self ^ highbit).avx2, (rhs ^ highbit).avx2) }
      } else {
        Self {
          a : self.a.simd_gt(rhs.a),
          b : self.b.simd_gt(rhs.b),
        }
      }
    }
  }
}

impl CmpLt for u32x8 {
  type Output = Self;
  #[inline]
  fn simd_lt(self, rhs: Self) -> Self::Output {
    rhs.simd_gt(self)
  }
}

impl CmpNe for u32x8 {
  type Output = Self;
  #[inline]
  fn simd_ne(self, rhs: Self) -> Self::Output {
    !self.simd_eq(rhs)
  }
}

impl CmpGe for u32x8 {
  type Output = Self;
  #[inline]
  fn simd_ge(self, rhs: Self) -> Self::Output {
    self.simd_eq(rhs) | self.simd_gt(rhs)
  }
}

impl CmpLe for u32x8 {
  type Output = Self;
  #[inline]
  fn simd_le(self, rhs: Self) -> Self::Output {
    self.simd_eq(rhs) | self.simd_lt(rhs)
  }
}

impl u32x8 {
  #[inline]
  #[must_use]
  pub const fn new(array: [u32; 8]) -> Self {
    unsafe { core::mem::transmute(array) }
  }

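  /// Multiplies corresponding lanes as 64-bit values and keeps the high 32
  /// bits of each product, e.g. `0x8000_0000 * 2` yields `1` in that lane.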
  #[inline]
  #[must_use]
  pub fn mul_keep_high(self, rhs: u32x8) -> u32x8 {
    pick! {
      if #[cfg(target_feature="avx2")] {
        let a: [u32; 8] = cast(self);
        let b: [u32; 8] = cast(rhs);

        let r1: [u32; 8] = cast(mul_u64_low_bits_m256i(cast([a[0], 0, a[1], 0, a[2], 0, a[3], 0]), cast([b[0], 0, b[1], 0, b[2], 0, b[3], 0])));
        let r2: [u32; 8] = cast(mul_u64_low_bits_m256i(cast([a[4], 0, a[5], 0, a[6], 0, a[7], 0]), cast([b[4], 0, b[5], 0, b[6], 0, b[7], 0])));

        cast([r1[1], r1[3], r1[5], r1[7], r2[1], r2[3], r2[5], r2[7]])
      } else {
        Self {
          a : self.a.mul_keep_high(rhs.a),
          b : self.b.mul_keep_high(rhs.b),
        }
      }
    }
  }

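  /// Lanewise select: where the bits of `self` are set, take the lane from
  /// `t`, otherwise take it from `f`. `self` is expected to be a comparison
  /// mask (each lane all ones or all zeros).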
  #[inline]
  #[must_use]
  pub fn blend(self, t: Self, f: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: blend_varying_i8_m256i(f.avx2, t.avx2, self.avx2) }
      } else {
        Self {
          a : self.a.blend(t.a, f.a),
          b : self.b.blend(t.b, f.b),
        }
      }
    }
  }

  #[inline]
  #[must_use]
  pub fn max(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: max_u32_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.max(rhs.a),
          b : self.b.max(rhs.b),
        }
      }
    }
  }

  #[inline]
  #[must_use]
  pub fn min(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: min_u32_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.min(rhs.a),
          b : self.b.min(rhs.b),
        }
      }
    }
  }

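  /// Collects the high bit of each lane into the low 8 bits of the returned
  /// `u32` (lane 0 becomes bit 0).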
  #[inline]
  #[must_use]
  pub fn to_bitmask(self) -> u32 {
    i32x8::to_bitmask(cast(self))
  }

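  /// Returns `true` if the high bit of any lane is set. Useful for testing a
  /// comparison mask for at least one "true" lane.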
  #[inline]
  #[must_use]
  pub fn any(self) -> bool {
    pick! {
      if #[cfg(target_feature="avx2")] {
        ((move_mask_i8_m256i(self.avx2) as u32) & 0b10001000100010001000100010001000) != 0
      } else {
        (self.a | self.b).any()
      }
    }
  }

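  /// Returns `true` only if the high bit of every lane is set.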
  #[inline]
  #[must_use]
  pub fn all(self) -> bool {
    pick! {
      if #[cfg(target_feature="avx2")] {
        ((move_mask_i8_m256i(self.avx2) as u32) & 0b10001000100010001000100010001000) == 0b10001000100010001000100010001000
      } else {
        (self.a & self.b).all()
      }
    }
  }

  #[inline]
  #[must_use]
  pub fn none(self) -> bool {
    !self.any()
  }

  #[inline]
  pub fn to_array(self) -> [u32; 8] {
    cast(self)
  }

  #[inline]
  pub fn as_array(&self) -> &[u32; 8] {
    cast_ref(self)
  }

  #[inline]
  pub fn as_mut_array(&mut self) -> &mut [u32; 8] {
    cast_mut(self)
  }
}

impl Not for u32x8 {
  type Output = Self;
  #[inline]
  fn not(self) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: self.avx2.not() }
      } else {
        Self {
          a : self.a.not(),
          b : self.b.not(),
        }
      }
    }
  }
}