
// wide/u32x8_.rs

use super::*;

pick! {
  if #[cfg(target_feature="avx2")] {
    #[derive(Default, Clone, Copy, PartialEq, Eq)]
    #[repr(C, align(32))]
    pub struct u32x8 { pub(crate) avx2: m256i }
  } else {
    #[derive(Default, Clone, Copy, PartialEq, Eq)]
    #[repr(C, align(32))]
    pub struct u32x8 { pub(crate) a : u32x4, pub(crate) b : u32x4 }
  }
}

int_uint_consts!(u32, 8, u32x8, 256);

unsafe impl Zeroable for u32x8 {}
unsafe impl Pod for u32x8 {}

impl AlignTo for u32x8 {
  type Elem = u32;
}

impl Add for u32x8 {
  type Output = Self;
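  /// Element-wise addition (wrapping on overflow, matching the
  /// `add_i32_m256i` path used on AVX2).
  ///
  /// # Examples
  /// ```
  /// # use wide::u32x8;
  /// let a = u32x8::from([1, 2, 3, 4, 5, 6, 7, 8]);
  /// let b = u32x8::from([10, 20, 30, 40, 50, 60, 70, 80]);
  /// assert_eq!((a + b).to_array(), [11, 22, 33, 44, 55, 66, 77, 88]);
  /// ```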
  #[inline]
  fn add(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: add_i32_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.add(rhs.a),
          b : self.b.add(rhs.b),
        }
      }
    }
  }
}

impl Sub for u32x8 {
  type Output = Self;
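  /// Element-wise subtraction (wrapping on underflow, matching the
  /// `sub_i32_m256i` path used on AVX2).
  ///
  /// # Examples
  /// ```
  /// # use wide::u32x8;
  /// let a = u32x8::from([10, 20, 30, 40, 50, 60, 70, 80]);
  /// let b = u32x8::from([1, 2, 3, 4, 5, 6, 7, 8]);
  /// assert_eq!((a - b).to_array(), [9, 18, 27, 36, 45, 54, 63, 72]);
  /// ```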
  #[inline]
  fn sub(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: sub_i32_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.sub(rhs.a),
          b : self.b.sub(rhs.b),
        }
      }
    }
  }
}

impl Add<u32> for u32x8 {
  type Output = Self;
  /// Adds a scalar `u32` to each element of the vector.
  ///
  /// # Examples
  /// ```
  /// # use wide::u32x8;
  /// let vec = u32x8::from([1, 2, 3, 4, 5, 6, 7, 8]);
  /// let result = vec + 10;
  /// assert_eq!(result.to_array(), [11, 12, 13, 14, 15, 16, 17, 18]);
  /// ```
  #[inline]
  fn add(self, rhs: u32) -> Self::Output {
    self + Self::splat(rhs)
  }
}

impl Sub<u32> for u32x8 {
  type Output = Self;
  /// Subtracts a scalar `u32` from each element of the vector.
  ///
  /// # Examples
  /// ```
  /// # use wide::u32x8;
  /// let vec = u32x8::from([10, 20, 30, 40, 50, 60, 70, 80]);
  /// let result = vec - 5;
  /// assert_eq!(result.to_array(), [5, 15, 25, 35, 45, 55, 65, 75]);
  /// ```
  #[inline]
  fn sub(self, rhs: u32) -> Self::Output {
    self - Self::splat(rhs)
  }
}

impl Mul<u32> for u32x8 {
  type Output = Self;
  /// Multiplies each element of the vector by a scalar `u32`.
  ///
  /// # Examples
  /// ```
  /// # use wide::u32x8;
  /// let vec = u32x8::from([1, 2, 3, 4, 5, 6, 7, 8]);
  /// let result = vec * 3;
  /// assert_eq!(result.to_array(), [3, 6, 9, 12, 15, 18, 21, 24]);
  /// ```
  #[inline]
  fn mul(self, rhs: u32) -> Self::Output {
    self * Self::splat(rhs)
  }
}

impl Mul for u32x8 {
  type Output = Self;
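  /// Element-wise multiplication, keeping the low 32 bits of each product
  /// (the `mul_i32_keep_low_m256i` behavior; see `mul_keep_high` for the
  /// high halves).
  ///
  /// # Examples
  /// ```
  /// # use wide::u32x8;
  /// let a = u32x8::from([1, 2, 3, 4, 5, 6, 7, 8]);
  /// let b = u32x8::from([8, 7, 6, 5, 4, 3, 2, 1]);
  /// assert_eq!((a * b).to_array(), [8, 14, 18, 20, 20, 18, 14, 8]);
  /// ```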
  #[inline]
  fn mul(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: mul_i32_keep_low_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.mul(rhs.a),
          b : self.b.mul(rhs.b),
        }
      }
    }
  }
}

impl BitAnd for u32x8 {
  type Output = Self;
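  /// Bitwise AND of each lane.
  ///
  /// # Examples
  /// ```
  /// # use wide::u32x8;
  /// let a = u32x8::from([0b1100; 8]);
  /// let b = u32x8::from([0b1010; 8]);
  /// assert_eq!((a & b).to_array(), [0b1000; 8]);
  /// ```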
  #[inline]
  fn bitand(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: bitand_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.bitand(rhs.a),
          b : self.b.bitand(rhs.b),
        }
      }
    }
  }
}

impl BitOr for u32x8 {
  type Output = Self;
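  /// Bitwise OR of each lane.
  ///
  /// # Examples
  /// ```
  /// # use wide::u32x8;
  /// let a = u32x8::from([0b1100; 8]);
  /// let b = u32x8::from([0b1010; 8]);
  /// assert_eq!((a | b).to_array(), [0b1110; 8]);
  /// ```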
  #[inline]
  fn bitor(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: bitor_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.bitor(rhs.a),
          b : self.b.bitor(rhs.b),
        }
      }
    }
  }
}

impl BitXor for u32x8 {
  type Output = Self;
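  /// Bitwise XOR of each lane.
  ///
  /// # Examples
  /// ```
  /// # use wide::u32x8;
  /// let a = u32x8::from([0b1100; 8]);
  /// let b = u32x8::from([0b1010; 8]);
  /// assert_eq!((a ^ b).to_array(), [0b0110; 8]);
  /// ```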
  #[inline]
  fn bitxor(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: bitxor_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.bitxor(rhs.a),
          b : self.b.bitxor(rhs.b),
        }
      }
    }
  }
}

impl From<u16x8> for u32x8 {
  /// Widens and zero-extends each `u16` lane to `u32`.
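  ///
  /// # Examples
  /// ```
  /// # use wide::{u16x8, u32x8};
  /// let v = u16x8::from([1, 2, 3, 4, 5, 6, 0xFFFF, 8]);
  /// assert_eq!(u32x8::from(v).to_array(), [1, 2, 3, 4, 5, 6, 0xFFFF, 8]);
  /// ```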
  #[inline]
  fn from(v: u16x8) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2:convert_to_i32_m256i_from_u16_m128i(v.sse) }
      } else if #[cfg(target_feature="sse2")] {
        Self {
          a: u32x4 { sse: shr_imm_u32_m128i::<16>( unpack_low_i16_m128i(v.sse, v.sse)) },
          b: u32x4 { sse: shr_imm_u32_m128i::<16>( unpack_high_i16_m128i(v.sse, v.sse)) },
        }
      } else {
        u32x8::new([
          u32::from(v.as_array()[0]),
          u32::from(v.as_array()[1]),
          u32::from(v.as_array()[2]),
          u32::from(v.as_array()[3]),
          u32::from(v.as_array()[4]),
          u32::from(v.as_array()[5]),
          u32::from(v.as_array()[6]),
          u32::from(v.as_array()[7]),
        ])
      }
    }
  }
}

macro_rules! impl_shl_t_for_u32x8 {
  ($($shift_type:ty),+ $(,)?) => {
    $(impl Shl<$shift_type> for u32x8 {
      type Output = Self;
      /// Shifts all lanes by the value given.
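      ///
      /// # Examples
      /// ```
      /// # use wide::u32x8;
      /// let v = u32x8::from([1, 2, 3, 4, 5, 6, 7, 8]);
      /// assert_eq!((v << 2).to_array(), [4, 8, 12, 16, 20, 24, 28, 32]);
      /// ```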
      #[inline]
      fn shl(self, rhs: $shift_type) -> Self::Output {
        pick! {
          if #[cfg(target_feature="avx2")] {
            let shift = cast([rhs as u64, 0]);
            Self { avx2: shl_all_u32_m256i(self.avx2, shift) }
          } else {
            Self {
              a : self.a.shl(rhs),
              b : self.b.shl(rhs),
            }
          }
        }
      }
    })+
  };
}
impl_shl_t_for_u32x8!(i8, u8, i16, u16, i32, u32, i64, u64, i128, u128);

macro_rules! impl_shr_t_for_u32x8 {
  ($($shift_type:ty),+ $(,)?) => {
    $(impl Shr<$shift_type> for u32x8 {
      type Output = Self;
      /// Shifts all lanes by the value given.
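      ///
      /// # Examples
      /// ```
      /// # use wide::u32x8;
      /// let v = u32x8::from([4, 8, 12, 16, 20, 24, 28, 32]);
      /// assert_eq!((v >> 2).to_array(), [1, 2, 3, 4, 5, 6, 7, 8]);
      /// ```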
      #[inline]
      fn shr(self, rhs: $shift_type) -> Self::Output {
        pick! {
          if #[cfg(target_feature="avx2")] {
            let shift = cast([rhs as u64, 0]);
            Self { avx2: shr_all_u32_m256i(self.avx2, shift) }
          } else {
            Self {
              a : self.a.shr(rhs),
              b : self.b.shr(rhs),
            }
          }
        }
      }
    })+
  };
}

impl_shr_t_for_u32x8!(i8, u8, i16, u16, i32, u32, i64, u64, i128, u128);

/// Shifts each lane right by the amount in the corresponding lane of `rhs`.
///
/// Bitwise shift-right; yields `self >> mask(rhs)`, where `mask` removes any
/// high-order bits of `rhs` that would cause the shift to exceed the bit width
/// of the type (same as `wrapping_shr`).
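///
/// # Examples
/// ```
/// # use wide::u32x8;
/// let v = u32x8::from([8; 8]);
/// // shift amounts are masked to 0..32, so 33 behaves like 1
/// let shifts = u32x8::from([0, 1, 2, 3, 32, 33, 34, 35]);
/// assert_eq!((v >> shifts).to_array(), [8, 4, 2, 1, 8, 4, 2, 1]);
/// ```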
impl Shr<u32x8> for u32x8 {
  type Output = Self;

  #[inline]
  fn shr(self, rhs: u32x8) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        // ensure same behavior as scalar wrapping_shr
        let shift_by = bitand_m256i(rhs.avx2, set_splat_i32_m256i(31));
        Self { avx2: shr_each_u32_m256i(self.avx2, shift_by ) }
      } else {
        Self {
          a : self.a.shr(rhs.a),
          b : self.b.shr(rhs.b),
        }
      }
    }
  }
}

/// Shifts each lane left by the amount in the corresponding lane of `rhs`.
///
/// Bitwise shift-left; yields `self << mask(rhs)`, where `mask` removes any
/// high-order bits of `rhs` that would cause the shift to exceed the bit width
/// of the type (same as `wrapping_shl`).
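///
/// # Examples
/// ```
/// # use wide::u32x8;
/// let v = u32x8::from([1; 8]);
/// // shift amounts are masked to 0..32, so 33 behaves like 1
/// let shifts = u32x8::from([0, 1, 2, 3, 32, 33, 34, 35]);
/// assert_eq!((v << shifts).to_array(), [1, 2, 4, 8, 1, 2, 4, 8]);
/// ```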
impl Shl<u32x8> for u32x8 {
  type Output = Self;

  #[inline]
  fn shl(self, rhs: u32x8) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        // ensure same behavior as scalar wrapping_shl
        let shift_by = bitand_m256i(rhs.avx2, set_splat_i32_m256i(31));
        Self { avx2: shl_each_u32_m256i(self.avx2, shift_by) }
      } else {
        Self {
          a : self.a.shl(rhs.a),
          b : self.b.shl(rhs.b),
        }
      }
    }
  }
}

impl CmpEq for u32x8 {
  type Output = Self;
  /// Element-wise equality comparison.
  ///
  /// Returns a mask where each element is all-ones (0xFFFFFFFF) if the
  /// corresponding elements are equal, or all-zeros (0x00000000) otherwise.
  ///
  /// # Examples
  /// ```
  /// # use wide::{u32x8, CmpEq};
  /// let a = u32x8::from([1, 2, 3, 4, 5, 6, 7, 8]);
  /// let b = u32x8::from([1, 0, 3, 0, 5, 0, 7, 0]);
  /// let mask = a.simd_eq(b);
  /// let expected = [0xFFFFFFFF, 0, 0xFFFFFFFF, 0, 0xFFFFFFFF, 0, 0xFFFFFFFF, 0];
  /// assert_eq!(mask.to_array(), expected);
  /// ```
  #[inline]
  fn simd_eq(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: cmp_eq_mask_i32_m256i(self.avx2, rhs.avx2 ) }
      } else {
        Self {
          a : self.a.simd_eq(rhs.a),
          b : self.b.simd_eq(rhs.b),
        }
      }
    }
  }
}

impl CmpGt for u32x8 {
  type Output = Self;
  /// Element-wise greater-than comparison.
  ///
  /// Returns a mask where each element is all-ones (0xFFFFFFFF) if the
  /// corresponding element in `self` is greater than the one in `rhs`,
  /// or all-zeros (0x00000000) otherwise.
  ///
  /// # Examples
  /// ```
  /// # use wide::{u32x8, CmpGt};
  /// let a = u32x8::from([5, 4, 3, 2, 10, 9, 8, 7]);
  /// let b = u32x8::from([1, 2, 3, 4, 5, 6, 7, 8]);
  /// let mask = a.simd_gt(b);
  /// let expected =
  ///   [0xFFFFFFFF, 0xFFFFFFFF, 0, 0, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0];
  /// assert_eq!(mask.to_array(), expected);
  /// ```
  #[inline]
  fn simd_gt(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        // there is no unsigned gt compare, so flip the high bit of both
        // sides and use the signed compare instead
        let highbit = u32x8::splat(1 << 31);
        Self { avx2: cmp_gt_mask_i32_m256i((self ^ highbit).avx2, (rhs ^ highbit).avx2 ) }
      } else {
        Self {
          a : self.a.simd_gt(rhs.a),
          b : self.b.simd_gt(rhs.b),
        }
      }
    }
  }
}

impl CmpLt for u32x8 {
  type Output = Self;
  /// Element-wise less-than comparison.
  ///
  /// Returns a mask where each element is all-ones (0xFFFFFFFF) if the
  /// corresponding element in `self` is less than the one in `rhs`,
  /// or all-zeros (0x00000000) otherwise.
  ///
  /// # Examples
  /// ```
  /// # use wide::{u32x8, CmpLt};
  /// let a = u32x8::from([1, 2, 3, 4, 5, 6, 7, 8]);
  /// let b = u32x8::from([5, 4, 3, 2, 10, 9, 8, 7]);
  /// let mask = a.simd_lt(b);
  /// let expected =
  ///   [0xFFFFFFFF, 0xFFFFFFFF, 0, 0, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0];
  /// assert_eq!(mask.to_array(), expected);
  /// ```
  #[inline]
  fn simd_lt(self, rhs: Self) -> Self::Output {
    // lt is just gt the other way around
    rhs.simd_gt(self)
  }
}

impl CmpNe for u32x8 {
  type Output = Self;
  /// Element-wise not-equal comparison.
  ///
  /// Returns a mask where each element is all-ones (0xFFFFFFFF) if the
  /// corresponding elements are not equal, or all-zeros (0x00000000) otherwise.
  ///
  /// # Examples
  /// ```
  /// # use wide::{u32x8, CmpNe};
  /// let a = u32x8::from([1, 2, 3, 4, 5, 6, 7, 8]);
  /// let b = u32x8::from([1, 0, 3, 0, 5, 0, 7, 0]);
  /// let mask = a.simd_ne(b);
  /// let expected = [0, 0xFFFFFFFF, 0, 0xFFFFFFFF, 0, 0xFFFFFFFF, 0, 0xFFFFFFFF];
  /// assert_eq!(mask.to_array(), expected);
  /// ```
  #[inline]
  fn simd_ne(self, rhs: Self) -> Self::Output {
    !self.simd_eq(rhs)
  }
}

impl CmpGe for u32x8 {
  type Output = Self;
  /// Element-wise greater-than-or-equal comparison.
  ///
  /// Returns a mask where each element is all-ones (0xFFFFFFFF) if the
  /// corresponding element in `self` is greater than or equal to the one in
  /// `rhs`, or all-zeros (0x00000000) otherwise.
  ///
  /// # Examples
  /// ```
  /// # use wide::{u32x8, CmpGe};
  /// let a = u32x8::from([5, 4, 3, 2, 10, 9, 8, 7]);
  /// let b = u32x8::from([5, 2, 3, 4, 5, 6, 8, 8]);
  /// let mask = a.simd_ge(b);
  /// let expected = [
  ///   0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
  ///   0,
  /// ];
  /// assert_eq!(mask.to_array(), expected);
  /// ```
  #[inline]
  fn simd_ge(self, rhs: Self) -> Self::Output {
    self.simd_eq(rhs) | self.simd_gt(rhs)
  }
}

impl CmpLe for u32x8 {
  type Output = Self;
  /// Element-wise less-than-or-equal comparison.
  ///
  /// Returns a mask where each element is all-ones (0xFFFFFFFF) if the
  /// corresponding element in `self` is less than or equal to the one in `rhs`,
  /// or all-zeros (0x00000000) otherwise.
  ///
  /// # Examples
  /// ```
  /// # use wide::{u32x8, CmpLe};
  /// let a = u32x8::from([1, 2, 3, 4, 5, 6, 7, 8]);
  /// let b = u32x8::from([1, 4, 3, 2, 10, 9, 7, 7]);
  /// let mask = a.simd_le(b);
  /// let expected = [
  ///   0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
  ///   0,
  /// ];
  /// assert_eq!(mask.to_array(), expected);
  /// ```
  #[inline]
  fn simd_le(self, rhs: Self) -> Self::Output {
    self.simd_eq(rhs) | self.simd_lt(rhs)
  }
}

impl u32x8 {
  #[inline]
  #[must_use]
  pub const fn new(array: [u32; 8]) -> Self {
    unsafe { core::mem::transmute(array) }
  }

  /// Multiplies each pair of 32-bit lanes as a full 32x32 -> 64-bit product
  /// and keeps only the high 32 bits of each result. Useful for implementing
  /// division by a constant (see the `t_usefulness` example).
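  ///
  /// # Examples
  /// ```
  /// # use wide::u32x8;
  /// let a = u32x8::splat(0x8000_0000); // 2^31
  /// let b = u32x8::from([0, 1, 2, 3, 4, 5, 6, 7]);
  /// // (2^31 * k) >> 32 == k >> 1
  /// assert_eq!(a.mul_keep_high(b).to_array(), [0, 0, 1, 1, 2, 2, 3, 3]);
  /// ```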
  #[inline]
  #[must_use]
  pub fn mul_keep_high(self, rhs: u32x8) -> u32x8 {
    pick! {
      if #[cfg(target_feature="avx2")] {
        let a : [u32;8]= cast(self);
        let b : [u32;8]= cast(rhs);

        // let the compiler shuffle the values around, it does the right thing
        let r1 : [u32;8] = cast(mul_u64_low_bits_m256i(cast([a[0], 0, a[1], 0, a[2], 0, a[3], 0]), cast([b[0], 0, b[1], 0, b[2], 0, b[3], 0])));
        let r2 : [u32;8] = cast(mul_u64_low_bits_m256i(cast([a[4], 0, a[5], 0, a[6], 0, a[7], 0]), cast([b[4], 0, b[5], 0, b[6], 0, b[7], 0])));

        cast([r1[1], r1[3], r1[5], r1[7], r2[1], r2[3], r2[5], r2[7]])
      } else {
        Self {
          a : self.a.mul_keep_high(rhs.a),
          b : self.b.mul_keep_high(rhs.b),
        }
      }
    }
  }

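  /// Lane-wise select: where a lane of `self` is all ones the lane of `t` is
  /// taken, where it is all zeros the lane of `f` is taken. `self` should be
  /// a comparison mask (e.g. from `simd_gt`); other values give a byte-wise
  /// blend on the AVX2 path.
  ///
  /// # Examples
  /// ```
  /// # use wide::{u32x8, CmpGt};
  /// let a = u32x8::from([5, 1, 7, 3, 9, 2, 8, 4]);
  /// let b = u32x8::from([4, 2, 6, 4, 8, 3, 7, 5]);
  /// // per-lane maximum via a mask
  /// let m = a.simd_gt(b);
  /// assert_eq!(m.blend(a, b).to_array(), [5, 2, 7, 4, 9, 3, 8, 5]);
  /// ```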
  #[inline]
  #[must_use]
  pub fn blend(self, t: Self, f: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: blend_varying_i8_m256i(f.avx2, t.avx2, self.avx2) }
      } else {
        Self {
          a : self.a.blend(t.a, f.a),
          b : self.b.blend(t.b, f.b),
        }
      }
    }
  }

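  /// Lane-wise unsigned maximum.
  ///
  /// # Examples
  /// ```
  /// # use wide::u32x8;
  /// let a = u32x8::from([5, 1, 7, 3, 9, 2, 8, 4]);
  /// let b = u32x8::from([4, 2, 6, 4, 8, 3, 7, 5]);
  /// assert_eq!(a.max(b).to_array(), [5, 2, 7, 4, 9, 3, 8, 5]);
  /// ```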
  #[inline]
  #[must_use]
  pub fn max(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: max_u32_m256i(self.avx2, rhs.avx2 ) }
      } else {
        Self {
          a : self.a.max(rhs.a),
          b : self.b.max(rhs.b),
        }
      }
    }
  }
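
  /// Lane-wise unsigned minimum.
  ///
  /// # Examples
  /// ```
  /// # use wide::u32x8;
  /// let a = u32x8::from([5, 1, 7, 3, 9, 2, 8, 4]);
  /// let b = u32x8::from([4, 2, 6, 4, 8, 3, 7, 5]);
  /// assert_eq!(a.min(b).to_array(), [4, 1, 6, 3, 8, 2, 7, 4]);
  /// ```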
  #[inline]
  #[must_use]
  pub fn min(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: min_u32_m256i(self.avx2, rhs.avx2 ) }
      } else {
        Self {
          a : self.a.min(rhs.a),
          b : self.b.min(rhs.b),
        }
      }
    }
  }

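  /// Packs the high bit of each lane into the low 8 bits of a `u32`
  /// (lane 0 in the least significant bit).
  ///
  /// # Examples
  /// ```
  /// # use wide::{u32x8, CmpEq};
  /// let a = u32x8::from([1, 0, 3, 0, 5, 0, 7, 0]);
  /// let m = a.simd_eq(u32x8::from([1, 2, 3, 4, 5, 6, 7, 8]));
  /// assert_eq!(m.to_bitmask(), 0b01010101);
  /// ```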
  #[inline]
  #[must_use]
  pub fn to_bitmask(self) -> u32 {
    i32x8::to_bitmask(cast(self))
  }

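  /// Returns `true` if the high bit of any lane is set, i.e. if any lane of
  /// a comparison mask is "true".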
  #[inline]
  #[must_use]
  pub fn any(self) -> bool {
    pick! {
      if #[cfg(target_feature="avx2")] {
        ((move_mask_i8_m256i(self.avx2) as u32) & 0b10001000100010001000100010001000) != 0
      } else {
        (self.a | self.b).any()
      }
    }
  }

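  /// Returns `true` if the high bit of every lane is set.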
  #[inline]
  #[must_use]
  pub fn all(self) -> bool {
    pick! {
      if #[cfg(target_feature="avx2")] {
        ((move_mask_i8_m256i(self.avx2) as u32) & 0b10001000100010001000100010001000) == 0b10001000100010001000100010001000
      } else {
        (self.a & self.b).all()
      }
    }
  }

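  /// Returns `true` if the high bit of no lane is set.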
  #[inline]
  #[must_use]
  pub fn none(self) -> bool {
    !self.any()
  }

  #[inline]
  pub fn to_array(self) -> [u32; 8] {
    cast(self)
  }

  #[inline]
  pub fn as_array(&self) -> &[u32; 8] {
    cast_ref(self)
  }

  #[inline]
  pub fn as_mut_array(&mut self) -> &mut [u32; 8] {
    cast_mut(self)
  }
}

impl Not for u32x8 {
  type Output = Self;
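  /// Bitwise NOT of each lane.
  ///
  /// # Examples
  /// ```
  /// # use wide::u32x8;
  /// let v = u32x8::from([0, u32::MAX, 1, 2, 3, 4, 5, 6]);
  /// assert_eq!((!v).to_array(), [u32::MAX, 0, !1, !2, !3, !4, !5, !6]);
  /// ```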
  #[inline]
  fn not(self) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: self.avx2.not() }
      } else {
        Self {
          a : self.a.not(),
          b : self.b.not(),
        }
      }
    }
  }
}