// wide/u64x4_.rs

use super::*;

pick! {
  if #[cfg(target_feature="avx2")] {
    #[derive(Default, Clone, Copy, PartialEq, Eq)]
    #[repr(C, align(32))]
    pub struct u64x4 { pub(crate) avx2: m256i }
  } else {
    #[derive(Default, Clone, Copy, PartialEq, Eq)]
    #[repr(C, align(32))]
    pub struct u64x4 { pub(crate) a : u64x2, pub(crate) b : u64x2 }
  }
}

int_uint_consts!(u64, 4, u64x4, 256);

unsafe impl Zeroable for u64x4 {}
unsafe impl Pod for u64x4 {}

impl AlignTo for u64x4 {
  type Elem = u64;
}

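/// Lane-wise addition; like the underlying SIMD add, overflow wraps.
///
/// A small usage sketch (doc-test style; assumes the crate-root `u64x4`
/// re-export):
///
/// ```
/// use wide::u64x4;
/// let a = u64x4::new([1, 2, 3, u64::MAX]);
/// let b = u64x4::new([10, 20, 30, 1]);
/// assert_eq!((a + b).to_array(), [11, 22, 33, 0]);
/// ```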
impl Add for u64x4 {
  type Output = Self;
  #[inline]
  fn add(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: add_i64_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.add(rhs.a),
          b : self.b.add(rhs.b),
        }
      }
    }
  }
}

impl Sub for u64x4 {
  type Output = Self;
  #[inline]
  fn sub(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: sub_i64_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.sub(rhs.a),
          b : self.b.sub(rhs.b),
        }
      }
    }
  }
}

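/// Lane-wise multiplication with wrapping on overflow.
///
/// Expected lane results, sketched as a doc test (crate-root import assumed):
///
/// ```
/// use wide::u64x4;
/// let a = u64x4::new([2, 3, 4, u64::MAX]);
/// let b = u64x4::new([5, 6, 7, 2]);
/// assert_eq!((a * b).to_array(), [10, 18, 28, u64::MAX.wrapping_mul(2)]);
/// ```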
impl Mul for u64x4 {
  type Output = Self;
  #[inline]
  fn mul(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        // AVX2 has no 64-bit lane multiply, so do it one lane at a time; the
        // wrapping product has the same bit pattern for i64 and u64.
        let arr1: [i64; 4] = cast(self);
        let arr2: [i64; 4] = cast(rhs);
        cast([
          arr1[0].wrapping_mul(arr2[0]),
          arr1[1].wrapping_mul(arr2[1]),
          arr1[2].wrapping_mul(arr2[2]),
          arr1[3].wrapping_mul(arr2[3]),
        ])
      } else {
        Self { a: self.a.mul(rhs.a), b: self.b.mul(rhs.b) }
      }
    }
  }
}

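/// Scalar operands broadcast to all four lanes, so `v + s` is
/// `v + u64x4::splat(s)`; the other scalar impls below work the same way.
///
/// Sketch (doc-test style, crate-root import assumed):
///
/// ```
/// use wide::u64x4;
/// let v = u64x4::new([1, 2, 3, 4]);
/// assert_eq!((v + 10u64).to_array(), [11, 12, 13, 14]);
/// assert_eq!((3u64 * v).to_array(), [3, 6, 9, 12]);
/// ```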
impl Add<u64> for u64x4 {
  type Output = Self;
  #[inline]
  fn add(self, rhs: u64) -> Self::Output {
    self.add(Self::splat(rhs))
  }
}

impl Sub<u64> for u64x4 {
  type Output = Self;
  #[inline]
  fn sub(self, rhs: u64) -> Self::Output {
    self.sub(Self::splat(rhs))
  }
}

impl Mul<u64> for u64x4 {
  type Output = Self;
  #[inline]
  fn mul(self, rhs: u64) -> Self::Output {
    self.mul(Self::splat(rhs))
  }
}

impl Add<u64x4> for u64 {
  type Output = u64x4;
  #[inline]
  fn add(self, rhs: u64x4) -> Self::Output {
    u64x4::splat(self).add(rhs)
  }
}

impl Sub<u64x4> for u64 {
  type Output = u64x4;
  #[inline]
  fn sub(self, rhs: u64x4) -> Self::Output {
    u64x4::splat(self).sub(rhs)
  }
}

impl Mul<u64x4> for u64 {
  type Output = u64x4;
  #[inline]
  fn mul(self, rhs: u64x4) -> Self::Output {
    u64x4::splat(self).mul(rhs)
  }
}

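/// Lane-wise bitwise AND (OR and XOR follow the same pattern below).
///
/// A quick sketch of the lane behavior (crate-root import assumed):
///
/// ```
/// use wide::u64x4;
/// let a = u64x4::new([0b1100, 0b1010, 0xFF, 0]);
/// let b = u64x4::splat(0b1010);
/// assert_eq!((a & b).to_array(), [0b1000, 0b1010, 0b1010, 0]);
/// ```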
impl BitAnd for u64x4 {
  type Output = Self;
  #[inline]
  fn bitand(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: bitand_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.bitand(rhs.a),
          b : self.b.bitand(rhs.b),
        }
      }
    }
  }
}

impl BitOr for u64x4 {
  type Output = Self;
  #[inline]
  fn bitor(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: bitor_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.bitor(rhs.a),
          b : self.b.bitor(rhs.b),
        }
      }
    }
  }
}

impl BitXor for u64x4 {
  type Output = Self;
  #[inline]
  fn bitxor(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: bitxor_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.bitxor(rhs.a),
          b : self.b.bitxor(rhs.b),
        }
      }
    }
  }
}

/// Shifts each lane left by the corresponding lane of `rhs`.
///
/// Bitwise shift-left; yields `self << mask(rhs)`, where mask removes any
/// high-order bits of `rhs` that would cause the shift to exceed the bitwidth
/// of the type. (same as `wrapping_shl`)
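///
/// A behavior sketch as a doc test (crate-root import assumed; note the lane
/// with a shift count of 65, which is masked down to 1):
///
/// ```
/// use wide::u64x4;
/// let v = u64x4::new([1, 1, 1, 1]);
/// let by = u64x4::new([0, 1, 2, 65]);
/// assert_eq!((v << by).to_array(), [1, 2, 4, 2]);
/// assert_eq!((v << 3u64).to_array(), [8, 8, 8, 8]); // scalar shift, all lanes
/// ```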
impl Shl for u64x4 {
  type Output = Self;

  #[inline]
  fn shl(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        // mask the shift count to 63 to have same behavior on all platforms
        let shift_by = rhs & Self::splat(63);
        Self { avx2: shl_each_u64_m256i(self.avx2, shift_by.avx2) }
      } else {
        Self {
          a : self.a.shl(rhs.a),
          b : self.b.shl(rhs.b),
        }
      }
    }
  }
}

macro_rules! impl_shl_t_for_u64x4 {
  ($($shift_type:ty),+ $(,)?) => {
    $(impl Shl<$shift_type> for u64x4 {
      type Output = Self;
      /// Shifts all lanes by the value given.
      #[inline]
      fn shl(self, rhs: $shift_type) -> Self::Output {
        pick! {
          if #[cfg(target_feature="avx2")] {
            let shift = cast([rhs as u64, 0]);
            Self { avx2: shl_all_u64_m256i(self.avx2, shift) }
          } else {
            Self {
              a : self.a.shl(rhs),
              b : self.b.shl(rhs),
            }
          }
        }
      }
    })+
  };
}
impl_shl_t_for_u64x4!(i8, u8, i16, u16, i32, u32, i64, u64, i128, u128);

/// Shifts each lane right by the corresponding lane of `rhs`.
///
/// Bitwise shift-right; yields `self >> mask(rhs)`, where mask removes any
/// high-order bits of `rhs` that would cause the shift to exceed the bitwidth
/// of the type. (same as `wrapping_shr`)
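///
/// Sketch of the expected lanes (doc-test style, crate-root import assumed;
/// the count of 67 is masked down to 3):
///
/// ```
/// use wide::u64x4;
/// let v = u64x4::splat(8);
/// let by = u64x4::new([0, 1, 3, 67]);
/// assert_eq!((v >> by).to_array(), [8, 4, 1, 1]);
/// ```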
impl Shr for u64x4 {
  type Output = Self;

  #[inline]
  fn shr(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        // mask the shift count to 63 to have same behavior on all platforms
        let shift_by = rhs & Self::splat(63);
        Self { avx2: shr_each_u64_m256i(self.avx2, shift_by.avx2) }
      } else {
        Self {
          a : self.a.shr(rhs.a),
          b : self.b.shr(rhs.b),
        }
      }
    }
  }
}

macro_rules! impl_shr_t_for_u64x4 {
  ($($shift_type:ty),+ $(,)?) => {
    $(impl Shr<$shift_type> for u64x4 {
      type Output = Self;
      /// Shifts all lanes by the value given.
      #[inline]
      fn shr(self, rhs: $shift_type) -> Self::Output {
        pick! {
          if #[cfg(target_feature="avx2")] {
            let shift = cast([rhs as u64, 0]);
            Self { avx2: shr_all_u64_m256i(self.avx2, shift) }
          } else {
            Self {
              a : self.a.shr(rhs),
              b : self.b.shr(rhs),
            }
          }
        }
      }
    })+
  };
}
impl_shr_t_for_u64x4!(i8, u8, i16, u16, i32, u32, i64, u64, i128, u128);

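/// Lane-wise `==` producing a mask: all ones (`u64::MAX`) where equal, zero
/// where not. `CmpGt`/`CmpLt` below produce the same kind of mask.
///
/// Sketch (doc-test style, crate-root import assumed):
///
/// ```
/// use wide::u64x4;
/// let a = u64x4::new([1, 2, 3, 4]);
/// let b = u64x4::new([1, 0, 3, 0]);
/// assert_eq!(a.simd_eq(b).to_array(), [u64::MAX, 0, u64::MAX, 0]);
/// ```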
impl CmpEq for u64x4 {
  type Output = Self;
  #[inline]
  fn simd_eq(self, rhs: Self) -> Self::Output {
    Self::simd_eq(self, rhs)
  }
}

impl CmpGt for u64x4 {
  type Output = Self;
  #[inline]
  fn simd_gt(self, rhs: Self) -> Self::Output {
    Self::simd_gt(self, rhs)
  }
}

impl CmpLt for u64x4 {
  type Output = Self;
  #[inline]
  fn simd_lt(self, rhs: Self) -> Self::Output {
    // no lt, so just call gt with swapped args
    Self::simd_gt(rhs, self)
  }
}

impl u64x4 {
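  /// Builds a vector from an array of lanes (lane 0 first).
  ///
  /// Round-trip sketch (doc-test style, crate-root import assumed):
  ///
  /// ```
  /// use wide::u64x4;
  /// let v = u64x4::new([1, 2, 3, 4]);
  /// assert_eq!(v.to_array(), [1, 2, 3, 4]);
  /// assert_eq!(u64x4::splat(7).to_array(), [7; 4]);
  /// ```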
  #[inline]
  #[must_use]
  pub const fn new(array: [u64; 4]) -> Self {
    unsafe { core::mem::transmute(array) }
  }
  #[inline]
  #[must_use]
  pub fn simd_eq(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: cmp_eq_mask_i64_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.simd_eq(rhs.a),
          b : self.b.simd_eq(rhs.b),
        }
      }
    }
  }
  #[inline]
  #[must_use]
  pub fn simd_gt(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        // there's no unsigned greater-than compare, so flip the high bit of
        // both sides and use the signed compare instead
        let highbit = u64x4::splat(1 << 63);
        Self { avx2: cmp_gt_mask_i64_m256i((self ^ highbit).avx2, (rhs ^ highbit).avx2) }
      } else {
        Self {
          a : self.a.simd_gt(rhs.a),
          b : self.b.simd_gt(rhs.b),
        }
      }
    }
  }

  #[inline]
  #[must_use]
  pub fn simd_lt(self, rhs: Self) -> Self {
    // lt is just gt the other way around
    rhs.simd_gt(self)
  }

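  /// Lane-wise select: where a lane of `self` is all ones take the lane from
  /// `t`, where it is zero take the lane from `f`. Intended for use with the
  /// masks returned by the comparison methods above.
  ///
  /// Sketch (doc-test style, crate-root import assumed):
  ///
  /// ```
  /// use wide::u64x4;
  /// let mask = u64x4::new([u64::MAX, 0, u64::MAX, 0]);
  /// let t = u64x4::splat(1);
  /// let f = u64x4::splat(2);
  /// assert_eq!(mask.blend(t, f).to_array(), [1, 2, 1, 2]);
  /// ```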
  #[inline]
  #[must_use]
  pub fn blend(self, t: Self, f: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: blend_varying_i8_m256i(f.avx2, t.avx2, self.avx2) }
      } else {
        Self {
          a : self.a.blend(t.a, f.a),
          b : self.b.blend(t.b, f.b),
        }
      }
    }
  }

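  /// Packs the top bit of each lane into the low 4 bits of a `u32` (lane 0 is
  /// bit 0).
  ///
  /// Sketch (doc-test style, crate-root import assumed):
  ///
  /// ```
  /// use wide::u64x4;
  /// let m = u64x4::new([u64::MAX, 0, u64::MAX, u64::MAX]);
  /// assert_eq!(m.to_bitmask(), 0b1101);
  /// ```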
  #[inline]
  #[must_use]
  pub fn to_bitmask(self) -> u32 {
    i64x4::to_bitmask(cast(self))
  }

  #[inline]
  pub fn to_array(self) -> [u64; 4] {
    cast(self)
  }

  #[inline]
  pub fn as_array(&self) -> &[u64; 4] {
    cast_ref(self)
  }

  #[inline]
  pub fn as_mut_array(&mut self) -> &mut [u64; 4] {
    cast_mut(self)
  }

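  /// Lane-wise unsigned minimum, built from `simd_lt` + `blend` (likewise
  /// `max` below from `simd_gt`).
  ///
  /// Sketch (doc-test style, crate-root import assumed):
  ///
  /// ```
  /// use wide::u64x4;
  /// let a = u64x4::new([1, 9, 3, u64::MAX]);
  /// let b = u64x4::new([4, 2, 3, 0]);
  /// assert_eq!(a.min(b).to_array(), [1, 2, 3, 0]);
  /// assert_eq!(a.max(b).to_array(), [4, 9, 3, u64::MAX]);
  /// ```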
  #[inline]
  #[must_use]
  pub fn min(self, rhs: Self) -> Self {
    self.simd_lt(rhs).blend(self, rhs)
  }

  #[inline]
  #[must_use]
  pub fn max(self, rhs: Self) -> Self {
    self.simd_gt(rhs).blend(self, rhs)
  }

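  /// Lane-wise multiply that returns the high 64 bits of each 128-bit
  /// product.
  ///
  /// Sketch (doc-test style, crate-root import assumed):
  ///
  /// ```
  /// use wide::u64x4;
  /// let a = u64x4::splat(1 << 32);
  /// let b = u64x4::splat(1 << 33);
  /// // 2^32 * 2^33 = 2^65, whose high 64 bits are 2
  /// assert_eq!(a.mul_keep_high(b).to_array(), [2, 2, 2, 2]);
  /// ```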
  #[inline]
  #[must_use]
  pub fn mul_keep_high(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        // AVX2 has no widening 64-bit multiply, so compute each lane's full
        // 128-bit product in scalar code and keep the high half.
        let arr1: [u64; 4] = cast(self);
        let arr2: [u64; 4] = cast(rhs);
        cast([
          (arr1[0] as u128 * arr2[0] as u128 >> 64) as u64,
          (arr1[1] as u128 * arr2[1] as u128 >> 64) as u64,
          (arr1[2] as u128 * arr2[2] as u128 >> 64) as u64,
          (arr1[3] as u128 * arr2[3] as u128 >> 64) as u64,
        ])
      } else {
        Self {
          a: self.a.mul_keep_high(rhs.a),
          b: self.b.mul_keep_high(rhs.b),
        }
      }
    }
  }
}

impl Not for u64x4 {
  type Output = Self;
  #[inline]
  fn not(self) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: self.avx2.not() }
      } else {
        Self {
          a : self.a.not(),
          b : self.b.not(),
        }
      }
    }
  }
}