// wide/i64x4_.rs

use super::*;

pick! {
  if #[cfg(target_feature="avx2")] {
    #[derive(Default, Clone, Copy, PartialEq, Eq)]
    #[repr(C, align(32))]
    pub struct i64x4 { pub(crate) avx2: m256i }
  } else {
    #[derive(Default, Clone, Copy, PartialEq, Eq)]
    #[repr(C, align(32))]
    pub struct i64x4 { pub(crate) a : i64x2, pub(crate) b : i64x2 }
  }
}

int_uint_consts!(i64, 4, i64x4, 256);

unsafe impl Zeroable for i64x4 {}
unsafe impl Pod for i64x4 {}

impl AlignTo for i64x4 {
  type Elem = i64;
}

impl Add for i64x4 {
  type Output = Self;
  #[inline]
  fn add(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: add_i64_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.add(rhs.a),
          b : self.b.add(rhs.b),
        }
      }
    }
  }
}

impl Sub for i64x4 {
  type Output = Self;
  #[inline]
  fn sub(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: sub_i64_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.sub(rhs.a),
          b : self.b.sub(rhs.b),
        }
      }
    }
  }
}

impl Mul for i64x4 {
  type Output = Self;
  #[inline]
  fn mul(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
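        // AVX2 has no 64-bit lane-wise multiply, so do the lanes in scalar code.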
        let arr1: [i64; 4] = cast(self);
        let arr2: [i64; 4] = cast(rhs);
        cast([
          arr1[0].wrapping_mul(arr2[0]),
          arr1[1].wrapping_mul(arr2[1]),
          arr1[2].wrapping_mul(arr2[2]),
          arr1[3].wrapping_mul(arr2[3]),
        ])
      } else {
        Self { a: self.a.mul(rhs.a), b: self.b.mul(rhs.b) }
      }
    }
  }
}

impl Add<i64> for i64x4 {
  type Output = Self;
  #[inline]
  fn add(self, rhs: i64) -> Self::Output {
    self.add(Self::splat(rhs))
  }
}

impl Sub<i64> for i64x4 {
  type Output = Self;
  #[inline]
  fn sub(self, rhs: i64) -> Self::Output {
    self.sub(Self::splat(rhs))
  }
}

impl Mul<i64> for i64x4 {
  type Output = Self;
  #[inline]
  fn mul(self, rhs: i64) -> Self::Output {
    self.mul(Self::splat(rhs))
  }
}

impl Add<i64x4> for i64 {
  type Output = i64x4;
  #[inline]
  fn add(self, rhs: i64x4) -> Self::Output {
    i64x4::splat(self).add(rhs)
  }
}

impl Sub<i64x4> for i64 {
  type Output = i64x4;
  #[inline]
  fn sub(self, rhs: i64x4) -> Self::Output {
    i64x4::splat(self).sub(rhs)
  }
}

impl Mul<i64x4> for i64 {
  type Output = i64x4;
  #[inline]
  fn mul(self, rhs: i64x4) -> Self::Output {
    i64x4::splat(self).mul(rhs)
  }
}

impl BitAnd for i64x4 {
  type Output = Self;
  #[inline]
  fn bitand(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: bitand_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.bitand(rhs.a),
          b : self.b.bitand(rhs.b),
        }
      }
    }
  }
}

impl BitOr for i64x4 {
  type Output = Self;
  #[inline]
  fn bitor(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: bitor_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.bitor(rhs.a),
          b : self.b.bitor(rhs.b),
        }
      }
    }
  }
}

impl BitXor for i64x4 {
  type Output = Self;
  #[inline]
  fn bitxor(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: bitxor_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.bitxor(rhs.a),
          b : self.b.bitxor(rhs.b),
        }
      }
    }
  }
}

/// Shifts each lane left by the amount in the corresponding lane of `rhs`.
///
/// Bitwise shift-left; yields `self << mask(rhs)`, where `mask` removes any
/// high-order bits of `rhs` that would cause the shift to exceed the bitwidth
/// of the type (same as `wrapping_shl`).
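///
/// A minimal usage sketch of the lane-wise behavior:
/// ```
/// # use wide::*;
/// let v = i64x4::new([1, 2, 3, 4]);
/// let by = i64x4::new([0, 1, 2, 3]);
/// assert_eq!((v << by).to_array(), [1, 4, 12, 32]);
/// ```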
impl Shl for i64x4 {
  type Output = Self;

  #[inline]
  fn shl(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        // mask the shift count to 63 to have the same behavior on all platforms
        let shift_by = rhs & Self::splat(63);
        Self { avx2: shl_each_u64_m256i(self.avx2, shift_by.avx2) }
      } else {
        Self {
          a : self.a.shl(rhs.a),
          b : self.b.shl(rhs.b),
        }
      }
    }
  }
}

macro_rules! impl_shl_t_for_i64x4 {
  ($($shift_type:ty),+ $(,)?) => {
    $(impl Shl<$shift_type> for i64x4 {
      type Output = Self;
      /// Shifts all lanes by the value given.
      #[inline]
      fn shl(self, rhs: $shift_type) -> Self::Output {
        pick! {
          if #[cfg(target_feature="avx2")] {
            let shift = cast([rhs as u64, 0]);
            Self { avx2: shl_all_u64_m256i(self.avx2, shift) }
          } else {
            Self {
              a : self.a.shl(rhs),
              b : self.b.shl(rhs),
            }
          }
        }
      }
    })+
  };
}
impl_shl_t_for_i64x4!(i8, u8, i16, u16, i32, u32, i64, u64, i128, u128);

/// Shifts each lane right by the amount in the corresponding lane of `rhs`.
///
/// Bitwise shift-right; yields `self >> mask(rhs)`, where `mask` removes any
/// high-order bits of `rhs` that would cause the shift to exceed the bitwidth
/// of the type (same as `wrapping_shr`).
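///
/// A minimal usage sketch (the shift is arithmetic, so the sign is kept):
/// ```
/// # use wide::*;
/// let v = i64x4::new([-8, 8, -1, 16]);
/// let by = i64x4::new([1, 2, 3, 4]);
/// assert_eq!((v >> by).to_array(), [-4, 2, -1, 1]);
/// ```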
impl Shr for i64x4 {
  type Output = Self;

  #[inline]
  fn shr(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
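        // AVX2 has no 64-bit arithmetic right shift, so shift each lane in scalar code.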
        let arr: [i64; 4] = cast(self);
        let rhs: [i64; 4] = cast(rhs);
        cast([
          arr[0].wrapping_shr(rhs[0] as u32),
          arr[1].wrapping_shr(rhs[1] as u32),
          arr[2].wrapping_shr(rhs[2] as u32),
          arr[3].wrapping_shr(rhs[3] as u32),
        ])
      } else {
        Self {
          a : self.a.shr(rhs.a),
          b : self.b.shr(rhs.b),
        }
      }
    }
  }
}

macro_rules! impl_shr_t_for_i64x4 {
  ($($shift_type:ty),+ $(,)?) => {
    $(impl Shr<$shift_type> for i64x4 {
      type Output = Self;
      /// Shifts all lanes by the value given.
      #[inline]
      fn shr(self, rhs: $shift_type) -> Self::Output {
        // AVX2 has no 64-bit arithmetic right shift, so do this as two i64x2 halves.
        let [a, b]: [i64x2; 2] = cast(self);
        cast([a.shr(rhs), b.shr(rhs)])
      }
    })+
  };
}
impl_shr_t_for_i64x4!(i8, u8, i16, u16, i32, u32, i64, u64, i128, u128);

impl CmpEq for i64x4 {
  type Output = Self;
  #[inline]
  fn simd_eq(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: cmp_eq_mask_i64_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.simd_eq(rhs.a),
          b : self.b.simd_eq(rhs.b),
        }
      }
    }
  }
}

impl CmpGt for i64x4 {
  type Output = Self;
  #[inline]
  fn simd_gt(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: cmp_gt_mask_i64_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.simd_gt(rhs.a),
          b : self.b.simd_gt(rhs.b),
        }
      }
    }
  }
}

impl CmpLt for i64x4 {
  type Output = Self;
  #[inline]
  fn simd_lt(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
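        // !(gt ^ eq) == !(gt | eq) == lt, since the gt and eq masks are disjoint per lane.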
        Self { avx2: !(cmp_gt_mask_i64_m256i(self.avx2, rhs.avx2) ^ cmp_eq_mask_i64_m256i(self.avx2, rhs.avx2)) }
      } else {
        Self {
          a : self.a.simd_lt(rhs.a),
          b : self.b.simd_lt(rhs.b),
        }
      }
    }
  }
}

impl i64x4 {
  #[inline]
  #[must_use]
  pub const fn new(array: [i64; 4]) -> Self {
    unsafe { core::mem::transmute(array) }
  }
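  /// Lane-wise select: for each lane, returns the lane from `t` where the
  /// matching lane of `self` is a set mask (all ones, as produced by the
  /// comparison ops) and the lane from `f` where it is clear.
  ///
  /// A minimal sketch, using a hand-built mask:
  /// ```
  /// # use wide::*;
  /// let mask = i64x4::new([0, -1, 0, -1]);
  /// let r = mask.blend(i64x4::splat(1), i64x4::splat(0));
  /// assert_eq!(r.to_array(), [0, 1, 0, 1]);
  /// ```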
  #[inline]
  #[must_use]
  pub fn blend(self, t: Self, f: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: blend_varying_i8_m256i(f.avx2, t.avx2, self.avx2) }
      } else {
        Self {
          a : self.a.blend(t.a, f.a),
          b : self.b.blend(t.b, f.b),
        }
      }
    }
  }

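  /// Lane-wise wrapping absolute value (`i64::MIN` is left unchanged).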
  #[inline]
  #[must_use]
  pub fn abs(self) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        // AVX2 has no 64-bit abs instruction, so do each lane in scalar code
        let arr: [i64; 4] = cast(self);
        cast(
          [
            arr[0].wrapping_abs(),
            arr[1].wrapping_abs(),
            arr[2].wrapping_abs(),
            arr[3].wrapping_abs(),
          ])
      } else {
        Self {
          a : self.a.abs(),
          b : self.b.abs(),
        }
      }
    }
  }

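  /// Lane-wise absolute value, returned as `u64x4` so that `i64::MIN` is representable.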
  #[inline]
  #[must_use]
  pub fn unsigned_abs(self) -> u64x4 {
    pick! {
      if #[cfg(target_feature="avx2")] {
        // AVX2 has no 64-bit abs instruction, so do each lane in scalar code
        let arr: [i64; 4] = cast(self);
        cast(
          [
            arr[0].unsigned_abs(),
            arr[1].unsigned_abs(),
            arr[2].unsigned_abs(),
            arr[3].unsigned_abs(),
          ])
      } else {
        u64x4 {
          a : self.a.unsigned_abs(),
          b : self.b.unsigned_abs(),
        }
      }
    }
  }

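  /// Converts each lane to `f64`. Lanes whose magnitude exceeds what `f64`
  /// can represent exactly are rounded.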
  #[inline]
  #[must_use]
  pub fn round_float(self) -> f64x4 {
    let arr: [i64; 4] = cast(self);
    cast([arr[0] as f64, arr[1] as f64, arr[2] as f64, arr[3] as f64])
  }

  /// Returns a bit mask made from the high bit of each lane, with the lowest
  /// lane in the lowest bit.
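  ///
  /// A minimal sketch:
  /// ```
  /// # use wide::*;
  /// assert_eq!(i64x4::new([-1, 0, -1, 0]).to_bitmask(), 0b0101);
  /// ```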
  #[inline]
  #[must_use]
  pub fn to_bitmask(self) -> u32 {
    pick! {
      if #[cfg(target_feature="avx2")] {
        // use the f64 move_mask, since f64 lanes are the same width as i64 lanes
        move_mask_m256d(cast(self.avx2)) as u32
      } else {
        self.a.to_bitmask() | (self.b.to_bitmask() << 2)
      }
    }
  }

  /// Returns `true` if any lane has its high bit set.
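  ///
  /// ```
  /// # use wide::*;
  /// assert!(i64x4::new([0, 0, -1, 0]).any());
  /// assert!(!i64x4::splat(0).any());
  /// ```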
  #[inline]
  #[must_use]
  pub fn any(self) -> bool {
    pick! {
      if #[cfg(target_feature="avx2")] {
        move_mask_m256d(cast(self.avx2)) != 0
      } else {
        (self.a | self.b).any()
      }
    }
  }

  /// Returns `true` if every lane has its high bit set.
  #[inline]
  #[must_use]
  pub fn all(self) -> bool {
    pick! {
      if #[cfg(target_feature="avx2")] {
        move_mask_m256d(cast(self.avx2)) == 0b1111
      } else {
        (self.a & self.b).all()
      }
    }
  }

  /// Returns `true` if no lane has its high bit set.
  #[inline]
  #[must_use]
  pub fn none(self) -> bool {
    !self.any()
  }

  #[inline]
  pub fn to_array(self) -> [i64; 4] {
    cast(self)
  }

  #[inline]
  pub fn as_array(&self) -> &[i64; 4] {
    cast_ref(self)
  }

  #[inline]
  pub fn as_mut_array(&mut self) -> &mut [i64; 4] {
    cast_mut(self)
  }

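  /// Lane-wise minimum of `self` and `rhs`.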
  #[inline]
  #[must_use]
  pub fn min(self, rhs: Self) -> Self {
    self.simd_lt(rhs).blend(self, rhs)
  }

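  /// Lane-wise maximum of `self` and `rhs`.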
  #[inline]
  #[must_use]
  pub fn max(self, rhs: Self) -> Self {
    self.simd_gt(rhs).blend(self, rhs)
  }
}

impl Not for i64x4 {
  type Output = Self;
  #[inline]
  fn not(self) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: self.avx2.not() }
      } else {
        Self {
          a : self.a.not(),
          b : self.b.not(),
        }
      }
    }
  }
}