// wide/i64x8_.rs
1use super::*;
2
pick! {
  if #[cfg(target_feature="avx512f")] {
    // Native AVX-512 layout: all eight i64 lanes live in one 512-bit register.
    #[derive(Default, Clone, Copy, PartialEq, Eq)]
    #[repr(C, align(64))]
    pub struct i64x8 { pub(crate) avx512: m512i }
  } else {
    // Fallback layout: two i64x4 halves. With repr(C), `a` holds lanes 0..4
    // and `b` holds lanes 4..8 (this ordering is what `new`'s transmute and
    // `to_bitmask` rely on).
    #[derive(Default, Clone, Copy, PartialEq, Eq)]
    #[repr(C, align(64))]
    pub struct i64x8 { pub(crate) a : i64x4, pub(crate) b : i64x4 }
  }
}
14
// Generates the standard associated constants for an 8-lane i64 vector
// (see the `int_uint_consts!` macro definition for the exact list).
int_uint_consts!(i64, 8, i64x8, 512);

// SAFETY: i64x8 is repr(C) plain-old-data built from integer lanes; the
// all-zeroes bit pattern is valid and every bit pattern is a valid value.
unsafe impl Zeroable for i64x8 {}
unsafe impl Pod for i64x8 {}
19
// The element type used when viewing aligned slices of this vector type.
impl AlignTo for i64x8 {
  type Elem = i64;
}
23
impl Add for i64x8 {
  type Output = Self;
  /// Lanewise addition (SIMD integer adds wrap on overflow).
  #[inline]
  fn add(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx512f")] {
        // One 512-bit hardware add covers all eight lanes.
        Self { avx512: add_i64_m512i(self.avx512, rhs.avx512) }
      } else {
        // Emulated path: add each i64x4 half independently.
        Self {
          a : self.a.add(rhs.a),
          b : self.b.add(rhs.b),
        }
      }
    }
  }
}
40
impl Sub for i64x8 {
  type Output = Self;
  /// Lanewise subtraction (SIMD integer subtracts wrap on overflow).
  #[inline]
  fn sub(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx512f")] {
        // One 512-bit hardware subtract covers all eight lanes.
        Self { avx512: sub_i64_m512i(self.avx512, rhs.avx512) }
      } else {
        // Emulated path: subtract each i64x4 half independently.
        Self {
          a : self.a.sub(rhs.a),
          b : self.b.sub(rhs.b),
        }
      }
    }
  }
}
57
impl Mul for i64x8 {
  type Output = Self;
  /// Lanewise wrapping multiplication.
  #[inline]
  fn mul(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx512f")] {
        // No vector 64-bit multiply is used here (the hardware lanewise op is
        // an AVX-512DQ instruction, not plain avx512f — TODO confirm), so the
        // lanes are multiplied one at a time with wrapping semantics.
        let arr1: [i64; 8] = cast(self);
        let arr2: [i64; 8] = cast(rhs);
        cast([
          arr1[0].wrapping_mul(arr2[0]),
          arr1[1].wrapping_mul(arr2[1]),
          arr1[2].wrapping_mul(arr2[2]),
          arr1[3].wrapping_mul(arr2[3]),
          arr1[4].wrapping_mul(arr2[4]),
          arr1[5].wrapping_mul(arr2[5]),
          arr1[6].wrapping_mul(arr2[6]),
          arr1[7].wrapping_mul(arr2[7]),
        ])
      } else {
        // Emulated path: multiply each i64x4 half independently.
        Self { a: self.a.mul(rhs.a), b: self.b.mul(rhs.b) }
      }
    }
  }
}
82
impl Shr for i64x8 {
  type Output = Self;

  /// Lanewise arithmetic (sign-extending) right shift, with a per-lane shift
  /// amount taken from the matching lane of `rhs`.
  #[inline]
  fn shr(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx512f")] {
        // TODO(safe_arch): add shr_each_i64_m512i (arithmetic right shift)
        // Self { avx512: shr_each_i64_m512i(self.avx512, rhs.avx512) }
        // Fallback for now:
        let a: [i64; 8] = cast(self);
        let r: [i64; 8] = cast(rhs);
        // `wrapping_shr` masks the count into 0..64, and `>>` on i64 is
        // arithmetic, so the sign bit is preserved.
        cast([
          a[0].wrapping_shr(r[0] as u32),
          a[1].wrapping_shr(r[1] as u32),
          a[2].wrapping_shr(r[2] as u32),
          a[3].wrapping_shr(r[3] as u32),
          a[4].wrapping_shr(r[4] as u32),
          a[5].wrapping_shr(r[5] as u32),
          a[6].wrapping_shr(r[6] as u32),
          a[7].wrapping_shr(r[7] as u32),
        ])
      } else {
        // widen via two halves
        Self {
          a: self.a.shr(rhs.a),
          b: self.b.shr(rhs.b),
        }
      }
    }
  }
}
115
impl Shl for i64x8 {
  type Output = Self;

  /// Lanewise left shift, with a per-lane shift amount taken from the
  /// matching lane of `rhs`.
  #[inline]
  fn shl(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx512f")] {
        // TODO(safe_arch): add shl_each_i64_m512i
        // Self { avx512: shl_each_i64_m512i(self.avx512, rhs.avx512) }
        // Fallback for now:
        let a: [i64; 8] = cast(self);
        let r: [i64; 8] = cast(rhs);
        // `wrapping_shl` masks the count into 0..64 rather than panicking on
        // oversized shifts.
        cast([
          a[0].wrapping_shl(r[0] as u32),
          a[1].wrapping_shl(r[1] as u32),
          a[2].wrapping_shl(r[2] as u32),
          a[3].wrapping_shl(r[3] as u32),
          a[4].wrapping_shl(r[4] as u32),
          a[5].wrapping_shl(r[5] as u32),
          a[6].wrapping_shl(r[6] as u32),
          a[7].wrapping_shl(r[7] as u32),
        ])
      } else {
        // widen via two halves
        Self {
          a: self.a.shl(rhs.a),
          b: self.b.shl(rhs.b),
        }
      }
    }
  }
}
148
149impl Add<i64> for i64x8 {
150  type Output = Self;
151  #[inline]
152  fn add(self, rhs: i64) -> Self::Output {
153    self.add(Self::splat(rhs))
154  }
155}
156
157impl Sub<i64> for i64x8 {
158  type Output = Self;
159  #[inline]
160  fn sub(self, rhs: i64) -> Self::Output {
161    self.sub(Self::splat(rhs))
162  }
163}
164
165impl Mul<i64> for i64x8 {
166  type Output = Self;
167  #[inline]
168  fn mul(self, rhs: i64) -> Self::Output {
169    self.mul(Self::splat(rhs))
170  }
171}
172
173impl Add<i64x8> for i64 {
174  type Output = i64x8;
175  #[inline]
176  fn add(self, rhs: i64x8) -> Self::Output {
177    i64x8::splat(self).add(rhs)
178  }
179}
180
181impl Sub<i64x8> for i64 {
182  type Output = i64x8;
183  #[inline]
184  fn sub(self, rhs: i64x8) -> Self::Output {
185    i64x8::splat(self).sub(rhs)
186  }
187}
188
189impl Mul<i64x8> for i64 {
190  type Output = i64x8;
191  #[inline]
192  fn mul(self, rhs: i64x8) -> Self::Output {
193    i64x8::splat(self).mul(rhs)
194  }
195}
196
impl BitAnd for i64x8 {
  type Output = Self;
  /// Lanewise bitwise AND.
  #[inline]
  fn bitand(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx512f")] {
        Self { avx512: bitand_m512i(self.avx512, rhs.avx512) }
      } else {
        Self {
          a : self.a.bitand(rhs.a),
          b : self.b.bitand(rhs.b),
        }
      }
    }
  }
}
213
214impl BitOr for i64x8 {
215  type Output = Self;
216  #[inline]
217  fn bitor(self, rhs: Self) -> Self::Output {
218    pick! {
219    if #[cfg(target_feature="avx512f")] {
220        Self { avx512: bitor_m512i(self.avx512, rhs.avx512) }
221      } else {
222        Self {
223          a : self.a.bitor(rhs.a),
224          b : self.b.bitor(rhs.b),
225        }
226      }
227    }
228  }
229}
230
impl BitXor for i64x8 {
  type Output = Self;
  /// Lanewise bitwise XOR.
  #[inline]
  fn bitxor(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx512f")] {
        Self { avx512: bitxor_m512i(self.avx512, rhs.avx512) }
      } else {
        Self {
          a : self.a.bitxor(rhs.a),
          b : self.b.bitxor(rhs.b),
        }
      }
    }
  }
}
247
248macro_rules! impl_shl_t_for_i64x8 {
249  ($($shift_type:ty),+ $(,)?) => {
250    $(impl Shl<$shift_type> for i64x8 {
251      type Output = Self;
252      /// Shifts all lanes by the value given.
253      #[inline]
254      fn shl(self, rhs: $shift_type) -> Self::Output {
255        pick! {
256          if #[cfg(target_feature="avx512f")] {
257            let shift = cast(rhs as u64);
258            Self { avx512: shl_all_u64_m512i(self.avx512, shift) }
259          } else {
260            Self {
261              a : self.a.shl(rhs),
262              b : self.b.shl(rhs),
263            }
264          }
265        }
266      }
267    })+
268  };
269}
270impl_shl_t_for_i64x8!(i8, u8, i16, u16, i32, u32, i64, u64, i128, u128);
271
272macro_rules! impl_shr_t_for_i64x8 {
273  ($($shift_type:ty),+ $(,)?) => {
274    $(impl Shr<$shift_type> for i64x8 {
275      type Output = Self;
276      /// Shifts all lanes by the value given.
277      #[inline]
278      fn shr(self, rhs: $shift_type) -> Self::Output {
279        pick! {
280          if #[cfg(target_feature="avx512f")] {
281            let shift = cast(rhs as u64);
282            Self { avx512: shr_all_i64_m512i(self.avx512, shift) }
283          } else {
284            Self {
285              a : self.a.shr(rhs),
286              b : self.b.shr(rhs),
287            }
288          }
289        }
290      }
291    })+
292  };
293}
294impl_shr_t_for_i64x8!(i8, u8, i16, u16, i32, u32, i64, u64, i128, u128);
295
impl CmpEq for i64x8 {
  type Output = Self;
  /// Lanewise `==` mask; delegates to the inherent `i64x8::simd_eq`.
  #[inline]
  fn simd_eq(self, rhs: Self) -> Self::Output {
    // `Self::simd_eq` resolves to the inherent method (inherent impls take
    // precedence over trait methods), so this does not recurse.
    Self::simd_eq(self, rhs)
  }
}
303
impl CmpGt for i64x8 {
  type Output = Self;
  /// Lanewise signed `>` mask; delegates to the inherent `i64x8::simd_gt`.
  #[inline]
  fn simd_gt(self, rhs: Self) -> Self::Output {
    // Resolves to the inherent method, not this trait method (no recursion).
    Self::simd_gt(self, rhs)
  }
}
311
impl CmpLt for i64x8 {
  type Output = Self;
  /// Lanewise signed `<` mask; delegates to the inherent `i64x8::simd_lt`.
  #[inline]
  fn simd_lt(self, rhs: Self) -> Self::Output {
    // Resolves to the inherent method, not this trait method (no recursion).
    Self::simd_lt(self, rhs)
  }
}
319
320impl i64x8 {
321  #[inline]
322  #[must_use]
323  pub const fn new(array: [i64; 8]) -> Self {
324    unsafe { core::mem::transmute(array) }
325  }
326  #[inline]
327  #[must_use]
328  pub fn simd_eq(self, rhs: Self) -> Self {
329    pick! {
330      if #[cfg(target_feature="avx512f")] {
331        Self { avx512: cmp_op_mask_i64_m512i::<{cmp_int_op!(Eq)}>(self.avx512, rhs.avx512) }
332      } else {
333        Self {
334          a : self.a.simd_eq(rhs.a),
335          b : self.b.simd_eq(rhs.b),
336        }
337      }
338    }
339  }
340  #[inline]
341  #[must_use]
342  pub fn simd_gt(self, rhs: Self) -> Self {
343    pick! {
344      if #[cfg(target_feature="avx512f")] {
345        Self { avx512: cmp_op_mask_i64_m512i::<{cmp_int_op!(Nle)}>(self.avx512, rhs.avx512) }
346      } else {
347        Self {
348          a : self.a.simd_gt(rhs.a),
349          b : self.b.simd_gt(rhs.b),
350        }
351      }
352    }
353  }
354
355  #[inline]
356  #[must_use]
357  pub fn simd_lt(self, rhs: Self) -> Self {
358    pick! {
359      if #[cfg(target_feature="avx512f")] {
360        Self { avx512: cmp_op_mask_i64_m512i::<{cmp_int_op!(Lt)}>(self.avx512, rhs.avx512) }
361      } else {
362        Self {
363          a : rhs.a.simd_gt(self.a),
364          b : rhs.b.simd_gt(self.b),
365        }
366      }
367    }
368  }
369
370  #[inline]
371  #[must_use]
372  pub fn blend(self, t: Self, f: Self) -> Self {
373    pick! {
374      if #[cfg(target_feature="avx512f")] {
375        Self { avx512: blend_varying_i8_m512i(f.avx512,t.avx512,movepi8_mask_m512i(self.avx512)) }
376      } else {
377        Self {
378          a : self.a.blend(t.a, f.a),
379          b : self.b.blend(t.b, f.b),
380        }
381      }
382    }
383  }
384
385  #[inline]
386  #[must_use]
387  pub fn abs(self) -> Self {
388    pick! {
389      if #[cfg(target_feature="avx512f")] {
390        // AVX512 might have this, unsure for now
391        let arr: [i64; 8] = cast(self);
392        cast(
393          [
394            arr[0].wrapping_abs(),
395            arr[1].wrapping_abs(),
396            arr[2].wrapping_abs(),
397            arr[3].wrapping_abs(),
398            arr[4].wrapping_abs(),
399            arr[5].wrapping_abs(),
400            arr[6].wrapping_abs(),
401            arr[7].wrapping_abs(),
402          ])
403      } else {
404        Self {
405          a : self.a.abs(),
406          b : self.b.abs(),
407        }
408      }
409    }
410  }
411
412  #[inline]
413  #[must_use]
414  pub fn unsigned_abs(self) -> u64x8 {
415    pick! {
416      if #[cfg(target_feature="avx512f")] {
417        // AVX512 might have this, unsure for now
418        let arr: [i64; 8] = cast(self);
419        cast(
420          [
421            arr[0].unsigned_abs(),
422            arr[1].unsigned_abs(),
423            arr[2].unsigned_abs(),
424            arr[3].unsigned_abs(),
425            arr[4].unsigned_abs(),
426            arr[5].unsigned_abs(),
427            arr[6].unsigned_abs(),
428            arr[7].unsigned_abs(),
429          ])
430      } else {
431        u64x8 {
432          a : self.a.unsigned_abs(),
433          b : self.b.unsigned_abs(),
434        }
435      }
436    }
437  }
438
439  #[inline]
440  #[must_use]
441  pub fn round_float(self) -> f64x8 {
442    let arr: [i64; 8] = cast(self);
443    cast([
444      arr[0] as f64,
445      arr[1] as f64,
446      arr[2] as f64,
447      arr[3] as f64,
448      arr[4] as f64,
449      arr[5] as f64,
450      arr[6] as f64,
451      arr[7] as f64,
452    ])
453  }
454
455  /// returns the bit mask for each high bit set in the vector with the lowest
456  /// lane being the lowest bit
457  #[inline]
458  #[must_use]
459  pub fn to_bitmask(self) -> u32 {
460    pick! {
461      if #[cfg(target_feature="avx512dq")] {
462        // use f64 move_mask since it is the same size as i64
463        movepi64_mask_m512d(cast(self.avx512)) as u32
464      } else {
465        self.a.to_bitmask() | (self.b.to_bitmask() << 2)
466      }
467    }
468  }
469
470  /// true if any high bits are set for any value in the vector
471  #[inline]
472  #[must_use]
473  pub fn any(self) -> bool {
474    pick! {
475      if #[cfg(target_feature="avx512f")] {
476        movepi64_mask_m512d(cast(self.avx512)) != 0
477      } else {
478        (self.a | self.b).any()
479      }
480    }
481  }
482
483  /// true if all high bits are set for every value in the vector
484  #[inline]
485  #[must_use]
486  pub fn all(self) -> bool {
487    pick! {
488      if #[cfg(target_feature="avx512bw")] {
489        movepi64_mask_m512d(cast(self.avx512)) == 0b11111111
490      } else {
491        (self.a & self.b).all()
492      }
493    }
494  }
495
496  /// true if no high bits are set for any values of the vector
497  #[inline]
498  #[must_use]
499  pub fn none(self) -> bool {
500    !self.any()
501  }
502
503  #[inline]
504  pub fn to_array(self) -> [i64; 8] {
505    cast(self)
506  }
507
508  #[inline]
509  pub fn as_array(&self) -> &[i64; 8] {
510    cast_ref(self)
511  }
512
513  #[inline]
514  pub fn as_mut_array(&mut self) -> &mut [i64; 8] {
515    cast_mut(self)
516  }
517
518  #[inline]
519  #[must_use]
520  pub fn min(self, rhs: Self) -> Self {
521    pick! {
522      if #[cfg(target_feature="avx512f")] {
523        Self { avx512: min_i64_m512i(self.avx512, rhs.avx512) }
524      } else {
525        Self {
526          a: self.a.min(rhs.a),
527          b: self.b.min(rhs.b),
528        }
529      }
530    }
531  }
532
533  #[inline]
534  #[must_use]
535  pub fn max(self, rhs: Self) -> Self {
536    pick! {
537      if #[cfg(target_feature="avx512f")] {
538        Self { avx512: max_i64_m512i(self.avx512, rhs.avx512) }
539      } else {
540        Self {
541          a: self.a.max(rhs.a),
542          b: self.b.max(rhs.b),
543        }
544      }
545    }
546  }
547}
548
impl Not for i64x8 {
  type Output = Self;
  /// Lanewise bitwise NOT.
  #[inline]
  fn not(self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx512f")] {
        // XOR against splatted -1 (all bits set) inverts every bit.
        Self { avx512: bitxor_m512i(self.avx512, set_splat_i64_m512i(-1)) }
      } else {
        Self {
          a : self.a.not(),
          b : self.b.not(),
        }
      }
    }
  }
}
564}