use super::*;

pick! {
  if #[cfg(target_feature="sse2")] {
    #[derive(Default, Clone, Copy, PartialEq, Eq)]
    #[repr(C, align(16))]
    pub struct u64x2 { pub(crate) sse: m128i }
  } else if #[cfg(target_feature="simd128")] {
    use core::arch::wasm32::*;

    #[derive(Clone, Copy)]
    #[repr(transparent)]
    pub struct u64x2 { pub(crate) simd: v128 }

    impl Default for u64x2 {
      fn default() -> Self {
        Self::splat(0)
      }
    }

    impl PartialEq for u64x2 {
      fn eq(&self, other: &Self) -> bool {
        u64x2_all_true(u64x2_eq(self.simd, other.simd))
      }
    }

    impl Eq for u64x2 { }
  } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
    use core::arch::aarch64::*;
    #[repr(C)]
    #[derive(Copy, Clone)]
    pub struct u64x2 { pub(crate) neon : uint64x2_t }

    impl Default for u64x2 {
      #[inline]
      fn default() -> Self {
        unsafe { Self { neon: vdupq_n_u64(0)} }
      }
    }

    impl PartialEq for u64x2 {
      #[inline]
      fn eq(&self, other: &Self) -> bool {
        unsafe {
          vgetq_lane_u64(self.neon,0) == vgetq_lane_u64(other.neon,0) &&
          vgetq_lane_u64(self.neon,1) == vgetq_lane_u64(other.neon,1)
        }
      }
    }

    impl Eq for u64x2 { }
  } else {
    #[derive(Default, Clone, Copy, PartialEq, Eq)]
    #[repr(C, align(16))]
    pub struct u64x2 { arr: [u64;2] }
  }
}

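// Shared associated constants for `u64x2`, generated by the crate's
// `int_uint_consts!` helper macro.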
int_uint_consts!(u64, 2, u64x2, 128);

unsafe impl Zeroable for u64x2 {}
unsafe impl Pod for u64x2 {}

impl AlignTo for u64x2 {
  type Elem = u64;
}

impl Add for u64x2 {
  type Output = Self;
  #[inline]
  fn add(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: add_i64_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: u64x2_add(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vaddq_u64(self.neon, rhs.neon) } }
      } else {
        Self { arr: [
          self.arr[0].wrapping_add(rhs.arr[0]),
          self.arr[1].wrapping_add(rhs.arr[1]),
        ]}
      }
    }
  }
}

impl Sub for u64x2 {
  type Output = Self;
  #[inline]
  fn sub(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: sub_i64_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: u64x2_sub(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vsubq_u64(self.neon, rhs.neon) } }
      } else {
        Self { arr: [
          self.arr[0].wrapping_sub(rhs.arr[0]),
          self.arr[1].wrapping_sub(rhs.arr[1]),
        ]}
      }
    }
  }
}

impl Mul for u64x2 {
  type Output = Self;
  #[inline]
  fn mul(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="simd128")] {
        Self { simd: u64x2_mul(self.simd, rhs.simd) }
      } else {
        let arr1: [u64; 2] = cast(self);
        let arr2: [u64; 2] = cast(rhs);
        cast([
          arr1[0].wrapping_mul(arr2[0]),
          arr1[1].wrapping_mul(arr2[1]),
        ])
      }
    }
  }
}

impl Add<u64> for u64x2 {
  type Output = Self;
  #[inline]
  fn add(self, rhs: u64) -> Self::Output {
    self.add(Self::splat(rhs))
  }
}

impl Sub<u64> for u64x2 {
  type Output = Self;
  #[inline]
  fn sub(self, rhs: u64) -> Self::Output {
    self.sub(Self::splat(rhs))
  }
}

impl Mul<u64> for u64x2 {
  type Output = Self;
  #[inline]
  fn mul(self, rhs: u64) -> Self::Output {
    self.mul(Self::splat(rhs))
  }
}

impl Add<u64x2> for u64 {
  type Output = u64x2;
  #[inline]
  fn add(self, rhs: u64x2) -> Self::Output {
    u64x2::splat(self).add(rhs)
  }
}

impl Sub<u64x2> for u64 {
  type Output = u64x2;
  #[inline]
  fn sub(self, rhs: u64x2) -> Self::Output {
    u64x2::splat(self).sub(rhs)
  }
}

impl Mul<u64x2> for u64 {
  type Output = u64x2;
  #[inline]
  fn mul(self, rhs: u64x2) -> Self::Output {
    u64x2::splat(self).mul(rhs)
  }
}

impl BitAnd for u64x2 {
  type Output = Self;
  #[inline]
  fn bitand(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: bitand_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: v128_and(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {Self { neon: vandq_u64(self.neon, rhs.neon) }}
      } else {
        Self { arr: [
          self.arr[0].bitand(rhs.arr[0]),
          self.arr[1].bitand(rhs.arr[1]),
        ]}
      }
    }
  }
}

impl BitOr for u64x2 {
  type Output = Self;
  #[inline]
  fn bitor(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: bitor_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: v128_or(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {Self { neon: vorrq_u64(self.neon, rhs.neon) }}
      } else {
        Self { arr: [
          self.arr[0].bitor(rhs.arr[0]),
          self.arr[1].bitor(rhs.arr[1]),
        ]}
      }
    }
  }
}

impl BitXor for u64x2 {
  type Output = Self;
  #[inline]
  fn bitxor(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: bitxor_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: v128_xor(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {Self { neon: veorq_u64(self.neon, rhs.neon) }}
      } else {
        Self { arr: [
          self.arr[0].bitxor(rhs.arr[0]),
          self.arr[1].bitxor(rhs.arr[1]),
        ]}
      }
    }
  }
}

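// Lanewise `self << rhs`, taking the shift amount for each lane from the
// matching lane of `rhs`. Every backend masks the shift count to 0..=63, the
// same behavior as `u64::wrapping_shl` in the scalar fallback.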
impl Shl for u64x2 {
  type Output = Self;

  #[inline]
  fn shl(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        // mask the shift count to 0..=63 so this matches the scalar fallback
        let shift_by = rhs & Self::splat(63);
        Self { sse: shl_each_u64_m128i(self.sse, shift_by.sse) }
      } else if #[cfg(all(target_feature="neon", target_arch="aarch64"))] {
        unsafe {
          // mask the shift count, then reinterpret as signed for `vshlq_u64`
          let shift_by = vreinterpretq_s64_u64(vandq_u64(rhs.neon, vmovq_n_u64(63)));
          Self { neon: vshlq_u64(self.neon, shift_by) }
        }
      } else {
        let arr: [u64; 2] = cast(self);
        let rhs: [u64; 2] = cast(rhs);
        cast([
          arr[0].wrapping_shl(rhs[0] as u32),
          arr[1].wrapping_shl(rhs[1] as u32),
        ])
      }
    }
  }
}

macro_rules! impl_shl_t_for_u64x2 {
  ($($shift_type:ty),+ $(,)?) => {
    $(impl Shl<$shift_type> for u64x2 {
      type Output = Self;
      #[inline]
      fn shl(self, rhs: $shift_type) -> Self::Output {
        pick! {
          if #[cfg(target_feature="sse2")] {
            let shift = cast([rhs as u64, 0]);
            Self { sse: shl_all_u64_m128i(self.sse, shift) }
          } else if #[cfg(target_feature="simd128")] {
            Self { simd: u64x2_shl(self.simd, rhs as u32) }
          } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
            unsafe {Self { neon: vshlq_u64(self.neon, vmovq_n_s64(rhs as i64)) }}
          } else {
            let u = rhs as u32;
            Self { arr: [
              self.arr[0].wrapping_shl(u),
              self.arr[1].wrapping_shl(u),
            ]}
          }
        }
      }
    })+
  };
}
impl_shl_t_for_u64x2!(i8, u8, i16, u16, i32, u32, i64, u64, i128, u128);

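// Lanewise `self >> rhs` (a logical shift, since the lanes are unsigned), with
// the shift amount for each lane taken from the matching lane of `rhs` and
// masked to 0..=63.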
impl Shr for u64x2 {
  type Output = Self;

  #[inline]
  fn shr(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        // mask the shift count to 0..=63 so this matches the scalar fallback
        let shift_by = rhs & Self::splat(63);
        Self { sse: shr_each_u64_m128i(self.sse, shift_by.sse) }
      } else if #[cfg(all(target_feature="neon", target_arch="aarch64"))] {
        unsafe {
          // a negative shift count makes `vshlq_u64` shift right instead
          let shift_by = vnegq_s64(vreinterpretq_s64_u64(vandq_u64(rhs.neon, vmovq_n_u64(63))));
          Self { neon: vshlq_u64(self.neon, shift_by) }
        }
      } else {
        let arr: [u64; 2] = cast(self);
        let rhs: [u64; 2] = cast(rhs);
        cast([
          arr[0].wrapping_shr(rhs[0] as u32),
          arr[1].wrapping_shr(rhs[1] as u32),
        ])
      }
    }
  }
}

macro_rules! impl_shr_t_for_u64x2 {
  ($($shift_type:ty),+ $(,)?) => {
    $(impl Shr<$shift_type> for u64x2 {
      type Output = Self;
      #[inline]
      fn shr(self, rhs: $shift_type) -> Self::Output {
        pick! {
          if #[cfg(target_feature="sse2")] {
            let shift = cast([rhs as u64, 0]);
            Self { sse: shr_all_u64_m128i(self.sse, shift) }
          } else if #[cfg(target_feature="simd128")] {
            Self { simd: u64x2_shr(self.simd, rhs as u32) }
          } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
            unsafe {Self { neon: vshlq_u64(self.neon, vmovq_n_s64(-(rhs as i64))) }}
          } else {
            let u = rhs as u32;
            Self { arr: [
              self.arr[0].wrapping_shr(u),
              self.arr[1].wrapping_shr(u),
            ]}
          }
        }
      }
    })+
  };
}
impl_shr_t_for_u64x2!(i8, u8, i16, u16, i32, u32, i64, u64, i128, u128);

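// The comparison operator traits forward to the inherent `simd_eq`/`simd_gt`
// methods defined further down.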
impl CmpEq for u64x2 {
  type Output = Self;
  #[inline]
  fn simd_eq(self, rhs: Self) -> Self::Output {
    Self::simd_eq(self, rhs)
  }
}

impl CmpGt for u64x2 {
  type Output = Self;
  #[inline]
  fn simd_gt(self, rhs: Self) -> Self::Output {
    Self::simd_gt(self, rhs)
  }
}

impl CmpLt for u64x2 {
  type Output = Self;
  #[inline]
  fn simd_lt(self, rhs: Self) -> Self::Output {
    Self::simd_gt(rhs, self)
  }
}

impl u64x2 {
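  /// Builds a `u64x2` from an array of two lanes.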
  #[inline]
  #[must_use]
  pub const fn new(array: [u64; 2]) -> Self {
    unsafe { core::mem::transmute(array) }
  }
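  /// Lanewise `self == rhs`: each lane is all ones when equal, all zeros
  /// otherwise.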
  #[inline]
  #[must_use]
  pub fn simd_eq(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="sse4.1")] {
        Self { sse: cmp_eq_mask_i64_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: u64x2_eq(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {Self { neon: vceqq_u64(self.neon, rhs.neon) } }
      } else {
        let s: [u64;2] = cast(self);
        let r: [u64;2] = cast(rhs);
        cast([
          if s[0] == r[0] { -1_i64 } else { 0 },
          if s[1] == r[1] { -1_i64 } else { 0 },
        ])
      }
    }
  }
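  /// Lanewise unsigned `self > rhs`: each lane is all ones when greater, all
  /// zeros otherwise.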
  #[inline]
  #[must_use]
  pub fn simd_gt(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="sse4.2")] {
        // SSE4.2 only offers a signed 64-bit compare; flipping the sign bit of
        // both operands turns it into an unsigned compare.
        let highbit = u64x2::splat(1 << 63);
        Self { sse: cmp_gt_mask_i64_m128i((self ^ highbit).sse, (rhs ^ highbit).sse) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {Self { neon: vcgtq_u64(self.neon, rhs.neon) }}
      } else {
        let s: [u64;2] = cast(self);
        let r: [u64;2] = cast(rhs);
        cast([
          if s[0] > r[0] { u64::MAX } else { 0 },
          if s[1] > r[1] { u64::MAX } else { 0 },
        ])
      }
    }
  }

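  /// Lanewise unsigned `self < rhs`, implemented as `rhs.simd_gt(self)`.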
  #[inline]
  #[must_use]
  pub fn simd_lt(self, rhs: Self) -> Self {
    rhs.simd_gt(self)
  }

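  /// Lanewise blend: where the mask lanes of `self` are all ones the result
  /// takes `t`, where they are all zeros it takes `f`. Meant for the masks
  /// produced by the comparison methods.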
  #[inline]
  #[must_use]
  pub fn blend(self, t: Self, f: Self) -> Self {
    pick! {
      if #[cfg(target_feature="sse4.1")] {
        Self { sse: blend_varying_i8_m128i(f.sse, t.sse, self.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: v128_bitselect(t.simd, f.simd, self.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {Self { neon: vbslq_u64(self.neon, t.neon, f.neon) }}
      } else {
        generic_bit_blend(self, t, f)
      }
    }
  }

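  /// Collects the top bit of each lane into the low bits of the result,
  /// delegating to `i64x2::to_bitmask`.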
  #[inline]
  #[must_use]
  pub fn to_bitmask(self) -> u32 {
    i64x2::to_bitmask(cast(self))
  }

  #[inline]
  pub fn to_array(self) -> [u64; 2] {
    cast(self)
  }

  #[inline]
  pub fn as_array(&self) -> &[u64; 2] {
    cast_ref(self)
  }

  #[inline]
  pub fn as_mut_array(&mut self) -> &mut [u64; 2] {
    cast_mut(self)
  }

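  /// Lanewise unsigned minimum.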
  #[inline]
  #[must_use]
  pub fn min(self, rhs: Self) -> Self {
    self.simd_lt(rhs).blend(self, rhs)
  }

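  /// Lanewise unsigned maximum.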
  #[inline]
  #[must_use]
  pub fn max(self, rhs: Self) -> Self {
    self.simd_gt(rhs).blend(self, rhs)
  }

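  /// Lanewise multiply that keeps the high 64 bits of each full 128-bit
  /// product (each lane pair is widened to `u128`, multiplied, then shifted
  /// down by 64).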
  #[inline]
  #[must_use]
  pub fn mul_keep_high(self, rhs: Self) -> Self {
    let arr1: [u64; 2] = cast(self);
    let arr2: [u64; 2] = cast(rhs);
    cast([
      ((arr1[0] as u128 * arr2[0] as u128) >> 64) as u64,
      ((arr1[1] as u128 * arr2[1] as u128) >> 64) as u64,
    ])
  }
}