use super::*;

pick! {
  if #[cfg(target_feature="sse2")] {
    #[derive(Default, Clone, Copy, PartialEq, Eq)]
    #[repr(C, align(16))]
    pub struct i32x4 { pub(crate) sse: m128i }
  } else if #[cfg(target_feature="simd128")] {
    use core::arch::wasm32::*;

    #[derive(Clone, Copy)]
    #[repr(transparent)]
    pub struct i32x4 { pub(crate) simd: v128 }

    impl Default for i32x4 {
      fn default() -> Self {
        Self::splat(0)
      }
    }

    impl PartialEq for i32x4 {
      fn eq(&self, other: &Self) -> bool {
        u32x4_all_true(i32x4_eq(self.simd, other.simd))
      }
    }

    impl Eq for i32x4 { }
  } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
    use core::arch::aarch64::*;
    #[repr(C)]
    #[derive(Copy, Clone)]
    pub struct i32x4 { pub(crate) neon : int32x4_t }

    impl Default for i32x4 {
      #[inline]
      fn default() -> Self {
        Self::splat(0)
      }
    }

    impl PartialEq for i32x4 {
      #[inline]
      fn eq(&self, other: &Self) -> bool {
        unsafe { vminvq_u32(vceqq_s32(self.neon, other.neon)) == u32::MAX }
      }
    }

    impl Eq for i32x4 { }
  } else {
    #[derive(Default, Clone, Copy, PartialEq, Eq)]
    #[repr(C, align(16))]
    pub struct i32x4 { pub(crate) arr: [i32; 4] }
  }
}

int_uint_consts!(i32, 4, i32x4, 128);

unsafe impl Zeroable for i32x4 {}
unsafe impl Pod for i32x4 {}

impl AlignTo for i32x4 {
  type Elem = i32;
}

impl Add for i32x4 {
  type Output = Self;
  #[inline]
  fn add(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: add_i32_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: i32x4_add(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vaddq_s32(self.neon, rhs.neon) } }
      } else {
        Self { arr: [
          self.arr[0].wrapping_add(rhs.arr[0]),
          self.arr[1].wrapping_add(rhs.arr[1]),
          self.arr[2].wrapping_add(rhs.arr[2]),
          self.arr[3].wrapping_add(rhs.arr[3]),
        ]}
      }
    }
  }
}

impl Sub for i32x4 {
  type Output = Self;
  #[inline]
  fn sub(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: sub_i32_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: i32x4_sub(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vsubq_s32(self.neon, rhs.neon) } }
      } else {
        Self { arr: [
          self.arr[0].wrapping_sub(rhs.arr[0]),
          self.arr[1].wrapping_sub(rhs.arr[1]),
          self.arr[2].wrapping_sub(rhs.arr[2]),
          self.arr[3].wrapping_sub(rhs.arr[3]),
        ]}
      }
    }
  }
}

impl Mul for i32x4 {
  type Output = Self;
  #[inline]
  fn mul(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse4.1")] {
        Self { sse: mul_32_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: i32x4_mul(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vmulq_s32(self.neon, rhs.neon) } }
      } else {
        let arr1: [i32; 4] = cast(self);
        let arr2: [i32; 4] = cast(rhs);
        cast([
          arr1[0].wrapping_mul(arr2[0]),
          arr1[1].wrapping_mul(arr2[1]),
          arr1[2].wrapping_mul(arr2[2]),
          arr1[3].wrapping_mul(arr2[3]),
        ])
      }
    }
  }
}

impl Add<i32> for i32x4 {
  type Output = Self;
  #[inline]
  fn add(self, rhs: i32) -> Self::Output {
    self.add(Self::splat(rhs))
  }
}

impl Sub<i32> for i32x4 {
  type Output = Self;
  #[inline]
  fn sub(self, rhs: i32) -> Self::Output {
    self.sub(Self::splat(rhs))
  }
}

impl Mul<i32> for i32x4 {
  type Output = Self;
  #[inline]
  fn mul(self, rhs: i32) -> Self::Output {
    self.mul(Self::splat(rhs))
  }
}

impl Add<i32x4> for i32 {
  type Output = i32x4;
  #[inline]
  fn add(self, rhs: i32x4) -> Self::Output {
    i32x4::splat(self).add(rhs)
  }
}

impl Sub<i32x4> for i32 {
  type Output = i32x4;
  #[inline]
  fn sub(self, rhs: i32x4) -> Self::Output {
    i32x4::splat(self).sub(rhs)
  }
}

impl Mul<i32x4> for i32 {
  type Output = i32x4;
  #[inline]
  fn mul(self, rhs: i32x4) -> Self::Output {
    i32x4::splat(self).mul(rhs)
  }
}

impl BitAnd for i32x4 {
  type Output = Self;
  #[inline]
  fn bitand(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: bitand_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: v128_and(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vandq_s32(self.neon, rhs.neon) } }
      } else {
        Self { arr: [
          self.arr[0].bitand(rhs.arr[0]),
          self.arr[1].bitand(rhs.arr[1]),
          self.arr[2].bitand(rhs.arr[2]),
          self.arr[3].bitand(rhs.arr[3]),
        ]}
      }
    }
  }
}

impl BitOr for i32x4 {
  type Output = Self;
  #[inline]
  fn bitor(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: bitor_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: v128_or(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vorrq_s32(self.neon, rhs.neon) } }
      } else {
        Self { arr: [
          self.arr[0].bitor(rhs.arr[0]),
          self.arr[1].bitor(rhs.arr[1]),
          self.arr[2].bitor(rhs.arr[2]),
          self.arr[3].bitor(rhs.arr[3]),
        ]}
      }
    }
  }
}

impl BitXor for i32x4 {
  type Output = Self;
  #[inline]
  fn bitxor(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: bitxor_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: v128_xor(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: veorq_s32(self.neon, rhs.neon) } }
      } else {
        Self { arr: [
          self.arr[0].bitxor(rhs.arr[0]),
          self.arr[1].bitxor(rhs.arr[1]),
          self.arr[2].bitxor(rhs.arr[2]),
          self.arr[3].bitxor(rhs.arr[3]),
        ]}
      }
    }
  }
}

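// Implements `Shl<T> for i32x4` for each scalar shift type listed below;
// every lane is shifted left by the same amount.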
macro_rules! impl_shl_t_for_i32x4 {
  ($($shift_type:ty),+ $(,)?) => {
    $(impl Shl<$shift_type> for i32x4 {
      type Output = Self;
      #[inline]
      fn shl(self, rhs: $shift_type) -> Self::Output {
        pick! {
          if #[cfg(target_feature="sse2")] {
            let shift = cast([rhs as u64, 0]);
            Self { sse: shl_all_u32_m128i(self.sse, shift) }
          } else if #[cfg(target_feature="simd128")] {
            Self { simd: i32x4_shl(self.simd, rhs as u32) }
          } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
            unsafe { Self { neon: vshlq_s32(self.neon, vmovq_n_s32(rhs as i32)) } }
          } else {
            let u = rhs as u32;
            Self { arr: [
              self.arr[0].wrapping_shl(u),
              self.arr[1].wrapping_shl(u),
              self.arr[2].wrapping_shl(u),
              self.arr[3].wrapping_shl(u),
            ]}
          }
        }
      }
    })+
  };
}
impl_shl_t_for_i32x4!(i8, u8, i16, u16, i32, u32, i64, u64, i128, u128);

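// Implements `Shr<T> for i32x4` for each scalar shift type listed below;
// every lane is arithmetically (sign-extending) shifted right by the same amount.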
macro_rules! impl_shr_t_for_i32x4 {
  ($($shift_type:ty),+ $(,)?) => {
    $(impl Shr<$shift_type> for i32x4 {
      type Output = Self;
      #[inline]
      fn shr(self, rhs: $shift_type) -> Self::Output {
        pick! {
          if #[cfg(target_feature="sse2")] {
            let shift = cast([rhs as u64, 0]);
            Self { sse: shr_all_i32_m128i(self.sse, shift) }
          } else if #[cfg(target_feature="simd128")] {
            Self { simd: i32x4_shr(self.simd, rhs as u32) }
          } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
            unsafe { Self { neon: vshlq_s32(self.neon, vmovq_n_s32(-(rhs as i32))) } }
          } else {
            let u = rhs as u32;
            Self { arr: [
              self.arr[0].wrapping_shr(u),
              self.arr[1].wrapping_shr(u),
              self.arr[2].wrapping_shr(u),
              self.arr[3].wrapping_shr(u),
            ]}
          }
        }
      }
    })+
  };
}
impl_shr_t_for_i32x4!(i8, u8, i16, u16, i32, u32, i64, u64, i128, u128);

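/// Shifts lanes by the corresponding lane.
///
/// Bitwise shift-right; yields `self >> mask(rhs)`, where `mask` keeps only
/// the low 5 bits of each `rhs` lane so the shift can't exceed the bit width
/// of the lane (the same behavior as `wrapping_shr`).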
impl Shr<i32x4> for i32x4 {
  type Output = Self;

  #[inline]
  fn shr(self, rhs: i32x4) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        // mask the shift count to 31 bits so out-of-range counts can't be used
        let shift_by = bitand_m128i(rhs.sse, set_splat_i32_m128i(31));
        Self { sse: shr_each_i32_m128i(self.sse, shift_by) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {
          // NEON shifts right by shifting left with a negated (masked) count
          let shift_by = vnegq_s32(vandq_s32(rhs.neon, vmovq_n_s32(31)));
          Self { neon: vshlq_s32(self.neon, shift_by) }
        }
      } else {
        let arr: [i32; 4] = cast(self);
        let rhs: [i32; 4] = cast(rhs);
        cast([
          arr[0].wrapping_shr(rhs[0] as u32),
          arr[1].wrapping_shr(rhs[1] as u32),
          arr[2].wrapping_shr(rhs[2] as u32),
          arr[3].wrapping_shr(rhs[3] as u32),
        ])
      }
    }
  }
}

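/// Shifts lanes by the corresponding lane.
///
/// Bitwise shift-left; yields `self << mask(rhs)`, where `mask` keeps only
/// the low 5 bits of each `rhs` lane so the shift can't exceed the bit width
/// of the lane (the same behavior as `wrapping_shl`).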
impl Shl<i32x4> for i32x4 {
  type Output = Self;

  #[inline]
  fn shl(self, rhs: i32x4) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        // mask the shift count to 31 bits so out-of-range counts can't be used
        let shift_by = bitand_m128i(rhs.sse, set_splat_i32_m128i(31));
        Self { sse: shl_each_u32_m128i(self.sse, shift_by) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {
          // NEON shifts left by the (masked) per-lane count
          let shift_by = vandq_s32(rhs.neon, vmovq_n_s32(31));
          Self { neon: vshlq_s32(self.neon, shift_by) }
        }
      } else {
        let arr: [i32; 4] = cast(self);
        let rhs: [i32; 4] = cast(rhs);
        cast([
          arr[0].wrapping_shl(rhs[0] as u32),
          arr[1].wrapping_shl(rhs[1] as u32),
          arr[2].wrapping_shl(rhs[2] as u32),
          arr[3].wrapping_shl(rhs[3] as u32),
        ])
      }
    }
  }
}

impl CmpEq for i32x4 {
  type Output = Self;
  #[inline]
  fn simd_eq(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: cmp_eq_mask_i32_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: i32x4_eq(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vreinterpretq_s32_u32(vceqq_s32(self.neon, rhs.neon)) } }
      } else {
        Self { arr: [
          if self.arr[0] == rhs.arr[0] { -1 } else { 0 },
          if self.arr[1] == rhs.arr[1] { -1 } else { 0 },
          if self.arr[2] == rhs.arr[2] { -1 } else { 0 },
          if self.arr[3] == rhs.arr[3] { -1 } else { 0 },
        ]}
      }
    }
  }
}

impl CmpGt for i32x4 {
  type Output = Self;
  #[inline]
  fn simd_gt(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: cmp_gt_mask_i32_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: i32x4_gt(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vreinterpretq_s32_u32(vcgtq_s32(self.neon, rhs.neon)) } }
      } else {
        Self { arr: [
          if self.arr[0] > rhs.arr[0] { -1 } else { 0 },
          if self.arr[1] > rhs.arr[1] { -1 } else { 0 },
          if self.arr[2] > rhs.arr[2] { -1 } else { 0 },
          if self.arr[3] > rhs.arr[3] { -1 } else { 0 },
        ]}
      }
    }
  }
}

impl CmpLt for i32x4 {
  type Output = Self;
  #[inline]
  fn simd_lt(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: cmp_lt_mask_i32_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: i32x4_lt(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vreinterpretq_s32_u32(vcltq_s32(self.neon, rhs.neon)) } }
      } else {
        Self { arr: [
          if self.arr[0] < rhs.arr[0] { -1 } else { 0 },
          if self.arr[1] < rhs.arr[1] { -1 } else { 0 },
          if self.arr[2] < rhs.arr[2] { -1 } else { 0 },
          if self.arr[3] < rhs.arr[3] { -1 } else { 0 },
        ]}
      }
    }
  }
}

impl i32x4 {
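  /// Creates a vector from an array of lanes (element `i` becomes lane `i`).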
  #[inline]
  #[must_use]
  pub const fn new(array: [i32; 4]) -> Self {
    unsafe { core::mem::transmute(array) }
  }
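
  /// Lanewise blend, using `self` as the mask: each result lane is taken from
  /// `t` where the mask lane is all ones and from `f` where it is all zeros.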
  #[inline]
  #[must_use]
  pub fn blend(self, t: Self, f: Self) -> Self {
    pick! {
      if #[cfg(target_feature="sse4.1")] {
        Self { sse: blend_varying_i8_m128i(f.sse, t.sse, self.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: v128_bitselect(t.simd, f.simd, self.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vbslq_s32(vreinterpretq_u32_s32(self.neon), t.neon, f.neon) } }
      } else {
        generic_bit_blend(self, t, f)
      }
    }
  }

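  /// Multiplies corresponding lanes and returns the full 64-bit products as
  /// an `i64x4`.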
  #[inline]
  #[must_use]
  pub fn mul_widen(self, rhs: Self) -> i64x4 {
    pick! {
      if #[cfg(target_feature="avx2")] {
        let a = convert_to_i64_m256i_from_i32_m128i(self.sse);
        let b = convert_to_i64_m256i_from_i32_m128i(rhs.sse);
        cast(mul_i64_low_bits_m256i(a, b))
      } else if #[cfg(target_feature="sse4.1")] {
        let evenp = mul_widen_i32_odd_m128i(self.sse, rhs.sse);

        let oddp = mul_widen_i32_odd_m128i(
          shr_imm_u64_m128i::<32>(self.sse),
          shr_imm_u64_m128i::<32>(rhs.sse));

        i64x4 {
          a: i64x2 { sse: unpack_low_i64_m128i(evenp, oddp) },
          b: i64x2 { sse: unpack_high_i64_m128i(evenp, oddp) },
        }
      } else if #[cfg(target_feature="simd128")] {
        i64x4 {
          a: i64x2 { simd: i64x2_extmul_low_i32x4(self.simd, rhs.simd) },
          b: i64x2 { simd: i64x2_extmul_high_i32x4(self.simd, rhs.simd) },
        }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))] {
        unsafe {
          i64x4 {
            a: i64x2 { neon: vmull_s32(vget_low_s32(self.neon), vget_low_s32(rhs.neon)) },
            b: i64x2 { neon: vmull_s32(vget_high_s32(self.neon), vget_high_s32(rhs.neon)) },
          }
        }
      } else {
        let a: [i32; 4] = cast(self);
        let b: [i32; 4] = cast(rhs);
        cast([
          i64::from(a[0]) * i64::from(b[0]),
          i64::from(a[1]) * i64::from(b[1]),
          i64::from(a[2]) * i64::from(b[2]),
          i64::from(a[3]) * i64::from(b[3]),
        ])
      }
    }
  }

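  /// Lanewise wrapping absolute value (`i32::MIN` stays `i32::MIN`).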
  #[inline]
  #[must_use]
  pub fn abs(self) -> Self {
    pick! {
      if #[cfg(target_feature="ssse3")] {
        Self { sse: abs_i32_m128i(self.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: i32x4_abs(self.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vabsq_s32(self.neon) } }
      } else {
        let arr: [i32; 4] = cast(self);
        cast([
          arr[0].wrapping_abs(),
          arr[1].wrapping_abs(),
          arr[2].wrapping_abs(),
          arr[3].wrapping_abs(),
        ])
      }
    }
  }

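  /// Lanewise absolute value as `u32x4`, so `i32::MIN` maps to `2147483648`
  /// instead of wrapping.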
  #[inline]
  #[must_use]
  pub fn unsigned_abs(self) -> u32x4 {
    pick! {
      if #[cfg(target_feature="ssse3")] {
        u32x4 { sse: abs_i32_m128i(self.sse) }
      } else if #[cfg(target_feature="simd128")] {
        u32x4 { simd: i32x4_abs(self.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { u32x4 { neon: vreinterpretq_u32_s32(vabsq_s32(self.neon)) } }
      } else {
        let arr: [i32; 4] = cast(self);
        cast([
          arr[0].unsigned_abs(),
          arr[1].unsigned_abs(),
          arr[2].unsigned_abs(),
          arr[3].unsigned_abs(),
        ])
      }
    }
  }

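  /// Horizontal wrapping add of all four lanes.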
  #[inline]
  #[must_use]
  pub fn reduce_add(self) -> i32 {
    pick! {
      if #[cfg(target_feature="sse2")] {
        let hi64 = unpack_high_i64_m128i(self.sse, self.sse);
        let sum64 = add_i32_m128i(hi64, self.sse);
        let hi32 = shuffle_ai_f32_all_m128i::<0b10_11_00_01>(sum64);
        let sum32 = add_i32_m128i(sum64, hi32);
        get_i32_from_m128i_s(sum32)
      } else {
        let arr: [i32; 4] = cast(self);
        arr[0].wrapping_add(arr[1]).wrapping_add(
          arr[2].wrapping_add(arr[3]))
      }
    }
  }

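  /// Horizontal maximum of all four lanes.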
  #[inline]
  #[must_use]
  pub fn reduce_max(self) -> i32 {
    let arr: [i32; 4] = cast(self);
    arr[0].max(arr[1]).max(arr[2].max(arr[3]))
  }

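  /// Horizontal minimum of all four lanes.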
  #[inline]
  #[must_use]
  pub fn reduce_min(self) -> i32 {
    let arr: [i32; 4] = cast(self);
    arr[0].min(arr[1]).min(arr[2].min(arr[3]))
  }

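  /// Lanewise maximum of `self` and `rhs`.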
  #[inline]
  #[must_use]
  pub fn max(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="sse4.1")] {
        Self { sse: max_i32_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: i32x4_max(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vmaxq_s32(self.neon, rhs.neon) } }
      } else {
        self.simd_lt(rhs).blend(rhs, self)
      }
    }
  }
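
  /// Lanewise minimum of `self` and `rhs`.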
  #[inline]
  #[must_use]
  pub fn min(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="sse4.1")] {
        Self { sse: min_i32_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: i32x4_min(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vminq_s32(self.neon, rhs.neon) } }
      } else {
        self.simd_lt(rhs).blend(self, rhs)
      }
    }
  }
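
  /// Converts each `i32` lane to `f32`.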
  #[inline]
  #[must_use]
  pub fn round_float(self) -> f32x4 {
    pick! {
      if #[cfg(target_feature="sse2")] {
        cast(convert_to_m128_from_i32_m128i(self.sse))
      } else if #[cfg(target_feature="simd128")] {
        cast(Self { simd: f32x4_convert_i32x4(self.simd) })
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        cast(unsafe { Self { neon: vreinterpretq_s32_f32(vcvtq_f32_s32(self.neon)) } })
      } else {
        let arr: [i32; 4] = cast(self);
        cast([
          arr[0] as f32,
          arr[1] as f32,
          arr[2] as f32,
          arr[3] as f32,
        ])
      }
    }
  }

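  /// Packs the sign bit of each lane into the low 4 bits of a `u32`
  /// (lane 0 becomes bit 0).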
  #[inline]
  #[must_use]
  pub fn to_bitmask(self) -> u32 {
    pick! {
      if #[cfg(target_feature="sse2")] {
        // the i32 sign bit is the f32 sign bit, so a float move-mask works here
        move_mask_m128(cast(self.sse)) as u32
      } else if #[cfg(target_feature="simd128")] {
        u32x4_bitmask(self.simd) as u32
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {
          // set all bits of each lane whose sign bit is set
          let masked = vcltq_s32(self.neon, vdupq_n_s32(0));

          // keep one distinct bit per lane, then horizontally add into the mask
          let selectbit: uint32x4_t = core::mem::transmute([1u32, 2, 4, 8]);
          let r = vandq_u32(masked, selectbit);

          vaddvq_u32(r) as u32
        }
      } else {
        ((self.arr[0] < 0) as u32) << 0 |
        ((self.arr[1] < 0) as u32) << 1 |
        ((self.arr[2] < 0) as u32) << 2 |
        ((self.arr[3] < 0) as u32) << 3
      }
    }
  }

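  /// Returns `true` if any lane has its sign bit set.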
  #[inline]
  #[must_use]
  pub fn any(self) -> bool {
    pick! {
      if #[cfg(target_feature="sse2")] {
        move_mask_m128(cast(self.sse)) != 0
      } else if #[cfg(target_feature="simd128")] {
        u32x4_bitmask(self.simd) != 0
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))] {
        unsafe {
          vminvq_s32(self.neon) < 0
        }
      } else {
        let v: [u64; 2] = cast(self);
        ((v[0] | v[1]) & 0x8000000080000000) != 0
      }
    }
  }

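  /// Returns `true` if every lane has its sign bit set.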
  #[inline]
  #[must_use]
  pub fn all(self) -> bool {
    pick! {
      if #[cfg(target_feature="sse2")] {
        move_mask_m128(cast(self.sse)) == 0b1111
      } else if #[cfg(target_feature="simd128")] {
        u32x4_bitmask(self.simd) == 0b1111
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {
          vmaxvq_s32(self.neon) < 0
        }
      } else {
        let v: [u64; 2] = cast(self);
        (v[0] & v[1] & 0x8000000080000000) == 0x8000000080000000
      }
    }
  }

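  /// Returns `true` if no lane has its sign bit set.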
  #[inline]
  #[must_use]
  pub fn none(self) -> bool {
    !self.any()
  }

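  /// Treats the four vectors as the rows of a 4x4 matrix and returns the
  /// transposed matrix.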
  #[must_use]
  #[inline]
  pub fn transpose(data: [i32x4; 4]) -> [i32x4; 4] {
    pick! {
      if #[cfg(target_feature="sse")] {
        let mut e0 = data[0];
        let mut e1 = data[1];
        let mut e2 = data[2];
        let mut e3 = data[3];

        transpose_four_m128(
          cast_mut(&mut e0.sse),
          cast_mut(&mut e1.sse),
          cast_mut(&mut e2.sse),
          cast_mut(&mut e3.sse),
        );

        [e0, e1, e2, e3]
      } else {
        #[inline(always)]
        fn transpose_column(data: &[i32x4; 4], index: usize) -> i32x4 {
          i32x4::new([
            data[0].as_array()[index],
            data[1].as_array()[index],
            data[2].as_array()[index],
            data[3].as_array()[index],
          ])
        }

        [
          transpose_column(&data, 0),
          transpose_column(&data, 1),
          transpose_column(&data, 2),
          transpose_column(&data, 3),
        ]
      }
    }
  }

  #[inline]
  pub fn to_array(self) -> [i32; 4] {
    cast(self)
  }

  #[inline]
  pub fn as_array(&self) -> &[i32; 4] {
    cast_ref(self)
  }

  #[inline]
  pub fn as_mut_array(&mut self) -> &mut [i32; 4] {
    cast_mut(self)
  }
}