1use super::support::{DInt, FpResult, HInt, IntTy, Round, Status};
5use super::{CastFrom, CastInto, Float, Int, MinInt};
6
7#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
11pub fn fma(x: f64, y: f64, z: f64) -> f64 {
12 select_implementation! {
13 name: fma,
14 use_arch: all(target_arch = "aarch64", target_feature = "neon"),
15 args: x, y, z,
16 }
17
18 fma_round(x, y, z, Round::Nearest).val
19}
20
21#[cfg(f128_enabled)]
25#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
26pub fn fmaf128(x: f128, y: f128, z: f128) -> f128 {
27 fma_round(x, y, z, Round::Nearest).val
28}
29
30#[inline]
33pub fn fma_round<F>(x: F, y: F, z: F, _round: Round) -> FpResult<F>
34where
35 F: Float,
36 F: CastFrom<F::SignedInt>,
37 F: CastFrom<i8>,
38 F::Int: HInt,
39 u32: CastInto<F::Int>,
40{
41 let
42 let zero = IntTy::<F>::ZERO;
43
44 let nx = Norm::from_float(x);
46 let ny = Norm::from_float(y);
47 let nz = Norm::from_float(z);
48
49 if nx.is_zero_nan_inf() || ny.is_zero_nan_inf() {
50 return FpResult::ok(x * y + z);
52 }
53
54 if nz.is_zero_nan_inf() {
55 if nz.is_zero() {
56 return FpResult::ok(x * y);
58 }
59 return FpResult::ok(z);
61 }
62
63 let zhi: F::Int;
65 let zlo: F::Int;
66 let (mut rlo, mut rhi) = nx.m.widen_mul(ny.m).lo_hi();
67
68 let mut e: i32 = nx.e + ny.e;
70 let mut d: i32 = nz.e - e;
72 let sbits = F::BITS as i32;
73
74 if d > 0 {
76 if d < sbits {
78 zlo = nz.m << d;
81 zhi = nz.m >> (sbits - d);
82 } else {
83 zlo = zero;
86 zhi = nz.m;
87 d -= sbits;
88
89 e = nz.e - sbits;
91
92 if d == 0 {
93 } else if d < sbits {
95 rlo = (rhi << (sbits - d)) | (rlo >> d);
97 rlo |= IntTy::<F>::from((rlo << (sbits - d)) != zero);
99 rhi = rhi >> d;
100 } else {
101 rlo = one;
104 rhi = zero;
105 }
106 }
107 } else {
108 zhi = zero;
110 d = -d;
111 if d == 0 {
112 zlo = nz.m;
114 } else if d < sbits {
115 let sticky = IntTy::<F>::from((nz.m << (sbits - d)) != zero);
117 zlo = (nz.m >> d) | sticky;
118 } else {
119 zlo = one;
121 }
122 }
123
124 let mut neg = nx.neg ^ ny.neg;
127 let samesign: bool = !neg ^ nz.neg;
128 let mut rhi_nonzero = true;
129
130 if samesign {
131 rlo = rlo.wrapping_add(zlo);
133 rhi += zhi + IntTy::<F>::from(rlo < zlo);
134 } else {
135 let (res, borrow) = rlo.overflowing_sub(zlo);
137 rlo = res;
138 rhi = rhi.wrapping_sub(zhi.wrapping_add(IntTy::<F>::from(borrow)));
139 if (rhi >> (F::BITS - 1)) != zero {
140 rlo = rlo.signed().wrapping_neg().unsigned();
141 rhi = rhi.signed().wrapping_neg().unsigned() - IntTy::<F>::from(rlo != zero);
142 neg = !neg;
143 }
144 rhi_nonzero = rhi != zero;
145 }
146
147 if rhi_nonzero {
151 e += sbits;
153 d = rhi.leading_zeros() as i32 - 1;
154 rhi = (rhi << d) | (rlo >> (sbits - d));
155 rhi |= IntTy::<F>::from((rlo << d) != zero);
157 } else if rlo != zero {
158 d = rlo.leading_zeros() as i32 - 1;
160 if d < 0 {
161 rhi = (rlo >> 1) | (rlo & one);
163 } else {
164 rhi = rlo << d;
165 }
166 } else {
167 return FpResult::ok(x * y + z);
169 }
170
171 e -= d;
172
173 let mut i: F::SignedInt = rhi.signed();
176
177 if neg {
178 i = -i;
179 }
180
181 let mut r: F = F::cast_from_lossy(i);
183
184 let max_pow = F::BITS - 1 + F::EXP_BIAS;
188
189 let mut status = Status::OK;
190
191 if e < -(max_pow as i32 - 2) {
192 if e == -(max_pow as i32 - 1) {
194 let mut c = F::from_parts(false, max_pow, zero);
195 if neg {
196 c = -c;
197 }
198
199 if r == c {
200 status.set_underflow(true);
202 r = F::MIN_POSITIVE_NORMAL.copysign(r);
203 return FpResult::new(r, status);
204 }
205
206 if (rhi << (F::SIG_BITS + 1)) != zero {
207 let iu: F::Int = (rhi >> 1) | (rhi & one) | (one << (F::BITS - 2));
210 i = iu.signed();
211
212 if neg {
213 i = -i;
214 }
215
216 r = F::cast_from_lossy(i);
217
218 r = F::cast_from(2i8) * r - c;
220 status.set_underflow(true);
221 }
222 } else {
223 d = F::EXP_BITS as i32 - 1;
225 let sticky = IntTy::<F>::from(rhi << (F::BITS as i32 - d) != zero);
226 i = (((rhi >> d) | sticky) << d).signed();
227
228 if neg {
229 i = -i;
230 }
231
232 r = F::cast_from_lossy(i);
233 }
234 }
235
236 FpResult::new(super::generic::scalbn(r, e), status)
238}
239
240#[derive(Clone, Copy, Debug)]
242struct Norm<F: Float> {
243 m: F::Int,
245 e: i32,
248 neg: bool,
249}
250
251impl<F: Float> Norm<F> {
252 const EXP_UNBIAS: u32 = F::EXP_BIAS + F::SIG_BITS + 1;
254
255 const ZERO_INF_NAN: u32 = F::EXP_SAT - Self::EXP_UNBIAS;
258
259 fn from_float(x: F) -> Self {
260 let mut ix = x.to_bits();
261 let mut e = x.ex() as i32;
262 let neg = x.is_sign_negative();
263 if e == 0 {
264 let scale_i = F::BITS - 1;
266 let scale_f = F::from_parts(false, scale_i + F::EXP_BIAS, F::Int::ZERO);
267 let scaled = x * scale_f;
268 ix = scaled.to_bits();
269 e = scaled.ex() as i32;
270 e = if e == 0 {
271 1 << F::EXP_BITS
274 } else {
275 e - scale_i as i32
277 };
278 }
279
280 e -= Self::EXP_UNBIAS as i32;
281
282 ix &= F::SIG_MASK;
284 ix |= F::IMPLICIT_BIT;
285 ix <<= 1;
286
287 Self { m: ix, e, neg }
288 }
289
290 fn is_zero_nan_inf(self) -> bool {
292 self.e >= Self::ZERO_INF_NAN as i32
293 }
294
295 fn is_zero(self) -> bool {
297 self.e > Self::ZERO_INF_NAN as i32
299 }
300}
301
302#[cfg(test)]
303mod tests {
304 use super::*;
305
306 fn spec_test<F>()
308 where
309 F: Float,
310 F: CastFrom<F::SignedInt>,
311 F: CastFrom<i8>,
312 F::Int: HInt,
313 u32: CastInto<F::Int>,
314 {
315 let x = F::from_bits(F::Int::ONE);
316 let y = F::from_bits(F::Int::ONE);
317 let z = F::ZERO;
318
319 let fma = |x, y, z| fma_round(x, y, z, Round::Nearest).val;
320
321 assert_biteq!(fma(x, y, z), F::ZERO);
325 assert_biteq!(fma(x, -y, z), F::NEG_ZERO);
326 assert_biteq!(fma(-x, y, z), F::NEG_ZERO);
327 assert_biteq!(fma(-x, -y, z), F::ZERO);
328 }
329
330 #[test]
331 fn spec_test_f32() {
332 spec_test::<f32>();
333 }
334
335 #[test]
336 fn spec_test_f64() {
337 spec_test::<f64>();
338
339 let expect_underflow = [
340 (
341 hf64!("0x1.0p-1070"),
342 hf64!("0x1.0p-1070"),
343 hf64!("0x1.ffffffffffffp-1023"),
344 hf64!("0x0.ffffffffffff8p-1022"),
345 ),
346 (
347 hf64!("0x1.0p-1070"),
350 hf64!("0x1.0p-1070"),
351 hf64!("-0x1.0p-1022"),
352 hf64!("-0x1.0p-1022"),
353 ),
354 ];
355
356 for (x, y, z, res) in expect_underflow {
357 let FpResult { val, status } = fma_round(x, y, z, Round::Nearest);
358 assert_biteq!(val, res);
359 assert_eq!(status, Status::UNDERFLOW);
360 }
361 }
362
363 #[test]
364 #[cfg(f128_enabled)]
365 fn spec_test_f128() {
366 spec_test::<f128>();
367 }
368
369 #[test]
370 fn fma_segfault() {
371 assert_eq!(
373 fma(
374 -0.0000000000000002220446049250313,
375 -0.0000000000000002220446049250313,
376 -0.0000000000000002220446049250313
377 ),
378 -0.00000000000000022204460492503126,
379 );
380
381 let result = fma(-0.992, -0.992, -0.992);
382 #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
384 let result = force_eval!(result);
385 assert_eq!(result, -0.007936000000000007,);
386 }
387
388 #[test]
389 fn fma_sbb() {
390 assert_eq!(
391 fma(-(1.0 - f64::EPSILON), f64::MIN, f64::MIN),
392 -3991680619069439e277
393 );
394 }
395
396 #[test]
397 fn fma_underflow() {
398 assert_eq!(
399 fma(1.1102230246251565e-16, -9.812526705433188e-305, 1.0894e-320),
400 0.0,
401 );
402 }
403}