#![cfg(feature = "f16")]
#![doc(hidden)]
use crate::num::Float;
use core::cmp::Ordering;
use core::{fmt, ops};
#[allow(non_camel_case_types)]
#[derive(Default, Copy, Clone)]
pub struct f16 {
bits: u16,
}
unsafe impl Send for f16 {
}
unsafe impl Sync for f16 {
}
impl f16 {
#[inline(always)]
pub const fn to_bits(self) -> u16 {
self.bits
}
#[inline(always)]
pub const fn from_bits(bits: u16) -> Self {
Self {
bits,
}
}
#[inline(always)]
pub fn as_f32(self) -> f32 {
f16_to_f32(self)
}
#[inline(always)]
pub fn from_f32(value: f32) -> Self {
f32_to_f16(value)
}
}
impl PartialEq for f16 {
#[inline(always)]
fn eq(&self, other: &Self) -> bool {
self.as_f32().eq(&other.as_f32())
}
}
impl PartialOrd for f16 {
#[inline(always)]
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
self.as_f32().partial_cmp(&other.as_f32())
}
}
impl fmt::Debug for f16 {
#[inline(always)]
fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
self.as_f32().fmt(formatter)
}
}
impl fmt::Display for f16 {
#[inline(always)]
fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
self.as_f32().fmt(formatter)
}
}
impl ops::Add for f16 {
type Output = Self;
#[inline(always)]
fn add(self, rhs: Self) -> Self::Output {
Self::from_f32(self.as_f32() + rhs.as_f32())
}
}
impl ops::Div for f16 {
type Output = Self;
#[inline(always)]
fn div(self, rhs: Self) -> Self::Output {
Self::from_f32(self.as_f32() / rhs.as_f32())
}
}
impl ops::Mul for f16 {
type Output = Self;
#[inline(always)]
fn mul(self, rhs: Self) -> Self::Output {
Self::from_f32(self.as_f32() * rhs.as_f32())
}
}
impl ops::Sub for f16 {
type Output = Self;
#[inline(always)]
fn sub(self, rhs: Self) -> Self::Output {
Self::from_f32(self.as_f32() - rhs.as_f32())
}
}
impl ops::Rem for f16 {
type Output = Self;
#[inline(always)]
fn rem(self, rhs: Self) -> Self::Output {
Self::from_f32(self.as_f32() % rhs.as_f32())
}
}
impl ops::Neg for f16 {
type Output = Self;
#[inline(always)]
fn neg(self) -> Self::Output {
Self::from_bits(self.bits ^ (1 << 15))
}
}
impl ops::AddAssign for f16 {
#[inline(always)]
fn add_assign(&mut self, rhs: Self) {
*self = *self + rhs;
}
}
impl ops::DivAssign for f16 {
#[inline(always)]
fn div_assign(&mut self, rhs: Self) {
*self = *self / rhs;
}
}
impl ops::MulAssign for f16 {
#[inline(always)]
fn mul_assign(&mut self, rhs: Self) {
*self = *self * rhs;
}
}
impl ops::SubAssign for f16 {
#[inline(always)]
fn sub_assign(&mut self, rhs: Self) {
*self = *self - rhs;
}
}
impl ops::RemAssign for f16 {
#[inline(always)]
fn rem_assign(&mut self, rhs: Self) {
*self = *self % rhs;
}
}
fn f16_to_f32(half: f16) -> f32 {
let man_shift = f32::MANTISSA_SIZE - f16::MANTISSA_SIZE;
let f16_bias = f16::EXPONENT_BIAS - f16::MANTISSA_SIZE;
let f32_bias = f32::EXPONENT_BIAS - f32::MANTISSA_SIZE;
if half.bits & (f16::SIGN_MASK - 1) == 0 {
return f32::from_bits((half.bits as u32) << 16);
}
let half_sign = (half.bits & f16::SIGN_MASK) as u32;
let half_exp = (half.bits & f16::EXPONENT_MASK) as u32;
let half_man = (half.bits & f16::MANTISSA_MASK) as u32;
if half.is_nan() {
return f32::from_bits((half_sign << 16) | 0x7FC0_0000u32 | (half_man << man_shift));
} else if half.is_inf() {
return f32::from_bits((half_sign << 16) | f32::INFINITY_BITS);
}
let sign = half_sign << 16;
let unbiased_exp = ((half_exp as i32) >> f16::MANTISSA_SIZE) - f16_bias;
if half_exp == 0 {
let e = (half_man as u16).leading_zeros() - (16 - f16::MANTISSA_SIZE as u32);
let exp = (f32_bias as u32 - f16_bias as u32 - e) << f32::MANTISSA_SIZE;
let man = (half_man << (f16_bias as u32 - 1 + e)) & f32::MANTISSA_MASK;
return f32::from_bits(sign | exp | man);
}
let exp = ((unbiased_exp + f32_bias) as u32) << f32::MANTISSA_SIZE;
let man = (half_man & f16::MANTISSA_MASK as u32) << man_shift;
f32::from_bits(sign | exp | man)
}
fn f32_to_f16(value: f32) -> f16 {
let man_shift = f32::MANTISSA_SIZE - f16::MANTISSA_SIZE;
let f16_bias = f16::EXPONENT_BIAS - f16::MANTISSA_SIZE;
let f32_bias = f32::EXPONENT_BIAS - f32::MANTISSA_SIZE;
let x = value.to_bits();
let sign = x & f32::SIGN_MASK;
let exp = x & f32::EXPONENT_MASK;
let man = x & f32::MANTISSA_MASK;
if value.is_nan() {
return f16::from_bits((sign >> 16) as u16 | 0x7e00 | (man >> man_shift) as u16);
} else if value.is_inf() {
return f16::from_bits((sign >> 16) as u16 | f16::INFINITY_BITS);
}
let half_sign = sign >> 16;
let unbiased_exp = ((exp >> f32::MANTISSA_SIZE) as i32) - f32_bias;
let half_exp = unbiased_exp + f16_bias;
if unbiased_exp >= 0x1F {
return f16::from_bits(half_sign as u16 | f16::INFINITY_BITS);
}
if half_exp <= 0 {
if f16_bias - 1 - half_exp > f32::MANTISSA_SIZE + 1 {
return f16::from_bits(half_sign as u16);
}
let man = man | f32::HIDDEN_BIT_MASK;
let mut half_man = man >> (f16_bias - 1 - half_exp);
let round_bit = 1 << (man_shift - half_exp);
if (man & round_bit) != 0 && (man & (3 * round_bit - 1)) != 0 {
half_man += 1;
}
return f16::from_bits((half_sign | half_man) as u16);
}
let half_exp = (half_exp as u32) << f16::MANTISSA_SIZE;
let half_man = man >> man_shift;
let round_bit = 1 << (man_shift - 1);
if (man & round_bit) != 0 && (man & (3 * round_bit - 1)) != 0 {
f16::from_bits(((half_sign | half_exp | half_man) + 1) as u16)
} else {
f16::from_bits((half_sign | half_exp | half_man) as u16)
}
}