Source code for gfloat.round

# Copyright (c) 2024 Graphcore Ltd. All rights reserved.

import math

import numpy as np

from .types import FormatInfo, RoundMode


def _isodd(v: int) -> bool:
    return v & 0x1 == 1


[docs] def round_float( fi: FormatInfo, v: float, rnd: RoundMode = RoundMode.TiesToEven, sat: bool = False ) -> float: """ Round input to the given :py:class:`FormatInfo`, given rounding mode and saturation flag An input NaN will convert to a NaN in the target. An input Infinity will convert to the largest float if :paramref:`sat`, otherwise to an Inf, if present, otherwise to a NaN. Negative zero will be returned if the format has negative zero, otherwise zero. Args: fi (FormatInfo): Describes the target format v (float): Input value to be rounded rnd (RoundMode): Rounding mode to use sat (bool): Saturation flag: if True, round overflowed values to `fi.max` Returns: A float which is one of the values in the format. Raises: ValueError: The target format cannot represent the input (e.g. converting a `NaN`, or an `Inf` when the target has no `NaN` or `Inf`, and :paramref:`sat` is false) """ # Constants p = fi.precision bias = fi.expBias if np.isnan(v): if fi.num_nans == 0: raise ValueError(f"No NaN in format {fi}") # Note that this does not preserve the NaN payload return np.nan # Extract sign sign = np.signbit(v) and fi.is_signed vpos = -v if sign else v if np.isinf(vpos): result = np.inf elif vpos == 0: result = 0 else: # Extract exponent expval = int(np.floor(np.log2(vpos))) # Effective precision, accounting for right shift for subnormal values if fi.has_subnormals: expval = max(expval, 1 - bias) # Lift to "integer * 2^e" expval = expval - p + 1 # use ldexp instead of vpos*2**-expval to avoid overflow fsignificand = math.ldexp(vpos, -expval) # Round isignificand = math.floor(fsignificand) delta = fsignificand - isignificand if ( (rnd == RoundMode.TowardPositive and not sign and delta > 0) or (rnd == RoundMode.TowardNegative and sign and delta > 0) or (rnd == RoundMode.TiesToAway and delta >= 0.5) or (rnd == RoundMode.TiesToEven and delta > 0.5) or (rnd == RoundMode.TiesToEven and delta == 0.5 and _isodd(isignificand)) ): isignificand += 1 ## Special case for Precision=1, all-log format with zero. # The logic is simply duplicated (and isignificand overwritten) for clarity. if fi.precision == 1: isignificand = math.floor(fsignificand) code_is_odd = isignificand != 0 and _isodd(expval + bias) if ( (rnd == RoundMode.TowardPositive and not sign and delta > 0) or (rnd == RoundMode.TowardNegative and sign and delta > 0) or (rnd == RoundMode.TiesToAway and delta >= 0.5) or (rnd == RoundMode.TiesToEven and delta > 0.5) or (rnd == RoundMode.TiesToEven and delta == 0.5 and code_is_odd) ): # Go to nextUp. # Increment isignificand if zero, # else increment exponent if isignificand == 0: isignificand = 1 else: assert isignificand == 1 expval += 1 ## End special case for Precision=1. # Reconstruct rounded result to float result = isignificand * (2.0**expval) if result == 0: if sign and fi.has_nz: return -0.0 else: return 0.0 # Overflow amax = -fi.min if sign else fi.max if result > amax: if ( sat or (rnd == RoundMode.TowardNegative and not sign and np.isfinite(v)) or (rnd == RoundMode.TowardPositive and sign and np.isfinite(v)) or (rnd == RoundMode.TowardZero and np.isfinite(v)) ): result = amax else: if fi.has_infs: result = np.inf elif fi.num_nans > 0: result = np.nan else: raise ValueError(f"No Infs or NaNs in format {fi}, and sat=False") # Set sign if sign: result = -result return result
[docs] def encode_float(fi: FormatInfo, v: float) -> int: """ Encode input to the given :py:class:`FormatInfo`. Will round toward zero if :paramref:`v` is not in the value set. Will saturate to `Inf`, `NaN`, `fi.max` in order of precedence. Encode -0 to 0 if not `fi.has_nz` For other roundings and saturations, call :func:`round_float` first. Args: fi (FormatInfo): Describes the target format v (float): The value to be encoded. Returns: The integer code point """ # Format Constants k = fi.bits p = fi.precision t = p - 1 # Encode if np.isnan(v): return fi.code_of_nan # Overflow/underflow if v > fi.max: if fi.has_infs: return fi.code_of_posinf if fi.num_nans > 0: return fi.code_of_nan return fi.code_of_max if v < fi.min: if fi.has_infs: return fi.code_of_neginf if fi.num_nans > 0: return fi.code_of_nan return fi.code_of_min # Finite values sign = fi.is_signed and np.signbit(v) vpos = -v if sign else v if fi.has_subnormals and vpos <= fi.smallest_subnormal / 2: isig = 0 biased_exp = 0 else: sig, exp = np.frexp(vpos) exp = int(exp) # All calculations in Python ints # sig in range [0.5, 1) sig *= 2 exp -= 1 # now sig in range [1, 2) biased_exp = exp + fi.expBias if biased_exp < 1 and fi.has_subnormals: # subnormal sig *= 2.0 ** (biased_exp - 1) biased_exp = 0 assert vpos == sig * 2 ** (1 - fi.expBias) else: if sig > 0: sig -= 1.0 isig = math.floor(sig * 2**t) # Zero if isig == 0 and biased_exp == 0 and fi.has_zero: if sign and fi.has_nz: return fi.code_of_negzero else: return fi.code_of_zero # Nonzero assert isig < 2**t assert biased_exp < 2**fi.expBits or fi.is_twos_complement # Handle two's complement encoding if fi.is_twos_complement and sign: isig = (1 << t) - isig # Pack values into a single integer code = (int(sign) << (k - 1)) | (biased_exp << t) | (isig << 0) return code