/* * Copyright (C) 2016 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package android.util; import android.annotation.HalfFloat; import android.annotation.NonNull; import android.annotation.Nullable; import sun.misc.FloatingDecimal; /** *

The {@code Half} class is a wrapper and a utility class to manipulate half-precision 16-bit * IEEE 754 * floating point data types (also called fp16 or binary16). A half-precision float can be * created from or converted to single-precision floats, and is stored in a short data type. * To distinguish short values holding half-precision floats from regular short values, * it is recommended to use the @HalfFloat annotation.

* *

The IEEE 754 standard specifies an fp16 as having the following format:

* * - Sign bit: 1 bit * - Exponent width: 5 bits * - Significand: 10 bits *

The format is laid out as follows:

*
 * 1   11111   1111111111
 * ^   --^--   -----^----
 * sign  |          |_______ significand
 *       |
 *       -- exponent
 * 
* *

Half-precision floating points can be useful to save memory and/or * bandwidth at the expense of range and precision when compared to single-precision * floating points (fp32).

*

To help you decide whether fp16 is the right storage type for your needs, please * refer to the table below that shows the available precision throughout the range of * possible values. The precision column indicates the step size between two * consecutive numbers in a specific part of the range.

* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
Range start | Precision
0 | 1 ⁄ 16,777,216
1 ⁄ 16,384 | 1 ⁄ 16,777,216
1 ⁄ 8,192 | 1 ⁄ 8,388,608
1 ⁄ 4,096 | 1 ⁄ 4,194,304
1 ⁄ 2,048 | 1 ⁄ 2,097,152
1 ⁄ 1,024 | 1 ⁄ 1,048,576
1 ⁄ 512 | 1 ⁄ 524,288
1 ⁄ 256 | 1 ⁄ 262,144
1 ⁄ 128 | 1 ⁄ 131,072
1 ⁄ 64 | 1 ⁄ 65,536
1 ⁄ 32 | 1 ⁄ 32,768
1 ⁄ 16 | 1 ⁄ 16,384
1 ⁄ 8 | 1 ⁄ 8,192
1 ⁄ 4 | 1 ⁄ 4,096
1 ⁄ 2 | 1 ⁄ 2,048
1 | 1 ⁄ 1,024
2 | 1 ⁄ 512
4 | 1 ⁄ 256
8 | 1 ⁄ 128
16 | 1 ⁄ 64
32 | 1 ⁄ 32
64 | 1 ⁄ 16
128 | 1 ⁄ 8
256 | 1 ⁄ 4
512 | 1 ⁄ 2
1,024 | 1
2,048 | 2
4,096 | 4
8,192 | 8
16,384 | 16
32,768 | 32
* *

This table shows that numbers higher than 1024 lose all fractional precision.

*/ @SuppressWarnings("SimplifiableIfStatement") public final class Half extends Number implements Comparable { /** * The number of bits used to represent a half-precision float value. */ public static final int SIZE = 16; /** * Epsilon is the difference between 1.0 and the next value representable * by a half-precision floating-point. */ public static final @HalfFloat short EPSILON = (short) 0x1400; /** * Maximum exponent a finite half-precision float may have. */ public static final int MAX_EXPONENT = 15; /** * Minimum exponent a normalized half-precision float may have. */ public static final int MIN_EXPONENT = -14; /** * Smallest negative value a half-precision float may have. */ public static final @HalfFloat short LOWEST_VALUE = (short) 0xfbff; /** * Maximum positive finite value a half-precision float may have. */ public static final @HalfFloat short MAX_VALUE = (short) 0x7bff; /** * Smallest positive normal value a half-precision float may have. */ public static final @HalfFloat short MIN_NORMAL = (short) 0x0400; /** * Smallest positive non-zero value a half-precision float may have. */ public static final @HalfFloat short MIN_VALUE = (short) 0x0001; /** * A Not-a-Number representation of a half-precision float. */ public static final @HalfFloat short NaN = (short) 0x7e00; /** * Negative infinity of type half-precision float. */ public static final @HalfFloat short NEGATIVE_INFINITY = (short) 0xfc00; /** * Negative 0 of type half-precision float. */ public static final @HalfFloat short NEGATIVE_ZERO = (short) 0x8000; /** * Positive infinity of type half-precision float. */ public static final @HalfFloat short POSITIVE_INFINITY = (short) 0x7c00; /** * Positive 0 of type half-precision float. 
*/ public static final @HalfFloat short POSITIVE_ZERO = (short) 0x0000; private static final int FP16_SIGN_SHIFT = 15; private static final int FP16_SIGN_MASK = 0x8000; private static final int FP16_EXPONENT_SHIFT = 10; private static final int FP16_EXPONENT_MASK = 0x1f; private static final int FP16_SIGNIFICAND_MASK = 0x3ff; private static final int FP16_EXPONENT_BIAS = 15; private static final int FP16_COMBINED = 0x7fff; private static final int FP16_EXPONENT_MAX = 0x7c00; private static final int FP32_SIGN_SHIFT = 31; private static final int FP32_EXPONENT_SHIFT = 23; private static final int FP32_EXPONENT_MASK = 0xff; private static final int FP32_SIGNIFICAND_MASK = 0x7fffff; private static final int FP32_EXPONENT_BIAS = 127; private static final int FP32_DENORMAL_MAGIC = 126 << 23; private static final float FP32_DENORMAL_FLOAT = Float.intBitsToFloat(FP32_DENORMAL_MAGIC); private final @HalfFloat short mValue; /** * Constructs a newly allocated {@code Half} object that represents the * half-precision float type argument. * * @param value The value to be represented by the {@code Half} */ public Half(@HalfFloat short value) { mValue = value; } /** * Constructs a newly allocated {@code Half} object that represents the * argument converted to a half-precision float. * * @param value The value to be represented by the {@code Half} * * @see #toHalf(float) */ public Half(float value) { mValue = toHalf(value); } /** * Constructs a newly allocated {@code Half} object that * represents the argument converted to a half-precision float. * * @param value The value to be represented by the {@code Half} * * @see #toHalf(float) */ public Half(double value) { mValue = toHalf((float) value); } /** *

Constructs a newly allocated {@code Half} object that represents the * half-precision float value represented by the string. * The string is converted to a half-precision float value as if by the * {@link #valueOf(String)} method.

* *

Calling this constructor is equivalent to calling:

*
     *     new Half(Float.parseFloat(value))
     * 
* * @param value A string to be converted to a {@code Half} * @throws NumberFormatException if the string does not contain a parsable number * * @see Float#valueOf(java.lang.String) * @see #toHalf(float) */ public Half(@NonNull String value) throws NumberFormatException { mValue = toHalf(Float.parseFloat(value)); } /** * Returns the half-precision value of this {@code Half} as a {@code short} * containing the bit representation described in {@link Half}. * * @return The half-precision float value represented by this object */ public @HalfFloat short halfValue() { return mValue; } /** * Returns the value of this {@code Half} as a {@code byte} after * a narrowing primitive conversion. * * @return The half-precision float value represented by this object * converted to type {@code byte} */ @Override public byte byteValue() { return (byte) toFloat(mValue); } /** * Returns the value of this {@code Half} as a {@code short} after * a narrowing primitive conversion. * * @return The half-precision float value represented by this object * converted to type {@code short} */ @Override public short shortValue() { return (short) toFloat(mValue); } /** * Returns the value of this {@code Half} as a {@code int} after * a narrowing primitive conversion. * * @return The half-precision float value represented by this object * converted to type {@code int} */ @Override public int intValue() { return (int) toFloat(mValue); } /** * Returns the value of this {@code Half} as a {@code long} after * a narrowing primitive conversion. * * @return The half-precision float value represented by this object * converted to type {@code long} */ @Override public long longValue() { return (long) toFloat(mValue); } /** * Returns the value of this {@code Half} as a {@code float} after * a widening primitive conversion. 
* * @return The half-precision float value represented by this object * converted to type {@code float} */ @Override public float floatValue() { return toFloat(mValue); } /** * Returns the value of this {@code Half} as a {@code double} after * a widening primitive conversion. * * @return The half-precision float value represented by this object * converted to type {@code double} */ @Override public double doubleValue() { return toFloat(mValue); } /** * Returns true if this {@code Half} value represents a Not-a-Number, * false otherwise. * * @return True if the value is a NaN, false otherwise */ public boolean isNaN() { return isNaN(mValue); } /** * Compares this object against the specified object. The result is {@code true} * if and only if the argument is not {@code null} and is a {@code Half} object * that represents the same half-precision value as the this object. Two * half-precision values are considered to be the same if and only if the method * {@link #halfToIntBits(short)} returns an identical {@code int} value for both. * * @param o The object to compare * @return True if the objects are the same, false otherwise * * @see #halfToIntBits(short) */ @Override public boolean equals(@Nullable Object o) { return (o instanceof Half) && (halfToIntBits(((Half) o).mValue) == halfToIntBits(mValue)); } /** * Returns a hash code for this {@code Half} object. The result is the * integer bit representation, exactly as produced by the method * {@link #halfToIntBits(short)}, of the primitive half-precision float * value represented by this {@code Half} object. * * @return A hash code value for this object */ @Override public int hashCode() { return hashCode(mValue); } /** * Returns a string representation of the specified half-precision * float value. See {@link #toString(short)} for more information. * * @return A string representation of this {@code Half} object */ @NonNull @Override public String toString() { return toString(mValue); } /** *

Compares the two specified half-precision float values. The following * conditions apply during the comparison:

* * * * @param h The half-precision float value to compare to the half-precision value * represented by this {@code Half} object * * @return The value {@code 0} if {@code x} is numerically equal to {@code y}; a * value less than {@code 0} if {@code x} is numerically less than {@code y}; * and a value greater than {@code 0} if {@code x} is numerically greater * than {@code y} */ @Override public int compareTo(@NonNull Half h) { return compare(mValue, h.mValue); } /** * Returns a hash code for a half-precision float value. * * @param h The value to hash * * @return A hash code value for a half-precision float value */ public static int hashCode(@HalfFloat short h) { return halfToIntBits(h); } /** *

Compares the two specified half-precision float values. The following * conditions apply during the comparison:

* * * * @param x The first half-precision float value to compare. * @param y The second half-precision float value to compare * * @return The value {@code 0} if {@code x} is numerically equal to {@code y}, a * value less than {@code 0} if {@code x} is numerically less than {@code y}, * and a value greater than {@code 0} if {@code x} is numerically greater * than {@code y} */ public static int compare(@HalfFloat short x, @HalfFloat short y) { if (less(x, y)) return -1; if (greater(x, y)) return 1; // Collapse NaNs, akin to halfToIntBits(), but we want to keep // (signed) short value types to preserve the ordering of -0.0 // and +0.0 short xBits = (x & FP16_COMBINED) > FP16_EXPONENT_MAX ? NaN : x; short yBits = (y & FP16_COMBINED) > FP16_EXPONENT_MAX ? NaN : y; return (xBits == yBits ? 0 : (xBits < yBits ? -1 : 1)); } /** *

Returns a representation of the specified half-precision float value * according to the bit layout described in {@link Half}.

* *

Similar to {@link #halfToIntBits(short)}, this method collapses all * possible Not-a-Number values to a single canonical Not-a-Number value * defined by {@link #NaN}.

* * @param h A half-precision float value * @return The bits that represent the half-precision float value * * @see #halfToIntBits(short) */ public static @HalfFloat short halfToShortBits(@HalfFloat short h) { return (h & FP16_COMBINED) > FP16_EXPONENT_MAX ? NaN : h; } /** *

Returns a representation of the specified half-precision float value * according to the bit layout described in {@link Half}.

* *

Unlike {@link #halfToRawIntBits(short)}, this method collapses all * possible Not-a-Number values to a single canonical Not-a-Number value * defined by {@link #NaN}.

* * @param h A half-precision float value * @return The bits that represent the half-precision float value * * @see #halfToRawIntBits(short) * @see #halfToShortBits(short) * @see #intBitsToHalf(int) */ public static int halfToIntBits(@HalfFloat short h) { return (h & FP16_COMBINED) > FP16_EXPONENT_MAX ? NaN : h & 0xffff; } /** *

Returns a representation of the specified half-precision float value * according to the bit layout described in {@link Half}.

* *

The argument is considered to be a representation of a half-precision * float value according to the bit layout described in {@link Half}. The 16 * most significant bits of the returned value are set to 0.

* * @param h A half-precision float value * @return The bits that represent the half-precision float value * * @see #halfToIntBits(short) * @see #intBitsToHalf(int) */ public static int halfToRawIntBits(@HalfFloat short h) { return h & 0xffff; } /** *

Returns the half-precision float value corresponding to a given * bit representation.

* *

The argument is considered to be a representation of a half-precision * float value according to the bit layout described in {@link Half}. The 16 * most significant bits of the argument are ignored.

* * @param bits An integer * @return The half-precision float value with the same bit pattern */ public static @HalfFloat short intBitsToHalf(int bits) { return (short) (bits & 0xffff); } /** * Returns the first parameter with the sign of the second parameter. * This method treats NaNs as having a sign. * * @param magnitude A half-precision float value providing the magnitude of the result * @param sign A half-precision float value providing the sign of the result * @return A value with the magnitude of the first parameter and the sign * of the second parameter */ public static @HalfFloat short copySign(@HalfFloat short magnitude, @HalfFloat short sign) { return (short) ((sign & FP16_SIGN_MASK) | (magnitude & FP16_COMBINED)); } /** * Returns the absolute value of the specified half-precision float. * Special values are handled in the following ways: * * * @param h A half-precision float value * @return The absolute value of the specified half-precision float */ public static @HalfFloat short abs(@HalfFloat short h) { return (short) (h & FP16_COMBINED); } /** * Returns the closest integral half-precision float value to the specified * half-precision float value. Special values are handled in the * following ways: * * * @param h A half-precision float value * @return The value of the specified half-precision float rounded to the nearest * half-precision float value */ public static @HalfFloat short round(@HalfFloat short h) { int bits = h & 0xffff; int e = bits & 0x7fff; int result = bits; if (e < 0x3c00) { result &= FP16_SIGN_MASK; result |= (0x3c00 & (e >= 0x3800 ? 0xffff : 0x0)); } else if (e < 0x6400) { e = 25 - (e >> 10); int mask = (1 << e) - 1; result += (1 << (e - 1)); result &= ~mask; } return (short) result; } /** * Returns the smallest half-precision float value toward negative infinity * greater than or equal to the specified half-precision float value. 
* Special values are handled in the following ways: * * * @param h A half-precision float value * @return The smallest half-precision float value toward negative infinity * greater than or equal to the specified half-precision float value */ public static @HalfFloat short ceil(@HalfFloat short h) { int bits = h & 0xffff; int e = bits & 0x7fff; int result = bits; if (e < 0x3c00) { result &= FP16_SIGN_MASK; result |= 0x3c00 & -(~(bits >> 15) & (e != 0 ? 1 : 0)); } else if (e < 0x6400) { e = 25 - (e >> 10); int mask = (1 << e) - 1; result += mask & ((bits >> 15) - 1); result &= ~mask; } return (short) result; } /** * Returns the largest half-precision float value toward positive infinity * less than or equal to the specified half-precision float value. * Special values are handled in the following ways: * * * @param h A half-precision float value * @return The largest half-precision float value toward positive infinity * less than or equal to the specified half-precision float value */ public static @HalfFloat short floor(@HalfFloat short h) { int bits = h & 0xffff; int e = bits & 0x7fff; int result = bits; if (e < 0x3c00) { result &= FP16_SIGN_MASK; result |= 0x3c00 & (bits > 0x8000 ? 0xffff : 0x0); } else if (e < 0x6400) { e = 25 - (e >> 10); int mask = (1 << e) - 1; result += mask & -(bits >> 15); result &= ~mask; } return (short) result; } /** * Returns the truncated half-precision float value of the specified * half-precision float value. 
Special values are handled in the following ways: * * * @param h A half-precision float value * @return The truncated half-precision float value of the specified * half-precision float value */ public static @HalfFloat short trunc(@HalfFloat short h) { int bits = h & 0xffff; int e = bits & 0x7fff; int result = bits; if (e < 0x3c00) { result &= FP16_SIGN_MASK; } else if (e < 0x6400) { e = 25 - (e >> 10); int mask = (1 << e) - 1; result &= ~mask; } return (short) result; } /** * Returns the smaller of two half-precision float values (the value closest * to negative infinity). Special values are handled in the following ways: * * * @param x The first half-precision value * @param y The second half-precision value * @return The smaller of the two specified half-precision values */ public static @HalfFloat short min(@HalfFloat short x, @HalfFloat short y) { if ((x & FP16_COMBINED) > FP16_EXPONENT_MAX) return NaN; if ((y & FP16_COMBINED) > FP16_EXPONENT_MAX) return NaN; if ((x & FP16_COMBINED) == 0 && (y & FP16_COMBINED) == 0) { return (x & FP16_SIGN_MASK) != 0 ? x : y; } return ((x & FP16_SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) < ((y & FP16_SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff) ? x : y; } /** * Returns the larger of two half-precision float values (the value closest * to positive infinity). Special values are handled in the following ways: * * * @param x The first half-precision value * @param y The second half-precision value * * @return The larger of the two specified half-precision values */ public static @HalfFloat short max(@HalfFloat short x, @HalfFloat short y) { if ((x & FP16_COMBINED) > FP16_EXPONENT_MAX) return NaN; if ((y & FP16_COMBINED) > FP16_EXPONENT_MAX) return NaN; if ((x & FP16_COMBINED) == 0 && (y & FP16_COMBINED) == 0) { return (x & FP16_SIGN_MASK) != 0 ? y : x; } return ((x & FP16_SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) > ((y & FP16_SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff) ? 
x : y; } /** * Returns true if the first half-precision float value is less (smaller * toward negative infinity) than the second half-precision float value. * If either of the values is NaN, the result is false. * * @param x The first half-precision value * @param y The second half-precision value * * @return True if x is less than y, false otherwise */ public static boolean less(@HalfFloat short x, @HalfFloat short y) { if ((x & FP16_COMBINED) > FP16_EXPONENT_MAX) return false; if ((y & FP16_COMBINED) > FP16_EXPONENT_MAX) return false; return ((x & FP16_SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) < ((y & FP16_SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff); } /** * Returns true if the first half-precision float value is less (smaller * toward negative infinity) than or equal to the second half-precision * float value. If either of the values is NaN, the result is false. * * @param x The first half-precision value * @param y The second half-precision value * * @return True if x is less than or equal to y, false otherwise */ public static boolean lessEquals(@HalfFloat short x, @HalfFloat short y) { if ((x & FP16_COMBINED) > FP16_EXPONENT_MAX) return false; if ((y & FP16_COMBINED) > FP16_EXPONENT_MAX) return false; return ((x & FP16_SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) <= ((y & FP16_SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff); } /** * Returns true if the first half-precision float value is greater (larger * toward positive infinity) than the second half-precision float value. * If either of the values is NaN, the result is false. * * @param x The first half-precision value * @param y The second half-precision value * * @return True if x is greater than y, false otherwise */ public static boolean greater(@HalfFloat short x, @HalfFloat short y) { if ((x & FP16_COMBINED) > FP16_EXPONENT_MAX) return false; if ((y & FP16_COMBINED) > FP16_EXPONENT_MAX) return false; return ((x & FP16_SIGN_MASK) != 0 ? 
0x8000 - (x & 0xffff) : x & 0xffff) > ((y & FP16_SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff); } /** * Returns true if the first half-precision float value is greater (larger * toward positive infinity) than or equal to the second half-precision float * value. If either of the values is NaN, the result is false. * * @param x The first half-precision value * @param y The second half-precision value * * @return True if x is greater than y, false otherwise */ public static boolean greaterEquals(@HalfFloat short x, @HalfFloat short y) { if ((x & FP16_COMBINED) > FP16_EXPONENT_MAX) return false; if ((y & FP16_COMBINED) > FP16_EXPONENT_MAX) return false; return ((x & FP16_SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) >= ((y & FP16_SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff); } /** * Returns true if the two half-precision float values are equal. * If either of the values is NaN, the result is false. {@link #POSITIVE_ZERO} * and {@link #NEGATIVE_ZERO} are considered equal. * * @param x The first half-precision value * @param y The second half-precision value * * @return True if x is equal to y, false otherwise */ public static boolean equals(@HalfFloat short x, @HalfFloat short y) { if ((x & FP16_COMBINED) > FP16_EXPONENT_MAX) return false; if ((y & FP16_COMBINED) > FP16_EXPONENT_MAX) return false; return x == y || ((x | y) & FP16_COMBINED) == 0; } /** * Returns the sign of the specified half-precision float. * * @param h A half-precision float value * @return 1 if the value is positive, -1 if the value is negative */ public static int getSign(@HalfFloat short h) { return (h & FP16_SIGN_MASK) == 0 ? 1 : -1; } /** * Returns the unbiased exponent used in the representation of * the specified half-precision float value. if the value is NaN * or infinite, this* method returns {@link #MAX_EXPONENT} + 1. * If the argument is 0 or a subnormal representation, this method * returns {@link #MIN_EXPONENT} - 1. 
* * @param h A half-precision float value * @return The unbiased exponent of the specified value */ public static int getExponent(@HalfFloat short h) { return ((h >>> FP16_EXPONENT_SHIFT) & FP16_EXPONENT_MASK) - FP16_EXPONENT_BIAS; } /** * Returns the significand, or mantissa, used in the representation * of the specified half-precision float value. * * @param h A half-precision float value * @return The significand, or significand, of the specified vlaue */ public static int getSignificand(@HalfFloat short h) { return h & FP16_SIGNIFICAND_MASK; } /** * Returns true if the specified half-precision float value represents * infinity, false otherwise. * * @param h A half-precision float value * @return True if the value is positive infinity or negative infinity, * false otherwise */ public static boolean isInfinite(@HalfFloat short h) { return (h & FP16_COMBINED) == FP16_EXPONENT_MAX; } /** * Returns true if the specified half-precision float value represents * a Not-a-Number, false otherwise. * * @param h A half-precision float value * @return True if the value is a NaN, false otherwise */ public static boolean isNaN(@HalfFloat short h) { return (h & FP16_COMBINED) > FP16_EXPONENT_MAX; } /** * Returns true if the specified half-precision float value is normalized * (does not have a subnormal representation). If the specified value is * {@link #POSITIVE_INFINITY}, {@link #NEGATIVE_INFINITY}, * {@link #POSITIVE_ZERO}, {@link #NEGATIVE_ZERO}, NaN or any subnormal * number, this method returns false. * * @param h A half-precision float value * @return True if the value is normalized, false otherwise */ public static boolean isNormalized(@HalfFloat short h) { return (h & FP16_EXPONENT_MAX) != 0 && (h & FP16_EXPONENT_MAX) != FP16_EXPONENT_MAX; } /** *

Converts the specified half-precision float value into a * single-precision float value. The following special cases are handled:

* * * @param h The half-precision float value to convert to single-precision * @return A normalized single-precision float value */ public static float toFloat(@HalfFloat short h) { int bits = h & 0xffff; int s = bits & FP16_SIGN_MASK; int e = (bits >>> FP16_EXPONENT_SHIFT) & FP16_EXPONENT_MASK; int m = (bits ) & FP16_SIGNIFICAND_MASK; int outE = 0; int outM = 0; if (e == 0) { // Denormal or 0 if (m != 0) { // Convert denorm fp16 into normalized fp32 float o = Float.intBitsToFloat(FP32_DENORMAL_MAGIC + m); o -= FP32_DENORMAL_FLOAT; return s == 0 ? o : -o; } } else { outM = m << 13; if (e == 0x1f) { // Infinite or NaN outE = 0xff; } else { outE = e - FP16_EXPONENT_BIAS + FP32_EXPONENT_BIAS; } } int out = (s << 16) | (outE << FP32_EXPONENT_SHIFT) | outM; return Float.intBitsToFloat(out); } /** *

Converts the specified single-precision float value into a * half-precision float value. The following special cases are handled:

* * * @param f The single-precision float value to convert to half-precision * @return A half-precision float value */ @SuppressWarnings("StatementWithEmptyBody") public static @HalfFloat short toHalf(float f) { int bits = Float.floatToRawIntBits(f); int s = (bits >>> FP32_SIGN_SHIFT ); int e = (bits >>> FP32_EXPONENT_SHIFT) & FP32_EXPONENT_MASK; int m = (bits ) & FP32_SIGNIFICAND_MASK; int outE = 0; int outM = 0; if (e == 0xff) { // Infinite or NaN outE = 0x1f; outM = m != 0 ? 0x200 : 0; } else { e = e - FP32_EXPONENT_BIAS + FP16_EXPONENT_BIAS; if (e >= 0x1f) { // Overflow outE = 0x31; } else if (e <= 0) { // Underflow if (e < -10) { // The absolute fp32 value is less than MIN_VALUE, flush to +/-0 } else { // The fp32 value is a normalized float less than MIN_NORMAL, // we convert to a denorm fp16 m = (m | 0x800000) >> (1 - e); if ((m & 0x1000) != 0) m += 0x2000; outM = m >> 13; } } else { outE = e; outM = m >> 13; if ((m & 0x1000) != 0) { // Round to nearest "0.5" up int out = (outE << FP16_EXPONENT_SHIFT) | outM; out++; return (short) (out | (s << FP16_SIGN_SHIFT)); } } } return (short) ((s << FP16_SIGN_SHIFT) | (outE << FP16_EXPONENT_SHIFT) | outM); } /** * Returns a {@code Half} instance representing the specified * half-precision float value. * * @param h A half-precision float value * @return a {@code Half} instance representing {@code h} */ public static @NonNull Half valueOf(@HalfFloat short h) { return new Half(h); } /** * Returns a {@code Half} instance representing the specified float value. * * @param f A float value * @return a {@code Half} instance representing {@code f} */ public static @NonNull Half valueOf(float f) { return new Half(f); } /** * Returns a {@code Half} instance representing the specified string value. * Calling this method is equivalent to calling * toHalf(Float.parseString(h)). See {@link Float#valueOf(String)} * for more information on the format of the string representation. 
* * @param s The string to be parsed * @return a {@code Half} instance representing {@code h} * @throws NumberFormatException if the string does not contain a parsable * half-precision float value */ public static @NonNull Half valueOf(@NonNull String s) { return new Half(s); } /** * Returns the half-precision float value represented by the specified string. * Calling this method is equivalent to calling * toHalf(Float.parseString(h)). See {@link Float#valueOf(String)} * for more information on the format of the string representation. * * @param s The string to be parsed * @return A half-precision float value represented by the string * @throws NumberFormatException if the string does not contain a parsable * half-precision float value */ public static @HalfFloat short parseHalf(@NonNull String s) throws NumberFormatException { return toHalf(FloatingDecimal.parseFloat(s)); } /** * Returns a string representation of the specified half-precision * float value. Calling this method is equivalent to calling * Float.toString(toFloat(h)). See {@link Float#toString(float)} * for more information on the format of the string representation. * * @param h A half-precision float value * @return A string representation of the specified value */ @NonNull public static String toString(@HalfFloat short h) { return Float.toString(toFloat(h)); } /** *

Returns a hexadecimal string representation of the specified half-precision * float value. If the value is a NaN, the result is "NaN", * otherwise the result follows this format:

* * * @param h A half-precision float value * @return A hexadecimal string representation of the specified value */ @NonNull public static String toHexString(@HalfFloat short h) { StringBuilder o = new StringBuilder(); int bits = h & 0xffff; int s = (bits >>> FP16_SIGN_SHIFT ); int e = (bits >>> FP16_EXPONENT_SHIFT) & FP16_EXPONENT_MASK; int m = (bits ) & FP16_SIGNIFICAND_MASK; if (e == 0x1f) { // Infinite or NaN if (m == 0) { if (s != 0) o.append('-'); o.append("Infinity"); } else { o.append("NaN"); } } else { if (s == 1) o.append('-'); if (e == 0) { if (m == 0) { o.append("0x0.0p0"); } else { o.append("0x0."); String significand = Integer.toHexString(m); o.append(significand.replaceFirst("0{2,}$", "")); o.append("p-14"); } } else { o.append("0x1."); String significand = Integer.toHexString(m); o.append(significand.replaceFirst("0{2,}$", "")); o.append('p'); o.append(Integer.toString(e - FP16_EXPONENT_BIAS)); } } return o.toString(); } }