// Copyright (c) 2015-2016 The Khronos Group Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and/or associated documentation files (the
// "Materials"), to deal in the Materials without restriction, including
// without limitation the rights to use, copy, modify, merge, publish,
// distribute, sublicense, and/or sell copies of the Materials, and to
// permit persons to whom the Materials are furnished to do so, subject to
// the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Materials.
//
// MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
// KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
// SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
// https://www.khronos.org/registry/
//
// THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
// IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
// MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
2611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 2711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert#ifndef LIBSPIRV_UTIL_HEX_FLOAT_H_ 2811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert#define LIBSPIRV_UTIL_HEX_FLOAT_H_ 2911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 3011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert#include <cassert> 3111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert#include <cctype> 3211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert#include <cmath> 3311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert#include <cstdint> 3411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert#include <iomanip> 3511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert#include <limits> 3611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 3711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert#include "bitutils.h" 3811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 3911cd02dfb91661c65134cac258cf5924270e9d2Dan Albertnamespace spvutils { 4011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 4111cd02dfb91661c65134cac258cf5924270e9d2Dan Albertclass Float16 { 4211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert public: 4311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert Float16(uint16_t v) : val(v) {} 4411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert Float16() = default; 4511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static bool isNan(const Float16& val) { 4611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert return ((val.val & 0x7C00) == 0x7C00) && ((val.val & 0x3FF) != 0); 4711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 4811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // Returns true if the given value is any kind of infinity. 
4911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static bool isInfinity(const Float16& val) { 5011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert return ((val.val & 0x7C00) == 0x7C00) && ((val.val & 0x3FF) == 0); 5111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 5211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert Float16(const Float16& other) { val = other.val; } 5311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert uint16_t get_value() const { return val; } 5411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 5511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // Returns the maximum normal value. 5611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static Float16 max() { return Float16(0x7bff); } 5711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // Returns the lowest normal value. 5811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static Float16 lowest() { return Float16(0xfbff); } 5911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 6011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert private: 6111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert uint16_t val; 6211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert}; 6311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 6411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert// To specialize this type, you must override uint_type to define 6511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert// an unsigned integer that can fit your floating point type. 6611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert// You must also add a isNan function that returns true if 6711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert// a value is Nan. 
6811cd02dfb91661c65134cac258cf5924270e9d2Dan Alberttemplate <typename T> 6911cd02dfb91661c65134cac258cf5924270e9d2Dan Albertstruct FloatProxyTraits { 7011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert using uint_type = void; 7111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert}; 7211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 7311cd02dfb91661c65134cac258cf5924270e9d2Dan Alberttemplate <> 7411cd02dfb91661c65134cac258cf5924270e9d2Dan Albertstruct FloatProxyTraits<float> { 7511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert using uint_type = uint32_t; 7611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static bool isNan(float f) { return std::isnan(f); } 7711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // Returns true if the given value is any kind of infinity. 7811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static bool isInfinity(float f) { return std::isinf(f); } 7911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // Returns the maximum normal value. 8011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static float max() { return std::numeric_limits<float>::max(); } 8111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // Returns the lowest normal value. 8211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static float lowest() { return std::numeric_limits<float>::lowest(); } 8311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert}; 8411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 8511cd02dfb91661c65134cac258cf5924270e9d2Dan Alberttemplate <> 8611cd02dfb91661c65134cac258cf5924270e9d2Dan Albertstruct FloatProxyTraits<double> { 8711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert using uint_type = uint64_t; 8811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static bool isNan(double f) { return std::isnan(f); } 8911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // Returns true if the given value is any kind of infinity. 
9011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static bool isInfinity(double f) { return std::isinf(f); } 9111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // Returns the maximum normal value. 9211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static double max() { return std::numeric_limits<double>::max(); } 9311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // Returns the lowest normal value. 9411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static double lowest() { return std::numeric_limits<double>::lowest(); } 9511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert}; 9611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 9711cd02dfb91661c65134cac258cf5924270e9d2Dan Alberttemplate <> 9811cd02dfb91661c65134cac258cf5924270e9d2Dan Albertstruct FloatProxyTraits<Float16> { 9911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert using uint_type = uint16_t; 10011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static bool isNan(Float16 f) { return Float16::isNan(f); } 10111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // Returns true if the given value is any kind of infinity. 10211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static bool isInfinity(Float16 f) { return Float16::isInfinity(f); } 10311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // Returns the maximum normal value. 10411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static Float16 max() { return Float16::max(); } 10511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // Returns the lowest normal value. 
10611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static Float16 lowest() { return Float16::lowest(); } 10711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert}; 10811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 10911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert// Since copying a floating point number (especially if it is NaN) 11011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert// does not guarantee that bits are preserved, this class lets us 11111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert// store the type and use it as a float when necessary. 11211cd02dfb91661c65134cac258cf5924270e9d2Dan Alberttemplate <typename T> 11311cd02dfb91661c65134cac258cf5924270e9d2Dan Albertclass FloatProxy { 11411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert public: 11511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert using uint_type = typename FloatProxyTraits<T>::uint_type; 11611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 11711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // Since this is to act similar to the normal floats, 11811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // do not initialize the data by default. 11911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert FloatProxy() = default; 12011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 12111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // Intentionally non-explicit. This is a proxy type so 12211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // implicit conversions allow us to use it more transparently. 12311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert FloatProxy(T val) { data_ = BitwiseCast<uint_type>(val); } 12411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 12511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // Intentionally non-explicit. This is a proxy type so 12611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // implicit conversions allow us to use it more transparently. 
12711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert FloatProxy(uint_type val) { data_ = val; } 12811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 12911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // This is helpful to have and is guaranteed not to stomp bits. 13011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert FloatProxy<T> operator-() const { 13111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert return static_cast<uint_type>(data_ ^ 13211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert (uint_type(0x1) << (sizeof(T) * 8 - 1))); 13311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 13411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 13511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // Returns the data as a floating point value. 13611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert T getAsFloat() const { return BitwiseCast<T>(data_); } 13711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 13811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // Returns the raw data. 13911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert uint_type data() const { return data_; } 14011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 14111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // Returns true if the value represents any type of NaN. 14211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert bool isNan() { return FloatProxyTraits<T>::isNan(getAsFloat()); } 14311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // Returns true if the value represents any type of infinity. 14411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert bool isInfinity() { return FloatProxyTraits<T>::isInfinity(getAsFloat()); } 14511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 14611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // Returns the maximum normal value. 
14711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static FloatProxy<T> max() { 14811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert return FloatProxy<T>(FloatProxyTraits<T>::max()); 14911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 15011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // Returns the lowest normal value. 15111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static FloatProxy<T> lowest() { 15211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert return FloatProxy<T>(FloatProxyTraits<T>::lowest()); 15311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 15411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 15511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert private: 15611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert uint_type data_; 15711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert}; 15811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 15911cd02dfb91661c65134cac258cf5924270e9d2Dan Alberttemplate <typename T> 16011cd02dfb91661c65134cac258cf5924270e9d2Dan Albertbool operator==(const FloatProxy<T>& first, const FloatProxy<T>& second) { 16111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert return first.data() == second.data(); 16211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert} 16311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 16411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert// Reads a FloatProxy value as a normal float from a stream. 
16511cd02dfb91661c65134cac258cf5924270e9d2Dan Alberttemplate <typename T> 16611cd02dfb91661c65134cac258cf5924270e9d2Dan Albertstd::istream& operator>>(std::istream& is, FloatProxy<T>& value) { 16711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert T float_val; 16811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert is >> float_val; 16911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert value = FloatProxy<T>(float_val); 17011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert return is; 17111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert} 17211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 17311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert// This is an example traits. It is not meant to be used in practice, but will 17411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert// be the default for any non-specialized type. 17511cd02dfb91661c65134cac258cf5924270e9d2Dan Alberttemplate <typename T> 17611cd02dfb91661c65134cac258cf5924270e9d2Dan Albertstruct HexFloatTraits { 17711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // Integer type that can store this hex-float. 17811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert using uint_type = void; 17911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // Signed integer type that can store this hex-float. 18011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert using int_type = void; 18111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // The numerical type that this HexFloat represents. 18211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert using underlying_type = void; 18311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // The type needed to construct the underlying type. 18411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert using native_type = void; 18511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // The number of bits that are actually relevant in the uint_type. 
18611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // This allows us to deal with, for example, 24-bit values in a 32-bit 18711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // integer. 18811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static const uint32_t num_used_bits = 0; 18911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // Number of bits that represent the exponent. 19011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static const uint32_t num_exponent_bits = 0; 19111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // Number of bits that represent the fractional part. 19211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static const uint32_t num_fraction_bits = 0; 19311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // The bias of the exponent. (How much we need to subtract from the stored 19411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // value to get the correct value.) 19511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static const uint32_t exponent_bias = 0; 19611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert}; 19711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 19811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert// Traits for IEEE float. 19911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert// 1 sign bit, 8 exponent bits, 23 fractional bits. 
20011cd02dfb91661c65134cac258cf5924270e9d2Dan Alberttemplate <> 20111cd02dfb91661c65134cac258cf5924270e9d2Dan Albertstruct HexFloatTraits<FloatProxy<float>> { 20211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert using uint_type = uint32_t; 20311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert using int_type = int32_t; 20411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert using underlying_type = FloatProxy<float>; 20511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert using native_type = float; 20611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static const uint_type num_used_bits = 32; 20711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static const uint_type num_exponent_bits = 8; 20811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static const uint_type num_fraction_bits = 23; 20911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static const uint_type exponent_bias = 127; 21011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert}; 21111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 21211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert// Traits for IEEE double. 21311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert// 1 sign bit, 11 exponent bits, 52 fractional bits. 
21411cd02dfb91661c65134cac258cf5924270e9d2Dan Alberttemplate <> 21511cd02dfb91661c65134cac258cf5924270e9d2Dan Albertstruct HexFloatTraits<FloatProxy<double>> { 21611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert using uint_type = uint64_t; 21711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert using int_type = int64_t; 21811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert using underlying_type = FloatProxy<double>; 21911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert using native_type = double; 22011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static const uint_type num_used_bits = 64; 22111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static const uint_type num_exponent_bits = 11; 22211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static const uint_type num_fraction_bits = 52; 22311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static const uint_type exponent_bias = 1023; 22411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert}; 22511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 22611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert// Traits for IEEE half. 22711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert// 1 sign bit, 5 exponent bits, 10 fractional bits. 
22811cd02dfb91661c65134cac258cf5924270e9d2Dan Alberttemplate <> 22911cd02dfb91661c65134cac258cf5924270e9d2Dan Albertstruct HexFloatTraits<FloatProxy<Float16>> { 23011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert using uint_type = uint16_t; 23111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert using int_type = int16_t; 23211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert using underlying_type = uint16_t; 23311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert using native_type = uint16_t; 23411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static const uint_type num_used_bits = 16; 23511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static const uint_type num_exponent_bits = 5; 23611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static const uint_type num_fraction_bits = 10; 23711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static const uint_type exponent_bias = 15; 23811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert}; 23911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 24011cd02dfb91661c65134cac258cf5924270e9d2Dan Albertenum class round_direction { 24111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert kToZero, 24211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert kToNearestEven, 24311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert kToPositiveInfinity, 24411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert kToNegativeInfinity, 24511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert max = kToNegativeInfinity 24611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert}; 24711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 24811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert// Template class that houses a floating pointer number. 24911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert// It exposes a number of constants based on the provided traits to 25011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert// assist in interpreting the bits of the value. 
25111cd02dfb91661c65134cac258cf5924270e9d2Dan Alberttemplate <typename T, typename Traits = HexFloatTraits<T>> 25211cd02dfb91661c65134cac258cf5924270e9d2Dan Albertclass HexFloat { 25311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert public: 25411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert using uint_type = typename Traits::uint_type; 25511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert using int_type = typename Traits::int_type; 25611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert using underlying_type = typename Traits::underlying_type; 25711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert using native_type = typename Traits::native_type; 25811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 25911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert explicit HexFloat(T f) : value_(f) {} 26011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 26111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert T value() const { return value_; } 26211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert void set_value(T f) { value_ = f; } 26311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 26411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // These are all written like this because it is convenient to have 26511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // compile-time constants for all of these values. 26611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 26711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // Pass-through values to save typing. 
26811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static const uint32_t num_used_bits = Traits::num_used_bits; 26911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static const uint32_t exponent_bias = Traits::exponent_bias; 27011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static const uint32_t num_exponent_bits = Traits::num_exponent_bits; 27111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static const uint32_t num_fraction_bits = Traits::num_fraction_bits; 27211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 27311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // Number of bits to shift left to set the highest relevant bit. 27411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static const uint32_t top_bit_left_shift = num_used_bits - 1; 27511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // How many nibbles (hex characters) the fractional part takes up. 27611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static const uint32_t fraction_nibbles = (num_fraction_bits + 3) / 4; 27711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // If the fractional part does not fit evenly into a hex character (4-bits) 27811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // then we have to left-shift to get rid of leading 0s. This is the amount 27911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // we have to shift (might be 0). 28011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static const uint32_t num_overflow_bits = 28111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert fraction_nibbles * 4 - num_fraction_bits; 28211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 28311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // The representation of the fraction, not the actual bits. This 28411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // includes the leading bit that is usually implicit. 
28511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static const uint_type fraction_represent_mask = 28611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert spvutils::SetBits<uint_type, 0, 28711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert num_fraction_bits + num_overflow_bits>::get; 28811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 28911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // The topmost bit in the nibble-aligned fraction. 29011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static const uint_type fraction_top_bit = 29111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert uint_type(1) << (num_fraction_bits + num_overflow_bits - 1); 29211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 29311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // The least significant bit in the exponent, which is also the bit 29411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // immediately to the left of the significand. 29511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static const uint_type first_exponent_bit = uint_type(1) 29611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert << (num_fraction_bits); 29711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 29811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // The mask for the encoded fraction. It does not include the 29911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // implicit bit. 30011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static const uint_type fraction_encode_mask = 30111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert spvutils::SetBits<uint_type, 0, num_fraction_bits>::get; 30211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 30311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // The bit that is used as a sign. 30411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static const uint_type sign_mask = uint_type(1) << top_bit_left_shift; 30511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 30611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // The bits that represent the exponent. 
30711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static const uint_type exponent_mask = 30811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert spvutils::SetBits<uint_type, num_fraction_bits, num_exponent_bits>::get; 30911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 31011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // How far left the exponent is shifted. 31111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static const uint32_t exponent_left_shift = num_fraction_bits; 31211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 31311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // How far from the right edge the fraction is shifted. 31411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static const uint32_t fraction_right_shift = 31511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static_cast<uint32_t>(sizeof(uint_type) * 8) - num_fraction_bits; 31611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 31711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // The maximum representable unbiased exponent. 31811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static const int_type max_exponent = 31911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert (exponent_mask >> num_fraction_bits) - exponent_bias; 32011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // The minimum representable exponent for normalized numbers. 32111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static const int_type min_exponent = -static_cast<int_type>(exponent_bias); 32211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 32311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // Returns the bits associated with the value. 32411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert uint_type getBits() const { return spvutils::BitwiseCast<uint_type>(value_); } 32511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 32611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // Returns the bits associated with the value, without the leading sign bit. 
32711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert uint_type getUnsignedBits() const { 32811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert return static_cast<uint_type>(spvutils::BitwiseCast<uint_type>(value_) & 32911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert ~sign_mask); 33011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 33111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 33211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // Returns the bits associated with the exponent, shifted to start at the 33311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // lsb of the type. 33411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert const uint_type getExponentBits() const { 33511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert return static_cast<uint_type>((getBits() & exponent_mask) >> 33611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert num_fraction_bits); 33711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 33811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 33911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // Returns the exponent in unbiased form. This is the exponent in the 34011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // human-friendly form. 34111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert const int_type getUnbiasedExponent() const { 34211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert return static_cast<int_type>(getExponentBits() - exponent_bias); 34311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 34411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 34511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // Returns just the significand bits from the value. 
34611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert const uint_type getSignificandBits() const { 34711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert return getBits() & fraction_encode_mask; 34811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 34911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 35011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // If the number was normalized, returns the unbiased exponent. 35111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // If the number was denormal, normalize the exponent first. 35211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert const int_type getUnbiasedNormalizedExponent() const { 35311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert if ((getBits() & ~sign_mask) == 0) { // special case if everything is 0 35411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert return 0; 35511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 35611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert int_type exp = getUnbiasedExponent(); 35711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert if (exp == min_exponent) { // We are in denorm land. 35811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert uint_type significand_bits = getSignificandBits(); 35911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert while ((significand_bits & (first_exponent_bit >> 1)) == 0) { 36011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert significand_bits = static_cast<uint_type>(significand_bits << 1); 36111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert exp = static_cast<int_type>(exp - 1); 36211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 36311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert significand_bits &= fraction_encode_mask; 36411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 36511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert return exp; 36611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 36711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 36811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // Returns the signficand after it has been normalized. 
36911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert const uint_type getNormalizedSignificand() const { 37011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert int_type unbiased_exponent = getUnbiasedNormalizedExponent(); 37111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert uint_type significand = getSignificandBits(); 37211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert for (int_type i = unbiased_exponent; i <= min_exponent; ++i) { 37311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert significand = static_cast<uint_type>(significand << 1); 37411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 37511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert significand &= fraction_encode_mask; 37611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert return significand; 37711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 37811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 37911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // Returns true if this number represents a negative value. 38011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert bool isNegative() const { return (getBits() & sign_mask) != 0; } 38111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 38211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // Sets this HexFloat from the individual components. 38311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // Note this assumes EVERY significand is normalized, and has an implicit 38411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // leading one. This means that the only way that this method will set 0, 38511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // is if you set a number so denormalized that it underflows. 38611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // Do not use this method with raw bits extracted from a subnormal number, 38711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // since subnormals do not have an implicit leading 1 in the significand. 
38811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // The significand is also expected to be in the 38911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // lowest-most num_fraction_bits of the uint_type. 39011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // The exponent is expected to be unbiased, meaning an exponent of 39111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // 0 actually means 0. 39211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // If underflow_round_up is set, then on underflow, if a number is non-0 39311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // and would underflow, we round up to the smallest denorm. 39411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert void setFromSignUnbiasedExponentAndNormalizedSignificand( 39511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert bool negative, int_type exponent, uint_type significand, 39611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert bool round_denorm_up) { 39711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert bool significand_is_zero = significand == 0; 39811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 39911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert if (exponent <= min_exponent) { 40011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // If this was denormalized, then we have to shift the bit on, meaning 40111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // the significand is not zero. 
40211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert significand_is_zero = false; 40311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert significand |= first_exponent_bit; 40411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert significand = static_cast<uint_type>(significand >> 1); 40511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 40611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 40711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert while (exponent < min_exponent) { 40811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert significand = static_cast<uint_type>(significand >> 1); 40911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert ++exponent; 41011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 41111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 41211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert if (exponent == min_exponent) { 41311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert if (significand == 0 && !significand_is_zero && round_denorm_up) { 41411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert significand = static_cast<uint_type>(0x1); 41511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 41611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 41711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 41811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert uint_type new_value = 0; 41911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert if (negative) { 42011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert new_value = static_cast<uint_type>(new_value | sign_mask); 42111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 42211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert exponent = static_cast<int_type>(exponent + exponent_bias); 42311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert assert(exponent >= 0); 42411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 42511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // put it all together 42611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert exponent = static_cast<uint_type>((exponent << exponent_left_shift) & 
42711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert exponent_mask); 42811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert significand = static_cast<uint_type>(significand & fraction_encode_mask); 42911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert new_value = static_cast<uint_type>(new_value | (exponent | significand)); 43011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert value_ = BitwiseCast<T>(new_value); 43111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 43211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 43311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // Increments the significand of this number by the given amount. 43411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // If this would spill the significand into the implicit bit, 43511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // carry is set to true and the significand is shifted to fit into 43611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // the correct location, otherwise carry is set to false. 43711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // All significands and to_increment are assumed to be within the bounds 43811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // for a valid significand. 43911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static uint_type incrementSignificand(uint_type significand, 44011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert uint_type to_increment, bool* carry) { 44111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert significand = static_cast<uint_type>(significand + to_increment); 44211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert *carry = false; 44311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert if (significand & first_exponent_bit) { 44411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert *carry = true; 44511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // The implicit 1-bit will have carried, so we should zero-out the 44611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // top bit and shift back. 
44711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert significand = static_cast<uint_type>(significand & ~first_exponent_bit); 44811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert significand = static_cast<uint_type>(significand >> 1); 44911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 45011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert return significand; 45111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 45211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 45311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // These exist because MSVC throws warnings on negative right-shifts 45411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // even if they are not going to be executed. Eg: 45511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // constant_number < 0? 0: constant_number 45611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // These convert the negative left-shifts into right shifts. 45711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 45811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert template <int_type N, typename enable = void> 45911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert struct negatable_left_shift { 46011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static uint_type val(uint_type val) { 46111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert return static_cast<uint_type>(val >> -N); 46211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 46311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert }; 46411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 46511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert template <int_type N> 46611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert struct negatable_left_shift<N, typename std::enable_if<N >= 0>::type> { 46711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static uint_type val(uint_type val) { 46811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert return static_cast<uint_type>(val << N); 46911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 47011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert }; 
47111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 47211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert template <int_type N, typename enable = void> 47311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert struct negatable_right_shift { 47411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static uint_type val(uint_type val) { 47511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert return static_cast<uint_type>(val << -N); 47611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 47711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert }; 47811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 47911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert template <int_type N> 48011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert struct negatable_right_shift<N, typename std::enable_if<N >= 0>::type> { 48111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static uint_type val(uint_type val) { 48211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert return static_cast<uint_type>(val >> N); 48311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 48411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert }; 48511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 48611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // Returns the significand, rounded to fit in a significand in 48711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // other_T. This is shifted so that the most significant 48811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // bit of the rounded number lines up with the most significant bit 48911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // of the returned significand. 
49011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert template <typename other_T> 49111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert typename other_T::uint_type getRoundedNormalizedSignificand( 49211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert round_direction dir, bool* carry_bit) { 49311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert using other_uint_type = typename other_T::uint_type; 49411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static const int_type num_throwaway_bits = 49511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static_cast<int_type>(num_fraction_bits) - 49611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static_cast<int_type>(other_T::num_fraction_bits); 49711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 49811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static const uint_type last_significant_bit = 49911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert (num_throwaway_bits < 0) 50011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert ? 0 50111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert : negatable_left_shift<num_throwaway_bits>::val(1u); 50211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static const uint_type first_rounded_bit = 50311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert (num_throwaway_bits < 1) 50411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert ? 0 50511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert : negatable_left_shift<num_throwaway_bits - 1>::val(1u); 50611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 50711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static const uint_type throwaway_mask_bits = 50811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert num_throwaway_bits > 0 ? 
num_throwaway_bits : 0; 50911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static const uint_type throwaway_mask = 51011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert spvutils::SetBits<uint_type, 0, throwaway_mask_bits>::get; 51111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 51211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert *carry_bit = false; 51311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert other_uint_type out_val = 0; 51411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert uint_type significand = getNormalizedSignificand(); 51511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // If we are up-casting, then we just have to shift to the right location. 51611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert if (num_throwaway_bits <= 0) { 51711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert out_val = static_cast<other_uint_type>(significand); 51811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert uint_type shift_amount = static_cast<uint_type>(-num_throwaway_bits); 51911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert out_val = static_cast<other_uint_type>(out_val << shift_amount); 52011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert return out_val; 52111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 52211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 52311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // If every non-representable bit is 0, then we don't have any casting to 52411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // do. 
52511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert if ((significand & throwaway_mask) == 0) { 52611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert return static_cast<other_uint_type>( 52711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert negatable_right_shift<num_throwaway_bits>::val(significand)); 52811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 52911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 53011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert bool round_away_from_zero = false; 53111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // We actually have to narrow the significand here, so we have to follow the 53211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // rounding rules. 53311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert switch (dir) { 53411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert case round_direction::kToZero: 53511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert break; 53611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert case round_direction::kToPositiveInfinity: 53711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert round_away_from_zero = !isNegative(); 53811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert break; 53911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert case round_direction::kToNegativeInfinity: 54011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert round_away_from_zero = isNegative(); 54111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert break; 54211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert case round_direction::kToNearestEven: 54311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // Have to round down, round bit is 0 54411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert if ((first_rounded_bit & significand) == 0) { 54511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert break; 54611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 54711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert if (((significand & throwaway_mask) & ~first_rounded_bit) != 0) { 54811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // If any subsequent bit of 
the rounded portion is non-0 then we round 54911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // up. 55011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert round_away_from_zero = true; 55111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert break; 55211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 55311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // We are exactly half-way between 2 numbers, pick even. 55411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert if ((significand & last_significant_bit) != 0) { 55511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // 1 for our last bit, round up. 55611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert round_away_from_zero = true; 55711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert break; 55811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 55911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert break; 56011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 56111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 56211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert if (round_away_from_zero) { 56311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert return static_cast<other_uint_type>( 56411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert negatable_right_shift<num_throwaway_bits>::val(incrementSignificand( 56511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert significand, last_significant_bit, carry_bit))); 56611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } else { 56711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert return static_cast<other_uint_type>( 56811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert negatable_right_shift<num_throwaway_bits>::val(significand)); 56911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 57011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 57111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 57211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // Casts this value to another HexFloat. If the cast is widening, 57311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // then round_dir is ignored. 
If the cast is narrowing, then 57411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // the result is rounded in the direction specified. 57511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // This number will retain Nan and Inf values. 57611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // It will also saturate to Inf if the number overflows, and 57711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // underflow to (0 or min depending on rounding) if the number underflows. 57811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert template <typename other_T> 57911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert void castTo(other_T& other, round_direction round_dir) { 58011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert other = other_T(static_cast<typename other_T::native_type>(0)); 58111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert bool negate = isNegative(); 58211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert if (getUnsignedBits() == 0) { 58311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert if (negate) { 58411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert other.set_value(-other.value()); 58511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 58611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert return; 58711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 58811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert uint_type significand = getSignificandBits(); 58911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert bool carried = false; 59011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert typename other_T::uint_type rounded_significand = 59111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert getRoundedNormalizedSignificand<other_T>(round_dir, &carried); 59211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 59311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert int_type exponent = getUnbiasedExponent(); 59411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert if (exponent == min_exponent) { 59511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // If we are denormal, normalize the 
exponent, so that we can encode 59611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // easily. 59711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert exponent = static_cast<int_type>(exponent + 1); 59811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert for (uint_type check_bit = first_exponent_bit >> 1; check_bit != 0; 59911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert check_bit = static_cast<uint_type>(check_bit >> 1)) { 60011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert exponent = static_cast<int_type>(exponent - 1); 60111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert if (check_bit & significand) break; 60211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 60311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 60411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 60511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert bool is_nan = 60611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert (getBits() & exponent_mask) == exponent_mask && significand != 0; 60711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert bool is_inf = 60811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert !is_nan && 60911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert ((exponent + carried) > static_cast<int_type>(other_T::exponent_bias) || 61011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert (significand == 0 && (getBits() & exponent_mask) == exponent_mask)); 61111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 61211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // If we are Nan or Inf we should pass that through. 61311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert if (is_inf) { 61411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert other.set_value(BitwiseCast<typename other_T::underlying_type>( 61511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static_cast<typename other_T::uint_type>( 61611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert (negate ? 
other_T::sign_mask : 0) | other_T::exponent_mask))); 61711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert return; 61811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 61911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert if (is_nan) { 62011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert typename other_T::uint_type shifted_significand; 62111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert shifted_significand = static_cast<typename other_T::uint_type>( 62211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert negatable_left_shift< 62311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static_cast<int_type>(other_T::num_fraction_bits) - 62411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static_cast<int_type>(num_fraction_bits)>::val(significand)); 62511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 62611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // We are some sort of Nan. We try to keep the bit-pattern of the Nan 62711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // as close as possible. If we had to shift off bits so we are 0, then we 62811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // just set the last bit. 62911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert other.set_value(BitwiseCast<typename other_T::underlying_type>( 63011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static_cast<typename other_T::uint_type>( 63111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert (negate ? other_T::sign_mask : 0) | other_T::exponent_mask | 63211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert (shifted_significand == 0 ? 0x1 : shifted_significand)))); 63311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert return; 63411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 63511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 63611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert bool round_underflow_up = 63711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert isNegative() ? 
round_dir == round_direction::kToNegativeInfinity 63811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert : round_dir == round_direction::kToPositiveInfinity; 63911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert using other_int_type = typename other_T::int_type; 64011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // setFromSignUnbiasedExponentAndNormalizedSignificand will 64111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // zero out any underflowing value (but retain the sign). 64211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert other.setFromSignUnbiasedExponentAndNormalizedSignificand( 64311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert negate, static_cast<other_int_type>(exponent), rounded_significand, 64411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert round_underflow_up); 64511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert return; 64611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 64711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 64811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert private: 64911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert T value_; 65011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 65111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static_assert(num_used_bits == 65211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert Traits::num_exponent_bits + Traits::num_fraction_bits + 1, 65311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert "The number of bits do not fit"); 65411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static_assert(sizeof(T) == sizeof(uint_type), "The type sizes do not match"); 65511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert}; 65611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 65711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert// Returns 4 bits represented by the hex character. 
// The argument must be one of '0'-'9', 'a'-'f' or 'A'-'F'; the result is its
// value in the range 0..15. Any other input, including the NUL character and
// values outside the range of char, trips the assertion and yields 0 when
// assertions are disabled.
inline uint8_t get_nibble_from_character(int character) {
  // Range checks are used instead of strchr: strchr also matches the string
  // terminator (so '\0' would be reported as a digit) and converts its
  // argument to char (so out-of-range ints could alias real digits).
  if (character >= '0' && character <= '9') {
    return static_cast<uint8_t>(character - '0');
  }
  if (character >= 'a' && character <= 'f') {
    return static_cast<uint8_t>(character - 'a' + 0xa);
  }
  if (character >= 'A' && character <= 'F') {
    return static_cast<uint8_t>(character - 'A' + 0xa);
  }

  assert(false && "This was called with a non-hex character");
  return 0;
}

// Outputs the given HexFloat to the stream.
67611cd02dfb91661c65134cac258cf5924270e9d2Dan Alberttemplate <typename T, typename Traits> 67711cd02dfb91661c65134cac258cf5924270e9d2Dan Albertstd::ostream& operator<<(std::ostream& os, const HexFloat<T, Traits>& value) { 67811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert using HF = HexFloat<T, Traits>; 67911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert using uint_type = typename HF::uint_type; 68011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert using int_type = typename HF::int_type; 68111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 68211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static_assert(HF::num_used_bits != 0, 68311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert "num_used_bits must be non-zero for a valid float"); 68411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static_assert(HF::num_exponent_bits != 0, 68511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert "num_exponent_bits must be non-zero for a valid float"); 68611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static_assert(HF::num_fraction_bits != 0, 68711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert "num_fractin_bits must be non-zero for a valid float"); 68811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 68911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert const uint_type bits = spvutils::BitwiseCast<uint_type>(value.value()); 69011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert const char* const sign = (bits & HF::sign_mask) ? 
"-" : ""; 69111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert const uint_type exponent = static_cast<uint_type>( 69211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert (bits & HF::exponent_mask) >> HF::num_fraction_bits); 69311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 69411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert uint_type fraction = static_cast<uint_type>((bits & HF::fraction_encode_mask) 69511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert << HF::num_overflow_bits); 69611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 69711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert const bool is_zero = exponent == 0 && fraction == 0; 69811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert const bool is_denorm = exponent == 0 && !is_zero; 69911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 70011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // exponent contains the biased exponent we have to convert it back into 70111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // the normal range. 70211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert int_type int_exponent = static_cast<int_type>(exponent - HF::exponent_bias); 70311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // If the number is all zeros, then we actually have to NOT shift the 70411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // exponent. 70511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert int_exponent = is_zero ? 0 : int_exponent; 70611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 70711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // If we are denorm, then start shifting, and decreasing the exponent until 70811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // our leading bit is 1. 
70911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 71011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert if (is_denorm) { 71111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert while ((fraction & HF::fraction_top_bit) == 0) { 71211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert fraction = static_cast<uint_type>(fraction << 1); 71311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert int_exponent = static_cast<int_type>(int_exponent - 1); 71411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 71511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // Since this is denormalized, we have to consume the leading 1 since it 71611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // will end up being implicit. 71711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert fraction = static_cast<uint_type>(fraction << 1); // eat the leading 1 71811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert fraction &= HF::fraction_represent_mask; 71911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 72011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 72111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert uint_type fraction_nibbles = HF::fraction_nibbles; 72211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // We do not have to display any trailing 0s, since this represents the 72311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // fractional part. 
72411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert while (fraction_nibbles > 0 && (fraction & 0xF) == 0) { 72511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // Shift off any trailing values; 72611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert fraction = static_cast<uint_type>(fraction >> 4); 72711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert --fraction_nibbles; 72811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 72911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 73011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert const auto saved_flags = os.flags(); 73111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert const auto saved_fill = os.fill(); 73211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 73311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert os << sign << "0x" << (is_zero ? '0' : '1'); 73411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert if (fraction_nibbles) { 73511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // Make sure to keep the leading 0s in place, since this is the fractional 73611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // part. 73711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert os << "." << std::setw(static_cast<int>(fraction_nibbles)) 73811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert << std::setfill('0') << std::hex << fraction; 73911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 74011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert os << "p" << std::dec << (int_exponent >= 0 ? 
"+" : "") << int_exponent; 74111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 74211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert os.flags(saved_flags); 74311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert os.fill(saved_fill); 74411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 74511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert return os; 74611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert} 74711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 74811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert// Returns true if negate_value is true and the next character on the 74911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert// input stream is a plus or minus sign. In that case we also set the fail bit 75011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert// on the stream and set the value to the zero value for its type. 75111cd02dfb91661c65134cac258cf5924270e9d2Dan Alberttemplate <typename T, typename Traits> 75211cd02dfb91661c65134cac258cf5924270e9d2Dan Albertinline bool RejectParseDueToLeadingSign(std::istream& is, bool negate_value, 75311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert HexFloat<T, Traits>& value) { 75411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert if (negate_value) { 75511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert auto next_char = is.peek(); 75611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert if (next_char == '-' || next_char == '+') { 75711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // Fail the parse. Emulate standard behaviour by setting the value to 75811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // the zero value, and set the fail bit on the stream. 
75911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert value = HexFloat<T, Traits>(typename HexFloat<T, Traits>::uint_type{0}); 76011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert is.setstate(std::ios_base::failbit); 76111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert return true; 76211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 76311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 76411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert return false; 76511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert} 76611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 76711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert// Parses a floating point number from the given stream and stores it into the 76811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert// value parameter. 76911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert// If negate_value is true then the number may not have a leading minus or 77011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert// plus, and if it successfully parses, then the number is negated before 77111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert// being stored into the value parameter. 77211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert// If the value cannot be correctly parsed or overflows the target floating 77311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert// point type, then set the fail bit on the stream. 77411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert// TODO(dneto): Promise C++11 standard behavior in how the value is set in 77511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert// the error case, but only after all target platforms implement it correctly. 77611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert// In particular, the Microsoft C++ runtime appears to be out of spec. 
77711cd02dfb91661c65134cac258cf5924270e9d2Dan Alberttemplate <typename T, typename Traits> 77811cd02dfb91661c65134cac258cf5924270e9d2Dan Albertinline std::istream& ParseNormalFloat(std::istream& is, bool negate_value, 77911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert HexFloat<T, Traits>& value) { 78011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert if (RejectParseDueToLeadingSign(is, negate_value, value)) { 78111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert return is; 78211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 78311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert T val; 78411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert is >> val; 78511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert if (negate_value) { 78611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert val = -val; 78711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 78811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert value.set_value(val); 78911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // In the failure case, map -0.0 to 0.0. 79011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert if (is.fail() && value.getUnsignedBits() == 0u) { 79111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert value = HexFloat<T, Traits>(typename HexFloat<T, Traits>::uint_type{0}); 79211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 79311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert if (val.isInfinity()) { 79411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // Fail the parse. Emulate standard behaviour by setting the value to 79511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // the closest normal value, and set the fail bit on the stream. 79611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert value.set_value((value.isNegative() | negate_value) ? 
T::lowest() 79711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert : T::max()); 79811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert is.setstate(std::ios_base::failbit); 79911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 80011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert return is; 80111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert} 80211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 80311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert// Specialization of ParseNormalFloat for FloatProxy<Float16> values. 80411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert// This will parse the float as it were a 32-bit floating point number, 80511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert// and then round it down to fit into a Float16 value. 80611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert// The number is rounded towards zero. 80711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert// If negate_value is true then the number may not have a leading minus or 80811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert// plus, and if it successfully parses, then the number is negated before 80911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert// being stored into the value parameter. 81011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert// If the value cannot be correctly parsed or overflows the target floating 81111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert// point type, then set the fail bit on the stream. 81211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert// TODO(dneto): Promise C++11 standard behavior in how the value is set in 81311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert// the error case, but only after all target platforms implement it correctly. 81411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert// In particular, the Microsoft C++ runtime appears to be out of spec. 
81511cd02dfb91661c65134cac258cf5924270e9d2Dan Alberttemplate <> 81611cd02dfb91661c65134cac258cf5924270e9d2Dan Albertinline std::istream& 81711cd02dfb91661c65134cac258cf5924270e9d2Dan AlbertParseNormalFloat<FloatProxy<Float16>, HexFloatTraits<FloatProxy<Float16>>>( 81811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert std::istream& is, bool negate_value, 81911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert HexFloat<FloatProxy<Float16>, HexFloatTraits<FloatProxy<Float16>>>& value) { 82011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // First parse as a 32-bit float. 82111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert HexFloat<FloatProxy<float>> float_val(0.0f); 82211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert ParseNormalFloat(is, negate_value, float_val); 82311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 82411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // Then convert to 16-bit float, saturating at infinities, and 82511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // rounding toward zero. 82611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert float_val.castTo(value, round_direction::kToZero); 82711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 82811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // Overflow on 16-bit behaves the same as for 32- and 64-bit: set the 82911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // fail bit and set the lowest or highest value. 83011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert if (Float16::isInfinity(value.value().getAsFloat())) { 83111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert value.set_value(value.isNegative() ? 
Float16::lowest() : Float16::max()); 83211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert is.setstate(std::ios_base::failbit); 83311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 83411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert return is; 83511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert} 83611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 83711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert// Reads a HexFloat from the given stream. 83811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert// If the float is not encoded as a hex-float then it will be parsed 83911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert// as a regular float. 84011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert// This may fail if your stream does not support at least one unget. 84111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert// Nan values can be encoded with "0x1.<not zero>p+exponent_bias". 84211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert// This would normally overflow a float and round to 84311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert// infinity but this special pattern is the exact representation for a NaN, 84411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert// and therefore is actually encoded as the correct NaN. To encode inf, 84511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert// either 0x0p+exponent_bias can be specified or any exponent greater than 84611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert// exponent_bias. 84711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert// Examples using IEEE 32-bit float encoding. 
84811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert// 0x1.0p+128 (+inf) 84911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert// -0x1.0p-128 (-inf) 85011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert// 85111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert// 0x1.1p+128 (+Nan) 85211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert// -0x1.1p+128 (-Nan) 85311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert// 85411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert// 0x1p+129 (+inf) 85511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert// -0x1p+129 (-inf) 85611cd02dfb91661c65134cac258cf5924270e9d2Dan Alberttemplate <typename T, typename Traits> 85711cd02dfb91661c65134cac258cf5924270e9d2Dan Albertstd::istream& operator>>(std::istream& is, HexFloat<T, Traits>& value) { 85811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert using HF = HexFloat<T, Traits>; 85911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert using uint_type = typename HF::uint_type; 86011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert using int_type = typename HF::int_type; 86111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 86211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert value.set_value(static_cast<typename HF::native_type>(0.f)); 86311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 86411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert if (is.flags() & std::ios::skipws) { 86511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // If the user wants to skip whitespace , then we should obey that. 
86611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert while (std::isspace(is.peek())) { 86711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert is.get(); 86811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 86911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 87011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 87111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert auto next_char = is.peek(); 87211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert bool negate_value = false; 87311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 87411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert if (next_char != '-' && next_char != '0') { 87511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert return ParseNormalFloat(is, negate_value, value); 87611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 87711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 87811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert if (next_char == '-') { 87911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert negate_value = true; 88011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert is.get(); 88111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert next_char = is.peek(); 88211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 88311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 88411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert if (next_char == '0') { 88511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert is.get(); // We may have to unget this. 
88611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert auto maybe_hex_start = is.peek(); 88711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert if (maybe_hex_start != 'x' && maybe_hex_start != 'X') { 88811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert is.unget(); 88911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert return ParseNormalFloat(is, negate_value, value); 89011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } else { 89111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert is.get(); // Throw away the 'x'; 89211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 89311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } else { 89411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert return ParseNormalFloat(is, negate_value, value); 89511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 89611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 89711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // This "looks" like a hex-float so treat it as one. 89811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert bool seen_p = false; 89911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert bool seen_dot = false; 90011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert uint_type fraction_index = 0; 90111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 90211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert uint_type fraction = 0; 90311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert int_type exponent = HF::exponent_bias; 90411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 90511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // Strip off leading zeros so we don't have to special-case them later. 
90611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert while ((next_char = is.peek()) == '0') { 90711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert is.get(); 90811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 90911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 91011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert bool is_denorm = 91111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert true; // Assume denorm "representation" until we hear otherwise. 91211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // NB: This does not mean the value is actually denorm, 91311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // it just means that it was written 0. 91411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert bool bits_written = false; // Stays false until we write a bit. 91511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert while (!seen_p && !seen_dot) { 91611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // Handle characters that are left of the fractional part. 91711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert if (next_char == '.') { 91811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert seen_dot = true; 91911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } else if (next_char == 'p') { 92011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert seen_p = true; 92111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } else if (::isxdigit(next_char)) { 92211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // We know this is not denormalized since we have stripped all leading 92311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // zeroes and we are not a ".". 92411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert is_denorm = false; 92511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert int number = get_nibble_from_character(next_char); 92611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert for (int i = 0; i < 4; ++i, number <<= 1) { 92711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert uint_type write_bit = (number & 0x8) ? 
0x1 : 0x0; 92811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert if (bits_written) { 92911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // If we are here the bits represented belong in the fractional 93011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // part of the float, and we have to adjust the exponent accordingly. 93111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert fraction = static_cast<uint_type>( 93211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert fraction | 93311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static_cast<uint_type>( 93411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert write_bit << (HF::top_bit_left_shift - fraction_index++))); 93511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert exponent = static_cast<int_type>(exponent + 1); 93611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 93711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert bits_written |= write_bit != 0; 93811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 93911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } else { 94011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // We have not found our exponent yet, so we have to fail. 94111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert is.setstate(std::ios::failbit); 94211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert return is; 94311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 94411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert is.get(); 94511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert next_char = is.peek(); 94611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 94711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert bits_written = false; 94811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert while (seen_dot && !seen_p) { 94911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // Handle only fractional parts now. 
95011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert if (next_char == 'p') { 95111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert seen_p = true; 95211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } else if (::isxdigit(next_char)) { 95311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert int number = get_nibble_from_character(next_char); 95411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert for (int i = 0; i < 4; ++i, number <<= 1) { 95511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert uint_type write_bit = (number & 0x8) ? 0x01 : 0x00; 95611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert bits_written |= write_bit != 0; 95711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert if (is_denorm && !bits_written) { 95811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // Handle modifying the exponent here this way we can handle 95911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // an arbitrary number of hex values without overflowing our 96011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // integer. 96111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert exponent = static_cast<int_type>(exponent - 1); 96211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } else { 96311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert fraction = static_cast<uint_type>( 96411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert fraction | 96511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static_cast<uint_type>( 96611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert write_bit << (HF::top_bit_left_shift - fraction_index++))); 96711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 96811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 96911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } else { 97011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // We still have not found our 'p' exponent yet, so this is not a valid 97111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // hex-float. 
97211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert is.setstate(std::ios::failbit); 97311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert return is; 97411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 97511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert is.get(); 97611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert next_char = is.peek(); 97711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 97811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 97911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert bool seen_sign = false; 98011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert int8_t exponent_sign = 1; 98111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert int_type written_exponent = 0; 98211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert while (true) { 98311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert if ((next_char == '-' || next_char == '+')) { 98411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert if (seen_sign) { 98511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert is.setstate(std::ios::failbit); 98611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert return is; 98711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 98811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert seen_sign = true; 98911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert exponent_sign = (next_char == '-') ? -1 : 1; 99011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } else if (::isdigit(next_char)) { 99111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // Hex-floats express their exponent as decimal. 
99211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert written_exponent = static_cast<int_type>(written_exponent * 10); 99311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert written_exponent = 99411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static_cast<int_type>(written_exponent + (next_char - '0')); 99511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } else { 99611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert break; 99711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 99811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert is.get(); 99911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert next_char = is.peek(); 100011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 100111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 100211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert written_exponent = static_cast<int_type>(written_exponent * exponent_sign); 100311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert exponent = static_cast<int_type>(exponent + written_exponent); 100411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 100511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert bool is_zero = is_denorm && (fraction == 0); 100611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert if (is_denorm && !is_zero) { 100711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert fraction = static_cast<uint_type>(fraction << 1); 100811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert exponent = static_cast<int_type>(exponent - 1); 100911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } else if (is_zero) { 101011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert exponent = 0; 101111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 101211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 101311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert if (exponent <= 0 && !is_zero) { 101411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert fraction = static_cast<uint_type>(fraction >> 1); 101511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert fraction |= static_cast<uint_type>(1) << HF::top_bit_left_shift; 
101611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 101711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 101811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert fraction = (fraction >> HF::fraction_right_shift) & HF::fraction_encode_mask; 101911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 102011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert const int_type max_exponent = 102111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert SetBits<uint_type, 0, HF::num_exponent_bits>::get; 102211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 102311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // Handle actual denorm numbers 102411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert while (exponent < 0 && !is_zero) { 102511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert fraction = static_cast<uint_type>(fraction >> 1); 102611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert exponent = static_cast<int_type>(exponent + 1); 102711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 102811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert fraction &= HF::fraction_encode_mask; 102911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert if (fraction == 0) { 103011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // We have underflowed our fraction. We should clamp to zero. 103111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert is_zero = true; 103211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert exponent = 0; 103311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 103411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 103511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 103611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert // We have overflowed so we should be inf/-inf. 
103711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert if (exponent > max_exponent) { 103811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert exponent = max_exponent; 103911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert fraction = 0; 104011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 104111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 104211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert uint_type output_bits = static_cast<uint_type>( 104311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static_cast<uint_type>(negate_value ? 1 : 0) << HF::top_bit_left_shift); 104411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert output_bits |= fraction; 104511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 104611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert uint_type shifted_exponent = static_cast<uint_type>( 104711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert static_cast<uint_type>(exponent << HF::exponent_left_shift) & 104811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert HF::exponent_mask); 104911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert output_bits |= shifted_exponent; 105011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 105111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert T output_float = spvutils::BitwiseCast<T>(output_bits); 105211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert value.set_value(output_float); 105311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 105411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert return is; 105511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert} 105611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 105711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert// Writes a FloatProxy value to a stream. 105811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert// Zero and normal numbers are printed in the usual notation, but with 105911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert// enough digits to fully reproduce the value. 
Other values (subnormal, 106011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert// NaN, and infinity) are printed as a hex float. 106111cd02dfb91661c65134cac258cf5924270e9d2Dan Alberttemplate <typename T> 106211cd02dfb91661c65134cac258cf5924270e9d2Dan Albertstd::ostream& operator<<(std::ostream& os, const FloatProxy<T>& value) { 106311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert auto float_val = value.getAsFloat(); 106411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert switch (std::fpclassify(float_val)) { 106511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert case FP_ZERO: 106611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert case FP_NORMAL: { 106711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert auto saved_precision = os.precision(); 106811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert os.precision(std::numeric_limits<T>::digits10); 106911cd02dfb91661c65134cac258cf5924270e9d2Dan Albert os << float_val; 107011cd02dfb91661c65134cac258cf5924270e9d2Dan Albert os.precision(saved_precision); 107111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } break; 107211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert default: 107311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert os << HexFloat<FloatProxy<T>>(value); 107411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert break; 107511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert } 107611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert return os; 107711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert} 107811cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 107911cd02dfb91661c65134cac258cf5924270e9d2Dan Alberttemplate <> 108011cd02dfb91661c65134cac258cf5924270e9d2Dan Albertinline std::ostream& operator<<<Float16>(std::ostream& os, 108111cd02dfb91661c65134cac258cf5924270e9d2Dan Albert const FloatProxy<Float16>& value) { 108211cd02dfb91661c65134cac258cf5924270e9d2Dan Albert os << HexFloat<FloatProxy<Float16>>(value); 108311cd02dfb91661c65134cac258cf5924270e9d2Dan Albert return os; 108411cd02dfb91661c65134cac258cf5924270e9d2Dan Albert} 
108511cd02dfb91661c65134cac258cf5924270e9d2Dan Albert} 108611cd02dfb91661c65134cac258cf5924270e9d2Dan Albert 108711cd02dfb91661c65134cac258cf5924270e9d2Dan Albert#endif // LIBSPIRV_UTIL_HEX_FLOAT_H_ 1088