14 #include "ngraph/ngraph_visibility.hpp"
16 #define ROUND_MODE_TO_NEAREST_EVEN
// Field widths of the packed representation: 10 fraction bits and 5 exponent
// bits, with an exponent bias of 15.
28 static uint32_t constexpr frac_size = 10;
29 static uint32_t constexpr exp_size = 5;
30 static uint32_t constexpr exp_bias = 15;
// Builds a value directly from its three fields. Every argument is masked to
// its field width before packing, so out-of-range inputs are silently
// truncated: sign -> bit 15, biased_exponent -> bits 14..10, fraction -> bits 9..0.
// NOTE(review): the masks and shift counts are literals rather than being
// derived from frac_size / exp_size above; keep them in sync if those change.
32 float16(uint32_t sign, uint32_t biased_exponent, uint32_t fraction)
33 : m_value((sign & 0x01) << 15 | (biased_exponent & 0x1F) << 10 | (fraction & 0x03FF))
// Member-initializer of a converting constructor whose signature is not part
// of this listing. `value` is cast to float, a temporary float16 is built from
// that float, and the temporary's raw m_value is copied into this object.
41 : m_value{
float16{
static_cast<float>(value)}.m_value}
// Textual representation of the value; the definition is not in this listing.
45 std::string to_string()
const;
// Comparison operators taking an arbitrary operand type T. The
// `template <typename T>` header lines are not shown in this listing.
// operator== and the four ordering operators are only declared here; their
// out-of-class definitions appear further down.
48 bool operator==(
const T& other)
const;
// Defined inline as the logical negation of operator==.
50 bool operator!=(
const T& other)
const
52 return !(*
this == other);
55 bool operator<(
const T& other)
const;
57 bool operator<=(
const T& other)
const;
59 bool operator>(
const T& other)
const;
61 bool operator>=(
const T& other)
const;
// Arithmetic operators taking an arbitrary operand type T (template header
// lines are not shown in this listing). The binary forms are const and return
// a new float16; the compound forms are non-const.
// NOTE(review): the compound assignments are declared to return float16 by
// value, not float16&, so an expression such as (a += b) += c would modify a
// temporary copy rather than `a` - confirm whether this is intentional.
65 float16 operator+=(
const T& other);
67 float16 operator-(
const T& other)
const;
69 float16 operator-=(
const T& other);
71 float16 operator*(
const T& other)
const;
73 float16 operator*=(
const T& other);
75 float16 operator/(
const T& other)
const;
77 float16 operator/=(
const T& other);
// Implicit conversion to float (not marked explicit); definition not shown here.
78 operator float()
const;
// Constructs a float16 from a raw 16-bit pattern by forwarding to the
// (uint16_t, bool) constructor below. The `true` argument is presumably only a
// tag that selects that overload - TODO confirm, its body is not visible.
80 static constexpr
float16 from_bits(uint16_t bits) {
return float16(bits,
true); }
// Returns the raw 16-bit pattern; definition not shown in this listing.
81 uint16_t to_bits()
const;
// Stream insertion: writes the value converted to float.
82 friend std::ostream& operator<<(std::ostream& out,
const float16& obj)
84 out << static_cast<float>(obj);
// constexpr constructor taking raw bits plus an unnamed bool; this is the
// overload used by from_bits(). Its initializer/body is not part of this listing.
89 constexpr
float16(uint16_t x,
bool)
// Equality against any operand type T: both sides are converted to float with
// static_cast and compared with ==.
// The GCC pragmas silence -Wfloat-equal for this one comparison and restore
// the previous diagnostic state afterwards. The matching #endif lines are not
// shown in this listing.
109 template <
typename T>
110 bool float16::operator==(
const T& other)
const
112 #if defined(__GNUC__)
113 #pragma GCC diagnostic push
114 #pragma GCC diagnostic ignored "-Wfloat-equal"
116 return (
static_cast<float>(*
this) ==
static_cast<float>(other));
117 #if defined(__GNUC__)
118 #pragma GCC diagnostic pop
// Less-than against any operand type T: both sides are converted to float with
// static_cast and the floats are compared.
122 template <
typename T>
123 bool float16::operator<(
const T& other)
const
125 return (
static_cast<float>(*
this) <
static_cast<float>(other));
// Less-than-or-equal against any operand type T: both sides are converted to
// float with static_cast and the floats are compared.
128 template <
typename T>
129 bool float16::operator<=(
const T& other)
const
131 return (
static_cast<float>(*
this) <=
static_cast<float>(other));
// Greater-than against any operand type T: both sides are converted to float
// with static_cast and the floats are compared.
134 template <
typename T>
135 bool float16::operator>(
const T& other)
const
137 return (
static_cast<float>(*
this) >
static_cast<float>(other));
// Greater-than-or-equal against any operand type T: both sides are converted
// to float with static_cast and the floats are compared.
140 template <
typename T>
141 bool float16::operator>=(
const T& other)
const
143 return (
static_cast<float>(*
this) >=
static_cast<float>(other));
// Addition with any operand type T. Both operands are converted to float, the
// sum is computed in float, and the result is brace-converted back to float16.
// The constructor selected by the braces is not visible in this listing -
// presumably the float-taking one; TODO confirm.
146 template <
typename T>
147 float16 float16::operator+(
const T& other)
const
149 return {
static_cast<float>(*this) +
static_cast<float>(other)};
// Compound addition: assigns (*this + other) to *this and returns the result.
// NOTE(review): the return type is float16 (a copy), not float16&.
152 template <
typename T>
153 float16 float16::operator+=(
const T& other)
155 return *
this = *
this + other;
// Subtraction with any operand type T. Both operands are converted to float,
// the difference is computed in float, and the result is brace-converted back
// to float16. The constructor selected by the braces is not visible in this
// listing - presumably the float-taking one; TODO confirm.
158 template <
typename T>
159 float16 float16::operator-(
const T& other)
const
161 return {
static_cast<float>(*this) -
static_cast<float>(other)};
// Compound subtraction: assigns (*this - other) to *this and returns the result.
// NOTE(review): the return type is float16 (a copy), not float16&.
164 template <
typename T>
165 float16 float16::operator-=(
const T& other)
167 return *
this = *
this - other;
// Multiplication with any operand type T. Both operands are converted to
// float, the product is computed in float, and the result is brace-converted
// back to float16. The constructor selected by the braces is not visible in
// this listing - presumably the float-taking one; TODO confirm.
170 template <
typename T>
171 float16 float16::operator*(
const T& other)
const
173 return {
static_cast<float>(*this) *
static_cast<float>(other)};
// Compound multiplication: assigns (*this * other) to *this and returns the result.
// NOTE(review): the return type is float16 (a copy), not float16&.
176 template <
typename T>
177 float16 float16::operator*=(
const T& other)
179 return *
this = *
this * other;
// Division with any operand type T. Both operands are converted to float, the
// quotient is computed in float (no explicit zero-divisor check is made here),
// and the result is brace-converted back to float16. The constructor selected
// by the braces is not visible in this listing - presumably the float-taking
// one; TODO confirm.
182 template <
typename T>
183 float16 float16::operator/(
const T& other)
const
185 return {
static_cast<float>(*this) /
static_cast<float>(other)};
// Compound division: assigns (*this / other) to *this and returns the result.
// NOTE(review): the return type is float16 (a copy), not float16&.
188 template <
typename T>
189 float16 float16::operator/=(
const T& other)
191 return *
this = *
this / other;
// NOTE(review): these appear to be members of a std::numeric_limits
// specialization for ngraph::float16. The class header and the signatures
// around the return statements are not part of this listing, so the accessor
// names below are inferred from the standard member order - confirm against
// the real header.
203 static constexpr
bool is_specialized =
true;
// Presumably min(). 0x0200 has a zero exponent field and fraction 0x200, which
// under the 1/5/10-bit layout with bias 15 is a subnormal (2^-15). With
// min_exponent = -13 the smallest normal value would be 0x0400 (2^-14) -
// TODO confirm which was intended.
206 return ngraph::float16::from_bits(0x0200);
// Presumably max(). 0x7BFF = exponent field 0x1E with all fraction bits set,
// the largest finite pattern (65504).
210 return ngraph::float16::from_bits(0x7BFF);
// Presumably lowest(). Same magnitude as 0x7BFF with the sign bit set.
214 return ngraph::float16::from_bits(0xFBFF);
// 11 significand digits: the 10 stored fraction bits plus the implicit leading bit.
216 static constexpr
int digits = 11;
217 static constexpr
int digits10 = 3;
218 static constexpr
bool is_signed =
true;
219 static constexpr
bool is_integer =
false;
220 static constexpr
bool is_exact =
false;
221 static constexpr
int radix = 2;
// Presumably epsilon() (signature not visible). 0x1200 = exponent field 4,
// fraction 0x200, i.e. 1.5 * 2^-11. For an 11-digit binary significand the
// gap between 1.0 and the next value is 2^-10 (0x1400) - TODO confirm.
224 return ngraph::float16::from_bits(0x1200);
// Presumably round_error() (signature not visible). 0x3C00 encodes 1.0.
// NOTE(review): round_style below is round_to_nearest, for which the standard
// floating-point specializations report 0.5 (0x3800) - confirm.
228 return ngraph::float16::from_bits(0x3C00);
// Exponent limits in the std::numeric_limits convention (one more than the
// actual binary exponent): -13 corresponds to a smallest normal value of
// 2^-14, and 16 to finite values below 2^16. Both are consistent with a 5-bit
// exponent field and bias 15.
230 static constexpr
int min_exponent = -13;
231 static constexpr
int min_exponent10 = -4;
232 static constexpr
int max_exponent = 16;
233 static constexpr
int max_exponent10 = 4;
234 static constexpr
bool has_infinity =
true;
235 static constexpr
bool has_quiet_NaN =
true;
236 static constexpr
bool has_signaling_NaN =
true;
// NOTE(review): reported as denorm_absent. Whether the float <-> float16
// conversion code handles zero-exponent (subnormal) patterns is not visible
// in this listing - confirm this matches the implementation.
237 static constexpr float_denorm_style has_denorm = denorm_absent;
238 static constexpr
bool has_denorm_loss =
false;
// NOTE(review): the accessor signatures around these return statements are
// not part of this listing; names are inferred from the standard
// std::numeric_limits member order - confirm against the real header.
// Presumably infinity(): exponent field all ones, fraction zero.
241 return ngraph::float16::from_bits(0x7C00);
// Presumably quiet_NaN(): exponent all ones, all fraction bits set (so the top
// fraction bit is 1).
245 return ngraph::float16::from_bits(0x7FFF);
// Presumably signaling_NaN(): exponent all ones, top fraction bit clear,
// remaining fraction bits set.
249 return ngraph::float16::from_bits(0x7DFF);
// Presumably denorm_min(): returns the all-zero pattern, i.e. +0.
// NOTE(review): for floating-point types without subnormals the standard
// library convention is for denorm_min() to equal min() - confirm intent.
253 return ngraph::float16::from_bits(0);
255 static constexpr
bool is_iec559 =
false;
// NOTE(review): is_bounded is false although max()/lowest() above return
// finite limits; the standard specializations for built-in floating-point
// types report true - confirm.
256 static constexpr
bool is_bounded =
false;
257 static constexpr
bool is_modulo =
false;
258 static constexpr
bool traps =
false;
259 static constexpr
bool tinyness_before =
false;
260 static constexpr float_round_style round_style = round_to_nearest;
Definition: float16.hpp:21
The Intel nGraph C++ API.
Definition: attribute_adapter.hpp:16
PartialShape operator+(const PartialShape &s1, const PartialShape &s2)
Elementwise addition of two PartialShape objects.