26 #include "ngraph/ngraph_visibility.hpp"
28 #define ROUND_MODE_TO_NEAREST_EVEN
// Bit-layout parameters of the 16-bit storage format. The three-argument
// constructor masks the exponent with 0x1F (5 bits) and the fraction with
// 0x03FF (10 bits), which agrees with the widths below.
// Width of the fraction field, in bits.
40 static uint32_t constexpr frac_size = 10;
// Width of the exponent field, in bits.
41 static uint32_t constexpr exp_size = 5;
// Exponent bias. Presumably subtracted from the stored exponent field to get
// the real exponent; its use is not visible in this excerpt.
42 static uint32_t constexpr exp_bias = 15;
/// \brief Assembles the stored 16-bit pattern from its three bit fields.
/// \param sign Only bit 0 is used; it is placed in bit 15.
/// \param biased_exponent Only the low 5 bits are used; they are placed in bits 10-14.
/// \param fraction Only the low 10 bits are used; they are placed in bits 0-9.
44 float16(uint32_t sign, uint32_t biased_exponent, uint32_t fraction)
45 : m_value((sign & 0x01) << 15 | (biased_exponent & 0x1F) << 10 | (fraction & 0x03FF))
// Member initializer of a constructor whose signature is not visible in this
// excerpt: `value` is cast to float, a temporary float16 is built from that
// float, and the temporary's bit pattern is copied into m_value.
53 : m_value{
float16{
static_cast<float>(value)}.m_value}
/// \brief Produces a std::string for this value without modifying it.
/// Declared here only; the definition (and so the exact text format) is not
/// visible in this excerpt.
57 std::string to_string()
const;
/// \brief Equality against a value of any type T.
/// The out-of-class definition converts both operands with static_cast<float>
/// and compares the resulting floats.
60 bool operator==(
const T& other)
const;
62 bool operator!=(
const T& other)
const
64 return !(*
this == other);
// Relational operators against a value of any type T. Each out-of-class
// definition converts both operands with static_cast<float> and applies the
// corresponding built-in float comparison.
/// \brief Less-than.
67 bool operator<(
const T& other)
const;
/// \brief Less-than-or-equal.
69 bool operator<=(
const T& other)
const;
/// \brief Greater-than.
71 bool operator>(
const T& other)
const;
/// \brief Greater-than-or-equal.
73 bool operator>=(
const T& other)
const;
// Arithmetic operators against a value of any type T. The out-of-class
// definitions compute in float (both operands go through static_cast<float>)
// and build the float16 result from the float value.
// NOTE(review): the compound assignments return float16 by value rather than
// float16&, so the result of `a += b` is a copy of `a` — confirm this is intended.
/// \brief Adds `other` to this value in place and returns the updated value.
77 float16 operator+=(
const T& other);
/// \brief Difference of this value and `other`.
79 float16 operator-(
const T& other)
const;
/// \brief Subtracts `other` from this value in place and returns the updated value.
81 float16 operator-=(
const T& other);
/// \brief Product of this value and `other`.
83 float16 operator*(
const T& other)
const;
/// \brief Multiplies this value by `other` in place and returns the updated value.
85 float16 operator*=(
const T& other);
/// \brief Quotient of this value and `other`.
87 float16 operator/(
const T& other)
const;
/// \brief Divides this value by `other` in place and returns the updated value.
89 float16 operator/=(
const T& other);
/// \brief Conversion to float (not marked explicit); defined outside this excerpt.
90 operator float()
const;
92 static constexpr
float16 from_bits(uint16_t bits) {
return float16(bits,
true); }
/// \brief Returns a uint16_t for this value without modifying it.
/// Presumably the stored bit pattern (the counterpart of from_bits); the
/// definition is not visible in this excerpt.
93 uint16_t to_bits()
const;
/// \brief Stream insertion: writes the value to `out` as a float.
/// \param out Destination stream.
/// \param obj Value to print; converted with static_cast<float> first.
/// The remainder of the body is not visible in this excerpt.
94 friend std::ostream& operator<<(std::ostream& out,
const float16& obj)
96 out << static_cast<float>(obj);
// constexpr constructor taking a 16-bit value and an unnamed bool. from_bits
// calls it as float16(bits, true); since the bool has no name it cannot be
// read, so it serves only to select this overload. The member initializer is
// not visible in this excerpt.
101 constexpr
float16(uint16_t x,
bool)
121 template <
typename T>
122 bool float16::operator==(
const T& other)
const
124 #if defined(__GNUC__)
125 #pragma GCC diagnostic push
126 #pragma GCC diagnostic ignored "-Wfloat-equal"
128 return (
static_cast<float>(*
this) ==
static_cast<float>(other));
129 #if defined(__GNUC__)
130 #pragma GCC diagnostic pop
134 template <
typename T>
135 bool float16::operator<(
const T& other)
const
137 return (
static_cast<float>(*
this) <
static_cast<float>(other));
140 template <
typename T>
141 bool float16::operator<=(
const T& other)
const
143 return (
static_cast<float>(*
this) <=
static_cast<float>(other));
146 template <
typename T>
147 bool float16::operator>(
const T& other)
const
149 return (
static_cast<float>(*
this) >
static_cast<float>(other));
152 template <
typename T>
153 bool float16::operator>=(
const T& other)
const
155 return (
static_cast<float>(*
this) >=
static_cast<float>(other));
158 template <
typename T>
159 float16 float16::operator+(
const T& other)
const
161 return {
static_cast<float>(*this) +
static_cast<float>(other)};
164 template <
typename T>
165 float16 float16::operator+=(
const T& other)
167 return *
this = *
this + other;
170 template <
typename T>
171 float16 float16::operator-(
const T& other)
const
173 return {
static_cast<float>(*this) -
static_cast<float>(other)};
176 template <
typename T>
177 float16 float16::operator-=(
const T& other)
179 return *
this = *
this - other;
182 template <
typename T>
183 float16 float16::operator*(
const T& other)
const
185 return {
static_cast<float>(*this) *
static_cast<float>(other)};
188 template <
typename T>
189 float16 float16::operator*=(
const T& other)
191 return *
this = *
this * other;
194 template <
typename T>
195 float16 float16::operator/(
const T& other)
const
197 return {
static_cast<float>(*this) /
static_cast<float>(other)};
200 template <
typename T>
201 float16 float16::operator/=(
const T& other)
203 return *
this = *
this / other;
// Trait members for ngraph::float16. The enclosing declaration and the
// signatures of the functions whose return statements appear below are not
// visible in this excerpt; the member names suggest a std::numeric_limits
// specialization.
215 static constexpr
bool is_specialized =
true;
// Pattern 0x0200: sign 0, exponent field 0, fraction 0x200.
// NOTE(review): presumably the body of min(). With a zero exponent field this
// is not the smallest normal pattern of an IEEE-style half (that would be
// 0x0400) — confirm.
218 return ngraph::float16::from_bits(0x0200);
// Pattern 0x7BFF: sign 0, exponent field 0x1E, fraction all ones
// (presumably max() — confirm).
222 return ngraph::float16::from_bits(0x7BFF);
// Pattern 0xFBFF: the previous pattern with the sign bit set
// (presumably lowest() — confirm).
226 return ngraph::float16::from_bits(0xFBFF);
// One more than the 10-bit fraction width (presumably counting an implicit
// leading bit — confirm).
228 static constexpr
int digits = 11;
229 static constexpr
int digits10 = 3;
230 static constexpr
bool is_signed =
true;
231 static constexpr
bool is_integer =
false;
232 static constexpr
bool is_exact =
false;
233 static constexpr
int radix = 2;
// Pattern 0x1200: sign 0, exponent field 4, fraction 0x200.
// NOTE(review): presumably the body of epsilon(). Under an IEEE-style half
// interpretation 2^-10 would be 0x1400 — confirm.
236 return ngraph::float16::from_bits(0x1200);
// Pattern 0x3C00: sign 0, exponent field 15, fraction 0.
// NOTE(review): presumably the body of round_error(). This pattern encodes 1.0
// under an IEEE-style half interpretation, whereas round-to-nearest types
// usually report 0.5 — confirm.
240 return ngraph::float16::from_bits(0x3C00);
// Exponent range and special-value traits.
242 static constexpr
int min_exponent = -13;
243 static constexpr
int min_exponent10 = -4;
244 static constexpr
int max_exponent = 16;
245 static constexpr
int max_exponent10 = 4;
246 static constexpr
bool has_infinity =
true;
247 static constexpr
bool has_quiet_NaN =
true;
248 static constexpr
bool has_signaling_NaN =
true;
// NOTE(review): the 16-bit layout admits patterns with a zero exponent field
// and a non-zero fraction — confirm that denorm_absent is intended.
249 static constexpr float_denorm_style has_denorm = denorm_absent;
250 static constexpr
bool has_denorm_loss =
false;
// Pattern 0x7C00: exponent field all ones, fraction 0
// (presumably infinity() — confirm).
253 return ngraph::float16::from_bits(0x7C00);
// Pattern 0x7FFF: exponent field all ones, fraction all ones
// (presumably quiet_NaN() — confirm).
257 return ngraph::float16::from_bits(0x7FFF);
// Pattern 0x7DFF: exponent field all ones, fraction 0x1FF, i.e. the top
// fraction bit is clear (presumably signaling_NaN() — confirm).
261 return ngraph::float16::from_bits(0x7DFF);
// All-zero pattern.
// NOTE(review): presumably the body of denorm_min() — confirm that zero is the
// intended result.
265 return ngraph::float16::from_bits(0);
// Conformance and behaviour traits.
267 static constexpr
bool is_iec559 =
false;
// NOTE(review): a 16-bit type can represent only a finite set of values, and
// std::numeric_limits<float>::is_bounded is true — confirm that false is intended.
268 static constexpr
bool is_bounded =
false;
269 static constexpr
bool is_modulo =
false;
270 static constexpr
bool traps =
false;
271 static constexpr
bool tinyness_before =
false;
272 static constexpr float_round_style round_style = round_to_nearest;
Definition: float16.hpp:33
The Intel nGraph C++ API.
Definition: attribute_adapter.hpp:28
PartialShape operator+(const PartialShape &s1, const PartialShape &s2)
Elementwise addition of two PartialShape objects.