26 #include "ngraph/ngraph_visibility.hpp"
28 #define ROUND_MODE_TO_NEAREST_EVEN
42 #if defined ROUND_MODE_TO_NEAREST
43 round_to_nearest(value)
44 #elif defined ROUND_MODE_TO_NEAREST_EVEN
45 round_to_nearest_even(value)
46 #elif defined ROUND_MODE_TRUNCATE
50 "ROUNDING_MODE must be one of ROUND_MODE_TO_NEAREST, ROUND_MODE_TO_NEAREST_EVEN, or ROUND_MODE_TRUNCATE"
58 : m_value{
bfloat16{
static_cast<float>(value)}.m_value}
62 std::string to_string()
const;
65 bool operator==(
const T& other)
const;
67 bool operator!=(
const T& other)
const
69 return !(*
this == other);
72 bool operator<(
const T& other)
const;
74 bool operator<=(
const T& other)
const;
76 bool operator>(
const T& other)
const;
78 bool operator>=(
const T& other)
const;
84 bfloat16 operator-(
const T& other)
const;
88 bfloat16 operator*(
const T& other)
const;
92 bfloat16 operator/(
const T& other)
const;
95 operator float()
const;
97 static std::vector<float> to_float_vector(
const std::vector<bfloat16>&);
98 static std::vector<bfloat16> from_float_vector(
const std::vector<float>&);
99 static constexpr
bfloat16 from_bits(uint16_t bits) {
return bfloat16(bits,
true); }
100 uint16_t to_bits()
const;
101 friend std::ostream& operator<<(std::ostream& out,
const bfloat16& obj)
103 out << static_cast<float>(obj);
107 #define cu32(x) (F32(x).i)
109 static uint16_t round_to_nearest_even(
float x)
111 return static_cast<uint16_t
>((cu32(x) + ((cu32(x) & 0x00010000) >> 1)) >> 16);
114 static uint16_t round_to_nearest(
float x)
116 return static_cast<uint16_t
>((cu32(x) + 0x8000) >> 16);
119 static uint16_t truncate(
float x) {
return static_cast<uint16_t
>((cu32(x)) >> 16); }
121 constexpr
bfloat16(uint16_t x,
bool)
141 template <
typename T>
142 bool bfloat16::operator==(
const T& other)
const
144 #if defined(__GNUC__)
145 #pragma GCC diagnostic push
146 #pragma GCC diagnostic ignored "-Wfloat-equal"
148 return (
static_cast<float>(*
this) ==
static_cast<float>(other));
149 #if defined(__GNUC__)
150 #pragma GCC diagnostic pop
154 template <
typename T>
155 bool bfloat16::operator<(
const T& other)
const
157 return (
static_cast<float>(*
this) <
static_cast<float>(other));
160 template <
typename T>
161 bool bfloat16::operator<=(
const T& other)
const
163 return (
static_cast<float>(*
this) <=
static_cast<float>(other));
166 template <
typename T>
167 bool bfloat16::operator>(
const T& other)
const
169 return (
static_cast<float>(*
this) >
static_cast<float>(other));
172 template <
typename T>
173 bool bfloat16::operator>=(
const T& other)
const
175 return (
static_cast<float>(*
this) >=
static_cast<float>(other));
178 template <
typename T>
179 bfloat16 bfloat16::operator+(
const T& other)
const
181 return {
static_cast<float>(*this) +
static_cast<float>(other)};
184 template <
typename T>
185 bfloat16 bfloat16::operator+=(
const T& other)
187 return *
this = *
this + other;
190 template <
typename T>
191 bfloat16 bfloat16::operator-(
const T& other)
const
193 return {
static_cast<float>(*this) -
static_cast<float>(other)};
196 template <
typename T>
197 bfloat16 bfloat16::operator-=(
const T& other)
199 return *
this = *
this - other;
202 template <
typename T>
203 bfloat16 bfloat16::operator*(
const T& other)
const
205 return {
static_cast<float>(*this) *
static_cast<float>(other)};
208 template <
typename T>
209 bfloat16 bfloat16::operator*=(
const T& other)
211 return *
this = *
this * other;
214 template <
typename T>
215 bfloat16 bfloat16::operator/(
const T& other)
const
217 return {
static_cast<float>(*this) /
static_cast<float>(other)};
220 template <
typename T>
221 bfloat16 bfloat16::operator/=(
const T& other)
223 return *
this = *
this / other;
233 static constexpr
bool is_specialized =
true;
236 return ngraph::bfloat16::from_bits(0x007F);
240 return ngraph::bfloat16::from_bits(0x7F7F);
244 return ngraph::bfloat16::from_bits(0xFF7F);
246 static constexpr
int digits = 7;
247 static constexpr
int digits10 = 2;
248 static constexpr
bool is_signed =
true;
249 static constexpr
bool is_integer =
false;
250 static constexpr
bool is_exact =
false;
251 static constexpr
int radix = 2;
254 return ngraph::bfloat16::from_bits(0x3C00);
258 return ngraph::bfloat16::from_bits(0x3F00);
260 static constexpr
int min_exponent = -125;
261 static constexpr
int min_exponent10 = -37;
262 static constexpr
int max_exponent = 128;
263 static constexpr
int max_exponent10 = 38;
264 static constexpr
bool has_infinity =
true;
265 static constexpr
bool has_quiet_NaN =
true;
266 static constexpr
bool has_signaling_NaN =
true;
267 static constexpr float_denorm_style has_denorm = denorm_absent;
268 static constexpr
bool has_denorm_loss =
false;
271 return ngraph::bfloat16::from_bits(0x7F80);
275 return ngraph::bfloat16::from_bits(0x7FC0);
279 return ngraph::bfloat16::from_bits(0x7FC0);
283 return ngraph::bfloat16::from_bits(0);
285 static constexpr
bool is_iec559 =
false;
// A 16-bit type can only represent a finite set of values, so the type is
// bounded; the trait was previously reported as false.
static constexpr bool is_bounded = true;
287 static constexpr
bool is_modulo =
false;
288 static constexpr
bool traps =
false;
289 static constexpr
bool tinyness_before =
false;
290 static constexpr float_round_style round_style = round_to_nearest;
Definition: bfloat16.hpp:33
The Intel nGraph C++ API.
Definition: attribute_adapter.hpp:28
PartialShape operator+(const PartialShape &s1, const PartialShape &s2)
Elementwise addition of two PartialShape objects.