#include <cstring>

#include "ngraph/ngraph_visibility.hpp"
16 #define ROUND_MODE_TO_NEAREST_EVEN
30 #if defined ROUND_MODE_TO_NEAREST
31 round_to_nearest(value)
32 #elif defined ROUND_MODE_TO_NEAREST_EVEN
33 round_to_nearest_even(value)
34 #elif defined ROUND_MODE_TRUNCATE
38 "ROUNDING_MODE must be one of ROUND_MODE_TO_NEAREST, ROUND_MODE_TO_NEAREST_EVEN, or ROUND_MODE_TRUNCATE"
46 : m_value{
bfloat16{
static_cast<float>(value)}.m_value}
50 std::string to_string()
const;
53 bool operator==(
const T& other)
const;
55 bool operator!=(
const T& other)
const
57 return !(*
this == other);
60 bool operator<(
const T& other)
const;
62 bool operator<=(
const T& other)
const;
64 bool operator>(
const T& other)
const;
66 bool operator>=(
const T& other)
const;
72 bfloat16 operator-(
const T& other)
const;
76 bfloat16 operator*(
const T& other)
const;
80 bfloat16 operator/(
const T& other)
const;
83 operator float()
const;
85 static std::vector<float> to_float_vector(
const std::vector<bfloat16>&);
86 static std::vector<bfloat16> from_float_vector(
const std::vector<float>&);
87 static constexpr
bfloat16 from_bits(uint16_t bits) {
return bfloat16(bits,
true); }
88 uint16_t to_bits()
const;
89 friend std::ostream& operator<<(std::ostream& out,
const bfloat16& obj)
91 out << static_cast<float>(obj);
95 #define cu32(x) (F32(x).i)
/// \brief Converts a float to a bfloat16 bit pattern with round-to-nearest,
///        ties-to-even.
///
/// The upper 16 bits of the binary32 pattern are kept; the discarded lower
/// 16 bits decide the rounding direction.
///
/// \param x Value to convert.
/// \return The 16-bit pattern nearest to x. NaN inputs yield a quiet NaN that
///         keeps the sign and the upper payload bits of the input.
static uint16_t round_to_nearest_even(float x) {
    static_assert(sizeof(float) == sizeof(uint32_t), "float must be 32 bits wide");
    uint32_t bits;
    std::memcpy(&bits, &x, sizeof(bits));  // well-defined bit copy, no union punning

    // NaN: adding a rounding bias could carry into the exponent/sign field and
    // turn the NaN into a zero or an infinity, so truncate and set the quiet bit.
    if ((bits & 0x7FFFFFFFu) > 0x7F800000u) {
        return static_cast<uint16_t>((bits >> 16) | 0x0040u);
    }

    // Bias = 0x7FFF + kept LSB: a remainder above one half always rounds up,
    // an exact half rounds up only when the kept LSB is odd (ties-to-even).
    const uint32_t kept_lsb = (bits >> 16) & 1u;
    return static_cast<uint16_t>((bits + 0x7FFFu + kept_lsb) >> 16);
}
/// \brief Converts a float to a bfloat16 bit pattern by adding half of the
///        discarded range (0x8000) and keeping the upper 16 bits.
///
/// Exact halves therefore always round away from zero.
///
/// \param x Value to convert.
/// \return The rounded 16-bit pattern. NaN inputs yield a quiet NaN that keeps
///         the sign and the upper payload bits of the input.
static uint16_t round_to_nearest(float x) {
    static_assert(sizeof(float) == sizeof(uint32_t), "float must be 32 bits wide");
    uint32_t bits;
    std::memcpy(&bits, &x, sizeof(bits));  // well-defined bit copy, no union punning

    // NaN: the +0x8000 bias could carry through the exponent into the sign bit
    // (e.g. 0x7FFF8000 would become -0), so truncate and set the quiet bit.
    if ((bits & 0x7FFFFFFFu) > 0x7F800000u) {
        return static_cast<uint16_t>((bits >> 16) | 0x0040u);
    }

    return static_cast<uint16_t>((bits + 0x8000u) >> 16);
}
/// \brief Converts a float to a bfloat16 bit pattern by dropping the lower
///        16 bits of its binary32 representation (round toward zero).
///
/// \param x Value to convert.
/// \return The upper 16 bits of x. NaN inputs yield a quiet NaN that keeps the
///         sign and the upper payload bits of the input.
static uint16_t truncate(float x) {
    static_assert(sizeof(float) == sizeof(uint32_t), "float must be 32 bits wide");
    uint32_t bits;
    std::memcpy(&bits, &x, sizeof(bits));  // well-defined bit copy, no union punning

    const uint32_t upper = bits >> 16;

    // NaN whose payload lives only in the discarded bits would otherwise
    // truncate to an infinity pattern; setting the quiet bit keeps it a NaN.
    if ((bits & 0x7FFFFFFFu) > 0x7F800000u) {
        return static_cast<uint16_t>(upper | 0x0040u);
    }

    return static_cast<uint16_t>(upper);
}
110 constexpr
bfloat16(uint16_t x,
bool)
130 template <
typename T>
131 bool bfloat16::operator==(
const T& other)
const
133 #if defined(__GNUC__)
134 #pragma GCC diagnostic push
135 #pragma GCC diagnostic ignored "-Wfloat-equal"
137 return (
static_cast<float>(*
this) ==
static_cast<float>(other));
138 #if defined(__GNUC__)
139 #pragma GCC diagnostic pop
143 template <
typename T>
144 bool bfloat16::operator<(
const T& other)
const
146 return (
static_cast<float>(*
this) <
static_cast<float>(other));
149 template <
typename T>
150 bool bfloat16::operator<=(
const T& other)
const
152 return (
static_cast<float>(*
this) <=
static_cast<float>(other));
155 template <
typename T>
156 bool bfloat16::operator>(
const T& other)
const
158 return (
static_cast<float>(*
this) >
static_cast<float>(other));
161 template <
typename T>
162 bool bfloat16::operator>=(
const T& other)
const
164 return (
static_cast<float>(*
this) >=
static_cast<float>(other));
167 template <
typename T>
168 bfloat16 bfloat16::operator+(
const T& other)
const
170 return {
static_cast<float>(*this) +
static_cast<float>(other)};
173 template <
typename T>
174 bfloat16 bfloat16::operator+=(
const T& other)
176 return *
this = *
this + other;
179 template <
typename T>
180 bfloat16 bfloat16::operator-(
const T& other)
const
182 return {
static_cast<float>(*this) -
static_cast<float>(other)};
185 template <
typename T>
186 bfloat16 bfloat16::operator-=(
const T& other)
188 return *
this = *
this - other;
191 template <
typename T>
192 bfloat16 bfloat16::operator*(
const T& other)
const
194 return {
static_cast<float>(*this) *
static_cast<float>(other)};
197 template <
typename T>
198 bfloat16 bfloat16::operator*=(
const T& other)
200 return *
this = *
this * other;
203 template <
typename T>
204 bfloat16 bfloat16::operator/(
const T& other)
const
206 return {
static_cast<float>(*this) /
static_cast<float>(other)};
209 template <
typename T>
210 bfloat16 bfloat16::operator/=(
const T& other)
212 return *
this = *
this / other;
222 static constexpr
bool is_specialized =
true;
225 return ngraph::bfloat16::from_bits(0x007F);
229 return ngraph::bfloat16::from_bits(0x7F7F);
233 return ngraph::bfloat16::from_bits(0xFF7F);
235 static constexpr
int digits = 7;
236 static constexpr
int digits10 = 2;
237 static constexpr
bool is_signed =
true;
238 static constexpr
bool is_integer =
false;
239 static constexpr
bool is_exact =
false;
240 static constexpr
int radix = 2;
243 return ngraph::bfloat16::from_bits(0x3C00);
247 return ngraph::bfloat16::from_bits(0x3F00);
249 static constexpr
int min_exponent = -125;
250 static constexpr
int min_exponent10 = -37;
251 static constexpr
int max_exponent = 128;
252 static constexpr
int max_exponent10 = 38;
253 static constexpr
bool has_infinity =
true;
254 static constexpr
bool has_quiet_NaN =
true;
255 static constexpr
bool has_signaling_NaN =
true;
256 static constexpr float_denorm_style has_denorm = denorm_absent;
257 static constexpr
bool has_denorm_loss =
false;
260 return ngraph::bfloat16::from_bits(0x7F80);
264 return ngraph::bfloat16::from_bits(0x7FC0);
268 return ngraph::bfloat16::from_bits(0x7FC0);
272 return ngraph::bfloat16::from_bits(0);
274 static constexpr
bool is_iec559 =
false;
275 static constexpr
bool is_bounded =
false;
276 static constexpr
bool is_modulo =
false;
277 static constexpr
bool traps =
false;
278 static constexpr
bool tinyness_before =
false;
279 static constexpr float_round_style round_style = round_to_nearest;
Definition: bfloat16.hpp:21
The Intel nGraph C++ API.
Definition: attribute_adapter.hpp:16
PartialShape operator+(const PartialShape &s1, const PartialShape &s2)
Elementwise addition of two PartialShape objects.