bfloat16.hpp
1 // Copyright (C) 2018-2021 Intel Corporation
2 // SPDX-License-Identifier: Apache-2.0
3 //
4 
5 #pragma once
6 
7 #include <cmath>
8 #include <iostream>
9 #include <limits>
10 #include <memory>
11 #include <string>
12 #include <vector>
13 
14 #include "ngraph/ngraph_visibility.hpp"
15 
16 #define ROUND_MODE_TO_NEAREST_EVEN
17 
18 namespace ngraph
19 {
20  class NGRAPH_API bfloat16
21  {
22  public:
23  constexpr bfloat16()
24  : m_value{0}
25  {
26  }
27  bfloat16(float value)
28  : m_value
29  {
30 #if defined ROUND_MODE_TO_NEAREST
31  round_to_nearest(value)
32 #elif defined ROUND_MODE_TO_NEAREST_EVEN
33  round_to_nearest_even(value)
34 #elif defined ROUND_MODE_TRUNCATE
35  truncate(value)
36 #else
37 #error \
38  "ROUNDING_MODE must be one of ROUND_MODE_TO_NEAREST, ROUND_MODE_TO_NEAREST_EVEN, or ROUND_MODE_TRUNCATE"
39 #endif
40  }
41  {
42  }
43 
44  template <typename I>
45  explicit bfloat16(I value)
46  : m_value{bfloat16{static_cast<float>(value)}.m_value}
47  {
48  }
49 
50  std::string to_string() const;
51  size_t size() const;
52  template <typename T>
53  bool operator==(const T& other) const;
54  template <typename T>
55  bool operator!=(const T& other) const
56  {
57  return !(*this == other);
58  }
59  template <typename T>
60  bool operator<(const T& other) const;
61  template <typename T>
62  bool operator<=(const T& other) const;
63  template <typename T>
64  bool operator>(const T& other) const;
65  template <typename T>
66  bool operator>=(const T& other) const;
67  template <typename T>
68  bfloat16 operator+(const T& other) const;
69  template <typename T>
70  bfloat16 operator+=(const T& other);
71  template <typename T>
72  bfloat16 operator-(const T& other) const;
73  template <typename T>
74  bfloat16 operator-=(const T& other);
75  template <typename T>
76  bfloat16 operator*(const T& other) const;
77  template <typename T>
78  bfloat16 operator*=(const T& other);
79  template <typename T>
80  bfloat16 operator/(const T& other) const;
81  template <typename T>
82  bfloat16 operator/=(const T& other);
83  operator float() const;
84 
85  static std::vector<float> to_float_vector(const std::vector<bfloat16>&);
86  static std::vector<bfloat16> from_float_vector(const std::vector<float>&);
87  static constexpr bfloat16 from_bits(uint16_t bits) { return bfloat16(bits, true); }
88  uint16_t to_bits() const;
89  friend std::ostream& operator<<(std::ostream& out, const bfloat16& obj)
90  {
91  out << static_cast<float>(obj);
92  return out;
93  }
94 
95 #define cu32(x) (F32(x).i)
96 
97  static uint16_t round_to_nearest_even(float x)
98  {
99  return static_cast<uint16_t>((cu32(x) + ((cu32(x) & 0x00010000) >> 1)) >> 16);
100  }
101 
102  static uint16_t round_to_nearest(float x)
103  {
104  return static_cast<uint16_t>((cu32(x) + 0x8000) >> 16);
105  }
106 
107  static uint16_t truncate(float x) { return static_cast<uint16_t>((cu32(x)) >> 16); }
108 
109  private:
110  constexpr bfloat16(uint16_t x, bool)
111  : m_value{x}
112  {
113  }
114  union F32 {
115  F32(float val)
116  : f{val}
117  {
118  }
119  F32(uint32_t val)
120  : i{val}
121  {
122  }
123  float f;
124  uint32_t i;
125  };
126 
127  uint16_t m_value;
128  };
129 
130  template <typename T>
131  bool bfloat16::operator==(const T& other) const
132  {
133 #if defined(__GNUC__)
134 #pragma GCC diagnostic push
135 #pragma GCC diagnostic ignored "-Wfloat-equal"
136 #endif
137  return (static_cast<float>(*this) == static_cast<float>(other));
138 #if defined(__GNUC__)
139 #pragma GCC diagnostic pop
140 #endif
141  }
142 
143  template <typename T>
144  bool bfloat16::operator<(const T& other) const
145  {
146  return (static_cast<float>(*this) < static_cast<float>(other));
147  }
148 
149  template <typename T>
150  bool bfloat16::operator<=(const T& other) const
151  {
152  return (static_cast<float>(*this) <= static_cast<float>(other));
153  }
154 
155  template <typename T>
156  bool bfloat16::operator>(const T& other) const
157  {
158  return (static_cast<float>(*this) > static_cast<float>(other));
159  }
160 
161  template <typename T>
162  bool bfloat16::operator>=(const T& other) const
163  {
164  return (static_cast<float>(*this) >= static_cast<float>(other));
165  }
166 
167  template <typename T>
168  bfloat16 bfloat16::operator+(const T& other) const
169  {
170  return {static_cast<float>(*this) + static_cast<float>(other)};
171  }
172 
173  template <typename T>
174  bfloat16 bfloat16::operator+=(const T& other)
175  {
176  return *this = *this + other;
177  }
178 
179  template <typename T>
180  bfloat16 bfloat16::operator-(const T& other) const
181  {
182  return {static_cast<float>(*this) - static_cast<float>(other)};
183  }
184 
185  template <typename T>
186  bfloat16 bfloat16::operator-=(const T& other)
187  {
188  return *this = *this - other;
189  }
190 
191  template <typename T>
192  bfloat16 bfloat16::operator*(const T& other) const
193  {
194  return {static_cast<float>(*this) * static_cast<float>(other)};
195  }
196 
197  template <typename T>
198  bfloat16 bfloat16::operator*=(const T& other)
199  {
200  return *this = *this * other;
201  }
202 
203  template <typename T>
204  bfloat16 bfloat16::operator/(const T& other) const
205  {
206  return {static_cast<float>(*this) / static_cast<float>(other)};
207  }
208 
209  template <typename T>
210  bfloat16 bfloat16::operator/=(const T& other)
211  {
212  return *this = *this / other;
213  }
214 } // namespace ngraph
215 
216 namespace std
217 {
218  template <>
219  class numeric_limits<ngraph::bfloat16>
220  {
221  public:
222  static constexpr bool is_specialized = true;
223  static constexpr ngraph::bfloat16 min() noexcept
224  {
225  return ngraph::bfloat16::from_bits(0x007F);
226  }
227  static constexpr ngraph::bfloat16 max() noexcept
228  {
229  return ngraph::bfloat16::from_bits(0x7F7F);
230  }
231  static constexpr ngraph::bfloat16 lowest() noexcept
232  {
233  return ngraph::bfloat16::from_bits(0xFF7F);
234  }
235  static constexpr int digits = 7;
236  static constexpr int digits10 = 2;
237  static constexpr bool is_signed = true;
238  static constexpr bool is_integer = false;
239  static constexpr bool is_exact = false;
240  static constexpr int radix = 2;
241  static constexpr ngraph::bfloat16 epsilon() noexcept
242  {
243  return ngraph::bfloat16::from_bits(0x3C00);
244  }
245  static constexpr ngraph::bfloat16 round_error() noexcept
246  {
247  return ngraph::bfloat16::from_bits(0x3F00);
248  }
249  static constexpr int min_exponent = -125;
250  static constexpr int min_exponent10 = -37;
251  static constexpr int max_exponent = 128;
252  static constexpr int max_exponent10 = 38;
253  static constexpr bool has_infinity = true;
254  static constexpr bool has_quiet_NaN = true;
255  static constexpr bool has_signaling_NaN = true;
256  static constexpr float_denorm_style has_denorm = denorm_absent;
257  static constexpr bool has_denorm_loss = false;
258  static constexpr ngraph::bfloat16 infinity() noexcept
259  {
260  return ngraph::bfloat16::from_bits(0x7F80);
261  }
262  static constexpr ngraph::bfloat16 quiet_NaN() noexcept
263  {
264  return ngraph::bfloat16::from_bits(0x7FC0);
265  }
266  static constexpr ngraph::bfloat16 signaling_NaN() noexcept
267  {
268  return ngraph::bfloat16::from_bits(0x7FC0);
269  }
270  static constexpr ngraph::bfloat16 denorm_min() noexcept
271  {
272  return ngraph::bfloat16::from_bits(0);
273  }
274  static constexpr bool is_iec559 = false;
275  static constexpr bool is_bounded = false;
276  static constexpr bool is_modulo = false;
277  static constexpr bool traps = false;
278  static constexpr bool tinyness_before = false;
279  static constexpr float_round_style round_style = round_to_nearest;
280  };
281 } // namespace std
Definition: bfloat16.hpp:21
The Intel nGraph C++ API.
Definition: attribute_adapter.hpp:16
PartialShape operator+(const PartialShape &s1, const PartialShape &s2)
Elementwise addition of two PartialShape objects.