float16.hpp
1 // Copyright (C) 2018-2021 Intel Corporation
2 // SPDX-License-Identifier: Apache-2.0
3 //
4 
5 #pragma once
6 
7 #include <cmath>
8 #include <iostream>
9 #include <limits>
10 #include <memory>
11 #include <string>
12 #include <vector>
13 
14 #include "ngraph/ngraph_visibility.hpp"
15 
16 #define ROUND_MODE_TO_NEAREST_EVEN
17 
18 namespace ngraph
19 {
20  class NGRAPH_API float16
21  {
22  public:
23  constexpr float16()
24  : m_value{0}
25  {
26  }
27 
28  static uint32_t constexpr frac_size = 10;
29  static uint32_t constexpr exp_size = 5;
30  static uint32_t constexpr exp_bias = 15;
31 
32  float16(uint32_t sign, uint32_t biased_exponent, uint32_t fraction)
33  : m_value((sign & 0x01) << 15 | (biased_exponent & 0x1F) << 10 | (fraction & 0x03FF))
34  {
35  }
36 
37  float16(float value);
38 
39  template <typename I>
40  explicit float16(I value)
41  : m_value{float16{static_cast<float>(value)}.m_value}
42  {
43  }
44 
45  std::string to_string() const;
46  size_t size() const;
47  template <typename T>
48  bool operator==(const T& other) const;
49  template <typename T>
50  bool operator!=(const T& other) const
51  {
52  return !(*this == other);
53  }
54  template <typename T>
55  bool operator<(const T& other) const;
56  template <typename T>
57  bool operator<=(const T& other) const;
58  template <typename T>
59  bool operator>(const T& other) const;
60  template <typename T>
61  bool operator>=(const T& other) const;
62  template <typename T>
63  float16 operator+(const T& other) const;
64  template <typename T>
65  float16 operator+=(const T& other);
66  template <typename T>
67  float16 operator-(const T& other) const;
68  template <typename T>
69  float16 operator-=(const T& other);
70  template <typename T>
71  float16 operator*(const T& other) const;
72  template <typename T>
73  float16 operator*=(const T& other);
74  template <typename T>
75  float16 operator/(const T& other) const;
76  template <typename T>
77  float16 operator/=(const T& other);
78  operator float() const;
79 
80  static constexpr float16 from_bits(uint16_t bits) { return float16(bits, true); }
81  uint16_t to_bits() const;
82  friend std::ostream& operator<<(std::ostream& out, const float16& obj)
83  {
84  out << static_cast<float>(obj);
85  return out;
86  }
87 
88  private:
89  constexpr float16(uint16_t x, bool)
90  : m_value{x}
91  {
92  }
93  union F32 {
94  F32(float val)
95  : f{val}
96  {
97  }
98  F32(uint32_t val)
99  : i{val}
100  {
101  }
102  float f;
103  uint32_t i;
104  };
105 
106  uint16_t m_value;
107  };
108 
109  template <typename T>
110  bool float16::operator==(const T& other) const
111  {
112 #if defined(__GNUC__)
113 #pragma GCC diagnostic push
114 #pragma GCC diagnostic ignored "-Wfloat-equal"
115 #endif
116  return (static_cast<float>(*this) == static_cast<float>(other));
117 #if defined(__GNUC__)
118 #pragma GCC diagnostic pop
119 #endif
120  }
121 
122  template <typename T>
123  bool float16::operator<(const T& other) const
124  {
125  return (static_cast<float>(*this) < static_cast<float>(other));
126  }
127 
128  template <typename T>
129  bool float16::operator<=(const T& other) const
130  {
131  return (static_cast<float>(*this) <= static_cast<float>(other));
132  }
133 
134  template <typename T>
135  bool float16::operator>(const T& other) const
136  {
137  return (static_cast<float>(*this) > static_cast<float>(other));
138  }
139 
140  template <typename T>
141  bool float16::operator>=(const T& other) const
142  {
143  return (static_cast<float>(*this) >= static_cast<float>(other));
144  }
145 
146  template <typename T>
147  float16 float16::operator+(const T& other) const
148  {
149  return {static_cast<float>(*this) + static_cast<float>(other)};
150  }
151 
152  template <typename T>
153  float16 float16::operator+=(const T& other)
154  {
155  return *this = *this + other;
156  }
157 
158  template <typename T>
159  float16 float16::operator-(const T& other) const
160  {
161  return {static_cast<float>(*this) - static_cast<float>(other)};
162  }
163 
164  template <typename T>
165  float16 float16::operator-=(const T& other)
166  {
167  return *this = *this - other;
168  }
169 
170  template <typename T>
171  float16 float16::operator*(const T& other) const
172  {
173  return {static_cast<float>(*this) * static_cast<float>(other)};
174  }
175 
176  template <typename T>
177  float16 float16::operator*=(const T& other)
178  {
179  return *this = *this * other;
180  }
181 
182  template <typename T>
183  float16 float16::operator/(const T& other) const
184  {
185  return {static_cast<float>(*this) / static_cast<float>(other)};
186  }
187 
188  template <typename T>
189  float16 float16::operator/=(const T& other)
190  {
191  return *this = *this / other;
192  }
193 } // namespace ngraph
194 
195 namespace std
196 {
197  bool NGRAPH_API isnan(ngraph::float16 x);
198 
199  template <>
200  class numeric_limits<ngraph::float16>
201  {
202  public:
203  static constexpr bool is_specialized = true;
204  static constexpr ngraph::float16 min() noexcept
205  {
206  return ngraph::float16::from_bits(0x0200);
207  }
208  static constexpr ngraph::float16 max() noexcept
209  {
210  return ngraph::float16::from_bits(0x7BFF);
211  }
212  static constexpr ngraph::float16 lowest() noexcept
213  {
214  return ngraph::float16::from_bits(0xFBFF);
215  }
216  static constexpr int digits = 11;
217  static constexpr int digits10 = 3;
218  static constexpr bool is_signed = true;
219  static constexpr bool is_integer = false;
220  static constexpr bool is_exact = false;
221  static constexpr int radix = 2;
222  static constexpr ngraph::float16 epsilon() noexcept
223  {
224  return ngraph::float16::from_bits(0x1200);
225  }
226  static constexpr ngraph::float16 round_error() noexcept
227  {
228  return ngraph::float16::from_bits(0x3C00);
229  }
230  static constexpr int min_exponent = -13;
231  static constexpr int min_exponent10 = -4;
232  static constexpr int max_exponent = 16;
233  static constexpr int max_exponent10 = 4;
234  static constexpr bool has_infinity = true;
235  static constexpr bool has_quiet_NaN = true;
236  static constexpr bool has_signaling_NaN = true;
237  static constexpr float_denorm_style has_denorm = denorm_absent;
238  static constexpr bool has_denorm_loss = false;
239  static constexpr ngraph::float16 infinity() noexcept
240  {
241  return ngraph::float16::from_bits(0x7C00);
242  }
243  static constexpr ngraph::float16 quiet_NaN() noexcept
244  {
245  return ngraph::float16::from_bits(0x7FFF);
246  }
247  static constexpr ngraph::float16 signaling_NaN() noexcept
248  {
249  return ngraph::float16::from_bits(0x7DFF);
250  }
251  static constexpr ngraph::float16 denorm_min() noexcept
252  {
253  return ngraph::float16::from_bits(0);
254  }
255  static constexpr bool is_iec559 = false;
256  static constexpr bool is_bounded = false;
257  static constexpr bool is_modulo = false;
258  static constexpr bool traps = false;
259  static constexpr bool tinyness_before = false;
260  static constexpr float_round_style round_style = round_to_nearest;
261  };
262 } // namespace std
Definition: float16.hpp:21
The Intel nGraph C++ API.
Definition: attribute_adapter.hpp:16
PartialShape operator+(const PartialShape &s1, const PartialShape &s2)
Elementwise addition of two PartialShape objects.