float16.hpp
1 //*****************************************************************************
2 // Copyright 2017-2021 Intel Corporation
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //*****************************************************************************
16 
17 #pragma once
18 
19 #include <cmath>
20 #include <iostream>
21 #include <limits>
22 #include <memory>
23 #include <string>
24 #include <vector>
25 
26 #include "ngraph/ngraph_visibility.hpp"
27 
28 #define ROUND_MODE_TO_NEAREST_EVEN
29 
30 namespace ngraph
31 {
32  class NGRAPH_API float16
33  {
34  public:
35  constexpr float16()
36  : m_value{0}
37  {
38  }
39 
40  static uint32_t constexpr frac_size = 10;
41  static uint32_t constexpr exp_size = 5;
42  static uint32_t constexpr exp_bias = 15;
43 
44  float16(uint32_t sign, uint32_t biased_exponent, uint32_t fraction)
45  : m_value((sign & 0x01) << 15 | (biased_exponent & 0x1F) << 10 | (fraction & 0x03FF))
46  {
47  }
48 
49  float16(float value);
50 
51  template <typename I>
52  explicit float16(I value)
53  : m_value{float16{static_cast<float>(value)}.m_value}
54  {
55  }
56 
57  std::string to_string() const;
58  size_t size() const;
59  template <typename T>
60  bool operator==(const T& other) const;
61  template <typename T>
62  bool operator!=(const T& other) const
63  {
64  return !(*this == other);
65  }
66  template <typename T>
67  bool operator<(const T& other) const;
68  template <typename T>
69  bool operator<=(const T& other) const;
70  template <typename T>
71  bool operator>(const T& other) const;
72  template <typename T>
73  bool operator>=(const T& other) const;
74  template <typename T>
75  float16 operator+(const T& other) const;
76  template <typename T>
77  float16 operator+=(const T& other);
78  template <typename T>
79  float16 operator-(const T& other) const;
80  template <typename T>
81  float16 operator-=(const T& other);
82  template <typename T>
83  float16 operator*(const T& other) const;
84  template <typename T>
85  float16 operator*=(const T& other);
86  template <typename T>
87  float16 operator/(const T& other) const;
88  template <typename T>
89  float16 operator/=(const T& other);
90  operator float() const;
91 
92  static constexpr float16 from_bits(uint16_t bits) { return float16(bits, true); }
93  uint16_t to_bits() const;
94  friend std::ostream& operator<<(std::ostream& out, const float16& obj)
95  {
96  out << static_cast<float>(obj);
97  return out;
98  }
99 
100  private:
101  constexpr float16(uint16_t x, bool)
102  : m_value{x}
103  {
104  }
105  union F32 {
106  F32(float val)
107  : f{val}
108  {
109  }
110  F32(uint32_t val)
111  : i{val}
112  {
113  }
114  float f;
115  uint32_t i;
116  };
117 
118  uint16_t m_value;
119  };
120 
121  template <typename T>
122  bool float16::operator==(const T& other) const
123  {
124 #if defined(__GNUC__)
125 #pragma GCC diagnostic push
126 #pragma GCC diagnostic ignored "-Wfloat-equal"
127 #endif
128  return (static_cast<float>(*this) == static_cast<float>(other));
129 #if defined(__GNUC__)
130 #pragma GCC diagnostic pop
131 #endif
132  }
133 
134  template <typename T>
135  bool float16::operator<(const T& other) const
136  {
137  return (static_cast<float>(*this) < static_cast<float>(other));
138  }
139 
140  template <typename T>
141  bool float16::operator<=(const T& other) const
142  {
143  return (static_cast<float>(*this) <= static_cast<float>(other));
144  }
145 
146  template <typename T>
147  bool float16::operator>(const T& other) const
148  {
149  return (static_cast<float>(*this) > static_cast<float>(other));
150  }
151 
152  template <typename T>
153  bool float16::operator>=(const T& other) const
154  {
155  return (static_cast<float>(*this) >= static_cast<float>(other));
156  }
157 
158  template <typename T>
159  float16 float16::operator+(const T& other) const
160  {
161  return {static_cast<float>(*this) + static_cast<float>(other)};
162  }
163 
164  template <typename T>
165  float16 float16::operator+=(const T& other)
166  {
167  return *this = *this + other;
168  }
169 
170  template <typename T>
171  float16 float16::operator-(const T& other) const
172  {
173  return {static_cast<float>(*this) - static_cast<float>(other)};
174  }
175 
176  template <typename T>
177  float16 float16::operator-=(const T& other)
178  {
179  return *this = *this - other;
180  }
181 
182  template <typename T>
183  float16 float16::operator*(const T& other) const
184  {
185  return {static_cast<float>(*this) * static_cast<float>(other)};
186  }
187 
188  template <typename T>
189  float16 float16::operator*=(const T& other)
190  {
191  return *this = *this * other;
192  }
193 
194  template <typename T>
195  float16 float16::operator/(const T& other) const
196  {
197  return {static_cast<float>(*this) / static_cast<float>(other)};
198  }
199 
200  template <typename T>
201  float16 float16::operator/=(const T& other)
202  {
203  return *this = *this / other;
204  }
205 }
206 
207 namespace std
208 {
209  bool NGRAPH_API isnan(ngraph::float16 x);
210 
211  template <>
212  class numeric_limits<ngraph::float16>
213  {
214  public:
215  static constexpr bool is_specialized = true;
216  static constexpr ngraph::float16 min() noexcept
217  {
218  return ngraph::float16::from_bits(0x0200);
219  }
220  static constexpr ngraph::float16 max() noexcept
221  {
222  return ngraph::float16::from_bits(0x7BFF);
223  }
224  static constexpr ngraph::float16 lowest() noexcept
225  {
226  return ngraph::float16::from_bits(0xFBFF);
227  }
228  static constexpr int digits = 11;
229  static constexpr int digits10 = 3;
230  static constexpr bool is_signed = true;
231  static constexpr bool is_integer = false;
232  static constexpr bool is_exact = false;
233  static constexpr int radix = 2;
234  static constexpr ngraph::float16 epsilon() noexcept
235  {
236  return ngraph::float16::from_bits(0x1200);
237  }
238  static constexpr ngraph::float16 round_error() noexcept
239  {
240  return ngraph::float16::from_bits(0x3C00);
241  }
242  static constexpr int min_exponent = -13;
243  static constexpr int min_exponent10 = -4;
244  static constexpr int max_exponent = 16;
245  static constexpr int max_exponent10 = 4;
246  static constexpr bool has_infinity = true;
247  static constexpr bool has_quiet_NaN = true;
248  static constexpr bool has_signaling_NaN = true;
249  static constexpr float_denorm_style has_denorm = denorm_absent;
250  static constexpr bool has_denorm_loss = false;
251  static constexpr ngraph::float16 infinity() noexcept
252  {
253  return ngraph::float16::from_bits(0x7C00);
254  }
255  static constexpr ngraph::float16 quiet_NaN() noexcept
256  {
257  return ngraph::float16::from_bits(0x7FFF);
258  }
259  static constexpr ngraph::float16 signaling_NaN() noexcept
260  {
261  return ngraph::float16::from_bits(0x7DFF);
262  }
263  static constexpr ngraph::float16 denorm_min() noexcept
264  {
265  return ngraph::float16::from_bits(0);
266  }
267  static constexpr bool is_iec559 = false;
268  static constexpr bool is_bounded = false;
269  static constexpr bool is_modulo = false;
270  static constexpr bool traps = false;
271  static constexpr bool tinyness_before = false;
272  static constexpr float_round_style round_style = round_to_nearest;
273  };
274 }
Definition: float16.hpp:33
The Intel nGraph C++ API.
Definition: attribute_adapter.hpp:28
PartialShape operator+(const PartialShape &s1, const PartialShape &s2)
Elementwise addition of two PartialShape objects.