fake_quantize.hpp
1 //*****************************************************************************
2 // Copyright 2017-2021 Intel Corporation
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //*****************************************************************************
16 
17 #pragma once
18 
19 #include "ngraph/node.hpp"
20 #include "ngraph/op/util/attr_types.hpp"
21 #include "ngraph/op/util/fused_op.hpp"
22 
23 NGRAPH_SUPPRESS_DEPRECATED_START
24 
25 namespace ngraph
26 {
27  namespace op
28  {
29  namespace v0
30  {
31  ///
32  /// \brief Class performing element-wise linear quantization.
33  ///
34  /// \note Input floating point values are quantized into a discrete
35  /// set of floating point values.
36  ///
37  /// \paragraph Implementation This class creates a node which performs the following
38  /// operation:
39  ///
40  /// round((data - input_low) / (input_high - input_low) * (levels-1)) /
41  /// (levels-1) * (output_high - output_low) + output_low
42  ///
43  ///
44  class NGRAPH_API FakeQuantize : public ngraph::op::Op
45  {
46  public:
47  NGRAPH_RTTI_DECLARATION;
48 
49  FakeQuantize();
50  ///
51  /// \brief Constructs a FakeQuantize operation node.
52  ///
53  /// \param[in] data The input data tensor.
54  /// \param[in] input_low The minimum limit for input values.
55  /// \param[in] input_high The maximum limit for input values.
56  /// \param[in] output_low The minimum quantized value.
57  /// \param[in] output_high The maximum quantized value.
58  /// \param[in] levels The number of quantization levels.
59  /// \param[in] auto_broadcast AutoBroadcast mode to be used for broadcasting
60  /// limit values
61  ///
63  const Output<Node>& input_low,
64  const Output<Node>& input_high,
65  const Output<Node>& output_low,
66  const Output<Node>& output_high,
67  std::size_t levels,
68  const AutoBroadcastSpec& auto_broadcast =
69  AutoBroadcastSpec(AutoBroadcastType::NUMPY));
70 
71  bool visit_attributes(AttributeVisitor& visitor) override;
72  virtual void validate_and_infer_types() override;
73 
74  virtual std::shared_ptr<Node>
75  clone_with_new_inputs(const OutputVector& new_args) const override;
76 
77  std::size_t get_levels() const { return m_levels; }
78  void set_levels(std::size_t levels) { m_levels = levels; }
79  const AutoBroadcastSpec& get_auto_broadcast() const { return m_auto_broadcast; }
80  void set_auto_broadcast(const AutoBroadcastSpec& auto_broadcast)
81  {
82  m_auto_broadcast = auto_broadcast;
83  }
84 
85  private:
86  std::size_t m_levels;
87  AutoBroadcastSpec m_auto_broadcast = op::AutoBroadcastType::NUMPY;
88  };
89  }
90  using v0::FakeQuantize;
91  }
92 }
93 
94 NGRAPH_SUPPRESS_DEPRECATED_END
Visits the attributes of a node, primarily for serialization-like tasks.
Definition: attribute_visitor.hpp:71
A handle for one of a node's outputs.
Definition: node_output.hpp:42
Root of all actual ops.
Definition: op.hpp:29
Class performing element-wise linear quantization.
Definition: fake_quantize.hpp:45
FakeQuantize(const Output< Node > &data, const Output< Node > &input_low, const Output< Node > &input_high, const Output< Node > &output_low, const Output< Node > &output_high, std::size_t levels, const AutoBroadcastSpec &auto_broadcast=AutoBroadcastSpec(AutoBroadcastType::NUMPY))
Constructs a FakeQuantize operation node.
virtual void validate_and_infer_types() override
Verifies that attributes and inputs are consistent and computes output shapes and element types....
The Intel nGraph C++ API.
Definition: attribute_adapter.hpp:28
Implicit broadcast specification.
Definition: attr_types.hpp:323