fake_quantize.hpp
1 // Copyright (C) 2018-2021 Intel Corporation
2 // SPDX-License-Identifier: Apache-2.0
3 //
4 
5 #pragma once
6 
7 #include "ngraph/node.hpp"
8 #include "ngraph/op/util/attr_types.hpp"
9 #include "ngraph/op/util/fused_op.hpp"
10 
11 namespace ngraph
12 {
13  namespace op
14  {
15  namespace v0
16  {
17  ///
18  /// \brief Class performing element-wise linear quantization.
19  ///
20  /// \note Input floating point values are quantized into a discrete
21  /// set of floating point values.
22  ///
23  /// \paragraph Implementation This class creates a node which performs the following
24  /// operation:
25  ///
26  /// round((data - input_low) / (input_high - input_low) * (levels-1)) /
27  /// (levels-1) * (output_high - output_low) + output_low
28  ///
29  ///
30  class NGRAPH_API FakeQuantize : public ngraph::op::Op
31  {
32  public:
33  NGRAPH_RTTI_DECLARATION;
34 
35  FakeQuantize();
36  ///
37  /// \brief Constructs a FakeQuantize operation node.
38  ///
39  /// \param[in] data The input data tensor.
40  /// \param[in] input_low The minimum limit for input values.
41  /// \param[in] input_high The maximum limit for input values.
42  /// \param[in] output_low The minimum quantized value.
43  /// \param[in] output_high The maximum quantized value.
44  /// \param[in] levels The number of quantization levels.
45  /// \param[in] auto_broadcast AutoBroadcast mode to be used for broadcasting
46  /// limit values
47  ///
49  const Output<Node>& input_low,
50  const Output<Node>& input_high,
51  const Output<Node>& output_low,
52  const Output<Node>& output_high,
53  std::size_t levels,
54  const AutoBroadcastSpec& auto_broadcast =
55  AutoBroadcastSpec(AutoBroadcastType::NUMPY));
56 
57  bool visit_attributes(AttributeVisitor& visitor) override;
58  virtual void validate_and_infer_types() override;
59 
60  virtual std::shared_ptr<Node>
61  clone_with_new_inputs(const OutputVector& new_args) const override;
62 
63  std::size_t get_levels() const { return m_levels; }
64  void set_levels(std::size_t levels) { m_levels = levels; }
65  const AutoBroadcastSpec& get_auto_broadcast() const { return m_auto_broadcast; }
66  void set_auto_broadcast(const AutoBroadcastSpec& auto_broadcast)
67  {
68  m_auto_broadcast = auto_broadcast;
69  }
70 
71  private:
72  std::size_t m_levels;
73  AutoBroadcastSpec m_auto_broadcast = op::AutoBroadcastType::NUMPY;
74  };
75  } // namespace v0
76  using v0::FakeQuantize;
77  } // namespace op
78 } // namespace ngraph
Visits the attributes of a node, primarily for serialization-like tasks.
Definition: attribute_visitor.hpp:59
A handle for one of a node's outputs.
Definition: node_output.hpp:33
Root of all actual ops.
Definition: op.hpp:17
Class performing element-wise linear quantization.
Definition: fake_quantize.hpp:31
FakeQuantize(const Output< Node > &data, const Output< Node > &input_low, const Output< Node > &input_high, const Output< Node > &output_low, const Output< Node > &output_high, std::size_t levels, const AutoBroadcastSpec &auto_broadcast=AutoBroadcastSpec(AutoBroadcastType::NUMPY))
Constructs a FakeQuantize operation node.
virtual void validate_and_infer_types() override
Verifies that attributes and inputs are consistent and computes output shapes and element types.
The Intel nGraph C++ API.
Definition: attribute_adapter.hpp:16
Implicit broadcast specification.
Definition: attr_types.hpp:311