namespace ov::reference

Overview

namespace reference {

// namespaces

namespace ov::reference::adaptive_pool;
namespace ov::reference::detail;
namespace ov::reference::details;
namespace ov::reference::fake_quantize_details;
namespace ov::reference::fft_common;
namespace ov::reference::func;
namespace ov::reference::internal;
namespace ov::reference::interpolate_pil;
namespace ov::reference::iou_rotated;
namespace ov::reference::kernel;
namespace ov::reference::multinomial;
namespace ov::reference::nms_common;

// typedefs

typedef op::v4::Interpolate::NearestMode Nearest_mode;
typedef op::v4::Interpolate::CoordinateTransformMode Transform_mode;
typedef op::v4::Interpolate::InterpolateMode InterpolateMode;
typedef op::v3::ROIAlign::PoolingMode ROIPoolingMode;
typedef op::v9::ROIAlign::AlignedMode AlignedMode;
typedef ov::op::v12::ScatterElementsUpdate::Reduction Reduction;
typedef std::function<void(const std::shared_ptr<Model>& function, const ov::TensorVector& inputs, ov::TensorVector& outputs)> custom_evaluate_function;

// enums

enum
{
    idxLocation,
    idxConfidence,
    idxPriors,
    idxArmConfidence,
    idxArmLocation,
    numInputs,
};

enum CellType;
enum DescriptorType;
enum FFTKind;
enum PSROIPoolingMode;

// structs

struct CellArgs;
template <typename T>
struct RoundingDirectionGuard;
template <typename Index_t, typename Count_t>
struct TensorSlice;
template <typename Index_t, typename Count_t>
struct UniqueElements;
struct convert_types;
template <>
struct widen<double>;
template <typename T>
struct widen;
template <>
struct widen<float>;

// classes

class GetNearestPixel;
class GetOriginalCoordinate;
template <typename T>
class InterpolateEval;
class InterpolateEvalHelper;
template <typename Element>
class Span;
template <typename dataType>
class referenceDetectionOutput;

// global variables

constexpr auto nms_rotated_postprocessing = ov::reference::nms_postprocessing;
const uint32_t crush_resistance_const_lower_value = 0x9E3779B9;
const uint32_t crush_resistance_const_upper_value = 0xBB67AE85;
const uint64_t statistic_maximizing_multiplier_n = 0xD2511F53;
const uint64_t statistic_maximizing_multiplier_counter = 0xCD9E8D57;
const size_t rounds_number = 10;
const uint64_t skip_const = 256;

// global functions

template <class T>
void abs(const T \* in, T \* out, const size_t count);

template <class T>
void acos(const T \* arg, T \* out, const size_t count);

template <class T>
void acosh(const T \* arg, T \* out, const size_t count);

template <typename T>
void adaptive_avg_pool(
    const T \* arg,
    T \* out,
    const Shape& arg_shape,
    const Shape& out_shape
    );

template <typename T, typename IT>
void adaptive_max_pool_1d(
    const T \* arg,
    T \* out,
    IT \* indices,
    size_t h_in,
    size_t h_out
    );

template <typename T, typename IT>
void adaptive_max_pool_2d(
    const T \* arg,
    T \* out,
    IT \* indices,
    size_t h_in,
    size_t h_out,
    size_t w_in,
    size_t w_out
    );

template <typename T, typename IT>
void adaptive_max_pool_3d(
    const T \* arg,
    T \* out,
    IT \* indices,
    size_t d_in,
    size_t d_out,
    size_t h_in,
    size_t h_out,
    size_t w_in,
    size_t w_out
    );

template <typename T, typename IT>
void adaptive_max_pool(
    const T \* arg,
    T \* out,
    IT \* selected_indices,
    const Shape& arg_shape,
    const Shape& out_shape
    );

template <class T>
void add(
    const T \* arg0,
    const T \* arg1,
    T \* out,
    const size_t count
    );

template <class T>
void add(
    const T \* arg0,
    const T \* arg1,
    T \* out,
    const Shape& arg0_shape,
    const Shape& arg1_shape,
    const op::AutoBroadcastSpec& broadcast_spec
    );

template <class T>
void logical_and(
    const T \* arg0,
    const T \* arg1,
    T \* out,
    size_t count
    );

template <class T>
void logical_and(
    const T \* arg0,
    const T \* arg1,
    T \* out,
    const Shape& arg0_shape,
    const Shape& arg1_shape,
    const op::AutoBroadcastSpec& broadcast_spec
    );

template <typename T>
void asin(const T \* arg, T \* out, const size_t count);

template <class T>
void asinh(const T \* arg, T \* out, const size_t count);

template <class T>
void atan(const T \* arg, T \* out, const size_t count);

template <typename X, typename Y, typename Z>
void atan2(
    const X \* py,
    const Y \* px,
    Z \* pout,
    size_t count
    );

template <class T>
void atanh(const T \* arg, T \* out, const size_t count);

template <typename T, typename U, typename Functor>
void autobroadcast_binop(
    const T \* arg0,
    const T \* arg1,
    U \* out,
    const Shape& arg0_shape,
    const Shape& arg1_shape,
    const op::AutoBroadcastSpec& broadcast_spec,
    Functor elementwise_functor
    );

template <typename T, typename U, typename Functor>
void autobroadcast_select(
    const U \* arg0,
    const T \* arg1,
    const T \* arg2,
    T \* out,
    const Shape& arg0_shape,
    const Shape& arg1_shape,
    const Shape& arg2_shape,
    const op::AutoBroadcastSpec& broadcast_spec,
    Functor elementwise_functor
    );

template <typename T>
void avg_pool(
    const T \*const arg,
    T \*const out,
    const Shape& arg_shape,
    const Shape& out_shape,
    const Shape& window_shape,
    const Strides& window_movement_strides,
    const Shape& padding_below,
    const Shape& padding_above,
    const bool include_padding_in_avg_computation
    );

template <typename T>
static T norm(T val, T mean, T var, T eps);

template <typename T>
void batch_norm_inference(
    float eps,
    const T \* in,
    const T \* gamma,
    const T \* beta,
    const T \* mean,
    const T \* variance,
    T \* out,
    const Shape& in_shape
    );

template <typename T_IN, typename T_F>
void binary_convolution(
    const T_IN \* in,
    const T_F \* f,
    T_IN \* out,
    const Shape& in_shape,
    const Shape& f_shape,
    const Shape& out_shape,
    const Strides& strides,
    const Strides& dilations,
    const CoordinateDiff& pads_begin,
    const CoordinateDiff& pads_end,
    const float pad_value
    );

template <
    class T,
    typename std::enable_if<std::is_same<typename std::decay<T>::type, char>::value>::type \* = nullptr
    >
void bitwise_and(
    const T \* arg0,
    const T \* arg1,
    T \* out,
    const Shape& arg0_shape,
    const Shape& arg1_shape,
    const op::AutoBroadcastSpec& broadcast_spec
    );

template <class T>
void bitwise_not(const T \* in, T \* out, size_t count);

template <
    class T,
    typename std::enable_if<std::is_same<typename std::decay<T>::type, char>::value>::type \* = nullptr
    >
void bitwise_or(
    const T \* arg0,
    const T \* arg1,
    T \* out,
    const Shape& arg0_shape,
    const Shape& arg1_shape,
    const op::AutoBroadcastSpec& broadcast_spec
    );

template <
    class T,
    typename std::enable_if<std::is_same<typename std::decay<T>::type, char>::value>::type \* = nullptr
    >
void bitwise_xor(
    const T \* arg0,
    const T \* arg1,
    T \* out,
    const Shape& arg0_shape,
    const Shape& arg1_shape,
    const op::AutoBroadcastSpec& broadcast_spec
    );

void broadcast(
    const char \* arg,
    char \* out,
    const Shape& in_shape,
    const Shape& out_shape,
    const AxisSet& broadcast_axes,
    size_t elem_size
    );

template <typename T, typename B, typename P>
void bucketize(
    const T \* data,
    const B \* buckets,
    P \* out,
    const Shape& data_shape,
    const Shape& buckets_shape,
    bool with_right_bound
    );

template <
    class T,
    typename std::enable_if<std::is_integral<T>::value>::type \* = nullptr
    >
void ceiling(
    const T \* arg,
    T \* out,
    const size_t count
    );

template <typename T>
void clamp(
    const T \* arg,
    T \* out,
    const T min,
    const T max,
    const size_t count
    );

void concat(
    const std::vector<const char \*>& args,
    char \* out,
    const std::vector<Shape>& in_shapes,
    const Shape& out_shape,
    int64_t concatenation_axis,
    size_t elem_size
    );

template <typename T>
void constant(const T \* arg0, T \* out, size_t count);

template <typename TI, typename TO>
std::enable_if<!std::is_same<TO, char>::value>::type convert(
    const TI \* arg,
    TO \* out,
    size_t count
    );

size_t count_out_of_f16_range(const float \* arg, size_t count);

void convert_from_f32_to_f16_with_clamp(
    const float \* arg,
    float16 \* out,
    size_t count
    );

template <typename TI, typename TO>
std::enable_if<std::is_same<TO, char>::value>::type convert(
    const TI \* arg,
    TO \* out,
    size_t count
    );

template <typename T>
std::tuple<T, T, T> yuv_pixel_to_rgb(
    float y_val,
    float u_val,
    float v_val
    );

template <typename T>
void color_convert_nv12(
    const T \* arg_y,
    const T \* arg_uv,
    T \* out_ptr,
    size_t batch_size,
    size_t image_h,
    size_t image_w,
    size_t stride_y,
    size_t stride_uv,
    ov::op::util::ConvertColorNV12Base::ColorConversion color_format
    );

template <typename T>
void color_convert_i420(
    const T \* arg_y,
    const T \* arg_u,
    const T \* arg_v,
    T \* out_ptr,
    size_t batch_size,
    size_t image_h,
    size_t image_w,
    size_t stride_y,
    size_t stride_uv,
    ov::op::util::ConvertColorI420Base::ColorConversion color_format
    );

template <ov::element::Type_t T>
bool color_convert_nv12(
    const std::shared_ptr<Node>& op,
    ov::TensorVector& outputs,
    const ov::TensorVector& inputs,
    ov::op::util::ConvertColorNV12Base::ColorConversion type
    );

template <ov::element::Type_t T>
bool color_convert_i420(
    const std::shared_ptr<Node>& op,
    ov::TensorVector& outputs,
    const ov::TensorVector& inputs,
    ov::op::util::ConvertColorI420Base::ColorConversion type
    );

template <typename T>
void convolution(
    const T \* in,
    const T \* f,
    T \* out,
    const Shape& in_shape,
    const Shape& f_shape,
    const Shape& out_shape,
    const Strides& strides,
    const Strides& dilations,
    const CoordinateDiff& pads_begin,
    const CoordinateDiff& pads_end
    );

template <typename T>
void convolution_backprop_in(
    const T \* delta_in,
    const T \* filter,
    T \* delta_out,
    const Shape& in_shape,
    const Shape& filter_shape,
    const Shape& out_shape,
    const Strides& in_dilation,
    const Strides& filter_dilation,
    const CoordinateDiff& forward_in_pad_bellow,
    const CoordinateDiff& forward_in_pad_above,
    const Strides& stride,
    const CoordinateDiff& output_padding
    );

template <typename T>
void copy(const T \* arg, T \* out, size_t count);

template <class T>
void cos(const T \* arg, T \* out, const size_t count);

template <class T>
void cosh(const T \* arg, T \* out, size_t count);

template <typename T>
void ctc_greedy_decoder(
    const T \* data,
    const T \* sequence_masks,
    T \* out,
    const Shape& data_shape,
    const Shape& sequence_masks_shape,
    const Shape& out_shape,
    const bool ctc_merge_repeated
    );

template <typename TF, typename TI, typename TCI, typename TSL>
void ctc_greedy_decoder_seq_len(
    const TF \* data,
    const TI \* sequence_length,
    const TI \* blank_index,
    TCI \* out1,
    TSL \* out2,
    const Shape& data_shape,
    const Shape& out_shape,
    const bool ctc_merge_repeated
    );

template <typename T, typename U>
void CTCLoss(
    const T \* logits,
    const Shape& logitsShape,
    const U \* logitsLength,
    const U \* labels,
    const U \* labelsLength,
    const U \* blankIndexP,
    const bool preprocessCollapseRepeated,
    const bool ctcMergeRepeated,
    const bool unique,
    T \* output
    );

template <typename T, typename P>
void cumsum(
    const T \* arg,
    const P axis,
    T \* out,
    const Shape& tensor_shape,
    const bool exclusive,
    const bool reverse
    );

template <typename T>
void deformable_convolution(
    const T \* in,
    const T \* offsets,
    const T \* filters,
    const T \* mask,
    T \* out,
    const Shape& in_shape,
    const Shape& o_shape,
    const Shape& f_shape,
    const Shape& m_shape,
    const Shape& out_shape,
    const Strides& strides,
    const Strides& dilation,
    const CoordinateDiff& pads_begin,
    const CoordinateDiff& pads_end,
    const int64_t groups,
    const int64_t deformable_groups,
    const bool bilinear_interpolation_pad
    );

template <typename T>
void deformable_convolution(
    const T \* in,
    const T \* offsets,
    const T \* filters,
    T \* out,
    const Shape& in_shape,
    const Shape& o_shape,
    const Shape& f_shape,
    const Shape& out_shape,
    const Strides& strides,
    const Strides& dilation,
    const CoordinateDiff& pads_begin,
    const CoordinateDiff& pads_end,
    const int64_t groups,
    const int64_t deformable_groups,
    const bool bilinear_interpolation_pad = false
    );

template <typename T>
void deformable_psroi_pooling(
    const T \* data_input,
    const Shape& data_input_shape,
    const T \* rois_input,
    const Shape& rois_input_shape,
    const T \* offsets_input,
    const Shape& offsets_input_shape,
    T \* output,
    const Shape& output_shape,
    const std::string& mode_str,
    const float spatial_scale,
    const int64_t spatial_bins_x,
    const int64_t spatial_bins_y,
    const float trans_std,
    const int64_t part_size
    );

void depth_to_space(
    const char \*const in,
    const Shape& in_shape,
    char \*const out,
    const Shape& out_shape,
    const size_t block_size,
    const op::v0::DepthToSpace::DepthToSpaceMode mode,
    const size_t elem_size
    );

template <typename T>
std::enable_if<std::is_integral<T>::value>::type divide(
    const T \* arg0,
    const T \* arg1,
    T \* out,
    size_t count,
    bool pythondiv
    );

template <typename T>
std::enable_if<std::is_integral<T>::value>::type divide(
    const T \* arg0,
    const T \* arg1,
    T \* out,
    const Shape& arg0_shape,
    const Shape& arg1_shape,
    const op::AutoBroadcastSpec& broadcast_spec,
    bool pythondiv
    );

template <typename T>
std::enable_if<std::is_floating_point<T>::value || std::is_same<T, bfloat16>::value || std::is_same<T, float16>::value>::type divide(
    const T \* arg0,
    const T \* arg1,
    T \* out,
    size_t count,
    bool pythondiv
    );

template <typename T>
std::enable_if<std::is_floating_point<T>::value || std::is_same<T, bfloat16>::value || std::is_same<T, float16>::value>::type divide(
    const T \* arg0,
    const T \* arg1,
    T \* out,
    const Shape& arg0_shape,
    const Shape& arg1_shape,
    const op::AutoBroadcastSpec& broadcast_spec,
    bool pythondiv
    );

void einsum(
    ov::TensorVector& outputs,
    const ov::TensorVector& inputs,
    const std::string& equation
    );

template <typename T>
void elu(
    const T \* arg,
    T \* out,
    size_t count,
    double alpha
    );

template <typename T, typename U>
void embeddingBagOffsetsSum(
    const T \* emb_table,
    const U \* indices,
    const U \* offsets,
    const U \* default_index,
    const T \* weights,
    T \* out,
    const size_t indices_count,
    const Shape& outShape
    );

template <typename T, typename U>
void embeddingBagPackedSum(
    const T \* emb_table,
    const U \* indices,
    const T \* weights,
    T \* out,
    const Shape& indicesShape,
    const Shape& outShape
    );

template <typename T, typename U>
void embeddingSegmentsSum(
    const T \* embTable,
    const U \* indices,
    const U \* segmentIds,
    const U \* defaultIndex,
    const T \* weights,
    T \* out,
    const Shape& embTableShape,
    const Shape& indicesShape,
    const Shape& outShape
    );

template <typename T>
void equal(
    const T \* arg0,
    const T \* arg1,
    char \* out,
    size_t count
    );

template <
    typename T,
    typename U,
    typename std::enable_if<std::is_integral<T>::value>::type \* = nullptr
    >
void equal(
    const T \* arg0,
    const T \* arg1,
    U \* out,
    const Shape& arg0_shape,
    const Shape& arg1_shape,
    const op::AutoBroadcastSpec& broadcast_spec
    );

template <
    typename T,
    typename std::enable_if<!std::is_integral<T>::value, bool>::type = true
    >
void erf(
    const T \* arg,
    T \* out,
    size_t count
    );

template <typename T>
void exp(const T \* arg, T \* out, size_t count);

void experimental_detectron_detection_output(
    const float \* input_rois,
    const float \* input_deltas,
    const float \* input_scores,
    const float \* input_im_info,
    const op::v6::ExperimentalDetectronDetectionOutput::Attributes& attrs,
    float \* output_boxes,
    float \* output_scores,
    int32_t \* output_classes
    );

void experimental_detectron_detection_output_postprocessing(
    void \* pboxes,
    void \* pclasses,
    void \* pscores,
    const element::Type output_type,
    const std::vector<float>& output_boxes,
    const std::vector<int32_t>& output_classes,
    const std::vector<float>& output_scores,
    const Shape& output_boxes_shape,
    const Shape& output_classes_shape,
    const Shape& output_scores_shape
    );

template <typename T>
void experimental_detectron_prior_grid_generator(
    const T \* priors,
    const Shape& priors_shape,
    const Shape& feature_map_shape,
    const Shape& im_data_shape,
    T \* output_rois,
    int64_t grid_h,
    int64_t grid_w,
    float stride_h,
    float stride_w
    );

void experimental_detectron_proposals_single_image(
    const float \* im_info,
    const float \* anchors,
    const float \* deltas,
    const float \* scores,
    const op::v6::ExperimentalDetectronGenerateProposalsSingleImage::Attributes& attrs,
    const Shape& im_info_shape,
    const Shape& anchors_shape,
    const Shape& deltas_shape,
    const Shape& scores_shape,
    float \* output_rois,
    float \* output_scores
    );

void experimental_detectron_proposals_single_image_postprocessing(
    void \* prois,
    void \* pscores,
    const element::Type output_type,
    const std::vector<float>& output_rois,
    const std::vector<float>& output_scores,
    const Shape& output_rois_shape,
    const Shape& output_scores_shape
    );

void experimental_detectron_roi_feature_extractor(
    const std::vector<std::vector<float>>& inputs,
    const std::vector<Shape>& input_shapes,
    const op::v6::ExperimentalDetectronROIFeatureExtractor::Attributes& attrs,
    float \* output_rois_features,
    float \* output_rois
    );

void experimental_detectron_roi_feature_extractor_postprocessing(
    void \* prois_features,
    void \* prois,
    const element::Type output_type,
    const std::vector<float>& output_roi_features,
    const std::vector<float>& output_rois,
    const Shape& output_roi_features_shape,
    const Shape& output_rois_shape
    );

template <typename T>
void experimental_detectron_topk_rois(
    const T \* input_rois,
    const T \* input_probs,
    const Shape& input_rois_shape,
    const Shape& input_probs_shape,
    size_t max_rois,
    T \* output_rois
    );

template <typename T>
void extract_image_patches(
    const std::shared_ptr<op::v3::ExtractImagePatches> extImgPatches,
    const T \* input,
    T \* out,
    const Shape& inShape,
    const Shape& outShape
    );

template <typename T>
void eye(
    T \* data,
    const Shape& out_shape,
    const int64_t diagonal_index
    );

template <typename T>
void fake_quantize(
    const T \* arg,
    const T \* in_low,
    const T \* in_high,
    const T \* out_low,
    const T \* out_high,
    T \* out,
    const Shape& arg_shape,
    const Shape& in_low_shape,
    const Shape& in_high_shape,
    const Shape& out_low_shape,
    const Shape& out_high_shape,
    size_t levels,
    const op::AutoBroadcastSpec& broadcast
    );

void fft(
    const float \* input_data,
    const Shape& input_data_shape,
    const int64_t \* axes_data,
    const Shape& axes_data_shape,
    float \* fft_result,
    const Shape& output_shape,
    FFTKind fft_kind
    );

void fft_postprocessing(
    ov::TensorVector& outputs,
    const ov::element::Type output_type,
    const std::vector<float>& fft_result
    );

std::vector<int64_t> canonicalize_axes(
    const int64_t \* axes_data,
    const Shape& axes_data_shape,
    int64_t complex_data_rank
    );

template <typename T>
void floor(const T \* arg, T \* out, size_t count);

template <typename T>
void floor_mod(
    const T \* arg0,
    const T \* arg1,
    T \* out,
    const Shape& arg0_shape,
    const Shape& arg1_shape,
    const op::AutoBroadcastSpec& broadcast_spec
    );

void function(
    const std::shared_ptr<Model>& function,
    const ov::TensorVector& inputs,
    ov::TensorVector& outputs
    );

template <typename T, typename U>
void gather(
    const T \*const data,
    const U \*const indices,
    T \* out,
    const Shape& data_shape,
    const Shape& indices_shape,
    const Shape& out_shape,
    size_t axis,
    size_t batch_dims = 0
    );

template <typename T, typename U>
void gather_elements(
    const T \* data,
    const U \* indices,
    T \* out,
    const Shape& data_shape,
    const Shape& indices_shape,
    const Shape& out_shape,
    int64_t axis
    );

template <typename T, typename U>
void gather_nd(
    const T \*const params,
    const U \*const indices,
    T \*const out,
    const Shape& params_shape,
    const Shape& indices_shape,
    const Shape& out_shape,
    const int batch_dims = 0
    );

void gather_tree(
    const char \* step_ids,
    const char \* parent_ids,
    const char \* max_seq_len,
    const char \* end_token,
    char \* out,
    const Shape& step_ids_shape,
    const Shape& parent_ids_shape,
    const Shape& max_seq_len_shape,
    const Shape& end_token_shape,
    const element::Type& type
    );

template <typename T>
void gelu(
    const T \* arg,
    T \* out,
    op::GeluApproximationMode mode,
    size_t count
    );

void generate_proposals(
    const std::vector<float>& im_info,
    const std::vector<float>& anchors,
    const std::vector<float>& deltas,
    const std::vector<float>& scores,
    const op::v9::GenerateProposals::Attributes& attrs,
    const Shape& im_info_shape,
    const Shape& anchors_shape,
    const Shape& deltas_shape,
    const Shape& scores_shape,
    std::vector<float>& output_rois,
    std::vector<float>& output_scores,
    std::vector<int64_t>& num_rois
    );

void generate_proposals_postprocessing(
    void \* prois,
    void \* pscores,
    void \* proi_num,
    const element::Type& output_type,
    const element::Type& roi_num_type,
    const std::vector<float>& output_rois,
    const std::vector<float>& output_scores,
    const std::vector<int64_t>& num_rois,
    const Shape& output_rois_shape,
    const Shape& output_scores_shape
    );

template <typename T>
void greater(
    const T \* arg0,
    const T \* arg1,
    char \* out,
    size_t count
    );

template <typename T, typename U>
void greater(
    const T \* arg0,
    const T \* arg1,
    U \* out,
    const Shape& arg0_shape,
    const Shape& arg1_shape,
    const op::AutoBroadcastSpec& broadcast_spec
    );

template <typename T>
void greater_eq(
    const T \* arg0,
    const T \* arg1,
    char \* out,
    size_t count
    );

template <typename T, typename U>
void greater_eq(
    const T \* arg0,
    const T \* arg1,
    U \* out,
    const Shape& arg0_shape,
    const Shape& arg1_shape,
    const op::AutoBroadcastSpec& broadcast_spec
    );

template <typename DATA_ET, typename GRID_ET>
void grid_sample(
    DATA_ET \* output,
    const DATA_ET \* data,
    const GRID_ET \* grid,
    const Shape& data_shape,
    const Shape& grid_shape,
    const bool align_corners,
    const ov::op::v9::GridSample::InterpolationMode interpolation_mode,
    const ov::op::v9::GridSample::PaddingMode padding_mode
    );

template <typename T>
void grn(
    const T \* data,
    T \* out,
    float bias,
    const Shape& data_shape
    );

void validate_group_convolution_parameters(
    const Shape& in_shape,
    const Shape& f_shape,
    const Shape& out_shape,
    const Strides& strides,
    const Strides& dilations,
    const CoordinateDiff& pads_begin,
    const CoordinateDiff& pads_end
    );

template <
    typename INPUT,
    typename FILTER,
    typename OUTPUT,
    typename ACCU = typename widen<OUTPUT>::type
    >
void group_convolution(
    const INPUT \* in,
    const FILTER \* f,
    OUTPUT \* out,
    const Shape& in_shape,
    const Shape& filter_shape,
    const Shape& out_shape,
    const Strides& strides,
    const Strides& dilation,
    const CoordinateDiff& pads_begin,
    const CoordinateDiff& pads_end
    );

void infer_backward_conv_output_shape(
    const Shape& in_spatial_shape,
    const Shape& f_spatial_shape,
    Shape& out_spatial_shape,
    const Strides& strides,
    const Strides& dilations,
    const CoordinateDiff& pads_begin,
    const CoordinateDiff& pads_end
    );

void validate_convolution_backprop_data_parameters(
    const Shape& in_shape,
    const Shape& f_shape,
    const Shape& out_shape,
    const Strides& strides,
    const Strides& dilations,
    const CoordinateDiff& pads_begin,
    const CoordinateDiff& pads_end
    );

void validate_group_convolution_backprop_data_parameters(
    const Shape& in_shape,
    const Shape& f_shape,
    const Shape& out_shape,
    const Strides& strides,
    const Strides& dilations,
    const CoordinateDiff& pads_begin,
    const CoordinateDiff& pads_end
    );

template <typename T>
void group_convolution_backprop_data(
    const T \* in,
    const T \* f,
    T \* out,
    const Shape& in_shape,
    const Shape& filter_shape,
    const Shape& out_shape,
    const Strides& strides,
    const Strides& dilation,
    const CoordinateDiff& pads_begin,
    const CoordinateDiff& pads_end,
    const CoordinateDiff& output_padding
    );

template <typename T>
void group_normalization(
    const T \*const data,
    const T \*const scale,
    const T \*const bias,
    T \*const out,
    const Shape& data_shape,
    const size_t num_groups,
    const double epsilon
    );

template <typename T>
void gru_cell(
    const T \* X,
    const Shape& X_shape,
    const T \* H,
    const Shape& H_shape,
    const T \* W,
    const Shape& W_shape,
    const T \* R,
    const Shape& R_shape,
    const T \* B,
    const Shape& B_shape,
    T \* dst_data,
    const std::string& activation_f,
    const std::string& activation_g,
    float clip,
    bool linear_before_reset,
    const T \* A = nullptr
    );

template <typename T>
void hard_sigmoid(
    const T \* arg,
    const T alpha,
    const T beta,
    T \* out,
    size_t count
    );

template <typename T>
void hsigmoid(const T \* arg, T \* out, size_t count);

template <typename T>
void hswish(const T \* arg, T \* out, size_t count);

void if_reference(
    const std::vector<std::shared_ptr<Model>>& body,
    const std::vector<op::util::MultiSubGraphOp::MultiSubgraphOutputDescriptionVector>& out_descs,
    const std::vector<op::util::MultiSubGraphOp::MultiSubgraphInputDescriptionVector>& input_descs,
    ov::TensorVector& out,
    const ov::TensorVector& args
    );

void pad_input_data(
    const uint8_t \* data_ptr,
    uint8_t \* padded_data_ptr,
    size_t type_size,
    const ov::Shape& input_shape,
    const ov::Shape& padded_input_shape,
    const std::vector<size_t>& pads_begin
    );

PartialShape get_padded_input_shape(
    const PartialShape& input_shape,
    const op::v0::Interpolate::Attributes& attrs
    );

std::vector<float> get_scales(
    const PartialShape& input_data_partial_shape,
    const Shape& out_shape,
    const op::v0::Interpolate::Attributes& attrs
    );

op::v4::Interpolate::InterpolateAttrs transform_v0_to_v4(
    const PartialShape& input_partial_shape,
    const op::v0::Interpolate::Attributes& attrs_v0
    );

template <typename T>
void interpolate(
    const T \* input_data,
    const Shape& input_data_shape,
    const std::vector<float>& scales,
    const std::vector<int64_t>& axes,
    T \* out,
    const Shape& out_shape,
    const op::v4::Interpolate::InterpolateAttrs& attrs
    );

template <typename T>
void interpolate(
    T \* input_data,
    const PartialShape& input_data_shape,
    T \* out,
    const Shape& out_shape,
    const op::v0::Interpolate::Attributes& attrs
    );

void irdft(
    const std::vector<float>& input_data,
    const Shape& input_data_shape,
    const std::vector<int64_t>& axes_data,
    float \* irdft_result,
    const Shape& fft_output_shape,
    const Shape& irdft_output_shape,
    const int64_t last_signal_size
    );

template <typename T, typename U>
std::enable_if<std::is_floating_point<T>::value, void>::type is_finite(
    const T \* input,
    U \* output,
    size_t count
    );

template <typename T, typename U>
std::enable_if<std::is_class<T>::value, void>::type is_finite(
    const T \* input,
    U \* output,
    size_t count
    );

template <typename T, typename U>
std::enable_if<std::is_floating_point<T>::value, void>::type is_inf(
    const T \* input,
    U \* output,
    size_t count,
    const ov::op::v10::IsInf::Attributes& attributes
    );

template <typename T, typename U>
std::enable_if<std::is_class<T>::value, void>::type is_inf(
    const T \* input,
    U \* output,
    size_t count,
    const ov::op::v10::IsInf::Attributes& attributes
    );

template <typename T, typename U>
std::enable_if<std::is_floating_point<T>::value, void>::type is_nan(
    const T \* input,
    U \* output,
    size_t count
    );

template <typename T, typename U>
std::enable_if<std::is_class<T>::value, void>::type is_nan(
    const T \* input,
    U \* output,
    size_t count
    );

template <typename T>
void less(
    const T \* arg0,
    const T \* arg1,
    char \* out,
    const size_t count
    );

template <typename T, typename U>
void less(
    const T \* arg0,
    const T \* arg1,
    U \* out,
    const Shape& arg0_shape,
    const Shape& arg1_shape,
    const op::AutoBroadcastSpec& broadcast_spec
    );

template <typename T>
void less_eq(
    const T \* arg0,
    const T \* arg1,
    char \* out,
    const size_t count
    );

template <typename T, typename U>
void less_eq(
    const T \* arg0,
    const T \* arg1,
    U \* out,
    const Shape& arg0_shape,
    const Shape& arg1_shape,
    const op::AutoBroadcastSpec& broadcast_spec
    );

template <typename T>
void log(const T \* arg, T \* out, size_t count);

template <typename T>
void log_softmax(
    const T \* arg,
    T \* out,
    const Shape& shape,
    const AxisSet& axes
    );

template <class T>
void logical_not(
    const T \* arg,
    T \* out,
    const size_t count
    );

static void reduce_logical_and(
    const char \* arg,
    char \* out,
    const Shape& in_shape,
    const AxisSet& reduction_axes
    );

static void reduce_logical_or(
    const char \* arg,
    char \* out,
    const Shape& in_shape,
    const AxisSet& reduction_axes
    );

void loop(
    const std::shared_ptr<Model>& body,
    const op::util::OutputDescriptionVector& out_descs,
    const op::util::InputDescriptionVector& input_descs,
    const op::v5::Loop::SpecialBodyPorts& special_ports,
    ov::TensorVector& out,
    const ov::TensorVector& args
    );

static size_t point_to_flat_idx(
    const Shape& shape,
    const std::vector<size_t>& point
    );

static std::vector<size_t> slice_indices(
    const Shape& full_shape,
    const std::vector<size_t>& begin,
    const Shape& slice_shape
    );

template <typename T>
static T sum_region_across_axes(
    const T \* arg,
    const std::vector<size_t>& indices
    );

template <typename T>
void lrn(
    const T \* arg,
    const AxisSet& axes,
    T \* out,
    const Shape& arg_shape,
    double dalpha,
    double dbeta,
    double dbias,
    size_t size
    );

template <typename T>
void lstm_cell(
    const T \* X,
    const Shape& X_shape,
    const T \* H,
    const Shape& H_shape,
    const T \* C,
    const Shape& C_shape,
    const T \* W,
    const Shape& W_shape,
    const T \* R,
    const Shape& R_shape,
    const T \* B,
    const Shape& B_shape,
    T \* out_Ht,
    T \* out_Ct,
    const std::string& activation_f,
    const std::string& activation_g,
    const std::string& activation_h,
    float clip
    );

template <typename T>
void lstm_cell_v1(
    const T \* X,
    const Shape& X_shape,
    const T \* H,
    const Shape& H_shape,
    const T \* C,
    const Shape& C_shape,
    const T \* W,
    const Shape& W_shape,
    const T \* R,
    const Shape& R_shape,
    const T \* B,
    const Shape& B_shape,
    const T \* P,
    const Shape& P_shape,
    T \* out_Ht,
    T \* out_Ct,
    const std::string& activation_f,
    const std::string& activation_g,
    const std::string& activation_h,
    float clip,
    const ov::op::LSTMWeightsFormat weight_format,
    bool input_forget
    );

template <typename T>
void matmul(
    const T \* arg0,
    const T \* arg1,
    T \* out,
    const Shape& arg0_shape,
    const Shape& arg1_shape,
    const Shape& out_shape,
    bool transpose_arg0,
    bool transpose_arg1
    );

void matrix_nms(
    const float \* boxes_data,
    const Shape& boxes_data_shape,
    const float \* scores_data,
    const Shape& scores_data_shape,
    const op::v8::MatrixNms::Attributes& attrs,
    float \* selected_outputs,
    const Shape& selected_outputs_shape,
    int64_t \* selected_indices,
    const Shape& selected_indices_shape,
    int64_t \* valid_outputs
    );

template <typename Values_t, typename Indices_t>
void max_pool(
    const Values_t \* data,
    Values_t \* values,
    Indices_t \* indices,
    const Shape& data_shape,
    const Shape& out_shape,
    const Shape& kernel,
    const Strides& strides,
    const Strides& dilations,
    const Shape& pads_begin,
    const Shape& pads_end,
    const int64_t axis = 0
    );

template <typename Value_t>
void max_pool(
    const Value_t \* data,
    Value_t \* values,
    const Shape& data_shape,
    const Shape& out_shape,
    const Shape& kernel,
    const Strides& strides,
    const Shape& pads_begin,
    const Shape& pads_end
    );

template <typename T>
void maximum(
    const T \* arg0,
    const T \* arg1,
    T \* out,
    size_t count
    );

template <typename T>
void maximum(
    const T \* arg0,
    const T \* arg1,
    T \* out,
    const Shape& arg0_shape,
    const Shape& arg1_shape,
    const op::AutoBroadcastSpec& broadcast_spec
    );

template <typename T>
void minimum(
    const T \* arg0,
    const T \* arg1,
    T \* out,
    size_t count
    );

template <typename T>
void minimum(
    const T \* arg0,
    const T \* arg1,
    T \* out,
    const Shape& arg0_shape,
    const Shape& arg1_shape,
    const op::AutoBroadcastSpec& broadcast_spec
    );

template <typename T>
void mish(const T \* arg, T \* out, size_t count);

template <class InputIt, class OutputIt>
void mod(
    InputIt arg0,
    InputIt arg1,
    OutputIt out,
    const Shape& arg_shape0,
    const Shape& arg_shape1,
    const op::AutoBroadcastSpec& broadcast_spec
    );

void multiclass_nms(
    const float \* boxes_data,
    const Shape& boxes_data_shape,
    const float \* scores_data,
    const Shape& scores_data_shape,
    const int64_t \* roisnum_data,
    const Shape& roisnum_data_shape,
    const op::util::MulticlassNmsBase::Attributes& attrs,
    float \* selected_outputs,
    const Shape& selected_outputs_shape,
    int64_t \* selected_indices,
    const Shape& selected_indices_shape,
    int64_t \* valid_outputs
    );

template <typename T>
void multiply(
    const T \* arg0,
    const T \* arg1,
    T \* out,
    size_t count
    );

template <typename T>
void multiply(
    const T \* arg0,
    const T \* arg1,
    T \* out,
    const Shape& arg0_shape,
    const Shape& arg1_shape,
    const op::AutoBroadcastSpec& broadcast_spec
    );

template <typename T>
void mvn(
    const T \* arg,
    T \* out,
    const Shape& in_shape,
    const bool normalize_variance,
    const AxisSet& reduction_axes,
    const double eps
    );

template <typename T>
void mvn_6(
    const T \* arg,
    T \* out,
    const Shape& in_shape,
    AxisSet reduction_axes,
    bool normalize_variance,
    double eps,
    op::MVNEpsMode eps_mode
    );

template <typename T>
AxisSet mvn_6_reduction_axes(
    const ov::Tensor& axes_input,
    size_t rank
    );

template <typename T>
void negate(const T \* arg, T \* out, const size_t count);

void nms_rotated(
    const float \* boxes_data,
    const Shape& boxes_data_shape,
    const float \* scores_data,
    const Shape& scores_data_shape,
    int64_t max_output_boxes_per_class,
    float iou_threshold,
    float score_threshold,
    float soft_nms_sigma,
    int64_t \* selected_indices,
    const Shape& selected_indices_shape,
    float \* selected_scores,
    const Shape& selected_scores_shape,
    int64_t \* valid_outputs,
    bool sort_result_descending,
    bool clockwise = true
    );

void non_max_suppression5(
    const float \* boxes_data,
    const Shape& boxes_data_shape,
    const float \* scores_data,
    const Shape& scores_data_shape,
    int64_t max_output_boxes_per_class,
    float iou_threshold,
    float score_threshold,
    float soft_nms_sigma,
    int64_t \* selected_indices,
    const Shape& selected_indices_shape,
    float \* selected_scores,
    const Shape& selected_scores_shape,
    int64_t \* valid_outputs,
    const bool sort_result_descending
    );

void nms5_postprocessing(
    ov::TensorVector& outputs,
    const element::Type output_type,
    const std::vector<int64_t>& selected_indices,
    const std::vector<float>& selected_scores,
    int64_t valid_outputs,
    const element::Type selected_scores_type
    );

void non_max_suppression(
    const float \* boxes_data,
    const Shape& boxes_data_shape,
    const float \* scores_data,
    const Shape& scores_data_shape,
    int64_t max_output_boxes_per_class,
    float iou_threshold,
    float score_threshold,
    float soft_nms_sigma,
    int64_t \* selected_indices,
    const Shape& selected_indices_shape,
    float \* selected_scores,
    const Shape& selected_scores_shape,
    int64_t \* valid_outputs,
    const bool sort_result_descending
    );

void nms_postprocessing(
    ov::TensorVector& outputs,
    const element::Type output_type,
    const std::vector<int64_t>& selected_indices,
    const std::vector<float>& selected_scores,
    int64_t valid_outputs,
    const element::Type selected_scores_type
    );

template <typename T>
size_t non_zero_get_count(
    const T \* arg,
    const Shape& arg_shape
    );

template <typename T, typename U>
void non_zero(
    const T \* arg,
    U \* out,
    const Shape& arg_shape
    );

template <typename T>
void normalize_l2(
    const T \* data,
    T \* out,
    const Shape& data_shape,
    const AxisSet& reduction_axes,
    float eps,
    op::EpsMode eps_mode
    );

template <typename T, typename U>
void not_equal(
    const T \* arg0,
    const T \* arg1,
    U \* out,
    const Shape& arg0_shape,
    const Shape& arg1_shape,
    const op::AutoBroadcastSpec& broadcast_spec
    );

template <typename INPUT_TYPE>
void one_hot(
    const INPUT_TYPE \* indices,
    const Shape& indices_shape,
    char \* out,
    const size_t out_elem_size,
    const size_t depth,
    const int64_t one_hot_axis,
    const char \* on_value,
    const char \* off_value
    );

template <class T>
void logical_or(
    const T \* arg0,
    const T \* arg1,
    T \* out,
    const size_t count
    );

template <class T>
void logical_or(
    const T \* arg0,
    const T \* arg1,
    T \* out,
    const Shape& arg0_shape,
    const Shape& arg1_shape,
    const op::AutoBroadcastSpec& broadcast_spec
    );

void pad(
    const char \* data,
    const char \* pad_value,
    char \* out,
    const size_t elem_size,
    const Shape& data_shape,
    const Shape& out_shape,
    const CoordinateDiff& padding_below,
    const CoordinateDiff& padding_above,
    const op::PadMode pad_mode
    );

template <typename T>
void power(
    const T \* arg0,
    const T \* arg1,
    T \* out,
    size_t count
    );

template <typename T>
void power(
    const T \* arg0,
    const T \* arg1,
    T \* out,
    const Shape& arg0_shape,
    const Shape& arg1_shape,
    const op::AutoBroadcastSpec& broadcast_spec
    );

template <typename T>
void prelu(
    const T \* arg,
    const T \* slope,
    T \* out,
    const Shape& arg_shape,
    const Shape& slope_shape
    );

static float clip_great(float x, float threshold);
static float clip_less(float x, float threshold);

template <typename T>
void prior_box(
    const T \* data,
    const T \* img,
    float \* dst_data,
    const Shape& out_shape,
    const op::v8::PriorBox::Attributes& attrs
    );

template <typename T>
void prior_box_clustered(
    const T \* data,
    const T \* img,
    float \* dst_data,
    const Shape& out_shape,
    const op::v0::PriorBoxClustered::Attributes& attrs
    );

template <typename T>
void proposal_v0(
    const T \* class_probs,
    const T \* bbox_deltas,
    const T \* image_shape,
    T \* output,
    const Shape& class_probs_shape,
    const Shape& bbox_deltas_shape,
    const Shape& image_shape_shape,
    const Shape& output_shape,
    const op::v0::Proposal::Attributes& attrs
    );

template <typename T>
void proposal_v4(
    const T \* class_probs,
    const T \* bbox_deltas,
    const T \* image_shape,
    T \* output,
    T \* out_probs,
    const Shape& class_probs_shape,
    const Shape& bbox_deltas_shape,
    const Shape& image_shape_shape,
    const Shape& output_shape,
    const Shape& out_probs_shape,
    const op::v0::Proposal::Attributes& attrs
    );

template <typename T>
void psroi_pooling(
    const T \* input,
    const Shape& input_shape,
    const T \* rois,
    const Shape& rois_shape,
    T \* output,
    const Shape& output_shape,
    const std::string& mode_str,
    float spatial_scale,
    int spatial_bins_x,
    int spatial_bins_y
    );