namespace InferenceEngine::gapi::kernels

namespace kernels {

// namespaces

// Namespaces nested inside InferenceEngine::gapi::kernels.
// NOTE(review): `namespace A::B;` is listing notation only; it is not a
// compilable C++ declaration (a namespace needs a body or an alias target).
// The names suggest one namespace per resize flavour / ISA, but the contents
// of these namespaces are not visible here.
namespace InferenceEngine::gapi::kernels::areaDownscale32f;
namespace InferenceEngine::gapi::kernels::areaDownscale8u;
namespace InferenceEngine::gapi::kernels::areaUpscale;
namespace InferenceEngine::gapi::kernels::areaUpscale32f;
namespace InferenceEngine::gapi::kernels::avx;
namespace InferenceEngine::gapi::kernels::avx512;
namespace InferenceEngine::gapi::kernels::linear;
namespace InferenceEngine::gapi::kernels::linear32f;
namespace InferenceEngine::gapi::kernels::neon;

// typedefs

// Compile-time integer tags: C3::value == 3 and C4::value == 4.
// NOTE(review): presumably used as channel-count tags — confirm at the use sites.
using C3 = std::integral_constant<int, 3>;
using C4 = std::integral_constant<int, 4>;
// Type list holding a single entry, scalar_tag.
// NOTE(review): the name suggests "set of available ISAs"; in this listing only
// the scalar tag is a member — other build configurations may differ.
using isas_set = typelist<scalar_tag>;
// Shorthand aliases for the `::type` member of the metafunctions declared in
// this namespace (head, concat, std::is_same, if_c_impl, remove).
// NOTE(review): these lines use names such as T, U, C, E, type,
// left_typelsist and `typelist` (as a type argument) that are not declared at
// this scope, so they are presumably alias templates whose template parameter
// lists were dropped by the listing generator — confirm against the source.

// Nested `type` of head<typelist>.
typedef typename head<typelist>::type head_t;
// Nested `type` of concat<left, right>.
typedef typename concat<left_typelsist, right_typelsist>::type concat_t;
// Nested `type` of std::is_same<T, U>.
typedef typename std::is_same<T, U>::type is_same_t;
// Selection on a value `C`: nested `type` of if_c_impl<C, T, E>.
typedef typename if_c_impl<C, T, E>::type if_c;
// Selection on a type `C`: the condition passed on is `C::value != 0`.
typedef typename if_c_impl<C::value !=0, T, E>::type if_;
// Nested `type` of remove<typelist, type>.
typedef typename remove<typelist, type>::type remove_t;
// Integer aliases used by the 8-bit kernels' signatures.
// Q0_16 and Q8_8 are both plain uint16_t, so the compiler does not
// distinguish them; the names only document intent.
// NOTE(review): the names suggest Q0.16 / Q8.8 fixed-point layouts — the
// listing itself does not establish the number format.
using Q0_16 = uint16_t;
using Q8_8 = uint16_t;
using U8 = uint8_t;
// Concrete MapperUnit<F, I> instantiations named after the pixel depth they
// appear with in the calcRowArea* signatures:
//   MapperUnit32F -> F = float, I = int
//   MapperUnit8U  -> F = Q0_16, I = short
using MapperUnit32F = MapperUnit<float, int>;
using MapperUnit8U = MapperUnit<Q0_16, short>;
// Shorthand for the nested `type` of vector_type_of<isa_tag_t, scalar_t>.
// NOTE(review): isa_tag_t / scalar_t are not declared at this scope, so this is
// presumably an alias template with its parameter list omitted — confirm.
typedef typename vector_type_of<isa_tag_t, scalar_t>::type vector_type_of_t;

// structs

// Forward declarations only; member layout is not visible in this listing.

// Class template with three type parameters (A, I, W).
template <typename A, typename I, typename W>
struct AreaDownMapper;
// Class template with two type parameters (F, I); instantiated elsewhere in
// this listing as MapperUnit<float, int> and MapperUnit<Q0_16, short>.
template <typename F, typename I>
struct MapperUnit;
// Tag types passed as the first argument of the avx2 / avx512 entry points.
struct avx2_tag;
struct avx512_tag;
// concat: metafunction over two type lists.
// NOTE(review): the partial specialization is listed before the primary
// template; in compilable code the primary template must be declared first,
// so treat this as listing order rather than declaration order.

// Partial specialization matching two variadic template instances
// left_list<left_types...> and right_list<right_types...>.
template <
    template<typename ...> class left_list,
    typename ... left_types,
    template<typename ...> class right_list,
    typename ... right_types
    >
struct concat<left_list<left_types...>, right_list<right_types...>>;
// Primary template.
template <typename left_typelsist, typename right_typelsist>
struct concat;
// cv_type_to_depth: trait with explicit specializations for std::int8_t,
// std::uint8_t, std::uint16_t, std::int32_t, fp_16_t, float and std::int16_t.
// NOTE(review): the name suggests a mapping from an element type to an OpenCV
// depth constant; the members are not visible here — confirm.
// The primary template appears between the specializations because of the
// listing's ordering, not because the source declares it there.
template <>
struct cv_type_to_depth<std::int8_t>;
template <>
struct cv_type_to_depth<std::uint8_t>;
template <>
struct cv_type_to_depth<std::uint16_t>;
template <>
struct cv_type_to_depth<std::int32_t>;
template <>
struct cv_type_to_depth<fp_16_t>;
template <>
struct cv_type_to_depth<float>;
// Primary template.
template <typename type>
struct cv_type_to_depth;
template <>
struct cv_type_to_depth<std::int16_t>;
// Element type used by cv_type_to_depth<fp_16_t>; layout not visible here.
struct fp_16_t;
// head: metafunction over a type list. The partial specialization matches a
// non-empty list<head_t, types...>; the primary template follows it only
// because of the listing's ordering.
template <
    template<typename ...> class list,
    typename head_t,
    typename ... types
    >
struct head<list<head_t, types...>>;
template <typename type_list>
struct head;
// NOTE(review): presumably a predicate object used together with the
// is_present(...) overloads — not established by this listing.
struct is_isa_present;
// Class template parameterised on element type T, a Mapper type and an
// integer channel count.
template <typename T, typename Mapper, int chanNum>
struct linearScratchDesc;
// Tag type passed as the first argument of the neon entry points.
struct neon_tag;
// remove: metafunction taking a type list and a type.
// Primary template.
template <typename typelist, typename type>
struct remove;
// Partial specialization for a non-empty list<head_t, types...>.
template <
    template<typename ...> class list,
    typename head_t,
    typename ... types,
    typename t
    >
struct remove<list<head_t, types...>, t>;
// Partial specialization for the empty list<>.
template <template<typename ...> class list, typename t>
struct remove<list<>, t>;
// Tag types: scalar_tag is the sole member of isas_set in this listing;
// sse42_tag is the first argument of the sse42 entry points.
struct scalar_tag;
struct sse42_tag;
// Wrapper template over a single type; it is the argument type passed to the
// `type_to_value` callable in the default template argument of type_dispatch.
template <typename type>
struct type_to_type;
// Variadic type list.
template <typename ... types>
struct typelist;
// vector_type_of: trait keyed on (ISA tag, scalar type), with partial
// specializations for scalar types float and uint8_t.
template <typename isa_tag_t, typename scalar_t>
struct vector_type_of;
template <typename isa_tag_t>
struct vector_type_of<isa_tag_t, float>;
template <typename isa_tag_t>
struct vector_type_of<isa_tag_t, uint8_t>;

// global variables

// 1 << 15 == 32768.
// The storage-class specifier `static` may appear only once in a declaration,
// and `constexpr` already implies `const`, so one `static constexpr` is the
// complete and well-formed spelling.
// NOTE(review): presumably the fixed-point representation of 1.0 for the
// `short` interpolation coefficients — confirm at the use sites.
static constexpr int ONE = 1 << 15;
// Integer conversion constants. Dividing each coefficient by
// 2^ITUR_BT_601_SHIFT (2^20 = 1048576) gives approximately:
//   CY  ~  1.164    CUB ~  2.018    CUG ~ -0.391
//   CVG ~ -0.813    CVR ~  1.596
// NOTE(review): the names and ratios match the usual ITU-R BT.601 YUV->RGB
// coefficients; how they are applied is not visible in this listing.
static constexpr int ITUR_BT_601_CY    = 1220542;
static constexpr int ITUR_BT_601_CUB   = 2116026;
static constexpr int ITUR_BT_601_CUG   = -409993;
static constexpr int ITUR_BT_601_CVG   = -852492;
static constexpr int ITUR_BT_601_CVR   = 1673527;
static constexpr int ITUR_BT_601_SHIFT = 20;

// global functions

// neon_tag variants of the 8-bit row-linear routines; each returns bool.
// NOTE(review): the two `calcRowLinear8UC3C4Impl< neon_tag, N >` entries carry
// explicit template arguments but no `template` keyword, so they are
// presumably explicit specializations whose `template <>` prefix is not shown
// in this listing — confirm against the source.
// The trailing `const int` parameter is unnamed in all three declarations
// (the primary templates name it `l`).

// <neon_tag, 3>: dst is an array of 3 arrays of 4 uint8_t pointers.
bool calcRowLinear8UC3C4Impl< neon_tag, 3 >(
    neon_tag,
    std::array<std::array<uint8_t \*, 4>, 3>& dst,
    const uint8_t \* src0[],
    const uint8_t \* src1[],
    const short alpha[],
    const short clone[],
    const short mapsx[],
    const short beta[],
    uint8_t tmp[],
    const Size& inSz,
    const Size& outSz,
    const int lpi,
    const int
    );

// <neon_tag, 4>: dst is an array of 4 arrays of 4 uint8_t pointers.
bool calcRowLinear8UC3C4Impl< neon_tag, 4 >(
    neon_tag,
    std::array<std::array<uint8_t \*, 4>, 4>& dst,
    const uint8_t \* src0[],
    const uint8_t \* src1[],
    const short alpha[],
    const short clone[],
    const short mapsx[],
    const short beta[],
    uint8_t tmp[],
    const Size& inSz,
    const Size& outSz,
    const int lpi,
    const int
    );

// Single-plane counterpart: dst is a plain array of uint8_t pointers.
bool calcRowLinear8UC1Impl(
    neon_tag,
    uint8_t \* dst[],
    const uint8_t \* src0[],
    const uint8_t \* src1[],
    const short alpha[],
    const short clone[],
    const short mapsx[],
    const short beta[],
    uint8_t tmp[],
    const Size& inSz,
    const Size& outSz,
    const int lpi,
    const int
    );

// Explicit instantiations (`template void ...`) for neon_tag.
// Note: `chan` and `chs` are written as plain `int` here, whereas the primary
// template and the other ISA instantiations write `const int`; top-level const
// on a by-value parameter does not change the function type.

// chanToPlaneRowImpl with T = uint8_t.
template void chanToPlaneRowImpl(
    neon_tag,
    const uint8_t \* in,
    int chan,
    int chs,
    uint8_t \* out,
    const int length
    );

// chanToPlaneRowImpl with T = float.
template void chanToPlaneRowImpl(
    neon_tag,
    const float \* in,
    int chan,
    int chs,
    float \* out,
    const int length
    );

// nv12ToRgbRowImpl: takes Y rows plus a single interleaved-name `uv_row`.
template void nv12ToRgbRowImpl(
    neon_tag,
    const uint8_t \*\* y_rows,
    const uint8_t \* uv_row,
    uint8_t \*\* out_rows,
    const int buf_width
    );

// i420ToRgbRowImpl: takes Y rows plus separate `u_row` and `v_row`.
template void i420ToRgbRowImpl(
    neon_tag,
    const uint8_t \*\* y_rows,
    const uint8_t \* u_row,
    const uint8_t \* v_row,
    uint8_t \*\* out_rows,
    const int buf_width
    );

// Explicit instantiations of splitRowImpl<isa_tag_t, T, chs> for neon_tag,
// covering T in {uint8_t, float} and chs in {2, 3, 4}.
// Each takes one input pointer and an array of `chs` output pointers.
template void splitRowImpl< neon_tag, uint8_t, 2 >(
    neon_tag,
    const uint8_t \* in,
    std::array<uint8_t \*, 2>& outs,
    const int length
    );

template void splitRowImpl< neon_tag, float, 2 >(
    neon_tag,
    const float \* in,
    std::array<float \*, 2>& outs,
    const int length
    );

template void splitRowImpl< neon_tag, uint8_t, 3 >(
    neon_tag,
    const uint8_t \* in,
    std::array<uint8_t \*, 3>& outs,
    const int length
    );

template void splitRowImpl< neon_tag, float, 3 >(
    neon_tag,
    const float \* in,
    std::array<float \*, 3>& outs,
    const int length
    );

template void splitRowImpl< neon_tag, uint8_t, 4 >(
    neon_tag,
    const uint8_t \* in,
    std::array<uint8_t \*, 4>& outs,
    const int length
    );

template void splitRowImpl< neon_tag, float, 4 >(
    neon_tag,
    const float \* in,
    std::array<float \*, 4>& outs,
    const int length
    );

// Explicit instantiations of mergeRowImpl<isa_tag_t, T, chs> for neon_tag,
// covering T in {uint8_t, float} and chs in {2, 3, 4}.
// Each takes an array of `chs` const input pointers and one output pointer.
template void mergeRowImpl< neon_tag, uint8_t, 2 >(
    neon_tag,
    const std::array<const uint8_t \*, 2>& ins,
    uint8_t \* out,
    const int length
    );

template void mergeRowImpl< neon_tag, float, 2 >(
    neon_tag,
    const std::array<const float \*, 2>& ins,
    float \* out,
    const int length
    );

template void mergeRowImpl< neon_tag, uint8_t, 3 >(
    neon_tag,
    const std::array<const uint8_t \*, 3>& ins,
    uint8_t \* out,
    const int length
    );

template void mergeRowImpl< neon_tag, float, 3 >(
    neon_tag,
    const std::array<const float \*, 3>& ins,
    float \* out,
    const int length
    );

template void mergeRowImpl< neon_tag, uint8_t, 4 >(
    neon_tag,
    const std::array<const uint8_t \*, 4>& ins,
    uint8_t \* out,
    const int length
    );

template void mergeRowImpl< neon_tag, float, 4 >(
    neon_tag,
    const std::array<const float \*, 4>& ins,
    float \* out,
    const int length
    );

// Explicit instantiations for neon_tag: float row-linear and both
// calcRowAreaImpl flavours.

// calcRowLinear32FC1Impl: float data, float alpha/beta, int mapsx.
template void calcRowLinear32FC1Impl(
    neon_tag,
    float \* dst[],
    const float \* src0[],
    const float \* src1[],
    const float alpha[],
    const int mapsx[],
    const float beta[],
    const Size& inSz,
    const Size& outSz,
    const int lpi,
    const int l
    );

// calcRowAreaImpl with T = uint8_t, A = Q0_16, I = short, W = Q8_8.
template void calcRowAreaImpl< neon_tag, uint8_t, Q0_16, short, Q8_8 >(
    neon_tag,
    uint8_t dst[],
    const uint8_t \* src[],
    const Size& inSz,
    const Size& outSz,
    Q0_16 yalpha,
    const MapperUnit8U& ymap,
    int xmaxdf,
    const short xindex[],
    const Q0_16 xalpha[],
    Q8_8 vbuf[]
    );

// calcRowAreaImpl with T = float, A = float, I = int, W = float.
template void calcRowAreaImpl< neon_tag, float, float, int, float >(
    neon_tag,
    float dst[],
    const float \* src[],
    const Size& inSz,
    const Size& outSz,
    float yalpha,
    const MapperUnit32F& ymap,
    int xmaxdf,
    const int xindex[],
    const float xalpha[],
    float vbuf[]
    );

// Primary function templates for the per-ISA row routines. In every one the
// first parameter is an unnamed value of the ISA tag type `isa_tag_t`; the
// neon/avx2/avx512/sse42 entries in this listing pass the matching *_tag type.
// NOTE(review): CV_ALWAYS_INLINE templates with the same names and parameter
// types are also declared in this listing (with different parameter names for
// the nv12/i420 pair) — confirm which header each set comes from.

// Reads from `in`, writes to `out`; `chan`/`chs` are ints, `length` an element
// count by name — exact indexing is not visible here.
template <typename isa_tag_t, typename T>
void chanToPlaneRowImpl(
    isa_tag_t,
    const T \* in,
    const int chan,
    const int chs,
    T \* out,
    const int length
    );

// Y rows + one UV row in, rows of uint8_t out.
template <typename isa_tag_t>
void nv12ToRgbRowImpl(
    isa_tag_t,
    const uint8_t \*\* y_rows,
    const uint8_t \* uv_row,
    uint8_t \*\* out_rows,
    const int buf_width
    );

// Y rows + separate U and V rows in, rows of uint8_t out.
template <typename isa_tag_t>
void i420ToRgbRowImpl(
    isa_tag_t,
    const uint8_t \*\* y_rows,
    const uint8_t \* u_row,
    const uint8_t \* v_row,
    uint8_t \*\* out_rows,
    const int buf_width
    );

// One input pointer, `chs` output pointers.
template <typename isa_tag_t, typename T, int chs>
void splitRowImpl(
    isa_tag_t,
    const T \* in,
    std::array<T \*, chs>& outs,
    const int length
    );

// `chs` input pointers, one output pointer.
template <typename isa_tag_t, typename T, int chs>
void mergeRowImpl(
    isa_tag_t,
    const std::array<const T \*, chs>& ins,
    T \* out,
    const int length
    );

// 8-bit single-plane row-linear routine; returns bool.
// NOTE(review): the meaning of the bool result is not visible here.
template <typename isa_tag_t>
bool calcRowLinear8UC1Impl(
    isa_tag_t,
    uint8_t \* dst[],
    const uint8_t \* src0[],
    const uint8_t \* src1[],
    const short alpha[],
    const short clone[],
    const short mapsx[],
    const short beta[],
    uint8_t tmp[],
    const Size& inSz,
    const Size& outSz,
    const int lpi,
    const int l
    );

// Float single-plane row-linear routine; no `clone`/`tmp` buffers.
template <typename isa_tag_t>
void calcRowLinear32FC1Impl(
    isa_tag_t,
    float \* dst[],
    const float \* src0[],
    const float \* src1[],
    const float alpha[],
    const int mapsx[],
    const float beta[],
    const Size& inSz,
    const Size& outSz,
    const int lpi,
    const int l
    );

// 8-bit multi-channel row-linear routine; dst holds `chs` arrays of 4
// uint8_t pointers. Returns bool.
template <typename isa_tag_t, int chs>
bool calcRowLinear8UC3C4Impl(
    isa_tag_t,
    std::array<std::array<uint8_t \*, 4>, chs>& dst,
    const uint8_t \* src0[],
    const uint8_t \* src1[],
    const short alpha[],
    const short clone[],
    const short mapsx[],
    const short beta[],
    uint8_t tmp[],
    const Size& inSz,
    const Size& outSz,
    const int lpi,
    const int l
    );

// Area routine parameterised on pixel type T, coefficient type A, index type
// I and work-buffer type W; `ymap` is a MapperUnit<A, I>.
template <typename isa_tag_t, typename T, typename A, typename I, typename W>
void calcRowAreaImpl(
    isa_tag_t,
    T dst[],
    const T \* src[],
    const Size& inSz,
    const Size& outSz,
    A yalpha,
    const MapperUnit<A, I>& ymap,
    int xmaxdf,
    const I xindex[],
    const A xalpha[],
    W vbuf[]
    );

// avx2_tag variants of the 8-bit row-linear routines; each returns bool.
// NOTE(review): as with the other ISAs, the `< avx2_tag, N >` entries have no
// `template` keyword in this listing and are presumably explicit
// specializations with the `template <>` prefix omitted — confirm.
// The trailing `const int` parameter is unnamed.

// <avx2_tag, 3>.
bool calcRowLinear8UC3C4Impl< avx2_tag, 3 >(
    avx2_tag,
    std::array<std::array<uint8_t \*, 4>, 3>& dst,
    const uint8_t \* src0[],
    const uint8_t \* src1[],
    const short alpha[],
    const short clone[],
    const short mapsx[],
    const short beta[],
    uint8_t tmp[],
    const Size& inSz,
    const Size& outSz,
    const int lpi,
    const int
    );

// <avx2_tag, 4>.
bool calcRowLinear8UC3C4Impl< avx2_tag, 4 >(
    avx2_tag,
    std::array<std::array<uint8_t \*, 4>, 4>& dst,
    const uint8_t \* src0[],
    const uint8_t \* src1[],
    const short alpha[],
    const short clone[],
    const short mapsx[],
    const short beta[],
    uint8_t tmp[],
    const Size& inSz,
    const Size& outSz,
    const int lpi,
    const int
    );

// Single-plane counterpart.
bool calcRowLinear8UC1Impl(
    avx2_tag,
    uint8_t \* dst[],
    const uint8_t \* src0[],
    const uint8_t \* src1[],
    const short alpha[],
    const short clone[],
    const short mapsx[],
    const short beta[],
    uint8_t tmp[],
    const Size& inSz,
    const Size& outSz,
    const int lpi,
    const int
    );

// Explicit instantiations (`template void ...`) for avx2_tag.

// chanToPlaneRowImpl with T = uint8_t.
template void chanToPlaneRowImpl(
    avx2_tag,
    const uint8_t \* in,
    const int chan,
    const int chs,
    uint8_t \* out,
    const int length
    );

// chanToPlaneRowImpl with T = float.
template void chanToPlaneRowImpl(
    avx2_tag,
    const float \* in,
    const int chan,
    const int chs,
    float \* out,
    const int length
    );

// nv12ToRgbRowImpl: Y rows plus one `uv_row`.
template void nv12ToRgbRowImpl(
    avx2_tag,
    const uint8_t \*\* y_rows,
    const uint8_t \* uv_row,
    uint8_t \*\* out_rows,
    const int buf_width
    );

// i420ToRgbRowImpl: Y rows plus separate `u_row` and `v_row`.
template void i420ToRgbRowImpl(
    avx2_tag,
    const uint8_t \*\* y_rows,
    const uint8_t \* u_row,
    const uint8_t \* v_row,
    uint8_t \*\* out_rows,
    const int buf_width
    );

// Explicit instantiations of splitRowImpl<isa_tag_t, T, chs> for avx2_tag,
// covering T in {uint8_t, float} and chs in {2, 3, 4}.
template void splitRowImpl< avx2_tag, uint8_t, 2 >(
    avx2_tag,
    const uint8_t \* in,
    std::array<uint8_t \*, 2>& outs,
    const int length
    );

template void splitRowImpl< avx2_tag, float, 2 >(
    avx2_tag,
    const float \* in,
    std::array<float \*, 2>& outs,
    const int length
    );

template void splitRowImpl< avx2_tag, uint8_t, 3 >(
    avx2_tag,
    const uint8_t \* in,
    std::array<uint8_t \*, 3>& outs,
    const int length
    );

template void splitRowImpl< avx2_tag, float, 3 >(
    avx2_tag,
    const float \* in,
    std::array<float \*, 3>& outs,
    const int length
    );

template void splitRowImpl< avx2_tag, uint8_t, 4 >(
    avx2_tag,
    const uint8_t \* in,
    std::array<uint8_t \*, 4>& outs,
    const int length
    );

template void splitRowImpl< avx2_tag, float, 4 >(
    avx2_tag,
    const float \* in,
    std::array<float \*, 4>& outs,
    const int length
    );

// Explicit instantiations of mergeRowImpl<isa_tag_t, T, chs> for avx2_tag,
// covering T in {uint8_t, float} and chs in {2, 3, 4}.
template void mergeRowImpl< avx2_tag, uint8_t, 2 >(
    avx2_tag,
    const std::array<const uint8_t \*, 2>& ins,
    uint8_t \* out,
    const int length
    );

template void mergeRowImpl< avx2_tag, float, 2 >(
    avx2_tag,
    const std::array<const float \*, 2>& ins,
    float \* out,
    const int length
    );

template void mergeRowImpl< avx2_tag, uint8_t, 3 >(
    avx2_tag,
    const std::array<const uint8_t \*, 3>& ins,
    uint8_t \* out,
    const int length
    );

template void mergeRowImpl< avx2_tag, float, 3 >(
    avx2_tag,
    const std::array<const float \*, 3>& ins,
    float \* out,
    const int length
    );

template void mergeRowImpl< avx2_tag, uint8_t, 4 >(
    avx2_tag,
    const std::array<const uint8_t \*, 4>& ins,
    uint8_t \* out,
    const int length
    );

template void mergeRowImpl< avx2_tag, float, 4 >(
    avx2_tag,
    const std::array<const float \*, 4>& ins,
    float \* out,
    const int length
    );

// Explicit instantiations for avx2_tag: float row-linear and both
// calcRowAreaImpl flavours.

// calcRowLinear32FC1Impl: float data, float alpha/beta, int mapsx.
template void calcRowLinear32FC1Impl(
    avx2_tag,
    float \* dst[],
    const float \* src0[],
    const float \* src1[],
    const float alpha[],
    const int mapsx[],
    const float beta[],
    const Size& inSz,
    const Size& outSz,
    const int lpi,
    const int l
    );

// calcRowAreaImpl with T = uint8_t, A = Q0_16, I = short, W = Q8_8.
template void calcRowAreaImpl< avx2_tag, uint8_t, Q0_16, short, Q8_8 >(
    avx2_tag,
    uint8_t dst[],
    const uint8_t \* src[],
    const Size& inSz,
    const Size& outSz,
    Q0_16 yalpha,
    const MapperUnit8U& ymap,
    int xmaxdf,
    const short xindex[],
    const Q0_16 xalpha[],
    Q8_8 vbuf[]
    );

// calcRowAreaImpl with T = float, A = float, I = int, W = float.
template void calcRowAreaImpl< avx2_tag, float, float, int, float >(
    avx2_tag,
    float dst[],
    const float \* src[],
    const Size& inSz,
    const Size& outSz,
    float yalpha,
    const MapperUnit32F& ymap,
    int xmaxdf,
    const int xindex[],
    const float xalpha[],
    float vbuf[]
    );

// avx512_tag variants of the 8-bit row-linear routines; each returns bool.
// NOTE(review): the `< avx512_tag, N >` entries have no `template` keyword in
// this listing and are presumably explicit specializations with the
// `template <>` prefix omitted — confirm.
// The trailing `const int` parameter is unnamed.

// <avx512_tag, 3>.
bool calcRowLinear8UC3C4Impl< avx512_tag, 3 >(
    avx512_tag,
    std::array<std::array<uint8_t \*, 4>, 3>& dst,
    const uint8_t \* src0[],
    const uint8_t \* src1[],
    const short alpha[],
    const short clone[],
    const short mapsx[],
    const short beta[],
    uint8_t tmp[],
    const Size& inSz,
    const Size& outSz,
    const int lpi,
    const int
    );

// <avx512_tag, 4>.
bool calcRowLinear8UC3C4Impl< avx512_tag, 4 >(
    avx512_tag,
    std::array<std::array<uint8_t \*, 4>, 4>& dst,
    const uint8_t \* src0[],
    const uint8_t \* src1[],
    const short alpha[],
    const short clone[],
    const short mapsx[],
    const short beta[],
    uint8_t tmp[],
    const Size& inSz,
    const Size& outSz,
    const int lpi,
    const int
    );

// Single-plane counterpart.
bool calcRowLinear8UC1Impl(
    avx512_tag,
    uint8_t \* dst[],
    const uint8_t \* src0[],
    const uint8_t \* src1[],
    const short alpha[],
    const short clone[],
    const short mapsx[],
    const short beta[],
    uint8_t tmp[],
    const Size& inSz,
    const Size& outSz,
    const int lpi,
    const int
    );

// Explicit instantiations (`template void ...`) for avx512_tag.

// chanToPlaneRowImpl with T = uint8_t.
template void chanToPlaneRowImpl(
    avx512_tag,
    const uint8_t \* in,
    const int chan,
    const int chs,
    uint8_t \* out,
    const int length
    );

// chanToPlaneRowImpl with T = float.
template void chanToPlaneRowImpl(
    avx512_tag,
    const float \* in,
    const int chan,
    const int chs,
    float \* out,
    const int length
    );

// nv12ToRgbRowImpl: Y rows plus one `uv_row`.
template void nv12ToRgbRowImpl(
    avx512_tag,
    const uint8_t \*\* y_rows,
    const uint8_t \* uv_row,
    uint8_t \*\* out_rows,
    const int buf_width
    );

// i420ToRgbRowImpl: Y rows plus separate `u_row` and `v_row`.
template void i420ToRgbRowImpl(
    avx512_tag,
    const uint8_t \*\* y_rows,
    const uint8_t \* u_row,
    const uint8_t \* v_row,
    uint8_t \*\* out_rows,
    const int buf_width
    );

// Explicit instantiations of splitRowImpl<isa_tag_t, T, chs> for avx512_tag,
// covering T in {uint8_t, float} and chs in {2, 3, 4}.
template void splitRowImpl< avx512_tag, uint8_t, 2 >(
    avx512_tag,
    const uint8_t \* in,
    std::array<uint8_t \*, 2>& outs,
    const int length
    );

template void splitRowImpl< avx512_tag, float, 2 >(
    avx512_tag,
    const float \* in,
    std::array<float \*, 2>& outs,
    const int length
    );

template void splitRowImpl< avx512_tag, uint8_t, 3 >(
    avx512_tag,
    const uint8_t \* in,
    std::array<uint8_t \*, 3>& outs,
    const int length
    );

template void splitRowImpl< avx512_tag, float, 3 >(
    avx512_tag,
    const float \* in,
    std::array<float \*, 3>& outs,
    const int length
    );

template void splitRowImpl< avx512_tag, uint8_t, 4 >(
    avx512_tag,
    const uint8_t \* in,
    std::array<uint8_t \*, 4>& outs,
    const int length
    );

template void splitRowImpl< avx512_tag, float, 4 >(
    avx512_tag,
    const float \* in,
    std::array<float \*, 4>& outs,
    const int length
    );

// Explicit instantiations of mergeRowImpl<isa_tag_t, T, chs> for avx512_tag,
// covering T in {uint8_t, float} and chs in {2, 3, 4}.
template void mergeRowImpl< avx512_tag, uint8_t, 2 >(
    avx512_tag,
    const std::array<const uint8_t \*, 2>& ins,
    uint8_t \* out,
    const int length
    );

template void mergeRowImpl< avx512_tag, float, 2 >(
    avx512_tag,
    const std::array<const float \*, 2>& ins,
    float \* out,
    const int length
    );

template void mergeRowImpl< avx512_tag, uint8_t, 3 >(
    avx512_tag,
    const std::array<const uint8_t \*, 3>& ins,
    uint8_t \* out,
    const int length
    );

template void mergeRowImpl< avx512_tag, float, 3 >(
    avx512_tag,
    const std::array<const float \*, 3>& ins,
    float \* out,
    const int length
    );

template void mergeRowImpl< avx512_tag, uint8_t, 4 >(
    avx512_tag,
    const std::array<const uint8_t \*, 4>& ins,
    uint8_t \* out,
    const int length
    );

template void mergeRowImpl< avx512_tag, float, 4 >(
    avx512_tag,
    const std::array<const float \*, 4>& ins,
    float \* out,
    const int length
    );

// Explicit instantiations for avx512_tag: float row-linear and both
// calcRowAreaImpl flavours.

// calcRowLinear32FC1Impl: float data, float alpha/beta, int mapsx.
template void calcRowLinear32FC1Impl(
    avx512_tag,
    float \* dst[],
    const float \* src0[],
    const float \* src1[],
    const float alpha[],
    const int mapsx[],
    const float beta[],
    const Size& inSz,
    const Size& outSz,
    const int lpi,
    const int l
    );

// calcRowAreaImpl with T = uint8_t, A = Q0_16, I = short, W = Q8_8.
template void calcRowAreaImpl< avx512_tag, uint8_t, Q0_16, short, Q8_8 >(
    avx512_tag,
    uint8_t dst[],
    const uint8_t \* src[],
    const Size& inSz,
    const Size& outSz,
    Q0_16 yalpha,
    const MapperUnit8U& ymap,
    int xmaxdf,
    const short xindex[],
    const Q0_16 xalpha[],
    Q8_8 vbuf[]
    );

// calcRowAreaImpl with T = float, A = float, I = int, W = float.
template void calcRowAreaImpl< avx512_tag, float, float, int, float >(
    avx512_tag,
    float dst[],
    const float \* src[],
    const Size& inSz,
    const Size& outSz,
    float yalpha,
    const MapperUnit32F& ymap,
    int xmaxdf,
    const int xindex[],
    const float xalpha[],
    float vbuf[]
    );

// sse42_tag variants of the 8-bit row-linear routines; each returns bool.
// Unlike the other ISAs in this listing, sse42 also declares a helper
// template, calcRowLinear_8UC_Impl_<chanNum>.
// NOTE(review): the `< sse42_tag, N >` entries have no `template` keyword and
// are presumably explicit specializations with `template <>` omitted — confirm.

// Single-plane variant; trailing `const int` is unnamed.
bool calcRowLinear8UC1Impl(
    sse42_tag,
    uint8_t \* dst[],
    const uint8_t \* src0[],
    const uint8_t \* src1[],
    const short alpha[],
    const short clone[],
    const short mapsx[],
    const short beta[],
    uint8_t tmp[],
    const Size& inSz,
    const Size& outSz,
    const int lpi
    ,
    const int
    );

// Helper template on the channel count. It has the same parameters as the
// <sse42_tag, N> entries below except that there is no trailing `const int`.
// NOTE(review): presumably the shared implementation those entries forward
// to — the bodies are not visible here.
template <int chanNum>
CV_ALWAYS_INLINE bool calcRowLinear_8UC_Impl_(
    sse42_tag,
    std::array<std::array<uint8_t \*, 4>, chanNum>& dst,
    const uint8_t \* src0[],
    const uint8_t \* src1[],
    const short alpha[],
    const short clone[],
    const short mapsx[],
    const short beta[],
    uint8_t tmp[],
    const Size& inSz,
    const Size& outSz,
    const int lpi
    );

// <sse42_tag, 3>.
bool calcRowLinear8UC3C4Impl< sse42_tag, 3 >(
    sse42_tag,
    std::array<std::array<uint8_t \*, 4>, 3>& dst,
    const uint8_t \* src0[],
    const uint8_t \* src1[],
    const short alpha[],
    const short clone[],
    const short mapsx[],
    const short beta[],
    uint8_t tmp[],
    const Size& inSz,
    const Size& outSz,
    const int lpi,
    const int
    );

// <sse42_tag, 4>.
bool calcRowLinear8UC3C4Impl< sse42_tag, 4 >(
    sse42_tag,
    std::array<std::array<uint8_t \*, 4>, 4>& dst,
    const uint8_t \* src0[],
    const uint8_t \* src1[],
    const short alpha[],
    const short clone[],
    const short mapsx[],
    const short beta[],
    uint8_t tmp[],
    const Size& inSz,
    const Size& outSz,
    const int lpi,
    const int
    );

// Explicit instantiations (`template void ...`) for sse42_tag.

// chanToPlaneRowImpl with T = uint8_t.
template void chanToPlaneRowImpl(
    sse42_tag,
    const uint8_t \* in,
    const int chan,
    const int chs,
    uint8_t \* out,
    const int length
    );

// chanToPlaneRowImpl with T = float.
template void chanToPlaneRowImpl(
    sse42_tag,
    const float \* in,
    const int chan,
    const int chs,
    float \* out,
    const int length
    );

// nv12ToRgbRowImpl: Y rows plus one `uv_row`.
template void nv12ToRgbRowImpl(
    sse42_tag,
    const uint8_t \*\* y_rows,
    const uint8_t \* uv_row,
    uint8_t \*\* out_rows,
    const int buf_width
    );

// i420ToRgbRowImpl: Y rows plus separate `u_row` and `v_row`.
template void i420ToRgbRowImpl(
    sse42_tag,
    const uint8_t \*\* y_rows,
    const uint8_t \* u_row,
    const uint8_t \* v_row,
    uint8_t \*\* out_rows,
    const int buf_width
    );

// Explicit instantiations of splitRowImpl<isa_tag_t, T, chs> for sse42_tag,
// covering chs in {2, 3, 4} for 8-bit and float data.
// Note: the 8-bit entries spell the template argument `uchar` while the
// parameters use `uint8_t`; the other ISAs use `uint8_t` in both places.
// NOTE(review): this only instantiates the same function if uchar and uint8_t
// name the same type — the uchar definition is not visible here.
template void splitRowImpl< sse42_tag, uchar, 2 >(
    sse42_tag,
    const uint8_t \* in,
    std::array<uint8_t \*, 2>& outs,
    const int length
    );

template void splitRowImpl< sse42_tag, float, 2 >(
    sse42_tag,
    const float \* in,
    std::array<float \*, 2>& outs,
    const int length
    );

template void splitRowImpl< sse42_tag, uchar, 3 >(
    sse42_tag,
    const uint8_t \* in,
    std::array<uint8_t \*, 3>& outs,
    const int length
    );

template void splitRowImpl< sse42_tag, float, 3 >(
    sse42_tag,
    const float \* in,
    std::array<float \*, 3>& outs,
    const int length
    );

template void splitRowImpl< sse42_tag, uchar, 4 >(
    sse42_tag,
    const uint8_t \* in,
    std::array<uint8_t \*, 4>& outs,
    const int length
    );

template void splitRowImpl< sse42_tag, float, 4 >(
    sse42_tag,
    const float \* in,
    std::array<float \*, 4>& outs,
    const int length
    );

// Explicit instantiations of mergeRowImpl<isa_tag_t, T, chs> for sse42_tag,
// covering chs in {2, 3, 4} for 8-bit and float data.
// Note: the 8-bit entries spell the template argument `uchar` while the
// parameters use `uint8_t`; the other ISAs use `uint8_t` in both places.
// NOTE(review): this relies on uchar and uint8_t naming the same type — the
// uchar definition is not visible here.
template void mergeRowImpl< sse42_tag, uchar, 2 >(
    sse42_tag,
    const std::array<const uint8_t \*, 2>& ins,
    uint8_t \* out,
    const int length
    );

template void mergeRowImpl< sse42_tag, float, 2 >(
    sse42_tag,
    const std::array<const float \*, 2>& ins,
    float \* out,
    const int length
    );

template void mergeRowImpl< sse42_tag, uchar, 3 >(
    sse42_tag,
    const std::array<const uint8_t \*, 3>& ins,
    uint8_t \* out,
    const int length
    );

template void mergeRowImpl< sse42_tag, float, 3 >(
    sse42_tag,
    const std::array<const float \*, 3>& ins,
    float \* out,
    const int length
    );

template void mergeRowImpl< sse42_tag, uchar, 4 >(
    sse42_tag,
    const std::array<const uint8_t \*, 4>& ins,
    uint8_t \* out,
    const int length
    );

template void mergeRowImpl< sse42_tag, float, 4 >(
    sse42_tag,
    const std::array<const float \*, 4>& ins,
    float \* out,
    const int length
    );

// Explicit instantiations for sse42_tag: float row-linear and both
// calcRowAreaImpl flavours.

// calcRowLinear32FC1Impl: float data, float alpha/beta, int mapsx.
template void calcRowLinear32FC1Impl(
    sse42_tag,
    float \* dst[],
    const float \* src0[],
    const float \* src1[],
    const float alpha[],
    const int mapsx[],
    const float beta[],
    const Size& inSz,
    const Size& outSz,
    const int lpi,
    const int l
    );

// calcRowAreaImpl with T = uint8_t, A = Q0_16, I = short, W = Q8_8.
template void calcRowAreaImpl< sse42_tag, uint8_t, Q0_16, short, Q8_8 >(
    sse42_tag,
    uint8_t dst[],
    const uint8_t \* src[],
    const Size& inSz,
    const Size& outSz,
    Q0_16 yalpha,
    const MapperUnit8U& ymap,
    int xmaxdf,
    const short xindex[],
    const Q0_16 xalpha[],
    Q8_8 vbuf[]
    );

// calcRowAreaImpl with T = float, A = float, I = int, W = float.
template void calcRowAreaImpl< sse42_tag, float, float, int, float >(
    sse42_tag,
    float dst[],
    const float \* src[],
    const Size& inSz,
    const Size& outSz,
    float yalpha,
    const MapperUnit32F& ymap,
    int xmaxdf,
    const int xindex[],
    const float xalpha[],
    float vbuf[]
    );

// Non-template area routines without an ISA tag parameter. Their parameter
// lists match the calcRowAreaImpl instantiations minus the leading tag.
// NOTE(review): presumably the tag-free entry points that pick an
// implementation internally — the bodies are not visible here.

// 8-bit flavour: uchar pixels, Q0_16 coefficients, short indices, Q8_8 buffer.
void calcRowArea_8U(
    uchar dst[],
    const uchar \* src[],
    const Size& inSz,
    const Size& outSz,
    Q0_16 yalpha,
    const MapperUnit8U& ymap,
    int xmaxdf,
    const short xindex[],
    const Q0_16 xalpha[],
    Q8_8 vbuf[]
    );

// Float flavour: float pixels/coefficients/buffer, int indices.
void calcRowArea_32F(
    float dst[],
    const float \* src[],
    const Size& inSz,
    const Size& outSz,
    float yalpha,
    const MapperUnit32F& ymap,
    int xmaxdf,
    const int xindex[],
    const float xalpha[],
    float vbuf[]
    );

// Overload taking scalar_tag by value; returns bool.
// NOTE(review): presumably reports whether the given ISA is usable — confirm.
bool is_present(scalar_tag);
// Two size-based helpers returning double; which quotient each one forms
// (in/out vs out/in) is not visible in this listing.
static double invRatio(int inSz, int outSz);
static double ratio(int inSz, int outSz);

// Scratch-buffer setup for the linear path. Template parameters mirror
// linearScratchDesc<T, Mapper, chanNum>; chanNum defaults to 1.
// `scratch` is passed by non-const reference.
template <typename T, typename Mapper, int chanNum = 1>
static void initScratchLinear(
    const cv::GMatDesc& in,
    const Size& outSz,
    cv::gapi::fluid::Buffer& scratch,
    int lpi
    );

// ISA-independent single-plane row-linear template: T is the pixel type,
// IT the type of `mapsx`, AT the type of `alpha`/`beta`.
// NOTE(review): Mapper does not appear in the parameter list, so callers must
// name it explicitly.
template <typename T, typename IT, typename AT, class Mapper>
void calcRowLinearC1Impl(
    T \* dst[],
    const T \* src0[],
    const T \* src1[],
    const AT alpha[],
    const IT mapsx[],
    const AT beta[],
    const Size& inSz,
    const Size& outSz,
    const int lpi,
    const int length
    );

// Scratch-buffer setup for the area path; no lpi parameter.
template <typename Mapper>
static void initScratchArea(
    const cv::GMatDesc& in,
    const Size& outSz,
    cv::gapi::fluid::Buffer& scratch
    );

// Returns a cv::gapi::GKernelPackage; takes no arguments.
// NOTE(review): presumably builds the package for the ISA detected at run
// time — the selection logic is not visible here.
cv::gapi::GKernelPackage FKernelsChooseISA();
// Kernel declarations via G-API macros; what the macros expand to is not
// visible in this listing. The third argument of GAPI_FLUID_KERNEL is `false`
// for all three fluid kernels.
GAPI_COMPOUND_KERNEL(FScalePlane, ScalePlane);
GAPI_FLUID_KERNEL(FConvertDepth, ConvertDepth, false);
GAPI_FLUID_KERNEL(FSubC, GSubC, false);
GAPI_FLUID_KERNEL(FDivC, GDivC, false);

// type_dispatch, id-based overload.
// result_t defaults to the type obtained by calling `type_to_value` with
// type_to_type<head_t<typelist>>{}, i.e. with the first entry of the list.
// `default_value` is value-initialised when omitted.
// NOTE(review): presumably `type_to_id` maps each list entry to an id that is
// compared with `type_id` — the matching logic is not visible here.
template <
    typename typelist,
    typename default_t,
    typename type_id_t,
    typename type_to_id_t,
    typename type_to_value_t,
    typename result_t = decltype(std::declval<type_to_value_t>()(type_to_type<head_t<typelist>> {}))
    >
result_t type_dispatch(
    type_id_t type_id,
    type_to_id_t&& type_to_id,
    type_to_value_t&& type_to_value,
    default_t default_value = {}
    );

// type_dispatch, predicate-based overload: takes a `pred` callable instead of
// an id and an id mapper. result_t is deduced the same way as above.
template <
    typename typelist,
    typename default_t,
    typename pred_t,
    typename type_to_value_t,
    typename result_t = decltype(std::declval<type_to_value_t>()(type_to_type<head_t<typelist>> {}))
    >
result_t type_dispatch(
    pred_t&& pred,
    type_to_value_t&& type_to_value,
    default_t default_value = {}
    );

// Takes an integer type id and returns bool; `typelist` must be given
// explicitly because it does not appear in the parameter list.
template <typename typelist>
bool is_cv_type_in_list(const int type_id);

// saturate_cast<DST, SRC>: primary template converting SRC to DST.
template <typename DST, typename SRC>
static DST saturate_cast(SRC x);

// Per-type entries for (DST <- SRC):
//   short <- int, short <- float, short <- short, uint16_t <- uint16_t,
//   uchar <- int, uint16_t <- uint8_t, float <- double.
// NOTE(review): `short saturate_cast(int)`, `short saturate_cast(float)` etc.
// are presumably explicit specializations of the template above with the
// `template <>` prefix dropped by the listing generator — confirm.
short saturate_cast(int x);
short saturate_cast(float x);
short saturate_cast(short x);
uint16_t saturate_cast(uint16_t x);
uchar saturate_cast< uchar >(int v);
uint16_t saturate_cast(uint8_t x);
float saturate_cast(double x);
// calc: two overloads taking (alpha0, src0, alpha1, src1).
// NOTE(review): presumably the weighted sum alpha0*src0 + alpha1*src1 — the
// bodies are not visible here.
static uint8_t calc(short alpha0, uint8_t src0, short alpha1, uint8_t src1);
static float calc(float alpha0, float src0, float alpha1, float src1);

// convert_cast<DST, SRC>: primary template converting SRC to DST.
template <typename DST, typename SRC>
static DST convert_cast(SRC x);

// Per-type entries for (DST <- SRC):
//   uint8_t <- uint8_t, uint8_t <- float, float <- double,
//   Q8_8 <- uchar, uchar <- Q8_8.
// NOTE(review): presumably explicit specializations of the template above
// with `template <>` omitted by the listing generator — confirm.
uint8_t convert_cast(uint8_t x);
uint8_t convert_cast(float x);
float convert_cast(double x);
Q8_8 convert_cast(uchar x);
uchar convert_cast(Q8_8 x);

// checked_cast<DST, SRC>: same shape as the two casts above.
// NOTE(review): what is checked, and what happens on failure, is not visible.
template <typename DST, typename SRC>
static DST checked_cast(SRC x);

// mulas / mulaw: two-argument helpers with one integer and one float overload
// each. The integer overloads take a Q0_16 `a` and return Q8_8; `s` is a U8
// and `w` is a Q8_8.
// NOTE(review): the names suggest "multiply alpha by source" and "multiply
// alpha by work value"; rounding and shift behaviour are not visible here.
static Q8_8 mulas(Q0_16 a, U8 s);
static Q8_8 mulaw(Q0_16 a, Q8_8 w);
static float mulas(float a, float s);
static float mulaw(float a, float w);

// Scalar overload of uvToRGBuv: takes one u and one v sample by value and
// writes three int results through the `ruv`, `guv`, `buv` references.
// NOTE(review): presumably the chroma contributions to R, G and B — the
// arithmetic is not visible here.
static void uvToRGBuv(
    const uchar u,
    const uchar v,
    int& ruv,
    int& guv,
    int& buv
    );

// Scalar overload of yRGBuvToRGB: takes one y sample plus three int inputs
// named like uvToRGBuv's outputs, and writes r, g, b through uchar references.
static void yRGBuvToRGB(
    const uchar vy,
    const int ruv,
    const int guv,
    const int buv,
    uchar& r,
    uchar& g,
    uchar& b
    );

// Fixed-channel-count merge/split helpers, all CV_ALWAYS_INLINE and templated
// on a vector type VecT and element type T. VecT does not appear in any
// parameter list, so callers must name it explicitly.
// NOTE(review): presumably the building blocks behind mergeRowImpl /
// splitRowImpl for chs = 2, 3, 4 — the bodies are not visible here.

// Two inputs, one output.
template <typename VecT, typename T>
CV_ALWAYS_INLINE void mergeRowC2_Impl(
    const T in0[],
    const T in1[],
    T out[],
    const int length
    );

// Three inputs, one output.
template <typename VecT, typename T>
CV_ALWAYS_INLINE void mergeRowC3_Impl(
    const T in0[],
    const T in1[],
    const T in2[],
    T out[],
    const int length
    );

// Four inputs, one output.
template <typename VecT, typename T>
CV_ALWAYS_INLINE void mergeRowC4_Impl(
    const T in0[],
    const T in1[],
    const T in2[],
    const T in3[],
    T out[],
    const int length
    );

// One input, two outputs.
template <typename VecT, typename T>
CV_ALWAYS_INLINE void splitRowC2_Impl(
    const T in[],
    T out0[],
    T out1[],
    const int length
    );

// One input, three outputs.
template <typename VecT, typename T>
CV_ALWAYS_INLINE void splitRowC3_Impl(
    const T in[],
    T out0[],
    T out1[],
    T out2[],
    const int length
    );

// One input, four outputs.
template <typename VecT, typename T>
CV_ALWAYS_INLINE void splitRowC4_Impl(
    const T in[],
    T out0[],
    T out1[],
    T out2[],
    T out3[],
    const int length
    );

// Vector overload of uvToRGBuv: takes u and v as v_uint8 vectors and writes
// its results into three caller-provided arrays of exactly four v_int32 each.
// The output parameters are references to arrays, spelled `T (&name)[4]`;
// the parameter name belongs inside the parentheses, because `T(&) name[4]`
// is not a valid declarator.
// NOTE(review): presumably the vectorised counterpart of the scalar
// uvToRGBuv(uchar, uchar, int&, int&, int&) — the body is not visible here.
CV_ALWAYS_INLINE void uvToRGBuv(
    const v_uint8& u,
    const v_uint8& v,
    v_int32 (&ruv)[4],
    v_int32 (&guv)[4],
    v_int32 (&buv)[4]
    );

// Vector overload of yRGBuvToRGB: takes y as a v_uint8 vector plus three
// read-only arrays of exactly four v_int32 each, and writes the results
// through the rr, gg, bb references.
// The array inputs are references to const arrays, spelled
// `const T (&name)[4]`; the parameter name belongs inside the parentheses,
// because `const T(&) name[4]` is not a valid declarator.
// NOTE(review): presumably the vectorised counterpart of the scalar
// yRGBuvToRGB — the body is not visible here.
CV_ALWAYS_INLINE void yRGBuvToRGB(
    const v_uint8& vy,
    const v_int32 (&ruv)[4],
    const v_int32 (&guv)[4],
    const v_int32 (&buv)[4],
    v_uint8& rr,
    v_uint8& gg,
    v_uint8& bb
    );

// CV_ALWAYS_INLINE declarations of the colour-conversion row templates.
// They have the same names and parameter types as the non-inline
// nv12ToRgbRowImpl / i420ToRgbRowImpl templates in this listing, but the
// parameters are named srcY/srcUV/srcU/srcV/dstRGBx/width instead of
// y_rows/uv_row/u_row/v_row/out_rows/buf_width.
// NOTE(review): presumably the shared inline implementation that the per-ISA
// translation units instantiate — confirm which header this comes from.

// Y rows plus one UV row; element type spelled `uchar` here.
template <typename isa_tag_t>
CV_ALWAYS_INLINE void nv12ToRgbRowImpl(
    isa_tag_t,
    const uchar \*\* srcY,
    const uchar \* srcUV,
    uchar \*\* dstRGBx,
    const int width
    );

// Y rows plus separate U and V rows; element type spelled `uint8_t` here.
template <typename isa_tag_t>
CV_ALWAYS_INLINE void i420ToRgbRowImpl(
    isa_tag_t,
    const uint8_t \*\* srcY,
    const uint8_t \* srcU,
    const uint8_t \* srcV,
    uint8_t \*\* dstRGBx,
    const int width
    );

// CV_ALWAYS_INLINE area helpers. Type parameters: T pixel type, A coefficient
// type, I index type, W work-buffer type.

// downy: reads `src` rows and writes `vbuf`, using `ymap` and `yalpha`.
// NOTE(review): presumably the vertical pass of the area routine — confirm.
template <typename T, typename A, typename I, typename W>
CV_ALWAYS_INLINE void downy(
    const T \* src[],
    int inWidth,
    const MapperUnit<A, I>& ymap,
    A yalpha,
    W vbuf[]
    );

// downx: reads `vbuf` (const) and writes `dst`, using xindex/xalpha/xmaxdf.
// NOTE(review): presumably the horizontal pass of the area routine — confirm.
template <typename T, typename A, typename I, typename W>
CV_ALWAYS_INLINE void downx(
    T dst[],
    int outWidth,
    int xmaxdf,
    const I xindex[],
    const A xalpha[],
    const W vbuf[]
    );

// Inline calcRowAreaImpl: same name and parameter list as the non-inline
// calcRowAreaImpl template in this listing. Its parameters are the union of
// downy's and downx's inputs plus the two sizes.
template <typename isa_tag_t, typename T, typename A, typename I, typename W>
CV_ALWAYS_INLINE void calcRowAreaImpl(
    isa_tag_t,
    T dst[],
    const T \* src[],
    const Size& inSz,
    const Size& outSz,
    A yalpha,
    const MapperUnit<A, I>& ymap,
    int xmaxdf,
    const I xindex[],
    const A xalpha[],
    W vbuf[]
    );

// CV_ALWAYS_INLINE declarations of the remaining row templates. Apart from
// copyRow_Impl, each has the same name and parameter list as a non-inline
// template in this listing.

// Takes `in`, `out` and a non-const `length`; VecT must be named explicitly
// because it does not appear in the parameter list.
template <typename VecT, typename T>
CV_ALWAYS_INLINE void copyRow_Impl(
    const T in[],
    T out[],
    int length
    );

// Float single-plane row-linear routine.
template <typename isa_tag_t>
CV_ALWAYS_INLINE void calcRowLinear32FC1Impl(
    isa_tag_t,
    float \* dst[],
    const float \* src0[],
    const float \* src1[],
    const float alpha[],
    const int mapsx[],
    const float beta[],
    const Size& inSz,
    const Size& outSz,
    const int lpi,
    const int l
    );

// Reads from `in`, writes to `out`, with integer `chan`, `chs`, `length`.
template <typename isa_tag_t, typename T>
CV_ALWAYS_INLINE void chanToPlaneRowImpl(
    isa_tag_t,
    const T \* in,
    const int chan,
    const int chs,
    T \* out,
    const int length
    );

// One input pointer, `chs` output pointers.
template <typename isa_tag_t, typename T, int chs>
CV_ALWAYS_INLINE void splitRowImpl(
    isa_tag_t,
    const T \* in,
    std::array<T \*, chs>& outs,
    const int length
    );

// `chs` input pointers, one output pointer.
template <typename isa_tag_t, typename T, int chs>
CV_ALWAYS_INLINE void mergeRowImpl(
    isa_tag_t,
    const std::array<const T \*, chs>& ins,
    T \* out,
    const int length
    );

} // namespace kernels