How to Implement Custom CPU Layers

The primary vehicle for CPU performance in the Inference Engine is the Intel® Math Kernel Library for Deep Neural Networks (Intel® MKL-DNN), so new CPU kernels extend the Inference Engine plugin for Intel MKL-DNN. However, implementing the InferenceEngine::ILayerExecImpl interface defines a general CPU-side extension: there are no Intel MKL-DNN specifics in the way you need to implement a kernel.

Implementation Class

All custom kernels for the CPU plugin should be inherited from the InferenceEngine::ILayerExecImpl interface. Based on that, the declaration of a kernel implementation class can look as follows:

class OpImplementation : public InferenceEngine::ILayerExecImpl {
public:
    explicit OpImplementation(const std::shared_ptr<ngraph::Node>& node);
    InferenceEngine::StatusCode getSupportedConfigurations(std::vector<InferenceEngine::LayerConfig> &conf,
                                                           InferenceEngine::ResponseDesc *resp) noexcept override;
    InferenceEngine::StatusCode init(InferenceEngine::LayerConfig &config,
                                     InferenceEngine::ResponseDesc *resp) noexcept override;
    InferenceEngine::StatusCode execute(std::vector<InferenceEngine::Blob::Ptr> &inputs,
                                        std::vector<InferenceEngine::Blob::Ptr> &outputs,
                                        InferenceEngine::ResponseDesc *resp) noexcept override;

private:
    int64_t add;
    ngraph::Shape inShape;
    ngraph::Shape outShape;
    std::string error;
};

Class Fields

The provided implementation has several fields: add stores the value of the operation attribute that is applied in execute, inShape and outShape store the input and output tensor shapes, and error stores an error message produced by the constructor, if any.

Constructor of Implementation

The implementation constructor checks the parameters of the nGraph operation, stores the needed attributes, and records an error message if a check fails.

OpImplementation::OpImplementation(const std::shared_ptr<ngraph::Node> &node) {
    try {
        auto castedNode = std::dynamic_pointer_cast<Operation>(node);
        if (!castedNode)
            THROW_IE_EXCEPTION << "Cannot create implementation for unknown operation!";
        if (castedNode->inputs().size() != 1 || castedNode->outputs().size() != 1)
            THROW_IE_EXCEPTION << "Cannot create implementation for operation with incorrect number of inputs or outputs!";
        if (castedNode->get_input_partial_shape(0).is_dynamic() || castedNode->get_output_partial_shape(0).is_dynamic())
            THROW_IE_EXCEPTION << "Cannot create implementation for op with dynamic shapes!";
        if (castedNode->get_input_shape(0).size() != 4 || castedNode->get_output_shape(0).size() != 4)
            THROW_IE_EXCEPTION << "Operation supports only 4d tensors for input and output.";
        if (castedNode->get_input_element_type(0) != ngraph::element::f32 || castedNode->get_output_element_type(0) != ngraph::element::f32)
            THROW_IE_EXCEPTION << "Operation supports only FP32 tensors.";
        add = castedNode->getAddAttr();
        // Store the static shapes used later in getSupportedConfigurations
        inShape = castedNode->get_input_shape(0);
        outShape = castedNode->get_output_shape(0);
    } catch (InferenceEngine::details::InferenceEngineException& ex) {
        error = ex.what();
    }
}

getSupportedConfigurations

The InferenceEngine::ILayerExecImpl::getSupportedConfigurations method returns all supported configuration formats (input/output tensor layouts) for your implementation. To specify data formats, use InferenceEngine::TensorDesc. Refer to the Memory Primitives section for instructions on how to do it.
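For example, a dense planar NCHW descriptor for an FP32 tensor can be built directly from a layout constant (a minimal illustration, not part of the original sample; the 1x3x224x224 shape is arbitrary):

// Hypothetical shape, used only to show how a planar descriptor is constructed
InferenceEngine::TensorDesc planarDesc(InferenceEngine::Precision::FP32,
                                       {1, 3, 224, 224},
                                       InferenceEngine::Layout::NCHW);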

InferenceEngine::StatusCode OpImplementation::getSupportedConfigurations(std::vector<InferenceEngine::LayerConfig> &conf,
                                                                         InferenceEngine::ResponseDesc *resp) noexcept {
    auto createConfig = [](const InferenceEngine::SizeVector inShape, const InferenceEngine::SizeVector& outShape, bool planar) {
        InferenceEngine::LayerConfig config;
        config.dynBatchSupport = false;
        InferenceEngine::DataConfig inData;
        InferenceEngine::DataConfig outData;
        InferenceEngine::SizeVector order = {0, 1, 2, 3};
        // Allow any offset before data
        size_t offset((std::numeric_limits<size_t>::max)());
        if (planar) {
            inData.desc = InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, inShape, {inShape, order, offset});
            config.inConfs.push_back(inData);
            outData.desc = InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, outShape, {outShape, order, offset});
            config.outConfs.push_back(outData);
        } else {
            // Add blocked (nChw8c) format
            auto div_up = [](const int a, const int b) -> int {
                if (!b)
                    return 0;
                return (a + b - 1) / b;
            };
            order.push_back(1);
            InferenceEngine::SizeVector inBlkDims = inShape;
            inBlkDims[1] = div_up(inBlkDims[1], 8);
            inBlkDims.push_back(8);
            InferenceEngine::SizeVector outBlkDims = outShape;
            outBlkDims[1] = div_up(outBlkDims[1], 8);
            outBlkDims.push_back(8);
            inData.desc = InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, inShape, {inBlkDims, order, offset});
            config.inConfs.push_back(inData);
            outData.desc = InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, outShape, {outBlkDims, order, offset});
            config.outConfs.push_back(outData);
        }
        return config;
    };
    if (!error.empty()) {
        if (resp) {
            strncpy(resp->msg, error.c_str(), sizeof(resp->msg) - 1);
            resp->msg[sizeof(resp->msg) - 1] = 0;
        }
        return InferenceEngine::GENERAL_ERROR;
    }
    // Add planar format
    conf.emplace_back(createConfig(inShape, outShape, true));
    // Add blocked format nChw8c
    conf.emplace_back(createConfig(inShape, outShape, false));
    return InferenceEngine::OK;
}
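As a concrete illustration of the blocked branch (this example is not part of the original sample): for a hypothetical 1x3x224x224 input, div_up(3, 8) rounds the 3 channels up to one block of 8, so the nChw8c descriptor is built from blocked dimensions {1, 1, 224, 224, 8} with order {0, 1, 2, 3, 1}:

// Logical NCHW dims plus an explicit blocking descriptor (blocked dims, order, offset)
InferenceEngine::SizeVector shape = {1, 3, 224, 224};
size_t offset((std::numeric_limits<size_t>::max)());
InferenceEngine::TensorDesc blockedDesc(InferenceEngine::Precision::FP32,
                                        shape,
                                        {{1, 1, 224, 224, 8}, {0, 1, 2, 3, 1}, offset});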

init

The InferenceEngine::ILayerExecImpl::init method gets the runtime-selected configuration, which is one of those returned by the getSupportedConfigurations method, and checks its parameters:

InferenceEngine::StatusCode OpImplementation::init(InferenceEngine::LayerConfig &config,
                                                   InferenceEngine::ResponseDesc *resp) noexcept {
    try {
        if (config.inConfs.size() != 1 || config.outConfs.size() != 1) {
            THROW_IE_EXCEPTION << "Operation cannot be initialized with incorrect number of inputs/outputs!";
        }
        if (config.inConfs[0].desc.getDims().size() != 4 || config.outConfs[0].desc.getDims().size() != 4) {
            THROW_IE_EXCEPTION << "Operation can be initialized only with 4d input/output tensors!";
        }
        if (config.outConfs[0].desc.getPrecision() != InferenceEngine::Precision::FP32 ||
            config.inConfs[0].desc.getPrecision() != InferenceEngine::Precision::FP32) {
            THROW_IE_EXCEPTION << "Operation supports only FP32 precisions!";
        }
    } catch (InferenceEngine::details::InferenceEngineException& ex) {
        // Report the message of the exception that was just caught
        error = ex.what();
        if (resp) {
            strncpy(resp->msg, error.c_str(), sizeof(resp->msg) - 1);
            resp->msg[sizeof(resp->msg) - 1] = 0;
        }
        return InferenceEngine::GENERAL_ERROR;
    }
    return InferenceEngine::OK;
}

execute

The InferenceEngine::ILayerExecImpl::execute method accepts and processes the actual tensors as input/output blobs:

InferenceEngine::StatusCode OpImplementation::execute(std::vector<InferenceEngine::Blob::Ptr> &inputs,
                                                      std::vector<InferenceEngine::Blob::Ptr> &outputs,
                                                      InferenceEngine::ResponseDesc *resp) noexcept {
    const float* src_data = inputs[0]->cbuffer().as<const float *>() + inputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
    float* dst_data = outputs[0]->buffer().as<float *>() + outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
    // Element-wise addition of the stored attribute value
    for (size_t i = 0; i < inputs[0]->size(); i++) {
        dst_data[i] = src_data[i] + add;
    }
    return InferenceEngine::OK;
}

Register Implementation in Extension Class

To register the custom kernel implementation in the Extension class, implement the following methods; a sketch of the Extension class declaration that ties them together is shown after the getImplementation example.

getImplTypes

The InferenceEngine::IExtension::getImplTypes method returns a vector of implementation types for an operation.

std::vector<std::string> Extension::getImplTypes(const std::shared_ptr<ngraph::Node> &node) {
    if (std::dynamic_pointer_cast<Operation>(node)) {
        return {"CPU"};
    }
    return {};
}

getImplementation

The InferenceEngine::IExtension::getImplementation method returns the kernel implementation with the specified type for an operation.

InferenceEngine::ILayerImpl::Ptr Extension::getImplementation(const std::shared_ptr<ngraph::Node> &node, const std::string &implType) {
    if (std::dynamic_pointer_cast<Operation>(node) && implType == "CPU") {
        return std::make_shared<OpImplementation>(node);
    }
    return nullptr;
}
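For reference, these two methods can be declared in an Extension class along the following lines. This is only a sketch: the remaining InferenceEngine::IExtension methods (such as GetVersion, Unload, Release, and getOpSets) are assumed to be implemented elsewhere in the extension.

class Extension : public InferenceEngine::IExtension {
public:
    // Methods described above
    std::vector<std::string> getImplTypes(const std::shared_ptr<ngraph::Node> &node) override;
    InferenceEngine::ILayerImpl::Ptr getImplementation(const std::shared_ptr<ngraph::Node> &node,
                                                       const std::string &implType) override;
    // Other IExtension methods (GetVersion, Unload, Release, getOpSets, ...)
    // are omitted here for brevity.
};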

Load Extension with Executable Kernels to Plugin

Use the AddExtension method of the general plugin interface to load your primitives:

InferenceEngine::Core core;
// Load CPU extension as a shared library
auto extension_ptr = InferenceEngine::make_so_pointer<InferenceEngine::IExtension>("<shared lib path>");
// Add extension to the CPU device
core.AddExtension(extension_ptr, "CPU");
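Once the extension is added, a model containing the custom operation can be read and compiled for the CPU device as usual. A minimal sketch, assuming the model path placeholder is replaced with a real IR file:

// Read a model that uses the custom operation and compile it for CPU
auto network = core.ReadNetwork("<path to model .xml>");
auto execNetwork = core.LoadNetwork(network, "CPU");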