class InferenceEngine::AsyncInferRequestThreadSafeDefault

Overview

Base class with a default implementation of an asynchronous multi-staged inference request. To customize the pipeline stages, a derived class should change the content of the AsyncInferRequestThreadSafeDefault::_pipeline member container. It consists of pairs of tasks and the executors that will run those tasks. Plugins are recommended to use this class as a base class for their asynchronous inference request implementations. More…

#include <ie_infer_async_request_thread_safe_default.hpp>

class AsyncInferRequestThreadSafeDefault: public InferenceEngine::IInferRequestInternal
{
public:
    // typedefs

    typedef std::shared_ptr<AsyncInferRequestThreadSafeDefault> Ptr;

    // structs

    struct DisableCallbackGuard;
    struct ImmediateStreamsExecutor;

    // construction

    AsyncInferRequestThreadSafeDefault(
        const IInferRequestInternal::Ptr& request,
        const ITaskExecutor::Ptr& taskExecutor,
        const ITaskExecutor::Ptr& callbackExecutor
        );

    // methods

    virtual StatusCode Wait(int64_t millis_timeout);
    virtual void StartAsync();
    virtual void Infer();
    virtual std::map<std::string, InferenceEngineProfileInfo> GetPerformanceCounts() const;
    virtual void SetBlob(const std::string& name, const Blob::Ptr& data);

    virtual void SetBlob(
        const std::string& name,
        const Blob::Ptr& data,
        const PreProcessInfo& info
        );

    virtual void SetBlobs(
        const std::string& name,
        const std::vector<Blob::Ptr>& blobs
        );

    virtual BatchedBlob::Ptr GetBlobs(const std::string& name);
    virtual Blob::Ptr GetBlob(const std::string& name);
    virtual const PreProcessInfo& GetPreProcess(const std::string& name) const;
    virtual void SetBatch(int batch);
    virtual void SetCallback(Callback callback);
    virtual std::vector<std::shared_ptr<InferenceEngine::IVariableStateInternal>> QueryState();
    void ThrowIfCanceled() const;
    virtual void Cancel();

    virtual void setModelInputsOutputs(
        const std::vector<std::shared_ptr<const ov::Node>>& inputs,
        const std::vector<std::shared_ptr<const ov::Node>>& outputs
        );
};

Inherited Members

public:
    // typedefs

    typedef std::shared_ptr<IInferRequestInternal> Ptr;
    typedef std::function<void(std::exception_ptr)> Callback;

    // methods

    virtual void Infer();
    virtual void InferImpl();
    virtual void Cancel();
    virtual std::map<std::string, InferenceEngineProfileInfo> GetPerformanceCounts() const;
    virtual void SetBlob(const std::string& name, const Blob::Ptr& data);

    virtual void SetBlobs(
        const std::string& name,
        const std::vector<Blob::Ptr>& blobs
        );

    virtual void SetBlobsImpl(
        const std::string& name,
        const BatchedBlob::Ptr& batched_blob
        );

    virtual Blob::Ptr GetBlob(const std::string& name);
    virtual BatchedBlob::Ptr GetBlobs(const std::string& name);

    virtual void SetBlob(
        const std::string& name,
        const Blob::Ptr& data,
        const PreProcessInfo& info
        );

    virtual const PreProcessInfo& GetPreProcess(const std::string& name) const;
    virtual void SetBatch(int batch);
    virtual std::vector<std::shared_ptr<IVariableStateInternal>> QueryState();
    virtual void StartAsync();
    virtual void StartAsyncImpl();
    virtual StatusCode Wait(int64_t millis_timeout);
    virtual void SetCallback(Callback callback);

    void checkBlob(
        const Blob::Ptr& blob,
        const std::string& name,
        bool isInput,
        const SizeVector& refDims = {}
        ) const;

    virtual void checkBlobs();
    void setPointerToExecutableNetworkInternal(const std::shared_ptr<IExecutableNetworkInternal>& exeNetwork);
    std::shared_ptr<IExecutableNetworkInternal> getPointerToExecutableNetworkInternal() const;
    void setPointerToSo(const std::shared_ptr<void>& so);
    std::shared_ptr<void> getPointerToSo() const;
    void* GetUserData();
    void SetUserData(void* userData);
    const std::vector<std::shared_ptr<const ov::Node>>& GetInputs() const;
    const std::vector<std::shared_ptr<const ov::Node>>& GetOutputs() const;

    virtual void setModelInputsOutputs(
        const std::vector<std::shared_ptr<const ov::Node>>& inputs,
        const std::vector<std::shared_ptr<const ov::Node>>& outputs
        );

Detailed Documentation

Base class with a default implementation of an asynchronous multi-staged inference request. To customize the pipeline stages, a derived class should change the content of the AsyncInferRequestThreadSafeDefault::_pipeline member container. It consists of pairs of tasks and the executors that will run those tasks. Plugins are recommended to use this class as a base class for their asynchronous inference request implementations.

To synchronize the derived context with the pipeline stages, the derived class should call the AsyncInferRequestThreadSafeDefault::StopAndWait() function in its destructor.

Here is an example of an asynchronous inference request implementation for some accelerator device. It uses five different executors to run the different stages of a synchronous inference request.

// Inherits from AsyncInferRequestThreadSafeDefault
class AcceleratorAsyncInferRequest : public AsyncInferRequestThreadSafeDefault {
public:
    // Store the pointer to the synchronous request and five executors
    AcceleratorAsyncInferRequest(const AcceleratorSyncRequest::Ptr& syncRequest,
                                 const ITaskExecutor::Ptr& preprocessExecutor,
                                 const ITaskExecutor::Ptr& writeToDeviceExecutor,
                                 const ITaskExecutor::Ptr& runOnDeviceExecutor,
                                 const ITaskExecutor::Ptr& readFromDeviceExecutor,
                                 const ITaskExecutor::Ptr& postProcessExecutor) :
    AsyncInferRequestThreadSafeDefault(syncRequest, nullptr, nullptr),
    _accSyncRequest{syncRequest},
    _preprocessExecutor{preprocessExecutor},
    _writeToDeviceExecutor{writeToDeviceExecutor},
    _runOnDeviceExecutor{runOnDeviceExecutor},
    _readFromDeviceExecutor{readFromDeviceExecutor},
    _postProcessExecutor{postProcessExecutor}
    {
        // Five pipeline stages of synchronous infer request are run by different executors
        _pipeline = {
            { _preprocessExecutor , [this] {
                _accSyncRequest->preprocess();
            }},
            { _writeToDeviceExecutor , [this] {
                _accSyncRequest->write_to_device();
            }},
            { _runOnDeviceExecutor , [this] {
                _accSyncRequest->run_on_device();
            }},
            { _readFromDeviceExecutor , [this] {
                _accSyncRequest->read_from_device();
            }},
            { _postProcessExecutor , [this] {
                _accSyncRequest->post_process();
            }},
        };
    }

    // As all stages use the _accSyncRequest member, we should wait for all stage tasks to finish before the destructor destroys this member.
    ~AcceleratorAsyncInferRequest() {
        StopAndWait();
    }

    AcceleratorSyncRequest::Ptr _accSyncRequest;
    ITaskExecutor::Ptr _preprocessExecutor, _writeToDeviceExecutor, _runOnDeviceExecutor, _readFromDeviceExecutor, _postProcessExecutor;
};

Typedefs

typedef std::shared_ptr<AsyncInferRequestThreadSafeDefault> Ptr

A shared pointer to AsyncInferRequestThreadSafeDefault.

Construction

AsyncInferRequestThreadSafeDefault(
    const IInferRequestInternal::Ptr& request,
    const ITaskExecutor::Ptr& taskExecutor,
    const ITaskExecutor::Ptr& callbackExecutor
    )

Wraps an IInferRequestInternal::Ptr implementation and constructs an AsyncInferRequestThreadSafeDefault::_pipeline where taskExecutor is used to run IInferRequestInternal::Infer asynchronously.

Parameters:

request

The synchronous request

taskExecutor

The task executor

callbackExecutor

The callback executor
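
A minimal sketch of how a plugin might create the async wrapper around its synchronous request. MySyncInferRequest is a hypothetical plugin class and the executor names are illustrative; only the constructor call itself follows the declaration above.

// Hypothetical plugin code: wrap a synchronous request with the default single-stage pipeline
auto syncRequest = std::make_shared<MySyncInferRequest>(/* plugin-specific arguments */);
auto taskExecutor = std::make_shared<InferenceEngine::CPUStreamsExecutor>(
    InferenceEngine::IStreamsExecutor::Config{"MyPluginStreams"});
auto callbackExecutor = std::make_shared<InferenceEngine::CPUStreamsExecutor>(
    InferenceEngine::IStreamsExecutor::Config{"MyPluginCallbacks"});
auto asyncRequest = std::make_shared<InferenceEngine::AsyncInferRequestThreadSafeDefault>(
    syncRequest, taskExecutor, callbackExecutor);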

Methods

virtual StatusCode Wait(int64_t millis_timeout)

Waits for completion of all pipeline stages. If the pipeline raises an exception, it is rethrown here.

Parameters:

millis_timeout

A timeout in ms to wait, or a special enum value of InferRequest::WaitMode

Returns:

A status code

virtual void StartAsync()

Start inference of specified input(s) in asynchronous mode.

The method returns immediately. Inference also starts immediately.
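
A caller-side sketch, assuming request is an already configured AsyncInferRequestThreadSafeDefault::Ptr, that starts asynchronous execution and then blocks until the result is ready:

request->StartAsync();
InferenceEngine::StatusCode status =
    request->Wait(InferenceEngine::InferRequest::WaitMode::RESULT_READY);
if (status != InferenceEngine::StatusCode::OK) {
    // Handle a failed or cancelled request here
}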

virtual void Infer()

Infers specified input(s) in synchronous mode.

Blocks all methods of InferRequest while the request is ongoing (running or waiting in a queue).

virtual std::map<std::string, InferenceEngineProfileInfo> GetPerformanceCounts() const

Queries performance measures per layer to get feedback on which layers are the most time-consuming. Note: not all plugins provide meaningful data.

Returns:

  • a map of layer names to profiling information for that layer.
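
A sketch of how a caller might print the returned counters (assumes <iostream> is included; field names follow InferenceEngineProfileInfo):

for (const auto& entry : request->GetPerformanceCounts()) {
    const InferenceEngine::InferenceEngineProfileInfo& info = entry.second;
    if (info.status == InferenceEngine::InferenceEngineProfileInfo::EXECUTED) {
        std::cout << entry.first << ": " << info.realTime_uSec << " us" << std::endl;
    }
}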

virtual void SetBlob(const std::string& name, const Blob::Ptr& data)

Set input/output data to infer.

Memory allocation doesn’t happen

Parameters:

name

  • a name of input or output blob.

data

  • a reference to input or output blob. The type of Blob must correspond to the network input precision and size.
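
A usage sketch; the input name "data" and output name "prob" are hypothetical and depend on the loaded network:

InferenceEngine::TensorDesc desc(InferenceEngine::Precision::FP32,
                                 {1, 3, 224, 224},
                                 InferenceEngine::Layout::NCHW);
InferenceEngine::Blob::Ptr input = InferenceEngine::make_shared_blob<float>(desc);
input->allocate();
request->SetBlob("data", input);   // the request now reads from this caller-owned blob
request->Infer();
InferenceEngine::Blob::Ptr output = request->GetBlob("prob");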

virtual void SetBlob(
    const std::string& name,
    const Blob::Ptr& data,
    const PreProcessInfo& info
    )

Sets pre-process for input data.

Parameters:

name

Name of input blob.

data

  • a reference to input or output blob. The type of Blob must correspond to the network input precision and size.

info

Preprocess info for blob.
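
A sketch of attaching resize pre-processing together with the blob; the input name "data" is hypothetical and input is the blob created in the previous sketch:

InferenceEngine::PreProcessInfo preProcInfo;
preProcInfo.setResizeAlgorithm(InferenceEngine::RESIZE_BILINEAR);
request->SetBlob("data", input, preProcInfo);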

virtual void SetBlobs(
    const std::string& name,
    const std::vector<Blob::Ptr>& blobs
    )

Sets a batch of input data to infer. The default implementation performs basic validation and checks that none of the tensors are remote. Plugin-specific implementations may override this behavior to handle the remote-tensor case. If a plugin expects only memory blobs (not remote blobs), consider overriding only SetBlobsImpl and reusing the existing basic implementation.

Parameters:

name

  • an operation name of input or output blob.

blobs

  • input blobs. The type of Blob must correspond to the model’s input precision and size.
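
A sketch of passing several separate blobs as one logical batch; makeImageBlob is a hypothetical helper that wraps one image into a Blob::Ptr, images is an assumed caller-side container, and "data" is a hypothetical input name:

std::vector<InferenceEngine::Blob::Ptr> batch;
for (const auto& image : images) {
    batch.push_back(makeImageBlob(image));   // hypothetical helper producing a Blob::Ptr
}
request->SetBlobs("data", batch);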

virtual BatchedBlob::Ptr GetBlobs(const std::string& name)

Get input/output data to infer.

Memory allocation doesn’t happen

Parameters:

name

  • a name of input or output blob.

Returns:

data - a reference to input batched blob.

virtual Blob::Ptr GetBlob(const std::string& name)

Get input/output data to infer.

Memory allocation doesn’t happen

Parameters:

name

  • a name of input or output blob.

Returns:

data - a reference to input or output blob. The type of Blob must correspond to the network input precision and size.

virtual const PreProcessInfo& GetPreProcess(const std::string& name) const

Gets pre-process for input data.

Parameters:

name

Name of input blob.

Returns:

A constant reference to the PreProcessInfo structure for the given input.
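
A sketch of querying the pre-processing attached to a hypothetical input named "data":

const InferenceEngine::PreProcessInfo& preProc = request->GetPreProcess("data");
InferenceEngine::ResizeAlgorithm alg = preProc.getResizeAlgorithm();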

virtual void SetBatch(int batch)

Sets new batch size when dynamic batching is enabled in executable network that created this request.

Parameters:

batch

  • new batch size to be used by all the following inference calls for this request.
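
A sketch, assuming the executable network that created this request was compiled with dynamic batching enabled and a maximum batch size of at least 4:

request->SetBatch(4);   // only the first 4 items of the input batch will be processed
request->Infer();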

virtual void SetCallback(Callback callback)

Set callback function which will be called on success or failure of asynchronous request.

Parameters:

callback

  • a function to be called on completion of the asynchronous request; it receives a std::exception_ptr that is non-null if the request finished with an error (see the Callback typedef).
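
A sketch of a callback matching the Callback typedef (std::function<void(std::exception_ptr)>):

request->SetCallback([](std::exception_ptr exceptionPtr) {
    if (exceptionPtr) {
        try {
            std::rethrow_exception(exceptionPtr);
        } catch (const std::exception& e) {
            // Log or otherwise report the failure; e.what() carries the error message
        }
        return;
    }
    // Success path: results can be read with GetBlob()/GetBlobs()
});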

virtual std::vector<std::shared_ptr<InferenceEngine::IVariableStateInternal>> QueryState()

Queries memory states.

Returns:

A vector of memory states
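
A sketch of inspecting the variable (memory) states exposed by the request:

for (const auto& state : request->QueryState()) {
    std::string name = state->GetName();                   // state identifier
    InferenceEngine::Blob::CPtr value = state->GetState(); // current state value
}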

virtual void Cancel()

Cancel current inference request execution.

virtual void setModelInputsOutputs(
    const std::vector<std::shared_ptr<const ov::Node>>& inputs,
    const std::vector<std::shared_ptr<const ov::Node>>& outputs
    )

Sets inputs/outputs from ov::Model.
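
A sketch of forwarding the nodes of a loaded model (an assumed std::shared_ptr<ov::Model> named model) to the request:

std::vector<std::shared_ptr<const ov::Node>> inputs, outputs;
for (const auto& param : model->get_parameters())
    inputs.push_back(param);
for (const auto& result : model->get_results())
    outputs.push_back(result);
request->setModelInputsOutputs(inputs, outputs);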