Inference Pipeline
To infer models with OpenVINO™ Runtime, you usually need to perform the following steps in the application pipeline:
1. Create a Core object.
1.1. (Optional) Load extensions.
2. Read a model from a drive.
2.1. (Optional) Perform model preprocessing.
3. Load the model to the device.
4. Create an inference request.
5. Fill input tensors with data.
6. Start inference.
7. Process the inference results.
Based on these steps, the code below demonstrates how to change an application to migrate to API 2.0.
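As a preview of where the migration ends up, here is a minimal end-to-end sketch of the API 2.0 pipeline in Python. It assumes a hypothetical single-input, single-output model stored in model.xml and uses zero-filled dummy input data:
import numpy as np
import openvino.runtime as ov

core = ov.Core()                                        # 1. create a Core object
model = core.read_model("model.xml")                    # 2. read the model
compiled_model = core.compile_model(model, "CPU")       # 3. load the model to the device
infer_request = compiled_model.create_infer_request()   # 4. create an inference request
input_tensor = infer_request.get_input_tensor(0)        # 5. fill the input tensor (dummy zeros here)
input_tensor.data[:] = np.zeros(input_tensor.data.shape, dtype=input_tensor.data.dtype)
infer_request.infer()                                   # 6. start inference (synchronous)
output_data = infer_request.get_output_tensor().data    # 7. process the inference results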
1. Create a Core Object
Inference Engine API
import numpy as np
import openvino.inference_engine as ie
core = ie.IECore()
InferenceEngine::Core core;
ie_core_t *core = nullptr;
ie_core_create("", &core);
API 2.0
import openvino.runtime as ov
core = ov.Core()
ov::Core core;
ov_core_t* core = NULL;
ov_core_create(&core);
1.1 (Optional) Load Extensions
To load a model with custom operations, you need to add extensions for these operations. It is highly recommended to write extensions with the OpenVINO™ Extensibility API. However, you can also load the old extensions into the new OpenVINO™ Runtime:
Inference Engine API
core.add_extension("path_to_extension_library.so", "CPU")
core.AddExtension(std::make_shared<InferenceEngine::Extension>("path_to_extension_library.so"));
ie_core_add_extension(core, "path_to_extension_library.so", "CPU");
API 2.0
core.add_extension("path_to_extension_library.so")
core.add_extension(std::make_shared<InferenceEngine::Extension>("path_to_extension_library.so"));
// For C API 2.0 "add_extension()" is not supported for now
2. Read a Model from a Drive
Inference Engine API
network = core.read_network("model.xml")
InferenceEngine::CNNNetwork network = core.ReadNetwork("model.xml");
ie_network_t *network = nullptr;
ie_core_read_network(core, "model.xml", nullptr, &network);
API 2.0
model = core.read_model("model.xml")
std::shared_ptr<ov::Model> model = core.read_model("model.xml");
ov_model_t* model = NULL;
ov_core_read_model(core, "model.xml", NULL, &model);
Reading a model uses the same structure as the example in the Model Creation migration guide. Note that you can combine reading and compiling a model into a single ov::Core::compile_model(filename, devicename) call.
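For illustration, a minimal Python sketch of the combined call (the file name and device are assumptions):
import openvino.runtime as ov

core = ov.Core()
# Read and compile the model in a single call
compiled_model = core.compile_model("model.xml", "CPU")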
2.1 (Optional) Perform Model Preprocessing
Preprocessing may be needed when the application input data does not perfectly match the model input format. See Preprocessing in API 2.0 for more details.
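As a rough sketch of what such preprocessing can look like with the API 2.0 PrePostProcessor in Python (the u8/NHWC application format and the NCHW model layout are assumptions for illustration):
from openvino.preprocess import PrePostProcessor
from openvino.runtime import Core, Layout, Type

core = Core()
model = core.read_model("model.xml")
ppp = PrePostProcessor(model)
# Describe the format of the data the application provides (assumed: u8, NHWC)
ppp.input().tensor().set_element_type(Type.u8).set_layout(Layout("NHWC"))
# Describe the layout the model actually expects (assumed: NCHW)
ppp.input().model().set_layout(Layout("NCHW"))
# Embed the conversion steps into the model
model = ppp.build()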
3. Load the Model to the Device
Inference Engine API
# Load network to the device and create infer requests
exec_network = core.load_network(network, "CPU", num_requests=4)
InferenceEngine::ExecutableNetwork exec_network = core.LoadNetwork(network, "CPU");
ie_executable_network_t *exe_network = nullptr;
ie_core_load_network(core, network, "CPU", nullptr, &exe_network);
API 2.0
compiled_model = core.compile_model(model, "CPU")
ov::CompiledModel compiled_model = core.compile_model(model, "CPU");
ov_compiled_model_t* compiled_model = NULL;
ov_core_compile_model(core, model, "CPU", 0, &compiled_model);
If you need to configure devices with additional parameters for OpenVINO™ Runtime, refer to Configuring Devices.
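For example, a device configuration can be passed directly to compile_model; a minimal Python sketch (the performance hint shown here is just one possible property):
import openvino.runtime as ov

core = ov.Core()
model = core.read_model("model.xml")
# Pass device properties as a configuration dictionary
compiled_model = core.compile_model(model, "CPU", {"PERFORMANCE_HINT": "THROUGHPUT"})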
4. Create an Inference Request
Inference Engine API
# Done in the previous step
InferenceEngine::InferRequest infer_request = exec_network.CreateInferRequest();
ie_infer_request_t *infer_request = nullptr;
ie_exec_network_create_infer_request(exe_network, &infer_request);
API 2.0
infer_request = compiled_model.create_infer_request()
ov::InferRequest infer_request = compiled_model.create_infer_request();
ov_infer_request_t* infer_request = NULL;
ov_compiled_model_create_infer_request(compiled_model, &infer_request);
5. Fill Input Tensors with Data
Inference Engine API
The Inference Engine API fills inputs with data of the I32 precision (not aligned with the original model):
infer_request = exec_network.requests[0]
# Get input blobs mapped to input layers names
input_blobs = infer_request.input_blobs
data = input_blobs["data1"].buffer
# Original I64 precision was converted to I32
assert data.dtype == np.int32
# Fill the first blob ...
InferenceEngine::Blob::Ptr input_blob1 = infer_request.GetBlob(inputs.begin()->first);
// fill first blob
InferenceEngine::MemoryBlob::Ptr minput1 = InferenceEngine::as<InferenceEngine::MemoryBlob>(input_blob1);
if (minput1) {
// locked memory holder should be alive all time while access to its
// buffer happens
auto minputHolder = minput1->wmap();
// Original I64 precision was converted to I32
auto data = minputHolder.as<InferenceEngine::PrecisionTrait<InferenceEngine::Precision::I32>::value_type*>();
// Fill data ...
}
InferenceEngine::Blob::Ptr input_blob2 = infer_request.GetBlob("data2");
// fill second blob
InferenceEngine::MemoryBlob::Ptr minput2 = InferenceEngine::as<InferenceEngine::MemoryBlob>(input_blob2);
if (minput2) {
// locked memory holder should be alive all time while access to its
// buffer happens
auto minputHolder = minput2->wmap();
// Original I64 precision was converted to I32
auto data = minputHolder.as<InferenceEngine::PrecisionTrait<InferenceEngine::Precision::I32>::value_type*>();
// Fill data ...
}
// fill first blob
ie_blob_t *input_blob1 = nullptr;
{
ie_infer_request_get_blob(infer_request, input_name, &input_blob1);
ie_blob_buffer_t buffer;
ie_blob_get_buffer(input_blob1, &buffer);
// Original I64 precision was converted to I32
int32_t* blob_internal_buffer = (int32_t*)buffer.buffer;
// Fill data ...
}
// fill second blob
ie_blob_t *input_blob2 = nullptr;
{
ie_infer_request_get_blob(infer_request, "data2", &input_blob2);
ie_blob_buffer_t buffer;
ie_blob_get_buffer(input_blob2, &buffer);
// Original I64 precision was converted to I32
int32_t* blob_internal_buffer = (int32_t*)buffer.buffer;
// Fill data ...
}
API 2.0
API 2.0 fills inputs with data of the I64 precision (aligned with the original model) for IR v11, ONNX, ov::Model and PaddlePaddle models; for OpenVINO IR v10 models it keeps the converted I32 precision to match the old behavior. The first snippet below shows the IR v10 case, the second one the aligned-precision case:
# Get input tensor by index
input_tensor1 = infer_request.get_input_tensor(0)
# IR v10 works with converted precisions (i64 -> i32)
assert input_tensor1.data.dtype == np.int32
# Fill the first data ...
# Get input tensor by tensor name
input_tensor2 = infer_request.get_tensor("data2_t")
# IR v10 works with converted precisions (i64 -> i32)
assert input_tensor2.data.dtype == np.int32
# Fill the second data ..
// Get input tensor by index
ov::Tensor input_tensor1 = infer_request.get_input_tensor(0);
// IR v10 works with converted precisions (i64 -> i32)
auto data1 = input_tensor1.data<int32_t>();
// Fill first data ...
// Get input tensor by tensor name
ov::Tensor input_tensor2 = infer_request.get_tensor("data2_t");
// IR v10 works with converted precisions (i64 -> i32)
auto data2 = input_tensor2.data<int32_t>();
// Fill second data ...
ov_tensor_t* input_tensor1 = NULL;
ov_tensor_t* input_tensor2 = NULL;
void* data = NULL;
{
// Get input tensor by index
ov_infer_request_get_input_tensor_by_index(infer_request, 0, &input_tensor1);
// IR v10 works with converted precisions (i64 -> i32)
ov_tensor_data(input_tensor1, &data);
int32_t* data1 = (int32_t*)data;
// Fill first data ...
}
{
// Get input tensor by tensor name
ov_infer_request_get_tensor(infer_request, "data2_t", &input_tensor2);
// IR v10 works with converted precisions (i64 -> i32)
ov_tensor_data(input_tensor2, &data);
int32_t* data2 = (int32_t*)data;
// Fill second data ...
}
ov_tensor_free(input_tensor1);
ov_tensor_free(input_tensor2);
# Get input tensor by index
input_tensor1 = infer_request.get_input_tensor(0)
# Element types, names and layouts are aligned with framework
assert input_tensor1.data.dtype == np.int64
# Fill the first data ...
# Get input tensor by tensor name
input_tensor2 = infer_request.get_tensor("data2_t")
assert input_tensor2.data.dtype == np.int64
# Fill the second data ...
// Get input tensor by index
ov::Tensor input_tensor1 = infer_request.get_input_tensor(0);
// Element types, names and layouts are aligned with framework
auto data1 = input_tensor1.data<int64_t>();
// Fill first data ...
// Get input tensor by tensor name
ov::Tensor input_tensor2 = infer_request.get_tensor("data2_t");
// Element types, names and layouts are aligned with framework
auto data2 = input_tensor2.data<int64_t>();
// Fill second data ...
ov_tensor_t* input_tensor1 = NULL;
ov_tensor_t* input_tensor2 = NULL;
void* data = NULL;
{
// Get input tensor by index
ov_infer_request_get_input_tensor_by_index(infer_request, 0, &input_tensor1);
// Element types, names and layouts are aligned with framework
ov_tensor_data(input_tensor1, &data);
// Fill first data ...
}
{
// Get input tensor by tensor name
ov_infer_request_get_tensor(infer_request, "data2_t", &input_tensor2);
// Element types, names and layouts are aligned with framework
ov_tensor_data(input_tensor2, &data);
// Fill second data ...
}
ov_tensor_free(input_tensor1);
ov_tensor_free(input_tensor2);
6. Start Inference
Inference Engine API
results = infer_request.infer()
infer_request.Infer();
ie_infer_request_infer(infer_request);
# Start async inference on a single infer request
infer_request.async_infer()
# Wait for 1 millisecond
infer_request.wait(1)
# Wait for inference completion
infer_request.wait()
# Demonstrates async pipeline using ExecutableNetwork
results = []
# Callback to process inference results
def callback(output_blobs, _):
    # Copy the data from output blobs to a numpy array
    results_copy = {out_name: out_blob.buffer[:] for out_name, out_blob in output_blobs.items()}
    results.append(process_results(results_copy))
# Set the callback for each infer request
for infer_request in exec_network.requests:
    infer_request.set_completion_callback(callback, py_data=infer_request.output_blobs)
# The async pipeline is managed by ExecutableNetwork
total_frames = 100
for _ in range(total_frames):
    # Wait for at least one free request
    exec_network.wait(num_requests=1)
    # Get the id of an idle request
    idle_id = exec_network.get_idle_request_id()
    # Start asynchronous inference on the idle request
    exec_network.start_async(request_id=idle_id, inputs=next(input_data))
# Wait for all requests to complete
exec_network.wait()
// NOTE: For demonstration purposes we are trying to set callback
// which restarts inference inside one more time, so two inferences happen here
// Start inference without blocking current thread
auto restart_once = true;
infer_request.SetCompletionCallback<std::function<void(InferenceEngine::InferRequest, InferenceEngine::StatusCode)>>(
[&, restart_once](InferenceEngine::InferRequest request, InferenceEngine::StatusCode status) mutable {
if (status != InferenceEngine::OK) {
// Process error code
} else {
// Extract inference result
InferenceEngine::Blob::Ptr output_blob = request.GetBlob(outputs.begin()->first);
// Restart inference if needed
if (restart_once) {
request.StartAsync();
restart_once = false;
}
}
});
infer_request.StartAsync();
// Get inference status immediately
InferenceEngine::StatusCode status = infer_request.Wait(InferenceEngine::InferRequest::STATUS_ONLY);
// Wait for 1 millisecond
status = infer_request.Wait(1);
// Wait for inference completion
infer_request.Wait(InferenceEngine::InferRequest::RESULT_READY);
// NOTE: For demonstration purposes we are trying to set callback
ie_complete_call_back_t callback;
callback.completeCallBackFunc = completion_callback;
callback.args = infer_request;
ie_infer_set_completion_callback(infer_request, &callback);
// Start inference without blocking current thread
ie_infer_request_infer_async(infer_request);
// Wait for 10 milliseconds
IEStatusCode waitStatus = ie_infer_request_wait(infer_request, 10);
// Wait for inference completion
ie_infer_request_wait(infer_request, -1);
API 2.0
results = infer_request.infer()
infer_request.infer();
ov_infer_request_infer(infer_request);
# Start async inference on a single infer request
infer_request.start_async()
# Wait for 1 millisecond
infer_request.wait_for(1)
# Wait for inference completion
infer_request.wait()
# Demonstrates async pipeline using AsyncInferQueue
results = []
def callback(request, frame_id):
    # Copy the data from output tensors to numpy array and process it
    results_copy = {output: data[:] for output, data in request.results.items()}
    results.append(process_results(results_copy, frame_id))
# Create AsyncInferQueue with 4 infer requests
infer_queue = ov.AsyncInferQueue(compiled_model, jobs=4)
# Set callback for each infer request in the queue
infer_queue.set_callback(callback)
total_frames = 100
for i in range(total_frames):
    # Wait for at least one available infer request and start asynchronous inference
    infer_queue.start_async(next(input_data), userdata=i)
# Wait for all requests to complete
infer_queue.wait_all()
// NOTE: For demonstration purposes we are trying to set callback
// which restarts inference inside one more time, so two inferences happen here
auto restart_once = true;
infer_request.set_callback([&, restart_once](std::exception_ptr exception_ptr) mutable {
if (exception_ptr) {
// process the exception or rethrow it
std::rethrow_exception(exception_ptr);
} else {
// Extract inference result
ov::Tensor output_tensor = infer_request.get_output_tensor();
// Restart inference if needed
if (restart_once) {
infer_request.start_async();
restart_once = false;
}
}
});
// Start inference without blocking current thread
infer_request.start_async();
// Get inference status immediately
bool status = infer_request.wait_for(std::chrono::milliseconds{0});
// Wait for one millisecond
status = infer_request.wait_for(std::chrono::milliseconds{1});
// Wait for inference completion
infer_request.wait();
// NOTE: For demonstration purposes we are trying to set callback
ov_callback_t callback;
callback.callback_func = infer_request_callback;
callback.args = infer_request;
ov_infer_request_set_callback(infer_request, &callback);
// Start inference without blocking current thread
ov_infer_request_start_async(infer_request);
// Wait for inference completion
ov_infer_request_wait(infer_request);
// Wait for 10 milliseconds
ov_infer_request_wait_for(infer_request, 10);
7. Process the Inference Results
Inference Engine API
The Inference Engine API processes outputs as data of the I32 precision (not aligned with the original model):
# Get output blobs mapped to output layers names
output_blobs = infer_request.output_blobs
data = output_blobs["out1"].buffer
# Original I64 precision was converted to I32
assert data.dtype == np.int32
# Process output data
InferenceEngine::Blob::Ptr output_blob = infer_request.GetBlob(outputs.begin()->first);
InferenceEngine::MemoryBlob::Ptr moutput = InferenceEngine::as<InferenceEngine::MemoryBlob>(output_blob);
if (moutput) {
// locked memory holder should be alive all time while access to its
// buffer happens
auto minputHolder = moutput->rmap();
// Original I64 precision was converted to I32
auto data =
minputHolder.as<const InferenceEngine::PrecisionTrait<InferenceEngine::Precision::I32>::value_type*>();
// process output data
}
// get output blob by name
ie_blob_t *output_blob = nullptr;
ie_infer_request_get_blob(infer_request, "output_name", &output_blob);
// get blob buffer
ie_blob_buffer_t out_buffer;
ie_blob_get_buffer(output_blob, &out_buffer);
// get data
float *data = (float *)(out_buffer.buffer);
// process output data
API 2.0
API 2.0 processes outputs with the following precisions:
- I32 (not aligned with the original model) for OpenVINO IR v10 models, to match the old behavior;
- I64 (aligned with the original model) for OpenVINO IR v11, ONNX, ov::Model and PaddlePaddle models, to match the new behavior.
The first snippet below shows the IR v10 case, the second one the aligned-precision case:
# Model has only one output
output_tensor = infer_request.get_output_tensor()
# IR v10 works with converted precisions (i64 -> i32)
assert output_tensor.data.dtype == np.int32
# process output data ...
// model has only one output
ov::Tensor output_tensor = infer_request.get_output_tensor();
// IR v10 works with converted precisions (i64 -> i32)
auto out_data = output_tensor.data<int32_t>();
// process output data
ov_tensor_t* output_tensor = NULL;
void* data = NULL;
// model has only one output
ov_infer_request_get_output_tensor(infer_request, &output_tensor);
// IR v10 works with converted precisions (i64 -> i32)
ov_tensor_data(output_tensor, &data);
int32_t* out_data = (int32_t*)data;
// process output data
ov_tensor_free(output_tensor);
# Model has only one output
output_tensor = infer_request.get_output_tensor()
# Element types, names and layouts are aligned with framework
assert output_tensor.data.dtype == np.int64
# process output data ...
// model has only one output
ov::Tensor output_tensor = infer_request.get_output_tensor();
// Element types, names and layouts are aligned with framework
auto out_data = output_tensor.data<int64_t>();
// process output data
ov_tensor_t* output_tensor = NULL;
void* out_data = NULL;
// model has only one output
ov_infer_request_get_output_tensor(infer_request, &output_tensor);
// Element types, names and layouts are aligned with framework
ov_tensor_data(output_tensor, &out_data);
// process output data
ov_tensor_free(output_tensor);