Torchvision preprocessing converter

The Torchvision-to-OpenVINO converter automatically translates operators from a torchvision preprocessing pipeline to the OpenVINO format and embeds them in your model. It is often used to adjust the images serving as input for AI models so that they have the proper dimensions or data types.

As the converter is fully based on the openvino.preprocess module, you can implement the torchvision.transforms feature easily and without the use of external libraries, reducing the overall application complexity and enabling additional performance optimizations.

Note

Not all torchvision transforms are supported yet. The following operations are available:

transforms.Compose
transforms.Normalize
transforms.ConvertImageDtype
transforms.Grayscale
transforms.Pad
transforms.ToTensor
transforms.CenterCrop
transforms.Resize

Example

    import torch.nn.functional as f
    import openvino as ov
    import numpy as np
    import torchvision
    import torch
    import os

    from openvino.preprocess.torchvision import PreprocessConverter
    from PIL import Image


    # 1. Create a sample model
    class Convnet(torch.nn.Module):
        """Minimal two-stage convolutional network used as the sample model.

        Each stage applies a convolution, a ReLU activation and a 2x2 max
        pooling, in that order.
        """

        def __init__(self, input_channels):
            """Create the two convolution layers.

            Args:
                input_channels: Number of channels of the input image tensor.
            """
            super().__init__()
            # 5x5 convolution: input_channels -> 6 feature maps.
            self.conv1 = torch.nn.Conv2d(in_channels=input_channels, out_channels=6, kernel_size=5)
            # 3x3 convolution: 6 -> 16 feature maps.
            self.conv2 = torch.nn.Conv2d(in_channels=6, out_channels=16, kernel_size=3)

        def forward(self, data):
            """Run both conv -> ReLU -> max-pool stages and return the feature maps."""
            first_stage = f.relu(self.conv1(data))
            first_stage = f.max_pool2d(first_stage, kernel_size=2)
            second_stage = f.relu(self.conv2(first_stage))
            return f.max_pool2d(second_stage, kernel_size=2)


    # 2. Define torchvision preprocessing pipeline
    # Geometry: Resize scales the shorter image side to 256 px, CenterCrop keeps a
    # 216x218 (height x width) window, and Pad adds (left=2, top=3, right=4,
    # bottom=5) pixels, so the pipeline emits 224x224 images - the same spatial
    # size as the dummy tensor used to export the model.
    preprocess_pipeline = torchvision.transforms.Compose(
        [
            torchvision.transforms.Resize(256, interpolation=torchvision.transforms.InterpolationMode.NEAREST),
            torchvision.transforms.CenterCrop((216, 218)),
            torchvision.transforms.Pad((2, 3, 4, 5), fill=3),
            # PIL image (H x W x C) -> tensor (C x H x W).
            torchvision.transforms.ToTensor(),
            torchvision.transforms.ConvertImageDtype(torch.float32),
            # Per-channel normalization: (x - mean) / std.
            torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ]
    )

    # 3. Read the model into OpenVINO    
    # Name of the intermediate ONNX file exchanged between PyTorch and OpenVINO.
    onnx_path = "test_convnet.onnx"

    torch_model = Convnet(input_channels=3)
    dummy_input = torch.randn(1, 3, 224, 224)
    torch.onnx.export(
        torch_model,
        dummy_input,
        onnx_path,
        verbose=False,
        input_names=["input"],
        output_names=["output"],
    )

    core = ov.Core()
    ov_model = core.read_model(model=onnx_path)

    # The ONNX file is only an intermediate artifact; delete it once it has been read.
    if os.path.exists(onnx_path):
        os.remove(onnx_path)

    # Random H x W x C image data, used below as the converter example and as inference input.
    test_input = np.random.randint(255, size=(260, 260, 3), dtype=np.uint16)

    # 4. Embed the torchvision preprocessing into OpenVINO model
    # The converter takes a sample PIL image as its input example; build one from the test data.
    example_image = Image.fromarray(test_input.astype("uint8"), "RGB")
    ov_model = PreprocessConverter.from_torchvision(
        model=ov_model,
        transform=preprocess_pipeline,
        input_example=example_image,
    )
    # From here on ov_model refers to the model compiled for the CPU device.
    ov_model = core.compile_model(ov_model, "CPU")

    # 5. Perform inference
    # Prepend a batch axis: (260, 260, 3) -> (1, 260, 260, 3).
    ov_input = test_input[np.newaxis, ...]
    output = ov_model.output(0)
    # Calling the compiled model runs inference; the results are indexed by output port.
    inference_results = ov_model(ov_input)
    ov_result = inference_results[output]