Single Image Super Resolution with OpenVINO™

This tutorial is also available as a Jupyter notebook that can be cloned directly from GitHub. See the installation guide for instructions to run this tutorial locally on Windows, Linux or macOS. To run without installing anything, click the launch binder button.

Binder Github

Super Resolution is the process of enhancing the quality of an image by increasing the pixel count using deep learning. This notebook shows the Single Image Super Resolution (SISR) which takes just one low resolution image. A model called single-image-super-resolution-1032, which is available in Open Model Zoo, is used in this tutorial. It is based on the research paper cited below.

Y. Liu et al., “An Attention-Based Approach for Single Image Super Resolution,” 2018 24th International Conference on Pattern Recognition (ICPR), 2018, pp. 2777-2784, doi: 10.1109/ICPR.2018.8545760.

Preparation

Imports

import os
import time
import requests
from pathlib import Path

import cv2
import matplotlib.pyplot as plt
import numpy as np
from IPython.display import HTML, FileLink
from IPython.display import Image as DisplayImage
from IPython.display import Pretty, ProgressBar, clear_output, display
from PIL import Image
from openvino.runtime import Core

Settings

# Device to use for inference. For example, "CPU", or "GPU".
DEVICE = "CPU"
# 1032: 4x superresolution, 1033: 3x superresolution
MODEL_FILE = "model/single-image-super-resolution-1032.xml"
model_name = os.path.basename(MODEL_FILE)
model_xml_path = Path(MODEL_FILE)

Functions

def write_text_on_image(image: np.ndarray, text: str) -> np.ndarray:
    """
    Write the specified text in the top left corner of the image
    as white text with a black border.

    :param image: image as numpy arry with HWC shape, RGB or BGR
    :param text: text to write
    :return: image with written text, as numpy array
    """
    font = cv2.FONT_HERSHEY_PLAIN
    org = (20, 20)
    font_scale = 4
    font_color = (255, 255, 255)
    line_type = 1
    font_thickness = 2
    text_color_bg = (0, 0, 0)
    x, y = org

    image = cv2.UMat(image)
    (text_w, text_h), _ = cv2.getTextSize(text, font, font_scale, font_thickness)
    result_im = cv2.rectangle(image, org, (x + text_w, y + text_h), text_color_bg, -1)

    textim = cv2.putText(
        result_im,
        text,
        (x, y + text_h + font_scale - 1),
        font,
        font_scale,
        font_color,
        font_thickness,
        line_type,
    )
    return textim.get()


def load_image(path: str) -> np.ndarray:
    """
    Loads an image from `path` and returns it as BGR numpy array.

    :param path: path to an image filename or url
    :return: image as numpy array, with BGR channel order
    """
    if path.startswith("http"):
        # Set User-Agent to Mozilla because some websites block requests
        # with User-Agent Python.
        response = requests.get(path, headers={"User-Agent": "Mozilla/5.0"})
        array = np.asarray(bytearray(response.content), dtype="uint8")
        image = cv2.imdecode(array, -1)  # Loads the image as BGR.
    else:
        image = cv2.imread(path)
    return image


def convert_result_to_image(result) -> np.ndarray:
    """
    Convert network result of floating point numbers to image with integer
    values from 0-255. Values outside this range are clipped to 0 and 255.

    :param result: a single superresolution network result in N,C,H,W shape
    """
    result = result.squeeze(0).transpose(1, 2, 0)
    result *= 255
    result[result < 0] = 0
    result[result > 255] = 255
    result = result.astype(np.uint8)
    return result


def to_rgb(image_data) -> np.ndarray:
    """
    Convert image_data from BGR to RGB
    """
    return cv2.cvtColor(image_data, cv2.COLOR_BGR2RGB)

Load the Superresolution Model

The Super Resolution model expects two inputs: the input image and a bicubic interpolation of the input image to the target size of 1920x1080. It returns the super resolution version of the image in 1920x1800 (for the default superresolution model (1032)).

Load the model in OpenVINO Runtime with ie.read_model, compile it for the specified device with ie.compile_model, and get information about the network inputs and outputs.

ie = Core()
model = ie.read_model(model=model_xml_path)
compiled_model = ie.compile_model(model=model, device_name=DEVICE)

# Network inputs and outputs are dictionaries. Get the keys for the
# dictionaries.
original_image_key, bicubic_image_key = compiled_model.inputs
output_key = compiled_model.output(0)

# Get the expected input and target shape. The `.dims[2:]` returns the height
# and width. The `resize` function of OpenCV expects the shape as (width, height),
# so reverse the shape with `[::-1]` and convert it to a tuple.
input_height, input_width = list(original_image_key.shape)[2:]
target_height, target_width = list(bicubic_image_key.shape)[2:]

upsample_factor = int(target_height / input_height)

print(f"The network expects inputs with a width of {input_width}, " f"height of {input_height}")
print(f"The network returns images with a width of {target_width}, " f"height of {target_height}")

print(
    f"The image sides are upsampled by a factor of {upsample_factor}. "
    f"The new image is {upsample_factor**2} times as large as the "
    "original image"
)
The network expects inputs with a width of 480, height of 270
The network returns images with a width of 1920, height of 1080
The image sides are upsampled by a factor of 4. The new image is 16 times as large as the original image

Load and Show the Input Image

NOTE: For the best results, use raw images (like TIFF, BMP or PNG). Compressed images (like JPEG) may appear distorted after processing with the super resolution model.

IMAGE_PATH = Path("../data/image/tower.jpg")
OUTPUT_PATH = Path("output/")

os.makedirs(str(OUTPUT_PATH), exist_ok=True)
full_image = load_image(str(IMAGE_PATH))

# Uncomment these lines to load a raw image as BGR.
# import rawpy
# with rawpy.imread(IMAGE_PATH) as raw:
#     full_image = raw.postprocess()[:,:,(2,1,0)]

plt.imshow(to_rgb(full_image))
print(f"Showing full image with width {full_image.shape[1]} " f"and height {full_image.shape[0]}")
Showing full image with width 5976 and height 3770
../_images/202-vision-superresolution-image-with-output_10_1.png

Superresolution on a Crop of the Image

Crop the Input Image once.

Crop the network input size. Give the X (width) and Y (height) coordinates for the top left corner of the crop. Set the CROP_FACTOR variable to 2 to make a crop that is larger than the network input size (this only works with the single-image-super-resolution-1032 model). The crop will be downsampled before propagating to the network. This is useful for very high resolution images, where a crop of the network input size is too small to show enough information. It can also improve the result. Keep in mind that with a CROP_FACTOR or 2 the net upsampling factor will be halved. If the superresolution network increases the side lengths of the image by a factor of 4, it upsamples a 480x270 crop to 1920x1080. With a CROP_FACTOR of 2, a 960x540 crop is upsampled to the same 1920x1080: the side lengths are twice as large as the crop size.

# Set `CROP_FACTOR` to 2 to crop with twice the input width and height
# This only works with the 1032 (4x) superresolution model!
# Set it to 1 to crop the image with the exact input size.
CROP_FACTOR = 2
adjusted_upsample_factor = upsample_factor // CROP_FACTOR

image_id = "flag"  # A tag to recognize the saved images.
starty = 3200
startx = 0

# Perform the crop.
image_crop = full_image[
    starty : starty + input_height * CROP_FACTOR,
    startx : startx + input_width * CROP_FACTOR,
]

# Show the cropped image.
print(f"Showing image crop with width {image_crop.shape[1]} and " f"height {image_crop.shape[0]}.")
plt.imshow(to_rgb(image_crop));
Showing image crop with width 960 and height 540.
../_images/202-vision-superresolution-image-with-output_12_1.png

Reshape/Resize Crop for Model Input

The input image is resized to a network input size, and reshaped to (N,C,H,W) (N=number of images, C=number of channels, H=height, W=width). The image is also resized to the network output size, with bicubic interpolation. This bicubic image is the second input to the network.

# Resize the image to the target shape with bicubic interpolation.
bicubic_image = cv2.resize(
    src=image_crop, dsize=(target_width, target_height), interpolation=cv2.INTER_CUBIC
)

# If required, resize the image to the input image shape.
if CROP_FACTOR > 1:
    image_crop = cv2.resize(src=image_crop, dsize=(input_width, input_height))

# Reshape the images from (H,W,C) to (N,C,H,W).
input_image_original = np.expand_dims(image_crop.transpose(2, 0, 1), axis=0)
input_image_bicubic = np.expand_dims(bicubic_image.transpose(2, 0, 1), axis=0)

Do Inference

Do inference and convert the inference result to an RGB image.

result = compiled_model(
    {
        original_image_key.any_name: input_image_original,
        bicubic_image_key.any_name: input_image_bicubic,
    }
)[output_key]

# Get inference result as numpy array and reshape to image shape and data type
result_image = convert_result_to_image(result)

Show and Save Results

Show the bicubic image and the enhanced superresolution image.

fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(30, 15))
ax[0].imshow(to_rgb(bicubic_image))
ax[1].imshow(to_rgb(result_image))
ax[0].set_title("Bicubic")
ax[1].set_title("Superresolution")
Text(0.5, 1.0, 'Superresolution')
../_images/202-vision-superresolution-image-with-output_18_1.png

Save Superresolution and Bicubic Image Crop

# Add a text with "SUPER" or "BICUBIC" to the superresolution or bicubic image.
image_super = write_text_on_image(image=result_image, text="SUPER")
image_bicubic = write_text_on_image(image=bicubic_image, text="BICUBIC")

# Store the image and the results.
crop_image_path = Path(f"{OUTPUT_PATH.stem}/{image_id}_{adjusted_upsample_factor}x_crop.png")
superres_image_path = Path(
    f"{OUTPUT_PATH.stem}/{image_id}_{adjusted_upsample_factor}x_crop_superres.png"
)
bicubic_image_path = Path(
    f"{OUTPUT_PATH.stem}/{image_id}_{adjusted_upsample_factor}x_crop_bicubic.png"
)
cv2.imwrite(filename=str(crop_image_path), img=image_crop, params=[cv2.IMWRITE_PNG_COMPRESSION, 0])
cv2.imwrite(
    filename=str(superres_image_path), img=image_super, params=[cv2.IMWRITE_PNG_COMPRESSION, 0]
)
cv2.imwrite(
    filename=str(bicubic_image_path), img=image_bicubic, params=[cv2.IMWRITE_PNG_COMPRESSION, 0]
)
print(f"Images written to directory: {OUTPUT_PATH}")
Images written to directory: output

Write Animated GIF with Bicubic/Superresolution Comparison

print(image_bicubic.shape)
print(image_super.shape)

result_pil = Image.fromarray(to_rgb(image_super))
bicubic_pil = Image.fromarray(to_rgb(image_bicubic))
gif_image_path = Path(f"{OUTPUT_PATH.stem}/{image_id}_comparison_{adjusted_upsample_factor}x.gif")

result_pil.save(
    fp=str(gif_image_path),
    format="GIF",
    append_images=[bicubic_pil],
    save_all=True,
    duration=1000,
    loop=0,
)

# The `DisplayImage(str(gif_image_path))` function does not work in Colab.
DisplayImage(data=open(gif_image_path, "rb").read(), width=1920 // 2)
(1080, 1920, 3)
(1080, 1920, 3)
../_images/202-vision-superresolution-image-with-output_22_1.png

Create a Video with Sliding Bicubic/Superresolution Comparison

This may take a while. For the video, the superresolution and bicubic image are resized by a factor of 2 to improve processing speed. This gives an indication of the superresolution effect. The video is saved as an .avi file. You can click on the link to download the video, or open it directly from the images directory, and play it locally.

FOURCC = cv2.VideoWriter_fourcc(*"MJPG")

result_video_path = Path(
    f"{OUTPUT_PATH.stem}/{image_id}_crop_comparison_{adjusted_upsample_factor}x.avi"
)
video_target_height, video_target_width = (
    result_image.shape[0] // 2,
    result_image.shape[1] // 2,
)

out_video = cv2.VideoWriter(
    filename=str(result_video_path),
    fourcc=FOURCC,
    fps=90,
    frameSize=(video_target_width, video_target_height),
)

resized_result_image = cv2.resize(src=result_image, dsize=(video_target_width, video_target_height))
resized_bicubic_image = cv2.resize(
    src=bicubic_image, dsize=(video_target_width, video_target_height)
)

progress_bar = ProgressBar(total=video_target_width)
progress_bar.display()

for i in range(video_target_width):
    # Create a frame where the left part (until i pixels width) contains the
    # superresolution image, and the right part (from i pixels width) contains
    # the bicubic image.
    comparison_frame = np.hstack(
        (
            resized_result_image[:, :i, :],
            resized_bicubic_image[:, i:, :],
        )
    )
    # Create a small black border line between the superresolution
    # and bicubic part of the image.
    comparison_frame[:, i - 1 : i + 1, :] = 0
    out_video.write(image=comparison_frame)
    progress_bar.progress = i
    progress_bar.update()
out_video.release()
clear_output()

video_link = FileLink(result_video_path)
video_link.html_link_str = "<a href='%s' download>%s</a>"
display(HTML(f"The video has been saved to {video_link._repr_html_()}"))
The video has been saved to output/flag_crop_comparison_2x.avi

Superresolution on full input image

Superresolution on the full image is done by dividing the image into patches of equal size, doing superresolution on each path, and then stitching the resulting patches together again. For this demo, patches near the border of the image are ignored.

Adjust the CROPLINES setting in the next cell if you see boundary effects.

Compute patches

# Set the number of lines to crop from the network result to prevent
# boundary effects. The value of `CROPLINES` should be an integer >= 1.
CROPLINES = 10
# See Superresolution on one crop of the image for description of `CROP_FACTOR`.
CROP_FACTOR = 2

full_image_height, full_image_width = full_image.shape[:2]

# Compute x and y coordinates of left top of image tiles.
x_coords = list(range(0, full_image_width, input_width * CROP_FACTOR - CROPLINES * 2))
while full_image_width - x_coords[-1] < input_width * CROP_FACTOR:
    x_coords.pop(-1)
y_coords = list(range(0, full_image_height, input_height * CROP_FACTOR - CROPLINES * 2))
while full_image_height - y_coords[-1] < input_height * CROP_FACTOR:
    y_coords.pop(-1)

# Compute the width and height to crop the full image. The full image is
# cropped at the border to tiles of the input size.
crop_width = x_coords[-1] + input_width * CROP_FACTOR
crop_height = y_coords[-1] + input_height * CROP_FACTOR

# Compute the width and height of the target superresolution image.
new_width = (
    x_coords[-1] * (upsample_factor // CROP_FACTOR)
    + target_width
    - CROPLINES * 2 * (upsample_factor // CROP_FACTOR)
)
new_height = (
    y_coords[-1] * (upsample_factor // CROP_FACTOR)
    + target_height
    - CROPLINES * 2 * (upsample_factor // CROP_FACTOR)
)
print(f"The output image will have a width of {new_width} " f"and a height of {new_height}")
The output image will have a width of 11280 and a height of 7280

Do Inference

The code below reads one patch of the image at a time. Each patch is reshaped to the network input shape and upsampled with bicubic interpolation to the target shape. Both the original and the bicubic images are propagated through the network. The network result is a numpy array with floating point values, with a shape of (1,3,1920,1080). This array is converted to an 8-bit image with the (1080,1920,3) shape and written to a full_superresolution_image. The bicubic image is written to a full_bicubic_image for comparison. A progress bar shows the progress of the process. Inference time is measured, as well as total time to process each patch.

start_time = time.perf_counter()
patch_nr = 0
num_patches = len(x_coords) * len(y_coords)
progress_bar = ProgressBar(total=num_patches)
progress_bar.display()

# Crop image to fit tiles of the input size.
full_image_crop = full_image.copy()[:crop_height, :crop_width, :]

# Create an empty array of the target size.
full_superresolution_image = np.empty((new_height, new_width, 3), dtype=np.uint8)

# Create a bicubic upsampled image of the target size for comparison.
full_bicubic_image = cv2.resize(
    src=full_image_crop[CROPLINES:-CROPLINES, CROPLINES:-CROPLINES, :],
    dsize=(new_width, new_height),
    interpolation=cv2.INTER_CUBIC,
)

total_inference_duration = 0
for y in y_coords:
    for x in x_coords:
        patch_nr += 1

        # Crop the input image.
        image_crop = full_image_crop[
            y : y + input_height * CROP_FACTOR,
            x : x + input_width * CROP_FACTOR,
        ]

        # Resize the images to the target shape with bicubic interpolation
        bicubic_image = cv2.resize(
            src=image_crop,
            dsize=(target_width, target_height),
            interpolation=cv2.INTER_CUBIC,
        )

        if CROP_FACTOR > 1:
            image_crop = cv2.resize(src=image_crop, dsize=(input_width, input_height))

        input_image_original = np.expand_dims(image_crop.transpose(2, 0, 1), axis=0)

        input_image_bicubic = np.expand_dims(bicubic_image.transpose(2, 0, 1), axis=0)

        # Do inference.
        inference_start_time = time.perf_counter()

        result = compiled_model(
            {
                original_image_key.any_name: input_image_original,
                bicubic_image_key.any_name: input_image_bicubic,
            }
        )[output_key]

        inference_stop_time = time.perf_counter()
        inference_duration = inference_stop_time - inference_start_time
        total_inference_duration += inference_duration

        # Reshape an inference result to the image shape and the data type.
        result_image = convert_result_to_image(result)

        # Add the inference result of this patch to the full superresolution
        # image.
        adjusted_upsample_factor = upsample_factor // CROP_FACTOR
        new_y = y * adjusted_upsample_factor
        new_x = x * adjusted_upsample_factor
        full_superresolution_image[
            new_y : new_y + target_height - CROPLINES * adjusted_upsample_factor * 2,
            new_x : new_x + target_width - CROPLINES * adjusted_upsample_factor * 2,
        ] = result_image[
            CROPLINES * adjusted_upsample_factor : -CROPLINES * adjusted_upsample_factor,
            CROPLINES * adjusted_upsample_factor : -CROPLINES * adjusted_upsample_factor,
            :,
        ]

        progress_bar.progress = patch_nr
        progress_bar.update()

        if patch_nr % 10 == 0:
            clear_output(wait=True)
            progress_bar.display()
            display(
                Pretty(
                    f"Processed patch {patch_nr}/{num_patches}. "
                    f"Inference time: {inference_duration:.2f} seconds "
                    f"({1/inference_duration:.2f} FPS)"
                )
            )

end_time = time.perf_counter()
duration = end_time - start_time
clear_output(wait=True)
print(
    f"Processed {num_patches} patches in {duration:.2f} seconds. "
    f"Total patches per second (including processing): "
    f"{num_patches/duration:.2f}.\nInference patches per second: "
    f"{num_patches/total_inference_duration:.2f} "
)
Processed 42 patches in 4.64 seconds. Total patches per second (including processing): 9.04.
Inference patches per second: 20.52

Save superresolution image and the bicubic image

full_superresolution_image_path = Path(
    f"{OUTPUT_PATH.stem}/full_superres_{adjusted_upsample_factor}x.jpg"
)
full_bicubic_image_path = Path(f"{OUTPUT_PATH.stem}/full_bicubic_{adjusted_upsample_factor}x.jpg")

cv2.imwrite(str(full_superresolution_image_path), full_superresolution_image)
cv2.imwrite(str(full_bicubic_image_path), full_bicubic_image);
bicubic_link = FileLink(full_bicubic_image_path)
image_link = FileLink(full_superresolution_image_path)
bicubic_link.html_link_str = "<a href='%s' download>%s</a>"
image_link.html_link_str = "<a href='%s' download>%s</a>"
display(
    HTML(
        "The images are saved in the images directory. You can also download "
        "them by clicking on these links:"
        f"<ul><li>{image_link._repr_html_()}<li>{bicubic_link._repr_html_()}"
    )
)
The images are saved in the images directory. You can also download them by clicking on these links:
  • output/full_bicubic_2x.jpg