TensorFlow Serving API#

Python Client#

When creating a Python-based client application, there are two packages on PyPi that can be used with OpenVINO Model Server:

Install the Package#

pip3 install ovmsclient
pip3 install tensorflow-serving-api

Request Model Status#

from ovmsclient import make_grpc_client

client = make_grpc_client("localhost:9000")
status = client.get_model_status(model_name="my_model")
from ovmsclient import make_http_client

client = make_http_client("localhost:8000")
status = client.get_model_status(model_name="my_model")
import grpc
from tensorflow_serving.apis import model_service_pb2_grpc, get_model_status_pb2
from tensorflow_serving.apis.get_model_status_pb2 import ModelVersionStatus

channel = grpc.insecure_channel("localhost:9000")
model_service_stub = model_service_pb2_grpc.ModelServiceStub(channel)

status_request = get_model_status_pb2.GetModelStatusRequest()
status_request.model_spec.name = "my_model"
status_response = model_service_stub.GetModelStatus(status_request, 10.0)
        
model_status = {}
model_version_status = status_response.model_version_status
for model_version in model_version_status:
    model_status[model_version.version] = dict([
        ('state', ModelVersionStatus.State.Name(model_version.state)),
        ('error_code', model_version.status.error_code),
        ('error_message', model_version.status.error_message),
    ])
curl http://localhost:8000/v1/models/my_model

Request Model Metadata#

from ovmsclient import make_grpc_client

client = make_grpc_client("localhost:9000")
model_metadata = client.get_model_metadata(model_name="my_model")
from ovmsclient import make_http_client

client = make_http_client("localhost:8000")
model_metadata = client.get_model_metadata(model_name="my_model")
import grpc
from tensorflow_serving.apis import prediction_service_pb2_grpc, get_model_metadata_pb2
from tensorflow.core.framework.types_pb2 import DataType

channel = grpc.insecure_channel("localhost:9000")
prediction_service_stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)

metadata_request = get_model_metadata_pb2.GetModelMetadataRequest()
metadata_request.model_spec.name = "my_model"
metadata_response = prediction_service_stub.GetModelMetadata(metadata_request, 10.0)

model_metadata = {}

signature_def = metadata_response.metadata['signature_def']
signature_map = get_model_metadata_pb2.SignatureDefMap()
signature_map.ParseFromString(signature_def.value)
model_signature = signature_map.ListFields()[0][1]['serving_default']

inputs_metadata = {}
for input_name, input_info in model_signature.inputs.items():
    input_shape = [d.size for d in input_info.tensor_shape.dim]
    inputs_metadata[input_name] = dict([
        ("shape", input_shape),
        ("dtype", DataType.Name(input_info.dtype))
    ])

outputs_metadata = {}
for output_name, output_info in model_signature.outputs.items():
    output_shape = [d.size for d in output_info.tensor_shape.dim]
    outputs_metadata[output_name] = dict([
        ("shape", output_shape),
        ("dtype", DataType.Name(output_info.dtype))
    ])

version = metadata_response.model_spec.version.value
model_metadata = dict([
    ("model_version", version),
    ("inputs", inputs_metadata),
    ("outputs", outputs_metadata)
])
curl http://localhost:8000/v1/models/my_model/metadata

Request Prediction on a Binary Input#

from ovmsclient import make_grpc_client

client = make_grpc_client("localhost:9000")
with open("img.jpeg", "rb") as f:
    data = f.read()
inputs = {"input_name": data}    
results = client.predict(inputs=inputs, model_name="my_model")
from ovmsclient import make_http_client

client = make_http_client("localhost:8000")

with open("img.jpeg", "rb") as f:
    data = f.read()
inputs = {"input_name": data}    
results = client.predict(inputs=inputs, model_name="my_model")
import grpc
from tensorflow_serving.apis import prediction_service_pb2_grpc, predict_pb2
from tensorflow import make_tensor_proto, make_ndarray

channel = grpc.insecure_channel("localhost:9000")
prediction_service_stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)

with open("img.jpeg", "rb") as f:
    data = f.read()
predict_request = predict_pb2.PredictRequest()
predict_request.model_spec.name = "my_model"
predict_request.inputs["input_name"].CopyFrom(make_tensor_proto(data))
predict_response = prediction_service_stub.Predict(predict_request, 10.0)
results = make_ndarray(predict_response.outputs["output_name"])
curl -X POST http://localhost:8000/v1/models/my_model:predict
-H 'Content-Type: application/json'
-d '{"instances": [{"input_name": {"b64":"YXdlc29tZSBpbWFnZSBieXRlcw=="}}]}'

Request Prediction on a Numpy Array#

import numpy as np
from ovmsclient import make_grpc_client

client = make_grpc_client("localhost:9000")
data = np.array([1.0, 2.0, ..., 1000.0])
inputs = {"input_name": data}
results = client.predict(inputs=inputs, model_name="my_model")
import numpy as np
from ovmsclient import make_http_client

client = make_http_client("localhost:8000")

data = np.array([1.0, 2.0, ..., 1000.0])
inputs = {"input_name": data}
results = client.predict(inputs=inputs, model_name="my_model")
import grpc
from tensorflow_serving.apis import prediction_service_pb2_grpc, predict_pb2
from tensorflow import make_tensor_proto

channel = grpc.insecure_channel("localhost:9000")
prediction_service_stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)

data = np.array([1.0, 2.0, ..., 1000.0])
predict_request = predict_pb2.PredictRequest()
predict_request.model_spec.name = "my_model"
predict_request.inputs["input_name"].CopyFrom(make_tensor_proto(data))
predict_response = prediction_service_stub.Predict(predict_request, 10.0)
results = make_ndarray(predict_response.outputs["output_name"])
curl -X POST http://localhost:8000/v1/models/my_model:predict
-H 'Content-Type: application/json'
-d '{"instances": [{"input_name": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}]}'

Request Prediction on a string#

from ovmsclient import make_grpc_client

client = make_grpc_client("localhost:9000")
data = ["<string>"]
inputs = {"input_name": data}
results = client.predict(inputs=inputs, model_name="my_model")
from ovmsclient import make_http_client

client = make_http_client("localhost:8000")

data = ["<string>"]
inputs = {"input_name": data}
results = client.predict(inputs=inputs, model_name="my_model")
import grpc
from tensorflow_serving.apis import prediction_service_pb2_grpc, predict_pb2
from tensorflow import make_tensor_proto

channel = grpc.insecure_channel("localhost:9000")
prediction_service_stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)

data = ["<string>"]
predict_request = predict_pb2.PredictRequest()
predict_request.model_spec.name = "my_model"
predict_request.inputs["input_name"].CopyFrom(make_tensor_proto(data))
predict_response = prediction_service_stub.Predict(predict_request, 1)
results = predict_response.outputs["output_name"]
curl -X POST http://localhost:8000/v1/models/my_model:predict
-H 'Content-Type: application/json'
-d '{"instances": [{"input_name": "<string>"}]}'

For complete usage examples see ovmsclient samples.

C++ and Go Clients#

Creating a client application in C++ or Go follows the same principles as Python, but using them adds some complexity. There is no package or library available for them with convenient functions to interact with OpenVINO Model Server.

To successfully set up communication with the model server, you need to implement the logic to communicate with endpoints specified in the API. For gRPC, download and compile protos, then link and use them in your application according to the gRPC API specification. For REST, prepare your data and pack it into the appropriate JSON structure according to the REST API specification.

See our C++ demo or Go demo to learn how to build a sample C++ and Go-based client application in a Docker container and get predictions via the gRPC API.