forked from kiri-art/docker-diffusers-api
server.py
# Do not edit if deploying to Banana Serverless.
# This file is boilerplate for the http server, and follows a strict interface.
# Instead, edit the init() and inference() functions in app.py
# (an illustrative sketch of that interface is at the bottom of this file).

import json
import subprocess
import traceback

from sanic import Sanic, response

import app as user_src

# We do the model load-to-GPU step on server startup
# so the model object is available globally for reuse
user_src.init()

# Create the http server app
server = Sanic("my_app")

# Healthchecks verify that the environment is correct on Banana Serverless
@server.route("/healthcheck", methods=["GET"])
def healthcheck(request):
    # Dependency-free way to check if the GPU is visible
    gpu = False
    out = subprocess.run("nvidia-smi", shell=True)
    if out.returncode == 0:  # success exit code from the shell command
        gpu = True
    return response.json({"state": "healthy", "gpu": gpu})

# Inference POST handler at '/' is called for every http call from Banana
@server.route("/", methods=["POST"])
def inference(request):
    try:
        # The body may arrive as a JSON-encoded string; parse it if so
        model_inputs = json.loads(request.json)
    except Exception:
        model_inputs = request.json

    try:
        output = user_src.inference(model_inputs)
    except Exception as err:
        output = {
            "$error": {
                "code": "APP_INFERENCE_ERROR",
                "name": type(err).__name__,
                "message": str(err),
                "stack": traceback.format_exc(),
            }
        }

    return response.json(output)
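
# For reference, a caller that is not going through Banana can POST to '/' the
# same way Banana does. The "prompt" field is an illustrative assumption about
# what app.py's inference() expects, not something this server enforces:
#
#     import requests
#
#     r = requests.post("http://localhost:8000/", json={"prompt": "a red panda"})
#     print(r.json())  # app.py's output dict, or an {"$error": ...} dict on failure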

if __name__ == "__main__":
    server.run(host="0.0.0.0", port=8000, workers=1)
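
# ---------------------------------------------------------------------------
# Not part of this file: server.py assumes app.py defines init() and
# inference(model_inputs). A minimal sketch of that interface, assuming a
# diffusers text-to-image pipeline (the model id and the "prompt" /
# "image_base64" fields are illustrative, not dictated by this server):
#
#     import base64
#     from io import BytesIO
#
#     import torch
#     from diffusers import StableDiffusionPipeline
#
#     model = None
#
#     def init():
#         global model
#         # Load the pipeline onto the GPU once, at server startup
#         model = StableDiffusionPipeline.from_pretrained(
#             "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
#         ).to("cuda")
#
#     def inference(model_inputs):
#         # Run the pipeline and return a JSON-serializable dict
#         prompt = model_inputs.get("prompt", "")
#         image = model(prompt).images[0]
#         buffer = BytesIO()
#         image.save(buffer, format="PNG")
#         return {"image_base64": base64.b64encode(buffer.getvalue()).decode()}
# ---------------------------------------------------------------------------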