Spaces:
Sleeping
Sleeping
File size: 4,870 Bytes
e33a4ef 2e7653f e33a4ef 2e7653f e33a4ef 2e7653f e33a4ef 2e7653f e33a4ef 2e7653f da33dfa 2e7653f da33dfa 2e7653f da33dfa 2e7653f da33dfa 2e7653f da33dfa 2e7653f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 |
import os
import cv2
import numpy as np
import importlib.util
import gradio as gr
from PIL import Image
# Location of the detection model and its label map.
MODEL_DIR = 'model_3'
GRAPH_NAME = 'detect.tflite'
LABELMAP_NAME = 'labelmap.txt'

# Import the interpreter from the standalone ``tflite_runtime`` package when
# it can be found; otherwise use the one shipped inside TensorFlow.
pkg = importlib.util.find_spec('tflite_runtime')
if not pkg:
    from tensorflow.lite.python.interpreter import Interpreter
    from tensorflow.lite.python.interpreter import load_delegate
else:
    from tflite_runtime.interpreter import Interpreter
    from tflite_runtime.interpreter import load_delegate

# Full paths of the model file and the label map inside MODEL_DIR.
PATH_TO_CKPT = os.path.join(MODEL_DIR, GRAPH_NAME)
PATH_TO_LABELS = os.path.join(MODEL_DIR, LABELMAP_NAME)
# Load the label map: one entry per line, looked up later by class index.
with open(PATH_TO_LABELS, 'r') as f:
    labels = [line.strip() for line in f]

# Drop a leading '???' entry if present (presumably a placeholder for an
# unused class id -- confirm against the model's label map). The emptiness
# check keeps an empty label file from raising IndexError here.
if labels and labels[0] == '???':
    del labels[0]
# Build the interpreter for the model file and allocate its tensors.
interpreter = Interpreter(model_path=PATH_TO_CKPT)
interpreter.allocate_tensors()

# Tensor metadata reused by every inference call.
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Entries 1 and 2 of the input shape are used as the resize target
# (height, width) for incoming frames.
height = input_details[0]['shape'][1]
width = input_details[0]['shape'][2]

# A float32 input tensor means pixels must be normalised before inference
# as (pixel - input_mean) / input_std.
floating_model = input_details[0]['dtype'] == np.float32
input_mean = input_std = 127.5

# The position of the boxes / classes / scores tensors in the output list is
# chosen from the first output tensor's name (presumably this distinguishes
# two model export formats -- confirm against the model in use).
outname = output_details[0]['name']
boxes_idx, classes_idx, scores_idx = (
    (1, 3, 0) if 'StatefulPartitionedCall' in outname else (0, 1, 2)
)
def perform_detection(image, interpreter, labels, min_conf_threshold=0.5):
    """Run the detector on ``image`` and draw the detections onto it.

    Besides its arguments, this function reads the module-level
    ``input_details``, ``output_details``, ``width``, ``height``,
    ``floating_model``, ``input_mean``, ``input_std`` and the
    ``boxes_idx`` / ``classes_idx`` / ``scores_idx`` output positions.

    Args:
        image: Three-dimensional image array. It is treated as BGR (it is
            converted with ``cv2.COLOR_BGR2RGB`` before inference) and is
            modified in place.
        interpreter: Interpreter used to run inference.
        labels: Sequence of class names indexed by class id.
        min_conf_threshold: Detections are drawn only when their score is
            strictly greater than this value (and at most 1.0).

    Returns:
        The same ``image`` array, with a box and a text label drawn for
        every accepted detection.
    """
    img_h, img_w, _ = image.shape

    # Prepare the model input: RGB order, model resolution, batch axis.
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image_resized = cv2.resize(image_rgb, (width, height))
    input_data = np.expand_dims(image_resized, axis=0)
    if floating_model:
        input_data = (np.float32(input_data) - input_mean) / input_std

    interpreter.set_tensor(input_details[0]['index'], input_data)
    interpreter.invoke()

    boxes = interpreter.get_tensor(output_details[boxes_idx]['index'])[0]
    classes = interpreter.get_tensor(output_details[classes_idx]['index'])[0]
    scores = interpreter.get_tensor(output_details[scores_idx]['index'])[0]

    font = cv2.FONT_HERSHEY_SIMPLEX
    for box, class_id, score in zip(boxes, classes, scores):
        if not (min_conf_threshold < score <= 1.0):
            continue

        # Box values are scaled by the image size as (ymin, xmin, ymax, xmax)
        # and clamped so the rectangle stays inside the image.
        ymin = int(max(1, box[0] * img_h))
        xmin = int(max(1, box[1] * img_w))
        ymax = int(min(img_h, box[2] * img_h))
        xmax = int(min(img_w, box[3] * img_w))
        cv2.rectangle(image, (xmin, ymin), (xmax, ymax), (10, 255, 0), 2)

        # A class id outside the label map must not abort the whole frame
        # (and a negative id must not silently wrap around), so fall back to
        # showing the numeric id.
        class_id = int(class_id)
        if 0 <= class_id < len(labels):
            object_name = labels[class_id]
        else:
            object_name = 'class %d' % class_id

        label = '%s: %d%%' % (object_name, int(score * 100))
        label_size, base_line = cv2.getTextSize(label, font, 0.7, 2)
        # Keep the label fully visible when the box touches the top edge.
        label_ymin = max(ymin, label_size[1] + 10)
        cv2.rectangle(
            image,
            (xmin, label_ymin - label_size[1] - 10),
            (xmin + label_size[0], label_ymin + base_line - 10),
            (255, 255, 255),
            cv2.FILLED,
        )
        cv2.putText(image, label, (xmin, label_ymin - 7), font, 0.7, (0, 0, 0), 2)

    return image
def resize_image(image, size=640):
    """Return ``image`` resized to a ``size`` x ``size`` square.

    Both output dimensions are set to ``size``, so the aspect ratio of the
    input is not preserved.
    """
    target_dims = (size, size)
    return cv2.resize(image, target_dims)
def detect_image(input_image):
    """Detect objects in an uploaded image and return the annotated result.

    Args:
        input_image: PIL image from the Gradio image input. May be ``None``
            (presumably when the user submits without uploading anything).

    Returns:
        A 640x640 RGB PIL image with the detections drawn on it.

    Raises:
        ValueError: If no image was provided.
    """
    if input_image is None:
        raise ValueError("No image was provided.")

    # PIL delivers RGB pixels, but perform_detection treats its input as BGR
    # (it applies COLOR_BGR2RGB before inference). Convert to BGR first so the
    # model sees correctly ordered channels, then convert the annotated frame
    # back to RGB for display.
    rgb_pixels = np.array(input_image.convert("RGB"))
    bgr_pixels = cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2BGR)
    resized_image = resize_image(bgr_pixels, size=640)
    result_image = perform_detection(resized_image, interpreter, labels)
    return Image.fromarray(cv2.cvtColor(result_image, cv2.COLOR_BGR2RGB))
def detect_video(input_video):
    """Run detection on every frame of a video and write an annotated copy.

    Frames are resized to 640x640, annotated, and written to the output file
    one at a time, so memory use does not grow with the video length.

    Args:
        input_video: Path of the video file to process.

    Returns:
        Path of the annotated video: ``"result_"`` plus the input's base
        name, relative to the current working directory.

    Raises:
        ValueError: If no video was provided or no frame could be read.
    """
    if not input_video:
        raise ValueError("No video was provided.")

    output_video_path = "result_" + os.path.basename(input_video)
    cap = cv2.VideoCapture(input_video)
    out = None
    try:
        # Keep the source frame rate so playback speed is unchanged; fall
        # back to 15 fps when the container does not report a usable value.
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not np.isfinite(fps) or fps <= 0:
            fps = 15

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            resized_frame = resize_image(frame, size=640)
            result_frame = perform_detection(resized_frame, interpreter, labels)
            if out is None:
                # Open the writer lazily so no output file is created for an
                # unreadable video; its size comes from the first frame.
                frame_h, frame_w = result_frame.shape[:2]
                out = cv2.VideoWriter(
                    output_video_path,
                    cv2.VideoWriter_fourcc(*'mp4v'),
                    fps,
                    (frame_w, frame_h),
                )
            out.write(result_frame)
    finally:
        # Release both handles even if detection fails part-way through.
        cap.release()
        if out is not None:
            out.release()

    if out is None:
        raise ValueError("No frames were read from the video.")
    return output_video_path
# Two-tab Gradio interface: one tab for still images, one for videos.
with gr.Blocks() as app:
    with gr.Tab("Image Detection"):
        gr.Markdown("Upload an image for object detection")
        image_input = gr.Image(type="pil", label="Upload an image")
        image_output = gr.Image(type="pil", label="Detection Result")
        image_submit = gr.Button("Submit")
        image_submit.click(
            fn=detect_image,
            inputs=image_input,
            outputs=image_output,
        )

    with gr.Tab("Video Detection"):
        gr.Markdown("Upload a video for object detection")
        video_input = gr.Video(label="Upload a video")
        video_output = gr.Video(label="Detection Result")
        video_submit = gr.Button("Submit")
        video_submit.click(
            fn=detect_video,
            inputs=video_input,
            outputs=video_output,
        )

# Start the web server for the interface.
app.launch()