File size: 4,870 Bytes
683ca2f
 
 
 
 
8423797
683ca2f
8423797
683ca2f
 
 
 
 
 
 
8423797
683ca2f
 
8423797
683ca2f
 
 
 
8423797
683ca2f
 
 
 
 
 
8423797
683ca2f
 
 
 
 
 
 
 
 
8423797
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
683ca2f
 
8423797
 
683ca2f
 
 
 
 
 
 
 
 
 
 
8423797
 
683ca2f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8423797
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
import os
import cv2
import numpy as np
import importlib.util
import gradio as gr
from PIL import Image

# Location of the TensorFlow Lite model and its label map
MODEL_DIR = 'model_2'
GRAPH_NAME = 'detect.tflite'
LABELMAP_NAME = 'labelmap.txt'

# Use the standalone tflite_runtime package when it is installed; otherwise
# fall back to the interpreter bundled with full TensorFlow.
pkg = importlib.util.find_spec('tflite_runtime')
if pkg:
    from tflite_runtime.interpreter import Interpreter
    from tflite_runtime.interpreter import load_delegate
else:
    from tensorflow.lite.python.interpreter import Interpreter
    from tensorflow.lite.python.interpreter import load_delegate

PATH_TO_CKPT = os.path.join(MODEL_DIR, GRAPH_NAME)
PATH_TO_LABELS = os.path.join(MODEL_DIR, LABELMAP_NAME)

# Load the label map: one class name per line, indexed by class id.
# Blank lines are kept on purpose so that line positions keep matching ids.
with open(PATH_TO_LABELS, 'r') as f:
    labels = [line.strip() for line in f]

# Drop a leading '???' placeholder entry if present. The emptiness check keeps
# an empty label file from raising IndexError at import time.
if labels and labels[0] == '???':
    del labels[0]

# Load the TensorFlow Lite model and allocate its tensors
interpreter = Interpreter(model_path=PATH_TO_CKPT)
interpreter.allocate_tensors()

# Cache the tensor metadata used on every inference call
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
height = input_details[0]['shape'][1]
width = input_details[0]['shape'][2]
floating_model = (input_details[0]['dtype'] == np.float32)

# Normalisation constants applied to the input when the model takes float32
input_mean = 127.5
input_std = 127.5

# The position of the boxes/classes/scores tensors depends on how the model
# was exported; the name of the first output tensor tells the layouts apart.
outname = output_details[0]['name']
if 'StatefulPartitionedCall' in outname:
    boxes_idx, classes_idx, scores_idx = 1, 3, 0
else:
    boxes_idx, classes_idx, scores_idx = 0, 1, 2

def perform_detection(image, interpreter, labels):
    """Run the TFLite detector on an image and draw the detections onto it.

    Relies on the module-level tensor metadata (``input_details``,
    ``output_details``, ``width``, ``height``, ``floating_model``,
    ``input_mean``, ``input_std`` and the ``*_idx`` output positions).

    Args:
        image: 3-D image array in BGR channel order (it is converted to RGB
            before being fed to the model). It is annotated in place.
        interpreter: TFLite interpreter whose tensors are already allocated.
        labels: Sequence of class names indexed by class id.

    Returns:
        The same ``image`` array, with a box and a "name: score%" caption
        drawn for every detection whose score is in (0.5, 1.0].
    """
    imH, imW, _ = image.shape
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image_resized = cv2.resize(image_rgb, (width, height))
    input_data = np.expand_dims(image_resized, axis=0)

    # Float models expect normalised input; other models take raw pixel values.
    if floating_model:
        input_data = (np.float32(input_data) - input_mean) / input_std

    interpreter.set_tensor(input_details[0]['index'], input_data)
    interpreter.invoke()

    boxes = interpreter.get_tensor(output_details[boxes_idx]['index'])[0]
    classes = interpreter.get_tensor(output_details[classes_idx]['index'])[0]
    scores = interpreter.get_tensor(output_details[scores_idx]['index'])[0]

    for box, class_id, score in zip(boxes, classes, scores):
        if not (0.5 < score <= 1.0):
            continue

        # Box coordinates are fractions of the image size; scale them to
        # pixels and clamp them to the image.
        ymin = int(max(1, box[0] * imH))
        xmin = int(max(1, box[1] * imW))
        ymax = int(min(imH, box[2] * imH))
        xmax = int(min(imW, box[3] * imW))

        cv2.rectangle(image, (xmin, ymin), (xmax, ymax), (10, 255, 0), 2)

        # Fall back to the numeric id when the label map has no entry for the
        # class, instead of raising IndexError (or silently picking a label
        # from the end of the list for a negative id).
        class_index = int(class_id)
        if 0 <= class_index < len(labels):
            object_name = labels[class_index]
        else:
            object_name = str(class_index)

        label = f'{object_name}: {int(score * 100)}%'
        labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2)
        # Keep the caption inside the image when the box touches the top edge.
        label_ymin = max(ymin, labelSize[1] + 10)
        cv2.rectangle(
            image,
            (xmin, label_ymin - labelSize[1] - 10),
            (xmin + labelSize[0], label_ymin + baseLine - 10),
            (255, 255, 255),
            cv2.FILLED,
        )
        cv2.putText(image, label, (xmin, label_ymin - 7), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2)

    return image

def resize_image(image, size=640):
    """Return `image` scaled to a `size` x `size` square.

    Both dimensions are forced to `size`, so the aspect ratio of the input is
    not preserved.
    """
    target_dims = (size, size)
    return cv2.resize(image, target_dims)

def detect_image(input_image):
    """Run object detection on an uploaded image and return the annotated result.

    Args:
        input_image: The uploaded picture, normally a PIL image. Any other
            array-like accepted by ``np.array`` is passed through as before.

    Returns:
        A PIL image (640x640) with the detections drawn on it.

    Raises:
        ValueError: If no image was provided.
    """
    if input_image is None:
        raise ValueError("No image was provided.")

    # Normalise grayscale / palette / RGBA uploads to 3-channel RGB so the
    # shape unpacking and colour conversion downstream cannot fail.
    if isinstance(input_image, Image.Image):
        input_image = input_image.convert("RGB")

    rgb_image = np.array(input_image)
    # perform_detection treats its input as BGR (it converts BGR -> RGB before
    # inference), whereas PIL delivers RGB. Convert here so the model really
    # receives RGB data.
    bgr_image = cv2.cvtColor(rgb_image, cv2.COLOR_RGB2BGR)
    resized_image = resize_image(bgr_image, size=640)  # Resize input image
    result_image = perform_detection(resized_image, interpreter, labels)
    # Back to RGB for PIL / display.
    return Image.fromarray(cv2.cvtColor(result_image, cv2.COLOR_BGR2RGB))

def detect_video(input_video):
    """Run object detection on every frame of a video and save the result.

    Frames are annotated and written one at a time, so memory use does not
    grow with the length of the video.

    Args:
        input_video: Path of the video file to process.

    Returns:
        Path of the annotated video, written to the current working directory
        as ``result_<input name>.mp4``.

    Raises:
        ValueError: If no video was provided or no frame could be read from it.
    """
    if not input_video:
        raise ValueError("No video was provided.")

    # The stream is always encoded with the 'mp4v' codec, so always use an
    # .mp4 container regardless of the extension of the upload.
    base_name = os.path.splitext(os.path.basename(input_video))[0]
    output_video_path = "result_" + base_name + ".mp4"

    cap = cv2.VideoCapture(input_video)
    out = None
    try:
        # Keep the playback speed of the source; fall back to 15 fps when the
        # container does not report a usable frame rate (0 or NaN).
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            fps = 15

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            resized_frame = resize_image(frame, size=640)  # Resize each frame
            result_frame = perform_detection(resized_frame, interpreter, labels)

            # Create the writer lazily so its size matches the first frame
            # and no output file is produced for an unreadable video.
            if out is None:
                frame_height, frame_width = result_frame.shape[:2]
                out = cv2.VideoWriter(
                    output_video_path,
                    cv2.VideoWriter_fourcc(*'mp4v'),
                    fps,
                    (frame_width, frame_height),
                )
            out.write(result_frame)
    finally:
        # Release the native handles even when detection raises.
        cap.release()
        if out is not None:
            out.release()

    if out is None:
        raise ValueError("No frames were read from the video.")

    return output_video_path

# Gradio UI with one tab for still images and one for videos. Each tab wires
# its "Submit" button to the matching detection function.
with gr.Blocks() as app:
    with gr.Tab("Image Detection"):
        gr.Markdown("Upload an image for object detection")
        image_input = gr.Image(type="pil", label="Upload an image")
        image_output = gr.Image(type="pil", label="Detection Result")
        image_submit = gr.Button("Submit")
        image_submit.click(fn=detect_image, inputs=image_input, outputs=image_output)

    with gr.Tab("Video Detection"):
        gr.Markdown("Upload a video for object detection")
        video_input = gr.Video(label="Upload a video")
        video_output = gr.Video(label="Detection Result")
        video_submit = gr.Button("Submit")
        video_submit.click(fn=detect_video, inputs=video_input, outputs=video_output)

# Start the web server as soon as the module is executed.
app.launch()