brxerq committed
Commit 9728877 · 1 parent: 2230f78

Update app.py

Files changed (1): app.py (+103, -59)
app.py CHANGED
@@ -1,32 +1,34 @@
-import streamlit as st
 import os
-import numpy as np
 import cv2
+import numpy as np
+import importlib.util
+import gradio as gr
 from PIL import Image
-import tempfile
-
-# TensorFlow imports
-from tensorflow.lite.python.interpreter import Interpreter
-if use_TPU:
-    from tensorflow.lite.python.interpreter import load_delegate
 
-# Setup the model and labels
-MODEL_NAME = 'model'
+# Load the TensorFlow Lite model
+MODEL_DIR = 'model'
 GRAPH_NAME = 'detect.tflite'
 LABELMAP_NAME = 'labelmap.txt'
-min_conf_threshold = 0.5
-use_TPU = False  # Change this based on your needs
 
-PATH_TO_CKPT = os.path.join('model', GRAPH_NAME)
-PATH_TO_LABELS = os.path.join('model', LABELMAP_NAME)
+pkg = importlib.util.find_spec('tflite_runtime')
+if pkg:
+    from tflite_runtime.interpreter import Interpreter
+    from tflite_runtime.interpreter import load_delegate
+else:
+    from tensorflow.lite.python.interpreter import Interpreter
+    from tensorflow.lite.python.interpreter import load_delegate
+
+PATH_TO_CKPT = os.path.join(MODEL_DIR, GRAPH_NAME)
+PATH_TO_LABELS = os.path.join(MODEL_DIR, LABELMAP_NAME)
 
-# Load labels
+# Load the label map
 with open(PATH_TO_LABELS, 'r') as f:
     labels = [line.strip() for line in f.readlines()]
+
 if labels[0] == '???':
     del(labels[0])
 
-# Load model
+# Load the TensorFlow Lite model
 interpreter = Interpreter(model_path=PATH_TO_CKPT)
 interpreter.allocate_tensors()
 
@@ -34,61 +36,103 @@ input_details = interpreter.get_input_details()
 output_details = interpreter.get_output_details()
 height = input_details[0]['shape'][1]
 width = input_details[0]['shape'][2]
+floating_model = (input_details[0]['dtype'] == np.float32)
+
+input_mean = 127.5
+input_std = 127.5
 
-# Streamlit interface
-st.title('Object Detection System')
-st.sidebar.title('Settings')
-uploaded_file = st.sidebar.file_uploader("Choose an image or video file", type=['jpg', 'png', 'jpeg', 'mp4'])
+outname = output_details[0]['name']
+if ('StatefulPartitionedCall' in outname):
+    boxes_idx, classes_idx, scores_idx = 1, 3, 0
+else:
+    boxes_idx, classes_idx, scores_idx = 0, 1, 2
 
-def detect_objects(image):
-    # Prepare image for detection
+def perform_detection(image, interpreter, labels):
+    imH, imW, _ = image.shape
     image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
     image_resized = cv2.resize(image_rgb, (width, height))
     input_data = np.expand_dims(image_resized, axis=0)
-    input_data = (np.float32(input_data) - 127.5) / 127.5  # Normalize
 
-    # Perform detection
+    if floating_model:
+        input_data = (np.float32(input_data) - input_mean) / input_std
+
     interpreter.set_tensor(input_details[0]['index'], input_data)
     interpreter.invoke()
 
-    # Retrieve detection results
-    boxes = interpreter.get_tensor(output_details[0]['index'])[0]  # Bounding box coordinates of detected objects
-    classes = interpreter.get_tensor(output_details[1]['index'])[0]  # Class index of detected objects
-    scores = interpreter.get_tensor(output_details[2]['index'])[0]  # Confidence of detected objects
+    boxes = interpreter.get_tensor(output_details[boxes_idx]['index'])[0]
+    classes = interpreter.get_tensor(output_details[classes_idx]['index'])[0]
+    scores = interpreter.get_tensor(output_details[scores_idx]['index'])[0]
 
+    detections = []
     for i in range(len(scores)):
-        if scores[i] > min_conf_threshold and scores[i] <= 1.0:
-            # Draw bounding boxes and labels on the image
-            ymin, xmin, ymax, xmax = boxes[i]
-            (left, right, top, bottom) = (xmin * imW, xmax * imW, ymin * imH, ymax * imH)
-            cv2.rectangle(image, (int(left), int(top)), (int(right), int(bottom)), (10, 255, 0), 4)
+        if ((scores[i] > 0.5) and (scores[i] <= 1.0)):
+            ymin = int(max(1, (boxes[i][0] * imH)))
+            xmin = int(max(1, (boxes[i][1] * imW)))
+            ymax = int(min(imH, (boxes[i][2] * imH)))
+            xmax = int(min(imW, (boxes[i][3] * imW)))
+
+            cv2.rectangle(image, (xmin, ymin), (xmax, ymax), (10, 255, 0), 2)
             object_name = labels[int(classes[i])]
-            label = '%s: %d%%' % (object_name, int(scores[i]*100))
-            cv2.putText(image, label, (int(left), int(top)-10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
+            label = '%s: %d%%' % (object_name, int(scores[i] * 100))
+            labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2)
+            label_ymin = max(ymin, labelSize[1] + 10)
+            cv2.rectangle(image, (xmin, label_ymin - labelSize[1] - 10), (xmin + labelSize[0], label_ymin + baseLine - 10), (255, 255, 255), cv2.FILLED)
+            cv2.putText(image, label, (xmin, label_ymin - 7), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2)
+
+            detections.append([object_name, scores[i], xmin, ymin, xmax, ymax])
     return image
 
-if uploaded_file is not None:
-    file_bytes = np.asarray(bytearray(uploaded_file.read()), dtype=np.uint8)
-    if uploaded_file.type == "video/mp4":
-        # Handle video upload
-        tfile = tempfile.NamedTemporaryFile(delete=False)
-        tfile.write(uploaded_file.read())
-
-        cap = cv2.VideoCapture(tfile.name)
-
-        stframe = st.empty()
-
-        while cap.isOpened():
-            ret, frame = cap.read()
-            if not ret:
-                break
-            frame = detect_objects(frame)
-            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
-            stframe.image(frame)
-    else:
-        # Handle image upload
-        image = cv2.imdecode(file_bytes, cv2.IMREAD_COLOR)
-        image = detect_objects(image)
-        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
-        st.image(image, use_column_width=True)
+def detect_image(input_image):
+    image = np.array(input_image)
+    result_image = perform_detection(image, interpreter, labels)
+    return Image.fromarray(result_image)
+
+def detect_video(input_video):
+    cap = cv2.VideoCapture(input_video.name)
+    frames = []
+
+    while cap.isOpened():
+        ret, frame = cap.read()
+        if not ret:
+            break
+
+        result_frame = perform_detection(frame, interpreter, labels)
+        frames.append(result_frame)
+
+    cap.release()
+
+    height, width, layers = frames[0].shape
+    size = (width, height)
+    output_video_path = "result_" + input_video.name
+    out = cv2.VideoWriter(output_video_path, cv2.VideoWriter_fourcc(*'DIVX'), 15, size)
+
+    for frame in frames:
+        out.write(frame)
+
+    out.release()
+
+    return output_video_path
+
+image_input = gr.inputs.Image(type="pil", label="Upload an image")
+image_output = gr.outputs.Image(type="pil", label="Detection Result")
+
+video_input = gr.inputs.Video(type="file", label="Upload a video")
+video_output = gr.outputs.Video(label="Detection Result")
+
+app = gr.Interface(
+    fn=detect_image,
+    inputs=image_input,
+    outputs=image_output,
+    live=True,
+    description="Object Detection on Images"
+)
+
+app_video = gr.Interface(
+    fn=detect_video,
+    inputs=video_input,
+    outputs=video_output,
+    live=True,
+    description="Object Detection on Videos"
+)
 
+gr.TabbedInterface([app, app_video], ["Image Detection", "Video Detection"]).launch()
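
Note: the Gradio wiring added by this commit uses the legacy gr.inputs / gr.outputs namespace, which was deprecated in Gradio 3.x and removed in 4.x, and detect_video reads input_video.name, whereas newer Gradio passes an uploaded video to the function as a plain filepath string. Below is a minimal sketch, not part of the commit, of the same two interfaces against the current component API, assuming Gradio 4.x and reusing the perform_detection / detect_image functions defined above. The 'mp4v' codec and the "_result.mp4" output name are assumptions; depending on the OpenCV build, an H.264 fourcc such as 'avc1' may be needed for in-browser playback, since DIVX-encoded output generally will not play in an HTML5 video tag.

import os
import cv2
import gradio as gr

def detect_video(video_path):
    # Newer Gradio hands the upload to the function as a filepath string.
    cap = cv2.VideoCapture(video_path)
    out = None
    output_video_path = os.path.splitext(video_path)[0] + "_result.mp4"
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        frame = perform_detection(frame, interpreter, labels)
        if out is None:
            # Lazily open the writer once the frame size is known.
            h, w, _ = frame.shape
            out = cv2.VideoWriter(output_video_path, cv2.VideoWriter_fourcc(*'mp4v'), 15, (w, h))
        out.write(frame)
    cap.release()
    if out is not None:
        out.release()
    return output_video_path

app = gr.Interface(
    fn=detect_image,
    inputs=gr.Image(type="pil", label="Upload an image"),
    outputs=gr.Image(type="pil", label="Detection Result"),
    description="Object Detection on Images",
)

app_video = gr.Interface(
    fn=detect_video,
    inputs=gr.Video(label="Upload a video"),
    outputs=gr.Video(label="Detection Result"),
    description="Object Detection on Videos",
)

gr.TabbedInterface([app, app_video], ["Image Detection", "Video Detection"]).launch()

Unlike the committed detect_video, this sketch writes each frame as it is decoded instead of buffering the whole clip in a Python list, so memory use stays bounded on long videos.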