# ObjectDetection / app.py
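# Assumed dependencies (not pinned in this file): torch, opencv-python, pillow,
# gradio, numpy, pandas, transformers, and sentencepiece (required by the
# Helsinki-NLP/opus-mt-en-ar tokenizer). The torch.hub call below also pulls in
# the ultralytics/yolov5 requirements on first run.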
import cv2
import torch
from PIL import Image, ImageDraw
import gradio as gr
import numpy as np
import pandas as pd
from transformers import pipeline
# Load the YOLOv5 model
model = torch.hub.load('ultralytics/yolov5', 'yolov5s')
# Load the translation model
translator = pipeline("translation_en_to_ar", model="Helsinki-NLP/opus-mt-en-ar")
# Define a function to detect objects and draw bounding boxes for images
def detect_and_draw_image(input_image):
    results = model(input_image)
    detections = results.xyxy[0].cpu().numpy()

    draw = ImageDraw.Draw(input_image)
    counts = {}
    for detection in detections:
        xmin, ymin, xmax, ymax, conf, class_id = detection

        # Update counts for each label
        label = model.names[int(class_id)]
        counts[label] = counts.get(label, 0) + 1

        # Draw the bounding box and confidence label
        draw.rectangle([(xmin, ymin), (xmax, ymax)], outline="red", width=2)
        draw.text((xmin, ymin), f"{label}: {conf:.2f}", fill="white")

    # Translate the detected labels to Arabic (skip the call if nothing was detected)
    translated_counts = translator(list(counts.keys())) if counts else []

    df = pd.DataFrame({
        'label (English)': list(counts.keys()),
        'label (Arabic)': [t['translation_text'] for t in translated_counts],
        'counts': list(counts.values())
    })

    return input_image, df
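# Minimal sketch of standalone use outside Gradio; "example.jpg" is a hypothetical local file:
#   from PIL import Image
#   annotated, counts_df = detect_and_draw_image(Image.open("example.jpg").convert("RGB"))
#   annotated.save("annotated.jpg")
#   print(counts_df)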
# Define a function to detect objects and draw bounding boxes for videos
def detect_and_draw_video(video_path):
    cap = cv2.VideoCapture(video_path)
    frames = []
    overall_counts = {}
    detected_objects = set()  # Set to keep track of unique detections across frames

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        frame = cv2.resize(frame, (640, 480))
        # OpenCV reads frames as BGR; convert to RGB before passing to the model
        results = model(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        detections = results.xyxy[0].cpu().numpy()

        for detection in detections:
            xmin, ymin, xmax, ymax, conf, class_id = detection
            label = model.names[int(class_id)]

            # Create a rough identifier from the label and the bounding-box centre,
            # so the same object is only counted once across frames
            identifier = (label, int((xmin + xmax) / 2), int((ymin + ymax) / 2))
            if identifier not in detected_objects:
                detected_objects.add(identifier)
                overall_counts[label] = overall_counts.get(label, 0) + 1

            # Draw the bounding box and confidence label on the frame
            cv2.rectangle(frame, (int(xmin), int(ymin)), (int(xmax), int(ymax)), (255, 0, 0), 2)
            cv2.putText(frame, f"{label}: {conf:.2f}", (int(xmin), int(ymin) - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)

        frames.append(frame)

    cap.release()

    # No frames could be read from the video
    if not frames:
        return None, None

    output_path = 'output.mp4'
    out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'mp4v'), 20.0, (640, 480))
    for frame in frames:
        out.write(frame)
    out.release()

    # Translate the detected labels to Arabic (skip the call if nothing was detected)
    translated_counts = translator(list(overall_counts.keys())) if overall_counts else []

    df = pd.DataFrame({
        'label (English)': list(overall_counts.keys()),
        'label (Arabic)': [t['translation_text'] for t in translated_counts],
        'counts': list(overall_counts.values())
    })

    return output_path, df
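# Minimal sketch of standalone use; "example.mp4" is a hypothetical local file:
#   out_path, counts_df = detect_and_draw_video("example.mp4")
#   print(out_path)
#   print(counts_df)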
# Create separate interfaces for images and videos
image_interface = gr.Interface(
    fn=detect_and_draw_image,
    inputs=gr.Image(type="pil", label="Upload Image"),
    outputs=[gr.Image(type="pil"), gr.Dataframe(label="Object Counts")],
    title="Object Detection for Images",
    description="Upload an image to see the objects detected by YOLOv5, with bounding boxes and per-label counts."
)

video_interface = gr.Interface(
    fn=detect_and_draw_video,
    inputs=gr.Video(label="Upload Video"),
    outputs=[gr.Video(label="Processed Video"), gr.Dataframe(label="Object Counts")],
    title="Object Detection for Videos",
    description="Upload a video to see the objects detected by YOLOv5, with bounding boxes and per-label counts."
)
# Combine interfaces into a single app
app = gr.TabbedInterface([image_interface, video_interface], ["Image Detection", "Video Detection"])
# Launch the app
app.launch(debug=True)