import threading
import logging

import streamlit as st
import cv2
from transformers import pipeline
from PIL import Image
from mtcnn import MTCNN
from streamlit_webrtc import webrtc_streamer

# Suppress transformers progress bars
logging.getLogger("transformers").setLevel(logging.ERROR)

lock = threading.Lock()
img_container = {"webcam": None, "analyzed": None}

# Initialize the Hugging Face pipeline for facial emotion detection
emotion_pipeline = pipeline("image-classification", model="trpakov/vit-face-expression")

# Initialize MTCNN for face detection
mtcnn = MTCNN()


# Function to analyze sentiment of a cropped face image
def analyze_sentiment(face):
    # Convert the BGR face crop to RGB
    rgb_face = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
    # Convert the face to a PIL image
    pil_image = Image.fromarray(rgb_face)
    # Analyze sentiment using the Hugging Face pipeline
    results = emotion_pipeline(pil_image)
    # Get the dominant emotion (the label with the highest score)
    dominant_emotion = max(results, key=lambda x: x["score"])["label"]
    return dominant_emotion


TEXT_SIZE = 3


# Function to detect faces, analyze sentiment, and draw a labeled red box around each face
def detect_and_draw_faces(frame):
    # Detect faces using MTCNN (MTCNN expects RGB input; the webcam frame is BGR)
    results = mtcnn.detect_faces(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

    # Draw on the frame
    for result in results:
        x, y, w, h = result["box"]
        # MTCNN can return slightly negative coordinates; clamp to the frame
        x, y = max(0, x), max(0, y)
        face = frame[y:y + h, x:x + w]
        if face.size == 0:
            continue
        sentiment = analyze_sentiment(face)
        cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 0, 255), 10)  # Thick red box

        # Calculate position for the text background and the text itself
        text_size = cv2.getTextSize(sentiment, cv2.FONT_HERSHEY_SIMPLEX, TEXT_SIZE, 2)[0]
        text_x = x
        text_y = y - 10
        background_tl = (text_x, text_y - text_size[1])
        background_br = (text_x + text_size[0], text_y + 5)

        # Draw black rectangle as background
        cv2.rectangle(frame, background_tl, background_br, (0, 0, 0), cv2.FILLED)
        # Draw white text on top
        cv2.putText(frame, sentiment, (text_x, text_y), cv2.FONT_HERSHEY_SIMPLEX,
                    TEXT_SIZE, (255, 255, 255), 2)

    return frame


# Streamlit UI
st.markdown(
    """
    """,
    unsafe_allow_html=True,
)

st.title("Computer Vision Test Lab")
st.subheader("Facial Sentiment")

# Columns for input and output streams
col1, col2 = st.columns(2)
with col1:
    st.header("Input Stream")
    st.subheader("Webcam")
    video_placeholder = st.empty()
with col2:
    st.header("Output Stream")
    st.subheader("Analysis")
    output_placeholder = st.empty()
    sentiment_placeholder = st.empty()


# Runs in the streamlit-webrtc worker thread for every incoming frame
def video_frame_callback(frame):
    try:
        with lock:
            img = frame.to_ndarray(format="bgr24")
            img_container["webcam"] = img
            # Draw on a copy so the raw webcam frame stays unannotated
            frame_with_boxes = detect_and_draw_faces(img.copy())
            img_container["analyzed"] = frame_with_boxes
    except Exception as e:
        # Streamlit calls are not safe from this worker thread, so log instead
        logging.error(f"Error processing frame: {e}")
    return frame


ctx = webrtc_streamer(key="webcam", video_frame_callback=video_frame_callback)

# Poll the shared container and render the latest frames while the stream is live
while ctx.state.playing:
    with lock:
        img = img_container["webcam"]
        frame_with_boxes = img_container["analyzed"]
    if img is None or frame_with_boxes is None:
        continue
    video_placeholder.image(img, channels="BGR")
    output_placeholder.image(frame_with_boxes, channels="BGR")