feat: migrate from face detection to HOG person detection

2026-01-21 11:39:32 +01:00
parent cae56c40cc
commit b68fa2614e
7 changed files with 158 additions and 1987 deletions
--- a/person_detector.py
+++ b/person_detector.py
@@ -0,0 +1,129 @@
+"""
+Person Detection Module using OpenCV HOG (Histogram of Oriented Gradients).
+Uses OpenCV's built-in people detector; no external model files are required.
+"""
+
+import cv2
+import numpy as np
+
+
+class PersonDetector:
+    """Detect people in BGR frames with OpenCV's built-in HOG people detector."""
+
+    # Frames wider than this are downscaled before detection (HOG is expensive).
+    MAX_DETECTION_WIDTH = 640
+    # Accepted (width, height) box range in detection-space pixels: smaller
+    # boxes are treated as noise, larger ones as walls/windows.
+    MIN_BOX_SIZE = (40, 80)
+    MAX_BOX_SIZE = (400, 480)
+
+    def __init__(self, model_dir=None, confidence_threshold=0.6):
+        """
+        Initialize the person detector with HOG descriptor.
+
+        Args:
+            model_dir: Ignored for HOG (kept for API compatibility)
+            confidence_threshold: Detections whose weight is not strictly
+                greater than this value are discarded
+        """
+        self.confidence_threshold = confidence_threshold
+
+        # Initialize HOG descriptor/person detector
+        self.hog = cv2.HOGDescriptor()
+        self.hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())
+        print("Initialized HOG Person Detector")
+
+    def detect_people(self, frame):
+        """
+        Detect people in a frame using HOG.
+
+        Args:
+            frame: BGR image frame from OpenCV
+
+        Returns:
+            List of tuples (x, y, w, h, confidence) for each detected person,
+            in full-resolution frame coordinates and clipped to the frame.
+            Empty list when the frame is None/empty or nothing is detected.
+        """
+        if frame is None or frame.size == 0:
+            return []
+        frame_h, frame_w = frame.shape[:2]
+
+        # Downscale wide frames; boxes are mapped back to full size below.
+        scale, frame_small = 1.0, frame
+        if frame_w > self.MAX_DETECTION_WIDTH:
+            scale = self.MAX_DETECTION_WIDTH / frame_w
+            frame_small = cv2.resize(frame, None, fx=scale, fy=scale)
+
+        # winStride: step size in x and y
+        # padding: padding around the input
+        # scale: coefficient of the detection window increase
+        rects, weights = self.hog.detectMultiScale(
+            frame_small,
+            winStride=(4, 4),
+            padding=(8, 8),
+            scale=1.05,
+            hitThreshold=0.0,  # Default
+        )
+        # Weights may be flat or one-element rows (and an empty tuple when
+        # nothing is found); flatten so each box pairs with one float score.
+        scores = np.asarray(weights, dtype=float).reshape(-1)
+        min_w, min_h = self.MIN_BOX_SIZE
+        max_w, max_h = self.MAX_BOX_SIZE
+
+        people = []
+        for (x, y, w, h), score in zip(rects, scores):
+            if score <= self.confidence_threshold:
+                continue
+            # Size-filter in detection space: the limits assume a ~640px-wide
+            # image, so testing upscaled boxes would reject valid people.
+            if not (min_w <= w <= max_w and min_h <= h <= max_h):
+                continue
+
+            # Map both corners back to the original frame and clip them, so a
+            # box poking outside the image is trimmed instead of shifted.
+            left = max(0, int(x / scale))
+            top = max(0, int(y / scale))
+            right = min(frame_w, int((x + w) / scale))
+            bottom = min(frame_h, int((y + h) / scale))
+            if right <= left or bottom <= top:
+                continue
+            people.append((left, top, right - left, bottom - top, float(score)))
+
+        return people
+
+    detect_faces = detect_people  # Alias for compatibility
+
+    def draw_people(self, frame, people, color=(0, 255, 0), thickness=2):
+        """
+        Draw bounding boxes and "Person: <confidence>" labels on a copy of frame.
+
+        Args:
+            frame: BGR image; it is not modified
+            people: Iterable of (x, y, w, h, confidence) tuples
+            color: BGR color of the box and label background
+            thickness: Box line thickness in pixels
+
+        Returns:
+            Annotated copy of frame
+        """
+        font, font_scale = cv2.FONT_HERSHEY_SIMPLEX, 0.5
+        canvas = frame.copy()
+        for (x, y, w, h, confidence) in people:
+            cv2.rectangle(canvas, (x, y), (x + w, y + h), color, thickness)
+            label = f"Person: {confidence:.2f}"
+            (label_w, label_h), _ = cv2.getTextSize(label, font, font_scale, 1)
+
+            # Label sits above the box; if that would leave the image, move it
+            # just inside the top edge so it stays visible.
+            label_top = y - label_h - 10
+            if label_top < 0:
+                label_top = y
+            label_bottom = label_top + label_h + 10
+            # Filled background, then black text 5px above its bottom edge
+            cv2.rectangle(canvas, (x, label_top), (x + label_w, label_bottom), color, -1)
+            cv2.putText(canvas, label, (x, label_bottom - 5), font, font_scale, (0, 0, 0), 1)
+
+        return canvas
+
+    draw_faces = draw_people  # Alias for compatibility