diff --git a/camera.py b/camera.py index 7e04b7c..2b63cc9 100644 --- a/camera.py +++ b/camera.py @@ -6,7 +6,7 @@ Integrates face detection and zone tracking. import cv2 import threading import time -from face_detector import FaceDetector +from person_detector import PersonDetector from zone_tracker import ZoneTracker @@ -18,8 +18,8 @@ class Camera: Args: camera_index: Index of the USB camera (usually 0) - process_every_n_frames: Process face detection every N frames for performance - face_confidence: Confidence threshold for face detection + process_every_n_frames: Process detection every N frames for performance + face_confidence: Confidence threshold for person detection frame_width: Desired frame width frame_height: Desired frame height """ @@ -37,8 +37,8 @@ class Camera: self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, frame_width) self.cap.set(cv2.CAP_PROP_FRAME_HEIGHT, frame_height) - # Initialize face detector and zone tracker - self.face_detector = FaceDetector(confidence_threshold=face_confidence) + # Initialize person detector and zone tracker + self.person_detector = PersonDetector(confidence_threshold=face_confidence) self.zone_tracker = None # Will be initialized after first frame # Frame processing state @@ -95,7 +95,7 @@ class Camera: with self.lock: self.current_frame = frame.copy() - # Process face detection every N frames + # Process detection every N frames if self.frame_counter % self.process_every_n_frames == 0: processed_frame, counts = self._process_frame(frame) with self.lock: @@ -104,7 +104,7 @@ class Camera: def _process_frame(self, frame): """ - Process a single frame: detect faces, track zones, update counts. + Process a single frame: detect people, track zones, update counts. Args: frame: Input frame from camera @@ -112,12 +112,12 @@ class Camera: Returns: Tuple of (processed_frame, counts_dict) """ - # Detect faces - faces = self.face_detector.detect_faces(frame) + # Detect people + people = self.person_detector.detect_people(frame) # Track zones and update counts if self.zone_tracker: - counts = self.zone_tracker.process_faces(faces) + counts = self.zone_tracker.process_faces(people) else: counts = { 'total_entered': 0, @@ -131,8 +131,8 @@ class Camera: else: processed_frame = frame.copy() - # Draw faces on frame - processed_frame = self.face_detector.draw_faces(processed_frame, faces) + # Draw people on frame + processed_frame = self.person_detector.draw_people(processed_frame, people) # Draw count information on frame text_y = 60 diff --git a/download_models.py b/download_models.py index 6769fba..221abb3 100644 --- a/download_models.py +++ b/download_models.py @@ -1,77 +1,27 @@ #!/usr/bin/env python3 """ -Script to download OpenCV DNN face detection model files. -Downloads the required prototxt and caffemodel files for face detection. +Cleanup script for People Counter. +Removes unused model files since we switched to HOG detector. 
""" -import os -import urllib.request +import shutil from pathlib import Path -def download_file(url, destination): - """Download a file from URL to destination.""" - print(f"Downloading {os.path.basename(destination)}...") - try: - urllib.request.urlretrieve(url, destination) - print(f"✓ Successfully downloaded {os.path.basename(destination)}") - return True - except Exception as e: - print(f"✗ Error downloading {os.path.basename(destination)}: {e}") - return False - - def main(): - """Main function to download model files.""" - # Create models directory if it doesn't exist + print("Cleaning up unused model files...") + models_dir = Path("models") - models_dir.mkdir(exist_ok=True) - - # Model file URLs - prototxt_url = "https://raw.githubusercontent.com/opencv/opencv/master/samples/dnn/face_detector/deploy.prototxt" - model_url = "https://raw.githubusercontent.com/opencv/opencv_3rdparty/dnn_samples_face_detector_20170830/res10_300x300_ssd_iter_140000.caffemodel" - - # Destination paths - prototxt_path = models_dir / "deploy.prototxt" - model_path = models_dir / "res10_300x300_ssd_iter_140000.caffemodel" - - print("=" * 60) - print("OpenCV DNN Face Detection Model Downloader") - print("=" * 60) - print() - - # Check if files already exist - if prototxt_path.exists(): - print(f"⚠ {prototxt_path.name} already exists. Skipping download.") + if models_dir.exists(): + try: + shutil.rmtree(models_dir) + print("✓ Removed models directory") + except Exception as e: + print(f"✗ Failed to remove models directory: {e}") else: - success = download_file(prototxt_url, prototxt_path) - if not success: - print("\nAlternative: You can manually download deploy.prototxt from:") - print("https://github.com/opencv/opencv/blob/master/samples/dnn/face_detector/deploy.prototxt") - print() - - if model_path.exists(): - print(f"⚠ {model_path.name} already exists. Skipping download.") - else: - success = download_file(model_url, model_path) - if not success: - print("\n⚠ Warning: The caffemodel file is large (~10MB) and may require manual download.") - print("Alternative download methods:") - print("1. Using wget:") - print(f" wget -O {model_path} {model_url}") - print("2. Using curl:") - print(f" curl -L -o {model_path} {model_url}") - print("3. Direct browser download:") - print(f" {model_url}") - print() - - print() - print("=" * 60) - if prototxt_path.exists() and model_path.exists(): - print("✓ All model files are ready!") - else: - print("⚠ Some files may be missing. Please check the files above.") - print("=" * 60) + print("✓ No models directory to remove") + + print("\nSystem ready for HOG-based person detection.") if __name__ == "__main__": diff --git a/face_detector.py b/face_detector.py deleted file mode 100644 index 24a11eb..0000000 --- a/face_detector.py +++ /dev/null @@ -1,118 +0,0 @@ -""" -Face Detection Module using OpenCV DNN Face Detector -Uses pre-trained models for accurate face detection. -""" - -import cv2 -import numpy as np -import os - - -class FaceDetector: - def __init__(self, model_dir="models", confidence_threshold=0.5): - """ - Initialize the face detector with OpenCV DNN models. 
- - Args: - model_dir: Directory containing the model files - confidence_threshold: Minimum confidence for face detection (0.0-1.0) - """ - self.confidence_threshold = confidence_threshold - self.model_dir = model_dir - - # Paths to model files - self.prototxt_path = os.path.join(model_dir, "deploy.prototxt") - self.model_path = os.path.join(model_dir, "res10_300x300_ssd_iter_140000.caffemodel") - - # Load the DNN face detector - self.net = None - self._load_model() - - def _load_model(self): - """Load the OpenCV DNN face detection model.""" - if not os.path.exists(self.prototxt_path): - raise FileNotFoundError( - f"Model prototxt file not found: {self.prototxt_path}\n" - "Please download the model files first." - ) - if not os.path.exists(self.model_path): - raise FileNotFoundError( - f"Model weights file not found: {self.model_path}\n" - "Please download the model files first." - ) - - self.net = cv2.dnn.readNetFromCaffe(self.prototxt_path, self.model_path) - - def detect_faces(self, frame): - """ - Detect faces in a frame. - - Args: - frame: BGR image frame from OpenCV - - Returns: - List of tuples (x, y, w, h, confidence) for each detected face - where (x, y) is top-left corner, w and h are width and height - """ - if self.net is None: - return [] - - # Get frame dimensions - (h, w) = frame.shape[:2] - - # Create blob from frame (preprocessing for DNN) - blob = cv2.dnn.blobFromImage( - cv2.resize(frame, (300, 300)), - 1.0, - (300, 300), - (104.0, 177.0, 123.0) - ) - - # Pass blob through network - self.net.setInput(blob) - detections = self.net.forward() - - faces = [] - - # Process detections - for i in range(0, detections.shape[2]): - confidence = detections[0, 0, i, 2] - - # Filter weak detections - if confidence > self.confidence_threshold: - # Get bounding box coordinates - box = detections[0, 0, i, 3:7] * np.array([w, h, w, h]) - (x1, y1, x2, y2) = box.astype("int") - - # Ensure coordinates are within frame bounds - x1 = max(0, x1) - y1 = max(0, y1) - x2 = min(w, x2) - y2 = min(h, y2) - - # Convert to (x, y, w, h) format - faces.append((x1, y1, x2 - x1, y2 - y1, confidence)) - - return faces - - def draw_faces(self, frame, faces, color=(0, 255, 0), thickness=2): - """ - Draw bounding boxes around detected faces. 
- - Args: - frame: Frame to draw on - faces: List of face detections from detect_faces() - color: BGR color tuple for bounding boxes - thickness: Line thickness - - Returns: - Frame with bounding boxes drawn - """ - result_frame = frame.copy() - for (x, y, w, h, confidence) in faces: - cv2.rectangle(result_frame, (x, y), (x + w, y + h), color, thickness) - # Optionally draw confidence score - label = f"{confidence:.2f}" - cv2.putText(result_frame, label, (x, y - 10), - cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2) - return result_frame diff --git a/models/deploy.prototxt b/models/deploy.prototxt deleted file mode 100644 index a128515..0000000 --- a/models/deploy.prototxt +++ /dev/null @@ -1,1790 +0,0 @@ -input: "data" -input_shape { - dim: 1 - dim: 3 - dim: 300 - dim: 300 -} - -layer { - name: "data_bn" - type: "BatchNorm" - bottom: "data" - top: "data_bn" - param { - lr_mult: 0.0 - } - param { - lr_mult: 0.0 - } - param { - lr_mult: 0.0 - } -} -layer { - name: "data_scale" - type: "Scale" - bottom: "data_bn" - top: "data_bn" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 1.0 - } - scale_param { - bias_term: true - } -} -layer { - name: "conv1_h" - type: "Convolution" - bottom: "data_bn" - top: "conv1_h" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 32 - pad: 3 - kernel_size: 7 - stride: 2 - weight_filler { - type: "msra" - variance_norm: FAN_OUT - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "conv1_bn_h" - type: "BatchNorm" - bottom: "conv1_h" - top: "conv1_h" - param { - lr_mult: 0.0 - } - param { - lr_mult: 0.0 - } - param { - lr_mult: 0.0 - } -} -layer { - name: "conv1_scale_h" - type: "Scale" - bottom: "conv1_h" - top: "conv1_h" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 1.0 - } - scale_param { - bias_term: true - } -} -layer { - name: "conv1_relu" - type: "ReLU" - bottom: "conv1_h" - top: "conv1_h" -} -layer { - name: "conv1_pool" - type: "Pooling" - bottom: "conv1_h" - top: "conv1_pool" - pooling_param { - kernel_size: 3 - stride: 2 - } -} -layer { - name: "layer_64_1_conv1_h" - type: "Convolution" - bottom: "conv1_pool" - top: "layer_64_1_conv1_h" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 32 - bias_term: false - pad: 1 - kernel_size: 3 - stride: 1 - weight_filler { - type: "msra" - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "layer_64_1_bn2_h" - type: "BatchNorm" - bottom: "layer_64_1_conv1_h" - top: "layer_64_1_conv1_h" - param { - lr_mult: 0.0 - } - param { - lr_mult: 0.0 - } - param { - lr_mult: 0.0 - } -} -layer { - name: "layer_64_1_scale2_h" - type: "Scale" - bottom: "layer_64_1_conv1_h" - top: "layer_64_1_conv1_h" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 1.0 - } - scale_param { - bias_term: true - } -} -layer { - name: "layer_64_1_relu2" - type: "ReLU" - bottom: "layer_64_1_conv1_h" - top: "layer_64_1_conv1_h" -} -layer { - name: "layer_64_1_conv2_h" - type: "Convolution" - bottom: "layer_64_1_conv1_h" - top: "layer_64_1_conv2_h" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 32 - bias_term: false - pad: 1 - kernel_size: 3 - stride: 1 - weight_filler { - type: "msra" - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "layer_64_1_sum" - type: "Eltwise" - bottom: "layer_64_1_conv2_h" - bottom: 
"conv1_pool" - top: "layer_64_1_sum" -} -layer { - name: "layer_128_1_bn1_h" - type: "BatchNorm" - bottom: "layer_64_1_sum" - top: "layer_128_1_bn1_h" - param { - lr_mult: 0.0 - } - param { - lr_mult: 0.0 - } - param { - lr_mult: 0.0 - } -} -layer { - name: "layer_128_1_scale1_h" - type: "Scale" - bottom: "layer_128_1_bn1_h" - top: "layer_128_1_bn1_h" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 1.0 - } - scale_param { - bias_term: true - } -} -layer { - name: "layer_128_1_relu1" - type: "ReLU" - bottom: "layer_128_1_bn1_h" - top: "layer_128_1_bn1_h" -} -layer { - name: "layer_128_1_conv1_h" - type: "Convolution" - bottom: "layer_128_1_bn1_h" - top: "layer_128_1_conv1_h" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 128 - bias_term: false - pad: 1 - kernel_size: 3 - stride: 2 - weight_filler { - type: "msra" - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "layer_128_1_bn2" - type: "BatchNorm" - bottom: "layer_128_1_conv1_h" - top: "layer_128_1_conv1_h" - param { - lr_mult: 0.0 - } - param { - lr_mult: 0.0 - } - param { - lr_mult: 0.0 - } -} -layer { - name: "layer_128_1_scale2" - type: "Scale" - bottom: "layer_128_1_conv1_h" - top: "layer_128_1_conv1_h" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 1.0 - } - scale_param { - bias_term: true - } -} -layer { - name: "layer_128_1_relu2" - type: "ReLU" - bottom: "layer_128_1_conv1_h" - top: "layer_128_1_conv1_h" -} -layer { - name: "layer_128_1_conv2" - type: "Convolution" - bottom: "layer_128_1_conv1_h" - top: "layer_128_1_conv2" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 128 - bias_term: false - pad: 1 - kernel_size: 3 - stride: 1 - weight_filler { - type: "msra" - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "layer_128_1_conv_expand_h" - type: "Convolution" - bottom: "layer_128_1_bn1_h" - top: "layer_128_1_conv_expand_h" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 128 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 2 - weight_filler { - type: "msra" - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "layer_128_1_sum" - type: "Eltwise" - bottom: "layer_128_1_conv2" - bottom: "layer_128_1_conv_expand_h" - top: "layer_128_1_sum" -} -layer { - name: "layer_256_1_bn1" - type: "BatchNorm" - bottom: "layer_128_1_sum" - top: "layer_256_1_bn1" - param { - lr_mult: 0.0 - } - param { - lr_mult: 0.0 - } - param { - lr_mult: 0.0 - } -} -layer { - name: "layer_256_1_scale1" - type: "Scale" - bottom: "layer_256_1_bn1" - top: "layer_256_1_bn1" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 1.0 - } - scale_param { - bias_term: true - } -} -layer { - name: "layer_256_1_relu1" - type: "ReLU" - bottom: "layer_256_1_bn1" - top: "layer_256_1_bn1" -} -layer { - name: "layer_256_1_conv1" - type: "Convolution" - bottom: "layer_256_1_bn1" - top: "layer_256_1_conv1" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 256 - bias_term: false - pad: 1 - kernel_size: 3 - stride: 2 - weight_filler { - type: "msra" - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "layer_256_1_bn2" - type: "BatchNorm" - bottom: "layer_256_1_conv1" - top: "layer_256_1_conv1" - param { - lr_mult: 0.0 - } - param { - lr_mult: 0.0 - } - param { - lr_mult: 0.0 - } -} -layer { - name: "layer_256_1_scale2" 
- type: "Scale" - bottom: "layer_256_1_conv1" - top: "layer_256_1_conv1" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 1.0 - } - scale_param { - bias_term: true - } -} -layer { - name: "layer_256_1_relu2" - type: "ReLU" - bottom: "layer_256_1_conv1" - top: "layer_256_1_conv1" -} -layer { - name: "layer_256_1_conv2" - type: "Convolution" - bottom: "layer_256_1_conv1" - top: "layer_256_1_conv2" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 256 - bias_term: false - pad: 1 - kernel_size: 3 - stride: 1 - weight_filler { - type: "msra" - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "layer_256_1_conv_expand" - type: "Convolution" - bottom: "layer_256_1_bn1" - top: "layer_256_1_conv_expand" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 256 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 2 - weight_filler { - type: "msra" - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "layer_256_1_sum" - type: "Eltwise" - bottom: "layer_256_1_conv2" - bottom: "layer_256_1_conv_expand" - top: "layer_256_1_sum" -} -layer { - name: "layer_512_1_bn1" - type: "BatchNorm" - bottom: "layer_256_1_sum" - top: "layer_512_1_bn1" - param { - lr_mult: 0.0 - } - param { - lr_mult: 0.0 - } - param { - lr_mult: 0.0 - } -} -layer { - name: "layer_512_1_scale1" - type: "Scale" - bottom: "layer_512_1_bn1" - top: "layer_512_1_bn1" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 1.0 - } - scale_param { - bias_term: true - } -} -layer { - name: "layer_512_1_relu1" - type: "ReLU" - bottom: "layer_512_1_bn1" - top: "layer_512_1_bn1" -} -layer { - name: "layer_512_1_conv1_h" - type: "Convolution" - bottom: "layer_512_1_bn1" - top: "layer_512_1_conv1_h" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 128 - bias_term: false - pad: 1 - kernel_size: 3 - stride: 1 # 2 - weight_filler { - type: "msra" - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "layer_512_1_bn2_h" - type: "BatchNorm" - bottom: "layer_512_1_conv1_h" - top: "layer_512_1_conv1_h" - param { - lr_mult: 0.0 - } - param { - lr_mult: 0.0 - } - param { - lr_mult: 0.0 - } -} -layer { - name: "layer_512_1_scale2_h" - type: "Scale" - bottom: "layer_512_1_conv1_h" - top: "layer_512_1_conv1_h" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 1.0 - } - scale_param { - bias_term: true - } -} -layer { - name: "layer_512_1_relu2" - type: "ReLU" - bottom: "layer_512_1_conv1_h" - top: "layer_512_1_conv1_h" -} -layer { - name: "layer_512_1_conv2_h" - type: "Convolution" - bottom: "layer_512_1_conv1_h" - top: "layer_512_1_conv2_h" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 256 - bias_term: false - pad: 2 # 1 - kernel_size: 3 - stride: 1 - dilation: 2 - weight_filler { - type: "msra" - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "layer_512_1_conv_expand_h" - type: "Convolution" - bottom: "layer_512_1_bn1" - top: "layer_512_1_conv_expand_h" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 256 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 # 2 - weight_filler { - type: "msra" - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "layer_512_1_sum" - type: "Eltwise" - bottom: "layer_512_1_conv2_h" - bottom: "layer_512_1_conv_expand_h" - 
top: "layer_512_1_sum" -} -layer { - name: "last_bn_h" - type: "BatchNorm" - bottom: "layer_512_1_sum" - top: "layer_512_1_sum" - param { - lr_mult: 0.0 - } - param { - lr_mult: 0.0 - } - param { - lr_mult: 0.0 - } -} -layer { - name: "last_scale_h" - type: "Scale" - bottom: "layer_512_1_sum" - top: "layer_512_1_sum" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 1.0 - } - scale_param { - bias_term: true - } -} -layer { - name: "last_relu" - type: "ReLU" - bottom: "layer_512_1_sum" - top: "fc7" -} - -layer { - name: "conv6_1_h" - type: "Convolution" - bottom: "fc7" - top: "conv6_1_h" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 0 - } - convolution_param { - num_output: 128 - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } -} -layer { - name: "conv6_1_relu" - type: "ReLU" - bottom: "conv6_1_h" - top: "conv6_1_h" -} -layer { - name: "conv6_2_h" - type: "Convolution" - bottom: "conv6_1_h" - top: "conv6_2_h" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 0 - } - convolution_param { - num_output: 256 - pad: 1 - kernel_size: 3 - stride: 2 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } -} -layer { - name: "conv6_2_relu" - type: "ReLU" - bottom: "conv6_2_h" - top: "conv6_2_h" -} -layer { - name: "conv7_1_h" - type: "Convolution" - bottom: "conv6_2_h" - top: "conv7_1_h" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 0 - } - convolution_param { - num_output: 64 - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } -} -layer { - name: "conv7_1_relu" - type: "ReLU" - bottom: "conv7_1_h" - top: "conv7_1_h" -} -layer { - name: "conv7_2_h" - type: "Convolution" - bottom: "conv7_1_h" - top: "conv7_2_h" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 0 - } - convolution_param { - num_output: 128 - pad: 1 - kernel_size: 3 - stride: 2 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } -} -layer { - name: "conv7_2_relu" - type: "ReLU" - bottom: "conv7_2_h" - top: "conv7_2_h" -} -layer { - name: "conv8_1_h" - type: "Convolution" - bottom: "conv7_2_h" - top: "conv8_1_h" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 0 - } - convolution_param { - num_output: 64 - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } -} -layer { - name: "conv8_1_relu" - type: "ReLU" - bottom: "conv8_1_h" - top: "conv8_1_h" -} -layer { - name: "conv8_2_h" - type: "Convolution" - bottom: "conv8_1_h" - top: "conv8_2_h" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 0 - } - convolution_param { - num_output: 128 - pad: 0 - kernel_size: 3 - stride: 1 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } -} -layer { - name: "conv8_2_relu" - type: "ReLU" - bottom: "conv8_2_h" - top: "conv8_2_h" -} -layer { - name: "conv9_1_h" - type: "Convolution" - bottom: "conv8_2_h" - top: "conv9_1_h" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 0 - } - convolution_param { - num_output: 64 - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } -} -layer { 
- name: "conv9_1_relu" - type: "ReLU" - bottom: "conv9_1_h" - top: "conv9_1_h" -} -layer { - name: "conv9_2_h" - type: "Convolution" - bottom: "conv9_1_h" - top: "conv9_2_h" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 0 - } - convolution_param { - num_output: 128 - pad: 0 - kernel_size: 3 - stride: 1 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } -} -layer { - name: "conv9_2_relu" - type: "ReLU" - bottom: "conv9_2_h" - top: "conv9_2_h" -} -layer { - name: "conv4_3_norm" - type: "Normalize" - bottom: "layer_256_1_bn1" - top: "conv4_3_norm" - norm_param { - across_spatial: false - scale_filler { - type: "constant" - value: 20 - } - channel_shared: false - } -} -layer { - name: "conv4_3_norm_mbox_loc" - type: "Convolution" - bottom: "conv4_3_norm" - top: "conv4_3_norm_mbox_loc" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 0 - } - convolution_param { - num_output: 16 - pad: 1 - kernel_size: 3 - stride: 1 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } -} -layer { - name: "conv4_3_norm_mbox_loc_perm" - type: "Permute" - bottom: "conv4_3_norm_mbox_loc" - top: "conv4_3_norm_mbox_loc_perm" - permute_param { - order: 0 - order: 2 - order: 3 - order: 1 - } -} -layer { - name: "conv4_3_norm_mbox_loc_flat" - type: "Flatten" - bottom: "conv4_3_norm_mbox_loc_perm" - top: "conv4_3_norm_mbox_loc_flat" - flatten_param { - axis: 1 - } -} -layer { - name: "conv4_3_norm_mbox_conf" - type: "Convolution" - bottom: "conv4_3_norm" - top: "conv4_3_norm_mbox_conf" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 0 - } - convolution_param { - num_output: 8 # 84 - pad: 1 - kernel_size: 3 - stride: 1 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } -} -layer { - name: "conv4_3_norm_mbox_conf_perm" - type: "Permute" - bottom: "conv4_3_norm_mbox_conf" - top: "conv4_3_norm_mbox_conf_perm" - permute_param { - order: 0 - order: 2 - order: 3 - order: 1 - } -} -layer { - name: "conv4_3_norm_mbox_conf_flat" - type: "Flatten" - bottom: "conv4_3_norm_mbox_conf_perm" - top: "conv4_3_norm_mbox_conf_flat" - flatten_param { - axis: 1 - } -} -layer { - name: "conv4_3_norm_mbox_priorbox" - type: "PriorBox" - bottom: "conv4_3_norm" - bottom: "data" - top: "conv4_3_norm_mbox_priorbox" - prior_box_param { - min_size: 30.0 - max_size: 60.0 - aspect_ratio: 2 - flip: true - clip: false - variance: 0.1 - variance: 0.1 - variance: 0.2 - variance: 0.2 - step: 8 - offset: 0.5 - } -} -layer { - name: "fc7_mbox_loc" - type: "Convolution" - bottom: "fc7" - top: "fc7_mbox_loc" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 0 - } - convolution_param { - num_output: 24 - pad: 1 - kernel_size: 3 - stride: 1 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } -} -layer { - name: "fc7_mbox_loc_perm" - type: "Permute" - bottom: "fc7_mbox_loc" - top: "fc7_mbox_loc_perm" - permute_param { - order: 0 - order: 2 - order: 3 - order: 1 - } -} -layer { - name: "fc7_mbox_loc_flat" - type: "Flatten" - bottom: "fc7_mbox_loc_perm" - top: "fc7_mbox_loc_flat" - flatten_param { - axis: 1 - } -} -layer { - name: "fc7_mbox_conf" - type: "Convolution" - bottom: "fc7" - top: "fc7_mbox_conf" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 0 - } - convolution_param { - num_output: 12 # 126 - pad: 1 - kernel_size: 3 - stride: 1 - 
weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } -} -layer { - name: "fc7_mbox_conf_perm" - type: "Permute" - bottom: "fc7_mbox_conf" - top: "fc7_mbox_conf_perm" - permute_param { - order: 0 - order: 2 - order: 3 - order: 1 - } -} -layer { - name: "fc7_mbox_conf_flat" - type: "Flatten" - bottom: "fc7_mbox_conf_perm" - top: "fc7_mbox_conf_flat" - flatten_param { - axis: 1 - } -} -layer { - name: "fc7_mbox_priorbox" - type: "PriorBox" - bottom: "fc7" - bottom: "data" - top: "fc7_mbox_priorbox" - prior_box_param { - min_size: 60.0 - max_size: 111.0 - aspect_ratio: 2 - aspect_ratio: 3 - flip: true - clip: false - variance: 0.1 - variance: 0.1 - variance: 0.2 - variance: 0.2 - step: 16 - offset: 0.5 - } -} -layer { - name: "conv6_2_mbox_loc" - type: "Convolution" - bottom: "conv6_2_h" - top: "conv6_2_mbox_loc" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 0 - } - convolution_param { - num_output: 24 - pad: 1 - kernel_size: 3 - stride: 1 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } -} -layer { - name: "conv6_2_mbox_loc_perm" - type: "Permute" - bottom: "conv6_2_mbox_loc" - top: "conv6_2_mbox_loc_perm" - permute_param { - order: 0 - order: 2 - order: 3 - order: 1 - } -} -layer { - name: "conv6_2_mbox_loc_flat" - type: "Flatten" - bottom: "conv6_2_mbox_loc_perm" - top: "conv6_2_mbox_loc_flat" - flatten_param { - axis: 1 - } -} -layer { - name: "conv6_2_mbox_conf" - type: "Convolution" - bottom: "conv6_2_h" - top: "conv6_2_mbox_conf" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 0 - } - convolution_param { - num_output: 12 # 126 - pad: 1 - kernel_size: 3 - stride: 1 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } -} -layer { - name: "conv6_2_mbox_conf_perm" - type: "Permute" - bottom: "conv6_2_mbox_conf" - top: "conv6_2_mbox_conf_perm" - permute_param { - order: 0 - order: 2 - order: 3 - order: 1 - } -} -layer { - name: "conv6_2_mbox_conf_flat" - type: "Flatten" - bottom: "conv6_2_mbox_conf_perm" - top: "conv6_2_mbox_conf_flat" - flatten_param { - axis: 1 - } -} -layer { - name: "conv6_2_mbox_priorbox" - type: "PriorBox" - bottom: "conv6_2_h" - bottom: "data" - top: "conv6_2_mbox_priorbox" - prior_box_param { - min_size: 111.0 - max_size: 162.0 - aspect_ratio: 2 - aspect_ratio: 3 - flip: true - clip: false - variance: 0.1 - variance: 0.1 - variance: 0.2 - variance: 0.2 - step: 32 - offset: 0.5 - } -} -layer { - name: "conv7_2_mbox_loc" - type: "Convolution" - bottom: "conv7_2_h" - top: "conv7_2_mbox_loc" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 0 - } - convolution_param { - num_output: 24 - pad: 1 - kernel_size: 3 - stride: 1 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } -} -layer { - name: "conv7_2_mbox_loc_perm" - type: "Permute" - bottom: "conv7_2_mbox_loc" - top: "conv7_2_mbox_loc_perm" - permute_param { - order: 0 - order: 2 - order: 3 - order: 1 - } -} -layer { - name: "conv7_2_mbox_loc_flat" - type: "Flatten" - bottom: "conv7_2_mbox_loc_perm" - top: "conv7_2_mbox_loc_flat" - flatten_param { - axis: 1 - } -} -layer { - name: "conv7_2_mbox_conf" - type: "Convolution" - bottom: "conv7_2_h" - top: "conv7_2_mbox_conf" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 0 - } - convolution_param { - num_output: 12 # 126 - pad: 1 - kernel_size: 3 - stride: 1 - weight_filler { - type: 
"xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } -} -layer { - name: "conv7_2_mbox_conf_perm" - type: "Permute" - bottom: "conv7_2_mbox_conf" - top: "conv7_2_mbox_conf_perm" - permute_param { - order: 0 - order: 2 - order: 3 - order: 1 - } -} -layer { - name: "conv7_2_mbox_conf_flat" - type: "Flatten" - bottom: "conv7_2_mbox_conf_perm" - top: "conv7_2_mbox_conf_flat" - flatten_param { - axis: 1 - } -} -layer { - name: "conv7_2_mbox_priorbox" - type: "PriorBox" - bottom: "conv7_2_h" - bottom: "data" - top: "conv7_2_mbox_priorbox" - prior_box_param { - min_size: 162.0 - max_size: 213.0 - aspect_ratio: 2 - aspect_ratio: 3 - flip: true - clip: false - variance: 0.1 - variance: 0.1 - variance: 0.2 - variance: 0.2 - step: 64 - offset: 0.5 - } -} -layer { - name: "conv8_2_mbox_loc" - type: "Convolution" - bottom: "conv8_2_h" - top: "conv8_2_mbox_loc" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 0 - } - convolution_param { - num_output: 16 - pad: 1 - kernel_size: 3 - stride: 1 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } -} -layer { - name: "conv8_2_mbox_loc_perm" - type: "Permute" - bottom: "conv8_2_mbox_loc" - top: "conv8_2_mbox_loc_perm" - permute_param { - order: 0 - order: 2 - order: 3 - order: 1 - } -} -layer { - name: "conv8_2_mbox_loc_flat" - type: "Flatten" - bottom: "conv8_2_mbox_loc_perm" - top: "conv8_2_mbox_loc_flat" - flatten_param { - axis: 1 - } -} -layer { - name: "conv8_2_mbox_conf" - type: "Convolution" - bottom: "conv8_2_h" - top: "conv8_2_mbox_conf" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 0 - } - convolution_param { - num_output: 8 # 84 - pad: 1 - kernel_size: 3 - stride: 1 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } -} -layer { - name: "conv8_2_mbox_conf_perm" - type: "Permute" - bottom: "conv8_2_mbox_conf" - top: "conv8_2_mbox_conf_perm" - permute_param { - order: 0 - order: 2 - order: 3 - order: 1 - } -} -layer { - name: "conv8_2_mbox_conf_flat" - type: "Flatten" - bottom: "conv8_2_mbox_conf_perm" - top: "conv8_2_mbox_conf_flat" - flatten_param { - axis: 1 - } -} -layer { - name: "conv8_2_mbox_priorbox" - type: "PriorBox" - bottom: "conv8_2_h" - bottom: "data" - top: "conv8_2_mbox_priorbox" - prior_box_param { - min_size: 213.0 - max_size: 264.0 - aspect_ratio: 2 - flip: true - clip: false - variance: 0.1 - variance: 0.1 - variance: 0.2 - variance: 0.2 - step: 100 - offset: 0.5 - } -} -layer { - name: "conv9_2_mbox_loc" - type: "Convolution" - bottom: "conv9_2_h" - top: "conv9_2_mbox_loc" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 0 - } - convolution_param { - num_output: 16 - pad: 1 - kernel_size: 3 - stride: 1 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } -} -layer { - name: "conv9_2_mbox_loc_perm" - type: "Permute" - bottom: "conv9_2_mbox_loc" - top: "conv9_2_mbox_loc_perm" - permute_param { - order: 0 - order: 2 - order: 3 - order: 1 - } -} -layer { - name: "conv9_2_mbox_loc_flat" - type: "Flatten" - bottom: "conv9_2_mbox_loc_perm" - top: "conv9_2_mbox_loc_flat" - flatten_param { - axis: 1 - } -} -layer { - name: "conv9_2_mbox_conf" - type: "Convolution" - bottom: "conv9_2_h" - top: "conv9_2_mbox_conf" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 0 - } - convolution_param { - num_output: 8 # 84 - pad: 1 - kernel_size: 3 - stride: 1 - weight_filler { - type: "xavier" 
- } - bias_filler { - type: "constant" - value: 0 - } - } -} -layer { - name: "conv9_2_mbox_conf_perm" - type: "Permute" - bottom: "conv9_2_mbox_conf" - top: "conv9_2_mbox_conf_perm" - permute_param { - order: 0 - order: 2 - order: 3 - order: 1 - } -} -layer { - name: "conv9_2_mbox_conf_flat" - type: "Flatten" - bottom: "conv9_2_mbox_conf_perm" - top: "conv9_2_mbox_conf_flat" - flatten_param { - axis: 1 - } -} -layer { - name: "conv9_2_mbox_priorbox" - type: "PriorBox" - bottom: "conv9_2_h" - bottom: "data" - top: "conv9_2_mbox_priorbox" - prior_box_param { - min_size: 264.0 - max_size: 315.0 - aspect_ratio: 2 - flip: true - clip: false - variance: 0.1 - variance: 0.1 - variance: 0.2 - variance: 0.2 - step: 300 - offset: 0.5 - } -} -layer { - name: "mbox_loc" - type: "Concat" - bottom: "conv4_3_norm_mbox_loc_flat" - bottom: "fc7_mbox_loc_flat" - bottom: "conv6_2_mbox_loc_flat" - bottom: "conv7_2_mbox_loc_flat" - bottom: "conv8_2_mbox_loc_flat" - bottom: "conv9_2_mbox_loc_flat" - top: "mbox_loc" - concat_param { - axis: 1 - } -} -layer { - name: "mbox_conf" - type: "Concat" - bottom: "conv4_3_norm_mbox_conf_flat" - bottom: "fc7_mbox_conf_flat" - bottom: "conv6_2_mbox_conf_flat" - bottom: "conv7_2_mbox_conf_flat" - bottom: "conv8_2_mbox_conf_flat" - bottom: "conv9_2_mbox_conf_flat" - top: "mbox_conf" - concat_param { - axis: 1 - } -} -layer { - name: "mbox_priorbox" - type: "Concat" - bottom: "conv4_3_norm_mbox_priorbox" - bottom: "fc7_mbox_priorbox" - bottom: "conv6_2_mbox_priorbox" - bottom: "conv7_2_mbox_priorbox" - bottom: "conv8_2_mbox_priorbox" - bottom: "conv9_2_mbox_priorbox" - top: "mbox_priorbox" - concat_param { - axis: 2 - } -} - -layer { - name: "mbox_conf_reshape" - type: "Reshape" - bottom: "mbox_conf" - top: "mbox_conf_reshape" - reshape_param { - shape { - dim: 0 - dim: -1 - dim: 2 - } - } -} -layer { - name: "mbox_conf_softmax" - type: "Softmax" - bottom: "mbox_conf_reshape" - top: "mbox_conf_softmax" - softmax_param { - axis: 2 - } -} -layer { - name: "mbox_conf_flatten" - type: "Flatten" - bottom: "mbox_conf_softmax" - top: "mbox_conf_flatten" - flatten_param { - axis: 1 - } -} - -layer { - name: "detection_out" - type: "DetectionOutput" - bottom: "mbox_loc" - bottom: "mbox_conf_flatten" - bottom: "mbox_priorbox" - top: "detection_out" - include { - phase: TEST - } - detection_output_param { - num_classes: 2 - share_location: true - background_label_id: 0 - nms_param { - nms_threshold: 0.45 - top_k: 400 - } - code_type: CENTER_SIZE - keep_top_k: 200 - confidence_threshold: 0.01 - clip: 1 - } -} diff --git a/models/res10_300x300_ssd_iter_140000.caffemodel b/models/res10_300x300_ssd_iter_140000.caffemodel deleted file mode 100644 index 809dfd7..0000000 Binary files a/models/res10_300x300_ssd_iter_140000.caffemodel and /dev/null differ diff --git a/person_detector.py b/person_detector.py new file mode 100644 index 0000000..e4e7ecb --- /dev/null +++ b/person_detector.py @@ -0,0 +1,129 @@ +""" +Person Detection Module using OpenCV HOG (Histogram of Oriented Gradients). +Uses built-in OpenCV people detector - no external model files required. +""" + +import cv2 +import numpy as np + + +class PersonDetector: + def __init__(self, model_dir=None, confidence_threshold=0.6): + """ + Initialize the person detector with HOG descriptor. 
+ + Args: + model_dir: Ignored for HOG (kept for API compatibility) + confidence_threshold: Threshold for detection weights + """ + self.confidence_threshold = confidence_threshold + + # Initialize HOG descriptor/person detector + self.hog = cv2.HOGDescriptor() + self.hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector()) + print("Initialized HOG Person Detector") + + def detect_people(self, frame): + """ + Detect people in a frame using HOG. + + Args: + frame: BGR image frame from OpenCV + + Returns: + List of tuples (x, y, w, h, confidence) for each detected person + """ + # Resize for faster processing (optional, but HOG is computationally expensive) + # Using a slightly smaller scale can speed things up significantly + scale = 1.0 + if frame.shape[1] > 640: + scale = 640 / frame.shape[1] + frame_small = cv2.resize(frame, None, fx=scale, fy=scale) + else: + frame_small = frame + + # Detect people + # winStride: step size in x and y + # padding: padding around the input + # scale: coefficient of the detection window increase + (rects, weights) = self.hog.detectMultiScale( + frame_small, + winStride=(4, 4), + padding=(8, 8), + scale=1.05, + hitThreshold=0.0 # Default + ) + + people = [] + + # Convert detected rectangles to our format + for i, (x, y, w, h) in enumerate(rects): + confidence = weights[i] + + # HOG returns confidence scores, usually > 0. + # We can filter if needed. + check_conf = float(confidence) if isinstance(confidence, (float, np.float32, np.float64)) else float(confidence[0]) + + if check_conf > self.confidence_threshold: + # Scale back up if we resized + if scale != 1.0: + x = int(x / scale) + y = int(y / scale) + w = int(w / scale) + h = int(h / scale) + + # Size filtering + # Ignore detections that are too small (noise) or too large (walls/windows) + # Assumes 640x480 or similar resolution + if w < 40 or w > 400 or h < 80 or h > 480: + continue + + # Ensure coordinates are within frame bounds (simple clamp) + x = max(0, x) + y = max(0, y) + + people.append((x, y, w, h, check_conf)) + + return people + + detect_faces = detect_people # Alias for compatibility + + def draw_people(self, frame, people, color=(0, 255, 0), thickness=2): + """ + Draw bounding boxes around detected people. + """ + result_frame = frame.copy() + for (x, y, w, h, confidence) in people: + cv2.rectangle(result_frame, (x, y), (x + w, y + h), color, thickness) + + # Draw label + label = f"Person: {confidence:.2f}" + + # Get label size + (label_w, label_h), baseline = cv2.getTextSize( + label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1 + ) + + # Draw background rectangle for label + cv2.rectangle( + result_frame, + (x, y - label_h - 10), + (x + label_w, y), + color, + -1 + ) + + # Draw text + cv2.putText( + result_frame, + label, + (x, y - 5), + cv2.FONT_HERSHEY_SIMPLEX, + 0.5, + (0, 0, 0), + 1 + ) + + return result_frame + + draw_faces = draw_people # Alias for compatibility diff --git a/zone_tracker.py b/zone_tracker.py index 82fa62c..89ca731 100644 --- a/zone_tracker.py +++ b/zone_tracker.py @@ -10,7 +10,7 @@ from collections import defaultdict class ZoneTracker: def __init__(self, frame_width, entry_zone_percent=0.4, exit_zone_percent=0.4, - cooldown_seconds=2.0, center_buffer_percent=0.1): + cooldown_seconds=4.0, center_buffer_percent=0.1): """ Initialize the zone tracker. 
@@ -97,8 +97,8 @@ class ZoneTracker: Returns: face_id if matched, None if new face """ - max_distance = 100 # Maximum pixel distance to consider it the same face - max_size_diff = 50 # Maximum size difference to consider it the same face + max_distance = 150 # Maximum pixel distance to consider it the same face + max_size_diff = 100 # Maximum size difference to consider it the same face for face_id, face_data in self.tracked_faces.items(): # Skip if face hasn't been seen recently (within last 2 seconds)
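Note: a minimal standalone sketch of the new detector API introduced above. The camera index and output filename are illustrative; PersonDetector, detect_people, and draw_people are the names added by person_detector.py in this diff:

    import cv2
    from person_detector import PersonDetector

    detector = PersonDetector(confidence_threshold=0.6)

    cap = cv2.VideoCapture(0)   # USB camera index 0, as in camera.py
    ret, frame = cap.read()
    cap.release()

    if ret:
        # detect_people returns a list of (x, y, w, h, confidence) tuples
        people = detector.detect_people(frame)
        print(f"Detected {len(people)} people")

        # draw_people returns a copy of the frame with labeled bounding boxes
        annotated = detector.draw_people(frame, people)
        cv2.imwrite("detections.jpg", annotated)   # illustrative output path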
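Note: the loosened tracker constants (max_distance 100 -> 150 px, max_size_diff 50 -> 100 px, cooldown 2.0 -> 4.0 s) reflect that full-body HOG boxes are larger and jitter more between frames than the face boxes they replace. The matching logic itself is not shown in this hunk; a hypothetical criterion consistent with these thresholds might look like:

    # Hypothetical sketch only -- the real check lives in ZoneTracker's
    # matching method, which this diff does not modify.
    import math

    def same_person(prev_box, new_box, max_distance=150, max_size_diff=100):
        px, py, pw, ph = prev_box
        nx, ny, nw, nh = new_box
        # Compare box centers and widths between consecutive detections
        center_dist = math.hypot((px + pw / 2) - (nx + nw / 2),
                                 (py + ph / 2) - (ny + nh / 2))
        return center_dist <= max_distance and abs(pw - nw) <= max_size_diff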