commit 432f0378bf571988f686c515953e6be903e8955c Author: eroncero Date: Tue Jan 20 00:44:06 2026 +0100 feat: initial implementation of People Counter web app - Add Flask application with MJPEG video streaming - Implement OpenCV DNN face detection module - Add zone-based entry/exit tracking with cooldown mechanism - Create web interface with real-time WebSocket updates - Add model download script and comprehensive README - Include OpenCV DNN model files for face detection diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..486f2f5 --- /dev/null +++ b/.gitignore @@ -0,0 +1,38 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ +*.egg-info/ +dist/ +build/ + +# IDE +.vscode/ +.idea/ +*.swp +*.swo +*~ +.cursor/ + +# Logs +*.log +.cursor/debug.log + +# OS +.DS_Store +Thumbs.db + +# Project specific - model files are included but could be excluded if needed +# Uncomment the line below to exclude model files from git (saves ~10MB) +# models/*.caffemodel + +# Environment variables +.env +.env.local diff --git a/README.md b/README.md new file mode 100644 index 0000000..52c0597 --- /dev/null +++ b/README.md @@ -0,0 +1,123 @@ +# People Counter Web App + +A real-time web application that uses USB camera face detection to count people entering and leaving a room, with zone-based entry/exit tracking. + +## Features + +- Real-time face detection using OpenCV DNN face detector +- Zone-based entry/exit tracking (left zone = entry, right zone = exit) +- Live video streaming via MJPEG +- Real-time count updates via WebSocket +- Visual indicators for overpopulation warnings +- Clean, modern web interface + +## Prerequisites + +- Python 3.7 or higher +- USB camera connected to your computer +- Linux/Windows/macOS + +## Installation + +1. **Clone or download this repository** + +2. **Install Python dependencies:** + ```bash + pip install -r requirements.txt + ``` + +3. **Download the face detection model files:** + ```bash + python download_models.py + ``` + + This will download the required OpenCV DNN model files to the `models/` directory. + + **Note:** If the automatic download fails, you can manually download: + - `deploy.prototxt` from: https://github.com/opencv/opencv/blob/master/samples/dnn/face_detector/deploy.prototxt + - `res10_300x300_ssd_iter_140000.caffemodel` from: https://github.com/opencv/opencv_3rdparty/dnn_samples_face_detector_20170830/res10_300x300_ssd_iter_140000.caffemodel + + Place both files in the `models/` directory. + +## Usage + +1. **Make sure your USB camera is connected** + +2. **Run the Flask application:** + ```bash + python app.py + ``` + +3. **Open your web browser and navigate to:** + ``` + http://localhost:5000 + ``` + +4. 
**The application will:** + - Display live video feed from your camera + - Detect faces in real-time + - Count people entering (left zone) and exiting (right zone) + - Display current occupancy and statistics + - Show visual warnings when occupancy is high + +## Configuration + +You can modify the following settings in `app.py` and `camera.py`: + +- **Camera index:** Change `camera_index=0` to use a different camera +- **Frame processing rate:** Adjust `process_every_n_frames` (default: 3) to balance performance and accuracy +- **Face detection confidence:** Modify `face_confidence` threshold (default: 0.5) +- **Zone boundaries:** Adjust `entry_zone_percent` and `exit_zone_percent` in `zone_tracker.py` +- **Cooldown period:** Change `cooldown_seconds` to prevent double-counting (default: 2.0 seconds) +- **Maximum occupancy:** Update `MAX_OCCUPANCY` in `static/js/main.js` for overpopulation warnings + +## How It Works + +1. **Camera Capture:** The camera module captures frames from your USB camera +2. **Face Detection:** OpenCV DNN detects faces in the video frames +3. **Zone Tracking:** The zone tracker determines which zone each face is in: + - **Left 40%** = Entry zone (green) + - **Right 40%** = Exit zone (red) + - **Center 10%** = Buffer zone (ignored to prevent false counts) +4. **Counting Logic:** People are counted when they appear in entry/exit zones with a cooldown period to prevent double-counting +5. **Real-time Updates:** Counts are sent to the web interface via WebSocket for live updates + +## Project Structure + +``` +PeopleCounter/ +├── app.py # Flask main application +├── camera.py # Camera capture wrapper +├── face_detector.py # Face detection module +├── zone_tracker.py # Zone-based tracking logic +├── download_models.py # Script to download model files +├── requirements.txt # Python dependencies +├── models/ # OpenCV DNN model files +│ ├── deploy.prototxt +│ └── res10_300x300_ssd_iter_140000.caffemodel +├── templates/ +│ └── index.html # Main web interface +└── static/ + ├── css/ + │ └── style.css # Styling + └── js/ + └── main.js # Client-side JavaScript +``` + +## Troubleshooting + +- **Camera not found:** Make sure your camera is connected and try changing the `camera_index` in `camera.py` +- **Model files missing:** Run `python download_models.py` to download required files +- **Slow performance:** Increase `process_every_n_frames` value or reduce video resolution +- **Double counting:** Increase the `cooldown_seconds` value in `zone_tracker.py` + +## Security & Privacy + +- All processing runs locally (no cloud storage) +- No face recognition - only detection (no personal identification) +- Video frames processed in memory, not stored +- Optional: You can modify the code to blur faces for display if privacy is a concern + +## License + +This project is open source and available for use and modification. diff --git a/app.py b/app.py new file mode 100644 index 0000000..a1f3c25 --- /dev/null +++ b/app.py @@ -0,0 +1,118 @@ +""" +Flask Application for People Counter Web App +Provides video streaming and real-time count updates via WebSocket. 
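+
+The MJPEG stream is served as a multipart/x-mixed-replace response: each part
+is a single JPEG-encoded frame separated by a "--frame" boundary, so a plain
+<img> tag pointing at /video_feed is enough to render it. Count updates are
+pushed to connected clients as Socket.IO 'count_update' events roughly every
+0.5 seconds by a background thread.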
+""" + +from flask import Flask, render_template, Response +from flask_socketio import SocketIO, emit +import time +import threading +from camera import Camera + +app = Flask(__name__) +app.config['SECRET_KEY'] = 'people-counter-secret-key' +socketio = SocketIO(app, cors_allowed_origins="*") + +# Global camera instance +camera = None +count_update_interval = 0.5 # Update counts every 0.5 seconds + + +def initialize_camera(): + """Initialize the camera.""" + global camera + try: + camera = Camera(camera_index=0, process_every_n_frames=3) + camera.start() + print("Camera initialized successfully") + return True + except Exception as e: + print(f"Failed to initialize camera: {e}") + return False + + +def count_update_thread(): + """Background thread to periodically send count updates via WebSocket.""" + while True: + time.sleep(count_update_interval) + if camera: + counts = camera.get_counts() + socketio.emit('count_update', counts) + else: + # Send zero counts if camera not available + socketio.emit('count_update', { + 'total_entered': 0, + 'total_exited': 0, + 'current_occupancy': 0 + }) + + +@app.route('/') +def index(): + """Serve the main page.""" + return render_template('index.html') + + +def generate_frames(): + """Generator function for MJPEG video streaming.""" + while True: + if camera: + frame = camera.get_frame() + if frame: + yield (b'--frame\r\n' + b'Content-Type: image/jpeg\r\n\r\n' + frame + b'\r\n') + else: + time.sleep(0.1) + else: + time.sleep(0.1) + + +@app.route('/video_feed') +def video_feed(): + """Video streaming route.""" + return Response(generate_frames(), + mimetype='multipart/x-mixed-replace; boundary=frame') + + +@socketio.on('connect') +def handle_connect(): + """Handle WebSocket connection.""" + print('Client connected') + if camera: + counts = camera.get_counts() + emit('count_update', counts) + + +@socketio.on('disconnect') +def handle_disconnect(): + """Handle WebSocket disconnection.""" + print('Client disconnected') + + +@socketio.on('reset_counts') +def handle_reset_counts(): + """Handle reset counts request.""" + if camera: + camera.reset_counts() + counts = camera.get_counts() + emit('count_update', counts) + emit('reset_confirmation', {'status': 'success'}) + + +if __name__ == '__main__': + # Initialize camera + if initialize_camera(): + # Start background thread for count updates + update_thread = threading.Thread(target=count_update_thread, daemon=True) + update_thread.start() + + # Run Flask app + try: + socketio.run(app, host='0.0.0.0', port=5000, debug=False, allow_unsafe_werkzeug=True) + except KeyboardInterrupt: + print("\nShutting down...") + finally: + if camera: + camera.stop() + else: + print("Failed to initialize camera. Exiting.") diff --git a/camera.py b/camera.py new file mode 100644 index 0000000..7e04b7c --- /dev/null +++ b/camera.py @@ -0,0 +1,191 @@ +""" +Camera Module for USB camera capture and frame processing +Integrates face detection and zone tracking. +""" + +import cv2 +import threading +import time +from face_detector import FaceDetector +from zone_tracker import ZoneTracker + + +class Camera: + def __init__(self, camera_index=0, process_every_n_frames=3, + face_confidence=0.5, frame_width=640, frame_height=480): + """ + Initialize camera and processing components. 
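+
+        A background thread reads frames from the device continuously; face
+        detection and zone tracking run only on every Nth frame (see
+        process_every_n_frames) to keep the stream smooth, and the latest
+        annotated frame and counts are shared with the Flask side under a lock.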
+ + Args: + camera_index: Index of the USB camera (usually 0) + process_every_n_frames: Process face detection every N frames for performance + face_confidence: Confidence threshold for face detection + frame_width: Desired frame width + frame_height: Desired frame height + """ + self.camera_index = camera_index + self.process_every_n_frames = process_every_n_frames + self.frame_width = frame_width + self.frame_height = frame_height + + # Initialize camera + self.cap = cv2.VideoCapture(camera_index) + if not self.cap.isOpened(): + raise RuntimeError(f"Failed to open camera {camera_index}") + + # Set camera properties + self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, frame_width) + self.cap.set(cv2.CAP_PROP_FRAME_HEIGHT, frame_height) + + # Initialize face detector and zone tracker + self.face_detector = FaceDetector(confidence_threshold=face_confidence) + self.zone_tracker = None # Will be initialized after first frame + + # Frame processing state + self.frame_counter = 0 + self.current_frame = None + self.processed_frame = None + self.current_counts = { + 'total_entered': 0, + 'total_exited': 0, + 'current_occupancy': 0 + } + + # Thread safety + self.lock = threading.Lock() + self.running = False + self.processing_thread = None + + # Initialize zone tracker after getting first frame dimensions + ret, frame = self.cap.read() + if ret: + h, w = frame.shape[:2] + self.zone_tracker = ZoneTracker(w) + self.frame_width = w + self.frame_height = h + + def start(self): + """Start the camera and processing thread.""" + if self.running: + return + + self.running = True + self.processing_thread = threading.Thread(target=self._process_loop, daemon=True) + self.processing_thread.start() + + def stop(self): + """Stop the camera and processing thread.""" + self.running = False + if self.processing_thread: + self.processing_thread.join(timeout=2.0) + if self.cap: + self.cap.release() + + def _process_loop(self): + """Main processing loop running in background thread.""" + while self.running: + ret, frame = self.cap.read() + if not ret: + time.sleep(0.1) + continue + + self.frame_counter += 1 + + # Store current frame + with self.lock: + self.current_frame = frame.copy() + + # Process face detection every N frames + if self.frame_counter % self.process_every_n_frames == 0: + processed_frame, counts = self._process_frame(frame) + with self.lock: + self.processed_frame = processed_frame + self.current_counts = counts + + def _process_frame(self, frame): + """ + Process a single frame: detect faces, track zones, update counts. 
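+
+        Pipeline: detect faces, classify each face into a zone and update the
+        counters, draw the zone overlays and face boxes, then stamp the current
+        counts onto the frame.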
+ + Args: + frame: Input frame from camera + + Returns: + Tuple of (processed_frame, counts_dict) + """ + # Detect faces + faces = self.face_detector.detect_faces(frame) + + # Track zones and update counts + if self.zone_tracker: + counts = self.zone_tracker.process_faces(faces) + else: + counts = { + 'total_entered': 0, + 'total_exited': 0, + 'current_occupancy': 0 + } + + # Draw zones on frame + if self.zone_tracker: + processed_frame = self.zone_tracker.draw_zones(frame) + else: + processed_frame = frame.copy() + + # Draw faces on frame + processed_frame = self.face_detector.draw_faces(processed_frame, faces) + + # Draw count information on frame + text_y = 60 + cv2.putText(processed_frame, f"Entered: {counts['total_entered']}", + (10, text_y), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2) + cv2.putText(processed_frame, f"Exited: {counts['total_exited']}", + (10, text_y + 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2) + cv2.putText(processed_frame, f"Occupancy: {counts['current_occupancy']}", + (10, text_y + 60), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 0), 2) + + return processed_frame, counts + + def get_frame(self): + """ + Get the most recent processed frame. + + Returns: + JPEG encoded frame bytes, or None if no frame available + """ + with self.lock: + if self.processed_frame is not None: + ret, buffer = cv2.imencode('.jpg', self.processed_frame, + [cv2.IMWRITE_JPEG_QUALITY, 85]) + if ret: + return buffer.tobytes() + return None + + def get_counts(self): + """ + Get current count statistics. + + Returns: + Dictionary with total_entered, total_exited, current_occupancy + """ + with self.lock: + return self.current_counts.copy() + + def reset_counts(self): + """Reset all counters.""" + with self.lock: + if self.zone_tracker: + self.zone_tracker.reset_counts() + self.current_counts = { + 'total_entered': 0, + 'total_exited': 0, + 'current_occupancy': 0 + } + + def __enter__(self): + """Context manager entry.""" + self.start() + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + """Context manager exit.""" + self.stop() diff --git a/download_models.py b/download_models.py new file mode 100644 index 0000000..6769fba --- /dev/null +++ b/download_models.py @@ -0,0 +1,78 @@ +#!/usr/bin/env python3 +""" +Script to download OpenCV DNN face detection model files. +Downloads the required prototxt and caffemodel files for face detection. 
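+
+Usage:
+    python download_models.py
+
+Files are written to the models/ directory (created if it does not exist);
+files that are already present are skipped rather than re-downloaded.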
+""" + +import os +import urllib.request +from pathlib import Path + + +def download_file(url, destination): + """Download a file from URL to destination.""" + print(f"Downloading {os.path.basename(destination)}...") + try: + urllib.request.urlretrieve(url, destination) + print(f"✓ Successfully downloaded {os.path.basename(destination)}") + return True + except Exception as e: + print(f"✗ Error downloading {os.path.basename(destination)}: {e}") + return False + + +def main(): + """Main function to download model files.""" + # Create models directory if it doesn't exist + models_dir = Path("models") + models_dir.mkdir(exist_ok=True) + + # Model file URLs + prototxt_url = "https://raw.githubusercontent.com/opencv/opencv/master/samples/dnn/face_detector/deploy.prototxt" + model_url = "https://raw.githubusercontent.com/opencv/opencv_3rdparty/dnn_samples_face_detector_20170830/res10_300x300_ssd_iter_140000.caffemodel" + + # Destination paths + prototxt_path = models_dir / "deploy.prototxt" + model_path = models_dir / "res10_300x300_ssd_iter_140000.caffemodel" + + print("=" * 60) + print("OpenCV DNN Face Detection Model Downloader") + print("=" * 60) + print() + + # Check if files already exist + if prototxt_path.exists(): + print(f"⚠ {prototxt_path.name} already exists. Skipping download.") + else: + success = download_file(prototxt_url, prototxt_path) + if not success: + print("\nAlternative: You can manually download deploy.prototxt from:") + print("https://github.com/opencv/opencv/blob/master/samples/dnn/face_detector/deploy.prototxt") + print() + + if model_path.exists(): + print(f"⚠ {model_path.name} already exists. Skipping download.") + else: + success = download_file(model_url, model_path) + if not success: + print("\n⚠ Warning: The caffemodel file is large (~10MB) and may require manual download.") + print("Alternative download methods:") + print("1. Using wget:") + print(f" wget -O {model_path} {model_url}") + print("2. Using curl:") + print(f" curl -L -o {model_path} {model_url}") + print("3. Direct browser download:") + print(f" {model_url}") + print() + + print() + print("=" * 60) + if prototxt_path.exists() and model_path.exists(): + print("✓ All model files are ready!") + else: + print("⚠ Some files may be missing. Please check the files above.") + print("=" * 60) + + +if __name__ == "__main__": + main() diff --git a/face_detector.py b/face_detector.py new file mode 100644 index 0000000..24a11eb --- /dev/null +++ b/face_detector.py @@ -0,0 +1,118 @@ +""" +Face Detection Module using OpenCV DNN Face Detector +Uses pre-trained models for accurate face detection. +""" + +import cv2 +import numpy as np +import os + + +class FaceDetector: + def __init__(self, model_dir="models", confidence_threshold=0.5): + """ + Initialize the face detector with OpenCV DNN models. + + Args: + model_dir: Directory containing the model files + confidence_threshold: Minimum confidence for face detection (0.0-1.0) + """ + self.confidence_threshold = confidence_threshold + self.model_dir = model_dir + + # Paths to model files + self.prototxt_path = os.path.join(model_dir, "deploy.prototxt") + self.model_path = os.path.join(model_dir, "res10_300x300_ssd_iter_140000.caffemodel") + + # Load the DNN face detector + self.net = None + self._load_model() + + def _load_model(self): + """Load the OpenCV DNN face detection model.""" + if not os.path.exists(self.prototxt_path): + raise FileNotFoundError( + f"Model prototxt file not found: {self.prototxt_path}\n" + "Please download the model files first." 
+ ) + if not os.path.exists(self.model_path): + raise FileNotFoundError( + f"Model weights file not found: {self.model_path}\n" + "Please download the model files first." + ) + + self.net = cv2.dnn.readNetFromCaffe(self.prototxt_path, self.model_path) + + def detect_faces(self, frame): + """ + Detect faces in a frame. + + Args: + frame: BGR image frame from OpenCV + + Returns: + List of tuples (x, y, w, h, confidence) for each detected face + where (x, y) is top-left corner, w and h are width and height + """ + if self.net is None: + return [] + + # Get frame dimensions + (h, w) = frame.shape[:2] + + # Create blob from frame (preprocessing for DNN) + blob = cv2.dnn.blobFromImage( + cv2.resize(frame, (300, 300)), + 1.0, + (300, 300), + (104.0, 177.0, 123.0) + ) + + # Pass blob through network + self.net.setInput(blob) + detections = self.net.forward() + + faces = [] + + # Process detections + for i in range(0, detections.shape[2]): + confidence = detections[0, 0, i, 2] + + # Filter weak detections + if confidence > self.confidence_threshold: + # Get bounding box coordinates + box = detections[0, 0, i, 3:7] * np.array([w, h, w, h]) + (x1, y1, x2, y2) = box.astype("int") + + # Ensure coordinates are within frame bounds + x1 = max(0, x1) + y1 = max(0, y1) + x2 = min(w, x2) + y2 = min(h, y2) + + # Convert to (x, y, w, h) format + faces.append((x1, y1, x2 - x1, y2 - y1, confidence)) + + return faces + + def draw_faces(self, frame, faces, color=(0, 255, 0), thickness=2): + """ + Draw bounding boxes around detected faces. + + Args: + frame: Frame to draw on + faces: List of face detections from detect_faces() + color: BGR color tuple for bounding boxes + thickness: Line thickness + + Returns: + Frame with bounding boxes drawn + """ + result_frame = frame.copy() + for (x, y, w, h, confidence) in faces: + cv2.rectangle(result_frame, (x, y), (x + w, y + h), color, thickness) + # Optionally draw confidence score + label = f"{confidence:.2f}" + cv2.putText(result_frame, label, (x, y - 10), + cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2) + return result_frame diff --git a/models/deploy.prototxt b/models/deploy.prototxt new file mode 100644 index 0000000..a128515 --- /dev/null +++ b/models/deploy.prototxt @@ -0,0 +1,1790 @@ +input: "data" +input_shape { + dim: 1 + dim: 3 + dim: 300 + dim: 300 +} + +layer { + name: "data_bn" + type: "BatchNorm" + bottom: "data" + top: "data_bn" + param { + lr_mult: 0.0 + } + param { + lr_mult: 0.0 + } + param { + lr_mult: 0.0 + } +} +layer { + name: "data_scale" + type: "Scale" + bottom: "data_bn" + top: "data_bn" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 1.0 + } + scale_param { + bias_term: true + } +} +layer { + name: "conv1_h" + type: "Convolution" + bottom: "data_bn" + top: "conv1_h" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 32 + pad: 3 + kernel_size: 7 + stride: 2 + weight_filler { + type: "msra" + variance_norm: FAN_OUT + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "conv1_bn_h" + type: "BatchNorm" + bottom: "conv1_h" + top: "conv1_h" + param { + lr_mult: 0.0 + } + param { + lr_mult: 0.0 + } + param { + lr_mult: 0.0 + } +} +layer { + name: "conv1_scale_h" + type: "Scale" + bottom: "conv1_h" + top: "conv1_h" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 1.0 + } + scale_param { + bias_term: true + } +} +layer { + name: "conv1_relu" + type: "ReLU" + bottom: "conv1_h" + 
top: "conv1_h" +} +layer { + name: "conv1_pool" + type: "Pooling" + bottom: "conv1_h" + top: "conv1_pool" + pooling_param { + kernel_size: 3 + stride: 2 + } +} +layer { + name: "layer_64_1_conv1_h" + type: "Convolution" + bottom: "conv1_pool" + top: "layer_64_1_conv1_h" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 32 + bias_term: false + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "layer_64_1_bn2_h" + type: "BatchNorm" + bottom: "layer_64_1_conv1_h" + top: "layer_64_1_conv1_h" + param { + lr_mult: 0.0 + } + param { + lr_mult: 0.0 + } + param { + lr_mult: 0.0 + } +} +layer { + name: "layer_64_1_scale2_h" + type: "Scale" + bottom: "layer_64_1_conv1_h" + top: "layer_64_1_conv1_h" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 1.0 + } + scale_param { + bias_term: true + } +} +layer { + name: "layer_64_1_relu2" + type: "ReLU" + bottom: "layer_64_1_conv1_h" + top: "layer_64_1_conv1_h" +} +layer { + name: "layer_64_1_conv2_h" + type: "Convolution" + bottom: "layer_64_1_conv1_h" + top: "layer_64_1_conv2_h" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 32 + bias_term: false + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "layer_64_1_sum" + type: "Eltwise" + bottom: "layer_64_1_conv2_h" + bottom: "conv1_pool" + top: "layer_64_1_sum" +} +layer { + name: "layer_128_1_bn1_h" + type: "BatchNorm" + bottom: "layer_64_1_sum" + top: "layer_128_1_bn1_h" + param { + lr_mult: 0.0 + } + param { + lr_mult: 0.0 + } + param { + lr_mult: 0.0 + } +} +layer { + name: "layer_128_1_scale1_h" + type: "Scale" + bottom: "layer_128_1_bn1_h" + top: "layer_128_1_bn1_h" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 1.0 + } + scale_param { + bias_term: true + } +} +layer { + name: "layer_128_1_relu1" + type: "ReLU" + bottom: "layer_128_1_bn1_h" + top: "layer_128_1_bn1_h" +} +layer { + name: "layer_128_1_conv1_h" + type: "Convolution" + bottom: "layer_128_1_bn1_h" + top: "layer_128_1_conv1_h" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 128 + bias_term: false + pad: 1 + kernel_size: 3 + stride: 2 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "layer_128_1_bn2" + type: "BatchNorm" + bottom: "layer_128_1_conv1_h" + top: "layer_128_1_conv1_h" + param { + lr_mult: 0.0 + } + param { + lr_mult: 0.0 + } + param { + lr_mult: 0.0 + } +} +layer { + name: "layer_128_1_scale2" + type: "Scale" + bottom: "layer_128_1_conv1_h" + top: "layer_128_1_conv1_h" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 1.0 + } + scale_param { + bias_term: true + } +} +layer { + name: "layer_128_1_relu2" + type: "ReLU" + bottom: "layer_128_1_conv1_h" + top: "layer_128_1_conv1_h" +} +layer { + name: "layer_128_1_conv2" + type: "Convolution" + bottom: "layer_128_1_conv1_h" + top: "layer_128_1_conv2" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 128 + bias_term: false + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "layer_128_1_conv_expand_h" + type: "Convolution" + bottom: "layer_128_1_bn1_h" + top: "layer_128_1_conv_expand_h" + 
param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 128 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 2 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "layer_128_1_sum" + type: "Eltwise" + bottom: "layer_128_1_conv2" + bottom: "layer_128_1_conv_expand_h" + top: "layer_128_1_sum" +} +layer { + name: "layer_256_1_bn1" + type: "BatchNorm" + bottom: "layer_128_1_sum" + top: "layer_256_1_bn1" + param { + lr_mult: 0.0 + } + param { + lr_mult: 0.0 + } + param { + lr_mult: 0.0 + } +} +layer { + name: "layer_256_1_scale1" + type: "Scale" + bottom: "layer_256_1_bn1" + top: "layer_256_1_bn1" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 1.0 + } + scale_param { + bias_term: true + } +} +layer { + name: "layer_256_1_relu1" + type: "ReLU" + bottom: "layer_256_1_bn1" + top: "layer_256_1_bn1" +} +layer { + name: "layer_256_1_conv1" + type: "Convolution" + bottom: "layer_256_1_bn1" + top: "layer_256_1_conv1" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 256 + bias_term: false + pad: 1 + kernel_size: 3 + stride: 2 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "layer_256_1_bn2" + type: "BatchNorm" + bottom: "layer_256_1_conv1" + top: "layer_256_1_conv1" + param { + lr_mult: 0.0 + } + param { + lr_mult: 0.0 + } + param { + lr_mult: 0.0 + } +} +layer { + name: "layer_256_1_scale2" + type: "Scale" + bottom: "layer_256_1_conv1" + top: "layer_256_1_conv1" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 1.0 + } + scale_param { + bias_term: true + } +} +layer { + name: "layer_256_1_relu2" + type: "ReLU" + bottom: "layer_256_1_conv1" + top: "layer_256_1_conv1" +} +layer { + name: "layer_256_1_conv2" + type: "Convolution" + bottom: "layer_256_1_conv1" + top: "layer_256_1_conv2" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 256 + bias_term: false + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "layer_256_1_conv_expand" + type: "Convolution" + bottom: "layer_256_1_bn1" + top: "layer_256_1_conv_expand" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 256 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 2 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "layer_256_1_sum" + type: "Eltwise" + bottom: "layer_256_1_conv2" + bottom: "layer_256_1_conv_expand" + top: "layer_256_1_sum" +} +layer { + name: "layer_512_1_bn1" + type: "BatchNorm" + bottom: "layer_256_1_sum" + top: "layer_512_1_bn1" + param { + lr_mult: 0.0 + } + param { + lr_mult: 0.0 + } + param { + lr_mult: 0.0 + } +} +layer { + name: "layer_512_1_scale1" + type: "Scale" + bottom: "layer_512_1_bn1" + top: "layer_512_1_bn1" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 1.0 + } + scale_param { + bias_term: true + } +} +layer { + name: "layer_512_1_relu1" + type: "ReLU" + bottom: "layer_512_1_bn1" + top: "layer_512_1_bn1" +} +layer { + name: "layer_512_1_conv1_h" + type: "Convolution" + bottom: "layer_512_1_bn1" + top: "layer_512_1_conv1_h" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 128 + bias_term: false + pad: 1 + kernel_size: 3 + stride: 1 # 2 + weight_filler { + 
type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "layer_512_1_bn2_h" + type: "BatchNorm" + bottom: "layer_512_1_conv1_h" + top: "layer_512_1_conv1_h" + param { + lr_mult: 0.0 + } + param { + lr_mult: 0.0 + } + param { + lr_mult: 0.0 + } +} +layer { + name: "layer_512_1_scale2_h" + type: "Scale" + bottom: "layer_512_1_conv1_h" + top: "layer_512_1_conv1_h" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 1.0 + } + scale_param { + bias_term: true + } +} +layer { + name: "layer_512_1_relu2" + type: "ReLU" + bottom: "layer_512_1_conv1_h" + top: "layer_512_1_conv1_h" +} +layer { + name: "layer_512_1_conv2_h" + type: "Convolution" + bottom: "layer_512_1_conv1_h" + top: "layer_512_1_conv2_h" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 256 + bias_term: false + pad: 2 # 1 + kernel_size: 3 + stride: 1 + dilation: 2 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "layer_512_1_conv_expand_h" + type: "Convolution" + bottom: "layer_512_1_bn1" + top: "layer_512_1_conv_expand_h" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 256 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 # 2 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "layer_512_1_sum" + type: "Eltwise" + bottom: "layer_512_1_conv2_h" + bottom: "layer_512_1_conv_expand_h" + top: "layer_512_1_sum" +} +layer { + name: "last_bn_h" + type: "BatchNorm" + bottom: "layer_512_1_sum" + top: "layer_512_1_sum" + param { + lr_mult: 0.0 + } + param { + lr_mult: 0.0 + } + param { + lr_mult: 0.0 + } +} +layer { + name: "last_scale_h" + type: "Scale" + bottom: "layer_512_1_sum" + top: "layer_512_1_sum" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 1.0 + } + scale_param { + bias_term: true + } +} +layer { + name: "last_relu" + type: "ReLU" + bottom: "layer_512_1_sum" + top: "fc7" +} + +layer { + name: "conv6_1_h" + type: "Convolution" + bottom: "fc7" + top: "conv6_1_h" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv6_1_relu" + type: "ReLU" + bottom: "conv6_1_h" + top: "conv6_1_h" +} +layer { + name: "conv6_2_h" + type: "Convolution" + bottom: "conv6_1_h" + top: "conv6_2_h" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + stride: 2 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv6_2_relu" + type: "ReLU" + bottom: "conv6_2_h" + top: "conv6_2_h" +} +layer { + name: "conv7_1_h" + type: "Convolution" + bottom: "conv6_2_h" + top: "conv7_1_h" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 64 + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv7_1_relu" + type: "ReLU" + bottom: "conv7_1_h" + top: "conv7_1_h" +} +layer { + name: "conv7_2_h" + type: "Convolution" + bottom: "conv7_1_h" + top: "conv7_2_h" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 
+ decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + stride: 2 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv7_2_relu" + type: "ReLU" + bottom: "conv7_2_h" + top: "conv7_2_h" +} +layer { + name: "conv8_1_h" + type: "Convolution" + bottom: "conv7_2_h" + top: "conv8_1_h" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 64 + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv8_1_relu" + type: "ReLU" + bottom: "conv8_1_h" + top: "conv8_1_h" +} +layer { + name: "conv8_2_h" + type: "Convolution" + bottom: "conv8_1_h" + top: "conv8_2_h" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv8_2_relu" + type: "ReLU" + bottom: "conv8_2_h" + top: "conv8_2_h" +} +layer { + name: "conv9_1_h" + type: "Convolution" + bottom: "conv8_2_h" + top: "conv9_1_h" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 64 + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv9_1_relu" + type: "ReLU" + bottom: "conv9_1_h" + top: "conv9_1_h" +} +layer { + name: "conv9_2_h" + type: "Convolution" + bottom: "conv9_1_h" + top: "conv9_2_h" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv9_2_relu" + type: "ReLU" + bottom: "conv9_2_h" + top: "conv9_2_h" +} +layer { + name: "conv4_3_norm" + type: "Normalize" + bottom: "layer_256_1_bn1" + top: "conv4_3_norm" + norm_param { + across_spatial: false + scale_filler { + type: "constant" + value: 20 + } + channel_shared: false + } +} +layer { + name: "conv4_3_norm_mbox_loc" + type: "Convolution" + bottom: "conv4_3_norm" + top: "conv4_3_norm_mbox_loc" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 16 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv4_3_norm_mbox_loc_perm" + type: "Permute" + bottom: "conv4_3_norm_mbox_loc" + top: "conv4_3_norm_mbox_loc_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv4_3_norm_mbox_loc_flat" + type: "Flatten" + bottom: "conv4_3_norm_mbox_loc_perm" + top: "conv4_3_norm_mbox_loc_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv4_3_norm_mbox_conf" + type: "Convolution" + bottom: "conv4_3_norm" + top: "conv4_3_norm_mbox_conf" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 8 # 84 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv4_3_norm_mbox_conf_perm" + type: "Permute" + bottom: "conv4_3_norm_mbox_conf" + top: "conv4_3_norm_mbox_conf_perm" 
+ permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv4_3_norm_mbox_conf_flat" + type: "Flatten" + bottom: "conv4_3_norm_mbox_conf_perm" + top: "conv4_3_norm_mbox_conf_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv4_3_norm_mbox_priorbox" + type: "PriorBox" + bottom: "conv4_3_norm" + bottom: "data" + top: "conv4_3_norm_mbox_priorbox" + prior_box_param { + min_size: 30.0 + max_size: 60.0 + aspect_ratio: 2 + flip: true + clip: false + variance: 0.1 + variance: 0.1 + variance: 0.2 + variance: 0.2 + step: 8 + offset: 0.5 + } +} +layer { + name: "fc7_mbox_loc" + type: "Convolution" + bottom: "fc7" + top: "fc7_mbox_loc" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 24 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "fc7_mbox_loc_perm" + type: "Permute" + bottom: "fc7_mbox_loc" + top: "fc7_mbox_loc_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "fc7_mbox_loc_flat" + type: "Flatten" + bottom: "fc7_mbox_loc_perm" + top: "fc7_mbox_loc_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "fc7_mbox_conf" + type: "Convolution" + bottom: "fc7" + top: "fc7_mbox_conf" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 12 # 126 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "fc7_mbox_conf_perm" + type: "Permute" + bottom: "fc7_mbox_conf" + top: "fc7_mbox_conf_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "fc7_mbox_conf_flat" + type: "Flatten" + bottom: "fc7_mbox_conf_perm" + top: "fc7_mbox_conf_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "fc7_mbox_priorbox" + type: "PriorBox" + bottom: "fc7" + bottom: "data" + top: "fc7_mbox_priorbox" + prior_box_param { + min_size: 60.0 + max_size: 111.0 + aspect_ratio: 2 + aspect_ratio: 3 + flip: true + clip: false + variance: 0.1 + variance: 0.1 + variance: 0.2 + variance: 0.2 + step: 16 + offset: 0.5 + } +} +layer { + name: "conv6_2_mbox_loc" + type: "Convolution" + bottom: "conv6_2_h" + top: "conv6_2_mbox_loc" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 24 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv6_2_mbox_loc_perm" + type: "Permute" + bottom: "conv6_2_mbox_loc" + top: "conv6_2_mbox_loc_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv6_2_mbox_loc_flat" + type: "Flatten" + bottom: "conv6_2_mbox_loc_perm" + top: "conv6_2_mbox_loc_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv6_2_mbox_conf" + type: "Convolution" + bottom: "conv6_2_h" + top: "conv6_2_mbox_conf" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 12 # 126 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv6_2_mbox_conf_perm" + type: "Permute" + bottom: "conv6_2_mbox_conf" + top: "conv6_2_mbox_conf_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} 
+layer { + name: "conv6_2_mbox_conf_flat" + type: "Flatten" + bottom: "conv6_2_mbox_conf_perm" + top: "conv6_2_mbox_conf_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv6_2_mbox_priorbox" + type: "PriorBox" + bottom: "conv6_2_h" + bottom: "data" + top: "conv6_2_mbox_priorbox" + prior_box_param { + min_size: 111.0 + max_size: 162.0 + aspect_ratio: 2 + aspect_ratio: 3 + flip: true + clip: false + variance: 0.1 + variance: 0.1 + variance: 0.2 + variance: 0.2 + step: 32 + offset: 0.5 + } +} +layer { + name: "conv7_2_mbox_loc" + type: "Convolution" + bottom: "conv7_2_h" + top: "conv7_2_mbox_loc" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 24 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv7_2_mbox_loc_perm" + type: "Permute" + bottom: "conv7_2_mbox_loc" + top: "conv7_2_mbox_loc_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv7_2_mbox_loc_flat" + type: "Flatten" + bottom: "conv7_2_mbox_loc_perm" + top: "conv7_2_mbox_loc_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv7_2_mbox_conf" + type: "Convolution" + bottom: "conv7_2_h" + top: "conv7_2_mbox_conf" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 12 # 126 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv7_2_mbox_conf_perm" + type: "Permute" + bottom: "conv7_2_mbox_conf" + top: "conv7_2_mbox_conf_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv7_2_mbox_conf_flat" + type: "Flatten" + bottom: "conv7_2_mbox_conf_perm" + top: "conv7_2_mbox_conf_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv7_2_mbox_priorbox" + type: "PriorBox" + bottom: "conv7_2_h" + bottom: "data" + top: "conv7_2_mbox_priorbox" + prior_box_param { + min_size: 162.0 + max_size: 213.0 + aspect_ratio: 2 + aspect_ratio: 3 + flip: true + clip: false + variance: 0.1 + variance: 0.1 + variance: 0.2 + variance: 0.2 + step: 64 + offset: 0.5 + } +} +layer { + name: "conv8_2_mbox_loc" + type: "Convolution" + bottom: "conv8_2_h" + top: "conv8_2_mbox_loc" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 16 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv8_2_mbox_loc_perm" + type: "Permute" + bottom: "conv8_2_mbox_loc" + top: "conv8_2_mbox_loc_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv8_2_mbox_loc_flat" + type: "Flatten" + bottom: "conv8_2_mbox_loc_perm" + top: "conv8_2_mbox_loc_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv8_2_mbox_conf" + type: "Convolution" + bottom: "conv8_2_h" + top: "conv8_2_mbox_conf" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 8 # 84 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv8_2_mbox_conf_perm" + type: "Permute" + bottom: "conv8_2_mbox_conf" + top: "conv8_2_mbox_conf_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 
1 + } +} +layer { + name: "conv8_2_mbox_conf_flat" + type: "Flatten" + bottom: "conv8_2_mbox_conf_perm" + top: "conv8_2_mbox_conf_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv8_2_mbox_priorbox" + type: "PriorBox" + bottom: "conv8_2_h" + bottom: "data" + top: "conv8_2_mbox_priorbox" + prior_box_param { + min_size: 213.0 + max_size: 264.0 + aspect_ratio: 2 + flip: true + clip: false + variance: 0.1 + variance: 0.1 + variance: 0.2 + variance: 0.2 + step: 100 + offset: 0.5 + } +} +layer { + name: "conv9_2_mbox_loc" + type: "Convolution" + bottom: "conv9_2_h" + top: "conv9_2_mbox_loc" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 16 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv9_2_mbox_loc_perm" + type: "Permute" + bottom: "conv9_2_mbox_loc" + top: "conv9_2_mbox_loc_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv9_2_mbox_loc_flat" + type: "Flatten" + bottom: "conv9_2_mbox_loc_perm" + top: "conv9_2_mbox_loc_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv9_2_mbox_conf" + type: "Convolution" + bottom: "conv9_2_h" + top: "conv9_2_mbox_conf" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 8 # 84 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv9_2_mbox_conf_perm" + type: "Permute" + bottom: "conv9_2_mbox_conf" + top: "conv9_2_mbox_conf_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv9_2_mbox_conf_flat" + type: "Flatten" + bottom: "conv9_2_mbox_conf_perm" + top: "conv9_2_mbox_conf_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv9_2_mbox_priorbox" + type: "PriorBox" + bottom: "conv9_2_h" + bottom: "data" + top: "conv9_2_mbox_priorbox" + prior_box_param { + min_size: 264.0 + max_size: 315.0 + aspect_ratio: 2 + flip: true + clip: false + variance: 0.1 + variance: 0.1 + variance: 0.2 + variance: 0.2 + step: 300 + offset: 0.5 + } +} +layer { + name: "mbox_loc" + type: "Concat" + bottom: "conv4_3_norm_mbox_loc_flat" + bottom: "fc7_mbox_loc_flat" + bottom: "conv6_2_mbox_loc_flat" + bottom: "conv7_2_mbox_loc_flat" + bottom: "conv8_2_mbox_loc_flat" + bottom: "conv9_2_mbox_loc_flat" + top: "mbox_loc" + concat_param { + axis: 1 + } +} +layer { + name: "mbox_conf" + type: "Concat" + bottom: "conv4_3_norm_mbox_conf_flat" + bottom: "fc7_mbox_conf_flat" + bottom: "conv6_2_mbox_conf_flat" + bottom: "conv7_2_mbox_conf_flat" + bottom: "conv8_2_mbox_conf_flat" + bottom: "conv9_2_mbox_conf_flat" + top: "mbox_conf" + concat_param { + axis: 1 + } +} +layer { + name: "mbox_priorbox" + type: "Concat" + bottom: "conv4_3_norm_mbox_priorbox" + bottom: "fc7_mbox_priorbox" + bottom: "conv6_2_mbox_priorbox" + bottom: "conv7_2_mbox_priorbox" + bottom: "conv8_2_mbox_priorbox" + bottom: "conv9_2_mbox_priorbox" + top: "mbox_priorbox" + concat_param { + axis: 2 + } +} + +layer { + name: "mbox_conf_reshape" + type: "Reshape" + bottom: "mbox_conf" + top: "mbox_conf_reshape" + reshape_param { + shape { + dim: 0 + dim: -1 + dim: 2 + } + } +} +layer { + name: "mbox_conf_softmax" + type: "Softmax" + bottom: "mbox_conf_reshape" + top: "mbox_conf_softmax" + softmax_param { + axis: 2 + } +} +layer { + name: "mbox_conf_flatten" + type: 
"Flatten" + bottom: "mbox_conf_softmax" + top: "mbox_conf_flatten" + flatten_param { + axis: 1 + } +} + +layer { + name: "detection_out" + type: "DetectionOutput" + bottom: "mbox_loc" + bottom: "mbox_conf_flatten" + bottom: "mbox_priorbox" + top: "detection_out" + include { + phase: TEST + } + detection_output_param { + num_classes: 2 + share_location: true + background_label_id: 0 + nms_param { + nms_threshold: 0.45 + top_k: 400 + } + code_type: CENTER_SIZE + keep_top_k: 200 + confidence_threshold: 0.01 + clip: 1 + } +} diff --git a/models/res10_300x300_ssd_iter_140000.caffemodel b/models/res10_300x300_ssd_iter_140000.caffemodel new file mode 100644 index 0000000..809dfd7 Binary files /dev/null and b/models/res10_300x300_ssd_iter_140000.caffemodel differ diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..d0ef7bc --- /dev/null +++ b/requirements.txt @@ -0,0 +1,5 @@ +Flask>=3.0.0 +opencv-python>=4.8.0 +Flask-SocketIO>=5.3.0 +numpy>=1.26.0 +setuptools>=65.0.0 diff --git a/static/css/style.css b/static/css/style.css new file mode 100644 index 0000000..50c1461 --- /dev/null +++ b/static/css/style.css @@ -0,0 +1,222 @@ +* { + margin: 0; + padding: 0; + box-sizing: border-box; +} + +body { + font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; + background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); + min-height: 100vh; + color: #333; +} + +.container { + max-width: 1400px; + margin: 0 auto; + padding: 20px; +} + +header { + text-align: center; + color: white; + margin-bottom: 30px; +} + +header h1 { + font-size: 2.5em; + margin-bottom: 10px; + text-shadow: 2px 2px 4px rgba(0, 0, 0, 0.3); +} + +.subtitle { + font-size: 1.1em; + opacity: 0.9; +} + +main { + background: white; + border-radius: 15px; + padding: 30px; + box-shadow: 0 10px 30px rgba(0, 0, 0, 0.3); +} + +.video-section { + margin-bottom: 30px; +} + +.video-container { + position: relative; + background: #000; + border-radius: 10px; + overflow: hidden; + box-shadow: 0 5px 15px rgba(0, 0, 0, 0.2); +} + +#videoStream { + width: 100%; + height: auto; + display: block; + max-height: 600px; + object-fit: contain; +} + +.status-indicator { + position: absolute; + top: 15px; + right: 15px; + width: 15px; + height: 15px; + border-radius: 50%; + background: #4caf50; + box-shadow: 0 0 10px rgba(76, 175, 80, 0.6); + animation: pulse 2s infinite; +} + +.status-indicator.warning { + background: #ff9800; + box-shadow: 0 0 10px rgba(255, 152, 0, 0.6); +} + +.status-indicator.danger { + background: #f44336; + box-shadow: 0 0 10px rgba(244, 67, 54, 0.6); +} + +@keyframes pulse { + 0%, 100% { + opacity: 1; + } + 50% { + opacity: 0.5; + } +} + +.stats-section { + display: grid; + grid-template-columns: repeat(auto-fit, minmax(250px, 1fr)); + gap: 20px; + margin-bottom: 30px; +} + +.stat-card { + background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%); + border-radius: 10px; + padding: 25px; + text-align: center; + box-shadow: 0 3px 10px rgba(0, 0, 0, 0.1); + transition: transform 0.3s ease, box-shadow 0.3s ease; +} + +.stat-card:hover { + transform: translateY(-5px); + box-shadow: 0 5px 20px rgba(0, 0, 0, 0.2); +} + +.stat-card.warning { + background: linear-gradient(135deg, #ffeaa7 0%, #fdcb6e 100%); +} + +.stat-card.danger { + background: linear-gradient(135deg, #fab1a0 0%, #e17055 100%); + color: white; +} + +.stat-label { + font-size: 0.9em; + color: #666; + margin-bottom: 10px; + text-transform: uppercase; + letter-spacing: 1px; + font-weight: 600; +} + +.stat-card.danger .stat-label { + 
color: rgba(255, 255, 255, 0.9); +} + +.stat-value { + font-size: 3em; + font-weight: bold; + color: #333; + margin-bottom: 5px; +} + +.stat-card.danger .stat-value { + color: white; +} + +.stat-entered { + color: #4caf50; +} + +.stat-exited { + color: #f44336; +} + +.stat-subtitle { + font-size: 0.85em; + color: #888; + margin-top: 5px; +} + +.stat-card.danger .stat-subtitle { + color: rgba(255, 255, 255, 0.9); +} + +.controls-section { + text-align: center; +} + +.reset-btn { + background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); + color: white; + border: none; + padding: 15px 40px; + font-size: 1.1em; + border-radius: 25px; + cursor: pointer; + transition: all 0.3s ease; + box-shadow: 0 4px 15px rgba(102, 126, 234, 0.4); + font-weight: 600; +} + +.reset-btn:hover { + transform: translateY(-2px); + box-shadow: 0 6px 20px rgba(102, 126, 234, 0.6); +} + +.reset-btn:active { + transform: translateY(0); +} + +footer { + text-align: center; + color: white; + margin-top: 30px; + opacity: 0.8; +} + +/* Responsive design */ +@media (max-width: 768px) { + header h1 { + font-size: 2em; + } + + .stat-value { + font-size: 2.5em; + } + + main { + padding: 20px; + } +} + +/* Animation for count updates */ +.stat-value { + transition: transform 0.2s ease; +} + +.stat-value.updated { + transform: scale(1.1); +} diff --git a/static/js/main.js b/static/js/main.js new file mode 100644 index 0000000..06333f0 --- /dev/null +++ b/static/js/main.js @@ -0,0 +1,133 @@ +// WebSocket connection and real-time UI updates +const socket = io(); + +// Default maximum occupancy threshold (can be configured) +const MAX_OCCUPANCY = 10; + +// DOM elements +const occupancyValue = document.getElementById('occupancyValue'); +const occupancyStatus = document.getElementById('occupancyStatus'); +const occupancyCard = document.getElementById('occupancyCard'); +const enteredValue = document.getElementById('enteredValue'); +const exitedValue = document.getElementById('exitedValue'); +const resetButton = document.getElementById('resetButton'); +const statusIndicator = document.getElementById('statusIndicator'); +const videoStream = document.getElementById('videoStream'); + +// Connection status +let isConnected = false; + +// WebSocket event handlers +socket.on('connect', () => { + console.log('Connected to server'); + isConnected = true; + updateConnectionStatus(true); +}); + +socket.on('disconnect', () => { + console.log('Disconnected from server'); + isConnected = false; + updateConnectionStatus(false); +}); + +// Handle count updates from server +socket.on('count_update', (data) => { + updateCounts(data); +}); + +// Handle reset confirmation +socket.on('reset_confirmation', (data) => { + console.log('Counts reset:', data); + // Counts will be updated via count_update event +}); + +// Update count displays +function updateCounts(counts) { + const { total_entered, total_exited, current_occupancy } = counts; + + // Update values with animation + updateValue(enteredValue, total_entered); + updateValue(exitedValue, total_exited); + updateValue(occupancyValue, current_occupancy); + + // Update occupancy status and styling + updateOccupancyStatus(current_occupancy); +} + +// Update a single value with animation +function updateValue(element, newValue) { + const oldValue = parseInt(element.textContent) || 0; + if (oldValue !== newValue) { + element.classList.add('updated'); + element.textContent = newValue; + setTimeout(() => { + element.classList.remove('updated'); + }, 200); + } +} + +// Update occupancy status based on 
current count +function updateOccupancyStatus(occupancy) { + // Remove all status classes + occupancyCard.classList.remove('warning', 'danger'); + statusIndicator.classList.remove('warning', 'danger'); + + // Update status text and styling + if (occupancy >= MAX_OCCUPANCY) { + occupancyStatus.textContent = 'OVER LIMIT'; + occupancyCard.classList.add('danger'); + statusIndicator.classList.add('danger'); + } else if (occupancy >= MAX_OCCUPANCY * 0.8) { + occupancyStatus.textContent = 'High'; + occupancyCard.classList.add('warning'); + statusIndicator.classList.add('warning'); + } else if (occupancy >= MAX_OCCUPANCY * 0.5) { + occupancyStatus.textContent = 'Moderate'; + occupancyCard.classList.remove('warning', 'danger'); + statusIndicator.classList.remove('warning', 'danger'); + } else { + occupancyStatus.textContent = 'Normal'; + occupancyCard.classList.remove('warning', 'danger'); + statusIndicator.classList.remove('warning', 'danger'); + } +} + +// Update connection status indicator +function updateConnectionStatus(connected) { + if (connected) { + statusIndicator.style.background = '#4caf50'; + statusIndicator.title = 'Connected'; + } else { + statusIndicator.style.background = '#f44336'; + statusIndicator.title = 'Disconnected'; + } +} + +// Handle reset button click +resetButton.addEventListener('click', () => { + if (confirm('Are you sure you want to reset all counts?')) { + socket.emit('reset_counts'); + } +}); + +// Handle video stream errors +videoStream.addEventListener('error', () => { + console.error('Error loading video stream'); + videoStream.src = ''; // Clear src to prevent repeated errors + // Optionally show an error message + const errorMsg = document.createElement('div'); + errorMsg.className = 'error-message'; + errorMsg.textContent = 'Unable to load video stream. Please check camera connection.'; + errorMsg.style.cssText = 'position: absolute; top: 50%; left: 50%; transform: translate(-50%, -50%); color: white; background: rgba(244, 67, 54, 0.9); padding: 20px; border-radius: 10px;'; + document.querySelector('.video-container').appendChild(errorMsg); +}); + +// Periodic check for connection (fallback) +setInterval(() => { + if (!isConnected && socket.connected === false) { + console.log('Attempting to reconnect...'); + } +}, 5000); + +// Initialize +console.log('People Counter frontend initialized'); diff --git a/templates/index.html b/templates/index.html new file mode 100644 index 0000000..a360575 --- /dev/null +++ b/templates/index.html @@ -0,0 +1,55 @@ + + + + + + People Counter + + + +
+
+

+<body>
+    <div class="container">
+        <header>
+            <h1>People Counter</h1>
+            <p class="subtitle">Real-time occupancy tracking</p>
+        </header>
+
+        <main>
+            <section class="video-section">
+                <div class="video-container">
+                    <img id="videoStream" src="{{ url_for('video_feed') }}" alt="Video Stream">
+                    <div id="statusIndicator" class="status-indicator"></div>
+                </div>
+            </section>
+
+            <section class="stats-section">
+                <div id="occupancyCard" class="stat-card">
+                    <div class="stat-label">Current Occupancy</div>
+                    <div id="occupancyValue" class="stat-value">0</div>
+                    <div id="occupancyStatus" class="stat-subtitle">Normal</div>
+                </div>
+                <div class="stat-card">
+                    <div class="stat-label">Total Entered</div>
+                    <div id="enteredValue" class="stat-value stat-entered">0</div>
+                </div>
+                <div class="stat-card">
+                    <div class="stat-label">Total Exited</div>
+                    <div id="exitedValue" class="stat-value stat-exited">0</div>
+                </div>
+            </section>
+
+            <section class="controls-section">
+                <button id="resetButton" class="reset-btn">Reset Counts</button>
+            </section>
+        </main>
+
+        <footer>
+            <p>People Counter System - Real-time face detection and tracking</p>
+        </footer>
+    </div>
+
+    <!-- Socket.IO client library must be loaded before main.js -->
+    <script src="{{ url_for('static', filename='js/main.js') }}"></script>
+</body>
+</html>
+ + + + + diff --git a/zone_tracker.py b/zone_tracker.py new file mode 100644 index 0000000..78fae7a --- /dev/null +++ b/zone_tracker.py @@ -0,0 +1,218 @@ +""" +Zone-based Entry/Exit Tracker +Tracks people entering and exiting based on zone detection with cooldown mechanism. +""" + +import time +import cv2 +from collections import defaultdict + + +class ZoneTracker: + def __init__(self, frame_width, entry_zone_percent=0.4, exit_zone_percent=0.4, + cooldown_seconds=2.0, center_buffer_percent=0.1): + """ + Initialize the zone tracker. + + Args: + frame_width: Width of the video frame in pixels + entry_zone_percent: Percentage of frame width for entry zone (left side) + exit_zone_percent: Percentage of frame width for exit zone (right side) + cooldown_seconds: Time in seconds before same person can be counted again + center_buffer_percent: Percentage of center to ignore (prevents false counts) + """ + self.frame_width = frame_width + self.entry_zone_percent = entry_zone_percent + self.exit_zone_percent = exit_zone_percent + self.cooldown_seconds = cooldown_seconds + self.center_buffer_percent = center_buffer_percent + + # Calculate zone boundaries + self.entry_zone_end = int(frame_width * entry_zone_percent) + buffer_width = int(frame_width * center_buffer_percent) + self.center_start = int(frame_width / 2 - buffer_width / 2) + self.center_end = int(frame_width / 2 + buffer_width / 2) + self.exit_zone_start = int(frame_width * (1 - exit_zone_percent)) + + # Counters + self.total_entered = 0 + self.total_exited = 0 + + # Track faces with timestamps to prevent double-counting + # Key: face_id (centroid hash), Value: (zone, timestamp) + self.tracked_faces = {} + self.face_cooldowns = defaultdict(float) + + # Track last seen zone for each face (to detect zone transitions) + self.last_zone = {} + + def get_zone(self, face_x, face_w): + """ + Determine which zone a face is in based on its position. + + Args: + face_x: X coordinate of face (left edge) + face_w: Width of face bounding box + face_center: Center X of the face + + Returns: + 'entry' if in entry zone, 'exit' if in exit zone, 'center' if in buffer, None otherwise + """ + face_center = face_x + face_w // 2 + + # Check if in center buffer zone (ignore) + if self.center_start <= face_center <= self.center_end: + return 'center' + + # Check entry zone (left side) + if face_center < self.entry_zone_end: + return 'entry' + + # Check exit zone (right side) + if face_center > self.exit_zone_start: + return 'exit' + + # In the middle zone (between entry/exit and center buffer) + return None + + def _get_face_id(self, face_x, face_y, face_w, face_h): + """ + Generate a simple ID for a face based on its position and size. + This is a basic approach - in production, use proper tracking algorithms. + + Args: + face_x, face_y: Top-left coordinates + face_w, face_h: Width and height + + Returns: + A simple hash-like ID for tracking + """ + # Use approximate position and size to create a simple ID + # This helps group similar detections as the same person + grid_x = face_x // 50 + grid_y = face_y // 50 + size_category = (face_w + face_h) // 50 + return f"{grid_x}_{grid_y}_{size_category}" + + def process_faces(self, faces): + """ + Process detected faces and update entry/exit counts. 
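+
+        Each detection is mapped to a coarse position-based id; the first
+        sighting of an id only records its zone, and a counter is incremented
+        only when the same id later appears in the other zone, subject to the
+        per-id cooldown. Ids that are no longer detected are eventually
+        discarded.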
+ + Args: + faces: List of tuples (x, y, w, h, confidence) from face detector + + Returns: + Dictionary with updated counts and zone info + """ + current_time = time.time() + current_zones = {} + + # Process each detected face + for face in faces: + face_x, face_y, face_w, face_h, confidence = face + face_id = self._get_face_id(face_x, face_y, face_w, face_h) + zone = self.get_zone(face_x, face_w) + + if zone is None or zone == 'center': + continue + + current_zones[face_id] = zone + + # Check if this face is in cooldown + if face_id in self.face_cooldowns: + if current_time - self.face_cooldowns[face_id] < self.cooldown_seconds: + continue # Still in cooldown, skip + + # Check for zone transitions or first detection + if face_id not in self.last_zone: + # First time seeing this face - mark the zone + self.last_zone[face_id] = zone + self.tracked_faces[face_id] = (zone, current_time) + else: + # Face has been seen before - check for valid transition + last_zone = self.last_zone[face_id] + + # Only count if we have a clear zone assignment + # Entry: person appears in entry zone + # Exit: person appears in exit zone + if zone == 'entry' and last_zone != 'entry': + # Person entered + self.total_entered += 1 + self.face_cooldowns[face_id] = current_time + self.last_zone[face_id] = zone + elif zone == 'exit' and last_zone != 'exit': + # Person exited + self.total_exited += 1 + self.face_cooldowns[face_id] = current_time + self.last_zone[face_id] = zone + + # Clean up old tracking data for faces no longer detected + faces_to_remove = [] + for face_id in self.last_zone: + if face_id not in current_zones: + # Face no longer detected, but keep in memory for a bit + if face_id in self.tracked_faces: + last_seen = self.tracked_faces[face_id][1] + if current_time - last_seen > 5.0: # Remove after 5 seconds + faces_to_remove.append(face_id) + + for face_id in faces_to_remove: + if face_id in self.last_zone: + del self.last_zone[face_id] + if face_id in self.tracked_faces: + del self.tracked_faces[face_id] + if face_id in self.face_cooldowns: + del self.face_cooldowns[face_id] + + return { + 'total_entered': self.total_entered, + 'total_exited': self.total_exited, + 'current_occupancy': self.total_entered - self.total_exited, + 'zones': current_zones + } + + def get_counts(self): + """Get current count statistics.""" + return { + 'total_entered': self.total_entered, + 'total_exited': self.total_exited, + 'current_occupancy': self.total_entered - self.total_exited + } + + def reset_counts(self): + """Reset all counters and tracking data.""" + self.total_entered = 0 + self.total_exited = 0 + self.tracked_faces.clear() + self.face_cooldowns.clear() + self.last_zone.clear() + + def draw_zones(self, frame): + """ + Draw zone boundaries on the frame for visualization. 
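+
+        The entry zone is outlined in green, the exit zone in red, and the
+        central buffer is shaded with a translucent yellow overlay.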
+ + Args: + frame: Frame to draw on + + Returns: + Frame with zone boundaries drawn + """ + result_frame = frame.copy() + h = frame.shape[0] + + # Draw entry zone (left, green) + cv2.rectangle(result_frame, (0, 0), (self.entry_zone_end, h), (0, 255, 0), 2) + cv2.putText(result_frame, "ENTRY", (10, 30), + cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2) + + # Draw exit zone (right, red) + cv2.rectangle(result_frame, (self.exit_zone_start, 0), (self.frame_width, h), (0, 0, 255), 2) + cv2.putText(result_frame, "EXIT", (self.exit_zone_start + 10, 30), + cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2) + + # Draw center buffer (yellow, semi-transparent) + overlay = result_frame.copy() + cv2.rectangle(overlay, (self.center_start, 0), (self.center_end, h), (0, 255, 255), -1) + cv2.addWeighted(overlay, 0.2, result_frame, 0.8, 0, result_frame) + + return result_frame
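For a quick sanity check of the tracking logic without a camera, a minimal sketch along these lines (not part of the commit; the coordinates and confidences are made up) exercises `ZoneTracker` directly with the `(x, y, w, h, confidence)` tuples that `FaceDetector.detect_faces()` would produce:

```python
# Hypothetical smoke test; run from the project root (requires opencv-python).
from zone_tracker import ZoneTracker

tracker = ZoneTracker(frame_width=640)  # entry: center x < 256, buffer: 288-352, exit: center x > 384

entry_face = (50, 120, 80, 80, 0.93)    # center x = 90  -> entry zone
exit_face = (460, 120, 80, 80, 0.88)    # center x = 500 -> exit zone

print(tracker.get_zone(50, 80))         # 'entry'
print(tracker.get_zone(460, 80))        # 'exit'

# The first sighting of each position-based id only records its zone;
# counters change when the same id later reappears in the opposite zone.
result = tracker.process_faces([entry_face, exit_face])
print(result['zones'])                  # {'1_2_3': 'entry', '9_2_3': 'exit'}
print(tracker.get_counts())             # {'total_entered': 0, 'total_exited': 0, 'current_occupancy': 0}
```

Because the ids come from a coarse position grid rather than a real tracker, `_get_face_id` simply groups nearby detections of similar size as the same person, as its docstring notes.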