diff --git a/camera.py b/camera.py index 7e04b7c..2b63cc9 100644 --- a/camera.py +++ b/camera.py @@ -6,7 +6,7 @@ Integrates face detection and zone tracking. import cv2 import threading import time -from face_detector import FaceDetector +from person_detector import PersonDetector from zone_tracker import ZoneTracker @@ -18,8 +18,8 @@ class Camera: Args: camera_index: Index of the USB camera (usually 0) - process_every_n_frames: Process face detection every N frames for performance - face_confidence: Confidence threshold for face detection + process_every_n_frames: Process detection every N frames for performance + face_confidence: Confidence threshold for person detection frame_width: Desired frame width frame_height: Desired frame height """ @@ -37,8 +37,8 @@ class Camera: self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, frame_width) self.cap.set(cv2.CAP_PROP_FRAME_HEIGHT, frame_height) - # Initialize face detector and zone tracker - self.face_detector = FaceDetector(confidence_threshold=face_confidence) + # Initialize person detector and zone tracker + self.person_detector = PersonDetector(confidence_threshold=face_confidence) self.zone_tracker = None # Will be initialized after first frame # Frame processing state @@ -95,7 +95,7 @@ class Camera: with self.lock: self.current_frame = frame.copy() - # Process face detection every N frames + # Process detection every N frames if self.frame_counter % self.process_every_n_frames == 0: processed_frame, counts = self._process_frame(frame) with self.lock: @@ -104,7 +104,7 @@ class Camera: def _process_frame(self, frame): """ - Process a single frame: detect faces, track zones, update counts. + Process a single frame: detect people, track zones, update counts. Args: frame: Input frame from camera @@ -112,12 +112,12 @@ class Camera: Returns: Tuple of (processed_frame, counts_dict) """ - # Detect faces - faces = self.face_detector.detect_faces(frame) + # Detect people + people = self.person_detector.detect_people(frame) # Track zones and update counts if self.zone_tracker: - counts = self.zone_tracker.process_faces(faces) + counts = self.zone_tracker.process_faces(people) else: counts = { 'total_entered': 0, @@ -131,8 +131,8 @@ class Camera: else: processed_frame = frame.copy() - # Draw faces on frame - processed_frame = self.face_detector.draw_faces(processed_frame, faces) + # Draw people on frame + processed_frame = self.person_detector.draw_people(processed_frame, people) # Draw count information on frame text_y = 60 diff --git a/download_models.py b/download_models.py index 6769fba..221abb3 100644 --- a/download_models.py +++ b/download_models.py @@ -1,77 +1,27 @@ #!/usr/bin/env python3 """ -Script to download OpenCV DNN face detection model files. -Downloads the required prototxt and caffemodel files for face detection. +Cleanup script for People Counter. +Removes unused model files since we switched to HOG detector. 
""" -import os -import urllib.request +import shutil from pathlib import Path -def download_file(url, destination): - """Download a file from URL to destination.""" - print(f"Downloading {os.path.basename(destination)}...") - try: - urllib.request.urlretrieve(url, destination) - print(f"✓ Successfully downloaded {os.path.basename(destination)}") - return True - except Exception as e: - print(f"✗ Error downloading {os.path.basename(destination)}: {e}") - return False - - def main(): - """Main function to download model files.""" - # Create models directory if it doesn't exist + print("Cleaning up unused model files...") + models_dir = Path("models") - models_dir.mkdir(exist_ok=True) - - # Model file URLs - prototxt_url = "https://raw.githubusercontent.com/opencv/opencv/master/samples/dnn/face_detector/deploy.prototxt" - model_url = "https://raw.githubusercontent.com/opencv/opencv_3rdparty/dnn_samples_face_detector_20170830/res10_300x300_ssd_iter_140000.caffemodel" - - # Destination paths - prototxt_path = models_dir / "deploy.prototxt" - model_path = models_dir / "res10_300x300_ssd_iter_140000.caffemodel" - - print("=" * 60) - print("OpenCV DNN Face Detection Model Downloader") - print("=" * 60) - print() - - # Check if files already exist - if prototxt_path.exists(): - print(f"⚠ {prototxt_path.name} already exists. Skipping download.") + if models_dir.exists(): + try: + shutil.rmtree(models_dir) + print("✓ Removed models directory") + except Exception as e: + print(f"✗ Failed to remove models directory: {e}") else: - success = download_file(prototxt_url, prototxt_path) - if not success: - print("\nAlternative: You can manually download deploy.prototxt from:") - print("https://github.com/opencv/opencv/blob/master/samples/dnn/face_detector/deploy.prototxt") - print() - - if model_path.exists(): - print(f"⚠ {model_path.name} already exists. Skipping download.") - else: - success = download_file(model_url, model_path) - if not success: - print("\n⚠ Warning: The caffemodel file is large (~10MB) and may require manual download.") - print("Alternative download methods:") - print("1. Using wget:") - print(f" wget -O {model_path} {model_url}") - print("2. Using curl:") - print(f" curl -L -o {model_path} {model_url}") - print("3. Direct browser download:") - print(f" {model_url}") - print() - - print() - print("=" * 60) - if prototxt_path.exists() and model_path.exists(): - print("✓ All model files are ready!") - else: - print("⚠ Some files may be missing. Please check the files above.") - print("=" * 60) + print("✓ No models directory to remove") + + print("\nSystem ready for HOG-based person detection.") if __name__ == "__main__": diff --git a/face_detector.py b/face_detector.py deleted file mode 100644 index 24a11eb..0000000 --- a/face_detector.py +++ /dev/null @@ -1,118 +0,0 @@ -""" -Face Detection Module using OpenCV DNN Face Detector -Uses pre-trained models for accurate face detection. -""" - -import cv2 -import numpy as np -import os - - -class FaceDetector: - def __init__(self, model_dir="models", confidence_threshold=0.5): - """ - Initialize the face detector with OpenCV DNN models. 
- - Args: - model_dir: Directory containing the model files - confidence_threshold: Minimum confidence for face detection (0.0-1.0) - """ - self.confidence_threshold = confidence_threshold - self.model_dir = model_dir - - # Paths to model files - self.prototxt_path = os.path.join(model_dir, "deploy.prototxt") - self.model_path = os.path.join(model_dir, "res10_300x300_ssd_iter_140000.caffemodel") - - # Load the DNN face detector - self.net = None - self._load_model() - - def _load_model(self): - """Load the OpenCV DNN face detection model.""" - if not os.path.exists(self.prototxt_path): - raise FileNotFoundError( - f"Model prototxt file not found: {self.prototxt_path}\n" - "Please download the model files first." - ) - if not os.path.exists(self.model_path): - raise FileNotFoundError( - f"Model weights file not found: {self.model_path}\n" - "Please download the model files first." - ) - - self.net = cv2.dnn.readNetFromCaffe(self.prototxt_path, self.model_path) - - def detect_faces(self, frame): - """ - Detect faces in a frame. - - Args: - frame: BGR image frame from OpenCV - - Returns: - List of tuples (x, y, w, h, confidence) for each detected face - where (x, y) is top-left corner, w and h are width and height - """ - if self.net is None: - return [] - - # Get frame dimensions - (h, w) = frame.shape[:2] - - # Create blob from frame (preprocessing for DNN) - blob = cv2.dnn.blobFromImage( - cv2.resize(frame, (300, 300)), - 1.0, - (300, 300), - (104.0, 177.0, 123.0) - ) - - # Pass blob through network - self.net.setInput(blob) - detections = self.net.forward() - - faces = [] - - # Process detections - for i in range(0, detections.shape[2]): - confidence = detections[0, 0, i, 2] - - # Filter weak detections - if confidence > self.confidence_threshold: - # Get bounding box coordinates - box = detections[0, 0, i, 3:7] * np.array([w, h, w, h]) - (x1, y1, x2, y2) = box.astype("int") - - # Ensure coordinates are within frame bounds - x1 = max(0, x1) - y1 = max(0, y1) - x2 = min(w, x2) - y2 = min(h, y2) - - # Convert to (x, y, w, h) format - faces.append((x1, y1, x2 - x1, y2 - y1, confidence)) - - return faces - - def draw_faces(self, frame, faces, color=(0, 255, 0), thickness=2): - """ - Draw bounding boxes around detected faces. 
- - Args: - frame: Frame to draw on - faces: List of face detections from detect_faces() - color: BGR color tuple for bounding boxes - thickness: Line thickness - - Returns: - Frame with bounding boxes drawn - """ - result_frame = frame.copy() - for (x, y, w, h, confidence) in faces: - cv2.rectangle(result_frame, (x, y), (x + w, y + h), color, thickness) - # Optionally draw confidence score - label = f"{confidence:.2f}" - cv2.putText(result_frame, label, (x, y - 10), - cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2) - return result_frame diff --git a/models/deploy.prototxt b/models/deploy.prototxt deleted file mode 100644 index a128515..0000000 --- a/models/deploy.prototxt +++ /dev/null @@ -1,1790 +0,0 @@ -input: "data" -input_shape { - dim: 1 - dim: 3 - dim: 300 - dim: 300 -} - -layer { - name: "data_bn" - type: "BatchNorm" - bottom: "data" - top: "data_bn" - param { - lr_mult: 0.0 - } - param { - lr_mult: 0.0 - } - param { - lr_mult: 0.0 - } -} -layer { - name: "data_scale" - type: "Scale" - bottom: "data_bn" - top: "data_bn" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 1.0 - } - scale_param { - bias_term: true - } -} -layer { - name: "conv1_h" - type: "Convolution" - bottom: "data_bn" - top: "conv1_h" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 32 - pad: 3 - kernel_size: 7 - stride: 2 - weight_filler { - type: "msra" - variance_norm: FAN_OUT - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "conv1_bn_h" - type: "BatchNorm" - bottom: "conv1_h" - top: "conv1_h" - param { - lr_mult: 0.0 - } - param { - lr_mult: 0.0 - } - param { - lr_mult: 0.0 - } -} -layer { - name: "conv1_scale_h" - type: "Scale" - bottom: "conv1_h" - top: "conv1_h" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 1.0 - } - scale_param { - bias_term: true - } -} -layer { - name: "conv1_relu" - type: "ReLU" - bottom: "conv1_h" - top: "conv1_h" -} -layer { - name: "conv1_pool" - type: "Pooling" - bottom: "conv1_h" - top: "conv1_pool" - pooling_param { - kernel_size: 3 - stride: 2 - } -} -layer { - name: "layer_64_1_conv1_h" - type: "Convolution" - bottom: "conv1_pool" - top: "layer_64_1_conv1_h" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 32 - bias_term: false - pad: 1 - kernel_size: 3 - stride: 1 - weight_filler { - type: "msra" - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "layer_64_1_bn2_h" - type: "BatchNorm" - bottom: "layer_64_1_conv1_h" - top: "layer_64_1_conv1_h" - param { - lr_mult: 0.0 - } - param { - lr_mult: 0.0 - } - param { - lr_mult: 0.0 - } -} -layer { - name: "layer_64_1_scale2_h" - type: "Scale" - bottom: "layer_64_1_conv1_h" - top: "layer_64_1_conv1_h" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 1.0 - } - scale_param { - bias_term: true - } -} -layer { - name: "layer_64_1_relu2" - type: "ReLU" - bottom: "layer_64_1_conv1_h" - top: "layer_64_1_conv1_h" -} -layer { - name: "layer_64_1_conv2_h" - type: "Convolution" - bottom: "layer_64_1_conv1_h" - top: "layer_64_1_conv2_h" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 32 - bias_term: false - pad: 1 - kernel_size: 3 - stride: 1 - weight_filler { - type: "msra" - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "layer_64_1_sum" - type: "Eltwise" - bottom: "layer_64_1_conv2_h" - bottom: 
"conv1_pool" - top: "layer_64_1_sum" -} -layer { - name: "layer_128_1_bn1_h" - type: "BatchNorm" - bottom: "layer_64_1_sum" - top: "layer_128_1_bn1_h" - param { - lr_mult: 0.0 - } - param { - lr_mult: 0.0 - } - param { - lr_mult: 0.0 - } -} -layer { - name: "layer_128_1_scale1_h" - type: "Scale" - bottom: "layer_128_1_bn1_h" - top: "layer_128_1_bn1_h" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 1.0 - } - scale_param { - bias_term: true - } -} -layer { - name: "layer_128_1_relu1" - type: "ReLU" - bottom: "layer_128_1_bn1_h" - top: "layer_128_1_bn1_h" -} -layer { - name: "layer_128_1_conv1_h" - type: "Convolution" - bottom: "layer_128_1_bn1_h" - top: "layer_128_1_conv1_h" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 128 - bias_term: false - pad: 1 - kernel_size: 3 - stride: 2 - weight_filler { - type: "msra" - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "layer_128_1_bn2" - type: "BatchNorm" - bottom: "layer_128_1_conv1_h" - top: "layer_128_1_conv1_h" - param { - lr_mult: 0.0 - } - param { - lr_mult: 0.0 - } - param { - lr_mult: 0.0 - } -} -layer { - name: "layer_128_1_scale2" - type: "Scale" - bottom: "layer_128_1_conv1_h" - top: "layer_128_1_conv1_h" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 1.0 - } - scale_param { - bias_term: true - } -} -layer { - name: "layer_128_1_relu2" - type: "ReLU" - bottom: "layer_128_1_conv1_h" - top: "layer_128_1_conv1_h" -} -layer { - name: "layer_128_1_conv2" - type: "Convolution" - bottom: "layer_128_1_conv1_h" - top: "layer_128_1_conv2" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 128 - bias_term: false - pad: 1 - kernel_size: 3 - stride: 1 - weight_filler { - type: "msra" - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "layer_128_1_conv_expand_h" - type: "Convolution" - bottom: "layer_128_1_bn1_h" - top: "layer_128_1_conv_expand_h" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 128 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 2 - weight_filler { - type: "msra" - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "layer_128_1_sum" - type: "Eltwise" - bottom: "layer_128_1_conv2" - bottom: "layer_128_1_conv_expand_h" - top: "layer_128_1_sum" -} -layer { - name: "layer_256_1_bn1" - type: "BatchNorm" - bottom: "layer_128_1_sum" - top: "layer_256_1_bn1" - param { - lr_mult: 0.0 - } - param { - lr_mult: 0.0 - } - param { - lr_mult: 0.0 - } -} -layer { - name: "layer_256_1_scale1" - type: "Scale" - bottom: "layer_256_1_bn1" - top: "layer_256_1_bn1" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 1.0 - } - scale_param { - bias_term: true - } -} -layer { - name: "layer_256_1_relu1" - type: "ReLU" - bottom: "layer_256_1_bn1" - top: "layer_256_1_bn1" -} -layer { - name: "layer_256_1_conv1" - type: "Convolution" - bottom: "layer_256_1_bn1" - top: "layer_256_1_conv1" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 256 - bias_term: false - pad: 1 - kernel_size: 3 - stride: 2 - weight_filler { - type: "msra" - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "layer_256_1_bn2" - type: "BatchNorm" - bottom: "layer_256_1_conv1" - top: "layer_256_1_conv1" - param { - lr_mult: 0.0 - } - param { - lr_mult: 0.0 - } - param { - lr_mult: 0.0 - } -} -layer { - name: "layer_256_1_scale2" 
- type: "Scale" - bottom: "layer_256_1_conv1" - top: "layer_256_1_conv1" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 1.0 - } - scale_param { - bias_term: true - } -} -layer { - name: "layer_256_1_relu2" - type: "ReLU" - bottom: "layer_256_1_conv1" - top: "layer_256_1_conv1" -} -layer { - name: "layer_256_1_conv2" - type: "Convolution" - bottom: "layer_256_1_conv1" - top: "layer_256_1_conv2" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 256 - bias_term: false - pad: 1 - kernel_size: 3 - stride: 1 - weight_filler { - type: "msra" - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "layer_256_1_conv_expand" - type: "Convolution" - bottom: "layer_256_1_bn1" - top: "layer_256_1_conv_expand" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 256 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 2 - weight_filler { - type: "msra" - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "layer_256_1_sum" - type: "Eltwise" - bottom: "layer_256_1_conv2" - bottom: "layer_256_1_conv_expand" - top: "layer_256_1_sum" -} -layer { - name: "layer_512_1_bn1" - type: "BatchNorm" - bottom: "layer_256_1_sum" - top: "layer_512_1_bn1" - param { - lr_mult: 0.0 - } - param { - lr_mult: 0.0 - } - param { - lr_mult: 0.0 - } -} -layer { - name: "layer_512_1_scale1" - type: "Scale" - bottom: "layer_512_1_bn1" - top: "layer_512_1_bn1" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 1.0 - } - scale_param { - bias_term: true - } -} -layer { - name: "layer_512_1_relu1" - type: "ReLU" - bottom: "layer_512_1_bn1" - top: "layer_512_1_bn1" -} -layer { - name: "layer_512_1_conv1_h" - type: "Convolution" - bottom: "layer_512_1_bn1" - top: "layer_512_1_conv1_h" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 128 - bias_term: false - pad: 1 - kernel_size: 3 - stride: 1 # 2 - weight_filler { - type: "msra" - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "layer_512_1_bn2_h" - type: "BatchNorm" - bottom: "layer_512_1_conv1_h" - top: "layer_512_1_conv1_h" - param { - lr_mult: 0.0 - } - param { - lr_mult: 0.0 - } - param { - lr_mult: 0.0 - } -} -layer { - name: "layer_512_1_scale2_h" - type: "Scale" - bottom: "layer_512_1_conv1_h" - top: "layer_512_1_conv1_h" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 1.0 - } - scale_param { - bias_term: true - } -} -layer { - name: "layer_512_1_relu2" - type: "ReLU" - bottom: "layer_512_1_conv1_h" - top: "layer_512_1_conv1_h" -} -layer { - name: "layer_512_1_conv2_h" - type: "Convolution" - bottom: "layer_512_1_conv1_h" - top: "layer_512_1_conv2_h" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 256 - bias_term: false - pad: 2 # 1 - kernel_size: 3 - stride: 1 - dilation: 2 - weight_filler { - type: "msra" - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "layer_512_1_conv_expand_h" - type: "Convolution" - bottom: "layer_512_1_bn1" - top: "layer_512_1_conv_expand_h" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 256 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 # 2 - weight_filler { - type: "msra" - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "layer_512_1_sum" - type: "Eltwise" - bottom: "layer_512_1_conv2_h" - bottom: "layer_512_1_conv_expand_h" - 
top: "layer_512_1_sum" -} -layer { - name: "last_bn_h" - type: "BatchNorm" - bottom: "layer_512_1_sum" - top: "layer_512_1_sum" - param { - lr_mult: 0.0 - } - param { - lr_mult: 0.0 - } - param { - lr_mult: 0.0 - } -} -layer { - name: "last_scale_h" - type: "Scale" - bottom: "layer_512_1_sum" - top: "layer_512_1_sum" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 1.0 - } - scale_param { - bias_term: true - } -} -layer { - name: "last_relu" - type: "ReLU" - bottom: "layer_512_1_sum" - top: "fc7" -} - -layer { - name: "conv6_1_h" - type: "Convolution" - bottom: "fc7" - top: "conv6_1_h" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 0 - } - convolution_param { - num_output: 128 - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } -} -layer { - name: "conv6_1_relu" - type: "ReLU" - bottom: "conv6_1_h" - top: "conv6_1_h" -} -layer { - name: "conv6_2_h" - type: "Convolution" - bottom: "conv6_1_h" - top: "conv6_2_h" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 0 - } - convolution_param { - num_output: 256 - pad: 1 - kernel_size: 3 - stride: 2 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } -} -layer { - name: "conv6_2_relu" - type: "ReLU" - bottom: "conv6_2_h" - top: "conv6_2_h" -} -layer { - name: "conv7_1_h" - type: "Convolution" - bottom: "conv6_2_h" - top: "conv7_1_h" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 0 - } - convolution_param { - num_output: 64 - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } -} -layer { - name: "conv7_1_relu" - type: "ReLU" - bottom: "conv7_1_h" - top: "conv7_1_h" -} -layer { - name: "conv7_2_h" - type: "Convolution" - bottom: "conv7_1_h" - top: "conv7_2_h" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 0 - } - convolution_param { - num_output: 128 - pad: 1 - kernel_size: 3 - stride: 2 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } -} -layer { - name: "conv7_2_relu" - type: "ReLU" - bottom: "conv7_2_h" - top: "conv7_2_h" -} -layer { - name: "conv8_1_h" - type: "Convolution" - bottom: "conv7_2_h" - top: "conv8_1_h" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 0 - } - convolution_param { - num_output: 64 - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } -} -layer { - name: "conv8_1_relu" - type: "ReLU" - bottom: "conv8_1_h" - top: "conv8_1_h" -} -layer { - name: "conv8_2_h" - type: "Convolution" - bottom: "conv8_1_h" - top: "conv8_2_h" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 0 - } - convolution_param { - num_output: 128 - pad: 0 - kernel_size: 3 - stride: 1 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } -} -layer { - name: "conv8_2_relu" - type: "ReLU" - bottom: "conv8_2_h" - top: "conv8_2_h" -} -layer { - name: "conv9_1_h" - type: "Convolution" - bottom: "conv8_2_h" - top: "conv9_1_h" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 0 - } - convolution_param { - num_output: 64 - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } -} -layer { 
- name: "conv9_1_relu" - type: "ReLU" - bottom: "conv9_1_h" - top: "conv9_1_h" -} -layer { - name: "conv9_2_h" - type: "Convolution" - bottom: "conv9_1_h" - top: "conv9_2_h" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 0 - } - convolution_param { - num_output: 128 - pad: 0 - kernel_size: 3 - stride: 1 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } -} -layer { - name: "conv9_2_relu" - type: "ReLU" - bottom: "conv9_2_h" - top: "conv9_2_h" -} -layer { - name: "conv4_3_norm" - type: "Normalize" - bottom: "layer_256_1_bn1" - top: "conv4_3_norm" - norm_param { - across_spatial: false - scale_filler { - type: "constant" - value: 20 - } - channel_shared: false - } -} -layer { - name: "conv4_3_norm_mbox_loc" - type: "Convolution" - bottom: "conv4_3_norm" - top: "conv4_3_norm_mbox_loc" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 0 - } - convolution_param { - num_output: 16 - pad: 1 - kernel_size: 3 - stride: 1 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } -} -layer { - name: "conv4_3_norm_mbox_loc_perm" - type: "Permute" - bottom: "conv4_3_norm_mbox_loc" - top: "conv4_3_norm_mbox_loc_perm" - permute_param { - order: 0 - order: 2 - order: 3 - order: 1 - } -} -layer { - name: "conv4_3_norm_mbox_loc_flat" - type: "Flatten" - bottom: "conv4_3_norm_mbox_loc_perm" - top: "conv4_3_norm_mbox_loc_flat" - flatten_param { - axis: 1 - } -} -layer { - name: "conv4_3_norm_mbox_conf" - type: "Convolution" - bottom: "conv4_3_norm" - top: "conv4_3_norm_mbox_conf" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 0 - } - convolution_param { - num_output: 8 # 84 - pad: 1 - kernel_size: 3 - stride: 1 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } -} -layer { - name: "conv4_3_norm_mbox_conf_perm" - type: "Permute" - bottom: "conv4_3_norm_mbox_conf" - top: "conv4_3_norm_mbox_conf_perm" - permute_param { - order: 0 - order: 2 - order: 3 - order: 1 - } -} -layer { - name: "conv4_3_norm_mbox_conf_flat" - type: "Flatten" - bottom: "conv4_3_norm_mbox_conf_perm" - top: "conv4_3_norm_mbox_conf_flat" - flatten_param { - axis: 1 - } -} -layer { - name: "conv4_3_norm_mbox_priorbox" - type: "PriorBox" - bottom: "conv4_3_norm" - bottom: "data" - top: "conv4_3_norm_mbox_priorbox" - prior_box_param { - min_size: 30.0 - max_size: 60.0 - aspect_ratio: 2 - flip: true - clip: false - variance: 0.1 - variance: 0.1 - variance: 0.2 - variance: 0.2 - step: 8 - offset: 0.5 - } -} -layer { - name: "fc7_mbox_loc" - type: "Convolution" - bottom: "fc7" - top: "fc7_mbox_loc" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 0 - } - convolution_param { - num_output: 24 - pad: 1 - kernel_size: 3 - stride: 1 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } -} -layer { - name: "fc7_mbox_loc_perm" - type: "Permute" - bottom: "fc7_mbox_loc" - top: "fc7_mbox_loc_perm" - permute_param { - order: 0 - order: 2 - order: 3 - order: 1 - } -} -layer { - name: "fc7_mbox_loc_flat" - type: "Flatten" - bottom: "fc7_mbox_loc_perm" - top: "fc7_mbox_loc_flat" - flatten_param { - axis: 1 - } -} -layer { - name: "fc7_mbox_conf" - type: "Convolution" - bottom: "fc7" - top: "fc7_mbox_conf" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 0 - } - convolution_param { - num_output: 12 # 126 - pad: 1 - kernel_size: 3 - stride: 1 - 
weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } -} -layer { - name: "fc7_mbox_conf_perm" - type: "Permute" - bottom: "fc7_mbox_conf" - top: "fc7_mbox_conf_perm" - permute_param { - order: 0 - order: 2 - order: 3 - order: 1 - } -} -layer { - name: "fc7_mbox_conf_flat" - type: "Flatten" - bottom: "fc7_mbox_conf_perm" - top: "fc7_mbox_conf_flat" - flatten_param { - axis: 1 - } -} -layer { - name: "fc7_mbox_priorbox" - type: "PriorBox" - bottom: "fc7" - bottom: "data" - top: "fc7_mbox_priorbox" - prior_box_param { - min_size: 60.0 - max_size: 111.0 - aspect_ratio: 2 - aspect_ratio: 3 - flip: true - clip: false - variance: 0.1 - variance: 0.1 - variance: 0.2 - variance: 0.2 - step: 16 - offset: 0.5 - } -} -layer { - name: "conv6_2_mbox_loc" - type: "Convolution" - bottom: "conv6_2_h" - top: "conv6_2_mbox_loc" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 0 - } - convolution_param { - num_output: 24 - pad: 1 - kernel_size: 3 - stride: 1 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } -} -layer { - name: "conv6_2_mbox_loc_perm" - type: "Permute" - bottom: "conv6_2_mbox_loc" - top: "conv6_2_mbox_loc_perm" - permute_param { - order: 0 - order: 2 - order: 3 - order: 1 - } -} -layer { - name: "conv6_2_mbox_loc_flat" - type: "Flatten" - bottom: "conv6_2_mbox_loc_perm" - top: "conv6_2_mbox_loc_flat" - flatten_param { - axis: 1 - } -} -layer { - name: "conv6_2_mbox_conf" - type: "Convolution" - bottom: "conv6_2_h" - top: "conv6_2_mbox_conf" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 0 - } - convolution_param { - num_output: 12 # 126 - pad: 1 - kernel_size: 3 - stride: 1 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } -} -layer { - name: "conv6_2_mbox_conf_perm" - type: "Permute" - bottom: "conv6_2_mbox_conf" - top: "conv6_2_mbox_conf_perm" - permute_param { - order: 0 - order: 2 - order: 3 - order: 1 - } -} -layer { - name: "conv6_2_mbox_conf_flat" - type: "Flatten" - bottom: "conv6_2_mbox_conf_perm" - top: "conv6_2_mbox_conf_flat" - flatten_param { - axis: 1 - } -} -layer { - name: "conv6_2_mbox_priorbox" - type: "PriorBox" - bottom: "conv6_2_h" - bottom: "data" - top: "conv6_2_mbox_priorbox" - prior_box_param { - min_size: 111.0 - max_size: 162.0 - aspect_ratio: 2 - aspect_ratio: 3 - flip: true - clip: false - variance: 0.1 - variance: 0.1 - variance: 0.2 - variance: 0.2 - step: 32 - offset: 0.5 - } -} -layer { - name: "conv7_2_mbox_loc" - type: "Convolution" - bottom: "conv7_2_h" - top: "conv7_2_mbox_loc" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 0 - } - convolution_param { - num_output: 24 - pad: 1 - kernel_size: 3 - stride: 1 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } -} -layer { - name: "conv7_2_mbox_loc_perm" - type: "Permute" - bottom: "conv7_2_mbox_loc" - top: "conv7_2_mbox_loc_perm" - permute_param { - order: 0 - order: 2 - order: 3 - order: 1 - } -} -layer { - name: "conv7_2_mbox_loc_flat" - type: "Flatten" - bottom: "conv7_2_mbox_loc_perm" - top: "conv7_2_mbox_loc_flat" - flatten_param { - axis: 1 - } -} -layer { - name: "conv7_2_mbox_conf" - type: "Convolution" - bottom: "conv7_2_h" - top: "conv7_2_mbox_conf" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 0 - } - convolution_param { - num_output: 12 # 126 - pad: 1 - kernel_size: 3 - stride: 1 - weight_filler { - type: 
"xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } -} -layer { - name: "conv7_2_mbox_conf_perm" - type: "Permute" - bottom: "conv7_2_mbox_conf" - top: "conv7_2_mbox_conf_perm" - permute_param { - order: 0 - order: 2 - order: 3 - order: 1 - } -} -layer { - name: "conv7_2_mbox_conf_flat" - type: "Flatten" - bottom: "conv7_2_mbox_conf_perm" - top: "conv7_2_mbox_conf_flat" - flatten_param { - axis: 1 - } -} -layer { - name: "conv7_2_mbox_priorbox" - type: "PriorBox" - bottom: "conv7_2_h" - bottom: "data" - top: "conv7_2_mbox_priorbox" - prior_box_param { - min_size: 162.0 - max_size: 213.0 - aspect_ratio: 2 - aspect_ratio: 3 - flip: true - clip: false - variance: 0.1 - variance: 0.1 - variance: 0.2 - variance: 0.2 - step: 64 - offset: 0.5 - } -} -layer { - name: "conv8_2_mbox_loc" - type: "Convolution" - bottom: "conv8_2_h" - top: "conv8_2_mbox_loc" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 0 - } - convolution_param { - num_output: 16 - pad: 1 - kernel_size: 3 - stride: 1 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } -} -layer { - name: "conv8_2_mbox_loc_perm" - type: "Permute" - bottom: "conv8_2_mbox_loc" - top: "conv8_2_mbox_loc_perm" - permute_param { - order: 0 - order: 2 - order: 3 - order: 1 - } -} -layer { - name: "conv8_2_mbox_loc_flat" - type: "Flatten" - bottom: "conv8_2_mbox_loc_perm" - top: "conv8_2_mbox_loc_flat" - flatten_param { - axis: 1 - } -} -layer { - name: "conv8_2_mbox_conf" - type: "Convolution" - bottom: "conv8_2_h" - top: "conv8_2_mbox_conf" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 0 - } - convolution_param { - num_output: 8 # 84 - pad: 1 - kernel_size: 3 - stride: 1 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } -} -layer { - name: "conv8_2_mbox_conf_perm" - type: "Permute" - bottom: "conv8_2_mbox_conf" - top: "conv8_2_mbox_conf_perm" - permute_param { - order: 0 - order: 2 - order: 3 - order: 1 - } -} -layer { - name: "conv8_2_mbox_conf_flat" - type: "Flatten" - bottom: "conv8_2_mbox_conf_perm" - top: "conv8_2_mbox_conf_flat" - flatten_param { - axis: 1 - } -} -layer { - name: "conv8_2_mbox_priorbox" - type: "PriorBox" - bottom: "conv8_2_h" - bottom: "data" - top: "conv8_2_mbox_priorbox" - prior_box_param { - min_size: 213.0 - max_size: 264.0 - aspect_ratio: 2 - flip: true - clip: false - variance: 0.1 - variance: 0.1 - variance: 0.2 - variance: 0.2 - step: 100 - offset: 0.5 - } -} -layer { - name: "conv9_2_mbox_loc" - type: "Convolution" - bottom: "conv9_2_h" - top: "conv9_2_mbox_loc" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 0 - } - convolution_param { - num_output: 16 - pad: 1 - kernel_size: 3 - stride: 1 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0 - } - } -} -layer { - name: "conv9_2_mbox_loc_perm" - type: "Permute" - bottom: "conv9_2_mbox_loc" - top: "conv9_2_mbox_loc_perm" - permute_param { - order: 0 - order: 2 - order: 3 - order: 1 - } -} -layer { - name: "conv9_2_mbox_loc_flat" - type: "Flatten" - bottom: "conv9_2_mbox_loc_perm" - top: "conv9_2_mbox_loc_flat" - flatten_param { - axis: 1 - } -} -layer { - name: "conv9_2_mbox_conf" - type: "Convolution" - bottom: "conv9_2_h" - top: "conv9_2_mbox_conf" - param { - lr_mult: 1 - decay_mult: 1 - } - param { - lr_mult: 2 - decay_mult: 0 - } - convolution_param { - num_output: 8 # 84 - pad: 1 - kernel_size: 3 - stride: 1 - weight_filler { - type: "xavier" 
- } - bias_filler { - type: "constant" - value: 0 - } - } -} -layer { - name: "conv9_2_mbox_conf_perm" - type: "Permute" - bottom: "conv9_2_mbox_conf" - top: "conv9_2_mbox_conf_perm" - permute_param { - order: 0 - order: 2 - order: 3 - order: 1 - } -} -layer { - name: "conv9_2_mbox_conf_flat" - type: "Flatten" - bottom: "conv9_2_mbox_conf_perm" - top: "conv9_2_mbox_conf_flat" - flatten_param { - axis: 1 - } -} -layer { - name: "conv9_2_mbox_priorbox" - type: "PriorBox" - bottom: "conv9_2_h" - bottom: "data" - top: "conv9_2_mbox_priorbox" - prior_box_param { - min_size: 264.0 - max_size: 315.0 - aspect_ratio: 2 - flip: true - clip: false - variance: 0.1 - variance: 0.1 - variance: 0.2 - variance: 0.2 - step: 300 - offset: 0.5 - } -} -layer { - name: "mbox_loc" - type: "Concat" - bottom: "conv4_3_norm_mbox_loc_flat" - bottom: "fc7_mbox_loc_flat" - bottom: "conv6_2_mbox_loc_flat" - bottom: "conv7_2_mbox_loc_flat" - bottom: "conv8_2_mbox_loc_flat" - bottom: "conv9_2_mbox_loc_flat" - top: "mbox_loc" - concat_param { - axis: 1 - } -} -layer { - name: "mbox_conf" - type: "Concat" - bottom: "conv4_3_norm_mbox_conf_flat" - bottom: "fc7_mbox_conf_flat" - bottom: "conv6_2_mbox_conf_flat" - bottom: "conv7_2_mbox_conf_flat" - bottom: "conv8_2_mbox_conf_flat" - bottom: "conv9_2_mbox_conf_flat" - top: "mbox_conf" - concat_param { - axis: 1 - } -} -layer { - name: "mbox_priorbox" - type: "Concat" - bottom: "conv4_3_norm_mbox_priorbox" - bottom: "fc7_mbox_priorbox" - bottom: "conv6_2_mbox_priorbox" - bottom: "conv7_2_mbox_priorbox" - bottom: "conv8_2_mbox_priorbox" - bottom: "conv9_2_mbox_priorbox" - top: "mbox_priorbox" - concat_param { - axis: 2 - } -} - -layer { - name: "mbox_conf_reshape" - type: "Reshape" - bottom: "mbox_conf" - top: "mbox_conf_reshape" - reshape_param { - shape { - dim: 0 - dim: -1 - dim: 2 - } - } -} -layer { - name: "mbox_conf_softmax" - type: "Softmax" - bottom: "mbox_conf_reshape" - top: "mbox_conf_softmax" - softmax_param { - axis: 2 - } -} -layer { - name: "mbox_conf_flatten" - type: "Flatten" - bottom: "mbox_conf_softmax" - top: "mbox_conf_flatten" - flatten_param { - axis: 1 - } -} - -layer { - name: "detection_out" - type: "DetectionOutput" - bottom: "mbox_loc" - bottom: "mbox_conf_flatten" - bottom: "mbox_priorbox" - top: "detection_out" - include { - phase: TEST - } - detection_output_param { - num_classes: 2 - share_location: true - background_label_id: 0 - nms_param { - nms_threshold: 0.45 - top_k: 400 - } - code_type: CENTER_SIZE - keep_top_k: 200 - confidence_threshold: 0.01 - clip: 1 - } -} diff --git a/models/res10_300x300_ssd_iter_140000.caffemodel b/models/res10_300x300_ssd_iter_140000.caffemodel deleted file mode 100644 index 809dfd7..0000000 Binary files a/models/res10_300x300_ssd_iter_140000.caffemodel and /dev/null differ diff --git a/person_detector.py b/person_detector.py new file mode 100644 index 0000000..e4e7ecb --- /dev/null +++ b/person_detector.py @@ -0,0 +1,129 @@ +""" +Person Detection Module using OpenCV HOG (Histogram of Oriented Gradients). +Uses built-in OpenCV people detector - no external model files required. +""" + +import cv2 +import numpy as np + + +class PersonDetector: + def __init__(self, model_dir=None, confidence_threshold=0.6): + """ + Initialize the person detector with HOG descriptor. 
+ + Args: + model_dir: Ignored for HOG (kept for API compatibility) + confidence_threshold: Threshold for detection weights + """ + self.confidence_threshold = confidence_threshold + + # Initialize HOG descriptor/person detector + self.hog = cv2.HOGDescriptor() + self.hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector()) + print("Initialized HOG Person Detector") + + def detect_people(self, frame): + """ + Detect people in a frame using HOG. + + Args: + frame: BGR image frame from OpenCV + + Returns: + List of tuples (x, y, w, h, confidence) for each detected person + """ + # Resize for faster processing (optional, but HOG is computationally expensive) + # Using a slightly smaller scale can speed things up significantly + scale = 1.0 + if frame.shape[1] > 640: + scale = 640 / frame.shape[1] + frame_small = cv2.resize(frame, None, fx=scale, fy=scale) + else: + frame_small = frame + + # Detect people + # winStride: step size in x and y + # padding: padding around the input + # scale: coefficient of the detection window increase + (rects, weights) = self.hog.detectMultiScale( + frame_small, + winStride=(4, 4), + padding=(8, 8), + scale=1.05, + hitThreshold=0.0 # Default + ) + + people = [] + + # Convert detected rectangles to our format + for i, (x, y, w, h) in enumerate(rects): + confidence = weights[i] + + # HOG returns confidence scores, usually > 0. + # We can filter if needed. + check_conf = float(confidence) if isinstance(confidence, (float, np.float32, np.float64)) else float(confidence[0]) + + if check_conf > self.confidence_threshold: + # Scale back up if we resized + if scale != 1.0: + x = int(x / scale) + y = int(y / scale) + w = int(w / scale) + h = int(h / scale) + + # Size filtering + # Ignore detections that are too small (noise) or too large (walls/windows) + # Assumes 640x480 or similar resolution + if w < 40 or w > 400 or h < 80 or h > 480: + continue + + # Ensure coordinates are within frame bounds (simple clamp) + x = max(0, x) + y = max(0, y) + + people.append((x, y, w, h, check_conf)) + + return people + + detect_faces = detect_people # Alias for compatibility + + def draw_people(self, frame, people, color=(0, 255, 0), thickness=2): + """ + Draw bounding boxes around detected people. + """ + result_frame = frame.copy() + for (x, y, w, h, confidence) in people: + cv2.rectangle(result_frame, (x, y), (x + w, y + h), color, thickness) + + # Draw label + label = f"Person: {confidence:.2f}" + + # Get label size + (label_w, label_h), baseline = cv2.getTextSize( + label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1 + ) + + # Draw background rectangle for label + cv2.rectangle( + result_frame, + (x, y - label_h - 10), + (x + label_w, y), + color, + -1 + ) + + # Draw text + cv2.putText( + result_frame, + label, + (x, y - 5), + cv2.FONT_HERSHEY_SIMPLEX, + 0.5, + (0, 0, 0), + 1 + ) + + return result_frame + + draw_faces = draw_people # Alias for compatibility diff --git a/zone_tracker.py b/zone_tracker.py index 82fa62c..89ca731 100644 --- a/zone_tracker.py +++ b/zone_tracker.py @@ -10,7 +10,7 @@ from collections import defaultdict class ZoneTracker: def __init__(self, frame_width, entry_zone_percent=0.4, exit_zone_percent=0.4, - cooldown_seconds=2.0, center_buffer_percent=0.1): + cooldown_seconds=4.0, center_buffer_percent=0.1): """ Initialize the zone tracker. 
@@ -97,8 +97,8 @@ class ZoneTracker: Returns: face_id if matched, None if new face """ - max_distance = 100 # Maximum pixel distance to consider it the same face - max_size_diff = 50 # Maximum size difference to consider it the same face + max_distance = 150 # Maximum pixel distance to consider it the same face + max_size_diff = 100 # Maximum size difference to consider it the same face for face_id, face_data in self.tracked_faces.items(): # Skip if face hasn't been seen recently (within last 2 seconds)
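Note: a minimal standalone sketch of the new detector API introduced above. The camera index and output filename are illustrative; PersonDetector, detect_people, and draw_people are the names added by person_detector.py in this diff:

    import cv2
    from person_detector import PersonDetector

    detector = PersonDetector(confidence_threshold=0.6)

    cap = cv2.VideoCapture(0)   # USB camera index 0, as in camera.py
    ret, frame = cap.read()
    cap.release()

    if ret:
        # detect_people returns a list of (x, y, w, h, confidence) tuples
        people = detector.detect_people(frame)
        print(f"Detected {len(people)} people")

        # draw_people returns a copy of the frame with labeled bounding boxes
        annotated = detector.draw_people(frame, people)
        cv2.imwrite("detections.jpg", annotated)   # illustrative output path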
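Note: the loosened tracker constants (max_distance 100 -> 150 px, max_size_diff 50 -> 100 px, cooldown 2.0 -> 4.0 s) reflect that full-body HOG boxes are larger and jitter more between frames than the face boxes they replace. The matching logic itself is not shown in this hunk; a hypothetical criterion consistent with these thresholds might look like:

    # Hypothetical sketch only -- the real check lives in ZoneTracker's
    # matching method, which this diff does not modify.
    import math

    def same_person(prev_box, new_box, max_distance=150, max_size_diff=100):
        px, py, pw, ph = prev_box
        nx, ny, nw, nh = new_box
        # Compare box centers and widths between consecutive detections
        center_dist = math.hypot((px + pw / 2) - (nx + nw / 2),
                                 (py + ph / 2) - (ny + nh / 2))
        return center_dist <= max_distance and abs(pw - nw) <= max_size_diff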