田运杰 10 months ago
parent
commit
71b3842a4e

+ 30 - 0
ultralytics/solutions/__init__.py

@@ -0,0 +1,30 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+from .ai_gym import AIGym
+from .analytics import Analytics
+from .distance_calculation import DistanceCalculation
+from .heatmap import Heatmap
+from .object_counter import ObjectCounter
+from .parking_management import ParkingManagement, ParkingPtsSelection
+from .queue_management import QueueManager
+from .region_counter import RegionCounter
+from .security_alarm import SecurityAlarm
+from .speed_estimation import SpeedEstimator
+from .streamlit_inference import Inference
+from .trackzone import TrackZone
+
+__all__ = (
+    "AIGym",
+    "DistanceCalculation",
+    "Heatmap",
+    "ObjectCounter",
+    "ParkingManagement",
+    "ParkingPtsSelection",
+    "QueueManager",
+    "SpeedEstimator",
+    "Analytics",
+    "Inference",
+    "RegionCounter",
+    "TrackZone",
+    "SecurityAlarm",
+)
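
The new package interface can be exercised directly from the namespace above; a minimal sketch (the model names are assumptions based on the default used in ai_gym.py below):

import ultralytics.solutions as solutions

print(solutions.__all__)  # the 13 exported solution classes, AIGym through SecurityAlarm
gym = solutions.AIGym(model="yolo11n-pose.pt")         # pose-based workout monitoring
counter = solutions.ObjectCounter(model="yolo11n.pt")  # region-based object counting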

+ 111 - 0
ultralytics/solutions/ai_gym.py

@@ -0,0 +1,111 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+from ultralytics.solutions.solutions import BaseSolution
+from ultralytics.utils.plotting import Annotator
+
+
+class AIGym(BaseSolution):
+    """
+    A class to monitor workout repetitions of people in a real-time video stream based on their poses.
+
+    This class extends BaseSolution to monitor workouts using YOLO pose estimation models. It tracks and counts
+    repetitions of exercises based on predefined angle thresholds for up and down positions.
+
+    Attributes:
+        count (List[int]): Repetition counts for each detected person.
+        angle (List[float]): Current angle of the tracked body part for each person.
+        stage (List[str]): Current exercise stage ('up', 'down', or '-') for each person.
+        initial_stage (str | None): Initial stage of the exercise.
+        up_angle (float): Angle threshold for considering the 'up' position of an exercise.
+        down_angle (float): Angle threshold for considering the 'down' position of an exercise.
+        kpts (List[int]): Indices of keypoints used for angle calculation.
+        annotator (Annotator): Object for drawing annotations on the image.
+
+    Methods:
+        monitor: Processes a frame to detect poses, calculate angles, and count repetitions.
+
+    Examples:
+        >>> gym = AIGym(model="yolov8n-pose.pt")
+        >>> image = cv2.imread("gym_scene.jpg")
+        >>> processed_image = gym.monitor(image)
+        >>> cv2.imshow("Processed Image", processed_image)
+        >>> cv2.waitKey(0)
+    """
+
+    def __init__(self, **kwargs):
+        """Initializes AIGym for workout monitoring using pose estimation and predefined angles."""
+        # Ensure a pose model is used; fall back to the default pose model otherwise
+        if "model" in kwargs and "-pose" not in kwargs["model"]:
+            kwargs["model"] = "yolo11n-pose.pt"
+        elif "model" not in kwargs:
+            kwargs["model"] = "yolo11n-pose.pt"
+
+        super().__init__(**kwargs)
+        self.count = []  # Repetition counts, one entry per tracked person
+        self.angle = []  # Current joint angles, one entry per tracked person
+        self.stage = []  # Exercise stages ('up', 'down' or '-'), one entry per tracked person
+
+        # Extract details from CFG single time for usage later
+        self.initial_stage = None
+        self.up_angle = float(self.CFG["up_angle"])  # Pose up predefined angle to consider up pose
+        self.down_angle = float(self.CFG["down_angle"])  # Pose down predefined angle to consider down pose
+        self.kpts = self.CFG["kpts"]  # User selected kpts of workouts storage for further usage
+
+    def monitor(self, im0):
+        """
+        Monitors workouts using Ultralytics YOLO Pose Model.
+
+        This function processes an input image to track and analyze human poses for workout monitoring. It uses
+        the YOLO Pose model to detect keypoints, estimate angles, and count repetitions based on predefined
+        angle thresholds.
+
+        Args:
+            im0 (ndarray): Input image for processing.
+
+        Returns:
+            (ndarray): Processed image with annotations for workout monitoring.
+
+        Examples:
+            >>> gym = AIGym()
+            >>> image = cv2.imread("workout.jpg")
+            >>> processed_image = gym.monitor(image)
+        """
+        # Extract tracks
+        tracks = self.model.track(source=im0, persist=True, classes=self.CFG["classes"], **self.track_add_args)[0]
+
+        if tracks.boxes.id is not None:
+            # Extract and check keypoints
+            if len(tracks) > len(self.count):
+                new_human = len(tracks) - len(self.count)
+                self.angle += [0] * new_human
+                self.count += [0] * new_human
+                self.stage += ["-"] * new_human
+
+            # Initialize annotator
+            self.annotator = Annotator(im0, line_width=self.line_width)
+
+            # Enumerate over keypoints
+            for ind, k in enumerate(reversed(tracks.keypoints.data)):
+                # Get keypoints and estimate the angle
+                kpts = [k[int(self.kpts[i])].cpu() for i in range(3)]
+                self.angle[ind] = self.annotator.estimate_pose_angle(*kpts)
+                im0 = self.annotator.draw_specific_points(k, self.kpts, radius=self.line_width * 3)
+
+                # Determine stage and count logic based on angle thresholds
+                if self.angle[ind] < self.down_angle:
+                    if self.stage[ind] == "up":
+                        self.count[ind] += 1
+                    self.stage[ind] = "down"
+                elif self.angle[ind] > self.up_angle:
+                    self.stage[ind] = "up"
+
+                # Display angle, count, and stage text
+                self.annotator.plot_angle_and_count_and_stage(
+                    angle_text=self.angle[ind],  # angle text for display
+                    count_text=self.count[ind],  # count text for workouts
+                    stage_text=self.stage[ind],  # stage position text
+                    center_kpt=k[int(self.kpts[1])],  # center keypoint for display
+                )
+
+        self.display_output(im0)  # Display output image, if environment support display
+        return im0  # return an image for writing or further usage
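
Beyond the single-image docstring example, the typical use of AIGym is a frame-by-frame loop over a video. A minimal sketch, assuming the video path is a placeholder and that keyword arguments such as kpts, up_angle, and down_angle are forwarded into the solution CFG read in __init__ above:

import cv2
from ultralytics.solutions import AIGym

gym = AIGym(model="yolo11n-pose.pt", kpts=[6, 8, 10], up_angle=145.0, down_angle=90.0)  # shoulder-elbow-wrist
cap = cv2.VideoCapture("workouts.mp4")  # placeholder input video
while cap.isOpened():
    success, frame = cap.read()
    if not success:
        break
    frame = gym.monitor(frame)  # annotated frame with per-person angle, count, and stage
cap.release()
cv2.destroyAllWindows()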

+ 247 - 0
ultralytics/solutions/analytics.py

@@ -0,0 +1,247 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+from itertools import cycle
+
+import cv2
+import matplotlib.pyplot as plt
+import numpy as np
+from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
+from matplotlib.figure import Figure
+
+from ultralytics.solutions.solutions import BaseSolution  # Import a parent class
+
+
+class Analytics(BaseSolution):
+    """
+    A class for creating and updating various types of charts for visual analytics.
+
+    This class extends BaseSolution to provide functionality for generating line, bar, pie, and area charts
+    based on object detection and tracking data.
+
+    Attributes:
+        type (str): The type of analytics chart to generate ('line', 'bar', 'pie', or 'area').
+        x_label (str): Label for the x-axis.
+        y_label (str): Label for the y-axis.
+        bg_color (str): Background color of the chart frame.
+        fg_color (str): Foreground color of the chart frame.
+        title (str): Title of the chart window.
+        max_points (int): Maximum number of data points to display on the chart.
+        fontsize (int): Font size for text display.
+        color_cycle (cycle): Cyclic iterator for chart colors.
+        total_counts (int): Total count of detected objects (used for line charts).
+        clswise_count (Dict[str, int]): Dictionary for class-wise object counts.
+        fig (Figure): Matplotlib figure object for the chart.
+        ax (Axes): Matplotlib axes object for the chart.
+        canvas (FigureCanvas): Canvas for rendering the chart.
+
+    Methods:
+        process_data: Processes image data and updates the chart.
+        update_graph: Updates the chart with new data points.
+
+    Examples:
+        >>> analytics = Analytics(analytics_type="line")
+        >>> frame = cv2.imread("image.jpg")
+        >>> processed_frame = analytics.process_data(frame, frame_number=1)
+        >>> cv2.imshow("Analytics", processed_frame)
+    """
+
+    def __init__(self, **kwargs):
+        """Initialize Analytics class with various chart types for visual data representation."""
+        super().__init__(**kwargs)
+
+        self.type = self.CFG["analytics_type"]  # extract type of analytics
+        self.x_label = "Classes" if self.type in {"bar", "pie"} else "Frame#"
+        self.y_label = "Total Counts"
+
+        # Predefined data
+        self.bg_color = "#F3F3F3"  # background color of frame
+        self.fg_color = "#111E68"  # foreground color of frame
+        self.title = "Ultralytics Solutions"  # window name
+        self.max_points = 45  # maximum points to be drawn on window
+        self.fontsize = 25  # text font size for display
+        figsize = (19.2, 10.8)  # Figure size in inches (1920 x 1080 pixels at the default 100 DPI)
+        self.color_cycle = cycle(["#DD00BA", "#042AFF", "#FF4447", "#7D24FF", "#BD00FF"])
+
+        self.total_counts = 0  # count variable for storing total counts i.e. for line
+        self.clswise_count = {}  # dictionary for class-wise counts
+
+        # Ensure line and area chart
+        if self.type in {"line", "area"}:
+            self.lines = {}
+            self.fig = Figure(facecolor=self.bg_color, figsize=figsize)
+            self.canvas = FigureCanvas(self.fig)  # Canvas for rendering the figure
+            self.ax = self.fig.add_subplot(111, facecolor=self.bg_color)
+            if self.type == "line":
+                (self.line,) = self.ax.plot([], [], color="cyan", linewidth=self.line_width)
+        elif self.type in {"bar", "pie"}:
+            # Initialize bar or pie plot
+            self.fig, self.ax = plt.subplots(figsize=figsize, facecolor=self.bg_color)
+            self.canvas = FigureCanvas(self.fig)  # Canvas for rendering the figure
+            self.ax.set_facecolor(self.bg_color)
+            self.color_mapping = {}
+
+            if self.type == "pie":  # Ensure pie chart is circular
+                self.ax.axis("equal")
+
+    def process_data(self, im0, frame_number):
+        """
+        Processes image data and runs object tracking to update analytics charts.
+
+        Args:
+            im0 (np.ndarray): Input image for processing.
+            frame_number (int): Video frame number for plotting the data.
+
+        Returns:
+            (np.ndarray): Processed image with updated analytics chart.
+
+        Raises:
+            ModuleNotFoundError: If an unsupported chart type is specified.
+
+        Examples:
+            >>> analytics = Analytics(analytics_type="line")
+            >>> frame = np.zeros((480, 640, 3), dtype=np.uint8)
+            >>> processed_frame = analytics.process_data(frame, frame_number=1)
+        """
+        self.extract_tracks(im0)  # Extract tracks
+
+        if self.type == "line":
+            self.total_counts += len(self.boxes)  # one count per detected box
+            im0 = self.update_graph(frame_number=frame_number)
+            self.total_counts = 0
+        elif self.type in {"pie", "bar", "area"}:
+            self.clswise_count = {}
+            for box, cls in zip(self.boxes, self.clss):
+                if self.names[int(cls)] in self.clswise_count:
+                    self.clswise_count[self.names[int(cls)]] += 1
+                else:
+                    self.clswise_count[self.names[int(cls)]] = 1
+            im0 = self.update_graph(frame_number=frame_number, count_dict=self.clswise_count, plot=self.type)
+        else:
+            raise ModuleNotFoundError(f"{self.type} chart is not supported ❌")
+        return im0
+
+    def update_graph(self, frame_number, count_dict=None, plot="line"):
+        """
+        Updates the graph with new data for single or multiple classes.
+
+        Args:
+            frame_number (int): The current frame number.
+            count_dict (Dict[str, int] | None): Dictionary with class names as keys and counts as values for multiple
+                classes. If None, updates a single line graph.
+            plot (str): Type of the plot. Options are 'line', 'bar', 'pie', or 'area'.
+
+        Returns:
+            (np.ndarray): Updated image containing the graph.
+
+        Examples:
+            >>> analytics = Analytics()
+            >>> frame_number = 10
+            >>> count_dict = {"person": 5, "car": 3}
+            >>> updated_image = analytics.update_graph(frame_number, count_dict, plot="bar")
+        """
+        if count_dict is None:
+            # Single line update
+            x_data = np.append(self.line.get_xdata(), float(frame_number))
+            y_data = np.append(self.line.get_ydata(), float(self.total_counts))
+
+            if len(x_data) > self.max_points:
+                x_data, y_data = x_data[-self.max_points :], y_data[-self.max_points :]
+
+            self.line.set_data(x_data, y_data)
+            self.line.set_label("Counts")
+            self.line.set_color("#7b0068")  # Pink color
+            self.line.set_marker("*")
+            self.line.set_markersize(self.line_width * 5)
+        else:
+            labels = list(count_dict.keys())
+            counts = list(count_dict.values())
+            if plot == "area":
+                color_cycle = cycle(["#DD00BA", "#042AFF", "#FF4447", "#7D24FF", "#BD00FF"])
+                # Multiple lines or area update
+                x_data = self.ax.lines[0].get_xdata() if self.ax.lines else np.array([])
+                y_data_dict = {key: np.array([]) for key in count_dict.keys()}
+                if self.ax.lines:
+                    for line, key in zip(self.ax.lines, count_dict.keys()):
+                        y_data_dict[key] = line.get_ydata()
+
+                x_data = np.append(x_data, float(frame_number))
+                max_length = len(x_data)
+                for key in count_dict.keys():
+                    y_data_dict[key] = np.append(y_data_dict[key], float(count_dict[key]))
+                    if len(y_data_dict[key]) < max_length:
+                        y_data_dict[key] = np.pad(y_data_dict[key], (0, max_length - len(y_data_dict[key])))
+                if len(x_data) > self.max_points:
+                    x_data = x_data[1:]
+                    for key in count_dict.keys():
+                        y_data_dict[key] = y_data_dict[key][1:]
+
+                self.ax.clear()
+                for key, y_data in y_data_dict.items():
+                    color = next(color_cycle)
+                    self.ax.fill_between(x_data, y_data, color=color, alpha=0.7)
+                    self.ax.plot(
+                        x_data,
+                        y_data,
+                        color=color,
+                        linewidth=self.line_width,
+                        marker="o",
+                        markersize=self.line_width * 5,
+                        label=f"{key} Data Points",
+                    )
+            if plot == "bar":
+                self.ax.clear()  # clear bar data
+                for label in labels:  # Map labels to colors
+                    if label not in self.color_mapping:
+                        self.color_mapping[label] = next(self.color_cycle)
+                colors = [self.color_mapping[label] for label in labels]
+                bars = self.ax.bar(labels, counts, color=colors)
+                for bar, count in zip(bars, counts):
+                    self.ax.text(
+                        bar.get_x() + bar.get_width() / 2,
+                        bar.get_height(),
+                        str(count),
+                        ha="center",
+                        va="bottom",
+                        color=self.fg_color,
+                    )
+                # Create the legend using labels from the bars
+                for bar, label in zip(bars, labels):
+                    bar.set_label(label)  # Assign label to each bar
+                self.ax.legend(loc="upper left", fontsize=13, facecolor=self.fg_color, edgecolor=self.fg_color)
+            if plot == "pie":
+                total = sum(counts)
+                percentages = [size / total * 100 for size in counts]
+                start_angle = 90
+                self.ax.clear()
+
+                # Create pie chart and create legend labels with percentages
+                wedges, autotexts = self.ax.pie(
+                    counts, labels=labels, startangle=start_angle, textprops={"color": self.fg_color}, autopct=None
+                )
+                legend_labels = [f"{label} ({percentage:.1f}%)" for label, percentage in zip(labels, percentages)]
+
+                # Assign the legend using the wedges and manually created labels
+                self.ax.legend(wedges, legend_labels, title="Classes", loc="center left", bbox_to_anchor=(1, 0, 0.5, 1))
+                self.fig.subplots_adjust(left=0.1, right=0.75)  # Adjust layout to fit the legend
+
+        # Common plot settings
+        self.ax.set_facecolor("#f0f0f0")  # Set to light gray or any other color you like
+        self.ax.set_title(self.title, color=self.fg_color, fontsize=self.fontsize)
+        self.ax.set_xlabel(self.x_label, color=self.fg_color, fontsize=self.fontsize - 3)
+        self.ax.set_ylabel(self.y_label, color=self.fg_color, fontsize=self.fontsize - 3)
+
+        # Add and format legend
+        legend = self.ax.legend(loc="upper left", fontsize=13, facecolor=self.bg_color, edgecolor=self.bg_color)
+        for text in legend.get_texts():
+            text.set_color(self.fg_color)
+
+        # Redraw graph, update view, capture, and display the updated plot
+        self.ax.relim()
+        self.ax.autoscale_view()
+        self.canvas.draw()
+        im0 = np.array(self.canvas.renderer.buffer_rgba())
+        im0 = cv2.cvtColor(im0[:, :, :3], cv2.COLOR_RGB2BGR)  # drop alpha and convert the RGB buffer to BGR
+        self.display_output(im0)
+
+        return im0  # Return the image
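
Because process_data returns a rendered chart image rather than the annotated input frame, a common pattern is to write the returned frames to a video. A minimal sketch (the video paths, frame rate, and the 1920x1080 output size are assumptions, the latter matching the figsize comment above):

import cv2
from ultralytics.solutions import Analytics

analytics = Analytics(analytics_type="line", model="yolo11n.pt")
cap = cv2.VideoCapture("traffic.mp4")  # placeholder input video
writer = cv2.VideoWriter("analytics.avi", cv2.VideoWriter_fourcc(*"MJPG"), 30, (1920, 1080))
frame_number = 0
while cap.isOpened():
    success, frame = cap.read()
    if not success:
        break
    frame_number += 1
    chart = analytics.process_data(frame, frame_number)  # rendered chart image for this frame
    writer.write(chart)
cap.release()
writer.release()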

+ 124 - 0
ultralytics/solutions/distance_calculation.py

@@ -0,0 +1,124 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+import math
+
+import cv2
+
+from ultralytics.solutions.solutions import BaseSolution
+from ultralytics.utils.plotting import Annotator, colors
+
+
+class DistanceCalculation(BaseSolution):
+    """
+    A class to calculate distance between two objects in a real-time video stream based on their tracks.
+
+    This class extends BaseSolution to provide functionality for selecting objects and calculating the distance
+    between them in a video stream using YOLO object detection and tracking.
+
+    Attributes:
+        left_mouse_count (int): Counter for left mouse button clicks.
+        selected_boxes (Dict[int, List[float]]): Dictionary to store selected bounding boxes and their track IDs.
+        annotator (Annotator): An instance of the Annotator class for drawing on the image.
+        boxes (List[List[float]]): List of bounding boxes for detected objects.
+        track_ids (List[int]): List of track IDs for detected objects.
+        clss (List[int]): List of class indices for detected objects.
+        names (List[str]): List of class names that the model can detect.
+        centroids (List[List[int]]): List to store centroids of selected bounding boxes.
+
+    Methods:
+        mouse_event_for_distance: Handles mouse events for selecting objects in the video stream.
+        calculate: Processes video frames and calculates the distance between selected objects.
+
+    Examples:
+        >>> distance_calc = DistanceCalculation()
+        >>> frame = cv2.imread("frame.jpg")
+        >>> processed_frame = distance_calc.calculate(frame)
+        >>> cv2.imshow("Distance Calculation", processed_frame)
+        >>> cv2.waitKey(0)
+    """
+
+    def __init__(self, **kwargs):
+        """Initializes the DistanceCalculation class for measuring object distances in video streams."""
+        super().__init__(**kwargs)
+
+        # Mouse event information
+        self.left_mouse_count = 0
+        self.selected_boxes = {}
+
+        self.centroids = []  # Initialize empty list to store centroids
+
+    def mouse_event_for_distance(self, event, x, y, flags, param):
+        """
+        Handles mouse events to select regions in a real-time video stream for distance calculation.
+
+        Args:
+            event (int): Type of mouse event (e.g., cv2.EVENT_MOUSEMOVE, cv2.EVENT_LBUTTONDOWN).
+            x (int): X-coordinate of the mouse pointer.
+            y (int): Y-coordinate of the mouse pointer.
+            flags (int): Flags associated with the event (e.g., cv2.EVENT_FLAG_CTRLKEY, cv2.EVENT_FLAG_SHIFTKEY).
+            param (Dict): Additional parameters passed to the function.
+
+        Examples:
+            >>> # Assuming 'dc' is an instance of DistanceCalculation
+            >>> cv2.setMouseCallback("window_name", dc.mouse_event_for_distance)
+        """
+        if event == cv2.EVENT_LBUTTONDOWN:
+            self.left_mouse_count += 1
+            if self.left_mouse_count <= 2:
+                for box, track_id in zip(self.boxes, self.track_ids):
+                    if box[0] < x < box[2] and box[1] < y < box[3] and track_id not in self.selected_boxes:
+                        self.selected_boxes[track_id] = box
+
+        elif event == cv2.EVENT_RBUTTONDOWN:
+            self.selected_boxes = {}
+            self.left_mouse_count = 0
+
+    def calculate(self, im0):
+        """
+        Processes a video frame and calculates the distance between two selected bounding boxes.
+
+        This method extracts tracks from the input frame, annotates bounding boxes, and calculates the distance
+        between two user-selected objects if they have been chosen.
+
+        Args:
+            im0 (numpy.ndarray): The input image frame to process.
+
+        Returns:
+            (numpy.ndarray): The processed image frame with annotations and distance calculations.
+
+        Examples:
+            >>> import numpy as np
+            >>> from ultralytics.solutions import DistanceCalculation
+            >>> dc = DistanceCalculation()
+            >>> frame = np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8)
+            >>> processed_frame = dc.calculate(frame)
+        """
+        self.annotator = Annotator(im0, line_width=self.line_width)  # Initialize annotator
+        self.extract_tracks(im0)  # Extract tracks
+
+        # Iterate over bounding boxes, track ids and classes index
+        for box, track_id, cls in zip(self.boxes, self.track_ids, self.clss):
+            self.annotator.box_label(box, color=colors(int(cls), True), label=self.names[int(cls)])
+
+            if len(self.selected_boxes) == 2:
+                for trk_id in self.selected_boxes.keys():
+                    if trk_id == track_id:
+                        self.selected_boxes[track_id] = box
+
+        if len(self.selected_boxes) == 2:
+            # Store user selected boxes in centroids list
+            self.centroids.extend(
+                [[int((box[0] + box[2]) // 2), int((box[1] + box[3]) // 2)] for box in self.selected_boxes.values()]
+            )
+            # Calculate pixels distance
+            pixels_distance = math.sqrt(
+                (self.centroids[0][0] - self.centroids[1][0]) ** 2 + (self.centroids[0][1] - self.centroids[1][1]) ** 2
+            )
+            self.annotator.plot_distance_and_line(pixels_distance, self.centroids)
+
+        self.centroids = []
+
+        self.display_output(im0)  # display output with base class function
+        cv2.setMouseCallback("Ultralytics Solutions", self.mouse_event_for_distance)
+
+        return im0  # return output image for more usage
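
Since object selection happens through mouse clicks on the display window, this solution is interactive. A sketch of the intended loop (the window name "Ultralytics Solutions" is taken from the setMouseCallback call above; the video path is a placeholder):

import cv2
from ultralytics.solutions import DistanceCalculation

dc = DistanceCalculation(model="yolo11n.pt")
cap = cv2.VideoCapture("street.mp4")  # placeholder input video
while cap.isOpened():
    success, frame = cap.read()
    if not success:
        break
    frame = dc.calculate(frame)  # left-click two boxes to measure, right-click to reset
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break
cap.release()
cv2.destroyAllWindows()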

+ 127 - 0
ultralytics/solutions/heatmap.py

@@ -0,0 +1,127 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+import cv2
+import numpy as np
+
+from ultralytics.solutions.object_counter import ObjectCounter
+from ultralytics.utils.plotting import Annotator
+
+
+class Heatmap(ObjectCounter):
+    """
+    A class to draw heatmaps in real-time video streams based on object tracks.
+
+    This class extends the ObjectCounter class to generate and visualize heatmaps of object movements in video
+    streams. It uses tracked object positions to create a cumulative heatmap effect over time.
+
+    Attributes:
+        initialized (bool): Flag indicating whether the heatmap has been initialized.
+        colormap (int): OpenCV colormap used for heatmap visualization.
+        heatmap (np.ndarray): Array storing the cumulative heatmap data.
+        annotator (Annotator): Object for drawing annotations on the image.
+
+    Methods:
+        heatmap_effect: Calculates and updates the heatmap effect for a given bounding box.
+        generate_heatmap: Generates and applies the heatmap effect to each frame.
+
+    Examples:
+        >>> from ultralytics.solutions import Heatmap
+        >>> heatmap = Heatmap(model="yolov8n.pt", colormap=cv2.COLORMAP_JET)
+        >>> frame = cv2.imread("frame.jpg")
+        >>> processed_frame = heatmap.generate_heatmap(frame)
+    """
+
+    def __init__(self, **kwargs):
+        """Initializes the Heatmap class for real-time video stream heatmap generation based on object tracks."""
+        super().__init__(**kwargs)
+
+        self.initialized = False  # bool variable for heatmap initialization
+        if self.region is not None:  # check if user provided the region coordinates
+            self.initialize_region()
+
+        # store colormap
+        self.colormap = cv2.COLORMAP_PARULA if self.CFG["colormap"] is None else self.CFG["colormap"]
+        self.heatmap = None
+
+    def heatmap_effect(self, box):
+        """
+        Efficiently calculates heatmap area and effect location for applying colormap.
+
+        Args:
+            box (List[float]): Bounding box coordinates [x0, y0, x1, y1].
+
+        Examples:
+            >>> heatmap = Heatmap()
+            >>> box = [100, 100, 200, 200]
+            >>> heatmap.heatmap_effect(box)
+        """
+        x0, y0, x1, y1 = map(int, box)
+        radius_squared = (min(x1 - x0, y1 - y0) // 2) ** 2
+
+        # Create a meshgrid with region of interest (ROI) for vectorized distance calculations
+        xv, yv = np.meshgrid(np.arange(x0, x1), np.arange(y0, y1))
+
+        # Calculate squared distances from the center
+        dist_squared = (xv - ((x0 + x1) // 2)) ** 2 + (yv - ((y0 + y1) // 2)) ** 2
+
+        # Create a mask of points within the radius
+        within_radius = dist_squared <= radius_squared
+
+        # Update only the values within the bounding box in a single vectorized operation
+        self.heatmap[y0:y1, x0:x1][within_radius] += 2
+
+    def generate_heatmap(self, im0):
+        """
+        Generate heatmap for each frame using Ultralytics.
+
+        Args:
+            im0 (np.ndarray): Input image array for processing.
+
+        Returns:
+            (np.ndarray): Processed image with heatmap overlay and object counts (if region is specified).
+
+        Examples:
+            >>> heatmap = Heatmap()
+            >>> im0 = cv2.imread("image.jpg")
+            >>> result = heatmap.generate_heatmap(im0)
+        """
+        if not self.initialized:
+            self.heatmap = np.zeros_like(im0, dtype=np.float32)  # Initialize the heatmap only once
+            self.initialized = True
+
+        self.annotator = Annotator(im0, line_width=self.line_width)  # Initialize annotator
+        self.extract_tracks(im0)  # Extract tracks
+
+        # Iterate over bounding boxes, track ids and classes index
+        for box, track_id, cls in zip(self.boxes, self.track_ids, self.clss):
+            # Draw bounding box and counting region
+            self.heatmap_effect(box)
+
+            if self.region is not None:
+                self.annotator.draw_region(reg_pts=self.region, color=(104, 0, 123), thickness=self.line_width * 2)
+                self.store_tracking_history(track_id, box)  # Store track history
+                self.store_classwise_counts(cls)  # store classwise counts in dict
+                current_centroid = ((box[0] + box[2]) / 2, (box[1] + box[3]) / 2)
+                # Store tracking previous position and perform object counting
+                prev_position = None
+                if len(self.track_history[track_id]) > 1:
+                    prev_position = self.track_history[track_id][-2]
+                self.count_objects(current_centroid, track_id, prev_position, cls)  # Perform object counting
+
+        if self.region is not None:
+            self.display_counts(im0)  # Display the counts on the frame
+
+        # Normalize, apply colormap to heatmap and combine with original image
+        if self.track_data.id is not None:
+            im0 = cv2.addWeighted(
+                im0,
+                0.5,
+                cv2.applyColorMap(
+                    cv2.normalize(self.heatmap, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8), self.colormap
+                ),
+                0.5,
+                0,
+            )
+
+        self.display_output(im0)  # display output with base class function
+        return im0  # return output image for more usage
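
A usage sketch combining the heatmap with the inherited counting behaviour (the region points and video path are placeholders; counting only activates when a region is supplied, per the checks above):

import cv2
from ultralytics.solutions import Heatmap

heatmap = Heatmap(
    model="yolo11n.pt",
    colormap=cv2.COLORMAP_PARULA,     # same default as in __init__ above
    region=[(20, 400), (1080, 400)],  # optional counting line; omit for heatmap only
)
cap = cv2.VideoCapture("mall.mp4")  # placeholder input video
while cap.isOpened():
    success, frame = cap.read()
    if not success:
        break
    frame = heatmap.generate_heatmap(frame)  # heatmap overlay plus in/out counts
cap.release()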

+ 203 - 0
ultralytics/solutions/object_counter.py

@@ -0,0 +1,203 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+from ultralytics.solutions.solutions import BaseSolution
+from ultralytics.utils.plotting import Annotator, colors
+
+
+class ObjectCounter(BaseSolution):
+    """
+    A class to manage the counting of objects in a real-time video stream based on their tracks.
+
+    This class extends the BaseSolution class and provides functionality for counting objects moving in and out of a
+    specified region in a video stream. It supports both polygonal and linear regions for counting.
+
+    Attributes:
+        in_count (int): Counter for objects moving inward.
+        out_count (int): Counter for objects moving outward.
+        counted_ids (List[int]): List of IDs of objects that have been counted.
+        classwise_counts (Dict[str, Dict[str, int]]): Dictionary for counts, categorized by object class.
+        region_initialized (bool): Flag indicating whether the counting region has been initialized.
+        show_in (bool): Flag to control display of inward count.
+        show_out (bool): Flag to control display of outward count.
+
+    Methods:
+        count_objects: Counts objects within a polygonal or linear region.
+        store_classwise_counts: Initializes class-wise counts if not already present.
+        display_counts: Displays object counts on the frame.
+        count: Processes input data (frames or object tracks) and updates counts.
+
+    Examples:
+        >>> counter = ObjectCounter()
+        >>> frame = cv2.imread("frame.jpg")
+        >>> processed_frame = counter.count(frame)
+        >>> print(f"Inward count: {counter.in_count}, Outward count: {counter.out_count}")
+    """
+
+    def __init__(self, **kwargs):
+        """Initializes the ObjectCounter class for real-time object counting in video streams."""
+        super().__init__(**kwargs)
+
+        self.in_count = 0  # Counter for objects moving inward
+        self.out_count = 0  # Counter for objects moving outward
+        self.counted_ids = []  # List of IDs of objects that have been counted
+        self.classwise_counts = {}  # Dictionary for counts, categorized by object class
+        self.region_initialized = False  # Bool variable for region initialization
+
+        self.show_in = self.CFG["show_in"]
+        self.show_out = self.CFG["show_out"]
+
+    def count_objects(self, current_centroid, track_id, prev_position, cls):
+        """
+        Counts objects within a polygonal or linear region based on their tracks.
+
+        Args:
+            current_centroid (Tuple[float, float]): Current centroid values in the current frame.
+            track_id (int): Unique identifier for the tracked object.
+            prev_position (Tuple[float, float]): Last frame position coordinates (x, y) of the track.
+            cls (int): Class index for classwise count updates.
+
+        Examples:
+            >>> counter = ObjectCounter()
+            >>> track_line = {1: [100, 200], 2: [110, 210], 3: [120, 220]}
+            >>> box = [130, 230, 150, 250]
+            >>> track_id = 1
+            >>> prev_position = (120, 220)
+            >>> cls = 0
+            >>> counter.count_objects(current_centroid, track_id, prev_position, cls)
+        """
+        if prev_position is None or track_id in self.counted_ids:
+            return
+
+        if len(self.region) == 2:  # Linear region (defined as a line segment)
+            line = self.LineString(self.region)  # Check if the line intersects the trajectory of the object
+            if line.intersects(self.LineString([prev_position, current_centroid])):
+                # Determine orientation of the region (vertical or horizontal)
+                if abs(self.region[0][0] - self.region[1][0]) < abs(self.region[0][1] - self.region[1][1]):
+                    # Vertical region: Compare x-coordinates to determine direction
+                    if current_centroid[0] > prev_position[0]:  # Moving right
+                        self.in_count += 1
+                        self.classwise_counts[self.names[cls]]["IN"] += 1
+                    else:  # Moving left
+                        self.out_count += 1
+                        self.classwise_counts[self.names[cls]]["OUT"] += 1
+                # Horizontal region: Compare y-coordinates to determine direction
+                elif current_centroid[1] > prev_position[1]:  # Moving downward
+                    self.in_count += 1
+                    self.classwise_counts[self.names[cls]]["IN"] += 1
+                else:  # Moving upward
+                    self.out_count += 1
+                    self.classwise_counts[self.names[cls]]["OUT"] += 1
+                self.counted_ids.append(track_id)
+
+        elif len(self.region) > 2:  # Polygonal region
+            polygon = self.Polygon(self.region)
+            if polygon.contains(self.Point(current_centroid)):
+                # Determine motion direction for vertical or horizontal polygons
+                region_width = max(p[0] for p in self.region) - min(p[0] for p in self.region)
+                region_height = max(p[1] for p in self.region) - min(p[1] for p in self.region)
+
+                if (
+                    region_width < region_height
+                    and current_centroid[0] > prev_position[0]
+                    or region_width >= region_height
+                    and current_centroid[1] > prev_position[1]
+                ):  # Moving right (vertical region) or downward (horizontal region)
+                    self.in_count += 1
+                    self.classwise_counts[self.names[cls]]["IN"] += 1
+                else:  # Moving left (vertical region) or upward (horizontal region)
+                    self.out_count += 1
+                    self.classwise_counts[self.names[cls]]["OUT"] += 1
+                self.counted_ids.append(track_id)
+
+    def store_classwise_counts(self, cls):
+        """
+        Initialize class-wise counts for a specific object class if not already present.
+
+        Args:
+            cls (int): Class index for classwise count updates.
+
+        This method ensures that the 'classwise_counts' dictionary contains an entry for the specified class,
+        initializing 'IN' and 'OUT' counts to zero if the class is not already present.
+
+        Examples:
+            >>> counter = ObjectCounter()
+            >>> counter.store_classwise_counts(0)  # Initialize counts for class index 0
+            >>> print(counter.classwise_counts)
+            {'person': {'IN': 0, 'OUT': 0}}
+        """
+        if self.names[cls] not in self.classwise_counts:
+            self.classwise_counts[self.names[cls]] = {"IN": 0, "OUT": 0}
+
+    def display_counts(self, im0):
+        """
+        Displays object counts on the input image or frame.
+
+        Args:
+            im0 (numpy.ndarray): The input image or frame to display counts on.
+
+        Examples:
+            >>> counter = ObjectCounter()
+            >>> frame = cv2.imread("image.jpg")
+            >>> counter.display_counts(frame)
+        """
+        labels_dict = {
+            str.capitalize(key): f"{'IN ' + str(value['IN']) if self.show_in else ''} "
+            f"{'OUT ' + str(value['OUT']) if self.show_out else ''}".strip()
+            for key, value in self.classwise_counts.items()
+            if value["IN"] != 0 or value["OUT"] != 0
+        }
+
+        if labels_dict:
+            self.annotator.display_analytics(im0, labels_dict, (104, 31, 17), (255, 255, 255), 10)
+
+    def count(self, im0):
+        """
+        Processes input data (frames or object tracks) and updates object counts.
+
+        This method initializes the counting region, extracts tracks, draws bounding boxes and regions, updates
+        object counts, and displays the results on the input image.
+
+        Args:
+            im0 (numpy.ndarray): The input image or frame to be processed.
+
+        Returns:
+            (numpy.ndarray): The processed image with annotations and count information.
+
+        Examples:
+            >>> counter = ObjectCounter()
+            >>> frame = cv2.imread("path/to/image.jpg")
+            >>> processed_frame = counter.count(frame)
+        """
+        if not self.region_initialized:
+            self.initialize_region()
+            self.region_initialized = True
+
+        self.annotator = Annotator(im0, line_width=self.line_width)  # Initialize annotator
+        self.extract_tracks(im0)  # Extract tracks
+
+        self.annotator.draw_region(
+            reg_pts=self.region, color=(104, 0, 123), thickness=self.line_width * 2
+        )  # Draw region
+
+        # Iterate over bounding boxes, track ids and classes index
+        for box, track_id, cls in zip(self.boxes, self.track_ids, self.clss):
+            # Draw bounding box and counting region
+            self.annotator.box_label(box, label=self.names[cls], color=colors(cls, True))
+            self.store_tracking_history(track_id, box)  # Store track history
+            self.store_classwise_counts(cls)  # store classwise counts in dict
+
+            # Draw tracks of objects
+            self.annotator.draw_centroid_and_tracks(
+                self.track_line, color=colors(int(cls), True), track_thickness=self.line_width
+            )
+            current_centroid = ((box[0] + box[2]) / 2, (box[1] + box[3]) / 2)
+            # store previous position of track for object counting
+            prev_position = None
+            if len(self.track_history[track_id]) > 1:
+                prev_position = self.track_history[track_id][-2]
+            self.count_objects(current_centroid, track_id, prev_position, cls)  # Perform object counting
+
+        self.display_counts(im0)  # Display the counts on the frame
+        self.display_output(im0)  # display output with base class function
+
+        return im0  # return output image for more usage
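
A minimal end-to-end sketch for line-based counting (the region points, model name, and video path are assumptions; a two-point region triggers the LineString branch of count_objects, more points the polygon branch):

import cv2
from ultralytics.solutions import ObjectCounter

counter = ObjectCounter(
    model="yolo11n.pt",
    region=[(20, 400), (1080, 400)],  # two points -> linear counting region
    show_in=True,
    show_out=True,
)
cap = cv2.VideoCapture("traffic.mp4")  # placeholder input video
while cap.isOpened():
    success, frame = cap.read()
    if not success:
        break
    frame = counter.count(frame)
cap.release()
print(f"IN: {counter.in_count}, OUT: {counter.out_count}")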

+ 246 - 0
ultralytics/solutions/parking_management.py

@@ -0,0 +1,246 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+import json
+
+import cv2
+import numpy as np
+
+from ultralytics.solutions.solutions import BaseSolution
+from ultralytics.utils import LOGGER
+from ultralytics.utils.checks import check_requirements
+from ultralytics.utils.plotting import Annotator
+
+
+class ParkingPtsSelection:
+    """
+    A class for selecting and managing parking zone points on images using a Tkinter-based UI.
+
+    This class provides functionality to upload an image, select points to define parking zones, and save the
+    selected points to a JSON file. It uses Tkinter for the graphical user interface.
+
+    Attributes:
+        tk (module): The Tkinter module for GUI operations.
+        filedialog (module): Tkinter's filedialog module for file selection operations.
+        messagebox (module): Tkinter's messagebox module for displaying message boxes.
+        master (tk.Tk): The main Tkinter window.
+        canvas (tk.Canvas): The canvas widget for displaying the image and drawing bounding boxes.
+        image (PIL.Image.Image): The uploaded image.
+        canvas_image (ImageTk.PhotoImage): The image displayed on the canvas.
+        rg_data (List[List[Tuple[int, int]]]): List of bounding boxes, each defined by 4 points.
+        current_box (List[Tuple[int, int]]): Temporary storage for the points of the current bounding box.
+        imgw (int): Original width of the uploaded image.
+        imgh (int): Original height of the uploaded image.
+        canvas_max_width (int): Maximum width of the canvas.
+        canvas_max_height (int): Maximum height of the canvas.
+
+    Methods:
+        initialize_properties: Initializes the necessary properties.
+        upload_image: Uploads an image, resizes it to fit the canvas, and displays it.
+        on_canvas_click: Handles mouse clicks to add points for bounding boxes.
+        draw_box: Draws a bounding box on the canvas.
+        remove_last_bounding_box: Removes the last bounding box and redraws the canvas.
+        redraw_canvas: Redraws the canvas with the image and all bounding boxes.
+        save_to_json: Saves the bounding boxes to a JSON file.
+
+    Examples:
+        >>> parking_selector = ParkingPtsSelection()
+        >>> # Use the GUI to upload an image, select parking zones, and save the data
+    """
+
+    def __init__(self):
+        """Initializes the ParkingPtsSelection class, setting up UI and properties for parking zone point selection."""
+        check_requirements("tkinter")
+        import tkinter as tk
+        from tkinter import filedialog, messagebox
+
+        self.tk, self.filedialog, self.messagebox = tk, filedialog, messagebox
+        self.master = self.tk.Tk()  # Reference to the main application window or parent widget
+        self.master.title("Ultralytics Parking Zones Points Selector")
+        self.master.resizable(False, False)
+
+        self.canvas = self.tk.Canvas(self.master, bg="white")  # Canvas widget for displaying images or graphics
+        self.canvas.pack(side=self.tk.BOTTOM)
+
+        self.image = None  # Variable to store the loaded image
+        self.canvas_image = None  # Reference to the image displayed on the canvas
+        self.canvas_max_width = None  # Maximum allowed width for the canvas
+        self.canvas_max_height = None  # Maximum allowed height for the canvas
+        self.rg_data = None  # Data related to region or annotation management
+        self.current_box = None  # Stores the currently selected or active bounding box
+        self.imgh = None  # Height of the current image
+        self.imgw = None  # Width of the current image
+
+        # Button frame with buttons
+        button_frame = self.tk.Frame(self.master)
+        button_frame.pack(side=self.tk.TOP)
+
+        for text, cmd in [
+            ("Upload Image", self.upload_image),
+            ("Remove Last BBox", self.remove_last_bounding_box),
+            ("Save", self.save_to_json),
+        ]:
+            self.tk.Button(button_frame, text=text, command=cmd).pack(side=self.tk.LEFT)
+
+        self.initialize_properties()
+        self.master.mainloop()
+
+    def initialize_properties(self):
+        """Initialize properties for image, canvas, bounding boxes, and dimensions."""
+        self.image = self.canvas_image = None
+        self.rg_data, self.current_box = [], []
+        self.imgw = self.imgh = 0
+        self.canvas_max_width, self.canvas_max_height = 1280, 720
+
+    def upload_image(self):
+        """Uploads and displays an image on the canvas, resizing it to fit within specified dimensions."""
+        from PIL import Image, ImageTk  # scope because ImageTk requires tkinter package
+
+        filename = self.filedialog.askopenfilename(filetypes=[("Image Files", "*.png *.jpg *.jpeg")])
+        if not filename:  # Dialog cancelled, nothing to load
+            return
+        self.image = Image.open(filename)
+
+        self.imgw, self.imgh = self.image.size
+        aspect_ratio = self.imgw / self.imgh
+        canvas_width = (
+            min(self.canvas_max_width, self.imgw) if aspect_ratio > 1 else int(self.canvas_max_height * aspect_ratio)
+        )
+        canvas_height = (
+            min(self.canvas_max_height, self.imgh) if aspect_ratio <= 1 else int(canvas_width / aspect_ratio)
+        )
+
+        self.canvas.config(width=canvas_width, height=canvas_height)
+        self.canvas_image = ImageTk.PhotoImage(self.image.resize((canvas_width, canvas_height)))
+        self.canvas.create_image(0, 0, anchor=self.tk.NW, image=self.canvas_image)
+        self.canvas.bind("<Button-1>", self.on_canvas_click)
+
+        self.rg_data.clear()
+        self.current_box.clear()
+
+    def on_canvas_click(self, event):
+        """Handles mouse clicks to add points for bounding boxes on the canvas."""
+        self.current_box.append((event.x, event.y))
+        self.canvas.create_oval(event.x - 3, event.y - 3, event.x + 3, event.y + 3, fill="red")
+        if len(self.current_box) == 4:
+            self.rg_data.append(self.current_box.copy())
+            self.draw_box(self.current_box)
+            self.current_box.clear()
+
+    def draw_box(self, box):
+        """Draws a bounding box on the canvas using the provided coordinates."""
+        for i in range(4):
+            self.canvas.create_line(box[i], box[(i + 1) % 4], fill="blue", width=2)
+
+    def remove_last_bounding_box(self):
+        """Removes the last bounding box from the list and redraws the canvas."""
+        if not self.rg_data:
+            self.messagebox.showwarning("Warning", "No bounding boxes to remove.")
+            return
+        self.rg_data.pop()
+        self.redraw_canvas()
+
+    def redraw_canvas(self):
+        """Redraws the canvas with the image and all bounding boxes."""
+        self.canvas.delete("all")
+        self.canvas.create_image(0, 0, anchor=self.tk.NW, image=self.canvas_image)
+        for box in self.rg_data:
+            self.draw_box(box)
+
+    def save_to_json(self):
+        """Saves the selected parking zone points to a JSON file with scaled coordinates."""
+        scale_w, scale_h = self.imgw / self.canvas.winfo_width(), self.imgh / self.canvas.winfo_height()
+        data = [{"points": [(int(x * scale_w), int(y * scale_h)) for x, y in box]} for box in self.rg_data]
+
+        from io import StringIO  # Function level import, as it's only required to store coordinates, not every frame
+
+        write_buffer = StringIO()
+        json.dump(data, write_buffer, indent=4)
+        with open("bounding_boxes.json", "w", encoding="utf-8") as f:
+            f.write(write_buffer.getvalue())
+        self.messagebox.showinfo("Success", "Bounding boxes saved to bounding_boxes.json")
+
+
+class ParkingManagement(BaseSolution):
+    """
+    Manages parking occupancy and availability using YOLO model for real-time monitoring and visualization.
+
+    This class extends BaseSolution to provide functionality for parking lot management, including detection of
+    occupied spaces, visualization of parking regions, and display of occupancy statistics.
+
+    Attributes:
+        json_file (str): Path to the JSON file containing parking region details.
+        json (List[Dict]): Loaded JSON data containing parking region information.
+        pr_info (Dict[str, int]): Dictionary storing parking information (Occupancy and Available spaces).
+        arc (Tuple[int, int, int]): BGR color tuple for available region visualization.
+        occ (Tuple[int, int, int]): BGR color tuple for occupied region visualization.
+        dc (Tuple[int, int, int]): BGR color tuple for centroid visualization of detected objects.
+
+    Methods:
+        process_data: Processes model data for parking lot management and visualization.
+
+    Examples:
+        >>> from ultralytics.solutions import ParkingManagement
+        >>> parking_manager = ParkingManagement(model="yolov8n.pt", json_file="parking_regions.json")
+        >>> print(f"Occupied spaces: {parking_manager.pr_info['Occupancy']}")
+        >>> print(f"Available spaces: {parking_manager.pr_info['Available']}")
+    """
+
+    def __init__(self, **kwargs):
+        """Initializes the parking management system with a YOLO model and visualization settings."""
+        super().__init__(**kwargs)
+
+        self.json_file = self.CFG["json_file"]  # Load JSON data
+        if self.json_file is None:
+            LOGGER.warning("❌ json_file argument missing. Parking region details are required.")
+            raise ValueError("❌ JSON file path cannot be empty")
+
+        with open(self.json_file) as f:
+            self.json = json.load(f)
+
+        self.pr_info = {"Occupancy": 0, "Available": 0}  # dictionary for parking information
+
+        self.arc = (0, 0, 255)  # available region color
+        self.occ = (0, 255, 0)  # occupied region color
+        self.dc = (255, 0, 189)  # centroid color for each box
+
+    def process_data(self, im0):
+        """
+        Processes the model data for parking lot management.
+
+        This function analyzes the input image, extracts tracks, and determines the occupancy status of parking
+        regions defined in the JSON file. It annotates the image with occupied and available parking spots,
+        and updates the parking information.
+
+        Args:
+            im0 (np.ndarray): The input inference image.
+
+        Examples:
+            >>> parking_manager = ParkingManagement(json_file="parking_regions.json")
+            >>> image = cv2.imread("parking_lot.jpg")
+            >>> parking_manager.process_data(image)
+        """
+        self.extract_tracks(im0)  # extract tracks from im0
+        es, fs = len(self.json), 0  # empty slots, filled slots
+        annotator = Annotator(im0, self.line_width)  # init annotator
+
+        for region in self.json:
+            # Convert points to a NumPy array with the correct dtype and reshape properly
+            pts_array = np.array(region["points"], dtype=np.int32).reshape((-1, 1, 2))
+            rg_occupied = False  # occupied region initialization
+            for box, cls in zip(self.boxes, self.clss):
+                xc, yc = int((box[0] + box[2]) / 2), int((box[1] + box[3]) / 2)
+                dist = cv2.pointPolygonTest(pts_array, (xc, yc), False)
+                if dist >= 0:
+                    # cv2.circle(im0, (xc, yc), radius=self.line_width * 4, color=self.dc, thickness=-1)
+                    annotator.display_objects_labels(
+                        im0, self.model.names[int(cls)], (104, 31, 17), (255, 255, 255), xc, yc, 10
+                    )
+                    rg_occupied = True
+                    break
+            fs, es = (fs + 1, es - 1) if rg_occupied else (fs, es)
+            # Plotting regions
+            cv2.polylines(im0, [pts_array], isClosed=True, color=self.occ if rg_occupied else self.arc, thickness=2)
+
+        self.pr_info["Occupancy"], self.pr_info["Available"] = fs, es
+
+        annotator.display_analytics(im0, self.pr_info, (104, 31, 17), (255, 255, 255), 10)
+        self.display_output(im0)  # display output with base class function
+        return im0  # return output image for more usage
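
The two classes are meant to be used in sequence: ParkingPtsSelection produces bounding_boxes.json once, and ParkingManagement consumes it per frame. A sketch, assuming the video comes from the same camera view used for annotation:

import cv2
from ultralytics.solutions import ParkingManagement, ParkingPtsSelection

# Step 1 (one-off): annotate parking slots on a reference frame; writes bounding_boxes.json
# ParkingPtsSelection()

# Step 2: monitor occupancy on the video
manager = ParkingManagement(model="yolo11n.pt", json_file="bounding_boxes.json")
cap = cv2.VideoCapture("parking_lot.mp4")  # placeholder input video
while cap.isOpened():
    success, frame = cap.read()
    if not success:
        break
    frame = manager.process_data(frame)
    print(manager.pr_info)  # e.g. {'Occupancy': 7, 'Available': 3}
cap.release()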

+ 112 - 0
ultralytics/solutions/queue_management.py

@@ -0,0 +1,112 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+from ultralytics.solutions.solutions import BaseSolution
+from ultralytics.utils.plotting import Annotator, colors
+
+
+class QueueManager(BaseSolution):
+    """
+    Manages queue counting in real-time video streams based on object tracks.
+
+    This class extends BaseSolution to provide functionality for tracking and counting objects within a specified
+    region in video frames.
+
+    Attributes:
+        counts (int): The current count of objects in the queue.
+        rect_color (Tuple[int, int, int]): RGB color tuple for drawing the queue region rectangle.
+        region_length (int): The number of points defining the queue region.
+        annotator (Annotator): An instance of the Annotator class for drawing on frames.
+        track_line (List[Tuple[int, int]]): List of track line coordinates.
+        track_history (Dict[int, List[Tuple[int, int]]]): Dictionary storing tracking history for each object.
+
+    Methods:
+        initialize_region: Initializes the queue region.
+        process_queue: Processes a single frame for queue management.
+        extract_tracks: Extracts object tracks from the current frame.
+        store_tracking_history: Stores the tracking history for an object.
+        display_output: Displays the processed output.
+
+    Examples:
+        >>> cap = cv2.VideoCapture("Path/to/video/file.mp4")
+        >>> queue_manager = QueueManager(region=[(100, 100), (200, 100), (200, 200), (100, 200)])
+        >>> while cap.isOpened():
+        ...     success, im0 = cap.read()
+        ...     if not success:
+        ...         break
+        ...     out = queue_manager.process_queue(im0)
+    """
+
+    def __init__(self, **kwargs):
+        """Initializes the QueueManager with parameters for tracking and counting objects in a video stream."""
+        super().__init__(**kwargs)
+        self.initialize_region()
+        self.counts = 0  # Queue counts Information
+        self.rect_color = (255, 255, 255)  # Rectangle color
+        self.region_length = len(self.region)  # Store region length for further usage
+
+    def process_queue(self, im0):
+        """
+        Processes the queue management for a single frame of video.
+
+        Args:
+            im0 (numpy.ndarray): Input image for processing, typically a frame from a video stream.
+
+        Returns:
+            (numpy.ndarray): Processed image with annotations, bounding boxes, and queue counts.
+
+        This method performs the following steps:
+        1. Resets the queue count for the current frame.
+        2. Initializes an Annotator object for drawing on the image.
+        3. Extracts tracks from the image.
+        4. Draws the counting region on the image.
+        5. For each detected object:
+           - Draws bounding boxes and labels.
+           - Stores tracking history.
+           - Draws centroids and tracks.
+           - Checks if the object is inside the counting region and updates the count.
+        6. Displays the queue count on the image.
+        7. Displays the processed output.
+
+        Examples:
+            >>> queue_manager = QueueManager()
+            >>> frame = cv2.imread("frame.jpg")
+            >>> processed_frame = queue_manager.process_queue(frame)
+        """
+        self.counts = 0  # Reset counts every frame
+        self.annotator = Annotator(im0, line_width=self.line_width)  # Initialize annotator
+        self.extract_tracks(im0)  # Extract tracks
+
+        self.annotator.draw_region(
+            reg_pts=self.region, color=self.rect_color, thickness=self.line_width * 2
+        )  # Draw region
+
+        for box, track_id, cls in zip(self.boxes, self.track_ids, self.clss):
+            # Draw bounding box and counting region
+            self.annotator.box_label(box, label=self.names[cls], color=colors(track_id, True))
+            self.store_tracking_history(track_id, box)  # Store track history
+
+            # Draw tracks of objects
+            self.annotator.draw_centroid_and_tracks(
+                self.track_line, color=colors(int(track_id), True), track_thickness=self.line_width
+            )
+
+            # Cache frequently accessed attributes
+            track_history = self.track_history.get(track_id, [])
+
+            # store previous position of track and check if the object is inside the counting region
+            prev_position = None
+            if len(track_history) > 1:
+                prev_position = track_history[-2]
+            if self.region_length >= 3 and prev_position and self.r_s.contains(self.Point(self.track_line[-1])):
+                self.counts += 1
+
+        # Display queue counts
+        self.annotator.queue_counts_display(
+            f"Queue Counts : {str(self.counts)}",
+            points=self.region,
+            region_color=self.rect_color,
+            txt_color=(104, 31, 17),
+        )
+        self.display_output(im0)  # display output with base class function
+
+        return im0  # return output image for more usage
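
A short sketch of queue monitoring on a video stream (the region points, model name, and path are placeholders; at least three region points are needed for the containment check in process_queue to fire):

import cv2
from ultralytics.solutions import QueueManager

queue_manager = QueueManager(
    model="yolo11n.pt",
    region=[(20, 340), (600, 340), (600, 700), (20, 700)],  # polygonal queue region
)
cap = cv2.VideoCapture("checkout.mp4")  # placeholder input video
while cap.isOpened():
    success, frame = cap.read()
    if not success:
        break
    frame = queue_manager.process_queue(frame)  # per-frame queue count drawn on the image
cap.release()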

+ 116 - 0
ultralytics/solutions/region_counter.py

@@ -0,0 +1,116 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+from ultralytics.solutions.solutions import BaseSolution
+from ultralytics.utils import LOGGER
+from ultralytics.utils.plotting import Annotator, colors
+
+
+class RegionCounter(BaseSolution):
+    """
+    A class designed for real-time counting of objects within user-defined regions in a video stream.
+
+    This class inherits from `BaseSolution` and offers functionalities to define polygonal regions in a video
+    frame, track objects, and count those objects that pass through each defined region. This makes it useful
+    for applications that require counting in specified areas, such as monitoring zones or segmented sections.
+
+    Attributes:
+        region_template (dict): A template for creating new counting regions with default attributes including
+                                the name, polygon coordinates, and display colors.
+        counting_regions (list): A list storing all defined regions, where each entry is based on `region_template`
+                                 and includes specific region settings like name, coordinates, and color.
+
+    Methods:
+        add_region: Adds a new counting region with specified attributes, such as the region's name, polygon points,
+                    region color, and text color.
+        count: Processes video frames to count objects in each region, drawing regions and displaying counts
+               on the frame. Handles object detection, region definition, and containment checks.
+    """
+
+    def __init__(self, **kwargs):
+        """Initializes the RegionCounter class for real-time counting in different regions of the video streams."""
+        super().__init__(**kwargs)
+        self.region_template = {
+            "name": "Default Region",
+            "polygon": None,
+            "counts": 0,
+            "dragging": False,
+            "region_color": (255, 255, 255),
+            "text_color": (0, 0, 0),
+        }
+        self.counting_regions = []
+
+    def add_region(self, name, polygon_points, region_color, text_color):
+        """
+        Adds a new region to the counting list based on the provided template with specific attributes.
+
+        Args:
+            name (str): Name assigned to the new region.
+            polygon_points (list[tuple]): List of (x, y) coordinates defining the region's polygon.
+            region_color (tuple): BGR color for region visualization.
+            text_color (tuple): BGR color for the text within the region.
+        """
+        region = self.region_template.copy()
+        region.update(
+            {
+                "name": name,
+                "polygon": self.Polygon(polygon_points),
+                "region_color": region_color,
+                "text_color": text_color,
+            }
+        )
+        self.counting_regions.append(region)
+
+    def count(self, im0):
+        """
+        Processes the input frame to detect and count objects within each defined region.
+
+        Args:
+            im0 (numpy.ndarray): Input image frame where objects and regions are annotated.
+
+        Returns:
+            (numpy.ndarray): Processed image frame with annotated counting information.
+        """
+        self.annotator = Annotator(im0, line_width=self.line_width)
+        self.extract_tracks(im0)
+
+        # Region initialization and conversion
+        if self.region is None:
+            self.initialize_region()
+            regions = {"Region#01": self.region}
+        else:
+            regions = self.region if isinstance(self.region, dict) else {"Region#01": self.region}
+
+        # Draw regions and process counts for each defined area
+        for idx, (region_name, reg_pts) in enumerate(regions.items(), start=1):
+            if not isinstance(reg_pts, list) or not all(isinstance(pt, tuple) for pt in reg_pts):
+                LOGGER.warning(f"Invalid region points for {region_name}: {reg_pts}")
+                continue  # Skip invalid entries
+            color = colors(idx, True)
+            self.annotator.draw_region(reg_pts=reg_pts, color=color, thickness=self.line_width * 2)
+            self.add_region(region_name, reg_pts, color, self.annotator.get_txt_color())
+
+        # Prepare regions for containment check
+        for region in self.counting_regions:
+            region["prepared_polygon"] = self.prep(region["polygon"])
+
+        # Process bounding boxes and count objects within each region
+        for box, cls in zip(self.boxes, self.clss):
+            self.annotator.box_label(box, label=self.names[cls], color=colors(cls, True))
+            bbox_center = ((box[0] + box[2]) / 2, (box[1] + box[3]) / 2)
+
+            for region in self.counting_regions:
+                if region["prepared_polygon"].contains(self.Point(bbox_center)):
+                    region["counts"] += 1
+
+        # Display counts in each region
+        for region in self.counting_regions:
+            self.annotator.text_label(
+                region["polygon"].bounds,
+                label=str(region["counts"]),
+                color=region["region_color"],
+                txt_color=region["text_color"],
+            )
+            region["counts"] = 0  # Reset count for next frame
+
+        self.display_output(im0)
+        return im0
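
Because count() accepts either a single list of points or a dict mapping region names to point lists, a multi-region setup might look like the following sketch (illustrative only, not part of the committed code; names, points, and the video path are assumptions):

    import cv2
    from ultralytics.solutions import RegionCounter

    regions = {  # Illustrative named regions; each value is a list of (x, y) tuples
        "Entrance": [(50, 60), (250, 60), (250, 300), (50, 300)],
        "Checkout": [(400, 60), (700, 60), (700, 300), (400, 300)],
    }
    counter = RegionCounter(region=regions, show=True)

    cap = cv2.VideoCapture("store.mp4")  # Illustrative input video
    while cap.isOpened():
        success, im0 = cap.read()
        if not success:
            break
        im0 = counter.count(im0)  # Per-region counts are drawn on the frame and reset each call
    cap.release()
    cv2.destroyAllWindows()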

+ 144 - 0
ultralytics/solutions/security_alarm.py

@@ -0,0 +1,144 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+from ultralytics.solutions.solutions import BaseSolution
+from ultralytics.utils import LOGGER
+from ultralytics.utils.plotting import Annotator, colors
+
+
+class SecurityAlarm(BaseSolution):
+    """
+    A class to manage security alarm functionalities for real-time monitoring.
+
+    This class extends the BaseSolution class and provides features to monitor
+    objects in a frame, send email notifications when specific thresholds are
+    exceeded for total detections, and annotate the output frame for visualization.
+
+    Attributes:
+        email_sent (bool): Flag to track if an email has already been sent for the current event.
+        records (int): Threshold for the number of detected objects to trigger an alert.
+
+    Methods:
+        authenticate: Sets up email server authentication for sending alerts.
+        send_email: Sends an email notification with details and an image attachment.
+        monitor: Monitors the frame, processes detections, and triggers alerts if thresholds are crossed.
+
+    Examples:
+        >>> security = SecurityAlarm()
+        >>> security.authenticate("abc@gmail.com", "1111222233334444", "xyz@gmail.com")
+        >>> frame = cv2.imread("frame.jpg")
+        >>> processed_frame = security.monitor(frame)
+    """
+
+    def __init__(self, **kwargs):
+        """Initializes the SecurityAlarm class with parameters for real-time object monitoring."""
+        super().__init__(**kwargs)
+        self.email_sent = False
+        self.records = self.CFG["records"]
+        self.server = None
+        self.to_email = ""
+        self.from_email = ""
+
+    def authenticate(self, from_email, password, to_email):
+        """
+        Authenticates the email server for sending alert notifications.
+
+        Args:
+            from_email (str): Sender's email address.
+            password (str): Password for the sender's email account.
+            to_email (str): Recipient's email address.
+
+        This method initializes a secure connection with the SMTP server
+        and logs in using the provided credentials.
+
+        Examples:
+            >>> alarm = SecurityAlarm()
+            >>> alarm.authenticate("sender@example.com", "password123", "recipient@example.com")
+        """
+        import smtplib
+
+        self.server = smtplib.SMTP("smtp.gmail.com", 587)
+        self.server.starttls()
+        self.server.login(from_email, password)
+        self.to_email = to_email
+        self.from_email = from_email
+
+    def send_email(self, im0, records=5):
+        """
+        Sends an email notification with an image attachment indicating the number of objects detected.
+
+        Args:
+            im0 (numpy.ndarray): The input image or frame to be attached to the email.
+            records (int): The number of detected objects to be included in the email message.
+
+        This method encodes the input image, composes the email message with
+        details about the detection, and sends it to the specified recipient.
+
+        Examples:
+            >>> alarm = SecurityAlarm()
+            >>> frame = cv2.imread("path/to/image.jpg")
+            >>> alarm.send_email(frame, records=10)
+        """
+        from email.mime.image import MIMEImage
+        from email.mime.multipart import MIMEMultipart
+        from email.mime.text import MIMEText
+
+        import cv2
+
+        img_bytes = cv2.imencode(".jpg", im0)[1].tobytes()  # Encode the image as JPEG
+
+        # Create the email
+        message = MIMEMultipart()
+        message["From"] = self.from_email
+        message["To"] = self.to_email
+        message["Subject"] = "Security Alert"
+
+        # Add the text message body
+        message_body = f"Ultralytics ALERT!!! {records} objects have been detected!!"
+        message.attach(MIMEText(message_body))
+
+        # Attach the image
+        image_attachment = MIMEImage(img_bytes, name="ultralytics.jpg")
+        message.attach(image_attachment)
+
+        # Send the email
+        try:
+            self.server.send_message(message)
+            LOGGER.info("✅ Email sent successfully!")
+        except Exception as e:
+            print(f"❌ Failed to send email: {e}")
+
+    def monitor(self, im0):
+        """
+        Monitors the frame, processes object detections, and triggers alerts if thresholds are exceeded.
+
+        Args:
+            im0 (numpy.ndarray): The input image or frame to be processed and annotated.
+
+        This method processes the input frame, extracts detections, annotates the frame
+        with bounding boxes, and sends an email notification if the number of detected objects
+        surpasses the specified threshold and an alert has not already been sent.
+
+        Returns:
+            (numpy.ndarray): The processed frame with annotations.
+
+        Examples:
+            >>> alarm = SecurityAlarm()
+            >>> frame = cv2.imread("path/to/image.jpg")
+            >>> processed_frame = alarm.monitor(frame)
+        """
+        self.annotator = Annotator(im0, line_width=self.line_width)  # Initialize annotator
+        self.extract_tracks(im0)  # Extract tracks
+
+        # Iterate over bounding boxes, track ids and classes index
+        for box, cls in zip(self.boxes, self.clss):
+            # Draw bounding box
+            self.annotator.box_label(box, label=self.names[cls], color=colors(cls, True))
+
+        total_det = len(self.clss)
+        if total_det > self.records and not self.email_sent:  # Only send email if not sent before
+            self.send_email(im0, total_det)
+            self.email_sent = True
+
+        self.display_output(im0)  # display output with base class function
+
+        return im0  # return output image for more usage
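
A usage sketch tying authenticate() and monitor() together (illustrative only, not part of the committed code; credentials, threshold, and video path are assumptions, and Gmail typically requires an app password rather than the account password):

    import cv2
    from ultralytics.solutions import SecurityAlarm

    alarm = SecurityAlarm(records=5, show=True)  # Email fires once more than 5 objects are detected
    alarm.authenticate("sender@example.com", "app-password", "recipient@example.com")  # Illustrative credentials

    cap = cv2.VideoCapture("cctv.mp4")  # Illustrative input video
    while cap.isOpened():
        success, im0 = cap.read()
        if not success:
            break
        im0 = alarm.monitor(im0)  # At most one email per run because email_sent latches True
    cap.release()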

+ 178 - 0
ultralytics/solutions/solutions.py

@@ -0,0 +1,178 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+from collections import defaultdict
+
+import cv2
+
+from ultralytics import YOLO
+from ultralytics.utils import ASSETS_URL, DEFAULT_CFG_DICT, DEFAULT_SOL_DICT, LOGGER
+from ultralytics.utils.checks import check_imshow, check_requirements
+
+
+class BaseSolution:
+    """
+    A base class for managing Ultralytics Solutions.
+
+    This class provides core functionality for various Ultralytics Solutions, including model loading, object tracking,
+    and region initialization.
+
+    Attributes:
+        LineString (shapely.geometry.LineString): Class for creating line string geometries.
+        Polygon (shapely.geometry.Polygon): Class for creating polygon geometries.
+        Point (shapely.geometry.Point): Class for creating point geometries.
+        CFG (Dict): Configuration dictionary loaded from a YAML file and updated with kwargs.
+        region (List[Tuple[int, int]]): List of coordinate tuples defining a region of interest.
+        line_width (int): Width of lines used in visualizations.
+        model (ultralytics.YOLO): Loaded YOLO model instance.
+        names (Dict[int, str]): Dictionary mapping class indices to class names.
+        env_check (bool): Flag indicating whether the environment supports image display.
+        track_history (collections.defaultdict): Dictionary to store tracking history for each object.
+
+    Methods:
+        extract_tracks: Apply object tracking and extract tracks from an input image.
+        store_tracking_history: Store object tracking history for a given track ID and bounding box.
+        initialize_region: Initialize the counting region and line segment based on configuration.
+        display_output: Display the results of processing, including showing frames or saving results.
+
+    Examples:
+        >>> solution = BaseSolution(model="yolov8n.pt", region=[(0, 0), (100, 0), (100, 100), (0, 100)])
+        >>> solution.initialize_region()
+        >>> image = cv2.imread("image.jpg")
+        >>> solution.extract_tracks(image)
+        >>> solution.display_output(image)
+    """
+
+    def __init__(self, IS_CLI=False, **kwargs):
+        """
+        Initializes the `BaseSolution` class with configuration settings and the YOLO model for Ultralytics solutions.
+
+        IS_CLI (bool, optional): Enables CLI mode; when True and no source is provided, a default demo video is downloaded.
+        """
+        check_requirements("shapely>=2.0.0")
+        from shapely.geometry import LineString, Point, Polygon
+        from shapely.prepared import prep
+
+        self.LineString = LineString
+        self.Polygon = Polygon
+        self.Point = Point
+        self.prep = prep
+        self.annotator = None  # Initialize annotator
+        self.tracks = None
+        self.track_data = None
+        self.boxes = []
+        self.clss = []
+        self.track_ids = []
+        self.track_line = None
+        self.r_s = None
+
+        # Load config and update with args
+        DEFAULT_SOL_DICT.update(kwargs)
+        DEFAULT_CFG_DICT.update(kwargs)
+        self.CFG = {**DEFAULT_SOL_DICT, **DEFAULT_CFG_DICT}
+        LOGGER.info(f"Ultralytics Solutions: ✅ {DEFAULT_SOL_DICT}")
+
+        self.region = self.CFG["region"]  # Store region data for other classes usage
+        self.line_width = (
+            self.CFG["line_width"] if self.CFG["line_width"] is not None else 2
+        )  # Store line_width for usage
+
+        # Load Model and store classes names
+        if self.CFG["model"] is None:
+            self.CFG["model"] = "yolo11n.pt"
+        self.model = YOLO(self.CFG["model"])
+        self.names = self.model.names
+
+        self.track_add_args = {  # Additional tracker arguments for advanced configuration
+            k: self.CFG[k] for k in ["verbose", "iou", "conf", "device", "max_det", "half", "tracker"]
+        }
+
+        if IS_CLI and self.CFG["source"] is None:
+            d_s = "solutions_ci_demo.mp4" if "-pose" not in self.CFG["model"] else "solution_ci_pose_demo.mp4"
+            LOGGER.warning(f"⚠️ WARNING: source not provided. using default source {ASSETS_URL}/{d_s}")
+            from ultralytics.utils.downloads import safe_download
+
+            safe_download(f"{ASSETS_URL}/{d_s}")  # download source from ultralytics assets
+            self.CFG["source"] = d_s  # set default source
+
+        # Initialize environment and region setup
+        self.env_check = check_imshow(warn=True)
+        self.track_history = defaultdict(list)
+
+    def extract_tracks(self, im0):
+        """
+        Applies object tracking and extracts tracks from an input image or frame.
+
+        Args:
+            im0 (ndarray): The input image or frame.
+
+        Examples:
+            >>> solution = BaseSolution()
+            >>> frame = cv2.imread("path/to/image.jpg")
+            >>> solution.extract_tracks(frame)
+        """
+        self.tracks = self.model.track(source=im0, persist=True, classes=self.CFG["classes"], **self.track_add_args)
+
+        # Extract tracks for OBB or object detection
+        self.track_data = self.tracks[0].obb or self.tracks[0].boxes
+
+        if self.track_data and self.track_data.id is not None:
+            self.boxes = self.track_data.xyxy.cpu()
+            self.clss = self.track_data.cls.cpu().tolist()
+            self.track_ids = self.track_data.id.int().cpu().tolist()
+        else:
+            LOGGER.warning("WARNING ⚠️ no tracks found!")
+            self.boxes, self.clss, self.track_ids = [], [], []
+
+    def store_tracking_history(self, track_id, box):
+        """
+        Stores the tracking history of an object.
+
+        This method updates the tracking history for a given object by appending the center point of its
+        bounding box to the track line. It maintains a maximum of 30 points in the tracking history.
+
+        Args:
+            track_id (int): The unique identifier for the tracked object.
+            box (List[float]): The bounding box coordinates of the object in the format [x1, y1, x2, y2].
+
+        Examples:
+            >>> solution = BaseSolution()
+            >>> solution.store_tracking_history(1, [100, 200, 300, 400])
+        """
+        # Store tracking history
+        self.track_line = self.track_history[track_id]
+        self.track_line.append(((box[0] + box[2]) / 2, (box[1] + box[3]) / 2))
+        if len(self.track_line) > 30:
+            self.track_line.pop(0)
+
+    def initialize_region(self):
+        """Initialize the counting region and line segment based on configuration settings."""
+        if self.region is None:
+            self.region = [(20, 400), (1080, 400), (1080, 360), (20, 360)]
+        self.r_s = (
+            self.Polygon(self.region) if len(self.region) >= 3 else self.LineString(self.region)
+        )  # region or line
+
+    def display_output(self, im0):
+        """
+        Display the results of the processing, which could involve showing frames, printing counts, or saving results.
+
+        This method is responsible for visualizing the output of the object detection and tracking process. It displays
+        the processed frame with annotations, and allows for user interaction to close the display.
+
+        Args:
+            im0 (numpy.ndarray): The input image or frame that has been processed and annotated.
+
+        Examples:
+            >>> solution = BaseSolution()
+            >>> frame = cv2.imread("path/to/image.jpg")
+            >>> solution.display_output(frame)
+
+        Notes:
+            - This method will only display output if the 'show' configuration is set to True and the environment
+              supports image display.
+            - The display can be closed by pressing the 'q' key.
+        """
+        if self.CFG.get("show") and self.env_check:
+            cv2.imshow("Ultralytics Solutions", im0)
+            if cv2.waitKey(1) & 0xFF == ord("q"):
+                return
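
To illustrate how the helpers above compose, here is a hypothetical minimal subclass (illustrative only, not part of the committed code) that counts objects whose latest track point falls inside the configured region:

    from ultralytics.solutions.solutions import BaseSolution
    from ultralytics.utils.plotting import Annotator, colors

    class InsideRegionCounter(BaseSolution):
        """Hypothetical example solution built on the BaseSolution helpers."""

        def __init__(self, **kwargs):
            super().__init__(**kwargs)
            self.initialize_region()  # Builds self.r_s from self.region (or the default rectangle)

        def process(self, im0):
            self.annotator = Annotator(im0, line_width=self.line_width)
            self.extract_tracks(im0)  # Fills self.boxes, self.clss, self.track_ids
            inside = 0
            for box, track_id, cls in zip(self.boxes, self.track_ids, self.clss):
                self.store_tracking_history(track_id, box)  # Keeps the last 30 center points
                self.annotator.box_label(box, label=self.names[cls], color=colors(track_id, True))
                if self.r_s.contains(self.Point(self.track_line[-1])):
                    inside += 1
            self.display_output(im0)
            return im0, inside  # Annotated frame and the number of objects inside the region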

+ 110 - 0
ultralytics/solutions/speed_estimation.py

@@ -0,0 +1,110 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+from time import time
+
+import numpy as np
+
+from ultralytics.solutions.solutions import BaseSolution
+from ultralytics.utils.plotting import Annotator, colors
+
+
+class SpeedEstimator(BaseSolution):
+    """
+    A class to estimate the speed of objects in a real-time video stream based on their tracks.
+
+    This class extends the BaseSolution class and provides functionality for estimating object speeds using
+    tracking data in video streams.
+
+    Attributes:
+        spd (Dict[int, float]): Dictionary storing speed data for tracked objects.
+        trkd_ids (List[int]): List of tracked object IDs that have already been speed-estimated.
+        trk_pt (Dict[int, float]): Dictionary storing previous timestamps for tracked objects.
+        trk_pp (Dict[int, Tuple[float, float]]): Dictionary storing previous positions for tracked objects.
+        annotator (Annotator): Annotator object for drawing on images.
+        region (List[Tuple[int, int]]): List of points defining the speed estimation region.
+        track_line (List[Tuple[float, float]]): List of points representing the object's track.
+        r_s (LineString): LineString object representing the speed estimation region.
+
+    Methods:
+        initialize_region: Initializes the speed estimation region.
+        estimate_speed: Estimates the speed of objects based on tracking data.
+        store_tracking_history: Stores the tracking history for an object.
+        extract_tracks: Extracts tracks from the current frame.
+        display_output: Displays the output with annotations.
+
+    Examples:
+        >>> estimator = SpeedEstimator()
+        >>> frame = cv2.imread("frame.jpg")
+        >>> processed_frame = estimator.estimate_speed(frame)
+        >>> cv2.imshow("Speed Estimation", processed_frame)
+    """
+
+    def __init__(self, **kwargs):
+        """Initializes the SpeedEstimator object with speed estimation parameters and data structures."""
+        super().__init__(**kwargs)
+
+        self.initialize_region()  # Initialize speed region
+
+        self.spd = {}  # Dictionary of estimated speeds keyed by track ID
+        self.trkd_ids = []  # List of track IDs whose speed has already been estimated
+        self.trk_pt = {}  # Dictionary of previous timestamps keyed by track ID
+        self.trk_pp = {}  # Dictionary of previous positions keyed by track ID
+
+    def estimate_speed(self, im0):
+        """
+        Estimates the speed of objects based on tracking data.
+
+        Args:
+            im0 (np.ndarray): Input image for processing. Shape is typically (H, W, C) for RGB images.
+
+        Returns:
+            (np.ndarray): Processed image with speed estimations and annotations.
+
+        Examples:
+            >>> estimator = SpeedEstimator()
+            >>> image = np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8)
+            >>> processed_image = estimator.estimate_speed(image)
+        """
+        self.annotator = Annotator(im0, line_width=self.line_width)  # Initialize annotator
+        self.extract_tracks(im0)  # Extract tracks
+
+        self.annotator.draw_region(
+            reg_pts=self.region, color=(104, 0, 123), thickness=self.line_width * 2
+        )  # Draw region
+
+        for box, track_id, cls in zip(self.boxes, self.track_ids, self.clss):
+            self.store_tracking_history(track_id, box)  # Store track history
+
+            # Initialize previous time and position for track IDs seen for the first time
+            if track_id not in self.trk_pt:
+                self.trk_pt[track_id] = 0
+            if track_id not in self.trk_pp:
+                self.trk_pp[track_id] = self.track_line[-1]
+
+            speed_label = f"{int(self.spd[track_id])} km/h" if track_id in self.spd else self.names[int(cls)]
+            self.annotator.box_label(box, label=speed_label, color=colors(track_id, True))  # Draw bounding box
+
+            # Draw tracks of objects
+            self.annotator.draw_centroid_and_tracks(
+                self.track_line, color=colors(int(track_id), True), track_thickness=self.line_width
+            )
+
+            # Calculate object speed and direction based on region intersection
+            if self.LineString([self.trk_pp[track_id], self.track_line[-1]]).intersects(self.r_s):
+                direction = "known"
+            else:
+                direction = "unknown"
+
+            # Perform speed calculation and tracking updates if direction is valid
+            if direction == "known" and track_id not in self.trkd_ids:
+                self.trkd_ids.append(track_id)
+                time_difference = time() - self.trk_pt[track_id]
+                if time_difference > 0:
+                    self.spd[track_id] = np.abs(self.track_line[-1][1] - self.trk_pp[track_id][1]) / time_difference
+
+            self.trk_pt[track_id] = time()
+            self.trk_pp[track_id] = self.track_line[-1]
+
+        self.display_output(im0)  # display output with base class function
+
+        return im0  # return output image for more usage
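
A usage sketch for SpeedEstimator (illustrative only, not part of the committed code; the video path and region points are assumptions). Speed is derived from the vertical pixel displacement over elapsed time once a track's motion segment intersects the region, so the region is typically a line or band across the road:

    import cv2
    from ultralytics.solutions import SpeedEstimator

    cap = cv2.VideoCapture("traffic.mp4")  # Illustrative input video
    speed = SpeedEstimator(region=[(0, 360), (1280, 360)], show=True)  # Illustrative line across the road

    while cap.isOpened():
        success, im0 = cap.read()
        if not success:
            break
        im0 = speed.estimate_speed(im0)  # Boxes are labeled with the estimated speed once computed
    cap.release()
    cv2.destroyAllWindows()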

+ 190 - 0
ultralytics/solutions/streamlit_inference.py

@@ -0,0 +1,190 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+import io
+from typing import Any
+
+import cv2
+
+from ultralytics import YOLO
+from ultralytics.utils import LOGGER
+from ultralytics.utils.checks import check_requirements
+from ultralytics.utils.downloads import GITHUB_ASSETS_STEMS
+
+
+class Inference:
+    """
+    A class to perform object detection, image classification, image segmentation, and pose estimation inference using
+    Streamlit and Ultralytics YOLO models. It provides functionality for loading models, configuring settings,
+    uploading video files, and performing real-time inference.
+
+    Attributes:
+        st (module): Streamlit module for UI creation.
+        temp_dict (dict): Temporary dictionary to store the model path.
+        model_path (str): Path to the loaded model.
+        model (YOLO): The YOLO model instance.
+        source (str): Selected video source.
+        enable_trk (str): Tracking toggle selected in the sidebar ("Yes" or "No").
+        conf (float): Confidence threshold.
+        iou (float): IoU threshold for non-max suppression.
+        vid_file_name (str): Name of the uploaded video file.
+        selected_ind (list): List of selected class indices.
+
+    Methods:
+        web_ui: Sets up the Streamlit web interface with custom HTML elements.
+        sidebar: Configures the Streamlit sidebar for model and inference settings.
+        source_upload: Handles video file uploads through the Streamlit interface.
+        configure: Configures the model and loads selected classes for inference.
+        inference: Performs real-time object detection inference.
+
+    Examples:
+        >>> inf = solutions.Inference(model="path/to/model.pt")  # The model argument is optional
+        >>> inf.inference()
+    """
+
+    def __init__(self, **kwargs: Any):
+        """
+        Initializes the Inference class, checking Streamlit requirements and setting up the model path.
+
+        Args:
+            **kwargs (Any): Additional keyword arguments for model configuration.
+        """
+        check_requirements("streamlit>=1.29.0")  # scope imports for faster ultralytics package load speeds
+        import streamlit as st
+
+        self.st = st  # Reference to the Streamlit class instance
+        self.source = None  # Placeholder for video or webcam source details
+        self.enable_trk = False  # Flag to toggle object tracking
+        self.conf = 0.25  # Confidence threshold for detection
+        self.iou = 0.45  # Intersection-over-Union (IoU) threshold for non-maximum suppression
+        self.org_frame = None  # Container for the original frame to be displayed
+        self.ann_frame = None  # Container for the annotated frame to be displayed
+        self.vid_file_name = None  # Holds the name of the video file
+        self.selected_ind = []  # List of selected classes for detection or tracking
+        self.model = None  # Container for the loaded model instance
+
+        self.temp_dict = {"model": None, **kwargs}
+        self.model_path = None  # Store model file name with path
+        if self.temp_dict["model"] is not None:
+            self.model_path = self.temp_dict["model"]
+
+        LOGGER.info(f"Ultralytics Solutions: ✅ {self.temp_dict}")
+
+    def web_ui(self):
+        """Sets up the Streamlit web interface with custom HTML elements."""
+        menu_style_cfg = """<style>MainMenu {visibility: hidden;}</style>"""  # Hide main menu style
+
+        # Main title of streamlit application
+        main_title_cfg = """<div><h1 style="color:#FF64DA; text-align:center; font-size:40px; margin-top:-50px;
+        font-family: 'Archivo', sans-serif; margin-bottom:20px;">Ultralytics YOLO Streamlit Application</h1></div>"""
+
+        # Subtitle of streamlit application
+        sub_title_cfg = """<div><h4 style="color:#042AFF; text-align:center; font-family: 'Archivo', sans-serif; 
+        margin-top:-15px; margin-bottom:50px;">Experience real-time object detection on your webcam with the power 
+        of Ultralytics YOLO! 🚀</h4></div>"""
+
+        # Set html page configuration and append custom HTML
+        self.st.set_page_config(page_title="Ultralytics Streamlit App", layout="wide")
+        self.st.markdown(menu_style_cfg, unsafe_allow_html=True)
+        self.st.markdown(main_title_cfg, unsafe_allow_html=True)
+        self.st.markdown(sub_title_cfg, unsafe_allow_html=True)
+
+    def sidebar(self):
+        """Configures the Streamlit sidebar for model and inference settings."""
+        with self.st.sidebar:  # Add Ultralytics LOGO
+            logo = "https://raw.githubusercontent.com/ultralytics/assets/main/logo/Ultralytics_Logotype_Original.svg"
+            self.st.image(logo, width=250)
+
+        self.st.sidebar.title("User Configuration")  # Add elements to vertical setting menu
+        self.source = self.st.sidebar.selectbox(
+            "Video",
+            ("webcam", "video"),
+        )  # Add source selection dropdown
+        self.enable_trk = self.st.sidebar.radio("Enable Tracking", ("Yes", "No"))  # Enable object tracking
+        self.conf = float(
+            self.st.sidebar.slider("Confidence Threshold", 0.0, 1.0, self.conf, 0.01)
+        )  # Slider for confidence
+        self.iou = float(self.st.sidebar.slider("IoU Threshold", 0.0, 1.0, self.iou, 0.01))  # Slider for NMS threshold
+
+        col1, col2 = self.st.columns(2)
+        self.org_frame = col1.empty()
+        self.ann_frame = col2.empty()
+
+    def source_upload(self):
+        """Handles video file uploads through the Streamlit interface."""
+        self.vid_file_name = ""
+        if self.source == "video":
+            vid_file = self.st.sidebar.file_uploader("Upload Video File", type=["mp4", "mov", "avi", "mkv"])
+            if vid_file is not None:
+                g = io.BytesIO(vid_file.read())  # BytesIO Object
+                with open("ultralytics.mp4", "wb") as out:  # Open temporary file as bytes
+                    out.write(g.read())  # Read bytes into file
+                self.vid_file_name = "ultralytics.mp4"
+        elif self.source == "webcam":
+            self.vid_file_name = 0
+
+    def configure(self):
+        """Configures the model and loads selected classes for inference."""
+        # Add dropdown menu for model selection
+        available_models = [x.replace("yolo", "YOLO") for x in GITHUB_ASSETS_STEMS if x.startswith("yolo11")]
+        if self.model_path:  # If user provided a custom model, insert it without the suffix since *.pt is appended later
+            available_models.insert(0, self.model_path.split(".pt")[0])
+        selected_model = self.st.sidebar.selectbox("Model", available_models)
+
+        with self.st.spinner("Model is downloading..."):
+            self.model = YOLO(f"{selected_model.lower()}.pt")  # Load the YOLO model
+            class_names = list(self.model.names.values())  # Convert dictionary to list of class names
+        self.st.success("Model loaded successfully!")
+
+        # Multiselect box with class names and get indices of selected classes
+        selected_classes = self.st.sidebar.multiselect("Classes", class_names, default=class_names[:3])
+        self.selected_ind = [class_names.index(option) for option in selected_classes]
+
+        if not isinstance(self.selected_ind, list):  # Ensure selected_ind is a list
+            self.selected_ind = list(self.selected_ind)
+
+    def inference(self):
+        """Performs real-time object detection inference."""
+        self.web_ui()  # Initialize the web interface
+        self.sidebar()  # Create the sidebar
+        self.source_upload()  # Upload the video source
+        self.configure()  # Configure the app
+
+        if self.st.sidebar.button("Start"):
+            stop_button = self.st.button("Stop")  # Button to stop the inference
+            cap = cv2.VideoCapture(self.vid_file_name)  # Capture the video
+            if not cap.isOpened():
+                self.st.error("Could not open the selected video source.")
+            while cap.isOpened():
+                success, frame = cap.read()
+                if not success:
+                    self.st.warning("Failed to read frame from the video source. Please verify it is available and try again.")
+                    break
+
+                # Store model predictions
+                if self.enable_trk == "Yes":
+                    results = self.model.track(
+                        frame, conf=self.conf, iou=self.iou, classes=self.selected_ind, persist=True
+                    )
+                else:
+                    results = self.model(frame, conf=self.conf, iou=self.iou, classes=self.selected_ind)
+                annotated_frame = results[0].plot()  # Add annotations on frame
+
+                if stop_button:
+                    cap.release()  # Release the capture
+                    self.st.stop()  # Stop streamlit app
+
+                self.org_frame.image(frame, channels="BGR")  # Display original frame
+                self.ann_frame.image(annotated_frame, channels="BGR")  # Display processed frame
+
+            cap.release()  # Release the capture
+        cv2.destroyAllWindows()  # Destroy window
+
+
+if __name__ == "__main__":
+    import sys  # Import the sys module for accessing command-line arguments
+
+    # Check if a model name is provided as a command-line argument
+    args = len(sys.argv)
+    model = sys.argv[1] if args > 1 else None  # assign first argument as the model name
+    # Create an instance of the Inference class and run inference
+    Inference(model=model).inference()
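
Because the Streamlit widgets above only work inside the Streamlit runtime, this file is normally launched through streamlit rather than plain python. An illustrative invocation (the path and weights name are assumptions) would be:

    streamlit run ultralytics/solutions/streamlit_inference.py -- yolo11n.pt

Arguments placed after "--" are passed through to the script's sys.argv, so the optional model name reaches the sys.argv[1] handling shown above.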

+ 68 - 0
ultralytics/solutions/trackzone.py

@@ -0,0 +1,68 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+import cv2
+import numpy as np
+
+from ultralytics.solutions.solutions import BaseSolution
+from ultralytics.utils.plotting import Annotator, colors
+
+
+class TrackZone(BaseSolution):
+    """
+    A class to manage region-based object tracking in a video stream.
+
+    This class extends the BaseSolution class and provides functionality for tracking objects within a specific region
+    defined by a polygonal area. Objects outside the region are excluded from tracking. It supports dynamic initialization
+    of the region, allowing either a default region or a user-specified polygon.
+
+    Attributes:
+        region (ndarray): The polygonal region for tracking, represented as a convex hull.
+
+    Methods:
+        trackzone: Processes each frame of the video, applying region-based tracking.
+
+    Examples:
+        >>> tracker = TrackZone()
+        >>> frame = cv2.imread("frame.jpg")
+        >>> processed_frame = tracker.trackzone(frame)
+        >>> cv2.imshow("Tracked Frame", processed_frame)
+    """
+
+    def __init__(self, **kwargs):
+        """Initializes the TrackZone class for tracking objects within a defined region in video streams."""
+        super().__init__(**kwargs)
+        default_region = [(150, 150), (1130, 150), (1130, 570), (150, 570)]
+        self.region = cv2.convexHull(np.array(self.region or default_region, dtype=np.int32))
+
+    def trackzone(self, im0):
+        """
+        Processes the input frame to track objects within a defined region.
+
+        This method initializes the annotator, creates a mask for the specified region, extracts tracks
+        only from the masked area, and updates tracking information. Objects outside the region are ignored.
+
+        Args:
+            im0 (numpy.ndarray): The input image or frame to be processed.
+
+        Returns:
+            (numpy.ndarray): The processed image with tracking id and bounding boxes annotations.
+
+        Examples:
+            >>> tracker = TrackZone()
+            >>> frame = cv2.imread("path/to/image.jpg")
+            >>> tracker.trackzone(frame)
+        """
+        self.annotator = Annotator(im0, line_width=self.line_width)  # Initialize annotator
+        # Create a mask for the region and extract tracks from the masked image
+        masked_frame = cv2.bitwise_and(im0, im0, mask=cv2.fillPoly(np.zeros_like(im0[:, :, 0]), [self.region], 255))
+        self.extract_tracks(masked_frame)
+
+        cv2.polylines(im0, [self.region], isClosed=True, color=(255, 255, 255), thickness=self.line_width * 2)
+
+        # Iterate over boxes, track ids, classes indexes list and draw bounding boxes
+        for box, track_id, cls in zip(self.boxes, self.track_ids, self.clss):
+            self.annotator.box_label(box, label=f"{self.names[cls]}:{track_id}", color=colors(track_id, True))
+
+        self.display_output(im0)  # display output with base class function
+
+        return im0  # return output image for more usage
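
A usage sketch for TrackZone (illustrative only, not part of the committed code; the polygon and video path are assumptions). Only objects inside the zone are tracked because detection runs on the masked frame:

    import cv2
    from ultralytics.solutions import TrackZone

    zone = TrackZone(region=[(150, 150), (1130, 150), (1130, 570), (150, 570)], show=True)

    cap = cv2.VideoCapture("street.mp4")  # Illustrative input video
    while cap.isOpened():
        success, im0 = cap.read()
        if not success:
            break
        im0 = zone.trackzone(im0)  # Frame annotated with the zone outline and per-object track IDs
    cap.release()
    cv2.destroyAllWindows()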