New Solutions sweep counting annotator (#17742)

Co-authored-by: UltralyticsAssistant <web@ultralytics.com> Co-authored-by: Glenn Jocher <glenn.jocher@ultralytics.com> Co-authored-by: Ultralytics Assistant <135830346+UltralyticsAssistant@users.noreply.github.com>
2025-09-15 15:48:41 +08:00 · 2024-11-26 22:26:23 +05:00 · 2024-11-26 22:26:23 +05:00 · 5b124dc7ca
commit 5b124dc7ca
parent d8a339d370
2 changed files with 126 additions and 8 deletions
--- a/docs/en/usage/simple-utilities.md
+++ b/docs/en/usage/simple-utilities.md
@ -374,6 +374,91 @@ See docstring for each function or visit the `ultralytics.utils.ops` [reference

 Ultralytics includes an Annotator class that can be used to annotate any kind of data. It's easiest to use with [object detection bounding boxes](../modes/predict.md#boxes), [pose key points](../modes/predict.md#keypoints), and [oriented bounding boxes](../modes/predict.md#obb).

+#### Ultralytics Sweep Annotation
+
+!!! example "Python Examples using YOLO11 🚀"
+
+    ```python
+    import cv2
+
+    from ultralytics import YOLO
+    from ultralytics.utils.plotting import Annotator, colors
+
+    # User defined video path and model file
+    cap = cv2.VideoCapture("Path/to/video/file.mp4")
+    model = YOLO(model="yolo11s-seg.pt")  # Model file i.e. yolo11s.pt or yolo11m-seg.pt
+
+    if not cap.isOpened():
+        raise SystemExit("Error: Could not open video.")
+
+    # Initialize the video writer object with the source resolution and frame rate.
+    w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
+    video_writer = cv2.VideoWriter("ultralytics.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
+
+    line_x = w  # x-position of the sweep line; starts at the right edge of the frame.
+    classes = model.names  # Store model classes names for plotting.
+    window_name = "Ultralytics Sweep Annotator"
+
+
+    def drag_line(event, x, y, flags, param):
+        """Mouse callback that moves the sweep line to the cursor while the left button is down."""
+        global line_x
+        if event == cv2.EVENT_LBUTTONDOWN or (flags & cv2.EVENT_FLAG_LBUTTON):
+            line_x = max(0, min(x, w))  # Clamp the line position to the frame width.
+
+
+    # Create the window before the loop so the mouse callback is registered exactly once.
+    cv2.namedWindow(window_name)
+    cv2.setMouseCallback(window_name, drag_line)
+
+    try:
+        while cap.isOpened():  # Loop over the video capture object.
+            ret, im0 = cap.read()
+            if not ret:
+                break
+            count = 0  # Re-initialize count variable on every frame for precise counts.
+            annotator = Annotator(im0)
+            results = model.track(im0, persist=True)  # Track objects using track method.
+
+            if results[0].boxes.id is not None:
+                track_ids = results[0].boxes.id.int().cpu().tolist()
+                clss = results[0].boxes.cls.int().cpu().tolist()  # Integer class ids for the names lookup.
+                boxes = results[0].boxes.xyxy.cpu()
+                # Take masks from the current frame only, so masks from an earlier frame
+                # are never paired with the boxes of this one.
+                masks = results[0].masks.xy if results[0].masks is not None else None
+
+                for mask, box, cls, t_id in zip(masks or [None] * len(boxes), boxes, clss, track_ids):
+                    # Only objects whose left edge is right of the sweep line are counted and drawn.
+                    if not box[0] > line_x:
+                        continue
+                    count += 1
+                    color = colors(t_id, True)  # Assign different color to each tracked object.
+                    label = str(classes[cls])
+                    if mask is not None and mask.size > 0:
+                        # If you want to overlay the masks
+                        # mask[:, 0] = np.clip(mask[:, 0], line_x, w)
+                        # mask_img = cv2.fillPoly(im0.copy(), [mask.astype(int)], color)
+                        # cv2.addWeighted(mask_img, 0.5, im0, 0.5, 0, im0)
+                        annotator.seg_bbox(mask=mask, mask_color=color, label=label)
+                    else:
+                        annotator.box_label(box=box, color=color, label=label)
+
+            annotator.sweep_annotator(line_x=line_x, line_y=h, label=f"COUNT:{count}")  # Display the sweep
+            cv2.imshow(window_name, im0)
+            video_writer.write(im0)
+            if cv2.waitKey(1) & 0xFF == ord("q"):
+                break
+    finally:
+        # Release resources even if tracking or drawing raises mid-stream.
+        cap.release()  # Release the video capture.
+        video_writer.release()  # Release the video writer.
+        cv2.destroyAllWindows()  # Destroy all opened windows.
+    ```
+
 #### Horizontal Bounding Boxes

 ```{ .py .annotate }
--- a/ultralytics/utils/plotting.py
+++ b/ultralytics/utils/plotting.py
@ -791,19 +791,52 @@ class Annotator:
        cv2.polylines(self.im, [np.int32([mask])], isClosed=True, color=mask_color, thickness=2)
        text_size, _ = cv2.getTextSize(label, 0, self.sf, self.tf)

-        cv2.rectangle(
-            self.im,
-            (int(mask[0][0]) - text_size[0] // 2 - 10, int(mask[0][1]) - text_size[1] - 10),
-            (int(mask[0][0]) + text_size[0] // 2 + 10, int(mask[0][1] + 10)),
-            mask_color,
-            -1,
-        )
-
        if label:
+            cv2.rectangle(
+                self.im,
+                (int(mask[0][0]) - text_size[0] // 2 - 10, int(mask[0][1]) - text_size[1] - 10),
+                (int(mask[0][0]) + text_size[0] // 2 + 10, int(mask[0][1] + 10)),
+                mask_color,
+                -1,
+            )
            cv2.putText(
                self.im, label, (int(mask[0][0]) - text_size[0] // 2, int(mask[0][1])), 0, self.sf, txt_color, self.tf
            )

+    def sweep_annotator(self, line_x=0, line_y=0, label=None, color=(221, 0, 186), txt_color=(255, 255, 255)):
+        """
+        Draw a vertical sweep line on the image with an optional label centered on the line.
+
+        The line runs from the top of the image (y=0) down to `line_y` at horizontal position `line_x`. When a
+        label is given, it is drawn on a filled background box centered at (`line_x`, `line_y // 2`).
+
+        Args:
+            line_x (int | float): The x-coordinate of the sweep line. Converted to int before drawing.
+            line_y (int | float): The y-coordinate where the sweep line ends. Converted to int before drawing.
+            label (str, optional): Text label to be drawn in center of sweep line. If None or empty, no label is drawn.
+            color (tuple): Color for the line and label background. Passed to OpenCV unchanged, so it follows the
+                channel order of the image (BGR for frames read with cv2).
+            txt_color (tuple): Color for the label text, in the same channel order as `color`.
+        """
+        # OpenCV drawing functions only accept integer pixel coordinates
+        line_x, line_y = int(line_x), int(line_y)
+
+        # Draw the sweep line
+        cv2.line(self.im, (line_x, 0), (line_x, line_y), color, self.tf * 2)
+
+        # Draw label, if provided
+        if label:
+            (text_width, text_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, self.sf, self.tf)
+            half_w, half_h = text_width // 2, text_height // 2
+            mid_y = line_y // 2  # Vertical center of the sweep line
+            cv2.rectangle(
+                self.im,
+                (line_x - half_w - 10, mid_y - half_h - 10),
+                (line_x + half_w + 10, mid_y + half_h + 10),
+                color,
+                -1,
+            )
+            cv2.putText(
+                self.im,
+                label,
+                (line_x - half_w, mid_y + half_h),
+                cv2.FONT_HERSHEY_SIMPLEX,
+                self.sf,
+                txt_color,
+                self.tf,
+            )
+
+
    def plot_distance_and_line(
        self, pixels_distance, centroids, line_color=(104, 31, 17), centroid_color=(255, 0, 255)
    ):