From 12446a6c4400d4f38e6f8e669027f083f1d5cc0a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20Dalton?=
 <44240057+shdalton@users.noreply.github.com>
Date: Wed, 18 Mar 2026 23:42:37 -0400
Subject: [PATCH] Populate classId and confidence level for object detection in
 Java simulation (#2372)

## Description

compute confidence level based on target area in total image size and
populate classId and confidence level in Java (while building the
PhotonTrackedTarget)

## Changes
- Add new VisionTargetSim constructor for object detection
- If class ID specified but confidence = -1, estimate based on total
area

---------

Co-authored-by: Matt Morley <matthew.morley.ca@gmail.com>
---
 .../source/docs/simulation/simulation-java.md |  21 ++++
 .../photonlibpy/simulation/photonCameraSim.py |  11 ++
 .../photonlibpy/simulation/visionTargetSim.py |  72 ++++++++++-
 .../simulation/PhotonCameraSim.java           |  14 ++-
 .../simulation/VisionTargetSim.java           |  43 ++++++-
 .../cpp/photon/simulation/PhotonCameraSim.cpp |  21 +++-
 .../photon/simulation/VisionSystemSim.h       |  13 +-
 .../photon/simulation/VisionTargetSim.h       | 112 +++++++++++++++++-
 .../org/photonvision/VisionSystemSimTest.java |  44 ++++++-
 9 files changed, 321 insertions(+), 30 deletions(-)

diff --git a/docs/source/docs/simulation/simulation-java.md b/docs/source/docs/simulation/simulation-java.md
index 326aebba2..96a191e69 100644
--- a/docs/source/docs/simulation/simulation-java.md
+++ b/docs/source/docs/simulation/simulation-java.md
@@ -95,6 +95,27 @@ These `TargetModel` are paired with a target pose to create a `VisionTargetSim`.
 The pose of a `VisionTargetSim` object can be updated to simulate moving targets. Note, however, that this will break latency simulation for that target.
 :::
 
+To use simulated object detection, you must provide an objDetClassId (zero-indexed class ID) and confidence value. When you set objDetConf to -1, the simulation computes confidence based on the area of the target in the camera's field of view. To simulate a object detection model with one class (fuel, index 0) and specify confidence, you'd write:
+
+```{eval-rst}
+.. tab-set-code::
+
+   .. code-block:: java
+
+      // arbitrary position on field
+      final var targetPose = new Pose3d(new Translation3d(2, 0, 0), new Rotation3d());
+      // Class id, zero-indexed
+      final int classId = 0;
+      // Confidence, between 0 and 1.
+      final float conf = 0.67f;
+      // 6 inch diameter ball
+      final TargetModel ballModel = new TargetModel(Units.inchesToMeters(6));
+      final var ballTargetSim = new VisionTargetSim(targetPose, ballModel, classId, conf);
+
+      // Add this vision target to the vision system simulation to make it visible
+      visionSim.addVisionTargets(visionTarget);
+```
+
 For convenience, an `AprilTagFieldLayout` can also be added to automatically create a target for each of its AprilTags.
 
 ```{eval-rst}
diff --git a/photon-lib/py/photonlibpy/simulation/photonCameraSim.py b/photon-lib/py/photonlibpy/simulation/photonCameraSim.py
index a1ee052a1..b804d3e0f 100644
--- a/photon-lib/py/photonlibpy/simulation/photonCameraSim.py
+++ b/photon-lib/py/photonlibpy/simulation/photonCameraSim.py
@@ -368,6 +368,15 @@ class PhotonCameraSim:
                     noisyTargetCorners,
                 )
 
+            # Compute object detection confidence if this is an obj det target
+            classId = tgt.objDetClassId
+            conf = tgt.objDetConf
+            if classId >= 0 and conf < 0:
+                # Simulate confidence using sqrt-scaled area for a more realistic
+                # curve. Raw areaPercent/100 is tiny for most targets; sqrt scaling
+                # gives reasonable values even for small-but-visible objects.
+                conf = max(0.0, min(1.0, math.sqrt(areaPercent / 100.0) * 2.0))
+
             smallVec: list[TargetCorner] = []
             for corner in minAreaRectPts:
                 smallVec.append(TargetCorner(corner[0], corner[1]))
@@ -381,6 +390,8 @@ class PhotonCameraSim:
                     area=areaPercent,
                     skew=math.degrees(centerRot.X()),
                     fiducialId=tgt.fiducialId,
+                    objDetectId=classId,
+                    objDetectConf=conf,
                     detectedCorners=cornersFloat,
                     minAreaRectCorners=smallVec,
                     bestCameraToTarget=pnpSim.best if pnpSim else Transform3d(),
diff --git a/photon-lib/py/photonlibpy/simulation/visionTargetSim.py b/photon-lib/py/photonlibpy/simulation/visionTargetSim.py
index f2bd0b543..5a5caff5a 100644
--- a/photon-lib/py/photonlibpy/simulation/visionTargetSim.py
+++ b/photon-lib/py/photonlibpy/simulation/visionTargetSim.py
@@ -1,4 +1,5 @@
 import math
+from typing import overload
 
 from wpimath.geometry import Pose3d, Translation3d
 
@@ -8,19 +9,78 @@ from ..estimation.targetModel import TargetModel
 class VisionTargetSim:
     """Describes a vision target located somewhere on the field that your vision system can detect."""
 
-    def __init__(self, pose: Pose3d, model: TargetModel, id: int = -1):
-        """Describes a fiducial tag located somewhere on the field that your vision system can detect.
+    @overload
+    def __init__(self, pose: Pose3d, model: TargetModel) -> None:
+        """
+        Describes a retro-reflective/colored shape vision target located somewhere on the field that
+        your vision system can detect.
+
+        :param pose:  Pose3d of the target in field-relative coordinates
+        :param model: TargetModel which describes the shape of the target
+        """
+        ...
+
+    @overload
+    def __init__(self, pose: Pose3d, model: TargetModel, id: int) -> None:
+        """
+        Describes a fiducial tag located somewhere on the field that your vision system can detect.
 
         :param pose:  Pose3d of the tag in field-relative coordinates
-        :param model: TargetModel which describes the shape of the target(tag)
+        :param model: TargetModel which describes the geometry of the target (tag)
         :param id:    The ID of this fiducial tag
         """
+        ...
+
+    @overload
+    def __init__(
+        self, pose: Pose3d, model: TargetModel, objDetClassId: int, objDetConf: float
+    ) -> None:
+        """
+        Describes an object-detection vision target located somewhere on the field that your vision
+        system can detect.
+
+        :param pose:          Pose3d of the target in field-relative coordinates
+        :param model:         TargetModel which describes the shape of the target
+        :param objDetClassId: The object detection class ID, or -1 to exclude from object detection
+        :param objDetConf:    The object detection confidence, or -1.0 to compute from target area
+                              in the camera's field of view
+        """
+        ...
+
+    def __init__(
+        self,
+        pose: Pose3d,
+        model: TargetModel,
+        *args,
+        **kwargs,
+    ):
+        if kwargs:
+            raise TypeError(
+                f"VisionTargetSim does not accept keyword arguments: {list(kwargs.keys())}"
+            )
 
         self.pose: Pose3d = pose
         self.model: TargetModel = model
-        self.fiducialId: int = id
-        self.objDetClassId: int = -1
-        self.objDetConf: float = -1.0
+
+        if len(args) == 0:
+            # VisionTargetSim(pose, model)
+            self.fiducialId: int = -1
+            self.objDetClassId: int = -1
+            self.objDetConf: float = -1.0
+        elif len(args) == 1:
+            # VisionTargetSim(pose, model, id)
+            self.fiducialId = args[0]
+            self.objDetClassId = -1
+            self.objDetConf = -1.0
+        elif len(args) == 2:
+            # VisionTargetSim(pose, model, objDetClassId, objDetConf)
+            self.fiducialId = -1
+            self.objDetClassId = args[0]
+            self.objDetConf = args[1]
+        else:
+            raise ValueError(
+                f"VisionTargetSim takes 2-4 arguments, got {2 + len(args)}"
+            )
 
     def __lt__(self, right) -> bool:
         return self.pose.translation().norm() < right.pose.translation().norm()
diff --git a/photon-lib/src/main/java/org/photonvision/simulation/PhotonCameraSim.java b/photon-lib/src/main/java/org/photonvision/simulation/PhotonCameraSim.java
index c4331ba8c..68d302328 100644
--- a/photon-lib/src/main/java/org/photonvision/simulation/PhotonCameraSim.java
+++ b/photon-lib/src/main/java/org/photonvision/simulation/PhotonCameraSim.java
@@ -525,6 +525,16 @@ public class PhotonCameraSim implements AutoCloseable {
                                 .get();
             }
 
+            // If object detection (user classId valid) but conf wasn't provided, estimate
+            int classId = tgt.objDetClassId;
+            float conf = tgt.objDetConf;
+            if (classId >= 0 && conf < 0) {
+                // Simulate confidence using sqrt-scaled area for a more realistic
+                // curve. Raw areaPercent/100 is tiny for most targets; sqrt scaling
+                // gives reasonable values even for small-but-visible objects.
+                conf = (float) MathUtil.clamp(Math.sqrt(areaPercent / 100.0) * 2.0, 0.0, 1.0);
+            }
+
             detectableTgts.add(
                     new PhotonTrackedTarget(
                             -Math.toDegrees(centerRot.getZ()),
@@ -532,8 +542,8 @@ public class PhotonCameraSim implements AutoCloseable {
                             areaPercent,
                             Math.toDegrees(centerRot.getX()),
                             tgt.fiducialID,
-                            -1,
-                            -1,
+                            classId,
+                            conf,
                             pnpSim.best,
                             pnpSim.alt,
                             pnpSim.ambiguity,
diff --git a/photon-lib/src/main/java/org/photonvision/simulation/VisionTargetSim.java b/photon-lib/src/main/java/org/photonvision/simulation/VisionTargetSim.java
index 5e148fc7b..489bbdff1 100644
--- a/photon-lib/src/main/java/org/photonvision/simulation/VisionTargetSim.java
+++ b/photon-lib/src/main/java/org/photonvision/simulation/VisionTargetSim.java
@@ -36,16 +36,21 @@ public class VisionTargetSim {
 
     public final int fiducialID;
 
+    /** The object detection class ID, or -1 if not applicable. */
+    public final int objDetClassId;
+
+    /** The object detection confidence, or -1 if not applicable. */
+    public final float objDetConf;
+
     /**
-     * Describes a vision target located somewhere on the field that your vision system can detect.
+     * Describes a retro-reflective/colored shape vision target located somewhere on the field that
+     * your vision system can detect.
      *
      * @param pose Pose3d of the tag in field-relative coordinates
      * @param model TargetModel which describes the geometry of the target
      */
     public VisionTargetSim(Pose3d pose, TargetModel model) {
-        this.pose = pose;
-        this.model = model;
-        this.fiducialID = -1;
+        this(pose, model, -1, -1, -1);
     }
 
     /**
@@ -56,9 +61,33 @@ public class VisionTargetSim {
      * @param id The ID of this fiducial tag
      */
     public VisionTargetSim(Pose3d pose, TargetModel model, int id) {
+        this(pose, model, id, -1, -1);
+    }
+
+    /**
+     * Describes an object-detection vision target located somewhere on the field that your vision
+     * system can detect. Class ID is the (zero-indexed) index of the object's class ID in the list of
+     * all classes. Confidence can be specified, or pass -1 to estimate confidence based on 2 *
+     * sqrt(target area / total image area)
+     *
+     * @param pose Pose3d of the target in field-relative coordinates
+     * @param model TargetModel which describes the geometry of the target
+     * @param objDetClassId The object detection class ID, if -1 it will not be detected by object
+     *     detection
+     * @param objDetConf The object detection confidence, or -1 in which case the simulation will
+     *     compute a confidence based on the area of the target in the camera's field of view
+     */
+    public VisionTargetSim(Pose3d pose, TargetModel model, int objDetClassId, float objDetConf) {
+        this(pose, model, -1, objDetClassId, objDetConf);
+    }
+
+    private VisionTargetSim(
+            Pose3d pose, TargetModel model, int id, int objDetClassId, float objDetConf) {
         this.pose = pose;
         this.model = model;
         this.fiducialID = id;
+        this.objDetClassId = objDetClassId;
+        this.objDetConf = objDetConf;
     }
 
     /**
@@ -97,7 +126,11 @@ public class VisionTargetSim {
         return model;
     }
 
-    /** This target's vertices offset from its field pose. */
+    /**
+     * This target's vertices offset from its field pose.
+     *
+     * @return A vector of Translation3d representing the vertices of the target
+     */
     public List<Translation3d> getFieldVertices() {
         return model.getFieldVertices(pose);
     }
diff --git a/photon-lib/src/main/native/cpp/photon/simulation/PhotonCameraSim.cpp b/photon-lib/src/main/native/cpp/photon/simulation/PhotonCameraSim.cpp
index 4914436ec..d9ff4f754 100644
--- a/photon-lib/src/main/native/cpp/photon/simulation/PhotonCameraSim.cpp
+++ b/photon-lib/src/main/native/cpp/photon/simulation/PhotonCameraSim.cpp
@@ -214,12 +214,23 @@ PhotonPipelineResult PhotonCameraSim::Process(
     }
 
     std::optional<photon::PnpResult> pnpSim = std::nullopt;
-    if (tgt.fiducialId >= 0 && tgt.GetFieldVertices().size() == 4) {
+    if (tgt.GetFiducialId() >= 0 && tgt.GetFieldVertices().size() == 4) {
       pnpSim = OpenCVHelp::SolvePNP_Square(
           prop.GetIntrinsics(), prop.GetDistCoeffs(),
           tgt.GetModel().GetVertices(), noisyTargetCorners);
     }
 
+    // Compute object detection confidence if this is an obj det target
+    int classId = tgt.GetObjDetClassId();
+    float conf = tgt.GetObjDetConf();
+    if (classId >= 0 && conf < 0) {
+      // Simulate confidence using sqrt-scaled area for a more realistic
+      // curve. Raw areaPercent/100 is tiny for most targets; sqrt scaling
+      // gives reasonable values even for small-but-visible objects.
+      conf = static_cast<float>(
+          std::clamp(std::sqrt(areaPercent / 100.0) * 2.0, 0.0, 1.0));
+    }
+
     std::vector<std::pair<float, float>> tempCorners =
         OpenCVHelp::PointsToCorners(minAreaRectPts);
     std::vector<TargetCorner> smallVec;
@@ -236,8 +247,8 @@ PhotonPipelineResult PhotonCameraSim::Process(
     detectableTgts.emplace_back(
         -centerRot.Z().convert<units::degrees>().to<double>(),
         -centerRot.Y().convert<units::degrees>().to<double>(), areaPercent,
-        centerRot.X().convert<units::degrees>().to<double>(), tgt.fiducialId,
-        tgt.objDetClassId, tgt.objDetConf,
+        centerRot.X().convert<units::degrees>().to<double>(),
+        tgt.GetFiducialId(), classId, conf,
         pnpSim ? pnpSim->best : frc::Transform3d{},
         pnpSim ? pnpSim->alt : frc::Transform3d{},
         pnpSim ? pnpSim->ambiguity : -1, smallVec, cornersDouble);
@@ -254,8 +265,8 @@ PhotonPipelineResult PhotonCameraSim::Process(
       VisionTargetSim tgt = pair.first;
       std::vector<cv::Point2f> corners = pair.second;
 
-      if (tgt.fiducialId > 0) {
-        VideoSimUtil::Warp165h5TagImage(tgt.fiducialId, corners, true,
+      if (tgt.GetFiducialId() > 0) {
+        VideoSimUtil::Warp165h5TagImage(tgt.GetFiducialId(), corners, true,
                                         videoSimFrameRaw);
       } else if (!tgt.GetModel().GetIsSpherical()) {
         std::vector<cv::Point2f> contour = corners;
diff --git a/photon-lib/src/main/native/include/photon/simulation/VisionSystemSim.h b/photon-lib/src/main/native/include/photon/simulation/VisionSystemSim.h
index 81f1a87a6..6e9da0d10 100644
--- a/photon-lib/src/main/native/include/photon/simulation/VisionSystemSim.h
+++ b/photon-lib/src/main/native/include/photon/simulation/VisionSystemSim.h
@@ -346,11 +346,14 @@ class VisionSystemSim {
       const std::vector<VisionTargetSim>& targets) {
     std::vector<VisionTargetSim> removedList;
     for (auto& entry : targetSets) {
-      for (auto target : entry.second) {
-        auto it = std::find(targets.begin(), targets.end(), target);
-        if (it != targets.end()) {
-          removedList.emplace_back(target);
-          entry.second.erase(it);
+      auto& vec = entry.second;
+      auto it = vec.begin();
+      while (it != vec.end()) {
+        if (std::find(targets.begin(), targets.end(), *it) != targets.end()) {
+          removedList.emplace_back(*it);
+          it = vec.erase(it);
+        } else {
+          ++it;
         }
       }
     }
diff --git a/photon-lib/src/main/native/include/photon/simulation/VisionTargetSim.h b/photon-lib/src/main/native/include/photon/simulation/VisionTargetSim.h
index a0d04ab78..5095c5e98 100644
--- a/photon-lib/src/main/native/include/photon/simulation/VisionTargetSim.h
+++ b/photon-lib/src/main/native/include/photon/simulation/VisionTargetSim.h
@@ -31,23 +31,120 @@
 #include "photon/estimation/TargetModel.h"
 
 namespace photon {
+/** Describes a vision target located somewhere on the field that your vision
+ * system can detect. */
 class VisionTargetSim {
  public:
+  /**
+   * Describes a retro-reflective/colored shape vision target located somewhere
+   * on the field that your vision system can detect.
+   *
+   * @param pose Pose3d of the tag in field-relative coordinates
+   * @param model TargetModel which describes the geometry of the target
+   */
   VisionTargetSim(const frc::Pose3d& pose, const TargetModel& model)
-      : fiducialId(-1), pose(pose), model(model) {}
+      : fiducialId(-1),
+        objDetClassId(-1),
+        objDetConf(-1),
+        pose(pose),
+        model(model) {}
+
+  /**
+   * Describes a fiducial tag located somewhere on the field that your vision
+   * system can detect.
+   *
+   * @param pose Pose3d of the tag in field-relative coordinates
+   * @param model TargetModel which describes the geometry of the target(tag)
+   * @param id The ID of this fiducial tag
+   */
   VisionTargetSim(const frc::Pose3d& pose, const TargetModel& model, int id)
-      : fiducialId(id), pose(pose), model(model) {}
+      : fiducialId(id),
+        objDetClassId(-1),
+        objDetConf(-1),
+        pose(pose),
+        model(model) {}
+
+  /**
+   * Describes an object-detection vision target located somewhere on the field
+   * that your vision system can detect. Class ID is the (zero-indexed) index of
+   * the object's class ID in the list of all classes. Confidence can be
+   * specified, or pass -1 to estimate confidence based on 2 * sqrt(target area
+   * / total image area)
+   *
+   * @param pose Pose3d of the target in field-relative coordinates
+   * @param model TargetModel which describes the geometry of the target
+   * @param objDetClassId The object detection class ID, if -1 it will not be
+   * detected by object detection
+   * @param objDetConf The object detection confidence, or -1 in which case the
+   * simulation will compute a confidence based on the area of the target in the
+   * camera's field of view
+   */
+  VisionTargetSim(const frc::Pose3d& pose, const TargetModel& model,
+                  int objDetClassId, float objDetConf)
+      : fiducialId(-1),
+        objDetClassId(objDetClassId),
+        objDetConf(objDetConf),
+        pose(pose),
+        model(model) {}
+
+  /**
+   * Sets the pose of this target on the field.
+   *
+   * @param newPose The pose in field-relative coordinates
+   */
   void SetPose(const frc::Pose3d& newPose) { pose = newPose; }
+
+  /**
+   * Sets the model describing this target's geometry.
+   *
+   * @param newModel The model of the target
+   */
   void SetModel(const TargetModel& newModel) { model = newModel; }
+
+  /**
+   * Returns the pose of this target on the field.
+   *
+   * @return The pose in field-relative coordinates
+   */
   frc::Pose3d GetPose() const { return pose; }
+
+  /**
+   * Returns the model describing this target's geometry.
+   *
+   * @return The model of the target
+   */
   TargetModel GetModel() const { return model; }
+
+  /**
+   * Returns the fiducial ID of this target, or -1 if not a fiducial target.
+   *
+   * @return The fiducial ID
+   */
+  int GetFiducialId() const { return fiducialId; }
+
+  /**
+   * Returns the object detection class ID of this target, or -1 if not an
+   * object detection target.
+   *
+   * @return The object detection class ID
+   */
+  int GetObjDetClassId() const { return objDetClassId; }
+
+  /**
+   * Returns the object detection confidence of this target, or -1 if
+   * confidence is estimated from target area or is not an object.
+   *
+   * @return The object detection confidence
+   */
+  float GetObjDetConf() const { return objDetConf; }
+
+  /**
+   * This target's vertices offset from its field pose.
+   * @return A vector of Translation3d representing the vertices of the target
+   */
   std::vector<frc::Translation3d> GetFieldVertices() const {
     return model.GetFieldVertices(pose);
   }
-  int fiducialId;
-
-  int objDetClassId = -1;
-  float objDetConf = -1;
 
   bool operator<(const VisionTargetSim& right) const {
     return pose.Translation().Norm() < right.pose.Translation().Norm();
@@ -70,6 +167,9 @@ class VisionTargetSim {
   }
 
  private:
+  int fiducialId;
+  int objDetClassId;
+  float objDetConf;
   frc::Pose3d pose;
   TargetModel model;
 };
diff --git a/photon-lib/src/test/java/org/photonvision/VisionSystemSimTest.java b/photon-lib/src/test/java/org/photonvision/VisionSystemSimTest.java
index 917d1d5ad..148286a17 100644
--- a/photon-lib/src/test/java/org/photonvision/VisionSystemSimTest.java
+++ b/photon-lib/src/test/java/org/photonvision/VisionSystemSimTest.java
@@ -601,7 +601,49 @@ class VisionSystemSimTest {
 
         robotPose = new Pose2d(-2, -2, Rotation2d.fromDegrees(30));
         visionSysSim.update(robotPose);
-        ambiguity = waitForSequenceNumber(camera, 2).getBestTarget().getPoseAmbiguity();
+        var target2 = waitForSequenceNumber(camera, 2).getBestTarget();
+        ambiguity = target2.getPoseAmbiguity();
         assertTrue(0 < ambiguity && ambiguity < 0.2, "Tag ambiguity expected to be low");
+
+        // and prove that object detection class id/conf are -1 when we look at a tag
+        assertEquals(-1, target2.objDetectId);
+        assertEquals(-1, target2.objDetectConf);
+    }
+
+    @Test
+    public void testObjectDetection() {
+        var visionSysSim = new VisionSystemSim("Test");
+        var camera = new PhotonCamera(inst, "camera");
+        var cameraSim = new PhotonCameraSim(camera);
+        visionSysSim.addCamera(cameraSim, new Transform3d());
+        cameraSim.prop.setCalibration(640, 480, Rotation2d.fromDegrees(80));
+        cameraSim.setMinTargetAreaPixels(20.0);
+
+        final var targetPose = new Pose3d(new Translation3d(2, 0, 0), new Rotation3d(0, 0, Math.PI));
+        final int classId = 3;
+        final float conf = 0.67f;
+        final TargetModel ballModel = new TargetModel(Units.inchesToMeters(6));
+        final var ballTarget = new VisionTargetSim(targetPose, ballModel, classId, conf);
+
+        visionSysSim.addVisionTargets(ballTarget);
+
+        var robotPose = Pose2d.kZero;
+        visionSysSim.update(robotPose);
+        var target1 = waitForSequenceNumber(camera, 1).getBestTarget();
+        assertEquals(classId, target1.objDetectId);
+        assertEquals(conf, target1.objDetectConf);
+        assertEquals(-1, target1.fiducialId);
+
+        // much around with the target to force PhotonCameraSim::process calculate conf
+        visionSysSim.removeVisionTargets(ballTarget);
+        final float conf2 = -1;
+        final var ballTarget2 = new VisionTargetSim(targetPose, ballModel, classId, conf2);
+        visionSysSim.addVisionTargets(ballTarget2);
+        visionSysSim.update(robotPose);
+        var target2 = waitForSequenceNumber(camera, 2).getBestTarget();
+        assertEquals(classId, target2.objDetectId);
+        // 2 * sqrt(area pixels) at this particular pose
+        assertEquals(0.131, target2.objDetectConf, 0.01);
+        assertEquals(-1, target2.fiducialId);
     }
 }