Populate classId and confidence level for object detection in Java simulation (#2372)

## Description compute confidence level based on target area in total image size and populate classId and confidence level in Java (while building the PhotonTrackedTarget) ## Changes - Add new VisionTargetSim constructor for object detection - If class ID specified but confidence = -1, estimate based on total area --------- Co-authored-by: Matt Morley <matthew.morley.ca@gmail.com>
2026-06-19 00:41:41 +00:00 · 2026-03-18 23:42:37 -04:00
parent 846528ce9c
commit 12446a6c44
9 changed files with 321 additions and 30 deletions
--- a/docs/source/docs/simulation/simulation-java.md
+++ b/docs/source/docs/simulation/simulation-java.md
@@ -95,6 +95,27 @@ These `TargetModel` are paired with a target pose to create a `VisionTargetSim`.
 The pose of a `VisionTargetSim` object can be updated to simulate moving targets. Note, however, that this will break latency simulation for that target.
 :::

+To use simulated object detection, you must provide an objDetClassId (zero-indexed class ID) and confidence value. When you set objDetConf to -1, the simulation computes confidence based on the area of the target in the camera's field of view. To simulate a object detection model with one class (fuel, index 0) and specify confidence, you'd write:
+
+```{eval-rst}
+.. tab-set-code::
+
+   .. code-block:: java
+
+      // arbitrary position on field
+      final var targetPose = new Pose3d(new Translation3d(2, 0, 0), new Rotation3d());
+      // Class id, zero-indexed
+      final int classId = 0;
+      // Confidence, between 0 and 1.
+      final float conf = 0.67f;
+      // 6 inch diameter ball
+      final TargetModel ballModel = new TargetModel(Units.inchesToMeters(6));
+      final var ballTargetSim = new VisionTargetSim(targetPose, ballModel, classId, conf);
+
+      // Add this vision target to the vision system simulation to make it visible
+      visionSim.addVisionTargets(visionTarget);
+```
+
 For convenience, an `AprilTagFieldLayout` can also be added to automatically create a target for each of its AprilTags.

 ```{eval-rst}
--- a/photon-lib/py/photonlibpy/simulation/photonCameraSim.py
+++ b/photon-lib/py/photonlibpy/simulation/photonCameraSim.py
@@ -368,6 +368,15 @@ class PhotonCameraSim:
                    noisyTargetCorners,
                )

+            # Compute object detection confidence if this is an obj det target
+            classId = tgt.objDetClassId
+            conf = tgt.objDetConf
+            if classId >= 0 and conf < 0:
+                # Simulate confidence using sqrt-scaled area for a more realistic
+                # curve. Raw areaPercent/100 is tiny for most targets; sqrt scaling
+                # gives reasonable values even for small-but-visible objects.
+                conf = max(0.0, min(1.0, math.sqrt(areaPercent / 100.0) * 2.0))
+
            smallVec: list[TargetCorner] = []
            for corner in minAreaRectPts:
                smallVec.append(TargetCorner(corner[0], corner[1]))
@@ -381,6 +390,8 @@ class PhotonCameraSim:
                    area=areaPercent,
                    skew=math.degrees(centerRot.X()),
                    fiducialId=tgt.fiducialId,
+                    objDetectId=classId,
+                    objDetectConf=conf,
                    detectedCorners=cornersFloat,
                    minAreaRectCorners=smallVec,
                    bestCameraToTarget=pnpSim.best if pnpSim else Transform3d(),
--- a/photon-lib/py/photonlibpy/simulation/visionTargetSim.py
+++ b/photon-lib/py/photonlibpy/simulation/visionTargetSim.py
@@ -1,4 +1,5 @@
 import math
+from typing import overload

 from wpimath.geometry import Pose3d, Translation3d

@@ -8,19 +9,78 @@ from ..estimation.targetModel import TargetModel
 class VisionTargetSim:
    """Describes a vision target located somewhere on the field that your vision system can detect."""

-    def __init__(self, pose: Pose3d, model: TargetModel, id: int = -1):
-        """Describes a fiducial tag located somewhere on the field that your vision system can detect.
+    @overload
+    def __init__(self, pose: Pose3d, model: TargetModel) -> None:
+        """
+        Describes a retro-reflective/colored shape vision target located somewhere on the field that
+        your vision system can detect.
+
+        :param pose:  Pose3d of the target in field-relative coordinates
+        :param model: TargetModel which describes the shape of the target
+        """
+        ...
+
+    @overload
+    def __init__(self, pose: Pose3d, model: TargetModel, id: int) -> None:
+        """
+        Describes a fiducial tag located somewhere on the field that your vision system can detect.

        :param pose:  Pose3d of the tag in field-relative coordinates
-        :param model: TargetModel which describes the shape of the target(tag)
+        :param model: TargetModel which describes the geometry of the target (tag)
        :param id:    The ID of this fiducial tag
        """
+        ...
+
+    @overload
+    def __init__(
+        self, pose: Pose3d, model: TargetModel, objDetClassId: int, objDetConf: float
+    ) -> None:
+        """
+        Describes an object-detection vision target located somewhere on the field that your vision
+        system can detect.
+
+        :param pose:          Pose3d of the target in field-relative coordinates
+        :param model:         TargetModel which describes the shape of the target
+        :param objDetClassId: The object detection class ID, or -1 to exclude from object detection
+        :param objDetConf:    The object detection confidence, or -1.0 to compute from target area
+                              in the camera's field of view
+        """
+        ...
+
+    def __init__(
+        self,
+        pose: Pose3d,
+        model: TargetModel,
+        *args,
+        **kwargs,
+    ):
+        if kwargs:
+            raise TypeError(
+                f"VisionTargetSim does not accept keyword arguments: {list(kwargs.keys())}"
+            )

        self.pose: Pose3d = pose
        self.model: TargetModel = model
-        self.fiducialId: int = id
-        self.objDetClassId: int = -1
-        self.objDetConf: float = -1.0
+
+        if len(args) == 0:
+            # VisionTargetSim(pose, model)
+            self.fiducialId: int = -1
+            self.objDetClassId: int = -1
+            self.objDetConf: float = -1.0
+        elif len(args) == 1:
+            # VisionTargetSim(pose, model, id)
+            self.fiducialId = args[0]
+            self.objDetClassId = -1
+            self.objDetConf = -1.0
+        elif len(args) == 2:
+            # VisionTargetSim(pose, model, objDetClassId, objDetConf)
+            self.fiducialId = -1
+            self.objDetClassId = args[0]
+            self.objDetConf = args[1]
+        else:
+            raise ValueError(
+                f"VisionTargetSim takes 2-4 arguments, got {2 + len(args)}"
+            )

    def __lt__(self, right) -> bool:
        return self.pose.translation().norm() < right.pose.translation().norm()
--- a/photon-lib/src/main/java/org/photonvision/simulation/PhotonCameraSim.java
+++ b/photon-lib/src/main/java/org/photonvision/simulation/PhotonCameraSim.java
@@ -525,6 +525,16 @@ public class PhotonCameraSim implements AutoCloseable {
                                .get();
            }

+            // If object detection (user classId valid) but conf wasn't provided, estimate
+            int classId = tgt.objDetClassId;
+            float conf = tgt.objDetConf;
+            if (classId >= 0 && conf < 0) {
+                // Simulate confidence using sqrt-scaled area for a more realistic
+                // curve. Raw areaPercent/100 is tiny for most targets; sqrt scaling
+                // gives reasonable values even for small-but-visible objects.
+                conf = (float) MathUtil.clamp(Math.sqrt(areaPercent / 100.0) * 2.0, 0.0, 1.0);
+            }
+
            detectableTgts.add(
                    new PhotonTrackedTarget(
                            -Math.toDegrees(centerRot.getZ()),
@@ -532,8 +542,8 @@ public class PhotonCameraSim implements AutoCloseable {
                            areaPercent,
                            Math.toDegrees(centerRot.getX()),
                            tgt.fiducialID,
-                            -1,
-                            -1,
+                            classId,
+                            conf,
                            pnpSim.best,
                            pnpSim.alt,
                            pnpSim.ambiguity,
--- a/photon-lib/src/main/java/org/photonvision/simulation/VisionTargetSim.java
+++ b/photon-lib/src/main/java/org/photonvision/simulation/VisionTargetSim.java
@@ -36,16 +36,21 @@ public class VisionTargetSim {

    public final int fiducialID;

+    /** The object detection class ID, or -1 if not applicable. */
+    public final int objDetClassId;
+
+    /** The object detection confidence, or -1 if not applicable. */
+    public final float objDetConf;
+
    /**
-     * Describes a vision target located somewhere on the field that your vision system can detect.
+     * Describes a retro-reflective/colored shape vision target located somewhere on the field that
+     * your vision system can detect.
     *
     * @param pose Pose3d of the tag in field-relative coordinates
     * @param model TargetModel which describes the geometry of the target
     */
    public VisionTargetSim(Pose3d pose, TargetModel model) {
-        this.pose = pose;
-        this.model = model;
-        this.fiducialID = -1;
+        this(pose, model, -1, -1, -1);
    }

    /**
@@ -56,9 +61,33 @@ public class VisionTargetSim {
     * @param id The ID of this fiducial tag
     */
    public VisionTargetSim(Pose3d pose, TargetModel model, int id) {
+        this(pose, model, id, -1, -1);
+    }
+
+    /**
+     * Describes an object-detection vision target located somewhere on the field that your vision
+     * system can detect. Class ID is the (zero-indexed) index of the object's class ID in the list of
+     * all classes. Confidence can be specified, or pass -1 to estimate confidence based on 2 *
+     * sqrt(target area / total image area)
+     *
+     * @param pose Pose3d of the target in field-relative coordinates
+     * @param model TargetModel which describes the geometry of the target
+     * @param objDetClassId The object detection class ID, if -1 it will not be detected by object
+     *     detection
+     * @param objDetConf The object detection confidence, or -1 in which case the simulation will
+     *     compute a confidence based on the area of the target in the camera's field of view
+     */
+    public VisionTargetSim(Pose3d pose, TargetModel model, int objDetClassId, float objDetConf) {
+        this(pose, model, -1, objDetClassId, objDetConf);
+    }
+
+    private VisionTargetSim(
+            Pose3d pose, TargetModel model, int id, int objDetClassId, float objDetConf) {
        this.pose = pose;
        this.model = model;
        this.fiducialID = id;
+        this.objDetClassId = objDetClassId;
+        this.objDetConf = objDetConf;
    }

    /**
@@ -97,7 +126,11 @@ public class VisionTargetSim {
        return model;
    }

-    /** This target's vertices offset from its field pose. */
+    /**
+     * This target's vertices offset from its field pose.
+     *
+     * @return A vector of Translation3d representing the vertices of the target
+     */
    public List<Translation3d> getFieldVertices() {
        return model.getFieldVertices(pose);
    }
--- a/photon-lib/src/main/native/cpp/photon/simulation/PhotonCameraSim.cpp
+++ b/photon-lib/src/main/native/cpp/photon/simulation/PhotonCameraSim.cpp
@@ -214,12 +214,23 @@ PhotonPipelineResult PhotonCameraSim::Process(
    }

    std::optional<photon::PnpResult> pnpSim = std::nullopt;
-    if (tgt.fiducialId >= 0 && tgt.GetFieldVertices().size() == 4) {
+    if (tgt.GetFiducialId() >= 0 && tgt.GetFieldVertices().size() == 4) {
      pnpSim = OpenCVHelp::SolvePNP_Square(
          prop.GetIntrinsics(), prop.GetDistCoeffs(),
          tgt.GetModel().GetVertices(), noisyTargetCorners);
    }

+    // Compute object detection confidence if this is an obj det target
+    int classId = tgt.GetObjDetClassId();
+    float conf = tgt.GetObjDetConf();
+    if (classId >= 0 && conf < 0) {
+      // Simulate confidence using sqrt-scaled area for a more realistic
+      // curve. Raw areaPercent/100 is tiny for most targets; sqrt scaling
+      // gives reasonable values even for small-but-visible objects.
+      conf = static_cast<float>(
+          std::clamp(std::sqrt(areaPercent / 100.0) * 2.0, 0.0, 1.0));
+    }
+
    std::vector<std::pair<float, float>> tempCorners =
        OpenCVHelp::PointsToCorners(minAreaRectPts);
    std::vector<TargetCorner> smallVec;
@@ -236,8 +247,8 @@ PhotonPipelineResult PhotonCameraSim::Process(
    detectableTgts.emplace_back(
        -centerRot.Z().convert<units::degrees>().to<double>(),
        -centerRot.Y().convert<units::degrees>().to<double>(), areaPercent,
-        centerRot.X().convert<units::degrees>().to<double>(), tgt.fiducialId,
-        tgt.objDetClassId, tgt.objDetConf,
+        centerRot.X().convert<units::degrees>().to<double>(),
+        tgt.GetFiducialId(), classId, conf,
        pnpSim ? pnpSim->best : frc::Transform3d{},
        pnpSim ? pnpSim->alt : frc::Transform3d{},
        pnpSim ? pnpSim->ambiguity : -1, smallVec, cornersDouble);
@@ -254,8 +265,8 @@ PhotonPipelineResult PhotonCameraSim::Process(
      VisionTargetSim tgt = pair.first;
      std::vector<cv::Point2f> corners = pair.second;

-      if (tgt.fiducialId > 0) {
-        VideoSimUtil::Warp165h5TagImage(tgt.fiducialId, corners, true,
+      if (tgt.GetFiducialId() > 0) {
+        VideoSimUtil::Warp165h5TagImage(tgt.GetFiducialId(), corners, true,
                                        videoSimFrameRaw);
      } else if (!tgt.GetModel().GetIsSpherical()) {
        std::vector<cv::Point2f> contour = corners;
--- a/photon-lib/src/main/native/include/photon/simulation/VisionSystemSim.h
+++ b/photon-lib/src/main/native/include/photon/simulation/VisionSystemSim.h
@@ -346,11 +346,14 @@ class VisionSystemSim {
      const std::vector<VisionTargetSim>& targets) {
    std::vector<VisionTargetSim> removedList;
    for (auto& entry : targetSets) {
-      for (auto target : entry.second) {
-        auto it = std::find(targets.begin(), targets.end(), target);
-        if (it != targets.end()) {
-          removedList.emplace_back(target);
-          entry.second.erase(it);
+      auto& vec = entry.second;
+      auto it = vec.begin();
+      while (it != vec.end()) {
+        if (std::find(targets.begin(), targets.end(), *it) != targets.end()) {
+          removedList.emplace_back(*it);
+          it = vec.erase(it);
+        } else {
+          ++it;
        }
      }
    }
--- a/photon-lib/src/main/native/include/photon/simulation/VisionTargetSim.h
+++ b/photon-lib/src/main/native/include/photon/simulation/VisionTargetSim.h
@@ -31,23 +31,120 @@
 #include "photon/estimation/TargetModel.h"

 namespace photon {
+/** Describes a vision target located somewhere on the field that your vision
+ * system can detect. */
 class VisionTargetSim {
 public:
+  /**
+   * Describes a retro-reflective/colored shape vision target located somewhere
+   * on the field that your vision system can detect.
+   *
+   * @param pose Pose3d of the tag in field-relative coordinates
+   * @param model TargetModel which describes the geometry of the target
+   */
  VisionTargetSim(const frc::Pose3d& pose, const TargetModel& model)
-      : fiducialId(-1), pose(pose), model(model) {}
+      : fiducialId(-1),
+        objDetClassId(-1),
+        objDetConf(-1),
+        pose(pose),
+        model(model) {}
+
+  /**
+   * Describes a fiducial tag located somewhere on the field that your vision
+   * system can detect.
+   *
+   * @param pose Pose3d of the tag in field-relative coordinates
+   * @param model TargetModel which describes the geometry of the target(tag)
+   * @param id The ID of this fiducial tag
+   */
  VisionTargetSim(const frc::Pose3d& pose, const TargetModel& model, int id)
-      : fiducialId(id), pose(pose), model(model) {}
+      : fiducialId(id),
+        objDetClassId(-1),
+        objDetConf(-1),
+        pose(pose),
+        model(model) {}
+
+  /**
+   * Describes an object-detection vision target located somewhere on the field
+   * that your vision system can detect. Class ID is the (zero-indexed) index of
+   * the object's class ID in the list of all classes. Confidence can be
+   * specified, or pass -1 to estimate confidence based on 2 * sqrt(target area
+   * / total image area)
+   *
+   * @param pose Pose3d of the target in field-relative coordinates
+   * @param model TargetModel which describes the geometry of the target
+   * @param objDetClassId The object detection class ID, if -1 it will not be
+   * detected by object detection
+   * @param objDetConf The object detection confidence, or -1 in which case the
+   * simulation will compute a confidence based on the area of the target in the
+   * camera's field of view
+   */
+  VisionTargetSim(const frc::Pose3d& pose, const TargetModel& model,
+                  int objDetClassId, float objDetConf)
+      : fiducialId(-1),
+        objDetClassId(objDetClassId),
+        objDetConf(objDetConf),
+        pose(pose),
+        model(model) {}
+
+  /**
+   * Sets the pose of this target on the field.
+   *
+   * @param newPose The pose in field-relative coordinates
+   */
  void SetPose(const frc::Pose3d& newPose) { pose = newPose; }
+
+  /**
+   * Sets the model describing this target's geometry.
+   *
+   * @param newModel The model of the target
+   */
  void SetModel(const TargetModel& newModel) { model = newModel; }
+
+  /**
+   * Returns the pose of this target on the field.
+   *
+   * @return The pose in field-relative coordinates
+   */
  frc::Pose3d GetPose() const { return pose; }
+
+  /**
+   * Returns the model describing this target's geometry.
+   *
+   * @return The model of the target
+   */
  TargetModel GetModel() const { return model; }
+
+  /**
+   * Returns the fiducial ID of this target, or -1 if not a fiducial target.
+   *
+   * @return The fiducial ID
+   */
+  int GetFiducialId() const { return fiducialId; }
+
+  /**
+   * Returns the object detection class ID of this target, or -1 if not an
+   * object detection target.
+   *
+   * @return The object detection class ID
+   */
+  int GetObjDetClassId() const { return objDetClassId; }
+
+  /**
+   * Returns the object detection confidence of this target, or -1 if
+   * confidence is estimated from target area or is not an object.
+   *
+   * @return The object detection confidence
+   */
+  float GetObjDetConf() const { return objDetConf; }
+
+  /**
+   * This target's vertices offset from its field pose.
+   * @return A vector of Translation3d representing the vertices of the target
+   */
  std::vector<frc::Translation3d> GetFieldVertices() const {
    return model.GetFieldVertices(pose);
  }
-  int fiducialId;
-
-  int objDetClassId = -1;
-  float objDetConf = -1;

  bool operator<(const VisionTargetSim& right) const {
    return pose.Translation().Norm() < right.pose.Translation().Norm();
@@ -70,6 +167,9 @@ class VisionTargetSim {
  }

 private:
+  int fiducialId;
+  int objDetClassId;
+  float objDetConf;
  frc::Pose3d pose;
  TargetModel model;
 };
--- a/photon-lib/src/test/java/org/photonvision/VisionSystemSimTest.java
+++ b/photon-lib/src/test/java/org/photonvision/VisionSystemSimTest.java
@@ -601,7 +601,49 @@ class VisionSystemSimTest {

        robotPose = new Pose2d(-2, -2, Rotation2d.fromDegrees(30));
        visionSysSim.update(robotPose);
-        ambiguity = waitForSequenceNumber(camera, 2).getBestTarget().getPoseAmbiguity();
+        var target2 = waitForSequenceNumber(camera, 2).getBestTarget();
+        ambiguity = target2.getPoseAmbiguity();
        assertTrue(0 < ambiguity && ambiguity < 0.2, "Tag ambiguity expected to be low");
+
+        // and prove that object detection class id/conf are -1 when we look at a tag
+        assertEquals(-1, target2.objDetectId);
+        assertEquals(-1, target2.objDetectConf);
+    }
+
+    @Test
+    public void testObjectDetection() {
+        var visionSysSim = new VisionSystemSim("Test");
+        var camera = new PhotonCamera(inst, "camera");
+        var cameraSim = new PhotonCameraSim(camera);
+        visionSysSim.addCamera(cameraSim, new Transform3d());
+        cameraSim.prop.setCalibration(640, 480, Rotation2d.fromDegrees(80));
+        cameraSim.setMinTargetAreaPixels(20.0);
+
+        final var targetPose = new Pose3d(new Translation3d(2, 0, 0), new Rotation3d(0, 0, Math.PI));
+        final int classId = 3;
+        final float conf = 0.67f;
+        final TargetModel ballModel = new TargetModel(Units.inchesToMeters(6));
+        final var ballTarget = new VisionTargetSim(targetPose, ballModel, classId, conf);
+
+        visionSysSim.addVisionTargets(ballTarget);
+
+        var robotPose = Pose2d.kZero;
+        visionSysSim.update(robotPose);
+        var target1 = waitForSequenceNumber(camera, 1).getBestTarget();
+        assertEquals(classId, target1.objDetectId);
+        assertEquals(conf, target1.objDetectConf);
+        assertEquals(-1, target1.fiducialId);
+
+        // much around with the target to force PhotonCameraSim::process calculate conf
+        visionSysSim.removeVisionTargets(ballTarget);
+        final float conf2 = -1;
+        final var ballTarget2 = new VisionTargetSim(targetPose, ballModel, classId, conf2);
+        visionSysSim.addVisionTargets(ballTarget2);
+        visionSysSim.update(robotPose);
+        var target2 = waitForSequenceNumber(camera, 2).getBestTarget();
+        assertEquals(classId, target2.objDetectId);
+        // 2 * sqrt(area pixels) at this particular pose
+        assertEquals(0.131, target2.objDetectConf, 0.01);
+        assertEquals(-1, target2.fiducialId);
    }
 }