Populate classId and confidence level for object detection in Java simulation (#2372)

## Description

compute confidence level based on target area in total image size and
populate classId and confidence level in Java (while building the
PhotonTrackedTarget)

## Changes
- Add new VisionTargetSim constructor for object detection
- If class ID specified but confidence = -1, estimate based on total
area

---------

Co-authored-by: Matt Morley <matthew.morley.ca@gmail.com>
This commit is contained in:
Stéphane Dalton
2026-03-18 23:42:37 -04:00
committed by GitHub
parent 846528ce9c
commit 12446a6c44
9 changed files with 321 additions and 30 deletions

View File

@@ -95,6 +95,27 @@ These `TargetModel` are paired with a target pose to create a `VisionTargetSim`.
The pose of a `VisionTargetSim` object can be updated to simulate moving targets. Note, however, that this will break latency simulation for that target.
:::
To use simulated object detection, you must provide an objDetClassId (zero-indexed class ID) and confidence value. When you set objDetConf to -1, the simulation computes confidence based on the area of the target in the camera's field of view. To simulate a object detection model with one class (fuel, index 0) and specify confidence, you'd write:
```{eval-rst}
.. tab-set-code::
.. code-block:: java
// arbitrary position on field
final var targetPose = new Pose3d(new Translation3d(2, 0, 0), new Rotation3d());
// Class id, zero-indexed
final int classId = 0;
// Confidence, between 0 and 1.
final float conf = 0.67f;
// 6 inch diameter ball
final TargetModel ballModel = new TargetModel(Units.inchesToMeters(6));
final var ballTargetSim = new VisionTargetSim(targetPose, ballModel, classId, conf);
// Add this vision target to the vision system simulation to make it visible
visionSim.addVisionTargets(visionTarget);
```
For convenience, an `AprilTagFieldLayout` can also be added to automatically create a target for each of its AprilTags.
```{eval-rst}

View File

@@ -368,6 +368,15 @@ class PhotonCameraSim:
noisyTargetCorners,
)
# Compute object detection confidence if this is an obj det target
classId = tgt.objDetClassId
conf = tgt.objDetConf
if classId >= 0 and conf < 0:
# Simulate confidence using sqrt-scaled area for a more realistic
# curve. Raw areaPercent/100 is tiny for most targets; sqrt scaling
# gives reasonable values even for small-but-visible objects.
conf = max(0.0, min(1.0, math.sqrt(areaPercent / 100.0) * 2.0))
smallVec: list[TargetCorner] = []
for corner in minAreaRectPts:
smallVec.append(TargetCorner(corner[0], corner[1]))
@@ -381,6 +390,8 @@ class PhotonCameraSim:
area=areaPercent,
skew=math.degrees(centerRot.X()),
fiducialId=tgt.fiducialId,
objDetectId=classId,
objDetectConf=conf,
detectedCorners=cornersFloat,
minAreaRectCorners=smallVec,
bestCameraToTarget=pnpSim.best if pnpSim else Transform3d(),

View File

@@ -1,4 +1,5 @@
import math
from typing import overload
from wpimath.geometry import Pose3d, Translation3d
@@ -8,19 +9,78 @@ from ..estimation.targetModel import TargetModel
class VisionTargetSim:
"""Describes a vision target located somewhere on the field that your vision system can detect."""
def __init__(self, pose: Pose3d, model: TargetModel, id: int = -1):
"""Describes a fiducial tag located somewhere on the field that your vision system can detect.
@overload
def __init__(self, pose: Pose3d, model: TargetModel) -> None:
"""
Describes a retro-reflective/colored shape vision target located somewhere on the field that
your vision system can detect.
:param pose: Pose3d of the target in field-relative coordinates
:param model: TargetModel which describes the shape of the target
"""
...
@overload
def __init__(self, pose: Pose3d, model: TargetModel, id: int) -> None:
"""
Describes a fiducial tag located somewhere on the field that your vision system can detect.
:param pose: Pose3d of the tag in field-relative coordinates
:param model: TargetModel which describes the shape of the target(tag)
:param model: TargetModel which describes the geometry of the target (tag)
:param id: The ID of this fiducial tag
"""
...
@overload
def __init__(
self, pose: Pose3d, model: TargetModel, objDetClassId: int, objDetConf: float
) -> None:
"""
Describes an object-detection vision target located somewhere on the field that your vision
system can detect.
:param pose: Pose3d of the target in field-relative coordinates
:param model: TargetModel which describes the shape of the target
:param objDetClassId: The object detection class ID, or -1 to exclude from object detection
:param objDetConf: The object detection confidence, or -1.0 to compute from target area
in the camera's field of view
"""
...
def __init__(
self,
pose: Pose3d,
model: TargetModel,
*args,
**kwargs,
):
if kwargs:
raise TypeError(
f"VisionTargetSim does not accept keyword arguments: {list(kwargs.keys())}"
)
self.pose: Pose3d = pose
self.model: TargetModel = model
self.fiducialId: int = id
self.objDetClassId: int = -1
self.objDetConf: float = -1.0
if len(args) == 0:
# VisionTargetSim(pose, model)
self.fiducialId: int = -1
self.objDetClassId: int = -1
self.objDetConf: float = -1.0
elif len(args) == 1:
# VisionTargetSim(pose, model, id)
self.fiducialId = args[0]
self.objDetClassId = -1
self.objDetConf = -1.0
elif len(args) == 2:
# VisionTargetSim(pose, model, objDetClassId, objDetConf)
self.fiducialId = -1
self.objDetClassId = args[0]
self.objDetConf = args[1]
else:
raise ValueError(
f"VisionTargetSim takes 2-4 arguments, got {2 + len(args)}"
)
def __lt__(self, right) -> bool:
return self.pose.translation().norm() < right.pose.translation().norm()

View File

@@ -525,6 +525,16 @@ public class PhotonCameraSim implements AutoCloseable {
.get();
}
// If object detection (user classId valid) but conf wasn't provided, estimate
int classId = tgt.objDetClassId;
float conf = tgt.objDetConf;
if (classId >= 0 && conf < 0) {
// Simulate confidence using sqrt-scaled area for a more realistic
// curve. Raw areaPercent/100 is tiny for most targets; sqrt scaling
// gives reasonable values even for small-but-visible objects.
conf = (float) MathUtil.clamp(Math.sqrt(areaPercent / 100.0) * 2.0, 0.0, 1.0);
}
detectableTgts.add(
new PhotonTrackedTarget(
-Math.toDegrees(centerRot.getZ()),
@@ -532,8 +542,8 @@ public class PhotonCameraSim implements AutoCloseable {
areaPercent,
Math.toDegrees(centerRot.getX()),
tgt.fiducialID,
-1,
-1,
classId,
conf,
pnpSim.best,
pnpSim.alt,
pnpSim.ambiguity,

View File

@@ -36,16 +36,21 @@ public class VisionTargetSim {
public final int fiducialID;
/** The object detection class ID, or -1 if not applicable. */
public final int objDetClassId;
/** The object detection confidence, or -1 if not applicable. */
public final float objDetConf;
/**
* Describes a vision target located somewhere on the field that your vision system can detect.
* Describes a retro-reflective/colored shape vision target located somewhere on the field that
* your vision system can detect.
*
* @param pose Pose3d of the tag in field-relative coordinates
* @param model TargetModel which describes the geometry of the target
*/
public VisionTargetSim(Pose3d pose, TargetModel model) {
this.pose = pose;
this.model = model;
this.fiducialID = -1;
this(pose, model, -1, -1, -1);
}
/**
@@ -56,9 +61,33 @@ public class VisionTargetSim {
* @param id The ID of this fiducial tag
*/
public VisionTargetSim(Pose3d pose, TargetModel model, int id) {
this(pose, model, id, -1, -1);
}
/**
* Describes an object-detection vision target located somewhere on the field that your vision
* system can detect. Class ID is the (zero-indexed) index of the object's class ID in the list of
* all classes. Confidence can be specified, or pass -1 to estimate confidence based on 2 *
* sqrt(target area / total image area)
*
* @param pose Pose3d of the target in field-relative coordinates
* @param model TargetModel which describes the geometry of the target
* @param objDetClassId The object detection class ID, if -1 it will not be detected by object
* detection
* @param objDetConf The object detection confidence, or -1 in which case the simulation will
* compute a confidence based on the area of the target in the camera's field of view
*/
public VisionTargetSim(Pose3d pose, TargetModel model, int objDetClassId, float objDetConf) {
this(pose, model, -1, objDetClassId, objDetConf);
}
private VisionTargetSim(
Pose3d pose, TargetModel model, int id, int objDetClassId, float objDetConf) {
this.pose = pose;
this.model = model;
this.fiducialID = id;
this.objDetClassId = objDetClassId;
this.objDetConf = objDetConf;
}
/**
@@ -97,7 +126,11 @@ public class VisionTargetSim {
return model;
}
/** This target's vertices offset from its field pose. */
/**
* This target's vertices offset from its field pose.
*
* @return A vector of Translation3d representing the vertices of the target
*/
public List<Translation3d> getFieldVertices() {
return model.getFieldVertices(pose);
}

View File

@@ -214,12 +214,23 @@ PhotonPipelineResult PhotonCameraSim::Process(
}
std::optional<photon::PnpResult> pnpSim = std::nullopt;
if (tgt.fiducialId >= 0 && tgt.GetFieldVertices().size() == 4) {
if (tgt.GetFiducialId() >= 0 && tgt.GetFieldVertices().size() == 4) {
pnpSim = OpenCVHelp::SolvePNP_Square(
prop.GetIntrinsics(), prop.GetDistCoeffs(),
tgt.GetModel().GetVertices(), noisyTargetCorners);
}
// Compute object detection confidence if this is an obj det target
int classId = tgt.GetObjDetClassId();
float conf = tgt.GetObjDetConf();
if (classId >= 0 && conf < 0) {
// Simulate confidence using sqrt-scaled area for a more realistic
// curve. Raw areaPercent/100 is tiny for most targets; sqrt scaling
// gives reasonable values even for small-but-visible objects.
conf = static_cast<float>(
std::clamp(std::sqrt(areaPercent / 100.0) * 2.0, 0.0, 1.0));
}
std::vector<std::pair<float, float>> tempCorners =
OpenCVHelp::PointsToCorners(minAreaRectPts);
std::vector<TargetCorner> smallVec;
@@ -236,8 +247,8 @@ PhotonPipelineResult PhotonCameraSim::Process(
detectableTgts.emplace_back(
-centerRot.Z().convert<units::degrees>().to<double>(),
-centerRot.Y().convert<units::degrees>().to<double>(), areaPercent,
centerRot.X().convert<units::degrees>().to<double>(), tgt.fiducialId,
tgt.objDetClassId, tgt.objDetConf,
centerRot.X().convert<units::degrees>().to<double>(),
tgt.GetFiducialId(), classId, conf,
pnpSim ? pnpSim->best : frc::Transform3d{},
pnpSim ? pnpSim->alt : frc::Transform3d{},
pnpSim ? pnpSim->ambiguity : -1, smallVec, cornersDouble);
@@ -254,8 +265,8 @@ PhotonPipelineResult PhotonCameraSim::Process(
VisionTargetSim tgt = pair.first;
std::vector<cv::Point2f> corners = pair.second;
if (tgt.fiducialId > 0) {
VideoSimUtil::Warp165h5TagImage(tgt.fiducialId, corners, true,
if (tgt.GetFiducialId() > 0) {
VideoSimUtil::Warp165h5TagImage(tgt.GetFiducialId(), corners, true,
videoSimFrameRaw);
} else if (!tgt.GetModel().GetIsSpherical()) {
std::vector<cv::Point2f> contour = corners;

View File

@@ -346,11 +346,14 @@ class VisionSystemSim {
const std::vector<VisionTargetSim>& targets) {
std::vector<VisionTargetSim> removedList;
for (auto& entry : targetSets) {
for (auto target : entry.second) {
auto it = std::find(targets.begin(), targets.end(), target);
if (it != targets.end()) {
removedList.emplace_back(target);
entry.second.erase(it);
auto& vec = entry.second;
auto it = vec.begin();
while (it != vec.end()) {
if (std::find(targets.begin(), targets.end(), *it) != targets.end()) {
removedList.emplace_back(*it);
it = vec.erase(it);
} else {
++it;
}
}
}

View File

@@ -31,23 +31,120 @@
#include "photon/estimation/TargetModel.h"
namespace photon {
/** Describes a vision target located somewhere on the field that your vision
* system can detect. */
class VisionTargetSim {
public:
/**
* Describes a retro-reflective/colored shape vision target located somewhere
* on the field that your vision system can detect.
*
* @param pose Pose3d of the tag in field-relative coordinates
* @param model TargetModel which describes the geometry of the target
*/
VisionTargetSim(const frc::Pose3d& pose, const TargetModel& model)
: fiducialId(-1), pose(pose), model(model) {}
: fiducialId(-1),
objDetClassId(-1),
objDetConf(-1),
pose(pose),
model(model) {}
/**
* Describes a fiducial tag located somewhere on the field that your vision
* system can detect.
*
* @param pose Pose3d of the tag in field-relative coordinates
* @param model TargetModel which describes the geometry of the target(tag)
* @param id The ID of this fiducial tag
*/
VisionTargetSim(const frc::Pose3d& pose, const TargetModel& model, int id)
: fiducialId(id), pose(pose), model(model) {}
: fiducialId(id),
objDetClassId(-1),
objDetConf(-1),
pose(pose),
model(model) {}
/**
* Describes an object-detection vision target located somewhere on the field
* that your vision system can detect. Class ID is the (zero-indexed) index of
* the object's class ID in the list of all classes. Confidence can be
* specified, or pass -1 to estimate confidence based on 2 * sqrt(target area
* / total image area)
*
* @param pose Pose3d of the target in field-relative coordinates
* @param model TargetModel which describes the geometry of the target
* @param objDetClassId The object detection class ID, if -1 it will not be
* detected by object detection
* @param objDetConf The object detection confidence, or -1 in which case the
* simulation will compute a confidence based on the area of the target in the
* camera's field of view
*/
VisionTargetSim(const frc::Pose3d& pose, const TargetModel& model,
int objDetClassId, float objDetConf)
: fiducialId(-1),
objDetClassId(objDetClassId),
objDetConf(objDetConf),
pose(pose),
model(model) {}
/**
* Sets the pose of this target on the field.
*
* @param newPose The pose in field-relative coordinates
*/
void SetPose(const frc::Pose3d& newPose) { pose = newPose; }
/**
* Sets the model describing this target's geometry.
*
* @param newModel The model of the target
*/
void SetModel(const TargetModel& newModel) { model = newModel; }
/**
* Returns the pose of this target on the field.
*
* @return The pose in field-relative coordinates
*/
frc::Pose3d GetPose() const { return pose; }
/**
* Returns the model describing this target's geometry.
*
* @return The model of the target
*/
TargetModel GetModel() const { return model; }
/**
* Returns the fiducial ID of this target, or -1 if not a fiducial target.
*
* @return The fiducial ID
*/
int GetFiducialId() const { return fiducialId; }
/**
* Returns the object detection class ID of this target, or -1 if not an
* object detection target.
*
* @return The object detection class ID
*/
int GetObjDetClassId() const { return objDetClassId; }
/**
* Returns the object detection confidence of this target, or -1 if
* confidence is estimated from target area or is not an object.
*
* @return The object detection confidence
*/
float GetObjDetConf() const { return objDetConf; }
/**
* This target's vertices offset from its field pose.
* @return A vector of Translation3d representing the vertices of the target
*/
std::vector<frc::Translation3d> GetFieldVertices() const {
return model.GetFieldVertices(pose);
}
int fiducialId;
int objDetClassId = -1;
float objDetConf = -1;
bool operator<(const VisionTargetSim& right) const {
return pose.Translation().Norm() < right.pose.Translation().Norm();
@@ -70,6 +167,9 @@ class VisionTargetSim {
}
private:
int fiducialId;
int objDetClassId;
float objDetConf;
frc::Pose3d pose;
TargetModel model;
};

View File

@@ -601,7 +601,49 @@ class VisionSystemSimTest {
robotPose = new Pose2d(-2, -2, Rotation2d.fromDegrees(30));
visionSysSim.update(robotPose);
ambiguity = waitForSequenceNumber(camera, 2).getBestTarget().getPoseAmbiguity();
var target2 = waitForSequenceNumber(camera, 2).getBestTarget();
ambiguity = target2.getPoseAmbiguity();
assertTrue(0 < ambiguity && ambiguity < 0.2, "Tag ambiguity expected to be low");
// and prove that object detection class id/conf are -1 when we look at a tag
assertEquals(-1, target2.objDetectId);
assertEquals(-1, target2.objDetectConf);
}
@Test
public void testObjectDetection() {
var visionSysSim = new VisionSystemSim("Test");
var camera = new PhotonCamera(inst, "camera");
var cameraSim = new PhotonCameraSim(camera);
visionSysSim.addCamera(cameraSim, new Transform3d());
cameraSim.prop.setCalibration(640, 480, Rotation2d.fromDegrees(80));
cameraSim.setMinTargetAreaPixels(20.0);
final var targetPose = new Pose3d(new Translation3d(2, 0, 0), new Rotation3d(0, 0, Math.PI));
final int classId = 3;
final float conf = 0.67f;
final TargetModel ballModel = new TargetModel(Units.inchesToMeters(6));
final var ballTarget = new VisionTargetSim(targetPose, ballModel, classId, conf);
visionSysSim.addVisionTargets(ballTarget);
var robotPose = Pose2d.kZero;
visionSysSim.update(robotPose);
var target1 = waitForSequenceNumber(camera, 1).getBestTarget();
assertEquals(classId, target1.objDetectId);
assertEquals(conf, target1.objDetectConf);
assertEquals(-1, target1.fiducialId);
// much around with the target to force PhotonCameraSim::process calculate conf
visionSysSim.removeVisionTargets(ballTarget);
final float conf2 = -1;
final var ballTarget2 = new VisionTargetSim(targetPose, ballModel, classId, conf2);
visionSysSim.addVisionTargets(ballTarget2);
visionSysSim.update(robotPose);
var target2 = waitForSequenceNumber(camera, 2).getBestTarget();
assertEquals(classId, target2.objDetectId);
// 2 * sqrt(area pixels) at this particular pose
assertEquals(0.131, target2.objDetectConf, 0.01);
assertEquals(-1, target2.fiducialId);
}
}