Support selecting Object Detection models (#1359)

This PR is part 1 of #1354. It focuses on adding a model selection
interface for models that exist in `photonvision_config/models/`. Upon
completion we can ship more than one model, and users can upload their
own through `ssh` without deleting the shipped model. This PR also adds
the abstractions needed to support more DNN backends (say OpenCV, or RPI
AI Kit).

Up next is adding a CRUD interface for managing models through the UI.
This commit is contained in:
Christopher Mahoney
2024-09-21 16:08:00 -04:00
committed by GitHub
parent 24fb6af5f4
commit 27cb69c094
20 changed files with 901 additions and 326 deletions

View File

@@ -132,7 +132,7 @@ const validNewPipelineTypes = computed(() => {
{ name: "AprilTag", value: WebsocketPipelineType.AprilTag },
{ name: "Aruco", value: WebsocketPipelineType.Aruco }
];
if (useSettingsStore().general.rknnSupported) {
if (useSettingsStore().general.supportedBackends.length > 0) {
pipelineTypes.push({ name: "Object Detection", value: WebsocketPipelineType.ObjectDetection });
}
return pipelineTypes;
@@ -170,7 +170,7 @@ const pipelineTypesWrapper = computed<{ name: string; value: number }[]>(() => {
{ name: "AprilTag", value: WebsocketPipelineType.AprilTag },
{ name: "Aruco", value: WebsocketPipelineType.Aruco }
];
if (useSettingsStore().general.rknnSupported) {
if (useSettingsStore().general.supportedBackends.length > 0) {
pipelineTypes.push({ name: "Object Detection", value: WebsocketPipelineType.ObjectDetection });
}

View File

@@ -1,14 +1,15 @@
<script setup lang="ts">
import { useCameraSettingsStore } from "@/stores/settings/CameraSettingsStore";
import { type ActivePipelineSettings, PipelineType } from "@/types/PipelineTypes";
import { type ObjectDetectionPipelineSettings, PipelineType } from "@/types/PipelineTypes";
import PvSlider from "@/components/common/pv-slider.vue";
import { computed, getCurrentInstance } from "vue";
import { useStateStore } from "@/stores/StateStore";
import { useSettingsStore } from "@/stores/settings/GeneralSettingsStore";
// TODO fix pipeline typing in order to fix this, the store settings call should be able to infer that only valid pipeline type settings are exposed based on pre-checks for the entire config section
// Defer reference to store access method
const currentPipelineSettings = computed<ActivePipelineSettings>(
() => useCameraSettingsStore().currentPipelineSettings
const currentPipelineSettings = computed<ObjectDetectionPipelineSettings>(
() => useCameraSettingsStore().currentPipelineSettings as ObjectDetectionPipelineSettings
);
// TODO fix pv-range-slider so that store access doesn't need to be deferred
@@ -27,10 +28,30 @@ const interactiveCols = computed(() =>
? 9
: 8
);
/**
 * Model names selectable on this device: for each supported backend, the models listed for that
 * backend in `availableModels`, flattened into one list in backend order.
 *
 * A backend with no entry contributes nothing. `availableModels` itself is treated as empty when
 * it is missing, because the settings store assigns `undefined` to it when the incoming payload
 * omits the field; indexing it unguarded would throw.
 */
const supportedModels = computed<string[]>(() => {
  const { availableModels, supportedBackends } = useSettingsStore().general;
  const modelsByBackend: Record<string, string[]> = availableModels ?? {};
  return supportedBackends.flatMap((backend) => modelsByBackend[backend] ?? []);
});
/**
 * Writable computed backing the model dropdown, expressed as an index into `supportedModels`.
 *
 * Reading yields the position of the current pipeline's `model` in that list (-1 when it is not
 * present). Writing looks up the name at the given index and passes it to
 * `changeCurrentPipelineSetting` with `false` as the second argument.
 */
const selectedModel = computed({
  get() {
    const activeModel = currentPipelineSettings.value.model;
    return supportedModels.value.indexOf(activeModel);
  },
  set(index) {
    const model = supportedModels.value[index];
    useCameraSettingsStore().changeCurrentPipelineSetting({ model }, false);
  }
});
</script>
<template>
<div v-if="currentPipelineSettings.pipelineType === PipelineType.ObjectDetection">
<pv-select
v-model="selectedModel"
label="Model"
tooltip="The model used to detect objects in the camera feed"
:select-cols="interactiveCols"
:items="supportedModels"
/>
<pv-slider
v-model="currentPipelineSettings.confidence"
class="pt-2"

View File

@@ -28,7 +28,8 @@ export const useSettingsStore = defineStore("settings", {
hardwareModel: undefined,
hardwarePlatform: undefined,
mrCalWorking: true,
rknnSupported: false
availableModels: {},
supportedBackends: []
},
network: {
ntServerAddress: "",
@@ -105,7 +106,8 @@ export const useSettingsStore = defineStore("settings", {
hardwarePlatform: data.general.hardwarePlatform || undefined,
gpuAcceleration: data.general.gpuAcceleration || undefined,
mrCalWorking: data.general.mrCalWorking,
rknnSupported: data.general.rknnSupported
availableModels: data.general.availableModels || undefined,
supportedBackends: data.general.supportedBackends || []
};
this.lighting = data.lighting;
this.network = data.networkSettings;

View File

@@ -289,6 +289,7 @@ export interface ObjectDetectionPipelineSettings extends PipelineSettings {
confidence: number;
nms: number;
box_thresh: number;
model: string;
}
export type ConfigurableObjectDetectionPipelineSettings = Partial<
Omit<ObjectDetectionPipelineSettings, "pipelineType">
@@ -304,7 +305,8 @@ export const DefaultObjectDetectionPipelineSettings: ObjectDetectionPipelineSett
cameraExposureRaw: 6,
confidence: 0.9,
nms: 0.45,
box_thresh: 0.25
box_thresh: 0.25,
model: ""
};
export type ActivePipelineSettings =

View File

@@ -7,7 +7,8 @@ export interface GeneralSettings {
hardwareModel?: string;
hardwarePlatform?: string;
mrCalWorking: boolean;
rknnSupported: boolean;
availableModels: Record<string, string[]>;
supportedBackends: string[];
}
export interface MetricData {

View File

@@ -18,24 +18,63 @@
package org.photonvision.common.configuration;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URISyntaxException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.jar.JarEntry;
import java.util.jar.JarFile;
import org.photonvision.common.hardware.Platform;
import org.photonvision.common.logging.LogGroup;
import org.photonvision.common.logging.Logger;
import org.photonvision.rknn.RknnJNI;
import org.photonvision.vision.objects.Model;
import org.photonvision.vision.objects.RknnModel;
/**
* Manages the loading of neural network models.
*
* <p>Models are loaded from the filesystem at the <code>modelsFolder</code> location. PhotonVision
* also supports shipping pre-trained models as resources in the JAR. If the model has already been
* extracted to the filesystem, it will not be extracted again.
*
* <p>Each model must have a corresponding <code>labels</code> file. The labels file format is
* simply a list of string names per label, one label per line. The labels file must have the same
* name as the model file, but with the suffix <code>-labels.txt</code> instead of <code>.rknn
* </code>.
*/
public class NeuralNetworkModelManager {
/** Singleton instance of the NeuralNetworkModelManager */
private static NeuralNetworkModelManager INSTANCE;
private static final Logger logger = new Logger(NeuralNetworkModelManager.class, LogGroup.Config);
private final String MODEL_NAME = "note-640-640-yolov5s.rknn";
private final RknnJNI.ModelVersion modelVersion = RknnJNI.ModelVersion.YOLO_V5;
private File defaultModelFile;
private List<String> labels;
/**
* Private constructor to prevent instantiation
*
* @return The NeuralNetworkModelManager instance
*/
private NeuralNetworkModelManager() {
ArrayList<NeuralNetworkBackend> backends = new ArrayList<>();
if (Platform.isRK3588()) {
backends.add(NeuralNetworkBackend.RKNN);
}
supportedBackends = backends;
}
/**
* Returns the singleton instance of the NeuralNetworkModelManager
*
* @return The singleton instance
*/
public static NeuralNetworkModelManager getInstance() {
if (INSTANCE == null) {
INSTANCE = new NeuralNetworkModelManager();
@@ -43,62 +82,226 @@ public class NeuralNetworkModelManager {
return INSTANCE;
}
/**
* Perform initial setup and extract default model from JAR to the filesystem
*
* @param modelsFolder Where models live
*/
public void initialize(File modelsFolder) {
var modelResourcePath = "/models/" + MODEL_NAME;
this.defaultModelFile = new File(modelsFolder, MODEL_NAME);
extractResource(modelResourcePath, defaultModelFile);
/** Logger for the NeuralNetworkModelManager */
private static final Logger logger = new Logger(NeuralNetworkModelManager.class, LogGroup.Config);
File labelsFile = new File(modelsFolder, "labels_v5.txt");
var labelResourcePath = "/models/" + labelsFile.getName();
extractResource(labelResourcePath, labelsFile);
/** Inference backends known to the model manager, each tagged with a model file extension. */
public enum NeuralNetworkBackend {
    /** The RKNN backend; its model files end in <code>.rknn</code>. */
    RKNN(".rknn");

    /** File extension, including the leading dot, used to match model files to this backend. */
    private String format;

    // Enum constructors are implicitly private, so no modifier is needed.
    NeuralNetworkBackend(String format) {
        this.format = format;
    }
}
private final List<NeuralNetworkBackend> supportedBackends;
/**
 * Lists the backends supported on this device by name.
 *
 * @return an unmodifiable list holding the string form of each supported backend, in the same
 *     order as the internal list
 */
public List<String> getSupportedBackends() {
    List<String> names = new ArrayList<>(supportedBackends.size());
    for (NeuralNetworkBackend backend : supportedBackends) {
        names.add(backend.toString());
    }
    return List.copyOf(names);
}
/**
* Stores model information, such as the model file, labels, and version.
*
* <p>The first model in the list is the default model.
*/
private Map<NeuralNetworkBackend, ArrayList<Model>> models;
/**
 * Builds a frontend-friendly view of the loaded models.
 *
 * @return a new map from backend name to the names of the models loaded for that backend; empty
 *     if no models have been loaded yet
 */
public HashMap<String, ArrayList<String>> getModels() {
    HashMap<String, ArrayList<String>> namesByBackend = new HashMap<>();

    if (models != null) {
        for (Map.Entry<NeuralNetworkBackend, ArrayList<Model>> entry : models.entrySet()) {
            ArrayList<String> names = new ArrayList<>();
            for (Model model : entry.getValue()) {
                names.add(model.getName());
            }
            namesByBackend.put(entry.getKey().toString(), names);
        }
    }

    return namesByBackend;
}
/**
 * Looks up a model by name among the models of the supported backends.
 *
 * <p>Backends are searched in the order of the supported-backend list and the first model whose
 * name equals <code>modelName</code> wins. A present result should be safe to load.
 *
 * @param modelName the name of the model to retrieve
 * @return the matching model, or an empty Optional if nothing has been loaded or no supported
 *     backend has a model with that name
 */
public Optional<Model> getModel(String modelName) {
    if (models == null) {
        return Optional.empty();
    }

    for (NeuralNetworkBackend backend : supportedBackends) {
        // Skip backends for which nothing was loaded
        if (!models.containsKey(backend)) {
            continue;
        }
        for (Model candidate : models.get(backend)) {
            if (candidate.getName().equals(modelName)) {
                return Optional.of(candidate);
            }
        }
    }

    return Optional.empty();
}
/**
 * The default model when no model is specified: the first model registered for the first
 * supported backend.
 *
 * @return the default model, or an empty Optional if no models have been loaded, no backend is
 *     supported, or the first supported backend has no models
 */
public Optional<Model> getDefaultModel() {
    if (models == null || supportedBackends.isEmpty()) {
        return Optional.empty();
    }

    // A backend only gets a map entry once a model file with its extension has been seen, so a
    // supported backend with no models on disk has no entry here and get() returns null.
    ArrayList<Model> backendModels = models.get(supportedBackends.get(0));
    if (backendModels == null) {
        return Optional.empty();
    }

    return backendModels.stream().findFirst();
}
private void loadModel(File model) {
if (models == null) {
models = new HashMap<>();
}
// Get the model extension and check if it is supported
String modelExtension = model.getName().substring(model.getName().lastIndexOf('.'));
if (modelExtension.equals(".txt")) {
return;
}
Optional<NeuralNetworkBackend> backend =
Arrays.stream(NeuralNetworkBackend.values())
.filter(b -> b.format.equals(modelExtension))
.findFirst();
if (!backend.isPresent()) {
logger.warn("Model " + model.getName() + " has an unknown extension.");
return;
}
String labels = model.getAbsolutePath().replace(backend.get().format, "-labels.txt");
if (!models.containsKey(backend.get())) {
models.put(backend.get(), new ArrayList<>());
}
try {
labels = Files.readAllLines(Paths.get(labelsFile.getPath()));
switch (backend.get()) {
case RKNN:
models.get(backend.get()).add(new RknnModel(model, labels));
logger.info(
"Loaded model " + model.getName() + " for backend " + backend.get().toString());
break;
default:
break;
}
} catch (IllegalArgumentException e) {
logger.error("Failed to load model " + model.getName(), e);
} catch (IOException e) {
logger.error("Error reading labels.txt", e);
logger.error("Failed to read labels for model " + model.getName(), e);
}
}
private void extractResource(String resourcePath, File outputFile) {
try (var in = NeuralNetworkModelManager.class.getResourceAsStream(resourcePath)) {
if (in == null) {
logger.error("Failed to find jar resource at " + resourcePath);
return;
}
/**
* Discovers DNN models from the specified folder.
*
* @param modelsFolder The folder where the models are stored
*/
public void discoverModels(File modelsFolder) {
logger.info("Supported backends: " + supportedBackends);
if (!outputFile.exists()) {
try (FileOutputStream fos = new FileOutputStream(outputFile)) {
int read = -1;
byte[] buffer = new byte[1024];
while ((read = in.read(buffer)) != -1) {
fos.write(buffer, 0, read);
if (!modelsFolder.exists()) {
logger.error("Models folder " + modelsFolder.getAbsolutePath() + " does not exist.");
return;
}
if (models == null) {
models = new HashMap<>();
}
try {
Files.walk(modelsFolder.toPath())
.filter(Files::isRegularFile)
.forEach(path -> loadModel(path.toFile()));
} catch (IOException e) {
logger.error("Failed to discover models at " + modelsFolder.getAbsolutePath(), e);
}
// After loading all of the models, sort them by name to ensure a consistent ordering
models.forEach(
(backend, backendModels) ->
backendModels.sort((a, b) -> a.getName().compareTo(b.getName())));
// Log
StringBuilder sb = new StringBuilder();
sb.append("Discovered models: ");
models.forEach(
(backend, backendModels) -> {
sb.append(backend).append(" [");
backendModels.forEach(model -> sb.append(model.getName()).append(", "));
sb.append("] ");
});
}
/**
* Extracts models from the JAR and copies them to disk.
*
* @param modelsDirectory the directory on disk to save models
*/
public void extractModels(File modelsDirectory) {
if (!modelsDirectory.exists() && !modelsDirectory.mkdirs()) {
throw new RuntimeException("Failed to create directory: " + modelsDirectory);
}
String resource = "models";
try {
String jarPath =
getClass().getProtectionDomain().getCodeSource().getLocation().toURI().getPath();
try (JarFile jarFile = new JarFile(jarPath)) {
Enumeration<JarEntry> entries = jarFile.entries();
while (entries.hasMoreElements()) {
JarEntry entry = entries.nextElement();
if (!entry.getName().startsWith(resource + "/") || entry.isDirectory()) {
continue;
}
Path outputPath =
modelsDirectory.toPath().resolve(entry.getName().substring(resource.length() + 1));
if (Files.exists(outputPath)) {
logger.info("Skipping extraction of DNN resource: " + entry.getName());
continue;
}
Files.createDirectories(outputPath.getParent());
try (InputStream inputStream = jarFile.getInputStream(entry)) {
Files.copy(inputStream, outputPath, StandardCopyOption.REPLACE_EXISTING);
logger.info("Extracted DNN resource: " + entry.getName());
} catch (IOException e) {
logger.error("Failed to extract DNN resource: " + entry.getName(), e);
}
} catch (IOException e) {
logger.error("Error extracting resource to " + outputFile.toPath().toString(), e);
}
} else {
logger.info(
"File " + outputFile.toPath().toString() + " already exists. Skipping extraction.");
}
} catch (IOException e) {
logger.error("Error finding jar resource " + resourcePath, e);
} catch (IOException | URISyntaxException e) {
logger.error("Error extracting models", e);
}
}
public File getDefaultRknnModel() {
return defaultModelFile;
}
public List<String> getLabels() {
return labels;
}
public RknnJNI.ModelVersion getModelVersion() {
return modelVersion;
}
}

View File

@@ -28,7 +28,6 @@ import org.photonvision.common.hardware.Platform;
import org.photonvision.common.networking.NetworkManager;
import org.photonvision.common.networking.NetworkUtils;
import org.photonvision.common.util.SerializationUtils;
import org.photonvision.jni.RknnDetectorJNI;
import org.photonvision.mrcal.MrCalJNILoader;
import org.photonvision.raspi.LibCameraJNILoader;
import org.photonvision.vision.calibration.UICameraCalibrationCoefficients;
@@ -139,7 +138,9 @@ public class PhotonConfiguration {
? "Zerocopy Libcamera Working"
: ""); // TODO add support for other types of GPU accel
generalSubmap.put("mrCalWorking", MrCalJNILoader.getInstance().isLoaded());
generalSubmap.put("rknnSupported", RknnDetectorJNI.getInstance().isLoaded());
generalSubmap.put("availableModels", NeuralNetworkModelManager.getInstance().getModels());
generalSubmap.put(
"supportedBackends", NeuralNetworkModelManager.getInstance().getSupportedBackends());
generalSubmap.put("hardwareModel", hardwareConfig.deviceName);
generalSubmap.put("hardwarePlatform", Platform.getPlatformName());
settingsSubmap.put("general", generalSubmap);

View File

@@ -18,20 +18,10 @@
package org.photonvision.jni;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.stream.Collectors;
import org.opencv.core.Mat;
import org.photonvision.common.logging.LogGroup;
import org.photonvision.common.logging.Logger;
import org.photonvision.common.util.TestUtils;
import org.photonvision.rknn.RknnJNI;
import org.photonvision.rknn.RknnJNI.RknnResult;
import org.photonvision.vision.pipe.impl.NeuralNetworkPipeResult;
public class RknnDetectorJNI extends PhotonJNICommon {
private static final Logger logger = new Logger(RknnDetectorJNI.class, LogGroup.General);
private boolean isLoaded;
private static RknnDetectorJNI instance = null;
@@ -60,90 +50,4 @@ public class RknnDetectorJNI extends PhotonJNICommon {
public void setLoaded(boolean state) {
isLoaded = state;
}
public static class RknnObjectDetector {
long objPointer = -1;
private List<String> labels;
private final Object lock = new Object();
private static final CopyOnWriteArrayList<RknnObjectDetector> detectors =
new CopyOnWriteArrayList<>();
static volatile boolean hook = false;
public RknnObjectDetector(String modelPath, List<String> labels, RknnJNI.ModelVersion version) {
synchronized (lock) {
objPointer = RknnJNI.create(modelPath, labels.size(), version.ordinal(), -1);
detectors.add(this);
logger.debug(
"Created detector "
+ objPointer
+ " from path "
+ modelPath
+ "! Detectors: "
+ Arrays.toString(detectors.toArray()));
}
this.labels = labels;
// the kernel should probably alredy deal with this for us, but I'm gunna be paranoid anyways.
if (!hook) {
Runtime.getRuntime()
.addShutdownHook(
new Thread(
() -> {
System.err.println("Shutdown hook rknn");
for (var d : detectors) {
d.release();
}
}));
hook = true;
}
}
public List<String> getClasses() {
return labels;
}
/**
* Detect forwards using this model
*
* @param in The image to process
* @param nmsThresh Non-maximum supression threshold. Probably should not change
* @param boxThresh Minimum confidence for a box to be added. Basically just confidence
* threshold
*/
public List<NeuralNetworkPipeResult> detect(Mat in, double nmsThresh, double boxThresh) {
RknnResult[] ret;
synchronized (lock) {
// We can technically be asked to detect and the lock might be acquired _after_ release has
// been called. This would mean objPointer would be invalid which would call everything to
// explode.
if (objPointer > 0) {
ret = RknnJNI.detect(objPointer, in.getNativeObjAddr(), nmsThresh, boxThresh);
} else {
logger.warn("Detect called after destroy -- giving up");
return List.of();
}
}
if (ret == null) {
return List.of();
}
return List.of(ret).stream()
.map(it -> new NeuralNetworkPipeResult(it.rect, it.class_id, it.conf))
.collect(Collectors.toList());
}
public void release() {
synchronized (lock) {
if (objPointer > 0) {
RknnJNI.destroy(objPointer);
detectors.remove(this);
System.out.println(
"Killed " + objPointer + "! Detectors: " + Arrays.toString(detectors.toArray()));
objPointer = -1;
} else {
logger.error("RKNN Detector has already been destroyed!");
}
}
}
}
}

View File

@@ -0,0 +1,147 @@
/*
* Copyright (C) Photon Vision.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package org.photonvision.jni;
import java.awt.Color;
import java.lang.ref.Cleaner;
import java.util.List;
import java.util.concurrent.atomic.AtomicBoolean;
import org.opencv.core.Mat;
import org.opencv.core.Size;
import org.photonvision.common.logging.LogGroup;
import org.photonvision.common.logging.Logger;
import org.photonvision.common.util.ColorHelper;
import org.photonvision.rknn.RknnJNI;
import org.photonvision.vision.objects.Letterbox;
import org.photonvision.vision.objects.ObjectDetector;
import org.photonvision.vision.objects.RknnModel;
import org.photonvision.vision.pipe.impl.NeuralNetworkPipeResult;
/** Manages an object detector using the rknn backend. */
public class RknnObjectDetector implements ObjectDetector {
    private static final Logger logger = new Logger(RknnObjectDetector.class, LogGroup.General);

    /**
     * Cleaner shared by all detectors. It releases the native object of a detector that was garbage
     * collected without an explicit {@link #release()}. One shared instance avoids creating a
     * separate cleaner for every detector.
     */
    private static final Cleaner cleaner = Cleaner.create();

    /** Atomic boolean to ensure that the native object can only be released once. */
    private final AtomicBoolean released = new AtomicBoolean(false);

    /** Pointer to the native object */
    private final long objPointer;

    private final RknnModel model;

    private final Size inputSize;

    /** Returns the model in use by this detector. */
    @Override
    public RknnModel getModel() {
        return model;
    }

    /**
     * Creates a new RknnObjectDetector from the given model.
     *
     * @param model The model to create the detector from.
     * @param inputSize The required image dimensions for the model. Images will be {@link
     *     Letterbox}ed to this shape.
     * @throws RuntimeException if the native detector could not be created
     */
    public RknnObjectDetector(RknnModel model, Size inputSize) {
        this.model = model;
        this.inputSize = inputSize;

        // Create the detector
        objPointer =
                RknnJNI.create(
                        model.modelFile.getPath(), model.labels.size(), model.version.ordinal(), -1);
        if (objPointer <= 0) {
            throw new RuntimeException(
                    "Failed to create detector from path " + model.modelFile.getPath());
        }

        logger.debug("Created detector for model " + model.modelFile.getName());

        // The cleaning action must not reference `this`, otherwise the detector stays strongly
        // reachable from the cleaner and is never collected. Copy what the action needs into
        // locals so the lambda captures only those.
        final long pointer = objPointer;
        final AtomicBoolean releasedFlag = released;
        final String modelName = model.modelFile.getName();
        cleaner.register(this, () -> destroyNative(pointer, releasedFlag, modelName));
    }

    /**
     * Returns the classes that the detector can detect
     *
     * @return The classes
     */
    @Override
    public List<String> getClasses() {
        return model.labels;
    }

    /**
     * Detects objects in the given input image using the RknnDetector.
     *
     * <p>The input is letterboxed to the model's input size before detection, and the resulting
     * boxes are mapped back to the coordinates of the input image.
     *
     * @param in The input image to perform object detection on.
     * @param nmsThresh The threshold value for non-maximum suppression.
     * @param boxThresh The threshold value for bounding box detection.
     * @return A list of NeuralNetworkPipeResult objects representing the detected objects. Returns an
     *     empty list if the detector is not initialized, has been released, or if no objects are
     *     detected.
     */
    @Override
    public List<NeuralNetworkPipeResult> detect(Mat in, double nmsThresh, double boxThresh) {
        // NOTE(review): this check does not make detect() safe against a release() running
        // concurrently on another thread; it only rejects calls made after release completed.
        if (objPointer <= 0 || released.get()) {
            // Report error and make sure to include the model name
            logger.error(
                    "Detector is not initialized or has been released! Model: "
                            + model.modelFile.getName());
            return List.of();
        }

        // Resize the frame to the input size of the model
        Mat letterboxed = new Mat();
        try {
            Letterbox scale =
                    Letterbox.letterbox(
                            in, letterboxed, this.inputSize, ColorHelper.colorToScalar(Color.GRAY));
            if (!letterboxed.size().equals(this.inputSize)) {
                throw new RuntimeException("Letterboxed frame is not the right size!");
            }

            // Detect objects in the letterboxed frame
            var results =
                    RknnJNI.detect(objPointer, letterboxed.getNativeObjAddr(), nmsThresh, boxThresh);
            if (results == null) {
                return List.of();
            }

            return scale.resizeDetections(
                    List.of(results).stream()
                            .map(it -> new NeuralNetworkPipeResult(it.rect, it.class_id, it.conf))
                            .toList());
        } finally {
            // Free the temporary frame on every exit path, including null results and exceptions
            letterboxed.release();
        }
    }

    /**
     * Releases the native detector. Safe to call more than once: the native object is destroyed at
     * most once, whether by this method or by the cleaner.
     */
    @Override
    public void release() {
        destroyNative(objPointer, released, model.modelFile.getName());
    }

    /**
     * Destroys the native detector behind {@code pointer} unless {@code released} says it has
     * already been destroyed. Static so that the cleaner action holds no reference to a detector.
     */
    private static void destroyNative(long pointer, AtomicBoolean released, String modelName) {
        // Checks if the atomic is 'false', and if so, sets it to 'true'
        if (released.compareAndSet(false, true)) {
            RknnJNI.destroy(pointer);
            logger.debug("Released detector for model " + modelName);
        }
    }
}

View File

@@ -0,0 +1,105 @@
/*
* Copyright (C) Photon Vision.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package org.photonvision.vision.objects;
import java.util.ArrayList;
import java.util.List;
import org.opencv.core.Core;
import org.opencv.core.Mat;
import org.opencv.core.Rect2d;
import org.opencv.core.Scalar;
import org.opencv.core.Size;
import org.opencv.imgproc.Imgproc;
import org.photonvision.vision.pipe.impl.NeuralNetworkPipeResult;
/**
 * Describes how a frame was letterboxed (padding offsets and scale factor) so that detections made
 * on the letterboxed image can be mapped back onto the original frame.
 */
public class Letterbox {
    /** Horizontal padding offset: half of the width left over after scaling. */
    double dx;

    /** Vertical padding offset: half of the height left over after scaling. */
    double dy;

    /** Factor by which the original frame was scaled. */
    double scale;

    public Letterbox(double dx, double dy, double scale) {
        this.dx = dx;
        this.dy = dy;
        this.scale = scale;
    }

    /**
     * Resize the frame to the new shape and "letterbox" it.
     *
     * <p>Letterboxing is the process of resizing an image to a new shape while maintaining the aspect
     * ratio of the original image. The new image is padded with a color to fill the remaining space.
     *
     * @param frame The source image; it is only read
     * @param letterboxed Output image that receives the scaled and padded result
     * @param newShape The size the output image should have
     * @param color The color used to fill the padding
     * @return The padding offsets and scale factor that were applied
     */
    public static Letterbox letterbox(Mat frame, Mat letterboxed, Size newShape, Scalar color) {
        // from https://github.com/ultralytics/yolov5/issues/8427#issuecomment-1172469631
        var frameSize = frame.size();
        // Largest uniform scale at which the frame still fits inside newShape
        var ratio = Math.min(newShape.height / frameSize.height, newShape.width / frameSize.width);
        var scaledSize =
                new Size(Math.round(frameSize.width * ratio), Math.round(frameSize.height * ratio));

        if (frameSize.equals(scaledSize)) {
            frame.copyTo(letterboxed);
        } else {
            // Use the full overload so INTER_LINEAR is passed as the interpolation mode; with the
            // 4-argument overload the fourth argument is the fx scale factor. fx/fy of 0 mean
            // "derive the scale from dsize".
            Imgproc.resize(frame, letterboxed, scaledSize, 0, 0, Imgproc.INTER_LINEAR);
        }

        // Split the leftover space evenly between the two sides
        var padX = (newShape.width - scaledSize.width) / 2;
        var padY = (newShape.height - scaledSize.height) / 2;

        // The +/- 0.1 nudges make an odd leftover split as floor/ceil across the two sides
        int top = (int) (Math.round(padY - 0.1f));
        int bottom = (int) (Math.round(padY + 0.1f));
        int left = (int) (Math.round(padX - 0.1f));
        int right = (int) (Math.round(padX + 0.1f));
        Core.copyMakeBorder(
                letterboxed, letterboxed, top, bottom, left, right, Core.BORDER_CONSTANT, color);

        return new Letterbox(padX, padY, ratio);
    }

    /**
     * Resizes the detections to the original frame size.
     *
     * <p>Each bounding box has the padding offset subtracted from its origin and is then divided by
     * the letterbox scale. Class index and confidence are carried over unchanged.
     *
     * @param unscaled The detections to resize
     * @return The resized detections, as a new list in the same order
     */
    public List<NeuralNetworkPipeResult> resizeDetections(List<NeuralNetworkPipeResult> unscaled) {
        // Same for every detection, so compute it once
        final double inverseScale = 1.0 / this.scale;

        var ret = new ArrayList<NeuralNetworkPipeResult>();
        for (var detection : unscaled) {
            var box = detection.bbox;
            double x = (box.x - this.dx) * inverseScale;
            double y = (box.y - this.dy) * inverseScale;
            double width = box.width * inverseScale;
            double height = box.height * inverseScale;

            ret.add(
                    new NeuralNetworkPipeResult(
                            new Rect2d(x, y, width, height), detection.classIdx, detection.confidence));
        }
        return ret;
    }
}

View File

@@ -0,0 +1,24 @@
/*
* Copyright (C) Photon Vision.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package org.photonvision.vision.objects;
/** A neural network model that can be loaded into an {@link ObjectDetector}. */
public interface Model {
    /**
     * Creates an {@link ObjectDetector} for this model.
     *
     * @return the detector
     */
    ObjectDetector load();

    /**
     * Returns the name of this model.
     *
     * @return the model name
     */
    String getName();
}

View File

@@ -0,0 +1,67 @@
/*
* Copyright (C) Photon Vision.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package org.photonvision.vision.objects;
import java.util.List;
import org.opencv.core.Mat;
import org.photonvision.vision.pipe.impl.NeuralNetworkPipeResult;
/**
 * Do-nothing stand-in for both {@link Model} and {@link ObjectDetector}, for use when there is no
 * model available to load. It acts as its own detector and never reports any detections.
 */
public class NullModel implements Model, ObjectDetector {
    /** The single shared instance. */
    public static final NullModel INSTANCE = new NullModel();

    private NullModel() {}

    /** Returns the shared instance, {@link #INSTANCE}. */
    public static NullModel getInstance() {
        return INSTANCE;
    }

    // ---- Model ----

    /** Returns the fixed name <code>"NullModel"</code>. */
    @Override
    public String getName() {
        return "NullModel";
    }

    /** Loading yields this same object. */
    @Override
    public ObjectDetector load() {
        return this;
    }

    // ---- ObjectDetector ----

    /** Returns this object, which is also its own model. */
    @Override
    public Model getModel() {
        return this;
    }

    /** Returns an empty list: there are no classes to detect. */
    @Override
    public List<String> getClasses() {
        return List.of();
    }

    /** Ignores its arguments and returns an empty list. */
    @Override
    public List<NeuralNetworkPipeResult> detect(Mat in, double nmsThresh, double boxThresh) {
        return List.of();
    }

    /** Nothing to release. */
    @Override
    public void release() {
        // Do nothing
    }
}

View File

@@ -0,0 +1,60 @@
/*
* Copyright (C) Photon Vision.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package org.photonvision.vision.objects;
import java.util.List;
import org.opencv.core.Mat;
import org.photonvision.common.configuration.NeuralNetworkModelManager;
import org.photonvision.vision.opencv.Releasable;
import org.photonvision.vision.pipe.impl.NeuralNetworkPipeResult;
/**
 * A loaded object detection model that can be run on images.
 *
 * <p>Lifecycle of an ObjectDetector:
 *
 * <ol>
 *   <li>{@link NeuralNetworkModelManager} discovers the available {@link Model}s
 *   <li>A {@link Model} is selected as a parameter of {@link
 *       org.photonvision.vision.pipe.impl.ObjectDetectionPipe ObjectDetectionPipe}
 *   <li>{@link Model#load()} creates the ObjectDetector instance
 *   <li>{@link ObjectDetector#detect(Mat, double, double)} performs object detection
 *   <li>{@link ObjectDetector#release()} releases resources
 * </ol>
 */
public interface ObjectDetector extends Releasable {
    /**
     * Returns the model that created this ObjectDetector.
     *
     * @return The model
     */
    Model getModel();

    /**
     * Returns the classes that the detector can detect
     *
     * @return The classes
     */
    List<String> getClasses();

    /**
     * Runs object detection on the given image. Implementations are expected to perform any
     * preprocessing and postprocessing inside this call.
     *
     * @param in The input image to perform object detection on.
     * @param nmsThresh The threshold value for non-maximum suppression.
     * @param boxThresh The threshold value for bounding box detection.
     * @return A list of NeuralNetworkPipeResult objects representing the detected objects. Returns an
     *     empty list if the detector is not initialized or if no objects are detected.
     */
    List<NeuralNetworkPipeResult> detect(Mat in, double nmsThresh, double boxThresh);
}

View File

@@ -0,0 +1,91 @@
/*
* Copyright (C) Photon Vision.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package org.photonvision.vision.objects;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.List;
import org.opencv.core.Size;
import org.photonvision.jni.RknnObjectDetector;
import org.photonvision.rknn.RknnJNI;
/**
 * A {@link Model} described by an RKNN model file and a labels file on disk.
 *
 * <p>The input size and the model version are derived from the model's file name, which must have
 * exactly four {@code -}-separated parts: {@code name-width-height-model.rknn}. Because the file
 * name is split on {@code -}, the {@code name} part cannot itself contain a dash.
 */
public class RknnModel implements Model {
  /** The model file this instance was created from. */
  public final File modelFile;

  /** Model version inferred from the last part of the file name. */
  public final RknnJNI.ModelVersion version;

  /** Lines of the labels file, in file order. */
  public final List<String> labels;

  /** Input size (width x height) parsed from the file name. */
  public final Size inputSize;

  /**
   * Determines the model version based on the model's filename.
   *
   * <p>"yolov5" -> "YOLO_V5"
   *
   * <p>"yolov8" -> "YOLO_V8"
   *
   * <p>The match is a case-sensitive substring check.
   *
   * @param modelName The model's filename
   * @return The model version
   * @throws IllegalArgumentException if the name contains neither "yolov5" nor "yolov8"
   */
  private static RknnJNI.ModelVersion getModelVersion(String modelName)
      throws IllegalArgumentException {
    if (modelName.contains("yolov5")) {
      return RknnJNI.ModelVersion.YOLO_V5;
    } else if (modelName.contains("yolov8")) {
      return RknnJNI.ModelVersion.YOLO_V8;
    } else {
      throw new IllegalArgumentException("Unknown model version for model " + modelName);
    }
  }

  /**
   * rknn model constructor.
   *
   * @param modelFile path to model on disk. Format: `name-width-height-model.rknn`
   * @param labels path to labels file on disk
   * @throws IllegalArgumentException if the file name does not have exactly four dash-separated
   *     parts, the model version cannot be determined, the width or height is not a positive
   *     integer, or the labels file cannot be read
   * @throws IOException never thrown by the current implementation (read failures are wrapped in
   *     an IllegalArgumentException); kept in the signature so existing callers still compile
   */
  public RknnModel(File modelFile, String labels) throws IllegalArgumentException, IOException {
    this.modelFile = modelFile;

    String[] parts = modelFile.getName().split("-");
    if (parts.length != 4) {
      throw new IllegalArgumentException("Invalid model file name: " + modelFile);
    }

    this.version = getModelVersion(parts[3]);

    int width;
    int height;
    try {
      width = Integer.parseInt(parts[1]);
      height = Integer.parseInt(parts[2]);
    } catch (NumberFormatException e) {
      // Surface which file is malformed instead of a bare "For input string" message
      throw new IllegalArgumentException(
          "Invalid input size in model file name: " + modelFile, e);
    }
    if (width <= 0 || height <= 0) {
      throw new IllegalArgumentException(
          "Input size must be positive in model file name: " + modelFile);
    }
    this.inputSize = new Size(width, height);

    try {
      this.labels = Files.readAllLines(Paths.get(labels));
    } catch (IOException e) {
      throw new IllegalArgumentException("Failed to read labels file " + labels, e);
    }
  }

  /** Returns the file name of the model file (not its full path). */
  public String getName() {
    return modelFile.getName();
  }

  /** Creates a new {@link RknnObjectDetector} for this model and its parsed input size. */
  public ObjectDetector load() {
    return new RknnObjectDetector(this, inputSize);
  }
}

View File

@@ -0,0 +1,75 @@
/*
* Copyright (C) Photon Vision.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package org.photonvision.vision.pipe.impl;
import java.util.List;
import java.util.Optional;
import org.opencv.core.Mat;
import org.photonvision.common.configuration.NeuralNetworkModelManager;
import org.photonvision.vision.objects.Model;
import org.photonvision.vision.objects.NullModel;
import org.photonvision.vision.objects.ObjectDetector;
import org.photonvision.vision.opencv.CVMat;
import org.photonvision.vision.opencv.Releasable;
import org.photonvision.vision.pipe.CVPipe;
/**
 * Pipe that runs object detection on a frame using the model selected in its parameters.
 *
 * <p>The pipe owns one {@link ObjectDetector} at a time and swaps it whenever the model in the
 * parameters is a different instance than the one the current detector was created from.
 */
public class ObjectDetectionPipe
    extends CVPipe<
        CVMat, List<NeuralNetworkPipeResult>, ObjectDetectionPipe.ObjectDetectionPipeParams>
    implements Releasable {
  /** Parameters consumed by {@link ObjectDetectionPipe}. */
  public static class ObjectDetectionPipeParams {
    /** Passed to the detector as the bounding box threshold. */
    public double confidence;

    /** Passed to the detector as the non-maximum suppression threshold. */
    public double nms;

    /** Not read anywhere inside this pipe. */
    public int max_detections;

    /** Model the pipe's detector should be loaded from. */
    public Model model;

    public ObjectDetectionPipeParams() {}
  }

  private ObjectDetector detector;

  /**
   * Creates the pipe with a detector loaded from the model manager's default model, falling back
   * to the {@link NullModel} instance when there is no default model.
   */
  public ObjectDetectionPipe() {
    Optional<ObjectDetector> loaded =
        NeuralNetworkModelManager.getInstance().getDefaultModel().map(Model::load);
    detector = loaded.orElse(NullModel.getInstance());
  }

  /**
   * Runs the detector on the given image.
   *
   * @param in The image to run detection on
   * @return The detector's results, or an empty list when the image is empty
   */
  @Override
  protected List<NeuralNetworkPipeResult> process(CVMat in) {
    // Swap detectors when the requested model is not the instance the current detector came from
    boolean modelChanged = detector.getModel() != params.model;
    if (modelChanged) {
      detector.release();
      detector = params.model.load();
    }

    Mat input = in.getMat();
    if (!input.empty()) {
      return detector.detect(in.getMat(), params.nms, params.confidence);
    }
    return List.of();
  }

  /** Returns the classes reported by the current detector. */
  public List<String> getClassNames() {
    return detector.getClasses();
  }

  /** Releases the current detector. */
  @Override
  public void release() {
    detector.release();
  }
}

View File

@@ -1,153 +0,0 @@
/*
* Copyright (C) Photon Vision.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package org.photonvision.vision.pipe.impl;
import java.awt.Color;
import java.util.ArrayList;
import java.util.List;
import org.opencv.core.Core;
import org.opencv.core.Mat;
import org.opencv.core.Rect2d;
import org.opencv.core.Scalar;
import org.opencv.core.Size;
import org.opencv.imgproc.Imgproc;
import org.photonvision.common.configuration.NeuralNetworkModelManager;
import org.photonvision.common.util.ColorHelper;
import org.photonvision.jni.RknnDetectorJNI.RknnObjectDetector;
import org.photonvision.vision.opencv.CVMat;
import org.photonvision.vision.opencv.Releasable;
import org.photonvision.vision.pipe.CVPipe;
/**
 * Pipe that runs an {@link RknnObjectDetector} on a frame.
 *
 * <p>Each frame is letterboxed to a square input, passed to the detector, and the resulting
 * bounding boxes are mapped back into the coordinate space of the original frame.
 */
public class RknnDetectionPipe
    extends CVPipe<CVMat, List<NeuralNetworkPipeResult>, RknnDetectionPipe.RknnDetectionPipeParams>
    implements Releasable {
  /** Side length, in pixels, of the square image handed to the detector. */
  private static final int INPUT_SIZE = 640;

  private RknnObjectDetector detector;

  public RknnDetectionPipe() {
    // For now this is hard-coded to defaults. Should be refactored into set pipe
    // params, though.
    // And ideally a little wrapper helper for only changing native stuff on content
    // change created.
    this.detector =
        new RknnObjectDetector(
            NeuralNetworkModelManager.getInstance().getDefaultRknnModel().getAbsolutePath(),
            NeuralNetworkModelManager.getInstance().getLabels(),
            NeuralNetworkModelManager.getInstance().getModelVersion());
  }

  /** Padding offsets and scale factor computed while letterboxing a frame. */
  private static class Letterbox {
    double dx;
    double dy;
    double scale;

    public Letterbox(double dx, double dy, double scale) {
      this.dx = dx;
      this.dy = dy;
      this.scale = scale;
    }
  }

  /**
   * Letterboxes the frame, runs detection on it and rescales the detections to the input frame.
   *
   * @param in The frame to run detection on
   * @return The detections in the coordinates of {@code in}, or an empty list for an empty frame
   * @throws RuntimeException if the letterboxed image does not have the expected size
   */
  @Override
  protected List<NeuralNetworkPipeResult> process(CVMat in) {
    var frame = in.getMat();

    // Make sure we don't get a weird empty frame
    if (frame.empty()) {
      return List.of();
    }

    var letterboxed = new Mat();
    try {
      // letterbox
      var scale =
          letterbox(
              frame,
              letterboxed,
              new Size(INPUT_SIZE, INPUT_SIZE),
              ColorHelper.colorToScalar(Color.GRAY));

      if (letterboxed.width() != INPUT_SIZE || letterboxed.height() != INPUT_SIZE) {
        // huh whack give up lol
        throw new RuntimeException("RGA bugged but still wrong size");
      }

      var ret = detector.detect(letterboxed, params.nms, params.confidence);
      return resizeDetections(ret, scale);
    } finally {
      // Always free the temporary Mat, including when letterboxing or detection throws
      letterboxed.release();
    }
  }

  /**
   * Undoes the letterbox transform on every detection.
   *
   * @param unscaled Detections in letterboxed-image coordinates
   * @param letterbox The offsets and scale that were applied to the frame
   * @return New results with bounding boxes shifted by the offsets and divided by the scale
   */
  private List<NeuralNetworkPipeResult> resizeDetections(
      List<NeuralNetworkPipeResult> unscaled, Letterbox letterbox) {
    var ret = new ArrayList<NeuralNetworkPipeResult>();
    // Same for every detection, so compute it once
    var scale = 1.0 / letterbox.scale;

    for (var t : unscaled) {
      var boundingBox = t.bbox;
      double x = (boundingBox.x - letterbox.dx) * scale;
      double y = (boundingBox.y - letterbox.dy) * scale;
      double width = boundingBox.width * scale;
      double height = boundingBox.height * scale;

      ret.add(
          new NeuralNetworkPipeResult(new Rect2d(x, y, width, height), t.classIdx, t.confidence));
    }

    return ret;
  }

  /**
   * Resizes {@code frame} to fit inside {@code newShape} while keeping its aspect ratio, then
   * pads it with {@code color} so the result written to {@code letterboxed} matches {@code
   * newShape}.
   *
   * @param frame The source image
   * @param letterboxed Destination Mat that receives the letterboxed image
   * @param newShape The target size
   * @param color The padding color
   * @return The half-padding offsets and the scale factor that were applied
   */
  private static Letterbox letterbox(Mat frame, Mat letterboxed, Size newShape, Scalar color) {
    // from https://github.com/ultralytics/yolov5/issues/8427#issuecomment-1172469631
    var frameSize = frame.size();
    var r = Math.min(newShape.height / frameSize.height, newShape.width / frameSize.width);

    var newUnpad = new Size(Math.round(frameSize.width * r), Math.round(frameSize.height * r));

    if (!(frameSize.equals(newUnpad))) {
      Imgproc.resize(frame, letterboxed, newUnpad, Imgproc.INTER_LINEAR);
    } else {
      frame.copyTo(letterboxed);
    }

    // Total padding per axis, split evenly between the two sides
    var dw = newShape.width - newUnpad.width;
    var dh = newShape.height - newUnpad.height;

    dw /= 2;
    dh /= 2;

    // The -0.1/+0.1 nudges make an odd total padding round down on one side and up on the other
    int top = (int) (Math.round(dh - 0.1f));
    int bottom = (int) (Math.round(dh + 0.1f));
    int left = (int) (Math.round(dw - 0.1f));
    int right = (int) (Math.round(dw + 0.1f));
    Core.copyMakeBorder(
        letterboxed, letterboxed, top, bottom, left, right, Core.BORDER_CONSTANT, color);

    return new Letterbox(dw, dh, r);
  }

  /** Parameters consumed by {@link RknnDetectionPipe}. */
  public static class RknnDetectionPipeParams {
    /** Passed to the detector as its third argument (after the NMS threshold). */
    public double confidence;

    /** Passed to the detector as its second argument. */
    public double nms;

    /** Not read anywhere inside this pipe. */
    public int max_detections;

    public RknnDetectionPipeParams() {}
  }

  /** Returns the classes reported by the detector. */
  public List<String> getClassNames() {
    return detector.getClasses();
  }

  /** Releases the detector. */
  @Override
  public void release() {
    detector.release();
  }
}

View File

@@ -18,13 +18,17 @@
package org.photonvision.vision.pipeline;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;
import org.photonvision.common.configuration.NeuralNetworkModelManager;
import org.photonvision.vision.frame.Frame;
import org.photonvision.vision.frame.FrameThresholdType;
import org.photonvision.vision.objects.Model;
import org.photonvision.vision.objects.NullModel;
import org.photonvision.vision.opencv.DualOffsetValues;
import org.photonvision.vision.pipe.CVPipe.CVPipeResult;
import org.photonvision.vision.pipe.impl.*;
import org.photonvision.vision.pipe.impl.RknnDetectionPipe.RknnDetectionPipeParams;
import org.photonvision.vision.pipe.impl.ObjectDetectionPipe.ObjectDetectionPipeParams;
import org.photonvision.vision.pipeline.result.CVPipelineResult;
import org.photonvision.vision.target.PotentialTarget;
import org.photonvision.vision.target.TargetOrientation;
@@ -33,7 +37,7 @@ import org.photonvision.vision.target.TrackedTarget;
public class ObjectDetectionPipeline
extends CVPipeline<CVPipelineResult, ObjectDetectionPipelineSettings> {
private final CalculateFPSPipe calculateFPSPipe = new CalculateFPSPipe();
private final RknnDetectionPipe rknnPipe = new RknnDetectionPipe();
private final ObjectDetectionPipe objectDetectorPipe = new ObjectDetectionPipe();
private final SortContoursPipe sortContoursPipe = new SortContoursPipe();
private final Collect2dTargetsPipe collect2dTargetsPipe = new Collect2dTargetsPipe();
private final FilterObjectDetectionsPipe filterContoursPipe = new FilterObjectDetectionsPipe();
@@ -52,11 +56,25 @@ public class ObjectDetectionPipeline
@Override
protected void setPipeParamsImpl() {
// this needs to be based off of the current backend selected!!
var params = new RknnDetectionPipeParams();
var params = new ObjectDetectionPipeParams();
params.confidence = settings.confidence;
params.nms = settings.nms;
rknnPipe.setParams(params);
Optional<Model> selectedModel =
NeuralNetworkModelManager.getInstance().getModel(settings.model);
// If the desired model couldn't be found, log an error and try to use the default model
if (selectedModel.isEmpty()) {
selectedModel = NeuralNetworkModelManager.getInstance().getDefaultModel();
}
// If the model remains empty, use the NullModel
if (selectedModel.isEmpty()) {
selectedModel = Optional.of(NullModel.getInstance());
}
params.model = selectedModel.get();
objectDetectorPipe.setParams(params);
DualOffsetValues dualOffsetValues =
new DualOffsetValues(
@@ -97,11 +115,11 @@ public class ObjectDetectionPipeline
// ***************** change based on backend ***********************
CVPipeResult<List<NeuralNetworkPipeResult>> rknnResult = rknnPipe.run(frame.colorImage);
CVPipeResult<List<NeuralNetworkPipeResult>> rknnResult =
objectDetectorPipe.run(frame.colorImage);
sumPipeNanosElapsed += rknnResult.nanosElapsed;
List<NeuralNetworkPipeResult> targetList;
var names = rknnPipe.getClassNames();
var names = objectDetectorPipe.getClassNames();
frame.colorImage.getMat().copyTo(frame.processedImage.getMat());
@@ -130,7 +148,7 @@ public class ObjectDetectionPipeline
@Override
public void release() {
rknnPipe.release();
objectDetectorPipe.release();
super.release();
}
}

View File

@@ -17,9 +17,13 @@
package org.photonvision.vision.pipeline;
import org.photonvision.common.configuration.NeuralNetworkModelManager;
import org.photonvision.vision.objects.Model;
public class ObjectDetectionPipelineSettings extends AdvancedPipelineSettings {
public double confidence;
public double nms; // non maximal suppression
public String model;
public ObjectDetectionPipelineSettings() {
super();
@@ -30,5 +34,7 @@ public class ObjectDetectionPipelineSettings extends AdvancedPipelineSettings {
ledMode = false;
confidence = .9;
nms = .45;
model =
NeuralNetworkModelManager.getInstance().getDefaultModel().map(Model::getName).orElse("");
}
}

View File

@@ -423,6 +423,11 @@ public class Main {
ConfigManager.getInstance().load(); // init config manager
ConfigManager.getInstance().requestSave();
logger.info("Loading ML models...");
var modelManager = NeuralNetworkModelManager.getInstance();
modelManager.extractModels(ConfigManager.getInstance().getModelsDirectory());
modelManager.discoverModels(ConfigManager.getInstance().getModelsDirectory());
logger.debug("Loading HardwareManager...");
// Force load the hardware manager
HardwareManager.getInstance();
@@ -434,10 +439,6 @@ public class Main {
NetworkTablesManager.getInstance()
.setConfig(ConfigManager.getInstance().getConfig().getNetworkConfig());
logger.info("Loading ML models");
NeuralNetworkModelManager.getInstance()
.initialize(ConfigManager.getInstance().getModelsDirectory());
if (isSmoketest) {
logger.info("PhotonVision base functionality loaded -- smoketest complete");
System.exit(0);