mirror of
https://github.com/wpilibsuite/allwpilib
synced 2026-06-19 00:41:43 +00:00
[hal] AddressableLED: Restore alternative color order support (#8130)
Unlike armv7, aarch64 doesn't have alignment assertions for SIMD instructions. The compiler output between the aligned and unaligned variants is the same.
This commit is contained in:
@@ -15,6 +15,7 @@
|
||||
#include <networktables/NetworkTableInstance.h>
|
||||
#include <networktables/RawTopic.h>
|
||||
|
||||
#include "AddressableLEDSimd.h"
|
||||
#include "HALInitializer.h"
|
||||
#include "HALInternal.h"
|
||||
#include "PortsInternal.h"
|
||||
@@ -43,6 +44,35 @@ struct AddressableLEDs {
|
||||
|
||||
static AddressableLEDs* leds;
|
||||
|
||||
void ConvertAndCopyLEDData(void* dst, const struct HAL_AddressableLEDData* src,
|
||||
int32_t len, HAL_AddressableLEDColorOrder order) {
|
||||
using namespace hal::detail;
|
||||
switch (order) {
|
||||
case HAL_ALED_RGB:
|
||||
std::memcpy(dst, src, len * sizeof(HAL_AddressableLEDData));
|
||||
break;
|
||||
case HAL_ALED_RBG:
|
||||
ConvertPixels<HAL_ALED_RBG>(reinterpret_cast<const uint8_t*>(src),
|
||||
reinterpret_cast<uint8_t*>(dst), len);
|
||||
break;
|
||||
case HAL_ALED_BGR:
|
||||
ConvertPixels<HAL_ALED_BGR>(reinterpret_cast<const uint8_t*>(src),
|
||||
reinterpret_cast<uint8_t*>(dst), len);
|
||||
break;
|
||||
case HAL_ALED_BRG:
|
||||
ConvertPixels<HAL_ALED_BRG>(reinterpret_cast<const uint8_t*>(src),
|
||||
reinterpret_cast<uint8_t*>(dst), len);
|
||||
break;
|
||||
case HAL_ALED_GBR:
|
||||
ConvertPixels<HAL_ALED_GBR>(reinterpret_cast<const uint8_t*>(src),
|
||||
reinterpret_cast<uint8_t*>(dst), len);
|
||||
break;
|
||||
case HAL_ALED_GRB:
|
||||
ConvertPixels<HAL_ALED_GRB>(reinterpret_cast<const uint8_t*>(src),
|
||||
reinterpret_cast<uint8_t*>(dst), len);
|
||||
break;
|
||||
}
|
||||
}
|
||||
} // namespace
|
||||
|
||||
namespace hal::init {
|
||||
@@ -148,8 +178,7 @@ void HAL_SetAddressableLEDData(int32_t start, int32_t length,
|
||||
*status = PARAMETER_OUT_OF_RANGE;
|
||||
return;
|
||||
}
|
||||
// TODO: handle color order
|
||||
std::memcpy(&leds->s_buffer[start * 3], data, length * 3);
|
||||
ConvertAndCopyLEDData(&leds->s_buffer[start * 3], data, length, colorOrder);
|
||||
leds->rawPub.Set(leds->s_buffer);
|
||||
}
|
||||
} // extern "C"
|
||||
|
||||
224
hal/src/main/native/systemcore/AddressableLEDSimd.h
Normal file
224
hal/src/main/native/systemcore/AddressableLEDSimd.h
Normal file
@@ -0,0 +1,224 @@
|
||||
// Copyright (c) FIRST and other WPILib contributors.
|
||||
// Open Source Software; you can modify and/or share it under the terms of
|
||||
// the WPILib BSD license file in the root directory of this project.
|
||||
|
||||
#pragma once
|
||||
#include <utility>
|
||||
|
||||
#include "hal/AddressableLEDTypes.h"
|
||||
#include "simd/simd.h"
|
||||
|
||||
namespace hal::detail {
|
||||
|
||||
constexpr size_t kPixelSize = 3;
|
||||
static_assert(sizeof(HAL_AddressableLEDData) == kPixelSize);
|
||||
static_assert(offsetof(HAL_AddressableLEDData, r) == 0);
|
||||
static_assert(offsetof(HAL_AddressableLEDData, g) == 1);
|
||||
static_assert(offsetof(HAL_AddressableLEDData, b) == 2);
|
||||
|
||||
using namespace Simd::Neon;
|
||||
|
||||
template <typename T>
|
||||
using ConvertFunc = void (*)(T);
|
||||
|
||||
/*
|
||||
* Conversion funtions perform in-place conversion by swapping elements.
|
||||
* The names of the functions indicate the desired wire output order.
|
||||
*/
|
||||
|
||||
template <typename T>
|
||||
void ToRBG(T val) {
|
||||
std::swap(val[1], val[2]); // swap G and B
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void ToBGR(T val) {
|
||||
std::swap(val[0], val[2]); // swap B and R
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void ToBRG(T val) {
|
||||
std::swap(val[0], val[2]); // swap R and B
|
||||
std::swap(val[1], val[2]); // swap R and G
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void ToGBR(T val) {
|
||||
std::swap(val[0], val[2]); // swap R and B
|
||||
std::swap(val[0], val[1]); // swap B and G
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void ToGRB(T val) {
|
||||
std::swap(val[0], val[1]); // swap R and G
|
||||
}
|
||||
|
||||
/*
|
||||
* We don't use the alignment argument on the Load/Store functions (set it to
|
||||
* false) because aarch64 doesn't have alignment assertions.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Copies 16 pixels from src to dst using NEON instructions, converting using
|
||||
* the provided conversion function.
|
||||
* @tparam the conversion function
|
||||
* @param[in] src The source array
|
||||
* @param[out] dst the destination array
|
||||
* @pre src and dst must contain at least 48 bytes (16 pixels)
|
||||
*/
|
||||
template <ConvertFunc<uint8x16_t*> Convert>
|
||||
inline void ConvertNEON_16(const uint8_t* src, uint8_t* dst) {
|
||||
uint8x16x3_t pixels = Load3<false>(src);
|
||||
Convert(pixels.val);
|
||||
Store3<false>(dst, pixels);
|
||||
}
|
||||
|
||||
/**
|
||||
* Copies 8 pixels from src to dst using NEON instructions, converting using
|
||||
* the provided conversion function.
|
||||
* @tparam the conversion function
|
||||
* @param[in] src The source array
|
||||
* @param[out] dst the destination array
|
||||
* @pre src and dst must contain at least 24 bytes (8 pixels)
|
||||
*/
|
||||
template <ConvertFunc<uint8x8_t*> Convert>
|
||||
inline void ConvertNEON_8(const uint8_t* src, uint8_t* dst) {
|
||||
uint8x8x3_t pixels = LoadHalf3<false>(src);
|
||||
Convert(pixels.val);
|
||||
Store3<false>(dst, pixels);
|
||||
}
|
||||
|
||||
/**
|
||||
* Copies 16 pixels from src to dst, converting from RGB to the specified order.
|
||||
* @tparam order the color order to convert to
|
||||
* @param[in] src The source array
|
||||
* @param[out] dst the destination array
|
||||
* @pre src and dst must contain at least 48 bytes (16 pixels)
|
||||
*/
|
||||
template <HAL_AddressableLEDColorOrder order>
|
||||
void Convert16Pixels(const uint8_t* src, uint8_t* dst) {
|
||||
switch (order) {
|
||||
case HAL_ALED_RBG:
|
||||
ConvertNEON_16<ToRBG>(src, dst);
|
||||
break;
|
||||
case HAL_ALED_BGR:
|
||||
ConvertNEON_16<ToBGR>(src, dst);
|
||||
break;
|
||||
case HAL_ALED_BRG:
|
||||
ConvertNEON_16<ToBRG>(src, dst);
|
||||
break;
|
||||
case HAL_ALED_GBR:
|
||||
ConvertNEON_16<ToGBR>(src, dst);
|
||||
break;
|
||||
case HAL_ALED_GRB:
|
||||
ConvertNEON_16<ToGRB>(src, dst);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Copies 8 pixels from src to dst, converting from RGB to the specified order.
|
||||
* @tparam order the color order to convert to
|
||||
* @param[in] src The source array
|
||||
* @param[out] dst the destination array
|
||||
* @pre src and dst must contain at least 24 bytes (8 pixels)
|
||||
*/
|
||||
template <HAL_AddressableLEDColorOrder order>
|
||||
void Convert8Pixels(const uint8_t* src, uint8_t* dst) {
|
||||
switch (order) {
|
||||
case HAL_ALED_RBG:
|
||||
ConvertNEON_8<ToRBG>(src, dst);
|
||||
break;
|
||||
case HAL_ALED_BGR:
|
||||
ConvertNEON_8<ToBGR>(src, dst);
|
||||
break;
|
||||
case HAL_ALED_BRG:
|
||||
ConvertNEON_8<ToBRG>(src, dst);
|
||||
break;
|
||||
case HAL_ALED_GBR:
|
||||
ConvertNEON_8<ToGBR>(src, dst);
|
||||
break;
|
||||
case HAL_ALED_GRB:
|
||||
ConvertNEON_8<ToGRB>(src, dst);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Copies 1 pixel from src to dst, converting from RGB to the specified order.
|
||||
* @param[in] order the color order to convert to
|
||||
* @param[in] in the source array
|
||||
* @param[out] the destination array
|
||||
* @pre in and out must contain at least 1 pixel (3 bytes).
|
||||
*/
|
||||
inline void Convert1Pixel(HAL_AddressableLEDColorOrder order,
|
||||
const uint8_t* src, uint8_t* dst) {
|
||||
uint8_t tmp[kPixelSize];
|
||||
std::memcpy(tmp, src, kPixelSize); // Load 3 bytes
|
||||
// convert based on order
|
||||
switch (order) {
|
||||
case HAL_ALED_RGB:
|
||||
break; // this shouldn't ever get hit but compiler
|
||||
// wants this to be exhaustive
|
||||
case HAL_ALED_RBG:
|
||||
ToRBG(tmp);
|
||||
break;
|
||||
case HAL_ALED_BGR:
|
||||
ToBGR(tmp);
|
||||
break;
|
||||
case HAL_ALED_BRG:
|
||||
ToBRG(tmp);
|
||||
break;
|
||||
case HAL_ALED_GBR:
|
||||
ToGBR(tmp);
|
||||
break;
|
||||
case HAL_ALED_GRB:
|
||||
ToGRB(tmp);
|
||||
break;
|
||||
}
|
||||
std::memcpy(dst, tmp, kPixelSize); // Store 3 bytes
|
||||
}
|
||||
|
||||
/**
|
||||
* Copies len pixels from src to dst, converting from RGB to the
|
||||
* specified order.
|
||||
* @tparam order the color order to convert to
|
||||
* @param src the source array
|
||||
* @param dst the destination array
|
||||
* @param pixelCount the number of pixels to convert and copy
|
||||
*/
|
||||
template <HAL_AddressableLEDColorOrder order>
|
||||
void ConvertPixels(const uint8_t* src, uint8_t* dst, size_t len) {
|
||||
if (len >= 16) {
|
||||
// Stride of 1 16-pixel conversion operation. (3 NEON Q registers)
|
||||
constexpr size_t stride = A * kPixelSize;
|
||||
// size of whole copy in bytes
|
||||
const size_t size = len * kPixelSize;
|
||||
// number of bytes we can copy with whole 16-pixel strides
|
||||
const size_t aligned = Simd::AlignLo(size, stride);
|
||||
for (size_t i = 0; i < aligned; i += stride) {
|
||||
Convert16Pixels<order>(src + i, dst + i);
|
||||
}
|
||||
if (aligned < size) {
|
||||
const size_t recopyOffset = size - stride;
|
||||
Convert16Pixels<order>(
|
||||
src + recopyOffset,
|
||||
dst + recopyOffset); // copy last 16 pixels, possibly recopying.
|
||||
}
|
||||
} else if (len >= 8) {
|
||||
// If len between 8 and 16, we can do 1 or 2 8-pixel copies
|
||||
Convert8Pixels<order>(src, dst);
|
||||
if (len > 8) {
|
||||
const size_t recopyOffset = (len - HA) * kPixelSize;
|
||||
Convert8Pixels<order>(
|
||||
src + recopyOffset,
|
||||
dst + recopyOffset); // copy last 8 pixels, possibly recopying
|
||||
}
|
||||
} else {
|
||||
// Just copy pixel-by-pixel for <8
|
||||
for (size_t i = 0; i < len; i += kPixelSize) {
|
||||
Convert1Pixel(order, src + i, dst + i);
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace hal::detail
|
||||
173
hal/src/main/native/systemcore/simd/simd.h
Normal file
173
hal/src/main/native/systemcore/simd/simd.h
Normal file
@@ -0,0 +1,173 @@
|
||||
// Copyright (c) FIRST and other WPILib contributors.
|
||||
// Open Source Software; you can modify and/or share it under the terms of
|
||||
// the WPILib BSD license file in the root directory of this project.
|
||||
|
||||
// This file contains modified snippets from the Simd Library by Ihar Yermalayeu
|
||||
// (http://ermig1979.github.io/Simd). The original source file names are listed
|
||||
// above each section.
|
||||
/*
|
||||
* Simd Library (http://ermig1979.github.io/Simd).
|
||||
*
|
||||
* Copyright (c) 2011-2024 Yermalayeu Ihar.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstring>
|
||||
|
||||
// SimdLib.h
|
||||
#define SIMD_INLINE inline __attribute__((always_inline))
|
||||
|
||||
// SimdMemory.h
|
||||
namespace Simd {
|
||||
SIMD_INLINE size_t AlignLo(size_t size, size_t align) {
|
||||
return size & ~(align - 1);
|
||||
}
|
||||
|
||||
SIMD_INLINE void* AlignLo(const void* ptr, size_t align) {
|
||||
return reinterpret_cast<void*>(reinterpret_cast<size_t>(ptr) & ~(align - 1));
|
||||
}
|
||||
|
||||
SIMD_INLINE bool Aligned(size_t size, size_t align) {
|
||||
return size == AlignLo(size, align);
|
||||
}
|
||||
|
||||
SIMD_INLINE bool Aligned(const void* ptr, size_t align) {
|
||||
return ptr == AlignLo(ptr, align);
|
||||
}
|
||||
} // namespace Simd
|
||||
namespace Simd::Neon {
|
||||
SIMD_INLINE bool Aligned(size_t size, size_t align = sizeof(uint8x16_t)) {
|
||||
return Simd::Aligned(size, align);
|
||||
}
|
||||
|
||||
SIMD_INLINE bool Aligned(const void* ptr, size_t align = sizeof(uint8x16_t)) {
|
||||
return Simd::Aligned(ptr, align);
|
||||
}
|
||||
} // namespace Simd::Neon
|
||||
|
||||
// SimdConst.h
|
||||
namespace Simd::Neon {
|
||||
constexpr size_t A = sizeof(uint8x16_t);
|
||||
constexpr size_t DA = 2 * A;
|
||||
constexpr size_t QA = 4 * A;
|
||||
constexpr size_t OA = 8 * A;
|
||||
constexpr size_t HA = A / 2;
|
||||
} // namespace Simd::Neon
|
||||
|
||||
// SimdLoad.h
|
||||
namespace Simd::Neon {
|
||||
template <bool align>
|
||||
SIMD_INLINE uint8x8x3_t LoadHalf3(const uint8_t* p);
|
||||
|
||||
template <>
|
||||
SIMD_INLINE uint8x8x3_t LoadHalf3<false>(const uint8_t* p) {
|
||||
#if defined(__GNUC__) && SIMD_NEON_PREFECH_SIZE
|
||||
__builtin_prefetch(p + SIMD_NEON_PREFECH_SIZE);
|
||||
#endif
|
||||
return vld3_u8(p);
|
||||
}
|
||||
|
||||
template <>
|
||||
SIMD_INLINE uint8x8x3_t LoadHalf3<true>(const uint8_t* p) {
|
||||
#if defined(__GNUC__)
|
||||
#if SIMD_NEON_PREFECH_SIZE
|
||||
__builtin_prefetch(p + SIMD_NEON_PREFECH_SIZE);
|
||||
#endif
|
||||
uint8_t* _p = static_cast<uint8_t*>(__builtin_assume_aligned(p, 8));
|
||||
return vld3_u8(_p);
|
||||
#elif defined(_MSC_VER)
|
||||
return vld3_u8_ex(p, 64);
|
||||
#else
|
||||
return vld3_u8(p);
|
||||
#endif
|
||||
}
|
||||
template <bool align>
|
||||
SIMD_INLINE uint8x16x3_t Load3(const uint8_t* p);
|
||||
|
||||
template <>
|
||||
SIMD_INLINE uint8x16x3_t Load3<false>(const uint8_t* p) {
|
||||
#if defined(__GNUC__) && SIMD_NEON_PREFECH_SIZE
|
||||
__builtin_prefetch(p + SIMD_NEON_PREFECH_SIZE);
|
||||
#endif
|
||||
return vld3q_u8(p);
|
||||
}
|
||||
|
||||
template <>
|
||||
SIMD_INLINE uint8x16x3_t Load3<true>(const uint8_t* p) {
|
||||
#if defined(__GNUC__)
|
||||
#if SIMD_NEON_PREFECH_SIZE
|
||||
__builtin_prefetch(p + SIMD_NEON_PREFECH_SIZE);
|
||||
#endif
|
||||
uint8_t* _p = static_cast<uint8_t*>(__builtin_assume_aligned(p, 16));
|
||||
return vld3q_u8(_p);
|
||||
#elif defined(_MSC_VER)
|
||||
return vld3q_u8_ex(p, 128);
|
||||
#else
|
||||
return vld3q_u8(p);
|
||||
#endif
|
||||
}
|
||||
|
||||
// SimdStore.h
|
||||
template <bool align>
|
||||
SIMD_INLINE void Store3(uint8_t* p, uint8x16x3_t a);
|
||||
|
||||
template <>
|
||||
SIMD_INLINE void Store3<false>(uint8_t* p, uint8x16x3_t a) {
|
||||
vst3q_u8(p, a);
|
||||
}
|
||||
|
||||
template <>
|
||||
SIMD_INLINE void Store3<true>(uint8_t* p, uint8x16x3_t a) {
|
||||
#if defined(__GNUC__)
|
||||
uint8_t* _p = static_cast<uint8_t*>(__builtin_assume_aligned(p, 16));
|
||||
vst3q_u8(_p, a);
|
||||
#elif defined(_MSC_VER)
|
||||
vst3q_u8_ex(p, a, 128);
|
||||
#else
|
||||
vst3q_u8(p, a);
|
||||
#endif
|
||||
}
|
||||
|
||||
template <bool align>
|
||||
SIMD_INLINE void Store3(uint8_t* p, uint8x8x3_t a);
|
||||
|
||||
template <>
|
||||
SIMD_INLINE void Store3<false>(uint8_t* p, uint8x8x3_t a) {
|
||||
vst3_u8(p, a);
|
||||
}
|
||||
|
||||
template <>
|
||||
SIMD_INLINE void Store3<true>(uint8_t* p, uint8x8x3_t a) {
|
||||
#if defined(__GNUC__)
|
||||
uint8_t* _p = static_cast<uint8_t*>(__builtin_assume_aligned(p, 8));
|
||||
vst3_u8(_p, a);
|
||||
#elif defined(_MSC_VER)
|
||||
vst3_u8_ex(p, a, 64);
|
||||
#else
|
||||
vst3_u8(p, a);
|
||||
#endif
|
||||
}
|
||||
|
||||
} // namespace Simd::Neon
|
||||
Reference in New Issue
Block a user