diff --git a/hal/src/main/native/systemcore/AddressableLED.cpp b/hal/src/main/native/systemcore/AddressableLED.cpp index 0386192586..a9eb01fb93 100644 --- a/hal/src/main/native/systemcore/AddressableLED.cpp +++ b/hal/src/main/native/systemcore/AddressableLED.cpp @@ -15,6 +15,7 @@ #include #include +#include "AddressableLEDSimd.h" #include "HALInitializer.h" #include "HALInternal.h" #include "PortsInternal.h" @@ -43,6 +44,35 @@ struct AddressableLEDs { static AddressableLEDs* leds; +void ConvertAndCopyLEDData(void* dst, const struct HAL_AddressableLEDData* src, + int32_t len, HAL_AddressableLEDColorOrder order) { + using namespace hal::detail; + switch (order) { + case HAL_ALED_RGB: + std::memcpy(dst, src, len * sizeof(HAL_AddressableLEDData)); + break; + case HAL_ALED_RBG: + ConvertPixels(reinterpret_cast(src), + reinterpret_cast(dst), len); + break; + case HAL_ALED_BGR: + ConvertPixels(reinterpret_cast(src), + reinterpret_cast(dst), len); + break; + case HAL_ALED_BRG: + ConvertPixels(reinterpret_cast(src), + reinterpret_cast(dst), len); + break; + case HAL_ALED_GBR: + ConvertPixels(reinterpret_cast(src), + reinterpret_cast(dst), len); + break; + case HAL_ALED_GRB: + ConvertPixels(reinterpret_cast(src), + reinterpret_cast(dst), len); + break; + } +} } // namespace namespace hal::init { @@ -148,8 +178,7 @@ void HAL_SetAddressableLEDData(int32_t start, int32_t length, *status = PARAMETER_OUT_OF_RANGE; return; } - // TODO: handle color order - std::memcpy(&leds->s_buffer[start * 3], data, length * 3); + ConvertAndCopyLEDData(&leds->s_buffer[start * 3], data, length, colorOrder); leds->rawPub.Set(leds->s_buffer); } } // extern "C" diff --git a/hal/src/main/native/systemcore/AddressableLEDSimd.h b/hal/src/main/native/systemcore/AddressableLEDSimd.h new file mode 100644 index 0000000000..874444f5ef --- /dev/null +++ b/hal/src/main/native/systemcore/AddressableLEDSimd.h @@ -0,0 +1,224 @@ +// Copyright (c) FIRST and other WPILib contributors. +// Open Source Software; you can modify and/or share it under the terms of +// the WPILib BSD license file in the root directory of this project. + +#pragma once +#include + +#include "hal/AddressableLEDTypes.h" +#include "simd/simd.h" + +namespace hal::detail { + +constexpr size_t kPixelSize = 3; +static_assert(sizeof(HAL_AddressableLEDData) == kPixelSize); +static_assert(offsetof(HAL_AddressableLEDData, r) == 0); +static_assert(offsetof(HAL_AddressableLEDData, g) == 1); +static_assert(offsetof(HAL_AddressableLEDData, b) == 2); + +using namespace Simd::Neon; + +template +using ConvertFunc = void (*)(T); + +/* + * Conversion funtions perform in-place conversion by swapping elements. + * The names of the functions indicate the desired wire output order. + */ + +template +void ToRBG(T val) { + std::swap(val[1], val[2]); // swap G and B +} + +template +void ToBGR(T val) { + std::swap(val[0], val[2]); // swap B and R +} + +template +void ToBRG(T val) { + std::swap(val[0], val[2]); // swap R and B + std::swap(val[1], val[2]); // swap R and G +} + +template +void ToGBR(T val) { + std::swap(val[0], val[2]); // swap R and B + std::swap(val[0], val[1]); // swap B and G +} + +template +void ToGRB(T val) { + std::swap(val[0], val[1]); // swap R and G +} + +/* + * We don't use the alignment argument on the Load/Store functions (set it to + * false) because aarch64 doesn't have alignment assertions. + */ + +/** + * Copies 16 pixels from src to dst using NEON instructions, converting using + * the provided conversion function. + * @tparam the conversion function + * @param[in] src The source array + * @param[out] dst the destination array + * @pre src and dst must contain at least 48 bytes (16 pixels) + */ +template Convert> +inline void ConvertNEON_16(const uint8_t* src, uint8_t* dst) { + uint8x16x3_t pixels = Load3(src); + Convert(pixels.val); + Store3(dst, pixels); +} + +/** + * Copies 8 pixels from src to dst using NEON instructions, converting using + * the provided conversion function. + * @tparam the conversion function + * @param[in] src The source array + * @param[out] dst the destination array + * @pre src and dst must contain at least 24 bytes (8 pixels) + */ +template Convert> +inline void ConvertNEON_8(const uint8_t* src, uint8_t* dst) { + uint8x8x3_t pixels = LoadHalf3(src); + Convert(pixels.val); + Store3(dst, pixels); +} + +/** + * Copies 16 pixels from src to dst, converting from RGB to the specified order. + * @tparam order the color order to convert to + * @param[in] src The source array + * @param[out] dst the destination array + * @pre src and dst must contain at least 48 bytes (16 pixels) + */ +template +void Convert16Pixels(const uint8_t* src, uint8_t* dst) { + switch (order) { + case HAL_ALED_RBG: + ConvertNEON_16(src, dst); + break; + case HAL_ALED_BGR: + ConvertNEON_16(src, dst); + break; + case HAL_ALED_BRG: + ConvertNEON_16(src, dst); + break; + case HAL_ALED_GBR: + ConvertNEON_16(src, dst); + break; + case HAL_ALED_GRB: + ConvertNEON_16(src, dst); + break; + } +} + +/** + * Copies 8 pixels from src to dst, converting from RGB to the specified order. + * @tparam order the color order to convert to + * @param[in] src The source array + * @param[out] dst the destination array + * @pre src and dst must contain at least 24 bytes (8 pixels) + */ +template +void Convert8Pixels(const uint8_t* src, uint8_t* dst) { + switch (order) { + case HAL_ALED_RBG: + ConvertNEON_8(src, dst); + break; + case HAL_ALED_BGR: + ConvertNEON_8(src, dst); + break; + case HAL_ALED_BRG: + ConvertNEON_8(src, dst); + break; + case HAL_ALED_GBR: + ConvertNEON_8(src, dst); + break; + case HAL_ALED_GRB: + ConvertNEON_8(src, dst); + break; + } +} + +/** + * Copies 1 pixel from src to dst, converting from RGB to the specified order. + * @param[in] order the color order to convert to + * @param[in] in the source array + * @param[out] the destination array + * @pre in and out must contain at least 1 pixel (3 bytes). + */ +inline void Convert1Pixel(HAL_AddressableLEDColorOrder order, + const uint8_t* src, uint8_t* dst) { + uint8_t tmp[kPixelSize]; + std::memcpy(tmp, src, kPixelSize); // Load 3 bytes + // convert based on order + switch (order) { + case HAL_ALED_RGB: + break; // this shouldn't ever get hit but compiler + // wants this to be exhaustive + case HAL_ALED_RBG: + ToRBG(tmp); + break; + case HAL_ALED_BGR: + ToBGR(tmp); + break; + case HAL_ALED_BRG: + ToBRG(tmp); + break; + case HAL_ALED_GBR: + ToGBR(tmp); + break; + case HAL_ALED_GRB: + ToGRB(tmp); + break; + } + std::memcpy(dst, tmp, kPixelSize); // Store 3 bytes +} + +/** + * Copies len pixels from src to dst, converting from RGB to the + * specified order. + * @tparam order the color order to convert to + * @param src the source array + * @param dst the destination array + * @param pixelCount the number of pixels to convert and copy + */ +template +void ConvertPixels(const uint8_t* src, uint8_t* dst, size_t len) { + if (len >= 16) { + // Stride of 1 16-pixel conversion operation. (3 NEON Q registers) + constexpr size_t stride = A * kPixelSize; + // size of whole copy in bytes + const size_t size = len * kPixelSize; + // number of bytes we can copy with whole 16-pixel strides + const size_t aligned = Simd::AlignLo(size, stride); + for (size_t i = 0; i < aligned; i += stride) { + Convert16Pixels(src + i, dst + i); + } + if (aligned < size) { + const size_t recopyOffset = size - stride; + Convert16Pixels( + src + recopyOffset, + dst + recopyOffset); // copy last 16 pixels, possibly recopying. + } + } else if (len >= 8) { + // If len between 8 and 16, we can do 1 or 2 8-pixel copies + Convert8Pixels(src, dst); + if (len > 8) { + const size_t recopyOffset = (len - HA) * kPixelSize; + Convert8Pixels( + src + recopyOffset, + dst + recopyOffset); // copy last 8 pixels, possibly recopying + } + } else { + // Just copy pixel-by-pixel for <8 + for (size_t i = 0; i < len; i += kPixelSize) { + Convert1Pixel(order, src + i, dst + i); + } + } +} +} // namespace hal::detail diff --git a/hal/src/main/native/systemcore/simd/simd.h b/hal/src/main/native/systemcore/simd/simd.h new file mode 100644 index 0000000000..07ec891b82 --- /dev/null +++ b/hal/src/main/native/systemcore/simd/simd.h @@ -0,0 +1,173 @@ +// Copyright (c) FIRST and other WPILib contributors. +// Open Source Software; you can modify and/or share it under the terms of +// the WPILib BSD license file in the root directory of this project. + +// This file contains modified snippets from the Simd Library by Ihar Yermalayeu +// (http://ermig1979.github.io/Simd). The original source file names are listed +// above each section. +/* + * Simd Library (http://ermig1979.github.io/Simd). + * + * Copyright (c) 2011-2024 Yermalayeu Ihar. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#pragma once + +#include + +#include +#include + +// SimdLib.h +#define SIMD_INLINE inline __attribute__((always_inline)) + +// SimdMemory.h +namespace Simd { +SIMD_INLINE size_t AlignLo(size_t size, size_t align) { + return size & ~(align - 1); +} + +SIMD_INLINE void* AlignLo(const void* ptr, size_t align) { + return reinterpret_cast(reinterpret_cast(ptr) & ~(align - 1)); +} + +SIMD_INLINE bool Aligned(size_t size, size_t align) { + return size == AlignLo(size, align); +} + +SIMD_INLINE bool Aligned(const void* ptr, size_t align) { + return ptr == AlignLo(ptr, align); +} +} // namespace Simd +namespace Simd::Neon { +SIMD_INLINE bool Aligned(size_t size, size_t align = sizeof(uint8x16_t)) { + return Simd::Aligned(size, align); +} + +SIMD_INLINE bool Aligned(const void* ptr, size_t align = sizeof(uint8x16_t)) { + return Simd::Aligned(ptr, align); +} +} // namespace Simd::Neon + +// SimdConst.h +namespace Simd::Neon { +constexpr size_t A = sizeof(uint8x16_t); +constexpr size_t DA = 2 * A; +constexpr size_t QA = 4 * A; +constexpr size_t OA = 8 * A; +constexpr size_t HA = A / 2; +} // namespace Simd::Neon + +// SimdLoad.h +namespace Simd::Neon { +template +SIMD_INLINE uint8x8x3_t LoadHalf3(const uint8_t* p); + +template <> +SIMD_INLINE uint8x8x3_t LoadHalf3(const uint8_t* p) { +#if defined(__GNUC__) && SIMD_NEON_PREFECH_SIZE + __builtin_prefetch(p + SIMD_NEON_PREFECH_SIZE); +#endif + return vld3_u8(p); +} + +template <> +SIMD_INLINE uint8x8x3_t LoadHalf3(const uint8_t* p) { +#if defined(__GNUC__) +#if SIMD_NEON_PREFECH_SIZE + __builtin_prefetch(p + SIMD_NEON_PREFECH_SIZE); +#endif + uint8_t* _p = static_cast(__builtin_assume_aligned(p, 8)); + return vld3_u8(_p); +#elif defined(_MSC_VER) + return vld3_u8_ex(p, 64); +#else + return vld3_u8(p); +#endif +} +template +SIMD_INLINE uint8x16x3_t Load3(const uint8_t* p); + +template <> +SIMD_INLINE uint8x16x3_t Load3(const uint8_t* p) { +#if defined(__GNUC__) && SIMD_NEON_PREFECH_SIZE + __builtin_prefetch(p + SIMD_NEON_PREFECH_SIZE); +#endif + return vld3q_u8(p); +} + +template <> +SIMD_INLINE uint8x16x3_t Load3(const uint8_t* p) { +#if defined(__GNUC__) +#if SIMD_NEON_PREFECH_SIZE + __builtin_prefetch(p + SIMD_NEON_PREFECH_SIZE); +#endif + uint8_t* _p = static_cast(__builtin_assume_aligned(p, 16)); + return vld3q_u8(_p); +#elif defined(_MSC_VER) + return vld3q_u8_ex(p, 128); +#else + return vld3q_u8(p); +#endif +} + +// SimdStore.h +template +SIMD_INLINE void Store3(uint8_t* p, uint8x16x3_t a); + +template <> +SIMD_INLINE void Store3(uint8_t* p, uint8x16x3_t a) { + vst3q_u8(p, a); +} + +template <> +SIMD_INLINE void Store3(uint8_t* p, uint8x16x3_t a) { +#if defined(__GNUC__) + uint8_t* _p = static_cast(__builtin_assume_aligned(p, 16)); + vst3q_u8(_p, a); +#elif defined(_MSC_VER) + vst3q_u8_ex(p, a, 128); +#else + vst3q_u8(p, a); +#endif +} + +template +SIMD_INLINE void Store3(uint8_t* p, uint8x8x3_t a); + +template <> +SIMD_INLINE void Store3(uint8_t* p, uint8x8x3_t a) { + vst3_u8(p, a); +} + +template <> +SIMD_INLINE void Store3(uint8_t* p, uint8x8x3_t a) { +#if defined(__GNUC__) + uint8_t* _p = static_cast(__builtin_assume_aligned(p, 8)); + vst3_u8(_p, a); +#elif defined(_MSC_VER) + vst3_u8_ex(p, a, 64); +#else + vst3_u8(p, a); +#endif +} + +} // namespace Simd::Neon