// Copyright (c) FIRST and other WPILib contributors. // Open Source Software; you can modify and/or share it under the terms of // the WPILib BSD license file in the root directory of this project. #pragma once #include #include "hal/AddressableLEDTypes.h" #include "simd/simd.h" // Timing info // https://developer.arm.com/documentation/ddi0409/i/instruction-timing/instruction-specific-scheduling/advanced-simd-load-store-instructions?lang=en namespace hal::detail { using namespace Simd::Neon; template using ConvertFunc = void (*)(T); /* * Conversion funtions perform in-place conversion by swapping elements. * The names of the functions indicate the wire output (default GRB), * but the FPGA takes sequences of BGR_. */ template void ToRGB(T val) { std::swap(val[1], val[2]); // swap G and R } template void ToRBG(T val) { std::swap(val[1], val[2]); // swap G and R std::swap(val[0], val[2]); // swap B and G } template void ToBGR(T val) { std::swap(val[0], val[1]); // swap B and G std::swap(val[0], val[2]); // swap G and R } template void ToBRG(T val) { std::swap(val[0], val[1]); // swap B and G } template void ToGBR(T val) { std::swap(val[0], val[2]); // swap B and R } /** * Copies 16 pixels from src to dst using NEON instructions, converting using * the provided conversion function. Optimizes based on alignment of input and * output arrays specified by srcAlign and dstAlign * @tparam srcAlign whether src is aligned to the size of a NEON register (16 * bytes) * @tparam dstAlign whether dst is aligned to the size of a NEON register (16 * bytes) * @tparam the conversion function * @param[in] src The source array * @param[out] dst the destination array * @pre src and dst must contain at least 64 bytes (16 pixels) * @pre if srcAlign is true, src must be 16 byte aligned * @pre if dstAlign is true, src muts be 16 byte aligned */ template Convert> void ConvertNEON_16(const uint8_t* src, uint8_t* dst) { uint8x16x4_t pixels = Load4(src); Convert(pixels.val); Store4(dst, pixels); } /** * Copies 8 pixels from src to dst using NEON instructions, converting using * the provided conversion function. Optimizes based on alignment of input and * output arrays specified by srcAlign and dstAlign * @tparam srcAlign whether src is aligned to the size of a NEON register (16 * bytes) * @tparam dstAlign whether dst is aligned to the size of a NEON register (16 * bytes) * @tparam the conversion function * @param[in] src The source array * @param[out] dst the destination array * @pre src and dst must contain at least 32 bytes (8 pixels) * @pre if srcAlign is true, src must be 16 byte aligned * @pre if dstAlign is true, src muts be 16 byte aligned */ template Convert> void ConvertNEON_8(const uint8_t* src, uint8_t* dst) { uint8x8x4_t pixels = LoadHalf4(src); Convert(pixels.val); Store4(dst, pixels); } /** * Copies 16 pixels from src to dst, converting from GRB (wire order) to order. * Optimizes based on alignment of input and output arrays specified by srcAlign * and dstAlign * @tparam order the color order to convert to * @tparam srcAlign whether src is aligned to the size of a NEON register (16 * bytes) * @tparam dstAlign whether dst is aligned to the size of a NEON register (16 * bytes) * @param[in] src The source array * @param[out] dst the destination array * @pre src and dst must contain at least 64 bytes (16 pixels) * @pre if srcAlign is true, src must be 16 byte aligned * @pre if dstAlign is true, src muts be 16 byte aligned */ template void Convert16Pixels(const uint8_t* src, uint8_t* dst) { switch (order) { case HAL_ALED_RGB: ConvertNEON_16(src, dst); break; case HAL_ALED_RBG: ConvertNEON_16(src, dst); break; case HAL_ALED_BGR: ConvertNEON_16(src, dst); break; case HAL_ALED_BRG: ConvertNEON_16(src, dst); break; case HAL_ALED_GBR: ConvertNEON_16(src, dst); break; } } /** * Copies 8 pixels from src to dst, converting from GRB (wire order) to order. * Optimizes based on alignment of input and output arrays specified by srcAlign * and dstAlign * @tparam order the color order to convert to * @tparam srcAlign whether src is aligned to the size of a NEON register (16 * bytes) * @tparam dstAlign whether dst is aligned to the size of a NEON register (16 * bytes) * @param[in] src The source array * @param[out] dst the destination array * @pre src and dst must contain at least 32 bytes (8 pixels) * @pre if srcAlign is true, src must be 16 byte aligned * @pre if dstAlign is true, src muts be 16 byte aligned */ template void Convert8Pixels(const uint8_t* src, uint8_t* dst) { switch (order) { case HAL_ALED_RGB: ConvertNEON_8(src, dst); break; case HAL_ALED_RBG: ConvertNEON_8(src, dst); break; case HAL_ALED_BGR: ConvertNEON_8(src, dst); break; case HAL_ALED_BRG: ConvertNEON_8(src, dst); break; case HAL_ALED_GBR: ConvertNEON_8(src, dst); break; } } /** * Copies 1 pixel from src to dst, converting from RGB to the specified order. * @param[in] order the color order to convert to * @param[in] in the source array * @param[out] the destination array * @pre in and out must contain at least 1 pixel (4 bytes). */ void Convert1Pixel(HAL_AddressableLEDColorOrder order, const uint8_t* src, uint8_t* dst) { uint8_t tmp[4]; std::memcpy(tmp, src, 4); // Load 4 bytes // convert based on order switch (order) { case HAL_ALED_RGB: ToRGB(tmp); break; case HAL_ALED_RBG: ToRBG(tmp); break; case HAL_ALED_BGR: ToBGR(tmp); break; case HAL_ALED_BRG: ToBRG(tmp); break; case HAL_ALED_GBR: ToGBR(tmp); break; case HAL_ALED_GRB: break; // this shouldn't ever get hit but compiler // wants this to be exhaustive } std::memcpy(dst, tmp, 4); // Store 4 bytes } /** * Copies len pixels from src to dst, converting from GRB (wire order) to order. * Optimizes based on alignment of input and output arrays specified by srcAlign * and dstAlign * @tparam order the color order to convert to * @tparam srcAlign whether src is aligned to the size of a NEON register (16 * bytes) * @tparam dstAlign whether dst is aligned to the size of a NEON register (16 * bytes) * @param[in] src The source array * @param[out] dst the destination array * @param[in] len the size (in pixels, len = (size in bytes) / 4) * @pre src and dst must have at least len*4 capacity in bytes * @pre if srcAlign is true, src must be 16 byte aligned * @pre if dstAlign is true, src muts be 16 byte aligned */ template void ConvertPixels(const uint8_t* src, uint8_t* dst, size_t len) { if (len >= 16) { constexpr size_t A4 = A * 4; // Stride of 1 16-pixel conversion operation. (4 NEON registers) size_t size = len * 4; size_t aligned = Simd::AlignLo( size, A4); // number of bytes we can copy with whole 16-pixel strides for (size_t i = 0; i < aligned; i += A4) { Convert16Pixels(src + i, dst + i); } if (aligned < size) { Convert16Pixels( src + size - A4, dst + size - A4); // copy last 16 pixels, possibly recopying. } } else if (len >= 8) { // If len between 8 and 16, we can do 1 or 2 8-pixel copies Convert8Pixels(src, dst); if (len > 8) { size_t recopyOffset = (len * 4) - (HA * 4); Convert8Pixels( src + recopyOffset, dst + recopyOffset); // copy last 8 pixels, possibly recopying } } else { // Just copy pixel-by-pixel for <8 for (size_t i = 0; i < len; i += 4) { Convert1Pixel(order, src + i, dst + i); } } } /** * Copies pixelCount pixels from src to dst, converting from RGB to the * specified order * @tparam order the color order to convert to * @param src the source array * @param dst the destination array * @param pixelCount the number of pixels to convert and copy */ template void ConvertPixels(const uint8_t* src, uint8_t* dst, size_t pixelCount) { if (Aligned(src) && Aligned(dst)) { ConvertPixels(src, dst, pixelCount); } else if (Aligned(src)) { ConvertPixels(src, dst, pixelCount); } else if (Aligned(dst)) { ConvertPixels(src, dst, pixelCount); } else { ConvertPixels(src, dst, pixelCount); } } } // namespace hal::detail