當前位置：首頁 > 编程资源 > 编程问答 >内容正文

编程问答

Resize源码详解(参考Opencv4.1)

發布時間：2024/4/18 编程问答 33 豆豆

生活随笔收集整理的這篇文章主要介紹了 Resize源码详解(参考Opencv4.1) 小編覺得挺不錯的,現在分享給大家,幫大家做個參考.

// Packs two vectors of four unsigned 32-bit lanes into one vector of eight
// unsigned 16-bit lanes. Each lane is narrowed with saturation; the lanes of
// `a` fill the low half of the result and the lanes of `b` fill the high half.
inline uint16x8_t v_pack(const uint32x4_t& a, const uint32x4_t& b)
{
    return uint16x8_t(vcombine_u16(vqmovn_u32(a), vqmovn_u32(b)));
}

uint16x4_t = vqmovn_u32(const uint32x4_t)：將每個lane的值飽和截斷為原來位寬的一半（32位→16位）

uint16x8_t = vcombine_u16(const uint16x4_t, const uint16x4_t)：組合兩個16x4得到16x8（第一個參數在低半部分，第二個參數在高半部分）

// Packs two vectors of eight signed 16-bit lanes into one vector of sixteen
// unsigned 8-bit lanes. Every lane is shifted right by 2 bits with rounding
// and then saturated to the unsigned 8-bit range; `a` supplies the low half
// of the result and `b` the high half.
inline uint8x16_t v_rshr_pack_u(const int16x8_t& a, const int16x8_t& b)
{
    return uint8x16_t(vcombine_u8(vqrshrun_n_s16(a, 2), vqrshrun_n_s16(b, 2)));
}

uint8x8_t = vqrshrun_n_s16(const int16x8_t, int a)：將每個值帶舍入地右移a位，然后無符號飽和截斷為原來位寬的一半（16位→8位）

// Lane-wise signed 16x16 multiplication that keeps only the upper 16 bits of
// each 32-bit product (i.e. (a[i] * b[i]) >> 16 for all eight lanes).
inline int16x8_t v_mul_hi(const int16x8_t& a, const int16x8_t& b)
{
    // Widening multiplies: four 32-bit products for each half of the inputs.
    const int32x4_t prod_lo = vmull_s16(vget_low_s16(a), vget_low_s16(b));
    const int32x4_t prod_hi = vmull_s16(vget_high_s16(a), vget_high_s16(b));

    // Drop the low 16 bits of every product and narrow back to 16-bit lanes.
    return int16x8_t(vcombine_s16(vshrn_n_s32(prod_lo, 16), vshrn_n_s32(prod_hi, 16)));
}

int16x4_t = vshrn_n_s32(const int32x4_t, int a)：將每個值右移a位，然后截斷為原來位寬的一半（32位→16位，不做飽和）

int32x4_t = vmull_s16(const int16x4_t, const int16x4_t)：逐lane相乘，乘積擴展為原來位寬的兩倍（16位→32位）

struct VResizeLinearVec_32s8u {int operator()(const unsigned char** _src, unsigned char* dst, const unsigned char* _beta, int width ) const{const int** src = (const int**)_src;const short* beta = (const short*)_beta;const int *S0 = src[0], *S1 = src[1];int x = 0;int16x8_t b0 = int16x8_t((vdupq_n_s16(beta[0]))), b1 = int16x8_t((vdupq_n_s16(beta[1])));if( (((size_t)S0|(size_t)S1)&(SIMD_WIDTH - 1)) == 0 )for( ; x <= width - u8_nlanes; x += u8_nlanes)vst1q_u8(dst + x, v_rshr_pack_u(v_mul_hi(v_pack(vld1q_s32(S0 + x ) >> 4, vld1q_s32(S0 + x + i32_nlanes) >> 4), b0) +v_mul_hi(v_pack(vld1q_s32(S1 + x ) >> 4, vld1q_s32(S1 + x + i32_nlanes) >> 4), b1),v_mul_hi(v_pack(vld1q_s32(S0 + x + 2 * i32_nlanes) >> 4, vld1q_s32(S0 + x + 3 * i32_nlanes) >> 4), b0) +v_mul_hi(v_pack(vld1q_s32(S1 + x + 2 * i32_nlanes) >> 4, vld1q_s32(S1 + x + 3 * i32_nlanes) >> 4), b1)));elsefor( ; x <= width - u8_nlanes; x += u8_nlanes)vst1q_u8(dst + x, v_rshr_pack_u(v_mul_hi(v_pack(vld1q_s32(S0 + x ) >> 4, vld1q_s32(S0 + x + i32_nlanes) >> 4), b0) +v_mul_hi(v_pack(vld1q_s32(S1 + x ) >> 4, vld1q_s32(S1 + x + i32_nlanes) >> 4), b1),v_mul_hi(v_pack(vld1q_s32(S0 + x + 2 * i32_nlanes) >> 4, vld1q_s32(S0 + x + 3 * i32_nlanes) >> 4), b0) +v_mul_hi(v_pack(vld1q_s32(S1 + x + 2 * i32_nlanes) >> 4, vld1q_s32(S1 + x + 3 * i32_nlanes) >> 4), b1)));for( ; x < width - i16_nlanes; x += i16_nlanes)v_rshr_pack_u_store(dst + x, v_mul_hi(v_pack(vld1q_s32(S0 + x) >> 4, vld1q_s32(S0 + x + i32_nlanes) >> 4), b0) +v_mul_hi(v_pack(vld1q_s32(S1 + x) >> 4, vld1q_s32(S1 + x + i32_nlanes) >> 4), b1));return x;} };

注：opencv的resize先計算horizontal（水平）方向的臨時結果，然后計算vertical（垂直）方向得到最終的值，一次計算一行的16個值。由于數據采用定點計算方式，數據流從int32x4_t加載，到乘積之后的int16x8_t，最后打包變為uint8x16_t存入輸出

附上別的博客對一些指令的說明：https://blog.csdn.net/fuwenyan/article/details/78811034

總結

以上是生活随笔為你收集整理的Resize源码详解(参考Opencv4.1)的全部內容，希望文章能夠幫你解決所遇到的問題。

如果覺得生活随笔網站內容還不錯，歡迎將生活随笔推薦給好友。

上一篇：神经网络前向引擎内存复用技术(基于caf
下一篇：目标和—leetcode494