Resize源码详解(参考Opencv4.1)
生活随笔
收集整理的這篇文章主要介紹了
Resize源码详解(参考Opencv4.1)
小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
inline uint16x8_t v_pack(const uint32x4_t& a, const uint32x4_t& b)
{ uint16x4_t a1 = vqmovn_u32(a), b1 = vqmovn_u32(b); return uint16x8_t(vcombine_u16(a1, b1));
}
uint16x4_t = vqmovn_u32(const uint32x4_t):將每個 lane 飽和截斷(saturating narrow)為原來位寬的一半,即 32 位 → 16 位,超出範圍的值會被鉗位到 16 位無符號數的最大值
uint16x8_t = vcombine_u16(const uint16x4_t, const uint16x4_t):將兩個 uint16x4_t 組合成一個 uint16x8_t,第一個參數放在低半部,第二個參數放在高半部
// Shifts every signed 16-bit lane of `a` and `b` right by 2 and packs the
// sixteen results into one vector of unsigned 8-bit lanes; `a` supplies the
// low eight lanes and `b` the high eight.
//
// Intrinsic note: uint8x8_t = vqrshrun_n_s16(const int16x8_t, int n)
// performs a rounding right shift of each lane by n bits, then a saturating
// narrow to half the original width (signed 16-bit -> unsigned 8-bit).
inline uint8x16_t v_rshr_pack_u(const int16x8_t& a, const int16x8_t& b)
{
    return uint8x16_t(vcombine_u8(vqrshrun_n_s16(a, 2), vqrshrun_n_s16(b, 2)));
}
// Lane-wise signed multiply of two 8x16-bit vectors that keeps only the
// upper 16 bits of each 32-bit product, i.e. (a[i] * b[i]) >> 16.
//
// Intrinsic note: int16x4_t = vshrn_n_s32(const int32x4_t, int n) shifts
// each lane right by n bits and narrows it by truncation.
inline int16x8_t v_mul_hi(const int16x8_t& a, const int16x8_t& b)
{
    // Widening multiplies: 16-bit x 16-bit -> full 32-bit products.
    const int32x4_t prod_lo = vmull_s16(vget_low_s16(a), vget_low_s16(b));
    const int32x4_t prod_hi = vmull_s16(vget_high_s16(a), vget_high_s16(b));

    // Keep the high halves of the products.
    const int16x4_t top_lo = vshrn_n_s32(prod_lo, 16);
    const int16x4_t top_hi = vshrn_n_s32(prod_hi, 16);

    return int16x8_t(vcombine_s16(top_lo, top_hi));
}
int32x4_t = vmull_s16(const int16x4_t, const int16x4_t):逐 lane 相乘並擴寬(widening multiply),16 位 × 16 位得到完整的 32 位乘積
struct VResizeLinearVec_32s8u {int operator()(const unsigned char** _src, unsigned char* dst, const unsigned char* _beta, int width ) const{const int** src = (const int**)_src;const short* beta = (const short*)_beta;const int *S0 = src[0], *S1 = src[1];int x = 0;int16x8_t b0 = int16x8_t((vdupq_n_s16(beta[0]))), b1 = int16x8_t((vdupq_n_s16(beta[1])));if( (((size_t)S0|(size_t)S1)&(SIMD_WIDTH - 1)) == 0 )for( ; x <= width - u8_nlanes; x += u8_nlanes)vst1q_u8(dst + x, v_rshr_pack_u(v_mul_hi(v_pack(vld1q_s32(S0 + x ) >> 4, vld1q_s32(S0 + x + i32_nlanes) >> 4), b0) +v_mul_hi(v_pack(vld1q_s32(S1 + x ) >> 4, vld1q_s32(S1 + x + i32_nlanes) >> 4), b1),v_mul_hi(v_pack(vld1q_s32(S0 + x + 2 * i32_nlanes) >> 4, vld1q_s32(S0 + x + 3 * i32_nlanes) >> 4), b0) +v_mul_hi(v_pack(vld1q_s32(S1 + x + 2 * i32_nlanes) >> 4, vld1q_s32(S1 + x + 3 * i32_nlanes) >> 4), b1)));elsefor( ; x <= width - u8_nlanes; x += u8_nlanes)vst1q_u8(dst + x, v_rshr_pack_u(v_mul_hi(v_pack(vld1q_s32(S0 + x ) >> 4, vld1q_s32(S0 + x + i32_nlanes) >> 4), b0) +v_mul_hi(v_pack(vld1q_s32(S1 + x ) >> 4, vld1q_s32(S1 + x + i32_nlanes) >> 4), b1),v_mul_hi(v_pack(vld1q_s32(S0 + x + 2 * i32_nlanes) >> 4, vld1q_s32(S0 + x + 3 * i32_nlanes) >> 4), b0) +v_mul_hi(v_pack(vld1q_s32(S1 + x + 2 * i32_nlanes) >> 4, vld1q_s32(S1 + x + 3 * i32_nlanes) >> 4), b1)));for( ; x < width - i16_nlanes; x += i16_nlanes)v_rshr_pack_u_store(dst + x, v_mul_hi(v_pack(vld1q_s32(S0 + x) >> 4, vld1q_s32(S0 + x + i32_nlanes) >> 4), b0) +v_mul_hi(v_pack(vld1q_s32(S1 + x) >> 4, vld1q_s32(S1 + x + i32_nlanes) >> 4), b1));return x;} };注:opencv的resize先計算horizon方向的臨時結果,然后計算v方向得到最終的值,一次計算一行的16個值,由于數據采用定點計算方式,數據流從int32x4_t加載,到乘積之后的int16x4_t,最后導入變為int8x8_t
附上別的博客對一些指令的說明:https://blog.csdn.net/fuwenyan/article/details/78811034
?
總結
以上是生活随笔為你收集整理的Resize源码详解(参考Opencv4.1)的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: 神经网络前向引擎内存复用技术(基于caf
- 下一篇: 目标和—leetcode494