python: Reflections on Entering an Image Denoising Competition
- Opening image: a denoising result from earlier work
- I. Takeaways
- II. Gains
- III. Experience sharing (selected source code with notes)
- 3.1 Input
- 3.2 Network
- 3.3 Loss function
- 3.4 Traditional filtering
- IV. Main reference links
(Opening image: a denoising result from earlier work.)
I. Takeaways
This noob did image filtering and similar algorithms on an FPGA for my undergraduate thesis, and in grad school I worked on deep-learning-based graphics, though the backbone networks were still convolutional… I felt my coding ability and fundamentals were decent, so I was fairly confident before entering:
I figured that after reading a few top-conference denoising papers, reproducing and borrowing from them should get me a decent result. But: roughly 1000+ people entered, more than half never submitted anything or only submitted the baseline, and this noob still ended up around 100+. Well, it isn't over yet; the competition ends tomorrow, and my rank will probably be close to 200 by then. I just can't grind any harder.
There were three main problems.
I know I'm a noob, but I still wanted to give it a shot. Rant over; on to the main content:
II. Gains
A word on what I gained: there were plenty of difficulties, but plenty of gains too.
III. Experience sharing (selected source code with notes)
3.1 Input
The images have to be cut into patches. A whole image is too big: make the network even slightly larger and even a 32 GB GPU runs out of memory.
Split one image into multiple tiles; a sketch of the idea follows:
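A minimal sketch, assuming fixed-size tiles and an image at least one tile large; the function names and `patch=256` are placeholders of mine, not the competition code:

```python
import numpy as np

def split_into_patches(img, patch=256):
    """Split img (H, W, C) into fixed-size tiles; assumes H, W >= patch."""
    h, w = img.shape[:2]
    patches, offsets = [], []
    for top in range(0, h, patch):
        top = min(top, h - patch)            # clamp the last row of tiles into bounds
        for left in range(0, w, patch):
            left = min(left, w - patch)      # clamp the last column of tiles into bounds
            patches.append(img[top:top + patch, left:left + patch])
            offsets.append((top, left))
    return patches, offsets

def merge_patches(patches, offsets, h, w, c):
    """Stitch tiles back at their offsets; overlapping edge tiles simply overwrite."""
    out = np.zeros((h, w, c), dtype=patches[0].dtype)
    for p, (top, left) in zip(patches, offsets):
        out[top:top + p.shape[0], left:left + p.shape[1]] = p
    return out
```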
3.2 Network
The main ideas my network borrows:
1. Don't learn the end-to-end pixel values directly; learn the noise instead (easier for the network to fit?).
2. Use channel-separable (depthwise) convolutions and moderately increase the channel count (my GPU memory is small and training was very slow).
3. Try larger convolution kernels (again, small GPU memory, very slow training).
(The competition had a model-size limit.) Increasing either the channels or the kernel size increases memory use, and my hardware couldn't handle both, so I only increased the channel count; see the parameter-count sketch below. The implementation details:
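To make point 2 concrete (my own illustration, not competition code), compare the parameter counts of a standard convolution and a depthwise-separable one at the same kernel size; the channel and kernel sizes here are arbitrary:

```python
import torch.nn as nn

cin, cout, k = 128, 128, 7  # arbitrary example sizes

standard = nn.Conv2d(cin, cout, kernel_size=k, padding=k // 2)
separable = nn.Sequential(
    nn.Conv2d(cin, cin, kernel_size=k, padding=k // 2, groups=cin),  # depthwise: spatial mixing per channel
    nn.Conv2d(cin, cout, kernel_size=1),                             # pointwise: channel mixing
)

count = lambda m: sum(p.numel() for p in m.parameters())
print(count(standard), count(separable))  # ~803k vs ~23k parameters
```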
It's modified from a plain U-Net baseline:
```python
import torch
import torch.nn as nn
import torch.nn.functional as F

class Unet2(nn.Module):
    def __init__(self, dim=4):
        super(Unet2, self).__init__()
        self.dims = [32, 64, 128, 256, 512]
        self.ks = [3, 3, 3, 3, 3]
        self.dims_up = self.dims[::-1]
        self.ks_up = self.ks[-2::-1]
        self.first_block = Block2(dim, self.dims[0], self.ks[0])
        self.first_pool = nn.MaxPool2d(kernel_size=2)  # with AvgPool2d: psnr: 37.683, ssim: 0.902, score: 30.679, time: 52.650
        # encoder blocks
        for i, dim_in in enumerate(self.dims[:-2]):
            dim_out = self.dims[i+1]
            setattr(self, 'Block{}'.format(i), Block2(dim_in, dim_out, k=self.ks[i+1]))
            setattr(self, 'pool{}'.format(i), nn.MaxPool2d(kernel_size=2))
        self.conv_mid = Block2(self.dims[-2], self.dims[-1], self.ks[-1])
        # decoder blocks
        for i, dim_in in enumerate(self.dims_up[:-1]):
            dim_out = self.dims_up[i+1]
            setattr(self, 'ConvTrans{}'.format(i), nn.ConvTranspose2d(dim_in, dim_out, 2, stride=2, bias=True))
            setattr(self, 'up_Block{}'.format(i), Block2(dim_in, dim_out, k=self.ks_up[i]))
        self.last_conv = nn.Conv2d(self.dims[0], dim, 1, bias=True)

    def forward(self, x):
        n, c, h, w = x.shape
        # pad H and W up to a multiple of 32 so the pooling/upsampling stages align
        h_pad = 32 - h % 32 if not h % 32 == 0 else 0
        w_pad = 32 - w % 32 if not w % 32 == 0 else 0
        padded_image = F.pad(x, (0, w_pad, 0, h_pad), 'replicate')
        list_pools = []
        x_bk = x
        # 1. first block
        x = self.first_block(padded_image)
        list_pools.append(x)
        x = self.first_pool(x)
        # 2. encoder blocks, saving skip connections
        for i, dim_in in enumerate(self.dims[:-2]):
            x = getattr(self, 'Block{}'.format(i))(x)
            list_pools.append(x)
            x = getattr(self, 'pool{}'.format(i))(x)
        x = self.conv_mid(x)
        # decoder: upsample, concatenate the matching skip, refine
        for i, dim_in in enumerate(self.dims_up[:-1]):
            x = getattr(self, 'ConvTrans{}'.format(i))(x)
            x = torch.cat([x, list_pools.pop()], 1)
            x = getattr(self, 'up_Block{}'.format(i))(x)
        # 3. head: 1x1 conv, crop the padding, add back the input (the net learns the noise)
        x = self.last_conv(x)
        out = x[:, :, :h, :w] + x_bk
        return out


class Block2(nn.Module):
    def __init__(self, dim_in, dim_out, k=3):
        super(Block2, self).__init__()
        self.conv1 = nn.Conv2d(dim_in, dim_in, kernel_size=k, padding=k // 2, padding_mode='zeros', bias=True)
        self.conv2 = nn.Conv2d(dim_in, dim_out, kernel_size=k, padding=k // 2, padding_mode='zeros', bias=True)

    def forward(self, x):
        x = self.conv1(x)
        x = self.leaky_relu(x)
        x = self.conv2(x)
        x = self.leaky_relu(x)
        return x

    def leaky_relu(self, x, a=0.2):
        return torch.max(a * x, x)
```

The network I actually used, a hacked-up ConvNeXt:
```python
class Our(nn.Module):
    def __init__(self, dim=4):
        super(Our, self).__init__()
        self.dims = [128, 256, 512, 1024]
        self.ks = [3, 3, 3, 3]
        # not enough GPU memory for these:
        # self.dims = [16, 32, 64, 128, 256]
        # self.ks = [23, 23, 23, 17, 3]
        self.dims_up = self.dims[::-1]
        self.ks_up = self.ks[-2::-1]
        self.first_block = Block(dim, self.dims[0], self.ks[0])
        self.first_pool = nn.MaxPool2d(kernel_size=2)
        for i, dim_in in enumerate(self.dims[:-2]):
            dim_out = self.dims[i+1]
            setattr(self, 'Block{}'.format(i), Block(dim_in, dim_out, k=self.ks[i+1]))
            setattr(self, 'pool{}'.format(i), nn.MaxPool2d(kernel_size=2))
        self.conv_mid = Block(self.dims[-2], self.dims[-1], self.ks[-1])
        for i, dim_in in enumerate(self.dims_up[:-1]):
            dim_out = self.dims_up[i+1]
            setattr(self, 'ConvTrans{}'.format(i), nn.ConvTranspose2d(dim_in, dim_out, 2, stride=2))
            setattr(self, 'up_Block{}'.format(i), Block(dim_in, dim_out, k=self.ks_up[i]))
        self.last_ln = nn.LayerNorm(self.dims[0], eps=1e-6)
        self.last_conv = nn.Linear(self.dims[0], dim)

    def forward(self, x):
        n, c, h, w = x.shape
        # pad H and W up to a multiple of 32, as in Unet2
        h_pad = 32 - h % 32 if not h % 32 == 0 else 0
        w_pad = 32 - w % 32 if not w % 32 == 0 else 0
        padded_image = F.pad(x, (0, w_pad, 0, h_pad), 'replicate')
        list_pools = []
        x_bk = x
        # 1. first block
        x = self.first_block(padded_image)
        list_pools.append(x)
        x = self.first_pool(x)
        # 2. encoder blocks
        for i, dim_in in enumerate(self.dims[:-2]):
            x = getattr(self, 'Block{}'.format(i))(x)
            list_pools.append(x)
            x = getattr(self, 'pool{}'.format(i))(x)
        x = self.conv_mid(x)
        # decoder
        for i, dim_in in enumerate(self.dims_up[:-1]):
            x = getattr(self, 'ConvTrans{}'.format(i))(x)
            x = torch.cat([x, list_pools.pop()], 1)
            x = getattr(self, 'up_Block{}'.format(i))(x)
        # 3. head: LayerNorm + linear in channels-last layout, crop, add back the input
        x = x.permute(0, 2, 3, 1).contiguous()
        x = self.last_ln(x)
        x = self.last_conv(x)
        x = x.permute(0, 3, 1, 2).contiguous()
        out = x[:, :, :h, :w] + x_bk
        return out


class Block(nn.Module):
    def __init__(self, dim_in, dim_out, k=9):
        super(Block, self).__init__()
        # depthwise conv (groups=dim_in), ConvNeXt style
        self.conv = nn.Conv2d(dim_in, dim_in, groups=dim_in, kernel_size=k, padding=k // 2)
        self.ln = nn.LayerNorm(dim_in, eps=1e-6)
        self.conv1x1up = nn.Linear(dim_in, dim_in * 2)   # pointwise expansion, i.e. nn.Conv2d(dim, dim * 2, 1)
        self.act = nn.GELU()
        self.conv1x1dn = nn.Linear(dim_in * 2, dim_out)  # pointwise projection, i.e. nn.Conv2d(dim * 2, dim, 1)
        self.w = nn.Parameter(torch.zeros(1))            # layer scale, zero-init
        # residual path needs a 1x1 conv because dim_in != dim_out
        self.res_conv = nn.Conv2d(dim_in, dim_out, 1)

    def forward(self, x):
        identity = x
        x = self.conv(x)
        x = x.permute(0, 2, 3, 1).contiguous()
        x = self.ln(x)
        x = self.conv1x1up(x)
        x = self.act(x)
        x = self.conv1x1dn(x)
        x = x.permute(0, 3, 1, 2).contiguous()
        x = x * self.w  # starts at zero, so the block begins as just the residual path
        x = x + self.res_conv(identity)
        return x
```
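Because of the model-size limit, the first thing worth checking after any change is the parameter count; a generic PyTorch snippet (my addition, not competition tooling):

```python
model = Our(dim=4)
n_params = sum(p.numel() for p in model.parameters())
print('params: {:.2f} M'.format(n_params / 1e6))
```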
3.3 Loss function

```python
loss = torch.nn.L1Loss()
```

I tested the options, and L1 still works best.
Fancier choices like L2 and SSIM didn't perform well (it's alchemy, after all; maybe they just don't suit my network).
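For context, a minimal training-step sketch using the L1 loss; the optimizer, learning rate, and dummy tensors are illustrative placeholders, not my actual competition setup:

```python
import torch

model = Our(dim=4)                                         # the network defined above
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)  # optimizer/lr are placeholders
criterion = torch.nn.L1Loss()                              # what worked best for me
# criterion = torch.nn.MSELoss()                           # the L2 alternative, tried and dropped

noisy = torch.rand(1, 4, 64, 64)                           # dummy patch pair, just for shape-checking
clean = torch.rand(1, 4, 64, 64)

pred = model(noisy)   # the model adds its input back internally, so pred is the denoised image
loss = criterion(pred, clean)
optimizer.zero_grad()
loss.backward()
optimizer.step()
```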
3.4 Traditional filtering
Ha, I also tried traditional denoising, and along the way wrote a bilateral filter in pure Python (ported from my old MATLAB code). I have to say: deep learning still reigns supreme!
```python
import numpy as np

def bilateral_filter(img):
    # Ported from my old MATLAB implementation:
    # https://blog.csdn.net/qq_38204686/article/details/106929922
    r = 20              # window radius; kernel size is 2*r + 1
    sigma_space = 15.0  # spatial standard deviation
    sigma_color = 10.0  # range (intensity-similarity) standard deviation

    # precompute the spatial Gaussian over offsets -r..r
    # (the original loop ran over -r-1..r-1, which skewed the window by one pixel)
    w_space = np.zeros((2*r + 1, 2*r + 1))
    for i in range(-r, r + 1):
        for j in range(-r, r + 1):
            tmp = i * i + j * j
            w_space[i + r, j + r] = np.exp(-float(tmp) / (2 * sigma_space * sigma_space))

    # precompute the range Gaussian as a lookup table over intensity differences 0..255
    w_color = np.zeros((1, 256))
    for i in range(256):
        w_color[0, i] = np.exp(-float(i * i) / (2 * sigma_color * sigma_color))

    # filter; borders of width r are left untouched
    height, width, channel = img.shape
    dst_img = img.copy()
    for h in range(r, height - r):
        # roughly 0.3 s per row
        for w in range(r, width - r):  # fixed: the original iterated over `height` here
            for c in range(channel):
                p_c = img[h, w, c]                                # center pixel
                p_win = img[h-r:h+r+1, w-r:w+r+1, c]              # window around it
                # cast before subtracting so uint8 arithmetic doesn't wrap around
                c_w = np.abs(p_win.astype(int) - int(p_c))
                c_w = w_color[0, c_w]          # look up range weights
                w_tmp = w_space * c_w          # combined weight
                p_sum = np.sum(p_win * w_tmp) / np.sum(w_tmp)
                dst_img[h, w, c] = p_sum
    return dst_img
```
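One way to sanity-check the hand-rolled filter is against OpenCV's built-in `cv2.bilateralFilter`. This comparison is my addition, and exact agreement isn't expected, since OpenCV weights and handles borders differently; the file name is a hypothetical test image:

```python
import cv2
import numpy as np

img = cv2.imread('noisy.png')  # hypothetical uint8 H x W x 3 test image
ours = bilateral_filter(img)
ref = cv2.bilateralFilter(img, 2 * 20 + 1, 10.0, 15.0)  # d = 2*r + 1, sigmaColor, sigmaSpace

# compare the interior only, since the loop above leaves an r-wide border untouched
r = 20
diff = np.abs(ours[r:-r, r:-r].astype(int) - ref[r:-r, r:-r].astype(int))
print('mean abs difference:', diff.mean())
```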
IV. Main reference links
- https://zhuanlan.zhihu.com/p/455913104 (ConvNeXt: A ConvNet for the 2020s)
- https://zhuanlan.zhihu.com/p/349644858 (how to get free GPU compute)
- https://blog.csdn.net/u011447962/article/details/123510680 (CVPR 2022 | RepLKNet)
- https://github.com/gbstack/CVPR-2022-papers#SG (CVPR 2022 papers, with code and demos)