feat(componets): 新增多个图像处理模块
- 添加 DynamicFilter 模块,用于频域滤波和动态调整滤波器权重 - 添加 SCSA 模块,探索空间注意力和通道注意力之间的协同作用 - 添加 SMFA 模块,自调制特征聚合用于高效图像重建 - 添加 TIAM 模块,时空交互注意力用于变化检测 - 添加 UFFC 模块,无偏快速傅里叶卷积用于图像修复 - 更新 net.py,引入 SCSA 模块替换原有注意力机制 - 优化 train.py,调整导入模块以支持新功能
This commit is contained in:
parent
c023c0801d
commit
ece5f30c2d
116
componets/DynamicFilter(频域模块动态滤波器用于CV2维图像).py
Normal file
116
componets/DynamicFilter(频域模块动态滤波器用于CV2维图像).py
Normal file
@ -0,0 +1,116 @@
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from timm.layers.helpers import to_2tuple
|
||||
|
||||
"""
|
||||
配备多头自注意力 (MHSA) 的模型在计算机视觉方面取得了显著的性能。但其计算复杂度与输入特征图像素数的平方成正比,导致处理速度缓慢,尤其是在处理高分辨率图像时。
|
||||
为了规避这个问题,研究者提出了一类新型的令牌混合器 (token mixer) 作为 MHSA 的替代方案:基于 FFT 的令牌混合器涉及类似于 MHSA 的全局操作,但计算复杂度较低。
|
||||
在这里,我们提出了一种名为动态过滤器的新型令牌混合器以缩小上述差距。
|
||||
DynamicFilter 模块通过频域滤波和动态调整滤波器权重,能够对图像进行复杂的增强和处理。
|
||||
"""
|
||||
|
||||
class StarReLU(nn.Module):
    """Squared-ReLU activation with an affine rescale.

    Computes ``scale * relu(x) ** 2 + bias`` element-wise, where ``scale`` and
    ``bias`` are single-element parameters.

    Args:
        scale_value: Initial value of the multiplicative ``scale`` parameter.
        bias_value: Initial value of the additive ``bias`` parameter.
        scale_learnable: Whether ``scale`` requires gradients.
        bias_learnable: Whether ``bias`` requires gradients.
        mode: Accepted but not used by this implementation.
        inplace: Forwarded to the internal ``nn.ReLU``.
    """

    def __init__(self, scale_value=1.0, bias_value=0.0,
                 scale_learnable=True, bias_learnable=True,
                 mode=None, inplace=False):
        super().__init__()
        self.inplace = inplace
        self.relu = nn.ReLU(inplace=inplace)

        initial_scale = scale_value * torch.ones(1)
        initial_bias = bias_value * torch.ones(1)
        self.scale = nn.Parameter(initial_scale, requires_grad=scale_learnable)
        self.bias = nn.Parameter(initial_bias, requires_grad=bias_learnable)

    def forward(self, x):
        """Return ``scale * relu(x) ** 2 + bias``."""
        rectified = self.relu(x)
        squared = rectified ** 2
        return self.scale * squared + self.bias
|
||||
|
||||
class Mlp(nn.Module):
    """Two-layer perceptron: linear -> activation -> dropout -> linear -> dropout.

    MLP as used in MetaFormer models, e.g. Transformer, MLP-Mixer, PoolFormer,
    MetaFormer baselines and related networks. Mostly copied from timm.

    Args:
        dim: Number of input features (size of the last axis).
        mlp_ratio: Hidden width as a multiple of ``dim``; the product is
            truncated with ``int``.
        out_features: Number of output features; falls back to ``dim`` when
            falsy.
        act_layer: Zero-argument callable that builds the activation module.
        drop: Dropout probability, expanded by ``to_2tuple`` into one value per
            dropout layer.
        bias: Whether both linear layers carry a bias term.
        **kwargs: Accepted and ignored.
    """

    def __init__(self, dim, mlp_ratio=4, out_features=None, act_layer=StarReLU, drop=0.,
                 bias=False, **kwargs):
        super().__init__()
        hidden_features = int(mlp_ratio * dim)
        out_features = out_features or dim
        drop_probs = to_2tuple(drop)

        self.fc1 = nn.Linear(dim, hidden_features, bias=bias)
        self.act = act_layer()
        self.drop1 = nn.Dropout(drop_probs[0])
        self.fc2 = nn.Linear(hidden_features, out_features, bias=bias)
        self.drop2 = nn.Dropout(drop_probs[1])

    def forward(self, x):
        """Apply the five stages in order and return the result."""
        for stage in (self.fc1, self.act, self.drop1, self.fc2, self.drop2):
            x = stage(x)
        return x
|
||||
|
||||
|
||||
class DynamicFilter(nn.Module):
    """Frequency-domain token mixer with input-dependent filter weights.

    The input is projected to ``med_channels``, transformed with a real 2-D
    FFT over its two spatial axes, multiplied by a per-sample mixture of
    ``num_filters`` learned complex filters, transformed back, and projected to
    ``dim`` channels again. The mixture coefficients come from an ``Mlp``
    applied to the spatially averaged input, with a softmax over the filters.

    Args:
        dim: Channel count of the input (its last axis).
        expansion_ratio: ``med_channels = int(expansion_ratio * dim)``.
        reweight_expansion_ratio: Passed to ``Mlp`` as its ``mlp_ratio``.
        act1_layer: Factory for the activation applied after the first
            projection.
        act2_layer: Factory for the activation applied after the inverse FFT.
        bias: Whether the two point-wise linear layers use a bias.
        num_filters: Number of learned complex filters that are mixed.
        size: Spatial size the filters are stored at; an int or an ``(H, W)``
            pair (expanded with ``to_2tuple``).
        weight_resize: When true, the stored filters are resampled to the
            spectrum size of each input; otherwise the input must match
            ``size`` exactly.
        **kwargs: Accepted and ignored.
    """

    def __init__(self, dim, expansion_ratio=2, reweight_expansion_ratio=.25,
                 act1_layer=StarReLU, act2_layer=nn.Identity,
                 bias=False, num_filters=4, size=14, weight_resize=False,
                 **kwargs):
        super().__init__()
        size = to_2tuple(size)
        self.size = size[0]
        # rfft2 keeps only W // 2 + 1 frequency bins along the last FFT axis.
        self.filter_size = size[1] // 2 + 1
        self.num_filters = num_filters
        self.dim = dim
        self.med_channels = int(expansion_ratio * dim)
        self.weight_resize = weight_resize
        self.pwconv1 = nn.Linear(dim, self.med_channels, bias=bias)
        self.act1 = act1_layer()
        self.reweight = Mlp(dim, reweight_expansion_ratio, num_filters * self.med_channels)
        # Trailing axis of size 2 holds the (real, imaginary) parts.
        self.complex_weights = nn.Parameter(
            torch.randn(self.size, self.filter_size, num_filters, 2,
                        dtype=torch.float32) * 0.02)
        self.act2 = act2_layer()
        self.pwconv2 = nn.Linear(self.med_channels, dim, bias=bias)

    @staticmethod
    def _resize_complex_weight(origin_weight, new_h, new_w):
        """Resample a ``(h, w, num_filters, 2)`` filter bank to ``(new_h, new_w)``.

        The filter and real/imaginary axes are folded into a channel axis so
        the bank can be interpolated as an image, then unfolded again.
        """
        h, w, num_filters = origin_weight.shape[0:3]
        as_image = origin_weight.reshape(1, h, w, num_filters * 2).permute(0, 3, 1, 2)
        resized = nn.functional.interpolate(
            as_image, size=(new_h, new_w), mode='bicubic', align_corners=True)
        return resized.permute(0, 2, 3, 1).reshape(new_h, new_w, num_filters, 2)

    def forward(self, x):
        """Filter ``x`` of shape ``(B, H, W, dim)`` and return the same shape."""
        B, H, W, _ = x.shape

        # Per-sample, per-channel mixing coefficients; softmax over the filters.
        routeing = self.reweight(x.mean(dim=(1, 2))).view(B, self.num_filters,
                                                          -1).softmax(dim=1)
        x = self.pwconv1(x)
        x = self.act1(x)
        x = x.to(torch.float32)
        x = torch.fft.rfft2(x, dim=(1, 2), norm='ortho')

        if self.weight_resize:
            # Previously this called an undefined ``resize_complex_weight``.
            complex_weights = self._resize_complex_weight(self.complex_weights, x.shape[1],
                                                          x.shape[2])
            complex_weights = torch.view_as_complex(complex_weights.contiguous())
        else:
            complex_weights = torch.view_as_complex(self.complex_weights)
        routeing = routeing.to(torch.complex64)
        # Blend the filter bank into one (H, W', C) filter per sample.
        weight = torch.einsum('bfc,hwf->bhwc', routeing, complex_weights)
        if self.weight_resize:
            weight = weight.view(-1, x.shape[1], x.shape[2], self.med_channels)
        else:
            weight = weight.view(-1, self.size, self.filter_size, self.med_channels)
        x = x * weight
        x = torch.fft.irfft2(x, s=(H, W), dim=(1, 2), norm='ortho')

        x = self.act2(x)
        x = self.pwconv2(x)
        return x
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Smoke test: run a random channels-last batch through the block and
    # print the input and output sizes.
    block = DynamicFilter(32, size=64)  # size==H,W
    sample = torch.rand(3, 64, 64, 32)
    filtered = block(sample)
    print(sample.size())
    print(filtered.size())
|
0
componets/SCSA(CV2维图像).pdf
Normal file
0
componets/SCSA(CV2维图像).pdf
Normal file
156
componets/SCSA.py
Normal file
156
componets/SCSA.py
Normal file
@ -0,0 +1,156 @@
|
||||
import typing as t
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from einops.einops import rearrange
|
||||
from mmengine.model import BaseModule
|
||||
__all__ = ['SCSA']
|
||||
|
||||
"""SCSA:探索空间注意力和通道注意力之间的协同作用
|
||||
通道和空间注意力分别在为各种下游视觉任务提取特征依赖性和空间结构关系方面带来了显着的改进。
|
||||
虽然它们的结合更有利于发挥各自的优势,但通道和空间注意力之间的协同作用尚未得到充分探索,缺乏充分利用多语义信息的协同潜力来进行特征引导和缓解语义差异。
|
||||
我们的研究试图在多个语义层面揭示空间和通道注意力之间的协同关系,提出了一种新颖的空间和通道协同注意力模块(SCSA)。我们的SCSA由两部分组成:可共享的多语义空间注意力(SMSA)和渐进式通道自注意力(PCSA)。
|
||||
SMSA 集成多语义信息并利用渐进式压缩策略将判别性空间先验注入 PCSA 的通道自注意力中,有效地指导通道重新校准。此外,PCSA 中基于自注意力机制的稳健特征交互进一步缓解了 SMSA 中不同子特征之间多语义信息的差异。
|
||||
我们在七个基准数据集上进行了广泛的实验,包括 ImageNet-1K 上的分类、MSCOCO 2017 上的对象检测、ADE20K 上的分割以及其他四个复杂场景检测数据集。我们的结果表明,我们提出的 SCSA 不仅超越了当前最先进的注意力机制,
|
||||
而且在各种任务场景中表现出增强的泛化能力。
|
||||
"""
|
||||
|
||||
class SCSA(BaseModule):
    """Spatial gating followed by channel self-attention on a (B, C, H, W) map.

    The forward pass has two stages:

    1. Spatial gating: the input is averaged along W and along H; each
       profile is split into four equal channel groups, every group passes
       through its own depth-wise ``nn.Conv1d``, and the group-normalised,
       gated results rescale the input along H and along W.
    2. Channel gating: the rescaled map is down-sampled, per-channel 1x1
       convolutions produce q/k/v, attention is computed between the channels
       of each head, and the spatially averaged result gates the channels of
       the rescaled map.

    Args:
        dim: Number of input channels; must be divisible by 4 and by
            ``head_num``.
        head_num: Number of attention heads.
        window_size: Window of the down-sampling step; ``-1`` selects global
            adaptive average pooling instead.
        group_kernel_sizes: Kernel sizes of the four depth-wise 1-D
            convolutions, one per channel group.
        qkv_bias: Whether the q/k/v convolutions use a bias.
        fuse_bn: Stored on the instance; not otherwise used in this class.
        norm_cfg: Accepted but not used in this class.
        act_cfg: Accepted but not used in this class.
        down_sample_mode: ``'avg_pool'``, ``'max_pool'`` or
            ``'recombination'``; only consulted when ``window_size != -1``.
        attn_drop_ratio: Dropout probability applied to the attention map.
        gate_layer: ``'softmax'`` selects softmax gates; any other value
            selects sigmoid gates.

    Raises:
        AssertionError: If ``dim`` is not divisible by 4 or by ``head_num``.
        ValueError: If ``down_sample_mode`` is not one of the supported modes.
    """

    def __init__(
            self,
            dim: int,
            head_num: int,
            window_size: int = 7,
            group_kernel_sizes: t.List[int] = [3, 5, 7, 9],
            qkv_bias: bool = False,
            fuse_bn: bool = False,
            norm_cfg: t.Dict = dict(type='BN'),
            act_cfg: t.Dict = dict(type='ReLU'),
            down_sample_mode: str = 'avg_pool',
            attn_drop_ratio: float = 0.,
            gate_layer: str = 'sigmoid',
    ):
        super(SCSA, self).__init__()
        self.dim = dim
        self.head_num = head_num
        self.head_dim = dim // head_num
        self.scaler = self.head_dim ** -0.5
        # Copy so instances never share (or mutate) the default list object.
        self.group_kernel_sizes = group_kernel_sizes = list(group_kernel_sizes)
        self.window_size = window_size
        self.qkv_bias = qkv_bias
        self.fuse_bn = fuse_bn
        self.down_sample_mode = down_sample_mode

        # ``dim // 4`` was truthy for every dim >= 4; test the remainder instead.
        assert self.dim % 4 == 0, 'The dimension of input feature should be divisible by 4.'
        # forward() regroups channels as (head_num, head_dim), which needs this.
        assert self.dim % head_num == 0, 'The dimension of input feature should be divisible by head_num.'
        self.group_chans = group_chans = self.dim // 4

        self.local_dwc = nn.Conv1d(group_chans, group_chans, kernel_size=group_kernel_sizes[0],
                                   padding=group_kernel_sizes[0] // 2, groups=group_chans)
        self.global_dwc_s = nn.Conv1d(group_chans, group_chans, kernel_size=group_kernel_sizes[1],
                                      padding=group_kernel_sizes[1] // 2, groups=group_chans)
        self.global_dwc_m = nn.Conv1d(group_chans, group_chans, kernel_size=group_kernel_sizes[2],
                                      padding=group_kernel_sizes[2] // 2, groups=group_chans)
        self.global_dwc_l = nn.Conv1d(group_chans, group_chans, kernel_size=group_kernel_sizes[3],
                                      padding=group_kernel_sizes[3] // 2, groups=group_chans)
        self.sa_gate = nn.Softmax(dim=2) if gate_layer == 'softmax' else nn.Sigmoid()
        self.norm_h = nn.GroupNorm(4, dim)
        self.norm_w = nn.GroupNorm(4, dim)

        self.conv_d = nn.Identity()
        self.norm = nn.GroupNorm(1, dim)
        self.q = nn.Conv2d(in_channels=dim, out_channels=dim, kernel_size=1, bias=qkv_bias, groups=dim)
        self.k = nn.Conv2d(in_channels=dim, out_channels=dim, kernel_size=1, bias=qkv_bias, groups=dim)
        self.v = nn.Conv2d(in_channels=dim, out_channels=dim, kernel_size=1, bias=qkv_bias, groups=dim)
        self.attn_drop = nn.Dropout(attn_drop_ratio)
        self.ca_gate = nn.Softmax(dim=1) if gate_layer == 'softmax' else nn.Sigmoid()

        if window_size == -1:
            self.down_func = nn.AdaptiveAvgPool2d((1, 1))
        else:
            if down_sample_mode == 'recombination':
                self.down_func = self.space_to_chans
                # dimensionality reduction
                self.conv_d = nn.Conv2d(in_channels=dim * window_size ** 2, out_channels=dim, kernel_size=1, bias=False)
            elif down_sample_mode == 'avg_pool':
                self.down_func = nn.AvgPool2d(kernel_size=(window_size, window_size), stride=window_size)
            elif down_sample_mode == 'max_pool':
                self.down_func = nn.MaxPool2d(kernel_size=(window_size, window_size), stride=window_size)
            else:
                # Fail here rather than with a missing ``down_func`` in forward().
                raise ValueError(
                    f"Unsupported down_sample_mode {down_sample_mode!r}; "
                    "expected 'recombination', 'avg_pool' or 'max_pool'.")

    def space_to_chans(self, x: torch.Tensor) -> torch.Tensor:
        """Fold each ``window_size`` x ``window_size`` patch into the channel axis.

        Maps ``(B, C, H, W)`` to ``(B, C * window_size ** 2, H / window_size,
        W / window_size)``; H and W must be multiples of ``window_size``.
        """
        ws = self.window_size
        return rearrange(x, 'b c (h h2) (w w2) -> b (c h2 w2) h w', h2=ws, w2=ws)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        The dim of x is (B, C, H, W)
        """
        # Spatial attention priority calculation
        b, c, h_, w_ = x.size()
        # (B, C, H)
        x_h = x.mean(dim=3)
        l_x_h, g_x_h_s, g_x_h_m, g_x_h_l = torch.split(x_h, self.group_chans, dim=1)
        # (B, C, W)
        x_w = x.mean(dim=2)
        l_x_w, g_x_w_s, g_x_w_m, g_x_w_l = torch.split(x_w, self.group_chans, dim=1)

        # The same four depth-wise convolutions serve both the H and W profiles.
        x_h_attn = self.sa_gate(self.norm_h(torch.cat((
            self.local_dwc(l_x_h),
            self.global_dwc_s(g_x_h_s),
            self.global_dwc_m(g_x_h_m),
            self.global_dwc_l(g_x_h_l),
        ), dim=1)))
        x_h_attn = x_h_attn.view(b, c, h_, 1)

        x_w_attn = self.sa_gate(self.norm_w(torch.cat((
            self.local_dwc(l_x_w),
            self.global_dwc_s(g_x_w_s),
            self.global_dwc_m(g_x_w_m),
            self.global_dwc_l(g_x_w_l)
        ), dim=1)))
        x_w_attn = x_w_attn.view(b, c, 1, w_)

        x = x * x_h_attn * x_w_attn

        # Channel attention based on self attention
        # reduce calculations
        y = self.down_func(x)
        y = self.conv_d(y)
        _, _, h_, w_ = y.size()

        # normalization first, then generate q, k and v
        y = self.norm(y)
        q = self.q(y)
        k = self.k(y)
        v = self.v(y)
        # (B, C, H, W) -> (B, head_num, head_dim, N)
        q = rearrange(q, 'b (head_num head_dim) h w -> b head_num head_dim (h w)', head_num=int(self.head_num),
                      head_dim=int(self.head_dim))
        k = rearrange(k, 'b (head_num head_dim) h w -> b head_num head_dim (h w)', head_num=int(self.head_num),
                      head_dim=int(self.head_dim))
        v = rearrange(v, 'b (head_num head_dim) h w -> b head_num head_dim (h w)', head_num=int(self.head_num),
                      head_dim=int(self.head_dim))

        # (B, head_num, head_dim, head_dim)
        attn = q @ k.transpose(-2, -1) * self.scaler
        attn = self.attn_drop(attn.softmax(dim=-1))
        # (B, head_num, head_dim, N)
        attn = attn @ v
        # (B, C, H_, W_)
        attn = rearrange(attn, 'b head_num head_dim (h w) -> b (head_num head_dim) h w', h=int(h_), w=int(w_))
        # (B, C, 1, 1)
        attn = attn.mean((2, 3), keepdim=True)
        attn = self.ca_gate(attn)
        return attn * x
|
||||
|
||||
if __name__ == '__main__':
    # Smoke test with a single random 256-channel, 32x32 feature map.
    block = SCSA(dim=256, head_num=8)
    input_tensor = torch.rand(1, 256, 32, 32)

    # Run a forward pass through the module.
    output_tensor = block(input_tensor)

    # Print the sizes of the input and output tensors.
    in_size = input_tensor.size()
    out_size = output_tensor.size()
    print("Input size:", in_size)
    print("Output size:", out_size)
|
65
componets/SMFA.py
Normal file
65
componets/SMFA.py
Normal file
@ -0,0 +1,65 @@
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
|
||||
|
||||
"""ECCV2024(https://github.com/Zheng-MJ/SMFANet)
|
||||
基于Transformer的恢复方法取得了显著的效果,因为Transformer的自注意力机制(SA)可以探索非局部信息,从而实现更好的高分辨率图像重建。然而,关键的点积自注意力需要大量的计算资源,这限制了其在低功耗设备上的应用。
|
||||
此外,自注意力机制的低通滤波特性限制了其捕捉局部细节的能力,从而导致重建结果过于平滑。为了解决这些问题,我们提出了一种自调制特征聚合(SMFA)模块,协同利用局部和非局部特征交互,以实现更精确的重建。
|
||||
具体而言,SMFA模块采用了高效的自注意力近似(EASA)分支来建模非局部信息,并使用局部细节估计(LDE)分支来捕捉局部细节。此外,我们还引入了基于部分卷积的前馈网络(PCFN),以进一步优化从SMFA提取的代表性特征。
|
||||
大量实验表明,所提出的SMFANet系列在公共基准数据集上实现了更好的重建性能与计算效率的平衡。
|
||||
特别是,与SwinIR-light的×4放大相比,SMFANet+在五个公共测试集上的平均性能提高了0.14dB,运行速度提升了约10倍,且模型复杂度(如FLOPs)仅为其约43%。
|
||||
"""
|
||||
|
||||
class DMlp(nn.Module):
    """Convolutional MLP: grouped 3x3 conv -> 1x1 conv -> GELU -> 1x1 conv.

    Args:
        dim: Number of input and output channels.
        growth_rate: Hidden width as a multiple of ``dim``; the product is
            truncated with ``int``.
    """

    def __init__(self, dim, growth_rate=2.0):
        super().__init__()
        hidden_dim = int(dim * growth_rate)

        # Grouped (groups=dim) 3x3 expansion followed by a 1x1 channel mix.
        expand = nn.Conv2d(dim, hidden_dim, kernel_size=3, stride=1, padding=1, groups=dim)
        mix = nn.Conv2d(hidden_dim, hidden_dim, kernel_size=1, stride=1, padding=0)
        self.conv_0 = nn.Sequential(expand, mix)
        self.act = nn.GELU()
        self.conv_1 = nn.Conv2d(hidden_dim, dim, kernel_size=1, stride=1, padding=0)

    def forward(self, x):
        """Map ``(B, dim, H, W)`` to a tensor of the same shape."""
        return self.conv_1(self.act(self.conv_0(x)))
|
||||
|
||||
|
||||
class SMFA(nn.Module):
    """Self-modulation feature aggregation over a (B, dim, H, W) map.

    A 1x1 convolution splits the input into two halves. One half is modulated
    by a gate built from its own down-sampled content and its spatial
    variance; the other half goes through ``DMlp``. The two results are summed
    and projected by a final 1x1 convolution.

    Args:
        dim: Number of input and output channels.
    """

    def __init__(self, dim=36):
        super(SMFA, self).__init__()
        self.linear_0 = nn.Conv2d(dim, dim * 2, 1, 1, 0)
        self.linear_1 = nn.Conv2d(dim, dim, 1, 1, 0)
        self.linear_2 = nn.Conv2d(dim, dim, 1, 1, 0)

        self.lde = DMlp(dim, 2)

        self.dw_conv = nn.Conv2d(dim, dim, 3, 1, 1, groups=dim)

        self.gelu = nn.GELU()
        # Spatial reduction factor used for the pooled branch.
        self.down_scale = 8

        # Per-channel weights of the pooled term (alpha) and variance term (belt).
        self.alpha = nn.Parameter(torch.ones((1, dim, 1, 1)))
        self.belt = nn.Parameter(torch.zeros((1, dim, 1, 1)))

    def forward(self, f):
        """Return a tensor with the same shape as ``f`` (``(B, dim, H, W)``)."""
        _, _, h, w = f.shape
        y, x = self.linear_0(f).chunk(2, dim=1)
        # Clamp to 1 so inputs smaller than ``down_scale`` still pool to a
        # non-empty map instead of requesting a zero-sized output.
        pooled_size = (max(1, h // self.down_scale), max(1, w // self.down_scale))
        x_s = self.dw_conv(F.adaptive_max_pool2d(x, pooled_size))
        x_v = torch.var(x, dim=(-2, -1), keepdim=True)
        # Gate computed at low resolution, then stretched back to (h, w).
        x_l = x * F.interpolate(self.gelu(self.linear_1(x_s * self.alpha + x_v * self.belt)), size=(h, w),
                                mode='nearest')
        y_d = self.lde(y)
        return self.linear_2(x_l + y_d)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Smoke test: print the input and output sizes for a random batch.
    block = SMFA(dim=36)
    sample = torch.randn(3, 36, 64, 64)
    aggregated = block(sample)
    print(sample.size())
    print(aggregated.size())
|
110
componets/TIAM.py
Normal file
110
componets/TIAM.py
Normal file
@ -0,0 +1,110 @@
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
|
||||
"""Elsevier2024
|
||||
变化检测 (CD) 是地球观测中一种重要的监测方法,尤其适用于土地利用分析、城市管理和灾害损失评估。然而,在星座互联和空天协作时代,感兴趣区域 (ROI) 的变化由于几何透视旋转和时间风格差异而导致许多错误检测。
|
||||
为了应对这些挑战,我们引入了 CDNeXt,该框架阐明了一种稳健而有效的方法,用于将基于预训练主干的 Siamese 网络与用于遥感图像的创新时空交互注意模块 (TIAM) 相结合。
|
||||
CDNeXt 可分为四个主要组件:编码器、交互器、解码器和检测器。值得注意的是,由 TIAM 提供支持的交互器从编码器提取的二进制时间特征中查询和重建空间透视依赖关系和时间风格相关性,以扩大 ROI 变化的差异。
|
||||
最后,检测器集成解码器生成的分层特征,随后生成二进制变化掩码。
|
||||
"""
|
||||
|
||||
class SpatiotemporalAttentionFullNotWeightShared(nn.Module):
    """Cross-attention between two feature maps with separate weights per input.

    Queries come from ``x1`` and keys from ``x2``. Two affinities are built
    from them: one between channels and one between spatial positions. Each
    input's value projection is re-weighted by both affinities, projected back
    to ``in_channels`` and added to the other role's residual as computed in
    ``forward``.

    Args:
        in_channels: Channel count of both inputs.
        inter_channels: Width of the query/key/value projections; defaults to
            ``in_channels // 2``, or 1 when that is 0.
        dimension: Must be 2.
        sub_sample: Stored on the instance; not otherwise used in this class.
    """

    def __init__(self, in_channels, inter_channels=None, dimension=2, sub_sample=False):
        super(SpatiotemporalAttentionFullNotWeightShared, self).__init__()
        assert dimension in [2, ]
        self.dimension = dimension
        self.sub_sample = sub_sample
        self.in_channels = in_channels
        if inter_channels is None:
            inter_channels = in_channels // 2 or 1
        self.inter_channels = inter_channels

        def norm_then_reduce():
            # BatchNorm on the input, then a 1x1 conv down to inter_channels.
            return nn.Sequential(
                nn.BatchNorm2d(self.in_channels),
                nn.Conv2d(in_channels=self.in_channels, out_channels=self.inter_channels,
                          kernel_size=1, stride=1, padding=0),
            )

        def expand_then_norm():
            # 1x1 conv back up to in_channels, then BatchNorm.
            return nn.Sequential(
                nn.Conv2d(in_channels=self.inter_channels, out_channels=self.in_channels,
                          kernel_size=1, stride=1, padding=0),
                nn.BatchNorm2d(self.in_channels),
            )

        # Creation order is kept so parameter registration and init are unchanged.
        self.g1 = norm_then_reduce()
        self.g2 = norm_then_reduce()
        self.W1 = expand_then_norm()
        self.W2 = expand_then_norm()
        self.theta = norm_then_reduce()
        self.phi = norm_then_reduce()

    def forward(self, x1, x2):
        """Return the pair ``(x1 + W1(y1), x2 + W2(y2))``.

        :param x1: first feature map, (b, c, h, w)
        :param x2: second feature map, (b, c, h, w)
        :return: two tensors with the shapes of ``x1`` and ``x2``
        """
        batch_size = x1.size(0)
        width = self.inter_channels

        def flatten_space(feature):
            # (b, c', h, w) -> (b, c', h*w)
            return feature.reshape(batch_size, width, -1)

        value_1 = flatten_space(self.g1(x1))
        value_2 = flatten_space(self.g2(x2))
        query = flatten_space(self.theta(x1))
        key = flatten_space(self.phi(x2))

        # Channel-to-channel and position-to-position affinities.
        channel_energy = torch.matmul(query, key.permute(0, 2, 1))
        spatial_energy = torch.matmul(query.permute(0, 2, 1), key)

        energy_time_1s = F.softmax(channel_energy, dim=-1)
        energy_time_2s = F.softmax(channel_energy.permute(0, 2, 1), dim=-1)
        energy_space_2s = F.softmax(spatial_energy, dim=-2)
        energy_space_1s = F.softmax(spatial_energy.permute(0, 2, 1), dim=-2)

        # Values of x1 re-weighted onto the spatial grid of x2, and vice versa.
        y1 = torch.matmul(torch.matmul(energy_time_2s, value_1), energy_space_2s).contiguous()
        y2 = torch.matmul(torch.matmul(energy_time_1s, value_2), energy_space_1s).contiguous()
        y1 = y1.reshape(batch_size, width, *x2.size()[2:])
        y2 = y2.reshape(batch_size, width, *x1.size()[2:])
        return x1 + self.W1(y1), x2 + self.W2(y2)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Smoke test with two random inputs of identical shape.
    shape = (8, 64, 32, 32)  # (batch, channels, height, width)

    block = SpatiotemporalAttentionFullNotWeightShared(in_channels=shape[1])

    input1 = torch.rand(*shape)
    input2 = torch.rand(*shape)

    output1, output2 = block(input1, input2)

    print(f"Input1 size: {input1.size()}")
    print(f"Input2 size: {input2.size()}")
    print(f"Output1 size: {output1.size()}")
    print(f"Output2 size: {output2.size()}")
|
0
componets/UFFC(CV2维任务).pdf
Normal file
0
componets/UFFC(CV2维任务).pdf
Normal file
123
componets/UFFC.py
Normal file
123
componets/UFFC.py
Normal file
@ -0,0 +1,123 @@
|
||||
import numpy as np
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
|
||||
"""ICCV2023
|
||||
最近提出的图像修复方法 LaMa 以快速傅里叶卷积 (FFC) 为基础构建了其网络,该网络最初是为图像分类等高级视觉任务而提出的。
|
||||
FFC 使全卷积网络在其早期层中拥有全局感受野。得益于 FFC 模块的独特特性,LaMa 能够生成稳健的重复纹理,
|
||||
这是以前的修复方法无法实现的。但是,原始 FFC 模块是否适合图像修复等低级视觉任务?
|
||||
在本文中,我们分析了在图像修复中使用 FFC 的基本缺陷,即 1) 频谱偏移、2) 意外的空间激活和 3) 频率感受野有限。
|
||||
这些缺陷使得基于 FFC 的修复框架难以生成复杂纹理并执行完美重建。
|
||||
基于以上分析,我们提出了一种新颖的无偏快速傅里叶卷积 (UFFC) 模块,该模块通过
|
||||
1) 范围变换和逆变换、2) 绝对位置嵌入、3) 动态跳过连接和 4) 自适应剪辑对原始 FFC 模块进行了修改,以克服这些缺陷,
|
||||
实现更好的修复效果。在多个基准数据集上进行的大量实验证明了我们方法的有效性,在纹理捕捉能力和表现力方面均优于最先进的方法。
|
||||
"""
|
||||
|
||||
class FourierUnit_modified(nn.Module):
    """Fourier unit that convolves the spectrum together with a learned position map.

    The input is transformed with a real FFT, its real and imaginary parts
    are laid out as channels, and a learned position map (``locMap``) is
    appended as one extra channel. Two convolutions, each followed by an
    ``fftshift`` along the height axis, process that spectrum. The result is
    blended with the untouched spectrum through a learned sigmoid gate,
    transformed back, re-centred on the input mean and clipped to the input
    range widened by 0.5 on each side.

    The blend and the second convolution both assume
    ``out_channels == in_channels``.

    Args:
        in_channels: Channel count of the input.
        out_channels: Channel count produced by the spectral convolutions.
        groups: ``groups`` argument of both spectral convolutions.
        spatial_scale_factor: If not ``None``, the input is resized by this
            factor before the FFT and the output resized back afterwards.
        spatial_scale_mode: Interpolation mode used for that resizing.
        spectral_pos_encoding: Stored on the instance; not used in ``forward``.
        use_se: Accepted but not used.
        ffc3d: When true, the FFT runs over the last three axes instead of two.
        fft_norm: ``norm`` argument of the forward and inverse FFT.
        input_shape: Spatial size of the tensor that is Fourier-transformed
            (i.e. after the optional resize): an int for a square input or an
            ``(H, W)`` pair. Defaults to 32, the previously hard-coded value.
    """

    def __init__(self, in_channels, out_channels, groups=1, spatial_scale_factor=None, spatial_scale_mode='bilinear',
                 spectral_pos_encoding=False, use_se=False, ffc3d=False, fft_norm='ortho', input_shape=32):
        # bn_layer not used
        super(FourierUnit_modified, self).__init__()
        self.groups = groups

        self.input_shape = input_shape
        self.in_channels = in_channels

        if isinstance(input_shape, int):
            height = width = input_shape
        else:
            height, width = input_shape
        # The real FFT keeps width // 2 + 1 frequency bins on the last axis.
        self.locMap = nn.Parameter(torch.rand(height, width // 2 + 1))

        # Passed through a sigmoid in forward(); 0 gives an even blend.
        self.lambda_base = nn.Parameter(torch.tensor(0.), requires_grad=True)

        self.conv_layer_down55 = torch.nn.Conv2d(in_channels=in_channels * 2 + 1,  # +1 for locmap
                                                 out_channels=out_channels * 2,
                                                 kernel_size=1, stride=1, padding=0, dilation=1, groups=self.groups,
                                                 bias=False, padding_mode='reflect')
        self.conv_layer_down55_shift = torch.nn.Conv2d(in_channels=in_channels * 2 + 1,  # +1 for locmap
                                                       out_channels=out_channels * 2,
                                                       kernel_size=3, stride=1, padding=2, dilation=2,
                                                       groups=self.groups, bias=False, padding_mode='reflect')

        # Kept so existing checkpoints still load; forward() does not call it.
        self.norm = nn.BatchNorm2d(out_channels)

        self.relu = nn.ReLU(inplace=True)

        self.spatial_scale_factor = spatial_scale_factor
        self.spatial_scale_mode = spatial_scale_mode
        self.spectral_pos_encoding = spectral_pos_encoding
        self.ffc3d = ffc3d
        self.fft_norm = fft_norm

        self.img_freq = None
        self.distill = None

    def forward(self, x):
        """Process ``x`` of shape ``(B, in_channels, H, W)`` in the frequency domain.

        Returns a tensor with the spatial size of the original input. The
        result is also stored on ``self.distill``.
        """
        batch = x.shape[0]

        if self.spatial_scale_factor is not None:
            orig_size = x.shape[-2:]
            x = F.interpolate(x, scale_factor=self.spatial_scale_factor, mode=self.spatial_scale_mode,
                              align_corners=False)

        fft_dim = (-3, -2, -1) if self.ffc3d else (-2, -1)
        ffted = torch.fft.rfftn(x, dim=fft_dim, norm=self.fft_norm)
        ffted = torch.stack((ffted.real, ffted.imag), dim=-1)
        ffted = ffted.permute(0, 1, 4, 2, 3).contiguous()  # (batch, c, 2, h, w/2+1)
        ffted = ffted.view((batch, -1,) + ffted.size()[3:])

        locMap = self.locMap.expand_as(ffted[:, :1, :, :])  # B 1 H' W'
        ffted_copy = ffted.clone()

        # Spectrum channels followed by the position map as one extra channel.
        cat_img_mask_freq = torch.cat((ffted, locMap), dim=1)

        ffted = self.conv_layer_down55(cat_img_mask_freq)
        ffted = torch.fft.fftshift(ffted, dim=-2)

        ffted = self.relu(ffted)

        # The features are now shifted, so the position map is shifted to match.
        locMap_shift = torch.fft.fftshift(locMap, dim=-2)

        # Second convolution on the shifted spectrum.
        cat_img_mask_freq1 = torch.cat((ffted, locMap_shift), dim=1)

        ffted = self.conv_layer_down55_shift(cat_img_mask_freq1)
        ffted = torch.fft.fftshift(ffted, dim=-2)

        lambda_base = torch.sigmoid(self.lambda_base)

        # Learned blend between the untouched and the processed spectrum.
        ffted = ffted_copy * lambda_base + ffted * (1 - lambda_base)

        # irfft
        ffted = ffted.view((batch, -1, 2,) + ffted.size()[2:]).permute(
            0, 1, 3, 4, 2).contiguous()  # (batch, c, h, w/2+1, 2)
        ffted = torch.complex(ffted[..., 0], ffted[..., 1])

        ifft_shape_slice = x.shape[-3:] if self.ffc3d else x.shape[-2:]
        output = torch.fft.irfftn(ffted, s=ifft_shape_slice, dim=fft_dim, norm=self.fft_norm)

        if self.spatial_scale_factor is not None:
            output = F.interpolate(output, size=orig_size, mode=self.spatial_scale_mode, align_corners=False)

        # Match the input mean, then clip to the input range widened by epsilon.
        epsilon = 0.5
        output = output - torch.mean(output) + torch.mean(x)
        output = torch.clip(output, float(x.min() - epsilon), float(x.max() + epsilon))

        self.distill = output  # for self perc
        return output
|
||||
|
||||
if __name__ == '__main__':
    # Smoke test: 16 channels in and out on a random 32x32 batch.
    channels = 16

    block = FourierUnit_modified(in_channels=channels, out_channels=channels)

    input_tensor = torch.rand(8, channels, 32, 32)

    output = block(input_tensor)

    in_size = input_tensor.size()
    out_size = output.size()
    print("Input size:", in_size)
    print("Output size:", out_size)
|
3
net.py
3
net.py
@ -7,6 +7,7 @@ from timm.models.layers import DropPath, to_2tuple, trunc_normal_
|
||||
from einops import rearrange
|
||||
|
||||
from componets.WTConvCV2 import WTConv2d
|
||||
from componets.SCSA import SCSA
|
||||
|
||||
|
||||
# 以一定概率随机丢弃输入张量中的路径,用于正则化模型
|
||||
@ -325,7 +326,7 @@ class DetailNode(nn.Module):
|
||||
class DetailFeatureExtraction(nn.Module):
|
||||
def __init__(self, num_layers=3):
|
||||
super(DetailFeatureExtraction, self).__init__()
|
||||
INNmodules = [DetailNode(use) for _ in range(num_layers)]
|
||||
INNmodules = [DetailNode() for _ in range(num_layers)]
|
||||
self.net = nn.Sequential(*INNmodules)
|
||||
# self.enhancement_module = WTConv2d(32, 32)
|
||||
|
||||
|
3
train.py
3
train.py
@ -6,8 +6,7 @@ Import packages
|
||||
------------------------------------------------------------------------------
|
||||
'''
|
||||
|
||||
from net import Restormer_Encoder, Restormer_Decoder, BaseFeatureExtraction, DetailFeatureExtraction, BaseFeatureFusion, \
|
||||
DetailFeatureFusioin
|
||||
from net import Restormer_Encoder, Restormer_Decoder, BaseFeatureExtraction, DetailFeatureExtraction, BaseFeatureFusion,DetailFeatureFusion
|
||||
from utils.dataset import H5Dataset
|
||||
import os
|
||||
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
|
||||
|
Loading…
Reference in New Issue
Block a user