Compare commits

...

10 Commits

Author SHA1 Message Date
c9e054e236 修改代码实现,提高代码可读性和可维护性 2024-10-05 20:49:35 +08:00
HXY13
5e3fc11c37
Add files via upload 2024-09-19 10:31:39 +08:00
HXY13
f0765f0835
Update net.py 2024-06-29 10:54:33 +08:00
HXY13
282d38e52a
Update README.md 2024-06-29 10:53:11 +08:00
HXY13
8f50f55503
Update README.md 2024-06-26 15:32:37 +08:00
HXY13
6265086a58
Delete model directory 2024-06-09 19:13:33 +08:00
HXY13
d65393b3c4
Update test_IVF.py 2024-06-09 19:06:32 +08:00
HXY13
656c8ba0a1
Add files via upload 2024-06-05 10:48:04 +08:00
HXY13
b02b41f794
Add files via upload 2024-06-04 22:00:55 +08:00
HXY13
8b1957743f
Update README.md 2024-06-03 19:48:42 +08:00
11 changed files with 49 additions and 43 deletions

BIN
PFCFuse_IVF.pth Normal file

Binary file not shown.

View File

@ -1,5 +1,4 @@
# PFCFuse: A Poolformer and CNN fusion network for Infrared-Visible Image Fusion
Poolformer-cnn图像融合框架
The implementation of our paper "PFCFuse: A Poolformer and CNN fusion network for Infrared-Visible Image Fusion".
## Recommended Environment:
python=3.8\
@ -26,3 +25,14 @@ Run
```
python test_IVF.py
```
## Related Work
```
@inproceedings{zhao2023cddfuse,
title={{CDDFuse}: Correlation-driven dual-branch feature decomposition for multi-modality image fusion},
author={Zhao, Zixiang and Bai, Haowen and Zhang, Jiangshe and Zhang, Yulun and Xu, Shuang and Lin, Zudi and Timofte, Radu and Van Gool, Luc},
booktitle={Proceedings of the IEEE/CVF conference on computer vision and pattern recognition},
pages={5906--5916},
year={2023}
}
```

BIN
image/Poolformer.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 102 KiB

BIN
image/en_decoder.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 105 KiB

BIN
image/encoder_decoder.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 350 KiB

BIN
image/qualitative.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 17 MiB

BIN
image/stage.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 271 KiB

51
net.py
View File

@ -1,4 +1,3 @@
# poolformer
import torch
import torch.nn as nn
import math
@ -9,14 +8,6 @@ from einops import rearrange
def drop_path(x, drop_prob: float = 0., training: bool = False):
"""
Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
This is the same as the DropConnect impl I created for EfficientNet, etc networks, however,
the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for
changing the layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use
'survival rate' as the argument.
"""
if drop_prob == 0. or not training:
return x
keep_prob = 1 - drop_prob
@ -40,8 +31,7 @@ class DropPath(nn.Module):
def forward(self, x):
return drop_path(x, self.drop_prob, self.training)
# 改点使用Pooling替换AttentionBase
class Pooling(nn.Module):
def __init__(self, kernel_size=3):
super().__init__()
@ -54,8 +44,8 @@ class Pooling(nn.Module):
class PoolMlp(nn.Module):
"""
Implementation of MLP with 1*1 convolutions.
Input: tensor with shape [B, C, H, W]
实现基于1x1卷积的MLP模块
输入形状为[B, C, H, W]的张量
"""
def __init__(self,
@ -65,6 +55,17 @@ class PoolMlp(nn.Module):
act_layer=nn.GELU,
bias=False,
drop=0.):
"""
初始化PoolMlp模块
参数:
in_features (int): 输入特征的数量
hidden_features (int, 可选): 隐藏层特征的数量默认为None设置为与in_features相同
out_features (int, 可选): 输出特征的数量默认为None设置为与in_features相同
act_layer (nn.Module, 可选): 使用的激活层默认为nn.GELU
bias (bool, 可选): 是否在卷积层中包含偏置项默认为False
drop (float, 可选): Dropout比率默认为0
"""
super().__init__()
out_features = out_features or in_features
hidden_features = hidden_features or in_features
@ -72,15 +73,17 @@ class PoolMlp(nn.Module):
self.act = act_layer()
self.fc2 = nn.Conv2d(hidden_features, out_features, 1, bias=bias)
self.drop = nn.Dropout(drop)
# self.apply(self._init_weights)
# def _init_weights(self, m):
# if isinstance(m, nn.Conv2D):
# trunc_normal_(m.weight)
# if m.bias is not None:
# zeros_(m.bias)
def forward(self, x):
"""
通过PoolMlp模块的前向传播
参数:
x (torch.Tensor): 形状为[B, C, H, W]的输入张量
返回:
torch.Tensor: 形状为[B, C, H, W]的输出张量
"""
x = self.fc1(x) # (B, C, H, W) --> (B, C, H, W)
x = self.act(x)
x = self.drop(x)
@ -126,7 +129,7 @@ class BaseFeatureExtraction(nn.Module):
self.layer_scale_2.unsqueeze(-1).unsqueeze(-1)
* self.poolmlp(self.norm2(x)))
else:
x = x + self.drop_path(self.token_mixer(self.norm1(x)))
x = x + self.drop_path(self.token_mixer(self.norm1(x))) # 匹配cddfuse
x = x + self.drop_path(self.poolmlp(self.norm2(x)))
return x
@ -149,11 +152,9 @@ class InvertedResidualBlock(nn.Module):
nn.Conv2d(hidden_dim, oup, 1, bias=False),
# nn.BatchNorm2d(oup),
)
def forward(self, x):
return self.bottleneckBlock(x)
class DetailNode(nn.Module):
def __init__(self):
super(DetailNode, self).__init__()
@ -181,14 +182,12 @@ class DetailFeatureExtraction(nn.Module):
super(DetailFeatureExtraction, self).__init__()
INNmodules = [DetailNode() for _ in range(num_layers)]
self.net = nn.Sequential(*INNmodules)
def forward(self, x):
z1, z2 = x[:, :x.shape[1] // 2], x[:, x.shape[1] // 2:x.shape[1]]
for layer in self.net:
z1, z2 = layer(z1, z2)
return torch.cat((z1, z2), dim=1)
# =============================================================================
# =============================================================================
@ -369,7 +368,6 @@ class Restormer_Encoder(nn.Module):
self.encoder_level1 = nn.Sequential(
*[TransformerBlock(dim=dim, num_heads=heads[0], ffn_expansion_factor=ffn_expansion_factor,
bias=bias, LayerNorm_type=LayerNorm_type) for i in range(num_blocks[0])])
self.baseFeature = BaseFeatureExtraction(dim=dim)
self.detailFeature = DetailFeatureExtraction()
@ -424,4 +422,3 @@ if __name__ == '__main__':
window_size = 8
modelE = Restormer_Encoder().cuda()
modelD = Restormer_Decoder().cuda()

5
requirement.txt Normal file
View File

@ -0,0 +1,5 @@
scipy==1.9.3
scikit-image==0.19.2
scikit-learn==1.1.3
tqdm==4.62.0

View File

@ -8,25 +8,23 @@ import torch.nn as nn
from utils.img_read_save import img_save,image_read_cv2
import warnings
import logging
# 增加
warnings.filterwarnings("ignore")
logging.basicConfig(level=logging.CRITICAL)
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
ckpt_path= r"models/PFCFuse.pth"
ckpt_path= r"/home/star/whaiDir/PFCFuse/models/PFCFusion10-05-18-13.pth"
for dataset_name in ["MSRS","TNO","RoadScene"]:
for dataset_name in ["TNO"]:
print("\n"*2+"="*80)
model_name="PFCFuse "
print("The test result of "+dataset_name+' :')
test_folder=os.path.join('test_img',dataset_name)
test_folder=os.path.join('/home/star/whaiDir/CDDFuse/test_img/',dataset_name)
test_out_folder=os.path.join('test_result',dataset_name)
device = 'cuda' if torch.cuda.is_available() else 'cpu'
Encoder = nn.DataParallel(Restormer_Encoder()).to(device)
Decoder = nn.DataParallel(Restormer_Decoder()).to(device)
# BaseFuseLayer = nn.DataParallel(BaseFeatureExtraction(dim=64, num_heads=8)).to(device)
BaseFuseLayer = nn.DataParallel(BaseFeatureExtraction(dim=64)).to(device)
DetailFuseLayer = nn.DataParallel(DetailFeatureExtraction(num_layers=1)).to(device)
@ -41,14 +39,12 @@ for dataset_name in ["MSRS","TNO","RoadScene"]:
with torch.no_grad():
for img_name in os.listdir(os.path.join(test_folder,"ir")):
print(img_name)
data_IR=image_read_cv2(os.path.join(test_folder,"ir",img_name),mode='GRAY')[np.newaxis,np.newaxis, ...]/255.0
# 改
data_VIS = cv2.split(image_read_cv2(os.path.join(test_folder, "vi", img_name), mode='YCrCb'))[0][np.newaxis, np.newaxis, ...] / 255.0
# ycrcb, uint8
data_VIS_BGR = cv2.imread(os.path.join(test_folder, "vi", img_name))
_, data_VIS_Cr, data_VIS_Cb = cv2.split(cv2.cvtColor(data_VIS_BGR, cv2.COLOR_BGR2YCrCb))
# 改
data_IR,data_VIS = torch.FloatTensor(data_IR),torch.FloatTensor(data_VIS)
data_VIS, data_IR = data_VIS.cuda(), data_IR.cuda()
@ -60,15 +56,12 @@ for dataset_name in ["MSRS","TNO","RoadScene"]:
data_Fuse, _ = Decoder(data_VIS, feature_F_B, feature_F_D)
data_Fuse=(data_Fuse-torch.min(data_Fuse))/(torch.max(data_Fuse)-torch.min(data_Fuse))
fi = np.squeeze((data_Fuse * 255).cpu().numpy())
# 改
# float32 to uint8
fi = fi.astype(np.uint8)
ycrcb_fi = np.dstack((fi, data_VIS_Cr, data_VIS_Cb))
rgb_fi = cv2.cvtColor(ycrcb_fi, cv2.COLOR_YCrCb2RGB)
img_save(rgb_fi, img_name.split(sep='.')[0], test_out_folder)
# 改
eval_folder=test_out_folder
eval_folder=test_out_folder
ori_img_folder=test_folder
metric_result = np.zeros((8))
@ -92,4 +85,4 @@ for dataset_name in ["MSRS","TNO","RoadScene"]:
+str(np.round(metric_result[6], 2))+'\t'
+str(np.round(metric_result[7], 2))
)
print("="*80)
print("="*80)

View File

@ -87,7 +87,7 @@ Loss_ssim = kornia.losses.SSIM(11, reduction='mean')
HuberLoss = nn.HuberLoss()
# data loader
trainloader = DataLoader(H5Dataset(r"data/MSRS_train_imgsize_128_stride_200.h5"),
trainloader = DataLoader(H5Dataset(r"/home/star/whaiDir/CDDFuse/data/MSRS_train_imgsize_128_stride_200.h5"),
batch_size=batch_size,
shuffle=True,
num_workers=0)
@ -201,13 +201,14 @@ for epoch in range(num_epochs):
epoch_time = time.time() - prev_time
prev_time = time.time()
sys.stdout.write(
"\r[Epoch %d/%d] [Batch %d/%d] [loss: %f]"
"\r[Epoch %d/%d] [Batch %d/%d] [loss: %f] ETA: %.10s"
% (
epoch,
num_epochs,
i,
len(loader['train']),
loss.item(),
time_left,
)
)