Compare commits
10 Commits
1ad1204210
...
c9e054e236
Author | SHA1 | Date | |
---|---|---|---|
c9e054e236 | |||
|
5e3fc11c37 | ||
|
f0765f0835 | ||
|
282d38e52a | ||
|
8f50f55503 | ||
|
6265086a58 | ||
|
d65393b3c4 | ||
|
656c8ba0a1 | ||
|
b02b41f794 | ||
|
8b1957743f |
BIN
PFCFuse_IVF.pth
Normal file
BIN
PFCFuse_IVF.pth
Normal file
Binary file not shown.
12
README.md
12
README.md
@@ -1,5 +1,4 @@
|
|||||||
# PFCFuse: A Poolformer and CNN fusion network for Infrared-Visible Image Fusion
|
# PFCFuse: A Poolformer and CNN fusion network for Infrared-Visible Image Fusion
|
||||||
Poolformer-cnn图像融合框架
|
|
||||||
The implementation of our paper "PFCFuse: A Poolformer and CNN fusion network for Infrared-Visible Image Fusion".
|
The implementation of our paper "PFCFuse: A Poolformer and CNN fusion network for Infrared-Visible Image Fusion".
|
||||||
## Recommended Environment:
|
## Recommended Environment:
|
||||||
python=3.8\
|
python=3.8\
|
||||||
@@ -26,3 +25,14 @@ Run
|
|||||||
```
|
```
|
||||||
python test_IVF.py
|
python test_IVF.py
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## 相关工作
|
||||||
|
```
|
||||||
|
@inproceedings{zhao2023cddfuse,
|
||||||
|
title={Cddfuse: Correlation-driven dual-branch feature decomposition for multi-modality image fusion},
|
||||||
|
author={Zhao, Zixiang and Bai, Haowen and Zhang, Jiangshe and Zhang, Yulun and Xu, Shuang and Lin, Zudi and Timofte, Radu and Van Gool, Luc},
|
||||||
|
booktitle={Proceedings of the IEEE/CVF conference on computer vision and pattern recognition},
|
||||||
|
pages={5906--5916},
|
||||||
|
year={2023}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
BIN
image/Poolformer.png
Normal file
BIN
image/Poolformer.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 102 KiB |
BIN
image/en_decoder.png
Normal file
BIN
image/en_decoder.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 105 KiB |
BIN
image/encoder_decoder.png
Normal file
BIN
image/encoder_decoder.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 350 KiB |
BIN
image/qualitative.png
Normal file
BIN
image/qualitative.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 17 MiB |
BIN
image/stage.png
Normal file
BIN
image/stage.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 271 KiB |
51
net.py
51
net.py
@@ -1,4 +1,3 @@
|
|||||||
# poolformer
|
|
||||||
import torch
|
import torch
|
||||||
import torch.nn as nn
|
import torch.nn as nn
|
||||||
import math
|
import math
|
||||||
@@ -9,14 +8,6 @@ from einops import rearrange
|
|||||||
|
|
||||||
|
|
||||||
def drop_path(x, drop_prob: float = 0., training: bool = False):
|
def drop_path(x, drop_prob: float = 0., training: bool = False):
|
||||||
"""
|
|
||||||
Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
|
|
||||||
This is the same as the DropConnect impl I created for EfficientNet, etc networks, however,
|
|
||||||
the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
|
|
||||||
See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for
|
|
||||||
changing the layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use
|
|
||||||
'survival rate' as the argument.
|
|
||||||
"""
|
|
||||||
if drop_prob == 0. or not training:
|
if drop_prob == 0. or not training:
|
||||||
return x
|
return x
|
||||||
keep_prob = 1 - drop_prob
|
keep_prob = 1 - drop_prob
|
||||||
@@ -40,8 +31,7 @@ class DropPath(nn.Module):
|
|||||||
|
|
||||||
def forward(self, x):
|
def forward(self, x):
|
||||||
return drop_path(x, self.drop_prob, self.training)
|
return drop_path(x, self.drop_prob, self.training)
|
||||||
|
# 改点,使用Pooling替换AttentionBase
|
||||||
|
|
||||||
class Pooling(nn.Module):
|
class Pooling(nn.Module):
|
||||||
def __init__(self, kernel_size=3):
|
def __init__(self, kernel_size=3):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
@@ -54,8 +44,8 @@ class Pooling(nn.Module):
|
|||||||
|
|
||||||
class PoolMlp(nn.Module):
|
class PoolMlp(nn.Module):
|
||||||
"""
|
"""
|
||||||
Implementation of MLP with 1*1 convolutions.
|
实现基于1x1卷积的MLP模块。
|
||||||
Input: tensor with shape [B, C, H, W]
|
输入:形状为[B, C, H, W]的张量。
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self,
|
def __init__(self,
|
||||||
@@ -65,6 +55,17 @@ class PoolMlp(nn.Module):
|
|||||||
act_layer=nn.GELU,
|
act_layer=nn.GELU,
|
||||||
bias=False,
|
bias=False,
|
||||||
drop=0.):
|
drop=0.):
|
||||||
|
"""
|
||||||
|
初始化PoolMlp模块。
|
||||||
|
|
||||||
|
参数:
|
||||||
|
in_features (int): 输入特征的数量。
|
||||||
|
hidden_features (int, 可选): 隐藏层特征的数量。默认为None,设置为与in_features相同。
|
||||||
|
out_features (int, 可选): 输出特征的数量。默认为None,设置为与in_features相同。
|
||||||
|
act_layer (nn.Module, 可选): 使用的激活层。默认为nn.GELU。
|
||||||
|
bias (bool, 可选): 是否在卷积层中包含偏置项。默认为False。
|
||||||
|
drop (float, 可选): Dropout比率。默认为0。
|
||||||
|
"""
|
||||||
super().__init__()
|
super().__init__()
|
||||||
out_features = out_features or in_features
|
out_features = out_features or in_features
|
||||||
hidden_features = hidden_features or in_features
|
hidden_features = hidden_features or in_features
|
||||||
@@ -72,15 +73,17 @@ class PoolMlp(nn.Module):
|
|||||||
self.act = act_layer()
|
self.act = act_layer()
|
||||||
self.fc2 = nn.Conv2d(hidden_features, out_features, 1, bias=bias)
|
self.fc2 = nn.Conv2d(hidden_features, out_features, 1, bias=bias)
|
||||||
self.drop = nn.Dropout(drop)
|
self.drop = nn.Dropout(drop)
|
||||||
# self.apply(self._init_weights)
|
|
||||||
|
|
||||||
# def _init_weights(self, m):
|
|
||||||
# if isinstance(m, nn.Conv2D):
|
|
||||||
# trunc_normal_(m.weight)
|
|
||||||
# if m.bias is not None:
|
|
||||||
# zeros_(m.bias)
|
|
||||||
|
|
||||||
def forward(self, x):
|
def forward(self, x):
|
||||||
|
"""
|
||||||
|
通过PoolMlp模块的前向传播。
|
||||||
|
|
||||||
|
参数:
|
||||||
|
x (torch.Tensor): 形状为[B, C, H, W]的输入张量。
|
||||||
|
|
||||||
|
返回:
|
||||||
|
torch.Tensor: 形状为[B, C, H, W]的输出张量。
|
||||||
|
"""
|
||||||
x = self.fc1(x) # (B, C, H, W) --> (B, C, H, W)
|
x = self.fc1(x) # (B, C, H, W) --> (B, C, H, W)
|
||||||
x = self.act(x)
|
x = self.act(x)
|
||||||
x = self.drop(x)
|
x = self.drop(x)
|
||||||
@@ -126,7 +129,7 @@ class BaseFeatureExtraction(nn.Module):
|
|||||||
self.layer_scale_2.unsqueeze(-1).unsqueeze(-1)
|
self.layer_scale_2.unsqueeze(-1).unsqueeze(-1)
|
||||||
* self.poolmlp(self.norm2(x)))
|
* self.poolmlp(self.norm2(x)))
|
||||||
else:
|
else:
|
||||||
x = x + self.drop_path(self.token_mixer(self.norm1(x)))
|
x = x + self.drop_path(self.token_mixer(self.norm1(x))) # 匹配cddfuse
|
||||||
x = x + self.drop_path(self.poolmlp(self.norm2(x)))
|
x = x + self.drop_path(self.poolmlp(self.norm2(x)))
|
||||||
return x
|
return x
|
||||||
|
|
||||||
@@ -149,11 +152,9 @@ class InvertedResidualBlock(nn.Module):
|
|||||||
nn.Conv2d(hidden_dim, oup, 1, bias=False),
|
nn.Conv2d(hidden_dim, oup, 1, bias=False),
|
||||||
# nn.BatchNorm2d(oup),
|
# nn.BatchNorm2d(oup),
|
||||||
)
|
)
|
||||||
|
|
||||||
def forward(self, x):
|
def forward(self, x):
|
||||||
return self.bottleneckBlock(x)
|
return self.bottleneckBlock(x)
|
||||||
|
|
||||||
|
|
||||||
class DetailNode(nn.Module):
|
class DetailNode(nn.Module):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
super(DetailNode, self).__init__()
|
super(DetailNode, self).__init__()
|
||||||
@@ -181,14 +182,12 @@ class DetailFeatureExtraction(nn.Module):
|
|||||||
super(DetailFeatureExtraction, self).__init__()
|
super(DetailFeatureExtraction, self).__init__()
|
||||||
INNmodules = [DetailNode() for _ in range(num_layers)]
|
INNmodules = [DetailNode() for _ in range(num_layers)]
|
||||||
self.net = nn.Sequential(*INNmodules)
|
self.net = nn.Sequential(*INNmodules)
|
||||||
|
|
||||||
def forward(self, x):
|
def forward(self, x):
|
||||||
z1, z2 = x[:, :x.shape[1] // 2], x[:, x.shape[1] // 2:x.shape[1]]
|
z1, z2 = x[:, :x.shape[1] // 2], x[:, x.shape[1] // 2:x.shape[1]]
|
||||||
for layer in self.net:
|
for layer in self.net:
|
||||||
z1, z2 = layer(z1, z2)
|
z1, z2 = layer(z1, z2)
|
||||||
return torch.cat((z1, z2), dim=1)
|
return torch.cat((z1, z2), dim=1)
|
||||||
|
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
@@ -369,7 +368,6 @@ class Restormer_Encoder(nn.Module):
|
|||||||
self.encoder_level1 = nn.Sequential(
|
self.encoder_level1 = nn.Sequential(
|
||||||
*[TransformerBlock(dim=dim, num_heads=heads[0], ffn_expansion_factor=ffn_expansion_factor,
|
*[TransformerBlock(dim=dim, num_heads=heads[0], ffn_expansion_factor=ffn_expansion_factor,
|
||||||
bias=bias, LayerNorm_type=LayerNorm_type) for i in range(num_blocks[0])])
|
bias=bias, LayerNorm_type=LayerNorm_type) for i in range(num_blocks[0])])
|
||||||
|
|
||||||
self.baseFeature = BaseFeatureExtraction(dim=dim)
|
self.baseFeature = BaseFeatureExtraction(dim=dim)
|
||||||
|
|
||||||
self.detailFeature = DetailFeatureExtraction()
|
self.detailFeature = DetailFeatureExtraction()
|
||||||
@@ -424,4 +422,3 @@ if __name__ == '__main__':
|
|||||||
window_size = 8
|
window_size = 8
|
||||||
modelE = Restormer_Encoder().cuda()
|
modelE = Restormer_Encoder().cuda()
|
||||||
modelD = Restormer_Decoder().cuda()
|
modelD = Restormer_Decoder().cuda()
|
||||||
|
|
||||||
|
5
requirement.txt
Normal file
5
requirement.txt
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
|
||||||
|
scipy==1.9.3
|
||||||
|
scikit-image==0.19.2
|
||||||
|
scikit-learn==1.1.3
|
||||||
|
tqdm==4.62.0
|
19
test_IVF.py
19
test_IVF.py
@@ -8,25 +8,23 @@ import torch.nn as nn
|
|||||||
from utils.img_read_save import img_save,image_read_cv2
|
from utils.img_read_save import img_save,image_read_cv2
|
||||||
import warnings
|
import warnings
|
||||||
import logging
|
import logging
|
||||||
# 增加
|
|
||||||
warnings.filterwarnings("ignore")
|
warnings.filterwarnings("ignore")
|
||||||
logging.basicConfig(level=logging.CRITICAL)
|
logging.basicConfig(level=logging.CRITICAL)
|
||||||
|
|
||||||
|
|
||||||
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
|
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
|
||||||
ckpt_path= r"models/PFCFuse.pth"
|
ckpt_path= r"/home/star/whaiDir/PFCFuse/models/PFCFusion10-05-18-13.pth"
|
||||||
|
|
||||||
for dataset_name in ["MSRS","TNO","RoadScene"]:
|
for dataset_name in ["TNO"]:
|
||||||
print("\n"*2+"="*80)
|
print("\n"*2+"="*80)
|
||||||
model_name="PFCFuse "
|
model_name="PFCFuse "
|
||||||
print("The test result of "+dataset_name+' :')
|
print("The test result of "+dataset_name+' :')
|
||||||
test_folder=os.path.join('test_img',dataset_name)
|
test_folder=os.path.join('/home/star/whaiDir/CDDFuse/test_img/',dataset_name)
|
||||||
test_out_folder=os.path.join('test_result',dataset_name)
|
test_out_folder=os.path.join('test_result',dataset_name)
|
||||||
|
|
||||||
device = 'cuda' if torch.cuda.is_available() else 'cpu'
|
device = 'cuda' if torch.cuda.is_available() else 'cpu'
|
||||||
Encoder = nn.DataParallel(Restormer_Encoder()).to(device)
|
Encoder = nn.DataParallel(Restormer_Encoder()).to(device)
|
||||||
Decoder = nn.DataParallel(Restormer_Decoder()).to(device)
|
Decoder = nn.DataParallel(Restormer_Decoder()).to(device)
|
||||||
# BaseFuseLayer = nn.DataParallel(BaseFeatureExtraction(dim=64, num_heads=8)).to(device)
|
|
||||||
BaseFuseLayer = nn.DataParallel(BaseFeatureExtraction(dim=64)).to(device)
|
BaseFuseLayer = nn.DataParallel(BaseFeatureExtraction(dim=64)).to(device)
|
||||||
DetailFuseLayer = nn.DataParallel(DetailFeatureExtraction(num_layers=1)).to(device)
|
DetailFuseLayer = nn.DataParallel(DetailFeatureExtraction(num_layers=1)).to(device)
|
||||||
|
|
||||||
@@ -41,14 +39,12 @@ for dataset_name in ["MSRS","TNO","RoadScene"]:
|
|||||||
|
|
||||||
with torch.no_grad():
|
with torch.no_grad():
|
||||||
for img_name in os.listdir(os.path.join(test_folder,"ir")):
|
for img_name in os.listdir(os.path.join(test_folder,"ir")):
|
||||||
|
print(img_name)
|
||||||
|
|
||||||
data_IR=image_read_cv2(os.path.join(test_folder,"ir",img_name),mode='GRAY')[np.newaxis,np.newaxis, ...]/255.0
|
data_IR=image_read_cv2(os.path.join(test_folder,"ir",img_name),mode='GRAY')[np.newaxis,np.newaxis, ...]/255.0
|
||||||
# 改
|
|
||||||
data_VIS = cv2.split(image_read_cv2(os.path.join(test_folder, "vi", img_name), mode='YCrCb'))[0][np.newaxis, np.newaxis, ...] / 255.0
|
data_VIS = cv2.split(image_read_cv2(os.path.join(test_folder, "vi", img_name), mode='YCrCb'))[0][np.newaxis, np.newaxis, ...] / 255.0
|
||||||
# ycrcb, uint8
|
|
||||||
data_VIS_BGR = cv2.imread(os.path.join(test_folder, "vi", img_name))
|
data_VIS_BGR = cv2.imread(os.path.join(test_folder, "vi", img_name))
|
||||||
_, data_VIS_Cr, data_VIS_Cb = cv2.split(cv2.cvtColor(data_VIS_BGR, cv2.COLOR_BGR2YCrCb))
|
_, data_VIS_Cr, data_VIS_Cb = cv2.split(cv2.cvtColor(data_VIS_BGR, cv2.COLOR_BGR2YCrCb))
|
||||||
# 改
|
|
||||||
|
|
||||||
data_IR,data_VIS = torch.FloatTensor(data_IR),torch.FloatTensor(data_VIS)
|
data_IR,data_VIS = torch.FloatTensor(data_IR),torch.FloatTensor(data_VIS)
|
||||||
data_VIS, data_IR = data_VIS.cuda(), data_IR.cuda()
|
data_VIS, data_IR = data_VIS.cuda(), data_IR.cuda()
|
||||||
@@ -60,15 +56,12 @@ for dataset_name in ["MSRS","TNO","RoadScene"]:
|
|||||||
data_Fuse, _ = Decoder(data_VIS, feature_F_B, feature_F_D)
|
data_Fuse, _ = Decoder(data_VIS, feature_F_B, feature_F_D)
|
||||||
data_Fuse=(data_Fuse-torch.min(data_Fuse))/(torch.max(data_Fuse)-torch.min(data_Fuse))
|
data_Fuse=(data_Fuse-torch.min(data_Fuse))/(torch.max(data_Fuse)-torch.min(data_Fuse))
|
||||||
fi = np.squeeze((data_Fuse * 255).cpu().numpy())
|
fi = np.squeeze((data_Fuse * 255).cpu().numpy())
|
||||||
# 改
|
|
||||||
# float32 to uint8
|
|
||||||
fi = fi.astype(np.uint8)
|
fi = fi.astype(np.uint8)
|
||||||
ycrcb_fi = np.dstack((fi, data_VIS_Cr, data_VIS_Cb))
|
ycrcb_fi = np.dstack((fi, data_VIS_Cr, data_VIS_Cb))
|
||||||
rgb_fi = cv2.cvtColor(ycrcb_fi, cv2.COLOR_YCrCb2RGB)
|
rgb_fi = cv2.cvtColor(ycrcb_fi, cv2.COLOR_YCrCb2RGB)
|
||||||
img_save(rgb_fi, img_name.split(sep='.')[0], test_out_folder)
|
img_save(rgb_fi, img_name.split(sep='.')[0], test_out_folder)
|
||||||
# 改
|
|
||||||
|
|
||||||
eval_folder=test_out_folder
|
eval_folder=test_out_folder
|
||||||
ori_img_folder=test_folder
|
ori_img_folder=test_folder
|
||||||
|
|
||||||
metric_result = np.zeros((8))
|
metric_result = np.zeros((8))
|
||||||
@@ -92,4 +85,4 @@ for dataset_name in ["MSRS","TNO","RoadScene"]:
|
|||||||
+str(np.round(metric_result[6], 2))+'\t'
|
+str(np.round(metric_result[6], 2))+'\t'
|
||||||
+str(np.round(metric_result[7], 2))
|
+str(np.round(metric_result[7], 2))
|
||||||
)
|
)
|
||||||
print("="*80)
|
print("="*80)
|
||||||
|
5
train.py
5
train.py
@@ -87,7 +87,7 @@ Loss_ssim = kornia.losses.SSIM(11, reduction='mean')
|
|||||||
HuberLoss = nn.HuberLoss()
|
HuberLoss = nn.HuberLoss()
|
||||||
|
|
||||||
# data loader
|
# data loader
|
||||||
trainloader = DataLoader(H5Dataset(r"data/MSRS_train_imgsize_128_stride_200.h5"),
|
trainloader = DataLoader(H5Dataset(r"/home/star/whaiDir/CDDFuse/data/MSRS_train_imgsize_128_stride_200.h5"),
|
||||||
batch_size=batch_size,
|
batch_size=batch_size,
|
||||||
shuffle=True,
|
shuffle=True,
|
||||||
num_workers=0)
|
num_workers=0)
|
||||||
@@ -201,13 +201,14 @@ for epoch in range(num_epochs):
|
|||||||
epoch_time = time.time() - prev_time
|
epoch_time = time.time() - prev_time
|
||||||
prev_time = time.time()
|
prev_time = time.time()
|
||||||
sys.stdout.write(
|
sys.stdout.write(
|
||||||
"\r[Epoch %d/%d] [Batch %d/%d] [loss: %f]"
|
"\r[Epoch %d/%d] [Batch %d/%d] [loss: %f] ETA: %.10s"
|
||||||
% (
|
% (
|
||||||
epoch,
|
epoch,
|
||||||
num_epochs,
|
num_epochs,
|
||||||
i,
|
i,
|
||||||
len(loader['train']),
|
len(loader['train']),
|
||||||
loss.item(),
|
loss.item(),
|
||||||
|
time_left,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user