11 changed files with 43 additions and 49 deletions
--- a/PFCFuse_IVF.pth
+++ b/PFCFuse_IVF.pth
--- a/README.md
+++ b/README.md
@ -1,4 +1,5 @@
 # PFCFuse: A Poolformer and CNN fusion network for Infrared-Visible Image Fusion
+A PoolFormer-CNN image fusion framework.
 The implementation of our paper "PFCFuse: A Poolformer and CNN fusion network for Infrared-Visible Image Fusion".
 ## Recommended Environment:
 python=3.8\
@ -25,14 +26,3 @@ Run
 ```
 python test_IVF.py
 ```
-
-## 相关工作
-```
-@inproceedings{zhao2023cddfuse,
-  title={Cddfuse: Correlation-driven dual-branch feature decomposition for multi-modality image fusion},
-  author={Zhao, Zixiang and Bai, Haowen and Zhang, Jiangshe and Zhang, Yulun and Xu, Shuang and Lin, Zudi and Timofte, Radu and Van Gool, Luc},
-  booktitle={Proceedings of the IEEE/CVF conference on computer vision and pattern recognition},
-  pages={5906--5916},
-  year={2023}
-}
-```
--- a/image/Poolformer.png
+++ b/image/Poolformer.png
--- a/image/en_decoder.png
+++ b/image/en_decoder.png
--- a/image/encoder_decoder.png
+++ b/image/encoder_decoder.png
--- a/image/qualitative.png
+++ b/image/qualitative.png
--- a/image/stage.png
+++ b/image/stage.png
--- a/net.py
+++ b/net.py
@ -1,3 +1,4 @@
+# poolformer
 import torch
 import torch.nn as nn
 import math
@ -8,6 +9,14 @@ from einops import rearrange


 def drop_path(x, drop_prob: float = 0., training: bool = False):
+    """
+    Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
+    This is the same as the DropConnect impl I created for EfficientNet, etc networks, however,
+    the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
+    See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for
+    changing the layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use
+    'survival rate' as the argument.
+    """
    if drop_prob == 0. or not training:
        return x
    keep_prob = 1 - drop_prob
@ -31,7 +40,8 @@ class DropPath(nn.Module):

    def forward(self, x):
        return drop_path(x, self.drop_prob, self.training)
-# 改点，使用Pooling替换AttentionBase
+
+
 class Pooling(nn.Module):
    def __init__(self, kernel_size=3):
        super().__init__()
@ -44,8 +54,8 @@ class Pooling(nn.Module):

 class PoolMlp(nn.Module):
    """
-    实现基于1x1卷积的MLP模块。
-    输入：形状为[B, C, H, W]的张量。
+      Implementation of MLP with 1*1 convolutions.
+      Input: tensor with shape [B, C, H, W]
    """

    def __init__(self,
@ -55,17 +65,6 @@ class PoolMlp(nn.Module):
                 act_layer=nn.GELU,
                 bias=False,
                 drop=0.):
-        """
-        初始化PoolMlp模块。
-
-        参数:
-            in_features (int): 输入特征的数量。
-            hidden_features (int, 可选): 隐藏层特征的数量。默认为None，设置为与in_features相同。
-            out_features (int, 可选): 输出特征的数量。默认为None，设置为与in_features相同。
-            act_layer (nn.Module, 可选): 使用的激活层。默认为nn.GELU。
-            bias (bool, 可选): 是否在卷积层中包含偏置项。默认为False。
-            drop (float, 可选): Dropout比率。默认为0。
-        """
        super().__init__()
        out_features = out_features or in_features
        hidden_features = hidden_features or in_features
@ -73,17 +72,15 @@ class PoolMlp(nn.Module):
        self.act = act_layer()
        self.fc2 = nn.Conv2d(hidden_features, out_features, 1, bias=bias)
        self.drop = nn.Dropout(drop)
+        # self.apply(self._init_weights)
+
+    # def _init_weights(self, m):
+    #     if isinstance(m, nn.Conv2D):
+    #         trunc_normal_(m.weight)
+    #         if m.bias is not None:
+    #             zeros_(m.bias)

    def forward(self, x):
-        """
-        通过PoolMlp模块的前向传播。
-
-        参数:
-            x (torch.Tensor): 形状为[B, C, H, W]的输入张量。
-
-        返回:
-            torch.Tensor: 形状为[B, C, H, W]的输出张量。
-        """
        x = self.fc1(x)  # (B, C, H, W) --> (B, C, H, W)
        x = self.act(x)
        x = self.drop(x)
@ -129,7 +126,7 @@ class BaseFeatureExtraction(nn.Module):
                self.layer_scale_2.unsqueeze(-1).unsqueeze(-1)
                * self.poolmlp(self.norm2(x)))
        else:
-            x = x + self.drop_path(self.token_mixer(self.norm1(x))) # 匹配cddfuse
+            x = x + self.drop_path(self.token_mixer(self.norm1(x)))
            x = x + self.drop_path(self.poolmlp(self.norm2(x)))
        return x

@ -152,9 +149,11 @@ class InvertedResidualBlock(nn.Module):
            nn.Conv2d(hidden_dim, oup, 1, bias=False),
            # nn.BatchNorm2d(oup),
        )
+
    def forward(self, x):
        return self.bottleneckBlock(x)

+
 class DetailNode(nn.Module):
    def __init__(self):
        super(DetailNode, self).__init__()
@ -182,12 +181,14 @@ class DetailFeatureExtraction(nn.Module):
        super(DetailFeatureExtraction, self).__init__()
        INNmodules = [DetailNode() for _ in range(num_layers)]
        self.net = nn.Sequential(*INNmodules)
+
    def forward(self, x):
        z1, z2 = x[:, :x.shape[1] // 2], x[:, x.shape[1] // 2:x.shape[1]]
        for layer in self.net:
            z1, z2 = layer(z1, z2)
        return torch.cat((z1, z2), dim=1)

+
 # =============================================================================

 # =============================================================================
@ -368,6 +369,7 @@ class Restormer_Encoder(nn.Module):
        self.encoder_level1 = nn.Sequential(
            *[TransformerBlock(dim=dim, num_heads=heads[0], ffn_expansion_factor=ffn_expansion_factor,
                               bias=bias, LayerNorm_type=LayerNorm_type) for i in range(num_blocks[0])])
+
        self.baseFeature = BaseFeatureExtraction(dim=dim)

        self.detailFeature = DetailFeatureExtraction()
@ -422,3 +424,4 @@ if __name__ == '__main__':
    window_size = 8
    modelE = Restormer_Encoder().cuda()
    modelD = Restormer_Decoder().cuda()
+
--- a/requirement.txt
+++ b/requirement.txt
@ -1,5 +0,0 @@
-
-scipy==1.9.3
-scikit-image==0.19.2
-scikit-learn==1.1.3
-tqdm==4.62.0
--- a/test_IVF.py
+++ b/test_IVF.py
@ -8,23 +8,25 @@ import torch.nn as nn
 from utils.img_read_save import img_save,image_read_cv2
 import warnings
 import logging
+# Added: ignore all warnings and raise the root logging level to CRITICAL.
 warnings.filterwarnings("ignore")
 logging.basicConfig(level=logging.CRITICAL)


 os.environ["CUDA_VISIBLE_DEVICES"] = "0"
-ckpt_path= r"/home/star/whaiDir/PFCFuse/models/PFCFusion10-05-18-13.pth"
+ckpt_path= r"models/PFCFuse.pth"

-for dataset_name in ["TNO"]:
+for dataset_name in ["MSRS","TNO","RoadScene"]:
    print("\n"*2+"="*80)
    model_name="PFCFuse    "
    print("The test result of "+dataset_name+' :')
-    test_folder=os.path.join('/home/star/whaiDir/CDDFuse/test_img/',dataset_name)
+    test_folder=os.path.join('test_img',dataset_name) 
    test_out_folder=os.path.join('test_result',dataset_name)

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    Encoder = nn.DataParallel(Restormer_Encoder()).to(device)
    Decoder = nn.DataParallel(Restormer_Decoder()).to(device)
+    # BaseFuseLayer = nn.DataParallel(BaseFeatureExtraction(dim=64, num_heads=8)).to(device)
    BaseFuseLayer = nn.DataParallel(BaseFeatureExtraction(dim=64)).to(device)
    DetailFuseLayer = nn.DataParallel(DetailFeatureExtraction(num_layers=1)).to(device)

@ -39,12 +41,14 @@ for dataset_name in ["TNO"]:

    with torch.no_grad():
        for img_name in os.listdir(os.path.join(test_folder,"ir")):
-            print(img_name)

            data_IR=image_read_cv2(os.path.join(test_folder,"ir",img_name),mode='GRAY')[np.newaxis,np.newaxis, ...]/255.0
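+            # Changed: read the visible image in 'YCrCb' mode and keep only its first channel (presumably Y), divided by 255.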
            data_VIS = cv2.split(image_read_cv2(os.path.join(test_folder, "vi", img_name), mode='YCrCb'))[0][np.newaxis, np.newaxis, ...] / 255.0
+            # Re-read the visible image as BGR and keep the Cr/Cb planes of its YCrCb conversion (uint8).
            data_VIS_BGR = cv2.imread(os.path.join(test_folder, "vi", img_name))
            _, data_VIS_Cr, data_VIS_Cb = cv2.split(cv2.cvtColor(data_VIS_BGR, cv2.COLOR_BGR2YCrCb))
+            # Changed (appears to close the modified section above).

            data_IR,data_VIS = torch.FloatTensor(data_IR),torch.FloatTensor(data_VIS)
            data_VIS, data_IR = data_VIS.cuda(), data_IR.cuda()
@ -56,12 +60,15 @@ for dataset_name in ["TNO"]:
            data_Fuse, _ = Decoder(data_VIS, feature_F_B, feature_F_D)
            data_Fuse=(data_Fuse-torch.min(data_Fuse))/(torch.max(data_Fuse)-torch.min(data_Fuse))
            fi = np.squeeze((data_Fuse * 255).cpu().numpy())
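+            # Changed: use the fused output as the Y plane and reattach the visible image's Cr/Cb before saving.
+            # float32 to uint8 (astype truncates, it does not round)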
            fi = fi.astype(np.uint8)
            ycrcb_fi = np.dstack((fi, data_VIS_Cr, data_VIS_Cb))
            rgb_fi = cv2.cvtColor(ycrcb_fi, cv2.COLOR_YCrCb2RGB)
            img_save(rgb_fi, img_name.split(sep='.')[0], test_out_folder)
+            # Changed (appears to close the modified section above).

-    eval_folder=test_out_folder
+    eval_folder=test_out_folder  
    ori_img_folder=test_folder

    metric_result = np.zeros((8))
@ -85,4 +92,4 @@ for dataset_name in ["TNO"]:
            +str(np.round(metric_result[6], 2))+'\t'
            +str(np.round(metric_result[7], 2))
            )
-    print("="*80)
+    print("="*80)
--- a/train.py
+++ b/train.py
@ -87,7 +87,7 @@ Loss_ssim = kornia.losses.SSIM(11, reduction='mean')
 HuberLoss = nn.HuberLoss()

 # data loader
-trainloader = DataLoader(H5Dataset(r"/home/star/whaiDir/CDDFuse/data/MSRS_train_imgsize_128_stride_200.h5"),
+trainloader = DataLoader(H5Dataset(r"data/MSRS_train_imgsize_128_stride_200.h5"),
                         batch_size=batch_size,
                         shuffle=True,
                         num_workers=0)
@ -201,14 +201,13 @@ for epoch in range(num_epochs):
        epoch_time = time.time() - prev_time
        prev_time = time.time()
        sys.stdout.write(
-            "\r[Epoch %d/%d] [Batch %d/%d] [loss: %f] ETA: %.10s"
+            "\r[Epoch %d/%d] [Batch %d/%d] [loss: %f]"
            % (
                epoch,
                num_epochs,
                i,
                len(loader['train']),
                loss.item(),
-                time_left,
            )
        )