
The Inspiration That Slipped Away: Weight Standardization for Convolutions


See here for the homepage of this blog series and related posts.


I recently read a paper from Johns Hopkins University, Weight Standardization, and realized it is almost exactly what I tried a while back. The idea was essentially identical, but I never got it to work at the time. Perhaps it just comes down to luck.

The paper simply standardizes the convolution weights; the required code change is minimal:
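In symbols, writing \mu_i and \sigma_i for the mean and standard deviation taken over all weights feeding output channel i, each filter is rescaled as (matching the epsilon placement in the code below):

\hat{W}_i = (W_i - \mu_i) / (\sigma_i + \epsilon)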

# Pytorch
import torch.nn as nn
import torch.nn.functional as F

class Conv2d(nn.Conv2d):
    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
                 padding=0, dilation=1, groups=1, bias=True):
        super(Conv2d, self).__init__(in_channels, out_channels, kernel_size,
                                     stride, padding, dilation, groups, bias)

    def forward(self, x):
        # Standardize each output channel's weights to zero mean, unit std,
        # then convolve with the standardized kernel.
        weight = self.weight
        weight_mean = weight.mean(dim=1, keepdim=True).mean(dim=2,
                                  keepdim=True).mean(dim=3, keepdim=True)
        weight = weight - weight_mean
        std = weight.view(weight.size(0), -1).std(dim=1).view(-1, 1, 1, 1) + 1e-5
        weight = weight / std.expand_as(weight)
        return F.conv2d(x, weight, self.bias, self.stride,
                        self.padding, self.dilation, self.groups)

# Tensorflow
# kernel has shape [H, W, C_in, C_out]; standardize over every axis except the output channel.
kernel_mean = tf.math.reduce_mean(kernel, axis=[0, 1, 2], keepdims=True, name='kernel_mean')
kernel = kernel - kernel_mean
# tf.keras.backend.std computes the same statistic as the commented-out tf.math.reduce_std.
#kernel_std = tf.math.reduce_std(kernel, axis=[0, 1, 2], keepdims=True, name='kernel_std')
kernel_std = tf.keras.backend.std(kernel, axis=[0, 1, 2], keepdims=True)
kernel = kernel / (kernel_std + 1e-5)
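
As a quick sanity check (a minimal sketch; it assumes the Conv2d class from the PyTorch snippet above), the weights actually used in the forward pass should come out with per-filter mean close to 0 and std close to 1:

# Usage sketch for the Conv2d class above.
import torch

conv = Conv2d(3, 8, kernel_size=3, padding=1)
x = torch.randn(2, 3, 32, 32)
y = conv(x)          # standardization happens inside forward()
print(y.shape)       # torch.Size([2, 8, 32, 32])

# Recompute the standardized weight exactly as forward() does.
w = conv.weight
w = w - w.mean(dim=1, keepdim=True).mean(dim=2, keepdim=True).mean(dim=3, keepdim=True)
w = w / (w.view(w.size(0), -1).std(dim=1).view(-1, 1, 1, 1) + 1e-5)
print(w.view(w.size(0), -1).mean(dim=1))  # ~0 for every output channel
print(w.view(w.size(0), -1).std(dim=1))   # ~1 for every output channel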

My implementation back then was probably too crude; this code is likely where it went wrong:

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 5, 5, 1)
        self.conv1_bn = nn.BatchNorm2d(1)   # BatchNorm meant for the weights, not the activations
        self.conv2 = nn.Conv2d(5, 5, 5, 1)
        self.conv2_bn = nn.BatchNorm2d(5)
        self.fc = nn.Linear(4*4*5, 10)

    def forward(self, x):
        # Normalize the kernels in-place through .data before each conv (see below).
        self.conv1.weight.data = self.conv1_bn(self.conv1.weight.data)
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 2, 2)
        self.conv2.weight.data = self.conv2_bn(self.conv2.weight.data)
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2, 2)
        x = x.view(-1, 4*4*5)
        x = self.fc(x)
        return F.log_softmax(x, dim=1)
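
In hindsight, this attempt has two problems: assigning to .weight.data cuts the normalization out of the autograd graph (gradients never flow through it, and the stored weights are mutated on every forward pass), and BatchNorm2d normalizes across the whole batch of filters with shared statistics rather than within each filter. A minimal sketch of the nearest working version, standardizing inside the graph via F.conv2d (the ws_conv helper and NetWS name are mine, not from the original experiment):

def ws_conv(x, conv):
    # Standardize per output channel, inside the autograd graph.
    w = conv.weight
    w = w - w.view(w.size(0), -1).mean(dim=1).view(-1, 1, 1, 1)
    w = w / (w.view(w.size(0), -1).std(dim=1).view(-1, 1, 1, 1) + 1e-5)
    return F.conv2d(x, w, conv.bias, conv.stride, conv.padding,
                    conv.dilation, conv.groups)

class NetWS(nn.Module):
    def __init__(self):
        super(NetWS, self).__init__()
        self.conv1 = nn.Conv2d(1, 5, 5, 1)
        self.conv2 = nn.Conv2d(5, 5, 5, 1)
        self.fc = nn.Linear(4*4*5, 10)

    def forward(self, x):
        x = F.max_pool2d(F.relu(ws_conv(x, self.conv1)), 2, 2)
        x = F.max_pool2d(F.relu(ws_conv(x, self.conv2)), 2, 2)
        x = x.view(-1, 4*4*5)
        return F.log_softmax(self.fc(x), dim=1)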

Looking back now, the idea was there all along; it just didn't pan out.