Python NumPy 实现卷积操作：从基础到多通道输入

本文通过Python代码演示了使用NumPy库实现卷积操作的步骤，从简单的二维输入到多通道输入 "[C, H, W]"。代码涵盖了 padding、stride 和卷积计算等关键概念，并附带详细注释和输出验证。

1、对input进行padding

import numpy as np

# Build a 3x4 sample matrix holding the values 1..12.
input = np.arange(1, 13).reshape(3, 4)
pad = 1

# Allocate a zero canvas that is `pad` cells larger on every side, then copy
# the original values into its centre. np.zeros defaults to float64, so the
# padded copy is floating point.
rows, cols = input.shape
input_pad = np.zeros((rows + 2 * pad, cols + 2 * pad))
input_pad[pad:rows + pad, pad:cols + pad] = input

print(input)
print(input_pad)

2、完成简单的卷积计算

# Slide a 2x2 kernel over the padded input, one cell at a time.
kernel = np.array([[1, 2], [3, 4]])
k_h, k_w = kernel.shape

# With a step of 1 there is one output cell per valid window position.
output_h = input_pad.shape[0] - k_h + 1
output_w = input_pad.shape[1] - k_w + 1
output = np.zeros((output_h, output_w))

# Flatten the kernel once so each window reduces to a 1-D dot product.
line_kernel = kernel.ravel()
for h, w in np.ndindex(output_h, output_w):
    window = input_pad[h:h + k_h, w:w + k_w]
    line_input = window.ravel()
    output[h, w] = line_input @ line_kernel

print(kernel)
print(output)

3、增加stride

# Same 2x2 convolution, but the window now advances `stride` cells per step.
stride = 2
kernel = np.array([[1, 2], [3, 4]])
k_h, k_w = kernel.shape

# Number of window positions along each axis: truncate(span / stride) + 1.
output_h = int((input_pad.shape[0] - k_h) / stride) + 1
output_w = int((input_pad.shape[1] - k_w) / stride) + 1
output = np.zeros((output_h, output_w))

line_kernel = kernel.ravel()
print(line_kernel)
for h, w in np.ndindex(output_h, output_w):
    # Top-left corner of the window for output cell (h, w).
    top, left = h * stride, w * stride
    line_input = input_pad[top:top + k_h, left:left + k_w].ravel()
    output[h, w] = line_input @ line_kernel

print(kernel)
print(output)

增加输入通道 input [C, H, W] 的代码段如下（每个通道分别与同一个二维卷积核做卷积，输出形状为 [C, H_out, W_out]，不跨通道求和）：

import numpy as np

# 1. Pad the input.
# Sample input with 2 channels of 3x4 values, laid out as [C, H, W].
input = np.arange(1, 25).reshape(2, 3, 4)
pad = 1
# Zero-pad only the two spatial axes; the channel axis is left untouched.
input_pad = np.pad(input, ((0, 0), (pad, pad), (pad, pad)), 'constant')
print("Input:\n", input)
print("Padded Input:\n", input_pad)

# 2. Plain convolution (window advances one cell at a time).
# The same 2-D kernel is applied to every channel independently; results are
# not summed across channels, so the output keeps the channel axis.
kernel = np.array([[1, 2], [3, 4]])
output_h = input_pad.shape[1] - kernel.shape[0] + 1
output_w = input_pad.shape[2] - kernel.shape[1] + 1
output = np.zeros([input.shape[0], output_h, output_w])
# Flatten the kernel once so each window reduces to a 1-D dot product.
line_kernel = kernel.reshape(-1)
print("Kernel:\n", kernel)

for c in range(input.shape[0]):
    for h in range(output_h):
        for w in range(output_w):
            line_input = input_pad[c, h:h + kernel.shape[0], w:w + kernel.shape[1]].reshape(-1)
            output[c, h, w] = np.dot(line_input, line_kernel)

print("Output:\n", output)

# 3. Add a stride: the window now advances `stride` cells per step.
stride = 2
kernel = np.array([[1, 2], [3, 4]])
# Integer floor division gives the number of window positions directly,
# without a round-trip through float.
output_h = (input_pad.shape[1] - kernel.shape[0]) // stride + 1
output_w = (input_pad.shape[2] - kernel.shape[1]) // stride + 1
output = np.zeros([input.shape[0], output_h, output_w])
line_kernel = kernel.reshape(-1)
print("Kernel:\n", kernel)

for c in range(input.shape[0]):
    for h in range(output_h):
        for w in range(output_w):
            # Top-left corner of the window for output cell (h, w).
            top = h * stride
            left = w * stride
            line_input = input_pad[c, top:top + kernel.shape[0], left:left + kernel.shape[1]].reshape(-1)
            # The dot product of two 1-D arrays is already a scalar.
            output[c, h, w] = np.dot(line_input, line_kernel)

print("Output:\n", output)