渐进式网络:为每个新任务增加可扩展的旁路

FreeGuideOnline 最新 2026-06-23

import torch
import torch.nn as nn

class ProgressiveColumn(nn.Module):
    """One task-specific column of a progressive network.

    The column is a two-layer MLP. Hidden activations taken from earlier
    columns are projected by ``self.adapters`` into this column's hidden
    space and added to its own hidden activation before the output layer.

    Args:
        input_size: Width of the input vector.
        hidden_size: Width of the hidden layer.
        output_size: Width of the column output.
        adapter_size: Accepted for interface compatibility; the linear
            adapters used here do not need it.
    """

    def __init__(self, input_size, hidden_size, output_size, adapter_size=32):
        super().__init__()
        self.layer1 = nn.Linear(input_size, hidden_size)
        self.layer2 = nn.Linear(hidden_size, output_size)
        # One adapter per earlier column; populated by ProgressiveNet.
        self.adapters = nn.ModuleList()

    def hidden_features(self, x):
        """Return this column's own first-layer activation for ``x``."""
        return torch.relu(self.layer1(x))

    def forward(self, x, lateral_inputs=None):
        """Compute the column output, optionally fusing lateral features.

        Args:
            x: Input batch whose last dimension is ``input_size``.
            lateral_inputs: Hidden activations of the earlier columns, in
                column order, one per adapter. ``None`` skips lateral
                fusion entirely.

        Returns:
            Tensor whose last dimension is ``output_size``.

        Raises:
            ValueError: If ``lateral_inputs`` is given but its length does
                not match the number of adapters.
        """
        hidden = self.hidden_features(x)
        if lateral_inputs is not None:
            if len(lateral_inputs) != len(self.adapters):
                raise ValueError(
                    f"expected {len(self.adapters)} lateral inputs, "
                    f"got {len(lateral_inputs)}"
                )
            # Fuse in hidden space: the adapters emit hidden_size features,
            # so they cannot be added to the raw input.
            for adapter, lateral in zip(self.adapters, lateral_inputs):
                hidden = hidden + adapter(lateral)
        return self.layer2(hidden)


class ProgressiveNet(nn.Module):
    """Progressive network: one column per task, with lateral adapters.

    Args:
        columns_info: Iterable of ``(input_size, hidden_size, output_size)``
            tuples, one per task, e.g. ``[(784, 312, 10), (784, 312, 6)]``.
            Every column receives the same input tensor in ``forward``, so
            the input sizes must be compatible with that tensor.
    """

    def __init__(self, columns_info):
        super().__init__()
        self.columns = nn.ModuleList()
        earlier_hidden_sizes = []
        for in_sz, hid, out_sz in columns_info:
            col = ProgressiveColumn(in_sz, hid, out_sz)
            # Simple linear adapters, one per earlier column, each mapping
            # that column's hidden width onto this column's hidden width.
            col.adapters.extend(
                [nn.Linear(prev_hid, hid) for prev_hid in earlier_hidden_sizes]
            )
            self.columns.append(col)
            earlier_hidden_sizes.append(hid)

    def forward(self, x, task_id):
        """Run the column for ``task_id`` with laterals from earlier columns.

        Args:
            x: Input batch shared by all columns.
            task_id: Zero-based index of the column to evaluate.

        Returns:
            Output of column ``task_id``.

        Raises:
            IndexError: If ``task_id`` is not a valid column index.
        """
        if not 0 <= task_id < len(self.columns):
            raise IndexError(
                f"task_id {task_id} out of range for "
                f"{len(self.columns)} columns"
            )
        # Earlier columns only provide features; no_grad keeps their
        # parameters out of the autograd graph.
        with torch.no_grad():
            lateral = [
                self.columns[col_idx].hidden_features(x)
                for col_idx in range(task_id)
            ]
        return self.columns[task_id](x, lateral)

实例化:共两个任务,输入维度均为 784,隐藏层大小均为 312,输出维度分别为 10 和 6。

# Build a two-column network. Each tuple is (input, hidden, output) size:
# both tasks use 784 inputs and 312 hidden units, with 10 and 6 outputs.
model = ProgressiveNet(
    columns_info=[
        (784, 312, 10),
        (784, 312, 6),
    ],
)