# 渐进式网络:为每个新任务新增一列,并通过可扩展的旁路(侧向连接)复用旧列的特征
import torch
import torch.nn as nn
class ProgressiveColumn(nn.Module):
    """One column of a progressive network: a two-layer MLP with lateral adapters.

    The column computes ``layer2(relu(layer1(x)))``. When hidden activations
    from earlier columns are supplied, each one is projected by its own
    adapter and added to this column's hidden activation before ``layer2``.

    Args:
        input_size: Number of input features of ``layer1``.
        hidden_size: Width of the hidden layer.
        output_size: Number of outputs of ``layer2``.
        adapter_size: Accepted for interface compatibility. The adapters used
            here are plain linear layers, so this value is currently unused.
    """

    def __init__(self, input_size, hidden_size, output_size, adapter_size=32):
        super().__init__()
        self.layer1 = nn.Linear(input_size, hidden_size)
        self.layer2 = nn.Linear(hidden_size, output_size)
        # One adapter per earlier column, in column order. Left empty here and
        # filled in by whoever assembles the columns (see ProgressiveNet).
        self.adapters = nn.ModuleList()

    def hidden_activation(self, x):
        """Return this column's hidden activation ``relu(layer1(x))``."""
        return torch.relu(self.layer1(x))

    def forward(self, x, lateral_inputs=None):
        """Run the column, optionally fusing activations from earlier columns.

        Args:
            x: Input tensor whose last dimension is ``input_size``.
            lateral_inputs: ``None`` to run the column on its own, or a
                sequence with one hidden activation per adapter (same order
                as ``self.adapters``).

        Returns:
            The output of ``layer2``.

        Raises:
            ValueError: If ``lateral_inputs`` is given but its length differs
                from the number of adapters.
        """
        hidden = self.hidden_activation(x)
        if lateral_inputs is not None:
            if len(lateral_inputs) != len(self.adapters):
                raise ValueError(
                    f"expected {len(self.adapters)} lateral inputs, "
                    f"got {len(lateral_inputs)}"
                )
            # Fuse at the hidden layer: every adapter maps an earlier column's
            # hidden activation into this column's hidden space, so the sum is
            # dimensionally valid regardless of input/output sizes.
            for adapter, lateral in zip(self.adapters, lateral_inputs):
                hidden = hidden + adapter(lateral)
        return self.layer2(hidden)


class ProgressiveNet(nn.Module):
    """A stack of task-specific columns connected by lateral adapters.

    Args:
        columns_info: Iterable of ``(input_size, hidden_size, output_size)``
            tuples, one per task, e.g. ``[(784, 312, 10), (784, 312, 6)]``.
            All columns are fed the same ``x`` in ``forward``, so their input
            sizes must be compatible with that tensor.
    """

    def __init__(self, columns_info):
        super().__init__()
        self.columns = nn.ModuleList()
        earlier_hidden_sizes = []
        for in_sz, hid, out_sz in columns_info:
            col = ProgressiveColumn(in_sz, hid, out_sz)
            # One simple linear adapter per earlier column. Each adapter is
            # sized from the *source* column's hidden width to this column's
            # hidden width, so columns may use different hidden sizes.
            col.adapters = nn.ModuleList(
                [nn.Linear(prev_hid, hid) for prev_hid in earlier_hidden_sizes]
            )
            self.columns.append(col)
            earlier_hidden_sizes.append(hid)

    def forward(self, x, task_id):
        """Run the column for ``task_id`` with laterals from all earlier columns.

        Args:
            x: Input tensor shared by every column.
            task_id: Index of the column to evaluate (``0``-based).

        Returns:
            The output of column ``task_id``.

        Raises:
            IndexError: If ``task_id`` is not a valid column index.
        """
        if not 0 <= task_id < len(self.columns):
            raise IndexError(
                f"task_id {task_id} out of range for {len(self.columns)} columns"
            )
        # Earlier columns only provide features: no autograd graph is recorded
        # for them, so training the current task cannot update their weights.
        # The current column's adapters are applied outside this context and
        # therefore still receive gradients.
        with torch.no_grad():
            lateral = [
                self.columns[col_idx].hidden_activation(x)
                for col_idx in range(task_id)
            ]
        return self.columns[task_id](x, lateral)
# 实例化:两个任务,输入维度均为 784,隐藏层维度均为 312,输出维度分别为 10 和 6
# Build a two-task network; each tuple is (input size, hidden size, output size).
model = ProgressiveNet(
    [
        (784, 312, 10),  # first task: 10 outputs
        (784, 312, 6),  # second task: 6 outputs
    ]
)