def sgd(params, lr, batch_size):
    """Minibatch stochastic gradient descent."""
    # params: the model's trainable parameters; lr: the learning rate; batch_size: the minibatch size
    with torch.no_grad():  # context manager telling PyTorch not to track gradients for the operations inside it
        for param in params:
            param -= lr * param.grad / batch_size  # update: scale the gradient by the learning rate and divide by the batch size
            param.grad.zero_()  # clear the gradient; PyTorch accumulates gradients, so they must be reset manually
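A quick illustration of why param.grad.zero_() is needed: backward() adds new gradients onto whatever is already stored in .grad instead of overwriting it. A minimal sketch (the tensor here is made up purely for illustration):

import torch

x = torch.tensor([1.0, 2.0], requires_grad=True)
(3 * x).sum().backward()
print(x.grad)   # tensor([3., 3.])
(3 * x).sum().backward()
print(x.grad)   # tensor([6., 6.]) -- the new gradient was added to the old one
x.grad.zero_()  # reset to zero before the next step, which is exactly what sgd() does after each update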
1.8 Training process
lr = 0.03
num_epochs = 3
net = linreg
loss = squared_loss

for epoch in range(num_epochs):
    for X, y in data_iter(batch_size, features, labels):
        l = loss(net(X, w, b), y)
        # l is a vector, so sum() turns it into a scalar before backpropagation,
        # which computes the gradients of the loss with respect to the parameters w and b
        l.sum().backward()
        sgd([w, b], lr, batch_size)
    with torch.no_grad():
        train_l = loss(net(features, w, b), labels)
        print(f'epoch {epoch + 1}, loss {float(train_l.mean()):f}')
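The loop above relies on pieces defined earlier in the from-scratch implementation: data_iter, linreg, squared_loss, the parameters w and b, the batch size, and the synthetic features/labels. For reference, a minimal sketch of what those definitions typically look like (batch_size = 10 is an assumed value):

import random
import torch

def data_iter(batch_size, features, labels):
    """Yield shuffled minibatches of (features, labels)."""
    num_examples = len(features)
    indices = list(range(num_examples))
    random.shuffle(indices)
    for i in range(0, num_examples, batch_size):
        batch_indices = torch.tensor(indices[i: min(i + batch_size, num_examples)])
        yield features[batch_indices], labels[batch_indices]

def linreg(X, w, b):
    """The linear regression model."""
    return torch.matmul(X, w) + b

def squared_loss(y_hat, y):
    """Squared loss for each example (no averaging over the batch)."""
    return (y_hat - y.reshape(y_hat.shape)) ** 2 / 2

# Parameters are created with requires_grad=True so that backward() can fill in .grad
w = torch.normal(0, 0.01, size=(2, 1), requires_grad=True)
b = torch.zeros(1, requires_grad=True)
batch_size = 10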
import torch
from torch import nn

net = nn.Sequential(nn.Linear(2, 1))
# Initialize the model parameters; net[0] refers to the first layer in the model
net[0].weight.data.normal_(0, 0.01)
net[0].bias.data.fill_(0)
loss = nn.MSELoss()  # define the loss function (mean squared error)
trainer = torch.optim.SGD(net.parameters(), lr=0.03)  # instantiate an SGD optimizer over the model's parameters
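The training loop in 2.4 iterates over data_iter, which in the concise implementation is a DataLoader rather than the hand-written generator above. A minimal sketch of how it is usually built (the helper name load_array and batch_size = 10 are assumptions based on the earlier data-loading step):

from torch.utils import data

def load_array(data_arrays, batch_size, is_train=True):
    """Wrap tensors in a TensorDataset and return a shuffling DataLoader."""
    dataset = data.TensorDataset(*data_arrays)
    return data.DataLoader(dataset, batch_size, shuffle=is_train)

batch_size = 10
data_iter = load_array((features, labels), batch_size)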
2.4 Training process
num_epochs = 3
for epoch in range(num_epochs):
    for X, y in data_iter:
        l = loss(net(X), y)
        trainer.zero_grad()  # clear the gradients accumulated by the previous step
        l.backward()  # nn.MSELoss already reduces the loss to a scalar (the mean by default), so no explicit sum() is needed
        trainer.step()  # update the parameters
    l = loss(net(features), labels)
    print(f'epoch {epoch + 1}, loss {l:f}')
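As a sanity check, the learned parameters can be compared against the parameters used to generate the synthetic data (true_w and true_b come from the data-generation step, which is assumed here):

w = net[0].weight.data
print('error in estimating w:', true_w - w.reshape(true_w.shape))
b = net[0].bias.data
print('error in estimating b:', true_b - b)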