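# The three main attributes of an autograd Variable:
# data:    the underlying Tensor that holds the actual values.
# grad:    the gradient of `data`; it is itself a Variable (not a raw Tensor)
#          and has the same shape as `data`.
# grad_fn: points to the Function object that is used to compute gradients
#          during backpropagation.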
import torch
from torch.autograd import Variable
# Walk-through of basic autograd behaviour: build a scalar from a leaf
# Variable, backpropagate, observe gradient accumulation, and reset it.
#
# NOTE(review): the "Output" comments below are copied from the original
# interactive session and use the legacy "Variable containing:" repr.
# In PyTorch >= 0.4 Variable and Tensor are merged, so the text printed by
# a current install will look different -- confirm against your version.

# Leaf Variable that tracks gradients.
x = Variable(torch.ones(2, 2), requires_grad=True)

# The result of operating on Variables is itself a Variable; intermediate
# (non-leaf) results do not keep a gradient of their own after backward().
y = x[0, 0] + x[0, 1] + x[1, 0] + x[1, 1]
# Same value as the explicit element-wise sum above, written idiomatically.
y = x.sum()

# Display y: a single-element result (sum of four ones, i.e. 4).
print(y)
# Output (value line was missing from the recorded session):
#   Variable containing:
#   [torch.FloatTensor of size 1]

# grad_fn of the objective is the Function used to compute its gradient.
print(y.grad_fn)
# Output:
#   <SumBackward0 at 0x18bcbfcdd30>

y.backward()  # backpropagate

# A Variable's gradient is stored in its .grad attribute.
print(x.grad)
# Output:
#   Variable containing:
#   1 1
#   1 1
#   [torch.FloatTensor of size 2x2]

# Gradients are accumulated into .grad rather than overwritten: running a
# second backward pass (on a freshly built graph) adds to the stored value,
# so every entry becomes 2.
x.sum().backward()
print(x.grad)
# Output:
#   Variable containing:
#   2 2
#   2 2

# Hence gradients must be reset between passes. During model training this
# is normally done through the optimizer: optimizer.zero_grad().
# In torch, in-place operations carry a trailing underscore, which is why
# the call is .data.zero_() and there is no .data.zero().
x.grad.data.zero_()
print(x.grad.data)
# Output:
#   0 0
#   0 0
#   [torch.FloatTensor of size 2x2]

# The same torch function accepts either a Variable or a plain Tensor.
x = Variable(torch.ones(4, 5))
y = torch.cos(x)  # called with a Variable
x_tensor_cos = torch.cos(x.data)  # called with the underlying Tensor

print(y)
# Output:
#   Variable containing:
#   0.5403, 0.5403, 0.5403, 0.5403, 0.5403
#   0.5403, 0.5403, 0.5403, 0.5403, 0.5403
#   0.5403, 0.5403, 0.5403, 0.5403, 0.5403
#   0.5403, 0.5403, 0.5403, 0.5403, 0.5403
#   [torch.FloatTensor of size 4x5]

print(x_tensor_cos)
# Output:
#   0.5403, 0.5403, 0.5403, 0.5403, 0.5403
#   0.5403, 0.5403, 0.5403, 0.5403, 0.5403
#   0.5403, 0.5403, 0.5403, 0.5403, 0.5403
#   0.5403, 0.5403, 0.5403, 0.5403, 0.5403
#   [torch.FloatTensor of size 4x5]