Homepage of Jinghua Xu

2023-10-05 发表 · 2025-04-30 更新 · 动手学深度学习 · 20 分钟读完（大约 3052 个字）

3.1 线性回归

%matplotlib inline
import math
import time
import numpy as np
import torch
from d2l import torch as d2l

n=10000
a=torch.ones([n])
b=torch.ones([n])

#定义计时器
class Timer: #@save
    """Stopwatch that records the duration of several timed runs."""

    def __init__(self):
        # Durations (seconds) of every completed run, in the order they finished.
        self.times = []
        # Timing begins as soon as the object is created.
        self.start()

    def start(self):
        """Start (or restart) the timer by remembering the current wall-clock time."""
        self.tik = time.time()

    def stop(self):
        """Stop the timer, record the elapsed time in `self.times`, and return it."""
        elapsed = time.time() - self.tik
        self.times.append(elapsed)
        return elapsed

    def avg(self):
        """Return the mean of the recorded times."""
        total = sum(self.times)
        return total / len(self.times)

    def sum(self):
        """Return the sum of the recorded times."""
        # Inside the method body `sum` resolves to the builtin, not to this method.
        return sum(self.times)

    def cumsum(self):
        """Return the running totals of the recorded times as a plain list."""
        return np.cumsum(np.array(self.times)).tolist()

c=torch.zeros(n)
timer=Timer()
for i in range(n):
    c[i]=a[i]+b[i]
f'{timer.stop():.5f}sec'

'0.14660sec'

1
2
3

timer.start()
d=a+b
f'{timer.stop():.5f}sec'

'0.00000sec'

def normal(x,mu,sigma):
    """Evaluate the normal density with mean `mu` and standard deviation `sigma` at `x`.

    `x` may be a scalar or a NumPy array; the result has the same shape.
    """
    variance = sigma**2
    # Normalising constant 1 / sqrt(2 * pi * sigma^2).
    scale = 1/math.sqrt(2*math.pi*variance)
    exponent = -0.5/variance*(x-mu)**2
    return scale*np.exp(exponent)

#再次使用numpy进行可视化
x=np.arange(-7,7,0.01)

#均值和标准差对
params=[(0,1),(0,2),(3,1)]
d2l.plot(x,[normal(x,mu,sigma)for mu,sigma in params],xlabel='x',ylabel='p(x)',figsize=(4.5,2.5),legend=[f'mean{mu},std{sigma}'for mu,sigma in params])

svg

3.2 线性回归的从零开始实现

%matplotlib inline
import random
import torch
from d2l import torch as d2l

#生成数据集
def synthetic_data(w,b,num_examples): #@save
    """Generate a synthetic dataset y = Xw + b + noise.

    Returns the feature matrix of shape (num_examples, len(w)) and the labels
    reshaped into a column of shape (num_examples, 1).
    """
    feature_shape = (num_examples, len(w))
    X = torch.normal(0, 1, feature_shape)
    clean = torch.matmul(X, w) + b
    # Gaussian noise with standard deviation 0.01 perturbs the exact labels.
    noise = torch.normal(0, 0.01, clean.shape)
    y = clean + noise
    return X, y.reshape((-1, 1))

true_w=torch.tensor([2,-3.4])
true_b=4.2
features,labels=synthetic_data(true_w,true_b,1000)

print('features:',features[0],'\nlabel:',labels[0])

features: tensor([ 0.7328, -0.5520]) 
label: tensor([7.5513])

d2l.set_figsize()
d2l.plt.scatter(features[:,1].detach().numpy(),labels.detach().numpy(),1);

svg

#读取数据集
def data_iter(batch_size,features,labels):
    """Yield (features, labels) minibatches in a freshly shuffled order.

    Every example is visited once per pass; the last batch may be smaller
    than `batch_size`.
    """
    num_examples = len(features)
    order = list(range(num_examples))
    # Examples are read in random order, not in storage order.
    random.shuffle(order)
    for start in range(0, num_examples, batch_size):
        # Slicing past the end of the list is clipped, so the tail batch is just shorter.
        picked = torch.tensor(order[start:start + batch_size])
        yield features[picked], labels[picked]
        
batch_size=10

for X,y in data_iter(batch_size,features,labels):
    print(X,'\n',y)
    break

tensor([[ 0.9175, -0.1441],
        [-0.3328, -0.4237],
        [-0.1287,  1.6801],
        [ 0.8705, -0.9030],
        [-0.4966,  1.4015],
        [ 1.3378, -1.8026],
        [ 0.5129,  1.2806],
        [ 1.1026,  1.2080],
        [ 0.6151,  0.6337],
        [-0.4683, -0.4388]]) 
 tensor([[ 6.5336],
        [ 4.9801],
        [-1.7645],
        [ 9.0089],
        [-1.5652],
        [12.9979],
        [ 0.8680],
        [ 2.2893],
        [ 3.2902],
        [ 4.7695]])

#初始化模型参数
w=torch.normal(0,0.01,size=(2,1),requires_grad=True)
b=torch.zeros(1,requires_grad=True)

#定义模型
def linreg(X,w,b): #@save
    """Linear regression model: return the matrix product of X and w, plus b."""
    prediction = X @ w
    return prediction + b

#定义损失函数
def squared_loss(y_hat,y): #@save
    """Element-wise squared loss (y_hat - y)^2 / 2, with no reduction applied."""
    # Bring the targets into the prediction's shape before subtracting.
    target = y.reshape(y_hat.shape)
    residual = y_hat - target
    return residual**2/2

#定义优化算法——小批量随机梯度下降
#lr:学习速率(梯度下降步长)；batch_size:批量大小
def sgd(params,lr,batch_size): #@save
    """Minibatch stochastic gradient descent.

    Each parameter is updated in place by `lr * grad / batch_size`, after which
    its gradient is reset to zero.
    """
    # The update itself must not be recorded by autograd.
    with torch.no_grad():
        for p in params:
            step = lr * p.grad / batch_size
            p.sub_(step)
            p.grad.zero_()
            
#训练
#设置超参数
lr=0.03#学习率
num_epochs=3#迭代周期个数
net=linreg
loss=squared_loss

for epoch in range(num_epochs):
    for X,y in data_iter(batch_size,features,labels):
        l=loss(net(X,w,b),y)#X和y的小批量损失
        #因为l的形状是（batch_size,1），而不是一个标量。l中的所有元素被加到一起，并以此计算关于[w,b]的梯度
        l.sum().backward()
        sgd([w,b],lr,batch_size)#使用参数的梯度以更新参数
    with torch.no_grad():
        train_l=loss(net(features,w,b),labels)
        print(f'epoch{epoch+1},loss{float(train_l.mean()):f}')

epoch1,loss0.036941
epoch2,loss0.000134
epoch3,loss0.000049

print(f'w的估计误差：{true_w-w.reshape(true_w.shape)}')
print(f'b的估计误差：{true_b-b}')

w的估计误差：tensor([ 0.0002, -0.0002], grad_fn=<SubBackward0>)
b的估计误差：tensor([0.0002], grad_fn=<RsubBackward1>)

3.3 线性回归的简洁实现

#生成数据集
import numpy as np
import torch
from torch.utils import data
from d2l import torch as d2l

true_w=torch.tensor([2,-3.4])
true_b=4.2
features,labels=d2l.synthetic_data(true_w,true_b,1000)

#读取数据集
def load_array(data_arrays,batch_size,is_train=True): #@save
    """Wrap tensors in a PyTorch DataLoader.

    `data_arrays` is unpacked into a TensorDataset; the loader shuffles
    only when `is_train` is true.
    """
    tensor_dataset = data.TensorDataset(*data_arrays)
    loader = data.DataLoader(tensor_dataset, batch_size, shuffle=is_train)
    return loader

batch_size=10
data_iter=load_array((features,labels),batch_size)

next(iter(data_iter))

[tensor([[-0.6422, -0.7470],
         [-2.1785,  0.3340],
         [ 1.4011, -1.1104],
         [-0.8083, -0.3035],
         [ 0.1077, -0.1201],
         [-0.4151, -0.1079],
         [ 1.8074,  0.0904],
         [-0.3707,  0.6197],
         [ 0.3739,  0.2972],
         [ 0.2383,  1.1791]]),
 tensor([[ 5.4457],
         [-1.3122],
         [10.7837],
         [ 3.6281],
         [ 4.8105],
         [ 3.7284],
         [ 7.5017],
         [ 1.3465],
         [ 3.9247],
         [ 0.6800]])]

#定义模型
from torch import nn#nn:神经网络
net=nn.Sequential(nn.Linear(2,1))

#初始化模型参数
net[0].weight.data.normal_(0,0.01),net[0].bias.data.fill_(0)

(tensor([[ 0.0106, -0.0055]]), tensor([0.]))

#定义损失函数
loss=nn.MSELoss()#均方误差：MSELoss类，平方L2范数
#定义优化算法
trainer=torch.optim.SGD(net.parameters(),lr=0.03)

num_epochs=3#迭代周期个数
for epoch in range(num_epochs):
    for X,y in data_iter:
        l=loss(net(X),y)#X和y的小批量损失
        trainer.zero_grad()
        l.backward()
        trainer.step()#使用参数的梯度以更新参数
    l=loss(net(features),labels)
    print(f'epoch{epoch+1},loss{l:f}')

epoch1,loss0.000223
epoch2,loss0.000112
epoch3,loss0.000112

w=net[0].weight.data
print('w的估计误差：',true_w-w.reshape(true_w.shape))
b=net[0].bias.data
print('b的估计误差：',true_b-b)

w的估计误差： tensor([ 0.0014, -0.0004])
b的估计误差： tensor([0.0008])

3.5 图像分类数据集

%matplotlib inline
import torch
import torchvision
from torch.utils import data
from torchvision import transforms
from d2l import torch as d2l

d2l.use_svg_display()

#通过框架内内置函数将Fashion-MNIST数据集下载并读取到内存中
#通过ToTensor实例将图像数据由PIL类型变换为32位浮点数格式，并除以255使得所有像素数值均在0-1之间
trans=transforms.ToTensor()
mnist_train=torchvision.datasets.FashionMNIST(root="../data",train=True,transform=trans,download=True)
mnist_test=torchvision.datasets.FashionMNIST(root="../data",train=False,transform=trans,download=True)

1	len(mnist_train),len(mnist_test)

(60000, 10000)

1	mnist_train[0][0].shape

torch.Size([1, 28, 28])

#在数字标签索引与文本名称之间进行转换
def get_fashion_mnist_labels(labels): #@save
    """Map numeric Fashion-MNIST class indices to their text names."""
    class_names = (
        't-shirt', 'trouser', 'pullover', 'dress', 'coat',
        'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot',
    )
    names = []
    for label in labels:
        # int() lets tensor scalars be used as indices as well.
        names.append(class_names[int(label)])
    return names

#可视化样本
def show_images(imgs,num_rows,num_cols,titles=None,scale=1.5): #@save
    """Plot a list of images on a `num_rows` x `num_cols` grid.

    Args:
        imgs: iterable of images; tensors are converted to NumPy arrays,
            anything else is passed to `imshow` unchanged.
        num_rows, num_cols: grid dimensions.
        titles: optional sequence of per-image titles, indexed like `imgs`.
        scale: size of each grid cell, used to derive the figure size.

    Returns:
        The flattened array of axes.
    """
    figsize=(num_cols*scale,num_rows*scale)
    # squeeze=False makes subplots always return a 2-D array of axes, so
    # flatten() also works for a 1x1 grid (where a bare Axes would be returned).
    _,axes=d2l.plt.subplots(num_rows,num_cols,figsize=figsize,squeeze=False)
    axes=axes.flatten()
    for i,(ax,img) in enumerate(zip(axes,imgs)):
        if torch.is_tensor(img):
            # detach()/cpu() allow tensors that track gradients or live on
            # another device to be converted to NumPy.
            ax.imshow(img.detach().cpu().numpy())
        else:
            # Non-tensor input (e.g. a PIL image) is drawn as-is.
            ax.imshow(img)
        ax.axes.get_xaxis().set_visible(False)
        ax.axes.get_yaxis().set_visible(False)
        if titles:
            ax.set_title(titles[i])
    return axes

X,y=next(iter(data.DataLoader(mnist_train,batch_size=50)))
show_images(X.reshape(50,28,28),10,5,titles=get_fashion_mnist_labels(y));

svg

#读取小批量
batch_size=256 #批量大小

def get_dataloader_workers():  #@save
    """Return the number of worker processes used to read the data (4)."""
    num_workers = 4
    return num_workers

train_iter=data.DataLoader(mnist_train,batch_size,shuffle=True,num_workers=get_dataloader_workers())

#读取训练数据所需时间
timer=d2l.Timer()
for X,y in train_iter:
    continue
f'{timer.stop():.2f}sec'

'2.11sec'

def load_data_fashion_mnist(batch_size,resize=None): #@save
    """Download Fashion-MNIST and return (train_loader, test_loader).

    When `resize` is given, a Resize transform is placed ahead of ToTensor.
    The training loader shuffles; the test loader does not.
    """
    if resize:
        steps=[transforms.Resize(resize),transforms.ToTensor()]
    else:
        steps=[transforms.ToTensor()]
    pipeline=transforms.Compose(steps)
    train_set=torchvision.datasets.FashionMNIST(root="../data",train=True,transform=pipeline,download=True)
    test_set=torchvision.datasets.FashionMNIST(root="../data",train=False,transform=pipeline,download=True)
    train_loader=data.DataLoader(train_set,batch_size,shuffle=True,num_workers=get_dataloader_workers())
    test_loader=data.DataLoader(test_set,batch_size,shuffle=False,num_workers=get_dataloader_workers())
    return (train_loader,test_loader)

train_iter,test_iter=load_data_fashion_mnist(32,resize=64)
for X,y in train_iter:
    print(X.shape,X.dtype,y.shape,y.dtype)
    break

torch.Size([32, 1, 64, 64]) torch.float32 torch.Size([32]) torch.int64

3.6 softmax回归的从零开始实现

import torch
from IPython import display
from d2l import torch as d2l

batch_size=256
train_iter,test_iter=d2l.load_data_fashion_mnist(batch_size)

#初始化模型参数
num_inputs=784
num_outputs=10

W=torch.normal(0,0.01,size=(num_inputs,num_outputs),requires_grad=True)#用正态分布初始化权重W
b=torch.zeros(num_outputs,requires_grad=True)#偏置b初始化为0

1 2	X=torch.tensor([[1.0,2.0,3.0],[4.0,5.0,6.0]]) X.sum(0,keepdim=True),X.sum(1,keepdim=True)

(tensor([[5., 7., 9.]]),
 tensor([[ 6.],
         [15.]]))

#定义softmax操作
def softmax(X):
    """Row-wise softmax of a 2-D tensor.

    Each row of `X` is exponentiated and divided by the row's sum, so every
    output row is non-negative and sums to 1.

    The row maximum is subtracted before exponentiating. Softmax is invariant
    to adding a constant to a row, so the result is mathematically unchanged,
    but `exp` can no longer overflow to inf (which would yield NaN after the
    division) for large inputs.
    """
    # The shift is a constant per row; detach it so it adds nothing to the graph.
    row_max=X.max(dim=1,keepdim=True).values.detach()
    X_exp=torch.exp(X-row_max)
    partition=X_exp.sum(1,keepdim=True)
    return X_exp/partition  # broadcasting divides each row by its own sum

X=torch.normal(0,1,(2,5))
X_prob=softmax(X)
X_prob,X_prob.sum(1)

(tensor([[0.2143, 0.0127, 0.1268, 0.2248, 0.4214],
         [0.3360, 0.2826, 0.0913, 0.1454, 0.1446]]),
 tensor([1.0000, 1.0000]))

1
2
3

#定义模型
def net(X):
    """Softmax regression model: flatten the input, apply the affine map, then softmax."""
    # Flatten each input into a row vector whose length matches W's first dimension.
    flat = X.reshape((-1, W.shape[0]))
    logits = torch.matmul(flat, W) + b
    return softmax(logits)

1
2
3

y=torch.tensor([0,2])
y_hat=torch.tensor([[0.1,0.3,0.6],[0.3,0.2,0.5]])
y_hat[[0,1],y]

tensor([0.1000, 0.5000])

#定义交叉熵损失函数
def cross_entropy(y_hat,y):
    """Cross-entropy loss: negative log of the probability predicted for the true class.

    `y_hat` holds one row of class probabilities per example and `y` the
    true class indices; one loss value is returned per example.
    """
    row_ids = range(len(y_hat))
    # Pick, for every row, the probability at that row's true-class column.
    true_class_prob = y_hat[row_ids, y]
    return torch.log(true_class_prob).neg()

cross_entropy(y_hat,y)

tensor([2.3026, 0.6931])

def accuracy(y_hat,y): #@save
    """Return the number of correct predictions as a float.

    If `y_hat` has more than one dimension and more than one column, it is
    reduced to predicted classes with argmax along axis 1 first.
    """
    has_class_scores = len(y_hat.shape) > 1 and y_hat.shape[1] > 1
    if has_class_scores:
        y_hat = y_hat.argmax(axis=1)
    # Cast predictions to the label dtype so the comparison is like-for-like.
    matches = y_hat.type(y.dtype) == y
    num_correct = matches.type(y.dtype).sum()
    return float(num_correct)

accuracy(y_hat,y)/len(y)  #正确预测的概率（分类精度率）

0.5

def evaluate_accuracy(net,data_iter): #@save
    """Compute the accuracy of `net` over all batches produced by `data_iter`."""
    if isinstance(net,torch.nn.Module):
        # Switch the module to evaluation mode.
        net.eval()
    # Slot 0: number of correct predictions; slot 1: total number of predictions.
    counts=Accumulator(2)
    with torch.no_grad():
        for X,y in data_iter:
            num_correct=accuracy(net(X),y)
            counts.add(num_correct,y.numel())
    correct_total,seen_total=counts[0],counts[1]
    return correct_total/seen_total

class Accumulator: #@save
    """Keep running sums for n variables."""

    def __init__(self,n):
        # One float slot per tracked variable, all starting at zero.
        self.data=[0.0 for _ in range(n)]

    def add(self,*args):
        """Add each argument (converted to float) to the slot at the same position."""
        self.data=[total+float(value) for total,value in zip(self.data,args)]

    def reset(self):
        """Set every slot back to zero, keeping the number of slots."""
        self.data=[0.0 for _ in self.data]

    def __getitem__(self,idx):
        """Return the running sum stored at `idx`."""
        return self.data[idx]
    
evaluate_accuracy(net,test_iter)

0.1326

#训练
def train_epoch_ch3(net,train_iter,loss,updater): #@save
    """Train `net` for one epoch.

    `updater` is either a `torch.optim.Optimizer` or a callable taking the
    batch size. Returns (average loss per sample, accuracy) over the samples
    seen during the epoch.
    """
    if isinstance(net,torch.nn.Module):
        # Switch the module to training mode.
        net.train()
    uses_torch_optimizer=isinstance(updater,torch.optim.Optimizer)
    # Slots: summed training loss, number of correct predictions, number of samples.
    totals=Accumulator(3)
    for X,y in train_iter:
        # Forward pass, then compute gradients and update the parameters.
        y_hat=net(X)
        l=loss(y_hat,y)
        if not uses_torch_optimizer:
            # Custom updater: backpropagate the summed loss and hand over the
            # batch size.
            l.sum().backward()
            updater(X.shape[0])
        else:
            # Built-in PyTorch optimizer: backpropagate the mean loss.
            updater.zero_grad()
            l.mean().backward()
            updater.step()
        totals.add(float(l.sum()),accuracy(y_hat,y),y.numel())
    loss_sum,num_correct,num_samples=totals[0],totals[1],totals[2]
    return loss_sum/num_samples,num_correct/num_samples

class Animator:  #@save
    """Plot several curves incrementally, redrawing the figure after every update."""

    def __init__(self, xlabel=None, ylabel=None, legend=None, xlim=None,ylim=None, xscale='linear', yscale='linear',fmts=('-', 'm--', 'g-.', 'r:'), nrows=1, ncols=1,figsize=(3.5, 2.5)):
        legend = [] if legend is None else legend
        d2l.use_svg_display()
        self.fig, self.axes = d2l.plt.subplots(nrows, ncols, figsize=figsize)
        if nrows * ncols == 1:
            # A single subplot comes back as a bare Axes; wrap it so that
            # self.axes[0] works in every case.
            self.axes = [self.axes]

        # The closure captures the axis settings so add() can re-apply them
        # after clearing the axes.
        def configure():
            d2l.set_axes(
                self.axes[0], xlabel, ylabel, xlim, ylim, xscale, yscale, legend)

        self.config_axes = configure
        self.X, self.Y, self.fmts = None, None, fmts

    def add(self, x, y):
        """Append one data point per curve and redraw the figure.

        `y` may be a scalar or a sequence (one value per curve); a scalar `x`
        is shared by all curves. Pairs containing None are skipped.
        """
        if not hasattr(y, "__len__"):
            y = [y]
        num_curves = len(y)
        if not hasattr(x, "__len__"):
            x = [x] * num_curves
        # Create the per-curve point lists on first use.
        if not self.X:
            self.X = [[] for _ in range(num_curves)]
        if not self.Y:
            self.Y = [[] for _ in range(num_curves)]
        for i, (x_val, y_val) in enumerate(zip(x, y)):
            if x_val is None or y_val is None:
                continue
            self.X[i].append(x_val)
            self.Y[i].append(y_val)
        # Redraw from scratch: clear, plot every curve, re-apply the axis settings.
        self.axes[0].cla()
        for xs, ys, fmt in zip(self.X, self.Y, self.fmts):
            self.axes[0].plot(xs, ys, fmt)
        self.config_axes()
        display.display(self.fig)
        display.clear_output(wait=True)
        
def train_ch3(net,train_iter,test_iter,loss,num_epochs,updater): #@save
    """Train `net` for `num_epochs` epochs, plotting loss and accuracy per epoch.

    After training, asserts that the final training loss is below 0.5 and
    that the final train/test accuracies lie in (0.7, 1].
    """
    curve_names=['train loss','train acc','test acc']
    animator=Animator(xlabel='epoch',xlim=[1,num_epochs],ylim=[0.3,0.9],legend=curve_names)
    for epoch in range(num_epochs):
        train_metrics=train_epoch_ch3(net,train_iter,loss,updater)
        test_acc=evaluate_accuracy(net,test_iter)
        # One point per curve: (train loss, train acc, test acc) at this epoch.
        animator.add(epoch+1,train_metrics+(test_acc,))
    train_loss,train_acc=train_metrics
    assert train_loss<0.5,train_loss
    assert 0.7<train_acc<=1,train_acc
    assert 0.7<test_acc<=1,test_acc
    
lr=0.1 #学习率
def updater(batch_size):
    """Apply one minibatch SGD step to the module-level W and b using learning rate lr."""
    trainable = [W, b]
    return d2l.sgd(trainable, lr, batch_size)

num_epochs=10 #迭代周期
train_ch3(net,train_iter,test_iter,cross_entropy,num_epochs,updater)

svg

#预测
def predict_ch3(net,test_iter,n=6): #@save
    """Show the first `n` images of the first test batch, titled with true and predicted labels."""
    # Only the first batch is needed, so leave the loop immediately.
    for X,y in test_iter:
        break
    true_names=d2l.get_fashion_mnist_labels(y)
    predicted_classes=net(X).argmax(axis=1)
    predicted_names=d2l.get_fashion_mnist_labels(predicted_classes)
    # Each title puts the true label on the first line and the prediction on the second.
    captions=[]
    for true_name,predicted_name in zip(true_names,predicted_names):
        captions.append(true_name+'\n'+predicted_name)
    images=X[0:n].reshape((n,28,28))
    d2l.show_images(images,1,n,titles=captions[0:n])
    
predict_ch3(net,test_iter)

svg

3.7 softmax回归的简洁实现

import torch
from torch import nn
from d2l import torch as d2l

batch_size=256
train_iter,test_iter=d2l.load_data_fashion_mnist(batch_size)

#初始化模型参数
#PyTorch不会隐式地调整输入的形状，因此，我们在线性层前定义了展平层（flatten),来调整网络输入的形状
net=nn.Sequential(nn.Flatten(),nn.Linear(784,10))

def init_weights(m):
    """Initialise the weight of an `nn.Linear` layer from N(0, 0.01^2); ignore other modules."""
    # Exact type match: subclasses of nn.Linear are deliberately left untouched.
    if type(m) is not nn.Linear:
        return
    nn.init.normal_(m.weight,std=0.01)

net.apply(init_weights);

loss=nn.CrossEntropyLoss(reduction='none') #保留softmax函数，但在计算交叉熵损失函数时传递未规范化的预测并同时计算softmax及其对数以防止数值上溢或下溢

#优化算法：学习率为0.1的小批量随机梯度下降
trainer=torch.optim.SGD(net.parameters(),lr=0.1)

1
2
3

#训练
num_epochs=10
d2l.train_ch3(net,train_iter,test_iter,loss,num_epochs,trainer)

svg

2023-09-27 发表 · 2025-03-01 更新 · 动手学深度学习 · 10 分钟读完（大约 1440 个字）

2.预备知识

2.1 数据操作

1
2
3

import torch
x = torch.arange(12)
x

tensor([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

x.shape

torch.Size([12])

x.numel()

1 2	X=x.reshape(3,4) X

tensor([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]])

X.shape

torch.Size([3, 4])

1 2	Y=x.reshape(-1,4) Y

tensor([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]])

1 2	Z=x.reshape(3,-1) Z

tensor([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]])

1	torch.zeros((2,3,4))

tensor([[[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]],

        [[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]])

1	torch.ones((2,3,4))

tensor([[[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]],

        [[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]]])

1	torch.randn(3,4)

tensor([[ 1.6438, -1.2879,  0.2324,  0.2719],
        [-0.6636,  0.9939, -0.8435, -1.0906],
        [-0.5617,  0.2107, -0.9530,  0.7362]])

1
2
3

x=torch.tensor([1.0,2,4,8])
y=torch.tensor([2,2,2,2])
x+y,x-y,x*y,x/y,x**y

(tensor([ 3.,  4.,  6., 10.]),
 tensor([-1.,  0.,  2.,  6.]),
 tensor([ 2.,  4.,  8., 16.]),
 tensor([0.5000, 1.0000, 2.0000, 4.0000]),
 tensor([ 1.,  4., 16., 64.]))

1	torch.exp(x)

tensor([2.7183e+00, 7.3891e+00, 5.4598e+01, 2.9810e+03])

1 2	X=torch.arange(12,dtype=torch.float32).reshape((3,4)) X

tensor([[ 0.,  1.,  2.,  3.],
        [ 4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11.]])

1 2	Y=torch.tensor([[2.0,1,4,3],[1,2,3,4],[4,3,2,1]]) Y

tensor([[2., 1., 4., 3.],
        [1., 2., 3., 4.],
        [4., 3., 2., 1.]])

1	torch.cat((X,Y),dim=0)

tensor([[ 0.,  1.,  2.,  3.],
        [ 4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11.],
        [ 2.,  1.,  4.,  3.],
        [ 1.,  2.,  3.,  4.],
        [ 4.,  3.,  2.,  1.]])

1	torch.cat((X,Y),dim=1)

tensor([[ 0.,  1.,  2.,  3.,  2.,  1.,  4.,  3.],
        [ 4.,  5.,  6.,  7.,  1.,  2.,  3.,  4.],
        [ 8.,  9., 10., 11.,  4.,  3.,  2.,  1.]])

X==Y,X<Y

(tensor([[False,  True, False,  True],
         [False, False, False, False],
         [False, False, False, False]]),
 tensor([[ True, False,  True, False],
         [False, False, False, False],
         [False, False, False, False]]))

X.sum()

tensor(66.)

1
2
3

a=torch.arange(6).reshape(3,2,1)
b=torch.arange(2).reshape(1,2)
a,b

(tensor([[[0],
          [1]],
 
         [[2],
          [3]],
 
         [[4],
          [5]]]),
 tensor([[0, 1]]))

1
2

c=a+b
c

tensor([[[0, 1],
         [1, 2]],

        [[2, 3],
         [3, 4]],

        [[4, 5],
         [5, 6]]])

c[0]

tensor([[0, 1],
        [1, 2]])

1	X[-1],X[1:3]

(tensor([ 8.,  9., 10., 11.]),
 tensor([[ 4.,  5.,  6.,  7.],
         [ 8.,  9., 10., 11.]]))

1 2	X[1,2]=9 X

tensor([[ 0.,  1.,  2.,  3.],
        [ 4.,  5.,  9.,  7.],
        [ 8.,  9., 10., 11.]])

1 2	X[0:2,:]=12 X

tensor([[12., 12., 12., 12.],
        [12., 12., 12., 12.],
        [ 8.,  9., 10., 11.]])

1
2
3

before=id(Y)
Y=Y+X
id(Y)==before

False

1 2	Z=torch.zeros_like(Y) Z

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])

1	print('id(Z):',id(Z))

id(Z): 3055861362752

1 2	Z[:]=X+Y print('id(Z):',id(Z))

id(Z): 3055861362752

1
2
3

before=id(X)
X+=Y
id(X)==before

True

1
2
3

A=X.numpy()
B=torch.tensor(A)
type(A),type(B)

(numpy.ndarray, torch.Tensor)

A,B

(array([[26., 25., 28., 27.],
        [25., 26., 27., 28.],
        [20., 21., 22., 23.]], dtype=float32),
 tensor([[26., 25., 28., 27.],
         [25., 26., 27., 28.],
         [20., 21., 22., 23.]]))

1 2	a=torch.tensor([3.5]) a,a.item(),float(a),int(a)

(tensor([3.5000]), 3.5, 3.5, 3)

1
2
3

x = torch.arange(12)
X=x.reshape(3,2,2)
X

tensor([[[ 0,  1],
         [ 2,  3]],

        [[ 4,  5],
         [ 6,  7]],

        [[ 8,  9],
         [10, 11]]])

2.2 数据预处理

import os

os.makedirs(os.path.join('..', 'data'), exist_ok=True)
data_file = os.path.join('..', 'data', 'house_tiny.csv')
with open(data_file, 'w') as f:
    f.write('NumRooms,Alley,Price\n')  # 列名
    f.write('NA,Pave,127500\n')  # 每行表示一个数据样本
    f.write('2,NA,106000\n')
    f.write('4,NA,178100\n')
    f.write('NA,NA,140000\n')

import pandas as pd

data = pd.read_csv(data_file)
print(data)

   NumRooms Alley   Price
0       NaN  Pave  127500
1       2.0   NaN  106000
2       4.0   NaN  178100
3       NaN   NaN  140000

1
2
3

inputs, outputs = data.iloc[:, 0:2], data.iloc[:, 2]
inputs = inputs.fillna(inputs.mean())
print(inputs)

   NumRooms Alley
0       3.0  Pave
1       2.0   NaN
2       4.0   NaN
3       3.0   NaN

1
2
3

inputs = pd.get_dummies(inputs)
#inputs = pd.get_dummies(inputs, dummy_na=True)
print(inputs)

   NumRooms  Alley_Pave  Alley_nan
0       3.0           1          0
1       2.0           0          1
2       4.0           0          1
3       3.0           0          1

import torch

X = torch.tensor(inputs.to_numpy(dtype=float))
y = torch.tensor(outputs.to_numpy(dtype=float))
#X，y = torch.tensor(inputs.values),torch.tensor(outputs.values)
X, y

(tensor([[3., 1., 0.],
         [2., 0., 1.],
         [4., 0., 1.],
         [3., 0., 1.]], dtype=torch.float64),
 tensor([127500., 106000., 178100., 140000.], dtype=torch.float64))

2.3 线性代数

import torch

x = torch.tensor(3.0)
y = torch.tensor(2.0)

x + y, x * y, x / y, x**y

(tensor(5.), tensor(6.), tensor(1.5000), tensor(9.))

1 2	x=torch.arange(4) x

tensor([0, 1, 2, 3])

x[-1]

tensor(3)

len(x)

x.shape

torch.Size([4])

1 2	A=torch.arange(20).reshape(5,4) A

tensor([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11],
        [12, 13, 14, 15],
        [16, 17, 18, 19]])

A.T

tensor([[ 0,  4,  8, 12, 16],
        [ 1,  5,  9, 13, 17],
        [ 2,  6, 10, 14, 18],
        [ 3,  7, 11, 15, 19]])

1 2	B=torch.tensor([[1,2,3],[2,0,4],[3,4,5]]) B

tensor([[1, 2, 3],
        [2, 0, 4],
        [3, 4, 5]])

B==B.T

tensor([[True, True, True],
        [True, True, True],
        [True, True, True]])

1 2	X=torch.arange(24).reshape(2,3,4) X

tensor([[[ 0,  1,  2,  3],
         [ 4,  5,  6,  7],
         [ 8,  9, 10, 11]],

        [[12, 13, 14, 15],
         [16, 17, 18, 19],
         [20, 21, 22, 23]]])

1
2
3

A=torch.arange(20,dtype=torch.float32).reshape(5,4)
B=A.clone()
A,A+B

(tensor([[ 0.,  1.,  2.,  3.],
         [ 4.,  5.,  6.,  7.],
         [ 8.,  9., 10., 11.],
         [12., 13., 14., 15.],
         [16., 17., 18., 19.]]),
 tensor([[ 0.,  2.,  4.,  6.],
         [ 8., 10., 12., 14.],
         [16., 18., 20., 22.],
         [24., 26., 28., 30.],
         [32., 34., 36., 38.]]))

A*B

tensor([[  0.,   1.,   4.,   9.],
        [ 16.,  25.,  36.,  49.],
        [ 64.,  81., 100., 121.],
        [144., 169., 196., 225.],
        [256., 289., 324., 361.]])

1
2
3

a=2
X=torch.arange(24).reshape(2,3,4)
a+X,(a*X).shape

(tensor([[[ 2,  3,  4,  5],
          [ 6,  7,  8,  9],
          [10, 11, 12, 13]],
 
         [[14, 15, 16, 17],
          [18, 19, 20, 21],
          [22, 23, 24, 25]]]),
 torch.Size([2, 3, 4]))

1 2	x=torch.arange(4,dtype=torch.float32) x,x.sum()

(tensor([0., 1., 2., 3.]), tensor(6.))

1	A.shape,A.sum()

(torch.Size([5, 4]), tensor(190.))

1 2	A_sum_axis0=A.sum(axis=0) A_sum_axis0,A_sum_axis0.shape

(tensor([40., 45., 50., 55.]), torch.Size([4]))

1 2	A_sum_axis1=A.sum(axis=1) A_sum_axis1,A_sum_axis1.shape

(tensor([ 6., 22., 38., 54., 70.]), torch.Size([5]))

1	A.sum(axis=[0,1])

tensor(190.)

1	A.mean(),A.sum(),A.numel(),A.sum()/A.numel()

(tensor(9.5000), tensor(190.), 20, tensor(9.5000))

1	A.mean(axis=0),A.sum(axis=0)/A.shape[0]

(tensor([ 8.,  9., 10., 11.]), tensor([ 8.,  9., 10., 11.]))

1 2	sum_A=A.sum(axis=1,keepdims=True) sum_A

tensor([[ 6.],
        [22.],
        [38.],
        [54.],
        [70.]])

A / sum_A

tensor([[0.0000, 0.1667, 0.3333, 0.5000],
        [0.1818, 0.2273, 0.2727, 0.3182],
        [0.2105, 0.2368, 0.2632, 0.2895],
        [0.2222, 0.2407, 0.2593, 0.2778],
        [0.2286, 0.2429, 0.2571, 0.2714]])

1	A.cumsum(axis=0)

tensor([[ 0.,  1.,  2.,  3.],
        [ 4.,  6.,  8., 10.],
        [12., 15., 18., 21.],
        [24., 28., 32., 36.],
        [40., 45., 50., 55.]])

1 2	y=torch.ones(4,dtype=torch.float32) x,y,torch.dot(x,y)

(tensor([0., 1., 2., 3.]), tensor([1., 1., 1., 1.]), tensor(6.))

1	torch.sum(x*y)

tensor(6.)

1	A.shape,x.shape,torch.mv(A,x)

(torch.Size([5, 4]), torch.Size([4]), tensor([ 14.,  38.,  62.,  86., 110.]))

1 2	B=torch.ones(4,3) torch.mm(A,B)

tensor([[ 6.,  6.,  6.],
        [22., 22., 22.],
        [38., 38., 38.],
        [54., 54., 54.],
        [70., 70., 70.]])

1 2	u=torch.tensor([3.0,-4.0]) torch.norm(u)

tensor(5.)

1	torch.abs(u).sum()

tensor(7.)

1	torch.norm(torch.ones((4,9)))

tensor(6.)

1 2	x=torch.arange(24,dtype=torch.float32).reshape((2,3,4)) x,len(x)

(tensor([[[ 0.,  1.,  2.,  3.],
          [ 4.,  5.,  6.,  7.],
          [ 8.,  9., 10., 11.]],
 
         [[12., 13., 14., 15.],
          [16., 17., 18., 19.],
          [20., 21., 22., 23.]]]),
 2)

1	x.sum(axis=0),x.sum(axis=1),x.sum(axis=2)

(tensor([[12., 14., 16., 18.],
         [20., 22., 24., 26.],
         [28., 30., 32., 34.]]),
 tensor([[12., 15., 18., 21.],
         [48., 51., 54., 57.]]),
 tensor([[ 6., 22., 38.],
         [54., 70., 86.]]))

1 2	y=torch.arange(120,dtype=torch.float32).reshape(2,3,4,5) y

tensor([[[[  0.,   1.,   2.,   3.,   4.],
          [  5.,   6.,   7.,   8.,   9.],
          [ 10.,  11.,  12.,  13.,  14.],
          [ 15.,  16.,  17.,  18.,  19.]],

         [[ 20.,  21.,  22.,  23.,  24.],
          [ 25.,  26.,  27.,  28.,  29.],
          [ 30.,  31.,  32.,  33.,  34.],
          [ 35.,  36.,  37.,  38.,  39.]],

         [[ 40.,  41.,  42.,  43.,  44.],
          [ 45.,  46.,  47.,  48.,  49.],
          [ 50.,  51.,  52.,  53.,  54.],
          [ 55.,  56.,  57.,  58.,  59.]]],

[[[ 60., 61., 62., 63., 64.],
[ 65., 66., 67., 68., 69.],
[ 70., 71., 72., 73., 74.],
[ 75., 76., 77., 78., 79.]],

[[ 80., 81., 82., 83., 84.],
[ 85., 86., 87., 88., 89.],
[ 90., 91., 92., 93., 94.],
[ 95., 96., 97., 98., 99.]],

[[100., 101., 102., 103., 104.],
[105., 106., 107., 108., 109.],
[110., 111., 112., 113., 114.],
[115., 116., 117., 118., 119.]]]])

z=torch.linalg.norm(y)#torch.linalg.norm函数可用于求解多轴张量的类L2范数，要求张量各元素数据类型为浮点数或者复数
#z=torch.linalg.norm(input,p,dim)
#input：输入张量。它的数据类型必须是浮点型或复数型。对于复数的输入，范数使用每个元素的绝对值。注意，输入张量中元素的数据类型一定得是浮点型或者是复数哦，不然就会报错！这个就是主要变化，其次是不能使用 input.norm
#p：范数的阶数。默认是2阶—“fro”，也就是弗罗贝尼乌斯范数（Frobenius norm）。如果输入p=某个正整数，则求解对应的p阶范数。其公式为  sum(abs(x)**p)**(1./p)。
#dim：对输入的张量计算其指定维度（如dim=1，则表示计算第二个维度）上所有元素的范数。如果不对dim进行赋值，则会计算输入张量所有维度上的范数。当然如果指定维数不在输入张量的尺寸之内，将出现错误。
z

tensor(754.2015)

2.4 微积分

import numpy as np
from matplotlib_inline import backend_inline
from d2l import torch as d2l

def f(x):
    """Return 3x^2 - 4x."""
    quadratic_term = 3*x**2
    linear_term = 4*x
    return quadratic_term - linear_term

def numerical_lim(f,x,h):
    """Forward-difference quotient (f(x+h) - f(x)) / h, approximating f'(x) for small h."""
    shifted = f(x+h)
    base = f(x)
    return (shifted-base)/h

h=0.1
for i in range(5):
    print(f'h={h:.5f},numerical limit={numerical_lim(f,1,h):.5f}')
    h*=0.1

h=0.10000,numerical limit=2.30000
h=0.01000,numerical limit=2.03000
h=0.00100,numerical limit=2.00300
h=0.00010,numerical limit=2.00030
h=0.00001,numerical limit=2.00003

#三个用于图形配置的函数
def use_svg_display():   #@save
    """Use the SVG format to display plots in Jupyter."""
    # Per the original author's note, the #@save marker stores the definition
    # in the d2l package so it can later be called as d2l.use_svg_display().
    figure_format = 'svg'
    backend_inline.set_matplotlib_formats(figure_format)

from matplotlib import pyplot as plt
def set_figsize(figsize=(3.5,2.5)):   #@save
    """Switch to SVG output and set matplotlib's default figure size."""
    use_svg_display()
    rc_params = d2l.plt.rcParams
    rc_params['figure.figsize'] = figsize
    
#@save
def set_axes(axes,xlabel,ylabel,xlim,ylim,xscale,yscale,legend):
    """Configure a matplotlib axes: labels, scales, limits, optional legend, and grid."""
    # Applied in this order: labels, then scales, then limits.
    settings = (
        (axes.set_xlabel, xlabel),
        (axes.set_ylabel, ylabel),
        (axes.set_xscale, xscale),
        (axes.set_yscale, yscale),
        (axes.set_xlim, xlim),
        (axes.set_ylim, ylim),
    )
    for apply_setting, value in settings:
        apply_setting(value)
    # A legend is drawn only when a non-empty one is supplied.
    if legend:
        axes.legend(legend)
    axes.grid()
    
#plot函数：可绘制多条曲线
#@save
def plot(X,Y=None,xlabel=None,ylabel=None,legend=None,xlim=None,ylim=None,xscale='linear',yscale='linear',fmts=('-','m--','g-.','r:'),figsize=(3.5,2.5),axes=None):
    """Plot one or more curves on a matplotlib axes.

    Args:
        X: x-values (a single 1-D sequence, or a list with one per curve).
            When `Y` is None, `X` is treated as the y-values and each curve
            is plotted against its index.
        Y: y-values; a single 1-D sequence or a list of them.
        xlabel, ylabel, legend, xlim, ylim, xscale, yscale: forwarded to
            `set_axes`.
        fmts: line format strings, one per curve (curves beyond `len(fmts)`
            are not drawn because of the zip below).
        figsize: figure size passed to `set_figsize`.
        axes: axes to draw on; defaults to the current axes.
    """
    if legend is None:
        legend=[]

    set_figsize(figsize)
    axes=axes if axes else d2l.plt.gca()

    def has_one_axis(X):
        """Return True if X is a 1-D array-like or a flat list of scalars."""
        return(hasattr(X,"ndim") and X.ndim==1 or isinstance(X,list) and not hasattr(X[0],"__len__"))

    if has_one_axis(X):
        X=[X]
    if Y is None:
        X,Y=[[]]*len(X),X
    elif has_one_axis(Y):
        # A single curve must be wrapped into a one-element list. The previous
        # code repeated X len(Y) times here, which paired every copy of X with
        # a scalar from Y and made single-curve plots fail.
        Y=[Y]
    if len(X)!=len(Y):
        # Share the same x-values across all curves.
        X=X*len(Y)
    axes.cla()
    for x,y,fmt in zip(X,Y,fmts):
        if len(x):
            axes.plot(x,y,fmt)
        else:
            # No x-values: plot y against its index.
            axes.plot(y,fmt)
    set_axes(axes,xlabel,ylabel,xlim,ylim,xscale,yscale,legend)

1 2	x=np.arange(0,3,0.1) plot(x,[f(x),2*x-3],'x','f(x)',legend=['f(x)','Tangent line(x=1)'])

svg

1
2
3

def g(x):
    """Return x^3 - 1/x (division by zero at x = 0 is not guarded)."""
    cube = x**3
    reciprocal = 1/x
    return cube - reciprocal
plot(x,[g(x),4*x-4],'x','g(x)',legend=['g(x)','Tangent line(x=1)'])

F:\user\Temp\ipykernel_25528\1423519574.py:2: RuntimeWarning: divide by zero encountered in true_divide
  return x**3-(1/x)

svg

2.5 自动微分

import torch

x=torch.arange(4.0)
x

tensor([0., 1., 2., 3.])

1 2	y=2*torch.dot(x,x) y

tensor(28.)

x.requires_grad_(True) #等价于x=torch.arange(4.0,requires_grad=True)
x.grad #默认值为None

1 2	y=2*torch.dot(x,x) y

tensor(28., grad_fn=<MulBackward0>)

y.backward()#通过调用反向传播函数自动计算y关于x每个分量的梯度
x.grad

tensor([ 0.,  4.,  8., 12.])

1	x.grad==4*x

tensor([True, True, True, True])

x.grad.zero_()#在默认情况下，PyTorch会累积梯度，我们需要清除之前的值

y=x.sum()
y.backward()
x.grad

tensor([1., 1., 1., 1.])

#对非标量变量：不计算微分矩阵，而是单独计算批量中每个样本的偏导数之和
#对[非标量]调用backward需要传入一个gradient参数，该参数指定微分函数关于self的梯度
x.grad.zero_()
y=x*x
y.sum().backward()#等价于y.backward(torch.ones(len(x)))——传递1的梯度合适：只求偏导数的和
y,x.grad

(tensor([0., 1., 4., 9.], grad_fn=<MulBackward0>), tensor([0., 2., 4., 6.]))

x.grad.zero_()
y=x*x
u=y.detach()#分离变量（复制副本，保留计算结果，后续处理的u不带有y除数值外的其他性质）
z=u*x

z.sum().backward()
x.grad==u

tensor([True, True, True, True])

1
2
3

x.grad.zero_()
y.sum().backward()
x.grad==2*x

tensor([True, True, True, True])

def f(a):
    """Control-flow demo: double `a` until its norm reaches 1000, then scale by 1 or 100.

    The result is the doubled value itself when its sum is positive, and
    100 times that value otherwise.
    """
    doubled = a*2
    # Keep doubling until the norm is at least 1000.
    while doubled.norm() < 1000:
        doubled = doubled*2
    return doubled if doubled.sum() > 0 else 100*doubled

a=torch.randn(size=(),requires_grad=True)
d=f(a)
d.backward()#注意：运行backward函数会自动清除计算图；但可通过在第一次backward中加一句retain_graph=True，即d.backward(retain_graph=True)，意思为一直保留计算图

a,d,a.grad==d/a,a.grad

(tensor(0.1050, requires_grad=True),
 tensor(1719.5204, grad_fn=<MulBackward0>),
 tensor(True),
 tensor(16384.))

2.6 概率

import torch
from torch.distributions import multinomial#multinomial 多项分布
from d2l import torch as d2l

#six equal probabilities of 1/6 each; draw one sample with a single trial
fair_probs=torch.ones([6])/6
fair_probs,multinomial.Multinomial(1,fair_probs).sample()

(tensor([0.1667, 0.1667, 0.1667, 0.1667, 0.1667, 0.1667]),
 tensor([0., 0., 0., 0., 1., 0.]))

#one sample of 10 trials: the counts per outcome sum to 10
multinomial.Multinomial(10,fair_probs).sample()

tensor([2., 0., 1., 3., 1., 3.])

counts=multinomial.Multinomial(1000,fair_probs).sample()#the result is stored as float32 so that it can be divided
counts/1000#relative frequency as the estimate

tensor([0.1640, 0.1610, 0.1720, 0.1730, 0.1610, 0.1690])

#500 samples of 10 trials each; counts has one row per sample
counts=multinomial.Multinomial(10,fair_probs).sample((500,))
cum_counts=counts.cumsum(dim=0)#cumsum: cumulative sum, here the running totals over the samples
#divide each row of running totals by its row sum to get running relative frequencies
estimates=cum_counts/cum_counts.sum(dim=1,keepdims=True)

d2l.set_figsize((6,4.5))
#one curve per outcome, labelled P(die=1) .. P(die=6)
for i in range(6):
    d2l.plt.plot(estimates[:,i].numpy(),label=("P(die="+str(i+1)+")"))
#dashed reference line at 0.167, approximately 1/6
d2l.plt.axhline(y=0.167,color='black',linestyle='dashed')
d2l.plt.gca().set_xlabel('Groups of experiments')
d2l.plt.gca().set_ylabel('Estimated probability')
d2l.plt.legend();

svg

2.7 查阅文档

import torch
#list every name exposed by the torch.distributions module
print(dir(torch.distributions))

['AbsTransform', 'AffineTransform', 'Bernoulli', 'Beta', 'Binomial', 'CatTransform', 'Categorical', 'Cauchy', 'Chi2', 'ComposeTransform', 'ContinuousBernoulli', 'CorrCholeskyTransform', 'CumulativeDistributionTransform', 'Dirichlet', 'Distribution', 'ExpTransform', 'Exponential', 'ExponentialFamily', 'FisherSnedecor', 'Gamma', 'Geometric', 'Gumbel', 'HalfCauchy', 'HalfNormal', 'Independent', 'IndependentTransform', 'Kumaraswamy', 'LKJCholesky', 'Laplace', 'LogNormal', 'LogisticNormal', 'LowRankMultivariateNormal', 'LowerCholeskyTransform', 'MixtureSameFamily', 'Multinomial', 'MultivariateNormal', 'NegativeBinomial', 'Normal', 'OneHotCategorical', 'OneHotCategoricalStraightThrough', 'Pareto', 'Poisson', 'PowerTransform', 'RelaxedBernoulli', 'RelaxedOneHotCategorical', 'ReshapeTransform', 'SigmoidTransform', 'SoftmaxTransform', 'SoftplusTransform', 'StackTransform', 'StickBreakingTransform', 'StudentT', 'TanhTransform', 'Transform', 'TransformedDistribution', 'Uniform', 'VonMises', 'Weibull', 'Wishart', '__all__', '__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__path__', '__spec__', 'bernoulli', 'beta', 'biject_to', 'binomial', 'categorical', 'cauchy', 'chi2', 'constraint_registry', 'constraints', 'continuous_bernoulli', 'dirichlet', 'distribution', 'exp_family', 'exponential', 'fishersnedecor', 'gamma', 'geometric', 'gumbel', 'half_cauchy', 'half_normal', 'identity_transform', 'independent', 'kl', 'kl_divergence', 'kumaraswamy', 'laplace', 'lkj_cholesky', 'log_normal', 'logistic_normal', 'lowrank_multivariate_normal', 'mixture_same_family', 'multinomial', 'multivariate_normal', 'negative_binomial', 'normal', 'one_hot_categorical', 'pareto', 'poisson', 'register_kl', 'relaxed_bernoulli', 'relaxed_categorical', 'studentT', 'transform_to', 'transformed_distribution', 'transforms', 'uniform', 'utils', 'von_mises', 'weibull', 'wishart']

#show the built-in documentation of torch.ones
help(torch.ones)

Help on built-in function ones in module torch:

ones(...)
    ones(*size, *, out=None, dtype=None, layout=torch.strided, device=None, requires_grad=False) -> Tensor
    
    Returns a tensor filled with the scalar value `1`, with the shape defined
    by the variable argument :attr:`size`.
    
    Args:
        size (int...): a sequence of integers defining the shape of the output tensor.
            Can be a variable number of arguments or a collection like a list or tuple.
    
    Keyword arguments:
        out (Tensor, optional): the output tensor.
        dtype (:class:`torch.dtype`, optional): the desired data type of returned tensor.
            Default: if ``None``, uses a global default (see :func:`torch.set_default_tensor_type`).
        layout (:class:`torch.layout`, optional): the desired layout of returned Tensor.
            Default: ``torch.strided``.
        device (:class:`torch.device`, optional): the desired device of returned tensor.
            Default: if ``None``, uses the current device for the default tensor type
            (see :func:`torch.set_default_tensor_type`). :attr:`device` will be the CPU
            for CPU tensor types and the current CUDA device for CUDA tensor types.
        requires_grad (bool, optional): If autograd should record operations on the
            returned tensor. Default: ``False``.
    
    Example::
    
        >>> torch.ones(2, 3)
        tensor([[ 1.,  1.,  1.],
                [ 1.,  1.,  1.]])
    
        >>> torch.ones(5)
        tensor([ 1.,  1.,  1.,  1.,  1.])

#a vector of four ones, as described by the help text
torch.ones(4)

tensor([1., 1., 1., 1.])

list?

#show the built-in documentation of the list class
help(list)

Help on class list in module builtins:

class list(object)
 |  list(iterable=(), /)
 |  
 |  Built-in mutable sequence.
 |  
 |  If no argument is given, the constructor creates a new empty list.
 |  The argument must be an iterable if specified.
 |  
 |  Methods defined here:
 |  
 |  __add__(self, value, /)
 |      Return self+value.
 |  
 |  __contains__(self, key, /)
 |      Return key in self.
 |  
 |  __delitem__(self, key, /)
 |      Delete self[key].
 |  
 |  __eq__(self, value, /)
 |      Return self==value.
 |  
 |  __ge__(self, value, /)
 |      Return self>=value.
 |  
 |  __getattribute__(self, name, /)
 |      Return getattr(self, name).
 |  
 |  __getitem__(...)
 |      x.__getitem__(y) <==> x[y]
 |  
 |  __gt__(self, value, /)
 |      Return self>value.
 |  
 |  __iadd__(self, value, /)
 |      Implement self+=value.
 |  
 |  __imul__(self, value, /)
 |      Implement self*=value.
 |  
 |  __init__(self, /, *args, **kwargs)
 |      Initialize self.  See help(type(self)) for accurate signature.
 |  
 |  __iter__(self, /)
 |      Implement iter(self).
 |  
 |  __le__(self, value, /)
 |      Return self<=value.
 |  
 |  __len__(self, /)
 |      Return len(self).
 |  
 |  __lt__(self, value, /)
 |      Return self<value.
 |  
 |  __mul__(self, value, /)
 |      Return self*value.
 |  
 |  __ne__(self, value, /)
 |      Return self!=value.
 |  
 |  __repr__(self, /)
 |      Return repr(self).
 |  
 |  __reversed__(self, /)
 |      Return a reverse iterator over the list.
 |  
 |  __rmul__(self, value, /)
 |      Return value*self.
 |  
 |  __setitem__(self, key, value, /)
 |      Set self[key] to value.
 |  
 |  __sizeof__(self, /)
 |      Return the size of the list in memory, in bytes.
 |  
 |  append(self, object, /)
 |      Append object to the end of the list.
 |  
 |  clear(self, /)
 |      Remove all items from list.
 |  
 |  copy(self, /)
 |      Return a shallow copy of the list.
 |  
 |  count(self, value, /)
 |      Return number of occurrences of value.
 |  
 |  extend(self, iterable, /)
 |      Extend list by appending elements from the iterable.
 |  
 |  index(self, value, start=0, stop=9223372036854775807, /)
 |      Return first index of value.
 |      
 |      Raises ValueError if the value is not present.
 |  
 |  insert(self, index, object, /)
 |      Insert object before index.
 |  
 |  pop(self, index=-1, /)
 |      Remove and return item at index (default last).
 |      
 |      Raises IndexError if list is empty or index is out of range.
 |  
 |  remove(self, value, /)
 |      Remove first occurrence of value.
 |      
 |      Raises ValueError if the value is not present.
 |  
 |  reverse(self, /)
 |      Reverse *IN PLACE*.
 |  
 |  sort(self, /, *, key=None, reverse=False)
 |      Sort the list in ascending order and return None.
 |      
 |      The sort is in-place (i.e. the list itself is modified) and stable (i.e. the
 |      order of two equal elements is maintained).
 |      
 |      If a key function is given, apply it once to each list item and sort them,
 |      ascending or descending, according to their function values.
 |      
 |      The reverse flag can be set to sort in descending order.
 |  
 |  ----------------------------------------------------------------------
 |  Class methods defined here:
 |  
 |  __class_getitem__(...) from builtins.type
 |      See PEP 585
 |  
 |  ----------------------------------------------------------------------
 |  Static methods defined here:
 |  
 |  __new__(*args, **kwargs) from builtins.type
 |      Create and return a new object.  See help(type) for accurate signature.
 |  
 |  ----------------------------------------------------------------------
 |  Data and other attributes defined here:
 |  
 |  __hash__ = None