3.1 Linear Regression

```python
%matplotlib inline
import math
import time
import numpy as np
import torch
from d2l import torch as d2l

n = 10000
a = torch.ones([n])
b = torch.ones([n])

class Timer:
    """Record multiple running times."""
    def __init__(self):
        self.times = []
        self.start()

    def start(self):
        """Start the timer."""
        self.tik = time.time()

    def stop(self):
        """Stop the timer and record the elapsed time."""
        self.times.append(time.time() - self.tik)
        return self.times[-1]

    def avg(self):
        """Return the average time."""
        return sum(self.times) / len(self.times)

    def sum(self):
        """Return the total time."""
        return sum(self.times)

    def cumsum(self):
        """Return the cumulative times."""
        return np.array(self.times).cumsum().tolist()
```
```python
c = torch.zeros(n)
timer = Timer()
for i in range(n):
    c[i] = a[i] + b[i]
f'{timer.stop():.5f} sec'
```
'0.14660 sec'
```python
timer.start()
d = a + b
f'{timer.stop():.5f} sec'
```
'0.00000 sec'
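The single vectorized measurement is too fast for `time.time` to resolve, so it reads as zero. A minimal sketch that averages over repeated runs gives a steadier comparison (it assumes `a`, `b`, and the `Timer` class from the cells above):

```python
# Assumes a, b and the Timer class defined above.
timer = Timer()
for _ in range(100):   # repeat the vectorized add to average out timer resolution
    timer.start()
    d = a + b
    timer.stop()
f'{timer.avg():.7f} sec per run on average'
```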
```python
def normal(x, mu, sigma):
    p = 1 / math.sqrt(2 * math.pi * sigma**2)
    return p * np.exp(-0.5 / sigma**2 * (x - mu)**2)

x = np.arange(-7, 7, 0.01)
params = [(0, 1), (0, 2), (3, 1)]
d2l.plot(x, [normal(x, mu, sigma) for mu, sigma in params],
         xlabel='x', ylabel='p(x)', figsize=(4.5, 2.5),
         legend=[f'mean {mu}, std {sigma}' for mu, sigma in params])
```
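For reference, the density that `normal` implements is the Gaussian

$$p(x) = \frac{1}{\sqrt{2\pi\sigma^2}} \exp\!\left(-\frac{(x-\mu)^2}{2\sigma^2}\right),$$

so in the plot the mean $\mu$ shifts the peak while the standard deviation $\sigma$ spreads it out.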
3.2 Implementing Linear Regression from Scratch

```python
%matplotlib inline
import random
import torch
from d2l import torch as d2l

def synthetic_data(w, b, num_examples):
    """Generate y = Xw + b + noise."""
    X = torch.normal(0, 1, (num_examples, len(w)))
    y = torch.matmul(X, w) + b
    y += torch.normal(0, 0.01, y.shape)
    return X, y.reshape((-1, 1))

true_w = torch.tensor([2, -3.4])
true_b = 4.2
features, labels = synthetic_data(true_w, true_b, 1000)
print('features:', features[0], '\nlabel:', labels[0])
```
features: tensor([ 0.7328, -0.5520]) 
label: tensor([7.5513])
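Since the data comes from a known linear model plus Gaussian noise with standard deviation 0.01, ordinary least squares should recover the parameters almost exactly, which makes a handy sanity check before training. A minimal sketch using `torch.linalg.lstsq` (assuming `features` and `labels` from the cell above):

```python
import torch

# Assumes features, labels from the synthetic_data call above.
# Append a column of ones so the bias is fit jointly with the weights.
X_aug = torch.cat([features, torch.ones(len(features), 1)], dim=1)
sol = torch.linalg.lstsq(X_aug, labels).solution
w_hat, b_hat = sol[:2].flatten(), float(sol[2])
print('w_hat:', w_hat, 'b_hat:', b_hat)  # expect roughly [2, -3.4] and 4.2
```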
```python
d2l.set_figsize()
d2l.plt.scatter(features[:, 1].detach().numpy(), labels.detach().numpy(), 1);
```
```python
def data_iter(batch_size, features, labels):
    num_examples = len(features)
    indices = list(range(num_examples))
    # Read the examples in random order
    random.shuffle(indices)
    for i in range(0, num_examples, batch_size):
        # min() handles the last, possibly smaller, batch
        batch_indices = torch.tensor(indices[i:min(i + batch_size, num_examples)])
        yield features[batch_indices], labels[batch_indices]

batch_size = 10
for X, y in data_iter(batch_size, features, labels):
    print(X, '\n', y)
    break
```
tensor([[ 0.9175, -0.1441],
        [-0.3328, -0.4237],
        [-0.1287,  1.6801],
        [ 0.8705, -0.9030],
        [-0.4966,  1.4015],
        [ 1.3378, -1.8026],
        [ 0.5129,  1.2806],
        [ 1.1026,  1.2080],
        [ 0.6151,  0.6337],
        [-0.4683, -0.4388]]) 
 tensor([[ 6.5336],
        [ 4.9801],
        [-1.7645],
        [ 9.0089],
        [-1.5652],
        [12.9979],
        [ 0.8680],
        [ 2.2893],
        [ 3.2902],
        [ 4.7695]])
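Note the `min(i + batch_size, num_examples)` in `data_iter`: when the dataset size is not a multiple of the batch size, the final batch is simply smaller rather than dropped. A quick sketch (assuming `data_iter`, `features`, and `labels` from above):

```python
# 1000 examples with batch_size=7 -> 142 full batches plus one of size 1000 % 7 = 6.
sizes = [len(X) for X, _ in data_iter(7, features, labels)]
print(len(sizes), 'batches; last batch size:', sizes[-1])  # 143 batches, last of size 6
```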
```python
w = torch.normal(0, 0.01, size=(2, 1), requires_grad=True)
b = torch.zeros(1, requires_grad=True)

def linreg(X, w, b):
    """The linear regression model."""
    return torch.matmul(X, w) + b

def squared_loss(y_hat, y):
    """Squared loss."""
    return (y_hat - y.reshape(y_hat.shape))**2 / 2

def sgd(params, lr, batch_size):
    """Minibatch stochastic gradient descent."""
    with torch.no_grad():
        for param in params:
            param -= lr * param.grad / batch_size
            param.grad.zero_()

lr = 0.03
num_epochs = 3
net = linreg
loss = squared_loss

for epoch in range(num_epochs):
    for X, y in data_iter(batch_size, features, labels):
        l = loss(net(X, w, b), y)  # minibatch loss for X and y
        # l has shape (batch_size, 1) rather than being a scalar,
        # so sum its elements before computing gradients
        l.sum().backward()
        sgd([w, b], lr, batch_size)  # update parameters using their gradients
    with torch.no_grad():
        train_l = loss(net(features, w, b), labels)
        print(f'epoch {epoch + 1}, loss {float(train_l.mean()):f}')
```
epoch 1, loss 0.036941
epoch 2, loss 0.000134
epoch 3, loss 0.000049
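A detail worth noting: the loop calls `l.sum().backward()` and `sgd` then divides each gradient by `batch_size`, which together amount to stepping along the gradient of the mean loss. A minimal sketch of that equivalence:

```python
import torch

x = torch.tensor([1.0, 2.0, 3.0])

w = torch.ones(3, requires_grad=True)
((w * x) ** 2).sum().backward()
print(w.grad / 3)   # gradient of the summed loss, divided by the batch size

w.grad = None
((w * x) ** 2).mean().backward()
print(w.grad)       # gradient of the mean loss: the same values
```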
```python
print(f'error in estimating w: {true_w - w.reshape(true_w.shape)}')
print(f'error in estimating b: {true_b - b}')
```
error in estimating w: tensor([ 0.0002, -0.0002], grad_fn=<SubBackward0>)
error in estimating b: tensor([0.0002], grad_fn=<RsubBackward1>)
3.3 Concise Implementation of Linear Regression

```python
import numpy as np
import torch
from torch.utils import data
from d2l import torch as d2l

true_w = torch.tensor([2, -3.4])
true_b = 4.2
features, labels = d2l.synthetic_data(true_w, true_b, 1000)

def load_array(data_arrays, batch_size, is_train=True):
    """Construct a PyTorch data iterator."""
    dataset = data.TensorDataset(*data_arrays)
    return data.DataLoader(dataset, batch_size, shuffle=is_train)

batch_size = 10
data_iter = load_array((features, labels), batch_size)
next(iter(data_iter))
```
[tensor([[-0.6422, -0.7470],
         [-2.1785,  0.3340],
         [ 1.4011, -1.1104],
         [-0.8083, -0.3035],
         [ 0.1077, -0.1201],
         [-0.4151, -0.1079],
         [ 1.8074,  0.0904],
         [-0.3707,  0.6197],
         [ 0.3739,  0.2972],
         [ 0.2383,  1.1791]]),
 tensor([[ 5.4457],
         [-1.3122],
         [10.7837],
         [ 3.6281],
         [ 4.8105],
         [ 3.7284],
         [ 7.5017],
         [ 1.3465],
         [ 3.9247],
         [ 0.6800]])]
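Because `load_array` sets `shuffle=is_train`, a loader built with `is_train=False` iterates in dataset order, which is what evaluation code usually wants. A small usage sketch (assuming `load_array`, `features`, and `labels` from above):

```python
import torch

eval_iter = load_array((features, labels), batch_size=10, is_train=False)
X0, _ = next(iter(eval_iter))
print(torch.allclose(X0, features[:10]))  # True: the first batch is the first 10 rows
```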
```python
from torch import nn

net = nn.Sequential(nn.Linear(2, 1))
net[0].weight.data.normal_(0, 0.01), net[0].bias.data.fill_(0)
```
(tensor([[ 0.0106, -0.0055]]), tensor([0.]))
```python
loss = nn.MSELoss()
trainer = torch.optim.SGD(net.parameters(), lr=0.03)
```
```python
num_epochs = 3
for epoch in range(num_epochs):
    for X, y in data_iter:
        l = loss(net(X), y)
        trainer.zero_grad()
        l.backward()
        trainer.step()
    l = loss(net(features), labels)
    print(f'epoch {epoch + 1}, loss {l:f}')
```
epoch 1, loss 0.000223
epoch 2, loss 0.000112
epoch 3, loss 0.000112
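One difference from the scratch version: `nn.MSELoss` defaults to `reduction='mean'` and carries no factor of 1/2, so it returns the plain mean squared error. A minimal sketch of what it computes:

```python
import torch
from torch import nn

y_hat = torch.tensor([[1.0], [2.0]])
y = torch.tensor([[1.5], [0.0]])

print(nn.MSELoss()(y_hat, y))     # tensor(2.1250)
print(((y_hat - y) ** 2).mean())  # the same value: mean of squared errors
```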
```python
w = net[0].weight.data
print('error in estimating w:', true_w - w.reshape(true_w.shape))
b = net[0].bias.data
print('error in estimating b:', true_b - b)
```
error in estimating w: tensor([ 0.0014, -0.0004])
error in estimating b: tensor([0.0008])
3.5 The Image Classification Dataset

```python
%matplotlib inline
import torch
import torchvision
from torch.utils import data
from torchvision import transforms
from d2l import torch as d2l

d2l.use_svg_display()
```
```python
# ToTensor converts the images from PIL type to 32-bit float tensors,
# dividing all pixel values by 255 so they lie in [0, 1].
trans = transforms.ToTensor()
mnist_train = torchvision.datasets.FashionMNIST(
    root="../data", train=True, transform=trans, download=True)
mnist_test = torchvision.datasets.FashionMNIST(
    root="../data", train=False, transform=trans, download=True)
```
```python
len(mnist_train), len(mnist_test)
```
(60000, 10000)
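Each image is a 1×28×28 tensor, i.e. one grayscale channel of 28×28 pixels, as a quick index into the dataset shows:

```python
mnist_train[0][0].shape
```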
torch.Size([1, 28, 28])
```python
def get_fashion_mnist_labels(labels):
    """Return the text labels for the Fashion-MNIST dataset."""
    text_labels = ['t-shirt', 'trouser', 'pullover', 'dress', 'coat',
                   'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot']
    return [text_labels[int(i)] for i in labels]

def show_images(imgs, num_rows, num_cols, titles=None, scale=1.5):
    """Plot a grid of images."""
    figsize = (num_cols * scale, num_rows * scale)
    _, axes = d2l.plt.subplots(num_rows, num_cols, figsize=figsize)
    axes = axes.flatten()
    for i, (ax, img) in enumerate(zip(axes, imgs)):
        if torch.is_tensor(img):
            ax.imshow(img.numpy())  # tensor image
        else:
            ax.imshow(img)          # PIL image
        ax.axes.get_xaxis().set_visible(False)
        ax.axes.get_yaxis().set_visible(False)
        if titles:
            ax.set_title(titles[i])
    return axes

X, y = next(iter(data.DataLoader(mnist_train, batch_size=50)))
show_images(X.reshape(50, 28, 28), 10, 5, titles=get_fashion_mnist_labels(y));
```
```python
batch_size = 256

def get_dataloader_workers():
    """Use 4 processes to read the data."""
    return 4

train_iter = data.DataLoader(mnist_train, batch_size, shuffle=True,
                             num_workers=get_dataloader_workers())

# Time one full pass over the training data
timer = d2l.Timer()
for X, y in train_iter:
    continue
f'{timer.stop():.2f} sec'
```
'2.11 sec'
```python
def load_data_fashion_mnist(batch_size, resize=None):
    """Download the Fashion-MNIST dataset and load it into memory."""
    trans = [transforms.ToTensor()]
    if resize:
        trans.insert(0, transforms.Resize(resize))
    trans = transforms.Compose(trans)
    mnist_train = torchvision.datasets.FashionMNIST(
        root="../data", train=True, transform=trans, download=True)
    mnist_test = torchvision.datasets.FashionMNIST(
        root="../data", train=False, transform=trans, download=True)
    return (data.DataLoader(mnist_train, batch_size, shuffle=True,
                            num_workers=get_dataloader_workers()),
            data.DataLoader(mnist_test, batch_size, shuffle=False,
                            num_workers=get_dataloader_workers()))

train_iter, test_iter = load_data_fashion_mnist(32, resize=64)
for X, y in train_iter:
    print(X.shape, X.dtype, y.shape, y.dtype)
    break
```
torch.Size([32, 1, 64, 64]) torch.float32 torch.Size([32]) torch.int64
3.6 Implementing Softmax Regression from Scratch

```python
import torch
from IPython import display
from d2l import torch as d2l

batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
```
```python
# Each 28x28 image is flattened into a vector of length 784;
# the dataset has 10 classes.
num_inputs = 784
num_outputs = 10

W = torch.normal(0, 0.01, size=(num_inputs, num_outputs), requires_grad=True)
b = torch.zeros(num_outputs, requires_grad=True)
```
```python
X = torch.tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
X.sum(0, keepdim=True), X.sum(1, keepdim=True)
```
(tensor([[5., 7., 9.]]),
 tensor([[ 6.],
         [15.]]))
```python
def softmax(X):
    X_exp = torch.exp(X)
    partition = X_exp.sum(1, keepdim=True)
    return X_exp / partition  # broadcasting applies here

X = torch.normal(0, 1, (2, 5))
X_prob = softmax(X)
X_prob, X_prob.sum(1)
```
(tensor([[0.2143, 0.0127, 0.1268, 0.2248, 0.4214],
         [0.3360, 0.2826, 0.0913, 0.1454, 0.1446]]),
 tensor([1.0000, 1.0000]))
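One caveat: this `softmax` exponentiates the raw logits, so a large entry (say 1000) overflows to `inf` and yields `nan` probabilities. The fused loss used in Section 3.7 avoids this; a minimal sketch of the standard fix, subtracting the row-wise maximum (which leaves the result mathematically unchanged):

```python
import torch

def stable_softmax(X):
    # softmax(X) == softmax(X - c) for any per-row constant c;
    # choosing c = row max keeps exp() from overflowing.
    X = X - X.max(dim=1, keepdim=True).values
    X_exp = torch.exp(X)
    return X_exp / X_exp.sum(1, keepdim=True)

X = torch.tensor([[1000.0, 0.0], [0.0, 1.0]])
print(stable_softmax(X))  # finite probabilities, rows still sum to 1
```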
```python
def net(X):
    return softmax(torch.matmul(X.reshape((-1, W.shape[0])), W) + b)
```
```python
y = torch.tensor([0, 2])
y_hat = torch.tensor([[0.1, 0.3, 0.6], [0.3, 0.2, 0.5]])
y_hat[[0, 1], y]
```
tensor([0.1000, 0.5000])
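The same lookup is often written with `torch.gather`, which picks `y_hat[i, y[i]]` along a given dimension; a small equivalent sketch:

```python
import torch

y = torch.tensor([0, 2])
y_hat = torch.tensor([[0.1, 0.3, 0.6], [0.3, 0.2, 0.5]])
y_hat.gather(1, y.unsqueeze(1)).squeeze(1)  # tensor([0.1000, 0.5000])
```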
```python
def cross_entropy(y_hat, y):
    return -torch.log(y_hat[range(len(y_hat)), y])

cross_entropy(y_hat, y)
```
tensor([2.3026, 0.6931])
```python
def accuracy(y_hat, y):
    """Compute the number of correct predictions."""
    if len(y_hat.shape) > 1 and y_hat.shape[1] > 1:
        y_hat = y_hat.argmax(axis=1)
    cmp = y_hat.type(y.dtype) == y
    return float(cmp.type(y.dtype).sum())

accuracy(y_hat, y) / len(y)  # only the second prediction matches its label
```
0.5
```python
def evaluate_accuracy(net, data_iter):
    """Compute the accuracy of a model on a dataset."""
    if isinstance(net, torch.nn.Module):
        net.eval()  # set the model to evaluation mode
    metric = Accumulator(2)  # number of correct predictions, number of predictions
    with torch.no_grad():
        for X, y in data_iter:
            metric.add(accuracy(net(X), y), y.numel())
    return metric[0] / metric[1]

class Accumulator:
    """Accumulate sums over n variables."""
    def __init__(self, n):
        self.data = [0.0] * n

    def add(self, *args):
        self.data = [a + float(b) for a, b in zip(self.data, args)]

    def reset(self):
        self.data = [0.0] * len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]

evaluate_accuracy(net, test_iter)
```
0.1326

With randomly initialized weights, accuracy is close to random guessing, i.e. 0.1 for 10 classes.
```python
def train_epoch_ch3(net, train_iter, loss, updater):
    """Train a model for one epoch (defined in Chapter 3)."""
    # Set the model to training mode
    if isinstance(net, torch.nn.Module):
        net.train()
    # Sum of training loss, sum of training accuracy, number of examples
    metric = Accumulator(3)
    for X, y in train_iter:
        # Compute gradients and update parameters
        y_hat = net(X)
        l = loss(y_hat, y)
        if isinstance(updater, torch.optim.Optimizer):
            # Use the built-in PyTorch optimizer and loss
            updater.zero_grad()
            l.mean().backward()
            updater.step()
        else:
            # Use the custom-built optimizer and loss
            l.sum().backward()
            updater(X.shape[0])
        metric.add(float(l.sum()), accuracy(y_hat, y), y.numel())
    # Return training loss and training accuracy
    return metric[0] / metric[2], metric[1] / metric[2]

class Animator:
    """Plot data in animation."""
    def __init__(self, xlabel=None, ylabel=None, legend=None, xlim=None,
                 ylim=None, xscale='linear', yscale='linear',
                 fmts=('-', 'm--', 'g-.', 'r:'), nrows=1, ncols=1,
                 figsize=(3.5, 2.5)):
        # Plot multiple lines incrementally
        if legend is None:
            legend = []
        d2l.use_svg_display()
        self.fig, self.axes = d2l.plt.subplots(nrows, ncols, figsize=figsize)
        if nrows * ncols == 1:
            self.axes = [self.axes, ]
        # Capture the axis configuration with a lambda
        self.config_axes = lambda: d2l.set_axes(
            self.axes[0], xlabel, ylabel, xlim, ylim, xscale, yscale, legend)
        self.X, self.Y, self.fmts = None, None, fmts

    def add(self, x, y):
        # Add multiple data points to the figure
        if not hasattr(y, "__len__"):
            y = [y]
        n = len(y)
        if not hasattr(x, "__len__"):
            x = [x] * n
        if not self.X:
            self.X = [[] for _ in range(n)]
        if not self.Y:
            self.Y = [[] for _ in range(n)]
        for i, (a, b) in enumerate(zip(x, y)):
            if a is not None and b is not None:
                self.X[i].append(a)
                self.Y[i].append(b)
        self.axes[0].cla()
        for x, y, fmt in zip(self.X, self.Y, self.fmts):
            self.axes[0].plot(x, y, fmt)
        self.config_axes()
        display.display(self.fig)
        display.clear_output(wait=True)

def train_ch3(net, train_iter, test_iter, loss, num_epochs, updater):
    """Train a model (defined in Chapter 3)."""
    animator = Animator(xlabel='epoch', xlim=[1, num_epochs], ylim=[0.3, 0.9],
                        legend=['train loss', 'train acc', 'test acc'])
    for epoch in range(num_epochs):
        train_metrics = train_epoch_ch3(net, train_iter, loss, updater)
        test_acc = evaluate_accuracy(net, test_iter)
        animator.add(epoch + 1, train_metrics + (test_acc,))
    train_loss, train_acc = train_metrics
    assert train_loss < 0.5, train_loss
    assert train_acc <= 1 and train_acc > 0.7, train_acc
    assert test_acc <= 1 and test_acc > 0.7, test_acc

lr = 0.1

def updater(batch_size):
    return d2l.sgd([W, b], lr, batch_size)

num_epochs = 10
train_ch3(net, train_iter, test_iter, cross_entropy, num_epochs, updater)
```
```python
def predict_ch3(net, test_iter, n=6):
    """Predict labels (defined in Chapter 3)."""
    for X, y in test_iter:
        break
    trues = d2l.get_fashion_mnist_labels(y)
    preds = d2l.get_fashion_mnist_labels(net(X).argmax(axis=1))
    titles = [true + '\n' + pred for true, pred in zip(trues, preds)]
    d2l.show_images(X[0:n].reshape((n, 28, 28)), 1, n, titles=titles[0:n])

predict_ch3(net, test_iter)
```
3.7 Concise Implementation of Softmax Regression

```python
import torch
from torch import nn
from d2l import torch as d2l

batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
```
```python
# PyTorch does not implicitly reshape the inputs, so add a Flatten layer
# before the linear layer to convert each image into a vector.
net = nn.Sequential(nn.Flatten(), nn.Linear(784, 10))

def init_weights(m):
    if type(m) == nn.Linear:
        nn.init.normal_(m.weight, std=0.01)

net.apply(init_weights);
```
```python
loss = nn.CrossEntropyLoss(reduction='none')
```
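`reduction='none'` returns one loss value per example so that `train_ch3` can decide how to reduce it (`l.mean().backward()` for framework optimizers). Internally, `nn.CrossEntropyLoss` fuses log-softmax with the negative log-likelihood, which sidesteps the overflow issue of the naive `softmax` in Section 3.6. A minimal sketch of the equivalence:

```python
import torch
from torch import nn
import torch.nn.functional as F

logits = torch.randn(4, 10)
y = torch.tensor([1, 0, 3, 9])

l1 = nn.CrossEntropyLoss(reduction='none')(logits, y)
l2 = -F.log_softmax(logits, dim=1)[range(4), y]  # manual log-softmax + NLL
print(torch.allclose(l1, l2))  # True
```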
```python
trainer = torch.optim.SGD(net.parameters(), lr=0.1)
```
```python
num_epochs = 10
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, trainer)
```