3.1 Linear Regression

```python
%matplotlib inline
import math
import time
import numpy as np
import torch
from d2l import torch as d2l

n = 10000
a = torch.ones([n])
b = torch.ones([n])

class Timer:
    """Record multiple running times."""
    def __init__(self):
        self.times = []
        self.start()

    def start(self):
        """Start the timer."""
        self.tik = time.time()

    def stop(self):
        """Stop the timer and record the elapsed time in a list."""
        self.times.append(time.time() - self.tik)
        return self.times[-1]

    def avg(self):
        """Return the average time."""
        return sum(self.times) / len(self.times)

    def sum(self):
        """Return the sum of recorded times."""
        return sum(self.times)

    def cumsum(self):
        """Return the accumulated times."""
        return np.array(self.times).cumsum().tolist()
```
```python
# Add the two vectors one coordinate at a time with a Python for-loop.
c = torch.zeros(n)
timer = Timer()
for i in range(n):
    c[i] = a[i] + b[i]
f'{timer.stop():.5f} sec'
```
'0.14660 sec'
```python
# Add the two vectors in a single vectorized operation.
timer.start()
d = a + b
f'{timer.stop():.5f} sec'
```
'0.00000 sec'
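The vectorized add is so much faster that the elapsed time falls below the resolution of `time.time()`. As an aside (not part of the original code), Python's `timeit` module gives a finer-grained measurement; a minimal sketch:

```python
import timeit

# Average the vectorized add over many runs for a higher-resolution timing
# (illustrative; `a` and `b` are the vectors defined above).
elapsed = timeit.timeit(lambda: a + b, number=100) / 100
print(f'{elapsed:.8f} sec per add')
```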
```python
def normal(x, mu, sigma):
    """Probability density of a normal distribution with mean mu and std sigma."""
    p = 1 / math.sqrt(2 * math.pi * sigma**2)
    return p * np.exp(-0.5 / sigma**2 * (x - mu)**2)

# Visualize the density for several (mean, std) pairs.
x = np.arange(-7, 7, 0.01)
params = [(0, 1), (0, 2), (3, 1)]
d2l.plot(x, [normal(x, mu, sigma) for mu, sigma in params],
         xlabel='x', ylabel='p(x)', figsize=(4.5, 2.5),
         legend=[f'mean {mu}, std {sigma}' for mu, sigma in params])
```
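As a quick sanity check (an addition for illustration, not in the original), the density should integrate to approximately 1 over this interval:

```python
# Numerically integrate the standard normal density over [-7, 7);
# the result should be very close to 1.
print(np.trapz(normal(x, 0, 1), x))  # ~1.0
```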
3.2 Linear Regression Implementation from Scratch

```python
%matplotlib inline
import random
import torch
from d2l import torch as d2l

def synthetic_data(w, b, num_examples):
    """Generate y = Xw + b + noise."""
    X = torch.normal(0, 1, (num_examples, len(w)))
    y = torch.matmul(X, w) + b
    y += torch.normal(0, 0.01, y.shape)  # Gaussian noise with std 0.01
    return X, y.reshape((-1, 1))

true_w = torch.tensor([2, -3.4])
true_b = 4.2
features, labels = synthetic_data(true_w, true_b, 1000)
print('features:', features[0], '\nlabel:', labels[0])
```
features: tensor([ 0.7328, -0.5520])
label: tensor([7.5513])
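Since the injected noise has standard deviation 0.01, the residual between the labels and the noiseless linear model should be on that scale. A quick check (added for illustration, not part of the book's code):

```python
# The residual standard deviation should be close to the injected 0.01.
residual = labels - (torch.matmul(features, true_w).reshape(-1, 1) + true_b)
print(residual.std())  # ~0.01
```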
```python
d2l.set_figsize()
d2l.plt.scatter(features[:, 1].detach().numpy(), labels.detach().numpy(), 1);
```
```python
def data_iter(batch_size, features, labels):
    """Yield minibatches of (features, labels) in random order."""
    num_examples = len(features)
    indices = list(range(num_examples))
    random.shuffle(indices)  # examples are read in random order
    for i in range(0, num_examples, batch_size):
        batch_indices = torch.tensor(
            indices[i:min(i + batch_size, num_examples)])
        yield features[batch_indices], labels[batch_indices]

batch_size = 10
for X, y in data_iter(batch_size, features, labels):
    print(X, '\n', y)
    break
```
tensor([[ 0.9175, -0.1441],
[-0.3328, -0.4237],
[-0.1287, 1.6801],
[ 0.8705, -0.9030],
[-0.4966, 1.4015],
[ 1.3378, -1.8026],
[ 0.5129, 1.2806],
[ 1.1026, 1.2080],
[ 0.6151, 0.6337],
[-0.4683, -0.4388]])
tensor([[ 6.5336],
[ 4.9801],
[-1.7645],
[ 9.0089],
[-1.5652],
[12.9979],
[ 0.8680],
[ 2.2893],
[ 3.2902],
[ 4.7695]])
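When the number of examples is not divisible by the batch size, the `min(...)` clamp in `data_iter` makes the final minibatch smaller. A quick illustration (not in the original; the batch size is chosen arbitrarily):

```python
# With 1000 examples and batch_size=300, the batches contain
# 300, 300, 300, and 100 examples respectively.
for X, y in data_iter(300, features, labels):
    print(X.shape[0])
```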
```python
w = torch.normal(0, 0.01, size=(2, 1), requires_grad=True)
b = torch.zeros(1, requires_grad=True)

def linreg(X, w, b):
    """The linear regression model."""
    return torch.matmul(X, w) + b

def squared_loss(y_hat, y):
    """Squared loss."""
    return (y_hat - y.reshape(y_hat.shape))**2 / 2

def sgd(params, lr, batch_size):
    """Minibatch stochastic gradient descent."""
    with torch.no_grad():
        for param in params:
            param -= lr * param.grad / batch_size
            param.grad.zero_()

lr = 0.03
num_epochs = 3
net = linreg
loss = squared_loss

for epoch in range(num_epochs):
    for X, y in data_iter(batch_size, features, labels):
        l = loss(net(X, w, b), y)    # minibatch loss on X and y
        l.sum().backward()           # compute gradients w.r.t. [w, b]
        sgd([w, b], lr, batch_size)  # update parameters using their gradients
    with torch.no_grad():
        train_l = loss(net(features, w, b), labels)
        print(f'epoch {epoch + 1}, loss {float(train_l.mean()):f}')
```
epoch 1, loss 0.036941
epoch 2, loss 0.000134
epoch 3, loss 0.000049
```python
print(f'estimation error of w: {true_w - w.reshape(true_w.shape)}')
print(f'estimation error of b: {true_b - b}')
```
estimation error of w: tensor([ 0.0002, -0.0002], grad_fn=<SubBackward0>)
estimation error of b: tensor([0.0002], grad_fn=<RsubBackward1>)
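For a problem this small we can also compare against the closed-form least-squares solution (an extra cross-check, not part of the book's code), appending a column of ones to absorb the bias:

```python
# Solve the least-squares problem directly: the first two entries of the
# solution should match true_w and the last should match true_b.
X_aug = torch.cat([features, torch.ones(features.shape[0], 1)], dim=1)
solution = torch.linalg.lstsq(X_aug, labels).solution
print(solution.flatten())  # ~[2.0, -3.4, 4.2]
```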
3.3 Concise Implementation of Linear Regression

```python
import numpy as np
import torch
from torch.utils import data
from d2l import torch as d2l

true_w = torch.tensor([2, -3.4])
true_b = 4.2
features, labels = d2l.synthetic_data(true_w, true_b, 1000)

def load_array(data_arrays, batch_size, is_train=True):
    """Construct a PyTorch data iterator."""
    dataset = data.TensorDataset(*data_arrays)
    return data.DataLoader(dataset, batch_size, shuffle=is_train)

batch_size = 10
data_iter = load_array((features, labels), batch_size)
next(iter(data_iter))
```
[tensor([[-0.6422, -0.7470],
[-2.1785, 0.3340],
[ 1.4011, -1.1104],
[-0.8083, -0.3035],
[ 0.1077, -0.1201],
[-0.4151, -0.1079],
[ 1.8074, 0.0904],
[-0.3707, 0.6197],
[ 0.3739, 0.2972],
[ 0.2383, 1.1791]]),
tensor([[ 5.4457],
[-1.3122],
[10.7837],
[ 3.6281],
[ 4.8105],
[ 3.7284],
[ 7.5017],
[ 1.3465],
[ 3.9247],
[ 0.6800]])]
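The `is_train` flag controls shuffling: for evaluation data we would typically pass `is_train=False` so the loader visits examples in a fixed order. For example (illustrative):

```python
# A non-shuffling loader, as one would use for held-out evaluation data.
eval_iter = load_array((features, labels), batch_size, is_train=False)
```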
```python
from torch import nn

# A single linear layer; initialize its weight and bias in place.
net = nn.Sequential(nn.Linear(2, 1))
net[0].weight.data.normal_(0, 0.01), net[0].bias.data.fill_(0)
```
(tensor([[ 0.0106, -0.0055]]), tensor([0.]))
```python
loss = nn.MSELoss()
trainer = torch.optim.SGD(net.parameters(), lr=0.03)
```
```python
num_epochs = 3
for epoch in range(num_epochs):
    for X, y in data_iter:
        l = loss(net(X), y)
        trainer.zero_grad()
        l.backward()
        trainer.step()
    l = loss(net(features), labels)
    print(f'epoch {epoch + 1}, loss {l:f}')
```
epoch 1, loss 0.000223
epoch 2, loss 0.000112
epoch 3, loss 0.000112
```python
w = net[0].weight.data
print('estimation error of w:', true_w - w.reshape(true_w.shape))
b = net[0].bias.data
print('estimation error of b:', true_b - b)
```
estimation error of w: tensor([ 0.0014, -0.0004])
estimation error of b: tensor([0.0008])
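The learned parameters can also be read off in one place via the module's `state_dict` (an aside, not part of the book's code):

```python
# Inspect all parameters of the Sequential model at once.
for name, param in net.state_dict().items():
    print(name, param)
```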
3.5 The Image Classification Dataset

```python
%matplotlib inline
import torch
import torchvision
from torch.utils import data
from torchvision import transforms
from d2l import torch as d2l

d2l.use_svg_display()
```
```python
# Download Fashion-MNIST and load it into memory, converting each image
# to a float32 tensor via ToTensor.
trans = transforms.ToTensor()
mnist_train = torchvision.datasets.FashionMNIST(
    root="../data", train=True, transform=trans, download=True)
mnist_test = torchvision.datasets.FashionMNIST(
    root="../data", train=False, transform=trans, download=True)
```
```python
len(mnist_train), len(mnist_test)
```
(60000, 10000)
Each image is a single-channel 28×28 tensor:

```python
mnist_train[0][0].shape
```

torch.Size([1, 28, 28])
```python
def get_fashion_mnist_labels(labels):
    """Return the text labels of the Fashion-MNIST dataset."""
    text_labels = ['t-shirt', 'trouser', 'pullover', 'dress', 'coat',
                   'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot']
    return [text_labels[int(i)] for i in labels]

def show_images(imgs, num_rows, num_cols, titles=None, scale=1.5):
    """Plot a grid of images."""
    figsize = (num_cols * scale, num_rows * scale)
    _, axes = d2l.plt.subplots(num_rows, num_cols, figsize=figsize)
    axes = axes.flatten()
    for i, (ax, img) in enumerate(zip(axes, imgs)):
        if torch.is_tensor(img):
            ax.imshow(img.numpy())  # tensor image
        else:
            ax.imshow(img)          # PIL image
        ax.axes.get_xaxis().set_visible(False)
        ax.axes.get_yaxis().set_visible(False)
        if titles:
            ax.set_title(titles[i])
    return axes

X, y = next(iter(data.DataLoader(mnist_train, batch_size=50)))
show_images(X.reshape(50, 28, 28), 10, 5, titles=get_fashion_mnist_labels(y));
```
```python
batch_size = 256

def get_dataloader_workers():
    """Use 4 processes to read the data."""
    return 4

train_iter = data.DataLoader(mnist_train, batch_size, shuffle=True,
                             num_workers=get_dataloader_workers())

# Time one full pass over the training data.
timer = d2l.Timer()
for X, y in train_iter:
    continue
f'{timer.stop():.2f} sec'
```
'2.11 sec'
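On platforms where worker processes are problematic (e.g. some Windows or notebook setups), `num_workers=0` falls back to loading data in the main process, typically at some cost in throughput. A hedged variant (not in the original):

```python
# Single-process data loading; slower but avoids multiprocessing issues.
train_iter_single = data.DataLoader(mnist_train, batch_size,
                                    shuffle=True, num_workers=0)
```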
```python
def load_data_fashion_mnist(batch_size, resize=None):
    """Download the Fashion-MNIST dataset and load it into memory."""
    trans = [transforms.ToTensor()]
    if resize:
        trans.insert(0, transforms.Resize(resize))
    trans = transforms.Compose(trans)
    mnist_train = torchvision.datasets.FashionMNIST(
        root="../data", train=True, transform=trans, download=True)
    mnist_test = torchvision.datasets.FashionMNIST(
        root="../data", train=False, transform=trans, download=True)
    return (data.DataLoader(mnist_train, batch_size, shuffle=True,
                            num_workers=get_dataloader_workers()),
            data.DataLoader(mnist_test, batch_size, shuffle=False,
                            num_workers=get_dataloader_workers()))

train_iter, test_iter = load_data_fashion_mnist(32, resize=64)
for X, y in train_iter:
    print(X.shape, X.dtype, y.shape, y.dtype)
    break
```
torch.Size([32, 1, 64, 64]) torch.float32 torch.Size([32]) torch.int64
3.6 Implementation of Softmax Regression from Scratch

```python
import torch
from IPython import display
from d2l import torch as d2l

batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
```
```python
num_inputs = 784   # each 28x28 image is flattened into a vector
num_outputs = 10   # one output per class

W = torch.normal(0, 0.01, size=(num_inputs, num_outputs), requires_grad=True)
b = torch.zeros(num_outputs, requires_grad=True)
```
```python
X = torch.tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
X.sum(0, keepdim=True), X.sum(1, keepdim=True)
```
(tensor([[5., 7., 9.]]),
tensor([[ 6.],
[15.]]))
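For contrast (an added illustration), dropping `keepdim` collapses the summed axis to a 1-D tensor, which would not broadcast row-wise the way the softmax implementation below requires:

```python
# Without keepdim the row sums lose their axis: shape (2,) instead of (2, 1).
X.sum(1), X.sum(1).shape
```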
```python
def softmax(X):
    X_exp = torch.exp(X)
    partition = X_exp.sum(1, keepdim=True)
    return X_exp / partition  # broadcasting divides each row by its sum

X = torch.normal(0, 1, (2, 5))
X_prob = softmax(X)
X_prob, X_prob.sum(1)
```
(tensor([[0.2143, 0.0127, 0.1268, 0.2248, 0.4214],
[0.3360, 0.2826, 0.0913, 0.1454, 0.1446]]),
tensor([1.0000, 1.0000]))
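This implementation can overflow: for large logits `torch.exp` returns `inf` and the division yields `nan`. A common remedy is to subtract the row-wise maximum before exponentiating, which leaves the result unchanged because softmax is shift-invariant. A sketch (an illustrative variant, not the book's code):

```python
def stable_softmax(X):
    # Subtracting the per-row max does not change the output but keeps
    # torch.exp from overflowing for large inputs.
    X_shifted = X - X.max(1, keepdim=True).values
    X_exp = torch.exp(X_shifted)
    return X_exp / X_exp.sum(1, keepdim=True)

big = torch.tensor([[1000.0, 1000.0]])
print(softmax(big), stable_softmax(big))  # nan vs. [0.5, 0.5]
```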
```python
def net(X):
    return softmax(torch.matmul(X.reshape((-1, W.shape[0])), W) + b)
```
```python
y = torch.tensor([0, 2])
y_hat = torch.tensor([[0.1, 0.3, 0.6], [0.3, 0.2, 0.5]])
y_hat[[0, 1], y]
```
tensor([0.1000, 0.5000])
```python
def cross_entropy(y_hat, y):
    """Pick out the predicted probability of the true class and take -log."""
    return -torch.log(y_hat[range(len(y_hat)), y])

cross_entropy(y_hat, y)
```
tensor([2.3026, 0.6931])
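Since `y_hat` here already holds probabilities, this matches PyTorch's built-in negative log-likelihood loss applied to log-probabilities; a cross-check (added for illustration, not part of the original):

```python
import torch.nn.functional as F

# nll_loss on log-probabilities reproduces the cross_entropy values above.
print(F.nll_loss(torch.log(y_hat), y, reduction='none'))  # tensor([2.3026, 0.6931])
```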
```python
def accuracy(y_hat, y):
    """Compute the number of correct predictions."""
    if len(y_hat.shape) > 1 and y_hat.shape[1] > 1:
        y_hat = y_hat.argmax(axis=1)  # take the class with the largest score
    cmp = y_hat.type(y.dtype) == y
    return float(cmp.type(y.dtype).sum())

accuracy(y_hat, y) / len(y)
```
0.5
```python
def evaluate_accuracy(net, data_iter):
    """Compute the accuracy of a model on a dataset."""
    if isinstance(net, torch.nn.Module):
        net.eval()  # set the model to evaluation mode
    metric = Accumulator(2)  # number of correct predictions, total count
    with torch.no_grad():
        for X, y in data_iter:
            metric.add(accuracy(net(X), y), y.numel())
    return metric[0] / metric[1]

class Accumulator:
    """Accumulate sums over n variables."""
    def __init__(self, n):
        self.data = [0.0] * n

    def add(self, *args):
        self.data = [a + float(b) for a, b in zip(self.data, args)]

    def reset(self):
        self.data = [0.0] * len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]

evaluate_accuracy(net, test_iter)
```
0.1326

With randomly initialized parameters the model guesses nearly uniformly over the 10 classes, so an accuracy close to 0.1 is expected.
```python
def train_epoch_ch3(net, train_iter, loss, updater):
    """Train a model for one epoch."""
    if isinstance(net, torch.nn.Module):
        net.train()  # set the model to training mode
    # Sum of training loss, sum of training accuracy, number of examples
    metric = Accumulator(3)
    for X, y in train_iter:
        y_hat = net(X)
        l = loss(y_hat, y)
        if isinstance(updater, torch.optim.Optimizer):
            # Use the built-in PyTorch optimizer
            updater.zero_grad()
            l.mean().backward()
            updater.step()
        else:
            # Use the custom optimizer and loss defined from scratch
            l.sum().backward()
            updater(X.shape[0])
        metric.add(float(l.sum()), accuracy(y_hat, y), y.numel())
    # Return training loss and training accuracy
    return metric[0] / metric[2], metric[1] / metric[2]

class Animator:
    """Plot data in animation."""
    def __init__(self, xlabel=None, ylabel=None, legend=None, xlim=None,
                 ylim=None, xscale='linear', yscale='linear',
                 fmts=('-', 'm--', 'g-.', 'r:'), nrows=1, ncols=1,
                 figsize=(3.5, 2.5)):
        if legend is None:
            legend = []
        d2l.use_svg_display()
        self.fig, self.axes = d2l.plt.subplots(nrows, ncols, figsize=figsize)
        if nrows * ncols == 1:
            self.axes = [self.axes, ]
        # Capture the axis configuration with a lambda
        self.config_axes = lambda: d2l.set_axes(
            self.axes[0], xlabel, ylabel, xlim, ylim, xscale, yscale, legend)
        self.X, self.Y, self.fmts = None, None, fmts

    def add(self, x, y):
        # Add multiple data points to the figure
        if not hasattr(y, "__len__"):
            y = [y]
        n = len(y)
        if not hasattr(x, "__len__"):
            x = [x] * n
        if not self.X:
            self.X = [[] for _ in range(n)]
        if not self.Y:
            self.Y = [[] for _ in range(n)]
        for i, (a, b) in enumerate(zip(x, y)):
            if a is not None and b is not None:
                self.X[i].append(a)
                self.Y[i].append(b)
        self.axes[0].cla()
        for x, y, fmt in zip(self.X, self.Y, self.fmts):
            self.axes[0].plot(x, y, fmt)
        self.config_axes()
        display.display(self.fig)
        display.clear_output(wait=True)

def train_ch3(net, train_iter, test_iter, loss, num_epochs, updater):
    """Train a model (Chapter 3)."""
    animator = Animator(xlabel='epoch', xlim=[1, num_epochs], ylim=[0.3, 0.9],
                        legend=['train loss', 'train acc', 'test acc'])
    for epoch in range(num_epochs):
        train_metrics = train_epoch_ch3(net, train_iter, loss, updater)
        test_acc = evaluate_accuracy(net, test_iter)
        animator.add(epoch + 1, train_metrics + (test_acc,))
    train_loss, train_acc = train_metrics
    assert train_loss < 0.5, train_loss
    assert train_acc <= 1 and train_acc > 0.7, train_acc
    assert test_acc <= 1 and test_acc > 0.7, test_acc

lr = 0.1

def updater(batch_size):
    return d2l.sgd([W, b], lr, batch_size)

num_epochs = 10
train_ch3(net, train_iter, test_iter, cross_entropy, num_epochs, updater)
```
```python
def predict_ch3(net, test_iter, n=6):
    """Predict labels (Chapter 3)."""
    for X, y in test_iter:
        break
    trues = d2l.get_fashion_mnist_labels(y)
    preds = d2l.get_fashion_mnist_labels(net(X).argmax(axis=1))
    titles = [true + '\n' + pred for true, pred in zip(trues, preds)]
    d2l.show_images(X[0:n].reshape((n, 28, 28)), 1, n, titles=titles[0:n])

predict_ch3(net, test_iter)
```
3.7 Concise Implementation of Softmax Regression

```python
import torch
from torch import nn
from d2l import torch as d2l

batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
```
```python
# PyTorch does not implicitly flatten inputs, so a Flatten layer reshapes
# each image into a vector before the linear layer.
net = nn.Sequential(nn.Flatten(), nn.Linear(784, 10))

def init_weights(m):
    if type(m) == nn.Linear:
        nn.init.normal_(m.weight, std=0.01)

net.apply(init_weights);
```
```python
loss = nn.CrossEntropyLoss(reduction='none')
```
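Unlike the from-scratch version in Section 3.6, `nn.CrossEntropyLoss` expects unnormalized logits and applies log-softmax internally, which avoids the overflow issue noted after the softmax implementation there. A small cross-check (illustrative, not in the original):

```python
import torch.nn.functional as F

logits = torch.tensor([[0.1, 0.3, 0.6], [0.3, 0.2, 0.5]])
targets = torch.tensor([0, 2])
# CrossEntropyLoss(logits) equals NLLLoss(log_softmax(logits)).
print(loss(logits, targets))
print(F.nll_loss(F.log_softmax(logits, dim=1), targets, reduction='none'))
```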
```python
trainer = torch.optim.SGD(net.parameters(), lr=0.1)
```
```python
num_epochs = 10
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, trainer)
```
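After training, the test accuracy can be checked with the helper saved in Section 3.6, which the `d2l` package exposes (an illustrative follow-up, not part of the original cell):

```python
# Final accuracy of the trained model on the test set.
print(d2l.evaluate_accuracy(net, test_iter))
```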