记得第一次接触手写数字识别数据集时, 我还在学习 TensorFlow, 各种 sess.run() 把头都绕晕了. 自从接触 PyTorch 以来, 一直想写点什么. 2017 年 5 月, Andrej Karpathy 曾在 Twitter 上发过一条推文调侃道: "I've been using PyTorch a few months now and I've never felt better. I have more energy. My skin is clearer. My eye sight has improved." 的确, 使用 PyTorch 以来, 感觉心情好多了, 它不像 TensorFlow 那样晦涩难懂. 于是迫不及待地用 PyTorch 实战了一把 MNIST 数据集, 构建 LeNet 神经网络. 话不多说, 直接上代码!
- import torch
- import torch.nn as nn
- import torch.nn.functional as F
- import torch.optim as optim
- from torchvision import datasets,transforms
- import torchvision
- from torch.autograd import Variable
- from torch.utils.data import DataLoader
- import cv2
class LeNet(nn.Module):
    """LeNet-style convolutional classifier producing 10 class logits.

    The network is two conv/ReLU/max-pool stages followed by three fully
    connected layers, with batch normalization on the first two of them.

    Shape flow for a single-channel 28x28 input (derived from the layer
    parameters below):

        1x28x28 -> conv1 (k=3, pad=2) -> 6x30x30 -> pool -> 6x15x15
                -> conv2 (k=5)        -> 16x11x11 -> pool -> 16x5x5
                -> flatten            -> 400 -> 120 -> 84 -> 10

    ``fc1`` is hard-wired to ``16 * 5 * 5`` input features, so other input
    resolutions will fail at the first linear layer.
    """

    def __init__(self):
        super(LeNet, self).__init__()
        # Stage 1: 1 -> 6 channels, 3x3 kernel, stride 1, padding 2.
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 6, 3, 1, 2),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
        )
        # Stage 2: 6 -> 16 channels, 5x5 kernel, no padding.
        self.conv2 = nn.Sequential(
            nn.Conv2d(6, 16, 5),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
        )
        # Batch normalization speeds up convergence; it is placed after the
        # fully connected layer and before the activation.
        self.fc1 = nn.Sequential(
            nn.Linear(16 * 5 * 5, 120),
            nn.BatchNorm1d(120),
            nn.ReLU(),
        )
        self.fc2 = nn.Sequential(
            nn.Linear(120, 84),
            nn.BatchNorm1d(84),
            nn.ReLU(),
        )
        # No softmax here: the head returns raw logits, which is what
        # nn.CrossEntropyLoss expects (it applies log-softmax internally).
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return class logits of shape ``(batch, 10)`` for input ``x``.

        ``x`` must be a 4-D tensor ``(batch, 1, H, W)`` whose spatial size
        reduces to 5x5 after the two conv stages (e.g. 28x28).
        """
        x = self.conv1(x)
        x = self.conv2(x)
        # Flatten everything except the batch dimension for the linear layers.
        x = x.view(x.size(0), -1)
        x = self.fc1(x)
        x = self.fc2(x)
        return self.fc3(x)
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hyperparameters.
batch_size = 64
LR = 0.001
Momentum = 0.9

# MNIST train/test splits. download=True fetches the files into ./data/ on
# the first run and is a no-op when they are already present.
train_dataset = datasets.MNIST(root='./data/',
                               train=True,
                               transform=transforms.ToTensor(),
                               download=True)
test_dataset = datasets.MNIST(root='./data/',
                              train=False,
                              transform=transforms.ToTensor(),
                              download=True)

# Batch iterators; only the training data is shuffled.
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)

# Optional: visualize one batch as an image grid.
# ToTensor() already yields values in [0, 1] and no Normalize transform is
# applied above, so no mean/std de-normalization is needed before display.
# images, labels = next(iter(train_loader))
# img = torchvision.utils.make_grid(images)
# img = img.numpy().transpose(1, 2, 0)
# cv2.imshow('win', img)
# key_pressed = cv2.waitKey(0)

net = LeNet().to(device)
criterion = nn.CrossEntropyLoss()  # loss function (expects raw logits)
# SGD with momentum over all trainable parameters of the network.
optimizer = optim.SGD(net.parameters(), lr=LR, momentum=Momentum)
epoch = 1  # number of training epochs
def train_one_epoch(model, loader, criterion, optimizer, device,
                    epoch_idx=0, log_every=100):
    """Run one optimization pass over ``loader``.

    Args:
        model: network to train; switched to training mode first so that
            layers such as batch norm behave correctly after an evaluation.
        loader: iterable yielding ``(inputs, labels)`` batches.
        criterion: loss callable taking ``(outputs, labels)``.
        optimizer: optimizer bound to ``model``'s parameters.
        device: ``torch.device`` the batches are moved to.
        epoch_idx: zero-based epoch number, used only for logging.
        log_every: print the mean loss every this many batches.
    """
    model.train()
    running_loss = 0.0
    for step, (inputs, labels) in enumerate(loader, start=1):
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()  # clear gradients from the previous step
        loss = criterion(model(inputs), labels)  # forward pass + loss
        loss.backward()  # back-propagate
        optimizer.step()  # apply one parameter update
        running_loss += loss.item()
        if step % log_every == 0:
            print('[%d,%d] loss:%.03f'
                  % (epoch_idx + 1, step, running_loss / log_every))
            running_loss = 0.0


def evaluate(model, loader, device):
    """Count correct top-1 predictions of ``model`` over ``loader``.

    The model is put in evaluation mode and gradients are disabled.

    Returns:
        A ``(correct, total)`` tuple of Python ints; ``(0, 0)`` when the
        loader yields no batches.
    """
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            # argmax over the class dimension gives the predicted label index.
            predicted = model(images).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return correct, total


if __name__ == '__main__':
    num_epochs = epoch  # keep the configured count; do not shadow it below
    for epoch_idx in range(num_epochs):
        train_one_epoch(net, train_loader, criterion, optimizer, device,
                        epoch_idx=epoch_idx)
        # Validate on the test set after each epoch.
        correct, total = evaluate(net, test_loader, device)
        print("correct1:", correct)
        print("Test acc: {0}".format(correct / total if total else 0.0))
本次手写数字识别只训练了 1 个 epoch, train_loss 为 0.250, 测试集上的准确率为 0.9685, 结果相当不错.
来源: https://www.cnblogs.com/shenpings1314/p/10463647.html