摘要: 本文概括地介绍 CNN 的基本原理, 并通过阿拉伯字母分类的例子具体介绍其实现过程, 是理论与实践的结合.
对于卷积神经网络 (CNN) 而言, 相信很多读者并不陌生, 该网络近年来在大多数领域都表现优异, 尤其是在计算机视觉领域中. 但是很多工作人员可能直接调用相关的深度学习工具箱搭建卷积神经网络模型, 并不清楚其中具体的原理. 本文将简单介绍卷积神经网络(CNN), 方便读者大体上了解其基本原理及实现过程, 便于后续工作中的实际应用. 本文将按以下顺序展开:
了解卷积操作
了解神经网络
数据预处理
了解 CNN
了解优化器
理解 ImageDataGenerator
进行预测并计算准确性
demo
什么是卷积?
- import numpy as np
- import pandas as pd
- import matplotlib.pyplot as plt
- import seaborn as sns
- import tflearn.data_utils as du
- from keras.models import Sequential
- from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D
- from keras.optimizers import RMSprop
- from keras.preprocessing.image import ImageDataGenerator
- from sklearn.metrics import confusion_matrix
# Load the header-less CSV dumps and convert them straight to NumPy arrays.
# Image tables become float32 arrays.
train_data = pd.read_csv(
    '../input/csvTrainImages 13440x1024.csv', header=None
).values.astype('float32')
test_data = pd.read_csv(
    '../input/csvTestImages 3360x1024.csv', header=None
).values.astype('float32')

# Label tables become int32 arrays; subtracting 1 shifts every label down by
# one (presumably from 1-based to 0-based class ids -- confirm against the
# dataset description).
train_label = pd.read_csv(
    '../input/csvTrainLabel 13440x1.csv', header=None
).values.astype('int32') - 1
test_label = pd.read_csv(
    '../input/csvTestLabel 3360x1.csv', header=None
).values.astype('int32') - 1
def row_calculator(number_of_images, number_of_columns):
    """Return how many grid rows are needed to lay out the images.

    Args:
        number_of_images: Total number of images to place.
        number_of_columns: Number of images per row.

    Returns:
        The row count as an ``int``: the ceiling of
        ``number_of_images / number_of_columns``.
    """
    # Integer arithmetic keeps the result an int; true division would yield
    # a float (e.g. 3.5 for 10 images in 4 columns), which is not a valid
    # row count.
    full_rows, leftover = divmod(number_of_images, number_of_columns)
    return full_rows + 1 if leftover else full_rows
def display_image(x, img_size, number_of_images):
    """Draw the first ``number_of_images`` samples of ``x`` in a 4-column grid.

    Args:
        x: Array whose first axis indexes samples; every sample is reshaped
            to ``img_size`` x ``img_size`` before plotting.
        img_size: Side length of one square image.
        number_of_images: How many leading samples to draw. Values larger
            than the number of available samples are clamped.
    """
    plt.figure(figsize=(8, 7))

    n_samples = x.shape[0]
    if n_samples == 0:
        # Nothing to draw; leave the empty figure untouched.
        return

    x = x.reshape(n_samples, img_size, img_size)

    # Never index past the samples that actually exist.
    number_of_images = min(number_of_images, n_samples)

    # plt.subplot needs an integer row count; coerce explicitly because
    # row_calculator may hand back a float under true division.
    number_of_rows = int(row_calculator(number_of_images, 4))

    for i in range(number_of_images):
        plt.subplot(number_of_rows, 4, i + 1)
        plt.imshow(x[i])
# Rescale pixel values by 1/255 (assumes 8-bit intensities -- TODO confirm).
train_data = train_data / 255
test_data = test_data / 255

# Turn each flat row into a 32x32 single-channel image: (N, 32, 32, 1).
train_data = train_data.reshape([-1, 32, 32, 1])
test_data = test_data.reshape([-1, 32, 32, 1])

# Zero-centre both sets with the mean computed on the TRAINING data only.
# Centring the test set with its own mean would shift train and test inputs
# differently and leak test-set statistics into preprocessing.
train_data, mean1 = du.featurewise_zero_center(train_data)
# With an explicit mean, featurewise_zero_center returns just the centred array.
test_data = du.featurewise_zero_center(test_data, mean1)
# Kept so any later code that refers to mean2 continues to work.
mean2 = mean1
# Convolutional classifier for 32x32 single-channel images, 28 output classes.
recognizer = Sequential()

# Block 1: two 5x5 convolutions (32 filters), 2x2 max-pool, dropout.
recognizer.add(Conv2D(filters = 32, kernel_size = (5,5),padding = 'Same', activation ='relu', input_shape = (32,32,1)))
recognizer.add(Conv2D(filters = 32, kernel_size = (5,5),padding = 'Same', activation ='relu'))
recognizer.add(MaxPool2D(pool_size=(2,2)))
recognizer.add(Dropout(0.25))

# Block 2: two 3x3 convolutions (64 filters), 2x2 max-pool, dropout.
recognizer.add(Conv2D(filters = 64, kernel_size = (3,3),padding = 'Same', activation ='relu'))
recognizer.add(Conv2D(filters = 64, kernel_size = (3,3),padding = 'Same', activation ='relu'))
recognizer.add(MaxPool2D(pool_size=(2,2), strides=(2,2)))
recognizer.add(Dropout(0.25))

# Classifier head. After two 2x2 pools the 32x32 map is 8x8 with 64 channels,
# so Flatten yields 4096 features. The Dense layer infers that size from the
# preceding layer, so no input_dim is given (the former input_dim = 1024 did
# not match the real input width).
recognizer.add(Flatten())
recognizer.add(Dense(units = 256, activation = 'relu'))
recognizer.add(Dense(units = 256, activation = "relu"))
recognizer.add(Dropout(0.5))
recognizer.add(Dense(28, activation = "softmax"))
# Augmentation pipeline for the training images.
datagen = ImageDataGenerator(
    # Random geometric perturbations.
    rotation_range=10,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
    # Flips are switched off.
    horizontal_flip=False,
    vertical_flip=False,
    # Every centring / normalisation / whitening option is switched off.
    featurewise_center=False,
    samplewise_center=False,
    featurewise_std_normalization=False,
    samplewise_std_normalization=False,
    zca_whitening=False,
)

# NOTE(review): fit() presumably only matters for the featurewise / ZCA
# options, which are all disabled above -- confirm before removing.
datagen.fit(train_data)
# NOTE(review): no compile()/fit() call on `recognizer` is visible before this
# point; the model must be trained first for these numbers to be meaningful.

# Per-class scores for every test image, one row per sample.
class_scores = recognizer.predict(test_data)
num_classes = class_scores.shape[1]

# Predicted class = index of the highest score in each row.
predictions = np.argmax(class_scores, axis=1)

# Build the confusion matrix explicitly (it was referenced but never defined).
# Fixing `labels` guarantees a num_classes x num_classes matrix even if some
# class never occurs; ravel() flattens the (N, 1) label column to 1-D.
cm = confusion_matrix(test_label.ravel(), predictions, labels=np.arange(num_classes))

# Correct predictions sit on the diagonal of the confusion matrix.
accuracy = np.trace(cm) / test_label.shape[0]
print("accuracy =" + str(accuracy))
来源: https://juejin.im/entry/5b88e1abe51d4538a423e5c3