import numpy as np
# requires TensorFlow 1.x; the tutorials module was removed in TF 2
from tensorflow.examples.tutorials.mnist import input_data
import matplotlib.pyplot as plt

mndata = input_data.read_data_sets("MNIST-data/", one_hot=True)
X_train = mndata.train.images   # training images, one flattened 28x28 digit per row
y_train = mndata.train.labels   # one-hot training labels
X_test = mndata.test.images     # test images
y_test = mndata.test.labels     # one-hot test labels
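# Quick sanity check (not in the original post): with this loader's default
# split, MNIST comes back as 55000 training and 10000 test examples of
# 784 pixels each.
assert X_train.shape == (55000, 784) and y_train.shape == (55000, 10)
assert X_test.shape == (10000, 784) and y_test.shape == (10000, 10)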
input_layer_size = 28 * 28   # one unit per pixel
hidden_layer_size = 15
output_layer_size = 10       # one unit per digit class
reg_lambda = .01             # L2 regularization strength
learning_rate = .01          # SGD step size
# visualize a matrix as a grid; cells <= 0 are shown as empty
def plt_grid(data):
    fig, ax = plt.subplots()
    fig.set_size_inches(30, 30)
    rows, cols = data.shape
    # random pastel background; imshow needs an (M, N) array when a cmap is used
    imshow_data = np.random.rand(rows, cols)
    ax.imshow(imshow_data, cmap=plt.cm.Pastel1, interpolation='nearest')
    for x in range(0, cols):
        for y in range(0, rows):
            if data[y][x] > 0:
                ax.text(x, y, np.round(data[y][x], 8), va='center',
                        ha='center', fontsize=20)
    plt.show()
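# Example usage (hypothetical; the original post does not show a call):
# after training, visualize the 15x10 second-layer weight matrix.
# plt_grid(model['W2'])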
def softmax(x):
    # subtract the max before exponentiating for numerical stability
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum()

def relu(x):
    return np.maximum(x, 0)
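# Quick sanity checks (not in the original post): softmax output is a
# probability distribution, and relu zeroes out negative entries.
assert np.isclose(softmax(np.array([1.0, 2.0, 3.0])).sum(), 1.0)
assert (relu(np.array([-1.0, 2.0])) == np.array([0.0, 2.0])).all()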
# cross-entropy loss; written for a single example per call (probs has one row)
def cross_entropy_loss(probs, y_onehot):
    # pick out the predicted probability of the true class
    indices = np.argmax(y_onehot, axis=0).astype(int)
    predicted_prob = probs[np.arange(len(probs)), indices]
    log_preds = np.log(predicted_prob)
    loss = -1.0 * np.sum(log_preds) / len(log_preds)
    return loss
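# Sanity check (not in the original post): a uniform prediction over the
# 10 classes gives loss log(10), roughly 2.3026; a confident correct
# prediction would give a loss near 0.
y_demo = np.eye(10)[3]          # one-hot label for class 3
uniform = np.full((1, 10), 0.1)
assert np.isclose(cross_entropy_loss(uniform, y_demo), np.log(10))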
# init weights and bias
def init_weights_bias():
    np.random.seed(1)   # fixed seed so runs are reproducible
    # small positive weights in [0, 0.2); biases start at zero
    W1 = np.random.random([input_layer_size, hidden_layer_size]) / 5   # 784 x 15
    b1 = np.zeros((1, hidden_layer_size))
    W2 = np.random.random([hidden_layer_size, output_layer_size]) / 5  # 15 x 10
    b2 = np.zeros((1, output_layer_size))
    model = {'W1': W1, 'b1': b1, 'W2': W2, 'b2': b2}
    return model
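# Shape check (not in the original post): the layer dimensions chain
# 784 -> 15 -> 10, mapping a 784-pixel image to 10 class scores.
_m = init_weights_bias()
assert _m['W1'].shape == (784, 15) and _m['W2'].shape == (15, 10)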
# gradients of the loss w.r.t. weights and biases (backpropagation)
def derivative_weights_bias(output_error, hidden_layer, X, model):
    W1, _, W2, _ = model['W1'], model['b1'], model['W2'], model['b2']
    # backpropagate the output error through W2, then through the relu
    hidden_error = np.dot(output_error, W2.T)
    hidden_error[hidden_layer <= 0] = 0   # relu derivative: zero where the unit was inactive
    # gradient of layer-2 weights and bias
    g2_weights = np.dot(hidden_layer.T, output_error)
    g2_bias = np.sum(output_error, axis=0, keepdims=True)
    # gradient of layer-1 weights and bias
    g1_weights = np.dot(X.reshape(input_layer_size, 1), hidden_error)
    g1_bias = np.sum(hidden_error, axis=0, keepdims=True)
    # add L2 regularization terms
    g2_weights += reg_lambda * W2
    g1_weights += reg_lambda * W1
    param = {'dW1': g1_weights, 'db1': g1_bias, 'dW2': g2_weights, 'db2': g2_bias}
    return param
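# Optional gradient check (not in the original post): compare one entry of
# the analytic dW2 against a centred finite difference of the regularized
# loss on a single example. The regularized loss here adds
# 0.5 * reg_lambda * ||W||^2, matching the reg_lambda * W terms above.
# The two values should agree to several decimal places.
def check_dW2(X, y, model, i=0, j=0, eps=1e-5):
    def reg_loss():
        probs, _ = forward_propagation(X, model)
        reg = 0.5 * reg_lambda * (np.sum(model['W1'] ** 2) + np.sum(model['W2'] ** 2))
        return cross_entropy_loss(probs, y) + reg
    probs, hidden_layer = forward_propagation(X, model)
    output_error = (probs - y) / probs.shape[0]
    analytic = derivative_weights_bias(output_error, hidden_layer, X, model)['dW2'][i, j]
    model['W2'][i, j] += eps
    loss_plus = reg_loss()
    model['W2'][i, j] -= 2 * eps
    loss_minus = reg_loss()
    model['W2'][i, j] += eps   # restore the original weight
    numeric = (loss_plus - loss_minus) / (2 * eps)
    print('dW2[%d,%d]: analytic %.8f vs numeric %.8f' % (i, j, analytic, numeric))

# example: check_dW2(X_train[0], y_train[0], init_weights_bias())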
def forward_propagation(X, model):
    W1, b1, W2, b2 = model['W1'], model['b1'], model['W2'], model['b2']
    input_layer = np.dot(X, W1)                    # (1, 15) hidden pre-activation
    hidden_layer = relu(input_layer + b1)          # (1, 15)
    output_layer = np.dot(hidden_layer, W2) + b2   # (1, 10) class scores
    probs = softmax(output_layer)                  # (1, 10) class probabilities
    return probs, hidden_layer
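# Sanity check (not in the original post): a forward pass over one image
# yields a valid probability distribution over the 10 digits.
_p, _ = forward_propagation(X_train[0], init_weights_bias())
assert _p.shape == (1, 10) and np.isclose(_p.sum(), 1.0)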
def accuracy(predictions, labels):
    preds_correct_boolean = np.argmax(predictions, 1) == np.argmax(labels, 1)
    correct_predictions = np.sum(preds_correct_boolean)
    accuracy = 100.0 * correct_predictions / predictions.shape[0]
    return accuracy
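# Toy example (not in the original post): only the first of the two rows
# below has its argmax in the labelled column, so accuracy is 50%.
_preds = np.array([[0.9, 0.1], [0.3, 0.7]])
_labels = np.array([[1, 0], [1, 0]])
assert accuracy(_preds, _labels) == 50.0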
# predict on a test set and report accuracy
def predict(X, y, model):
    W1, b1, W2, b2 = model['W1'], model['b1'], model['W2'], model['b2']
    input_layer = np.dot(X, W1)
    hidden_layer = relu(input_layer + b1)
    output_layer = np.dot(hidden_layer, W2) + b2
    # softmax here normalizes over the whole batch, but the per-row argmax
    # used by accuracy() is unaffected
    probs = softmax(output_layer)
    print('Test accuracy: {0}%'.format(accuracy(probs, y)))
# - batch: number of training examples to process; each example is one SGD update
def train_model(batch, X, y):
    model = init_weights_bias()
    W1, b1, W2, b2 = model['W1'], model['b1'], model['W2'], model['b2']
    # stochastic gradient descent, one example at a time
    for i in range(0, batch):
        output_probs, hidden_layer = forward_propagation(X[i], model)
        # gradient of softmax + cross-entropy w.r.t. the output scores
        output_error = (output_probs - y[i]) / output_probs.shape[0]
        param = derivative_weights_bias(output_error, hidden_layer, X[i], model)
        dW1, db1, dW2, db2 = param['dW1'], param['db1'], param['dW2'], param['db2']
        # gradient descent parameter update
        W1 -= learning_rate * dW1
        b1 -= learning_rate * db1
        W2 -= learning_rate * dW2
        b2 -= learning_rate * db2
        model = {'W1': W1, 'b1': b1, 'W2': W2, 'b2': b2}
        loss = cross_entropy_loss(output_probs, y[i])
        if i % 2000 == 0:
            print('loss @ %d is %f' % (i, loss))
    return model
model = train_model(50000, X_train[:50000], y_train[:50000])
predict(X_test[:10000], y_test[:10000], model)
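# A minimal persistence sketch (not in the original post; the file name
# 'mnist_model.npz' is arbitrary): np.savez stores each weight array under
# its dictionary key, and np.load reads them back for later prediction.
np.savez('mnist_model.npz', **model)
npz = np.load('mnist_model.npz')
restored = {key: npz[key] for key in npz.files}
predict(X_test[:10000], y_test[:10000], restored)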
Source: http://www.jianshu.com/p/9386a0c99da2