import numpy as np
# requires TensorFlow 1.x; the tutorials module was removed in TF 2
from tensorflow.examples.tutorials.mnist import input_data
import matplotlib.pyplot as plt

mndata = input_data.read_data_sets("MNIST-data/", one_hot=True)
X_train = mndata.train.images   # training images, one flattened 28x28 digit per row
y_train = mndata.train.labels   # one-hot training labels
X_test = mndata.test.images     # test images
y_test = mndata.test.labels     # one-hot test labels
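# Quick sanity check (not in the original post): with this loader's default
# split, MNIST comes back as 55000 training and 10000 test examples of
# 784 pixels each.
assert X_train.shape == (55000, 784) and y_train.shape == (55000, 10)
assert X_test.shape == (10000, 784) and y_test.shape == (10000, 10)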
input_layer_size = 28 * 28   # one unit per pixel
hidden_layer_size = 15
output_layer_size = 10       # one unit per digit class
reg_lambda = .01             # L2 regularization strength
learning_rate = .01          # SGD step size
# visualize a matrix as a grid; cells <= 0 are shown as empty
def plt_grid(data):
    fig, ax = plt.subplots()
    fig.set_size_inches(30, 30)
    rows, cols = data.shape
    # random pastel background; imshow needs an (M, N) array when a cmap is used
    imshow_data = np.random.rand(rows, cols)
    ax.imshow(imshow_data, cmap=plt.cm.Pastel1, interpolation='nearest')
    for x in range(0, cols):
        for y in range(0, rows):
            if data[y][x] > 0:
                ax.text(x, y, np.round(data[y][x], 8), va='center',
                        ha='center', fontsize=20)
    plt.show()
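# Example usage (hypothetical; the original post does not show a call):
# after training, visualize the 15x10 second-layer weight matrix.
# plt_grid(model['W2'])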
def softmax(x):
    # subtract the max before exponentiating for numerical stability
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum()

def relu(x):
    return np.maximum(x, 0)
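# Quick sanity checks (not in the original post): softmax output is a
# probability distribution, and relu zeroes out negative entries.
assert np.isclose(softmax(np.array([1.0, 2.0, 3.0])).sum(), 1.0)
assert (relu(np.array([-1.0, 2.0])) == np.array([0.0, 2.0])).all()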
# cross-entropy loss; written for a single example per call (probs has one row)
def cross_entropy_loss(probs, y_onehot):
    # pick out the predicted probability of the true class
    indices = np.argmax(y_onehot, axis=0).astype(int)
    predicted_prob = probs[np.arange(len(probs)), indices]
    log_preds = np.log(predicted_prob)
    loss = -1.0 * np.sum(log_preds) / len(log_preds)
    return loss
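# Sanity check (not in the original post): a uniform prediction over the
# 10 classes gives loss log(10), roughly 2.3026; a confident correct
# prediction would give a loss near 0.
y_demo = np.eye(10)[3]          # one-hot label for class 3
uniform = np.full((1, 10), 0.1)
assert np.isclose(cross_entropy_loss(uniform, y_demo), np.log(10))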
# init weights and bias
def init_weights_bias():
    np.random.seed(1)   # fixed seed so runs are reproducible
    # small positive weights in [0, 0.2); biases start at zero
    W1 = np.random.random([input_layer_size, hidden_layer_size]) / 5   # 784 x 15
    b1 = np.zeros((1, hidden_layer_size))
    W2 = np.random.random([hidden_layer_size, output_layer_size]) / 5  # 15 x 10
    b2 = np.zeros((1, output_layer_size))
    model = {'W1': W1, 'b1': b1, 'W2': W2, 'b2': b2}
    return model
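# Shape check (not in the original post): the layer dimensions chain
# 784 -> 15 -> 10, mapping a 784-pixel image to 10 class scores.
_m = init_weights_bias()
assert _m['W1'].shape == (784, 15) and _m['W2'].shape == (15, 10)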
# gradients of the loss w.r.t. weights and biases (backpropagation)
def derivative_weights_bias(output_error, hidden_layer, X, model):
    W1, _, W2, _ = model['W1'], model['b1'], model['W2'], model['b2']
    # backpropagate the output error through W2, then through the relu
    hidden_error = np.dot(output_error, W2.T)
    hidden_error[hidden_layer <= 0] = 0   # relu derivative: zero where the unit was inactive
    # gradient of layer-2 weights and bias
    g2_weights = np.dot(hidden_layer.T, output_error)
    g2_bias = np.sum(output_error, axis=0, keepdims=True)
    # gradient of layer-1 weights and bias
    g1_weights = np.dot(X.reshape(input_layer_size, 1), hidden_error)
    g1_bias = np.sum(hidden_error, axis=0, keepdims=True)
    # add L2 regularization terms
    g2_weights += reg_lambda * W2
    g1_weights += reg_lambda * W1
    param = {'dW1': g1_weights, 'db1': g1_bias, 'dW2': g2_weights, 'db2': g2_bias}
    return param
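# Optional gradient check (not in the original post): compare one entry of
# the analytic dW2 against a centred finite difference of the regularized
# loss on a single example. The regularized loss here adds
# 0.5 * reg_lambda * ||W||^2, matching the reg_lambda * W terms above.
# The two values should agree to several decimal places.
def check_dW2(X, y, model, i=0, j=0, eps=1e-5):
    def reg_loss():
        probs, _ = forward_propagation(X, model)
        reg = 0.5 * reg_lambda * (np.sum(model['W1'] ** 2) + np.sum(model['W2'] ** 2))
        return cross_entropy_loss(probs, y) + reg
    probs, hidden_layer = forward_propagation(X, model)
    output_error = (probs - y) / probs.shape[0]
    analytic = derivative_weights_bias(output_error, hidden_layer, X, model)['dW2'][i, j]
    model['W2'][i, j] += eps
    loss_plus = reg_loss()
    model['W2'][i, j] -= 2 * eps
    loss_minus = reg_loss()
    model['W2'][i, j] += eps   # restore the original weight
    numeric = (loss_plus - loss_minus) / (2 * eps)
    print('dW2[%d,%d]: analytic %.8f vs numeric %.8f' % (i, j, analytic, numeric))

# example: check_dW2(X_train[0], y_train[0], init_weights_bias())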
def forward_propagation(X, model):
    W1, b1, W2, b2 = model['W1'], model['b1'], model['W2'], model['b2']
    input_layer = np.dot(X, W1)                    # (1, 15) hidden pre-activation
    hidden_layer = relu(input_layer + b1)          # (1, 15)
    output_layer = np.dot(hidden_layer, W2) + b2   # (1, 10) class scores
    probs = softmax(output_layer)                  # (1, 10) class probabilities
    return probs, hidden_layer
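# Sanity check (not in the original post): a forward pass over one image
# yields a valid probability distribution over the 10 digits.
_p, _ = forward_propagation(X_train[0], init_weights_bias())
assert _p.shape == (1, 10) and np.isclose(_p.sum(), 1.0)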
def accuracy(predictions, labels):
    preds_correct_boolean = np.argmax(predictions, 1) == np.argmax(labels, 1)
    correct_predictions = np.sum(preds_correct_boolean)
    accuracy = 100.0 * correct_predictions / predictions.shape[0]
    return accuracy
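# Toy example (not in the original post): only the first of the two rows
# below has its argmax in the labelled column, so accuracy is 50%.
_preds = np.array([[0.9, 0.1], [0.3, 0.7]])
_labels = np.array([[1, 0], [1, 0]])
assert accuracy(_preds, _labels) == 50.0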
# predict on a test set and report accuracy
def predict(X, y, model):
    W1, b1, W2, b2 = model['W1'], model['b1'], model['W2'], model['b2']
    input_layer = np.dot(X, W1)
    hidden_layer = relu(input_layer + b1)
    output_layer = np.dot(hidden_layer, W2) + b2
    # softmax here normalizes over the whole batch, but the per-row argmax
    # used by accuracy() is unaffected
    probs = softmax(output_layer)
    print('Test accuracy: {0}%'.format(accuracy(probs, y)))
# - batch: number of training examples to process; each example is one SGD update
def train_model(batch, X, y):
    model = init_weights_bias()
    W1, b1, W2, b2 = model['W1'], model['b1'], model['W2'], model['b2']
    # stochastic gradient descent, one example at a time
    for i in range(0, batch):
        output_probs, hidden_layer = forward_propagation(X[i], model)
        # gradient of softmax + cross-entropy w.r.t. the output scores
        output_error = (output_probs - y[i]) / output_probs.shape[0]
        param = derivative_weights_bias(output_error, hidden_layer, X[i], model)
        dW1, db1, dW2, db2 = param['dW1'], param['db1'], param['dW2'], param['db2']
        # gradient descent parameter update
        W1 -= learning_rate * dW1
        b1 -= learning_rate * db1
        W2 -= learning_rate * dW2
        b2 -= learning_rate * db2
        model = {'W1': W1, 'b1': b1, 'W2': W2, 'b2': b2}
        loss = cross_entropy_loss(output_probs, y[i])
        if i % 2000 == 0:
            print('loss @ %d is %f' % (i, loss))
    return model
model = train_model(50000, X_train[:50000], y_train[:50000])
predict(X_test[:10000], y_test[:10000], model)
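# A minimal persistence sketch (not in the original post; the file name
# 'mnist_model.npz' is arbitrary): np.savez stores each weight array under
# its dictionary key, and np.load reads them back for later prediction.
np.savez('mnist_model.npz', **model)
npz = np.load('mnist_model.npz')
restored = {key: npz[key] for key in npz.files}
predict(X_test[:10000], y_test[:10000], restored)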
Source: http://www.jianshu.com/p/9386a0c99da2