通过多层感知机 (MLP), 直接对样本的 ASCII 码序列进行监督学习: 隐藏层 1 层, 节点数目 50 个.
功能分析:
准确率 92.8%, 看上去还行, 但实际应用有问题. 这里有坑: 黑白样本比例对准确率影响很大, 当比例为 1:1 的时候, 模型几乎无法区分黑白测试样本, 所以无特征的 MLP 几乎是个笑话. 同样, 这也让人开始反思: 评估之前的模型对新数据的预测能力时, 不能仅仅看准确率.
性能分析:
1 个样本耗时 0.000466 秒, 均摊 0.466ms,
10 个样本耗时 0.000619 秒, 均摊 0.06ms,
100 个样本耗时 0.000939 秒, 均摊 9.39us,
1000 个样本耗时 0.003112 秒, 均摊 3.11us
完整代码:
- import sys
- import urllib
- import numpy as np
- import tensorflow as tf
- import tflearn
- from tflearn.data_utils import to_categorical, pad_sequences
- from sklearn.model_selection import train_test_split
- from sklearn.neural_network import MLPClassifier
- import time
- NUM = 100  # Maximum decoded line length kept by load_file; also the maxlen passed to pad_sequences in train.
def elt(line):
    """Encode a string as a list of integer character codes.

    Each character of ``line`` is lower-cased and then converted with
    ``ord``; the codes are returned in the original character order.
    An empty string yields an empty list.
    """
    return [ord(ch.lower()) for ch in line]
def load_file(filename, label, ms=None, ns=None):
    """Read one sample file and append its encoded lines and labels.

    Every line of ``filename`` has its trailing newline removed and is
    URL-decoded. Lines whose decoded length is at most ``NUM`` are encoded
    with ``elt`` and appended to ``ms``; a matching 0/1 label is appended
    to ``ns``. Longer lines are skipped.

    Args:
        filename: Path of the text file to read, one sample per line.
        label: Truthy to label every kept sample 1, falsy to label it 0.
        ms: List that receives the encoded samples. It is extended in
            place; a fresh list is created when omitted.
        ns: List that receives the labels, parallel to ``ms``. It is
            extended in place; a fresh list is created when omitted.

    Returns:
        The ``(ms, ns)`` pair, so the function is also usable without
        passing accumulator lists.
    """
    # ``unquote`` lives in ``urllib.parse`` on Python 3 and directly in
    # ``urllib`` on Python 2; import locally so either interpreter works.
    try:
        from urllib.parse import unquote
    except ImportError:
        from urllib import unquote

    # Never use a shared mutable default: create the lists per call.
    if ms is None:
        ms = []
    if ns is None:
        ns = []

    n = 1 if label else 0
    with open(filename) as f:
        for line in f:
            line = unquote(line.strip('\n'))
            if len(line) <= NUM:
                ms.append(elt(line))
                ns.append(n)

    # Running total of samples accumulated so far (across earlier calls
    # too, when the same list is passed in again).
    print(len(ms))
    return ms, ns
def load_files(file1, file2):
    """Load both sample files into one pair of parallel lists.

    Samples from ``file1`` are labelled 1 and samples from ``file2`` are
    labelled 0; ``file1`` is read first, so its samples come first.

    Returns:
        A ``(samples, labels)`` tuple of lists filled by ``load_file``.
    """
    samples, labels = [], []
    for path, flag in ((file1, 1), (file2, 0)):
        load_file(path, flag, samples, labels)
    return samples, labels
def train(x, y):
    """Fit an MLP with one 50-unit hidden layer and print its held-out score.

    The data is split 60/40 into train and test parts with a fixed seed,
    each sample is passed through ``pad_sequences`` with ``maxlen=NUM`` and
    fill value 0, and the labels are converted with ``to_categorical`` to
    two classes. After fitting, the score on the test part and the
    prediction for a single probe row of the test part are printed.

    Args:
        x: Sequence of encoded samples (lists of integer character codes).
        y: Sequence of 0/1 labels parallel to ``x``.
    """
    # NOTE(review): the scikit-learn estimator below does not appear to use
    # this TensorFlow graph; it is kept to preserve the original behaviour.
    graph1 = tf.Graph()
    with graph1.as_default():
        x_train, x_test, y_train, y_test = train_test_split(
            x, y, test_size=0.4, random_state=0)
        x_train = pad_sequences(x_train, maxlen=NUM, value=0.)
        x_test = pad_sequences(x_test, maxlen=NUM, value=0.)
        y_train = to_categorical(y_train, nb_classes=2)
        y_test = to_categorical(y_test, nb_classes=2)
        mlp = MLPClassifier(hidden_layer_sizes=(50,), max_iter=10, alpha=1e-4,
                            solver='sgd', verbose=10, tol=1e-4, random_state=1,
                            learning_rate_init=.1)
        mlp.fit(x_train, y_train)
        n = mlp.score(x_test, y_test)
        # Row 1100 is used as the probe when it exists; on smaller test
        # sets the fixed slice [1100:1101] would be empty and predict()
        # would fail, so fall back to the last available row.
        probe = min(1100, len(x_test) - 1)
        p1 = mlp.predict(x_test[probe:probe + 1])
        # The score is computed on the held-out test part, not the
        # training part, so label it accordingly.
        print("Test set score: %f" % n)
        print(p1)
if __name__ == "__main__":
    # Two positional arguments are required: the file whose samples are
    # labelled 1, then the file whose samples are labelled 0.
    if len(sys.argv) < 3:
        sys.exit("usage: %s <label-1 sample file> <label-0 sample file>"
                 % sys.argv[0])
    xs, ys = load_files(sys.argv[1], sys.argv[2])
    train(xs, ys)
来源: https://juejin.im/entry/5be12f9ef265da61553a4a9b