- # coding: utf-8
- """
- """
- import numpy as np
- import pandas as pd
- import lightgbm as lgb
# SVC: support vector machine classifier
- from sklearn.ensemble import RandomForestClassifier
# train/test splitting
# accuracy metric
- # from keras.utils import to_categorical
- from sklearn.metrics import accuracy_score
- from sklearn.model_selection import train_test_split
- from sklearn.neural_network import MLPClassifier
- from sklearn.preprocessing import LabelEncoder
- import matplotlib.pyplot as plt
- from sklearn.svm import SVC
- import matplotlib as mpl
- from xgboost import XGBClassifier
- from sklearn import naive_bayes
- # MultiLabelBinarizer
- # LabelBinarizer
- mpl.rcParams['font.sans-serif'] = [u'SimHei']
- mpl.rcParams['axes.unicode_minus'] = False
- encoder = LabelEncoder()
- # lb = LabelBinarizer()
def load_data(path="./iris.txt", test_size=0.3, random_state=33):
    """Load an iris-style CSV file and return a train/test split.

    Each line of *path* is comma-separated: float features followed by a
    string class label (assumption based on the parsing below — confirm
    against the actual data file).

    Parameters
    ----------
    path : str
        Location of the data file (default ``"./iris.txt"``).
    test_size : float
        Fraction of samples held out for testing (default 0.3).
    random_state : int
        Seed for the split, kept fixed so runs are reproducible (default 33).

    Returns
    -------
    x_train, x_test, y_train, y_test : numpy arrays
    """
    raw_data = np.loadtxt(path, dtype='str', encoding='utf-8')
    x, y = [], []
    for line in raw_data:
        # Split each line exactly once (the original split twice per line).
        *features, label = line.split(",")
        x.append([float(v) for v in features])
        y.append(label)
    x = np.array(x)
    # Encode string labels as integer codes 0..n_classes-1.
    y = pd.Categorical(y).codes
    return train_test_split(x, y, test_size=test_size, random_state=random_state)
def main():
    """Train six classifiers on the iris split, print and plot test accuracy."""
    x_train, x_test, y_train, y_test = load_data()

    # scikit-learn style estimators: fit() returns the fitted estimator.
    forest = RandomForestClassifier(n_estimators=10).fit(x_train, y_train)
    svm = SVC(C=1, kernel='poly').fit(x_train, y_train)
    xgbc = XGBClassifier().fit(x_train, y_train)
    nb = naive_bayes.MultinomialNB(alpha=1.0, fit_prior=True,
                                   class_prior=None).fit(x_train, y_train)
    mlp = MLPClassifier(activation='relu', solver='adam',
                        alpha=0.0001).fit(x_train, y_train)

    # LightGBM native API. num_class must match the real number of classes
    # (3 for iris); the original hard-coded 50, wasting computation on 47
    # classes that never occur.
    num_class = int(len(np.unique(y_train)))
    lgb_train = lgb.Dataset(x_train, y_train)
    params = {'max_depth': 6, 'min_data_in_leaf': 20, 'num_leaves': 35,
              'learning_rate': 0.1, 'lambda_l1': 0.1, 'lambda_l2': 0.2,
              'objective': 'multiclass', 'num_class': num_class, 'verbose': -1}
    # NOTE(review): the original passed verbose_eval=100, a keyword removed in
    # LightGBM 4.x; with no valid_sets it printed nothing anyway, so it is
    # dropped rather than replaced with a log_evaluation callback.
    booster = lgb.train(params, lgb_train, num_boost_round=1000)

    # The booster returns per-class probabilities; argmax recovers the label.
    lgb_pred = np.argmax(
        booster.predict(x_test, num_iteration=booster.best_iteration), axis=1)

    x_plot = ["forest", "svm", "xg", "lgb", "nb", "mlp"]
    y_plot = [
        accuracy_score(y_test, forest.predict(x_test)),
        accuracy_score(y_test, svm.predict(x_test)),
        accuracy_score(y_test, xgbc.predict(x_test)),
        accuracy_score(y_test, lgb_pred),
        accuracy_score(y_test, nb.predict(x_test)),
        accuracy_score(y_test, mlp.predict(x_test)),
    ]
    print(nb.predict(x_test))
    print(*y_plot)

    plt.bar(x_plot, y_plot, label=u'测试集准确率')
    plt.legend()
    plt.show()

    # 5-fold cross-validation of the MLP.
    # NOTE(review): this scores on x_test only (~45 samples) — presumably the
    # full data set was intended; verify before trusting the number.
    from sklearn.model_selection import cross_val_score
    scores = cross_val_score(mlp, x_test, y_test, cv=5)
    print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))


if __name__ == '__main__':
    main()
# Source: http://www.bubuko.com/infodetail-3368023.html