# coding: utf-8
# In[1]:
import numpy as np
import pandas as pd
from imblearn.over_sampling import SMOTE
from sklearn import tree
from sklearn.metrics import accuracy_score
from sklearn.metrics import auc
from sklearn.metrics import average_precision_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score
from sklearn.metrics import recall_score
# GridSearchCV is taken from sklearn.model_selection (already required for
# train_test_split); the old sklearn.grid_search module no longer exists in
# current scikit-learn releases.
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import Normalizer
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import binarize
from sklearn.svm import SVC
# In[37]:
# Impute and normalise the raw sampling export, then save the result to disk.
data = pd.read_csv(r"D:\Users\sgg91044\Desktop\Copy of sampling.csv")

# Coerce positional columns 7..24 to numeric; values that cannot be parsed
# become NaN so that they are picked up by the median imputation below.
data.iloc[:, 7:25] = data.iloc[:, 7:25].apply(pd.to_numeric, errors='coerce')
data.Target = data.Target.astype("category")

# Fill the gaps in each of those columns with the column's own median
# (Series.median ignores NaN, so only observed values contribute).
for i in range(7, 25):
    column = data.iloc[:, i]
    data.iloc[:, i] = column.fillna(column.median())

# Normalizer rescales every row to unit norm across the selected columns.
# NOTE(review): this is per-row scaling, not per-feature scaling -- confirm
# that this is what was intended for these two column groups.
nz = Normalizer()
for cols in (slice(17, 19), slice(7, 10)):
    data.iloc[:, cols] = nz.fit_transform(data.iloc[:, cols])

data.to_csv(r"D:\Users\sgg91044\Desktop\impution\AEM214_imputed_normalized.csv")
# In[2]:
# Reload the raw sampling export for the modelling experiment.
data = pd.read_csv(r"D:\Users\sgg91044\Desktop\Copy of sampling.csv")
data.head()
# In[3]:
# Coerce positional columns 5..22 to numeric; unparsable entries become NaN.
# NOTE(review): elsewhere in this file the same CSV is coerced over columns
# 7:25 -- confirm which range matches the file layout. No imputation is done
# in this cell, so any NaN produced here is still present downstream.
data.iloc[:, 5:23] = data.iloc[:, 5:23].apply(pd.to_numeric, errors='coerce')
data.Target = data.Target.astype("category")
# In[4]:
# Separate the label column from the features.
Y = data.Target
X = data.drop(columns='Target')
# In[5]:
X = X.drop(columns=['slotid', 'Recipe_Name', 'defect_count'])
# In[6]:
X
# In[7]:
# Hold out 20% of the rows for evaluation; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=0)
# In[8]:
# Oversample the training split with SMOTE; the held-out test split is left
# untouched.
# NOTE(review): `ratio` and `fit_sample` are the older imbalanced-learn
# spellings (newer releases use `sampling_strategy` / `fit_resample`) --
# confirm against the installed version before upgrading.
sm = SMOTE(random_state=12, ratio=1.0)
x_train_smote, y_train_smote = sm.fit_sample(X_train, y_train)
# In[9]:
# Class counts before and after oversampling.
print(y_train.value_counts(), np.bincount(y_train_smote))
# In[10]:
from sklearn.ensemble import RandomForestClassifier
# Make the random forest classifier
random_forest = RandomForestClassifier(
    n_estimators=100,
    random_state=50,
    verbose=1,
    oob_score=True,
    n_jobs=-1,
)
# In[11]:
# Train on the (oversampled) training data
random_forest.fit(x_train_smote, y_train_smote)
# In[ ]:
# Make predictions on the test data.
# The metric functions below compare class labels, so use predict();
# predict_proba() returns one probability column per class, which those
# functions cannot score against y_test.
y_pred = random_forest.predict(X_test)
# In[13]:
print(classification_report(y_pred=y_pred, y_true=y_test))
# In[14]:
f1_score(y_pred=y_pred, y_true=y_test)
# In[15]:
print("Accuracy of Random_forest:", round(accuracy_score(y_pred=y_pred, y_true=y_test) * 100, 2), "%")
print("Sensitivity of Random_forest:", round(recall_score(y_pred=y_pred, y_true=y_test) * 100, 2), "%")
# In[16]:
print(confusion_matrix(y_pred=y_pred, y_true=y_test))
# In[21]:
# Degree-2 polynomial-kernel SVM as an alternative to the random forest.
svc = SVC(kernel='poly', degree=2, gamma=1, coef0=0)
# In[ ]:
# Fit on the SMOTE-oversampled training data.
svc.fit(x_train_smote, y_train_smote)
# In[ ]:
from sklearn.neural_network import MLPClassifier
# The MLP is only constructed here; it is not fitted in this cell.
mlp = MLPClassifier(activation='relu', solver='adam', alpha=0.0001)
# In[17]:
# Hyper-parameter grid for the SVM: one sub-grid per kernel so that only the
# parameters relevant to that kernel are varied.
tuned_parameters = [
    {
        'kernel': ['rbf'],
        'gamma': [1e-3, 1e-4],
        'C': [1, 10, 100, 1000],
    },
    {
        'kernel': ['linear'],
        'C': [1, 10, 100, 1000],
    },
    {
        'kernel': ['poly'],
        'degree': [2, 3, 5],
    },
]
# 3-fold cross-validated search that selects the candidate with the best recall.
clf = GridSearchCV(SVC(), param_grid=tuned_parameters, cv=3, scoring='recall', verbose=True)
clf.fit(x_train_smote, y_train_smote)
# In[18]:
# Random-forest experiment on sampling1.csv: load, split, oversample, train.
data = pd.read_csv(r"D:\Users\sgg91044\Desktop\impution\sampling1.csv")
# Coerce positional columns 7..25 to numeric; unparsable entries become NaN.
data.iloc[:, 7:26] = data.iloc[:, 7:26].apply(pd.to_numeric, errors='coerce')
data.Target = data.Target.astype("category")
data.eqpid = data.eqpid.astype("category")
Y = data.Target
X = data.drop(columns='Target')
X = X.drop(columns=['eqpid', 'lotid', 'Chamber', 'slotid', 'Step', 'Recipie_Name', 'defect_count'])
# Hold out 20% of the rows for evaluation; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=0)
# Oversample the training split only.
# NOTE(review): `ratio` / `fit_sample` are the older imbalanced-learn
# spellings -- confirm against the installed version.
sm = SMOTE(random_state=12, ratio=1.0)
x_train_smote, y_train_smote = sm.fit_sample(X_train, y_train)
print(y_train.value_counts(), np.bincount(y_train_smote))
from sklearn.ensemble import RandomForestClassifier
# Make the random forest classifier
random_forest = RandomForestClassifier(
    n_estimators=100,
    random_state=50,
    verbose=1,
    oob_score=True,
    n_jobs=-1,
)
# Train on the training data
random_forest.fit(x_train_smote, y_train_smote)
# In[19]:
# Make predictions on the test data
y_pred = random_forest.predict(X_test)
print(classification_report(y_pred=y_pred, y_true=y_test))
# In[20]:
print(confusion_matrix(y_pred=y_pred, y_true=y_test))
# In[21]:
f1_score(y_pred=y_pred, y_true=y_test)
# In[22]:
print("Accuracy of Random_forest:", round(accuracy_score(y_pred=y_pred, y_true=y_test) * 100, 2), "%")
print("Sensitivity of Random_forest:", round(recall_score(y_pred=y_pred, y_true=y_test) * 100, 2), "%")
# In[71]:
# Random-forest experiment on sampling3.csv: load, split, oversample, train.
data = pd.read_csv(r"D:\Users\sgg91044\Desktop\impution\sampling3.csv")
# Coerce positional columns 7..24 to numeric; unparsable entries become NaN.
data.iloc[:, 7:25] = data.iloc[:, 7:25].apply(pd.to_numeric, errors='coerce')
data.Target = data.Target.astype("category")
Y = data.Target
X = data.drop(columns='Target')
X = X.drop(columns=['eqpid', 'lotid', 'Chamber', 'slotid', 'Step', 'Recipie_Name', 'defect_count'])
# Hold out 20% of the rows for evaluation; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=0)
# Oversample the training split only.
# NOTE(review): `ratio` / `fit_sample` are the older imbalanced-learn
# spellings -- confirm against the installed version.
sm = SMOTE(random_state=12, ratio=1.0)
x_train_smote, y_train_smote = sm.fit_sample(X_train, y_train)
print(y_train.value_counts(), np.bincount(y_train_smote))
from sklearn.ensemble import RandomForestClassifier
# Make the random forest classifier
random_forest = RandomForestClassifier(
    n_estimators=100,
    random_state=50,
    verbose=1,
    oob_score=True,
    n_jobs=-1,
)
# Train on the training data
random_forest.fit(x_train_smote, y_train_smote)
# In[72]:
# Make predictions on the test data
y_pred = random_forest.predict(X_test)
print(classification_report(y_pred=y_pred, y_true=y_test))
# In[53]:
f1_score(y_pred=y_pred, y_true=y_test)
# In[54]:
print("Accuracy of Random_forest:", round(accuracy_score(y_pred=y_pred, y_true=y_test) * 100, 2), "%")
print("Sensitivity of Random_forest:", round(recall_score(y_pred=y_pred, y_true=y_test) * 100, 2), "%")
# In[55]:
# Random-forest experiment on sampling2.csv: load, split, oversample, train.
data = pd.read_csv(r"D:\Users\sgg91044\Desktop\impution\sampling2.csv")
# Coerce positional columns 7..24 to numeric; unparsable entries become NaN.
data.iloc[:, 7:25] = data.iloc[:, 7:25].apply(pd.to_numeric, errors='coerce')
data.Target = data.Target.astype("category")
Y = data.Target
X = data.drop(columns='Target')
X = X.drop(columns=['eqpid', 'lotid', 'Chamber', 'slotid', 'Step', 'Recipie_Name', 'defect_count'])
# Hold out 20% of the rows for evaluation; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=0)
# Oversample the training split only.
# NOTE(review): `ratio` / `fit_sample` are the older imbalanced-learn
# spellings -- confirm against the installed version.
sm = SMOTE(random_state=12, ratio=1.0)
x_train_smote, y_train_smote = sm.fit_sample(X_train, y_train)
print(y_train.value_counts(), np.bincount(y_train_smote))
from sklearn.ensemble import RandomForestClassifier
# Make the random forest classifier
random_forest = RandomForestClassifier(
    n_estimators=100,
    random_state=50,
    verbose=1,
    oob_score=True,
    n_jobs=-1,
)
# Train on the training data
random_forest.fit(x_train_smote, y_train_smote)
# In[57]:
# Make predictions on the test data
y_pred = random_forest.predict(X_test)
print(classification_report(y_pred=y_pred, y_true=y_test))
# In[58]:
f1_score(y_pred=y_pred, y_true=y_test)
# In[59]:
print("Accuracy of Random_forest:", round(accuracy_score(y_pred=y_pred, y_true=y_test) * 100, 2), "%")
print("Sensitivity of Random_forest:", round(recall_score(y_pred=y_pred, y_true=y_test) * 100, 2), "%")
# In[ ]:
# flask is imported here but nothing in the visible code uses it.
import flask
# Source: http://www.bubuko.com/infodetail-2889745.html