이언배 연구노트

[Python] Classification 모델들 본문

Python

[Python] Classification 모델들

이언배 2024. 12. 11. 15:33

내가 쓸 모델들은 총 4개.

 

1. Multinomial Logit Regression

 

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split

###################### Split into training and test sets
# Hold out 20% of the samples for testing; the fixed random_state makes the
# shuffle (and therefore the split) reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

###################### Fit the multinomial logit regression
# NOTE(review): scikit-learn 1.5 deprecated the `multi_class` argument (lbfgs
# already fits a multinomial model for 3+ classes). It is kept here so older
# versions behave the same; drop it once the minimum version is >= 1.5.
model = LogisticRegression(multi_class='multinomial', solver='lbfgs')
model.fit(X_train, y_train)

# Predict on the test set
y_pred = model.predict(X_test)

###################### Confusion matrix for evaluation
# Rows are true labels, columns are predicted labels.
cm = confusion_matrix(y_test, y_pred)

print("Confusion Matrix:")
print(cm)

# The diagonal holds the correctly classified samples. trace()/sum() work for
# any number of classes, unlike spelling out three diagonal cells by hand.
correct_predictions = cm.trace()
total_predictions = cm.sum()

###################### Accuracy
accuracy = correct_predictions / total_predictions
print("Accuracy:", accuracy)

###################### Precision, recall and F1 for class 0
# average=None returns one score per class (in sorted label order); index 0
# selects the first class.
precision_class_0 = precision_score(y_test, y_pred, average=None)[0]
recall_class_0 = recall_score(y_test, y_pred, average=None)[0]
f1_class_0 = f1_score(y_test, y_pred, average=None)[0]

print("F1-score for class 0:", f1_class_0)

 

2. Gaussian Naive Bayes

 

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier 
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import export_graphviz
import graphviz
from sklearn import tree
from IPython import display
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score
from xgboost import XGBClassifier, XGBRegressor
import shap #얘는 쓸지 안쓸지 두고보자

############################# Gaussian Naive Bayes model
# Fit on the training split and predict the held-out test split in one chain.
gNB = GaussianNB()
y_pred = gNB.fit(X_train, y_train).predict(X_test)

# Accuracy as reported by scikit-learn
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

# Display labels for an optional confusion-matrix plot.
# NOTE(review): presumably listed in the same order as the encoded class
# labels -- confirm before using them as axis labels.
class_names = ['NDTP', 'DTP', 'ADTP']
# To plot, use ConfusionMatrixDisplay.from_estimator(gNB, X_test, y_test,
# display_labels=class_names); the older plot_confusion_matrix helper was
# removed in scikit-learn 1.2.

# Confusion matrix (rows: true labels, columns: predicted labels)
cm = confusion_matrix(y_test, y_pred)

print("Confusion Matrix:")
print(cm)

# Accuracy recomputed from the confusion matrix: diagonal = correct
# predictions. trace()/sum() work for any number of classes.
correct_predictions = cm.trace()
total_predictions = cm.sum()

accuracy = correct_predictions / total_predictions
print("Accuracy:", accuracy)

# Per-class precision and recall, as percentages. average=None yields one
# score per class, so each metric is computed once and then iterated instead
# of being recomputed for every class index.
precisions = precision_score(y_test, y_pred, average=None) * 100
recalls = recall_score(y_test, y_pred, average=None) * 100
for class_precision, class_recall in zip(precisions, recalls):
    print(class_precision.round(3), class_recall.round(3))

 

3. Support Vector Machine

 

from sklearn import svm
############################ Support vector classifier
# decision_function_shape='ovo' selects the one-vs-one shape for the decision
# function output.
clf = svm.SVC(decision_function_shape='ovo')
# Fit on the training split only, so the test split stays unseen until
# prediction.
y_pred = clf.fit(X_train, y_train).predict(X_test)

# Accuracy as reported by scikit-learn
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

# Confusion matrix (rows: true labels, columns: predicted labels)
cm = confusion_matrix(y_test, y_pred)

print("Confusion Matrix:")
print(cm)

# Accuracy recomputed from the confusion matrix: diagonal = correct
# predictions. trace()/sum() work for any number of classes.
correct_predictions = cm.trace()
total_predictions = cm.sum()

accuracy = correct_predictions / total_predictions
print("Accuracy:", accuracy)

# Per-class precision and recall, as percentages. average=None yields one
# score per class, so each metric is computed once and then iterated.
precisions = precision_score(y_test, y_pred, average=None) * 100
recalls = recall_score(y_test, y_pred, average=None) * 100
for class_precision, class_recall in zip(precisions, recalls):
    print(class_precision.round(3), class_recall.round(3))

 

4. Random Forest

# Split the data (same parameters as the other sections, so this section can
# run on its own) and create a Random Forest classifier.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)
clf = RandomForestClassifier(
    max_depth=None,
    min_samples_leaf=4,
    min_samples_split=5,
    n_estimators=50,
    random_state=42,
)

# Fit the model on the training data
clf.fit(X_train, y_train)

# Predict on the test data
y_pred = clf.predict(X_test)

# Accuracy as reported by scikit-learn
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")


## Performance evaluation
# clf.score on a classifier is the test-set accuracy, printed unrounded here.
print('정확도 : ', clf.score(X_test,y_test))
print()

# Confusion matrix (rows: true labels, columns: predicted labels)
cm = confusion_matrix(y_test, y_pred)

print("Confusion Matrix:")
print(cm)

# Accuracy recomputed from the confusion matrix: diagonal = correct
# predictions. trace()/sum() work for any number of classes.
correct_predictions = cm.trace()
total_predictions = cm.sum()

accuracy = correct_predictions / total_predictions
print("Accuracy:", accuracy)

# Per-class precision and recall, as percentages. average=None yields one
# score per class, so each metric is computed once and then iterated.
precisions = precision_score(y_test, y_pred, average=None) * 100
recalls = recall_score(y_test, y_pred, average=None) * 100
for class_precision, class_recall in zip(precisions, recalls):
    print(class_precision.round(3), class_recall.round(3))
728x90