generate2D.py (2740B)
"""Plot the diabetes dataset in the BMI/Glucose plane with decision-tree regions.

The script reads ``./diabetes.csv`` (columns ``BMI``, ``Glucose`` and
``Outcome`` are used), then writes:

* ``original_diabetes_plot_bmi_glucose.png`` -- scatter of the raw samples;
* ``images/diabetes_tree_bmi_glucose_depth_<d>.png`` -- one decision-region
  plot per tree depth ``d`` in ``1..MAX_DEPTH``;
* ``tree2.pdf`` -- a diagram of the deepest tree.

For every depth it also prints the accuracy of the tree on the data it was
fitted on.
"""

import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier, plot_tree

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
CSV_PATH = './diabetes.csv'
IMAGE_DIR = "images"

# Axis limits, shared by the prediction grid and by every scatter plot so the
# shaded regions always cover exactly the visible area.
BMI_RANGE = (0, 70)
GLUCOSE_RANGE = (0, 200)
GRID_STEPS = 500  # grid resolution along each axis

# Trees of depth 1..MAX_DEPTH are trained; the deepest one is also drawn.
MAX_DEPTH = 4

# Optional dark theme (matplotlib only). Disabled by default; set
# USE_DARK_MODE to True to render every figure on a black background.
USE_DARK_MODE = False
DARK_MODE_RCPARAMS = {
    "axes.facecolor": "#000000",
    "axes.edgecolor": "#333333",
    "figure.facecolor": "#000000",
    "savefig.facecolor": "#000000",
    "text.color": "white",
    "axes.labelcolor": "white",
    "xtick.color": "white",
    "ytick.color": "white",
    "grid.color": "gray",
    "axes.grid": True,
}


def _draw_samples(bmi, glucose, labels, title):
    """Scatter the samples on the current axes and apply the shared styling.

    Points are coloured by ``labels`` with the ``bwr`` colormap; axis labels,
    limits and the given ``title`` are then set and the layout tightened.
    """
    plt.scatter(bmi, glucose, c=labels, cmap='bwr', s=60, edgecolors="#000000")
    plt.xlabel("BMI")
    plt.ylabel("Glucose")
    plt.title(title)
    plt.xlim(*BMI_RANGE)
    plt.ylim(*GLUCOSE_RANGE)
    plt.tight_layout()


def _save_current_figure(path):
    """Write the current figure to ``path`` at 300 dpi and close it."""
    plt.savefig(path, dpi=300, bbox_inches='tight')
    plt.close()


# Create output directory
os.makedirs(IMAGE_DIR, exist_ok=True)

if USE_DARK_MODE:
    plt.rcParams.update(DARK_MODE_RCPARAMS)

# Load dataset
df = pd.read_csv(CSV_PATH)

# Extract features and target
X_bmi = df['BMI'].to_numpy()
X_glucose = df['Glucose'].to_numpy()
y = df['Outcome'].to_numpy()

# Combine features into a 2D array; column order is (BMI, Glucose) and must
# match both the grid columns below and the feature_names given to plot_tree.
data = np.column_stack((X_bmi, X_glucose))

# Create meshgrid for decision region plotting
xx, yy = np.meshgrid(
    np.linspace(*BMI_RANGE, GRID_STEPS),
    np.linspace(*GLUCOSE_RANGE, GRID_STEPS),
)
grid = np.c_[xx.ravel(), yy.ravel()]

# Plot the original data
plt.figure(figsize=(12, 6))
_draw_samples(X_bmi, X_glucose, y, "Diabetes Dataset: BMI vs Glucose")
_save_current_figure("original_diabetes_plot_bmi_glucose.png")

# Train decision trees and plot decision boundaries
for depth in range(1, MAX_DEPTH + 1):
    clf = DecisionTreeClassifier(max_depth=depth, random_state=42)
    clf.fit(data, y)
    preds = clf.predict(data)

    # Draw the tree structure once, for the deepest tree only.
    if depth == MAX_DEPTH:
        fig, ax = plt.subplots(figsize=(14, 14))
        # NOTE(review): class_names assumes Outcome is coded 0 = no diabetes,
        # 1 = diabetes -- confirm against the dataset.
        plot_tree(
            clf,
            feature_names=["BMI", "Glucose"],
            class_names=["No Diabetes", "Diabetes"],
            ax=ax,
            filled=True,
        )

        # The background follows the active rcParams (black only when
        # USE_DARK_MODE is enabled).
        fig.savefig("tree2.pdf", bbox_inches='tight')
        plt.close(fig)

    # Print accuracy. This is measured on the same samples the tree was
    # fitted on, i.e. it is a training accuracy, not a held-out score.
    acc = accuracy_score(y_pred=preds, y_true=y)
    print(f"DEPTH: {depth} - Accuracy: {acc:.4f}")

    # Probability of the second class (clf.classes_[1]) at every grid point,
    # reshaped back onto the mesh.
    Z = clf.predict_proba(grid)[:, 1].reshape(xx.shape)

    plt.figure(figsize=(12, 6))
    # Two filled bands split at 0.5 (blue below, red above), with the 0.5
    # level drawn on top as the decision boundary.
    plt.contourf(xx, yy, Z, levels=[0, 0.5, 1], colors=['#0000FF', '#FF0000'], alpha=0.3)
    plt.contour(xx, yy, Z, levels=[0.5], colors='black', linewidths=2)

    _draw_samples(X_bmi, X_glucose, y, f"Decision Tree (Depth {depth})")
    _save_current_figure(f"{IMAGE_DIR}/diabetes_tree_bmi_glucose_depth_{depth}.png")