cart-elc

Source code for CART-ELC
git clone git://git.laack.co/cart-elc.git
Log | Files | Refs | README | LICENSE

generate2D.py (2740B)


      1 import pandas as pd
      2 import numpy as np
      3 import matplotlib.pyplot as plt
      4 from sklearn.tree import DecisionTreeClassifier, plot_tree
      5 from sklearn.metrics import accuracy_score
      6 import os
      7 
      8 # Create output directory
      9 os.makedirs("images", exist_ok=True)
     10 
     11 # Dark mode settings using matplotlib only
     12 # plt.rcParams.update({
     13 #     "axes.facecolor": "#000000",
     14 #     "axes.edgecolor": "#333333",
     15 #     "figure.facecolor": "#000000",
     16 #     "savefig.facecolor": "#000000",
     17 #     "text.color": "white",
     18 #     "axes.labelcolor": "white",
     19 #     "xtick.color": "white",
     20 #     "ytick.color": "white",
     21 #     "grid.color": "gray",
     22 #     "axes.grid": True
     23 # })
     24 
     25 # Load dataset
     26 df = pd.read_csv('./diabetes.csv')
     27 
     28 # Extract features and target
     29 X_bmi = df['BMI'].to_numpy()
     30 X_glucose = df['Glucose'].to_numpy()
     31 y = df['Outcome'].to_numpy()
     32 
     33 # Combine features into a 2D array
     34 data = np.column_stack((X_bmi, X_glucose))
     35 
     36 # Create meshgrid for decision region plotting
     37 xx, yy = np.meshgrid(np.linspace(0, 70, 500), np.linspace(0, 200, 500))
     38 grid = np.c_[xx.ravel(), yy.ravel()]
     39 
     40 # Plot the original data
     41 plt.figure(figsize=(12, 6))
     42 plt.scatter(X_bmi, X_glucose, c=y, cmap='bwr', s=60, edgecolors="#000000")
     43 plt.xlabel("BMI")
     44 plt.ylabel("Glucose")
     45 plt.title("Diabetes Dataset: BMI vs Glucose")
     46 plt.xlim(0, 70)
     47 plt.ylim(0, 200)
     48 plt.tight_layout()
     49 plt.savefig("original_diabetes_plot_bmi_glucose.png", dpi=300, bbox_inches='tight')
     50 plt.close()
     51 
     52 # Train decision trees and plot decision boundaries
     53 for depth in range(1, 5):
     54     clf = DecisionTreeClassifier(max_depth=depth, random_state=42)
     55     clf.fit(data, y)
     56     preds = clf.predict(data)
     57 
     58     # Plot decision tree at max depth
     59     if depth == 4:
     60         fig, ax = plt.subplots(figsize=(14, 14))
     61         plot_tree(clf, feature_names=["BMI", "Glucose"], class_names=["No Diabetes", "Diabetes"], ax=ax, filled=True)
     62         
     63         # Save decision tree plot with a black background
     64         plt.savefig("tree2.pdf", bbox_inches='tight')
     65         plt.close()
     66 
     67     # Print accuracy
     68     acc = accuracy_score(y_pred=preds, y_true=y)
     69     print(f"DEPTH: {depth} - Accuracy: {acc:.4f}")
     70 
     71     # Plot decision boundaries
     72     Z = clf.predict_proba(grid)[:, 1].reshape(xx.shape)
     73 
     74     plt.figure(figsize=(12, 6))
     75     plt.contourf(xx, yy, Z, levels=[0, 0.5, 1], colors=['#0000FF', '#FF0000'], alpha=0.3)
     76     plt.contour(xx, yy, Z, levels=[0.5], colors='black', linewidths=2)
     77 
     78     plt.scatter(X_bmi, X_glucose, c=y, cmap='bwr', s=60, edgecolors="#000000")
     79     plt.xlabel("BMI")
     80     plt.ylabel("Glucose")
     81     plt.title(f"Decision Tree (Depth {depth})")
     82     plt.xlim(0, 70)
     83     plt.ylim(0, 200)
     84     plt.tight_layout()
     85     plt.savefig(f"images/diabetes_tree_bmi_glucose_depth_{depth}.png", dpi=300, bbox_inches='tight')
     86     plt.close()