decision-tree-classifier

Decision tree classifier implementation in C++
git clone git://git.laack.co/decision-tree-classifier.git
Log | Files | Refs | README | LICENSE

Usage.py (1867B)


      1 import numpy as np
      2 import pandas as pd
      3 from decision_tree import DecisionTreeClassifier
      4 from sklearn.metrics import accuracy_score
      5 from sklearn.datasets import fetch_openml
      6 from sklearn.model_selection import train_test_split
      7 from sklearn.decomposition import PCA
      8 import time
      9 import matplotlib.pyplot as plt
     10 
     11 mnist = fetch_openml("mnist_784", version=1)
     12 X, y = mnist["data"], mnist["target"]
     13 X = X.astype(np.float32)  # Ensure data is in float64 format for the classifier
     14 y = y.astype(np.int32)    # Ensure target labels are integers
     15 
     16 accLs = []
     17 xVals = []
     18 
     19 SEED = 110
     20 
     21 np.random.seed(SEED)
     22 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=SEED)
     23 y_train = np.array(y_train)
     24 X_train = np.array(X_train)
     25 X_test = np.array(X_test)
     26 
     27 for i in range(1, 11001):
     28     if (i <= 100 and i % 10 == 0) or ((i) % 500 == 0) or (i == 1) or (i <= 500 and i % 50 == 0):
     29         X_train_pca = np.array(X_train).copy()
     30         X_test_pca = np.array(X_test).copy()
     31 
     32 
     33 
     34         num_samples = i
     35         random_indices = np.random.choice(len(X_train_pca) - 1, num_samples, replace=False)
     36 
     37 
     38         X_train_pca = X_train_pca[random_indices]
     39         y_train_current = y_train[random_indices]
     40 
     41         clf = DecisionTreeClassifier(100)
     42 
     43         start_time = time.time()
     44         clf.fit(X_train_pca, len(X_train_pca), y_train_current, len(X_train_pca[0]))
     45         end_time = time.time()
     46         elapsed_time = end_time - start_time
     47         print(f"{i}: Time taken: {elapsed_time:.4f} seconds")
     48         y_pred = clf.predict(X_test_pca, len(X_test_pca), len(X_test_pca[0]))
     49         accuracy = accuracy_score(y_test, y_pred)
     50         print(f"Accuracy of Decision Tree Classifier on MNIST: {accuracy * 100:.2f}%")
     51 
     52         accLs.append(accuracy)
     53         xVals.append(i)
     54 
     55 df = pd.DataFrame(accLs, xVals)
     56 df.to_csv('output.csv') 
     57 
     58 
     59 plt.plot(xVals, accLs)
     60 plt.show()