Usage.py (1867B)
# Learning-curve experiment: train the project's DecisionTreeClassifier on
# random MNIST subsets of increasing size, report fit time and test accuracy
# for each size, then save the accuracies to output.csv and plot them.
import numpy as np
import pandas as pd
from decision_tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
import time
import matplotlib.pyplot as plt

mnist = fetch_openml("mnist_784", version=1)
X, y = mnist["data"], mnist["target"]
X = X.astype(np.float32)  # Ensure data is in float32 format for the classifier
y = y.astype(np.int32)  # Ensure target labels are integers

accLs = []  # test accuracy for each evaluated training-set size
xVals = []  # the training-set sizes themselves (same order as accLs)

SEED = 110
MAX_TRAIN_SAMPLES = 11000  # largest training-subset size that is evaluated

np.random.seed(SEED)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=SEED)
y_train = np.array(y_train)
# Convert once to C-ordered arrays. The original re-copied the full training
# matrix on every evaluated size; row-subsetting below already allocates a new
# array, so a single up-front conversion gives the same subsets.
X_train = np.ascontiguousarray(np.array(X_train))
X_test = np.ascontiguousarray(np.array(X_test))


def _is_evaluated_size(n):
    """Return True if a training subset of size n should be benchmarked.

    Evaluated sizes are: 1; every multiple of 10 up to 100; every multiple
    of 50 up to 500; and every multiple of 500.
    """
    return (
        n == 1
        or (n <= 100 and n % 10 == 0)
        or (n <= 500 and n % 50 == 0)
        or n % 500 == 0
    )


# Build the list of sizes up front instead of spinning through all 11000
# loop iterations just to skip most of them.
sample_sizes = [n for n in range(1, MAX_TRAIN_SAMPLES + 1) if _is_evaluated_size(n)]

for num_samples in sample_sizes:
    # Draw from the whole training set. The original passed
    # len(X_train) - 1 here, which made the last row unselectable.
    random_indices = np.random.choice(len(X_train), num_samples, replace=False)

    X_subset = X_train[random_indices]
    y_subset = y_train[random_indices]

    # Fresh copy per run, as in the original.
    # NOTE(review): kept because it is not visible from this file whether the
    # project-local classifier modifies its input during predict — confirm,
    # and pass X_test directly if it does not.
    X_eval = X_test.copy()

    # NOTE(review): the meaning of the constructor argument (100) is defined
    # in decision_tree, not here — presumably a depth/size limit; confirm.
    clf = DecisionTreeClassifier(100)

    # perf_counter is monotonic, so the measured interval cannot be skewed
    # by wall-clock adjustments.
    start_time = time.perf_counter()
    # fit receives: samples, sample count, labels, feature count.
    clf.fit(X_subset, len(X_subset), y_subset, len(X_subset[0]))
    elapsed_time = time.perf_counter() - start_time
    print(f"{num_samples}: Time taken: {elapsed_time:.4f} seconds")

    # predict receives: samples, sample count, feature count.
    y_pred = clf.predict(X_eval, len(X_eval), len(X_eval[0]))
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy of Decision Tree Classifier on MNIST: {accuracy * 100:.2f}%")

    accLs.append(accuracy)
    xVals.append(num_samples)

# One row per evaluated size: the index is the training-set size, the single
# column is the accuracy.
df = pd.DataFrame(accLs, index=xVals)
df.to_csv('output.csv')


plt.plot(xVals, accLs)
plt.show()