decision-tree-classifier

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

commit 98cfd66ea326623abf7766c2564ae4256d8c94aa
parent 7103ec3a11e61323b8a261e188584744da94800a
Author: Andrew <andrewlaack1@gmail.com>
Date:   Thu, 26 Dec 2024 12:50:26 -0600

Tested edge detection... not good

Diffstat:
Mrewrite/CMakeLists.txt | 1-
Mrewrite/Makefile | 27---------------------------
Mrewrite/Test.py | 32+++++++++++++++++++++++++++++---
Mrewrite/cpp/DecisionTreeClassifier.cpp | 39+--------------------------------------
Mrewrite/cpp/DecisionTreeClassifier.h | 5+----
Drewrite/cpp/Kernel.cpp | 79-------------------------------------------------------------------------------
Drewrite/cpp/Kernel.h | 16----------------
Mrewrite/cpp/bindings.cpp | 2+-
Drewrite/cpp/test.cpp | 35-----------------------------------
Drewrite/output.csv | 29-----------------------------
10 files changed, 32 insertions(+), 233 deletions(-)

diff --git a/rewrite/CMakeLists.txt b/rewrite/CMakeLists.txt @@ -25,7 +25,6 @@ set(SOURCES cpp/TreeNode.cpp cpp/Criterion.cpp cpp/bindings.cpp - cpp/Kernel.cpp ) # Create the shared library diff --git a/rewrite/Makefile b/rewrite/Makefile @@ -190,30 +190,6 @@ cpp/DecisionTreeClassifier.cpp.s: $(MAKE) $(MAKESILENT) -f CMakeFiles/decision_tree.dir/build.make CMakeFiles/decision_tree.dir/cpp/DecisionTreeClassifier.cpp.s .PHONY : cpp/DecisionTreeClassifier.cpp.s -cpp/Kernel.o: cpp/Kernel.cpp.o -.PHONY : cpp/Kernel.o - -# target to build an object file -cpp/Kernel.cpp.o: - $(MAKE) $(MAKESILENT) -f CMakeFiles/decision_tree.dir/build.make CMakeFiles/decision_tree.dir/cpp/Kernel.cpp.o -.PHONY : cpp/Kernel.cpp.o - -cpp/Kernel.i: cpp/Kernel.cpp.i -.PHONY : cpp/Kernel.i - -# target to preprocess a source file -cpp/Kernel.cpp.i: - $(MAKE) $(MAKESILENT) -f CMakeFiles/decision_tree.dir/build.make CMakeFiles/decision_tree.dir/cpp/Kernel.cpp.i -.PHONY : cpp/Kernel.cpp.i - -cpp/Kernel.s: cpp/Kernel.cpp.s -.PHONY : cpp/Kernel.s - -# target to generate assembly for a file -cpp/Kernel.cpp.s: - $(MAKE) $(MAKESILENT) -f CMakeFiles/decision_tree.dir/build.make CMakeFiles/decision_tree.dir/cpp/Kernel.cpp.s -.PHONY : cpp/Kernel.cpp.s - cpp/TreeNode.o: cpp/TreeNode.cpp.o .PHONY : cpp/TreeNode.o @@ -278,9 +254,6 @@ help: @echo "... cpp/DecisionTreeClassifier.o" @echo "... cpp/DecisionTreeClassifier.i" @echo "... cpp/DecisionTreeClassifier.s" - @echo "... cpp/Kernel.o" - @echo "... cpp/Kernel.i" - @echo "... cpp/Kernel.s" @echo "... cpp/TreeNode.o" @echo "... cpp/TreeNode.i" @echo "... cpp/TreeNode.s" diff --git a/rewrite/Test.py b/rewrite/Test.py @@ -7,7 +7,7 @@ from sklearn.model_selection import train_test_split from sklearn.decomposition import PCA import time import matplotlib.pyplot as plt - +import cv2 # OpenCV is required for Sobel operations mnist = fetch_openml("mnist_784", version=1) X, y = mnist["data"], mnist["target"] @@ -23,10 +23,36 @@ SEED = 110 np.random.seed(SEED) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=SEED) y_train = np.array(y_train) +X_train = np.array(X_train) +X_test = np.array(X_test) + +def append_sobel_features(X): + # Assume X is a 3D array with shape (n_samples, height, width) for image data + sobel_features = [] + for sample in X: + # Compute Sobel edges along x and y directions + sobel_x = cv2.Sobel(sample, cv2.CV_64F, 1, 0, ksize=3) + sobel_y = cv2.Sobel(sample, cv2.CV_64F, 0, 1, ksize=3) + sobel_combined = np.sqrt(sobel_x**2 + sobel_y**2) # Combine both directions + sobel_combined = sobel_combined.flatten() # Flatten the 2D Sobel result + + # Append to list + sobel_features.append(sobel_combined) + + # Convert to NumPy array + sobel_features = np.array(sobel_features) + return np.hstack((X.reshape(len(X), -1), sobel_features)) + +# Apply the function to X_train and X_test +X_train = append_sobel_features(X_train) +X_test = append_sobel_features(X_test) + +print(X_train.shape) +print(X_test.shape) + for i in range(1000, 5001): if (i <= 100 and i % 10 == 0) or ((i) % 500 == 0) or (i == 1) or (i <= 500 and i % 50 == 0): - X_train_pca = np.array(X_train).copy() X_test_pca = np.array(X_test).copy() @@ -39,7 +65,7 @@ for i in range(1000, 5001): X_train_pca = X_train_pca[random_indices] y_train_current = y_train[random_indices] - clf = DecisionTreeClassifier(100, True) + clf = DecisionTreeClassifier(100) start_time = time.time() clf.fit(X_train_pca, len(X_train_pca), y_train_current, len(X_train_pca[0])) diff --git a/rewrite/cpp/DecisionTreeClassifier.cpp b/rewrite/cpp/DecisionTreeClassifier.cpp @@ -7,17 +7,12 @@ using namespace std; -DecisionTreeClassifier::DecisionTreeClassifier(int maxDepth, bool useImageKernelAugmentation){ +DecisionTreeClassifier::DecisionTreeClassifier(int maxDepth){ this->depth = maxDepth; - this->useImageKernelPreprocessing = useImageKernelAugmentation; } void DecisionTreeClassifier::fit(float* X, int samples, int* y, int features){ - if(this->kernel != nullptr){ - delete kernel; - } - if (splittingTree != nullptr){ deleteTree(splittingTree); } @@ -30,26 +25,9 @@ void DecisionTreeClassifier::fit(float* X, int samples, int* y, int features){ throw invalid_argument("Invalid argument, there must be 1 or more samples to train on."); } - if(useImageKernelPreprocessing){ - float kernelMatrix[9] = { - -1, 0, 1, - -2, 0, -2, - -1, 0, 1 - }; - this->kernel = new Kernel(3,3, kernelMatrix); - KernelOutput out = this->kernel->augmentWithKernel(X,samples,features); - X = out.features; - features = out.featureCount; - } - - - splittingTree = recurse(X, samples, y, features, depth); featureCount = features; - if(useImageKernelPreprocessing){ - delete[] X; - } } @@ -198,13 +176,6 @@ int* DecisionTreeClassifier::predict(float* X, int samples, int features) { throw logic_error("Unable to predict prior to calling fit()."); } - - if(useImageKernelPreprocessing){ - KernelOutput out = this->kernel->augmentWithKernel(X,samples,features); - X = out.features; - features = out.featureCount; - } - if(features != this->featureCount){ throw invalid_argument("Incorrect number of features for prediction."); } @@ -227,20 +198,12 @@ int* DecisionTreeClassifier::predict(float* X, int samples, int features) { predictions[i] = current->getClassification(); } - if(useImageKernelPreprocessing){ - delete [] X; - } - return predictions; } DecisionTreeClassifier::~DecisionTreeClassifier(){ deleteTree(splittingTree); - if(this->kernel != nullptr){ - delete kernel; - } - } void DecisionTreeClassifier::deleteTree(TreeNode* node){ diff --git a/rewrite/cpp/DecisionTreeClassifier.h b/rewrite/cpp/DecisionTreeClassifier.h @@ -1,10 +1,9 @@ #include "TreeNode.h" #include <vector> -#include "Kernel.h" class DecisionTreeClassifier{ public: - DecisionTreeClassifier(int depth, bool useImageKernelAugmentation); + DecisionTreeClassifier(int depth); void fit(float* X, int samples, int* y, int features); int* predict(float* X, int samples, int features); std::string getDot(); @@ -17,6 +16,4 @@ class DecisionTreeClassifier{ TreeNode* recurse(float* X, int samples, int* y, int features, int depth); int primaryClass(int* y, int labelCount); void deleteTree(TreeNode* node); - bool useImageKernelPreprocessing; - Kernel* kernel = nullptr; }; diff --git a/rewrite/cpp/Kernel.cpp b/rewrite/cpp/Kernel.cpp @@ -1,79 +0,0 @@ -#include "Kernel.h" -#include <iostream> -#include <stdexcept> -#include "math.h" - -// verified proper inputs -Kernel::Kernel(int X_dim, int y_dim, float* kernel){ - - if(X_dim <= 2 || y_dim <= 2){ - throw std::invalid_argument("Kernel has a minimum size of 3x3."); - } - - if(X_dim % 2 == 0 || y_dim % 2 == 0){ - throw std::invalid_argument("Kernel must have odd size edges."); - } - - this->X_dim = X_dim; - this->y_dim = y_dim; - this->kernel = kernel; - - float summed = 0; - for(int y = 0 ; y < y_dim; ++y){ - for(int x = 0 ; x < X_dim; ++x){ - summed += kernel[x + (X_dim * y)]; - } - } - - this->sum = summed; -} - -KernelOutput Kernel::augmentWithKernel(float* X, int samples, int features){ - KernelOutput out = KernelOutput(); - out.featureCount = features * 2; - - float* X_return = new float[features*samples*2]; - - for(int i = 0 ; i < samples; ++i){ - for(int x = 0 ; x < features; ++x){ - X_return[i*features + x] = X[i*features + x]; - } - } - - - int currentOffset = samples * features; - for(int y = 0 ; y < features; ++y){ - for(int x = 0 ; x < samples; ++x){ - X_return[currentOffset] = computeIndex(X, x, y, features, samples); - currentOffset += 1; - } - } - - out.features = X_return; - - return out; -} - -float Kernel::computeIndex(float* X, int xPos, int yPos, int features, int samples){ - - float average = 0; - int itr = 0; - for(int i = 0 ; i < y_dim ; ++i){ - for(int x = 0; x < X_dim ; ++x){ - - int currentX = xPos + x; - int currentY = (i + yPos); - if(currentX < 0 || currentY < 0 || currentX >= features || currentY >= samples){ - continue; - } - float currentKern = kernel[x + i*X_dim]; - float currentValue = X[xPos + x + ((i + yPos) * features)]; - - itr += 1; - average += currentValue * currentKern; - } - } - average /= sum; - - return average; -} diff --git a/rewrite/cpp/Kernel.h b/rewrite/cpp/Kernel.h @@ -1,16 +0,0 @@ -struct KernelOutput{ - float* features; - int featureCount; -}; - -class Kernel{ - public: - Kernel(int X_dim, int y_dim, float* kernel); - KernelOutput augmentWithKernel(float* X, int samples, int features); - private: - float* kernel; - int X_dim; - int y_dim; - float sum; - float computeIndex(float* X, int xPos, int yPos, int features, int samples); -}; diff --git a/rewrite/cpp/bindings.cpp b/rewrite/cpp/bindings.cpp @@ -8,7 +8,7 @@ namespace py = pybind11; PYBIND11_MODULE(decision_tree, m) { py::class_<DecisionTreeClassifier>(m, "DecisionTreeClassifier") - .def(py::init<int, bool>()) + .def(py::init<int>()) .def("fit", [](DecisionTreeClassifier &self, py::array_t<float> X, int samples, py::array_t<int> y, int features) { auto X_buf = X.request(); // Request a buffer from NumPy array auto y_buf = y.request(); // Request a buffer from NumPy array diff --git a/rewrite/cpp/test.cpp b/rewrite/cpp/test.cpp @@ -1,35 +0,0 @@ -#include "Kernel.h" -#include "iostream" -int main(){ - - - float X[] = { - 1.0f, 1.0f, 1.0f, - 1.0f, 1.0f, 1.0f, - 1.0f, 1.0f, 1.0f - }; - - float input[] = { - 1.0f, 1.0f, 1.0f, - 1.0f, 5.0f, 1.0f, - 1.0f, 1.0f, 1.0f - }; - - Kernel kern = Kernel(3,3,X); - KernelOutput out = kern.augmentWithKernel(input, 3, 3); - for(int i = 0 ; i < 3; ++i){ - for(int x = 0 ; x < 3 ; ++x){ - std::cout << out.features[i*3 + x] << " "; - } - std::cout << std::endl; - } - - std::cout << std::endl; - - for(int i = 0 ; i < 3; ++i){ - for(int x = 0 ; x < 3 ; ++x){ - std::cout << out.features[(i*3 + x) + 9] << " "; - } - std::cout << std::endl; - } -} diff --git a/rewrite/output.csv b/rewrite/output.csv @@ -1,29 +0,0 @@ -,0 -1,0.0945 -10,0.1327142857142857 -20,0.19542857142857142 -30,0.22857142857142856 -40,0.344 -50,0.29642857142857143 -60,0.3717142857142857 -70,0.4052142857142857 -80,0.3412857142857143 -90,0.44307142857142856 -100,0.4712857142857143 -150,0.4295 -200,0.5545 -250,0.5513571428571429 -300,0.5530714285714285 -350,0.5372142857142858 -400,0.5695 -450,0.6051428571428571 -500,0.5994285714285714 -1000,0.6572857142857143 -1500,0.6706428571428571 -2000,0.6952857142857143 -2500,0.7142142857142857 -3000,0.7461428571428571 -3500,0.7458571428571429 -4000,0.7458571428571429 -4500,0.7596428571428572 -5000,0.7695714285714286