commit 98cfd66ea326623abf7766c2564ae4256d8c94aa
parent 7103ec3a11e61323b8a261e188584744da94800a
Author: Andrew <andrewlaack1@gmail.com>
Date: Thu, 26 Dec 2024 12:50:26 -0600
Tested edge detection (Sobel features added in Test.py); results were not good, so the C++ Kernel augmentation was removed
Diffstat:
10 files changed, 32 insertions(+), 233 deletions(-)
diff --git a/rewrite/CMakeLists.txt b/rewrite/CMakeLists.txt
@@ -25,7 +25,6 @@ set(SOURCES
cpp/TreeNode.cpp
cpp/Criterion.cpp
cpp/bindings.cpp
- cpp/Kernel.cpp
)
# Create the shared library
diff --git a/rewrite/Makefile b/rewrite/Makefile
@@ -190,30 +190,6 @@ cpp/DecisionTreeClassifier.cpp.s:
$(MAKE) $(MAKESILENT) -f CMakeFiles/decision_tree.dir/build.make CMakeFiles/decision_tree.dir/cpp/DecisionTreeClassifier.cpp.s
.PHONY : cpp/DecisionTreeClassifier.cpp.s
-cpp/Kernel.o: cpp/Kernel.cpp.o
-.PHONY : cpp/Kernel.o
-
-# target to build an object file
-cpp/Kernel.cpp.o:
- $(MAKE) $(MAKESILENT) -f CMakeFiles/decision_tree.dir/build.make CMakeFiles/decision_tree.dir/cpp/Kernel.cpp.o
-.PHONY : cpp/Kernel.cpp.o
-
-cpp/Kernel.i: cpp/Kernel.cpp.i
-.PHONY : cpp/Kernel.i
-
-# target to preprocess a source file
-cpp/Kernel.cpp.i:
- $(MAKE) $(MAKESILENT) -f CMakeFiles/decision_tree.dir/build.make CMakeFiles/decision_tree.dir/cpp/Kernel.cpp.i
-.PHONY : cpp/Kernel.cpp.i
-
-cpp/Kernel.s: cpp/Kernel.cpp.s
-.PHONY : cpp/Kernel.s
-
-# target to generate assembly for a file
-cpp/Kernel.cpp.s:
- $(MAKE) $(MAKESILENT) -f CMakeFiles/decision_tree.dir/build.make CMakeFiles/decision_tree.dir/cpp/Kernel.cpp.s
-.PHONY : cpp/Kernel.cpp.s
-
cpp/TreeNode.o: cpp/TreeNode.cpp.o
.PHONY : cpp/TreeNode.o
@@ -278,9 +254,6 @@ help:
@echo "... cpp/DecisionTreeClassifier.o"
@echo "... cpp/DecisionTreeClassifier.i"
@echo "... cpp/DecisionTreeClassifier.s"
- @echo "... cpp/Kernel.o"
- @echo "... cpp/Kernel.i"
- @echo "... cpp/Kernel.s"
@echo "... cpp/TreeNode.o"
@echo "... cpp/TreeNode.i"
@echo "... cpp/TreeNode.s"
diff --git a/rewrite/Test.py b/rewrite/Test.py
@@ -7,7 +7,7 @@ from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
import time
import matplotlib.pyplot as plt
-
+import cv2 # OpenCV is required for Sobel operations
mnist = fetch_openml("mnist_784", version=1)
X, y = mnist["data"], mnist["target"]
@@ -23,10 +23,36 @@ SEED = 110
np.random.seed(SEED)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=SEED)
y_train = np.array(y_train)
+X_train = np.array(X_train)
+X_test = np.array(X_test)
+
+def append_sobel_features(X):
+ # Assume X is a 3D array with shape (n_samples, height, width) for image data
+ sobel_features = []
+ for sample in X:
+ # Compute Sobel edges along x and y directions
+ sobel_x = cv2.Sobel(sample, cv2.CV_64F, 1, 0, ksize=3)
+ sobel_y = cv2.Sobel(sample, cv2.CV_64F, 0, 1, ksize=3)
+ sobel_combined = np.sqrt(sobel_x**2 + sobel_y**2) # Combine both directions
+ sobel_combined = sobel_combined.flatten() # Flatten the 2D Sobel result
+
+ # Append to list
+ sobel_features.append(sobel_combined)
+
+ # Convert to NumPy array
+ sobel_features = np.array(sobel_features)
+ return np.hstack((X.reshape(len(X), -1), sobel_features))
+
+# Apply the function to X_train and X_test
+X_train = append_sobel_features(X_train)
+X_test = append_sobel_features(X_test)
+
+print(X_train.shape)
+print(X_test.shape)
+
for i in range(1000, 5001):
if (i <= 100 and i % 10 == 0) or ((i) % 500 == 0) or (i == 1) or (i <= 500 and i % 50 == 0):
-
X_train_pca = np.array(X_train).copy()
X_test_pca = np.array(X_test).copy()
@@ -39,7 +65,7 @@ for i in range(1000, 5001):
X_train_pca = X_train_pca[random_indices]
y_train_current = y_train[random_indices]
- clf = DecisionTreeClassifier(100, True)
+ clf = DecisionTreeClassifier(100)
start_time = time.time()
clf.fit(X_train_pca, len(X_train_pca), y_train_current, len(X_train_pca[0]))
diff --git a/rewrite/cpp/DecisionTreeClassifier.cpp b/rewrite/cpp/DecisionTreeClassifier.cpp
@@ -7,17 +7,12 @@
using namespace std;
-DecisionTreeClassifier::DecisionTreeClassifier(int maxDepth, bool useImageKernelAugmentation){
+DecisionTreeClassifier::DecisionTreeClassifier(int maxDepth){
this->depth = maxDepth;
- this->useImageKernelPreprocessing = useImageKernelAugmentation;
}
void DecisionTreeClassifier::fit(float* X, int samples, int* y, int features){
- if(this->kernel != nullptr){
- delete kernel;
- }
-
if (splittingTree != nullptr){
deleteTree(splittingTree);
}
@@ -30,26 +25,9 @@ void DecisionTreeClassifier::fit(float* X, int samples, int* y, int features){
throw invalid_argument("Invalid argument, there must be 1 or more samples to train on.");
}
- if(useImageKernelPreprocessing){
- float kernelMatrix[9] = {
- -1, 0, 1,
- -2, 0, -2,
- -1, 0, 1
- };
- this->kernel = new Kernel(3,3, kernelMatrix);
- KernelOutput out = this->kernel->augmentWithKernel(X,samples,features);
- X = out.features;
- features = out.featureCount;
- }
-
-
-
splittingTree = recurse(X, samples, y, features, depth);
featureCount = features;
- if(useImageKernelPreprocessing){
- delete[] X;
- }
}
@@ -198,13 +176,6 @@ int* DecisionTreeClassifier::predict(float* X, int samples, int features) {
throw logic_error("Unable to predict prior to calling fit().");
}
-
- if(useImageKernelPreprocessing){
- KernelOutput out = this->kernel->augmentWithKernel(X,samples,features);
- X = out.features;
- features = out.featureCount;
- }
-
if(features != this->featureCount){
throw invalid_argument("Incorrect number of features for prediction.");
}
@@ -227,20 +198,12 @@ int* DecisionTreeClassifier::predict(float* X, int samples, int features) {
predictions[i] = current->getClassification();
}
- if(useImageKernelPreprocessing){
- delete [] X;
- }
-
return predictions;
}
DecisionTreeClassifier::~DecisionTreeClassifier(){
deleteTree(splittingTree);
- if(this->kernel != nullptr){
- delete kernel;
- }
-
}
void DecisionTreeClassifier::deleteTree(TreeNode* node){
diff --git a/rewrite/cpp/DecisionTreeClassifier.h b/rewrite/cpp/DecisionTreeClassifier.h
@@ -1,10 +1,9 @@
#include "TreeNode.h"
#include <vector>
-#include "Kernel.h"
class DecisionTreeClassifier{
public:
- DecisionTreeClassifier(int depth, bool useImageKernelAugmentation);
+ DecisionTreeClassifier(int depth);
void fit(float* X, int samples, int* y, int features);
int* predict(float* X, int samples, int features);
std::string getDot();
@@ -17,6 +16,4 @@ class DecisionTreeClassifier{
TreeNode* recurse(float* X, int samples, int* y, int features, int depth);
int primaryClass(int* y, int labelCount);
void deleteTree(TreeNode* node);
- bool useImageKernelPreprocessing;
- Kernel* kernel = nullptr;
};
diff --git a/rewrite/cpp/Kernel.cpp b/rewrite/cpp/Kernel.cpp
@@ -1,79 +0,0 @@
-#include "Kernel.h"
-#include <iostream>
-#include <stdexcept>
-#include "math.h"
-
-// verified proper inputs
-Kernel::Kernel(int X_dim, int y_dim, float* kernel){
-
- if(X_dim <= 2 || y_dim <= 2){
- throw std::invalid_argument("Kernel has a minimum size of 3x3.");
- }
-
- if(X_dim % 2 == 0 || y_dim % 2 == 0){
- throw std::invalid_argument("Kernel must have odd size edges.");
- }
-
- this->X_dim = X_dim;
- this->y_dim = y_dim;
- this->kernel = kernel;
-
- float summed = 0;
- for(int y = 0 ; y < y_dim; ++y){
- for(int x = 0 ; x < X_dim; ++x){
- summed += kernel[x + (X_dim * y)];
- }
- }
-
- this->sum = summed;
-}
-
-KernelOutput Kernel::augmentWithKernel(float* X, int samples, int features){
- KernelOutput out = KernelOutput();
- out.featureCount = features * 2;
-
- float* X_return = new float[features*samples*2];
-
- for(int i = 0 ; i < samples; ++i){
- for(int x = 0 ; x < features; ++x){
- X_return[i*features + x] = X[i*features + x];
- }
- }
-
-
- int currentOffset = samples * features;
- for(int y = 0 ; y < features; ++y){
- for(int x = 0 ; x < samples; ++x){
- X_return[currentOffset] = computeIndex(X, x, y, features, samples);
- currentOffset += 1;
- }
- }
-
- out.features = X_return;
-
- return out;
-}
-
-float Kernel::computeIndex(float* X, int xPos, int yPos, int features, int samples){
-
- float average = 0;
- int itr = 0;
- for(int i = 0 ; i < y_dim ; ++i){
- for(int x = 0; x < X_dim ; ++x){
-
- int currentX = xPos + x;
- int currentY = (i + yPos);
- if(currentX < 0 || currentY < 0 || currentX >= features || currentY >= samples){
- continue;
- }
- float currentKern = kernel[x + i*X_dim];
- float currentValue = X[xPos + x + ((i + yPos) * features)];
-
- itr += 1;
- average += currentValue * currentKern;
- }
- }
- average /= sum;
-
- return average;
-}
diff --git a/rewrite/cpp/Kernel.h b/rewrite/cpp/Kernel.h
@@ -1,16 +0,0 @@
-struct KernelOutput{
- float* features;
- int featureCount;
-};
-
-class Kernel{
- public:
- Kernel(int X_dim, int y_dim, float* kernel);
- KernelOutput augmentWithKernel(float* X, int samples, int features);
- private:
- float* kernel;
- int X_dim;
- int y_dim;
- float sum;
- float computeIndex(float* X, int xPos, int yPos, int features, int samples);
-};
diff --git a/rewrite/cpp/bindings.cpp b/rewrite/cpp/bindings.cpp
@@ -8,7 +8,7 @@ namespace py = pybind11;
PYBIND11_MODULE(decision_tree, m) {
py::class_<DecisionTreeClassifier>(m, "DecisionTreeClassifier")
- .def(py::init<int, bool>())
+ .def(py::init<int>())
.def("fit", [](DecisionTreeClassifier &self, py::array_t<float> X, int samples, py::array_t<int> y, int features) {
auto X_buf = X.request(); // Request a buffer from NumPy array
auto y_buf = y.request(); // Request a buffer from NumPy array
diff --git a/rewrite/cpp/test.cpp b/rewrite/cpp/test.cpp
@@ -1,35 +0,0 @@
-#include "Kernel.h"
-#include "iostream"
-int main(){
-
-
- float X[] = {
- 1.0f, 1.0f, 1.0f,
- 1.0f, 1.0f, 1.0f,
- 1.0f, 1.0f, 1.0f
- };
-
- float input[] = {
- 1.0f, 1.0f, 1.0f,
- 1.0f, 5.0f, 1.0f,
- 1.0f, 1.0f, 1.0f
- };
-
- Kernel kern = Kernel(3,3,X);
- KernelOutput out = kern.augmentWithKernel(input, 3, 3);
- for(int i = 0 ; i < 3; ++i){
- for(int x = 0 ; x < 3 ; ++x){
- std::cout << out.features[i*3 + x] << " ";
- }
- std::cout << std::endl;
- }
-
- std::cout << std::endl;
-
- for(int i = 0 ; i < 3; ++i){
- for(int x = 0 ; x < 3 ; ++x){
- std::cout << out.features[(i*3 + x) + 9] << " ";
- }
- std::cout << std::endl;
- }
-}
diff --git a/rewrite/output.csv b/rewrite/output.csv
@@ -1,29 +0,0 @@
-,0
-1,0.0945
-10,0.1327142857142857
-20,0.19542857142857142
-30,0.22857142857142856
-40,0.344
-50,0.29642857142857143
-60,0.3717142857142857
-70,0.4052142857142857
-80,0.3412857142857143
-90,0.44307142857142856
-100,0.4712857142857143
-150,0.4295
-200,0.5545
-250,0.5513571428571429
-300,0.5530714285714285
-350,0.5372142857142858
-400,0.5695
-450,0.6051428571428571
-500,0.5994285714285714
-1000,0.6572857142857143
-1500,0.6706428571428571
-2000,0.6952857142857143
-2500,0.7142142857142857
-3000,0.7461428571428571
-3500,0.7458571428571429
-4000,0.7458571428571429
-4500,0.7596428571428572
-5000,0.7695714285714286