commit af5ed06408451e386fc2c14d47a5732252523704
parent 109df438b78ef299e6937f869f2ed3250b8954d8
Author: AndrewLockVI <andrew@laack.co>
Date: Fri, 9 May 2025 09:58:18 -0500
Added all uws presentation materials.
Diffstat:
13 files changed, 1262 insertions(+), 0 deletions(-)
diff --git a/presentations/uws/presentation.pdf b/presentations/uws/presentation.pdf
Binary files differ.
diff --git a/presentations/uws/presentation.tex b/presentations/uws/presentation.tex
@@ -0,0 +1,451 @@
+\documentclass[10pt]{beamer}
+\usepackage{booktabs}
+
+\usepackage{animate}
+\usepackage{float}
+\usecolortheme[snowy]{owl}
+\usepackage{listings}
+\usepackage{graphicx} % Load graphicx package for image handling
+\usepackage{natbib}
+
+\usepackage{algorithm}
+\usepackage{algpseudocode}
+
+\title{CART-ELC: Oblique Decision Tree \\ Induction via Exhaustive Search}
+\author{Andrew D. Laack}
+\institute{University of Wisconsin-Superior}
+\date{May 13, 2025}
+
+\usepackage{xcolor} % Ensure xcolor is loaded
+
+\usepackage{float}
+\usepackage{pgfplots}
+\pgfplotsset{compat=1.17} % or another compatible version for arXiv
+
+\usepackage{ifthen}
+\usepackage{multirow}
+\usepackage{booktabs}
+
+\newboolean{isFinal}
+\setboolean{isFinal}{false}
+
+\setbeamercolor{section in toc}{fg=white} % Section titles in white
+\setbeamercolor{subsection in toc}{fg=lightgray}
+
+\renewcommand{\thealgorithm}{\arabic{algorithm}:}
+
+\renewcommand{\alglinenumber}[1]{\tiny\color{white}\@#1.}
+
+\usepackage{url}
+\usepackage{hyperref} % Hides ugly link borders
+
+
+\begin{document}
+\section{}
+
+
+\begin{frame}
+\titlepage
+
+\footnotesize
+Source Available:
+
+\texttt{https://github.com/andrewlaack/cart-elc}
+
+\end{frame}
+
+\begin{frame}
+ \frametitle{Problem}
+ How can we determine if someone has diabetes based on their BMI?
+\end{frame}
+
+\begin{frame}
+ \frametitle{Visualization}
+ \begin{figure}[h]
+ \centering
+ \includegraphics[scale=.4]{images/dia1.pdf}
+ \caption{Diabetes Dataset \citep{diabetes} Graph}
+ \end{figure}
+\end{frame}
+
+\begin{frame}
+ \frametitle{Solution}
+ \begin{figure}[h]
+ \centering
+ \includegraphics[scale=.4]{images/dia2.pdf}
+ \caption{Diabetes Dataset Graph w/ Boundary}
+ \end{figure}
+\end{frame}
+
+\begin{frame}
+ \frametitle{Tree}
+ \begin{figure}[h]
+ \centering
+ \includegraphics[scale=.3]{images/tree1.pdf}
+ \caption{Tree Representation}
+ \end{figure}
+\end{frame}
+
+
+\begin{frame}
+ \frametitle{But...}
+ 62.2\% Accuracy
+ \begin{figure}[h]
+ \centering
+ \includegraphics[scale=.4]{images/dia3.pdf}
+ \caption{Full Diabetes Dataset Graph w/ Boundary}
+ \end{figure}
+\end{frame}
+
+
+
+
+% not density, mass
+
+\begin{frame}
+ \frametitle{Extra Feature}
+ \begin{figure}[h]
+ \centering
+ \includegraphics[scale=.4]{images/dia4.pdf}
+ \caption{Diabetes Dataset w/ Glucose (a bit complicated)}
+ \end{figure}
+\end{frame}
+
+\begin{frame}
+ \frametitle{Pseudocode}
+
+ \begin{algorithm}[H]
+ \tiny
+ \caption{CART Algorithm \citep{breiman}}
+ \begin{algorithmic}[1]
+ \Function{fit}{samples, labels, featureCount}
+ \If{homogeneous(labels)}
+ \State \Return Node(majorityClass(labels))
+ \EndIf
+ \State bestSplit, bestSplittingScore $\gets$ None, worstSplittingScore()
+ \For{sample in samples}
+ \For{feature in range(0,featureCount)}
+ \State currentSplit $\gets$ (feature, sample[feature])
+ \State currentSplittingScore $\gets$ evaluateSplit(currentSplit, samples)
+ \If{ isBetterThan(currentSplittingScore, bestSplittingScore)}
+ \State bestSplittingScore, bestSplit $\gets$ currentSplittingScore, currentSplit
+ \EndIf
+ \EndFor
+ \EndFor
+ \State left, right $\gets$ splitDataByBestSplit(samples, labels, bestSplit)
+ \If{left is empty or right is empty}
+ \State \Return Node(majorityClass(labels))
+ \EndIf
+ \State leftSubtree $\gets$ fit(left.samples, left.labels, featureCount)
+ \State rightSubtree $\gets$ fit(right.samples, right.labels, featureCount)
+ \State tree $\gets$ Node(bestSplit)
+ \State tree.left, tree.right $\gets$ leftSubtree, rightSubtree
+ \State \Return tree
+ \EndFunction
+ \end{algorithmic}
+ \end{algorithm}
+
+\end{frame}
+
+\begin{frame}
+ \frametitle{Quantification of Goodness (Splitting Criterion)}
+
+ \begin{equation*}
+ G = p_L \left(1 - \sum_{j\in C} p_{Lj}^2 \right) + p_R \left(1 - \sum_{j \in C} p_{Rj}^2 \right)
+ \label{gini_equ}
+ \end{equation*}
+
+\end{frame}
+
+
+% DEPTH: 1 - 0.7356770833333334
+% DEPTH: 2 - 0.7721354166666666
+% DEPTH: 3 - 0.7734375
+% DEPTH: 4 - 0.78515625
+
+\begin{frame}
+ \frametitle{Boundary (Depth = 1)}
+ \begin{figure}[h]
+ \centering
+ \includegraphics[scale=.35]{images/images/diabetes_tree_bmi_glucose_depth_1.png}
+ \caption{73.6\% Accuracy}
+ \end{figure}
+\end{frame}
+
+\begin{frame}
+ \frametitle{Boundary (Depth = 2)}
+ \begin{figure}[h]
+ \centering
+ \includegraphics[scale=.35]{images/images/diabetes_tree_bmi_glucose_depth_2.png}
+ \caption{77.2\% Accuracy}
+ \end{figure}
+\end{frame}
+
+
+\begin{frame}
+ \frametitle{Boundary (Depth = 3)}
+ \begin{figure}[h]
+ \centering
+ \includegraphics[scale=.35]{images/images/diabetes_tree_bmi_glucose_depth_3.png}
+ \caption{77.3\% Accuracy}
+ \end{figure}
+\end{frame}
+
+\begin{frame}
+ \frametitle{Boundary (Depth = 4)}
+ \begin{figure}[h]
+ \centering
+ \includegraphics[scale=.35]{images/images/diabetes_tree_bmi_glucose_depth_4.png}
+ \caption{78.5\% Accuracy}
+ \end{figure}
+\end{frame}
+
+\begin{frame}
+ \frametitle{Tree (Depth = 4)}
+ \begin{figure}[h]
+ \centering
+ \includegraphics[scale=.225]{images/tree2.pdf}
+ \caption{Tree Representation}
+ \end{figure}
+\end{frame}
+
+\begin{frame}
+ \frametitle{New Problem}
+ \begin{figure}[h]
+ \centering
+ \includegraphics[scale=.25]{correlation/original_graph.png}
+ \caption{Cancer Diagnosis (Synthetic)}
+ \end{figure}
+\end{frame}
+
+\begin{frame}
+ \frametitle{Boundary (Depth = 1)}
+ \begin{figure}[h]
+ \centering
+ \includegraphics[scale=.25]{correlation/images/decision_tree_depth_1.png}
+ \caption{Cancer Diagnosis (Synthetic)}
+ \end{figure}
+\end{frame}
+
+\begin{frame}
+ \frametitle{Boundary (Depth = 5)}
+ \begin{figure}[h]
+ \centering
+ \includegraphics[scale=.25]{correlation/images/decision_tree_depth_5.png}
+ \caption{Cancer Diagnosis (Synthetic)}
+ \end{figure}
+\end{frame}
+
+
+\begin{frame}
+ \frametitle{Boundary (Depth = 10)}
+ \begin{figure}[h]
+ \centering
+ \includegraphics[scale=.25]{correlation/images/decision_tree_depth_10.png}
+ \caption{Cancer Diagnosis (Synthetic)}
+ \end{figure}
+\end{frame}
+
+\begin{frame}
+ \frametitle{Boundary (Depth = 20)}
+ \begin{figure}[h]
+ \centering
+ \includegraphics[scale=.25]{correlation/images/decision_tree_depth_20.png}
+ \caption{Cancer Diagnosis (Synthetic)}
+ \end{figure}
+\end{frame}
+
+\begin{frame}
+ \frametitle{Tree (Depth = 20)}
+ \begin{figure}[h]
+ \centering
+ \includegraphics[scale=.04]{correlation/tree3.pdf}
+ \caption{Tree Visualization}
+ \end{figure}
+\end{frame}
+
+\begin{frame}
+ \frametitle{Solution}
+ \begin{figure}[h]
+ \centering
+ \includegraphics[scale=.25]{correlation/oblique.png}
+ \caption{Oblique Split}
+ \end{figure}
+\end{frame}
+
+\begin{frame}
+ \frametitle{How?}
+ \begin{enumerate}
+ \item CART-LC \citep{breiman}
+ \item OC1 \citep{oc1}
+ \item HHCART \citep{hhcart}
+ \item CART-ELC \citep{laack}
+ \end{enumerate}
+\end{frame}
+
+\begin{frame}
+ \frametitle{Pseudocode}
+
+ \begin{algorithm}[H]
+ \tiny
+ \caption{CART-ELC}
+ \begin{algorithmic}[1]
+ \Function{fit}{samples, labels, $m$, $r$}
+ \If{homogeneous(labels)}
+ \State \Return Node(majorityClass(labels))
+ \EndIf
+ \State bestSplit, bestSplittingScore $\gets$ None, worstSplittingScore()
+ \For{selectedSamples in combinations(samples, $r$)}
+ \For{selectedFeatures in combinations($m$, $r$)}
+ \State vectorsToPassThrough $\gets$ featureSubset(selectedSamples, selectedFeatures)
+ \State currentSplit $\gets$ findHyperplanePassingThrough(vectorsToPassThrough)
+ \State currentSplittingScore $\gets$ evaluateSplit(currentSplit, samples)
+ \If{ isBetterThan(currentSplittingScore, bestSplittingScore)}
+ \State bestSplittingScore, bestSplit $\gets$ currentSplittingScore, currentSplit
+ \EndIf
+ \EndFor
+ \EndFor
+ \State left, right $\gets$ splitDataByBestSplit(samples, labels, bestSplit)
+ \If{left is empty or right is empty}
+ \State \Return Node(majorityClass(labels))
+ \EndIf
+ \State leftSubtree $\gets$ fit(left.samples, left.labels, $m$, $r$)
+ \State rightSubtree $\gets$ fit(right.samples, right.labels, $m$, $r$)
+ \State tree $\gets$ Node(bestSplit)
+ \State tree.left, tree.right $\gets$ leftSubtree, rightSubtree
+ \State \Return tree
+ \EndFunction
+ \end{algorithmic}
+ \end{algorithm}
+\end{frame}
+
+\begin{frame}
+ \frametitle{Asymptotic Time Complexity per Split (CART-ELC)}
+\[
+ \Theta\left( \binom{n}{r} \cdot \binom{m}{r} \cdot r(r^2 + n) \right)
+\]
+
+\end{frame}
+
+\begin{frame}
+ \frametitle{Operations for a Single Split (CART-ELC)}
+
+\begin{table}[h]
+ \centering
+ \footnotesize
+ \caption{Operations for single split assuming multiplicative factor of one, no additive constants, and $r = m$.}
+ \begin{tabular}{lcccccc}
+ \addlinespace
+ \toprule
+ \multirow{2}{*}{r} & \multicolumn{6}{c}{n} \\
+ \cmidrule(lr){2-7}
+ & 100 & 500 & 1000 & 5000 & 10000 & 20000 \\
+ \midrule
+ 1 & 1.01e+04 & 2.50e+05 & 1.00e+06 & 2.50e+07 & 1.00e+08 & 4.00e+08 \\
+ 2 & 1.03e+06 & 1.26e+08 & 1.00e+09 & 1.25e+11 & 1.00e+12 & 8.00e+12 \\
+ 3 & 5.29e+07 & 3.16e+10 & 5.03e+11 & 3.13e+14 & 5.00e+15 & 8.00e+16 \\
+ 4 & 1.82e+09 & 5.31e+12 & 1.68e+14 & 5.22e+17 & 1.67e+19 & 5.34e+20 \\
+ 5 & 4.71e+10 & 6.70e+14 & 4.23e+16 & 6.53e+20 & 4.17e+22 & 2.67e+24 \\
+ 6 & 9.73e+11 & 6.77e+16 & 8.50e+18 & 6.54e+23 & 8.35e+25 & 1.07e+28 \\
+ 7 & 1.67e+13 & 5.71e+18 & 1.43e+21 & 5.46e+26 & 1.39e+29 & 3.56e+31 \\
+ 8 & 2.44e+14 & 4.13e+20 & 2.05e+23 & 3.90e+29 & 1.99e+32 & 1.02e+35 \\
+ 9 & 3.10e+15 & 2.62e+22 & 2.59e+25 & 2.44e+32 & 2.49e+35 & 2.55e+38 \\
+ 10 & 3.46e+16 & 1.47e+24 & 2.90e+27 & 1.36e+35 & 2.77e+38 & 5.66e+41 \\
+ \bottomrule
+ \end{tabular}
+ \label{fig:operations}
+\end{table}
+\end{frame}
+
+\begin{frame}
+ \frametitle{Empirical Comparison}
+ \tiny
+
+\begin{table}[h]
+ \centering
+ \caption{Accuracy and tree size comparison across decision tree induction algorithms.}
+ \begin{tabular}{lcccccc}
+ \addlinespace
+ \toprule
+ \multirow{2}{*}{Algorithm} & \multicolumn{6}{c}{Accuracy} \\
+ \cmidrule(lr){2-7}
+ & S/G Bright & S/G Dim & Cancer & Iris & Housing & Diabetes \\
+ \midrule
+ CART-ELC & \textbf{98.9 ± 0.2} & \textbf{95.2 ± 0.5} & 96.3 ± 0.4 & 95.1 ± 0.8 & 83.5 ± 0.7 & \textbf{74.5 ± 1.3} \\
+
+ HHCART(A) & 98.3 ± 0.5 & 93.7 ± 0.8 & \textbf{96.9 ± 0.3} & \textbf{95.5 ± 1.4} & \textbf{83.9 ± 0.8} & 73.2 ± 1.2 \\
+ HHCART(D) & 98.1 ± 0.4 & 93.7 ± 0.9 & \textbf{96.9 ± 0.3} & 94.3 ± 1.5 & 82.2 ± 1.4 & 73.2 ± 1.2 \\
+ OC1 & \textbf{98.9 ± 0.2} & 95.0 ± 0.3 & 96.2 ± 0.3 & 94.7 ± 3.1 & 82.4 ± 0.8 & 74.4 ± 1.0 \\
+ OC1-AP & 98.1 ± 0.2 & 94.0 ± 0.2 & 94.5 ± 0.5 & 92.7 ± 2.4 & 81.8 ± 1.0 & 73.8 ± 1.0 \\
+ CART-LC & 98.8 ± 0.2 & 92.8 ± 0.5 & 95.3 ± 0.6 & 93.5 ± 2.9 & 81.4 ± 1.2 & 73.7 ± 1.2 \\
+ CART-AP & 98.5 ± 0.5 & 94.2 ± 0.7 & 95.0 ± 1.6 & 93.8 ± 3.7 & 82.1 ± 3.5 & 73.9 ± 3.4 \\
+ C4.5 & 98.5 ± 0.5 & 93.3 ± 0.8 & 95.3 ± 2.0 & 95.1 ± 3.2 & 83.2 ± 3.1 & 71.4 ± 3.3 \\
+ \midrule
+ \multirow{2}{*}{Algorithm} & \multicolumn{6}{c}{Tree Size} \\
+ \cmidrule(lr){2-7}
+ & S/G Bright & S/G Dim & Cancer & Iris & Housing & Diabetes \\
+ \midrule
+ CART-ELC & \textbf{3.7 ± 0.2} & \textbf{9.8 ± 4.2} & \textbf{2.0 ± 0.0} & 4.8 ± 0.1 & \textbf{4.0 ± 0.0} & \textbf{4.0 ± 0.0} \\
+ HHCART(A) & 6.1 ± 0.3 & 14.6 ± 4.8 & \textbf{2.0 ± 0.0} & \textbf{3.1 ± 0.1} & 7.8 ± 0.2 & \textbf{4.0 ± 0.0} \\
+ HHCART(D) & 6.3 ± 0.4 & 14.9 ± 5.0 & \textbf{2.0 ± 0.0} & 4.7 ± 0.1 & 23.3 ± 0.8 & \textbf{4.0 ± 0.0} \\
+ OC1 & 4.3 ± 1.0 & 13.0 ± 8.7 & 2.8 ± 0.9 & \textbf{3.1 ± 0.2} & 6.9 ± 3.2 & 5.4 ± 3.8 \\
+ OC1-AP & 6.9 ± 2.4 & 29.3 ± 8.8 & 6.4 ± 1.7 & 3.2 ± 0.3 & 8.6 ± 4.5 & 11.4 ± 7.5 \\
+ CART-LC & 3.9 ± 1.3 & 24.2 ± 8.7 & 3.5 ± 0.9 & 3.2 ± 0.3 & 5.8 ± 3.2 & 8.0 ± 5.2 \\
+ CART-AP & 13.9 ± 5.7 & 30.4 ± 10.0 & 11.5 ± 7.2 & 4.3 ± 1.6 & 15.1 ± 10 & 11.5 ± 9.1 \\
+ C4.5 & 14.3 ± 2.2 & 77.9 ± 7.4 & 9.8 ± 2.2 & 4.6 ± 0.8 & 28.2 ± 3.3 & 56.3 ± 7.9 \\
+ \bottomrule
+ \end{tabular}
+ \label{fig:results}
+\end{table}
+\end{frame}
+
+\begin{frame}
+ \frametitle{Cohen's d}
+ \tiny
+
+\begin{table}[h]
+ \centering
+ \caption{Cohen's d effect size for accuracies between various models and CART-ELC. Comparisons with $p < 0.05$ are bolded.}
+ \begin{tabular}{lccccccc}
+ \addlinespace
+ \toprule
+ \multirow{1}{*}{Algorithm} & \multirow{1}{*}{S/G Bright} & \multirow{1}{*}{S/G Dim} & \multirow{1}{*}{Cancer} & \multirow{1}{*}{Iris} & \multirow{1}{*}{Housing} & \multirow{1}{*}{Diabetes} \\
+ \midrule
+
+ HHCART(A) & \textbf{1.576} & \textbf{2.249} & \textbf{-1.697} & -0.351 & -0.532 & \textbf{1.039} \\
+ HHCART(D) & \textbf{2.530} & \textbf{2.060} & \textbf{-1.697} & 0.666 & \textbf{1.175} & \textbf{1.039} \\
+ OC1 & 0.000 & 0.485 & 0.283 & 0.177 & \textbf{1.463} & 0.086 \\
+ CART-LC & 0.500 & \textbf{4.800} & \textbf{1.961} & 0.752 & \textbf{2.138} & 0.639 \\
+
+ OC1-AP & \textbf{4.000} & \textbf{3.151} & \textbf{3.976} & \textbf{1.342} & \textbf{1.970} & 0.604 \\
+ CART-AP & \textbf{1.050} & \textbf{1.644} & \textbf{1.115} & 0.486 & 0.555 & 0.233 \\
+ C4.5 & \textbf{1.050} & \textbf{2.848} & 0.693 & 0.000 & 0.133 & \textbf{1.236} \\
+ \bottomrule
+\end{tabular}
+\label{fig:cohens_d}
+\end{table}
+\end{frame}
+
+\begin{frame}
+ \frametitle{Future Directions}
+
+ \begin{enumerate}
+ \item Smaller Subsets of Candidates
+ \begin{enumerate}
+ \item Active Sampling
+ \item Feature Selection
+ \end{enumerate}
+
+ \item Random Forests \citep{rndfrst}
+ \item Stochastic Gradient Boosting \citep{sgboost}
+ \end{enumerate}
+
+\end{frame}
+
+\begin{frame}
+ \frametitle{References}
+\bibliographystyle{plain}
+\bibliography{references}
+\end{frame}
+
+\end{document}
diff --git a/presentations/uws/python-scripts/combinationGraphDataGen.py b/presentations/uws/python-scripts/combinationGraphDataGen.py
@@ -0,0 +1,28 @@
+import math
+
+nVals = [100, 200, 300, 400, 500]
+rVals = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+
+results = []
+
+for r in rVals:
+ results.append([])
+ for n in nVals:
+ # With m = r, math.comb(m, r) = math.comb(r, r) = 1.
+ # Therefore, the complexity is:
+ # math.comb(n, r) * (r^3 + r * n)
+ result = math.comb(n, r) * (r**3 + r*n)
+ results[-1].append(str(result))
+
+for res in rVals:
+ if res == 0:
+ continue # Skip r = 0 since it gives 0 instructions.
+ print(str(res) + " & ", end="")
+ itr = 1
+ for r_val in results[res]:
+ if itr == len(nVals):
+ print(r_val + " \\\\")
+ else:
+ print(r_val + " &", end=" ")
+ itr += 1
+
diff --git a/presentations/uws/python-scripts/correlatedDataGraph.py b/presentations/uws/python-scripts/correlatedDataGraph.py
@@ -0,0 +1,100 @@
+import numpy as np
+import matplotlib.pyplot as plt
+from sklearn.tree import DecisionTreeClassifier, plot_tree
+import os
+
+# Create directory for output images if not exists
+os.makedirs("images", exist_ok=True)
+
+# Dark mode settings using matplotlib only
+# plt.rcParams.update({
+# "axes.facecolor": "#000000",
+# "axes.edgecolor": "#333333",
+# "figure.facecolor": "#000000",
+# "savefig.facecolor": "#000000",
+# "text.color": "white",
+# "axes.labelcolor": "white",
+# "xtick.color": "white",
+# "ytick.color": "white",
+# "grid.color": "gray",
+# "axes.grid": True
+# })
+
+ARRSIZE = 500
+
+# Generate data for two classes
+X_red = np.random.rand(ARRSIZE) * 10
+X_blue = np.random.rand(ARRSIZE) * 10
+
+y_red = X_red / 1.5 + np.random.normal(0, 0.5, ARRSIZE)
+y_blue = X_blue / 1.5 + np.random.normal(0, 0.5, ARRSIZE)
+y_blue += 1.5 # Shift blue class upward
+
+# Combine the data
+X_combined = np.concatenate([X_red, X_blue])
+y_combined = np.concatenate([y_red, y_blue])
+labelsRed = np.zeros(ARRSIZE)
+labelsBlue = np.ones(ARRSIZE)
+labels = np.concatenate([labelsRed, labelsBlue])
+
+# Prepare the feature matrix for training
+data = np.column_stack((X_combined, y_combined))
+
+# Create meshgrid for decision boundaries
+xx, yy = np.meshgrid(np.linspace(0, 10, 500), np.linspace(0, 10, 500))
+grid = np.c_[xx.ravel(), yy.ravel()]
+
+# Save the original scatter plot
+plt.figure(figsize=(16, 9))
+plt.scatter(X_combined, y_combined, c=labels, cmap='bwr', s=60, edgecolors='#000000')
+plt.xlim(0, 10)
+plt.ylim(0, 10)
+plt.xlabel('Tumor Size')
+plt.ylabel('Mass')
+plt.tight_layout()
+plt.savefig("original_graph.png", dpi=300, bbox_inches='tight')
+plt.close()
+
+# Loop through depths
+for depth in range(1, 21):
+ clf = DecisionTreeClassifier(max_depth=depth, random_state=42)
+ clf.fit(data, labels)
+
+ Z = clf.predict_proba(grid)[:, 1].reshape(xx.shape)
+
+ plt.figure(figsize=(16, 9))
+
+ plt.contourf(xx, yy, Z, levels=[0, 0.5, 1], colors=['#0000FF', '#FF0000'], alpha=0.3)
+ plt.contour(xx, yy, Z, levels=[0.5], colors='#000000', linewidths=2)
+
+ plt.scatter(X_combined, y_combined, c=labels, cmap='bwr', s=60, edgecolors='#000000')
+
+ plt.xlim(0, 10)
+ plt.ylim(0, 10)
+ plt.xlabel('Tumor Size')
+ plt.ylabel('Mass')
+ plt.tight_layout()
+ plt.savefig(f"images/decision_tree_depth_{depth}.png", dpi=300, bbox_inches='tight')
+ plt.close()
+
+ if depth == 20:
+ fig, ax = plt.subplots(figsize=(90, 90))
+
+ # Set dark background
+
+ # Plot the tree without default fill
+ plot_tree(
+ clf,
+ ax=ax,
+ filled=True,
+ feature_names=["Tumor Size", "Mass"],
+ class_names=["Red", "Blue"],
+ impurity=False,
+ proportion=False,
+ rounded=True,
+ fontsize=10 # Use readable size, adjust as needed
+ )
+
+ # Save with dark facecolor
+ plt.savefig("tree3.pdf", dpi=300, bbox_inches='tight', facecolor=fig.get_facecolor())
+ plt.close()
diff --git a/presentations/uws/python-scripts/dia3.py b/presentations/uws/python-scripts/dia3.py
@@ -0,0 +1,50 @@
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+
+# Dark mode settings using matplotlib only
+# plt.rcParams.update({
+# "axes.facecolor": "#000000",
+# "axes.edgecolor": "#333333",
+# "figure.facecolor": "#000000",
+# "savefig.facecolor": "#000000",
+# "text.color": "white",
+# "axes.labelcolor": "white",
+# "xtick.color": "white",
+# "ytick.color": "white",
+# "grid.color": "gray",
+# "axes.grid": True
+# })
+
+df = pd.read_csv('./diabetes.csv')
+X = df['BMI'].to_numpy()
+y = df['Outcome'].to_numpy()
+
+# Create a scatter plot using matplotlib only
+plt.figure(figsize=(10, 6))
+plt.scatter(X, y, c=y, cmap='bwr', s=40, edgecolors='#000000')
+
+plt.xlabel("BMI")
+plt.ylabel("Diagnosis")
+
+plt.axvspan(0, 32.25, color='blue', alpha=0.3) # Light blue to the left
+plt.axvspan(32.25, 50, color='red', alpha=0.3) # Light red to the right
+
+plt.axvline(32.25, color="black", linestyle="-")
+
+
+plt.xlim([0, 50])
+plt.tight_layout()
+
+# Save the figure instead of displaying it
+plt.savefig("dia3.pdf")
+
+plt.close() # Closes the plot so it doesn't display or use memory
+
+# Calculate classification accuracy based on threshold
+wrong = 0
+for i in range(len(X)):
+ if (y[i] == 1 and X[i] < 32.25) or (y[i] == 0 and X[i] > 32.25):
+ wrong += 1
+
+print((len(X) - wrong) / len(X))
diff --git a/presentations/uws/python-scripts/dia4.py b/presentations/uws/python-scripts/dia4.py
@@ -0,0 +1,54 @@
+import pandas as pd
+import matplotlib.pyplot as plt
+import numpy as np
+from sklearn import tree
+from sklearn import metrics
+
+# Dark mode settings using matplotlib only
+# plt.rcParams.update({
+# "axes.facecolor": "#000000",
+# "axes.edgecolor": "#333333",
+# "figure.facecolor": "#000000",
+# "savefig.facecolor": "#000000",
+# "text.color": "white",
+# "axes.labelcolor": "white",
+# "xtick.color": "white",
+# "ytick.color": "white",
+# "grid.color": "gray",
+# "axes.grid": True
+# })
+
+df = pd.read_csv('./diabetes.csv')
+X = df['BMI']
+y = df['Glucose']
+color = df['Outcome']
+
+xSub = []
+ySub = []
+X = X.to_numpy()
+y = y.to_numpy()
+
+# Create scatter plot with dark styling
+plt.figure(figsize=(10, 6))
+plt.scatter(X, y, c=color, cmap='bwr', s=60, edgecolors="#000000")
+plt.xlabel("BMI")
+plt.ylabel("Glucose")
+plt.xlim([0, 50])
+plt.tight_layout()
+
+# Save the graph as dia4.pdf
+plt.savefig("dia4.pdf")
+plt.close()
+
+# Decision Tree for prediction
+targ = []
+
+for i in range(0, len(X)):
+ targ.append([X[i], y[i]])
+
+dt = tree.DecisionTreeClassifier(max_depth=5)
+dt.fit(targ, color)
+preds = dt.predict(targ)
+
+# Print the accuracy score
+print(metrics.accuracy_score(y_pred=preds, y_true=color))
diff --git a/presentations/uws/python-scripts/diabetes.py b/presentations/uws/python-scripts/diabetes.py
@@ -0,0 +1,62 @@
+import pandas as pd
+import matplotlib.pyplot as plt
+import numpy as np
+from sklearn.tree import DecisionTreeClassifier, plot_tree
+
+
+df = pd.read_csv('./diabetes.csv')
+X = df['BMI']
+y = df['Outcome']
+
+xSub = []
+ySub = []
+X = X.to_numpy()
+y = y.to_numpy()
+
+for i in range(60, 90):
+ if y[i] == 0:
+ if X[i] > 32:
+ continue
+ xSub.append(X[i])
+ ySub.append(y[i])
+
+xSub = np.array(xSub).reshape(-1, 1)
+ySub = np.array(ySub).reshape(-1, 1)
+
+clf = DecisionTreeClassifier(max_depth=1)
+clf.fit(xSub, ySub)
+
+# Save decision tree with dark background
+fig, ax = plt.subplots(figsize=(8, 6))
+plot_tree(clf, filled=True, feature_names=["BMI"], class_names=["No Diabetes", "Diabetes"], ax=ax)
+plt.tight_layout()
+plt.savefig("tree1.pdf")
+plt.close()
+
+# Save first scatter plot (dia1)
+plt.figure(figsize=(10, 6))
+plt.scatter(xSub, ySub, c=ySub, cmap='bwr', s=60, edgecolors='#000000')
+plt.xlabel("BMI")
+plt.ylabel("Diagnosis")
+plt.xlim([0, 50])
+plt.tight_layout()
+plt.savefig("dia1.pdf")
+plt.close()
+
+# Save second scatter plot with vertical line at 32.25 (dia2)
+plt.figure(figsize=(10, 6))
+plt.scatter(xSub, ySub, c=ySub, cmap='bwr', s=60, edgecolors="#000000")
+
+# Add shaded regions
+plt.axvspan(0, 32.25, color='blue', alpha=0.3) # Light blue to the left
+plt.axvspan(32.25, 50, color='red', alpha=0.3) # Light red to the right
+
+# Vertical decision boundary
+plt.axvline(32.25, color="black", linestyle="-")
+
+plt.xlabel("BMI")
+plt.ylabel("Diagnosis")
+plt.xlim([0, 50])
+plt.tight_layout()
+plt.savefig("dia2.pdf")
+plt.close()
diff --git a/presentations/uws/python-scripts/genBetter.py b/presentations/uws/python-scripts/genBetter.py
@@ -0,0 +1,30 @@
+import matplotlib.pyplot as plt
+import seaborn as sns
+import random
+
+x = []
+y = []
+for i in range(0,100):
+ rnd = (random.random() * 300) + 100
+ rndAdd = rnd + (random.random() * 140 - 70)
+
+ x.append(rndAdd)
+
+ if rnd > (286-249)/2 + 249:
+ y.append(1)
+ else:
+ y.append(0)
+plt.scatter(x,y, c=y, cmap='bwr')
+plt.axvline((286-249)/2 + 249)
+
+# plt.show()
+
+wrong = 0
+for i in range(0,len(x)):
+ if x[i] > (286-249)/2 + 249:
+ if y[i] == 0:
+ wrong += 1
+ else:
+ if y[i] == 1:
+ wrong += 1
+print((len(x) - wrong) / len(x))
diff --git a/presentations/uws/python-scripts/genNew.py b/presentations/uws/python-scripts/genNew.py
@@ -0,0 +1,22 @@
+import matplotlib.pyplot as plt
+import seaborn as sns
+import random
+
+x = []
+y = []
+target = []
+
+for i in range(0,1000):
+ rnd = (random.random() * 300) + 100
+ rndAdd = rnd + (random.random() * 140 - 70)
+ x.append(rndAdd)
+ height = (random.random() * 40 + 30) + rndAdd * 0.1
+ y.append(height)
+
+ if rnd / height > 4:
+ target.append(1)
+ else:
+ target.append(0)
+plt.scatter(y,x, c=target, cmap='bwr')
+
+plt.show()
diff --git a/presentations/uws/python-scripts/generate2D.py b/presentations/uws/python-scripts/generate2D.py
@@ -0,0 +1,86 @@
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+from sklearn.tree import DecisionTreeClassifier, plot_tree
+from sklearn.metrics import accuracy_score
+import os
+
+# Create output directory
+os.makedirs("images", exist_ok=True)
+
+# Dark mode settings using matplotlib only
+# plt.rcParams.update({
+# "axes.facecolor": "#000000",
+# "axes.edgecolor": "#333333",
+# "figure.facecolor": "#000000",
+# "savefig.facecolor": "#000000",
+# "text.color": "white",
+# "axes.labelcolor": "white",
+# "xtick.color": "white",
+# "ytick.color": "white",
+# "grid.color": "gray",
+# "axes.grid": True
+# })
+
+# Load dataset
+df = pd.read_csv('./diabetes.csv')
+
+# Extract features and target
+X_bmi = df['BMI'].to_numpy()
+X_glucose = df['Glucose'].to_numpy()
+y = df['Outcome'].to_numpy()
+
+# Combine features into a 2D array
+data = np.column_stack((X_bmi, X_glucose))
+
+# Create meshgrid for decision region plotting
+xx, yy = np.meshgrid(np.linspace(0, 70, 500), np.linspace(0, 200, 500))
+grid = np.c_[xx.ravel(), yy.ravel()]
+
+# Plot the original data
+plt.figure(figsize=(12, 6))
+plt.scatter(X_bmi, X_glucose, c=y, cmap='bwr', s=60, edgecolors="#000000")
+plt.xlabel("BMI")
+plt.ylabel("Glucose")
+plt.title("Diabetes Dataset: BMI vs Glucose")
+plt.xlim(0, 70)
+plt.ylim(0, 200)
+plt.tight_layout()
+plt.savefig("original_diabetes_plot_bmi_glucose.png", dpi=300, bbox_inches='tight')
+plt.close()
+
+# Train decision trees and plot decision boundaries
+for depth in range(1, 5):
+ clf = DecisionTreeClassifier(max_depth=depth, random_state=42)
+ clf.fit(data, y)
+ preds = clf.predict(data)
+
+ # Plot decision tree at max depth
+ if depth == 4:
+ fig, ax = plt.subplots(figsize=(14, 14))
+ plot_tree(clf, feature_names=["BMI", "Glucose"], class_names=["No Diabetes", "Diabetes"], ax=ax, filled=True)
+
+ # Save decision tree plot with a black background
+ plt.savefig("tree2.pdf", bbox_inches='tight')
+ plt.close()
+
+ # Print accuracy
+ acc = accuracy_score(y_pred=preds, y_true=y)
+ print(f"DEPTH: {depth} - Accuracy: {acc:.4f}")
+
+ # Plot decision boundaries
+ Z = clf.predict_proba(grid)[:, 1].reshape(xx.shape)
+
+ plt.figure(figsize=(12, 6))
+ plt.contourf(xx, yy, Z, levels=[0, 0.5, 1], colors=['#0000FF', '#FF0000'], alpha=0.3)
+ plt.contour(xx, yy, Z, levels=[0.5], colors='black', linewidths=2)
+
+ plt.scatter(X_bmi, X_glucose, c=y, cmap='bwr', s=60, edgecolors="#000000")
+ plt.xlabel("BMI")
+ plt.ylabel("Glucose")
+ plt.title(f"Decision Tree (Depth {depth})")
+ plt.xlim(0, 70)
+ plt.ylim(0, 200)
+ plt.tight_layout()
+ plt.savefig(f"images/diabetes_tree_bmi_glucose_depth_{depth}.png", dpi=300, bbox_inches='tight')
+ plt.close()
diff --git a/presentations/uws/python-scripts/generateDiabetes.py b/presentations/uws/python-scripts/generateDiabetes.py
@@ -0,0 +1,17 @@
+import matplotlib.pyplot as plt
+import seaborn as sns
+
+x = [249 , 286 , 380 , 112 , 385 , 163 , 208 , 121 , 166 , 218]
+y = [0 ,1 ,1 ,0 ,1 ,0 ,0 , 0 ,0 ,0]
+
+plt.scatter(x,y, c=y, cmap='bwr')
+plt.show()
+
+
+x = [249 , 286 , 380 , 112 , 385 , 163 , 208 , 121 , 166 , 218]
+y = [0 ,1 ,1 ,0 ,1 ,0 ,0 , 0 ,0 ,0]
+
+plt.scatter(x,y, c=y, cmap='bwr')
+plt.axvline((286-249)/2 + 249)
+
+plt.show()
diff --git a/presentations/uws/python-scripts/main.py b/presentations/uws/python-scripts/main.py
@@ -0,0 +1,163 @@
+from manim import *
+import numpy as np
+class CreateSimpleExample(Scene):
+ def construct(self):
+ colorList = [RED, GREEN, BLUE, YELLOW]
+
+ # Create the grid
+ grid = Axes(
+ x_range=[0, 10, .5],
+ y_range=[0, 10, .5],
+ x_length=9,
+ y_length=5.5,
+ axis_config={
+ "numbers_to_include": np.arange(0, 11, 1),
+ "font_size": 24,
+ },
+ tips=False,
+ )
+
+ y_label = grid.get_y_axis_label("y")
+ x_label = grid.get_x_axis_label("x")
+
+ self.play(Create(grid), Create(y_label), Create(x_label))
+
+ points = [] # To store points and their target locations
+ animations = [] # To store the animations for moving points
+
+ for i in range(200):
+ target_blue = [(np.random.random() * 7.8) - 3.9, (0.5 * np.random.random() * 3.5) + .1, 0]
+ target_red = [(np.random.random() * 7.8) - 3.9, (0.5 * np.random.random() * -3.8) - .1, 0]
+
+ blue_point = Dot(point=[0, 0, 0], color=BLUE, radius=.05)
+ red_point = Dot(point=[0, 0, 0], color=RED, radius=.05)
+
+ self.add(blue_point, red_point)
+
+ # Store the points and their animations to move to target positions
+ animations.append(blue_point.animate.move_to(target_blue))
+ animations.append(red_point.animate.move_to(target_red))
+
+ # Play animations to move points to their target positions
+ self.play(*animations)
+ self.wait(duration=1)
+
+
+ text = Text("Impurity = 0.5").move_to([0,3,0])
+
+
+ self.play(Create(text))
+
+ self.wait(duration=1)
+
+ self.wait(1)
+
+ line = Line(start=[-4, 0,0], end=[4,0,0])
+ self.play(Create(line))
+
+ self.wait(2)
+
+
+ t1 = Rectangle(width=8, height=0).set_fill(BLUE, opacity=.2)
+ t2 = Rectangle(width=8).set_fill(BLUE, opacity=.3).move_to([0,1,0])
+
+ m1 = Rectangle(width=8, height=0).set_fill(RED, opacity=.2)
+ m2 = Rectangle(width=8).set_fill(RED, opacity=.3).move_to([0,-1,0])
+ self.play(Transform(m1,m2), Transform(t1,t2))
+
+ self.wait(duration=1)
+
+ text3 = Text("Impurity = 0.0").move_to([0,3,0])
+ self.play(Transform(text,text3))
+
+ self.wait(duration=2)
+
+
+
+class CreateMoreComplex(Scene):
+ def construct(self):
+ colorList = [RED, GREEN, BLUE, YELLOW]
+
+ # Create the grid
+ grid = Axes(
+ x_range=[0, 10, .5],
+ y_range=[0, 10, .5],
+ x_length=9,
+ y_length=5.5,
+ axis_config={
+ "numbers_to_include": np.arange(0, 11, 1),
+ "font_size": 24,
+ },
+ tips=False,
+ )
+
+ y_label = grid.get_y_axis_label("y")
+ x_label = grid.get_x_axis_label("x")
+
+ self.play(Create(grid), Create(y_label), Create(x_label))
+
+ points = [] # To store points and their target locations
+ animations = [] # To store the animations for moving points
+
+ for i in range(200):
+ target_blue = [(np.random.random() * 7.8) - 3.9, (0.5 * np.random.random() * 3.5) + .1, 0]
+ target_red = [(np.random.random() * (7.4 / 2)) - 3.9, (0.5 * np.random.random() * -3.5) - .1, 0]
+ target_yellow = [(np.random.random() * (7.4 / 2)), (0.5 * np.random.random() * -3.5) - .1, 0]
+
+ blue_point = Dot(point=[0, 0, 0], color=BLUE, radius=.05)
+ red_point = Dot(point=[0, 0, 0], color=RED, radius=.05)
+ yellow_point= Dot(point=[0, 0, 0], color=YELLOW, radius=.05)
+
+ self.add(blue_point, red_point, yellow_point)
+
+ # Store the points and their animations to move to target positions
+ animations.append(blue_point.animate.move_to(target_blue))
+ animations.append(red_point.animate.move_to(target_red))
+ animations.append(yellow_point.animate.move_to(target_yellow))
+
+ # Play animations to move points to their target positions
+ self.play(*animations)
+ self.wait(duration=1)
+
+
+ text = Text("Impurity = 0.666").move_to([0,3,0])
+
+ self.play(Create(text))
+
+ self.wait(duration=1)
+
+ self.wait(1)
+
+ line = Line(start=[-4, 0,0], end=[4,0,0])
+ self.play(Create(line))
+
+ self.wait(2)
+
+
+ t1 = Rectangle(width=8).set_fill(BLUE, opacity=.2).move_to([0,1,0])
+
+ m1 = Rectangle(width=8).set_fill(RED, opacity=.2).move_to([0,-1,0])
+
+ self.play(GrowFromCenter(t1))
+ self.play(GrowFromCenter(m1))
+
+ self.wait(duration=1)
+
+ text3 = Text("Impurity = 0.5").move_to([0,3,0])
+ self.play(Transform(text,text3))
+
+
+ line = Line(start=[0, 0,0], end=[0,-2,0])
+ self.play(Create(line))
+
+ self.play(m1.animate.stretch(.5, dim=0).align_to([0, -1, 0], RIGHT))
+
+ rb = Rectangle(width=4).set_fill(YELLOW, opacity=.3).align_to([0,-1,0], LEFT)
+ rb.shift(DOWN)
+
+ self.play(GrowFromCenter(rb))
+
+
+ text4 = Text("Impurity = 0.0").move_to([0,3,0])
+ self.play(Transform(text,text4))
+ self.wait(duration=2)
diff --git a/presentations/uws/references.bib b/presentations/uws/references.bib
@@ -0,0 +1,199 @@
+@book{breiman,
+ author = {Breiman, L. and Friedman, J. and Olshen, R. A. and Stone, C. J.},
+ title = {Classification and Regression Trees},
+ edition = {1st},
+ year = {1984},
+ publisher = {Chapman and Hall/CRC},
+ doi = {10.1201/9781315139470}
+}
+
+@article{oc1,
+
+ author = {Murthy, S. K. and Kasif, S. and Salzberg, S. L.},
+ title = {A System for Induction of Oblique Decision Trees},
+ journal = {Journal of Artificial Intelligence Research},
+ volume = {2},
+ pages = {1--32},
+ year = {1994},
+ doi = {10.1613/jair.63},
+}
+
+@article{quinlan1986,
+ author = {Quinlan, J. R.},
+ title = {Induction of Decision Trees},
+ journal = {Machine Learning},
+ volume = {1},
+ number = {1},
+ pages = {81--106},
+ year = {1986},
+ doi = {10.1007/BF00116251}
+}
+
+@article{hhcart,
+ author = {Wickramarachchi, D. C. and Robertson, B. L. and Reale, M. and Price, C. J. and Brown, J.},
+ title = {{HHCART: An oblique decision tree}},
+ journal = {Computational Statistics \& Data Analysis},
+ volume = {96},
+ pages = {12--23},
+ year = {2016},
+ issn = {0167-9473},
+ doi = {10.1016/j.csda.2015.11.006},
+}
+
+@article{odewahn,
+ author = {Odewahn, S. C. and Stockwell, E. B. and Pennington, R. L. and Humphreys, R. M. and Zumach, W. A.},
+ title = {Automated Star/Galaxy Discrimination With Neural Networks},
+ journal = {The Astronomical Journal},
+ year = {1992},
+ volume = {103},
+ pages = {318},
+ doi = {10.1086/116063},
+}
+
+@misc{breast_cancer,
+ author = {Wolberg, W.},
+ title = {{Breast Cancer Wisconsin (Original)}},
+ year = {1990},
+ howpublished = {UCI Machine Learning Repository},
+ note = {{doi}: 10.24432/C5HP4Z}
+}
+
+@misc{iris,
+ author = {Fisher, R. A.},
+ title = {Iris},
+ year = {1936},
+ note = {{doi}: 10.24432/C56C76},
+ howpublished = {UCI Machine Learning Repository},
+}
+
+@article{boston,
+title = {Hedonic housing prices and the demand for clean air},
+journal = {Journal of Environmental Economics and Management},
+volume = {5},
+number = {1},
+pages = {81--102},
+year = {1978},
+issn = {0095-0696},
+doi = {10.1016/0095-0696(78)90006-2},
+author = {D. Harrison and D. L. Rubinfeld},
+}
+
+
+@inproceedings{diabetes,
+ author = {Smith, J. W. and Everhart, J. E. and Dickson, W. C. and Knowler, W. C. and Johannes, R. S.},
+ title = {Using the ADAP Learning Algorithm to Forecast the Onset of Diabetes Mellitus},
+ booktitle = {Proceedings of the Annual Symposium on Computer Application in Medical Care},
+ pages = {261--265},
+ year = {1988}
+}
+
+@inproceedings{boosting,
+ author = {Drucker, H. and Cortes, C.},
+ booktitle = {Advances in Neural Information Processing Systems},
+ publisher = {MIT Press},
+ title = {Boosting Decision Trees},
+ volume = {8},
+ year = {1995}
+}
+
+@article{sgboost,
+ title = {Stochastic gradient boosting},
+ journal = {Computational Statistics \& Data Analysis},
+ volume = {38},
+ number = {4},
+ pages = {367--378},
+ year = {2002},
+ issn = {0167-9473},
+ doi = {10.1016/S0167-9473(01)00065-2},
+ author = {J. H. Friedman},
+}
+
+@article{survey,
+ author={Mienye, I. D. and Jere, N.},
+ journal={IEEE Access},
+ title={A Survey of Decision Trees: Concepts, Algorithms, and Applications},
+ year={2024},
+ volume={12},
+ pages={86716--86727},
+ doi={10.1109/ACCESS.2024.3416838}}
+
+@article{recent_overview,
+ title={Decision trees: a recent overview},
+ author={Kotsiantis, S. B.},
+ journal={Artificial Intelligence Review},
+ volume={39},
+ number={4},
+ pages={261--283},
+ year={2011},
+ publisher={Springer},
+ doi={10.1007/s10462-011-9272-4}
+}
+
+@article{it,
+ author={Shannon, C. E.},
+ journal={The Bell System Technical Journal},
+ title={A mathematical theory of communication},
+ year={1948},
+ volume={27},
+ number={3},
+ pages={379--423},
+ doi={10.1002/j.1538-7305.1948.tb01338.x},
+}
+
+
+@book{matrix,
+ title={Matrix Computations},
+ author={Golub, G.H. and Van Loan, C.F.},
+ isbn={9781421407944},
+ lccn={2012943449},
+ series={Johns Hopkins Studies in the Mathematical Sciences},
+ year={2013},
+ publisher={Johns Hopkins University Press}
+}
+
+
+@inproceedings{buntine,
+ title={Tree Classification Software},
+ author={W. L. Buntine},
+ booktitle = {Technology 2002: The Third National Technology Transfer Conference and Exposition, Volume 1},
+ year={1993},
+}
+
+@article{rndfrst,
+ author = {L. Breiman},
+ title = {Random Forests},
+ journal = {Machine Learning},
+ year = {2001},
+ volume = {45},
+ number = {1},
+ pages = {5--32},
+ month = {Oct},
+ issn = {1573-0565},
+ doi = {10.1023/A:1010933404324}
+}
+
+@book{c4.5,
+ title={C4.5: Programs for Machine Learning},
+ author={Quinlan, J. R.},
+ isbn={9781558602380},
+ series={Morgan Kaufmann series in machine learning},
+ year={1993},
+ publisher={Elsevier Science}
+}
+
+@inproceedings{cv,
+ author = {R. Kohavi},
+ title = {A study of cross-validation and bootstrap for accuracy estimation and model selection},
+ booktitle = {Proceedings of the International Joint Conference on Artificial Intelligence (IJCAI)},
+ year = {1995},
+ pages = {1137--1143},
+ address = {Montreal, Canada},
+}
+
+@unpublished{laack,
+ title = {CART-ELC: Oblique Decision Tree Induction via Exhaustive Search},
+ author = {Laack, A. D.},
+ year = {2025},
+ doi = {10.48550/arXiv.2505.05402},
+ note = {Manuscript under review at Transactions on Machine Learning Research (TMLR)},
+}