Added from scratch directory - machinelearning - Unnamed repository; edit this file 'description' to name the repository.

commit 9e7ca9326bf012a30976c94000b52aba15e86f79
parent e73bbe68068fc543da8bdb0953cd5051b4d9fb63
Author: Andrew Laack <andrew@laack.co>
Date:   Sun, 15 Feb 2026 20:26:39 -0600

Added from scratch directory

Diffstat:
A fromScratch/KNN.py  | 149 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

1 file changed, 149 insertions(+), 0 deletions(-)
diff --git a/fromScratch/KNN.py b/fromScratch/KNN.py
@@ -0,0 +1,149 @@
+# K-Nearest-Neighbors
+import random
+import heapq
+from matplotlib import pyplot as plt
+import numpy as np
+
+
+class Neighbor:
+    def __init__(self, distance, index):
+        self.distance = distance
+        self.index = index
+
+    # this is done to make min heap simulate a max heap
+    def __lt__(self, other):
+        return -self.distance < -other.distance
+
+    def __str__(self):
+        return "Distance: " + str(self.distance) + " Index: " + str(self.index)
+
+
+class KNN:
+    def __init__(self, n):
+        self.n = n
+
+    def fit(self, X, y):
+        self.X = X.copy()
+        self.y = y.copy()
+
+    def predict(self, X):
+
+        preds = []
+
+        for i in range(len(X)):
+
+            nearest_neighbors = []
+
+            for index, instance in enumerate(self.X):
+                euc_dist = euc_dist_squared(instance, X[i])
+                neighbor = Neighbor(euc_dist, index)
+
+                if len(nearest_neighbors) > 0:
+                    if len(nearest_neighbors) < self.n:
+                        heapq.heappush(nearest_neighbors, neighbor)
+                    else:
+                        if neighbor.distance < nearest_neighbors[0].distance:
+                            heapq.heappop(nearest_neighbors)
+                            heapq.heappush(nearest_neighbors, neighbor)
+                else:
+                    heapq.heappush(nearest_neighbors, neighbor)
+
+            if self.n > len(self.X):
+                assert len(nearest_neighbors) == self.n
+
+            preds.append(self._most_common_vote(nearest_neighbors))
+
+        return preds
+
+    def _most_common_vote(self, neighbors):
+
+        votes = {}
+
+        for neighbor in neighbors:
+            classification = self.y[neighbor.index]
+            votes[classification] = votes.get(classification, 0) + 1
+
+        max = 0
+        most_popular = ""
+
+        for key in votes:
+            if votes[key] > max:
+                max = votes[key]
+                most_popular = key
+
+        return most_popular
+
+
+def euc_dist_squared(input_1, input_2):
+    assert len(input_1) == len(input_2)
+    sum = 0
+    for i in range(len(input_1)):
+        sum += (input_1[i] - input_2[i]) ** 2
+    return sum
+
+
+if __name__ == "__main__":
+    knn = KNN(5)
+
+    samples_1 = 500
+    samples_2 = 500
+
+    cluster_1 = [[random.gauss(0, 5), random.gauss(0, 5)] for _ in range(samples_1)]
+    cluster_2 = [[random.gauss(10, 5), random.gauss(10, 5)] for _ in range(samples_2)]
+    cluster_1.extend(cluster_2)
+    X = cluster_1
+
+    y = [0] * samples_1
+    y.extend([1] * samples_2)
+
+    knn.fit(X, y)
+
+    test_0 = [[random.gauss(0, 5), random.gauss(0, 5)] for _ in range(samples_1)]
+    test_1 = [[random.gauss(10, 5), random.gauss(10, 5)] for _ in range(samples_2)]
+
+    test_0.extend(test_1)
+    test_X = test_0
+
+    preds = knn.predict(test_X)
+
+    X = np.array(X)
+    test_X = np.array(test_X)
+    test_y = [0] * samples_1
+    test_y.extend([1] * samples_2)
+
+    correctness = []
+
+    for i in range(len(test_y)):
+        if test_y[i] != preds[i]:
+            correctness.append(0)
+        else:
+            correctness.append(1)
+
+    # ORIGINAL
+    plt.scatter(x=X[:, 0], y=X[:, 1], c=y, cmap="jet", alpha=0.1)
+
+    alphas = []
+    for correct in correctness:
+        if correct == 1:
+            alphas.append(0.1)
+        else:
+            alphas.append(1)
+
+    # PREDICTIONS
+    plt.scatter(
+        x=test_X[:, 0],
+        y=test_X[:, 1],
+        c=test_y,
+        cmap="jet",
+        edgecolors="black",
+        alpha=alphas,
+    )
+
+    plt.show()
+
+    correct_count = 0
+    for correct in correctness:
+        if correct == 1:
+            correct_count += 1
+
+    print("Accuracy: ", correct_count / len(correctness))

	machinelearning Unnamed repository; edit this file 'description' to name the repository.
	Log \| Files \| Refs