commit 8fa467e6be59b877c3d939c0fcc5892498a62970
parent 1715254ba00f33007f0e55c46b6e82bb30a907f9
Author: Andrew Laack <andrew@laack.co>
Date: Sun, 9 Nov 2025 18:19:29 -0600
Started a ML thing
Diffstat:
4 files changed, 165 insertions(+), 0 deletions(-)
diff --git a/.gitignore b/.gitignore
@@ -1,2 +1,3 @@
*.out
*artifacts*
+*.csv
diff --git a/c-book/ch1/simple-exponentiation.c b/c-book/ch1/simple-exponentiation.c
@@ -0,0 +1,12 @@
+// the goal was to make this as few lines as possible...
+// that makes this janky af with the additional input variable instead of another definition...
+#include <stdio.h>
+#include <stdlib.h>
+// Integer exponentiation with a caller-supplied seed.
+//
+// x:      base
+// y:      exponent; negative exponents are not supported and return `result`
+//         unchanged
+// result: seed for the running product; callers pass x here, so that y - 1
+//         further multiplications yield x^y
+//
+// Returns x^y for y >= 1 (given result == x) and 1 for y == 0. The product is
+// a plain int, so large results overflow.
+int power(int x, int y, int result){
+    // x^0 is 1 regardless of the seed; without this the seed (x) leaked out.
+    if (y == 0)
+        return 1;
+    for(int n = 0; n < y - 1; ++n)
+        result *= x;
+    return result;
+}
+// Usage: simple-exponentiation {base} {exponent}
+// Prints "base ^ exponent = result". Returns 1 when an argument is missing.
+int main(int argc, char** argv){
+    // argv[1] and argv[2] are NULL / out of range when arguments are missing.
+    if (argc < 3){
+        fprintf(stderr, "Usage: %s {base} {exponent}\n", argc > 0 ? argv[0] : "simple-exponentiation");
+        return 1;
+    }
+    int base = atoi(argv[1]);
+    int exponent = atoi(argv[2]);
+    // power() expects the base as the seed of its running product.
+    printf("%d ^ %d = %d\n", base, exponent, power(base, exponent, base));
+    return 0;
+}
diff --git a/genFiles.c b/genFiles.c
@@ -0,0 +1,19 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+// Usage: genFiles {file_count}
+// Creates files/0.txt .. files/{file_count - 1}.txt. Each file receives a
+// fixed 20-byte record: its own path, padded with NUL bytes. The "files"
+// directory must already exist.
+// Returns 0 on success, 1 on a usage or I/O error.
+int main(int argc, char** argv){
+    // PATH_CAPACITY fits "files/" + 10 digits + ".txt" + NUL with room to spare.
+    enum { PATH_CAPACITY = 32, RECORD_BYTES = 20 };
+
+    if (argc < 2){
+        fprintf(stderr, "Usage: %s {file_count}\n", argc > 0 ? argv[0] : "genFiles");
+        return 1;
+    }
+
+    int upperBound = atoi(argv[1]);
+    // Zero-filled so the bytes after the path are defined padding, not
+    // whatever happened to be on the stack.
+    char str[PATH_CAPACITY] = {0};
+
+    for(int i = 0 ; i < upperBound; ++i){
+        int pathLength = snprintf(str, sizeof str, "files/%d.txt", i);
+        if (pathLength < 0 || (size_t)pathLength >= sizeof str){
+            fprintf(stderr, "path for file %d does not fit in the buffer\n", i);
+            return 1;
+        }
+
+        FILE* file = fopen(str, "w");
+        if (file == NULL){
+            perror(str);
+            return 1;
+        }
+
+        size_t stored = fwrite(str, 1, RECORD_BYTES, file);
+        // fclose flushes, so its result matters for a write stream.
+        if (fclose(file) != 0 || stored != RECORD_BYTES){
+            perror(str);
+            return 1;
+        }
+    }
+
+    return 0;
+}
diff --git a/ml/knn.c b/ml/knn.c
@@ -0,0 +1,133 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+// Status codes. Dataset.valid holds one of these, and INVALID is also the
+// sentinel written to Inputs.k when the command line is rejected.
+#define INVALID -1
+#define VALID 0
+
+// Boolean flags, e.g. the containsTarget argument of readCSV.
+#define TRUE 1
+#define FALSE 0
+
+// Command-line arguments of the knn program, as filled in by inputParse.
+typedef struct Inputs{
+ // argv[1]: path of the training dataset CSV
+ char* trainingDatasetPath;
+ // argv[2]: path of the CSV holding the samples to predict
+ char* predictionSamplesPath;
+ // argv[3] converted with atoi; INVALID when the arguments are rejected
+ int k;
+} Inputs;
+
+
+
+// One row of a dataset.
+typedef struct Sample{
+ // heap-allocated array of feature values, one float per feature column
+ float* features;
+ // integer target of the row (presumably the class label used by knn -- TODO confirm)
+ int target;
+} Sample;
+
+// A set of samples loaded from a CSV file; readCSV returns a pointer to one.
+typedef struct Dataset{
+ // array of samples
+ Sample* samples;
+ // by name, the number of entries in samples -- confirm against the loader
+ int sampleCount;
+ // by name, the number of features in each sample -- confirm against the loader
+ int featureCount;
+ // VALID or INVALID; main checks this before using the dataset
+ int valid;
+} Dataset;
+
+// Parses the command line:
+//   knn {training_dataset.csv} {prediction_samples.csv} {k}
+//
+// Returns an Inputs whose k is the positive integer from argv[3] and whose
+// paths point at argv[1] and argv[2]. When argc is not 4 or k is not a
+// positive integer, k is INVALID and both paths are NULL.
+Inputs inputParse(int argc, char ** argv){
+    // Start fully initialised so the error return never hands back
+    // indeterminate pointers.
+    Inputs in = { .trainingDatasetPath = NULL, .predictionSamplesPath = NULL, .k = INVALID };
+
+    if (argc != 4){
+        return in;
+    }
+
+    int k = atoi(argv[3]);
+    if (k <= 0){
+        return in;
+    }
+
+    in.trainingDatasetPath = argv[1];
+    in.predictionSamplesPath = argv[2];
+    in.k = k;
+
+    return in;
+}
+
+// Prints the parsed command-line inputs, one right-aligned value per line.
+void printInputs(Inputs inputs){
+    // Argument order must follow the labels: training path first, then the
+    // prediction path (the two were swapped before).
+    printf("training path: %40s\nprediction path: %40s\nk: %40d\n", inputs.trainingDatasetPath, inputs.predictionSamplesPath, inputs.k);
+}
+
+// Counts the comma-separated columns on the first line of a CSV file.
+//
+// This does not support line end commas (a trailing comma counts as one more,
+// empty, column). It also does not support escaped strings, as the input
+// should only contain floats and ints.
+//
+// Returns the number of columns (separators + 1), or 0 when the file cannot
+// be opened or its first line is empty.
+int csvColumns(char* fileName){
+    FILE* fp = fopen(fileName, "r");
+    if (fp == NULL){
+        return 0;
+    }
+
+    int separators = 0;
+    int sawContent = 0;
+    // int rather than char: fgetc returns EOF as an out-of-band int, which a
+    // char either never equals (unsigned char) or confuses with byte 0xFF.
+    int c;
+
+    while ((c = fgetc(fp)) != EOF && c != '\n'){
+        if (c == '\r'){
+            continue; // CR of a CRLF line ending is not content
+        }
+        sawContent = 1;
+        if(c == ','){
+            separators += 1;
+        }
+    }
+
+    fclose(fp);
+    // N separators delimit N + 1 columns.
+    return sawContent ? separators + 1 : 0;
+}
+
+
+// Frees every feature array held by the dataset and the sample array itself.
+static void discardSamples(Dataset* dataset){
+    for (int i = 0; i < dataset->sampleCount; ++i){
+        free(dataset->samples[i].features);
+    }
+    free(dataset->samples);
+    dataset->samples = NULL;
+    dataset->sampleCount = 0;
+}
+
+// Loads a numeric CSV file into a freshly allocated Dataset.
+//
+// fileName:       path of the CSV file. No header row, quoting or escaping is
+//                 supported; every field is converted with atof/atoi.
+// containsTarget: when non-zero, the last column of each row is stored as the
+//                 integer target and the remaining columns are the features;
+//                 when zero, every column is a feature and target is 0.
+//
+// The column count is taken from the first line; every later row must have
+// exactly that many fields, and a field may be at most 50 characters long.
+// Blank lines are skipped.
+//
+// Returns a heap-allocated Dataset the caller owns (each samples[i].features,
+// then samples, then the Dataset itself must be freed), or NULL when the
+// Dataset itself cannot be allocated. On any failure (unreadable file, empty
+// file, malformed row, over-long field, out of memory) valid is INVALID,
+// samples is NULL and sampleCount is 0; on success valid is VALID.
+Dataset* readCSV(char* fileName, int containsTarget){
+    enum { FIELD_MAX = 50, INITIAL_CAPACITY = 16 };
+
+    Dataset* dataset = malloc(sizeof *dataset);
+    if (dataset == NULL){
+        return NULL;
+    }
+    dataset->samples = NULL;
+    dataset->sampleCount = 0;
+    dataset->featureCount = 0;
+    dataset->valid = INVALID; // flipped to VALID only after a clean parse
+
+    FILE* fp = fopen(fileName, "r");
+    if (fp == NULL){
+        return dataset;
+    }
+
+    // int, not char, so EOF stays distinguishable from every byte value.
+    int c;
+
+    // First pass: derive the column count from the first line.
+    int columns = 0;
+    while ((c = fgetc(fp)) != EOF && c != '\n'){
+        if (c == '\r'){
+            continue;
+        }
+        if (columns == 0){
+            columns = 1;
+        }
+        if (c == ','){
+            columns += 1;
+        }
+    }
+    rewind(fp);
+
+    int featureCount = containsTarget ? columns - 1 : columns;
+    if (featureCount <= 0){
+        fclose(fp);
+        return dataset;
+    }
+    dataset->featureCount = featureCount;
+
+    char buffered[FIELD_MAX + 1];
+    int length = 0;        // characters collected for the current field
+    int fieldNum = 0;      // index of the current field within its row
+    int capacity = 0;      // allocated slots in dataset->samples
+    float* features = NULL; // feature array of the row being parsed
+    int target = 0;
+    int ok = TRUE;
+
+    // Second pass: EOF is handled inside the loop body so that a final row
+    // without a trailing newline is still stored.
+    do {
+        c = fgetc(fp);
+        if (c == '\r'){
+            continue;
+        }
+
+        int endOfRow = (c == '\n' || c == EOF);
+        if (c != ',' && !endOfRow){
+            if (length >= FIELD_MAX){
+                ok = FALSE; // field longer than the buffer
+                break;
+            }
+            buffered[length] = (char)c;
+            length += 1;
+            continue;
+        }
+
+        // Blank line, or the newline that terminates the file.
+        if (endOfRow && fieldNum == 0 && length == 0){
+            continue;
+        }
+
+        buffered[length] = '\0';
+        length = 0;
+
+        if (features == NULL){
+            features = malloc(sizeof *features * (size_t)featureCount);
+            if (features == NULL){
+                ok = FALSE;
+                break;
+            }
+        }
+
+        if (fieldNum < featureCount){
+            features[fieldNum] = (float)atof(buffered);
+        }
+        else if (containsTarget && fieldNum == featureCount){
+            target = atoi(buffered);
+        }
+        else{
+            ok = FALSE; // row has more fields than the first line
+            break;
+        }
+        fieldNum += 1;
+
+        if (!endOfRow){
+            continue;
+        }
+
+        if (fieldNum != columns){
+            ok = FALSE; // row has fewer fields than the first line
+            break;
+        }
+
+        if (dataset->sampleCount == capacity){
+            int newCapacity = capacity ? capacity * 2 : INITIAL_CAPACITY;
+            // Keep the old pointer until realloc succeeds so nothing leaks.
+            Sample* grown = realloc(dataset->samples, sizeof *grown * (size_t)newCapacity);
+            if (grown == NULL){
+                ok = FALSE;
+                break;
+            }
+            dataset->samples = grown;
+            capacity = newCapacity;
+        }
+
+        // The dataset takes ownership of the row's feature array.
+        dataset->samples[dataset->sampleCount].features = features;
+        dataset->samples[dataset->sampleCount].target = target;
+        dataset->sampleCount += 1;
+
+        features = NULL;
+        target = 0;
+        fieldNum = 0;
+    } while (c != EOF);
+
+    fclose(fp);
+
+    if (!ok){
+        // Callers free only the Dataset struct when it is INVALID, so release
+        // everything else here.
+        free(features);
+        discardSamples(dataset);
+        return dataset;
+    }
+
+    dataset->valid = VALID;
+    return dataset;
+}
+
+
+// Entry point: knn {training_dataset.csv} {prediction_samples.csv} {k}
+// Parses the arguments, echoes them and loads the training dataset.
+// Returns 0 on success and -1 on a usage error or an unusable dataset.
+int main(int argc, char ** argv){
+    Inputs input = inputParse(argc,argv);
+    if (input.k == INVALID){
+        printf("Usage: knn {training_dataset.csv} {prediction_samples.csv} {k}\n");
+        return -1;
+    }
+
+    printInputs(input);
+    Dataset* trainingData = readCSV(input.trainingDatasetPath, TRUE);
+
+    // Guard against a failed allocation as well as a rejected file.
+    if(trainingData == NULL || trainingData->valid == INVALID){
+        free(trainingData);
+        printf("Invalid training dataset\n");
+        return -1;
+    }
+
+    free(trainingData);
+    return 0;
+}