c-programming

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs

commit 8fa467e6be59b877c3d939c0fcc5892498a62970
parent 1715254ba00f33007f0e55c46b6e82bb30a907f9
Author: Andrew Laack <andrew@laack.co>
Date:   Sun,  9 Nov 2025 18:19:29 -0600

Started a ML thing

Diffstat:
M.gitignore | 1+
Ac-book/ch1/simple-exponentiation.c | 12++++++++++++
AgenFiles.c | 19+++++++++++++++++++
Aml/knn.c | 133+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 165 insertions(+), 0 deletions(-)

diff --git a/.gitignore b/.gitignore
@@ -1,2 +1,3 @@
 *.out
 *artifacts*
+*.csv
diff --git a/c-book/ch1/simple-exponentiation.c b/c-book/ch1/simple-exponentiation.c
@@ -0,0 +1,24 @@
+// the goal was to make this as few lines as possible...
+// that is why the running product is passed in as an extra argument instead of being a local.
+#include <stdio.h>
+#include <stdlib.h>
+// Returns x^y for y >= 0. Callers pass x as `result`: it seeds the running product,
+// so only y - 1 multiplications are needed. y == 0 yields 1; overflow is not checked.
+int power(int x, int y, int result){
+    if (y == 0)
+        return 1;
+    for(int n = 0; n < y - 1; ++n)
+        result *= x;
+    return result;
+}
+int main(int argc, char** argv){
+    // argv[1] and argv[2] are only valid when exactly two arguments were given
+    if (argc != 3 || atoi(argv[2]) < 0){
+        fprintf(stderr, "Usage: simple-exponentiation {base} {non-negative exponent}\n");
+        return 1;
+    }
+    int base = atoi(argv[1]);
+    int exponent = atoi(argv[2]);
+    printf("%d ^ %d = %d\n", base, exponent, power(base, exponent, base));
+    return 0;
+}
diff --git a/genFiles.c b/genFiles.c
@@ -0,0 +1,33 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+// Creates files/0.txt .. files/{N-1}.txt (N = argv[1]); each file contains its own path.
+// The files/ directory must already exist.
+int main(int argc, char** argv){
+    if (argc != 2){
+        fprintf(stderr, "Usage: genFiles {count}\n");
+        return 1;
+    }
+
+    int upperBound = atoi(argv[1]);
+    // "files/" + up to 10 digits + ".txt" + NUL is 21 bytes; 32 leaves headroom
+    char str [32];
+
+    for(int i = 0 ; i < upperBound; ++i){
+        snprintf(str, sizeof str, "files/%d.txt", i);
+        FILE* file = fopen(str, "w");
+        if (file == NULL){
+            perror(str);
+            return 1;
+        }
+
+        // write only the formatted path, not the unused tail of the buffer
+        fputs(str, file);
+        if (fclose(file) != 0){
+            perror(str);
+            return 1;
+        }
+    }
+
+    return 0;
+}
diff --git a/ml/knn.c b/ml/knn.c
@@ -0,0 +1,261 @@
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#define INVALID -1
+#define VALID 0
+
+#define TRUE 1
+#define FALSE 0
+
+// longest CSV field, in characters, that readCSV accepts
+#define MAX_FIELD_LENGTH 50
+
+// Command line arguments. k == INVALID marks a failed parse.
+typedef struct Inputs{
+    char* trainingDatasetPath;
+    char* predictionSamplesPath;
+    int k;
+} Inputs;
+
+// One CSV row: the feature values plus the integer label (0 when the file has none).
+typedef struct Sample{
+    float* features;
+    int target;
+} Sample;
+
+// Every row of one CSV file. valid is VALID or INVALID.
+typedef struct Dataset{
+    Sample* samples;
+    int sampleCount;
+    int featureCount;
+    int valid;
+} Dataset;
+
+// Parses "knn {training.csv} {prediction.csv} {k}".
+// On a wrong argument count or a k that is not a positive int, k is INVALID and both paths are NULL.
+Inputs inputParse(int argc, char ** argv){
+    Inputs in = { NULL, NULL, INVALID };
+
+    if (argc != 4){
+        return in;
+    }
+
+    // strtol instead of atoi so that "3abc" and out-of-range values are rejected
+    char* end;
+    long k = strtol(argv[3], &end, 10);
+    if (end == argv[3] || *end != '\0' || k <= 0 || k > INT_MAX){
+        return in;
+    }
+
+    in.trainingDatasetPath = argv[1];
+    in.predictionSamplesPath = argv[2];
+    in.k = (int)k;
+
+    return in;
+}
+
+// Prints the parsed arguments, each value right-aligned in a 40 character column.
+void printInputs(Inputs inputs){
+    printf("training path: %40s\nprediction path: %40s\nk: %40d\n", inputs.trainingDatasetPath, inputs.predictionSamplesPath, inputs.k);
+}
+
+// Returns the number of comma separated columns on the first line of fileName,
+// 0 when that line is empty, or INVALID when the file cannot be opened.
+// This does not support line end commas
+// This also doesn't support escaped strings as the input should be float and ints
+int csvColumns(char* fileName){
+    FILE* fp = fopen(fileName, "r");
+    if (fp == NULL){
+        return INVALID;
+    }
+
+    int commas = 0;
+    int sawData = FALSE;
+    int c; // int, not char: fgetc's EOF must stay distinguishable from a data byte
+
+    while ((c = fgetc(fp)) != EOF && c != '\n'){
+        if(c == '\r'){
+            continue;
+        }
+        sawData = TRUE;
+        if(c == ','){
+            commas += 1;
+        }
+    }
+
+    fclose(fp);
+    return sawData ? commas + 1 : 0;
+}
+
+// Frees a dataset returned by readCSV together with every sample it owns. NULL is accepted.
+static void freeDataset(Dataset* dataset){
+    if (dataset == NULL){
+        return;
+    }
+    for (int i = 0; i < dataset->sampleCount; ++i){
+        free(dataset->samples[i].features);
+    }
+    free(dataset->samples);
+    free(dataset);
+}
+
+// Parses the text of field number `column` into *row, allocating row->features on first use.
+// Returns TRUE on success, FALSE on allocation failure, a malformed number or a surplus field.
+static int storeField(Sample* row, int column, int featureCount, int containsTarget, const char* text){
+    char* end;
+
+    if (row->features == NULL){
+        row->features = malloc(sizeof *row->features * (size_t)featureCount);
+        if (row->features == NULL){
+            return FALSE;
+        }
+    }
+
+    if (column < featureCount){
+        row->features[column] = strtof(text, &end);
+    }
+    else if (containsTarget && column == featureCount){
+        long target = strtol(text, &end, 10);
+        if (target < INT_MIN || target > INT_MAX){
+            return FALSE;
+        }
+        row->target = (int)target;
+    }
+    else{
+        return FALSE; // more fields than the first line announced
+    }
+
+    // an empty field or trailing junk such as "1.5abc" is an error
+    return end != text && *end == '\0';
+}
+
+// Appends row to dataset->samples, doubling the array when it is full.
+// Returns TRUE on success, FALSE when the array cannot grow.
+static int appendSample(Dataset* dataset, int* capacity, Sample row){
+    if (dataset->sampleCount == *capacity){
+        if (*capacity > INT_MAX / 2){
+            return FALSE;
+        }
+        int newCapacity = *capacity == 0 ? 16 : *capacity * 2;
+        Sample* grown = realloc(dataset->samples, sizeof *grown * (size_t)newCapacity);
+        if (grown == NULL){
+            return FALSE; // dataset->samples is untouched and still owned by dataset
+        }
+        dataset->samples = grown;
+        *capacity = newCapacity;
+    }
+
+    dataset->samples[dataset->sampleCount] = row;
+    dataset->sampleCount += 1;
+    return TRUE;
+}
+
+// Loads every row of fileName. Each row must have as many columns as the first line;
+// when containsTarget is non-zero the last column is the integer target, the rest are features.
+// Header rows are not supported: every field has to be a number of at most MAX_FIELD_LENGTH characters.
+// Returns NULL only when the Dataset itself cannot be allocated. Otherwise the caller owns the
+// result (release it with freeDataset) and must check ->valid before using the samples.
+Dataset* readCSV(char* fileName, int containsTarget){
+    Dataset* dataset = malloc(sizeof *dataset);
+    if (dataset == NULL){
+        return NULL;
+    }
+    dataset->samples = NULL;
+    dataset->sampleCount = 0;
+    dataset->featureCount = 0;
+    dataset->valid = INVALID; // becomes VALID only once the whole file has been parsed
+
+    int columnCount = csvColumns(fileName);
+    int featureCount = containsTarget ? columnCount - 1 : columnCount;
+    if (featureCount <= 0){
+        return dataset;
+    }
+    dataset->featureCount = featureCount;
+
+    FILE* fp = fopen(fileName, "r");
+    if (fp == NULL){
+        return dataset;
+    }
+
+    char buffered[MAX_FIELD_LENGTH + 1];
+    int length = 0;
+    int column = 0;
+    int capacity = 0;
+    int failed = FALSE;
+    Sample current = { NULL, 0 }; // row being assembled
+    int c;
+
+    do{
+        c = fgetc(fp);
+
+        if (c == '\r'){
+            continue;
+        }
+
+        if (c != ',' && c != '\n' && c != EOF){
+            if (length >= MAX_FIELD_LENGTH){
+                failed = TRUE;
+                break;
+            }
+            buffered[length] = (char)c;
+            length += 1;
+            continue;
+        }
+
+        // blank line, or EOF directly after the final newline: nothing to store
+        if (c != ',' && column == 0 && length == 0){
+            continue;
+        }
+
+        buffered[length] = 0;
+        length = 0;
+        if (!storeField(&current, column, featureCount, containsTarget, buffered)){
+            failed = TRUE;
+            break;
+        }
+        column += 1;
+
+        if (c == ','){
+            continue;
+        }
+
+        // end of row (newline, or EOF without a trailing newline)
+        if (column != columnCount || !appendSample(dataset, &capacity, current)){
+            failed = TRUE;
+            break;
+        }
+        current.features = NULL; // ownership moved into dataset->samples
+        current.target = 0;
+        column = 0;
+    } while (c != EOF);
+
+    free(current.features); // non-NULL only when a row was abandoned half way
+    fclose(fp);
+    if (!failed){
+        dataset->valid = VALID;
+    }
+    return dataset;
+}
+
+// Entry point: validates the arguments and loads the training dataset. Exit status is 0 on success.
+int main(int argc, char ** argv){
+    Inputs input = inputParse(argc,argv);
+    if (input.k == INVALID){
+        printf("Usage: knn {training_dataset.csv} {prediction_samples.csv} {k}\n");
+        return -1;
+    }
+
+    printInputs(input);
+    Dataset* trainingData = readCSV(input.trainingDatasetPath, TRUE);
+
+    // readCSV returns NULL only when the Dataset itself could not be allocated
+    if(trainingData == NULL || trainingData->valid == INVALID){
+        freeDataset(trainingData);
+        printf("Invalid training dataset");
+        return -1;
+    }
+
+    freeDataset(trainingData);
+    return 0;
+}