knn.c (2883B)
// knn: loads a CSV training dataset for k-nearest-neighbours classification.
//
// Usage: knn {training_dataset.csv} {prediction_samples.csv} {k}

#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

#define INVALID -1
#define VALID 0

#define TRUE 1
#define FALSE 0

// Longest CSV field (in characters) that readCSV accepts.
#define MAX_FIELD_LENGTH 50

// Command line arguments. k == INVALID marks a failed parse.
typedef struct Inputs{
    char* trainingDatasetPath;
    char* predictionSamplesPath;
    int k;
} Inputs;

// One CSV row: featureCount floats plus an integer class label.
typedef struct Sample{
    float* features;
    int target;
} Sample;

// A parsed CSV file. valid is VALID or INVALID; samples holds sampleCount
// rows, each with featureCount features.
typedef struct Dataset{
    Sample* samples;
    int sampleCount;
    int featureCount;
    int valid;
} Dataset;

// Returns TRUE when only spaces/tabs remain before the terminating NUL.
static int onlyBlanksRemain(const char* text){
    while (*text == ' ' || *text == '\t'){
        text += 1;
    }
    return *text == '\0';
}

// Parses text as a base-10 int. Returns TRUE and writes *out on success;
// returns FALSE (leaving *out untouched) for empty, malformed or
// out-of-range input.
static int parseInt(const char* text, int* out){
    char* end = NULL;

    errno = 0;
    long value = strtol(text, &end, 10);
    if (end == text || errno == ERANGE || value < INT_MIN || value > INT_MAX){
        return FALSE;
    }
    if (!onlyBlanksRemain(end)){
        return FALSE;
    }

    *out = (int)value;
    return TRUE;
}

// Parses text as a float. Returns TRUE and writes *out on success; returns
// FALSE (leaving *out untouched) for empty or malformed input.
static int parseFloat(const char* text, float* out){
    char* end = NULL;

    float value = strtof(text, &end);
    if (end == text || !onlyBlanksRemain(end)){
        return FALSE;
    }

    *out = value;
    return TRUE;
}

// Parses the command line: argv[1] = training CSV, argv[2] = prediction CSV,
// argv[3] = k (a positive integer).
// On any error the returned struct has k == INVALID and NULL paths.
Inputs inputParse(int argc, char ** argv){
    Inputs in = { NULL, NULL, INVALID };
    int k = 0;

    if (argc != 4 || !parseInt(argv[3], &k) || k <= 0){
        return in;
    }

    in.trainingDatasetPath = argv[1];
    in.predictionSamplesPath = argv[2];
    in.k = k;

    return in;
}

// Prints the parsed inputs, one per line, right-aligned in 40 columns.
void printInputs(Inputs inputs){
    printf("training path: %40s\nprediction path: %40s\nk: %40d\n", inputs.trainingDatasetPath, inputs.predictionSamplesPath, inputs.k);
}

// Returns the number of comma-separated columns on the first line of the
// file, or 0 when the file cannot be opened or its first line is empty.
// This does not support line end commas (a trailing comma counts as one more,
// empty, column).
// This also doesn't support escaped strings as the input should be float and ints
int csvColumns(char* fileName){
    if (fileName == NULL){
        return 0;
    }

    FILE* fp = fopen(fileName, "r");
    if (fp == NULL){
        return 0;
    }

    int commas = 0;
    int sawContent = FALSE;
    int c; // int, not char: fgetc returns EOF out of band

    while ((c = fgetc(fp)) != EOF && c != '\n'){
        if (c == ','){
            commas += 1;
            sawContent = TRUE;
        }
        else if (c != '\r'){
            sawContent = TRUE;
        }
    }

    fclose(fp);
    return sawContent ? commas + 1 : 0;
}

// Stores one parsed field into row. Columns below featureCount are features;
// the column after them (only present when columnCount > featureCount) is the
// target. Returns FALSE for a malformed value or a column past columnCount.
static int storeField(const char* text, int column, int featureCount, int columnCount, Sample* row){
    if (column >= columnCount){
        return FALSE;
    }
    if (column < featureCount){
        return parseFloat(text, &row->features[column]);
    }
    return parseInt(text, &row->target);
}

// Appends sample to dataset->samples, doubling the array when it is full.
// *capacity tracks the allocated length. Returns FALSE on allocation failure,
// in which case the dataset is unchanged and the caller still owns sample.
static int appendSample(Dataset* dataset, int* capacity, Sample sample){
    if (dataset->sampleCount == *capacity){
        if (*capacity > INT_MAX / 2){
            return FALSE;
        }
        int newCapacity = (*capacity == 0) ? 16 : *capacity * 2;
        Sample* grown = realloc(dataset->samples, (size_t)newCapacity * sizeof *grown);
        if (grown == NULL){
            return FALSE;
        }
        dataset->samples = grown;
        *capacity = newCapacity;
    }

    dataset->samples[dataset->sampleCount] = sample;
    dataset->sampleCount += 1;
    return TRUE;
}

// Reads a headerless numeric CSV file into a newly allocated Dataset.
//
// The first line fixes the column count; every later non-blank line must have
// the same number of fields. When containsTarget is non-zero the last column
// is parsed as the integer target and the rest as float features; otherwise
// every column is a feature and target is 0.
//
// Returns NULL only when the Dataset itself cannot be allocated. Otherwise the
// result has valid == VALID on success, or valid == INVALID when the file
// cannot be opened, is empty, has a field longer than MAX_FIELD_LENGTH, has a
// malformed value or a ragged row, or memory runs out. The caller owns the
// result in every case and releases it with freeDataset.
Dataset* readCSV(char* fileName, int containsTarget){
    Dataset* dataset = malloc(sizeof *dataset);
    if (dataset == NULL){
        return NULL;
    }
    dataset->samples = NULL;
    dataset->sampleCount = 0;
    dataset->featureCount = 0;
    dataset->valid = INVALID;

    int columnCount = csvColumns(fileName);
    int featureCount = containsTarget ? columnCount - 1 : columnCount;
    if (featureCount < 1){
        return dataset;
    }
    dataset->featureCount = featureCount;

    FILE* fp = fopen(fileName, "r");
    if (fp == NULL){
        return dataset;
    }

    char field[MAX_FIELD_LENGTH + 1];
    int length = 0;          // characters buffered for the current field
    int column = 0;          // index of the current field within its row
    int capacity = 0;        // allocated length of dataset->samples
    int ok = TRUE;
    Sample row = { NULL, 0 }; // row under construction; features owned here
    int c;                   // int, not char: fgetc returns EOF out of band

    do {
        c = fgetc(fp);

        if (c == '\r'){
            continue; // tolerate CRLF line endings
        }

        if (c != ',' && c != '\n' && c != EOF){
            if (length >= MAX_FIELD_LENGTH){
                ok = FALSE;
            }
            else{
                field[length] = (char)c;
                length += 1;
            }
            continue;
        }

        // Blank line, or EOF directly after the final newline: nothing to store.
        if (c != ',' && column == 0 && length == 0){
            continue;
        }

        field[length] = '\0';
        length = 0;

        if (row.features == NULL){
            row.features = malloc((size_t)featureCount * sizeof *row.features);
            row.target = 0;
        }
        ok = row.features != NULL && storeField(field, column, featureCount, columnCount, &row);
        column += 1;

        // A newline or EOF (anything but a comma) completes the row.
        if (ok && c != ','){
            ok = column == columnCount && appendSample(dataset, &capacity, row);
            if (ok){
                row.features = NULL; // ownership moved into the dataset
                column = 0;
            }
        }
    } while (ok && c != EOF);

    free(row.features); // non-NULL only when a row was abandoned part-way
    fclose(fp);

    if (ok){
        dataset->valid = VALID;
    }
    return dataset;
}

// Releases a Dataset returned by readCSV together with every sample's feature
// array. Passing NULL is allowed and does nothing.
void freeDataset(Dataset* dataset){
    if (dataset == NULL){
        return;
    }
    for (int i = 0; i < dataset->sampleCount; i++){
        free(dataset->samples[i].features);
    }
    free(dataset->samples);
    free(dataset);
}

int main(int argc, char ** argv){
    Inputs input = inputParse(argc,argv);
    if (input.k == INVALID){
        printf("Usage: knn {training_dataset.csv} {prediction_samples.csv} {k}\n");
        return -1;
    }

    printInputs(input);
    Dataset* trainingData = readCSV(input.trainingDatasetPath, TRUE);

    if(trainingData == NULL || trainingData->valid == INVALID){
        freeDataset(trainingData);
        printf("Invalid training dataset");
        return -1;
    }

    freeDataset(trainingData);
    return 0;
}