commit d2ab03772cfccbac849b1e2ad8e4f7353be94d2d
parent da2f30b9ab22f5277a82998c1c0247d058a2fcb1
Author: Andrew Laack <andrew@laack.co>
Date: Thu, 13 Nov 2025 16:10:09 -0600
Reading CSVs for ML
Diffstat:
6 files changed, 94 insertions(+), 21 deletions(-)
diff --git a/ml/count-columns.c b/ml/old/count-columns.c
diff --git a/ml/count-columns.h b/ml/old/count-columns.h
diff --git a/ml/dynamic-array.c b/ml/old/dynamic-array.c
diff --git a/ml/dynamic-array.h b/ml/old/dynamic-array.h
diff --git a/ml/old/read-csv.c b/ml/old/read-csv.c
@@ -0,0 +1,38 @@
+// this is wrong; please do this by counting the characters then allocating.
+#include <stdio.h>
+#include "count-columns.h"
+#include "dynamic-array.h"
+#include <stdlib.h>
+
+int main(int argc, char ** argv){ // Prints countColumns' result for argv[1], then collects the text before the first ',' into `first` and the text from there to the next '\n' into `second`, and prints both.
+
+ char* filename = argv[1]; // NOTE(review): argc is never checked, so argv[1] may be NULL
+ int columns = countColumns(filename);
+ printf("columns: %d\n", columns);
+
+ int size = 0;
+ char data [size]; // size is 0 here, so this declares a zero-length variable-length array
+ FILE* fp = fopen(filename, "r"); // NOTE(review): the fopen result is not checked before getc uses it
+ DynamicArray* first = makeArr(data,size); // makeArr/arrAppend/printArr presumably come from dynamic-array.h; their contracts are not visible here
+ DynamicArray* second = makeArr(data,size);
+
+ char c; // NOTE(review): getc returns int; a char cannot represent EOF reliably
+ while ((c = getc(fp)) != ','){ // exits only on ','; there is no EOF test, so without a ',' the condition never becomes false
+ arrAppend(first, c);
+ }
+
+ while ((c = getc(fp)) != '\n'){ // exits only on '\n'; same missing EOF test as the loop above
+ arrAppend(second, c);
+ }
+
+ printArr(first);
+ printf("\n");
+ printArr(second);
+ printf("\n");
+
+ // todo: cleanup allocations (first and second are not released anywhere in this function)
+
+ fclose(fp);
+
+ return 0;
+}
diff --git a/ml/read-csv.c b/ml/read-csv.c
@@ -1,35 +1,70 @@
#include <stdio.h>
-#include "count-columns.h"
-#include "dynamic-array.h"
#include <stdlib.h>
-int main(int argc, char ** argv){
-
- char* filename = argv[1];
- int columns = countColumns(filename);
- printf("columns: %d\n", columns);
+#define TRUE 1
+#define FALSE 0
- int size = 0;
- char data [size];
- FILE* fp = fopen(filename, "r");
- DynamicArray* first = makeArr(data,size);
- DynamicArray* second = makeArr(data,size);
+/*
+ * Count the rows of the file named by fileName.
+ *
+ * Every '\n' read from the file is counted. The result is that count plus
+ * one, so a file whose last line ends in '\n' is reported with one extra
+ * (empty) row, and an empty file is reported as 1.
+ *
+ * Returns the row count, or -1 if the file cannot be opened.
+ */
+int getRows(char* fileName){
- char c;
- while ((c = getc(fp)) != ','){
- arrAppend(first, c);
+    FILE* fp = fopen(fileName, "r");
+    if(fp == NULL){
+        return -1;
+    }
+
+    /* Keep getc's int result: stored in a char, EOF either collides with
+     * byte 0xFF or (where char is unsigned) never compares equal. */
+    int c;
+    int count = 0;
+    while((c = getc(fp)) != EOF){
+        if(c == '\n'){
+            count += 1;
+        }
}
+    fclose(fp);
+    return count + 1;
- while ((c = getc(fp)) != '\n'){
- arrAppend(second, c);
+}
+
+// there's more to it than just this
+// not sure how \n characters work.
+// not sure how to handle escaping of the \" character.
+
+/*
+ * Count the comma-separated fields on the first line of the file named by
+ * fileName.
+ *
+ * Characters are read up to the first '\n' or end of file. Each '"' toggles
+ * a quoted state, and commas seen while quoted are not counted. A doubled
+ * quote toggles twice and therefore leaves the state unchanged.
+ *
+ * Returns the number of unquoted commas plus one, -1 if the line ends while
+ * a quote is still open (a quoted field containing '\n' is reported this
+ * way too), or -2 if the file cannot be opened.
+ */
+int getColumns(char* fileName){
+    int count = 0;
+    int escaped = FALSE;
+
+    FILE* fp = fopen(fileName, "r");
+    if(fp == NULL){
+        return -2;
+    }
+
+    /* Keep getc's int result: stored in a char, EOF either collides with
+     * byte 0xFF or (where char is unsigned) never compares equal. */
+    int c = getc(fp);
+    while(c != EOF && c != '\n'){
+        if(c == ',' && escaped == FALSE){
+            count += 1;
+        }
+        else if (c == '"'){
+            escaped = (escaped == TRUE) ? FALSE : TRUE;
+        }
+        c = getc(fp);
}
- printArr(first);
- printf("\n");
- printArr(second);
- printf("\n");
+    fclose(fp);
+
+    if(escaped == TRUE){
+        return -1;
+    }
+    return count + 1;
+}
+
+
+int main(int argc, char** argv){
+
+ char* fileName = argv[1];
+ FILE* fp = fopen(fileName, "r");
+
+ char** allRecords;
+ int rows = getRows(fileName);
+ int columns = getColumns(fileName);
+
+ if (columns == -1){
+ printf("Non-matching quotations in first row.");
+ }
- // todo: cleanup allocations
+ printf("Rows: %d, Columns: %d\n", rows, columns);
fclose(fp);