machinelearning

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs

commit 54d99d4eb66d157bea7c1e93e1713d1623d5c61d
parent 38163490de5aeae453dacdfca14964676c0968c0
Author: Andrew <andrewlaack1@gmail.com>
Date:   Sat, 25 May 2024 16:15:58 -0500

updated

Diffstat:
Mstandardization/standardization.ipynb | 84+++++++++++++++++++++++++++++++++++++++++--------------------------------------
1 file changed, 44 insertions(+), 40 deletions(-)

diff --git a/standardization/standardization.ipynb b/standardization/standardization.ipynb @@ -2,7 +2,19 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from pathlib import Path \n", + "\n", + "df = pd.read_csv(Path('../datasets/housing/housing.csv'))" + ] + }, + { + "cell_type": "code", + "execution_count": 27, "metadata": {}, "outputs": [ { @@ -10,53 +22,45 @@ "output_type": "stream", "text": [ " longitude latitude housing_median_age total_rooms total_bedrooms \\\n", - "0 -122.23 37.88 41.0 880.0 129.0 \n", - "1 -122.22 37.86 21.0 7099.0 1106.0 \n", - "2 -122.24 37.85 52.0 1467.0 190.0 \n", - "3 -122.25 37.85 52.0 1274.0 235.0 \n", - "4 -122.25 37.85 52.0 1627.0 280.0 \n", + "0 -1.327803 1.052523 0.982119 -0.804800 -0.970301 \n", + "1 -1.322812 1.043159 -0.607004 2.045841 1.348243 \n", + "2 -1.332794 1.038478 1.856137 -0.535733 -0.825541 \n", + "3 -1.337785 1.038478 1.856137 -0.624199 -0.718750 \n", + "4 -1.337785 1.038478 1.856137 -0.462393 -0.611959 \n", "... ... ... ... ... ... \n", - "20635 -121.09 39.48 25.0 1665.0 374.0 \n", - "20636 -121.21 39.49 18.0 697.0 150.0 \n", - "20637 -121.22 39.43 17.0 2254.0 485.0 \n", - "20638 -121.32 39.43 18.0 1860.0 409.0 \n", - "20639 -121.24 39.37 16.0 2785.0 616.0 \n", + "20635 -0.758808 1.801603 -0.289180 -0.444974 -0.388886 \n", + "20636 -0.818702 1.806285 -0.845373 -0.888682 -0.920466 \n", + "20637 -0.823693 1.778194 -0.924829 -0.174991 -0.125468 \n", + "20638 -0.873605 1.778194 -0.845373 -0.355591 -0.305826 \n", + "20639 -0.833676 1.750104 -1.004285 0.068407 0.185411 \n", "\n", - " population households median_income median_house_value \\\n", - "0 322.0 126.0 8.3252 452600.0 \n", - "1 2401.0 1138.0 8.3014 358500.0 \n", - "2 496.0 177.0 7.2574 352100.0 \n", - "3 558.0 219.0 5.6431 341300.0 \n", - "4 565.0 259.0 3.8462 342200.0 \n", - "... ... ... ... ... \n", - "20635 845.0 330.0 1.5603 78100.0 \n", - "20636 356.0 114.0 2.5568 77100.0 \n", - "20637 1007.0 433.0 1.7000 92300.0 \n", - "20638 741.0 349.0 1.8672 84700.0 \n", - "20639 1387.0 530.0 2.3886 89400.0 \n", + " population households median_income median_house_value \n", + "0 -0.974405 -0.977009 2.344709 2.129580 \n", + "1 0.861418 1.669921 2.332181 1.314124 \n", + "2 -0.820757 -0.843616 1.782656 1.258663 \n", + "3 -0.766010 -0.733764 0.932945 1.165072 \n", + "4 -0.759828 -0.629142 -0.012881 1.172871 \n", + "... ... ... ... ... \n", + "20635 -0.512579 -0.443438 -1.216099 -1.115777 \n", + "20636 -0.944382 -1.008396 -0.691576 -1.124443 \n", + "20637 -0.369528 -0.174037 -1.142566 -0.992722 \n", + "20638 -0.604415 -0.393743 -1.054557 -1.058583 \n", + "20639 -0.033976 0.079670 -0.780111 -1.017853 \n", "\n", - " ocean_proximity \n", - "0 NEAR BAY \n", - "1 NEAR BAY \n", - "2 NEAR BAY \n", - "3 NEAR BAY \n", - "4 NEAR BAY \n", - "... ... \n", - "20635 INLAND \n", - "20636 INLAND \n", - "20637 INLAND \n", - "20638 INLAND \n", - "20639 INLAND \n", - "\n", - "[20640 rows x 10 columns]\n" + "[20640 rows x 9 columns]\n" ] } ], "source": [ - "import pandas as pd\n", - "from pathlib import Path \n", + "# Get number columns\n", + "df = df.select_dtypes(include=['number'])\n", "\n", - "df = pd.read_csv(Path('../datasets/housing/housing.csv'))" + "for i in df:\n", + " mean = df[i].mean()\n", + " std = df[i].std()\n", + " df[i] = (df[i] - mean) / std\n", + "\n", + "print(df)" ] } ],