machinelearning

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs

commit 38163490de5aeae453dacdfca14964676c0968c0
parent 5e94d00f154d263ad235bccf885cfb1b61d207f5
Author: Andrew <andrewlaack1@gmail.com>
Date:   Sat, 25 May 2024 16:15:41 -0500

implemented standardization algorithm for pandas data frame.

Diffstat:
M linearRegression/LinearRegressionHousingV2.ipynb | 104 ++++++++++++++++++++++++++++++++++++++++++++++----------------------------------
A standardization/standardization.ipynb | 84 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 144 insertions(+), 44 deletions(-)

diff --git a/linearRegression/LinearRegressionHousingV2.ipynb b/linearRegression/LinearRegressionHousingV2.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 481, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -36,7 +36,7 @@ }, { "cell_type": "code", - "execution_count": 482, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -78,7 +78,7 @@ }, { "cell_type": "code", - "execution_count": 483, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -101,7 +101,7 @@ }, { "cell_type": "code", - "execution_count": 484, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -117,7 +117,7 @@ }, { "cell_type": "code", - "execution_count": 485, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -155,7 +155,7 @@ }, { "cell_type": "code", - "execution_count": 486, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -192,7 +192,7 @@ " <Axes: title={'center': 'median_house_value'}>]], dtype=object)" ] }, - "execution_count": 486, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" }, @@ -235,7 +235,7 @@ }, { "cell_type": "code", - "execution_count": 487, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -247,7 +247,7 @@ }, { "cell_type": "code", - "execution_count": 488, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -260,7 +260,7 @@ }, { "cell_type": "code", - "execution_count": 489, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -290,7 +290,7 @@ }, { "cell_type": "code", - "execution_count": 490, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -308,7 +308,7 @@ "Name: median_house_value, dtype: float64" ] }, - "execution_count": 490, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -328,7 +328,7 @@ }, { "cell_type": "code", - "execution_count": 491, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -353,7 +353,7 @@ " dtype=object)" ] }, - "execution_count": 491, + 
"execution_count": 12, "metadata": {}, "output_type": "execute_result" }, @@ -394,7 +394,7 @@ }, { "cell_type": "code", - "execution_count": 492, + "execution_count": 13, "metadata": {}, "outputs": [ { @@ -403,7 +403,7 @@ "<Axes: xlabel='median_income', ylabel='median_house_value'>" ] }, - "execution_count": 492, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" }, @@ -425,7 +425,7 @@ }, { "cell_type": "code", - "execution_count": 493, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -446,7 +446,7 @@ "Name: median_house_value, dtype: float64" ] }, - "execution_count": 493, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -462,7 +462,7 @@ }, { "cell_type": "code", - "execution_count": 494, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -488,7 +488,7 @@ }, { "cell_type": "code", - "execution_count": 495, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -510,7 +510,7 @@ "Name: median_house_value, dtype: float64" ] }, - "execution_count": 495, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -528,7 +528,7 @@ }, { "cell_type": "code", - "execution_count": 496, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ @@ -540,7 +540,7 @@ }, { "cell_type": "code", - "execution_count": 497, + "execution_count": 18, "metadata": {}, "outputs": [], "source": [ @@ -562,39 +562,55 @@ }, { "cell_type": "code", - "execution_count": 513, + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.preprocessing import OneHotEncoder\n", + "\n", + "# Use one hot encoding to map each option for the ocean proximity feature\n", + "# to its own boolean column\n", + "\n", + "housing_cat = housing[[\"ocean_proximity\"]]\n", + "encoder = OneHotEncoder()\n", + "housing_1hot = encoder.fit_transform(housing_cat)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 21, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": 
"stream", "text": [ - " 0\n", - "0 (0, 0)\\t1.0\n", - "1 (0, 0)\\t1.0\n", - "2 (0, 0)\\t1.0\n", - "3 (0, 0)\\t1.0\n", - "4 (0, 0)\\t1.0\n", - "... ...\n", - "16507 (0, 1)\\t1.0\n", - "16508 (0, 1)\\t1.0\n", - "16509 (0, 4)\\t1.0\n", - "16510 (0, 3)\\t1.0\n", - "16511 (0, 1)\\t1.0\n", - "\n", - "[16512 rows x 1 columns]\n" + "[[ 0.23279352 -0.65568544 -0.29411765 ... -0.86955152 -0.73359645\n", + " -0.63126026]\n", + " [ 0.21659919 -0.7194474 0.33333333 ... -0.92962924 -0.88653182\n", + " -0.31322327]\n", + " [ 0.24089069 -0.7109458 0.45098039 ... -0.95266604 -0.94540372\n", + " -0.5640336 ]\n", + " ...\n", + " [ 0.46963563 -0.97662062 -0.25490196 ... -0.96008823 -0.89047854\n", + " -0.70771438]\n", + " [-0.61336032 0.11583422 1. ... -0.94419354 -0.84015787\n", + " -0.52068247]\n", + " [-0.38866397 0.21997875 -0.49019608 ... -0.95406645 -0.94014142\n", + " -0.45784196]]\n" ] } ], "source": [ - "from sklearn.preprocessing import OneHotEncoder\n", + "from sklearn.preprocessing import MinMaxScaler\n", "\n", - "# Use one hot encoding to map each option for the ocean proximity feature\n", - "# to its own boolean column\n", + "# Built in min max computation ranging from -1 to 1. 
\n", + "# See minMaxScaling folder for custom implementation of this from 0 to 1.\n", "\n", - "housing_cat = housing[[\"ocean_proximity\"]]\n", - "encoder = OneHotEncoder()\n", - "housing_1hot = encoder.fit_transform(housing_cat)\n" + "min_max = MinMaxScaler(feature_range=(-1,1))\n", + "housing_num_min_max = min_max.fit_transform(housing_num)\n", + "\n", + "print(housing_num_min_max)" ] } ], diff --git a/standardization/standardization.ipynb b/standardization/standardization.ipynb @@ -0,0 +1,84 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " longitude latitude housing_median_age total_rooms total_bedrooms \\\n", + "0 -122.23 37.88 41.0 880.0 129.0 \n", + "1 -122.22 37.86 21.0 7099.0 1106.0 \n", + "2 -122.24 37.85 52.0 1467.0 190.0 \n", + "3 -122.25 37.85 52.0 1274.0 235.0 \n", + "4 -122.25 37.85 52.0 1627.0 280.0 \n", + "... ... ... ... ... ... \n", + "20635 -121.09 39.48 25.0 1665.0 374.0 \n", + "20636 -121.21 39.49 18.0 697.0 150.0 \n", + "20637 -121.22 39.43 17.0 2254.0 485.0 \n", + "20638 -121.32 39.43 18.0 1860.0 409.0 \n", + "20639 -121.24 39.37 16.0 2785.0 616.0 \n", + "\n", + " population households median_income median_house_value \\\n", + "0 322.0 126.0 8.3252 452600.0 \n", + "1 2401.0 1138.0 8.3014 358500.0 \n", + "2 496.0 177.0 7.2574 352100.0 \n", + "3 558.0 219.0 5.6431 341300.0 \n", + "4 565.0 259.0 3.8462 342200.0 \n", + "... ... ... ... ... \n", + "20635 845.0 330.0 1.5603 78100.0 \n", + "20636 356.0 114.0 2.5568 77100.0 \n", + "20637 1007.0 433.0 1.7000 92300.0 \n", + "20638 741.0 349.0 1.8672 84700.0 \n", + "20639 1387.0 530.0 2.3886 89400.0 \n", + "\n", + " ocean_proximity \n", + "0 NEAR BAY \n", + "1 NEAR BAY \n", + "2 NEAR BAY \n", + "3 NEAR BAY \n", + "4 NEAR BAY \n", + "... ... 
\n", + "20635 INLAND \n", + "20636 INLAND \n", + "20637 INLAND \n", + "20638 INLAND \n", + "20639 INLAND \n", + "\n", + "[20640 rows x 10 columns]\n" + ] + } + ], + "source": [ + "import pandas as pd\n", + "from pathlib import Path \n", + "\n", + "df = pd.read_csv(Path('../datasets/housing/housing.csv'))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "notebook", + "language": "python", + "name": "notebook" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}