commit 5e94d00f154d263ad235bccf885cfb1b61d207f5
parent 3196bb39b068759b5e879e3dfb28eada6978242a
Author: Andrew <andrewlaack1@gmail.com>
Date: Fri, 24 May 2024 19:06:48 -0500
Completed some stuff
Diffstat:
3 files changed, 343 insertions(+), 280 deletions(-)
diff --git a/linearRegression/LinearRegressionHousingV2.ipynb b/linearRegression/LinearRegressionHousingV2.ipynb
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
- "execution_count": 288,
+ "execution_count": 481,
"metadata": {},
"outputs": [],
"source": [
@@ -36,7 +36,7 @@
},
{
"cell_type": "code",
- "execution_count": 289,
+ "execution_count": 482,
"metadata": {},
"outputs": [
{
@@ -78,7 +78,7 @@
},
{
"cell_type": "code",
- "execution_count": 290,
+ "execution_count": 483,
"metadata": {},
"outputs": [
{
@@ -101,7 +101,7 @@
},
{
"cell_type": "code",
- "execution_count": 291,
+ "execution_count": 484,
"metadata": {},
"outputs": [],
"source": [
@@ -117,7 +117,7 @@
},
{
"cell_type": "code",
- "execution_count": 292,
+ "execution_count": 485,
"metadata": {},
"outputs": [
{
@@ -155,7 +155,7 @@
},
{
"cell_type": "code",
- "execution_count": 293,
+ "execution_count": 486,
"metadata": {},
"outputs": [
{
@@ -192,7 +192,7 @@
" <Axes: title={'center': 'median_house_value'}>]], dtype=object)"
]
},
- "execution_count": 293,
+ "execution_count": 486,
"metadata": {},
"output_type": "execute_result"
},
@@ -235,7 +235,7 @@
},
{
"cell_type": "code",
- "execution_count": 294,
+ "execution_count": 487,
"metadata": {},
"outputs": [],
"source": [
@@ -247,7 +247,7 @@
},
{
"cell_type": "code",
- "execution_count": 295,
+ "execution_count": 488,
"metadata": {},
"outputs": [],
"source": [
@@ -260,7 +260,7 @@
},
{
"cell_type": "code",
- "execution_count": 296,
+ "execution_count": 489,
"metadata": {},
"outputs": [
{
@@ -290,7 +290,7 @@
},
{
"cell_type": "code",
- "execution_count": 297,
+ "execution_count": 490,
"metadata": {},
"outputs": [
{
@@ -308,7 +308,7 @@
"Name: median_house_value, dtype: float64"
]
},
- "execution_count": 297,
+ "execution_count": 490,
"metadata": {},
"output_type": "execute_result"
}
@@ -328,7 +328,7 @@
},
{
"cell_type": "code",
- "execution_count": 298,
+ "execution_count": 491,
"metadata": {},
"outputs": [
{
@@ -353,7 +353,7 @@
" dtype=object)"
]
},
- "execution_count": 298,
+ "execution_count": 491,
"metadata": {},
"output_type": "execute_result"
},
@@ -394,7 +394,7 @@
},
{
"cell_type": "code",
- "execution_count": 299,
+ "execution_count": 492,
"metadata": {},
"outputs": [
{
@@ -403,7 +403,7 @@
"<Axes: xlabel='median_income', ylabel='median_house_value'>"
]
},
- "execution_count": 299,
+ "execution_count": 492,
"metadata": {},
"output_type": "execute_result"
},
@@ -425,7 +425,7 @@
},
{
"cell_type": "code",
- "execution_count": 300,
+ "execution_count": 493,
"metadata": {},
"outputs": [
{
@@ -446,7 +446,7 @@
"Name: median_house_value, dtype: float64"
]
},
- "execution_count": 300,
+ "execution_count": 493,
"metadata": {},
"output_type": "execute_result"
}
@@ -462,7 +462,7 @@
},
{
"cell_type": "code",
- "execution_count": 301,
+ "execution_count": 494,
"metadata": {},
"outputs": [
{
@@ -488,271 +488,29 @@
},
{
"cell_type": "code",
- "execution_count": 310,
+ "execution_count": 495,
"metadata": {},
"outputs": [
{
"data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>longitude</th>\n",
- " <th>latitude</th>\n",
- " <th>housing_median_age</th>\n",
- " <th>total_rooms</th>\n",
- " <th>total_bedrooms</th>\n",
- " <th>population</th>\n",
- " <th>households</th>\n",
- " <th>median_income</th>\n",
- " <th>median_house_value</th>\n",
- " <th>rooms_per_house</th>\n",
- " <th>bedroom_ratio</th>\n",
- " <th>people_per_house</th>\n",
- " <th>ocean_dist</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>5419</th>\n",
- " <td>-118.26</td>\n",
- " <td>34.16</td>\n",
- " <td>19.0</td>\n",
- " <td>2919.0</td>\n",
- " <td>857.0</td>\n",
- " <td>1866.0</td>\n",
- " <td>811.0</td>\n",
- " <td>3.1733</td>\n",
- " <td>206300.0</td>\n",
- " <td>3.599260</td>\n",
- " <td>0.293594</td>\n",
- " <td>2.300863</td>\n",
- " <td>240104.670351</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>3598</th>\n",
- " <td>-118.34</td>\n",
- " <td>33.86</td>\n",
- " <td>35.0</td>\n",
- " <td>1936.0</td>\n",
- " <td>343.0</td>\n",
- " <td>1008.0</td>\n",
- " <td>346.0</td>\n",
- " <td>5.4791</td>\n",
- " <td>285900.0</td>\n",
- " <td>5.595376</td>\n",
- " <td>0.177169</td>\n",
- " <td>2.913295</td>\n",
- " <td>240104.670351</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>13269</th>\n",
- " <td>-118.22</td>\n",
- " <td>33.90</td>\n",
- " <td>38.0</td>\n",
- " <td>796.0</td>\n",
- " <td>159.0</td>\n",
- " <td>679.0</td>\n",
- " <td>167.0</td>\n",
- " <td>3.6607</td>\n",
- " <td>110400.0</td>\n",
- " <td>4.766467</td>\n",
- " <td>0.199749</td>\n",
- " <td>4.065868</td>\n",
- " <td>240104.670351</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>16983</th>\n",
- " <td>-118.15</td>\n",
- " <td>34.19</td>\n",
- " <td>47.0</td>\n",
- " <td>1717.0</td>\n",
- " <td>314.0</td>\n",
- " <td>868.0</td>\n",
- " <td>295.0</td>\n",
- " <td>3.6094</td>\n",
- " <td>160700.0</td>\n",
- " <td>5.820339</td>\n",
- " <td>0.182877</td>\n",
- " <td>2.942373</td>\n",
- " <td>240104.670351</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>7364</th>\n",
- " <td>-117.14</td>\n",
- " <td>32.93</td>\n",
- " <td>14.0</td>\n",
- " <td>1946.0</td>\n",
- " <td>463.0</td>\n",
- " <td>1205.0</td>\n",
- " <td>390.0</td>\n",
- " <td>4.2109</td>\n",
- " <td>171200.0</td>\n",
- " <td>4.989744</td>\n",
- " <td>0.237924</td>\n",
- " <td>3.089744</td>\n",
- " <td>240104.670351</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>...</th>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1153</th>\n",
- " <td>-117.42</td>\n",
- " <td>33.94</td>\n",
- " <td>26.0</td>\n",
- " <td>2420.0</td>\n",
- " <td>532.0</td>\n",
- " <td>1383.0</td>\n",
- " <td>469.0</td>\n",
- " <td>3.5403</td>\n",
- " <td>113500.0</td>\n",
- " <td>5.159915</td>\n",
- " <td>0.219835</td>\n",
- " <td>2.948827</td>\n",
- " <td>125032.908322</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>14075</th>\n",
- " <td>-121.46</td>\n",
- " <td>38.60</td>\n",
- " <td>29.0</td>\n",
- " <td>1978.0</td>\n",
- " <td>538.0</td>\n",
- " <td>823.0</td>\n",
- " <td>490.0</td>\n",
- " <td>1.9688</td>\n",
- " <td>135600.0</td>\n",
- " <td>4.036735</td>\n",
- " <td>0.271992</td>\n",
- " <td>1.679592</td>\n",
- " <td>125032.908322</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>7277</th>\n",
- " <td>-117.09</td>\n",
- " <td>32.65</td>\n",
- " <td>20.0</td>\n",
- " <td>1445.0</td>\n",
- " <td>323.0</td>\n",
- " <td>573.0</td>\n",
- " <td>334.0</td>\n",
- " <td>2.6190</td>\n",
- " <td>145800.0</td>\n",
- " <td>4.326347</td>\n",
- " <td>0.223529</td>\n",
- " <td>1.715569</td>\n",
- " <td>250327.236143</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>9621</th>\n",
- " <td>-122.44</td>\n",
- " <td>37.79</td>\n",
- " <td>52.0</td>\n",
- " <td>1817.0</td>\n",
- " <td>535.0</td>\n",
- " <td>800.0</td>\n",
- " <td>487.0</td>\n",
- " <td>3.9750</td>\n",
- " <td>500001.0</td>\n",
- " <td>3.731006</td>\n",
- " <td>0.294441</td>\n",
- " <td>1.642710</td>\n",
- " <td>256986.420765</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>9665</th>\n",
- " <td>-121.33</td>\n",
- " <td>38.28</td>\n",
- " <td>14.0</td>\n",
- " <td>980.0</td>\n",
- " <td>171.0</td>\n",
- " <td>659.0</td>\n",
- " <td>183.0</td>\n",
- " <td>4.4306</td>\n",
- " <td>170100.0</td>\n",
- " <td>5.355191</td>\n",
- " <td>0.174490</td>\n",
- " <td>3.601093</td>\n",
- " <td>125032.908322</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "<p>16512 rows × 13 columns</p>\n",
- "</div>"
- ],
"text/plain": [
- " longitude latitude housing_median_age total_rooms total_bedrooms \\\n",
- "5419 -118.26 34.16 19.0 2919.0 857.0 \n",
- "3598 -118.34 33.86 35.0 1936.0 343.0 \n",
- "13269 -118.22 33.90 38.0 796.0 159.0 \n",
- "16983 -118.15 34.19 47.0 1717.0 314.0 \n",
- "7364 -117.14 32.93 14.0 1946.0 463.0 \n",
- "... ... ... ... ... ... \n",
- "1153 -117.42 33.94 26.0 2420.0 532.0 \n",
- "14075 -121.46 38.60 29.0 1978.0 538.0 \n",
- "7277 -117.09 32.65 20.0 1445.0 323.0 \n",
- "9621 -122.44 37.79 52.0 1817.0 535.0 \n",
- "9665 -121.33 38.28 14.0 980.0 171.0 \n",
- "\n",
- " population households median_income median_house_value \\\n",
- "5419 1866.0 811.0 3.1733 206300.0 \n",
- "3598 1008.0 346.0 5.4791 285900.0 \n",
- "13269 679.0 167.0 3.6607 110400.0 \n",
- "16983 868.0 295.0 3.6094 160700.0 \n",
- "7364 1205.0 390.0 4.2109 171200.0 \n",
- "... ... ... ... ... \n",
- "1153 1383.0 469.0 3.5403 113500.0 \n",
- "14075 823.0 490.0 1.9688 135600.0 \n",
- "7277 573.0 334.0 2.6190 145800.0 \n",
- "9621 800.0 487.0 3.9750 500001.0 \n",
- "9665 659.0 183.0 4.4306 170100.0 \n",
- "\n",
- " rooms_per_house bedroom_ratio people_per_house ocean_dist \n",
- "5419 3.599260 0.293594 2.300863 240104.670351 \n",
- "3598 5.595376 0.177169 2.913295 240104.670351 \n",
- "13269 4.766467 0.199749 4.065868 240104.670351 \n",
- "16983 5.820339 0.182877 2.942373 240104.670351 \n",
- "7364 4.989744 0.237924 3.089744 240104.670351 \n",
- "... ... ... ... ... \n",
- "1153 5.159915 0.219835 2.948827 125032.908322 \n",
- "14075 4.036735 0.271992 1.679592 125032.908322 \n",
- "7277 4.326347 0.223529 1.715569 250327.236143 \n",
- "9621 3.731006 0.294441 1.642710 256986.420765 \n",
- "9665 5.355191 0.174490 3.601093 125032.908322 \n",
- "\n",
- "[16512 rows x 13 columns]"
+ "median_house_value 1.000000\n",
+ "median_income 0.689222\n",
+ "ocean_dist 0.484102\n",
+ "rooms_per_house 0.148076\n",
+ "total_rooms 0.128957\n",
+ "housing_median_age 0.101160\n",
+ "households 0.059776\n",
+ "total_bedrooms 0.043272\n",
+ "people_per_house -0.021860\n",
+ "population -0.027846\n",
+ "longitude -0.046072\n",
+ "latitude -0.143096\n",
+ "bedroom_ratio -0.258569\n",
+ "Name: median_house_value, dtype: float64"
]
},
- "execution_count": 310,
+ "execution_count": 495,
"metadata": {},
"output_type": "execute_result"
}
@@ -767,6 +525,77 @@
"\n",
"corr['median_house_value'].sort_values(ascending=False)\n"
]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 496,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Start again from stratTrain: separate the features from the median_house_value labels\n",
+ "\n",
+ "housing = stratTrain.drop(\"median_house_value\", axis=1)\n",
+ "housing_labels = stratTrain[\"median_house_value\"].copy()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 497,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sklearn.impute import SimpleImputer\n",
+ "\n",
+ "imputer = SimpleImputer(strategy='median')\n",
+ "\n",
+ "# Keep only the attributes whose dtype is numeric (np.number). In essence, this drops the\n",
+ "# ocean_proximity string attribute, which a median imputer cannot handle.\n",
+ "housing_num = housing.select_dtypes(include=[np.number])\n",
+ "imputer.fit(housing_num)\n",
+ "\n",
+ "# Impute all null values of the dataset using the medians learned by fit().\n",
+ "X = imputer.transform(housing_num)\n",
+ "\n",
+ "# Take the numpy array and put it back into a dataframe\n",
+ "housing_tr = pd.DataFrame(X, columns=housing_num.columns, index=housing_num.index)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 513,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " 0\n",
+ "0 (0, 0)\\t1.0\n",
+ "1 (0, 0)\\t1.0\n",
+ "2 (0, 0)\\t1.0\n",
+ "3 (0, 0)\\t1.0\n",
+ "4 (0, 0)\\t1.0\n",
+ "... ...\n",
+ "16507 (0, 1)\\t1.0\n",
+ "16508 (0, 1)\\t1.0\n",
+ "16509 (0, 4)\\t1.0\n",
+ "16510 (0, 3)\\t1.0\n",
+ "16511 (0, 1)\\t1.0\n",
+ "\n",
+ "[16512 rows x 1 columns]\n"
+ ]
+ }
+ ],
+ "source": [
+ "from sklearn.preprocessing import OneHotEncoder\n",
+ "\n",
+ "# Use one-hot encoding to map each category of the ocean_proximity feature\n",
+ "# to its own 0/1 indicator column (fit_transform returns a sparse matrix by default)\n",
+ "\n",
+ "housing_cat = housing[[\"ocean_proximity\"]]\n",
+ "encoder = OneHotEncoder()\n",
+ "housing_1hot = encoder.fit_transform(housing_cat)\n"
+ ]
}
],
"metadata": {
diff --git a/minMaxScaling/MinMaxScaling.ipynb b/minMaxScaling/MinMaxScaling.ipynb
@@ -0,0 +1,234 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "import pathlib as path\n",
+ "# Load in saved csv data\n",
+ "df = pd.read_csv(path.Path('../datasets/housing/housing.csv'))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Remove string column\n",
+ "df.drop(columns='ocean_proximity', axis=1, inplace=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "<div>\n",
+ "<style scoped>\n",
+ " .dataframe tbody tr th:only-of-type {\n",
+ " vertical-align: middle;\n",
+ " }\n",
+ "\n",
+ " .dataframe tbody tr th {\n",
+ " vertical-align: top;\n",
+ " }\n",
+ "\n",
+ " .dataframe thead th {\n",
+ " text-align: right;\n",
+ " }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ " <thead>\n",
+ " <tr style=\"text-align: right;\">\n",
+ " <th></th>\n",
+ " <th>longitude</th>\n",
+ " <th>latitude</th>\n",
+ " <th>housing_median_age</th>\n",
+ " <th>total_rooms</th>\n",
+ " <th>total_bedrooms</th>\n",
+ " <th>population</th>\n",
+ " <th>households</th>\n",
+ " <th>median_income</th>\n",
+ " <th>median_house_value</th>\n",
+ " </tr>\n",
+ " </thead>\n",
+ " <tbody>\n",
+ " <tr>\n",
+ " <th>count</th>\n",
+ " <td>20640.000000</td>\n",
+ " <td>20640.000000</td>\n",
+ " <td>20640.000000</td>\n",
+ " <td>20640.000000</td>\n",
+ " <td>20433.000000</td>\n",
+ " <td>20640.000000</td>\n",
+ " <td>20640.000000</td>\n",
+ " <td>20640.000000</td>\n",
+ " <td>20640.000000</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>mean</th>\n",
+ " <td>0.476125</td>\n",
+ " <td>0.328572</td>\n",
+ " <td>0.541951</td>\n",
+ " <td>0.066986</td>\n",
+ " <td>0.083313</td>\n",
+ " <td>0.039869</td>\n",
+ " <td>0.081983</td>\n",
+ " <td>0.232464</td>\n",
+ " <td>0.395579</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>std</th>\n",
+ " <td>0.199555</td>\n",
+ " <td>0.226988</td>\n",
+ " <td>0.246776</td>\n",
+ " <td>0.055486</td>\n",
+ " <td>0.065392</td>\n",
+ " <td>0.031740</td>\n",
+ " <td>0.062873</td>\n",
+ " <td>0.131020</td>\n",
+ " <td>0.237928</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>min</th>\n",
+ " <td>0.000000</td>\n",
+ " <td>0.000000</td>\n",
+ " <td>0.000000</td>\n",
+ " <td>0.000000</td>\n",
+ " <td>0.000000</td>\n",
+ " <td>0.000000</td>\n",
+ " <td>0.000000</td>\n",
+ " <td>0.000000</td>\n",
+ " <td>0.000000</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>25%</th>\n",
+ " <td>0.253984</td>\n",
+ " <td>0.147715</td>\n",
+ " <td>0.333333</td>\n",
+ " <td>0.036771</td>\n",
+ " <td>0.045779</td>\n",
+ " <td>0.021974</td>\n",
+ " <td>0.045881</td>\n",
+ " <td>0.142308</td>\n",
+ " <td>0.215671</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>50%</th>\n",
+ " <td>0.583665</td>\n",
+ " <td>0.182784</td>\n",
+ " <td>0.549020</td>\n",
+ " <td>0.054046</td>\n",
+ " <td>0.067349</td>\n",
+ " <td>0.032596</td>\n",
+ " <td>0.067094</td>\n",
+ " <td>0.209301</td>\n",
+ " <td>0.339588</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>75%</th>\n",
+ " <td>0.631474</td>\n",
+ " <td>0.549416</td>\n",
+ " <td>0.705882</td>\n",
+ " <td>0.080014</td>\n",
+ " <td>0.100248</td>\n",
+ " <td>0.048264</td>\n",
+ " <td>0.099326</td>\n",
+ " <td>0.292641</td>\n",
+ " <td>0.514897</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>max</th>\n",
+ " <td>1.000000</td>\n",
+ " <td>1.000000</td>\n",
+ " <td>1.000000</td>\n",
+ " <td>1.000000</td>\n",
+ " <td>1.000000</td>\n",
+ " <td>1.000000</td>\n",
+ " <td>1.000000</td>\n",
+ " <td>1.000000</td>\n",
+ " <td>1.000000</td>\n",
+ " </tr>\n",
+ " </tbody>\n",
+ "</table>\n",
+ "</div>"
+ ],
+ "text/plain": [
+ " longitude latitude housing_median_age total_rooms \\\n",
+ "count 20640.000000 20640.000000 20640.000000 20640.000000 \n",
+ "mean 0.476125 0.328572 0.541951 0.066986 \n",
+ "std 0.199555 0.226988 0.246776 0.055486 \n",
+ "min 0.000000 0.000000 0.000000 0.000000 \n",
+ "25% 0.253984 0.147715 0.333333 0.036771 \n",
+ "50% 0.583665 0.182784 0.549020 0.054046 \n",
+ "75% 0.631474 0.549416 0.705882 0.080014 \n",
+ "max 1.000000 1.000000 1.000000 1.000000 \n",
+ "\n",
+ " total_bedrooms population households median_income \\\n",
+ "count 20433.000000 20640.000000 20640.000000 20640.000000 \n",
+ "mean 0.083313 0.039869 0.081983 0.232464 \n",
+ "std 0.065392 0.031740 0.062873 0.131020 \n",
+ "min 0.000000 0.000000 0.000000 0.000000 \n",
+ "25% 0.045779 0.021974 0.045881 0.142308 \n",
+ "50% 0.067349 0.032596 0.067094 0.209301 \n",
+ "75% 0.100248 0.048264 0.099326 0.292641 \n",
+ "max 1.000000 1.000000 1.000000 1.000000 \n",
+ "\n",
+ " median_house_value \n",
+ "count 20640.000000 \n",
+ "mean 0.395579 \n",
+ "std 0.237928 \n",
+ "min 0.000000 \n",
+ "25% 0.215671 \n",
+ "50% 0.339588 \n",
+ "75% 0.514897 \n",
+ "max 1.000000 "
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Min-max scale every column (all are assumed to be numeric): replace each value\n",
+ "# with (value - min) / (max - min), which maps the column onto the range [0, 1].\n",
+ "\n",
+ "for i in df:\n",
+ " min = df[i].min()\n",
+ " diff = df[i].max() - min\n",
+ " df[i] = (df[i] - min) / diff \n",
+ "\n",
+ "df.describe()"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "notebook",
+ "language": "python",
+ "name": "notebook"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.2"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/sigmoidFunction/Sigmoid.ipynb b/sigmoidFunction/Sigmoid.ipynb
@@ -2,16 +2,16 @@
"cells": [
{
"cell_type": "code",
- "execution_count": 44,
+ "execution_count": 45,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "[<matplotlib.lines.Line2D at 0x7fe4267b2b50>]"
+ "[<matplotlib.lines.Line2D at 0x7fe42672a510>]"
]
},
- "execution_count": 44,
+ "execution_count": 45,
"metadata": {},
"output_type": "execute_result"
},