machinelearning

Machine learning code
git clone git://git.laack.co/machinelearning.git
Log | Files | Refs

MinMaxScaling.ipynb (7680B)


      1 {
      2  "cells": [
      3   {
      4    "cell_type": "code",
      5    "execution_count": 3,
      6    "metadata": {},
      7    "outputs": [],
      8    "source": [
      9     "import pandas as pd\n",
     10     "import pathlib as path\n",
     11     "# Read the housing dataset from its CSV file (relative path) into a DataFrame\n",
     12     "df = pd.read_csv(filepath_or_buffer=path.Path('../datasets/housing/housing.csv'))"
     13    ]
     14   },
     15   {
     16    "cell_type": "code",
     17    "execution_count": 4,
     18    "metadata": {},
     19    "outputs": [],
     20    "source": [
     21     "# Remove the string column; errors='ignore' keeps this cell safe to re-run\n",
     22     "df = df.drop(columns='ocean_proximity', errors='ignore')"
     23    ]
     24   },
     25   {
     26    "cell_type": "code",
     27    "execution_count": 14,
     28    "metadata": {},
     29    "outputs": [
     30     {
     31      "data": {
     32       "text/html": [
     33        "<div>\n",
     34        "<style scoped>\n",
     35        "    .dataframe tbody tr th:only-of-type {\n",
     36        "        vertical-align: middle;\n",
     37        "    }\n",
     38        "\n",
     39        "    .dataframe tbody tr th {\n",
     40        "        vertical-align: top;\n",
     41        "    }\n",
     42        "\n",
     43        "    .dataframe thead th {\n",
     44        "        text-align: right;\n",
     45        "    }\n",
     46        "</style>\n",
     47        "<table border=\"1\" class=\"dataframe\">\n",
     48        "  <thead>\n",
     49        "    <tr style=\"text-align: right;\">\n",
     50        "      <th></th>\n",
     51        "      <th>longitude</th>\n",
     52        "      <th>latitude</th>\n",
     53        "      <th>housing_median_age</th>\n",
     54        "      <th>total_rooms</th>\n",
     55        "      <th>total_bedrooms</th>\n",
     56        "      <th>population</th>\n",
     57        "      <th>households</th>\n",
     58        "      <th>median_income</th>\n",
     59        "      <th>median_house_value</th>\n",
     60        "    </tr>\n",
     61        "  </thead>\n",
     62        "  <tbody>\n",
     63        "    <tr>\n",
     64        "      <th>count</th>\n",
     65        "      <td>20640.000000</td>\n",
     66        "      <td>20640.000000</td>\n",
     67        "      <td>20640.000000</td>\n",
     68        "      <td>20640.000000</td>\n",
     69        "      <td>20433.000000</td>\n",
     70        "      <td>20640.000000</td>\n",
     71        "      <td>20640.000000</td>\n",
     72        "      <td>20640.000000</td>\n",
     73        "      <td>20640.000000</td>\n",
     74        "    </tr>\n",
     75        "    <tr>\n",
     76        "      <th>mean</th>\n",
     77        "      <td>0.476125</td>\n",
     78        "      <td>0.328572</td>\n",
     79        "      <td>0.541951</td>\n",
     80        "      <td>0.066986</td>\n",
     81        "      <td>0.083313</td>\n",
     82        "      <td>0.039869</td>\n",
     83        "      <td>0.081983</td>\n",
     84        "      <td>0.232464</td>\n",
     85        "      <td>0.395579</td>\n",
     86        "    </tr>\n",
     87        "    <tr>\n",
     88        "      <th>std</th>\n",
     89        "      <td>0.199555</td>\n",
     90        "      <td>0.226988</td>\n",
     91        "      <td>0.246776</td>\n",
     92        "      <td>0.055486</td>\n",
     93        "      <td>0.065392</td>\n",
     94        "      <td>0.031740</td>\n",
     95        "      <td>0.062873</td>\n",
     96        "      <td>0.131020</td>\n",
     97        "      <td>0.237928</td>\n",
     98        "    </tr>\n",
     99        "    <tr>\n",
    100        "      <th>min</th>\n",
    101        "      <td>0.000000</td>\n",
    102        "      <td>0.000000</td>\n",
    103        "      <td>0.000000</td>\n",
    104        "      <td>0.000000</td>\n",
    105        "      <td>0.000000</td>\n",
    106        "      <td>0.000000</td>\n",
    107        "      <td>0.000000</td>\n",
    108        "      <td>0.000000</td>\n",
    109        "      <td>0.000000</td>\n",
    110        "    </tr>\n",
    111        "    <tr>\n",
    112        "      <th>25%</th>\n",
    113        "      <td>0.253984</td>\n",
    114        "      <td>0.147715</td>\n",
    115        "      <td>0.333333</td>\n",
    116        "      <td>0.036771</td>\n",
    117        "      <td>0.045779</td>\n",
    118        "      <td>0.021974</td>\n",
    119        "      <td>0.045881</td>\n",
    120        "      <td>0.142308</td>\n",
    121        "      <td>0.215671</td>\n",
    122        "    </tr>\n",
    123        "    <tr>\n",
    124        "      <th>50%</th>\n",
    125        "      <td>0.583665</td>\n",
    126        "      <td>0.182784</td>\n",
    127        "      <td>0.549020</td>\n",
    128        "      <td>0.054046</td>\n",
    129        "      <td>0.067349</td>\n",
    130        "      <td>0.032596</td>\n",
    131        "      <td>0.067094</td>\n",
    132        "      <td>0.209301</td>\n",
    133        "      <td>0.339588</td>\n",
    134        "    </tr>\n",
    135        "    <tr>\n",
    136        "      <th>75%</th>\n",
    137        "      <td>0.631474</td>\n",
    138        "      <td>0.549416</td>\n",
    139        "      <td>0.705882</td>\n",
    140        "      <td>0.080014</td>\n",
    141        "      <td>0.100248</td>\n",
    142        "      <td>0.048264</td>\n",
    143        "      <td>0.099326</td>\n",
    144        "      <td>0.292641</td>\n",
    145        "      <td>0.514897</td>\n",
    146        "    </tr>\n",
    147        "    <tr>\n",
    148        "      <th>max</th>\n",
    149        "      <td>1.000000</td>\n",
    150        "      <td>1.000000</td>\n",
    151        "      <td>1.000000</td>\n",
    152        "      <td>1.000000</td>\n",
    153        "      <td>1.000000</td>\n",
    154        "      <td>1.000000</td>\n",
    155        "      <td>1.000000</td>\n",
    156        "      <td>1.000000</td>\n",
    157        "      <td>1.000000</td>\n",
    158        "    </tr>\n",
    159        "  </tbody>\n",
    160        "</table>\n",
    161        "</div>"
    162       ],
    163       "text/plain": [
    164        "          longitude      latitude  housing_median_age   total_rooms  \\\n",
    165        "count  20640.000000  20640.000000        20640.000000  20640.000000   \n",
    166        "mean       0.476125      0.328572            0.541951      0.066986   \n",
    167        "std        0.199555      0.226988            0.246776      0.055486   \n",
    168        "min        0.000000      0.000000            0.000000      0.000000   \n",
    169        "25%        0.253984      0.147715            0.333333      0.036771   \n",
    170        "50%        0.583665      0.182784            0.549020      0.054046   \n",
    171        "75%        0.631474      0.549416            0.705882      0.080014   \n",
    172        "max        1.000000      1.000000            1.000000      1.000000   \n",
    173        "\n",
    174        "       total_bedrooms    population    households  median_income  \\\n",
    175        "count    20433.000000  20640.000000  20640.000000   20640.000000   \n",
    176        "mean         0.083313      0.039869      0.081983       0.232464   \n",
    177        "std          0.065392      0.031740      0.062873       0.131020   \n",
    178        "min          0.000000      0.000000      0.000000       0.000000   \n",
    179        "25%          0.045779      0.021974      0.045881       0.142308   \n",
    180        "50%          0.067349      0.032596      0.067094       0.209301   \n",
    181        "75%          0.100248      0.048264      0.099326       0.292641   \n",
    182        "max          1.000000      1.000000      1.000000       1.000000   \n",
    183        "\n",
    184        "       median_house_value  \n",
    185        "count        20640.000000  \n",
    186        "mean             0.395579  \n",
    187        "std              0.237928  \n",
    188        "min              0.000000  \n",
    189        "25%              0.215671  \n",
    190        "50%              0.339588  \n",
    191        "75%              0.514897  \n",
    192        "max              1.000000  "
    193       ]
    194      },
    195      "execution_count": 14,
    196      "metadata": {},
    197      "output_type": "execute_result"
    198     }
    199    ],
    200    "source": [
    201     "# Min-max scale every column (all assumed numeric) so it spans [0, 1]:\n",
    202     "#     scaled = (value - column_min) / (column_max - column_min)\n",
    203     "# NOTE: a constant column has zero range and would come out as NaN.\n",
    204     "for col in df:\n",
    205     "    col_min = df[col].min()  # named col_min so the builtin min() is not shadowed\n",
    206     "    col_range = df[col].max() - col_min\n",
    207     "    df[col] = (df[col] - col_min) / col_range\n",
    208     "\n",
    209     "df.describe()"
    210    ]
    211   }
    212  ],
    213  "metadata": {
    214   "kernelspec": {
    215    "display_name": "notebook",
    216    "language": "python",
    217    "name": "notebook"
    218   },
    219   "language_info": {
    220    "codemirror_mode": {
    221     "name": "ipython",
    222     "version": 3
    223    },
    224    "file_extension": ".py",
    225    "mimetype": "text/x-python",
    226    "name": "python",
    227    "nbconvert_exporter": "python",
    228    "pygments_lexer": "ipython3",
    229    "version": "3.11.2"
    230   }
    231  },
    232  "nbformat": 4,
    233  "nbformat_minor": 2
    234 }