machinelearning

Machine learning code
git clone git://git.laack.co/machinelearning.git
Log | Files | Refs

standardization.ipynb (3224B)


      1 {
      2  "cells": [
      3   {
      4    "cell_type": "code",
      5    "execution_count": 9,
      6    "metadata": {},
      7    "outputs": [],
      8    "source": [
      9     "import pandas as pd\n",
     10     "from pathlib import Path \n",
     11     "\n",
     12     "df = pd.read_csv(Path('../datasets/housing/housing.csv'))"
     13    ]
     14   },
     15   {
     16    "cell_type": "code",
     17    "execution_count": 27,
     18    "metadata": {},
     19    "outputs": [
     20     {
     21      "name": "stdout",
     22      "output_type": "stream",
     23      "text": [
     24       "       longitude  latitude  housing_median_age  total_rooms  total_bedrooms  \\\n",
     25       "0      -1.327803  1.052523            0.982119    -0.804800       -0.970301   \n",
     26       "1      -1.322812  1.043159           -0.607004     2.045841        1.348243   \n",
     27       "2      -1.332794  1.038478            1.856137    -0.535733       -0.825541   \n",
     28       "3      -1.337785  1.038478            1.856137    -0.624199       -0.718750   \n",
     29       "4      -1.337785  1.038478            1.856137    -0.462393       -0.611959   \n",
     30       "...          ...       ...                 ...          ...             ...   \n",
     31       "20635  -0.758808  1.801603           -0.289180    -0.444974       -0.388886   \n",
     32       "20636  -0.818702  1.806285           -0.845373    -0.888682       -0.920466   \n",
     33       "20637  -0.823693  1.778194           -0.924829    -0.174991       -0.125468   \n",
     34       "20638  -0.873605  1.778194           -0.845373    -0.355591       -0.305826   \n",
     35       "20639  -0.833676  1.750104           -1.004285     0.068407        0.185411   \n",
     36       "\n",
     37       "       population  households  median_income  median_house_value  \n",
     38       "0       -0.974405   -0.977009       2.344709            2.129580  \n",
     39       "1        0.861418    1.669921       2.332181            1.314124  \n",
     40       "2       -0.820757   -0.843616       1.782656            1.258663  \n",
     41       "3       -0.766010   -0.733764       0.932945            1.165072  \n",
     42       "4       -0.759828   -0.629142      -0.012881            1.172871  \n",
     43       "...           ...         ...            ...                 ...  \n",
     44       "20635   -0.512579   -0.443438      -1.216099           -1.115777  \n",
     45       "20636   -0.944382   -1.008396      -0.691576           -1.124443  \n",
     46       "20637   -0.369528   -0.174037      -1.142566           -0.992722  \n",
     47       "20638   -0.604415   -0.393743      -1.054557           -1.058583  \n",
     48       "20639   -0.033976    0.079670      -0.780111           -1.017853  \n",
     49       "\n",
     50       "[20640 rows x 9 columns]\n"
     51      ]
     52     }
     53    ],
     54    "source": [
     55     "# Keep only the numeric columns\n",
     56     "df = df.select_dtypes(include=['number'])\n",
     57     "\n",
     58     "# Z-score every column in one vectorized step: pandas aligns the per-column\n",
     59     "# mean and sample standard deviation (ddof=1) with the frame's columns.\n",
     60     "df = (df - df.mean()) / df.std()\n",
     61     "\n",
     62     "# Show the standardized frame\n",
     63     "print(df)"
     64    ]
     65   }
     66  ],
     67  "metadata": {
     68   "kernelspec": {
     69    "display_name": "notebook",
     70    "language": "python",
     71    "name": "notebook"
     72   },
     73   "language_info": {
     74    "codemirror_mode": {
     75     "name": "ipython",
     76     "version": 3
     77    },
     78    "file_extension": ".py",
     79    "mimetype": "text/x-python",
     80    "name": "python",
     81    "nbconvert_exporter": "python",
     82    "pygments_lexer": "ipython3",
     83    "version": "3.11.2"
     84   }
     85  },
     86  "nbformat": 4,
     87  "nbformat_minor": 2
     88 }