standardization.ipynb (3224B)
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Standardizing the housing dataset\n",
    "\n",
    "Loads `housing.csv`, keeps its numeric columns, and rescales each one to a z-score, $z = (x - \\bar{x}) / s$, where $\\bar{x}$ is the column mean and $s$ is the sample standard deviation (`ddof=1`, the pandas default for `.std()`).\n",
    "\n",
    "Stored outputs were cleared during review; use **Restart Kernel and Run All** to regenerate them."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pathlib import Path\n",
    "\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Location of the raw CSV, relative to this notebook.\n",
    "DATA_PATH = Path('../datasets/housing/housing.csv')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "housing_raw = pd.read_csv(DATA_PATH)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Standardize numeric columns\n",
    "\n",
    "The raw frame is kept untouched in `housing_raw`; the standardized copy gets its own name so that re-running any cell gives the same result."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def standardize(frame: pd.DataFrame) -> pd.DataFrame:\n",
    "    \"\"\"Return the numeric columns of ``frame`` rescaled to z-scores.\n",
    "\n",
    "    Each numeric column becomes ``(x - mean) / std``, using the sample\n",
    "    standard deviation (``ddof=1``, the pandas default). Non-numeric\n",
    "    columns are dropped and ``frame`` itself is left unmodified. Missing\n",
    "    entries stay missing.\n",
    "\n",
    "    A column whose standard deviation is zero or undefined (a constant\n",
    "    column, or one with a single non-missing value) is only centered, so\n",
    "    it comes out as zeros instead of NaN from a division by zero.\n",
    "    \"\"\"\n",
    "    numeric = frame.select_dtypes(include=['number'])\n",
    "    spread = numeric.std()\n",
    "    # `spread > 0` is False for both 0 and NaN, so either falls back to 1.0.\n",
    "    safe_spread = spread.where(spread > 0, 1.0)\n",
    "    return (numeric - numeric.mean()) / safe_spread"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "housing_standardized = standardize(housing_raw)\n",
    "\n",
    "# Standardizing rescales values; it must never add or drop rows.\n",
    "assert len(housing_standardized) == len(housing_raw)\n",
    "\n",
    "housing_standardized.head()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "notebook",
   "language": "python",
   "name": "notebook"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}