Completed implementation of Pearson's correlation coefficient calculation. - machinelearning - Unnamed repository; edit this file 'description' to name the repository.

commit 9bb96b95ae2ca9a32dd5afbd536aeefae357cc08
parent e026d0e3698bb995921fa61158c09c7316c97bd1
Author: Andrew <andrewlaack1@gmail.com>
Date:   Fri, 17 May 2024 20:43:42 -0500

Completed implementation of Pearson's correlation coefficient calculation.

Diffstat:
M correlationCoefficient/CorrelationCoefficient.ipynb  | 99 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----

1 file changed, 93 insertions(+), 6 deletions(-)
diff --git a/correlationCoefficient/CorrelationCoefficient.ipynb b/correlationCoefficient/CorrelationCoefficient.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 64,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -13,7 +13,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 65,
    "metadata": {},
    "outputs": [
     {
@@ -181,7 +181,7 @@
        "max         500001.000000  "
       ]
      },
-     "execution_count": 13,
+     "execution_count": 65,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -194,10 +194,97 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 66,
    "metadata": {},
-   "outputs": [],
-   "source": []
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0.6880752079585545\n"
+     ]
+    }
+   ],
+   "source": [
+    "import math\n",
+    "\n",
+    "# Find means of dataset\n",
+    "\n",
+    "meanIncome = 0\n",
+    "meanPrice = 0\n",
+    "\n",
+    "for x in df['median_income']:\n",
+    "    meanIncome += x\n",
+    "for i in df['median_house_value']:\n",
+    "    meanPrice += i\n",
+    "\n",
+    "meanPrice = meanPrice / len(df['median_house_value'])\n",
+    "meanIncome = meanIncome / len(df['median_income'])\n",
+    "\n",
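+    "# Subtract means from each value (NOTE(review): .values may be a view of df, so the loops below may overwrite df's columns in place - confirm)\n",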
+    "\n",
+    "incomes = df['median_income'].values\n",
+    "prices = df['median_house_value'].values\n",
+    "\n",
+    "count = 0\n",
+    "for i in incomes:\n",
+    "    incomes[count] = i - meanIncome\n",
+    "    count += 1\n",
+    "\n",
+    "count = 0\n",
+    "for i in prices:\n",
+    "    prices[count] = i - meanPrice\n",
+    "    count += 1\n",
+    "\n",
+    "# Multiply the deviations of each pair and sum the products\n",
+    "\n",
+    "count = 0\n",
+    "sumOfDeviations = 0\n",
+    "while count < len(incomes):\n",
+    "    sumOfDeviations += (prices[count] * incomes[count])\n",
+    "    count += 1\n",
+    "\n",
+    "# Sum of squared deviations\n",
+    "\n",
+    "squaredIncomes = 0\n",
+    "squaredPrices = 0\n",
+    "for i in incomes:\n",
+    "    squaredIncomes += i * i\n",
+    "\n",
+    "for p in prices:\n",
+    "    squaredPrices += p * p\n",
+    "\n",
+    "\n",
+    "# Square root of the product of the two sums of squared deviations\n",
+    "\n",
+    "squaredCombined = squaredIncomes * squaredPrices\n",
+    "denominator = math.sqrt(squaredCombined)\n",
+    "\n",
+    "# Final calculation\n",
+    "\n",
+    "r = sumOfDeviations / denominator\n",
+    "print(r)\n",
+    "\n",
+    "\n",
+    "# To summarize:\n",
+    "\n",
+    "# For the numerator, find the sum of the products of the deviations from the mean for\n",
+    "# each ordered pair. To do this, find the means of the x and y values, subtract each mean\n",
+    "# from the corresponding actual values, and multiply each pair's x and y deviations\n",
+    "# together. You then add these all up. This is the numerator.\n",
+    "\n",
+    "# To find the denominator we use the means we found earlier, square the deviation from the\n",
+    "# mean for each x value, and sum these together. We then do the same thing for all y values\n",
+    "# and multiply the two sums of squared deviations together. The last step is taking the\n",
+    "# square root. We now have the denominator.\n",
+    "\n",
+    "# The final step is to divide the numerator by the denominator to find the r value, which is\n",
+    "# Pearson's correlation coefficient, generally denoted by r, R, or the Greek letter rho (looks\n",
+    "# like a lowercase p).\n",
+    "\n",
+    "# I verified this code works correctly by checking the true correlation between median house\n",
+    "# value and median income. "
+   ]
   }
  ],
  "metadata": {

	machinelearning Unnamed repository; edit this file 'description' to name the repository.
	Log \| Files \| Refs