commit a5db4d6e38614a40c3b903da9420538e5e5d5c59
parent ea014ed4cd52caa174a440059a49ff79396dfc2f
Author: Andrew <andrewlaack1@gmail.com>
Date: Mon, 10 Jun 2024 11:28:04 -0500
Updated the random forest parameters and used PCA projection to reduce the number of input dimensions from 784 to 23.
Diffstat:
1 file changed, 522 insertions(+), 0 deletions(-)
diff --git a/mnist/MNISTCustomClassicalML.ipynb b/mnist/MNISTCustomClassicalML.ipynb
@@ -0,0 +1,522 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "(70000, 784)\n",
+ "(70000, 23)\n"
+ ]
+ }
+ ],
+ "source": [
+ "from sklearn.datasets import fetch_openml\n",
+ "from sklearn.decomposition import PCA\n",
+ "\n",
+ "mnist = fetch_openml(\"mnist_784\", as_frame=False)\n",
+ "X,y = mnist.data,mnist.target\n",
+ "\n",
+ "# Reduce dimensions to 23 (optimal value found using RandomizedSearchCV)\n",
+ "\n",
+ "pca = PCA(n_components=23)\n",
+ "print(X.shape)\n",
+ "\n",
+ "X = pca.fit_transform(X)\n",
+ "print(X.shape)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sklearn.model_selection import train_test_split\n",
+ "\n",
+ "X_train, X_test, y_train, y_test = train_test_split(X,y,random_state=10)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "<style>#sk-container-id-11 {\n",
+ " /* Definition of color scheme common for light and dark mode */\n",
+ " --sklearn-color-text: black;\n",
+ " --sklearn-color-line: gray;\n",
+ " /* Definition of color scheme for unfitted estimators */\n",
+ " --sklearn-color-unfitted-level-0: #fff5e6;\n",
+ " --sklearn-color-unfitted-level-1: #f6e4d2;\n",
+ " --sklearn-color-unfitted-level-2: #ffe0b3;\n",
+ " --sklearn-color-unfitted-level-3: chocolate;\n",
+ " /* Definition of color scheme for fitted estimators */\n",
+ " --sklearn-color-fitted-level-0: #f0f8ff;\n",
+ " --sklearn-color-fitted-level-1: #d4ebff;\n",
+ " --sklearn-color-fitted-level-2: #b3dbfd;\n",
+ " --sklearn-color-fitted-level-3: cornflowerblue;\n",
+ "\n",
+ " /* Specific color for light theme */\n",
+ " --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
+ " --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
+ " --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
+ " --sklearn-color-icon: #696969;\n",
+ "\n",
+ " @media (prefers-color-scheme: dark) {\n",
+ " /* Redefinition of color scheme for dark theme */\n",
+ " --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
+ " --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
+ " --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
+ " --sklearn-color-icon: #878787;\n",
+ " }\n",
+ "}\n",
+ "\n",
+ "#sk-container-id-11 {\n",
+ " color: var(--sklearn-color-text);\n",
+ "}\n",
+ "\n",
+ "#sk-container-id-11 pre {\n",
+ " padding: 0;\n",
+ "}\n",
+ "\n",
+ "#sk-container-id-11 input.sk-hidden--visually {\n",
+ " border: 0;\n",
+ " clip: rect(1px 1px 1px 1px);\n",
+ " clip: rect(1px, 1px, 1px, 1px);\n",
+ " height: 1px;\n",
+ " margin: -1px;\n",
+ " overflow: hidden;\n",
+ " padding: 0;\n",
+ " position: absolute;\n",
+ " width: 1px;\n",
+ "}\n",
+ "\n",
+ "#sk-container-id-11 div.sk-dashed-wrapped {\n",
+ " border: 1px dashed var(--sklearn-color-line);\n",
+ " margin: 0 0.4em 0.5em 0.4em;\n",
+ " box-sizing: border-box;\n",
+ " padding-bottom: 0.4em;\n",
+ " background-color: var(--sklearn-color-background);\n",
+ "}\n",
+ "\n",
+ "#sk-container-id-11 div.sk-container {\n",
+ " /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
+ " but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
+ " so we also need the `!important` here to be able to override the\n",
+ " default hidden behavior on the sphinx rendered scikit-learn.org.\n",
+ " See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
+ " display: inline-block !important;\n",
+ " position: relative;\n",
+ "}\n",
+ "\n",
+ "#sk-container-id-11 div.sk-text-repr-fallback {\n",
+ " display: none;\n",
+ "}\n",
+ "\n",
+ "div.sk-parallel-item,\n",
+ "div.sk-serial,\n",
+ "div.sk-item {\n",
+ " /* draw centered vertical line to link estimators */\n",
+ " background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
+ " background-size: 2px 100%;\n",
+ " background-repeat: no-repeat;\n",
+ " background-position: center center;\n",
+ "}\n",
+ "\n",
+ "/* Parallel-specific style estimator block */\n",
+ "\n",
+ "#sk-container-id-11 div.sk-parallel-item::after {\n",
+ " content: \"\";\n",
+ " width: 100%;\n",
+ " border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
+ " flex-grow: 1;\n",
+ "}\n",
+ "\n",
+ "#sk-container-id-11 div.sk-parallel {\n",
+ " display: flex;\n",
+ " align-items: stretch;\n",
+ " justify-content: center;\n",
+ " background-color: var(--sklearn-color-background);\n",
+ " position: relative;\n",
+ "}\n",
+ "\n",
+ "#sk-container-id-11 div.sk-parallel-item {\n",
+ " display: flex;\n",
+ " flex-direction: column;\n",
+ "}\n",
+ "\n",
+ "#sk-container-id-11 div.sk-parallel-item:first-child::after {\n",
+ " align-self: flex-end;\n",
+ " width: 50%;\n",
+ "}\n",
+ "\n",
+ "#sk-container-id-11 div.sk-parallel-item:last-child::after {\n",
+ " align-self: flex-start;\n",
+ " width: 50%;\n",
+ "}\n",
+ "\n",
+ "#sk-container-id-11 div.sk-parallel-item:only-child::after {\n",
+ " width: 0;\n",
+ "}\n",
+ "\n",
+ "/* Serial-specific style estimator block */\n",
+ "\n",
+ "#sk-container-id-11 div.sk-serial {\n",
+ " display: flex;\n",
+ " flex-direction: column;\n",
+ " align-items: center;\n",
+ " background-color: var(--sklearn-color-background);\n",
+ " padding-right: 1em;\n",
+ " padding-left: 1em;\n",
+ "}\n",
+ "\n",
+ "\n",
+ "/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
+ "clickable and can be expanded/collapsed.\n",
+ "- Pipeline and ColumnTransformer use this feature and define the default style\n",
+ "- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
+ "*/\n",
+ "\n",
+ "/* Pipeline and ColumnTransformer style (default) */\n",
+ "\n",
+ "#sk-container-id-11 div.sk-toggleable {\n",
+ " /* Default theme specific background. It is overwritten whether we have a\n",
+ " specific estimator or a Pipeline/ColumnTransformer */\n",
+ " background-color: var(--sklearn-color-background);\n",
+ "}\n",
+ "\n",
+ "/* Toggleable label */\n",
+ "#sk-container-id-11 label.sk-toggleable__label {\n",
+ " cursor: pointer;\n",
+ " display: block;\n",
+ " width: 100%;\n",
+ " margin-bottom: 0;\n",
+ " padding: 0.5em;\n",
+ " box-sizing: border-box;\n",
+ " text-align: center;\n",
+ "}\n",
+ "\n",
+ "#sk-container-id-11 label.sk-toggleable__label-arrow:before {\n",
+ " /* Arrow on the left of the label */\n",
+ " content: \"▸\";\n",
+ " float: left;\n",
+ " margin-right: 0.25em;\n",
+ " color: var(--sklearn-color-icon);\n",
+ "}\n",
+ "\n",
+ "#sk-container-id-11 label.sk-toggleable__label-arrow:hover:before {\n",
+ " color: var(--sklearn-color-text);\n",
+ "}\n",
+ "\n",
+ "/* Toggleable content - dropdown */\n",
+ "\n",
+ "#sk-container-id-11 div.sk-toggleable__content {\n",
+ " max-height: 0;\n",
+ " max-width: 0;\n",
+ " overflow: hidden;\n",
+ " text-align: left;\n",
+ " /* unfitted */\n",
+ " background-color: var(--sklearn-color-unfitted-level-0);\n",
+ "}\n",
+ "\n",
+ "#sk-container-id-11 div.sk-toggleable__content.fitted {\n",
+ " /* fitted */\n",
+ " background-color: var(--sklearn-color-fitted-level-0);\n",
+ "}\n",
+ "\n",
+ "#sk-container-id-11 div.sk-toggleable__content pre {\n",
+ " margin: 0.2em;\n",
+ " border-radius: 0.25em;\n",
+ " color: var(--sklearn-color-text);\n",
+ " /* unfitted */\n",
+ " background-color: var(--sklearn-color-unfitted-level-0);\n",
+ "}\n",
+ "\n",
+ "#sk-container-id-11 div.sk-toggleable__content.fitted pre {\n",
+ " /* unfitted */\n",
+ " background-color: var(--sklearn-color-fitted-level-0);\n",
+ "}\n",
+ "\n",
+ "#sk-container-id-11 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
+ " /* Expand drop-down */\n",
+ " max-height: 200px;\n",
+ " max-width: 100%;\n",
+ " overflow: auto;\n",
+ "}\n",
+ "\n",
+ "#sk-container-id-11 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
+ " content: \"▾\";\n",
+ "}\n",
+ "\n",
+ "/* Pipeline/ColumnTransformer-specific style */\n",
+ "\n",
+ "#sk-container-id-11 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
+ " color: var(--sklearn-color-text);\n",
+ " background-color: var(--sklearn-color-unfitted-level-2);\n",
+ "}\n",
+ "\n",
+ "#sk-container-id-11 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
+ " background-color: var(--sklearn-color-fitted-level-2);\n",
+ "}\n",
+ "\n",
+ "/* Estimator-specific style */\n",
+ "\n",
+ "/* Colorize estimator box */\n",
+ "#sk-container-id-11 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
+ " /* unfitted */\n",
+ " background-color: var(--sklearn-color-unfitted-level-2);\n",
+ "}\n",
+ "\n",
+ "#sk-container-id-11 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
+ " /* fitted */\n",
+ " background-color: var(--sklearn-color-fitted-level-2);\n",
+ "}\n",
+ "\n",
+ "#sk-container-id-11 div.sk-label label.sk-toggleable__label,\n",
+ "#sk-container-id-11 div.sk-label label {\n",
+ " /* The background is the default theme color */\n",
+ " color: var(--sklearn-color-text-on-default-background);\n",
+ "}\n",
+ "\n",
+ "/* On hover, darken the color of the background */\n",
+ "#sk-container-id-11 div.sk-label:hover label.sk-toggleable__label {\n",
+ " color: var(--sklearn-color-text);\n",
+ " background-color: var(--sklearn-color-unfitted-level-2);\n",
+ "}\n",
+ "\n",
+ "/* Label box, darken color on hover, fitted */\n",
+ "#sk-container-id-11 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
+ " color: var(--sklearn-color-text);\n",
+ " background-color: var(--sklearn-color-fitted-level-2);\n",
+ "}\n",
+ "\n",
+ "/* Estimator label */\n",
+ "\n",
+ "#sk-container-id-11 div.sk-label label {\n",
+ " font-family: monospace;\n",
+ " font-weight: bold;\n",
+ " display: inline-block;\n",
+ " line-height: 1.2em;\n",
+ "}\n",
+ "\n",
+ "#sk-container-id-11 div.sk-label-container {\n",
+ " text-align: center;\n",
+ "}\n",
+ "\n",
+ "/* Estimator-specific */\n",
+ "#sk-container-id-11 div.sk-estimator {\n",
+ " font-family: monospace;\n",
+ " border: 1px dotted var(--sklearn-color-border-box);\n",
+ " border-radius: 0.25em;\n",
+ " box-sizing: border-box;\n",
+ " margin-bottom: 0.5em;\n",
+ " /* unfitted */\n",
+ " background-color: var(--sklearn-color-unfitted-level-0);\n",
+ "}\n",
+ "\n",
+ "#sk-container-id-11 div.sk-estimator.fitted {\n",
+ " /* fitted */\n",
+ " background-color: var(--sklearn-color-fitted-level-0);\n",
+ "}\n",
+ "\n",
+ "/* on hover */\n",
+ "#sk-container-id-11 div.sk-estimator:hover {\n",
+ " /* unfitted */\n",
+ " background-color: var(--sklearn-color-unfitted-level-2);\n",
+ "}\n",
+ "\n",
+ "#sk-container-id-11 div.sk-estimator.fitted:hover {\n",
+ " /* fitted */\n",
+ " background-color: var(--sklearn-color-fitted-level-2);\n",
+ "}\n",
+ "\n",
+ "/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
+ "\n",
+ "/* Common style for \"i\" and \"?\" */\n",
+ "\n",
+ ".sk-estimator-doc-link,\n",
+ "a:link.sk-estimator-doc-link,\n",
+ "a:visited.sk-estimator-doc-link {\n",
+ " float: right;\n",
+ " font-size: smaller;\n",
+ " line-height: 1em;\n",
+ " font-family: monospace;\n",
+ " background-color: var(--sklearn-color-background);\n",
+ " border-radius: 1em;\n",
+ " height: 1em;\n",
+ " width: 1em;\n",
+ " text-decoration: none !important;\n",
+ " margin-left: 1ex;\n",
+ " /* unfitted */\n",
+ " border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
+ " color: var(--sklearn-color-unfitted-level-1);\n",
+ "}\n",
+ "\n",
+ ".sk-estimator-doc-link.fitted,\n",
+ "a:link.sk-estimator-doc-link.fitted,\n",
+ "a:visited.sk-estimator-doc-link.fitted {\n",
+ " /* fitted */\n",
+ " border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
+ " color: var(--sklearn-color-fitted-level-1);\n",
+ "}\n",
+ "\n",
+ "/* On hover */\n",
+ "div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
+ ".sk-estimator-doc-link:hover,\n",
+ "div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
+ ".sk-estimator-doc-link:hover {\n",
+ " /* unfitted */\n",
+ " background-color: var(--sklearn-color-unfitted-level-3);\n",
+ " color: var(--sklearn-color-background);\n",
+ " text-decoration: none;\n",
+ "}\n",
+ "\n",
+ "div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
+ ".sk-estimator-doc-link.fitted:hover,\n",
+ "div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
+ ".sk-estimator-doc-link.fitted:hover {\n",
+ " /* fitted */\n",
+ " background-color: var(--sklearn-color-fitted-level-3);\n",
+ " color: var(--sklearn-color-background);\n",
+ " text-decoration: none;\n",
+ "}\n",
+ "\n",
+ "/* Span, style for the box shown on hovering the info icon */\n",
+ ".sk-estimator-doc-link span {\n",
+ " display: none;\n",
+ " z-index: 9999;\n",
+ " position: relative;\n",
+ " font-weight: normal;\n",
+ " right: .2ex;\n",
+ " padding: .5ex;\n",
+ " margin: .5ex;\n",
+ " width: min-content;\n",
+ " min-width: 20ex;\n",
+ " max-width: 50ex;\n",
+ " color: var(--sklearn-color-text);\n",
+ " box-shadow: 2pt 2pt 4pt #999;\n",
+ " /* unfitted */\n",
+ " background: var(--sklearn-color-unfitted-level-0);\n",
+ " border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
+ "}\n",
+ "\n",
+ ".sk-estimator-doc-link.fitted span {\n",
+ " /* fitted */\n",
+ " background: var(--sklearn-color-fitted-level-0);\n",
+ " border: var(--sklearn-color-fitted-level-3);\n",
+ "}\n",
+ "\n",
+ ".sk-estimator-doc-link:hover span {\n",
+ " display: block;\n",
+ "}\n",
+ "\n",
+ "/* \"?\"-specific style due to the `<a>` HTML tag */\n",
+ "\n",
+ "#sk-container-id-11 a.estimator_doc_link {\n",
+ " float: right;\n",
+ " font-size: 1rem;\n",
+ " line-height: 1em;\n",
+ " font-family: monospace;\n",
+ " background-color: var(--sklearn-color-background);\n",
+ " border-radius: 1rem;\n",
+ " height: 1rem;\n",
+ " width: 1rem;\n",
+ " text-decoration: none;\n",
+ " /* unfitted */\n",
+ " color: var(--sklearn-color-unfitted-level-1);\n",
+ " border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
+ "}\n",
+ "\n",
+ "#sk-container-id-11 a.estimator_doc_link.fitted {\n",
+ " /* fitted */\n",
+ " border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
+ " color: var(--sklearn-color-fitted-level-1);\n",
+ "}\n",
+ "\n",
+ "/* On hover */\n",
+ "#sk-container-id-11 a.estimator_doc_link:hover {\n",
+ " /* unfitted */\n",
+ " background-color: var(--sklearn-color-unfitted-level-3);\n",
+ " color: var(--sklearn-color-background);\n",
+ " text-decoration: none;\n",
+ "}\n",
+ "\n",
+ "#sk-container-id-11 a.estimator_doc_link.fitted:hover {\n",
+ " /* fitted */\n",
+ " background-color: var(--sklearn-color-fitted-level-3);\n",
+ "}\n",
+ "</style><div id=\"sk-container-id-11\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>RandomForestRegressor(max_depth=30, max_features=10, n_jobs=-1)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-11\" type=\"checkbox\" checked><label for=\"sk-estimator-id-11\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\"> RandomForestRegressor<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.ensemble.RandomForestRegressor.html\">?<span>Documentation for RandomForestRegressor</span></a><span class=\"sk-estimator-doc-link fitted\">i<span>Fitted</span></span></label><div class=\"sk-toggleable__content fitted\"><pre>RandomForestRegressor(max_depth=30, max_features=10, n_jobs=-1)</pre></div> </div></div></div></div>"
+ ],
+ "text/plain": [
+ "RandomForestRegressor(max_depth=30, max_features=10, n_jobs=-1)"
+ ]
+ },
+ "execution_count": 37,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from sklearn.ensemble import RandomForestRegressor\n",
+ "\n",
+ "rnd_regressor = RandomForestRegressor(\n",
+ " n_estimators=100,\n",
+ " max_depth=30,\n",
+ " max_features=10,\n",
+ " n_jobs=-1\n",
+ ")\n",
+ "\n",
+ "rnd_regressor.fit(X_train,y_train)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 38,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.8842829028107758"
+ ]
+ },
+ "execution_count": 38,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "rnd_regressor.score(X_test, y_test)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "notebook",
+ "language": "python",
+ "name": "notebook"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.2"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}