machinelearning

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs

commit 887aa0d6e96e0d9f6ee830611609d5368970f87e
parent bd2b8c1864d3591fb6b6d78d499ad1f89231fab3
Author: Andrew <andrewlaack1@gmail.com>
Date:   Wed, 26 Jun 2024 16:43:09 -0500

Fixed graph. Did some other stuff as well.

Diffstat:
M irisClassification/CustomDecisionTree.ipynb | 491 +++----------------------------------------------------------------------------
A recidivism/RecidivismLogReg.ipynb | 738 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A recidivism/RecidivismNN.ipynb | 0
3 files changed, 755 insertions(+), 474 deletions(-)

diff --git a/irisClassification/CustomDecisionTree.ipynb b/irisClassification/CustomDecisionTree.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 2766, + "execution_count": 113, "metadata": {}, "outputs": [], "source": [ @@ -17,7 +17,7 @@ }, { "cell_type": "code", - "execution_count": 2767, + "execution_count": 114, "metadata": {}, "outputs": [ { @@ -32,7 +32,7 @@ "source": [ "from sklearn.model_selection import train_test_split\n", "\n", - "X_train, X_test, y_train, y_test = train_test_split(X,y,random_state=10)\n", + "X_train, X_test, y_train, y_test = train_test_split(X,y)\n", "\n", "print(X_train.shape)\n", "print(X_test.shape)" @@ -40,7 +40,7 @@ }, { "cell_type": "code", - "execution_count": 2768, + "execution_count": 115, "metadata": {}, "outputs": [], "source": [ @@ -81,7 +81,7 @@ "# Pass in x values (one feature only) and classes\n", "def bestSplit(x_vals, classes):\n", "\n", - " sorted = np.sort(x_vals)\n", + " sorted = np.sort(x_vals) \n", " count = 1\n", "\n", " # Find pivot values (average between adjacent values)\n", @@ -125,7 +125,7 @@ }, { "cell_type": "code", - "execution_count": 2769, + "execution_count": 116, "metadata": {}, "outputs": [], "source": [ @@ -172,7 +172,7 @@ }, { "cell_type": "code", - "execution_count": 2770, + "execution_count": 117, "metadata": {}, "outputs": [], "source": [ @@ -207,7 +207,7 @@ "\n", " label = ''\n", " if(node.colName != None):\n", - " label = str(node.colName) + ' <= ' + str(node.splitOn)\n", + " label = str(node.colName) + ' <= ' + str(round(node.splitVal,2))\n", " else:\n", " label = 'Leaf'\n", "\n", @@ -224,473 +224,9 @@ }, { "cell_type": "code", - "execution_count": 2771, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "image/svg+xml": [ - "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n", - "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n", - " \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n", - "<!-- Generated by graphviz 
version 2.43.0 (0)\n", - " -->\n", - "<!-- Title: %3 Pages: 1 -->\n", - "<svg width=\"1339pt\" height=\"692pt\"\n", - " viewBox=\"0.00 0.00 1339.09 692.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n", - "<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 688)\">\n", - "<title>%3</title>\n", - "<polygon fill=\"white\" stroke=\"transparent\" points=\"-4,4 -4,-688 1335.09,-688 1335.09,4 -4,4\"/>\n", - "<!-- 140582739727952 -->\n", - "<g id=\"node1\" class=\"node\">\n", - "<title>140582739727952</title>\n", - "<ellipse fill=\"none\" stroke=\"black\" cx=\"550.55\" cy=\"-666\" rx=\"117.78\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"550.55\" y=\"-662.3\" font-family=\"Times,serif\" font-size=\"14.00\">petal length (cm) &lt;= 2</text>\n", - "</g>\n", - "<!-- 140582734780688 -->\n", - "<g id=\"node2\" class=\"node\">\n", - "<title>140582734780688</title>\n", - "<ellipse fill=\"none\" stroke=\"black\" cx=\"469.55\" cy=\"-594\" rx=\"30.59\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"469.55\" y=\"-590.3\" font-family=\"Times,serif\" font-size=\"14.00\">Leaf</text>\n", - "</g>\n", - "<!-- 140582739727952&#45;&gt;140582734780688 -->\n", - "<g id=\"edge1\" class=\"edge\">\n", - "<title>140582739727952&#45;&gt;140582734780688</title>\n", - "<path fill=\"none\" stroke=\"black\" d=\"M530.94,-648.05C519.76,-638.39 505.64,-626.19 493.77,-615.93\"/>\n", - "<polygon fill=\"black\" stroke=\"black\" points=\"495.83,-613.09 485.97,-609.19 491.25,-618.38 495.83,-613.09\"/>\n", - "</g>\n", - "<!-- 140582763367632 -->\n", - "<g id=\"node3\" class=\"node\">\n", - "<title>140582763367632</title>\n", - "<ellipse fill=\"none\" stroke=\"black\" cx=\"632.55\" cy=\"-594\" rx=\"114.28\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"632.55\" y=\"-590.3\" font-family=\"Times,serif\" font-size=\"14.00\">petal width (cm) &lt;= 3</text>\n", - "</g>\n", - "<!-- 140582739727952&#45;&gt;140582763367632 -->\n", - 
"<g id=\"edge2\" class=\"edge\">\n", - "<title>140582739727952&#45;&gt;140582763367632</title>\n", - "<path fill=\"none\" stroke=\"black\" d=\"M570.4,-648.05C580.77,-639.2 593.63,-628.22 604.96,-618.55\"/>\n", - "<polygon fill=\"black\" stroke=\"black\" points=\"607.55,-620.94 612.88,-611.79 603,-615.62 607.55,-620.94\"/>\n", - "</g>\n", - "<!-- 140582618925200 -->\n", - "<g id=\"node4\" class=\"node\">\n", - "<title>140582618925200</title>\n", - "<ellipse fill=\"none\" stroke=\"black\" cx=\"487.55\" cy=\"-522\" rx=\"115.08\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"487.55\" y=\"-518.3\" font-family=\"Times,serif\" font-size=\"14.00\">sepal width (cm) &lt;= 1</text>\n", - "</g>\n", - "<!-- 140582763367632&#45;&gt;140582618925200 -->\n", - "<g id=\"edge3\" class=\"edge\">\n", - "<title>140582763367632&#45;&gt;140582618925200</title>\n", - "<path fill=\"none\" stroke=\"black\" d=\"M598.92,-576.76C578.49,-566.9 552.23,-554.22 530.33,-543.65\"/>\n", - "<polygon fill=\"black\" stroke=\"black\" points=\"531.77,-540.46 521.25,-539.27 528.73,-546.77 531.77,-540.46\"/>\n", - "</g>\n", - "<!-- 140582756536400 -->\n", - "<g id=\"node17\" class=\"node\">\n", - "<title>140582756536400</title>\n", - "<ellipse fill=\"none\" stroke=\"black\" cx=\"777.55\" cy=\"-522\" rx=\"117.78\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"777.55\" y=\"-518.3\" font-family=\"Times,serif\" font-size=\"14.00\">petal length (cm) &lt;= 2</text>\n", - "</g>\n", - "<!-- 140582763367632&#45;&gt;140582756536400 -->\n", - "<g id=\"edge16\" class=\"edge\">\n", - "<title>140582763367632&#45;&gt;140582756536400</title>\n", - "<path fill=\"none\" stroke=\"black\" d=\"M666.18,-576.76C686.6,-566.9 712.87,-554.22 734.76,-543.65\"/>\n", - "<polygon fill=\"black\" stroke=\"black\" points=\"736.36,-546.77 743.85,-539.27 733.32,-540.46 736.36,-546.77\"/>\n", - "</g>\n", - "<!-- 140582756510480 -->\n", - "<g id=\"node5\" class=\"node\">\n", - "<title>140582756510480</title>\n", - "<ellipse 
fill=\"none\" stroke=\"black\" cx=\"196.55\" cy=\"-450\" rx=\"117.78\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"196.55\" y=\"-446.3\" font-family=\"Times,serif\" font-size=\"14.00\">petal length (cm) &lt;= 2</text>\n", - "</g>\n", - "<!-- 140582618925200&#45;&gt;140582756510480 -->\n", - "<g id=\"edge4\" class=\"edge\">\n", - "<title>140582618925200&#45;&gt;140582756510480</title>\n", - "<path fill=\"none\" stroke=\"black\" d=\"M427.6,-506.58C380.91,-495.35 315.98,-479.73 266.83,-467.91\"/>\n", - "<polygon fill=\"black\" stroke=\"black\" points=\"267.45,-464.46 256.91,-465.52 265.82,-471.26 267.45,-464.46\"/>\n", - "</g>\n", - "<!-- 140582756532368 -->\n", - "<g id=\"node10\" class=\"node\">\n", - "<title>140582756532368</title>\n", - "<ellipse fill=\"none\" stroke=\"black\" cx=\"487.55\" cy=\"-450\" rx=\"117.78\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"487.55\" y=\"-446.3\" font-family=\"Times,serif\" font-size=\"14.00\">petal length (cm) &lt;= 2</text>\n", - "</g>\n", - "<!-- 140582618925200&#45;&gt;140582756532368 -->\n", - "<g id=\"edge9\" class=\"edge\">\n", - "<title>140582618925200&#45;&gt;140582756532368</title>\n", - "<path fill=\"none\" stroke=\"black\" d=\"M487.55,-503.7C487.55,-495.98 487.55,-486.71 487.55,-478.11\"/>\n", - "<polygon fill=\"black\" stroke=\"black\" points=\"491.05,-478.1 487.55,-468.1 484.05,-478.1 491.05,-478.1\"/>\n", - "</g>\n", - "<!-- 140582738061776 -->\n", - "<g id=\"node6\" class=\"node\">\n", - "<title>140582738061776</title>\n", - "<ellipse fill=\"none\" stroke=\"black\" cx=\"30.55\" cy=\"-378\" rx=\"30.59\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"30.55\" y=\"-374.3\" font-family=\"Times,serif\" font-size=\"14.00\">Leaf</text>\n", - "</g>\n", - "<!-- 140582756510480&#45;&gt;140582738061776 -->\n", - "<g id=\"edge5\" class=\"edge\">\n", - "<title>140582756510480&#45;&gt;140582738061776</title>\n", - "<path fill=\"none\" stroke=\"black\" d=\"M157.57,-432.92C132.41,-422.49 98.99,-408.56 
69.55,-396 67.71,-395.22 65.83,-394.41 63.93,-393.59\"/>\n", - "<polygon fill=\"black\" stroke=\"black\" points=\"65.01,-390.25 54.45,-389.49 62.23,-396.67 65.01,-390.25\"/>\n", - "</g>\n", - "<!-- 140582738061392 -->\n", - "<g id=\"node7\" class=\"node\">\n", - "<title>140582738061392</title>\n", - "<ellipse fill=\"none\" stroke=\"black\" cx=\"196.55\" cy=\"-378\" rx=\"117.78\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"196.55\" y=\"-374.3\" font-family=\"Times,serif\" font-size=\"14.00\">petal length (cm) &lt;= 2</text>\n", - "</g>\n", - "<!-- 140582756510480&#45;&gt;140582738061392 -->\n", - "<g id=\"edge6\" class=\"edge\">\n", - "<title>140582756510480&#45;&gt;140582738061392</title>\n", - "<path fill=\"none\" stroke=\"black\" d=\"M196.55,-431.7C196.55,-423.98 196.55,-414.71 196.55,-406.11\"/>\n", - "<polygon fill=\"black\" stroke=\"black\" points=\"200.05,-406.1 196.55,-396.1 193.05,-406.1 200.05,-406.1\"/>\n", - "</g>\n", - "<!-- 140582756540368 -->\n", - "<g id=\"node8\" class=\"node\">\n", - "<title>140582756540368</title>\n", - "<ellipse fill=\"none\" stroke=\"black\" cx=\"117.55\" cy=\"-306\" rx=\"30.59\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"117.55\" y=\"-302.3\" font-family=\"Times,serif\" font-size=\"14.00\">Leaf</text>\n", - "</g>\n", - "<!-- 140582738061392&#45;&gt;140582756540368 -->\n", - "<g id=\"edge7\" class=\"edge\">\n", - "<title>140582738061392&#45;&gt;140582756540368</title>\n", - "<path fill=\"none\" stroke=\"black\" d=\"M177.42,-360.05C166.59,-350.46 152.94,-338.36 141.41,-328.14\"/>\n", - "<polygon fill=\"black\" stroke=\"black\" points=\"143.63,-325.44 133.83,-321.42 138.99,-330.68 143.63,-325.44\"/>\n", - "</g>\n", - "<!-- 140582756533008 -->\n", - "<g id=\"node9\" class=\"node\">\n", - "<title>140582756533008</title>\n", - "<ellipse fill=\"none\" stroke=\"black\" cx=\"196.55\" cy=\"-306\" rx=\"30.59\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"196.55\" y=\"-302.3\" font-family=\"Times,serif\" 
font-size=\"14.00\">Leaf</text>\n", - "</g>\n", - "<!-- 140582738061392&#45;&gt;140582756533008 -->\n", - "<g id=\"edge8\" class=\"edge\">\n", - "<title>140582738061392&#45;&gt;140582756533008</title>\n", - "<path fill=\"none\" stroke=\"black\" d=\"M196.55,-359.7C196.55,-351.98 196.55,-342.71 196.55,-334.11\"/>\n", - "<polygon fill=\"black\" stroke=\"black\" points=\"200.05,-334.1 196.55,-324.1 193.05,-334.1 200.05,-334.1\"/>\n", - "</g>\n", - "<!-- 140582756541520 -->\n", - "<g id=\"node11\" class=\"node\">\n", - "<title>140582756541520</title>\n", - "<ellipse fill=\"none\" stroke=\"black\" cx=\"362.55\" cy=\"-378\" rx=\"30.59\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"362.55\" y=\"-374.3\" font-family=\"Times,serif\" font-size=\"14.00\">Leaf</text>\n", - "</g>\n", - "<!-- 140582756532368&#45;&gt;140582756541520 -->\n", - "<g id=\"edge10\" class=\"edge\">\n", - "<title>140582756532368&#45;&gt;140582756541520</title>\n", - "<path fill=\"none\" stroke=\"black\" d=\"M457.92,-432.41C438.35,-421.45 412.77,-407.13 393.03,-396.07\"/>\n", - "<polygon fill=\"black\" stroke=\"black\" points=\"394.57,-392.92 384.14,-391.09 391.15,-399.03 394.57,-392.92\"/>\n", - "</g>\n", - "<!-- 140582756535120 -->\n", - "<g id=\"node12\" class=\"node\">\n", - "<title>140582756535120</title>\n", - "<ellipse fill=\"none\" stroke=\"black\" cx=\"526.55\" cy=\"-378\" rx=\"115.08\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"526.55\" y=\"-374.3\" font-family=\"Times,serif\" font-size=\"14.00\">sepal width (cm) &lt;= 1</text>\n", - "</g>\n", - "<!-- 140582756532368&#45;&gt;140582756535120 -->\n", - "<g id=\"edge11\" class=\"edge\">\n", - "<title>140582756532368&#45;&gt;140582756535120</title>\n", - "<path fill=\"none\" stroke=\"black\" d=\"M496.99,-432.05C501.47,-424.01 506.93,-414.2 511.94,-405.23\"/>\n", - "<polygon fill=\"black\" stroke=\"black\" points=\"515.11,-406.71 516.92,-396.28 509,-403.31 515.11,-406.71\"/>\n", - "</g>\n", - "<!-- 140582756541648 -->\n", - "<g 
id=\"node13\" class=\"node\">\n", - "<title>140582756541648</title>\n", - "<ellipse fill=\"none\" stroke=\"black\" cx=\"385.55\" cy=\"-306\" rx=\"118.08\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"385.55\" y=\"-302.3\" font-family=\"Times,serif\" font-size=\"14.00\">sepal length (cm) &lt;= 0</text>\n", - "</g>\n", - "<!-- 140582756535120&#45;&gt;140582756541648 -->\n", - "<g id=\"edge12\" class=\"edge\">\n", - "<title>140582756535120&#45;&gt;140582756541648</title>\n", - "<path fill=\"none\" stroke=\"black\" d=\"M493.49,-360.59C473.78,-350.8 448.56,-338.29 427.46,-327.81\"/>\n", - "<polygon fill=\"black\" stroke=\"black\" points=\"428.91,-324.62 418.4,-323.31 425.8,-330.89 428.91,-324.62\"/>\n", - "</g>\n", - "<!-- 140582756540752 -->\n", - "<g id=\"node16\" class=\"node\">\n", - "<title>140582756540752</title>\n", - "<ellipse fill=\"none\" stroke=\"black\" cx=\"552.55\" cy=\"-306\" rx=\"30.59\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"552.55\" y=\"-302.3\" font-family=\"Times,serif\" font-size=\"14.00\">Leaf</text>\n", - "</g>\n", - "<!-- 140582756535120&#45;&gt;140582756540752 -->\n", - "<g id=\"edge15\" class=\"edge\">\n", - "<title>140582756535120&#45;&gt;140582756540752</title>\n", - "<path fill=\"none\" stroke=\"black\" d=\"M532.97,-359.7C535.9,-351.81 539.43,-342.3 542.69,-333.55\"/>\n", - "<polygon fill=\"black\" stroke=\"black\" points=\"545.99,-334.7 546.19,-324.1 539.43,-332.26 545.99,-334.7\"/>\n", - "</g>\n", - "<!-- 140582756532624 -->\n", - "<g id=\"node14\" class=\"node\">\n", - "<title>140582756532624</title>\n", - "<ellipse fill=\"none\" stroke=\"black\" cx=\"345.55\" cy=\"-234\" rx=\"30.59\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"345.55\" y=\"-230.3\" font-family=\"Times,serif\" font-size=\"14.00\">Leaf</text>\n", - "</g>\n", - "<!-- 140582756541648&#45;&gt;140582756532624 -->\n", - "<g id=\"edge13\" class=\"edge\">\n", - "<title>140582756541648&#45;&gt;140582756532624</title>\n", - "<path fill=\"none\" 
stroke=\"black\" d=\"M375.86,-288.05C371.08,-279.68 365.21,-269.4 359.91,-260.13\"/>\n", - "<polygon fill=\"black\" stroke=\"black\" points=\"362.87,-258.25 354.87,-251.31 356.79,-261.73 362.87,-258.25\"/>\n", - "</g>\n", - "<!-- 140582756538448 -->\n", - "<g id=\"node15\" class=\"node\">\n", - "<title>140582756538448</title>\n", - "<ellipse fill=\"none\" stroke=\"black\" cx=\"424.55\" cy=\"-234\" rx=\"30.59\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"424.55\" y=\"-230.3\" font-family=\"Times,serif\" font-size=\"14.00\">Leaf</text>\n", - "</g>\n", - "<!-- 140582756541648&#45;&gt;140582756538448 -->\n", - "<g id=\"edge14\" class=\"edge\">\n", - "<title>140582756541648&#45;&gt;140582756538448</title>\n", - "<path fill=\"none\" stroke=\"black\" d=\"M394.99,-288.05C399.65,-279.68 405.38,-269.4 410.54,-260.13\"/>\n", - "<polygon fill=\"black\" stroke=\"black\" points=\"413.65,-261.75 415.46,-251.31 407.54,-258.34 413.65,-261.75\"/>\n", - "</g>\n", - "<!-- 140582756534160 -->\n", - "<g id=\"node18\" class=\"node\">\n", - "<title>140582756534160</title>\n", - "<ellipse fill=\"none\" stroke=\"black\" cx=\"777.55\" cy=\"-450\" rx=\"115.08\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"777.55\" y=\"-446.3\" font-family=\"Times,serif\" font-size=\"14.00\">sepal width (cm) &lt;= 1</text>\n", - "</g>\n", - "<!-- 140582756536400&#45;&gt;140582756534160 -->\n", - "<g id=\"edge17\" class=\"edge\">\n", - "<title>140582756536400&#45;&gt;140582756534160</title>\n", - "<path fill=\"none\" stroke=\"black\" d=\"M777.55,-503.7C777.55,-495.98 777.55,-486.71 777.55,-478.11\"/>\n", - "<polygon fill=\"black\" stroke=\"black\" points=\"781.05,-478.1 777.55,-468.1 774.05,-478.1 781.05,-478.1\"/>\n", - "</g>\n", - "<!-- 140582684160784 -->\n", - "<g id=\"node37\" class=\"node\">\n", - "<title>140582684160784</title>\n", - "<ellipse fill=\"none\" stroke=\"black\" cx=\"941.55\" cy=\"-450\" rx=\"30.59\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"941.55\" y=\"-446.3\" 
font-family=\"Times,serif\" font-size=\"14.00\">Leaf</text>\n", - "</g>\n", - "<!-- 140582756536400&#45;&gt;140582684160784 -->\n", - "<g id=\"edge36\" class=\"edge\">\n", - "<title>140582756536400&#45;&gt;140582684160784</title>\n", - "<path fill=\"none\" stroke=\"black\" d=\"M815.17,-504.94C843.13,-493.01 880.95,-476.87 907.91,-465.36\"/>\n", - "<polygon fill=\"black\" stroke=\"black\" points=\"909.49,-468.49 917.31,-461.34 906.74,-462.05 909.49,-468.49\"/>\n", - "</g>\n", - "<!-- 140582756534288 -->\n", - "<g id=\"node19\" class=\"node\">\n", - "<title>140582756534288</title>\n", - "<ellipse fill=\"none\" stroke=\"black\" cx=\"777.55\" cy=\"-378\" rx=\"117.78\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"777.55\" y=\"-374.3\" font-family=\"Times,serif\" font-size=\"14.00\">petal length (cm) &lt;= 2</text>\n", - "</g>\n", - "<!-- 140582756534160&#45;&gt;140582756534288 -->\n", - "<g id=\"edge18\" class=\"edge\">\n", - "<title>140582756534160&#45;&gt;140582756534288</title>\n", - "<path fill=\"none\" stroke=\"black\" d=\"M777.55,-431.7C777.55,-423.98 777.55,-414.71 777.55,-406.11\"/>\n", - "<polygon fill=\"black\" stroke=\"black\" points=\"781.05,-406.1 777.55,-396.1 774.05,-406.1 781.05,-406.1\"/>\n", - "</g>\n", - "<!-- 140582741948816 -->\n", - "<g id=\"node26\" class=\"node\">\n", - "<title>140582741948816</title>\n", - "<ellipse fill=\"none\" stroke=\"black\" cx=\"1027.55\" cy=\"-378\" rx=\"114.28\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"1027.55\" y=\"-374.3\" font-family=\"Times,serif\" font-size=\"14.00\">petal width (cm) &lt;= 3</text>\n", - "</g>\n", - "<!-- 140582756534160&#45;&gt;140582741948816 -->\n", - "<g id=\"edge25\" class=\"edge\">\n", - "<title>140582756534160&#45;&gt;140582741948816</title>\n", - "<path fill=\"none\" stroke=\"black\" d=\"M831.18,-433.98C870.23,-423.05 923.25,-408.2 964.36,-396.69\"/>\n", - "<polygon fill=\"black\" stroke=\"black\" points=\"965.32,-400.06 974,-393.99 963.43,-393.32 965.32,-400.06\"/>\n", - 
"</g>\n", - "<!-- 140582756548560 -->\n", - "<g id=\"node20\" class=\"node\">\n", - "<title>140582756548560</title>\n", - "<ellipse fill=\"none\" stroke=\"black\" cx=\"631.55\" cy=\"-306\" rx=\"30.59\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"631.55\" y=\"-302.3\" font-family=\"Times,serif\" font-size=\"14.00\">Leaf</text>\n", - "</g>\n", - "<!-- 140582756534288&#45;&gt;140582756548560 -->\n", - "<g id=\"edge19\" class=\"edge\">\n", - "<title>140582756534288&#45;&gt;140582756548560</title>\n", - "<path fill=\"none\" stroke=\"black\" d=\"M743.32,-360.59C719.32,-349.08 687.44,-333.8 663.87,-322.5\"/>\n", - "<polygon fill=\"black\" stroke=\"black\" points=\"665.17,-319.24 654.64,-318.07 662.15,-325.55 665.17,-319.24\"/>\n", - "</g>\n", - "<!-- 140582756548176 -->\n", - "<g id=\"node21\" class=\"node\">\n", - "<title>140582756548176</title>\n", - "<ellipse fill=\"none\" stroke=\"black\" cx=\"798.55\" cy=\"-306\" rx=\"118.08\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"798.55\" y=\"-302.3\" font-family=\"Times,serif\" font-size=\"14.00\">sepal length (cm) &lt;= 0</text>\n", - "</g>\n", - "<!-- 140582756534288&#45;&gt;140582756548176 -->\n", - "<g id=\"edge20\" class=\"edge\">\n", - "<title>140582756534288&#45;&gt;140582756548176</title>\n", - "<path fill=\"none\" stroke=\"black\" d=\"M782.74,-359.7C785.08,-351.9 787.89,-342.51 790.5,-333.83\"/>\n", - "<polygon fill=\"black\" stroke=\"black\" points=\"793.89,-334.69 793.42,-324.1 787.19,-332.68 793.89,-334.69\"/>\n", - "</g>\n", - "<!-- 140582756540688 -->\n", - "<g id=\"node22\" class=\"node\">\n", - "<title>140582756540688</title>\n", - "<ellipse fill=\"none\" stroke=\"black\" cx=\"650.55\" cy=\"-234\" rx=\"30.59\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"650.55\" y=\"-230.3\" font-family=\"Times,serif\" font-size=\"14.00\">Leaf</text>\n", - "</g>\n", - "<!-- 140582756548176&#45;&gt;140582756540688 -->\n", - "<g id=\"edge21\" class=\"edge\">\n", - 
"<title>140582756548176&#45;&gt;140582756540688</title>\n", - "<path fill=\"none\" stroke=\"black\" d=\"M764.22,-288.76C739.65,-277.14 706.78,-261.6 682.71,-250.21\"/>\n", - "<polygon fill=\"black\" stroke=\"black\" points=\"684.17,-247.03 673.63,-245.92 681.18,-253.36 684.17,-247.03\"/>\n", - "</g>\n", - "<!-- 140582741936848 -->\n", - "<g id=\"node23\" class=\"node\">\n", - "<title>140582741936848</title>\n", - "<ellipse fill=\"none\" stroke=\"black\" cx=\"817.55\" cy=\"-234\" rx=\"118.08\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"817.55\" y=\"-230.3\" font-family=\"Times,serif\" font-size=\"14.00\">sepal length (cm) &lt;= 0</text>\n", - "</g>\n", - "<!-- 140582756548176&#45;&gt;140582741936848 -->\n", - "<g id=\"edge22\" class=\"edge\">\n", - "<title>140582756548176&#45;&gt;140582741936848</title>\n", - "<path fill=\"none\" stroke=\"black\" d=\"M803.24,-287.7C805.36,-279.9 807.91,-270.51 810.27,-261.83\"/>\n", - "<polygon fill=\"black\" stroke=\"black\" points=\"813.66,-262.67 812.9,-252.1 806.91,-260.84 813.66,-262.67\"/>\n", - "</g>\n", - "<!-- 140582741935248 -->\n", - "<g id=\"node24\" class=\"node\">\n", - "<title>140582741935248</title>\n", - "<ellipse fill=\"none\" stroke=\"black\" cx=\"777.55\" cy=\"-162\" rx=\"30.59\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"777.55\" y=\"-158.3\" font-family=\"Times,serif\" font-size=\"14.00\">Leaf</text>\n", - "</g>\n", - "<!-- 140582741936848&#45;&gt;140582741935248 -->\n", - "<g id=\"edge23\" class=\"edge\">\n", - "<title>140582741936848&#45;&gt;140582741935248</title>\n", - "<path fill=\"none\" stroke=\"black\" d=\"M807.86,-216.05C803.08,-207.68 797.21,-197.4 791.91,-188.13\"/>\n", - "<polygon fill=\"black\" stroke=\"black\" points=\"794.87,-186.25 786.87,-179.31 788.79,-189.73 794.87,-186.25\"/>\n", - "</g>\n", - "<!-- 140582741947920 -->\n", - "<g id=\"node25\" class=\"node\">\n", - "<title>140582741947920</title>\n", - "<ellipse fill=\"none\" stroke=\"black\" cx=\"856.55\" cy=\"-162\" 
rx=\"30.59\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"856.55\" y=\"-158.3\" font-family=\"Times,serif\" font-size=\"14.00\">Leaf</text>\n", - "</g>\n", - "<!-- 140582741936848&#45;&gt;140582741947920 -->\n", - "<g id=\"edge24\" class=\"edge\">\n", - "<title>140582741936848&#45;&gt;140582741947920</title>\n", - "<path fill=\"none\" stroke=\"black\" d=\"M826.99,-216.05C831.65,-207.68 837.38,-197.4 842.54,-188.13\"/>\n", - "<polygon fill=\"black\" stroke=\"black\" points=\"845.65,-189.75 847.46,-179.31 839.54,-186.34 845.65,-189.75\"/>\n", - "</g>\n", - "<!-- 140582741934288 -->\n", - "<g id=\"node27\" class=\"node\">\n", - "<title>140582741934288</title>\n", - "<ellipse fill=\"none\" stroke=\"black\" cx=\"986.55\" cy=\"-306\" rx=\"30.59\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"986.55\" y=\"-302.3\" font-family=\"Times,serif\" font-size=\"14.00\">Leaf</text>\n", - "</g>\n", - "<!-- 140582741948816&#45;&gt;140582741934288 -->\n", - "<g id=\"edge26\" class=\"edge\">\n", - "<title>140582741948816&#45;&gt;140582741934288</title>\n", - "<path fill=\"none\" stroke=\"black\" d=\"M1017.62,-360.05C1012.72,-351.68 1006.7,-341.4 1001.27,-332.13\"/>\n", - "<polygon fill=\"black\" stroke=\"black\" points=\"1004.17,-330.17 996.1,-323.31 998.13,-333.7 1004.17,-330.17\"/>\n", - "</g>\n", - "<!-- 140582684159888 -->\n", - "<g id=\"node28\" class=\"node\">\n", - "<title>140582684159888</title>\n", - "<ellipse fill=\"none\" stroke=\"black\" cx=\"1152.55\" cy=\"-306\" rx=\"117.78\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"1152.55\" y=\"-302.3\" font-family=\"Times,serif\" font-size=\"14.00\">petal length (cm) &lt;= 2</text>\n", - "</g>\n", - "<!-- 140582741948816&#45;&gt;140582684159888 -->\n", - "<g id=\"edge27\" class=\"edge\">\n", - "<title>140582741948816&#45;&gt;140582684159888</title>\n", - "<path fill=\"none\" stroke=\"black\" d=\"M1056.85,-360.59C1073.97,-351 1095.76,-338.8 1114.23,-328.46\"/>\n", - "<polygon fill=\"black\" stroke=\"black\" 
points=\"1115.98,-331.49 1122.99,-323.55 1112.56,-325.38 1115.98,-331.49\"/>\n", - "</g>\n", - "<!-- 140582684150992 -->\n", - "<g id=\"node29\" class=\"node\">\n", - "<title>140582684150992</title>\n", - "<ellipse fill=\"none\" stroke=\"black\" cx=\"1133.55\" cy=\"-234\" rx=\"118.08\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"1133.55\" y=\"-230.3\" font-family=\"Times,serif\" font-size=\"14.00\">sepal length (cm) &lt;= 0</text>\n", - "</g>\n", - "<!-- 140582684159888&#45;&gt;140582684150992 -->\n", - "<g id=\"edge28\" class=\"edge\">\n", - "<title>140582684159888&#45;&gt;140582684150992</title>\n", - "<path fill=\"none\" stroke=\"black\" d=\"M1147.85,-287.7C1145.73,-279.9 1143.19,-270.51 1140.83,-261.83\"/>\n", - "<polygon fill=\"black\" stroke=\"black\" points=\"1144.19,-260.84 1138.19,-252.1 1137.43,-262.67 1144.19,-260.84\"/>\n", - "</g>\n", - "<!-- 140582684150544 -->\n", - "<g id=\"node36\" class=\"node\">\n", - "<title>140582684150544</title>\n", - "<ellipse fill=\"none\" stroke=\"black\" cx=\"1300.55\" cy=\"-234\" rx=\"30.59\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"1300.55\" y=\"-230.3\" font-family=\"Times,serif\" font-size=\"14.00\">Leaf</text>\n", - "</g>\n", - "<!-- 140582684159888&#45;&gt;140582684150544 -->\n", - "<g id=\"edge35\" class=\"edge\">\n", - "<title>140582684159888&#45;&gt;140582684150544</title>\n", - "<path fill=\"none\" stroke=\"black\" d=\"M1186.87,-288.76C1211.45,-277.14 1244.31,-261.6 1268.38,-250.21\"/>\n", - "<polygon fill=\"black\" stroke=\"black\" points=\"1269.92,-253.36 1277.46,-245.92 1266.93,-247.03 1269.92,-253.36\"/>\n", - "</g>\n", - "<!-- 140582684159632 -->\n", - "<g id=\"node30\" class=\"node\">\n", - "<title>140582684159632</title>\n", - "<ellipse fill=\"none\" stroke=\"black\" cx=\"1067.55\" cy=\"-162\" rx=\"118.08\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"1067.55\" y=\"-158.3\" font-family=\"Times,serif\" font-size=\"14.00\">sepal length (cm) &lt;= 0</text>\n", - "</g>\n", - "<!-- 
140582684150992&#45;&gt;140582684159632 -->\n", - "<g id=\"edge29\" class=\"edge\">\n", - "<title>140582684150992&#45;&gt;140582684159632</title>\n", - "<path fill=\"none\" stroke=\"black\" d=\"M1117.57,-216.05C1109.58,-207.58 1099.76,-197.17 1090.94,-187.82\"/>\n", - "<polygon fill=\"black\" stroke=\"black\" points=\"1093.24,-185.15 1083.84,-180.28 1088.15,-189.95 1093.24,-185.15\"/>\n", - "</g>\n", - "<!-- 140582684157584 -->\n", - "<g id=\"node35\" class=\"node\">\n", - "<title>140582684157584</title>\n", - "<ellipse fill=\"none\" stroke=\"black\" cx=\"1234.55\" cy=\"-162\" rx=\"30.59\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"1234.55\" y=\"-158.3\" font-family=\"Times,serif\" font-size=\"14.00\">Leaf</text>\n", - "</g>\n", - "<!-- 140582684150992&#45;&gt;140582684157584 -->\n", - "<g id=\"edge34\" class=\"edge\">\n", - "<title>140582684150992&#45;&gt;140582684157584</title>\n", - "<path fill=\"none\" stroke=\"black\" d=\"M1157.74,-216.23C1172.57,-205.95 1191.6,-192.77 1207.01,-182.09\"/>\n", - "<polygon fill=\"black\" stroke=\"black\" points=\"1209.38,-184.7 1215.6,-176.13 1205.39,-178.95 1209.38,-184.7\"/>\n", - "</g>\n", - "<!-- 140582684153744 -->\n", - "<g id=\"node31\" class=\"node\">\n", - "<title>140582684153744</title>\n", - "<ellipse fill=\"none\" stroke=\"black\" cx=\"985.55\" cy=\"-90\" rx=\"115.08\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"985.55\" y=\"-86.3\" font-family=\"Times,serif\" font-size=\"14.00\">sepal width (cm) &lt;= 1</text>\n", - "</g>\n", - "<!-- 140582684159632&#45;&gt;140582684153744 -->\n", - "<g id=\"edge30\" class=\"edge\">\n", - "<title>140582684159632&#45;&gt;140582684153744</title>\n", - "<path fill=\"none\" stroke=\"black\" d=\"M1047.7,-144.05C1037.32,-135.2 1024.46,-124.22 1013.13,-114.55\"/>\n", - "<polygon fill=\"black\" stroke=\"black\" points=\"1015.09,-111.62 1005.21,-107.79 1010.55,-116.94 1015.09,-111.62\"/>\n", - "</g>\n", - "<!-- 140582684148880 -->\n", - "<g id=\"node34\" class=\"node\">\n", 
- "<title>140582684148880</title>\n", - "<ellipse fill=\"none\" stroke=\"black\" cx=\"1149.55\" cy=\"-90\" rx=\"30.59\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"1149.55\" y=\"-86.3\" font-family=\"Times,serif\" font-size=\"14.00\">Leaf</text>\n", - "</g>\n", - "<!-- 140582684159632&#45;&gt;140582684148880 -->\n", - "<g id=\"edge33\" class=\"edge\">\n", - "<title>140582684159632&#45;&gt;140582684148880</title>\n", - "<path fill=\"none\" stroke=\"black\" d=\"M1087.4,-144.05C1098.72,-134.39 1113.01,-122.19 1125.02,-111.93\"/>\n", - "<polygon fill=\"black\" stroke=\"black\" points=\"1127.59,-114.35 1132.92,-105.19 1123.04,-109.03 1127.59,-114.35\"/>\n", - "</g>\n", - "<!-- 140582684162000 -->\n", - "<g id=\"node32\" class=\"node\">\n", - "<title>140582684162000</title>\n", - "<ellipse fill=\"none\" stroke=\"black\" cx=\"946.55\" cy=\"-18\" rx=\"30.59\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"946.55\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">Leaf</text>\n", - "</g>\n", - "<!-- 140582684153744&#45;&gt;140582684162000 -->\n", - "<g id=\"edge31\" class=\"edge\">\n", - "<title>140582684153744&#45;&gt;140582684162000</title>\n", - "<path fill=\"none\" stroke=\"black\" d=\"M976.11,-72.05C971.44,-63.68 965.72,-53.4 960.55,-44.13\"/>\n", - "<polygon fill=\"black\" stroke=\"black\" points=\"963.56,-42.34 955.63,-35.31 957.44,-45.75 963.56,-42.34\"/>\n", - "</g>\n", - "<!-- 140582684158544 -->\n", - "<g id=\"node33\" class=\"node\">\n", - "<title>140582684158544</title>\n", - "<ellipse fill=\"none\" stroke=\"black\" cx=\"1025.55\" cy=\"-18\" rx=\"30.59\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"1025.55\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">Leaf</text>\n", - "</g>\n", - "<!-- 140582684153744&#45;&gt;140582684158544 -->\n", - "<g id=\"edge32\" class=\"edge\">\n", - "<title>140582684153744&#45;&gt;140582684158544</title>\n", - "<path fill=\"none\" stroke=\"black\" d=\"M995.23,-72.05C1000.02,-63.68 
1005.89,-53.4 1011.18,-44.13\"/>\n", - "<polygon fill=\"black\" stroke=\"black\" points=\"1014.31,-45.73 1016.23,-35.31 1008.23,-42.25 1014.31,-45.73\"/>\n", - "</g>\n", - "</g>\n", - "</svg>\n" - ], - "text/plain": [ - "<graphviz.graphs.Digraph at 0x7fdbf5ba52d0>" - ] - }, - "execution_count": 2771, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "\n", "X = X_train\n", @@ -700,6 +236,13 @@ "graphTree(node)\n", "graph" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Evaluate!!!" + ] } ], "metadata": { diff --git a/recidivism/RecidivismLogReg.ipynb b/recidivism/RecidivismLogReg.ipynb @@ -0,0 +1,738 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 313, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['Recidivism Reporting Year',\n", + " 'Fiscal Year Admitted',\n", + " 'Region Code',\n", + " 'Convicting Offense Classification',\n", + " 'Convicting Offense Type',\n", + " 'Convicting Offense Subtype',\n", + " 'Race - Ethnicity',\n", + " 'Sex',\n", + " 'Level of Supervision',\n", + " 'Recidivism - Prison Admission',\n", + " 'Recidivism Type',\n", + " 'New Conviction Offense Class',\n", + " 'New Conviction Offense Type',\n", + " 'New Conviction Offense SubType',\n", + " 'Days to Recidivism',\n", + " 'Part of Target Population']" + ] + }, + "execution_count": 313, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "\n", + "df = pd.read_csv('../datasets/recidivism/Recidivism.csv')\n", + "df.columns.to_list()" + ] + }, + { + "cell_type": "code", + "execution_count": 314, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + 
"</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Recidivism Reporting Year</th>\n", + " <th>Fiscal Year Admitted</th>\n", + " <th>Region Code</th>\n", + " <th>Convicting Offense Classification</th>\n", + " <th>Convicting Offense Type</th>\n", + " <th>Convicting Offense Subtype</th>\n", + " <th>Race - Ethnicity</th>\n", + " <th>Sex</th>\n", + " <th>Level of Supervision</th>\n", + " <th>Recidivism - Prison Admission</th>\n", + " <th>Recidivism Type</th>\n", + " <th>New Conviction Offense Class</th>\n", + " <th>New Conviction Offense Type</th>\n", + " <th>New Conviction Offense SubType</th>\n", + " <th>Days to Recidivism</th>\n", + " <th>Part of Target Population</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>2013</td>\n", + " <td>2010</td>\n", + " <td>NaN</td>\n", + " <td>D Felony</td>\n", + " <td>Public Order</td>\n", + " <td>OWI</td>\n", + " <td>White - Hispanic</td>\n", + " <td>Male</td>\n", + " <td>NaN</td>\n", + " <td>No</td>\n", + " <td>No Recidivism</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>No</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " Recidivism Reporting Year Fiscal Year Admitted Region Code \\\n", + "0 2013 2010 NaN \n", + "\n", + " Convicting Offense Classification Convicting Offense Type \\\n", + "0 D Felony Public Order \n", + "\n", + " Convicting Offense Subtype Race - Ethnicity Sex Level of Supervision \\\n", + "0 OWI White - Hispanic Male NaN \n", + "\n", + " Recidivism - Prison Admission Recidivism Type New Conviction Offense Class \\\n", + "0 No No Recidivism NaN \n", + "\n", + " New Conviction Offense Type New Conviction Offense SubType \\\n", + "0 NaN NaN \n", + "\n", + " Days to Recidivism Part of Target Population \n", + "0 NaN No " + ] + }, + "execution_count": 314, + "metadata": {}, + 
"output_type": "execute_result" + } + ], + "source": [ + "df.head(1)" + ] + }, + { + "cell_type": "code", + "execution_count": 315, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Recidivism - Prison Admission\n", + "False 85431\n", + "True 10926\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 315, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X = df.drop(axis=1, columns=df.columns.to_list()[9:])\n", + "y = df['Recidivism - Prison Admission']\n", + "y = y == 'Yes'\n", + "\n", + "y.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 316, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Recidivism Reporting Year int64\n", + "Fiscal Year Admitted int64\n", + "Region Code object\n", + "Convicting Offense Classification object\n", + "Convicting Offense Type object\n", + "Convicting Offense Subtype object\n", + "Race - Ethnicity object\n", + "Sex object\n", + "Level of Supervision object\n", + "dtype: object" + ] + }, + "execution_count": 316, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": 317, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.preprocessing import OneHotEncoder\n", + "ohc = OneHotEncoder(sparse_output=False)\n", + "\n", + "def encode(X, name):\n", + " trans = ohc.fit_transform(X[[name]])\n", + " transformed_df = pd.DataFrame(trans, columns=ohc.get_feature_names_out([name]))\n", + " X = pd.concat([X,transformed_df], axis=1)\n", + " X = X.drop(columns=[name], axis=1)\n", + " return X" + ] + }, + { + "cell_type": "code", + "execution_count": 318, + "metadata": {}, + "outputs": [], + "source": [ + "X = encode(X,'Convicting Offense Classification')\n", + "X = encode(X,'Convicting Offense Type')\n", + "X = encode(X,'Convicting Offense Subtype')\n", + "X = encode(X,'Level of Supervision')\n", + "X = encode(X,'Sex')\n", + "X = encode(X,'Race 
- Ethnicity')\n", + "X = encode(X,'Region Code')" + ] + }, + { + "cell_type": "code", + "execution_count": 320, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.preprocessing import StandardScaler\n", + "\n", + "std = StandardScaler()\n", + "X = std.fit_transform(X)" + ] + }, + { + "cell_type": "code", + "execution_count": 321, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(72267, 88) (72267,)\n", + "(12045, 88) (12045,)\n" + ] + } + ], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "\n", + "X_train, X_test, y_train, y_test = train_test_split(X,y,random_state=10)\n", + "X_test, X_val, y_test, y_val = train_test_split(X_test,y_test,random_state=10, test_size=.5)\n", + "\n", + "print(X_train.shape , y_train.shape)\n", + "print(X_test.shape , y_test.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 322, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<style>#sk-container-id-8 {\n", + " /* Definition of color scheme common for light and dark mode */\n", + " --sklearn-color-text: black;\n", + " --sklearn-color-line: gray;\n", + " /* Definition of color scheme for unfitted estimators */\n", + " --sklearn-color-unfitted-level-0: #fff5e6;\n", + " --sklearn-color-unfitted-level-1: #f6e4d2;\n", + " --sklearn-color-unfitted-level-2: #ffe0b3;\n", + " --sklearn-color-unfitted-level-3: chocolate;\n", + " /* Definition of color scheme for fitted estimators */\n", + " --sklearn-color-fitted-level-0: #f0f8ff;\n", + " --sklearn-color-fitted-level-1: #d4ebff;\n", + " --sklearn-color-fitted-level-2: #b3dbfd;\n", + " --sklearn-color-fitted-level-3: cornflowerblue;\n", + "\n", + " /* Specific color for light theme */\n", + " --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n", + " --sklearn-color-background: var(--sg-background-color, var(--theme-background, 
var(--jp-layout-color0, white)));\n", + " --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n", + " --sklearn-color-icon: #696969;\n", + "\n", + " @media (prefers-color-scheme: dark) {\n", + " /* Redefinition of color scheme for dark theme */\n", + " --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n", + " --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n", + " --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n", + " --sklearn-color-icon: #878787;\n", + " }\n", + "}\n", + "\n", + "#sk-container-id-8 {\n", + " color: var(--sklearn-color-text);\n", + "}\n", + "\n", + "#sk-container-id-8 pre {\n", + " padding: 0;\n", + "}\n", + "\n", + "#sk-container-id-8 input.sk-hidden--visually {\n", + " border: 0;\n", + " clip: rect(1px 1px 1px 1px);\n", + " clip: rect(1px, 1px, 1px, 1px);\n", + " height: 1px;\n", + " margin: -1px;\n", + " overflow: hidden;\n", + " padding: 0;\n", + " position: absolute;\n", + " width: 1px;\n", + "}\n", + "\n", + "#sk-container-id-8 div.sk-dashed-wrapped {\n", + " border: 1px dashed var(--sklearn-color-line);\n", + " margin: 0 0.4em 0.5em 0.4em;\n", + " box-sizing: border-box;\n", + " padding-bottom: 0.4em;\n", + " background-color: var(--sklearn-color-background);\n", + "}\n", + "\n", + "#sk-container-id-8 div.sk-container {\n", + " /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n", + " but bootstrap.min.css set `[hidden] { display: none !important; }`\n", + " so we also need the `!important` here to be able to override the\n", + " default hidden behavior on the sphinx rendered scikit-learn.org.\n", + " See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n", + " display: inline-block !important;\n", + " position: relative;\n", + 
"}\n", + "\n", + "#sk-container-id-8 div.sk-text-repr-fallback {\n", + " display: none;\n", + "}\n", + "\n", + "div.sk-parallel-item,\n", + "div.sk-serial,\n", + "div.sk-item {\n", + " /* draw centered vertical line to link estimators */\n", + " background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n", + " background-size: 2px 100%;\n", + " background-repeat: no-repeat;\n", + " background-position: center center;\n", + "}\n", + "\n", + "/* Parallel-specific style estimator block */\n", + "\n", + "#sk-container-id-8 div.sk-parallel-item::after {\n", + " content: \"\";\n", + " width: 100%;\n", + " border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n", + " flex-grow: 1;\n", + "}\n", + "\n", + "#sk-container-id-8 div.sk-parallel {\n", + " display: flex;\n", + " align-items: stretch;\n", + " justify-content: center;\n", + " background-color: var(--sklearn-color-background);\n", + " position: relative;\n", + "}\n", + "\n", + "#sk-container-id-8 div.sk-parallel-item {\n", + " display: flex;\n", + " flex-direction: column;\n", + "}\n", + "\n", + "#sk-container-id-8 div.sk-parallel-item:first-child::after {\n", + " align-self: flex-end;\n", + " width: 50%;\n", + "}\n", + "\n", + "#sk-container-id-8 div.sk-parallel-item:last-child::after {\n", + " align-self: flex-start;\n", + " width: 50%;\n", + "}\n", + "\n", + "#sk-container-id-8 div.sk-parallel-item:only-child::after {\n", + " width: 0;\n", + "}\n", + "\n", + "/* Serial-specific style estimator block */\n", + "\n", + "#sk-container-id-8 div.sk-serial {\n", + " display: flex;\n", + " flex-direction: column;\n", + " align-items: center;\n", + " background-color: var(--sklearn-color-background);\n", + " padding-right: 1em;\n", + " padding-left: 1em;\n", + "}\n", + "\n", + "\n", + "/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n", + "clickable and can be expanded/collapsed.\n", + "- 
Pipeline and ColumnTransformer use this feature and define the default style\n", + "- Estimators will overwrite some part of the style using the `sk-estimator` class\n", + "*/\n", + "\n", + "/* Pipeline and ColumnTransformer style (default) */\n", + "\n", + "#sk-container-id-8 div.sk-toggleable {\n", + " /* Default theme specific background. It is overwritten whether we have a\n", + " specific estimator or a Pipeline/ColumnTransformer */\n", + " background-color: var(--sklearn-color-background);\n", + "}\n", + "\n", + "/* Toggleable label */\n", + "#sk-container-id-8 label.sk-toggleable__label {\n", + " cursor: pointer;\n", + " display: block;\n", + " width: 100%;\n", + " margin-bottom: 0;\n", + " padding: 0.5em;\n", + " box-sizing: border-box;\n", + " text-align: center;\n", + "}\n", + "\n", + "#sk-container-id-8 label.sk-toggleable__label-arrow:before {\n", + " /* Arrow on the left of the label */\n", + " content: \"▸\";\n", + " float: left;\n", + " margin-right: 0.25em;\n", + " color: var(--sklearn-color-icon);\n", + "}\n", + "\n", + "#sk-container-id-8 label.sk-toggleable__label-arrow:hover:before {\n", + " color: var(--sklearn-color-text);\n", + "}\n", + "\n", + "/* Toggleable content - dropdown */\n", + "\n", + "#sk-container-id-8 div.sk-toggleable__content {\n", + " max-height: 0;\n", + " max-width: 0;\n", + " overflow: hidden;\n", + " text-align: left;\n", + " /* unfitted */\n", + " background-color: var(--sklearn-color-unfitted-level-0);\n", + "}\n", + "\n", + "#sk-container-id-8 div.sk-toggleable__content.fitted {\n", + " /* fitted */\n", + " background-color: var(--sklearn-color-fitted-level-0);\n", + "}\n", + "\n", + "#sk-container-id-8 div.sk-toggleable__content pre {\n", + " margin: 0.2em;\n", + " border-radius: 0.25em;\n", + " color: var(--sklearn-color-text);\n", + " /* unfitted */\n", + " background-color: var(--sklearn-color-unfitted-level-0);\n", + "}\n", + "\n", + "#sk-container-id-8 div.sk-toggleable__content.fitted pre {\n", + " /* unfitted 
*/\n", + " background-color: var(--sklearn-color-fitted-level-0);\n", + "}\n", + "\n", + "#sk-container-id-8 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n", + " /* Expand drop-down */\n", + " max-height: 200px;\n", + " max-width: 100%;\n", + " overflow: auto;\n", + "}\n", + "\n", + "#sk-container-id-8 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n", + " content: \"▾\";\n", + "}\n", + "\n", + "/* Pipeline/ColumnTransformer-specific style */\n", + "\n", + "#sk-container-id-8 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n", + " color: var(--sklearn-color-text);\n", + " background-color: var(--sklearn-color-unfitted-level-2);\n", + "}\n", + "\n", + "#sk-container-id-8 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n", + " background-color: var(--sklearn-color-fitted-level-2);\n", + "}\n", + "\n", + "/* Estimator-specific style */\n", + "\n", + "/* Colorize estimator box */\n", + "#sk-container-id-8 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n", + " /* unfitted */\n", + " background-color: var(--sklearn-color-unfitted-level-2);\n", + "}\n", + "\n", + "#sk-container-id-8 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n", + " /* fitted */\n", + " background-color: var(--sklearn-color-fitted-level-2);\n", + "}\n", + "\n", + "#sk-container-id-8 div.sk-label label.sk-toggleable__label,\n", + "#sk-container-id-8 div.sk-label label {\n", + " /* The background is the default theme color */\n", + " color: var(--sklearn-color-text-on-default-background);\n", + "}\n", + "\n", + "/* On hover, darken the color of the background */\n", + "#sk-container-id-8 div.sk-label:hover label.sk-toggleable__label {\n", + " color: var(--sklearn-color-text);\n", + " background-color: var(--sklearn-color-unfitted-level-2);\n", + "}\n", + "\n", + "/* Label box, darken color on hover, fitted 
*/\n", + "#sk-container-id-8 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n", + " color: var(--sklearn-color-text);\n", + " background-color: var(--sklearn-color-fitted-level-2);\n", + "}\n", + "\n", + "/* Estimator label */\n", + "\n", + "#sk-container-id-8 div.sk-label label {\n", + " font-family: monospace;\n", + " font-weight: bold;\n", + " display: inline-block;\n", + " line-height: 1.2em;\n", + "}\n", + "\n", + "#sk-container-id-8 div.sk-label-container {\n", + " text-align: center;\n", + "}\n", + "\n", + "/* Estimator-specific */\n", + "#sk-container-id-8 div.sk-estimator {\n", + " font-family: monospace;\n", + " border: 1px dotted var(--sklearn-color-border-box);\n", + " border-radius: 0.25em;\n", + " box-sizing: border-box;\n", + " margin-bottom: 0.5em;\n", + " /* unfitted */\n", + " background-color: var(--sklearn-color-unfitted-level-0);\n", + "}\n", + "\n", + "#sk-container-id-8 div.sk-estimator.fitted {\n", + " /* fitted */\n", + " background-color: var(--sklearn-color-fitted-level-0);\n", + "}\n", + "\n", + "/* on hover */\n", + "#sk-container-id-8 div.sk-estimator:hover {\n", + " /* unfitted */\n", + " background-color: var(--sklearn-color-unfitted-level-2);\n", + "}\n", + "\n", + "#sk-container-id-8 div.sk-estimator.fitted:hover {\n", + " /* fitted */\n", + " background-color: var(--sklearn-color-fitted-level-2);\n", + "}\n", + "\n", + "/* Specification for estimator info (e.g. 
\"i\" and \"?\") */\n", + "\n", + "/* Common style for \"i\" and \"?\" */\n", + "\n", + ".sk-estimator-doc-link,\n", + "a:link.sk-estimator-doc-link,\n", + "a:visited.sk-estimator-doc-link {\n", + " float: right;\n", + " font-size: smaller;\n", + " line-height: 1em;\n", + " font-family: monospace;\n", + " background-color: var(--sklearn-color-background);\n", + " border-radius: 1em;\n", + " height: 1em;\n", + " width: 1em;\n", + " text-decoration: none !important;\n", + " margin-left: 1ex;\n", + " /* unfitted */\n", + " border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n", + " color: var(--sklearn-color-unfitted-level-1);\n", + "}\n", + "\n", + ".sk-estimator-doc-link.fitted,\n", + "a:link.sk-estimator-doc-link.fitted,\n", + "a:visited.sk-estimator-doc-link.fitted {\n", + " /* fitted */\n", + " border: var(--sklearn-color-fitted-level-1) 1pt solid;\n", + " color: var(--sklearn-color-fitted-level-1);\n", + "}\n", + "\n", + "/* On hover */\n", + "div.sk-estimator:hover .sk-estimator-doc-link:hover,\n", + ".sk-estimator-doc-link:hover,\n", + "div.sk-label-container:hover .sk-estimator-doc-link:hover,\n", + ".sk-estimator-doc-link:hover {\n", + " /* unfitted */\n", + " background-color: var(--sklearn-color-unfitted-level-3);\n", + " color: var(--sklearn-color-background);\n", + " text-decoration: none;\n", + "}\n", + "\n", + "div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n", + ".sk-estimator-doc-link.fitted:hover,\n", + "div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n", + ".sk-estimator-doc-link.fitted:hover {\n", + " /* fitted */\n", + " background-color: var(--sklearn-color-fitted-level-3);\n", + " color: var(--sklearn-color-background);\n", + " text-decoration: none;\n", + "}\n", + "\n", + "/* Span, style for the box shown on hovering the info icon */\n", + ".sk-estimator-doc-link span {\n", + " display: none;\n", + " z-index: 9999;\n", + " position: relative;\n", + " font-weight: normal;\n", + " right: .2ex;\n", + " 
padding: .5ex;\n", + " margin: .5ex;\n", + " width: min-content;\n", + " min-width: 20ex;\n", + " max-width: 50ex;\n", + " color: var(--sklearn-color-text);\n", + " box-shadow: 2pt 2pt 4pt #999;\n", + " /* unfitted */\n", + " background: var(--sklearn-color-unfitted-level-0);\n", + " border: .5pt solid var(--sklearn-color-unfitted-level-3);\n", + "}\n", + "\n", + ".sk-estimator-doc-link.fitted span {\n", + " /* fitted */\n", + " background: var(--sklearn-color-fitted-level-0);\n", + " border: var(--sklearn-color-fitted-level-3);\n", + "}\n", + "\n", + ".sk-estimator-doc-link:hover span {\n", + " display: block;\n", + "}\n", + "\n", + "/* \"?\"-specific style due to the `<a>` HTML tag */\n", + "\n", + "#sk-container-id-8 a.estimator_doc_link {\n", + " float: right;\n", + " font-size: 1rem;\n", + " line-height: 1em;\n", + " font-family: monospace;\n", + " background-color: var(--sklearn-color-background);\n", + " border-radius: 1rem;\n", + " height: 1rem;\n", + " width: 1rem;\n", + " text-decoration: none;\n", + " /* unfitted */\n", + " color: var(--sklearn-color-unfitted-level-1);\n", + " border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n", + "}\n", + "\n", + "#sk-container-id-8 a.estimator_doc_link.fitted {\n", + " /* fitted */\n", + " border: var(--sklearn-color-fitted-level-1) 1pt solid;\n", + " color: var(--sklearn-color-fitted-level-1);\n", + "}\n", + "\n", + "/* On hover */\n", + "#sk-container-id-8 a.estimator_doc_link:hover {\n", + " /* unfitted */\n", + " background-color: var(--sklearn-color-unfitted-level-3);\n", + " color: var(--sklearn-color-background);\n", + " text-decoration: none;\n", + "}\n", + "\n", + "#sk-container-id-8 a.estimator_doc_link.fitted:hover {\n", + " /* fitted */\n", + " background-color: var(--sklearn-color-fitted-level-3);\n", + "}\n", + "</style><div id=\"sk-container-id-8\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>LogisticRegression()</pre><b>In a Jupyter environment, please rerun this cell to 
show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-8\" type=\"checkbox\" checked><label for=\"sk-estimator-id-8\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">&nbsp;&nbsp;LogisticRegression<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.5/modules/generated/sklearn.linear_model.LogisticRegression.html\">?<span>Documentation for LogisticRegression</span></a><span class=\"sk-estimator-doc-link fitted\">i<span>Fitted</span></span></label><div class=\"sk-toggleable__content fitted\"><pre>LogisticRegression()</pre></div> </div></div></div></div>" + ], + "text/plain": [ + "LogisticRegression()" + ] + }, + "execution_count": 322, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.linear_model import LogisticRegression\n", + "\n", + "logReg = LogisticRegression()\n", + "\n", + "logReg.fit(X_train,y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 323, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.8881693648816936" + ] + }, + "execution_count": 323, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.metrics import accuracy_score\n", + "y_test_pred = logReg.predict(X_test)\n", + "\n", + "accuracy_score(y_test,y_test_pred)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": 
"ipython3", + "version": "3.11.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/recidivism/RecidivismNN.ipynb b/recidivism/RecidivismNN.ipynb