commit 887aa0d6e96e0d9f6ee830611609d5368970f87e
parent bd2b8c1864d3591fb6b6d78d499ad1f89231fab3
Author: Andrew <andrewlaack1@gmail.com>
Date: Wed, 26 Jun 2024 16:43:09 -0500
Fixed graph. Did some other stuff as well.
Diffstat:
3 files changed, 755 insertions(+), 474 deletions(-)
diff --git a/irisClassification/CustomDecisionTree.ipynb b/irisClassification/CustomDecisionTree.ipynb
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
- "execution_count": 2766,
+ "execution_count": 113,
"metadata": {},
"outputs": [],
"source": [
@@ -17,7 +17,7 @@
},
{
"cell_type": "code",
- "execution_count": 2767,
+ "execution_count": 114,
"metadata": {},
"outputs": [
{
@@ -32,7 +32,7 @@
"source": [
"from sklearn.model_selection import train_test_split\n",
"\n",
- "X_train, X_test, y_train, y_test = train_test_split(X,y,random_state=10)\n",
+ "X_train, X_test, y_train, y_test = train_test_split(X,y)\n",
"\n",
"print(X_train.shape)\n",
"print(X_test.shape)"
@@ -40,7 +40,7 @@
},
{
"cell_type": "code",
- "execution_count": 2768,
+ "execution_count": 115,
"metadata": {},
"outputs": [],
"source": [
@@ -81,7 +81,7 @@
"# Pass in x values (one feature only) and classes\n",
"def bestSplit(x_vals, classes):\n",
"\n",
- " sorted = np.sort(x_vals)\n",
+ " sorted = np.sort(x_vals) \n",
" count = 1\n",
"\n",
" # Find pivot values (average between adjacent values)\n",
@@ -125,7 +125,7 @@
},
{
"cell_type": "code",
- "execution_count": 2769,
+ "execution_count": 116,
"metadata": {},
"outputs": [],
"source": [
@@ -172,7 +172,7 @@
},
{
"cell_type": "code",
- "execution_count": 2770,
+ "execution_count": 117,
"metadata": {},
"outputs": [],
"source": [
@@ -207,7 +207,7 @@
"\n",
" label = ''\n",
" if(node.colName != None):\n",
- " label = str(node.colName) + ' <= ' + str(node.splitOn)\n",
+ " label = str(node.colName) + ' <= ' + str(round(node.splitVal,2))\n",
" else:\n",
" label = 'Leaf'\n",
"\n",
@@ -224,473 +224,9 @@
},
{
"cell_type": "code",
- "execution_count": 2771,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "image/svg+xml": [
- "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n",
- "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n",
- " \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n",
- "<!-- Generated by graphviz version 2.43.0 (0)\n",
- " -->\n",
- "<!-- Title: %3 Pages: 1 -->\n",
- "<svg width=\"1339pt\" height=\"692pt\"\n",
- " viewBox=\"0.00 0.00 1339.09 692.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n",
- "<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 688)\">\n",
- "<title>%3</title>\n",
- "<polygon fill=\"white\" stroke=\"transparent\" points=\"-4,4 -4,-688 1335.09,-688 1335.09,4 -4,4\"/>\n",
- "<!-- 140582739727952 -->\n",
- "<g id=\"node1\" class=\"node\">\n",
- "<title>140582739727952</title>\n",
- "<ellipse fill=\"none\" stroke=\"black\" cx=\"550.55\" cy=\"-666\" rx=\"117.78\" ry=\"18\"/>\n",
- "<text text-anchor=\"middle\" x=\"550.55\" y=\"-662.3\" font-family=\"Times,serif\" font-size=\"14.00\">petal length (cm) <= 2</text>\n",
- "</g>\n",
- "<!-- 140582734780688 -->\n",
- "<g id=\"node2\" class=\"node\">\n",
- "<title>140582734780688</title>\n",
- "<ellipse fill=\"none\" stroke=\"black\" cx=\"469.55\" cy=\"-594\" rx=\"30.59\" ry=\"18\"/>\n",
- "<text text-anchor=\"middle\" x=\"469.55\" y=\"-590.3\" font-family=\"Times,serif\" font-size=\"14.00\">Leaf</text>\n",
- "</g>\n",
- "<!-- 140582739727952->140582734780688 -->\n",
- "<g id=\"edge1\" class=\"edge\">\n",
- "<title>140582739727952->140582734780688</title>\n",
- "<path fill=\"none\" stroke=\"black\" d=\"M530.94,-648.05C519.76,-638.39 505.64,-626.19 493.77,-615.93\"/>\n",
- "<polygon fill=\"black\" stroke=\"black\" points=\"495.83,-613.09 485.97,-609.19 491.25,-618.38 495.83,-613.09\"/>\n",
- "</g>\n",
- "<!-- 140582763367632 -->\n",
- "<g id=\"node3\" class=\"node\">\n",
- "<title>140582763367632</title>\n",
- "<ellipse fill=\"none\" stroke=\"black\" cx=\"632.55\" cy=\"-594\" rx=\"114.28\" ry=\"18\"/>\n",
- "<text text-anchor=\"middle\" x=\"632.55\" y=\"-590.3\" font-family=\"Times,serif\" font-size=\"14.00\">petal width (cm) <= 3</text>\n",
- "</g>\n",
- "<!-- 140582739727952->140582763367632 -->\n",
- "<g id=\"edge2\" class=\"edge\">\n",
- "<title>140582739727952->140582763367632</title>\n",
- "<path fill=\"none\" stroke=\"black\" d=\"M570.4,-648.05C580.77,-639.2 593.63,-628.22 604.96,-618.55\"/>\n",
- "<polygon fill=\"black\" stroke=\"black\" points=\"607.55,-620.94 612.88,-611.79 603,-615.62 607.55,-620.94\"/>\n",
- "</g>\n",
- "<!-- 140582618925200 -->\n",
- "<g id=\"node4\" class=\"node\">\n",
- "<title>140582618925200</title>\n",
- "<ellipse fill=\"none\" stroke=\"black\" cx=\"487.55\" cy=\"-522\" rx=\"115.08\" ry=\"18\"/>\n",
- "<text text-anchor=\"middle\" x=\"487.55\" y=\"-518.3\" font-family=\"Times,serif\" font-size=\"14.00\">sepal width (cm) <= 1</text>\n",
- "</g>\n",
- "<!-- 140582763367632->140582618925200 -->\n",
- "<g id=\"edge3\" class=\"edge\">\n",
- "<title>140582763367632->140582618925200</title>\n",
- "<path fill=\"none\" stroke=\"black\" d=\"M598.92,-576.76C578.49,-566.9 552.23,-554.22 530.33,-543.65\"/>\n",
- "<polygon fill=\"black\" stroke=\"black\" points=\"531.77,-540.46 521.25,-539.27 528.73,-546.77 531.77,-540.46\"/>\n",
- "</g>\n",
- "<!-- 140582756536400 -->\n",
- "<g id=\"node17\" class=\"node\">\n",
- "<title>140582756536400</title>\n",
- "<ellipse fill=\"none\" stroke=\"black\" cx=\"777.55\" cy=\"-522\" rx=\"117.78\" ry=\"18\"/>\n",
- "<text text-anchor=\"middle\" x=\"777.55\" y=\"-518.3\" font-family=\"Times,serif\" font-size=\"14.00\">petal length (cm) <= 2</text>\n",
- "</g>\n",
- "<!-- 140582763367632->140582756536400 -->\n",
- "<g id=\"edge16\" class=\"edge\">\n",
- "<title>140582763367632->140582756536400</title>\n",
- "<path fill=\"none\" stroke=\"black\" d=\"M666.18,-576.76C686.6,-566.9 712.87,-554.22 734.76,-543.65\"/>\n",
- "<polygon fill=\"black\" stroke=\"black\" points=\"736.36,-546.77 743.85,-539.27 733.32,-540.46 736.36,-546.77\"/>\n",
- "</g>\n",
- "<!-- 140582756510480 -->\n",
- "<g id=\"node5\" class=\"node\">\n",
- "<title>140582756510480</title>\n",
- "<ellipse fill=\"none\" stroke=\"black\" cx=\"196.55\" cy=\"-450\" rx=\"117.78\" ry=\"18\"/>\n",
- "<text text-anchor=\"middle\" x=\"196.55\" y=\"-446.3\" font-family=\"Times,serif\" font-size=\"14.00\">petal length (cm) <= 2</text>\n",
- "</g>\n",
- "<!-- 140582618925200->140582756510480 -->\n",
- "<g id=\"edge4\" class=\"edge\">\n",
- "<title>140582618925200->140582756510480</title>\n",
- "<path fill=\"none\" stroke=\"black\" d=\"M427.6,-506.58C380.91,-495.35 315.98,-479.73 266.83,-467.91\"/>\n",
- "<polygon fill=\"black\" stroke=\"black\" points=\"267.45,-464.46 256.91,-465.52 265.82,-471.26 267.45,-464.46\"/>\n",
- "</g>\n",
- "<!-- 140582756532368 -->\n",
- "<g id=\"node10\" class=\"node\">\n",
- "<title>140582756532368</title>\n",
- "<ellipse fill=\"none\" stroke=\"black\" cx=\"487.55\" cy=\"-450\" rx=\"117.78\" ry=\"18\"/>\n",
- "<text text-anchor=\"middle\" x=\"487.55\" y=\"-446.3\" font-family=\"Times,serif\" font-size=\"14.00\">petal length (cm) <= 2</text>\n",
- "</g>\n",
- "<!-- 140582618925200->140582756532368 -->\n",
- "<g id=\"edge9\" class=\"edge\">\n",
- "<title>140582618925200->140582756532368</title>\n",
- "<path fill=\"none\" stroke=\"black\" d=\"M487.55,-503.7C487.55,-495.98 487.55,-486.71 487.55,-478.11\"/>\n",
- "<polygon fill=\"black\" stroke=\"black\" points=\"491.05,-478.1 487.55,-468.1 484.05,-478.1 491.05,-478.1\"/>\n",
- "</g>\n",
- "<!-- 140582738061776 -->\n",
- "<g id=\"node6\" class=\"node\">\n",
- "<title>140582738061776</title>\n",
- "<ellipse fill=\"none\" stroke=\"black\" cx=\"30.55\" cy=\"-378\" rx=\"30.59\" ry=\"18\"/>\n",
- "<text text-anchor=\"middle\" x=\"30.55\" y=\"-374.3\" font-family=\"Times,serif\" font-size=\"14.00\">Leaf</text>\n",
- "</g>\n",
- "<!-- 140582756510480->140582738061776 -->\n",
- "<g id=\"edge5\" class=\"edge\">\n",
- "<title>140582756510480->140582738061776</title>\n",
- "<path fill=\"none\" stroke=\"black\" d=\"M157.57,-432.92C132.41,-422.49 98.99,-408.56 69.55,-396 67.71,-395.22 65.83,-394.41 63.93,-393.59\"/>\n",
- "<polygon fill=\"black\" stroke=\"black\" points=\"65.01,-390.25 54.45,-389.49 62.23,-396.67 65.01,-390.25\"/>\n",
- "</g>\n",
- "<!-- 140582738061392 -->\n",
- "<g id=\"node7\" class=\"node\">\n",
- "<title>140582738061392</title>\n",
- "<ellipse fill=\"none\" stroke=\"black\" cx=\"196.55\" cy=\"-378\" rx=\"117.78\" ry=\"18\"/>\n",
- "<text text-anchor=\"middle\" x=\"196.55\" y=\"-374.3\" font-family=\"Times,serif\" font-size=\"14.00\">petal length (cm) <= 2</text>\n",
- "</g>\n",
- "<!-- 140582756510480->140582738061392 -->\n",
- "<g id=\"edge6\" class=\"edge\">\n",
- "<title>140582756510480->140582738061392</title>\n",
- "<path fill=\"none\" stroke=\"black\" d=\"M196.55,-431.7C196.55,-423.98 196.55,-414.71 196.55,-406.11\"/>\n",
- "<polygon fill=\"black\" stroke=\"black\" points=\"200.05,-406.1 196.55,-396.1 193.05,-406.1 200.05,-406.1\"/>\n",
- "</g>\n",
- "<!-- 140582756540368 -->\n",
- "<g id=\"node8\" class=\"node\">\n",
- "<title>140582756540368</title>\n",
- "<ellipse fill=\"none\" stroke=\"black\" cx=\"117.55\" cy=\"-306\" rx=\"30.59\" ry=\"18\"/>\n",
- "<text text-anchor=\"middle\" x=\"117.55\" y=\"-302.3\" font-family=\"Times,serif\" font-size=\"14.00\">Leaf</text>\n",
- "</g>\n",
- "<!-- 140582738061392->140582756540368 -->\n",
- "<g id=\"edge7\" class=\"edge\">\n",
- "<title>140582738061392->140582756540368</title>\n",
- "<path fill=\"none\" stroke=\"black\" d=\"M177.42,-360.05C166.59,-350.46 152.94,-338.36 141.41,-328.14\"/>\n",
- "<polygon fill=\"black\" stroke=\"black\" points=\"143.63,-325.44 133.83,-321.42 138.99,-330.68 143.63,-325.44\"/>\n",
- "</g>\n",
- "<!-- 140582756533008 -->\n",
- "<g id=\"node9\" class=\"node\">\n",
- "<title>140582756533008</title>\n",
- "<ellipse fill=\"none\" stroke=\"black\" cx=\"196.55\" cy=\"-306\" rx=\"30.59\" ry=\"18\"/>\n",
- "<text text-anchor=\"middle\" x=\"196.55\" y=\"-302.3\" font-family=\"Times,serif\" font-size=\"14.00\">Leaf</text>\n",
- "</g>\n",
- "<!-- 140582738061392->140582756533008 -->\n",
- "<g id=\"edge8\" class=\"edge\">\n",
- "<title>140582738061392->140582756533008</title>\n",
- "<path fill=\"none\" stroke=\"black\" d=\"M196.55,-359.7C196.55,-351.98 196.55,-342.71 196.55,-334.11\"/>\n",
- "<polygon fill=\"black\" stroke=\"black\" points=\"200.05,-334.1 196.55,-324.1 193.05,-334.1 200.05,-334.1\"/>\n",
- "</g>\n",
- "<!-- 140582756541520 -->\n",
- "<g id=\"node11\" class=\"node\">\n",
- "<title>140582756541520</title>\n",
- "<ellipse fill=\"none\" stroke=\"black\" cx=\"362.55\" cy=\"-378\" rx=\"30.59\" ry=\"18\"/>\n",
- "<text text-anchor=\"middle\" x=\"362.55\" y=\"-374.3\" font-family=\"Times,serif\" font-size=\"14.00\">Leaf</text>\n",
- "</g>\n",
- "<!-- 140582756532368->140582756541520 -->\n",
- "<g id=\"edge10\" class=\"edge\">\n",
- "<title>140582756532368->140582756541520</title>\n",
- "<path fill=\"none\" stroke=\"black\" d=\"M457.92,-432.41C438.35,-421.45 412.77,-407.13 393.03,-396.07\"/>\n",
- "<polygon fill=\"black\" stroke=\"black\" points=\"394.57,-392.92 384.14,-391.09 391.15,-399.03 394.57,-392.92\"/>\n",
- "</g>\n",
- "<!-- 140582756535120 -->\n",
- "<g id=\"node12\" class=\"node\">\n",
- "<title>140582756535120</title>\n",
- "<ellipse fill=\"none\" stroke=\"black\" cx=\"526.55\" cy=\"-378\" rx=\"115.08\" ry=\"18\"/>\n",
- "<text text-anchor=\"middle\" x=\"526.55\" y=\"-374.3\" font-family=\"Times,serif\" font-size=\"14.00\">sepal width (cm) <= 1</text>\n",
- "</g>\n",
- "<!-- 140582756532368->140582756535120 -->\n",
- "<g id=\"edge11\" class=\"edge\">\n",
- "<title>140582756532368->140582756535120</title>\n",
- "<path fill=\"none\" stroke=\"black\" d=\"M496.99,-432.05C501.47,-424.01 506.93,-414.2 511.94,-405.23\"/>\n",
- "<polygon fill=\"black\" stroke=\"black\" points=\"515.11,-406.71 516.92,-396.28 509,-403.31 515.11,-406.71\"/>\n",
- "</g>\n",
- "<!-- 140582756541648 -->\n",
- "<g id=\"node13\" class=\"node\">\n",
- "<title>140582756541648</title>\n",
- "<ellipse fill=\"none\" stroke=\"black\" cx=\"385.55\" cy=\"-306\" rx=\"118.08\" ry=\"18\"/>\n",
- "<text text-anchor=\"middle\" x=\"385.55\" y=\"-302.3\" font-family=\"Times,serif\" font-size=\"14.00\">sepal length (cm) <= 0</text>\n",
- "</g>\n",
- "<!-- 140582756535120->140582756541648 -->\n",
- "<g id=\"edge12\" class=\"edge\">\n",
- "<title>140582756535120->140582756541648</title>\n",
- "<path fill=\"none\" stroke=\"black\" d=\"M493.49,-360.59C473.78,-350.8 448.56,-338.29 427.46,-327.81\"/>\n",
- "<polygon fill=\"black\" stroke=\"black\" points=\"428.91,-324.62 418.4,-323.31 425.8,-330.89 428.91,-324.62\"/>\n",
- "</g>\n",
- "<!-- 140582756540752 -->\n",
- "<g id=\"node16\" class=\"node\">\n",
- "<title>140582756540752</title>\n",
- "<ellipse fill=\"none\" stroke=\"black\" cx=\"552.55\" cy=\"-306\" rx=\"30.59\" ry=\"18\"/>\n",
- "<text text-anchor=\"middle\" x=\"552.55\" y=\"-302.3\" font-family=\"Times,serif\" font-size=\"14.00\">Leaf</text>\n",
- "</g>\n",
- "<!-- 140582756535120->140582756540752 -->\n",
- "<g id=\"edge15\" class=\"edge\">\n",
- "<title>140582756535120->140582756540752</title>\n",
- "<path fill=\"none\" stroke=\"black\" d=\"M532.97,-359.7C535.9,-351.81 539.43,-342.3 542.69,-333.55\"/>\n",
- "<polygon fill=\"black\" stroke=\"black\" points=\"545.99,-334.7 546.19,-324.1 539.43,-332.26 545.99,-334.7\"/>\n",
- "</g>\n",
- "<!-- 140582756532624 -->\n",
- "<g id=\"node14\" class=\"node\">\n",
- "<title>140582756532624</title>\n",
- "<ellipse fill=\"none\" stroke=\"black\" cx=\"345.55\" cy=\"-234\" rx=\"30.59\" ry=\"18\"/>\n",
- "<text text-anchor=\"middle\" x=\"345.55\" y=\"-230.3\" font-family=\"Times,serif\" font-size=\"14.00\">Leaf</text>\n",
- "</g>\n",
- "<!-- 140582756541648->140582756532624 -->\n",
- "<g id=\"edge13\" class=\"edge\">\n",
- "<title>140582756541648->140582756532624</title>\n",
- "<path fill=\"none\" stroke=\"black\" d=\"M375.86,-288.05C371.08,-279.68 365.21,-269.4 359.91,-260.13\"/>\n",
- "<polygon fill=\"black\" stroke=\"black\" points=\"362.87,-258.25 354.87,-251.31 356.79,-261.73 362.87,-258.25\"/>\n",
- "</g>\n",
- "<!-- 140582756538448 -->\n",
- "<g id=\"node15\" class=\"node\">\n",
- "<title>140582756538448</title>\n",
- "<ellipse fill=\"none\" stroke=\"black\" cx=\"424.55\" cy=\"-234\" rx=\"30.59\" ry=\"18\"/>\n",
- "<text text-anchor=\"middle\" x=\"424.55\" y=\"-230.3\" font-family=\"Times,serif\" font-size=\"14.00\">Leaf</text>\n",
- "</g>\n",
- "<!-- 140582756541648->140582756538448 -->\n",
- "<g id=\"edge14\" class=\"edge\">\n",
- "<title>140582756541648->140582756538448</title>\n",
- "<path fill=\"none\" stroke=\"black\" d=\"M394.99,-288.05C399.65,-279.68 405.38,-269.4 410.54,-260.13\"/>\n",
- "<polygon fill=\"black\" stroke=\"black\" points=\"413.65,-261.75 415.46,-251.31 407.54,-258.34 413.65,-261.75\"/>\n",
- "</g>\n",
- "<!-- 140582756534160 -->\n",
- "<g id=\"node18\" class=\"node\">\n",
- "<title>140582756534160</title>\n",
- "<ellipse fill=\"none\" stroke=\"black\" cx=\"777.55\" cy=\"-450\" rx=\"115.08\" ry=\"18\"/>\n",
- "<text text-anchor=\"middle\" x=\"777.55\" y=\"-446.3\" font-family=\"Times,serif\" font-size=\"14.00\">sepal width (cm) <= 1</text>\n",
- "</g>\n",
- "<!-- 140582756536400->140582756534160 -->\n",
- "<g id=\"edge17\" class=\"edge\">\n",
- "<title>140582756536400->140582756534160</title>\n",
- "<path fill=\"none\" stroke=\"black\" d=\"M777.55,-503.7C777.55,-495.98 777.55,-486.71 777.55,-478.11\"/>\n",
- "<polygon fill=\"black\" stroke=\"black\" points=\"781.05,-478.1 777.55,-468.1 774.05,-478.1 781.05,-478.1\"/>\n",
- "</g>\n",
- "<!-- 140582684160784 -->\n",
- "<g id=\"node37\" class=\"node\">\n",
- "<title>140582684160784</title>\n",
- "<ellipse fill=\"none\" stroke=\"black\" cx=\"941.55\" cy=\"-450\" rx=\"30.59\" ry=\"18\"/>\n",
- "<text text-anchor=\"middle\" x=\"941.55\" y=\"-446.3\" font-family=\"Times,serif\" font-size=\"14.00\">Leaf</text>\n",
- "</g>\n",
- "<!-- 140582756536400->140582684160784 -->\n",
- "<g id=\"edge36\" class=\"edge\">\n",
- "<title>140582756536400->140582684160784</title>\n",
- "<path fill=\"none\" stroke=\"black\" d=\"M815.17,-504.94C843.13,-493.01 880.95,-476.87 907.91,-465.36\"/>\n",
- "<polygon fill=\"black\" stroke=\"black\" points=\"909.49,-468.49 917.31,-461.34 906.74,-462.05 909.49,-468.49\"/>\n",
- "</g>\n",
- "<!-- 140582756534288 -->\n",
- "<g id=\"node19\" class=\"node\">\n",
- "<title>140582756534288</title>\n",
- "<ellipse fill=\"none\" stroke=\"black\" cx=\"777.55\" cy=\"-378\" rx=\"117.78\" ry=\"18\"/>\n",
- "<text text-anchor=\"middle\" x=\"777.55\" y=\"-374.3\" font-family=\"Times,serif\" font-size=\"14.00\">petal length (cm) <= 2</text>\n",
- "</g>\n",
- "<!-- 140582756534160->140582756534288 -->\n",
- "<g id=\"edge18\" class=\"edge\">\n",
- "<title>140582756534160->140582756534288</title>\n",
- "<path fill=\"none\" stroke=\"black\" d=\"M777.55,-431.7C777.55,-423.98 777.55,-414.71 777.55,-406.11\"/>\n",
- "<polygon fill=\"black\" stroke=\"black\" points=\"781.05,-406.1 777.55,-396.1 774.05,-406.1 781.05,-406.1\"/>\n",
- "</g>\n",
- "<!-- 140582741948816 -->\n",
- "<g id=\"node26\" class=\"node\">\n",
- "<title>140582741948816</title>\n",
- "<ellipse fill=\"none\" stroke=\"black\" cx=\"1027.55\" cy=\"-378\" rx=\"114.28\" ry=\"18\"/>\n",
- "<text text-anchor=\"middle\" x=\"1027.55\" y=\"-374.3\" font-family=\"Times,serif\" font-size=\"14.00\">petal width (cm) <= 3</text>\n",
- "</g>\n",
- "<!-- 140582756534160->140582741948816 -->\n",
- "<g id=\"edge25\" class=\"edge\">\n",
- "<title>140582756534160->140582741948816</title>\n",
- "<path fill=\"none\" stroke=\"black\" d=\"M831.18,-433.98C870.23,-423.05 923.25,-408.2 964.36,-396.69\"/>\n",
- "<polygon fill=\"black\" stroke=\"black\" points=\"965.32,-400.06 974,-393.99 963.43,-393.32 965.32,-400.06\"/>\n",
- "</g>\n",
- "<!-- 140582756548560 -->\n",
- "<g id=\"node20\" class=\"node\">\n",
- "<title>140582756548560</title>\n",
- "<ellipse fill=\"none\" stroke=\"black\" cx=\"631.55\" cy=\"-306\" rx=\"30.59\" ry=\"18\"/>\n",
- "<text text-anchor=\"middle\" x=\"631.55\" y=\"-302.3\" font-family=\"Times,serif\" font-size=\"14.00\">Leaf</text>\n",
- "</g>\n",
- "<!-- 140582756534288->140582756548560 -->\n",
- "<g id=\"edge19\" class=\"edge\">\n",
- "<title>140582756534288->140582756548560</title>\n",
- "<path fill=\"none\" stroke=\"black\" d=\"M743.32,-360.59C719.32,-349.08 687.44,-333.8 663.87,-322.5\"/>\n",
- "<polygon fill=\"black\" stroke=\"black\" points=\"665.17,-319.24 654.64,-318.07 662.15,-325.55 665.17,-319.24\"/>\n",
- "</g>\n",
- "<!-- 140582756548176 -->\n",
- "<g id=\"node21\" class=\"node\">\n",
- "<title>140582756548176</title>\n",
- "<ellipse fill=\"none\" stroke=\"black\" cx=\"798.55\" cy=\"-306\" rx=\"118.08\" ry=\"18\"/>\n",
- "<text text-anchor=\"middle\" x=\"798.55\" y=\"-302.3\" font-family=\"Times,serif\" font-size=\"14.00\">sepal length (cm) <= 0</text>\n",
- "</g>\n",
- "<!-- 140582756534288->140582756548176 -->\n",
- "<g id=\"edge20\" class=\"edge\">\n",
- "<title>140582756534288->140582756548176</title>\n",
- "<path fill=\"none\" stroke=\"black\" d=\"M782.74,-359.7C785.08,-351.9 787.89,-342.51 790.5,-333.83\"/>\n",
- "<polygon fill=\"black\" stroke=\"black\" points=\"793.89,-334.69 793.42,-324.1 787.19,-332.68 793.89,-334.69\"/>\n",
- "</g>\n",
- "<!-- 140582756540688 -->\n",
- "<g id=\"node22\" class=\"node\">\n",
- "<title>140582756540688</title>\n",
- "<ellipse fill=\"none\" stroke=\"black\" cx=\"650.55\" cy=\"-234\" rx=\"30.59\" ry=\"18\"/>\n",
- "<text text-anchor=\"middle\" x=\"650.55\" y=\"-230.3\" font-family=\"Times,serif\" font-size=\"14.00\">Leaf</text>\n",
- "</g>\n",
- "<!-- 140582756548176->140582756540688 -->\n",
- "<g id=\"edge21\" class=\"edge\">\n",
- "<title>140582756548176->140582756540688</title>\n",
- "<path fill=\"none\" stroke=\"black\" d=\"M764.22,-288.76C739.65,-277.14 706.78,-261.6 682.71,-250.21\"/>\n",
- "<polygon fill=\"black\" stroke=\"black\" points=\"684.17,-247.03 673.63,-245.92 681.18,-253.36 684.17,-247.03\"/>\n",
- "</g>\n",
- "<!-- 140582741936848 -->\n",
- "<g id=\"node23\" class=\"node\">\n",
- "<title>140582741936848</title>\n",
- "<ellipse fill=\"none\" stroke=\"black\" cx=\"817.55\" cy=\"-234\" rx=\"118.08\" ry=\"18\"/>\n",
- "<text text-anchor=\"middle\" x=\"817.55\" y=\"-230.3\" font-family=\"Times,serif\" font-size=\"14.00\">sepal length (cm) <= 0</text>\n",
- "</g>\n",
- "<!-- 140582756548176->140582741936848 -->\n",
- "<g id=\"edge22\" class=\"edge\">\n",
- "<title>140582756548176->140582741936848</title>\n",
- "<path fill=\"none\" stroke=\"black\" d=\"M803.24,-287.7C805.36,-279.9 807.91,-270.51 810.27,-261.83\"/>\n",
- "<polygon fill=\"black\" stroke=\"black\" points=\"813.66,-262.67 812.9,-252.1 806.91,-260.84 813.66,-262.67\"/>\n",
- "</g>\n",
- "<!-- 140582741935248 -->\n",
- "<g id=\"node24\" class=\"node\">\n",
- "<title>140582741935248</title>\n",
- "<ellipse fill=\"none\" stroke=\"black\" cx=\"777.55\" cy=\"-162\" rx=\"30.59\" ry=\"18\"/>\n",
- "<text text-anchor=\"middle\" x=\"777.55\" y=\"-158.3\" font-family=\"Times,serif\" font-size=\"14.00\">Leaf</text>\n",
- "</g>\n",
- "<!-- 140582741936848->140582741935248 -->\n",
- "<g id=\"edge23\" class=\"edge\">\n",
- "<title>140582741936848->140582741935248</title>\n",
- "<path fill=\"none\" stroke=\"black\" d=\"M807.86,-216.05C803.08,-207.68 797.21,-197.4 791.91,-188.13\"/>\n",
- "<polygon fill=\"black\" stroke=\"black\" points=\"794.87,-186.25 786.87,-179.31 788.79,-189.73 794.87,-186.25\"/>\n",
- "</g>\n",
- "<!-- 140582741947920 -->\n",
- "<g id=\"node25\" class=\"node\">\n",
- "<title>140582741947920</title>\n",
- "<ellipse fill=\"none\" stroke=\"black\" cx=\"856.55\" cy=\"-162\" rx=\"30.59\" ry=\"18\"/>\n",
- "<text text-anchor=\"middle\" x=\"856.55\" y=\"-158.3\" font-family=\"Times,serif\" font-size=\"14.00\">Leaf</text>\n",
- "</g>\n",
- "<!-- 140582741936848->140582741947920 -->\n",
- "<g id=\"edge24\" class=\"edge\">\n",
- "<title>140582741936848->140582741947920</title>\n",
- "<path fill=\"none\" stroke=\"black\" d=\"M826.99,-216.05C831.65,-207.68 837.38,-197.4 842.54,-188.13\"/>\n",
- "<polygon fill=\"black\" stroke=\"black\" points=\"845.65,-189.75 847.46,-179.31 839.54,-186.34 845.65,-189.75\"/>\n",
- "</g>\n",
- "<!-- 140582741934288 -->\n",
- "<g id=\"node27\" class=\"node\">\n",
- "<title>140582741934288</title>\n",
- "<ellipse fill=\"none\" stroke=\"black\" cx=\"986.55\" cy=\"-306\" rx=\"30.59\" ry=\"18\"/>\n",
- "<text text-anchor=\"middle\" x=\"986.55\" y=\"-302.3\" font-family=\"Times,serif\" font-size=\"14.00\">Leaf</text>\n",
- "</g>\n",
- "<!-- 140582741948816->140582741934288 -->\n",
- "<g id=\"edge26\" class=\"edge\">\n",
- "<title>140582741948816->140582741934288</title>\n",
- "<path fill=\"none\" stroke=\"black\" d=\"M1017.62,-360.05C1012.72,-351.68 1006.7,-341.4 1001.27,-332.13\"/>\n",
- "<polygon fill=\"black\" stroke=\"black\" points=\"1004.17,-330.17 996.1,-323.31 998.13,-333.7 1004.17,-330.17\"/>\n",
- "</g>\n",
- "<!-- 140582684159888 -->\n",
- "<g id=\"node28\" class=\"node\">\n",
- "<title>140582684159888</title>\n",
- "<ellipse fill=\"none\" stroke=\"black\" cx=\"1152.55\" cy=\"-306\" rx=\"117.78\" ry=\"18\"/>\n",
- "<text text-anchor=\"middle\" x=\"1152.55\" y=\"-302.3\" font-family=\"Times,serif\" font-size=\"14.00\">petal length (cm) <= 2</text>\n",
- "</g>\n",
- "<!-- 140582741948816->140582684159888 -->\n",
- "<g id=\"edge27\" class=\"edge\">\n",
- "<title>140582741948816->140582684159888</title>\n",
- "<path fill=\"none\" stroke=\"black\" d=\"M1056.85,-360.59C1073.97,-351 1095.76,-338.8 1114.23,-328.46\"/>\n",
- "<polygon fill=\"black\" stroke=\"black\" points=\"1115.98,-331.49 1122.99,-323.55 1112.56,-325.38 1115.98,-331.49\"/>\n",
- "</g>\n",
- "<!-- 140582684150992 -->\n",
- "<g id=\"node29\" class=\"node\">\n",
- "<title>140582684150992</title>\n",
- "<ellipse fill=\"none\" stroke=\"black\" cx=\"1133.55\" cy=\"-234\" rx=\"118.08\" ry=\"18\"/>\n",
- "<text text-anchor=\"middle\" x=\"1133.55\" y=\"-230.3\" font-family=\"Times,serif\" font-size=\"14.00\">sepal length (cm) <= 0</text>\n",
- "</g>\n",
- "<!-- 140582684159888->140582684150992 -->\n",
- "<g id=\"edge28\" class=\"edge\">\n",
- "<title>140582684159888->140582684150992</title>\n",
- "<path fill=\"none\" stroke=\"black\" d=\"M1147.85,-287.7C1145.73,-279.9 1143.19,-270.51 1140.83,-261.83\"/>\n",
- "<polygon fill=\"black\" stroke=\"black\" points=\"1144.19,-260.84 1138.19,-252.1 1137.43,-262.67 1144.19,-260.84\"/>\n",
- "</g>\n",
- "<!-- 140582684150544 -->\n",
- "<g id=\"node36\" class=\"node\">\n",
- "<title>140582684150544</title>\n",
- "<ellipse fill=\"none\" stroke=\"black\" cx=\"1300.55\" cy=\"-234\" rx=\"30.59\" ry=\"18\"/>\n",
- "<text text-anchor=\"middle\" x=\"1300.55\" y=\"-230.3\" font-family=\"Times,serif\" font-size=\"14.00\">Leaf</text>\n",
- "</g>\n",
- "<!-- 140582684159888->140582684150544 -->\n",
- "<g id=\"edge35\" class=\"edge\">\n",
- "<title>140582684159888->140582684150544</title>\n",
- "<path fill=\"none\" stroke=\"black\" d=\"M1186.87,-288.76C1211.45,-277.14 1244.31,-261.6 1268.38,-250.21\"/>\n",
- "<polygon fill=\"black\" stroke=\"black\" points=\"1269.92,-253.36 1277.46,-245.92 1266.93,-247.03 1269.92,-253.36\"/>\n",
- "</g>\n",
- "<!-- 140582684159632 -->\n",
- "<g id=\"node30\" class=\"node\">\n",
- "<title>140582684159632</title>\n",
- "<ellipse fill=\"none\" stroke=\"black\" cx=\"1067.55\" cy=\"-162\" rx=\"118.08\" ry=\"18\"/>\n",
- "<text text-anchor=\"middle\" x=\"1067.55\" y=\"-158.3\" font-family=\"Times,serif\" font-size=\"14.00\">sepal length (cm) <= 0</text>\n",
- "</g>\n",
- "<!-- 140582684150992->140582684159632 -->\n",
- "<g id=\"edge29\" class=\"edge\">\n",
- "<title>140582684150992->140582684159632</title>\n",
- "<path fill=\"none\" stroke=\"black\" d=\"M1117.57,-216.05C1109.58,-207.58 1099.76,-197.17 1090.94,-187.82\"/>\n",
- "<polygon fill=\"black\" stroke=\"black\" points=\"1093.24,-185.15 1083.84,-180.28 1088.15,-189.95 1093.24,-185.15\"/>\n",
- "</g>\n",
- "<!-- 140582684157584 -->\n",
- "<g id=\"node35\" class=\"node\">\n",
- "<title>140582684157584</title>\n",
- "<ellipse fill=\"none\" stroke=\"black\" cx=\"1234.55\" cy=\"-162\" rx=\"30.59\" ry=\"18\"/>\n",
- "<text text-anchor=\"middle\" x=\"1234.55\" y=\"-158.3\" font-family=\"Times,serif\" font-size=\"14.00\">Leaf</text>\n",
- "</g>\n",
- "<!-- 140582684150992->140582684157584 -->\n",
- "<g id=\"edge34\" class=\"edge\">\n",
- "<title>140582684150992->140582684157584</title>\n",
- "<path fill=\"none\" stroke=\"black\" d=\"M1157.74,-216.23C1172.57,-205.95 1191.6,-192.77 1207.01,-182.09\"/>\n",
- "<polygon fill=\"black\" stroke=\"black\" points=\"1209.38,-184.7 1215.6,-176.13 1205.39,-178.95 1209.38,-184.7\"/>\n",
- "</g>\n",
- "<!-- 140582684153744 -->\n",
- "<g id=\"node31\" class=\"node\">\n",
- "<title>140582684153744</title>\n",
- "<ellipse fill=\"none\" stroke=\"black\" cx=\"985.55\" cy=\"-90\" rx=\"115.08\" ry=\"18\"/>\n",
- "<text text-anchor=\"middle\" x=\"985.55\" y=\"-86.3\" font-family=\"Times,serif\" font-size=\"14.00\">sepal width (cm) <= 1</text>\n",
- "</g>\n",
- "<!-- 140582684159632->140582684153744 -->\n",
- "<g id=\"edge30\" class=\"edge\">\n",
- "<title>140582684159632->140582684153744</title>\n",
- "<path fill=\"none\" stroke=\"black\" d=\"M1047.7,-144.05C1037.32,-135.2 1024.46,-124.22 1013.13,-114.55\"/>\n",
- "<polygon fill=\"black\" stroke=\"black\" points=\"1015.09,-111.62 1005.21,-107.79 1010.55,-116.94 1015.09,-111.62\"/>\n",
- "</g>\n",
- "<!-- 140582684148880 -->\n",
- "<g id=\"node34\" class=\"node\">\n",
- "<title>140582684148880</title>\n",
- "<ellipse fill=\"none\" stroke=\"black\" cx=\"1149.55\" cy=\"-90\" rx=\"30.59\" ry=\"18\"/>\n",
- "<text text-anchor=\"middle\" x=\"1149.55\" y=\"-86.3\" font-family=\"Times,serif\" font-size=\"14.00\">Leaf</text>\n",
- "</g>\n",
- "<!-- 140582684159632->140582684148880 -->\n",
- "<g id=\"edge33\" class=\"edge\">\n",
- "<title>140582684159632->140582684148880</title>\n",
- "<path fill=\"none\" stroke=\"black\" d=\"M1087.4,-144.05C1098.72,-134.39 1113.01,-122.19 1125.02,-111.93\"/>\n",
- "<polygon fill=\"black\" stroke=\"black\" points=\"1127.59,-114.35 1132.92,-105.19 1123.04,-109.03 1127.59,-114.35\"/>\n",
- "</g>\n",
- "<!-- 140582684162000 -->\n",
- "<g id=\"node32\" class=\"node\">\n",
- "<title>140582684162000</title>\n",
- "<ellipse fill=\"none\" stroke=\"black\" cx=\"946.55\" cy=\"-18\" rx=\"30.59\" ry=\"18\"/>\n",
- "<text text-anchor=\"middle\" x=\"946.55\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">Leaf</text>\n",
- "</g>\n",
- "<!-- 140582684153744->140582684162000 -->\n",
- "<g id=\"edge31\" class=\"edge\">\n",
- "<title>140582684153744->140582684162000</title>\n",
- "<path fill=\"none\" stroke=\"black\" d=\"M976.11,-72.05C971.44,-63.68 965.72,-53.4 960.55,-44.13\"/>\n",
- "<polygon fill=\"black\" stroke=\"black\" points=\"963.56,-42.34 955.63,-35.31 957.44,-45.75 963.56,-42.34\"/>\n",
- "</g>\n",
- "<!-- 140582684158544 -->\n",
- "<g id=\"node33\" class=\"node\">\n",
- "<title>140582684158544</title>\n",
- "<ellipse fill=\"none\" stroke=\"black\" cx=\"1025.55\" cy=\"-18\" rx=\"30.59\" ry=\"18\"/>\n",
- "<text text-anchor=\"middle\" x=\"1025.55\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">Leaf</text>\n",
- "</g>\n",
- "<!-- 140582684153744->140582684158544 -->\n",
- "<g id=\"edge32\" class=\"edge\">\n",
- "<title>140582684153744->140582684158544</title>\n",
- "<path fill=\"none\" stroke=\"black\" d=\"M995.23,-72.05C1000.02,-63.68 1005.89,-53.4 1011.18,-44.13\"/>\n",
- "<polygon fill=\"black\" stroke=\"black\" points=\"1014.31,-45.73 1016.23,-35.31 1008.23,-42.25 1014.31,-45.73\"/>\n",
- "</g>\n",
- "</g>\n",
- "</svg>\n"
- ],
- "text/plain": [
- "<graphviz.graphs.Digraph at 0x7fdbf5ba52d0>"
- ]
- },
- "execution_count": 2771,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"\n",
"X = X_train\n",
@@ -700,6 +236,13 @@
"graphTree(node)\n",
"graph"
]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Evaluate!!!"
+ ]
}
],
"metadata": {
diff --git a/recidivism/RecidivismLogReg.ipynb b/recidivism/RecidivismLogReg.ipynb
@@ -0,0 +1,738 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 313,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['Recidivism Reporting Year',\n",
+ " 'Fiscal Year Admitted',\n",
+ " 'Region Code',\n",
+ " 'Convicting Offense Classification',\n",
+ " 'Convicting Offense Type',\n",
+ " 'Convicting Offense Subtype',\n",
+ " 'Race - Ethnicity',\n",
+ " 'Sex',\n",
+ " 'Level of Supervision',\n",
+ " 'Recidivism - Prison Admission',\n",
+ " 'Recidivism Type',\n",
+ " 'New Conviction Offense Class',\n",
+ " 'New Conviction Offense Type',\n",
+ " 'New Conviction Offense SubType',\n",
+ " 'Days to Recidivism',\n",
+ " 'Part of Target Population']"
+ ]
+ },
+ "execution_count": 313,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import pandas as pd\n",
+ "\n",
+ "df = pd.read_csv('../datasets/recidivism/Recidivism.csv')\n",
+ "df.columns.to_list()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 314,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "<div>\n",
+ "<style scoped>\n",
+ " .dataframe tbody tr th:only-of-type {\n",
+ " vertical-align: middle;\n",
+ " }\n",
+ "\n",
+ " .dataframe tbody tr th {\n",
+ " vertical-align: top;\n",
+ " }\n",
+ "\n",
+ " .dataframe thead th {\n",
+ " text-align: right;\n",
+ " }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ " <thead>\n",
+ " <tr style=\"text-align: right;\">\n",
+ " <th></th>\n",
+ " <th>Recidivism Reporting Year</th>\n",
+ " <th>Fiscal Year Admitted</th>\n",
+ " <th>Region Code</th>\n",
+ " <th>Convicting Offense Classification</th>\n",
+ " <th>Convicting Offense Type</th>\n",
+ " <th>Convicting Offense Subtype</th>\n",
+ " <th>Race - Ethnicity</th>\n",
+ " <th>Sex</th>\n",
+ " <th>Level of Supervision</th>\n",
+ " <th>Recidivism - Prison Admission</th>\n",
+ " <th>Recidivism Type</th>\n",
+ " <th>New Conviction Offense Class</th>\n",
+ " <th>New Conviction Offense Type</th>\n",
+ " <th>New Conviction Offense SubType</th>\n",
+ " <th>Days to Recidivism</th>\n",
+ " <th>Part of Target Population</th>\n",
+ " </tr>\n",
+ " </thead>\n",
+ " <tbody>\n",
+ " <tr>\n",
+ " <th>0</th>\n",
+ " <td>2013</td>\n",
+ " <td>2010</td>\n",
+ " <td>NaN</td>\n",
+ " <td>D Felony</td>\n",
+ " <td>Public Order</td>\n",
+ " <td>OWI</td>\n",
+ " <td>White - Hispanic</td>\n",
+ " <td>Male</td>\n",
+ " <td>NaN</td>\n",
+ " <td>No</td>\n",
+ " <td>No Recidivism</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>No</td>\n",
+ " </tr>\n",
+ " </tbody>\n",
+ "</table>\n",
+ "</div>"
+ ],
+ "text/plain": [
+ " Recidivism Reporting Year Fiscal Year Admitted Region Code \\\n",
+ "0 2013 2010 NaN \n",
+ "\n",
+ " Convicting Offense Classification Convicting Offense Type \\\n",
+ "0 D Felony Public Order \n",
+ "\n",
+ " Convicting Offense Subtype Race - Ethnicity Sex Level of Supervision \\\n",
+ "0 OWI White - Hispanic Male NaN \n",
+ "\n",
+ " Recidivism - Prison Admission Recidivism Type New Conviction Offense Class \\\n",
+ "0 No No Recidivism NaN \n",
+ "\n",
+ " New Conviction Offense Type New Conviction Offense SubType \\\n",
+ "0 NaN NaN \n",
+ "\n",
+ " Days to Recidivism Part of Target Population \n",
+ "0 NaN No "
+ ]
+ },
+ "execution_count": 314,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.head(1)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 315,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Recidivism - Prison Admission\n",
+ "False 85431\n",
+ "True 10926\n",
+ "Name: count, dtype: int64"
+ ]
+ },
+ "execution_count": 315,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "X = df.drop(axis=1, columns=df.columns.to_list()[9:])\n",
+ "y = df['Recidivism - Prison Admission']\n",
+ "y = y == 'Yes'\n",
+ "\n",
+ "y.value_counts()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 316,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Recidivism Reporting Year int64\n",
+ "Fiscal Year Admitted int64\n",
+ "Region Code object\n",
+ "Convicting Offense Classification object\n",
+ "Convicting Offense Type object\n",
+ "Convicting Offense Subtype object\n",
+ "Race - Ethnicity object\n",
+ "Sex object\n",
+ "Level of Supervision object\n",
+ "dtype: object"
+ ]
+ },
+ "execution_count": 316,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "X.dtypes"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 317,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sklearn.preprocessing import OneHotEncoder\n",
+ "ohc = OneHotEncoder(sparse_output=False)\n",
+ "\n",
+ "def encode(X, name):\n",
+ " trans = ohc.fit_transform(X[[name]])\n",
+ " transformed_df = pd.DataFrame(trans, columns=ohc.get_feature_names_out([name]))\n",
+ " X = pd.concat([X,transformed_df], axis=1)\n",
+ " X = X.drop(columns=[name], axis=1)\n",
+ " return X"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 318,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "X = encode(X,'Convicting Offense Classification')\n",
+ "X = encode(X,'Convicting Offense Type')\n",
+ "X = encode(X,'Convicting Offense Subtype')\n",
+ "X = encode(X,'Level of Supervision')\n",
+ "X = encode(X,'Sex')\n",
+ "X = encode(X,'Race - Ethnicity')\n",
+ "X = encode(X,'Region Code')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 320,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sklearn.preprocessing import StandardScaler\n",
+ "\n",
+ "std = StandardScaler()\n",
+ "X = std.fit_transform(X)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 321,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "(72267, 88) (72267,)\n",
+ "(12045, 88) (12045,)\n"
+ ]
+ }
+ ],
+ "source": [
+ "from sklearn.model_selection import train_test_split\n",
+ "\n",
+ "X_train, X_test, y_train, y_test = train_test_split(X,y,random_state=10)\n",
+ "X_test, X_val, y_test, y_val = train_test_split(X_test,y_test,random_state=10, test_size=.5)\n",
+ "\n",
+ "print(X_train.shape , y_train.shape)\n",
+ "print(X_test.shape , y_test.shape)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 322,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "<style>#sk-container-id-8 {\n",
+ " /* Definition of color scheme common for light and dark mode */\n",
+ " --sklearn-color-text: black;\n",
+ " --sklearn-color-line: gray;\n",
+ " /* Definition of color scheme for unfitted estimators */\n",
+ " --sklearn-color-unfitted-level-0: #fff5e6;\n",
+ " --sklearn-color-unfitted-level-1: #f6e4d2;\n",
+ " --sklearn-color-unfitted-level-2: #ffe0b3;\n",
+ " --sklearn-color-unfitted-level-3: chocolate;\n",
+ " /* Definition of color scheme for fitted estimators */\n",
+ " --sklearn-color-fitted-level-0: #f0f8ff;\n",
+ " --sklearn-color-fitted-level-1: #d4ebff;\n",
+ " --sklearn-color-fitted-level-2: #b3dbfd;\n",
+ " --sklearn-color-fitted-level-3: cornflowerblue;\n",
+ "\n",
+ " /* Specific color for light theme */\n",
+ " --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
+ " --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
+ " --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
+ " --sklearn-color-icon: #696969;\n",
+ "\n",
+ " @media (prefers-color-scheme: dark) {\n",
+ " /* Redefinition of color scheme for dark theme */\n",
+ " --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
+ " --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
+ " --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
+ " --sklearn-color-icon: #878787;\n",
+ " }\n",
+ "}\n",
+ "\n",
+ "#sk-container-id-8 {\n",
+ " color: var(--sklearn-color-text);\n",
+ "}\n",
+ "\n",
+ "#sk-container-id-8 pre {\n",
+ " padding: 0;\n",
+ "}\n",
+ "\n",
+ "#sk-container-id-8 input.sk-hidden--visually {\n",
+ " border: 0;\n",
+ " clip: rect(1px 1px 1px 1px);\n",
+ " clip: rect(1px, 1px, 1px, 1px);\n",
+ " height: 1px;\n",
+ " margin: -1px;\n",
+ " overflow: hidden;\n",
+ " padding: 0;\n",
+ " position: absolute;\n",
+ " width: 1px;\n",
+ "}\n",
+ "\n",
+ "#sk-container-id-8 div.sk-dashed-wrapped {\n",
+ " border: 1px dashed var(--sklearn-color-line);\n",
+ " margin: 0 0.4em 0.5em 0.4em;\n",
+ " box-sizing: border-box;\n",
+ " padding-bottom: 0.4em;\n",
+ " background-color: var(--sklearn-color-background);\n",
+ "}\n",
+ "\n",
+ "#sk-container-id-8 div.sk-container {\n",
+ " /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
+ " but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
+ " so we also need the `!important` here to be able to override the\n",
+ " default hidden behavior on the sphinx rendered scikit-learn.org.\n",
+ " See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
+ " display: inline-block !important;\n",
+ " position: relative;\n",
+ "}\n",
+ "\n",
+ "#sk-container-id-8 div.sk-text-repr-fallback {\n",
+ " display: none;\n",
+ "}\n",
+ "\n",
+ "div.sk-parallel-item,\n",
+ "div.sk-serial,\n",
+ "div.sk-item {\n",
+ " /* draw centered vertical line to link estimators */\n",
+ " background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
+ " background-size: 2px 100%;\n",
+ " background-repeat: no-repeat;\n",
+ " background-position: center center;\n",
+ "}\n",
+ "\n",
+ "/* Parallel-specific style estimator block */\n",
+ "\n",
+ "#sk-container-id-8 div.sk-parallel-item::after {\n",
+ " content: \"\";\n",
+ " width: 100%;\n",
+ " border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
+ " flex-grow: 1;\n",
+ "}\n",
+ "\n",
+ "#sk-container-id-8 div.sk-parallel {\n",
+ " display: flex;\n",
+ " align-items: stretch;\n",
+ " justify-content: center;\n",
+ " background-color: var(--sklearn-color-background);\n",
+ " position: relative;\n",
+ "}\n",
+ "\n",
+ "#sk-container-id-8 div.sk-parallel-item {\n",
+ " display: flex;\n",
+ " flex-direction: column;\n",
+ "}\n",
+ "\n",
+ "#sk-container-id-8 div.sk-parallel-item:first-child::after {\n",
+ " align-self: flex-end;\n",
+ " width: 50%;\n",
+ "}\n",
+ "\n",
+ "#sk-container-id-8 div.sk-parallel-item:last-child::after {\n",
+ " align-self: flex-start;\n",
+ " width: 50%;\n",
+ "}\n",
+ "\n",
+ "#sk-container-id-8 div.sk-parallel-item:only-child::after {\n",
+ " width: 0;\n",
+ "}\n",
+ "\n",
+ "/* Serial-specific style estimator block */\n",
+ "\n",
+ "#sk-container-id-8 div.sk-serial {\n",
+ " display: flex;\n",
+ " flex-direction: column;\n",
+ " align-items: center;\n",
+ " background-color: var(--sklearn-color-background);\n",
+ " padding-right: 1em;\n",
+ " padding-left: 1em;\n",
+ "}\n",
+ "\n",
+ "\n",
+ "/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
+ "clickable and can be expanded/collapsed.\n",
+ "- Pipeline and ColumnTransformer use this feature and define the default style\n",
+ "- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
+ "*/\n",
+ "\n",
+ "/* Pipeline and ColumnTransformer style (default) */\n",
+ "\n",
+ "#sk-container-id-8 div.sk-toggleable {\n",
+ " /* Default theme specific background. It is overwritten whether we have a\n",
+ " specific estimator or a Pipeline/ColumnTransformer */\n",
+ " background-color: var(--sklearn-color-background);\n",
+ "}\n",
+ "\n",
+ "/* Toggleable label */\n",
+ "#sk-container-id-8 label.sk-toggleable__label {\n",
+ " cursor: pointer;\n",
+ " display: block;\n",
+ " width: 100%;\n",
+ " margin-bottom: 0;\n",
+ " padding: 0.5em;\n",
+ " box-sizing: border-box;\n",
+ " text-align: center;\n",
+ "}\n",
+ "\n",
+ "#sk-container-id-8 label.sk-toggleable__label-arrow:before {\n",
+ " /* Arrow on the left of the label */\n",
+ " content: \"▸\";\n",
+ " float: left;\n",
+ " margin-right: 0.25em;\n",
+ " color: var(--sklearn-color-icon);\n",
+ "}\n",
+ "\n",
+ "#sk-container-id-8 label.sk-toggleable__label-arrow:hover:before {\n",
+ " color: var(--sklearn-color-text);\n",
+ "}\n",
+ "\n",
+ "/* Toggleable content - dropdown */\n",
+ "\n",
+ "#sk-container-id-8 div.sk-toggleable__content {\n",
+ " max-height: 0;\n",
+ " max-width: 0;\n",
+ " overflow: hidden;\n",
+ " text-align: left;\n",
+ " /* unfitted */\n",
+ " background-color: var(--sklearn-color-unfitted-level-0);\n",
+ "}\n",
+ "\n",
+ "#sk-container-id-8 div.sk-toggleable__content.fitted {\n",
+ " /* fitted */\n",
+ " background-color: var(--sklearn-color-fitted-level-0);\n",
+ "}\n",
+ "\n",
+ "#sk-container-id-8 div.sk-toggleable__content pre {\n",
+ " margin: 0.2em;\n",
+ " border-radius: 0.25em;\n",
+ " color: var(--sklearn-color-text);\n",
+ " /* unfitted */\n",
+ " background-color: var(--sklearn-color-unfitted-level-0);\n",
+ "}\n",
+ "\n",
+ "#sk-container-id-8 div.sk-toggleable__content.fitted pre {\n",
+ " /* unfitted */\n",
+ " background-color: var(--sklearn-color-fitted-level-0);\n",
+ "}\n",
+ "\n",
+ "#sk-container-id-8 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
+ " /* Expand drop-down */\n",
+ " max-height: 200px;\n",
+ " max-width: 100%;\n",
+ " overflow: auto;\n",
+ "}\n",
+ "\n",
+ "#sk-container-id-8 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
+ " content: \"▾\";\n",
+ "}\n",
+ "\n",
+ "/* Pipeline/ColumnTransformer-specific style */\n",
+ "\n",
+ "#sk-container-id-8 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
+ " color: var(--sklearn-color-text);\n",
+ " background-color: var(--sklearn-color-unfitted-level-2);\n",
+ "}\n",
+ "\n",
+ "#sk-container-id-8 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
+ " background-color: var(--sklearn-color-fitted-level-2);\n",
+ "}\n",
+ "\n",
+ "/* Estimator-specific style */\n",
+ "\n",
+ "/* Colorize estimator box */\n",
+ "#sk-container-id-8 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
+ " /* unfitted */\n",
+ " background-color: var(--sklearn-color-unfitted-level-2);\n",
+ "}\n",
+ "\n",
+ "#sk-container-id-8 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
+ " /* fitted */\n",
+ " background-color: var(--sklearn-color-fitted-level-2);\n",
+ "}\n",
+ "\n",
+ "#sk-container-id-8 div.sk-label label.sk-toggleable__label,\n",
+ "#sk-container-id-8 div.sk-label label {\n",
+ " /* The background is the default theme color */\n",
+ " color: var(--sklearn-color-text-on-default-background);\n",
+ "}\n",
+ "\n",
+ "/* On hover, darken the color of the background */\n",
+ "#sk-container-id-8 div.sk-label:hover label.sk-toggleable__label {\n",
+ " color: var(--sklearn-color-text);\n",
+ " background-color: var(--sklearn-color-unfitted-level-2);\n",
+ "}\n",
+ "\n",
+ "/* Label box, darken color on hover, fitted */\n",
+ "#sk-container-id-8 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
+ " color: var(--sklearn-color-text);\n",
+ " background-color: var(--sklearn-color-fitted-level-2);\n",
+ "}\n",
+ "\n",
+ "/* Estimator label */\n",
+ "\n",
+ "#sk-container-id-8 div.sk-label label {\n",
+ " font-family: monospace;\n",
+ " font-weight: bold;\n",
+ " display: inline-block;\n",
+ " line-height: 1.2em;\n",
+ "}\n",
+ "\n",
+ "#sk-container-id-8 div.sk-label-container {\n",
+ " text-align: center;\n",
+ "}\n",
+ "\n",
+ "/* Estimator-specific */\n",
+ "#sk-container-id-8 div.sk-estimator {\n",
+ " font-family: monospace;\n",
+ " border: 1px dotted var(--sklearn-color-border-box);\n",
+ " border-radius: 0.25em;\n",
+ " box-sizing: border-box;\n",
+ " margin-bottom: 0.5em;\n",
+ " /* unfitted */\n",
+ " background-color: var(--sklearn-color-unfitted-level-0);\n",
+ "}\n",
+ "\n",
+ "#sk-container-id-8 div.sk-estimator.fitted {\n",
+ " /* fitted */\n",
+ " background-color: var(--sklearn-color-fitted-level-0);\n",
+ "}\n",
+ "\n",
+ "/* on hover */\n",
+ "#sk-container-id-8 div.sk-estimator:hover {\n",
+ " /* unfitted */\n",
+ " background-color: var(--sklearn-color-unfitted-level-2);\n",
+ "}\n",
+ "\n",
+ "#sk-container-id-8 div.sk-estimator.fitted:hover {\n",
+ " /* fitted */\n",
+ " background-color: var(--sklearn-color-fitted-level-2);\n",
+ "}\n",
+ "\n",
+ "/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
+ "\n",
+ "/* Common style for \"i\" and \"?\" */\n",
+ "\n",
+ ".sk-estimator-doc-link,\n",
+ "a:link.sk-estimator-doc-link,\n",
+ "a:visited.sk-estimator-doc-link {\n",
+ " float: right;\n",
+ " font-size: smaller;\n",
+ " line-height: 1em;\n",
+ " font-family: monospace;\n",
+ " background-color: var(--sklearn-color-background);\n",
+ " border-radius: 1em;\n",
+ " height: 1em;\n",
+ " width: 1em;\n",
+ " text-decoration: none !important;\n",
+ " margin-left: 1ex;\n",
+ " /* unfitted */\n",
+ " border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
+ " color: var(--sklearn-color-unfitted-level-1);\n",
+ "}\n",
+ "\n",
+ ".sk-estimator-doc-link.fitted,\n",
+ "a:link.sk-estimator-doc-link.fitted,\n",
+ "a:visited.sk-estimator-doc-link.fitted {\n",
+ " /* fitted */\n",
+ " border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
+ " color: var(--sklearn-color-fitted-level-1);\n",
+ "}\n",
+ "\n",
+ "/* On hover */\n",
+ "div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
+ ".sk-estimator-doc-link:hover,\n",
+ "div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
+ ".sk-estimator-doc-link:hover {\n",
+ " /* unfitted */\n",
+ " background-color: var(--sklearn-color-unfitted-level-3);\n",
+ " color: var(--sklearn-color-background);\n",
+ " text-decoration: none;\n",
+ "}\n",
+ "\n",
+ "div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
+ ".sk-estimator-doc-link.fitted:hover,\n",
+ "div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
+ ".sk-estimator-doc-link.fitted:hover {\n",
+ " /* fitted */\n",
+ " background-color: var(--sklearn-color-fitted-level-3);\n",
+ " color: var(--sklearn-color-background);\n",
+ " text-decoration: none;\n",
+ "}\n",
+ "\n",
+ "/* Span, style for the box shown on hovering the info icon */\n",
+ ".sk-estimator-doc-link span {\n",
+ " display: none;\n",
+ " z-index: 9999;\n",
+ " position: relative;\n",
+ " font-weight: normal;\n",
+ " right: .2ex;\n",
+ " padding: .5ex;\n",
+ " margin: .5ex;\n",
+ " width: min-content;\n",
+ " min-width: 20ex;\n",
+ " max-width: 50ex;\n",
+ " color: var(--sklearn-color-text);\n",
+ " box-shadow: 2pt 2pt 4pt #999;\n",
+ " /* unfitted */\n",
+ " background: var(--sklearn-color-unfitted-level-0);\n",
+ " border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
+ "}\n",
+ "\n",
+ ".sk-estimator-doc-link.fitted span {\n",
+ " /* fitted */\n",
+ " background: var(--sklearn-color-fitted-level-0);\n",
+ " border: var(--sklearn-color-fitted-level-3);\n",
+ "}\n",
+ "\n",
+ ".sk-estimator-doc-link:hover span {\n",
+ " display: block;\n",
+ "}\n",
+ "\n",
+ "/* \"?\"-specific style due to the `<a>` HTML tag */\n",
+ "\n",
+ "#sk-container-id-8 a.estimator_doc_link {\n",
+ " float: right;\n",
+ " font-size: 1rem;\n",
+ " line-height: 1em;\n",
+ " font-family: monospace;\n",
+ " background-color: var(--sklearn-color-background);\n",
+ " border-radius: 1rem;\n",
+ " height: 1rem;\n",
+ " width: 1rem;\n",
+ " text-decoration: none;\n",
+ " /* unfitted */\n",
+ " color: var(--sklearn-color-unfitted-level-1);\n",
+ " border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
+ "}\n",
+ "\n",
+ "#sk-container-id-8 a.estimator_doc_link.fitted {\n",
+ " /* fitted */\n",
+ " border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
+ " color: var(--sklearn-color-fitted-level-1);\n",
+ "}\n",
+ "\n",
+ "/* On hover */\n",
+ "#sk-container-id-8 a.estimator_doc_link:hover {\n",
+ " /* unfitted */\n",
+ " background-color: var(--sklearn-color-unfitted-level-3);\n",
+ " color: var(--sklearn-color-background);\n",
+ " text-decoration: none;\n",
+ "}\n",
+ "\n",
+ "#sk-container-id-8 a.estimator_doc_link.fitted:hover {\n",
+ " /* fitted */\n",
+ " background-color: var(--sklearn-color-fitted-level-3);\n",
+ "}\n",
+ "</style><div id=\"sk-container-id-8\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>LogisticRegression()</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-8\" type=\"checkbox\" checked><label for=\"sk-estimator-id-8\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\"> LogisticRegression<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.5/modules/generated/sklearn.linear_model.LogisticRegression.html\">?<span>Documentation for LogisticRegression</span></a><span class=\"sk-estimator-doc-link fitted\">i<span>Fitted</span></span></label><div class=\"sk-toggleable__content fitted\"><pre>LogisticRegression()</pre></div> </div></div></div></div>"
+ ],
+ "text/plain": [
+ "LogisticRegression()"
+ ]
+ },
+ "execution_count": 322,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from sklearn.linear_model import LogisticRegression\n",
+ "\n",
+ "logReg = LogisticRegression()\n",
+ "\n",
+ "logReg.fit(X_train,y_train)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 323,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.8881693648816936"
+ ]
+ },
+ "execution_count": 323,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from sklearn.metrics import accuracy_score\n",
+ "y_test_pred = logReg.predict(X_test)\n",
+ "\n",
+ "accuracy_score(y_test,y_test_pred)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": ".venv",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.2"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/recidivism/RecidivismNN.ipynb b/recidivism/RecidivismNN.ipynb