RecidivismNN.ipynb (6308B)
# %% [markdown]
# https://www.kaggle.com/datasets/slonnadube/recidivism

# %%
# Load the recidivism CSV, one-hot encode the categorical features, split the
# rows into train / test / validation sets, and standardise the features.
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler

import keras
import tensorflow as tf  # not referenced in the visible cells; kept from the original notebook

DATA_PATH = '../datasets/recidivism/Recidivism.csv'
TARGET_COLUMN = 'Recidivism - Prison Admission'
SEED = 10  # shared by both splits and by the Keras seeding below

# Columns replaced by one-hot indicator columns, in the order they are encoded.
CATEGORICAL_COLUMNS = [
    'Convicting Offense Classification',
    'Convicting Offense Type',
    'Convicting Offense Subtype',
    'Level of Supervision',
    'Sex',
    'Race - Ethnicity',
    'Region Code',
]

df = pd.read_csv(DATA_PATH)

# Features are the first nine CSV columns: every column from position 9 onward
# is dropped.  The target is True where the admission column equals 'Yes'.
X = df.drop(columns=df.columns.to_list()[9:])
y = df[TARGET_COLUMN] == 'Yes'

# Shared encoder; it is re-fitted on every call to `encode`.
ohc = OneHotEncoder(sparse_output=False)


def encode(X, name):
    """Replace column `name` of `X` with its one-hot encoded columns.

    Args:
        X: DataFrame that contains the column `name`.
        name: Label of the categorical column to encode.

    Returns:
        A new DataFrame without `name`, with the indicator columns (named by
        `ohc.get_feature_names_out`) appended on the right.
    """
    trans = ohc.fit_transform(X[[name]])
    transformed_df = pd.DataFrame(
        trans,
        columns=ohc.get_feature_names_out([name]),
        # Reuse X's index: concat aligns on index labels, so a fresh RangeIndex
        # would misalign rows whenever X does not carry the default index.
        index=X.index,
    )
    return pd.concat([X.drop(columns=[name]), transformed_df], axis=1)


for column in CATEGORICAL_COLUMNS:
    X = encode(X, column)

# Default split keeps 75% for training; the remaining 25% is halved into the
# test and validation sets.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=SEED)
X_test, X_val, y_test, y_val = train_test_split(
    X_test, y_test, random_state=SEED, test_size=.5
)

# Fit the scaler on the training rows only, then apply the same statistics to
# the held-out rows, so no validation/test information leaks into training.
# NOTE(review): presumably every column left after encoding is numeric, as
# StandardScaler requires -- confirm against the CSV.
std = StandardScaler()
X_train = std.fit_transform(X_train)
X_val = std.transform(X_val)
X_test = std.transform(X_test)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)
print(X_val.shape, y_val.shape)

# %%
# Binary classifier: two 256-unit ReLU layers, each followed by 20% dropout,
# and a single sigmoid output unit.

# Seed Keras so weight initialisation and dropout are repeatable across
# Restart & Run All.
keras.utils.set_random_seed(SEED)

# Take the input width from the data instead of hard-coding it (the recorded
# run reported 88 features), so a change in the encoded columns does not break
# the model definition.
n_features = X_train.shape[1]

model = keras.Sequential(layers=[
    keras.layers.Input(shape=(n_features,)),
    keras.layers.Dense(256, activation='relu'),
    keras.layers.Dropout(.2),
    keras.layers.Dense(256, activation='relu'),
    keras.layers.Dropout(.2),
    keras.layers.Dense(1, activation='sigmoid'),
])

# %%
model.compile(
    loss='binary_crossentropy',
    metrics=['accuracy'],
    optimizer=keras.optimizers.Adam(),
)
# %%
# Train the compiled `model` on the training split.
EPOCHS = 10
BATCH_SIZE = 128

# Monitor training on the validation split (X_val / y_val) rather than the
# test split, so the test rows stay untouched until the final evaluation.
# `validation_data` is passed as the documented (inputs, targets) tuple.
# The History object is bound to a name so the per-epoch metrics remain
# available and the cell does not echo a bare object repr.
history = model.fit(
    X_train,
    y_train,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    validation_data=(X_val, y_val),
)

# %%
# Single held-out evaluation on the test split.  With the model compiled with
# one 'accuracy' metric, evaluate() returns [loss, accuracy].
test_loss, test_accuracy = model.evaluate(
    X_test, y_test, batch_size=BATCH_SIZE, verbose=0
)
print(f"test loss: {test_loss:.4f} - test accuracy: {test_accuracy:.4f}")