machinelearning

Machine learning code
git clone git://git.laack.co/machinelearning.git
Log | Files | Refs

RecidivismNN.ipynb (6308B)


      1 {
      2  "cells": [
      3   {
      4    "cell_type": "markdown",
      5    "metadata": {},
      6    "source": [
      7     "Data source (Kaggle): https://www.kaggle.com/datasets/slonnadube/recidivism"
      8    ]
      9   },
     10   {
     11    "cell_type": "code",
     12    "execution_count": 51,
     13    "metadata": {},
     14    "outputs": [
     15     {
     16      "name": "stdout",
     17      "output_type": "stream",
     18      "text": [
     19       "(72267, 88) (72267,)\n",
     20       "(12045, 88) (12045,)\n"
     21      ]
     22     }
     23    ],
     24    "source": [
     25     "import pandas as pd\n",
     26     "\n",
     27     "df = pd.read_csv('../datasets/recidivism/Recidivism.csv')\n",
     28     "# Features: keep only the first 9 columns (everything from position 9 on is dropped).\n",
     29     "X = df.drop(columns=df.columns[9:])\n",
     30     "# Target: True where 'Recidivism - Prison Admission' equals 'Yes'.\n",
     31     "y = df['Recidivism - Prison Admission'] == 'Yes'\n",
     32     "\n",
     33     "from sklearn.preprocessing import OneHotEncoder\n",
     34     "# Dense (non-sparse) output so encode() can wrap the result in a DataFrame.\n",
     35     "ohc = OneHotEncoder(sparse_output=False)\n",
     36     "\n",
     37     "def encode(X, name):\n",
     38     "    \"\"\"Return X with column `name` replaced by its one-hot encoded columns.\"\"\"\n",
     39     "    trans = ohc.fit_transform(X[[name]])\n",
     40     "    # Reuse X's index: concat aligns on index, so a default RangeIndex would misalign (NaN rows) on a filtered X.\n",
     41     "    onehot = pd.DataFrame(trans, columns=ohc.get_feature_names_out([name]), index=X.index)\n",
     42     "    return pd.concat([X.drop(columns=[name]), onehot], axis=1)\n",
     43     "\n",
     44     "# One-hot encode each categorical feature in turn; the order fixes the final column order.\n",
     45     "for column in (\n",
     46     "    'Convicting Offense Classification', 'Convicting Offense Type',\n",
     47     "    'Convicting Offense Subtype', 'Level of Supervision',\n",
     48     "    'Sex', 'Race - Ethnicity', 'Region Code',\n",
     49     "):\n",
     50     "    X = encode(X, column)\n",
     51     "\n",
     52     "\n",
     53     "from sklearn.preprocessing import StandardScaler\n",
     54     "from sklearn.model_selection import train_test_split\n",
     55     "\n",
     56     "# Split first: 75% train, then the remaining 25% is halved into test and validation.\n",
     57     "X_train, X_test, y_train, y_test = train_test_split(X,y,random_state=10)\n",
     58     "X_test, X_val, y_test, y_val = train_test_split(X_test,y_test,random_state=10, test_size=.5)\n",
     59     "\n",
     60     "# Fit the scaler on training rows only so test/validation statistics do not leak into it.\n",
     61     "std = StandardScaler().fit(X_train)\n",
     62     "X_train, X_test, X_val = (std.transform(part) for part in (X_train, X_test, X_val))\n",
     63     "print(X_train.shape  , y_train.shape)\n",
     64     "print(X_test.shape  , y_test.shape)"
     65    ]
     66   },
     67   {
     68    "cell_type": "code",
     69    "execution_count": 52,
     70    "metadata": {},
     71    "outputs": [],
     72    "source": [
     73     "import keras\n",
     74     "import tensorflow as tf\n",
     75     "\n",
     76     "model = keras.Sequential(layers=[\n",
     77     "    # Input width is taken from the feature matrix rather than hard-coding 88.\n",
     78     "    keras.layers.Input(shape=(X_train.shape[1],)),\n",
     79     "    keras.layers.Dense(256, activation='relu'),\n",
     80     "    keras.layers.Dropout(.2),\n",
     81     "    keras.layers.Dense(256, activation='relu'),\n",
     82     "    keras.layers.Dropout(.2),\n",
     83     "    keras.layers.Dense(1, activation='sigmoid')  # single sigmoid unit for the boolean target\n",
     84     "])"
     85    ]
     86   },
     87   {
     88    "cell_type": "code",
     89    "execution_count": 53,
     90    "metadata": {},
     91    "outputs": [],
     92    "source": [
     93     "model.compile(optimizer=keras.optimizers.Adam(), loss='binary_crossentropy', metrics=['accuracy'])  # boolean target, hence binary cross-entropy"
     94    ]
     95   },
     96   {
     97    "cell_type": "code",
     98    "execution_count": 54,
     99    "metadata": {},
    100    "outputs": [
    101     {
    102      "name": "stdout",
    103      "output_type": "stream",
    104      "text": [
    105       "Epoch 1/10\n",
    106       "\u001b[1m565/565\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m2s\u001b[0m 2ms/step - accuracy: 0.8795 - loss: 0.2929 - val_accuracy: 0.8848 - val_loss: 0.2574\n",
    107       "Epoch 2/10\n",
    108       "\u001b[1m565/565\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 2ms/step - accuracy: 0.8901 - loss: 0.2505 - val_accuracy: 0.8856 - val_loss: 0.2561\n",
    109       "Epoch 3/10\n",
    110       "\u001b[1m565/565\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 2ms/step - accuracy: 0.8927 - loss: 0.2461 - val_accuracy: 0.8851 - val_loss: 0.2516\n",
    111       "Epoch 4/10\n",
    112       "\u001b[1m565/565\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 2ms/step - accuracy: 0.8945 - loss: 0.2429 - val_accuracy: 0.8862 - val_loss: 0.2526\n",
    113       "Epoch 5/10\n",
    114       "\u001b[1m565/565\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 2ms/step - accuracy: 0.8937 - loss: 0.2379 - val_accuracy: 0.8863 - val_loss: 0.2542\n",
    115       "Epoch 6/10\n",
    116       "\u001b[1m565/565\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 2ms/step - accuracy: 0.8938 - loss: 0.2395 - val_accuracy: 0.8858 - val_loss: 0.2543\n",
    117       "Epoch 7/10\n",
    118       "\u001b[1m565/565\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 2ms/step - accuracy: 0.8938 - loss: 0.2404 - val_accuracy: 0.8868 - val_loss: 0.2513\n",
    119       "Epoch 8/10\n",
    120       "\u001b[1m565/565\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 2ms/step - accuracy: 0.8951 - loss: 0.2373 - val_accuracy: 0.8862 - val_loss: 0.2531\n",
    121       "Epoch 9/10\n",
    122       "\u001b[1m565/565\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 2ms/step - accuracy: 0.8967 - loss: 0.2333 - val_accuracy: 0.8855 - val_loss: 0.2509\n",
    123       "Epoch 10/10\n",
    124       "\u001b[1m565/565\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 2ms/step - accuracy: 0.8965 - loss: 0.2356 - val_accuracy: 0.8866 - val_loss: 0.2529\n"
    125      ]
    126     },
    127     {
    128      "data": {
    129       "text/plain": [
    130        "<keras.src.callbacks.history.History at 0x7f03b60be290>"
    131       ]
    132      },
    133      "execution_count": 54,
    134      "metadata": {},
    135      "output_type": "execute_result"
    136     }
    137    ],
    138    "source": [
    139     "model.fit(X_train,y_train,epochs=10,validation_data=(X_val,y_val), batch_size=128)  # validate on the validation split; X_test stays held out"
    140    ]
    141   }
    142  ],
    143  "metadata": {
    144   "kernelspec": {
    145    "display_name": ".venv",
    146    "language": "python",
    147    "name": "python3"
    148   },
    149   "language_info": {
    150    "codemirror_mode": {
    151     "name": "ipython",
    152     "version": 3
    153    },
    154    "file_extension": ".py",
    155    "mimetype": "text/x-python",
    156    "name": "python",
    157    "nbconvert_exporter": "python",
    158    "pygments_lexer": "ipython3",
    159    "version": "3.11.2"
    160   }
    161  },
    162  "nbformat": 4,
    163  "nbformat_minor": 2
    164 }