machinelearning

Machine learning code
git clone git://git.laack.co/machinelearning.git
Log | Files | Refs

RecidivismLogReg.ipynb (33608B)


      1 {
      2  "cells": [
      3   {
      4    "cell_type": "markdown",
      5    "metadata": {},
      6    "source": [
      7     "Data source: https://www.kaggle.com/datasets/slonnadube/recidivism"
      8    ]
      9   },
     10   {
     11    "cell_type": "code",
     12    "execution_count": 33,
     13    "metadata": {},
     14    "outputs": [
     15     {
     16      "data": {
     17       "text/plain": [
     18        "['Recidivism Reporting Year',\n",
     19        " 'Fiscal Year Admitted',\n",
     20        " 'Region Code',\n",
     21        " 'Convicting Offense Classification',\n",
     22        " 'Convicting Offense Type',\n",
     23        " 'Convicting Offense Subtype',\n",
     24        " 'Race - Ethnicity',\n",
     25        " 'Sex',\n",
     26        " 'Level of Supervision',\n",
     27        " 'Recidivism - Prison Admission',\n",
     28        " 'Recidivism Type',\n",
     29        " 'New Conviction Offense Class',\n",
     30        " 'New Conviction Offense Type',\n",
     31        " 'New Conviction Offense SubType',\n",
     32        " 'Days to Recidivism',\n",
     33        " 'Part of Target Population']"
     34       ]
     35      },
     36      "execution_count": 33,
     37      "metadata": {},
     38      "output_type": "execute_result"
     39     }
     40    ],
     41    "source": [
     42     "import pandas as pd\n",
     43     "\n",
     44     "df = pd.read_csv('../datasets/recidivism/Recidivism.csv')\n",
     45     "df.columns.to_list()"
     46    ]
     47   },
     48   {
     49    "cell_type": "code",
     50    "execution_count": 34,
     51    "metadata": {},
     52    "outputs": [
     53     {
     54      "data": {
     55       "text/html": [
     56        "<div>\n",
     57        "<style scoped>\n",
     58        "    .dataframe tbody tr th:only-of-type {\n",
     59        "        vertical-align: middle;\n",
     60        "    }\n",
     61        "\n",
     62        "    .dataframe tbody tr th {\n",
     63        "        vertical-align: top;\n",
     64        "    }\n",
     65        "\n",
     66        "    .dataframe thead th {\n",
     67        "        text-align: right;\n",
     68        "    }\n",
     69        "</style>\n",
     70        "<table border=\"1\" class=\"dataframe\">\n",
     71        "  <thead>\n",
     72        "    <tr style=\"text-align: right;\">\n",
     73        "      <th></th>\n",
     74        "      <th>Recidivism Reporting Year</th>\n",
     75        "      <th>Fiscal Year Admitted</th>\n",
     76        "      <th>Region Code</th>\n",
     77        "      <th>Convicting Offense Classification</th>\n",
     78        "      <th>Convicting Offense Type</th>\n",
     79        "      <th>Convicting Offense Subtype</th>\n",
     80        "      <th>Race - Ethnicity</th>\n",
     81        "      <th>Sex</th>\n",
     82        "      <th>Level of Supervision</th>\n",
     83        "      <th>Recidivism - Prison Admission</th>\n",
     84        "      <th>Recidivism Type</th>\n",
     85        "      <th>New Conviction Offense Class</th>\n",
     86        "      <th>New Conviction Offense Type</th>\n",
     87        "      <th>New Conviction Offense SubType</th>\n",
     88        "      <th>Days to Recidivism</th>\n",
     89        "      <th>Part of Target Population</th>\n",
     90        "    </tr>\n",
     91        "  </thead>\n",
     92        "  <tbody>\n",
     93        "    <tr>\n",
     94        "      <th>0</th>\n",
     95        "      <td>2013</td>\n",
     96        "      <td>2010</td>\n",
     97        "      <td>NaN</td>\n",
     98        "      <td>D Felony</td>\n",
     99        "      <td>Public Order</td>\n",
    100        "      <td>OWI</td>\n",
    101        "      <td>White - Hispanic</td>\n",
    102        "      <td>Male</td>\n",
    103        "      <td>NaN</td>\n",
    104        "      <td>No</td>\n",
    105        "      <td>No Recidivism</td>\n",
    106        "      <td>NaN</td>\n",
    107        "      <td>NaN</td>\n",
    108        "      <td>NaN</td>\n",
    109        "      <td>NaN</td>\n",
    110        "      <td>No</td>\n",
    111        "    </tr>\n",
    112        "  </tbody>\n",
    113        "</table>\n",
    114        "</div>"
    115       ],
    116       "text/plain": [
    117        "   Recidivism Reporting Year  Fiscal Year Admitted Region Code  \\\n",
    118        "0                       2013                  2010         NaN   \n",
    119        "\n",
    120        "  Convicting Offense Classification Convicting Offense Type  \\\n",
    121        "0                          D Felony            Public Order   \n",
    122        "\n",
    123        "  Convicting Offense Subtype  Race - Ethnicity   Sex Level of Supervision  \\\n",
    124        "0                        OWI  White - Hispanic  Male                  NaN   \n",
    125        "\n",
    126        "  Recidivism - Prison Admission Recidivism Type New Conviction Offense Class  \\\n",
    127        "0                            No   No Recidivism                          NaN   \n",
    128        "\n",
    129        "  New Conviction Offense Type New Conviction Offense SubType  \\\n",
    130        "0                         NaN                            NaN   \n",
    131        "\n",
    132        "   Days to Recidivism Part of Target Population  \n",
    133        "0                 NaN                        No  "
    134       ]
    135      },
    136      "execution_count": 34,
    137      "metadata": {},
    138      "output_type": "execute_result"
    139     }
    140    ],
    141    "source": [
    142     "df.head(1)  # show only the first row, as a quick look at the values each column holds"
    143    ]
    144   },
    145   {
    146    "cell_type": "code",
    147    "execution_count": 35,
    148    "metadata": {},
    149    "outputs": [
    150     {
    151      "data": {
    152       "text/plain": [
    153        "Recidivism - Prison Admission\n",
    154        "False    85431\n",
    155        "True     10926\n",
    156        "Name: count, dtype: int64"
    157       ]
    158      },
    159      "execution_count": 35,
    160      "metadata": {},
    161      "output_type": "execute_result"
    162     }
    163    ],
    164    "source": [
    165     "X = df.drop(axis=1, columns=df.columns.to_list()[9:])\n",
    166     "y = df['Recidivism - Prison Admission']\n",
    167     "y = y == 'Yes'\n",
    168     "\n",
    169     "y.value_counts()"
    170    ]
    171   },
    172   {
    173    "cell_type": "code",
    174    "execution_count": 36,
    175    "metadata": {},
    176    "outputs": [
    177     {
    178      "data": {
    179       "text/plain": [
    180        "Recidivism Reporting Year             int64\n",
    181        "Fiscal Year Admitted                  int64\n",
    182        "Region Code                          object\n",
    183        "Convicting Offense Classification    object\n",
    184        "Convicting Offense Type              object\n",
    185        "Convicting Offense Subtype           object\n",
    186        "Race - Ethnicity                     object\n",
    187        "Sex                                  object\n",
    188        "Level of Supervision                 object\n",
    189        "dtype: object"
    190       ]
    191      },
    192      "execution_count": 36,
    193      "metadata": {},
    194      "output_type": "execute_result"
    195     }
    196    ],
    197    "source": [
    198     "X.dtypes  # the two year columns are int64; the other seven feature columns are object-typed"
    199    ]
    200   },
    201   {
    202    "cell_type": "code",
    203    "execution_count": 37,
    204    "metadata": {},
    205    "outputs": [],
    206    "source": [
    207     "from sklearn.preprocessing import OneHotEncoder\n",
    208     "ohc = OneHotEncoder(sparse_output=False)\n",
    209     "\n",
    210     "def encode(X, name):\n",
    211     "    trans = ohc.fit_transform(X[[name]])\n",
    212     "    transformed_df = pd.DataFrame(trans, columns=ohc.get_feature_names_out([name]))\n",
    213     "    X = pd.concat([X,transformed_df], axis=1)\n",
    214     "    X = X.drop(columns=[name], axis=1)\n",
    215     "    return X"
    216    ]
    217   },
    218   {
    219    "cell_type": "code",
    220    "execution_count": 38,
    221    "metadata": {},
    222    "outputs": [],
    223    "source": [
    224     "X = encode(X,'Convicting Offense Classification')\n",
    225     "X = encode(X,'Convicting Offense Type')\n",
    226     "X = encode(X,'Convicting Offense Subtype')\n",
    227     "X = encode(X,'Level of Supervision')\n",
    228     "X = encode(X,'Sex')\n",
    229     "X = encode(X,'Race - Ethnicity')\n",
    230     "X = encode(X,'Region Code')"
    231    ]
    232   },
    233   {
    234    "cell_type": "code",
    235    "execution_count": 39,
    236    "metadata": {},
    237    "outputs": [],
    238    "source": [
    239     "from sklearn.preprocessing import StandardScaler\n",
    240     "\n",
    241     "std = StandardScaler()\n",
    242     "X = std.fit_transform(X)"
    243    ]
    244   },
    245   {
    246    "cell_type": "code",
    247    "execution_count": 40,
    248    "metadata": {},
    249    "outputs": [
    250     {
    251      "name": "stdout",
    252      "output_type": "stream",
    253      "text": [
    254       "(72267, 88) (72267,)\n",
    255       "(12045, 88) (12045,)\n"
    256      ]
    257     }
    258    ],
    259    "source": [
    260     "from sklearn.model_selection import train_test_split\n",
    261     "\n",
    262     "X_train, X_test, y_train, y_test = train_test_split(X,y,random_state=10)\n",
    263     "X_test, X_val, y_test, y_val = train_test_split(X_test,y_test,random_state=10, test_size=.5)\n",
    264     "\n",
    265     "print(X_train.shape  , y_train.shape)\n",
    266     "print(X_test.shape  , y_test.shape)"
    267    ]
    268   },
    269   {
    270    "cell_type": "code",
    271    "execution_count": 41,
    272    "metadata": {},
    273    "outputs": [
    274     {
    275      "data": {
    276       "text/html": [
    277        "<style>#sk-container-id-2 {\n",
    278        "  /* Definition of color scheme common for light and dark mode */\n",
    279        "  --sklearn-color-text: black;\n",
    280        "  --sklearn-color-line: gray;\n",
    281        "  /* Definition of color scheme for unfitted estimators */\n",
    282        "  --sklearn-color-unfitted-level-0: #fff5e6;\n",
    283        "  --sklearn-color-unfitted-level-1: #f6e4d2;\n",
    284        "  --sklearn-color-unfitted-level-2: #ffe0b3;\n",
    285        "  --sklearn-color-unfitted-level-3: chocolate;\n",
    286        "  /* Definition of color scheme for fitted estimators */\n",
    287        "  --sklearn-color-fitted-level-0: #f0f8ff;\n",
    288        "  --sklearn-color-fitted-level-1: #d4ebff;\n",
    289        "  --sklearn-color-fitted-level-2: #b3dbfd;\n",
    290        "  --sklearn-color-fitted-level-3: cornflowerblue;\n",
    291        "\n",
    292        "  /* Specific color for light theme */\n",
    293        "  --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
    294        "  --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
    295        "  --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
    296        "  --sklearn-color-icon: #696969;\n",
    297        "\n",
    298        "  @media (prefers-color-scheme: dark) {\n",
    299        "    /* Redefinition of color scheme for dark theme */\n",
    300        "    --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
    301        "    --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
    302        "    --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
    303        "    --sklearn-color-icon: #878787;\n",
    304        "  }\n",
    305        "}\n",
    306        "\n",
    307        "#sk-container-id-2 {\n",
    308        "  color: var(--sklearn-color-text);\n",
    309        "}\n",
    310        "\n",
    311        "#sk-container-id-2 pre {\n",
    312        "  padding: 0;\n",
    313        "}\n",
    314        "\n",
    315        "#sk-container-id-2 input.sk-hidden--visually {\n",
    316        "  border: 0;\n",
    317        "  clip: rect(1px 1px 1px 1px);\n",
    318        "  clip: rect(1px, 1px, 1px, 1px);\n",
    319        "  height: 1px;\n",
    320        "  margin: -1px;\n",
    321        "  overflow: hidden;\n",
    322        "  padding: 0;\n",
    323        "  position: absolute;\n",
    324        "  width: 1px;\n",
    325        "}\n",
    326        "\n",
    327        "#sk-container-id-2 div.sk-dashed-wrapped {\n",
    328        "  border: 1px dashed var(--sklearn-color-line);\n",
    329        "  margin: 0 0.4em 0.5em 0.4em;\n",
    330        "  box-sizing: border-box;\n",
    331        "  padding-bottom: 0.4em;\n",
    332        "  background-color: var(--sklearn-color-background);\n",
    333        "}\n",
    334        "\n",
    335        "#sk-container-id-2 div.sk-container {\n",
    336        "  /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
    337        "     but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
    338        "     so we also need the `!important` here to be able to override the\n",
    339        "     default hidden behavior on the sphinx rendered scikit-learn.org.\n",
    340        "     See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
    341        "  display: inline-block !important;\n",
    342        "  position: relative;\n",
    343        "}\n",
    344        "\n",
    345        "#sk-container-id-2 div.sk-text-repr-fallback {\n",
    346        "  display: none;\n",
    347        "}\n",
    348        "\n",
    349        "div.sk-parallel-item,\n",
    350        "div.sk-serial,\n",
    351        "div.sk-item {\n",
    352        "  /* draw centered vertical line to link estimators */\n",
    353        "  background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
    354        "  background-size: 2px 100%;\n",
    355        "  background-repeat: no-repeat;\n",
    356        "  background-position: center center;\n",
    357        "}\n",
    358        "\n",
    359        "/* Parallel-specific style estimator block */\n",
    360        "\n",
    361        "#sk-container-id-2 div.sk-parallel-item::after {\n",
    362        "  content: \"\";\n",
    363        "  width: 100%;\n",
    364        "  border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
    365        "  flex-grow: 1;\n",
    366        "}\n",
    367        "\n",
    368        "#sk-container-id-2 div.sk-parallel {\n",
    369        "  display: flex;\n",
    370        "  align-items: stretch;\n",
    371        "  justify-content: center;\n",
    372        "  background-color: var(--sklearn-color-background);\n",
    373        "  position: relative;\n",
    374        "}\n",
    375        "\n",
    376        "#sk-container-id-2 div.sk-parallel-item {\n",
    377        "  display: flex;\n",
    378        "  flex-direction: column;\n",
    379        "}\n",
    380        "\n",
    381        "#sk-container-id-2 div.sk-parallel-item:first-child::after {\n",
    382        "  align-self: flex-end;\n",
    383        "  width: 50%;\n",
    384        "}\n",
    385        "\n",
    386        "#sk-container-id-2 div.sk-parallel-item:last-child::after {\n",
    387        "  align-self: flex-start;\n",
    388        "  width: 50%;\n",
    389        "}\n",
    390        "\n",
    391        "#sk-container-id-2 div.sk-parallel-item:only-child::after {\n",
    392        "  width: 0;\n",
    393        "}\n",
    394        "\n",
    395        "/* Serial-specific style estimator block */\n",
    396        "\n",
    397        "#sk-container-id-2 div.sk-serial {\n",
    398        "  display: flex;\n",
    399        "  flex-direction: column;\n",
    400        "  align-items: center;\n",
    401        "  background-color: var(--sklearn-color-background);\n",
    402        "  padding-right: 1em;\n",
    403        "  padding-left: 1em;\n",
    404        "}\n",
    405        "\n",
    406        "\n",
    407        "/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
    408        "clickable and can be expanded/collapsed.\n",
    409        "- Pipeline and ColumnTransformer use this feature and define the default style\n",
    410        "- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
    411        "*/\n",
    412        "\n",
    413        "/* Pipeline and ColumnTransformer style (default) */\n",
    414        "\n",
    415        "#sk-container-id-2 div.sk-toggleable {\n",
    416        "  /* Default theme specific background. It is overwritten whether we have a\n",
    417        "  specific estimator or a Pipeline/ColumnTransformer */\n",
    418        "  background-color: var(--sklearn-color-background);\n",
    419        "}\n",
    420        "\n",
    421        "/* Toggleable label */\n",
    422        "#sk-container-id-2 label.sk-toggleable__label {\n",
    423        "  cursor: pointer;\n",
    424        "  display: block;\n",
    425        "  width: 100%;\n",
    426        "  margin-bottom: 0;\n",
    427        "  padding: 0.5em;\n",
    428        "  box-sizing: border-box;\n",
    429        "  text-align: center;\n",
    430        "}\n",
    431        "\n",
    432        "#sk-container-id-2 label.sk-toggleable__label-arrow:before {\n",
    433        "  /* Arrow on the left of the label */\n",
    434        "  content: \"▸\";\n",
    435        "  float: left;\n",
    436        "  margin-right: 0.25em;\n",
    437        "  color: var(--sklearn-color-icon);\n",
    438        "}\n",
    439        "\n",
    440        "#sk-container-id-2 label.sk-toggleable__label-arrow:hover:before {\n",
    441        "  color: var(--sklearn-color-text);\n",
    442        "}\n",
    443        "\n",
    444        "/* Toggleable content - dropdown */\n",
    445        "\n",
    446        "#sk-container-id-2 div.sk-toggleable__content {\n",
    447        "  max-height: 0;\n",
    448        "  max-width: 0;\n",
    449        "  overflow: hidden;\n",
    450        "  text-align: left;\n",
    451        "  /* unfitted */\n",
    452        "  background-color: var(--sklearn-color-unfitted-level-0);\n",
    453        "}\n",
    454        "\n",
    455        "#sk-container-id-2 div.sk-toggleable__content.fitted {\n",
    456        "  /* fitted */\n",
    457        "  background-color: var(--sklearn-color-fitted-level-0);\n",
    458        "}\n",
    459        "\n",
    460        "#sk-container-id-2 div.sk-toggleable__content pre {\n",
    461        "  margin: 0.2em;\n",
    462        "  border-radius: 0.25em;\n",
    463        "  color: var(--sklearn-color-text);\n",
    464        "  /* unfitted */\n",
    465        "  background-color: var(--sklearn-color-unfitted-level-0);\n",
    466        "}\n",
    467        "\n",
    468        "#sk-container-id-2 div.sk-toggleable__content.fitted pre {\n",
    469        "  /* unfitted */\n",
    470        "  background-color: var(--sklearn-color-fitted-level-0);\n",
    471        "}\n",
    472        "\n",
    473        "#sk-container-id-2 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
    474        "  /* Expand drop-down */\n",
    475        "  max-height: 200px;\n",
    476        "  max-width: 100%;\n",
    477        "  overflow: auto;\n",
    478        "}\n",
    479        "\n",
    480        "#sk-container-id-2 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
    481        "  content: \"▾\";\n",
    482        "}\n",
    483        "\n",
    484        "/* Pipeline/ColumnTransformer-specific style */\n",
    485        "\n",
    486        "#sk-container-id-2 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
    487        "  color: var(--sklearn-color-text);\n",
    488        "  background-color: var(--sklearn-color-unfitted-level-2);\n",
    489        "}\n",
    490        "\n",
    491        "#sk-container-id-2 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
    492        "  background-color: var(--sklearn-color-fitted-level-2);\n",
    493        "}\n",
    494        "\n",
    495        "/* Estimator-specific style */\n",
    496        "\n",
    497        "/* Colorize estimator box */\n",
    498        "#sk-container-id-2 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
    499        "  /* unfitted */\n",
    500        "  background-color: var(--sklearn-color-unfitted-level-2);\n",
    501        "}\n",
    502        "\n",
    503        "#sk-container-id-2 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
    504        "  /* fitted */\n",
    505        "  background-color: var(--sklearn-color-fitted-level-2);\n",
    506        "}\n",
    507        "\n",
    508        "#sk-container-id-2 div.sk-label label.sk-toggleable__label,\n",
    509        "#sk-container-id-2 div.sk-label label {\n",
    510        "  /* The background is the default theme color */\n",
    511        "  color: var(--sklearn-color-text-on-default-background);\n",
    512        "}\n",
    513        "\n",
    514        "/* On hover, darken the color of the background */\n",
    515        "#sk-container-id-2 div.sk-label:hover label.sk-toggleable__label {\n",
    516        "  color: var(--sklearn-color-text);\n",
    517        "  background-color: var(--sklearn-color-unfitted-level-2);\n",
    518        "}\n",
    519        "\n",
    520        "/* Label box, darken color on hover, fitted */\n",
    521        "#sk-container-id-2 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
    522        "  color: var(--sklearn-color-text);\n",
    523        "  background-color: var(--sklearn-color-fitted-level-2);\n",
    524        "}\n",
    525        "\n",
    526        "/* Estimator label */\n",
    527        "\n",
    528        "#sk-container-id-2 div.sk-label label {\n",
    529        "  font-family: monospace;\n",
    530        "  font-weight: bold;\n",
    531        "  display: inline-block;\n",
    532        "  line-height: 1.2em;\n",
    533        "}\n",
    534        "\n",
    535        "#sk-container-id-2 div.sk-label-container {\n",
    536        "  text-align: center;\n",
    537        "}\n",
    538        "\n",
    539        "/* Estimator-specific */\n",
    540        "#sk-container-id-2 div.sk-estimator {\n",
    541        "  font-family: monospace;\n",
    542        "  border: 1px dotted var(--sklearn-color-border-box);\n",
    543        "  border-radius: 0.25em;\n",
    544        "  box-sizing: border-box;\n",
    545        "  margin-bottom: 0.5em;\n",
    546        "  /* unfitted */\n",
    547        "  background-color: var(--sklearn-color-unfitted-level-0);\n",
    548        "}\n",
    549        "\n",
    550        "#sk-container-id-2 div.sk-estimator.fitted {\n",
    551        "  /* fitted */\n",
    552        "  background-color: var(--sklearn-color-fitted-level-0);\n",
    553        "}\n",
    554        "\n",
    555        "/* on hover */\n",
    556        "#sk-container-id-2 div.sk-estimator:hover {\n",
    557        "  /* unfitted */\n",
    558        "  background-color: var(--sklearn-color-unfitted-level-2);\n",
    559        "}\n",
    560        "\n",
    561        "#sk-container-id-2 div.sk-estimator.fitted:hover {\n",
    562        "  /* fitted */\n",
    563        "  background-color: var(--sklearn-color-fitted-level-2);\n",
    564        "}\n",
    565        "\n",
    566        "/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
    567        "\n",
    568        "/* Common style for \"i\" and \"?\" */\n",
    569        "\n",
    570        ".sk-estimator-doc-link,\n",
    571        "a:link.sk-estimator-doc-link,\n",
    572        "a:visited.sk-estimator-doc-link {\n",
    573        "  float: right;\n",
    574        "  font-size: smaller;\n",
    575        "  line-height: 1em;\n",
    576        "  font-family: monospace;\n",
    577        "  background-color: var(--sklearn-color-background);\n",
    578        "  border-radius: 1em;\n",
    579        "  height: 1em;\n",
    580        "  width: 1em;\n",
    581        "  text-decoration: none !important;\n",
    582        "  margin-left: 1ex;\n",
    583        "  /* unfitted */\n",
    584        "  border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
    585        "  color: var(--sklearn-color-unfitted-level-1);\n",
    586        "}\n",
    587        "\n",
    588        ".sk-estimator-doc-link.fitted,\n",
    589        "a:link.sk-estimator-doc-link.fitted,\n",
    590        "a:visited.sk-estimator-doc-link.fitted {\n",
    591        "  /* fitted */\n",
    592        "  border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
    593        "  color: var(--sklearn-color-fitted-level-1);\n",
    594        "}\n",
    595        "\n",
    596        "/* On hover */\n",
    597        "div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
    598        ".sk-estimator-doc-link:hover,\n",
    599        "div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
    600        ".sk-estimator-doc-link:hover {\n",
    601        "  /* unfitted */\n",
    602        "  background-color: var(--sklearn-color-unfitted-level-3);\n",
    603        "  color: var(--sklearn-color-background);\n",
    604        "  text-decoration: none;\n",
    605        "}\n",
    606        "\n",
    607        "div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
    608        ".sk-estimator-doc-link.fitted:hover,\n",
    609        "div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
    610        ".sk-estimator-doc-link.fitted:hover {\n",
    611        "  /* fitted */\n",
    612        "  background-color: var(--sklearn-color-fitted-level-3);\n",
    613        "  color: var(--sklearn-color-background);\n",
    614        "  text-decoration: none;\n",
    615        "}\n",
    616        "\n",
    617        "/* Span, style for the box shown on hovering the info icon */\n",
    618        ".sk-estimator-doc-link span {\n",
    619        "  display: none;\n",
    620        "  z-index: 9999;\n",
    621        "  position: relative;\n",
    622        "  font-weight: normal;\n",
    623        "  right: .2ex;\n",
    624        "  padding: .5ex;\n",
    625        "  margin: .5ex;\n",
    626        "  width: min-content;\n",
    627        "  min-width: 20ex;\n",
    628        "  max-width: 50ex;\n",
    629        "  color: var(--sklearn-color-text);\n",
    630        "  box-shadow: 2pt 2pt 4pt #999;\n",
    631        "  /* unfitted */\n",
    632        "  background: var(--sklearn-color-unfitted-level-0);\n",
    633        "  border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
    634        "}\n",
    635        "\n",
    636        ".sk-estimator-doc-link.fitted span {\n",
    637        "  /* fitted */\n",
    638        "  background: var(--sklearn-color-fitted-level-0);\n",
    639        "  border: var(--sklearn-color-fitted-level-3);\n",
    640        "}\n",
    641        "\n",
    642        ".sk-estimator-doc-link:hover span {\n",
    643        "  display: block;\n",
    644        "}\n",
    645        "\n",
    646        "/* \"?\"-specific style due to the `<a>` HTML tag */\n",
    647        "\n",
    648        "#sk-container-id-2 a.estimator_doc_link {\n",
    649        "  float: right;\n",
    650        "  font-size: 1rem;\n",
    651        "  line-height: 1em;\n",
    652        "  font-family: monospace;\n",
    653        "  background-color: var(--sklearn-color-background);\n",
    654        "  border-radius: 1rem;\n",
    655        "  height: 1rem;\n",
    656        "  width: 1rem;\n",
    657        "  text-decoration: none;\n",
    658        "  /* unfitted */\n",
    659        "  color: var(--sklearn-color-unfitted-level-1);\n",
    660        "  border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
    661        "}\n",
    662        "\n",
    663        "#sk-container-id-2 a.estimator_doc_link.fitted {\n",
    664        "  /* fitted */\n",
    665        "  border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
    666        "  color: var(--sklearn-color-fitted-level-1);\n",
    667        "}\n",
    668        "\n",
    669        "/* On hover */\n",
    670        "#sk-container-id-2 a.estimator_doc_link:hover {\n",
    671        "  /* unfitted */\n",
    672        "  background-color: var(--sklearn-color-unfitted-level-3);\n",
    673        "  color: var(--sklearn-color-background);\n",
    674        "  text-decoration: none;\n",
    675        "}\n",
    676        "\n",
    677        "#sk-container-id-2 a.estimator_doc_link.fitted:hover {\n",
    678        "  /* fitted */\n",
    679        "  background-color: var(--sklearn-color-fitted-level-3);\n",
    680        "}\n",
    681        "</style><div id=\"sk-container-id-2\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>LogisticRegression()</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-2\" type=\"checkbox\" checked><label for=\"sk-estimator-id-2\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">&nbsp;&nbsp;LogisticRegression<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.5/modules/generated/sklearn.linear_model.LogisticRegression.html\">?<span>Documentation for LogisticRegression</span></a><span class=\"sk-estimator-doc-link fitted\">i<span>Fitted</span></span></label><div class=\"sk-toggleable__content fitted\"><pre>LogisticRegression()</pre></div> </div></div></div></div>"
    682       ],
    683       "text/plain": [
    684        "LogisticRegression()"
    685       ]
    686      },
    687      "execution_count": 41,
    688      "metadata": {},
    689      "output_type": "execute_result"
    690     }
    691    ],
    692    "source": [
    693     "from sklearn.linear_model import LogisticRegression\n",
    694     "\n",
    695     "logReg = LogisticRegression()\n",
    696     "\n",
    697     "logReg.fit(X_train,y_train)"
    698    ]
    699   },
    700   {
    701    "cell_type": "code",
    702    "execution_count": 42,
    703    "metadata": {},
    704    "outputs": [
    705     {
    706      "data": {
    707       "text/plain": [
    708        "0.8881693648816936"
    709       ]
    710      },
    711      "execution_count": 42,
    712      "metadata": {},
    713      "output_type": "execute_result"
    714     }
    715    ],
    716    "source": [
    717     "from sklearn.metrics import accuracy_score\n",
    718     "y_test_pred = logReg.predict(X_test)\n",
    719     "\n",
    720     "accuracy_score(y_test,y_test_pred)"
    721    ]
    722   },
    723   {
    724    "cell_type": "code",
    725    "execution_count": 49,
    726    "metadata": {},
    727    "outputs": [
    728     {
    729      "name": "stdout",
    730      "output_type": "stream",
    731      "text": [
    732       "Level of Supervision_Low Risk Probation   -0.5339620957982494\n",
    733       "Level of Supervision_Minimum   -0.5147264201803158\n",
    734       "Level of Supervision_Minimum Risk Program   -0.44074785427274304\n",
    735       "Convicting Offense Classification_Serious Misdemeanor   -0.35849865449503004\n",
    736       "Region Code_6JD   -0.29377538041752616\n",
    737       "Race - Ethnicity_Unknown   -0.27944493177025126\n",
    738       "Race - Ethnicity_Unknown - Non-Hispanic   -0.2387566286558007\n",
    739       "Convicting Offense Classification_Simple Misdemeanor   -0.18613662623857888\n",
    740       "Race - Ethnicity_Unk - NH   -0.16798593793197264\n",
    741       "Level of Supervision_Low Normal   -0.16017010735222512\n",
    742       "Convicting Offense Subtype_OWI   -0.13559556625348412\n",
    743       "Convicting Offense Subtype_Health/Medical   -0.10074187085953795\n",
    744       "Region Code_4JD   -0.08036165789589228\n",
    745       "Sex_Unknown   -0.07636769248279494\n",
    746       "Convicting Offense Type_Public Order   -0.06674645656428356\n",
    747       "Convicting Offense Subtype_Gambling   -0.057086978034176485\n",
    748       "Convicting Offense Subtype_Tax Laws   -0.05657269492640917\n",
    749       "Race - Ethnicity_A/PI - H   -0.05553311522652603\n",
    750       "Region Code_7JD   -0.054376673533109036\n",
    751       "Sex_Female   -0.054225227388666336\n",
    752       "Level of Supervision_nan   -0.036915563071045106\n",
    753       "Level of Supervision_Administrative   -0.030359219733896957\n",
    754       "Race - Ethnicity_Asian or Pacific Islander - Non-Hispanic   -0.028799189656285896\n",
    755       "Region Code_1JD   -0.02615414378498509\n",
    756       "Convicting Offense Subtype_Other Violent   -0.024412840107090127\n",
    757       "Convicting Offense Subtype_Trafficking   -0.02108978164071303\n",
    758       "Convicting Offense Subtype_Other Drug   -0.018756200863952213\n",
    759       "Convicting Offense Subtype_Animals   -0.01822380470925885\n",
    760       "Convicting Offense Subtype_Other Criminal   -0.017046827026266573\n",
    761       "Convicting Offense Subtype_Kidnap   -0.016342620349334896\n",
    762       "Region Code_3JD   -0.01632764665874275\n",
    763       "Convicting Offense Subtype_Business   -0.016213443754824055\n",
    764       "Race - Ethnicity_AI/AN - H   -0.01547977017003544\n",
    765       "Region Code_8JD   -0.015345871062002029\n",
    766       "Convicting Offense Subtype_Natural Resources   -0.015169899630659534\n",
    767       "Convicting Offense Classification_NCIC Virtual Code   -0.011600908676106038\n",
    768       "Convicting Offense Subtype_Vandalism   -0.011287419364530349\n",
    769       "Convicting Offense Subtype_Murder/Manslaughter   -0.008866576866914002\n",
    770       "Convicting Offense Subtype_Arson   -0.005156165190029143\n",
    771       "Convicting Offense Subtype_Sex   -0.0018102182593264116\n",
    772       "Convicting Offense Classification_nan   -0.0005600927940130159\n",
    773       "Convicting Offense Type_nan   -0.0005600927939903566\n",
    774       "Convicting Offense Subtype_nan   -0.0005600927939903566\n",
    775       "Convicting Offense Subtype_Stolen Property   0.0005304053063805491\n",
    776       "Race - Ethnicity_White - Non-Hispanic   0.001153812183389463\n",
    777       "Level of Supervision_Not Available for Supervision   0.001490247455119801\n",
    778       "Convicting Offense Subtype_Prostitution/Pimping   0.0015013093121284365\n",
    779       "Convicting Offense Type_Drug   0.0030623870110162584\n",
    780       "Convicting Offense Type_Violent   0.0034292202922170233\n",
    781       "Convicting Offense Type_Other   0.006015780723941681\n",
    782       "Convicting Offense Subtype_Other Property   0.006130852235344804\n",
    783       "Level of Supervision_Unknown   0.007781758222531563\n",
    784       "Region Code_2JD   0.010238861163768128\n",
    785       "Convicting Offense Classification_Other Misdemeanor   0.012591486492788916\n",
    786       "Convicting Offense Subtype_Robbery   0.014053669830263436\n",
    787       "Convicting Offense Classification_Special Sentence 2005   0.014561589545323982\n",
    788       "Race - Ethnicity_Asian or Pacific Islander - Hispanic   0.015679057334770714\n",
    789       "Race - Ethnicity_American Indian or Alaska Native - Hispanic   0.01773867652759473\n",
    790       "Race - Ethnicity_Black - Hispanic   0.01798930728110183\n",
    791       "Race - Ethnicity_A/PI - NH   0.018819321480771305\n",
    792       "Race - Ethnicity_AI/AN - NH   0.020800565434265\n",
    793       "Region Code_5JD   0.020852413776690646\n",
    794       "Race - Ethnicity_White - Hispanic   0.02327628518150384\n",
    795       "Convicting Offense Subtype_Drug Possession   0.027225124801652227\n",
    796       "Convicting Offense Subtype_Other Government   0.028715642473623113\n",
    797       "Convicting Offense Subtype_Assault   0.029569856997337447\n",
    798       "Convicting Offense Subtype_Flight/Escape   0.03012428515800137\n",
    799       "Convicting Offense Subtype_Traffic   0.03430715996827982\n",
    800       "Convicting Offense Subtype_Weapons   0.03527466491386668\n",
    801       "Convicting Offense Subtype_Forgery/Fraud   0.036233812269670415\n",
    802       "Convicting Offense Classification_B Felony   0.03792206013687437\n",
    803       "Race - Ethnicity_American Indian or Alaska Native - Non-Hispanic   0.03923757766380635\n",
    804       "Convicting Offense Subtype_Burglary   0.045489691213388105\n",
    805       "Convicting Offense Subtype_Theft   0.047935190983844966\n",
    806       "Sex_Male   0.05874268187558015\n",
    807       "Convicting Offense Type_Property   0.07168268636948888\n",
    808       "Race - Ethnicity_Black - Non-Hispanic   0.07207116769606338\n",
    809       "Convicting Offense Subtype_Alcohol   0.07341854193314533\n",
    810       "Convicting Offense Subtype_Other Public Order   0.08202232001715717\n",
    811       "Convicting Offense Classification_Felony - Enhancement to Original Penalty   0.08742027870862958\n",
    812       "Recidivism Reporting Year   0.12049374422377804\n",
    813       "Fiscal Year Admitted   0.12049374422377804\n",
    814       "Convicting Offense Classification_Aggravated Misdemeanor   0.13924900491864936\n",
    815       "Convicting Offense Classification_C Felony   0.1917830084037748\n",
    816       "Convicting Offense Classification_D Felony   0.24811340746039828\n",
    817       "Level of Supervision_High Normal   0.2692817282284991\n",
    818       "Region Code_nan   0.33019477985094964\n",
    819       "Level of Supervision_Intensive   0.9152562744591675\n"
    820      ]
    821     }
    822    ],
    823    "source": [
    824     "from collections import OrderedDict\n",
    825     "# Print coefficients and corresponding feature names\n",
    826     "\n",
    827     "coeff = logReg.coef_\n",
    828     "names = std.feature_names_in_\n",
    829     "\n",
    830     "\n",
    831     "combos = {}\n",
    832     "for i in range(0, len(names)):\n",
    833     "    combos[names[i]] = coeff[0][i]\n",
    834     "\n",
    835     "sorted_dict : OrderedDict = OrderedDict(sorted(combos.items(), key=lambda item: item[1]))\n",
    836     "\n",
    837     "for i in sorted_dict:\n",
    838     "    print(i , \" \" , sorted_dict[i])"
    839    ]
    840   }
    841  ],
    842  "metadata": {
    843   "kernelspec": {
    844    "display_name": ".venv",
    845    "language": "python",
    846    "name": "python3"
    847   },
    848   "language_info": {
    849    "codemirror_mode": {
    850     "name": "ipython",
    851     "version": 3
    852    },
    853    "file_extension": ".py",
    854    "mimetype": "text/x-python",
    855    "name": "python",
    856    "nbconvert_exporter": "python",
    857    "pygments_lexer": "ipython3",
    858    "version": "3.11.2"
    859   }
    860  },
    861  "nbformat": 4,
    862  "nbformat_minor": 2
    863 }