machinelearning

Machine learning code
git clone git://git.laack.co/machinelearning.git
Log | Files | Refs

ClassificationTry2.ipynb (26605B)


      1 {
      2  "cells": [
      3   {
      4    "cell_type": "markdown",
      5    "metadata": {},
      6    "source": [
      7     "First try was ugly.\n",
      8     "\n",
      9     "This version uses CountVectorizer instead, which works much like one-hot encoding except that it records how many times each word appears in a message.\n",
     10     "\n",
     11     "These word counts are then passed to a multinomial Naive Bayes classifier (MultinomialNB) for fitting and evaluation.\n",
     12     "\n",
     13     "The resulting model is about 96.5% accurate on the held-out test split."
     14    ]
     15   },
     16   {
     17    "cell_type": "code",
     18    "execution_count": 34,
     19    "metadata": {},
     20    "outputs": [
     21     {
     22      "data": {
     23       "text/html": [
     24        "<div>\n",
     25        "<style scoped>\n",
     26        "    .dataframe tbody tr th:only-of-type {\n",
     27        "        vertical-align: middle;\n",
     28        "    }\n",
     29        "\n",
     30        "    .dataframe tbody tr th {\n",
     31        "        vertical-align: top;\n",
     32        "    }\n",
     33        "\n",
     34        "    .dataframe thead th {\n",
     35        "        text-align: right;\n",
     36        "    }\n",
     37        "</style>\n",
     38        "<table border=\"1\" class=\"dataframe\">\n",
     39        "  <thead>\n",
     40        "    <tr style=\"text-align: right;\">\n",
     41        "      <th></th>\n",
     42        "      <th>Unnamed: 0</th>\n",
     43        "      <th>Email Text</th>\n",
     44        "      <th>Email Type</th>\n",
     45        "    </tr>\n",
     46        "  </thead>\n",
     47        "  <tbody>\n",
     48        "    <tr>\n",
     49        "      <th>0</th>\n",
     50        "      <td>0</td>\n",
     51        "      <td>re : 6 . 1100 , disc : uniformitarianism , re ...</td>\n",
     52        "      <td>Safe Email</td>\n",
     53        "    </tr>\n",
     54        "    <tr>\n",
     55        "      <th>1</th>\n",
     56        "      <td>1</td>\n",
     57        "      <td>the other side of * galicismos * * galicismo *...</td>\n",
     58        "      <td>Safe Email</td>\n",
     59        "    </tr>\n",
     60        "    <tr>\n",
     61        "      <th>2</th>\n",
     62        "      <td>2</td>\n",
     63        "      <td>re : equistar deal tickets are you still avail...</td>\n",
     64        "      <td>Safe Email</td>\n",
     65        "    </tr>\n",
     66        "    <tr>\n",
     67        "      <th>3</th>\n",
     68        "      <td>3</td>\n",
     69        "      <td>\\nHello I am your hot lil horny toy.\\n    I am...</td>\n",
     70        "      <td>Phishing Email</td>\n",
     71        "    </tr>\n",
     72        "    <tr>\n",
     73        "      <th>4</th>\n",
     74        "      <td>4</td>\n",
     75        "      <td>software at incredibly low prices ( 86 % lower...</td>\n",
     76        "      <td>Phishing Email</td>\n",
     77        "    </tr>\n",
     78        "    <tr>\n",
     79        "      <th>...</th>\n",
     80        "      <td>...</td>\n",
     81        "      <td>...</td>\n",
     82        "      <td>...</td>\n",
     83        "    </tr>\n",
     84        "    <tr>\n",
     85        "      <th>18645</th>\n",
     86        "      <td>18646</td>\n",
     87        "      <td>date a lonely housewife always wanted to date ...</td>\n",
     88        "      <td>Phishing Email</td>\n",
     89        "    </tr>\n",
     90        "    <tr>\n",
     91        "      <th>18646</th>\n",
     92        "      <td>18647</td>\n",
     93        "      <td>request submitted : access request for anita ....</td>\n",
     94        "      <td>Safe Email</td>\n",
     95        "    </tr>\n",
     96        "    <tr>\n",
     97        "      <th>18647</th>\n",
     98        "      <td>18648</td>\n",
     99        "      <td>re : important - prc mtg hi dorn &amp; john , as y...</td>\n",
    100        "      <td>Safe Email</td>\n",
    101        "    </tr>\n",
    102        "    <tr>\n",
    103        "      <th>18648</th>\n",
    104        "      <td>18649</td>\n",
    105        "      <td>press clippings - letter on californian utilit...</td>\n",
    106        "      <td>Safe Email</td>\n",
    107        "    </tr>\n",
    108        "    <tr>\n",
    109        "      <th>18649</th>\n",
    110        "      <td>18650</td>\n",
    111        "      <td>empty</td>\n",
    112        "      <td>Phishing Email</td>\n",
    113        "    </tr>\n",
    114        "  </tbody>\n",
    115        "</table>\n",
    116        "<p>18634 rows × 3 columns</p>\n",
    117        "</div>"
    118       ],
    119       "text/plain": [
    120        "       Unnamed: 0                                         Email Text  \\\n",
    121        "0               0  re : 6 . 1100 , disc : uniformitarianism , re ...   \n",
    122        "1               1  the other side of * galicismos * * galicismo *...   \n",
    123        "2               2  re : equistar deal tickets are you still avail...   \n",
    124        "3               3  \\nHello I am your hot lil horny toy.\\n    I am...   \n",
    125        "4               4  software at incredibly low prices ( 86 % lower...   \n",
    126        "...           ...                                                ...   \n",
    127        "18645       18646  date a lonely housewife always wanted to date ...   \n",
    128        "18646       18647  request submitted : access request for anita ....   \n",
    129        "18647       18648  re : important - prc mtg hi dorn & john , as y...   \n",
    130        "18648       18649  press clippings - letter on californian utilit...   \n",
    131        "18649       18650                                              empty   \n",
    132        "\n",
    133        "           Email Type  \n",
    134        "0          Safe Email  \n",
    135        "1          Safe Email  \n",
    136        "2          Safe Email  \n",
    137        "3      Phishing Email  \n",
    138        "4      Phishing Email  \n",
    139        "...               ...  \n",
    140        "18645  Phishing Email  \n",
    141        "18646      Safe Email  \n",
    142        "18647      Safe Email  \n",
    143        "18648      Safe Email  \n",
    144        "18649  Phishing Email  \n",
    145        "\n",
    146        "[18634 rows x 3 columns]"
    147       ]
    148      },
    149      "execution_count": 34,
    150      "metadata": {},
    151      "output_type": "execute_result"
    152     }
    153    ],
    154    "source": [
    155     "import pandas as pd \n",
    156     "\n",
    157     "df = pd.read_csv('../datasets/phishing/Phishing_Email.csv')\n",
    158     "df = df.dropna()\n",
    159     "df"
    160    ]
    161   },
    162   {
    163    "cell_type": "code",
    164    "execution_count": 35,
    165    "metadata": {},
    166    "outputs": [
    167     {
    168      "data": {
    169       "text/html": [
    170        "<style>#sk-container-id-7 {\n",
    171        "  /* Definition of color scheme common for light and dark mode */\n",
    172        "  --sklearn-color-text: black;\n",
    173        "  --sklearn-color-line: gray;\n",
    174        "  /* Definition of color scheme for unfitted estimators */\n",
    175        "  --sklearn-color-unfitted-level-0: #fff5e6;\n",
    176        "  --sklearn-color-unfitted-level-1: #f6e4d2;\n",
    177        "  --sklearn-color-unfitted-level-2: #ffe0b3;\n",
    178        "  --sklearn-color-unfitted-level-3: chocolate;\n",
    179        "  /* Definition of color scheme for fitted estimators */\n",
    180        "  --sklearn-color-fitted-level-0: #f0f8ff;\n",
    181        "  --sklearn-color-fitted-level-1: #d4ebff;\n",
    182        "  --sklearn-color-fitted-level-2: #b3dbfd;\n",
    183        "  --sklearn-color-fitted-level-3: cornflowerblue;\n",
    184        "\n",
    185        "  /* Specific color for light theme */\n",
    186        "  --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
    187        "  --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
    188        "  --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
    189        "  --sklearn-color-icon: #696969;\n",
    190        "\n",
    191        "  @media (prefers-color-scheme: dark) {\n",
    192        "    /* Redefinition of color scheme for dark theme */\n",
    193        "    --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
    194        "    --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
    195        "    --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
    196        "    --sklearn-color-icon: #878787;\n",
    197        "  }\n",
    198        "}\n",
    199        "\n",
    200        "#sk-container-id-7 {\n",
    201        "  color: var(--sklearn-color-text);\n",
    202        "}\n",
    203        "\n",
    204        "#sk-container-id-7 pre {\n",
    205        "  padding: 0;\n",
    206        "}\n",
    207        "\n",
    208        "#sk-container-id-7 input.sk-hidden--visually {\n",
    209        "  border: 0;\n",
    210        "  clip: rect(1px 1px 1px 1px);\n",
    211        "  clip: rect(1px, 1px, 1px, 1px);\n",
    212        "  height: 1px;\n",
    213        "  margin: -1px;\n",
    214        "  overflow: hidden;\n",
    215        "  padding: 0;\n",
    216        "  position: absolute;\n",
    217        "  width: 1px;\n",
    218        "}\n",
    219        "\n",
    220        "#sk-container-id-7 div.sk-dashed-wrapped {\n",
    221        "  border: 1px dashed var(--sklearn-color-line);\n",
    222        "  margin: 0 0.4em 0.5em 0.4em;\n",
    223        "  box-sizing: border-box;\n",
    224        "  padding-bottom: 0.4em;\n",
    225        "  background-color: var(--sklearn-color-background);\n",
    226        "}\n",
    227        "\n",
    228        "#sk-container-id-7 div.sk-container {\n",
    229        "  /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
    230        "     but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
    231        "     so we also need the `!important` here to be able to override the\n",
    232        "     default hidden behavior on the sphinx rendered scikit-learn.org.\n",
    233        "     See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
    234        "  display: inline-block !important;\n",
    235        "  position: relative;\n",
    236        "}\n",
    237        "\n",
    238        "#sk-container-id-7 div.sk-text-repr-fallback {\n",
    239        "  display: none;\n",
    240        "}\n",
    241        "\n",
    242        "div.sk-parallel-item,\n",
    243        "div.sk-serial,\n",
    244        "div.sk-item {\n",
    245        "  /* draw centered vertical line to link estimators */\n",
    246        "  background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
    247        "  background-size: 2px 100%;\n",
    248        "  background-repeat: no-repeat;\n",
    249        "  background-position: center center;\n",
    250        "}\n",
    251        "\n",
    252        "/* Parallel-specific style estimator block */\n",
    253        "\n",
    254        "#sk-container-id-7 div.sk-parallel-item::after {\n",
    255        "  content: \"\";\n",
    256        "  width: 100%;\n",
    257        "  border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
    258        "  flex-grow: 1;\n",
    259        "}\n",
    260        "\n",
    261        "#sk-container-id-7 div.sk-parallel {\n",
    262        "  display: flex;\n",
    263        "  align-items: stretch;\n",
    264        "  justify-content: center;\n",
    265        "  background-color: var(--sklearn-color-background);\n",
    266        "  position: relative;\n",
    267        "}\n",
    268        "\n",
    269        "#sk-container-id-7 div.sk-parallel-item {\n",
    270        "  display: flex;\n",
    271        "  flex-direction: column;\n",
    272        "}\n",
    273        "\n",
    274        "#sk-container-id-7 div.sk-parallel-item:first-child::after {\n",
    275        "  align-self: flex-end;\n",
    276        "  width: 50%;\n",
    277        "}\n",
    278        "\n",
    279        "#sk-container-id-7 div.sk-parallel-item:last-child::after {\n",
    280        "  align-self: flex-start;\n",
    281        "  width: 50%;\n",
    282        "}\n",
    283        "\n",
    284        "#sk-container-id-7 div.sk-parallel-item:only-child::after {\n",
    285        "  width: 0;\n",
    286        "}\n",
    287        "\n",
    288        "/* Serial-specific style estimator block */\n",
    289        "\n",
    290        "#sk-container-id-7 div.sk-serial {\n",
    291        "  display: flex;\n",
    292        "  flex-direction: column;\n",
    293        "  align-items: center;\n",
    294        "  background-color: var(--sklearn-color-background);\n",
    295        "  padding-right: 1em;\n",
    296        "  padding-left: 1em;\n",
    297        "}\n",
    298        "\n",
    299        "\n",
    300        "/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
    301        "clickable and can be expanded/collapsed.\n",
    302        "- Pipeline and ColumnTransformer use this feature and define the default style\n",
    303        "- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
    304        "*/\n",
    305        "\n",
    306        "/* Pipeline and ColumnTransformer style (default) */\n",
    307        "\n",
    308        "#sk-container-id-7 div.sk-toggleable {\n",
    309        "  /* Default theme specific background. It is overwritten whether we have a\n",
    310        "  specific estimator or a Pipeline/ColumnTransformer */\n",
    311        "  background-color: var(--sklearn-color-background);\n",
    312        "}\n",
    313        "\n",
    314        "/* Toggleable label */\n",
    315        "#sk-container-id-7 label.sk-toggleable__label {\n",
    316        "  cursor: pointer;\n",
    317        "  display: block;\n",
    318        "  width: 100%;\n",
    319        "  margin-bottom: 0;\n",
    320        "  padding: 0.5em;\n",
    321        "  box-sizing: border-box;\n",
    322        "  text-align: center;\n",
    323        "}\n",
    324        "\n",
    325        "#sk-container-id-7 label.sk-toggleable__label-arrow:before {\n",
    326        "  /* Arrow on the left of the label */\n",
    327        "  content: \"▸\";\n",
    328        "  float: left;\n",
    329        "  margin-right: 0.25em;\n",
    330        "  color: var(--sklearn-color-icon);\n",
    331        "}\n",
    332        "\n",
    333        "#sk-container-id-7 label.sk-toggleable__label-arrow:hover:before {\n",
    334        "  color: var(--sklearn-color-text);\n",
    335        "}\n",
    336        "\n",
    337        "/* Toggleable content - dropdown */\n",
    338        "\n",
    339        "#sk-container-id-7 div.sk-toggleable__content {\n",
    340        "  max-height: 0;\n",
    341        "  max-width: 0;\n",
    342        "  overflow: hidden;\n",
    343        "  text-align: left;\n",
    344        "  /* unfitted */\n",
    345        "  background-color: var(--sklearn-color-unfitted-level-0);\n",
    346        "}\n",
    347        "\n",
    348        "#sk-container-id-7 div.sk-toggleable__content.fitted {\n",
    349        "  /* fitted */\n",
    350        "  background-color: var(--sklearn-color-fitted-level-0);\n",
    351        "}\n",
    352        "\n",
    353        "#sk-container-id-7 div.sk-toggleable__content pre {\n",
    354        "  margin: 0.2em;\n",
    355        "  border-radius: 0.25em;\n",
    356        "  color: var(--sklearn-color-text);\n",
    357        "  /* unfitted */\n",
    358        "  background-color: var(--sklearn-color-unfitted-level-0);\n",
    359        "}\n",
    360        "\n",
    361        "#sk-container-id-7 div.sk-toggleable__content.fitted pre {\n",
    362        "  /* unfitted */\n",
    363        "  background-color: var(--sklearn-color-fitted-level-0);\n",
    364        "}\n",
    365        "\n",
    366        "#sk-container-id-7 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
    367        "  /* Expand drop-down */\n",
    368        "  max-height: 200px;\n",
    369        "  max-width: 100%;\n",
    370        "  overflow: auto;\n",
    371        "}\n",
    372        "\n",
    373        "#sk-container-id-7 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
    374        "  content: \"▾\";\n",
    375        "}\n",
    376        "\n",
    377        "/* Pipeline/ColumnTransformer-specific style */\n",
    378        "\n",
    379        "#sk-container-id-7 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
    380        "  color: var(--sklearn-color-text);\n",
    381        "  background-color: var(--sklearn-color-unfitted-level-2);\n",
    382        "}\n",
    383        "\n",
    384        "#sk-container-id-7 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
    385        "  background-color: var(--sklearn-color-fitted-level-2);\n",
    386        "}\n",
    387        "\n",
    388        "/* Estimator-specific style */\n",
    389        "\n",
    390        "/* Colorize estimator box */\n",
    391        "#sk-container-id-7 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
    392        "  /* unfitted */\n",
    393        "  background-color: var(--sklearn-color-unfitted-level-2);\n",
    394        "}\n",
    395        "\n",
    396        "#sk-container-id-7 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
    397        "  /* fitted */\n",
    398        "  background-color: var(--sklearn-color-fitted-level-2);\n",
    399        "}\n",
    400        "\n",
    401        "#sk-container-id-7 div.sk-label label.sk-toggleable__label,\n",
    402        "#sk-container-id-7 div.sk-label label {\n",
    403        "  /* The background is the default theme color */\n",
    404        "  color: var(--sklearn-color-text-on-default-background);\n",
    405        "}\n",
    406        "\n",
    407        "/* On hover, darken the color of the background */\n",
    408        "#sk-container-id-7 div.sk-label:hover label.sk-toggleable__label {\n",
    409        "  color: var(--sklearn-color-text);\n",
    410        "  background-color: var(--sklearn-color-unfitted-level-2);\n",
    411        "}\n",
    412        "\n",
    413        "/* Label box, darken color on hover, fitted */\n",
    414        "#sk-container-id-7 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
    415        "  color: var(--sklearn-color-text);\n",
    416        "  background-color: var(--sklearn-color-fitted-level-2);\n",
    417        "}\n",
    418        "\n",
    419        "/* Estimator label */\n",
    420        "\n",
    421        "#sk-container-id-7 div.sk-label label {\n",
    422        "  font-family: monospace;\n",
    423        "  font-weight: bold;\n",
    424        "  display: inline-block;\n",
    425        "  line-height: 1.2em;\n",
    426        "}\n",
    427        "\n",
    428        "#sk-container-id-7 div.sk-label-container {\n",
    429        "  text-align: center;\n",
    430        "}\n",
    431        "\n",
    432        "/* Estimator-specific */\n",
    433        "#sk-container-id-7 div.sk-estimator {\n",
    434        "  font-family: monospace;\n",
    435        "  border: 1px dotted var(--sklearn-color-border-box);\n",
    436        "  border-radius: 0.25em;\n",
    437        "  box-sizing: border-box;\n",
    438        "  margin-bottom: 0.5em;\n",
    439        "  /* unfitted */\n",
    440        "  background-color: var(--sklearn-color-unfitted-level-0);\n",
    441        "}\n",
    442        "\n",
    443        "#sk-container-id-7 div.sk-estimator.fitted {\n",
    444        "  /* fitted */\n",
    445        "  background-color: var(--sklearn-color-fitted-level-0);\n",
    446        "}\n",
    447        "\n",
    448        "/* on hover */\n",
    449        "#sk-container-id-7 div.sk-estimator:hover {\n",
    450        "  /* unfitted */\n",
    451        "  background-color: var(--sklearn-color-unfitted-level-2);\n",
    452        "}\n",
    453        "\n",
    454        "#sk-container-id-7 div.sk-estimator.fitted:hover {\n",
    455        "  /* fitted */\n",
    456        "  background-color: var(--sklearn-color-fitted-level-2);\n",
    457        "}\n",
    458        "\n",
    459        "/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
    460        "\n",
    461        "/* Common style for \"i\" and \"?\" */\n",
    462        "\n",
    463        ".sk-estimator-doc-link,\n",
    464        "a:link.sk-estimator-doc-link,\n",
    465        "a:visited.sk-estimator-doc-link {\n",
    466        "  float: right;\n",
    467        "  font-size: smaller;\n",
    468        "  line-height: 1em;\n",
    469        "  font-family: monospace;\n",
    470        "  background-color: var(--sklearn-color-background);\n",
    471        "  border-radius: 1em;\n",
    472        "  height: 1em;\n",
    473        "  width: 1em;\n",
    474        "  text-decoration: none !important;\n",
    475        "  margin-left: 1ex;\n",
    476        "  /* unfitted */\n",
    477        "  border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
    478        "  color: var(--sklearn-color-unfitted-level-1);\n",
    479        "}\n",
    480        "\n",
    481        ".sk-estimator-doc-link.fitted,\n",
    482        "a:link.sk-estimator-doc-link.fitted,\n",
    483        "a:visited.sk-estimator-doc-link.fitted {\n",
    484        "  /* fitted */\n",
    485        "  border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
    486        "  color: var(--sklearn-color-fitted-level-1);\n",
    487        "}\n",
    488        "\n",
    489        "/* On hover */\n",
    490        "div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
    491        ".sk-estimator-doc-link:hover,\n",
    492        "div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
    493        ".sk-estimator-doc-link:hover {\n",
    494        "  /* unfitted */\n",
    495        "  background-color: var(--sklearn-color-unfitted-level-3);\n",
    496        "  color: var(--sklearn-color-background);\n",
    497        "  text-decoration: none;\n",
    498        "}\n",
    499        "\n",
    500        "div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
    501        ".sk-estimator-doc-link.fitted:hover,\n",
    502        "div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
    503        ".sk-estimator-doc-link.fitted:hover {\n",
    504        "  /* fitted */\n",
    505        "  background-color: var(--sklearn-color-fitted-level-3);\n",
    506        "  color: var(--sklearn-color-background);\n",
    507        "  text-decoration: none;\n",
    508        "}\n",
    509        "\n",
    510        "/* Span, style for the box shown on hovering the info icon */\n",
    511        ".sk-estimator-doc-link span {\n",
    512        "  display: none;\n",
    513        "  z-index: 9999;\n",
    514        "  position: relative;\n",
    515        "  font-weight: normal;\n",
    516        "  right: .2ex;\n",
    517        "  padding: .5ex;\n",
    518        "  margin: .5ex;\n",
    519        "  width: min-content;\n",
    520        "  min-width: 20ex;\n",
    521        "  max-width: 50ex;\n",
    522        "  color: var(--sklearn-color-text);\n",
    523        "  box-shadow: 2pt 2pt 4pt #999;\n",
    524        "  /* unfitted */\n",
    525        "  background: var(--sklearn-color-unfitted-level-0);\n",
    526        "  border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
    527        "}\n",
    528        "\n",
    529        ".sk-estimator-doc-link.fitted span {\n",
    530        "  /* fitted */\n",
    531        "  background: var(--sklearn-color-fitted-level-0);\n",
    532        "  border: var(--sklearn-color-fitted-level-3);\n",
    533        "}\n",
    534        "\n",
    535        ".sk-estimator-doc-link:hover span {\n",
    536        "  display: block;\n",
    537        "}\n",
    538        "\n",
    539        "/* \"?\"-specific style due to the `<a>` HTML tag */\n",
    540        "\n",
    541        "#sk-container-id-7 a.estimator_doc_link {\n",
    542        "  float: right;\n",
    543        "  font-size: 1rem;\n",
    544        "  line-height: 1em;\n",
    545        "  font-family: monospace;\n",
    546        "  background-color: var(--sklearn-color-background);\n",
    547        "  border-radius: 1rem;\n",
    548        "  height: 1rem;\n",
    549        "  width: 1rem;\n",
    550        "  text-decoration: none;\n",
    551        "  /* unfitted */\n",
    552        "  color: var(--sklearn-color-unfitted-level-1);\n",
    553        "  border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
    554        "}\n",
    555        "\n",
    556        "#sk-container-id-7 a.estimator_doc_link.fitted {\n",
    557        "  /* fitted */\n",
    558        "  border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
    559        "  color: var(--sklearn-color-fitted-level-1);\n",
    560        "}\n",
    561        "\n",
    562        "/* On hover */\n",
    563        "#sk-container-id-7 a.estimator_doc_link:hover {\n",
    564        "  /* unfitted */\n",
    565        "  background-color: var(--sklearn-color-unfitted-level-3);\n",
    566        "  color: var(--sklearn-color-background);\n",
    567        "  text-decoration: none;\n",
    568        "}\n",
    569        "\n",
    570        "#sk-container-id-7 a.estimator_doc_link.fitted:hover {\n",
    571        "  /* fitted */\n",
    572        "  background-color: var(--sklearn-color-fitted-level-3);\n",
    573        "}\n",
    574        "</style><div id=\"sk-container-id-7\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>Pipeline(steps=[(&#x27;vect&#x27;, CountVectorizer()), (&#x27;clf&#x27;, MultinomialNB())])</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-19\" type=\"checkbox\" ><label for=\"sk-estimator-id-19\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">&nbsp;&nbsp;Pipeline<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.5/modules/generated/sklearn.pipeline.Pipeline.html\">?<span>Documentation for Pipeline</span></a><span class=\"sk-estimator-doc-link fitted\">i<span>Fitted</span></span></label><div class=\"sk-toggleable__content fitted\"><pre>Pipeline(steps=[(&#x27;vect&#x27;, CountVectorizer()), (&#x27;clf&#x27;, MultinomialNB())])</pre></div> </div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-20\" type=\"checkbox\" ><label for=\"sk-estimator-id-20\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">&nbsp;CountVectorizer<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.5/modules/generated/sklearn.feature_extraction.text.CountVectorizer.html\">?<span>Documentation for CountVectorizer</span></a></label><div class=\"sk-toggleable__content fitted\"><pre>CountVectorizer()</pre></div> </div></div><div class=\"sk-item\"><div 
class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-21\" type=\"checkbox\" ><label for=\"sk-estimator-id-21\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">&nbsp;MultinomialNB<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.5/modules/generated/sklearn.naive_bayes.MultinomialNB.html\">?<span>Documentation for MultinomialNB</span></a></label><div class=\"sk-toggleable__content fitted\"><pre>MultinomialNB()</pre></div> </div></div></div></div></div></div>"
    575       ],
    576       "text/plain": [
    577        "Pipeline(steps=[('vect', CountVectorizer()), ('clf', MultinomialNB())])"
    578       ]
    579      },
    580      "execution_count": 35,
    581      "metadata": {},
    582      "output_type": "execute_result"
    583     }
    584    ],
    585    "source": [
    586     "from sklearn.model_selection import train_test_split\n",
    587     "from sklearn.naive_bayes import MultinomialNB\n",
    588     "from sklearn.pipeline import Pipeline\n",
    589     "from sklearn.feature_extraction.text import CountVectorizer\n",
    590     "\n",
    591     "# First split for training and initial test split\n",
    592     "X_train, X_test, y_train, y_test = train_test_split(df['Email Text'], df['Email Type'])\n",
    593     "\n",
    594     "\n",
    595     "# Create pipeline\n",
    596     "pipeline = Pipeline([\n",
    597     "    ('vect', CountVectorizer()),  # Use CountVectorizer to convert text into token counts\n",
    598     "    ('clf', MultinomialNB()),     # Naive Bayes classifier\n",
    599     "])\n",
    600     "\n",
    601     "# Fit the model on the training data\n",
    602     "pipeline.fit(X_train, y_train)"
    603    ]
    604   },
    605   {
    606    "cell_type": "code",
    607    "execution_count": 36,
    608    "metadata": {},
    609    "outputs": [
    610     {
    611      "data": {
    612       "text/plain": [
    613        "0.9654432281605495"
    614       ]
    615      },
    616      "execution_count": 36,
    617      "metadata": {},
    618      "output_type": "execute_result"
    619     }
    620    ],
    621    "source": [
    622     "from sklearn.metrics import accuracy_score\n",
    623     "\n",
    624     "pred = pipeline.predict(X_test)\n",
    625     "accuracy_score(y_pred=pred, y_true=y_test)"
    626    ]
    627   }
    628  ],
    629  "metadata": {
    630   "kernelspec": {
    631    "display_name": ".venv",
    632    "language": "python",
    633    "name": "python3"
    634   },
    635   "language_info": {
    636    "codemirror_mode": {
    637     "name": "ipython",
    638     "version": 3
    639    },
    640    "file_extension": ".py",
    641    "mimetype": "text/x-python",
    642    "name": "python",
    643    "nbconvert_exporter": "python",
    644    "pygments_lexer": "ipython3",
    645    "version": "3.11.2"
    646   }
    647  },
    648  "nbformat": 4,
    649  "nbformat_minor": 2
    650 }