machinelearning

Machine learning code
git clone git://git.laack.co/machinelearning.git
Log | Files | Refs

PhishingClassification.ipynb (60987B)


      1 {
      2  "cells": [
      3   {
      4    "cell_type": "markdown",
      5    "metadata": {},
      6    "source": [
      7     "Dataset: [Phishing Emails (Kaggle)](https://www.kaggle.com/datasets/subhajournal/phishingemails)"
      8    ]
      9   },
     10   {
     11    "cell_type": "code",
     12    "execution_count": 33,
     13    "metadata": {},
     14    "outputs": [
     15     {
     16      "data": {
     17       "text/html": [
     18        "<div>\n",
     19        "<style scoped>\n",
     20        "    .dataframe tbody tr th:only-of-type {\n",
     21        "        vertical-align: middle;\n",
     22        "    }\n",
     23        "\n",
     24        "    .dataframe tbody tr th {\n",
     25        "        vertical-align: top;\n",
     26        "    }\n",
     27        "\n",
     28        "    .dataframe thead th {\n",
     29        "        text-align: right;\n",
     30        "    }\n",
     31        "</style>\n",
     32        "<table border=\"1\" class=\"dataframe\">\n",
     33        "  <thead>\n",
     34        "    <tr style=\"text-align: right;\">\n",
     35        "      <th></th>\n",
     36        "      <th>Unnamed: 0</th>\n",
     37        "      <th>Email Text</th>\n",
     38        "      <th>Email Type</th>\n",
     39        "    </tr>\n",
     40        "  </thead>\n",
     41        "  <tbody>\n",
     42        "    <tr>\n",
     43        "      <th>0</th>\n",
     44        "      <td>0</td>\n",
     45        "      <td>re : 6 . 1100 , disc : uniformitarianism , re ...</td>\n",
     46        "      <td>Safe Email</td>\n",
     47        "    </tr>\n",
     48        "    <tr>\n",
     49        "      <th>1</th>\n",
     50        "      <td>1</td>\n",
     51        "      <td>the other side of * galicismos * * galicismo *...</td>\n",
     52        "      <td>Safe Email</td>\n",
     53        "    </tr>\n",
     54        "    <tr>\n",
     55        "      <th>2</th>\n",
     56        "      <td>2</td>\n",
     57        "      <td>re : equistar deal tickets are you still avail...</td>\n",
     58        "      <td>Safe Email</td>\n",
     59        "    </tr>\n",
     60        "    <tr>\n",
     61        "      <th>3</th>\n",
     62        "      <td>3</td>\n",
     63        "      <td>\\nHello I am your hot lil horny toy.\\n    I am...</td>\n",
     64        "      <td>Phishing Email</td>\n",
     65        "    </tr>\n",
     66        "    <tr>\n",
     67        "      <th>4</th>\n",
     68        "      <td>4</td>\n",
     69        "      <td>software at incredibly low prices ( 86 % lower...</td>\n",
     70        "      <td>Phishing Email</td>\n",
     71        "    </tr>\n",
     72        "    <tr>\n",
     73        "      <th>...</th>\n",
     74        "      <td>...</td>\n",
     75        "      <td>...</td>\n",
     76        "      <td>...</td>\n",
     77        "    </tr>\n",
     78        "    <tr>\n",
     79        "      <th>18645</th>\n",
     80        "      <td>18646</td>\n",
     81        "      <td>date a lonely housewife always wanted to date ...</td>\n",
     82        "      <td>Phishing Email</td>\n",
     83        "    </tr>\n",
     84        "    <tr>\n",
     85        "      <th>18646</th>\n",
     86        "      <td>18647</td>\n",
     87        "      <td>request submitted : access request for anita ....</td>\n",
     88        "      <td>Safe Email</td>\n",
     89        "    </tr>\n",
     90        "    <tr>\n",
     91        "      <th>18647</th>\n",
     92        "      <td>18648</td>\n",
     93        "      <td>re : important - prc mtg hi dorn &amp; john , as y...</td>\n",
     94        "      <td>Safe Email</td>\n",
     95        "    </tr>\n",
     96        "    <tr>\n",
     97        "      <th>18648</th>\n",
     98        "      <td>18649</td>\n",
     99        "      <td>press clippings - letter on californian utilit...</td>\n",
    100        "      <td>Safe Email</td>\n",
    101        "    </tr>\n",
    102        "    <tr>\n",
    103        "      <th>18649</th>\n",
    104        "      <td>18650</td>\n",
    105        "      <td>empty</td>\n",
    106        "      <td>Phishing Email</td>\n",
    107        "    </tr>\n",
    108        "  </tbody>\n",
    109        "</table>\n",
    110        "<p>18650 rows × 3 columns</p>\n",
    111        "</div>"
    112       ],
    113       "text/plain": [
    114        "       Unnamed: 0                                         Email Text  \\\n",
    115        "0               0  re : 6 . 1100 , disc : uniformitarianism , re ...   \n",
    116        "1               1  the other side of * galicismos * * galicismo *...   \n",
    117        "2               2  re : equistar deal tickets are you still avail...   \n",
    118        "3               3  \\nHello I am your hot lil horny toy.\\n    I am...   \n",
    119        "4               4  software at incredibly low prices ( 86 % lower...   \n",
    120        "...           ...                                                ...   \n",
    121        "18645       18646  date a lonely housewife always wanted to date ...   \n",
    122        "18646       18647  request submitted : access request for anita ....   \n",
    123        "18647       18648  re : important - prc mtg hi dorn & john , as y...   \n",
    124        "18648       18649  press clippings - letter on californian utilit...   \n",
    125        "18649       18650                                              empty   \n",
    126        "\n",
    127        "           Email Type  \n",
    128        "0          Safe Email  \n",
    129        "1          Safe Email  \n",
    130        "2          Safe Email  \n",
    131        "3      Phishing Email  \n",
    132        "4      Phishing Email  \n",
    133        "...               ...  \n",
    134        "18645  Phishing Email  \n",
    135        "18646      Safe Email  \n",
    136        "18647      Safe Email  \n",
    137        "18648      Safe Email  \n",
    138        "18649  Phishing Email  \n",
    139        "\n",
    140        "[18650 rows x 3 columns]"
    141       ]
    142      },
    143      "execution_count": 33,
    144      "metadata": {},
    145      "output_type": "execute_result"
    146     }
    147    ],
    148    "source": [
    149     "import pandas as pd \n",
    150     "\n",
    151     "df = pd.read_csv('../datasets/phishing/Phishing_Email.csv')\n",
    152     "df"
    153    ]
    154   },
    155   {
    156    "cell_type": "code",
    157    "execution_count": 34,
    158    "metadata": {},
    159    "outputs": [],
    160    "source": [
    161     "from sklearn.naive_bayes import CategoricalNB\n",
    162     "import keras\n",
    163     "import numpy as np\n",
    164     "\n",
    165     "def toStr(inp):\n",
    166     "    \"\"\"Coerce a raw cell value to text (str(NaN) yields 'nan').\"\"\"\n",
    167     "    return str(inp)\n",
    168     "\n",
    169     "layer = keras.layers.TextVectorization()\n",
    170     "df['Email Text'] = df['Email Text'].apply(toStr)\n",
    171     "arr = df['Email Text'].tolist()  # one document per email, not one giant joined string\n",
    172     "layer.adapt(arr)  # NOTE(review): vocabulary is fit on all rows, before the train/test split in the next cell"
    173    ]
    174   },
    175   {
    176    "cell_type": "code",
    177    "execution_count": 35,
    178    "metadata": {},
    179    "outputs": [],
    180    "source": [
    181     "from sklearn.model_selection import train_test_split\n",
    182     "# Seeded, stratified 75% / 12.5% / 12.5% train / val / test split so reruns are reproducible\n",
    183     "X_train, X_test, y_train, y_test = train_test_split(df.drop('Email Type', axis=1), df['Email Type'], stratify=df['Email Type'], random_state=42)\n",
    184     "X_val, X_test, y_val, y_test = train_test_split(X_test, y_test, test_size=.5, stratify=y_test, random_state=42)"
    185    ]
    186   },
    187   {
    188    "cell_type": "code",
    189    "execution_count": 36,
    190    "metadata": {},
    191    "outputs": [
    192     {
    193      "data": {
    194       "text/plain": [
    195        "191673"
    196       ]
    197      },
    198      "execution_count": 36,
    199      "metadata": {},
    200      "output_type": "execute_result"
    201     }
    202    ],
    203    "source": [
    204     "# Size of the fitted vocabulary (padding and OOV entries included)\n",
    205     "layer.vocabulary_size()"
    206    ]
    207   },
    208   {
    209    "cell_type": "code",
    210    "execution_count": 37,
    211    "metadata": {},
    212    "outputs": [
    213     {
    214      "data": {
    215       "text/plain": [
    216        "<tf.Tensor: shape=(1, 5), dtype=int64, numpy=array([[    2, 12030,     9,    12, 10064]])>"
    217       ]
    218      },
    219      "execution_count": 37,
    220      "metadata": {},
    221      "output_type": "execute_result"
    222     }
    223    ],
    224    "source": [
    225     "layer(['the fuck is that shit'])"
    226    ]
    227   },
    228   {
    229    "cell_type": "code",
    230    "execution_count": 38,
    231    "metadata": {},
    232    "outputs": [
    233     {
    234      "ename": "KeyboardInterrupt",
    235      "evalue": "",
    236      "output_type": "error",
    237      "traceback": [
    238       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
    239       "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
    240       "Cell \u001b[0;32mIn[38], line 4\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mapplyLayer\u001b[39m(text):\n\u001b[1;32m      2\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m np\u001b[38;5;241m.\u001b[39marray(layer(text))\n\u001b[0;32m----> 4\u001b[0m X_train[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mEmail Text\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[43mX_train\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mEmail Text\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply\u001b[49m\u001b[43m(\u001b[49m\u001b[43mapplyLayer\u001b[49m\u001b[43m)\u001b[49m\n",
    241       "File \u001b[0;32m~/gitRepos/machineLearning/.venv/lib/python3.11/site-packages/pandas/core/series.py:4924\u001b[0m, in \u001b[0;36mSeries.apply\u001b[0;34m(self, func, convert_dtype, args, by_row, **kwargs)\u001b[0m\n\u001b[1;32m   4789\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mapply\u001b[39m(\n\u001b[1;32m   4790\u001b[0m     \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m   4791\u001b[0m     func: AggFuncType,\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m   4796\u001b[0m     \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs,\n\u001b[1;32m   4797\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m DataFrame \u001b[38;5;241m|\u001b[39m Series:\n\u001b[1;32m   4798\u001b[0m \u001b[38;5;250m    \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m   4799\u001b[0m \u001b[38;5;124;03m    Invoke function on values of Series.\u001b[39;00m\n\u001b[1;32m   4800\u001b[0m \n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m   4915\u001b[0m \u001b[38;5;124;03m    dtype: float64\u001b[39;00m\n\u001b[1;32m   4916\u001b[0m \u001b[38;5;124;03m    \"\"\"\u001b[39;00m\n\u001b[1;32m   4917\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mSeriesApply\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   4918\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m   4919\u001b[0m \u001b[43m        \u001b[49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   4920\u001b[0m \u001b[43m        \u001b[49m\u001b[43mconvert_dtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconvert_dtype\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   4921\u001b[0m \u001b[43m        \u001b[49m\u001b[43mby_row\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mby_row\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   4922\u001b[0m \u001b[43m        \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   
4923\u001b[0m \u001b[43m        \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m-> 4924\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
    242       "File \u001b[0;32m~/gitRepos/machineLearning/.venv/lib/python3.11/site-packages/pandas/core/apply.py:1427\u001b[0m, in \u001b[0;36mSeriesApply.apply\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m   1424\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mapply_compat()\n\u001b[1;32m   1426\u001b[0m \u001b[38;5;66;03m# self.func is Callable\u001b[39;00m\n\u001b[0;32m-> 1427\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply_standard\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
    243       "File \u001b[0;32m~/gitRepos/machineLearning/.venv/lib/python3.11/site-packages/pandas/core/apply.py:1507\u001b[0m, in \u001b[0;36mSeriesApply.apply_standard\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m   1501\u001b[0m \u001b[38;5;66;03m# row-wise access\u001b[39;00m\n\u001b[1;32m   1502\u001b[0m \u001b[38;5;66;03m# apply doesn't have a `na_action` keyword and for backward compat reasons\u001b[39;00m\n\u001b[1;32m   1503\u001b[0m \u001b[38;5;66;03m# we need to give `na_action=\"ignore\"` for categorical data.\u001b[39;00m\n\u001b[1;32m   1504\u001b[0m \u001b[38;5;66;03m# TODO: remove the `na_action=\"ignore\"` when that default has been changed in\u001b[39;00m\n\u001b[1;32m   1505\u001b[0m \u001b[38;5;66;03m#  Categorical (GH51645).\u001b[39;00m\n\u001b[1;32m   1506\u001b[0m action \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mignore\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(obj\u001b[38;5;241m.\u001b[39mdtype, CategoricalDtype) \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m-> 1507\u001b[0m mapped \u001b[38;5;241m=\u001b[39m \u001b[43mobj\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_map_values\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   1508\u001b[0m \u001b[43m    \u001b[49m\u001b[43mmapper\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcurried\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mna_action\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43maction\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconvert\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mconvert_dtype\u001b[49m\n\u001b[1;32m   1509\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1511\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(mapped) \u001b[38;5;129;01mand\u001b[39;00m 
\u001b[38;5;28misinstance\u001b[39m(mapped[\u001b[38;5;241m0\u001b[39m], ABCSeries):\n\u001b[1;32m   1512\u001b[0m     \u001b[38;5;66;03m# GH#43986 Need to do list(mapped) in order to get treated as nested\u001b[39;00m\n\u001b[1;32m   1513\u001b[0m     \u001b[38;5;66;03m#  See also GH#25959 regarding EA support\u001b[39;00m\n\u001b[1;32m   1514\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m obj\u001b[38;5;241m.\u001b[39m_constructor_expanddim(\u001b[38;5;28mlist\u001b[39m(mapped), index\u001b[38;5;241m=\u001b[39mobj\u001b[38;5;241m.\u001b[39mindex)\n",
    244       "File \u001b[0;32m~/gitRepos/machineLearning/.venv/lib/python3.11/site-packages/pandas/core/base.py:921\u001b[0m, in \u001b[0;36mIndexOpsMixin._map_values\u001b[0;34m(self, mapper, na_action, convert)\u001b[0m\n\u001b[1;32m    918\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(arr, ExtensionArray):\n\u001b[1;32m    919\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m arr\u001b[38;5;241m.\u001b[39mmap(mapper, na_action\u001b[38;5;241m=\u001b[39mna_action)\n\u001b[0;32m--> 921\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43malgorithms\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmap_array\u001b[49m\u001b[43m(\u001b[49m\u001b[43marr\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmapper\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mna_action\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mna_action\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconvert\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconvert\u001b[49m\u001b[43m)\u001b[49m\n",
    245       "File \u001b[0;32m~/gitRepos/machineLearning/.venv/lib/python3.11/site-packages/pandas/core/algorithms.py:1743\u001b[0m, in \u001b[0;36mmap_array\u001b[0;34m(arr, mapper, na_action, convert)\u001b[0m\n\u001b[1;32m   1741\u001b[0m values \u001b[38;5;241m=\u001b[39m arr\u001b[38;5;241m.\u001b[39mastype(\u001b[38;5;28mobject\u001b[39m, copy\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[1;32m   1742\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m na_action \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m-> 1743\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mlib\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmap_infer\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalues\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmapper\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconvert\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconvert\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1744\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m   1745\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m lib\u001b[38;5;241m.\u001b[39mmap_infer_mask(\n\u001b[1;32m   1746\u001b[0m         values, mapper, mask\u001b[38;5;241m=\u001b[39misna(values)\u001b[38;5;241m.\u001b[39mview(np\u001b[38;5;241m.\u001b[39muint8), convert\u001b[38;5;241m=\u001b[39mconvert\n\u001b[1;32m   1747\u001b[0m     )\n",
    246       "File \u001b[0;32mlib.pyx:2972\u001b[0m, in \u001b[0;36mpandas._libs.lib.map_infer\u001b[0;34m()\u001b[0m\n",
    247       "Cell \u001b[0;32mIn[38], line 2\u001b[0m, in \u001b[0;36mapplyLayer\u001b[0;34m(text)\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mapplyLayer\u001b[39m(text):\n\u001b[0;32m----> 2\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m np\u001b[38;5;241m.\u001b[39marray(\u001b[43mlayer\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtext\u001b[49m\u001b[43m)\u001b[49m)\n",
    248       "File \u001b[0;32m~/gitRepos/machineLearning/.venv/lib/python3.11/site-packages/keras/src/utils/traceback_utils.py:117\u001b[0m, in \u001b[0;36mfilter_traceback.<locals>.error_handler\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m    115\u001b[0m filtered_tb \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m    116\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 117\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    118\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m    119\u001b[0m     filtered_tb \u001b[38;5;241m=\u001b[39m _process_traceback_frames(e\u001b[38;5;241m.\u001b[39m__traceback__)\n",
    249       "File \u001b[0;32m~/gitRepos/machineLearning/.venv/lib/python3.11/site-packages/keras/src/layers/layer.py:846\u001b[0m, in \u001b[0;36mLayer.__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m    844\u001b[0m         outputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28msuper\u001b[39m()\u001b[38;5;241m.\u001b[39m\u001b[38;5;21m__call__\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m    845\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 846\u001b[0m     outputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__call__\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    847\u001b[0m \u001b[38;5;66;03m# Change the layout for the layer output if needed.\u001b[39;00m\n\u001b[1;32m    848\u001b[0m \u001b[38;5;66;03m# This is useful for relayout intermediate tensor in the model\u001b[39;00m\n\u001b[1;32m    849\u001b[0m \u001b[38;5;66;03m# to achieve the optimal performance.\u001b[39;00m\n\u001b[1;32m    850\u001b[0m distribution \u001b[38;5;241m=\u001b[39m distribution_lib\u001b[38;5;241m.\u001b[39mdistribution()\n",
    250       "File \u001b[0;32m~/gitRepos/machineLearning/.venv/lib/python3.11/site-packages/keras/src/utils/traceback_utils.py:117\u001b[0m, in \u001b[0;36mfilter_traceback.<locals>.error_handler\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m    115\u001b[0m filtered_tb \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m    116\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 117\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    118\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m    119\u001b[0m     filtered_tb \u001b[38;5;241m=\u001b[39m _process_traceback_frames(e\u001b[38;5;241m.\u001b[39m__traceback__)\n",
    251       "File \u001b[0;32m~/gitRepos/machineLearning/.venv/lib/python3.11/site-packages/keras/src/ops/operation.py:48\u001b[0m, in \u001b[0;36mOperation.__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m     43\u001b[0m             call_fn \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcall\n\u001b[1;32m     44\u001b[0m     call_fn \u001b[38;5;241m=\u001b[39m traceback_utils\u001b[38;5;241m.\u001b[39minject_argument_info_in_traceback(\n\u001b[1;32m     45\u001b[0m         call_fn,\n\u001b[1;32m     46\u001b[0m         object_name\u001b[38;5;241m=\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.call()\u001b[39m\u001b[38;5;124m\"\u001b[39m),\n\u001b[1;32m     47\u001b[0m     )\n\u001b[0;32m---> 48\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mcall_fn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     50\u001b[0m \u001b[38;5;66;03m# Plain flow.\u001b[39;00m\n\u001b[1;32m     51\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m any_symbolic_tensors(args, kwargs):\n",
    252       "File \u001b[0;32m~/gitRepos/machineLearning/.venv/lib/python3.11/site-packages/keras/src/utils/traceback_utils.py:156\u001b[0m, in \u001b[0;36minject_argument_info_in_traceback.<locals>.error_handler\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m    154\u001b[0m bound_signature \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m    155\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 156\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    157\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m    158\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(e, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m_keras_call_info_injected\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[1;32m    159\u001b[0m         \u001b[38;5;66;03m# Only inject info for the innermost failing call\u001b[39;00m\n",
    253       "File \u001b[0;32m~/gitRepos/machineLearning/.venv/lib/python3.11/site-packages/keras/src/layers/preprocessing/text_vectorization.py:574\u001b[0m, in \u001b[0;36mTextVectorization.call\u001b[0;34m(self, inputs)\u001b[0m\n\u001b[1;32m    569\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(\n\u001b[1;32m    570\u001b[0m     inputs, (tf\u001b[38;5;241m.\u001b[39mTensor, tf\u001b[38;5;241m.\u001b[39mRaggedTensor, np\u001b[38;5;241m.\u001b[39mndarray, \u001b[38;5;28mlist\u001b[39m, \u001b[38;5;28mtuple\u001b[39m)\n\u001b[1;32m    571\u001b[0m ):\n\u001b[1;32m    572\u001b[0m     inputs \u001b[38;5;241m=\u001b[39m tf\u001b[38;5;241m.\u001b[39mconvert_to_tensor(backend\u001b[38;5;241m.\u001b[39mconvert_to_numpy(inputs))\n\u001b[0;32m--> 574\u001b[0m inputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_preprocess\u001b[49m\u001b[43m(\u001b[49m\u001b[43minputs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    576\u001b[0m \u001b[38;5;66;03m# If we're not doing any output processing, return right away.\u001b[39;00m\n\u001b[1;32m    577\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_output_mode \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n",
    254       "File \u001b[0;32m~/gitRepos/machineLearning/.venv/lib/python3.11/site-packages/keras/src/layers/preprocessing/text_vectorization.py:553\u001b[0m, in \u001b[0;36mTextVectorization._preprocess\u001b[0;34m(self, inputs)\u001b[0m\n\u001b[1;32m    549\u001b[0m         inputs \u001b[38;5;241m=\u001b[39m tf\u001b[38;5;241m.\u001b[39msqueeze(inputs, axis\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m)\n\u001b[1;32m    550\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_split \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mwhitespace\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m    551\u001b[0m     \u001b[38;5;66;03m# This treats multiple whitespaces as one whitespace, and strips\u001b[39;00m\n\u001b[1;32m    552\u001b[0m     \u001b[38;5;66;03m# leading and trailing whitespace.\u001b[39;00m\n\u001b[0;32m--> 553\u001b[0m     inputs \u001b[38;5;241m=\u001b[39m \u001b[43mtf\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstrings\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msplit\u001b[49m\u001b[43m(\u001b[49m\u001b[43minputs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    554\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_split \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcharacter\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m    555\u001b[0m     inputs \u001b[38;5;241m=\u001b[39m tf\u001b[38;5;241m.\u001b[39mstrings\u001b[38;5;241m.\u001b[39municode_split(inputs, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mUTF-8\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
    255       "File \u001b[0;32m~/gitRepos/machineLearning/.venv/lib/python3.11/site-packages/tensorflow/python/util/traceback_utils.py:150\u001b[0m, in \u001b[0;36mfilter_traceback.<locals>.error_handler\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m    148\u001b[0m filtered_tb \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m    149\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 150\u001b[0m   \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    151\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m    152\u001b[0m   filtered_tb \u001b[38;5;241m=\u001b[39m _process_traceback_frames(e\u001b[38;5;241m.\u001b[39m__traceback__)\n",
    256       "File \u001b[0;32m~/gitRepos/machineLearning/.venv/lib/python3.11/site-packages/tensorflow/python/util/dispatch.py:1260\u001b[0m, in \u001b[0;36madd_dispatch_support.<locals>.decorator.<locals>.op_dispatch_handler\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m   1258\u001b[0m \u001b[38;5;66;03m# Fallback dispatch system (dispatch v1):\u001b[39;00m\n\u001b[1;32m   1259\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1260\u001b[0m   \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mdispatch_target\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1261\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (\u001b[38;5;167;01mTypeError\u001b[39;00m, \u001b[38;5;167;01mValueError\u001b[39;00m):\n\u001b[1;32m   1262\u001b[0m   \u001b[38;5;66;03m# Note: convert_to_eager_tensor currently raises a ValueError, not a\u001b[39;00m\n\u001b[1;32m   1263\u001b[0m   \u001b[38;5;66;03m# TypeError, when given unexpected types.  So we need to catch both.\u001b[39;00m\n\u001b[1;32m   1264\u001b[0m   result \u001b[38;5;241m=\u001b[39m dispatch(op_dispatch_handler, args, kwargs)\n",
    257       "File \u001b[0;32m~/gitRepos/machineLearning/.venv/lib/python3.11/site-packages/tensorflow/python/ops/ragged/ragged_string_ops.py:514\u001b[0m, in \u001b[0;36mstring_split_v2\u001b[0;34m(input, sep, maxsplit, name)\u001b[0m\n\u001b[1;32m    512\u001b[0m rank \u001b[38;5;241m=\u001b[39m \u001b[38;5;28minput\u001b[39m\u001b[38;5;241m.\u001b[39mshape\u001b[38;5;241m.\u001b[39mndims\n\u001b[1;32m    513\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m rank \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[0;32m--> 514\u001b[0m   \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mstring_split_v2\u001b[49m\u001b[43m(\u001b[49m\u001b[43marray_ops_stack\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstack\u001b[49m\u001b[43m(\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msep\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmaxsplit\u001b[49m\u001b[43m)\u001b[49m[\u001b[38;5;241m0\u001b[39m]\n\u001b[1;32m    515\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m rank \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m1\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m rank \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m    516\u001b[0m   sparse_result \u001b[38;5;241m=\u001b[39m string_ops\u001b[38;5;241m.\u001b[39mstring_split_v2(\n\u001b[1;32m    517\u001b[0m       \u001b[38;5;28minput\u001b[39m, sep\u001b[38;5;241m=\u001b[39msep, maxsplit\u001b[38;5;241m=\u001b[39mmaxsplit)\n",
    258       "File \u001b[0;32m~/gitRepos/machineLearning/.venv/lib/python3.11/site-packages/tensorflow/python/util/traceback_utils.py:150\u001b[0m, in \u001b[0;36mfilter_traceback.<locals>.error_handler\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m    148\u001b[0m filtered_tb \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m    149\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 150\u001b[0m   \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    151\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m    152\u001b[0m   filtered_tb \u001b[38;5;241m=\u001b[39m _process_traceback_frames(e\u001b[38;5;241m.\u001b[39m__traceback__)\n",
    259       "File \u001b[0;32m~/gitRepos/machineLearning/.venv/lib/python3.11/site-packages/tensorflow/python/util/dispatch.py:1260\u001b[0m, in \u001b[0;36madd_dispatch_support.<locals>.decorator.<locals>.op_dispatch_handler\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m   1258\u001b[0m \u001b[38;5;66;03m# Fallback dispatch system (dispatch v1):\u001b[39;00m\n\u001b[1;32m   1259\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1260\u001b[0m   \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mdispatch_target\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1261\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (\u001b[38;5;167;01mTypeError\u001b[39;00m, \u001b[38;5;167;01mValueError\u001b[39;00m):\n\u001b[1;32m   1262\u001b[0m   \u001b[38;5;66;03m# Note: convert_to_eager_tensor currently raises a ValueError, not a\u001b[39;00m\n\u001b[1;32m   1263\u001b[0m   \u001b[38;5;66;03m# TypeError, when given unexpected types.  So we need to catch both.\u001b[39;00m\n\u001b[1;32m   1264\u001b[0m   result \u001b[38;5;241m=\u001b[39m dispatch(op_dispatch_handler, args, kwargs)\n",
    260       "File \u001b[0;32m~/gitRepos/machineLearning/.venv/lib/python3.11/site-packages/tensorflow/python/ops/ragged/ragged_string_ops.py:516\u001b[0m, in \u001b[0;36mstring_split_v2\u001b[0;34m(input, sep, maxsplit, name)\u001b[0m\n\u001b[1;32m    514\u001b[0m   \u001b[38;5;28;01mreturn\u001b[39;00m string_split_v2(array_ops_stack\u001b[38;5;241m.\u001b[39mstack([\u001b[38;5;28minput\u001b[39m]), sep, maxsplit)[\u001b[38;5;241m0\u001b[39m]\n\u001b[1;32m    515\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m rank \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m1\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m rank \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 516\u001b[0m   sparse_result \u001b[38;5;241m=\u001b[39m \u001b[43mstring_ops\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstring_split_v2\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    517\u001b[0m \u001b[43m      \u001b[49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msep\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msep\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmaxsplit\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmaxsplit\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    518\u001b[0m   \u001b[38;5;28;01mreturn\u001b[39;00m ragged_tensor\u001b[38;5;241m.\u001b[39mRaggedTensor\u001b[38;5;241m.\u001b[39mfrom_value_rowids(\n\u001b[1;32m    519\u001b[0m       values\u001b[38;5;241m=\u001b[39msparse_result\u001b[38;5;241m.\u001b[39mvalues,\n\u001b[1;32m    520\u001b[0m       value_rowids\u001b[38;5;241m=\u001b[39msparse_result\u001b[38;5;241m.\u001b[39mindices[:, \u001b[38;5;241m0\u001b[39m],\n\u001b[1;32m    521\u001b[0m       nrows\u001b[38;5;241m=\u001b[39msparse_result\u001b[38;5;241m.\u001b[39mdense_shape[\u001b[38;5;241m0\u001b[39m],\n\u001b[1;32m    522\u001b[0m       validate\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[1;32m    523\u001b[0m 
\u001b[38;5;28;01melse\u001b[39;00m:\n",
    261       "File \u001b[0;32m~/gitRepos/machineLearning/.venv/lib/python3.11/site-packages/tensorflow/python/ops/string_ops.py:283\u001b[0m, in \u001b[0;36mstring_split_v2\u001b[0;34m(source, sep, maxsplit)\u001b[0m\n\u001b[1;32m    280\u001b[0m sep \u001b[38;5;241m=\u001b[39m ops\u001b[38;5;241m.\u001b[39mconvert_to_tensor(sep, dtype\u001b[38;5;241m=\u001b[39mdtypes\u001b[38;5;241m.\u001b[39mstring)\n\u001b[1;32m    281\u001b[0m source \u001b[38;5;241m=\u001b[39m ops\u001b[38;5;241m.\u001b[39mconvert_to_tensor(source, dtype\u001b[38;5;241m=\u001b[39mdtypes\u001b[38;5;241m.\u001b[39mstring)\n\u001b[0;32m--> 283\u001b[0m indices, values, shape \u001b[38;5;241m=\u001b[39m \u001b[43mgen_string_ops\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstring_split_v2\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    284\u001b[0m \u001b[43m    \u001b[49m\u001b[43msource\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msep\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msep\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmaxsplit\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmaxsplit\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    285\u001b[0m indices\u001b[38;5;241m.\u001b[39mset_shape([\u001b[38;5;28;01mNone\u001b[39;00m, \u001b[38;5;241m2\u001b[39m])\n\u001b[1;32m    286\u001b[0m values\u001b[38;5;241m.\u001b[39mset_shape([\u001b[38;5;28;01mNone\u001b[39;00m])\n",
    262       "File \u001b[0;32m~/gitRepos/machineLearning/.venv/lib/python3.11/site-packages/tensorflow/python/ops/gen_string_ops.py:1379\u001b[0m, in \u001b[0;36mstring_split_v2\u001b[0;34m(input, sep, maxsplit, name)\u001b[0m\n\u001b[1;32m   1377\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m tld\u001b[38;5;241m.\u001b[39mis_eager:\n\u001b[1;32m   1378\u001b[0m   \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1379\u001b[0m     _result \u001b[38;5;241m=\u001b[39m \u001b[43mpywrap_tfe\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mTFE_Py_FastPathExecute\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   1380\u001b[0m \u001b[43m      \u001b[49m\u001b[43m_ctx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mStringSplitV2\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msep\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmaxsplit\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmaxsplit\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1381\u001b[0m     _result \u001b[38;5;241m=\u001b[39m _StringSplitV2Output\u001b[38;5;241m.\u001b[39m_make(_result)\n\u001b[1;32m   1382\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m _result\n",
    263       "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
    264      ]
    265     }
    266    ],
    267    "source": [
    268     "def applyLayer(text):\n",
    269     "    return np.array(layer(text))\n",
    270     "\n",
    271     "X_train['Email Text'] = X_train['Email Text'].apply(applyLayer)"
    272    ]
    273   },
    274   {
    275    "cell_type": "code",
    276    "execution_count": null,
    277    "metadata": {},
    278    "outputs": [
    279     {
    280      "data": {
    281       "text/html": [
    282        "<div>\n",
    283        "<style scoped>\n",
    284        "    .dataframe tbody tr th:only-of-type {\n",
    285        "        vertical-align: middle;\n",
    286        "    }\n",
    287        "\n",
    288        "    .dataframe tbody tr th {\n",
    289        "        vertical-align: top;\n",
    290        "    }\n",
    291        "\n",
    292        "    .dataframe thead th {\n",
    293        "        text-align: right;\n",
    294        "    }\n",
    295        "</style>\n",
    296        "<table border=\"1\" class=\"dataframe\">\n",
    297        "  <thead>\n",
    298        "    <tr style=\"text-align: right;\">\n",
    299        "      <th></th>\n",
    300        "      <th>Unnamed: 0</th>\n",
    301        "      <th>Email Text</th>\n",
    302        "    </tr>\n",
    303        "  </thead>\n",
    304        "  <tbody>\n",
    305        "    <tr>\n",
    306        "      <th>66</th>\n",
    307        "      <td>66</td>\n",
    308        "      <td>[1010, 1266, 922, 102, 188, 3053, 3093, 87, 6,...</td>\n",
    309        "    </tr>\n",
    310        "    <tr>\n",
    311        "      <th>13986</th>\n",
    312        "      <td>13987</td>\n",
    313        "      <td>[10171, 6568, 209, 54, 155, 5006, 6273, 11, 2,...</td>\n",
    314        "    </tr>\n",
    315        "    <tr>\n",
    316        "      <th>13711</th>\n",
    317        "      <td>13712</td>\n",
    318        "      <td>[132755, 243, 143884, 656, 186, 180977, 110, 2...</td>\n",
    319        "    </tr>\n",
    320        "    <tr>\n",
    321        "      <th>9031</th>\n",
    322        "      <td>9032</td>\n",
    323        "      <td>[2859, 177, 3753, 2, 545, 8, 303, 8, 2, 2859, ...</td>\n",
    324        "    </tr>\n",
    325        "    <tr>\n",
    326        "      <th>8512</th>\n",
    327        "      <td>8513</td>\n",
    328        "      <td>[934, 934, 877, 13, 580, 129, 121, 65, 4, 336,...</td>\n",
    329        "    </tr>\n",
    330        "  </tbody>\n",
    331        "</table>\n",
    332        "</div>"
    333       ],
    334       "text/plain": [
    335        "       Unnamed: 0                                         Email Text\n",
    336        "66             66  [1010, 1266, 922, 102, 188, 3053, 3093, 87, 6,...\n",
    337        "13986       13987  [10171, 6568, 209, 54, 155, 5006, 6273, 11, 2,...\n",
    338        "13711       13712  [132755, 243, 143884, 656, 186, 180977, 110, 2...\n",
    339        "9031         9032  [2859, 177, 3753, 2, 545, 8, 303, 8, 2, 2859, ...\n",
    340        "8512         8513  [934, 934, 877, 13, 580, 129, 121, 65, 4, 336,..."
    341       ]
    342      },
    343      "execution_count": 7,
    344      "metadata": {},
    345      "output_type": "execute_result"
    346     }
    347    ],
    348    "source": [
    349     "X_train.head()"
    350    ]
    351   },
    352   {
    353    "cell_type": "code",
    354    "execution_count": null,
    355    "metadata": {},
    356    "outputs": [],
    357    "source": [
    358     "from keras.preprocessing.sequence import pad_sequences\n",
    359     "X_train['Email Text'] = list(pad_sequences(X_train['Email Text'], maxlen=100, padding='post'))"
    360    ]
    361   },
    362   {
    363    "cell_type": "code",
    364    "execution_count": null,
    365    "metadata": {},
    366    "outputs": [
    367     {
    368      "data": {
    369       "text/plain": [
    370        "66       [1010, 1266, 922, 102, 188, 3053, 3093, 87, 6,...\n",
    371        "13986    [10171, 6568, 209, 54, 155, 5006, 6273, 11, 2,...\n",
    372        "13711    [132755, 243, 143884, 656, 186, 180977, 110, 2...\n",
    373        "9031     [2859, 177, 3753, 2, 545, 8, 303, 8, 2, 2859, ...\n",
    374        "8512     [934, 934, 877, 13, 580, 129, 121, 65, 4, 336,...\n",
    375        "                               ...                        \n",
    376        "682      [492, 816, 85, 753, 5, 492, 29, 370, 65, 57, 1...\n",
    377        "10530    [1127, 21, 1804, 52, 13208, 12221, 84, 4, 417,...\n",
    378        "17343    [14, 36, 1512, 127, 6643, 2, 18028, 12, 1303, ...\n",
    379        "14529    [11, 38, 5, 8606, 11, 733, 17797, 18999, 15767...\n",
    380        "6313     [148, 295, 12, 18, 70, 268, 8119, 621, 1001, 6...\n",
    381        "Name: Email Text, Length: 13987, dtype: object"
    382       ]
    383      },
    384      "execution_count": 9,
    385      "metadata": {},
    386      "output_type": "execute_result"
    387     }
    388    ],
    389    "source": [
    390     "X_train['Email Text']"
    391    ]
    392   },
    393   {
    394    "cell_type": "code",
    395    "execution_count": null,
    396    "metadata": {},
    397    "outputs": [],
    398    "source": [
    399     "from sklearn.naive_bayes import CategoricalNB\n",
    400     "\n",
    401     "cat_nb = CategoricalNB()\n",
    402     "X_train_padded_flat = np.array(X_train['Email Text'].tolist())\n",
    403     "\n",
    404     "\n",
    405     "def isPhish(txt):\n",
    406     "    if txt == 'Phishing Email':\n",
    407     "        return 1\n",
    408     "    else:\n",
    409     "        return 0\n",
    410     "\n",
    411     "y_train = y_train.apply(isPhish)"
    412    ]
    413   },
    414   {
    415    "cell_type": "code",
    416    "execution_count": null,
    417    "metadata": {},
    418    "outputs": [
    419     {
    420      "data": {
    421       "text/html": [
    422        "<style>#sk-container-id-1 {\n",
    423        "  /* Definition of color scheme common for light and dark mode */\n",
    424        "  --sklearn-color-text: black;\n",
    425        "  --sklearn-color-line: gray;\n",
    426        "  /* Definition of color scheme for unfitted estimators */\n",
    427        "  --sklearn-color-unfitted-level-0: #fff5e6;\n",
    428        "  --sklearn-color-unfitted-level-1: #f6e4d2;\n",
    429        "  --sklearn-color-unfitted-level-2: #ffe0b3;\n",
    430        "  --sklearn-color-unfitted-level-3: chocolate;\n",
    431        "  /* Definition of color scheme for fitted estimators */\n",
    432        "  --sklearn-color-fitted-level-0: #f0f8ff;\n",
    433        "  --sklearn-color-fitted-level-1: #d4ebff;\n",
    434        "  --sklearn-color-fitted-level-2: #b3dbfd;\n",
    435        "  --sklearn-color-fitted-level-3: cornflowerblue;\n",
    436        "\n",
    437        "  /* Specific color for light theme */\n",
    438        "  --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
    439        "  --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
    440        "  --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
    441        "  --sklearn-color-icon: #696969;\n",
    442        "\n",
    443        "  @media (prefers-color-scheme: dark) {\n",
    444        "    /* Redefinition of color scheme for dark theme */\n",
    445        "    --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
    446        "    --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
    447        "    --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
    448        "    --sklearn-color-icon: #878787;\n",
    449        "  }\n",
    450        "}\n",
    451        "\n",
    452        "#sk-container-id-1 {\n",
    453        "  color: var(--sklearn-color-text);\n",
    454        "}\n",
    455        "\n",
    456        "#sk-container-id-1 pre {\n",
    457        "  padding: 0;\n",
    458        "}\n",
    459        "\n",
    460        "#sk-container-id-1 input.sk-hidden--visually {\n",
    461        "  border: 0;\n",
    462        "  clip: rect(1px 1px 1px 1px);\n",
    463        "  clip: rect(1px, 1px, 1px, 1px);\n",
    464        "  height: 1px;\n",
    465        "  margin: -1px;\n",
    466        "  overflow: hidden;\n",
    467        "  padding: 0;\n",
    468        "  position: absolute;\n",
    469        "  width: 1px;\n",
    470        "}\n",
    471        "\n",
    472        "#sk-container-id-1 div.sk-dashed-wrapped {\n",
    473        "  border: 1px dashed var(--sklearn-color-line);\n",
    474        "  margin: 0 0.4em 0.5em 0.4em;\n",
    475        "  box-sizing: border-box;\n",
    476        "  padding-bottom: 0.4em;\n",
    477        "  background-color: var(--sklearn-color-background);\n",
    478        "}\n",
    479        "\n",
    480        "#sk-container-id-1 div.sk-container {\n",
    481        "  /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
    482        "     but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
    483        "     so we also need the `!important` here to be able to override the\n",
    484        "     default hidden behavior on the sphinx rendered scikit-learn.org.\n",
    485        "     See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
    486        "  display: inline-block !important;\n",
    487        "  position: relative;\n",
    488        "}\n",
    489        "\n",
    490        "#sk-container-id-1 div.sk-text-repr-fallback {\n",
    491        "  display: none;\n",
    492        "}\n",
    493        "\n",
    494        "div.sk-parallel-item,\n",
    495        "div.sk-serial,\n",
    496        "div.sk-item {\n",
    497        "  /* draw centered vertical line to link estimators */\n",
    498        "  background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
    499        "  background-size: 2px 100%;\n",
    500        "  background-repeat: no-repeat;\n",
    501        "  background-position: center center;\n",
    502        "}\n",
    503        "\n",
    504        "/* Parallel-specific style estimator block */\n",
    505        "\n",
    506        "#sk-container-id-1 div.sk-parallel-item::after {\n",
    507        "  content: \"\";\n",
    508        "  width: 100%;\n",
    509        "  border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
    510        "  flex-grow: 1;\n",
    511        "}\n",
    512        "\n",
    513        "#sk-container-id-1 div.sk-parallel {\n",
    514        "  display: flex;\n",
    515        "  align-items: stretch;\n",
    516        "  justify-content: center;\n",
    517        "  background-color: var(--sklearn-color-background);\n",
    518        "  position: relative;\n",
    519        "}\n",
    520        "\n",
    521        "#sk-container-id-1 div.sk-parallel-item {\n",
    522        "  display: flex;\n",
    523        "  flex-direction: column;\n",
    524        "}\n",
    525        "\n",
    526        "#sk-container-id-1 div.sk-parallel-item:first-child::after {\n",
    527        "  align-self: flex-end;\n",
    528        "  width: 50%;\n",
    529        "}\n",
    530        "\n",
    531        "#sk-container-id-1 div.sk-parallel-item:last-child::after {\n",
    532        "  align-self: flex-start;\n",
    533        "  width: 50%;\n",
    534        "}\n",
    535        "\n",
    536        "#sk-container-id-1 div.sk-parallel-item:only-child::after {\n",
    537        "  width: 0;\n",
    538        "}\n",
    539        "\n",
    540        "/* Serial-specific style estimator block */\n",
    541        "\n",
    542        "#sk-container-id-1 div.sk-serial {\n",
    543        "  display: flex;\n",
    544        "  flex-direction: column;\n",
    545        "  align-items: center;\n",
    546        "  background-color: var(--sklearn-color-background);\n",
    547        "  padding-right: 1em;\n",
    548        "  padding-left: 1em;\n",
    549        "}\n",
    550        "\n",
    551        "\n",
    552        "/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
    553        "clickable and can be expanded/collapsed.\n",
    554        "- Pipeline and ColumnTransformer use this feature and define the default style\n",
    555        "- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
    556        "*/\n",
    557        "\n",
    558        "/* Pipeline and ColumnTransformer style (default) */\n",
    559        "\n",
    560        "#sk-container-id-1 div.sk-toggleable {\n",
    561        "  /* Default theme specific background. It is overwritten whether we have a\n",
    562        "  specific estimator or a Pipeline/ColumnTransformer */\n",
    563        "  background-color: var(--sklearn-color-background);\n",
    564        "}\n",
    565        "\n",
    566        "/* Toggleable label */\n",
    567        "#sk-container-id-1 label.sk-toggleable__label {\n",
    568        "  cursor: pointer;\n",
    569        "  display: block;\n",
    570        "  width: 100%;\n",
    571        "  margin-bottom: 0;\n",
    572        "  padding: 0.5em;\n",
    573        "  box-sizing: border-box;\n",
    574        "  text-align: center;\n",
    575        "}\n",
    576        "\n",
    577        "#sk-container-id-1 label.sk-toggleable__label-arrow:before {\n",
    578        "  /* Arrow on the left of the label */\n",
    579        "  content: \"▸\";\n",
    580        "  float: left;\n",
    581        "  margin-right: 0.25em;\n",
    582        "  color: var(--sklearn-color-icon);\n",
    583        "}\n",
    584        "\n",
    585        "#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {\n",
    586        "  color: var(--sklearn-color-text);\n",
    587        "}\n",
    588        "\n",
    589        "/* Toggleable content - dropdown */\n",
    590        "\n",
    591        "#sk-container-id-1 div.sk-toggleable__content {\n",
    592        "  max-height: 0;\n",
    593        "  max-width: 0;\n",
    594        "  overflow: hidden;\n",
    595        "  text-align: left;\n",
    596        "  /* unfitted */\n",
    597        "  background-color: var(--sklearn-color-unfitted-level-0);\n",
    598        "}\n",
    599        "\n",
    600        "#sk-container-id-1 div.sk-toggleable__content.fitted {\n",
    601        "  /* fitted */\n",
    602        "  background-color: var(--sklearn-color-fitted-level-0);\n",
    603        "}\n",
    604        "\n",
    605        "#sk-container-id-1 div.sk-toggleable__content pre {\n",
    606        "  margin: 0.2em;\n",
    607        "  border-radius: 0.25em;\n",
    608        "  color: var(--sklearn-color-text);\n",
    609        "  /* unfitted */\n",
    610        "  background-color: var(--sklearn-color-unfitted-level-0);\n",
    611        "}\n",
    612        "\n",
    613        "#sk-container-id-1 div.sk-toggleable__content.fitted pre {\n",
    614        "  /* unfitted */\n",
    615        "  background-color: var(--sklearn-color-fitted-level-0);\n",
    616        "}\n",
    617        "\n",
    618        "#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
    619        "  /* Expand drop-down */\n",
    620        "  max-height: 200px;\n",
    621        "  max-width: 100%;\n",
    622        "  overflow: auto;\n",
    623        "}\n",
    624        "\n",
    625        "#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
    626        "  content: \"▾\";\n",
    627        "}\n",
    628        "\n",
    629        "/* Pipeline/ColumnTransformer-specific style */\n",
    630        "\n",
    631        "#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
    632        "  color: var(--sklearn-color-text);\n",
    633        "  background-color: var(--sklearn-color-unfitted-level-2);\n",
    634        "}\n",
    635        "\n",
    636        "#sk-container-id-1 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
    637        "  background-color: var(--sklearn-color-fitted-level-2);\n",
    638        "}\n",
    639        "\n",
    640        "/* Estimator-specific style */\n",
    641        "\n",
    642        "/* Colorize estimator box */\n",
    643        "#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
    644        "  /* unfitted */\n",
    645        "  background-color: var(--sklearn-color-unfitted-level-2);\n",
    646        "}\n",
    647        "\n",
    648        "#sk-container-id-1 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
    649        "  /* fitted */\n",
    650        "  background-color: var(--sklearn-color-fitted-level-2);\n",
    651        "}\n",
    652        "\n",
    653        "#sk-container-id-1 div.sk-label label.sk-toggleable__label,\n",
    654        "#sk-container-id-1 div.sk-label label {\n",
    655        "  /* The background is the default theme color */\n",
    656        "  color: var(--sklearn-color-text-on-default-background);\n",
    657        "}\n",
    658        "\n",
    659        "/* On hover, darken the color of the background */\n",
    660        "#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label {\n",
    661        "  color: var(--sklearn-color-text);\n",
    662        "  background-color: var(--sklearn-color-unfitted-level-2);\n",
    663        "}\n",
    664        "\n",
    665        "/* Label box, darken color on hover, fitted */\n",
    666        "#sk-container-id-1 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
    667        "  color: var(--sklearn-color-text);\n",
    668        "  background-color: var(--sklearn-color-fitted-level-2);\n",
    669        "}\n",
    670        "\n",
    671        "/* Estimator label */\n",
    672        "\n",
    673        "#sk-container-id-1 div.sk-label label {\n",
    674        "  font-family: monospace;\n",
    675        "  font-weight: bold;\n",
    676        "  display: inline-block;\n",
    677        "  line-height: 1.2em;\n",
    678        "}\n",
    679        "\n",
    680        "#sk-container-id-1 div.sk-label-container {\n",
    681        "  text-align: center;\n",
    682        "}\n",
    683        "\n",
    684        "/* Estimator-specific */\n",
    685        "#sk-container-id-1 div.sk-estimator {\n",
    686        "  font-family: monospace;\n",
    687        "  border: 1px dotted var(--sklearn-color-border-box);\n",
    688        "  border-radius: 0.25em;\n",
    689        "  box-sizing: border-box;\n",
    690        "  margin-bottom: 0.5em;\n",
    691        "  /* unfitted */\n",
    692        "  background-color: var(--sklearn-color-unfitted-level-0);\n",
    693        "}\n",
    694        "\n",
    695        "#sk-container-id-1 div.sk-estimator.fitted {\n",
    696        "  /* fitted */\n",
    697        "  background-color: var(--sklearn-color-fitted-level-0);\n",
    698        "}\n",
    699        "\n",
    700        "/* on hover */\n",
    701        "#sk-container-id-1 div.sk-estimator:hover {\n",
    702        "  /* unfitted */\n",
    703        "  background-color: var(--sklearn-color-unfitted-level-2);\n",
    704        "}\n",
    705        "\n",
    706        "#sk-container-id-1 div.sk-estimator.fitted:hover {\n",
    707        "  /* fitted */\n",
    708        "  background-color: var(--sklearn-color-fitted-level-2);\n",
    709        "}\n",
    710        "\n",
    711        "/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
    712        "\n",
    713        "/* Common style for \"i\" and \"?\" */\n",
    714        "\n",
    715        ".sk-estimator-doc-link,\n",
    716        "a:link.sk-estimator-doc-link,\n",
    717        "a:visited.sk-estimator-doc-link {\n",
    718        "  float: right;\n",
    719        "  font-size: smaller;\n",
    720        "  line-height: 1em;\n",
    721        "  font-family: monospace;\n",
    722        "  background-color: var(--sklearn-color-background);\n",
    723        "  border-radius: 1em;\n",
    724        "  height: 1em;\n",
    725        "  width: 1em;\n",
    726        "  text-decoration: none !important;\n",
    727        "  margin-left: 1ex;\n",
    728        "  /* unfitted */\n",
    729        "  border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
    730        "  color: var(--sklearn-color-unfitted-level-1);\n",
    731        "}\n",
    732        "\n",
    733        ".sk-estimator-doc-link.fitted,\n",
    734        "a:link.sk-estimator-doc-link.fitted,\n",
    735        "a:visited.sk-estimator-doc-link.fitted {\n",
    736        "  /* fitted */\n",
    737        "  border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
    738        "  color: var(--sklearn-color-fitted-level-1);\n",
    739        "}\n",
    740        "\n",
    741        "/* On hover */\n",
    742        "div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
    743        ".sk-estimator-doc-link:hover,\n",
    744        "div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
    745        ".sk-estimator-doc-link:hover {\n",
    746        "  /* unfitted */\n",
    747        "  background-color: var(--sklearn-color-unfitted-level-3);\n",
    748        "  color: var(--sklearn-color-background);\n",
    749        "  text-decoration: none;\n",
    750        "}\n",
    751        "\n",
    752        "div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
    753        ".sk-estimator-doc-link.fitted:hover,\n",
    754        "div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
    755        ".sk-estimator-doc-link.fitted:hover {\n",
    756        "  /* fitted */\n",
    757        "  background-color: var(--sklearn-color-fitted-level-3);\n",
    758        "  color: var(--sklearn-color-background);\n",
    759        "  text-decoration: none;\n",
    760        "}\n",
    761        "\n",
    762        "/* Span, style for the box shown on hovering the info icon */\n",
    763        ".sk-estimator-doc-link span {\n",
    764        "  display: none;\n",
    765        "  z-index: 9999;\n",
    766        "  position: relative;\n",
    767        "  font-weight: normal;\n",
    768        "  right: .2ex;\n",
    769        "  padding: .5ex;\n",
    770        "  margin: .5ex;\n",
    771        "  width: min-content;\n",
    772        "  min-width: 20ex;\n",
    773        "  max-width: 50ex;\n",
    774        "  color: var(--sklearn-color-text);\n",
    775        "  box-shadow: 2pt 2pt 4pt #999;\n",
    776        "  /* unfitted */\n",
    777        "  background: var(--sklearn-color-unfitted-level-0);\n",
    778        "  border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
    779        "}\n",
    780        "\n",
    781        ".sk-estimator-doc-link.fitted span {\n",
    782        "  /* fitted */\n",
    783        "  background: var(--sklearn-color-fitted-level-0);\n",
    784        "  border: var(--sklearn-color-fitted-level-3);\n",
    785        "}\n",
    786        "\n",
    787        ".sk-estimator-doc-link:hover span {\n",
    788        "  display: block;\n",
    789        "}\n",
    790        "\n",
    791        "/* \"?\"-specific style due to the `<a>` HTML tag */\n",
    792        "\n",
    793        "#sk-container-id-1 a.estimator_doc_link {\n",
    794        "  float: right;\n",
    795        "  font-size: 1rem;\n",
    796        "  line-height: 1em;\n",
    797        "  font-family: monospace;\n",
    798        "  background-color: var(--sklearn-color-background);\n",
    799        "  border-radius: 1rem;\n",
    800        "  height: 1rem;\n",
    801        "  width: 1rem;\n",
    802        "  text-decoration: none;\n",
    803        "  /* unfitted */\n",
    804        "  color: var(--sklearn-color-unfitted-level-1);\n",
    805        "  border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
    806        "}\n",
    807        "\n",
    808        "#sk-container-id-1 a.estimator_doc_link.fitted {\n",
    809        "  /* fitted */\n",
    810        "  border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
    811        "  color: var(--sklearn-color-fitted-level-1);\n",
    812        "}\n",
    813        "\n",
    814        "/* On hover */\n",
    815        "#sk-container-id-1 a.estimator_doc_link:hover {\n",
    816        "  /* unfitted */\n",
    817        "  background-color: var(--sklearn-color-unfitted-level-3);\n",
    818        "  color: var(--sklearn-color-background);\n",
    819        "  text-decoration: none;\n",
    820        "}\n",
    821        "\n",
    822        "#sk-container-id-1 a.estimator_doc_link.fitted:hover {\n",
    823        "  /* fitted */\n",
    824        "  background-color: var(--sklearn-color-fitted-level-3);\n",
    825        "}\n",
    826        "</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>CategoricalNB()</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" checked><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">&nbsp;&nbsp;CategoricalNB<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.5/modules/generated/sklearn.naive_bayes.CategoricalNB.html\">?<span>Documentation for CategoricalNB</span></a><span class=\"sk-estimator-doc-link fitted\">i<span>Fitted</span></span></label><div class=\"sk-toggleable__content fitted\"><pre>CategoricalNB()</pre></div> </div></div></div></div>"
    827       ],
    828       "text/plain": [
    829        "CategoricalNB()"
    830       ]
    831      },
    832      "execution_count": 11,
    833      "metadata": {},
    834      "output_type": "execute_result"
    835     }
    836    ],
    837    "source": [
    838     "cat_nb.fit(X_train_padded_flat, y_train)"
    839    ]
    840   },
    841   {
    842    "cell_type": "code",
    843    "execution_count": null,
    844    "metadata": {},
    845    "outputs": [],
    846    "source": [
    847     "# Build the padded test matrix directly instead of writing intermediate results back into the X_test frame.\n",
    848     "X_test_tokens = X_test['Email Text'].apply(applyLayer)\n",
    849     "X_test = pad_sequences(X_test_tokens, maxlen=5000, padding='post')"
    850    ]
    851   },
    852   {
    853    "cell_type": "code",
    854    "execution_count": null,
    855    "metadata": {},
    856    "outputs": [
    857     {
    858      "data": {
    859       "text/plain": [
    860        "array([0])"
    861       ]
    862      },
    863      "execution_count": 31,
    864      "metadata": {},
    865      "output_type": "execute_result"
    866     }
    867    ],
    868    "source": [
    869     "cat_nb.predict(X_test[1000].reshape(1, -1))"
    870    ]
    871   },
    872   {
    873    "cell_type": "code",
    874    "execution_count": null,
    875    "metadata": {},
    876    "outputs": [
    877     {
    878      "data": {
    879       "text/plain": [
    880        "'Phishing Email'"
    881       ]
    882      },
    883      "execution_count": 32,
    884      "metadata": {},
    885      "output_type": "execute_result"
    886     }
    887    ],
    888    "source": [
    889     "y_test.iat[1000]"
    890    ]
    891   }
    892  ],
    893  "metadata": {
    894   "kernelspec": {
    895    "display_name": ".venv",
    896    "language": "python",
    897    "name": "python3"
    898   },
    899   "language_info": {
    900    "codemirror_mode": {
    901     "name": "ipython",
    902     "version": 3
    903    },
    904    "file_extension": ".py",
    905    "mimetype": "text/x-python",
    906    "name": "python",
    907    "nbconvert_exporter": "python",
    908    "pygments_lexer": "ipython3",
    909    "version": "3.11.2"
    910   }
    911  },
    912  "nbformat": 4,
    913  "nbformat_minor": 2
    914 }