PhishingClassification.ipynb (60987B)
1 { 2 "cells": [ 3 { 4 "cell_type": "markdown", 5 "metadata": {}, 6 "source": [ 7 "https://www.kaggle.com/datasets/subhajournal/phishingemails" 8 ] 9 }, 10 { 11 "cell_type": "code", 12 "execution_count": 33, 13 "metadata": {}, 14 "outputs": [ 15 { 16 "data": { 17 "text/html": [ 18 "<div>\n", 19 "<style scoped>\n", 20 " .dataframe tbody tr th:only-of-type {\n", 21 " vertical-align: middle;\n", 22 " }\n", 23 "\n", 24 " .dataframe tbody tr th {\n", 25 " vertical-align: top;\n", 26 " }\n", 27 "\n", 28 " .dataframe thead th {\n", 29 " text-align: right;\n", 30 " }\n", 31 "</style>\n", 32 "<table border=\"1\" class=\"dataframe\">\n", 33 " <thead>\n", 34 " <tr style=\"text-align: right;\">\n", 35 " <th></th>\n", 36 " <th>Unnamed: 0</th>\n", 37 " <th>Email Text</th>\n", 38 " <th>Email Type</th>\n", 39 " </tr>\n", 40 " </thead>\n", 41 " <tbody>\n", 42 " <tr>\n", 43 " <th>0</th>\n", 44 " <td>0</td>\n", 45 " <td>re : 6 . 1100 , disc : uniformitarianism , re ...</td>\n", 46 " <td>Safe Email</td>\n", 47 " </tr>\n", 48 " <tr>\n", 49 " <th>1</th>\n", 50 " <td>1</td>\n", 51 " <td>the other side of * galicismos * * galicismo *...</td>\n", 52 " <td>Safe Email</td>\n", 53 " </tr>\n", 54 " <tr>\n", 55 " <th>2</th>\n", 56 " <td>2</td>\n", 57 " <td>re : equistar deal tickets are you still avail...</td>\n", 58 " <td>Safe Email</td>\n", 59 " </tr>\n", 60 " <tr>\n", 61 " <th>3</th>\n", 62 " <td>3</td>\n", 63 " <td>\\nHello I am your hot lil horny toy.\\n I am...</td>\n", 64 " <td>Phishing Email</td>\n", 65 " </tr>\n", 66 " <tr>\n", 67 " <th>4</th>\n", 68 " <td>4</td>\n", 69 " <td>software at incredibly low prices ( 86 % lower...</td>\n", 70 " <td>Phishing Email</td>\n", 71 " </tr>\n", 72 " <tr>\n", 73 " <th>...</th>\n", 74 " <td>...</td>\n", 75 " <td>...</td>\n", 76 " <td>...</td>\n", 77 " </tr>\n", 78 " <tr>\n", 79 " <th>18645</th>\n", 80 " <td>18646</td>\n", 81 " <td>date a lonely housewife always wanted to date ...</td>\n", 82 " <td>Phishing Email</td>\n", 83 " </tr>\n", 84 " <tr>\n", 85 " <th>18646</th>\n", 86 " <td>18647</td>\n", 87 " <td>request submitted : access request for anita ....</td>\n", 88 " <td>Safe Email</td>\n", 89 " </tr>\n", 90 " <tr>\n", 91 " <th>18647</th>\n", 92 " <td>18648</td>\n", 93 " <td>re : important - prc mtg hi dorn & john , as y...</td>\n", 94 " <td>Safe Email</td>\n", 95 " </tr>\n", 96 " <tr>\n", 97 " <th>18648</th>\n", 98 " <td>18649</td>\n", 99 " <td>press clippings - letter on californian utilit...</td>\n", 100 " <td>Safe Email</td>\n", 101 " </tr>\n", 102 " <tr>\n", 103 " <th>18649</th>\n", 104 " <td>18650</td>\n", 105 " <td>empty</td>\n", 106 " <td>Phishing Email</td>\n", 107 " </tr>\n", 108 " </tbody>\n", 109 "</table>\n", 110 "<p>18650 rows × 3 columns</p>\n", 111 "</div>" 112 ], 113 "text/plain": [ 114 " Unnamed: 0 Email Text \\\n", 115 "0 0 re : 6 . 1100 , disc : uniformitarianism , re ... \n", 116 "1 1 the other side of * galicismos * * galicismo *... \n", 117 "2 2 re : equistar deal tickets are you still avail... \n", 118 "3 3 \\nHello I am your hot lil horny toy.\\n I am... \n", 119 "4 4 software at incredibly low prices ( 86 % lower... \n", 120 "... ... ... \n", 121 "18645 18646 date a lonely housewife always wanted to date ... \n", 122 "18646 18647 request submitted : access request for anita .... \n", 123 "18647 18648 re : important - prc mtg hi dorn & john , as y... \n", 124 "18648 18649 press clippings - letter on californian utilit... \n", 125 "18649 18650 empty \n", 126 "\n", 127 " Email Type \n", 128 "0 Safe Email \n", 129 "1 Safe Email \n", 130 "2 Safe Email \n", 131 "3 Phishing Email \n", 132 "4 Phishing Email \n", 133 "... ... \n", 134 "18645 Phishing Email \n", 135 "18646 Safe Email \n", 136 "18647 Safe Email \n", 137 "18648 Safe Email \n", 138 "18649 Phishing Email \n", 139 "\n", 140 "[18650 rows x 3 columns]" 141 ] 142 }, 143 "execution_count": 33, 144 "metadata": {}, 145 "output_type": "execute_result" 146 } 147 ], 148 "source": [ 149 "import pandas as pd \n", 150 "\n", 151 "df = pd.read_csv('../datasets/phishing/Phishing_Email.csv')\n", 152 "df" 153 ] 154 }, 155 { 156 "cell_type": "code", 157 "execution_count": 34, 158 "metadata": {}, 159 "outputs": [], 160 "source": [ 161 "from sklearn.naive_bayes import CategoricalNB\n", 162 "import keras\n", 163 "import numpy as np\n", 164 "\n", 165 "def toStr(inp):\n", 166 " return str(inp)\n", 167 "\n", 168 "layer = keras.layers.TextVectorization()\n", 169 "\n", 170 "df['Email Text'] = df['Email Text'].apply(toStr)\n", 171 "arr = ' '.join(np.array(df['Email Text']).tolist())\n", 172 "layer.adapt(arr)" 173 ] 174 }, 175 { 176 "cell_type": "code", 177 "execution_count": 35, 178 "metadata": {}, 179 "outputs": [], 180 "source": [ 181 "from sklearn.model_selection import train_test_split\n", 182 "\n", 183 "X_train, X_test, y_train, y_test = train_test_split(df.drop('Email Type', axis=1), df['Email Type'])\n", 184 "X_val , X_test , y_val , y_test = train_test_split(X_test, y_test, test_size=.5)" 185 ] 186 }, 187 { 188 "cell_type": "code", 189 "execution_count": 36, 190 "metadata": {}, 191 "outputs": [ 192 { 193 "data": { 194 "text/plain": [ 195 "191673" 196 ] 197 }, 198 "execution_count": 36, 199 "metadata": {}, 200 "output_type": "execute_result" 201 } 202 ], 203 "source": [ 204 "# All the words vectorized\n", 205 "len(layer.get_vocabulary())" 206 ] 207 }, 208 { 209 "cell_type": "code", 210 "execution_count": 37, 211 "metadata": {}, 212 "outputs": [ 213 { 214 "data": { 215 "text/plain": [ 216 "<tf.Tensor: shape=(1, 5), dtype=int64, numpy=array([[ 2, 12030, 9, 12, 10064]])>" 217 ] 218 }, 219 "execution_count": 37, 220 "metadata": {}, 221 "output_type": "execute_result" 222 } 223 ], 224 "source": [ 225 "layer(['the fuck is that shit'])" 226 ] 227 }, 228 { 229 "cell_type": "code", 230 "execution_count": 38, 231 "metadata": {}, 232 "outputs": [ 233 { 234 "ename": "KeyboardInterrupt", 235 "evalue": "", 236 "output_type": "error", 237 "traceback": [ 238 "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 239 "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", 240 "Cell \u001b[0;32mIn[38], line 4\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mapplyLayer\u001b[39m(text):\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m np\u001b[38;5;241m.\u001b[39marray(layer(text))\n\u001b[0;32m----> 4\u001b[0m X_train[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mEmail Text\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[43mX_train\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mEmail Text\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply\u001b[49m\u001b[43m(\u001b[49m\u001b[43mapplyLayer\u001b[49m\u001b[43m)\u001b[49m\n", 241 "File \u001b[0;32m~/gitRepos/machineLearning/.venv/lib/python3.11/site-packages/pandas/core/series.py:4924\u001b[0m, in \u001b[0;36mSeries.apply\u001b[0;34m(self, func, convert_dtype, args, by_row, **kwargs)\u001b[0m\n\u001b[1;32m 4789\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mapply\u001b[39m(\n\u001b[1;32m 4790\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 4791\u001b[0m func: AggFuncType,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 4796\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs,\n\u001b[1;32m 4797\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m DataFrame \u001b[38;5;241m|\u001b[39m Series:\n\u001b[1;32m 4798\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 4799\u001b[0m \u001b[38;5;124;03m Invoke function on values of Series.\u001b[39;00m\n\u001b[1;32m 4800\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 4915\u001b[0m \u001b[38;5;124;03m dtype: float64\u001b[39;00m\n\u001b[1;32m 4916\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m 4917\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mSeriesApply\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 4918\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4919\u001b[0m \u001b[43m \u001b[49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4920\u001b[0m \u001b[43m \u001b[49m\u001b[43mconvert_dtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconvert_dtype\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4921\u001b[0m \u001b[43m \u001b[49m\u001b[43mby_row\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mby_row\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4922\u001b[0m \u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4923\u001b[0m \u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m-> 4924\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", 242 "File \u001b[0;32m~/gitRepos/machineLearning/.venv/lib/python3.11/site-packages/pandas/core/apply.py:1427\u001b[0m, in \u001b[0;36mSeriesApply.apply\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1424\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mapply_compat()\n\u001b[1;32m 1426\u001b[0m \u001b[38;5;66;03m# self.func is Callable\u001b[39;00m\n\u001b[0;32m-> 1427\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply_standard\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", 243 "File \u001b[0;32m~/gitRepos/machineLearning/.venv/lib/python3.11/site-packages/pandas/core/apply.py:1507\u001b[0m, in \u001b[0;36mSeriesApply.apply_standard\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1501\u001b[0m \u001b[38;5;66;03m# row-wise access\u001b[39;00m\n\u001b[1;32m 1502\u001b[0m \u001b[38;5;66;03m# apply doesn't have a `na_action` keyword and for backward compat reasons\u001b[39;00m\n\u001b[1;32m 1503\u001b[0m \u001b[38;5;66;03m# we need to give `na_action=\"ignore\"` for categorical data.\u001b[39;00m\n\u001b[1;32m 1504\u001b[0m \u001b[38;5;66;03m# TODO: remove the `na_action=\"ignore\"` when that default has been changed in\u001b[39;00m\n\u001b[1;32m 1505\u001b[0m \u001b[38;5;66;03m# Categorical (GH51645).\u001b[39;00m\n\u001b[1;32m 1506\u001b[0m action \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mignore\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(obj\u001b[38;5;241m.\u001b[39mdtype, CategoricalDtype) \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m-> 1507\u001b[0m mapped \u001b[38;5;241m=\u001b[39m \u001b[43mobj\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_map_values\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1508\u001b[0m \u001b[43m \u001b[49m\u001b[43mmapper\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcurried\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mna_action\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43maction\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconvert\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mconvert_dtype\u001b[49m\n\u001b[1;32m 1509\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1511\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(mapped) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(mapped[\u001b[38;5;241m0\u001b[39m], ABCSeries):\n\u001b[1;32m 1512\u001b[0m \u001b[38;5;66;03m# GH#43986 Need to do list(mapped) in order to get treated as nested\u001b[39;00m\n\u001b[1;32m 1513\u001b[0m \u001b[38;5;66;03m# See also GH#25959 regarding EA support\u001b[39;00m\n\u001b[1;32m 1514\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m obj\u001b[38;5;241m.\u001b[39m_constructor_expanddim(\u001b[38;5;28mlist\u001b[39m(mapped), index\u001b[38;5;241m=\u001b[39mobj\u001b[38;5;241m.\u001b[39mindex)\n", 244 "File \u001b[0;32m~/gitRepos/machineLearning/.venv/lib/python3.11/site-packages/pandas/core/base.py:921\u001b[0m, in \u001b[0;36mIndexOpsMixin._map_values\u001b[0;34m(self, mapper, na_action, convert)\u001b[0m\n\u001b[1;32m 918\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(arr, ExtensionArray):\n\u001b[1;32m 919\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m arr\u001b[38;5;241m.\u001b[39mmap(mapper, na_action\u001b[38;5;241m=\u001b[39mna_action)\n\u001b[0;32m--> 921\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43malgorithms\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmap_array\u001b[49m\u001b[43m(\u001b[49m\u001b[43marr\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmapper\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mna_action\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mna_action\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconvert\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconvert\u001b[49m\u001b[43m)\u001b[49m\n", 245 "File \u001b[0;32m~/gitRepos/machineLearning/.venv/lib/python3.11/site-packages/pandas/core/algorithms.py:1743\u001b[0m, in \u001b[0;36mmap_array\u001b[0;34m(arr, mapper, na_action, convert)\u001b[0m\n\u001b[1;32m 1741\u001b[0m values \u001b[38;5;241m=\u001b[39m arr\u001b[38;5;241m.\u001b[39mastype(\u001b[38;5;28mobject\u001b[39m, copy\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[1;32m 1742\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m na_action \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m-> 1743\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mlib\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmap_infer\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalues\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmapper\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconvert\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconvert\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1744\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1745\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m lib\u001b[38;5;241m.\u001b[39mmap_infer_mask(\n\u001b[1;32m 1746\u001b[0m values, mapper, mask\u001b[38;5;241m=\u001b[39misna(values)\u001b[38;5;241m.\u001b[39mview(np\u001b[38;5;241m.\u001b[39muint8), convert\u001b[38;5;241m=\u001b[39mconvert\n\u001b[1;32m 1747\u001b[0m )\n", 246 "File \u001b[0;32mlib.pyx:2972\u001b[0m, in \u001b[0;36mpandas._libs.lib.map_infer\u001b[0;34m()\u001b[0m\n", 247 "Cell \u001b[0;32mIn[38], line 2\u001b[0m, in \u001b[0;36mapplyLayer\u001b[0;34m(text)\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mapplyLayer\u001b[39m(text):\n\u001b[0;32m----> 2\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m np\u001b[38;5;241m.\u001b[39marray(\u001b[43mlayer\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtext\u001b[49m\u001b[43m)\u001b[49m)\n", 248 "File \u001b[0;32m~/gitRepos/machineLearning/.venv/lib/python3.11/site-packages/keras/src/utils/traceback_utils.py:117\u001b[0m, in \u001b[0;36mfilter_traceback.<locals>.error_handler\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 115\u001b[0m filtered_tb \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 116\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 117\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 118\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 119\u001b[0m filtered_tb \u001b[38;5;241m=\u001b[39m _process_traceback_frames(e\u001b[38;5;241m.\u001b[39m__traceback__)\n", 249 "File \u001b[0;32m~/gitRepos/machineLearning/.venv/lib/python3.11/site-packages/keras/src/layers/layer.py:846\u001b[0m, in \u001b[0;36mLayer.__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 844\u001b[0m outputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28msuper\u001b[39m()\u001b[38;5;241m.\u001b[39m\u001b[38;5;21m__call__\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 845\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 846\u001b[0m outputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__call__\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 847\u001b[0m \u001b[38;5;66;03m# Change the layout for the layer output if needed.\u001b[39;00m\n\u001b[1;32m 848\u001b[0m \u001b[38;5;66;03m# This is useful for relayout intermediate tensor in the model\u001b[39;00m\n\u001b[1;32m 849\u001b[0m \u001b[38;5;66;03m# to achieve the optimal performance.\u001b[39;00m\n\u001b[1;32m 850\u001b[0m distribution \u001b[38;5;241m=\u001b[39m distribution_lib\u001b[38;5;241m.\u001b[39mdistribution()\n", 250 "File \u001b[0;32m~/gitRepos/machineLearning/.venv/lib/python3.11/site-packages/keras/src/utils/traceback_utils.py:117\u001b[0m, in \u001b[0;36mfilter_traceback.<locals>.error_handler\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 115\u001b[0m filtered_tb \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 116\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 117\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 118\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 119\u001b[0m filtered_tb \u001b[38;5;241m=\u001b[39m _process_traceback_frames(e\u001b[38;5;241m.\u001b[39m__traceback__)\n", 251 "File \u001b[0;32m~/gitRepos/machineLearning/.venv/lib/python3.11/site-packages/keras/src/ops/operation.py:48\u001b[0m, in \u001b[0;36mOperation.__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 43\u001b[0m call_fn \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcall\n\u001b[1;32m 44\u001b[0m call_fn \u001b[38;5;241m=\u001b[39m traceback_utils\u001b[38;5;241m.\u001b[39minject_argument_info_in_traceback(\n\u001b[1;32m 45\u001b[0m call_fn,\n\u001b[1;32m 46\u001b[0m object_name\u001b[38;5;241m=\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.call()\u001b[39m\u001b[38;5;124m\"\u001b[39m),\n\u001b[1;32m 47\u001b[0m )\n\u001b[0;32m---> 48\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mcall_fn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 50\u001b[0m \u001b[38;5;66;03m# Plain flow.\u001b[39;00m\n\u001b[1;32m 51\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m any_symbolic_tensors(args, kwargs):\n", 252 "File \u001b[0;32m~/gitRepos/machineLearning/.venv/lib/python3.11/site-packages/keras/src/utils/traceback_utils.py:156\u001b[0m, in \u001b[0;36minject_argument_info_in_traceback.<locals>.error_handler\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 154\u001b[0m bound_signature \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 155\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 156\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 157\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 158\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(e, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m_keras_call_info_injected\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[1;32m 159\u001b[0m \u001b[38;5;66;03m# Only inject info for the innermost failing call\u001b[39;00m\n", 253 "File \u001b[0;32m~/gitRepos/machineLearning/.venv/lib/python3.11/site-packages/keras/src/layers/preprocessing/text_vectorization.py:574\u001b[0m, in \u001b[0;36mTextVectorization.call\u001b[0;34m(self, inputs)\u001b[0m\n\u001b[1;32m 569\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(\n\u001b[1;32m 570\u001b[0m inputs, (tf\u001b[38;5;241m.\u001b[39mTensor, tf\u001b[38;5;241m.\u001b[39mRaggedTensor, np\u001b[38;5;241m.\u001b[39mndarray, \u001b[38;5;28mlist\u001b[39m, \u001b[38;5;28mtuple\u001b[39m)\n\u001b[1;32m 571\u001b[0m ):\n\u001b[1;32m 572\u001b[0m inputs \u001b[38;5;241m=\u001b[39m tf\u001b[38;5;241m.\u001b[39mconvert_to_tensor(backend\u001b[38;5;241m.\u001b[39mconvert_to_numpy(inputs))\n\u001b[0;32m--> 574\u001b[0m inputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_preprocess\u001b[49m\u001b[43m(\u001b[49m\u001b[43minputs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 576\u001b[0m \u001b[38;5;66;03m# If we're not doing any output processing, return right away.\u001b[39;00m\n\u001b[1;32m 577\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_output_mode \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", 254 "File \u001b[0;32m~/gitRepos/machineLearning/.venv/lib/python3.11/site-packages/keras/src/layers/preprocessing/text_vectorization.py:553\u001b[0m, in \u001b[0;36mTextVectorization._preprocess\u001b[0;34m(self, inputs)\u001b[0m\n\u001b[1;32m 549\u001b[0m inputs \u001b[38;5;241m=\u001b[39m tf\u001b[38;5;241m.\u001b[39msqueeze(inputs, axis\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m)\n\u001b[1;32m 550\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_split \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mwhitespace\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 551\u001b[0m \u001b[38;5;66;03m# This treats multiple whitespaces as one whitespace, and strips\u001b[39;00m\n\u001b[1;32m 552\u001b[0m \u001b[38;5;66;03m# leading and trailing whitespace.\u001b[39;00m\n\u001b[0;32m--> 553\u001b[0m inputs \u001b[38;5;241m=\u001b[39m \u001b[43mtf\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstrings\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msplit\u001b[49m\u001b[43m(\u001b[49m\u001b[43minputs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 554\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_split \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcharacter\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 555\u001b[0m inputs \u001b[38;5;241m=\u001b[39m tf\u001b[38;5;241m.\u001b[39mstrings\u001b[38;5;241m.\u001b[39municode_split(inputs, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mUTF-8\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", 255 "File \u001b[0;32m~/gitRepos/machineLearning/.venv/lib/python3.11/site-packages/tensorflow/python/util/traceback_utils.py:150\u001b[0m, in \u001b[0;36mfilter_traceback.<locals>.error_handler\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 148\u001b[0m filtered_tb \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 149\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 150\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 151\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 152\u001b[0m filtered_tb \u001b[38;5;241m=\u001b[39m _process_traceback_frames(e\u001b[38;5;241m.\u001b[39m__traceback__)\n", 256 "File \u001b[0;32m~/gitRepos/machineLearning/.venv/lib/python3.11/site-packages/tensorflow/python/util/dispatch.py:1260\u001b[0m, in \u001b[0;36madd_dispatch_support.<locals>.decorator.<locals>.op_dispatch_handler\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 1258\u001b[0m \u001b[38;5;66;03m# Fallback dispatch system (dispatch v1):\u001b[39;00m\n\u001b[1;32m 1259\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1260\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mdispatch_target\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1261\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (\u001b[38;5;167;01mTypeError\u001b[39;00m, \u001b[38;5;167;01mValueError\u001b[39;00m):\n\u001b[1;32m 1262\u001b[0m \u001b[38;5;66;03m# Note: convert_to_eager_tensor currently raises a ValueError, not a\u001b[39;00m\n\u001b[1;32m 1263\u001b[0m \u001b[38;5;66;03m# TypeError, when given unexpected types. So we need to catch both.\u001b[39;00m\n\u001b[1;32m 1264\u001b[0m result \u001b[38;5;241m=\u001b[39m dispatch(op_dispatch_handler, args, kwargs)\n", 257 "File \u001b[0;32m~/gitRepos/machineLearning/.venv/lib/python3.11/site-packages/tensorflow/python/ops/ragged/ragged_string_ops.py:514\u001b[0m, in \u001b[0;36mstring_split_v2\u001b[0;34m(input, sep, maxsplit, name)\u001b[0m\n\u001b[1;32m 512\u001b[0m rank \u001b[38;5;241m=\u001b[39m \u001b[38;5;28minput\u001b[39m\u001b[38;5;241m.\u001b[39mshape\u001b[38;5;241m.\u001b[39mndims\n\u001b[1;32m 513\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m rank \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[0;32m--> 514\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mstring_split_v2\u001b[49m\u001b[43m(\u001b[49m\u001b[43marray_ops_stack\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstack\u001b[49m\u001b[43m(\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msep\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmaxsplit\u001b[49m\u001b[43m)\u001b[49m[\u001b[38;5;241m0\u001b[39m]\n\u001b[1;32m 515\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m rank \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m1\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m rank \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 516\u001b[0m sparse_result \u001b[38;5;241m=\u001b[39m string_ops\u001b[38;5;241m.\u001b[39mstring_split_v2(\n\u001b[1;32m 517\u001b[0m \u001b[38;5;28minput\u001b[39m, sep\u001b[38;5;241m=\u001b[39msep, maxsplit\u001b[38;5;241m=\u001b[39mmaxsplit)\n", 258 "File \u001b[0;32m~/gitRepos/machineLearning/.venv/lib/python3.11/site-packages/tensorflow/python/util/traceback_utils.py:150\u001b[0m, in \u001b[0;36mfilter_traceback.<locals>.error_handler\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 148\u001b[0m filtered_tb \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 149\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 150\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 151\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 152\u001b[0m filtered_tb \u001b[38;5;241m=\u001b[39m _process_traceback_frames(e\u001b[38;5;241m.\u001b[39m__traceback__)\n", 259 "File \u001b[0;32m~/gitRepos/machineLearning/.venv/lib/python3.11/site-packages/tensorflow/python/util/dispatch.py:1260\u001b[0m, in \u001b[0;36madd_dispatch_support.<locals>.decorator.<locals>.op_dispatch_handler\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 1258\u001b[0m \u001b[38;5;66;03m# Fallback dispatch system (dispatch v1):\u001b[39;00m\n\u001b[1;32m 1259\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1260\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mdispatch_target\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1261\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (\u001b[38;5;167;01mTypeError\u001b[39;00m, \u001b[38;5;167;01mValueError\u001b[39;00m):\n\u001b[1;32m 1262\u001b[0m \u001b[38;5;66;03m# Note: convert_to_eager_tensor currently raises a ValueError, not a\u001b[39;00m\n\u001b[1;32m 1263\u001b[0m \u001b[38;5;66;03m# TypeError, when given unexpected types. So we need to catch both.\u001b[39;00m\n\u001b[1;32m 1264\u001b[0m result \u001b[38;5;241m=\u001b[39m dispatch(op_dispatch_handler, args, kwargs)\n", 260 "File \u001b[0;32m~/gitRepos/machineLearning/.venv/lib/python3.11/site-packages/tensorflow/python/ops/ragged/ragged_string_ops.py:516\u001b[0m, in \u001b[0;36mstring_split_v2\u001b[0;34m(input, sep, maxsplit, name)\u001b[0m\n\u001b[1;32m 514\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m string_split_v2(array_ops_stack\u001b[38;5;241m.\u001b[39mstack([\u001b[38;5;28minput\u001b[39m]), sep, maxsplit)[\u001b[38;5;241m0\u001b[39m]\n\u001b[1;32m 515\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m rank \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m1\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m rank \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 516\u001b[0m sparse_result \u001b[38;5;241m=\u001b[39m \u001b[43mstring_ops\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstring_split_v2\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 517\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msep\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msep\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmaxsplit\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmaxsplit\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 518\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m ragged_tensor\u001b[38;5;241m.\u001b[39mRaggedTensor\u001b[38;5;241m.\u001b[39mfrom_value_rowids(\n\u001b[1;32m 519\u001b[0m values\u001b[38;5;241m=\u001b[39msparse_result\u001b[38;5;241m.\u001b[39mvalues,\n\u001b[1;32m 520\u001b[0m value_rowids\u001b[38;5;241m=\u001b[39msparse_result\u001b[38;5;241m.\u001b[39mindices[:, \u001b[38;5;241m0\u001b[39m],\n\u001b[1;32m 521\u001b[0m nrows\u001b[38;5;241m=\u001b[39msparse_result\u001b[38;5;241m.\u001b[39mdense_shape[\u001b[38;5;241m0\u001b[39m],\n\u001b[1;32m 522\u001b[0m validate\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[1;32m 523\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", 261 "File \u001b[0;32m~/gitRepos/machineLearning/.venv/lib/python3.11/site-packages/tensorflow/python/ops/string_ops.py:283\u001b[0m, in \u001b[0;36mstring_split_v2\u001b[0;34m(source, sep, maxsplit)\u001b[0m\n\u001b[1;32m 280\u001b[0m sep \u001b[38;5;241m=\u001b[39m ops\u001b[38;5;241m.\u001b[39mconvert_to_tensor(sep, dtype\u001b[38;5;241m=\u001b[39mdtypes\u001b[38;5;241m.\u001b[39mstring)\n\u001b[1;32m 281\u001b[0m source \u001b[38;5;241m=\u001b[39m ops\u001b[38;5;241m.\u001b[39mconvert_to_tensor(source, dtype\u001b[38;5;241m=\u001b[39mdtypes\u001b[38;5;241m.\u001b[39mstring)\n\u001b[0;32m--> 283\u001b[0m indices, values, shape \u001b[38;5;241m=\u001b[39m \u001b[43mgen_string_ops\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstring_split_v2\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 284\u001b[0m \u001b[43m \u001b[49m\u001b[43msource\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msep\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msep\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmaxsplit\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmaxsplit\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 285\u001b[0m indices\u001b[38;5;241m.\u001b[39mset_shape([\u001b[38;5;28;01mNone\u001b[39;00m, \u001b[38;5;241m2\u001b[39m])\n\u001b[1;32m 286\u001b[0m values\u001b[38;5;241m.\u001b[39mset_shape([\u001b[38;5;28;01mNone\u001b[39;00m])\n", 262 "File \u001b[0;32m~/gitRepos/machineLearning/.venv/lib/python3.11/site-packages/tensorflow/python/ops/gen_string_ops.py:1379\u001b[0m, in \u001b[0;36mstring_split_v2\u001b[0;34m(input, sep, maxsplit, name)\u001b[0m\n\u001b[1;32m 1377\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m tld\u001b[38;5;241m.\u001b[39mis_eager:\n\u001b[1;32m 1378\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1379\u001b[0m _result \u001b[38;5;241m=\u001b[39m \u001b[43mpywrap_tfe\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mTFE_Py_FastPathExecute\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1380\u001b[0m \u001b[43m \u001b[49m\u001b[43m_ctx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mStringSplitV2\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msep\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmaxsplit\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmaxsplit\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1381\u001b[0m _result \u001b[38;5;241m=\u001b[39m _StringSplitV2Output\u001b[38;5;241m.\u001b[39m_make(_result)\n\u001b[1;32m 1382\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m _result\n", 263 "\u001b[0;31mKeyboardInterrupt\u001b[0m: " 264 ] 265 } 266 ], 267 "source": [ 268 "def applyLayer(text):\n", 269 " return np.array(layer(text))\n", 270 "\n", 271 "X_train['Email Text'] = X_train['Email Text'].apply(applyLayer)" 272 ] 273 }, 274 { 275 "cell_type": "code", 276 "execution_count": null, 277 "metadata": {}, 278 "outputs": [ 279 { 280 "data": { 281 "text/html": [ 282 "<div>\n", 283 "<style scoped>\n", 284 " .dataframe tbody tr th:only-of-type {\n", 285 " vertical-align: middle;\n", 286 " }\n", 287 "\n", 288 " .dataframe tbody tr th {\n", 289 " vertical-align: top;\n", 290 " }\n", 291 "\n", 292 " .dataframe thead th {\n", 293 " text-align: right;\n", 294 " }\n", 295 "</style>\n", 296 "<table border=\"1\" class=\"dataframe\">\n", 297 " <thead>\n", 298 " <tr style=\"text-align: right;\">\n", 299 " <th></th>\n", 300 " <th>Unnamed: 0</th>\n", 301 " <th>Email Text</th>\n", 302 " </tr>\n", 303 " </thead>\n", 304 " <tbody>\n", 305 " <tr>\n", 306 " <th>66</th>\n", 307 " <td>66</td>\n", 308 " <td>[1010, 1266, 922, 102, 188, 3053, 3093, 87, 6,...</td>\n", 309 " </tr>\n", 310 " <tr>\n", 311 " <th>13986</th>\n", 312 " <td>13987</td>\n", 313 " <td>[10171, 6568, 209, 54, 155, 5006, 6273, 11, 2,...</td>\n", 314 " </tr>\n", 315 " <tr>\n", 316 " <th>13711</th>\n", 317 " <td>13712</td>\n", 318 " <td>[132755, 243, 143884, 656, 186, 180977, 110, 2...</td>\n", 319 " </tr>\n", 320 " <tr>\n", 321 " <th>9031</th>\n", 322 " <td>9032</td>\n", 323 " <td>[2859, 177, 3753, 2, 545, 8, 303, 8, 2, 2859, ...</td>\n", 324 " </tr>\n", 325 " <tr>\n", 326 " <th>8512</th>\n", 327 " <td>8513</td>\n", 328 " <td>[934, 934, 877, 13, 580, 129, 121, 65, 4, 336,...</td>\n", 329 " </tr>\n", 330 " </tbody>\n", 331 "</table>\n", 332 "</div>" 333 ], 334 "text/plain": [ 335 " Unnamed: 0 Email Text\n", 336 "66 66 [1010, 1266, 922, 102, 188, 3053, 3093, 87, 6,...\n", 337 "13986 13987 [10171, 6568, 209, 54, 155, 5006, 6273, 11, 2,...\n", 338 "13711 13712 [132755, 243, 143884, 656, 186, 180977, 110, 2...\n", 339 "9031 9032 [2859, 177, 3753, 2, 545, 8, 303, 8, 2, 2859, ...\n", 340 "8512 8513 [934, 934, 877, 13, 580, 129, 121, 65, 4, 336,..." 341 ] 342 }, 343 "execution_count": 7, 344 "metadata": {}, 345 "output_type": "execute_result" 346 } 347 ], 348 "source": [ 349 "X_train.head()" 350 ] 351 }, 352 { 353 "cell_type": "code", 354 "execution_count": null, 355 "metadata": {}, 356 "outputs": [], 357 "source": [ 358 "from keras.preprocessing.sequence import pad_sequences\n", 359 "X_train['Email Text'] = list(pad_sequences(X_train['Email Text'], maxlen=100, padding='post'))" 360 ] 361 }, 362 { 363 "cell_type": "code", 364 "execution_count": null, 365 "metadata": {}, 366 "outputs": [ 367 { 368 "data": { 369 "text/plain": [ 370 "66 [1010, 1266, 922, 102, 188, 3053, 3093, 87, 6,...\n", 371 "13986 [10171, 6568, 209, 54, 155, 5006, 6273, 11, 2,...\n", 372 "13711 [132755, 243, 143884, 656, 186, 180977, 110, 2...\n", 373 "9031 [2859, 177, 3753, 2, 545, 8, 303, 8, 2, 2859, ...\n", 374 "8512 [934, 934, 877, 13, 580, 129, 121, 65, 4, 336,...\n", 375 " ... \n", 376 "682 [492, 816, 85, 753, 5, 492, 29, 370, 65, 57, 1...\n", 377 "10530 [1127, 21, 1804, 52, 13208, 12221, 84, 4, 417,...\n", 378 "17343 [14, 36, 1512, 127, 6643, 2, 18028, 12, 1303, ...\n", 379 "14529 [11, 38, 5, 8606, 11, 733, 17797, 18999, 15767...\n", 380 "6313 [148, 295, 12, 18, 70, 268, 8119, 621, 1001, 6...\n", 381 "Name: Email Text, Length: 13987, dtype: object" 382 ] 383 }, 384 "execution_count": 9, 385 "metadata": {}, 386 "output_type": "execute_result" 387 } 388 ], 389 "source": [ 390 "X_train['Email Text']" 391 ] 392 }, 393 { 394 "cell_type": "code", 395 "execution_count": null, 396 "metadata": {}, 397 "outputs": [], 398 "source": [ 399 "from sklearn.naive_bayes import CategoricalNB\n", 400 "\n", 401 "cat_nb = CategoricalNB()\n", 402 "X_train_padded_flat = np.array(X_train['Email Text'].tolist())\n", 403 "\n", 404 "\n", 405 "def isPhish(txt):\n", 406 " if txt == 'Phishing Email':\n", 407 " return 1\n", 408 " else:\n", 409 " return 0\n", 410 "\n", 411 "y_train = y_train.apply(isPhish)" 412 ] 413 }, 414 { 415 "cell_type": "code", 416 "execution_count": null, 417 "metadata": {}, 418 "outputs": [ 419 { 420 "data": { 421 "text/html": [ 422 "<style>#sk-container-id-1 {\n", 423 " /* Definition of color scheme common for light and dark mode */\n", 424 " --sklearn-color-text: black;\n", 425 " --sklearn-color-line: gray;\n", 426 " /* Definition of color scheme for unfitted estimators */\n", 427 " --sklearn-color-unfitted-level-0: #fff5e6;\n", 428 " --sklearn-color-unfitted-level-1: #f6e4d2;\n", 429 " --sklearn-color-unfitted-level-2: #ffe0b3;\n", 430 " --sklearn-color-unfitted-level-3: chocolate;\n", 431 " /* Definition of color scheme for fitted estimators */\n", 432 " --sklearn-color-fitted-level-0: #f0f8ff;\n", 433 " --sklearn-color-fitted-level-1: #d4ebff;\n", 434 " --sklearn-color-fitted-level-2: #b3dbfd;\n", 435 " --sklearn-color-fitted-level-3: cornflowerblue;\n", 436 "\n", 437 " /* Specific color for light theme */\n", 438 " --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n", 439 " --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n", 440 " --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n", 441 " --sklearn-color-icon: #696969;\n", 442 "\n", 443 " @media (prefers-color-scheme: dark) {\n", 444 " /* Redefinition of color scheme for dark theme */\n", 445 " --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n", 446 " --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n", 447 " --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n", 448 " --sklearn-color-icon: #878787;\n", 449 " }\n", 450 "}\n", 451 "\n", 452 "#sk-container-id-1 {\n", 453 " color: var(--sklearn-color-text);\n", 454 "}\n", 455 "\n", 456 "#sk-container-id-1 pre {\n", 457 " padding: 0;\n", 458 "}\n", 459 "\n", 460 "#sk-container-id-1 input.sk-hidden--visually {\n", 461 " border: 0;\n", 462 " clip: rect(1px 1px 1px 1px);\n", 463 " clip: rect(1px, 1px, 1px, 1px);\n", 464 " height: 1px;\n", 465 " margin: -1px;\n", 466 " overflow: hidden;\n", 467 " padding: 0;\n", 468 " position: absolute;\n", 469 " width: 1px;\n", 470 "}\n", 471 "\n", 472 "#sk-container-id-1 div.sk-dashed-wrapped {\n", 473 " border: 1px dashed var(--sklearn-color-line);\n", 474 " margin: 0 0.4em 0.5em 0.4em;\n", 475 " box-sizing: border-box;\n", 476 " padding-bottom: 0.4em;\n", 477 " background-color: var(--sklearn-color-background);\n", 478 "}\n", 479 "\n", 480 "#sk-container-id-1 div.sk-container {\n", 481 " /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n", 482 " but bootstrap.min.css set `[hidden] { display: none !important; }`\n", 483 " so we also need the `!important` here to be able to override the\n", 484 " default hidden behavior on the sphinx rendered scikit-learn.org.\n", 485 " See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n", 486 " display: inline-block !important;\n", 487 " position: relative;\n", 488 "}\n", 489 "\n", 490 "#sk-container-id-1 div.sk-text-repr-fallback {\n", 491 " display: none;\n", 492 "}\n", 493 "\n", 494 "div.sk-parallel-item,\n", 495 "div.sk-serial,\n", 496 "div.sk-item {\n", 497 " /* draw centered vertical line to link estimators */\n", 498 " background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n", 499 " background-size: 2px 100%;\n", 500 " background-repeat: no-repeat;\n", 501 " background-position: center center;\n", 502 "}\n", 503 "\n", 504 "/* Parallel-specific style estimator block */\n", 505 "\n", 506 "#sk-container-id-1 div.sk-parallel-item::after {\n", 507 " content: \"\";\n", 508 " width: 100%;\n", 509 " border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n", 510 " flex-grow: 1;\n", 511 "}\n", 512 "\n", 513 "#sk-container-id-1 div.sk-parallel {\n", 514 " display: flex;\n", 515 " align-items: stretch;\n", 516 " justify-content: center;\n", 517 " background-color: var(--sklearn-color-background);\n", 518 " position: relative;\n", 519 "}\n", 520 "\n", 521 "#sk-container-id-1 div.sk-parallel-item {\n", 522 " display: flex;\n", 523 " flex-direction: column;\n", 524 "}\n", 525 "\n", 526 "#sk-container-id-1 div.sk-parallel-item:first-child::after {\n", 527 " align-self: flex-end;\n", 528 " width: 50%;\n", 529 "}\n", 530 "\n", 531 "#sk-container-id-1 div.sk-parallel-item:last-child::after {\n", 532 " align-self: flex-start;\n", 533 " width: 50%;\n", 534 "}\n", 535 "\n", 536 "#sk-container-id-1 div.sk-parallel-item:only-child::after {\n", 537 " width: 0;\n", 538 "}\n", 539 "\n", 540 "/* Serial-specific style estimator block */\n", 541 "\n", 542 "#sk-container-id-1 div.sk-serial {\n", 543 " display: flex;\n", 544 " flex-direction: column;\n", 545 " align-items: center;\n", 546 " background-color: var(--sklearn-color-background);\n", 547 " padding-right: 1em;\n", 548 " padding-left: 1em;\n", 549 "}\n", 550 "\n", 551 "\n", 552 "/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n", 553 "clickable and can be expanded/collapsed.\n", 554 "- Pipeline and ColumnTransformer use this feature and define the default style\n", 555 "- Estimators will overwrite some part of the style using the `sk-estimator` class\n", 556 "*/\n", 557 "\n", 558 "/* Pipeline and ColumnTransformer style (default) */\n", 559 "\n", 560 "#sk-container-id-1 div.sk-toggleable {\n", 561 " /* Default theme specific background. It is overwritten whether we have a\n", 562 " specific estimator or a Pipeline/ColumnTransformer */\n", 563 " background-color: var(--sklearn-color-background);\n", 564 "}\n", 565 "\n", 566 "/* Toggleable label */\n", 567 "#sk-container-id-1 label.sk-toggleable__label {\n", 568 " cursor: pointer;\n", 569 " display: block;\n", 570 " width: 100%;\n", 571 " margin-bottom: 0;\n", 572 " padding: 0.5em;\n", 573 " box-sizing: border-box;\n", 574 " text-align: center;\n", 575 "}\n", 576 "\n", 577 "#sk-container-id-1 label.sk-toggleable__label-arrow:before {\n", 578 " /* Arrow on the left of the label */\n", 579 " content: \"▸\";\n", 580 " float: left;\n", 581 " margin-right: 0.25em;\n", 582 " color: var(--sklearn-color-icon);\n", 583 "}\n", 584 "\n", 585 "#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {\n", 586 " color: var(--sklearn-color-text);\n", 587 "}\n", 588 "\n", 589 "/* Toggleable content - dropdown */\n", 590 "\n", 591 "#sk-container-id-1 div.sk-toggleable__content {\n", 592 " max-height: 0;\n", 593 " max-width: 0;\n", 594 " overflow: hidden;\n", 595 " text-align: left;\n", 596 " /* unfitted */\n", 597 " background-color: var(--sklearn-color-unfitted-level-0);\n", 598 "}\n", 599 "\n", 600 "#sk-container-id-1 div.sk-toggleable__content.fitted {\n", 601 " /* fitted */\n", 602 " background-color: var(--sklearn-color-fitted-level-0);\n", 603 "}\n", 604 "\n", 605 "#sk-container-id-1 div.sk-toggleable__content pre {\n", 606 " margin: 0.2em;\n", 607 " border-radius: 0.25em;\n", 608 " color: var(--sklearn-color-text);\n", 609 " /* unfitted */\n", 610 " background-color: var(--sklearn-color-unfitted-level-0);\n", 611 "}\n", 612 "\n", 613 "#sk-container-id-1 div.sk-toggleable__content.fitted pre {\n", 614 " /* unfitted */\n", 615 " background-color: var(--sklearn-color-fitted-level-0);\n", 616 "}\n", 617 "\n", 618 "#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n", 619 " /* Expand drop-down */\n", 620 " max-height: 200px;\n", 621 " max-width: 100%;\n", 622 " overflow: auto;\n", 623 "}\n", 624 "\n", 625 "#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n", 626 " content: \"▾\";\n", 627 "}\n", 628 "\n", 629 "/* Pipeline/ColumnTransformer-specific style */\n", 630 "\n", 631 "#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n", 632 " color: var(--sklearn-color-text);\n", 633 " background-color: var(--sklearn-color-unfitted-level-2);\n", 634 "}\n", 635 "\n", 636 "#sk-container-id-1 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n", 637 " background-color: var(--sklearn-color-fitted-level-2);\n", 638 "}\n", 639 "\n", 640 "/* Estimator-specific style */\n", 641 "\n", 642 "/* Colorize estimator box */\n", 643 "#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n", 644 " /* unfitted */\n", 645 " background-color: var(--sklearn-color-unfitted-level-2);\n", 646 "}\n", 647 "\n", 648 "#sk-container-id-1 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n", 649 " /* fitted */\n", 650 " background-color: var(--sklearn-color-fitted-level-2);\n", 651 "}\n", 652 "\n", 653 "#sk-container-id-1 div.sk-label label.sk-toggleable__label,\n", 654 "#sk-container-id-1 div.sk-label label {\n", 655 " /* The background is the default theme color */\n", 656 " color: var(--sklearn-color-text-on-default-background);\n", 657 "}\n", 658 "\n", 659 "/* On hover, darken the color of the background */\n", 660 "#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label {\n", 661 " color: var(--sklearn-color-text);\n", 662 " background-color: var(--sklearn-color-unfitted-level-2);\n", 663 "}\n", 664 "\n", 665 "/* Label box, darken color on hover, fitted */\n", 666 "#sk-container-id-1 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n", 667 " color: var(--sklearn-color-text);\n", 668 " background-color: var(--sklearn-color-fitted-level-2);\n", 669 "}\n", 670 "\n", 671 "/* Estimator label */\n", 672 "\n", 673 "#sk-container-id-1 div.sk-label label {\n", 674 " font-family: monospace;\n", 675 " font-weight: bold;\n", 676 " display: inline-block;\n", 677 " line-height: 1.2em;\n", 678 "}\n", 679 "\n", 680 "#sk-container-id-1 div.sk-label-container {\n", 681 " text-align: center;\n", 682 "}\n", 683 "\n", 684 "/* Estimator-specific */\n", 685 "#sk-container-id-1 div.sk-estimator {\n", 686 " font-family: monospace;\n", 687 " border: 1px dotted var(--sklearn-color-border-box);\n", 688 " border-radius: 0.25em;\n", 689 " box-sizing: border-box;\n", 690 " margin-bottom: 0.5em;\n", 691 " /* unfitted */\n", 692 " background-color: var(--sklearn-color-unfitted-level-0);\n", 693 "}\n", 694 "\n", 695 "#sk-container-id-1 div.sk-estimator.fitted {\n", 696 " /* fitted */\n", 697 " background-color: var(--sklearn-color-fitted-level-0);\n", 698 "}\n", 699 "\n", 700 "/* on hover */\n", 701 "#sk-container-id-1 div.sk-estimator:hover {\n", 702 " /* unfitted */\n", 703 " background-color: var(--sklearn-color-unfitted-level-2);\n", 704 "}\n", 705 "\n", 706 "#sk-container-id-1 div.sk-estimator.fitted:hover {\n", 707 " /* fitted */\n", 708 " background-color: var(--sklearn-color-fitted-level-2);\n", 709 "}\n", 710 "\n", 711 "/* Specification for estimator info (e.g. \"i\" and \"?\") */\n", 712 "\n", 713 "/* Common style for \"i\" and \"?\" */\n", 714 "\n", 715 ".sk-estimator-doc-link,\n", 716 "a:link.sk-estimator-doc-link,\n", 717 "a:visited.sk-estimator-doc-link {\n", 718 " float: right;\n", 719 " font-size: smaller;\n", 720 " line-height: 1em;\n", 721 " font-family: monospace;\n", 722 " background-color: var(--sklearn-color-background);\n", 723 " border-radius: 1em;\n", 724 " height: 1em;\n", 725 " width: 1em;\n", 726 " text-decoration: none !important;\n", 727 " margin-left: 1ex;\n", 728 " /* unfitted */\n", 729 " border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n", 730 " color: var(--sklearn-color-unfitted-level-1);\n", 731 "}\n", 732 "\n", 733 ".sk-estimator-doc-link.fitted,\n", 734 "a:link.sk-estimator-doc-link.fitted,\n", 735 "a:visited.sk-estimator-doc-link.fitted {\n", 736 " /* fitted */\n", 737 " border: var(--sklearn-color-fitted-level-1) 1pt solid;\n", 738 " color: var(--sklearn-color-fitted-level-1);\n", 739 "}\n", 740 "\n", 741 "/* On hover */\n", 742 "div.sk-estimator:hover .sk-estimator-doc-link:hover,\n", 743 ".sk-estimator-doc-link:hover,\n", 744 "div.sk-label-container:hover .sk-estimator-doc-link:hover,\n", 745 ".sk-estimator-doc-link:hover {\n", 746 " /* unfitted */\n", 747 " background-color: var(--sklearn-color-unfitted-level-3);\n", 748 " color: var(--sklearn-color-background);\n", 749 " text-decoration: none;\n", 750 "}\n", 751 "\n", 752 "div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n", 753 ".sk-estimator-doc-link.fitted:hover,\n", 754 "div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n", 755 ".sk-estimator-doc-link.fitted:hover {\n", 756 " /* fitted */\n", 757 " background-color: var(--sklearn-color-fitted-level-3);\n", 758 " color: var(--sklearn-color-background);\n", 759 " text-decoration: none;\n", 760 "}\n", 761 "\n", 762 "/* Span, style for the box shown on hovering the info icon */\n", 763 ".sk-estimator-doc-link span {\n", 764 " display: none;\n", 765 " z-index: 9999;\n", 766 " position: relative;\n", 767 " font-weight: normal;\n", 768 " right: .2ex;\n", 769 " padding: .5ex;\n", 770 " margin: .5ex;\n", 771 " width: min-content;\n", 772 " min-width: 20ex;\n", 773 " max-width: 50ex;\n", 774 " color: var(--sklearn-color-text);\n", 775 " box-shadow: 2pt 2pt 4pt #999;\n", 776 " /* unfitted */\n", 777 " background: var(--sklearn-color-unfitted-level-0);\n", 778 " border: .5pt solid var(--sklearn-color-unfitted-level-3);\n", 779 "}\n", 780 "\n", 781 ".sk-estimator-doc-link.fitted span {\n", 782 " /* fitted */\n", 783 " background: var(--sklearn-color-fitted-level-0);\n", 784 " border: var(--sklearn-color-fitted-level-3);\n", 785 "}\n", 786 "\n", 787 ".sk-estimator-doc-link:hover span {\n", 788 " display: block;\n", 789 "}\n", 790 "\n", 791 "/* \"?\"-specific style due to the `<a>` HTML tag */\n", 792 "\n", 793 "#sk-container-id-1 a.estimator_doc_link {\n", 794 " float: right;\n", 795 " font-size: 1rem;\n", 796 " line-height: 1em;\n", 797 " font-family: monospace;\n", 798 " background-color: var(--sklearn-color-background);\n", 799 " border-radius: 1rem;\n", 800 " height: 1rem;\n", 801 " width: 1rem;\n", 802 " text-decoration: none;\n", 803 " /* unfitted */\n", 804 " color: var(--sklearn-color-unfitted-level-1);\n", 805 " border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n", 806 "}\n", 807 "\n", 808 "#sk-container-id-1 a.estimator_doc_link.fitted {\n", 809 " /* fitted */\n", 810 " border: var(--sklearn-color-fitted-level-1) 1pt solid;\n", 811 " color: var(--sklearn-color-fitted-level-1);\n", 812 "}\n", 813 "\n", 814 "/* On hover */\n", 815 "#sk-container-id-1 a.estimator_doc_link:hover {\n", 816 " /* unfitted */\n", 817 " background-color: var(--sklearn-color-unfitted-level-3);\n", 818 " color: var(--sklearn-color-background);\n", 819 " text-decoration: none;\n", 820 "}\n", 821 "\n", 822 "#sk-container-id-1 a.estimator_doc_link.fitted:hover {\n", 823 " /* fitted */\n", 824 " background-color: var(--sklearn-color-fitted-level-3);\n", 825 "}\n", 826 "</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>CategoricalNB()</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" checked><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\"> CategoricalNB<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.5/modules/generated/sklearn.naive_bayes.CategoricalNB.html\">?<span>Documentation for CategoricalNB</span></a><span class=\"sk-estimator-doc-link fitted\">i<span>Fitted</span></span></label><div class=\"sk-toggleable__content fitted\"><pre>CategoricalNB()</pre></div> </div></div></div></div>" 827 ], 828 "text/plain": [ 829 "CategoricalNB()" 830 ] 831 }, 832 "execution_count": 11, 833 "metadata": {}, 834 "output_type": "execute_result" 835 } 836 ], 837 "source": [ 838 "cat_nb.fit(X_train_padded_flat, y_train)" 839 ] 840 }, 841 { 842 "cell_type": "code", 843 "execution_count": null, 844 "metadata": {}, 845 "outputs": [], 846 "source": [ 847 "X_test['Email Text'] = X_test['Email Text'].apply(applyLayer)\n", 848 "X_test['Email Text'] = list(pad_sequences(X_test['Email Text'], maxlen=5000, padding='post'))\n", 849 "X_test = np.array(X_test['Email Text'].tolist())" 850 ] 851 }, 852 { 853 "cell_type": "code", 854 "execution_count": null, 855 "metadata": {}, 856 "outputs": [ 857 { 858 "data": { 859 "text/plain": [ 860 "array([0])" 861 ] 862 }, 863 "execution_count": 31, 864 "metadata": {}, 865 "output_type": "execute_result" 866 } 867 ], 868 "source": [ 869 "cat_nb.predict([X_test[1000]])" 870 ] 871 }, 872 { 873 "cell_type": "code", 874 "execution_count": null, 875 "metadata": {}, 876 "outputs": [ 877 { 878 "data": { 879 "text/plain": [ 880 "'Phishing Email'" 881 ] 882 }, 883 "execution_count": 32, 884 "metadata": {}, 885 "output_type": "execute_result" 886 } 887 ], 888 "source": [ 889 "y_test.iloc[1000]" 890 ] 891 } 892 ], 893 "metadata": { 894 "kernelspec": { 895 "display_name": ".venv", 896 "language": "python", 897 "name": "python3" 898 }, 899 "language_info": { 900 "codemirror_mode": { 901 "name": "ipython", 902 "version": 3 903 }, 904 "file_extension": ".py", 905 "mimetype": "text/x-python", 906 "name": "python", 907 "nbconvert_exporter": "python", 908 "pygments_lexer": "ipython3", 909 "version": "3.11.2" 910 } 911 }, 912 "nbformat": 4, 913 "nbformat_minor": 2 914 }