machinelearning

Machine learning code
git clone git://git.laack.co/machinelearning.git
Log | Files | Refs

KerasEmbeddingPG.ipynb (22248B)


      1 {
      2  "cells": [
      3   {
      4    "cell_type": "markdown",
      5    "metadata": {},
      6    "source": [
      7     "Embedding playground"
      8    ]
      9   },
     10   {
     11    "cell_type": "code",
     12    "execution_count": 2,
     13    "metadata": {},
     14    "outputs": [
     15     {
     16      "name": "stderr",
     17      "output_type": "stream",
     18      "text": [
     19       "2024-07-01 07:58:30.477487: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.\n",
     20       "2024-07-01 07:58:30.480784: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.\n",
     21       "2024-07-01 07:58:30.522090: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
     22       "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
     23       "2024-07-01 07:58:31.202746: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n"
     24      ]
     25     }
     26    ],
     27    "source": [
     28     "import tensorflow as tf\n",
     29     "import pandas as pd\n",
     30     "import keras\n",
     31     "import numpy as np"
     32    ]
     33   },
     34   {
     35    "cell_type": "code",
     36    "execution_count": 3,
     37    "metadata": {},
     38    "outputs": [
     39     {
     40      "name": "stderr",
     41      "output_type": "stream",
     42      "text": [
     43       "2024-07-01 07:58:31.966844: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n",
     44       "2024-07-01 07:58:31.967654: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2251] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.\n",
     45       "Skipping registering GPU devices...\n"
     46      ]
     47     }
     48    ],
     49    "source": [
     50     "arrText = np.array(['test', 'why', 'would', 'this', 'work', 'weird', 'array', 'method', 'returns', 'math'])\n",
     51     "arrNums = np.array([])\n",
     52     "mapping = {}\n",
     53     "nextMap = 0\n",
     54     "\n",
     55     "for i in arrText:\n",
     56     "    if i in mapping:\n",
     57     "        arrNums = np.append(arrNums, mapping[i])\n",
     58     "    else:\n",
     59     "        mapping[i] = nextMap\n",
     60     "        arrNums = np.append(arrNums, mapping[i])\n",
     61     "        nextMap += 1\n",
     62     "\n",
     63     "\n",
     64     "embedding = keras.layers.Embedding(input_dim=10, output_dim=2)\n",
     65     "embedded = embedding(arrNums)"
     66    ]
     67   },
     68   {
     69    "cell_type": "code",
     70    "execution_count": 4,
     71    "metadata": {},
     72    "outputs": [
     73     {
     74      "data": {
     75       "text/plain": [
     76        "<tf.Tensor: shape=(10, 2), dtype=float32, numpy=\n",
     77        "array([[ 0.03480979,  0.0453563 ],\n",
     78        "       [ 0.01358359, -0.00183941],\n",
     79        "       [-0.01757333, -0.04817909],\n",
     80        "       [-0.0360611 ,  0.04445745],\n",
     81        "       [-0.00211517, -0.04308406],\n",
     82        "       [ 0.01702977,  0.01570695],\n",
     83        "       [ 0.04625987,  0.00359092],\n",
     84        "       [-0.0155    ,  0.02139652],\n",
     85        "       [ 0.04769083,  0.00969797],\n",
     86        "       [ 0.01138154, -0.04589012]], dtype=float32)>"
     87       ]
     88      },
     89      "execution_count": 4,
     90      "metadata": {},
     91      "output_type": "execute_result"
     92     }
     93    ],
     94    "source": [
     95     "embedded"
     96    ]
     97   },
     98   {
     99    "cell_type": "code",
    100    "execution_count": 5,
    101    "metadata": {},
    102    "outputs": [
    103     {
    104      "data": {
    105       "text/plain": [
    106        "<matplotlib.collections.PathCollection at 0x7fb3f02b8410>"
    107       ]
    108      },
    109      "execution_count": 5,
    110      "metadata": {},
    111      "output_type": "execute_result"
    112     },
    113     {
    114      "data": {
    115       "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjcAAAGdCAYAAADuR1K7AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy80BEi2AAAACXBIWXMAAA9hAAAPYQGoP6dpAAAmxUlEQVR4nO3df3TUVX7/8ddMQjKskknDj0yCiaC1CzEUCmxCPHuOrcRNdi1KF49sDohSjlQW0C6UAsqSZtse6rpWcFU4u6ceapGVYrfuhtLsocHuustIIFGXEMKxHhYQMomQzQTR/DBzv3/wzeiYSQhhJpm583yc8zmcufP+zNybT+K8/NzP547DGGMEAABgCedIdwAAACCSCDcAAMAqhBsAAGAVwg0AALAK4QYAAFiFcAMAAKxCuAEAAFYh3AAAAKskj3QHRkIgEND58+c1ZswYORyOke4OAAAYBGOMLl26pOzsbDmd/Z+fSchwc/78eeXk5Ix0NwAAwBCcPXtWN910U7/PJ2S4GTNmjKQrP5y0tLQR7g0AABiM9vZ25eTkBD/H+5OQ4aZ3KiotLY1wAwBAnLnaJSVcUAwAAKxCuAEAAFYh3AAAAKsQbgAAgFUINwAAwCqEGwAAYBXCDQAAsArhBgAAWCUhF/EDAOB69QSMak61quVShyaMcalgcoaSnHxfYSwg3AAAcI2q6ptUUdmgJn9HsC3L7VL5vDyV5meNYM8gMS0FAMA1qapv0opddSHBRpJ8/g6t2FWnqvqmEeoZehFuAAAYpJ6AUUVlg0yY53rbKiob1BMIV4HhQriJoJ6Akff9i/rZO+fkff8iv9wAYJmaU619zth8npHU5O9QzanW4esU+uCamwhh/hUA7Ndyqf9gM5Q6RAdnbiKA+VcASAwTxrgiWofoINxcJ+ZfASBxFEzOUJbbpf5u+Hboyln7gskZw9ktfAHh5jox/woAiSPJ6VD5vDxJ6hNweh+Xz8tjvZsRRri5Tsy/AkBiKc3P0vbFM+Vxh049edwubV88k+ssYwAXFF8n5l8BIPGU5mfp7jwPKxTHKMLNdeqdf/X5O8Jed+PQlTTP/CsA2CXJ6VDRrWNHuhsIg2mp68T8KwAAsYVwEwHMvwIAEDuYlooQ5l8BAIgNhJsIYv4VAICRx7QUAACwCuEGAABYhXADAACsQrgBAABWIdwAAACrDEu4eeGFFzRp0iS5XC4VFhaqpqZmwPq9e/dqypQpcrlcmjZtmvbv399v7aOPPiqHw6GtW7dGuNcAACAeRT3c7NmzR2vWrFF5ebnq6uo0ffp0lZSUqKWlJWz9oUOHVFZWpmXLluntt9/W/PnzNX/+fNXX1/ep/c///E+99dZbys7OjvYwAABAnIh6uPnnf/5nPfLII1q6dKny8vK0Y8cOfelLX9JLL70Utn7btm0qLS3VunXrNHXqVP393/+9Zs6cqeeffz6k7ty5c1q9erVeeeUVjRo1KtrDAAAAcSKq4aarq0u1tbUqLi7+7A2dThUXF8vr9Ybdx+v1htRLUklJSUh9IBDQgw8+qHXr1un222+/aj86OzvV3t4esgEAADtFNdxcuHBBPT09yszMDGnPzMyUz+cLu4/P57tq/VNPPaXk5GQ99thjg+rHli1b5Ha7g1tOTs41jgQAAMSLuLtbqra2Vtu2bdPOnTvlcAzue5s2btwov98f3M6ePRvlXgIAgJES1XAzbtw4JSUlqbm5OaS9ublZHo8n7D4ej2fA+jfffFMtLS3Kzc1VcnKykpOTdfr0aa1du1aTJk0K+5qpqalKS0sL2QAAgJ2iGm5SUlI0a9YsVVdXB9sCgYCqq6tVVFQUdp+ioqKQekk6cOBAsP7BBx/Ub3/7W73zzjvBLTs7W+vWrdMvfvGL6A0GAADEhah/K/iaNWv00EMPafbs2SooKNDWrVt1+fJlLV26VJK0ZMkSTZw4UVu2bJEkPf7447rzzjv1zDPP
6J577tGrr76qo0eP6kc/+pEkaezYsRo7NvSbt0eNGiWPx6Mvf/nL0R4OAACIcVEPNwsXLtSHH36ozZs3y+fzacaMGaqqqgpeNHzmzBk5nZ+dQLrjjju0e/dubdq0SU888YRuu+02vf7668rPz492VwEAgAUcxhgz0p0Ybu3t7XK73fL7/Vx/AwBAnBjs53fc3S0FAAAwEMINAACwCuEGAABYhXADAACsQrgBAABWIdwAAACrEG4AAIBVCDcAAMAqhBsAAGAVwg0AALAK4QYAAFiFcAMAAKxCuAEAAFYh3AAAAKsQbgAAgFUINwAAwCqEGwAAYBXCDQAAsArhBgAAWIVwAwAArEK4AQAAViHcAAAAqxBuAACAVQg3AADAKoQbAABgFcINAACwCuEGAABYhXADAACsQrgBAABWIdwAAACrEG4AAIBVCDcAAMAqhBsAAGAVwg0AALAK4QYAAFiFcAMAAKxCuAEAAFYh3AAAAKsQbgAAgFUINwAAwCqEGwAAYBXCDQAAsArhBgAAWIVwAwAArEK4AQAAViHcAAAAqxBuAACAVQg3AADAKoQbAABgFcINAACwCuEGAABYhXADAACsQrgBAABWIdwAAACrEG4AAIBVkke6A8BI6wkY1ZxqVculDk0Y41LB5AwlOR0j3S0AwBARbpDQquqbVFHZoCZ/R7Aty+1S+bw8leZnjWDPAABDxbQUElZVfZNW7KoLCTaS5PN3aMWuOlXVN41QzwAA14Nwg4TUEzCqqGyQCfNcb1tFZYN6AuEqAACxjHCDhFRzqrXPGZvPM5Ka/B2qOdU6fJ0CAETEsISbF154QZMmTZLL5VJhYaFqamoGrN+7d6+mTJkil8uladOmaf/+/cHnuru7tX79ek2bNk033HCDsrOztWTJEp0/fz7aw4BFWi71H2yGUgcAiB1RDzd79uzRmjVrVF5errq6Ok2fPl0lJSVqaWkJW3/o0CGVlZVp2bJlevvttzV//nzNnz9f9fX1kqSPP/5YdXV1+u53v6u6ujr99Kc/1cmTJ3XvvfdGeyiwyIQxrojWAQBih8MYE9WLCgoLC/WVr3xFzz//vCQpEAgoJydHq1ev1oYNG/rUL1y4UJcvX9a+ffuCbXPmzNGMGTO0Y8eOsO9x5MgRFRQU6PTp08rNzb1qn9rb2+V2u+X3+5WWljbEkSGe9QSMvvrUQfn8HWGvu3FI8rhd+vX6u7gtHABixGA/v6N65qarq0u1tbUqLi7+7A2dThUXF8vr9Ybdx+v1htRLUklJSb/1kuT3++VwOJSenh6RfsN+SU6HyuflSboSZD6v93H5vDyCDQDEoaiGmwsXLqinp0eZmZkh7ZmZmfL5fGH38fl811Tf0dGh9evXq6ysrN8U19nZqfb29pANKM3P0vbFM+Vxh049edwubV88k3VuACBOxfUift3d3XrggQdkjNH27dv7rduyZYsqKiqGsWeIF6X5Wbo7z8MKxQBgkaiGm3HjxikpKUnNzc0h7c3NzfJ4PGH38Xg8g6rvDTanT5/WwYMHB5x727hxo9asWRN83N7erpycnGsdDiyV5HSo6NaxI90NAECERHVaKiUlRbNmzVJ1dXWwLRAIqLq6WkVFRWH3KSoqCqmXpAMHDoTU9wab9957T//zP/+jsWMH/mBKTU1VWlpayAYAAOwU9WmpNWvW6KGHHtLs2bNVUFCgrVu36vLly1q6dKkkacmSJZo4caK2bNkiSXr88cd155136plnntE999yjV199VUePHtWPfvQjSVeCzf3336+6ujrt27dPPT09wetxMjIylJKSEu0hAQCAGBb1cLNw4UJ9+OGH2rx5s3w+n2bMmKGqqqrgRcNnzpyR0/nZCaQ77rhDu3fv1qZNm/TEE0/otttu0+uvv678/HxJ0rlz5/Tzn/9ckjRjxoyQ93rjjTf0p3/6p9EeEgAAiGFRX+cmFrHODQAA8Scm1rkBAAAYboQbAABgFcINAACwCuEGAABYJa5X
KAaARNYTMKyuDYRBuAGAOFRV36SKygY1+TuCbVlul8rn5fG9aEh4TEsBQJypqm/Sil11IcFGknz+Dq3YVaeq+qYR6hkQGwg3ABBHegJGFZUNCrdAWW9bRWWDegIJt4QZEES4AYA4UnOqtc8Zm88zkpr8Hao51Tp8nQJiDOEGAOJIy6X+g81Q6gAbEW4AII5MGOOKaB1gI8INAMSRgskZynK71N8N3w5duWuqYHLGcHYLiCmEGwCII0lOh8rn5UlSn4DT+7h8Xh7r3SChEW4AIM6U5mdp++KZ8rhDp548bpe2L57JOjdIeCziBwBxqDQ/S3fneVihGAiDcAMAcSrJ6VDRrWNHuhtAzGFaCgAAWIVwAwAArEK4AQAAViHcAAAAqxBuAACAVQg3AADAKoQbAABgFcINAACwCuEGAABYhXADAACswtcvAACAiOgJmJj4vjPCDQAAuG5V9U2qqGxQk78j2Jbldql8Xt6wf1M901IAAOC6VNU3acWuupBgI0k+f4dW7KpTVX3TsPaHcAMAAIasJ2BUUdkgE+a53raKygb1BMJVRAfhBgAADFnNqdY+Z2w+z0hq8neo5lTrsPWJcAMAAIas5VL/wWYodZFAuAEAAEM2YYwronWRQLgBAABDVjA5Q1lul/q74duhK3dNFUzOGLY+EW4AAMCQJTkdKp+XJ0l9Ak7v4/J5ecO63g3hBgAAXJfS/CxtXzxTHnfo1JPH7dL2xTOHfZ0bFvEDAADXrTQ/S3fneVihGAAA2CPJ6VDRrWNHuhtMSwEAALsQbgAAgFUINwAAwCqEGwAAYBXCDQAAsArhBgAAWIVwAwAArEK4AQAAViHcAAAAqxBuAACAVfj6BQAAMGg9ARMT3x81EMINAAAYlKr6JlVUNqjJ3xFsy3K7VD4vb9i/+XsgTEsBAICrqqpv0opddSHBRpJ8/g6t2FWnqvqmEepZX4QbAAAwoJ6AUUVlg0yY53rbKiob1BMIVzH8CDcAAGBANada+5yx+TwjqcnfoZpTrcPXqQEQbgAAwIBaLvUfbIZSF22EGwAAMKAJY1wRrYs2wg0AABhQweQMZbld6u+Gb4eu3DVVMDljOLvVL8INAAAYUJLTofJ5eZLUJ+D0Pi6flxcz690QbgAAwFWV5mdp++KZ8rhDp548bpe2L54ZU+vcsIgfAAAYlNL8LN2d52GFYgAAYI8kp0NFt44d6W4MiGkpAABglWEJNy+88IImTZokl8ulwsJC1dTUDFi/d+9eTZkyRS6XS9OmTdP+/ftDnjfGaPPmzcrKytLo0aNVXFys9957L5pDAAAAcSLq4WbPnj1as2aNysvLVVdXp+nTp6ukpEQtLS1h6w8dOqSysjItW7ZMb7/9tubPn6/58+ervr4+WPP9739fzz33nHbs2KHDhw/rhhtuUElJiTo6YmPxIAAAMHIcxpiofhFEYWGhvvKVr+j555+XJAUCAeXk5Gj16tXasGFDn/qFCxfq8uXL2rdvX7Btzpw5mjFjhnbs2CFjjLKzs7V27Vr9zd/8jSTJ7/crMzNTO3fu1Le+9a2r9qm9vV1ut1t+v19paWkRGikAAIimwX5+R/XMTVdXl2pra1VcXPzZGzqdKi4ultfrDbuP1+sNqZekkpKSYP2pU6fk8/lCatxutwoLC/t9zc7OTrW3t4dsAADATlENNxcuXFBPT48yMzND2jMzM+Xz+cLu4/P5Bqzv/fdaXnPLli1yu93BLScnZ0jjAQAAsS8h7pbauHGj/H5/cDt79uxIdwkAAERJVNe5GTdunJKSktTc3BzS3tzcLI/HE3Yfj8czYH3vv83NzcrKygqpmTFjRtjXTE1NVWpq6lCHAQAR1RMwMb8IGhDPonrmJiUlRbNmzVJ1dXWwLRAIqLq6WkVFRWH3KSoqCqmXpAMHDgTrJ0+eLI/HE1LT3t6uw4cP9/uaABArquqb9NWnDqrsx2/p8VffUdmP39JXnzqoqvqmke4aYI2oT0utWbNGP/7x
j/Wv//qvOnHihFasWKHLly9r6dKlkqQlS5Zo48aNwfrHH39cVVVVeuaZZ9TY2Ki/+7u/09GjR7Vq1SpJksPh0F//9V/rH/7hH/Tzn/9cx44d05IlS5Sdna358+dHezgAMGRV9U1asatOTf7QZSt8/g6t2FVHwAEiJOpfv7Bw4UJ9+OGH2rx5s3w+n2bMmKGqqqrgBcFnzpyR0/lZxrrjjju0e/dubdq0SU888YRuu+02vf7668rPzw/W/O3f/q0uX76s5cuXq62tTV/96ldVVVUll8vV5/0BIBb0BIwqKhsUbu0NoyvfrFxR2aC78zxMUQHXKerr3MQi1rkBMNy8719U2Y/fumrdTx6ZE/Pf2wOMlJhY5wYAcEXLpcGtoD7YOgD9I9wAwDCYMGZw0+aDrQPQP8INAAyDgskZynK71N/VNA5JWe4rt4UDuD6EGwAYBklOh8rn5UlSn4DT+7h8Xh4XEwMRQLgBgGFSmp+l7YtnyuMOnXryuF3avnimSvOz+tkTwLWI+q3gAIDPlOZn6e48DysUA1FEuAGAYZbkdHC7NxBFTEsBAACrEG4AAIBVCDcAAMAqhBsAAGAVwg0AALAK4QYAAFiFcAMAAKxCuAEAAFYh3AAAAKsQbgAAgFUINwAAwCqEGwAAYBXCDQAAsArhBgAAWIVwAwAArEK4AQAAViHcAAAAqxBuAACAVQg3AADAKoQbAABgFcINAACwCuEGAABYhXADAACsQrgBAABWIdwAAACrEG4AAIBVCDcAAMAqhBsAAGAVwg0AALAK4QYAAFiFcAMAAKxCuAEAAFYh3AAAAKsQbgAAgFUINwAAwCqEGwAAYBXCDQAAsArhBgAAWIVwAwAArEK4AQAAViHcAAAAqxBuAACAVQg3AADAKoQbAABgFcINAACwCuEGAABYhXADAACsQrgBAABWIdwAAACrEG4AAIBVCDcAAMAqhBsAAGAVwg0AALAK4QYAAFglauGmtbVVixYtUlpamtLT07Vs2TJ99NFHA+7T0dGhlStXauzYsbrxxhu1YMECNTc3B59/9913VVZWppycHI0ePVpTp07Vtm3bojUEAAAQh6IWbhYtWqTjx4/rwIED2rdvn371q19p+fLlA+7zne98R5WVldq7d69++ctf6vz58/rmN78ZfL62tlYTJkzQrl27dPz4cT355JPauHGjnn/++WgNAwAAxBmHMcZE+kVPnDihvLw8HTlyRLNnz5YkVVVV6Rvf+IY++OADZWdn99nH7/dr/Pjx2r17t+6//35JUmNjo6ZOnSqv16s5c+aEfa+VK1fqxIkTOnjw4KD7197eLrfbLb/fr7S0tCGMEAAADLfBfn5H5cyN1+tVenp6MNhIUnFxsZxOpw4fPhx2n9raWnV3d6u4uDjYNmXKFOXm5srr9fb7Xn6/XxkZGQP2p7OzU+3t7SEbAACwU1TCjc/n04QJE0LakpOTlZGRIZ/P1+8+KSkpSk9PD2nPzMzsd59Dhw5pz549V53u2rJli9xud3DLyckZ/GAAAEBcuaZws2HDBjkcjgG3xsbGaPU1RH19ve677z6Vl5fra1/72oC1GzdulN/vD25nz54dlj4CAIDhl3wtxWvXrtXDDz88YM0tt9wij8ejlpaWkPZPP/1Ura2t8ng8YffzeDzq6upSW1tbyNmb5ubmPvs0NDRo7ty5Wr58uTZt2nTVfqempio1NfWqdQAAIP5dU7gZP368xo8ff9W6oqIitbW1qba2VrNmzZIkHTx4UIFAQIWFhWH3mTVrlkaNGqXq6motWLBAknTy5EmdOXNGRUVFwbrjx4/rrrvu0kMPPaR//Md/vJbuAwCABBCVu6Uk6etf/7qam5u1Y8cOdXd3a+nSpZo9e7Z2794tSTp37pzmzp2rl19+WQUFBZKkFStWaP/+/dq5c6fS0tK0evVqSVeurZGuTEXdddddKikp0dNPPx18r6SkpEGFrl7cLQUAQPwZ7Of3NZ25uRavvPKKVq1a
pblz58rpdGrBggV67rnngs93d3fr5MmT+vjjj4Ntzz77bLC2s7NTJSUlevHFF4PPv/baa/rwww+1a9cu7dq1K9h+880363e/+120hgIAAOJI1M7cxDLO3AAAEH9GdJ0bAACAkUK4AQAAViHcAAAAqxBuAACAVQg3AADAKoQbAABgFcINAACwCuEGAABYhXADAACsQrgBAABWIdwAAACrEG4AAIBVCDcAAMAqhBsAAGAVwg0AALAK4QYAAFiFcAMAAKxCuAEAAFYh3AAAAKsQbgAAgFUINwAAwCqEGwAAYBXCDQAAsArhBgAAWIVwAwAArEK4AQAAViHcAAAAqxBuAACAVQg3AADAKoQbAABgFcINAACwCuEGAABYhXADAACsQrgBAABWIdwAAACrEG4AAIBVCDcAAMAqhBsAAGAVwg0AALAK4QYAAFiFcAMAAKxCuAEAAFYh3AAAAKsQbgAAgFUINwAAwCqEGwAAYBXCDQAAsArhBgAAWIVwAwAArEK4AQAAViHcAAAAqxBuAACAVQg3AADAKoQbAABgFcINAACwCuEGAABYhXADAACsQrgBAABWIdwAAACrEG4AAIBVCDcAAMAqUQs3ra2tWrRokdLS0pSenq5ly5bpo48+GnCfjo4OrVy5UmPHjtWNN96oBQsWqLm5OWztxYsXddNNN8nhcKitrS0KIwAAAPEoauFm0aJFOn78uA4cOKB9+/bpV7/6lZYvXz7gPt/5zndUWVmpvXv36pe//KXOnz+vb37zm2Frly1bpj/+4z+ORtcBAEAccxhjTKRf9MSJE8rLy9ORI0c0e/ZsSVJVVZW+8Y1v6IMPPlB2dnafffx+v8aPH6/du3fr/vvvlyQ1NjZq6tSp8nq9mjNnTrB2+/bt2rNnjzZv3qy5c+fq97//vdLT0wfdv/b2drndbvn9fqWlpV3fYAEAwLAY7Od3VM7ceL1epaenB4ONJBUXF8vpdOrw4cNh96mtrVV3d7eKi4uDbVOmTFFubq68Xm+wraGhQd/73vf08ssvy+kcXPc7OzvV3t4esgEAADtFJdz4fD5NmDAhpC05OVkZGRny+Xz97pOSktLnDExmZmZwn87OTpWVlenpp59Wbm7uoPuzZcsWud3u4JaTk3NtAwIAAHHjmsLNhg0b5HA4BtwaGxuj1Vdt3LhRU6dO1eLFi695P7/fH9zOnj0bpR4CAICRlnwtxWvXrtXDDz88YM0tt9wij8ejlpaWkPZPP/1Ura2t8ng8YffzeDzq6upSW1tbyNmb5ubm4D4HDx7UsWPH9Nprr0mSei8XGjdunJ588klVVFSEfe3U1FSlpqYOZogAACDOXVO4GT9+vMaPH3/VuqKiIrW1tam2tlazZs2SdCWYBAIBFRYWht1n1qxZGjVqlKqrq7VgwQJJ0smTJ3XmzBkVFRVJkv7jP/5Dn3zySXCfI0eO6C//8i/15ptv6tZbb72WoQAAAEtdU7gZrKlTp6q0tFSPPPKIduzYoe7ubq1atUrf+ta3gndKnTt3TnPnztXLL7+sgoICud1uLVu2TGvWrFFGRobS0tK0evVqFRUVBe+U+mKAuXDhQvD9ruVuKQAAYK+ohBtJeuWVV7Rq1SrNnTtXTqdTCxYs0HPPPRd8vru7WydPntTHH38cbHv22WeDtZ2dnSopKdGLL74YrS4CAAALRWWdm1jHOjfA4PUEjGpOtarlUocmjHGpYHKGkpyOke4WgAQ02M/vqJ25ARD/quqbVFHZoCZ/R7Aty+1S+bw8leZnjWDPAKB/fHEmgLCq6pu0YlddSLCRJJ+/Qyt21amqvmmEegYAAyPcAOijJ2BUUdmgcHPWvW0VlQ3qCSTcrDaAOEC4AdBHzanWPmdsPs9IavJ3qOZU6/B1CgAGiXADoI+WS/0Hm6HUAcBwItwA6GPCGFdE6wBgOBFuAPRRMDlDWW6X+rvh26Erd00VTM4Yzm4BwKAQbgD0keR0qHxeniT1CTi9j8vn5bHeDYCY
RLgBEFZpfpa2L54pjzt06snjdmn74pmscwMgZrGIH4B+leZn6e48DysUXwWrOAOxhXADYEBJToeKbh070t2IWaziDMQepqUAYIhYxRmITYQbABgCVnEGYhfhBgCGgFWcgdhFuAGAIWAVZyB2EW4AYAhYxRmIXYQbABgCVnEGYhfhBlbqCRh537+on71zTt73L3JRJyKOVZyB2MU6N7AO645guPSu4vzF3zcPv2/AiHIYYxLuf2nb29vldrvl9/uVlpY20t1BBPWuO/LFX+re/3fmawMQDaxQDAyPwX5+c+YG1rjauiMOXVl35O48Dx88iChWcQZiC9fcwBqsOwIAkAg3sAjrjgAAJMINLMK6IwAAiXADi7DuCABAItzAIqw7AgCQCDewTO+6Ix536NSTx+3iNnAASBDcCg7rlOZn6e48D+uOAECCItzASqw7AgCJi2kpAABgFcINAACwCuEGAABYhXADAACsQrgBAABWIdwAAACrEG4AAIBVCDcAAMAqhBsAAGCVhFyh2BgjSWpvbx/hngAAgMHq/dzu/RzvT0KGm0uXLkmScnJyRrgnAADgWl26dElut7vf5x3mavHHQoFAQCdPnlReXp7Onj2rtLS0ke4SrqK9vV05OTkcrzjB8YofHKv4kujHyxijS5cuKTs7W05n/1fWJOSZG6fTqYkTJ0qS0tLSEvIXJF5xvOILxyt+cKziSyIfr4HO2PTigmIAAGAVwg0AALBKwoab1NRUlZeXKzU1daS7gkHgeMUXjlf84FjFF47X4CTkBcUAAMBeCXvmBgAA2IlwAwAArEK4AQAAViHcAAAAq1gdblpbW7Vo0SKlpaUpPT1dy5Yt00cffTTgPh0dHVq5cqXGjh2rG2+8UQsWLFBzc3PY2osXL+qmm26Sw+FQW1tbFEaQOKJxrN59912VlZUpJydHo0eP1tSpU7Vt27ZoD8VKL7zwgiZNmiSXy6XCwkLV1NQMWL93715NmTJFLpdL06ZN0/79+0OeN8Zo8+bNysrK0ujRo1VcXKz33nsvmkNIKJE8Xt3d3Vq/fr2mTZumG264QdnZ2VqyZInOnz8f7WEkjEj/fX3eo48+KofDoa1bt0a41zHOWKy0tNRMnz7dvPXWW+bNN980f/iHf2jKysoG3OfRRx81OTk5prq62hw9etTMmTPH3HHHHWFr77vvPvP1r3/dSDK///3vozCCxBGNY/Uv//Iv5rHHHjP/+7//a95//33zb//2b2b06NHmhz/8YbSHY5VXX33VpKSkmJdeeskcP37cPPLIIyY9Pd00NzeHrf/Nb35jkpKSzPe//33T0NBgNm3aZEaNGmWOHTsWrPmnf/on43a7zeuvv27effddc++995rJkyebTz75ZLiGZa1IH6+2tjZTXFxs9uzZYxobG43X6zUFBQVm1qxZwzksa0Xj76vXT3/6UzN9+nSTnZ1tnn322SiPJLZYG24aGhqMJHPkyJFg23//938bh8Nhzp07F3aftrY2M2rUKLN3795g24kTJ4wk4/V6Q2pffPFFc+edd5rq6mrCzXWK9rH6vG9/+9vmz/7szyLX+QRQUFBgVq5cGXzc09NjsrOzzZYtW8LWP/DAA+aee+4JaSssLDR/9Vd/ZYwxJhAIGI/HY55++ung821tbSY1NdX85Cc/icIIEkukj1c4NTU1RpI5ffp0ZDqdwKJ1vD744AMzceJEU19fb26++eaECzfWTkt5vV6lp6dr9uzZwbbi4mI5nU4dPnw47D61tbXq7u5WcXFxsG3KlCnKzc2V1+sNtjU0NOh73/ueXn755QG/uAuDE81j9UV+v18ZGRmR67zlurq6VFtbG/JzdjqdKi4u7vfn7PV6Q+olqaSkJFh/6tQp+Xy+kBq3263CwsIBjx2uLhrHKxy/3y+Hw6H09PSI9DtRRet4BQIBPfjgg1q3bp1uv/326HQ+xln7yezz+TRhwoSQtuTkZGVkZMjn8/W7T0pKSp8/2MzMzOA+nZ2d
Kisr09NPP63c3Nyo9D3RROtYfdGhQ4e0Z88eLV++PCL9TgQXLlxQT0+PMjMzQ9oH+jn7fL4B63v/vZbXxOBE43h9UUdHh9avX6+ysrKE/eLGSInW8XrqqaeUnJysxx57LPKdjhNxF242bNggh8Mx4NbY2Bi199+4caOmTp2qxYsXR+09bDHSx+rz6uvrdd9996m8vFxf+9rXhuU9Adt0d3frgQcekDFG27dvH+nuIIza2lpt27ZNO3fulMPhGOnujJjkke7AtVq7dq0efvjhAWtuueUWeTwetbS0hLR/+umnam1tlcfjCbufx+NRV1eX2traQs4INDc3B/c5ePCgjh07ptdee03Slbs+JGncuHF68sknVVFRMcSR2Wekj1WvhoYGzZ07V8uXL9emTZuGNJZENW7cOCUlJfW5YzDcz7mXx+MZsL733+bmZmVlZYXUzJgxI4K9TzzROF69eoPN6dOndfDgQc7aREA0jtebb76plpaWkJmFnp4erV27Vlu3btXvfve7yA4iVo30RT/R0nuR6tGjR4Ntv/jFLwZ1keprr70WbGtsbAy5SPX//u//zLFjx4LbSy+9ZCSZQ4cO9Xt1OwYWrWNljDH19fVmwoQJZt26ddEbgOUKCgrMqlWrgo97enrMxIkTB7zg8c///M9D2oqKivpcUPyDH/wg+Lzf7+eC4giJ9PEyxpiuri4zf/58c/vtt5uWlpbodDxBRfp4XbhwIeQz6tixYyY7O9usX7/eNDY2Rm8gMcbacGPMlduL/+RP/sQcPnzY/PrXvza33XZbyO3FH3zwgfnyl79sDh8+HGx79NFHTW5urjl48KA5evSoKSoqMkVFRf2+xxtvvMHdUhEQjWN17NgxM378eLN48WLT1NQU3PiP87V59dVXTWpqqtm5c6dpaGgwy5cvN+np6cbn8xljjHnwwQfNhg0bgvW/+c1vTHJysvnBD35gTpw4YcrLy8PeCp6enm5+9rOfmd/+9rfmvvvu41bwCIn08erq6jL33nuvuemmm8w777wT8rfU2dk5ImO0STT+vr4oEe+WsjrcXLx40ZSVlZkbb7zRpKWlmaVLl5pLly4Fnz916pSRZN54441g2yeffGK+/e1vmz/4gz8wX/rSl8xf/MVfmKampn7fg3ATGdE4VuXl5UZSn+3mm28expHZ4Yc//KHJzc01KSkppqCgwLz11lvB5+68807z0EMPhdT/+7//u/mjP/ojk5KSYm6//XbzX//1XyHPBwIB893vftdkZmaa1NRUM3fuXHPy5MnhGEpCiOTx6v3bC7d9/u8RQxfpv68vSsRw4zDm/180AgAAYIG4u1sKAABgIIQbAABgFcINAACwCuEGAABYhXADAACsQrgBAABWIdwAAACrEG4AAIBVCDcAAMAqhBsAAGAVwg0AALAK4QYAAFjl/wFvqYjCOnCfGwAAAABJRU5ErkJggg==",
    116       "text/plain": [
    117        "<Figure size 640x480 with 1 Axes>"
    118       ]
    119      },
    120      "metadata": {},
    121      "output_type": "display_data"
    122     }
    123    ],
    124    "source": [
    125     "import matplotlib.pyplot as plt\n",
    126     "def plotEmbedding(embedded):\n",
    127     "    return plt.scatter(x=embedded[:,0], y=embedded[:,1])\n",
    128     "\n",
    129     "plotEmbedding(embedded)"
    130    ]
    131   },
    132   {
    133    "cell_type": "markdown",
    134    "metadata": {},
    135    "source": [
    136     "Keras has a built-in `StringLookup` layer, so you don't have to maintain your own dictionary for this:"
    137    ]
    138   },
    139   {
    140    "cell_type": "code",
    141    "execution_count": 6,
    142    "metadata": {},
    143    "outputs": [
    144     {
    145      "data": {
    146       "text/plain": [
    147        "<tf.Tensor: shape=(2,), dtype=int64, numpy=array([0, 4])>"
    148       ]
    149      },
    150      "execution_count": 6,
    151      "metadata": {},
    152      "output_type": "execute_result"
    153     }
    154    ],
    155    "source": [
    156     "str_lookup = keras.layers.StringLookup()\n",
    157     "str_lookup.adapt(arrText)\n",
    158     "# Index 0 is reserved for out-of-vocabulary (unrecognized) tokens, so 'rnd' maps to 0.\n",
    159     "str_lookup(['rnd', 'weird'])"
    160    ]
    161   },
    162   {
    163    "cell_type": "markdown",
    164    "metadata": {},
    165    "source": [
    166     "`TextVectorization` is essentially a more capable string lookup: by default it lowercases the text, strips punctuation, and splits on whitespace.\n",
    167     "\n",
    168     "To preserve case and punctuation, set `standardize=None`.\n",
    169     "\n",
    170     "Unknown words are now encoded as 1, because index 0 is reserved for padding."
    171    ]
    172   },
    173   {
    174    "cell_type": "code",
    175    "execution_count": 7,
    176    "metadata": {},
    177    "outputs": [],
    178    "source": [
    179     "training = ['the distant realm of Lumina, where the sky shimmered with hues unknown to the mundane world, an ancient prophecy began to unfold. The Great Tree of Elaria, standing tall and majestic in the heart of the enchanted forest, whispered secrets to']\n",
    180     "\n",
    181     "text_vec = keras.layers.TextVectorization()\n",
    182     "text_vec.adapt(training)"
    183    ]
    184   },
    185   {
    186    "cell_type": "code",
    187    "execution_count": 8,
    188    "metadata": {},
    189    "outputs": [
    190     {
    191      "data": {
    192       "text/plain": [
    193        "<tf.Tensor: shape=(1, 6), dtype=int64, numpy=array([[29, 17,  4, 21,  8,  2]])>"
    194       ]
    195      },
    196      "execution_count": 8,
    197      "metadata": {},
    198      "output_type": "execute_result"
    199     }
    200    ],
    201    "source": [
    202     "out = text_vec(['distant realm of Lumina, where the '])\n",
    203     "out"
    204    ]
    205   },
    206   {
    207    "cell_type": "code",
    208    "execution_count": 9,
    209    "metadata": {},
    210    "outputs": [
    211     {
    212      "data": {
    213       "text/plain": [
    214        "array(['', '[UNK]', 'the', 'to', 'of', 'world', 'with', 'whispered',\n",
    215        "       'where', 'unknown', 'unfold', 'tree', 'tall', 'standing', 'sky',\n",
    216        "       'shimmered', 'secrets', 'realm', 'prophecy', 'mundane', 'majestic',\n",
    217        "       'lumina', 'in', 'hues', 'heart', 'great', 'forest', 'enchanted',\n",
    218        "       'elaria', 'distant', 'began', 'and', 'ancient', 'an'], dtype='<U9')"
    219       ]
    220      },
    221      "execution_count": 9,
    222      "metadata": {},
    223      "output_type": "execute_result"
    224     }
    225    ],
    226    "source": [
    227     "vocab = np.array(text_vec.get_vocabulary())\n",
    228     "vocab"
    229    ]
    230   },
    231   {
    232    "cell_type": "code",
    233    "execution_count": 10,
    234    "metadata": {},
    235    "outputs": [
    236     {
    237      "data": {
    238       "text/plain": [
    239        "'distant realm of lumina where the'"
    240       ]
    241      },
    242      "execution_count": 10,
    243      "metadata": {},
    244      "output_type": "execute_result"
    245     }
    246    ],
    247    "source": [
    248     "decoded_texts = []\n",
    249     "for sequence in out:\n",
    250     "    decoded_texts.append([vocab[i] for i in sequence if i != 0])\n",
    251     "' '.join(decoded_texts[0])"
    252    ]
    253   },
    254   {
    255    "cell_type": "code",
    256    "execution_count": 16,
    257    "metadata": {},
    258    "outputs": [
    259     {
    260      "data": {
    261       "text/plain": [
    262        "<tf.Tensor: shape=(1, 17), dtype=int64, numpy=array([[0, 3, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])>"
    263       ]
    264      },
    265      "execution_count": 16,
    266      "metadata": {},
    267      "output_type": "execute_result"
    268     }
    269    ],
    270    "source": [
    271     "# Default this outputs the embeddings but with count you get the count (one hot style) of the words.\n",
    272     "# You can also use 'multi_hot' to get binary (has/does not have). Also, you can use tf_idf to downweight\n",
    273     "# common words and upweight less common ones (still onehot esque).\n",
    274     "\n",
    275     "vect = keras.layers.TextVectorization(output_mode='count')\n",
    276     "st = 'what would that seem to be what would why where when how they are not there when you try that would'\n",
    277     "vect.adapt(st)\n",
    278     "\n",
    279     "vect([st])"
    280    ]
    281   }
    282  ],
    283  "metadata": {
    284   "kernelspec": {
    285    "display_name": ".venv",
    286    "language": "python",
    287    "name": "python3"
    288   },
    289   "language_info": {
    290    "codemirror_mode": {
    291     "name": "ipython",
    292     "version": 3
    293    },
    294    "file_extension": ".py",
    295    "mimetype": "text/x-python",
    296    "name": "python",
    297    "nbconvert_exporter": "python",
    298    "pygments_lexer": "ipython3",
    299    "version": "3.11.2"
    300   }
    301  },
    302  "nbformat": 4,
    303  "nbformat_minor": 2
    304 }