machinelearning

Machine learning code
git clone git://git.laack.co/machinelearning.git
Log | Files | Refs

testingOneHotEncodingAndGraphing.ipynb (14627B)


      1 {
      2  "cells": [
      3   {
      4    "cell_type": "code",
      5    "execution_count": 135,
      6    "metadata": {},
      7    "outputs": [
      8     {
      9      "data": {
     10       "text/html": [
     11        "<div>\n",
     12        "<style scoped>\n",
     13        "    .dataframe tbody tr th:only-of-type {\n",
     14        "        vertical-align: middle;\n",
     15        "    }\n",
     16        "\n",
     17        "    .dataframe tbody tr th {\n",
     18        "        vertical-align: top;\n",
     19        "    }\n",
     20        "\n",
     21        "    .dataframe thead th {\n",
     22        "        text-align: right;\n",
     23        "    }\n",
     24        "</style>\n",
     25        "<table border=\"1\" class=\"dataframe\">\n",
     26        "  <thead>\n",
     27        "    <tr style=\"text-align: right;\">\n",
     28        "      <th></th>\n",
     29        "      <th>0</th>\n",
     30        "      <th>1</th>\n",
     31        "      <th>2</th>\n",
     32        "      <th>3</th>\n",
     33        "      <th>4</th>\n",
     34        "      <th>5</th>\n",
     35        "    </tr>\n",
     36        "  </thead>\n",
     37        "  <tbody>\n",
     38        "    <tr>\n",
     39        "      <th>0</th>\n",
     40        "      <td>0.947152</td>\n",
     41        "      <td>0.182652</td>\n",
     42        "      <td>0.116222</td>\n",
     43        "      <td>0.249947</td>\n",
     44        "      <td>0.212487</td>\n",
     45        "      <td>cheese</td>\n",
     46        "    </tr>\n",
     47        "    <tr>\n",
     48        "      <th>1</th>\n",
     49        "      <td>0.932965</td>\n",
     50        "      <td>0.404326</td>\n",
     51        "      <td>0.111577</td>\n",
     52        "      <td>0.098454</td>\n",
     53        "      <td>0.684146</td>\n",
     54        "      <td>cheese</td>\n",
     55        "    </tr>\n",
     56        "    <tr>\n",
     57        "      <th>2</th>\n",
     58        "      <td>0.831340</td>\n",
     59        "      <td>0.641161</td>\n",
     60        "      <td>0.722503</td>\n",
     61        "      <td>0.719412</td>\n",
     62        "      <td>0.749303</td>\n",
     63        "      <td>pepper</td>\n",
     64        "    </tr>\n",
     65        "    <tr>\n",
     66        "      <th>3</th>\n",
     67        "      <td>0.088983</td>\n",
     68        "      <td>0.099260</td>\n",
     69        "      <td>0.832301</td>\n",
     70        "      <td>0.269737</td>\n",
     71        "      <td>0.382743</td>\n",
     72        "      <td>cheese</td>\n",
     73        "    </tr>\n",
     74        "    <tr>\n",
     75        "      <th>4</th>\n",
     76        "      <td>0.569650</td>\n",
     77        "      <td>0.321217</td>\n",
     78        "      <td>0.849422</td>\n",
     79        "      <td>0.765569</td>\n",
     80        "      <td>0.082142</td>\n",
     81        "      <td>cheese</td>\n",
     82        "    </tr>\n",
     83        "    <tr>\n",
     84        "      <th>...</th>\n",
     85        "      <td>...</td>\n",
     86        "      <td>...</td>\n",
     87        "      <td>...</td>\n",
     88        "      <td>...</td>\n",
     89        "      <td>...</td>\n",
     90        "      <td>...</td>\n",
     91        "    </tr>\n",
     92        "    <tr>\n",
     93        "      <th>95</th>\n",
     94        "      <td>0.999064</td>\n",
     95        "      <td>0.421309</td>\n",
     96        "      <td>0.795260</td>\n",
     97        "      <td>0.200927</td>\n",
     98        "      <td>0.811947</td>\n",
     99        "      <td>pepper</td>\n",
    100        "    </tr>\n",
    101        "    <tr>\n",
    102        "      <th>96</th>\n",
    103        "      <td>0.913032</td>\n",
    104        "      <td>0.158652</td>\n",
    105        "      <td>0.072846</td>\n",
    106        "      <td>0.320127</td>\n",
    107        "      <td>0.847452</td>\n",
    108        "      <td>cheese</td>\n",
    109        "    </tr>\n",
    110        "    <tr>\n",
    111        "      <th>97</th>\n",
    112        "      <td>0.453406</td>\n",
    113        "      <td>0.829704</td>\n",
    114        "      <td>0.076251</td>\n",
    115        "      <td>0.327103</td>\n",
    116        "      <td>0.698135</td>\n",
    117        "      <td>cheese</td>\n",
    118        "    </tr>\n",
    119        "    <tr>\n",
    120        "      <th>98</th>\n",
    121        "      <td>0.465324</td>\n",
    122        "      <td>0.410674</td>\n",
    123        "      <td>0.752463</td>\n",
    124        "      <td>0.858177</td>\n",
    125        "      <td>0.078763</td>\n",
    126        "      <td>cheese</td>\n",
    127        "    </tr>\n",
    128        "    <tr>\n",
    129        "      <th>99</th>\n",
    130        "      <td>0.931791</td>\n",
    131        "      <td>0.349814</td>\n",
    132        "      <td>0.202655</td>\n",
    133        "      <td>0.480509</td>\n",
    134        "      <td>0.253459</td>\n",
    135        "      <td>cheese</td>\n",
    136        "    </tr>\n",
    137        "  </tbody>\n",
    138        "</table>\n",
    139        "<p>100 rows × 6 columns</p>\n",
    140        "</div>"
    141       ],
    142       "text/plain": [
    143        "           0         1         2         3         4       5\n",
    144        "0   0.947152  0.182652  0.116222  0.249947  0.212487  cheese\n",
    145        "1   0.932965  0.404326  0.111577  0.098454  0.684146  cheese\n",
    146        "2   0.831340  0.641161  0.722503  0.719412  0.749303  pepper\n",
    147        "3   0.088983  0.099260  0.832301  0.269737  0.382743  cheese\n",
    148        "4   0.569650  0.321217  0.849422  0.765569  0.082142  cheese\n",
    149        "..       ...       ...       ...       ...       ...     ...\n",
    150        "95  0.999064  0.421309  0.795260  0.200927  0.811947  pepper\n",
    151        "96  0.913032  0.158652  0.072846  0.320127  0.847452  cheese\n",
    152        "97  0.453406  0.829704  0.076251  0.327103  0.698135  cheese\n",
    153        "98  0.465324  0.410674  0.752463  0.858177  0.078763  cheese\n",
    154        "99  0.931791  0.349814  0.202655  0.480509  0.253459  cheese\n",
    155        "\n",
    156        "[100 rows x 6 columns]"
    157       ]
    158      },
    159      "execution_count": 135,
    160      "metadata": {},
    161      "output_type": "execute_result"
    162     }
    163    ],
    164    "source": [
    165     "import pandas as pd\n",
    166     "import numpy as np\n",
    167     "\n",
    168     "# Build 100 rows: five uniform [0, 1) features plus a text label derived from their norm.\n",
    169     "data = []\n",
    170     "for i in range(100):\n",
    171     "    rnd0 = np.random.random()\n",
    172     "    rnd1 = np.random.random()\n",
    173     "    rnd2 = np.random.random()\n",
    174     "    rnd3 = np.random.random()\n",
    175     "    rnd4 = np.random.random()\n",
    176     "\n",
    177     "    # Euclidean norm of the five features. Named `norm` because calling it `sum`\n",
    178     "    # would shadow the builtin sum() for every later cell run in this kernel.\n",
    179     "    norm = np.sqrt(rnd0 * rnd0 + rnd1 * rnd1 + rnd2 * rnd2 + rnd3 * rnd3 + rnd4 * rnd4)\n",
    180     "    # Rows whose norm exceeds 1.5 are labelled 'pepper'; every other row is 'cheese'.\n",
    181     "    txt = 'pepper' if norm > 1.5 else 'cheese'\n",
    182     "    data.append([rnd0, rnd1, rnd2, rnd3, rnd4, txt])\n",
    183     "\n",
    184     "# No column names are given, so pandas labels the columns with the integers 0-5.\n",
    185     "df = pd.DataFrame(data=data)\n",
    186     "df"
    187    ]
    188   },
    189   {
    190    "cell_type": "code",
    191    "execution_count": 136,
    192    "metadata": {},
    193    "outputs": [
    194     {
    195      "data": {
    196       "text/plain": [
    197        "0    float64\n",
    198        "1    float64\n",
    199        "2    float64\n",
    200        "3    float64\n",
    201        "4    float64\n",
    202        "5     object\n",
    203        "dtype: object"
    204       ]
    205      },
    206      "execution_count": 136,
    207      "metadata": {},
    208      "output_type": "execute_result"
    209     }
    210    ],
    211    "source": [
    212     "df.dtypes"
    213    ]
    214   },
    215   {
    216    "cell_type": "code",
    217    "execution_count": 137,
    218    "metadata": {},
    219    "outputs": [],
    220    "source": [
    221     "from sklearn.preprocessing import OneHotEncoder"
    222    ]
    223   },
    224   {
    225    "cell_type": "code",
    226    "execution_count": 138,
    227    "metadata": {},
    228    "outputs": [
    229     {
    230      "data": {
    231       "text/plain": [
    232        "array([[1., 0.],\n",
    233        "       [1., 0.],\n",
    234        "       [0., 1.],\n",
    235        "       [1., 0.],\n",
    236        "       [1., 0.],\n",
    237        "       [1., 0.],\n",
    238        "       [1., 0.],\n",
    239        "       [1., 0.],\n",
    240        "       [1., 0.],\n",
    241        "       [1., 0.],\n",
    242        "       [0., 1.],\n",
    243        "       [0., 1.],\n",
    244        "       [0., 1.],\n",
    245        "       [0., 1.],\n",
    246        "       [0., 1.],\n",
    247        "       [1., 0.],\n",
    248        "       [0., 1.],\n",
    249        "       [1., 0.],\n",
    250        "       [0., 1.],\n",
    251        "       [0., 1.],\n",
    252        "       [1., 0.],\n",
    253        "       [1., 0.],\n",
    254        "       [1., 0.],\n",
    255        "       [1., 0.],\n",
    256        "       [0., 1.],\n",
    257        "       [1., 0.],\n",
    258        "       [1., 0.],\n",
    259        "       [1., 0.],\n",
    260        "       [1., 0.],\n",
    261        "       [1., 0.],\n",
    262        "       [1., 0.],\n",
    263        "       [1., 0.],\n",
    264        "       [1., 0.],\n",
    265        "       [1., 0.],\n",
    266        "       [1., 0.],\n",
    267        "       [1., 0.],\n",
    268        "       [1., 0.],\n",
    269        "       [1., 0.],\n",
    270        "       [1., 0.],\n",
    271        "       [1., 0.],\n",
    272        "       [1., 0.],\n",
    273        "       [1., 0.],\n",
    274        "       [1., 0.],\n",
    275        "       [0., 1.],\n",
    276        "       [1., 0.],\n",
    277        "       [1., 0.],\n",
    278        "       [1., 0.],\n",
    279        "       [1., 0.],\n",
    280        "       [1., 0.],\n",
    281        "       [1., 0.],\n",
    282        "       [1., 0.],\n",
    283        "       [1., 0.],\n",
    284        "       [1., 0.],\n",
    285        "       [1., 0.],\n",
    286        "       [1., 0.],\n",
    287        "       [1., 0.],\n",
    288        "       [1., 0.],\n",
    289        "       [1., 0.],\n",
    290        "       [1., 0.],\n",
    291        "       [0., 1.],\n",
    292        "       [1., 0.],\n",
    293        "       [1., 0.],\n",
    294        "       [0., 1.],\n",
    295        "       [1., 0.],\n",
    296        "       [1., 0.],\n",
    297        "       [0., 1.],\n",
    298        "       [1., 0.],\n",
    299        "       [1., 0.],\n",
    300        "       [1., 0.],\n",
    301        "       [0., 1.],\n",
    302        "       [1., 0.],\n",
    303        "       [1., 0.],\n",
    304        "       [1., 0.],\n",
    305        "       [1., 0.],\n",
    306        "       [1., 0.],\n",
    307        "       [1., 0.],\n",
    308        "       [1., 0.],\n",
    309        "       [1., 0.],\n",
    310        "       [1., 0.],\n",
    311        "       [1., 0.],\n",
    312        "       [1., 0.],\n",
    313        "       [1., 0.],\n",
    314        "       [1., 0.],\n",
    315        "       [0., 1.],\n",
    316        "       [1., 0.],\n",
    317        "       [1., 0.],\n",
    318        "       [1., 0.],\n",
    319        "       [1., 0.],\n",
    320        "       [1., 0.],\n",
    321        "       [1., 0.],\n",
    322        "       [1., 0.],\n",
    323        "       [1., 0.],\n",
    324        "       [1., 0.],\n",
    325        "       [1., 0.],\n",
    326        "       [1., 0.],\n",
    327        "       [0., 1.],\n",
    328        "       [1., 0.],\n",
    329        "       [1., 0.],\n",
    330        "       [1., 0.],\n",
    331        "       [1., 0.]])"
    332       ]
    333      },
    334      "execution_count": 138,
    335      "metadata": {},
    336      "output_type": "execute_result"
    337     }
    338    ],
    339    "source": [
    340     "oh2 = OneHotEncoder(sparse_output=False)  # dense ndarray instead of a sparse matrix\n",
    341     "out2 = oh2.fit(df[[5]]).transform(df[[5]])  # learn the label values, then encode them\n",
    342     "out2"
    343    ]
    344   },
    345   {
    346    "cell_type": "code",
    347    "execution_count": 139,
    348    "metadata": {},
    349    "outputs": [],
    350    "source": [
    351     "comb = pd.DataFrame(out2, columns=oh2.get_feature_names_out(['favorite']))\n",
    352     "# Replace the raw text label (column 5) with its indicator columns, placed first.\n",
    353     "combined = pd.concat([comb, df.drop(columns=[5])], axis=1)"
    354    ]
    355   },
    356   {
    357    "cell_type": "code",
    358    "execution_count": 140,
    359    "metadata": {},
    360    "outputs": [
    361     {
    362      "data": {
    363       "text/html": [
    364        "<div>\n",
    365        "<style scoped>\n",
    366        "    .dataframe tbody tr th:only-of-type {\n",
    367        "        vertical-align: middle;\n",
    368        "    }\n",
    369        "\n",
    370        "    .dataframe tbody tr th {\n",
    371        "        vertical-align: top;\n",
    372        "    }\n",
    373        "\n",
    374        "    .dataframe thead th {\n",
    375        "        text-align: right;\n",
    376        "    }\n",
    377        "</style>\n",
    378        "<table border=\"1\" class=\"dataframe\">\n",
    379        "  <thead>\n",
    380        "    <tr style=\"text-align: right;\">\n",
    381        "      <th></th>\n",
    382        "      <th>favorite_cheese</th>\n",
    383        "      <th>favorite_pepper</th>\n",
    384        "      <th>0</th>\n",
    385        "      <th>1</th>\n",
    386        "      <th>2</th>\n",
    387        "      <th>3</th>\n",
    388        "      <th>4</th>\n",
    389        "    </tr>\n",
    390        "  </thead>\n",
    391        "  <tbody>\n",
    392        "    <tr>\n",
    393        "      <th>0</th>\n",
    394        "      <td>1.0</td>\n",
    395        "      <td>0.0</td>\n",
    396        "      <td>0.947152</td>\n",
    397        "      <td>0.182652</td>\n",
    398        "      <td>0.116222</td>\n",
    399        "      <td>0.249947</td>\n",
    400        "      <td>0.212487</td>\n",
    401        "    </tr>\n",
    402        "    <tr>\n",
    403        "      <th>1</th>\n",
    404        "      <td>1.0</td>\n",
    405        "      <td>0.0</td>\n",
    406        "      <td>0.932965</td>\n",
    407        "      <td>0.404326</td>\n",
    408        "      <td>0.111577</td>\n",
    409        "      <td>0.098454</td>\n",
    410        "      <td>0.684146</td>\n",
    411        "    </tr>\n",
    412        "    <tr>\n",
    413        "      <th>2</th>\n",
    414        "      <td>0.0</td>\n",
    415        "      <td>1.0</td>\n",
    416        "      <td>0.831340</td>\n",
    417        "      <td>0.641161</td>\n",
    418        "      <td>0.722503</td>\n",
    419        "      <td>0.719412</td>\n",
    420        "      <td>0.749303</td>\n",
    421        "    </tr>\n",
    422        "    <tr>\n",
    423        "      <th>3</th>\n",
    424        "      <td>1.0</td>\n",
    425        "      <td>0.0</td>\n",
    426        "      <td>0.088983</td>\n",
    427        "      <td>0.099260</td>\n",
    428        "      <td>0.832301</td>\n",
    429        "      <td>0.269737</td>\n",
    430        "      <td>0.382743</td>\n",
    431        "    </tr>\n",
    432        "    <tr>\n",
    433        "      <th>4</th>\n",
    434        "      <td>1.0</td>\n",
    435        "      <td>0.0</td>\n",
    436        "      <td>0.569650</td>\n",
    437        "      <td>0.321217</td>\n",
    438        "      <td>0.849422</td>\n",
    439        "      <td>0.765569</td>\n",
    440        "      <td>0.082142</td>\n",
    441        "    </tr>\n",
    442        "  </tbody>\n",
    443        "</table>\n",
    444        "</div>"
    445       ],
    446       "text/plain": [
    447        "   favorite_cheese  favorite_pepper         0         1         2         3  \\\n",
    448        "0              1.0              0.0  0.947152  0.182652  0.116222  0.249947   \n",
    449        "1              1.0              0.0  0.932965  0.404326  0.111577  0.098454   \n",
    450        "2              0.0              1.0  0.831340  0.641161  0.722503  0.719412   \n",
    451        "3              1.0              0.0  0.088983  0.099260  0.832301  0.269737   \n",
    452        "4              1.0              0.0  0.569650  0.321217  0.849422  0.765569   \n",
    453        "\n",
    454        "          4  \n",
    455        "0  0.212487  \n",
    456        "1  0.684146  \n",
    457        "2  0.749303  \n",
    458        "3  0.382743  \n",
    459        "4  0.082142  "
    460       ]
    461      },
    462      "execution_count": 140,
    463      "metadata": {},
    464      "output_type": "execute_result"
    465     }
    466    ],
    467    "source": [
    468     "combined.head()"
    469    ]
    470   },
    471   {
    472    "cell_type": "code",
    473    "execution_count": 141,
    474    "metadata": {},
    475    "outputs": [],
    476    "source": [
    477     "from sklearn.decomposition import PCA\n",
    478     "\n",
    479     "# Reduce every column of `combined` (indicator columns included) to three components.\n",
    480     "pca = PCA(n_components=3)\n",
    481     "out = pca.fit_transform(combined.to_numpy())"
    482    ]
    483   },
    484   {
    485    "cell_type": "code",
    486    "execution_count": null,
    487    "metadata": {},
    488    "outputs": [],
    489    "source": [
    490     "import plotly.express as px\n",
    491     "# Colour by the original text label: a string column yields a discrete legend, not a 0-1 colour bar.\n",
    492     "px.scatter_3d(x=out[:,0], y=out[:,1], z=out[:,2], color=df[5].rename('favorite'), labels={'x': 'PC1', 'y': 'PC2', 'z': 'PC3'})"
    493    ]
    494   }
    495  ],
    496  "metadata": {
    497   "kernelspec": {
    498    "display_name": ".venv",
    499    "language": "python",
    500    "name": "python3"
    501   },
    502   "language_info": {
    503    "codemirror_mode": {
    504     "name": "ipython",
    505     "version": 3
    506    },
    507    "file_extension": ".py",
    508    "mimetype": "text/x-python",
    509    "name": "python",
    510    "nbconvert_exporter": "python",
    511    "pygments_lexer": "ipython3",
    512    "version": "3.11.2"
    513   }
    514  },
    515  "nbformat": 4,
    516  "nbformat_minor": 2
    517 }